diff --git a/cts/cli/regression.crm_mon.exp b/cts/cli/regression.crm_mon.exp index fa516aada3..c823c3674d 100644 --- a/cts/cli/regression.crm_mon.exp +++ b/cts/cli/regression.crm_mon.exp @@ -1,5039 +1,5039 @@ =#=#=#= Begin test: Basic text output =#=#=#= Cluster Summary: * Stack: corosync * Current DC: cluster02 (version) - partition with quorum * Last updated: * Last change: * 5 nodes configured * 32 resource instances configured (4 DISABLED) Node List: * Online: [ cluster01 cluster02 ] * GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ] Active Resources: * Clone Set: ping-clone [ping]: * Started: [ cluster01 cluster02 ] * Fencing (stonith:fence_xvm): Started cluster01 * dummy (ocf:pacemaker:Dummy): Started cluster02 * Container bundle set: httpd-bundle [pcmk:http]: * httpd-bundle-0 (192.168.122.131) (ocf:heartbeat:apache): Started cluster01 * httpd-bundle-1 (192.168.122.132) (ocf:heartbeat:apache): Started cluster02 * httpd-bundle-2 (192.168.122.133) (ocf:heartbeat:apache): Stopped * Resource Group: exim-group: * Public-IP (ocf:heartbeat:IPaddr): Started cluster02 * Email (lsb:exim): Started cluster02 * Clone Set: mysql-clone-group [mysql-group]: * Started: [ cluster01 cluster02 ] * Clone Set: promotable-clone [promotable-rsc] (promotable): * Promoted: [ cluster02 ] * Unpromoted: [ cluster01 ] =#=#=#= End test: Basic text output - OK (0) =#=#=#= * Passed: crm_mon - Basic text output =#=#=#= Begin test: XML output =#=#=#= =#=#=#= End test: XML output - OK (0) =#=#=#= * Passed: crm_mon - XML output =#=#=#= Begin test: Basic text output without node section =#=#=#= Cluster Summary: * Stack: corosync * Current DC: cluster02 (version) - partition with quorum * Last updated: * Last change: * 5 nodes configured * 32 resource instances configured (4 DISABLED) Active Resources: * Clone Set: ping-clone [ping]: * Started: [ cluster01 cluster02 ] * Fencing (stonith:fence_xvm): Started cluster01 * dummy (ocf:pacemaker:Dummy): Started cluster02 * Container bundle set: 
httpd-bundle [pcmk:http]: * httpd-bundle-0 (192.168.122.131) (ocf:heartbeat:apache): Started cluster01 * httpd-bundle-1 (192.168.122.132) (ocf:heartbeat:apache): Started cluster02 * httpd-bundle-2 (192.168.122.133) (ocf:heartbeat:apache): Stopped * Resource Group: exim-group: * Public-IP (ocf:heartbeat:IPaddr): Started cluster02 * Email (lsb:exim): Started cluster02 * Clone Set: mysql-clone-group [mysql-group]: * Started: [ cluster01 cluster02 ] * Clone Set: promotable-clone [promotable-rsc] (promotable): * Promoted: [ cluster02 ] * Unpromoted: [ cluster01 ] =#=#=#= End test: Basic text output without node section - OK (0) =#=#=#= * Passed: crm_mon - Basic text output without node section =#=#=#= Begin test: XML output without the node section =#=#=#= =#=#=#= End test: XML output without the node section - OK (0) =#=#=#= * Passed: crm_mon - XML output without the node section =#=#=#= Begin test: Text output with only the node section =#=#=#= Node List: * Online: [ cluster01 cluster02 ] * GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ] =#=#=#= End test: Text output with only the node section - OK (0) =#=#=#= * Passed: crm_mon - Text output with only the node section =#=#=#= Begin test: Complete text output =#=#=#= Cluster Summary: * Stack: corosync * Current DC: cluster02 (version) - partition with quorum * Last updated: * Last change: * 5 nodes configured * 32 resource instances configured (4 DISABLED) Node List: * Online: [ cluster01 cluster02 ] * GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ] Active Resources: * Clone Set: ping-clone [ping]: * Started: [ cluster01 cluster02 ] * Fencing (stonith:fence_xvm): Started cluster01 * dummy (ocf:pacemaker:Dummy): Started cluster02 * Container bundle set: httpd-bundle [pcmk:http]: * httpd-bundle-0 (192.168.122.131) (ocf:heartbeat:apache): Started cluster01 * httpd-bundle-1 (192.168.122.132) (ocf:heartbeat:apache): Started cluster02 * httpd-bundle-2 (192.168.122.133) (ocf:heartbeat:apache): Stopped * Resource Group: 
exim-group: * Public-IP (ocf:heartbeat:IPaddr): Started cluster02 * Email (lsb:exim): Started cluster02 * Clone Set: mysql-clone-group [mysql-group]: * Started: [ cluster01 cluster02 ] * Clone Set: promotable-clone [promotable-rsc] (promotable): * Promoted: [ cluster02 ] * Unpromoted: [ cluster01 ] Node Attributes: * Node: cluster01: * location : office * pingd : 1000 * Node: cluster02: * pingd : 1000 Operations: * Node: cluster02: * ping: migration-threshold=1000000: * (11) start * (12) monitor: interval="10000ms" * dummy: migration-threshold=1000000: * (18) start * (19) monitor: interval="60000ms" * Public-IP: migration-threshold=1000000: * (2) start * Email: migration-threshold=1000000: * (2) start * mysql-proxy: migration-threshold=1000000: * (2) start * (3) monitor: interval="10000ms" * promotable-rsc: migration-threshold=1000000: * (4) monitor: interval="10000ms" * (5) cancel: interval="10000ms" * (6) promote * (7) monitor: interval="5000ms" * httpd-bundle-ip-192.168.122.132: migration-threshold=1000000: * (2) start * (3) monitor: interval="60000ms" * httpd-bundle-docker-1: migration-threshold=1000000: * (2) start * (3) monitor: interval="60000ms" * httpd-bundle-1: migration-threshold=1000000: * (2) start * (3) monitor: interval="30000ms" * Node: cluster01: * ping: migration-threshold=1000000: * (17) start * (18) monitor: interval="10000ms" * Fencing: migration-threshold=1000000: * (15) start * (20) monitor: interval="60000ms" * dummy: migration-threshold=1000000: * (16) stop * mysql-proxy: migration-threshold=1000000: * (2) start * (3) monitor: interval="10000ms" * promotable-rsc: migration-threshold=1000000: * (2) start * (4) monitor: interval="10000ms" * httpd-bundle-ip-192.168.122.131: migration-threshold=1000000: * (2) start * (3) monitor: interval="60000ms" * httpd-bundle-docker-0: migration-threshold=1000000: * (2) start * (3) monitor: interval="60000ms" * httpd-bundle-0: migration-threshold=1000000: * (2) start * (3) monitor: interval="30000ms" * 
Node: httpd-bundle-0: * httpd: migration-threshold=1000000: * (1) start * Node: httpd-bundle-1: * httpd: migration-threshold=1000000: * (1) start Negative Location Constraints: * not-on-cluster1 prevents dummy from running on cluster01 =#=#=#= End test: Complete text output - OK (0) =#=#=#= * Passed: crm_mon - Complete text output =#=#=#= Begin test: Complete text output with detail =#=#=#= Cluster Summary: * Stack: corosync * Current DC: cluster02 (2) (version) - partition with quorum * Last updated: * Last change: * 5 nodes configured * 32 resource instances configured (4 DISABLED) Node List: * Node cluster01 (1): online, feature set <3.15.1 * Node cluster02 (2): online, feature set <3.15.1 * GuestNode httpd-bundle-0@cluster01: online * GuestNode httpd-bundle-1@cluster02: online * GuestNode httpd-bundle-2@: OFFLINE Active Resources: * Clone Set: ping-clone [ping]: * ping (ocf:pacemaker:ping): Started cluster02 * ping (ocf:pacemaker:ping): Started cluster01 * Fencing (stonith:fence_xvm): Started cluster01 * dummy (ocf:pacemaker:Dummy): Started cluster02 * Container bundle set: httpd-bundle [pcmk:http]: * Replica[0] * httpd-bundle-ip-192.168.122.131 (ocf:heartbeat:IPaddr2): Started cluster01 * httpd (ocf:heartbeat:apache): Started httpd-bundle-0 * httpd-bundle-docker-0 (ocf:heartbeat:docker): Started cluster01 * httpd-bundle-0 (ocf:pacemaker:remote): Started cluster01 * Replica[1] * httpd-bundle-ip-192.168.122.132 (ocf:heartbeat:IPaddr2): Started cluster02 * httpd (ocf:heartbeat:apache): Started httpd-bundle-1 * httpd-bundle-docker-1 (ocf:heartbeat:docker): Started cluster02 * httpd-bundle-1 (ocf:pacemaker:remote): Started cluster02 * Replica[2] * httpd-bundle-ip-192.168.122.133 (ocf:heartbeat:IPaddr2): Stopped * httpd (ocf:heartbeat:apache): Stopped * httpd-bundle-docker-2 (ocf:heartbeat:docker): Stopped * httpd-bundle-2 (ocf:pacemaker:remote): Stopped * Resource Group: exim-group: * Public-IP (ocf:heartbeat:IPaddr): Started cluster02 * Email (lsb:exim): Started 
cluster02 * Clone Set: mysql-clone-group [mysql-group]: * Resource Group: mysql-group:0: * mysql-proxy (lsb:mysql-proxy): Started cluster02 * Resource Group: mysql-group:1: * mysql-proxy (lsb:mysql-proxy): Started cluster01 * Clone Set: promotable-clone [promotable-rsc] (promotable): * promotable-rsc (ocf:pacemaker:Stateful): Promoted cluster02 (test_description) * promotable-rsc (ocf:pacemaker:Stateful): Unpromoted cluster01 (test_description) * promotable-rsc (ocf:pacemaker:Stateful): Stopped (test_description) * promotable-rsc (ocf:pacemaker:Stateful): Stopped (test_description) * promotable-rsc (ocf:pacemaker:Stateful): Stopped (test_description) Node Attributes: * Node: cluster01 (1): * location : office * pingd : 1000 * Node: cluster02 (2): * pingd : 1000 Operations: * Node: cluster02 (2): * ping: migration-threshold=1000000: * (11) start * (12) monitor: interval="10000ms" * dummy: migration-threshold=1000000: * (18) start * (19) monitor: interval="60000ms" * Public-IP: migration-threshold=1000000: * (2) start * Email: migration-threshold=1000000: * (2) start * mysql-proxy: migration-threshold=1000000: * (2) start * (3) monitor: interval="10000ms" * promotable-rsc: migration-threshold=1000000: * (4) monitor: interval="10000ms" * (5) cancel: interval="10000ms" * (6) promote * (7) monitor: interval="5000ms" * httpd-bundle-ip-192.168.122.132: migration-threshold=1000000: * (2) start * (3) monitor: interval="60000ms" * httpd-bundle-docker-1: migration-threshold=1000000: * (2) start * (3) monitor: interval="60000ms" * httpd-bundle-1: migration-threshold=1000000: * (2) start * (3) monitor: interval="30000ms" * Node: cluster01 (1): * ping: migration-threshold=1000000: * (17) start * (18) monitor: interval="10000ms" * Fencing: migration-threshold=1000000: * (15) start * (20) monitor: interval="60000ms" * dummy: migration-threshold=1000000: * (16) stop * mysql-proxy: migration-threshold=1000000: * (2) start * (3) monitor: interval="10000ms" * promotable-rsc: 
migration-threshold=1000000: * (2) start * (4) monitor: interval="10000ms" * httpd-bundle-ip-192.168.122.131: migration-threshold=1000000: * (2) start * (3) monitor: interval="60000ms" * httpd-bundle-docker-0: migration-threshold=1000000: * (2) start * (3) monitor: interval="60000ms" * httpd-bundle-0: migration-threshold=1000000: * (2) start * (3) monitor: interval="30000ms" * Node: httpd-bundle-0@cluster01: * httpd: migration-threshold=1000000: * (1) start * Node: httpd-bundle-1@cluster02: * httpd: migration-threshold=1000000: * (1) start Negative Location Constraints: * not-on-cluster1 prevents dummy from running on cluster01 (1) =#=#=#= End test: Complete text output with detail - OK (0) =#=#=#= * Passed: crm_mon - Complete text output with detail =#=#=#= Begin test: Complete brief text output =#=#=#= Cluster Summary: * Stack: corosync * Current DC: cluster02 (version) - partition with quorum * Last updated: * Last change: * 5 nodes configured * 32 resource instances configured (4 DISABLED) Node List: * Online: [ cluster01 cluster02 ] * GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ] Active Resources: * 1 (ocf:pacemaker:Dummy): Active cluster02 * 1 (stonith:fence_xvm): Active cluster01 * Clone Set: ping-clone [ping]: * Started: [ cluster01 cluster02 ] * Container bundle set: httpd-bundle [pcmk:http]: * httpd-bundle-0 (192.168.122.131) (ocf:heartbeat:apache): Started cluster01 * httpd-bundle-1 (192.168.122.132) (ocf:heartbeat:apache): Started cluster02 * httpd-bundle-2 (192.168.122.133) (ocf:heartbeat:apache): Stopped * Resource Group: exim-group: * 1/1 (lsb:exim): Active cluster02 * 1/1 (ocf:heartbeat:IPaddr): Active cluster02 * Clone Set: mysql-clone-group [mysql-group]: * Started: [ cluster01 cluster02 ] * Clone Set: promotable-clone [promotable-rsc] (promotable): * Promoted: [ cluster02 ] * Unpromoted: [ cluster01 ] Node Attributes: * Node: cluster01: * location : office * pingd : 1000 * Node: cluster02: * pingd : 1000 Operations: * Node: cluster02: * ping: 
migration-threshold=1000000: * (11) start * (12) monitor: interval="10000ms" * dummy: migration-threshold=1000000: * (18) start * (19) monitor: interval="60000ms" * Public-IP: migration-threshold=1000000: * (2) start * Email: migration-threshold=1000000: * (2) start * mysql-proxy: migration-threshold=1000000: * (2) start * (3) monitor: interval="10000ms" * promotable-rsc: migration-threshold=1000000: * (4) monitor: interval="10000ms" * (5) cancel: interval="10000ms" * (6) promote * (7) monitor: interval="5000ms" * httpd-bundle-ip-192.168.122.132: migration-threshold=1000000: * (2) start * (3) monitor: interval="60000ms" * httpd-bundle-docker-1: migration-threshold=1000000: * (2) start * (3) monitor: interval="60000ms" * httpd-bundle-1: migration-threshold=1000000: * (2) start * (3) monitor: interval="30000ms" * Node: cluster01: * ping: migration-threshold=1000000: * (17) start * (18) monitor: interval="10000ms" * Fencing: migration-threshold=1000000: * (15) start * (20) monitor: interval="60000ms" * dummy: migration-threshold=1000000: * (16) stop * mysql-proxy: migration-threshold=1000000: * (2) start * (3) monitor: interval="10000ms" * promotable-rsc: migration-threshold=1000000: * (2) start * (4) monitor: interval="10000ms" * httpd-bundle-ip-192.168.122.131: migration-threshold=1000000: * (2) start * (3) monitor: interval="60000ms" * httpd-bundle-docker-0: migration-threshold=1000000: * (2) start * (3) monitor: interval="60000ms" * httpd-bundle-0: migration-threshold=1000000: * (2) start * (3) monitor: interval="30000ms" * Node: httpd-bundle-0: * httpd: migration-threshold=1000000: * (1) start * Node: httpd-bundle-1: * httpd: migration-threshold=1000000: * (1) start Negative Location Constraints: * not-on-cluster1 prevents dummy from running on cluster01 =#=#=#= End test: Complete brief text output - OK (0) =#=#=#= * Passed: crm_mon - Complete brief text output =#=#=#= Begin test: Complete text output grouped by node =#=#=#= Cluster Summary: * Stack: corosync * 
Current DC: cluster02 (version) - partition with quorum * Last updated: * Last change: * 5 nodes configured * 32 resource instances configured (4 DISABLED) Node List: * Node cluster01: online: * Resources: * ping (ocf:pacemaker:ping): Started * Fencing (stonith:fence_xvm): Started * mysql-proxy (lsb:mysql-proxy): Started * promotable-rsc (ocf:pacemaker:Stateful): Unpromoted (test_description) * httpd-bundle-ip-192.168.122.131 (ocf:heartbeat:IPaddr2): Started * httpd-bundle-docker-0 (ocf:heartbeat:docker): Started * Node cluster02: online: * Resources: * ping (ocf:pacemaker:ping): Started * dummy (ocf:pacemaker:Dummy): Started * Public-IP (ocf:heartbeat:IPaddr): Started * Email (lsb:exim): Started * mysql-proxy (lsb:mysql-proxy): Started * promotable-rsc (ocf:pacemaker:Stateful): Promoted (test_description) * httpd-bundle-ip-192.168.122.132 (ocf:heartbeat:IPaddr2): Started * httpd-bundle-docker-1 (ocf:heartbeat:docker): Started * GuestNode httpd-bundle-0: online: * Resources: * httpd (ocf:heartbeat:apache): Started * GuestNode httpd-bundle-1: online: * Resources: * httpd (ocf:heartbeat:apache): Started * GuestNode httpd-bundle-2: OFFLINE: * Resources: Node Attributes: * Node: cluster01: * location : office * pingd : 1000 * Node: cluster02: * pingd : 1000 Operations: * Node: cluster02: * ping: migration-threshold=1000000: * (11) start * (12) monitor: interval="10000ms" * dummy: migration-threshold=1000000: * (18) start * (19) monitor: interval="60000ms" * Public-IP: migration-threshold=1000000: * (2) start * Email: migration-threshold=1000000: * (2) start * mysql-proxy: migration-threshold=1000000: * (2) start * (3) monitor: interval="10000ms" * promotable-rsc: migration-threshold=1000000: * (4) monitor: interval="10000ms" * (5) cancel: interval="10000ms" * (6) promote * (7) monitor: interval="5000ms" * httpd-bundle-ip-192.168.122.132: migration-threshold=1000000: * (2) start * (3) monitor: interval="60000ms" * httpd-bundle-docker-1: migration-threshold=1000000: * 
(2) start * (3) monitor: interval="60000ms" * httpd-bundle-1: migration-threshold=1000000: * (2) start * (3) monitor: interval="30000ms" * Node: cluster01: * ping: migration-threshold=1000000: * (17) start * (18) monitor: interval="10000ms" * Fencing: migration-threshold=1000000: * (15) start * (20) monitor: interval="60000ms" * dummy: migration-threshold=1000000: * (16) stop * mysql-proxy: migration-threshold=1000000: * (2) start * (3) monitor: interval="10000ms" * promotable-rsc: migration-threshold=1000000: * (2) start * (4) monitor: interval="10000ms" * httpd-bundle-ip-192.168.122.131: migration-threshold=1000000: * (2) start * (3) monitor: interval="60000ms" * httpd-bundle-docker-0: migration-threshold=1000000: * (2) start * (3) monitor: interval="60000ms" * httpd-bundle-0: migration-threshold=1000000: * (2) start * (3) monitor: interval="30000ms" * Node: httpd-bundle-0: * httpd: migration-threshold=1000000: * (1) start * Node: httpd-bundle-1: * httpd: migration-threshold=1000000: * (1) start Negative Location Constraints: * not-on-cluster1 prevents dummy from running on cluster01 =#=#=#= End test: Complete text output grouped by node - OK (0) =#=#=#= * Passed: crm_mon - Complete text output grouped by node =#=#=#= Begin test: Complete brief text output grouped by node =#=#=#= Cluster Summary: * Stack: corosync * Current DC: cluster02 (version) - partition with quorum * Last updated: * Last change: * 5 nodes configured * 32 resource instances configured (4 DISABLED) Node List: * Node cluster01: online: * Resources: * 1 (lsb:mysql-proxy): Active * 1 (ocf:heartbeat:IPaddr2): Active * 1 (ocf:heartbeat:docker): Active * 1 (ocf:pacemaker:Stateful): Active * 1 (ocf:pacemaker:ping): Active * 1 (ocf:pacemaker:remote): Active * 1 (stonith:fence_xvm): Active * Node cluster02: online: * Resources: * 1 (lsb:exim): Active * 1 (lsb:mysql-proxy): Active * 1 (ocf:heartbeat:IPaddr): Active * 1 (ocf:heartbeat:IPaddr2): Active * 1 (ocf:heartbeat:docker): Active * 1 
(ocf:pacemaker:Dummy): Active * 1 (ocf:pacemaker:Stateful): Active * 1 (ocf:pacemaker:ping): Active * 1 (ocf:pacemaker:remote): Active * GuestNode httpd-bundle-0: online: * Resources: * 1 (ocf:heartbeat:apache): Active * GuestNode httpd-bundle-1: online: * Resources: * 1 (ocf:heartbeat:apache): Active Node Attributes: * Node: cluster01: * location : office * pingd : 1000 * Node: cluster02: * pingd : 1000 Operations: * Node: cluster02: * ping: migration-threshold=1000000: * (11) start * (12) monitor: interval="10000ms" * dummy: migration-threshold=1000000: * (18) start * (19) monitor: interval="60000ms" * Public-IP: migration-threshold=1000000: * (2) start * Email: migration-threshold=1000000: * (2) start * mysql-proxy: migration-threshold=1000000: * (2) start * (3) monitor: interval="10000ms" * promotable-rsc: migration-threshold=1000000: * (4) monitor: interval="10000ms" * (5) cancel: interval="10000ms" * (6) promote * (7) monitor: interval="5000ms" * httpd-bundle-ip-192.168.122.132: migration-threshold=1000000: * (2) start * (3) monitor: interval="60000ms" * httpd-bundle-docker-1: migration-threshold=1000000: * (2) start * (3) monitor: interval="60000ms" * httpd-bundle-1: migration-threshold=1000000: * (2) start * (3) monitor: interval="30000ms" * Node: cluster01: * ping: migration-threshold=1000000: * (17) start * (18) monitor: interval="10000ms" * Fencing: migration-threshold=1000000: * (15) start * (20) monitor: interval="60000ms" * dummy: migration-threshold=1000000: * (16) stop * mysql-proxy: migration-threshold=1000000: * (2) start * (3) monitor: interval="10000ms" * promotable-rsc: migration-threshold=1000000: * (2) start * (4) monitor: interval="10000ms" * httpd-bundle-ip-192.168.122.131: migration-threshold=1000000: * (2) start * (3) monitor: interval="60000ms" * httpd-bundle-docker-0: migration-threshold=1000000: * (2) start * (3) monitor: interval="60000ms" * httpd-bundle-0: migration-threshold=1000000: * (2) start * (3) monitor: interval="30000ms" * 
Node: httpd-bundle-0: * httpd: migration-threshold=1000000: * (1) start * Node: httpd-bundle-1: * httpd: migration-threshold=1000000: * (1) start Negative Location Constraints: * not-on-cluster1 prevents dummy from running on cluster01 =#=#=#= End test: Complete brief text output grouped by node - OK (0) =#=#=#= * Passed: crm_mon - Complete brief text output grouped by node =#=#=#= Begin test: XML output grouped by node =#=#=#= =#=#=#= End test: XML output grouped by node - OK (0) =#=#=#= * Passed: crm_mon - XML output grouped by node =#=#=#= Begin test: Complete text output filtered by node =#=#=#= Cluster Summary: * Stack: corosync * Current DC: cluster02 (version) - partition with quorum * Last updated: * Last change: * 5 nodes configured * 32 resource instances configured (4 DISABLED) Node List: * Online: [ cluster01 ] Active Resources: * Clone Set: ping-clone [ping]: * Started: [ cluster01 ] * Fencing (stonith:fence_xvm): Started cluster01 * Container bundle set: httpd-bundle [pcmk:http]: * httpd-bundle-0 (192.168.122.131) (ocf:heartbeat:apache): Started cluster01 * httpd-bundle-2 (192.168.122.133) (ocf:heartbeat:apache): Stopped * Clone Set: mysql-clone-group [mysql-group]: * Started: [ cluster01 ] * Clone Set: promotable-clone [promotable-rsc] (promotable): * Unpromoted: [ cluster01 ] Node Attributes: * Node: cluster01: * location : office * pingd : 1000 Operations: * Node: cluster01: * ping: migration-threshold=1000000: * (17) start * (18) monitor: interval="10000ms" * Fencing: migration-threshold=1000000: * (15) start * (20) monitor: interval="60000ms" * dummy: migration-threshold=1000000: * (16) stop * mysql-proxy: migration-threshold=1000000: * (2) start * (3) monitor: interval="10000ms" * promotable-rsc: migration-threshold=1000000: * (2) start * (4) monitor: interval="10000ms" * httpd-bundle-ip-192.168.122.131: migration-threshold=1000000: * (2) start * (3) monitor: interval="60000ms" * httpd-bundle-docker-0: migration-threshold=1000000: * (2) start * 
(3) monitor: interval="60000ms" * httpd-bundle-0: migration-threshold=1000000: * (2) start * (3) monitor: interval="30000ms" Negative Location Constraints: * not-on-cluster1 prevents dummy from running on cluster01 =#=#=#= End test: Complete text output filtered by node - OK (0) =#=#=#= * Passed: crm_mon - Complete text output filtered by node =#=#=#= Begin test: XML output filtered by node =#=#=#= =#=#=#= End test: XML output filtered by node - OK (0) =#=#=#= * Passed: crm_mon - XML output filtered by node =#=#=#= Begin test: Complete text output filtered by tag =#=#=#= Cluster Summary: * Stack: corosync * Current DC: cluster02 (version) - partition with quorum * Last updated: * Last change: * 5 nodes configured * 32 resource instances configured (4 DISABLED) Node List: * Online: [ cluster02 ] Active Resources: * Clone Set: ping-clone [ping]: * Started: [ cluster02 ] * dummy (ocf:pacemaker:Dummy): Started cluster02 * Container bundle set: httpd-bundle [pcmk:http]: * httpd-bundle-1 (192.168.122.132) (ocf:heartbeat:apache): Started cluster02 * httpd-bundle-2 (192.168.122.133) (ocf:heartbeat:apache): Stopped * Resource Group: exim-group: * Public-IP (ocf:heartbeat:IPaddr): Started cluster02 * Email (lsb:exim): Started cluster02 * Clone Set: mysql-clone-group [mysql-group]: * Started: [ cluster02 ] * Clone Set: promotable-clone [promotable-rsc] (promotable): * Promoted: [ cluster02 ] Node Attributes: * Node: cluster02: * pingd : 1000 Operations: * Node: cluster02: * ping: migration-threshold=1000000: * (11) start * (12) monitor: interval="10000ms" * dummy: migration-threshold=1000000: * (18) start * (19) monitor: interval="60000ms" * Public-IP: migration-threshold=1000000: * (2) start * Email: migration-threshold=1000000: * (2) start * mysql-proxy: migration-threshold=1000000: * (2) start * (3) monitor: interval="10000ms" * promotable-rsc: migration-threshold=1000000: * (4) monitor: interval="10000ms" * (5) cancel: interval="10000ms" * (6) promote * (7) monitor: 
interval="5000ms" * httpd-bundle-ip-192.168.122.132: migration-threshold=1000000: * (2) start * (3) monitor: interval="60000ms" * httpd-bundle-docker-1: migration-threshold=1000000: * (2) start * (3) monitor: interval="60000ms" * httpd-bundle-1: migration-threshold=1000000: * (2) start * (3) monitor: interval="30000ms" Negative Location Constraints: * not-on-cluster1 prevents dummy from running on cluster01 =#=#=#= End test: Complete text output filtered by tag - OK (0) =#=#=#= * Passed: crm_mon - Complete text output filtered by tag =#=#=#= Begin test: XML output filtered by tag =#=#=#= =#=#=#= End test: XML output filtered by tag - OK (0) =#=#=#= * Passed: crm_mon - XML output filtered by tag =#=#=#= Begin test: Complete text output filtered by resource tag =#=#=#= Cluster Summary: * Stack: corosync * Current DC: cluster02 (version) - partition with quorum * Last updated: * Last change: * 5 nodes configured * 32 resource instances configured (4 DISABLED) Node List: * Online: [ cluster01 cluster02 ] * GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ] Active Resources: * Fencing (stonith:fence_xvm): Started cluster01 Node Attributes: * Node: cluster01: * location : office * pingd : 1000 * Node: cluster02: * pingd : 1000 Operations: * Node: cluster01: * Fencing: migration-threshold=1000000: * (15) start * (20) monitor: interval="60000ms" =#=#=#= End test: Complete text output filtered by resource tag - OK (0) =#=#=#= * Passed: crm_mon - Complete text output filtered by resource tag =#=#=#= Begin test: XML output filtered by resource tag =#=#=#= =#=#=#= End test: XML output filtered by resource tag - OK (0) =#=#=#= * Passed: crm_mon - XML output filtered by resource tag =#=#=#= Begin test: Basic text output filtered by node that doesn't exist =#=#=#= Cluster Summary: * Stack: corosync * Current DC: cluster02 (version) - partition with quorum * Last updated: * Last change: * 5 nodes configured * 32 resource instances configured (4 DISABLED) Active Resources: * No active 
resources =#=#=#= End test: Basic text output filtered by node that doesn't exist - OK (0) =#=#=#= * Passed: crm_mon - Basic text output filtered by node that doesn't exist =#=#=#= Begin test: XML output filtered by node that doesn't exist =#=#=#= =#=#=#= End test: XML output filtered by node that doesn't exist - OK (0) =#=#=#= * Passed: crm_mon - XML output filtered by node that doesn't exist =#=#=#= Begin test: Basic text output with inactive resources =#=#=#= Cluster Summary: * Stack: corosync * Current DC: cluster02 (version) - partition with quorum * Last updated: * Last change: * 5 nodes configured * 32 resource instances configured (4 DISABLED) Node List: * Online: [ cluster01 cluster02 ] * GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ] Full List of Resources: * Clone Set: ping-clone [ping]: * Started: [ cluster01 cluster02 ] * Fencing (stonith:fence_xvm): Started cluster01 * dummy (ocf:pacemaker:Dummy): Started cluster02 * Clone Set: inactive-clone [inactive-dhcpd] (disabled): * Stopped (disabled): [ cluster01 cluster02 ] * Resource Group: inactive-group (disabled): * inactive-dummy-1 (ocf:pacemaker:Dummy): Stopped (disabled) * inactive-dummy-2 (ocf:pacemaker:Dummy): Stopped (disabled) * Container bundle set: httpd-bundle [pcmk:http]: * httpd-bundle-0 (192.168.122.131) (ocf:heartbeat:apache): Started cluster01 * httpd-bundle-1 (192.168.122.132) (ocf:heartbeat:apache): Started cluster02 * httpd-bundle-2 (192.168.122.133) (ocf:heartbeat:apache): Stopped * Resource Group: exim-group: * Public-IP (ocf:heartbeat:IPaddr): Started cluster02 * Email (lsb:exim): Started cluster02 * Clone Set: mysql-clone-group [mysql-group]: * Started: [ cluster01 cluster02 ] * Clone Set: promotable-clone [promotable-rsc] (promotable): * Promoted: [ cluster02 ] * Unpromoted: [ cluster01 ] =#=#=#= End test: Basic text output with inactive resources - OK (0) =#=#=#= * Passed: crm_mon - Basic text output with inactive resources =#=#=#= Begin test: Basic text output with inactive 
resources, filtered by node =#=#=#= Cluster Summary: * Stack: corosync * Current DC: cluster02 (version) - partition with quorum * Last updated: * Last change: * 5 nodes configured * 32 resource instances configured (4 DISABLED) Node List: * Online: [ cluster02 ] Full List of Resources: * Clone Set: ping-clone [ping]: * Started: [ cluster02 ] * dummy (ocf:pacemaker:Dummy): Started cluster02 * Clone Set: inactive-clone [inactive-dhcpd] (disabled): * Stopped (disabled): [ cluster02 ] * Resource Group: inactive-group (disabled): * inactive-dummy-1 (ocf:pacemaker:Dummy): Stopped (disabled) * inactive-dummy-2 (ocf:pacemaker:Dummy): Stopped (disabled) * Container bundle set: httpd-bundle [pcmk:http]: * httpd-bundle-1 (192.168.122.132) (ocf:heartbeat:apache): Started cluster02 * httpd-bundle-2 (192.168.122.133) (ocf:heartbeat:apache): Stopped * Resource Group: exim-group: * Public-IP (ocf:heartbeat:IPaddr): Started cluster02 * Email (lsb:exim): Started cluster02 * Clone Set: mysql-clone-group [mysql-group]: * Started: [ cluster02 ] * Clone Set: promotable-clone [promotable-rsc] (promotable): * Promoted: [ cluster02 ] =#=#=#= End test: Basic text output with inactive resources, filtered by node - OK (0) =#=#=#= * Passed: crm_mon - Basic text output with inactive resources, filtered by node =#=#=#= Begin test: Complete text output filtered by primitive resource =#=#=#= Cluster Summary: * Stack: corosync * Current DC: cluster02 (version) - partition with quorum * Last updated: * Last change: * 5 nodes configured * 32 resource instances configured (4 DISABLED) Node List: * Online: [ cluster01 cluster02 ] * GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ] Active Resources: * Fencing (stonith:fence_xvm): Started cluster01 Node Attributes: * Node: cluster01: * location : office * pingd : 1000 * Node: cluster02: * pingd : 1000 Operations: * Node: cluster01: * Fencing: migration-threshold=1000000: * (15) start * (20) monitor: interval="60000ms" =#=#=#= End test: Complete text output 
filtered by primitive resource - OK (0) =#=#=#= * Passed: crm_mon - Complete text output filtered by primitive resource =#=#=#= Begin test: XML output filtered by primitive resource =#=#=#= =#=#=#= End test: XML output filtered by primitive resource - OK (0) =#=#=#= * Passed: crm_mon - XML output filtered by primitive resource =#=#=#= Begin test: Complete text output filtered by group resource =#=#=#= Cluster Summary: * Stack: corosync * Current DC: cluster02 (version) - partition with quorum * Last updated: * Last change: * 5 nodes configured * 32 resource instances configured (4 DISABLED) Node List: * Online: [ cluster01 cluster02 ] * GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ] Active Resources: * Resource Group: exim-group: * Public-IP (ocf:heartbeat:IPaddr): Started cluster02 * Email (lsb:exim): Started cluster02 Node Attributes: * Node: cluster01: * location : office * pingd : 1000 * Node: cluster02: * pingd : 1000 Operations: * Node: cluster02: * Public-IP: migration-threshold=1000000: * (2) start * Email: migration-threshold=1000000: * (2) start =#=#=#= End test: Complete text output filtered by group resource - OK (0) =#=#=#= * Passed: crm_mon - Complete text output filtered by group resource =#=#=#= Begin test: XML output filtered by group resource =#=#=#= =#=#=#= End test: XML output filtered by group resource - OK (0) =#=#=#= * Passed: crm_mon - XML output filtered by group resource =#=#=#= Begin test: Complete text output filtered by group resource member =#=#=#= Cluster Summary: * Stack: corosync * Current DC: cluster02 (version) - partition with quorum * Last updated: * Last change: * 5 nodes configured * 32 resource instances configured (4 DISABLED) Node List: * Online: [ cluster01 cluster02 ] * GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ] Active Resources: * Resource Group: exim-group: * Public-IP (ocf:heartbeat:IPaddr): Started cluster02 Node Attributes: * Node: cluster01: * location : office * pingd : 1000 * Node: cluster02: * pingd : 1000 
Operations: * Node: cluster02: * Public-IP: migration-threshold=1000000: * (2) start =#=#=#= End test: Complete text output filtered by group resource member - OK (0) =#=#=#= * Passed: crm_mon - Complete text output filtered by group resource member =#=#=#= Begin test: XML output filtered by group resource member =#=#=#= =#=#=#= End test: XML output filtered by group resource member - OK (0) =#=#=#= * Passed: crm_mon - XML output filtered by group resource member =#=#=#= Begin test: Complete text output filtered by clone resource =#=#=#= Cluster Summary: * Stack: corosync * Current DC: cluster02 (version) - partition with quorum * Last updated: * Last change: * 5 nodes configured * 32 resource instances configured (4 DISABLED) Node List: * Online: [ cluster01 cluster02 ] * GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ] Active Resources: * Clone Set: ping-clone [ping]: * Started: [ cluster01 cluster02 ] Node Attributes: * Node: cluster01: * location : office * pingd : 1000 * Node: cluster02: * pingd : 1000 Operations: * Node: cluster02: * ping: migration-threshold=1000000: * (11) start * (12) monitor: interval="10000ms" * Node: cluster01: * ping: migration-threshold=1000000: * (17) start * (18) monitor: interval="10000ms" =#=#=#= End test: Complete text output filtered by clone resource - OK (0) =#=#=#= * Passed: crm_mon - Complete text output filtered by clone resource =#=#=#= Begin test: XML output filtered by clone resource =#=#=#= =#=#=#= End test: XML output filtered by clone resource - OK (0) =#=#=#= * Passed: crm_mon - XML output filtered by clone resource =#=#=#= Begin test: Complete text output filtered by clone resource instance =#=#=#= Cluster Summary: * Stack: corosync * Current DC: cluster02 (version) - partition with quorum * Last updated: * Last change: * 5 nodes configured * 32 resource instances configured (4 DISABLED) Node List: * Online: [ cluster01 cluster02 ] * GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ] Active Resources: * Clone Set: 
ping-clone [ping]: * Started: [ cluster01 cluster02 ] Node Attributes: * Node: cluster01: * location : office * pingd : 1000 * Node: cluster02: * pingd : 1000 Operations: * Node: cluster02: * ping: migration-threshold=1000000: * (11) start * (12) monitor: interval="10000ms" * Node: cluster01: * ping: migration-threshold=1000000: * (17) start * (18) monitor: interval="10000ms" =#=#=#= End test: Complete text output filtered by clone resource instance - OK (0) =#=#=#= * Passed: crm_mon - Complete text output filtered by clone resource instance =#=#=#= Begin test: XML output filtered by clone resource instance =#=#=#= =#=#=#= End test: XML output filtered by clone resource instance - OK (0) =#=#=#= * Passed: crm_mon - XML output filtered by clone resource instance =#=#=#= Begin test: Complete text output filtered by exact clone resource instance =#=#=#= Cluster Summary: * Stack: corosync * Current DC: cluster02 (2) (version) - partition with quorum * Last updated: * Last change: * 5 nodes configured * 32 resource instances configured (4 DISABLED) Node List: * Node cluster01 (1): online, feature set <3.15.1 * Node cluster02 (2): online, feature set <3.15.1 * GuestNode httpd-bundle-0@cluster01: online * GuestNode httpd-bundle-1@cluster02: online * GuestNode httpd-bundle-2@: OFFLINE Active Resources: * Clone Set: ping-clone [ping]: * ping (ocf:pacemaker:ping): Started cluster02 Node Attributes: * Node: cluster01 (1): * location : office * pingd : 1000 * Node: cluster02 (2): * pingd : 1000 Operations: * Node: cluster02 (2): * ping: migration-threshold=1000000: * (11) start * (12) monitor: interval="10000ms" * Node: cluster01 (1): * ping: migration-threshold=1000000: * (17) start * (18) monitor: interval="10000ms" =#=#=#= End test: Complete text output filtered by exact clone resource instance - OK (0) =#=#=#= * Passed: crm_mon - Complete text output filtered by exact clone resource instance =#=#=#= Begin test: XML output filtered by exact clone resource instance =#=#=#= 
=#=#=#= End test: XML output filtered by exact clone resource instance - OK (0) =#=#=#= * Passed: crm_mon - XML output filtered by exact clone resource instance =#=#=#= Begin test: Basic text output filtered by resource that doesn't exist =#=#=#= Cluster Summary: * Stack: corosync * Current DC: cluster02 (version) - partition with quorum * Last updated: * Last change: * 5 nodes configured * 32 resource instances configured (4 DISABLED) Node List: * Online: [ cluster01 cluster02 ] * GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ] Active Resources: * No active resources =#=#=#= End test: Basic text output filtered by resource that doesn't exist - OK (0) =#=#=#= * Passed: crm_mon - Basic text output filtered by resource that doesn't exist =#=#=#= Begin test: XML output filtered by resource that doesn't exist =#=#=#= =#=#=#= End test: XML output filtered by resource that doesn't exist - OK (0) =#=#=#= * Passed: crm_mon - XML output filtered by resource that doesn't exist =#=#=#= Begin test: Basic text output with inactive resources, filtered by tag =#=#=#= Cluster Summary: * Stack: corosync * Current DC: cluster02 (version) - partition with quorum * Last updated: * Last change: * 5 nodes configured * 32 resource instances configured (4 DISABLED) Node List: * Online: [ cluster01 cluster02 ] * GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ] Full List of Resources: * Clone Set: inactive-clone [inactive-dhcpd] (disabled): * Stopped (disabled): [ cluster01 cluster02 ] * Resource Group: inactive-group (disabled): * inactive-dummy-1 (ocf:pacemaker:Dummy): Stopped (disabled) * inactive-dummy-2 (ocf:pacemaker:Dummy): Stopped (disabled) =#=#=#= End test: Basic text output with inactive resources, filtered by tag - OK (0) =#=#=#= * Passed: crm_mon - Basic text output with inactive resources, filtered by tag =#=#=#= Begin test: Basic text output with inactive resources, filtered by bundle resource =#=#=#= Cluster Summary: * Stack: corosync * Current DC: cluster02 (version) - 
partition with quorum * Last updated: * Last change: * 5 nodes configured * 32 resource instances configured (4 DISABLED) Node List: * Online: [ cluster01 cluster02 ] * GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ] Full List of Resources: * Container bundle set: httpd-bundle [pcmk:http]: * httpd-bundle-0 (192.168.122.131) (ocf:heartbeat:apache): Started cluster01 * httpd-bundle-1 (192.168.122.132) (ocf:heartbeat:apache): Started cluster02 * httpd-bundle-2 (192.168.122.133) (ocf:heartbeat:apache): Stopped =#=#=#= End test: Basic text output with inactive resources, filtered by bundle resource - OK (0) =#=#=#= * Passed: crm_mon - Basic text output with inactive resources, filtered by bundle resource =#=#=#= Begin test: XML output filtered by inactive bundle resource =#=#=#= =#=#=#= End test: XML output filtered by inactive bundle resource - OK (0) =#=#=#= * Passed: crm_mon - XML output filtered by inactive bundle resource =#=#=#= Begin test: Basic text output with inactive resources, filtered by bundled IP address resource =#=#=#= Cluster Summary: * Stack: corosync * Current DC: cluster02 (version) - partition with quorum * Last updated: * Last change: * 5 nodes configured * 32 resource instances configured (4 DISABLED) Node List: * Online: [ cluster01 cluster02 ] * GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ] Full List of Resources: * Container bundle set: httpd-bundle [pcmk:http]: * Replica[0] * httpd-bundle-ip-192.168.122.131 (ocf:heartbeat:IPaddr2): Started cluster01 =#=#=#= End test: Basic text output with inactive resources, filtered by bundled IP address resource - OK (0) =#=#=#= * Passed: crm_mon - Basic text output with inactive resources, filtered by bundled IP address resource =#=#=#= Begin test: XML output filtered by bundled IP address resource =#=#=#= =#=#=#= End test: XML output filtered by bundled IP address resource - OK (0) =#=#=#= * Passed: crm_mon - XML output filtered by bundled IP address resource =#=#=#= Begin test: Basic text output with 
inactive resources, filtered by bundled container =#=#=#= Cluster Summary: * Stack: corosync * Current DC: cluster02 (version) - partition with quorum * Last updated: * Last change: * 5 nodes configured * 32 resource instances configured (4 DISABLED) Node List: * Online: [ cluster01 cluster02 ] * GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ] Full List of Resources: * Container bundle set: httpd-bundle [pcmk:http]: * Replica[1] * httpd-bundle-docker-1 (ocf:heartbeat:docker): Started cluster02 =#=#=#= End test: Basic text output with inactive resources, filtered by bundled container - OK (0) =#=#=#= * Passed: crm_mon - Basic text output with inactive resources, filtered by bundled container =#=#=#= Begin test: XML output filtered by bundled container =#=#=#= =#=#=#= End test: XML output filtered by bundled container - OK (0) =#=#=#= * Passed: crm_mon - XML output filtered by bundled container =#=#=#= Begin test: Basic text output with inactive resources, filtered by bundle connection =#=#=#= Cluster Summary: * Stack: corosync * Current DC: cluster02 (version) - partition with quorum * Last updated: * Last change: * 5 nodes configured * 32 resource instances configured (4 DISABLED) Node List: * Online: [ cluster01 cluster02 ] * GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ] Full List of Resources: * Container bundle set: httpd-bundle [pcmk:http]: * Replica[0] * httpd-bundle-0 (ocf:pacemaker:remote): Started cluster01 =#=#=#= End test: Basic text output with inactive resources, filtered by bundle connection - OK (0) =#=#=#= * Passed: crm_mon - Basic text output with inactive resources, filtered by bundle connection =#=#=#= Begin test: XML output filtered by bundle connection =#=#=#= =#=#=#= End test: XML output filtered by bundle connection - OK (0) =#=#=#= * Passed: crm_mon - XML output filtered by bundle connection =#=#=#= Begin test: Basic text output with inactive resources, filtered by bundled primitive resource =#=#=#= Cluster Summary: * Stack: corosync * 
Current DC: cluster02 (version) - partition with quorum * Last updated: * Last change: * 5 nodes configured * 32 resource instances configured (4 DISABLED) Node List: * Online: [ cluster01 cluster02 ] * GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ] Full List of Resources: * Container bundle set: httpd-bundle [pcmk:http]: * Replica[0] * httpd (ocf:heartbeat:apache): Started httpd-bundle-0 * Replica[1] * httpd (ocf:heartbeat:apache): Started httpd-bundle-1 * Replica[2] * httpd (ocf:heartbeat:apache): Stopped =#=#=#= End test: Basic text output with inactive resources, filtered by bundled primitive resource - OK (0) =#=#=#= * Passed: crm_mon - Basic text output with inactive resources, filtered by bundled primitive resource =#=#=#= Begin test: XML output filtered by bundled primitive resource =#=#=#= =#=#=#= End test: XML output filtered by bundled primitive resource - OK (0) =#=#=#= * Passed: crm_mon - XML output filtered by bundled primitive resource =#=#=#= Begin test: Complete text output, filtered by clone name in cloned group =#=#=#= Cluster Summary: * Stack: corosync * Current DC: cluster02 (2) (version) - partition with quorum * Last updated: * Last change: * 5 nodes configured * 32 resource instances configured (4 DISABLED) Node List: * Node cluster01 (1): online, feature set <3.15.1 * Node cluster02 (2): online, feature set <3.15.1 * GuestNode httpd-bundle-0@cluster01: online * GuestNode httpd-bundle-1@cluster02: online * GuestNode httpd-bundle-2@: OFFLINE Active Resources: * Clone Set: mysql-clone-group [mysql-group]: * Resource Group: mysql-group:0: * mysql-proxy (lsb:mysql-proxy): Started cluster02 * Resource Group: mysql-group:1: * mysql-proxy (lsb:mysql-proxy): Started cluster01 Node Attributes: * Node: cluster01 (1): * location : office * pingd : 1000 * Node: cluster02 (2): * pingd : 1000 Operations: * Node: cluster02 (2): * mysql-proxy: migration-threshold=1000000: * (2) start * (3) monitor: interval="10000ms" * Node: cluster01 (1): * mysql-proxy: 
migration-threshold=1000000: * (2) start * (3) monitor: interval="10000ms" =#=#=#= End test: Complete text output, filtered by clone name in cloned group - OK (0) =#=#=#= * Passed: crm_mon - Complete text output, filtered by clone name in cloned group =#=#=#= Begin test: XML output, filtered by clone name in cloned group =#=#=#= =#=#=#= End test: XML output, filtered by clone name in cloned group - OK (0) =#=#=#= * Passed: crm_mon - XML output, filtered by clone name in cloned group =#=#=#= Begin test: Complete text output, filtered by group name in cloned group =#=#=#= Cluster Summary: * Stack: corosync * Current DC: cluster02 (2) (version) - partition with quorum * Last updated: * Last change: * 5 nodes configured * 32 resource instances configured (4 DISABLED) Node List: * Node cluster01 (1): online, feature set <3.15.1 * Node cluster02 (2): online, feature set <3.15.1 * GuestNode httpd-bundle-0@cluster01: online * GuestNode httpd-bundle-1@cluster02: online * GuestNode httpd-bundle-2@: OFFLINE Active Resources: * Clone Set: mysql-clone-group [mysql-group]: * Resource Group: mysql-group:0: * mysql-proxy (lsb:mysql-proxy): Started cluster02 * Resource Group: mysql-group:1: * mysql-proxy (lsb:mysql-proxy): Started cluster01 Node Attributes: * Node: cluster01 (1): * location : office * pingd : 1000 * Node: cluster02 (2): * pingd : 1000 Operations: * Node: cluster02 (2): * mysql-proxy: migration-threshold=1000000: * (2) start * (3) monitor: interval="10000ms" * Node: cluster01 (1): * mysql-proxy: migration-threshold=1000000: * (2) start * (3) monitor: interval="10000ms" =#=#=#= End test: Complete text output, filtered by group name in cloned group - OK (0) =#=#=#= * Passed: crm_mon - Complete text output, filtered by group name in cloned group =#=#=#= Begin test: XML output, filtered by group name in cloned group =#=#=#= =#=#=#= End test: XML output, filtered by group name in cloned group - OK (0) =#=#=#= * Passed: crm_mon - XML output, filtered by group name in 
cloned group =#=#=#= Begin test: Complete text output, filtered by exact group instance name in cloned group =#=#=#= Cluster Summary: * Stack: corosync * Current DC: cluster02 (2) (version) - partition with quorum * Last updated: * Last change: * 5 nodes configured * 32 resource instances configured (4 DISABLED) Node List: * Node cluster01 (1): online, feature set <3.15.1 * Node cluster02 (2): online, feature set <3.15.1 * GuestNode httpd-bundle-0@cluster01: online * GuestNode httpd-bundle-1@cluster02: online * GuestNode httpd-bundle-2@: OFFLINE Active Resources: * Clone Set: mysql-clone-group [mysql-group]: * Resource Group: mysql-group:1: * mysql-proxy (lsb:mysql-proxy): Started cluster01 Node Attributes: * Node: cluster01 (1): * location : office * pingd : 1000 * Node: cluster02 (2): * pingd : 1000 Operations: * Node: cluster02 (2): * mysql-proxy: migration-threshold=1000000: * (2) start * (3) monitor: interval="10000ms" * Node: cluster01 (1): * mysql-proxy: migration-threshold=1000000: * (2) start * (3) monitor: interval="10000ms" =#=#=#= End test: Complete text output, filtered by exact group instance name in cloned group - OK (0) =#=#=#= * Passed: crm_mon - Complete text output, filtered by exact group instance name in cloned group =#=#=#= Begin test: XML output, filtered by exact group instance name in cloned group =#=#=#= =#=#=#= End test: XML output, filtered by exact group instance name in cloned group - OK (0) =#=#=#= * Passed: crm_mon - XML output, filtered by exact group instance name in cloned group =#=#=#= Begin test: Complete text output, filtered by primitive name in cloned group =#=#=#= Cluster Summary: * Stack: corosync * Current DC: cluster02 (2) (version) - partition with quorum * Last updated: * Last change: * 5 nodes configured * 32 resource instances configured (4 DISABLED) Node List: * Node cluster01 (1): online, feature set <3.15.1 * Node cluster02 (2): online, feature set <3.15.1 * GuestNode httpd-bundle-0@cluster01: online * GuestNode 
httpd-bundle-1@cluster02: online * GuestNode httpd-bundle-2@: OFFLINE Active Resources: * Clone Set: mysql-clone-group [mysql-group]: * Resource Group: mysql-group:0: * mysql-proxy (lsb:mysql-proxy): Started cluster02 * Resource Group: mysql-group:1: * mysql-proxy (lsb:mysql-proxy): Started cluster01 Node Attributes: * Node: cluster01 (1): * location : office * pingd : 1000 * Node: cluster02 (2): * pingd : 1000 Operations: * Node: cluster02 (2): * mysql-proxy: migration-threshold=1000000: * (2) start * (3) monitor: interval="10000ms" * Node: cluster01 (1): * mysql-proxy: migration-threshold=1000000: * (2) start * (3) monitor: interval="10000ms" =#=#=#= End test: Complete text output, filtered by primitive name in cloned group - OK (0) =#=#=#= * Passed: crm_mon - Complete text output, filtered by primitive name in cloned group =#=#=#= Begin test: XML output, filtered by primitive name in cloned group =#=#=#= =#=#=#= End test: XML output, filtered by primitive name in cloned group - OK (0) =#=#=#= * Passed: crm_mon - XML output, filtered by primitive name in cloned group =#=#=#= Begin test: Complete text output, filtered by exact primitive instance name in cloned group =#=#=#= Cluster Summary: * Stack: corosync * Current DC: cluster02 (2) (version) - partition with quorum * Last updated: * Last change: * 5 nodes configured * 32 resource instances configured (4 DISABLED) Node List: * Node cluster01 (1): online, feature set <3.15.1 * Node cluster02 (2): online, feature set <3.15.1 * GuestNode httpd-bundle-0@cluster01: online * GuestNode httpd-bundle-1@cluster02: online * GuestNode httpd-bundle-2@: OFFLINE Active Resources: * Clone Set: mysql-clone-group [mysql-group]: * Resource Group: mysql-group:1: * mysql-proxy (lsb:mysql-proxy): Started cluster01 Node Attributes: * Node: cluster01 (1): * location : office * pingd : 1000 * Node: cluster02 (2): * pingd : 1000 Operations: * Node: cluster02 (2): * mysql-proxy: migration-threshold=1000000: * (2) start * (3) monitor: 
interval="10000ms" * Node: cluster01 (1): * mysql-proxy: migration-threshold=1000000: * (2) start * (3) monitor: interval="10000ms" =#=#=#= End test: Complete text output, filtered by exact primitive instance name in cloned group - OK (0) =#=#=#= * Passed: crm_mon - Complete text output, filtered by exact primitive instance name in cloned group =#=#=#= Begin test: XML output, filtered by exact primitive instance name in cloned group =#=#=#= =#=#=#= End test: XML output, filtered by exact primitive instance name in cloned group - OK (0) =#=#=#= * Passed: crm_mon - XML output, filtered by exact primitive instance name in cloned group =#=#=#= Begin test: Text output of partially active resources =#=#=#= -unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (unimplemented feature)| rc=3 id=dummy-2_last_failure_0 -unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (invalid parameter)| rc=2 id=httpd_last_failure_0 +unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (unimplemented feature) | dummy-2_last_failure_0 +unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (invalid parameter) | httpd_last_failure_0 Cluster Summary: * Stack: corosync * Current DC: cluster02 (2) (version) - partition with quorum * Last updated: * Last change: * 4 nodes configured * 16 resource instances configured (1 DISABLED) Node List: * Node cluster01 (1): online, feature set <3.15.1 * Node cluster02 (2): online, feature set <3.15.1 * GuestNode httpd-bundle-0@cluster02: online * GuestNode httpd-bundle-1@cluster01: online Active Resources: * Clone Set: ping-clone [ping]: * ping (ocf:pacemaker:ping): Started cluster01 * ping (ocf:pacemaker:ping): Stopped (not installed) * Fencing (stonith:fence_xvm): Started cluster01 * Container bundle set: httpd-bundle [pcmk:http]: * Replica[0] * 
httpd-bundle-ip-192.168.122.131 (ocf:heartbeat:IPaddr2): Started cluster02 * httpd (ocf:heartbeat:apache): Started httpd-bundle-0 * httpd-bundle-docker-0 (ocf:heartbeat:docker): Started cluster02 * httpd-bundle-0 (ocf:pacemaker:remote): Started cluster02 * Replica[1] * httpd-bundle-ip-192.168.122.132 (ocf:heartbeat:IPaddr2): Started cluster01 * httpd (ocf:heartbeat:apache): FAILED httpd-bundle-1 * httpd-bundle-docker-1 (ocf:heartbeat:docker): Started cluster01 * httpd-bundle-1 (ocf:pacemaker:remote): Started cluster01 * Resource Group: partially-active-group (2 members inactive): * dummy-1 (ocf:pacemaker:Dummy): Started cluster02 * dummy-2 (ocf:pacemaker:Dummy): FAILED cluster02 Failed Resource Actions: * dummy-2_monitor_0 on cluster02 'unimplemented feature' (3): call=2, status='complete', queued=0ms, exec=33ms =#=#=#= End test: Text output of partially active resources - OK (0) =#=#=#= * Passed: crm_mon - Text output of partially active resources =#=#=#= Begin test: XML output of partially active resources =#=#=#= -unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (unimplemented feature)| rc=3 id=dummy-2_last_failure_0 -unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (invalid parameter)| rc=2 id=httpd_last_failure_0 +unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (unimplemented feature) | dummy-2_last_failure_0 +unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (invalid parameter) | httpd_last_failure_0 =#=#=#= End test: XML output of partially active resources - OK (0) =#=#=#= * Passed: crm_mon - XML output of partially active resources =#=#=#= Begin test: Text output of partially active resources, with inactive resources =#=#=#= -unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (unimplemented feature)| rc=3 
id=dummy-2_last_failure_0 -unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (invalid parameter)| rc=2 id=httpd_last_failure_0 +unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (unimplemented feature) | dummy-2_last_failure_0 +unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (invalid parameter) | httpd_last_failure_0 Cluster Summary: * Stack: corosync * Current DC: cluster02 (2) (version) - partition with quorum * Last updated: * Last change: * 4 nodes configured * 16 resource instances configured (1 DISABLED) Node List: * Node cluster01 (1): online, feature set <3.15.1 * Node cluster02 (2): online, feature set <3.15.1 * GuestNode httpd-bundle-0@cluster02: online * GuestNode httpd-bundle-1@cluster01: online Full List of Resources: * Clone Set: ping-clone [ping]: * ping (ocf:pacemaker:ping): Started cluster01 * ping (ocf:pacemaker:ping): Stopped (not installed) * Fencing (stonith:fence_xvm): Started cluster01 * Container bundle set: httpd-bundle [pcmk:http]: * Replica[0] * httpd-bundle-ip-192.168.122.131 (ocf:heartbeat:IPaddr2): Started cluster02 * httpd (ocf:heartbeat:apache): Started httpd-bundle-0 * httpd-bundle-docker-0 (ocf:heartbeat:docker): Started cluster02 * httpd-bundle-0 (ocf:pacemaker:remote): Started cluster02 * Replica[1] * httpd-bundle-ip-192.168.122.132 (ocf:heartbeat:IPaddr2): Started cluster01 * httpd (ocf:heartbeat:apache): FAILED httpd-bundle-1 * httpd-bundle-docker-1 (ocf:heartbeat:docker): Started cluster01 * httpd-bundle-1 (ocf:pacemaker:remote): Started cluster01 * Resource Group: partially-active-group: * dummy-1 (ocf:pacemaker:Dummy): Started cluster02 * dummy-2 (ocf:pacemaker:Dummy): FAILED cluster02 * dummy-3 (ocf:pacemaker:Dummy): Stopped (disabled) * dummy-4 (ocf:pacemaker:Dummy): Stopped (not installed) * smart-mon (ocf:pacemaker:HealthSMART): Stopped (not installed) Failed 
Resource Actions: * dummy-2_monitor_0 on cluster02 'unimplemented feature' (3): call=2, status='complete', queued=0ms, exec=33ms =#=#=#= End test: Text output of partially active resources, with inactive resources - OK (0) =#=#=#= * Passed: crm_mon - Text output of partially active resources, with inactive resources =#=#=#= Begin test: Complete brief text output, with inactive resources =#=#=#= -unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (unimplemented feature)| rc=3 id=dummy-2_last_failure_0 -unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (invalid parameter)| rc=2 id=httpd_last_failure_0 +unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (unimplemented feature) | dummy-2_last_failure_0 +unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (invalid parameter) | httpd_last_failure_0 Cluster Summary: * Stack: corosync * Current DC: cluster02 (2) (version) - partition with quorum * Last updated: * Last change: * 4 nodes configured * 16 resource instances configured (1 DISABLED) Node List: * Node cluster01 (1): online, feature set <3.15.1 * Node cluster02 (2): online, feature set <3.15.1 * GuestNode httpd-bundle-0@cluster02: online * GuestNode httpd-bundle-1@cluster01: online Full List of Resources: * 0/1 (ocf:pacemaker:HealthSMART): Active * 1/1 (stonith:fence_xvm): Active cluster01 * Clone Set: ping-clone [ping]: * ping (ocf:pacemaker:ping): Started cluster01 * ping (ocf:pacemaker:ping): Stopped (not installed) * Container bundle set: httpd-bundle [pcmk:http]: * Replica[0] * httpd-bundle-ip-192.168.122.131 (ocf:heartbeat:IPaddr2): Started cluster02 * httpd (ocf:heartbeat:apache): Started httpd-bundle-0 * httpd-bundle-docker-0 (ocf:heartbeat:docker): Started cluster02 * httpd-bundle-0 (ocf:pacemaker:remote): Started cluster02 * Replica[1] * 
httpd-bundle-ip-192.168.122.132 (ocf:heartbeat:IPaddr2): Started cluster01 * httpd (ocf:heartbeat:apache): FAILED httpd-bundle-1 * httpd-bundle-docker-1 (ocf:heartbeat:docker): Started cluster01 * httpd-bundle-1 (ocf:pacemaker:remote): Started cluster01 * Resource Group: partially-active-group: * 2/4 (ocf:pacemaker:Dummy): Active cluster02 Node Attributes: * Node: cluster01 (1): * pingd : 1000 * Node: cluster02 (2): * pingd : 1000 Operations: * Node: cluster02 (2): * httpd-bundle-ip-192.168.122.131: migration-threshold=1000000: * (2) start * (3) monitor: interval="60000ms" * httpd-bundle-docker-0: migration-threshold=1000000: * (2) start * (3) monitor: interval="60000ms" * httpd-bundle-0: migration-threshold=1000000: * (2) start * (3) monitor: interval="30000ms" * dummy-1: migration-threshold=1000000: * (2) start * dummy-2: migration-threshold=1000000: * (2) probe * dummy-4: migration-threshold=1000000: * (2) probe * smart-mon: migration-threshold=1000000: * (9) probe * ping: migration-threshold=1000000: * (6) probe * Node: cluster01 (1): * Fencing: migration-threshold=1000000: * (15) start * (20) monitor: interval="60000ms" * ping: migration-threshold=1000000: * (2) start * (3) monitor: interval="10000ms" * httpd-bundle-ip-192.168.122.132: migration-threshold=1000000: * (2) start * (3) monitor: interval="60000ms" * httpd-bundle-docker-1: migration-threshold=1000000: * (2) start * (3) monitor: interval="60000ms" * httpd-bundle-1: migration-threshold=1000000: * (2) start * (3) monitor: interval="30000ms" * Node: httpd-bundle-0@cluster02: * httpd: migration-threshold=1000000: * (1) start * Node: httpd-bundle-1@cluster01: * httpd: migration-threshold=1000000: * (1) probe Failed Resource Actions: * dummy-2_monitor_0 on cluster02 'unimplemented feature' (3): call=2, status='complete', queued=0ms, exec=33ms =#=#=#= End test: Complete brief text output, with inactive resources - OK (0) =#=#=#= * Passed: crm_mon - Complete brief text output, with inactive resources =#=#=#= 
Begin test: Text output of partially active group =#=#=#= -unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (unimplemented feature)| rc=3 id=dummy-2_last_failure_0 -unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (invalid parameter)| rc=2 id=httpd_last_failure_0 +unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (unimplemented feature) | dummy-2_last_failure_0 +unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (invalid parameter) | httpd_last_failure_0 Cluster Summary: * Stack: corosync * Current DC: cluster02 (version) - partition with quorum * Last updated: * Last change: * 4 nodes configured * 16 resource instances configured (1 DISABLED) Node List: * Online: [ cluster01 cluster02 ] * GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ] Active Resources: * Resource Group: partially-active-group (2 members inactive): * dummy-1 (ocf:pacemaker:Dummy): Started cluster02 * dummy-2 (ocf:pacemaker:Dummy): FAILED cluster02 =#=#=#= End test: Text output of partially active group - OK (0) =#=#=#= * Passed: crm_mon - Text output of partially active group =#=#=#= Begin test: Text output of partially active group, with inactive resources =#=#=#= -unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (unimplemented feature)| rc=3 id=dummy-2_last_failure_0 -unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (invalid parameter)| rc=2 id=httpd_last_failure_0 +unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (unimplemented feature) | dummy-2_last_failure_0 +unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (invalid parameter) | httpd_last_failure_0 Cluster Summary: * Stack: corosync * 
Current DC: cluster02 (version) - partition with quorum * Last updated: * Last change: * 4 nodes configured * 16 resource instances configured (1 DISABLED) Node List: * Online: [ cluster01 cluster02 ] * GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ] Full List of Resources: * Resource Group: partially-active-group: * dummy-1 (ocf:pacemaker:Dummy): Started cluster02 * dummy-2 (ocf:pacemaker:Dummy): FAILED cluster02 * dummy-3 (ocf:pacemaker:Dummy): Stopped (disabled) * dummy-4 (ocf:pacemaker:Dummy): Stopped (not installed) =#=#=#= End test: Text output of partially active group, with inactive resources - OK (0) =#=#=#= * Passed: crm_mon - Text output of partially active group, with inactive resources =#=#=#= Begin test: Text output of active member of partially active group =#=#=#= -unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (unimplemented feature)| rc=3 id=dummy-2_last_failure_0 -unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (invalid parameter)| rc=2 id=httpd_last_failure_0 +unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (unimplemented feature) | dummy-2_last_failure_0 +unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (invalid parameter) | httpd_last_failure_0 Cluster Summary: * Stack: corosync * Current DC: cluster02 (version) - partition with quorum * Last updated: * Last change: * 4 nodes configured * 16 resource instances configured (1 DISABLED) Node List: * Online: [ cluster01 cluster02 ] * GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ] Active Resources: * Resource Group: partially-active-group (2 members inactive): * dummy-1 (ocf:pacemaker:Dummy): Started cluster02 =#=#=#= End test: Text output of active member of partially active group - OK (0) =#=#=#= * Passed: crm_mon - Text output of active member of partially active group =#=#=#= Begin test: 
Text output of inactive member of partially active group =#=#=#= -unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (unimplemented feature)| rc=3 id=dummy-2_last_failure_0 -unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (invalid parameter)| rc=2 id=httpd_last_failure_0 +unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (unimplemented feature) | dummy-2_last_failure_0 +unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (invalid parameter) | httpd_last_failure_0 Cluster Summary: * Stack: corosync * Current DC: cluster02 (2) (version) - partition with quorum * Last updated: * Last change: * 4 nodes configured * 16 resource instances configured (1 DISABLED) Node List: * Node cluster01 (1): online, feature set <3.15.1 * Node cluster02 (2): online, feature set <3.15.1 * GuestNode httpd-bundle-0@cluster02: online * GuestNode httpd-bundle-1@cluster01: online Active Resources: * Resource Group: partially-active-group (2 members inactive): * dummy-2 (ocf:pacemaker:Dummy): FAILED cluster02 Failed Resource Actions: * dummy-2_monitor_0 on cluster02 'unimplemented feature' (3): call=2, status='complete', queued=0ms, exec=33ms =#=#=#= End test: Text output of inactive member of partially active group - OK (0) =#=#=#= * Passed: crm_mon - Text output of inactive member of partially active group =#=#=#= Begin test: Complete brief text output grouped by node, with inactive resources =#=#=#= -unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (unimplemented feature)| rc=3 id=dummy-2_last_failure_0 -unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (invalid parameter)| rc=2 id=httpd_last_failure_0 +unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard 
failure (unimplemented feature) | dummy-2_last_failure_0 +unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (invalid parameter) | httpd_last_failure_0 Cluster Summary: * Stack: corosync * Current DC: cluster02 (2) (version) - partition with quorum * Last updated: * Last change: * 4 nodes configured * 16 resource instances configured (1 DISABLED) Node List: * Node cluster01 (1): online, feature set <3.15.1: * Resources: * 1 (ocf:heartbeat:IPaddr2): Active * 1 (ocf:heartbeat:docker): Active * 1 (ocf:pacemaker:ping): Active * 1 (ocf:pacemaker:remote): Active * 1 (stonith:fence_xvm): Active * Node cluster02 (2): online, feature set <3.15.1: * Resources: * 1 (ocf:heartbeat:IPaddr2): Active * 1 (ocf:heartbeat:docker): Active * 2 (ocf:pacemaker:Dummy): Active * 1 (ocf:pacemaker:remote): Active * GuestNode httpd-bundle-0@cluster02: online: * Resources: * 1 (ocf:heartbeat:apache): Active * GuestNode httpd-bundle-1@cluster01: online: * Resources: * 1 (ocf:heartbeat:apache): Active Inactive Resources: * Clone Set: ping-clone [ping]: * ping (ocf:pacemaker:ping): Started cluster01 * ping (ocf:pacemaker:ping): Stopped (not installed) * Resource Group: partially-active-group: * 2/4 (ocf:pacemaker:Dummy): Active cluster02 * smart-mon (ocf:pacemaker:HealthSMART): Stopped (not installed) Node Attributes: * Node: cluster01 (1): * pingd : 1000 * Node: cluster02 (2): * pingd : 1000 Operations: * Node: cluster02 (2): * httpd-bundle-ip-192.168.122.131: migration-threshold=1000000: * (2) start * (3) monitor: interval="60000ms" * httpd-bundle-docker-0: migration-threshold=1000000: * (2) start * (3) monitor: interval="60000ms" * httpd-bundle-0: migration-threshold=1000000: * (2) start * (3) monitor: interval="30000ms" * dummy-1: migration-threshold=1000000: * (2) start * dummy-2: migration-threshold=1000000: * (2) probe * dummy-4: migration-threshold=1000000: * (2) probe * smart-mon: migration-threshold=1000000: * (9) probe * ping: 
migration-threshold=1000000: * (6) probe * Node: cluster01 (1): * Fencing: migration-threshold=1000000: * (15) start * (20) monitor: interval="60000ms" * ping: migration-threshold=1000000: * (2) start * (3) monitor: interval="10000ms" * httpd-bundle-ip-192.168.122.132: migration-threshold=1000000: * (2) start * (3) monitor: interval="60000ms" * httpd-bundle-docker-1: migration-threshold=1000000: * (2) start * (3) monitor: interval="60000ms" * httpd-bundle-1: migration-threshold=1000000: * (2) start * (3) monitor: interval="30000ms" * Node: httpd-bundle-0@cluster02: * httpd: migration-threshold=1000000: * (1) start * Node: httpd-bundle-1@cluster01: * httpd: migration-threshold=1000000: * (1) probe Failed Resource Actions: * dummy-2_monitor_0 on cluster02 'unimplemented feature' (3): call=2, status='complete', queued=0ms, exec=33ms =#=#=#= End test: Complete brief text output grouped by node, with inactive resources - OK (0) =#=#=#= * Passed: crm_mon - Complete brief text output grouped by node, with inactive resources =#=#=#= Begin test: Text output of partially active resources, with inactive resources, filtered by node =#=#=#= -unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (unimplemented feature)| rc=3 id=dummy-2_last_failure_0 -unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (invalid parameter)| rc=2 id=httpd_last_failure_0 +unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (unimplemented feature) | dummy-2_last_failure_0 +unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (invalid parameter) | httpd_last_failure_0 Cluster Summary: * Stack: corosync * Current DC: cluster02 (version) - partition with quorum * Last updated: * Last change: * 4 nodes configured * 16 resource instances configured (1 DISABLED) Node List: * Online: [ cluster01 ] Full List of 
Resources: * Clone Set: ping-clone [ping]: * Started: [ cluster01 ] * Fencing (stonith:fence_xvm): Started cluster01 * Container bundle set: httpd-bundle [pcmk:http]: * httpd-bundle-1 (192.168.122.132) (ocf:heartbeat:apache): FAILED cluster01 * smart-mon (ocf:pacemaker:HealthSMART): Stopped (not installed) =#=#=#= End test: Text output of partially active resources, with inactive resources, filtered by node - OK (0) =#=#=#= * Passed: crm_mon - Text output of partially active resources, with inactive resources, filtered by node =#=#=#= Begin test: Text output of partially active resources, filtered by node =#=#=#= -unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (unimplemented feature)| rc=3 id=dummy-2_last_failure_0 -unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (invalid parameter)| rc=2 id=httpd_last_failure_0 +unpack_rsc_op error: Preventing dummy-2 from restarting on cluster02 because of hard failure (unimplemented feature) | dummy-2_last_failure_0 +unpack_rsc_op error: Preventing httpd-bundle-clone from restarting on httpd-bundle-1 because of hard failure (invalid parameter) | httpd_last_failure_0 =#=#=#= End test: Text output of partially active resources, filtered by node - OK (0) =#=#=#= * Passed: crm_mon - Text output of partially active resources, filtered by node =#=#=#= Begin test: Text output of active unmanaged resource on offline node =#=#=#= Cluster Summary: * Stack: corosync * Current DC: cluster02 (version) - partition with quorum * Last updated: * Last change: * 2 nodes configured * 3 resource instances configured *** Resource management is DISABLED *** The cluster will not attempt to start, stop or recover services Node List: * Online: [ cluster01 ] * OFFLINE: [ cluster02 ] Active Resources: * Fencing (stonith:fence_xvm): Started cluster01 (maintenance) * rsc1 (ocf:pacemaker:Dummy): Started cluster01 (maintenance) * rsc2 
(ocf:pacemaker:Dummy): Started cluster02 (maintenance) =#=#=#= End test: Text output of active unmanaged resource on offline node - OK (0) =#=#=#= * Passed: crm_mon - Text output of active unmanaged resource on offline node =#=#=#= Begin test: XML output of active unmanaged resource on offline node =#=#=#= =#=#=#= End test: XML output of active unmanaged resource on offline node - OK (0) =#=#=#= * Passed: crm_mon - XML output of active unmanaged resource on offline node =#=#=#= Begin test: Brief text output of active unmanaged resource on offline node =#=#=#= Cluster Summary: * Stack: corosync * Current DC: cluster02 (version) - partition with quorum * Last updated: * Last change: * 2 nodes configured * 3 resource instances configured *** Resource management is DISABLED *** The cluster will not attempt to start, stop or recover services Node List: * Online: [ cluster01 ] * OFFLINE: [ cluster02 ] Active Resources: * 1 (ocf:pacemaker:Dummy): Active cluster01 * 1 (ocf:pacemaker:Dummy): Active cluster02 * 1 (stonith:fence_xvm): Active cluster01 =#=#=#= End test: Brief text output of active unmanaged resource on offline node - OK (0) =#=#=#= * Passed: crm_mon - Brief text output of active unmanaged resource on offline node =#=#=#= Begin test: Brief text output of active unmanaged resource on offline node, grouped by node =#=#=#= Cluster Summary: * Stack: corosync * Current DC: cluster02 (version) - partition with quorum * Last updated: * Last change: * 2 nodes configured * 3 resource instances configured *** Resource management is DISABLED *** The cluster will not attempt to start, stop or recover services Node List: * Node cluster01: online: * Resources: * 1 (ocf:pacemaker:Dummy): Active * 1 (stonith:fence_xvm): Active * Node cluster02: OFFLINE: * Resources: * 1 (ocf:pacemaker:Dummy): Active =#=#=#= End test: Brief text output of active unmanaged resource on offline node, grouped by node - OK (0) =#=#=#= * Passed: crm_mon - Brief text output of active unmanaged 
resource on offline node, grouped by node =#=#=#= Begin test: Text output of all resources with maintenance-mode enabled =#=#=#= Cluster Summary: * Stack: corosync * Current DC: cluster02 (version) - partition with quorum * Last updated: * Last change: * 5 nodes configured * 32 resource instances configured (4 DISABLED) *** Resource management is DISABLED *** The cluster will not attempt to start, stop or recover services Node List: * GuestNode httpd-bundle-0: maintenance * GuestNode httpd-bundle-1: maintenance * Online: [ cluster01 cluster02 ] Full List of Resources: * Clone Set: ping-clone [ping] (maintenance): * ping (ocf:pacemaker:ping): Started cluster02 (maintenance) * ping (ocf:pacemaker:ping): Started cluster01 (maintenance) * Fencing (stonith:fence_xvm): Started cluster01 (maintenance) * dummy (ocf:pacemaker:Dummy): Started cluster02 (maintenance) * Clone Set: inactive-clone [inactive-dhcpd] (disabled, maintenance): * Stopped (disabled): [ cluster01 cluster02 ] * Resource Group: inactive-group (disabled, maintenance): * inactive-dummy-1 (ocf:pacemaker:Dummy): Stopped (disabled, maintenance) * inactive-dummy-2 (ocf:pacemaker:Dummy): Stopped (disabled, maintenance) * Container bundle set: httpd-bundle [pcmk:http] (maintenance): * httpd-bundle-0 (192.168.122.131) (ocf:heartbeat:apache): Started cluster01 (maintenance) * httpd-bundle-1 (192.168.122.132) (ocf:heartbeat:apache): Started cluster02 (maintenance) * httpd-bundle-2 (192.168.122.133) (ocf:heartbeat:apache): Stopped (maintenance) * Resource Group: exim-group (maintenance): * Public-IP (ocf:heartbeat:IPaddr): Started cluster02 (maintenance) * Email (lsb:exim): Started cluster02 (maintenance) * Clone Set: mysql-clone-group [mysql-group] (maintenance): * Resource Group: mysql-group:0 (maintenance): * mysql-proxy (lsb:mysql-proxy): Started cluster02 (maintenance) * Resource Group: mysql-group:1 (maintenance): * mysql-proxy (lsb:mysql-proxy): Started cluster01 (maintenance) * Clone Set: promotable-clone 
[promotable-rsc] (promotable, maintenance): * promotable-rsc (ocf:pacemaker:Stateful): Promoted cluster02 (maintenance) * promotable-rsc (ocf:pacemaker:Stateful): Unpromoted cluster01 (maintenance) =#=#=#= End test: Text output of all resources with maintenance-mode enabled - OK (0) =#=#=#= * Passed: crm_mon - Text output of all resources with maintenance-mode enabled =#=#=#= Begin test: XML output of all resources with maintenance-mode enabled =#=#=#= =#=#=#= End test: XML output of all resources with maintenance-mode enabled - OK (0) =#=#=#= * Passed: crm_mon - XML output of all resources with maintenance-mode enabled =#=#=#= Begin test: Text output of all resources with maintenance enabled for a node =#=#=#= Cluster Summary: * Stack: corosync * Current DC: cluster02 (version) - partition with quorum * Last updated: * Last change: * 5 nodes configured * 32 resource instances configured (4 DISABLED) Node List: * Node cluster02: maintenance * GuestNode httpd-bundle-1: maintenance * Online: [ cluster01 ] * GuestOnline: [ httpd-bundle-0 ] Full List of Resources: * Clone Set: ping-clone [ping]: * ping (ocf:pacemaker:ping): Started cluster02 (maintenance) * Started: [ cluster01 ] * Fencing (stonith:fence_xvm): Started cluster01 * dummy (ocf:pacemaker:Dummy): Started cluster02 (maintenance) * Clone Set: inactive-clone [inactive-dhcpd] (disabled): * Stopped (disabled): [ cluster01 cluster02 ] * Resource Group: inactive-group (disabled): * inactive-dummy-1 (ocf:pacemaker:Dummy): Stopped (disabled) * inactive-dummy-2 (ocf:pacemaker:Dummy): Stopped (disabled) * Container bundle set: httpd-bundle [pcmk:http]: * httpd-bundle-0 (192.168.122.131) (ocf:heartbeat:apache): Started cluster01 * httpd-bundle-1 (192.168.122.132) (ocf:heartbeat:apache): Started cluster02 (maintenance) * httpd-bundle-2 (192.168.122.133) (ocf:heartbeat:apache): Stopped * Resource Group: exim-group: * Public-IP (ocf:heartbeat:IPaddr): Started cluster02 (maintenance) * Email (lsb:exim): Started cluster02 
(maintenance) * Clone Set: mysql-clone-group [mysql-group]: * Resource Group: mysql-group:0: * mysql-proxy (lsb:mysql-proxy): Started cluster02 (maintenance) * Started: [ cluster01 ] * Clone Set: promotable-clone [promotable-rsc] (promotable): * promotable-rsc (ocf:pacemaker:Stateful): Promoted cluster02 (maintenance) * Unpromoted: [ cluster01 ] =#=#=#= End test: Text output of all resources with maintenance enabled for a node - OK (0) =#=#=#= * Passed: crm_mon - Text output of all resources with maintenance enabled for a node =#=#=#= Begin test: XML output of all resources with maintenance enabled for a node =#=#=#= =#=#=#= End test: XML output of all resources with maintenance enabled for a node - OK (0) =#=#=#= * Passed: crm_mon - XML output of all resources with maintenance enabled for a node =#=#=#= Begin test: Text output of all resources with maintenance meta attribute true =#=#=#= Cluster Summary: * Stack: corosync * Current DC: cluster02 (version) - partition with quorum * Last updated: * Last change: * 5 nodes configured * 32 resource instances configured (4 DISABLED) Node List: * GuestNode httpd-bundle-0: maintenance * GuestNode httpd-bundle-1: maintenance * Online: [ cluster01 cluster02 ] Full List of Resources: * Clone Set: ping-clone [ping] (maintenance): * ping (ocf:pacemaker:ping): Started cluster02 (maintenance) * ping (ocf:pacemaker:ping): Started cluster01 (maintenance) * Fencing (stonith:fence_xvm): Started cluster01 * dummy (ocf:pacemaker:Dummy): Started cluster02 (maintenance) * Clone Set: inactive-clone [inactive-dhcpd] (disabled, maintenance): * Stopped (disabled): [ cluster01 cluster02 ] * Resource Group: inactive-group (disabled, maintenance): * inactive-dummy-1 (ocf:pacemaker:Dummy): Stopped (disabled, maintenance) * inactive-dummy-2 (ocf:pacemaker:Dummy): Stopped (disabled, maintenance) * Container bundle set: httpd-bundle [pcmk:http] (maintenance): * httpd-bundle-0 (192.168.122.131) (ocf:heartbeat:apache): Started cluster01 
(maintenance) * httpd-bundle-1 (192.168.122.132) (ocf:heartbeat:apache): Started cluster02 (maintenance) * httpd-bundle-2 (192.168.122.133) (ocf:heartbeat:apache): Stopped (maintenance) * Resource Group: exim-group (maintenance): * Public-IP (ocf:heartbeat:IPaddr): Started cluster02 (maintenance) * Email (lsb:exim): Started cluster02 (maintenance) * Clone Set: mysql-clone-group [mysql-group] (maintenance): * Resource Group: mysql-group:0 (maintenance): * mysql-proxy (lsb:mysql-proxy): Started cluster02 (maintenance) * Resource Group: mysql-group:1 (maintenance): * mysql-proxy (lsb:mysql-proxy): Started cluster01 (maintenance) * Clone Set: promotable-clone [promotable-rsc] (promotable, maintenance): * promotable-rsc (ocf:pacemaker:Stateful): Promoted cluster02 (maintenance) * promotable-rsc (ocf:pacemaker:Stateful): Unpromoted cluster01 (maintenance) =#=#=#= End test: Text output of all resources with maintenance meta attribute true - OK (0) =#=#=#= * Passed: crm_mon - Text output of all resources with maintenance meta attribute true =#=#=#= Begin test: XML output of all resources with maintenance meta attribute true =#=#=#= =#=#=#= End test: XML output of all resources with maintenance meta attribute true - OK (0) =#=#=#= * Passed: crm_mon - XML output of all resources with maintenance meta attribute true =#=#=#= Begin test: Text output of guest node's container on different node from its remote resource =#=#=#= Cluster Summary: * Stack: corosync * Current DC: cent7-host2 (version) - partition with quorum * Last updated: * Last change: * 4 nodes configured * 10 resource instances configured Node List: * Online: [ cent7-host1 cent7-host2 ] * GuestOnline: [ httpd-bundle1-0 httpd-bundle2-0 ] Active Resources: * Resource Group: group1: * dummy1 (ocf:pacemaker:Dummy): Started cent7-host1 * Resource Group: group2: * dummy2 (ocf:pacemaker:Dummy): Started cent7-host2 * Container bundle: httpd-bundle1 [pcmktest:http]: * httpd-bundle1-0 (192.168.20.188) 
(ocf:heartbeat:apache): Started cent7-host1 * Container bundle: httpd-bundle2 [pcmktest:http]: * httpd-bundle2-0 (192.168.20.190) (ocf:heartbeat:apache): Started cent7-host2 =#=#=#= End test: Text output of guest node's container on different node from its remote resource - OK (0) =#=#=#= * Passed: crm_mon - Text output of guest node's container on different node from its remote resource =#=#=#= Begin test: Complete text output of guest node's container on different node from its remote resource =#=#=#= Cluster Summary: * Stack: corosync * Current DC: cent7-host2 (3232262829) (version) - partition with quorum * Last updated: * Last change: * 4 nodes configured * 10 resource instances configured Node List: * Node cent7-host1 (3232262828): online, feature set <3.15.1 * Node cent7-host2 (3232262829): online, feature set <3.15.1 * GuestNode httpd-bundle1-0@cent7-host1: online * GuestNode httpd-bundle2-0@cent7-host2: online Active Resources: * Resource Group: group1: * dummy1 (ocf:pacemaker:Dummy): Started cent7-host1 * Resource Group: group2: * dummy2 (ocf:pacemaker:Dummy): Started cent7-host2 * Container bundle: httpd-bundle1 [pcmktest:http]: * httpd-bundle1-ip-192.168.20.188 (ocf:heartbeat:IPaddr2): Started cent7-host1 * httpd1 (ocf:heartbeat:apache): Started httpd-bundle1-0 * httpd-bundle1-docker-0 (ocf:heartbeat:docker): Started cent7-host1 * httpd-bundle1-0 (ocf:pacemaker:remote): Started cent7-host2 * Container bundle: httpd-bundle2 [pcmktest:http]: * httpd-bundle2-ip-192.168.20.190 (ocf:heartbeat:IPaddr2): Started cent7-host2 * httpd2 (ocf:heartbeat:apache): Started httpd-bundle2-0 * httpd-bundle2-docker-0 (ocf:heartbeat:docker): Started cent7-host2 * httpd-bundle2-0 (ocf:pacemaker:remote): Started cent7-host2 =#=#=#= End test: Complete text output of guest node's container on different node from its remote resource - OK (0) =#=#=#= * Passed: crm_mon - Complete text output of guest node's container on different node from its remote resource diff --git 
a/cts/scheduler/xml/rec-rsc-9.xml b/cts/scheduler/xml/rec-rsc-9.xml index 74c56bad29..a0cb8d6db7 100644 --- a/cts/scheduler/xml/rec-rsc-9.xml +++ b/cts/scheduler/xml/rec-rsc-9.xml @@ -1,63 +1,63 @@ - + diff --git a/include/crm/pengine/internal.h b/include/crm/pengine/internal.h index 1eb37d051a..b70ddc8716 100644 --- a/include/crm/pengine/internal.h +++ b/include/crm/pengine/internal.h @@ -1,713 +1,739 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PE_INTERNAL__H # define PE_INTERNAL__H # include # include +# include # include # include # include # include # include const char *pe__resource_description(const pe_resource_t *rsc, uint32_t show_opts); enum pe__clone_flags { // Whether instances should be started sequentially pe__clone_ordered = (1 << 0), // Whether promotion scores have been added pe__clone_promotion_added = (1 << 1), // Whether promotion constraints have been added pe__clone_promotion_constrained = (1 << 2), }; bool pe__clone_is_ordered(const pe_resource_t *clone); int pe__set_clone_flag(pe_resource_t *clone, enum pe__clone_flags flag); enum pe__group_flags { pe__group_ordered = (1 << 0), // Members start sequentially pe__group_colocated = (1 << 1), // Members must be on same node }; bool pe__group_flag_is_set(const pe_resource_t *group, uint32_t flags); pe_resource_t *pe__last_group_member(const pe_resource_t *group); # define pe_rsc_info(rsc, fmt, args...) crm_log_tag(LOG_INFO, rsc ? rsc->id : "", fmt, ##args) # define pe_rsc_debug(rsc, fmt, args...) crm_log_tag(LOG_DEBUG, rsc ? rsc->id : "", fmt, ##args) # define pe_rsc_trace(rsc, fmt, args...) crm_log_tag(LOG_TRACE, rsc ? rsc->id : "", fmt, ##args) # define pe_err(fmt...) 
do { \ was_processing_error = TRUE; \ pcmk__config_err(fmt); \ } while (0) # define pe_warn(fmt...) do { \ was_processing_warning = TRUE; \ pcmk__config_warn(fmt); \ } while (0) # define pe_proc_err(fmt...) { was_processing_error = TRUE; crm_err(fmt); } # define pe_proc_warn(fmt...) { was_processing_warning = TRUE; crm_warn(fmt); } #define pe__set_working_set_flags(working_set, flags_to_set) do { \ (working_set)->flags = pcmk__set_flags_as(__func__, __LINE__, \ LOG_TRACE, "Working set", crm_system_name, \ (working_set)->flags, (flags_to_set), #flags_to_set); \ } while (0) #define pe__clear_working_set_flags(working_set, flags_to_clear) do { \ (working_set)->flags = pcmk__clear_flags_as(__func__, __LINE__, \ LOG_TRACE, "Working set", crm_system_name, \ (working_set)->flags, (flags_to_clear), #flags_to_clear); \ } while (0) #define pe__set_resource_flags(resource, flags_to_set) do { \ (resource)->flags = pcmk__set_flags_as(__func__, __LINE__, \ LOG_TRACE, "Resource", (resource)->id, (resource)->flags, \ (flags_to_set), #flags_to_set); \ } while (0) #define pe__clear_resource_flags(resource, flags_to_clear) do { \ (resource)->flags = pcmk__clear_flags_as(__func__, __LINE__, \ LOG_TRACE, "Resource", (resource)->id, (resource)->flags, \ (flags_to_clear), #flags_to_clear); \ } while (0) #define pe__set_action_flags(action, flags_to_set) do { \ (action)->flags = pcmk__set_flags_as(__func__, __LINE__, \ LOG_TRACE, \ "Action", (action)->uuid, \ (action)->flags, \ (flags_to_set), \ #flags_to_set); \ } while (0) #define pe__clear_action_flags(action, flags_to_clear) do { \ (action)->flags = pcmk__clear_flags_as(__func__, __LINE__, \ LOG_TRACE, \ "Action", (action)->uuid, \ (action)->flags, \ (flags_to_clear), \ #flags_to_clear); \ } while (0) #define pe__set_raw_action_flags(action_flags, action_name, flags_to_set) do { \ action_flags = pcmk__set_flags_as(__func__, __LINE__, \ LOG_TRACE, "Action", action_name, \ (action_flags), \ (flags_to_set), #flags_to_set); \ } while (0) 
#define pe__clear_raw_action_flags(action_flags, action_name, flags_to_clear) do { \ action_flags = pcmk__clear_flags_as(__func__, __LINE__, \ LOG_TRACE, \ "Action", action_name, \ (action_flags), \ (flags_to_clear), \ #flags_to_clear); \ } while (0) #define pe__set_action_flags_as(function, line, action, flags_to_set) do { \ (action)->flags = pcmk__set_flags_as((function), (line), \ LOG_TRACE, \ "Action", (action)->uuid, \ (action)->flags, \ (flags_to_set), \ #flags_to_set); \ } while (0) #define pe__clear_action_flags_as(function, line, action, flags_to_clear) do { \ (action)->flags = pcmk__clear_flags_as((function), (line), \ LOG_TRACE, \ "Action", (action)->uuid, \ (action)->flags, \ (flags_to_clear), \ #flags_to_clear); \ } while (0) #define pe__set_order_flags(order_flags, flags_to_set) do { \ order_flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \ "Ordering", "constraint", \ order_flags, (flags_to_set), \ #flags_to_set); \ } while (0) #define pe__clear_order_flags(order_flags, flags_to_clear) do { \ order_flags = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \ "Ordering", "constraint", \ order_flags, (flags_to_clear), \ #flags_to_clear); \ } while (0) // Some warnings we don't want to print every transition enum pe_warn_once_e { pe_wo_blind = (1 << 0), pe_wo_restart_type = (1 << 1), pe_wo_role_after = (1 << 2), pe_wo_poweroff = (1 << 3), pe_wo_require_all = (1 << 4), pe_wo_order_score = (1 << 5), pe_wo_neg_threshold = (1 << 6), pe_wo_remove_after = (1 << 7), pe_wo_ping_node = (1 << 8), pe_wo_order_inst = (1 << 9), pe_wo_coloc_inst = (1 << 10), pe_wo_group_order = (1 << 11), pe_wo_group_coloc = (1 << 12), }; extern uint32_t pe_wo; #define pe_warn_once(pe_wo_bit, fmt...) 
do { \ if (!pcmk_is_set(pe_wo, pe_wo_bit)) { \ if (pe_wo_bit == pe_wo_blind) { \ crm_warn(fmt); \ } else { \ pe_warn(fmt); \ } \ pe_wo = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \ "Warn-once", "logging", pe_wo, \ (pe_wo_bit), #pe_wo_bit); \ } \ } while (0); typedef struct pe__location_constraint_s { char *id; // Constraint XML ID pe_resource_t *rsc_lh; // Resource being located enum rsc_role_e role_filter; // Role to locate enum pe_discover_e discover_mode; // Resource discovery GList *node_list_rh; // List of pe_node_t* } pe__location_t; typedef struct pe__order_constraint_s { int id; uint32_t flags; // Group of enum pe_ordering flags void *lh_opaque; pe_resource_t *lh_rsc; pe_action_t *lh_action; char *lh_action_task; void *rh_opaque; pe_resource_t *rh_rsc; pe_action_t *rh_action; char *rh_action_task; } pe__ordering_t; typedef struct notify_data_s { GSList *keys; // Environment variable name/value pairs const char *action; pe_action_t *pre; pe_action_t *post; pe_action_t *pre_done; pe_action_t *post_done; GList *active; /* notify_entry_t* */ GList *inactive; /* notify_entry_t* */ GList *start; /* notify_entry_t* */ GList *stop; /* notify_entry_t* */ GList *demote; /* notify_entry_t* */ GList *promote; /* notify_entry_t* */ GList *promoted; /* notify_entry_t* */ GList *unpromoted; /* notify_entry_t* */ GHashTable *allowed_nodes; } notify_data_t; const pe_resource_t *pe__const_top_resource(const pe_resource_t *rsc, bool include_bundle); int pe__clone_max(const pe_resource_t *clone); int pe__clone_promoted_max(const pe_resource_t *clone); int pe__clone_promoted_node_max(const pe_resource_t *clone); pe_action_t *pe__new_rsc_pseudo_action(pe_resource_t *rsc, const char *task, bool optional, bool runnable); void pe__create_promotable_pseudo_ops(pe_resource_t *clone, bool any_promoting, bool any_demoting); bool pe_can_fence(const pe_working_set_t *data_set, const pe_node_t *node); void add_hash_param(GHashTable * hash, const char *name, const char *value); 
char *native_parameter(pe_resource_t * rsc, pe_node_t * node, gboolean create, const char *name, pe_working_set_t * data_set); pe_node_t *native_location(const pe_resource_t *rsc, GList **list, int current); void pe_metadata(pcmk__output_t *out); void verify_pe_options(GHashTable * options); void common_update_score(pe_resource_t * rsc, const char *id, int score); void native_add_running(pe_resource_t * rsc, pe_node_t * node, pe_working_set_t * data_set, gboolean failed); gboolean native_unpack(pe_resource_t * rsc, pe_working_set_t * data_set); gboolean group_unpack(pe_resource_t * rsc, pe_working_set_t * data_set); gboolean clone_unpack(pe_resource_t * rsc, pe_working_set_t * data_set); gboolean pe__unpack_bundle(pe_resource_t *rsc, pe_working_set_t *data_set); pe_resource_t *native_find_rsc(pe_resource_t *rsc, const char *id, const pe_node_t *node, int flags); gboolean native_active(pe_resource_t * rsc, gboolean all); gboolean group_active(pe_resource_t * rsc, gboolean all); gboolean clone_active(pe_resource_t * rsc, gboolean all); gboolean pe__bundle_active(pe_resource_t *rsc, gboolean all); //! \deprecated This function will be removed in a future release void native_print(pe_resource_t *rsc, const char *pre_text, long options, void *print_data); //! \deprecated This function will be removed in a future release void group_print(pe_resource_t *rsc, const char *pre_text, long options, void *print_data); //! \deprecated This function will be removed in a future release void clone_print(pe_resource_t *rsc, const char *pre_text, long options, void *print_data); //! 
\deprecated This function will be removed in a future release void pe__print_bundle(pe_resource_t *rsc, const char *pre_text, long options, void *print_data); gchar *pcmk__native_output_string(const pe_resource_t *rsc, const char *name, const pe_node_t *node, uint32_t show_opts, const char *target_role, bool show_nodes); int pe__name_and_nvpairs_xml(pcmk__output_t *out, bool is_list, const char *tag_name , size_t pairs_count, ...); char *pe__node_display_name(pe_node_t *node, bool print_detail); // Clone notifications (pe_notif.c) void pe__create_notifications(pe_resource_t *rsc, notify_data_t *n_data); notify_data_t *pe__clone_notif_pseudo_ops(pe_resource_t *rsc, const char *task, pe_action_t *action, pe_action_t *complete); void pe__free_notification_data(notify_data_t *n_data); void pe__order_notifs_after_fencing(const pe_action_t *action, pe_resource_t *rsc, pe_action_t *stonith_op); static inline const char * pe__rsc_bool_str(const pe_resource_t *rsc, uint64_t rsc_flag) { return pcmk__btoa(pcmk_is_set(rsc->flags, rsc_flag)); } int pe__clone_xml(pcmk__output_t *out, va_list args); int pe__clone_default(pcmk__output_t *out, va_list args); int pe__group_xml(pcmk__output_t *out, va_list args); int pe__group_default(pcmk__output_t *out, va_list args); int pe__bundle_xml(pcmk__output_t *out, va_list args); int pe__bundle_html(pcmk__output_t *out, va_list args); int pe__bundle_text(pcmk__output_t *out, va_list args); int pe__node_html(pcmk__output_t *out, va_list args); int pe__node_text(pcmk__output_t *out, va_list args); int pe__node_xml(pcmk__output_t *out, va_list args); int pe__resource_xml(pcmk__output_t *out, va_list args); int pe__resource_html(pcmk__output_t *out, va_list args); int pe__resource_text(pcmk__output_t *out, va_list args); void native_free(pe_resource_t * rsc); void group_free(pe_resource_t * rsc); void clone_free(pe_resource_t * rsc); void pe__free_bundle(pe_resource_t *rsc); enum rsc_role_e native_resource_state(const pe_resource_t * rsc, 
gboolean current); enum rsc_role_e group_resource_state(const pe_resource_t * rsc, gboolean current); enum rsc_role_e clone_resource_state(const pe_resource_t * rsc, gboolean current); enum rsc_role_e pe__bundle_resource_state(const pe_resource_t *rsc, gboolean current); void pe__count_common(pe_resource_t *rsc); void pe__count_bundle(pe_resource_t *rsc); void common_free(pe_resource_t * rsc); pe_node_t *pe__copy_node(const pe_node_t *this_node); extern time_t get_effective_time(pe_working_set_t * data_set); /* Failure handling utilities (from failcounts.c) */ // bit flags for fail count handling options enum pe_fc_flags_e { pe_fc_default = (1 << 0), pe_fc_effective = (1 << 1), // don't count expired failures pe_fc_fillers = (1 << 2), // if container, include filler failures in count }; int pe_get_failcount(const pe_node_t *node, pe_resource_t *rsc, time_t *last_failure, uint32_t flags, const xmlNode *xml_op); pe_action_t *pe__clear_failcount(pe_resource_t *rsc, const pe_node_t *node, const char *reason, pe_working_set_t *data_set); /* Functions for finding/counting a resource's active nodes */ bool pe__count_active_node(const pe_resource_t *rsc, pe_node_t *node, pe_node_t **active, unsigned int *count_all, unsigned int *count_clean); pe_node_t *pe__find_active_requires(const pe_resource_t *rsc, unsigned int *count); static inline pe_node_t * pe__current_node(const pe_resource_t *rsc) { return (rsc == NULL)? 
NULL : rsc->fns->active_node(rsc, NULL, NULL); } /* Binary like operators for lists of nodes */ extern void node_list_exclude(GHashTable * list, GList *list2, gboolean merge_scores); GHashTable *pe__node_list2table(const GList *list); static inline gpointer pe_hash_table_lookup(GHashTable * hash, gconstpointer key) { if (hash) { return g_hash_table_lookup(hash, key); } return NULL; } extern pe_action_t *get_pseudo_op(const char *name, pe_working_set_t * data_set); extern gboolean order_actions(pe_action_t * lh_action, pe_action_t * rh_action, enum pe_ordering order); void pe__show_node_weights_as(const char *file, const char *function, int line, bool to_log, const pe_resource_t *rsc, const char *comment, GHashTable *nodes, pe_working_set_t *data_set); #define pe__show_node_weights(level, rsc, text, nodes, data_set) \ pe__show_node_weights_as(__FILE__, __func__, __LINE__, \ (level), (rsc), (text), (nodes), (data_set)) xmlNode *find_rsc_op_entry(const pe_resource_t *rsc, const char *key); pe_action_t *custom_action(pe_resource_t *rsc, char *key, const char *task, const pe_node_t *on_node, gboolean optional, gboolean foo, pe_working_set_t *data_set); # define delete_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_DELETE, 0) # define delete_action(rsc, node, optional) custom_action( \ rsc, delete_key(rsc), CRMD_ACTION_DELETE, node, \ optional, TRUE, rsc->cluster); # define stopped_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_STOPPED, 0) # define stopped_action(rsc, node, optional) custom_action( \ rsc, stopped_key(rsc), CRMD_ACTION_STOPPED, node, \ optional, TRUE, rsc->cluster); # define stop_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_STOP, 0) # define stop_action(rsc, node, optional) custom_action( \ rsc, stop_key(rsc), CRMD_ACTION_STOP, node, \ optional, TRUE, rsc->cluster); # define reload_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_RELOAD_AGENT, 0) # define start_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_START, 0) # define start_action(rsc, node, optional) custom_action( 
\ rsc, start_key(rsc), CRMD_ACTION_START, node, \ optional, TRUE, rsc->cluster) # define started_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_STARTED, 0) # define started_action(rsc, node, optional) custom_action( \ rsc, started_key(rsc), CRMD_ACTION_STARTED, node, \ optional, TRUE, rsc->cluster) # define promote_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_PROMOTE, 0) # define promote_action(rsc, node, optional) custom_action( \ rsc, promote_key(rsc), CRMD_ACTION_PROMOTE, node, \ optional, TRUE, rsc->cluster) # define promoted_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_PROMOTED, 0) # define promoted_action(rsc, node, optional) custom_action( \ rsc, promoted_key(rsc), CRMD_ACTION_PROMOTED, node, \ optional, TRUE, rsc->cluster) # define demote_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_DEMOTE, 0) # define demote_action(rsc, node, optional) custom_action( \ rsc, demote_key(rsc), CRMD_ACTION_DEMOTE, node, \ optional, TRUE, rsc->cluster) # define demoted_key(rsc) pcmk__op_key(rsc->id, CRMD_ACTION_DEMOTED, 0) # define demoted_action(rsc, node, optional) custom_action( \ rsc, demoted_key(rsc), CRMD_ACTION_DEMOTED, node, \ optional, TRUE, rsc->cluster) extern int pe_get_configured_timeout(pe_resource_t *rsc, const char *action, pe_working_set_t *data_set); pe_action_t *find_first_action(const GList *input, const char *uuid, const char *task, const pe_node_t *on_node); enum action_tasks get_complex_task(const pe_resource_t *rsc, const char *name); extern GList *find_actions(GList *input, const char *key, const pe_node_t *on_node); GList *find_actions_exact(GList *input, const char *key, const pe_node_t *on_node); GList *pe__resource_actions(const pe_resource_t *rsc, const pe_node_t *node, const char *task, bool require_node); extern void pe_free_action(pe_action_t * action); void resource_location(pe_resource_t *rsc, const pe_node_t *node, int score, const char *tag, pe_working_set_t *data_set); extern int pe__is_newer_op(const xmlNode *xml_a, const xmlNode *xml_b, bool 
same_node_default); extern gint sort_op_by_callid(gconstpointer a, gconstpointer b); gboolean get_target_role(const pe_resource_t *rsc, enum rsc_role_e *role); void pe__set_next_role(pe_resource_t *rsc, enum rsc_role_e role, const char *why); pe_resource_t *find_clone_instance(const pe_resource_t *rsc, const char *sub_id); extern void destroy_ticket(gpointer data); extern pe_ticket_t *ticket_new(const char *ticket_id, pe_working_set_t * data_set); // Resources for manipulating resource names const char *pe_base_name_end(const char *id); char *clone_strip(const char *last_rsc_id); char *clone_zero(const char *last_rsc_id); static inline bool pe_base_name_eq(const pe_resource_t *rsc, const char *id) { if (id && rsc && rsc->id) { // Number of characters in rsc->id before any clone suffix size_t base_len = pe_base_name_end(rsc->id) - rsc->id + 1; return (strlen(id) == base_len) && !strncmp(id, rsc->id, base_len); } return false; } int pe__target_rc_from_xml(const xmlNode *xml_op); gint pe__cmp_node_name(gconstpointer a, gconstpointer b); bool is_set_recursive(const pe_resource_t *rsc, long long flag, bool any); enum rsc_digest_cmp_val { /*! Digests are the same */ RSC_DIGEST_MATCH = 0, /*! Params that require a restart changed */ RSC_DIGEST_RESTART, /*! Some parameter changed. */ RSC_DIGEST_ALL, /*! rsc op didn't have a digest associated with it, so * it is unknown if parameters changed or not. 
*/ RSC_DIGEST_UNKNOWN, }; typedef struct op_digest_cache_s { enum rsc_digest_cmp_val rc; xmlNode *params_all; xmlNode *params_secure; xmlNode *params_restart; char *digest_all_calc; char *digest_secure_calc; char *digest_restart_calc; } op_digest_cache_t; op_digest_cache_t *pe__calculate_digests(pe_resource_t *rsc, const char *task, guint *interval_ms, const pe_node_t *node, const xmlNode *xml_op, GHashTable *overrides, bool calc_secure, pe_working_set_t *data_set); void pe__free_digests(gpointer ptr); op_digest_cache_t *rsc_action_digest_cmp(pe_resource_t *rsc, const xmlNode *xml_op, pe_node_t *node, pe_working_set_t *data_set); pe_action_t *pe_fence_op(pe_node_t *node, const char *op, bool optional, const char *reason, bool priority_delay, pe_working_set_t *data_set); void trigger_unfencing(pe_resource_t *rsc, pe_node_t *node, const char *reason, pe_action_t *dependency, pe_working_set_t *data_set); char *pe__action2reason(const pe_action_t *action, enum pe_action_flags flag); void pe_action_set_reason(pe_action_t *action, const char *reason, bool overwrite); void pe__add_action_expected_result(pe_action_t *action, int expected_result); void pe__set_resource_flags_recursive(pe_resource_t *rsc, uint64_t flags); void pe__clear_resource_flags_recursive(pe_resource_t *rsc, uint64_t flags); void pe__clear_resource_flags_on_all(pe_working_set_t *data_set, uint64_t flag); gboolean add_tag_ref(GHashTable * tags, const char * tag_name, const char * obj_ref); //! \deprecated This function will be removed in a future release void print_rscs_brief(GList *rsc_list, const char * pre_text, long options, void * print_data, gboolean print_all); int pe__rscs_brief_output(pcmk__output_t *out, GList *rsc_list, unsigned int options); void pe_fence_node(pe_working_set_t * data_set, pe_node_t * node, const char *reason, bool priority_delay); pe_node_t *pe_create_node(const char *id, const char *uname, const char *type, const char *score, pe_working_set_t * data_set); //! 
\deprecated This function will be removed in a future release void common_print(pe_resource_t *rsc, const char *pre_text, const char *name, const pe_node_t *node, long options, void *print_data); int pe__common_output_text(pcmk__output_t *out, const pe_resource_t *rsc, const char *name, const pe_node_t *node, unsigned int options); int pe__common_output_html(pcmk__output_t *out, const pe_resource_t *rsc, const char *name, const pe_node_t *node, unsigned int options); GList *pe__bundle_containers(const pe_resource_t *bundle); pe_resource_t *pe__find_bundle_replica(const pe_resource_t *bundle, const pe_node_t *node); bool pe__bundle_needs_remote_name(pe_resource_t *rsc); const char *pe__add_bundle_remote_name(pe_resource_t *rsc, pe_working_set_t *data_set, xmlNode *xml, const char *field); const char *pe_node_attribute_calculated(const pe_node_t *node, const char *name, const pe_resource_t *rsc); const char *pe_node_attribute_raw(const pe_node_t *node, const char *name); bool pe__is_universal_clone(const pe_resource_t *rsc, const pe_working_set_t *data_set); void pe__add_param_check(const xmlNode *rsc_op, pe_resource_t *rsc, pe_node_t *node, enum pe_check_parameters, pe_working_set_t *data_set); void pe__foreach_param_check(pe_working_set_t *data_set, void (*cb)(pe_resource_t*, pe_node_t*, const xmlNode*, enum pe_check_parameters)); void pe__free_param_checks(pe_working_set_t *data_set); bool pe__shutdown_requested(const pe_node_t *node); void pe__update_recheck_time(time_t recheck, pe_working_set_t *data_set); /*! * \internal * \brief Register xml formatting message functions. 
* * \param[in,out] out Output object to register messages with */ void pe__register_messages(pcmk__output_t *out); void pe__unpack_dataset_nvpairs(const xmlNode *xml_obj, const char *set_name, const pe_rule_eval_data_t *rule_data, GHashTable *hash, const char *always_first, gboolean overwrite, pe_working_set_t *data_set); bool pe__resource_is_disabled(const pe_resource_t *rsc); pe_action_t *pe__clear_resource_history(pe_resource_t *rsc, const pe_node_t *node, pe_working_set_t *data_set); GList *pe__rscs_with_tag(pe_working_set_t *data_set, const char *tag_name); GList *pe__unames_with_tag(pe_working_set_t *data_set, const char *tag_name); bool pe__rsc_has_tag(pe_working_set_t *data_set, const char *rsc, const char *tag); bool pe__uname_has_tag(pe_working_set_t *data_set, const char *node, const char *tag); bool pe__rsc_running_on_any(pe_resource_t *rsc, GList *node_list); GList *pe__filter_rsc_list(GList *rscs, GList *filter); GList * pe__build_node_name_list(pe_working_set_t *data_set, const char *s); GList * pe__build_rsc_list(pe_working_set_t *data_set, const char *s); bool pcmk__rsc_filtered_by_node(pe_resource_t *rsc, GList *only_node); gboolean pe__bundle_is_filtered(const pe_resource_t *rsc, GList *only_rsc, gboolean check_parent); gboolean pe__clone_is_filtered(const pe_resource_t *rsc, GList *only_rsc, gboolean check_parent); gboolean pe__group_is_filtered(const pe_resource_t *rsc, GList *only_rsc, gboolean check_parent); gboolean pe__native_is_filtered(const pe_resource_t *rsc, GList *only_rsc, gboolean check_parent); xmlNode *pe__failed_probe_for_rsc(const pe_resource_t *rsc, const char *name); const char *pe__clone_child_id(const pe_resource_t *rsc); int pe__sum_node_health_scores(const pe_node_t *node, int base_health); int pe__node_health(pe_node_t *node); static inline enum pcmk__health_strategy pe__health_strategy(pe_working_set_t *data_set) { return pcmk__parse_health_strategy(pe_pref(data_set->config_hash, PCMK__OPT_NODE_HEALTH_STRATEGY)); } 
static inline int pe__health_score(const char *option, pe_working_set_t *data_set) { return char2score(pe_pref(data_set->config_hash, option)); } /*! * \internal * \brief Return a string suitable for logging as a node name * * \param[in] node Node to return a node name string for * * \return Node name if available, otherwise node ID if available, * otherwise "unspecified node" if node is NULL or "unidentified node" * if node has neither a name nor ID. */ static inline const char * pe__node_name(const pe_node_t *node) { if (node == NULL) { return "unspecified node"; } else if (node->details->uname != NULL) { return node->details->uname; } else if (node->details->id != NULL) { return node->details->id; } else { return "unidentified node"; } } /*! * \internal * \brief Check whether two node objects refer to the same node * * \param[in] node1 First node object to compare * \param[in] node2 Second node object to compare * * \return true if \p node1 and \p node2 refer to the same node */ static inline bool pe__same_node(const pe_node_t *node1, const pe_node_t *node2) { return (node1 != NULL) && (node2 != NULL) && (node1->details == node2->details); } + +/*! + * \internal + * \brief Get the operation key from an action history entry + * + * \param[in] xml Action history entry + * + * \return Entry's operation key + */ +static inline const char * +pe__xe_history_key(const xmlNode *xml) +{ + if (xml == NULL) { + return NULL; + } else { + /* @COMPAT Pacemaker <= 1.1.5 did not add the key, and used the ID + * instead. Checking for that allows us to process old saved CIBs, + * including some regression tests. + */ + const char *key = crm_element_value(xml, XML_LRM_ATTR_TASK_KEY); + + return pcmk__str_empty(key)? 
ID(xml) : key; + } +} + #endif diff --git a/lib/pengine/pe_output.c b/lib/pengine/pe_output.c index 46c76fbd93..68cc867423 100644 --- a/lib/pengine/pe_output.c +++ b/lib/pengine/pe_output.c @@ -1,3113 +1,3108 @@ /* * Copyright 2019-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include const char * pe__resource_description(const pe_resource_t *rsc, uint32_t show_opts) { const char * desc = NULL; // User-supplied description if (pcmk_any_flags_set(show_opts, pcmk_show_rsc_only|pcmk_show_description) || pcmk__list_of_multiple(rsc->running_on)) { desc = crm_element_value(rsc->xml, XML_ATTR_DESC); } return desc; } /* Never display node attributes whose name starts with one of these prefixes */ #define FILTER_STR { PCMK__FAIL_COUNT_PREFIX, PCMK__LAST_FAILURE_PREFIX, \ "shutdown", "terminate", "standby", "#", NULL } static int compare_attribute(gconstpointer a, gconstpointer b) { int rc; rc = strcmp((const char *)a, (const char *)b); return rc; } /*! * \internal * \brief Determine whether extended information about an attribute should be added. * * \param[in] node Node that ran this resource * \param[in,out] rsc_list List of resources for this node * \param[in,out] data_set Cluster working set * \param[in] attrname Attribute to find * \param[out] expected_score Expected value for this attribute * * \return true if extended information should be printed, false otherwise * \note Currently, extended information is only supported for ping/pingd * resources, for which a message will be printed if connectivity is lost * or degraded. 
*/ static bool add_extra_info(const pe_node_t *node, GList *rsc_list, pe_working_set_t *data_set, const char *attrname, int *expected_score) { GList *gIter = NULL; for (gIter = rsc_list; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; const char *type = g_hash_table_lookup(rsc->meta, "type"); const char *name = NULL; GHashTable *params = NULL; if (rsc->children != NULL) { if (add_extra_info(node, rsc->children, data_set, attrname, expected_score)) { return true; } } if (!pcmk__strcase_any_of(type, "ping", "pingd", NULL)) { continue; } params = pe_rsc_params(rsc, node, data_set); name = g_hash_table_lookup(params, "name"); if (name == NULL) { name = "pingd"; } /* To identify the resource with the attribute name. */ if (pcmk__str_eq(name, attrname, pcmk__str_casei)) { int host_list_num = 0; const char *hosts = g_hash_table_lookup(params, "host_list"); const char *multiplier = g_hash_table_lookup(params, "multiplier"); int multiplier_i; if (hosts) { char **host_list = g_strsplit(hosts, " ", 0); host_list_num = g_strv_length(host_list); g_strfreev(host_list); } if ((multiplier == NULL) || (pcmk__scan_min_int(multiplier, &multiplier_i, INT_MIN) != pcmk_rc_ok)) { /* The ocf:pacemaker:ping resource agent defaults multiplier to * 1. The agent currently does not handle invalid text, but it * should, and this would be a reasonable choice ... 
*/ multiplier_i = 1; } *expected_score = host_list_num * multiplier_i; return true; } } return false; } static GList * filter_attr_list(GList *attr_list, char *name) { int i; const char *filt_str[] = FILTER_STR; CRM_CHECK(name != NULL, return attr_list); /* filtering automatic attributes */ for (i = 0; filt_str[i] != NULL; i++) { if (g_str_has_prefix(name, filt_str[i])) { return attr_list; } } return g_list_insert_sorted(attr_list, name, compare_attribute); } static GList * get_operation_list(xmlNode *rsc_entry) { GList *op_list = NULL; xmlNode *rsc_op = NULL; for (rsc_op = pcmk__xe_first_child(rsc_entry); rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op)) { const char *task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); const char *interval_ms_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL_MS); const char *op_rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC); int op_rc_i; pcmk__scan_min_int(op_rc, &op_rc_i, 0); /* Display 0-interval monitors as "probe" */ if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei) && pcmk__str_eq(interval_ms_s, "0", pcmk__str_null_matches | pcmk__str_casei)) { task = "probe"; } /* Ignore notifies and some probes */ if (pcmk__str_eq(task, CRMD_ACTION_NOTIFY, pcmk__str_casei) || (pcmk__str_eq(task, "probe", pcmk__str_casei) && (op_rc_i == 7))) { continue; } if (pcmk__str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, pcmk__str_none)) { op_list = g_list_append(op_list, rsc_op); } } op_list = g_list_sort(op_list, sort_op_by_callid); return op_list; } static void add_dump_node(gpointer key, gpointer value, gpointer user_data) { xmlNodePtr node = user_data; pcmk_create_xml_text_node(node, (const char *) key, (const char *) value); } static void append_dump_text(gpointer key, gpointer value, gpointer user_data) { char **dump_text = user_data; char *new_text = crm_strdup_printf("%s %s=%s", *dump_text, (char *)key, (char *)value); free(*dump_text); *dump_text = new_text; } static const char * get_cluster_stack(pe_working_set_t 
*data_set) { xmlNode *stack = get_xpath_object("//nvpair[@name='cluster-infrastructure']", data_set->input, LOG_DEBUG); return stack? crm_element_value(stack, XML_NVPAIR_ATTR_VALUE) : "unknown"; } static char * last_changed_string(const char *last_written, const char *user, const char *client, const char *origin) { if (last_written != NULL || user != NULL || client != NULL || origin != NULL) { return crm_strdup_printf("%s%s%s%s%s%s%s", last_written ? last_written : "", user ? " by " : "", user ? user : "", client ? " via " : "", client ? client : "", origin ? " on " : "", origin ? origin : ""); } else { return strdup(""); } } static char * op_history_string(xmlNode *xml_op, const char *task, const char *interval_ms_s, int rc, bool print_timing) { const char *call = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); char *interval_str = NULL; char *buf = NULL; if (interval_ms_s && !pcmk__str_eq(interval_ms_s, "0", pcmk__str_casei)) { char *pair = pcmk__format_nvpair("interval", interval_ms_s, "ms"); interval_str = crm_strdup_printf(" %s", pair); free(pair); } if (print_timing) { char *last_change_str = NULL; char *exec_str = NULL; char *queue_str = NULL; const char *value = NULL; time_t epoch = 0; if ((crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE, &epoch) == pcmk_ok) && (epoch > 0)) { char *epoch_str = pcmk__epoch2str(&epoch, 0); last_change_str = crm_strdup_printf(" %s=\"%s\"", XML_RSC_OP_LAST_CHANGE, pcmk__s(epoch_str, "")); free(epoch_str); } value = crm_element_value(xml_op, XML_RSC_OP_T_EXEC); if (value) { char *pair = pcmk__format_nvpair(XML_RSC_OP_T_EXEC, value, "ms"); exec_str = crm_strdup_printf(" %s", pair); free(pair); } value = crm_element_value(xml_op, XML_RSC_OP_T_QUEUE); if (value) { char *pair = pcmk__format_nvpair(XML_RSC_OP_T_QUEUE, value, "ms"); queue_str = crm_strdup_printf(" %s", pair); free(pair); } buf = crm_strdup_printf("(%s) %s:%s%s%s%s rc=%d (%s)", call, task, interval_str ? interval_str : "", last_change_str ? 
last_change_str : "", exec_str ? exec_str : "", queue_str ? queue_str : "", rc, services_ocf_exitcode_str(rc)); if (last_change_str) { free(last_change_str); } if (exec_str) { free(exec_str); } if (queue_str) { free(queue_str); } } else { buf = crm_strdup_printf("(%s) %s%s%s", call, task, interval_str ? ":" : "", interval_str ? interval_str : ""); } if (interval_str) { free(interval_str); } return buf; } static char * resource_history_string(pe_resource_t *rsc, const char *rsc_id, bool all, int failcount, time_t last_failure) { char *buf = NULL; if (rsc == NULL) { buf = crm_strdup_printf("%s: orphan", rsc_id); } else if (all || failcount || last_failure > 0) { char *failcount_s = NULL; char *lastfail_s = NULL; if (failcount > 0) { failcount_s = crm_strdup_printf(" %s=%d", PCMK__FAIL_COUNT_PREFIX, failcount); } else { failcount_s = strdup(""); } if (last_failure > 0) { buf = pcmk__epoch2str(&last_failure, 0); lastfail_s = crm_strdup_printf(" %s='%s'", PCMK__LAST_FAILURE_PREFIX, buf); free(buf); } buf = crm_strdup_printf("%s: migration-threshold=%d%s%s", rsc_id, rsc->migration_threshold, failcount_s, lastfail_s? lastfail_s : ""); free(failcount_s); free(lastfail_s); } else { buf = crm_strdup_printf("%s:", rsc_id); } return buf; } static const char * get_node_feature_set(pe_node_t *node) { const char *feature_set = NULL; if (node->details->online && !pe__is_guest_or_remote_node(node)) { feature_set = g_hash_table_lookup(node->details->attrs, CRM_ATTR_FEATURE_SET); /* The feature set attribute is present since 3.15.1. If it is missing * then the node must be running an earlier version. 
*/ if (feature_set == NULL) { feature_set = "<3.15.1"; } } return feature_set; } static bool is_mixed_version(pe_working_set_t *data_set) { const char *feature_set = NULL; for (GList *gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = gIter->data; const char *node_feature_set = get_node_feature_set(node); if (node_feature_set != NULL) { if (feature_set == NULL) { feature_set = node_feature_set; } else if (strcmp(feature_set, node_feature_set) != 0) { return true; } } } return false; } static char * formatted_xml_buf(pe_resource_t *rsc, bool raw) { if (raw) { return dump_xml_formatted(rsc->orig_xml ? rsc->orig_xml : rsc->xml); } else { return dump_xml_formatted(rsc->xml); } } PCMK__OUTPUT_ARGS("cluster-summary", "pe_working_set_t *", "enum pcmk_pacemakerd_state", "uint32_t", "uint32_t") static int cluster_summary(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); enum pcmk_pacemakerd_state pcmkd_state = (enum pcmk_pacemakerd_state) va_arg(args, int); uint32_t section_opts = va_arg(args, uint32_t); uint32_t show_opts = va_arg(args, uint32_t); int rc = pcmk_rc_no_output; const char *stack_s = get_cluster_stack(data_set); if (pcmk_is_set(section_opts, pcmk_section_stack)) { PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Cluster Summary"); out->message(out, "cluster-stack", stack_s, pcmkd_state); } if (pcmk_is_set(section_opts, pcmk_section_dc)) { xmlNode *dc_version = get_xpath_object("//nvpair[@name='dc-version']", data_set->input, LOG_DEBUG); const char *dc_version_s = dc_version? crm_element_value(dc_version, XML_NVPAIR_ATTR_VALUE) : NULL; const char *quorum = crm_element_value(data_set->input, XML_ATTR_HAVE_QUORUM); char *dc_name = data_set->dc_node ? 
pe__node_display_name(data_set->dc_node, pcmk_is_set(show_opts, pcmk_show_node_id)) : NULL; bool mixed_version = is_mixed_version(data_set); PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Cluster Summary"); out->message(out, "cluster-dc", data_set->dc_node, quorum, dc_version_s, dc_name, mixed_version); free(dc_name); } if (pcmk_is_set(section_opts, pcmk_section_times)) { const char *last_written = crm_element_value(data_set->input, XML_CIB_ATTR_WRITTEN); const char *user = crm_element_value(data_set->input, XML_ATTR_UPDATE_USER); const char *client = crm_element_value(data_set->input, XML_ATTR_UPDATE_CLIENT); const char *origin = crm_element_value(data_set->input, XML_ATTR_UPDATE_ORIG); PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Cluster Summary"); out->message(out, "cluster-times", data_set->localhost, last_written, user, client, origin); } if (pcmk_is_set(section_opts, pcmk_section_counts)) { PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Cluster Summary"); out->message(out, "cluster-counts", g_list_length(data_set->nodes), data_set->ninstances, data_set->disabled_resources, data_set->blocked_resources); } if (pcmk_is_set(section_opts, pcmk_section_options)) { PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Cluster Summary"); out->message(out, "cluster-options", data_set); } PCMK__OUTPUT_LIST_FOOTER(out, rc); if (pcmk_is_set(section_opts, pcmk_section_maint_mode)) { if (out->message(out, "maint-mode", data_set->flags) == pcmk_rc_ok) { rc = pcmk_rc_ok; } } return rc; } PCMK__OUTPUT_ARGS("cluster-summary", "pe_working_set_t *", "enum pcmk_pacemakerd_state", "uint32_t", "uint32_t") static int cluster_summary_html(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); enum pcmk_pacemakerd_state pcmkd_state = (enum pcmk_pacemakerd_state) va_arg(args, int); uint32_t section_opts = va_arg(args, uint32_t); uint32_t show_opts = va_arg(args, uint32_t); int rc = pcmk_rc_no_output; const char *stack_s = get_cluster_stack(data_set); if 
(pcmk_is_set(section_opts, pcmk_section_stack)) { PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Cluster Summary"); out->message(out, "cluster-stack", stack_s, pcmkd_state); } /* Always print DC if none, even if not requested */ if (data_set->dc_node == NULL || pcmk_is_set(section_opts, pcmk_section_dc)) { xmlNode *dc_version = get_xpath_object("//nvpair[@name='dc-version']", data_set->input, LOG_DEBUG); const char *dc_version_s = dc_version? crm_element_value(dc_version, XML_NVPAIR_ATTR_VALUE) : NULL; const char *quorum = crm_element_value(data_set->input, XML_ATTR_HAVE_QUORUM); char *dc_name = data_set->dc_node ? pe__node_display_name(data_set->dc_node, pcmk_is_set(show_opts, pcmk_show_node_id)) : NULL; bool mixed_version = is_mixed_version(data_set); PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Cluster Summary"); out->message(out, "cluster-dc", data_set->dc_node, quorum, dc_version_s, dc_name, mixed_version); free(dc_name); } if (pcmk_is_set(section_opts, pcmk_section_times)) { const char *last_written = crm_element_value(data_set->input, XML_CIB_ATTR_WRITTEN); const char *user = crm_element_value(data_set->input, XML_ATTR_UPDATE_USER); const char *client = crm_element_value(data_set->input, XML_ATTR_UPDATE_CLIENT); const char *origin = crm_element_value(data_set->input, XML_ATTR_UPDATE_ORIG); PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Cluster Summary"); out->message(out, "cluster-times", data_set->localhost, last_written, user, client, origin); } if (pcmk_is_set(section_opts, pcmk_section_counts)) { PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Cluster Summary"); out->message(out, "cluster-counts", g_list_length(data_set->nodes), data_set->ninstances, data_set->disabled_resources, data_set->blocked_resources); } if (pcmk_is_set(section_opts, pcmk_section_options)) { /* Kind of a hack - close the list we may have opened earlier in this * function so we can put all the options into their own list. We * only want to do this on HTML output, though. 
*/ PCMK__OUTPUT_LIST_FOOTER(out, rc); out->begin_list(out, NULL, NULL, "Config Options"); out->message(out, "cluster-options", data_set); } PCMK__OUTPUT_LIST_FOOTER(out, rc); if (pcmk_is_set(section_opts, pcmk_section_maint_mode)) { if (out->message(out, "maint-mode", data_set->flags) == pcmk_rc_ok) { rc = pcmk_rc_ok; } } return rc; } char * pe__node_display_name(pe_node_t *node, bool print_detail) { char *node_name; const char *node_host = NULL; const char *node_id = NULL; int name_len; CRM_ASSERT((node != NULL) && (node->details != NULL) && (node->details->uname != NULL)); /* Host is displayed only if this is a guest node and detail is requested */ if (print_detail && pe__is_guest_node(node)) { const pe_resource_t *container = node->details->remote_rsc->container; const pe_node_t *host_node = pe__current_node(container); if (host_node && host_node->details) { node_host = host_node->details->uname; } if (node_host == NULL) { node_host = ""; /* so we at least get "uname@" to indicate guest */ } } /* Node ID is displayed if different from uname and detail is requested */ if (print_detail && !pcmk__str_eq(node->details->uname, node->details->id, pcmk__str_casei)) { node_id = node->details->id; } /* Determine name length */ name_len = strlen(node->details->uname) + 1; if (node_host) { name_len += strlen(node_host) + 1; /* "@node_host" */ } if (node_id) { name_len += strlen(node_id) + 3; /* + " (node_id)" */ } /* Allocate and populate display name */ node_name = malloc(name_len); CRM_ASSERT(node_name != NULL); strcpy(node_name, node->details->uname); if (node_host) { strcat(node_name, "@"); strcat(node_name, node_host); } if (node_id) { strcat(node_name, " ("); strcat(node_name, node_id); strcat(node_name, ")"); } return node_name; } int pe__name_and_nvpairs_xml(pcmk__output_t *out, bool is_list, const char *tag_name , size_t pairs_count, ...) 
{ xmlNodePtr xml_node = NULL; va_list args; CRM_ASSERT(tag_name != NULL); xml_node = pcmk__output_xml_peek_parent(out); CRM_ASSERT(xml_node != NULL); xml_node = is_list ? create_xml_node(xml_node, tag_name) : xmlNewChild(xml_node, NULL, (pcmkXmlStr) tag_name, NULL); va_start(args, pairs_count); while(pairs_count--) { const char *param_name = va_arg(args, const char *); const char *param_value = va_arg(args, const char *); if (param_name && param_value) { crm_xml_add(xml_node, param_name, param_value); } }; va_end(args); if (is_list) { pcmk__output_xml_push_parent(out, xml_node); } return pcmk_rc_ok; } static const char * role_desc(enum rsc_role_e role) { if (role == RSC_ROLE_PROMOTED) { #ifdef PCMK__COMPAT_2_0 return "as " RSC_ROLE_PROMOTED_LEGACY_S " "; #else return "in " RSC_ROLE_PROMOTED_S " role "; #endif } return ""; } PCMK__OUTPUT_ARGS("ban", "pe_node_t *", "pe__location_t *", "uint32_t") static int ban_html(pcmk__output_t *out, va_list args) { pe_node_t *pe_node = va_arg(args, pe_node_t *); pe__location_t *location = va_arg(args, pe__location_t *); uint32_t show_opts = va_arg(args, uint32_t); char *node_name = pe__node_display_name(pe_node, pcmk_is_set(show_opts, pcmk_show_node_id)); char *buf = crm_strdup_printf("%s\tprevents %s from running %son %s", location->id, location->rsc_lh->id, role_desc(location->role_filter), node_name); pcmk__output_create_html_node(out, "li", NULL, NULL, buf); free(node_name); free(buf); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ban", "pe_node_t *", "pe__location_t *", "uint32_t") static int ban_text(pcmk__output_t *out, va_list args) { pe_node_t *pe_node = va_arg(args, pe_node_t *); pe__location_t *location = va_arg(args, pe__location_t *); uint32_t show_opts = va_arg(args, uint32_t); char *node_name = pe__node_display_name(pe_node, pcmk_is_set(show_opts, pcmk_show_node_id)); out->list_item(out, NULL, "%s\tprevents %s from running %son %s", location->id, location->rsc_lh->id, role_desc(location->role_filter), node_name); 
free(node_name); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ban", "pe_node_t *", "pe__location_t *", "uint32_t") static int ban_xml(pcmk__output_t *out, va_list args) { pe_node_t *pe_node = va_arg(args, pe_node_t *); pe__location_t *location = va_arg(args, pe__location_t *); uint32_t show_opts G_GNUC_UNUSED = va_arg(args, uint32_t); const char *promoted_only = pcmk__btoa(location->role_filter == RSC_ROLE_PROMOTED); char *weight_s = pcmk__itoa(pe_node->weight); pcmk__output_create_xml_node(out, "ban", "id", location->id, "resource", location->rsc_lh->id, "node", pe_node->details->uname, "weight", weight_s, "promoted-only", promoted_only, /* This is a deprecated alias for * promoted_only. Removing it will break * backward compatibility of the API schema, * which will require an API schema major * version bump. */ "master_only", promoted_only, NULL); free(weight_s); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ban-list", "pe_working_set_t *", "const char *", "GList *", "uint32_t", "bool") static int ban_list(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); const char *prefix = va_arg(args, const char *); GList *only_rsc = va_arg(args, GList *); uint32_t show_opts = va_arg(args, uint32_t); bool print_spacer = va_arg(args, int); GList *gIter, *gIter2; int rc = pcmk_rc_no_output; /* Print each ban */ for (gIter = data_set->placement_constraints; gIter != NULL; gIter = gIter->next) { pe__location_t *location = gIter->data; const pe_resource_t *rsc = location->rsc_lh; if (prefix != NULL && !g_str_has_prefix(location->id, prefix)) { continue; } if (!pcmk__str_in_list(rsc_printable_id(rsc), only_rsc, pcmk__str_star_matches) && !pcmk__str_in_list(rsc_printable_id(pe__const_top_resource(rsc, false)), only_rsc, pcmk__str_star_matches)) { continue; } for (gIter2 = location->node_list_rh; gIter2 != NULL; gIter2 = gIter2->next) { pe_node_t *node = (pe_node_t *) gIter2->data; if (node->weight < 0) { PCMK__OUTPUT_LIST_HEADER(out, 
print_spacer, rc, "Negative Location Constraints"); out->message(out, "ban", node, location, show_opts); } } } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("cluster-counts", "unsigned int", "int", "int", "int") static int cluster_counts_html(pcmk__output_t *out, va_list args) { unsigned int nnodes = va_arg(args, unsigned int); int nresources = va_arg(args, int); int ndisabled = va_arg(args, int); int nblocked = va_arg(args, int); xmlNodePtr nodes_node = pcmk__output_create_xml_node(out, "li", NULL); xmlNodePtr resources_node = pcmk__output_create_xml_node(out, "li", NULL); char *nnodes_str = crm_strdup_printf("%d node%s configured", nnodes, pcmk__plural_s(nnodes)); pcmk_create_html_node(nodes_node, "span", NULL, NULL, nnodes_str); free(nnodes_str); if (ndisabled && nblocked) { char *s = crm_strdup_printf("%d resource instance%s configured (%d ", nresources, pcmk__plural_s(nresources), ndisabled); pcmk_create_html_node(resources_node, "span", NULL, NULL, s); free(s); pcmk_create_html_node(resources_node, "span", NULL, "bold", "DISABLED"); s = crm_strdup_printf(", %d ", nblocked); pcmk_create_html_node(resources_node, "span", NULL, NULL, s); free(s); pcmk_create_html_node(resources_node, "span", NULL, "bold", "BLOCKED"); pcmk_create_html_node(resources_node, "span", NULL, NULL, " from further action due to failure)"); } else if (ndisabled && !nblocked) { char *s = crm_strdup_printf("%d resource instance%s configured (%d ", nresources, pcmk__plural_s(nresources), ndisabled); pcmk_create_html_node(resources_node, "span", NULL, NULL, s); free(s); pcmk_create_html_node(resources_node, "span", NULL, "bold", "DISABLED"); pcmk_create_html_node(resources_node, "span", NULL, NULL, ")"); } else if (!ndisabled && nblocked) { char *s = crm_strdup_printf("%d resource instance%s configured (%d ", nresources, pcmk__plural_s(nresources), nblocked); pcmk_create_html_node(resources_node, "span", NULL, NULL, s); free(s); pcmk_create_html_node(resources_node, 
"span", NULL, "bold", "BLOCKED"); pcmk_create_html_node(resources_node, "span", NULL, NULL, " from further action due to failure)"); } else { char *s = crm_strdup_printf("%d resource instance%s configured", nresources, pcmk__plural_s(nresources)); pcmk_create_html_node(resources_node, "span", NULL, NULL, s); free(s); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-counts", "unsigned int", "int", "int", "int") static int cluster_counts_text(pcmk__output_t *out, va_list args) { unsigned int nnodes = va_arg(args, unsigned int); int nresources = va_arg(args, int); int ndisabled = va_arg(args, int); int nblocked = va_arg(args, int); out->list_item(out, NULL, "%d node%s configured", nnodes, pcmk__plural_s(nnodes)); if (ndisabled && nblocked) { out->list_item(out, NULL, "%d resource instance%s configured " "(%d DISABLED, %d BLOCKED from " "further action due to failure)", nresources, pcmk__plural_s(nresources), ndisabled, nblocked); } else if (ndisabled && !nblocked) { out->list_item(out, NULL, "%d resource instance%s configured " "(%d DISABLED)", nresources, pcmk__plural_s(nresources), ndisabled); } else if (!ndisabled && nblocked) { out->list_item(out, NULL, "%d resource instance%s configured " "(%d BLOCKED from further action " "due to failure)", nresources, pcmk__plural_s(nresources), nblocked); } else { out->list_item(out, NULL, "%d resource instance%s configured", nresources, pcmk__plural_s(nresources)); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-counts", "unsigned int", "int", "int", "int") static int cluster_counts_xml(pcmk__output_t *out, va_list args) { unsigned int nnodes = va_arg(args, unsigned int); int nresources = va_arg(args, int); int ndisabled = va_arg(args, int); int nblocked = va_arg(args, int); xmlNodePtr nodes_node = pcmk__output_create_xml_node(out, "nodes_configured", NULL); xmlNodePtr resources_node = pcmk__output_create_xml_node(out, "resources_configured", NULL); char *s = pcmk__itoa(nnodes); crm_xml_add(nodes_node, "number", s); 
free(s); s = pcmk__itoa(nresources); crm_xml_add(resources_node, "number", s); free(s); s = pcmk__itoa(ndisabled); crm_xml_add(resources_node, "disabled", s); free(s); s = pcmk__itoa(nblocked); crm_xml_add(resources_node, "blocked", s); free(s); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-dc", "pe_node_t *", "const char *", "const char *", "char *", "int") static int cluster_dc_html(pcmk__output_t *out, va_list args) { pe_node_t *dc = va_arg(args, pe_node_t *); const char *quorum = va_arg(args, const char *); const char *dc_version_s = va_arg(args, const char *); char *dc_name = va_arg(args, char *); bool mixed_version = va_arg(args, int); xmlNodePtr node = pcmk__output_create_xml_node(out, "li", NULL); pcmk_create_html_node(node, "span", NULL, "bold", "Current DC: "); if (dc) { char *buf = crm_strdup_printf("%s (version %s) -", dc_name, dc_version_s ? dc_version_s : "unknown"); pcmk_create_html_node(node, "span", NULL, NULL, buf); free(buf); if (mixed_version) { pcmk_create_html_node(node, "span", NULL, "warning", " MIXED-VERSION"); } pcmk_create_html_node(node, "span", NULL, NULL, " partition"); if (crm_is_true(quorum)) { pcmk_create_html_node(node, "span", NULL, NULL, " with"); } else { pcmk_create_html_node(node, "span", NULL, "warning", " WITHOUT"); } pcmk_create_html_node(node, "span", NULL, NULL, " quorum"); } else { pcmk_create_html_node(node, "span", NULL, "warning", "NONE"); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-dc", "pe_node_t *", "const char *", "const char *", "char *", "int") static int cluster_dc_text(pcmk__output_t *out, va_list args) { pe_node_t *dc = va_arg(args, pe_node_t *); const char *quorum = va_arg(args, const char *); const char *dc_version_s = va_arg(args, const char *); char *dc_name = va_arg(args, char *); bool mixed_version = va_arg(args, int); if (dc) { out->list_item(out, "Current DC", "%s (version %s) - %spartition %s quorum", dc_name, dc_version_s ? dc_version_s : "unknown", mixed_version ? 
"MIXED-VERSION " : "", crm_is_true(quorum) ? "with" : "WITHOUT"); } else { out->list_item(out, "Current DC", "NONE"); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-dc", "pe_node_t *", "const char *", "const char *", "char *", "int") static int cluster_dc_xml(pcmk__output_t *out, va_list args) { pe_node_t *dc = va_arg(args, pe_node_t *); const char *quorum = va_arg(args, const char *); const char *dc_version_s = va_arg(args, const char *); char *dc_name G_GNUC_UNUSED = va_arg(args, char *); bool mixed_version = va_arg(args, int); if (dc) { pcmk__output_create_xml_node(out, "current_dc", "present", "true", "version", dc_version_s ? dc_version_s : "", "name", dc->details->uname, "id", dc->details->id, "with_quorum", pcmk__btoa(crm_is_true(quorum)), "mixed_version", pcmk__btoa(mixed_version), NULL); } else { pcmk__output_create_xml_node(out, "current_dc", "present", "false", NULL); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("maint-mode", "unsigned long long int") static int cluster_maint_mode_text(pcmk__output_t *out, va_list args) { unsigned long long flags = va_arg(args, unsigned long long); if (pcmk_is_set(flags, pe_flag_maintenance_mode)) { pcmk__formatted_printf(out, "\n *** Resource management is DISABLED ***\n"); pcmk__formatted_printf(out, " The cluster will not attempt to start, stop or recover services\n"); return pcmk_rc_ok; } else if (pcmk_is_set(flags, pe_flag_stop_everything)) { pcmk__formatted_printf(out, "\n *** Resource management is DISABLED ***\n"); pcmk__formatted_printf(out, " The cluster will keep all resources stopped\n"); return pcmk_rc_ok; } else { return pcmk_rc_no_output; } } PCMK__OUTPUT_ARGS("cluster-options", "pe_working_set_t *") static int cluster_options_html(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); out->list_item(out, NULL, "STONITH of failed nodes %s", pcmk_is_set(data_set->flags, pe_flag_stonith_enabled) ? 
"enabled" : "disabled"); out->list_item(out, NULL, "Cluster is %s", pcmk_is_set(data_set->flags, pe_flag_symmetric_cluster) ? "symmetric" : "asymmetric"); switch (data_set->no_quorum_policy) { case no_quorum_freeze: out->list_item(out, NULL, "No quorum policy: Freeze resources"); break; case no_quorum_stop: out->list_item(out, NULL, "No quorum policy: Stop ALL resources"); break; case no_quorum_demote: out->list_item(out, NULL, "No quorum policy: Demote promotable " "resources and stop all other resources"); break; case no_quorum_ignore: out->list_item(out, NULL, "No quorum policy: Ignore"); break; case no_quorum_suicide: out->list_item(out, NULL, "No quorum policy: Suicide"); break; } if (pcmk_is_set(data_set->flags, pe_flag_maintenance_mode)) { xmlNodePtr node = pcmk__output_create_xml_node(out, "li", NULL); pcmk_create_html_node(node, "span", NULL, NULL, "Resource management: "); pcmk_create_html_node(node, "span", NULL, "bold", "DISABLED"); pcmk_create_html_node(node, "span", NULL, NULL, " (the cluster will not attempt to start, stop, or recover services)"); } else if (pcmk_is_set(data_set->flags, pe_flag_stop_everything)) { xmlNodePtr node = pcmk__output_create_xml_node(out, "li", NULL); pcmk_create_html_node(node, "span", NULL, NULL, "Resource management: "); pcmk_create_html_node(node, "span", NULL, "bold", "STOPPED"); pcmk_create_html_node(node, "span", NULL, NULL, " (the cluster will keep all resources stopped)"); } else { out->list_item(out, NULL, "Resource management: enabled"); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-options", "pe_working_set_t *") static int cluster_options_log(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); if (pcmk_is_set(data_set->flags, pe_flag_maintenance_mode)) { return out->info(out, "Resource management is DISABLED. 
The cluster will not attempt to start, stop or recover services."); } else if (pcmk_is_set(data_set->flags, pe_flag_stop_everything)) { return out->info(out, "Resource management is DISABLED. The cluster has stopped all resources."); } else { return pcmk_rc_no_output; } } PCMK__OUTPUT_ARGS("cluster-options", "pe_working_set_t *") static int cluster_options_text(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); out->list_item(out, NULL, "STONITH of failed nodes %s", pcmk_is_set(data_set->flags, pe_flag_stonith_enabled) ? "enabled" : "disabled"); out->list_item(out, NULL, "Cluster is %s", pcmk_is_set(data_set->flags, pe_flag_symmetric_cluster) ? "symmetric" : "asymmetric"); switch (data_set->no_quorum_policy) { case no_quorum_freeze: out->list_item(out, NULL, "No quorum policy: Freeze resources"); break; case no_quorum_stop: out->list_item(out, NULL, "No quorum policy: Stop ALL resources"); break; case no_quorum_demote: out->list_item(out, NULL, "No quorum policy: Demote promotable " "resources and stop all other resources"); break; case no_quorum_ignore: out->list_item(out, NULL, "No quorum policy: Ignore"); break; case no_quorum_suicide: out->list_item(out, NULL, "No quorum policy: Suicide"); break; } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-options", "pe_working_set_t *") static int cluster_options_xml(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); const char *no_quorum_policy = NULL; char *stonith_timeout_str = pcmk__itoa(data_set->stonith_timeout); char *priority_fencing_delay_str = pcmk__itoa(data_set->priority_fencing_delay * 1000); switch (data_set->no_quorum_policy) { case no_quorum_freeze: no_quorum_policy = "freeze"; break; case no_quorum_stop: no_quorum_policy = "stop"; break; case no_quorum_demote: no_quorum_policy = "demote"; break; case no_quorum_ignore: no_quorum_policy = "ignore"; break; case no_quorum_suicide: no_quorum_policy = "suicide"; 
break; } pcmk__output_create_xml_node(out, "cluster_options", "stonith-enabled", pcmk__btoa(pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)), "symmetric-cluster", pcmk__btoa(pcmk_is_set(data_set->flags, pe_flag_symmetric_cluster)), "no-quorum-policy", no_quorum_policy, "maintenance-mode", pcmk__btoa(pcmk_is_set(data_set->flags, pe_flag_maintenance_mode)), "stop-all-resources", pcmk__btoa(pcmk_is_set(data_set->flags, pe_flag_stop_everything)), "stonith-timeout-ms", stonith_timeout_str, "priority-fencing-delay-ms", priority_fencing_delay_str, NULL); free(stonith_timeout_str); free(priority_fencing_delay_str); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-stack", "const char *", "enum pcmk_pacemakerd_state") static int cluster_stack_html(pcmk__output_t *out, va_list args) { const char *stack_s = va_arg(args, const char *); enum pcmk_pacemakerd_state pcmkd_state = (enum pcmk_pacemakerd_state) va_arg(args, int); xmlNodePtr node = pcmk__output_create_xml_node(out, "li", NULL); pcmk_create_html_node(node, "span", NULL, "bold", "Stack: "); pcmk_create_html_node(node, "span", NULL, NULL, stack_s); if (pcmkd_state != pcmk_pacemakerd_state_invalid) { pcmk_create_html_node(node, "span", NULL, NULL, " ("); pcmk_create_html_node(node, "span", NULL, NULL, pcmk__pcmkd_state_enum2friendly(pcmkd_state)); pcmk_create_html_node(node, "span", NULL, NULL, ")"); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-stack", "const char *", "enum pcmk_pacemakerd_state") static int cluster_stack_text(pcmk__output_t *out, va_list args) { const char *stack_s = va_arg(args, const char *); enum pcmk_pacemakerd_state pcmkd_state = (enum pcmk_pacemakerd_state) va_arg(args, int); if (pcmkd_state != pcmk_pacemakerd_state_invalid) { out->list_item(out, "Stack", "%s (%s)", stack_s, pcmk__pcmkd_state_enum2friendly(pcmkd_state)); } else { out->list_item(out, "Stack", "%s", stack_s); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-stack", "const char *", "enum pcmk_pacemakerd_state") static int 
cluster_stack_xml(pcmk__output_t *out, va_list args) { const char *stack_s = va_arg(args, const char *); enum pcmk_pacemakerd_state pcmkd_state = (enum pcmk_pacemakerd_state) va_arg(args, int); const char *state_s = NULL; if (pcmkd_state != pcmk_pacemakerd_state_invalid) { state_s = pcmk_pacemakerd_api_daemon_state_enum2text(pcmkd_state); } pcmk__output_create_xml_node(out, "stack", "type", stack_s, "pacemakerd-state", state_s, NULL); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-times", "const char *", "const char *", "const char *", "const char *", "const char *") static int cluster_times_html(pcmk__output_t *out, va_list args) { const char *our_nodename = va_arg(args, const char *); const char *last_written = va_arg(args, const char *); const char *user = va_arg(args, const char *); const char *client = va_arg(args, const char *); const char *origin = va_arg(args, const char *); xmlNodePtr updated_node = pcmk__output_create_xml_node(out, "li", NULL); xmlNodePtr changed_node = pcmk__output_create_xml_node(out, "li", NULL); char *time_s = pcmk__epoch2str(NULL, 0); pcmk_create_html_node(updated_node, "span", NULL, "bold", "Last updated: "); pcmk_create_html_node(updated_node, "span", NULL, NULL, time_s); if (our_nodename != NULL) { pcmk_create_html_node(updated_node, "span", NULL, NULL, " on "); pcmk_create_html_node(updated_node, "span", NULL, NULL, our_nodename); } free(time_s); time_s = last_changed_string(last_written, user, client, origin); pcmk_create_html_node(changed_node, "span", NULL, "bold", "Last change: "); pcmk_create_html_node(changed_node, "span", NULL, NULL, time_s); free(time_s); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-times", "const char *", "const char *", "const char *", "const char *", "const char *") static int cluster_times_xml(pcmk__output_t *out, va_list args) { const char *our_nodename = va_arg(args, const char *); const char *last_written = va_arg(args, const char *); const char *user = va_arg(args, const char *); const char 
*client = va_arg(args, const char *); const char *origin = va_arg(args, const char *); char *time_s = pcmk__epoch2str(NULL, 0); pcmk__output_create_xml_node(out, "last_update", "time", time_s, "origin", our_nodename, NULL); pcmk__output_create_xml_node(out, "last_change", "time", last_written ? last_written : "", "user", user ? user : "", "client", client ? client : "", "origin", origin ? origin : "", NULL); free(time_s); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-times", "const char *", "const char *", "const char *", "const char *", "const char *") static int cluster_times_text(pcmk__output_t *out, va_list args) { const char *our_nodename = va_arg(args, const char *); const char *last_written = va_arg(args, const char *); const char *user = va_arg(args, const char *); const char *client = va_arg(args, const char *); const char *origin = va_arg(args, const char *); char *time_s = pcmk__epoch2str(NULL, 0); out->list_item(out, "Last updated", "%s%s%s", time_s, (our_nodename != NULL)? " on " : "", pcmk__s(our_nodename, "")); free(time_s); time_s = last_changed_string(last_written, user, client, origin); out->list_item(out, "Last change", " %s", time_s); free(time_s); return pcmk_rc_ok; } /*! 
* \internal * \brief Display a failed action in less-technical natural language * * \param[in,out] out Output object to use for display * \param[in] xml_op XML containing failed action * \param[in] op_key Operation key of failed action * \param[in] node_name Where failed action occurred * \param[in] rc OCF exit code of failed action * \param[in] status Execution status of failed action * \param[in] exit_reason Exit reason given for failed action * \param[in] exec_time String containing execution time in milliseconds */ static void failed_action_friendly(pcmk__output_t *out, const xmlNode *xml_op, const char *op_key, const char *node_name, int rc, int status, const char *exit_reason, const char *exec_time) { char *rsc_id = NULL; char *task = NULL; guint interval_ms = 0; time_t last_change_epoch = 0; GString *str = NULL; if (pcmk__str_empty(op_key) || !parse_op_key(op_key, &rsc_id, &task, &interval_ms)) { rsc_id = strdup("unknown resource"); task = strdup("unknown action"); interval_ms = 0; } CRM_ASSERT((rsc_id != NULL) && (task != NULL)); str = g_string_sized_new(256); // Should be sufficient for most messages pcmk__g_strcat(str, rsc_id, " ", NULL); if (interval_ms != 0) { pcmk__g_strcat(str, pcmk__readable_interval(interval_ms), "-interval ", NULL); } pcmk__g_strcat(str, crm_action_str(task, interval_ms), " on ", node_name, NULL); if (status == PCMK_EXEC_DONE) { pcmk__g_strcat(str, " returned '", services_ocf_exitcode_str(rc), "'", NULL); if (!pcmk__str_empty(exit_reason)) { pcmk__g_strcat(str, " (", exit_reason, ")", NULL); } } else { pcmk__g_strcat(str, " could not be executed (", pcmk_exec_status_str(status), NULL); if (!pcmk__str_empty(exit_reason)) { pcmk__g_strcat(str, ": ", exit_reason, NULL); } g_string_append_c(str, ')'); } if (crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE, &last_change_epoch) == pcmk_ok) { char *s = pcmk__epoch2str(&last_change_epoch, 0); pcmk__g_strcat(str, " at ", s, NULL); free(s); } if (!pcmk__str_empty(exec_time)) { int 
exec_time_ms = 0; if ((pcmk__scan_min_int(exec_time, &exec_time_ms, 0) == pcmk_rc_ok) && (exec_time_ms > 0)) { pcmk__g_strcat(str, " after ", pcmk__readable_interval(exec_time_ms), NULL); } } out->list_item(out, NULL, "%s", str->str); g_string_free(str, TRUE); free(rsc_id); free(task); } /*! * \internal * \brief Display a failed action with technical details * * \param[in,out] out Output object to use for display * \param[in] xml_op XML containing failed action * \param[in] op_key Operation key of failed action * \param[in] node_name Where failed action occurred * \param[in] rc OCF exit code of failed action * \param[in] status Execution status of failed action * \param[in] exit_reason Exit reason given for failed action * \param[in] exec_time String containing execution time in milliseconds */ static void failed_action_technical(pcmk__output_t *out, const xmlNode *xml_op, const char *op_key, const char *node_name, int rc, int status, const char *exit_reason, const char *exec_time) { const char *call_id = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); const char *queue_time = crm_element_value(xml_op, XML_RSC_OP_T_QUEUE); const char *exit_status = services_ocf_exitcode_str(rc); const char *lrm_status = pcmk_exec_status_str(status); time_t last_change_epoch = 0; GString *str = NULL; if (pcmk__str_empty(op_key)) { op_key = "unknown operation"; } if (pcmk__str_empty(exit_status)) { exit_status = "unknown exit status"; } if (pcmk__str_empty(call_id)) { call_id = "unknown"; } str = g_string_sized_new(256); g_string_append_printf(str, "%s on %s '%s' (%d): call=%s, status='%s'", op_key, node_name, exit_status, rc, call_id, lrm_status); if (!pcmk__str_empty(exit_reason)) { pcmk__g_strcat(str, ", exitreason='", exit_reason, "'", NULL); } if (crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE, &last_change_epoch) == pcmk_ok) { char *last_change_str = pcmk__epoch2str(&last_change_epoch, 0); pcmk__g_strcat(str, ", " XML_RSC_OP_LAST_CHANGE "=" "'", last_change_str, "'", 
NULL); free(last_change_str); } if (!pcmk__str_empty(queue_time)) { pcmk__g_strcat(str, ", queued=", queue_time, "ms", NULL); } if (!pcmk__str_empty(exec_time)) { pcmk__g_strcat(str, ", exec=", exec_time, "ms", NULL); } out->list_item(out, NULL, "%s", str->str); g_string_free(str, TRUE); } PCMK__OUTPUT_ARGS("failed-action", "xmlNodePtr", "uint32_t") static int failed_action_default(pcmk__output_t *out, va_list args) { xmlNodePtr xml_op = va_arg(args, xmlNodePtr); uint32_t show_opts = va_arg(args, uint32_t); - const char *op_key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); + const char *op_key = pe__xe_history_key(xml_op); const char *node_name = crm_element_value(xml_op, XML_ATTR_UNAME); const char *exit_reason = crm_element_value(xml_op, XML_LRM_ATTR_EXIT_REASON); const char *exec_time = crm_element_value(xml_op, XML_RSC_OP_T_EXEC); int rc; int status; pcmk__scan_min_int(crm_element_value(xml_op, XML_LRM_ATTR_RC), &rc, 0); pcmk__scan_min_int(crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS), &status, 0); - if (pcmk__str_empty(op_key)) { - op_key = ID(xml_op); - } if (pcmk__str_empty(node_name)) { node_name = "unknown node"; } if (pcmk_is_set(show_opts, pcmk_show_failed_detail)) { failed_action_technical(out, xml_op, op_key, node_name, rc, status, exit_reason, exec_time); } else { failed_action_friendly(out, xml_op, op_key, node_name, rc, status, exit_reason, exec_time); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("failed-action", "xmlNodePtr", "uint32_t") static int failed_action_xml(pcmk__output_t *out, va_list args) { xmlNodePtr xml_op = va_arg(args, xmlNodePtr); uint32_t show_opts G_GNUC_UNUSED = va_arg(args, uint32_t); - const char *op_key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); + const char *op_key = pe__xe_history_key(xml_op); + const char *op_key_name = "op_key"; int rc; int status; const char *exit_reason = crm_element_value(xml_op, XML_LRM_ATTR_EXIT_REASON); time_t epoch = 0; char *rc_s = NULL; char *reason_s = crm_xml_escape(exit_reason ? 
exit_reason : "none"); xmlNodePtr node = NULL; pcmk__scan_min_int(crm_element_value(xml_op, XML_LRM_ATTR_RC), &rc, 0); pcmk__scan_min_int(crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS), &status, 0); rc_s = pcmk__itoa(rc); + if (crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY) == NULL) { + op_key_name = "id"; + } node = pcmk__output_create_xml_node(out, "failure", - (op_key == NULL)? "id" : "op_key", - (op_key == NULL)? ID(xml_op) : op_key, + op_key_name, op_key, "node", crm_element_value(xml_op, XML_ATTR_UNAME), "exitstatus", services_ocf_exitcode_str(rc), "exitreason", pcmk__s(reason_s, ""), "exitcode", rc_s, "call", crm_element_value(xml_op, XML_LRM_ATTR_CALLID), "status", pcmk_exec_status_str(status), NULL); free(rc_s); if ((crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE, &epoch) == pcmk_ok) && (epoch > 0)) { guint interval_ms = 0; char *interval_ms_s = NULL; char *rc_change = pcmk__epoch2str(&epoch, crm_time_log_date |crm_time_log_timeofday |crm_time_log_with_timezone); crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); interval_ms_s = crm_strdup_printf("%u", interval_ms); pcmk__xe_set_props(node, XML_RSC_OP_LAST_CHANGE, rc_change, "queued", crm_element_value(xml_op, XML_RSC_OP_T_QUEUE), "exec", crm_element_value(xml_op, XML_RSC_OP_T_EXEC), "interval", interval_ms_s, "task", crm_element_value(xml_op, XML_LRM_ATTR_TASK), NULL); free(interval_ms_s); free(rc_change); } free(reason_s); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("failed-action-list", "pe_working_set_t *", "GList *", "GList *", "uint32_t", "bool") static int failed_action_list(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); uint32_t show_opts = va_arg(args, uint32_t); bool print_spacer = va_arg(args, int); xmlNode *xml_op = NULL; int rc = pcmk_rc_no_output; - const char *id = NULL; - if (xmlChildElementCount(data_set->failed) == 0) { return 
rc; } for (xml_op = pcmk__xml_first_child(data_set->failed); xml_op != NULL; xml_op = pcmk__xml_next(xml_op)) { char *rsc = NULL; if (!pcmk__str_in_list(crm_element_value(xml_op, XML_ATTR_UNAME), only_node, pcmk__str_star_matches|pcmk__str_casei)) { continue; } if (pcmk_xe_mask_probe_failure(xml_op)) { continue; } - id = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); - if (!parse_op_key(id ? id : ID(xml_op), &rsc, NULL, NULL)) { + if (!parse_op_key(pe__xe_history_key(xml_op), &rsc, NULL, NULL)) { continue; } if (!pcmk__str_in_list(rsc, only_rsc, pcmk__str_star_matches)) { free(rsc); continue; } free(rsc); PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc, "Failed Resource Actions"); out->message(out, "failed-action", xml_op, show_opts); } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } static void status_node(pe_node_t *node, xmlNodePtr parent, uint32_t show_opts) { int health = pe__node_health(node); // Cluster membership if (node->details->online) { pcmk_create_html_node(parent, "span", NULL, "online", " online"); } else { pcmk_create_html_node(parent, "span", NULL, "offline", " OFFLINE"); } // Standby mode if (node->details->standby_onfail && (node->details->running_rsc != NULL)) { pcmk_create_html_node(parent, "span", NULL, "standby", " (in standby due to on-fail," " with active resources)"); } else if (node->details->standby_onfail) { pcmk_create_html_node(parent, "span", NULL, "standby", " (in standby due to on-fail)"); } else if (node->details->standby && (node->details->running_rsc != NULL)) { pcmk_create_html_node(parent, "span", NULL, "standby", " (in standby, with active resources)"); } else if (node->details->standby) { pcmk_create_html_node(parent, "span", NULL, "standby", " (in standby)"); } // Maintenance mode if (node->details->maintenance) { pcmk_create_html_node(parent, "span", NULL, "maint", " (in maintenance mode)"); } // Node health if (health < 0) { pcmk_create_html_node(parent, "span", NULL, "health_red", " (health is RED)"); } else if 
(health == 0) { pcmk_create_html_node(parent, "span", NULL, "health_yellow", " (health is YELLOW)"); } // Feature set if (pcmk_is_set(show_opts, pcmk_show_feature_set)) { const char *feature_set = get_node_feature_set(node); if (feature_set != NULL) { char *buf = crm_strdup_printf(", feature set %s", feature_set); pcmk_create_html_node(parent, "span", NULL, NULL, buf); free(buf); } } } PCMK__OUTPUT_ARGS("node", "pe_node_t *", "uint32_t", "bool", "GList *", "GList *") static int node_html(pcmk__output_t *out, va_list args) { pe_node_t *node = va_arg(args, pe_node_t *); uint32_t show_opts = va_arg(args, uint32_t); bool full = va_arg(args, int); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); char *node_name = pe__node_display_name(node, pcmk_is_set(show_opts, pcmk_show_node_id)); if (full) { xmlNodePtr item_node; if (pcmk_all_flags_set(show_opts, pcmk_show_brief | pcmk_show_rscs_by_node)) { GList *rscs = pe__filter_rsc_list(node->details->running_rsc, only_rsc); out->begin_list(out, NULL, NULL, "%s:", node_name); item_node = pcmk__output_xml_create_parent(out, "li", NULL); pcmk_create_html_node(item_node, "span", NULL, NULL, "Status:"); status_node(node, item_node, show_opts); if (rscs != NULL) { uint32_t new_show_opts = (show_opts | pcmk_show_rsc_only) & ~pcmk_show_inactive_rscs; out->begin_list(out, NULL, NULL, "Resources"); pe__rscs_brief_output(out, rscs, new_show_opts); out->end_list(out); } pcmk__output_xml_pop_parent(out); out->end_list(out); } else if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) { GList *lpc2 = NULL; int rc = pcmk_rc_no_output; out->begin_list(out, NULL, NULL, "%s:", node_name); item_node = pcmk__output_xml_create_parent(out, "li", NULL); pcmk_create_html_node(item_node, "span", NULL, NULL, "Status:"); status_node(node, item_node, show_opts); for (lpc2 = node->details->running_rsc; lpc2 != NULL; lpc2 = lpc2->next) { pe_resource_t *rsc = (pe_resource_t *) lpc2->data; PCMK__OUTPUT_LIST_HEADER(out, false, 
rc, "Resources"); show_opts |= pcmk_show_rsc_only; out->message(out, crm_map_element_name(rsc->xml), show_opts, rsc, only_node, only_rsc); } PCMK__OUTPUT_LIST_FOOTER(out, rc); pcmk__output_xml_pop_parent(out); out->end_list(out); } else { char *buf = crm_strdup_printf("%s:", node_name); item_node = pcmk__output_create_xml_node(out, "li", NULL); pcmk_create_html_node(item_node, "span", NULL, "bold", buf); status_node(node, item_node, show_opts); free(buf); } } else { out->begin_list(out, NULL, NULL, "%s:", node_name); } free(node_name); return pcmk_rc_ok; } /*! * \internal * \brief Get a human-friendly textual description of a node's status * * \param[in] node Node to check * * \return String representation of node's status */ static const char * node_text_status(const pe_node_t *node) { if (node->details->unclean) { if (node->details->online) { return "UNCLEAN (online)"; } else if (node->details->pending) { return "UNCLEAN (pending)"; } else { return "UNCLEAN (offline)"; } } else if (node->details->pending) { return "pending"; } else if (node->details->standby_onfail && node->details->online) { return "standby (on-fail)"; } else if (node->details->standby) { if (node->details->online) { if (node->details->running_rsc) { return "standby (with active resources)"; } else { return "standby"; } } else { return "OFFLINE (standby)"; } } else if (node->details->maintenance) { if (node->details->online) { return "maintenance"; } else { return "OFFLINE (maintenance)"; } } else if (node->details->online) { return "online"; } return "OFFLINE"; } PCMK__OUTPUT_ARGS("node", "pe_node_t *", "uint32_t", "bool", "GList *", "GList *") static int node_text(pcmk__output_t *out, va_list args) { pe_node_t *node = va_arg(args, pe_node_t *); uint32_t show_opts = va_arg(args, uint32_t); bool full = va_arg(args, int); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); if (full) { char *node_name = pe__node_display_name(node, pcmk_is_set(show_opts, 
pcmk_show_node_id)); GString *str = g_string_sized_new(64); int health = pe__node_health(node); // Create a summary line with node type, name, and status if (pe__is_guest_node(node)) { g_string_append(str, "GuestNode"); } else if (pe__is_remote_node(node)) { g_string_append(str, "RemoteNode"); } else { g_string_append(str, "Node"); } pcmk__g_strcat(str, " ", node_name, ": ", node_text_status(node), NULL); if (health < 0) { g_string_append(str, " (health is RED)"); } else if (health == 0) { g_string_append(str, " (health is YELLOW)"); } if (pcmk_is_set(show_opts, pcmk_show_feature_set)) { const char *feature_set = get_node_feature_set(node); if (feature_set != NULL) { pcmk__g_strcat(str, ", feature set ", feature_set, NULL); } } /* If we're grouping by node, print its resources */ if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) { if (pcmk_is_set(show_opts, pcmk_show_brief)) { GList *rscs = pe__filter_rsc_list(node->details->running_rsc, only_rsc); if (rscs != NULL) { uint32_t new_show_opts = (show_opts | pcmk_show_rsc_only) & ~pcmk_show_inactive_rscs; out->begin_list(out, NULL, NULL, "%s", str->str); out->begin_list(out, NULL, NULL, "Resources"); pe__rscs_brief_output(out, rscs, new_show_opts); out->end_list(out); out->end_list(out); g_list_free(rscs); } } else { GList *gIter2 = NULL; out->begin_list(out, NULL, NULL, "%s", str->str); out->begin_list(out, NULL, NULL, "Resources"); for (gIter2 = node->details->running_rsc; gIter2 != NULL; gIter2 = gIter2->next) { pe_resource_t *rsc = (pe_resource_t *) gIter2->data; show_opts |= pcmk_show_rsc_only; out->message(out, crm_map_element_name(rsc->xml), show_opts, rsc, only_node, only_rsc); } out->end_list(out); out->end_list(out); } } else { out->list_item(out, NULL, "%s", str->str); } g_string_free(str, TRUE); free(node_name); } else { char *node_name = pe__node_display_name(node, pcmk_is_set(show_opts, pcmk_show_node_id)); out->begin_list(out, NULL, NULL, "Node: %s", node_name); free(node_name); } return pcmk_rc_ok; } 
PCMK__OUTPUT_ARGS("node", "pe_node_t *", "uint32_t", "bool", "GList *", "GList *") static int node_xml(pcmk__output_t *out, va_list args) { pe_node_t *node = va_arg(args, pe_node_t *); uint32_t show_opts G_GNUC_UNUSED = va_arg(args, uint32_t); bool full = va_arg(args, int); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); if (full) { const char *node_type = "unknown"; char *length_s = pcmk__itoa(g_list_length(node->details->running_rsc)); int health = pe__node_health(node); const char *health_s = NULL; const char *feature_set; switch (node->details->type) { case node_member: node_type = "member"; break; case node_remote: node_type = "remote"; break; case node_ping: node_type = "ping"; break; } if (health < 0) { health_s = "red"; } else if (health == 0) { health_s = "yellow"; } else { health_s = "green"; } feature_set = get_node_feature_set(node); pe__name_and_nvpairs_xml(out, true, "node", 15, "name", node->details->uname, "id", node->details->id, "online", pcmk__btoa(node->details->online), "standby", pcmk__btoa(node->details->standby), "standby_onfail", pcmk__btoa(node->details->standby_onfail), "maintenance", pcmk__btoa(node->details->maintenance), "pending", pcmk__btoa(node->details->pending), "unclean", pcmk__btoa(node->details->unclean), "health", health_s, "feature_set", feature_set, "shutdown", pcmk__btoa(node->details->shutdown), "expected_up", pcmk__btoa(node->details->expected_up), "is_dc", pcmk__btoa(node->details->is_dc), "resources_running", length_s, "type", node_type); if (pe__is_guest_node(node)) { xmlNodePtr xml_node = pcmk__output_xml_peek_parent(out); crm_xml_add(xml_node, "id_as_resource", node->details->remote_rsc->container->id); } if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) { GList *lpc = NULL; for (lpc = node->details->running_rsc; lpc != NULL; lpc = lpc->next) { pe_resource_t *rsc = (pe_resource_t *) lpc->data; show_opts |= pcmk_show_rsc_only; out->message(out, crm_map_element_name(rsc->xml), 
show_opts, rsc, only_node, only_rsc); } } free(length_s); out->end_list(out); } else { pcmk__output_xml_create_parent(out, "node", "name", node->details->uname, NULL); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-attribute", "const char *", "const char *", "bool", "int") static int node_attribute_text(pcmk__output_t *out, va_list args) { const char *name = va_arg(args, const char *); const char *value = va_arg(args, const char *); bool add_extra = va_arg(args, int); int expected_score = va_arg(args, int); if (add_extra) { int v; if (value == NULL) { v = 0; } else { pcmk__scan_min_int(value, &v, INT_MIN); } if (v <= 0) { out->list_item(out, NULL, "%-32s\t: %-10s\t: Connectivity is lost", name, value); } else if (v < expected_score) { out->list_item(out, NULL, "%-32s\t: %-10s\t: Connectivity is degraded (Expected=%d)", name, value, expected_score); } else { out->list_item(out, NULL, "%-32s\t: %-10s", name, value); } } else { out->list_item(out, NULL, "%-32s\t: %-10s", name, value); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-attribute", "const char *", "const char *", "bool", "int") static int node_attribute_html(pcmk__output_t *out, va_list args) { const char *name = va_arg(args, const char *); const char *value = va_arg(args, const char *); bool add_extra = va_arg(args, int); int expected_score = va_arg(args, int); if (add_extra) { int v; char *s = crm_strdup_printf("%s: %s", name, value); xmlNodePtr item_node = pcmk__output_create_xml_node(out, "li", NULL); if (value == NULL) { v = 0; } else { pcmk__scan_min_int(value, &v, INT_MIN); } pcmk_create_html_node(item_node, "span", NULL, NULL, s); free(s); if (v <= 0) { pcmk_create_html_node(item_node, "span", NULL, "bold", "(connectivity is lost)"); } else if (v < expected_score) { char *buf = crm_strdup_printf("(connectivity is degraded -- expected %d", expected_score); pcmk_create_html_node(item_node, "span", NULL, "bold", buf); free(buf); } } else { out->list_item(out, NULL, "%s: %s", name, value); } return 
pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-and-op", "pe_working_set_t *", "xmlNodePtr") static int node_and_op(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); xmlNodePtr xml_op = va_arg(args, xmlNodePtr); pe_resource_t *rsc = NULL; gchar *node_str = NULL; char *last_change_str = NULL; const char *op_rsc = crm_element_value(xml_op, "resource"); - const char *op_key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); int status; time_t last_change = 0; pcmk__scan_min_int(crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS), &status, PCMK_EXEC_UNKNOWN); rsc = pe_find_resource(data_set->resources, op_rsc); if (rsc) { const pe_node_t *node = pe__current_node(rsc); const char *target_role = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); uint32_t show_opts = pcmk_show_rsc_only | pcmk_show_pending; if (node == NULL) { node = rsc->pending_node; } node_str = pcmk__native_output_string(rsc, rsc_printable_id(rsc), node, show_opts, target_role, false); } else { node_str = crm_strdup_printf("Unknown resource %s", op_rsc); } if (crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE, &last_change) == pcmk_ok) { last_change_str = crm_strdup_printf(", %s='%s', exec=%sms", XML_RSC_OP_LAST_CHANGE, pcmk__trim(ctime(&last_change)), crm_element_value(xml_op, XML_RSC_OP_T_EXEC)); } out->list_item(out, NULL, "%s: %s (node=%s, call=%s, rc=%s%s): %s", - node_str, op_key ? op_key : ID(xml_op), + node_str, pe__xe_history_key(xml_op), crm_element_value(xml_op, XML_ATTR_UNAME), crm_element_value(xml_op, XML_LRM_ATTR_CALLID), crm_element_value(xml_op, XML_LRM_ATTR_RC), last_change_str ? 
last_change_str : "", pcmk_exec_status_str(status)); g_free(node_str); free(last_change_str); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-and-op", "pe_working_set_t *", "xmlNodePtr") static int node_and_op_xml(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); xmlNodePtr xml_op = va_arg(args, xmlNodePtr); pe_resource_t *rsc = NULL; const char *op_rsc = crm_element_value(xml_op, "resource"); - const char *op_key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); int status; time_t last_change = 0; xmlNode *node = NULL; pcmk__scan_min_int(crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS), &status, PCMK_EXEC_UNKNOWN); node = pcmk__output_create_xml_node(out, "operation", - "op", op_key ? op_key : ID(xml_op), + "op", pe__xe_history_key(xml_op), "node", crm_element_value(xml_op, XML_ATTR_UNAME), "call", crm_element_value(xml_op, XML_LRM_ATTR_CALLID), "rc", crm_element_value(xml_op, XML_LRM_ATTR_RC), "status", pcmk_exec_status_str(status), NULL); rsc = pe_find_resource(data_set->resources, op_rsc); if (rsc) { const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); const char *kind = crm_element_value(rsc->xml, XML_ATTR_TYPE); char *agent_tuple = NULL; agent_tuple = crm_strdup_printf("%s:%s:%s", class, pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider) ? 
crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER) : "", kind); pcmk__xe_set_props(node, "rsc", rsc_printable_id(rsc), "agent", agent_tuple, NULL); free(agent_tuple); } if (crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE, &last_change) == pcmk_ok) { pcmk__xe_set_props(node, XML_RSC_OP_LAST_CHANGE, pcmk__trim(ctime(&last_change)), XML_RSC_OP_T_EXEC, crm_element_value(xml_op, XML_RSC_OP_T_EXEC), NULL); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-attribute", "const char *", "const char *", "bool", "int") static int node_attribute_xml(pcmk__output_t *out, va_list args) { const char *name = va_arg(args, const char *); const char *value = va_arg(args, const char *); bool add_extra = va_arg(args, int); int expected_score = va_arg(args, int); xmlNodePtr node = pcmk__output_create_xml_node(out, "attribute", "name", name, "value", value, NULL); if (add_extra) { char *buf = pcmk__itoa(expected_score); crm_xml_add(node, "expected", buf); free(buf); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-attribute-list", "pe_working_set_t *", "uint32_t", "bool", "GList *", "GList *") static int node_attribute_list(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); uint32_t show_opts = va_arg(args, uint32_t); bool print_spacer = va_arg(args, int); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); int rc = pcmk_rc_no_output; /* Display each node's attributes */ for (GList *gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = gIter->data; GList *attr_list = NULL; GHashTableIter iter; gpointer key; if (!node || !node->details || !node->details->online) { continue; } g_hash_table_iter_init(&iter, node->details->attrs); while (g_hash_table_iter_next (&iter, &key, NULL)) { attr_list = filter_attr_list(attr_list, key); } if (attr_list == NULL) { continue; } if (!pcmk__str_in_list(node->details->uname, only_node, pcmk__str_star_matches|pcmk__str_casei)) { 
g_list_free(attr_list); continue; } PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc, "Node Attributes"); out->message(out, "node", node, show_opts, false, only_node, only_rsc); for (GList *aIter = attr_list; aIter != NULL; aIter = aIter->next) { const char *name = aIter->data; const char *value = NULL; int expected_score = 0; bool add_extra = false; value = pe_node_attribute_raw(node, name); add_extra = add_extra_info(node, node->details->running_rsc, data_set, name, &expected_score); /* Print attribute name and value */ out->message(out, "node-attribute", name, value, add_extra, expected_score); } g_list_free(attr_list); out->end_list(out); } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("node-capacity", "const pe_node_t *", "const char *") static int node_capacity(pcmk__output_t *out, va_list args) { const pe_node_t *node = va_arg(args, pe_node_t *); const char *comment = va_arg(args, const char *); char *dump_text = crm_strdup_printf("%s: %s capacity:", comment, pe__node_name(node)); g_hash_table_foreach(node->details->utilization, append_dump_text, &dump_text); out->list_item(out, NULL, "%s", dump_text); free(dump_text); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-capacity", "const pe_node_t *", "const char *") static int node_capacity_xml(pcmk__output_t *out, va_list args) { const pe_node_t *node = va_arg(args, pe_node_t *); const char *comment = va_arg(args, const char *); xmlNodePtr xml_node = pcmk__output_create_xml_node(out, "capacity", "node", node->details->uname, "comment", comment, NULL); g_hash_table_foreach(node->details->utilization, add_dump_node, xml_node); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-history-list", "pe_working_set_t *", "pe_node_t *", "xmlNodePtr", "GList *", "GList *", "uint32_t", "uint32_t") static int node_history_list(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); pe_node_t *node = va_arg(args, pe_node_t *); xmlNode *node_state = va_arg(args, xmlNode *); 
GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); uint32_t section_opts = va_arg(args, uint32_t); uint32_t show_opts = va_arg(args, uint32_t); xmlNode *lrm_rsc = NULL; xmlNode *rsc_entry = NULL; int rc = pcmk_rc_no_output; lrm_rsc = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE); /* Print history of each of the node's resources */ for (rsc_entry = first_named_child(lrm_rsc, XML_LRM_TAG_RESOURCE); rsc_entry != NULL; rsc_entry = crm_next_same_xml(rsc_entry)) { const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); pe_resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); const pe_resource_t *parent = pe__const_top_resource(rsc, false); /* We can't use is_filtered here to filter group resources. For is_filtered, * we have to decide whether to check the parent or not. If we check the * parent, all elements of a group will always be printed because that's how * is_filtered works for groups. If we do not check the parent, sometimes * this will filter everything out. * * For other resource types, is_filtered is okay. 
*/ if (parent->variant == pe_group) { if (!pcmk__str_in_list(rsc_printable_id(rsc), only_rsc, pcmk__str_star_matches) && !pcmk__str_in_list(rsc_printable_id(parent), only_rsc, pcmk__str_star_matches)) { continue; } } else { if (rsc->fns->is_filtered(rsc, only_rsc, TRUE)) { continue; } } if (!pcmk_is_set(section_opts, pcmk_section_operations)) { time_t last_failure = 0; int failcount = pe_get_failcount(node, rsc, &last_failure, pe_fc_default, NULL); if (failcount <= 0) { continue; } if (rc == pcmk_rc_no_output) { rc = pcmk_rc_ok; out->message(out, "node", node, show_opts, false, only_node, only_rsc); } out->message(out, "resource-history", rsc, rsc_id, false, failcount, last_failure, false); } else { GList *op_list = get_operation_list(rsc_entry); pe_resource_t *rsc = pe_find_resource(data_set->resources, crm_element_value(rsc_entry, XML_ATTR_ID)); if (op_list == NULL) { continue; } if (rc == pcmk_rc_no_output) { rc = pcmk_rc_ok; out->message(out, "node", node, show_opts, false, only_node, only_rsc); } out->message(out, "resource-operation-list", data_set, rsc, node, op_list, show_opts); } } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("node-list", "GList *", "GList *", "GList *", "uint32_t", "bool") static int node_list_html(pcmk__output_t *out, va_list args) { GList *nodes = va_arg(args, GList *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); uint32_t show_opts = va_arg(args, uint32_t); bool print_spacer G_GNUC_UNUSED = va_arg(args, int); int rc = pcmk_rc_no_output; for (GList *gIter = nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; if (!pcmk__str_in_list(node->details->uname, only_node, pcmk__str_star_matches|pcmk__str_casei)) { continue; } PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Node List"); out->message(out, "node", node, show_opts, true, only_node, only_rsc); } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("node-list", "GList *", "GList *", 
"GList *", "uint32_t", "bool") static int node_list_text(pcmk__output_t *out, va_list args) { GList *nodes = va_arg(args, GList *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); uint32_t show_opts = va_arg(args, uint32_t); bool print_spacer = va_arg(args, int); /* space-separated lists of node names */ GString *online_nodes = NULL; GString *online_remote_nodes = NULL; GString *online_guest_nodes = NULL; GString *offline_nodes = NULL; GString *offline_remote_nodes = NULL; int rc = pcmk_rc_no_output; for (GList *gIter = nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; char *node_name = pe__node_display_name(node, pcmk_is_set(show_opts, pcmk_show_node_id)); if (!pcmk__str_in_list(node->details->uname, only_node, pcmk__str_star_matches|pcmk__str_casei)) { free(node_name); continue; } PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc, "Node List"); // Determine whether to display node individually or in a list if (node->details->unclean || node->details->pending || (node->details->standby_onfail && node->details->online) || node->details->standby || node->details->maintenance || pcmk_is_set(show_opts, pcmk_show_rscs_by_node) || pcmk_is_set(show_opts, pcmk_show_feature_set) || (pe__node_health(node) <= 0)) { // Display node individually } else if (node->details->online) { // Display online node in a list if (pe__is_guest_node(node)) { pcmk__add_word(&online_guest_nodes, 1024, node_name); } else if (pe__is_remote_node(node)) { pcmk__add_word(&online_remote_nodes, 1024, node_name); } else { pcmk__add_word(&online_nodes, 1024, node_name); } free(node_name); continue; } else { // Display offline node in a list if (pe__is_remote_node(node)) { pcmk__add_word(&offline_remote_nodes, 1024, node_name); } else if (pe__is_guest_node(node)) { /* ignore offline guest nodes */ } else { pcmk__add_word(&offline_nodes, 1024, node_name); } free(node_name); continue; } /* If we get here, node is in bad state, or 
we're grouping by node */ out->message(out, "node", node, show_opts, true, only_node, only_rsc); free(node_name); } /* If we're not grouping by node, summarize nodes by status */ if (online_nodes != NULL) { out->list_item(out, "Online", "[ %s ]", (const char *) online_nodes->str); g_string_free(online_nodes, TRUE); } if (offline_nodes != NULL) { out->list_item(out, "OFFLINE", "[ %s ]", (const char *) offline_nodes->str); g_string_free(offline_nodes, TRUE); } if (online_remote_nodes) { out->list_item(out, "RemoteOnline", "[ %s ]", (const char *) online_remote_nodes->str); g_string_free(online_remote_nodes, TRUE); } if (offline_remote_nodes) { out->list_item(out, "RemoteOFFLINE", "[ %s ]", (const char *) offline_remote_nodes->str); g_string_free(offline_remote_nodes, TRUE); } if (online_guest_nodes != NULL) { out->list_item(out, "GuestOnline", "[ %s ]", (const char *) online_guest_nodes->str); g_string_free(online_guest_nodes, TRUE); } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("node-list", "GList *", "GList *", "GList *", "uint32_t", "bool") static int node_list_xml(pcmk__output_t *out, va_list args) { GList *nodes = va_arg(args, GList *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); uint32_t show_opts = va_arg(args, uint32_t); bool print_spacer G_GNUC_UNUSED = va_arg(args, int); out->begin_list(out, NULL, NULL, "nodes"); for (GList *gIter = nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; if (!pcmk__str_in_list(node->details->uname, only_node, pcmk__str_star_matches|pcmk__str_casei)) { continue; } out->message(out, "node", node, show_opts, true, only_node, only_rsc); } out->end_list(out); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-summary", "pe_working_set_t *", "GList *", "GList *", "uint32_t", "uint32_t", "bool") static int node_summary(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); GList *only_node = 
va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); uint32_t section_opts = va_arg(args, uint32_t); uint32_t show_opts = va_arg(args, uint32_t); bool print_spacer = va_arg(args, int); xmlNode *node_state = NULL; xmlNode *cib_status = pcmk_find_cib_element(data_set->input, XML_CIB_TAG_STATUS); int rc = pcmk_rc_no_output; if (xmlChildElementCount(cib_status) == 0) { return rc; } for (node_state = first_named_child(cib_status, XML_CIB_TAG_STATE); node_state != NULL; node_state = crm_next_same_xml(node_state)) { pe_node_t *node = pe_find_node_id(data_set->nodes, ID(node_state)); if (!node || !node->details || !node->details->online) { continue; } if (!pcmk__str_in_list(node->details->uname, only_node, pcmk__str_star_matches|pcmk__str_casei)) { continue; } PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc, pcmk_is_set(section_opts, pcmk_section_operations) ? "Operations" : "Migration Summary"); out->message(out, "node-history-list", data_set, node, node_state, only_node, only_rsc, section_opts, show_opts); } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("node-weight", "const pe_resource_t *", "const char *", "const char *", "const char *") static int node_weight(pcmk__output_t *out, va_list args) { const pe_resource_t *rsc = va_arg(args, const pe_resource_t *); const char *prefix = va_arg(args, const char *); const char *uname = va_arg(args, const char *); const char *score = va_arg(args, const char *); if (rsc) { out->list_item(out, NULL, "%s: %s allocation score on %s: %s", prefix, rsc->id, uname, score); } else { out->list_item(out, NULL, "%s: %s = %s", prefix, uname, score); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-weight", "const pe_resource_t *", "const char *", "const char *", "const char *") static int node_weight_xml(pcmk__output_t *out, va_list args) { const pe_resource_t *rsc = va_arg(args, const pe_resource_t *); const char *prefix = va_arg(args, const char *); const char *uname = va_arg(args, const char *); const char 
*score = va_arg(args, const char *); xmlNodePtr node = pcmk__output_create_xml_node(out, "node_weight", "function", prefix, "node", uname, "score", score, NULL); if (rsc) { crm_xml_add(node, "id", rsc->id); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("op-history", "xmlNodePtr", "const char *", "const char *", "int", "uint32_t") static int op_history_text(pcmk__output_t *out, va_list args) { xmlNodePtr xml_op = va_arg(args, xmlNodePtr); const char *task = va_arg(args, const char *); const char *interval_ms_s = va_arg(args, const char *); int rc = va_arg(args, int); uint32_t show_opts = va_arg(args, uint32_t); char *buf = op_history_string(xml_op, task, interval_ms_s, rc, pcmk_is_set(show_opts, pcmk_show_timing)); out->list_item(out, NULL, "%s", buf); free(buf); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("op-history", "xmlNodePtr", "const char *", "const char *", "int", "uint32_t") static int op_history_xml(pcmk__output_t *out, va_list args) { xmlNodePtr xml_op = va_arg(args, xmlNodePtr); const char *task = va_arg(args, const char *); const char *interval_ms_s = va_arg(args, const char *); int rc = va_arg(args, int); uint32_t show_opts = va_arg(args, uint32_t); char *rc_s = pcmk__itoa(rc); xmlNodePtr node = pcmk__output_create_xml_node(out, "operation_history", "call", crm_element_value(xml_op, XML_LRM_ATTR_CALLID), "task", task, "rc", rc_s, "rc_text", services_ocf_exitcode_str(rc), NULL); free(rc_s); if (interval_ms_s && !pcmk__str_eq(interval_ms_s, "0", pcmk__str_casei)) { char *s = crm_strdup_printf("%sms", interval_ms_s); crm_xml_add(node, "interval", s); free(s); } if (pcmk_is_set(show_opts, pcmk_show_timing)) { const char *value = NULL; time_t epoch = 0; if ((crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE, &epoch) == pcmk_ok) && (epoch > 0)) { char *s = pcmk__epoch2str(&epoch, 0); crm_xml_add(node, XML_RSC_OP_LAST_CHANGE, s); free(s); } value = crm_element_value(xml_op, XML_RSC_OP_T_EXEC); if (value) { char *s = crm_strdup_printf("%sms", value); 
crm_xml_add(node, XML_RSC_OP_T_EXEC, s); free(s); } value = crm_element_value(xml_op, XML_RSC_OP_T_QUEUE); if (value) { char *s = crm_strdup_printf("%sms", value); crm_xml_add(node, XML_RSC_OP_T_QUEUE, s); free(s); } } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("promotion-score", "pe_resource_t *", "pe_node_t *", "const char *") static int promotion_score(pcmk__output_t *out, va_list args) { pe_resource_t *child_rsc = va_arg(args, pe_resource_t *); pe_node_t *chosen = va_arg(args, pe_node_t *); const char *score = va_arg(args, const char *); out->list_item(out, NULL, "%s promotion score on %s: %s", child_rsc->id, chosen? chosen->details->uname : "none", score); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("promotion-score", "pe_resource_t *", "pe_node_t *", "const char *") static int promotion_score_xml(pcmk__output_t *out, va_list args) { pe_resource_t *child_rsc = va_arg(args, pe_resource_t *); pe_node_t *chosen = va_arg(args, pe_node_t *); const char *score = va_arg(args, const char *); xmlNodePtr node = pcmk__output_create_xml_node(out, "promotion_score", "id", child_rsc->id, "score", score, NULL); if (chosen) { crm_xml_add(node, "node", chosen->details->uname); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("resource-config", "pe_resource_t *", "bool") static int resource_config(pcmk__output_t *out, va_list args) { pe_resource_t *rsc = va_arg(args, pe_resource_t *); bool raw = va_arg(args, int); char *rsc_xml = formatted_xml_buf(rsc, raw); out->output_xml(out, "xml", rsc_xml); free(rsc_xml); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("resource-config", "pe_resource_t *", "bool") static int resource_config_text(pcmk__output_t *out, va_list args) { pe_resource_t *rsc = va_arg(args, pe_resource_t *); bool raw = va_arg(args, int); char *rsc_xml = formatted_xml_buf(rsc, raw); pcmk__formatted_printf(out, "Resource XML:\n"); out->output_xml(out, "xml", rsc_xml); free(rsc_xml); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("resource-history", "pe_resource_t *", "const char *", "bool", "int", 
"time_t", "bool") static int resource_history_text(pcmk__output_t *out, va_list args) { pe_resource_t *rsc = va_arg(args, pe_resource_t *); const char *rsc_id = va_arg(args, const char *); bool all = va_arg(args, int); int failcount = va_arg(args, int); time_t last_failure = va_arg(args, time_t); bool as_header = va_arg(args, int); char *buf = resource_history_string(rsc, rsc_id, all, failcount, last_failure); if (as_header) { out->begin_list(out, NULL, NULL, "%s", buf); } else { out->list_item(out, NULL, "%s", buf); } free(buf); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("resource-history", "pe_resource_t *", "const char *", "bool", "int", "time_t", "bool") static int resource_history_xml(pcmk__output_t *out, va_list args) { pe_resource_t *rsc = va_arg(args, pe_resource_t *); const char *rsc_id = va_arg(args, const char *); bool all = va_arg(args, int); int failcount = va_arg(args, int); time_t last_failure = va_arg(args, time_t); bool as_header = va_arg(args, int); xmlNodePtr node = pcmk__output_xml_create_parent(out, "resource_history", "id", rsc_id, NULL); if (rsc == NULL) { pcmk__xe_set_bool_attr(node, "orphan", true); } else if (all || failcount || last_failure > 0) { char *migration_s = pcmk__itoa(rsc->migration_threshold); pcmk__xe_set_props(node, "orphan", "false", "migration-threshold", migration_s, NULL); free(migration_s); if (failcount > 0) { char *s = pcmk__itoa(failcount); crm_xml_add(node, PCMK__FAIL_COUNT_PREFIX, s); free(s); } if (last_failure > 0) { char *s = pcmk__epoch2str(&last_failure, 0); crm_xml_add(node, PCMK__LAST_FAILURE_PREFIX, s); free(s); } } if (!as_header) { pcmk__output_xml_pop_parent(out); } return pcmk_rc_ok; } static void print_resource_header(pcmk__output_t *out, uint32_t show_opts) { if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) { /* Active resources have already been printed by node */ out->begin_list(out, NULL, NULL, "Inactive Resources"); } else if (pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) { out->begin_list(out, 
/*!
 * \internal
 * \brief Output the cluster's resources as a list
 *
 * Arguments read from \p args, in order: working set, show options, whether
 * to print a "No ... resources" item when nothing was output (bool), list of
 * node names to show, list of resource IDs to show, and whether to print a
 * spacer before the list header (bool).
 *
 * The list header is chosen by print_resource_header() and is printed at
 * most once, just before the first thing output under it.
 *
 * \param[in,out] out   Output object to write to
 * \param[in]     args  Message arguments as described above
 *
 * \return pcmk_rc_ok if the brief summary or any resource message returned
 *         pcmk_rc_ok; otherwise the brief summary's return code if one was
 *         printed, or pcmk_rc_no_output
 */
PCMK__OUTPUT_ARGS("resource-list", "pe_working_set_t *", "uint32_t", "bool",
                  "GList *", "GList *", "bool")
static int
resource_list(pcmk__output_t *out, va_list args)
{
    pe_working_set_t *data_set = va_arg(args, pe_working_set_t *);
    uint32_t show_opts = va_arg(args, uint32_t);
    bool print_summary = va_arg(args, int);
    GList *only_node = va_arg(args, GList *);
    GList *only_rsc = va_arg(args, GList *);
    bool print_spacer = va_arg(args, int);

    GList *rsc_iter;
    int rc = pcmk_rc_no_output;
    bool printed_header = false;    // Whether the list has been opened yet

    /* If we already showed active resources by node, and
     * we're not showing inactive resources, we have nothing to do
     */
    if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node) &&
        !pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) {
        return rc;
    }

    /* If we haven't already printed resources grouped by node,
     * and brief output was requested, print resource summary */
    if (pcmk_is_set(show_opts, pcmk_show_brief) &&
        !pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) {
        GList *rscs = pe__filter_rsc_list(data_set->resources, only_rsc);

        PCMK__OUTPUT_SPACER_IF(out, print_spacer);
        print_resource_header(out, show_opts);
        printed_header = true;

        rc = pe__rscs_brief_output(out, rscs, show_opts);
        // Only the filtered list itself is freed here, not the resources
        g_list_free(rscs);
    }

    /* For each resource, display it if appropriate */
    for (rsc_iter = data_set->resources; rsc_iter != NULL; rsc_iter = rsc_iter->next) {
        pe_resource_t *rsc = (pe_resource_t *) rsc_iter->data;
        int x;  // Return code of this resource's message

        /* Complex resources may have some sub-resources active and some inactive */
        // NOTE(review): presumably TRUE means "all active", FALSE "any active" -- confirm
        gboolean is_active = rsc->fns->active(rsc, TRUE);
        gboolean partially_active = rsc->fns->active(rsc, FALSE);

        /* Skip inactive orphans (deleted but still in CIB) */
        if (pcmk_is_set(rsc->flags, pe_rsc_orphan) && !is_active) {
            continue;

        /* Skip active resources if we already displayed them by node */
        } else if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) {
            if (is_active) {
                continue;
            }

        /* Skip primitives already counted in a brief summary */
        } else if (pcmk_is_set(show_opts, pcmk_show_brief)
                   && (rsc->variant == pe_native)) {
            continue;

        /* Skip resources that aren't at least partially active,
         * unless we're displaying inactive resources
         */
        } else if (!partially_active && !pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) {
            continue;

        /* Skip partially active resources not running on any requested node */
        } else if (partially_active && !pe__rsc_running_on_any(rsc, only_node)) {
            continue;
        }

        if (!printed_header) {
            PCMK__OUTPUT_SPACER_IF(out, print_spacer);
            print_resource_header(out, show_opts);
            printed_header = true;
        }

        /* Print this resource */
        x = out->message(out, crm_map_element_name(rsc->xml), show_opts, rsc,
                         only_node, only_rsc);
        // One successful message is enough to mark the whole list as output
        if (x == pcmk_rc_ok) {
            rc = pcmk_rc_ok;
        }
    }

    // Nothing was output: optionally say so, under the same header
    if (print_summary && rc != pcmk_rc_ok) {
        if (!printed_header) {
            PCMK__OUTPUT_SPACER_IF(out, print_spacer);
            print_resource_header(out, show_opts);
            printed_header = true;
        }

        if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) {
            out->list_item(out, NULL, "No inactive resources");
        } else if (pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) {
            out->list_item(out, NULL, "No resources");
        } else {
            out->list_item(out, NULL, "No active resources");
        }
    }

    // Close the list only if it was opened
    if (printed_header) {
        out->end_list(out);
    }

    return rc;
}
char *op_rc = crm_element_value(xml_op, XML_LRM_ATTR_RC); int op_rc_i; pcmk__scan_min_int(op_rc, &op_rc_i, 0); /* Display 0-interval monitors as "probe" */ if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei) && pcmk__str_eq(interval_ms_s, "0", pcmk__str_null_matches | pcmk__str_casei)) { task = "probe"; } /* If this is the first printed operation, print heading for resource */ if (rc == pcmk_rc_no_output) { time_t last_failure = 0; int failcount = pe_get_failcount(node, rsc, &last_failure, pe_fc_default, NULL); out->message(out, "resource-history", rsc, rsc_printable_id(rsc), true, failcount, last_failure, true); rc = pcmk_rc_ok; } /* Print the operation */ out->message(out, "op-history", xml_op, task, interval_ms_s, op_rc_i, show_opts); } /* Free the list we created (no need to free the individual items) */ g_list_free(op_list); PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("resource-util", "pe_resource_t *", "pe_node_t *", "const char *") static int resource_util(pcmk__output_t *out, va_list args) { pe_resource_t *rsc = va_arg(args, pe_resource_t *); pe_node_t *node = va_arg(args, pe_node_t *); const char *fn = va_arg(args, const char *); char *dump_text = crm_strdup_printf("%s: %s utilization on %s:", fn, rsc->id, pe__node_name(node)); g_hash_table_foreach(rsc->utilization, append_dump_text, &dump_text); out->list_item(out, NULL, "%s", dump_text); free(dump_text); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("resource-util", "pe_resource_t *", "pe_node_t *", "const char *") static int resource_util_xml(pcmk__output_t *out, va_list args) { pe_resource_t *rsc = va_arg(args, pe_resource_t *); pe_node_t *node = va_arg(args, pe_node_t *); const char *fn = va_arg(args, const char *); xmlNodePtr xml_node = pcmk__output_create_xml_node(out, "utilization", "resource", rsc->id, "node", node->details->uname, "function", fn, NULL); g_hash_table_foreach(rsc->utilization, add_dump_node, xml_node); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ticket", 
"pe_ticket_t *") static int ticket_html(pcmk__output_t *out, va_list args) { pe_ticket_t *ticket = va_arg(args, pe_ticket_t *); if (ticket->last_granted > -1) { char *epoch_str = pcmk__epoch2str(&(ticket->last_granted), 0); out->list_item(out, NULL, "%s:\t%s%s %s=\"%s\"", ticket->id, ticket->granted ? "granted" : "revoked", ticket->standby ? " [standby]" : "", "last-granted", pcmk__s(epoch_str, "")); free(epoch_str); } else { out->list_item(out, NULL, "%s:\t%s%s", ticket->id, ticket->granted ? "granted" : "revoked", ticket->standby ? " [standby]" : ""); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ticket", "pe_ticket_t *") static int ticket_text(pcmk__output_t *out, va_list args) { pe_ticket_t *ticket = va_arg(args, pe_ticket_t *); if (ticket->last_granted > -1) { char *epoch_str = pcmk__epoch2str(&(ticket->last_granted), 0); out->list_item(out, ticket->id, "%s%s %s=\"%s\"", ticket->granted ? "granted" : "revoked", ticket->standby ? " [standby]" : "", "last-granted", pcmk__s(epoch_str, "")); free(epoch_str); } else { out->list_item(out, ticket->id, "%s%s", ticket->granted ? "granted" : "revoked", ticket->standby ? " [standby]" : ""); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ticket", "pe_ticket_t *") static int ticket_xml(pcmk__output_t *out, va_list args) { pe_ticket_t *ticket = va_arg(args, pe_ticket_t *); xmlNodePtr node = NULL; node = pcmk__output_create_xml_node(out, "ticket", "id", ticket->id, "status", ticket->granted ? 
"granted" : "revoked", "standby", pcmk__btoa(ticket->standby), NULL); if (ticket->last_granted > -1) { char *buf = pcmk__epoch2str(&ticket->last_granted, 0); crm_xml_add(node, "last-granted", buf); free(buf); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ticket-list", "pe_working_set_t *", "bool") static int ticket_list(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); bool print_spacer = va_arg(args, int); GHashTableIter iter; gpointer key, value; if (g_hash_table_size(data_set->tickets) == 0) { return pcmk_rc_no_output; } PCMK__OUTPUT_SPACER_IF(out, print_spacer); /* Print section heading */ out->begin_list(out, NULL, NULL, "Tickets"); /* Print each ticket */ g_hash_table_iter_init(&iter, data_set->tickets); while (g_hash_table_iter_next(&iter, &key, &value)) { pe_ticket_t *ticket = (pe_ticket_t *) value; out->message(out, "ticket", ticket); } /* Close section */ out->end_list(out); return pcmk_rc_ok; } static pcmk__message_entry_t fmt_functions[] = { { "ban", "default", ban_text }, { "ban", "html", ban_html }, { "ban", "xml", ban_xml }, { "ban-list", "default", ban_list }, { "bundle", "default", pe__bundle_text }, { "bundle", "xml", pe__bundle_xml }, { "bundle", "html", pe__bundle_html }, { "clone", "default", pe__clone_default }, { "clone", "xml", pe__clone_xml }, { "cluster-counts", "default", cluster_counts_text }, { "cluster-counts", "html", cluster_counts_html }, { "cluster-counts", "xml", cluster_counts_xml }, { "cluster-dc", "default", cluster_dc_text }, { "cluster-dc", "html", cluster_dc_html }, { "cluster-dc", "xml", cluster_dc_xml }, { "cluster-options", "default", cluster_options_text }, { "cluster-options", "html", cluster_options_html }, { "cluster-options", "log", cluster_options_log }, { "cluster-options", "xml", cluster_options_xml }, { "cluster-summary", "default", cluster_summary }, { "cluster-summary", "html", cluster_summary_html }, { "cluster-stack", "default", cluster_stack_text }, { 
"cluster-stack", "html", cluster_stack_html }, { "cluster-stack", "xml", cluster_stack_xml }, { "cluster-times", "default", cluster_times_text }, { "cluster-times", "html", cluster_times_html }, { "cluster-times", "xml", cluster_times_xml }, { "failed-action", "default", failed_action_default }, { "failed-action", "xml", failed_action_xml }, { "failed-action-list", "default", failed_action_list }, { "group", "default", pe__group_default}, { "group", "xml", pe__group_xml }, { "maint-mode", "text", cluster_maint_mode_text }, { "node", "default", node_text }, { "node", "html", node_html }, { "node", "xml", node_xml }, { "node-and-op", "default", node_and_op }, { "node-and-op", "xml", node_and_op_xml }, { "node-capacity", "default", node_capacity }, { "node-capacity", "xml", node_capacity_xml }, { "node-history-list", "default", node_history_list }, { "node-list", "default", node_list_text }, { "node-list", "html", node_list_html }, { "node-list", "xml", node_list_xml }, { "node-weight", "default", node_weight }, { "node-weight", "xml", node_weight_xml }, { "node-attribute", "default", node_attribute_text }, { "node-attribute", "html", node_attribute_html }, { "node-attribute", "xml", node_attribute_xml }, { "node-attribute-list", "default", node_attribute_list }, { "node-summary", "default", node_summary }, { "op-history", "default", op_history_text }, { "op-history", "xml", op_history_xml }, { "primitive", "default", pe__resource_text }, { "primitive", "xml", pe__resource_xml }, { "primitive", "html", pe__resource_html }, { "promotion-score", "default", promotion_score }, { "promotion-score", "xml", promotion_score_xml }, { "resource-config", "default", resource_config }, { "resource-config", "text", resource_config_text }, { "resource-history", "default", resource_history_text }, { "resource-history", "xml", resource_history_xml }, { "resource-list", "default", resource_list }, { "resource-operation-list", "default", resource_operation_list }, { "resource-util", 
"default", resource_util }, { "resource-util", "xml", resource_util_xml }, { "ticket", "default", ticket_text }, { "ticket", "html", ticket_html }, { "ticket", "xml", ticket_xml }, { "ticket-list", "default", ticket_list }, { NULL, NULL, NULL } }; void pe__register_messages(pcmk__output_t *out) { pcmk__register_messages(out, fmt_functions); } diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index d2633225ce..aa1662b58d 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -1,4529 +1,4754 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include CRM_TRACE_INIT_DATA(pe_status); +// A (parsed) resource action history entry +struct action_history { + pe_resource_t *rsc; // Resource that history is for + pe_node_t *node; // Node that history is for + xmlNode *xml; // History entry XML + + // Parsed from entry XML + const char *id; // XML ID of history entry + const char *key; // Operation key of action + const char *task; // Action name + const char *exit_reason; // Exit reason given for result + guint interval_ms; // Action interval + int call_id; // Call ID of action + int expected_exit_status; // Expected exit status of action + int exit_status; // Actual exit status of action + int execution_status; // Execution status of action +}; + /* This uses pcmk__set_flags_as()/pcmk__clear_flags_as() directly rather than * use pe__set_working_set_flags()/pe__clear_working_set_flags() so that the * flag is stringified more readably in log messages. 
*/ #define set_config_flag(data_set, option, flag) do { \ const char *scf_value = pe_pref((data_set)->config_hash, (option)); \ if (scf_value != NULL) { \ if (crm_is_true(scf_value)) { \ (data_set)->flags = pcmk__set_flags_as(__func__, __LINE__, \ LOG_TRACE, "Working set", \ crm_system_name, (data_set)->flags, \ (flag), #flag); \ } else { \ (data_set)->flags = pcmk__clear_flags_as(__func__, __LINE__,\ LOG_TRACE, "Working set", \ crm_system_name, (data_set)->flags, \ (flag), #flag); \ } \ } \ } while(0) static void unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, xmlNode **last_failure, - enum action_fail_response *failed, - pe_working_set_t *data_set); + enum action_fail_response *failed); static void determine_remote_online_status(pe_working_set_t *data_set, pe_node_t *this_node); static void add_node_attrs(const xmlNode *xml_obj, pe_node_t *node, bool overwrite, pe_working_set_t *data_set); static void determine_online_status(const xmlNode *node_state, pe_node_t *this_node, pe_working_set_t *data_set); static void unpack_node_lrm(pe_node_t *node, const xmlNode *xml, pe_working_set_t *data_set); // Bitmask for warnings we only want to print once uint32_t pe_wo = 0; static gboolean is_dangling_guest_node(pe_node_t *node) { /* we are looking for a remote-node that was supposed to be mapped to a * container resource, but all traces of that container have disappeared * from both the config and the status section. */ if (pe__is_guest_or_remote_node(node) && node->details->remote_rsc && node->details->remote_rsc->container == NULL && pcmk_is_set(node->details->remote_rsc->flags, pe_rsc_orphan_container_filler)) { return TRUE; } return FALSE; } /*! 
* \brief Schedule a fence action for a node * * \param[in,out] data_set Current working set of cluster * \param[in,out] node Node to fence * \param[in] reason Text description of why fencing is needed * \param[in] priority_delay Whether to consider `priority-fencing-delay` */ void pe_fence_node(pe_working_set_t * data_set, pe_node_t * node, const char *reason, bool priority_delay) { CRM_CHECK(node, return); /* A guest node is fenced by marking its container as failed */ if (pe__is_guest_node(node)) { pe_resource_t *rsc = node->details->remote_rsc->container; if (!pcmk_is_set(rsc->flags, pe_rsc_failed)) { if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) { crm_notice("Not fencing guest node %s " "(otherwise would because %s): " "its guest resource %s is unmanaged", pe__node_name(node), reason, rsc->id); } else { crm_warn("Guest node %s will be fenced " "(by recovering its guest resource %s): %s", pe__node_name(node), rsc->id, reason); /* We don't mark the node as unclean because that would prevent the * node from running resources. We want to allow it to run resources * in this transition if the recovery succeeds. */ node->details->remote_requires_reset = TRUE; pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); } } } else if (is_dangling_guest_node(node)) { crm_info("Cleaning up dangling connection for guest node %s: " "fencing was already done because %s, " "and guest resource no longer exists", pe__node_name(node), reason); pe__set_resource_flags(node->details->remote_rsc, pe_rsc_failed|pe_rsc_stop); } else if (pe__is_remote_node(node)) { pe_resource_t *rsc = node->details->remote_rsc; if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pe_rsc_managed)) { crm_notice("Not fencing remote node %s " "(otherwise would because %s): connection is unmanaged", pe__node_name(node), reason); } else if(node->details->remote_requires_reset == FALSE) { node->details->remote_requires_reset = TRUE; crm_warn("Remote node %s %s: %s", pe__node_name(node), pe_can_fence(data_set, node)? 
"will be fenced" : "is unclean", reason); } node->details->unclean = TRUE; // No need to apply `priority-fencing-delay` for remote nodes pe_fence_op(node, NULL, TRUE, reason, FALSE, data_set); } else if (node->details->unclean) { crm_trace("Cluster node %s %s because %s", pe__node_name(node), pe_can_fence(data_set, node)? "would also be fenced" : "also is unclean", reason); } else { crm_warn("Cluster node %s %s: %s", pe__node_name(node), pe_can_fence(data_set, node)? "will be fenced" : "is unclean", reason); node->details->unclean = TRUE; pe_fence_op(node, NULL, TRUE, reason, priority_delay, data_set); } } // @TODO xpaths can't handle templates, rules, or id-refs // nvpair with provides or requires set to unfencing #define XPATH_UNFENCING_NVPAIR XML_CIB_TAG_NVPAIR \ "[(@" XML_NVPAIR_ATTR_NAME "='" PCMK_STONITH_PROVIDES "'" \ "or @" XML_NVPAIR_ATTR_NAME "='" XML_RSC_ATTR_REQUIRES "') " \ "and @" XML_NVPAIR_ATTR_VALUE "='" PCMK__VALUE_UNFENCING "']" // unfencing in rsc_defaults or any resource #define XPATH_ENABLE_UNFENCING \ "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RESOURCES \ "//" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR \ "|/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RSCCONFIG \ "/" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR static void set_if_xpath(uint64_t flag, const char *xpath, pe_working_set_t *data_set) { xmlXPathObjectPtr result = NULL; if (!pcmk_is_set(data_set->flags, flag)) { result = xpath_search(data_set->input, xpath); if (result && (numXpathResults(result) > 0)) { pe__set_working_set_flags(data_set, flag); } freeXpathObject(result); } } gboolean unpack_config(xmlNode * config, pe_working_set_t * data_set) { const char *value = NULL; GHashTable *config_hash = pcmk__strkey_table(free, free); pe_rule_eval_data_t rule_data = { .node_hash = NULL, .role = RSC_ROLE_UNKNOWN, .now = data_set->now, .match_data = NULL, .rsc_data = NULL, .op_data = NULL }; data_set->config_hash = config_hash; 
pe__unpack_dataset_nvpairs(config, XML_CIB_TAG_PROPSET, &rule_data, config_hash, CIB_OPTIONS_FIRST, FALSE, data_set); verify_pe_options(data_set->config_hash); set_config_flag(data_set, "enable-startup-probes", pe_flag_startup_probes); if (!pcmk_is_set(data_set->flags, pe_flag_startup_probes)) { crm_info("Startup probes: disabled (dangerous)"); } value = pe_pref(data_set->config_hash, XML_ATTR_HAVE_WATCHDOG); if (value && crm_is_true(value)) { crm_info("Watchdog-based self-fencing will be performed via SBD if " "fencing is required and stonith-watchdog-timeout is nonzero"); pe__set_working_set_flags(data_set, pe_flag_have_stonith_resource); } /* Set certain flags via xpath here, so they can be used before the relevant * configuration sections are unpacked. */ set_if_xpath(pe_flag_enable_unfencing, XPATH_ENABLE_UNFENCING, data_set); value = pe_pref(data_set->config_hash, "stonith-timeout"); data_set->stonith_timeout = (int) crm_parse_interval_spec(value); crm_debug("STONITH timeout: %d", data_set->stonith_timeout); set_config_flag(data_set, "stonith-enabled", pe_flag_stonith_enabled); crm_debug("STONITH of failed nodes is %s", pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)? "enabled" : "disabled"); data_set->stonith_action = pe_pref(data_set->config_hash, "stonith-action"); if (!strcmp(data_set->stonith_action, "poweroff")) { pe_warn_once(pe_wo_poweroff, "Support for stonith-action of 'poweroff' is deprecated " "and will be removed in a future release (use 'off' instead)"); data_set->stonith_action = "off"; } crm_trace("STONITH will %s nodes", data_set->stonith_action); set_config_flag(data_set, "concurrent-fencing", pe_flag_concurrent_fencing); crm_debug("Concurrent fencing is %s", pcmk_is_set(data_set->flags, pe_flag_concurrent_fencing)? 
"enabled" : "disabled"); value = pe_pref(data_set->config_hash, XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY); if (value) { data_set->priority_fencing_delay = crm_parse_interval_spec(value) / 1000; crm_trace("Priority fencing delay is %ds", data_set->priority_fencing_delay); } set_config_flag(data_set, "stop-all-resources", pe_flag_stop_everything); crm_debug("Stop all active resources: %s", pcmk__btoa(pcmk_is_set(data_set->flags, pe_flag_stop_everything))); set_config_flag(data_set, "symmetric-cluster", pe_flag_symmetric_cluster); if (pcmk_is_set(data_set->flags, pe_flag_symmetric_cluster)) { crm_debug("Cluster is symmetric" " - resources can run anywhere by default"); } value = pe_pref(data_set->config_hash, "no-quorum-policy"); if (pcmk__str_eq(value, "ignore", pcmk__str_casei)) { data_set->no_quorum_policy = no_quorum_ignore; } else if (pcmk__str_eq(value, "freeze", pcmk__str_casei)) { data_set->no_quorum_policy = no_quorum_freeze; } else if (pcmk__str_eq(value, "demote", pcmk__str_casei)) { data_set->no_quorum_policy = no_quorum_demote; } else if (pcmk__str_eq(value, "suicide", pcmk__str_casei)) { if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) { int do_panic = 0; crm_element_value_int(data_set->input, XML_ATTR_QUORUM_PANIC, &do_panic); if (do_panic || pcmk_is_set(data_set->flags, pe_flag_have_quorum)) { data_set->no_quorum_policy = no_quorum_suicide; } else { crm_notice("Resetting no-quorum-policy to 'stop': cluster has never had quorum"); data_set->no_quorum_policy = no_quorum_stop; } } else { pcmk__config_err("Resetting no-quorum-policy to 'stop' because " "fencing is disabled"); data_set->no_quorum_policy = no_quorum_stop; } } else { data_set->no_quorum_policy = no_quorum_stop; } switch (data_set->no_quorum_policy) { case no_quorum_freeze: crm_debug("On loss of quorum: Freeze resources"); break; case no_quorum_stop: crm_debug("On loss of quorum: Stop ALL resources"); break; case no_quorum_demote: crm_debug("On loss of quorum: " "Demote promotable 
resources and stop other resources"); break; case no_quorum_suicide: crm_notice("On loss of quorum: Fence all remaining nodes"); break; case no_quorum_ignore: crm_notice("On loss of quorum: Ignore"); break; } set_config_flag(data_set, "stop-orphan-resources", pe_flag_stop_rsc_orphans); crm_trace("Orphan resources are %s", pcmk_is_set(data_set->flags, pe_flag_stop_rsc_orphans)? "stopped" : "ignored"); set_config_flag(data_set, "stop-orphan-actions", pe_flag_stop_action_orphans); crm_trace("Orphan resource actions are %s", pcmk_is_set(data_set->flags, pe_flag_stop_action_orphans)? "stopped" : "ignored"); value = pe_pref(data_set->config_hash, "remove-after-stop"); if (value != NULL) { if (crm_is_true(value)) { pe__set_working_set_flags(data_set, pe_flag_remove_after_stop); #ifndef PCMK__COMPAT_2_0 pe_warn_once(pe_wo_remove_after, "Support for the remove-after-stop cluster property is" " deprecated and will be removed in a future release"); #endif } else { pe__clear_working_set_flags(data_set, pe_flag_remove_after_stop); } } set_config_flag(data_set, "maintenance-mode", pe_flag_maintenance_mode); crm_trace("Maintenance mode: %s", pcmk__btoa(pcmk_is_set(data_set->flags, pe_flag_maintenance_mode))); set_config_flag(data_set, "start-failure-is-fatal", pe_flag_start_failure_fatal); crm_trace("Start failures are %s", pcmk_is_set(data_set->flags, pe_flag_start_failure_fatal)? 
"always fatal" : "handled by failcount"); if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) { set_config_flag(data_set, "startup-fencing", pe_flag_startup_fencing); } if (pcmk_is_set(data_set->flags, pe_flag_startup_fencing)) { crm_trace("Unseen nodes will be fenced"); } else { pe_warn_once(pe_wo_blind, "Blind faith: not fencing unseen nodes"); } pe__unpack_node_health_scores(data_set); data_set->placement_strategy = pe_pref(data_set->config_hash, "placement-strategy"); crm_trace("Placement strategy: %s", data_set->placement_strategy); set_config_flag(data_set, "shutdown-lock", pe_flag_shutdown_lock); crm_trace("Resources will%s be locked to cleanly shut down nodes", (pcmk_is_set(data_set->flags, pe_flag_shutdown_lock)? "" : " not")); if (pcmk_is_set(data_set->flags, pe_flag_shutdown_lock)) { value = pe_pref(data_set->config_hash, XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT); data_set->shutdown_lock = crm_parse_interval_spec(value) / 1000; crm_trace("Shutdown locks expire after %us", data_set->shutdown_lock); } return TRUE; } pe_node_t * pe_create_node(const char *id, const char *uname, const char *type, const char *score, pe_working_set_t * data_set) { pe_node_t *new_node = NULL; if (pe_find_node(data_set->nodes, uname) != NULL) { pcmk__config_warn("More than one node entry has name '%s'", uname); } new_node = calloc(1, sizeof(pe_node_t)); if (new_node == NULL) { return NULL; } new_node->weight = char2score(score); new_node->details = calloc(1, sizeof(struct pe_node_shared_s)); if (new_node->details == NULL) { free(new_node); return NULL; } crm_trace("Creating node for entry %s/%s", uname, id); new_node->details->id = id; new_node->details->uname = uname; new_node->details->online = FALSE; new_node->details->shutdown = FALSE; new_node->details->rsc_discovery_enabled = TRUE; new_node->details->running_rsc = NULL; new_node->details->data_set = data_set; if (pcmk__str_eq(type, "member", pcmk__str_null_matches | pcmk__str_casei)) { new_node->details->type = 
node_member; } else if (pcmk__str_eq(type, "remote", pcmk__str_casei)) { new_node->details->type = node_remote; pe__set_working_set_flags(data_set, pe_flag_have_remote_nodes); } else { /* @COMPAT 'ping' is the default for backward compatibility, but it * should be changed to 'member' at a compatibility break */ if (!pcmk__str_eq(type, "ping", pcmk__str_casei)) { pcmk__config_warn("Node %s has unrecognized type '%s', " "assuming 'ping'", pcmk__s(uname, "without name"), type); } pe_warn_once(pe_wo_ping_node, "Support for nodes of type 'ping' (such as %s) is " "deprecated and will be removed in a future release", pcmk__s(uname, "unnamed node")); new_node->details->type = node_ping; } new_node->details->attrs = pcmk__strkey_table(free, free); if (pe__is_guest_or_remote_node(new_node)) { g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND), strdup("remote")); } else { g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND), strdup("cluster")); } new_node->details->utilization = pcmk__strkey_table(free, free); new_node->details->digest_cache = pcmk__strkey_table(free, pe__free_digests); data_set->nodes = g_list_insert_sorted(data_set->nodes, new_node, pe__cmp_node_name); return new_node; } static const char * expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pe_working_set_t *data) { xmlNode *attr_set = NULL; xmlNode *attr = NULL; const char *container_id = ID(xml_obj); const char *remote_name = NULL; const char *remote_server = NULL; const char *remote_port = NULL; const char *connect_timeout = "60s"; const char *remote_allow_migrate=NULL; const char *is_managed = NULL; for (attr_set = pcmk__xe_first_child(xml_obj); attr_set != NULL; attr_set = pcmk__xe_next(attr_set)) { if (!pcmk__str_eq((const char *)attr_set->name, XML_TAG_META_SETS, pcmk__str_casei)) { continue; } for (attr = pcmk__xe_first_child(attr_set); attr != NULL; attr = pcmk__xe_next(attr)) { const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE); const char 
*name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME); if (pcmk__str_eq(name, XML_RSC_ATTR_REMOTE_NODE, pcmk__str_casei)) { remote_name = value; } else if (pcmk__str_eq(name, "remote-addr", pcmk__str_casei)) { remote_server = value; } else if (pcmk__str_eq(name, "remote-port", pcmk__str_casei)) { remote_port = value; } else if (pcmk__str_eq(name, "remote-connect-timeout", pcmk__str_casei)) { connect_timeout = value; } else if (pcmk__str_eq(name, "remote-allow-migrate", pcmk__str_casei)) { remote_allow_migrate=value; } else if (pcmk__str_eq(name, XML_RSC_ATTR_MANAGED, pcmk__str_casei)) { is_managed = value; } } } if (remote_name == NULL) { return NULL; } if (pe_find_resource(data->resources, remote_name) != NULL) { return NULL; } pe_create_remote_xml(parent, remote_name, container_id, remote_allow_migrate, is_managed, connect_timeout, remote_server, remote_port); return remote_name; } static void handle_startup_fencing(pe_working_set_t *data_set, pe_node_t *new_node) { if ((new_node->details->type == node_remote) && (new_node->details->remote_rsc == NULL)) { /* Ignore fencing for remote nodes that don't have a connection resource * associated with them. This happens when remote node entries get left * in the nodes section after the connection resource is removed. */ return; } if (pcmk_is_set(data_set->flags, pe_flag_startup_fencing)) { // All nodes are unclean until we've seen their status entry new_node->details->unclean = TRUE; } else { // Blind faith ... new_node->details->unclean = FALSE; } /* We need to be able to determine if a node's status section * exists or not separate from whether the node is unclean. 
*/ new_node->details->unseen = TRUE; } gboolean unpack_nodes(xmlNode * xml_nodes, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; pe_node_t *new_node = NULL; const char *id = NULL; const char *uname = NULL; const char *type = NULL; const char *score = NULL; for (xml_obj = pcmk__xe_first_child(xml_nodes); xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) { if (pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_NODE, pcmk__str_none)) { new_node = NULL; id = crm_element_value(xml_obj, XML_ATTR_ID); uname = crm_element_value(xml_obj, XML_ATTR_UNAME); type = crm_element_value(xml_obj, XML_ATTR_TYPE); score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE); crm_trace("Processing node %s/%s", uname, id); if (id == NULL) { pcmk__config_err("Ignoring <" XML_CIB_TAG_NODE "> entry in configuration without id"); continue; } new_node = pe_create_node(id, uname, type, score, data_set); if (new_node == NULL) { return FALSE; } handle_startup_fencing(data_set, new_node); add_node_attrs(xml_obj, new_node, FALSE, data_set); crm_trace("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME)); } } if (data_set->localhost && pe_find_node(data_set->nodes, data_set->localhost) == NULL) { crm_info("Creating a fake local node"); pe_create_node(data_set->localhost, data_set->localhost, NULL, 0, data_set); } return TRUE; } static void setup_container(pe_resource_t * rsc, pe_working_set_t * data_set) { const char *container_id = NULL; if (rsc->children) { g_list_foreach(rsc->children, (GFunc) setup_container, data_set); return; } container_id = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_CONTAINER); if (container_id && !pcmk__str_eq(container_id, rsc->id, pcmk__str_casei)) { pe_resource_t *container = pe_find_resource(data_set->resources, container_id); if (container) { rsc->container = container; pe__set_resource_flags(container, pe_rsc_is_container); container->fillers = g_list_append(container->fillers, rsc); pe_rsc_trace(rsc, "Resource %s's container is %s", rsc->id, 
container_id); } else { pe_err("Resource %s: Unknown resource container (%s)", rsc->id, container_id); } } } gboolean unpack_remote_nodes(xmlNode * xml_resources, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; /* Create remote nodes and guest nodes from the resource configuration * before unpacking resources. */ for (xml_obj = pcmk__xe_first_child(xml_resources); xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) { const char *new_node_id = NULL; /* Check for remote nodes, which are defined by ocf:pacemaker:remote * primitives. */ if (xml_contains_remote_node(xml_obj)) { new_node_id = ID(xml_obj); /* The "pe_find_node" check is here to make sure we don't iterate over * an expanded node that has already been added to the node list. */ if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) { crm_trace("Found remote node %s defined by resource %s", new_node_id, ID(xml_obj)); pe_create_node(new_node_id, new_node_id, "remote", NULL, data_set); } continue; } /* Check for guest nodes, which are defined by special meta-attributes * of a primitive of any type (for example, VirtualDomain or Xen). */ if (pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_RESOURCE, pcmk__str_none)) { /* This will add an ocf:pacemaker:remote primitive to the * configuration for the guest node's connection, to be unpacked * later. */ new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources, data_set); if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) { crm_trace("Found guest node %s in resource %s", new_node_id, ID(xml_obj)); pe_create_node(new_node_id, new_node_id, "remote", NULL, data_set); } continue; } /* Check for guest nodes inside a group. Clones are currently not * supported as guest nodes. 
*/ if (pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_GROUP, pcmk__str_none)) { xmlNode *xml_obj2 = NULL; for (xml_obj2 = pcmk__xe_first_child(xml_obj); xml_obj2 != NULL; xml_obj2 = pcmk__xe_next(xml_obj2)) { new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources, data_set); if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) { crm_trace("Found guest node %s in resource %s inside group %s", new_node_id, ID(xml_obj2), ID(xml_obj)); pe_create_node(new_node_id, new_node_id, "remote", NULL, data_set); } } } } return TRUE; } /* Call this after all the nodes and resources have been * unpacked, but before the status section is read. * * A remote node's online status is reflected by the state * of the remote node's connection resource. We need to link * the remote node to this connection resource so we can have * easy access to the connection resource during the scheduler calculations. */ static void link_rsc2remotenode(pe_working_set_t *data_set, pe_resource_t *new_rsc) { pe_node_t *remote_node = NULL; if (new_rsc->is_remote_node == FALSE) { return; } if (pcmk_is_set(data_set->flags, pe_flag_quick_location)) { /* remote_nodes and remote_resources are not linked in quick location calculations */ return; } remote_node = pe_find_node(data_set->nodes, new_rsc->id); CRM_CHECK(remote_node != NULL, return); pe_rsc_trace(new_rsc, "Linking remote connection resource %s to %s", new_rsc->id, pe__node_name(remote_node)); remote_node->details->remote_rsc = new_rsc; if (new_rsc->container == NULL) { /* Handle start-up fencing for remote nodes (as opposed to guest nodes) * the same as is done for cluster nodes. */ handle_startup_fencing(data_set, remote_node); } else { /* pe_create_node() marks the new node as "remote" or "cluster"; now * that we know the node is a guest node, update it correctly. 
*/ g_hash_table_replace(remote_node->details->attrs, strdup(CRM_ATTR_KIND), strdup("container")); } } static void destroy_tag(gpointer data) { pe_tag_t *tag = data; if (tag) { free(tag->id); g_list_free_full(tag->refs, free); free(tag); } } /*! * \internal * \brief Parse configuration XML for resource information * * \param[in] xml_resources Top of resource configuration XML * \param[in,out] data_set Where to put resource information * * \return TRUE * * \note unpack_remote_nodes() MUST be called before this, so that the nodes can * be used when pe__unpack_resource() calls resource_location() */ gboolean unpack_resources(const xmlNode *xml_resources, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; GList *gIter = NULL; data_set->template_rsc_sets = pcmk__strkey_table(free, destroy_tag); for (xml_obj = pcmk__xe_first_child(xml_resources); xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) { pe_resource_t *new_rsc = NULL; const char *id = ID(xml_obj); if (pcmk__str_empty(id)) { pcmk__config_err("Ignoring <%s> resource without ID", crm_element_name(xml_obj)); continue; } if (pcmk__str_eq((const char *) xml_obj->name, XML_CIB_TAG_RSC_TEMPLATE, pcmk__str_none)) { if (g_hash_table_lookup_extended(data_set->template_rsc_sets, id, NULL, NULL) == FALSE) { /* Record the template's ID for the knowledge of its existence anyway. 
*/ g_hash_table_insert(data_set->template_rsc_sets, strdup(id), NULL); } continue; } crm_trace("Unpacking <%s " XML_ATTR_ID "='%s'>", crm_element_name(xml_obj), id); if (pe__unpack_resource(xml_obj, &new_rsc, NULL, data_set) == pcmk_rc_ok) { data_set->resources = g_list_append(data_set->resources, new_rsc); pe_rsc_trace(new_rsc, "Added resource %s", new_rsc->id); } else { pcmk__config_err("Ignoring <%s> resource '%s' " "because configuration is invalid", crm_element_name(xml_obj), id); } } for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; setup_container(rsc, data_set); link_rsc2remotenode(data_set, rsc); } data_set->resources = g_list_sort(data_set->resources, pe__cmp_rsc_priority); if (pcmk_is_set(data_set->flags, pe_flag_quick_location)) { /* Ignore */ } else if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled) && !pcmk_is_set(data_set->flags, pe_flag_have_stonith_resource)) { pcmk__config_err("Resource start-up disabled since no STONITH resources have been defined"); pcmk__config_err("Either configure some or disable STONITH with the stonith-enabled option"); pcmk__config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity"); } return TRUE; } gboolean unpack_tags(xmlNode * xml_tags, pe_working_set_t * data_set) { xmlNode *xml_tag = NULL; data_set->tags = pcmk__strkey_table(free, destroy_tag); for (xml_tag = pcmk__xe_first_child(xml_tags); xml_tag != NULL; xml_tag = pcmk__xe_next(xml_tag)) { xmlNode *xml_obj_ref = NULL; const char *tag_id = ID(xml_tag); if (!pcmk__str_eq((const char *)xml_tag->name, XML_CIB_TAG_TAG, pcmk__str_none)) { continue; } if (tag_id == NULL) { pcmk__config_err("Ignoring <%s> without " XML_ATTR_ID, crm_element_name(xml_tag)); continue; } for (xml_obj_ref = pcmk__xe_first_child(xml_tag); xml_obj_ref != NULL; xml_obj_ref = pcmk__xe_next(xml_obj_ref)) { const char *obj_ref = ID(xml_obj_ref); if (!pcmk__str_eq((const char *)xml_obj_ref->name, 
XML_CIB_TAG_OBJ_REF, pcmk__str_none)) { continue; } if (obj_ref == NULL) { pcmk__config_err("Ignoring <%s> for tag '%s' without " XML_ATTR_ID, crm_element_name(xml_obj_ref), tag_id); continue; } if (add_tag_ref(data_set->tags, tag_id, obj_ref) == FALSE) { return FALSE; } } } return TRUE; } /* The ticket state section: * "/cib/status/tickets/ticket_state" */ static gboolean unpack_ticket_state(xmlNode * xml_ticket, pe_working_set_t * data_set) { const char *ticket_id = NULL; const char *granted = NULL; const char *last_granted = NULL; const char *standby = NULL; xmlAttrPtr xIter = NULL; pe_ticket_t *ticket = NULL; ticket_id = ID(xml_ticket); if (pcmk__str_empty(ticket_id)) { return FALSE; } crm_trace("Processing ticket state for %s", ticket_id); ticket = g_hash_table_lookup(data_set->tickets, ticket_id); if (ticket == NULL) { ticket = ticket_new(ticket_id, data_set); if (ticket == NULL) { return FALSE; } } for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) { const char *prop_name = (const char *)xIter->name; const char *prop_value = crm_element_value(xml_ticket, prop_name); if (pcmk__str_eq(prop_name, XML_ATTR_ID, pcmk__str_none)) { continue; } g_hash_table_replace(ticket->state, strdup(prop_name), strdup(prop_value)); } granted = g_hash_table_lookup(ticket->state, "granted"); if (granted && crm_is_true(granted)) { ticket->granted = TRUE; crm_info("We have ticket '%s'", ticket->id); } else { ticket->granted = FALSE; crm_info("We do not have ticket '%s'", ticket->id); } last_granted = g_hash_table_lookup(ticket->state, "last-granted"); if (last_granted) { long long last_granted_ll; pcmk__scan_ll(last_granted, &last_granted_ll, 0LL); ticket->last_granted = (time_t) last_granted_ll; } standby = g_hash_table_lookup(ticket->state, "standby"); if (standby && crm_is_true(standby)) { ticket->standby = TRUE; if (ticket->granted) { crm_info("Granted ticket '%s' is in standby-mode", ticket->id); } } else { ticket->standby = FALSE; } crm_trace("Done with ticket 
state for %s", ticket_id); return TRUE; } static gboolean unpack_tickets_state(xmlNode * xml_tickets, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; for (xml_obj = pcmk__xe_first_child(xml_tickets); xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) { if (!pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_TICKET_STATE, pcmk__str_none)) { continue; } unpack_ticket_state(xml_obj, data_set); } return TRUE; } static void unpack_handle_remote_attrs(pe_node_t *this_node, const xmlNode *state, pe_working_set_t *data_set) { const char *resource_discovery_enabled = NULL; const xmlNode *attrs = NULL; pe_resource_t *rsc = NULL; if (!pcmk__str_eq((const char *)state->name, XML_CIB_TAG_STATE, pcmk__str_none)) { return; } if ((this_node == NULL) || !pe__is_guest_or_remote_node(this_node)) { return; } crm_trace("Processing Pacemaker Remote node %s", pe__node_name(this_node)); pcmk__scan_min_int(crm_element_value(state, XML_NODE_IS_MAINTENANCE), &(this_node->details->remote_maintenance), 0); rsc = this_node->details->remote_rsc; if (this_node->details->remote_requires_reset == FALSE) { this_node->details->unclean = FALSE; this_node->details->unseen = FALSE; } attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); add_node_attrs(attrs, this_node, TRUE, data_set); if (pe__shutdown_requested(this_node)) { crm_info("%s is shutting down", pe__node_name(this_node)); this_node->details->shutdown = TRUE; } if (crm_is_true(pe_node_attribute_raw(this_node, "standby"))) { crm_info("%s is in standby mode", pe__node_name(this_node)); this_node->details->standby = TRUE; } if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance")) || ((rsc != NULL) && !pcmk_is_set(rsc->flags, pe_rsc_managed))) { crm_info("%s is in maintenance mode", pe__node_name(this_node)); this_node->details->maintenance = TRUE; } resource_discovery_enabled = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY); if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) 
{ if (pe__is_remote_node(this_node) && !pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) { crm_warn("Ignoring " XML_NODE_ATTR_RSC_DISCOVERY " attribute on Pacemaker Remote node %s" " because fencing is disabled", pe__node_name(this_node)); } else { /* This is either a remote node with fencing enabled, or a guest * node. We don't care whether fencing is enabled when fencing guest * nodes, because they are "fenced" by recovering their containing * resource. */ crm_info("%s has resource discovery disabled", pe__node_name(this_node)); this_node->details->rsc_discovery_enabled = FALSE; } } } /*! * \internal * \brief Unpack a cluster node's transient attributes * * \param[in] state CIB node state XML * \param[in,out] node Cluster node whose attributes are being unpacked * \param[in,out] data_set Cluster working set */ static void unpack_transient_attributes(const xmlNode *state, pe_node_t *node, pe_working_set_t *data_set) { const char *discovery = NULL; const xmlNode *attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); add_node_attrs(attrs, node, TRUE, data_set); if (crm_is_true(pe_node_attribute_raw(node, "standby"))) { crm_info("%s is in standby mode", pe__node_name(node)); node->details->standby = TRUE; } if (crm_is_true(pe_node_attribute_raw(node, "maintenance"))) { crm_info("%s is in maintenance mode", pe__node_name(node)); node->details->maintenance = TRUE; } discovery = pe_node_attribute_raw(node, XML_NODE_ATTR_RSC_DISCOVERY); if ((discovery != NULL) && !crm_is_true(discovery)) { crm_warn("Ignoring " XML_NODE_ATTR_RSC_DISCOVERY " attribute for %s because disabling resource discovery " "is not allowed for cluster nodes", pe__node_name(node)); } } /*! * \internal * \brief Unpack a node state entry (first pass) * * Unpack one node state entry from status. This unpacks information from the * node_state element itself and node attributes inside it, but not the * resource history inside it. 
Multiple passes through the status are needed to * fully unpack everything. * * \param[in] state CIB node state XML * \param[in,out] data_set Cluster working set */ static void unpack_node_state(const xmlNode *state, pe_working_set_t *data_set) { const char *id = NULL; const char *uname = NULL; pe_node_t *this_node = NULL; id = crm_element_value(state, XML_ATTR_ID); if (id == NULL) { crm_warn("Ignoring malformed " XML_CIB_TAG_STATE " entry without " XML_ATTR_ID); return; } uname = crm_element_value(state, XML_ATTR_UNAME); if (uname == NULL) { crm_warn("Ignoring malformed " XML_CIB_TAG_STATE " entry without " XML_ATTR_UNAME); return; } this_node = pe_find_node_any(data_set->nodes, id, uname); if (this_node == NULL) { pcmk__config_warn("Ignoring recorded node state for '%s' because " "it is no longer in the configuration", uname); return; } if (pe__is_guest_or_remote_node(this_node)) { /* We can't determine the online status of Pacemaker Remote nodes until * after all resource history has been unpacked. In this first pass, we * do need to mark whether the node has been fenced, as this plays a * role during unpacking cluster node resource state. */ pcmk__scan_min_int(crm_element_value(state, XML_NODE_IS_FENCED), &(this_node->details->remote_was_fenced), 0); return; } unpack_transient_attributes(state, this_node, data_set); /* Provisionally mark this cluster node as clean. We have at least seen it * in the current cluster's lifetime. 
*/ this_node->details->unclean = FALSE; this_node->details->unseen = FALSE; crm_trace("Determining online status of cluster node %s (id %s)", pe__node_name(this_node), id); determine_online_status(state, this_node, data_set); if (!pcmk_is_set(data_set->flags, pe_flag_have_quorum) && this_node->details->online && (data_set->no_quorum_policy == no_quorum_suicide)) { /* Everything else should flow from this automatically * (at least until the scheduler becomes able to migrate off * healthy resources) */ pe_fence_node(data_set, this_node, "cluster does not have quorum", FALSE); } } /*! * \internal * \brief Unpack nodes' resource history as much as possible * * Unpack as many nodes' resource history as possible in one pass through the * status. We need to process Pacemaker Remote nodes' connections/containers * before unpacking their history; the connection/container history will be * in another node's history, so it might take multiple passes to unpack * everything. * * \param[in] status CIB XML status section * \param[in] fence If true, treat any not-yet-unpacked nodes as unseen * \param[in,out] data_set Cluster working set * * \return Standard Pacemaker return code (specifically pcmk_rc_ok if done, * or EAGAIN if more unpacking remains to be done) */ static int unpack_node_history(const xmlNode *status, bool fence, pe_working_set_t *data_set) { int rc = pcmk_rc_ok; // Loop through all node_state entries in CIB status for (const xmlNode *state = first_named_child(status, XML_CIB_TAG_STATE); state != NULL; state = crm_next_same_xml(state)) { const char *id = ID(state); const char *uname = crm_element_value(state, XML_ATTR_UNAME); pe_node_t *this_node = NULL; if ((id == NULL) || (uname == NULL)) { // Warning already logged in first pass through status section crm_trace("Not unpacking resource history from malformed " XML_CIB_TAG_STATE " without id and/or uname"); continue; } this_node = pe_find_node_any(data_set->nodes, id, uname); if (this_node == NULL) { // Warning 
already logged in first pass through status section crm_trace("Not unpacking resource history for node %s because " "no longer in configuration", id); continue; } if (this_node->details->unpacked) { crm_trace("Not unpacking resource history for node %s because " "already unpacked", id); continue; } if (fence) { // We're processing all remaining nodes } else if (pe__is_guest_node(this_node)) { /* We can unpack a guest node's history only after we've unpacked * other resource history to the point that we know that the node's * connection and containing resource are both up. */ pe_resource_t *rsc = this_node->details->remote_rsc; if ((rsc == NULL) || (rsc->role != RSC_ROLE_STARTED) || (rsc->container->role != RSC_ROLE_STARTED)) { crm_trace("Not unpacking resource history for guest node %s " "because container and connection are not known to " "be up", id); continue; } } else if (pe__is_remote_node(this_node)) { /* We can unpack a remote node's history only after we've unpacked * other resource history to the point that we know that the node's * connection is up, with the exception of when shutdown locks are * in use. */ pe_resource_t *rsc = this_node->details->remote_rsc; if ((rsc == NULL) || (!pcmk_is_set(data_set->flags, pe_flag_shutdown_lock) && (rsc->role != RSC_ROLE_STARTED))) { crm_trace("Not unpacking resource history for remote node %s " "because connection is not known to be up", id); continue; } /* If fencing and shutdown locks are disabled and we're not processing * unseen nodes, then we don't want to unpack offline nodes until online * nodes have been unpacked. This allows us to number active clone * instances first. 
*/ } else if (!pcmk_any_flags_set(data_set->flags, pe_flag_stonith_enabled |pe_flag_shutdown_lock) && !this_node->details->online) { crm_trace("Not unpacking resource history for offline " "cluster node %s", id); continue; } if (pe__is_guest_or_remote_node(this_node)) { determine_remote_online_status(data_set, this_node); unpack_handle_remote_attrs(this_node, state, data_set); } crm_trace("Unpacking resource history for %snode %s", (fence? "unseen " : ""), id); this_node->details->unpacked = TRUE; unpack_node_lrm(this_node, state, data_set); rc = EAGAIN; // Other node histories might depend on this one } return rc; } /* remove nodes that are down, stopping */ /* create positive rsc_to_node constraints between resources and the nodes they are running on */ /* anything else? */ gboolean unpack_status(xmlNode * status, pe_working_set_t * data_set) { xmlNode *state = NULL; crm_trace("Beginning unpack"); if (data_set->tickets == NULL) { data_set->tickets = pcmk__strkey_table(free, destroy_ticket); } for (state = pcmk__xe_first_child(status); state != NULL; state = pcmk__xe_next(state)) { if (pcmk__str_eq((const char *)state->name, XML_CIB_TAG_TICKETS, pcmk__str_none)) { unpack_tickets_state((xmlNode *) state, data_set); } else if (pcmk__str_eq((const char *)state->name, XML_CIB_TAG_STATE, pcmk__str_none)) { unpack_node_state(state, data_set); } } while (unpack_node_history(status, FALSE, data_set) == EAGAIN) { crm_trace("Another pass through node resource histories is needed"); } // Now catch any nodes we didn't see unpack_node_history(status, pcmk_is_set(data_set->flags, pe_flag_stonith_enabled), data_set); /* Now that we know where resources are, we can schedule stops of containers * with failed bundle connections */ if (data_set->stop_needed != NULL) { for (GList *item = data_set->stop_needed; item; item = item->next) { pe_resource_t *container = item->data; pe_node_t *node = pe__current_node(container); if (node) { stop_action(container, node, FALSE); } } 
g_list_free(data_set->stop_needed); data_set->stop_needed = NULL; } /* Now that we know status of all Pacemaker Remote connections and nodes, * we can stop connections for node shutdowns, and check the online status * of remote/guest nodes that didn't have any node history to unpack. */ for (GList *gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *this_node = gIter->data; if (!pe__is_guest_or_remote_node(this_node)) { continue; } if (this_node->details->shutdown && (this_node->details->remote_rsc != NULL)) { pe__set_next_role(this_node->details->remote_rsc, RSC_ROLE_STOPPED, "remote shutdown"); } if (!this_node->details->unpacked) { determine_remote_online_status(data_set, this_node); } } return TRUE; } static gboolean determine_online_status_no_fencing(pe_working_set_t *data_set, const xmlNode *node_state, pe_node_t *this_node) { gboolean online = FALSE; const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE); const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER); const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER); const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED); if (!crm_is_true(in_cluster)) { crm_trace("Node is down: in_cluster=%s", pcmk__s(in_cluster, "")); } else if (pcmk__str_eq(is_peer, ONLINESTATUS, pcmk__str_casei)) { if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) { online = TRUE; } else { crm_debug("Node is not ready to run resources: %s", join); } } else if (this_node->details->expected_up == FALSE) { crm_trace("Controller is down: " "in_cluster=%s is_peer=%s join=%s expected=%s", pcmk__s(in_cluster, ""), pcmk__s(is_peer, ""), pcmk__s(join, ""), pcmk__s(exp_state, "")); } else { /* mark it unclean */ pe_fence_node(data_set, this_node, "peer is unexpectedly down", FALSE); crm_info("in_cluster=%s is_peer=%s join=%s expected=%s", pcmk__s(in_cluster, ""), pcmk__s(is_peer, ""), pcmk__s(join, ""), pcmk__s(exp_state, "")); } return online; } 
static gboolean determine_online_status_fencing(pe_working_set_t *data_set, const xmlNode *node_state, pe_node_t *this_node) { gboolean online = FALSE; gboolean do_terminate = FALSE; bool crmd_online = FALSE; const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE); const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER); const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER); const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED); const char *terminate = pe_node_attribute_raw(this_node, "terminate"); /* - XML_NODE_IN_CLUSTER ::= true|false - XML_NODE_IS_PEER ::= online|offline - XML_NODE_JOIN_STATE ::= member|down|pending|banned - XML_NODE_EXPECTED ::= member|down */ if (crm_is_true(terminate)) { do_terminate = TRUE; } else if (terminate != NULL && strlen(terminate) > 0) { /* could be a time() value */ char t = terminate[0]; if (t != '0' && isdigit(t)) { do_terminate = TRUE; } } crm_trace("%s: in_cluster=%s is_peer=%s join=%s expected=%s term=%d", pe__node_name(this_node), pcmk__s(in_cluster, ""), pcmk__s(is_peer, ""), pcmk__s(join, ""), pcmk__s(exp_state, ""), do_terminate); online = crm_is_true(in_cluster); crmd_online = pcmk__str_eq(is_peer, ONLINESTATUS, pcmk__str_casei); if (exp_state == NULL) { exp_state = CRMD_JOINSTATE_DOWN; } if (this_node->details->shutdown) { crm_debug("%s is shutting down", pe__node_name(this_node)); /* Slightly different criteria since we can't shut down a dead peer */ online = crmd_online; } else if (in_cluster == NULL) { pe_fence_node(data_set, this_node, "peer has not been seen by the cluster", FALSE); } else if (pcmk__str_eq(join, CRMD_JOINSTATE_NACK, pcmk__str_casei)) { pe_fence_node(data_set, this_node, "peer failed Pacemaker membership criteria", FALSE); } else if (do_terminate == FALSE && pcmk__str_eq(exp_state, CRMD_JOINSTATE_DOWN, pcmk__str_casei)) { if (crm_is_true(in_cluster) || crmd_online) { crm_info("- %s is not ready to run resources", 
pe__node_name(this_node)); this_node->details->standby = TRUE; this_node->details->pending = TRUE; } else { crm_trace("%s is down or still coming up", pe__node_name(this_node)); } } else if (do_terminate && pcmk__str_eq(join, CRMD_JOINSTATE_DOWN, pcmk__str_casei) && crm_is_true(in_cluster) == FALSE && !crmd_online) { crm_info("%s was just shot", pe__node_name(this_node)); online = FALSE; } else if (crm_is_true(in_cluster) == FALSE) { // Consider `priority-fencing-delay` for lost nodes pe_fence_node(data_set, this_node, "peer is no longer part of the cluster", TRUE); } else if (!crmd_online) { pe_fence_node(data_set, this_node, "peer process is no longer available", FALSE); /* Everything is running at this point, now check join state */ } else if (do_terminate) { pe_fence_node(data_set, this_node, "termination was requested", FALSE); } else if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) { crm_info("%s is active", pe__node_name(this_node)); } else if (pcmk__strcase_any_of(join, CRMD_JOINSTATE_PENDING, CRMD_JOINSTATE_DOWN, NULL)) { crm_info("%s is not ready to run resources", pe__node_name(this_node)); this_node->details->standby = TRUE; this_node->details->pending = TRUE; } else { pe_fence_node(data_set, this_node, "peer was in an unknown state", FALSE); crm_warn("%s: in-cluster=%s is-peer=%s join=%s expected=%s term=%d shutdown=%d", pe__node_name(this_node), pcmk__s(in_cluster, ""), pcmk__s(is_peer, ""), pcmk__s(join, ""), pcmk__s(exp_state, ""), do_terminate, this_node->details->shutdown); } return online; } static void determine_remote_online_status(pe_working_set_t * data_set, pe_node_t * this_node) { pe_resource_t *rsc = this_node->details->remote_rsc; pe_resource_t *container = NULL; pe_node_t *host = NULL; /* If there is a node state entry for a (former) Pacemaker Remote node * but no resource creating that node, the node's connection resource will * be NULL. Consider it an offline remote node in that case. 
*/ if (rsc == NULL) { this_node->details->online = FALSE; goto remote_online_done; } container = rsc->container; if (container && pcmk__list_of_1(rsc->running_on)) { host = rsc->running_on->data; } /* If the resource is currently started, mark it online. */ if (rsc->role == RSC_ROLE_STARTED) { crm_trace("%s node %s presumed ONLINE because connection resource is started", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->online = TRUE; } /* consider this node shutting down if transitioning start->stop */ if (rsc->role == RSC_ROLE_STARTED && rsc->next_role == RSC_ROLE_STOPPED) { crm_trace("%s node %s shutting down because connection resource is stopping", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->shutdown = TRUE; } /* Now check all the failure conditions. */ if(container && pcmk_is_set(container->flags, pe_rsc_failed)) { crm_trace("Guest node %s UNCLEAN because guest resource failed", this_node->details->id); this_node->details->online = FALSE; this_node->details->remote_requires_reset = TRUE; } else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) { crm_trace("%s node %s OFFLINE because connection resource failed", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->online = FALSE; } else if (rsc->role == RSC_ROLE_STOPPED || (container && container->role == RSC_ROLE_STOPPED)) { crm_trace("%s node %s OFFLINE because its resource is stopped", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->online = FALSE; this_node->details->remote_requires_reset = FALSE; } else if (host && (host->details->online == FALSE) && host->details->unclean) { crm_trace("Guest node %s UNCLEAN because host is unclean", this_node->details->id); this_node->details->online = FALSE; this_node->details->remote_requires_reset = TRUE; } remote_online_done: crm_trace("Remote node %s online=%s", this_node->details->id, this_node->details->online ? 
"TRUE" : "FALSE"); } static void determine_online_status(const xmlNode *node_state, pe_node_t *this_node, pe_working_set_t *data_set) { gboolean online = FALSE; const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED); CRM_CHECK(this_node != NULL, return); this_node->details->shutdown = FALSE; this_node->details->expected_up = FALSE; if (pe__shutdown_requested(this_node)) { this_node->details->shutdown = TRUE; } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) { this_node->details->expected_up = TRUE; } if (this_node->details->type == node_ping) { this_node->details->unclean = FALSE; online = FALSE; /* As far as resource management is concerned, * the node is safely offline. * Anyone caught abusing this logic will be shot */ } else if (!pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) { online = determine_online_status_no_fencing(data_set, node_state, this_node); } else { online = determine_online_status_fencing(data_set, node_state, this_node); } if (online) { this_node->details->online = TRUE; } else { /* remove node from contention */ this_node->fixed = TRUE; // @COMPAT deprecated and unused this_node->weight = -INFINITY; } if (online && this_node->details->shutdown) { /* don't run resources here */ this_node->fixed = TRUE; // @COMPAT deprecated and unused this_node->weight = -INFINITY; } if (this_node->details->type == node_ping) { crm_info("%s is not a Pacemaker node", pe__node_name(this_node)); } else if (this_node->details->unclean) { pe_proc_warn("%s is unclean", pe__node_name(this_node)); } else if (this_node->details->online) { crm_info("%s is %s", pe__node_name(this_node), this_node->details->shutdown ? "shutting down" : this_node->details->pending ? "pending" : this_node->details->standby ? "standby" : this_node->details->maintenance ? "maintenance" : "online"); } else { crm_trace("%s is offline", pe__node_name(this_node)); } } /*! 
* \internal * \brief Find the end of a resource's name, excluding any clone suffix * * \param[in] id Resource ID to check * * \return Pointer to last character of resource's base name */ const char * pe_base_name_end(const char *id) { if (!pcmk__str_empty(id)) { const char *end = id + strlen(id) - 1; for (const char *s = end; s > id; --s) { switch (*s) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': break; case ':': return (s == end)? s : (s - 1); default: return end; } } return end; } return NULL; } /*! * \internal * \brief Get a resource name excluding any clone suffix * * \param[in] last_rsc_id Resource ID to check * * \return Pointer to newly allocated string with resource's base name * \note It is the caller's responsibility to free() the result. * This asserts on error, so callers can assume result is not NULL. */ char * clone_strip(const char *last_rsc_id) { const char *end = pe_base_name_end(last_rsc_id); char *basename = NULL; CRM_ASSERT(end); basename = strndup(last_rsc_id, end - last_rsc_id + 1); CRM_ASSERT(basename); return basename; } /*! * \internal * \brief Get the name of the first instance of a cloned resource * * \param[in] last_rsc_id Resource ID to check * * \return Pointer to newly allocated string with resource's base name plus :0 * \note It is the caller's responsibility to free() the result. * This asserts on error, so callers can assume result is not NULL. 
*/
char *
clone_zero(const char *last_rsc_id)
{
    const char *end = pe_base_name_end(last_rsc_id);
    size_t base_name_len = 0;
    char *zero = NULL;

    CRM_ASSERT(end);

    // Only do pointer arithmetic once end is known to be non-NULL
    base_name_len = end - last_rsc_id + 1;

    // Room for the base name, ":0", and the terminator (calloc zero-fills)
    zero = calloc(base_name_len + 3, sizeof(char));
    CRM_ASSERT(zero);
    memcpy(zero, last_rsc_id, base_name_len);
    zero[base_name_len] = ':';
    zero[base_name_len + 1] = '0';
    return zero;
}

/*!
 * \internal
 * \brief Create a resource object for history that has no configuration
 *
 * Builds a resource XML element from the properties of \p rsc_entry, unpacks
 * it, flags the result as an orphan, and appends it to the working set's
 * resource list. If the XML describes a remote node, a matching node is
 * looked up (or created) and marked as shutting down.
 *
 * \param[in]     rsc_id     ID to give the new resource
 * \param[in]     rsc_entry  Resource history XML whose properties are copied
 * \param[in,out] data_set   Cluster working set
 *
 * \return Newly created orphan resource, or NULL if it could not be unpacked
 */
static pe_resource_t *
create_fake_resource(const char *rsc_id, const xmlNode *rsc_entry,
                     pe_working_set_t *data_set)
{
    pe_resource_t *rsc = NULL;
    xmlNode *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE);

    copy_in_properties(xml_rsc, rsc_entry);
    crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id);
    crm_log_xml_debug(xml_rsc, "Orphan resource");

    if (pe__unpack_resource(xml_rsc, &rsc, NULL, data_set) != pcmk_rc_ok) {
        // NOTE(review): xml_rsc looks unreleased on this path -- confirm
        // whether pe__unpack_resource() takes ownership when it fails
        return NULL;
    }

    if (xml_contains_remote_node(xml_rsc)) {
        pe_node_t *node = NULL;

        crm_debug("Detected orphaned remote node %s", rsc_id);
        node = pe_find_node(data_set->nodes, rsc_id);
        if (node == NULL) {
            node = pe_create_node(rsc_id, rsc_id, "remote", NULL, data_set);
        }
        link_rsc2remotenode(data_set, rsc);

        if (node) {
            crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id);
            node->details->shutdown = TRUE;
        }
    }

    if (crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER)) {
        /* This orphaned rsc needs to be mapped to a container. */
        crm_trace("Detected orphaned container filler %s", rsc_id);
        pe__set_resource_flags(rsc, pe_rsc_orphan_container_filler);
    }
    pe__set_resource_flags(rsc, pe_rsc_orphan);
    data_set->resources = g_list_append(data_set->resources, rsc);
    return rsc;
}

/*!
* \internal * \brief Create orphan instance for anonymous clone resource history * * \param[in,out] parent Clone resource that orphan will be added to * \param[in] rsc_id Orphan's resource ID * \param[in] node Where orphan is active (for logging only) * \param[in,out] data_set Cluster working set * * \return Newly added orphaned instance of \p parent */ static pe_resource_t * create_anonymous_orphan(pe_resource_t *parent, const char *rsc_id, const pe_node_t *node, pe_working_set_t *data_set) { pe_resource_t *top = pe__create_clone_child(parent, data_set); // find_rsc() because we might be a cloned group pe_resource_t *orphan = top->fns->find_rsc(top, rsc_id, NULL, pe_find_clone); pe_rsc_debug(parent, "Created orphan %s for %s: %s on %s", top->id, parent->id, rsc_id, pe__node_name(node)); return orphan; } /*! * \internal * \brief Check a node for an instance of an anonymous clone * * Return a child instance of the specified anonymous clone, in order of * preference: (1) the instance running on the specified node, if any; * (2) an inactive instance (i.e. within the total of clone-max instances); * (3) a newly created orphan (i.e. clone-max instances are already active). 
* * \param[in,out] data_set Cluster information * \param[in] node Node on which to check for instance * \param[in,out] parent Clone to check * \param[in] rsc_id Name of cloned resource in history (without instance) */ static pe_resource_t * find_anonymous_clone(pe_working_set_t *data_set, const pe_node_t *node, pe_resource_t *parent, const char *rsc_id) { GList *rIter = NULL; pe_resource_t *rsc = NULL; pe_resource_t *inactive_instance = NULL; gboolean skip_inactive = FALSE; CRM_ASSERT(parent != NULL); CRM_ASSERT(pe_rsc_is_clone(parent)); CRM_ASSERT(!pcmk_is_set(parent->flags, pe_rsc_unique)); // Check for active (or partially active, for cloned groups) instance pe_rsc_trace(parent, "Looking for %s on %s in %s", rsc_id, pe__node_name(node), parent->id); for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) { GList *locations = NULL; pe_resource_t *child = rIter->data; /* Check whether this instance is already known to be active or pending * anywhere, at this stage of unpacking. Because this function is called * for a resource before the resource's individual operation history * entries are unpacked, locations will generally not contain the * desired node. * * However, there are three exceptions: * (1) when child is a cloned group and we have already unpacked the * history of another member of the group on the same node; * (2) when we've already unpacked the history of another numbered * instance on the same node (which can happen if globally-unique * was flipped from true to false); and * (3) when we re-run calculations on the same data set as part of a * simulation. */ child->fns->location(child, &locations, 2); if (locations) { /* We should never associate the same numbered anonymous clone * instance with multiple nodes, and clone instances can't migrate, * so there must be only one location, regardless of history. 
*/ CRM_LOG_ASSERT(locations->next == NULL); if (((pe_node_t *)locations->data)->details == node->details) { /* This child instance is active on the requested node, so check * for a corresponding configured resource. We use find_rsc() * instead of child because child may be a cloned group, and we * need the particular member corresponding to rsc_id. * * If the history entry is orphaned, rsc will be NULL. */ rsc = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone); if (rsc) { /* If there are multiple instance history entries for an * anonymous clone in a single node's history (which can * happen if globally-unique is switched from true to * false), we want to consider the instances beyond the * first as orphans, even if there are inactive instance * numbers available. */ if (rsc->running_on) { crm_notice("Active (now-)anonymous clone %s has " "multiple (orphan) instance histories on %s", parent->id, pe__node_name(node)); skip_inactive = TRUE; rsc = NULL; } else { pe_rsc_trace(parent, "Resource %s, active", rsc->id); } } } g_list_free(locations); } else { pe_rsc_trace(parent, "Resource %s, skip inactive", child->id); if (!skip_inactive && !inactive_instance && !pcmk_is_set(child->flags, pe_rsc_block)) { // Remember one inactive instance in case we don't find active inactive_instance = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone); /* ... but don't use it if it was already associated with a * pending action on another node */ if (inactive_instance && inactive_instance->pending_node && (inactive_instance->pending_node->details != node->details)) { inactive_instance = NULL; } } } } if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) { pe_rsc_trace(parent, "Resource %s, empty slot", inactive_instance->id); rsc = inactive_instance; } /* If the resource has "requires" set to "quorum" or "nothing", and we don't * have a clone instance for every node, we don't want to consume a valid * instance number for unclean nodes. 
Such instances may appear to be active * according to the history, but should be considered inactive, so we can * start an instance elsewhere. Treat such instances as orphans. * * An exception is instances running on guest nodes -- since guest node * "fencing" is actually just a resource stop, requires shouldn't apply. * * @TODO Ideally, we'd use an inactive instance number if it is not needed * for any clean instances. However, we don't know that at this point. */ if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pe_rsc_needs_fencing) && (!node->details->online || node->details->unclean) && !pe__is_guest_node(node) && !pe__is_universal_clone(parent, data_set)) { rsc = NULL; } if (rsc == NULL) { rsc = create_anonymous_orphan(parent, rsc_id, node, data_set); pe_rsc_trace(parent, "Resource %s, orphan", rsc->id); } return rsc; } static pe_resource_t * unpack_find_resource(pe_working_set_t *data_set, const pe_node_t *node, const char *rsc_id) { pe_resource_t *rsc = NULL; pe_resource_t *parent = NULL; crm_trace("looking for %s", rsc_id); rsc = pe_find_resource(data_set->resources, rsc_id); if (rsc == NULL) { /* If we didn't find the resource by its name in the operation history, * check it again as a clone instance. Even when clone-max=0, we create * a single :0 orphan to match against here. 
*/ char *clone0_id = clone_zero(rsc_id); pe_resource_t *clone0 = pe_find_resource(data_set->resources, clone0_id); if (clone0 && !pcmk_is_set(clone0->flags, pe_rsc_unique)) { rsc = clone0; parent = uber_parent(clone0); crm_trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id); } else { crm_trace("%s is not known as %s either (orphan)", rsc_id, clone0_id); } free(clone0_id); } else if (rsc->variant > pe_native) { crm_trace("Resource history for %s is orphaned because it is no longer primitive", rsc_id); return NULL; } else { parent = uber_parent(rsc); } if (pe_rsc_is_anon_clone(parent)) { if (pe_rsc_is_bundled(parent)) { rsc = pe__find_bundle_replica(parent->parent, node); } else { char *base = clone_strip(rsc_id); rsc = find_anonymous_clone(data_set, node, parent, base); free(base); CRM_ASSERT(rsc != NULL); } } if (rsc && !pcmk__str_eq(rsc_id, rsc->id, pcmk__str_casei) && !pcmk__str_eq(rsc_id, rsc->clone_name, pcmk__str_casei)) { pcmk__str_update(&rsc->clone_name, rsc_id); pe_rsc_debug(rsc, "Internally renamed %s on %s to %s%s", rsc_id, pe__node_name(node), rsc->id, (pcmk_is_set(rsc->flags, pe_rsc_orphan)? 
" (ORPHAN)" : "")); } return rsc; } static pe_resource_t * process_orphan_resource(const xmlNode *rsc_entry, const pe_node_t *node, pe_working_set_t *data_set) { pe_resource_t *rsc = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); crm_debug("Detected orphan resource %s on %s", rsc_id, pe__node_name(node)); rsc = create_fake_resource(rsc_id, rsc_entry, data_set); if (rsc == NULL) { return NULL; } if (!pcmk_is_set(data_set->flags, pe_flag_stop_rsc_orphans)) { pe__clear_resource_flags(rsc, pe_rsc_managed); } else { CRM_CHECK(rsc != NULL, return NULL); pe_rsc_trace(rsc, "Added orphan %s", rsc->id); resource_location(rsc, NULL, -INFINITY, "__orphan_do_not_run__", data_set); } return rsc; } static void process_rsc_state(pe_resource_t * rsc, pe_node_t * node, enum action_fail_response on_fail) { pe_node_t *tmpnode = NULL; char *reason = NULL; enum action_fail_response save_on_fail = action_fail_ignore; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s", rsc->id, role2text(rsc->role), pe__node_name(node), fail2text(on_fail)); /* process current state */ if (rsc->role != RSC_ROLE_UNKNOWN) { pe_resource_t *iter = rsc; while (iter) { if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) { pe_node_t *n = pe__copy_node(node); pe_rsc_trace(rsc, "%s%s%s known on %s", rsc->id, ((rsc->clone_name == NULL)? "" : " also known as "), ((rsc->clone_name == NULL)? "" : rsc->clone_name), pe__node_name(n)); g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n); } if (pcmk_is_set(iter->flags, pe_rsc_unique)) { break; } iter = iter->parent; } } /* If a managed resource is believed to be running, but node is down ... 
*/ if (rsc->role > RSC_ROLE_STOPPED && node->details->online == FALSE && node->details->maintenance == FALSE && pcmk_is_set(rsc->flags, pe_rsc_managed)) { gboolean should_fence = FALSE; /* If this is a guest node, fence it (regardless of whether fencing is * enabled, because guest node fencing is done by recovery of the * container resource rather than by the fencer). Mark the resource * we're processing as failed. When the guest comes back up, its * operation history in the CIB will be cleared, freeing the affected * resource to run again once we are sure we know its state. */ if (pe__is_guest_node(node)) { pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); should_fence = TRUE; } else if (pcmk_is_set(rsc->cluster->flags, pe_flag_stonith_enabled)) { if (pe__is_remote_node(node) && node->details->remote_rsc && !pcmk_is_set(node->details->remote_rsc->flags, pe_rsc_failed)) { /* Setting unseen means that fencing of the remote node will * occur only if the connection resource is not going to start * somewhere. This allows connection resources on a failed * cluster node to move to another node without requiring the * remote nodes to be fenced as well. */ node->details->unseen = TRUE; reason = crm_strdup_printf("%s is active there (fencing will be" " revoked if remote connection can " "be re-established elsewhere)", rsc->id); } should_fence = TRUE; } if (should_fence) { if (reason == NULL) { reason = crm_strdup_printf("%s is thought to be active there", rsc->id); } pe_fence_node(rsc->cluster, node, reason, FALSE); } free(reason); } /* In order to calculate priority_fencing_delay correctly, save the failure information and pass it to native_add_running(). 
*/ save_on_fail = on_fail; if (node->details->unclean) { /* No extra processing needed * Also allows resources to be started again after a node is shot */ on_fail = action_fail_ignore; } switch (on_fail) { case action_fail_ignore: /* nothing to do */ break; case action_fail_demote: pe__set_resource_flags(rsc, pe_rsc_failed); demote_action(rsc, node, FALSE); break; case action_fail_fence: /* treat it as if it is still running * but also mark the node as unclean */ reason = crm_strdup_printf("%s failed there", rsc->id); pe_fence_node(rsc->cluster, node, reason, FALSE); free(reason); break; case action_fail_standby: node->details->standby = TRUE; node->details->standby_onfail = TRUE; break; case action_fail_block: /* is_managed == FALSE will prevent any * actions being sent for the resource */ pe__clear_resource_flags(rsc, pe_rsc_managed); pe__set_resource_flags(rsc, pe_rsc_block); break; case action_fail_migrate: /* make sure it comes up somewhere else * or not at all */ resource_location(rsc, node, -INFINITY, "__action_migration_auto__", rsc->cluster); break; case action_fail_stop: pe__set_next_role(rsc, RSC_ROLE_STOPPED, "on-fail=stop"); break; case action_fail_recover: if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); stop_action(rsc, node, FALSE); } break; case action_fail_restart_container: pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); if (rsc->container && pe_rsc_is_bundled(rsc)) { /* A bundle's remote connection can run on a different node than * the bundle's container. We don't necessarily know where the * container is running yet, so remember it and add a stop * action for it later. 
*/ rsc->cluster->stop_needed = g_list_prepend(rsc->cluster->stop_needed, rsc->container); } else if (rsc->container) { stop_action(rsc->container, node, FALSE); } else if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { stop_action(rsc, node, FALSE); } break; case action_fail_reset_remote: pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); if (pcmk_is_set(rsc->cluster->flags, pe_flag_stonith_enabled)) { tmpnode = NULL; if (rsc->is_remote_node) { tmpnode = pe_find_node(rsc->cluster->nodes, rsc->id); } if (tmpnode && pe__is_remote_node(tmpnode) && tmpnode->details->remote_was_fenced == 0) { /* The remote connection resource failed in a way that * should result in fencing the remote node. */ pe_fence_node(rsc->cluster, tmpnode, "remote connection is unrecoverable", FALSE); } } /* require the stop action regardless if fencing is occurring or not. */ if (rsc->role > RSC_ROLE_STOPPED) { stop_action(rsc, node, FALSE); } /* if reconnect delay is in use, prevent the connection from exiting the * "STOPPED" role until the failure is cleared by the delay timeout. */ if (rsc->remote_reconnect_ms) { pe__set_next_role(rsc, RSC_ROLE_STOPPED, "remote reset"); } break; } /* ensure a remote-node connection failure forces an unclean remote-node * to be fenced. By setting unseen = FALSE, the remote-node failure will * result in a fencing operation regardless if we're going to attempt to * reconnect to the remote-node in this transition or not. 
*/ if (pcmk_is_set(rsc->flags, pe_rsc_failed) && rsc->is_remote_node) { tmpnode = pe_find_node(rsc->cluster->nodes, rsc->id); if (tmpnode && tmpnode->details->unclean) { tmpnode->details->unseen = FALSE; } } if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { if (pcmk_is_set(rsc->flags, pe_rsc_orphan)) { if (pcmk_is_set(rsc->flags, pe_rsc_managed)) { pcmk__config_warn("Detected active orphan %s running on %s", rsc->id, pe__node_name(node)); } else { pcmk__config_warn("Resource '%s' must be stopped manually on " "%s because cluster is configured not to " "stop active orphans", rsc->id, pe__node_name(node)); } } native_add_running(rsc, node, rsc->cluster, (save_on_fail != action_fail_ignore)); switch (on_fail) { case action_fail_ignore: break; case action_fail_demote: case action_fail_block: pe__set_resource_flags(rsc, pe_rsc_failed); break; default: pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); break; } } else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) { /* Only do this for older status sections that included instance numbers * Otherwise stopped instances will appear as orphans */ pe_rsc_trace(rsc, "Resetting clone_name %s for %s (stopped)", rsc->clone_name, rsc->id); free(rsc->clone_name); rsc->clone_name = NULL; } else { GList *possible_matches = pe__resource_actions(rsc, node, RSC_STOP, FALSE); GList *gIter = possible_matches; for (; gIter != NULL; gIter = gIter->next) { pe_action_t *stop = (pe_action_t *) gIter->data; pe__set_action_flags(stop, pe_action_optional); } g_list_free(possible_matches); } /* A successful stop after migrate_to on the migration source doesn't make * the partially migrated resource stopped on the migration target. 
*/ if (rsc->role == RSC_ROLE_STOPPED && rsc->partial_migration_source && rsc->partial_migration_source->details == node->details && rsc->partial_migration_target && rsc->running_on) { rsc->role = RSC_ROLE_STARTED; } } /* create active recurring operations as optional */ static void process_recurring(pe_node_t * node, pe_resource_t * rsc, int start_index, int stop_index, GList *sorted_op_list, pe_working_set_t * data_set) { int counter = -1; const char *task = NULL; const char *status = NULL; GList *gIter = sorted_op_list; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index); for (; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; guint interval_ms = 0; char *key = NULL; const char *id = ID(rsc_op); counter++; if (node->details->online == FALSE) { pe_rsc_trace(rsc, "Skipping %s on %s: node is offline", rsc->id, pe__node_name(node)); break; /* Need to check if there's a monitor for role="Stopped" */ } else if (start_index < stop_index && counter <= stop_index) { pe_rsc_trace(rsc, "Skipping %s on %s: resource is not active", id, pe__node_name(node)); continue; } else if (counter < start_index) { pe_rsc_trace(rsc, "Skipping %s on %s: old %d", id, pe__node_name(node), counter); continue; } crm_element_value_ms(rsc_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); if (interval_ms == 0) { pe_rsc_trace(rsc, "Skipping %s on %s: non-recurring", id, pe__node_name(node)); continue; } status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); if (pcmk__str_eq(status, "-1", pcmk__str_casei)) { pe_rsc_trace(rsc, "Skipping %s on %s: status", id, pe__node_name(node)); continue; } task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); /* create the action */ key = pcmk__op_key(rsc->id, task, interval_ms); pe_rsc_trace(rsc, "Creating %s on %s", key, pe__node_name(node)); custom_action(rsc, key, task, node, TRUE, TRUE, data_set); } } void calculate_active_ops(const GList *sorted_op_list, int *start_index, 
int *stop_index) { int counter = -1; int implied_monitor_start = -1; int implied_clone_start = -1; const char *task = NULL; const char *status = NULL; *stop_index = -1; *start_index = -1; for (const GList *iter = sorted_op_list; iter != NULL; iter = iter->next) { const xmlNode *rsc_op = (const xmlNode *) iter->data; counter++; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); if (pcmk__str_eq(task, CRMD_ACTION_STOP, pcmk__str_casei) && pcmk__str_eq(status, "0", pcmk__str_casei)) { *stop_index = counter; } else if (pcmk__strcase_any_of(task, CRMD_ACTION_START, CRMD_ACTION_MIGRATED, NULL)) { *start_index = counter; } else if ((implied_monitor_start <= *stop_index) && pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei)) { const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC); if (pcmk__strcase_any_of(rc, "0", "8", NULL)) { implied_monitor_start = counter; } } else if (pcmk__strcase_any_of(task, CRMD_ACTION_PROMOTE, CRMD_ACTION_DEMOTE, NULL)) { implied_clone_start = counter; } } if (*start_index == -1) { if (implied_clone_start != -1) { *start_index = implied_clone_start; } else if (implied_monitor_start != -1) { *start_index = implied_monitor_start; } } } // If resource history entry has shutdown lock, remember lock node and time static void unpack_shutdown_lock(const xmlNode *rsc_entry, pe_resource_t *rsc, const pe_node_t *node, pe_working_set_t *data_set) { time_t lock_time = 0; // When lock started (i.e. 
node shutdown time) if ((crm_element_value_epoch(rsc_entry, XML_CONFIG_ATTR_SHUTDOWN_LOCK, &lock_time) == pcmk_ok) && (lock_time != 0)) { if ((data_set->shutdown_lock > 0) && (get_effective_time(data_set) > (lock_time + data_set->shutdown_lock))) { pe_rsc_info(rsc, "Shutdown lock for %s on %s expired", rsc->id, pe__node_name(node)); pe__clear_resource_history(rsc, node, data_set); } else { /* @COMPAT I don't like breaking const signatures, but * rsc->lock_node should really be const -- we just can't change it * until the next API compatibility break. */ rsc->lock_node = (pe_node_t *) node; rsc->lock_time = lock_time; } } } /*! * \internal * \brief Unpack one lrm_resource entry from a node's CIB status * * \param[in,out] node Node whose status is being unpacked * \param[in] rsc_entry lrm_resource XML being unpacked * \param[in,out] data_set Cluster working set * * \return Resource corresponding to the entry, or NULL if no operation history */ static pe_resource_t * unpack_lrm_resource(pe_node_t *node, const xmlNode *lrm_resource, pe_working_set_t *data_set) { GList *gIter = NULL; int stop_index = -1; int start_index = -1; enum rsc_role_e req_role = RSC_ROLE_UNKNOWN; const char *rsc_id = ID(lrm_resource); pe_resource_t *rsc = NULL; GList *op_list = NULL; GList *sorted_op_list = NULL; xmlNode *rsc_op = NULL; xmlNode *last_failure = NULL; enum action_fail_response on_fail = action_fail_ignore; enum rsc_role_e saved_role = RSC_ROLE_UNKNOWN; if (rsc_id == NULL) { crm_warn("Ignoring malformed " XML_LRM_TAG_RESOURCE " entry without id"); return NULL; } crm_trace("Unpacking " XML_LRM_TAG_RESOURCE " for %s on %s", rsc_id, pe__node_name(node)); // Build a list of individual lrm_rsc_op entries, so we can sort them for (rsc_op = first_named_child(lrm_resource, XML_LRM_TAG_RSC_OP); rsc_op != NULL; rsc_op = crm_next_same_xml(rsc_op)) { op_list = g_list_prepend(op_list, rsc_op); } if (!pcmk_is_set(data_set->flags, pe_flag_shutdown_lock)) { if (op_list == NULL) { // If there are no 
operations, there is nothing to do return NULL; } } /* find the resource */ rsc = unpack_find_resource(data_set, node, rsc_id); if (rsc == NULL) { if (op_list == NULL) { // If there are no operations, there is nothing to do return NULL; } else { rsc = process_orphan_resource(lrm_resource, node, data_set); } } CRM_ASSERT(rsc != NULL); // Check whether the resource is "shutdown-locked" to this node if (pcmk_is_set(data_set->flags, pe_flag_shutdown_lock)) { unpack_shutdown_lock(lrm_resource, rsc, node, data_set); } /* process operations */ saved_role = rsc->role; rsc->role = RSC_ROLE_UNKNOWN; sorted_op_list = g_list_sort(op_list, sort_op_by_callid); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; - unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail, data_set); + unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail); } /* create active recurring operations as optional */ calculate_active_ops(sorted_op_list, &start_index, &stop_index); process_recurring(node, rsc, start_index, stop_index, sorted_op_list, data_set); /* no need to free the contents */ g_list_free(sorted_op_list); process_rsc_state(rsc, node, on_fail); if (get_target_role(rsc, &req_role)) { if (rsc->next_role == RSC_ROLE_UNKNOWN || req_role < rsc->next_role) { pe__set_next_role(rsc, req_role, XML_RSC_ATTR_TARGET_ROLE); } else if (req_role > rsc->next_role) { pe_rsc_info(rsc, "%s: Not overwriting calculated next role %s" " with requested next role %s", rsc->id, role2text(rsc->next_role), role2text(req_role)); } } if (saved_role > rsc->role) { rsc->role = saved_role; } return rsc; } static void handle_orphaned_container_fillers(const xmlNode *lrm_rsc_list, pe_working_set_t *data_set) { for (const xmlNode *rsc_entry = pcmk__xe_first_child(lrm_rsc_list); rsc_entry != NULL; rsc_entry = pcmk__xe_next(rsc_entry)) { pe_resource_t *rsc; pe_resource_t *container; const char *rsc_id; const char *container_id; if (!pcmk__str_eq((const char 
*)rsc_entry->name, XML_LRM_TAG_RESOURCE, pcmk__str_casei)) { continue; } container_id = crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER); rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); if (container_id == NULL || rsc_id == NULL) { continue; } container = pe_find_resource(data_set->resources, container_id); if (container == NULL) { continue; } rsc = pe_find_resource(data_set->resources, rsc_id); if (rsc == NULL || !pcmk_is_set(rsc->flags, pe_rsc_orphan_container_filler) || rsc->container != NULL) { continue; } pe_rsc_trace(rsc, "Mapped container of orphaned resource %s to %s", rsc->id, container_id); rsc->container = container; container->fillers = g_list_append(container->fillers, rsc); } } /*! * \internal * \brief Unpack one node's lrm status section * * \param[in,out] node Node whose status is being unpacked * \param[in] xml CIB node state XML * \param[in,out] data_set Cluster working set */ static void unpack_node_lrm(pe_node_t *node, const xmlNode *xml, pe_working_set_t *data_set) { bool found_orphaned_container_filler = false; // Drill down to lrm_resources section xml = find_xml_node(xml, XML_CIB_TAG_LRM, FALSE); if (xml == NULL) { return; } xml = find_xml_node(xml, XML_LRM_TAG_RESOURCES, FALSE); if (xml == NULL) { return; } // Unpack each lrm_resource entry for (const xmlNode *rsc_entry = first_named_child(xml, XML_LRM_TAG_RESOURCE); rsc_entry != NULL; rsc_entry = crm_next_same_xml(rsc_entry)) { pe_resource_t *rsc = unpack_lrm_resource(node, rsc_entry, data_set); if ((rsc != NULL) && pcmk_is_set(rsc->flags, pe_rsc_orphan_container_filler)) { found_orphaned_container_filler = true; } } /* Now that all resource state has been unpacked for this node, map any * orphaned container fillers to their container resource. 
*/ if (found_orphaned_container_filler) { handle_orphaned_container_fillers(xml, data_set); } } static void set_active(pe_resource_t * rsc) { const pe_resource_t *top = pe__const_top_resource(rsc, false); if (top && pcmk_is_set(top->flags, pe_rsc_promotable)) { rsc->role = RSC_ROLE_UNPROMOTED; } else { rsc->role = RSC_ROLE_STARTED; } } static void set_node_score(gpointer key, gpointer value, gpointer user_data) { pe_node_t *node = value; int *score = user_data; node->weight = *score; } #define XPATH_NODE_STATE "/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS \ "/" XML_CIB_TAG_STATE #define SUB_XPATH_LRM_RESOURCE "/" XML_CIB_TAG_LRM \ "/" XML_LRM_TAG_RESOURCES \ "/" XML_LRM_TAG_RESOURCE #define SUB_XPATH_LRM_RSC_OP "/" XML_LRM_TAG_RSC_OP static xmlNode * find_lrm_op(const char *resource, const char *op, const char *node, const char *source, int target_rc, pe_working_set_t *data_set) { GString *xpath = NULL; xmlNode *xml = NULL; CRM_CHECK((resource != NULL) && (op != NULL) && (node != NULL), return NULL); xpath = g_string_sized_new(256); pcmk__g_strcat(xpath, XPATH_NODE_STATE "[@" XML_ATTR_UNAME "='", node, "']" SUB_XPATH_LRM_RESOURCE "[@" XML_ATTR_ID "='", resource, "']" SUB_XPATH_LRM_RSC_OP "[@" XML_LRM_ATTR_TASK "='", op, "'", NULL); /* Need to check against transition_magic too? 
*/ if ((source != NULL) && (strcmp(op, CRMD_ACTION_MIGRATE) == 0)) { pcmk__g_strcat(xpath, " and @" XML_LRM_ATTR_MIGRATE_TARGET "='", source, "']", NULL); } else if ((source != NULL) && (strcmp(op, CRMD_ACTION_MIGRATED) == 0)) { pcmk__g_strcat(xpath, " and @" XML_LRM_ATTR_MIGRATE_SOURCE "='", source, "']", NULL); } else { g_string_append_c(xpath, ']'); } xml = get_xpath_object((const char *) xpath->str, data_set->input, LOG_DEBUG); g_string_free(xpath, TRUE); if (xml && target_rc >= 0) { int rc = PCMK_OCF_UNKNOWN_ERROR; int status = PCMK_EXEC_ERROR; crm_element_value_int(xml, XML_LRM_ATTR_RC, &rc); crm_element_value_int(xml, XML_LRM_ATTR_OPSTATUS, &status); if ((rc != target_rc) || (status != PCMK_EXEC_DONE)) { return NULL; } } return xml; } static xmlNode * find_lrm_resource(const char *rsc_id, const char *node_name, pe_working_set_t *data_set) { GString *xpath = NULL; xmlNode *xml = NULL; CRM_CHECK((rsc_id != NULL) && (node_name != NULL), return NULL); xpath = g_string_sized_new(256); pcmk__g_strcat(xpath, XPATH_NODE_STATE "[@" XML_ATTR_UNAME "='", node_name, "']" SUB_XPATH_LRM_RESOURCE "[@" XML_ATTR_ID "='", rsc_id, "']", NULL); xml = get_xpath_object((const char *) xpath->str, data_set->input, LOG_DEBUG); g_string_free(xpath, TRUE); return xml; } /*! 
* \internal * \brief Check whether a resource has no completed action history on a node * * \param[in,out] rsc Resource to check * \param[in] node_name Node to check * * \return true if \p rsc_id is unknown on \p node_name, otherwise false */ static bool unknown_on_node(pe_resource_t *rsc, const char *node_name) { bool result = false; xmlXPathObjectPtr search; GString *xpath = g_string_sized_new(256); pcmk__g_strcat(xpath, XPATH_NODE_STATE "[@" XML_ATTR_UNAME "='", node_name, "']" SUB_XPATH_LRM_RESOURCE "[@" XML_ATTR_ID "='", rsc->id, "']" SUB_XPATH_LRM_RSC_OP "[@" XML_LRM_ATTR_RC "!='193']", NULL); search = xpath_search(rsc->cluster->input, (const char *) xpath->str); result = (numXpathResults(search) == 0); freeXpathObject(search); g_string_free(xpath, TRUE); return result; } /*! * \brief Check whether a probe/monitor indicating the resource was not running * on a node happened after some event * * \param[in] rsc_id Resource being checked * \param[in] node_name Node being checked * \param[in] xml_op Event that monitor is being compared to * \param[in] same_node Whether the operations are on the same node * \param[in,out] data_set Cluster working set * * \return true if such a monitor happened after event, false otherwise */ static bool monitor_not_running_after(const char *rsc_id, const char *node_name, const xmlNode *xml_op, bool same_node, pe_working_set_t *data_set) { /* Any probe/monitor operation on the node indicating it was not running * there */ xmlNode *monitor = find_lrm_op(rsc_id, CRMD_ACTION_STATUS, node_name, NULL, PCMK_OCF_NOT_RUNNING, data_set); return (monitor && pe__is_newer_op(monitor, xml_op, same_node) > 0); } /*! 
* \brief Check whether any non-monitor operation on a node happened after some * event * * \param[in] rsc_id Resource being checked * \param[in] node_name Node being checked * \param[in] xml_op Event that non-monitor is being compared to * \param[in] same_node Whether the operations are on the same node * \param[in,out] data_set Cluster working set * * \return true if such a operation happened after event, false otherwise */ static bool non_monitor_after(const char *rsc_id, const char *node_name, const xmlNode *xml_op, bool same_node, pe_working_set_t *data_set) { xmlNode *lrm_resource = NULL; lrm_resource = find_lrm_resource(rsc_id, node_name, data_set); if (lrm_resource == NULL) { return false; } for (xmlNode *op = first_named_child(lrm_resource, XML_LRM_TAG_RSC_OP); op != NULL; op = crm_next_same_xml(op)) { const char * task = NULL; if (op == xml_op) { continue; } task = crm_element_value(op, XML_LRM_ATTR_TASK); if (pcmk__str_any_of(task, CRMD_ACTION_START, CRMD_ACTION_STOP, CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, NULL) && pe__is_newer_op(op, xml_op, same_node) > 0) { return true; } } return false; } /*! 
* \brief Check whether the resource has newer state on a node after a migration * attempt * * \param[in] rsc_id Resource being checked * \param[in] node_name Node being checked * \param[in] migrate_to Any migrate_to event that is being compared to * \param[in] migrate_from Any migrate_from event that is being compared to * \param[in,out] data_set Cluster working set * * \return true if such a operation happened after event, false otherwise */ static bool newer_state_after_migrate(const char *rsc_id, const char *node_name, const xmlNode *migrate_to, const xmlNode *migrate_from, pe_working_set_t *data_set) { const xmlNode *xml_op = migrate_to; const char *source = NULL; const char *target = NULL; bool same_node = false; if (migrate_from) { xml_op = migrate_from; } source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); /* It's preferred to compare to the migrate event on the same node if * existing, since call ids are more reliable. */ if (pcmk__str_eq(node_name, target, pcmk__str_casei)) { if (migrate_from) { xml_op = migrate_from; same_node = true; } else { xml_op = migrate_to; } } else if (pcmk__str_eq(node_name, source, pcmk__str_casei)) { if (migrate_to) { xml_op = migrate_to; same_node = true; } else { xml_op = migrate_from; } } /* If there's any newer non-monitor operation on the node, or any newer * probe/monitor operation on the node indicating it was not running there, * the migration events potentially no longer matter for the node. */ return non_monitor_after(rsc_id, node_name, xml_op, same_node, data_set) || monitor_not_running_after(rsc_id, node_name, xml_op, same_node, data_set); } /*! 
* \internal * \brief Parse migration source and target node names from history entry * * \param[in] entry Resource history entry for a migration action * \param[in] source_node If not NULL, source must match this node * \param[in] target_node If not NULL, target must match this node * \param[out] source_name Where to store migration source node name * \param[out] target_name Where to store migration target node name * * \return Standard Pacemaker return code */ static int get_migration_node_names(const xmlNode *entry, const pe_node_t *source_node, const pe_node_t *target_node, const char **source_name, const char **target_name) { - const char *id = ID(entry); - - if (id == NULL) { - crm_err("Ignoring resource history entry without ID"); - return pcmk_rc_unpack_error; - } - *source_name = crm_element_value(entry, XML_LRM_ATTR_MIGRATE_SOURCE); *target_name = crm_element_value(entry, XML_LRM_ATTR_MIGRATE_TARGET); if ((*source_name == NULL) || (*target_name == NULL)) { crm_err("Ignoring resource history entry %s without " XML_LRM_ATTR_MIGRATE_SOURCE " and " XML_LRM_ATTR_MIGRATE_TARGET, - id); + ID(entry)); return pcmk_rc_unpack_error; } if ((source_node != NULL) && !pcmk__str_eq(*source_name, source_node->details->uname, pcmk__str_casei|pcmk__str_null_matches)) { crm_err("Ignoring resource history entry %s because " XML_LRM_ATTR_MIGRATE_SOURCE "='%s' does not match %s", - id, *source_name, pe__node_name(source_node)); + ID(entry), *source_name, pe__node_name(source_node)); return pcmk_rc_unpack_error; } if ((target_node != NULL) && !pcmk__str_eq(*target_name, target_node->details->uname, pcmk__str_casei|pcmk__str_null_matches)) { crm_err("Ignoring resource history entry %s because " XML_LRM_ATTR_MIGRATE_TARGET "='%s' does not match %s", - id, *target_name, pe__node_name(target_node)); + ID(entry), *target_name, pe__node_name(target_node)); return pcmk_rc_unpack_error; } return pcmk_rc_ok; } /* * \internal * \brief Add a migration source to a resource's list of dangling 
migrations * * If the migrate_to and migrate_from actions in a live migration both * succeeded, but there is no stop on the source, the migration is considered * "dangling." Add the source to the resource's dangling migration list, which * will be used to schedule a stop on the source without affecting the target. * * \param[in,out] rsc Resource involved in migration * \param[in] node Migration source */ static void add_dangling_migration(pe_resource_t *rsc, const pe_node_t *node) { pe_rsc_trace(rsc, "Dangling migration of %s requires stop on %s", rsc->id, pe__node_name(node)); rsc->role = RSC_ROLE_STOPPED; rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, (gpointer) node); } +/*! + * \internal + * \brief Update resource role etc. after a successful migrate_to action + * + * \param[in,out] history Parsed action result history + */ static void -unpack_migrate_to_success(pe_resource_t *rsc, const pe_node_t *node, - const xmlNode *xml_op) +unpack_migrate_to_success(struct action_history *history) { /* A complete migration sequence is: * 1. migrate_to on source node (which succeeded if we get to this function) * 2. migrate_from on target node * 3. stop on source node * * If no migrate_from has happened, the migration is considered to be * "partial". If the migrate_from succeeded but no stop has happened, the * migration is considered to be "dangling". * * If a successful migrate_to and stop have happened on the source node, we * still need to check for a partial migration, due to scenarios (easier to * produce with batch-limit=1) like: * * - A resource is migrating from node1 to node2, and a migrate_to is * initiated for it on node1. * * - node2 goes into standby mode while the migrate_to is pending, which * aborts the transition. * * - Upon completion of the migrate_to, a new transition schedules a stop * on both nodes and a start on node1. 
* * - If the new transition is aborted for any reason while the resource is * stopping on node1, the transition after that stop completes will see * the migrate_to and stop on the source, but it's still a partial * migration, and the resource must be stopped on node2 because it is * potentially active there due to the migrate_to. * * We also need to take into account that either node's history may be * cleared at any point in the migration process. */ int from_rc = PCMK_OCF_OK; int from_status = PCMK_EXEC_PENDING; pe_node_t *target_node = NULL; xmlNode *migrate_from = NULL; const char *source = NULL; const char *target = NULL; bool source_newer_op = false; bool target_newer_state = false; bool active_on_target = false; // Get source and target node names from XML - if (get_migration_node_names(xml_op, node, NULL, &source, + if (get_migration_node_names(history->xml, history->node, NULL, &source, &target) != pcmk_rc_ok) { return; } // Check for newer state on the source - source_newer_op = non_monitor_after(rsc->id, source, xml_op, true, - rsc->cluster); + source_newer_op = non_monitor_after(history->rsc->id, source, history->xml, + true, history->rsc->cluster); // Check for a migrate_from action from this source on the target - migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, target, - source, -1, rsc->cluster); + migrate_from = find_lrm_op(history->rsc->id, CRMD_ACTION_MIGRATED, target, + source, -1, history->rsc->cluster); if (migrate_from != NULL) { if (source_newer_op) { /* There's a newer non-monitor operation on the source and a * migrate_from on the target, so this migrate_to is irrelevant to * the resource's state. */ return; } crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc); crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, &from_status); } /* If the resource has newer state on both the source and target after the * migration events, this migrate_to is irrelevant to the resource's state. 
*/ - target_newer_state = newer_state_after_migrate(rsc->id, target, xml_op, - migrate_from, rsc->cluster); + target_newer_state = newer_state_after_migrate(history->rsc->id, target, + history->xml, migrate_from, + history->rsc->cluster); if (source_newer_op && target_newer_state) { return; } /* Check for dangling migration (migrate_from succeeded but stop not done). * We know there's no stop because we already returned if the target has a * migrate_from and the source has any newer non-monitor operation. */ if ((from_rc == PCMK_OCF_OK) && (from_status == PCMK_EXEC_DONE)) { - add_dangling_migration(rsc, node); + add_dangling_migration(history->rsc, history->node); return; } /* Without newer state, this migrate_to implies the resource is active. * (Clones are not allowed to migrate, so role can't be promoted.) */ - rsc->role = RSC_ROLE_STARTED; + history->rsc->role = RSC_ROLE_STARTED; - target_node = pe_find_node(rsc->cluster->nodes, target); + target_node = pe_find_node(history->rsc->cluster->nodes, target); active_on_target = !target_newer_state && (target_node != NULL) && target_node->details->online; if (from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target if (active_on_target) { - native_add_running(rsc, target_node, rsc->cluster, TRUE); + native_add_running(history->rsc, target_node, history->rsc->cluster, + TRUE); } else { // Mark resource as failed, require recovery, and prevent migration - pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); - pe__clear_resource_flags(rsc, pe_rsc_allow_migrate); + pe__set_resource_flags(history->rsc, pe_rsc_failed|pe_rsc_stop); + pe__clear_resource_flags(history->rsc, pe_rsc_allow_migrate); } return; } // The migrate_from is pending, complete but erased, or to be scheduled /* If there is no history at all for the resource on an online target, then * it was likely cleaned. Just return, and we'll schedule a probe. Once we * have the probe result, it will be reflected in target_newer_state. 
*/ if ((target_node != NULL) && target_node->details->online - && unknown_on_node(rsc, target)) { + && unknown_on_node(history->rsc, target)) { return; } if (active_on_target) { - pe_node_t *source_node = pe_find_node(rsc->cluster->nodes, source); + pe_node_t *source_node = pe_find_node(history->rsc->cluster->nodes, + source); - native_add_running(rsc, target_node, rsc->cluster, FALSE); + native_add_running(history->rsc, target_node, history->rsc->cluster, + FALSE); if ((source_node != NULL) && source_node->details->online) { /* This is a partial migration: the migrate_to completed * successfully on the source, but the migrate_from has not * completed. Remember the source and target; if the newly * chosen target remains the same when we schedule actions * later, we may continue with the migration. */ - rsc->partial_migration_target = target_node; - rsc->partial_migration_source = source_node; + history->rsc->partial_migration_target = target_node; + history->rsc->partial_migration_source = source_node; } } else if (!source_newer_op) { // Mark resource as failed, require recovery, and prevent migration - pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); - pe__clear_resource_flags(rsc, pe_rsc_allow_migrate); + pe__set_resource_flags(history->rsc, pe_rsc_failed|pe_rsc_stop); + pe__clear_resource_flags(history->rsc, pe_rsc_allow_migrate); } } +/*! + * \internal + * \brief Update resource role etc. 
after a failed migrate_to action + * + * \param[in,out] history Parsed action result history + */ static void -unpack_migrate_to_failure(pe_resource_t *rsc, const pe_node_t *node, - const xmlNode *xml_op, pe_working_set_t *data_set) +unpack_migrate_to_failure(struct action_history *history) { xmlNode *target_migrate_from = NULL; const char *source = NULL; const char *target = NULL; // Get source and target node names from XML - if (get_migration_node_names(xml_op, node, NULL, &source, + if (get_migration_node_names(history->xml, history->node, NULL, &source, &target) != pcmk_rc_ok) { return; } /* If a migration failed, we have to assume the resource is active. Clones * are not allowed to migrate, so role can't be promoted. */ - rsc->role = RSC_ROLE_STARTED; + history->rsc->role = RSC_ROLE_STARTED; // Check for migrate_from on the target - target_migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, target, - source, PCMK_OCF_OK, data_set); + target_migrate_from = find_lrm_op(history->rsc->id, CRMD_ACTION_MIGRATED, + target, source, PCMK_OCF_OK, + history->rsc->cluster); if (/* If the resource state is unknown on the target, it will likely be * probed there. * Don't just consider it running there. We will get back here anyway in * case the probe detects it's running there. */ - !unknown_on_node(rsc, target) + !unknown_on_node(history->rsc, target) /* If the resource has newer state on the target after the migration * events, this migrate_to no longer matters for the target. */ - && !newer_state_after_migrate(rsc->id, target, xml_op, target_migrate_from, - data_set)) { + && !newer_state_after_migrate(history->rsc->id, target, history->xml, + target_migrate_from, + history->rsc->cluster)) { /* The resource has no newer state on the target, so assume it's still * active there. * (if it is up). 
*/ - pe_node_t *target_node = pe_find_node(data_set->nodes, target); + pe_node_t *target_node = pe_find_node(history->rsc->cluster->nodes, + target); if (target_node && target_node->details->online) { - native_add_running(rsc, target_node, data_set, FALSE); + native_add_running(history->rsc, target_node, history->rsc->cluster, + FALSE); } - } else if (!non_monitor_after(rsc->id, source, xml_op, true, data_set)) { + } else if (!non_monitor_after(history->rsc->id, source, history->xml, true, + history->rsc->cluster)) { /* We know the resource has newer state on the target, but this * migrate_to still matters for the source as long as there's no newer * non-monitor operation there. */ // Mark node as having dangling migration so we can force a stop later - rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, - (gpointer) node); + history->rsc->dangling_migrations = + g_list_prepend(history->rsc->dangling_migrations, + (gpointer) history->node); } } +/*! + * \internal + * \brief Update resource role etc. after a failed migrate_from action + * + * \param[in,out] history Parsed action result history + */ static void -unpack_migrate_from_failure(pe_resource_t *rsc, const pe_node_t *node, - const xmlNode *xml_op, pe_working_set_t *data_set) +unpack_migrate_from_failure(struct action_history *history) { xmlNode *source_migrate_to = NULL; const char *source = NULL; const char *target = NULL; // Get source and target node names from XML - if (get_migration_node_names(xml_op, NULL, node, &source, + if (get_migration_node_names(history->xml, NULL, history->node, &source, &target) != pcmk_rc_ok) { return; } /* If a migration failed, we have to assume the resource is active. Clones * are not allowed to migrate, so role can't be promoted. 
*/ - rsc->role = RSC_ROLE_STARTED; + history->rsc->role = RSC_ROLE_STARTED; // Check for a migrate_to on the source - source_migrate_to = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATE, - source, target, PCMK_OCF_OK, data_set); + source_migrate_to = find_lrm_op(history->rsc->id, CRMD_ACTION_MIGRATE, + source, target, PCMK_OCF_OK, + history->rsc->cluster); if (/* If the resource state is unknown on the source, it will likely be * probed there. * Don't just consider it running there. We will get back here anyway in * case the probe detects it's running there. */ - !unknown_on_node(rsc, source) + !unknown_on_node(history->rsc, source) /* If the resource has newer state on the source after the migration * events, this migrate_from no longer matters for the source. */ - && !newer_state_after_migrate(rsc->id, source, source_migrate_to, xml_op, - data_set)) { + && !newer_state_after_migrate(history->rsc->id, source, + source_migrate_to, history->xml, + history->rsc->cluster)) { /* The resource has no newer state on the source, so assume it's still * active there (if it is up). */ - pe_node_t *source_node = pe_find_node(data_set->nodes, source); + pe_node_t *source_node = pe_find_node(history->rsc->cluster->nodes, + source); if (source_node && source_node->details->online) { - native_add_running(rsc, source_node, data_set, TRUE); + native_add_running(history->rsc, source_node, history->rsc->cluster, + TRUE); } } } +/*! 
+ * \internal + * \brief Add an action to cluster's list of failed actions + * + * \param[in,out] history Parsed action result history + */ static void -record_failed_op(xmlNode *op, const pe_node_t *node, - const pe_resource_t *rsc, pe_working_set_t *data_set) +record_failed_op(struct action_history *history) { - xmlNode *xIter = NULL; - const char *op_key = crm_element_value(op, XML_LRM_ATTR_TASK_KEY); - - if (node->details->online == FALSE) { + if (!(history->node->details->online)) { return; } - for (xIter = data_set->failed->children; xIter; xIter = xIter->next) { - const char *key = crm_element_value(xIter, XML_LRM_ATTR_TASK_KEY); + for (const xmlNode *xIter = history->rsc->cluster->failed->children; + xIter != NULL; xIter = xIter->next) { + + const char *key = pe__xe_history_key(xIter); const char *uname = crm_element_value(xIter, XML_ATTR_UNAME); - if(pcmk__str_eq(op_key, key, pcmk__str_casei) && pcmk__str_eq(uname, node->details->uname, pcmk__str_casei)) { + if (pcmk__str_eq(history->key, key, pcmk__str_none) + && pcmk__str_eq(uname, history->node->details->uname, + pcmk__str_casei)) { crm_trace("Skipping duplicate entry %s on %s", - op_key, pe__node_name(node)); + history->key, pe__node_name(history->node)); return; } } - crm_trace("Adding entry %s on %s", op_key, pe__node_name(node)); - crm_xml_add(op, XML_ATTR_UNAME, node->details->uname); - crm_xml_add(op, XML_LRM_ATTR_RSCID, rsc->id); - add_node_copy(data_set->failed, op); -} - -static const char * -get_op_key(const xmlNode *xml_op) -{ - const char *key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); - if(key == NULL) { - key = ID(xml_op); - } - return key; + crm_trace("Adding entry for %s on %s to failed action list", + history->key, pe__node_name(history->node)); + crm_xml_add(history->xml, XML_ATTR_UNAME, history->node->details->uname); + crm_xml_add(history->xml, XML_LRM_ATTR_RSCID, history->rsc->id); + add_node_copy(history->rsc->cluster->failed, history->xml); } static char * 
last_change_str(const xmlNode *xml_op) { time_t when; char *result = NULL; if (crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE, &when) == pcmk_ok) { char *when_s = pcmk__epoch2str(&when, 0); const char *p = strchr(when_s, ' '); // Skip day of week to make message shorter if ((p != NULL) && (*(++p) != '\0')) { result = strdup(p); CRM_ASSERT(result != NULL); } free(when_s); } if (result == NULL) { result = strdup("unknown time"); CRM_ASSERT(result != NULL); } return result; } /*! * \internal * \brief Compare two on-fail values * * \param[in] first One on-fail value to compare * \param[in] second The other on-fail value to compare * * \return A negative number if second is more severe than first, zero if they * are equal, or a positive number if first is more severe than second. * \note This is only needed until the action_fail_response values can be * renumbered at the next API compatibility break. */ static int cmp_on_fail(enum action_fail_response first, enum action_fail_response second) { switch (first) { case action_fail_demote: switch (second) { case action_fail_ignore: return 1; case action_fail_demote: return 0; default: return -1; } break; case action_fail_reset_remote: switch (second) { case action_fail_ignore: case action_fail_demote: case action_fail_recover: return 1; case action_fail_reset_remote: return 0; default: return -1; } break; case action_fail_restart_container: switch (second) { case action_fail_ignore: case action_fail_demote: case action_fail_recover: case action_fail_reset_remote: return 1; case action_fail_restart_container: return 0; default: return -1; } break; default: break; } switch (second) { case action_fail_demote: return (first == action_fail_ignore)? 
-1 : 1; case action_fail_reset_remote: switch (first) { case action_fail_ignore: case action_fail_demote: case action_fail_recover: return -1; default: return 1; } break; case action_fail_restart_container: switch (first) { case action_fail_ignore: case action_fail_demote: case action_fail_recover: case action_fail_reset_remote: return -1; default: return 1; } break; default: break; } return first - second; } +/*! + * \internal + * \brief Ban a resource (or its clone if an anonymous instance) from all nodes + * + * \param[in,out] rsc Resource to ban + */ static void -unpack_rsc_op_failure(pe_resource_t *rsc, const pe_node_t *node, int rc, - xmlNode *xml_op, xmlNode **last_failure, - enum action_fail_response *on_fail, - pe_working_set_t *data_set) +ban_from_all_nodes(pe_resource_t *rsc) { - bool is_probe = false; - pe_action_t *action = NULL; + int score = -INFINITY; + pe_resource_t *fail_rsc = rsc; - const char *key = get_op_key(xml_op); - const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); - const char *exit_reason = crm_element_value(xml_op, - XML_LRM_ATTR_EXIT_REASON); - char *last_change_s = NULL; + if (fail_rsc->parent != NULL) { + pe_resource_t *parent = uber_parent(fail_rsc); - CRM_ASSERT(rsc); - CRM_CHECK(task != NULL, return); + if (pe_rsc_is_anon_clone(parent)) { + /* For anonymous clones, if an operation with on-fail=stop fails for + * any instance, the entire clone must stop. + */ + fail_rsc = parent; + } + } - *last_failure = xml_op; + // Ban the resource from all nodes + crm_notice("%s will not be started under current conditions", fail_rsc->id); + if (fail_rsc->allowed_nodes != NULL) { + g_hash_table_destroy(fail_rsc->allowed_nodes); + } + fail_rsc->allowed_nodes = pe__node_list2table(rsc->cluster->nodes); + g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score); +} + +/*! 
+ * \internal + * \brief Update resource role, failure handling, etc., after a failed action + * + * \param[in,out] history Parsed action result history + * \param[out] last_failure Set this to action XML + * \param[in,out] on_fail What should be done about the result + */ +static void +unpack_rsc_op_failure(struct action_history *history, xmlNode **last_failure, + enum action_fail_response *on_fail) +{ + bool is_probe = false; + pe_action_t *action = NULL; + char *last_change_s = NULL; - is_probe = pcmk_xe_is_probe(xml_op); - last_change_s = last_change_str(xml_op); + *last_failure = history->xml; - if (exit_reason == NULL) { - exit_reason = ""; - } + is_probe = pcmk_xe_is_probe(history->xml); + last_change_s = last_change_str(history->xml); - if (!pcmk_is_set(data_set->flags, pe_flag_symmetric_cluster) - && (rc == PCMK_OCF_NOT_INSTALLED)) { + if (!pcmk_is_set(history->rsc->cluster->flags, pe_flag_symmetric_cluster) + && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) { crm_trace("Unexpected result (%s%s%s) was recorded for " - "%s of %s on %s at %s " CRM_XS " rc=%d id=%s", - services_ocf_exitcode_str(rc), - (*exit_reason? ": " : ""), exit_reason, - (is_probe? "probe" : task), rsc->id, pe__node_name(node), - last_change_s, rc, ID(xml_op)); + "%s of %s on %s at %s " CRM_XS " exit-status=%d id=%s", + services_ocf_exitcode_str(history->exit_status), + (pcmk__str_empty(history->exit_reason)? "" : ": "), + pcmk__s(history->exit_reason, ""), + (is_probe? "probe" : history->task), history->rsc->id, + pe__node_name(history->node), last_change_s, + history->exit_status, history->id); } else { crm_warn("Unexpected result (%s%s%s) was recorded for " - "%s of %s on %s at %s " CRM_XS " rc=%d id=%s", - services_ocf_exitcode_str(rc), - (*exit_reason? ": " : ""), exit_reason, - (is_probe? 
"probe" : task), rsc->id, pe__node_name(node), - last_change_s, rc, ID(xml_op)); - - if (is_probe && (rc != PCMK_OCF_OK) - && (rc != PCMK_OCF_NOT_RUNNING) - && (rc != PCMK_OCF_RUNNING_PROMOTED)) { + "%s of %s on %s at %s " CRM_XS " exit-status=%d id=%s", + services_ocf_exitcode_str(history->exit_status), + (pcmk__str_empty(history->exit_reason)? "" : ": "), + pcmk__s(history->exit_reason, ""), + (is_probe? "probe" : history->task), history->rsc->id, + pe__node_name(history->node), last_change_s, + history->exit_status, history->id); + + if (is_probe && (history->exit_status != PCMK_OCF_OK) + && (history->exit_status != PCMK_OCF_NOT_RUNNING) + && (history->exit_status != PCMK_OCF_RUNNING_PROMOTED)) { /* A failed (not just unexpected) probe result could mean the user * didn't know resources will be probed even where they can't run. */ crm_notice("If it is not possible for %s to run on %s, see " "the resource-discovery option for location constraints", - rsc->id, pe__node_name(node)); + history->rsc->id, pe__node_name(history->node)); } - record_failed_op(xml_op, node, rsc, data_set); + record_failed_op(history); } free(last_change_s); - action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set); + action = custom_action(history->rsc, strdup(history->key), history->task, + NULL, TRUE, FALSE, history->rsc->cluster); if (cmp_on_fail(*on_fail, action->on_fail) < 0) { - pe_rsc_trace(rsc, "on-fail %s -> %s for %s (%s)", fail2text(*on_fail), - fail2text(action->on_fail), action->uuid, key); + pe_rsc_trace(history->rsc, "on-fail %s -> %s for %s (%s)", + fail2text(*on_fail), fail2text(action->on_fail), + action->uuid, history->key); *on_fail = action->on_fail; } - if (!strcmp(task, CRMD_ACTION_STOP)) { - resource_location(rsc, node, -INFINITY, "__stop_fail__", data_set); + if (strcmp(history->task, CRMD_ACTION_STOP) == 0) { + resource_location(history->rsc, history->node, -INFINITY, + "__stop_fail__", history->rsc->cluster); - } else if (!strcmp(task, 
CRMD_ACTION_MIGRATE)) { - unpack_migrate_to_failure(rsc, node, xml_op, data_set); + } else if (strcmp(history->task, CRMD_ACTION_MIGRATE) == 0) { + unpack_migrate_to_failure(history); - } else if (!strcmp(task, CRMD_ACTION_MIGRATED)) { - unpack_migrate_from_failure(rsc, node, xml_op, data_set); + } else if (strcmp(history->task, CRMD_ACTION_MIGRATED) == 0) { + unpack_migrate_from_failure(history); - } else if (!strcmp(task, CRMD_ACTION_PROMOTE)) { - rsc->role = RSC_ROLE_PROMOTED; + } else if (strcmp(history->task, CRMD_ACTION_PROMOTE) == 0) { + history->rsc->role = RSC_ROLE_PROMOTED; - } else if (!strcmp(task, CRMD_ACTION_DEMOTE)) { + } else if (strcmp(history->task, CRMD_ACTION_DEMOTE) == 0) { if (action->on_fail == action_fail_block) { - rsc->role = RSC_ROLE_PROMOTED; - pe__set_next_role(rsc, RSC_ROLE_STOPPED, + history->rsc->role = RSC_ROLE_PROMOTED; + pe__set_next_role(history->rsc, RSC_ROLE_STOPPED, "demote with on-fail=block"); - } else if(rc == PCMK_OCF_NOT_RUNNING) { - rsc->role = RSC_ROLE_STOPPED; + } else if (history->exit_status == PCMK_OCF_NOT_RUNNING) { + history->rsc->role = RSC_ROLE_STOPPED; } else { /* Staying in the promoted role would put the scheduler and * controller into a loop. Setting the role to unpromoted is not * dangerous because the resource will be stopped as part of * recovery, and any promotion will be ordered after that stop. 
*/ - rsc->role = RSC_ROLE_UNPROMOTED; + history->rsc->role = RSC_ROLE_UNPROMOTED; } } - if(is_probe && rc == PCMK_OCF_NOT_INSTALLED) { + if (is_probe && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) { /* leave stopped */ - pe_rsc_trace(rsc, "Leaving %s stopped", rsc->id); - rsc->role = RSC_ROLE_STOPPED; + pe_rsc_trace(history->rsc, "Leaving %s stopped", history->rsc->id); + history->rsc->role = RSC_ROLE_STOPPED; - } else if (rsc->role < RSC_ROLE_STARTED) { - pe_rsc_trace(rsc, "Setting %s active", rsc->id); - set_active(rsc); + } else if (history->rsc->role < RSC_ROLE_STARTED) { + pe_rsc_trace(history->rsc, "Setting %s active", history->rsc->id); + set_active(history->rsc); } - pe_rsc_trace(rsc, "Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s", - rsc->id, role2text(rsc->role), - pcmk__btoa(node->details->unclean), + pe_rsc_trace(history->rsc, + "Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s", + history->rsc->id, role2text(history->rsc->role), + pcmk__btoa(history->node->details->unclean), fail2text(action->on_fail), role2text(action->fail_role)); - if (action->fail_role != RSC_ROLE_STARTED && rsc->next_role < action->fail_role) { - pe__set_next_role(rsc, action->fail_role, "failure"); + if ((action->fail_role != RSC_ROLE_STARTED) + && (history->rsc->next_role < action->fail_role)) { + pe__set_next_role(history->rsc, action->fail_role, "failure"); } if (action->fail_role == RSC_ROLE_STOPPED) { - int score = -INFINITY; - - pe_resource_t *fail_rsc = rsc; - - if (fail_rsc->parent) { - pe_resource_t *parent = uber_parent(fail_rsc); - - if (pe_rsc_is_clone(parent) - && !pcmk_is_set(parent->flags, pe_rsc_unique)) { - /* For clone resources, if a child fails on an operation - * with on-fail = stop, all the resources fail. Do this by preventing - * the parent from coming up again. 
*/ - fail_rsc = parent; - } - } - crm_notice("%s will not be started under current conditions", - fail_rsc->id); - /* make sure it doesn't come up again */ - if (fail_rsc->allowed_nodes != NULL) { - g_hash_table_destroy(fail_rsc->allowed_nodes); - } - fail_rsc->allowed_nodes = pe__node_list2table(data_set->nodes); - g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score); + ban_from_all_nodes(history->rsc); } pe_free_action(action); } /*! * \internal - * \brief Check whether a resource with a failed action can be recovered + * \brief Block a resource with a failed action if it cannot be recovered * * If resource action is a failed stop and fencing is not possible, mark the * resource as unmanaged and blocked, since recovery cannot be done. * - * \param[in,out] rsc Resource with failed action - * \param[in] node Node where action failed - * \param[in] task Name of action that failed - * \param[in] exit_status Exit status of failed action (for logging only) - * \param[in] xml_op XML of failed action result (for logging only) + * \param[in,out] history Parsed action history entry */ static void -check_recoverable(pe_resource_t *rsc, const pe_node_t *node, const char *task, - int exit_status, const xmlNode *xml_op) +block_if_unrecoverable(struct action_history *history) { - const char *exit_reason = NULL; char *last_change_s = NULL; - if (strcmp(task, CRMD_ACTION_STOP) != 0) { + if (strcmp(history->task, CRMD_ACTION_STOP) != 0) { return; // All actions besides stop are always recoverable } - if (pe_can_fence(node->details->data_set, node)) { + if (pe_can_fence(history->node->details->data_set, history->node)) { return; // Failed stops are recoverable via fencing } - exit_reason = crm_element_value(xml_op, XML_LRM_ATTR_EXIT_REASON); - last_change_s = last_change_str(xml_op); + last_change_s = last_change_str(history->xml); pe_proc_err("No further recovery can be attempted for %s " "because %s on %s failed (%s%s%s) at %s " - CRM_XS " rc=%d id=%s", rsc->id, 
task, pe__node_name(node), - services_ocf_exitcode_str(exit_status), - ((exit_reason == NULL)? "" : ": "), pcmk__s(exit_reason, ""), - last_change_s, exit_status, ID(xml_op)); + CRM_XS " rc=%d id=%s", + history->rsc->id, history->task, pe__node_name(history->node), + services_ocf_exitcode_str(history->exit_status), + (pcmk__str_empty(history->exit_reason)? "" : ": "), + pcmk__s(history->exit_reason, ""), + last_change_s, history->exit_status, history->id); free(last_change_s); - pe__clear_resource_flags(rsc, pe_rsc_managed); - pe__set_resource_flags(rsc, pe_rsc_block); + pe__clear_resource_flags(history->rsc, pe_rsc_managed); + pe__set_resource_flags(history->rsc, pe_rsc_block); } /*! * \internal - * \brief Update an integer value and why + * \brief Update action history's execution status and why * - * \param[in,out] i Pointer to integer to update - * \param[out] why Where to store reason for update - * \param[in] value New value - * \param[in] reason Description of why value was changed + * \param[in,out] history Parsed action history entry + * \param[out] why Where to store reason for update + * \param[in] value New value + * \param[in] reason Description of why value was changed */ static inline void -remap_because(int *i, const char **why, int value, const char *reason) +remap_because(struct action_history *history, const char **why, int value, + const char *reason) { - if (*i != value) { - *i = value; + if (history->execution_status != value) { + history->execution_status = value; *why = reason; } } /*! * \internal * \brief Remap informational monitor results and operation status * * For the monitor results, certain OCF codes are for providing extended information * to the user about services that aren't yet failed but not entirely healthy either. * These must be treated as the "normal" result by Pacemaker. * * For operation status, the action result can be used to determine an appropriate * status for the purposes of responding to the action. 
The status provided by the * executor is not directly usable since the executor does not know what was expected. * - * \param[in,out] xml_op Operation history entry XML from CIB status - * \param[in,out] rsc Resource that operation history entry is for - * \param[in] node Node where operation was executed - * \param[in,out] data_set Current cluster working set - * \param[in,out] on_fail What should be done about the result - * \param[in] target_rc Expected return code of operation - * \param[in,out] rc Actual return code of operation (treated as OCF) - * \param[in,out] status Operation execution status + * \param[in,out] history Parsed action history entry + * \param[in,out] on_fail What should be done about the result * * \note If the result is remapped and the node is not shutting down or failed, * the operation will be recorded in the data set's list of failed operations * to highlight it for the user. * * \note This may update the resource's current and next role. */ static void -remap_operation(xmlNode *xml_op, pe_resource_t *rsc, const pe_node_t *node, - pe_working_set_t *data_set, enum action_fail_response *on_fail, - int target_rc, int *rc, int *status) +remap_operation(struct action_history *history, + enum action_fail_response *on_fail) { bool is_probe = false; - int orig_exit_status = *rc; - int orig_exec_status = *status; + int orig_exit_status = history->exit_status; + int orig_exec_status = history->execution_status; const char *why = NULL; - const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); - const char *key = get_op_key(xml_op); - const char *exit_reason = crm_element_value(xml_op, - XML_LRM_ATTR_EXIT_REASON); + const char *task = history->task; char *last_change_s = NULL; if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_none)) { // Remap degraded results to their usual counterparts - *rc = pcmk__effective_rc(*rc); - if (*rc != orig_exit_status) { + history->exit_status = pcmk__effective_rc(history->exit_status); + if 
(history->exit_status != orig_exit_status) { why = "degraded monitor result"; - if (!node->details->shutdown || node->details->online) { - record_failed_op(xml_op, node, rsc, data_set); + if (!history->node->details->shutdown + || history->node->details->online) { + record_failed_op(history); } } } - if (!pe_rsc_is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op)) { - if ((*status != PCMK_EXEC_DONE) || (*rc != PCMK_OCF_NOT_RUNNING)) { - *status = PCMK_EXEC_DONE; - *rc = PCMK_OCF_NOT_RUNNING; - why = "irrelevant probe result"; - } + if (!pe_rsc_is_bundled(history->rsc) + && pcmk_xe_mask_probe_failure(history->xml) + && ((history->execution_status != PCMK_EXEC_DONE) + || (history->exit_status != PCMK_OCF_NOT_RUNNING))) { + history->execution_status = PCMK_EXEC_DONE; + history->exit_status = PCMK_OCF_NOT_RUNNING; + why = "irrelevant probe result"; } - /* If the executor reported an operation status of anything but done or + /* If the executor reported an execution status of anything but done or * error, consider that final. But for done or error, we know better whether * it should be treated as a failure or not, because we know the expected * result. 
*/ - switch (*status) { + switch (history->execution_status) { case PCMK_EXEC_DONE: case PCMK_EXEC_ERROR: break; - default: + + // These should be treated as node-fatal + case PCMK_EXEC_NO_FENCE_DEVICE: + case PCMK_EXEC_NO_SECRETS: + history->execution_status = PCMK_EXEC_ERROR_HARD; + why = "node-fatal error"; goto remap_done; - } - if (exit_reason == NULL) { - exit_reason = ""; + default: + goto remap_done; } - is_probe = pcmk_xe_is_probe(xml_op); + is_probe = pcmk_xe_is_probe(history->xml); if (is_probe) { task = "probe"; } - if (target_rc < 0) { + if (history->expected_exit_status < 0) { /* Pre-1.0 Pacemaker versions, and Pacemaker 1.1.6 or earlier with * Heartbeat 2.0.7 or earlier as the cluster layer, did not include the - * target_rc in the transition key, which (along with the similar case - * of a corrupted transition key in the CIB) will be reported to this - * function as -1. Pacemaker 2.0+ does not support rolling upgrades from - * those versions or processing of saved CIB files from those versions, - * so we do not need to care much about this case. + * expected exit status in the transition key, which (along with the + * similar case of a corrupted transition key in the CIB) will be + * reported to this function as -1. Pacemaker 2.0+ does not support + * rolling upgrades from those versions or processing of saved CIB files + * from those versions, so we do not need to care much about this case. 
*/ - remap_because(status, &why, PCMK_EXEC_ERROR, "obsolete history format"); - crm_warn("Expected result not found for %s on %s (corrupt or obsolete CIB?)", - key, pe__node_name(node)); + remap_because(history, &why, PCMK_EXEC_ERROR, + "obsolete history format"); + crm_warn("Expected result not found for %s on %s " + "(corrupt or obsolete CIB?)", + history->key, pe__node_name(history->node)); - } else if (*rc == target_rc) { - remap_because(status, &why, PCMK_EXEC_DONE, "expected result"); + } else if (history->exit_status == history->expected_exit_status) { + remap_because(history, &why, PCMK_EXEC_DONE, "expected result"); } else { - remap_because(status, &why, PCMK_EXEC_ERROR, "unexpected result"); - pe_rsc_debug(rsc, "%s on %s: expected %d (%s), got %d (%s%s%s)", - key, pe__node_name(node), - target_rc, services_ocf_exitcode_str(target_rc), - *rc, services_ocf_exitcode_str(*rc), - (*exit_reason? ": " : ""), exit_reason); + remap_because(history, &why, PCMK_EXEC_ERROR, "unexpected result"); + pe_rsc_debug(history->rsc, + "%s on %s: expected %d (%s), got %d (%s%s%s)", + history->key, pe__node_name(history->node), + history->expected_exit_status, + services_ocf_exitcode_str(history->expected_exit_status), + history->exit_status, + services_ocf_exitcode_str(history->exit_status), + (pcmk__str_empty(history->exit_reason)? 
"" : ": "), + pcmk__s(history->exit_reason, "")); } - last_change_s = last_change_str(xml_op); + last_change_s = last_change_str(history->xml); - switch (*rc) { + switch (history->exit_status) { case PCMK_OCF_OK: - if (is_probe && (target_rc == PCMK_OCF_NOT_RUNNING)) { - remap_because(status, &why, PCMK_EXEC_DONE, "probe"); - pe_rsc_info(rsc, "Probe found %s active on %s at %s", - rsc->id, pe__node_name(node), last_change_s); + if (is_probe + && (history->expected_exit_status == PCMK_OCF_NOT_RUNNING)) { + remap_because(history, &why, PCMK_EXEC_DONE, "probe"); + pe_rsc_info(history->rsc, "Probe found %s active on %s at %s", + history->rsc->id, pe__node_name(history->node), + last_change_s); } break; case PCMK_OCF_NOT_RUNNING: - if (is_probe || (target_rc == *rc) - || !pcmk_is_set(rsc->flags, pe_rsc_managed)) { + if (is_probe + || (history->expected_exit_status == history->exit_status) + || !pcmk_is_set(history->rsc->flags, pe_rsc_managed)) { - remap_because(status, &why, PCMK_EXEC_DONE, "exit status"); - rsc->role = RSC_ROLE_STOPPED; + remap_because(history, &why, PCMK_EXEC_DONE, "exit status"); + history->rsc->role = RSC_ROLE_STOPPED; /* clear any previous failure actions */ *on_fail = action_fail_ignore; - pe__set_next_role(rsc, RSC_ROLE_UNKNOWN, "not running"); + pe__set_next_role(history->rsc, RSC_ROLE_UNKNOWN, + "not running"); } break; case PCMK_OCF_RUNNING_PROMOTED: - if (is_probe && (*rc != target_rc)) { - remap_because(status, &why, PCMK_EXEC_DONE, "probe"); - pe_rsc_info(rsc, + if (is_probe + && (history->exit_status != history->expected_exit_status)) { + remap_because(history, &why, PCMK_EXEC_DONE, "probe"); + pe_rsc_info(history->rsc, "Probe found %s active and promoted on %s at %s", - rsc->id, pe__node_name(node), last_change_s); + history->rsc->id, pe__node_name(history->node), + last_change_s); } - rsc->role = RSC_ROLE_PROMOTED; + history->rsc->role = RSC_ROLE_PROMOTED; break; case PCMK_OCF_DEGRADED_PROMOTED: case PCMK_OCF_FAILED_PROMOTED: - rsc->role 
= RSC_ROLE_PROMOTED; - remap_because(status, &why, PCMK_EXEC_ERROR, "exit status"); + history->rsc->role = RSC_ROLE_PROMOTED; + remap_because(history, &why, PCMK_EXEC_ERROR, "exit status"); break; case PCMK_OCF_NOT_CONFIGURED: - remap_because(status, &why, PCMK_EXEC_ERROR_FATAL, "exit status"); + remap_because(history, &why, PCMK_EXEC_ERROR_FATAL, "exit status"); break; case PCMK_OCF_UNIMPLEMENT_FEATURE: { guint interval_ms = 0; - crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, + crm_element_value_ms(history->xml, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); if (interval_ms == 0) { - check_recoverable(rsc, node, task, *rc, xml_op); - remap_because(status, &why, PCMK_EXEC_ERROR_HARD, + block_if_unrecoverable(history); + remap_because(history, &why, PCMK_EXEC_ERROR_HARD, "exit status"); } else { - remap_because(status, &why, PCMK_EXEC_NOT_SUPPORTED, + remap_because(history, &why, PCMK_EXEC_NOT_SUPPORTED, "exit status"); } } break; case PCMK_OCF_NOT_INSTALLED: case PCMK_OCF_INVALID_PARAM: case PCMK_OCF_INSUFFICIENT_PRIV: - check_recoverable(rsc, node, task, *rc, xml_op); - remap_because(status, &why, PCMK_EXEC_ERROR_HARD, "exit status"); + block_if_unrecoverable(history); + remap_because(history, &why, PCMK_EXEC_ERROR_HARD, "exit status"); break; default: - if (*status == PCMK_EXEC_DONE) { + if (history->execution_status == PCMK_EXEC_DONE) { crm_info("Treating unknown exit status %d from %s of %s " "on %s at %s as failure", - *rc, task, rsc->id, pe__node_name(node), - last_change_s); - remap_because(status, &why, PCMK_EXEC_ERROR, + history->exit_status, task, history->rsc->id, + pe__node_name(history->node), last_change_s); + remap_because(history, &why, PCMK_EXEC_ERROR, "unknown exit status"); } break; } free(last_change_s); remap_done: if (why != NULL) { - pe_rsc_trace(rsc, + pe_rsc_trace(history->rsc, "Remapped %s result from [%s: %s] to [%s: %s] " "because of %s", - key, pcmk_exec_status_str(orig_exec_status), + history->key, 
pcmk_exec_status_str(orig_exec_status), crm_exit_str(orig_exit_status), - pcmk_exec_status_str(*status), crm_exit_str(*rc), why); + pcmk_exec_status_str(history->execution_status), + crm_exit_str(history->exit_status), why); } } // return TRUE if start or monitor last failure but parameters changed static bool should_clear_for_param_change(const xmlNode *xml_op, const char *task, pe_resource_t *rsc, pe_node_t *node) { if (!strcmp(task, "start") || !strcmp(task, "monitor")) { if (pe__bundle_needs_remote_name(rsc)) { /* We haven't allocated resources yet, so we can't reliably * substitute addr parameters for the REMOTE_CONTAINER_HACK. * When that's needed, defer the check until later. */ pe__add_param_check(xml_op, rsc, node, pe_check_last_failure, rsc->cluster); } else { op_digest_cache_t *digest_data = NULL; digest_data = rsc_action_digest_cmp(rsc, xml_op, node, rsc->cluster); switch (digest_data->rc) { case RSC_DIGEST_UNKNOWN: crm_trace("Resource %s history entry %s on %s" " has no digest to compare", - rsc->id, get_op_key(xml_op), node->details->id); + rsc->id, pe__xe_history_key(xml_op), + node->details->id); break; case RSC_DIGEST_MATCH: break; default: return TRUE; } } } return FALSE; } // Order action after fencing of remote node, given connection rsc static void order_after_remote_fencing(pe_action_t *action, pe_resource_t *remote_conn, pe_working_set_t *data_set) { pe_node_t *remote_node = pe_find_node(data_set->nodes, remote_conn->id); if (remote_node) { pe_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL, FALSE, data_set); order_actions(fence, action, pe_order_implies_then); } } static bool should_ignore_failure_timeout(const pe_resource_t *rsc, const char *task, guint interval_ms, bool is_last_failure) { /* Clearing failures of recurring monitors has special concerns. 
The * executor reports only changes in the monitor result, so if the * monitor is still active and still getting the same failure result, * that will go undetected after the failure is cleared. * * Also, the operation history will have the time when the recurring * monitor result changed to the given code, not the time when the * result last happened. * * @TODO We probably should clear such failures only when the failure * timeout has passed since the last occurrence of the failed result. * However we don't record that information. We could maybe approximate * that by clearing only if there is a more recent successful monitor or * stop result, but we don't even have that information at this point * since we are still unpacking the resource's operation history. * * This is especially important for remote connection resources with a * reconnect interval, so in that case, we skip clearing failures * if the remote node hasn't been fenced. */ if (rsc->remote_reconnect_ms && pcmk_is_set(rsc->cluster->flags, pe_flag_stonith_enabled) && (interval_ms != 0) && pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei)) { pe_node_t *remote_node = pe_find_node(rsc->cluster->nodes, rsc->id); if (remote_node && !remote_node->details->remote_was_fenced) { if (is_last_failure) { crm_info("Waiting to clear monitor failure for remote node %s" " until fencing has occurred", rsc->id); } return TRUE; } } return FALSE; } /*! * \internal * \brief Check operation age and schedule failure clearing when appropriate * * This function has two distinct purposes. The first is to check whether an * operation history entry is expired (i.e. the resource has a failure timeout, * the entry is older than the timeout, and the resource either has no fail * count or its fail count is entirely older than the timeout). The second is to * schedule fail count clearing when appropriate (i.e. 
the operation is expired * and either the resource has an expired fail count or the operation is a * last_failure for a remote connection resource with a reconnect interval, * or the operation is a last_failure for a start or monitor operation and the * resource's parameters have changed since the operation). * - * \param[in,out] rsc Resource that operation happened to - * \param[in,out] node Node that operation happened on - * \param[in] rc Actual result of operation - * \param[in] xml_op Operation history entry XML + * \param[in,out] history Parsed action result history * - * \return TRUE if operation history entry is expired, FALSE otherwise + * \return true if operation history entry is expired, otherwise false */ static bool -check_operation_expiry(pe_resource_t *rsc, pe_node_t *node, int rc, - const xmlNode *xml_op) +check_operation_expiry(struct action_history *history) { - bool expired = FALSE; - bool is_last_failure = pcmk__ends_with(ID(xml_op), "_last_failure_0"); + bool expired = false; + bool is_last_failure = pcmk__ends_with(history->id, "_last_failure_0"); time_t last_run = 0; - guint interval_ms = 0; int unexpired_fail_count = 0; - const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *clear_reason = NULL; - crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); - - if ((rsc->failure_timeout > 0) - && (crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE, + if ((history->rsc->failure_timeout > 0) + && (crm_element_value_epoch(history->xml, XML_RSC_OP_LAST_CHANGE, &last_run) == 0)) { // Resource has a failure-timeout, and history entry has a timestamp - time_t now = get_effective_time(rsc->cluster); + time_t now = get_effective_time(history->rsc->cluster); time_t last_failure = 0; // Is this particular operation history older than the failure timeout? 
- if ((now >= (last_run + rsc->failure_timeout)) - && !should_ignore_failure_timeout(rsc, task, interval_ms, + if ((now >= (last_run + history->rsc->failure_timeout)) + && !should_ignore_failure_timeout(history->rsc, history->task, + history->interval_ms, is_last_failure)) { - expired = TRUE; + expired = true; } // Does the resource as a whole have an unexpired fail count? - unexpired_fail_count = pe_get_failcount(node, rsc, &last_failure, - pe_fc_effective, xml_op); + unexpired_fail_count = pe_get_failcount(history->node, history->rsc, + &last_failure, pe_fc_effective, + history->xml); // Update scheduler recheck time according to *last* failure crm_trace("%s@%lld is %sexpired @%lld with unexpired_failures=%d timeout=%ds" " last-failure@%lld", - ID(xml_op), (long long) last_run, (expired? "" : "not "), - (long long) now, unexpired_fail_count, rsc->failure_timeout, - (long long) last_failure); - last_failure += rsc->failure_timeout + 1; + history->id, (long long) last_run, (expired? "" : "not "), + (long long) now, unexpired_fail_count, + history->rsc->failure_timeout, (long long) last_failure); + last_failure += history->rsc->failure_timeout + 1; if (unexpired_fail_count && (now < last_failure)) { - pe__update_recheck_time(last_failure, rsc->cluster); + pe__update_recheck_time(last_failure, history->rsc->cluster); } } if (expired) { - if (pe_get_failcount(node, rsc, NULL, pe_fc_default, xml_op)) { - + if (pe_get_failcount(history->node, history->rsc, NULL, pe_fc_default, + history->xml)) { // There is a fail count ignoring timeout if (unexpired_fail_count == 0) { // There is no fail count considering timeout clear_reason = "it expired"; } else { /* This operation is old, but there is an unexpired fail count. * In a properly functioning cluster, this should only be * possible if this operation is not a failure (otherwise the * fail count should be expired too), so this is really just a * failsafe. 
*/ - expired = FALSE; + expired = false; } - } else if (is_last_failure && rsc->remote_reconnect_ms) { + } else if (is_last_failure + && (history->rsc->remote_reconnect_ms != 0)) { /* Clear any expired last failure when reconnect interval is set, * even if there is no fail count. */ clear_reason = "reconnect interval is set"; } } if (!expired && is_last_failure - && should_clear_for_param_change(xml_op, task, rsc, node)) { + && should_clear_for_param_change(history->xml, history->task, + history->rsc, history->node)) { clear_reason = "resource parameters have changed"; } if (clear_reason != NULL) { // Schedule clearing of the fail count - pe_action_t *clear_op = pe__clear_failcount(rsc, node, clear_reason, - rsc->cluster); + pe_action_t *clear_op = pe__clear_failcount(history->rsc, history->node, + clear_reason, + history->rsc->cluster); - if (pcmk_is_set(rsc->cluster->flags, pe_flag_stonith_enabled) - && rsc->remote_reconnect_ms) { + if (pcmk_is_set(history->rsc->cluster->flags, pe_flag_stonith_enabled) + && (history->rsc->remote_reconnect_ms != 0)) { /* If we're clearing a remote connection due to a reconnect * interval, we want to wait until any scheduled fencing * completes. * * We could limit this to remote_node->details->unclean, but at * this point, that's always true (it won't be reliable until * after unpack_node_history() is done). 
*/ crm_info("Clearing %s failure will wait until any scheduled " - "fencing of %s completes", task, rsc->id); - order_after_remote_fencing(clear_op, rsc, rsc->cluster); + "fencing of %s completes", + history->task, history->rsc->id); + order_after_remote_fencing(clear_op, history->rsc, + history->rsc->cluster); } } - if (expired && (interval_ms == 0) && pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei)) { - switch(rc) { + if (expired && (history->interval_ms == 0) + && pcmk__str_eq(history->task, CRMD_ACTION_STATUS, pcmk__str_none)) { + switch (history->exit_status) { case PCMK_OCF_OK: case PCMK_OCF_NOT_RUNNING: case PCMK_OCF_RUNNING_PROMOTED: case PCMK_OCF_DEGRADED: case PCMK_OCF_DEGRADED_PROMOTED: // Don't expire probes that return these values - expired = FALSE; + expired = false; break; } } return expired; } int pe__target_rc_from_xml(const xmlNode *xml_op) { int target_rc = 0; const char *key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY); if (key == NULL) { return -1; } decode_transition_key(key, NULL, NULL, NULL, &target_rc); return target_rc; } +/*! + * \internal + * \brief Get the failure handling for an action + * + * \param[in,out] history Parsed action history entry + * + * \return Failure handling appropriate to action + */ static enum action_fail_response -get_action_on_fail(pe_resource_t *rsc, const char *key, const char *task, pe_working_set_t * data_set) +get_action_on_fail(struct action_history *history) { enum action_fail_response result = action_fail_recover; - pe_action_t *action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set); + pe_action_t *action = custom_action(history->rsc, strdup(history->key), + history->task, NULL, TRUE, FALSE, + history->rsc->cluster); result = action->on_fail; pe_free_action(action); - return result; } +/*! 
+ * \internal + * \brief Update a resource's state for an action result + * + * \param[in,out] history Parsed action history entry + * \param[in] exit_status Exit status to base new state on + * \param[in] last_failure Resource's last_failure entry, if known + * \param[in,out] on_fail Resource's current failure handling + */ static void -update_resource_state(pe_resource_t *rsc, const pe_node_t *node, - const xmlNode *xml_op, const char *task, int rc, - xmlNode *last_failure, enum action_fail_response *on_fail, - pe_working_set_t *data_set) +update_resource_state(struct action_history *history, int exit_status, + const xmlNode *last_failure, + enum action_fail_response *on_fail) { - gboolean clear_past_failure = FALSE; - - CRM_ASSERT(rsc); - CRM_ASSERT(xml_op); + bool clear_past_failure = false; + + if ((exit_status == PCMK_OCF_NOT_INSTALLED) + || (!pe_rsc_is_bundled(history->rsc) + && pcmk_xe_mask_probe_failure(history->xml))) { + history->rsc->role = RSC_ROLE_STOPPED; + + } else if (exit_status == PCMK_OCF_NOT_RUNNING) { + clear_past_failure = true; + + } else if (pcmk__str_eq(history->task, CRMD_ACTION_STATUS, + pcmk__str_none)) { + if ((last_failure != NULL) + && pcmk__str_eq(history->key, pe__xe_history_key(last_failure), + pcmk__str_none)) { + clear_past_failure = true; + } + if (history->rsc->role < RSC_ROLE_STARTED) { + set_active(history->rsc); + } - if (rc == PCMK_OCF_NOT_INSTALLED || (!pe_rsc_is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op))) { - rsc->role = RSC_ROLE_STOPPED; + } else if (pcmk__str_eq(history->task, CRMD_ACTION_START, pcmk__str_none)) { + history->rsc->role = RSC_ROLE_STARTED; + clear_past_failure = true; - } else if (rc == PCMK_OCF_NOT_RUNNING) { - clear_past_failure = TRUE; + } else if (pcmk__str_eq(history->task, CRMD_ACTION_STOP, pcmk__str_none)) { + history->rsc->role = RSC_ROLE_STOPPED; + clear_past_failure = true; - } else if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei)) { - if (last_failure) { - const char 
*op_key = get_op_key(xml_op); - const char *last_failure_key = get_op_key(last_failure); + } else if (pcmk__str_eq(history->task, CRMD_ACTION_PROMOTE, + pcmk__str_none)) { + history->rsc->role = RSC_ROLE_PROMOTED; + clear_past_failure = true; - if (pcmk__str_eq(op_key, last_failure_key, pcmk__str_casei)) { - clear_past_failure = TRUE; - } + } else if (pcmk__str_eq(history->task, CRMD_ACTION_DEMOTE, + pcmk__str_none)) { + if (*on_fail == action_fail_demote) { + // Demote clears an error only if on-fail=demote + clear_past_failure = true; } + history->rsc->role = RSC_ROLE_UNPROMOTED; - if (rsc->role < RSC_ROLE_STARTED) { - set_active(rsc); - } + } else if (pcmk__str_eq(history->task, CRMD_ACTION_MIGRATED, + pcmk__str_none)) { + history->rsc->role = RSC_ROLE_STARTED; + clear_past_failure = true; - } else if (pcmk__str_eq(task, CRMD_ACTION_START, pcmk__str_casei)) { - rsc->role = RSC_ROLE_STARTED; - clear_past_failure = TRUE; + } else if (pcmk__str_eq(history->task, CRMD_ACTION_MIGRATE, + pcmk__str_none)) { + unpack_migrate_to_success(history); - } else if (pcmk__str_eq(task, CRMD_ACTION_STOP, pcmk__str_casei)) { - rsc->role = RSC_ROLE_STOPPED; - clear_past_failure = TRUE; + } else if (history->rsc->role < RSC_ROLE_STARTED) { + pe_rsc_trace(history->rsc, "%s active on %s", + history->rsc->id, pe__node_name(history->node)); + set_active(history->rsc); + } - } else if (pcmk__str_eq(task, CRMD_ACTION_PROMOTE, pcmk__str_casei)) { - rsc->role = RSC_ROLE_PROMOTED; - clear_past_failure = TRUE; + if (!clear_past_failure) { + return; + } - } else if (pcmk__str_eq(task, CRMD_ACTION_DEMOTE, pcmk__str_casei)) { + switch (*on_fail) { + case action_fail_stop: + case action_fail_fence: + case action_fail_migrate: + case action_fail_standby: + pe_rsc_trace(history->rsc, + "%s (%s) is not cleared by a completed %s", + history->rsc->id, fail2text(*on_fail), history->task); + break; - if (*on_fail == action_fail_demote) { - // Demote clears an error only if on-fail=demote - 
clear_past_failure = TRUE; + case action_fail_block: + case action_fail_ignore: + case action_fail_demote: + case action_fail_recover: + case action_fail_restart_container: + *on_fail = action_fail_ignore; + pe__set_next_role(history->rsc, RSC_ROLE_UNKNOWN, + "clear past failures"); + break; + + case action_fail_reset_remote: + if (history->rsc->remote_reconnect_ms == 0) { + /* With no reconnect interval, the connection is allowed to + * start again after the remote node is fenced and + * completely stopped. (With a reconnect interval, we wait + * for the failure to be cleared entirely before attempting + * to reconnect.) + */ + *on_fail = action_fail_ignore; + pe__set_next_role(history->rsc, RSC_ROLE_UNKNOWN, + "clear past failures and reset remote"); + } + break; + } +} + +/*! + * \internal + * \brief Check whether a given history entry matters for resource state + * + * \param[in] history Parsed action history entry + * + * \return true if action can affect resource state, otherwise false + */ +static inline bool +can_affect_state(struct action_history *history) +{ +#if 0 + /* @COMPAT It might be better to parse only actions we know we're interested + * in, rather than exclude a couple we don't. However that would be a + * behavioral change that should be done at a major or minor series release. + * Currently, unknown operations can affect whether a resource is considered + * active and/or failed. + */ + return pcmk__str_any_of(history->task, CRMD_ACTION_STATUS, + CRMD_ACTION_START, CRMD_ACTION_STOP, + CRMD_ACTION_PROMOTE, CRMD_ACTION_DEMOTE, + CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, + "asyncmon", NULL); +#else + return !pcmk__str_any_of(history->task, CRMD_ACTION_NOTIFY, + CRMD_ACTION_METADATA, NULL); +#endif +} + +/*! 
+ * \internal + * \brief Unpack execution/exit status and exit reason from a history entry + * + * \param[in,out] history Action history entry to unpack + * + * \return Standard Pacemaker return code + */ +static int +unpack_action_result(struct action_history *history) +{ + if ((crm_element_value_int(history->xml, XML_LRM_ATTR_OPSTATUS, + &(history->execution_status)) < 0) + || (history->execution_status < PCMK_EXEC_PENDING) + || (history->execution_status > PCMK_EXEC_MAX) + || (history->execution_status == PCMK_EXEC_CANCELLED)) { + crm_err("Ignoring resource history entry %s for %s on %s " + "with invalid " XML_LRM_ATTR_OPSTATUS " '%s'", + history->id, history->rsc->id, pe__node_name(history->node), + pcmk__s(crm_element_value(history->xml, XML_LRM_ATTR_OPSTATUS), + "")); + return pcmk_rc_unpack_error; + } + if ((crm_element_value_int(history->xml, XML_LRM_ATTR_RC, + &(history->exit_status)) < 0) + || (history->exit_status < 0) || (history->exit_status > CRM_EX_MAX)) { +#if 0 + /* @COMPAT We should ignore malformed entries, but since that would + * change behavior, it should be done at a major or minor series + * release. + */ + crm_err("Ignoring resource history entry %s for %s on %s " + "with invalid " XML_LRM_ATTR_RC " '%s'", + history->id, history->rsc->id, pe__node_name(history->node), + pcmk__s(crm_element_value(history->xml, XML_LRM_ATTR_RC), + "")); + return pcmk_rc_unpack_error; +#else + history->exit_status = CRM_EX_ERROR; +#endif + } + history->exit_reason = crm_element_value(history->xml, + XML_LRM_ATTR_EXIT_REASON); + return pcmk_rc_ok; +} + +/*! 
+ * \internal + * \brief Process an action history entry whose result expired + * + * \param[in,out] history Parsed action history entry + * \param[in] orig_exit_status Action exit status before remapping + * + * \return Standard Pacemaker return code (in particular, pcmk_rc_ok means the + * entry needs no further processing) + */ +static int +process_expired_result(struct action_history *history, int orig_exit_status) +{ + if (!pe_rsc_is_bundled(history->rsc) + && pcmk_xe_mask_probe_failure(history->xml) + && (orig_exit_status != history->expected_exit_status)) { + + if (history->rsc->role <= RSC_ROLE_STOPPED) { + history->rsc->role = RSC_ROLE_UNKNOWN; } - rsc->role = RSC_ROLE_UNPROMOTED; + crm_trace("Ignoring resource history entry %s for probe of %s on %s: " + "Masked failure expired", + history->id, history->rsc->id, + pe__node_name(history->node)); + return pcmk_rc_ok; + } - } else if (pcmk__str_eq(task, CRMD_ACTION_MIGRATED, pcmk__str_casei)) { - rsc->role = RSC_ROLE_STARTED; - clear_past_failure = TRUE; + if (history->exit_status == history->expected_exit_status) { + return pcmk_rc_undetermined; // Only failures expire + } - } else if (pcmk__str_eq(task, CRMD_ACTION_MIGRATE, pcmk__str_casei)) { - unpack_migrate_to_success(rsc, node, xml_op); + if (history->interval_ms == 0) { + crm_notice("Ignoring resource history entry %s for %s of %s on %s: " + "Expired failure", + history->id, history->task, history->rsc->id, + pe__node_name(history->node)); + return pcmk_rc_ok; + } - } else if (rsc->role < RSC_ROLE_STARTED) { - pe_rsc_trace(rsc, "%s active on %s", rsc->id, pe__node_name(node)); - set_active(rsc); + if (history->node->details->online && !history->node->details->unclean) { + /* Reschedule the recurring action. schedule_cancel() won't work at + * this stage, so as a hacky workaround, forcibly change the restart + * digest so pcmk__check_action_config() does what we want later. + * + * @TODO We should skip this if there is a newer successful monitor. 
+ * Also, this causes rescheduling only if the history entry + * has an op-digest (which the expire-non-blocked-failure + * scheduler regression test doesn't, but that may not be a + * realistic scenario in production). + */ + crm_notice("Rescheduling %s-interval %s of %s on %s " + "after failure expired", + pcmk__readable_interval(history->interval_ms), history->task, + history->rsc->id, pe__node_name(history->node)); + crm_xml_add(history->xml, XML_LRM_ATTR_RESTART_DIGEST, + "calculated-failure-timeout"); + return pcmk_rc_ok; } - /* clear any previous failure actions */ - if (clear_past_failure) { - switch (*on_fail) { - case action_fail_stop: - case action_fail_fence: - case action_fail_migrate: - case action_fail_standby: - pe_rsc_trace(rsc, "%s.%s is not cleared by a completed stop", - rsc->id, fail2text(*on_fail)); - break; + return pcmk_rc_undetermined; +} - case action_fail_block: - case action_fail_ignore: - case action_fail_demote: - case action_fail_recover: - case action_fail_restart_container: - *on_fail = action_fail_ignore; - pe__set_next_role(rsc, RSC_ROLE_UNKNOWN, "clear past failures"); - break; - case action_fail_reset_remote: - if (rsc->remote_reconnect_ms == 0) { - /* With no reconnect interval, the connection is allowed to - * start again after the remote node is fenced and - * completely stopped. (With a reconnect interval, we wait - * for the failure to be cleared entirely before attempting - * to reconnect.) - */ - *on_fail = action_fail_ignore; - pe__set_next_role(rsc, RSC_ROLE_UNKNOWN, - "clear past failures and reset remote"); - } - break; +/*! 
+ * \internal + * \brief Process a masked probe failure + * + * \param[in,out] history Parsed action history entry + * \param[in] orig_exit_status Action exit status before remapping + * \param[in] last_failure Resource's last_failure entry, if known + * \param[in,out] on_fail Resource's current failure handling + */ +static void +mask_probe_failure(struct action_history *history, int orig_exit_status, + const xmlNode *last_failure, + enum action_fail_response *on_fail) +{ + pe_resource_t *ban_rsc = history->rsc; + + if (!pcmk_is_set(history->rsc->flags, pe_rsc_unique)) { + ban_rsc = uber_parent(history->rsc); + } + + crm_notice("Treating probe result '%s' for %s on %s as 'not running'", + services_ocf_exitcode_str(orig_exit_status), history->rsc->id, + pe__node_name(history->node)); + update_resource_state(history, history->expected_exit_status, last_failure, + on_fail); + crm_xml_add(history->xml, XML_ATTR_UNAME, history->node->details->uname); + + record_failed_op(history); + resource_location(ban_rsc, history->node, -INFINITY, "masked-probe-failure", + history->rsc->cluster); +} + +/*! + * \internal + * \brief Update a resource's role etc. for a pending action + * + * \param[in,out] history Parsed history entry for pending action + */ +static void +process_pending_action(struct action_history *history) +{ + if (strcmp(history->task, CRMD_ACTION_START) == 0) { + pe__set_resource_flags(history->rsc, pe_rsc_start_pending); + set_active(history->rsc); + + } else if (strcmp(history->task, CRMD_ACTION_PROMOTE) == 0) { + history->rsc->role = RSC_ROLE_PROMOTED; + + } else if ((strcmp(history->task, CRMD_ACTION_MIGRATE) == 0) + && history->node->details->unclean) { + /* A migrate_to action is pending on a unclean source, so force a stop + * on the target. 
+ */ + const char *migrate_target = NULL; + pe_node_t *target = NULL; + + migrate_target = crm_element_value(history->xml, + XML_LRM_ATTR_MIGRATE_TARGET); + target = pe_find_node(history->rsc->cluster->nodes, migrate_target); + if (target != NULL) { + stop_action(history->rsc, target, FALSE); } } + + if (history->rsc->pending_task != NULL) { + /* There should never be multiple pending actions, but as a failsafe, + * just remember the first one processed for display purposes. + */ + return; + } + + if (pcmk_is_probe(history->task, history->interval_ms)) { + /* Pending probes are currently never displayed, even if pending + * operations are requested. If we ever want to change that, + * enable the below and the corresponding part of + * native.c:native_pending_task(). + */ +#if 0 + history->rsc->pending_task = strdup("probe"); + history->rsc->pending_node = history->node; +#endif + } else { + history->rsc->pending_task = strdup(history->task); + history->rsc->pending_node = history->node; + } } static void unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - xmlNode **last_failure, enum action_fail_response *on_fail, - pe_working_set_t *data_set) + xmlNode **last_failure, enum action_fail_response *on_fail) { - int rc = 0; int old_rc = 0; - int task_id = 0; - int target_rc = 0; - int old_target_rc = 0; - int status = PCMK_EXEC_UNKNOWN; - guint interval_ms = 0; - const char *task = NULL; - const char *task_key = NULL; - const char *exit_reason = NULL; bool expired = false; pe_resource_t *parent = rsc; enum action_fail_response failure_strategy = action_fail_recover; - bool maskable_probe_failure = false; - char *last_change_s = NULL; + + struct action_history history = { + .rsc = rsc, + .node = node, + .xml = xml_op, + .execution_status = PCMK_EXEC_UNKNOWN, + }; CRM_CHECK(rsc && node && xml_op, return); - target_rc = pe__target_rc_from_xml(xml_op); - task_key = get_op_key(xml_op); - task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); - exit_reason = 
crm_element_value(xml_op, XML_LRM_ATTR_EXIT_REASON); - if (exit_reason == NULL) { - exit_reason = ""; + history.id = ID(xml_op); + if (history.id == NULL) { + crm_err("Ignoring resource history entry for %s on %s without ID", + rsc->id, pe__node_name(node)); + return; } - crm_element_value_int(xml_op, XML_LRM_ATTR_RC, &rc); - crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id); - crm_element_value_int(xml_op, XML_LRM_ATTR_OPSTATUS, &status); - crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); - - CRM_CHECK(task != NULL, return); - CRM_CHECK((status >= PCMK_EXEC_PENDING) && (status <= PCMK_EXEC_MAX), - return); - - if (!strcmp(task, CRMD_ACTION_NOTIFY) || - !strcmp(task, CRMD_ACTION_METADATA)) { - /* safe to ignore these */ + // Task and interval + history.task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); + if (history.task == NULL) { + crm_err("Ignoring resource history entry %s for %s on %s without " + XML_LRM_ATTR_TASK, history.id, rsc->id, pe__node_name(node)); + return; + } + crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, + &(history.interval_ms)); + if (!can_affect_state(&history)) { + pe_rsc_trace(rsc, + "Ignoring resource history entry %s for %s on %s " + "with irrelevant action '%s'", + history.id, rsc->id, pe__node_name(node), history.task); return; } - if (!pcmk_is_set(rsc->flags, pe_rsc_unique)) { - parent = uber_parent(rsc); + if (unpack_action_result(&history) != pcmk_rc_ok) { + return; // Error already logged } + history.expected_exit_status = pe__target_rc_from_xml(xml_op); + history.key = pe__xe_history_key(xml_op); + crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &(history.call_id)); + pe_rsc_trace(rsc, "Unpacking %s (%s call %d on %s): %s (%s)", - ID(xml_op), task, task_id, pe__node_name(node), - pcmk_exec_status_str(status), crm_exit_str(rc)); + history.id, history.task, history.call_id, pe__node_name(node), + pcmk_exec_status_str(history.execution_status), + crm_exit_str(history.exit_status)); if 
(node->details->unclean) { pe_rsc_trace(rsc, "%s is running on %s, which is unclean (further action " "depends on value of stop's on-fail attribute)", rsc->id, pe__node_name(node)); } /* It should be possible to call remap_operation() first then call - * check_operation_expiry() only if rc != target_rc, because there should - * never be a fail count without at least one unexpected result in the - * resource history. That would be more efficient by avoiding having to call - * check_operation_expiry() for expected results. + * check_operation_expiry() only if exit_status != expected_exit_status, + * because there should never be a fail count without at least one + * unexpected result in the resource history. That would be more efficient + * by avoiding having to call check_operation_expiry() for expected results. * * However, we do have such configurations in the scheduler regression * tests, even if it shouldn't be possible with the current code. It's * probably a good idea anyway, but that would require updating the test * inputs to something currently possible. 
*/ - if ((status != PCMK_EXEC_NOT_INSTALLED) - && check_operation_expiry(rsc, node, rc, xml_op)) { + if ((history.execution_status != PCMK_EXEC_NOT_INSTALLED) + && check_operation_expiry(&history)) { expired = true; } - old_rc = rc; - old_target_rc = target_rc; - - remap_operation(xml_op, rsc, node, data_set, on_fail, target_rc, - &rc, &status); - - maskable_probe_failure = !pe_rsc_is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op); + old_rc = history.exit_status; - last_change_s = last_change_str(xml_op); - - if (expired && maskable_probe_failure && old_rc != old_target_rc) { - if (rsc->role <= RSC_ROLE_STOPPED) { - rsc->role = RSC_ROLE_UNKNOWN; - } + remap_operation(&history, on_fail); + if (expired && (process_expired_result(&history, old_rc) == pcmk_rc_ok)) { goto done; - - } else if (expired && (rc != target_rc)) { - const char *magic = crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC); - - if (interval_ms == 0) { - crm_notice("Ignoring expired %s failure on %s " - CRM_XS " actual=%d expected=%d magic=%s", - task_key, pe__node_name(node), rc, target_rc, magic); - goto done; - - } else if(node->details->online && node->details->unclean == FALSE) { - /* Reschedule the recurring monitor. schedule_cancel() won't work at - * this stage, so as a hacky workaround, forcibly change the restart - * digest so pcmk__check_action_config() does what we want later. - * - * @TODO We should skip this if there is a newer successful monitor. - * Also, this causes rescheduling only if the history entry - * has an op-digest (which the expire-non-blocked-failure - * scheduler regression test doesn't, but that may not be a - * realistic scenario in production). 
- */ - crm_notice("Rescheduling %s after failure expired on %s " - CRM_XS " actual=%d expected=%d magic=%s", - task_key, pe__node_name(node), rc, target_rc, magic); - crm_xml_add(xml_op, XML_LRM_ATTR_RESTART_DIGEST, "calculated-failure-timeout"); - goto done; - } } - if (maskable_probe_failure) { - crm_notice("Treating probe result '%s' for %s on %s as 'not running'", - services_ocf_exitcode_str(old_rc), rsc->id, - pe__node_name(node)); - update_resource_state(rsc, node, xml_op, task, target_rc, *last_failure, - on_fail, data_set); - crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname); - - record_failed_op(xml_op, node, rsc, data_set); - resource_location(parent, node, -INFINITY, "masked-probe-failure", data_set); + if (!pe_rsc_is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op)) { + mask_probe_failure(&history, old_rc, *last_failure, on_fail); goto done; } - switch (status) { - case PCMK_EXEC_CANCELLED: - // Should never happen - pe_err("Resource history contains cancellation '%s' " - "(%s of %s on %s at %s)", - ID(xml_op), task, rsc->id, pe__node_name(node), - last_change_s); - goto done; + if (!pcmk_is_set(rsc->flags, pe_rsc_unique)) { + parent = uber_parent(rsc); + } + switch (history.execution_status) { case PCMK_EXEC_PENDING: - if (!strcmp(task, CRMD_ACTION_START)) { - pe__set_resource_flags(rsc, pe_rsc_start_pending); - set_active(rsc); - - } else if (!strcmp(task, CRMD_ACTION_PROMOTE)) { - rsc->role = RSC_ROLE_PROMOTED; - - } else if (!strcmp(task, CRMD_ACTION_MIGRATE) && node->details->unclean) { - /* If a pending migrate_to action is out on a unclean node, - * we have to force the stop action on the target. 
*/ - const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); - pe_node_t *target = pe_find_node(data_set->nodes, migrate_target); - if (target) { - stop_action(rsc, target, FALSE); - } - } - - if (rsc->pending_task == NULL) { - if ((interval_ms != 0) || strcmp(task, CRMD_ACTION_STATUS)) { - rsc->pending_task = strdup(task); - - /* @COMPAT I don't like breaking const signatures, but - * rsc->pending_node should really be const -- we just can't - * change it until the next API compatibilit break. - */ - rsc->pending_node = (pe_node_t *) node; - } else { - /* Pending probes are not printed, even if pending - * operations are requested. If someone ever requests that - * behavior, enable the below and the corresponding part of - * native.c:native_pending_task(). - */ -#if 0 - rsc->pending_task = strdup("probe"); - rsc->pending_node = (pe_node_t *) node; -#endif - } - } + process_pending_action(&history); goto done; case PCMK_EXEC_DONE: - update_resource_state(rsc, node, xml_op, task, rc, *last_failure, on_fail, data_set); + update_resource_state(&history, history.exit_status, *last_failure, + on_fail); goto done; case PCMK_EXEC_NOT_INSTALLED: - failure_strategy = get_action_on_fail(rsc, task_key, task, data_set); + failure_strategy = get_action_on_fail(&history); if (failure_strategy == action_fail_ignore) { crm_warn("Cannot ignore failed %s of %s on %s: " "Resource agent doesn't exist " CRM_XS " status=%d rc=%d id=%s", - task, rsc->id, pe__node_name(node), status, rc, - ID(xml_op)); + history.task, rsc->id, pe__node_name(node), + history.execution_status, history.exit_status, + history.id); /* Also for printing it as "FAILED" by marking it as pe_rsc_failed later */ *on_fail = action_fail_migrate; } - resource_location(parent, node, -INFINITY, "hard-error", data_set); - unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set); + resource_location(parent, node, -INFINITY, "hard-error", + rsc->cluster); + 
unpack_rsc_op_failure(&history, last_failure, on_fail); goto done; case PCMK_EXEC_NOT_CONNECTED: if (pe__is_guest_or_remote_node(node) && pcmk_is_set(node->details->remote_rsc->flags, pe_rsc_managed)) { /* We should never get into a situation where a managed remote * connection resource is considered OK but a resource action * behind the connection gets a "not connected" status. But as a * fail-safe in case a bug or unusual circumstances do lead to * that, ensure the remote connection is considered failed. */ pe__set_resource_flags(node->details->remote_rsc, pe_rsc_failed|pe_rsc_stop); } break; // Not done, do error handling case PCMK_EXEC_ERROR: case PCMK_EXEC_ERROR_HARD: case PCMK_EXEC_ERROR_FATAL: case PCMK_EXEC_TIMEOUT: case PCMK_EXEC_NOT_SUPPORTED: case PCMK_EXEC_INVALID: break; // Not done, do error handling - case PCMK_EXEC_NO_FENCE_DEVICE: - case PCMK_EXEC_NO_SECRETS: - status = PCMK_EXEC_ERROR_HARD; - break; // Not done, do error handling + default: // No other value should be possible at this point + break; } - failure_strategy = get_action_on_fail(rsc, task_key, task, data_set); + failure_strategy = get_action_on_fail(&history); if ((failure_strategy == action_fail_ignore) || (failure_strategy == action_fail_restart_container - && !strcmp(task, CRMD_ACTION_STOP))) { + && (strcmp(history.task, CRMD_ACTION_STOP) == 0))) { - crm_warn("Pretending failed %s (%s%s%s) of %s on %s at %s " - "succeeded " CRM_XS " rc=%d id=%s", - task, services_ocf_exitcode_str(rc), - (*exit_reason? ": " : ""), exit_reason, rsc->id, - pe__node_name(node), last_change_s, rc, ID(xml_op)); + char *last_change_s = last_change_str(xml_op); - update_resource_state(rsc, node, xml_op, task, target_rc, *last_failure, - on_fail, data_set); + crm_warn("Pretending failed %s (%s%s%s) of %s on %s at %s succeeded " + CRM_XS " %s", + history.task, services_ocf_exitcode_str(history.exit_status), + (pcmk__str_empty(history.exit_reason)? 
"" : ": "), + pcmk__s(history.exit_reason, ""), rsc->id, pe__node_name(node), + last_change_s, history.id); + free(last_change_s); + + update_resource_state(&history, history.expected_exit_status, + *last_failure, on_fail); crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname); pe__set_resource_flags(rsc, pe_rsc_failure_ignored); - record_failed_op(xml_op, node, rsc, data_set); + record_failed_op(&history); if ((failure_strategy == action_fail_restart_container) && cmp_on_fail(*on_fail, action_fail_recover) <= 0) { *on_fail = failure_strategy; } } else { - unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, - data_set); + unpack_rsc_op_failure(&history, last_failure, on_fail); + + if (history.execution_status == PCMK_EXEC_ERROR_HARD) { + uint8_t log_level = LOG_ERR; - if (status == PCMK_EXEC_ERROR_HARD) { - do_crm_log(rc != PCMK_OCF_NOT_INSTALLED?LOG_ERR:LOG_NOTICE, + if (history.exit_status == PCMK_OCF_NOT_INSTALLED) { + log_level = LOG_NOTICE; + } + do_crm_log(log_level, "Preventing %s from restarting on %s because " - "of hard failure (%s%s%s)" CRM_XS " rc=%d id=%s", + "of hard failure (%s%s%s) " CRM_XS " %s", parent->id, pe__node_name(node), - services_ocf_exitcode_str(rc), - (*exit_reason? ": " : ""), exit_reason, - rc, ID(xml_op)); - resource_location(parent, node, -INFINITY, "hard-error", data_set); + services_ocf_exitcode_str(history.exit_status), + (pcmk__str_empty(history.exit_reason)? "" : ": "), + pcmk__s(history.exit_reason, ""), history.id); + resource_location(parent, node, -INFINITY, "hard-error", + rsc->cluster); - } else if (status == PCMK_EXEC_ERROR_FATAL) { + } else if (history.execution_status == PCMK_EXEC_ERROR_FATAL) { crm_err("Preventing %s from restarting anywhere because " - "of fatal failure (%s%s%s) " CRM_XS " rc=%d id=%s", - parent->id, services_ocf_exitcode_str(rc), - (*exit_reason? 
": " : ""), exit_reason, - rc, ID(xml_op)); - resource_location(parent, NULL, -INFINITY, "fatal-error", data_set); + "of fatal failure (%s%s%s) " CRM_XS " %s", + parent->id, services_ocf_exitcode_str(history.exit_status), + (pcmk__str_empty(history.exit_reason)? "" : ": "), + pcmk__s(history.exit_reason, ""), history.id); + resource_location(parent, NULL, -INFINITY, "fatal-error", + rsc->cluster); } } done: - free(last_change_s); pe_rsc_trace(rsc, "%s role on %s after %s is %s (next %s)", - rsc->id, pe__node_name(node), ID(xml_op), + rsc->id, pe__node_name(node), history.id, role2text(rsc->role), role2text(rsc->next_role)); } static void add_node_attrs(const xmlNode *xml_obj, pe_node_t *node, bool overwrite, pe_working_set_t *data_set) { const char *cluster_name = NULL; pe_rule_eval_data_t rule_data = { .node_hash = NULL, .role = RSC_ROLE_UNKNOWN, .now = data_set->now, .match_data = NULL, .rsc_data = NULL, .op_data = NULL }; g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_UNAME), strdup(node->details->uname)); g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_ID), strdup(node->details->id)); if (pcmk__str_eq(node->details->id, data_set->dc_uuid, pcmk__str_casei)) { data_set->dc_node = node; node->details->is_dc = TRUE; g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_TRUE)); } else { g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_FALSE)); } cluster_name = g_hash_table_lookup(data_set->config_hash, "cluster-name"); if (cluster_name) { g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_CLUSTER_NAME), strdup(cluster_name)); } pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_ATTR_SETS, &rule_data, node->details->attrs, NULL, overwrite, data_set); pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_UTILIZATION, &rule_data, node->details->utilization, NULL, FALSE, data_set); if (pe_node_attribute_raw(node, CRM_ATTR_SITE_NAME) == NULL) { const char *site_name = 
pe_node_attribute_raw(node, "site-name"); if (site_name) { g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_SITE_NAME), strdup(site_name)); } else if (cluster_name) { /* Default to cluster-name if unset */ g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_SITE_NAME), strdup(cluster_name)); } } } static GList * extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter) { int counter = -1; int stop_index = -1; int start_index = -1; xmlNode *rsc_op = NULL; GList *gIter = NULL; GList *op_list = NULL; GList *sorted_op_list = NULL; /* extract operations */ op_list = NULL; sorted_op_list = NULL; for (rsc_op = pcmk__xe_first_child(rsc_entry); rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op)) { if (pcmk__str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, pcmk__str_none)) { crm_xml_add(rsc_op, "resource", rsc); crm_xml_add(rsc_op, XML_ATTR_UNAME, node); op_list = g_list_prepend(op_list, rsc_op); } } if (op_list == NULL) { /* if there are no operations, there is nothing to do */ return NULL; } sorted_op_list = g_list_sort(op_list, sort_op_by_callid); /* create active recurring operations as optional */ if (active_filter == FALSE) { return sorted_op_list; } op_list = NULL; calculate_active_ops(sorted_op_list, &start_index, &stop_index); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; counter++; if (start_index < stop_index) { crm_trace("Skipping %s: not active", ID(rsc_entry)); break; } else if (counter < start_index) { crm_trace("Skipping %s: old", ID(rsc_op)); continue; } op_list = g_list_append(op_list, rsc_op); } g_list_free(sorted_op_list); return op_list; } GList * find_operations(const char *rsc, const char *node, gboolean active_filter, pe_working_set_t * data_set) { GList *output = NULL; GList *intermediate = NULL; xmlNode *tmp = NULL; xmlNode *status = find_xml_node(data_set->input, XML_CIB_TAG_STATUS, TRUE); pe_node_t *this_node = NULL; xmlNode 
*node_state = NULL; for (node_state = pcmk__xe_first_child(status); node_state != NULL; node_state = pcmk__xe_next(node_state)) { if (pcmk__str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, pcmk__str_none)) { const char *uname = crm_element_value(node_state, XML_ATTR_UNAME); if (node != NULL && !pcmk__str_eq(uname, node, pcmk__str_casei)) { continue; } this_node = pe_find_node(data_set->nodes, uname); if(this_node == NULL) { CRM_LOG_ASSERT(this_node != NULL); continue; } else if (pe__is_guest_or_remote_node(this_node)) { determine_remote_online_status(data_set, this_node); } else { determine_online_status(node_state, this_node, data_set); } if (this_node->details->online || pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) { /* offline nodes run no resources... * unless stonith is enabled in which case we need to * make sure rsc start events happen after the stonith */ xmlNode *lrm_rsc = NULL; tmp = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); tmp = find_xml_node(tmp, XML_LRM_TAG_RESOURCES, FALSE); for (lrm_rsc = pcmk__xe_first_child(tmp); lrm_rsc != NULL; lrm_rsc = pcmk__xe_next(lrm_rsc)) { if (pcmk__str_eq((const char *)lrm_rsc->name, XML_LRM_TAG_RESOURCE, pcmk__str_none)) { const char *rsc_id = crm_element_value(lrm_rsc, XML_ATTR_ID); if (rsc != NULL && !pcmk__str_eq(rsc_id, rsc, pcmk__str_casei)) { continue; } intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter); output = g_list_concat(output, intermediate); } } } } } return output; } diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c index aae756c861..88cacbac2a 100644 --- a/lib/pengine/utils.c +++ b/lib/pengine/utils.c @@ -1,917 +1,915 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
*/ #include #include #include #include #include #include #include #include "pe_status_private.h" extern bool pcmk__is_daemon; gboolean ghash_free_str_str(gpointer key, gpointer value, gpointer user_data); /*! * \internal * \brief Check whether we can fence a particular node * * \param[in] data_set Working set for cluster * \param[in] node Name of node to check * * \return true if node can be fenced, false otherwise */ bool pe_can_fence(const pe_working_set_t *data_set, const pe_node_t *node) { if (pe__is_guest_node(node)) { /* Guest nodes are fenced by stopping their container resource. We can * do that if the container's host is either online or fenceable. */ pe_resource_t *rsc = node->details->remote_rsc->container; for (GList *n = rsc->running_on; n != NULL; n = n->next) { pe_node_t *container_node = n->data; if (!container_node->details->online && !pe_can_fence(data_set, container_node)) { return false; } } return true; } else if (!pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) { return false; /* Turned off */ } else if (!pcmk_is_set(data_set->flags, pe_flag_have_stonith_resource)) { return false; /* No devices */ } else if (pcmk_is_set(data_set->flags, pe_flag_have_quorum)) { return true; } else if (data_set->no_quorum_policy == no_quorum_ignore) { return true; } else if(node == NULL) { return false; } else if(node->details->online) { crm_notice("We can fence %s without quorum because they're in our membership", pe__node_name(node)); return true; } crm_trace("Cannot fence %s", pe__node_name(node)); return false; } /*! * \internal * \brief Copy a node object * * \param[in] this_node Node object to copy * * \return Newly allocated shallow copy of this_node * \note This function asserts on errors and is guaranteed to return non-NULL. 
*/ pe_node_t * pe__copy_node(const pe_node_t *this_node) { pe_node_t *new_node = NULL; CRM_ASSERT(this_node != NULL); new_node = calloc(1, sizeof(pe_node_t)); CRM_ASSERT(new_node != NULL); new_node->rsc_discover_mode = this_node->rsc_discover_mode; new_node->weight = this_node->weight; new_node->fixed = this_node->fixed; // @COMPAT deprecated and unused new_node->details = this_node->details; return new_node; } /* any node in list1 or list2 and not in the other gets a score of -INFINITY */ void node_list_exclude(GHashTable * hash, GList *list, gboolean merge_scores) { GHashTable *result = hash; pe_node_t *other_node = NULL; GList *gIter = list; GHashTableIter iter; pe_node_t *node = NULL; g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { other_node = pe_find_node_id(list, node->details->id); if (other_node == NULL) { node->weight = -INFINITY; } else if (merge_scores) { node->weight = pcmk__add_scores(node->weight, other_node->weight); } } for (; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; other_node = pe_hash_table_lookup(result, node->details->id); if (other_node == NULL) { pe_node_t *new_node = pe__copy_node(node); new_node->weight = -INFINITY; g_hash_table_insert(result, (gpointer) new_node->details->id, new_node); } } } /*! * \internal * \brief Create a node hash table from a node list * * \param[in] list Node list * * \return Hash table equivalent of node list */ GHashTable * pe__node_list2table(const GList *list) { GHashTable *result = NULL; result = pcmk__strkey_table(NULL, free); for (const GList *gIter = list; gIter != NULL; gIter = gIter->next) { pe_node_t *new_node = pe__copy_node((const pe_node_t *) gIter->data); g_hash_table_insert(result, (gpointer) new_node->details->id, new_node); } return result; } /*! 
* \internal * \brief Compare two nodes by name, with numeric portions sorted numerically * * Sort two node names case-insensitively like strcasecmp(), but with any * numeric portions of the name sorted numerically. For example, "node10" will * sort higher than "node9" but lower than "remotenode9". * * \param[in] a First node to compare (can be \c NULL) * \param[in] b Second node to compare (can be \c NULL) * * \retval -1 \c a comes before \c b (or \c a is \c NULL and \c b is not) * \retval 0 \c a and \c b are equal (or both are \c NULL) * \retval 1 \c a comes after \c b (or \c b is \c NULL and \c a is not) */ gint pe__cmp_node_name(gconstpointer a, gconstpointer b) { const pe_node_t *node1 = (const pe_node_t *) a; const pe_node_t *node2 = (const pe_node_t *) b; if ((node1 == NULL) && (node2 == NULL)) { return 0; } if (node1 == NULL) { return -1; } if (node2 == NULL) { return 1; } return pcmk__numeric_strcasecmp(node1->details->uname, node2->details->uname); } /*! * \internal * \brief Output node weights to stdout * * \param[in] rsc Use allowed nodes for this resource * \param[in] comment Text description to prefix lines with * \param[in] nodes If rsc is not specified, use these nodes * \param[in,out] data_set Cluster working set */ static void pe__output_node_weights(const pe_resource_t *rsc, const char *comment, GHashTable *nodes, pe_working_set_t *data_set) { pcmk__output_t *out = data_set->priv; // Sort the nodes so the output is consistent for regression tests GList *list = g_list_sort(g_hash_table_get_values(nodes), pe__cmp_node_name); for (const GList *gIter = list; gIter != NULL; gIter = gIter->next) { const pe_node_t *node = (const pe_node_t *) gIter->data; out->message(out, "node-weight", rsc, comment, node->details->uname, pcmk_readable_score(node->weight)); } g_list_free(list); } /*! 
* \internal * \brief Log node weights at trace level * * \param[in] file Caller's filename * \param[in] function Caller's function name * \param[in] line Caller's line number * \param[in] rsc If not NULL, include this resource's ID in logs * \param[in] comment Text description to prefix lines with * \param[in] nodes Nodes whose scores should be logged */ static void pe__log_node_weights(const char *file, const char *function, int line, const pe_resource_t *rsc, const char *comment, GHashTable *nodes) { GHashTableIter iter; pe_node_t *node = NULL; // Don't waste time if we're not tracing at this point pcmk__if_tracing({}, return); g_hash_table_iter_init(&iter, nodes); while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) { if (rsc) { qb_log_from_external_source(function, file, "%s: %s allocation score on %s: %s", LOG_TRACE, line, 0, comment, rsc->id, pe__node_name(node), pcmk_readable_score(node->weight)); } else { qb_log_from_external_source(function, file, "%s: %s = %s", LOG_TRACE, line, 0, comment, pe__node_name(node), pcmk_readable_score(node->weight)); } } } /*! 
* \internal * \brief Log or output node weights * * \param[in] file Caller's filename * \param[in] function Caller's function name * \param[in] line Caller's line number * \param[in] to_log Log if true, otherwise output * \param[in] rsc If not NULL, use this resource's ID in logs, * and show scores recursively for any children * \param[in] comment Text description to prefix lines with * \param[in] nodes Nodes whose scores should be shown * \param[in,out] data_set Cluster working set */ void pe__show_node_weights_as(const char *file, const char *function, int line, bool to_log, const pe_resource_t *rsc, const char *comment, GHashTable *nodes, pe_working_set_t *data_set) { if (rsc != NULL && pcmk_is_set(rsc->flags, pe_rsc_orphan)) { // Don't show allocation scores for orphans return; } if (nodes == NULL) { // Nothing to show return; } if (to_log) { pe__log_node_weights(file, function, line, rsc, comment, nodes); } else { pe__output_node_weights(rsc, comment, nodes, data_set); } // If this resource has children, repeat recursively for each if (rsc && rsc->children) { for (GList *gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; pe__show_node_weights_as(file, function, line, to_log, child, comment, child->allowed_nodes, data_set); } } } /*! 
* \internal * \brief Compare two resources by priority * * \param[in] a First resource to compare (can be \c NULL) * \param[in] b Second resource to compare (can be \c NULL) * * \retval -1 \c a->priority > \c b->priority (or \c b is \c NULL and \c a is * not) * \retval 0 \c a->priority == \c b->priority (or both \c a and \c b are * \c NULL) * \retval 1 \c a->priority < \c b->priority (or \c a is \c NULL and \c b is * not) */ gint pe__cmp_rsc_priority(gconstpointer a, gconstpointer b) { const pe_resource_t *resource1 = (const pe_resource_t *)a; const pe_resource_t *resource2 = (const pe_resource_t *)b; if (a == NULL && b == NULL) { return 0; } if (a == NULL) { return 1; } if (b == NULL) { return -1; } if (resource1->priority > resource2->priority) { return -1; } if (resource1->priority < resource2->priority) { return 1; } return 0; } static void resource_node_score(pe_resource_t *rsc, const pe_node_t *node, int score, const char *tag) { pe_node_t *match = NULL; if ((rsc->exclusive_discover || (node->rsc_discover_mode == pe_discover_never)) && pcmk__str_eq(tag, "symmetric_default", pcmk__str_casei)) { /* This string comparision may be fragile, but exclusive resources and * exclusive nodes should not have the symmetric_default constraint * applied to them. 
*/ return; } else if (rsc->children) { GList *gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; resource_node_score(child_rsc, node, score, tag); } } pe_rsc_trace(rsc, "Setting %s for %s on %s: %d", tag, rsc->id, pe__node_name(node), score); match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (match == NULL) { match = pe__copy_node(node); g_hash_table_insert(rsc->allowed_nodes, (gpointer) match->details->id, match); } match->weight = pcmk__add_scores(match->weight, score); } void resource_location(pe_resource_t *rsc, const pe_node_t *node, int score, const char *tag, pe_working_set_t *data_set) { if (node != NULL) { resource_node_score(rsc, node, score, tag); } else if (data_set != NULL) { GList *gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { pe_node_t *node_iter = (pe_node_t *) gIter->data; resource_node_score(rsc, node_iter, score, tag); } } else { GHashTableIter iter; pe_node_t *node_iter = NULL; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node_iter)) { resource_node_score(rsc, node_iter, score, tag); } } if (node == NULL && score == -INFINITY) { if (rsc->allocated_to) { crm_info("Deallocating %s from %s", rsc->id, pe__node_name(rsc->allocated_to)); free(rsc->allocated_to); rsc->allocated_to = NULL; } } } time_t get_effective_time(pe_working_set_t * data_set) { if(data_set) { if (data_set->now == NULL) { crm_trace("Recording a new 'now'"); data_set->now = crm_time_new(NULL); } return crm_time_get_seconds_since_epoch(data_set->now); } crm_trace("Defaulting to 'now'"); return time(NULL); } gboolean get_target_role(const pe_resource_t *rsc, enum rsc_role_e *role) { enum rsc_role_e local_role = RSC_ROLE_UNKNOWN; const char *value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); CRM_CHECK(role != NULL, return FALSE); if (pcmk__str_eq(value, "started", pcmk__str_null_matches | 
pcmk__str_casei) || pcmk__str_eq("default", value, pcmk__str_casei)) { return FALSE; } local_role = text2role(value); if (local_role == RSC_ROLE_UNKNOWN) { pcmk__config_err("Ignoring '" XML_RSC_ATTR_TARGET_ROLE "' for %s " "because '%s' is not valid", rsc->id, value); return FALSE; } else if (local_role > RSC_ROLE_STARTED) { if (pcmk_is_set(pe__const_top_resource(rsc, false)->flags, pe_rsc_promotable)) { if (local_role > RSC_ROLE_UNPROMOTED) { /* This is what we'd do anyway, just leave the default to avoid messing up the placement algorithm */ return FALSE; } } else { pcmk__config_err("Ignoring '" XML_RSC_ATTR_TARGET_ROLE "' for %s " "because '%s' only makes sense for promotable " "clones", rsc->id, value); return FALSE; } } *role = local_role; return TRUE; } gboolean order_actions(pe_action_t * lh_action, pe_action_t * rh_action, enum pe_ordering order) { GList *gIter = NULL; pe_action_wrapper_t *wrapper = NULL; GList *list = NULL; if (order == pe_order_none) { return FALSE; } if (lh_action == NULL || rh_action == NULL) { return FALSE; } crm_trace("Creating action wrappers for ordering: %s then %s", lh_action->uuid, rh_action->uuid); /* Ensure we never create a dependency on ourselves... 
it's happened */ CRM_ASSERT(lh_action != rh_action); /* Filter dups, otherwise update_action_states() has too much work to do */ gIter = lh_action->actions_after; for (; gIter != NULL; gIter = gIter->next) { pe_action_wrapper_t *after = (pe_action_wrapper_t *) gIter->data; if (after->action == rh_action && (after->type & order)) { return FALSE; } } wrapper = calloc(1, sizeof(pe_action_wrapper_t)); wrapper->action = rh_action; wrapper->type = order; list = lh_action->actions_after; list = g_list_prepend(list, wrapper); lh_action->actions_after = list; wrapper = calloc(1, sizeof(pe_action_wrapper_t)); wrapper->action = lh_action; wrapper->type = order; list = rh_action->actions_before; list = g_list_prepend(list, wrapper); rh_action->actions_before = list; return TRUE; } void destroy_ticket(gpointer data) { pe_ticket_t *ticket = data; if (ticket->state) { g_hash_table_destroy(ticket->state); } free(ticket->id); free(ticket); } pe_ticket_t * ticket_new(const char *ticket_id, pe_working_set_t * data_set) { pe_ticket_t *ticket = NULL; if (pcmk__str_empty(ticket_id)) { return NULL; } if (data_set->tickets == NULL) { data_set->tickets = pcmk__strkey_table(free, destroy_ticket); } ticket = g_hash_table_lookup(data_set->tickets, ticket_id); if (ticket == NULL) { ticket = calloc(1, sizeof(pe_ticket_t)); if (ticket == NULL) { crm_err("Cannot allocate ticket '%s'", ticket_id); return NULL; } crm_trace("Creaing ticket entry for %s", ticket_id); ticket->id = strdup(ticket_id); ticket->granted = FALSE; ticket->last_granted = -1; ticket->standby = FALSE; ticket->state = pcmk__strkey_table(free, free); g_hash_table_insert(data_set->tickets, strdup(ticket->id), ticket); } return ticket; } const char * rsc_printable_id(const pe_resource_t *rsc) { return pcmk_is_set(rsc->flags, pe_rsc_unique)? 
rsc->id : ID(rsc->xml); } void pe__clear_resource_flags_recursive(pe_resource_t *rsc, uint64_t flags) { pe__clear_resource_flags(rsc, flags); for (GList *gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe__clear_resource_flags_recursive((pe_resource_t *) gIter->data, flags); } } void pe__clear_resource_flags_on_all(pe_working_set_t *data_set, uint64_t flag) { for (GList *lpc = data_set->resources; lpc != NULL; lpc = lpc->next) { pe_resource_t *r = (pe_resource_t *) lpc->data; pe__clear_resource_flags_recursive(r, flag); } } void pe__set_resource_flags_recursive(pe_resource_t *rsc, uint64_t flags) { pe__set_resource_flags(rsc, flags); for (GList *gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe__set_resource_flags_recursive((pe_resource_t *) gIter->data, flags); } } void trigger_unfencing(pe_resource_t *rsc, pe_node_t *node, const char *reason, pe_action_t *dependency, pe_working_set_t *data_set) { if (!pcmk_is_set(data_set->flags, pe_flag_enable_unfencing)) { /* No resources require it */ return; } else if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pe_rsc_fence_device)) { /* Wasn't a stonith device */ return; } else if(node && node->details->online && node->details->unclean == FALSE && node->details->shutdown == FALSE) { pe_action_t *unfence = pe_fence_op(node, "on", FALSE, reason, FALSE, data_set); if(dependency) { order_actions(unfence, dependency, pe_order_optional); } } else if(rsc) { GHashTableIter iter; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if(node->details->online && node->details->unclean == FALSE && node->details->shutdown == FALSE) { trigger_unfencing(rsc, node, reason, dependency, data_set); } } } } gboolean add_tag_ref(GHashTable * tags, const char * tag_name, const char * obj_ref) { pe_tag_t *tag = NULL; GList *gIter = NULL; gboolean is_existing = FALSE; CRM_CHECK(tags && tag_name && obj_ref, return FALSE); tag = g_hash_table_lookup(tags, tag_name); 
if (tag == NULL) { tag = calloc(1, sizeof(pe_tag_t)); if (tag == NULL) { return FALSE; } tag->id = strdup(tag_name); tag->refs = NULL; g_hash_table_insert(tags, strdup(tag_name), tag); } for (gIter = tag->refs; gIter != NULL; gIter = gIter->next) { const char *existing_ref = (const char *) gIter->data; if (pcmk__str_eq(existing_ref, obj_ref, pcmk__str_none)){ is_existing = TRUE; break; } } if (is_existing == FALSE) { tag->refs = g_list_append(tag->refs, strdup(obj_ref)); crm_trace("Added: tag=%s ref=%s", tag->id, obj_ref); } return TRUE; } /*! * \internal * \brief Check whether shutdown has been requested for a node * * \param[in] node Node to check * * \return TRUE if node has shutdown attribute set and nonzero, FALSE otherwise * \note This differs from simply using node->details->shutdown in that it can * be used before that has been determined (and in fact to determine it), * and it can also be used to distinguish requested shutdown from implicit * shutdown of remote nodes by virtue of their connection stopping. */ bool pe__shutdown_requested(const pe_node_t *node) { const char *shutdown = pe_node_attribute_raw(node, XML_CIB_ATTR_SHUTDOWN); return !pcmk__str_eq(shutdown, "0", pcmk__str_null_matches); } /*! * \internal * \brief Update a data set's "recheck by" time * * \param[in] recheck Epoch time when recheck should happen * \param[in,out] data_set Current working set */ void pe__update_recheck_time(time_t recheck, pe_working_set_t *data_set) { if ((recheck > get_effective_time(data_set)) && ((data_set->recheck_by == 0) || (data_set->recheck_by > recheck))) { data_set->recheck_by = recheck; } } /*! 
* \internal * \brief Extract nvpair blocks contained by a CIB XML element into a hash table * * \param[in] xml_obj XML element containing blocks of nvpair elements * \param[in] set_name If not NULL, only use blocks of this element * \param[in] rule_data Matching parameters to use when unpacking * \param[out] hash Where to store extracted name/value pairs * \param[in] always_first If not NULL, process block with this ID first * \param[in] overwrite Whether to replace existing values with same name * \param[in,out] data_set Cluster working set containing \p xml_obj */ void pe__unpack_dataset_nvpairs(const xmlNode *xml_obj, const char *set_name, const pe_rule_eval_data_t *rule_data, GHashTable *hash, const char *always_first, gboolean overwrite, pe_working_set_t *data_set) { crm_time_t *next_change = crm_time_new_undefined(); pe_eval_nvpairs(data_set->input, xml_obj, set_name, rule_data, hash, always_first, overwrite, next_change); if (crm_time_is_defined(next_change)) { time_t recheck = (time_t) crm_time_get_seconds_since_epoch(next_change); pe__update_recheck_time(recheck, data_set); } crm_time_free(next_change); } bool pe__resource_is_disabled(const pe_resource_t *rsc) { const char *target_role = NULL; CRM_CHECK(rsc != NULL, return false); target_role = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); if (target_role) { enum rsc_role_e target_role_e = text2role(target_role); if ((target_role_e == RSC_ROLE_STOPPED) || ((target_role_e == RSC_ROLE_UNPROMOTED) && pcmk_is_set(pe__const_top_resource(rsc, false)->flags, pe_rsc_promotable))) { return true; } } return false; } bool pe__rsc_running_on_any(pe_resource_t *rsc, GList *node_list) { for (GList *ele = rsc->running_on; ele; ele = ele->next) { pe_node_t *node = (pe_node_t *) ele->data; if (pcmk__str_in_list(node->details->uname, node_list, pcmk__str_star_matches|pcmk__str_casei)) { return true; } } return false; } bool pcmk__rsc_filtered_by_node(pe_resource_t *rsc, GList *only_node) { return 
(rsc->fns->active(rsc, FALSE) && !pe__rsc_running_on_any(rsc, only_node)); } GList * pe__filter_rsc_list(GList *rscs, GList *filter) { GList *retval = NULL; for (GList *gIter = rscs; gIter; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; /* I think the second condition is safe here for all callers of this * function. If not, it needs to move into pe__node_text. */ if (pcmk__str_in_list(rsc_printable_id(rsc), filter, pcmk__str_star_matches) || (rsc->parent && pcmk__str_in_list(rsc_printable_id(rsc->parent), filter, pcmk__str_star_matches))) { retval = g_list_prepend(retval, rsc); } } return retval; } GList * pe__build_node_name_list(pe_working_set_t *data_set, const char *s) { GList *nodes = NULL; if (pcmk__str_eq(s, "*", pcmk__str_null_matches)) { /* Nothing was given so return a list of all node names. Or, '*' was * given. This would normally fall into the pe__unames_with_tag branch * where it will return an empty list. Catch it here instead. */ nodes = g_list_prepend(nodes, strdup("*")); } else { pe_node_t *node = pe_find_node(data_set->nodes, s); if (node) { /* The given string was a valid uname for a node. Return a * singleton list containing just that uname. */ nodes = g_list_prepend(nodes, strdup(s)); } else { /* The given string was not a valid uname. It's either a tag or * it's a typo or something. In the first case, we'll return a * list of all the unames of the nodes with the given tag. In the * second case, we'll return a NULL pointer and nothing will * get displayed. 
*/ nodes = pe__unames_with_tag(data_set, s); } } return nodes; } GList * pe__build_rsc_list(pe_working_set_t *data_set, const char *s) { GList *resources = NULL; if (pcmk__str_eq(s, "*", pcmk__str_null_matches)) { resources = g_list_prepend(resources, strdup("*")); } else { pe_resource_t *rsc = pe_find_resource_with_flags(data_set->resources, s, pe_find_renamed|pe_find_any); if (rsc) { /* A colon in the name we were given means we're being asked to filter * on a specific instance of a cloned resource. Put that exact string * into the filter list. Otherwise, use the printable ID of whatever * resource was found that matches what was asked for. */ if (strstr(s, ":") != NULL) { resources = g_list_prepend(resources, strdup(rsc->id)); } else { resources = g_list_prepend(resources, strdup(rsc_printable_id(rsc))); } } else { /* The given string was not a valid resource name. It's a tag or a * typo or something. See pe__build_node_name_list() for more * detail. */ resources = pe__rscs_with_tag(data_set, s); } } return resources; } xmlNode * pe__failed_probe_for_rsc(const pe_resource_t *rsc, const char *name) { const pe_resource_t *parent = pe__const_top_resource(rsc, false); const char *rsc_id = rsc->id; if (parent->variant == pe_clone) { rsc_id = pe__clone_child_id(parent); } for (xmlNode *xml_op = pcmk__xml_first_child(rsc->cluster->failed); xml_op != NULL; xml_op = pcmk__xml_next(xml_op)) { const char *value = NULL; char *op_id = NULL; /* This resource operation is not a failed probe. */ if (!pcmk_xe_mask_probe_failure(xml_op)) { continue; } /* This resource operation was not run on the given node. Note that if name is * NULL, this will always succeed. */ value = crm_element_value(xml_op, XML_LRM_ATTR_TARGET); if (value == NULL || !pcmk__str_eq(value, name, pcmk__str_casei|pcmk__str_null_matches)) { continue; } - /* This resource operation has no operation_key. */ - value = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); - if (!parse_op_key(value ? 
value : ID(xml_op), &op_id, NULL, NULL)) { - continue; + if (!parse_op_key(pe__xe_history_key(xml_op), &op_id, NULL, NULL)) { + continue; // This history entry is missing an operation key } /* This resource operation's ID does not match the rsc_id we are looking for. */ if (!pcmk__str_eq(op_id, rsc_id, pcmk__str_none)) { free(op_id); continue; } free(op_id); return xml_op; } return NULL; } diff --git a/tools/crm_mon.c b/tools/crm_mon.c index 0d356a3211..3867514ca7 100644 --- a/tools/crm_mon.c +++ b/tools/crm_mon.c @@ -1,2190 +1,2186 @@ /* - * Copyright 2004-2022 the Pacemaker project contributors + * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include // pcmk__ends_with_ext() #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "crm_mon.h" #define SUMMARY "Provides a summary of cluster's current state.\n\n" \ "Outputs varying levels of detail in a number of different formats." 
/*
 * Definitions indicating which items to print
 */

/* Bitmask of pcmk_section_* flags: which output sections are displayed */
static uint32_t show;

/* Bitmask of pcmk_show_* display flags; pending display starts enabled */
static uint32_t show_opts = pcmk_show_pending;

/*
 * Definitions indicating how to output
 */

/* Stays mon_output_unset until an option callback or
 * reconcile_output_format() picks a format
 */
static mon_output_format_t output_format = mon_output_unset;

/* other globals */
static GIOChannel *io_channel = NULL;
static GMainLoop *mainloop = NULL;

/* GLib source ID of the pending reconnect timeout (0 when none is armed) */
static guint reconnect_timer = 0;
static mainloop_timer_t *refresh_timer = NULL;

/* Filled in by pcmk__pacemakerd_status() in setup_api_connections() and reset
 * to "invalid" by mon_cib_connection_destroy()
 */
static enum pcmk_pacemakerd_state pcmkd_state = pcmk_pacemakerd_state_invalid;
static cib_t *cib = NULL;
static stonith_t *st = NULL;
static xmlNode *current_cib = NULL;
static GError *error = NULL;
static pcmk__common_args_t *args = NULL;
static pcmk__output_t *out = NULL;
static GOptionContext *context = NULL;
static gchar **processed_args = NULL;
static time_t last_refresh = 0;
volatile crm_trigger_t *refresh_trigger = NULL;

static enum pcmk__fence_history fence_history = pcmk__fence_history_none;

/* Fence history level; set by fence_history_cb() and cycled through 0-3 by the
 * interactive 'm' key in detect_user_input()
 */
int interactive_fence_level = 0;

/* Output formats this tool registers; the curses format is only compiled in
 * when CURSES_ENABLED is set. Terminated by an all-NULL entry.
 */
static pcmk__supported_format_t formats[] = {
#if CURSES_ENABLED
    CRM_MON_SUPPORTED_FORMAT_CURSES,
#endif
    PCMK__SUPPORTED_FORMAT_HTML,
    PCMK__SUPPORTED_FORMAT_NONE,
    PCMK__SUPPORTED_FORMAT_TEXT,
    PCMK__SUPPORTED_FORMAT_XML,
    { NULL, NULL, NULL }
};

/* "crm-mon-disconnected" formatter registered for the "default" format:
 * ignores its arguments, emits nothing, and returns pcmk_rc_no_output.
 */
PCMK__OUTPUT_ARGS("crm-mon-disconnected", "const char *", "int")
static int
crm_mon_disconnected_default(pcmk__output_t *out, va_list args)
{
    return pcmk_rc_no_output;
}

/* HTML formatter for "crm-mon-disconnected".
 *
 * Message arguments, read from the va_list in this order:
 *   const char *desc - optional detail text (may be NULL)
 *   int state        - an enum pcmk_pacemakerd_state value
 *
 * Writes "Not connected to CIB[: desc][ (state)]" as a sequence of "span"
 * text nodes, then finishes the output object with CRM_EX_DISCONNECT.
 * Always returns pcmk_rc_ok.
 */
PCMK__OUTPUT_ARGS("crm-mon-disconnected", "const char *", "int")
static int
crm_mon_disconnected_html(pcmk__output_t *out, va_list args)
{
    const char *desc = va_arg(args, const char *);
    enum pcmk_pacemakerd_state state =
        (enum pcmk_pacemakerd_state) va_arg(args, int);

    /* Reset the output object first unless it writes to stdout */
    if (out->dest != stdout) {
        out->reset(out);
    }

    pcmk__output_create_xml_text_node(out, "span", "Not connected to CIB");

    if (desc != NULL) {
        pcmk__output_create_xml_text_node(out, "span", ": ");
        pcmk__output_create_xml_text_node(out, "span", desc);
    }

    /* The pacemakerd state is appended only when one is known */
    if (state != pcmk_pacemakerd_state_invalid) {
        const char *state_s = pcmk__pcmkd_state_enum2friendly(state);

        pcmk__output_create_xml_text_node(out, "span", " (");
        pcmk__output_create_xml_text_node(out, "span", state_s);
        pcmk__output_create_xml_text_node(out, "span", ")");
    }

    out->finish(out, CRM_EX_DISCONNECT, true, NULL);
    return pcmk_rc_ok;
}

/* Text formatter for "crm-mon-disconnected"; takes the same arguments as the
 * HTML formatter.
 *
 * Prints "Not connected to CIB[: desc][ (state)]" through out->info() and
 * finishes the output object with CRM_EX_DISCONNECT. Returns whatever
 * out->info() returned.
 */
PCMK__OUTPUT_ARGS("crm-mon-disconnected", "const char *", "int")
static int
crm_mon_disconnected_text(pcmk__output_t *out, va_list args)
{
    const char *desc = va_arg(args, const char *);
    enum pcmk_pacemakerd_state state =
        (enum pcmk_pacemakerd_state) va_arg(args, int);
    int rc = pcmk_rc_ok;

    /* Reset the output object first unless it writes to stdout */
    if (out->dest != stdout) {
        out->reset(out);
    }

    if (state != pcmk_pacemakerd_state_invalid) {
        rc = out->info(out, "Not connected to CIB%s%s (%s)",
                       (desc != NULL)? ": " : "", pcmk__s(desc, ""),
                       pcmk__pcmkd_state_enum2friendly(state));
    } else {
        rc = out->info(out, "Not connected to CIB%s%s",
                       (desc != NULL)? ": " : "", pcmk__s(desc, ""));
    }

    out->finish(out, CRM_EX_DISCONNECT, true, NULL);
    return rc;
}

/* XML formatter for "crm-mon-disconnected"; takes the same arguments as the
 * HTML formatter.
 *
 * Creates a "crm-mon-disconnected" node carrying the description and, when the
 * state is not "invalid", its text form as "pacemakerd-state" (state_s is
 * passed as NULL otherwise). Finishes the output object with
 * CRM_EX_DISCONNECT and returns pcmk_rc_ok.
 */
PCMK__OUTPUT_ARGS("crm-mon-disconnected", "const char *", "int")
static int
crm_mon_disconnected_xml(pcmk__output_t *out, va_list args)
{
    const char *desc = va_arg(args, const char *);
    enum pcmk_pacemakerd_state state =
        (enum pcmk_pacemakerd_state) va_arg(args, int);
    const char *state_s = NULL;

    /* Reset the output object first unless it writes to stdout */
    if (out->dest != stdout) {
        out->reset(out);
    }

    if (state != pcmk_pacemakerd_state_invalid) {
        state_s = pcmk_pacemakerd_api_daemon_state_enum2text(state);
    }

    pcmk__output_create_xml_node(out, "crm-mon-disconnected",
                                 XML_ATTR_DESC, desc,
                                 "pacemakerd-state", state_s,
                                 NULL);

    out->finish(out, CRM_EX_DISCONNECT, true, NULL);
    return pcmk_rc_ok;
}

/* Maps the "crm-mon-disconnected" message to one formatter per output format;
 * terminated by an all-NULL entry.
 */
static pcmk__message_entry_t fmt_functions[] = {
    { "crm-mon-disconnected", "default", crm_mon_disconnected_default },
    { "crm-mon-disconnected", "html", crm_mon_disconnected_html },
    { "crm-mon-disconnected", "text", crm_mon_disconnected_text },
    { "crm-mon-disconnected", "xml", crm_mon_disconnected_xml },
    { NULL, NULL, NULL },
};

/* Define exit codes for monitoring-compatible output
 * For nagios plugins, the possibilities are
 * OK=0,
WARN=1, CRIT=2, and UNKNOWN=3 */
#define MON_STATUS_WARN    CRM_EX_ERROR
#define MON_STATUS_CRIT    CRM_EX_INVALID_PARAM
#define MON_STATUS_UNKNOWN CRM_EX_UNIMPLEMENT_FEATURE

/* Initial value of options.reconnect_ms, in milliseconds */
#define RECONNECT_MSECS 5000

/* Command-line settings, filled in by the GOption entries and their callbacks.
 * Only fence_connect and reconnect_ms start with non-zero values.
 */
struct {
    guint reconnect_ms;
    gboolean daemonize;
    gboolean fence_connect;
    gboolean one_shot;
    gboolean print_pending;
    gboolean show_bans;
    gboolean watch_fencing;
    char *pid_file;
    char *external_agent;
    char *external_recipient;
    char *neg_location_prefix;      /* set from "bans:PREFIX" in apply_include() */
    char *only_node;
    char *only_rsc;
    GSList *user_includes_excludes; /* "OPTION=VALUE" strings given by the user */
    GSList *includes_excludes;      /* "OPTION=VALUE" strings added internally */
} options = {
    .fence_connect = TRUE,
    .reconnect_ms = RECONNECT_MSECS
};

static crm_exit_t clean_up(crm_exit_t exit_code);
static void crm_diff_update(const char *event, xmlNode * msg);
static void clean_up_on_connection_failure(int rc);
static int mon_refresh_display(gpointer user_data);
static int setup_cib_connection(void);
static int setup_fencer_connection(void);
static int setup_api_connections(void);
static void mon_st_callback_event(stonith_t * st, stonith_event_t * e);
static void mon_st_callback_display(stonith_t * st, stonith_event_t * e);
static void refresh_after_event(gboolean data_updated, gboolean enforce);

/* Return the section mask meaning "all" for the given format: every bit except
 * pcmk_section_options for monitor, plain and console output, and
 * pcmk_section_all for any other format.
 */
static uint32_t
all_includes(mon_output_format_t fmt) {
    if (fmt == mon_output_monitor || fmt == mon_output_plain || fmt == mon_output_console) {
        return ~pcmk_section_options;
    } else {
        return pcmk_section_all;
    }
}

/* Return the sections shown by default for the given format: summary, nodes,
 * resources and failures for the text-like and HTML formats, whatever
 * all_includes() returns for the XML formats, and nothing for any other value.
 */
static uint32_t
default_includes(mon_output_format_t fmt) {
    switch (fmt) {
        case mon_output_monitor:
        case mon_output_plain:
        case mon_output_console:
        case mon_output_html:
        case mon_output_cgi:
            return pcmk_section_summary
                   |pcmk_section_nodes
                   |pcmk_section_resources
                   |pcmk_section_failures;

        case mon_output_xml:
        case mon_output_legacy_xml:
            return all_includes(fmt);

        default:
            return 0;
    }
}

/* Section names accepted by --include/--exclude and the flag each one maps to.
 * Terminated by an entry whose name is NULL.
 */
struct {
    const char *name;
    uint32_t bit;
} sections[] = {
    { "attributes", pcmk_section_attributes },
    { "bans", pcmk_section_bans },
    { "counts", pcmk_section_counts },
    { "dc", pcmk_section_dc },
    { "failcounts", pcmk_section_failcounts },
    { "failures", pcmk_section_failures },
    { PCMK__VALUE_FENCING, pcmk_section_fencing_all },
    { "fencing-failed", pcmk_section_fence_failed },
    { "fencing-pending", pcmk_section_fence_pending },
    { "fencing-succeeded", pcmk_section_fence_worked },
    { "maint-mode", pcmk_section_maint_mode },
    { "nodes", pcmk_section_nodes },
    { "operations", pcmk_section_operations },
    { "options", pcmk_section_options },
    { "resources", pcmk_section_resources },
    { "stack", pcmk_section_stack },
    { "summary", pcmk_section_summary },
    { "tickets", pcmk_section_tickets },
    { "times", pcmk_section_times },
    { NULL }
};

/* Look up a section name in sections[] (compared with the pcmk__str_casei
 * flag) and return its flag, or 0 if the name is not in the table.
 */
static uint32_t
find_section_bit(const char *name) {
    for (int i = 0; sections[i].name != NULL; i++) {
        if (pcmk__str_eq(sections[i].name, name, pcmk__str_casei)) {
            return sections[i].bit;
        }
    }

    return 0;
}

/* Apply one --exclude argument to the global `show` mask.
 *
 * excludes is a comma-separated list. "all" clears every section, the
 * PCMK__VALUE_NONE keyword restores all_includes() for the current format, and
 * a known section name clears that section's bit(s).
 *
 * Returns TRUE on success. On the first unrecognized item, sets *error to a
 * usage message listing the valid values and returns FALSE; items processed
 * before it have already been applied.
 */
static gboolean
apply_exclude(const gchar *excludes, GError **error) {
    char **parts = NULL;
    gboolean result = TRUE;

    parts = g_strsplit(excludes, ",", 0);
    for (char **s = parts; *s != NULL; s++) {
        uint32_t bit = find_section_bit(*s);

        if (pcmk__str_eq(*s, "all", pcmk__str_none)) {
            show = 0;
        } else if (pcmk__str_eq(*s, PCMK__VALUE_NONE, pcmk__str_none)) {
            show = all_includes(output_format);
        } else if (bit != 0) {
            show &= ~bit;
        } else {
            g_set_error(error, PCMK__EXITC_ERROR, CRM_EX_USAGE,
                        "--exclude options: all, attributes, bans, counts, dc, "
                        "failcounts, failures, fencing, fencing-failed, "
                        "fencing-pending, fencing-succeeded, maint-mode, nodes, "
                        PCMK__VALUE_NONE ", operations, options, resources, "
                        "stack, summary, tickets, times");
            result = FALSE;
            break;
        }
    }
    g_strfreev(parts);
    return result;
}

/* Apply one --include argument to the global `show` mask.
 *
 * includes is a comma-separated list. "all" selects all_includes() for the
 * current format, "default"/"defaults" adds default_includes(), the
 * PCMK__VALUE_NONE keyword clears everything, and a known section name sets
 * that section's bit(s). Items starting with "bans" enable the bans section and
 * replace options.neg_location_prefix (see below).
 *
 * Returns TRUE on success. On the first unrecognized item, sets *error to a
 * usage message listing the valid values and returns FALSE; items processed
 * before it have already been applied.
 */
static gboolean
apply_include(const gchar *includes, GError **error) {
    char **parts = NULL;
    gboolean result = TRUE;

    parts = g_strsplit(includes, ",", 0);
    for (char **s = parts; *s != NULL; s++) {
        uint32_t bit = find_section_bit(*s);

        if (pcmk__str_eq(*s, "all", pcmk__str_none)) {
            show = all_includes(output_format);
        } else if (pcmk__starts_with(*s, "bans")) {
            /* Any item beginning with "bans" lands here. Any previously stored
             * prefix is dropped; a new one is stored only for "bans:PREFIX".
             */
            show |= pcmk_section_bans;
            if (options.neg_location_prefix != NULL) {
                free(options.neg_location_prefix);
                options.neg_location_prefix = NULL;
            }

            if (strlen(*s) > 4 && (*s)[4] == ':') {
                options.neg_location_prefix = strdup(*s+5);
            }
        } else if (pcmk__str_any_of(*s, "default", "defaults", NULL)) {
            show |= default_includes(output_format);
        } else if (pcmk__str_eq(*s, PCMK__VALUE_NONE, pcmk__str_none)) {
            show = 0;
        } else if (bit != 0) {
            show |= bit;
        } else {
            g_set_error(error, PCMK__EXITC_ERROR, CRM_EX_USAGE,
                        "--include options: all, attributes, bans[:PREFIX], counts, dc, "
                        "default, failcounts, failures, fencing, fencing-failed, "
                        "fencing-pending, fencing-succeeded, maint-mode, nodes, "
                        PCMK__VALUE_NONE ", operations, options, resources, "
                        "stack, summary, tickets, times");
            result = FALSE;
            break;
        }
    }
    g_strfreev(parts);
    return result;
}

/* Replay a list of recorded "OPTION=VALUE" strings (as built by the
 * *_include_exclude_cb callbacks) in list order, dispatching on the option
 * prefix to apply_include() or apply_exclude(). Entries with any other prefix
 * are skipped. Stops at the first failure and returns FALSE; otherwise TRUE.
 */
static gboolean
apply_include_exclude(GSList *lst, GError **error) {
    gboolean rc = TRUE;
    GSList *node = lst;

    while (node != NULL) {
        char *s = node->data;

        /* The offsets skip the option name and the '=' that follows it */
        if (pcmk__starts_with(s, "--include=")) {
            rc = apply_include(s+10, error);
        } else if (pcmk__starts_with(s, "-I=")) {
            rc = apply_include(s+3, error);
        } else if (pcmk__starts_with(s, "--exclude=")) {
            rc = apply_exclude(s+10, error);
        } else if (pcmk__starts_with(s, "-U=")) {
            rc = apply_exclude(s+3, error);
        }

        if (rc != TRUE) {
            break;
        }

        node = node->next;
    }

    return rc;
}

/* Record an include/exclude request as "option_name=optarg" at the end of
 * options.user_includes_excludes. Nothing is validated here. Always TRUE.
 */
static gboolean
user_include_exclude_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
    char *s = crm_strdup_printf("%s=%s", option_name, optarg);

    options.user_includes_excludes = g_slist_append(options.user_includes_excludes, s);
    return TRUE;
}

/* Same as user_include_exclude_cb(), but appends to options.includes_excludes
 * instead. Always TRUE.
 */
static gboolean
include_exclude_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
    char *s = crm_strdup_printf("%s=%s", option_name, optarg);

    options.includes_excludes = g_slist_append(options.includes_excludes, s);
    return TRUE;
}

/* Callback for the deprecated --web-cgi option */
static gboolean
as_cgi_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
pcmk__str_update(&args->output_ty, "html");
    output_format = mon_output_cgi;
    options.one_shot = TRUE;    /* CGI output is produced once */
    return TRUE;
}

/* Callback for the deprecated --as-html option: optarg becomes the output
 * destination and HTML the output type.
 */
static gboolean
as_html_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
    pcmk__str_update(&args->output_dest, optarg);
    pcmk__str_update(&args->output_ty, "html");
    output_format = mon_output_html;
    /* Mask out group and other write permission for files created later */
    umask(S_IWGRP | S_IWOTH);
    return TRUE;
}

/* Callback for --simple-status: text output type, monitor format, one-shot */
static gboolean
as_simple_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
    pcmk__str_update(&args->output_ty, "text");
    output_format = mon_output_monitor;
    options.one_shot = TRUE;
    return TRUE;
}

/* Callback for the deprecated --as-xml option: selects the legacy XML format */
static gboolean
as_xml_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
    pcmk__str_update(&args->output_ty, "xml");
    output_format = mon_output_legacy_xml;
    return TRUE;
}

/* Callback for --fence-history[=LEVEL].
 *
 * Without a value the level is 2; otherwise optarg is parsed into
 * interactive_fence_level with a minimum of 0. Levels 1-3 enable the fencer
 * connection and queue an internal --include for the matching fencing
 * sections; level 0 disables the fencer connection and queues an --exclude.
 * Any other level sets *err and returns FALSE.
 *
 * NOTE(review): levels 1, 2 and 3 all select pcmk__fence_history_full, while
 * the --fence-history help text describes only level 3 as "full history
 * without reduction" - confirm this is intended.
 */
static gboolean
fence_history_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
    if (optarg == NULL) {
        interactive_fence_level = 2;
    } else {
        pcmk__scan_min_int(optarg, &interactive_fence_level, 0);
    }

    switch (interactive_fence_level) {
        case 3:
            options.fence_connect = TRUE;
            fence_history = pcmk__fence_history_full;
            return include_exclude_cb("--include", PCMK__VALUE_FENCING, data, err);

        case 2:
            options.fence_connect = TRUE;
            fence_history = pcmk__fence_history_full;
            return include_exclude_cb("--include", PCMK__VALUE_FENCING, data, err);

        case 1:
            options.fence_connect = TRUE;
            fence_history = pcmk__fence_history_full;
            return include_exclude_cb("--include", "fencing-failed,fencing-pending", data, err);

        case 0:
            options.fence_connect = FALSE;
            fence_history = pcmk__fence_history_none;
            return include_exclude_cb("--exclude", PCMK__VALUE_FENCING, data, err);

        default:
            g_set_error(err, PCMK__EXITC_ERROR, CRM_EX_INVALID_PARAM, "Fence history must be 0-3");
            return FALSE;
    }
}

/* Callback for --group-by-node */
static gboolean
group_by_node_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
    show_opts |= pcmk_show_rscs_by_node;
    return TRUE;
}

static gboolean
hide_headers_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
    /* --hide-headers is recorded as a user-level exclusion of "summary" */
    return user_include_exclude_cb("--exclude", "summary", data, err);
}

/* Callback for --inactive */
static gboolean
inactive_resources_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
    show_opts |= pcmk_show_inactive_rscs;
    return TRUE;
}

/* Callback for the deprecated --disable-ncurses option: selects the plain
 * format. Also called by reconcile_output_format() for "text" output.
 */
static gboolean
no_curses_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
    output_format = mon_output_plain;
    return TRUE;
}

/* Callback for --brief */
static gboolean
print_brief_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
    show_opts |= pcmk_show_brief;
    return TRUE;
}

/* Callback for --show-detail */
static gboolean
print_detail_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
    show_opts |= pcmk_show_details;
    return TRUE;
}

/* Callback for --show-description */
static gboolean
print_description_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
    show_opts |= pcmk_show_description;
    return TRUE;
}

/* Callback for --timing-details: turns on timing and records a user-level
 * include of the operations section.
 */
static gboolean
print_timing_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
    show_opts |= pcmk_show_timing;
    return user_include_exclude_cb("--include", "operations", data, err);
}

/* Callback for --interval: stores the parsed TIMESPEC in options.reconnect_ms.
 * Sets *err and returns FALSE when crm_get_msec() reports -1 for optarg.
 *
 * NOTE(review): the value is checked with crm_get_msec() but stored from
 * crm_parse_interval_spec(); confirm both accept the same inputs.
 */
static gboolean
reconnect_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
    int rc = crm_get_msec(optarg);

    if (rc == -1) {
        g_set_error(err, PCMK__EXITC_ERROR, CRM_EX_INVALID_PARAM, "Invalid value for -i: %s", optarg);
        return FALSE;
    } else {
        options.reconnect_ms = crm_parse_interval_spec(optarg);
    }

    return TRUE;
}

/* Callback for --show-node-attributes */
static gboolean
show_attributes_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
    return user_include_exclude_cb("--include", "attributes", data, err);
}

/* Callback for --neg-locations[=PREFIX]: records a user-level include of
 * "bans", or "bans:PREFIX" when a prefix is given.
 */
static gboolean
show_bans_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
    if (optarg != NULL) {
        char *s = crm_strdup_printf("bans:%s", optarg);
        gboolean rc = user_include_exclude_cb("--include", s, data, err);
        /* user_include_exclude_cb() formats its own copy, so s can be freed */
        free(s);
        return rc;
    } else {
        return user_include_exclude_cb("--include", "bans", data, err);
    }
}

/* Callback for --failcounts */
static gboolean
show_failcounts_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
    return user_include_exclude_cb("--include", "failcounts", data, err);
}

/* Callback for --operations: includes both fail counts and operation history */
static gboolean
show_operations_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
    return user_include_exclude_cb("--include", "failcounts,operations", data, err);
}

/* Callback for --tickets */
static gboolean
show_tickets_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
    return user_include_exclude_cb("--include", "tickets", data, err);
}

/* Callback for the hidden --xml-file option: exports optarg as the CIB_file
 * environment variable (overwriting any existing value) and forces one-shot
 * mode.
 */
static gboolean
use_cib_file_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
    setenv("CIB_file", optarg, 1);
    options.one_shot = TRUE;
    return TRUE;
}

/* Prefix for continuation lines in multi-line option help strings */
#define INDENT " "

/* *INDENT-OFF* */
/* Options registered by build_arg_context() as the "additional" group */
static GOptionEntry addl_entries[] = {
    { "interval", 'i', 0, G_OPTION_ARG_CALLBACK, reconnect_cb,
      "Update frequency (default is 5 seconds)",
      "TIMESPEC" },

    { "one-shot", '1', 0, G_OPTION_ARG_NONE, &options.one_shot,
      "Display the cluster status once on the console and exit",
      NULL },

    { "daemonize", 'd', 0, G_OPTION_ARG_NONE, &options.daemonize,
      "Run in the background as a daemon.\n"
      INDENT "Requires at least one of --output-to and --external-agent.",
      NULL },

    { "pid-file", 'p', 0, G_OPTION_ARG_FILENAME, &options.pid_file,
      "(Advanced) Daemon pid file location",
      "FILE" },

    { "external-agent", 'E', 0, G_OPTION_ARG_FILENAME, &options.external_agent,
      "A program to run when resource operations take place",
      "FILE" },

    { "external-recipient", 'e', 0, G_OPTION_ARG_STRING, &options.external_recipient,
      "A recipient for your program (assuming you want the program to send something to someone).",
      "RCPT" },

    { "watch-fencing", 'W', 0, G_OPTION_ARG_NONE, &options.watch_fencing,
      "Listen for fencing events. For use with --external-agent.",
      NULL },

    { "xml-file", 'x', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, use_cib_file_cb,
      NULL,
      NULL },

    { NULL }
};

/* Options registered by build_arg_context() as the "display" group.
 * get_option_desc() also scans this table by short option to build the
 * interactive help screen.
 */
static GOptionEntry display_entries[] = {
    { "include", 'I', 0, G_OPTION_ARG_CALLBACK, user_include_exclude_cb,
      "A list of sections to include in the output.\n"
      INDENT "See `Output Control` help for more information.",
      "SECTION(s)" },

    { "exclude", 'U', 0, G_OPTION_ARG_CALLBACK, user_include_exclude_cb,
      "A list of sections to exclude from the output.\n"
      INDENT "See `Output Control` help for more information.",
      "SECTION(s)" },

    { "node", 0, 0, G_OPTION_ARG_STRING, &options.only_node,
      "When displaying information about nodes, show only what's related to the given\n"
      INDENT "node, or to all nodes tagged with the given tag",
      "NODE" },

    { "resource", 0, 0, G_OPTION_ARG_STRING, &options.only_rsc,
      "When displaying information about resources, show only what's related to the given\n"
      INDENT "resource, or to all resources tagged with the given tag",
      "RSC" },

    { "group-by-node", 'n', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, group_by_node_cb,
      "Group resources by node",
      NULL },

    { "inactive", 'r', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, inactive_resources_cb,
      "Display inactive resources",
      NULL },

    { "failcounts", 'f', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_failcounts_cb,
      "Display resource fail counts",
      NULL },

    { "operations", 'o', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_operations_cb,
      "Display resource operation history",
      NULL },

    { "timing-details", 't', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_timing_cb,
      "Display resource operation history with timing details",
      NULL },

    { "tickets", 'c', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_tickets_cb,
      "Display cluster tickets",
      NULL },

    { "fence-history", 'm', G_OPTION_FLAG_OPTIONAL_ARG, G_OPTION_ARG_CALLBACK, fence_history_cb,
      "Show fence history:\n"
      INDENT "0=off, 1=failures and pending (default without option),\n"
      INDENT "2=add successes (default without value for option),\n"
      INDENT "3=show full history without reduction to most recent of each flavor",
      "LEVEL" },

    { "neg-locations", 'L', G_OPTION_FLAG_OPTIONAL_ARG, G_OPTION_ARG_CALLBACK, show_bans_cb,
      "Display negative location constraints [optionally filtered by id prefix]",
      NULL },

    { "show-node-attributes", 'A', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_attributes_cb,
      "Display node attributes",
      NULL },

    { "hide-headers", 'D', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, hide_headers_cb,
      "Hide all headers",
      NULL },

    { "show-detail", 'R', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_detail_cb,
      "Show more details (node IDs, individual clone instances)",
      NULL },

    { "show-description", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_description_cb,
      "Show resource descriptions",
      NULL },

    { "brief", 'b', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_brief_cb,
      "Brief output",
      NULL },

    { "pending", 'j', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_NONE, &options.print_pending,
      "Display pending state if 'record-pending' is enabled",
      NULL },

    { "simple-status", 's', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, as_simple_cb,
      "Display the cluster status once as a simple one line output (suitable for nagios)",
      NULL },

    { NULL }
};

/* Options registered by build_arg_context() as the "deprecated" group; each
 * help text names the replacement option.
 */
static GOptionEntry deprecated_entries[] = {
    { "as-html", 'h', G_OPTION_FLAG_FILENAME, G_OPTION_ARG_CALLBACK, as_html_cb,
      "Write cluster status to the named HTML file.\n"
      INDENT "Use --output-as=html --output-to=FILE instead.",
      "FILE" },

    { "as-xml", 'X', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, as_xml_cb,
      "Write cluster status as XML to stdout. This will enable one-shot mode.\n"
      INDENT "Use --output-as=xml instead.",
      NULL },

    { "disable-ncurses", 'N', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, no_curses_cb,
      "Disable the use of ncurses.\n"
      INDENT "Use --output-as=text instead.",
      NULL },

    { "web-cgi", 'w', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, as_cgi_cb,
      "Web mode with output suitable for CGI (preselected when run as *.cgi).\n"
      INDENT "Use --output-as=html --html-cgi instead.",
      NULL },

    { NULL }
};
/* *INDENT-ON* */

/* Reconnect to the CIB and fencing agent after reconnect_ms has passed. This sounds
 * like it would be more broadly useful, but only ever happens after a disconnect via
 * mon_cib_connection_destroy.
 *
 * Always returns G_SOURCE_REMOVE: on failure a fresh timeout is registered
 * explicitly rather than keeping this source alive.
 */
static gboolean
reconnect_after_timeout(gpointer data)
{
#if CURSES_ENABLED
    if (output_format == mon_output_console) {
        clear();
        refresh();
    }
#endif

    out->transient(out, "Reconnecting...");
    if (setup_api_connections() == pcmk_rc_ok) {
        // Trigger redrawing the screen (needs reconnect_timer == 0)
        reconnect_timer = 0;
        refresh_after_event(FALSE, TRUE);
        return G_SOURCE_REMOVE;
    }

    out->message(out, "crm-mon-disconnected",
                 "Latest connection attempt failed", pcmkd_state);

    reconnect_timer = g_timeout_add(options.reconnect_ms,
                                    reconnect_after_timeout, NULL);
    return G_SOURCE_REMOVE;
}

/* Called from various places when we are disconnected from the CIB or from the
 * fencing agent. If the CIB connection is still valid, this function will also
 * attempt to sign off and reconnect.
*/ static void mon_cib_connection_destroy(gpointer user_data) { const char *msg = "Connection to the cluster lost"; pcmkd_state = pcmk_pacemakerd_state_invalid; /* No crm-mon-disconnected message for console; a working implementation * is not currently worth the effort */ out->transient(out, "%s", msg); out->message(out, "crm-mon-disconnected", msg, pcmkd_state); if (refresh_timer != NULL) { /* we'll trigger a refresh after reconnect */ mainloop_timer_stop(refresh_timer); } if (reconnect_timer) { /* we'll trigger a new reconnect-timeout at the end */ g_source_remove(reconnect_timer); reconnect_timer = 0; } /* the client API won't properly reconnect notifications if they are still * in the table - so remove them */ stonith_api_delete(st); st = NULL; if (cib) { cib->cmds->signoff(cib); reconnect_timer = g_timeout_add(options.reconnect_ms, reconnect_after_timeout, NULL); } } /* Signal handler installed into the mainloop for normal program shutdown */ static void mon_shutdown(int nsig) { clean_up(CRM_EX_OK); } #if CURSES_ENABLED static volatile sighandler_t ncurses_winch_handler; /* Signal handler installed the regular way (not into the main loop) for when * the screen is resized. Commonly, this happens when running in an xterm and * the user changes its size. */ static void mon_winresize(int nsig) { static int not_done; int lines = 0, cols = 0; if (!not_done++) { if (ncurses_winch_handler) /* the original ncurses WINCH signal handler does the * magic of retrieving the new window size; * otherwise, we'd have to use ioctl or tgetent */ (*ncurses_winch_handler) (SIGWINCH); getmaxyx(stdscr, lines, cols); resizeterm(lines, cols); /* Alert the mainloop code we'd like the refresh_trigger to run next * time the mainloop gets around to checking. 
*/ mainloop_set_trigger((crm_trigger_t *) refresh_trigger); } not_done--; } #endif static int setup_fencer_connection(void) { int rc = pcmk_ok; if (options.fence_connect && st == NULL) { st = stonith_api_new(); } if (!options.fence_connect || st == NULL || st->state != stonith_disconnected) { return rc; } rc = st->cmds->connect(st, crm_system_name, NULL); if (rc == pcmk_ok) { crm_trace("Setting up stonith callbacks"); if (options.watch_fencing) { st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT, mon_st_callback_event); st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, mon_st_callback_event); } else { st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT, mon_st_callback_display); st->cmds->register_notification(st, T_STONITH_NOTIFY_HISTORY, mon_st_callback_display); } } else { stonith_api_delete(st); st = NULL; } return rc; } static int setup_cib_connection(void) { int rc = pcmk_rc_ok; CRM_CHECK(cib != NULL, return EINVAL); if (cib->state != cib_disconnected) { // Already connected with notifications registered for CIB updates return rc; } rc = cib__signon_query(out, &cib, ¤t_cib); if (rc == pcmk_rc_ok) { rc = pcmk_legacy2rc(cib->cmds->set_connection_dnotify(cib, mon_cib_connection_destroy)); if (rc == EPROTONOSUPPORT) { out->err(out, "CIB client does not support connection loss " "notifications; crm_mon will be unable to reconnect after " "connection loss"); rc = pcmk_rc_ok; } if (rc == pcmk_rc_ok) { cib->cmds->del_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update); rc = pcmk_legacy2rc(cib->cmds->add_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update)); } if (rc != pcmk_rc_ok) { if (rc == EPROTONOSUPPORT) { out->err(out, "CIB client does not support CIB diff " "notifications"); } else { out->err(out, "CIB diff notification setup failed"); } out->err(out, "Cannot monitor CIB changes; exiting"); cib__clean_up_connection(&cib); stonith_api_delete(st); st = NULL; } } return rc; } /* This is used to set up the fencing 
options after the interactive UI has been stared. * fence_history_cb can't be used because it builds up a list of includes/excludes that * then have to be processed with apply_include_exclude and that could affect other * things. */ static void set_fencing_options(int level) { switch (level) { case 3: options.fence_connect = TRUE; fence_history = pcmk__fence_history_full; show |= pcmk_section_fencing_all; break; case 2: options.fence_connect = TRUE; fence_history = pcmk__fence_history_full; show |= pcmk_section_fencing_all; break; case 1: options.fence_connect = TRUE; fence_history = pcmk__fence_history_full; show |= pcmk_section_fence_failed | pcmk_section_fence_pending; break; default: interactive_fence_level = 0; options.fence_connect = FALSE; fence_history = pcmk__fence_history_none; show &= ~pcmk_section_fencing_all; break; } } static int setup_api_connections(void) { int rc = pcmk_rc_ok; CRM_CHECK(cib != NULL, return EINVAL); if (cib->state != cib_disconnected) { return rc; } if (cib->variant == cib_native) { rc = pcmk__pacemakerd_status(out, crm_system_name, options.reconnect_ms / 2, false, &pcmkd_state); if (rc != pcmk_rc_ok) { return rc; } switch (pcmkd_state) { case pcmk_pacemakerd_state_running: case pcmk_pacemakerd_state_remote: case pcmk_pacemakerd_state_shutting_down: /* Fencer and CIB may still be available while shutting down or * running on a Pacemaker Remote node */ break; default: // Fencer and CIB are definitely unavailable return ENOTCONN; } setup_fencer_connection(); } rc = setup_cib_connection(); return rc; } #if CURSES_ENABLED static const char * get_option_desc(char c) { const char *desc = "No help available"; for (GOptionEntry *entry = display_entries; entry != NULL; entry++) { if (entry->short_name == c) { desc = entry->description; break; } } return desc; } #define print_option_help(out, option, condition) \ curses_formatted_printf(out, "%c %c: \t%s\n", ((condition)? 
'*': ' '), option, get_option_desc(option)); /* This function is called from the main loop when there is something to be read * on stdin, like an interactive user's keystroke. All it does is read the keystroke, * set flags (or show the page showing which keystrokes are valid), and redraw the * screen. It does not do anything with connections to the CIB or fencing agent * agent what would happen in mon_refresh_display. */ static gboolean detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer user_data) { int c; gboolean config_mode = FALSE; while (1) { /* Get user input */ c = getchar(); switch (c) { case 'm': interactive_fence_level++; if (interactive_fence_level > 3) { interactive_fence_level = 0; } set_fencing_options(interactive_fence_level); break; case 'c': show ^= pcmk_section_tickets; break; case 'f': show ^= pcmk_section_failcounts; break; case 'n': show_opts ^= pcmk_show_rscs_by_node; break; case 'o': show ^= pcmk_section_operations; if (!pcmk_is_set(show, pcmk_section_operations)) { show_opts &= ~pcmk_show_timing; } break; case 'r': show_opts ^= pcmk_show_inactive_rscs; break; case 'R': show_opts ^= pcmk_show_details; #ifdef PCMK__COMPAT_2_0 // Keep failed action output the same as 2.0.x show_opts |= pcmk_show_failed_detail; #endif break; case 't': show_opts ^= pcmk_show_timing; if (pcmk_is_set(show_opts, pcmk_show_timing)) { show |= pcmk_section_operations; } break; case 'A': show ^= pcmk_section_attributes; break; case 'L': show ^= pcmk_section_bans; break; case 'D': /* If any header is shown, clear them all, otherwise set them all */ if (pcmk_any_flags_set(show, pcmk_section_summary)) { show &= ~pcmk_section_summary; } else { show |= pcmk_section_summary; } /* Regardless, we don't show options in console mode. */ show &= ~pcmk_section_options; break; case 'b': show_opts ^= pcmk_show_brief; break; case 'j': show_opts ^= pcmk_show_pending; break; case '?': config_mode = TRUE; break; default: /* All other keys just redraw the screen. 
*/
                goto refresh;
        }

        /* Outside help mode, every handled key redraws immediately */
        if (!config_mode)
            goto refresh;

        /* Help mode: list each toggle with a '*' when it is currently active,
         * then loop to read the next key
         */
        clear();
        refresh();

        curses_formatted_printf(out, "%s", "Display option change mode\n");
        print_option_help(out, 'c', pcmk_is_set(show, pcmk_section_tickets));
        print_option_help(out, 'f', pcmk_is_set(show, pcmk_section_failcounts));
        print_option_help(out, 'n', pcmk_is_set(show_opts, pcmk_show_rscs_by_node));
        print_option_help(out, 'o', pcmk_is_set(show, pcmk_section_operations));
        print_option_help(out, 'r', pcmk_is_set(show_opts, pcmk_show_inactive_rscs));
        print_option_help(out, 't', pcmk_is_set(show_opts, pcmk_show_timing));
        print_option_help(out, 'A', pcmk_is_set(show, pcmk_section_attributes));
        print_option_help(out, 'L', pcmk_is_set(show, pcmk_section_bans));
        /* 'D' hides headers, so it counts as active when the summary is off */
        print_option_help(out, 'D', !pcmk_is_set(show, pcmk_section_summary));
#ifdef PCMK__COMPAT_2_0
        print_option_help(out, 'R', pcmk_any_flags_set(show_opts, pcmk_show_details & ~pcmk_show_failed_detail));
#else
        print_option_help(out, 'R', pcmk_any_flags_set(show_opts, pcmk_show_details));
#endif
        print_option_help(out, 'b', pcmk_is_set(show_opts, pcmk_show_brief));
        print_option_help(out, 'j', pcmk_is_set(show_opts, pcmk_show_pending));
        /* The fence history line shows the current level instead of a '*' */
        curses_formatted_printf(out, "%d m: \t%s\n", interactive_fence_level, get_option_desc('m'));
        curses_formatted_printf(out, "%s", "\nToggle fields via field letter, type any other key to return\n");
    }

refresh:
    refresh_after_event(FALSE, TRUE);

    return TRUE;
}
#endif // CURSES_ENABLED

// Basically crm_signal_handler(SIGCHLD, SIG_IGN) plus the SA_NOCLDWAIT flag
/* Installs SIG_IGN for SIGCHLD with SA_RESTART|SA_NOCLDWAIT. Failures are
 * logged as warnings and otherwise ignored.
 */
static void
avoid_zombies(void)
{
    struct sigaction sa;

    memset(&sa, 0, sizeof(struct sigaction));
    if (sigemptyset(&sa.sa_mask) < 0) {
        crm_warn("Cannot avoid zombies: %s", pcmk_rc_str(errno));
        return;
    }
    sa.sa_handler = SIG_IGN;
    sa.sa_flags = SA_RESTART|SA_NOCLDWAIT;
    if (sigaction(SIGCHLD, &sa, NULL) < 0) {
        crm_warn("Cannot avoid zombies: %s", pcmk_rc_str(errno));
    }
}

/* Build the GOptionContext for this tool.
 *
 * Creates the common context (advertising the curses "console" format only
 * when CURSES_ENABLED is set), adds --quiet as a main option, sets the long
 * help description, and registers the display, additional and deprecated
 * option groups. `args` and `group` are passed through to
 * pcmk__build_arg_context().
 *
 * Returns the new context.
 */
static GOptionContext *
build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) {
    GOptionContext *context = NULL;

    GOptionEntry extra_prog_entries[] = {
        { "quiet", 'Q', 0, G_OPTION_ARG_NONE, &(args->quiet),
          "Be less descriptive in output.",
          NULL },

        { NULL }
    };

    /* NOTE(review): the text below advertises --include=list/--exclude=list.
     * "list" is not in sections[], so it reaches the error branch of
     * apply_include()/apply_exclude(), whose message lists the valid values.
     */
    const char *description = "Notes:\n\n"
                              "If this program is called as crm_mon.cgi, --output-as=html --html-cgi will\n"
                              "automatically be added to the command line arguments.\n\n"
                              "Time Specification:\n\n"
                              "The TIMESPEC in any command line option can be specified in many different\n"
                              "formats. It can be just an integer number of seconds, a number plus units\n"
                              "(ms/msec/us/usec/s/sec/m/min/h/hr), or an ISO 8601 period specification.\n\n"
                              "Output Control:\n\n"
                              "By default, a certain list of sections are written to the output destination.\n"
                              "The default varies based on the output format - XML includes everything, while\n"
                              "other output formats will display less. This list can be modified with the\n"
                              "--include and --exclude command line options. Each option may be given multiple\n"
                              "times on the command line, and each can give a comma-separated list of sections.\n"
                              "The options are applied to the default set, from left to right as seen on the\n"
                              "command line. For a list of valid sections, pass --include=list or --exclude=list.\n\n"
                              "Interactive Use:\n\n"
                              "When run interactively, crm_mon can be told to hide and display various sections\n"
                              "of output. To see a help screen explaining the options, hit '?'. Any key stroke\n"
                              "aside from those listed will cause the screen to refresh.\n\n"
                              "Examples:\n\n"
                              "Display the cluster status on the console with updates as they occur:\n\n"
                              "\tcrm_mon\n\n"
                              "Display the cluster status on the console just once then exit:\n\n"
                              "\tcrm_mon -1\n\n"
                              "Display your cluster status, group resources by node, and include inactive resources in the list:\n\n"
                              "\tcrm_mon --group-by-node --inactive\n\n"
                              "Start crm_mon as a background daemon and have it write the cluster status to an HTML file:\n\n"
                              "\tcrm_mon --daemonize --output-as html --output-to /path/to/docroot/filename.html\n\n"
                              "Start crm_mon and export the current cluster status as XML to stdout, then exit:\n\n"
                              "\tcrm_mon --output-as xml\n\n";

#if CURSES_ENABLED
    context = pcmk__build_arg_context(args, "console (default), html, text, xml", group, NULL);
#else
    context = pcmk__build_arg_context(args, "text (default), html, xml", group, NULL);
#endif
    pcmk__add_main_args(context, extra_prog_entries);
    g_option_context_set_description(context, description);

    pcmk__add_arg_group(context, "display", "Display Options:",
                        "Show display options", display_entries);
    pcmk__add_arg_group(context, "additional", "Additional Options:",
                        "Show additional options", addl_entries);
    pcmk__add_arg_group(context, "deprecated", "Deprecated Options:",
                        "Show deprecated options", deprecated_entries);

    return context;
}

/* If certain format options were specified, we want to set some extra
 * options. We can just process these like they were given on the
 * command line.
*/
static void
add_output_args(void) {
    GError *local_err = NULL;
    gboolean forced = TRUE;

    /* Formats that need extra formatter flags hand the program name plus
     * those flags to pcmk__force_args(); every other format needs nothing.
     */
    switch (output_format) {
        case mon_output_plain:
            forced = pcmk__force_args(context, &local_err, "%s --text-fancy",
                                      g_get_prgname());
            break;

        case mon_output_cgi:
            forced = pcmk__force_args(context, &local_err, "%s --html-cgi",
                                      g_get_prgname());
            break;

        case mon_output_xml:
            forced = pcmk__force_args(context, &local_err,
                                      "%s --xml-simple-list --xml-substitute",
                                      g_get_prgname());
            break;

        case mon_output_legacy_xml:
            /* From here on, legacy XML is treated as the regular XML format */
            output_format = mon_output_xml;
            forced = pcmk__force_args(context, &local_err,
                                      "%s --xml-legacy --xml-substitute",
                                      g_get_prgname());
            break;

        default:
            break;
    }

    /* Single failure path: publish the error and exit with a usage code */
    if (!forced) {
        g_propagate_error(&error, local_err);
        clean_up(CRM_EX_USAGE);
    }
}

/* Which output format to use could come from two places: The --as-xml
 * style arguments we gave in deprecated_entries above, or the formatted output
 * arguments added by pcmk__register_formats. If the latter were used,
 * output_format will be mon_output_unset.
 *
 * Call the callbacks as if those older style arguments were provided so
 * the various things they do get done.
*/ static void reconcile_output_format(pcmk__common_args_t *args) { gboolean retval = TRUE; GError *err = NULL; if (output_format != mon_output_unset) { return; } if (pcmk__str_eq(args->output_ty, "html", pcmk__str_casei)) { char *dest = NULL; pcmk__str_update(&dest, args->output_dest); retval = as_html_cb("h", dest, NULL, &err); free(dest); } else if (pcmk__str_eq(args->output_ty, "text", pcmk__str_casei)) { retval = no_curses_cb("N", NULL, NULL, &err); } else if (pcmk__str_eq(args->output_ty, "xml", pcmk__str_casei)) { pcmk__str_update(&args->output_ty, "xml"); output_format = mon_output_xml; } else if (options.one_shot) { pcmk__str_update(&args->output_ty, "text"); output_format = mon_output_plain; } else if (!options.daemonize && args->output_dest != NULL) { options.one_shot = TRUE; pcmk__str_update(&args->output_ty, "text"); output_format = mon_output_plain; } else { /* Neither old nor new arguments were given, so set the default. */ pcmk__str_update(&args->output_ty, "console"); output_format = mon_output_console; } if (!retval) { g_propagate_error(&error, err); clean_up(CRM_EX_USAGE); } } static void clean_up_on_connection_failure(int rc) { if (output_format == mon_output_monitor) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "CLUSTER CRIT: Connection to cluster failed: %s", pcmk_rc_str(rc)); clean_up(MON_STATUS_CRIT); } else if (rc == ENOTCONN) { if (pcmkd_state == pcmk_pacemakerd_state_remote) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error: remote-node not connected to cluster"); } else { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error: cluster is not available on this node"); } } else { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Connection to cluster failed: %s", pcmk_rc_str(rc)); } clean_up(pcmk_rc2exitc(rc)); } static void one_shot(void) { int rc = pcmk__status(out, cib, fence_history, show, show_opts, options.only_node, options.only_rsc, options.neg_location_prefix, output_format == mon_output_monitor, 0); 
if (rc == pcmk_rc_ok) { clean_up(pcmk_rc2exitc(rc)); } else { clean_up_on_connection_failure(rc); } } static void exit_on_invalid_cib(void) { if (cib != NULL) { return; } // Shouldn't really be possible g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Invalid CIB source"); clean_up(CRM_EX_ERROR); } int main(int argc, char **argv) { int rc = pcmk_rc_ok; GOptionGroup *output_group = NULL; args = pcmk__new_common_args(SUMMARY); context = build_arg_context(args, &output_group); pcmk__register_formats(output_group, formats); options.pid_file = strdup("/tmp/ClusterMon.pid"); pcmk__cli_init_logging("crm_mon", 0); // Avoid needing to wait for subprocesses forked for -E/--external-agent avoid_zombies(); if (pcmk__ends_with_ext(argv[0], ".cgi")) { output_format = mon_output_cgi; options.one_shot = TRUE; } processed_args = pcmk__cmdline_preproc(argv, "ehimpxEILU"); fence_history_cb("--fence-history", "1", NULL, NULL); /* Set an HTML title regardless of what format we will eventually use. This can't * be done in add_output_args. That function is called after command line * arguments are processed in the next block, which means it'll override whatever * title the user provides. Doing this here means the user can give their own * title on the command line. 
*/ if (!pcmk__force_args(context, &error, "%s --html-title \"Cluster Status\"", g_get_prgname())) { return clean_up(CRM_EX_USAGE); } if (!g_option_context_parse_strv(context, &processed_args, &error)) { return clean_up(CRM_EX_USAGE); } for (int i = 0; i < args->verbosity; i++) { crm_bump_log_level(argc, argv); } if (!args->version) { if (args->quiet) { include_exclude_cb("--exclude", "times", NULL, NULL); } if (options.watch_fencing) { fence_history_cb("--fence-history", "0", NULL, NULL); options.fence_connect = TRUE; } /* create the cib-object early to be able to do further * decisions based on the cib-source */ cib = cib_new(); exit_on_invalid_cib(); switch (cib->variant) { case cib_native: // Everything (fencer, CIB, pcmkd status) should be available break; case cib_file: // Live fence history is not meaningful fence_history_cb("--fence-history", "0", NULL, NULL); /* Notifications are unsupported; nothing to monitor * @COMPAT: Let setup_cib_connection() handle this by exiting? */ options.one_shot = TRUE; break; case cib_remote: // We won't receive any fencing updates fence_history_cb("--fence-history", "0", NULL, NULL); break; case cib_undefined: case cib_database: default: /* something is odd */ exit_on_invalid_cib(); break; } if (options.one_shot) { if (output_format == mon_output_console) { output_format = mon_output_plain; } } else if (options.daemonize) { if (pcmk__str_eq(args->output_dest, "-", pcmk__str_null_matches|pcmk__str_casei) && !options.external_agent) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "--daemonize requires at least one of --output-to and --external-agent"); return clean_up(CRM_EX_USAGE); } } else if (output_format == mon_output_console) { #if CURSES_ENABLED crm_enable_stderr(FALSE); #else options.one_shot = TRUE; output_format = mon_output_plain; printf("Defaulting to one-shot mode\n"); printf("You need to have curses available at compile time to enable console mode\n"); #endif } } reconcile_output_format(args); 
add_output_args(); /* output_format MUST NOT BE CHANGED AFTER THIS POINT. */ if (args->version && output_format == mon_output_console) { /* Use the text output format here if we are in curses mode but were given * --version. Displaying version information uses printf, and then we * immediately exit. We don't want to initialize curses for that. */ rc = pcmk__output_new(&out, "text", args->output_dest, argv); } else { rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); } if (rc != pcmk_rc_ok) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error creating output format %s: %s", args->output_ty, pcmk_rc_str(rc)); return clean_up(CRM_EX_ERROR); } if (options.daemonize) { if (!options.external_agent && (output_format == mon_output_console || output_format == mon_output_unset || output_format == mon_output_none)) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "--daemonize requires --output-as=[html|text|xml]"); return clean_up(CRM_EX_USAGE); } crm_enable_stderr(FALSE); cib_delete(cib); cib = NULL; pcmk__daemonize(crm_system_name, options.pid_file); cib = cib_new(); exit_on_invalid_cib(); } show = default_includes(output_format); /* Apply --include/--exclude flags we used internally. There's no error reporting * here because this would be a programming error. */ apply_include_exclude(options.includes_excludes, &error); /* And now apply any --include/--exclude flags the user gave on the command line. * These are done in a separate pass from the internal ones because we want to * make sure whatever the user specifies overrides whatever we do. */ if (!apply_include_exclude(options.user_includes_excludes, &error)) { return clean_up(CRM_EX_USAGE); } /* Sync up the initial value of interactive_fence_level with whatever was set with * --include/--exclude= options. 
*/ if (pcmk_all_flags_set(show, pcmk_section_fencing_all)) { interactive_fence_level = 3; } else if (pcmk_is_set(show, pcmk_section_fence_worked)) { interactive_fence_level = 2; } else if (pcmk_any_flags_set(show, pcmk_section_fence_failed | pcmk_section_fence_pending)) { interactive_fence_level = 1; } else { interactive_fence_level = 0; } pcmk__register_lib_messages(out); crm_mon_register_messages(out); pe__register_messages(out); stonith__register_messages(out); // Messages internal to this file, nothing curses-specific pcmk__register_messages(out, fmt_functions); if (args->version) { out->version(out, false); return clean_up(CRM_EX_OK); } /* Extra sanity checks when in CGI mode */ if (output_format == mon_output_cgi) { if (cib->variant == cib_file) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "CGI mode used with CIB file"); return clean_up(CRM_EX_USAGE); } else if (options.external_agent != NULL) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "CGI mode cannot be used with --external-agent"); return clean_up(CRM_EX_USAGE); } else if (options.daemonize == TRUE) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "CGI mode cannot be used with -d"); return clean_up(CRM_EX_USAGE); } } if (output_format == mon_output_xml || output_format == mon_output_legacy_xml) { show_opts |= pcmk_show_inactive_rscs | pcmk_show_timing; if (!options.daemonize) { options.one_shot = TRUE; } } if ((output_format == mon_output_html || output_format == mon_output_cgi) && out->dest != stdout) { pcmk__html_add_header("meta", "http-equiv", "refresh", "content", pcmk__itoa(options.reconnect_ms / 1000), NULL); } #ifdef PCMK__COMPAT_2_0 // Keep failed action output the same as 2.0.x show_opts |= pcmk_show_failed_detail; #endif crm_info("Starting %s", crm_system_name); cib__set_output(cib, out); if (options.one_shot) { one_shot(); } out->message(out, "crm-mon-disconnected", "Waiting for initial connection", pcmkd_state); do { out->transient(out, "Connecting to cluster..."); 
rc = setup_api_connections(); if (rc != pcmk_rc_ok) { if ((rc == ENOTCONN) || (rc == ECONNREFUSED)) { out->transient(out, "Connection failed. Retrying in %ums...", options.reconnect_ms); } // Give some time to view all output even if we won't retry pcmk__sleep_ms(options.reconnect_ms); #if CURSES_ENABLED if (output_format == mon_output_console) { clear(); refresh(); } #endif } } while ((rc == ENOTCONN) || (rc == ECONNREFUSED)); if (rc != pcmk_rc_ok) { clean_up_on_connection_failure(rc); } set_fencing_options(interactive_fence_level); mon_refresh_display(NULL); mainloop = g_main_loop_new(NULL, FALSE); mainloop_add_signal(SIGTERM, mon_shutdown); mainloop_add_signal(SIGINT, mon_shutdown); #if CURSES_ENABLED if (output_format == mon_output_console) { ncurses_winch_handler = crm_signal_handler(SIGWINCH, mon_winresize); if (ncurses_winch_handler == SIG_DFL || ncurses_winch_handler == SIG_IGN || ncurses_winch_handler == SIG_ERR) ncurses_winch_handler = NULL; io_channel = g_io_channel_unix_new(STDIN_FILENO); g_io_add_watch(io_channel, G_IO_IN, detect_user_input, NULL); } #endif /* When refresh_trigger->trigger is set to TRUE, call mon_refresh_display. In * this file, that is anywhere mainloop_set_trigger is called. 
*/ refresh_trigger = mainloop_add_trigger(G_PRIORITY_LOW, mon_refresh_display, NULL); g_main_loop_run(mainloop); g_main_loop_unref(mainloop); if (io_channel != NULL) { g_io_channel_shutdown(io_channel, TRUE, NULL); } crm_info("Exiting %s", crm_system_name); return clean_up(CRM_EX_OK); } static int send_custom_trap(const char *node, const char *rsc, const char *task, int target_rc, int rc, int status, const char *desc) { pid_t pid; /*setenv needs chars, these are ints */ char *rc_s = pcmk__itoa(rc); char *status_s = pcmk__itoa(status); char *target_rc_s = pcmk__itoa(target_rc); crm_debug("Sending external notification to '%s' via '%s'", options.external_recipient, options.external_agent); if(rsc) { setenv("CRM_notify_rsc", rsc, 1); } if (options.external_recipient) { setenv("CRM_notify_recipient", options.external_recipient, 1); } setenv("CRM_notify_node", node, 1); setenv("CRM_notify_task", task, 1); setenv("CRM_notify_desc", desc, 1); setenv("CRM_notify_rc", rc_s, 1); setenv("CRM_notify_target_rc", target_rc_s, 1); setenv("CRM_notify_status", status_s, 1); pid = fork(); if (pid == -1) { crm_perror(LOG_ERR, "notification fork() failed."); } if (pid == 0) { /* crm_debug("notification: I am the child. 
Executing the nofitication program."); */ execl(options.external_agent, options.external_agent, NULL); exit(CRM_EX_ERROR); } crm_trace("Finished running custom notification program '%s'.", options.external_agent); free(target_rc_s); free(status_s); free(rc_s); return 0; } static int handle_rsc_op(xmlNode *xml, void *userdata) { const char *node_id = (const char *) userdata; int rc = -1; int status = -1; int target_rc = -1; gboolean notify = TRUE; char *rsc = NULL; char *task = NULL; const char *desc = NULL; const char *magic = NULL; const char *id = NULL; const char *node = NULL; xmlNode *n = xml; xmlNode * rsc_op = xml; if(strcmp((const char*)xml->name, XML_LRM_TAG_RSC_OP) != 0) { pcmk__xe_foreach_child(xml, NULL, handle_rsc_op, (void *) node_id); return pcmk_rc_ok; } - id = crm_element_value(rsc_op, XML_LRM_ATTR_TASK_KEY); - if (id == NULL) { - /* Compatibility with <= 1.1.5 */ - id = ID(rsc_op); - } + id = pe__xe_history_key(rsc_op); magic = crm_element_value(rsc_op, XML_ATTR_TRANSITION_MAGIC); if (magic == NULL) { /* non-change */ return pcmk_rc_ok; } if (!decode_transition_magic(magic, NULL, NULL, NULL, &status, &rc, &target_rc)) { crm_err("Invalid event %s detected for %s", magic, id); return pcmk_rc_ok; } if (parse_op_key(id, &rsc, &task, NULL) == FALSE) { crm_err("Invalid event detected for %s", id); goto bail; } node = crm_element_value(rsc_op, XML_LRM_ATTR_TARGET); while (n != NULL && !pcmk__str_eq(XML_CIB_TAG_STATE, TYPE(n), pcmk__str_casei)) { n = n->parent; } if(node == NULL && n) { node = crm_element_value(n, XML_ATTR_UNAME); } if (node == NULL && n) { node = ID(n); } if (node == NULL) { node = node_id; } if (node == NULL) { crm_err("No node detected for event %s (%s)", magic, id); goto bail; } /* look up where we expected it to be? 
*/ desc = pcmk_rc_str(pcmk_rc_ok); if ((status == PCMK_EXEC_DONE) && (target_rc == rc)) { crm_notice("%s of %s on %s completed: %s", task, rsc, node, desc); if (rc == PCMK_OCF_NOT_RUNNING) { notify = FALSE; } } else if (status == PCMK_EXEC_DONE) { desc = services_ocf_exitcode_str(rc); crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc); } else { desc = pcmk_exec_status_str(status); crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc); } if (notify && options.external_agent) { send_custom_trap(node, rsc, task, target_rc, rc, status, desc); } bail: free(rsc); free(task); return pcmk_rc_ok; } /* This function is just a wrapper around mainloop_set_trigger so that it can be * called from a mainloop directly. It's simply another way of ensuring the screen * gets redrawn. */ static gboolean mon_trigger_refresh(gpointer user_data) { mainloop_set_trigger((crm_trigger_t *) refresh_trigger); return FALSE; } static int handle_op_for_node(xmlNode *xml, void *userdata) { const char *node = crm_element_value(xml, XML_ATTR_UNAME); if (node == NULL) { node = ID(xml); } handle_rsc_op(xml, (void *) node); return pcmk_rc_ok; } static void crm_diff_update_v2(const char *event, xmlNode * msg) { xmlNode *change = NULL; xmlNode *diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); for (change = pcmk__xml_first_child(diff); change != NULL; change = pcmk__xml_next(change)) { const char *name = NULL; const char *op = crm_element_value(change, XML_DIFF_OP); const char *xpath = crm_element_value(change, XML_DIFF_PATH); xmlNode *match = NULL; const char *node = NULL; if(op == NULL) { continue; } else if(strcmp(op, "create") == 0) { match = change->children; } else if(strcmp(op, "move") == 0) { continue; } else if(strcmp(op, "delete") == 0) { continue; } else if(strcmp(op, "modify") == 0) { match = first_named_child(change, XML_DIFF_RESULT); if(match) { match = match->children; } } if(match) { name = (const char *)match->name; } crm_trace("Handling %s operation for %s %p, %s", op, 
xpath, match, name); if(xpath == NULL) { /* Version field, ignore */ } else if(name == NULL) { crm_debug("No result for %s operation to %s", op, xpath); CRM_ASSERT(strcmp(op, "delete") == 0 || strcmp(op, "move") == 0); } else if(strcmp(name, XML_TAG_CIB) == 0) { pcmk__xe_foreach_child(first_named_child(match, XML_CIB_TAG_STATUS), NULL, handle_op_for_node, NULL); } else if(strcmp(name, XML_CIB_TAG_STATUS) == 0) { pcmk__xe_foreach_child(match, NULL, handle_op_for_node, NULL); } else if(strcmp(name, XML_CIB_TAG_STATE) == 0) { node = crm_element_value(match, XML_ATTR_UNAME); if (node == NULL) { node = ID(match); } handle_rsc_op(match, (void *) node); } else if(strcmp(name, XML_CIB_TAG_LRM) == 0) { node = ID(match); handle_rsc_op(match, (void *) node); } else if(strcmp(name, XML_LRM_TAG_RESOURCES) == 0) { char *local_node = pcmk__xpath_node_id(xpath, "lrm"); handle_rsc_op(match, local_node); free(local_node); } else if(strcmp(name, XML_LRM_TAG_RESOURCE) == 0) { char *local_node = pcmk__xpath_node_id(xpath, "lrm"); handle_rsc_op(match, local_node); free(local_node); } else if(strcmp(name, XML_LRM_TAG_RSC_OP) == 0) { char *local_node = pcmk__xpath_node_id(xpath, "lrm"); handle_rsc_op(match, local_node); free(local_node); } else { crm_trace("Ignoring %s operation for %s %p, %s", op, xpath, match, name); } } } static void crm_diff_update_v1(const char *event, xmlNode * msg) { /* Process operation updates */ xmlXPathObject *xpathObj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_LRM_TAG_RSC_OP); int lpc = 0, max = numXpathResults(xpathObj); for (lpc = 0; lpc < max; lpc++) { xmlNode *rsc_op = getXpathResult(xpathObj, lpc); handle_rsc_op(rsc_op, NULL); } freeXpathObject(xpathObj); } static void crm_diff_update(const char *event, xmlNode * msg) { int rc = -1; static bool stale = FALSE; gboolean cib_updated = FALSE; xmlNode *diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); out->progress(out, false); if (current_cib != NULL) { rc = 
xml_apply_patchset(current_cib, diff, TRUE); switch (rc) { case -pcmk_err_diff_resync: case -pcmk_err_diff_failed: crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc); free_xml(current_cib); current_cib = NULL; break; case pcmk_ok: cib_updated = TRUE; break; default: crm_notice("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc); free_xml(current_cib); current_cib = NULL; } } if (current_cib == NULL) { crm_trace("Re-requesting the full cib"); cib->cmds->query(cib, NULL, ¤t_cib, cib_scope_local | cib_sync_call); } if (options.external_agent) { int format = 0; crm_element_value_int(diff, "format", &format); switch(format) { case 1: crm_diff_update_v1(event, msg); break; case 2: crm_diff_update_v2(event, msg); break; default: crm_err("Unknown patch format: %d", format); } } if (current_cib == NULL) { if(!stale) { out->info(out, "--- Stale data ---"); } stale = TRUE; return; } stale = FALSE; refresh_after_event(cib_updated, FALSE); } static int mon_refresh_display(gpointer user_data) { int rc = pcmk_rc_ok; last_refresh = time(NULL); if (output_format == mon_output_none || output_format == mon_output_unset) { return G_SOURCE_REMOVE; } if (fence_history == pcmk__fence_history_full && !pcmk_all_flags_set(show, pcmk_section_fencing_all) && output_format != mon_output_xml) { fence_history = pcmk__fence_history_reduced; } // Get an up-to-date pacemakerd status for the cluster summary if (cib->variant == cib_native) { pcmk__pacemakerd_status(out, crm_system_name, options.reconnect_ms / 2, false, &pcmkd_state); } if (out->dest != stdout) { out->reset(out); } rc = pcmk__output_cluster_status(out, st, cib, current_cib, pcmkd_state, fence_history, show, show_opts, options.only_node,options.only_rsc, options.neg_location_prefix, output_format == mon_output_monitor); if (output_format == mon_output_monitor && rc != pcmk_rc_ok) { clean_up(MON_STATUS_WARN); return G_SOURCE_REMOVE; } else if (rc == pcmk_rc_schema_validation) { clean_up(CRM_EX_CONFIG); return 
G_SOURCE_REMOVE; } if (out->dest != stdout) { out->finish(out, CRM_EX_OK, true, NULL); } return G_SOURCE_CONTINUE; } /* This function is called for fencing events (see setup_fencer_connection() for * which ones) when --watch-fencing is used on the command line */ static void mon_st_callback_event(stonith_t * st, stonith_event_t * e) { if (st->state == stonith_disconnected) { /* disconnect cib as well and have everything reconnect */ mon_cib_connection_destroy(NULL); } else if (options.external_agent) { char *desc = stonith__event_description(e); send_custom_trap(e->target, NULL, e->operation, pcmk_ok, e->result, 0, desc); free(desc); } } /* Cause the screen to be redrawn (via mainloop_set_trigger) when various conditions are met: * * - If the last update occurred more than reconnect_ms ago (defaults to 5s, but * can be changed via the -i command line option), or * - After every 10 CIB updates, or * - If it's been 2s since the last update * * This function sounds like it would be more broadly useful, but it is only called when a * fencing event is received or a CIB diff occurrs. 
*/ static void refresh_after_event(gboolean data_updated, gboolean enforce) { static int updates = 0; time_t now = time(NULL); if (data_updated) { updates++; } if(refresh_timer == NULL) { refresh_timer = mainloop_timer_add("refresh", 2000, FALSE, mon_trigger_refresh, NULL); } if (reconnect_timer > 0) { /* we will receive a refresh request after successful reconnect */ mainloop_timer_stop(refresh_timer); return; } /* as we're not handling initial failure of fencer-connection as * fatal give it a retry here * not getting here if cib-reconnection is already on the way */ setup_fencer_connection(); if (enforce || ((now - last_refresh) > (options.reconnect_ms / 1000)) || updates >= 10) { mainloop_set_trigger((crm_trigger_t *) refresh_trigger); mainloop_timer_stop(refresh_timer); updates = 0; } else { mainloop_timer_start(refresh_timer); } } /* This function is called for fencing events (see setup_fencer_connection() for * which ones) when --watch-fencing is NOT used on the command line */ static void mon_st_callback_display(stonith_t * st, stonith_event_t * e) { if (st->state == stonith_disconnected) { /* disconnect cib as well and have everything reconnect */ mon_cib_connection_destroy(NULL); } else { out->progress(out, false); refresh_after_event(TRUE, FALSE); } } /* * De-init ncurses, disconnect from the CIB manager, disconnect fencing, * deallocate memory and show usage-message if requested. * * We don't actually return, but nominally returning crm_exit_t allows a usage * like "return clean_up(exit_code);" which helps static analysis understand the * code flow. */ static crm_exit_t clean_up(crm_exit_t exit_code) { /* Quitting crm_mon is much more complicated than it ought to be. */ /* (1) Close connections, free things, etc. 
*/ cib__clean_up_connection(&cib); stonith_api_delete(st); free(options.neg_location_prefix); free(options.only_node); free(options.only_rsc); free(options.pid_file); g_slist_free_full(options.includes_excludes, free); g_strfreev(processed_args); /* (2) If this is abnormal termination and we're in curses mode, shut down * curses first. Any messages displayed to the screen before curses is shut * down will be lost because doing the shut down will also restore the * screen to whatever it looked like before crm_mon was started. */ if ((error != NULL || exit_code == CRM_EX_USAGE) && output_format == mon_output_console) { out->finish(out, exit_code, false, NULL); pcmk__output_free(out); out = NULL; } /* (3) If this is a command line usage related failure, print the usage * message. */ if (exit_code == CRM_EX_USAGE && (output_format == mon_output_console || output_format == mon_output_plain)) { char *help = g_option_context_get_help(context, TRUE, NULL); fprintf(stderr, "%s", help); g_free(help); } pcmk__free_arg_context(context); /* (4) If this is any kind of error, print the error out and exit. Make * sure to handle situations both before and after formatted output is * set up. We want errors to appear formatted if at all possible. */ if (error != NULL) { if (out != NULL) { out->err(out, "%s: %s", g_get_prgname(), error->message); out->finish(out, exit_code, true, NULL); pcmk__output_free(out); } else { fprintf(stderr, "%s: %s\n", g_get_prgname(), error->message); } g_clear_error(&error); crm_exit(exit_code); } /* (5) Print formatted output to the screen if we made it far enough in * crm_mon to be able to do so. */ if (out != NULL) { if (!options.daemonize) { out->finish(out, exit_code, true, NULL); } pcmk__output_free(out); pcmk__unregister_formats(); } crm_exit(exit_code); }