diff --git a/ChangeLog b/ChangeLog index d70edbd0bc..e4458908f4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,2005 +1,2219 @@ +* Wed Jun 24 2015 Andrew Beekhof Pacemaker-1.1.13-1 +- Update source tarball to revision: 2a1847e +- Changesets: 750 +- Diff: 156 files changed, 11323 insertions(+), 3725 deletions(-) + +- Features added since Pacemaker-1.1.12 + + Allow fail-counts to be removed en-mass when the new attrd is in operation + + attrd supports private attributes (not written to CIB) + + crmd: Ensure a watchdog device is in use if stonith-watchdog-timeout is configured + + crmd: If configured, trigger the watchdog immediately if we loose quorum and no-quorum-policy=suicide + + crm_diff: Support generating a difference without versions details if --no-version/-u is supplied + + crm_resource: Implement an intelligent restart capability + + Fencing: Advertise the watchdog device for fencing operations + + Fencing: Allow the cluster to recover resources if the watchdog is in use + + fencing: cl#5134 - Support random fencing delay to avoid double fencing + + mcp: Allow orphan children to initiate node panic via SIGQUIT + + mcp: Turn on sbd integration if pacemakerd finds it running + + mcp: Two new error codes that result in machine reset or power off + + Officially support the resource-discovery attribute for location constraints + + PE: Allow natural ordering of colocation sets + + PE: Support non-actionable degraded mode for OCF + + pengine: cl#5207 - Display "UNCLEAN" for resources running on unclean offline nodes + + remote: pcmk remote client tool for use with container wrapper script + + Support machine panics for some kinds of errors (via sbd if available) + + tools: add crm_resource --wait option + + tools: attrd_updater supports --query and --all options + + tools: attrd_updater: Allow attributes to be set for other nodes + +- Changes since Pacemaker-1.1.12 + + pengine: exclusive discovery implies rsc is only allowed on exclusive subset of nodes + + acl: Correctly implement the 'reference' acl directive + + acl: Do not delay evaluation of added nodes in some situations + + attrd: b22b1fe did uuid test too early + + attrd: Clean out the node cache when requested by the admin + + attrd: fixes double free in attrd legacy + + attrd: properly write attributes for peers once uuid is discovered + + attrd: refresh should force an immediate write-out of all attributes + + attrd: Simplify how node deletions happen + + Bug rhbz#1067544 - Tools: Correctly handle --ban, --move and --locate for master/slave groups + + Bug rhbz#1181824 - Ensure the DC can be reliably fenced + + cib: Ability to upgrade cib validation schema in legacy mode + + cib: Always generate digests for cib diffs in legacy mode + + cib: assignment where comparison intended + + cib: Avoid nodeid conflicts we don't care about + + cib: Correctly add "update-origin", "update-client" and "update-user" attributes for cib + + cib: Correctly set up signal handlers + + cib: Correctly track node state + + cib: Do not update on disk backups if we're just querying them + + cib: Enable cib legacy mode for plugin-based clusters + + cib: Ensure file-based backends treat '-o section' consistently with the native backend + + cib: Ensure upgrade operations from a non-DC get an acknowledgement + + cib: No need to enforce cib digests for v2 diffs in legacy mode + + cib: Revert d153b86 to instantly get cib synchronized in legacy mode + + cib: tls sock cleanup for remote cib connections + + cli: Ensure subsequent unknown long options are correctly detected + + cluster: Invoke crm_remove_conflicting_peer() only when the new node's uname is being assigned in the node cache + + common: Increment current and age for lib common as a result of APIs being added + + corosync: Bug cl#5232 - Somewhat gracefully handle nodes with invalid UUIDs + + corosync: Avoid unnecessary repeated CMAP API calls + + crmd/pengine: handle on-fail=ignore properly + + crmd: Add "on_node" attribute for *_last_failure_0 lrm resource operations + + crmd: All peers need to track node shutdown requests + + crmd: Cached copies of transient attributes cease to be valid once a node leaves the membership + + crmd: Correctly add the local option that validates against schema for pengine to calculate + + crmd: Disable debug logging that results in significant overhead + + crmd: do not remove connection resources during re-probe + + crmd: don't update fail count twice for same failure + + crmd: Ensure remote connection resources timeout properly during 'migrate_from' action + + crmd: Ensure throttle_mode() does something on Linux + + crmd: Fixes crash when remote connection migration fails + + crmd: gracefully handle remote node disconnects during op execution + + crmd: Handle remote connection failures while executing ops on remote connection + + crmd: include remote nodes when forcing cluster wide resource reprobe + + crmd: never stop recurring monitor ops for pcmk remote during incomplete migration + + crmd: Prevent the old version of DC from being fenced when it shuts down for rolling-upgrade + + crmd: Prevent use-of-NULL during reprobe + + crmd: properly update job limit for baremetal remote-nodes + + crmd: Remote-node throttle jobs count towards cluster-node hosting conneciton rsc + + crmd: Reset stonith failcount to recover transitioner when the node rejoins + + crmd: resolves memory leak in crmd. + + crmd: respect start-failure-is-fatal even for artifically injected events + + crmd: Wait for all pending operations to complete before poking the policy engine + + crmd: When container's host is fenced, cancel in-flight operations + + crm_attribute: Correctly update config options when -o crm_config is specified + + crm_failcount: Better error reporting when no resource is specified + + crm_mon: add exit reason to resource failure output + + crm_mon: Fill CRM_notify_node in traps with node's uname rather than node's id if possible + + crm_mon: Repair notification delivery when the v2 patch format is in use + + crm_node: Correctly remove nodes from the CIB by nodeid + + crm_report: More patterns for finding logs on non-DC nodes + + crm_resource: Allow resource restart operations to be node specific + + crm_resource: avoid deletion of lrm cache on node with resource discovery disabled. + + crm_resource: Calculate how long to wait for a restart based on the resource timeouts + + crm_resource: Clean up memory in --restart error paths + + crm_resource: Display the locations of all anonymous clone children when supplying the children's common ID + + crm_resource: Ensure --restart sets/clears meta attributes + + crm_resource: Ensure fail-counts are purged when we redetect the state of all resources + + crm_resource: Implement --timeout for resource restart operations + + crm_resource: Include group members when calculating the next timeout + + crm_resource: Memory leak in error paths + + crm_resource: Prevent use-after-free + + crm_resource: Repair regression test outputs + + crm_resource: Use-after-free when restarting a resource + + dbus: ref count leaks + + dbus: Ensure both the read and write queues get dispatched + + dbus: Fail gracefully if malloc fails + + dbus: handle dispatch queue when multiple replies need to be processed + + dbus: Notice when dbus connections get disabled + + dbus: Remove double-free introduced while trying to make coverity shut up + + ensure if B is colocated with A, B can never run without A + + fence_legacy: Avoid passing 'port' to cluster-glue agents + + fencing: Allow nodes to be purged from the member cache + + fencing: Correctly make args for fencing agents + + fencing: Correctly wait for self-fencing to occur when the watchdog is in use + + fencing: Ensure the hostlist parameter is set for watchdog agents + + fencing: Force 'stonith-ng' as the system name + + fencing: Gracefully handle invalid metadata from agents + + fencing: If configured, wait stonith-watchdog-timer seconds for self-fencing to complete + + fencing: Reject actions for devices that haven't been explicitly registered yet + + ipc: properly allocate server enforced buffer size on client + + ipc: use server enforced buffer during ipc client send + + lrmd, services: interpret LSB status codes properly + + lrmd: add back support for class heartbeat agents + + lrmd: cancel pending async connection during disconnect + + lrmd: enable ipc proxy for docker-wrapper privileged mode + + lrmd: fix rescheduling of systemd monitor op during start + + lrmd: Handle systemd reporting 'done' before a resource is actually stopped + + lrmd: Hint to child processes that using sd_notify is not required + + lrmd: Log with the correct personality + + lrmd: Prevent glib assert triggered by timers being removed from mainloop more than once + + lrmd: report original timeout when systemd operation completes + + lrmd: store failed operation exit reason in cib + + mainloop: resolves race condition mainloop poll involving modification of ipc connections + + make targetted reprobe for remote node work, crm_resource -C -N + + mcp: Allow a configurable delay when debugging shutdown issues + + mcp: Avoid requiring 'export' for SYS-V sysconfig options + + Membership: Detect and resolve nodes that change their ID + + pacemakerd: resolves memory leak of xml structure in pacemakerd + + pengine: ability to launch resources in isolated containers + + pengine: add #kind=remote for baremetal remote-nodes + + pengine: allow baremetal remote-nodes to recover without requiring fencing when cluster-node fails + + pengine: allow remote-nodes to be placed in maintenance mode + + pengine: Avoid trailing whitespaces when printing resource state + + pengine: cl#5130 - Choose nodes capable of running all the colocated utilization resources + + pengine: cl#5130 - Only check the capacities of the nodes that are allowed to run the resource + + pengine: Correctly compare feature set to determine how to unpack meta attributes + + pengine: disable migrations for resources with isolation containers + + pengine: disable reloading of resources within isolated container wrappers + + pengine: Do not aggregate children in a pending state into the started/stopped/etc lists + + pengine: Do not record duplicate copies of the failed actions + + pengine: Do not reschedule monitors that are no longer needed while resource definitions have changed + + pengine: Fence baremetal remote when recurring monitor op fails + + pengine: Fix colocation with unmanaged resources + + pengine: Fix the behaviors of multi-state resources with asymmetrical ordering + + pengine: fixes pengine crash with orphaned remote node connection resource + + pengine: fixes segfault caused by malformed log warning + + pengine: handle cloned isolated resources in a sane way + + pengine: handle isolated resource scenario, cloned group of isolated resources + + pengine: Handle ordering between stateful and migratable resources + + pengine: imply stop in container node resources when host node is fenced + + pengine: only fence baremetal remote when connection can fails or can not be recovered + + pengine: only kill process group on timeout when on-fail does not equal block. + + pengine: per-node control over resource discovery + + pengine: prefer migration target for remote node connections + + pengine: prevent disabling rsc discovery per node in certain situations + + pengine: Prevent use-after-free in sort_rsc_process_order() + + pengine: properly handle ordering during remote connection partial migration + + pengine: properly recover remote-nodes when cluster-node proxy goes offline + + pengine: remove unnecessary whitespace from notify environment variables + + pengine: require-all feature for ordered clones + + pengine: Resolve memory leaks + + pengine: resource discovery mode for location constraints + + pengine: restart master instances on instance attribute changes + + pengine: Turn off legacy unpacking of resource options into the meta hashtable + + pengine: Watchdog integration is sufficient for fencing + + Perform systemd reloads asynchronously + + ping: Correctly advertise multiplier default + + Prefer to inherit the watchdog timeout from SBD + + properly record stop args after reload + + provide fake meta data for ra class heartbeat + + remote: report timestamps for remote connection resource operations + + remote: Treat recv msg timeout as a disconnect + + service: Prevent potential use-of-NULL in metadata lookups + + solaris: Allow compilation when dirent.d_type is not available + + solaris: Correctly replace the linux swab functions + + solaris: Disable throttling since /proc doesn't exist + + stonith-ng: Correctly observe the watchdog completion timeout + + stonith-ng: Correctly track node state + + stonith-ng: Reset mainloop source IDs after removing them + + systemd: Correctly handle long running stop actions + + systemd: Ensure failed monitor operations always return + + systemd: Ensure we don't call dbus_message_unref() with NULL + + systemd: fix crash caused when canceling in-flight operation + + systemd: Kindly ask dbus NOT to kill the process if the dbus connection fails + + systemd: Perform actions asynchronously + + systemd: Perform monitor operations without blocking + + systemd: Tell systemd not to take DBus down from underneath us + + systemd: Trick systemd into not stopping our services before us during shutdown + + tools: Improve crm_mon output with certain option combinations + + upstart: Monitor actions always return 'ok' or 'not running' + + upstart: Perform more parts of monitor operations without blocking + + xml: add 'require-all' to xml schema for constraints + + xml: cl#5231 - Unset the deleted attributes in the resulting diffs + + xml: Clone the latest constraint schema in preparation for changes" + + xml: Correctly create v1 patchsets when deleting attributes + + xml: Do not change the ordering of properties when applying v1 cib diffs + + xml: Do not dump deleted attributes + + xml: Do not prune leaves from v1 cib diffs that are being created with digests + + xml: Ensure ACLs are reapplied before calculating what a replace operation changed + + xml: Fix upgrade-1.3.xsl to correctly transform ACL rules with "attribute" + + xml: Prevent assert errors in crm_element_value() on applying a patch without version information + + xml: Prevent potential use-of-NULL + + * Tue Jul 22 2014 Andrew Beekhof Pacemaker-1.1.12-1 - Update source tarball to revision: 93a037d - Changesets: 795 - Diff: 195 files changed, 13772 insertions(+), 6176 deletions(-) - Features added since Pacemaker-1.1.11 + Changes to the ACL schema to support nodes and unix groups + cib: Check ACLs prior to making the update instead of parsing the diff afterwards + cib: Default ACL support to on + cib: Enable the more efficient xml patchset format + cib: Implement zero-copy status update + cib: Send all r/w operations via the cluster connection and have all nodes process them + crmd: Set "cluster-name" property to corosync's "cluster_name" by default for corosync-2 + crm_mon: Display brief output if "-b/--brief" is supplied or 'b' is toggled + crm_report: Allow ssh alternatives to be used + crm_ticket: Support multiple modifications for a ticket in an atomic operation + extra: Add logrotate configuration file for /var/log/pacemaker.log + Fencing: Add the ability to call stonith_api_time() from stonith_admin + logging: daemons always get a log file, unless explicitly set to configured 'none' + logging: allows the user to specify a log level that is output to syslog + PE: Automatically re-unfence a node if the fencing device definition changes + pengine: cl#5174 - Allow resource sets and templates for location constraints + pengine: Support cib object tags + pengine: Support cluster-specific instance attributes based on rules + pengine: Support id-ref in nvpair with optional "name" + pengine: Support per-resource maintenance mode + pengine: Support site-specific instance attributes based on rules + tools: Allow crm_shadow to create older configuration versions + tools: Display pending state in crm_mon/crm_resource/crm_simulate if --pending/-j is supplied (cl#5178) + xml: Add the ability to have lightweight schema revisions + xml: Enable resource sets in location constraints for 1.2 schema + xml: Support resources that require unfencing - Changes since Pacemaker-1.1.11 + acl: Authenticate pacemaker-remote requests with the node name as the client + acl: Read access must be explicitly granted + attrd: Ensure attribute dampening is always observed + attrd: Remove offline nodes from node cache for "peer-remove" requests + Bug cl#5055 - Improved migration support. + Bug cl#5184 - Ensure pending probes that ultimately fail are correctly updated + Bug cl#5196 - pengine: Check values after expanding templates + Bug cl#5212 - Do not promote instances when quorum is lots and no-quorum-policy=freeze + Bug cl#5213 - Ensure role colocation with -INFINITY is enforced + Bug cl#5213 - Limit the scope of the previous commit to the masters role + Bug cl#5219 - pengine: Allow unrelated resources with a common colocation target to remain promoted + Bug cl#5222 - cib: Repair rolling update capability + Bug cl#5222 - Enable legacy mode whenever a broadcast update is detected + Bug rhbz#1036631 - Stop members of cloned groups when dependancies are stopped + Bug rhbz#1054307 - cname pattern match should be more restrictive in init script + Bug rhbz#1057697 - Use native DBus library for systemd/upstart support to avoid problematic use of threads + Bug rhbz#1097457 - Limit the scope of the previous fix and include a helpful comment + Bug rhbz#1097457 - Prevent invalid transition when resource are ordered to start after the container they're started in + cib: allow setting permanent remote-node attributes + cib: Auto-detect which patchset format to use + cib: Determine the best value of validate-with if one is not supplied + cib: Do not disable cib disk writes if on-disk cib is corrupt + cib: Ensure 'cibadmin -R/--replace' commands get replies + cib: Erasing the cib is an admin action, bump the admin_epoch instead + cib: Fix remote cib based on TLS + cib: Ingore patch failures if we already have their contents + cib: Validate that everyone still sees the same configuration once all updates have completed + cibadmin: Allow priviliged clients to perform tasks as unpriviliged users + cibadmin: Remove dangerous commands that exposed unnecessary implementation internal details + cluster: Fix segfault on removing a node + cluster: Prevent search of unames from attempting to create node entries for unknown nodes + cluster: Remove unknown offline nodes with conflicting unames from node cache + controld: Do not consider the dlm up until the address list is present + controld: handling startup fencing within the controld agent, not the dlm + controld: Return OCF_ERR_INSTALLED instead of OCF_NOT_INSTALLED + crmd: Ack pending operations that were cancelled due to rsc deletion + crmd: Actions can only be executed if their pre-requisits completed successfully + crmd: avoid double free caused by nested hash table removal + crmd: Avoid spamming the cib by triggering a transition only once per non-status change + crmd: Correctly react to successful unfencing operations + crmd: Correctly recognise operation cancellations we initiated + crmd: Do not erase the status section for unfenced nodes + crmd: Do not overwrite existing node state when fencing completes + crmd: Do not start timers for already completed operations + crmd: Ensure crm_config options are re-read on updates + crmd: Fenced nodes that return prior to an election do not need to have their status section reset + crmd: make lrm_state hash table not case sensitive + crmd: make node_state erase correctly + crmd: Only write fence_averride if open() returns a positive file descriptor + crmd: Prevent manual fencing confirmations from attempting to create node entries for unknown nodes + crmd: Prevent SIGPIPE when notifying CMAN about fencing operations + crmd: Remove state of unknown nodes with conflicting unames from CIB + crmd: Remove unknown nodes with conflicting unames from CIB + crmd: Report unsuccessful unfencing operations + crm_diff: Allow the generation of xml patchsets without digests + crm_mon: Allow the file created by --as-html to be world readable + crm_mon: Ensure resource attributes have been unpacked before displaying connectivity data + crm_node: Only remove the named resource from the cib + crm_report: Gracefully handle rediculously large logfiles + crm_report: Only gather dlm data if dlm_controld is running + crm_resource: Gracefully handle -EACCESS when querying the cib + crm_verify: Perform a full set of calculations whenever the status section is present + fencing: Advertise support for reboot/on/off in the metadata for legacy agents + fencing: Automatically switch from 'list' to 'status' to 'static-list' if those actions are not advertised in the metadata + fencing: Cache metadata lookups to avoid repeated blocking during device registration + fencing: Correctly record which peer performed the fencing operation + fencing: default to 'off' when agent does not advertise 'reboot' in metadata + fencing: Do not unregister/register all stonith devices on every resource agent change + fencing: Execute all required fencing devices regardless of what topology level they are at + fencing: Fence using all required devices + fencing: Pass the correct options when looking up the history by node name + fencing: Update stonith device list only if stonith is enabled + get_cluster_type: failing concurrent tool invocations on heartbeat + ignore SIGPIPE when gnutls is in use + iso8601: Different logic is needed when logging and calculating durations + iso8601: Fix memory leak in duration calculation + Logging: Bootstrap daemon logging before processing arguments but configure it afterwards + lrmd: Cancel recurring operations before stop action is executed + lrmd: Expose logging variables expected by OCF agents + lrmd: Handle systemd reporting 'done' before a resource is actually stopped/started + lrmd: Merge duplicate recurring monitor operations + lrmd: Prevent OCF agents from logging to random files due to "value" of setenv() being NULL + lrmd: Provide stderr output from agents if available, otherwise fall back to stdout + mainloop: Better handle the killing of processes in the act of exiting + mainloop: Canceling in-flight operations should not fail if child process has already exited. + mainloop: Fixes use after free in process monitor code + mcp: Tell systemd not to respawn us if we exit with rc=100 + membership: Avoid duplicate peer entries in the peer cache + pengine: Allow container nodes to migrate with connection resource + pengine: avoid assert by searching for stop action on correct node during LogActions + pengine: Block restart of resources if any dependent resource in a group is unmanaged + pengine: cl#5186 - Avoid running rsc on two nodes when node is fenced during migration + pengine: cl#5187 - Prevent resources in an anti-colocation from even temporarily running on a same node + pengine: cl#5200 - Before migrating utilization-using resources to a node, take off the load that will no longer run there if it's not introducing transition loop + pengine: Correctly handle origin offsets in the future + pengine: Correctly observe requires=nothing + pengine: Default sequential to TRUE for resource sets for consistency with colocation sets + pengine: Delay unfencing until after we know the state of all resources that require unfencing + pengine: Do not initiate fencing for unclean nodes when fencing is disabled + pengine: Ensure instance numbers are preserved for cloned templates + pengine: Ensure unfencing only happens once, even if the transition is interrupted + pengine: Fencing devices default to only requiring quorum in order to start + pengine: fixes invalid transition caused by clones with more than 10 instances + pengine: Force record pending for migrate_to actions + pengine: handles edge case where container order constraints are not honored during migration + pengine: Ignore failure-timeout only if the failed operation has on-fail="block" + pengine: Mark unrunnable stop actions as "blocked" and show the correct current locations + pengine: Memory leaks + pengine: properly handle fencing of container remote-nodes when the container is orphaned + pengine: properly place resource within a container when container is a remote-node. + pengine: Unfencing is based on device probes, there is no need to unfence when normal resources are found active + pengine: Use "#cluster-name" in rules for setting cluster-specific instance attributes + pengine: Use "#site-name" in rules for setting site-specific instance attributes + remote: Allow baremetal remote-node connection resources to migrate + remote: clear remote-node status correctly + remote: Enable migration support for baremetal connection resources by default + remote: Handle request/response ipc proxy correctly + services: Correctly reset the nice value for lrmd's children + services: Do not allow duplicate recurring op entries + services: Do not block synced service executions + services: Fixes segfault associated with cancelling in-flight recurring operations. + services: Remove cancelled recurring ops from internal lists as early as possible + services: Remove file descriptors from mainloop as soon as we have drained them + services: Reset the scheduling policy and priority for lrmd's children without replying on SCHED_RESET_ON_FORK + services_action_cancel: Interpret return code from mainloop_child_kill() correctly + stonith_admin: Ensure pointers passed to sscanf() are properly initialized + stonith_api_time_helper now returns when the most recent fencing operation completed + systemd: Prevent use-of-NULL when determining if an agent exists + systemd: Try to handle dbus actions that complete prior to configuring a callback + Tools: Non-daemons shouldn't abort just because xml parsing failed + Upstart: Allow comilation with glib versions older than 2.28 + Upstart: Do not attempt upstart jobs if we cannot connect to dbus + When data was old, it fixed so that the newest cib might not be acquired. + xml: Check all available schemas when doing upgrades + xml: Correctly determine the lowest allowed schema version + xml: Correctly enforce ACLs after a replace operation + xml: Correctly infer attribute changes after a replace operation + xml: Create the correct diff when only part of a document is changed + xml: Detect attribute ordering changes + xml: Detect content that is added and removed in the same update + xml: Do not prune meaningful leaves from v1 patchsets + xml: Empty patchsets are considered to have applied cleanly + xml: Ensure patches always have version details set + xml: Find the minimal set of changes when part of a document is replaced + xml: If validate-with is missing, we find the most recent schema that accepts it and go from there + xml: Introduce a 'move' primitive for v2 patch sets + xml: Preserve the attribute order in the patch for subsequent digest validation + xml: Resolve memory leak when logging xml blobs + xml: Update xml validation to allow '' * Thu Feb 13 2014 David Vossel Pacemaker-1.1.11-1 - Update source tarball to revision: 33f9d09 - Changesets: 462 - Diff: 147 files changed, 6810 insertions(+), 4057 deletions(-) - Features added since Pacemaker-1.1.10 + attrd: A truly atomic version of attrd for use where CPG is used for cluster communication + cib: Allow values to be added/updated and removed in a single update + cib: Support XML comments in diffs + Core: Allow blackbox logging to be disabled with SIGUSR2 + crmd: Do not block on proxied calls from pacemaker_remoted + crmd: Enable cluster-wide throttling when the cib heavily exceeds its target load + crmd: Make the per-node action limit directly configurable in the CIB + crmd: Slow down recovery on nodes with IO load + crmd: Track CPU usage on cluster nodes and slow down recovery on nodes with high CPU/IO load + crm_mon: add --hide-headers option to hide all headers + crm_node: Display partition output in sorted order + crm_report: Collect logs directly from journald if available + Fencing: On timeout, clean up the agent's entire process group + Fencing: Support agents that need the host to be unfenced at startup + ipc: Raise the default buffer size to 128k + PE: Add a special attribute for distinguishing between real nodes and containers in constraint rules + PE: Allow location constraints to take a regex pattern to match against resource IDs + pengine: Distinguish between the agent being missing and something the agent needs being missing + remote: Properly version the remote connection protocol - Changes since Pacemaker-1.1.10 + Bug rhbz#1011618 - Consistently use 'Slave' as the role for unpromoted master/slave resources + Bug rhbz#1057697 - Use native DBus library for systemd and upstart support to avoid problematic use of threads + attrd: Any variable called 'cluster' makes the daemon crash before reaching main() + attrd: Avoid infinite write loop for unknown peers + attrd: Drop all attributes for peers that left the cluster + attrd: Give remote-nodes ability to set attributes with attrd + attrd: Prevent inflation of attribute dampen intervals + attrd: Support SI units for attribute dampening + Bug cl#5171 - pengine: Don't prevent clones from running due to dependant resources + Bug cl#5179 - Corosync: Attempt to retrieve a peer's node name if it is not already known + Bug cl#5181 - corosync: Ensure node IDs are written to the CIB as unsigned integers + Bug rhbz#902407 - crm_resource: Handle --ban for master/slave resources as advertised + cib: Correctly check for archived configuration files + cib: Correctly log short-form xml diffs + cib: Fix remote cib based on TLS + cibadmin: Report errors during sign-off + cli: Do not enabled blackbox for cli tools + cluster: Fix segfault on removing a node + cman: Do not start pacemaker if cman startup fails + cman: Start clvmd and friends from the init script if enabled + Command-line tools should stop after an assertion failure + controld: Use the correct variant of dlm_controld for corosync-2 clusters + cpg: Correctly set the group name length + cpg: Ensure the CPG group is always null-terminated + cpg: Only process one message at a time to allow other priority jobs to be performed + crmd: Correctly observe the configured batch-limit + crmd: Correctly update expected state when the previous DC shuts down + crmd: Correcty update the history cache when recurring ops change their return code + crmd: Don't add node_state to cib, if we have not seen or fenced this node yet + crmd: don't segfault on shutdown when using heartbeat + crmd: Prevent recurring monitors being cancelled due to notify operations + crmd: Reliably detect and act on reprobe operations from the policy engine + crmd: When a peer expectedly shuts down, record the new join and expected states into the cib + crmd: When the DC gracefully shuts down, record the new expected state into the cib + crm_attribute: Do not swallow hostname lookup failures + crm_mon: Do not display duplicates of failed actions + crm_mon: Reduce flickering in interactive mode + crm_resource: Observe --master modifier for --move + crm_resource: Provide a meaningful error if --master is used for primitives and groups + fencing: Allow fencing for node after topology entries are deleted + fencing: Apply correct score to the resource of group + fencing: Ignore changes to non-fencing resources + fencing: Observe pcmk_host_list during automatic unfencing + fencing: Put all fencing agent processes into their own process group + fencing: Wait until all possible replies are recieved before continuing with unverified devices + ipc: Compress msgs based on client's actual max send size + ipc: Have the ipc server enforce a minimum buffer size all clients must use. + iso8601: Prevent dates from jumping backwards a day in some timezones + lrmd: Correctly calculate metadata for the 'service' class + lrmd: Correctly cancel monitor actions for lsb/systemd/service resources on cleaning up + mcp: Remove LSB hints that instruct chkconfig to start pacemaker at boot time + mcp: Some distros complain when LSB scripts do not include Default-Start/Stop directives + pengine: Allow fencing of baremetal remote nodes + pengine: cl#5186 - Avoid running rsc on two nodes when node is fenced during migration + pengine: Correctly account for the location preferences of things colocated with a group + pengine: Correctly handle demotion of grouped masters that are partially demoted + pengine: Disable container node probes due to constraint conflicts + pengine: Do not allow colocation with blocked clone instances + pengine: Do not re-allocate clone instances that are blocked in the Stopped state + pengine: Do not restart resources that depend on unmanaged resources + pengine: Force record pending for migrate_to actions + pengine: Location constraints with role=Started should prevent masters from running at all + pengine: Order demote/promote of resources on remote nodes to happen only once the connection is up + pengine: Properly handle orphaned multistate resources living on remote-nodes + pengine: Properly shutdown orphaned remote connection resources + pengine: Recover unexpectedly running container nodes. + remote: Add support for ipv6 into pacemaker_remote daemon + remote: Handle endian changes between client and server and improve forward compatibility + services: Fixes segfault associated with cancelling in-flight recurring operations. + services: Reset the scheduling policy and priority for lrmd's children without replying on SCHED_RESET_ON_FORK * Fri Jul 26 2013 Andrew Beekhof Pacemaker-1.1.10-1 - Update source tarball to revision: ab2e209 - Changesets: 602 - Diff: 143 files changed, 8162 insertions(+), 5159 deletions(-) - Features added since Pacemaker-1.1.9 + Core: Convert all exit codes to positive errno values + crm_error: Add the ability to list and print error symbols + crm_resource: Allow individual resources to be reprobed + crm_resource: Allow options to be set recursively + crm_resource: Implement --ban for moving resources away from nodes and --clear (replaces --unmove) + crm_resource: Support OCF tracing when using --force-(check|start|stop) + PE: Allow active nodes in our current membership to be fenced without quorum + PE: Suppress meaningless IDs when displaying anonymous clone status + Turn off auto-respawning of systemd services when the cluster starts them + Bug cl#5128 - pengine: Support maintenance mode for a single node - Changes since Pacemaker-1.1.9 + crmd: cib: stonithd: Memory leaks resolved and improved use of glib reference counting + attrd: Fixes deleted attributes during dc election + Bug cf#5153 - Correctly display clone failcounts in crm_mon + Bug cl#5133 - pengine: Correctly observe on-fail=block for failed demote operation + Bug cl#5148 - legacy: Correctly remove a node that used to have a different nodeid + Bug cl#5151 - Ensure node names are consistently compared without case + Bug cl#5152 - crmd: Correctly clean up fenced nodes during membership changes + Bug cl#5154 - Do not expire failures when on-fail=block is present + Bug cl#5155 - pengine: Block the stop of resources if any depending resource is unmanaged + Bug cl#5157 - Allow migration in the absence of some colocation constraints + Bug cl#5161 - crmd: Prevent memory leak in operation cache + Bug cl#5164 - crmd: Fixes crash when using pacemaker-remote + Bug cl#5164 - pengine: Fixes segfault when calculating transition with remote-nodes. + Bug cl#5167 - crm_mon: Only print "stopped" node list for incomplete clone sets + Bug cl#5168 - Prevent clones from being bounced around the cluster due to location constraints + Bug cl#5170 - Correctly support on-fail=block for clones + cib: Correctly read back archived configurations if the primary is corrupted + cib: The result is not valid when diffs fail to apply cleanly for CLI tools + cib: Restore the ability to embed comments in the configuration + cluster: Detect and warn about node names with capitals + cman: Do not pretend we know the state of nodes we've never seen + cman: Do not unconditionally start cman if it is already running + cman: Support non-blocking CPG calls + Core: Ensure the blackbox is saved on abnormal program termination + corosync: Detect the loss of members for which we only know the nodeid + corosync: Do not pretend we know the state of nodes we've never seen + corosync: Ensure removed peers are erased from all caches + corosync: Nodes that can persist in sending CPG messages must be alive afterall + crmd: Do not get stuck in S_POLICY_ENGINE if a node we couldn't fence returns + crmd: Do not update fail-count and last-failure for old failures + crmd: Ensure all membership operations can complete while trying to cancel a transition + crmd: Ensure operations for cleaned up resources don't block recovery + crmd: Ensure we return to a stable state if there have been too many fencing failures + crmd: Initiate node shutdown if another node claims to have successfully fenced us + crmd: Prevent messages for remote crmd clients from being relayed to wrong daemons + crmd: Properly handle recurring monitor operations for remote-node agent + crmd: Store last-run and last-rc-change for all operations + crm_mon: Ensure stale pid files are updated when a new process is started + crm_report: Correctly collect logs when 'uname -n' reports fully qualified names + fencing: Fail the operation once all peers have been exhausted + fencing: Restore the ability to manually confirm that fencing completed + ipc: Allow unpriviliged clients to clean up after server failures + ipc: Restore the ability for members of the haclient group to connect to the cluster + legacy: Support "crm_node --remove" with a node name for corosync plugin (bnc#805278) + lrmd: Default to the upstream location for resource agent scratch directory + lrmd: Pass errors from lsb metadata generation back to the caller + pengine: Correctly handle resources that recover before we operate on them + pengine: Delete the old resource state on every node whenever the resource type is changed + pengine: Detect constraints with inappropriate actions (ie. promote for a clone) + pengine: Ensure per-node resource parameters are used during probes + pengine: If fencing is unavailable or disabled, block further recovery for resources that fail to stop + pengine: Implement the rest of get_timet_now() and rename to get_effective_time + pengine: Re-initiate _active_ recurring monitors that previously failed but have timed out + remote: Workaround for inconsistent tls handshake behavior between gnutls versions + systemd: Ensure we get shut down correctly by systemd + systemd: Reload systemd after adding/removing override files for cluster services + xml: Check for and replace non-printing characters with their octal equivalent while exporting xml text + xml: Prevent lockups by setting a more reliable buffer allocation strategy * Fri Mar 08 2013 Andrew Beekhof Pacemaker-1.1.9-1 - Update source tarball to revision: 7e42d77 - Statistics: Changesets: 731 Diff: 1301 files changed, 92909 insertions(+), 57455 deletions(-) - Features added in Pacemaker-1.1.9 + corosync: Allow cman and corosync 2.0 nodes to use a name other than uname() + corosync: Use queues to avoid blocking when sending CPG messages + ipc: Compress messages that exceed the configured IPC message limit + ipc: Use queues to prevent slow clients from blocking the server + ipc: Use shared memory by default + lrmd: Support nagios remote monitoring + lrmd: Pacemaker Remote Daemon for extending pacemaker functionality outside corosync cluster. + pengine: Check for master/slave resources that are not OCF agents + pengine: Support a 'requires' resource meta-attribute for controlling whether it needs quorum, fencing or nothing + pengine: Support for resource container + pengine: Support resources that require unfencing before start - Changes since Pacemaker-1.1.8 + attrd: Correctly handle deletion of non-existant attributes + Bug cl#5135 - Improved detection of the active cluster type + Bug rhbz#913093 - Use crm_node instead of uname + cib: Avoid use-after-free by correctly support cib_no_children for non-xpath queries + cib: Correctly process XML diff's involving element removal + cib: Performance improvements for non-DC nodes + cib: Prevent error message by correctly handling peer replies + cib: Prevent ordering changes when applying xml diffs + cib: Remove text nodes from cib replace operations + cluster: Detect node name collisions in corosync + cluster: Preserve corosync membership state when matching node name/id entries + cman: Force fenced to terminate on shutdown + cman: Ignore qdisk 'nodes' + core: Drop per-user core directories + corosync: Avoid errors when closing failed connections + corosync: Ensure peer state is preserved when matching names to nodeids + corosync: Clean up CMAP connections after querying node name + corosync: Correctly detect corosync 2.0 clusters even if we don't have permission to access it + crmd: Bug cl#5144 - Do not updated the expected status of failed nodes + crmd: Correctly determin if cluster disconnection was abnormal + crmd: Correctly relay messages for remote clients (bnc#805626, bnc#804704) + crmd: Correctly stall the FSA when waiting for additional inputs + crmd: Detect and recover when we are evicted from CPG + crmd: Differentiate between a node that is up and coming up in peer_update_callback() + crmd: Have cib operation timeouts scale with node count + crmd: Improved continue/wait logic in do_dc_join_finalize() + crmd: Prevent election storms caused by getrusage() values being too close + crmd: Prevent timeouts when performing pacemaker level membership negotiation + crmd: Prevent use-after-free of fsa_message_queue during exit + crmd: Store all current actions when stalling the FSA + crm_mon: Do not try to render a blank cib and indicate the previous output is now stale + crm_mon: Fixes crm_mon crash when using snmp traps. + crm_mon: Look for the correct error codes when applying configuration updates + crm_report: Ensure policy engine logs are found + crm_report: Fix node list detection + crm_resource: Have crm_resource generate a valid transition key when sending resource commands to the crmd + date/time: Bug cl#5118 - Correctly convert seconds-since-epoch to the current time + fencing: Attempt to provide more information that just 'generic error' for failed actions + fencing: Correctly record completed but previously unknown fencing operations + fencing: Correctly terminate when all device options have been exhausted + fencing: cov#739453 - String not null terminated + fencing: Do not merge new fencing requests with stale ones from dead nodes + fencing: Do not start fencing until entire device topology is found or query results timeout. + fencing: Do not wait for the query timeout if all replies have arrived + fencing: Fix passing of parameters from CMAN containing '=' + fencing: Fix non-comparison when sorting devices by priority + fencing: On failure, only try a topology device once from the remote level. + fencing: Only try peers for non-topology based operations once + fencing: Retry stonith device for duration of action's timeout period. + heartbeat: Remove incorrect assert during cluster connect + ipc: Bug cl#5110 - Prevent 100% CPU usage when looking for synchronous replies + ipc: Use 50k as the default compression threshold + legacy: Prevent assertion failure on routing ais messages (bnc#805626) + legacy: Re-enable logging from the pacemaker plugin + legacy: Relax the 'active' check for plugin based clusters to avoid false negatives + legacy: Skip peer process check if the process list is empty in crm_is_corosync_peer_active() + mcp: Only define HA_DEBUGLOG to avoid agent calls to ocf_log printing everything twice + mcp: Re-attach to existing pacemaker components when mcp fails + pengine: Any location constraint for the slave role applies to all roles + pengine: Avoid leaking memory when cleaning up failcounts and using containers + pengine: Bug cl#5101 - Ensure stop order is preserved for partially active groups + pengine: Bug cl#5140 - Allow set members to be stopped when the subseqent set has require-all=false + pengine: Bug cl#5143 - Prevent shuffling of anonymous master/slave instances + pengine: Bug rhbz#880249 - Ensure orphan masters are demoted before being stopped + pengine: Bug rhbz#880249 - Teach the PE how to recover masters into primitives + pengine: cl#5025 - Automatically clear failcount for start/monitor failures after resource parameters change + pengine: cl#5099 - Probe operation uses the timeout value from the minimum interval monitor by default (#bnc776386) + pengine: cl#5111 - When clone/master child rsc has on-fail=stop, insure all children stop on failure. + pengine: cl#5142 - Do not delete orphaned children of an anonymous clone + pengine: Correctly unpack active anonymous clones + pengine: Ensure previous migrations are closed out before attempting another one + pengine: Introducing the whitebox container resources feature + pengine: Prevent double-free for cloned primitive from template + pengine: Process rsc_ticket dependencies earlier for correctly allocating resources (bnc#802307) + pengine: Remove special cases for fencing resources + pengine: rhbz#902459 - Remove rsc node status for orphan resources + systemd: Gracefully handle unexpected DBus return types + Replace the use of the insecure mktemp(3) with mkstemp(3) * Thu Sep 20 2012 Andrew Beekhof Pacemaker-1.1.8-1 - Update source tarball to revision: 1a5341f - Statistics: Changesets: 1019 Diff: 2107 files changed, 117258 insertions(+), 73606 deletions(-) - All APIs have been cleaned up and reduced to essentials - Pacemaker now includes a replacement lrmd that supports systemd and upstart agents - Config and state files (cib.xml, PE inputs and core files) have moved to new locations - The crm shell has become a separate project and no longer included with Pacemaker - All daemons/tools now have a unified set of error codes based on errno.h (see crm_error) - Changes since Pacemaker-1.1.7 + Core: Bug cl#5032 - Rewrite the iso8601 date handling code + Core: Correctly extract the version details from a diff + Core: Log blackbox contents, if enabled, when an error occurs + Core: Only LOG_NOTICE and higher are sent to syslog + Core: Replace use of IPC from clplumbing with IPC from libqb + Core: SIGUSR1 now enables blackbox logging, SIGTRAP to write out + Core: Support a blackbox for additional logging detail after crashes/errors + Promote support for advanced fencing logic to the stable schema + Promote support for node starting scores to the stable schema + Promote support for service and systemd to the stable schema + attrd: Differentiate between updating all our attributes and everybody updating all theirs too + attrd: Have single-shot clients wait for an ack before disconnecting + cib: cl#5026 - Synced cib updates should not return until the cpg broadcast is complete. + corosync: Detect when the first corosync has not yet formed and handle it gracefully + corosync: Obtain a full list of configured nodes, including their names, when we connect to the quorum API + corosync: Obtain a node name from DNS if one was not already known + corosync: Populate the cib nodelist from corosync if available + corosync: Use the CFG API and DNS to determine node names if not configured in corosync.conf + crmd: Block after 10 failed fencing attempts for a node + crmd: cl#5051 - Fixes file leak in pe ipc connection initialization. + crmd: cl#5053 - Fixes fail-count not being updated properly. + crmd: cl#5057 - Restart sub-systems correctly (bnc#755671) + crmd: cl#5068 - Fixes crm_node -R option so it works with corosync 2.0 + crmd: Correctly re-establish failed attrd connections + crmd: Detect when the quorum API isn't configured for corosync 2.0 + crmd: Do not overwrite any configured node type (eg. quorum node) + crmd: Enable use of new lrmd daemon and client library in crmd. + crmd: Overhaul the way node state is recorded and updated in the CIB + fencing: Bug rhbz#853537 - Prevent use-of-NULL when the cib libraries are not available + fencing: cl#5073 - Add 'off' as an valid value for stonith-action option. + fencing: cl#5092 - Always timeout stonith operations if timeout period expires. + fencing: cl#5093 - Stonith per device timeout option + fencing: Clean up if we detect a failed connection + fencing: Delegate complex self fencing requests - we wont be around to see it to completion + fencing: Ensure all peers are notified of complex fencing op completion + fencing: Fix passing of fence_legacy parameters containing '=' + fencing: Gracefully handle metadata requests for unknown agents + fencing: Return cached dynamic target list for busy devices. + fencing: rhbz#801355 - Abort transition on DC when external fencing operation is detected + fencing: rhbz#801355 - Merge fence requests for identical operations already in progress. + fencing: rhbz#801355 - Report fencing operations external of pacemaker to cib + fencing: Specify the action to perform using action= instead of the older option= + fencing: Stop building fake metadata for broken agents + fencing: Tolerate agents that report empty metadata in the admin tool + mcp: Correctly retry the connection to corosync on failure + mcp: Do not shut down IPC until the last client exits + mcp: Prevent use-after-free when running against corosync 1.x + pengine: Bug cl#5059 - Use the correct action's status when calculating required actions for interleaved clones + pengine: Bypass online/offline checking resource detection for ping/quorum nodes + pengine: cl#5044 - migrate_to no longer requires load_stopped for avoiding possible transition loop + pengine: cl#5069 - Honor 'on-fail=ignore' even when operation is disabled. + pengine: cl#5070 - Allow influence of promotion score when multistate rsc is left hand of colocation + pengine: cl#5072 - Fixes monitor op stopping after rsc promotion. + pengine: cl#5072 - Fixes pengine regression test failures + pengine: Correctly set the status for nodes not intended to run Pacemaker + pengine: Do not append instance numbers to anonymous clones + pengine: Fix failcount expiration + pengine: Fix memory leaks found by valgrind + pengine: Fix use-after-free and use-of-NULL errors detected by coverity + pengine: Fixes use of colocation scores other than +/- INFINITY + pengine: Improve detection of rejoining nodes + pengine: Prevent use-of-NULL when tracing is enabled + pengine: Stonith resources are allowed to start even if their probes haven't completed on partially active nodes + services: New class called 'service' which expands to the correct (LSB/systemd/upstart) standard + services: Support Asynchronous systemd/upstart actions + Tools: crm_shadow - Bug cl#5062 - Correctly set argv[0] when forking a shell process + Tools: crm_report: Always include system logs (if we can find them) * Wed Mar 28 2012 Andrew Beekhof Pacemaker-1.1.7-1 - Update source tarball to revision: bc7ff2c - Statistics: Changesets: 513 Diff: 1171 files changed, 90472 insertions(+), 19368 deletions(-) - Changes since Pacemaker-1.1.6.1 + ais: Prepare for corosync versions using IPC from libqb + cib: Correctly shutdown in the presence of peers without relying on timers + cib: Don't halt disk writes if the previous digest is missing + cib: Determine when there are no peers to respond to our shutdown request and exit + cib: Ensure no additional messages are processed after we begin terminating + Cluster: Hook up the callbacks to the corosync quorum notifications + Core: basename() may modify its input, do not pass in a constant + Core: Bug cl#5016 - Prevent failures in recurring ops from being lost + Core: Bug rhbz#800054 - Correctly retrieve heartbeat uuids + Core: Correctly determine when an XML file should be decompressed + Core: Correctly track the length of a string without reading from uninitialzied memory (valgrind) + Core: Ensure signals are handled eventually in the absense of timer sources or IPC messages + Core: Prevent use-of-NULL in crm_update_peer() + Core: Strip text nodes from on disk xml files + Core: Support libqb for logging + corosync: Consistently set the correct uuid with get_node_uuid() + Corosync: Correctly disconnect from corosync variants + Corosync: Correctly extract the node id from membership udpates + corosync: Correctly infer lost members from the quorum API + Corosync: Default to using the nodeid as the node's uuid (instead of uname) + corosync: Ensure we catch nodes that leave the membership, even if the ringid doesn't change + corosync: Hook up CPG membership + corosync: Relax a development assert and gracefully handle the error condition + corosync: Remove deprecated member of the CFG API + corosync: Treat CS_ERR_QUEUE_FULL the same as CS_ERR_TRY_AGAIN + corosync: Unset the process list when nodes dissappear on us + crmd: Also purge fencing results when we enter S_NOT_DC + crmd: Bug cl#5015 - Remove the failed operation as well as the resulting fail-count and last-failure attributes + crmd: Correctly determine when a node can suicide with fencing + crmd: Election - perform the age comparison only once + crmd: Fast-track shutdown if we couldn't request it via attrd + crmd: Leave it up to the PE to decide which ops can/cannot be reload + crmd: Prevent use-after-free when calling delete_resource due to CRM_OP_REPROBE + crmd: Supply format arguments in the correct order + fencing: Add missing format parameter + fencing: Add the fencing topology section to the 1.1 configuration schema + fencing: fence_legacy - Drop spurilous host argument from status query + fencing: fence_legacy - Ensure port is available as an environment variable when calling monitor + fencing: fence_pcmk - don't block if nothing is specified on stdin + fencing: Fix log format error + fencing: Fix segfault caused by passing garbage to dlsym() + fencing: Fix use-of-NULL in process_remote_stonith_query() + fencing: Fix use-of-NULL when listing installed devices + fencing: Implement support for advanced fencing topologies: eg. kdump || (network && disk) || power + fencing: More gracefully handle failed 'list' operations for devices that only support a single connection + fencing: Prevent duplicate free when listing devices + fencing: Prevent uninitialized pointers being passed to free + fencing: Prevent use-after-free, we may need the query result for subsequent operations + fencing: Provide enough data to construct an entry in the node's fencing history + fencing: Standardize on /one/ method for clients to request members be fenced + fencing: Supress errors when listing all registered devices + mcp: corosync_cfg_state_track was removed from the corosync API, luckily we didnt use it for anything + mcp: Do not specify a WorkingDirectory in the systemd unit file - startup fails if its not available + mcp: Set the HA_quorum_type env variable consistently with our corosync plugin + mcp: Shut down if one of our child processes can/should not be respawned + pengine: Bug cl#5000 - Ensure ordering is preserved when depending on partial sets + pengine: Bug cl#5028 - Unmanaged services should block shutdown unless in maintainence mode + pengine: Bug cl#5038 - Prevent restart of anonymous clones when clone-max decreases + pengine: Bug cl#5007 - Fixes use of colocation constraints with multi-state resources + pengine: Bug cl#5014 - Prevent asymmetrical order constraints from causing resource stops + pengine: Bug cl#5000 - Implements ability to create rsc_order constraint sets such that A can start after B or C has started. + pengine: Correctly migrate a resource that has just migrated + pengine: Correct return from error path + pengine: Detect reloads of previously migrated resources + pengine: Ensure post-migration stop actions occur before node shutdown + pengine: Log as loudly as possible when we cannot shut down a cluster node + pengine: Reload of a resource no longer causes a restart of dependant resources + pengine: Support limiting the number of concurrent live migrations + pengine: Support referencing templates in constraints + pengine: Support of referencing resource templates in resource sets + pengine: Support to make tickets standby for relinquishing tickets gracefully + stonith: A "start" operation of a stonith resource does a "monitor" on the device beyond registering it + stonith: Bug rhbz#745526 - Ensure stonith_admin actually gets called by fence_pcmk + Stonith: Ensure all nodes receive and deliver notifications of the manual override + stonith: Fix the stonith timeout issue (cl#5009, bnc#727498) + Stonith: Implement a manual override for when nodes are known to be safely off + Tools: Bug cl#5003 - Prevent use-after-free in crm_simlate + Tools: crm_mon - Support to display tickets (based on Yuusuke Iida's work) + Tools: crm_simulate - Support to grant/revoke/standby/activate tickets from the new ticket state section + Tools: Implement crm_node functionality for native corosync + Fix a number of potential problems reported by coverity * Wed Aug 31 2011 Andrew Beekhof 1.1.6-1 - Update source tarball to revision: 676e5f25aa46 tip - Statistics: Changesets: 376 Diff: 1761 files changed, 36259 insertions(+), 140578 deletions(-) - Changes since Pacemaker-1.1.5 + ais: check for retryable errors when dispatching AIS messages + ais: Correctly disconnect from Corosync and Cman based clusters + ais: Followup to previous patch - Ensure we drain the corosync queue of messages when Glib tells us there is input + ais: Handle IPC error before checking for NULL data (bnc#702907) + cib: Check the validation version before adding the originator details of a CIB change + cib: Remove disconnected remote connections from mainloop + cman: Correctly override existing fenced operations + cman: Dequeue all the cman emitted events and not only the first one leaving the others in the event's queue. + cman: Don't call fenced_join and fenced_leave when notifying cman of a fencing event. + cman: We need to run the crmd as root for CMAN so that we can ACK fencing operations + Core: Cancelled and pending operations do not count as failed + Core: Ensure there is sufficient space for EOS when building short-form option strings + Core: Fix variable expansion in pkg-config files + Core: Partial revert of accidental commit in previous patch + Core: Use dlopen to load heartbeat libraries on-demand + crmd: Bug lf#2509 - Watch for config option changes from the CIB even if we're not the DC + crmd: Bug lf#2528 - Introduce a slight delay when creating a transition to allow attrd time to perform its updates + crmd: Bug lf#2559 - Fail actions that were scheduled for a failed/fenced node + crmd: Bug lf#2584 - Allow nodes to fence themselves if they're the last one standing + crmd: Bug lf#2632 - Correctly handle nodes that return faster than stonith + crmd: Cancel timers for actions that were pending on dead nodes + crmd: Catch fence operations that claim to succeed but did not really + crmd: Do not wait for actions that were pending on dead nodes + crmd: Ensure we do not attempt to perform action on failed nodes + crmd: Prevent use-of-NULL by g_hash_table_iter_next() + crmd: Recurring actions shouldn't cause the last non-recurring action to be forgotten + crmd: Store only the last and last failed operation in the CIB + mcp: dirname() modifies the input path - pass in a copy of the logfile path + mcp: Enable stack detection logic instead of forcing 'corosync' + mcp: Fix spelling mistake in systemd service script that prevents shutdown + mcp: Shut down if corosync becomes unavailable + mcp: systemd control file is now functional + pengine: Before migrating an utilization-using resource to a node, take off the load which will no longer run there (lf#2599, bnc#695440) + pengine: Before migrating an utilization-using resource to a node, take off the load which will no longer run there (regression tests) (lf#2599, bnc#695440) + pengine: Bug lf#2574 - Prevent shuffling by choosing the correct clone instance to stop + pengine: Bug lf#2575 - Use uname for migration variables, id is a UUID on heartbeat + pengine: Bug lf#2581 - Avoid group restart when clone (re)starts on an unrelated node + pengine: Bug lf#2613, lf#2619 - Group migration after failures and non-default utilization policies + pengine: Bug suse#707150 - Prevent services being active if dependancies on clones are not satisfied + pengine: Correctly recognise which recurring operations are currently active + pengine: Demote from Master does not clear previous errors + pengine: Ensure restarts due to definition changes cause the start action to be re-issued not probes + pengine: Ensure role is preserved for unmanaged resources + pengine: Ensure unmanaged resources have the correct role set so the correct monitor operation is chosen + pengine: Fix memory leak for re-allocated resources reported by valgrind + pengine: Implement cluster ticket and deadman + pengine: Implement resource template + pengine: Correctly determine the state of multi-state resources with a partial operation history + pengine: Only allocate master/slave resources once + pengine: Partial revert of 'Minor code cleanup CS: cf6bca32376c On: 2011-08-15' + pengine: Resolve memory leak reported by valgrind + pengine: Restore the ability to save inputs to disk + Shell: implement -w,--wait option to wait for the transition to finish + Shell: repair template list command + Shell: set of commands to examine logs, reports, etc + Stonith: Consolidate pcmk_host_map into run_stonith_agent so that it is applied consistently + Stonith: Deprecate pcmk_arg_map for the saner pcmk_host_argument + Stonith: Fix use-of-NULL by g_hash_table_lookup + Stonith: Improved pcmk_host_map parsing + Stonith: Prevent use-of-NULL by g_hash_table_lookup + Stonith: Prevent use-of-NULL when no Linux-HA stonith agents are present + stonith: Add missing entries to stonith_error2string() + Stonith: Correctly finish sending agent options if the initial write is interrupted + stonith: Correctly handle synchronous calls + stonith: Coverity - Correctly construct result list for the query API call + stonith: Coverity - Remove badly constructed memory allocation from the query API call + stonith: Ensure completed operations are recorded as such in the history + Stonith: Ensure device parameters are passed to the daemon during registration + stonith: Fix use-of-NULL in stonith_api_device_list() + stonith: stonith_admin - Prevent use of uninitialized pointer by --history command + Tools: Bug lf#2528 - Make progress when attrd_updater is called repeatedly within the dampen interval but with the same value + Tools: crm_report - Correctly extract data from the local node + Tools: crm_report - Remove newlines when detecting the node list + Tools: crm_report - Repair the ability to extract data from the local machine + Tools: crm_report - Report on all detected backtraces * Fri Feb 11 2011 Andrew Beekhof 1.1.5-1 - Update source tarball to revision: baad6636a053 - Statistics: Changesets: 184 Diff: 605 files changed, 46103 insertions(+), 26417 deletions(-) - Changes since Pacemaker-1.1.4 + Add the ability to delegate sub-sections of the cluster to non-root users via ACLs Needs to be enabled at compile time, not enabled by default. + ais: Bug lf#2550 - Report failed processes immediately + Core: Prevent recently introduced use-after-free in replace_xml_child() + Core: Reinstate the logic that skips past non-XML_ELEMENT_NODE children + Core: Remove extra calls to xmlCleanupParser resulting in use-after-free + Core: Repair reference to child-of-child after removal of xml_child_iter_filter from get_message_xml() + crmd: Bug lf#2545 - Ensure notify variables are accurate for stop operations + crmd: Cancel recurring operations while we're still connected to the lrmd + crmd: Reschedule the PE_START action if its not already running when we try to use it + crmd: Update failcount for failed promote and demote operations + pengine: Bug lf#2445 - Avoid relying on stickness for stable clone placement + pengine: Bug lf#2445 - Do not override configured clone stickiness values + pengine: Bug lf#2493 - Don't imply colocation requirements when applying ordering constraints with clones + pengine: Bug lf#2495 - Prevent segfault by validating the contents of ordering sets + pengine: Bug lf#2508 - Correctly reconstruct the status of anonymous cloned groups + pengine: Bug lf#2518 - Avoid spamming the logs with errors for orphan resources + pengine: Bug lf#2544 - Prevent unstable clone placement by factoring in the current node's score before all others + pengine: Bug lf#2554 - target-role alone is not sufficient to promote resources + pengine: Correct target_rc for probes of inactive resources (fix regression introduced by cs:ac3f03006e95) + pengine: Ensure that fencing has completed for stop actions on stonith-dependent resources (lf#2551) + pengine: Only update the node's promotion score if the resource is active there + pengine: Only use the promotion score from the current clone instance + pengine: Prevent use-of-NULL resulting from variable shadowing spotted by Coverity + pengine: Prevent use-of-NULL when there is status for an undefined node + pengine: Prevet use-after-free resulting from unintended recursion when chosing a node to promote master/slave resources + Shell: don't create empty optional sections (bnc#665131) + Stonith: Teach stonith_admin to automagically obtain the current node attributes for the target from the CIB + tools: Bug lf#2527 - Prevent use-of-NULL in crm_simulate + Tools: Prevent crm_resource commands from being lost due to the use of cib_scope_local * Wed Oct 20 2010 Andrew Beekhof 1.1.4-1 - Update source tarball to revision: 75406c3eb2c1 tip - Statistics: Changesets: 169 Diff: 772 files changed, 56172 insertions(+), 39309 deletions(-) - Changes since Pacemaker-1.1.3 + Italian translation of Clusters from Scratch + Significant performance enhancements to the Policy Engine and CIB + cib: Bug lf#2506 - Don't remove client's when notifications fail, they might just be too big + cib: Drop invalid/failed connections from the client hashtable + cib: Ensure all diffs sent to peers have sufficient ordering information + cib: Ensure non-change diffs can preserve the ordering on the other side + cib: Fix the feature set check + cib: Include version information on our synthesised diffs when nothing changed + cib: Optimize the way we detect group/set ordering changes - 15% speedup + cib: Prevent false detection of config updates with the new diff format + cib: Reduce unnecessary copying when comparing xml objects + cib: Repair the processing of updates sent from peer nodes + cib: Revert part of a recent commit that purged still valid connections + cib: The feature set version check is only valid if the current value is non-NULL + Core: Actually removing diff markers is necessary + Core: Bug lf#2506 - Drop the compression limit because Heartbeat's IPC code sucks + Core: Cache Relax-NG schemas - profiling indicates many cycles are wasted needlessly re-parsing them + Core: Correctly compare against crm_log_level in the logging macros + Core: Correctly extract the version details from a diff + Core: Correctly hook up the RNG schema cache + Core: Correctly use lazy_xml_sort() for v2 digests + Core: Don't compress large payload elements unless we're approaching message limits + Core: Don't insert empty ID tags when applying diffs + Core: Enable the improve v2 digests + Core: Ensure ordering is preserved when applying diffs + Core: Fix the CRM_CHECK macro + Core: Modify the v2 digest algorithm so that some fields are sorted + Core: Prevent use-after-free when creating a CIB update for a timed out action + Core: Prevent use-of-NULL when cleaning up RelaxNG data structures + Core: Provide significant performance improvements by implementing versioned diffs and digests + crmd: All pending operations should be recorded, even recurring ones with high start delays + crmd: Don't abort transitions when probes are completed on a node + crmd: Don't hide stop events that time out - allowing faster recovery in the presence of overloaded hosts + crmd: Ensure the CIB is always writable on the DC by removing a timing hole + crmd: Include the correct transition details for timed out operations + crmd: Prevent use of NULL by making copies of the operation's hash table + crmd: There's no need to check the cib version from the 'added' part of diff updates + crmd: Use the supplied timeout for stop actions + mcp: Ensure valgrind is able to log its output somewhere + mcp: Use 99/01 for the start/stop sequence to avoid problems with services (such as libvirtd) started by init - Patch from Vladislav Bogdanov + pengine: Ensure fencing of the DC preceeds the STONITH_DONE operation + pengine: Fix memory leak introduced as part of the conversion to GHashTables + pengine: Fix memory leak when processing completed migration actions + pengine: Fix typo leading to use-of-NULL in the new ordering code + pengine: Free memory in recently introduced helper function + pengine: lf#2478 - Implement improved handling and recovery of atomic resource migrations + pengine: Obtain massive speedup by prepending to the list of ordering constraints (which can grow quite large) + pengine: Optimize the logic for deciding which non-grouped anonymous clone instances to probe for + pengine: Prevent clones from being stopped because resources colocated with them cannot be active + pengine: Try to ensure atomic migration ops occur within a single transition + pengine: Use hashtables instead of linked lists for performance sensitive datastructures + pengine: Use the original digest algorithm for parameter lists + stonith: cleanup children on timeout in fence_legacy + Stonith: Fix two memory leaks + Tools: crm_shadow - Avoid replacing the entire configuration (including status) * Tue Sep 21 2010 Andrew Beekhof 1.1.3-1 - Update source tarball to revision: e3bb31c56244 tip - Statistics: Changesets: 352 Diff: 481 files changed, 14130 insertions(+), 11156 deletions(-) - Changes since Pacemaker-1.1.2.1 + ais: Bug lf#2401 - Improved processing when the peer crmd processes join/leave + ais: Correct the logic for conecting to plugin based clusters + ais: Do not supply a process list in mcp-mode + ais: Drop support for whitetank in the 1.1 release series + ais: Get an initial dump of the node membership when connecting to quorum-based clusters + ais: Guard against saturated cpg connections + ais: Handle CS_ERR_TRY_AGAIN in more cases + ais: Move the code for finding uid before the fork so that the child does no logging + ais: Never allow quorum plugins to affect connection to the pacemaker plugin + ais: Sign everyone up for peer process updates, not just the crmd + ais: The cluster type needs to be set before initializing classic openais connections + cib: Also free query result for xpath operations that return more than one hit + cib: Attempt to resolve memory corruption when forking a child to write the cib to disk + cib: Correctly free memory when writing out the cib to disk + cib: Fix the application of unversioned diffs + cib: Remove old developmental error logging + cib: Restructure the 'valid peer' check for deciding which instructions to ignore + cman: Correctly process membership/quorum changes from the pcmk plugin. Allow other message types through untouched + cman: Filter directed messages not intended for us + cman: Grab the initial membership when we connect + cman: Keep the list of peer processes up-to-date + cman: Make sure our common hooks are called after a cman membership update + cman: Make sure we can compile without cman present + cman: Populate sender details for cpg messages + cman: Update the ringid for cman based clusters + Core: Correctly unpack HA_Messages containing multiple entries with the same name + Core: crm_count_member() should only track nodes that have the full stack up + Core: New developmental logging system inspired by the kernel and a PoC from Lars Ellenberg + crmd: All nodes should see status updates, not just he DC + crmd: Allow non-DC nodes to clear failcounts too + crmd: Base DC election on process relative uptime + crmd: Bug lf#2439 - cancel_op() can also return HA_RSCBUSY + crmd: Bug lf#2439 - Handle asynchronous notification of resource deletion events + crmd: Bug lf#2458 - Ensure stop actions always have the relevant resource attributes + crmd: Disable age as a criteria for cman based clusters, its not reliable enough + crmd: Ensure we activate the DC timer if we detect an alternate DC + crmd: Factor the nanosecond component of process uptime in elections + crmd: Fix assertion failure when performing async resource failures + crmd: Fix handling of async resource deletion results + crmd: Include the action for crm graph operations + crmd: Make sure the membership cache is accurate after a sucessful fencing operation + crmd: Make sure we always poke the FSA after a transition to clear any TE_HALT actions + crmd: Offer crm-level membership once the peer starts the crmd process + crmd: Only need to request quorum update for plugin based clusters + crmd: Prevent assertion failure for stop actions resulting from cs: 3c0bc17c6daf + crmd: Prevent everyone from loosing DC elections by correctly initializing all relevant variables + crmd: Prevent segmentation fault + crmd: several fixes for async resource delete (thanks to beekhof) + crmd: Use the correct define/size for lrm resource IDs + Introduce two new cluster types 'cman' and 'corosync', replaces 'quorum_provider' concept + mcp: Add missing headers when built without heartbeat support + mcp: Correctly initialize the string containing the list of active daemons + mcp: Fix macro expansion in init script + mcp: Fix the expansion of the pid file in the init script + mcp: Handle CS_ERR_TRY_AGAIN when connecting to libcfg + mcp: Make sure we can compile the mcp without cman present + mcp: New master control process for (re)spawning pacemaker daemons + mcp: Read config early so we can re-initialize logging asap if daemonizing + mcp: Rename the mcp binary to pacemakerd and create a 'pacemaker' init script + mcp: Resend our process list after every CPG change + mcp: Tell chkconfig we need to shut down early on + pengine: Avoid creating invalid ordering constraints for probes that are not needed + pengine: Bug lf#1959 - Fail unmanaged resources should not prevent other services from shutting down + pengine: Bug lf#2422 - Ordering dependencies on partially active groups not observed properly + pengine: Bug lf#2424 - Use notify oepration definition if it exists in the configuration + pengine: Bug lf#2433 - No services should be stopped until probes finish + pengine: Bug lf#2453 - Enforce clone ordering in the absense of colocation constraints + pengine: Bug lf#2476 - Repair on-fail=block for groups and primitive resources + pengine: Correctly detect when there is a real failcount that expired and needs to be cleared + pengine: Correctly handle pseudo action creation + pengine: Correctly order clone startup after group/clone start + pengine: Correct use-after-free introduced in the prior patch + pengine: Do not demote resources because something that requires it can not run + pengine: Fix colocation for interleaved clones + pengine: Fix colocation with partially active groups + pengine: Fix potential use-after-free defect from coverity + pengine: Fix previous merge + pengine: Fix use-after-free in order_actions() reported by valgrind + pengine: Make the current data set a global variable so it does not need to be passed around everywhere + pengine: Prevent endless loop when looking for operation definitions in the configuration + pengine: Prevent segfault by ensuring the arguments to do_calculations() are initialized + pengine: Rewrite the ordering constraint logic to be simplicity, clarity and maintainability + pengine: Wait until stonith is available, do not fall back to shutdown for nodes requesting termination + Resolve coverity RESOURCE_LEAK defects + Shell: Complete the transition to using crm_attribute instead of crm_failcount and crm_standby + stonith: Advertise stonith-ng options in the metadata + stonith: Bug lf#2461 - Prevent segfault by not looking up operations if the hashtable has not been initialized yet + stonith: Bug lf#2473 - Add the timeout at the top level where the daemon is looking for it + Stonith: Bug lf#2473 - Ensure stonith operations complete within the timeout and are terminated if they run too long + stonith: Bug lf#2473 - Ensure timeouts are included for fencing operations + stonith: Bug lf#2473 - Gracefully handle remote operations that arrive late (after we have done notifications) + stonith: Correctly parse pcmk_host_list parameters that appear on a single line + stonith: Map poweron/poweroff back to on/off expected by the stonith tool from cluster-glue + stonith: pass the configuration to the stonith program via environment variables (bnc#620781) + Stonith: Use the timeout specified by the user + Support starting plugin-based Pacemaker clusters with the MCP as well + Tools: Bug lf#2456 - Fix assertion failure in crm_resource + tools: crm_node - Repair the ability to connect to openais based clusters + tools: crm_node - Use the correct short option for --cman + tools: crm_report - corosync.conf wont necessarily contain the text 'pacemaker' anymore + Tools: crm_simulate - Fix use-after-free in when terminating + tools: crm_simulate - Resolve coverity USE_AFTER_FREE defect + Tools: Drop the 'pingd' daemon and resource agent in favor of ocf:pacemaker:ping + Tools: Fix recently introduced use-of-NULL + Tools: Fix use-after-free defects from coverity * Wed May 12 2010 Andrew Beekhof 1.1.2-1 - Update source tarball to revision: c25c972a25cc tip - Statistics: Changesets: 339 Diff: 708 files changed, 37918 insertions(+), 10584 deletions(-) - Changes since Pacemaker-1.1.1 + ais: Do not count votes from offline nodes and calculate current votes before sending quorum data + ais: Ensure the list of active processes sent to clients is always up-to-date + ais: Look for the correct conf variable for turning on file logging + ais: Need to find a better and thread-safe way to set core_uses_pid. Disable for now. + ais: Use the threadsafe version of getpwnam + Core: Bump the feature set due to the new failcount expiry feature + Core: fix memory leaks exposed by valgrind + Core: Bug lf#2414 - Prevent use-after-free reported by valgrind when doing xpath based deletions + crmd: Bug lf#2414 - Prevent use-after-free of the PE connection after it dies + crmd: Bug lf#2414 - Prevent use-after-free of the stonith-ng connection + crmd: Bug lf#2401 - Improved detection of partially active peers + crmd: Bug lf#2379 - Ensure the cluster terminates when the PE is not available + crmd: Do not allow the target_rc to be misused by resource agents + crmd: Do not ignore action timeouts based on FSA state + crmd: Ensure we dont get stuck in S_PENDING if we loose an election to someone that never talks to us again + crmd: Fix memory leaks exposed by valgrind + crmd: Remove race condition that could lead to multiple instances of a clone being active on a machine + crmd: Send erase_status_tag() calls to the local CIB when the DC is fenced, since there is no DC to accept them + crmd: Use global fencing notifications to prevent secondary fencing operations of the DC + pengine: Bug lf#2317 - Avoid needless restart of primitive depending on a clone + pengine: Bug lf#2361 - Ensure clones observe mandatory ordering constraints if the LHS is unrunnable + pengine: Bug lf#2383 - Combine failcounts for all instances of an anonymous clone on a host + pengine: Bug lf#2384 - Fix intra-set colocation and ordering + pengine: Bug lf#2403 - Enforce mandatory promotion (colocation) constraints + pengine: Bug lf#2412 - Correctly find clone instances by their prefix + pengine: Do not be so quick to pull the trigger on nodes that are coming up + pengine: Fix memory leaks exposed by valgrind + pengine: Rewrite native_merge_weights() to avoid Fix use-after-free + Shell: Bug bnc#590035 - always reload status if working with the cluster + Shell: Bug bnc#592762 - Default to using the status section from the live CIB + Shell: Bug lf#2315 - edit multiple meta_attributes sets in resource management + Shell: Bug lf#2221 - enable comments + Shell: Bug bnc#580492 - implement new cibstatus interface and commands + Shell: Bug bnc#585471 - new cibstatus import command + Shell: check timeouts also against the default-action-timeout property + Shell: new configure filter command + Tools: crm_mon - fix memory leaks exposed by valgrind * Tue Feb 16 2010 Andrew Beekhof - 1.1.1-1 - First public release of Pacemaker 1.1 - Package reference documentation in a doc subpackage - Move cts into a subpackage so that it can be easily consumed by others - Update source tarball to revision: 17d9cd4ee29f + New stonith daemon that supports global notifications + Service placement influenced by the physical resources + A new tool for simulating failures and the cluster’s reaction to them + Ability to serialize an otherwise unrelated a set of resource actions (eg. Xen migrations) * Mon Jan 18 2010 Andrew Beekhof - 1.0.7-1 - Update source tarball to revision: 2eed906f43e9 (stable-1.0) tip - Statistics: Changesets: 193 Diff: 220 files changed, 15933 insertions(+), 8782 deletions(-) - Changes since 1.0.5-4 + pengine: Bug 2213 - Ensure groups process location constraints so that clone-node-max works for cloned groups + pengine: Bug lf#2153 - non-clones should not restart when clones stop/start on other nodes + pengine: Bug lf#2209 - Clone ordering should be able to prevent startup of dependant clones + pengine: Bug lf#2216 - Correctly identify the state of anonymous clones when deciding when to probe + pengine: Bug lf#2225 - Operations that require fencing should wait for 'stonith_complete' not 'all_stopped'. + pengine: Bug lf#2225 - Prevent clone peers from stopping while another is instance is (potentially) being fenced + pengine: Correctly anti-colocate with a group + pengine: Correctly unpack ordering constraints for resource sets to avoid graph loops + Tools: crm: load help from crm_cli.txt + Tools: crm: resource sets (bnc#550923) + Tools: crm: support for comments (LF 2221) + Tools: crm: support for description attribute in resources/operations (bnc#548690) + Tools: hb2openais: add EVMS2 CSM processing (and other changes) (bnc#548093) + Tools: hb2openais: do not allow empty rules, clones, or groups (LF 2215) + Tools: hb2openais: refuse to convert pure EVMS volumes + cib: Ensure the loop for login message terminates + cib: Finally fix reliability of receiving large messages over remote plaintext connections + cib: Fix remote notifications + cib: For remote connections, default to CRM_DAEMON_USER since thats the only one that the cib can validate the password for using PAM + cib: Remote plaintext - Retry sending parts of the message that did not fit the first time + crmd: Ensure batch-limit is correctly enforced + crmd: Ensure we have the latest status after a transition abort + (bnc#547579,547582): Tools: crm: status section editing support + shell: Add allow-migrate as allowed meta-attribute (bnc#539968) + Medium: Build: Do not automatically add -L/lib, it could cause 64-bit arches to break + Medium: pengine: Bug lf#2206 - rsc_order constraints always use score at the top level + Medium: pengine: Only complain about target-role=master for non m/s resources + Medium: pengine: Prevent non-multistate resources from being promoted through target-role + Medium: pengine: Provide a default action for resource-set ordering + Medium: pengine: Silently fix requires=fencing for stonith resources so that it can be set in op_defaults + Medium: Tools: Bug lf#2286 - Allow the shell to accept template parameters on the command line + Medium: Tools: Bug lf#2307 - Provide a way to determin the nodeid of past cluster members + Medium: Tools: crm: add update method to template apply (LF 2289) + Medium: Tools: crm: direct RA interface for ocf class resource agents (LF 2270) + Medium: Tools: crm: direct RA interface for stonith class resource agents (LF 2270) + Medium: Tools: crm: do not add score which does not exist + Medium: Tools: crm: do not consider warnings as errors (LF 2274) + Medium: Tools: crm: do not remove sets which contain id-ref attribute (LF 2304) + Medium: Tools: crm: drop empty attributes elements + Medium: Tools: crm: exclude locations when testing for pathological constraints (LF 2300) + Medium: Tools: crm: fix exit code on single shot commands + Medium: Tools: crm: fix node delete (LF 2305) + Medium: Tools: crm: implement -F (--force) option + Medium: Tools: crm: rename status to cibstatus (LF 2236) + Medium: Tools: crm: revisit configure commit + Medium: Tools: crm: stay in crm if user specified level only (LF 2286) + Medium: Tools: crm: verify changes on exit from the configure level + Medium: ais: Some clients such as gfs_controld want a cluster name, allow one to be specified in corosync.conf + Medium: cib: Clean up logic for receiving remote messages + Medium: cib: Create valid notification control messages + Medium: cib: Indicate where the remote connection came from + Medium: cib: Send password prompt to stderr so that stdout can be redirected + Medium: cts: Fix rsh handling when stdout is not required + Medium: doc: Fill in the section on removing a node from an AIS-based cluster + Medium: doc: Update the docs to reflect the 0.6/1.0 rolling upgrade problem + Medium: doc: Use Publican for docbook based documentation + Medium: fencing: stonithd: add metadata for stonithd instance attributes (and support in the shell) + Medium: fencing: stonithd: ignore case when comparing host names (LF 2292) + Medium: tools: Make crm_mon functional with remote connections + Medium: xml: Add stopped as a supported role for operations + Medium: xml: Bug bnc#552713 - Treat node unames as text fields not IDs + Medium: xml: Bug lf#2215 - Create an always-true expression for empty rules when upgrading from 0.6 * Thu Oct 29 2009 Andrew Beekhof - 1.0.5-4 - Include the fixes from CoroSync integration testing - Move the resource templates - they are not documentation - Ensure documentation is placed in a standard location - Exclude documentation that is included elsewhere in the package - Update the tarball from upstream to version ee19d8e83c2a + cib: Correctly clean up when both plaintext and tls remote ports are requested + pengine: Bug bnc#515172 - Provide better defaults for lt(e) and gt(e) comparisions + pengine: Bug lf#2197 - Allow master instances placemaker to be influenced by colocation constraints + pengine: Make sure promote/demote pseudo actions are created correctly + pengine: Prevent target-role from promoting more than master-max instances + ais: Bug lf#2199 - Prevent expected-quorum-votes from being populated with garbage + ais: Prevent deadlock - dont try to release IPC message if the connection failed + cib: For validation errors, send back the full CIB so the client can display the errors + cib: Prevent use-after-free for remote plaintext connections + crmd: Bug lf#2201 - Prevent use-of-NULL when running heartbeat * Wed Oct 13 2009 Andrew Beekhof - 1.0.5-3 - Update the tarball from upstream to version 38cd629e5c3c + Core: Bug lf#2169 - Allow dtd/schema validation to be disabled + pengine: Bug lf#2106 - Not all anonymous clone children are restarted after configuration change + pengine: Bug lf#2170 - stop-all-resources option had no effect + pengine: Bug lf#2171 - Prevent groups from starting if they depend on a complex resource which can not + pengine: Disable resource management if stonith-enabled=true and no stonith resources are defined + pengine: do not include master score if it would prevent allocation + ais: Avoid excessive load by checking for dead children every 1s (instead of 100ms) + ais: Bug rh#525589 - Prevent shutdown deadlocks when running on CoroSync + ais: Gracefully handle changes to the AIS nodeid + crmd: Bug bnc#527530 - Wait for the transition to complete before leaving S_TRANSITION_ENGINE + crmd: Prevent use-after-free with LOG_DEBUG_3 + Medium: xml: Mask the "symmetrical" attribute on rsc_colocation constraints (bnc#540672) + Medium (bnc#520707): Tools: crm: new templates ocfs2 and clvm + Medium: Build: Invert the disable ais/heartbeat logic so that --without (ais|heartbeat) is available to rpmbuild + Medium: pengine: Bug lf#2178 - Indicate unmanaged clones + Medium: pengine: Bug lf#2180 - Include node information for all failed ops + Medium: pengine: Bug lf#2189 - Incorrect error message when unpacking simple ordering constraint + Medium: pengine: Correctly log resources that would like to start but can not + Medium: pengine: Stop ptest from logging to syslog + Medium: ais: Include version details in plugin name + Medium: crmd: Requery the resource metadata after every start operation * Fri Aug 21 2009 Tomas Mraz - 1.0.5-2.1 - rebuilt with new openssl * Wed Aug 19 2009 Andrew Beekhof - 1.0.5-2 - Add versioned perl dependency as specified by https://fedoraproject.org/wiki/Packaging/Perl#Packages_that_link_to_libperl - No longer remove RPATH data, it prevents us finding libperl.so and no other libraries were being hardcoded - Compile in support for heartbeat - Conditionally add heartbeat-devel and corosynclib-devel to the -devel requirements depending on which stacks are supported * Mon Aug 17 2009 Andrew Beekhof - 1.0.5-1 - Add dependency on resource-agents - Use the version of the configure macro that supplies --prefix, --libdir, etc - Update the tarball from upstream to version 462f1569a437 (Pacemaker 1.0.5 final) + Tools: crm_resource - Advertise --move instead of --migrate + Medium: Extra: New node connectivity RA that uses system ping and attrd_updater + Medium: crmd: Note that dc-deadtime can be used to mask the brokeness of some switches * Tue Aug 11 2009 Ville Skyttä - 1.0.5-0.7.c9120a53a6ae.hg - Use bzipped upstream tarball. * Wed Jul 29 2009 Andrew Beekhof - 1.0.5-0.6.c9120a53a6ae.hg - Add back missing build auto* dependancies - Minor cleanups to the install directive * Tue Jul 28 2009 Andrew Beekhof - 1.0.5-0.5.c9120a53a6ae.hg - Add a leading zero to the revision when alphatag is used * Tue Jul 28 2009 Andrew Beekhof - 1.0.5-0.4.c9120a53a6ae.hg - Incorporate the feedback from the cluster-glue review - Realistically, the version is a 1.0.5 pre-release - Use the global directive instead of define for variables - Use the haclient/hacluster group/user instead of daemon - Use the _configure macro - Fix install dependancies * Fri Jul 24 2009 Andrew Beekhof - 1.0.4-3 - Initial Fedora checkin - Include an AUTHORS and license file in each package - Change the library package name to pacemaker-libs to be more Fedora compliant - Remove execute permissions from xml related files - Reference the new cluster-glue devel package name - Update the tarball from upstream to version c9120a53a6ae + pengine: Only prevent migration if the clone dependency is stopping/starting on the target node + pengine: Bug 2160 - Dont shuffle clones due to colocation + pengine: New implementation of the resource migration (not stop/start) logic + Medium: Tools: crm_resource - Prevent use-of-NULL by requiring a resource name for the -A and -a options + Medium: pengine: Prevent use-of-NULL in find_first_action() * Tue Jul 14 2009 Andrew Beekhof - 1.0.4-2 - Reference authors from the project AUTHORS file instead of listing in description - Change Source0 to reference the Mercurial repo - Cleaned up the summaries and descriptions - Incorporate the results of Fedora package self-review * Thu Jun 04 2009 Andrew Beekhof - 1.0.4-1 - Update source tarball to revision: 1d87d3e0fc7f (stable-1.0) - Statistics: Changesets: 209 Diff: 266 files changed, 12010 insertions(+), 8276 deletions(-) - Changes since Pacemaker-1.0.3 + (bnc#488291): ais: do not rely on byte endianness on ptr cast + (bnc#507255): Tools: crm: delete rsc/op_defaults (these meta_attributes are killing me) + (bnc#507255): Tools: crm: import properly rsc/op_defaults + (LF 2114): Tools: crm: add support for operation instance attributes + ais: Bug lf#2126 - Messages replies cannot be routed to transient clients + ais: Fix compilation for the latest Corosync API (v1719) + attrd: Do not perform all updates as complete refreshes + cib: Fix huge memory leak affecting heartbeat-based clusters + Core: Allow xpath queries to match attributes + Core: Generate the help text directly from a tool options struct + Core: Handle differences in 0.6 messaging format + crmd: Bug lf#2120 - All transient node attribute updates need to go via attrd + crmd: Correctly calculate how long an FSA action took to avoid spamming the logs with errors + crmd: Fix another large memory leak affecting Heartbeat based clusters + lha: Restore compatability with older versions + pengine: Bug bnc#495687 - Filesystem is not notified of successful STONITH under some conditions + pengine: Make running a cluster with STONITH enabled but no STONITH resources an error and provide details on resolutions + pengine: Prevent use-ofNULL when using resource ordering sets + pengine: Provide inter-notification ordering guarantees + pengine: Rewrite the notification code to be understanable and extendable + Tools: attrd - Prevent race condition resulting in the cluster forgetting the node wishes to shut down + Tools: crm: regression tests + Tools: crm_mon - Fix smtp notifications + Tools: crm_resource - Repair the ability to query meta attributes + Low Build: Bug lf#2105 - Debian package should contain pacemaker doc and crm templates + Medium (bnc#507255): Tools: crm: handle empty rsc/op_defaults properly + Medium (bnc#507255): Tools: crm: use the right obj_type when creating objects from xml nodes + Medium (LF 2107): Tools: crm: revisit exit codes in configure + Medium: cib: Do not bother validating updates that only affect the status section + Medium: Core: Include supported stacks in version information + Medium: crmd: Record in the CIB, the cluster infrastructure being used + Medium: cts: Do not combine crm_standby arguments - the wrapper can not process them + Medium: cts: Fix the CIBAusdit class + Medium: Extra: Refresh showscores script from Dominik + Medium: pengine: Build a statically linked version of ptest + Medium: pengine: Correctly log the actions for resources that are being recovered + Medium: pengine: Correctly log the occurance of promotion events + Medium: pengine: Implememt node health based on a patch from Mark Hamzy + Medium: Tools: Add examples to help text outputs + Medium: Tools: crm: catch syntax errors for configure load + Medium: Tools: crm: implement erasing nodes in configure erase + Medium: Tools: crm: work with parents only when managing xml objects + Medium: Tools: crm_mon - Add option to run custom notification program on resource operations (Patch by Dominik Klein) + Medium: Tools: crm_resource - Allow --cleanup to function on complex resources and cluster-wide + Medium: Tools: haresource2cib.py - Patch from horms to fix conversion error + Medium: Tools: Include stack information in crm_mon output + Medium: Tools: Two new options (--stack,--constraints) to crm_resource for querying how a resource is configured * Wed Apr 08 2009 Andrew Beekhof - 1.0.3-1 - Update source tarball to revision: b133b3f19797 (stable-1.0) tip - Statistics: Changesets: 383 Diff: 329 files changed, 15471 insertions(+), 15119 deletions(-) - Changes since Pacemaker-1.0.2 + Added tag SLE11-HAE-GMC for changeset 9196be9830c2 + ais plugin: Fix quorum calculation (bnc#487003) + ais: Another memory fix leak in error path + ais: Bug bnc#482847, bnc#482905 - Force a clean exit of OpenAIS once Pacemaker has finished unloading + ais: Bug bnc#486858 - Fix update_member() to prevent spamming clients with membership events containing no changes + ais: Centralize all quorum calculations in the ais plugin and allow expected votes to be configured int he cib + ais: Correctly handle a return value of zero from openais_dispatch_recv() + ais: Disable logging to a file + ais: Fix memory leak in error path + ais: IPC messages are only in scope until a response is sent + All signal handlers used with CL_SIGNAL() need to be as minimal as possible + cib: Bug bnc#482885 - Simplify CIB disk-writes to prevent data loss. Required a change to the backup filename format + cib: crmd: Revert part of 9782ab035003. Complex shutdown routines need G_main_add_SignalHandler to avoid race coditions + crm: Avoid infinite loop during crm configure edit (bnc#480327) + crmd: Avoid a race condition by waiting for the attrd update to trigger a transition automatically + crmd: Bug bnc#480977 - Prevent extra, partial, shutdown when a node restarts too quickly + crmd: Bug bnc#480977 - Prevent extra, partial, shutdown when a node restarts too quickly (verified) + crmd: Bug bnc#489063 - Ensure the DC is always unset after we 'loose' an election + crmd: Bug BSC#479543 - Correctly find the migration source for timed out migrate_from actions + crmd: Call crm_peer_init() before we start the FSA - prevents a race condition when used with Heartbeat + crmd: Erasing the status section should not be forced to the local node + crmd: Fix memory leak in cib notication processing code + crmd: Fix memory leak in transition graph processing + crmd: Fix memory leaks found by valgrind + crmd: More memory leaks fixes found by valgrind + fencing: stonithd: is_heartbeat_cluster is a no-no if there is no heartbeat support + pengine: Bug bnc#466788 - Exclude nodes that can not run resources + pengine: Bug bnc#466788 - Make colocation based on node attributes work + pengine: Bug BNC#478687 - Do not crash when clone-max is 0 + pengine: Bug bnc#488721 - Fix id-ref expansion for clones, the doc-root for clone children is not the cib root + pengine: Bug bnc#490418 - Correctly determine node state for nodes wishing to be terminated + pengine: Bug LF#2087 - Correctly parse the state of anonymous clones that have multiple instances on a given node + pengine: Bug lf#2089 - Meta attributes are not inherited by clone children + pengine: Bug lf#2091 - Correctly restart modified resources that were found active by a probe + pengine: Bug lf#2094 - Fix probe ordering for cloned groups + pengine: Bug LF:2075 - Fix large pingd memory leaks + pengine: Correctly attach orphaned clone children to their parent + pengine: Correctly handle terminate node attributes that are set to the output from time() + pengine: Ensure orphaned clone members are hooked up to the parent when clone-max=0 + pengine: Fix memory leak in LogActions + pengine: Fix the determination of whether a group is active + pengine: Look up the correct promotion preference for anonymous masters + pengine: Simplify handling of start failures by changing the default migration-threshold to INFINITY + pengine: The ordered option for clones no longer causes extra start/stop operations + RA: Bug bnc#490641 - Shut down dlm_controld with -TERM instead of -KILL + RA: pingd: Set default ping interval to 1 instead of 0 seconds + Resources: pingd - Correctly tell the ping daemon to shut down + Tools: Bug bnc#483365 - Ensure the command from cluster_test includes a value for --log-facility + Tools: cli: fix and improve delete command + Tools: crm: add and implement templates + Tools: crm: add support for command aliases and some common commands (i.e. cd,exit) + Tools: crm: create top configuration nodes if they are missing + Tools: crm: fix parsing attributes for rules (broken by the previous changeset) + Tools: crm: new ra set of commands + Tools: crm: resource agents information management + Tools: crm: rsc/op_defaults + Tools: crm: support for no value attribute in nvpairs + Tools: crm: the new configure monitor command + Tools: crm: the new configure node command + Tools: crm_mon - Prevent use-of-NULL when summarizing an orphan + Tools: hb2openais: create clvmd clone for respawn evmsd in ha.cf + Tools: hb2openais: fix a serious recursion bug in xml node processing + Tools: hb2openais: fix ocfs2 processing + Tools: pingd - prevent double free of getaddrinfo() output in error path + Tools: The default re-ping interval for pingd should be 1s not 1ms + Medium (bnc#479049): Tools: crm: add validation of resource type for the configure primitive command + Medium (bnc#479050): Tools: crm: add help for RA parameters in tab completion + Medium (bnc#479050): Tools: crm: add tab completion for primitive params/meta/op + Medium (bnc#479050): Tools: crm: reimplement cluster properties completion + Medium (bnc#486968): Tools: crm: listnodes function requires no parameters (do not mix completion with other stuff) + Medium: ais: Remove the ugly hack for dampening AIS membership changes + Medium: cib: Fix memory leaks by using mainloop_add_signal + Medium: cib: Move more logging to the debug level (was info) + Medium: cib: Overhaul the processing of synchronous replies + Medium: Core: Add library functions for instructing the cluster to terminate nodes + Medium: crmd: Add new expected-quorum-votes option + Medium: crmd: Allow up to 5 retires when an attrd update fails + Medium: crmd: Automatically detect and use new values for crm_config options + Medium: crmd: Bug bnc#490426 - Escalated shutdowns stall when there are pending resource operations + Medium: crmd: Clean up and optimize the DC election algorithm + Medium: crmd: Fix memory leak in shutdown + Medium: crmd: Fix memory leaks spotted by Valgrind + Medium: crmd: Ingore join messages from hosts other than our DC + Medium: crmd: Limit the scope of resource updates to the status section + Medium: crmd: Prevent the crmd from being respawned if its told to shut down when it did not ask to be + Medium: crmd: Re-check the election status after membership events + Medium: crmd: Send resource updates via the local CIB during elections + Medium: pengine: Bug bnc#491441 - crm_mon does not display operations returning 'uninstalled' correctly + Medium: pengine: Bug lf#2101 - For location constraints, role=Slave is equivalent to role=Started + Medium: pengine: Clean up the API - removed ->children() and renamed ->find_child() to fine_rsc() + Medium: pengine: Compress the display of healthy anonymous clones + Medium: pengine: Correctly log the actions for resources that are being recovered + Medium: pengine: Determin a promotion score for complex resources + Medium: pengine: Ensure clones always have a value for globally-unique + Medium: pengine: Prevent orphan clones from being allocated + Medium: RA: controld: Return proper exit code for stop op. + Medium: Tools: Bug bnc#482558 - Fix logging test in cluster_test + Medium: Tools: Bug bnc#482828 - Fix quoting in cluster_test logging setup + Medium: Tools: Bug bnc#482840 - Include directory path to CTSlab.py + Medium: Tools: crm: add more user input checks + Medium: Tools: crm: do not check resource status of we are working with a shadow + Medium: Tools: crm: fix id-refs and allow reference to top objects (i.e. primitive) + Medium: Tools: crm: ignore comments in the CIB + Medium: Tools: crm: multiple column output would not work with small lists + Medium: Tools: crm: refuse to delete running resources + Medium: Tools: crm: rudimentary if-else for templates + Medium: Tools: crm: Start/stop clones via target-role. + Medium: Tools: crm_mon - Compress the node status for healthy and offline nodes + Medium: Tools: crm_shadow - Return 0/cib_ok when --create-empty succeeds + Medium: Tools: crm_shadow - Support -e, the short form of --create-empty + Medium: Tools: Make attrd quieter + Medium: Tools: pingd - Avoid using various clplumbing functions as they seem to leak + Medium: Tools: Reduce pingd logging * Mon Feb 16 2009 Andrew Beekhof - 1.0.2-1 - Update source tarball to revision: d232d19daeb9 (stable-1.0) tip - Statistics: Changesets: 441 Diff: 639 files changed, 20871 insertions(+), 21594 deletions(-) - Changes since Pacemaker-1.0.1 + (bnc#450815): Tools: crm cli: do not generate id for the operations tag + ais: Add support for the new AIS IPC layer + ais: Always set header.error to the correct default: SA_AIS_OK + ais: Bug BNC#456243 - Ensure the membership cache always contains an entry for the local node + ais: Bug BNC:456208 - Prevent deadlocks by not logging in the child process before exec() + ais: By default, disable supprt for the WIP openais IPC patch + ais: Detect and handle situations where ais and the crm disagree on the node name + ais: Ensure crm_peer_seq is updated after a membership update + ais: Make sure all IPC header fields are set to sane defaults + ais: Repair and streamline service load now that whitetank startup functions correctly + build: create and install doc files + cib: Allow clients without mainloop to connect to the cib + cib: CID:18 - Fix use-of-NULL in cib_perform_op + cib: CID:18 - Repair errors introduced in b5a18704477b - Fix use-of-NULL in cib_perform_op + cib: Ensure diffs contain the correct values of admin_epoch + cib: Fix four moderately sized memory leaks detected by Valgrind + Core: CID:10 - Prevent indexing into an array of schemas with a negative value + Core: CID:13 - Fix memory leak in log_data_element + Core: CID:15 - Fix memory leak in crm_get_peer + Core: CID:6 - Fix use-of-NULL in copy_ha_msg_input + Core: Fix crash in the membership code preventing node shutdown + Core: Fix more memory leaks foudn by valgrind + Core: Prevent unterminated strings after decompression + crmd: Bug BNC:467995 - Delay marking STONITH operations complete until STONITH tells us so + crmd: Bug LF:1962 - Do not NACK peers because they are not (yet) in our membership. Just ignore them. + crmd: Bug LF:2010 - Ensure fencing cib updates create the node_state entry if needed to preent re-fencing during cluster startup + crmd: Correctly handle reconnections to attrd + crmd: Ensure updates for lost migrate operations indicate which node it tried to migrating to + crmd: If there are no nodes to finalize, start an election. + crmd: If there are no nodes to welcome, start an election. + crmd: Prevent node attribute loss by detecting attrd disconnections immediately + crmd: Prevent node re-probe loops by ensuring manditory actions always complete + pengine: Bug 2005 - Fix startup ordering of cloned stonith groups + pengine: Bug 2006 - Correctly reprobe cloned groups + pengine: Bug BNC:465484 - Fix the no-quorum-policy=suicide option + pengine: Bug LF:1996 - Correctly process disabled monitor operations + pengine: CID:19 - Fix use-of-NULL in determine_online_status + pengine: Clones now default to globally-unique=false + pengine: Correctly calculate the number of available nodes for the clone to use + pengine: Only shoot online nodes with no-quorum-policy=suicide + pengine: Prevent on-fail settings being ignored after a resource is successfully stopped + pengine: Prevent use-of-NULL for failed migrate actions in process_rsc_state() + pengine: Remove an optimization for the terminate node attribute that caused the cluster to block indefinitly + pengine: Repar the ability to colocate based on node attributes other than uname + pengine: Start the correct monitor operation for unmanaged masters + stonith: CID:3 - Fix another case of exceptionally poor error handling by the original stonith developers + stonith: CID:5 - Checking for NULL and then dereferencing it anyway is an interesting approach to error handling + stonithd: Sending IPC to the cluster is a privileged operation + stonithd: wrong checks for shmid (0 is a valid id) + Tools: attrd - Correctly determine when an attribute has stopped changing and should be committed to the CIB + Tools: Bug 2003 - pingd does not correctly detect failures when the interface is down + Tools: Bug 2003 - pingd does not correctly handle node-down events on multi-NIC systems + Tools: Bug 2021 - pingd does not detect sequence wrapping correctly, incorrectly reports nodes offline + Tools: Bug BNC:468066 - Do not use the result of uname() when its no longer in scope + Tools: Bug BNC:473265 - crm_resource -L dumps core + Tools: Bug LF:2001 - Transient node attributes should be set via attrd + Tools: Bug LF:2036 - crm_resource cannot set/get parameters for cloned resources + Tools: Bug LF:2046 - Node attribute updates are lost because attrd can take too long to start + Tools: Cause the correct clone instance to be failed with crm_resource -F + Tools: cluster_test - Allow the user to select a stack and fix CTS invocation + Tools: crm cli: allow rename only if the resource is stopped + Tools: crm cli: catch system errors on file operations + Tools: crm cli: completion for ids in configure + Tools: crm cli: drop '-rsc' from attributes for order constraint + Tools: crm cli: exit with an appropriate exit code + Tools: crm cli: fix wrong order of action and resource in order constraint + Tools: crm cli: fox wrong exit code + Tools: crm cli: improve handling of cib attributes + Tools: crm cli: new command: configure rename + Tools: crm cli: new command: configure upgrade + Tools: crm cli: new command: node delete + Tools: crm cli: prevent key errors on missing cib attributes + Tools: crm cli: print long help for help topics + Tools: crm cli: return on syntax error when parsing score + Tools: crm cli: rsc_location can be without nvpairs + Tools: crm cli: short node preference location constraint + Tools: crm cli: sometimes, on errors, level would change on single shot use + Tools: crm cli: syntax: drop a bunch of commas (remains of help tables conversion) + Tools: crm cli: verify user input for sanity + Tools: crm: find expressions within rules (do not always skip xml nodes due to used id) + Tools: crm_master should not define a set id now that attrd is used. Defining one can break lookups + Tools: crm_mon Use the OID assigned to the project by IANA for SNMP traps + Medium (bnc#445622): Tools: crm cli: improve the node show command and drop node status + Medium (LF 2009): stonithd: improve timeouts for remote fencing + Medium: ais: Allow dead peers to be removed from membership calculations + Medium: ais: Pass node deletion events on to clients + Medium: ais: Sanitize ipc usage + Medium: ais: Supply the node uname in addtion to the id + Medium: Build: Clean up configure to ensure NON_FATAL_CFLAGS is consistent with CFLAGS (ie. includes -g) + Medium: Build: Install cluster_test + Medium: Build: Use more restrictive CFLAGS and fix the resulting errors + Medium: cib: CID:20 - Fix potential use-after-free in cib_native_signon + Medium: Core: Bug BNC:474727 - Set a maximum time to wait for IPC messages + Medium: Core: CID:12 - Fix memory leak in decode_transition_magic error path + Medium: Core: CID:14 - Fix memory leak in calculate_xml_digest error path + Medium: Core: CID:16 - Fix memory leak in date_to_string error path + Medium: Core: Try to track down the cause of XML parsing errors + Medium: crmd: Bug BNC:472473 - Do not wait excessive amounts of time for lost actions + Medium: crmd: Bug BNC:472473 - Reduce the transition timeout to action_timeout+network_delay + Medium: crmd: Do not fast-track the processing of LRM refreshes when there are pending actions. + Medium: crmd: do_dc_join_filter_offer - Check the 'join' message is for the current instance before deciding to NACK peers + Medium: crmd: Find option values without having to do a config upgrade + Medium: crmd: Implement shutdown using a transient node attribute + Medium: crmd: Update the crmd options to use dashes instead of underscores + Medium: cts: Add 'cluster reattach' to the suite of automated regression tests + Medium: cts: cluster_test - Make some usability enhancements + Medium: CTS: cluster_test - suggest a valid port number + Medium: CTS: Fix python import order + Medium: cts: Implement an automated SplitBrain test + Medium: CTS: Remove references to deleted classes + Medium: Extra: Resources - Use HA_VARRUN instead of HA_RSCTMP for state files as Heartbeat removes HA_RSCTMP at startup + Medium: HB: Bug 1933 - Fake crmd_client_status_callback() calls because HB does not provide them for already running processes + Medium: pengine: CID:17 - Fix memory leak in find_actions_by_task error path + Medium: pengine: CID:7,8 - Prevent hypothetical use-of-NULL in LogActions + Medium: pengine: Defer logging the actions performed on a resource until we have processed ordering constraints + Medium: pengine: Remove the symmetrical attribute of colocation constraints + Medium: Resources: pingd - fix the meta defaults + Medium: Resources: Stateful - Add missing meta defaults + Medium: stonithd: exit if we the pid file cannot be locked + Medium: Tools: Allow attrd clients to specify the ID the attribute should be created with + Medium: Tools: attrd - Allow attribute updates to be performed from a hosts peer + Medium: Tools: Bug LF:1994 - Clean up crm_verify return codes + Medium: Tools: Change the pingd defaults to ping hosts once every second (instead of 5 times every 10 seconds) + Medium: Tools: cibmin - Detect resource operations with a view to providing email/snmp/cim notification + Medium: Tools: crm cli: add back symmetrical for order constraints + Medium: Tools: crm cli: generate role in location when converting from xml + Medium: Tools: crm cli: handle shlex exceptions + Medium: Tools: crm cli: keep order of help topics + Medium: Tools: crm cli: refine completion for ids in configure + Medium: Tools: crm cli: replace inf with INFINITY + Medium: Tools: crm cli: streamline cib load and parsing + Medium: Tools: crm cli: supply provider only for ocf class primitives + Medium: Tools: crm_mon - Add support for sending mail notifications of resource events + Medium: Tools: crm_mon - Include the DC version in status summary + Medium: Tools: crm_mon - Sanitize startup and option processing + Medium: Tools: crm_mon - switch to event-driven updates and add support for sending snmp traps + Medium: Tools: crm_shadow - Replace the --locate option with the saner --edit + Medium: Tools: hb2openais: do not remove Evmsd resources, but replace them with clvmd + Medium: Tools: hb2openais: replace crmadmin with crm_mon + Medium: Tools: hb2openais: replace the lsb class with ocf for o2cb + Medium: Tools: hb2openais: reuse code + Medium: Tools: LF:2029 - Display an error if crm_resource is used to reset the operation history of non-primitive resources + Medium: Tools: Make pingd resilient to attrd failures + Medium: Tools: pingd - fix the command line switches + Medium: Tools: Rename ccm_tool to crm_node * Tue Nov 18 2008 Andrew Beekhof - 1.0.1-1 - Update source tarball to revision: 6fc5ce8302ab (stable-1.0) tip - Statistics: Changesets: 170 Diff: 816 files changed, 7633 insertions(+), 6286 deletions(-) - Changes since Pacemaker-1.0.1 + ais: Allow the crmd to get callbacks whenever a node state changes + ais: Create an option for starting the mgmtd daemon automatically + ais: Ensure HA_RSCTMP exists for use by resource agents + ais: Hook up the openais.conf config logging options + ais: Zero out the PID of disconnecting clients + cib: Ensure global updates cause a disk write when appropriate + Core: Add an extra snaity check to getXpathResults() to prevent segfaults + Core: Do not redefine __FUNCTION__ unnecessarily + Core: Repair the ability to have comments in the configuration + crmd: Bug:1975 - crmd should wait indefinitely for stonith operations to complete + crmd: Ensure PE processing does not occur for all error cases in do_pe_invoke_callback + crmd: Requests to the CIB should cause any prior PE calculations to be ignored + heartbeat: Wait for membership 'up' events before removing stale node status data + pengine: Bug LF:1988 - Ensure recurring operations always have the correct target-rc set + pengine: Bug LF:1988 - For unmanaged resources we need to skip the usual can_run_resources() checks + pengine: Ensure the terminate node attribute is handled correctly + pengine: Fix optional colocation + pengine: Improve up the detection of 'new' nodes joining the cluster + pengine: Prevent assert failures in master_color() by ensuring unmanaged masters are always reallocated to their current location + Tools: crm cli: parser: return False on syntax error and None for comments + Tools: crm cli: unify template and edit commands + Tools: crm_shadow - Show more line number information after validation failures + Tools: hb2openais: add option to upgrade the CIB to v3.0 + Tools: hb2openais: add U option to getopts and update usage + Tools: hb2openais: backup improved and multiple fixes + Tools: hb2openais: fix class/provider reversal + Tools: hb2openais: fix testing + Tools: hb2openais: move the CIB update to the end + Tools: hb2openais: update logging and set logfile appropriately + Tools: LF:1969 - Attrd never sets any properties in the cib + Tools: Make attrd functional on OpenAIS + Medium: ais: Hook up the options for specifying the expected number of nodes and total quorum votes + Medium: ais: Look for pacemaker options inside the service block with 'name: pacemaker' instead of creating an addtional configuration block + Medium: ais: Provide better feedback when nodes change nodeids (in openais.conf) + Medium: cib: Always store cib contents on disk with num_updates=0 + Medium: cib: Ensure remote access ports are cleaned up on shutdown + Medium: crmd: Detect deleted resource operations automatically + Medium: crmd: Erase a nodes resource operations and transient attributes after a successful STONITH + Medium: crmd: Find a more appropriate place to update quorum and refresh attrd attributes + Medium: crmd: Fix the handling of unexpected PE exits to ensure the current CIB is stored + Medium: crmd: Fix the recording of pending operations in the CIB + Medium: crmd: Initiate an attrd refresh _after_ the status section has been fully repopulated + Medium: crmd: Only the DC should update quorum in an openais cluster + Medium: Ensure meta attributes are used consistantly + Medium: pengine: Allow group and clone level resource attributes + Medium: pengine: Bug N:437719 - Ensure scores from colocated resources count when allocating groups + Medium: pengine: Prevent lsb scripts from being used in globally unique clones + Medium: pengine: Make a best-effort guess at a migration threshold for people with 0.6 configs + Medium: Resources: controld - ensure we are part of a clone with globally_unique=false + Medium: Tools: attrd - Automatically refresh all attributes after a CIB replace operation + Medium: Tools: Bug LF:1985 - crm_mon - Correctly process failed cib queries to allow reconnection after cluster restarts + Medium: Tools: Bug LF:1987 - crm_verify incorrectly warns of configuration upgrades for the most recent version + Medium: Tools: crm (bnc#441028): check for key error in attributes management + Medium: Tools: crm_mon - display the meaning of the operation rc code instead of the status + Medium: Tools: crm_mon - Fix the display of timing data + Medium: Tools: crm_verify - check that we are being asked to validate a complete config + Medium: xml: Relax the restriction on the contents of rsc_locaiton.node * Thu Oct 16 2008 Andrew Beekhof - 1.0.0-1 - Update source tarball to revision: 388654dfef8f tip - Statistics: Changesets: 261 Diff: 3021 files changed, 244985 insertions(+), 111596 deletions(-) - Changes since f805e1b30103 + add the crm cli program + ais: Move the service id definition to a common location and make sure it is always used + build: rename hb2openais.sh to .in and replace paths with vars + cib: Implement --create for crm_shadow + cib: Remove dead files + Core: Allow the expected number of quorum votes to be configrable + Core: cl_malloc and friends were removed from Heartbeat + Core: Only call xmlCleanupParser() if we parsed anything. Doing so unconditionally seems to cause a segfault + hb2openais.sh: improve pingd handling; several bugs fixed + hb2openais: fix clone creation; replace EVMS strings + new hb2openais.sh conversion script + pengine: Bug LF:1950 - Ensure the current values for all notification variables are always set (even if empty) + pengine: Bug LF:1955 - Ensure unmanaged masters are unconditionally repromoted to ensure they are monitored correctly. + pengine: Bug LF:1955 - Fix another case of filtering causing unmanaged master failures + pengine: Bug LF:1955 - Umanaged mode prevents master resources from being allocated correctly + pengine: Bug N:420538 - Anit-colocation caused a positive node preference + pengine: Correctly handle unmanaged resources to prevent them from being started elsewhere + pengine: crm_resource - Fix the --migrate command + pengine: MAke stonith-enabled default to true and warn if no STONITH resources are found + pengine: Make sure orphaned clone children are created correctly + pengine: Monitors for unmanaged resources do not need to wait for start/promote/demote actions to complete + stonithd (LF 1951): fix remote stonith operations + stonithd: fix handling of timeouts + stonithd: fix logic for stonith resource priorities + stonithd: implement the fence-timeout instance attribute + stonithd: initialize value before reading fence-timeout + stonithd: set timeouts for fencing ops to the timeout of the start op + stonithd: stonith rsc priorities (new feature) + Tools: Add hb2openais - a tool for upgrading a Heartbeat cluster to use OpenAIS instead + Tools: crm_verify - clean up the upgrade logic to prevent crash on invalid configurations + Tools: Make pingd functional on Linux + Update version numbers for 1.0 candidates + Medium: ais: Add support for a synchronous call to retrieve the nodes nodeid + Medium: ais: Use the agreed service number + Medium: Build: Reliably detect heartbeat libraries during configure + Medium: Build: Supply prototypes for libreplace functions when needed + Medium: Build: Teach configure how to find corosync + Medium: Core: Provide better feedback if Pacemaker is started by a stack it does not support + Medium: crmd: Avoid calling GHashTable functions with NULL + Medium: crmd: Delay raising I_ERROR when the PE exits until we have had a chance to save the current CIB + Medium: crmd: Hook up the stonith-timeout option to stonithd + Medium: crmd: Prevent potential use-of-NULL in global_timer_callback + Medium: crmd: Rationalize the logging of graph aborts + Medium: pengine: Add a stonith_timeout option and remove new options that are better set in rsc_defaults + Medium: pengine: Allow external entities to ask for a node to be shot by creating a terminate=true transient node attribute + Medium: pengine: Bug LF:1950 - Notifications do not contain all documented resource state fields + Medium: pengine: Bug N:417585 - Do not restart group children whos individual score drops below zero + Medium: pengine: Detect clients that disconnect before receiving their reply + Medium: pengine: Implement a true maintenance mode + Medium: pengine: Implement on-fail=standby for NTT. Derived from a patch by Satomi TANIGUCHI + Medium: pengine: Print the correct message when stonith is disabled + Medium: pengine: ptest - check the input is valid before proceeding + Medium: pengine: Revert group stickiness to the 'old way' + Medium: pengine: Use the correct attribute for action 'requires' (was prereq) + Medium: stonithd: Fix compilation without full heartbeat install + Medium: stonithd: exit with better code on empty host list + Medium: tools: Add a new regression test for CLI tools + Medium: tools: crm_resource - return with non-zero when a resource migration command is invalid + Medium: tools: crm_shadow - Allow the admin to start with an empty CIB (and no cluster connection) + Medium: xml: pacemaker-0.7 is now an alias for the 1.0 schema * Mon Sep 22 2008 Andrew Beekhof - 0.7.3-1 - Update source tarball to revision: 33e677ab7764+ tip - Statistics: Changesets: 133 Diff: 89 files changed, 7492 insertions(+), 1125 deletions(-) - Changes since f805e1b30103 + Tools: add the crm cli program + Core: cl_malloc and friends were removed from Heartbeat + Core: Only call xmlCleanupParser() if we parsed anything. Doing so unconditionally seems to cause a segfault + new hb2openais.sh conversion script + pengine: Bug LF:1950 - Ensure the current values for all notification variables are always set (even if empty) + pengine: Bug LF:1955 - Ensure unmanaged masters are unconditionally repromoted to ensure they are monitored correctly. + pengine: Bug LF:1955 - Fix another case of filtering causing unmanaged master failures + pengine: Bug LF:1955 - Umanaged mode prevents master resources from being allocated correctly + pengine: Bug N:420538 - Anit-colocation caused a positive node preference + pengine: Correctly handle unmanaged resources to prevent them from being started elsewhere + pengine: crm_resource - Fix the --migrate command + pengine: MAke stonith-enabled default to true and warn if no STONITH resources are found + pengine: Make sure orphaned clone children are created correctly + pengine: Monitors for unmanaged resources do not need to wait for start/promote/demote actions to complete + stonithd (LF 1951): fix remote stonith operations + Tools: crm_verify - clean up the upgrade logic to prevent crash on invalid configurations + Medium: ais: Add support for a synchronous call to retrieve the nodes nodeid + Medium: ais: Use the agreed service number + Medium: pengine: Allow external entities to ask for a node to be shot by creating a terminate=true transient node attribute + Medium: pengine: Bug LF:1950 - Notifications do not contain all documented resource state fields + Medium: pengine: Bug N:417585 - Do not restart group children whos individual score drops below zero + Medium: pengine: Implement a true maintenance mode + Medium: pengine: Print the correct message when stonith is disabled + Medium: stonithd: exit with better code on empty host list + Medium: xml: pacemaker-0.7 is now an alias for the 1.0 schema * Wed Aug 20 2008 Andrew Beekhof - 0.7.1-1 - Update source tarball to revision: f805e1b30103+ tip - Statistics: Changesets: 184 Diff: 513 files changed, 43408 insertions(+), 43783 deletions(-) - Changes since 0.7.0-19 + Fix compilation when GNUTLS isnt found + admin: Fix use-after-free in crm_mon + Build: Remove testing code that prevented heartbeat-only builds + cib: Use single quotes so that the xpath queries for nvpairs will succeed + crmd: Always connect to stonithd when the TE starts and ensure we notice if it dies + crmd: Correctly handle a dead PE process + crmd: Make sure async-failures cause the failcount to be incrimented + pengine: Bug LF:1941 - Handle failed clone instance probes when clone-max < #nodes + pengine: Parse resource ordering sets correctly + pengine: Prevent use-of-NULL - order->rsc_rh will not always be non-NULL + pengine: Unpack colocation sets correctly + Tools: crm_mon - Prevent use-of-NULL for orphaned resources + Medium: ais: Add support for a synchronous call to retrieve the nodes nodeid + Medium: ais: Allow transient clients to receive membership updates + Medium: ais: Avoid double-free in error path + Medium: ais: Include in the mebership nodes for which we have not determined their hostname + Medium: ais: Spawn the PE from the ais plugin instead of the crmd + Medium: cib: By default, new configurations use the latest schema + Medium: cib: Clean up the CIB if it was already disconnected + Medium: cib: Only incriment num_updates if something actually changed + Medium: cib: Prevent use-after-free in client after abnormal termination of the CIB + Medium: Core: Fix memory leak in xpath searches + Medium: Core: Get more details regarding parser errors + Medium: Core: Repair expand_plus_plus - do not call char2score on unexpanded values + Medium: Core: Switch to the libxml2 parser - its significantly faster + Medium: Core: Use a libxml2 library function for xml -> text conversion + Medium: crmd: Asynchronous failure actions have no parameters + Medium: crmd: Avoid calling glib functions with NULL + Medium: crmd: Do not allow an election to promote a node from S_STARTING + Medium: crmd: Do not vote if we have not completed the local startup + Medium: crmd: Fix te_update_diff() now that get_object_root() functions differently + Medium: crmd: Fix the lrmd xpath expressions to not contain quotes + Medium: crmd: If we get a join offer during an election, better restart the election + Medium: crmd: No further processing is needed when using the LRMs API call for failing resources + Medium: crmd: Only update have-quorum if the value changed + Medium: crmd: Repair the input validation logic in do_te_invoke + Medium: cts: CIBs can no longer contain comments + Medium: cts: Enable a bunch of tests that were incorrectly disabled + Medium: cts: The libxml2 parser wont allow v1 resources to use integers as parameter names + Medium: Do not use the cluster UID and GID directly. Look them up based on the configured value of HA_CCMUSER + Medium: Fix compilation when heartbeat is not supported + Medium: pengine: Allow groups to be involved in optional ordering constraints + Medium: pengine: Allow sets of operations to be reused by multiple resources + Medium: pengine: Bug LF:1941 - Mark extra clone instances as orphans and do not show inactive ones + Medium: pengine: Determin the correct migration-threshold during resource expansion + Medium: pengine: Implement no-quorum-policy=suicide (FATE #303619) + Medium: pengine: Clean up resources after stopping old copies of the PE + Medium: pengine: Teach the PE how to stop old copies of itself + Medium: Tools: Backport hb_report updates + Medium: Tools: cib_shadow - On create, spawn a new shell with CIB_shadow and PS1 set accordingly + Medium: Tools: Rename cib_shadow to crm_shadow * Fri Jul 18 2008 Andrew Beekhof - 0.7.0-19 - Update source tarball to revision: 007c3a1c50f5 (unstable) tip - Statistics: Changesets: 108 Diff: 216 files changed, 4632 insertions(+), 4173 deletions(-) - Changes added since unstable-0.7 + admin: Fix use-after-free in crm_mon + ais: Change the tag for the ais plugin to "pacemaker" (used in openais.conf) + ais: Log terminated processes as an error + cib: Performance - Reorganize things to avoid calculating the XML diff twice + pengine: Bug LF:1941 - Handle failed clone instance probes when clone-max < #nodes + pengine: Fix memory leak in action2xml + pengine: Make OCF_ERR_ARGS a node-level error rather than a cluster-level one + pengine: Properly handle clones that are not installed on all nodes + Medium: admin: cibadmin - Show any validation errors if the upgrade failed + Medium: admin: cib_shadow - Implement --locate to display the underlying filename + Medium: admin: cib_shadow - Implement a --diff option + Medium: admin: cib_shadow - Implement a --switch option + Medium: admin: crm_resource - create more compact constraints that do not use lifetime (which is deprecated) + Medium: ais: Approximate born_on for OpenAIS based clusters + Medium: cib: Remove do_id_check, it is a poor substitute for ID validation by a schema + Medium: cib: Skip construction of pre-notify messages if no-one wants one + Medium: Core: Attempt to streamline some key functions to increase performance + Medium: Core: Clean up XML parser after validation + Medium: crmd: Detect and optimize the CRMs behavior when processing diffs of an LRM refresh + Medium: Fix memory leaks when resetting the name of an XML object + Medium: pengine: Prefer the current location if it is one of a group of nodes with the same (highest) score * Wed Jun 25 2008 Andrew Beekhof - 0.7.0-1 - Update source tarball to revision: bde0c7db74fb tip - Statistics: Changesets: 439 Diff: 676 files changed, 41310 insertions(+), 52071 deletions(-) - Changes added since stable-0.6 + A new tool for setting up and invoking CTS + Admin: All tools now use --node (-N) for specifying node unames + Admin: All tools now use --xml-file (-x) and --xml-text (-X) for specifying where to find XML blobs + cib: Cleanup the API - remove redundant input fields + cib: Implement CIB_shadow - a facility for making and testing changes before uploading them to the cluster + cib: Make registering per-op callbacks an API call and renamed (for clarity) the API call for requesting notifications + Core: Add a facility for automatically upgrading old configurations + Core: Adopt libxml2 as the XML processing library - all external clients need to be recompiled + Core: Allow sending TLS messages larger than the MTU + Core: Fix parsing of time-only ISO dates + Core: Smarter handling of XML values containing quotes + Core: XML memory corruption - catch, and handle, cases where we are overwriting an attribute value with itself + Core: The xml ID type does not allow UUIDs that start with a number + Core: Implement XPath based versions of query/delete/replace/modify + Core: Remove some HA2.0.(3,4) compatability code + crmd: Overhaul the detection of nodes that are starting vs. failed + pengine: Bug LF:1459 - Allow failures to expire + pengine: Have the PE do non-persistent configuration upgrades before performing calculations + pengine: Replace failure-stickiness with a simple 'migration-threshold' + tengine: Simplify the design by folding the tengine process into the crmd + Medium: Admin: Bug LF:1438 - Allow the list of all/active resource operations to be queried by crm_resource + Medium: Admin: Bug LF:1708 - crm_resource should print a warning if an attribute is already set as a meta attribute + Medium: Admin: Bug LF:1883 - crm_mon should display fail-count and operation history + Medium: Admin: Bug LF:1883 - crm_mon should display operation timing data + Medium: Admin: Bug N:371785 - crm_resource -C does not also clean up fail-count attributes + Medium: Admin: crm_mon - include timing data for failed actions + Medium: ais: Read options from the environment since objdb is not completely usable yet + Medium: cib: Add sections for op_defaults and rsc_defaults + Medium: cib: Better matching notification callbacks (for detecting duplicates and removal) + Medium: cib: Bug LF:1348 - Allow rules and attribute sets to be referenced for use in other objects + Medium: cib: BUG LF:1918 - By default, all cib calls now timeout after 30s + Medium: cib: Detect updates that decrease the version tuple + Medium: cib: Implement a client-side operation timeout - Requires LHA update + Medium: cib: Implement callbacks and async notifications for remote connections + Medium: cib: Make cib->cmds->update() an alias for modify at the API level (also implemented in cibadmin) + Medium: cib: Mark the CIB as disconnected if the IPC connection is terminated + Medium: cib: New call option 'cib_can_create' which can be passed to modify actions - allows the object to be created if it does not exist yet + Medium: cib: Reimplement get|set|delete attributes using XPath + Medium: cib: Remove some useless parts of the API + Medium: cib: Remove the 'attributes' scaffolding from the new format + Medium: cib: Implement the ability for clients to connect to remote servers + Medium: Core: Add support for validating xml against RelaxNG schemas + Medium: Core: Allow more than one item to be modified/deleted in XPath based operations + Medium: Core: Fix the sort_pairs function for creating sorted xml objects + Medium: Core: iso8601 - Implement subtract_duration and fix subtract_time + Medium: Core: Reduce the amount of xml copying occuring + Medium: Core: Support value='value+=N' XML updates (in addtion to value='value++') + Medium: crmd: Add support for lrm_ops->fail_rsc if its available + Medium: crmd: HB - watch link status for node leaving events + Medium: crmd: Bug LF:1924 - Improved handling of lrmd disconnects and shutdowns + Medium: crmd: Do not wait for actions with a start_delay over 5 minutes. Confirm them immediately + Medium: pengine: Bug LF:1328 - Do not fencing nodes in clusters without managed resources + Medium: pengine: Bug LF:1461 - Give transient node attributes (in ) preference over persistent ones (in ) + Medium: pengine: Bug LF:1884, Bug LF:1885 - Implement N:M ordering and colocation constraints + Medium: pengine: Bug LF:1886 - Create a resource and operation 'defaults' config section + Medium: pengine: Bug LF:1892 - Allow recurring actions to be triggered at known times + Medium: pengine: Bug LF:1926 - Probes should complete before stop actions are invoked + Medium: pengine: Fix the standby when its set as a transient attribute + Medium: pengine: Implement a global 'stop-all-resources' option + Medium: pengine: Implement cibpipe, a tool for performing/simulating config changes "offline" + Medium: pengine: We do not allow colocation with specific clone instances + Medium: Tools: pingd - Implement a stack-independant version of pingd + Medium: xml: Ship an xslt for upgrading from 0.6 to 0.7 * Thu Jun 19 2008 Andrew Beekhof - 0.6.5-1 - Update source tarball to revision: b9fe723d1ac5 tip - Statistics: Changesets: 48 Diff: 37 files changed, 1204 insertions(+), 234 deletions(-) - Changes since Pacemaker-0.6.4 + Admin: Repair the ability to delete failcounts + ais: Audit IPC handling between the AIS plugin and CRM processes + ais: Have the plugin create needed /var/lib directories + ais: Make sure the sync and async connections are assigned correctly (not swapped) + cib: Correctly detect configuration changes - num_updates does not count + pengine: Apply stickiness values to the whole group, not the individual resources + pengine: Bug N:385265 - Ensure groups are migrated instead of remaining partially active on the current node + pengine: Bug N:396293 - Enforce manditory group restarts due to ordering constraints + pengine: Correctly recover master instances found active on more than one node + pengine: Fix memory leaks reported by Valgrind + Medium: Admin: crm_mon - Misc improvements from Satomi Taniguchi + Medium: Bug LF:1900 - Resource stickiness should not allow placement in asynchronous clusters + Medium: crmd: Ensure joins are completed promptly when a node taking part dies + Medium: pengine: Avoid clone instance shuffling in more cases + Medium: pengine: Bug LF:1906 - Remove an optimization in native_merge_weights() causing group scores to behave eratically + Medium: pengine: Make use of target_rc data to correctly process resource operations + Medium: pengine: Prevent a possible use of NULL in sort_clone_instance() + Medium: tengine: Include target rc in the transition key - used to correctly determin operation failure * Thu May 22 2008 Andrew Beekhof - 0.6.4-1 - Update source tarball to revision: 226d8e356924 tip - Statistics: Changesets: 55 Diff: 199 files changed, 7103 insertions(+), 12378 deletions(-) - Changes since Pacemaker-0.6.3 + crmd: Bug LF:1881 LF:1882 - Overhaul the logic for operation cancelation and deletion + crmd: Bug LF:1894 - Make sure cancelled recurring operations are cleaned out from the CIB + pengine: Bug N:387749 - Colocation with clones causes unnecessary clone instance shuffling + pengine: Ensure 'master' monitor actions are cancelled _before_ we demote the resource + pengine: Fix assert failure leading to core dump - make sure variable is properly initialized + pengine: Make sure 'slave' monitoring happens after the resource has been demoted + pengine: Prevent failure stickiness underflows (where too many failures become a _positive_ preference) + Medium: Admin: crm_mon - Only complain if the output file could not be opened + Medium: Common: filter_action_parameters - enable legacy handling only for older versions + Medium: pengine: Bug N:385265 - The failure stickiness of group children is ignored until it reaches -INFINITY + Medium: pengine: Implement master and clone colocation by exlcuding nodes rather than setting ones score to INFINITY (similar to cs: 756afc42dc51) + Medium: tengine: Bug LF:1875 - Correctly find actions to cancel when their node leaves the cluster * Wed Apr 23 2008 Andrew Beekhof - 0.6.3-1 - Update source tarball to revision: fd8904c9bc67 tip - Statistics: Changesets: 117 Diff: 354 files changed, 19094 insertions(+), 11338 deletions(-) - Changes since Pacemaker-0.6.2 + Admin: Bug LF:1848 - crm_resource - Pass set name and id to delete_resource_attr() in the correct order + Build: SNMP has been moved to the management/pygui project + crmd: Bug LF1837 - Unmanaged resources prevent crmd from shutting down + crmd: Prevent use-after-free in lrm interface code (Patch based on work by Keisuke MORI) + pengine: Allow the cluster to make progress by not retrying failed demote actions + pengine: Anti-colocation with slave should not prevent master colocation + pengine: Bug LF 1768 - Wait more often for STONITH ops to complete before starting resources + pengine: Bug LF1836 - Allow is-managed-default=false to be overridden by individual resources + pengine: Bug LF185 - Prevent pointless master/slave instance shuffling by ignoring the master-pref of stopped instances + pengine: Bug N-191176 - Implement interleaved ordering for clone-to-clone scenarios + pengine: Bug N-347004 - Ensure clone notifications are always sent when an instance is stopped/started + pengine: Bug N-347004 - Include notification ordering is correct for interleaved clones + pengine: Bug PM-11 - Directly link probe_complete to starting clone instances + pengine: Bug PM1 - Fix setting failcounts when applied to complex resources + pengine: Bug PM12, LF1648 - Extensive revision of group ordering + pengine: Bug PM7 - Ensure masters are always demoted before they are stopped + pengine: Create probes after allocation to allow smarter handling of anonymous clones + pengine: Do not prioritize clone instances that must be moved + pengine: Fix error in previous commit that allowed more than the required number of masters to be promoted + pengine: Group start ordering fixes + pengine: Implement promote/demote ordering for cloned groups + tengine: Repair failcount updates + tengine: Use the correct offset when updating failcount + Medium: Admin: Add a summary output that can be easily parsed by CTS for audit purposes + Medium: Build: Make configure fail if bz2 or libxml2 are not present + Medium: Build: Re-instate a better default for LCRSODIR + Medium: CIB: Bug LF-1861 - Filter irrelvant error status from synchronous CIB clients + Medium: Core: Bug 1849 - Invalid conversion of ordinal leap year to gregorian date + Medium: Core: Drop compataibility code for 2.0.4 and 2.0.5 clusters + Medium: crmd: Bug LF-1860 - Automatically cancel recurring ops before demote and promote operations (not only stops) + Medium: crmd: Save the current CIB contents if we detect the PE crashed + Medium: pengine: Bug LF:1866 - Fix version check when applying compatability handling for failed start operations + Medium: pengine: Bug LF:1866 - Restore the ability to have start failures not be fatal + Medium: pengine: Bug PM1 - Failcount applies to all instances of non-unique clone + Medium: pengine: Correctly set the state of partially active master/slave groups + Medium: pengine: Do not claim to be stopping an already stopped orphan + Medium: pengine: Ensure implies_left ordering constraints are always effective + Medium: pengine: Indicate each resources 'promotion' score + Medium: pengine: Prevent a possible use-of-NULL + Medium: pengine: Reprocess the current action if it changed (so that any prior dependancies are updated) + Medium: tengine: Bug LF-1859 - Wait for fail-count updates to complete before terminating the transition + Medium: tengine: Bug LF:1859 - Do not abort graphs due to our own failcount updates + Medium: tengine: Bug LF:1859 - Prevent the TE from interupting itself * Thu Feb 14 2008 Andrew Beekhof - 0.6.2-1 - Update source tarball to revision: 28b1a8c1868b tip - Statistics: Changesets: 11 Diff: 7 files changed, 58 insertions(+), 18 deletions(-) - Changes since Pacemaker-0.6.1 + haresources2cib.py: set default-action-timeout to the default (20s) + haresources2cib.py: update ra parameters lists + Medium: SNMP: Allow the snmp subagent to be built (patch from MATSUDA, Daiki) + Medium: Tools: Make sure the autoconf variables in haresources2cib are expanded * Tue Feb 12 2008 Andrew Beekhof - 0.6.1-1 - Update source tarball to revision: e7152d1be933 tip - Statistics: Changesets: 25 Diff: 37 files changed, 1323 insertions(+), 227 deletions(-) - Changes since Pacemaker-0.6.0 + CIB: Ensure changes to top-level attributes (like admin_epoch) cause a disk write + CIB: Ensure the archived file hits the disk before returning + CIB: Repair the ability to do 'atomic incriment' updates (value="value++") + crmd: Bug #7 - Connecting to the crmd immediately after startup causes use-of-NULL + Medium: CIB: Mask cib_diff_resync results from the caller - they do not need to know + Medium: crmd: Delay starting the IPC server until we are fully functional + Medium: CTS: Fix the startup patterns + Medium: pengine: Bug 1820 - Allow the first resource in a group to be migrated + Medium: pengine: Bug 1820 - Check the colocation dependancies of resources to be migrated * Mon Jan 14 2008 Andrew Beekhof - 0.6.0-1 - This is the first release of the Pacemaker Cluster Resource Manager formerly part of Heartbeat. - For those looking for the GUI, mgmtd, CIM or TSA components, they are now found in the new pacemaker-pygui project. Build dependancies prevent them from being included in Heartbeat (since the built-in CRM is no longer supported) and, being non-core components, are not included with Pacemaker. - Update source tarball to revision: c94b92d550cf - Statistics: Changesets: 347 Diff: 2272 files changed, 132508 insertions(+), 305991 deletions(-) - Test hardware: + 6-node vmware cluster (sles10-sp1/256Mb/vmware stonith) on a single host (opensuse10.3/2Gb/2.66Ghz Quad Core2) + 7-node EMC Centera cluster (sles10/512Mb/2Ghz Xeon/ssh stonith) - Notes: Heartbeat Stack + All testing was performed with STONITH enabled + The CRM was enabled using the "crm respawn" directive - Notes: OpenAIS Stack + This release contains a preview of support for the OpenAIS cluster stack + The current release of the OpenAIS project is missing two important patches that we require. OpenAIS packages containing these patches are available for most major distributions at: http://download.opensuse.org/repositories/server:/ha-clustering + The OpenAIS stack is not currently recommended for use in clusters that have shared data as STONITH support is not yet implimented + pingd is not yet available for use with the OpenAIS stack + 3 significant OpenAIS issues were found during testing of 4 and 6 node clusters. We are activly working together with the OpenAIS project to get these resolved. - Pending bugs encountered during testing: + OpenAIS #1736 - Openais membership took 20s to stabilize + Heartbeat #1750 - ipc_bufpool_update: magic number in head does not match + OpenAIS #1793 - Assertion failure in memb_state_gather_enter() + OpenAIS #1796 - Cluster message corruption - Changes since Heartbeat-2.1.2-24 + Add OpenAIS support + Admin: crm_uuid - Look in the right place for Heartbeat UUID files + admin: Exit and indicate a problem if the crmd exits while crmadmin is performing a query + cib: Fix CIB_OP_UPDATE calls that modify the whole CIB + cib: Fix compilation when supporting the heartbeat stack + cib: Fix memory leaks caused by the switch to get_message_xml() + cib: HA_VALGRIND_ENABLED needs to be set _and_ set to 1|yes|true + cib: Use get_message_xml() in preference to cl_get_struct() + cib: Use the return value from call to write() in cib_send_plaintext() + Core: ccm nodes can legitimately have a node id of 0 + Core: Fix peer-process tracking for the Heartbeat stack + Core: Heartbeat does not send status notifications for nodes that were already part of the cluster. Fake them instead + CRM: Add children to HA_Messages such that the field name matches F_XML_TAGNAME + crm: Adopt a more flexible appraoch to enabling Valgrind + crm: Fix compilation when bzip2 is not installed + CRM: Future-proof get_message_xml() + crmd: Filter election responses based on time not FSA state + crmd: Handle all possible peer states in crmd_ha_status_callback() + crmd: Make sure the current date/time is set - prevents use-of-NULL when evaluating rules + crmd: Relax an assertion regrading ccm membership instances + crmd: Use (node->processes&crm_proc_ais) to accurately update the CIB after replace operations + crmd: Heartbeat: Accurately record peer client status + pengine: Bug 1777 - Allow colocation with a resource in the Stopped state + pengine: Bug 1822 - Prevent use-of-NULL in PromoteRsc() + pengine: Implement three recovery policies based on op_status and op_rc + pengine: Parse fail-count correctly (it may be set to ININFITY) + pengine: Prevent graph-loop when stonith agents need to be moved around before a STONITH op + pengine: Prevent graph-loops when two operations have the same name+interval + tengine: Cancel active timers when destroying graphs + tengine: Ensure failcount is set correctly for failed stops/starts + tengine: Update failcount for oeprations that time out + Medium: admin: Prevent hang in crm_mon -1 when there is no cib connection - Patch from Junko IKEDA + Medium: cib: Require --force|-f when performing potentially dangerous commands with cibadmin + Medium: cib: Tweak the shutdown code + Medium: Common: Only count peer processes of active nodes + Medium: Core: Create generic cluster sign-in method + Medium: core: Fix compilation when Heartbeat support is disabled + Medium: Core: General cleanup for supporting two stacks + Medium: Core: iso6601 - Support parsing of time-only strings + Medium: core: Isolate more code that is only needed when SUPPORT_HEARTBEAT is enabled + Medium: crm: Improved logging of errors in the XML parser + Medium: crmd: Fix potential use-of-NULL in string comparison + Medium: crmd: Reimpliment syncronizing of CIB queries and updates when invoking the PE + Medium: crm_mon: Indicate when a node is both in standby mode and offline + Medium: pengine: Bug 1822 - Do not try an promote groups if not all of it is active + Medium: pengine: on_fail=nothing is an alias for 'ignore' not 'restart' + Medium: pengine: Prevent a potential use-of-NULL in cron_range_satisfied() + snmp subagent: fix a problem on displaying an unmanaged group + snmp subagent: use the syslog setting + snmp: v2 support (thanks to Keisuke MORI) + snmp_subagent - made it not complain about some things if shutting down diff --git a/cib/callbacks.c b/cib/callbacks.c index 71c487e3f5..1452ded809 100644 --- a/cib/callbacks.c +++ b/cib/callbacks.c @@ -1,1622 +1,1713 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "common.h" +static unsigned long cib_local_bcast_num = 0; + typedef struct cib_local_notify_s { xmlNode *notify_src; char *client_id; gboolean from_peer; gboolean sync_reply; } cib_local_notify_t; int next_client_id = 0; + +#if SUPPORT_PLUGIN +gboolean legacy_mode = TRUE; +#else gboolean legacy_mode = FALSE; +#endif + qb_ipcs_service_t *ipcs_ro = NULL; qb_ipcs_service_t *ipcs_rw = NULL; qb_ipcs_service_t *ipcs_shm = NULL; gint cib_GCompareFunc(gconstpointer a, gconstpointer b); gboolean can_write(int flags); void send_cib_replace(const xmlNode * sync_request, const char *host); void cib_process_request(xmlNode * request, gboolean privileged, gboolean force_synchronous, gboolean from_peer, crm_client_t * cib_client); int cib_process_command(xmlNode * request, xmlNode ** reply, xmlNode ** cib_diff, gboolean privileged); gboolean cib_common_callback(qb_ipcs_connection_t * c, void *data, size_t size, gboolean privileged); static gboolean cib_read_legacy_mode(void) { static gboolean init = TRUE; static gboolean legacy = FALSE; if(init) { init = FALSE; legacy = daemon_option_enabled("cib", "legacy"); if(legacy) { crm_notice("Enabled legacy mode"); } } return legacy; } -static gboolean cib_legacy_mode(void) +gboolean cib_legacy_mode(void) { +#if SUPPORT_PLUGIN + return TRUE; +#endif + if(cib_read_legacy_mode()) { return TRUE; } return legacy_mode; } static int32_t cib_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { crm_trace("Connection %p", c); if (cib_shutdown_flag) { crm_info("Ignoring new client [%d] during shutdown", crm_ipcs_client_pid(c)); return -EPERM; } if (crm_client_new(c, uid, gid) == NULL) { return -EIO; } return 0; } static void cib_ipc_created(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); } static int32_t cib_ipc_dispatch_rw(qb_ipcs_connection_t * c, void *data, size_t size) { crm_client_t *client = crm_client_get(c); crm_trace("%p message from %s", c, client->id); return cib_common_callback(c, data, size, TRUE); } static int32_t cib_ipc_dispatch_ro(qb_ipcs_connection_t * c, void *data, size_t size) { crm_client_t *client = crm_client_get(c); crm_trace("%p message from %s", c, client->id); return cib_common_callback(c, data, size, FALSE); } /* Error code means? */ static int32_t cib_ipc_closed(qb_ipcs_connection_t * c) { crm_client_t *client = crm_client_get(c); if (client == NULL) { return 0; } crm_trace("Connection %p", c); crm_client_destroy(client); return 0; } static void cib_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); cib_ipc_closed(c); if (cib_shutdown_flag) { cib_shutdown(0); } } struct qb_ipcs_service_handlers ipc_ro_callbacks = { .connection_accept = cib_ipc_accept, .connection_created = cib_ipc_created, .msg_process = cib_ipc_dispatch_ro, .connection_closed = cib_ipc_closed, .connection_destroyed = cib_ipc_destroy }; struct qb_ipcs_service_handlers ipc_rw_callbacks = { .connection_accept = cib_ipc_accept, .connection_created = cib_ipc_created, .msg_process = cib_ipc_dispatch_rw, .connection_closed = cib_ipc_closed, .connection_destroyed = cib_ipc_destroy }; void cib_common_callback_worker(uint32_t id, uint32_t flags, xmlNode * op_request, crm_client_t * cib_client, gboolean privileged) { const char *op = crm_element_value(op_request, F_CIB_OPERATION); if (crm_str_eq(op, CRM_OP_REGISTER, TRUE)) { if (flags & crm_ipc_client_response) { xmlNode *ack = create_xml_node(NULL, __FUNCTION__); crm_xml_add(ack, F_CIB_OPERATION, CRM_OP_REGISTER); crm_xml_add(ack, F_CIB_CLIENTID, cib_client->id); crm_ipcs_send(cib_client, id, ack, flags); cib_client->request_id = 0; free_xml(ack); } return; } else if (crm_str_eq(op, T_CIB_NOTIFY, TRUE)) { /* Update the notify filters for this client */ int on_off = 0; long long bit = 0; const char *type = crm_element_value(op_request, F_CIB_NOTIFY_TYPE); crm_element_value_int(op_request, F_CIB_NOTIFY_ACTIVATE, &on_off); crm_debug("Setting %s callbacks for %s (%s): %s", type, cib_client->name, cib_client->id, on_off ? "on" : "off"); if (safe_str_eq(type, T_CIB_POST_NOTIFY)) { bit = cib_notify_post; } else if (safe_str_eq(type, T_CIB_PRE_NOTIFY)) { bit = cib_notify_pre; } else if (safe_str_eq(type, T_CIB_UPDATE_CONFIRM)) { bit = cib_notify_confirm; } else if (safe_str_eq(type, T_CIB_DIFF_NOTIFY)) { bit = cib_notify_diff; } else if (safe_str_eq(type, T_CIB_REPLACE_NOTIFY)) { bit = cib_notify_replace; } if (on_off) { set_bit(cib_client->options, bit); } else { clear_bit(cib_client->options, bit); } if (flags & crm_ipc_client_response) { /* TODO - include rc */ crm_ipcs_send_ack(cib_client, id, flags, "ack", __FUNCTION__, __LINE__); } return; } cib_process_request(op_request, FALSE, privileged, FALSE, cib_client); } int32_t cib_common_callback(qb_ipcs_connection_t * c, void *data, size_t size, gboolean privileged) { uint32_t id = 0; uint32_t flags = 0; int call_options = 0; crm_client_t *cib_client = crm_client_get(c); xmlNode *op_request = crm_ipcs_recv(cib_client, data, size, &id, &flags); if (op_request) { crm_element_value_int(op_request, F_CIB_CALLOPTS, &call_options); } if (op_request == NULL) { crm_trace("Invalid message from %p", c); crm_ipcs_send_ack(cib_client, id, flags, "nack", __FUNCTION__, __LINE__); return 0; } else if(cib_client == NULL) { crm_trace("Invalid client %p", c); return 0; } if (is_set(call_options, cib_sync_call)) { CRM_ASSERT(flags & crm_ipc_client_response); CRM_LOG_ASSERT(cib_client->request_id == 0); /* This means the client has two synchronous events in-flight */ cib_client->request_id = id; /* Reply only to the last one */ } if (cib_client->name == NULL) { const char *value = crm_element_value(op_request, F_CIB_CLIENTNAME); if (value == NULL) { cib_client->name = crm_itoa(cib_client->pid); } else { cib_client->name = strdup(value); } } crm_xml_add(op_request, F_CIB_CLIENTID, cib_client->id); crm_xml_add(op_request, F_CIB_CLIENTNAME, cib_client->name); #if ENABLE_ACL CRM_ASSERT(cib_client->user != NULL); crm_acl_get_set_user(op_request, F_CIB_USER, cib_client->user); #endif crm_log_xml_trace(op_request, "Client[inbound]"); cib_common_callback_worker(id, flags, op_request, cib_client, privileged); free_xml(op_request); return 0; } static uint64_t ping_seq = 0; static char *ping_digest = NULL; static bool ping_modified_since = FALSE; int sync_our_cib(xmlNode * request, gboolean all); static gboolean cib_digester_cb(gpointer data) { if (cib_is_master) { char buffer[32]; xmlNode *ping = create_xml_node(NULL, "ping"); ping_seq++; free(ping_digest); ping_digest = NULL; ping_modified_since = FALSE; snprintf(buffer, 32, U64T, ping_seq); crm_trace("Requesting peer digests (%s)", buffer); crm_xml_add(ping, F_TYPE, "cib"); crm_xml_add(ping, F_CIB_OPERATION, CRM_OP_PING); crm_xml_add(ping, F_CIB_PING_ID, buffer); crm_xml_add(ping, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); send_cluster_message(NULL, crm_msg_cib, ping, TRUE); free_xml(ping); } return FALSE; } static void process_ping_reply(xmlNode *reply) { uint64_t seq = 0; const char *host = crm_element_value(reply, F_ORIG); xmlNode *pong = get_message_xml(reply, F_CIB_CALLDATA); const char *seq_s = crm_element_value(pong, F_CIB_PING_ID); const char *digest = crm_element_value(pong, XML_ATTR_DIGEST); if (seq_s) { seq = crm_int_helper(seq_s, NULL); } if(digest == NULL) { crm_trace("Ignoring ping reply %s from %s with no digest", seq_s, host); } else if(seq != ping_seq) { crm_trace("Ignoring out of sequence ping reply %s from %s", seq_s, host); } else if(ping_modified_since) { crm_trace("Ignoring ping reply %s from %s: cib updated since", seq_s, host); } else { const char *version = crm_element_value(pong, XML_ATTR_CRM_VERSION); if(ping_digest == NULL) { crm_trace("Calculating new digest"); ping_digest = calculate_xml_versioned_digest(the_cib, FALSE, TRUE, version); } crm_trace("Processing ping reply %s from %s (%s)", seq_s, host, digest); if(safe_str_eq(ping_digest, digest) == FALSE) { xmlNode *remote_cib = get_message_xml(pong, F_CIB_CALLDATA); crm_notice("Local CIB %s.%s.%s.%s differs from %s: %s.%s.%s.%s %p", crm_element_value(the_cib, XML_ATTR_GENERATION_ADMIN), crm_element_value(the_cib, XML_ATTR_GENERATION), crm_element_value(the_cib, XML_ATTR_NUMUPDATES), ping_digest, host, remote_cib?crm_element_value(remote_cib, XML_ATTR_GENERATION_ADMIN):"_", remote_cib?crm_element_value(remote_cib, XML_ATTR_GENERATION):"_", remote_cib?crm_element_value(remote_cib, XML_ATTR_NUMUPDATES):"_", digest, remote_cib); if(remote_cib && remote_cib->children) { /* Additional debug */ xml_calculate_changes(the_cib, remote_cib); xml_log_changes(LOG_INFO, __FUNCTION__, remote_cib); crm_trace("End of differences"); } free_xml(remote_cib); sync_our_cib(reply, FALSE); } } } static void do_local_notify(xmlNode * notify_src, const char *client_id, gboolean sync_reply, gboolean from_peer) { /* send callback to originating child */ crm_client_t *client_obj = NULL; int local_rc = pcmk_ok; int call_id = 0; crm_element_value_int(notify_src, F_CIB_CALLID, &call_id); if (client_id != NULL) { client_obj = crm_client_get_by_id(client_id); } if (client_obj == NULL) { local_rc = -ECONNRESET; crm_trace("No client to sent response %d to, F_CIB_CLIENTID not set.", call_id); } else { int rid = 0; if (sync_reply) { if (client_obj->ipcs) { CRM_LOG_ASSERT(client_obj->request_id); rid = client_obj->request_id; client_obj->request_id = 0; crm_trace("Sending response %d to %s %s", rid, client_obj->name, from_peer ? "(originator of delegated request)" : ""); } else { crm_trace("Sending response [call %d] to %s %s", call_id, client_obj->name, from_peer ? "(originator of delegated request)" : ""); } } else { crm_trace("Sending event %d to %s %s", call_id, client_obj->name, from_peer ? "(originator of delegated request)" : ""); } switch (client_obj->kind) { case CRM_CLIENT_IPC: if (crm_ipcs_send(client_obj, rid, notify_src, sync_reply?crm_ipc_flags_none:crm_ipc_server_event) < 0) { local_rc = -ENOMSG; } break; #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: #endif case CRM_CLIENT_TCP: crm_remote_send(client_obj->remote, notify_src); break; default: crm_err("Unknown transport %d for %s", client_obj->kind, client_obj->name); } } if (local_rc != pcmk_ok && client_obj != NULL) { crm_warn("%sSync reply to %s failed: %s", sync_reply ? "" : "A-", client_obj ? client_obj->name : "", pcmk_strerror(local_rc)); } } +static void +local_notify_destroy_callback(gpointer data) +{ + cib_local_notify_t *notify = data; + + free_xml(notify->notify_src); + free(notify->client_id); + free(notify); +} + +static void +check_local_notify(int bcast_id) +{ + cib_local_notify_t *notify = NULL; + + if (!local_notify_queue) { + return; + } + + notify = g_hash_table_lookup(local_notify_queue, GINT_TO_POINTER(bcast_id)); + + if (notify) { + do_local_notify(notify->notify_src, notify->client_id, notify->sync_reply, + notify->from_peer); + g_hash_table_remove(local_notify_queue, GINT_TO_POINTER(bcast_id)); + } +} + +static void +queue_local_notify(xmlNode * notify_src, const char *client_id, gboolean sync_reply, + gboolean from_peer) +{ + cib_local_notify_t *notify = calloc(1, sizeof(cib_local_notify_t)); + + notify->notify_src = notify_src; + notify->client_id = strdup(client_id); + notify->sync_reply = sync_reply; + notify->from_peer = from_peer; + + if (!local_notify_queue) { + local_notify_queue = g_hash_table_new_full(g_direct_hash, + g_direct_equal, NULL, + local_notify_destroy_callback); + } + + g_hash_table_insert(local_notify_queue, GINT_TO_POINTER(cib_local_bcast_num), notify); +} + static void parse_local_options_v1(crm_client_t * cib_client, int call_type, int call_options, const char *host, const char *op, gboolean * local_notify, gboolean * needs_reply, gboolean * process, gboolean * needs_forward) { if (cib_op_modifies(call_type) && !(call_options & cib_inhibit_bcast)) { /* we need to send an update anyway */ *needs_reply = TRUE; } else { *needs_reply = FALSE; } if (host == NULL && (call_options & cib_scope_local)) { crm_trace("Processing locally scoped %s op from %s", op, cib_client->name); *local_notify = TRUE; } else if (host == NULL && cib_is_master) { crm_trace("Processing master %s op locally from %s", op, cib_client->name); *local_notify = TRUE; } else if (safe_str_eq(host, cib_our_uname)) { crm_trace("Processing locally addressed %s op from %s", op, cib_client->name); *local_notify = TRUE; } else if (stand_alone) { *needs_forward = FALSE; *local_notify = TRUE; *process = TRUE; } else { crm_trace("%s op from %s needs to be forwarded to %s", op, cib_client->name, host ? host : "the master instance"); *needs_forward = TRUE; *process = FALSE; } } static void parse_local_options_v2(crm_client_t * cib_client, int call_type, int call_options, const char *host, const char *op, gboolean * local_notify, gboolean * needs_reply, gboolean * process, gboolean * needs_forward) { if (cib_op_modifies(call_type)) { if(safe_str_eq(op, CIB_OP_MASTER) || safe_str_eq(op, CIB_OP_SLAVE)) { /* Always handle these locally */ *process = TRUE; *needs_reply = FALSE; *local_notify = TRUE; *needs_forward = FALSE; return; } else { /* Redirect all other updates via CPG */ *needs_reply = TRUE; *needs_forward = TRUE; *process = FALSE; crm_trace("%s op from %s needs to be forwarded to %s", op, cib_client->name, host ? host : "the master instance"); return; } } *process = TRUE; *needs_reply = FALSE; *local_notify = TRUE; *needs_forward = FALSE; if (stand_alone) { crm_trace("Processing %s op from %s (stand-alone)", op, cib_client->name); } else if (host == NULL) { crm_trace("Processing unaddressed %s op from %s", op, cib_client->name); } else if (safe_str_eq(host, cib_our_uname)) { crm_trace("Processing locally addressed %s op from %s", op, cib_client->name); } else { crm_trace("%s op from %s needs to be forwarded to %s", op, cib_client->name, host); *needs_forward = TRUE; *process = FALSE; } } static void parse_local_options(crm_client_t * cib_client, int call_type, int call_options, const char *host, const char *op, gboolean * local_notify, gboolean * needs_reply, gboolean * process, gboolean * needs_forward) { if(cib_legacy_mode()) { parse_local_options_v1(cib_client, call_type, call_options, host, op, local_notify, needs_reply, process, needs_forward); } else { parse_local_options_v2(cib_client, call_type, call_options, host, op, local_notify, needs_reply, process, needs_forward); } } static gboolean parse_peer_options_v1(int call_type, xmlNode * request, gboolean * local_notify, gboolean * needs_reply, gboolean * process, gboolean * needs_forward) { const char *op = NULL; const char *host = NULL; const char *delegated = NULL; const char *originator = crm_element_value(request, F_ORIG); const char *reply_to = crm_element_value(request, F_CIB_ISREPLY); const char *update = crm_element_value(request, F_CIB_GLOBAL_UPDATE); gboolean is_reply = safe_str_eq(reply_to, cib_our_uname); if (crm_is_true(update)) { *needs_reply = FALSE; if (is_reply) { *local_notify = TRUE; crm_trace("Processing global/peer update from %s" " that originated from us", originator); } else { crm_trace("Processing global/peer update from %s", originator); } return TRUE; } crm_trace("Processing %s request sent by %s", op, originator); op = crm_element_value(request, F_CIB_OPERATION); if (safe_str_eq(op, "cib_shutdown_req")) { /* Always process these */ *local_notify = FALSE; if (reply_to == NULL || is_reply) { *process = TRUE; } if (is_reply) { *needs_reply = FALSE; } return *process; } if (is_reply && safe_str_eq(op, CRM_OP_PING)) { process_ping_reply(request); return FALSE; } if (is_reply) { crm_trace("Forward reply sent from %s to local clients", originator); *process = FALSE; *needs_reply = FALSE; *local_notify = TRUE; return TRUE; } host = crm_element_value(request, F_CIB_HOST); if (host != NULL && safe_str_eq(host, cib_our_uname)) { crm_trace("Processing %s request sent to us from %s", op, originator); return TRUE; } else if(is_reply == FALSE && safe_str_eq(op, CRM_OP_PING)) { crm_trace("Processing %s request sent to %s by %s", op, host?host:"everyone", originator); *needs_reply = TRUE; return TRUE; } else if (host == NULL && cib_is_master == TRUE) { crm_trace("Processing %s request sent to master instance from %s", op, originator); return TRUE; } delegated = crm_element_value(request, F_CIB_DELEGATED); if (delegated != NULL) { crm_trace("Ignoring msg for master instance"); } else if (host != NULL) { /* this is for a specific instance and we're not it */ crm_trace("Ignoring msg for instance on %s", crm_str(host)); } else if (reply_to == NULL && cib_is_master == FALSE) { /* this is for the master instance and we're not it */ crm_trace("Ignoring reply to %s", crm_str(reply_to)); } else if (safe_str_eq(op, "cib_shutdown_req")) { if (reply_to != NULL) { crm_debug("Processing %s from %s", op, host); *needs_reply = FALSE; } else { crm_debug("Processing %s reply from %s", op, host); } return TRUE; } else { crm_err("Nothing for us to do?"); crm_log_xml_err(request, "Peer[inbound]"); } return FALSE; } static gboolean parse_peer_options_v2(int call_type, xmlNode * request, gboolean * local_notify, gboolean * needs_reply, gboolean * process, gboolean * needs_forward) { const char *host = NULL; const char *delegated = crm_element_value(request, F_CIB_DELEGATED); const char *op = crm_element_value(request, F_CIB_OPERATION); const char *originator = crm_element_value(request, F_ORIG); const char *reply_to = crm_element_value(request, F_CIB_ISREPLY); const char *update = crm_element_value(request, F_CIB_GLOBAL_UPDATE); gboolean is_reply = safe_str_eq(reply_to, cib_our_uname); if(safe_str_eq(op, CIB_OP_REPLACE)) { /* sync_our_cib() sets F_CIB_ISREPLY */ if (reply_to) { delegated = reply_to; } goto skip_is_reply; } else if(safe_str_eq(op, CIB_OP_SYNC)) { } else if (is_reply && safe_str_eq(op, CRM_OP_PING)) { process_ping_reply(request); return FALSE; } else if (safe_str_eq(op, CIB_OP_UPGRADE)) { /* Only the DC (node with the oldest software) should process * this operation if F_CIB_SCHEMA_MAX is unset * * If the DC is happy it will then send out another * CIB_OP_UPGRADE which will tell all nodes to do the actual * upgrade. * * Except this time F_CIB_SCHEMA_MAX will be set which puts a * limit on how far newer nodes will go */ const char *max = crm_element_value(request, F_CIB_SCHEMA_MAX); crm_trace("Parsing %s operation%s for %s with max=%s", op, is_reply?" reply":"", cib_is_master?"master":"slave", max); if(max == NULL && cib_is_master) { /* We are the DC, check if this upgrade is allowed */ goto skip_is_reply; } else if(max) { /* Ok, go ahead and upgrade to 'max' */ goto skip_is_reply; } else { return FALSE; /* Ignore */ } } else if (crm_is_true(update)) { crm_info("Detected legacy %s global update from %s", op, originator); send_sync_request(NULL); legacy_mode = TRUE; return FALSE; } else if (is_reply && cib_op_modifies(call_type)) { crm_trace("Ignoring legacy %s reply sent from %s to local clients", op, originator); return FALSE; } else if (safe_str_eq(op, "cib_shutdown_req")) { /* Legacy handling */ crm_debug("Legacy handling of %s message from %s", op, originator); *local_notify = FALSE; if (reply_to == NULL) { *process = TRUE; } return *process; } if(is_reply) { crm_trace("Handling %s reply sent from %s to local clients", op, originator); *process = FALSE; *needs_reply = FALSE; *local_notify = TRUE; return TRUE; } skip_is_reply: *process = TRUE; *needs_reply = FALSE; if(safe_str_eq(delegated, cib_our_uname)) { *local_notify = TRUE; } else { *local_notify = FALSE; } host = crm_element_value(request, F_CIB_HOST); if (host != NULL && safe_str_eq(host, cib_our_uname)) { crm_trace("Processing %s request sent to us from %s", op, originator); *needs_reply = TRUE; return TRUE; } else if (host != NULL) { /* this is for a specific instance and we're not it */ crm_trace("Ignoring %s operation for instance on %s", op, crm_str(host)); return FALSE; } else if(is_reply == FALSE && safe_str_eq(op, CRM_OP_PING)) { *needs_reply = TRUE; } crm_trace("Processing %s request sent to everyone by %s/%s on %s %s", op, crm_element_value(request, F_CIB_CLIENTNAME), crm_element_value(request, F_CIB_CALLID), originator, (*local_notify)?"(notify)":""); return TRUE; } static gboolean parse_peer_options(int call_type, xmlNode * request, gboolean * local_notify, gboolean * needs_reply, gboolean * process, gboolean * needs_forward) { /* TODO: What happens when an update comes in after node A * requests the CIB from node B, but before it gets the reply (and * sends out the replace operation) */ if(cib_legacy_mode()) { return parse_peer_options_v1( call_type, request, local_notify, needs_reply, process, needs_forward); } else { return parse_peer_options_v2( call_type, request, local_notify, needs_reply, process, needs_forward); } } static void forward_request(xmlNode * request, crm_client_t * cib_client, int call_options) { const char *op = crm_element_value(request, F_CIB_OPERATION); const char *host = crm_element_value(request, F_CIB_HOST); crm_xml_add(request, F_CIB_DELEGATED, cib_our_uname); if (host != NULL) { crm_trace("Forwarding %s op to %s", op, host); send_cluster_message(crm_get_peer(0, host), crm_msg_cib, request, FALSE); } else { crm_trace("Forwarding %s op to master instance", op); send_cluster_message(NULL, crm_msg_cib, request, FALSE); } /* Return the request to its original state */ xml_remove_prop(request, F_CIB_DELEGATED); if (call_options & cib_discard_reply) { crm_trace("Client not interested in reply"); } } static gboolean send_peer_reply(xmlNode * msg, xmlNode * result_diff, const char *originator, gboolean broadcast) { CRM_ASSERT(msg != NULL); if (broadcast) { /* this (successful) call modified the CIB _and_ the * change needs to be broadcast... * send via HA to other nodes */ int diff_add_updates = 0; int diff_add_epoch = 0; int diff_add_admin_epoch = 0; int diff_del_updates = 0; int diff_del_epoch = 0; int diff_del_admin_epoch = 0; const char *digest = NULL; + int format = 1; CRM_LOG_ASSERT(result_diff != NULL); digest = crm_element_value(result_diff, XML_ATTR_DIGEST); + crm_element_value_int(result_diff, "format", &format); + cib_diff_version_details(result_diff, &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); crm_trace("Sending update diff %d.%d.%d -> %d.%d.%d %s", diff_del_admin_epoch, diff_del_epoch, diff_del_updates, diff_add_admin_epoch, diff_add_epoch, diff_add_updates, digest); crm_xml_add(msg, F_CIB_ISREPLY, originator); crm_xml_add(msg, F_CIB_GLOBAL_UPDATE, XML_BOOLEAN_TRUE); crm_xml_add(msg, F_CIB_OPERATION, CIB_OP_APPLY_DIFF); - CRM_ASSERT(digest != NULL); + if (format == 1) { + CRM_ASSERT(digest != NULL); + } add_message_xml(msg, F_CIB_UPDATE_DIFF, result_diff); crm_log_xml_explicit(msg, "copy"); return send_cluster_message(NULL, crm_msg_cib, msg, TRUE); } else if (originator != NULL) { /* send reply via HA to originating node */ crm_trace("Sending request result to %s only", originator); crm_xml_add(msg, F_CIB_ISREPLY, originator); return send_cluster_message(crm_get_peer(0, originator), crm_msg_cib, msg, FALSE); } return FALSE; } void cib_process_request(xmlNode * request, gboolean force_synchronous, gboolean privileged, gboolean unused, crm_client_t * cib_client) { int call_type = 0; int call_options = 0; gboolean process = TRUE; gboolean is_update = TRUE; gboolean from_peer = TRUE; gboolean needs_reply = TRUE; gboolean local_notify = FALSE; gboolean needs_forward = FALSE; gboolean global_update = crm_is_true(crm_element_value(request, F_CIB_GLOBAL_UPDATE)); xmlNode *op_reply = NULL; xmlNode *result_diff = NULL; int rc = pcmk_ok; const char *op = crm_element_value(request, F_CIB_OPERATION); const char *originator = crm_element_value(request, F_ORIG); const char *host = crm_element_value(request, F_CIB_HOST); const char *target = NULL; const char *call_id = crm_element_value(request, F_CIB_CALLID); const char *client_id = crm_element_value(request, F_CIB_CLIENTID); const char *client_name = crm_element_value(request, F_CIB_CLIENTNAME); const char *reply_to = crm_element_value(request, F_CIB_ISREPLY); if (cib_client) { from_peer = FALSE; } cib_num_ops++; if (cib_num_ops == 0) { cib_num_fail = 0; cib_num_local = 0; cib_num_updates = 0; crm_info("Stats wrapped around"); } crm_element_value_int(request, F_CIB_CALLOPTS, &call_options); if (force_synchronous) { call_options |= cib_sync_call; } if (host != NULL && strlen(host) == 0) { host = NULL; } if (host) { target = host; } else if (call_options & cib_scope_local) { target = "local host"; } else { target = "master"; } if (from_peer) { crm_trace("Processing peer %s operation from %s/%s on %s intended for %s (reply=%s)", op, client_name, call_id, originator, target, reply_to); } else { crm_xml_add(request, F_ORIG, cib_our_uname); crm_trace("Processing local %s operation from %s/%s intended for %s", op, client_name, call_id, target); } rc = cib_get_operation_id(op, &call_type); if (rc != pcmk_ok) { /* TODO: construct error reply? */ crm_err("Pre-processing of command failed: %s", pcmk_strerror(rc)); return; } if (from_peer == FALSE) { parse_local_options(cib_client, call_type, call_options, host, op, &local_notify, &needs_reply, &process, &needs_forward); } else if (parse_peer_options(call_type, request, &local_notify, &needs_reply, &process, &needs_forward) == FALSE) { return; } is_update = cib_op_modifies(call_type); if (is_update) { cib_num_updates++; } if (call_options & cib_discard_reply) { needs_reply = is_update; local_notify = FALSE; } if (needs_forward) { const char *host = crm_element_value(request, F_CIB_HOST); const char *section = crm_element_value(request, F_CIB_SECTION); crm_info("Forwarding %s operation for section %s to %s (origin=%s/%s/%s)", op, section ? section : "'all'", host ? host : "master", originator ? originator : "local", client_name, call_id); forward_request(request, cib_client, call_options); return; } if (cib_status != pcmk_ok) { const char *call = crm_element_value(request, F_CIB_CALLID); rc = cib_status; crm_err("Operation ignored, cluster configuration is invalid." " Please repair and restart: %s", pcmk_strerror(cib_status)); op_reply = create_xml_node(NULL, "cib-reply"); crm_xml_add(op_reply, F_TYPE, T_CIB); crm_xml_add(op_reply, F_CIB_OPERATION, op); crm_xml_add(op_reply, F_CIB_CALLID, call); crm_xml_add(op_reply, F_CIB_CLIENTID, client_id); crm_xml_add_int(op_reply, F_CIB_CALLOPTS, call_options); crm_xml_add_int(op_reply, F_CIB_RC, rc); crm_trace("Attaching reply output"); add_message_xml(op_reply, F_CIB_CALLDATA, the_cib); crm_log_xml_explicit(op_reply, "cib:reply"); } else if (process) { time_t finished = 0; int now = time(NULL); int level = LOG_INFO; const char *section = crm_element_value(request, F_CIB_SECTION); cib_num_local++; rc = cib_process_command(request, &op_reply, &result_diff, privileged); if (is_update == FALSE) { level = LOG_TRACE; } else if (global_update) { switch (rc) { case pcmk_ok: level = LOG_INFO; break; case -pcmk_err_old_data: case -pcmk_err_diff_resync: case -pcmk_err_diff_failed: level = LOG_TRACE; break; default: level = LOG_ERR; } } else if (rc != pcmk_ok && is_update) { cib_num_fail++; level = LOG_WARNING; } do_crm_log(level, "Completed %s operation for section %s: %s (rc=%d, origin=%s/%s/%s, version=%s.%s.%s)", op, section ? section : "'all'", pcmk_strerror(rc), rc, originator ? originator : "local", client_name, call_id, the_cib ? crm_element_value(the_cib, XML_ATTR_GENERATION_ADMIN) : "0", the_cib ? crm_element_value(the_cib, XML_ATTR_GENERATION) : "0", the_cib ? crm_element_value(the_cib, XML_ATTR_NUMUPDATES) : "0"); finished = time(NULL); if (finished - now > 3) { crm_trace("%s operation took %ds to complete", op, finished - now); crm_write_blackbox(0, NULL); } if (op_reply == NULL && (needs_reply || local_notify)) { crm_err("Unexpected NULL reply to message"); crm_log_xml_err(request, "null reply"); needs_reply = FALSE; local_notify = FALSE; } } /* from now on we are the server */ if(is_update && cib_legacy_mode() == FALSE) { crm_trace("Completed pre-sync update from %s/%s/%s%s", originator ? originator : "local", client_name, call_id, local_notify?" with local notification":""); } else if (needs_reply == FALSE || stand_alone) { /* nothing more to do... * this was a non-originating slave update */ crm_trace("Completed slave update"); + } else if (cib_legacy_mode() && + rc == pcmk_ok && result_diff != NULL && !(call_options & cib_inhibit_bcast)) { + gboolean broadcast = FALSE; + + cib_local_bcast_num++; + crm_xml_add_int(request, F_CIB_LOCAL_NOTIFY_ID, cib_local_bcast_num); + broadcast = send_peer_reply(request, result_diff, originator, TRUE); + + if (broadcast && client_id && local_notify && op_reply) { + + /* If we have been asked to sync the reply, + * and a bcast msg has gone out, we queue the local notify + * until we know the bcast message has been received */ + local_notify = FALSE; + crm_trace("Queuing local %ssync notification for %s", + (call_options & cib_sync_call) ? "" : "a-", client_id); + + queue_local_notify(op_reply, client_id, (call_options & cib_sync_call), from_peer); + op_reply = NULL; /* the reply is queued, so don't free here */ + } + } else if (call_options & cib_discard_reply) { crm_trace("Caller isn't interested in reply"); } else if (from_peer) { if (is_update == FALSE || result_diff == NULL) { crm_trace("Request not broadcast: R/O call"); } else if (call_options & cib_inhibit_bcast) { crm_trace("Request not broadcast: inhibited"); } else if (rc != pcmk_ok) { crm_trace("Request not broadcast: call failed: %s", pcmk_strerror(rc)); } else { crm_trace("Directing reply to %s", originator); } send_peer_reply(op_reply, result_diff, originator, FALSE); } if (local_notify && client_id) { crm_trace("Performing local %ssync notification for %s", (call_options & cib_sync_call) ? "" : "a-", client_id); if (process == FALSE) { do_local_notify(request, client_id, call_options & cib_sync_call, from_peer); } else { do_local_notify(op_reply, client_id, call_options & cib_sync_call, from_peer); } } free_xml(op_reply); free_xml(result_diff); return; } int cib_process_command(xmlNode * request, xmlNode ** reply, xmlNode ** cib_diff, gboolean privileged) { xmlNode *input = NULL; xmlNode *output = NULL; xmlNode *result_cib = NULL; xmlNode *current_cib = NULL; int call_type = 0; int call_options = 0; const char *op = NULL; const char *section = NULL; const char *call_id = crm_element_value(request, F_CIB_CALLID); int rc = pcmk_ok; int rc2 = pcmk_ok; gboolean send_r_notify = FALSE; gboolean global_update = FALSE; gboolean config_changed = FALSE; gboolean manage_counters = TRUE; static mainloop_timer_t *digest_timer = NULL; CRM_ASSERT(cib_status == pcmk_ok); if(digest_timer == NULL) { digest_timer = mainloop_timer_add("digester", 5000, FALSE, cib_digester_cb, NULL); } *reply = NULL; *cib_diff = NULL; current_cib = the_cib; /* Start processing the request... */ op = crm_element_value(request, F_CIB_OPERATION); crm_element_value_int(request, F_CIB_CALLOPTS, &call_options); rc = cib_get_operation_id(op, &call_type); if (rc == pcmk_ok && privileged == FALSE) { rc = cib_op_can_run(call_type, call_options, privileged, global_update); } rc2 = cib_op_prepare(call_type, request, &input, §ion); if (rc == pcmk_ok) { rc = rc2; } if (rc != pcmk_ok) { crm_trace("Call setup failed: %s", pcmk_strerror(rc)); goto done; } else if (cib_op_modifies(call_type) == FALSE) { rc = cib_perform_op(op, call_options, cib_op_func(call_type), TRUE, section, request, input, FALSE, &config_changed, current_cib, &result_cib, NULL, &output); CRM_CHECK(result_cib == NULL, free_xml(result_cib)); goto done; } /* Handle a valid write action */ global_update = crm_is_true(crm_element_value(request, F_CIB_GLOBAL_UPDATE)); if (global_update) { /* legacy code */ manage_counters = FALSE; call_options |= cib_force_diff; crm_trace("Global update detected"); CRM_CHECK(call_type == 3 || call_type == 4, crm_err("Call type: %d", call_type); crm_log_xml_err(request, "bad op")); } if (rc == pcmk_ok) { ping_modified_since = TRUE; if (call_options & cib_inhibit_bcast) { /* skip */ crm_trace("Skipping update: inhibit broadcast"); manage_counters = FALSE; } if (is_not_set(call_options, cib_dryrun) && safe_str_eq(section, XML_CIB_TAG_STATUS)) { /* Copying large CIBs accounts for a huge percentage of our CIB usage */ call_options |= cib_zero_copy; } else { clear_bit(call_options, cib_zero_copy); } /* result_cib must not be modified after cib_perform_op() returns */ rc = cib_perform_op(op, call_options, cib_op_func(call_type), FALSE, section, request, input, manage_counters, &config_changed, current_cib, &result_cib, cib_diff, &output); if (manage_counters == FALSE) { /* Legacy code * If the diff is NULL at this point, its because nothing changed */ config_changed = cib_config_changed(NULL, NULL, cib_diff); } /* Always write to disk for replace ops, * this also negates the need to detect ordering changes */ if (crm_str_eq(CIB_OP_REPLACE, op, TRUE)) { config_changed = TRUE; } } if (rc == pcmk_ok && is_not_set(call_options, cib_dryrun)) { if(is_not_set(call_options, cib_zero_copy)) { rc = activateCibXml(result_cib, config_changed, op); } if (rc == pcmk_ok && cib_internal_config_changed(*cib_diff)) { cib_read_config(config_hash, result_cib); } if (crm_str_eq(CIB_OP_REPLACE, op, TRUE)) { if (section == NULL) { send_r_notify = TRUE; } else if (safe_str_eq(section, XML_TAG_CIB)) { send_r_notify = TRUE; } else if (safe_str_eq(section, XML_CIB_TAG_NODES)) { send_r_notify = TRUE; } else if (safe_str_eq(section, XML_CIB_TAG_STATUS)) { send_r_notify = TRUE; } } else if (crm_str_eq(CIB_OP_ERASE, op, TRUE)) { send_r_notify = TRUE; } mainloop_timer_stop(digest_timer); mainloop_timer_start(digest_timer); } else if (rc == -pcmk_err_schema_validation) { CRM_ASSERT(is_not_set(call_options, cib_zero_copy)); if (output != NULL) { crm_log_xml_info(output, "cib:output"); free_xml(output); } output = result_cib; } else { if(is_not_set(call_options, cib_zero_copy)) { free_xml(result_cib); } } if ((call_options & cib_inhibit_notify) == 0) { const char *client = crm_element_value(request, F_CIB_CLIENTNAME); crm_trace("Sending notifications"); cib_diff_notify(call_options, client, call_id, op, input, rc, *cib_diff); } if (send_r_notify) { const char *origin = crm_element_value(request, F_ORIG); cib_replace_notify(origin, the_cib, rc, *cib_diff); } xml_log_patchset(LOG_TRACE, "cib:diff", *cib_diff); done: if ((call_options & cib_discard_reply) == 0) { const char *caller = crm_element_value(request, F_CIB_CLIENTID); *reply = create_xml_node(NULL, "cib-reply"); crm_xml_add(*reply, F_TYPE, T_CIB); crm_xml_add(*reply, F_CIB_OPERATION, op); crm_xml_add(*reply, F_CIB_CALLID, call_id); crm_xml_add(*reply, F_CIB_CLIENTID, caller); crm_xml_add_int(*reply, F_CIB_CALLOPTS, call_options); crm_xml_add_int(*reply, F_CIB_RC, rc); if (output != NULL) { crm_trace("Attaching reply output"); add_message_xml(*reply, F_CIB_CALLDATA, output); } crm_log_xml_explicit(*reply, "cib:reply"); } crm_trace("cleanup"); if (cib_op_modifies(call_type) == FALSE && output != current_cib) { free_xml(output); output = NULL; } if (call_type >= 0) { cib_op_cleanup(call_type, call_options, &input, &output); } crm_trace("done"); return rc; } gint cib_GCompareFunc(gconstpointer a, gconstpointer b) { const xmlNode *a_msg = a; const xmlNode *b_msg = b; int msg_a_id = 0; int msg_b_id = 0; const char *value = NULL; value = crm_element_value_const(a_msg, F_CIB_CALLID); msg_a_id = crm_parse_int(value, NULL); value = crm_element_value_const(b_msg, F_CIB_CALLID); msg_b_id = crm_parse_int(value, NULL); if (msg_a_id == msg_b_id) { return 0; } else if (msg_a_id < msg_b_id) { return -1; } return 1; } #if SUPPORT_HEARTBEAT void cib_ha_peer_callback(HA_Message * msg, void *private_data) { xmlNode *xml = convert_ha_message(NULL, msg, __FUNCTION__); cib_peer_callback(xml, private_data); free_xml(xml); } #endif void cib_peer_callback(xmlNode * msg, void *private_data) { const char *reason = NULL; const char *originator = crm_element_value(msg, F_ORIG); if (cib_legacy_mode() && (originator == NULL || crm_str_eq(originator, cib_our_uname, TRUE))) { /* message is from ourselves */ + int bcast_id = 0; + + if (!(crm_element_value_int(msg, F_CIB_LOCAL_NOTIFY_ID, &bcast_id))) { + check_local_notify(bcast_id); + } return; } else if (crm_peer_cache == NULL) { reason = "membership not established"; goto bail; } if (crm_element_value(msg, F_CIB_CLIENTNAME) == NULL) { crm_xml_add(msg, F_CIB_CLIENTNAME, originator); } /* crm_log_xml_trace("Peer[inbound]", msg); */ cib_process_request(msg, FALSE, TRUE, TRUE, NULL); return; bail: if (reason) { const char *seq = crm_element_value(msg, F_SEQ); const char *op = crm_element_value(msg, F_CIB_OPERATION); crm_warn("Discarding %s message (%s) from %s: %s", op, seq, originator, reason); } } #if SUPPORT_HEARTBEAT extern oc_ev_t *cib_ev_token; static void *ccm_library = NULL; int (*ccm_api_callback_done) (void *cookie) = NULL; int (*ccm_api_handle_event) (const oc_ev_t * token) = NULL; void cib_client_status_callback(const char *node, const char *client, const char *status, void *private) { /* Heartbeat only */ crm_node_t *peer = NULL; if (safe_str_eq(client, CRM_SYSTEM_CIB)) { peer = crm_get_peer(0, node); if (safe_str_neq(peer->state, CRM_NODE_MEMBER)) { crm_warn("This peer is not a ccm member (yet). " "Status ignored: Client %s/%s announced status [%s]", node, client, status); return; } crm_info("Status update: Client %s/%s now has status [%s]", node, client, status); if (safe_str_eq(status, JOINSTATUS)) { status = ONLINESTATUS; } else if (safe_str_eq(status, LEAVESTATUS)) { status = OFFLINESTATUS; } crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cib, status); } return; } int cib_ccm_dispatch(gpointer user_data) { int rc = 0; oc_ev_t *ccm_token = (oc_ev_t *) user_data; crm_trace("received callback"); if (ccm_api_handle_event == NULL) { ccm_api_handle_event = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_handle_event", 1); } rc = (*ccm_api_handle_event) (ccm_token); if (0 == rc) { return 0; } crm_err("CCM connection appears to have failed: rc=%d.", rc); /* eventually it might be nice to recover and reconnect... but until then... */ crm_err("Exiting to recover from CCM connection failure"); return crm_exit(ENOTCONN); } int current_instance = 0; void cib_ccm_msg_callback(oc_ed_t event, void *cookie, size_t size, const void *data) { gboolean update_id = FALSE; const oc_ev_membership_t *membership = data; CRM_ASSERT(membership != NULL); crm_info("Processing CCM event=%s (id=%d)", ccm_event_name(event), membership->m_instance); if (current_instance > membership->m_instance) { crm_err("Membership instance ID went backwards! %d->%d", current_instance, membership->m_instance); CRM_ASSERT(current_instance <= membership->m_instance); } switch (event) { case OC_EV_MS_NEW_MEMBERSHIP: case OC_EV_MS_INVALID: update_id = TRUE; break; case OC_EV_MS_PRIMARY_RESTORED: update_id = TRUE; break; case OC_EV_MS_NOT_PRIMARY: crm_trace("Ignoring transitional CCM event: %s", ccm_event_name(event)); break; case OC_EV_MS_EVICTED: crm_err("Evicted from CCM: %s", ccm_event_name(event)); break; default: crm_err("Unknown CCM event: %d", event); } if (update_id) { unsigned int lpc = 0; CRM_CHECK(membership != NULL, return); current_instance = membership->m_instance; for (lpc = 0; lpc < membership->m_n_out; lpc++) { crm_update_ccm_node(membership, lpc + membership->m_out_idx, CRM_NODE_LOST, current_instance); } for (lpc = 0; lpc < membership->m_n_member; lpc++) { crm_update_ccm_node(membership, lpc + membership->m_memb_idx, CRM_NODE_MEMBER, current_instance); } heartbeat_cluster->llc_ops->client_status(heartbeat_cluster, NULL, crm_system_name, 0); } if (ccm_api_callback_done == NULL) { ccm_api_callback_done = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_callback_done", 1); } (*ccm_api_callback_done) (cookie); return; } #endif gboolean can_write(int flags) { return TRUE; } static gboolean cib_force_exit(gpointer data) { crm_notice("Forcing exit!"); terminate_cib(__FUNCTION__, TRUE); return FALSE; } static void disconnect_remote_client(gpointer key, gpointer value, gpointer user_data) { crm_client_t *a_client = value; crm_err("Disconnecting %s... Not implemented", crm_str(a_client->name)); } void cib_shutdown(int nsig) { struct qb_ipcs_stats srv_stats; if (cib_shutdown_flag == FALSE) { int disconnects = 0; qb_ipcs_connection_t *c = NULL; cib_shutdown_flag = TRUE; c = qb_ipcs_connection_first_get(ipcs_rw); while (c != NULL) { qb_ipcs_connection_t *last = c; c = qb_ipcs_connection_next_get(ipcs_rw, last); crm_debug("Disconnecting r/w client %p...", last); qb_ipcs_disconnect(last); qb_ipcs_connection_unref(last); disconnects++; } c = qb_ipcs_connection_first_get(ipcs_ro); while (c != NULL) { qb_ipcs_connection_t *last = c; c = qb_ipcs_connection_next_get(ipcs_ro, last); crm_debug("Disconnecting r/o client %p...", last); qb_ipcs_disconnect(last); qb_ipcs_connection_unref(last); disconnects++; } c = qb_ipcs_connection_first_get(ipcs_shm); while (c != NULL) { qb_ipcs_connection_t *last = c; c = qb_ipcs_connection_next_get(ipcs_shm, last); crm_debug("Disconnecting non-blocking r/w client %p...", last); qb_ipcs_disconnect(last); qb_ipcs_connection_unref(last); disconnects++; } disconnects += crm_hash_table_size(client_connections); crm_debug("Disconnecting %d remote clients", crm_hash_table_size(client_connections)); g_hash_table_foreach(client_connections, disconnect_remote_client, NULL); crm_info("Disconnected %d clients", disconnects); } qb_ipcs_stats_get(ipcs_rw, &srv_stats, QB_FALSE); if (crm_hash_table_size(client_connections) == 0) { crm_info("All clients disconnected (%d)", srv_stats.active_connections); initiate_exit(); } else { crm_info("Waiting on %d clients to disconnect (%d)", crm_hash_table_size(client_connections), srv_stats.active_connections); } } void initiate_exit(void) { int active = 0; xmlNode *leaving = NULL; active = crm_active_peers(); if (active < 2) { terminate_cib(__FUNCTION__, FALSE); return; } crm_info("Sending disconnect notification to %d peers...", active); leaving = create_xml_node(NULL, "exit-notification"); crm_xml_add(leaving, F_TYPE, "cib"); crm_xml_add(leaving, F_CIB_OPERATION, "cib_shutdown_req"); send_cluster_message(NULL, crm_msg_cib, leaving, TRUE); free_xml(leaving); g_timeout_add(crm_get_msec("5s"), cib_force_exit, NULL); } extern int remote_fd; extern int remote_tls_fd; void terminate_cib(const char *caller, gboolean fast) { if (remote_fd > 0) { close(remote_fd); remote_fd = 0; } if (remote_tls_fd > 0) { close(remote_tls_fd); remote_tls_fd = 0; } if (!fast) { crm_info("%s: Disconnecting from cluster infrastructure", caller); crm_cluster_disconnect(&crm_cluster); } uninitializeCib(); crm_info("%s: Exiting%s...", caller, fast ? " fast" : mainloop ? " from mainloop" : ""); if (fast == FALSE && mainloop != NULL && g_main_is_running(mainloop)) { g_main_quit(mainloop); } else { qb_ipcs_destroy(ipcs_ro); qb_ipcs_destroy(ipcs_rw); qb_ipcs_destroy(ipcs_shm); if (fast) { crm_exit(EINVAL); } else { crm_exit(pcmk_ok); } } } diff --git a/cib/callbacks.h b/cib/callbacks.h index 7549a6c19d..bca9992191 100644 --- a/cib/callbacks.h +++ b/cib/callbacks.h @@ -1,80 +1,82 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #ifdef HAVE_GNUTLS_GNUTLS_H # undef KEYFILE # include #endif extern gboolean cib_is_master; extern GHashTable *peer_hash; extern GHashTable *config_hash; /* *INDENT-OFF* */ enum cib_notifications { cib_notify_pre = 0x0001, cib_notify_post = 0x0002, cib_notify_replace = 0x0004, cib_notify_confirm = 0x0008, cib_notify_diff = 0x0010, }; /* *INDENT-ON* */ typedef struct cib_operation_s { const char *operation; gboolean modifies_cib; gboolean needs_privileges; gboolean needs_quorum; int (*prepare) (xmlNode *, xmlNode **, const char **); int (*cleanup) (int, xmlNode **, xmlNode **); int (*fn) (const char *, int, const char *, xmlNode *, xmlNode *, xmlNode *, xmlNode **, xmlNode **); } cib_operation_t; extern struct qb_ipcs_service_handlers ipc_ro_callbacks; extern struct qb_ipcs_service_handlers ipc_rw_callbacks; extern qb_ipcs_service_t *ipcs_ro; extern qb_ipcs_service_t *ipcs_rw; extern qb_ipcs_service_t *ipcs_shm; extern void cib_peer_callback(xmlNode * msg, void *private_data); extern void cib_client_status_callback(const char *node, const char *client, const char *status, void *private); extern void cib_common_callback_worker(uint32_t id, uint32_t flags, xmlNode * op_request, crm_client_t * cib_client, gboolean privileged); void cib_shutdown(int nsig); void initiate_exit(void); void terminate_cib(const char *caller, gboolean fast); +extern gboolean cib_legacy_mode(void); + #if SUPPORT_HEARTBEAT extern void cib_ha_peer_callback(HA_Message * msg, void *private_data); extern int cib_ccm_dispatch(gpointer user_data); extern void cib_ccm_msg_callback(oc_ed_t event, void *cookie, size_t size, const void *data); #endif diff --git a/cib/messages.c b/cib/messages.c index 9c66349752..363562c086 100644 --- a/cib/messages.c +++ b/cib/messages.c @@ -1,562 +1,569 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define MAX_DIFF_RETRY 5 #ifdef CIBPIPE gboolean cib_is_master = TRUE; #else gboolean cib_is_master = FALSE; #endif xmlNode *the_cib = NULL; gboolean syncd_once = FALSE; extern const char *cib_our_uname; int revision_check(xmlNode * cib_update, xmlNode * cib_copy, int flags); int get_revision(xmlNode * xml_obj, int cur_revision); int updateList(xmlNode * local_cib, xmlNode * update_command, xmlNode * failed, int operation, const char *section); gboolean check_generation(xmlNode * newCib, xmlNode * oldCib); gboolean update_results(xmlNode * failed, xmlNode * target, const char *operation, int return_code); int cib_update_counter(xmlNode * xml_obj, const char *field, gboolean reset); int sync_our_cib(xmlNode * request, gboolean all); extern xmlNode *cib_msg_copy(const xmlNode * msg, gboolean with_data); extern gboolean cib_shutdown_flag; int cib_process_shutdown_req(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { #ifdef CIBPIPE return -EINVAL; #else int result = pcmk_ok; const char *host = crm_element_value(req, F_ORIG); *answer = NULL; if (crm_element_value(req, F_CIB_ISREPLY) == NULL) { crm_info("Shutdown REQ from %s", host); return pcmk_ok; } else if (cib_shutdown_flag) { crm_info("Shutdown ACK from %s", host); terminate_cib(__FUNCTION__, FALSE); return pcmk_ok; } else { crm_err("Shutdown ACK from %s - not shutting down", host); result = -EINVAL; } return result; #endif } int cib_process_default(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { int result = pcmk_ok; crm_trace("Processing \"%s\" event", op); *answer = NULL; if (op == NULL) { result = -EINVAL; crm_err("No operation specified"); } else if (strcasecmp(CRM_OP_NOOP, op) == 0) { ; } else { result = -EPROTONOSUPPORT; crm_err("Action [%s] is not supported by the CIB", op); } return result; } int cib_process_quit(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { int result = pcmk_ok; crm_trace("Processing \"%s\" event", op); crm_warn("The CRMd has asked us to exit... complying"); crm_exit(pcmk_ok); return result; } int cib_process_readwrite(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { #ifdef CIBPIPE return -EINVAL; #else int result = pcmk_ok; crm_trace("Processing \"%s\" event", op); if (safe_str_eq(op, CIB_OP_ISMASTER)) { if (cib_is_master == TRUE) { result = pcmk_ok; } else { result = -EPERM; } return result; } if (safe_str_eq(op, CIB_OP_MASTER)) { if (cib_is_master == FALSE) { crm_info("We are now in R/W mode"); cib_is_master = TRUE; syncd_once = TRUE; } else { crm_debug("We are still in R/W mode"); } } else if (cib_is_master) { crm_info("We are now in R/O mode"); cib_is_master = FALSE; } return result; #endif } int sync_in_progress = 0; void send_sync_request(const char *host) { xmlNode *sync_me = create_xml_node(NULL, "sync-me"); crm_info("Requesting re-sync from peer"); sync_in_progress++; crm_xml_add(sync_me, F_TYPE, "cib"); crm_xml_add(sync_me, F_CIB_OPERATION, CIB_OP_SYNC_ONE); crm_xml_add(sync_me, F_CIB_DELEGATED, cib_our_uname); send_cluster_message(host ? crm_get_peer(0, host) : NULL, crm_msg_cib, sync_me, FALSE); free_xml(sync_me); } int cib_process_ping(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { #ifdef CIBPIPE return -EINVAL; #else const char *host = crm_element_value(req, F_ORIG); const char *seq = crm_element_value(req, F_CIB_PING_ID); char *digest = calculate_xml_versioned_digest(the_cib, FALSE, TRUE, CRM_FEATURE_SET); static struct qb_log_callsite *cs = NULL; crm_trace("Processing \"%s\" event %s from %s", op, seq, host); *answer = create_xml_node(NULL, XML_CRM_TAG_PING); crm_xml_add(*answer, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); crm_xml_add(*answer, XML_ATTR_DIGEST, digest); crm_xml_add(*answer, F_CIB_PING_ID, seq); if (cs == NULL) { cs = qb_log_callsite_get(__func__, __FILE__, __FUNCTION__, LOG_TRACE, __LINE__, crm_trace_nonlog); } if (cs && cs->targets) { /* Append additional detail so the reciever can log the differences */ add_message_xml(*answer, F_CIB_CALLDATA, the_cib); } else { /* Always include at least the version details */ const char *tag = TYPE(the_cib); xmlNode *shallow = create_xml_node(NULL, tag); copy_in_properties(shallow, the_cib); add_message_xml(*answer, F_CIB_CALLDATA, shallow); free_xml(shallow); } crm_info("Reporting our current digest to %s: %s for %s.%s.%s (%p %d)", host, digest, crm_element_value(existing_cib, XML_ATTR_GENERATION_ADMIN), crm_element_value(existing_cib, XML_ATTR_GENERATION), crm_element_value(existing_cib, XML_ATTR_NUMUPDATES), existing_cib, cs && cs->targets); free(digest); return pcmk_ok; #endif } int cib_process_sync(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { #ifdef CIBPIPE return -EINVAL; #else return sync_our_cib(req, TRUE); #endif } int cib_process_upgrade_server(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { #ifdef CIBPIPE return -EINVAL; #else int rc = pcmk_ok; *answer = NULL; if(crm_element_value(req, F_CIB_SCHEMA_MAX)) { return cib_process_upgrade( op, options, section, req, input, existing_cib, result_cib, answer); } else { int new_version = 0; int current_version = 0; xmlNode *scratch = copy_xml(existing_cib); const char *host = crm_element_value(req, F_ORIG); const char *value = crm_element_value(existing_cib, XML_ATTR_VALIDATION); crm_trace("Processing \"%s\" event", op); if (value != NULL) { current_version = get_schema_version(value); } rc = update_validation(&scratch, &new_version, 0, TRUE, TRUE); if (new_version > current_version) { xmlNode *up = create_xml_node(NULL, __FUNCTION__); rc = pcmk_ok; crm_notice("Upgrade request from %s verified", host); crm_xml_add(up, F_TYPE, "cib"); crm_xml_add(up, F_CIB_OPERATION, CIB_OP_UPGRADE); crm_xml_add(up, F_CIB_SCHEMA_MAX, get_schema_name(new_version)); crm_xml_add(up, F_CIB_DELEGATED, host); crm_xml_add(up, F_CIB_CLIENTID, crm_element_value(req, F_CIB_CLIENTID)); crm_xml_add(up, F_CIB_CALLOPTS, crm_element_value(req, F_CIB_CALLOPTS)); crm_xml_add(up, F_CIB_CALLID, crm_element_value(req, F_CIB_CALLID)); - send_cluster_message(NULL, crm_msg_cib, up, FALSE); + if (cib_legacy_mode() && cib_is_master) { + rc = cib_process_upgrade( + op, options, section, up, input, existing_cib, result_cib, answer); + + } else { + send_cluster_message(NULL, crm_msg_cib, up, FALSE); + } + free_xml(up); } else if(rc == pcmk_ok) { rc = -pcmk_err_schema_unchanged; } free_xml(scratch); } return rc; #endif } int cib_process_sync_one(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { #ifdef CIBPIPE return -EINVAL; #else return sync_our_cib(req, FALSE); #endif } int cib_server_process_diff(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { int rc = pcmk_ok; if (cib_is_master) { /* the master is never waiting for a resync */ sync_in_progress = 0; } if (sync_in_progress > MAX_DIFF_RETRY) { /* request another full-sync, * the last request may have been lost */ sync_in_progress = 0; } if (sync_in_progress) { int diff_add_updates = 0; int diff_add_epoch = 0; int diff_add_admin_epoch = 0; int diff_del_updates = 0; int diff_del_epoch = 0; int diff_del_admin_epoch = 0; cib_diff_version_details(input, &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); sync_in_progress++; crm_notice("Not applying diff %d.%d.%d -> %d.%d.%d (sync in progress)", diff_del_admin_epoch, diff_del_epoch, diff_del_updates, diff_add_admin_epoch, diff_add_epoch, diff_add_updates); return -pcmk_err_diff_resync; } rc = cib_process_diff(op, options, section, req, input, existing_cib, result_cib, answer); if (rc == -pcmk_err_diff_resync && cib_is_master == FALSE) { free_xml(*result_cib); *result_cib = NULL; send_sync_request(NULL); } else if (rc == -pcmk_err_diff_resync) { rc = -pcmk_err_diff_failed; if (options & cib_force_diff) { crm_warn("Not requesting full refresh in R/W mode"); } } return rc; } int cib_process_replace_svr(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { const char *tag = crm_element_name(input); int rc = cib_process_replace(op, options, section, req, input, existing_cib, result_cib, answer); if (rc == pcmk_ok && safe_str_eq(tag, XML_TAG_CIB)) { sync_in_progress = 0; } return rc; } static int delete_cib_object(xmlNode * parent, xmlNode * delete_spec) { const char *object_name = NULL; const char *object_id = NULL; xmlNode *equiv_node = NULL; int result = pcmk_ok; if (delete_spec != NULL) { object_name = crm_element_name(delete_spec); } object_id = crm_element_value(delete_spec, XML_ATTR_ID); crm_trace("Processing: <%s id=%s>", crm_str(object_name), crm_str(object_id)); if (delete_spec == NULL) { result = -EINVAL; } else if (parent == NULL) { result = -EINVAL; } else if (object_id == NULL) { /* placeholder object */ equiv_node = find_xml_node(parent, object_name, FALSE); } else { equiv_node = find_entity(parent, object_name, object_id); } if (result != pcmk_ok) { ; /* nothing */ } else if (equiv_node == NULL) { result = pcmk_ok; } else if (xml_has_children(delete_spec) == FALSE) { /* only leaves are deleted */ crm_debug("Removing leaf: <%s id=%s>", crm_str(object_name), crm_str(object_id)); free_xml(equiv_node); equiv_node = NULL; } else { xmlNode *child = NULL; for (child = __xml_first_child(delete_spec); child != NULL; child = __xml_next(child)) { int tmp_result = delete_cib_object(equiv_node, child); /* only the first error is likely to be interesting */ if (tmp_result != pcmk_ok && result == pcmk_ok) { result = tmp_result; } } } return result; } int cib_process_delete_absolute(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { xmlNode *failed = NULL; int result = pcmk_ok; xmlNode *update_section = NULL; crm_trace("Processing \"%s\" event for section=%s", op, crm_str(section)); if (safe_str_eq(XML_CIB_TAG_SECTION_ALL, section)) { section = NULL; } else if (safe_str_eq(XML_TAG_CIB, section)) { section = NULL; } else if (safe_str_eq(crm_element_name(input), XML_TAG_CIB)) { section = NULL; } CRM_CHECK(strcasecmp(CIB_OP_DELETE, op) == 0, return -EINVAL); if (input == NULL) { crm_err("Cannot perform modification with no data"); return -EINVAL; } failed = create_xml_node(NULL, XML_TAG_FAILED); update_section = get_object_root(section, *result_cib); result = delete_cib_object(update_section, input); update_results(failed, input, op, result); if (xml_has_children(failed)) { CRM_CHECK(result != pcmk_ok, result = -EINVAL); } if (result != pcmk_ok) { crm_log_xml_err(failed, "CIB Update failures"); *answer = failed; } else { free_xml(failed); } return result; } gboolean check_generation(xmlNode * newCib, xmlNode * oldCib) { if (cib_compare_generation(newCib, oldCib) >= 0) { return TRUE; } crm_warn("Generation from update is older than the existing one"); return FALSE; } #ifndef CIBPIPE int sync_our_cib(xmlNode * request, gboolean all) { int result = pcmk_ok; char *digest = NULL; const char *host = crm_element_value(request, F_ORIG); const char *op = crm_element_value(request, F_CIB_OPERATION); xmlNode *replace_request = cib_msg_copy(request, FALSE); CRM_CHECK(the_cib != NULL,;); CRM_CHECK(replace_request != NULL,;); crm_debug("Syncing CIB to %s", all ? "all peers" : host); if (all == FALSE && host == NULL) { crm_log_xml_err(request, "bad sync"); } /* remove the "all == FALSE" condition * * sync_from was failing, the local client wasnt being notified * because it didnt know it was a reply * setting this does not prevent the other nodes from applying it * if all == TRUE */ if (host != NULL) { crm_xml_add(replace_request, F_CIB_ISREPLY, host); } if (all) { xml_remove_prop(replace_request, F_CIB_HOST); } crm_xml_add(replace_request, F_CIB_OPERATION, CIB_OP_REPLACE); crm_xml_add(replace_request, "original_" F_CIB_OPERATION, op); crm_xml_add(replace_request, F_CIB_GLOBAL_UPDATE, XML_BOOLEAN_TRUE); crm_xml_add(replace_request, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); digest = calculate_xml_versioned_digest(the_cib, FALSE, TRUE, CRM_FEATURE_SET); crm_xml_add(replace_request, XML_ATTR_DIGEST, digest); add_message_xml(replace_request, F_CIB_CALLDATA, the_cib); if (send_cluster_message (all ? NULL : crm_get_peer(0, host), crm_msg_cib, replace_request, FALSE) == FALSE) { result = -ENOTCONN; } free_xml(replace_request); free(digest); return result; } #endif diff --git a/fencing/remote.c b/fencing/remote.c index a5680357e9..d4ed9341bd 100644 --- a/fencing/remote.c +++ b/fencing/remote.c @@ -1,1609 +1,1611 @@ /* * Copyright (C) 2009 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TIMEOUT_MULTIPLY_FACTOR 1.2 typedef struct st_query_result_s { char *host; int devices; /* only try peers for non-topology based operations once */ gboolean tried; GListPtr device_list; GHashTable *custom_action_timeouts; GHashTable *delay_maxes; /* Subset of devices that peer has verified connectivity on */ GHashTable *verified_devices; } st_query_result_t; GHashTable *remote_op_list = NULL; void call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer); static void remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup); extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, int call_options); static void report_timeout_period(remote_fencing_op_t * op, int op_timeout); static int get_op_total_timeout(remote_fencing_op_t * op, st_query_result_t * chosen_peer, int default_timeout); static gint sort_strings(gconstpointer a, gconstpointer b) { return strcmp(a, b); } static void free_remote_query(gpointer data) { if (data) { st_query_result_t *query = data; crm_trace("Free'ing query result from %s", query->host); free(query->host); g_list_free_full(query->device_list, free); g_hash_table_destroy(query->custom_action_timeouts); g_hash_table_destroy(query->delay_maxes); g_hash_table_destroy(query->verified_devices); free(query); } } static void clear_remote_op_timers(remote_fencing_op_t * op) { if (op->query_timer) { g_source_remove(op->query_timer); op->query_timer = 0; } if (op->op_timer_total) { g_source_remove(op->op_timer_total); op->op_timer_total = 0; } if (op->op_timer_one) { g_source_remove(op->op_timer_one); op->op_timer_one = 0; } } static void free_remote_op(gpointer data) { remote_fencing_op_t *op = data; crm_trace("Free'ing op %s for %s", op->id, op->target); crm_log_xml_debug(op->request, "Destroying"); clear_remote_op_timers(op); free(op->id); free(op->action); free(op->target); free(op->client_id); free(op->client_name); free(op->originator); if (op->query_results) { g_list_free_full(op->query_results, free_remote_query); } if (op->request) { free_xml(op->request); op->request = NULL; } if (op->devices_list) { g_list_free_full(op->devices_list, free); op->devices_list = NULL; } if (op->required_list) { g_list_free_full(op->required_list, free); op->required_list = NULL; } free(op); } static xmlNode * create_op_done_notify(remote_fencing_op_t * op, int rc) { xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE); crm_xml_add_int(notify_data, "state", op->state); crm_xml_add_int(notify_data, F_STONITH_RC, rc); crm_xml_add(notify_data, F_STONITH_TARGET, op->target); crm_xml_add(notify_data, F_STONITH_ACTION, op->action); crm_xml_add(notify_data, F_STONITH_DELEGATE, op->delegate); crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, op->id); crm_xml_add(notify_data, F_STONITH_ORIGIN, op->originator); crm_xml_add(notify_data, F_STONITH_CLIENTID, op->client_id); crm_xml_add(notify_data, F_STONITH_CLIENTNAME, op->client_name); return notify_data; } static void bcast_result_to_peers(remote_fencing_op_t * op, int rc) { static int count = 0; xmlNode *bcast = create_xml_node(NULL, T_STONITH_REPLY); xmlNode *notify_data = create_op_done_notify(op, rc); count++; crm_trace("Broadcasting result to peers"); crm_xml_add(bcast, F_TYPE, T_STONITH_NOTIFY); crm_xml_add(bcast, F_SUBTYPE, "broadcast"); crm_xml_add(bcast, F_STONITH_OPERATION, T_STONITH_NOTIFY); crm_xml_add_int(bcast, "count", count); add_message_xml(bcast, F_STONITH_CALLDATA, notify_data); send_cluster_message(NULL, crm_msg_stonith_ng, bcast, FALSE); free_xml(notify_data); free_xml(bcast); return; } static void handle_local_reply_and_notify(remote_fencing_op_t * op, xmlNode * data, int rc) { xmlNode *notify_data = NULL; xmlNode *reply = NULL; if (op->notify_sent == TRUE) { /* nothing to do */ return; } /* Do notification with a clean data object */ notify_data = create_op_done_notify(op, rc); crm_xml_add_int(data, "state", op->state); crm_xml_add(data, F_STONITH_TARGET, op->target); crm_xml_add(data, F_STONITH_OPERATION, op->action); reply = stonith_construct_reply(op->request, NULL, data, rc); crm_xml_add(reply, F_STONITH_DELEGATE, op->delegate); /* Send fencing OP reply to local client that initiated fencing */ do_local_reply(reply, op->client_id, op->call_options & st_opt_sync_call, FALSE); /* bcast to all local clients that the fencing operation happend */ do_stonith_notify(0, T_STONITH_NOTIFY_FENCE, rc, notify_data); /* mark this op as having notify's already sent */ op->notify_sent = TRUE; free_xml(reply); free_xml(notify_data); } static void handle_duplicates(remote_fencing_op_t * op, xmlNode * data, int rc) { GListPtr iter = NULL; for (iter = op->duplicates; iter != NULL; iter = iter->next) { remote_fencing_op_t *other = iter->data; if (other->state == st_duplicate) { /* Ie. it hasn't timed out already */ other->state = op->state; crm_debug("Peforming duplicate notification for %s@%s.%.8s = %s", other->client_name, other->originator, other->id, pcmk_strerror(rc)); remote_op_done(other, data, rc, TRUE); } else { crm_err("Skipping duplicate notification for %s@%s - %d", other->client_name, other->originator, other->state); } } } /*! * \internal * \brief Finalize a remote operation. * * \description This function has two code paths. * * Path 1. This node is the owner of the operation and needs * to notify the cpg group via a broadcast as to the operation's * results. * * Path 2. The cpg broadcast is received. All nodes notify their local * stonith clients the operation results. * * So, The owner of the operation first notifies the cluster of the result, * and once that cpg notify is received back it notifies all the local clients. * * Nodes that are passive watchers of the operation will receive the * broadcast and only need to notify their local clients the operation finished. * * \param op, The fencing operation to finalize * \param data, The xml msg reply (if present) of the last delegated fencing * operation. * \param dup, Is this operation a duplicate, if so treat it a little differently * making sure the broadcast is not sent out. */ static void remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup) { int level = LOG_ERR; const char *subt = NULL; xmlNode *local_data = NULL; op->completed = time(NULL); clear_remote_op_timers(op); if (op->notify_sent == TRUE) { crm_err("Already sent notifications for '%s of %s by %s' (for=%s@%s.%.8s, state=%d): %s", op->action, op->target, op->delegate ? op->delegate : "", op->client_name, op->originator, op->id, op->state, pcmk_strerror(rc)); goto remote_op_done_cleanup; } - if (!op->delegate && data) { + if (!op->delegate && data && rc != -ENODEV && rc != -EHOSTUNREACH) { xmlNode *ndata = get_xpath_object("//@" F_STONITH_DELEGATE, data, LOG_TRACE); if(ndata) { op->delegate = crm_element_value_copy(ndata, F_STONITH_DELEGATE); + } else { + op->delegate = crm_element_value_copy(data, F_ORIG); } } if (data == NULL) { data = create_xml_node(NULL, "remote-op"); local_data = data; } /* Tell everyone the operation is done, we will continue * with doing the local notifications once we receive * the broadcast back. */ subt = crm_element_value(data, F_SUBTYPE); if (dup == FALSE && safe_str_neq(subt, "broadcast")) { /* Defer notification until the bcast message arrives */ bcast_result_to_peers(op, rc); goto remote_op_done_cleanup; } if (rc == pcmk_ok || dup) { level = LOG_NOTICE; } else if (safe_str_neq(op->originator, stonith_our_uname)) { level = LOG_NOTICE; } do_crm_log(level, "Operation %s of %s by %s for %s@%s.%.8s: %s", op->action, op->target, op->delegate ? op->delegate : "", op->client_name, op->originator, op->id, pcmk_strerror(rc)); handle_local_reply_and_notify(op, data, rc); if (dup == FALSE) { handle_duplicates(op, data, rc); } /* Free non-essential parts of the record * Keep the record around so we can query the history */ if (op->query_results) { g_list_free_full(op->query_results, free_remote_query); op->query_results = NULL; } if (op->request) { free_xml(op->request); op->request = NULL; } remote_op_done_cleanup: free_xml(local_data); } static gboolean remote_op_watchdog_done(gpointer userdata) { remote_fencing_op_t *op = userdata; op->op_timer_one = 0; crm_notice("Remote %s operation on %s for %s.%8s assumed complete", op->action, op->target, op->client_name, op->id); op->state = st_done; remote_op_done(op, NULL, pcmk_ok, FALSE); return FALSE; } static gboolean remote_op_timeout_one(gpointer userdata) { remote_fencing_op_t *op = userdata; op->op_timer_one = 0; crm_notice("Remote %s operation on %s for %s.%8s timed out", op->action, op->target, op->client_name, op->id); call_remote_stonith(op, NULL); return FALSE; } static gboolean remote_op_timeout(gpointer userdata) { remote_fencing_op_t *op = userdata; op->op_timer_total = 0; if (op->state == st_done) { crm_debug("Action %s (%s) for %s (%s) already completed", op->action, op->id, op->target, op->client_name); return FALSE; } crm_debug("Action %s (%s) for %s (%s) timed out", op->action, op->id, op->target, op->client_name); op->state = st_failed; remote_op_done(op, NULL, -ETIME, FALSE); return FALSE; } static gboolean remote_op_query_timeout(gpointer data) { remote_fencing_op_t *op = data; op->query_timer = 0; if (op->state == st_done) { crm_debug("Operation %s for %s already completed", op->id, op->target); } else if (op->state == st_exec) { crm_debug("Operation %s for %s already in progress", op->id, op->target); } else if (op->query_results) { crm_debug("Query %s for %s complete: %d", op->id, op->target, op->state); call_remote_stonith(op, NULL); } else { crm_debug("Query %s for %s timed out: %d", op->id, op->target, op->state); if (op->op_timer_total) { g_source_remove(op->op_timer_total); op->op_timer_total = 0; } remote_op_timeout(op); } return FALSE; } static gboolean topology_is_empty(stonith_topology_t *tp) { int i; if (tp == NULL) { return TRUE; } for (i = 0; i < ST_LEVEL_MAX; i++) { if (tp->levels[i] != NULL) { return FALSE; } } return TRUE; } static void add_required_device(remote_fencing_op_t * op, const char *device) { GListPtr match = g_list_find_custom(op->required_list, device, sort_strings); if (match) { /* device already marked required */ return; } op->required_list = g_list_prepend(op->required_list, strdup(device)); /* make sure the required devices is in the current list of devices to be executed */ if (op->devices_list) { GListPtr match = g_list_find_custom(op->devices_list, device, sort_strings); if (match == NULL) { op->devices_list = g_list_append(op->devices_list, strdup(device)); } } } /* deep copy the device list */ static void set_op_device_list(remote_fencing_op_t * op, GListPtr devices) { GListPtr lpc = NULL; if (op->devices_list) { g_list_free_full(op->devices_list, free); op->devices_list = NULL; } for (lpc = devices; lpc != NULL; lpc = lpc->next) { op->devices_list = g_list_append(op->devices_list, strdup(lpc->data)); } /* tack on whatever required devices have not been executed * to the end of the current devices list. This ensures that * the required devices will get executed regardless of what topology * level they exist at. */ for (lpc = op->required_list; lpc != NULL; lpc = lpc->next) { GListPtr match = g_list_find_custom(op->devices_list, lpc->data, sort_strings); if (match == NULL) { op->devices_list = g_list_append(op->devices_list, strdup(lpc->data)); } } op->devices = op->devices_list; } stonith_topology_t * find_topology_for_host(const char *host) { stonith_topology_t *tp = g_hash_table_lookup(topology, host); if(tp == NULL) { int status = 1; regex_t r_patt; GHashTableIter tIter; crm_trace("Testing %d topologies for a match", g_hash_table_size(topology)); g_hash_table_iter_init(&tIter, topology); while (g_hash_table_iter_next(&tIter, NULL, (gpointer *) & tp)) { if (regcomp(&r_patt, tp->node, REG_EXTENDED)) { crm_info("Bad regex '%s' for fencing level", tp->node); } else { status = regexec(&r_patt, host, 0, NULL, 0); } if (status == 0) { crm_notice("Matched %s with %s", host, tp->node); break; } crm_trace("No match for %s with %s", host, tp->node); tp = NULL; } } return tp; } /*! * \internal * \brief Set fencing operation's device list to target's next topology level * * \param[in,out] op Remote fencing operation to modify * * \return pcmk_ok if successful, target was not specified (i.e. queries) or * target has no topology, or -EINVAL if no more topology levels to try */ static int stonith_topology_next(remote_fencing_op_t * op) { stonith_topology_t *tp = NULL; if (op->target) { /* Queries don't have a target set */ tp = find_topology_for_host(op->target); } if (topology_is_empty(tp)) { return pcmk_ok; } set_bit(op->call_options, st_opt_topology); do { op->level++; } while (op->level < ST_LEVEL_MAX && tp->levels[op->level] == NULL); if (op->level < ST_LEVEL_MAX) { crm_trace("Attempting fencing level %d for %s (%d devices) - %s@%s.%.8s", op->level, op->target, g_list_length(tp->levels[op->level]), op->client_name, op->originator, op->id); set_op_device_list(op, tp->levels[op->level]); return pcmk_ok; } crm_notice("All fencing options to fence %s for %s@%s.%.8s failed", op->target, op->client_name, op->originator, op->id); return -EINVAL; } /*! * \brief Check to see if this operation is a duplicate of another in flight * operation. If so merge this operation into the inflight operation, and mark * it as a duplicate. */ static void merge_duplicates(remote_fencing_op_t * op) { GHashTableIter iter; remote_fencing_op_t *other = NULL; time_t now = time(NULL); g_hash_table_iter_init(&iter, remote_op_list); while (g_hash_table_iter_next(&iter, NULL, (void **)&other)) { crm_node_t *peer = NULL; if (other->state > st_exec) { /* Must be in-progress */ continue; } else if (safe_str_neq(op->target, other->target)) { /* Must be for the same node */ continue; } else if (safe_str_neq(op->action, other->action)) { crm_trace("Must be for the same action: %s vs. ", op->action, other->action); continue; } else if (safe_str_eq(op->client_name, other->client_name)) { crm_trace("Must be for different clients: %s", op->client_name); continue; } else if (safe_str_eq(other->target, other->originator)) { crm_trace("Can't be a suicide operation: %s", other->target); continue; } peer = crm_get_peer(0, other->originator); if(fencing_peer_active(peer) == FALSE) { crm_notice("Failing stonith action %s for node %s originating from %s@%s.%.8s: Originator is dead", other->action, other->target, other->client_name, other->originator, other->id); other->state = st_failed; continue; } else if(other->total_timeout > 0 && now > (other->total_timeout + other->created)) { crm_info("Stonith action %s for node %s originating from %s@%s.%.8s is too old: %d vs. %d + %d", other->action, other->target, other->client_name, other->originator, other->id, now, other->created, other->total_timeout); continue; } /* There is another in-flight request to fence the same host * Piggyback on that instead. If it fails, so do we. */ other->duplicates = g_list_append(other->duplicates, op); if (other->total_timeout == 0) { crm_trace("Making a best-guess as to the timeout used"); other->total_timeout = op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, NULL, op->base_timeout); } crm_notice ("Merging stonith action %s for node %s originating from client %s.%.8s with identical request from %s@%s.%.8s (%ds)", op->action, op->target, op->client_name, op->id, other->client_name, other->originator, other->id, other->total_timeout); report_timeout_period(op, other->total_timeout); op->state = st_duplicate; } } static uint32_t fencing_active_peers(void) { uint32_t count = 0; crm_node_t *entry; GHashTableIter gIter; g_hash_table_iter_init(&gIter, crm_peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { if(fencing_peer_active(entry)) { count++; } } return count; } int stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op) { xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR); op->state = st_done; op->completed = time(NULL); op->delegate = strdup("a human"); crm_notice("Injecting manual confirmation that %s is safely off/down", crm_element_value(dev, F_STONITH_TARGET)); remote_op_done(op, msg, pcmk_ok, FALSE); /* Replies are sent via done_cb->stonith_send_async_reply()->do_local_reply() */ return -EINPROGRESS; } /*! * \internal * \brief Create a new remote stonith op * \param client, he local stonith client id that initaited the operation * \param request, The request from the client that started the operation * \param peer, Is this operation owned by another stonith peer? Operations * owned by other peers are stored on all the stonith nodes, but only the * owner executes the operation. All the nodes get the results to the operation * once the owner finishes executing it. */ void * create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer) { remote_fencing_op_t *op = NULL; xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE); int call_options = 0; if (remote_op_list == NULL) { remote_op_list = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_remote_op); } /* If this operation is owned by another node, check to make * sure we haven't already created this operation. */ if (peer && dev) { const char *op_id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID); CRM_CHECK(op_id != NULL, return NULL); op = g_hash_table_lookup(remote_op_list, op_id); if (op) { crm_debug("%s already exists", op_id); return op; } } op = calloc(1, sizeof(remote_fencing_op_t)); crm_element_value_int(request, F_STONITH_TIMEOUT, (int *)&(op->base_timeout)); if (peer && dev) { op->id = crm_element_value_copy(dev, F_STONITH_REMOTE_OP_ID); } else { op->id = crm_generate_uuid(); } g_hash_table_replace(remote_op_list, op->id, op); CRM_LOG_ASSERT(g_hash_table_lookup(remote_op_list, op->id) != NULL); crm_trace("Created %s", op->id); op->state = st_query; op->replies_expected = fencing_active_peers(); op->action = crm_element_value_copy(dev, F_STONITH_ACTION); op->originator = crm_element_value_copy(dev, F_STONITH_ORIGIN); op->delegate = crm_element_value_copy(dev, F_STONITH_DELEGATE); /* May not be set */ op->created = time(NULL); if (op->originator == NULL) { /* Local or relayed request */ op->originator = strdup(stonith_our_uname); } CRM_LOG_ASSERT(client != NULL); if (client) { op->client_id = strdup(client); } op->client_name = crm_element_value_copy(request, F_STONITH_CLIENTNAME); op->target = crm_element_value_copy(dev, F_STONITH_TARGET); op->request = copy_xml(request); /* TODO: Figure out how to avoid this */ crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); op->call_options = call_options; crm_element_value_int(request, F_STONITH_CALLID, (int *)&(op->client_callid)); crm_trace("%s new stonith op: %s - %s of %s for %s", (peer && dev) ? "Recorded" : "Generated", op->id, op->action, op->target, op->client_name); if (op->call_options & st_opt_cs_nodeid) { int nodeid = crm_atoi(op->target, NULL); crm_node_t *node = crm_get_peer(nodeid, NULL); /* Ensure the conversion only happens once */ op->call_options &= ~st_opt_cs_nodeid; if (node && node->uname) { free(op->target); op->target = strdup(node->uname); } else { crm_warn("Could not expand nodeid '%s' into a host name (%p)", op->target, node); } } /* check to see if this is a duplicate operation of another in-flight operation */ merge_duplicates(op); return op; } remote_fencing_op_t * initiate_remote_stonith_op(crm_client_t * client, xmlNode * request, gboolean manual_ack) { int query_timeout = 0; xmlNode *query = NULL; const char *client_id = NULL; remote_fencing_op_t *op = NULL; if (client) { client_id = client->id; } else { client_id = crm_element_value(request, F_STONITH_CLIENTID); } CRM_LOG_ASSERT(client_id != NULL); op = create_remote_stonith_op(client_id, request, FALSE); op->owner = TRUE; if (manual_ack) { crm_notice("Initiating manual confirmation for %s: %s", op->target, op->id); return op; } CRM_CHECK(op->action, return NULL); if (stonith_topology_next(op) != pcmk_ok) { op->state = st_failed; } switch (op->state) { case st_failed: crm_warn("Initiation of remote operation %s for %s: failed (%s)", op->action, op->target, op->id); remote_op_done(op, NULL, -EINVAL, FALSE); return op; case st_duplicate: crm_info("Initiating remote operation %s for %s: %s (duplicate)", op->action, op->target, op->id); return op; default: crm_notice("Initiating remote operation %s for %s: %s (%d)", op->action, op->target, op->id, op->state); } query = stonith_create_op(op->client_callid, op->id, STONITH_OP_QUERY, NULL, 0); crm_xml_add(query, F_STONITH_REMOTE_OP_ID, op->id); crm_xml_add(query, F_STONITH_TARGET, op->target); crm_xml_add(query, F_STONITH_ACTION, op->action); crm_xml_add(query, F_STONITH_ORIGIN, op->originator); crm_xml_add(query, F_STONITH_CLIENTID, op->client_id); crm_xml_add(query, F_STONITH_CLIENTNAME, op->client_name); crm_xml_add_int(query, F_STONITH_TIMEOUT, op->base_timeout); crm_xml_add_int(query, F_STONITH_CALLOPTS, op->call_options); send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE); free_xml(query); query_timeout = op->base_timeout * TIMEOUT_MULTIPLY_FACTOR; op->query_timer = g_timeout_add((1000 * query_timeout), remote_op_query_timeout, op); return op; } enum find_best_peer_options { /*! Skip checking the target peer for capable fencing devices */ FIND_PEER_SKIP_TARGET = 0x0001, /*! Only check the target peer for capable fencing devices */ FIND_PEER_TARGET_ONLY = 0x0002, /*! Skip peers and devices that are not verified */ FIND_PEER_VERIFIED_ONLY = 0x0004, }; static st_query_result_t * find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer_options options) { GListPtr iter = NULL; gboolean verified_devices_only = (options & FIND_PEER_VERIFIED_ONLY) ? TRUE : FALSE; if (!device && is_set(op->call_options, st_opt_topology)) { return NULL; } for (iter = op->query_results; iter != NULL; iter = iter->next) { st_query_result_t *peer = iter->data; crm_trace("Testing result from %s for %s with %d devices: %d %x", peer->host, op->target, peer->devices, peer->tried, options); if ((options & FIND_PEER_SKIP_TARGET) && safe_str_eq(peer->host, op->target)) { continue; } if ((options & FIND_PEER_TARGET_ONLY) && safe_str_neq(peer->host, op->target)) { continue; } if (is_set(op->call_options, st_opt_topology)) { /* Do they have the next device of the current fencing level? */ GListPtr match = NULL; if (verified_devices_only && !g_hash_table_lookup(peer->verified_devices, device)) { continue; } match = g_list_find_custom(peer->device_list, device, sort_strings); if (match) { crm_trace("Removing %s from %s (%d remaining)", (char *)match->data, peer->host, g_list_length(peer->device_list)); peer->device_list = g_list_remove(peer->device_list, match->data); return peer; } } else if (peer->devices > 0 && peer->tried == FALSE) { if (verified_devices_only && !g_hash_table_size(peer->verified_devices)) { continue; } /* No topology: Use the current best peer */ crm_trace("Simple fencing"); return peer; } } return NULL; } static st_query_result_t * stonith_choose_peer(remote_fencing_op_t * op) { const char *device = NULL; st_query_result_t *peer = NULL; uint32_t active = fencing_active_peers(); do { if (op->devices) { device = op->devices->data; crm_trace("Checking for someone to fence %s with %s", op->target, device); } else { crm_trace("Checking for someone to fence %s", op->target); } peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET|FIND_PEER_VERIFIED_ONLY); if (peer) { crm_trace("Found verified peer %s for %s", peer->host, device?device:""); return peer; } if(op->query_timer != 0 && op->replies < QB_MIN(op->replies_expected, active)) { crm_trace("Waiting before looking for unverified devices to fence %s", op->target); return NULL; } peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET); if (peer) { crm_trace("Found best unverified peer %s", peer->host); return peer; } peer = find_best_peer(device, op, FIND_PEER_TARGET_ONLY); if(peer) { crm_trace("%s will fence itself", peer->host); return peer; } /* Try the next fencing level if there is one */ } while (is_set(op->call_options, st_opt_topology) && stonith_topology_next(op) == pcmk_ok); crm_notice("Couldn't find anyone to fence %s with %s", op->target, device?device:""); return NULL; } static int get_device_timeout(st_query_result_t * peer, const char *device, int default_timeout) { gpointer res; int delay_max = 0; if (!peer || !device) { return default_timeout; } res = g_hash_table_lookup(peer->delay_maxes, device); if (res && GPOINTER_TO_INT(res) > 0) { delay_max = GPOINTER_TO_INT(res); } res = g_hash_table_lookup(peer->custom_action_timeouts, device); return res ? GPOINTER_TO_INT(res) + delay_max : default_timeout + delay_max; } static int get_peer_timeout(st_query_result_t * peer, int default_timeout) { int total_timeout = 0; GListPtr cur = NULL; for (cur = peer->device_list; cur; cur = cur->next) { total_timeout += get_device_timeout(peer, cur->data, default_timeout); } return total_timeout ? total_timeout : default_timeout; } static int get_op_total_timeout(remote_fencing_op_t * op, st_query_result_t * chosen_peer, int default_timeout) { int total_timeout = 0; stonith_topology_t *tp = find_topology_for_host(op->target); if (is_set(op->call_options, st_opt_topology) && tp) { int i; GListPtr device_list = NULL; GListPtr iter = NULL; /* Yep, this looks scary, nested loops all over the place. * Here is what is going on. * Loop1: Iterate through fencing levels. * Loop2: If a fencing level has devices, loop through each device * Loop3: For each device in a fencing level, see what peer owns it * and what that peer has reported the timeout is for the device. */ for (i = 0; i < ST_LEVEL_MAX; i++) { if (!tp->levels[i]) { continue; } for (device_list = tp->levels[i]; device_list; device_list = device_list->next) { for (iter = op->query_results; iter != NULL; iter = iter->next) { st_query_result_t *peer = iter->data; if (g_list_find_custom(peer->device_list, device_list->data, sort_strings)) { total_timeout += get_device_timeout(peer, device_list->data, default_timeout); break; } } /* End Loop3: match device with peer that owns device, find device's timeout period */ } /* End Loop2: iterate through devices at a specific level */ } /*End Loop1: iterate through fencing levels */ } else if (chosen_peer) { total_timeout = get_peer_timeout(chosen_peer, default_timeout); } else { total_timeout = default_timeout; } return total_timeout ? total_timeout : default_timeout; } static void report_timeout_period(remote_fencing_op_t * op, int op_timeout) { GListPtr iter = NULL; xmlNode *update = NULL; const char *client_node = NULL; const char *client_id = NULL; const char *call_id = NULL; if (op->call_options & st_opt_sync_call) { /* There is no reason to report the timeout for a syncronous call. It * is impossible to use the reported timeout to do anything when the client * is blocking for the response. This update is only important for * async calls that require a callback to report the results in. */ return; } else if (!op->request) { return; } crm_trace("Reporting timeout for %s.%.8s", op->client_name, op->id); client_node = crm_element_value(op->request, F_STONITH_CLIENTNODE); call_id = crm_element_value(op->request, F_STONITH_CALLID); client_id = crm_element_value(op->request, F_STONITH_CLIENTID); if (!client_node || !call_id || !client_id) { return; } if (safe_str_eq(client_node, stonith_our_uname)) { /* The client is connected to this node, send the update direclty to them */ do_stonith_async_timeout_update(client_id, call_id, op_timeout); return; } /* The client is connected to another node, relay this update to them */ update = stonith_create_op(op->client_callid, op->id, STONITH_OP_TIMEOUT_UPDATE, NULL, 0); crm_xml_add(update, F_STONITH_REMOTE_OP_ID, op->id); crm_xml_add(update, F_STONITH_CLIENTID, client_id); crm_xml_add(update, F_STONITH_CALLID, call_id); crm_xml_add_int(update, F_STONITH_TIMEOUT, op_timeout); send_cluster_message(crm_get_peer(0, client_node), crm_msg_stonith_ng, update, FALSE); free_xml(update); for (iter = op->duplicates; iter != NULL; iter = iter->next) { remote_fencing_op_t *dup = iter->data; crm_trace("Reporting timeout for duplicate %s.%.8s", dup->client_name, dup->id); report_timeout_period(iter->data, op_timeout); } } void call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer) { const char *device = NULL; int timeout = op->base_timeout; crm_trace("State for %s.%.8s: %s %d", op->target, op->client_name, op->id, op->state); if (peer == NULL && !is_set(op->call_options, st_opt_topology)) { peer = stonith_choose_peer(op); } if (!op->op_timer_total) { int total_timeout = get_op_total_timeout(op, peer, op->base_timeout); op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * total_timeout; op->op_timer_total = g_timeout_add(1000 * op->total_timeout, remote_op_timeout, op); report_timeout_period(op, op->total_timeout); crm_info("Total remote op timeout set to %d for fencing of node %s for %s.%.8s", total_timeout, op->target, op->client_name, op->id); } if (is_set(op->call_options, st_opt_topology) && op->devices) { /* Ignore any preference, they might not have the device we need */ /* When using topology, the stonith_choose_peer function pops off * the peer from the op's query results. Make sure to calculate * the op_timeout before calling this function when topology is in use */ peer = stonith_choose_peer(op); device = op->devices->data; timeout = get_device_timeout(peer, device, op->base_timeout); } if (peer) { int timeout_one = 0; xmlNode *remote_op = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0); crm_xml_add(remote_op, F_STONITH_REMOTE_OP_ID, op->id); crm_xml_add(remote_op, F_STONITH_TARGET, op->target); crm_xml_add(remote_op, F_STONITH_ACTION, op->action); crm_xml_add(remote_op, F_STONITH_ORIGIN, op->originator); crm_xml_add(remote_op, F_STONITH_CLIENTID, op->client_id); crm_xml_add(remote_op, F_STONITH_CLIENTNAME, op->client_name); crm_xml_add_int(remote_op, F_STONITH_TIMEOUT, timeout); crm_xml_add_int(remote_op, F_STONITH_CALLOPTS, op->call_options); if (device) { timeout_one = TIMEOUT_MULTIPLY_FACTOR * get_device_timeout(peer, device, op->base_timeout); crm_info("Requesting that %s perform op %s %s with %s for %s (%ds)", peer->host, op->action, op->target, device, op->client_name, timeout_one); crm_xml_add(remote_op, F_STONITH_DEVICE, device); crm_xml_add(remote_op, F_STONITH_MODE, "slave"); } else { timeout_one = TIMEOUT_MULTIPLY_FACTOR * get_peer_timeout(peer, op->base_timeout); crm_info("Requesting that %s perform op %s %s for %s (%ds, %ds)", peer->host, op->action, op->target, op->client_name, timeout_one, stonith_watchdog_timeout_ms); crm_xml_add(remote_op, F_STONITH_MODE, "smart"); } op->state = st_exec; if (op->op_timer_one) { g_source_remove(op->op_timer_one); } if(stonith_watchdog_timeout_ms > 0 && device && safe_str_eq(device, "watchdog")) { crm_notice("Waiting %ds for %s to self-terminate for %s.%.8s (%p)", stonith_watchdog_timeout_ms/1000, op->target, op->client_name, op->id, device); op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op); /* TODO: We should probably look into peer->device_list to verify watchdog is going to be in use */ } else if(stonith_watchdog_timeout_ms > 0 && safe_str_eq(peer->host, op->target) && safe_str_neq(op->action, "on")) { crm_notice("Waiting %ds for %s to self-terminate for %s.%.8s (%p)", stonith_watchdog_timeout_ms/1000, op->target, op->client_name, op->id, device); op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op); } else { op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op); } send_cluster_message(crm_get_peer(0, peer->host), crm_msg_stonith_ng, remote_op, FALSE); peer->tried = TRUE; free_xml(remote_op); return; } else if (op->owner == FALSE) { crm_err("The termination of %s for %s is not ours to control", op->target, op->client_name); } else if (op->query_timer == 0) { /* We've exhausted all available peers */ crm_info("No remaining peers capable of terminating %s for %s (%d)", op->target, op->client_name, op->state); CRM_LOG_ASSERT(op->state < st_done); remote_op_timeout(op); } else if(op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) { int rc = -EHOSTUNREACH; /* if the operation never left the query state, * but we have all the expected replies, then no devices * are available to execute the fencing operation. */ if(stonith_watchdog_timeout_ms && (device == NULL || safe_str_eq(device, "watchdog"))) { crm_notice("Waiting %ds for %s to self-terminate for %s.%.8s (%p)", stonith_watchdog_timeout_ms/1000, op->target, op->client_name, op->id, device); op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op); return; } if (op->state == st_query) { crm_info("None of the %d peers have devices capable of terminating %s for %s (%d)", op->replies, op->target, op->client_name, op->state); rc = -ENODEV; } else { crm_info("None of the %d peers are capable of terminating %s for %s (%d)", op->replies, op->target, op->client_name, op->state); } op->state = st_failed; remote_op_done(op, NULL, rc, FALSE); } else if (device) { crm_info("Waiting for additional peers capable of terminating %s with %s for %s.%.8s", op->target, device, op->client_name, op->id); } else { crm_info("Waiting for additional peers capable of terminating %s for %s%.8s", op->target, op->client_name, op->id); } } /*! * \internal * \brief Comparison function for sorting query results * * \param[in] a GList item to compare * \param[in] b GList item to compare * * \return Per the glib documentation, "a negative integer if the first value * comes before the second, 0 if they are equal, or a positive integer * if the first value comes after the second." */ static gint sort_peers(gconstpointer a, gconstpointer b) { const st_query_result_t *peer_a = a; const st_query_result_t *peer_b = b; return (peer_b->devices - peer_a->devices); } /*! * \internal * \brief Determine if all the devices in the topology are found or not */ static gboolean all_topology_devices_found(remote_fencing_op_t * op) { GListPtr device = NULL; GListPtr iter = NULL; GListPtr match = NULL; stonith_topology_t *tp = NULL; gboolean skip_target = FALSE; int i; tp = find_topology_for_host(op->target); if (!tp) { return FALSE; } if (safe_str_eq(op->action, "off") || safe_str_eq(op->action, "reboot")) { /* Don't count the devices on the target node if we are killing * the target node. */ skip_target = TRUE; } for (i = 0; i < ST_LEVEL_MAX; i++) { for (device = tp->levels[i]; device; device = device->next) { match = NULL; for (iter = op->query_results; iter && !match; iter = iter->next) { st_query_result_t *peer = iter->data; if (skip_target && safe_str_eq(peer->host, op->target)) { continue; } match = g_list_find_custom(peer->device_list, device->data, sort_strings); } if (!match) { return FALSE; } } } return TRUE; } int process_remote_stonith_query(xmlNode * msg) { int devices = 0; gboolean host_is_target = FALSE; gboolean have_all_replies = FALSE; const char *id = NULL; const char *host = NULL; remote_fencing_op_t *op = NULL; st_query_result_t *result = NULL; uint32_t replies_expected; xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR); xmlNode *child = NULL; CRM_CHECK(dev != NULL, return -EPROTO); id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID); CRM_CHECK(id != NULL, return -EPROTO); dev = get_xpath_object("//@" F_STONITH_AVAILABLE_DEVICES, msg, LOG_ERR); CRM_CHECK(dev != NULL, return -EPROTO); crm_element_value_int(dev, F_STONITH_AVAILABLE_DEVICES, &devices); op = g_hash_table_lookup(remote_op_list, id); if (op == NULL) { crm_debug("Unknown or expired remote op: %s", id); return -EOPNOTSUPP; } replies_expected = QB_MIN(op->replies_expected, fencing_active_peers()); if ((++op->replies >= replies_expected) && (op->state == st_query)) { have_all_replies = TRUE; } host = crm_element_value(msg, F_ORIG); host_is_target = safe_str_eq(host, op->target); if (devices <= 0) { /* If we're doing 'known' then we might need to fire anyway */ crm_trace("Query result %d of %d from %s for %s/%s (%d devices) %s", op->replies, replies_expected, host, op->target, op->action, devices, id); if (have_all_replies) { crm_info("All query replies have arrived, continuing (%d expected/%d received for id %s)", replies_expected, op->replies, id); call_remote_stonith(op, NULL); } return pcmk_ok; } crm_info("Query result %d of %d from %s for %s/%s (%d devices) %s", op->replies, replies_expected, host, op->target, op->action, devices, id); result = calloc(1, sizeof(st_query_result_t)); result->host = strdup(host); result->devices = devices; result->custom_action_timeouts = g_hash_table_new_full(crm_str_hash, g_str_equal, free, NULL); result->delay_maxes = g_hash_table_new_full(crm_str_hash, g_str_equal, free, NULL); result->verified_devices = g_hash_table_new_full(crm_str_hash, g_str_equal, free, NULL); for (child = __xml_first_child(dev); child != NULL; child = __xml_next(child)) { const char *device = ID(child); int action_timeout = 0; int delay_max = 0; int verified = 0; int required = 0; if (device) { result->device_list = g_list_prepend(result->device_list, strdup(device)); crm_element_value_int(child, F_STONITH_ACTION_TIMEOUT, &action_timeout); crm_element_value_int(child, F_STONITH_DELAY_MAX, &delay_max); crm_element_value_int(child, F_STONITH_DEVICE_VERIFIED, &verified); crm_element_value_int(child, F_STONITH_DEVICE_REQUIRED, &required); if (action_timeout) { crm_trace("Peer %s with device %s returned action timeout %d", result->host, device, action_timeout); g_hash_table_insert(result->custom_action_timeouts, strdup(device), GINT_TO_POINTER(action_timeout)); } if (delay_max > 0) { crm_trace("Peer %s with device %s returned maximum of random delay %d", result->host, device, delay_max); g_hash_table_insert(result->delay_maxes, strdup(device), GINT_TO_POINTER(delay_max)); } if (verified) { crm_trace("Peer %s has confirmed a verified device %s", result->host, device); g_hash_table_insert(result->verified_devices, strdup(device), GINT_TO_POINTER(verified)); } if (required) { crm_trace("Peer %s requires device %s to execute for action %s", result->host, device, op->action); /* This matters when executing a topology. Required devices will get * executed regardless of their topology level. We use this for unfencing. */ add_required_device(op, device); } } } CRM_CHECK(devices == g_list_length(result->device_list), crm_err("Mis-match: Query claimed to have %d devices but %d found", devices, g_list_length(result->device_list))); op->query_results = g_list_insert_sorted(op->query_results, result, sort_peers); if (is_set(op->call_options, st_opt_topology)) { /* If we start the fencing before all the topology results are in, * it is possible fencing levels will be skipped because of the missing * query results. */ if (op->state == st_query && all_topology_devices_found(op)) { /* All the query results are in for the topology, start the fencing ops. */ crm_trace("All topology devices found"); call_remote_stonith(op, result); } else if (have_all_replies) { crm_info("All topology query replies have arrived, continuing (%d expected/%d received) ", replies_expected, op->replies); call_remote_stonith(op, NULL); } } else if (op->state == st_query) { /* We have a result for a non-topology fencing op that looks promising, * go ahead and start fencing before query timeout */ if (host_is_target == FALSE && g_hash_table_size(result->verified_devices)) { /* we have a verified device living on a peer that is not the target */ crm_trace("Found %d verified devices", g_hash_table_size(result->verified_devices)); call_remote_stonith(op, result); } else if (have_all_replies) { crm_info("All query replies have arrived, continuing (%d expected/%d received) ", replies_expected, op->replies); call_remote_stonith(op, NULL); } else { crm_trace("Waiting for more peer results before launching fencing operation"); } } else if (op->state == st_done) { crm_info("Discarding query result from %s (%d devices): Operation is in state %d", result->host, result->devices, op->state); } return pcmk_ok; } int process_remote_stonith_exec(xmlNode * msg) { int rc = 0; const char *id = NULL; const char *device = NULL; remote_fencing_op_t *op = NULL; xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR); CRM_CHECK(dev != NULL, return -EPROTO); id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID); CRM_CHECK(id != NULL, return -EPROTO); dev = get_xpath_object("//@" F_STONITH_RC, msg, LOG_ERR); CRM_CHECK(dev != NULL, return -EPROTO); crm_element_value_int(dev, F_STONITH_RC, &rc); device = crm_element_value(dev, F_STONITH_DEVICE); if (remote_op_list) { op = g_hash_table_lookup(remote_op_list, id); } if (op == NULL && rc == pcmk_ok) { /* Record successful fencing operations */ const char *client_id = crm_element_value(dev, F_STONITH_CLIENTID); op = create_remote_stonith_op(client_id, dev, TRUE); } if (op == NULL) { /* Could be for an event that began before we started */ /* TODO: Record the op for later querying */ crm_info("Unknown or expired remote op: %s", id); return -EOPNOTSUPP; } if (op->devices && device && safe_str_neq(op->devices->data, device)) { crm_err ("Received outdated reply for device %s (instead of %s) to %s node %s. Operation already timed out at remote level.", device, op->devices->data, op->action, op->target); return rc; } if (safe_str_eq(crm_element_value(msg, F_SUBTYPE), "broadcast")) { crm_debug("Marking call to %s for %s on behalf of %s@%s.%.8s: %s (%d)", op->action, op->target, op->client_name, op->id, op->originator, pcmk_strerror(rc), rc); if (rc == pcmk_ok) { op->state = st_done; } else { op->state = st_failed; } remote_op_done(op, msg, rc, FALSE); return pcmk_ok; } else if (safe_str_neq(op->originator, stonith_our_uname)) { /* If this isn't a remote level broadcast, and we are not the * originator of the operation, we should not be receiving this msg. */ crm_err ("%s received non-broadcast fencing result for operation it does not own (device %s targeting %s)", stonith_our_uname, device, op->target); return rc; } if (is_set(op->call_options, st_opt_topology)) { const char *device = crm_element_value(msg, F_STONITH_DEVICE); crm_notice("Call to %s for %s on behalf of %s@%s: %s (%d)", device, op->target, op->client_name, op->originator, pcmk_strerror(rc), rc); /* We own the op, and it is complete. broadcast the result to all nodes * and notify our local clients. */ if (op->state == st_done) { remote_op_done(op, msg, rc, FALSE); return rc; } /* An operation completed succesfully but has not yet been marked as done. * Continue the topology if more devices exist at the current level, otherwise * mark as done. */ if (rc == pcmk_ok) { GListPtr required_match = g_list_find_custom(op->required_list, device, sort_strings); if (op->devices) { /* Success, are there any more? */ op->devices = op->devices->next; } if (required_match) { op->required_list = g_list_remove(op->required_list, required_match->data); } /* if no more devices at this fencing level, we are done, * else we need to contine with executing the next device in the list */ if (op->devices == NULL) { crm_trace("Marking complex fencing op for %s as complete", op->target); op->state = st_done; remote_op_done(op, msg, rc, FALSE); return rc; } } else { /* This device failed, time to try another topology level. If no other * levels are available, mark this operation as failed and report results. */ if (stonith_topology_next(op) != pcmk_ok) { op->state = st_failed; remote_op_done(op, msg, rc, FALSE); return rc; } } } else if (rc == pcmk_ok && op->devices == NULL) { crm_trace("All done for %s", op->target); op->state = st_done; remote_op_done(op, msg, rc, FALSE); return rc; } else if (rc == -ETIME && op->devices == NULL) { /* If the operation timed out don't bother retrying other peers. */ op->state = st_failed; remote_op_done(op, msg, rc, FALSE); return rc; } else { /* fall-through and attempt other fencing action using another peer */ } /* Retry on failure or execute the rest of the topology */ crm_trace("Next for %s on behalf of %s@%s (rc was %d)", op->target, op->originator, op->client_name, rc); call_remote_stonith(op, NULL); return rc; } int stonith_fence_history(xmlNode * msg, xmlNode ** output) { int rc = 0; const char *target = NULL; xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_TRACE); if (dev) { int options = 0; target = crm_element_value(dev, F_STONITH_TARGET); crm_element_value_int(msg, F_STONITH_CALLOPTS, &options); if (target && (options & st_opt_cs_nodeid)) { int nodeid = crm_atoi(target, NULL); crm_node_t *node = crm_get_peer(nodeid, NULL); if (node) { target = node->uname; } } } crm_trace("Looking for operations on %s in %p", target, remote_op_list); *output = create_xml_node(NULL, F_STONITH_HISTORY_LIST); if (remote_op_list) { GHashTableIter iter; remote_fencing_op_t *op = NULL; g_hash_table_iter_init(&iter, remote_op_list); while (g_hash_table_iter_next(&iter, NULL, (void **)&op)) { xmlNode *entry = NULL; if (target && strcmp(op->target, target) != 0) { continue; } rc = 0; crm_trace("Attaching op %s", op->id); entry = create_xml_node(*output, STONITH_OP_EXEC); crm_xml_add(entry, F_STONITH_TARGET, op->target); crm_xml_add(entry, F_STONITH_ACTION, op->action); crm_xml_add(entry, F_STONITH_ORIGIN, op->originator); crm_xml_add(entry, F_STONITH_DELEGATE, op->delegate); crm_xml_add(entry, F_STONITH_CLIENTNAME, op->client_name); crm_xml_add_int(entry, F_STONITH_DATE, op->completed); crm_xml_add_int(entry, F_STONITH_STATE, op->state); } } return rc; } gboolean stonith_check_fence_tolerance(int tolerance, const char *target, const char *action) { GHashTableIter iter; time_t now = time(NULL); remote_fencing_op_t *rop = NULL; crm_trace("tolerance=%d, remote_op_list=%p", tolerance, remote_op_list); if (tolerance <= 0 || !remote_op_list || target == NULL || action == NULL) { return FALSE; } g_hash_table_iter_init(&iter, remote_op_list); while (g_hash_table_iter_next(&iter, NULL, (void **)&rop)) { if (strcmp(rop->target, target) != 0) { continue; } else if (rop->state != st_done) { continue; } else if (strcmp(rop->action, action) != 0) { continue; } else if ((rop->completed + tolerance) < now) { continue; } crm_notice("Target %s was fenced (%s) less than %ds ago by %s on behalf of %s", target, action, tolerance, rop->delegate, rop->originator); return TRUE; } return FALSE; } diff --git a/include/crm/lrmd.h b/include/crm/lrmd.h index e3a0d63803..730cad3cee 100644 --- a/include/crm/lrmd.h +++ b/include/crm/lrmd.h @@ -1,465 +1,466 @@ /* * Copyright (c) 2012 David Vossel * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ /** * \file * \brief Local Resource Manager * \ingroup lrmd */ #include #include #ifndef LRMD__H # define LRMD__H typedef struct lrmd_s lrmd_t; typedef struct lrmd_key_value_s { char *key; char *value; struct lrmd_key_value_s *next; } lrmd_key_value_t; #define LRMD_PROTOCOL_VERSION "1.0" /* *INDENT-OFF* */ #define DEFAULT_REMOTE_KEY_LOCATION "/etc/pacemaker/authkey" #define ALT_REMOTE_KEY_LOCATION "/etc/corosync/authkey" #define DEFAULT_REMOTE_PORT 3121 #define DEFAULT_REMOTE_USERNAME "lrmd" #define F_LRMD_OPERATION "lrmd_op" #define F_LRMD_CLIENTNAME "lrmd_clientname" #define F_LRMD_IS_IPC_PROVIDER "lrmd_is_ipc_provider" #define F_LRMD_CLIENTID "lrmd_clientid" #define F_LRMD_PROTOCOL_VERSION "lrmd_protocol_version" #define F_LRMD_REMOTE_MSG_TYPE "lrmd_remote_msg_type" #define F_LRMD_REMOTE_MSG_ID "lrmd_remote_msg_id" #define F_LRMD_CALLBACK_TOKEN "lrmd_async_id" #define F_LRMD_CALLID "lrmd_callid" #define F_LRMD_CANCEL_CALLID "lrmd_cancel_callid" #define F_LRMD_CALLOPTS "lrmd_callopt" #define F_LRMD_CALLDATA "lrmd_calldata" #define F_LRMD_RC "lrmd_rc" #define F_LRMD_EXEC_RC "lrmd_exec_rc" #define F_LRMD_OP_STATUS "lrmd_exec_op_status" #define F_LRMD_TIMEOUT "lrmd_timeout" #define F_LRMD_CLASS "lrmd_class" #define F_LRMD_PROVIDER "lrmd_provider" #define F_LRMD_TYPE "lrmd_type" #define F_LRMD_ORIGIN "lrmd_origin" #define F_LRMD_RSC_RUN_TIME "lrmd_run_time" #define F_LRMD_RSC_RCCHANGE_TIME "lrmd_rcchange_time" #define F_LRMD_RSC_EXEC_TIME "lrmd_exec_time" #define F_LRMD_RSC_QUEUE_TIME "lrmd_queue_time" #define F_LRMD_RSC_ID "lrmd_rsc_id" #define F_LRMD_RSC_ACTION "lrmd_rsc_action" #define F_LRMD_RSC_USERDATA_STR "lrmd_rsc_userdata_str" #define F_LRMD_RSC_OUTPUT "lrmd_rsc_output" #define F_LRMD_RSC_EXIT_REASON "lrmd_rsc_exit_reason" #define F_LRMD_RSC_START_DELAY "lrmd_rsc_start_delay" #define F_LRMD_RSC_INTERVAL "lrmd_rsc_interval" #define F_LRMD_RSC_METADATA "lrmd_rsc_metadata_res" #define F_LRMD_RSC_DELETED "lrmd_rsc_deleted" #define F_LRMD_RSC "lrmd_rsc" #define LRMD_OP_RSC_CHK_REG "lrmd_rsc_check_register" #define LRMD_OP_RSC_REG "lrmd_rsc_register" #define LRMD_OP_RSC_EXEC "lrmd_rsc_exec" #define LRMD_OP_RSC_CANCEL "lrmd_rsc_cancel" #define LRMD_OP_RSC_UNREG "lrmd_rsc_unregister" #define LRMD_OP_RSC_INFO "lrmd_rsc_info" #define LRMD_OP_RSC_METADATA "lrmd_rsc_metadata" #define LRMD_OP_POKE "lrmd_rsc_poke" #define LRMD_OP_NEW_CLIENT "lrmd_rsc_new_client" #define F_LRMD_IPC_OP "lrmd_ipc_op" #define F_LRMD_IPC_IPC_SERVER "lrmd_ipc_server" #define F_LRMD_IPC_SESSION "lrmd_ipc_session" #define F_LRMD_IPC_CLIENT "lrmd_ipc_client" #define F_LRMD_IPC_PROXY_NODE "lrmd_ipc_proxy_node" #define F_LRMD_IPC_USER "lrmd_ipc_user" #define F_LRMD_IPC_MSG "lrmd_ipc_msg" #define F_LRMD_IPC_MSG_ID "lrmd_ipc_msg_id" #define F_LRMD_IPC_MSG_FLAGS "lrmd_ipc_msg_flags" #define T_LRMD "lrmd" #define T_LRMD_REPLY "lrmd_reply" #define T_LRMD_NOTIFY "lrmd_notify" #define T_LRMD_IPC_PROXY "lrmd_ipc_proxy" /* *INDENT-ON* */ /*! * \brief Create a new local lrmd connection */ lrmd_t *lrmd_api_new(void); /*! * \brief Create a new remote lrmd connection using tls backend * * \param nodename name of remote node identified with this connection * \param server name of server to connect to * \param port port number to connect to * * \note nodename and server may be the same value. */ lrmd_t *lrmd_remote_api_new(const char *nodename, const char *server, int port); /*! * \brief Use after lrmd_poll returns 1 to read and dispatch a message * * \param[in,out] lrmd lrmd connection object * * \return TRUE if connection is still up, FALSE if disconnected */ bool lrmd_dispatch(lrmd_t * lrmd); /*! * \brief Poll for a specified timeout period to determine if a message * is ready for dispatch. * \retval 1 msg is ready * \retval 0 timeout occured * \retval negative error code */ int lrmd_poll(lrmd_t * lrmd, int timeout); /*! * \brief Destroy lrmd object */ void lrmd_api_delete(lrmd_t * lrmd); lrmd_key_value_t *lrmd_key_value_add(lrmd_key_value_t * kvp, const char *key, const char *value); /* *INDENT-OFF* */ /* Reserved for future use */ enum lrmd_call_options { lrmd_opt_none = 0x00000000, /* lrmd_opt_sync_call = 0x00000001, //Not implemented, patches welcome. */ /*! Only notify the client originating a exec() the results */ lrmd_opt_notify_orig_only = 0x00000002, /*! Drop recurring operations initiated by a client when client disconnects. * This call_option is only valid when registering a resource. When used * remotely with the pacemaker_remote daemon, this option means that recurring * operations will be dropped once all the remote connections disconnect. */ lrmd_opt_drop_recurring = 0x00000003, /*! Only send out notifications for recurring operations whenthe result changes */ lrmd_opt_notify_changes_only = 0x00000004, }; enum lrmd_callback_event { lrmd_event_register, lrmd_event_unregister, lrmd_event_exec_complete, lrmd_event_disconnect, lrmd_event_connect, lrmd_event_poke, lrmd_event_new_client, }; /* *INDENT-ON* */ typedef struct lrmd_event_data_s { /*! Type of event, register, unregister, call_completed... */ enum lrmd_callback_event type; /*! The resource this event occurred on. */ const char *rsc_id; /*! The action performed, start, stop, monitor... */ const char *op_type; /*! The userdata string given do exec() api function */ const char *user_data; /*! The client api call id associated with this event */ int call_id; /*! The operation's timeout period in ms. */ int timeout; /*! The operation's recurring interval in ms. */ int interval; /*! The operation's start delay value in ms. */ int start_delay; /*! This operation that just completed is on a deleted rsc. */ int rsc_deleted; /*! The executed ra return code mapped to OCF */ enum ocf_exitcode rc; /*! The lrmd status returned for exec_complete events */ int op_status; - /*! exit failure reason string from resource agent operation */ - const char *exit_reason; /*! stdout from resource agent operation */ const char *output; /*! Timestamp of when op ran */ unsigned int t_run; /*! Timestamp of last rc change */ unsigned int t_rcchange; /*! Time in length op took to execute */ unsigned int exec_time; /*! Time in length spent in queue */ unsigned int queue_time; /*! int connection result. Used for connection and poke events */ int connection_rc; /* This is a GHashTable containing the * parameters given to the operation */ void *params; /* client node name associated with this conneciton. * This is useful if multiple clients are being utilized by * a single process. This name allows the actions to be matched * to the proper client. */ const char *remote_nodename; + /*! exit failure reason string from resource agent operation */ + const char *exit_reason; + } lrmd_event_data_t; lrmd_event_data_t *lrmd_copy_event(lrmd_event_data_t * event); void lrmd_free_event(lrmd_event_data_t * event); typedef struct lrmd_rsc_info_s { char *id; char *type; char *class; char *provider; } lrmd_rsc_info_t; lrmd_rsc_info_t *lrmd_copy_rsc_info(lrmd_rsc_info_t * rsc_info); void lrmd_free_rsc_info(lrmd_rsc_info_t * rsc_info); typedef void (*lrmd_event_callback) (lrmd_event_data_t * event); typedef struct lrmd_list_s { const char *val; struct lrmd_list_s *next; } lrmd_list_t; void lrmd_list_freeall(lrmd_list_t * head); void lrmd_key_value_freeall(lrmd_key_value_t * head); typedef struct lrmd_api_operations_s { /*! * \brief Connect from the lrmd. * * \retval 0, success * \retval negative error code on failure */ int (*connect) (lrmd_t * lrmd, const char *client_name, int *fd); /*! * \brief Establish an connection to lrmd, don't block while connecting. * \note this function requires the use of mainloop. * * \note The is returned using the event callback. * \note When this function returns 0, the callback will be invoked * to report the final result of the connect. * \retval 0, connect in progress, wait for event callback * \retval -1, failure. */ int (*connect_async) (lrmd_t * lrmd, const char *client_name, int timeout /*ms */ ); /*! * \brief Is connected to lrmd daemon? * * \retval 0, false * \retval 1, true */ int (*is_connected) (lrmd_t * lrmd); /*! * \brief Poke lrmd connection to verify it is still capable of serving requests * \note The response comes in the form of a poke event to the callback. * * \retval 0, wait for response in callback * \retval -1, connection failure, callback may not be invoked */ int (*poke_connection) (lrmd_t * lrmd); /*! * \brief Disconnect from the lrmd. * * \retval 0, success * \retval negative error code on failure */ int (*disconnect) (lrmd_t * lrmd); /*! * \brief Register a resource with the lrmd. * * \note Synchronous, guaranteed to occur in daemon before function returns. * * \retval 0, success * \retval negative error code on failure */ int (*register_rsc) (lrmd_t * lrmd, const char *rsc_id, const char *class, const char *provider, const char *agent, enum lrmd_call_options options); /*! * \brief Retrieve registration info for a rsc * * \retval info on success * \retval NULL on failure */ lrmd_rsc_info_t *(*get_rsc_info) (lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options); /*! * \brief Unregister a resource from the lrmd. * * \note All pending and recurring operations will be cancelled * automatically. * * \note Synchronous, guaranteed to occur in daemon before function returns. * * \retval 0, success * \retval -1, success, but operations are currently executing on the rsc which will * return once they are completed. * \retval negative error code on failure * */ int (*unregister_rsc) (lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options); /*! * \brief Sets the callback to receive lrmd events on. */ void (*set_callback) (lrmd_t * lrmd, lrmd_event_callback callback); /*! * \brief Issue a command on a resource * * \note Asynchronous, command is queued in daemon on function return, but * execution of command is not synced. * * \note Operations on individual resources are guaranteed to occur * in the order the client api calls them in. * * \note Operations between different resources are not guaranteed * to occur in any specific order in relation to one another * regardless of what order the client api is called in. * \retval call_id to track async event result on success * \retval negative error code on failure */ int (*exec) (lrmd_t * lrmd, const char *rsc_id, const char *action, const char *userdata, /* userdata string given back in event notification */ int interval, /* ms */ int timeout, /* ms */ int start_delay, /* ms */ enum lrmd_call_options options, lrmd_key_value_t * params); /* ownership of params is given up to api here */ /*! * \brief Cancel a recurring command. * * \note Synchronous, guaranteed to occur in daemon before function returns. * * \note The cancel is completed async from this call. * We can be guaranteed the cancel has completed once * the callback receives an exec_complete event with * the lrmd_op_status signifying that the operation is * cancelled. * \note For each resource, cancel operations and exec operations * are processed in the order they are received. * It is safe to assume that for a single resource, a cancel * will occur in the lrmd before an exec if the client's cancel * api call occurs before the exec api call. * * It is not however safe to assume any operation on one resource will * occur before an operation on another resource regardless of * the order the client api is called in. * * \retval 0, cancel command sent. * \retval negative error code on failure */ int (*cancel) (lrmd_t * lrmd, const char *rsc_id, const char *action, int interval); /*! * \brief Get the metadata documentation for a resource. * * \note Value is returned in output. Output must be freed when set * * \retval lrmd_ok success * \retval negative error code on failure */ int (*get_metadata) (lrmd_t * lrmd, const char *class, const char *provider, const char *agent, char **output, enum lrmd_call_options options); /*! * \brief Retrieve a list of installed resource agents. * * \note if class is not provided, all known agents will be returned * \note list must be freed using lrmd_list_freeall() * * \retval num items in list on success * \retval negative error code on failure */ int (*list_agents) (lrmd_t * lrmd, lrmd_list_t ** agents, const char *class, const char *provider); /*! * \brief Retrieve a list of resource agent providers * * \note When the agent is provided, only the agent's provider will be returned * \note When no agent is supplied, all providers will be returned. * \note List must be freed using lrmd_list_freeall() * * \retval num items in list on success * \retval negative error code on failure */ int (*list_ocf_providers) (lrmd_t * lrmd, const char *agent, lrmd_list_t ** providers); /*! * \brief Retrieve a list of standards supported by this machine/installation * * \note List must be freed using lrmd_list_freeall() * * \retval num items in list on success * \retval negative error code on failure */ int (*list_standards) (lrmd_t * lrmd, lrmd_list_t ** standards); } lrmd_api_operations_t; struct lrmd_s { lrmd_api_operations_t *cmds; void *private; }; static inline const char * lrmd_event_type2str(enum lrmd_callback_event type) { switch (type) { case lrmd_event_register: return "register"; case lrmd_event_unregister: return "unregister"; case lrmd_event_exec_complete: return "exec_complete"; case lrmd_event_disconnect: return "disconnect"; case lrmd_event_connect: return "connect"; case lrmd_event_poke: return "poke"; case lrmd_event_new_client: return "new_client"; } return "unknown"; } #endif diff --git a/include/crm/pengine/status.h b/include/crm/pengine/status.h index 4bfa3fecf0..421495901a 100644 --- a/include/crm/pengine/status.h +++ b/include/crm/pengine/status.h @@ -1,406 +1,405 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef PENGINE_STATUS__H # define PENGINE_STATUS__H # include # include # include typedef struct node_s pe_node_t; typedef struct node_s node_t; typedef struct pe_action_s action_t; typedef struct pe_action_s pe_action_t; typedef struct resource_s resource_t; typedef struct ticket_s ticket_t; typedef enum no_quorum_policy_e { no_quorum_freeze, no_quorum_stop, no_quorum_ignore, no_quorum_suicide } no_quorum_policy_t; enum node_type { node_ping, node_member, node_remote }; enum pe_restart { pe_restart_restart, pe_restart_ignore }; enum pe_find { pe_find_renamed = 0x001, pe_find_clone = 0x004, pe_find_current = 0x008, pe_find_inactive = 0x010, }; # define pe_flag_have_quorum 0x00000001ULL # define pe_flag_symmetric_cluster 0x00000002ULL # define pe_flag_is_managed_default 0x00000004ULL # define pe_flag_maintenance_mode 0x00000008ULL # define pe_flag_stonith_enabled 0x00000010ULL # define pe_flag_have_stonith_resource 0x00000020ULL # define pe_flag_enable_unfencing 0x00000040ULL # define pe_flag_stop_rsc_orphans 0x00000100ULL # define pe_flag_stop_action_orphans 0x00000200ULL # define pe_flag_stop_everything 0x00000400ULL # define pe_flag_start_failure_fatal 0x00001000ULL # define pe_flag_remove_after_stop 0x00002000ULL # define pe_flag_startup_probes 0x00010000ULL # define pe_flag_have_status 0x00020000ULL # define pe_flag_have_remote_nodes 0x00040000ULL # define pe_flag_quick_location 0x00100000ULL # define pe_flag_sanitized 0x00200000ULL typedef struct pe_working_set_s { xmlNode *input; crm_time_t *now; /* options extracted from the input */ char *dc_uuid; node_t *dc_node; const char *stonith_action; const char *placement_strategy; unsigned long long flags; int stonith_timeout; int default_resource_stickiness; no_quorum_policy_t no_quorum_policy; GHashTable *config_hash; GHashTable *tickets; GHashTable *singletons; /* Actions for which there can be only one - ie. fence nodeX */ GListPtr nodes; GListPtr resources; GListPtr placement_constraints; GListPtr ordering_constraints; GListPtr colocation_constraints; GListPtr ticket_constraints; GListPtr actions; xmlNode *failed; xmlNode *op_defaults; xmlNode *rsc_defaults; /* stats */ int num_synapse; int max_valid_nodes; int order_id; int action_id; /* final output */ xmlNode *graph; GHashTable *template_rsc_sets; const char *localhost; GHashTable *tags; } pe_working_set_t; struct node_shared_s { const char *id; const char *uname; /* Make all these flags into a bitfield one day */ gboolean online; gboolean standby; gboolean standby_onfail; gboolean pending; gboolean unclean; gboolean unseen; gboolean shutdown; gboolean expected_up; gboolean is_dc; - gboolean rsc_discovery_enabled; - - gboolean remote_requires_reset; - gboolean remote_was_fenced; int num_resources; GListPtr running_rsc; /* resource_t* */ GListPtr allocated_rsc; /* resource_t* */ resource_t *remote_rsc; GHashTable *attrs; /* char* => char* */ enum node_type type; GHashTable *utilization; /*! cache of calculated rsc digests for this node. */ GHashTable *digest_cache; gboolean maintenance; + gboolean rsc_discovery_enabled; + gboolean remote_requires_reset; + gboolean remote_was_fenced; }; struct node_s { int weight; gboolean fixed; - int rsc_discover_mode; int count; struct node_shared_s *details; + int rsc_discover_mode; }; # include # define pe_rsc_orphan 0x00000001ULL # define pe_rsc_managed 0x00000002ULL # define pe_rsc_block 0x00000004ULL /* Further operations are prohibited due to failure policy */ # define pe_rsc_orphan_container_filler 0x00000008ULL # define pe_rsc_notify 0x00000010ULL # define pe_rsc_unique 0x00000020ULL # define pe_rsc_fence_device 0x00000040ULL # define pe_rsc_provisional 0x00000100ULL # define pe_rsc_allocating 0x00000200ULL # define pe_rsc_merging 0x00000400ULL # define pe_rsc_munging 0x00000800ULL # define pe_rsc_try_reload 0x00001000ULL # define pe_rsc_reload 0x00002000ULL # define pe_rsc_failed 0x00010000ULL # define pe_rsc_shutdown 0x00020000ULL # define pe_rsc_runnable 0x00040000ULL # define pe_rsc_start_pending 0x00080000ULL # define pe_rsc_starting 0x00100000ULL # define pe_rsc_stopping 0x00200000ULL # define pe_rsc_migrating 0x00400000ULL # define pe_rsc_allow_migrate 0x00800000ULL # define pe_rsc_failure_ignored 0x01000000ULL # define pe_rsc_unexpectedly_running 0x02000000ULL # define pe_rsc_maintenance 0x04000000ULL # define pe_rsc_needs_quorum 0x10000000ULL # define pe_rsc_needs_fencing 0x20000000ULL # define pe_rsc_needs_unfencing 0x40000000ULL # define pe_rsc_have_unfencing 0x80000000ULL enum pe_graph_flags { pe_graph_none = 0x00000, pe_graph_updated_first = 0x00001, pe_graph_updated_then = 0x00002, pe_graph_disable = 0x00004, }; /* *INDENT-OFF* */ enum pe_action_flags { pe_action_pseudo = 0x00001, pe_action_runnable = 0x00002, pe_action_optional = 0x00004, pe_action_print_always = 0x00008, pe_action_have_node_attrs = 0x00010, pe_action_failure_is_fatal = 0x00020, /* no longer used, here for API compatibility */ pe_action_implied_by_stonith = 0x00040, pe_action_migrate_runnable = 0x00080, pe_action_dumped = 0x00100, pe_action_processed = 0x00200, pe_action_clear = 0x00400, pe_action_dangle = 0x00800, pe_action_requires_any = 0x01000, /* This action requires one or mre of its dependencies to be runnable * We use this to clear the runnable flag before checking dependencies */ pe_action_reschedule = 0x02000, pe_action_tracking = 0x04000, }; /* *INDENT-ON* */ struct resource_s { char *id; char *clone_name; xmlNode *xml; xmlNode *orig_xml; xmlNode *ops_xml; resource_t *parent; void *variant_opaque; enum pe_obj_types variant; resource_object_functions_t *fns; resource_alloc_functions_t *cmds; enum rsc_recovery_type recovery_type; enum pe_restart restart_type; int priority; int stickiness; int sort_index; int failure_timeout; int remote_reconnect_interval; int effective_priority; int migration_threshold; gboolean is_remote_node; - gboolean exclusive_discover; unsigned long long flags; GListPtr rsc_cons_lhs; /* rsc_colocation_t* */ GListPtr rsc_cons; /* rsc_colocation_t* */ GListPtr rsc_location; /* rsc_to_node_t* */ GListPtr actions; /* action_t* */ GListPtr rsc_tickets; /* rsc_ticket* */ node_t *allocated_to; GListPtr running_on; /* node_t* */ GHashTable *known_on; /* node_t* */ GHashTable *allowed_nodes; /* node_t* */ enum rsc_role_e role; enum rsc_role_e next_role; GHashTable *meta; GHashTable *parameters; GHashTable *utilization; GListPtr children; /* resource_t* */ GListPtr dangling_migrations; /* node_t* */ node_t *partial_migration_target; node_t *partial_migration_source; resource_t *container; GListPtr fillers; char *pending_task; const char *isolation_wrapper; + gboolean exclusive_discover; }; struct pe_action_s { int id; int priority; resource_t *rsc; node_t *node; xmlNode *op_entry; char *task; char *uuid; char *cancel_task; enum pe_action_flags flags; enum rsc_start_requirement needs; enum action_fail_response on_fail; enum rsc_role_e fail_role; action_t *pre_notify; action_t *pre_notified; action_t *post_notify; action_t *post_notified; int seen_count; GHashTable *meta; GHashTable *extra; GListPtr actions_before; /* action_warpper_t* */ GListPtr actions_after; /* action_warpper_t* */ }; struct ticket_s { char *id; gboolean granted; time_t last_granted; gboolean standby; GHashTable *state; }; typedef struct tag_s { char *id; GListPtr refs; } tag_t; enum pe_link_state { pe_link_not_dumped, pe_link_dumped, pe_link_dup, }; /* *INDENT-OFF* */ enum pe_ordering { pe_order_none = 0x0, /* deleted */ pe_order_optional = 0x1, /* pure ordering, nothing implied */ pe_order_apply_first_non_migratable = 0x2, /* Only apply this constraint's ordering if first is not migratable. */ pe_order_implies_first = 0x10, /* If 'then' is required, ensure 'first' is too */ pe_order_implies_then = 0x20, /* If 'first' is required, ensure 'then' is too */ pe_order_implies_first_master = 0x40, /* Imply 'first' is required when 'then' is required and then's rsc holds Master role. */ /* first requires then to be both runnable and migrate runnable. */ pe_order_implies_first_migratable = 0x80, pe_order_runnable_left = 0x100, /* 'then' requires 'first' to be runnable */ pe_order_pseudo_left = 0x200, /* 'then' can only be pseudo if 'first' is runnable */ pe_order_implies_then_on_node = 0x400, /* If 'first' is required on 'nodeX', * ensure instances of 'then' on 'nodeX' are too. * Only really useful if 'then' is a clone and 'first' is not */ pe_order_restart = 0x1000, /* 'then' is runnable if 'first' is optional or runnable */ pe_order_stonith_stop = 0x2000, /* only applies if the action is non-pseudo */ pe_order_serialize_only = 0x4000, /* serialize */ pe_order_implies_first_printed = 0x10000, /* Like ..implies_first but only ensures 'first' is printed, not manditory */ pe_order_implies_then_printed = 0x20000, /* Like ..implies_then but only ensures 'then' is printed, not manditory */ pe_order_asymmetrical = 0x100000, /* Indicates asymmetrical one way ordering constraint. */ pe_order_load = 0x200000, /* Only relevant if... */ pe_order_one_or_more = 0x400000, /* 'then' is only runnable if one or more of it's dependencies are too */ pe_order_anti_colocation = 0x800000, pe_order_preserve = 0x1000000, /* Hack for breaking user ordering constraints with container resources */ pe_order_trace = 0x4000000, /* test marker */ }; /* *INDENT-ON* */ typedef struct action_wrapper_s action_wrapper_t; struct action_wrapper_s { enum pe_ordering type; enum pe_link_state state; action_t *action; }; const char *rsc_printable_id(resource_t *rsc); gboolean cluster_status(pe_working_set_t * data_set); void set_working_set_defaults(pe_working_set_t * data_set); void cleanup_calculations(pe_working_set_t * data_set); resource_t *pe_find_resource(GListPtr rsc_list, const char *id_rh); node_t *pe_find_node(GListPtr node_list, const char *uname); node_t *pe_find_node_id(GListPtr node_list, const char *id); node_t *pe_find_node_any(GListPtr node_list, const char *id, const char *uname); GListPtr find_operations(const char *rsc, const char *node, gboolean active_filter, pe_working_set_t * data_set); #endif diff --git a/lib/cib/cib_utils.c b/lib/cib/cib_utils.c index 28b8e81dc5..d321517dc3 100644 --- a/lib/cib/cib_utils.c +++ b/lib/cib/cib_utils.c @@ -1,843 +1,843 @@ /* * Copyright (c) 2004 International Business Machines * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include #include #include #include #include #include #include #include #include #include #include #include #include struct config_root_s { const char *name; const char *parent; const char *path; }; /* * "//crm_config" will also work in place of "/cib/configuration/crm_config" * The / prefix means find starting from the root, whereas the // prefix means * find anywhere and risks multiple matches */ /* *INDENT-OFF* */ struct config_root_s known_paths[] = { { NULL, NULL, "//cib" }, { XML_TAG_CIB, NULL, "//cib" }, { XML_CIB_TAG_STATUS, "/cib", "//cib/status" }, { XML_CIB_TAG_CONFIGURATION,"/cib", "//cib/configuration" }, { XML_CIB_TAG_CRMCONFIG, "/cib/configuration", "//cib/configuration/crm_config" }, { XML_CIB_TAG_NODES, "/cib/configuration", "//cib/configuration/nodes" }, { XML_CIB_TAG_DOMAINS, "/cib/configuration", "//cib/configuration/domains" }, { XML_CIB_TAG_RESOURCES, "/cib/configuration", "//cib/configuration/resources" }, { XML_CIB_TAG_CONSTRAINTS, "/cib/configuration", "//cib/configuration/constraints" }, { XML_CIB_TAG_OPCONFIG, "/cib/configuration", "//cib/configuration/op_defaults" }, { XML_CIB_TAG_RSCCONFIG, "/cib/configuration", "//cib/configuration/rsc_defaults" }, { XML_CIB_TAG_ACLS, "/cib/configuration", "//cib/configuration/acls" }, { XML_TAG_FENCING_TOPOLOGY, "/cib/configuration", "//cib/configuration/fencing-topology" }, { XML_CIB_TAG_SECTION_ALL, NULL, "//cib" }, }; /* *INDENT-ON* */ int cib_compare_generation(xmlNode * left, xmlNode * right) { int lpc = 0; const char *attributes[] = { XML_ATTR_GENERATION_ADMIN, XML_ATTR_GENERATION, XML_ATTR_NUMUPDATES, }; crm_log_xml_trace(left, "left"); crm_log_xml_trace(right, "right"); for (lpc = 0; lpc < DIMOF(attributes); lpc++) { int int_elem_l = -1; int int_elem_r = -1; const char *elem_r = NULL; const char *elem_l = crm_element_value(left, attributes[lpc]); if (right != NULL) { elem_r = crm_element_value(right, attributes[lpc]); } if (elem_l != NULL) { int_elem_l = crm_parse_int(elem_l, NULL); } if (elem_r != NULL) { int_elem_r = crm_parse_int(elem_r, NULL); } if (int_elem_l < int_elem_r) { crm_trace("%s (%s < %s)", attributes[lpc], crm_str(elem_l), crm_str(elem_r)); return -1; } else if (int_elem_l > int_elem_r) { crm_trace("%s (%s > %s)", attributes[lpc], crm_str(elem_l), crm_str(elem_r)); return 1; } } return 0; } /* Deprecated - doesn't expose -EACCES */ xmlNode * get_cib_copy(cib_t * cib) { xmlNode *xml_cib; int options = cib_scope_local | cib_sync_call; int rc = pcmk_ok; if (cib->state == cib_disconnected) { return NULL; } rc = cib->cmds->query(cib, NULL, &xml_cib, options); if (rc == -EACCES) { return NULL; } else if (rc != pcmk_ok) { crm_err("Couldnt retrieve the CIB"); free_xml(xml_cib); return NULL; } else if (xml_cib == NULL) { crm_err("The CIB result was empty"); free_xml(xml_cib); return NULL; } if (safe_str_eq(crm_element_name(xml_cib), XML_TAG_CIB)) { return xml_cib; } free_xml(xml_cib); return NULL; } xmlNode * cib_get_generation(cib_t * cib) { xmlNode *the_cib = NULL; xmlNode *generation = create_xml_node(NULL, XML_CIB_TAG_GENERATION_TUPPLE); cib->cmds->query(cib, NULL, &the_cib, cib_scope_local | cib_sync_call); if (the_cib != NULL) { copy_in_properties(generation, the_cib); free_xml(the_cib); } return generation; } gboolean cib_version_details(xmlNode * cib, int *admin_epoch, int *epoch, int *updates) { *epoch = -1; *updates = -1; *admin_epoch = -1; if (cib == NULL) { return FALSE; } else { crm_element_value_int(cib, XML_ATTR_GENERATION, epoch); crm_element_value_int(cib, XML_ATTR_NUMUPDATES, updates); crm_element_value_int(cib, XML_ATTR_GENERATION_ADMIN, admin_epoch); } return TRUE; } gboolean cib_diff_version_details(xmlNode * diff, int *admin_epoch, int *epoch, int *updates, int *_admin_epoch, int *_epoch, int *_updates) { int add[] = { 0, 0, 0 }; int del[] = { 0, 0, 0 }; xml_patch_versions(diff, add, del); *admin_epoch = add[0]; *epoch = add[1]; *updates = add[2]; *_admin_epoch = del[0]; *_epoch = del[1]; *_updates = del[2]; return TRUE; } /* * The caller should never free the return value */ const char * get_object_path(const char *object_type) { int lpc = 0; int max = DIMOF(known_paths); for (; lpc < max; lpc++) { if ((object_type == NULL && known_paths[lpc].name == NULL) || safe_str_eq(object_type, known_paths[lpc].name)) { return known_paths[lpc].path; } } return NULL; } const char * get_object_parent(const char *object_type) { int lpc = 0; int max = DIMOF(known_paths); for (; lpc < max; lpc++) { if (safe_str_eq(object_type, known_paths[lpc].name)) { return known_paths[lpc].parent; } } return NULL; } xmlNode * get_object_root(const char *object_type, xmlNode * the_root) { const char *xpath = get_object_path(object_type); if (xpath == NULL) { return the_root; /* or return NULL? */ } return get_xpath_object(xpath, the_root, LOG_DEBUG_4); } /* * It is the callers responsibility to free both the new CIB (output) * and the new CIB (input) */ xmlNode * createEmptyCib(int admin_epoch) { xmlNode *cib_root = NULL, *config = NULL; cib_root = create_xml_node(NULL, XML_TAG_CIB); crm_xml_add(cib_root, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); crm_xml_add(cib_root, XML_ATTR_VALIDATION, xml_latest_schema()); crm_xml_add_int(cib_root, XML_ATTR_GENERATION, admin_epoch); crm_xml_add_int(cib_root, XML_ATTR_NUMUPDATES, 0); crm_xml_add_int(cib_root, XML_ATTR_GENERATION_ADMIN, 0); config = create_xml_node(cib_root, XML_CIB_TAG_CONFIGURATION); create_xml_node(cib_root, XML_CIB_TAG_STATUS); create_xml_node(config, XML_CIB_TAG_CRMCONFIG); create_xml_node(config, XML_CIB_TAG_NODES); create_xml_node(config, XML_CIB_TAG_RESOURCES); create_xml_node(config, XML_CIB_TAG_CONSTRAINTS); return cib_root; } static bool cib_acl_enabled(xmlNode *xml, const char *user) { bool rc = FALSE; #if ENABLE_ACL if(pcmk_acl_required(user)) { const char *value = NULL; GHashTable *options = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); cib_read_config(options, xml); value = cib_pref(options, "enable-acl"); rc = crm_is_true(value); g_hash_table_destroy(options); } crm_debug("CIB ACL is %s", rc ? "enabled" : "disabled"); #endif return rc; } int cib_perform_op(const char *op, int call_options, cib_op_t * fn, gboolean is_query, const char *section, xmlNode * req, xmlNode * input, gboolean manage_counters, gboolean * config_changed, xmlNode * current_cib, xmlNode ** result_cib, xmlNode ** diff, xmlNode ** output) { int rc = pcmk_ok; gboolean check_dtd = TRUE; xmlNode *top = NULL; xmlNode *scratch = NULL; xmlNode *local_diff = NULL; const char *new_version = NULL; static struct qb_log_callsite *diff_cs = NULL; const char *user = crm_element_value(req, F_CIB_USER); bool with_digest = FALSE; crm_trace("Begin %s%s op", is_query ? "read-only " : "", op); CRM_CHECK(output != NULL, return -ENOMSG); CRM_CHECK(result_cib != NULL, return -ENOMSG); CRM_CHECK(config_changed != NULL, return -ENOMSG); if(output) { *output = NULL; } *result_cib = NULL; *config_changed = FALSE; if (fn == NULL) { return -EINVAL; } if (is_query) { xmlNode *cib_ro = current_cib; xmlNode *cib_filtered = NULL; if(cib_acl_enabled(cib_ro, user)) { if(xml_acl_filtered_copy(user, current_cib, current_cib, &cib_filtered)) { if (cib_filtered == NULL) { crm_debug("Pre-filtered the entire cib"); return -EACCES; } cib_ro = cib_filtered; crm_log_xml_trace(cib_ro, "filtered"); } } rc = (*fn) (op, call_options, section, req, input, cib_ro, result_cib, output); if(output == NULL || *output == NULL) { /* nothing */ } else if(cib_filtered == *output) { cib_filtered = NULL; /* Let them have this copy */ } else if(*output == current_cib) { /* They already know not to free it */ } else if(cib_filtered && (*output)->doc == cib_filtered->doc) { /* We're about to free the document of which *output is a part */ *output = copy_xml(*output); } else if((*output)->doc == current_cib->doc) { /* Give them a copy they can free */ *output = copy_xml(*output); } free_xml(cib_filtered); return rc; } if (is_set(call_options, cib_zero_copy)) { /* Conditional on v2 patch style */ scratch = current_cib; /* Create a shallow copy of current_cib for the version details */ current_cib = create_xml_node(NULL, (const char *)scratch->name); copy_in_properties(current_cib, scratch); top = current_cib; xml_track_changes(scratch, user, NULL, cib_acl_enabled(scratch, user)); rc = (*fn) (op, call_options, section, req, input, scratch, &scratch, output); } else { scratch = copy_xml(current_cib); xml_track_changes(scratch, user, NULL, cib_acl_enabled(scratch, user)); rc = (*fn) (op, call_options, section, req, input, current_cib, &scratch, output); if(scratch && xml_tracking_changes(scratch) == FALSE) { crm_trace("Inferring changes after %s op", op); xml_track_changes(scratch, user, current_cib, cib_acl_enabled(current_cib, user)); xml_calculate_changes(current_cib, scratch); } CRM_CHECK(current_cib != scratch, return -EINVAL); } xml_acl_disable(scratch); /* Allow the system to make any additional changes */ if (rc == pcmk_ok && scratch == NULL) { rc = -EINVAL; goto done; } else if(rc == pcmk_ok && xml_acl_denied(scratch)) { crm_trace("ACL rejected part or all of the proposed changes"); rc = -EACCES; goto done; } else if (rc != pcmk_ok) { goto done; } if (scratch) { new_version = crm_element_value(scratch, XML_ATTR_CRM_VERSION); if (new_version && compare_version(new_version, CRM_FEATURE_SET) > 0) { crm_err("Discarding update with feature set '%s' greater than our own '%s'", new_version, CRM_FEATURE_SET); rc = -EPROTONOSUPPORT; goto done; } } if (current_cib) { int old = 0; int new = 0; crm_element_value_int(scratch, XML_ATTR_GENERATION_ADMIN, &new); crm_element_value_int(current_cib, XML_ATTR_GENERATION_ADMIN, &old); if (old > new) { crm_err("%s went backwards: %d -> %d (Opts: 0x%x)", XML_ATTR_GENERATION_ADMIN, old, new, call_options); crm_log_xml_warn(req, "Bad Op"); crm_log_xml_warn(input, "Bad Data"); rc = -pcmk_err_old_data; } else if (old == new) { crm_element_value_int(scratch, XML_ATTR_GENERATION, &new); crm_element_value_int(current_cib, XML_ATTR_GENERATION, &old); if (old > new) { crm_err("%s went backwards: %d -> %d (Opts: 0x%x)", XML_ATTR_GENERATION, old, new, call_options); crm_log_xml_warn(req, "Bad Op"); crm_log_xml_warn(input, "Bad Data"); rc = -pcmk_err_old_data; } } } crm_trace("Massaging CIB contents"); strip_text_nodes(scratch); fix_plus_plus_recursive(scratch); if (is_set(call_options, cib_zero_copy)) { /* At this point, current_cib is just the 'cib' tag and its properties, * * The v1 format would barf on this, but we know the v2 patch * format only needs it for the top-level version fields */ local_diff = xml_create_patchset(2, current_cib, scratch, (bool*)config_changed, manage_counters); } else { static time_t expires = 0; time_t tm_now = time(NULL); if (expires < tm_now) { expires = tm_now + 60; /* Validate clients are correctly applying v2-style diffs at most once a minute */ with_digest = TRUE; } local_diff = xml_create_patchset(0, current_cib, scratch, (bool*)config_changed, manage_counters); } xml_log_changes(LOG_TRACE, __FUNCTION__, scratch); xml_accept_changes(scratch); if (diff_cs == NULL) { diff_cs = qb_log_callsite_get(__PRETTY_FUNCTION__, __FILE__, "diff-validation", LOG_DEBUG, __LINE__, crm_trace_nonlog); } if(local_diff) { patchset_process_digest(local_diff, current_cib, scratch, with_digest); xml_log_patchset(LOG_INFO, __FUNCTION__, local_diff); crm_log_xml_trace(local_diff, "raw patch"); } if (is_not_set(call_options, cib_zero_copy) /* The original to compare against doesn't exist */ && local_diff && crm_is_callsite_active(diff_cs, LOG_TRACE, 0)) { /* Validate the calculated patch set */ int test_rc, format = 1; xmlNode * c = copy_xml(current_cib); crm_element_value_int(local_diff, "format", &format); test_rc = xml_apply_patchset(c, local_diff, manage_counters); if(test_rc != pcmk_ok) { save_xml_to_file(c, "PatchApply:calculated", NULL); save_xml_to_file(current_cib, "PatchApply:input", NULL); save_xml_to_file(scratch, "PatchApply:actual", NULL); save_xml_to_file(local_diff, "PatchApply:diff", NULL); crm_err("v%d patchset error, patch failed to apply: %s (%d)", format, pcmk_strerror(test_rc), test_rc); } free_xml(c); } if (safe_str_eq(section, XML_CIB_TAG_STATUS)) { /* Throttle the amount of costly validation we perform due to status updates * a) we don't really care whats in the status section * b) we don't validate any of it's contents at the moment anyway */ check_dtd = FALSE; } /* === scratch must not be modified after this point === * Exceptions, anything in: static filter_t filter[] = { { 0, XML_ATTR_ORIGIN }, { 0, XML_CIB_ATTR_WRITTEN }, { 0, XML_ATTR_UPDATE_ORIG }, { 0, XML_ATTR_UPDATE_CLIENT }, { 0, XML_ATTR_UPDATE_USER }, }; */ if (*config_changed && is_not_set(call_options, cib_no_mtime)) { char *now_str = NULL; time_t now = time(NULL); const char *schema = crm_element_value(scratch, XML_ATTR_VALIDATION); now_str = ctime(&now); now_str[24] = EOS; /* replace the newline */ crm_xml_replace(scratch, XML_CIB_ATTR_WRITTEN, now_str); if (schema) { static int minimum_schema = 0; int current_schema = get_schema_version(schema); if (minimum_schema == 0) { - minimum_schema = get_schema_version("pacemaker-1.1"); + minimum_schema = get_schema_version("pacemaker-1.2"); } /* Does the CIB support the "update-*" attributes... */ if (current_schema >= minimum_schema) { const char *origin = crm_element_value(req, F_ORIG); CRM_LOG_ASSERT(origin != NULL); crm_xml_replace(scratch, XML_ATTR_UPDATE_ORIG, origin); crm_xml_replace(scratch, XML_ATTR_UPDATE_CLIENT, crm_element_value(req, F_CIB_CLIENTNAME)); #if ENABLE_ACL crm_xml_replace(scratch, XML_ATTR_UPDATE_USER, crm_element_value(req, F_CIB_USER)); #endif } } } crm_trace("Perform validation: %s", check_dtd ? "true" : "false"); if (rc == pcmk_ok && check_dtd && validate_xml(scratch, NULL, TRUE) == FALSE) { const char *current_dtd = crm_element_value(scratch, XML_ATTR_VALIDATION); crm_warn("Updated CIB does not validate against %s schema/dtd", crm_str(current_dtd)); rc = -pcmk_err_schema_validation; } done: *result_cib = scratch; #if ENABLE_ACL if(rc != pcmk_ok && cib_acl_enabled(current_cib, user)) { if(xml_acl_filtered_copy(user, current_cib, scratch, result_cib)) { if (*result_cib == NULL) { crm_debug("Pre-filtered the entire cib result"); } free_xml(scratch); } } #endif if(diff) { *diff = local_diff; } else { free_xml(local_diff); } free_xml(top); crm_trace("Done"); return rc; } xmlNode * cib_create_op(int call_id, const char *token, const char *op, const char *host, const char *section, xmlNode * data, int call_options, const char *user_name) { xmlNode *op_msg = create_xml_node(NULL, "cib_command"); CRM_CHECK(op_msg != NULL, return NULL); CRM_CHECK(token != NULL, return NULL); crm_xml_add(op_msg, F_XML_TAGNAME, "cib_command"); crm_xml_add(op_msg, F_TYPE, T_CIB); crm_xml_add(op_msg, F_CIB_CALLBACK_TOKEN, token); crm_xml_add(op_msg, F_CIB_OPERATION, op); crm_xml_add(op_msg, F_CIB_HOST, host); crm_xml_add(op_msg, F_CIB_SECTION, section); crm_xml_add_int(op_msg, F_CIB_CALLID, call_id); #if ENABLE_ACL if (user_name) { crm_xml_add(op_msg, F_CIB_USER, user_name); } #endif crm_trace("Sending call options: %.8lx, %d", (long)call_options, call_options); crm_xml_add_int(op_msg, F_CIB_CALLOPTS, call_options); if (data != NULL) { add_message_xml(op_msg, F_CIB_CALLDATA, data); } if (call_options & cib_inhibit_bcast) { CRM_CHECK((call_options & cib_scope_local), return NULL); } return op_msg; } void cib_native_callback(cib_t * cib, xmlNode * msg, int call_id, int rc) { xmlNode *output = NULL; cib_callback_client_t *blob = NULL; cib_callback_client_t local_blob; local_blob.id = NULL; local_blob.callback = NULL; local_blob.user_data = NULL; local_blob.only_success = FALSE; if (msg != NULL) { crm_element_value_int(msg, F_CIB_RC, &rc); crm_element_value_int(msg, F_CIB_CALLID, &call_id); output = get_message_xml(msg, F_CIB_CALLDATA); } blob = g_hash_table_lookup(cib_op_callback_table, GINT_TO_POINTER(call_id)); if (blob != NULL) { local_blob = *blob; blob = NULL; remove_cib_op_callback(call_id, FALSE); } else { crm_trace("No callback found for call %d", call_id); local_blob.callback = NULL; } if (cib == NULL) { crm_debug("No cib object supplied"); } if (rc == -pcmk_err_diff_resync) { /* This is an internal value that clients do not and should not care about */ rc = pcmk_ok; } if (local_blob.callback != NULL && (rc == pcmk_ok || local_blob.only_success == FALSE)) { crm_trace("Invoking callback %s for call %d", crm_str(local_blob.id), call_id); local_blob.callback(msg, call_id, rc, output, local_blob.user_data); } else if (cib && cib->op_callback == NULL && rc != pcmk_ok) { crm_warn("CIB command failed: %s", pcmk_strerror(rc)); crm_log_xml_debug(msg, "Failed CIB Update"); } if (cib && cib->op_callback != NULL) { crm_trace("Invoking global callback for call %d", call_id); cib->op_callback(msg, call_id, rc, output); } crm_trace("OP callback activated for %d", call_id); } void cib_native_notify(gpointer data, gpointer user_data) { xmlNode *msg = user_data; cib_notify_client_t *entry = data; const char *event = NULL; if (msg == NULL) { crm_warn("Skipping callback - NULL message"); return; } event = crm_element_value(msg, F_SUBTYPE); if (entry == NULL) { crm_warn("Skipping callback - NULL callback client"); return; } else if (entry->callback == NULL) { crm_warn("Skipping callback - NULL callback"); return; } else if (safe_str_neq(entry->event, event)) { crm_trace("Skipping callback - event mismatch %p/%s vs. %s", entry, entry->event, event); return; } crm_trace("Invoking callback for %p/%s event...", entry, event); entry->callback(event, msg); crm_trace("Callback invoked..."); } pe_cluster_option cib_opts[] = { /* name, old-name, validate, default, description */ {"enable-acl", NULL, "boolean", NULL, "false", &check_boolean, "Enable CIB ACL", NULL} , }; void cib_metadata(void) { config_metadata("Cluster Information Base", "1.0", "Cluster Information Base Options", "This is a fake resource that details the options that can be configured for the Cluster Information Base.", cib_opts, DIMOF(cib_opts)); } void verify_cib_options(GHashTable * options) { verify_all_options(options, cib_opts, DIMOF(cib_opts)); } const char * cib_pref(GHashTable * options, const char *name) { return get_cluster_pref(options, cib_opts, DIMOF(cib_opts), name); } gboolean cib_read_config(GHashTable * options, xmlNode * current_cib) { xmlNode *config = NULL; crm_time_t *now = NULL; if (options == NULL || current_cib == NULL) { return FALSE; } now = crm_time_new(NULL); g_hash_table_remove_all(options); config = get_object_root(XML_CIB_TAG_CRMCONFIG, current_cib); if (config) { unpack_instance_attributes(current_cib, config, XML_CIB_TAG_PROPSET, NULL, options, CIB_OPTIONS_FIRST, FALSE, now); } verify_cib_options(options); crm_time_free(now); return TRUE; } int cib_apply_patch_event(xmlNode * event, xmlNode * input, xmlNode ** output, int level) { int rc = pcmk_err_generic; xmlNode *diff = NULL; CRM_ASSERT(event); CRM_ASSERT(input); CRM_ASSERT(output); crm_element_value_int(event, F_CIB_RC, &rc); diff = get_message_xml(event, F_CIB_UPDATE_RESULT); if (rc < pcmk_ok || diff == NULL) { return rc; } if (level > LOG_CRIT) { xml_log_patchset(level, "Config update", diff); } if (input != NULL) { rc = cib_process_diff(NULL, cib_none, NULL, event, diff, input, output, NULL); if (rc != pcmk_ok) { crm_debug("Update didn't apply: %s (%d) %p", pcmk_strerror(rc), rc, *output); if (rc == -pcmk_err_old_data) { crm_trace("Masking error, we already have the supplied update"); return pcmk_ok; } free_xml(*output); *output = NULL; return rc; } } return rc; } gboolean cib_internal_config_changed(xmlNode * diff) { gboolean changed = FALSE; xmlXPathObject *xpathObj = NULL; if (diff == NULL) { return FALSE; } xpathObj = xpath_search(diff, "//" XML_CIB_TAG_CRMCONFIG); if (numXpathResults(xpathObj) > 0) { changed = TRUE; } freeXpathObject(xpathObj); return changed; } int cib_internal_op(cib_t * cib, const char *op, const char *host, const char *section, xmlNode * data, xmlNode ** output_data, int call_options, const char *user_name) { int (*delegate) (cib_t * cib, const char *op, const char *host, const char *section, xmlNode * data, xmlNode ** output_data, int call_options, const char *user_name) = cib->delegate_fn; #if ENABLE_ACL if(user_name == NULL) { user_name = getenv("CIB_user"); } #endif return delegate(cib, op, host, section, data, output_data, call_options, user_name); } diff --git a/lib/common/Makefile.am b/lib/common/Makefile.am index f5c07660ce..a593f40237 100644 --- a/lib/common/Makefile.am +++ b/lib/common/Makefile.am @@ -1,49 +1,49 @@ # # Copyright (C) 2004 Andrew Beekhof # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include \ -I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl \ -I$(top_builddir)/lib/gnu -I$(top_srcdir)/lib/gnu ## libraries lib_LTLIBRARIES = libcrmcommon.la # Can't use -Wcast-qual here because glib insists on pretending things are const # when they're not and thus we need the crm_element_value_const() hack # s390 needs -fPIC # s390-suse-linux/bin/ld: .libs/ipc.o: relocation R_390_PC32DBL against `__stack_chk_fail@@GLIBC_2.4' can not be used when making a shared object; recompile with -fPIC CFLAGS = $(CFLAGS_COPY:-Wcast-qual=) -fPIC libcrmcommon_la_SOURCES = compat.c digest.c ipc.c io.c utils.c xml.c iso8601.c remote.c mainloop.c logging.c watchdog.c if BUILD_CIBSECRETS libcrmcommon_la_SOURCES += cib_secrets.c endif -libcrmcommon_la_LDFLAGS = -version-info 8:0:5 +libcrmcommon_la_LDFLAGS = -version-info 7:0:4 libcrmcommon_la_LIBADD = @LIBADD_DL@ $(GNUTLSLIBS) libcrmcommon_la_SOURCES += $(top_builddir)/lib/gnu/md5.c clean-generic: rm -f *.log *.debug *.xml *~ install-exec-local: uninstall-local: diff --git a/lib/common/xml.c b/lib/common/xml.c index e272049cca..8eed245fe0 100644 --- a/lib/common/xml.c +++ b/lib/common/xml.c @@ -1,6006 +1,6023 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if HAVE_BZLIB_H # include #endif #if HAVE_LIBXML2 # include # include # include #endif #if HAVE_LIBXSLT # include # include #endif #define XML_BUFFER_SIZE 4096 #define XML_PARSER_DEBUG 0 void xml_log(int priority, const char *fmt, ...) G_GNUC_PRINTF(2, 3); static inline int __get_prefix(const char *prefix, xmlNode *xml, char *buffer, int offset); void xml_log(int priority, const char *fmt, ...) { va_list ap; va_start(ap, fmt); qb_log_from_external_source_va(__FUNCTION__, __FILE__, fmt, priority, __LINE__, 0, ap); va_end(ap); } typedef struct { xmlRelaxNGPtr rng; xmlRelaxNGValidCtxtPtr valid; xmlRelaxNGParserCtxtPtr parser; } relaxng_ctx_cache_t; struct schema_s { int type; float version; char *name; char *location; char *transform; int after_transform; void *cache; }; typedef struct { int found; const char *string; } filter_t; enum xml_private_flags { xpf_none = 0x0000, xpf_dirty = 0x0001, xpf_deleted = 0x0002, xpf_created = 0x0004, xpf_modified = 0x0008, xpf_tracking = 0x0010, xpf_processed = 0x0020, xpf_skip = 0x0040, xpf_moved = 0x0080, xpf_acl_enabled = 0x0100, xpf_acl_read = 0x0200, xpf_acl_write = 0x0400, xpf_acl_deny = 0x0800, xpf_acl_create = 0x1000, xpf_acl_denied = 0x2000, }; typedef struct xml_private_s { long check; uint32_t flags; char *user; GListPtr acls; GListPtr deleted_paths; } xml_private_t; typedef struct xml_acl_s { enum xml_private_flags mode; char *xpath; } xml_acl_t; /* *INDENT-OFF* */ static filter_t filter[] = { { 0, XML_ATTR_ORIGIN }, { 0, XML_CIB_ATTR_WRITTEN }, { 0, XML_ATTR_UPDATE_ORIG }, { 0, XML_ATTR_UPDATE_CLIENT }, { 0, XML_ATTR_UPDATE_USER }, }; /* *INDENT-ON* */ static struct schema_s *known_schemas = NULL; static int xml_schema_max = 0; static xmlNode *subtract_xml_comment(xmlNode * parent, xmlNode * left, xmlNode * right, gboolean * changed); static xmlNode *find_xml_comment(xmlNode * root, xmlNode * search_comment); static int add_xml_comment(xmlNode * parent, xmlNode * target, xmlNode * update); static bool __xml_acl_check(xmlNode *xml, const char *name, enum xml_private_flags mode); const char *__xml_acl_to_text(enum xml_private_flags flags); static int xml_latest_schema_index(void) { return xml_schema_max - 4; } static int xml_minimum_schema_index(void) { static int best = 0; if(best == 0) { int lpc = 0; float target = 0.0; best = xml_latest_schema_index(); target = floor(known_schemas[best].version); for(lpc = best; lpc > 0; lpc--) { if(known_schemas[lpc].version < target) { return best; } else { best = lpc; } } best = xml_latest_schema_index(); } return best; } const char * xml_latest_schema(void) { return get_schema_name(xml_latest_schema_index()); } #define CHUNK_SIZE 1024 static inline bool TRACKING_CHANGES(xmlNode *xml) { if(xml == NULL || xml->doc == NULL || xml->doc->_private == NULL) { return FALSE; } else if(is_set(((xml_private_t *)xml->doc->_private)->flags, xpf_tracking)) { return TRUE; } return FALSE; } #define buffer_print(buffer, max, offset, fmt, args...) do { \ int rc = (max); \ if(buffer) { \ rc = snprintf((buffer) + (offset), (max) - (offset), fmt, ##args); \ } \ if(buffer && rc < 0) { \ crm_perror(LOG_ERR, "snprintf failed at offset %d", offset); \ (buffer)[(offset)] = 0; \ } else if(rc >= ((max) - (offset))) { \ char *tmp = NULL; \ (max) = QB_MAX(CHUNK_SIZE, (max) * 2); \ tmp = realloc_safe((buffer), (max) + 1); \ CRM_ASSERT(tmp); \ (buffer) = tmp; \ } else { \ offset += rc; \ break; \ } \ } while(1); static void insert_prefix(int options, char **buffer, int *offset, int *max, int depth) { if (options & xml_log_option_formatted) { size_t spaces = 2 * depth; if ((*buffer) == NULL || spaces >= ((*max) - (*offset))) { (*max) = QB_MAX(CHUNK_SIZE, (*max) * 2); (*buffer) = realloc_safe((*buffer), (*max) + 1); } memset((*buffer) + (*offset), ' ', spaces); (*offset) += spaces; } } static const char * get_schema_root(void) { static const char *base = NULL; if (base == NULL) { base = getenv("PCMK_schema_directory"); } if (base == NULL || strlen(base) == 0) { base = CRM_DTD_DIRECTORY; } return base; } static char * get_schema_path(const char *name, const char *file) { const char *base = get_schema_root(); if(file) { return crm_strdup_printf("%s/%s", base, file); } return crm_strdup_printf("%s/%s.rng", base, name); } static int schema_filter(const struct dirent * a) { int rc = 0; float version = 0; if(strstr(a->d_name, "pacemaker-") != a->d_name) { /* crm_trace("%s - wrong prefix", a->d_name); */ } else if(strstr(a->d_name, ".rng") == NULL) { /* crm_trace("%s - wrong suffix", a->d_name); */ } else if(sscanf(a->d_name, "pacemaker-%f.rng", &version) == 0) { /* crm_trace("%s - wrong format", a->d_name); */ } else if(strcmp(a->d_name, "pacemaker-1.1.rng") == 0) { /* crm_trace("%s - hack", a->d_name); */ } else { /* crm_debug("%s - candidate", a->d_name); */ rc = 1; } return rc; } static int schema_sort(const struct dirent ** a, const struct dirent **b) { int rc = 0; float a_version = 0.0; float b_version = 0.0; sscanf(a[0]->d_name, "pacemaker-%f.rng", &a_version); sscanf(b[0]->d_name, "pacemaker-%f.rng", &b_version); if(a_version > b_version) { rc = 1; } else if(a_version < b_version) { rc = -1; } /* crm_trace("%s (%f) vs. %s (%f) : %d", a[0]->d_name, a_version, b[0]->d_name, b_version, rc); */ return rc; } static void __xml_schema_add( int type, float version, const char *name, const char *location, const char *transform, int after_transform) { int last = xml_schema_max; xml_schema_max++; known_schemas = realloc_safe(known_schemas, xml_schema_max*sizeof(struct schema_s)); CRM_ASSERT(known_schemas != NULL); memset(known_schemas+last, 0, sizeof(struct schema_s)); known_schemas[last].type = type; known_schemas[last].after_transform = after_transform; if(version > 0.0) { known_schemas[last].version = version; known_schemas[last].name = crm_strdup_printf("pacemaker-%.1f", version); known_schemas[last].location = crm_strdup_printf("%s.rng", known_schemas[last].name); } else { char dummy[1024]; CRM_ASSERT(name); CRM_ASSERT(location); sscanf(name, "%[^-]-%f", dummy, &version); known_schemas[last].version = version; known_schemas[last].name = strdup(name); known_schemas[last].location = strdup(location); } if(transform) { known_schemas[last].transform = strdup(transform); } if(after_transform == 0) { after_transform = xml_schema_max; } known_schemas[last].after_transform = after_transform; if(known_schemas[last].after_transform < 0) { crm_debug("Added supported schema %d: %s (%s)", last, known_schemas[last].name, known_schemas[last].location); } else if(known_schemas[last].transform) { crm_debug("Added supported schema %d: %s (%s upgrades to %d with %s)", last, known_schemas[last].name, known_schemas[last].location, known_schemas[last].after_transform, known_schemas[last].transform); } else { crm_debug("Added supported schema %d: %s (%s upgrades to %d)", last, known_schemas[last].name, known_schemas[last].location, known_schemas[last].after_transform); } } static int __xml_build_schema_list(void) { int lpc, max; const char *base = get_schema_root(); struct dirent **namelist = NULL; max = scandir(base, &namelist, schema_filter, schema_sort); __xml_schema_add(1, 0.0, "pacemaker-0.6", "crm.dtd", "upgrade06.xsl", 3); __xml_schema_add(1, 0.0, "transitional-0.6", "crm-transitional.dtd", "upgrade06.xsl", 3); __xml_schema_add(2, 0.0, "pacemaker-0.7", "pacemaker-1.0.rng", NULL, 0); if (max < 0) { crm_notice("scandir(%s) failed: %s (%d)", base, strerror(errno), errno); } else { for (lpc = 0; lpc < max; lpc++) { int next = 0; float version = 0.0; char *transform = NULL; sscanf(namelist[lpc]->d_name, "pacemaker-%f.rng", &version); if((lpc + 1) < max) { float next_version = 0.0; sscanf(namelist[lpc+1]->d_name, "pacemaker-%f.rng", &next_version); if(floor(version) < floor(next_version)) { struct stat s; char *xslt = NULL; transform = crm_strdup_printf("upgrade-%.1f.xsl", version); xslt = get_schema_path(NULL, transform); if(stat(xslt, &s) != 0) { crm_err("Transform %s not found", xslt); free(xslt); __xml_schema_add(2, version, NULL, NULL, NULL, -1); break; } else { free(xslt); } } } else { next = -1; } __xml_schema_add(2, version, NULL, NULL, transform, next); free(namelist[lpc]); free(transform); } } /* 1.1 was the old name for -next */ __xml_schema_add(2, 0.0, "pacemaker-1.1", "pacemaker-next.rng", NULL, 0); __xml_schema_add(2, 0.0, "pacemaker-next", "pacemaker-next.rng", NULL, -1); __xml_schema_add(0, 0.0, "none", "N/A", NULL, -1); free(namelist); return TRUE; } static void set_parent_flag(xmlNode *xml, long flag) { for(; xml; xml = xml->parent) { xml_private_t *p = xml->_private; if(p == NULL) { /* During calls to xmlDocCopyNode(), _private will be unset for parent nodes */ } else { p->flags |= flag; /* crm_trace("Setting flag %x due to %s[@id=%s]", flag, xml->name, ID(xml)); */ } } } static void set_doc_flag(xmlNode *xml, long flag) { if(xml && xml->doc && xml->doc->_private){ /* During calls to xmlDocCopyNode(), xml->doc may be unset */ xml_private_t *p = xml->doc->_private; p->flags |= flag; /* crm_trace("Setting flag %x due to %s[@id=%s]", flag, xml->name, ID(xml)); */ } } static void __xml_node_dirty(xmlNode *xml) { set_doc_flag(xml, xpf_dirty); set_parent_flag(xml, xpf_dirty); } static void __xml_node_clean(xmlNode *xml) { xmlNode *cIter = NULL; xml_private_t *p = xml->_private; if(p) { p->flags = 0; } for (cIter = __xml_first_child(xml); cIter != NULL; cIter = __xml_next(cIter)) { __xml_node_clean(cIter); } } static void crm_node_created(xmlNode *xml) { xmlNode *cIter = NULL; xml_private_t *p = xml->_private; if(p && TRACKING_CHANGES(xml)) { if(is_not_set(p->flags, xpf_created)) { p->flags |= xpf_created; __xml_node_dirty(xml); } for (cIter = __xml_first_child(xml); cIter != NULL; cIter = __xml_next(cIter)) { crm_node_created(cIter); } } } static void crm_attr_dirty(xmlAttr *a) { xmlNode *parent = a->parent; xml_private_t *p = NULL; p = a->_private; p->flags |= (xpf_dirty|xpf_modified); p->flags = (p->flags & ~xpf_deleted); /* crm_trace("Setting flag %x due to %s[@id=%s, @%s=%s]", */ /* xpf_dirty, parent?parent->name:NULL, ID(parent), a->name, a->children->content); */ __xml_node_dirty(parent); } int get_tag_name(const char *input, size_t offset, size_t max); int get_attr_name(const char *input, size_t offset, size_t max); int get_attr_value(const char *input, size_t offset, size_t max); gboolean can_prune_leaf(xmlNode * xml_node); void diff_filter_context(int context, int upper_bound, int lower_bound, xmlNode * xml_node, xmlNode * parent); int in_upper_context(int depth, int context, xmlNode * xml_node); int add_xml_object(xmlNode * parent, xmlNode * target, xmlNode * update, gboolean as_diff); static inline const char * crm_attr_value(xmlAttr * attr) { if (attr == NULL || attr->children == NULL) { return NULL; } return (const char *)attr->children->content; } static inline xmlAttr * crm_first_attr(xmlNode * xml) { if (xml == NULL) { return NULL; } return xml->properties; } #define XML_PRIVATE_MAGIC (long) 0x81726354 static void __xml_acl_free(void *data) { if(data) { xml_acl_t *acl = data; free(acl->xpath); free(acl); } } static void __xml_private_clean(xml_private_t *p) { if(p) { CRM_ASSERT(p->check == XML_PRIVATE_MAGIC); free(p->user); p->user = NULL; if(p->acls) { g_list_free_full(p->acls, __xml_acl_free); p->acls = NULL; } if(p->deleted_paths) { g_list_free_full(p->deleted_paths, free); p->deleted_paths = NULL; } } } static void __xml_private_free(xml_private_t *p) { __xml_private_clean(p); free(p); } static void pcmkDeregisterNode(xmlNodePtr node) { __xml_private_free(node->_private); } static void pcmkRegisterNode(xmlNodePtr node) { xml_private_t *p = NULL; switch(node->type) { case XML_ELEMENT_NODE: case XML_DOCUMENT_NODE: case XML_ATTRIBUTE_NODE: case XML_COMMENT_NODE: p = calloc(1, sizeof(xml_private_t)); p->check = XML_PRIVATE_MAGIC; /* Flags will be reset if necessary when tracking is enabled */ p->flags |= (xpf_dirty|xpf_created); node->_private = p; break; case XML_TEXT_NODE: case XML_DTD_NODE: break; default: /* Ignore */ crm_trace("Ignoring %p %d", node, node->type); CRM_LOG_ASSERT(node->type == XML_ELEMENT_NODE); break; } if(p && TRACKING_CHANGES(node)) { /* XML_ELEMENT_NODE doesn't get picked up here, node->doc is * not hooked up at the point we are called */ set_doc_flag(node, xpf_dirty); __xml_node_dirty(node); } } static xml_acl_t * __xml_acl_create(xmlNode * xml, xmlNode *target, enum xml_private_flags mode) { xml_acl_t *acl = NULL; xml_private_t *p = NULL; const char *tag = crm_element_value(xml, XML_ACL_ATTR_TAG); const char *ref = crm_element_value(xml, XML_ACL_ATTR_REF); const char *xpath = crm_element_value(xml, XML_ACL_ATTR_XPATH); if(tag == NULL) { /* Compatability handling for pacemaker < 1.1.12 */ tag = crm_element_value(xml, XML_ACL_ATTR_TAGv1); } if(ref == NULL) { /* Compatability handling for pacemaker < 1.1.12 */ ref = crm_element_value(xml, XML_ACL_ATTR_REFv1); } if(target == NULL || target->doc == NULL || target->doc->_private == NULL){ CRM_ASSERT(target); CRM_ASSERT(target->doc); CRM_ASSERT(target->doc->_private); return NULL; } else if (tag == NULL && ref == NULL && xpath == NULL) { crm_trace("No criteria %p", xml); return NULL; } p = target->doc->_private; acl = calloc(1, sizeof(xml_acl_t)); if (acl) { const char *attr = crm_element_value(xml, XML_ACL_ATTR_ATTRIBUTE); acl->mode = mode; if(xpath) { acl->xpath = strdup(xpath); crm_trace("Using xpath: %s", acl->xpath); } else { int offset = 0; char buffer[XML_BUFFER_SIZE]; if(tag) { offset += snprintf(buffer + offset, XML_BUFFER_SIZE - offset, "//%s", tag); } else { offset += snprintf(buffer + offset, XML_BUFFER_SIZE - offset, "//*"); } if(ref || attr) { offset += snprintf(buffer + offset, XML_BUFFER_SIZE - offset, "["); } if(ref) { offset += snprintf(buffer + offset, XML_BUFFER_SIZE - offset, "@id='%s'", ref); } if(ref && attr) { offset += snprintf(buffer + offset, XML_BUFFER_SIZE - offset, " and "); } if(attr) { offset += snprintf(buffer + offset, XML_BUFFER_SIZE - offset, "@%s", attr); } if(ref || attr) { offset += snprintf(buffer + offset, XML_BUFFER_SIZE - offset, "]"); } CRM_LOG_ASSERT(offset > 0); acl->xpath = strdup(buffer); crm_trace("Built xpath: %s", acl->xpath); } p->acls = g_list_append(p->acls, acl); } return acl; } static gboolean __xml_acl_parse_entry(xmlNode * acl_top, xmlNode * acl_entry, xmlNode *target) { xmlNode *child = NULL; for (child = __xml_first_child(acl_entry); child; child = __xml_next(child)) { const char *tag = crm_element_name(child); const char *kind = crm_element_value(child, XML_ACL_ATTR_KIND); if (strcmp(XML_ACL_TAG_PERMISSION, tag) == 0){ tag = kind; } crm_trace("Processing %s %p", tag, child); if(tag == NULL) { CRM_ASSERT(tag != NULL); } else if (strcmp(XML_ACL_TAG_ROLE_REF, tag) == 0 || strcmp(XML_ACL_TAG_ROLE_REFv1, tag) == 0) { const char *ref_role = crm_element_value(child, XML_ATTR_ID); if (ref_role) { xmlNode *role = NULL; for (role = __xml_first_child(acl_top); role; role = __xml_next(role)) { if (strcmp(XML_ACL_TAG_ROLE, (const char *)role->name) == 0) { const char *role_id = crm_element_value(role, XML_ATTR_ID); if (role_id && strcmp(ref_role, role_id) == 0) { crm_debug("Unpacking referenced role: %s", role_id); __xml_acl_parse_entry(acl_top, role, target); break; } } } } } else if (strcmp(XML_ACL_TAG_READ, tag) == 0) { __xml_acl_create(child, target, xpf_acl_read); } else if (strcmp(XML_ACL_TAG_WRITE, tag) == 0) { __xml_acl_create(child, target, xpf_acl_write); } else if (strcmp(XML_ACL_TAG_DENY, tag) == 0) { __xml_acl_create(child, target, xpf_acl_deny); } else { crm_warn("Unknown ACL entry: %s/%s", tag, kind); } } return TRUE; } /* */ const char * __xml_acl_to_text(enum xml_private_flags flags) { if(is_set(flags, xpf_acl_deny)) { return "deny"; } if(is_set(flags, xpf_acl_write)) { return "read/write"; } if(is_set(flags, xpf_acl_read)) { return "read"; } return "none"; } static void __xml_acl_apply(xmlNode *xml) { GListPtr aIter = NULL; xml_private_t *p = NULL; xmlXPathObjectPtr xpathObj = NULL; if(xml_acl_enabled(xml) == FALSE) { p = xml->doc->_private; crm_trace("Not applying ACLs for %s", p->user); return; } p = xml->doc->_private; for(aIter = p->acls; aIter != NULL; aIter = aIter->next) { int max = 0, lpc = 0; xml_acl_t *acl = aIter->data; xpathObj = xpath_search(xml, acl->xpath); max = numXpathResults(xpathObj); for(lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); char *path = xml_get_path(match); p = match->_private; crm_trace("Applying %x to %s for %s", acl->mode, path, acl->xpath); #ifdef SUSE_ACL_COMPAT if(is_not_set(p->flags, acl->mode)) { if(is_set(p->flags, xpf_acl_read) || is_set(p->flags, xpf_acl_write) || is_set(p->flags, xpf_acl_deny)) { crm_config_warn("Configuration element %s is matched by multiple ACL rules, only the first applies ('%s' wins over '%s')", path, __xml_acl_to_text(p->flags), __xml_acl_to_text(acl->mode)); free(path); continue; } } #endif p->flags |= acl->mode; free(path); } crm_trace("Now enforcing ACL: %s (%d matches)", acl->xpath, max); freeXpathObject(xpathObj); } p = xml->_private; if(is_not_set(p->flags, xpf_acl_read) && is_not_set(p->flags, xpf_acl_write)) { p->flags |= xpf_acl_deny; p = xml->doc->_private; crm_info("Enforcing default ACL for %s to %s", p->user, crm_element_name(xml)); } } static void __xml_acl_unpack(xmlNode *source, xmlNode *target, const char *user) { #if ENABLE_ACL xml_private_t *p = NULL; if(target == NULL || target->doc == NULL || target->doc->_private == NULL) { return; } p = target->doc->_private; if(pcmk_acl_required(user) == FALSE) { crm_trace("no acls needed for '%s'", user); } else if(p->acls == NULL) { xmlNode *acls = get_xpath_object("//"XML_CIB_TAG_ACLS, source, LOG_TRACE); free(p->user); p->user = strdup(user); if(acls) { xmlNode *child = NULL; for (child = __xml_first_child(acls); child; child = __xml_next(child)) { const char *tag = crm_element_name(child); if (strcmp(tag, XML_ACL_TAG_USER) == 0 || strcmp(tag, XML_ACL_TAG_USERv1) == 0) { const char *id = crm_element_value(child, XML_ATTR_ID); if(id && strcmp(id, user) == 0) { crm_debug("Unpacking ACLs for %s", id); __xml_acl_parse_entry(acls, child, target); } } } } } #endif } static inline bool __xml_acl_mode_test(enum xml_private_flags allowed, enum xml_private_flags requested) { if(is_set(allowed, xpf_acl_deny)) { return FALSE; } else if(is_set(allowed, requested)) { return TRUE; } else if(is_set(requested, xpf_acl_read) && is_set(allowed, xpf_acl_write)) { return TRUE; } else if(is_set(requested, xpf_acl_create) && is_set(allowed, xpf_acl_write)) { return TRUE; } else if(is_set(requested, xpf_acl_create) && is_set(allowed, xpf_created)) { return TRUE; } return FALSE; } /* rc = TRUE if orig_cib has been filtered * That means '*result' rather than 'xml' should be exploited afterwards */ static bool __xml_purge_attributes(xmlNode *xml) { xmlNode *child = NULL; xmlAttr *xIter = NULL; bool readable_children = FALSE; xml_private_t *p = xml->_private; if(__xml_acl_mode_test(p->flags, xpf_acl_read)) { crm_trace("%s is readable", crm_element_name(xml), ID(xml)); return TRUE; } xIter = crm_first_attr(xml); while(xIter != NULL) { xmlAttr *tmp = xIter; const char *prop_name = (const char *)xIter->name; xIter = xIter->next; if (strcmp(prop_name, XML_ATTR_ID) == 0) { continue; } xmlUnsetProp(xml, tmp->name); } child = __xml_first_child(xml); while ( child != NULL ) { xmlNode *tmp = child; child = __xml_next(child); readable_children |= __xml_purge_attributes(tmp); } if(readable_children == FALSE) { free_xml(xml); /* Nothing readable under here, purge completely */ } return readable_children; } bool xml_acl_filtered_copy(const char *user, xmlNode* acl_source, xmlNode *xml, xmlNode ** result) { GListPtr aIter = NULL; xmlNode *target = NULL; xml_private_t *p = NULL; xml_private_t *doc = NULL; *result = NULL; if(xml == NULL || pcmk_acl_required(user) == FALSE) { crm_trace("no acls needed for '%s'", user); return FALSE; } crm_trace("filtering copy of %p for '%s'", xml, user); target = copy_xml(xml); if(target == NULL) { return TRUE; } __xml_acl_unpack(acl_source, target, user); set_doc_flag(target, xpf_acl_enabled); __xml_acl_apply(target); doc = target->doc->_private; for(aIter = doc->acls; aIter != NULL && target; aIter = aIter->next) { int max = 0; xml_acl_t *acl = aIter->data; if(acl->mode != xpf_acl_deny) { /* Nothing to do */ } else if(acl->xpath) { int lpc = 0; xmlXPathObjectPtr xpathObj = xpath_search(target, acl->xpath); max = numXpathResults(xpathObj); for(lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); crm_trace("Purging attributes from %s", acl->xpath); if(__xml_purge_attributes(match) == FALSE && match == target) { crm_trace("No access to the entire document for %s", user); freeXpathObject(xpathObj); return TRUE; } } crm_trace("Enforced ACL %s (%d matches)", acl->xpath, max); freeXpathObject(xpathObj); } } p = target->_private; if(is_set(p->flags, xpf_acl_deny) && __xml_purge_attributes(target) == FALSE) { crm_trace("No access to the entire document for %s", user); return TRUE; } if(doc->acls) { g_list_free_full(doc->acls, __xml_acl_free); doc->acls = NULL; } else { crm_trace("Ordinary user '%s' cannot access the CIB without any defined ACLs", doc->user); free_xml(target); target = NULL; } if(target) { *result = target; } return TRUE; } static void __xml_acl_post_process(xmlNode * xml) { xmlNode *cIter = __xml_first_child(xml); xml_private_t *p = xml->_private; if(is_set(p->flags, xpf_created)) { xmlAttr *xIter = NULL; char *path = xml_get_path(xml); /* Always allow new scaffolding, ie. node with no attributes or only an 'id' * Except in the ACLs section */ for (xIter = crm_first_attr(xml); xIter != NULL; xIter = xIter->next) { const char *prop_name = (const char *)xIter->name; if (strcmp(prop_name, XML_ATTR_ID) == 0 && strstr(path, "/"XML_CIB_TAG_ACLS"/") == NULL) { /* Delay the acl check */ continue; } else if(__xml_acl_check(xml, NULL, xpf_acl_write)) { crm_trace("Creation of %s=%s is allowed", crm_element_name(xml), ID(xml)); break; } else { crm_trace("Cannot add new node %s at %s", crm_element_name(xml), path); if(xml != xmlDocGetRootElement(xml->doc)) { xmlUnlinkNode(xml); xmlFreeNode(xml); } free(path); return; } } free(path); } while (cIter != NULL) { xmlNode *child = cIter; cIter = __xml_next(cIter); /* In case it is free'd */ __xml_acl_post_process(child); } } bool xml_acl_denied(xmlNode *xml) { if(xml && xml->doc && xml->doc->_private){ xml_private_t *p = xml->doc->_private; return is_set(p->flags, xpf_acl_denied); } return FALSE; } void xml_acl_disable(xmlNode *xml) { if(xml_acl_enabled(xml)) { xml_private_t *p = xml->doc->_private; /* Catch anything that was created but shouldn't have been */ __xml_acl_apply(xml); __xml_acl_post_process(xml); clear_bit(p->flags, xpf_acl_enabled); } } bool xml_acl_enabled(xmlNode *xml) { if(xml && xml->doc && xml->doc->_private){ xml_private_t *p = xml->doc->_private; return is_set(p->flags, xpf_acl_enabled); } return FALSE; } void xml_track_changes(xmlNode * xml, const char *user, xmlNode *acl_source, bool enforce_acls) { xml_accept_changes(xml); crm_trace("Tracking changes%s to %p", enforce_acls?" with ACLs":"", xml); set_doc_flag(xml, xpf_tracking); if(enforce_acls) { if(acl_source == NULL) { acl_source = xml; } set_doc_flag(xml, xpf_acl_enabled); __xml_acl_unpack(acl_source, xml, user); __xml_acl_apply(xml); } } bool xml_tracking_changes(xmlNode * xml) { if(xml == NULL) { return FALSE; } else if(is_set(((xml_private_t *)xml->doc->_private)->flags, xpf_tracking)) { return TRUE; } return FALSE; } bool xml_document_dirty(xmlNode *xml) { if(xml != NULL && xml->doc && xml->doc->_private) { xml_private_t *doc = xml->doc->_private; return is_set(doc->flags, xpf_dirty); } return FALSE; } /* */ static int __xml_offset(xmlNode *xml) { int position = 0; xmlNode *cIter = NULL; for(cIter = xml; cIter->prev; cIter = cIter->prev) { xml_private_t *p = ((xmlNode*)cIter->prev)->_private; if(is_not_set(p->flags, xpf_skip)) { position++; } } return position; } static int __xml_offset_no_deletions(xmlNode *xml) { int position = 0; xmlNode *cIter = NULL; for(cIter = xml; cIter->prev; cIter = cIter->prev) { xml_private_t *p = ((xmlNode*)cIter->prev)->_private; if(is_not_set(p->flags, xpf_deleted)) { position++; } } return position; } static void __xml_build_changes(xmlNode * xml, xmlNode *patchset) { xmlNode *cIter = NULL; xmlAttr *pIter = NULL; xmlNode *change = NULL; xml_private_t *p = xml->_private; if(patchset && is_set(p->flags, xpf_created)) { int offset = 0; char buffer[XML_BUFFER_SIZE]; if(__get_prefix(NULL, xml->parent, buffer, offset) > 0) { int position = __xml_offset_no_deletions(xml); change = create_xml_node(patchset, XML_DIFF_CHANGE); crm_xml_add(change, XML_DIFF_OP, "create"); crm_xml_add(change, XML_DIFF_PATH, buffer); crm_xml_add_int(change, XML_DIFF_POSITION, position); add_node_copy(change, xml); } return; } for (pIter = crm_first_attr(xml); pIter != NULL; pIter = pIter->next) { xmlNode *attr = NULL; p = pIter->_private; if(is_not_set(p->flags, xpf_deleted) && is_not_set(p->flags, xpf_dirty)) { continue; } if(change == NULL) { int offset = 0; char buffer[XML_BUFFER_SIZE]; if(__get_prefix(NULL, xml, buffer, offset) > 0) { change = create_xml_node(patchset, XML_DIFF_CHANGE); crm_xml_add(change, XML_DIFF_OP, "modify"); crm_xml_add(change, XML_DIFF_PATH, buffer); change = create_xml_node(change, XML_DIFF_LIST); } } attr = create_xml_node(change, XML_DIFF_ATTR); crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, (const char *)pIter->name); if(p->flags & xpf_deleted) { crm_xml_add(attr, XML_DIFF_OP, "unset"); } else { const char *value = crm_element_value(xml, (const char *)pIter->name); crm_xml_add(attr, XML_DIFF_OP, "set"); crm_xml_add(attr, XML_NVPAIR_ATTR_VALUE, value); } } if(change) { xmlNode *result = NULL; change = create_xml_node(change->parent, XML_DIFF_RESULT); result = create_xml_node(change, (const char *)xml->name); for (pIter = crm_first_attr(xml); pIter != NULL; pIter = pIter->next) { const char *value = crm_element_value(xml, (const char *)pIter->name); p = pIter->_private; if (is_not_set(p->flags, xpf_deleted)) { crm_xml_add(result, (const char *)pIter->name, value); } } } for (cIter = __xml_first_child(xml); cIter != NULL; cIter = __xml_next(cIter)) { __xml_build_changes(cIter, patchset); } p = xml->_private; if(patchset && is_set(p->flags, xpf_moved)) { int offset = 0; char buffer[XML_BUFFER_SIZE]; crm_trace("%s.%s moved to position %d", xml->name, ID(xml), __xml_offset(xml)); if(__get_prefix(NULL, xml, buffer, offset) > 0) { change = create_xml_node(patchset, XML_DIFF_CHANGE); crm_xml_add(change, XML_DIFF_OP, "move"); crm_xml_add(change, XML_DIFF_PATH, buffer); crm_xml_add_int(change, XML_DIFF_POSITION, __xml_offset_no_deletions(xml)); } } } static void __xml_accept_changes(xmlNode * xml) { xmlNode *cIter = NULL; xmlAttr *pIter = NULL; xml_private_t *p = xml->_private; p->flags = xpf_none; pIter = crm_first_attr(xml); while (pIter != NULL) { const xmlChar *name = pIter->name; p = pIter->_private; pIter = pIter->next; if(p->flags & xpf_deleted) { xml_remove_prop(xml, (const char *)name); } else { p->flags = xpf_none; } } for (cIter = __xml_first_child(xml); cIter != NULL; cIter = __xml_next(cIter)) { __xml_accept_changes(cIter); } } static bool is_config_change(xmlNode *xml) { GListPtr gIter = NULL; xml_private_t *p = NULL; xmlNode *config = first_named_child(xml, XML_CIB_TAG_CONFIGURATION); if(config) { p = config->_private; } if(p && is_set(p->flags, xpf_dirty)) { return TRUE; } if(xml->doc && xml->doc->_private) { p = xml->doc->_private; for(gIter = p->deleted_paths; gIter; gIter = gIter->next) { char *path = gIter->data; if(strstr(path, "/"XML_TAG_CIB"/"XML_CIB_TAG_CONFIGURATION) != NULL) { return TRUE; } } } return FALSE; } static void xml_repair_v1_diff(xmlNode * last, xmlNode * next, xmlNode * local_diff, gboolean changed) { int lpc = 0; xmlNode *cib = NULL; xmlNode *diff_child = NULL; const char *tag = NULL; const char *vfields[] = { XML_ATTR_GENERATION_ADMIN, XML_ATTR_GENERATION, XML_ATTR_NUMUPDATES, }; if (local_diff == NULL) { crm_trace("Nothing to do"); return; } tag = "diff-removed"; diff_child = find_xml_node(local_diff, tag, FALSE); if (diff_child == NULL) { diff_child = create_xml_node(local_diff, tag); } tag = XML_TAG_CIB; cib = find_xml_node(diff_child, tag, FALSE); if (cib == NULL) { cib = create_xml_node(diff_child, tag); } for(lpc = 0; last && lpc < DIMOF(vfields); lpc++){ const char *value = crm_element_value(last, vfields[lpc]); crm_xml_add(diff_child, vfields[lpc], value); if(changed || lpc == 2) { crm_xml_add(cib, vfields[lpc], value); } } tag = "diff-added"; diff_child = find_xml_node(local_diff, tag, FALSE); if (diff_child == NULL) { diff_child = create_xml_node(local_diff, tag); } tag = XML_TAG_CIB; cib = find_xml_node(diff_child, tag, FALSE); if (cib == NULL) { cib = create_xml_node(diff_child, tag); } for(lpc = 0; next && lpc < DIMOF(vfields); lpc++){ const char *value = crm_element_value(next, vfields[lpc]); crm_xml_add(diff_child, vfields[lpc], value); } if (next) { xmlAttrPtr xIter = NULL; for (xIter = next->properties; xIter; xIter = xIter->next) { const char *p_name = (const char *)xIter->name; const char *p_value = crm_element_value(next, p_name); xmlSetProp(cib, (const xmlChar *)p_name, (const xmlChar *)p_value); } } crm_log_xml_explicit(local_diff, "Repaired-diff"); } static xmlNode * xml_create_patchset_v1(xmlNode *source, xmlNode *target, bool config, bool suppress) { xmlNode *patchset = diff_xml_object(source, target, suppress); if(patchset) { CRM_LOG_ASSERT(xml_document_dirty(target)); xml_repair_v1_diff(source, target, patchset, config); crm_xml_add(patchset, "format", "1"); } return patchset; } static xmlNode * xml_create_patchset_v2(xmlNode *source, xmlNode *target) { int lpc = 0; GListPtr gIter = NULL; xml_private_t *doc = NULL; xmlNode *v = NULL; xmlNode *version = NULL; xmlNode *patchset = NULL; const char *vfields[] = { XML_ATTR_GENERATION_ADMIN, XML_ATTR_GENERATION, XML_ATTR_NUMUPDATES, }; CRM_ASSERT(target); if(xml_document_dirty(target) == FALSE) { return NULL; } CRM_ASSERT(target->doc); doc = target->doc->_private; patchset = create_xml_node(NULL, XML_TAG_DIFF); crm_xml_add_int(patchset, "format", 2); version = create_xml_node(patchset, XML_DIFF_VERSION); v = create_xml_node(version, XML_DIFF_VSOURCE); for(lpc = 0; lpc < DIMOF(vfields); lpc++){ const char *value = crm_element_value(source, vfields[lpc]); if(value == NULL) { value = "1"; } crm_xml_add(v, vfields[lpc], value); } v = create_xml_node(version, XML_DIFF_VTARGET); for(lpc = 0; lpc < DIMOF(vfields); lpc++){ const char *value = crm_element_value(target, vfields[lpc]); if(value == NULL) { value = "1"; } crm_xml_add(v, vfields[lpc], value); } for(gIter = doc->deleted_paths; gIter; gIter = gIter->next) { xmlNode *change = create_xml_node(patchset, XML_DIFF_CHANGE); crm_xml_add(change, XML_DIFF_OP, "delete"); crm_xml_add(change, XML_DIFF_PATH, gIter->data); } __xml_build_changes(target, patchset); return patchset; } static gboolean patch_legacy_mode(void) { static gboolean init = TRUE; static gboolean legacy = FALSE; if(init) { init = FALSE; legacy = daemon_option_enabled("cib", "legacy"); if(legacy) { crm_notice("Enabled legacy mode"); } } return legacy; } xmlNode * xml_create_patchset(int format, xmlNode *source, xmlNode *target, bool *config_changed, bool manage_version) { int counter = 0; bool config = FALSE; xmlNode *patch = NULL; const char *version = crm_element_value(source, XML_ATTR_CRM_VERSION); xml_acl_disable(target); if(xml_document_dirty(target) == FALSE) { crm_trace("No change %d", format); return NULL; /* No change */ } config = is_config_change(target); if(config_changed) { *config_changed = config; } if(manage_version && config) { crm_trace("Config changed %d", format); crm_xml_add(target, XML_ATTR_NUMUPDATES, "0"); crm_element_value_int(target, XML_ATTR_GENERATION, &counter); crm_xml_add_int(target, XML_ATTR_GENERATION, counter+1); } else if(manage_version) { crm_trace("Status changed %d", format); crm_element_value_int(target, XML_ATTR_NUMUPDATES, &counter); crm_xml_add_int(target, XML_ATTR_NUMUPDATES, counter+1); } if(format == 0) { if(patch_legacy_mode()) { format = 1; } else if(compare_version("3.0.8", version) < 0) { format = 2; } else { format = 1; } crm_trace("Using patch format %d for version: %s", format, version); } switch(format) { case 1: patch = xml_create_patchset_v1(source, target, config, FALSE); break; case 2: patch = xml_create_patchset_v2(source, target); break; default: crm_err("Unknown patch format: %d", format); return NULL; } return patch; } void patchset_process_digest(xmlNode *patch, xmlNode *source, xmlNode *target, bool with_digest) { int format = 1; const char *version = NULL; char *digest = NULL; if (patch == NULL || source == NULL || target == NULL) { return; } /* NOTE: We should always call xml_accept_changes() before calculating digest. */ /* Otherwise, with an on-tracking dirty target, we could get a wrong digest. */ CRM_LOG_ASSERT(xml_document_dirty(target) == FALSE); crm_element_value_int(patch, "format", &format); if (format > 1 && with_digest == FALSE) { return; } version = crm_element_value(source, XML_ATTR_CRM_VERSION); digest = calculate_xml_versioned_digest(target, FALSE, TRUE, version); crm_xml_add(patch, XML_ATTR_DIGEST, digest); free(digest); return; } static void __xml_log_element(int log_level, const char *file, const char *function, int line, const char *prefix, xmlNode * data, int depth, int options); void xml_log_patchset(uint8_t log_level, const char *function, xmlNode * patchset) { int format = 1; xmlNode *child = NULL; xmlNode *added = NULL; xmlNode *removed = NULL; gboolean is_first = TRUE; int add[] = { 0, 0, 0 }; int del[] = { 0, 0, 0 }; const char *fmt = NULL; const char *digest = NULL; int options = xml_log_option_formatted; static struct qb_log_callsite *patchset_cs = NULL; if (patchset_cs == NULL) { patchset_cs = qb_log_callsite_get(function, __FILE__, "xml-patchset", log_level, __LINE__, 0); } if (patchset == NULL) { crm_trace("Empty patch"); return; } else if (log_level == 0) { /* Log to stdout */ } else if (crm_is_callsite_active(patchset_cs, log_level, 0) == FALSE) { return; } xml_patch_versions(patchset, add, del); fmt = crm_element_value(patchset, "format"); digest = crm_element_value(patchset, XML_ATTR_DIGEST); if (add[2] != del[2] || add[1] != del[1] || add[0] != del[0]) { do_crm_log_alias(log_level, __FILE__, function, __LINE__, "Diff: --- %d.%d.%d %s", del[0], del[1], del[2], fmt); do_crm_log_alias(log_level, __FILE__, function, __LINE__, "Diff: +++ %d.%d.%d %s", add[0], add[1], add[2], digest); } else if (patchset != NULL && (add[0] || add[1] || add[2])) { do_crm_log_alias(log_level, __FILE__, function, __LINE__, "%s: Local-only Change: %d.%d.%d", function ? function : "", add[0], add[1], add[2]); } crm_element_value_int(patchset, "format", &format); if(format == 2) { xmlNode *change = NULL; for (change = __xml_first_child(patchset); change != NULL; change = __xml_next(change)) { const char *op = crm_element_value(change, XML_DIFF_OP); const char *xpath = crm_element_value(change, XML_DIFF_PATH); if(op == NULL) { } else if(strcmp(op, "create") == 0) { int lpc = 0, max = 0; char *prefix = crm_strdup_printf("++ %s: ", xpath); max = strlen(prefix); __xml_log_element(log_level, __FILE__, function, __LINE__, prefix, change->children, 0, xml_log_option_formatted|xml_log_option_open); for(lpc = 2; lpc < max; lpc++) { prefix[lpc] = ' '; } __xml_log_element(log_level, __FILE__, function, __LINE__, prefix, change->children, 0, xml_log_option_formatted|xml_log_option_close|xml_log_option_children); free(prefix); } else if(strcmp(op, "move") == 0) { do_crm_log_alias(log_level, __FILE__, function, __LINE__, "+~ %s moved to offset %s", xpath, crm_element_value(change, XML_DIFF_POSITION)); } else if(strcmp(op, "modify") == 0) { xmlNode *clist = first_named_child(change, XML_DIFF_LIST); char buffer_set[XML_BUFFER_SIZE]; char buffer_unset[XML_BUFFER_SIZE]; int o_set = 0; int o_unset = 0; buffer_set[0] = 0; buffer_unset[0] = 0; for (child = __xml_first_child(clist); child != NULL; child = __xml_next(child)) { const char *name = crm_element_value(child, "name"); op = crm_element_value(child, XML_DIFF_OP); if(op == NULL) { } else if(strcmp(op, "set") == 0) { const char *value = crm_element_value(child, "value"); if(o_set > 0) { o_set += snprintf(buffer_set + o_set, XML_BUFFER_SIZE - o_set, ", "); } o_set += snprintf(buffer_set + o_set, XML_BUFFER_SIZE - o_set, "@%s=%s", name, value); } else if(strcmp(op, "unset") == 0) { if(o_unset > 0) { o_unset += snprintf(buffer_unset + o_unset, XML_BUFFER_SIZE - o_unset, ", "); } o_unset += snprintf(buffer_unset + o_unset, XML_BUFFER_SIZE - o_unset, "@%s", name); } } if(o_set) { do_crm_log_alias(log_level, __FILE__, function, __LINE__, "+ %s: %s", xpath, buffer_set); } if(o_unset) { do_crm_log_alias(log_level, __FILE__, function, __LINE__, "-- %s: %s", xpath, buffer_unset); } } else if(strcmp(op, "delete") == 0) { do_crm_log_alias(log_level, __FILE__, function, __LINE__, "-- %s", xpath); } } return; } if (log_level < LOG_DEBUG || function == NULL) { options |= xml_log_option_diff_short; } removed = find_xml_node(patchset, "diff-removed", FALSE); for (child = __xml_first_child(removed); child != NULL; child = __xml_next(child)) { log_data_element(log_level, __FILE__, function, __LINE__, "- ", child, 0, options | xml_log_option_diff_minus); if (is_first) { is_first = FALSE; } else { do_crm_log_alias(log_level, __FILE__, function, __LINE__, " --- "); } } is_first = TRUE; added = find_xml_node(patchset, "diff-added", FALSE); for (child = __xml_first_child(added); child != NULL; child = __xml_next(child)) { log_data_element(log_level, __FILE__, function, __LINE__, "+ ", child, 0, options | xml_log_option_diff_plus); if (is_first) { is_first = FALSE; } else { do_crm_log_alias(log_level, __FILE__, function, __LINE__, " +++ "); } } } void xml_log_changes(uint8_t log_level, const char *function, xmlNode * xml) { GListPtr gIter = NULL; xml_private_t *doc = NULL; CRM_ASSERT(xml); CRM_ASSERT(xml->doc); doc = xml->doc->_private; if(is_not_set(doc->flags, xpf_dirty)) { return; } for(gIter = doc->deleted_paths; gIter; gIter = gIter->next) { do_crm_log_alias(log_level, __FILE__, function, __LINE__, "-- %s", (char*)gIter->data); } log_data_element(log_level, __FILE__, function, __LINE__, "+ ", xml, 0, xml_log_option_formatted|xml_log_option_dirty_add); } void xml_accept_changes(xmlNode * xml) { xmlNode *top = NULL; xml_private_t *doc = NULL; if(xml == NULL) { return; } crm_trace("Accepting changes to %p", xml); doc = xml->doc->_private; top = xmlDocGetRootElement(xml->doc); __xml_private_clean(xml->doc->_private); if(is_not_set(doc->flags, xpf_dirty)) { doc->flags = xpf_none; return; } doc->flags = xpf_none; __xml_accept_changes(top); } /* Simplified version for applying v1-style XML patches */ static void __subtract_xml_object(xmlNode * target, xmlNode * patch) { xmlNode *patch_child = NULL; xmlNode *cIter = NULL; xmlAttrPtr xIter = NULL; char *id = NULL; const char *name = NULL; const char *value = NULL; if (target == NULL || patch == NULL) { return; } if (target->type == XML_COMMENT_NODE) { gboolean dummy; subtract_xml_comment(target->parent, target, patch, &dummy); } name = crm_element_name(target); CRM_CHECK(name != NULL, return); CRM_CHECK(safe_str_eq(crm_element_name(target), crm_element_name(patch)), return); CRM_CHECK(safe_str_eq(ID(target), ID(patch)), return); /* check for XML_DIFF_MARKER in a child */ id = crm_element_value_copy(target, XML_ATTR_ID); value = crm_element_value(patch, XML_DIFF_MARKER); if (value != NULL && strcmp(value, "removed:top") == 0) { crm_trace("We are the root of the deletion: %s.id=%s", name, id); free_xml(target); free(id); return; } for (xIter = crm_first_attr(patch); xIter != NULL; xIter = xIter->next) { const char *p_name = (const char *)xIter->name; /* Removing and then restoring the id field would change the ordering of properties */ if (safe_str_neq(p_name, XML_ATTR_ID)) { xml_remove_prop(target, p_name); } } /* changes to child objects */ cIter = __xml_first_child(target); while (cIter) { xmlNode *target_child = cIter; cIter = __xml_next(cIter); if (target_child->type == XML_COMMENT_NODE) { patch_child = find_xml_comment(patch, target_child); } else { patch_child = find_entity(patch, crm_element_name(target_child), ID(target_child)); } __subtract_xml_object(target_child, patch_child); } free(id); } static void __add_xml_object(xmlNode * parent, xmlNode * target, xmlNode * patch) { xmlNode *patch_child = NULL; xmlNode *target_child = NULL; xmlAttrPtr xIter = NULL; const char *id = NULL; const char *name = NULL; const char *value = NULL; if (patch == NULL) { return; } else if (parent == NULL && target == NULL) { return; } /* check for XML_DIFF_MARKER in a child */ value = crm_element_value(patch, XML_DIFF_MARKER); if (target == NULL && value != NULL && strcmp(value, "added:top") == 0) { id = ID(patch); name = crm_element_name(patch); crm_trace("We are the root of the addition: %s.id=%s", name, id); add_node_copy(parent, patch); return; } else if(target == NULL) { id = ID(patch); name = crm_element_name(patch); crm_err("Could not locate: %s.id=%s", name, id); return; } if (target->type == XML_COMMENT_NODE) { add_xml_comment(parent, target, patch); } name = crm_element_name(target); CRM_CHECK(name != NULL, return); CRM_CHECK(safe_str_eq(crm_element_name(target), crm_element_name(patch)), return); CRM_CHECK(safe_str_eq(ID(target), ID(patch)), return); for (xIter = crm_first_attr(patch); xIter != NULL; xIter = xIter->next) { const char *p_name = (const char *)xIter->name; const char *p_value = crm_element_value(patch, p_name); xml_remove_prop(target, p_name); /* Preserve the patch order */ crm_xml_add(target, p_name, p_value); } /* changes to child objects */ for (patch_child = __xml_first_child(patch); patch_child != NULL; patch_child = __xml_next(patch_child)) { if (patch_child->type == XML_COMMENT_NODE) { target_child = find_xml_comment(target, patch_child); } else { target_child = find_entity(target, crm_element_name(patch_child), ID(patch_child)); } __add_xml_object(target, target_child, patch_child); } } bool xml_patch_versions(xmlNode *patchset, int add[3], int del[3]) { int lpc = 0; int format = 1; xmlNode *tmp = NULL; const char *vfields[] = { XML_ATTR_GENERATION_ADMIN, XML_ATTR_GENERATION, XML_ATTR_NUMUPDATES, }; crm_element_value_int(patchset, "format", &format); switch(format) { case 1: tmp = find_xml_node(patchset, "diff-removed", FALSE); tmp = find_xml_node(tmp, "cib", FALSE); if(tmp == NULL) { /* Revert to the diff-removed line */ tmp = find_xml_node(patchset, "diff-removed", FALSE); } break; case 2: tmp = find_xml_node(patchset, "version", FALSE); tmp = find_xml_node(tmp, "source", FALSE); break; default: crm_warn("Unknown patch format: %d", format); return -EINVAL; } if (tmp) { for(lpc = 0; lpc < DIMOF(vfields); lpc++) { crm_element_value_int(tmp, vfields[lpc], &(del[lpc])); crm_trace("Got %d for del[%s]", del[lpc], vfields[lpc]); } } switch(format) { case 1: tmp = find_xml_node(patchset, "diff-added", FALSE); tmp = find_xml_node(tmp, "cib", FALSE); if(tmp == NULL) { /* Revert to the diff-added line */ tmp = find_xml_node(patchset, "diff-added", FALSE); } break; case 2: tmp = find_xml_node(patchset, "version", FALSE); tmp = find_xml_node(tmp, "target", FALSE); break; default: crm_warn("Unknown patch format: %d", format); return -EINVAL; } if (tmp) { for(lpc = 0; lpc < DIMOF(vfields); lpc++) { crm_element_value_int(tmp, vfields[lpc], &(add[lpc])); crm_trace("Got %d for add[%s]", add[lpc], vfields[lpc]); } } return pcmk_ok; } static int xml_patch_version_check(xmlNode *xml, xmlNode *patchset, int format) { int lpc = 0; bool changed = FALSE; int this[] = { 0, 0, 0 }; int add[] = { 0, 0, 0 }; int del[] = { 0, 0, 0 }; const char *vfields[] = { XML_ATTR_GENERATION_ADMIN, XML_ATTR_GENERATION, XML_ATTR_NUMUPDATES, }; for(lpc = 0; lpc < DIMOF(vfields); lpc++) { crm_element_value_int(xml, vfields[lpc], &(this[lpc])); crm_trace("Got %d for this[%s]", this[lpc], vfields[lpc]); if (this[lpc] < 0) { this[lpc] = 0; } } /* Set some defaults in case nothing is present */ add[0] = this[0]; add[1] = this[1]; add[2] = this[2] + 1; for(lpc = 0; lpc < DIMOF(vfields); lpc++) { del[lpc] = this[lpc]; } xml_patch_versions(patchset, add, del); for(lpc = 0; lpc < DIMOF(vfields); lpc++) { if(this[lpc] < del[lpc]) { crm_debug("Current %s is too low (%d < %d)", vfields[lpc], this[lpc], del[lpc]); return -pcmk_err_diff_resync; } else if(this[lpc] > del[lpc]) { crm_info("Current %s is too high (%d > %d)", vfields[lpc], this[lpc], del[lpc]); return -pcmk_err_old_data; } } for(lpc = 0; lpc < DIMOF(vfields); lpc++) { if(add[lpc] > del[lpc]) { changed = TRUE; } } if(changed == FALSE) { crm_notice("Versions did not change in patch %d.%d.%d", add[0], add[1], add[2]); return -pcmk_err_old_data; } crm_debug("Can apply patch %d.%d.%d to %d.%d.%d", add[0], add[1], add[2], this[0], this[1], this[2]); return pcmk_ok; } static int xml_apply_patchset_v1(xmlNode *xml, xmlNode *patchset, bool check_version) { int rc = pcmk_ok; int root_nodes_seen = 0; char *version = crm_element_value_copy(xml, XML_ATTR_CRM_VERSION); xmlNode *child_diff = NULL; xmlNode *added = find_xml_node(patchset, "diff-added", FALSE); xmlNode *removed = find_xml_node(patchset, "diff-removed", FALSE); xmlNode *old = copy_xml(xml); crm_trace("Substraction Phase"); for (child_diff = __xml_first_child(removed); child_diff != NULL; child_diff = __xml_next(child_diff)) { CRM_CHECK(root_nodes_seen == 0, rc = FALSE); if (root_nodes_seen == 0) { __subtract_xml_object(xml, child_diff); } root_nodes_seen++; } if (root_nodes_seen > 1) { crm_err("(-) Diffs cannot contain more than one change set... saw %d", root_nodes_seen); rc = -ENOTUNIQ; } root_nodes_seen = 0; crm_trace("Addition Phase"); if (rc == pcmk_ok) { xmlNode *child_diff = NULL; for (child_diff = __xml_first_child(added); child_diff != NULL; child_diff = __xml_next(child_diff)) { CRM_CHECK(root_nodes_seen == 0, rc = FALSE); if (root_nodes_seen == 0) { __add_xml_object(NULL, xml, child_diff); } root_nodes_seen++; } } if (root_nodes_seen > 1) { crm_err("(+) Diffs cannot contain more than one change set... saw %d", root_nodes_seen); rc = -ENOTUNIQ; } purge_diff_markers(xml); /* Purge prior to checking the digest */ free_xml(old); free(version); return rc; } static xmlNode * __first_xml_child_match(xmlNode *parent, const char *name, const char *id) { xmlNode *cIter = NULL; for (cIter = __xml_first_child(parent); cIter != NULL; cIter = __xml_next(cIter)) { if(strcmp((const char *)cIter->name, name) != 0) { continue; } else if(id) { const char *cid = ID(cIter); if(cid == NULL || strcmp(cid, id) != 0) { continue; } } return cIter; } return NULL; } static xmlNode * __xml_find_path(xmlNode *top, const char *key) { xmlNode *target = (xmlNode*)top->doc; char *id = malloc(XML_BUFFER_SIZE); char *tag = malloc(XML_BUFFER_SIZE); char *section = malloc(XML_BUFFER_SIZE); char *current = strdup(key); char *remainder = malloc(XML_BUFFER_SIZE); int rc = 0; while(current) { rc = sscanf (current, "/%[^/]%s", section, remainder); if(rc <= 0) { crm_trace("Done"); break; } else if(rc > 2) { crm_trace("Aborting on %s", current); target = NULL; break; } else if(tag && section) { int f = sscanf (section, "%[^[][@id='%[^']", tag, id); switch(f) { case 1: target = __first_xml_child_match(target, tag, NULL); break; case 2: target = __first_xml_child_match(target, tag, id); break; default: crm_trace("Aborting on %s", section); target = NULL; break; } if(rc == 1 || target == NULL) { crm_trace("Done"); break; } else { char *tmp = current; current = remainder; remainder = tmp; } } } if(target) { char *path = (char *)xmlGetNodePath(target); crm_trace("Found %s for %s", path, key); free(path); } else { crm_debug("No match for %s", key); } free(remainder); free(current); free(section); free(tag); free(id); return target; } static int xml_apply_patchset_v2(xmlNode *xml, xmlNode *patchset, bool check_version) { int rc = pcmk_ok; xmlNode *change = NULL; for (change = __xml_first_child(patchset); change != NULL; change = __xml_next(change)) { xmlNode *match = NULL; const char *op = crm_element_value(change, XML_DIFF_OP); const char *xpath = crm_element_value(change, XML_DIFF_PATH); crm_trace("Processing %s %s", change->name, op); if(op == NULL) { continue; } #if 0 match = get_xpath_object(xpath, xml, LOG_TRACE); #else match = __xml_find_path(xml, xpath); #endif crm_trace("Performing %s on %s with %p", op, xpath, match); if(match == NULL && strcmp(op, "delete") == 0) { crm_debug("No %s match for %s in %p", op, xpath, xml->doc); continue; } else if(match == NULL) { crm_err("No %s match for %s in %p", op, xpath, xml->doc); rc = -pcmk_err_diff_failed; continue; } else if(strcmp(op, "create") == 0) { int position = 0; xmlNode *child = NULL; xmlNode *match_child = NULL; match_child = match->children; crm_element_value_int(change, XML_DIFF_POSITION, &position); while(match_child && position != __xml_offset(match_child)) { match_child = match_child->next; } child = xmlDocCopyNode(change->children, match->doc, 1); if(match_child) { crm_trace("Adding %s at position %d", child->name, position); xmlAddPrevSibling(match_child, child); } else if(match->last) { /* Add to the end */ crm_trace("Adding %s at position %d (end)", child->name, position); xmlAddNextSibling(match->last, child); } else { crm_trace("Adding %s at position %d (first)", child->name, position); CRM_LOG_ASSERT(position == 0); xmlAddChild(match, child); } crm_node_created(child); } else if(strcmp(op, "move") == 0) { int position = 0; crm_element_value_int(change, XML_DIFF_POSITION, &position); if(position != __xml_offset(match)) { xmlNode *match_child = NULL; int p = position; if(p > __xml_offset(match)) { p++; /* Skip ourselves */ } CRM_ASSERT(match->parent != NULL); match_child = match->parent->children; while(match_child && p != __xml_offset(match_child)) { match_child = match_child->next; } crm_trace("Moving %s to position %d (was %d, prev %p, %s %p)", match->name, position, __xml_offset(match), match->prev, match_child?"next":"last", match_child?match_child:match->parent->last); if(match_child) { xmlAddPrevSibling(match_child, match); } else { CRM_ASSERT(match->parent->last != NULL); xmlAddNextSibling(match->parent->last, match); } } else { crm_trace("%s is already in position %d", match->name, position); } if(position != __xml_offset(match)) { crm_err("Moved %s.%d to position %d instead of %d (%p)", match->name, ID(match), __xml_offset(match), position, match->prev); rc = -pcmk_err_diff_failed; } } else if(strcmp(op, "delete") == 0) { free_xml(match); } else if(strcmp(op, "modify") == 0) { xmlAttr *pIter = crm_first_attr(match); xmlNode *attrs = __xml_first_child(first_named_child(change, XML_DIFF_RESULT)); if(attrs == NULL) { rc = -ENOMSG; continue; } while(pIter != NULL) { const char *name = (const char *)pIter->name; pIter = pIter->next; xml_remove_prop(match, name); } for (pIter = crm_first_attr(attrs); pIter != NULL; pIter = pIter->next) { const char *name = (const char *)pIter->name; const char *value = crm_element_value(attrs, name); crm_xml_add(match, name, value); } } else { crm_err("Unknown operation: %s", op); } } return rc; } int xml_apply_patchset(xmlNode *xml, xmlNode *patchset, bool check_version) { int format = 1; int rc = pcmk_ok; xmlNode *old = NULL; const char *digest = crm_element_value(patchset, XML_ATTR_DIGEST); if(patchset == NULL) { return rc; } xml_log_patchset(LOG_TRACE, __FUNCTION__, patchset); crm_element_value_int(patchset, "format", &format); if(check_version) { rc = xml_patch_version_check(xml, patchset, format); if(rc != pcmk_ok) { return rc; } } if(digest) { /* Make it available for logging if the result doesn't have the expected digest */ old = copy_xml(xml); } if(rc == pcmk_ok) { switch(format) { case 1: rc = xml_apply_patchset_v1(xml, patchset, check_version); break; case 2: rc = xml_apply_patchset_v2(xml, patchset, check_version); break; default: crm_err("Unknown patch format: %d", format); rc = -EINVAL; } } if(rc == pcmk_ok && digest) { static struct qb_log_callsite *digest_cs = NULL; char *new_digest = NULL; char *version = crm_element_value_copy(xml, XML_ATTR_CRM_VERSION); if (digest_cs == NULL) { digest_cs = qb_log_callsite_get(__func__, __FILE__, "diff-digest", LOG_TRACE, __LINE__, crm_trace_nonlog); } new_digest = calculate_xml_versioned_digest(xml, FALSE, TRUE, version); if (safe_str_neq(new_digest, digest)) { crm_info("v%d digest mis-match: expected %s, calculated %s", format, digest, new_digest); rc = -pcmk_err_diff_failed; if (digest_cs && digest_cs->targets) { save_xml_to_file(old, "PatchDigest:input", NULL); save_xml_to_file(xml, "PatchDigest:result", NULL); save_xml_to_file(patchset,"PatchDigest:diff", NULL); } else { crm_trace("%p %0.6x", digest_cs, digest_cs ? digest_cs->targets : 0); } } else { crm_trace("v%d digest matched: expected %s, calculated %s", format, digest, new_digest); } free(new_digest); free(version); } free_xml(old); return rc; } xmlNode * find_xml_node(xmlNode * root, const char *search_path, gboolean must_find) { xmlNode *a_child = NULL; const char *name = "NULL"; if (root != NULL) { name = crm_element_name(root); } if (search_path == NULL) { crm_warn("Will never find "); return NULL; } for (a_child = __xml_first_child(root); a_child != NULL; a_child = __xml_next(a_child)) { if (strcmp((const char *)a_child->name, search_path) == 0) { /* crm_trace("returning node (%s).", crm_element_name(a_child)); */ return a_child; } } if (must_find) { crm_warn("Could not find %s in %s.", search_path, name); } else if (root != NULL) { crm_trace("Could not find %s in %s.", search_path, name); } else { crm_trace("Could not find %s in .", search_path); } return NULL; } xmlNode * find_entity(xmlNode * parent, const char *node_name, const char *id) { xmlNode *a_child = NULL; for (a_child = __xml_first_child(parent); a_child != NULL; a_child = __xml_next(a_child)) { /* Uncertain if node_name == NULL check is strictly necessary here */ if (node_name == NULL || strcmp((const char *)a_child->name, node_name) == 0) { const char *cid = ID(a_child); if (id == NULL || (cid != NULL && strcmp(id, cid) == 0)) { return a_child; } } } crm_trace("node <%s id=%s> not found in %s.", node_name, id, crm_element_name(parent)); return NULL; } void copy_in_properties(xmlNode * target, xmlNode * src) { if (src == NULL) { crm_warn("No node to copy properties from"); } else if (target == NULL) { crm_err("No node to copy properties into"); } else { xmlAttrPtr pIter = NULL; for (pIter = crm_first_attr(src); pIter != NULL; pIter = pIter->next) { const char *p_name = (const char *)pIter->name; const char *p_value = crm_attr_value(pIter); expand_plus_plus(target, p_name, p_value); } } return; } void fix_plus_plus_recursive(xmlNode * target) { /* TODO: Remove recursion and use xpath searches for value++ */ xmlNode *child = NULL; xmlAttrPtr pIter = NULL; for (pIter = crm_first_attr(target); pIter != NULL; pIter = pIter->next) { const char *p_name = (const char *)pIter->name; const char *p_value = crm_attr_value(pIter); expand_plus_plus(target, p_name, p_value); } for (child = __xml_first_child(target); child != NULL; child = __xml_next(child)) { fix_plus_plus_recursive(child); } } void expand_plus_plus(xmlNode * target, const char *name, const char *value) { int offset = 1; int name_len = 0; int int_value = 0; int value_len = 0; const char *old_value = NULL; if (value == NULL || name == NULL) { return; } old_value = crm_element_value(target, name); if (old_value == NULL) { /* if no previous value, set unexpanded */ goto set_unexpanded; } else if (strstr(value, name) != value) { goto set_unexpanded; } name_len = strlen(name); value_len = strlen(value); if (value_len < (name_len + 2) || value[name_len] != '+' || (value[name_len + 1] != '+' && value[name_len + 1] != '=')) { goto set_unexpanded; } /* if we are expanding ourselves, * then no previous value was set and leave int_value as 0 */ if (old_value != value) { int_value = char2score(old_value); } if (value[name_len + 1] != '+') { const char *offset_s = value + (name_len + 2); offset = char2score(offset_s); } int_value += offset; if (int_value > INFINITY) { int_value = (int)INFINITY; } crm_xml_add_int(target, name, int_value); return; set_unexpanded: if (old_value == value) { /* the old value is already set, nothing to do */ return; } crm_xml_add(target, name, value); return; } xmlDoc * getDocPtr(xmlNode * node) { xmlDoc *doc = NULL; CRM_CHECK(node != NULL, return NULL); doc = node->doc; if (doc == NULL) { doc = xmlNewDoc((const xmlChar *)"1.0"); xmlDocSetRootElement(doc, node); xmlSetTreeDoc(node, doc); } return doc; } xmlNode * add_node_copy(xmlNode * parent, xmlNode * src_node) { xmlNode *child = NULL; xmlDoc *doc = getDocPtr(parent); CRM_CHECK(src_node != NULL, return NULL); child = xmlDocCopyNode(src_node, doc, 1); xmlAddChild(parent, child); crm_node_created(child); return child; } int add_node_nocopy(xmlNode * parent, const char *name, xmlNode * child) { add_node_copy(parent, child); free_xml(child); return 1; } static bool __xml_acl_check(xmlNode *xml, const char *name, enum xml_private_flags mode) { CRM_ASSERT(xml); CRM_ASSERT(xml->doc); CRM_ASSERT(xml->doc->_private); #if ENABLE_ACL { if(TRACKING_CHANGES(xml) && xml_acl_enabled(xml)) { int offset = 0; xmlNode *parent = xml; char buffer[XML_BUFFER_SIZE]; xml_private_t *docp = xml->doc->_private; if(docp->acls == NULL) { crm_trace("Ordinary user %s cannot access the CIB without any defined ACLs", docp->user); set_doc_flag(xml, xpf_acl_denied); return FALSE; } offset = __get_prefix(NULL, xml, buffer, offset); if(name) { offset += snprintf(buffer + offset, XML_BUFFER_SIZE - offset, "[@%s]", name); } CRM_LOG_ASSERT(offset > 0); /* Walk the tree upwards looking for xml_acl_* flags * - Creating an attribute requires write permissions for the node * - Creating a child requires write permissions for the parent */ if(name) { xmlAttr *attr = xmlHasProp(xml, (const xmlChar *)name); if(attr && mode == xpf_acl_create) { mode = xpf_acl_write; } } while(parent && parent->_private) { xml_private_t *p = parent->_private; if(__xml_acl_mode_test(p->flags, mode)) { return TRUE; } else if(is_set(p->flags, xpf_acl_deny)) { crm_trace("%x access denied to %s: parent", mode, buffer); set_doc_flag(xml, xpf_acl_denied); return FALSE; } parent = parent->parent; } crm_trace("%x access denied to %s: default", mode, buffer); set_doc_flag(xml, xpf_acl_denied); return FALSE; } } #endif return TRUE; } const char * crm_xml_add(xmlNode * node, const char *name, const char *value) { bool dirty = FALSE; xmlAttr *attr = NULL; CRM_CHECK(node != NULL, return NULL); CRM_CHECK(name != NULL, return NULL); if (value == NULL) { return NULL; } #if XML_PARANOIA_CHECKS { const char *old_value = NULL; old_value = crm_element_value(node, name); /* Could be re-setting the same value */ CRM_CHECK(old_value != value, crm_err("Cannot reset %s with crm_xml_add(%s)", name, value); return value); } #endif if(TRACKING_CHANGES(node)) { const char *old = crm_element_value(node, name); if(old == NULL || value == NULL || strcmp(old, value) != 0) { dirty = TRUE; } } if(dirty && __xml_acl_check(node, name, xpf_acl_create) == FALSE) { crm_trace("Cannot add %s=%s to %s", name, value, node->name); return NULL; } attr = xmlSetProp(node, (const xmlChar *)name, (const xmlChar *)value); if(dirty) { crm_attr_dirty(attr); } CRM_CHECK(attr && attr->children && attr->children->content, return NULL); return (char *)attr->children->content; } const char * crm_xml_replace(xmlNode * node, const char *name, const char *value) { bool dirty = FALSE; xmlAttr *attr = NULL; const char *old_value = NULL; CRM_CHECK(node != NULL, return NULL); CRM_CHECK(name != NULL && name[0] != 0, return NULL); old_value = crm_element_value(node, name); /* Could be re-setting the same value */ CRM_CHECK(old_value != value, return value); if(__xml_acl_check(node, name, xpf_acl_write) == FALSE) { /* Create a fake object linked to doc->_private instead? */ crm_trace("Cannot replace %s=%s to %s", name, value, node->name); return NULL; } else if (old_value != NULL && value == NULL) { xml_remove_prop(node, name); return NULL; } else if (value == NULL) { return NULL; } if(TRACKING_CHANGES(node)) { if(old_value == NULL || value == NULL || strcmp(old_value, value) != 0) { dirty = TRUE; } } attr = xmlSetProp(node, (const xmlChar *)name, (const xmlChar *)value); if(dirty) { crm_attr_dirty(attr); } CRM_CHECK(attr && attr->children && attr->children->content, return NULL); return (char *)attr->children->content; } const char * crm_xml_add_int(xmlNode * node, const char *name, int value) { char *number = crm_itoa(value); const char *added = crm_xml_add(node, name, number); free(number); return added; } xmlNode * create_xml_node(xmlNode * parent, const char *name) { xmlDoc *doc = NULL; xmlNode *node = NULL; if (name == NULL || name[0] == 0) { CRM_CHECK(name != NULL && name[0] == 0, return NULL); return NULL; } if (parent == NULL) { doc = xmlNewDoc((const xmlChar *)"1.0"); node = xmlNewDocRawNode(doc, NULL, (const xmlChar *)name, NULL); xmlDocSetRootElement(doc, node); } else { doc = getDocPtr(parent); node = xmlNewDocRawNode(doc, NULL, (const xmlChar *)name, NULL); xmlAddChild(parent, node); } crm_node_created(node); return node; } static inline int __get_prefix(const char *prefix, xmlNode *xml, char *buffer, int offset) { const char *id = ID(xml); if(offset == 0 && prefix == NULL && xml->parent) { offset = __get_prefix(NULL, xml->parent, buffer, offset); } if(id) { offset += snprintf(buffer + offset, XML_BUFFER_SIZE - offset, "/%s[@id='%s']", (const char *)xml->name, id); } else if(xml->name) { offset += snprintf(buffer + offset, XML_BUFFER_SIZE - offset, "/%s", (const char *)xml->name); } return offset; } char * xml_get_path(xmlNode *xml) { int offset = 0; char buffer[XML_BUFFER_SIZE]; if(__get_prefix(NULL, xml, buffer, offset) > 0) { return strdup(buffer); } return NULL; } void free_xml(xmlNode * child) { if (child != NULL) { xmlNode *top = NULL; xmlDoc *doc = child->doc; xml_private_t *p = child->_private; if (doc != NULL) { top = xmlDocGetRootElement(doc); } if (doc != NULL && top == child) { /* Free everything */ xmlFreeDoc(doc); } else if(__xml_acl_check(child, NULL, xpf_acl_write) == FALSE) { int offset = 0; char buffer[XML_BUFFER_SIZE]; __get_prefix(NULL, child, buffer, offset); crm_trace("Cannot remove %s %x", buffer, p->flags); return; } else { if(doc && TRACKING_CHANGES(child) && is_not_set(p->flags, xpf_created)) { int offset = 0; char buffer[XML_BUFFER_SIZE]; if(__get_prefix(NULL, child, buffer, offset) > 0) { crm_trace("Deleting %s %p from %p", buffer, child, doc); p = doc->_private; p->deleted_paths = g_list_append(p->deleted_paths, strdup(buffer)); set_doc_flag(child, xpf_dirty); } } /* Free this particular subtree * Make sure to unlink it from the parent first */ xmlUnlinkNode(child); xmlFreeNode(child); } } } xmlNode * copy_xml(xmlNode * src) { xmlDoc *doc = xmlNewDoc((const xmlChar *)"1.0"); xmlNode *copy = xmlDocCopyNode(src, doc, 1); xmlDocSetRootElement(doc, copy); xmlSetTreeDoc(copy, doc); return copy; } static void crm_xml_err(void *ctx, const char *msg, ...) G_GNUC_PRINTF(2, 3); static void crm_xml_err(void *ctx, const char *msg, ...) { int len = 0; va_list args; char *buf = NULL; static int buffer_len = 0; static char *buffer = NULL; static struct qb_log_callsite *xml_error_cs = NULL; va_start(args, msg); len = vasprintf(&buf, msg, args); if(xml_error_cs == NULL) { xml_error_cs = qb_log_callsite_get( __func__, __FILE__, "xml library error", LOG_TRACE, __LINE__, crm_trace_nonlog); } if (strchr(buf, '\n')) { buf[len - 1] = 0; if (buffer) { crm_err("XML Error: %s%s", buffer, buf); free(buffer); } else { crm_err("XML Error: %s", buf); } if (xml_error_cs && xml_error_cs->targets) { crm_abort(__FILE__, __PRETTY_FUNCTION__, __LINE__, "xml library error", TRUE, TRUE); } buffer = NULL; buffer_len = 0; } else if (buffer == NULL) { buffer_len = len; buffer = buf; buf = NULL; } else { buffer = realloc_safe(buffer, 1 + buffer_len + len); memcpy(buffer + buffer_len, buf, len); buffer_len += len; buffer[buffer_len] = 0; } va_end(args); free(buf); } xmlNode * string2xml(const char *input) { xmlNode *xml = NULL; xmlDocPtr output = NULL; xmlParserCtxtPtr ctxt = NULL; xmlErrorPtr last_error = NULL; if (input == NULL) { crm_err("Can't parse NULL input"); return NULL; } /* create a parser context */ ctxt = xmlNewParserCtxt(); CRM_CHECK(ctxt != NULL, return NULL); /* xmlCtxtUseOptions(ctxt, XML_PARSE_NOBLANKS|XML_PARSE_RECOVER); */ xmlCtxtResetLastError(ctxt); xmlSetGenericErrorFunc(ctxt, crm_xml_err); /* initGenericErrorDefaultFunc(crm_xml_err); */ output = xmlCtxtReadDoc(ctxt, (const xmlChar *)input, NULL, NULL, XML_PARSE_NOBLANKS | XML_PARSE_RECOVER); if (output) { xml = xmlDocGetRootElement(output); } last_error = xmlCtxtGetLastError(ctxt); if (last_error && last_error->code != XML_ERR_OK) { /* crm_abort(__FILE__,__FUNCTION__,__LINE__, "last_error->code != XML_ERR_OK", TRUE, TRUE); */ /* * http://xmlsoft.org/html/libxml-xmlerror.html#xmlErrorLevel * http://xmlsoft.org/html/libxml-xmlerror.html#xmlParserErrors */ crm_warn("Parsing failed (domain=%d, level=%d, code=%d): %s", last_error->domain, last_error->level, last_error->code, last_error->message); if (last_error->code == XML_ERR_DOCUMENT_EMPTY) { CRM_LOG_ASSERT("Cannot parse an empty string"); } else if (last_error->code != XML_ERR_DOCUMENT_END) { crm_err("Couldn't%s parse %d chars: %s", xml ? " fully" : "", (int)strlen(input), input); if (xml != NULL) { crm_log_xml_err(xml, "Partial"); } } else { int len = strlen(input); int lpc = 0; while(lpc < len) { crm_warn("Parse error[+%.3d]: %.80s", lpc, input+lpc); lpc += 80; } CRM_LOG_ASSERT("String parsing error"); } } xmlFreeParserCtxt(ctxt); return xml; } xmlNode * stdin2xml(void) { size_t data_length = 0; size_t read_chars = 0; char *xml_buffer = NULL; xmlNode *xml_obj = NULL; do { size_t next = XML_BUFFER_SIZE + data_length + 1; if(next <= 0) { crm_err("Buffer size exceeded at: %l + %d", data_length, XML_BUFFER_SIZE); break; } xml_buffer = realloc_safe(xml_buffer, next); read_chars = fread(xml_buffer + data_length, 1, XML_BUFFER_SIZE, stdin); data_length += read_chars; } while (read_chars > 0); if (data_length == 0) { crm_warn("No XML supplied on stdin"); free(xml_buffer); return NULL; } xml_buffer[data_length] = '\0'; xml_obj = string2xml(xml_buffer); free(xml_buffer); crm_log_xml_trace(xml_obj, "Created fragment"); return xml_obj; } static char * decompress_file(const char *filename) { char *buffer = NULL; #if HAVE_BZLIB_H int rc = 0; size_t length = 0, read_len = 0; BZFILE *bz_file = NULL; FILE *input = fopen(filename, "r"); if (input == NULL) { crm_perror(LOG_ERR, "Could not open %s for reading", filename); return NULL; } bz_file = BZ2_bzReadOpen(&rc, input, 0, 0, NULL, 0); if (rc != BZ_OK) { BZ2_bzReadClose(&rc, bz_file); return NULL; } rc = BZ_OK; while (rc == BZ_OK) { buffer = realloc_safe(buffer, XML_BUFFER_SIZE + length + 1); read_len = BZ2_bzRead(&rc, bz_file, buffer + length, XML_BUFFER_SIZE); crm_trace("Read %ld bytes from file: %d", (long)read_len, rc); if (rc == BZ_OK || rc == BZ_STREAM_END) { length += read_len; } } buffer[length] = '\0'; if (rc != BZ_STREAM_END) { crm_err("Couldnt read compressed xml from file"); free(buffer); buffer = NULL; } BZ2_bzReadClose(&rc, bz_file); fclose(input); #else crm_err("Cannot read compressed files:" " bzlib was not available at compile time"); #endif return buffer; } void strip_text_nodes(xmlNode * xml) { xmlNode *iter = xml->children; while (iter) { xmlNode *next = iter->next; switch (iter->type) { case XML_TEXT_NODE: /* Remove it */ xmlUnlinkNode(iter); xmlFreeNode(iter); break; case XML_ELEMENT_NODE: /* Search it */ strip_text_nodes(iter); break; default: /* Leave it */ break; } iter = next; } } xmlNode * filename2xml(const char *filename) { xmlNode *xml = NULL; xmlDocPtr output = NULL; const char *match = NULL; xmlParserCtxtPtr ctxt = NULL; xmlErrorPtr last_error = NULL; static int xml_options = XML_PARSE_NOBLANKS | XML_PARSE_RECOVER; /* create a parser context */ ctxt = xmlNewParserCtxt(); CRM_CHECK(ctxt != NULL, return NULL); /* xmlCtxtUseOptions(ctxt, XML_PARSE_NOBLANKS|XML_PARSE_RECOVER); */ xmlCtxtResetLastError(ctxt); xmlSetGenericErrorFunc(ctxt, crm_xml_err); /* initGenericErrorDefaultFunc(crm_xml_err); */ if (filename) { match = strstr(filename, ".bz2"); } if (filename == NULL) { /* STDIN_FILENO == fileno(stdin) */ output = xmlCtxtReadFd(ctxt, STDIN_FILENO, "unknown.xml", NULL, xml_options); } else if (match == NULL || match[4] != 0) { output = xmlCtxtReadFile(ctxt, filename, NULL, xml_options); } else { char *input = decompress_file(filename); output = xmlCtxtReadDoc(ctxt, (const xmlChar *)input, NULL, NULL, xml_options); free(input); } if (output && (xml = xmlDocGetRootElement(output))) { strip_text_nodes(xml); } last_error = xmlCtxtGetLastError(ctxt); if (last_error && last_error->code != XML_ERR_OK) { /* crm_abort(__FILE__,__FUNCTION__,__LINE__, "last_error->code != XML_ERR_OK", TRUE, TRUE); */ /* * http://xmlsoft.org/html/libxml-xmlerror.html#xmlErrorLevel * http://xmlsoft.org/html/libxml-xmlerror.html#xmlParserErrors */ crm_err("Parsing failed (domain=%d, level=%d, code=%d): %s", last_error->domain, last_error->level, last_error->code, last_error->message); if (last_error && last_error->code != XML_ERR_OK) { crm_err("Couldn't%s parse %s", xml ? " fully" : "", filename); if (xml != NULL) { crm_log_xml_err(xml, "Partial"); } } } xmlFreeParserCtxt(ctxt); return xml; } /*! * \internal * \brief Add a "last written" attribute to an XML node, set to current time * * \param[in] xml_node XML node to get attribute * * \return Value that was set, or NULL on error */ const char * crm_xml_add_last_written(xmlNode *xml_node) { time_t now = time(NULL); char *now_str = ctime(&now); now_str[24] = EOS; /* replace the newline */ return crm_xml_add(xml_node, XML_CIB_ATTR_WRITTEN, now_str); } static int write_xml_stream(xmlNode * xml_node, const char *filename, FILE * stream, gboolean compress) { int res = 0; char *buffer = NULL; unsigned int out = 0; CRM_CHECK(stream != NULL, return -1); crm_trace("Writing XML out to %s", filename); if (xml_node == NULL) { crm_err("Cannot write NULL to %s", filename); fclose(stream); return -1; } crm_log_xml_trace(xml_node, "Writing out"); buffer = dump_xml_formatted(xml_node); CRM_CHECK(buffer != NULL && strlen(buffer) > 0, crm_log_xml_warn(xml_node, "dump:failed"); goto bail); if (compress) { #if HAVE_BZLIB_H int rc = BZ_OK; unsigned int in = 0; BZFILE *bz_file = NULL; bz_file = BZ2_bzWriteOpen(&rc, stream, 5, 0, 30); if (rc != BZ_OK) { crm_err("bzWriteOpen failed: %d", rc); } else { BZ2_bzWrite(&rc, bz_file, buffer, strlen(buffer)); if (rc != BZ_OK) { crm_err("bzWrite() failed: %d", rc); } } if (rc == BZ_OK) { BZ2_bzWriteClose(&rc, bz_file, 0, &in, &out); if (rc != BZ_OK) { crm_err("bzWriteClose() failed: %d", rc); out = -1; } else { crm_trace("%s: In: %d, out: %d", filename, in, out); } } #else crm_err("Cannot write compressed files:" " bzlib was not available at compile time"); #endif } if (out <= 0) { res = fprintf(stream, "%s", buffer); if (res < 0) { crm_perror(LOG_ERR, "Cannot write output to %s", filename); goto bail; } } bail: if (fflush(stream) != 0) { crm_perror(LOG_ERR, "fflush for %s failed:", filename); res = -1; } if (fsync(fileno(stream)) < 0) { crm_perror(LOG_ERR, "fsync for %s failed:", filename); res = -1; } fclose(stream); crm_trace("Saved %d bytes to the Cib as XML", res); free(buffer); return res; } int write_xml_fd(xmlNode * xml_node, const char *filename, int fd, gboolean compress) { FILE *stream = NULL; CRM_CHECK(fd > 0, return -1); stream = fdopen(fd, "w"); return write_xml_stream(xml_node, filename, stream, compress); } int write_xml_file(xmlNode * xml_node, const char *filename, gboolean compress) { FILE *stream = NULL; stream = fopen(filename, "w"); return write_xml_stream(xml_node, filename, stream, compress); } xmlNode * get_message_xml(xmlNode * msg, const char *field) { xmlNode *tmp = first_named_child(msg, field); return __xml_first_child(tmp); } gboolean add_message_xml(xmlNode * msg, const char *field, xmlNode * xml) { xmlNode *holder = create_xml_node(msg, field); add_node_copy(holder, xml); return TRUE; } static char * crm_xml_escape_shuffle(char *text, int start, int *length, const char *replace) { int lpc; int offset = strlen(replace) - 1; /* We have space for 1 char already */ *length += offset; text = realloc_safe(text, *length); for (lpc = (*length) - 1; lpc > (start + offset); lpc--) { text[lpc] = text[lpc - offset]; } memcpy(text + start, replace, offset + 1); return text; } char * crm_xml_escape(const char *text) { int index; int changes = 0; int length = 1 + strlen(text); char *copy = strdup(text); /* * When xmlCtxtReadDoc() parses < and friends in a * value, it converts them to their human readable * form. * * If one uses xmlNodeDump() to convert it back to a * string, all is well, because special characters are * converted back to their escape sequences. * * However xmlNodeDump() is randomly dog slow, even with the same * input. So we need to replicate the escapeing in our custom * version so that the result can be re-parsed by xmlCtxtReadDoc() * when necessary. */ for (index = 0; index < length; index++) { switch (copy[index]) { case 0: break; case '<': copy = crm_xml_escape_shuffle(copy, index, &length, "<"); changes++; break; case '>': copy = crm_xml_escape_shuffle(copy, index, &length, ">"); changes++; break; case '"': copy = crm_xml_escape_shuffle(copy, index, &length, """); changes++; break; case '\'': copy = crm_xml_escape_shuffle(copy, index, &length, "'"); changes++; break; case '&': copy = crm_xml_escape_shuffle(copy, index, &length, "&"); changes++; break; case '\t': /* Might as well just expand to a few spaces... */ copy = crm_xml_escape_shuffle(copy, index, &length, " "); changes++; break; case '\n': /* crm_trace("Convert: \\%.3o", copy[index]); */ copy = crm_xml_escape_shuffle(copy, index, &length, "\\n"); changes++; break; case '\r': copy = crm_xml_escape_shuffle(copy, index, &length, "\\r"); changes++; break; /* For debugging... case '\\': crm_trace("Passthrough: \\%c", copy[index+1]); break; */ default: /* Check for and replace non-printing characters with their octal equivalent */ if(copy[index] < ' ' || copy[index] > '~') { char *replace = crm_strdup_printf("\\%.3o", copy[index]); /* crm_trace("Convert to octal: \\%.3o", copy[index]); */ copy = crm_xml_escape_shuffle(copy, index, &length, replace); free(replace); changes++; } } } if (changes) { crm_trace("Dumped '%s'", copy); } return copy; } static inline void dump_xml_attr(xmlAttrPtr attr, int options, char **buffer, int *offset, int *max) { char *p_value = NULL; const char *p_name = NULL; + xml_private_t *p = NULL; CRM_ASSERT(buffer != NULL); if (attr == NULL || attr->children == NULL) { return; } + p = attr->_private; + if (p && is_set(p->flags, xpf_deleted)) { + return; + } + p_name = (const char *)attr->name; p_value = crm_xml_escape((const char *)attr->children->content); buffer_print(*buffer, *max, *offset, " %s=\"%s\"", p_name, p_value); free(p_value); } static void __xml_log_element(int log_level, const char *file, const char *function, int line, const char *prefix, xmlNode * data, int depth, int options) { int max = 0; int offset = 0; const char *name = NULL; const char *hidden = NULL; xmlNode *child = NULL; xmlAttrPtr pIter = NULL; if(data == NULL) { return; } name = crm_element_name(data); if(is_set(options, xml_log_option_open)) { char *buffer = NULL; insert_prefix(options, &buffer, &offset, &max, depth); if(data->type == XML_COMMENT_NODE) { buffer_print(buffer, max, offset, ""); } else { buffer_print(buffer, max, offset, "<%s", name); } hidden = crm_element_value(data, "hidden"); for (pIter = crm_first_attr(data); pIter != NULL; pIter = pIter->next) { xml_private_t *p = pIter->_private; const char *p_name = (const char *)pIter->name; const char *p_value = crm_attr_value(pIter); char *p_copy = NULL; if(is_set(p->flags, xpf_deleted)) { continue; } else if ((is_set(options, xml_log_option_diff_plus) || is_set(options, xml_log_option_diff_minus)) && strcmp(XML_DIFF_MARKER, p_name) == 0) { continue; } else if (hidden != NULL && p_name[0] != 0 && strstr(hidden, p_name) != NULL) { p_copy = strdup("*****"); } else { p_copy = crm_xml_escape(p_value); } buffer_print(buffer, max, offset, " %s=\"%s\"", p_name, p_copy); free(p_copy); } if(xml_has_children(data) == FALSE) { buffer_print(buffer, max, offset, "/>"); } else if(is_set(options, xml_log_option_children)) { buffer_print(buffer, max, offset, ">"); } else { buffer_print(buffer, max, offset, "/>"); } do_crm_log_alias(log_level, file, function, line, "%s %s", prefix, buffer); free(buffer); } if(data->type == XML_COMMENT_NODE) { return; } else if(xml_has_children(data) == FALSE) { return; } else if(is_set(options, xml_log_option_children)) { offset = 0; max = 0; for (child = __xml_first_child(data); child != NULL; child = __xml_next(child)) { __xml_log_element(log_level, file, function, line, prefix, child, depth + 1, options|xml_log_option_open|xml_log_option_close); } } if(is_set(options, xml_log_option_close)) { char *buffer = NULL; insert_prefix(options, &buffer, &offset, &max, depth); buffer_print(buffer, max, offset, "", name); do_crm_log_alias(log_level, file, function, line, "%s %s", prefix, buffer); free(buffer); } } static void __xml_log_change_element(int log_level, const char *file, const char *function, int line, const char *prefix, xmlNode * data, int depth, int options) { xml_private_t *p; char *prefix_m = NULL; xmlNode *child = NULL; xmlAttrPtr pIter = NULL; if(data == NULL) { return; } p = data->_private; prefix_m = strdup(prefix); prefix_m[1] = '+'; if(is_set(p->flags, xpf_dirty) && is_set(p->flags, xpf_created)) { /* Continue and log full subtree */ __xml_log_element(log_level, file, function, line, prefix_m, data, depth, options|xml_log_option_open|xml_log_option_close|xml_log_option_children); } else if(is_set(p->flags, xpf_dirty)) { char *spaces = calloc(80, 1); int s_count = 0, s_max = 80; char *prefix_del = NULL; char *prefix_moved = NULL; const char *flags = prefix; insert_prefix(options, &spaces, &s_count, &s_max, depth); prefix_del = strdup(prefix); prefix_del[0] = '-'; prefix_del[1] = '-'; prefix_moved = strdup(prefix); prefix_moved[1] = '~'; if(is_set(p->flags, xpf_moved)) { flags = prefix_moved; } else { flags = prefix; } __xml_log_element(log_level, file, function, line, flags, data, depth, options|xml_log_option_open); for (pIter = crm_first_attr(data); pIter != NULL; pIter = pIter->next) { const char *aname = (const char*)pIter->name; p = pIter->_private; if(is_set(p->flags, xpf_deleted)) { const char *value = crm_element_value(data, aname); flags = prefix_del; do_crm_log_alias(log_level, file, function, line, "%s %s @%s=%s", flags, spaces, aname, value); } else if(is_set(p->flags, xpf_dirty)) { const char *value = crm_element_value(data, aname); if(is_set(p->flags, xpf_created)) { flags = prefix_m; } else if(is_set(p->flags, xpf_modified)) { flags = prefix; } else if(is_set(p->flags, xpf_moved)) { flags = prefix_moved; } else { flags = prefix; } do_crm_log_alias(log_level, file, function, line, "%s %s @%s=%s", flags, spaces, aname, value); } } free(prefix_moved); free(prefix_del); free(spaces); for (child = __xml_first_child(data); child != NULL; child = __xml_next(child)) { __xml_log_change_element(log_level, file, function, line, prefix, child, depth + 1, options); } __xml_log_element(log_level, file, function, line, prefix, data, depth, options|xml_log_option_close); } else { for (child = __xml_first_child(data); child != NULL; child = __xml_next(child)) { __xml_log_change_element(log_level, file, function, line, prefix, child, depth + 1, options); } } free(prefix_m); } void log_data_element(int log_level, const char *file, const char *function, int line, const char *prefix, xmlNode * data, int depth, int options) { xmlNode *a_child = NULL; char *prefix_m = NULL; if (prefix == NULL) { prefix = ""; } /* Since we use the same file and line, to avoid confusing libqb, we need to use the same format strings */ if (data == NULL) { do_crm_log_alias(log_level, file, function, line, "%s: %s", prefix, "No data to dump as XML"); return; } if(is_set(options, xml_log_option_dirty_add) || is_set(options, xml_log_option_dirty_add)) { __xml_log_change_element(log_level, file, function, line, prefix, data, depth, options); return; } if (is_set(options, xml_log_option_formatted)) { if (is_set(options, xml_log_option_diff_plus) && (data->children == NULL || crm_element_value(data, XML_DIFF_MARKER))) { options |= xml_log_option_diff_all; prefix_m = strdup(prefix); prefix_m[1] = '+'; prefix = prefix_m; } else if (is_set(options, xml_log_option_diff_minus) && (data->children == NULL || crm_element_value(data, XML_DIFF_MARKER))) { options |= xml_log_option_diff_all; prefix_m = strdup(prefix); prefix_m[1] = '-'; prefix = prefix_m; } } if (is_set(options, xml_log_option_diff_short) && is_not_set(options, xml_log_option_diff_all)) { /* Still searching for the actual change */ for (a_child = __xml_first_child(data); a_child != NULL; a_child = __xml_next(a_child)) { log_data_element(log_level, file, function, line, prefix, a_child, depth + 1, options); } return; } __xml_log_element(log_level, file, function, line, prefix, data, depth, options|xml_log_option_open|xml_log_option_close|xml_log_option_children); free(prefix_m); } static void dump_filtered_xml(xmlNode * data, int options, char **buffer, int *offset, int *max) { int lpc; xmlAttrPtr xIter = NULL; static int filter_len = DIMOF(filter); for (lpc = 0; options && lpc < filter_len; lpc++) { filter[lpc].found = FALSE; } for (xIter = crm_first_attr(data); xIter != NULL; xIter = xIter->next) { bool skip = FALSE; const char *p_name = (const char *)xIter->name; for (lpc = 0; skip == FALSE && lpc < filter_len; lpc++) { if (filter[lpc].found == FALSE && strcmp(p_name, filter[lpc].string) == 0) { filter[lpc].found = TRUE; skip = TRUE; break; } } if (skip == FALSE) { dump_xml_attr(xIter, options, buffer, offset, max); } } } static void dump_xml_element(xmlNode * data, int options, char **buffer, int *offset, int *max, int depth) { const char *name = NULL; CRM_ASSERT(max != NULL); CRM_ASSERT(offset != NULL); CRM_ASSERT(buffer != NULL); if (data == NULL) { crm_trace("Nothing to dump"); return; } if (*buffer == NULL) { *offset = 0; *max = 0; } name = crm_element_name(data); CRM_ASSERT(name != NULL); insert_prefix(options, buffer, offset, max, depth); buffer_print(*buffer, *max, *offset, "<%s", name); if (options & xml_log_option_filtered) { dump_filtered_xml(data, options, buffer, offset, max); } else { xmlAttrPtr xIter = NULL; for (xIter = crm_first_attr(data); xIter != NULL; xIter = xIter->next) { dump_xml_attr(xIter, options, buffer, offset, max); } } if (data->children == NULL) { buffer_print(*buffer, *max, *offset, "/>"); } else { buffer_print(*buffer, *max, *offset, ">"); } if (options & xml_log_option_formatted) { buffer_print(*buffer, *max, *offset, "\n"); } if (data->children) { xmlNode *xChild = NULL; for (xChild = __xml_first_child(data); xChild != NULL; xChild = __xml_next(xChild)) { crm_xml_dump(xChild, options, buffer, offset, max, depth + 1); } insert_prefix(options, buffer, offset, max, depth); buffer_print(*buffer, *max, *offset, "", name); if (options & xml_log_option_formatted) { buffer_print(*buffer, *max, *offset, "\n"); } } } static void dump_xml_comment(xmlNode * data, int options, char **buffer, int *offset, int *max, int depth) { CRM_ASSERT(max != NULL); CRM_ASSERT(offset != NULL); CRM_ASSERT(buffer != NULL); if (data == NULL) { crm_trace("Nothing to dump"); return; } if (*buffer == NULL) { *offset = 0; *max = 0; } insert_prefix(options, buffer, offset, max, depth); buffer_print(*buffer, *max, *offset, ""); if (options & xml_log_option_formatted) { buffer_print(*buffer, *max, *offset, "\n"); } } void crm_xml_dump(xmlNode * data, int options, char **buffer, int *offset, int *max, int depth) { + if(data == NULL) { + *offset = 0; + *max = 0; + } #if 0 if (is_not_set(options, xml_log_option_filtered)) { /* Turning this code on also changes the PE tests for some reason * (not just newlines). Figure out why before considering to * enable this permanently. * * It exists to help debug slowness in xmlNodeDump() and * potentially if we ever want to go back to it. * * In theory its a good idea (reuse) but our custom version does * better for the filtered case and avoids the final strdup() for * everything */ time_t now, next; xmlDoc *doc = NULL; xmlBuffer *xml_buffer = NULL; *buffer = NULL; doc = getDocPtr(data); /* doc will only be NULL if data is */ CRM_CHECK(doc != NULL, return); now = time(NULL); xml_buffer = xmlBufferCreate(); CRM_ASSERT(xml_buffer != NULL); /* The default allocator XML_BUFFER_ALLOC_EXACT does far too many * realloc()s and it can take upwards of 18 seconds (yes, seconds) * to dump a 28kb tree which XML_BUFFER_ALLOC_DOUBLEIT can do in * less than 1 second. * * We could also use xmlBufferCreateSize() to start with a * sane-ish initial size and avoid the first few doubles. */ xmlBufferSetAllocationScheme(xml_buffer, XML_BUFFER_ALLOC_DOUBLEIT); *max = xmlNodeDump(xml_buffer, doc, data, 0, (options & xml_log_option_formatted)); if (*max > 0) { *buffer = strdup((char *)xml_buffer->content); } next = time(NULL); if ((now + 1) < next) { crm_log_xml_trace(data, "Long time"); crm_err("xmlNodeDump() -> %dbytes took %ds", *max, next - now); } xmlBufferFree(xml_buffer); return; } #endif switch(data->type) { case XML_ELEMENT_NODE: /* Handle below */ dump_xml_element(data, options, buffer, offset, max, depth); break; case XML_TEXT_NODE: /* Ignore */ return; case XML_COMMENT_NODE: dump_xml_comment(data, options, buffer, offset, max, depth); break; default: crm_warn("Unhandled type: %d", data->type); return; /* XML_ATTRIBUTE_NODE = 2 XML_CDATA_SECTION_NODE = 4 XML_ENTITY_REF_NODE = 5 XML_ENTITY_NODE = 6 XML_PI_NODE = 7 XML_DOCUMENT_NODE = 9 XML_DOCUMENT_TYPE_NODE = 10 XML_DOCUMENT_FRAG_NODE = 11 XML_NOTATION_NODE = 12 XML_HTML_DOCUMENT_NODE = 13 XML_DTD_NODE = 14 XML_ELEMENT_DECL = 15 XML_ATTRIBUTE_DECL = 16 XML_ENTITY_DECL = 17 XML_NAMESPACE_DECL = 18 XML_XINCLUDE_START = 19 XML_XINCLUDE_END = 20 XML_DOCB_DOCUMENT_NODE = 21 */ } } void crm_buffer_add_char(char **buffer, int *offset, int *max, char c) { buffer_print(*buffer, *max, *offset, "%c", c); } char * dump_xml_formatted(xmlNode * an_xml_node) { char *buffer = NULL; int offset = 0, max = 0; crm_xml_dump(an_xml_node, xml_log_option_formatted, &buffer, &offset, &max, 0); return buffer; } char * dump_xml_unformatted(xmlNode * an_xml_node) { char *buffer = NULL; int offset = 0, max = 0; crm_xml_dump(an_xml_node, 0, &buffer, &offset, &max, 0); return buffer; } gboolean xml_has_children(const xmlNode * xml_root) { if (xml_root != NULL && xml_root->children != NULL) { return TRUE; } return FALSE; } int crm_element_value_int(xmlNode * data, const char *name, int *dest) { const char *value = crm_element_value(data, name); CRM_CHECK(dest != NULL, return -1); if (value) { *dest = crm_int_helper(value, NULL); return 0; } return -1; } int crm_element_value_const_int(const xmlNode * data, const char *name, int *dest) { return crm_element_value_int((xmlNode *) data, name, dest); } const char * crm_element_value_const(const xmlNode * data, const char *name) { return crm_element_value((xmlNode *) data, name); } char * crm_element_value_copy(xmlNode * data, const char *name) { char *value_copy = NULL; const char *value = crm_element_value(data, name); if (value != NULL) { value_copy = strdup(value); } return value_copy; } void xml_remove_prop(xmlNode * obj, const char *name) { if(__xml_acl_check(obj, NULL, xpf_acl_write) == FALSE) { crm_trace("Cannot remove %s from %s", name, obj->name); } else if(TRACKING_CHANGES(obj)) { /* Leave in place (marked for removal) until after the diff is calculated */ xml_private_t *p = NULL; xmlAttr *attr = xmlHasProp(obj, (const xmlChar *)name); p = attr->_private; set_parent_flag(obj, xpf_dirty); p->flags |= xpf_deleted; /* crm_trace("Setting flag %x due to %s[@id=%s].%s", xpf_dirty, obj->name, ID(obj), name); */ } else { xmlUnsetProp(obj, (const xmlChar *)name); } } void purge_diff_markers(xmlNode * a_node) { xmlNode *child = NULL; CRM_CHECK(a_node != NULL, return); xml_remove_prop(a_node, XML_DIFF_MARKER); for (child = __xml_first_child(a_node); child != NULL; child = __xml_next(child)) { purge_diff_markers(child); } } void save_xml_to_file(xmlNode * xml, const char *desc, const char *filename) { char *f = NULL; if (filename == NULL) { char *uuid = crm_generate_uuid(); f = crm_strdup_printf("/tmp/%s", uuid); filename = f; free(uuid); } crm_info("Saving %s to %s", desc, filename); write_xml_file(xml, filename, FALSE); free(f); } gboolean apply_xml_diff(xmlNode * old, xmlNode * diff, xmlNode ** new) { gboolean result = TRUE; int root_nodes_seen = 0; static struct qb_log_callsite *digest_cs = NULL; const char *digest = crm_element_value(diff, XML_ATTR_DIGEST); const char *version = crm_element_value(diff, XML_ATTR_CRM_VERSION); xmlNode *child_diff = NULL; xmlNode *added = find_xml_node(diff, "diff-added", FALSE); xmlNode *removed = find_xml_node(diff, "diff-removed", FALSE); CRM_CHECK(new != NULL, return FALSE); if (digest_cs == NULL) { digest_cs = qb_log_callsite_get(__func__, __FILE__, "diff-digest", LOG_TRACE, __LINE__, crm_trace_nonlog); } crm_trace("Substraction Phase"); for (child_diff = __xml_first_child(removed); child_diff != NULL; child_diff = __xml_next(child_diff)) { CRM_CHECK(root_nodes_seen == 0, result = FALSE); if (root_nodes_seen == 0) { *new = subtract_xml_object(NULL, old, child_diff, FALSE, NULL, NULL); } root_nodes_seen++; } if (root_nodes_seen == 0) { *new = copy_xml(old); } else if (root_nodes_seen > 1) { crm_err("(-) Diffs cannot contain more than one change set..." " saw %d", root_nodes_seen); result = FALSE; } root_nodes_seen = 0; crm_trace("Addition Phase"); if (result) { xmlNode *child_diff = NULL; for (child_diff = __xml_first_child(added); child_diff != NULL; child_diff = __xml_next(child_diff)) { CRM_CHECK(root_nodes_seen == 0, result = FALSE); if (root_nodes_seen == 0) { add_xml_object(NULL, *new, child_diff, TRUE); } root_nodes_seen++; } } if (root_nodes_seen > 1) { crm_err("(+) Diffs cannot contain more than one change set..." " saw %d", root_nodes_seen); result = FALSE; } else if (result && digest) { char *new_digest = NULL; purge_diff_markers(*new); /* Purge now so the diff is ok */ new_digest = calculate_xml_versioned_digest(*new, FALSE, TRUE, version); if (safe_str_neq(new_digest, digest)) { crm_info("Digest mis-match: expected %s, calculated %s", digest, new_digest); result = FALSE; crm_trace("%p %0.6x", digest_cs, digest_cs ? digest_cs->targets : 0); if (digest_cs && digest_cs->targets) { save_xml_to_file(old, "diff:original", NULL); save_xml_to_file(diff, "diff:input", NULL); save_xml_to_file(*new, "diff:new", NULL); } } else { crm_trace("Digest matched: expected %s, calculated %s", digest, new_digest); } free(new_digest); } else if (result) { purge_diff_markers(*new); /* Purge now so the diff is ok */ } return result; } static void __xml_diff_object(xmlNode * old, xmlNode * new) { xmlNode *cIter = NULL; xmlAttr *pIter = NULL; CRM_CHECK(new != NULL, return); if(old == NULL) { crm_node_created(new); __xml_acl_post_process(new); /* Check creation is allowed */ return; } else { xml_private_t *p = new->_private; if(p->flags & xpf_processed) { /* Avoid re-comparing nodes */ return; } p->flags |= xpf_processed; } for (pIter = crm_first_attr(new); pIter != NULL; pIter = pIter->next) { xml_private_t *p = pIter->_private; /* Assume everything was just created and take it from there */ p->flags |= xpf_created; } for (pIter = crm_first_attr(old); pIter != NULL; ) { xmlAttr *prop = pIter; xml_private_t *p = NULL; const char *name = (const char *)pIter->name; const char *old_value = crm_element_value(old, name); xmlAttr *exists = xmlHasProp(new, pIter->name); pIter = pIter->next; if(exists == NULL) { p = new->doc->_private; /* Prevent the dirty flag being set recursively upwards */ clear_bit(p->flags, xpf_tracking); exists = xmlSetProp(new, (const xmlChar *)name, (const xmlChar *)old_value); set_bit(p->flags, xpf_tracking); p = exists->_private; p->flags = 0; crm_trace("Lost %s@%s=%s", old->name, name, old_value); xml_remove_prop(new, name); } else { int p_new = __xml_offset((xmlNode*)exists); int p_old = __xml_offset((xmlNode*)prop); const char *value = crm_element_value(new, name); p = exists->_private; p->flags = (p->flags & ~xpf_created); if(strcmp(value, old_value) != 0) { /* Restore the original value, so we can call crm_xml_add() whcih checks ACLs */ char *vcopy = crm_element_value_copy(new, name); crm_trace("Modified %s@%s %s->%s", old->name, name, old_value, vcopy); xmlSetProp(new, prop->name, (const xmlChar *)old_value); crm_xml_add(new, name, vcopy); free(vcopy); } else if(p_old != p_new) { crm_info("Moved %s@%s (%d -> %d)", old->name, name, p_old, p_new); __xml_node_dirty(new); p->flags |= xpf_dirty|xpf_moved; if(p_old > p_new) { p = prop->_private; p->flags |= xpf_skip; } else { p = exists->_private; p->flags |= xpf_skip; } } } } for (pIter = crm_first_attr(new); pIter != NULL; ) { xmlAttr *prop = pIter; xml_private_t *p = pIter->_private; pIter = pIter->next; if(is_set(p->flags, xpf_created)) { char *name = strdup((const char *)prop->name); char *value = crm_element_value_copy(new, name); crm_trace("Created %s@%s=%s", new->name, name, value); /* Remove plus create wont work as it will modify the relative attribute ordering */ if(__xml_acl_check(new, name, xpf_acl_write)) { crm_attr_dirty(prop); } else { xmlUnsetProp(new, prop->name); /* Remove - change not allowed */ } free(value); free(name); } } for (cIter = __xml_first_child(old); cIter != NULL; ) { xmlNode *old_child = cIter; xmlNode *new_child = find_entity(new, crm_element_name(cIter), ID(cIter)); cIter = __xml_next(cIter); if(new_child) { __xml_diff_object(old_child, new_child); } else { xml_private_t *p = old_child->_private; /* Create then free (which will check the acls if necessary) */ xmlNode *candidate = add_node_copy(new, old_child); xmlNode *top = xmlDocGetRootElement(candidate->doc); __xml_node_clean(candidate); __xml_acl_apply(top); /* Make sure any ACLs are applied to 'candidate' */ free_xml(candidate); if(NULL == find_entity(new, crm_element_name(old_child), ID(old_child))) { p->flags |= xpf_skip; } } } for (cIter = __xml_first_child(new); cIter != NULL; ) { xmlNode *new_child = cIter; xmlNode *old_child = find_entity(old, crm_element_name(cIter), ID(cIter)); cIter = __xml_next(cIter); if(old_child == NULL) { xml_private_t *p = new_child->_private; p->flags |= xpf_skip; __xml_diff_object(old_child, new_child); } else { /* Check for movement, we already checked for differences */ int p_new = __xml_offset(new_child); int p_old = __xml_offset(old_child); xml_private_t *p = new_child->_private; if(p_old != p_new) { crm_info("%s.%s moved from %d to %d - %d", new_child->name, ID(new_child), p_old, p_new); p->flags |= xpf_moved; if(p_old > p_new) { p = old_child->_private; p->flags |= xpf_skip; } else { p = new_child->_private; p->flags |= xpf_skip; } } } } } void xml_calculate_changes(xmlNode * old, xmlNode * new) { CRM_CHECK(safe_str_eq(crm_element_name(old), crm_element_name(new)), return); CRM_CHECK(safe_str_eq(ID(old), ID(new)), return); if(xml_tracking_changes(new) == FALSE) { xml_track_changes(new, NULL, NULL, FALSE); } __xml_diff_object(old, new); } xmlNode * diff_xml_object(xmlNode * old, xmlNode * new, gboolean suppress) { xmlNode *tmp1 = NULL; xmlNode *diff = create_xml_node(NULL, "diff"); xmlNode *removed = create_xml_node(diff, "diff-removed"); xmlNode *added = create_xml_node(diff, "diff-added"); crm_xml_add(diff, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); tmp1 = subtract_xml_object(removed, old, new, FALSE, NULL, "removed:top"); if (suppress && tmp1 != NULL && can_prune_leaf(tmp1)) { free_xml(tmp1); } tmp1 = subtract_xml_object(added, new, old, TRUE, NULL, "added:top"); if (suppress && tmp1 != NULL && can_prune_leaf(tmp1)) { free_xml(tmp1); } if (added->children == NULL && removed->children == NULL) { free_xml(diff); diff = NULL; } return diff; } gboolean can_prune_leaf(xmlNode * xml_node) { xmlNode *cIter = NULL; xmlAttrPtr pIter = NULL; gboolean can_prune = TRUE; const char *name = crm_element_name(xml_node); if (safe_str_eq(name, XML_TAG_RESOURCE_REF) || safe_str_eq(name, XML_CIB_TAG_OBJ_REF) || safe_str_eq(name, XML_ACL_TAG_ROLE_REF) || safe_str_eq(name, XML_ACL_TAG_ROLE_REFv1)) { return FALSE; } for (pIter = crm_first_attr(xml_node); pIter != NULL; pIter = pIter->next) { const char *p_name = (const char *)pIter->name; if (strcmp(p_name, XML_ATTR_ID) == 0) { continue; } can_prune = FALSE; } cIter = __xml_first_child(xml_node); while (cIter) { xmlNode *child = cIter; cIter = __xml_next(cIter); if (can_prune_leaf(child)) { free_xml(child); } else { can_prune = FALSE; } } return can_prune; } void diff_filter_context(int context, int upper_bound, int lower_bound, xmlNode * xml_node, xmlNode * parent) { xmlNode *us = NULL; xmlNode *child = NULL; xmlAttrPtr pIter = NULL; xmlNode *new_parent = parent; const char *name = crm_element_name(xml_node); CRM_CHECK(xml_node != NULL && name != NULL, return); us = create_xml_node(parent, name); for (pIter = crm_first_attr(xml_node); pIter != NULL; pIter = pIter->next) { const char *p_name = (const char *)pIter->name; const char *p_value = crm_attr_value(pIter); lower_bound = context; crm_xml_add(us, p_name, p_value); } if (lower_bound >= 0 || upper_bound >= 0) { crm_xml_add(us, XML_ATTR_ID, ID(xml_node)); new_parent = us; } else { upper_bound = in_upper_context(0, context, xml_node); if (upper_bound >= 0) { crm_xml_add(us, XML_ATTR_ID, ID(xml_node)); new_parent = us; } else { free_xml(us); us = NULL; } } for (child = __xml_first_child(us); child != NULL; child = __xml_next(child)) { diff_filter_context(context, upper_bound - 1, lower_bound - 1, child, new_parent); } } int in_upper_context(int depth, int context, xmlNode * xml_node) { if (context == 0) { return 0; } if (xml_node->properties) { return depth; } else if (depth < context) { xmlNode *child = NULL; for (child = __xml_first_child(xml_node); child != NULL; child = __xml_next(child)) { if (in_upper_context(depth + 1, context, child)) { return depth; } } } return 0; } static xmlNode * find_xml_comment(xmlNode * root, xmlNode * search_comment) { xmlNode *a_child = NULL; CRM_CHECK(search_comment->type == XML_COMMENT_NODE, return NULL); for (a_child = __xml_first_child(root); a_child != NULL; a_child = __xml_next(a_child)) { if (a_child->type != XML_COMMENT_NODE) { continue; } if (safe_str_eq((const char *)a_child->content, (const char *)search_comment->content)) { return a_child; } } return NULL; } static xmlNode * subtract_xml_comment(xmlNode * parent, xmlNode * left, xmlNode * right, gboolean * changed) { CRM_CHECK(left != NULL, return NULL); CRM_CHECK(left->type == XML_COMMENT_NODE, return NULL); if (right == NULL || safe_str_neq((const char *)left->content, (const char *)right->content)) { xmlNode *deleted = NULL; deleted = add_node_copy(parent, left); *changed = TRUE; return deleted; } return NULL; } xmlNode * subtract_xml_object(xmlNode * parent, xmlNode * left, xmlNode * right, gboolean full, gboolean * changed, const char *marker) { gboolean dummy = FALSE; gboolean skip = FALSE; xmlNode *diff = NULL; xmlNode *right_child = NULL; xmlNode *left_child = NULL; xmlAttrPtr xIter = NULL; const char *id = NULL; const char *name = NULL; const char *value = NULL; const char *right_val = NULL; int lpc = 0; static int filter_len = DIMOF(filter); if (changed == NULL) { changed = &dummy; } if (left == NULL) { return NULL; } if (left->type == XML_COMMENT_NODE) { return subtract_xml_comment(parent, left, right, changed); } id = ID(left); if (right == NULL) { xmlNode *deleted = NULL; crm_trace("Processing <%s id=%s> (complete copy)", crm_element_name(left), id); deleted = add_node_copy(parent, left); crm_xml_add(deleted, XML_DIFF_MARKER, marker); *changed = TRUE; return deleted; } name = crm_element_name(left); CRM_CHECK(name != NULL, return NULL); CRM_CHECK(safe_str_eq(crm_element_name(left), crm_element_name(right)), return NULL); /* check for XML_DIFF_MARKER in a child */ value = crm_element_value(right, XML_DIFF_MARKER); if (value != NULL && strcmp(value, "removed:top") == 0) { crm_trace("We are the root of the deletion: %s.id=%s", name, id); *changed = TRUE; return NULL; } /* Avoiding creating the full heirarchy would save even more work here */ diff = create_xml_node(parent, name); /* Reset filter */ for (lpc = 0; lpc < filter_len; lpc++) { filter[lpc].found = FALSE; } /* changes to child objects */ for (left_child = __xml_first_child(left); left_child != NULL; left_child = __xml_next(left_child)) { gboolean child_changed = FALSE; if (left_child->type == XML_COMMENT_NODE) { right_child = find_xml_comment(right, left_child); } else { right_child = find_entity(right, crm_element_name(left_child), ID(left_child)); } subtract_xml_object(diff, left_child, right_child, full, &child_changed, marker); if (child_changed) { *changed = TRUE; } } if (*changed == FALSE) { /* Nothing to do */ } else if (full) { xmlAttrPtr pIter = NULL; for (pIter = crm_first_attr(left); pIter != NULL; pIter = pIter->next) { const char *p_name = (const char *)pIter->name; const char *p_value = crm_attr_value(pIter); xmlSetProp(diff, (const xmlChar *)p_name, (const xmlChar *)p_value); } /* We already have everything we need... */ goto done; } else if (id) { xmlSetProp(diff, (const xmlChar *)XML_ATTR_ID, (const xmlChar *)id); } /* changes to name/value pairs */ for (xIter = crm_first_attr(left); xIter != NULL; xIter = xIter->next) { const char *prop_name = (const char *)xIter->name; + xmlAttrPtr right_attr = NULL; + xml_private_t *p = NULL; if (strcmp(prop_name, XML_ATTR_ID) == 0) { continue; } skip = FALSE; for (lpc = 0; skip == FALSE && lpc < filter_len; lpc++) { if (filter[lpc].found == FALSE && strcmp(prop_name, filter[lpc].string) == 0) { filter[lpc].found = TRUE; skip = TRUE; break; } } if (skip) { continue; } + right_attr = xmlHasProp(right, (const xmlChar *)prop_name); + if (right_attr) { + p = right_attr->_private; + } + right_val = crm_element_value(right, prop_name); - if (right_val == NULL) { + if (right_val == NULL || (p && is_set(p->flags, xpf_deleted))) { /* new */ *changed = TRUE; if (full) { xmlAttrPtr pIter = NULL; for (pIter = crm_first_attr(left); pIter != NULL; pIter = pIter->next) { const char *p_name = (const char *)pIter->name; const char *p_value = crm_attr_value(pIter); xmlSetProp(diff, (const xmlChar *)p_name, (const xmlChar *)p_value); } break; } else { const char *left_value = crm_element_value(left, prop_name); xmlSetProp(diff, (const xmlChar *)prop_name, (const xmlChar *)value); crm_xml_add(diff, prop_name, left_value); } } else { /* Only now do we need the left value */ const char *left_value = crm_element_value(left, prop_name); if (strcmp(left_value, right_val) == 0) { /* unchanged */ } else { *changed = TRUE; if (full) { xmlAttrPtr pIter = NULL; crm_trace("Changes detected to %s in <%s id=%s>", prop_name, crm_element_name(left), id); for (pIter = crm_first_attr(left); pIter != NULL; pIter = pIter->next) { const char *p_name = (const char *)pIter->name; const char *p_value = crm_attr_value(pIter); xmlSetProp(diff, (const xmlChar *)p_name, (const xmlChar *)p_value); } break; } else { crm_trace("Changes detected to %s (%s -> %s) in <%s id=%s>", prop_name, left_value, right_val, crm_element_name(left), id); crm_xml_add(diff, prop_name, left_value); } } } } if (*changed == FALSE) { free_xml(diff); return NULL; } else if (full == FALSE && id) { crm_xml_add(diff, XML_ATTR_ID, id); } done: return diff; } static int add_xml_comment(xmlNode * parent, xmlNode * target, xmlNode * update) { CRM_CHECK(update != NULL, return 0); CRM_CHECK(update->type == XML_COMMENT_NODE, return 0); if (target == NULL) { target = find_xml_comment(parent, update); } if (target == NULL) { add_node_copy(parent, update); /* We wont reach here currently */ } else if (safe_str_neq((const char *)target->content, (const char *)update->content)) { xmlFree(target->content); target->content = xmlStrdup(update->content); } return 0; } int add_xml_object(xmlNode * parent, xmlNode * target, xmlNode * update, gboolean as_diff) { xmlNode *a_child = NULL; const char *object_id = NULL; const char *object_name = NULL; #if XML_PARSE_DEBUG crm_log_xml_trace("update:", update); crm_log_xml_trace("target:", target); #endif CRM_CHECK(update != NULL, return 0); if (update->type == XML_COMMENT_NODE) { return add_xml_comment(parent, target, update); } object_name = crm_element_name(update); object_id = ID(update); CRM_CHECK(object_name != NULL, return 0); if (target == NULL && object_id == NULL) { /* placeholder object */ target = find_xml_node(parent, object_name, FALSE); } else if (target == NULL) { target = find_entity(parent, object_name, object_id); } if (target == NULL) { target = create_xml_node(parent, object_name); CRM_CHECK(target != NULL, return 0); #if XML_PARSER_DEBUG crm_trace("Added <%s%s%s/>", crm_str(object_name), object_id ? " id=" : "", object_id ? object_id : ""); } else { crm_trace("Found node <%s%s%s/> to update", crm_str(object_name), object_id ? " id=" : "", object_id ? object_id : ""); #endif } CRM_CHECK(safe_str_eq(crm_element_name(target), crm_element_name(update)), return 0); if (as_diff == FALSE) { /* So that expand_plus_plus() gets called */ copy_in_properties(target, update); } else { /* No need for expand_plus_plus(), just raw speed */ xmlAttrPtr pIter = NULL; for (pIter = crm_first_attr(update); pIter != NULL; pIter = pIter->next) { const char *p_name = (const char *)pIter->name; const char *p_value = crm_attr_value(pIter); /* Remove it first so the ordering of the update is preserved */ xmlUnsetProp(target, (const xmlChar *)p_name); xmlSetProp(target, (const xmlChar *)p_name, (const xmlChar *)p_value); } } for (a_child = __xml_first_child(update); a_child != NULL; a_child = __xml_next(a_child)) { #if XML_PARSER_DEBUG crm_trace("Updating child <%s id=%s>", crm_element_name(a_child), ID(a_child)); #endif add_xml_object(target, NULL, a_child, as_diff); } #if XML_PARSER_DEBUG crm_trace("Finished with <%s id=%s>", crm_str(object_name), crm_str(object_id)); #endif return 0; } gboolean update_xml_child(xmlNode * child, xmlNode * to_update) { gboolean can_update = TRUE; xmlNode *child_of_child = NULL; CRM_CHECK(child != NULL, return FALSE); CRM_CHECK(to_update != NULL, return FALSE); if (safe_str_neq(crm_element_name(to_update), crm_element_name(child))) { can_update = FALSE; } else if (safe_str_neq(ID(to_update), ID(child))) { can_update = FALSE; } else if (can_update) { #if XML_PARSER_DEBUG crm_log_xml_trace(child, "Update match found..."); #endif add_xml_object(NULL, child, to_update, FALSE); } for (child_of_child = __xml_first_child(child); child_of_child != NULL; child_of_child = __xml_next(child_of_child)) { /* only update the first one */ if (can_update) { break; } can_update = update_xml_child(child_of_child, to_update); } return can_update; } int find_xml_children(xmlNode ** children, xmlNode * root, const char *tag, const char *field, const char *value, gboolean search_matches) { int match_found = 0; CRM_CHECK(root != NULL, return FALSE); CRM_CHECK(children != NULL, return FALSE); if (tag != NULL && safe_str_neq(tag, crm_element_name(root))) { } else if (value != NULL && safe_str_neq(value, crm_element_value(root, field))) { } else { if (*children == NULL) { *children = create_xml_node(NULL, __FUNCTION__); } add_node_copy(*children, root); match_found = 1; } if (search_matches || match_found == 0) { xmlNode *child = NULL; for (child = __xml_first_child(root); child != NULL; child = __xml_next(child)) { match_found += find_xml_children(children, child, tag, field, value, search_matches); } } return match_found; } gboolean replace_xml_child(xmlNode * parent, xmlNode * child, xmlNode * update, gboolean delete_only) { gboolean can_delete = FALSE; xmlNode *child_of_child = NULL; const char *up_id = NULL; const char *child_id = NULL; const char *right_val = NULL; CRM_CHECK(child != NULL, return FALSE); CRM_CHECK(update != NULL, return FALSE); up_id = ID(update); child_id = ID(child); if (up_id == NULL || (child_id && strcmp(child_id, up_id) == 0)) { can_delete = TRUE; } if (safe_str_neq(crm_element_name(update), crm_element_name(child))) { can_delete = FALSE; } if (can_delete && delete_only) { xmlAttrPtr pIter = NULL; for (pIter = crm_first_attr(update); pIter != NULL; pIter = pIter->next) { const char *p_name = (const char *)pIter->name; const char *p_value = crm_attr_value(pIter); right_val = crm_element_value(child, p_name); if (safe_str_neq(p_value, right_val)) { can_delete = FALSE; } } } if (can_delete && parent != NULL) { crm_log_xml_trace(child, "Delete match found..."); if (delete_only || update == NULL) { free_xml(child); } else { xmlNode *tmp = copy_xml(update); xmlDoc *doc = tmp->doc; xmlNode *old = NULL; xml_accept_changes(tmp); old = xmlReplaceNode(child, tmp); if(xml_tracking_changes(tmp)) { /* Replaced sections may have included relevant ACLs */ __xml_acl_apply(tmp); } xml_calculate_changes(old, tmp); xmlDocSetRootElement(doc, old); free_xml(old); } child = NULL; return TRUE; } else if (can_delete) { crm_log_xml_debug(child, "Cannot delete the search root"); can_delete = FALSE; } child_of_child = __xml_first_child(child); while (child_of_child) { xmlNode *next = __xml_next(child_of_child); can_delete = replace_xml_child(child, child_of_child, update, delete_only); /* only delete the first one */ if (can_delete) { child_of_child = NULL; } else { child_of_child = next; } } return can_delete; } void hash2nvpair(gpointer key, gpointer value, gpointer user_data) { const char *name = key; const char *s_value = value; xmlNode *xml_node = user_data; xmlNode *xml_child = create_xml_node(xml_node, XML_CIB_TAG_NVPAIR); crm_xml_add(xml_child, XML_ATTR_ID, name); crm_xml_add(xml_child, XML_NVPAIR_ATTR_NAME, name); crm_xml_add(xml_child, XML_NVPAIR_ATTR_VALUE, s_value); crm_trace("dumped: name=%s value=%s", name, s_value); } void hash2smartfield(gpointer key, gpointer value, gpointer user_data) { const char *name = key; const char *s_value = value; xmlNode *xml_node = user_data; if (isdigit(name[0])) { xmlNode *tmp = create_xml_node(xml_node, XML_TAG_PARAM); crm_xml_add(tmp, XML_NVPAIR_ATTR_NAME, name); crm_xml_add(tmp, XML_NVPAIR_ATTR_VALUE, s_value); } else if (crm_element_value(xml_node, name) == NULL) { crm_xml_add(xml_node, name, s_value); crm_trace("dumped: %s=%s", name, s_value); } else { crm_trace("duplicate: %s=%s", name, s_value); } } void hash2field(gpointer key, gpointer value, gpointer user_data) { const char *name = key; const char *s_value = value; xmlNode *xml_node = user_data; if (crm_element_value(xml_node, name) == NULL) { crm_xml_add(xml_node, name, s_value); } else { crm_trace("duplicate: %s=%s", name, s_value); } } void hash2metafield(gpointer key, gpointer value, gpointer user_data) { char *crm_name = NULL; if (key == NULL || value == NULL) { return; } else if (((char *)key)[0] == '#') { return; } else if (strstr(key, ":")) { return; } crm_name = crm_meta_name(key); hash2field(crm_name, value, user_data); free(crm_name); } GHashTable * xml2list(xmlNode * parent) { xmlNode *child = NULL; xmlAttrPtr pIter = NULL; xmlNode *nvpair_list = NULL; GHashTable *nvpair_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); CRM_CHECK(parent != NULL, return nvpair_hash); nvpair_list = find_xml_node(parent, XML_TAG_ATTRS, FALSE); if (nvpair_list == NULL) { crm_trace("No attributes in %s", crm_element_name(parent)); crm_log_xml_trace(parent, "No attributes for resource op"); } crm_log_xml_trace(nvpair_list, "Unpacking"); for (pIter = crm_first_attr(nvpair_list); pIter != NULL; pIter = pIter->next) { const char *p_name = (const char *)pIter->name; const char *p_value = crm_attr_value(pIter); crm_trace("Added %s=%s", p_name, p_value); g_hash_table_insert(nvpair_hash, strdup(p_name), strdup(p_value)); } for (child = __xml_first_child(nvpair_list); child != NULL; child = __xml_next(child)) { if (strcmp((const char *)child->name, XML_TAG_PARAM) == 0) { const char *key = crm_element_value(child, XML_NVPAIR_ATTR_NAME); const char *value = crm_element_value(child, XML_NVPAIR_ATTR_VALUE); crm_trace("Added %s=%s", key, value); if (key != NULL && value != NULL) { g_hash_table_insert(nvpair_hash, strdup(key), strdup(value)); } } } return nvpair_hash; } typedef struct name_value_s { const char *name; const void *value; } name_value_t; static gint sort_pairs(gconstpointer a, gconstpointer b) { int rc = 0; const name_value_t *pair_a = a; const name_value_t *pair_b = b; CRM_ASSERT(a != NULL); CRM_ASSERT(pair_a->name != NULL); CRM_ASSERT(b != NULL); CRM_ASSERT(pair_b->name != NULL); rc = strcmp(pair_a->name, pair_b->name); if (rc < 0) { return -1; } else if (rc > 0) { return 1; } return 0; } static void dump_pair(gpointer data, gpointer user_data) { name_value_t *pair = data; xmlNode *parent = user_data; crm_xml_add(parent, pair->name, pair->value); } xmlNode * sorted_xml(xmlNode * input, xmlNode * parent, gboolean recursive) { xmlNode *child = NULL; GListPtr sorted = NULL; GListPtr unsorted = NULL; name_value_t *pair = NULL; xmlNode *result = NULL; const char *name = NULL; xmlAttrPtr pIter = NULL; CRM_CHECK(input != NULL, return NULL); name = crm_element_name(input); CRM_CHECK(name != NULL, return NULL); result = create_xml_node(parent, name); for (pIter = crm_first_attr(input); pIter != NULL; pIter = pIter->next) { const char *p_name = (const char *)pIter->name; const char *p_value = crm_attr_value(pIter); pair = calloc(1, sizeof(name_value_t)); pair->name = p_name; pair->value = p_value; unsorted = g_list_prepend(unsorted, pair); pair = NULL; } sorted = g_list_sort(unsorted, sort_pairs); g_list_foreach(sorted, dump_pair, result); g_list_free_full(sorted, free); for (child = __xml_first_child(input); child != NULL; child = __xml_next(child)) { if (recursive) { sorted_xml(child, result, recursive); } else { add_node_copy(result, child); } } return result; } static gboolean validate_with_dtd(xmlDocPtr doc, gboolean to_logs, const char *dtd_file) { gboolean valid = TRUE; xmlDtdPtr dtd = NULL; xmlValidCtxtPtr cvp = NULL; CRM_CHECK(doc != NULL, return FALSE); CRM_CHECK(dtd_file != NULL, return FALSE); dtd = xmlParseDTD(NULL, (const xmlChar *)dtd_file); if(dtd == NULL) { crm_err("Could not locate/parse DTD: %s", dtd_file); return TRUE; } cvp = xmlNewValidCtxt(); if(cvp) { if (to_logs) { cvp->userData = (void *)LOG_ERR; cvp->error = (xmlValidityErrorFunc) xml_log; cvp->warning = (xmlValidityWarningFunc) xml_log; } else { cvp->userData = (void *)stderr; cvp->error = (xmlValidityErrorFunc) fprintf; cvp->warning = (xmlValidityWarningFunc) fprintf; } if (!xmlValidateDtd(cvp, doc, dtd)) { valid = FALSE; } xmlFreeValidCtxt(cvp); } else { crm_err("Internal error: No valid context"); } xmlFreeDtd(dtd); return valid; } xmlNode * first_named_child(xmlNode * parent, const char *name) { xmlNode *match = NULL; for (match = __xml_first_child(parent); match != NULL; match = __xml_next(match)) { /* * name == NULL gives first child regardless of name; this is * semantically incorrect in this funciton, but may be necessary * due to prior use of xml_child_iter_filter */ if (name == NULL || strcmp((const char *)match->name, name) == 0) { return match; } } return NULL; } #if 0 static void relaxng_invalid_stderr(void *userData, xmlErrorPtr error) { /* Structure xmlError struct _xmlError { int domain : What part of the library raised this er int code : The error code, e.g. an xmlParserError char * message : human-readable informative error messag xmlErrorLevel level : how consequent is the error char * file : the filename int line : the line number if available char * str1 : extra string information char * str2 : extra string information char * str3 : extra string information int int1 : extra number information int int2 : column number of the error or 0 if N/A void * ctxt : the parser context if available void * node : the node in the tree } */ crm_err("Structured error: line=%d, level=%d %s", error->line, error->level, error->message); } #endif static gboolean validate_with_relaxng(xmlDocPtr doc, gboolean to_logs, const char *relaxng_file, relaxng_ctx_cache_t ** cached_ctx) { int rc = 0; gboolean valid = TRUE; relaxng_ctx_cache_t *ctx = NULL; CRM_CHECK(doc != NULL, return FALSE); CRM_CHECK(relaxng_file != NULL, return FALSE); if (cached_ctx && *cached_ctx) { ctx = *cached_ctx; } else { crm_info("Creating RNG parser context"); ctx = calloc(1, sizeof(relaxng_ctx_cache_t)); xmlLoadExtDtdDefaultValue = 1; ctx->parser = xmlRelaxNGNewParserCtxt(relaxng_file); CRM_CHECK(ctx->parser != NULL, goto cleanup); if (to_logs) { xmlRelaxNGSetParserErrors(ctx->parser, (xmlRelaxNGValidityErrorFunc) xml_log, (xmlRelaxNGValidityWarningFunc) xml_log, GUINT_TO_POINTER(LOG_ERR)); } else { xmlRelaxNGSetParserErrors(ctx->parser, (xmlRelaxNGValidityErrorFunc) fprintf, (xmlRelaxNGValidityWarningFunc) fprintf, stderr); } ctx->rng = xmlRelaxNGParse(ctx->parser); CRM_CHECK(ctx->rng != NULL, crm_err("Could not find/parse %s", relaxng_file); goto cleanup); ctx->valid = xmlRelaxNGNewValidCtxt(ctx->rng); CRM_CHECK(ctx->valid != NULL, goto cleanup); if (to_logs) { xmlRelaxNGSetValidErrors(ctx->valid, (xmlRelaxNGValidityErrorFunc) xml_log, (xmlRelaxNGValidityWarningFunc) xml_log, GUINT_TO_POINTER(LOG_ERR)); } else { xmlRelaxNGSetValidErrors(ctx->valid, (xmlRelaxNGValidityErrorFunc) fprintf, (xmlRelaxNGValidityWarningFunc) fprintf, stderr); } } /* xmlRelaxNGSetValidStructuredErrors( */ /* valid, relaxng_invalid_stderr, valid); */ xmlLineNumbersDefault(1); rc = xmlRelaxNGValidateDoc(ctx->valid, doc); if (rc > 0) { valid = FALSE; } else if (rc < 0) { crm_err("Internal libxml error during validation\n"); } cleanup: if (cached_ctx) { *cached_ctx = ctx; } else { if (ctx->parser != NULL) { xmlRelaxNGFreeParserCtxt(ctx->parser); } if (ctx->valid != NULL) { xmlRelaxNGFreeValidCtxt(ctx->valid); } if (ctx->rng != NULL) { xmlRelaxNGFree(ctx->rng); } free(ctx); } return valid; } void crm_xml_init(void) { static bool init = TRUE; if(init) { init = FALSE; /* The default allocator XML_BUFFER_ALLOC_EXACT does far too many * realloc_safe()s and it can take upwards of 18 seconds (yes, seconds) * to dump a 28kb tree which XML_BUFFER_ALLOC_DOUBLEIT can do in * less than 1 second. */ xmlSetBufferAllocationScheme(XML_BUFFER_ALLOC_DOUBLEIT); /* Populate and free the _private field when nodes are created and destroyed */ xmlDeregisterNodeDefault(pcmkDeregisterNode); xmlRegisterNodeDefault(pcmkRegisterNode); __xml_build_schema_list(); } } void crm_xml_cleanup(void) { int lpc = 0; relaxng_ctx_cache_t *ctx = NULL; crm_info("Cleaning up memory from libxml2"); for (; lpc < xml_schema_max; lpc++) { switch (known_schemas[lpc].type) { case 0: /* None */ break; case 1: /* DTD - Not cached */ break; case 2: /* RNG - Cached */ ctx = (relaxng_ctx_cache_t *) known_schemas[lpc].cache; if (ctx == NULL) { break; } if (ctx->parser != NULL) { xmlRelaxNGFreeParserCtxt(ctx->parser); } if (ctx->valid != NULL) { xmlRelaxNGFreeValidCtxt(ctx->valid); } if (ctx->rng != NULL) { xmlRelaxNGFree(ctx->rng); } free(ctx); known_schemas[lpc].cache = NULL; break; default: break; } free(known_schemas[lpc].name); free(known_schemas[lpc].location); free(known_schemas[lpc].transform); } free(known_schemas); xsltCleanupGlobals(); xmlCleanupParser(); } static gboolean validate_with(xmlNode * xml, int method, gboolean to_logs) { xmlDocPtr doc = NULL; gboolean valid = FALSE; int type = 0; char *file = NULL; if(method < 0) { return FALSE; } type = known_schemas[method].type; if(type == 0) { return TRUE; } CRM_CHECK(xml != NULL, return FALSE); doc = getDocPtr(xml); file = get_schema_path(known_schemas[method].name, known_schemas[method].location); crm_trace("Validating with: %s (type=%d)", crm_str(file), type); switch (type) { case 1: valid = validate_with_dtd(doc, to_logs, file); break; case 2: valid = validate_with_relaxng(doc, to_logs, file, (relaxng_ctx_cache_t **) & (known_schemas[method].cache)); break; default: crm_err("Unknown validator type: %d", type); break; } free(file); return valid; } #include static void dump_file(const char *filename) { FILE *fp = NULL; int ch, line = 0; CRM_CHECK(filename != NULL, return); fp = fopen(filename, "r"); CRM_CHECK(fp != NULL, return); fprintf(stderr, "%4d ", ++line); do { ch = getc(fp); if (ch == EOF) { putc('\n', stderr); break; } else if (ch == '\n') { fprintf(stderr, "\n%4d ", ++line); } else { putc(ch, stderr); } } while (1); fclose(fp); } gboolean validate_xml_verbose(xmlNode * xml_blob) { int fd = 0; xmlDoc *doc = NULL; xmlNode *xml = NULL; gboolean rc = FALSE; char *filename = strdup(CRM_STATE_DIR "/cib-invalid.XXXXXX"); umask(S_IWGRP | S_IWOTH | S_IROTH); fd = mkstemp(filename); write_xml_fd(xml_blob, filename, fd, FALSE); dump_file(filename); doc = xmlParseFile(filename); xml = xmlDocGetRootElement(doc); rc = validate_xml(xml, NULL, FALSE); free_xml(xml); unlink(filename); free(filename); return rc; } gboolean validate_xml(xmlNode * xml_blob, const char *validation, gboolean to_logs) { int version = 0; if (validation == NULL) { validation = crm_element_value(xml_blob, XML_ATTR_VALIDATION); } if (validation == NULL) { int lpc = 0; bool valid = FALSE; validation = crm_element_value(xml_blob, "ignore-dtd"); if (crm_is_true(validation)) { /* Legacy compatibilty */ crm_xml_add(xml_blob, XML_ATTR_VALIDATION, "none"); return TRUE; } /* Work it out */ for (lpc = 0; lpc < xml_schema_max; lpc++) { if(validate_with(xml_blob, lpc, FALSE)) { valid = TRUE; crm_xml_add(xml_blob, XML_ATTR_VALIDATION, known_schemas[lpc].name); crm_info("XML validated against %s", known_schemas[lpc].name); if(known_schemas[lpc].after_transform == 0) { break; } } } return valid; } version = get_schema_version(validation); if (strcmp(validation, "none") == 0) { return TRUE; } else if(version < xml_schema_max) { return validate_with(xml_blob, version, to_logs); } crm_err("Unknown validator: %s", validation); return FALSE; } #if HAVE_LIBXSLT static xmlNode * apply_transformation(xmlNode * xml, const char *transform) { char *xform = NULL; xmlNode *out = NULL; xmlDocPtr res = NULL; xmlDocPtr doc = NULL; xsltStylesheet *xslt = NULL; CRM_CHECK(xml != NULL, return FALSE); doc = getDocPtr(xml); xform = get_schema_path(NULL, transform); xmlLoadExtDtdDefaultValue = 1; xmlSubstituteEntitiesDefault(1); xslt = xsltParseStylesheetFile((const xmlChar *)xform); CRM_CHECK(xslt != NULL, goto cleanup); res = xsltApplyStylesheet(xslt, doc, NULL); CRM_CHECK(res != NULL, goto cleanup); out = xmlDocGetRootElement(res); cleanup: if (xslt) { xsltFreeStylesheet(xslt); } free(xform); return out; } #endif const char * get_schema_name(int version) { if (version < 0 || version >= xml_schema_max) { return "unknown"; } return known_schemas[version].name; } int get_schema_version(const char *name) { int lpc = 0; if(name == NULL) { name = "none"; } for (; lpc < xml_schema_max; lpc++) { if (safe_str_eq(name, known_schemas[lpc].name)) { return lpc; } } return -1; } /* set which validation to use */ #include int update_validation(xmlNode ** xml_blob, int *best, int max, gboolean transform, gboolean to_logs) { xmlNode *xml = NULL; char *value = NULL; int max_stable_schemas = xml_latest_schema_index(); int lpc = 0, match = -1, rc = pcmk_ok; CRM_CHECK(best != NULL, return -EINVAL); CRM_CHECK(xml_blob != NULL, return -EINVAL); CRM_CHECK(*xml_blob != NULL, return -EINVAL); *best = 0; xml = *xml_blob; value = crm_element_value_copy(xml, XML_ATTR_VALIDATION); if (value != NULL) { match = get_schema_version(value); lpc = match; if (lpc >= 0 && transform == FALSE) { lpc++; } else if (lpc < 0) { crm_debug("Unknown validation type"); lpc = 0; } } if (match >= max_stable_schemas) { /* nothing to do */ free(value); *best = match; return pcmk_ok; } while(lpc <= max_stable_schemas) { gboolean valid = TRUE; crm_debug("Testing '%s' validation (%d of %d)", known_schemas[lpc].name ? known_schemas[lpc].name : "", lpc, max_stable_schemas); valid = validate_with(xml, lpc, to_logs); if (valid) { *best = lpc; } else { crm_trace("%s validation failed", known_schemas[lpc].name ? known_schemas[lpc].name : ""); } if (valid && transform) { xmlNode *upgrade = NULL; int next = known_schemas[lpc].after_transform; if (next < 0) { crm_trace("Stopping at %s", known_schemas[lpc].name); break; } else if (max > 0 && lpc == max) { crm_trace("Upgrade limit reached at %s (lpc=%d, next=%d, max=%d)", known_schemas[lpc].name, lpc, next, max); break; } else if (max > 0 && next > max) { crm_debug("Upgrade limit reached at %s (lpc=%d, next=%d, max=%d)", known_schemas[lpc].name, lpc, next, max); break; } else if (known_schemas[lpc].transform == NULL) { crm_notice("%s-style configuration is also valid for %s", known_schemas[lpc].name, known_schemas[next].name); if (validate_with(xml, next, to_logs)) { crm_debug("Configuration valid for schema: %s", known_schemas[next].name); lpc = next; *best = next; rc = pcmk_ok; } else { crm_info("Configuration not valid for schema: %s", known_schemas[next].name); } } else { crm_debug("Upgrading %s-style configuration to %s with %s", known_schemas[lpc].name, known_schemas[next].name, known_schemas[lpc].transform ? known_schemas[lpc].transform : "no-op"); #if HAVE_LIBXSLT upgrade = apply_transformation(xml, known_schemas[lpc].transform); #endif if (upgrade == NULL) { crm_err("Transformation %s failed", known_schemas[lpc].transform); rc = -pcmk_err_transform_failed; } else if (validate_with(upgrade, next, to_logs)) { crm_info("Transformation %s successful", known_schemas[lpc].transform); lpc = next; *best = next; free_xml(xml); xml = upgrade; rc = pcmk_ok; } else { crm_err("Transformation %s did not produce a valid configuration", known_schemas[lpc].transform); crm_log_xml_info(upgrade, "transform:bad"); free_xml(upgrade); rc = -pcmk_err_schema_validation; } } } } if (*best > match) { crm_info("%s the configuration from %s to %s", transform?"Transformed":"Upgraded", value ? value : "", known_schemas[*best].name); crm_xml_add(xml, XML_ATTR_VALIDATION, known_schemas[*best].name); } *xml_blob = xml; free(value); return rc; } /* * From xpath2.c * * All the elements returned by an XPath query are pointers to * elements from the tree *except* namespace nodes where the XPath * semantic is different from the implementation in libxml2 tree. * As a result when a returned node set is freed when * xmlXPathFreeObject() is called, that routine must check the * element type. But node from the returned set may have been removed * by xmlNodeSetContent() resulting in access to freed data. * * This can be exercised by running * valgrind xpath2 test3.xml '//discarded' discarded * * There is 2 ways around it: * - make a copy of the pointers to the nodes from the result set * then call xmlXPathFreeObject() and then modify the nodes * or * - remove the references from the node set, if they are not namespace nodes, before calling xmlXPathFreeObject(). */ void freeXpathObject(xmlXPathObjectPtr xpathObj) { int lpc, max = numXpathResults(xpathObj); if(xpathObj == NULL) { return; } for(lpc = 0; lpc < max; lpc++) { if (xpathObj->nodesetval->nodeTab[lpc] && xpathObj->nodesetval->nodeTab[lpc]->type != XML_NAMESPACE_DECL) { xpathObj->nodesetval->nodeTab[lpc] = NULL; } } /* _Now_ its safe to free it */ xmlXPathFreeObject(xpathObj); } xmlNode * getXpathResult(xmlXPathObjectPtr xpathObj, int index) { xmlNode *match = NULL; int max = numXpathResults(xpathObj); CRM_CHECK(index >= 0, return NULL); CRM_CHECK(xpathObj != NULL, return NULL); if (index >= max) { crm_err("Requested index %d of only %d items", index, max); return NULL; } else if(xpathObj->nodesetval->nodeTab[index] == NULL) { /* Previously requested */ return NULL; } match = xpathObj->nodesetval->nodeTab[index]; CRM_CHECK(match != NULL, return NULL); if (xpathObj->nodesetval->nodeTab[index]->type != XML_NAMESPACE_DECL) { /* See the comment for freeXpathObject() */ xpathObj->nodesetval->nodeTab[index] = NULL; } if (match->type == XML_DOCUMENT_NODE) { /* Will happen if section = '/' */ match = match->children; } else if (match->type != XML_ELEMENT_NODE && match->parent && match->parent->type == XML_ELEMENT_NODE) { /* reurning the parent instead */ match = match->parent; } else if (match->type != XML_ELEMENT_NODE) { /* We only support searching nodes */ crm_err("We only support %d not %d", XML_ELEMENT_NODE, match->type); match = NULL; } return match; } void dedupXpathResults(xmlXPathObjectPtr xpathObj) { int lpc, max = numXpathResults(xpathObj); if (xpathObj == NULL) { return; } for (lpc = 0; lpc < max; lpc++) { xmlNode *xml = NULL; gboolean dedup = FALSE; if (xpathObj->nodesetval->nodeTab[lpc] == NULL) { continue; } xml = xpathObj->nodesetval->nodeTab[lpc]->parent; for (; xml; xml = xml->parent) { int lpc2 = 0; for (lpc2 = 0; lpc2 < max; lpc2++) { if (xpathObj->nodesetval->nodeTab[lpc2] == xml) { xpathObj->nodesetval->nodeTab[lpc] = NULL; dedup = TRUE; break; } } if (dedup) { break; } } } } /* the caller needs to check if the result contains a xmlDocPtr or xmlNodePtr */ xmlXPathObjectPtr xpath_search(xmlNode * xml_top, const char *path) { xmlDocPtr doc = NULL; xmlXPathObjectPtr xpathObj = NULL; xmlXPathContextPtr xpathCtx = NULL; const xmlChar *xpathExpr = (const xmlChar *)path; CRM_CHECK(path != NULL, return NULL); CRM_CHECK(xml_top != NULL, return NULL); CRM_CHECK(strlen(path) > 0, return NULL); doc = getDocPtr(xml_top); xpathCtx = xmlXPathNewContext(doc); CRM_ASSERT(xpathCtx != NULL); xpathObj = xmlXPathEvalExpression(xpathExpr, xpathCtx); xmlXPathFreeContext(xpathCtx); return xpathObj; } gboolean cli_config_update(xmlNode ** xml, int *best_version, gboolean to_logs) { gboolean rc = TRUE; const char *value = crm_element_value(*xml, XML_ATTR_VALIDATION); int version = get_schema_version(value); int min_version = xml_minimum_schema_index(); if (version < min_version) { xmlNode *converted = NULL; converted = copy_xml(*xml); update_validation(&converted, &version, 0, TRUE, to_logs); value = crm_element_value(converted, XML_ATTR_VALIDATION); if (version < min_version) { if (to_logs) { crm_config_err("Your current configuration could only be upgraded to %s... " "the minimum requirement is %s.\n", crm_str(value), get_schema_name(min_version)); } else { fprintf(stderr, "Your current configuration could only be upgraded to %s... " "the minimum requirement is %s.\n", crm_str(value), get_schema_name(min_version)); } free_xml(converted); converted = NULL; rc = FALSE; } else { free_xml(*xml); *xml = converted; if (version < xml_latest_schema_index()) { crm_config_warn("Your configuration was internally updated to %s... " "which is acceptable but not the most recent", get_schema_name(version)); } else if (to_logs) { crm_info("Your configuration was internally updated to the latest version (%s)", get_schema_name(version)); } } } else if (version >= get_schema_version("none")) { if (to_logs) { crm_config_warn("Configuration validation is currently disabled." " It is highly encouraged and prevents many common cluster issues."); } else { fprintf(stderr, "Configuration validation is currently disabled." " It is highly encouraged and prevents many common cluster issues.\n"); } } if (best_version) { *best_version = version; } return rc; } xmlNode * expand_idref(xmlNode * input, xmlNode * top) { const char *tag = NULL; const char *ref = NULL; xmlNode *result = input; char *xpath_string = NULL; if (result == NULL) { return NULL; } else if (top == NULL) { top = input; } tag = crm_element_name(result); ref = crm_element_value(result, XML_ATTR_IDREF); if (ref != NULL) { int xpath_max = 512, offset = 0; xpath_string = calloc(1, xpath_max); offset += snprintf(xpath_string + offset, xpath_max - offset, "//%s[@id='%s']", tag, ref); CRM_LOG_ASSERT(offset > 0); result = get_xpath_object(xpath_string, top, LOG_ERR); if (result == NULL) { char *nodePath = (char *)xmlGetNodePath(top); crm_err("No match for %s found in %s: Invalid configuration", xpath_string, crm_str(nodePath)); free(nodePath); } } free(xpath_string); return result; } xmlNode * get_xpath_object_relative(const char *xpath, xmlNode * xml_obj, int error_level) { int len = 0; xmlNode *result = NULL; char *xpath_full = NULL; char *xpath_prefix = NULL; if (xml_obj == NULL || xpath == NULL) { return NULL; } xpath_prefix = (char *)xmlGetNodePath(xml_obj); len += strlen(xpath_prefix); len += strlen(xpath); xpath_full = strdup(xpath_prefix); xpath_full = realloc_safe(xpath_full, len + 1); strncat(xpath_full, xpath, len); result = get_xpath_object(xpath_full, xml_obj, error_level); free(xpath_prefix); free(xpath_full); return result; } xmlNode * get_xpath_object(const char *xpath, xmlNode * xml_obj, int error_level) { int max; xmlNode *result = NULL; xmlXPathObjectPtr xpathObj = NULL; char *nodePath = NULL; char *matchNodePath = NULL; if (xpath == NULL) { return xml_obj; /* or return NULL? */ } xpathObj = xpath_search(xml_obj, xpath); nodePath = (char *)xmlGetNodePath(xml_obj); max = numXpathResults(xpathObj); if (max < 1) { do_crm_log(error_level, "No match for %s in %s", xpath, crm_str(nodePath)); crm_log_xml_explicit(xml_obj, "Unexpected Input"); } else if (max > 1) { int lpc = 0; do_crm_log(error_level, "Too many matches for %s in %s", xpath, crm_str(nodePath)); for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if(match != NULL) { matchNodePath = (char *)xmlGetNodePath(match); do_crm_log(error_level, "%s[%d] = %s", xpath, lpc, crm_str(matchNodePath)); free(matchNodePath); } } crm_log_xml_explicit(xml_obj, "Bad Input"); } else { result = getXpathResult(xpathObj, 0); } freeXpathObject(xpathObj); free(nodePath); return result; } const char * crm_element_value(xmlNode * data, const char *name) { xmlAttr *attr = NULL; if (data == NULL) { crm_err("Couldn't find %s in NULL", name ? name : ""); CRM_LOG_ASSERT(data != NULL); return NULL; } else if (name == NULL) { crm_err("Couldn't find NULL in %s", crm_element_name(data)); return NULL; } attr = xmlHasProp(data, (const xmlChar *)name); if (attr == NULL || attr->children == NULL) { return NULL; } return (const char *)attr->children->content; } diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c index 80f0064374..6ac4074182 100644 --- a/lib/fencing/st_client.c +++ b/lib/fencing/st_client.c @@ -1,2517 +1,2522 @@ /* * Copyright (c) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include #include #include #include #include #include #include #include #include #include #include #include #include /* Add it for compiling on OSX */ #include #include #include #include #include #ifdef HAVE_STONITH_STONITH_H # include # define LHA_STONITH_LIBRARY "libstonith.so.1" static void *lha_agents_lib = NULL; #endif #include CRM_TRACE_INIT_DATA(stonith); struct stonith_action_s { /*! user defined data */ char *agent; char *action; char *victim; char *args; int timeout; int async; void *userdata; void (*done_cb) (GPid pid, gint status, const char *output, gpointer user_data); /*! internal async track data */ int fd_stdout; int last_timeout_signo; /*! internal timing information */ time_t initial_start_time; int tries; int remaining_timeout; guint timer_sigterm; guint timer_sigkill; int max_retries; /* device output data */ GPid pid; int rc; char *output; }; typedef struct stonith_private_s { char *token; crm_ipc_t *ipc; mainloop_io_t *source; GHashTable *stonith_op_callback_table; GList *notify_list; void (*op_callback) (stonith_t * st, stonith_callback_data_t * data); } stonith_private_t; typedef struct stonith_notify_client_s { const char *event; const char *obj_id; /* implement one day */ const char *obj_type; /* implement one day */ void (*notify) (stonith_t * st, stonith_event_t * e); } stonith_notify_client_t; typedef struct stonith_callback_client_s { void (*callback) (stonith_t * st, stonith_callback_data_t * data); const char *id; void *user_data; gboolean only_success; gboolean allow_timeout_updates; struct timer_rec_s *timer; } stonith_callback_client_t; struct notify_blob_s { stonith_t *stonith; xmlNode *xml; }; struct timer_rec_s { int call_id; int timeout; guint ref; stonith_t *stonith; }; typedef int (*stonith_op_t) (const char *, int, const char *, xmlNode *, xmlNode *, xmlNode *, xmlNode **, xmlNode **); static const char META_TEMPLATE[] = "\n" "\n" "\n" " 1.0\n" " \n" "%s\n" " \n" " %s\n" "%s\n" " \n" " \n" " \n" " \n" " \n" " \n" " \n" " \n" " 2.0\n" " \n" "\n"; bool stonith_dispatch(stonith_t * st); int stonith_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata); void stonith_perform_callback(stonith_t * stonith, xmlNode * msg, int call_id, int rc); xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, int call_options); int stonith_send_command(stonith_t * stonith, const char *op, xmlNode * data, xmlNode ** output_data, int call_options, int timeout); static void stonith_connection_destroy(gpointer user_data); static void stonith_send_notification(gpointer data, gpointer user_data); static int internal_stonith_action_execute(stonith_action_t * action); static void stonith_connection_destroy(gpointer user_data) { stonith_t *stonith = user_data; stonith_private_t *native = NULL; struct notify_blob_s blob; crm_trace("Sending destroyed notification"); blob.stonith = stonith; blob.xml = create_xml_node(NULL, "notify"); native = stonith->private; native->ipc = NULL; native->source = NULL; stonith->state = stonith_disconnected; crm_xml_add(blob.xml, F_TYPE, T_STONITH_NOTIFY); crm_xml_add(blob.xml, F_SUBTYPE, T_STONITH_NOTIFY_DISCONNECT); g_list_foreach(native->notify_list, stonith_send_notification, &blob); free_xml(blob.xml); } xmlNode * create_device_registration_xml(const char *id, const char *namespace, const char *agent, stonith_key_value_t * params, const char *rsc_provides) { xmlNode *data = create_xml_node(NULL, F_STONITH_DEVICE); xmlNode *args = create_xml_node(data, XML_TAG_ATTRS); #if HAVE_STONITH_STONITH_H namespace = get_stonith_provider(agent, namespace); if (safe_str_eq(namespace, "heartbeat")) { hash2field((gpointer) "plugin", (gpointer) agent, args); agent = "fence_legacy"; } #endif crm_xml_add(data, XML_ATTR_ID, id); crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__); crm_xml_add(data, "agent", agent); crm_xml_add(data, "namespace", namespace); if (rsc_provides) { crm_xml_add(data, "rsc_provides", rsc_provides); } for (; params; params = params->next) { hash2field((gpointer) params->key, (gpointer) params->value, args); } return data; } static int stonith_api_register_device(stonith_t * st, int call_options, const char *id, const char *namespace, const char *agent, stonith_key_value_t * params) { int rc = 0; xmlNode *data = NULL; data = create_device_registration_xml(id, namespace, agent, params, NULL); rc = stonith_send_command(st, STONITH_OP_DEVICE_ADD, data, NULL, call_options, 0); free_xml(data); return rc; } static int stonith_api_remove_device(stonith_t * st, int call_options, const char *name) { int rc = 0; xmlNode *data = NULL; data = create_xml_node(NULL, F_STONITH_DEVICE); crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__); crm_xml_add(data, XML_ATTR_ID, name); rc = stonith_send_command(st, STONITH_OP_DEVICE_DEL, data, NULL, call_options, 0); free_xml(data); return rc; } static int stonith_api_remove_level(stonith_t * st, int options, const char *node, int level) { int rc = 0; xmlNode *data = NULL; data = create_xml_node(NULL, F_STONITH_LEVEL); crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__); crm_xml_add(data, F_STONITH_TARGET, node); crm_xml_add_int(data, XML_ATTR_ID, level); rc = stonith_send_command(st, STONITH_OP_LEVEL_DEL, data, NULL, options, 0); free_xml(data); return rc; } xmlNode * create_level_registration_xml(const char *node, int level, stonith_key_value_t * device_list) { xmlNode *data = create_xml_node(NULL, F_STONITH_LEVEL); crm_xml_add_int(data, XML_ATTR_ID, level); crm_xml_add(data, F_STONITH_TARGET, node); crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__); for (; device_list; device_list = device_list->next) { xmlNode *dev = create_xml_node(data, F_STONITH_DEVICE); crm_xml_add(dev, XML_ATTR_ID, device_list->value); } return data; } static int stonith_api_register_level(stonith_t * st, int options, const char *node, int level, stonith_key_value_t * device_list) { int rc = 0; xmlNode *data = create_level_registration_xml(node, level, device_list); rc = stonith_send_command(st, STONITH_OP_LEVEL_ADD, data, NULL, options, 0); free_xml(data); return rc; } static void append_arg(gpointer key, gpointer value, gpointer user_data) { int len = 3; /* =, \n, \0 */ int last = 0; char **args = user_data; CRM_CHECK(key != NULL, return); CRM_CHECK(value != NULL, return); if (strstr(key, "pcmk_")) { return; } else if (strstr(key, CRM_META)) { return; } else if (safe_str_eq(key, "crm_feature_set")) { return; } len += strlen(key); len += strlen(value); if (*args != NULL) { last = strlen(*args); } *args = realloc_safe(*args, last + len); crm_trace("Appending: %s=%s", (char *)key, (char *)value); sprintf((*args) + last, "%s=%s\n", (char *)key, (char *)value); } static void append_const_arg(const char *key, const char *value, char **arg_list) { CRM_LOG_ASSERT(key && value); if(key && value) { char *glib_sucks_key = strdup(key); char *glib_sucks_value = strdup(value); append_arg(glib_sucks_key, glib_sucks_value, arg_list); free(glib_sucks_value); free(glib_sucks_key); } } static void append_host_specific_args(const char *victim, const char *map, GHashTable * params, char **arg_list) { char *name = NULL; int last = 0, lpc = 0, max = 0; if (map == NULL) { /* The best default there is for now... */ crm_debug("Using default arg map: port=uname"); append_const_arg("port", victim, arg_list); return; } max = strlen(map); crm_debug("Processing arg map: %s", map); for (; lpc < max + 1; lpc++) { if (isalpha(map[lpc])) { /* keep going */ } else if (map[lpc] == '=' || map[lpc] == ':') { free(name); name = calloc(1, 1 + lpc - last); memcpy(name, map + last, lpc - last); crm_debug("Got name: %s", name); last = lpc + 1; } else if (map[lpc] == 0 || map[lpc] == ',' || isspace(map[lpc])) { char *param = NULL; const char *value = NULL; param = calloc(1, 1 + lpc - last); memcpy(param, map + last, lpc - last); last = lpc + 1; crm_debug("Got key: %s", param); if (name == NULL) { crm_err("Misparsed '%s', found '%s' without a name", map, param); free(param); continue; } if (safe_str_eq(param, "uname")) { value = victim; } else { char *key = crm_meta_name(param); value = g_hash_table_lookup(params, key); free(key); } if (value) { crm_debug("Setting '%s'='%s' (%s) for %s", name, value, param, victim); append_const_arg(name, value, arg_list); } else { crm_err("No node attribute '%s' for '%s'", name, victim); } free(name); name = NULL; free(param); if (map[lpc] == 0) { break; } } else if (isspace(map[lpc])) { last = lpc; } } free(name); } static char * make_args(const char *agent, const char *action, const char *victim, uint32_t victim_nodeid, GHashTable * device_args, GHashTable * port_map) { char buffer[512]; char *arg_list = NULL; const char *value = NULL; const char *_action = action; CRM_CHECK(action != NULL, return NULL); buffer[511] = 0; snprintf(buffer, 511, "pcmk_%s_action", action); if (device_args) { value = g_hash_table_lookup(device_args, buffer); } if (value == NULL && device_args) { /* Legacy support for early 1.1 releases - Remove for 1.4 */ snprintf(buffer, 511, "pcmk_%s_cmd", action); value = g_hash_table_lookup(device_args, buffer); } if (value == NULL && device_args && safe_str_eq(action, "off")) { /* Legacy support for late 1.1 releases - Remove for 1.4 */ value = g_hash_table_lookup(device_args, "pcmk_poweroff_action"); } if (value) { crm_info("Substituting action '%s' for requested operation '%s'", value, action); action = value; } append_const_arg(STONITH_ATTR_ACTION_OP, action, &arg_list); if (victim && device_args) { const char *alias = victim; const char *param = g_hash_table_lookup(device_args, STONITH_ATTR_HOSTARG); if (port_map && g_hash_table_lookup(port_map, victim)) { alias = g_hash_table_lookup(port_map, victim); } /* Always supply the node's name too: * https://fedorahosted.org/cluster/wiki/FenceAgentAPI */ append_const_arg("nodename", victim, &arg_list); if (victim_nodeid) { char nodeid_str[33] = { 0, }; if (snprintf(nodeid_str, 33, "%u", (unsigned int)victim_nodeid)) { crm_info("For stonith action (%s) for victim %s, adding nodeid (%d) to parameters", action, victim, nodeid_str); append_const_arg("nodeid", nodeid_str, &arg_list); } } /* Check if we need to supply the victim in any other form */ if(safe_str_eq(agent, "fence_legacy")) { value = agent; } else if (param == NULL) { const char *map = g_hash_table_lookup(device_args, STONITH_ATTR_ARGMAP); if (map == NULL) { param = "port"; value = g_hash_table_lookup(device_args, param); } else { /* Legacy handling */ append_host_specific_args(alias, map, device_args, &arg_list); value = map; /* Nothing more to do */ } } else if (safe_str_eq(param, "none")) { value = param; /* Nothing more to do */ } else { value = g_hash_table_lookup(device_args, param); } /* Don't overwrite explictly set values for $param */ if (value == NULL || safe_str_eq(value, "dynamic")) { crm_debug("Performing %s action for node '%s' as '%s=%s'", action, victim, param, alias); append_const_arg(param, alias, &arg_list); } } if (device_args) { g_hash_table_foreach(device_args, append_arg, &arg_list); } if(device_args && g_hash_table_lookup(device_args, STONITH_ATTR_ACTION_OP)) { if(safe_str_eq(_action,"list") || safe_str_eq(_action,"status") || safe_str_eq(_action,"monitor") || safe_str_eq(_action,"metadata")) { /* Force use of the calculated command for support ops * We don't want list or monitor ops initiating fencing, regardless of what the admin configured */ append_const_arg(STONITH_ATTR_ACTION_OP, action, &arg_list); } } return arg_list; } static gboolean st_child_term(gpointer data) { int rc = 0; stonith_action_t *track = data; crm_info("Child %d timed out, sending SIGTERM", track->pid); track->timer_sigterm = 0; track->last_timeout_signo = SIGTERM; rc = kill(-track->pid, SIGTERM); if (rc < 0) { crm_perror(LOG_ERR, "Couldn't send SIGTERM to %d", track->pid); } return FALSE; } static gboolean st_child_kill(gpointer data) { int rc = 0; stonith_action_t *track = data; crm_info("Child %d timed out, sending SIGKILL", track->pid); track->timer_sigkill = 0; track->last_timeout_signo = SIGKILL; rc = kill(-track->pid, SIGKILL); if (rc < 0) { crm_perror(LOG_ERR, "Couldn't send SIGKILL to %d", track->pid); } return FALSE; } static void stonith_action_clear_tracking_data(stonith_action_t * action) { if (action->timer_sigterm > 0) { g_source_remove(action->timer_sigterm); action->timer_sigterm = 0; } if (action->timer_sigkill > 0) { g_source_remove(action->timer_sigkill); action->timer_sigkill = 0; } if (action->fd_stdout) { close(action->fd_stdout); action->fd_stdout = 0; } free(action->output); action->output = NULL; action->rc = 0; action->pid = 0; action->last_timeout_signo = 0; } static void stonith_action_destroy(stonith_action_t * action) { stonith_action_clear_tracking_data(action); free(action->agent); free(action->args); free(action->action); free(action->victim); free(action); } #define FAILURE_MAX_RETRIES 2 stonith_action_t * stonith_action_create(const char *agent, const char *_action, const char *victim, uint32_t victim_nodeid, int timeout, GHashTable * device_args, GHashTable * port_map) { stonith_action_t *action; action = calloc(1, sizeof(stonith_action_t)); crm_debug("Initiating action %s for agent %s (target=%s)", _action, agent, victim); action->args = make_args(agent, _action, victim, victim_nodeid, device_args, port_map); action->agent = strdup(agent); action->action = strdup(_action); if (victim) { action->victim = strdup(victim); } action->timeout = action->remaining_timeout = timeout; action->max_retries = FAILURE_MAX_RETRIES; if (device_args) { char buffer[512]; const char *value = NULL; snprintf(buffer, 511, "pcmk_%s_retries", _action); value = g_hash_table_lookup(device_args, buffer); if (value) { action->max_retries = atoi(value); } } return action; } #define READ_MAX 500 static char * read_output(int fd) { char buffer[READ_MAX]; char *output = NULL; int len = 0; int more = 0; if (!fd) { return NULL; } do { errno = 0; memset(&buffer, 0, READ_MAX); more = read(fd, buffer, READ_MAX - 1); if (more > 0) { buffer[more] = 0; /* Make sure its nul-terminated for logging * 'more' is always less than our buffer size */ crm_trace("Got %d more bytes: %.200s...", more, buffer); output = realloc_safe(output, len + more + 1); snprintf(output + len, more + 1, "%s", buffer); len += more; } } while (more == (READ_MAX - 1) || (more < 0 && errno == EINTR)); return output; } static gboolean update_remaining_timeout(stonith_action_t * action) { int diff = time(NULL) - action->initial_start_time; if (action->tries >= action->max_retries) { crm_info("Attempted to execute agent %s (%s) the maximum number of times (%d) allowed", action->agent, action->action, action->max_retries); action->remaining_timeout = 0; } else if ((action->rc != -ETIME) && diff < (action->timeout * 0.7)) { /* only set remaining timeout period if there is 30% * or greater of the original timeout period left */ action->remaining_timeout = action->timeout - diff; } else { action->remaining_timeout = 0; } return action->remaining_timeout ? TRUE : FALSE; } static void stonith_action_async_done(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) { stonith_action_t *action = mainloop_child_userdata(p); if (action->timer_sigterm > 0) { g_source_remove(action->timer_sigterm); action->timer_sigterm = 0; } if (action->timer_sigkill > 0) { g_source_remove(action->timer_sigkill); action->timer_sigkill = 0; } if (action->last_timeout_signo) { action->rc = -ETIME; crm_notice("Child process %d performing action '%s' timed out with signal %d", pid, action->action, action->last_timeout_signo); } else if (signo) { action->rc = -ECONNABORTED; crm_notice("Child process %d performing action '%s' timed out with signal %d", pid, action->action, signo); } else { action->rc = exitcode; crm_debug("Child process %d performing action '%s' exited with rc %d", pid, action->action, exitcode); } action->output = read_output(action->fd_stdout); if (action->rc != pcmk_ok && update_remaining_timeout(action)) { int rc = internal_stonith_action_execute(action); if (rc == pcmk_ok) { return; } } if (action->done_cb) { action->done_cb(pid, action->rc, action->output, action->userdata); } stonith_action_destroy(action); } static int internal_stonith_action_execute(stonith_action_t * action) { int pid, status, len, rc = -EPROTO; int ret; int total = 0; int p_read_fd, p_write_fd; /* parent read/write file descriptors */ int c_read_fd, c_write_fd; /* child read/write file descriptors */ int fd1[2]; int fd2[2]; int is_retry = 0; /* clear any previous tracking data */ stonith_action_clear_tracking_data(action); if (!action->tries) { action->initial_start_time = time(NULL); } action->tries++; if (action->tries > 1) { crm_info("Attempt %d to execute %s (%s). remaining timeout is %d", action->tries, action->agent, action->action, action->remaining_timeout); is_retry = 1; } c_read_fd = c_write_fd = p_read_fd = p_write_fd = -1; if (action->args == NULL || action->agent == NULL) goto fail; len = strlen(action->args); if (pipe(fd1)) goto fail; p_read_fd = fd1[0]; c_write_fd = fd1[1]; if (pipe(fd2)) goto fail; c_read_fd = fd2[0]; p_write_fd = fd2[1]; crm_debug("forking"); pid = fork(); if (pid < 0) { rc = -ECHILD; goto fail; } if (!pid) { /* child */ setpgid(0, 0); close(1); /* coverity[leaked_handle] False positive */ if (dup(c_write_fd) < 0) goto fail; close(2); /* coverity[leaked_handle] False positive */ if (dup(c_write_fd) < 0) goto fail; close(0); /* coverity[leaked_handle] False positive */ if (dup(c_read_fd) < 0) goto fail; /* keep c_write_fd open so parent can report all errors. */ close(c_read_fd); close(p_read_fd); close(p_write_fd); /* keep retries from executing out of control */ if (is_retry) { sleep(1); } execlp(action->agent, action->agent, NULL); exit(EXIT_FAILURE); } /* parent */ action->pid = pid; ret = fcntl(p_read_fd, F_SETFL, fcntl(p_read_fd, F_GETFL, 0) | O_NONBLOCK); if (ret < 0) { crm_perror(LOG_NOTICE, "Could not change the output of %s to be non-blocking", action->agent); } do { crm_debug("sending args"); ret = write(p_write_fd, action->args + total, len - total); if (ret > 0) { total += ret; } } while (errno == EINTR && total < len); if (total != len) { crm_perror(LOG_ERR, "Sent %d not %d bytes", total, len); if (ret >= 0) { rc = -ECOMM; } goto fail; } close(p_write_fd); p_write_fd = -1; /* async */ if (action->async) { action->fd_stdout = p_read_fd; mainloop_child_add(pid, 0/* Move the timeout here? */, action->action, action, stonith_action_async_done); crm_trace("Op: %s on %s, pid: %d, timeout: %ds", action->action, action->agent, pid, action->remaining_timeout); action->last_timeout_signo = 0; if (action->remaining_timeout) { action->timer_sigterm = g_timeout_add(1000 * action->remaining_timeout, st_child_term, action); action->timer_sigkill = g_timeout_add(1000 * (action->remaining_timeout + 5), st_child_kill, action); } else { crm_err("No timeout set for stonith operation %s with device %s", action->action, action->agent); } close(c_write_fd); close(c_read_fd); return 0; } else { /* sync */ int timeout = action->remaining_timeout + 1; pid_t p = 0; while (action->remaining_timeout < 0 || timeout > 0) { p = waitpid(pid, &status, WNOHANG); if (p > 0) { break; } sleep(1); timeout--; } if (timeout == 0) { int killrc = kill(-pid, SIGKILL); if (killrc && errno != ESRCH) { crm_err("kill(%d, KILL) failed: %s (%d)", pid, pcmk_strerror(errno), errno); } /* * From sigprocmask(2): * It is not possible to block SIGKILL or SIGSTOP. Attempts to do so are silently ignored. * * This makes it safe to skip WNOHANG here */ p = waitpid(pid, &status, 0); } if (p <= 0) { crm_perror(LOG_ERR, "waitpid(%d)", pid); } else if (p != pid) { crm_err("Waited for %d, got %d", pid, p); } action->output = read_output(p_read_fd); action->rc = -ECONNABORTED; rc = action->rc; if (timeout == 0) { action->rc = -ETIME; } else if (WIFEXITED(status)) { crm_debug("result = %d", WEXITSTATUS(status)); action->rc = -WEXITSTATUS(status); rc = 0; } else if (WIFSIGNALED(status)) { crm_err("call %s for %s exited due to signal %d", action->action, action->agent, WTERMSIG(status)); } else { crm_err("call %s for %s exited abnormally. stopped=%d, continued=%d", action->action, action->agent, WIFSTOPPED(status), WIFCONTINUED(status)); } } fail: if (p_read_fd >= 0) { close(p_read_fd); } if (p_write_fd >= 0) { close(p_write_fd); } if (c_read_fd >= 0) { close(c_read_fd); } if (c_write_fd >= 0) { close(c_write_fd); } return rc; } GPid stonith_action_execute_async(stonith_action_t * action, void *userdata, void (*done) (GPid pid, int rc, const char *output, gpointer user_data)) { int rc = 0; if (!action) { return -1; } action->userdata = userdata; action->done_cb = done; action->async = 1; rc = internal_stonith_action_execute(action); return rc < 0 ? rc : action->pid; } int stonith_action_execute(stonith_action_t * action, int *agent_result, char **output) { int rc = 0; if (!action) { return -1; } do { rc = internal_stonith_action_execute(action); if (rc == pcmk_ok) { /* success! */ break; } /* keep retrying while we have time left */ } while (update_remaining_timeout(action)); if (rc) { /* error */ return rc; } if (agent_result) { *agent_result = action->rc; } if (output) { *output = action->output; action->output = NULL; /* handed it off, do not free */ } stonith_action_destroy(action); return rc; } static int stonith_api_device_list(stonith_t * stonith, int call_options, const char *namespace, stonith_key_value_t ** devices, int timeout) { int count = 0; if (devices == NULL) { crm_err("Parameter error: stonith_api_device_list"); return -EFAULT; } /* Include Heartbeat agents */ if (namespace == NULL || safe_str_eq("heartbeat", namespace)) { #if HAVE_STONITH_STONITH_H static gboolean need_init = TRUE; char **entry = NULL; char **type_list = NULL; static char **(*type_list_fn) (void) = NULL; static void (*type_free_fn) (char **) = NULL; if (need_init) { need_init = FALSE; type_list_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_types", FALSE); type_free_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_free_hostlist", FALSE); } if (type_list_fn) { type_list = (*type_list_fn) (); } for (entry = type_list; entry != NULL && *entry; ++entry) { crm_trace("Added: %s", *entry); *devices = stonith_key_value_add(*devices, NULL, *entry); count++; } if (type_list && type_free_fn) { (*type_free_fn) (type_list); } #else if (namespace != NULL) { return -EINVAL; /* Heartbeat agents not supported */ } #endif } /* Include Red Hat agents, basically: ls -1 @sbin_dir@/fence_* */ if (namespace == NULL || safe_str_eq("redhat", namespace)) { struct dirent **namelist; int file_num = scandir(RH_STONITH_DIR, &namelist, 0, alphasort); if (file_num > 0) { struct stat prop; char buffer[FILENAME_MAX + 1]; while (file_num--) { if ('.' == namelist[file_num]->d_name[0]) { free(namelist[file_num]); continue; } else if (0 != strncmp(RH_STONITH_PREFIX, namelist[file_num]->d_name, strlen(RH_STONITH_PREFIX))) { free(namelist[file_num]); continue; } snprintf(buffer, FILENAME_MAX, "%s/%s", RH_STONITH_DIR, namelist[file_num]->d_name); if (stat(buffer, &prop) == 0 && S_ISREG(prop.st_mode)) { *devices = stonith_key_value_add(*devices, NULL, namelist[file_num]->d_name); count++; } free(namelist[file_num]); } free(namelist); } } return count; } #if HAVE_STONITH_STONITH_H static inline char * strdup_null(const char *val) { if (val) { return strdup(val); } return NULL; } static void stonith_plugin(int priority, const char *fmt, ...) __attribute__((__format__ (__printf__, 2, 3))); static void stonith_plugin(int priority, const char *format, ...) { int err = errno; va_list ap; int len = 0; char *string = NULL; va_start(ap, format); len = vasprintf (&string, format, ap); CRM_ASSERT(len > 0); do_crm_log_alias(priority, __FILE__, __func__, __LINE__, "%s", string); free(string); errno = err; } #endif static int stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *agent, const char *namespace, char **output, int timeout) { int rc = 0; char *buffer = NULL; const char *provider = get_stonith_provider(agent, namespace); crm_trace("looking up %s/%s metadata", agent, provider); /* By having this in a library, we can access it from stonith_admin * when neither lrmd or stonith-ng are running * Important for the crm shell's validations... */ if (safe_str_eq(provider, "redhat")) { stonith_action_t *action = stonith_action_create(agent, "metadata", NULL, 0, 5, NULL, NULL); int exec_rc = stonith_action_execute(action, &rc, &buffer); + xmlNode *xml = NULL; + xmlNode *actions = NULL; + xmlXPathObject *xpathObj = NULL; if (exec_rc < 0 || rc != 0 || buffer == NULL) { + crm_warn("Could not obtain metadata for %s", agent); crm_debug("Query failed: %d %d: %s", exec_rc, rc, crm_str(buffer)); free(buffer); /* Just in case */ return -EINVAL; + } - } else { - - xmlNode *xml = string2xml(buffer); - xmlNode *actions = NULL; - xmlXPathObject *xpathObj = NULL; + xml = string2xml(buffer); + if(xml == NULL) { + crm_warn("Metadata for %s is invalid", agent); + free(buffer); + return -EINVAL; + } - xpathObj = xpath_search(xml, "//actions"); - if (numXpathResults(xpathObj) > 0) { - actions = getXpathResult(xpathObj, 0); - } + xpathObj = xpath_search(xml, "//actions"); + if (numXpathResults(xpathObj) > 0) { + actions = getXpathResult(xpathObj, 0); + } - freeXpathObject(xpathObj); + freeXpathObject(xpathObj); - /* Now fudge the metadata so that the start/stop actions appear */ - xpathObj = xpath_search(xml, "//action[@name='stop']"); - if (numXpathResults(xpathObj) <= 0) { - xmlNode *tmp = NULL; + /* Now fudge the metadata so that the start/stop actions appear */ + xpathObj = xpath_search(xml, "//action[@name='stop']"); + if (numXpathResults(xpathObj) <= 0) { + xmlNode *tmp = NULL; - tmp = create_xml_node(actions, "action"); - crm_xml_add(tmp, "name", "stop"); - crm_xml_add(tmp, "timeout", "20s"); + tmp = create_xml_node(actions, "action"); + crm_xml_add(tmp, "name", "stop"); + crm_xml_add(tmp, "timeout", "20s"); - tmp = create_xml_node(actions, "action"); - crm_xml_add(tmp, "name", "start"); - crm_xml_add(tmp, "timeout", "20s"); - } + tmp = create_xml_node(actions, "action"); + crm_xml_add(tmp, "name", "start"); + crm_xml_add(tmp, "timeout", "20s"); + } - freeXpathObject(xpathObj); + freeXpathObject(xpathObj); - /* Now fudge the metadata so that the port isn't required in the configuration */ - xpathObj = xpath_search(xml, "//parameter[@name='port']"); - if (numXpathResults(xpathObj) > 0) { - /* We'll fill this in */ - xmlNode *tmp = getXpathResult(xpathObj, 0); + /* Now fudge the metadata so that the port isn't required in the configuration */ + xpathObj = xpath_search(xml, "//parameter[@name='port']"); + if (numXpathResults(xpathObj) > 0) { + /* We'll fill this in */ + xmlNode *tmp = getXpathResult(xpathObj, 0); - crm_xml_add(tmp, "required", "0"); - } + crm_xml_add(tmp, "required", "0"); + } - freeXpathObject(xpathObj); - free(buffer); - buffer = dump_xml_formatted(xml); - free_xml(xml); - if (!buffer) { - return -EINVAL; - } + freeXpathObject(xpathObj); + free(buffer); + buffer = dump_xml_formatted(xml); + free_xml(xml); + if (!buffer) { + return -EINVAL; } } else { #if !HAVE_STONITH_STONITH_H return -EINVAL; /* Heartbeat agents not supported */ #else int bufferlen = 0; static const char *no_parameter_info = ""; Stonith *stonith_obj = NULL; static gboolean need_init = TRUE; static Stonith *(*st_new_fn) (const char *) = NULL; static const char *(*st_info_fn) (Stonith *, int) = NULL; static void (*st_del_fn) (Stonith *) = NULL; static void (*st_log_fn) (Stonith *, PILLogFun) = NULL; if (need_init) { need_init = FALSE; st_new_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_new", FALSE); st_del_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_delete", FALSE); st_log_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_set_log", FALSE); st_info_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_get_info", FALSE); } if (lha_agents_lib && st_new_fn && st_del_fn && st_info_fn && st_log_fn) { char *xml_meta_longdesc = NULL; char *xml_meta_shortdesc = NULL; char *meta_param = NULL; char *meta_longdesc = NULL; char *meta_shortdesc = NULL; stonith_obj = (*st_new_fn) (agent); if (stonith_obj) { (*st_log_fn) (stonith_obj, (PILLogFun) & stonith_plugin); meta_longdesc = strdup_null((*st_info_fn) (stonith_obj, ST_DEVICEDESCR)); if (meta_longdesc == NULL) { crm_warn("no long description in %s's metadata.", agent); meta_longdesc = strdup(no_parameter_info); } meta_shortdesc = strdup_null((*st_info_fn) (stonith_obj, ST_DEVICEID)); if (meta_shortdesc == NULL) { crm_warn("no short description in %s's metadata.", agent); meta_shortdesc = strdup(no_parameter_info); } meta_param = strdup_null((*st_info_fn) (stonith_obj, ST_CONF_XML)); if (meta_param == NULL) { crm_warn("no list of parameters in %s's metadata.", agent); meta_param = strdup(no_parameter_info); } (*st_del_fn) (stonith_obj); } else { return -EINVAL; /* Heartbeat agents not supported */ } xml_meta_longdesc = (char *)xmlEncodeEntitiesReentrant(NULL, (const unsigned char *)meta_longdesc); xml_meta_shortdesc = (char *)xmlEncodeEntitiesReentrant(NULL, (const unsigned char *)meta_shortdesc); bufferlen = strlen(META_TEMPLATE) + strlen(agent) + strlen(xml_meta_longdesc) + strlen(xml_meta_shortdesc) + strlen(meta_param) + 1; buffer = calloc(1, bufferlen); snprintf(buffer, bufferlen - 1, META_TEMPLATE, agent, xml_meta_longdesc, xml_meta_shortdesc, meta_param); xmlFree(xml_meta_longdesc); xmlFree(xml_meta_shortdesc); free(meta_shortdesc); free(meta_longdesc); free(meta_param); } #endif } if (output) { *output = buffer; } else { free(buffer); } return rc; } static int stonith_api_query(stonith_t * stonith, int call_options, const char *target, stonith_key_value_t ** devices, int timeout) { int rc = 0, lpc = 0, max = 0; xmlNode *data = NULL; xmlNode *output = NULL; xmlXPathObjectPtr xpathObj = NULL; CRM_CHECK(devices != NULL, return -EINVAL); data = create_xml_node(NULL, F_STONITH_DEVICE); crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__); crm_xml_add(data, F_STONITH_TARGET, target); crm_xml_add(data, F_STONITH_ACTION, "off"); rc = stonith_send_command(stonith, STONITH_OP_QUERY, data, &output, call_options, timeout); if (rc < 0) { return rc; } xpathObj = xpath_search(output, "//@agent"); if (xpathObj) { max = numXpathResults(xpathObj); for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if(match != NULL) { crm_info("%s[%d] = %s", "//@agent", lpc, xmlGetNodePath(match)); *devices = stonith_key_value_add(*devices, NULL, crm_element_value(match, XML_ATTR_ID)); } } freeXpathObject(xpathObj); } free_xml(output); free_xml(data); return max; } static int stonith_api_call(stonith_t * stonith, int call_options, const char *id, const char *action, const char *victim, int timeout, xmlNode ** output) { int rc = 0; xmlNode *data = NULL; data = create_xml_node(NULL, F_STONITH_DEVICE); crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__); crm_xml_add(data, F_STONITH_DEVICE, id); crm_xml_add(data, F_STONITH_ACTION, action); crm_xml_add(data, F_STONITH_TARGET, victim); rc = stonith_send_command(stonith, STONITH_OP_EXEC, data, output, call_options, timeout); free_xml(data); return rc; } static int stonith_api_list(stonith_t * stonith, int call_options, const char *id, char **list_info, int timeout) { int rc; xmlNode *output = NULL; rc = stonith_api_call(stonith, call_options, id, "list", NULL, timeout, &output); if (output && list_info) { const char *list_str; list_str = crm_element_value(output, "st_output"); if (list_str) { *list_info = strdup(list_str); } } if (output) { free_xml(output); } return rc; } static int stonith_api_monitor(stonith_t * stonith, int call_options, const char *id, int timeout) { return stonith_api_call(stonith, call_options, id, "monitor", NULL, timeout, NULL); } static int stonith_api_status(stonith_t * stonith, int call_options, const char *id, const char *port, int timeout) { return stonith_api_call(stonith, call_options, id, "status", port, timeout, NULL); } static int stonith_api_fence(stonith_t * stonith, int call_options, const char *node, const char *action, int timeout, int tolerance) { int rc = 0; xmlNode *data = NULL; data = create_xml_node(NULL, __FUNCTION__); crm_xml_add(data, F_STONITH_TARGET, node); crm_xml_add(data, F_STONITH_ACTION, action); crm_xml_add_int(data, F_STONITH_TIMEOUT, timeout); crm_xml_add_int(data, F_STONITH_TOLERANCE, tolerance); rc = stonith_send_command(stonith, STONITH_OP_FENCE, data, NULL, call_options, timeout); free_xml(data); return rc; } static int stonith_api_confirm(stonith_t * stonith, int call_options, const char *target) { return stonith_api_fence(stonith, call_options | st_opt_manual_ack, target, "off", 0, 0); } static int stonith_api_history(stonith_t * stonith, int call_options, const char *node, stonith_history_t ** history, int timeout) { int rc = 0; xmlNode *data = NULL; xmlNode *output = NULL; stonith_history_t *last = NULL; *history = NULL; if (node) { data = create_xml_node(NULL, __FUNCTION__); crm_xml_add(data, F_STONITH_TARGET, node); } rc = stonith_send_command(stonith, STONITH_OP_FENCE_HISTORY, data, &output, call_options | st_opt_sync_call, timeout); free_xml(data); if (rc == 0) { xmlNode *op = NULL; xmlNode *reply = get_xpath_object("//" F_STONITH_HISTORY_LIST, output, LOG_ERR); for (op = __xml_first_child(reply); op != NULL; op = __xml_next(op)) { stonith_history_t *kvp; kvp = calloc(1, sizeof(stonith_history_t)); kvp->target = crm_element_value_copy(op, F_STONITH_TARGET); kvp->action = crm_element_value_copy(op, F_STONITH_ACTION); kvp->origin = crm_element_value_copy(op, F_STONITH_ORIGIN); kvp->delegate = crm_element_value_copy(op, F_STONITH_DELEGATE); kvp->client = crm_element_value_copy(op, F_STONITH_CLIENTNAME); crm_element_value_int(op, F_STONITH_DATE, &kvp->completed); crm_element_value_int(op, F_STONITH_STATE, &kvp->state); if (last) { last->next = kvp; } else { *history = kvp; } last = kvp; } } return rc; } gboolean is_redhat_agent(const char *agent) { int rc = 0; struct stat prop; char buffer[FILENAME_MAX + 1]; snprintf(buffer, FILENAME_MAX, "%s/%s", RH_STONITH_DIR, agent); rc = stat(buffer, &prop); if (rc >= 0 && S_ISREG(prop.st_mode)) { return TRUE; } return FALSE; } const char * get_stonith_provider(const char *agent, const char *provider) { /* This function sucks */ if (is_redhat_agent(agent)) { return "redhat"; #if HAVE_STONITH_STONITH_H } else { Stonith *stonith_obj = NULL; static gboolean need_init = TRUE; static Stonith *(*st_new_fn) (const char *) = NULL; static void (*st_del_fn) (Stonith *) = NULL; if (need_init) { need_init = FALSE; st_new_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_new", FALSE); st_del_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_delete", FALSE); } if (lha_agents_lib && st_new_fn && st_del_fn) { stonith_obj = (*st_new_fn) (agent); if (stonith_obj) { (*st_del_fn) (stonith_obj); return "heartbeat"; } } #endif } crm_err("No such device: %s", agent); return NULL; } static gint stonithlib_GCompareFunc(gconstpointer a, gconstpointer b) { int rc = 0; const stonith_notify_client_t *a_client = a; const stonith_notify_client_t *b_client = b; CRM_CHECK(a_client->event != NULL && b_client->event != NULL, return 0); rc = strcmp(a_client->event, b_client->event); if (rc == 0) { if (a_client->notify == NULL || b_client->notify == NULL) { return 0; } else if (a_client->notify == b_client->notify) { return 0; } else if (((long)a_client->notify) < ((long)b_client->notify)) { crm_err("callbacks for %s are not equal: %p vs. %p", a_client->event, a_client->notify, b_client->notify); return -1; } crm_err("callbacks for %s are not equal: %p vs. %p", a_client->event, a_client->notify, b_client->notify); return 1; } return rc; } xmlNode * stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, int call_options) { xmlNode *op_msg = create_xml_node(NULL, "stonith_command"); CRM_CHECK(op_msg != NULL, return NULL); CRM_CHECK(token != NULL, return NULL); crm_xml_add(op_msg, F_XML_TAGNAME, "stonith_command"); crm_xml_add(op_msg, F_TYPE, T_STONITH_NG); crm_xml_add(op_msg, F_STONITH_CALLBACK_TOKEN, token); crm_xml_add(op_msg, F_STONITH_OPERATION, op); crm_xml_add_int(op_msg, F_STONITH_CALLID, call_id); crm_trace("Sending call options: %.8lx, %d", (long)call_options, call_options); crm_xml_add_int(op_msg, F_STONITH_CALLOPTS, call_options); if (data != NULL) { add_message_xml(op_msg, F_STONITH_CALLDATA, data); } return op_msg; } static void stonith_destroy_op_callback(gpointer data) { stonith_callback_client_t *blob = data; if (blob->timer && blob->timer->ref > 0) { g_source_remove(blob->timer->ref); } free(blob->timer); free(blob); } static int stonith_api_signoff(stonith_t * stonith) { stonith_private_t *native = stonith->private; crm_debug("Signing out of the STONITH Service"); if (native->source != NULL) { /* Attached to mainloop */ mainloop_del_ipc_client(native->source); native->source = NULL; native->ipc = NULL; } else if (native->ipc) { /* Not attached to mainloop */ crm_ipc_t *ipc = native->ipc; native->ipc = NULL; crm_ipc_close(ipc); crm_ipc_destroy(ipc); } free(native->token); native->token = NULL; stonith->state = stonith_disconnected; return pcmk_ok; } static int stonith_api_signon(stonith_t * stonith, const char *name, int *stonith_fd) { int rc = pcmk_ok; stonith_private_t *native = stonith->private; static struct ipc_client_callbacks st_callbacks = { .dispatch = stonith_dispatch_internal, .destroy = stonith_connection_destroy }; crm_trace("Connecting command channel"); stonith->state = stonith_connected_command; if (stonith_fd) { /* No mainloop */ native->ipc = crm_ipc_new("stonith-ng", 0); if (native->ipc && crm_ipc_connect(native->ipc)) { *stonith_fd = crm_ipc_get_fd(native->ipc); } else if (native->ipc) { crm_perror(LOG_ERR, "Connection to STONITH manager failed"); rc = -ENOTCONN; } } else { /* With mainloop */ native->source = mainloop_add_ipc_client("stonith-ng", G_PRIORITY_MEDIUM, 0, stonith, &st_callbacks); native->ipc = mainloop_get_ipc_client(native->source); } if (native->ipc == NULL) { crm_debug("Could not connect to the Stonith API"); rc = -ENOTCONN; } if (rc == pcmk_ok) { xmlNode *reply = NULL; xmlNode *hello = create_xml_node(NULL, "stonith_command"); crm_xml_add(hello, F_TYPE, T_STONITH_NG); crm_xml_add(hello, F_STONITH_OPERATION, CRM_OP_REGISTER); crm_xml_add(hello, F_STONITH_CLIENTNAME, name); rc = crm_ipc_send(native->ipc, hello, crm_ipc_client_response, -1, &reply); if (rc < 0) { crm_perror(LOG_DEBUG, "Couldn't complete registration with the fencing API: %d", rc); rc = -ECOMM; } else if (reply == NULL) { crm_err("Did not receive registration reply"); rc = -EPROTO; } else { const char *msg_type = crm_element_value(reply, F_STONITH_OPERATION); const char *tmp_ticket = crm_element_value(reply, F_STONITH_CLIENTID); if (safe_str_neq(msg_type, CRM_OP_REGISTER)) { crm_err("Invalid registration message: %s", msg_type); crm_log_xml_err(reply, "Bad reply"); rc = -EPROTO; } else if (tmp_ticket == NULL) { crm_err("No registration token provided"); crm_log_xml_err(reply, "Bad reply"); rc = -EPROTO; } else { crm_trace("Obtained registration token: %s", tmp_ticket); native->token = strdup(tmp_ticket); rc = pcmk_ok; } } free_xml(reply); free_xml(hello); } if (rc == pcmk_ok) { #if HAVE_MSGFROMIPC_TIMEOUT stonith->call_timeout = MAX_IPC_DELAY; #endif crm_debug("Connection to STONITH successful"); return pcmk_ok; } crm_debug("Connection to STONITH failed: %s", pcmk_strerror(rc)); stonith->cmds->disconnect(stonith); return rc; } static int stonith_set_notification(stonith_t * stonith, const char *callback, int enabled) { int rc = pcmk_ok; xmlNode *notify_msg = create_xml_node(NULL, __FUNCTION__); stonith_private_t *native = stonith->private; if (stonith->state != stonith_disconnected) { crm_xml_add(notify_msg, F_STONITH_OPERATION, T_STONITH_NOTIFY); if (enabled) { crm_xml_add(notify_msg, F_STONITH_NOTIFY_ACTIVATE, callback); } else { crm_xml_add(notify_msg, F_STONITH_NOTIFY_DEACTIVATE, callback); } rc = crm_ipc_send(native->ipc, notify_msg, crm_ipc_client_response, -1, NULL); if (rc < 0) { crm_perror(LOG_DEBUG, "Couldn't register for fencing notifications: %d", rc); rc = -ECOMM; } else { rc = pcmk_ok; } } free_xml(notify_msg); return rc; } static int stonith_api_add_notification(stonith_t * stonith, const char *event, void (*callback) (stonith_t * stonith, stonith_event_t * e)) { GList *list_item = NULL; stonith_notify_client_t *new_client = NULL; stonith_private_t *private = NULL; private = stonith->private; crm_trace("Adding callback for %s events (%d)", event, g_list_length(private->notify_list)); new_client = calloc(1, sizeof(stonith_notify_client_t)); new_client->event = event; new_client->notify = callback; list_item = g_list_find_custom(private->notify_list, new_client, stonithlib_GCompareFunc); if (list_item != NULL) { crm_warn("Callback already present"); free(new_client); return -ENOTUNIQ; } else { private->notify_list = g_list_append(private->notify_list, new_client); stonith_set_notification(stonith, event, 1); crm_trace("Callback added (%d)", g_list_length(private->notify_list)); } return pcmk_ok; } static int stonith_api_del_notification(stonith_t * stonith, const char *event) { GList *list_item = NULL; stonith_notify_client_t *new_client = NULL; stonith_private_t *private = NULL; crm_debug("Removing callback for %s events", event); private = stonith->private; new_client = calloc(1, sizeof(stonith_notify_client_t)); new_client->event = event; new_client->notify = NULL; list_item = g_list_find_custom(private->notify_list, new_client, stonithlib_GCompareFunc); stonith_set_notification(stonith, event, 0); if (list_item != NULL) { stonith_notify_client_t *list_client = list_item->data; private->notify_list = g_list_remove(private->notify_list, list_client); free(list_client); crm_trace("Removed callback"); } else { crm_trace("Callback not present"); } free(new_client); return pcmk_ok; } static gboolean stonith_async_timeout_handler(gpointer data) { struct timer_rec_s *timer = data; crm_err("Async call %d timed out after %dms", timer->call_id, timer->timeout); stonith_perform_callback(timer->stonith, NULL, timer->call_id, -ETIME); /* Always return TRUE, never remove the handler * We do that in stonith_del_callback() */ return TRUE; } static void set_callback_timeout(stonith_callback_client_t * callback, stonith_t * stonith, int call_id, int timeout) { struct timer_rec_s *async_timer = callback->timer; if (timeout <= 0) { return; } if (!async_timer) { async_timer = calloc(1, sizeof(struct timer_rec_s)); callback->timer = async_timer; } async_timer->stonith = stonith; async_timer->call_id = call_id; /* Allow a fair bit of grace to allow the server to tell us of a timeout * This is only a fallback */ async_timer->timeout = (timeout + 60) * 1000; if (async_timer->ref) { g_source_remove(async_timer->ref); } async_timer->ref = g_timeout_add(async_timer->timeout, stonith_async_timeout_handler, async_timer); } static void update_callback_timeout(int call_id, int timeout, stonith_t * st) { stonith_callback_client_t *callback = NULL; stonith_private_t *private = st->private; callback = g_hash_table_lookup(private->stonith_op_callback_table, GINT_TO_POINTER(call_id)); if (!callback || !callback->allow_timeout_updates) { return; } set_callback_timeout(callback, st, call_id, timeout); } static void invoke_callback(stonith_t * st, int call_id, int rc, void *userdata, void (*callback) (stonith_t * st, stonith_callback_data_t * data)) { stonith_callback_data_t data = { 0, }; data.call_id = call_id; data.rc = rc; data.userdata = userdata; callback(st, &data); } static int stonith_api_add_callback(stonith_t * stonith, int call_id, int timeout, int options, void *user_data, const char *callback_name, void (*callback) (stonith_t * st, stonith_callback_data_t * data)) { stonith_callback_client_t *blob = NULL; stonith_private_t *private = NULL; CRM_CHECK(stonith != NULL, return -EINVAL); CRM_CHECK(stonith->private != NULL, return -EINVAL); private = stonith->private; if (call_id == 0) { private->op_callback = callback; } else if (call_id < 0) { if (!(options & st_opt_report_only_success)) { crm_trace("Call failed, calling %s: %s", callback_name, pcmk_strerror(call_id)); invoke_callback(stonith, call_id, call_id, user_data, callback); } else { crm_warn("STONITH call failed: %s", pcmk_strerror(call_id)); } return FALSE; } blob = calloc(1, sizeof(stonith_callback_client_t)); blob->id = callback_name; blob->only_success = (options & st_opt_report_only_success) ? TRUE : FALSE; blob->user_data = user_data; blob->callback = callback; blob->allow_timeout_updates = (options & st_opt_timeout_updates) ? TRUE : FALSE; if (timeout > 0) { set_callback_timeout(blob, stonith, call_id, timeout); } g_hash_table_insert(private->stonith_op_callback_table, GINT_TO_POINTER(call_id), blob); crm_trace("Added callback to %s for call %d", callback_name, call_id); return TRUE; } static int stonith_api_del_callback(stonith_t * stonith, int call_id, bool all_callbacks) { stonith_private_t *private = stonith->private; if (all_callbacks) { private->op_callback = NULL; g_hash_table_destroy(private->stonith_op_callback_table); private->stonith_op_callback_table = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, stonith_destroy_op_callback); } else if (call_id == 0) { private->op_callback = NULL; } else { g_hash_table_remove(private->stonith_op_callback_table, GINT_TO_POINTER(call_id)); } return pcmk_ok; } static void stonith_dump_pending_op(gpointer key, gpointer value, gpointer user_data) { int call = GPOINTER_TO_INT(key); stonith_callback_client_t *blob = value; crm_debug("Call %d (%s): pending", call, crm_str(blob->id)); } void stonith_dump_pending_callbacks(stonith_t * stonith) { stonith_private_t *private = stonith->private; if (private->stonith_op_callback_table == NULL) { return; } return g_hash_table_foreach(private->stonith_op_callback_table, stonith_dump_pending_op, NULL); } void stonith_perform_callback(stonith_t * stonith, xmlNode * msg, int call_id, int rc) { stonith_private_t *private = NULL; stonith_callback_client_t *blob = NULL; stonith_callback_client_t local_blob; CRM_CHECK(stonith != NULL, return); CRM_CHECK(stonith->private != NULL, return); private = stonith->private; local_blob.id = NULL; local_blob.callback = NULL; local_blob.user_data = NULL; local_blob.only_success = FALSE; if (msg != NULL) { crm_element_value_int(msg, F_STONITH_RC, &rc); crm_element_value_int(msg, F_STONITH_CALLID, &call_id); } CRM_CHECK(call_id > 0, crm_log_xml_err(msg, "Bad result")); blob = g_hash_table_lookup(private->stonith_op_callback_table, GINT_TO_POINTER(call_id)); if (blob != NULL) { local_blob = *blob; blob = NULL; stonith_api_del_callback(stonith, call_id, FALSE); } else { crm_trace("No callback found for call %d", call_id); local_blob.callback = NULL; } if (local_blob.callback != NULL && (rc == pcmk_ok || local_blob.only_success == FALSE)) { crm_trace("Invoking callback %s for call %d", crm_str(local_blob.id), call_id); invoke_callback(stonith, call_id, rc, local_blob.user_data, local_blob.callback); } else if (private->op_callback == NULL && rc != pcmk_ok) { crm_warn("STONITH command failed: %s", pcmk_strerror(rc)); crm_log_xml_debug(msg, "Failed STONITH Update"); } if (private->op_callback != NULL) { crm_trace("Invoking global callback for call %d", call_id); invoke_callback(stonith, call_id, rc, NULL, private->op_callback); } crm_trace("OP callback activated."); } /* */ static stonith_event_t * xml_to_event(xmlNode * msg) { stonith_event_t *event = calloc(1, sizeof(stonith_event_t)); const char *ntype = crm_element_value(msg, F_SUBTYPE); char *data_addr = crm_strdup_printf("//%s", ntype); xmlNode *data = get_xpath_object(data_addr, msg, LOG_DEBUG); crm_log_xml_trace(msg, "stonith_notify"); crm_element_value_int(msg, F_STONITH_RC, &(event->result)); if (safe_str_eq(ntype, T_STONITH_NOTIFY_FENCE)) { event->operation = crm_element_value_copy(msg, F_STONITH_OPERATION); if (data) { event->origin = crm_element_value_copy(data, F_STONITH_ORIGIN); event->action = crm_element_value_copy(data, F_STONITH_ACTION); event->target = crm_element_value_copy(data, F_STONITH_TARGET); event->executioner = crm_element_value_copy(data, F_STONITH_DELEGATE); event->id = crm_element_value_copy(data, F_STONITH_REMOTE_OP_ID); event->client_origin = crm_element_value_copy(data, F_STONITH_CLIENTNAME); event->device = crm_element_value_copy(data, F_STONITH_DEVICE); } else { crm_err("No data for %s event", ntype); crm_log_xml_notice(msg, "BadEvent"); } } free(data_addr); return event; } static void event_free(stonith_event_t * event) { free(event->id); free(event->type); free(event->message); free(event->operation); free(event->origin); free(event->action); free(event->target); free(event->executioner); free(event->device); free(event->client_origin); free(event); } static void stonith_send_notification(gpointer data, gpointer user_data) { struct notify_blob_s *blob = user_data; stonith_notify_client_t *entry = data; stonith_event_t *st_event = NULL; const char *event = NULL; if (blob->xml == NULL) { crm_warn("Skipping callback - NULL message"); return; } event = crm_element_value(blob->xml, F_SUBTYPE); if (entry == NULL) { crm_warn("Skipping callback - NULL callback client"); return; } else if (entry->notify == NULL) { crm_warn("Skipping callback - NULL callback"); return; } else if (safe_str_neq(entry->event, event)) { crm_trace("Skipping callback - event mismatch %p/%s vs. %s", entry, entry->event, event); return; } st_event = xml_to_event(blob->xml); crm_trace("Invoking callback for %p/%s event...", entry, event); entry->notify(blob->stonith, st_event); crm_trace("Callback invoked..."); event_free(st_event); } int stonith_send_command(stonith_t * stonith, const char *op, xmlNode * data, xmlNode ** output_data, int call_options, int timeout) { int rc = 0; int reply_id = -1; enum crm_ipc_flags ipc_flags = crm_ipc_flags_none; xmlNode *op_msg = NULL; xmlNode *op_reply = NULL; stonith_private_t *native = stonith->private; if (stonith->state == stonith_disconnected) { return -ENOTCONN; } if (output_data != NULL) { *output_data = NULL; } if (op == NULL) { crm_err("No operation specified"); return -EINVAL; } if (call_options & st_opt_sync_call) { ipc_flags |= crm_ipc_client_response; } stonith->call_id++; /* prevent call_id from being negative (or zero) and conflicting * with the stonith_errors enum * use 2 because we use it as (stonith->call_id - 1) below */ if (stonith->call_id < 1) { stonith->call_id = 1; } CRM_CHECK(native->token != NULL,; ); op_msg = stonith_create_op(stonith->call_id, native->token, op, data, call_options); if (op_msg == NULL) { return -EINVAL; } crm_xml_add_int(op_msg, F_STONITH_TIMEOUT, timeout); crm_trace("Sending %s message to STONITH service, Timeout: %ds", op, timeout); rc = crm_ipc_send(native->ipc, op_msg, ipc_flags, 1000 * (timeout + 60), &op_reply); free_xml(op_msg); if (rc < 0) { crm_perror(LOG_ERR, "Couldn't perform %s operation (timeout=%ds): %d", op, timeout, rc); rc = -ECOMM; goto done; } crm_log_xml_trace(op_reply, "Reply"); if (!(call_options & st_opt_sync_call)) { crm_trace("Async call %d, returning", stonith->call_id); CRM_CHECK(stonith->call_id != 0, return -EPROTO); free_xml(op_reply); return stonith->call_id; } rc = pcmk_ok; crm_element_value_int(op_reply, F_STONITH_CALLID, &reply_id); if (reply_id == stonith->call_id) { crm_trace("Syncronous reply %d received", reply_id); if (crm_element_value_int(op_reply, F_STONITH_RC, &rc) != 0) { rc = -ENOMSG; } if ((call_options & st_opt_discard_reply) || output_data == NULL) { crm_trace("Discarding reply"); } else { *output_data = op_reply; op_reply = NULL; /* Prevent subsequent free */ } } else if (reply_id <= 0) { crm_err("Received bad reply: No id set"); crm_log_xml_err(op_reply, "Bad reply"); free_xml(op_reply); rc = -ENOMSG; } else { crm_err("Received bad reply: %d (wanted %d)", reply_id, stonith->call_id); crm_log_xml_err(op_reply, "Old reply"); free_xml(op_reply); rc = -ENOMSG; } done: if (crm_ipc_connected(native->ipc) == FALSE) { crm_err("STONITH disconnected"); stonith->state = stonith_disconnected; } free_xml(op_reply); return rc; } /* Not used with mainloop */ bool stonith_dispatch(stonith_t * st) { gboolean stay_connected = TRUE; stonith_private_t *private = NULL; CRM_ASSERT(st != NULL); private = st->private; while (crm_ipc_ready(private->ipc)) { if (crm_ipc_read(private->ipc) > 0) { const char *msg = crm_ipc_buffer(private->ipc); stonith_dispatch_internal(msg, strlen(msg), st); } if (crm_ipc_connected(private->ipc) == FALSE) { crm_err("Connection closed"); stay_connected = FALSE; } } return stay_connected; } int stonith_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata) { const char *type = NULL; struct notify_blob_s blob; stonith_t *st = userdata; stonith_private_t *private = NULL; CRM_ASSERT(st != NULL); private = st->private; blob.stonith = st; blob.xml = string2xml(buffer); if (blob.xml == NULL) { crm_warn("Received a NULL msg from STONITH service: %s.", buffer); return 0; } /* do callbacks */ type = crm_element_value(blob.xml, F_TYPE); crm_trace("Activating %s callbacks...", type); if (safe_str_eq(type, T_STONITH_NG)) { stonith_perform_callback(st, blob.xml, 0, 0); } else if (safe_str_eq(type, T_STONITH_NOTIFY)) { g_list_foreach(private->notify_list, stonith_send_notification, &blob); } else if (safe_str_eq(type, T_STONITH_TIMEOUT_VALUE)) { int call_id = 0; int timeout = 0; crm_element_value_int(blob.xml, F_STONITH_TIMEOUT, &timeout); crm_element_value_int(blob.xml, F_STONITH_CALLID, &call_id); update_callback_timeout(call_id, timeout, st); } else { crm_err("Unknown message type: %s", type); crm_log_xml_warn(blob.xml, "BadReply"); } free_xml(blob.xml); return 1; } static int stonith_api_free(stonith_t * stonith) { int rc = pcmk_ok; crm_trace("Destroying %p", stonith); if (stonith->state != stonith_disconnected) { crm_trace("Disconnecting %p first", stonith); rc = stonith->cmds->disconnect(stonith); } if (stonith->state == stonith_disconnected) { stonith_private_t *private = stonith->private; crm_trace("Removing %d callbacks", g_hash_table_size(private->stonith_op_callback_table)); g_hash_table_destroy(private->stonith_op_callback_table); crm_trace("Destroying %d notification clients", g_list_length(private->notify_list)); g_list_free_full(private->notify_list, free); free(stonith->private); free(stonith->cmds); free(stonith); } else { crm_err("Not free'ing active connection: %s (%d)", pcmk_strerror(rc), rc); } return rc; } void stonith_api_delete(stonith_t * stonith) { crm_trace("Destroying %p", stonith); if(stonith) { stonith->cmds->free(stonith); } } stonith_t * stonith_api_new(void) { stonith_t *new_stonith = NULL; stonith_private_t *private = NULL; new_stonith = calloc(1, sizeof(stonith_t)); private = calloc(1, sizeof(stonith_private_t)); new_stonith->private = private; private->stonith_op_callback_table = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, stonith_destroy_op_callback); private->notify_list = NULL; new_stonith->call_id = 1; new_stonith->state = stonith_disconnected; new_stonith->cmds = calloc(1, sizeof(stonith_api_operations_t)); /* *INDENT-OFF* */ new_stonith->cmds->free = stonith_api_free; new_stonith->cmds->connect = stonith_api_signon; new_stonith->cmds->disconnect = stonith_api_signoff; new_stonith->cmds->list = stonith_api_list; new_stonith->cmds->monitor = stonith_api_monitor; new_stonith->cmds->status = stonith_api_status; new_stonith->cmds->fence = stonith_api_fence; new_stonith->cmds->confirm = stonith_api_confirm; new_stonith->cmds->history = stonith_api_history; new_stonith->cmds->list_agents = stonith_api_device_list; new_stonith->cmds->metadata = stonith_api_device_metadata; new_stonith->cmds->query = stonith_api_query; new_stonith->cmds->remove_device = stonith_api_remove_device; new_stonith->cmds->register_device = stonith_api_register_device; new_stonith->cmds->remove_level = stonith_api_remove_level; new_stonith->cmds->register_level = stonith_api_register_level; new_stonith->cmds->remove_callback = stonith_api_del_callback; new_stonith->cmds->register_callback = stonith_api_add_callback; new_stonith->cmds->remove_notification = stonith_api_del_notification; new_stonith->cmds->register_notification = stonith_api_add_notification; /* *INDENT-ON* */ return new_stonith; } stonith_key_value_t * stonith_key_value_add(stonith_key_value_t * head, const char *key, const char *value) { stonith_key_value_t *p, *end; p = calloc(1, sizeof(stonith_key_value_t)); if (key) { p->key = strdup(key); } if (value) { p->value = strdup(value); } end = head; while (end && end->next) { end = end->next; } if (end) { end->next = p; } else { head = p; } return head; } void stonith_key_value_freeall(stonith_key_value_t * head, int keys, int values) { stonith_key_value_t *p; while (head) { p = head->next; if (keys) { free(head->key); } if (values) { free(head->value); } free(head); head = p; } } #define api_log_open() openlog("stonith-api", LOG_CONS | LOG_NDELAY | LOG_PID, LOG_DAEMON) #define api_log(level, fmt, args...) syslog(level, "%s: "fmt, __FUNCTION__, args) int stonith_api_kick(uint32_t nodeid, const char *uname, int timeout, bool off) { char *name = NULL; const char *action = "reboot"; int rc = -EPROTO; stonith_t *st = NULL; enum stonith_call_options opts = st_opt_sync_call | st_opt_allow_suicide; api_log_open(); st = stonith_api_new(); if (st) { rc = st->cmds->connect(st, "stonith-api", NULL); if(rc != pcmk_ok) { api_log(LOG_ERR, "Connection failed, could not kick (%s) node %u/%s : %s (%d)", action, nodeid, uname, pcmk_strerror(rc), rc); } } if (uname != NULL) { name = strdup(uname); } else if (nodeid > 0) { opts |= st_opt_cs_nodeid; name = crm_itoa(nodeid); } if (off) { action = "off"; } if (rc == pcmk_ok) { rc = st->cmds->fence(st, opts, name, action, timeout, 0); if(rc != pcmk_ok) { api_log(LOG_ERR, "Could not kick (%s) node %u/%s : %s (%d)", action, nodeid, uname, pcmk_strerror(rc), rc); } else { api_log(LOG_NOTICE, "Node %u/%s kicked: %s ", nodeid, uname, action); } } if (st) { st->cmds->disconnect(st); stonith_api_delete(st); } free(name); return rc; } time_t stonith_api_time(uint32_t nodeid, const char *uname, bool in_progress) { int rc = 0; char *name = NULL; time_t when = 0; stonith_t *st = NULL; stonith_history_t *history, *hp = NULL; enum stonith_call_options opts = st_opt_sync_call; st = stonith_api_new(); if (st) { rc = st->cmds->connect(st, "stonith-api", NULL); if(rc != pcmk_ok) { api_log(LOG_NOTICE, "Connection failed: %s (%d)", pcmk_strerror(rc), rc); } } if (uname != NULL) { name = strdup(uname); } else if (nodeid > 0) { opts |= st_opt_cs_nodeid; name = crm_itoa(nodeid); } if (st && rc == pcmk_ok) { int entries = 0; int progress = 0; int completed = 0; rc = st->cmds->history(st, opts, name, &history, 120); for (hp = history; hp; hp = hp->next) { entries++; if (in_progress) { progress++; if (hp->state != st_done && hp->state != st_failed) { when = time(NULL); } } else if (hp->state == st_done) { completed++; if (hp->completed > when) { when = hp->completed; } } } if(rc == pcmk_ok) { api_log(LOG_INFO, "Found %d entries for %u/%s: %d in progress, %d completed", entries, nodeid, uname, progress, completed); } else { api_log(LOG_ERR, "Could not retrieve fence history for %u/%s: %s (%d)", nodeid, uname, pcmk_strerror(rc), rc); } } if (st) { st->cmds->disconnect(st); stonith_api_delete(st); } if(when) { api_log(LOG_INFO, "Node %u/%s last kicked at: %ld", nodeid, uname, (long int)when); } free(name); return when; } #if HAVE_STONITH_STONITH_H # include const char *i_hate_pils(int rc); const char * i_hate_pils(int rc) { return PIL_strerror(rc); } #endif diff --git a/lib/lrmd/Makefile.am b/lib/lrmd/Makefile.am index e98d1e5b50..f961ae1d41 100644 --- a/lib/lrmd/Makefile.am +++ b/lib/lrmd/Makefile.am @@ -1,34 +1,34 @@ # Copyright (c) 2012 David Vossel # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # # MAINTAINERCLEANFILES = Makefile.in AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include \ -I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl \ -I$(top_builddir) -I$(top_srcdir) lib_LTLIBRARIES = liblrmd.la liblrmd_la_SOURCES = lrmd_client.c proxy_common.c -liblrmd_la_LDFLAGS = -version-info 3:0:0 +liblrmd_la_LDFLAGS = -version-info 3:0:2 liblrmd_la_LIBADD = $(top_builddir)/lib/common/libcrmcommon.la \ $(top_builddir)/lib/services/libcrmservice.la \ $(top_builddir)/lib/fencing/libstonithd.la AM_CFLAGS = $(AM_CPPFLAGS) diff --git a/lib/pengine/Makefile.am b/lib/pengine/Makefile.am index 29b72065dc..78da0751ad 100644 --- a/lib/pengine/Makefile.am +++ b/lib/pengine/Makefile.am @@ -1,42 +1,42 @@ # # Copyright (C) 2004 Andrew Beekhof # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include \ -I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl ## libraries lib_LTLIBRARIES = libpe_rules.la libpe_status.la ## SOURCES noinst_HEADERS = unpack.h variant.h libpe_rules_la_LDFLAGS = -version-info 2:4:0 libpe_rules_la_SOURCES = rules.c common.c libpe_rules_la_LIBADD = $(top_builddir)/lib/common/libcrmcommon.la -libpe_status_la_LDFLAGS = -version-info 8:0:0 +libpe_status_la_LDFLAGS = -version-info 8:0:4 libpe_status_la_SOURCES = status.c unpack.c utils.c complex.c native.c group.c clone.c rules.c common.c libpe_status_la_LIBADD = @CURSESLIBS@ $(top_builddir)/lib/common/libcrmcommon.la clean-generic: rm -f *.log *.debug *~ install-exec-local: uninstall-local: diff --git a/pengine/Makefile.am b/pengine/Makefile.am index d14d911e32..31532cfe94 100644 --- a/pengine/Makefile.am +++ b/pengine/Makefile.am @@ -1,86 +1,86 @@ # # Copyright (C) 2004 Andrew Beekhof # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # include $(top_srcdir)/Makefile.common AM_CPPFLAGS += -I$(top_builddir) -I$(top_srcdir) halibdir = $(CRM_DAEMON_DIR) PE_TESTS = $(wildcard test10/*.scores) testdir = $(datadir)/$(PACKAGE)/tests/pengine test_SCRIPTS = regression.sh test_DATA = regression.core.sh test10dir = $(datadir)/$(PACKAGE)/tests/pengine/test10 test10_DATA = $(PE_TESTS) $(PE_TESTS:%.scores=%.xml) $(PE_TESTS:%.scores=%.exp) $(PE_TESTS:%.scores=%.dot) $(PE_TESTS:%.scores=%.summary) $(wildcard test10/*.stderr) COMMONLIBS = \ $(top_builddir)/lib/common/libcrmcommon.la \ $(top_builddir)/lib/pengine/libpe_status.la \ libpengine.la $(CURSESLIBS) $(CLUSTERLIBS) beekhof: echo $(shell ls -1 test10/*.xml) #TESTS = test10/*.xml TESTS = test10/bug-rh-1097457.xml TEST_EXTENSIONS = .xml XML_LOG_COMPILER = ./regression.sh AM_XML_LOG_FLAGS = -V --run #LOG_COMPILER = #AM_LOG_FLAGS = -V ## libraries lib_LTLIBRARIES = libpengine.la ## binary progs halib_PROGRAMS = pengine if BUILD_XML_HELP man7_MANS = pengine.7 endif ## SOURCES noinst_HEADERS = allocate.h utils.h pengine.h #utils.h pengine.h -libpengine_la_LDFLAGS = -version-info 8:0:0 +libpengine_la_LDFLAGS = -version-info 8:0:4 # -L$(top_builddir)/lib/pils -lpils -export-dynamic -module -avoid-version libpengine_la_SOURCES = pengine.c allocate.c utils.c constraints.c libpengine_la_SOURCES += native.c group.c clone.c master.c graph.c utilization.c libpengine_la_LIBADD = $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/lib/cib/libcib.la pengine_SOURCES = main.c pengine_LDADD = $(top_builddir)/lib/cib/libcib.la $(COMMONLIBS) # libcib for get_object_root() # $(top_builddir)/lib/hbclient/libhbclient.la install-exec-local: $(mkinstalldirs) $(DESTDIR)/$(PE_STATE_DIR) -chown $(CRM_DAEMON_USER) $(DESTDIR)/$(PE_STATE_DIR) -chgrp $(CRM_DAEMON_GROUP) $(DESTDIR)/$(PE_STATE_DIR) -chmod 750 $(DESTDIR)/$(PE_STATE_DIR) uninstall-local: clean-generic: rm -f test10/*.pe.* diff --git a/pengine/regression.sh b/pengine/regression.sh index d57da17718..d184798a02 100755 --- a/pengine/regression.sh +++ b/pengine/regression.sh @@ -1,817 +1,819 @@ #!/bin/bash # Copyright (C) 2004 Andrew Beekhof # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This software is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # core=`dirname $0` . $core/regression.core.sh || exit 1 create_mode="true" info Generating test outputs for these tests... # do_test file description info Done. echo "" info Performing the following tests from $io_dir create_mode="false" echo "" do_test simple1 "Offline " do_test simple2 "Start " do_test simple3 "Start 2 " do_test simple4 "Start Failed" do_test simple6 "Stop Start " do_test simple7 "Shutdown " #do_test simple8 "Stonith " #do_test simple9 "Lower version" #do_test simple10 "Higher version" do_test simple11 "Priority (ne)" do_test simple12 "Priority (eq)" do_test simple8 "Stickiness" echo "" do_test group1 "Group " do_test group2 "Group + Native " do_test group3 "Group + Group " do_test group4 "Group + Native (nothing)" do_test group5 "Group + Native (move) " do_test group6 "Group + Group (move) " do_test group7 "Group colocation" do_test group13 "Group colocation (cant run)" do_test group8 "Group anti-colocation" do_test group9 "Group recovery" do_test group10 "Group partial recovery" do_test group11 "Group target_role" do_test group14 "Group stop (graph terminated)" do_test group15 "-ve group colocation" do_test bug-1573 "Partial stop of a group with two children" do_test bug-1718 "Mandatory group ordering - Stop group_FUN" do_test bug-lf-2613 "Move group on failure" do_test bug-lf-2619 "Move group on clone failure" do_test group-fail "Ensure stop order is preserved for partially active groups" do_test group-unmanaged "No need to restart r115 because r114 is unmanaged" do_test group-unmanaged-stopped "Make sure r115 is stopped when r114 fails" do_test group-dependants "Account for the location preferences of things colocated with a group" echo "" do_test rsc_dep1 "Must not " do_test rsc_dep3 "Must " do_test rsc_dep5 "Must not 3 " do_test rsc_dep7 "Must 3 " do_test rsc_dep10 "Must (but cant)" do_test rsc_dep2 "Must (running) " do_test rsc_dep8 "Must (running : alt) " do_test rsc_dep4 "Must (running + move)" do_test asymmetric "Asymmetric - require explicit location constraints" echo "" do_test orphan-0 "Orphan ignore" do_test orphan-1 "Orphan stop" do_test orphan-2 "Orphan stop, remove failcount" echo "" do_test params-0 "Params: No change" do_test params-1 "Params: Changed" do_test params-2 "Params: Resource definition" do_test params-4 "Params: Reload" do_test params-5 "Params: Restart based on probe digest" do_test novell-251689 "Resource definition change + target_role=stopped" do_test bug-lf-2106 "Restart all anonymous clone instances after config change" do_test params-6 "Params: Detect reload in previously migrated resource" do_test nvpair-id-ref "Support id-ref in nvpair with optional name" do_test not-reschedule-unneeded-monitor "Do not reschedule unneeded monitors while resource definitions have changed" echo "" do_test target-0 "Target Role : baseline" do_test target-1 "Target Role : master" do_test target-2 "Target Role : invalid" echo "" do_test base-score "Set a node's default score for all nodes" echo "" do_test date-1 "Dates" -t "2005-020" do_test date-2 "Date Spec - Pass" -t "2005-020T12:30" do_test date-3 "Date Spec - Fail" -t "2005-020T11:30" do_test origin "Timing of recurring operations" -t "2014-05-07 00:28:00" do_test probe-0 "Probe (anon clone)" do_test probe-1 "Pending Probe" do_test probe-2 "Correctly re-probe cloned groups" do_test probe-3 "Probe (pending node)" do_test probe-4 "Probe (pending node + stopped resource)" --rc 4 do_test standby "Standby" do_test comments "Comments" echo "" do_test one-or-more-0 "Everything starts" do_test one-or-more-1 "Nothing starts because of A" do_test one-or-more-2 "D can start because of C" do_test one-or-more-3 "D cannot start because of B and C" do_test one-or-more-4 "D cannot start because of target-role" do_test one-or-more-5 "Start A and F even though C and D are stopped" do_test one-or-more-6 "Leave A running even though B is stopped" do_test one-or-more-7 "Leave A running even though C is stopped" do_test bug-5140-require-all-false "Allow basegrp:0 to stop" do_test clone-require-all-1 "clone B starts node 3 and 4" do_test clone-require-all-2 "clone B remains stopped everywhere" do_test clone-require-all-3 "clone B stops everywhere because A stops everywhere" do_test clone-require-all-4 "clone B remains on node 3 and 4 with only one instance of A remaining." do_test clone-require-all-5 "clone B starts on node 1 3 and 4" do_test clone-require-all-6 "clone B remains active after shutting down instances of A" do_test clone-require-all-7 "clone A and B both start at the same time. all instances of A start before B." do_test clone-require-all-no-interleave-1 "C starts everywhere after A and B" do_test clone-require-all-no-interleave-2 "C starts on nodes 1, 2, and 4 with only one active instance of B" do_test clone-require-all-no-interleave-3 "C remains active when instance of B is stopped on one node and started on another." do_test one-or-more-unrunnnable-instances "Avoid dependancies on instances that wont ever be started" echo "" do_test order1 "Order start 1 " do_test order2 "Order start 2 " do_test order3 "Order stop " do_test order4 "Order (multiple) " do_test order5 "Order (move) " do_test order6 "Order (move w/ restart) " do_test order7 "Order (manditory) " do_test order-optional "Order (score=0) " do_test order-required "Order (score=INFINITY) " do_test bug-lf-2171 "Prevent group start when clone is stopped" do_test order-clone "Clone ordering should be able to prevent startup of dependant clones" do_test order-sets "Ordering for resource sets" do_test order-serialize "Serialize resources without inhibiting migration" do_test order-serialize-set "Serialize a set of resources without inhibiting migration" do_test clone-order-primitive "Order clone start after a primitive" do_test clone-order-16instances "Verify ordering of 16 cloned resources" do_test order-optional-keyword "Order (optional keyword)" do_test order-mandatory "Order (mandatory keyword)" do_test bug-lf-2493 "Don't imply colocation requirements when applying ordering constraints with clones" do_test ordered-set-basic-startup "Constraint set with default order settings." do_test ordered-set-natural "Allow natural set ordering" do_test order-wrong-kind "Order (error)" echo "" do_test coloc-loop "Colocation - loop" do_test coloc-many-one "Colocation - many-to-one" do_test coloc-list "Colocation - many-to-one with list" do_test coloc-group "Colocation - groups" do_test coloc-slave-anti "Anti-colocation with slave shouldn't prevent master colocation" do_test coloc-attr "Colocation based on node attributes" do_test coloc-negative-group "Negative colocation with a group" do_test coloc-intra-set "Intra-set colocation" do_test bug-lf-2435 "Colocation sets with a negative score" do_test coloc-clone-stays-active "Ensure clones don't get stopped/demoted because a dependant must stop" do_test coloc_fp_logic "Verify floating point calculations in colocation are working" do_test colo_master_w_native "cl#5070 - Verify promotion order is affected when colocating master to native rsc." do_test colo_slave_w_native "cl#5070 - Verify promotion order is affected when colocating slave to native rsc." do_test anti-colocation-order "cl#5187 - Prevent resources in an anti-colocation from even temporarily running on a same node" do_test enforce-colo1 "Always enforce B with A INFINITY." do_test complex_enforce_colo "Always enforce B with A INFINITY. (make sure heat-engine stops)" echo "" do_test rsc-sets-seq-true "Resource Sets - sequential=false" do_test rsc-sets-seq-false "Resource Sets - sequential=true" do_test rsc-sets-clone "Resource Sets - Clone" do_test rsc-sets-master "Resource Sets - Master" do_test rsc-sets-clone-1 "Resource Sets - Clone (lf#2404)" #echo "" #do_test agent1 "version: lt (empty)" #do_test agent2 "version: eq " #do_test agent3 "version: gt " echo "" do_test attrs1 "string: eq (and) " do_test attrs2 "string: lt / gt (and)" do_test attrs3 "string: ne (or) " do_test attrs4 "string: exists " do_test attrs5 "string: not_exists " do_test attrs6 "is_dc: true " do_test attrs7 "is_dc: false " do_test attrs8 "score_attribute " do_test per-node-attrs "Per node resource parameters" echo "" do_test mon-rsc-1 "Schedule Monitor - start" do_test mon-rsc-2 "Schedule Monitor - move " do_test mon-rsc-3 "Schedule Monitor - pending start " do_test mon-rsc-4 "Schedule Monitor - move/pending start" echo "" do_test rec-rsc-0 "Resource Recover - no start " do_test rec-rsc-1 "Resource Recover - start " do_test rec-rsc-2 "Resource Recover - monitor " do_test rec-rsc-3 "Resource Recover - stop - ignore" do_test rec-rsc-4 "Resource Recover - stop - block " do_test rec-rsc-5 "Resource Recover - stop - fence " do_test rec-rsc-6 "Resource Recover - multiple - restart" do_test rec-rsc-7 "Resource Recover - multiple - stop " do_test rec-rsc-8 "Resource Recover - multiple - block " do_test rec-rsc-9 "Resource Recover - group/group" do_test monitor-recovery "on-fail=block + resource recovery detected by recurring monitor" do_test stop-failure-no-quorum "Stop failure without quorum" do_test stop-failure-no-fencing "Stop failure without fencing available" do_test stop-failure-with-fencing "Stop failure with fencing available" echo "" do_test quorum-1 "No quorum - ignore" do_test quorum-2 "No quorum - freeze" do_test quorum-3 "No quorum - stop " do_test quorum-4 "No quorum - start anyway" do_test quorum-5 "No quorum - start anyway (group)" do_test quorum-6 "No quorum - start anyway (clone)" do_test bug-cl-5212 "No promotion with no-quorum-policy=freeze" echo "" do_test rec-node-1 "Node Recover - Startup - no fence" do_test rec-node-2 "Node Recover - Startup - fence " do_test rec-node-3 "Node Recover - HA down - no fence" do_test rec-node-4 "Node Recover - HA down - fence " do_test rec-node-5 "Node Recover - CRM down - no fence" do_test rec-node-6 "Node Recover - CRM down - fence " do_test rec-node-7 "Node Recover - no quorum - ignore " do_test rec-node-8 "Node Recover - no quorum - freeze " do_test rec-node-9 "Node Recover - no quorum - stop " do_test rec-node-10 "Node Recover - no quorum - stop w/fence" do_test rec-node-11 "Node Recover - CRM down w/ group - fence " do_test rec-node-12 "Node Recover - nothing active - fence " do_test rec-node-13 "Node Recover - failed resource + shutdown - fence " do_test rec-node-15 "Node Recover - unknown lrm section" do_test rec-node-14 "Serialize all stonith's" echo "" do_test multi1 "Multiple Active (stop/start)" echo "" do_test migrate-begin "Normal migration" do_test migrate-success "Completed migration" do_test migrate-partial-1 "Completed migration, missing stop on source" do_test migrate-partial-2 "Successful migrate_to only" do_test migrate-partial-3 "Successful migrate_to only, target down" do_test migrate-partial-4 "Migrate from the correct host after migrate_to+migrate_from" do_test bug-5186-partial-migrate "Handle partial migration when src node loses membership" do_test migrate-fail-2 "Failed migrate_from" do_test migrate-fail-3 "Failed migrate_from + stop on source" do_test migrate-fail-4 "Failed migrate_from + stop on target - ideally we wouldn't need to re-stop on target" do_test migrate-fail-5 "Failed migrate_from + stop on source and target" do_test migrate-fail-6 "Failed migrate_to" do_test migrate-fail-7 "Failed migrate_to + stop on source" do_test migrate-fail-8 "Failed migrate_to + stop on target - ideally we wouldn't need to re-stop on target" do_test migrate-fail-9 "Failed migrate_to + stop on source and target" do_test migrate-stop "Migration in a stopping stack" do_test migrate-start "Migration in a starting stack" do_test migrate-stop_start "Migration in a restarting stack" do_test migrate-stop-complex "Migration in a complex stopping stack" do_test migrate-start-complex "Migration in a complex starting stack" do_test migrate-stop-start-complex "Migration in a complex moving stack" do_test migrate-shutdown "Order the post-migration 'stop' before node shutdown" do_test migrate-1 "Migrate (migrate)" do_test migrate-2 "Migrate (stable)" do_test migrate-3 "Migrate (failed migrate_to)" do_test migrate-4 "Migrate (failed migrate_from)" do_test novell-252693 "Migration in a stopping stack" do_test novell-252693-2 "Migration in a starting stack" do_test novell-252693-3 "Non-Migration in a starting and stopping stack" do_test bug-1820 "Migration in a group" do_test bug-1820-1 "Non-migration in a group" do_test migrate-5 "Primitive migration with a clone" do_test migrate-fencing "Migration after Fencing" do_test migrate-both-vms "Migrate two VMs that have no colocation" do_test 1-a-then-bm-move-b "Advanced migrate logic. A then B. migrate B." do_test 2-am-then-b-move-a "Advanced migrate logic, A then B, migrate A without stopping B" do_test 3-am-then-bm-both-migrate "Advanced migrate logic. A then B. migrate both" do_test 4-am-then-bm-b-not-migratable "Advanced migrate logic, A then B, B not migratable" do_test 5-am-then-bm-a-not-migratable "Advanced migrate logic. A then B. move both, a not migratable" do_test 6-migrate-group "Advanced migrate logic, migrate a group" do_test 7-migrate-group-one-unmigratable "Advanced migrate logic, migrate group mixed with allow-migrate true/false" do_test 8-am-then-bm-a-migrating-b-stopping "Advanced migrate logic, A then B, A migrating, B stopping" do_test 9-am-then-bm-b-migrating-a-stopping "Advanced migrate logic, A then B, B migrate, A stopping" do_test 10-a-then-bm-b-move-a-clone "Advanced migrate logic, A clone then B, migrate B while stopping A" do_test 11-a-then-bm-b-move-a-clone-starting "Advanced migrate logic, A clone then B, B moving while A is start/stopping" do_test a-promote-then-b-migrate "A promote then B start. migrate B" do_test a-demote-then-b-migrate "A demote then B stop. migrate B" #echo "" #do_test complex1 "Complex " do_test bug-lf-2422 "Dependancy on partially active group - stop ocfs:*" echo "" do_test clone-anon-probe-1 "Probe the correct (anonymous) clone instance for each node" do_test clone-anon-probe-2 "Avoid needless re-probing of anonymous clones" do_test clone-anon-failcount "Merge failcounts for anonymous clones" do_test inc0 "Incarnation start" do_test inc1 "Incarnation start order" do_test inc2 "Incarnation silent restart, stop, move" do_test inc3 "Inter-incarnation ordering, silent restart, stop, move" do_test inc4 "Inter-incarnation ordering, silent restart, stop, move (ordered)" do_test inc5 "Inter-incarnation ordering, silent restart, stop, move (restart 1)" do_test inc6 "Inter-incarnation ordering, silent restart, stop, move (restart 2)" do_test inc7 "Clone colocation" do_test inc8 "Clone anti-colocation" do_test inc9 "Non-unique clone" do_test inc10 "Non-unique clone (stop)" do_test inc11 "Primitive colocation with clones" do_test inc12 "Clone shutdown" do_test cloned-group "Make sure only the correct number of cloned groups are started" do_test cloned-group-stop "Ensure stopping qpidd also stops glance and cinder" do_test clone-no-shuffle "Dont prioritize allocation of instances that must be moved" do_test clone-max-zero "Orphan processing with clone-max=0" do_test clone-anon-dup "Bug LF#2087 - Correctly parse the state of anonymous clones that are active more than once per node" do_test bug-lf-2160 "Dont shuffle clones due to colocation" do_test bug-lf-2213 "clone-node-max enforcement for cloned groups" do_test bug-lf-2153 "Clone ordering constraints" do_test bug-lf-2361 "Ensure clones observe mandatory ordering constraints if the LHS is unrunnable" do_test bug-lf-2317 "Avoid needless restart of primitive depending on a clone" do_test clone-colocate-instance-1 "Colocation with a specific clone instance (negative example)" do_test clone-colocate-instance-2 "Colocation with a specific clone instance" do_test clone-order-instance "Ordering with specific clone instances" do_test bug-lf-2453 "Enforce mandatory clone ordering without colocation" do_test bug-lf-2508 "Correctly reconstruct the status of anonymous cloned groups" do_test bug-lf-2544 "Balanced clone placement" do_test bug-lf-2445 "Redistribute clones with node-max > 1 and stickiness = 0" do_test bug-lf-2574 "Avoid clone shuffle" do_test bug-lf-2581 "Avoid group restart due to unrelated clone (re)start" do_test bug-cl-5168 "Don't shuffle clones" do_test bug-cl-5170 "Prevent clone from starting with on-fail=block" do_test clone-fail-block-colocation "Move colocated group when failed clone has on-fail=block" do_test clone-interleave-1 "Clone-3 cannot start on pcmk-1 due to interleaved ordering (no colocation)" do_test clone-interleave-2 "Clone-3 must stop on pcmk-1 due to interleaved ordering (no colocation)" do_test clone-interleave-3 "Clone-3 must be recovered on pcmk-1 due to interleaved ordering (no colocation)" echo "" do_test unfence-startup "Clean unfencing" do_test unfence-definition "Unfencing when the agent changes" do_test unfence-parameters "Unfencing when the agent parameters changes" echo "" do_test master-0 "Stopped -> Slave" do_test master-1 "Stopped -> Promote" do_test master-2 "Stopped -> Promote : notify" do_test master-3 "Stopped -> Promote : master location" do_test master-4 "Started -> Promote : master location" do_test master-5 "Promoted -> Promoted" do_test master-6 "Promoted -> Promoted (2)" do_test master-7 "Promoted -> Fenced" do_test master-8 "Promoted -> Fenced -> Moved" do_test master-9 "Stopped + Promotable + No quorum" do_test master-10 "Stopped -> Promotable : notify with monitor" do_test master-11 "Stopped -> Promote : colocation" do_test novell-239082 "Demote/Promote ordering" do_test novell-239087 "Stable master placement" do_test master-12 "Promotion based solely on rsc_location constraints" do_test master-13 "Include preferences of colocated resources when placing master" do_test master-demote "Ordering when actions depends on demoting a slave resource" do_test master-ordering "Prevent resources from starting that need a master" do_test bug-1765 "Master-Master Colocation (dont stop the slaves)" do_test master-group "Promotion of cloned groups" do_test bug-lf-1852 "Don't shuffle master/slave instances unnecessarily" do_test master-failed-demote "Dont retry failed demote actions" do_test master-failed-demote-2 "Dont retry failed demote actions (notify=false)" do_test master-depend "Ensure resources that depend on the master don't get allocated until the master does" do_test master-reattach "Re-attach to a running master" do_test master-allow-start "Don't include master score if it would prevent allocation" do_test master-colocation "Allow master instances placemaker to be influenced by colocation constraints" do_test master-pseudo "Make sure promote/demote pseudo actions are created correctly" do_test master-role "Prevent target-role from promoting more than master-max instances" do_test bug-lf-2358 "Master-Master anti-colocation" do_test master-promotion-constraint "Mandatory master colocation constraints" do_test unmanaged-master "Ensure role is preserved for unmanaged resources" do_test master-unmanaged-monitor "Start the correct monitor operation for unmanaged masters" do_test master-demote-2 "Demote does not clear past failure" do_test master-move "Move master based on failure of colocated group" do_test master-probed-score "Observe the promotion score of probed resources" do_test colocation_constraint_stops_master "cl#5054 - Ensure master is demoted when stopped by colocation constraint" do_test colocation_constraint_stops_slave "cl#5054 - Ensure slave is not demoted when stopped by colocation constraint" do_test order_constraint_stops_master "cl#5054 - Ensure master is demoted when stopped by order constraint" do_test order_constraint_stops_slave "cl#5054 - Ensure slave is not demoted when stopped by order constraint" do_test master_monitor_restart "cl#5072 - Ensure master monitor operation will start after promotion." do_test bug-rh-880249 "Handle replacement of an m/s resource with a primitive" do_test bug-5143-ms-shuffle "Prevent master shuffling due to promotion score" do_test master-demote-block "Block promotion if demote fails with on-fail=block" do_test master-dependant-ban "Don't stop instances from being active because a dependant is banned from that host" do_test master-stop "Stop instances due to location constraint with role=Started" do_test master-partially-demoted-group "Allow partially demoted group to finish demoting" do_test bug-cl-5213 "Ensure role colocation with -INFINITY is enforced" do_test bug-cl-5219 "Allow unrelated resources with a common colocation target to remain promoted" do_test master-asymmetrical-order "Fix the behaviors of multi-state resources with asymmetrical ordering" do_test master-notify "Master promotion with notifies" echo "" do_test history-1 "Correctly parse stateful-1 resource state" echo "" do_test managed-0 "Managed (reference)" do_test managed-1 "Not managed - down " do_test managed-2 "Not managed - up " do_test bug-5028 "Shutdown should block if anything depends on an unmanaged resource" do_test bug-5028-detach "Ensure detach still works" do_test bug-5028-bottom "Ensure shutdown still blocks if the blocked resource is at the bottom of the stack" do_test unmanaged-stop-1 "cl#5155 - Block the stop of resources if any depending resource is unmanaged " do_test unmanaged-stop-2 "cl#5155 - Block the stop of resources if the first resource in a mandatory stop order is unmanaged " do_test unmanaged-stop-3 "cl#5155 - Block the stop of resources if any depending resource in a group is unmanaged " do_test unmanaged-stop-4 "cl#5155 - Block the stop of resources if any depending resource in the middle of a group is unmanaged " do_test unmanaged-block-restart "Block restart of resources if any dependent resource in a group is unmanaged" echo "" do_test interleave-0 "Interleave (reference)" do_test interleave-1 "coloc - not interleaved" do_test interleave-2 "coloc - interleaved " do_test interleave-3 "coloc - interleaved (2)" do_test interleave-pseudo-stop "Interleaved clone during stonith" do_test interleave-stop "Interleaved clone during stop" do_test interleave-restart "Interleaved clone during dependancy restart" echo "" do_test notify-0 "Notify reference" do_test notify-1 "Notify simple" do_test notify-2 "Notify simple, confirm" do_test notify-3 "Notify move, confirm" do_test novell-239079 "Notification priority" #do_test notify-2 "Notify - 764" echo "" do_test 594 "OSDL #594 - Unrunnable actions scheduled in transition" do_test 662 "OSDL #662 - Two resources start on one node when incarnation_node_max = 1" do_test 696 "OSDL #696 - CRM starts stonith RA without monitor" do_test 726 "OSDL #726 - Attempting to schedule rsc_posic041_monitor_5000 _after_ a stop" do_test 735 "OSDL #735 - Correctly detect that rsc_hadev1 is stopped on hadev3" do_test 764 "OSDL #764 - Missing monitor op for DoFencing:child_DoFencing:1" do_test 797 "OSDL #797 - Assert triggered: task_id_i > max_call_id" do_test 829 "OSDL #829" do_test 994 "OSDL #994 - Stopping the last resource in a resource group causes the entire group to be restarted" do_test 994-2 "OSDL #994 - with a dependant resource" do_test 1360 "OSDL #1360 - Clone stickiness" do_test 1484 "OSDL #1484 - on_fail=stop" do_test 1494 "OSDL #1494 - Clone stability" do_test unrunnable-1 "Unrunnable" do_test stonith-0 "Stonith loop - 1" do_test stonith-1 "Stonith loop - 2" do_test stonith-2 "Stonith loop - 3" do_test stonith-3 "Stonith startup" do_test stonith-4 "Stonith node state" --rc 4 do_test bug-1572-1 "Recovery of groups depending on master/slave" do_test bug-1572-2 "Recovery of groups depending on master/slave when the master is never re-promoted" do_test bug-1685 "Depends-on-master ordering" do_test bug-1822 "Dont promote partially active groups" do_test bug-pm-11 "New resource added to a m/s group" do_test bug-pm-12 "Recover only the failed portion of a cloned group" do_test bug-n-387749 "Don't shuffle clone instances" do_test bug-n-385265 "Don't ignore the failure stickiness of group children - resource_idvscommon should stay stopped" do_test bug-n-385265-2 "Ensure groups are migrated instead of remaining partially active on the current node" do_test bug-lf-1920 "Correctly handle probes that find active resources" do_test bnc-515172 "Location constraint with multiple expressions" do_test colocate-primitive-with-clone "Optional colocation with a clone" do_test use-after-free-merge "Use-after-free in native_merge_weights" do_test bug-lf-2551 "STONITH ordering for stop" do_test bug-lf-2606 "Stonith implies demote" do_test bug-lf-2474 "Ensure resource op timeout takes precedence over op_defaults" do_test bug-suse-707150 "Prevent vm-01 from starting due to colocation/ordering" do_test bug-5014-A-start-B-start "Verify when A starts B starts using symmetrical=false" do_test bug-5014-A-stop-B-started "Verify when A stops B does not stop if it has already started using symmetric=false" do_test bug-5014-A-stopped-B-stopped "Verify when A is stopped and B has not started, B does not start before A using symmetric=false" do_test bug-5014-CthenAthenB-C-stopped "Verify when C then A is symmetrical=true, A then B is symmetric=false, and C is stopped that nothing starts." do_test bug-5014-CLONE-A-start-B-start "Verify when A starts B starts using clone resources with symmetric=false" do_test bug-5014-CLONE-A-stop-B-started "Verify when A stops B does not stop if it has already started using clone resources with symmetric=false." do_test bug-5014-GROUP-A-start-B-start "Verify when A starts B starts when using group resources with symmetric=false." do_test bug-5014-GROUP-A-stopped-B-started "Verify when A stops B does not stop if it has already started using group resources with symmetric=false." do_test bug-5014-GROUP-A-stopped-B-stopped "Verify when A is stopped and B has not started, B does not start before A using group resources with symmetric=false." do_test bug-5014-ordered-set-symmetrical-false "Verify ordered sets work with symmetrical=false" do_test bug-5014-ordered-set-symmetrical-true "Verify ordered sets work with symmetrical=true" do_test bug-5007-masterslave_colocation "Verify use of colocation scores other than INFINITY and -INFINITY work on multi-state resources." do_test bug-5038 "Prevent restart of anonymous clones when clone-max decreases" do_test bug-5025-1 "Automatically clean up failcount after resource config change with reload" do_test bug-5025-2 "Make sure clear failcount action isn't set when config does not change." do_test bug-5025-3 "Automatically clean up failcount after resource config change with restart" do_test bug-5025-4 "Clear failcount when last failure is a start op and rsc attributes changed." do_test failcount "Ensure failcounts are correctly expired" do_test failcount-block "Ensure failcounts are not expired when on-fail=block is present" do_test monitor-onfail-restart "bug-5058 - Monitor failure with on-fail set to restart" do_test monitor-onfail-stop "bug-5058 - Monitor failure wiht on-fail set to stop" do_test bug-5059 "No need to restart p_stateful1:*" do_test bug-5069-op-enabled "Test on-fail=ignore with failure when monitor is enabled." do_test bug-5069-op-disabled "Test on-fail-ignore with failure when monitor is disabled." do_test obsolete-lrm-resource "cl#5115 - Do not use obsolete lrm_resource sections" do_test expire-non-blocked-failure "Ignore failure-timeout only if the failed operation has on-fail=block" do_test ignore_stonith_rsc_order1 "cl#5056- Ignore order constraint between stonith and non-stonith rsc." do_test ignore_stonith_rsc_order2 "cl#5056- Ignore order constraint with group rsc containing mixed stonith and non-stonith." do_test ignore_stonith_rsc_order3 "cl#5056- Ignore order constraint, stonith clone and mixed group" do_test ignore_stonith_rsc_order4 "cl#5056- Ignore order constraint, stonith clone and clone with nested mixed group" do_test honor_stonith_rsc_order1 "cl#5056- Honor order constraint, stonith clone and pure stonith group(single rsc)." do_test honor_stonith_rsc_order2 "cl#5056- Honor order constraint, stonith clone and pure stonith group(multiple rsc)" do_test honor_stonith_rsc_order3 "cl#5056- Honor order constraint, stonith clones with nested pure stonith group." do_test honor_stonith_rsc_order4 "cl#5056- Honor order constraint, between two native stonith rscs." do_test probe-timeout "cl#5099 - Default probe timeout" echo "" do_test systemhealth1 "System Health () #1" do_test systemhealth2 "System Health () #2" do_test systemhealth3 "System Health () #3" do_test systemhealthn1 "System Health (None) #1" do_test systemhealthn2 "System Health (None) #2" do_test systemhealthn3 "System Health (None) #3" do_test systemhealthm1 "System Health (Migrate On Red) #1" do_test systemhealthm2 "System Health (Migrate On Red) #2" do_test systemhealthm3 "System Health (Migrate On Red) #3" do_test systemhealtho1 "System Health (Only Green) #1" do_test systemhealtho2 "System Health (Only Green) #2" do_test systemhealtho3 "System Health (Only Green) #3" do_test systemhealthp1 "System Health (Progessive) #1" do_test systemhealthp2 "System Health (Progessive) #2" do_test systemhealthp3 "System Health (Progessive) #3" echo "" do_test utilization "Placement Strategy - utilization" do_test minimal "Placement Strategy - minimal" do_test balanced "Placement Strategy - balanced" echo "" do_test placement-stickiness "Optimized Placement Strategy - stickiness" do_test placement-priority "Optimized Placement Strategy - priority" do_test placement-location "Optimized Placement Strategy - location" do_test placement-capacity "Optimized Placement Strategy - capacity" echo "" do_test utilization-order1 "Utilization Order - Simple" do_test utilization-order2 "Utilization Order - Complex" do_test utilization-order3 "Utilization Order - Migrate" do_test utilization-order4 "Utilization Order - Live Mirgration (bnc#695440)" do_test utilization-shuffle "Don't displace prmExPostgreSQLDB2 on act2, Start prmExPostgreSQLDB1 on act3" do_test load-stopped-loop "Avoid transition loop due to load_stopped (cl#5044)" do_test load-stopped-loop-2 "cl#5235 - Prevent graph loops that can be introduced by load_stopped -> migrate_to ordering" echo "" do_test colocated-utilization-primitive-1 "Colocated Utilization - Primitive" do_test colocated-utilization-primitive-2 "Colocated Utilization - Choose the most capable node" do_test colocated-utilization-group "Colocated Utilization - Group" do_test colocated-utilization-clone "Colocated Utilization - Clone" +do_test utilization-check-allowed-nodes "Only check the capacities of the nodes that can run the resource" + echo "" do_test reprobe-target_rc "Ensure correct target_rc for reprobe of inactive resources" do_test node-maintenance-1 "cl#5128 - Node maintenance" do_test node-maintenance-2 "cl#5128 - Node maintenance (coming out of maintenance mode)" do_test rsc-maintenance "Per-resource maintenance" echo "" do_test not-installed-agent "The resource agent is missing" do_test not-installed-tools "Something the resource agent needs is missing" echo "" do_test stopped-monitor-00 "Stopped Monitor - initial start" do_test stopped-monitor-01 "Stopped Monitor - failed started" do_test stopped-monitor-02 "Stopped Monitor - started multi-up" do_test stopped-monitor-03 "Stopped Monitor - stop started" do_test stopped-monitor-04 "Stopped Monitor - failed stop" do_test stopped-monitor-05 "Stopped Monitor - start unmanaged" do_test stopped-monitor-06 "Stopped Monitor - unmanaged multi-up" do_test stopped-monitor-07 "Stopped Monitor - start unmanaged multi-up" do_test stopped-monitor-08 "Stopped Monitor - migrate" do_test stopped-monitor-09 "Stopped Monitor - unmanage started" do_test stopped-monitor-10 "Stopped Monitor - unmanaged started multi-up" do_test stopped-monitor-11 "Stopped Monitor - stop unmanaged started" do_test stopped-monitor-12 "Stopped Monitor - unmanaged started multi-up (targer-role="Stopped")" do_test stopped-monitor-20 "Stopped Monitor - initial stop" do_test stopped-monitor-21 "Stopped Monitor - stopped single-up" do_test stopped-monitor-22 "Stopped Monitor - stopped multi-up" do_test stopped-monitor-23 "Stopped Monitor - start stopped" do_test stopped-monitor-24 "Stopped Monitor - unmanage stopped" do_test stopped-monitor-25 "Stopped Monitor - unmanaged stopped multi-up" do_test stopped-monitor-26 "Stopped Monitor - start unmanaged stopped" do_test stopped-monitor-27 "Stopped Monitor - unmanaged stopped multi-up (target-role="Started")" do_test stopped-monitor-30 "Stopped Monitor - new node started" do_test stopped-monitor-31 "Stopped Monitor - new node stopped" echo"" do_test ticket-primitive-1 "Ticket - Primitive (loss-policy=stop, initial)" do_test ticket-primitive-2 "Ticket - Primitive (loss-policy=stop, granted)" do_test ticket-primitive-3 "Ticket - Primitive (loss-policy-stop, revoked)" do_test ticket-primitive-4 "Ticket - Primitive (loss-policy=demote, initial)" do_test ticket-primitive-5 "Ticket - Primitive (loss-policy=demote, granted)" do_test ticket-primitive-6 "Ticket - Primitive (loss-policy=demote, revoked)" do_test ticket-primitive-7 "Ticket - Primitive (loss-policy=fence, initial)" do_test ticket-primitive-8 "Ticket - Primitive (loss-policy=fence, granted)" do_test ticket-primitive-9 "Ticket - Primitive (loss-policy=fence, revoked)" do_test ticket-primitive-10 "Ticket - Primitive (loss-policy=freeze, initial)" do_test ticket-primitive-11 "Ticket - Primitive (loss-policy=freeze, granted)" do_test ticket-primitive-12 "Ticket - Primitive (loss-policy=freeze, revoked)" do_test ticket-primitive-13 "Ticket - Primitive (loss-policy=stop, standby, granted)" do_test ticket-primitive-14 "Ticket - Primitive (loss-policy=stop, granted, standby)" do_test ticket-primitive-15 "Ticket - Primitive (loss-policy=stop, standby, revoked)" do_test ticket-primitive-16 "Ticket - Primitive (loss-policy=demote, standby, granted)" do_test ticket-primitive-17 "Ticket - Primitive (loss-policy=demote, granted, standby)" do_test ticket-primitive-18 "Ticket - Primitive (loss-policy=demote, standby, revoked)" do_test ticket-primitive-19 "Ticket - Primitive (loss-policy=fence, standby, granted)" do_test ticket-primitive-20 "Ticket - Primitive (loss-policy=fence, granted, standby)" do_test ticket-primitive-21 "Ticket - Primitive (loss-policy=fence, standby, revoked)" do_test ticket-primitive-22 "Ticket - Primitive (loss-policy=freeze, standby, granted)" do_test ticket-primitive-23 "Ticket - Primitive (loss-policy=freeze, granted, standby)" do_test ticket-primitive-24 "Ticket - Primitive (loss-policy=freeze, standby, revoked)" echo"" do_test ticket-group-1 "Ticket - Group (loss-policy=stop, initial)" do_test ticket-group-2 "Ticket - Group (loss-policy=stop, granted)" do_test ticket-group-3 "Ticket - Group (loss-policy-stop, revoked)" do_test ticket-group-4 "Ticket - Group (loss-policy=demote, initial)" do_test ticket-group-5 "Ticket - Group (loss-policy=demote, granted)" do_test ticket-group-6 "Ticket - Group (loss-policy=demote, revoked)" do_test ticket-group-7 "Ticket - Group (loss-policy=fence, initial)" do_test ticket-group-8 "Ticket - Group (loss-policy=fence, granted)" do_test ticket-group-9 "Ticket - Group (loss-policy=fence, revoked)" do_test ticket-group-10 "Ticket - Group (loss-policy=freeze, initial)" do_test ticket-group-11 "Ticket - Group (loss-policy=freeze, granted)" do_test ticket-group-12 "Ticket - Group (loss-policy=freeze, revoked)" do_test ticket-group-13 "Ticket - Group (loss-policy=stop, standby, granted)" do_test ticket-group-14 "Ticket - Group (loss-policy=stop, granted, standby)" do_test ticket-group-15 "Ticket - Group (loss-policy=stop, standby, revoked)" do_test ticket-group-16 "Ticket - Group (loss-policy=demote, standby, granted)" do_test ticket-group-17 "Ticket - Group (loss-policy=demote, granted, standby)" do_test ticket-group-18 "Ticket - Group (loss-policy=demote, standby, revoked)" do_test ticket-group-19 "Ticket - Group (loss-policy=fence, standby, granted)" do_test ticket-group-20 "Ticket - Group (loss-policy=fence, granted, standby)" do_test ticket-group-21 "Ticket - Group (loss-policy=fence, standby, revoked)" do_test ticket-group-22 "Ticket - Group (loss-policy=freeze, standby, granted)" do_test ticket-group-23 "Ticket - Group (loss-policy=freeze, granted, standby)" do_test ticket-group-24 "Ticket - Group (loss-policy=freeze, standby, revoked)" echo"" do_test ticket-clone-1 "Ticket - Clone (loss-policy=stop, initial)" do_test ticket-clone-2 "Ticket - Clone (loss-policy=stop, granted)" do_test ticket-clone-3 "Ticket - Clone (loss-policy-stop, revoked)" do_test ticket-clone-4 "Ticket - Clone (loss-policy=demote, initial)" do_test ticket-clone-5 "Ticket - Clone (loss-policy=demote, granted)" do_test ticket-clone-6 "Ticket - Clone (loss-policy=demote, revoked)" do_test ticket-clone-7 "Ticket - Clone (loss-policy=fence, initial)" do_test ticket-clone-8 "Ticket - Clone (loss-policy=fence, granted)" do_test ticket-clone-9 "Ticket - Clone (loss-policy=fence, revoked)" do_test ticket-clone-10 "Ticket - Clone (loss-policy=freeze, initial)" do_test ticket-clone-11 "Ticket - Clone (loss-policy=freeze, granted)" do_test ticket-clone-12 "Ticket - Clone (loss-policy=freeze, revoked)" do_test ticket-clone-13 "Ticket - Clone (loss-policy=stop, standby, granted)" do_test ticket-clone-14 "Ticket - Clone (loss-policy=stop, granted, standby)" do_test ticket-clone-15 "Ticket - Clone (loss-policy=stop, standby, revoked)" do_test ticket-clone-16 "Ticket - Clone (loss-policy=demote, standby, granted)" do_test ticket-clone-17 "Ticket - Clone (loss-policy=demote, granted, standby)" do_test ticket-clone-18 "Ticket - Clone (loss-policy=demote, standby, revoked)" do_test ticket-clone-19 "Ticket - Clone (loss-policy=fence, standby, granted)" do_test ticket-clone-20 "Ticket - Clone (loss-policy=fence, granted, standby)" do_test ticket-clone-21 "Ticket - Clone (loss-policy=fence, standby, revoked)" do_test ticket-clone-22 "Ticket - Clone (loss-policy=freeze, standby, granted)" do_test ticket-clone-23 "Ticket - Clone (loss-policy=freeze, granted, standby)" do_test ticket-clone-24 "Ticket - Clone (loss-policy=freeze, standby, revoked)" echo"" do_test ticket-master-1 "Ticket - Master (loss-policy=stop, initial)" do_test ticket-master-2 "Ticket - Master (loss-policy=stop, granted)" do_test ticket-master-3 "Ticket - Master (loss-policy-stop, revoked)" do_test ticket-master-4 "Ticket - Master (loss-policy=demote, initial)" do_test ticket-master-5 "Ticket - Master (loss-policy=demote, granted)" do_test ticket-master-6 "Ticket - Master (loss-policy=demote, revoked)" do_test ticket-master-7 "Ticket - Master (loss-policy=fence, initial)" do_test ticket-master-8 "Ticket - Master (loss-policy=fence, granted)" do_test ticket-master-9 "Ticket - Master (loss-policy=fence, revoked)" do_test ticket-master-10 "Ticket - Master (loss-policy=freeze, initial)" do_test ticket-master-11 "Ticket - Master (loss-policy=freeze, granted)" do_test ticket-master-12 "Ticket - Master (loss-policy=freeze, revoked)" do_test ticket-master-13 "Ticket - Master (loss-policy=stop, standby, granted)" do_test ticket-master-14 "Ticket - Master (loss-policy=stop, granted, standby)" do_test ticket-master-15 "Ticket - Master (loss-policy=stop, standby, revoked)" do_test ticket-master-16 "Ticket - Master (loss-policy=demote, standby, granted)" do_test ticket-master-17 "Ticket - Master (loss-policy=demote, granted, standby)" do_test ticket-master-18 "Ticket - Master (loss-policy=demote, standby, revoked)" do_test ticket-master-19 "Ticket - Master (loss-policy=fence, standby, granted)" do_test ticket-master-20 "Ticket - Master (loss-policy=fence, granted, standby)" do_test ticket-master-21 "Ticket - Master (loss-policy=fence, standby, revoked)" do_test ticket-master-22 "Ticket - Master (loss-policy=freeze, standby, granted)" do_test ticket-master-23 "Ticket - Master (loss-policy=freeze, granted, standby)" do_test ticket-master-24 "Ticket - Master (loss-policy=freeze, standby, revoked)" echo "" do_test ticket-rsc-sets-1 "Ticket - Resource sets (1 ticket, initial)" do_test ticket-rsc-sets-2 "Ticket - Resource sets (1 ticket, granted)" do_test ticket-rsc-sets-3 "Ticket - Resource sets (1 ticket, revoked)" do_test ticket-rsc-sets-4 "Ticket - Resource sets (2 tickets, initial)" do_test ticket-rsc-sets-5 "Ticket - Resource sets (2 tickets, granted)" do_test ticket-rsc-sets-6 "Ticket - Resource sets (2 tickets, granted)" do_test ticket-rsc-sets-7 "Ticket - Resource sets (2 tickets, revoked)" do_test ticket-rsc-sets-8 "Ticket - Resource sets (1 ticket, standby, granted)" do_test ticket-rsc-sets-9 "Ticket - Resource sets (1 ticket, granted, standby)" do_test ticket-rsc-sets-10 "Ticket - Resource sets (1 ticket, standby, revoked)" do_test ticket-rsc-sets-11 "Ticket - Resource sets (2 tickets, standby, granted)" do_test ticket-rsc-sets-12 "Ticket - Resource sets (2 tickets, standby, granted)" do_test ticket-rsc-sets-13 "Ticket - Resource sets (2 tickets, granted, standby)" do_test ticket-rsc-sets-14 "Ticket - Resource sets (2 tickets, standby, revoked)" do_test cluster-specific-params "Cluster-specific instance attributes based on rules" do_test site-specific-params "Site-specific instance attributes based on rules" echo "" do_test template-1 "Template - 1" do_test template-2 "Template - 2" do_test template-3 "Template - 3 (merge operations)" do_test template-coloc-1 "Template - Colocation 1" do_test template-coloc-2 "Template - Colocation 2" do_test template-coloc-3 "Template - Colocation 3" do_test template-order-1 "Template - Order 1" do_test template-order-2 "Template - Order 2" do_test template-order-3 "Template - Order 3" do_test template-ticket "Template - Ticket" do_test template-rsc-sets-1 "Template - Resource Sets 1" do_test template-rsc-sets-2 "Template - Resource Sets 2" do_test template-rsc-sets-3 "Template - Resource Sets 3" do_test template-rsc-sets-4 "Template - Resource Sets 4" do_test template-clone-primitive "Cloned primitive from template" do_test template-clone-group "Cloned group from template" do_test location-sets-templates "Resource sets and templates - Location" do_test tags-coloc-order-1 "Tags - Colocation and Order (Simple)" do_test tags-coloc-order-2 "Tags - Colocation and Order (Resource Sets with Templates)" do_test tags-location "Tags - Location" do_test tags-ticket "Tags - Ticket" echo "" do_test container-1 "Container - initial" do_test container-2 "Container - monitor failed" do_test container-3 "Container - stop failed" do_test container-4 "Container - reached migration-threshold" do_test container-group-1 "Container in group - initial" do_test container-group-2 "Container in group - monitor failed" do_test container-group-3 "Container in group - stop failed" do_test container-group-4 "Container in group - reached migration-threshold" do_test container-is-remote-node "Place resource within container when container is remote-node" do_test bug-rh-1097457 "Kill user defined container/contents ordering" echo "" do_test whitebox-fail1 "Fail whitebox container rsc." do_test whitebox-fail2 "Fail whitebox container rsc lrmd connection." do_test whitebox-fail3 "Failed containers should not run nested on remote nodes." do_test whitebox-start "Start whitebox container with resources assigned to it" do_test whitebox-stop "Stop whitebox container with resources assigned to it" do_test whitebox-move "Move whitebox container with resources assigned to it" do_test whitebox-asymmetric "Verify connection rsc opts-in based on container resource" do_test whitebox-ms-ordering "Verify promote/demote can not occur before connection is established" do_test whitebox-orphaned "Properly shutdown orphaned whitebox container" do_test whitebox-orphan-ms "Properly tear down orphan ms resources on remote-nodes" do_test whitebox-unexpectedly-running "Recover container nodes the cluster did not start." do_test whitebox-migrate1 "Migrate both container and connection resource" do_test whitebox-imply-stop-on-fence "imply stop action on container node rsc when host node is fenced" do_test whitebox-nested-group "Verify guest remote-node works nested in a group" echo "" do_test remote-startup-probes "Baremetal remote-node startup probes" do_test remote-startup "Startup a newly discovered remote-nodes with no status." do_test remote-fence-unclean "Fence unclean baremetal remote-node" do_test remote-fence-unclean2 "Fence baremetal remote-node after cluster node fails and connection can not be recovered" do_test remote-move "Move remote-node connection resource" do_test remote-disable "Disable a baremetal remote-node" do_test remote-orphaned "Properly shutdown orphaned connection resource" do_test remote-orphaned2 "verify we can handle orphaned remote connections with active resources on the remote" do_test remote-recover "Recover connection resource after cluster-node fails." do_test remote-stale-node-entry "Make sure we properly handle leftover remote-node entries in the node section" do_test remote-partial-migrate "Make sure partial migrations are handled before ops on the remote node." do_test remote-partial-migrate2 "Make sure partial migration target is prefered for remote connection." do_test remote-recover-fail "Make sure start failure causes fencing if rsc are active on remote." do_test remote-start-fail "Make sure a start failure does not result in fencing if no active resources are on remote." do_test remote-unclean2 "Make monitor failure always results in fencing, even if no rsc are active on remote." do_test remote-fence-before-reconnect "Fence before clearing recurring monitor failure" echo "" do_test resource-discovery "Exercises resource-discovery location constraint option." do_test rsc-discovery-per-node "Disable resource discovery per node" echo "" do_test isolation-start-all "Start docker isolated resources." do_test isolation-restart-all "Restart docker isolated resources." do_test isolation-clone "Cloned isolated primitive." echo "" test_results diff --git a/pengine/test10/utilization-check-allowed-nodes.dot b/pengine/test10/utilization-check-allowed-nodes.dot new file mode 100644 index 0000000000..d09efbc097 --- /dev/null +++ b/pengine/test10/utilization-check-allowed-nodes.dot @@ -0,0 +1,19 @@ +digraph "g" { +"load_stopped_node1 node1" [ style=bold color="green" fontcolor="orange"] +"load_stopped_node2 node2" [ style=bold color="green" fontcolor="orange"] +"probe_complete node1" -> "probe_complete" [ style = bold] +"probe_complete node1" [ style=bold color="green" fontcolor="black"] +"probe_complete node2" -> "probe_complete" [ style = bold] +"probe_complete node2" [ style=bold color="green" fontcolor="black"] +"probe_complete" -> "rsc1_start_0 node2" [ style = bold] +"probe_complete" [ style=bold color="green" fontcolor="orange"] +"rsc1_monitor_0 node1" -> "probe_complete node1" [ style = bold] +"rsc1_monitor_0 node1" [ style=bold color="green" fontcolor="black"] +"rsc1_monitor_0 node2" -> "probe_complete node2" [ style = bold] +"rsc1_monitor_0 node2" [ style=bold color="green" fontcolor="black"] +"rsc1_start_0 node2" [ style=bold color="green" fontcolor="black"] +"rsc2_monitor_0 node1" -> "probe_complete node1" [ style = bold] +"rsc2_monitor_0 node1" [ style=bold color="green" fontcolor="black"] +"rsc2_monitor_0 node2" -> "probe_complete node2" [ style = bold] +"rsc2_monitor_0 node2" [ style=bold color="green" fontcolor="black"] +} diff --git a/pengine/test10/utilization-check-allowed-nodes.exp b/pengine/test10/utilization-check-allowed-nodes.exp new file mode 100644 index 0000000000..134ccb383c --- /dev/null +++ b/pengine/test10/utilization-check-allowed-nodes.exp @@ -0,0 +1,112 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pengine/test10/utilization-check-allowed-nodes.scores b/pengine/test10/utilization-check-allowed-nodes.scores new file mode 100644 index 0000000000..26887e2f62 --- /dev/null +++ b/pengine/test10/utilization-check-allowed-nodes.scores @@ -0,0 +1,5 @@ +Allocation scores: +native_color: rsc1 allocation score on node1: -INFINITY +native_color: rsc1 allocation score on node2: 0 +native_color: rsc2 allocation score on node1: -INFINITY +native_color: rsc2 allocation score on node2: 0 diff --git a/pengine/test10/utilization-check-allowed-nodes.summary b/pengine/test10/utilization-check-allowed-nodes.summary new file mode 100644 index 0000000000..12bf19a9cd --- /dev/null +++ b/pengine/test10/utilization-check-allowed-nodes.summary @@ -0,0 +1,26 @@ + +Current cluster status: +Online: [ node1 node2 ] + + rsc1 (ocf::pacemaker:Dummy): Stopped + rsc2 (ocf::pacemaker:Dummy): Stopped + +Transition Summary: + * Start rsc1 (node2) + +Executing cluster transition: + * Resource action: rsc1 monitor on node2 + * Resource action: rsc1 monitor on node1 + * Resource action: rsc2 monitor on node2 + * Resource action: rsc2 monitor on node1 + * Pseudo action: probe_complete + * Pseudo action: load_stopped_node1 + * Pseudo action: load_stopped_node2 + * Resource action: rsc1 start on node2 + +Revised cluster status: +Online: [ node1 node2 ] + + rsc1 (ocf::pacemaker:Dummy): Started node2 + rsc2 (ocf::pacemaker:Dummy): Stopped + diff --git a/pengine/test10/utilization-check-allowed-nodes.xml b/pengine/test10/utilization-check-allowed-nodes.xml new file mode 100644 index 0000000000..39cf51f9b6 --- /dev/null +++ b/pengine/test10/utilization-check-allowed-nodes.xml @@ -0,0 +1,39 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pengine/utilization.c b/pengine/utilization.c index 982fcc96b4..db41b21133 100644 --- a/pengine/utilization.c +++ b/pengine/utilization.c @@ -1,472 +1,482 @@ /* * Copyright (C) 2014 Gao,Yan * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include static GListPtr find_colocated_rscs(GListPtr colocated_rscs, resource_t * rsc, resource_t * orig_rsc); static GListPtr group_find_colocated_rscs(GListPtr colocated_rscs, resource_t * rsc, resource_t * orig_rsc); static void group_add_unallocated_utilization(GHashTable * all_utilization, resource_t * rsc, GListPtr all_rscs); struct compare_data { const node_t *node1; const node_t *node2; int result; }; static void do_compare_capacity1(gpointer key, gpointer value, gpointer user_data) { int node1_capacity = 0; int node2_capacity = 0; struct compare_data *data = user_data; node1_capacity = crm_parse_int(value, "0"); node2_capacity = crm_parse_int(g_hash_table_lookup(data->node2->details->utilization, key), "0"); if (node1_capacity > node2_capacity) { data->result--; } else if (node1_capacity < node2_capacity) { data->result++; } } static void do_compare_capacity2(gpointer key, gpointer value, gpointer user_data) { int node1_capacity = 0; int node2_capacity = 0; struct compare_data *data = user_data; if (g_hash_table_lookup_extended(data->node1->details->utilization, key, NULL, NULL)) { return; } node1_capacity = 0; node2_capacity = crm_parse_int(value, "0"); if (node1_capacity > node2_capacity) { data->result--; } else if (node1_capacity < node2_capacity) { data->result++; } } /* rc < 0 if 'node1' has more capacity remaining * rc > 0 if 'node1' has less capacity remaining */ int compare_capacity(const node_t * node1, const node_t * node2) { struct compare_data data; data.node1 = node1; data.node2 = node2; data.result = 0; g_hash_table_foreach(node1->details->utilization, do_compare_capacity1, &data); g_hash_table_foreach(node2->details->utilization, do_compare_capacity2, &data); return data.result; } struct calculate_data { GHashTable *current_utilization; gboolean plus; }; static void do_calculate_utilization(gpointer key, gpointer value, gpointer user_data) { const char *current = NULL; char *result = NULL; struct calculate_data *data = user_data; current = g_hash_table_lookup(data->current_utilization, key); if (data->plus) { result = crm_itoa(crm_parse_int(current, "0") + crm_parse_int(value, "0")); g_hash_table_replace(data->current_utilization, strdup(key), result); } else if (current) { result = crm_itoa(crm_parse_int(current, "0") - crm_parse_int(value, "0")); g_hash_table_replace(data->current_utilization, strdup(key), result); } } /* Specify 'plus' to FALSE when allocating * Otherwise to TRUE when deallocating */ void calculate_utilization(GHashTable * current_utilization, GHashTable * utilization, gboolean plus) { struct calculate_data data; data.current_utilization = current_utilization; data.plus = plus; g_hash_table_foreach(utilization, do_calculate_utilization, &data); } struct capacity_data { node_t *node; const char *rsc_id; gboolean is_enough; }; static void check_capacity(gpointer key, gpointer value, gpointer user_data) { int required = 0; int remaining = 0; struct capacity_data *data = user_data; required = crm_parse_int(value, "0"); remaining = crm_parse_int(g_hash_table_lookup(data->node->details->utilization, key), "0"); if (required > remaining) { CRM_ASSERT(data->rsc_id); CRM_ASSERT(data->node); crm_debug("Node %s has no enough %s for %s: required=%d remaining=%d", data->node->details->uname, (char *)key, data->rsc_id, required, remaining); data->is_enough = FALSE; } } static gboolean have_enough_capacity(node_t * node, const char * rsc_id, GHashTable * utilization) { struct capacity_data data; data.node = node; data.rsc_id = rsc_id; data.is_enough = TRUE; g_hash_table_foreach(utilization, check_capacity, &data); return data.is_enough; } static void native_add_unallocated_utilization(GHashTable * all_utilization, resource_t * rsc) { if(is_set(rsc->flags, pe_rsc_provisional) == FALSE) { return; } calculate_utilization(all_utilization, rsc->utilization, TRUE); } static void add_unallocated_utilization(GHashTable * all_utilization, resource_t * rsc, GListPtr all_rscs, resource_t * orig_rsc) { if(is_set(rsc->flags, pe_rsc_provisional) == FALSE) { return; } if (rsc->variant == pe_native) { pe_rsc_trace(orig_rsc, "%s: Adding %s as colocated utilization", orig_rsc->id, rsc->id); native_add_unallocated_utilization(all_utilization, rsc); } else if (rsc->variant == pe_group) { pe_rsc_trace(orig_rsc, "%s: Adding %s as colocated utilization", orig_rsc->id, rsc->id); group_add_unallocated_utilization(all_utilization, rsc, all_rscs); } else if (rsc->variant == pe_clone || rsc->variant == pe_master) { GListPtr gIter1 = NULL; gboolean existing = FALSE; /* Check if there's any child already existing in the list */ gIter1 = rsc->children; for (; gIter1 != NULL; gIter1 = gIter1->next) { resource_t *child = (resource_t *) gIter1->data; GListPtr gIter2 = NULL; if (g_list_find(all_rscs, child)) { existing = TRUE; } else { /* Check if there's any child of another cloned group already existing in the list */ gIter2 = child->children; for (; gIter2 != NULL; gIter2 = gIter2->next) { resource_t *grandchild = (resource_t *) gIter2->data; if (g_list_find(all_rscs, grandchild)) { pe_rsc_trace(orig_rsc, "%s: Adding %s as colocated utilization", orig_rsc->id, child->id); add_unallocated_utilization(all_utilization, child, all_rscs, orig_rsc); existing = TRUE; break; } } } } if (existing == FALSE) { resource_t *first_child = (resource_t *) rsc->children->data; pe_rsc_trace(orig_rsc, "%s: Adding %s as colocated utilization", orig_rsc->id, ID(first_child->xml)); add_unallocated_utilization(all_utilization, first_child, all_rscs, orig_rsc); } } } static GHashTable * sum_unallocated_utilization(resource_t * rsc, GListPtr colocated_rscs) { GListPtr gIter = NULL; GListPtr all_rscs = NULL; GHashTable *all_utilization = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); all_rscs = g_list_copy(colocated_rscs); if (g_list_find(all_rscs, rsc) == FALSE) { all_rscs = g_list_append(all_rscs, rsc); } for (gIter = all_rscs; gIter != NULL; gIter = gIter->next) { resource_t *listed_rsc = (resource_t *) gIter->data; if(is_set(listed_rsc->flags, pe_rsc_provisional) == FALSE) { continue; } pe_rsc_trace(rsc, "%s: Processing unallocated colocated %s", rsc->id, listed_rsc->id); add_unallocated_utilization(all_utilization, listed_rsc, all_rscs, rsc); } g_list_free(all_rscs); return all_utilization; } static GListPtr find_colocated_rscs(GListPtr colocated_rscs, resource_t * rsc, resource_t * orig_rsc) { GListPtr gIter = NULL; if (rsc == NULL) { return colocated_rscs; } else if (g_list_find(colocated_rscs, rsc)) { return colocated_rscs; } crm_trace("%s: %s is supposed to be colocated with %s", orig_rsc->id, rsc->id, orig_rsc->id); colocated_rscs = g_list_append(colocated_rscs, rsc); for (gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) { rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; resource_t *rsc_rh = constraint->rsc_rh; /* Break colocation loop */ if (rsc_rh == orig_rsc) { continue; } if (constraint->score == INFINITY && filter_colocation_constraint(rsc, rsc_rh, constraint, TRUE) == influence_rsc_location) { if (rsc_rh->variant == pe_group) { /* Need to use group_variant_data */ colocated_rscs = group_find_colocated_rscs(colocated_rscs, rsc_rh, orig_rsc); } else { colocated_rscs = find_colocated_rscs(colocated_rscs, rsc_rh, orig_rsc); } } } for (gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) { rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; resource_t *rsc_lh = constraint->rsc_lh; /* Break colocation loop */ if (rsc_lh == orig_rsc) { continue; } if (rsc_lh->variant <= pe_group && rsc->variant > pe_group) { /* We do not know if rsc_lh will be colocated with orig_rsc in this case */ continue; } if (constraint->score == INFINITY && filter_colocation_constraint(rsc_lh, rsc, constraint, TRUE) == influence_rsc_location) { if (rsc_lh->variant == pe_group) { /* Need to use group_variant_data */ colocated_rscs = group_find_colocated_rscs(colocated_rscs, rsc_lh, orig_rsc); } else { colocated_rscs = find_colocated_rscs(colocated_rscs, rsc_lh, orig_rsc); } } } return colocated_rscs; } void process_utilization(resource_t * rsc, node_t ** prefer, pe_working_set_t * data_set) { int alloc_details = scores_log_level + 1; if (safe_str_neq(data_set->placement_strategy, "default")) { - GListPtr gIter = NULL; + GHashTableIter iter; GListPtr colocated_rscs = NULL; gboolean any_capable = FALSE; + node_t *node = NULL; colocated_rscs = find_colocated_rscs(colocated_rscs, rsc, rsc); if (colocated_rscs) { GHashTable *unallocated_utilization = NULL; char *rscs_id = crm_concat(rsc->id, "and its colocated resources", ' '); node_t *most_capable_node = NULL; unallocated_utilization = sum_unallocated_utilization(rsc, colocated_rscs); - for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { - node_t *node = (node_t *) gIter->data; + g_hash_table_iter_init(&iter, rsc->allowed_nodes); + while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { + if (can_run_resources(node) == FALSE || node->weight < 0) { + continue; + } if (have_enough_capacity(node, rscs_id, unallocated_utilization)) { any_capable = TRUE; } if (most_capable_node == NULL || compare_capacity(node, most_capable_node) < 0) { /* < 0 means 'node' is more capable */ most_capable_node = node; } } if (any_capable) { - for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { - node_t *node = (node_t *) gIter->data; + g_hash_table_iter_init(&iter, rsc->allowed_nodes); + while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { + if (can_run_resources(node) == FALSE || node->weight < 0) { + continue; + } if (have_enough_capacity(node, rscs_id, unallocated_utilization) == FALSE) { pe_rsc_debug(rsc, "Resource %s and its colocated resources cannot be allocated to node %s: no enough capacity", rsc->id, node->details->uname); resource_location(rsc, node, -INFINITY, "__limit_utilization__", data_set); } } } else if (*prefer == NULL) { *prefer = most_capable_node; } if (unallocated_utilization) { g_hash_table_destroy(unallocated_utilization); } g_list_free(colocated_rscs); free(rscs_id); } if (any_capable == FALSE) { - for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { - node_t *node = (node_t *) gIter->data; + g_hash_table_iter_init(&iter, rsc->allowed_nodes); + while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { + if (can_run_resources(node) == FALSE || node->weight < 0) { + continue; + } if (have_enough_capacity(node, rsc->id, rsc->utilization) == FALSE) { pe_rsc_debug(rsc, "Resource %s cannot be allocated to node %s: no enough capacity", rsc->id, node->details->uname); resource_location(rsc, node, -INFINITY, "__limit_utilization__", data_set); } } } dump_node_scores(alloc_details, rsc, "Post-utilization", rsc->allowed_nodes); } } #define VARIANT_GROUP 1 #include GListPtr group_find_colocated_rscs(GListPtr colocated_rscs, resource_t * rsc, resource_t * orig_rsc) { group_variant_data_t *group_data = NULL; get_group_variant_data(group_data, rsc); if (group_data->colocated || (rsc->parent && (rsc->parent->variant == pe_clone || rsc->parent->variant == pe_master))) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; colocated_rscs = find_colocated_rscs(colocated_rscs, child_rsc, orig_rsc); } } else { if (group_data->first_child) { colocated_rscs = find_colocated_rscs(colocated_rscs, group_data->first_child, orig_rsc); } } colocated_rscs = find_colocated_rscs(colocated_rscs, rsc, orig_rsc); return colocated_rscs; } static void group_add_unallocated_utilization(GHashTable * all_utilization, resource_t * rsc, GListPtr all_rscs) { group_variant_data_t *group_data = NULL; get_group_variant_data(group_data, rsc); if (group_data->colocated || (rsc->parent && (rsc->parent->variant == pe_clone || rsc->parent->variant == pe_master))) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; if (is_set(child_rsc->flags, pe_rsc_provisional) && g_list_find(all_rscs, child_rsc) == FALSE) { native_add_unallocated_utilization(all_utilization, child_rsc); } } } else { if (group_data->first_child && is_set(group_data->first_child->flags, pe_rsc_provisional) && g_list_find(all_rscs, group_data->first_child) == FALSE) { native_add_unallocated_utilization(all_utilization, group_data->first_child); } } }