diff --git a/.gitignore b/.gitignore index adc41d6735..6cb9c51a41 100644 --- a/.gitignore +++ b/.gitignore @@ -1,205 +1,204 @@ # Common \#* .\#* GPATH GRTAGS GTAGS TAGS Makefile Makefile.in .deps .libs *.pc *.pyc *.bz2 *.tar.gz *.rpm *.la *.lo *.o *~ *.gcda *.gcno # Autobuild aclocal.m4 autoconf autoheader autom4te.cache/ automake build.counter compile config.guess config.log config.status config.sub configure depcomp install-sh include/stamp-* libtool libtool.m4 ltdl.m4 libltdl ltmain.sh missing py-compile /m4/argz.m4 /m4/ltargz.m4 /m4/ltoptions.m4 /m4/ltsugar.m4 /m4/ltversion.m4 /m4/lt~obsolete.m4 test-driver ylwrap # Configure targets Doxyfile /cts/CTS.py /cts/CTSlab.py /cts/CTSvars.py /cts/LSBDummy /cts/OCFIPraTest.py /cts/benchmark/clubench /cts/cluster_test /cts/cts /cts/cts-cli /cts/cts-coverage /cts/cts-exec /cts/cts-regression /cts/cts-scheduler /cts/cts-stonithd /cts/fence_dummy /cts/lxc_autogen.sh /cts/pacemaker-cts-dummyd /cts/pacemaker-cts-dummyd@.service /daemons/execd/pacemaker_remote /daemons/execd/pacemaker_remote.service /daemons/fenced/fence_legacy /daemons/pacemakerd/pacemaker /daemons/pacemakerd/pacemaker.combined.upstart /daemons/pacemakerd/pacemaker.service /daemons/pacemakerd/pacemaker.upstart extra/logrotate/pacemaker include/config.h include/config.h.in include/crm_config.h publican.cfg /tools/cibsecret /tools/crm_error /tools/crm_failcount /tools/crm_master /tools/crm_mon.service /tools/crm_mon.upstart /tools/crm_report /tools/crm_standby /tools/report.collector /tools/report.common # Build targets *.7 *.7.xml *.7.html *.8 *.8.xml *.8.html doc/*/en-US/images/*.png doc/*/tmp/** doc/*/publish cib/cib cib/cibmon cib/cibpipe /daemons/attrd/pacemaker-attrd /daemons/controld/pacemaker-controld /daemons/execd/cts-exec-helper /daemons/execd/pacemaker-execd /daemons/execd/pacemaker-remoted /daemons/fenced/stonith-test -/daemons/fenced/stonith_admin /daemons/fenced/pacemaker-fenced /daemons/fenced/pacemaker-fenced.xml /daemons/pacemakerd/pacemakerd 
/daemons/schedulerd/pacemaker-schedulerd /daemons/schedulerd/pacemaker-schedulerd.xml /daemons/schedulerd/ptest doc/api/* doc/Clusters_from_Scratch.txt doc/Pacemaker_Explained.txt doc/acls.html doc/crm_fencing.html doc/publican-catalog* scratch -tools/attrd_updater -tools/cibadmin -tools/crm_attribute -tools/crm_diff -tools/crm_mon -tools/crm_node -tools/crm_resource -tools/crm_shadow -tools/crm_simulate -tools/crm_verify -tools/crmadmin -tools/iso8601 -tools/crm_ticket -tools/report.collector.1 +/tools/attrd_updater +/tools/cibadmin +/tools/crmadmin +/tools/crm_attribute +/tools/crm_diff +/tools/crm_mon +/tools/crm_node +/tools/crm_resource +/tools/crm_shadow +/tools/crm_simulate +/tools/crm_ticket +/tools/crm_verify +/tools/iso8601 +/tools/stonith_admin xml/crm.dtd xml/pacemaker*.rng xml/versions.rng doc/shared/en-US/*.xml doc/Clusters_from_Scratch.build doc/Clusters_from_Scratch/en-US/Ap-*.xml doc/Clusters_from_Scratch/en-US/Ch-*.xml doc/Pacemaker_Administration.build doc/Pacemaker_Administration/en-US/Ch-*.xml doc/Pacemaker_Development.build doc/Pacemaker_Development/en-US/Ch-*.xml doc/Pacemaker_Explained.build doc/Pacemaker_Explained/en-US/Ch-*.xml doc/Pacemaker_Explained/en-US/Ap-*.xml doc/Pacemaker_Remote.build doc/Pacemaker_Remote/en-US/Ch-*.xml lib/gnu/libgnu.a lib/gnu/stdalign.h *.coverity # Test detritus /cts/.regression.failed.diff /cts/scheduler/*.ref /cts/scheduler/*.up /cts/scheduler/*.up.err /cts/scheduler/bug-rh-1097457.log /cts/scheduler/bug-rh-1097457.trs /cts/scheduler/shadow.* /cts/test-suite.log /xml/test-2/*.up /xml/test-2/*.up.err # Formerly built files (helps when jumping back and forth in checkout) /attrd /coverage.sh /crmd /cts/HBDummy /fencing /lrmd /mcp /pengine #Other mock HTML pacemaker*.spec coverity-* compat_reports .ABI-build abi_dumps logs *.patch *.diff *.sed *.orig *.rej *.swp diff --git a/daemons/fenced/Makefile.am b/daemons/fenced/Makefile.am index 4bd87e1663..2a5755a9ed 100644 --- a/daemons/fenced/Makefile.am +++ 
b/daemons/fenced/Makefile.am @@ -1,52 +1,42 @@ # # Copyright 2004-2018 International Business Machines # Author: Sun Jiang Dong # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # include $(top_srcdir)/Makefile.common ## binary progs halibdir = $(CRM_DAEMON_DIR) halib_PROGRAMS = pacemaker-fenced stonith-test -sbin_PROGRAMS = stonith_admin sbin_SCRIPTS = fence_legacy noinst_HEADERS = pacemaker-fenced.h if BUILD_XML_HELP man7_MANS = pacemaker-fenced.7 endif stonith_test_SOURCES = test.c stonith_test_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ $(top_builddir)/lib/fencing/libstonithd.la -stonith_admin_SOURCES = admin.c - -stonith_admin_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ - $(top_builddir)/lib/cib/libcib.la \ - $(top_builddir)/lib/pengine/libpe_status.la \ - $(top_builddir)/lib/cluster/libcrmcluster.la \ - $(top_builddir)/lib/fencing/libstonithd.la \ - $(CLUSTERLIBS) - pacemaker_fenced_CPPFLAGS = -I$(top_srcdir)/daemons/schedulerd $(AM_CPPFLAGS) pacemaker_fenced_YFLAGS = -d pacemaker_fenced_CFLAGS = $(CFLAGS_HARDENED_EXE) pacemaker_fenced_LDFLAGS = $(LDFLAGS_HARDENED_EXE) pacemaker_fenced_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ $(top_builddir)/lib/cluster/libcrmcluster.la \ $(top_builddir)/lib/fencing/libstonithd.la \ $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/daemons/schedulerd/libpengine.la \ $(CLUSTERLIBS) pacemaker_fenced_SOURCES = pacemaker-fenced.c \ fenced_commands.c \ fenced_remote.c CLEANFILES = $(man7_MANS) $(man8_MANS) diff --git a/daemons/fenced/README.md b/daemons/fenced/README.md index 0a3a916aeb..854e6fa5ca 100644 --- a/daemons/fenced/README.md +++ b/daemons/fenced/README.md @@ -1,145 +1,143 @@ # Directory contents -* `admin.c`, `stonith_admin.8`: `stonith_admin` command-line tool and its man - page * `pacemaker-fenced.c`, `pacemaker-fenced.h`, `fenced_commands.c`, `fenced_remote.c`, `pacemaker-fenced.7`: pacemaker-fenced (the 
fencer) and its man page * `fence_dummy`, `fence_legacy`, `fence_legacy.8`: Pacemaker-supplied fence agents and their man pages * `test.c`: `stonith-test` command-line tool # How fencing requests are handled ## Bird's eye view In the broadest terms, stonith works like this: 1. The initiator (an external program such as `stonith_admin`, or the cluster itself via the controller) asks the local fencer, "Hey, can you fence this node?" 1. The local fencer asks all the fencers in the cluster (including itself), "Hey, what fencing devices do you have access to that can fence this node?" 1. Each fencer in the cluster replies with a list of available devices that it knows about. 1. Once the original fencer gets all the replies, it asks the most appropriate fencer peer to actually carry out the fencing. It may send out more than one such request if the target node must be fenced with multiple devices. 1. The chosen fencer(s) call the appropriate fencing resource agent(s) to do the fencing, then reply to the original fencer with the result. 1. The original fencer broadcasts the result to all fencers. 1. Each fencer sends the result to each of its local clients (including, at some point, the initiator). ## Detailed view ### Initiating a fencing request A fencing request can be initiated by the cluster or externally, using the libfencing API. * The cluster always initiates fencing via `daemons/controld/controld_te_actions.c:te_fence_node()` (which calls the `fence()` API). This occurs when a graph synapse contains a `CRM_OP_FENCE` XML operation. * The main external clients are `stonith_admin` and `stonith-test`. Highlights of the fencing API: * `stonith_api_new()` creates and returns a new `stonith_t` object, whose `cmds` member has methods for connect, disconnect, fence, etc. * the `fence()` method creates and sends a `STONITH_OP_FENCE` XML request with the desired action and target node. 
Callers do not have to choose or even have any knowledge about particular fencing devices. ### Fencing queries The function calls for a stonith request go something like this as of this writing: The local fencer receives the client's request via an IPC or messaging layer callback, which calls * `stonith_command()`, which (for requests) calls * `handle_request()`, which (for `STONITH_OP_FENCE` from a client) calls * `initiate_remote_stonith_op()`, which creates a `STONITH_OP_QUERY` XML request with the target, desired action, timeout, etc., then broadcasts the operation to the cluster group (i.e. all fencer instances) and starts a timer. The query is broadcast because (1) location constraints might prevent the local node from accessing the stonith device directly, and (2) even if the local node does have direct access, another node might be preferred to carry out the fencing. Each fencer receives the original fencer's `STONITH_OP_QUERY` broadcast request via IPC or messaging layer callback, which calls: * `stonith_command()`, which (for requests) calls * `handle_request()`, which (for `STONITH_OP_QUERY` from a peer) calls * `stonith_query()`, which calls * `get_capable_devices()` with `stonith_query_capable_device_db()` to add device information to an XML reply and send it. (A message is considered a reply if it contains `T_STONITH_REPLY`, which is only set by fencer peers, not clients.) The original fencer receives all peers' `STONITH_OP_QUERY` replies via IPC or messaging layer callback, which calls: * `stonith_command()`, which (for replies) calls * `handle_reply()` which (for `STONITH_OP_QUERY`) calls * `process_remote_stonith_query()`, which allocates a new query result structure, parses device information into it, and adds it to the operation object. It increments the number of replies received for this operation, and compares it against the expected number of replies (i.e. 
the number of active peers), and if this is the last expected reply, calls * `call_remote_stonith()`, which calculates the timeout and sends `STONITH_OP_FENCE` request(s) to carry out the fencing. If the target node has a fencing "topology" (which allows specifications such as "this node can be fenced either with device A, or devices B and C in combination"), it will choose the device(s), and send out as many requests as needed. If it chooses a device, it will choose the peer; a peer is preferred if it has "verified" access to the desired device, meaning that it has the device "running" on it and thus has a monitor operation ensuring reachability. ### Fencing operations Each `STONITH_OP_FENCE` request goes something like this as of this writing: The chosen peer fencer receives the `STONITH_OP_FENCE` request via IPC or messaging layer callback, which calls: * `stonith_command()`, which (for requests) calls * `handle_request()`, which (for `STONITH_OP_FENCE` from a peer) calls * `stonith_fence()`, which calls * `schedule_stonith_command()` (using supplied device if `F_STONITH_DEVICE` was set, otherwise the highest-priority capable device obtained via `get_capable_devices()` with `stonith_fence_get_devices_cb()`), which adds the operation to the device's pending operations list and triggers processing. The chosen peer fencer's mainloop is triggered and calls * `stonith_device_dispatch()`, which calls * `stonith_device_execute()`, which pops off the next item from the device's pending operations list. If acting as the (internally implemented) watchdog agent, it panics the node, otherwise it calls * `stonith_action_create()` and `stonith_action_execute_async()` to call the fencing agent. 
The chosen peer fencer's mainloop is triggered again once the fencing agent returns, and calls * `stonith_action_async_done()` which adds the results to an action object then calls its * done callback (`st_child_done()`), which calls `schedule_stonith_command()` for a new device if there are further required actions to execute or if the original action failed, then builds and sends an XML reply to the original fencer (via `stonith_send_async_reply()`), then checks whether any pending actions are the same as the one just executed and merges them if so. ### Fencing replies The original fencer receives the `STONITH_OP_FENCE` reply via IPC or messaging layer callback, which calls: * `stonith_command()`, which (for replies) calls * `handle_reply()`, which calls * `process_remote_stonith_exec()`, which calls either `call_remote_stonith()` (to retry a failed operation, or try the next device in a topology if appropriate, which issues a new `STONITH_OP_FENCE` request, proceeding as before) or `remote_op_done()` (if the operation is definitively failed or successful). * `remote_op_done()` broadcasts the result to all peers. Finally, all peers receive the broadcast result and call * `remote_op_done()`, which sends the result to all local clients. diff --git a/tools/Makefile.am b/tools/Makefile.am index 643b787408..ed40f14abc 100644 --- a/tools/Makefile.am +++ b/tools/Makefile.am @@ -1,132 +1,153 @@ # # Copyright 2004-2018 Andrew Beekhof # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. 
# include $(top_srcdir)/Makefile.common if BUILD_SYSTEMD systemdunit_DATA = crm_mon.service endif noinst_HEADERS = crm_resource.h fake_transition.h pcmkdir = $(datadir)/$(PACKAGE) pcmk_DATA = report.common report.collector sbin_SCRIPTS = crm_report crm_standby crm_master crm_failcount if BUILD_CIBSECRETS sbin_SCRIPTS += cibsecret endif EXTRA_DIST = $(sbin_SCRIPTS) -sbin_PROGRAMS = crm_simulate crmadmin cibadmin crm_node crm_attribute crm_resource crm_verify \ - crm_shadow attrd_updater crm_diff crm_mon iso8601 crm_ticket crm_error +sbin_PROGRAMS = attrd_updater \ + cibadmin \ + crmadmin \ + crm_simulate \ + crm_attribute \ + crm_diff \ + crm_error \ + crm_mon \ + crm_node \ + crm_resource \ + crm_shadow \ + crm_verify \ + crm_ticket \ + iso8601 \ + stonith_admin if BUILD_SERVICELOG sbin_PROGRAMS += notifyServicelogEvent endif if BUILD_OPENIPMI_SERVICELOG sbin_PROGRAMS += ipmiservicelogd endif ## SOURCES MAN8DEPS = crm_attribute crm_node crmadmin_SOURCES = crmadmin.c crmadmin_LDADD = $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/common/libcrmcommon.la \ $(CLUSTERLIBS) crm_error_SOURCES = crm_error.c crm_error_LDADD = $(top_builddir)/lib/common/libcrmcommon.la cibadmin_SOURCES = cibadmin.c cibadmin_LDADD = $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/common/libcrmcommon.la crm_shadow_SOURCES = cib_shadow.c crm_shadow_LDADD = $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/common/libcrmcommon.la crm_node_SOURCES = crm_node.c crm_node_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la \ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/common/libcrmcommon.la \ $(CLUSTERLIBS) crm_simulate_SOURCES = crm_simulate.c fake_transition.c crm_simulate_CFLAGS = -I$(top_srcdir)/daemons/schedulerd crm_simulate_LDADD = $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/daemons/schedulerd/libpengine.la \ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/lrmd/liblrmd.la \ 
$(top_builddir)/lib/transition/libtransitioner.la \ $(top_builddir)/lib/common/libcrmcommon.la crm_diff_SOURCES = crm_diff.c crm_diff_LDADD = $(top_builddir)/lib/common/libcrmcommon.la crm_mon_SOURCES = crm_mon.c crm_mon_CFLAGS = -I$(top_srcdir)/daemons/schedulerd crm_mon_LDADD = $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/lib/fencing/libstonithd.la \ $(top_builddir)/daemons/schedulerd/libpengine.la \ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/common/libcrmcommon.la \ $(CURSESLIBS) # Arguments could be made that this should live in crm/pengine crm_verify_SOURCES = crm_verify.c crm_verify_LDADD = $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/daemons/schedulerd/libpengine.la \ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/common/libcrmcommon.la crm_attribute_SOURCES = crm_attribute.c crm_attribute_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la \ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/common/libcrmcommon.la crm_resource_SOURCES = crm_resource.c crm_resource_ban.c crm_resource_runtime.c crm_resource_print.c fake_transition.c crm_resource_CFLAGS = -I$(top_srcdir)/daemons/schedulerd crm_resource_LDADD = $(top_builddir)/lib/pengine/libpe_rules.la \ $(top_builddir)/lib/lrmd/liblrmd.la \ $(top_builddir)/lib/services/libcrmservice.la \ $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/daemons/schedulerd/libpengine.la \ $(top_builddir)/lib/transition/libtransitioner.la \ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/common/libcrmcommon.la iso8601_SOURCES = test.iso8601.c iso8601_LDADD = $(top_builddir)/lib/common/libcrmcommon.la attrd_updater_SOURCES = attrd_updater.c attrd_updater_LDADD = $(top_builddir)/lib/common/libcrmcommon.la crm_ticket_SOURCES = crm_ticket.c crm_ticket_CFLAGS = -I$(top_srcdir)/daemons/schedulerd crm_ticket_LDADD = $(top_builddir)/lib/pengine/libpe_rules.la \ $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/daemons/schedulerd/libpengine.la 
\ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/common/libcrmcommon.la +stonith_admin_SOURCES = stonith_admin.c +stonith_admin_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ + $(top_builddir)/lib/cib/libcib.la \ + $(top_builddir)/lib/pengine/libpe_status.la \ + $(top_builddir)/lib/cluster/libcrmcluster.la \ + $(top_builddir)/lib/fencing/libstonithd.la \ + $(CLUSTERLIBS) + if BUILD_SERVICELOG notifyServicelogEvent_SOURCES = notifyServicelogEvent.c notifyServicelogEvent_CFLAGS = $(SERVICELOG_CFLAGS) notifyServicelogEvent_LDADD = $(top_builddir)/lib/common/libcrmcommon.la $(SERVICELOG_LIBS) endif if BUILD_OPENIPMI_SERVICELOG ipmiservicelogd_SOURCES = ipmiservicelogd.c ipmiservicelogd_CFLAGS = $(OPENIPMI_SERVICELOG_CFLAGS) $(SERVICELOG_CFLAGS) ipmiservicelogd_LDFLAGS = $(top_builddir)/lib/common/libcrmcommon.la $(OPENIPMI_SERVICELOG_LIBS) $(SERVICELOG_LIBS) endif CLEANFILES = $(man8_MANS) diff --git a/daemons/fenced/admin.c b/tools/stonith_admin.c similarity index 100% rename from daemons/fenced/admin.c rename to tools/stonith_admin.c