diff --git a/doc/sphinx/Makefile.am b/doc/sphinx/Makefile.am index 25fd411f05..e6435a80a9 100644 --- a/doc/sphinx/Makefile.am +++ b/doc/sphinx/Makefile.am @@ -1,112 +1,113 @@ # # Copyright 2003-2020 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # include $(top_srcdir)/mk/common.mk # Things you might want to override on the command line # Books to generate -BOOKS ?= Pacemaker_Development \ - Pacemaker_Remote +BOOKS ?= Pacemaker_Administration \ + Pacemaker_Development \ + Pacemaker_Remote # Output formats to generate. Possible values: # html (multiple HTML files) # dirhtml (HTML files named index.html in multiple directories) # singlehtml (a single large HTML file) # text # pdf # epub # latex # linkcheck (not actually a format; check validity of external links) # # The results will end up in /_build/ BOOK_FORMATS ?= html # Set to "a4" or "letter" if building latex format PAPER ?= letter # Additional options for sphinx-build SPHINXFLAGS ?= # toplevel rsync destination for www targets (without trailing slash) RSYNC_DEST ?= root@www.clusterlabs.org:/var/www/html # End of useful overrides EXTRA_DIST = $(wildcard */*.rst) # recursive, preserve symlinks/permissions/times, verbose, compress, # don't cross filesystems, sparse, show progress RSYNC_OPTS = -rlptvzxS --progress BOOK_RSYNC_DEST = $(RSYNC_DEST)/$(PACKAGE)/doc/$(PACKAGE_SERIES) TAG ?= $(shell [ -n "`git tag --points-at HEAD | head -1`" ] \ && ( git tag --points-at HEAD | head -1 ) \ || git log --pretty=format:Pacemaker-2.0.3-%h -n 1 HEAD) BOOK = none if BUILD_SPHINX_DOCS $(BOOKS:%=%/conf.py): conf.py.in $(AM_V_GEN)sed \ -e 's/%VERSION%/$(VERSION)/g' \ -e 's/%BOOK_ID%/$(@:%/conf.py=%)/g' \ -e 's/%BOOK_TITLE%/$(subst _, ,$(@:%/conf.py=%))/g' \ $(<) > "$@" $(BOOK)/_build: _static/pacemaker.css $(BOOK)/conf.py $(wildcard $(srcdir)/$(BOOK)/*.rst) @echo 'Building "$(subst _, ,$(BOOK))" because of $?' 
$(PCMK_quiet) $(AM_V_at)rm -rf "$@" $(AM_V_BOOK)for format in $(BOOK_FORMATS); do \ echo -e "\n * Building $$format" $(PCMK_quiet); \ doctrees="doctrees"; \ real_format="$$format"; \ case "$$format" in \ pdf) real_format="latex" ;; \ gettext) doctrees="gettext-doctrees" ;; \ esac; \ $(SPHINX) -b "$$real_format" -d "$@/$$doctrees" \ -c "$(builddir)/$(BOOK)" \ -D latex_paper_size=$(PAPER) $(SPHINXFLAGS) \ "$(srcdir)/$(BOOK)" "$@/$$format" \ $(PCMK_quiet); \ if [ "$$format" = "pdf" ]; then \ $(MAKE) $(AM_MAKEFLAGS) -C "$@/$$format" \ all-pdf; \ fi; \ done endif .PHONY: books-upload books-upload: all if BUILD_SPHINX_DOCS @echo "Uploading $(PACKAGE_SERIES) documentation set" @for book in $(BOOKS); do \ echo " * $$book"; \ buildfile="$$book/_build/build-$(PACKAGE_SERIES).txt"; \ echo "Generated on `date --utc` from version $(TAG)" \ > "$$buildfile"; \ rsync $(RSYNC_OPTS) "$$buildfile" \ $(BOOK_FORMATS:%=$$book/_build/%) \ "$(BOOK_RSYNC_DEST)/$$book/"; \ done endif all-local: if BUILD_SPHINX_DOCS @for book in $(BOOKS); do \ $(MAKE) $(AM_MAKEFLAGS) BOOK=$$book \ PAPER="$(PAPER)" SPHINXFLAGS="$(SPHINXFLAGS)" \ BOOK_FORMATS="$(BOOK_FORMATS)" $$book/_build; \ done endif clean-local: $(AM_V_at)-rm -rf $(BOOKS:%="$(builddir)/%/_build") $(BOOKS:%="$(builddir)/%/conf.py") diff --git a/doc/sphinx/Pacemaker_Administration/agents.rst b/doc/sphinx/Pacemaker_Administration/agents.rst new file mode 100644 index 0000000000..ba3645e0b2 --- /dev/null +++ b/doc/sphinx/Pacemaker_Administration/agents.rst @@ -0,0 +1,380 @@ +Resource Agents +--------------- + +Resource Agent Actions +###################### + +If one resource depends on another resource via constraints, the cluster will +interpret an expected result as sufficient to continue with dependent actions. +This may cause timing issues if the resource agent start returns before the +service is not only launched but fully ready to perform its function, or if the +resource agent stop returns before the service has fully released all its +claims on system resources. At a minimum, the start or stop should not return +before a status command would return the expected (started or stopped) result. + +OCF Resource Agents +################### + +Location of Custom Scripts +__________________________ + +.. index:: OCF resource agents + +OCF Resource Agents are found in ``/usr/lib/ocf/resource.d/$PROVIDER`` + +When creating your own agents, you are encouraged to create a new directory +under ``/usr/lib/ocf/resource.d/`` so that they are not confused with (or +overwritten by) the agents shipped by existing providers. + +So, for example, if you choose the provider name of big-corp and want a new +resource named big-app, you would create a resource agent called +``/usr/lib/ocf/resource.d/big-corp/big-app`` and define a resource: + +.. code-block: xml + + + +Actions +_______ + +All OCF resource agents are required to implement the following actions. + +.. table:: Required Actions for OCF Agents + ++--------------+-------------+------------------------------------------------+ +| Action | Description | Instructions | ++==============+=============+================================================+ +| start | Start the | Return 0 on success and an appropriate | +| | resource | error code otherwise. Must not report | +| | | success until the resource is fully | +| | | active. | +| | | | +| | | .. 
index:: | +| | | pair: start; OCF action | +| | | pair: start; action | ++--------------+-------------+------------------------------------------------+ +| stop | Stop the | Return 0 on success and an appropriate | +| | resource | error code otherwise. Must not report | +| | | success until the resource is fully | +| | | stopped. | +| | | | +| | | .. index:: | +| | | pair: stop; OCF action | +| | | pair: stop; action | ++--------------+-------------+------------------------------------------------+ +| monitor | Check the | Exit 0 if the resource is running, 7 | +| | resource's | if it is stopped, and any other OCF | +| | state | exit code if it is failed. NOTE: The | +| | | monitor script should test the state | +| | | of the resource on the local machine | +| | | only. | +| | | | +| | | .. index:: | +| | | pair: monitor; OCF action | +| | | pair: monitor; action | ++--------------+-------------+------------------------------------------------+ +| meta-data | Describe | Provide information about this | +| | the | resource in the XML format defined by | +| | resource | the OCF standard. Exit with 0. NOTE: | +| | | This is *not* required to be performed | +| | | as root. | +| | | | +| | | .. index:: | +| | | pair: meta-data; OCF action | +| | | pair: meta-data; action | ++--------------+-------------+------------------------------------------------+ +| validate-all | Verify the | Return 0 if parameters are valid, 2 if | +| | supplied | not valid, and 6 if resource is not | +| | parameters | configured. | +| | | | +| | | .. index:: | +| | | pair: validate-all; OCF action | +| | | pair: validate-all; action | ++--------------+-------------+------------------------------------------------+ + +Additional requirements (not part of the OCF specification) are placed on +agents that will be used for advanced concepts such as clone resources. + +.. table:: Optional Actions for OCF Resource Agents + ++--------------+-------------+------------------------------------------------+ +| Action | Description | Instructions | ++==============+=============+================================================+ +| promote | Promote the | Return 0 on success | +| | local | | +| | instance of | .. index:: | +| | a promotable| pair: promote; OCF action | +| | clone | pair: promote; action | +| | resource to | | +| | the master | | +| | (primary) | | +| | state. | | ++--------------+-------------+------------------------------------------------+ +| demote | Demote the | Return 0 on success | +| | local | | +| | instance of | .. index:: | +| | a promotable| pair: demote; OCF action | +| | clone | pair: demote; action | +| | resource to | | +| | the slave | | +| | (secondary) | | +| | state. | | ++--------------+-------------+------------------------------------------------+ +| notify | Used by the | Must not fail. Must exit with 0 | +| | cluster to | | +| | send | .. index:: | +| | the agent | pair: notify; OCF action | +| | pre- and | pair: notify; action | +| | post- | | +| | notification| | +| | events | | +| | telling the | | +| | resource | | +| | what has | | +| | happened and| | +| | will happen.| | ++--------------+-------------+------------------------------------------------+ + +One action specified in the OCF specs, ``recover``, is not currently used by +the cluster. It is intended to be a variant of the ``start`` action that tries +to recover a resource locally. + +.. important:: + + If you create a new OCF resource agent, use `ocf-tester` to verify that the + agent complies with the OCF standard properly. 
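For example, the hypothetical agent from the provider example earlier
(``/usr/lib/ocf/resource.d/big-corp/big-app``) could be checked with an
invocation along these lines; the resource name and the ``config`` parameter
are purely illustrative:

.. code-block:: none

   # ocf-tester -n big-app -o config=/etc/big-app.conf \
       /usr/lib/ocf/resource.d/big-corp/big-app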
+ +.. index:: ocf-tester + +How are OCF Return Codes Interpreted? +_____________________________________ + +The first thing the cluster does is to check the return code against +the expected result. If the result does not match the expected value, +then the operation is considered to have failed, and recovery action is +initiated. + +There are three types of failure recovery: + +.. table:: Types of recovery performed by the cluster + ++-------+------------------------------+--------------------------------------+ +| Type | Description | Action Taken by the Cluster | ++=======+==============================+======================================+ +| soft | A transient error occurred | Restart the resource or move it to a | +| | | new location | +| | .. index:: | | +| | pair: soft; OCF error | | ++-------+------------------------------+--------------------------------------+ +| hard | A non-transient error that | Move the resource elsewhere and | +| | may be specific to the | prevent it from being retried on the | +| | current node | current node | +| | | | +| | .. index:: | | +| | pair: hard; OCF error | | ++-------+------------------------------+--------------------------------------+ +| fatal | A non-transient error that | Stop the resource and prevent it | +| | will be common to all | from being started on any cluster | +| | cluster nodes (e.g. a bad | node | +| | configuration was specified) | | +| | | | +| | .. index:: | | +| | pair: fatal; OCF error | | ++-------+------------------------------+--------------------------------------+ + +.. _ocf_return_codes: + +OCF Return Codes +________________ + +The following table outlines the different OCF return codes and the type of +recovery the cluster will initiate when a failure code is received. Although +counterintuitive, even actions that return 0 (aka. ``OCF_SUCCESS``) can be +considered to have failed, if 0 was not the expected return value. + +.. table:: OCF Exit Codes and their Recovery Types + ++-------+-----------------------+---------------------------------------------+----------+ +| Exit | OCF Alias | Description | Recovery | +| Code | | | | ++=======+=======================+=============================================+==========+ +| 0 | OCF_SUCCESS | Success. The command completed successfully.| soft | +| | | This is the expected result for all start, | | +| | | stop, promote and demote commands. | | +| | | | | +| | | .. index:: | | +| | | pair: return code; OCF_SUCCESS | | +| | | pair: return code; 0 | | ++-------+-----------------------+---------------------------------------------+----------+ +| 1 | OCF_ERR_GENERIC | Generic "there was a problem" | soft | +| | | error code. | | +| | | | | +| | | .. index:: | | +| | | pair: return code; OCF_ERR_GENERIC | | +| | | pair: return code; 1 | | ++-------+-----------------------+---------------------------------------------+----------+ +| 2 | OCF_ERR_ARGS | The resource's configuration is not valid on| hard | +| | | this machine. E.g. it refers to a location | | +| | | not found on the node. | | +| | | | | +| | | .. index:: | | +| | | pair: return code; OCF_ERR_ARGS | | +| | | pair: return code; 2 | | ++-------+-----------------------+---------------------------------------------+----------+ +| 3 | OCF_ERR_UNIMPLEMENTED | The requested action is not | hard | +| | | implemented. | | +| | | | | +| | | .. 
index:: | | +| | | pair: return code; OCF_ERR_UNIMPLEMENTED | | +| | | pair: return code; 3 | | ++-------+-----------------------+---------------------------------------------+----------+ +| 4 | OCF_ERR_PERM | The resource agent does not have | hard | +| | | sufficient privileges to complete the task. | | +| | | | | +| | | .. index:: | | +| | | pair: return code; OCF_ERR_PERM | | +| | | pair: return code; 4 | | ++-------+-----------------------+---------------------------------------------+----------+ +| 5 | OCF_ERR_INSTALLED | The tools required by the resource are | hard | +| | | not installed on this machine. | | +| | | | | +| | | .. index:: | | +| | | pair: return code; OCF_ERR_INSTALLED | | +| | | pair: return code; 5 | | ++-------+-----------------------+---------------------------------------------+----------+ +| 6 | OCF_ERR_CONFIGURED | The resource's configuration is invalid. | fatal | +| | | E.g. required parameters are missing. | | +| | | | | +| | | .. index:: | | +| | | pair: return code; OCF_ERR_CONFIGURED | | +| | | pair: return code; 6 | | ++-------+-----------------------+---------------------------------------------+----------+ +| 7 | OCF_NOT_RUNNING | The resource is safely stopped. The cluster | N/A | +| | | will not attempt to stop a resource that | | +| | | returns this for any action. | | +| | | | | +| | | .. index:: | | +| | | pair: return code; OCF_NOT_RUNNING | | +| | | pair: return code; 7 | | ++-------+-----------------------+---------------------------------------------+----------+ +| 8 | OCF_RUNNING_MASTER | The resource is running in | soft | +| | | master mode. | | +| | | | | +| | | .. index:: | | +| | | pair: return code; OCF_RUNNING_MASTER | | +| | | pair: return code; 8 | | ++-------+-----------------------+---------------------------------------------+----------+ +| 9 | OCF_FAILED_MASTER | The resource is in master mode but has | soft | +| | | failed. The resource will be demoted, | | +| | | stopped and then started (and possibly | | +| | | promoted) again. | | +| | | | | +| | | .. index:: | | +| | | pair: return code; OCF_FAILED_MASTER | | +| | | pair: return code; 9 | | ++-------+-----------------------+---------------------------------------------+----------+ +| other | *none* | Custom error code. | soft | +| | | | | +| | | .. index:: | | +| | | pair: return code; other | | ++-------+-----------------------+---------------------------------------------+----------+ + +Exceptions to the recovery handling described above: + +* Probes (non-recurring monitor actions) that find a resource active + (or in master mode) will not result in recovery action unless it is + also found active elsewhere. +* The recovery action taken when a resource is found active more than + once is determined by the resource's ``multiple-active`` property. +* Recurring actions that return ``OCF_ERR_UNIMPLEMENTED`` + do not cause any type of recovery. + + +LSB Resource Agents (Init Scripts) +################################## + +LSB Compliance +______________ + +The relevant part of the +`LSB specifications `_ +includes a description of all the return codes listed here. + +Assuming `some_service` is configured correctly and currently +inactive, the following sequence will help you determine if it is +LSB-compatible: + +#. Start (stopped): + + .. code-block:: none + + # /etc/init.d/some_service start ; echo "result: $?" + + * Did the service start? + * Did the echo command print ``result: 0`` (in addition to the init script's + usual output)? + +#. Status (running): + + .. 
code-block:: none + + # /etc/init.d/some_service status ; echo "result: $?" + + * Did the script accept the command? + * Did the script indicate the service was running? + * Did the echo command print ``result: 0`` (in addition to the init script's + usual output)? + +#. Start (running): + + .. code-block:: none + + # /etc/init.d/some_service start ; echo "result: $?" + + * Is the service still running? + * Did the echo command print ``result: 0`` (in addition to the init + script's usual output)? + +#. Stop (running): + + .. code-block:: none + + # /etc/init.d/some_service stop ; echo "result: $?" + + * Was the service stopped? + * Did the echo command print ``result: 0`` (in addition to the init + script's usual output)? + +#. Status (stopped): + + .. code-block:: none + + # /etc/init.d/some_service status ; echo "result: $?" + + * Did the script accept the command? + * Did the script indicate the service was not running? + * Did the echo command print ``result: 3`` (in addition to the init + script's usual output)? + +#. Stop (stopped): + + .. code-block:: none + + # /etc/init.d/some_service stop ; echo "result: $?" + + * Is the service still stopped? + * Did the echo command print ``result: 0`` (in addition to the init + script's usual output)? + +#. Status (failed): + + This step is not readily testable and relies on manual inspection of the script. + + The script can use one of the error codes (other than 3) listed in the + LSB spec to indicate that it is active but failed. This tells the + cluster that before moving the resource to another node, it needs to + stop it on the existing one first. + +If the answer to any of the above questions is no, then the script is not +LSB-compliant. Your options are then to either fix the script or write an OCF +agent based on the existing script. diff --git a/doc/sphinx/Pacemaker_Administration/cluster.rst b/doc/sphinx/Pacemaker_Administration/cluster.rst new file mode 100644 index 0000000000..26da9e5c68 --- /dev/null +++ b/doc/sphinx/Pacemaker_Administration/cluster.rst @@ -0,0 +1,64 @@ +The Cluster Layer +----------------- + +Pacemaker and the Cluster Layer +############################### + +Pacemaker utilizes an underlying cluster layer for two purposes: + +* obtaining quorum +* messaging between nodes + +Currently, only Corosync 2 and later is supported for this layer. + +Managing Nodes in a Corosync-Based Cluster +########################################## + +Adding a New Corosync Node +__________________________ + +.. index:: + pair: corosync; add cluster node + +To add a new node: + +#. Install Corosync and Pacemaker on the new host. +#. Copy ``/etc/corosync/corosync.conf`` and ``/etc/corosync/authkey`` (if it + exists) from an existing node. You may need to modify the ``mcastaddr`` + option to match the new node's IP address. +#. Start the cluster software on the new host. If a log message containing + "Invalid digest" appears from Corosync, the keys are not consistent between + the machines. + +Removing a Corosync Node +________________________ + +.. index:: + pair: corosync; remove cluster node + +Because the messaging and membership layers are the authoritative +source for cluster nodes, deleting them from the CIB is not a complete +solution. First, one must arrange for corosync to forget about the +node (**pcmk-1** in the example below). + +#. Stop the cluster on the host to be removed. 
How to do this will vary with + your operating system and installed versions of cluster software, for example, + ``pcs cluster stop`` if you are using pcs for cluster management. +#. From one of the remaining active cluster nodes, tell Pacemaker to forget + about the removed host, which will also delete the node from the CIB: + + .. code-block:: none + + # crm_node -R pcmk-1 + +Replacing a Corosync Node +_________________________ + +.. index:: + pair: corosync; replace cluster node + +To replace an existing cluster node: + +#. Make sure the old node is completely stopped. +#. Give the new machine the same hostname and IP address as the old one. +#. Follow the procedure above for adding a node. diff --git a/doc/sphinx/Pacemaker_Administration/configuring.rst b/doc/sphinx/Pacemaker_Administration/configuring.rst new file mode 100644 index 0000000000..626c396bbb --- /dev/null +++ b/doc/sphinx/Pacemaker_Administration/configuring.rst @@ -0,0 +1,264 @@ +Configuring Pacemaker +--------------------- + +Pacemaker's configuration, the CIB, is stored in XML format. Cluster +administrators have multiple options for modifying the configuration either via +the XML, or at a more abstract (and easier for humans to understand) level. + +Pacemaker reacts to configuration changes as soon as they are saved. +Pacemaker's command-line tools and most higher-level tools provide the ability +to batch changes together and commit them at once, rather than make a series of +small changes, which could cause avoid unnecessary actions as Pacemaker +responds to each change individually. + +Pacemaker tracks revisions to the configuration and will reject any update +older than the current revision. Thus, it is a good idea to serialize all +changes to the configuration. Avoid attempting simultaneous changes, whether on +the same node or different nodes, and whether manually or using some automated +configuration tool. + +.. note:: + + It is not necessary to update the configuration on all cluster nodes. + Pacemaker immediately synchronizes changes to all active members of the + cluster. To reduce bandwidth, the cluster only broadcasts the incremental + updates that result from your changes and uses checksums to ensure that each + copy is consistent. + + +Configuration Using Higher-level Tools +______________________________________ + +Most users will benefit from using higher-level tools provided by projects +separate from Pacemaker. Some of the most commonly used include the crm shell, +hawk, and pcs. [#]_ + +See those projects' documentation for details on how to configure Pacemaker +using them. + + +Configuration Using Pacemaker's Command-Line Tools +__________________________________________________ + +Pacemaker provides lower-level, command-line tools to manage the cluster. Most +configuration tasks can be performed with these tools, without needing any XML +knowledge. + +To enable STONITH for example, one could run: + +.. code-block:: none + + # crm_attribute --name stonith-enabled --update 1 + +Or, to check whether **node1** is allowed to run resources, there is: + +.. code-block:: none + + # crm_standby --query --node node1 + +Or, to change the failure threshold of **my-test-rsc**, one can use: + +.. code-block:: none + + # crm_resource -r my-test-rsc --set-parameter migration-threshold --parameter-value 3 --meta + +Examples of using these tools for specific cases will be given throughout this +document where appropriate. See the man pages for further details. 
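To confirm that a change such as the last one took effect, the same tools can
usually read the value back. A brief illustration, reusing the hypothetical
**my-test-rsc** resource from above (the output line is indicative only):

.. code-block:: none

   # crm_resource -r my-test-rsc --get-parameter migration-threshold --meta
   3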
+ +See :ref:`cibadmin` for how to edit the CIB using XML. + +See :ref:`crm_shadow` for a way to make a series of changes, then commit them +all at once to the live cluster. + + +Working with CIB Properties +########################### + +Although these fields can be written to by the user, in +most cases the cluster will overwrite any values specified by the +user with the "correct" ones. + +To change the ones that can be specified by the user, for example +``admin_epoch``, one should use: + +.. code-block:: none + + # cibadmin --modify --xml-text '' + +A complete set of CIB properties will look something like this: + +.. topic:: XML attributes set for a cib element + + .. code-block:: xml + + + + +Querying and Setting Cluster Options +#################################### + +.. index:: + pair: cluster option; querying + pair: cluster option; setting + +Cluster options can be queried and modified using the ``crm_attribute`` tool. +To get the current value of ``cluster-delay``, you can run: + +.. code-block:: none + + # crm_attribute --query --name cluster-delay + +which is more simply written as + +.. code-block:: none + + # crm_attribute -G -n cluster-delay + +If a value is found, you'll see a result like this: + +.. code-block:: none + + # crm_attribute -G -n cluster-delay + scope=crm_config name=cluster-delay value=60s + +If no value is found, the tool will display an error: + +.. code-block:: none + + # crm_attribute -G -n clusta-deway + scope=crm_config name=clusta-deway value=(null) + Error performing operation: No such device or address + +To use a different value (for example, 30 seconds), simply run: + +.. code-block:: none + + # crm_attribute --name cluster-delay --update 30s + +To go back to the cluster's default value, you can delete the value, for example: + +.. code-block:: none + + # crm_attribute --name cluster-delay --delete + Deleted crm_config option: id=cib-bootstrap-options-cluster-delay name=cluster-delay + + +When Options are Listed More Than Once +______________________________________ + +If you ever see something like the following, it means that the option you're +modifying is present more than once. + +.. topic:: Deleting an option that is listed twice + + .. code-block:: none + + # crm_attribute --name batch-limit --delete + + Multiple attributes match name=batch-limit in crm_config: + Value: 50 (set=cib-bootstrap-options, id=cib-bootstrap-options-batch-limit) + Value: 100 (set=custom, id=custom-batch-limit) + Please choose from one of the matches above and supply the 'id' with --id + +In such cases, follow the on-screen instructions to perform the requested +action. To determine which value is currently being used by the cluster, refer +to the "Rules" chapter of *Pacemaker Explained*. + + +.. _remote_connection: + +Connecting from a Remote Machine +################################ + +.. index:: + pair: cluster; remote connection + pair: cluster; remote administration + +Provided Pacemaker is installed on a machine, it is possible to connect to the +cluster even if the machine itself is not in the same cluster. To do this, one +simply sets up a number of environment variables and runs the same commands as +when working on a cluster node. + +.. 
table:: Environment Variables Used to Connect to Remote Instances of the CIB + ++----------------------+-----------+----------------------------------------------+ +| Environment Variable | Default | Description | ++======================+===========+==============================================+ +| CIB_user | $USER | The user to connect as. Needs to be | +| | | part of the ``haclient`` group on | +| | | the target host. | +| | | | +| | | .. index:: | +| | | pair: environment variable; CIB_user | ++----------------------+-----------+----------------------------------------------+ +| CIB_passwd | | The user's password. Read from the | +| | | command line if unset. | +| | | | +| | | .. index:: | +| | | pair: environment variable; CIB_passwd | ++----------------------+-----------+----------------------------------------------+ +| CIB_server | localhost | The host to contact | +| | | | +| | | .. index:: | +| | | pair: environment variable; CIB_server | ++----------------------+-----------+----------------------------------------------+ +| CIB_port | | The port on which to contact the server; | +| | | required. | +| | | | +| | | .. index:: | +| | | pair: environment variable; CIB_port | ++----------------------+-----------+----------------------------------------------+ +| CIB_encrypted | TRUE | Whether to encrypt network traffic | +| | | | +| | | .. index:: | +| | | pair: environment variable; CIB_encrypted | ++----------------------+-----------+----------------------------------------------+ + +So, if **c001n01** is an active cluster node and is listening on port 1234 +for connections, and **someuser** is a member of the **haclient** group, +then the following would prompt for **someuser**'s password and return +the cluster's current configuration: + +.. code-block:: none + + # export CIB_port=1234; export CIB_server=c001n01; export CIB_user=someuser; + # cibadmin -Q + +For security reasons, the cluster does not listen for remote connections by +default. If you wish to allow remote access, you need to set the +``remote-tls-port`` (encrypted) or ``remote-clear-port`` (unencrypted) CIB +properties (i.e., those kept in the ``cib`` tag, like ``num_updates`` and +``epoch``). + +.. table:: Extra top-level CIB properties for remote access + ++----------------------+-----------+------------------------------------------------------+ +| CIB Property | Default | Description | ++======================+===========+======================================================+ +| remote-tls-port | | Listen for encrypted remote connections | +| | | on this port. | +| | | | +| | | .. index:: | +| | | pair: remote connection option; remote-tls-port | ++----------------------+-----------+------------------------------------------------------+ +| remote-clear-port | | Listen for plaintext remote connections | +| | | on this port. | +| | | | +| | | .. index:: | +| | | pair: remote connection option; remote-clear-port | ++----------------------+-----------+------------------------------------------------------+ + +.. important:: + + The Pacemaker version on the administration host must be the same or greater + than the version(s) on the cluster nodes. Otherwise, it may not have the + schema files necessary to validate the CIB. + + +.. rubric:: Footnotes + +.. 
[#] For a list, see "Configuration Tools" at + https://clusterlabs.org/components.html diff --git a/doc/sphinx/Pacemaker_Administration/index.rst b/doc/sphinx/Pacemaker_Administration/index.rst new file mode 100644 index 0000000000..97731245d0 --- /dev/null +++ b/doc/sphinx/Pacemaker_Administration/index.rst @@ -0,0 +1,34 @@ +Pacemaker Administration +======================== + +*Managing Pacemaker Clusters* + + +Abstract +-------- +This document has instructions and tips for system administrators who +manage high-availability clusters using Pacemaker. + + +Table of Contents +----------------- + +.. toctree:: + :maxdepth: 3 + :numbered: + + intro + installing + cluster + configuring + tools + troubleshooting + upgrading + agents + + +Index +----- + +* :ref:`genindex` +* :ref:`search` diff --git a/doc/sphinx/Pacemaker_Administration/installing.rst b/doc/sphinx/Pacemaker_Administration/installing.rst new file mode 100644 index 0000000000..334a06ca88 --- /dev/null +++ b/doc/sphinx/Pacemaker_Administration/installing.rst @@ -0,0 +1,107 @@ +Installing Cluster Software +--------------------------- + +Installing the Software +####################### + +Most major Linux distributions have pacemaker packages in their standard +package repositories, or the software can be built from source code. +See the `Install wiki page `_ +for details. + +Enabling Pacemaker +################## + +Enabling Pacemaker For Corosync version 2 and greater +_____________________________________________________ + +High-level cluster management tools are available that can configure +corosync for you. This document focuses on the lower-level details +if you want to configure corosync yourself. + +Corosync configuration is normally located in +``/etc/corosync/corosync.conf``. + +.. topic:: Corosync configuration file for two nodes **myhost1** and **myhost2** + + .. code-block:: none + + totem { + version: 2 + secauth: off + cluster_name: mycluster + transport: udpu + } + + nodelist { + node { + ring0_addr: myhost1 + nodeid: 1 + } + node { + ring0_addr: myhost2 + nodeid: 2 + } + } + + quorum { + provider: corosync_votequorum + two_node: 1 + } + + logging { + to_syslog: yes + } + +.. topic:: Corosync configuration file for three nodes **myhost1**, **myhost2** and **myhost3** + + .. code-block:: none + + totem { + version: 2 + secauth: off + cluster_name: mycluster + transport: udpu + } + + nodelist { + node { + ring0_addr: myhost1 + nodeid: 1 + } + node { + ring0_addr: myhost2 + nodeid: 2 + } + node { + ring0_addr: myhost3 + nodeid: 3 + } + } + + quorum { + provider: corosync_votequorum + } + + logging { + to_syslog: yes + } + +In the above examples, the ``totem`` section defines what protocol version and +options (including encryption) to use, [#]_ +and gives the cluster a unique name (``mycluster`` in these examples). + +The ``node`` section lists the nodes in this cluster. + +The ``quorum`` section defines how the cluster uses quorum. The important thing +is that two-node clusters must be handled specially, so ``two_node: 1`` must be +defined for two-node clusters (it will be ignored for clusters of any other +size). + +The ``logging`` section should be self-explanatory. + +.. rubric:: Footnotes + +.. [#] Please consult the Corosync website (http://www.corosync.org/) and + documentation for details on enabling encryption and peer authentication + for the cluster. 
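Once the cluster software has been started on the configured nodes, Corosync's
own tools can be used to sanity-check membership and quorum. A minimal sketch;
the exact output varies by Corosync version and environment:

.. code-block:: none

   # corosync-cfgtool -s
   # corosync-cmapctl | grep members
   # corosync-quorumtool -s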
diff --git a/doc/sphinx/Pacemaker_Administration/intro.rst b/doc/sphinx/Pacemaker_Administration/intro.rst new file mode 100644 index 0000000000..067e293849 --- /dev/null +++ b/doc/sphinx/Pacemaker_Administration/intro.rst @@ -0,0 +1,21 @@ +Introduction +------------ + +The Scope of this Document +########################## + +The purpose of this document is to help system administrators learn how to +manage a Pacemaker cluster. + +System administrators may be interested in other parts of the +`Pacemaker documentation set `_ +such as *Clusters from Scratch*, a step-by-step guide to setting up an example +cluster, and *Pacemaker Explained*, an exhaustive reference for cluster +configuration. + +Multiple higher-level tools (both command-line and GUI) are available to +simplify cluster management. However, this document focuses on the lower-level +command-line tools that come with Pacemaker itself. The concepts are applicable +to the higher-level tools, though the syntax would differ. + +.. include:: ../shared/pacemaker-intro.rst diff --git a/doc/sphinx/Pacemaker_Administration/tools.rst b/doc/sphinx/Pacemaker_Administration/tools.rst new file mode 100644 index 0000000000..603bfbc101 --- /dev/null +++ b/doc/sphinx/Pacemaker_Administration/tools.rst @@ -0,0 +1,553 @@ +Using Pacemaker Command-Line Tools +---------------------------------- + +.. _cmdline_output: + +Controlling Command Line Output +############################### + +Some of the pacemaker command line utilities have been converted to a new +output system. Among these tools are ``crm_mon`` and ``stonith_admin``. This +is an ongoing project, and more tools will be converted over time. This system +lets you control the formatting of output with ``--output-as=`` and the +destination of output with ``--output-to=``. + +The available formats vary by tool, but at least plain text and XML are +supported by all tools that use the new system. The default format is plain +text. The default destination is stdout but can be redirected to any file. +Some formats support command line options for changing the style of the output. +For instance: + +.. code-block:: none + + # crm_mon --help-output + Usage: + crm_mon [OPTION?] + + Provides a summary of cluster's current state. + + Outputs varying levels of detail in a number of different formats. + + Output Options: + --output-as=FORMAT Specify output format as one of: console (default), html, text, xml + --output-to=DEST Specify file name for output (or "-" for stdout) + --html-cgi Add text needed to use output in a CGI program + --html-stylesheet=URI Link to an external CSS stylesheet + --html-title=TITLE Page title + --text-fancy Use more highly formatted output + +.. _crm_mon: + +Monitor a Cluster with crm_mon +############################## + +.. index:: + pair: command-line tool; crm_mon + +The ``crm_mon`` utility displays the current state of an active cluster. It can +show the cluster status organized by node or by resource, and can be used in +either single-shot or dynamically updating mode. It can also display operations +performed and information about failures. + +Using this tool, you can examine the state of the cluster for irregularities, +and see how it responds when you cause or simulate failures. + +See the manual page or the output of ``crm_mon --help`` for a full description +of its many options. + +.. topic:: Sample output from crm_mon -1 + + .. 
code-block:: none + + Cluster Summary: + * Stack: corosync + * Current DC: node2 (version 2.0.0-1) - partition with quorum + * Last updated: Mon Jan 29 12:18:42 2018 + * Last change: Mon Jan 29 12:18:40 2018 by root via crm_attribute on node3 + * 5 nodes configured + * 2 resources configured + + Node List: + * Online: [ node1 node2 node3 node4 node5 ] + + * Active resources: + * Fencing (stonith:fence_xvm): Started node1 + * IP (ocf:heartbeat:IPaddr2): Started node2 + +.. topic:: Sample output from crm_mon -n -1 + + .. code-block:: none + + Cluster Summary: + * Stack: corosync + * Current DC: node2 (version 2.0.0-1) - partition with quorum + * Last updated: Mon Jan 29 12:21:48 2018 + * Last change: Mon Jan 29 12:18:40 2018 by root via crm_attribute on node3 + * 5 nodes configured + * 2 resources configured + + * Node List: + * Node node1: online + * Fencing (stonith:fence_xvm): Started + * Node node2: online + * IP (ocf:heartbeat:IPaddr2): Started + * Node node3: online + * Node node4: online + * Node node5: online + +As mentioned in an earlier chapter, the DC is the node is where decisions are +made. The cluster elects a node to be DC as needed. The only significance of +the choice of DC to an administrator is the fact that its logs will have the +most information about why decisions were made. + +.. _crm_mon_css: + +Styling crm_mon output +______________________ + +.. index:: + pair: crm_mon; CSS + +Various parts of ``crm_mon``'s HTML output have a CSS class associated with +them. Not everything does, but some of the most interesting portions do. In +the following example, the status of each node has an ``online`` class and the +details of each resource have an ``rsc-ok`` class. + +.. code-block:: html + +

   <h2>Node List</h2>
   <ul>
    <li>
     <span class="online">Node: cluster01 online</span>
     <ul>
      <li><span class="rsc-ok">ping  (ocf::pacemaker:ping):  Started</span></li>
     </ul>
    </li>
    <li>
     <span class="online">Node: cluster02 online</span>
     <ul>
      <li><span class="rsc-ok">ping  (ocf::pacemaker:ping):  Started</span></li>
     </ul>
    </li>
   </ul>
+ +By default, a stylesheet for styling these classes is included in the head of +the HTML output. The relevant portions of this stylesheet that would be used +in the above example is: + +.. code-block:: css + + + +If you want to override some or all of the styling, simply create your own +stylesheet, place it on a web server, and pass ``--html-stylesheet=`` +to ``crm_mon``. The link is added after the default stylesheet, so your +changes take precedence. You don't need to duplicate the entire default. +Only include what you want to change. + +.. _cibadmin: + +Edit the CIB XML with cibadmin +############################## + +.. index:: + pair: command-line tool; cibadmin + +The most flexible tool for modifying the configuration is Pacemaker's +``cibadmin`` command. With ``cibadmin``, you can query, add, remove, update +or replace any part of the configuration. All changes take effect immediately, +so there is no need to perform a reload-like operation. + +The simplest way of using ``cibadmin`` is to use it to save the current +configuration to a temporary file, edit that file with your favorite +text or XML editor, and then upload the revised configuration. + +.. topic:: Safely using an editor to modify the cluster configuration + + .. code-block:: none + + # cibadmin --query > tmp.xml + # vi tmp.xml + # cibadmin --replace --xml-file tmp.xml + +Some of the better XML editors can make use of a RELAX NG schema to +help make sure any changes you make are valid. The schema describing +the configuration can be found in ``pacemaker.rng``, which may be +deployed in a location such as ``/usr/share/pacemaker`` depending on your +operating system distribution and how you installed the software. + +If you want to modify just one section of the configuration, you can +query and replace just that section to avoid modifying any others. + +.. topic:: Safely using an editor to modify only the resources section + + .. code-block:: none + + # cibadmin --query --scope resources > tmp.xml + # vi tmp.xml + # cibadmin --replace --scope resources --xml-file tmp.xml + +To quickly delete a part of the configuration, identify the object you wish to +delete by XML tag and id. For example, you might search the CIB for all +STONITH-related configuration: + +.. topic:: Searching for STONITH-related configuration items + + .. code-block:: none + + # cibadmin --query | grep stonith + + + + + + + + + + + +If you wanted to delete the ``primitive`` tag with id ``child_DoFencing``, +you would run: + +.. code-block:: + + # cibadmin --delete --xml-text '' + +See the cibadmin man page for more options. + +.. warning:: + + Never edit the live ``cib.xml`` file directly. Pacemaker will detect such + changes and refuse to use the configuration. + + +.. _crm_shadow: + +Batch Configuration Changes with crm_shadow +########################################### + +.. index:: + pair: command-line tool; crm_shadow + +Often, it is desirable to preview the effects of a series of configuration +changes before updating the live configuration all at once. For this purpose, +``crm_shadow`` creates a "shadow" copy of the configuration and arranges for +all the command-line tools to use it. + +To begin, simply invoke ``crm_shadow --create`` with a name of your choice, +and follow the simple on-screen instructions. Shadow copies are identified with +a name to make it possible to have more than one. + +.. 
warning:: + + Read this section and the on-screen instructions carefully; failure to do so + could result in destroying the cluster's active configuration! + +.. topic:: Creating and displaying the active sandbox + + .. code-block:: none + + # crm_shadow --create test + Setting up shadow instance + Type Ctrl-D to exit the crm_shadow shell + shadow[test]: + shadow[test] # crm_shadow --which + test + +From this point on, all cluster commands will automatically use the shadow copy +instead of talking to the cluster's active configuration. Once you have +finished experimenting, you can either make the changes active via the +``--commit`` option, or discard them using the ``--delete`` option. Again, be +sure to follow the on-screen instructions carefully! + +For a full list of ``crm_shadow`` options and commands, invoke it with the +``--help`` option. + +.. topic:: Use sandbox to make multiple changes all at once, discard them, and verify real configuration is untouched + + .. code-block:: none + + shadow[test] # crm_failcount -r rsc_c001n01 -G + scope=status name=fail-count-rsc_c001n01 value=0 + shadow[test] # crm_standby --node c001n02 -v on + shadow[test] # crm_standby --node c001n02 -G + scope=nodes name=standby value=on + + shadow[test] # cibadmin --erase --force + shadow[test] # cibadmin --query + + + + + + + + + + shadow[test] # crm_shadow --delete test --force + Now type Ctrl-D to exit the crm_shadow shell + shadow[test] # exit + # crm_shadow --which + No active shadow configuration defined + # cibadmin -Q + + + + + + + +See the next section, :ref:`crm_simulate`, for how to test your changes before +committing them to the live cluster. + + +.. _crm_simulate: + +Simulate Cluster Activity with crm_simulate +########################################### + +.. index:: + pair: command-line tool; crm_simulate + +The command-line tool `crm_simulate` shows the results of the same logic +the cluster itself uses to respond to a particular cluster configuration and +status. + +As always, the man page is the primary documentation, and should be consulted +for further details. This section aims for a better conceptual explanation and +practical examples. + +Replaying cluster decision-making logic +_______________________________________ + +At any given time, one node in a Pacemaker cluster will be elected DC, and that +node will run Pacemaker's scheduler to make decisions. + +Each time decisions need to be made (a "transition"), the DC will have log +messages like "Calculated transition ... saving inputs in ..." with a file +name. You can grab the named file and replay the cluster logic to see why +particular decisions were made. The file contains the live cluster +configuration at that moment, so you can also look at it directly to see the +value of node attributes, etc., at that time. + +The simplest usage is (replacing $FILENAME with the actual file name): + +.. topic:: Simulate cluster response to a given CIB + + .. code-block:: none + + # crm_simulate --simulate --xml-file $FILENAME + +That will show the cluster state when the process started, the actions that +need to be taken ("Transition Summary"), and the resulting cluster state if the +actions succeed. Most actions will have a brief description of why they were +required. + +The transition inputs may be compressed. ``crm_simulate`` can handle these +compressed files directly, though if you want to edit the file, you'll need to +uncompress it first. + +You can do the same simulation for the live cluster configuration at the +current moment. 
This is useful mainly when using ``crm_shadow`` to create a +sandbox version of the CIB; the ``--live-check`` option will use the shadow CIB +if one is in effect. + +.. topic:: Simulate cluster response to current live CIB or shadow CIB + + .. code-block:: none + + # crm_simulate --simulate --live-check + + +Why decisions were made +_______________________ + +To get further insight into the "why", it gets user-unfriendly very quickly. If +you add the ``--show-scores`` option, you will also see all the scores that +went into the decision-making. The node with the highest cumulative score for a +resource will run it. You can look for ``-INFINITY`` scores in particular to +see where complete bans came into effect. + +You can also add ``-VVVV`` to get more detailed messages about what's happening +under the hood. You can add up to two more V's even, but that's usually useful +only if you're a masochist or tracing through the source code. + + +Visualizing the action sequence +_______________________________ + +Another handy feature is the ability to generate a visual graph of the actions +needed, using the ``--dot-file`` option. This relies on the separate +Graphviz [#]_ project. + +.. topic:: Generate a visual graph of cluster actions from a saved CIB + + .. code-block:: none + + # crm_simulate --simulate --xml-file $FILENAME --dot-file $FILENAME.dot + # dot $FILENAME.dot -Tsvg > $FILENAME.svg + +``$FILENAME.dot`` will contain a GraphViz representation of the cluster's +response to your changes, including all actions with their ordering +dependencies. + +``$FILENAME.svg`` will be the same information in a standard graphical format +that you can view in your browser or other app of choice. You could, of course, +use other ``dot`` options to generate other formats. + +How to interpret the graphical output: + + * Bubbles indicate actions, and arrows indicate ordering dependencies + * Resource actions have text of the form + ``__ `` indicating that the + specified action will be executed for the specified resource on the + specified node, once if interval is 0 or at specified recurring interval + otherwise + * Actions with black text will be sent to the executor (that is, the + appropriate agent will be invoked) + * Actions with orange text are "pseudo" actions that the cluster uses + internally for ordering but require no real activity + * Actions with a solid green border are part of the transition (that is, the + cluster will attempt to execute them in the given order -- though a + transition can be interrupted by action failure or new events) + * Dashed arrows indicate dependencies that are not present in the transition + graph + * Actions with a dashed border will not be executed. If the dashed border is + blue, the cluster does not feel the action needs to be executed. If the + dashed border is red, the cluster would like to execute the action but + cannot. Any actions depending on an action with a dashed border will not be + able to execute. + * Loops should not happen, and should be reported as a bug if found. + +.. topic:: Small Cluster Transition + + .. image:: ../../shared/en-US/images/Policy-Engine-small.png + :alt: An example transition graph as represented by Graphviz + :height: 325 + :width: 1161 + :scale: 75 % + :align: center + +In the above example, it appears that a new node, ``pcmk-2``, has come online +and that the cluster is checking to make sure ``rsc1``, ``rsc2`` and ``rsc3`` +are not already running there (indicated by the ``rscN_monitor_0`` entries). 
+Once it did that, and assuming the resources were not active there, it would +have liked to stop ``rsc1`` and ``rsc2`` on ``pcmk-1`` and move them to +``pcmk-2``. However, there appears to be some problem and the cluster cannot or +is not permitted to perform the stop actions which implies it also cannot +perform the start actions. For some reason, the cluster does not want to start +``rsc3`` anywhere. + +.. topic:: Complex Cluster Transition + + .. image:: ../../shared/en-US/images/Policy-Engine-big.png + :alt: Complex transition graph that you're not expected to be able to read + :width: 1455 + :height: 1945 + :scale: 75 % + :align: center + + +What-if scenarios +_________________ + +You can make changes to the saved or shadow CIB and simulate it again, to see +how Pacemaker would react differently. You can edit the XML by hand, use +command-line tools such as ``cibadmin`` with either a shadow CIB or the +``CIB_file`` environment variable set to the filename, or use higher-level tool +support (see the man pages of the specific tool you're using for how to perform +actions on a saved CIB file rather than the live CIB). + +You can also inject node failures and/or action failures into the simulation; +see the ``crm_simulate`` man page for more details. + +This capability is useful when using a shadow CIB to edit the configuration. +Before committing the changes to the live cluster with ``crm_shadow --commit``, +you can use ``crm_simulate`` to see how the cluster will react to the changes. + +.. _attrd_updater: + +.. _crm_attribute: + +Manage Node Attributes, Cluster Options and Defaults with crm_attribute and attrd_updater +######################################################################################### + +.. index:: + pair: command-line tool; attrd_updater + pair: command-line tool; crm_attribute + +``crm_attribute`` and ``attrd_updater`` are confusingly similar tools with subtle +differences. + +``attrd_updater`` can query and update node attributes. ``crm_attribute`` can query +and update not only node attributes, but also cluster options, resource +defaults, and operation defaults. + +To understand the differences, it helps to understand the various types of node +attribute. + +.. table:: Types of Node Attributes + ++-----------+----------+-------------------+------------------+----------------+----------------+ +| Type | Recorded | Recorded in | Survive full | Manageable by | Manageable by | +| | in CIB? | attribute manager | cluster restart? | crm_attribute? | attrd_updater? | +| | | memory? | | | | ++===========+==========+===================+==================+================+================+ +| permanent | yes | no | yes | yes | no | ++-----------+----------+-------------------+------------------+----------------+----------------+ +| transient | yes | yes | no | yes | yes | ++-----------+----------+-------------------+------------------+----------------+----------------+ +| private | no | yes | no | no | yes | ++-----------+----------+-------------------+------------------+----------------+----------------+ + +As you can see from the table above, ``crm_attribute`` can manage permanent and +transient node attributes, while ``attrd_updater`` can manage transient and +private node attributes. 
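As a concrete (hypothetical) illustration, an attribute named **my-attr** for
node **node1** could be handled as follows: ``attrd_updater`` goes through the
attribute manager, while ``crm_attribute`` with ``--lifetime forever`` writes a
permanent attribute to the CIB:

.. code-block:: none

   # attrd_updater --node node1 --name my-attr --update 1
   # attrd_updater --node node1 --name my-attr --query
   # crm_attribute --node node1 --name my-attr --lifetime forever --update 1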
+ +The difference between the two tools lies mainly in *how* they update node +attributes: ``attrd_updater`` always contacts the Pacemaker attribute manager +directly, while ``crm_attribute`` will contact the attribute manager only for +transient node attributes, and will instead modify the CIB directly for +permanent node attributes (and for transient node attributes when unable to +contact the attribute manager). + +By contacting the attribute manager directly, ``attrd_updater`` can change +an attribute's "dampening" (whether changes are immediately flushed to the CIB +or after a specified amount of time, to minimize disk writes for frequent +changes), set private node attributes (which are never written to the CIB), and +set attributes for nodes that don't yet exist. + +By modifying the CIB directly, ``crm_attribute`` can set permanent node +attributes (which are only in the CIB and not managed by the attribute +manager), and can be used with saved CIB files and shadow CIBs. + +However a transient node attribute is set, it is synchronized between the CIB +and the attribute manager, on all nodes. + + +Other Commonly Used Tools +######################### + +Other command-line tools include: + +.. index:: + pair: command-line tool; crm_failcount + pair: command-line tool; crm_node + pair: command-line tool; crm_report + pair: command-line tool; crm_standby + pair: command-line tool; crm_verify + pair: command-line tool; stonith_admin + +* ``crm_failcount``: query or delete resource fail counts +* ``crm_node``: manage cluster nodes +* ``crm_report``: generate a detailed cluster report for bug submissions +* ``crm_resource``: manage cluster resources +* ``crm_standby``: manage standby status of nodes +* ``crm_verify``: validate a CIB +* ``stonith_admin``: manage fencing devices + +See the manual pages for details. + +.. rubric:: Footnotes + +.. [#] Graph visualization software. See http://www.graphviz.org/ for details. diff --git a/doc/sphinx/Pacemaker_Administration/troubleshooting.rst b/doc/sphinx/Pacemaker_Administration/troubleshooting.rst new file mode 100644 index 0000000000..53970a0f94 --- /dev/null +++ b/doc/sphinx/Pacemaker_Administration/troubleshooting.rst @@ -0,0 +1,67 @@ +Troubleshooting Cluster Problems +-------------------------------- + +Logging +####### + +Pacemaker by default logs messages of notice severity and higher to the system +log, and messages of info severity and higher to the detail log, which by +default is ``/var/log/pacemaker/pacemaker.log``. + +Logging options can be controlled via environment variables at Pacemaker +start-up. Where these are set varies by operating system (often +``/etc/sysconfig/pacemaker`` or ``/etc/default/pacemaker``). + +Because cluster problems are often highly complex, involving multiple machines, +cluster daemons, and managed services, Pacemaker logs rather verbosely to +provide as much context as possible. It is an ongoing priority to make these +logs more user-friendly, but by necessity there is a lot of obscure, low-level +information that can make them difficult to follow. + +The default log rotation configuration shipped with Pacemaker (typically +installed in ``/etc/logrotate.d/pacemaker``) rotates the log when it reaches +100MB in size, or weekly, whichever comes first. + +If you configure debug or (Heaven forbid) trace-level logging, the logs can +grow enormous quite quickly. Because rotated logs are by default named with the +year, month, and day only, this can cause name collisions if your logs exceed +100MB in a single day. 
You can add ``dateformat -%Y%m%d-%H`` to the rotation +configuration to avoid this. + +Transitions +########### + +A key concept in understanding how a Pacemaker cluster functions is a +*transition*. A transition is a set of actions that need to be taken to bring +the cluster from its current state to the desired state (as expressed by the +configuration). + +Whenever a relevant event happens (a node joining or leaving the cluster, +a resource failing, etc.), the controller will ask the scheduler to recalculate +the status of the cluster, which generates a new transition. The controller +then performs the actions in the transition in the proper order. + +Each transition can be identified in the logs by a line like: + +.. code-block: none + + notice: Calculated transition 19, saving inputs in /var/lib/pacemaker/pengine/pe-input-1463.bz2 + +The file listed as the "inputs" is a snapshot of the cluster configuration and +state at that moment (the CIB). This file can help determine why particular +actions were scheduled. The ``crm_simulate`` command, described in +:ref:`crm_simulate`, can be used to replay the file. + +Further Information About Troubleshooting +######################################### + +Andrew Beekhof wrote a series of articles about troubleshooting in his blog, +`The Cluster Guy `_: + +* `Debugging Pacemaker `_ +* `Debugging the Policy Engine `_ +* `Pacemaker Logging `_ + +The articles were written for an earlier version of Pacemaker, so many of the +specific names and log messages to look for have changed, but the concepts are +still valid. diff --git a/doc/sphinx/Pacemaker_Administration/upgrading.rst b/doc/sphinx/Pacemaker_Administration/upgrading.rst new file mode 100644 index 0000000000..b2f00cc914 --- /dev/null +++ b/doc/sphinx/Pacemaker_Administration/upgrading.rst @@ -0,0 +1,485 @@ +Upgrading a Pacemaker Cluster +----------------------------- + +Pacemaker Versioning +#################### + +Pacemaker has an overall release version, plus separate version numbers for +certain internal components. + +* **Pacemaker release version:** This version consists of three numbers + (*x.y.z*). + + The major version number (the *x* in *x.y.z*) increases when at least some + rolling upgrades are not possible from the previous major version. For example, + a rolling upgrade from 1.0.8 to 1.1.15 should always be supported, but a + rolling upgrade from 1.0.8 to 2.0.0 may not be possible. + + The minor version (the *y* in *x.y.z*) increases when there are significant + changes in cluster default behavior, tool behavior, and/or the API interface + (for software that utilizes Pacemaker libraries). The main benefit is to alert + you to pay closer attention to the release notes, to see if you might be + affected. + + The release counter (the *z* in *x.y.z*) is increased with all public releases + of Pacemaker, which typically include both bug fixes and new features. + +* **CRM feature set:** This version number applies to the communication between + full cluster nodes, and is used to avoid problems in mixed-version clusters. + + The major version number increases when nodes with different versions would not + work (rolling upgrades are not allowed). The minor version number increases + when mixed-version clusters are allowed only during rolling upgrades. The + minor-minor version number is ignored, but allows resource agents to detect + cluster support for various features. [#]_ + + Pacemaker ensures that the longest-running node is the cluster's DC. 
This + ensures new features are not enabled until all nodes are upgraded to support + them. + +* **Pacemaker Remote protocol version:** This version applies to communication + between a Pacemaker Remote node and the cluster. It increases when an older + cluster node would have problems hosting the connection to a newer + Pacemaker Remote node. To avoid these problems, Pacemaker Remote nodes will + accept connections only from cluster nodes with the same or newer + Pacemaker Remote protocol version. + + Unlike with CRM feature set differences between full cluster nodes, + mixed Pacemaker Remote protocol versions between Pacemaker Remote nodes and + full cluster nodes are fine, as long as the Pacemaker Remote nodes have the + older version. This can be useful, for example, to host a legacy application + in an older operating system version used as a Pacemaker Remote node. + +* **XML schema version:** Pacemaker’s configuration syntax — what's allowed in + the Configuration Information Base (CIB) — has its own version. This allows + the configuration syntax to evolve over time while still allowing clusters + with older configurations to work without change. + +Upgrading Cluster Software +########################## + +There are three approaches to upgrading a cluster, each with advantages and +disadvantages. + +.. table:: Upgrade Methods + ++---------------------------------------------------+----------+----------+--------+---------+----------+----------+ +| Method | Available| Can be | Service| Service | Exercises| Allows | +| | between | used with| outage | recovery| failover | change of| +| | all | Pacemaker| during | during | logic | messaging| +| | versions | Remote | upgrade| upgrade | | layer | +| | | nodes | | | | [#]_ | ++===================================================+==========+==========+========+=========+==========+==========+ +| Complete cluster shutdown | yes | yes | always | N/A | no | yes | +| | | | | | | | +| .. index:: | | | | | | | +| pair: cluster; upgrade with shutdown | | | | | | | +| pair: upgrade; upgrade with shutdown | | | | | | | ++---------------------------------------------------+----------+----------+--------+---------+----------+----------+ +| Rolling (node by node) | no | yes | always | yes | yes | no | +| | | | [#]_ | | | | +| | | | | | | | +| .. index:: | | | | | | | +| pair: cluster; rolling upgrade | | | | | | | +| pair: upgrade; rolling upgrade | | | | | | | ++---------------------------------------------------+----------+----------+--------+---------+----------+----------+ +| Detach and reattach | yes | no | only | no | no | yes | +| | | | due to | | | | +| | | | failure| | | | +| | | | | | | | +| .. index:: | | | | | | | +| pair: cluster; upgrade with detach and reattach| | | | | | | +| pair: upgrade; upgrade with detach and reattach| | | | | | | ++---------------------------------------------------+----------+----------+--------+---------+----------+----------+ + +Complete Cluster Shutdown +_________________________ + +In this scenario, one shuts down all cluster nodes and resources, +then upgrades all the nodes before restarting the cluster. + +#. On each node: + + a. Shutdown the cluster software (pacemaker and the messaging layer). + #. Upgrade the Pacemaker software. This may also include upgrading the + messaging layer and/or the underlying operating system. + #. Check the configuration with the ``crm_verify`` tool. + +#. On each node: + + a. Start the cluster software. 
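+
+For example, on nodes that use systemd and distribution packages, the
+shutdown, upgrade, and verification steps might look like the following (the
+package manager, package names, and file paths are illustrative and vary by
+platform):
+
+.. code-block:: none
+
+   # systemctl stop pacemaker corosync
+   # dnf upgrade pacemaker corosync
+   # crm_verify --xml-file /var/lib/pacemaker/cib/cib.xml
+
+Once every node has been upgraded and its configuration verified, the cluster
+software can be started again on each node (for example, with
+``systemctl start pacemaker``).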
+
+Currently, only Corosync version 2 and greater is supported as the cluster
+layer, but if another stack is supported in the future, the stack in use after
+the upgrade does not need to be the same one as before.
+
+One variation of this approach is to build a new cluster on new hosts. This
+allows the new version to be tested beforehand, and minimizes downtime by
+having the new nodes ready to be placed in production as soon as the old nodes
+are shut down.
+
+Rolling (node by node)
+______________________
+
+In this scenario, each node is removed from the cluster, upgraded, and then
+brought back online, until all nodes are running the newest version.
+
+Special considerations when planning a rolling upgrade:
+
+* If you plan to upgrade other cluster software -- such as the messaging
+  layer -- at the same time, consult that software's documentation for its
+  compatibility with a rolling upgrade.
+
+* If the major version number is changing in the Pacemaker version you are
+  upgrading to, a rolling upgrade may not be possible. Read the new version's
+  release notes (as well as the information here) for what limitations may
+  exist.
+
+* If the CRM feature set is changing in the Pacemaker version you are
+  upgrading to, you should run a mixed-version cluster only during a small
+  rolling upgrade window. If one of the older nodes drops out of the cluster
+  for any reason, it will not be able to rejoin until it is upgraded.
+
+* If the Pacemaker Remote protocol version is changing, all cluster nodes
+  should be upgraded before upgrading any Pacemaker Remote nodes.
+
+See the ClusterLabs wiki's
+`release calendar `_
+to figure out whether the CRM feature set and/or Pacemaker Remote protocol
+version changed between the Pacemaker release versions in your rolling
+upgrade.
+
+To perform a rolling upgrade, on each node in turn:
+
+#. Put the node into standby mode, and wait for any active resources
+   to be moved cleanly to another node. (This step is optional, but
+   allows you to deal with any resource issues before the upgrade.)
+#. Shut down the cluster software (pacemaker and the messaging layer) on the
+   node.
+#. Upgrade the Pacemaker software. This may also include upgrading the
+   messaging layer and/or the underlying operating system.
+#. If this is the first node to be upgraded, check the configuration
+   with the ``crm_verify`` tool.
+#. Start the messaging layer.
+   This must be the same messaging layer (currently only Corosync version 2
+   and greater is supported) that the rest of the cluster is using.
+#. Start Pacemaker.
+
+.. note::
+
+   Even if a rolling upgrade from the current version of the cluster to the
+   newest version is not directly possible, it may be possible to perform a
+   rolling upgrade in multiple steps, by upgrading to an intermediate version
+   first.
+
+.. table:: Version Compatibility Table
+
++-------------------------+---------------------------+
+| Version being Installed | Oldest Compatible Version |
++=========================+===========================+
+| Pacemaker 2.y.z         | Pacemaker 1.1.11 [#]_     |
++-------------------------+---------------------------+
+| Pacemaker 1.y.z         | Pacemaker 1.0.0           |
++-------------------------+---------------------------+
+| Pacemaker 0.7.z         | Pacemaker 0.6.z           |
++-------------------------+---------------------------+
+
+Detach and Reattach
+___________________
+
+The reattach method is a variant of a complete cluster shutdown, where the
+resources are left active and get re-detected when the cluster is restarted.
+ +This method may not be used if the cluster contains any Pacemaker Remote nodes. + +#. Tell the cluster to stop managing services. This is required to allow the + services to remain active after the cluster shuts down. + + .. code-block:: none + + # crm_attribute --name maintenance-mode --update true + +#. On each node, shutdown the cluster software (pacemaker and the messaging + layer), and upgrade the Pacemaker software. This may also include upgrading + the messaging layer. While the underlying operating system may be upgraded + at the same time, that will be more likely to cause outages in the detached + services (certainly, if a reboot is required). +#. Check the configuration with the ``crm_verify`` tool. +#. On each node, start the cluster software. + Currently, only Corosync version 2 and greater is supported as the cluster + layer, but if another stack is supported in the future, the stack does not + need to be the same one before the upgrade. +#. Verify that the cluster re-detected all resources correctly. +#. Allow the cluster to resume managing resources again: + + .. code-block:: none + + # crm_attribute --name maintenance-mode --delete + +.. note:: + + While the goal of the detach-and-reattach method is to avoid disturbing + running services, resources may still move after the upgrade if any + resource's location is governed by a rule based on transient node + attributes. Transient node attributes are erased when the node leaves the + cluster. A common example is using the ``ocf:pacemaker:ping`` resource to + set a node attribute used to locate other resources. + +Upgrading the Configuration +########################### + +.. index:: + pair: upgrading; configuration + +The CIB schema version can change from one Pacemaker version to another. + +After cluster software is upgraded, the cluster will continue to use the older +schema version that it was previously using. This can be useful, for example, +when administrators have written tools that modify the configuration, and are +based on the older syntax. [#]_ + +However, when using an older syntax, new features may be unavailable, and there +is a performance impact, since the cluster must do a non-persistent +configuration upgrade before each transition. So while using the old syntax is +possible, it is not advisable to continue using it indefinitely. + +Even if you wish to continue using the old syntax, it is a good idea to +follow the upgrade procedure outlined below, except for the last step, to ensure +that the new software has no problems with your existing configuration (since it +will perform much the same task internally). + +If you are brave, it is sufficient simply to run ``cibadmin --upgrade``. + +A more cautious approach would proceed like this: + +#. Create a shadow copy of the configuration. The later commands will + automatically operate on this copy, rather than the live configuration. + + .. code-block:: none + + # crm_shadow --create shadow + +#. Verify the configuration is valid with the new software (which may be + stricter about syntax mistakes, or may have dropped support for deprecated + features): + + .. index:: + pair: verify; configuration + + .. code-block:: none + + # crm_verify --live-check + +#. Fix any errors or warnings. +#. Perform the upgrade: + + .. code-block:: none + + # cibadmin --upgrade + +#. If this step fails, there are three main possibilities: + + a. The configuration was not valid to start with (did you do steps 2 and + 3?). + #. The transformation failed; `report a bug `_. + #. 
The transformation was successful but produced an invalid result. + + If the result of the transformation is invalid, you may see a number of + errors from the validation library. If these are not helpful, visit the + `Validation FAQ wiki page `_ + and/or try the manual upgrade procedure described below. + +#. Check the changes: + + .. code-block:: none + + # crm_shadow --diff + + If at this point there is anything about the upgrade that you wish to + fine-tune (for example, to change some of the automatic IDs), now is the + time to do so: + + .. code-block:: none + + # crm_shadow --edit + + This will open the configuration in your favorite editor (whichever is + specified by the standard ``$EDITOR`` environment variable). + +#. Preview how the cluster will react: + + .. code-block:: none + + # crm_simulate --live-check --save-dotfile shadow.dot -S + # dot -Tsvg shadow.dot -o shadow.svg + + You can then view shadow.svg with any compatible image viewer or web + browser. Verify that either no resource actions will occur or that you are + happy with any that are scheduled. If the output contains actions you do + not expect (possibly due to changes to the score calculations), you may need + to make further manual changes. See :ref:`crm_simulate` for further details + on how to interpret the output of ``crm_simulate`` and ``dot``. + +#. Upload the changes: + + .. code-block:: none + + # crm_shadow --commit shadow --force + + In the unlikely event this step fails, please report a bug. + +.. note:: + + It is also possible to perform the configuration upgrade steps manually: + + #. Locate the ``upgrade*.xsl`` conversion scripts provided with the source + code. These will often be installed in a location such as + ``/usr/share/pacemaker``, or may be obtained from the + `source repository `_. + + #. Run the conversion scripts that apply to your older version, for example: + + .. code-block:: none + + # xsltproc /path/to/upgrade06.xsl config06.xml > config10.xml + + #. Locate the ``pacemaker.rng`` script (from the same location as the xsl + files). + #. Check the XML validity: + + .. code-block:: none + + # xmllint --relaxng /path/to/pacemaker.rng config10.xml + + The advantage of this method is that it can be performed without the cluster + running, and any validation errors are often more informative. + + +What Changed in 2.0 +################### + +The main goal of the 2.0 release was to remove support for deprecated syntax, +along with some small changes in default configuration behavior and tool +behavior. Highlights: + +* Only Corosync version 2 and greater is now supported as the underlying + cluster layer. Support for Heartbeat and Corosync 1 (including CMAN) is + removed. + +* The Pacemaker detail log file is now stored in + ``/var/log/pacemaker/pacemaker.log`` by default. + +* The record-pending cluster property now defaults to true, which + allows status tools such as crm_mon to show operations that are in + progress. + +* Support for a number of deprecated build options, environment variables, + and configuration settings has been removed. + +* The ``master`` tag has been deprecated in favor of using the ``clone`` tag + with the new ``promotable`` meta-attribute set to ``true``. "Master/slave" + clone resources are now referred to as "promotable" clone resources, though + it will take longer for the full terminology change to be completed. + +* The public API for Pacemaker libraries that software applications can use + has changed significantly. 
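+
+As an illustration of the promotable clone change noted above, a resource that
+was previously wrapped in the deprecated ``master`` tag can instead be defined
+as a ``clone`` with the ``promotable`` meta-attribute. This is a minimal
+sketch with hypothetical IDs, using the ``ocf:pacemaker:Stateful`` agent:
+
+.. code-block:: xml
+
+   <!-- Deprecated syntax -->
+   <master id="my-stateful-master">
+     <primitive id="my-stateful" class="ocf" provider="pacemaker" type="Stateful"/>
+   </master>
+
+   <!-- Equivalent promotable clone syntax -->
+   <clone id="my-stateful-clone">
+     <meta_attributes id="my-stateful-clone-meta">
+       <nvpair id="my-stateful-clone-promotable" name="promotable" value="true"/>
+     </meta_attributes>
+     <primitive id="my-stateful" class="ocf" provider="pacemaker" type="Stateful"/>
+   </clone>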
+ +For a detailed list of changes, see the release notes and the +`Pacemaker 2.0 Changes `_ +page on the ClusterLabs wiki. + + +What Changed in 1.0 +################### + +New +___ + +* Failure timeouts. +* New section for resource and operation defaults. +* Tool for making offline configuration changes. +* ``Rules``, ``instance_attributes``, ``meta_attributes`` and sets of + operations can be defined once and referenced in multiple places. +* The CIB now accepts XPath-based create/modify/delete operations. See + ``cibadmin --help``. +* Multi-dimensional colocation and ordering constraints. +* The ability to connect to the CIB from non-cluster machines. +* Allow recurring actions to be triggered at known times. + + +Changed +_______ + +* Syntax + + * All resource and cluster options now use dashes (-) instead of underscores + (_) + * ``master_slave`` was renamed to ``master`` + * The ``attributes`` container tag was removed + * The operation field ``pre-req`` has been renamed ``requires`` + * All operations must have an ``interval``, ``start``/``stop`` must have it + set to zero + +* The ``stonith-enabled`` option now defaults to true. +* The cluster will refuse to start resources if ``stonith-enabled`` is true (or + unset) and no STONITH resources have been defined +* The attributes of colocation and ordering constraints were renamed for + clarity. +* ``resource-failure-stickiness`` has been replaced by ``migration-threshold``. +* The parameters for command-line tools have been made consistent +* Switched to 'RelaxNG' schema validation and 'libxml2' parser + + * id fields are now XML IDs which have the following limitations: + + * id's cannot contain colons (:) + * id's cannot begin with a number + * id's must be globally unique (not just unique for that tag) + + * Some fields (such as those in constraints that refer to resources) are + IDREFs. + + This means that they must reference existing resources or objects in + order for the configuration to be valid. Removing an object which is + referenced elsewhere will therefore fail. + + * The CIB representation, from which a MD5 digest is calculated to verify + CIBs on the nodes, has changed. + + This means that every CIB update will require a full refresh on any + upgraded nodes until the cluster is fully upgraded to 1.0. This will result + in significant performance degradation and it is therefore highly + inadvisable to run a mixed 1.0/0.6 cluster for any longer than absolutely + necessary. + +* Ping node information no longer needs to be added to ``ha.cf``. Simply + include the lists of hosts in your ping resource(s). + + +Removed +_______ + + +* Syntax + + * It is no longer possible to set resource meta options as top-level + attributes. Use meta-attributes instead. + * Resource and operation defaults are no longer read from ``crm_config``. + +.. rubric:: Footnotes + +.. [#] Before CRM feature set 3.1.0 (Pacemaker 2.0.0), the minor-minor version + number was treated the same as the minor version. + +.. [#] Currently, Corosync version 2 and greater is the only supported cluster + stack, but other stacks have been supported by past versions, and may be + supported by future versions. + +.. [#] Any active resources will be moved off the node being upgraded, so there + will be at least a brief outage unless all resources can be migrated + "live". + +.. 
[#] Rolling upgrades from Pacemaker 1.1.z to 2.y.z are possible only if the + cluster uses corosync version 2 or greater as its messaging layer, and + the Cluster Information Base (CIB) uses schema 1.0 or higher in its + ``validate-with`` property. + +.. [#] As of Pacemaker 2.0.0, only schema versions pacemaker-1.0 and higher + are supported (excluding pacemaker-1.1, which was an experimental schema + now known as pacemaker-next). diff --git a/doc/sphinx/shared/pacemaker-intro.rst b/doc/sphinx/shared/pacemaker-intro.rst new file mode 100644 index 0000000000..37c39afd56 --- /dev/null +++ b/doc/sphinx/shared/pacemaker-intro.rst @@ -0,0 +1,201 @@ +What Is Pacemaker? +#################### + +Pacemaker is a high-availability *cluster resource manager* -- software that +runs on a set of hosts (a *cluster* of *nodes*) in order to preserve integrity +and minimize downtime of desired services (*resources*). [#]_ It is maintained +by the `ClusterLabs `_ community. + +Pacemaker's key features include: + +* Detection of and recovery from node- and service-level failures +* Ability to ensure data integrity by fencing faulty nodes +* Support for one or more nodes per cluster +* Support for multiple resource interface standards (anything that can be + scripted can be clustered) +* Support (but no requirement) for shared storage +* Support for practically any redundancy configuration (active/passive, N+1, + etc.) +* Automatically replicated configuration that can be updated from any node +* Ability to specify cluster-wide relationships between services, + such as ordering, colocation and anti-colocation +* Support for advanced service types, such as *clones* (services that need to + be active on multiple nodes), *promotable clones* (clones that can run in + one of two roles), and containerized services +* Unified, scriptable cluster management tools + +.. note:: Fencing + + *Fencing*, also known as *STONITH* (an acronym for Shoot The Other Node In + The Head), is the ability to ensure that it is not possible for a node to be + running a service. This is accomplished via *fence devices* such as + intelligent power switches that cut power to the target, or intelligent + network switches that cut the target's access to the local network. + + Pacemaker represents fence devices as a special class of resource. + + A cluster cannot safely recover from certain failure conditions, such as an + unresponsive node, without fencing. + +Cluster Architecture +____________________ + +At a high level, a cluster can be viewed as having these parts (which together +are often referred to as the *cluster stack*): + + * **Resources:** These are the reason for the cluster's being -- the services + that need to be kept highly available. + + * **Resource agents:** These are scripts or operating system components that + start, stop, and monitor resources, given a set of resource parameters. + These provide a uniform interface between Pacemaker and the managed + services. + + * **Fence agents:** These are scripts that execute node fencing actions, + given a target and fence device parameters. + + * **Cluster membership layer:** This component provides reliable messaging, + membership, and quorum information about the cluster. Currently, Pacemaker + supports `Corosync `_ as this layer. + + * **Cluster resource manager:** Pacemaker provides the brain that processes + and reacts to events that occur in the cluster. 
These events may include + nodes joining or leaving the cluster; resource events caused by failures, + maintenance, or scheduled activities; and other administrative actions. + To achieve the desired availability, Pacemaker may start and stop resources + and fence nodes. + + * **Cluster tools:** These provide an interface for users to interact with the + cluster. Various command-line and graphical (GUI) interfaces are available. + +Most managed services are not, themselves, cluster-aware. However, many popular +open-source cluster filesystems make use of a common *Distributed Lock +Manager* (DLM), which makes direct use of Corosync for its messaging and +membership capabilities and Pacemaker for the ability to fence nodes. + +.. image:: ../../shared/en-US/images/pcmk-stack.png + :alt: Example cluster stack + :scale: 75 % + :align: center + +Pacemaker Architecture +______________________ + +Pacemaker itself is composed of multiple daemons that work together: + +* pacemakerd +* pacemaker-attrd +* pacemaker-based +* pacemaker-controld +* pacemaker-execd +* pacemaker-fenced +* pacemaker-schedulerd + +.. image:: ../../shared/en-US/images/pcmk-internals.png + :alt: Pacemaker software components + :scale: 65 % + :align: center + +The Pacemaker master process (pacemakerd) spawns all the other daemons, and +respawns them if they unexpectedly exit. + +The *Cluster Information Base* (CIB) is an +`XML `_ representation of the cluster's +configuration and the state of all nodes and resources. The *CIB manager* +(pacemaker-based) keeps the CIB synchronized across the cluster, and handles +requests to modify it. + +The *attribute manager* (pacemaker-attrd) maintains a database of attributes +for all nodes, keeps it synchronized across the cluster, and handles requests +to modify them. These attributes are usually recorded in the CIB. + +Given a snapshot of the CIB as input, the *scheduler* (pacemaker-schedulerd) +determines what actions are necessary to achieve the desired state of the +cluster. + +The *local executor* (pacemaker-execd) handles requests to execute +resource agents on the local cluster node, and returns the result. + +The *fencer* (pacemaker-fenced) handles requests to fence nodes. Given a target +node, the fencer decides which cluster node(s) should execute which fencing +device(s), and calls the necessary fencing agents (either directly, or via +requests to the fencer peers on other nodes), and returns the result. + +The *controller* (pacemaker-controld) is Pacemaker's coordinator, maintaining a +consistent view of the cluster membership and orchestrating all the other +components. + +Pacemaker centralizes cluster decision-making by electing one of the controller +instances as the 'Designated Controller' ('DC'). Should the elected DC process +(or the node it is on) fail, a new one is quickly established. The DC responds +to cluster events by taking a current snapshot of the CIB, feeding it to the +scheduler, then asking the executors (either directly on the local node, or via +requests to controller peers on other nodes) and the fencer to execute any +necessary actions. + +.. note:: **Old daemon names** + + The Pacemaker daemons were renamed in version 2.0. You may still find + references to the old names, especially in documentation targeted to + version 1.1. + + .. 
table:: + + +-------------------+---------------------+ + | Old name | New name | + +===================+=====================+ + | attrd | pacemaker-attrd | + +-------------------+---------------------+ + | cib | pacemaker-based | + +-------------------+---------------------+ + | crmd | pacemaker-controld | + +-------------------+---------------------+ + | lrmd | pacemaker-execd | + +-------------------+---------------------+ + | stonithd | pacemaker-fenced | + +-------------------+---------------------+ + | pacemaker_remoted | pacemaker-remoted | + +-------------------+---------------------+ + +Node Redundancy Designs +_______________________ + +Pacemaker supports practically any `node redundancy configuration +`_ +including *Active/Active*, *Active/Passive*, *N+1*, *N+M*, *N-to-1* and +*N-to-N*. + +Active/passive clusters with two (or more) nodes using Pacemaker and +`DRBD `_ are +a cost-effective high-availability solution for many situations. One of the +nodes provides the desired services, and if it fails, the other node takes +over. + +.. image:: ../../shared/en-US/images/pcmk-active-passive.png + :alt: Active/Passive Redundancy + :align: center + :scale: 75 % + +Pacemaker also supports multiple nodes in a shared-failover design, reducing +hardware costs by allowing several active/passive clusters to be combined and +share a common backup node. + +.. image:: ../../shared/en-US/images/pcmk-shared-failover.png + :alt: Shared Failover + :align: center + :scale: 75 % + +When shared storage is available, every node can potentially be used for +failover. Pacemaker can even run multiple copies of services to spread out the +workload. This is sometimes called N to N Redundancy. + +.. image:: ../../shared/en-US/images/pcmk-active-active.png + :alt: N to N Redundancy + :align: center + :scale: 75 % + +.. rubric:: Footnotes + +.. [#] *Cluster* is sometimes used in other contexts to refer to hosts grouped + together for other purposes, such as high-performance computing (HPC), + but Pacemaker is not intended for those purposes.