diff --git a/doc/Clusters_from_Scratch/en-US/Ap-Corosync-Conf.txt b/doc/Clusters_from_Scratch/en-US/Ap-Corosync-Conf.txt
new file mode 100644
index 0000000000..0017fcec57
--- /dev/null
+++ b/doc/Clusters_from_Scratch/en-US/Ap-Corosync-Conf.txt
@@ -0,0 +1,68 @@
+[appendix]
+= Sample Corosync Configuration =
+
+.Sample Corosync.conf for a two-node cluster
+.....
+# Please read the corosync.conf.5 manual page
+compatibility: whitetank
+
+totem {
+ version: 2
+
+ # How long before declaring a token lost (ms)
+ token: 5000
+
+ # How many token retransmits before forming a new configuration
+ token_retransmits_before_loss_const: 10
+
+ # How long to wait for join messages in the membership protocol (ms)
+ join: 1000
+
+ # How long to wait for consensus to be achieved before starting a new
+ # round of membership configuration (ms)
+ consensus: 6000
+
+ # Turn off the virtual synchrony filter
+ vsftype: none
+
+ # Number of messages that may be sent by one processor on receipt of the token
+ max_messages: 20
+
+ # Stagger sending the node join messages by 1..send_join ms
+ send_join: 45
+
+ # Limit generated nodeids to 31-bits (positive signed integers)
+ clear_node_high_bit: yes
+
+ # Disable encryption
+ secauth: off
+
+ # How many threads to use for encryption/decryption
+ threads: 0
+
+ # Optionally assign a fixed node id (integer)
+ # nodeid: 1234
+
+ interface {
+ ringnumber: 0
+
+ # The following values need to be set based on your environment
+ bindnetaddr: 192.168.122.0
+ mcastaddr: 226.94.1.1
+ mcastport: 4000
+ }
+}
+
+logging {
+ debug: off
+ fileline: off
+ to_syslog: yes
+ to_stderr: off
+ syslog_facility: daemon
+ timestamp: on
+}
+
+amf {
+ mode: disabled
+}
+.....
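+
+Once a file like this is installed as /etc/corosync/corosync.conf on each
+node and Corosync has been started, the ring status can be sanity-checked
+with corosync-cfgtool. The transcript below is only a sketch: the node ID
+and the bound address will differ in your environment.
+
+.....
+# corosync-cfgtool -s
+Printing ring status.
+Local node ID 1
+RING ID 0
+        id      = 192.168.122.101
+        status  = ring 0 active with no faults
+.....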
diff --git a/doc/Clusters_from_Scratch/en-US/Ap-Corosync-Conf.xml b/doc/Clusters_from_Scratch/en-US/Ap-Corosync-Conf.xml
deleted file mode 100644
index 094b5b2c31..0000000000
--- a/doc/Clusters_from_Scratch/en-US/Ap-Corosync-Conf.xml
+++ /dev/null
@@ -1,79 +0,0 @@
-[DocBook source for the "Sample Corosync Configuration" appendix; its content matches the corosync.conf example in Ap-Corosync-Conf.txt above]
diff --git a/doc/Clusters_from_Scratch/en-US/Ap-Reading.txt b/doc/Clusters_from_Scratch/en-US/Ap-Reading.txt
new file mode 100644
index 0000000000..83017f2df8
--- /dev/null
+++ b/doc/Clusters_from_Scratch/en-US/Ap-Reading.txt
@@ -0,0 +1,12 @@
+[appendix]
+= Further Reading =
+
+- Project Website
+  http://www.clusterlabs.org
+
+- Cluster Commands
+  A comprehensive guide to cluster commands has been written by Novell and can be found at:
+  http://www.novell.com/documentation/sles11/book_sleha/index.html?page=/documentation/sles11/book_sleha/data/book_sleha.html
+
+- Corosync
+  http://www.corosync.org
diff --git a/doc/Clusters_from_Scratch/en-US/Ap-Reading.xml b/doc/Clusters_from_Scratch/en-US/Ap-Reading.xml
deleted file mode 100644
index 7dabd982b4..0000000000
--- a/doc/Clusters_from_Scratch/en-US/Ap-Reading.xml
+++ /dev/null
@@ -1,30 +0,0 @@
-[DocBook source for the "Further Reading" appendix; its content matches the links in Ap-Reading.txt above]
diff --git a/doc/Clusters_from_Scratch/en-US/Ch-Active-Passive.xml b/doc/Clusters_from_Scratch/en-US/Ch-Active-Passive.xml
deleted file mode 100644
index c2e89c7492..0000000000
--- a/doc/Clusters_from_Scratch/en-US/Ch-Active-Passive.xml
+++ /dev/null
@@ -1,319 +0,0 @@
-Creating an Active/Passive Cluster
-
-Exploring the Existing Configuration
-When Pacemaker starts up, it automatically records the number and details
-of the nodes in the cluster as well as which stack is being used and the
-version of Pacemaker being used.
-This is what the base configuration should look like.
-# crm configure show
-node pcmk-1
-node pcmk-2
-property $id="cib-bootstrap-options" \
- dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
- cluster-infrastructure="openais" \
- expected-quorum-votes="2"
-For those who are not afraid of XML, you can see the raw
-configuration by appending "xml" to the previous command.
-# crm configure show xml
-<?xml version="1.0" ?>
-<cib admin_epoch="0" crm_feature_set="3.0.1" dc-uuid="pcmk-1" epoch="13" have-quorum="1" num_updates="7" validate-with="pacemaker-1.0">
- <configuration>
- <crm_config>
- <cluster_property_set id="cib-bootstrap-options">
- <nvpair id="cib-bootstrap-options-dc-version" name="dc-version" value="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f"/>
- <nvpair id="cib-bootstrap-options-cluster-infrastructure" name="cluster-infrastructure" value="openais"/>
- <nvpair id="cib-bootstrap-options-expected-quorum-votes" name="expected-quorum-votes" value="2"/>
- </cluster_property_set>
- </crm_config>
- <rsc_defaults/>
- <op_defaults/>
- <nodes>
- <node id="pcmk-1" type="normal" uname="pcmk-1"/>
- <node id="pcmk-2" type="normal" uname="pcmk-2"/>
- </nodes>
- <resources/>
- <constraints/>
- </configuration>
-</cib>
-Before we make any changes, it's a good idea to check the validity of
-the configuration.
-# crm_verify -L
-crm_verify[2195]: 2009/08/27_16:57:12 ERROR: unpack_resources: Resource start-up disabled since no STONITH resources have been defined
-crm_verify[2195]: 2009/08/27_16:57:12 ERROR: unpack_resources: Either configure some or disable STONITH with the stonith-enabled option
-crm_verify[2195]: 2009/08/27_16:57:12 ERROR: unpack_resources: NOTE: Clusters with shared data need STONITH to ensure data integrity
-Errors found during check: config not valid -V may provide more details
-#
-As you can see, the tool has found some errors.
-In order to guarantee the safety of your data (if the data is corrupt,
-there is little point in continuing to make it available), Pacemaker
-ships with STONITH (a common node-fencing mechanism, used to ensure data
-integrity by powering off "bad" nodes) enabled. However, it also knows
-when no STONITH configuration has been supplied and reports this as a
-problem (since the cluster would not be able to make progress if a
-situation requiring node fencing arose).
-For now, we will disable this feature and configure it later in the
-Configuring STONITH section. It is important to note that the use of
-STONITH is highly encouraged; turning it off tells the cluster to
-simply pretend that failed nodes are safely powered off. Some vendors
-will even refuse to support clusters that have it disabled.
-To disable STONITH, we set the stonith-enabled cluster option to
-false.
-# crm configure property stonith-enabled=false
-# crm_verify -L
-With the new cluster option set, the configuration is now valid.
-
-The use of stonith-enabled=false is completely inappropriate for a
-production cluster. We use it here to defer the discussion of its
-configuration, which can differ widely from one installation to the
-next. See the Configure STONITH chapter for information on why STONITH
-is important and details on how to configure it.
-
-
-
-Adding a Resource
-The first thing we should do is configure an IP address. Regardless of
-where the cluster service(s) are running, we need a consistent address
-to contact them on. Here I will choose and add 192.168.122.101 as the
-floating address, give it the imaginative name ClusterIP, and tell the
-cluster to check that it is running every 30 seconds.
-
-The chosen address must not be one already associated with
-a physical node
-
-# crm configure primitive ClusterIP ocf:heartbeat:IPaddr2 \
- params ip=192.168.122.101 cidr_netmask=32 \
- op monitor interval=30s
-The other important piece of information here is ocf:heartbeat:IPaddr2.
-This tells Pacemaker three things about the resource you want to
-add. The first field, ocf, is the standard to which the resource
-script conforms and where to find it. The second field is specific
-to OCF resources and tells the cluster which namespace to find the
-resource script in, in this case heartbeat. The last field indicates
-the name of the resource script.
-To obtain a list of the available resource classes, run
-# crm ra classes
-heartbeat
-lsb
-ocf / heartbeat pacemaker
-stonith
-To then find all the OCF resource agents provided by Pacemaker and
-Heartbeat, run
-# crm ra list ocf pacemaker
-ClusterMon Dummy Stateful SysInfo SystemHealth controld
-ping pingd
-# crm ra list ocf heartbeat
-AoEtarget AudibleAlarm ClusterMon Delay
-Dummy EvmsSCC Evmsd Filesystem
-ICP IPaddr IPaddr2 IPsrcaddr
-LVM LinuxSCSI MailTo ManageRAID
-ManageVE Pure-FTPd Raid1 Route
-SAPDatabase SAPInstance SendArp ServeRAID
-SphinxSearchDaemon Squid Stateful SysInfo
-VIPArip VirtualDomain WAS WAS6
-WinPopup Xen Xinetd anything
-apache db2 drbd eDir88
-iSCSILogicalUnit iSCSITarget ids iscsi
-ldirectord mysql mysql-proxy nfsserver
-oracle oralsnr pgsql pingd
-portblock rsyncd scsi2reservation sfex
-tomcat vmware
-#
-Now verify that the IP resource has been added and display the cluster’s
-status to see that it is now active.
-# crm configure show
-node pcmk-1
-node pcmk-2
-primitive ClusterIP ocf:heartbeat:IPaddr2 \
-   params ip="192.168.122.101" cidr_netmask="32" \
-   op monitor interval="30s"
-property $id="cib-bootstrap-options" \
-   dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
-   cluster-infrastructure="openais" \
-   expected-quorum-votes="2" \
-   stonith-enabled="false"
-# crm_mon
-============
-Last updated: Fri Aug 28 15:23:48 2009
-Stack: openais
-Current DC: pcmk-1 - partition with quorum
-Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
-2 Nodes configured, 2 expected votes
-1 Resources configured.
-============
-
-Online: [ pcmk-1 pcmk-2 ]
-ClusterIP (ocf::heartbeat:IPaddr): Started pcmk-1
-
-
-
diff --git a/doc/Clusters_from_Scratch/en-US/Ch-Stonith.xml b/doc/Clusters_from_Scratch/en-US/Ch-Stonith.xml
deleted file mode 100644
index 5033e4c475..0000000000
--- a/doc/Clusters_from_Scratch/en-US/Ch-Stonith.xml
+++ /dev/null
@@ -1,231 +0,0 @@
-Configure STONITH
-
-What Is STONITH
-STONITH is an acronym for Shoot-The-Other-Node-In-The-Head and it
-protects your data from being corrupted by rogue nodes or concurrent
-access.
-Just because a node is unresponsive, this doesn’t mean it isn’t
-accessing your data. The only way to be 100% sure that your data is
-safe is to use STONITH, so we can be certain that the node is truly
-offline before allowing the data to be accessed from another node.
-STONITH also has a role to play in the event that a clustered service
-cannot be stopped. In this case, the cluster uses STONITH to force the
-whole node offline, thereby making it safe to start the service
-elsewhere.
-
-
-What STONITH Device Should You Use
-It is crucial that the STONITH device allows the cluster to
-differentiate between a node failure and a network one.
-The biggest mistake people make in choosing a STONITH device is to
-use a remote power switch (such as many on-board IPMI controllers) that
-shares power with the node it controls. In such cases, the cluster
-cannot be sure whether the node is really offline, or active and
-suffering from a network fault.
-Likewise, any device that relies on the machine being active (such as
-SSH-based "devices" used during testing) is inappropriate.
-
-
-Configuring STONITH
-
-1. Find the correct driver: stonith_admin --list-installed
-
-2. Since every device is different, the parameters needed to configure
-   it will vary. To find out the parameters associated with the device,
-   run: stonith_admin --metadata --agent type
-   The output should be XML formatted text containing additional
-   parameter descriptions. We will endeavor to make the output more
-   friendly in a later version.
-
-3. Enter the shell: crm. Create an editable copy of the existing
-   configuration: cib new stonith. Then create a fencing resource
-   containing a primitive resource with a class of stonith, a type of
-   type and a parameter for each of the values returned in step 2:
-   configure primitive …
-
-4. If the device does not know how to fence nodes based on their uname,
-   you may also need to set the special pcmk_host_map parameter. See
-   man stonithd for details.
-
-5. If the device does not support the list command, you may also need
-   to set the special pcmk_host_list and/or pcmk_host_check
-   parameters. See man stonithd for details.
-
-6. If the device does not expect the victim to be specified with the
-   port parameter, you may also need to set the special
-   pcmk_host_argument parameter. See man stonithd for details.
-
-7. Upload it into the CIB from the shell: cib commit stonith
-
-8. Once the stonith resource is running, you can test it by executing
-   stonith_admin --reboot nodename (although you might want to stop the
-   cluster on that machine first).
-
-
-Example
-Assuming we have a chassis containing four nodes and an IPMI device
-active on 10.0.0.1, we would choose the fence_ipmilan driver in step
-2 and obtain the following list of parameters
-# stonith_admin --metadata -a fence_ipmilan
-<?xml version="1.0" ?>
-<resource-agent name="fence_ipmilan" shortdesc="Fence agent for IPMI over LAN">
-<longdesc>
-fence_ipmilan is an I/O Fencing agent which can be used with machines controlled by IPMI. This agent calls support software using ipmitool (http://ipmitool.sf.net/).
-
-To use fence_ipmilan with HP iLO 3 you have to enable lanplus option (lanplus / -P) and increase wait after operation to 4 seconds (power_wait=4 / -T 4)</longdesc>
-<parameters>
- <parameter name="auth" unique="1">
- <getopt mixed="-A" />
- <content type="string" />
- <shortdesc >IPMI Lan Auth type (md5, password, or none)</shortdesc>
- </parameter>
- <parameter name="ipaddr" unique="1">
- <getopt mixed="-a" />
- <content type="string" />
- <shortdesc >IPMI Lan IP to talk to</shortdesc>
- </parameter>
- <parameter name="passwd" unique="1">
- <getopt mixed="-p" />
- <content type="string" />
- <shortdesc >Password (if required) to control power on IPMI device</shortdesc>
- </parameter>
- <parameter name="passwd_script" unique="1">
- <getopt mixed="-S" />
- <content type="string" />
- <shortdesc >Script to retrieve password (if required)</shortdesc>
- </parameter>
- <parameter name="lanplus" unique="1">
- <getopt mixed="-P" />
- <content type="boolean" />
- <shortdesc >Use Lanplus</shortdesc>
- </parameter>
- <parameter name="login" unique="1">
- <getopt mixed="-l" />
- <content type="string" />
- <shortdesc >Username/Login (if required) to control power on IPMI device</shortdesc>
- </parameter>
- <parameter name="action" unique="1">
- <getopt mixed="-o" />
- <content type="string" default="reboot"/>
- <shortdesc >Operation to perform. Valid operations: on, off, reboot, status, list, diag, monitor or metadata</shortdesc>
- </parameter>
- <parameter name="timeout" unique="1">
- <getopt mixed="-t" />
- <content type="string" />
- <shortdesc >Timeout (sec) for IPMI operation</shortdesc>
- </parameter>
- <parameter name="cipher" unique="1">
- <getopt mixed="-C" />
- <content type="string" />
- <shortdesc >Ciphersuite to use (same as ipmitool -C parameter)</shortdesc>
- </parameter>
- <parameter name="method" unique="1">
- <getopt mixed="-M" />
- <content type="string" default="onoff"/>
- <shortdesc >Method to fence (onoff or cycle)</shortdesc>
- </parameter>
- <parameter name="power_wait" unique="1">
- <getopt mixed="-T" />
- <content type="string" default="2"/>
- <shortdesc >Wait X seconds after on/off operation</shortdesc>
- </parameter>
- <parameter name="delay" unique="1">
- <getopt mixed="-f" />
- <content type="string" />
- <shortdesc >Wait X seconds before fencing is started</shortdesc>
- </parameter>
- <parameter name="verbose" unique="1">
- <getopt mixed="-v" />
- <content type="boolean" />
- <shortdesc >Verbose mode</shortdesc>
- </parameter>
-</parameters>
-<actions>
- <action name="on" />
- <action name="off" />
- <action name="reboot" />
- <action name="status" />
- <action name="diag" />
- <action name="list" />
- <action name="monitor" />
- <action name="metadata" />
-</actions>
-</resource-agent>
-from which we would create a STONITH resource fragment that might look
-like this
-# crm
-crm(live)# cib new stonith
-INFO: stonith shadow CIB created
-crm(stonith)# configure primitive ipmi-fencing stonith::fence_ipmilan \
-   params pcmk_host_list="pcmk-1 pcmk-2" ipaddr=10.0.0.1 login=testuser passwd=abc123 \
-   op monitor interval="60s"
-And finally, since we disabled it earlier, we need to re-enable STONITH.
-At this point we should have the following configuration.
-crm(stonith)# configure property stonith-enabled="true"
-crm(stonith)# configure show
-node pcmk-1
-node pcmk-2
-primitive WebData ocf:linbit:drbd \
- params drbd_resource="wwwdata" \
- op monitor interval="60s"
-primitive WebFS ocf:heartbeat:Filesystem \
- params device="/dev/drbd/by-res/wwwdata" directory="/var/www/html" fstype="gfs2"
-primitive WebSite ocf:heartbeat:apache \
- params configfile="/etc/httpd/conf/httpd.conf" \
- op monitor interval="1min"
-primitive ClusterIP ocf:heartbeat:IPaddr2 \
- params ip="192.168.122.101" cidr_netmask="32" clusterip_hash="sourceip" \
- op monitor interval="30s"primitive ipmi-fencing stonith::fence_ipmilan \ params pcmk_host_list="pcmk-1 pcmk-2" ipaddr=10.0.0.1 login=testuser passwd=abc123 \ op monitor interval="60s"ms WebDataClone WebData \
- meta master-max="2" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
-clone WebFSClone WebFS
-clone WebIP ClusterIP \
- meta globally-unique="true" clone-max="2" clone-node-max="2"
-clone WebSiteClone WebSite
-colocation WebSite-with-WebFS inf: WebSiteClone WebFSClone
-colocation fs_on_drbd inf: WebFSClone WebDataClone:Master
-colocation website-with-ip inf: WebSiteClone WebIP
-order WebFS-after-WebData inf: WebDataClone:promote WebFSClone:start
-order WebSite-after-WebFS inf: WebFSClone WebSiteClone
-order apache-after-ip inf: WebIP WebSiteClone
-property $id="cib-bootstrap-options" \
- dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
- cluster-infrastructure="openais" \
- expected-quorum-votes="2" \
- stonith-enabled="true" \
- no-quorum-policy="ignore"
-rsc_defaults $id="rsc-options" \
- resource-stickiness="100"
-crm(stonith)# cib commit stonith
-INFO: commited 'stonith' shadow CIB to the cluster
-crm(stonith)# quit
-bye
-
-
diff --git a/doc/Clusters_from_Scratch/en-US/Ch-Tools.txt b/doc/Clusters_from_Scratch/en-US/Ch-Tools.txt
index 8f1d6bc767..d946d0f19a 100644
--- a/doc/Clusters_from_Scratch/en-US/Ch-Tools.txt
+++ b/doc/Clusters_from_Scratch/en-US/Ch-Tools.txt
@@ -1,123 +1,125 @@
-= Using Pacemaker Tools =
+= Pacemaker Tools =
+
+== Using Pacemaker Tools ==
In the dark past, configuring Pacemaker required the administrator to
read and write XML. In true UNIX style, there were also a number of
different commands that specialized in different aspects of querying
and updating the cluster.
Since Pacemaker 1.0, this has all changed and we have an integrated,
scriptable, cluster shell that hides all the messy XML scaffolding. It
even allows you to queue up several changes at once and commit them
atomically.
Take some time to familiarize yourself with what it can do.
....
# crm --help
usage:
crm [-D display_type]
crm [-D display_type] args
crm [-D display_type] [-f file]
Use crm without arguments for an interactive session.
Supply one or more arguments for a "single-shot" use.
Specify with -f a file which contains a script. Use '-' for
standard input or use pipe/redirection.
crm displays cli format configurations using a color scheme
and/or in uppercase. Pick one of "color" or "uppercase", or
use "-D color,uppercase" if you want colorful uppercase.
Get plain output by "-D plain". The default may be set in
user preferences (options).
Examples:
# crm -f stopapp2.cli
# crm < stopapp2.cli
# crm resource stop global_www
# crm status
....
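
As an example of queuing up several changes and committing them
atomically, the shell can stage edits in a shadow copy of the CIB and
apply them in one step. This is only a sketch; the shadow name "test"
and the property being set are arbitrary:

....
# crm
crm(live)# cib new test
INFO: test shadow CIB created
crm(test)# configure property stonith-enabled=false
crm(test)# cib commit test
INFO: commited 'test' shadow CIB to the cluster
crm(test)# quit
bye
....
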
The primary tool for monitoring the status of the cluster is crm_mon
(also available as crm status). It can be run in a variety of modes
and has a number of output options. To find out about any of the tools
that come with Pacemaker, simply invoke them with the --help option or
consult the included man pages. Both sets of output are created from
the tool, and so will always be in sync with each other and the tool
itself.
Additionally, the Pacemaker version and supported cluster stack(s) are
available via the --version option.
....
# crm_mon --version
Pacemaker 1.1.5
Written by Andrew Beekhof
# crm_mon --help
crm_mon - Provides a summary of cluster's current state.
Outputs varying levels of detail in a number of different formats.
Usage: crm_mon mode [options]
Options:
-?, --help This text
-$, --version Version information
-V, --verbose Increase debug output
Modes:
-h, --as-html=value Write cluster status to the named file
-w, --web-cgi Web mode with output suitable for cgi
-s, --simple-status Display the cluster status once as a simple one line output (suitable for nagios)
-S, --snmp-traps=value Send SNMP traps to this station
-T, --mail-to=value Send Mail alerts to this user. See also --mail-from, --mail-host, --mail-prefix
Display Options:
-n, --group-by-node Group resources by node
-r, --inactive Display inactive resources
-f, --failcounts Display resource fail counts
-o, --operations Display resource operation history
-t, --timing-details Display resource operation history with timing details
Additional Options:
-i, --interval=value Update frequency in seconds
-1, --one-shot Display the cluster status once on the console and exit
-N, --disable-ncurses Disable the use of ncurses
-d, --daemonize Run in the background as a daemon
-p, --pid-file=value (Advanced) Daemon pid file location
-F, --mail-from=value Mail alerts should come from the named user
-H, --mail-host=value Mail alerts should be sent via the named host
-P, --mail-prefix=value Subjects for mail alerts should start with this string
-E, --external-agent=value A program to run when resource operations take place.
-e, --external-recipient=value A recipient for your program (assuming you want the program to send something to someone).
Examples:
Display the cluster's status on the console with updates as they occur:
# crm_mon
Display the cluster's status on the console just once then exit:
# crm_mon -1
Display your cluster's status, group resources by node, and include inactive resources in the list:
# crm_mon --group-by-node --inactive
Start crm_mon as a background daemon and have it write the cluster's status to an HTML file:
# crm_mon --daemonize --as-html /path/to/docroot/filename.html
Start crm_mon as a background daemon and have it send email alerts:
# crm_mon --daemonize --mail-to user@example.com --mail-host mail.example.com
Start crm_mon as a background daemon and have it send SNMP alerts:
# crm_mon --daemonize --snmp-traps snmptrapd.example.com
Report bugs to pacemaker@oss.clusterlabs.org
....
[NOTE]
======
If the SNMP and/or email options are not listed, then Pacemaker was not
built to support them. This may be by the choice of your distribution or
the required libraries may not have been available. Please contact
whoever supplied you with the packages for more details.
======
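
If in doubt, the help text itself can be searched for those options; a
minimal sketch using standard shell tools:

....
# crm_mon --help | grep -e --snmp-traps -e --mail-to
 -S, --snmp-traps=value Send SNMP traps to this station
 -T, --mail-to=value Send Mail alerts to this user. See also --mail-from, --mail-host, --mail-prefix
....

No output here means the build lacks SNMP and email support.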