
diff --git a/doc/Clusters_from_Scratch/en-US/Ap-Configuration.xml b/doc/Clusters_from_Scratch/en-US/Ap-Configuration.xml
index 0add20ae3e..268a065256 100644
--- a/doc/Clusters_from_Scratch/en-US/Ap-Configuration.xml
+++ b/doc/Clusters_from_Scratch/en-US/Ap-Configuration.xml
@@ -1,274 +1,274 @@
<?xml version='1.0' encoding='utf-8' ?>
<!DOCTYPE appendix PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
<!ENTITY % BOOK_ENTITIES SYSTEM "Clusters_from_Scratch.ent">
%BOOK_ENTITIES;
]>
<appendix>
<title>Configuration Recap</title>
<section>
<title>Final Cluster Configuration</title>
<screen>
[root@pcmk-1 ~]# crm configure show
node pcmk-1
node pcmk-2
primitive WebData ocf:linbit:drbd \
        params drbd_resource="wwwdata" \
        op monitor interval="60s"
primitive WebFS ocf:heartbeat:Filesystem \
        params device="/dev/drbd/by-res/wwwdata" directory="/var/www/html" fstype="gfs2"
primitive WebSite ocf:heartbeat:apache \
        params configfile="/etc/httpd/conf/httpd.conf" \
        op monitor interval="1min"
primitive ClusterIP ocf:heartbeat:IPaddr2 \
        params ip="192.168.122.101" cidr_netmask="32" clusterip_hash="sourceip" \
        op monitor interval="30s"
primitive dlm ocf:pacemaker:controld \
        op monitor interval="120s"
primitive gfs-control ocf:pacemaker:controld \
        params daemon="gfs_controld.pcmk" args="-g 0" \
        op monitor interval="120s"
primitive rsa-fencing stonith::external/ibmrsa \
        params hostname="pcmk-1 pcmk-2" ipaddr=192.168.122.31 userid=mgmt passwd=abc123 type=ibm \
        op monitor interval="60s"
ms WebDataClone WebData \
        meta master-max="2" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
clone Fencing rsa-fencing
clone WebFSClone WebFS
clone WebIP ClusterIP  \
        meta globally-unique="true" clone-max="2" clone-node-max="2"
clone WebSiteClone WebSite
clone dlm-clone dlm \
        meta interleave="true"
clone gfs-clone gfs-control \
        meta interleave="true"
colocation WebFS-with-gfs-control inf: WebFSClone gfs-clone
colocation WebSite-with-WebFS inf: WebSiteClone WebFSClone
colocation fs_on_drbd inf: WebFSClone WebDataClone:Master
colocation gfs-with-dlm inf: gfs-clone dlm-clone
colocation website-with-ip inf: WebSiteClone WebIP
order WebFS-after-WebData inf: WebDataClone:promote WebFSClone:start
order WebSite-after-WebFS inf: WebFSClone WebSiteClone
order apache-after-ip inf: WebIP WebSiteClone
order start-WebFS-after-gfs-control inf: gfs-clone WebFSClone
order start-gfs-after-dlm inf: dlm-clone gfs-clone
property $id="cib-bootstrap-options" \
-        dc-version="1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7" \
+        dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
        cluster-infrastructure="openais" \
        expected-quorum-votes="2" \
        stonith-enabled="true" \
        no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
        resource-stickiness="100"
</screen>
</section>
<section>
<title>Node List</title>
<para>
The list of cluster nodes is automatically populated by the cluster.
</para>
<screen>
node pcmk-1
node pcmk-2
</screen>
</section>
<section>
<title>Cluster Options</title>
<para>
This is where the cluster automatically stores some information about itself
</para>
<orderedlist>
<listitem>
<para>
dc-version - the version (including upstream source-code hash) of Pacemaker used on the DC
</para>
</listitem>
<listitem>
<para>
cluster-infrastructure - the cluster infrastructure being used (heartbeat or openais)
</para>
</listitem>
<listitem>
<para>
expected-quorum-votes - the maximum number of nodes expected to be part of the cluster
</para>
</listitem>
</orderedlist>
<para>
and where the admin can set options that control the way the cluster operates
</para>
<orderedlist>
<listitem>
<para>
stonith-enabled=true - Make use of STONITH
</para>
</listitem>
<listitem>
<para>
no-quorum-policy=ignore - Ignore loss of quorum and continue to host resources.
</para>
</listitem>
</orderedlist>
<screen>
property $id="cib-bootstrap-options" \
-        dc-version="1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7" \
+        dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
        cluster-infrastructure="openais" \
        expected-quorum-votes="2" \
        stonith-enabled="true" \
        no-quorum-policy="ignore"
</screen>
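<para>
Individual options can also be queried from the command line. For example, one way (shown here only as an optional aside, not part of the original walk-through) to check the stonith-enabled option:
</para>
<screen>
[root@pcmk-1 ~]# crm_attribute --type crm_config --name stonith-enabled --query
</screen>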
</section>
<section>
<title>Resources</title>
<section>
<title>Default Options</title>
<para>
Here we configure cluster options that apply to every resource.
</para>
<orderedlist>
<listitem>
<para>
resource-stickiness - Specify the aversion to moving resources to other machines
</para>
</listitem>
</orderedlist>
<screen>
rsc_defaults $id="rsc-options" \
        resource-stickiness="100"
</screen>
</section>
<section>
<title>Fencing</title>
<para>
<note>
<para>
TODO: Add text here
</para>
</note>
</para>
<screen>
primitive rsa-fencing stonith::external/ibmrsa \
        params hostname="pcmk-1 pcmk-2" ipaddr=192.168.122.31 userid=mgmt passwd=abc123 type=ibm \
        op monitor interval="60s"
clone Fencing rsa-fencing
</screen>
</section>
<section>
<title>Service Address</title>
<para>
Users of the services provided by the cluster require an unchanging address with which to access them. Additionally, we cloned the address so it will be active on both nodes. An iptables rule (created as part of the resource agent) is used to ensure that each request is only processed by one of the two clone instances. The additional meta options tell the cluster that we want two instances of the clone (one “request bucket” for each node) and that if one node fails, then the remaining node should hold both.
</para>
<screen>
primitive ClusterIP ocf:heartbeat:IPaddr2 \
        params ip="192.168.122.101" cidr_netmask="32" clusterip_hash="sourceip" \
        op monitor interval="30s"
clone WebIP ClusterIP \
        meta globally-unique="true" clone-max="2" clone-node-max="2"
</screen>
<note>
<para>
TODO: The RA should check for globally-unique=true when cloned
</para>
</note>
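<para>
If you are curious about the iptables rule mentioned above, one way (purely as an optional aside) to look for it on a node hosting a clone instance is:
</para>
<screen>
[root@pcmk-1 ~]# iptables -L INPUT -n | grep CLUSTERIP
</screen>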
</section>
<section>
<title>Distributed lock manager</title>
<para>
Cluster filesystems like GFS2 require a lock manager. This service starts the daemon that provides user-space applications (such as the GFS2 daemon) with access to the in-kernel lock manager. Since we need it to be available on all nodes in the cluster, we have it cloned.
</para>
<screen>
primitive dlm ocf:pacemaker:controld \
        op monitor interval="120s"
clone dlm-clone dlm \
        meta interleave="true"
</screen>
<note>
<para>
TODO: Confirm <literal>interleave</literal> is no longer needed
</para>
</note>
</section>
<section>
<title>GFS control daemon</title>
<para>
GFS2 also needs a user-space/kernel bridge that runs on every node. So here we have another clone; however, this time we must also specify that it can only run on machines that are also running the DLM (colocation constraint) and that it can only be started after the DLM is running (order constraint). Additionally, the gfs-control clone should only care about the DLM instances it is paired with, so we need to set the interleave option.
</para>
<screen>
primitive gfs-control ocf:pacemaker:controld \
        params daemon="gfs_controld.pcmk" args="-g 0" \
        op monitor interval="120s"
clone gfs-clone gfs-control \
        meta interleave="true"
colocation gfs-with-dlm inf: gfs-clone dlm-clone
order start-gfs-after-dlm inf: dlm-clone gfs-clone
</screen>
</section>
<section>
<title>DRBD - Shared Storage</title>
<para>
Here we define the DRBD service and specify which DRBD resource (from drbd.conf) it should manage. We make it a master/slave resource and, in order to have an active/active setup, allow both instances to be promoted by specifying master-max=2. We also set the notify option so that the cluster will tell the DRBD agent when its peer changes state.
</para>
<screen>
primitive WebData ocf:linbit:drbd \
        params drbd_resource="wwwdata" \
        op monitor interval="60s"
ms WebDataClone WebData \
        meta master-max="2" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
</screen>
</section>
<section>
<title>Cluster Filesystem</title>
<para>
The cluster filesystem ensures that files are read and written correctly. We need to specify the block device (provided by DRBD), where we want it mounted and that we are using GFS2. Again it is a clone because it is intended to be active on both nodes. The additional constraints ensure that it can only be started on nodes with active gfs-control and drbd instances.
</para>
<screen>
primitive WebFS ocf:heartbeat:Filesystem \
        params device="/dev/drbd/by-res/wwwdata" directory="/var/www/html" fstype="gfs2"
clone WebFSClone WebFS
colocation WebFS-with-gfs-control inf: WebFSClone gfs-clone
colocation fs_on_drbd inf: WebFSClone WebDataClone:Master
order WebFS-after-WebData inf: WebDataClone:promote WebFSClone:start
order start-WebFS-after-gfs-control inf: gfs-clone WebFSClone
</screen>
</section>
<section>
<title>Apache</title>
<para>
Lastly we have the actual service, Apache. We need only tell the cluster where to find its main configuration file and restrict it to running on nodes that have the required filesystem mounted and the IP address active.
</para>
<screen>
primitive WebSite ocf:heartbeat:apache \
        params configfile="/etc/httpd/conf/httpd.conf" \
        op monitor interval="1min"
clone WebSiteClone WebSite
colocation WebSite-with-WebFS inf: WebSiteClone WebFSClone
colocation website-with-ip inf: WebSiteClone WebIP
order apache-after-ip inf: WebIP WebSiteClone
order WebSite-after-WebFS inf: WebFSClone WebSiteClone
</screen>
</section>
</section>
</appendix>
diff --git a/doc/Clusters_from_Scratch/en-US/Ap-Corosync-Conf.xml b/doc/Clusters_from_Scratch/en-US/Ap-Corosync-Conf.xml
index e162b643e3..094b5b2c31 100644
--- a/doc/Clusters_from_Scratch/en-US/Ap-Corosync-Conf.xml
+++ b/doc/Clusters_from_Scratch/en-US/Ap-Corosync-Conf.xml
@@ -1,74 +1,79 @@
<?xml version='1.0' encoding='utf-8' ?>
<!DOCTYPE appendix PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
<!ENTITY % BOOK_ENTITIES SYSTEM "Clusters_from_Scratch.ent">
%BOOK_ENTITIES;
]>
<appendix id="ap-corosync-conf">
- <title>Sample Corosync.conf</title>
+ <title>Sample Corosync Configuration</title>
- <screen>
+ <example>
+ <title>Sample Corosync.conf for a two-node cluster</title>
+ <programlisting>
+ <![CDATA[
# Please read the Corosync.conf.5 manual page
compatibility: whitetank
totem {
        version: 2
        # How long before declaring a token lost (ms)
        token:          5000
        # How many token retransmits before forming a new configuration
        token_retransmits_before_loss_const: 10
        # How long to wait for join messages in the membership protocol (ms)
        join:           1000
        # How long to wait for consensus to be achieved before starting a new
        # round of membership configuration (ms)
        consensus:      6000
        # Turn off the virtual synchrony filter
        vsftype:        none
        # Number of messages that may be sent by one processor on receipt of the token
        max_messages:   20
        # Stagger sending the node join messages by 1..send_join ms
        send_join: 45
        # Limit generated nodeids to 31-bits (positive signed integers)
        clear_node_high_bit: yes
        # Disable encryption
        secauth:        off
        # How many threads to use for encryption/decryption
        threads:           0
        # Optionally assign a fixed node id (integer)
        # nodeid:         1234
        interface {
                ringnumber: 0
                # The following values need to be set based on your environment
                bindnetaddr: 192.168.122.0
                mcastaddr: 226.94.1.1
                mcastport: 4000
        }
}
logging {
        debug: off
        fileline: off
        to_syslog: yes
        to_stderr: off
        syslog_facility: daemon
        timestamp: on
}
amf {
        mode: disabled
}
- </screen>
+ ]]>
+ </programlisting>
+ </example>
</appendix>
diff --git a/doc/Clusters_from_Scratch/en-US/Ch-Active-Passive.xml b/doc/Clusters_from_Scratch/en-US/Ch-Active-Passive.xml
index 1a4373ba29..9f508bc07e 100644
--- a/doc/Clusters_from_Scratch/en-US/Ch-Active-Passive.xml
+++ b/doc/Clusters_from_Scratch/en-US/Ch-Active-Passive.xml
@@ -1,401 +1,408 @@
<?xml version='1.0' encoding='utf-8' ?>
<!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
<!ENTITY % BOOK_ENTITIES SYSTEM "Clusters_from_Scratch.ent">
%BOOK_ENTITIES;
]>
<chapter>
<title>Creating an Active/Passive Cluster</title>
<section>
<title>Exploring the Existing Configuration</title>
<para>
When Pacemaker starts up, it automatically records the number and details of the nodes in the cluster as well as which stack is being used and the version of Pacemaker being used.
</para>
<para>
This is what the base configuration should look like.
</para>
<screen>
[root@pcmk-2 ~]# <userinput>crm configure show</userinput>
node pcmk-1
node pcmk-2
property $id="cib-bootstrap-options" \
-        dc-version="1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7" \
+        dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
        cluster-infrastructure="openais" \
        expected-quorum-votes="2"
</screen>
<para>
For those who are not afraid of XML, you can see the raw configuration by appending “xml” to the previous command.
</para>
<screen>
[root@pcmk-2 ~]# <userinput>crm configure show xml</userinput>
&lt;?xml version="1.0" ?&gt;
&lt;cib admin_epoch="0" crm_feature_set="3.0.1" dc-uuid="pcmk-1" epoch="13" have-quorum="1" num_updates="7" validate-with="pacemaker-1.0"&gt;
  &lt;configuration&gt;
    &lt;crm_config&gt;
      &lt;cluster_property_set id="cib-bootstrap-options"&gt;
-        &lt;nvpair id="cib-bootstrap-options-dc-version" name="dc-version" value="1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7"/&gt;
+        &lt;nvpair id="cib-bootstrap-options-dc-version" name="dc-version" value="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f"/&gt;
        &lt;nvpair id="cib-bootstrap-options-cluster-infrastructure" name="cluster-infrastructure" value="openais"/&gt;
        &lt;nvpair id="cib-bootstrap-options-expected-quorum-votes" name="expected-quorum-votes" value="2"/&gt;
      &lt;/cluster_property_set&gt;
    &lt;/crm_config&gt;
    &lt;rsc_defaults/&gt;
    &lt;op_defaults/&gt;
    &lt;nodes&gt;
      &lt;node id="pcmk-1" type="normal" uname="pcmk-1"/&gt;
      &lt;node id="pcmk-2" type="normal" uname="pcmk-2"/&gt;
    &lt;/nodes&gt;
    &lt;resources/&gt;
    &lt;constraints/&gt;
  &lt;/configuration&gt;
&lt;/cib&gt;
</screen>
<para>
This is the last XML you’ll see in this document.
</para>
<para>
Before we make any changes, it’s a good idea to check the validity of the configuration.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm_verify -L</userinput>
crm_verify[2195]: 2009/08/27_16:57:12 ERROR: unpack_resources: <emphasis>Resource start-up disabled since no STONITH resources have been defined</emphasis>
crm_verify[2195]: 2009/08/27_16:57:12 ERROR: unpack_resources: Either configure some or disable STONITH with the stonith-enabled option
crm_verify[2195]: 2009/08/27_16:57:12 ERROR: unpack_resources: NOTE: Clusters with shared data need STONITH to ensure data integrity
<emphasis>Errors found during check: config not valid</emphasis>
  -V may provide more details
[root@pcmk-1 ~]#
</screen>
<para>
As you can see, the tool has found some errors.
</para>
<para>
In order to guarantee the safety of your data <footnote>
<para>
If the data is corrupt, there is little point in continuing to make it available
</para>
</footnote> , Pacemaker ships with STONITH <footnote>
<para>
A common node fencing mechanism. Used to ensure data integrity by powering off “bad” nodes.
</para>
</footnote> enabled. However it also knows when no STONITH configuration has been supplied and reports this as a problem (since the cluster would not be able to make progress if a situation requiring node fencing arose).
</para>
<para>
For now, we will disable this feature and configure it later in the Configuring STONITH section. It is important to note that the use of STONITH is highly encouraged; turning it off tells the cluster to simply pretend that failed nodes are safely powered off. Some vendors will even refuse to support clusters that have it disabled.
</para>
<para>
To disable STONITH, we set the stonith-enabled cluster option to false.
</para>
<blockquote>
<para>
<userinput>crm configure property stonith-enabled=false</userinput>
</para>
<para>
<userinput>crm_verify -L</userinput>
</para>
</blockquote>
<para>
With the new cluster option set, the configuration is now valid.
</para>
+ <warning>
+ <para>
+ The use of <literal>stonith-enabled=false</literal> is completely inappropriate for a production cluster.
+ We use it here to defer the discussion of its configuration which can differ widely from one installation to the next.
+ See <xref linkend="ch-stonith"/> for information on why STONITH is important and details on how to configure it.
+ </para>
+ </warning>
</section>
<section>
<title>Adding a Resource</title>
<para>
The first thing we should do is configure an IP address. Regardless of where the cluster service(s) are running, we need a consistent address to contact them on. Here I will choose and add 192.168.122.101 as the floating address, give it the imaginative name ClusterIP and tell the cluster to check that it’s running every 30 seconds.
</para>
<important>
<para>
The chosen address must not be one already associated with a physical node
</para>
</important>
<screen>
<userinput>crm configure primitive ClusterIP ocf:heartbeat:IPaddr2 \ </userinput>
        <userinput>params ip=192.168.122.101 cidr_netmask=32 \ </userinput>
        <userinput>op monitor interval=30s</userinput>
</screen>
<para>
The other important piece of information here is ocf:heartbeat:IPaddr2. This tells Pacemaker three things about the resource you want to add. The first field, ocf, is the standard to which the resource script conforms and where to find it. The second field is specific to OCF resources and tells the cluster which namespace to find the resource script in, in this case heartbeat. The last field indicates the name of the resource script.
</para>
<para>
To obtain a list of the available resource classes, run
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm ra classes</userinput>
heartbeat
lsb
<emphasis>ocf / heartbeat pacemaker</emphasis>
stonith
</screen>
<para>
To then find all the OCF resource agents provided by Pacemaker and Heartbeat, run
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm ra list ocf pacemaker</userinput>
ClusterMon     Dummy          Stateful       SysInfo        SystemHealth   controld
ping           pingd          
[root@pcmk-1 ~]# <userinput>crm ra list ocf heartbeat</userinput>
AoEtarget              AudibleAlarm           ClusterMon             Delay
Dummy                  EvmsSCC                Evmsd                  Filesystem
ICP                    IPaddr                 IPaddr2                IPsrcaddr
LVM                    LinuxSCSI              MailTo                 ManageRAID
ManageVE               Pure-FTPd              Raid1                  Route
SAPDatabase            SAPInstance            SendArp                ServeRAID
SphinxSearchDaemon     Squid                  Stateful               SysInfo
VIPArip                VirtualDomain          WAS                    WAS6
WinPopup               Xen                    Xinetd                 anything
apache                 db2                    drbd                   eDir88
iSCSILogicalUnit       iSCSITarget            ids                    iscsi
ldirectord             mysql                  mysql-proxy            nfsserver
oracle                 oralsnr                pgsql                  pingd
portblock              rsyncd                 scsi2reservation       sfex
tomcat                 vmware                
[root@pcmk-1 ~]#
</screen>
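<para>
If you want to learn more about a particular agent, such as the parameters it accepts, the crm shell can also display an agent’s metadata. For example (shown here only as an optional aside, output omitted):
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm ra info ocf:heartbeat:IPaddr2</userinput>
</screen>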
<para>
Now verify that the IP resource has been added and display the cluster’s status to see that it is now active.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm configure show</userinput>
node pcmk-1
node pcmk-2
<emphasis>primitive ClusterIP ocf:heartbeat:IPaddr2 \</emphasis>
<emphasis> params ip="192.168.122.101" cidr_netmask="32" \</emphasis>
<emphasis> op monitor interval="30s"</emphasis>
property $id="cib-bootstrap-options" \
-        dc-version="1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7" \
+        dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
        cluster-infrastructure="openais" \
        expected-quorum-votes="2" \
        stonith-enabled="false" \
[root@pcmk-1 ~]# <userinput>crm_mon</userinput>
============
Last updated: Fri Aug 28 15:23:48 2009
Stack: openais
Current DC: pcmk-1 - partition with quorum
-Version: 1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7
+Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
1 Resources configured.
============
Online: [ pcmk-1 pcmk-2 ]
<emphasis>ClusterIP (ocf::heartbeat:IPaddr): Started</emphasis> pcmk-1
</screen>
</section>
<section>
<title>Perform a Failover</title>
<para>
Being a high-availability cluster, we should test failover of our new resource before moving on.
</para>
<para>
First, find the node on which the IP address is running.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm resource status ClusterIP</userinput>
resource ClusterIP is running on: pcmk-1
[root@pcmk-1 ~]#
</screen>
<para>
Shut down Pacemaker and Corosync on that machine.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>ssh pcmk-1 -- /etc/init.d/pacemaker stop</userinput>
<emphasis>Signaling Pacemaker Cluster Manager to terminate: [ OK ]</emphasis>
<emphasis>Waiting for cluster services to unload:. [ OK ]</emphasis>
[root@pcmk-1 ~]# <userinput>ssh pcmk-1 -- /etc/init.d/corosync stop</userinput>
<emphasis>Stopping Corosync Cluster Engine (corosync): [ OK ]</emphasis>
<emphasis>Waiting for services to unload: [ OK ]</emphasis>
[root@pcmk-1 ~]#
</screen>
<para>
Once Corosync is no longer running, go to the other node and check the cluster status with crm_mon.
</para>
<screen>
[root@pcmk-2 ~]# <userinput>crm_mon</userinput>
============
Last updated: Fri Aug 28 15:27:35 2009
Stack: openais
Current DC: pcmk-2 - <emphasis>partition WITHOUT quorum</emphasis>
-Version: 1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7
+Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
1 Resources configured.
============
Online: [ pcmk-2 ]
<emphasis>OFFLINE: [ pcmk-1 ]</emphasis>
</screen>
<para>
There are three things to notice about the cluster’s current state. The first is that, as expected, pcmk-1 is now offline. However, we can also see that ClusterIP isn’t running anywhere!
</para>
<section>
<title>Quorum and Two-Node Clusters</title>
<para>
This is because the cluster no longer has quorum, as can be seen by the text “partition WITHOUT quorum” (emphasised green) in the output above. In order to reduce the possibility of data corruption, Pacemaker’s default behavior is to stop all resources if the cluster does not have quorum.
</para>
<para>
A cluster is said to have quorum when more than half the known or expected nodes are online, or for the mathematically inclined, whenever the following equation is true:
</para>
<para>
total_nodes &lt; 2 * active_nodes
</para>
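<para>
For example, a five-node cluster still has quorum with three nodes active (5 &lt; 2 * 3), but not with only two (5 &lt; 2 * 2 is false).
</para>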
<para>
Therefore a two-node cluster only has quorum when both nodes are running, which is no longer the case for our cluster. This would normally make the creation of a two-node cluster pointless<footnote>
<para>
Actually some would argue that two-node clusters are always pointless, but that is an argument for another time.
</para>
</footnote>, however it is possible to control how Pacemaker behaves when quorum is lost. In particular, we can tell the cluster to simply ignore quorum altogether.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm configure property no-quorum-policy=ignore</userinput>
[root@pcmk-1 ~]# <userinput>crm configure show </userinput>
node pcmk-1
node pcmk-2
primitive ClusterIP ocf:heartbeat:IPaddr2 \
        params ip="192.168.122.101" cidr_netmask="32" \
        op monitor interval="30s"
property $id="cib-bootstrap-options" \
-        dc-version="1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7" \
+        dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
        cluster-infrastructure="openais" \
        expected-quorum-votes="2" \
        stonith-enabled="false" \
        <emphasis>no-quorum-policy="ignore"</emphasis>
</screen>
<para>
After a few moments, the cluster will start the IP address on the remaining node. Note that the cluster still does not have quorum.
</para>
<screen>
[root@pcmk-2 ~]# <userinput>crm_mon</userinput>
============
Last updated: Fri Aug 28 15:30:18 2009
Stack: openais
Current DC: pcmk-2 - <emphasis>partition WITHOUT quorum</emphasis>
-Version: 1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7
+Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
1 Resources configured.
============
Online: [ pcmk-2 ]
<emphasis>OFFLINE: [ pcmk-1 ]</emphasis>
<emphasis>ClusterIP (ocf::heartbeat:IPaddr): Started pcmk-2</emphasis>
</screen>
<para>
Now simulate node recovery by restarting the cluster stack on pcmk-1 and check the cluster’s status.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>/etc/init.d/corosync start</userinput>
<emphasis>Starting Corosync Cluster Engine (corosync): [ OK ]</emphasis>
[root@pcmk-1 ~]# <userinput>/etc/init.d/pacemaker start</userinput>
<emphasis>Starting Pacemaker Cluster Manager: [ OK ]</emphasis>
[root@pcmk-1 ~]# <userinput>crm_mon</userinput>
============
Last updated: Fri Aug 28 15:32:13 2009
Stack: openais
Current DC: pcmk-2 - partition with quorum
-Version: 1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7
+Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
1 Resources configured.
============
Online: [ <emphasis>pcmk-1</emphasis> pcmk-2 ]
ClusterIP        (ocf::heartbeat:IPaddr):        <emphasis>Started pcmk-1</emphasis>
</screen>
<para>
Here we see something that some may consider surprising: the IP is back running at its original location!
</para>
</section>
<section>
<title>Prevent Resources from Moving after Recovery</title>
<para>
In some circumstances it is highly desirable to prevent healthy resources from being moved around the cluster. Moving resources almost always requires a period of downtime, and for complex services like Oracle databases, this period can be quite long.
</para>
<para>
To address this, Pacemaker has the concept of resource stickiness, which controls how much a service prefers to stay running where it is. You may like to think of it as the “cost” of any downtime. By default, Pacemaker assumes there is zero cost associated with moving resources and will do so to achieve “optimal<footnote>
<para>
It should be noted that Pacemaker’s definition of optimal may not always agree with that of a human. The order in which Pacemaker processes lists of resources and nodes creates implicit preferences (required in order to create a stable solution) in situations where the administrator has not explicitly specified any.
</para>
</footnote>” resource placement. We can specify a different stickiness for every resource, but it is often sufficient to change the default.
</para>
<screen>
crm configure rsc_defaults resource-stickiness=100
[root@pcmk-2 ~]# <userinput>crm configure show</userinput>
node pcmk-1
node pcmk-2
primitive ClusterIP ocf:heartbeat:IPaddr2 \
        params ip="192.168.122.101" cidr_netmask="32" \
        op monitor interval="30s"
property $id="cib-bootstrap-options" \
-        dc-version="1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7" \
+        dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
        cluster-infrastructure="openais" \
        expected-quorum-votes="2" \
        stonith-enabled="false" \
        no-quorum-policy="ignore"
<emphasis>rsc_defaults $id="rsc-options" \</emphasis>
<emphasis> resource-stickiness="100"</emphasis>
</screen>
<para>
If we now retry the failover test, we see that as expected ClusterIP still moves to pcmk-2 when pcmk-1 is taken offline.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>ssh pcmk-1 -- /etc/init.d/pacemaker stop</userinput>
Signaling Pacemaker Cluster Manager to terminate: [ OK ]
Waiting for cluster services to unload:. [ OK ]
[root@pcmk-1 ~]# <userinput>ssh pcmk-1 -- /etc/init.d/corosync stop</userinput>
Stopping Corosync Cluster Engine (corosync):               [  OK  ]
Waiting for services to unload:                            [  OK  ]
[root@pcmk-1 ~]# <userinput>ssh pcmk-2 -- crm_mon -1</userinput>
============
Last updated: Fri Aug 28 15:39:38 2009
Stack: openais
Current DC: pcmk-2 - partition WITHOUT quorum
-Version: 1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7
+Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
1 Resources configured.
============
Online: [ pcmk-2 ]
<emphasis>OFFLINE: [ pcmk-1 ]</emphasis>
ClusterIP        (ocf::heartbeat:IPaddr):        <emphasis>Started pcmk-2</emphasis>
</screen>
<para>
However when we bring pcmk-1 back online, ClusterIP now remains running on pcmk-2.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>/etc/init.d/corosync start</userinput>
<emphasis>Starting Corosync Cluster Engine (corosync): [ OK ]</emphasis>
[root@pcmk-1 ~]# <userinput>/etc/init.d/pacemaker start</userinput>
<emphasis>Starting Pacemaker Cluster Manager: [ OK ]</emphasis>
[root@pcmk-1 ~]# <userinput>crm_mon</userinput>
============
Last updated: Fri Aug 28 15:41:23 2009
Stack: openais
Current DC: pcmk-2 - partition with quorum
-Version: 1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7
+Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
1 Resources configured.
============
Online: [ <emphasis>pcmk-1</emphasis> pcmk-2 ]
ClusterIP        (ocf::heartbeat:IPaddr):        Started <emphasis>pcmk-2</emphasis>
</screen>
</section>
</section>
</chapter>
diff --git a/doc/Clusters_from_Scratch/en-US/Ch-Apache.xml b/doc/Clusters_from_Scratch/en-US/Ch-Apache.xml
index 567d5a83b0..667c2c391d 100644
--- a/doc/Clusters_from_Scratch/en-US/Ch-Apache.xml
+++ b/doc/Clusters_from_Scratch/en-US/Ch-Apache.xml
@@ -1,472 +1,472 @@
<?xml version='1.0' encoding='utf-8' ?>
<!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
<!ENTITY % BOOK_ENTITIES SYSTEM "Clusters_from_Scratch.ent">
%BOOK_ENTITIES;
]>
<chapter>
<title>Apache - Adding More Services</title>
<note>
<para>
Now that we have a basic but functional active/passive two-node cluster, we’re ready to add some real services. We’re going to start with Apache because it’s a feature of many clusters and relatively simple to configure.
</para>
</note>
<section>
<title>Installation</title>
<para>
Before continuing, we need to make sure Apache is installed on <emphasis>both</emphasis> hosts.
</para>
<screen>
[root@ppcmk-1 ~]# <userinput>yum install -y httpd</userinput>
Setting up Install Process
Resolving Dependencies
--&gt; Running transaction check
---&gt; Package httpd.x86_64 0:2.2.13-2.fc12 set to be updated
--&gt; Processing Dependency: httpd-tools = 2.2.13-2.fc12 for package: httpd-2.2.13-2.fc12.x86_64
--&gt; Processing Dependency: apr-util-ldap for package: httpd-2.2.13-2.fc12.x86_64
--&gt; Processing Dependency: /etc/mime.types for package: httpd-2.2.13-2.fc12.x86_64
--&gt; Processing Dependency: libaprutil-1.so.0()(64bit) for package: httpd-2.2.13-2.fc12.x86_64
--&gt; Processing Dependency: libapr-1.so.0()(64bit) for package: httpd-2.2.13-2.fc12.x86_64
--&gt; Running transaction check
---&gt; Package apr.x86_64 0:1.3.9-2.fc12 set to be updated
---&gt; Package apr-util.x86_64 0:1.3.9-2.fc12 set to be updated
---&gt; Package apr-util-ldap.x86_64 0:1.3.9-2.fc12 set to be updated
---&gt; Package httpd-tools.x86_64 0:2.2.13-2.fc12 set to be updated
---&gt; Package mailcap.noarch 0:2.1.30-1.fc12 set to be updated
--&gt; Finished Dependency Resolution
Dependencies Resolved
=======================================================================================
 Package               Arch             Version                Repository         Size
=======================================================================================
Installing:
 httpd               x86_64           2.2.13-2.fc12            rawhide           735 k
Installing for dependencies:
 apr                 x86_64           1.3.9-2.fc12             rawhide           117 k
 apr-util            x86_64           1.3.9-2.fc12             rawhide            84 k
 apr-util-ldap       x86_64           1.3.9-2.fc12             rawhide            15 k
 httpd-tools         x86_64           2.2.13-2.fc12            rawhide            63 k
 mailcap             noarch           2.1.30-1.fc12            rawhide            25 k
Transaction Summary
=======================================================================================
Install       6 Package(s)
Upgrade       0 Package(s)
Total download size: 1.0 M
Downloading Packages:
(1/6): apr-1.3.9-2.fc12.x86_64.rpm                                   | 117 kB     00:00    
(2/6): apr-util-1.3.9-2.fc12.x86_64.rpm                             |  84 kB     00:00    
(3/6): apr-util-ldap-1.3.9-2.fc12.x86_64.rpm                         |  15 kB     00:00    
(4/6): httpd-2.2.13-2.fc12.x86_64.rpm                               | 735 kB     00:00    
(5/6): httpd-tools-2.2.13-2.fc12.x86_64.rpm                         |  63 kB     00:00    
(6/6): mailcap-2.1.30-1.fc12.noarch.rpm                             |  25 kB     00:00    
----------------------------------------------------------------------------------------
Total                                                       875 kB/s | 1.0 MB     00:01    
Running rpm_check_debug
Running Transaction Test
Finished Transaction Test
Transaction Test Succeeded
Running Transaction
  Installing     : apr-1.3.9-2.fc12.x86_64                                         1/6
  Installing     : apr-util-1.3.9-2.fc12.x86_64                                     2/6
  Installing     : apr-util-ldap-1.3.9-2.fc12.x86_64                               3/6
  Installing     : httpd-tools-2.2.13-2.fc12.x86_64                                 4/6
  Installing     : mailcap-2.1.30-1.fc12.noarch                                     5/6
  Installing     : httpd-2.2.13-2.fc12.x86_64                                       6/6
Installed:
  httpd.x86_64 0:2.2.13-2.fc12                                                        
Dependency Installed:
  apr.x86_64 0:1.3.9-2.fc12            apr-util.x86_64 0:1.3.9-2.fc12
  apr-util-ldap.x86_64 0:1.3.9-2.fc12  httpd-tools.x86_64 0:2.2.13-2.fc12
  mailcap.noarch 0:2.1.30-1.fc12  
Complete!
[root@pcmk-1 ~]#
</screen>
<para>
Also, we need the wget tool in order for the cluster to be able to check the status of the Apache server.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>yum install -y wget</userinput>
Setting up Install Process
Resolving Dependencies
--&gt; Running transaction check
---&gt; Package wget.x86_64 0:1.11.4-5.fc12 set to be updated
--&gt; Finished Dependency Resolution
Dependencies Resolved
===========================================================================================
 Package        Arch             Version                      Repository               Size
===========================================================================================
Installing:
 wget         x86_64          1.11.4-5.fc12                   rawhide                393 k
Transaction Summary
===========================================================================================
Install       1 Package(s)
Upgrade       0 Package(s)
Total download size: 393 k
Downloading Packages:
wget-1.11.4-5.fc12.x86_64.rpm                                            | 393 kB     00:00    
Running rpm_check_debug
Running Transaction Test
Finished Transaction Test
Transaction Test Succeeded
Running Transaction
  Installing     : wget-1.11.4-5.fc12.x86_64                                            1/1
Installed:
  wget.x86_64 0:1.11.4-5.fc12
Complete!
[root@pcmk-1 ~]#
</screen>
</section>
<section>
<title>Preparation</title>
<para>
First we need to create a page for Apache to serve up. On Fedora the default Apache docroot is /var/www/html, so we’ll create an index file there.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>cat &lt;&lt;-END &gt;/var/www/html/index.html</userinput>
&lt;html&gt;
&lt;body&gt;My Test Site - pcmk-1&lt;/body&gt;
&lt;/html&gt;
END
[root@pcmk-1 ~]#
</screen>
<para>
For the moment, we will simplify things by serving up only a static site and manually syncing the data between the two nodes. So run the command again on pcmk-2.
</para>
<screen>
[root@pcmk-2 ~]# <userinput>cat &lt;&lt;-END &gt;/var/www/html/index.html</userinput>
&lt;html&gt;
&lt;body&gt;My Test Site - pcmk-2&lt;/body&gt;
&lt;/html&gt;
END
[root@pcmk-2 ~]#
</screen>
</section>
<section>
<title>Enable the Apache status URL</title>
<para>
In order to monitor the health of your Apache instance, and recover it if it fails, the resource agent used by Pacemaker assumes the server-status URL is available.
Look for the following in /etc/httpd/conf/httpd.conf and make sure it is not disabled or commented out:
</para>
<screen>
&lt;Location /server-status>
SetHandler server-status
Order deny,allow
Deny from all
Allow from 127.0.0.1
&lt;/Location>
</screen>
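<para>
Once Apache is running, you can optionally confirm that the status URL responds from the node itself. A quick manual check (not part of the guide’s required steps) might look like this:
</para>
<screen>
[root@pcmk-1 ~]# <userinput>wget --quiet --output-document=- http://localhost/server-status</userinput>
</screen>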
</section>
<section>
<title>Update the Configuration</title>
<para>
At this point, Apache is ready to go; all that needs to be done is to add it to the cluster. Let’s call the resource WebSite. We need to use an OCF script called apache in the heartbeat namespace <footnote>
<para>
Compare the key used here ocf:heartbeat:apache with the one we used earlier for the IP address: ocf:heartbeat:IPaddr2
</para>
</footnote>. The only required parameter is the path to the main Apache configuration file, and we’ll tell the cluster to check once a minute that Apache is still running.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm configure primitive WebSite ocf:heartbeat:apache params configfile=/etc/httpd/conf/httpd.conf op monitor interval=1min</userinput>
[root@pcmk-1 ~]# <userinput>crm configure show</userinput>
node pcmk-1
node pcmk-2
<emphasis>primitive WebSite ocf:heartbeat:apache \</emphasis>
<emphasis> params configfile="/etc/httpd/conf/httpd.conf" \</emphasis>
<emphasis> op monitor interval="1min"</emphasis>
primitive ClusterIP ocf:heartbeat:IPaddr2 \
        params ip="192.168.122.101" cidr_netmask="32" \
        op monitor interval="30s"
property $id="cib-bootstrap-options" \
-        dc-version="1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7" \
+        dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
        cluster-infrastructure="openais" \
        expected-quorum-votes="2" \
        stonith-enabled="false" \
        no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
        resource-stickiness="100"
</screen>
<para>
After a short delay, we should see the cluster start Apache.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm_mon</userinput>
============
Last updated: Fri Aug 28 16:12:49 2009
Stack: openais
Current DC: pcmk-2 - partition with quorum
-Version: 1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7
+Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
2 Resources configured.
============
Online: [ pcmk-1 pcmk-2 ]
ClusterIP        (ocf::heartbeat:IPaddr):        Started pcmk-2
WebSite        (ocf::heartbeat:apache):        <emphasis>Started pcmk-1</emphasis>
</screen>
<para>
Wait a moment, the WebSite resource isn’t running on the same host as our IP address!
</para>
</section>
<section>
<title>Ensuring Resources Run on the Same Host</title>
<para>
To reduce the load on any one machine, Pacemaker will generally try to spread the configured resources across the cluster nodes. However we can tell the cluster that two resources are related and need to run on the same host (or not at all). Here we instruct the cluster that WebSite can only run on the host that ClusterIP is active on. If ClusterIP is not active anywhere, WebSite will not be permitted to run anywhere.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm configure colocation website-with-ip INFINITY: WebSite ClusterIP</userinput>
[root@pcmk-1 ~]# <userinput>crm configure show</userinput>
node pcmk-1
node pcmk-2
primitive WebSite ocf:heartbeat:apache \
        params configfile="/etc/httpd/conf/httpd.conf" \
        op monitor interval="1min"
primitive ClusterIP ocf:heartbeat:IPaddr2 \
        params ip="192.168.122.101" cidr_netmask="32" \
        op monitor interval="30s"
<emphasis>colocation website-with-ip inf: WebSite ClusterIP</emphasis>
property $id="cib-bootstrap-options" \
-        dc-version="1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7" \
+        dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
        cluster-infrastructure="openais" \
        expected-quorum-votes="2" \
        stonith-enabled="false" \
        no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
        resource-stickiness="100"
[root@pcmk-1 ~]# <userinput>crm_mon</userinput>
============
Last updated: Fri Aug 28 16:14:34 2009
Stack: openais
Current DC: pcmk-2 - partition with quorum
-Version: 1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7
+Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
2 Resources configured.
============
Online: [ pcmk-1 pcmk-2 ]
ClusterIP        (ocf::heartbeat:IPaddr):        Started pcmk-2
WebSite        (ocf::heartbeat:apache):        Started pcmk-2
</screen>
</section>
<section>
<title>Controlling Resource Start/Stop Ordering</title>
<para>
When Apache starts, it binds to the available IP addresses. It doesn’t know about any addresses we add afterwards, so not only do they need to run on the same node, but we need to make sure ClusterIP is already active before we start WebSite. We do this by adding an ordering constraint. We need to give it a name (choose something descriptive like apache-after-ip), indicate that it’s mandatory (so that any recovery for ClusterIP will also trigger recovery of WebSite) and list the two resources in the order we need them to start.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm configure order apache-after-ip mandatory: ClusterIP WebSite</userinput>
[root@pcmk-1 ~]# <userinput>crm configure show</userinput>
node pcmk-1
node pcmk-2
primitive WebSite ocf:heartbeat:apache \
        params configfile="/etc/httpd/conf/httpd.conf" \
        op monitor interval="1min"
primitive ClusterIP ocf:heartbeat:IPaddr2 \
        params ip="192.168.122.101" cidr_netmask="32" \
        op monitor interval="30s"
colocation website-with-ip inf: WebSite ClusterIP
<emphasis>order apache-after-ip inf: ClusterIP WebSite</emphasis>
property $id="cib-bootstrap-options" \
-        dc-version="1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7" \
+        dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
        cluster-infrastructure="openais" \
        expected-quorum-votes="2" \
        stonith-enabled="false" \
        no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
        resource-stickiness="100"
</screen>
</section>
<section>
<title>Specifying a Preferred Location</title>
<para>
Pacemaker does not rely on any sort of hardware symmetry between nodes, so it may well be that one machine is more powerful than the other. In such cases it makes sense to host the resources there if it is available. To do this we create a location constraint. Again we give it a descriptive name (prefer-pcmk-1), specify the resource we want to run there (WebSite), how badly we’d like it to run there (we’ll use 50 for now, but in a two-node situation almost any value above 0 will do) and the host’s name.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm configure location prefer-pcmk-1 WebSite 50: pcmk-1</userinput>
[root@pcmk-1 ~]# <userinput>crm configure show</userinput>
node pcmk-1
node pcmk-2
primitive WebSite ocf:heartbeat:apache \
        params configfile="/etc/httpd/conf/httpd.conf" \
        op monitor interval="1min"
primitive ClusterIP ocf:heartbeat:IPaddr2 \
        params ip="192.168.122.101" cidr_netmask="32" \
        op monitor interval="30s"
<emphasis>location prefer-pcmk-1 WebSite 50: pcmk-1</emphasis>
colocation website-with-ip inf: WebSite ClusterIP
property $id="cib-bootstrap-options" \
-        dc-version="1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7" \
+        dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
        cluster-infrastructure="openais" \
        expected-quorum-votes="2" \
        stonith-enabled="false" \
        no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
        resource-stickiness="100"
[root@pcmk-1 ~]# <userinput>crm_mon</userinput>
============
Last updated: Fri Aug 28 16:17:35 2009
Stack: openais
Current DC: pcmk-2 - partition with quorum
-Version: 1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7
+Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
2 Resources configured.
============
Online: [ pcmk-1 pcmk-2 ]
ClusterIP        (ocf::heartbeat:IPaddr):        <emphasis>Started pcmk-2</emphasis>
WebSite        (ocf::heartbeat:apache):        <emphasis>Started pcmk-2</emphasis>
</screen>
<para>
Wait a minute, the resources are still on pcmk-2!
</para>
<para>
Even though we now prefer pcmk-1 over pcmk-2, that preference is (intentionally) less than the resource stickiness (how much we preferred not to have unnecessary downtime).
</para>
<para>
To see the current placement scores, you can use a tool called ptest
</para>
<screen>
ptest -sL
</screen>
<note>
<para>
TODO: Include output
</para>
</note>
<para>
There is a way to force them to move though...
</para>
</section>
<section>
<title>Manually Moving Resources Around the Cluster</title>
<para>
There are always times when an administrator needs to override the cluster and force resources to move to a specific location. Underneath we use location constraints like the one we created above; happily, you don’t need to care. Just provide the name of the resource and the intended location, and we’ll do the rest.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm resource move WebSite pcmk-1</userinput>
[root@pcmk-1 ~]# <userinput>crm_mon</userinput>
============
Last updated: Fri Aug 28 16:19:24 2009
Stack: openais
Current DC: pcmk-2 - partition with quorum
-Version: 1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7
+Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
2 Resources configured.
============
Online: [ pcmk-1 pcmk-2 ]
ClusterIP        (ocf::heartbeat:IPaddr):        Started pcmk-1
WebSite        (ocf::heartbeat:apache):        Started pcmk-1
</screen>
<para>
Notice how the colocation rule we created has ensured that ClusterIP was also moved to pcmk-1.
</para>
<para>
For the curious, we can see the effect of this command by examining the configuration.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm configure show</userinput>
node pcmk-1
node pcmk-2
primitive WebSite ocf:heartbeat:apache \
        params configfile="/etc/httpd/conf/httpd.conf" \
        op monitor interval="1min"
primitive ClusterIP ocf:heartbeat:IPaddr2 \
        params ip="192.168.122.101" cidr_netmask="32" \
        op monitor interval="30s"
<emphasis>location cli-prefer-WebSite WebSite \</emphasis>
<emphasis> rule $id="cli-prefer-rule-WebSite" inf: #uname eq pcmk-1</emphasis>
location prefer-pcmk-1 WebSite 50: pcmk-1
colocation website-with-ip inf: WebSite ClusterIP
property $id="cib-bootstrap-options" \
-        dc-version="1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7" \
+        dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
        cluster-infrastructure="openais" \
        expected-quorum-votes="2" \
        stonith-enabled="false" \
        no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
        resource-stickiness="100"
</screen>
<para>
Highlighted is the automated constraint used to move the resources to pcmk-1
</para>
<section>
<title>Giving Control Back to the Cluster</title>
<para>
Once we’ve finished whatever activity required us to move the resources to pcmk-1 (in our case, nothing), we can then allow the cluster to resume normal operation with the unmove command. Since we previously configured a default stickiness, the resources will remain on pcmk-1.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm resource unmove WebSite</userinput>
[root@pcmk-1 ~]# <userinput>crm configure show</userinput>
node pcmk-1
node pcmk-2
primitive WebSite ocf:heartbeat:apache \
        params configfile="/etc/httpd/conf/httpd.conf" \
        op monitor interval="1min"
primitive ClusterIP ocf:heartbeat:IPaddr2 \
        params ip="192.168.122.101" cidr_netmask="32" \
        op monitor interval="30s"
location prefer-pcmk-1 WebSite 50: pcmk-1
colocation website-with-ip inf: WebSite ClusterIP
property $id="cib-bootstrap-options" \
-        dc-version="1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7" \
+        dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
        cluster-infrastructure="openais" \
        expected-quorum-votes="2" \
        stonith-enabled="false" \
        no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
        resource-stickiness="100"
</screen>
<para>
Note that the automated constraint is now gone. If we check the cluster status, we can also see that as expected the resources are still active on pcmk-1.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm_mon</userinput>
============
Last updated: Fri Aug 28 16:20:53 2009
Stack: openais
Current DC: pcmk-2 - partition with quorum
-Version: 1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7
+Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
2 Resources configured.
============
Online: [ pcmk-1 pcmk-2 ]
ClusterIP        (ocf::heartbeat:IPaddr):        <emphasis>Started pcmk-1</emphasis>
WebSite        (ocf::heartbeat:apache):        <emphasis>Started pcmk-1</emphasis>
</screen>
</section>
</section>
</chapter>
diff --git a/doc/Clusters_from_Scratch/en-US/Ch-Shared-Storage.xml b/doc/Clusters_from_Scratch/en-US/Ch-Shared-Storage.xml
index cc99a68f5a..03d974b410 100644
--- a/doc/Clusters_from_Scratch/en-US/Ch-Shared-Storage.xml
+++ b/doc/Clusters_from_Scratch/en-US/Ch-Shared-Storage.xml
@@ -1,528 +1,528 @@
<?xml version='1.0' encoding='utf-8' ?>
<!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
<!ENTITY % BOOK_ENTITIES SYSTEM "Clusters_from_Scratch.ent">
%BOOK_ENTITIES;
]>
<chapter>
<title>Replicated Storage with DRBD</title>
<para>
Even if you’re serving up static websites, having to manually synchronize the contents of that website to all the machines in the cluster is not ideal.
For dynamic websites, such as a wiki, it’s not even an option.
Not everyone can afford network-attached storage, but somehow the data needs to be kept in sync.
Enter DRBD, which can be thought of as network-based RAID-1.
See <ulink url="http://www.drbd.org/">http://www.drbd.org/</ulink> for more details.
</para>
<section>
<title>Install the DRBD Packages</title>
<para>
Since its inclusion in the upstream 2.6.33 kernel, everything needed to use DRBD ships with &DISTRO; &DISTRO_VERSION;.
All you need to do is install it:
</para>
<screen>
[root@pcmk-1 ~]# <userinput>yum install -y drbd-pacemaker</userinput>
Loaded plugins: presto, refresh-packagekit
Setting up Install Process
Resolving Dependencies
--> Running transaction check
---> Package drbd-pacemaker.x86_64 0:8.3.7-2.fc13 set to be updated
--> Processing Dependency: drbd-utils = 8.3.7-2.fc13 for package: drbd-pacemaker-8.3.7-2.fc13.x86_64
--> Running transaction check
---> Package drbd-utils.x86_64 0:8.3.7-2.fc13 set to be updated
--> Finished Dependency Resolution
Dependencies Resolved
=================================================================================
Package Arch Version Repository Size
=================================================================================
Installing:
drbd-pacemaker x86_64 8.3.7-2.fc13 fedora 19 k
Installing for dependencies:
drbd-utils x86_64 8.3.7-2.fc13 fedora 165 k
Transaction Summary
=================================================================================
Install 2 Package(s)
Upgrade 0 Package(s)
Total download size: 184 k
Installed size: 427 k
Downloading Packages:
Setting up and reading Presto delta metadata
fedora/prestodelta | 1.7 kB 00:00
Processing delta metadata
Package(s) data still to download: 184 k
(1/2): drbd-pacemaker-8.3.7-2.fc13.x86_64.rpm | 19 kB 00:01
(2/2): drbd-utils-8.3.7-2.fc13.x86_64.rpm | 165 kB 00:02
---------------------------------------------------------------------------------
Total 45 kB/s | 184 kB 00:04
Running rpm_check_debug
Running Transaction Test
Transaction Test Succeeded
Running Transaction
Installing : drbd-utils-8.3.7-2.fc13.x86_64 1/2
Installing : drbd-pacemaker-8.3.7-2.fc13.x86_64 2/2
Installed:
drbd-pacemaker.x86_64 0:8.3.7-2.fc13
Dependency Installed:
drbd-utils.x86_64 0:8.3.7-2.fc13
Complete!
[root@pcmk-1 ~]#
</screen>
</section>
<section>
<title>Configure DRBD</title>
<para>
Before we configure DRBD, we need to set aside some disk for it to use.
</para>
<section>
<title>Create A Partition for DRBD</title>
<para>
If you have more than 1GB free, feel free to use it.
For this guide, however, 1GB is plenty of space for a single HTML file and sufficient for later holding the GFS2 metadata.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>lvcreate -n drbd-demo -L 1G VolGroup</userinput>
  Logical volume "drbd-demo" created
[root@pcmk-1 ~]# <userinput>lvs</userinput>
  LV        VG       Attr   LSize   Origin Snap%  Move Log Copy%  Convert
  <emphasis>drbd-demo VolGroup -wi-a- 1.00G</emphasis>                                      
  lv_root   VolGroup -wi-ao   7.30G                                      
  lv_swap   VolGroup -wi-ao 500.00M
</screen>
<para>
Repeat this on the second node; be sure to use the same size partition.
</para>
<screen>
[root@pcmk-2 ~]# <userinput>lvs</userinput>
  LV      VG       Attr   LSize   Origin Snap%  Move Log Copy%  Convert
  lv_root VolGroup -wi-ao   7.30G                                      
  lv_swap <emphasis>VolGroup</emphasis> -wi-ao 500.00M                                      
[root@pcmk-2 ~]# <userinput>lvcreate -n drbd-demo -L 1G VolGroup</userinput>
 <emphasis> Logical volume "drbd-demo" created</emphasis>
[root@pcmk-2 ~]# <userinput>lvs</userinput>
  LV        VG       Attr   LSize   Origin Snap%  Move Log Copy%  Convert
  <emphasis>drbd-demo VolGroup -wi-a- 1.00G </emphasis>                                     
  lv_root   VolGroup -wi-ao   7.30G                                      
  lv_swap   VolGroup -wi-ao 500.00M
</screen>
</section>
<section>
<title>Write the DRBD Config</title>
<para>
There is no series of commands for building a DRBD configuration, so simply copy the configuration below to /etc/drbd.conf.
</para>
<para>
Detailed information on the directives used in this configuration (and other alternatives) is available from <ulink url="http://www.drbd.org/users-guide/ch-configure.html">http://www.drbd.org/users-guide/ch-configure.html</ulink>
</para>
<warning>
<para>
Be sure to use the names and addresses of <emphasis>your</emphasis> nodes if they differ from the ones used in this guide.
</para>
</warning>
<screen>
global {
  usage-count yes;
}
common {
  protocol C;
}
resource wwwdata {
  meta-disk internal;
  device    /dev/drbd1;
  syncer {
    verify-alg sha1;
  }
  net {
    allow-two-primaries;
  }
 <emphasis> on pcmk-1</emphasis> {
    disk      /dev/mapper/<emphasis>VolGroup</emphasis>-drbd--demo;
    address   192.168.122.101<emphasis>:7789;</emphasis>
  }
  <emphasis>on pcmk-2</emphasis> {
    disk      /dev/mapper/<emphasis>VolGroup</emphasis>-drbd--demo;
    address   192.168.122.102<emphasis>:7789;</emphasis>
  }
}
</screen>
<note>
<para>
TODO: Explain the reason for the allow-two-primaries option
</para>
</note>
</section>
<section>
<title>Initialize and Load DRBD</title>
<para>
With the configuration in place, we can now perform the DRBD initialization
</para>
<screen>
[root@pcmk-1 ~]# <userinput>drbdadm create-md wwwdata</userinput>
md_offset 12578816
al_offset 12546048
bm_offset 12541952
Found some data
 ==&gt; This might destroy existing data! &lt;==
Do you want to proceed?
[need to type 'yes' to confirm] <userinput>yes</userinput>
Writing meta data...
initializing activity log
NOT initialized bitmap
New drbd meta data block successfully created.
success
</screen>
<para>
Now load the DRBD kernel module and confirm that everything is sane.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>modprobe drbd</userinput>
[root@pcmk-1 ~]# <userinput>drbdadm up wwwdata</userinput>
[root@pcmk-1 ~]# <userinput>cat /proc/drbd</userinput>
version: 8.3.6 (api:88/proto:86-90)
GIT-hash: f3606c47cc6fcf6b3f086e425cb34af8b7a81bbf build by root@pcmk-1, 2009-12-08 11:22:57
<emphasis> 1: cs:WFConnection ro:Secondary/Unknown ds:Inconsistent/DUnknown C r----</emphasis>
    ns:0 nr:0 dw:0 dr:0 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:12248
[root@pcmk-1 ~]#
</screen>
<para>
Repeat on the second node:
</para>
<screen>
[root@pcmk-2 ~]# <userinput>drbdadm --force create-md wwwdata</userinput>
Writing meta data...
initializing activity log
NOT initialized bitmap
New drbd meta data block successfully created.
success
[root@pcmk-2 ~]# <userinput>modprobe drbd</userinput>
WARNING: Deprecated config file /etc/modprobe.conf, all config files belong into /etc/modprobe.d/.
[root@pcmk-2 ~]# <userinput>drbdadm up wwwdata</userinput>
[root@pcmk-2 ~]# <userinput>cat /proc/drbd</userinput>
version: 8.3.6 (api:88/proto:86-90)
GIT-hash: f3606c47cc6fcf6b3f086e425cb34af8b7a81bbf build by root@pcmk-1, 2009-12-08 11:22:57
<emphasis> 1: cs:Connected ro:Secondary/Secondary ds:Inconsistent/Inconsistent C r----</emphasis>
    ns:0 nr:0 dw:0 dr:0 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:12248
</screen>
<para>
Now we need to tell DRBD which set of data to use.
Since both sides contain garbage, we can run the following on pcmk-1:
</para>
<screen>
[root@pcmk-1 ~]# <userinput>drbdadm -- --overwrite-data-of-peer primary wwwdata</userinput>
[root@pcmk-1 ~]# <userinput>cat /proc/drbd</userinput>
version: 8.3.6 (api:88/proto:86-90)
GIT-hash: f3606c47cc6fcf6b3f086e425cb34af8b7a81bbf build by root@pcmk-1, 2009-12-08 11:22:57
 1: cs:SyncSource ro:Primary/Secondary ds:UpToDate/<emphasis>Inconsistent</emphasis> C r----
    ns:2184 nr:0 dw:0 dr:2472 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:10064
        [=====&gt;..............] sync'ed: 33.4% (10064/12248)K
        finish: 0:00:37 speed: 240 (240) K/sec
[root@pcmk-1 ~]# <userinput>cat /proc/drbd</userinput>
version: 8.3.6 (api:88/proto:86-90)
GIT-hash: f3606c47cc6fcf6b3f086e425cb34af8b7a81bbf build by root@pcmk-1, 2009-12-08 11:22:57
 1: <emphasis>cs:Connected ro:Primary/Secondary ds:UpToDate/UpToDate</emphasis> C r----
    ns:12248 nr:0 dw:0 dr:12536 al:0 bm:1 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:0
</screen>
<para>
pcmk-1 is now in the Primary state, which allows it to be written to.
This makes it a good point at which to create a filesystem and populate it with some data to serve up via our WebSite resource.
</para>
</section>
<section>
<title>Populate DRBD with Data</title>
<screen>
[root@pcmk-1 ~]# <userinput>mkfs.ext4 /dev/drbd1</userinput>
mke2fs 1.41.4 (27-Jan-2009)
Filesystem label=
OS type: Linux
Block size=1024 (log=0)
Fragment size=1024 (log=0)
3072 inodes, 12248 blocks
612 blocks (5.00%) reserved for the super user
First data block=1
Maximum filesystem blocks=12582912
2 block groups
8192 blocks per group, 8192 fragments per group
1536 inodes per group
Superblock backups stored on blocks:
        8193
Writing inode tables: done                            
Creating journal (1024 blocks): done
Writing superblocks and filesystem accounting information: done
This filesystem will be automatically checked every 26 mounts or
180 days, whichever comes first.  Use tune2fs -c or -i to override.
</screen>
<para>
Now mount the newly created filesystem so we can create our index file:
</para>
<screen>
[root@pcmk-1 ~]# <userinput>mount /dev/drbd1 /mnt/</userinput>
[root@pcmk-1 ~]# <userinput>cat &lt;&lt;-END &gt;/mnt/index.html</userinput>
&gt; &lt;html&gt;
&gt; &lt;body&gt;My Test Site - drbd&lt;/body&gt;
&gt; &lt;/html&gt;
&gt; END
[root@pcmk-1 ~]# <userinput>umount /dev/drbd1</userinput>
</screen>
</section>
</section>
<section>
<title>Configure the Cluster for DRBD</title>
<para>
One handy feature of the crm shell is that you can use it in interactive mode to make several changes atomically.
</para>
<para>
First we launch the shell. The prompt will change to indicate you’re in interactive mode.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm</userinput>
cib crm(live)#
</screen>
<para>
Next we must create a working copy of the current configuration.
This is where all our changes will go.
The cluster will not see any of them until we say it is OK.
Notice again how the prompt changes, this time to indicate that we’re no longer looking at the live cluster.
</para>
<screen>
cib crm(live)# <userinput>cib new drbd</userinput>
INFO: drbd shadow CIB created
crm(drbd)#
</screen>
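<para>
Should you change your mind before committing, the shadow copy can be discarded without affecting the live cluster.
The commands below are a sketch using the shell's cib level; check the output of help cib for the exact command names in your version:
</para>
<screen>
crm(drbd)# <userinput>cib use live</userinput>
crm(live)# <userinput>cib delete drbd</userinput>
</screen>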
<para>
Now we can create our DRBD clone and display the revised configuration.
</para>
<screen>
crm(drbd)# <userinput>configure primitive WebData ocf:linbit:drbd params drbd_resource=wwwdata \</userinput>
<userinput>        op monitor interval=60s</userinput>
crm(drbd)# <userinput>configure ms WebDataClone WebData meta master-max=1 master-node-max=1 \</userinput>
<userinput>        clone-max=2 clone-node-max=1 notify=true</userinput>
crm(drbd)# <userinput>configure show</userinput>
node pcmk-1
node pcmk-2
<emphasis>primitive WebData ocf:linbit:drbd \</emphasis>
<emphasis> params drbd_resource="wwwdata" \</emphasis>
<emphasis> op monitor interval="60s"</emphasis>
primitive WebSite ocf:heartbeat:apache \
        params configfile="/etc/httpd/conf/httpd.conf" \
        op monitor interval="1min"
primitive ClusterIP ocf:heartbeat:IPaddr2 \
        params ip="192.168.122.101" cidr_netmask="32" \
        op monitor interval="30s"
<emphasis>ms WebDataClone WebData \</emphasis>
<emphasis> meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"</emphasis>
location prefer-pcmk-1 WebSite 50: pcmk-1
colocation website-with-ip inf: WebSite ClusterIP
order apache-after-ip inf: ClusterIP WebSite
property $id="cib-bootstrap-options" \
-        dc-version="1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7" \
+        dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
        cluster-infrastructure="openais" \
        expected-quorum-votes=”2” \
        stonith-enabled="false" \
        no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
        resource-stickiness=”100”
</screen>
<para>
Once we’re happy with the changes, we can tell the cluster to start using them and use crm_mon to check that everything is functioning.
</para>
<screen>
crm(drbd)# <userinput>cib commit drbd</userinput>
INFO: commited 'drbd' shadow CIB to the cluster
crm(drbd)# <userinput>quit</userinput>
bye
[root@pcmk-1 ~]# <userinput>crm_mon</userinput>
============
Last updated: Tue Sep  1 09:37:13 2009
Stack: openais
Current DC: pcmk-1 - partition with quorum
-Version: 1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7
+Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
3 Resources configured.
============
Online: [ pcmk-1 pcmk-2 ]
ClusterIP        (ocf::heartbeat:IPaddr):        Started pcmk-1
WebSite (ocf::heartbeat:apache):        Started pcmk-1
<emphasis>Master/Slave Set: WebDataClone</emphasis>
<emphasis> Masters: [ pcmk-2 ]</emphasis>
<emphasis> Slaves: [ pcmk-1 ]</emphasis>
</screen>
<note>
<para>
TODO: Include full details on adding a second DRBD resource; a brief sketch follows below.
</para>
</note>
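<para>
Until then, the procedure is essentially a repeat of the steps above.
As a rough sketch (the resource name otherdata, its backing logical volume, the minor number /dev/drbd2 and the port 7790 are all hypothetical), you would add a second resource block to /etc/drbd.conf on both nodes, create and bring up its metadata with drbdadm as before, and then define a matching primitive and master/slave resource in the cluster:
</para>
<screen>
resource otherdata {
  meta-disk internal;
  device    /dev/drbd2;
  syncer {
    verify-alg sha1;
  }
  on pcmk-1 {
    disk      /dev/mapper/VolGroup-drbd--other;
    address   192.168.122.101:7790;
  }
  on pcmk-2 {
    disk      /dev/mapper/VolGroup-drbd--other;
    address   192.168.122.102:7790;
  }
}
</screen>
<screen>
crm(live)# <userinput>configure primitive OtherData ocf:linbit:drbd params drbd_resource=otherdata \</userinput>
<userinput>        op monitor interval=60s</userinput>
crm(live)# <userinput>configure ms OtherDataClone OtherData meta master-max=1 master-node-max=1 \</userinput>
<userinput>        clone-max=2 clone-node-max=1 notify=true</userinput>
</screen>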
<para>
Now that DRBD is functioning we can configure a Filesystem resource to use it.
In addition to the filesystem’s definition, we also need to tell the cluster where it can be located (only on the DRBD Primary) and when it is allowed to start (after the Primary has been promoted).
</para>
<para>
Once again, we’ll use the shell’s interactive mode.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm</userinput>
crm(live)# <userinput>cib new fs</userinput>
INFO: fs shadow CIB created
crm(fs)# <userinput>configure primitive WebFS ocf:heartbeat:Filesystem \</userinput>
<userinput>        params device="/dev/drbd/by-res/wwwdata" directory="/var/www/html" fstype="ext4"</userinput>
crm(fs)# <userinput>configure colocation fs_on_drbd inf: WebFS WebDataClone:Master</userinput>
crm(fs)# <userinput>configure order WebFS-after-WebData inf: WebDataClone:promote WebFS:start</userinput>
</screen>
<para>
We also need to tell the cluster that Apache needs to run on the same machine as the filesystem and that it must be active before Apache can start.
</para>
<screen>
crm(fs)# <userinput>configure colocation WebSite-with-WebFS inf: WebSite WebFS</userinput>
crm(fs)# <userinput>configure order WebSite-after-WebFS inf: WebFS WebSite</userinput>
</screen>
<para>
Time to review the updated configuration:
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm configure show</userinput>
node pcmk-1
node pcmk-2
primitive WebData ocf:linbit:drbd \
        params drbd_resource="wwwdata" \
        op monitor interval="60s"
primitive WebFS ocf:heartbeat:Filesystem \
        params device="/dev/drbd/by-res/wwwdata" directory="/var/www/html" fstype="ext4"
primitive WebSite ocf:heartbeat:apache \
        params configfile="/etc/httpd/conf/httpd.conf" \
        op monitor interval="1min"
primitive ClusterIP ocf:heartbeat:IPaddr2 \
        params ip="192.168.122.101" cidr_netmask="32" \
        op monitor interval="30s"
ms WebDataClone WebData \
        meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
location prefer-pcmk-1 WebSite 50: pcmk-1
colocation WebSite-with-WebFS inf: WebSite WebFS
colocation fs_on_drbd inf: WebFS WebDataClone:Master
colocation website-with-ip inf: WebSite ClusterIP
order WebFS-after-WebData inf: WebDataClone:promote WebFS:start
order WebSite-after-WebFS inf: WebFS WebSite
order apache-after-ip inf: ClusterIP WebSite
property $id="cib-bootstrap-options" \
-        dc-version="1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7" \
+        dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
        cluster-infrastructure="openais" \
        expected-quorum-votes=”2” \
        stonith-enabled="false" \
        no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
        resource-stickiness=”100”
</screen>
<para>
After reviewing the new configuration, we again upload it and watch the cluster put it into effect.
</para>
<screen>
crm(fs)# <userinput>cib commit fs</userinput>
INFO: commited 'fs' shadow CIB to the cluster
crm(fs)# <userinput>quit</userinput>
bye
[root@pcmk-1 ~]# <userinput>crm_mon</userinput>
============
Last updated: Tue Sep  1 10:08:44 2009
Stack: openais
Current DC: pcmk-1 - partition with quorum
-Version: 1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7
+Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
4 Resources configured.
============
Online: [ pcmk-1 pcmk-2 ]
ClusterIP        (ocf::heartbeat:IPaddr):        Started pcmk-1
<emphasis>WebSite (ocf::heartbeat:apache): Started pcmk-1</emphasis>
Master/Slave Set: WebDataClone
        Masters: [ pcmk-1 ]
        Slaves: [ pcmk-2 ]
<emphasis>WebFS (ocf::heartbeat:Filesystem): Started pcmk-1</emphasis>
</screen>
<section>
<title>Testing Migration</title>
<para>
We could shut down the active node again, but another way to safely simulate recovery is to put the node into what is called “standby mode”.
Nodes in this state tell the cluster that they are not allowed to run resources.
Any resources found active there will be moved elsewhere.
This feature can be particularly useful when updating the resources’ packages.
</para>
<para>
Put the local node into standby mode and observe the cluster move all the resources to the other node.
Note also that the node’s status will change to indicate that it can no longer host resources.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm node standby</userinput>
[root@pcmk-1 ~]# <userinput>crm_mon</userinput>
============
Last updated: Tue Sep  1 10:09:57 2009
Stack: openais
Current DC: pcmk-1 - partition with quorum
-Version: 1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7
+Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
4 Resources configured.
============
<emphasis>Node pcmk-1: standby</emphasis>
Online: [ pcmk-2 ]
ClusterIP        (ocf::heartbeat:IPaddr):        <emphasis>Started pcmk-2</emphasis>
WebSite (ocf::heartbeat:apache):        <emphasis>Started pcmk-2</emphasis>
Master/Slave Set: WebDataClone
        <emphasis>Masters: [ pcmk-2 ]</emphasis>
        Stopped: [ WebData:1 ]
WebFS   (ocf::heartbeat:Filesystem):    <emphasis>Started pcmk-2</emphasis>
</screen>
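<para>
The command above acts on the node it is run from.
If you ever need to drain a different node, the crm shell also accepts the node name as an argument, for example (illustrative only, using the node names from this guide):
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm node standby pcmk-2</userinput>
</screen>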
<para>
Once we’ve done everything we needed to on pcmk-1 (in this case nothing, we just wanted to see the resources move), we can allow the node to be a full cluster member again.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm node online</userinput>
[root@pcmk-1 ~]# <userinput>crm_mon</userinput>
============
Last updated: Tue Sep  1 10:13:25 2009
Stack: openais
Current DC: pcmk-1 - partition with quorum
-Version: 1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7
+Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
4 Resources configured.
============
<emphasis>Online: [ pcmk-1 pcmk-2 ]</emphasis>
ClusterIP        (ocf::heartbeat:IPaddr):        Started pcmk-2
WebSite (ocf::heartbeat:apache):        Started pcmk-2
Master/Slave Set: WebDataClone
        Masters: [ pcmk-2 ]
        Slaves: [ pcmk-1 ]
WebFS   (ocf::heartbeat:Filesystem):    Started pcmk-2
</screen>
<para>
Notice that our resource stickiness settings prevent the services from migrating back to pcmk-1.
</para>
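<para>
If you did want the services back on pcmk-1, one option is to move a resource explicitly and then clear the constraint that the move leaves behind.
The commands below are a sketch using the crm shell: migrate adds a location constraint preferring the named node and unmigrate removes it again; which member of the stack is best to move depends on your colocation constraints, so check crm_mon afterwards.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm resource migrate WebSite pcmk-1</userinput>
[root@pcmk-1 ~]# <userinput>crm resource unmigrate WebSite</userinput>
</screen>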
</section>
</section>
</chapter>
diff --git a/doc/Clusters_from_Scratch/en-US/Ch-Stonith.xml b/doc/Clusters_from_Scratch/en-US/Ch-Stonith.xml
index 3aaf5cdbcd..5b88f4c69a 100644
--- a/doc/Clusters_from_Scratch/en-US/Ch-Stonith.xml
+++ b/doc/Clusters_from_Scratch/en-US/Ch-Stonith.xml
@@ -1,161 +1,161 @@
<?xml version='1.0' encoding='utf-8' ?>
<!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
<!ENTITY % BOOK_ENTITIES SYSTEM "Clusters_from_Scratch.ent">
%BOOK_ENTITIES;
]>
-<chapter>
+<chapter id="ch-stonith">
<title>Configure STONITH</title>
<section>
<title>Why You Need STONITH</title>
<para>
STONITH is an acronym for Shoot-The-Other-Node-In-The-Head and it protects your data from being corrupted by rogue nodes or concurrent access.
</para>
<para>
Just because a node is unresponsive doesn’t mean it has stopped accessing your data. The only way to be 100% sure that your data is safe is to use STONITH, so we can be certain that the node is truly offline before allowing the data to be accessed from another node.
</para>
<para>
STONITH also has a role to play in the event that a clustered service cannot be stopped. In this case, the cluster uses STONITH to force the whole node offline, thereby making it safe to start the service elsewhere.
</para>
</section>
<section>
<title>What STONITH Device Should You Use</title>
<para>
It is crucial that the STONITH device allows the cluster to differentiate between a node failure and a network failure.
</para>
<para>
The biggest mistake people make in choosing a STONITH device is to use a remote power switch (such as many onboard IPMI controllers) that shares power with the node it controls. In such cases, the cluster cannot be sure whether the node is really offline or active and merely suffering from a network fault.
</para>
<para>
Likewise, any device that relies on the machine being active (such as SSH-based “devices” used during testing) is inappropriate.
</para>
</section>
<section>
<title>Configuring STONITH</title>
<orderedlist>
<listitem>
<para>
Find the correct driver: stonith -L
</para>
</listitem>
<listitem>
<para>
Since every device is different, the parameters needed to configure it will vary. To find out the parameters required by the device: stonith -t {type} -n
</para>
</listitem>
</orderedlist>
<para>
Hopefully the developers chose names that make sense; if not, you can query for some additional information by finding an active cluster node and running:
</para>
<screen>lrmadmin -M stonith {type} pacemaker
</screen>
<para>
The output should be XML-formatted text containing additional parameter descriptions.
</para>
<orderedlist>
<listitem>
<para>
Create a file called stonith.xml containing a primitive resource with a class of stonith, a type of {type} and a parameter for each of the values returned in step 2 (a minimal sketch appears after this list)
</para>
</listitem>
<listitem>
<para>
Create a clone from the primitive resource if the device can shoot more than one node<emphasis> and supports multiple simultaneous connections</emphasis>.
</para>
</listitem>
<listitem>
<para>
Upload it into the CIB using cibadmin: cibadmin -C -o resources --xml-file stonith.xml
</para>
</listitem>
</orderedlist>
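<para>
As a minimal sketch of step 3 (the ids are arbitrary, and the parameter names and values are those used in the example that follows), stonith.xml might look something like this:
</para>
<screen>
&lt;primitive id="rsa-fencing" class="stonith" type="external/ibmrsa"&gt;
  &lt;instance_attributes id="rsa-fencing-params"&gt;
    &lt;nvpair id="rsa-fencing-hostname" name="hostname" value="pcmk-1 pcmk-2"/&gt;
    &lt;nvpair id="rsa-fencing-ipaddr" name="ipaddr" value="192.168.122.31"/&gt;
    &lt;nvpair id="rsa-fencing-userid" name="userid" value="mgmt"/&gt;
    &lt;nvpair id="rsa-fencing-passwd" name="passwd" value="abc123"/&gt;
    &lt;nvpair id="rsa-fencing-type" name="type" value="ibm"/&gt;
  &lt;/instance_attributes&gt;
  &lt;operations&gt;
    &lt;op id="rsa-fencing-monitor-60s" name="monitor" interval="60s"/&gt;
  &lt;/operations&gt;
&lt;/primitive&gt;
</screen>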
<section>
<title>Example</title>
<para>
Assuming we have an IBM BladeCenter containing our two nodes and the management interface is active on 192.168.122.31, we would choose the external/ibmrsa driver and obtain the following list of parameters from step 2
</para>
<screen>
[root@pcmk-1 ~]# <userinput>stonith -t external/ibmrsa -n</userinput>
hostname  ipaddr  userid  passwd  type
</screen>
<para>
Assuming we know the username and password for the management interface, we would create a STONITH resource with the shell:
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm </userinput>
crm(live)# <userinput>cib new stonith</userinput>
INFO: stonith shadow CIB created
crm(stonith)# <userinput>configure primitive rsa-fencing stonith::external/ibmrsa \</userinput>
<userinput>        params hostname=”pcmk-1 pcmk-2" ipaddr=192.168.122.31 userid=mgmt passwd=abc123 type=ibm \</userinput>
<userinput>        op monitor interval="60s"</userinput>
crm(stonith)# <userinput>configure clone Fencing rsa-fencing</userinput>
</screen>
<para>
And finally, since we disabled it earlier, we need to re-enable STONITH:
</para>
<screen>
crm(stonith)# <userinput>configure property stonith-enabled="true"</userinput>
crm(stonith)# <userinput>configure show</userinput>
node pcmk-1
node pcmk-2
primitive WebData ocf:linbit:drbd \
        params drbd_resource="wwwdata" \
        op monitor interval="60s"
primitive WebFS ocf:heartbeat:Filesystem \
        params device="/dev/drbd/by-res/wwwdata" directory="/var/www/html" fstype=”gfs2”
primitive WebSite ocf:heartbeat:apache \
        params configfile="/etc/httpd/conf/httpd.conf" \
        op monitor interval="1min"
primitive ClusterIP ocf:heartbeat:IPaddr2 \
        params ip=”192.168.122.101” cidr_netmask=”32” clusterip_hash=”sourceip” \
        op monitor interval="30s"
primitive dlm ocf:pacemaker:controld \
        op monitor interval="120s"
primitive gfs-control ocf:pacemaker:controld \
   params daemon=”gfs_controld.pcmk” args=”-g 0” \
        op monitor interval="120s"
<emphasis>primitive rsa-fencing stonith::external/ibmrsa \</emphasis>
<emphasis> params hostname=”pcmk-1 pcmk-2" ipaddr=192.168.122.31 userid=mgmt passwd=abc123 type=ibm \</emphasis>
<emphasis> op monitor interval="60s"</emphasis>
ms WebDataClone WebData \
        meta master-max="2" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
<emphasis>clone Fencing rsa-fencing </emphasis>
clone WebFSClone WebFS
clone WebIP ClusterIP  \
        meta globally-unique=”true” clone-max=”2” clone-node-max=”2”
clone WebSiteClone WebSite
clone dlm-clone dlm \
        meta interleave="true"
clone gfs-clone gfs-control \
        meta interleave="true"
colocation WebFS-with-gfs-control inf: WebFSClone gfs-clone
colocation WebSite-with-WebFS inf: WebSiteClone WebFSClone
colocation fs_on_drbd inf: WebFSClone WebDataClone:Master
colocation gfs-with-dlm inf: gfs-clone dlm-clone
colocation website-with-ip inf: WebSiteClone WebIP
order WebFS-after-WebData inf: WebDataClone:promote WebFSClone:start
order WebSite-after-WebFS inf: WebFSClone WebSiteClone
order apache-after-ip inf: WebIP WebSiteClone
order start-WebFS-after-gfs-control inf: gfs-clone WebFSClone
order start-gfs-after-dlm inf: dlm-clone gfs-clone
property $id="cib-bootstrap-options" \
-        dc-version="1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7" \
+        dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
        cluster-infrastructure="openais" \
        expected-quorum-votes=”2” \
        <emphasis>stonith-enabled="true"</emphasis> \
        no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
        resource-stickiness=”100”
</screen>
</section>
</section>
</chapter>
diff --git a/doc/Clusters_from_Scratch/en-US/Ch-Tools.xml b/doc/Clusters_from_Scratch/en-US/Ch-Tools.xml
index cef8355d03..23a67cfd1b 100644
--- a/doc/Clusters_from_Scratch/en-US/Ch-Tools.xml
+++ b/doc/Clusters_from_Scratch/en-US/Ch-Tools.xml
@@ -1,122 +1,121 @@
<?xml version='1.0' encoding='utf-8' ?>
<!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
<!ENTITY % BOOK_ENTITIES SYSTEM "Clusters_from_Scratch.ent">
%BOOK_ENTITIES;
]>
<chapter>
<title>Using Pacemaker Tools</title>
<para>
In the dark past, configuring Pacemaker required the administrator to read and write XML. In true UNIX style, there were also a number of different commands that specialized in different aspects of querying and updating the cluster.
</para>
<para>
Since Pacemaker 1.0, this has all changed and we have an integrated, scriptable cluster shell that hides all the messy XML scaffolding. It even allows you to queue up several changes at once and commit them atomically.
</para>
<para>
Take some time to familiarize yourself with what it can do.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm --help</userinput>
usage:
    crm [-D display_type]
    crm [-D display_type] args
    crm [-D display_type] [-f file]
    Use crm without arguments for an interactive session.
    Supply one or more arguments for a "single-shot" use.
    Specify with -f a file which contains a script. Use '-' for
    standard input or use pipe/redirection.
    crm displays cli format configurations using a color scheme
    and/or in uppercase. Pick one of "color" or "uppercase", or
    use "-D color,uppercase" if you want colorful uppercase.
    Get plain output by "-D plain". The default may be set in
    user preferences (options).
Examples:
    # crm -f stopapp2.cli
    # crm &lt; stopapp2.cli
    # crm resource stop global_www
    # crm status
</screen>
<para>
The primary tool for monitoring the status of the cluster is crm_mon (also available as crm status). It can be run in a variety of modes and has a number of output options. To find out about any of the tools that come with Pacemaker, simply invoke them with the <command>--help</command> option or consult the included man pages. Both sets of output are generated from the tool itself, and so will always be in sync with each other and with the tool.
</para>
<para>
Additionally, the Pacemaker version and supported cluster stack(s) are available via the <command>--version</command> option.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>crm_mon --version</userinput>
-crm_mon 1.0.5 for OpenAIS and Heartbeat (Build: 462f1569a43740667daf7b0f6b521742e9eb8fa7)
-
+Pacemaker 1.1.5
Written by Andrew Beekhof
[root@pcmk-1 ~]# <userinput>crm_mon --help</userinput>
crm_mon - Provides a summary of cluster's current state.
Outputs varying levels of detail in a number of different formats.
Usage: crm_mon mode [options]
Options:
 -?, --help                 This text
 -$, --version             Version information
 -V, --verbose             Increase debug output
Modes:
 -h, --as-html=value        Write cluster status to the named file
 -w, --web-cgi             Web mode with output suitable for cgi
 -s, --simple-status       Display the cluster status once as a simple one line output (suitable for nagios)
 -S, --snmp-traps=value    Send SNMP traps to this station
 -T, --mail-to=value        Send Mail alerts to this user.  See also --mail-from, --mail-host, --mail-prefix
Display Options:
 -n, --group-by-node       Group resources by node
 -r, --inactive             Display inactive resources
 -f, --failcounts           Display resource fail counts
 -o, --operations           Display resource operation history
 -t, --timing-details       Display resource operation history with timing details
Additional Options:
 -i, --interval=value           Update frequency in seconds
 -1, --one-shot                 Display the cluster status once on the console and exit
 -N, --disable-ncurses          Disable the use of ncurses
 -d, --daemonize                Run in the background as a daemon
 -p, --pid-file=value           (Advanced) Daemon pid file location
 -F, --mail-from=value          Mail alerts should come from the named user
 -H, --mail-host=value          Mail alerts should be sent via the named host
 -P, --mail-prefix=value        Subjects for mail alerts should start with this string
 -E, --external-agent=value     A program to run when resource operations take place.
 -e, --external-recipient=value A recipient for your program (assuming you want the program to send something to someone).
Examples:
Display the cluster's status on the console with updates as they occur:
        # crm_mon
Display the cluster's status on the console just once then exit:
        # crm_mon -1
Display your cluster's status, group resources by node, and include inactive resources in the list:
        # crm_mon --group-by-node --inactive
Start crm_mon as a background daemon and have it write the cluster's status to an HTML file:
        # crm_mon --daemonize --as-html /path/to/docroot/filename.html
Start crm_mon as a background daemon and have it send email alerts:
        # crm_mon --daemonize --mail-to user@example.com --mail-host mail.example.com
Start crm_mon as a background daemon and have it send SNMP alerts:
        # crm_mon --daemonize --snmp-traps snmptrapd.example.com
Report bugs to pacemaker@oss.clusterlabs.org
</screen>
<note>
<para>
If the SNMP and/or email options are not listed, then Pacemaker was not built to support them. This may be by the choice of your distribution or the required libraries may not have been available. Please contact whoever supplied you with the packages for more details.
</para>
</note>
</chapter>
diff --git a/doc/Clusters_from_Scratch/en-US/Ch-Verification.xml b/doc/Clusters_from_Scratch/en-US/Ch-Verification.xml
index b3b4a3c1f4..b8149eb5e0 100644
--- a/doc/Clusters_from_Scratch/en-US/Ch-Verification.xml
+++ b/doc/Clusters_from_Scratch/en-US/Ch-Verification.xml
@@ -1,140 +1,140 @@
<?xml version='1.0' encoding='utf-8' ?>
<!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
<!ENTITY % BOOK_ENTITIES SYSTEM "Clusters_from_Scratch.ent">
%BOOK_ENTITIES;
]>
<chapter>
<title>Verify Cluster Installation</title>
<section>
<title>Verify Corosync Installation</title>
<para>
Start Corosync on the first node
</para>
<screen>
[root@pcmk-1 ~]# <userinput>/etc/init.d/corosync start</userinput>
<emphasis>Starting Corosync Cluster Engine (corosync): [ OK ]</emphasis>
</screen>
<para>
Check the cluster started correctly and that an initial membership was able to form
</para>
<screen>
[root@pcmk-1 ~]# <userinput>grep -e "corosync.*network interface" -e "Corosync Cluster Engine" -e "Successfully read main configuration file" /var/log/messages</userinput>
Aug 27 09:05:34 pcmk-1 corosync[1540]: [MAIN  ] Corosync Cluster Engine ('1.1.0'): started and ready to provide service.
Aug 27 09:05:34 pcmk-1 corosync[1540]: [MAIN  ] Successfully read main configuration file '/etc/corosync/corosync.conf'.
[root@pcmk-1 ~]# <userinput>grep TOTEM /var/log/messages</userinput>
Aug 27 09:05:34 pcmk-1 corosync[1540]: [TOTEM ] Initializing transport (UDP/IP).
Aug 27 09:05:34 pcmk-1 corosync[1540]: [TOTEM ] Initializing transmit/receive security: libtomcrypt SOBER128/SHA1HMAC (mode 0).
Aug 27 09:05:35 pcmk-1 corosync[1540]: [TOTEM ] <emphasis>The network interface [192.168.122.101] is now up.</emphasis>
Aug 27 09:05:35 pcmk-1 corosync[1540]: [TOTEM ] <emphasis>A processor joined or left the membership and a new membership was formed.</emphasis>
</screen>
<para>
With one node functional, it’s now safe to start Corosync on the second node as well.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>ssh pcmk-2 -- /etc/init.d/corosync start</userinput>
<emphasis>Starting Corosync Cluster Engine (corosync): [ OK ]</emphasis>
[root@pcmk-1 ~]#
</screen>
<para>
Check the cluster formed correctly
</para>
<screen>
[root@pcmk-1 ~]# <userinput>grep TOTEM /var/log/messages</userinput>
Aug 27 09:05:34 pcmk-1 corosync[1540]: [TOTEM ] Initializing transport (UDP/IP).
Aug 27 09:05:34 pcmk-1 corosync[1540]: [TOTEM ] Initializing transmit/receive security: libtomcrypt SOBER128/SHA1HMAC (mode 0).
Aug 27 09:05:35 pcmk-1 corosync[1540]: [TOTEM ] <emphasis>The network interface [192.168.122.101] is now up.</emphasis>
Aug 27 09:05:35 pcmk-1 corosync[1540]: [TOTEM ] <emphasis>A processor joined or left the membership and a new membership was formed.</emphasis>
Aug 27 09:12:11 pcmk-1 corosync[1540]: [TOTEM ] <emphasis>A processor joined or left the membership and a new membership was formed</emphasis>.
</screen>
</section>
<section>
<title>Verify Pacemaker Installation</title>
<para>
Now that we have confirmed that Corosync is functional we can check the rest of the stack.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>grep pcmk_startup /var/log/messages</userinput>
Aug 27 09:05:35 pcmk-1 corosync[1540]:   [pcmk  ] info: pcmk_startup: <emphasis>CRM: Initialized</emphasis>
Aug 27 09:05:35 pcmk-1 corosync[1540]:   [pcmk  ] Logging: Initialized pcmk_startup
Aug 27 09:05:35 pcmk-1 corosync[1540]:   [pcmk  ] info: pcmk_startup: Maximum core file size is: 18446744073709551615
Aug 27 09:05:35 pcmk-1 corosync[1540]:   [pcmk  ] info: pcmk_startup: <emphasis>Service: 9</emphasis>
Aug 27 09:05:35 pcmk-1 corosync[1540]:   [pcmk  ] info: pcmk_startup: <emphasis>Local hostname: pcmk-1</emphasis>
</screen>
<para>
- Now try starting Pacemaker and the necessary processes have been started
+ Now try starting Pacemaker and check that the necessary processes have been started.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>/etc/init.d/pacemaker start</userinput>
<emphasis>Starting Pacemaker Cluster Manager: [ OK ]</emphasis>
</screen>
<screen>
[root@pcmk-1 ~]# <userinput>grep -e pacemakerd.*get_config_opt -e pacemakerd.*start_child -e "Starting Pacemaker" /var/log/messages</userinput>
Feb 8 13:31:24 pcmk-1 pacemakerd: [13155]: info: get_config_opt: Found 'pacemaker' for option: name
Feb 8 13:31:24 pcmk-1 pacemakerd: [13155]: info: get_config_opt: <emphasis>Found '1' for option: ver</emphasis>
Feb 8 13:31:24 pcmk-1 pacemakerd: [13155]: info: get_config_opt: Defaulting to 'no' for option: use_logd
Feb 8 13:31:24 pcmk-1 pacemakerd: [13155]: info: get_config_opt: Defaulting to 'no' for option: use_mgmtd
Feb 8 13:31:24 pcmk-1 pacemakerd: [13155]: info: get_config_opt: Found 'on' for option: debug
Feb 8 13:31:24 pcmk-1 pacemakerd: [13155]: info: get_config_opt: Found 'yes' for option: to_logfile
Feb 8 13:31:24 pcmk-1 pacemakerd: [13155]: info: get_config_opt: Found '/var/log/corosync.log' for option: logfile
Feb 8 13:31:24 pcmk-1 pacemakerd: [13155]: info: get_config_opt: Found 'yes' for option: to_syslog
Feb 8 13:31:24 pcmk-1 pacemakerd: [13155]: info: get_config_opt: Found 'daemon' for option: syslog_facility
Feb 8 16:50:38 pcmk-1 pacemakerd: [13990]: info: main: <emphasis>Starting Pacemaker 1.1.5 (Build: 31f088949239+)</emphasis>: docbook-manpages publican ncurses trace-logging cman cs-quorum heartbeat corosync snmp libesmtp
Feb 8 16:50:38 pcmk-1 pacemakerd: [13990]: info: start_child: Forked child 14022 <emphasis>for process stonith-ng</emphasis>
Feb 8 16:50:38 pcmk-1 pacemakerd: [13990]: info: start_child: Forked child 14023 <emphasis>for process cib</emphasis>
Feb 8 16:50:38 pcmk-1 pacemakerd: [13990]: info: start_child: Forked child 14024 <emphasis>for process lrmd</emphasis>
Feb 8 16:50:38 pcmk-1 pacemakerd: [13990]: info: start_child: Forked child 14025 <emphasis>for process attrd</emphasis>
Feb 8 16:50:38 pcmk-1 pacemakerd: [13990]: info: start_child: Forked child 14026 <emphasis>for process pengine</emphasis>
Feb 8 16:50:38 pcmk-1 pacemakerd: [13990]: info: start_child: Forked child 14027 <emphasis>for process crmd</emphasis>
</screen>
<screen>
[root@pcmk-1 ~]# <userinput>ps axf</userinput>
  PID TTY      STAT   TIME COMMAND
    2 ?        S&lt;     0:00 [kthreadd]
    3 ?        S&lt;     0:00  \_ [migration/0]
... lots of processes ...
 13990 ?    S      0:01 <emphasis>pacemakerd</emphasis>
 14022 ?    Sa      0:00  <emphasis>\_ </emphasis>/usr/lib64/heartbeat/<emphasis>stonithd</emphasis>
 14023 ?    Sa      0:00  <emphasis>\_ </emphasis>/usr/lib64/heartbeat/<emphasis>cib</emphasis>
 14024 ?    Sa      0:00  <emphasis>\_ </emphasis>/usr/lib64/heartbeat/<emphasis>lrmd</emphasis>
 14025 ?    Sa      0:00  <emphasis>\_ </emphasis>/usr/lib64/heartbeat/<emphasis>attrd</emphasis>
 14026 ?    Sa      0:00  <emphasis>\_ </emphasis>/usr/lib64/heartbeat/<emphasis>pengine</emphasis>
 14027 ?    Sa      0:00  <emphasis>\_ </emphasis>/usr/lib64/heartbeat/<emphasis>crmd</emphasis>
</screen>
<para>
Next, check for any ERRORs during startup - there shouldn’t be any.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>grep ERROR: /var/log/messages | grep -v unpack_resources</userinput>
[root@pcmk-1 ~]#
</screen>
<para>
Repeat on the other node and display the cluster's status.
</para>
<screen>
[root@pcmk-1 ~]# <userinput>ssh pcmk-2 -- /etc/init.d/pacemaker start</userinput>
<emphasis>Starting Pacemaker Cluster Manager: [ OK ]</emphasis>
[root@pcmk-1 ~]# <userinput>crm_mon</userinput>
============
Last updated: Thu Aug 27 16:54:55 2009
<emphasis>Stack: openais</emphasis>
Current DC: pcmk-1 - <emphasis>partition with quorum</emphasis>
-Version: 1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7
+Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
<emphasis>2 Nodes configured, 2 expected votes</emphasis>
<emphasis>0 Resources configured</emphasis>.
============
<emphasis>Online: [ pcmk-1 pcmk-2 ]</emphasis>
</screen>
</section>
</chapter>
