diff --git a/doc/Clusters_from_Scratch/en-US/Ap-Configuration.xml b/doc/Clusters_from_Scratch/en-US/Ap-Configuration.xml
index 0add20ae3e..268a065256 100644
--- a/doc/Clusters_from_Scratch/en-US/Ap-Configuration.xml
+++ b/doc/Clusters_from_Scratch/en-US/Ap-Configuration.xml
@@ -1,274 +1,274 @@
%BOOK_ENTITIES;
]>
Configuration Recap
Final Cluster Configuration
[root@pcmk-1 ~]# crm configure show
node pcmk-1
node pcmk-2
primitive WebData ocf:linbit:drbd \
params drbd_resource="wwwdata" \
op monitor interval="60s"
primitive WebFS ocf:heartbeat:Filesystem \
params device="/dev/drbd/by-res/wwwdata" directory="/var/www/html" fstype="gfs2"
primitive WebSite ocf:heartbeat:apache \
params configfile="/etc/httpd/conf/httpd.conf" \
op monitor interval="1min"
primitive ClusterIP ocf:heartbeat:IPaddr2 \
params ip="192.168.122.101" cidr_netmask="32" clusterip_hash="sourceip" \
op monitor interval="30s"
primitive dlm ocf:pacemaker:controld \
op monitor interval="120s"
primitive gfs-control ocf:pacemaker:controld \
params daemon="gfs_controld.pcmk" args="-g 0" \
op monitor interval="120s"
primitive rsa-fencing stonith::external/ibmrsa \
params hostname="pcmk-1 pcmk-2" ipaddr=192.168.122.31 userid=mgmt passwd=abc123 type=ibm \
op monitor interval="60s"
ms WebDataClone WebData \
meta master-max="2" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
clone Fencing rsa-fencing
clone WebFSClone WebFS
clone WebIP ClusterIP \
meta globally-unique="true" clone-max="2" clone-node-max="2"
clone WebSiteClone WebSite
clone dlm-clone dlm \
meta interleave="true"
clone gfs-clone gfs-control \
meta interleave="true"
colocation WebFS-with-gfs-control inf: WebFSClone gfs-clone
colocation WebSite-with-WebFS inf: WebSiteClone WebFSClone
colocation fs_on_drbd inf: WebFSClone WebDataClone:Master
colocation gfs-with-dlm inf: gfs-clone dlm-clone
colocation website-with-ip inf: WebSiteClone WebIP
order WebFS-after-WebData inf: WebDataClone:promote WebFSClone:start
order WebSite-after-WebFS inf: WebFSClone WebSiteClone
order apache-after-ip inf: WebIP WebSiteClone
order start-WebFS-after-gfs-control inf: gfs-clone WebFSClone
order start-gfs-after-dlm inf: dlm-clone gfs-clone
property $id="cib-bootstrap-options" \
- dc-version="1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7" \
+ dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
cluster-infrastructure="openais" \
expected-quorum-votes="2" \
stonith-enabled="true" \
no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
resource-stickiness="100"
Node List
The list of cluster nodes is automatically populated by the cluster.
node pcmk-1
node pcmk-2
Cluster Options
This is where the cluster automatically stores some information about the cluster
dc-version - the version (including upstream source-code hash) of Pacemaker used on the DC
cluster-infrastructure - the cluster infrastructure being used (heartbeat or openais)
expected-quorum-votes - the maximum number of nodes expected to be part of the cluster
and where the admin can set options that control the way the cluster operates
stonith-enabled=true - Make use of STONITH
no-quorum-policy=ignore - Ignore loss of quorum and continue to host resources.
property $id="cib-bootstrap-options" \
- dc-version="1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7" \
+ dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
cluster-infrastructure="openais" \
expected-quorum-votes="2" \
stonith-enabled="true" \
no-quorum-policy="ignore"
Resources
Default Options
Here we configure cluster options that apply to every resource.
resource-stickiness - Specify the aversion to moving resources to other machines
rsc_defaults $id="rsc-options" \
resource-stickiness="100"
Fencing
TODO: Add text here
primitive rsa-fencing stonith::external/ibmrsa \
params hostname="pcmk-1 pcmk-2" ipaddr=192.168.122.31 userid=mgmt passwd=abc123 type=ibm \
op monitor interval="60s"
clone Fencing rsa-fencing
Service Address
Users of the services provided by the cluster require an unchanging address with which to access it. Additionally, we cloned the address so it will be active on both nodes. An iptables rule (created as part of the resource agent) is used to ensure that each request is only processed by one of the two clone instances; a sketch of such a rule appears after the configuration below. The additional meta options tell the cluster that we want two instances of the clone (one "request bucket" for each node) and that if one node fails, then the remaining node should hold both.
primitive ClusterIP ocf:heartbeat:IPaddr2 \
params ip="192.168.122.101" cidr_netmask="32" clusterip_hash="sourceip" \
op monitor interval="30s"
clone WebIP ClusterIP \
meta globally-unique="true" clone-max="2" clone-node-max="2"
TODO: The RA should check for globally-unique=true when cloned
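For illustration only, the kind of rule the IPaddr2 agent manages underneath looks roughly like the following; the interface and multicast MAC address here are made-up examples, and the agent generates the real values itself, so nothing needs to be added by hand.
# Hypothetical sketch of the CLUSTERIP rule created by the IPaddr2 agent
# (example interface and cluster MAC; the agent picks the real values)
iptables -I INPUT -d 192.168.122.101 -i eth0 -j CLUSTERIP --new \
        --hashmode sourceip --clustermac 01:00:5E:7A:CD:65 \
        --total-nodes 2 --local-node 1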
Distributed lock manager
Cluster filesystems like GFS2 require a lock manager. This service starts the daemon that provides user-space applications (such as the GFS2 daemon) with access to the in-kernel lock manager. Since we need it to be available on all nodes in the cluster, we have it cloned.
primitive dlm ocf:pacemaker:controld \
op monitor interval="120s"
clone dlm-clone dlm \
meta interleave="true"
TODO: Confirm interleave is no longer needed
GFS control daemon
GFS2 also needs a user-space/kernel bridge that runs on every node. So here we have another clone; however, this time we must also specify that it can only run on machines that are also running the DLM (colocation constraint) and that it can only be started after the DLM is running (order constraint). Additionally, the gfs-control clone should only care about the DLM instances it is paired with, so we need to set the interleave option.
primitive gfs-control ocf:pacemaker:controld \
params daemon="gfs_controld.pcmk" args="-g 0" \
op monitor interval="120s"
clone gfs-clone gfs-control \
meta interleave="true"
colocation gfs-with-dlm inf: gfs-clone dlm-clone
order start-gfs-after-dlm inf: dlm-clone gfs-clone
DRBD - Shared Storage
Here we define the DRBD service and specify which DRBD resource (from drbd.conf) it should manage. We make it a master/slave resource and, in order to have an active/active setup, allow both instances to be promoted by specifying master-max=2. We also set the notify option so that the cluster will tell the DRBD agent when its peer changes state.
primitive WebData ocf:linbit:drbd \
params drbd_resource="wwwdata" \
op monitor interval="60s"
ms WebDataClone WebData \
meta master-max="2" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
Cluster Filesystem
The cluster filesystem ensures that files are read and written correctly. We need to specify the block device (provided by DRBD), where we want it mounted and that we are using GFS2. Again it is a clone because it is intended to be active on both nodes. The additional constraints ensure that it can only be started on nodes with active gfs-control and drbd instances.
primitive WebFS ocf:heartbeat:Filesystem \
params device="/dev/drbd/by-res/wwwdata" directory="/var/www/html" fstype="gfs2"
clone WebFSClone WebFS
colocation WebFS-with-gfs-control inf: WebFSClone gfs-clone
colocation fs_on_drbd inf: WebFSClone WebDataClone:Master
order WebFS-after-WebData inf: WebDataClone:promote WebFSClone:start
order start-WebFS-after-gfs-control inf: gfs-clone WebFSClone
Apache
Lastly we have the actual service, Apache. We need only tell the cluster where to find its main configuration file and restrict it to running on nodes that have the required filesystem mounted and the IP address active.
primitive WebSite ocf:heartbeat:apache \
params configfile="/etc/httpd/conf/httpd.conf" \
op monitor interval="1min"
clone WebSiteClone WebSite
colocation WebSite-with-WebFS inf: WebSiteClone WebFSClone
colocation website-with-ip inf: WebSiteClone WebIP
order apache-after-ip inf: WebIP WebSiteClone
order WebSite-after-WebFS inf: WebFSClone WebSiteClone
diff --git a/doc/Clusters_from_Scratch/en-US/Ap-Corosync-Conf.xml b/doc/Clusters_from_Scratch/en-US/Ap-Corosync-Conf.xml
index e162b643e3..094b5b2c31 100644
--- a/doc/Clusters_from_Scratch/en-US/Ap-Corosync-Conf.xml
+++ b/doc/Clusters_from_Scratch/en-US/Ap-Corosync-Conf.xml
@@ -1,74 +1,79 @@
%BOOK_ENTITIES;
]>
- Sample Corosync.conf
+ Sample Corosync Configuration
-
+
+ Sample Corosync.conf for a two-node cluster
+
+
+ ]]>
+
+
diff --git a/doc/Clusters_from_Scratch/en-US/Ch-Active-Passive.xml b/doc/Clusters_from_Scratch/en-US/Ch-Active-Passive.xml
index 1a4373ba29..9f508bc07e 100644
--- a/doc/Clusters_from_Scratch/en-US/Ch-Active-Passive.xml
+++ b/doc/Clusters_from_Scratch/en-US/Ch-Active-Passive.xml
@@ -1,401 +1,408 @@
%BOOK_ENTITIES;
]>
Creating an Active/Passive Cluster
Exploring the Existing Configuration
When Pacemaker starts up, it automatically records the number and details of the nodes in the cluster as well as which stack is being used and the version of Pacemaker being used.
This is what the base configuration should look like.
[root@pcmk-2 ~]# crm configure show
node pcmk-1
node pcmk-2
property $id="cib-bootstrap-options" \
- dc-version="1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7" \
+ dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
cluster-infrastructure="openais" \
expected-quorum-votes="2"
For those who are not afraid of XML, you can see the raw configuration by appending "xml" to the previous command.
[root@pcmk-2 ~]# crm configure show xml
<?xml version="1.0" ?>
<cib admin_epoch="0" crm_feature_set="3.0.1" dc-uuid="pcmk-1" epoch="13" have-quorum="1" num_updates="7" validate-with="pacemaker-1.0">
<configuration>
<crm_config>
<cluster_property_set id="cib-bootstrap-options">
- <nvpair id="cib-bootstrap-options-dc-version" name="dc-version" value="1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7"/>
+ <nvpair id="cib-bootstrap-options-dc-version" name="dc-version" value="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f"/>
<nvpair id="cib-bootstrap-options-cluster-infrastructure" name="cluster-infrastructure" value="openais"/>
<nvpair id="cib-bootstrap-options-expected-quorum-votes" name="expected-quorum-votes" value="2"/>
</cluster_property_set>
</crm_config>
<rsc_defaults/>
<op_defaults/>
<nodes>
<node id="pcmk-1" type="normal" uname="pcmk-1"/>
<node id="pcmk-2" type="normal" uname="pcmk-2"/>
</nodes>
<resources/>
<constraints/>
</configuration>
</cib>
The last XML you’ll see in this document
Before we make any changes, it's a good idea to check the validity of the configuration.
[root@pcmk-1 ~]# crm_verify -L
crm_verify[2195]: 2009/08/27_16:57:12 ERROR: unpack_resources: Resource start-up disabled since no STONITH resources have been defined
crm_verify[2195]: 2009/08/27_16:57:12 ERROR: unpack_resources: Either configure some or disable STONITH with the stonith-enabled option
crm_verify[2195]: 2009/08/27_16:57:12 ERROR: unpack_resources: NOTE: Clusters with shared data need STONITH to ensure data integrity
Errors found during check: config not valid
-V may provide more details
[root@pcmk-1 ~]#
As you can see, the tool has found some errors.
In order to guarantee the safety of your data (if the data is corrupt, there is little point in continuing to make it available), Pacemaker ships with STONITH (a common node fencing mechanism, used to ensure data integrity by powering off "bad" nodes) enabled. However, it also knows when no STONITH configuration has been supplied and reports this as a problem (since the cluster would not be able to make progress if a situation requiring node fencing arose).
For now, we will disable this feature and configure it later in the Configuring STONITH section. It is important to note that the use of STONITH is highly encouraged; turning it off tells the cluster to simply pretend that failed nodes are safely powered off. Some vendors will even refuse to support clusters that have it disabled.
To disable STONITH, we set the stonith-enabled cluster option to false.
crm configure property stonith-enabled=false
crm_verify -L
With the new cluster option set, the configuration is now valid.
+
+
+ The use of stonith-enabled=false is completely inappropriate for a production cluster.
+ We use it here to defer the discussion of its configuration which can differ widely from one installation to the next.
+ See the Configure STONITH chapter for information on why STONITH is important and details on how to configure it.
+
+
Adding a Resource
The first thing we should do is configure an IP address. Regardless of where the cluster service(s) are running, we need a consistent address to contact them on. Here I will choose and add 192.168.122.101 as the floating address, give it the imaginative name ClusterIP and tell the cluster to check that it is running every 30 seconds.
The chosen address must not be one already associated with a physical node
crm configure primitive ClusterIP ocf:heartbeat:IPaddr2 \
params ip=192.168.122.101 cidr_netmask=32 \
op monitor interval=30s
The other important piece of information here is ocf:heartbeat:IPaddr2. This tells Pacemaker three things about the resource you want to add. The first field, ocf, is the standard to which the resource script conforms and where to find it. The second field is specific to OCF resources and tells the cluster which namespace to find the resource script in, in this case heartbeat. The last field indicates the name of the resource script.
To obtain a list of the available resource classes, run
[root@pcmk-1 ~]# crm ra classes
heartbeat
lsb
ocf / heartbeat pacemaker
stonith
To then find all the OCF resource agents provided by Pacemaker and Heartbeat, run
[root@pcmk-1 ~]# crm ra list ocf pacemaker
ClusterMon Dummy Stateful SysInfo SystemHealth controld
ping pingd
[root@pcmk-1 ~]# crm ra list ocf heartbeat
AoEtarget AudibleAlarm ClusterMon Delay
Dummy EvmsSCC Evmsd Filesystem
ICP IPaddr IPaddr2 IPsrcaddr
LVM LinuxSCSI MailTo ManageRAID
ManageVE Pure-FTPd Raid1 Route
SAPDatabase SAPInstance SendArp ServeRAID
SphinxSearchDaemon Squid Stateful SysInfo
VIPArip VirtualDomain WAS WAS6
WinPopup Xen Xinetd anything
apache db2 drbd eDir88
iSCSILogicalUnit iSCSITarget ids iscsi
ldirectord mysql mysql-proxy nfsserver
oracle oralsnr pgsql pingd
portblock rsyncd scsi2reservation sfex
tomcat vmware
[root@pcmk-1 ~]#
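To see which parameters a particular agent accepts, along with their defaults and descriptions, the crm shell can also display the agent's metadata. A minimal sketch, assuming the shell's ra meta subcommand (also available as ra info), using the IPaddr2 agent from earlier:
# Show the parameters and their descriptions for a resource agent
crm ra meta ocf:heartbeat:IPaddr2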
Now verify that the IP resource has been added and display the cluster’s status to see that it is now active.
[root@pcmk-1 ~]# crm configure show
node pcmk-1
node pcmk-2
primitive ClusterIP ocf:heartbeat:IPaddr2 \
params ip="192.168.122.101" cidr_netmask="32" \
op monitor interval="30s"
property $id="cib-bootstrap-options" \
- dc-version="1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7" \
+ dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
cluster-infrastructure="openais" \
expected-quorum-votes="2" \
stonith-enabled="false"
[root@pcmk-1 ~]# crm_mon
============
Last updated: Fri Aug 28 15:23:48 2009
Stack: openais
Current DC: pcmk-1 - partition with quorum
-Version: 1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7
+Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
1 Resources configured.
============
Online: [ pcmk-1 pcmk-2 ]
ClusterIP (ocf::heartbeat:IPaddr): Started pcmk-1
Perform a Failover
Since this is a high-availability cluster, we should test failover of our new resource before moving on.
First, find the node on which the IP address is running.
[root@pcmk-1 ~]# crm resource status ClusterIP
resource ClusterIP is running on: pcmk-1
[root@pcmk-1 ~]#
Shut down Pacemaker and Corosync on that machine.
[root@pcmk-1 ~]# ssh pcmk-1 -- /etc/init.d/pacemaker stop
Signaling Pacemaker Cluster Manager to terminate: [ OK ]
Waiting for cluster services to unload:. [ OK ]
[root@pcmk-1 ~]# ssh pcmk-1 -- /etc/init.d/corosync stop
Stopping Corosync Cluster Engine (corosync): [ OK ]
Waiting for services to unload: [ OK ]
[root@pcmk-1 ~]#
Once Corosync is no longer running, go to the other node and check the cluster status with crm_mon.
[root@pcmk-2 ~]# crm_mon
============
Last updated: Fri Aug 28 15:27:35 2009
Stack: openais
Current DC: pcmk-2 - partition WITHOUT quorum
-Version: 1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7
+Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
1 Resources configured.
============
Online: [ pcmk-2 ]
OFFLINE: [ pcmk-1 ]
There are three things to notice about the cluster’s current state. The first is that, as expected, pcmk-1 is now offline. However we can also see that ClusterIP isn’t running anywhere!
Quorum and Two-Node Clusters
This is because the cluster no longer has quorum, as can be seen by the text “partition WITHOUT quorum” (emphasised green) in the output above. In order to reduce the possibility of data corruption, Pacemaker’s default behavior is to stop all resources if the cluster does not have quorum.
A cluster is said to have quorum when more than half the known or expected nodes are online, or for the mathematically inclined, whenever the following equation is true:
total_nodes < 2 * active_nodes
Therefore a two-node cluster only has quorum when both nodes are running (with two nodes known but only one active, 2 < 2 * 1 does not hold), which is no longer the case for our cluster. This would normally make the creation of a two-node cluster pointless (some would argue that two-node clusters are always pointless, but that is an argument for another time); however, it is possible to control how Pacemaker behaves when quorum is lost. In particular, we can tell the cluster to simply ignore quorum altogether.
[root@pcmk-1 ~]# crm configure property no-quorum-policy=ignore
[root@pcmk-1 ~]# crm configure show
node pcmk-1
node pcmk-2
primitive ClusterIP ocf:heartbeat:IPaddr2 \
params ip="192.168.122.101" cidr_netmask="32" \
op monitor interval="30s"
property $id="cib-bootstrap-options" \
- dc-version="1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7" \
+ dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
cluster-infrastructure="openais" \
expected-quorum-votes="2" \
stonith-enabled="false" \
no-quorum-policy="ignore"
After a few moments, the cluster will start the IP address on the remaining node. Note that the cluster still does not have quorum.
[root@pcmk-2 ~]# crm_mon
============
Last updated: Fri Aug 28 15:30:18 2009
Stack: openais
Current DC: pcmk-2 - partition WITHOUT quorum
-Version: 1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7
+Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
1 Resources configured.
============
Online: [ pcmk-2 ]
OFFLINE: [ pcmk-1 ]
ClusterIP (ocf::heartbeat:IPaddr): Started pcmk-2
Now simulate node recovery by restarting the cluster stack on pcmk-1 and check the cluster’s status.
[root@pcmk-1 ~]# /etc/init.d/corosync start
Starting Corosync Cluster Engine (corosync): [ OK ]
[root@pcmk-1 ~]# /etc/init.d/pacemaker start
Starting Pacemaker Cluster Manager: [ OK ]
[root@pcmk-1 ~]# crm_mon
============
Last updated: Fri Aug 28 15:32:13 2009
Stack: openais
Current DC: pcmk-2 - partition with quorum
-Version: 1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7
+Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
1 Resources configured.
============
Online: [ pcmk-1 pcmk-2 ]
ClusterIP (ocf::heartbeat:IPaddr): Started pcmk-1
Here we see something that some may consider surprising: the IP is back running at its original location!
Prevent Resources from Moving after Recovery
In some circumstances it is highly desirable to prevent healthy resources from being moved around the cluster. Moving resources almost always requires a period of downtime, and for complex services like Oracle databases, this period can be quite long.
To address this, Pacemaker has the concept of resource stickiness, which controls how much a service prefers to stay running where it is. You may like to think of it as the "cost" of any downtime. By default, Pacemaker assumes there is zero cost associated with moving resources and will do so to achieve "optimal" resource placement (note that Pacemaker's definition of optimal may not always agree with that of a human; the order in which Pacemaker processes lists of resources and nodes creates implicit preferences, required in order to create a stable solution, in situations where the administrator has not explicitly specified any). We can specify a different stickiness for every resource, but it is often sufficient to change the default.
crm configure rsc_defaults resource-stickiness=100
[root@pcmk-2 ~]# crm configure show
node pcmk-1
node pcmk-2
primitive ClusterIP ocf:heartbeat:IPaddr2 \
params ip="192.168.122.101" cidr_netmask="32" \
op monitor interval="30s"
property $id="cib-bootstrap-options" \
- dc-version="1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7" \
+ dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
cluster-infrastructure="openais" \
expected-quorum-votes="2" \
stonith-enabled="false" \
no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
resource-stickiness="100"
If we now retry the failover test, we see that as expected ClusterIP still moves to pcmk-2 when pcmk-1 is taken offline.
[root@pcmk-1 ~]# ssh pcmk-1 -- /etc/init.d/pacemaker stop
Signaling Pacemaker Cluster Manager to terminate: [ OK ]
Waiting for cluster services to unload:. [ OK ]
[root@pcmk-1 ~]# ssh pcmk-1 -- /etc/init.d/corosync stop
Stopping Corosync Cluster Engine (corosync): [ OK ]
Waiting for services to unload: [ OK ]
[root@pcmk-1 ~]# ssh pcmk-2 -- crm_mon -1
============
Last updated: Fri Aug 28 15:39:38 2009
Stack: openais
Current DC: pcmk-2 - partition WITHOUT quorum
-Version: 1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7
+Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
1 Resources configured.
============
Online: [ pcmk-2 ]
OFFLINE: [ pcmk-1 ]
ClusterIP (ocf::heartbeat:IPaddr): Started pcmk-2
However when we bring pcmk-1 back online, ClusterIP now remains running on pcmk-2.
[root@pcmk-1 ~]# /etc/init.d/corosync start
Starting Corosync Cluster Engine (corosync): [ OK ]
[root@pcmk-1 ~]# /etc/init.d/pacemaker start
Starting Pacemaker Cluster Manager: [ OK ]
[root@pcmk-1 ~]# crm_mon
============
Last updated: Fri Aug 28 15:41:23 2009
Stack: openais
Current DC: pcmk-2 - partition with quorum
-Version: 1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7
+Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
1 Resources configured.
============
Online: [ pcmk-1 pcmk-2 ]
ClusterIP (ocf::heartbeat:IPaddr): Started pcmk-2
diff --git a/doc/Clusters_from_Scratch/en-US/Ch-Apache.xml b/doc/Clusters_from_Scratch/en-US/Ch-Apache.xml
index 567d5a83b0..667c2c391d 100644
--- a/doc/Clusters_from_Scratch/en-US/Ch-Apache.xml
+++ b/doc/Clusters_from_Scratch/en-US/Ch-Apache.xml
@@ -1,472 +1,472 @@
%BOOK_ENTITIES;
]>
Apache - Adding More Services
Now that we have a basic but functional active/passive two-node cluster, we're ready to add some real services. We're going to start with Apache because it's a feature of many clusters and relatively simple to configure.
Installation
Before continuing, we need to make sure Apache is installed on both hosts.
[root@pcmk-1 ~]# yum install -y httpd
Setting up Install Process
Resolving Dependencies
--> Running transaction check
---> Package httpd.x86_64 0:2.2.13-2.fc12 set to be updated
--> Processing Dependency: httpd-tools = 2.2.13-2.fc12 for package: httpd-2.2.13-2.fc12.x86_64
--> Processing Dependency: apr-util-ldap for package: httpd-2.2.13-2.fc12.x86_64
--> Processing Dependency: /etc/mime.types for package: httpd-2.2.13-2.fc12.x86_64
--> Processing Dependency: libaprutil-1.so.0()(64bit) for package: httpd-2.2.13-2.fc12.x86_64
--> Processing Dependency: libapr-1.so.0()(64bit) for package: httpd-2.2.13-2.fc12.x86_64
--> Running transaction check
---> Package apr.x86_64 0:1.3.9-2.fc12 set to be updated
---> Package apr-util.x86_64 0:1.3.9-2.fc12 set to be updated
---> Package apr-util-ldap.x86_64 0:1.3.9-2.fc12 set to be updated
---> Package httpd-tools.x86_64 0:2.2.13-2.fc12 set to be updated
---> Package mailcap.noarch 0:2.1.30-1.fc12 set to be updated
--> Finished Dependency Resolution
Dependencies Resolved
=======================================================================================
Package Arch Version Repository Size
=======================================================================================
Installing:
httpd x86_64 2.2.13-2.fc12 rawhide 735 k
Installing for dependencies:
apr x86_64 1.3.9-2.fc12 rawhide 117 k
apr-util x86_64 1.3.9-2.fc12 rawhide 84 k
apr-util-ldap x86_64 1.3.9-2.fc12 rawhide 15 k
httpd-tools x86_64 2.2.13-2.fc12 rawhide 63 k
mailcap noarch 2.1.30-1.fc12 rawhide 25 k
Transaction Summary
=======================================================================================
Install 6 Package(s)
Upgrade 0 Package(s)
Total download size: 1.0 M
Downloading Packages:
(1/6): apr-1.3.9-2.fc12.x86_64.rpm | 117 kB 00:00
(2/6): apr-util-1.3.9-2.fc12.x86_64.rpm | 84 kB 00:00
(3/6): apr-util-ldap-1.3.9-2.fc12.x86_64.rpm | 15 kB 00:00
(4/6): httpd-2.2.13-2.fc12.x86_64.rpm | 735 kB 00:00
(5/6): httpd-tools-2.2.13-2.fc12.x86_64.rpm | 63 kB 00:00
(6/6): mailcap-2.1.30-1.fc12.noarch.rpm | 25 kB 00:00
----------------------------------------------------------------------------------------
Total 875 kB/s | 1.0 MB 00:01
Running rpm_check_debug
Running Transaction Test
Finished Transaction Test
Transaction Test Succeeded
Running Transaction
Installing : apr-1.3.9-2.fc12.x86_64 1/6
Installing : apr-util-1.3.9-2.fc12.x86_64 2/6
Installing : apr-util-ldap-1.3.9-2.fc12.x86_64 3/6
Installing : httpd-tools-2.2.13-2.fc12.x86_64 4/6
Installing : mailcap-2.1.30-1.fc12.noarch 5/6
Installing : httpd-2.2.13-2.fc12.x86_64 6/6
Installed:
httpd.x86_64 0:2.2.13-2.fc12
Dependency Installed:
apr.x86_64 0:1.3.9-2.fc12 apr-util.x86_64 0:1.3.9-2.fc12
apr-util-ldap.x86_64 0:1.3.9-2.fc12 httpd-tools.x86_64 0:2.2.13-2.fc12
mailcap.noarch 0:2.1.30-1.fc12
Complete!
[root@pcmk-1 ~]#
Also, we need the wget tool in order for the cluster to be able to check the status of the Apache server.
[root@pcmk-1 ~]# yum install -y wget
Setting up Install Process
Resolving Dependencies
--> Running transaction check
---> Package wget.x86_64 0:1.11.4-5.fc12 set to be updated
--> Finished Dependency Resolution
Dependencies Resolved
===========================================================================================
Package Arch Version Repository Size
===========================================================================================
Installing:
wget x86_64 1.11.4-5.fc12 rawhide 393 k
Transaction Summary
===========================================================================================
Install 1 Package(s)
Upgrade 0 Package(s)
Total download size: 393 k
Downloading Packages:
wget-1.11.4-5.fc12.x86_64.rpm | 393 kB 00:00
Running rpm_check_debug
Running Transaction Test
Finished Transaction Test
Transaction Test Succeeded
Running Transaction
Installing : wget-1.11.4-5.fc12.x86_64 1/1
Installed:
wget.x86_64 0:1.11.4-5.fc12
Complete!
[root@pcmk-1 ~]#
Preparation
First we need to create a page for Apache to serve up. On Fedora the default Apache docroot is /var/www/html, so we’ll create an index file there.
[root@pcmk-1 ~]# cat <<-END >/var/www/html/index.html
<html>
<body>My Test Site - pcmk-1</body>
</html>
END
[root@pcmk-1 ~]#
For the moment, we will simplify things by serving up only a static site and manually synchronizing the data between the two nodes. So run the command again on pcmk-2.
[root@pcmk-2 ~]# cat <<-END >/var/www/html/index.html
<html>
<body>My Test Site - pcmk-2</body>
</html>
END
[root@pcmk-2 ~]#
Enable the Apache status URL
In order to monitor the health of your Apache instance, and recover it if it fails, the resource agent used by Pacemaker assumes the server-status URL is available.
Look for the following in /etc/httpd/conf/httpd.conf and make sure it is not disabled or commented out:
<Location /server-status>
SetHandler server-status
Order deny,allow
Deny from all
Allow from 127.0.0.1
</Location>
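As a quick sanity check once Apache is running (started by hand for a test, or later by the cluster), you can fetch the status page with the same tool the resource agent relies on. A minimal sketch; an error here usually means the handler is still disabled or access from 127.0.0.1 is blocked.
# Expect a page of Apache status output
wget --no-proxy -O - http://127.0.0.1/server-status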
Update the Configuration
At this point, Apache is ready to go; all that needs to be done is to add it to the cluster. Let's call the resource WebSite. We need to use an OCF script called apache in the heartbeat namespace (compare the key used here, ocf:heartbeat:apache, with the one we used earlier for the IP address: ocf:heartbeat:IPaddr2). The only required parameter is the path to the main Apache configuration file, and we'll tell the cluster to check once a minute that Apache is still running.
[root@pcmk-1 ~]# crm configure primitive WebSite ocf:heartbeat:apache params configfile=/etc/httpd/conf/httpd.conf op monitor interval=1min
[root@pcmk-1 ~]# crm configure show
node pcmk-1
node pcmk-2
primitive WebSite ocf:heartbeat:apache \
params configfile="/etc/httpd/conf/httpd.conf" \
op monitor interval="1min"
primitive ClusterIP ocf:heartbeat:IPaddr2 \
params ip="192.168.122.101" cidr_netmask="32" \
op monitor interval="30s"
property $id="cib-bootstrap-options" \
- dc-version="1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7" \
+ dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
cluster-infrastructure="openais" \
expected-quorum-votes="2" \
stonith-enabled="false" \
no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
resource-stickiness="100"
After a short delay, we should see the cluster start apache
[root@pcmk-1 ~]# crm_mon
============
Last updated: Fri Aug 28 16:12:49 2009
Stack: openais
Current DC: pcmk-2 - partition with quorum
-Version: 1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7
+Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
2 Resources configured.
============
Online: [ pcmk-1 pcmk-2 ]
ClusterIP (ocf::heartbeat:IPaddr): Started pcmk-2
WebSite (ocf::heartbeat:apache): Started pcmk-1
Wait a moment, the WebSite resource isn’t running on the same host as our IP address!
Ensuring Resources Run on the Same Host
To reduce the load on any one machine, Pacemaker will generally try to spread the configured resources across the cluster nodes. However we can tell the cluster that two resources are related and need to run on the same host (or not at all). Here we instruct the cluster that WebSite can only run on the host that ClusterIP is active on. If ClusterIP is not active anywhere, WebSite will not be permitted to run anywhere.
[root@pcmk-1 ~]# crm configure colocation website-with-ip INFINITY: WebSite ClusterIP
[root@pcmk-1 ~]# crm configure show
node pcmk-1
node pcmk-2
primitive WebSite ocf:heartbeat:apache \
params configfile="/etc/httpd/conf/httpd.conf" \
op monitor interval="1min"
primitive ClusterIP ocf:heartbeat:IPaddr2 \
params ip="192.168.122.101" cidr_netmask="32" \
op monitor interval="30s"
colocation website-with-ip inf: WebSite ClusterIP
property $id="cib-bootstrap-options" \
- dc-version="1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7" \
+ dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
cluster-infrastructure="openais" \
expected-quorum-votes="2" \
stonith-enabled="false" \
no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
resource-stickiness="100"
[root@pcmk-1 ~]# crm_mon
============
Last updated: Fri Aug 28 16:14:34 2009
Stack: openais
Current DC: pcmk-2 - partition with quorum
-Version: 1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7
+Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
2 Resources configured.
============
Online: [ pcmk-1 pcmk-2 ]
ClusterIP (ocf::heartbeat:IPaddr): Started pcmk-2
WebSite (ocf::heartbeat:apache): Started pcmk-2
Controlling Resource Start/Stop Ordering
When Apache starts, it binds to the available IP addresses. It doesn't know about any addresses we add afterwards, so not only do they need to run on the same node, but we need to make sure ClusterIP is already active before we start WebSite. We do this by adding an ordering constraint. We need to give it a name (choose something descriptive like apache-after-ip), indicate that it's mandatory (so that any recovery for ClusterIP will also trigger recovery of WebSite) and list the two resources in the order we need them to start.
[root@pcmk-1 ~]# crm configure order apache-after-ip mandatory: ClusterIP WebSite
[root@pcmk-1 ~]# crm configure show
node pcmk-1
node pcmk-2
primitive WebSite ocf:heartbeat:apache \
params configfile="/etc/httpd/conf/httpd.conf" \
op monitor interval="1min"
primitive ClusterIP ocf:heartbeat:IPaddr2 \
params ip="192.168.122.101" cidr_netmask="32" \
op monitor interval="30s"
colocation website-with-ip inf: WebSite ClusterIP
order apache-after-ip inf: ClusterIP WebSite
property $id="cib-bootstrap-options" \
- dc-version="1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7" \
+ dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
cluster-infrastructure="openais" \
expected-quorum-votes="2" \
stonith-enabled="false" \
no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
resource-stickiness="100"
Specifying a Preferred Location
Pacemaker does not rely on any sort of hardware symmetry between nodes, so it may well be that one machine is more powerful than the other. In such cases it makes sense to host the resources there if it is available. To do this we create a location constraint. Again we give it a descriptive name (prefer-pcmk-1), specify the resource we want to run there (WebSite), how badly we’d like it to run there (we’ll use 50 for now, but in a two-node situation almost any value above 0 will do) and the host’s name.
[root@pcmk-1 ~]# crm configure location prefer-pcmk-1 WebSite 50: pcmk-1
[root@pcmk-1 ~]# crm configure show
node pcmk-1
node pcmk-2
primitive WebSite ocf:heartbeat:apache \
params configfile="/etc/httpd/conf/httpd.conf" \
op monitor interval="1min"
primitive ClusterIP ocf:heartbeat:IPaddr2 \
params ip="192.168.122.101" cidr_netmask="32" \
op monitor interval="30s"
location prefer-pcmk-1 WebSite 50: pcmk-1
colocation website-with-ip inf: WebSite ClusterIP
property $id="cib-bootstrap-options" \
- dc-version="1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7" \
+ dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
cluster-infrastructure="openais" \
expected-quorum-votes="2" \
stonith-enabled="false" \
no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
resource-stickiness="100"
[root@pcmk-1 ~]# crm_mon
============
Last updated: Fri Aug 28 16:17:35 2009
Stack: openais
Current DC: pcmk-2 - partition with quorum
-Version: 1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7
+Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
2 Resources configured.
============
Online: [ pcmk-1 pcmk-2 ]
ClusterIP (ocf::heartbeat:IPaddr): Started pcmk-2
WebSite (ocf::heartbeat:apache): Started pcmk-2
Wait a minute, the resources are still on pcmk-2!
Even though we now prefer pcmk-1 over pcmk-2, that preference is (intentionally) less than the resource stickiness (how much we preferred not to have unnecessary downtime).
To see the current placement scores, you can use a tool called ptest (-s displays the allocation scores, -L uses the live cluster state):
ptest -sL
Include output
There is a way to force them to move though...
Manually Moving Resources Around the Cluster
There are always times when an administrator needs to override the cluster and force resources to move to a specific location. Underneath we use location constraints like the one we created above; happily, you don't need to care. Just provide the name of the resource and the intended location, and we'll do the rest.
[root@pcmk-1 ~]# crm resource move WebSite pcmk-1
[root@pcmk-1 ~]# crm_mon
============
Last updated: Fri Aug 28 16:19:24 2009
Stack: openais
Current DC: pcmk-2 - partition with quorum
-Version: 1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7
+Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
2 Resources configured.
============
Online: [ pcmk-1 pcmk-2 ]
ClusterIP (ocf::heartbeat:IPaddr): Started pcmk-1
WebSite (ocf::heartbeat:apache): Started pcmk-1
Notice how the colocation rule we created has ensured that ClusterIP was also moved to pcmk-1.
For the curious, we can see the effect of this command by examining the configuration
crm configure show
[root@pcmk-1 ~]# crm configure show
node pcmk-1
node pcmk-2
primitive WebSite ocf:heartbeat:apache \
params configfile="/etc/httpd/conf/httpd.conf" \
op monitor interval="1min"
primitive ClusterIP ocf:heartbeat:IPaddr2 \
params ip="192.168.122.101" cidr_netmask="32" \
op monitor interval="30s"
location cli-prefer-WebSite WebSite \
rule $id="cli-prefer-rule-WebSite" inf: #uname eq pcmk-1
location prefer-pcmk-1 WebSite 50: pcmk-1
colocation website-with-ip inf: WebSite ClusterIP
property $id="cib-bootstrap-options" \
- dc-version="1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7" \
+ dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
cluster-infrastructure="openais" \
expected-quorum-votes="2" \
stonith-enabled="false" \
no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
resource-stickiness="100"
Highlighted is the automated constraint used to move the resources to pcmk-1
Giving Control Back to the Cluster
Once we've finished whatever activity required us to move the resources to pcmk-1 (in our case nothing), we can then allow the cluster to resume normal operation with the unmove command. Since we previously configured a default stickiness, the resources will remain on pcmk-1.
[root@pcmk-1 ~]# crm resource unmove WebSite
[root@pcmk-1 ~]# crm configure show
node pcmk-1
node pcmk-2
primitive WebSite ocf:heartbeat:apache \
params configfile="/etc/httpd/conf/httpd.conf" \
op monitor interval="1min"
primitive ClusterIP ocf:heartbeat:IPaddr2 \
params ip="192.168.122.101" cidr_netmask="32" \
op monitor interval="30s"
location prefer-pcmk-1 WebSite 50: pcmk-1
colocation website-with-ip inf: WebSite ClusterIP
property $id="cib-bootstrap-options" \
- dc-version="1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7" \
+ dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
cluster-infrastructure="openais" \
expected-quorum-votes="2" \
stonith-enabled="false" \
no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
resource-stickiness="100"
Note that the automated constraint is now gone. If we check the cluster status, we can also see that as expected the resources are still active on pcmk-1.
[root@pcmk-1 ~]# crm_mon
============
Last updated: Fri Aug 28 16:20:53 2009
Stack: openais
Current DC: pcmk-2 - partition with quorum
-Version: 1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7
+Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
2 Resources configured.
============
Online: [ pcmk-1 pcmk-2 ]
ClusterIP (ocf::heartbeat:IPaddr): Started pcmk-1
WebSite (ocf::heartbeat:apache): Started pcmk-1
diff --git a/doc/Clusters_from_Scratch/en-US/Ch-Shared-Storage.xml b/doc/Clusters_from_Scratch/en-US/Ch-Shared-Storage.xml
index cc99a68f5a..03d974b410 100644
--- a/doc/Clusters_from_Scratch/en-US/Ch-Shared-Storage.xml
+++ b/doc/Clusters_from_Scratch/en-US/Ch-Shared-Storage.xml
@@ -1,528 +1,528 @@
%BOOK_ENTITIES;
]>
Replicated Storage with DRBD
Even if you’re serving up static websites, having to manually synchronize the contents of that website to all the machines in the cluster is not ideal.
For dynamic websites, such as a wiki, it's not even an option.
Not everyone can afford network-attached storage, but somehow the data needs to be kept in sync.
Enter DRBD, which can be thought of as network-based RAID-1.
See http://www.drbd.org/ for more details.
Install the DRBD Packages
Since its inclusion in the upstream 2.6.33 kernel, everything needed to use DRBD ships with &DISTRO; &DISTRO_VERSION;.
All you need to do is install it:
[root@pcmk-1 ~]# yum install -y drbd-pacemaker
Loaded plugins: presto, refresh-packagekit
Setting up Install Process
Resolving Dependencies
--> Running transaction check
---> Package drbd-pacemaker.x86_64 0:8.3.7-2.fc13 set to be updated
--> Processing Dependency: drbd-utils = 8.3.7-2.fc13 for package: drbd-pacemaker-8.3.7-2.fc13.x86_64
--> Running transaction check
---> Package drbd-utils.x86_64 0:8.3.7-2.fc13 set to be updated
--> Finished Dependency Resolution
Dependencies Resolved
=================================================================================
Package Arch Version Repository Size
=================================================================================
Installing:
drbd-pacemaker x86_64 8.3.7-2.fc13 fedora 19 k
Installing for dependencies:
drbd-utils x86_64 8.3.7-2.fc13 fedora 165 k
Transaction Summary
=================================================================================
Install 2 Package(s)
Upgrade 0 Package(s)
Total download size: 184 k
Installed size: 427 k
Downloading Packages:
Setting up and reading Presto delta metadata
fedora/prestodelta | 1.7 kB 00:00
Processing delta metadata
Package(s) data still to download: 184 k
(1/2): drbd-pacemaker-8.3.7-2.fc13.x86_64.rpm | 19 kB 00:01
(2/2): drbd-utils-8.3.7-2.fc13.x86_64.rpm | 165 kB 00:02
---------------------------------------------------------------------------------
Total 45 kB/s | 184 kB 00:04
Running rpm_check_debug
Running Transaction Test
Transaction Test Succeeded
Running Transaction
Installing : drbd-utils-8.3.7-2.fc13.x86_64 1/2
Installing : drbd-pacemaker-8.3.7-2.fc13.x86_64 2/2
Installed:
drbd-pacemaker.x86_64 0:8.3.7-2.fc13
Dependency Installed:
drbd-utils.x86_64 0:8.3.7-2.fc13
Complete!
[root@pcmk-1 ~]#
Configure DRBD
Before we configure DRBD, we need to set aside some disk for it to use.
Create A Partition for DRBD
If you have more than 1GB free, feel free to use it.
For this guide however, 1GB is plenty of space for a single html file and sufficient for later holding the GFS2 metadata.
[root@pcmk-1 ~]# lvcreate -n drbd-demo -L 1G VolGroup
Logical volume "drbd-demo" created
[root@pcmk-1 ~]# lvs
LV VG Attr LSize Origin Snap% Move Log Copy% Convert
drbd-demo VolGroup -wi-a- 1.00G
lv_root VolGroup -wi-ao 7.30G
lv_swap VolGroup -wi-ao 500.00M
Repeat this on the second node, making sure to use the same size partition.
[root@pcmk-2 ~]# lvs
LV VG Attr LSize Origin Snap% Move Log Copy% Convert
lv_root VolGroup -wi-ao 7.30G
lv_swap VolGroup -wi-ao 500.00M
[root@pcmk-2 ~]# lvcreate -n drbd-demo -L 1G VolGroup
Logical volume "drbd-demo" created
[root@pcmk-2 ~]# lvs
LV VG Attr LSize Origin Snap% Move Log Copy% Convert
drbd-demo VolGroup -wi-a- 1.00G
lv_root VolGroup -wi-ao 7.30G
lv_swap VolGroup -wi-ao 500.00M
Write the DRBD Config
There is no series of commands for building a DRBD configuration, so simply copy the configuration below to /etc/drbd.conf
Detailed information on the directives used in this configuration (and other alternatives) is available from http://www.drbd.org/users-guide/ch-configure.html
Be sure to use the names and addresses of your nodes if they differ from the ones used in this guide.
global {
usage-count yes;
}
common {
protocol C;
}
resource wwwdata {
meta-disk internal;
device /dev/drbd1;
syncer {
verify-alg sha1;
}
net {
allow-two-primaries;
}
on pcmk-1 {
disk /dev/mapper/VolGroup-drbd--demo;
address 192.168.122.101:7789;
}
on pcmk-2 {
disk /dev/mapper/VolGroup-drbd--demo;
address 192.168.122.102:7789;
}
}
TODO: Explain the reason for the allow-two-primaries option
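DRBD expects the same configuration on both nodes, so once you are happy with the file, copy it to the second node as well. A minimal sketch, assuming the node names used in this guide:
# Copy the DRBD configuration to the other node
scp /etc/drbd.conf pcmk-2:/etc/drbd.conf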
Initialize and Load DRBD
With the configuration in place, we can now perform the DRBD initialization
[root@pcmk-1 ~]# drbdadm create-md wwwdata
md_offset 12578816
al_offset 12546048
bm_offset 12541952
Found some data
==> This might destroy existing data! <==
Do you want to proceed?
[need to type 'yes' to confirm] yes
Writing meta data...
initializing activity log
NOT initialized bitmap
New drbd meta data block successfully created.
success
Now load the DRBD kernel module and confirm that everything is sane
[root@pcmk-1 ~]# modprobe drbd
[root@pcmk-1 ~]# drbdadm up wwwdata
[root@pcmk-1 ~]# cat /proc/drbd
version: 8.3.6 (api:88/proto:86-90)
GIT-hash: f3606c47cc6fcf6b3f086e425cb34af8b7a81bbf build by root@pcmk-1, 2009-12-08 11:22:57
1: cs:WFConnection ro:Secondary/Unknown ds:Inconsistent/DUnknown C r----
ns:0 nr:0 dw:0 dr:0 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:12248
[root@pcmk-1 ~]#
Repeat on the second node
drbdadm --force create-md wwwdata
modprobe drbd
drbdadm up wwwdata
cat /proc/drbd
[root@pcmk-2 ~]# drbdadm --force create-md wwwdata
Writing meta data...
initializing activity log
NOT initialized bitmap
New drbd meta data block successfully created.
success
[root@pcmk-2 ~]# modprobe drbd
WARNING: Deprecated config file /etc/modprobe.conf, all config files belong into /etc/modprobe.d/.
[root@pcmk-2 ~]# drbdadm up wwwdata
[root@pcmk-2 ~]# cat /proc/drbd
version: 8.3.6 (api:88/proto:86-90)
GIT-hash: f3606c47cc6fcf6b3f086e425cb34af8b7a81bbf build by root@pcmk-1, 2009-12-08 11:22:57
1: cs:Connected ro:Secondary/Secondary ds:Inconsistent/Inconsistent C r----
ns:0 nr:0 dw:0 dr:0 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:12248
Now we need to tell DRBD which set of data to use.
Since both sides contain garbage, we can run the following on pcmk-1:
[root@pcmk-1 ~]# drbdadm -- --overwrite-data-of-peer primary wwwdata
[root@pcmk-1 ~]# cat /proc/drbd
version: 8.3.6 (api:88/proto:86-90)
GIT-hash: f3606c47cc6fcf6b3f086e425cb34af8b7a81bbf build by root@pcmk-1, 2009-12-08 11:22:57
1: cs:SyncSource ro:Primary/Secondary ds:UpToDate/Inconsistent C r----
ns:2184 nr:0 dw:0 dr:2472 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:10064
[=====>..............] sync'ed: 33.4% (10064/12248)K
finish: 0:00:37 speed: 240 (240) K/sec
[root@pcmk-1 ~]# cat /proc/drbd
version: 8.3.6 (api:88/proto:86-90)
GIT-hash: f3606c47cc6fcf6b3f086e425cb34af8b7a81bbf build by root@pcmk-1, 2009-12-08 11:22:57
1: cs:Connected ro:Primary/Secondary ds:UpToDate/UpToDate C r----
ns:12248 nr:0 dw:0 dr:12536 al:0 bm:1 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:0
pcmk-1 is now in the Primary state, which allows it to be written to.
This means it's a good point at which to create a filesystem and populate it with some data to serve up via our WebSite resource.
Populate DRBD with Data
[root@pcmk-1 ~]# mkfs.ext4 /dev/drbd1
mke2fs 1.41.4 (27-Jan-2009)
Filesystem label=
OS type: Linux
Block size=1024 (log=0)
Fragment size=1024 (log=0)
3072 inodes, 12248 blocks
612 blocks (5.00%) reserved for the super user
First data block=1
Maximum filesystem blocks=12582912
2 block groups
8192 blocks per group, 8192 fragments per group
1536 inodes per group
Superblock backups stored on blocks:
8193
Writing inode tables: done
Creating journal (1024 blocks): done
Writing superblocks and filesystem accounting information: done
This filesystem will be automatically checked every 26 mounts or
180 days, whichever comes first. Use tune2fs -c or -i to override.
Now mount the newly created filesystem so we can create our index file
mount /dev/drbd1 /mnt/
cat <<-END >/mnt/index.html
<html>
<body>My Test Site - drbd</body>
</html>
END
umount /dev/drbd1
[root@pcmk-1 ~]# mount /dev/drbd1 /mnt/
[root@pcmk-1 ~]# cat <<-END >/mnt/index.html
> <html>
> <body>My Test Site - drbd</body>
> </html>
> END
[root@pcmk-1 ~]# umount /dev/drbd1
Configure the Cluster for DRBD
One handy feature of the crm shell is that you can use it in interactive mode to make several changes atomically.
First we launch the shell. The prompt will change to indicate you’re in interactive mode.
[root@pcmk-1 ~]# crm
cib crm(live)#
Next we must create a working copy of the current configuration.
This is where all our changes will go.
The cluster will not see any of them until we say it's OK.
Notice again how the prompt changes, this time to indicate that we’re no longer looking at the live cluster.
cib crm(live)# cib new drbd
INFO: drbd shadow CIB created
crm(drbd)#
Now we can create our DRBD clone and display the revised configuration.
crm(drbd)# configure primitive WebData ocf:linbit:drbd params drbd_resource=wwwdata \
op monitor interval=60s
crm(drbd)# configure ms WebDataClone WebData meta master-max=1 master-node-max=1 \
clone-max=2 clone-node-max=1 notify=true
crm(drbd)# configure show
node pcmk-1
node pcmk-2
primitive WebData ocf:linbit:drbd \
params drbd_resource="wwwdata" \
op monitor interval="60s"
primitive WebSite ocf:heartbeat:apache \
params configfile="/etc/httpd/conf/httpd.conf" \
op monitor interval="1min"
primitive ClusterIP ocf:heartbeat:IPaddr2 \
params ip="192.168.122.101" cidr_netmask="32" \
op monitor interval="30s"
ms WebDataClone WebData \
meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
location prefer-pcmk-1 WebSite 50: pcmk-1
colocation website-with-ip inf: WebSite ClusterIP
order apache-after-ip inf: ClusterIP WebSite
property $id="cib-bootstrap-options" \
- dc-version="1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7" \
+ dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
cluster-infrastructure="openais" \
expected-quorum-votes="2" \
stonith-enabled="false" \
no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
resource-stickiness="100"
Once we’re happy with the changes, we can tell the cluster to start using them and use crm_mon to check everything is functioning.
crm(drbd)# cib commit drbd
INFO: commited 'drbd' shadow CIB to the cluster
crm(drbd)# quit
bye
[root@pcmk-1 ~]# crm_mon
============
Last updated: Tue Sep 1 09:37:13 2009
Stack: openais
Current DC: pcmk-1 - partition with quorum
-Version: 1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7
+Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
3 Resources configured.
============
Online: [ pcmk-1 pcmk-2 ]
ClusterIP (ocf::heartbeat:IPaddr): Started pcmk-1
WebSite (ocf::heartbeat:apache): Started pcmk-1
Master/Slave Set: WebDataClone
Masters: [ pcmk-2 ]
Slaves: [ pcmk-1 ]
Include details on adding a second DRBD resource
Now that DRBD is functioning we can configure a Filesystem resource to use it.
In addition to the filesystem’s definition, we also need to tell the cluster where it can be located (only on the DRBD Primary) and when it is allowed to start (after the Primary was promoted).
Once again we’ll use the shell’s interactive mode
[root@pcmk-1 ~]# crm
crm(live)# cib new fs
INFO: fs shadow CIB created
crm(fs)# configure primitive WebFS ocf:heartbeat:Filesystem \
params device="/dev/drbd/by-res/wwwdata" directory="/var/www/html" fstype="ext4"
crm(fs)# configure colocation fs_on_drbd inf: WebFS WebDataClone:Master
crm(fs)# configure order WebFS-after-WebData inf: WebDataClone:promote WebFS:start
We also need to tell the cluster that Apache needs to run on the same machine as the filesystem and that it must be active before Apache can start.
crm(fs)# configure colocation WebSite-with-WebFS inf: WebSite WebFS
crm(fs)# configure order WebSite-after-WebFS inf: WebFS WebSite
Time to review the updated configuration:
[root@pcmk-1 ~]# crm configure show
node pcmk-1
node pcmk-2
primitive WebData ocf:linbit:drbd \
params drbd_resource="wwwdata" \
op monitor interval="60s"
primitive WebFS ocf:heartbeat:Filesystem \
params device="/dev/drbd/by-res/wwwdata" directory="/var/www/html" fstype="ext4"
primitive WebSite ocf:heartbeat:apache \
params configfile="/etc/httpd/conf/httpd.conf" \
op monitor interval="1min"
primitive ClusterIP ocf:heartbeat:IPaddr2 \
params ip="192.168.122.101" cidr_netmask="32" \
op monitor interval="30s"
ms WebDataClone WebData \
meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
location prefer-pcmk-1 WebSite 50: pcmk-1
colocation WebSite-with-WebFS inf: WebSite WebFS
colocation fs_on_drbd inf: WebFS WebDataClone:Master
colocation website-with-ip inf: WebSite ClusterIP
order WebFS-after-WebData inf: WebDataClone:promote WebFS:start
order WebSite-after-WebFS inf: WebFS WebSite
order apache-after-ip inf: ClusterIP WebSite
property $id="cib-bootstrap-options" \
- dc-version="1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7" \
+ dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
cluster-infrastructure="openais" \
expected-quorum-votes="2" \
stonith-enabled="false" \
no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
resource-stickiness="100"
After reviewing the new configuration, we again upload it and watch the cluster put it into effect.
crm(fs)# cib commit fs
INFO: commited 'fs' shadow CIB to the cluster
crm(fs)# quit
bye
[root@pcmk-1 ~]# crm_mon
============
Last updated: Tue Sep 1 10:08:44 2009
Stack: openais
Current DC: pcmk-1 - partition with quorum
-Version: 1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7
+Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
4 Resources configured.
============
Online: [ pcmk-1 pcmk-2 ]
ClusterIP (ocf::heartbeat:IPaddr): Started pcmk-1
WebSite (ocf::heartbeat:apache): Started pcmk-1
Master/Slave Set: WebDataClone
Masters: [ pcmk-1 ]
Slaves: [ pcmk-2 ]
WebFS (ocf::heartbeat:Filesystem): Started pcmk-1
Testing Migration
We could shut down the active node again, but another way to safely simulate recovery is to put the node into what is called “standby mode”.
Nodes in this state tell the cluster that they are not allowed to run resources.
Any resources found active there will be moved elsewhere.
This feature can be particularly useful when updating the resources’ packages.
Put the local node into standby mode and observe the cluster move all the resources to the other node.
Note also that the node’s status will change to indicate that it can no longer host resources.
[root@pcmk-1 ~]# crm node standby
[root@pcmk-1 ~]# crm_mon
============
Last updated: Tue Sep 1 10:09:57 2009
Stack: openais
Current DC: pcmk-1 - partition with quorum
-Version: 1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7
+Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
4 Resources configured.
============
Node pcmk-1: standby
Online: [ pcmk-2 ]
ClusterIP (ocf::heartbeat:IPaddr): Started pcmk-2
WebSite (ocf::heartbeat:apache): Started pcmk-2
Master/Slave Set: WebDataClone
Masters: [ pcmk-2 ]
Stopped: [ WebData:1 ]
WebFS (ocf::heartbeat:Filesystem): Started pcmk-2
Once we’ve done everything we needed to on pcmk-1 (in this case nothing, we just wanted to see the resources move), we can allow the node to be a full cluster member again.
[root@pcmk-1 ~]# crm node online
[root@pcmk-1 ~]# crm_mon
============
Last updated: Tue Sep 1 10:13:25 2009
Stack: openais
Current DC: pcmk-1 - partition with quorum
-Version: 1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7
+Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
4 Resources configured.
============
Online: [ pcmk-1 pcmk-2 ]
ClusterIP (ocf::heartbeat:IPaddr): Started pcmk-2
WebSite (ocf::heartbeat:apache): Started pcmk-2
Master/Slave Set: WebDataClone
Masters: [ pcmk-2 ]
Slaves: [ pcmk-1 ]
WebFS (ocf::heartbeat:Filesystem): Started pcmk-2
Notice that our resource stickiness settings prevent the services from migrating back to pcmk-1.
diff --git a/doc/Clusters_from_Scratch/en-US/Ch-Stonith.xml b/doc/Clusters_from_Scratch/en-US/Ch-Stonith.xml
index 3aaf5cdbcd..5b88f4c69a 100644
--- a/doc/Clusters_from_Scratch/en-US/Ch-Stonith.xml
+++ b/doc/Clusters_from_Scratch/en-US/Ch-Stonith.xml
@@ -1,161 +1,161 @@
%BOOK_ENTITIES;
]>
-
+
Configure STONITH
Why You Need STONITH
STONITH is an acronym for Shoot-The-Other-Node-In-The-Head and it protects your data from being corrupted by rogue nodes or concurrent access.
Just because a node is unresponsive doesn't mean it isn't accessing your data. The only way to be 100% sure that your data is safe is to use STONITH, so we can be certain that the node is truly offline before allowing the data to be accessed from another node.
STONITH also has a role to play in the event that a clustered service cannot be stopped. In this case, the cluster uses STONITH to force the whole node offline, thereby making it safe to start the service elsewhere.
What STONITH Device Should You Use
It is crucial that the STONITH device can allow the cluster to differentiate between a node failure and a network one.
The biggest mistake people make in choosing a STONITH device is to use a remote power switch (such as many onboard IPMI controllers) that shares power with the node it controls. In such cases, the cluster cannot be sure whether the node is really offline or active and suffering from a network fault.
Likewise, any device that relies on the machine being active (such as the SSH-based "devices" used during testing) is inappropriate.
Configuring STONITH
Find the correct driver: stonith -L
Since every device is different, the parameters needed to configure it will vary. To find out the parameters required by the device: stonith -t {type} -n
Hopefully the developers chose names that make sense; if not, you can query for some additional information by finding an active cluster node and running:
lrmadmin -M stonith {type} pacemaker
The output should be XML formatted text containing additional parameter descriptions
Create a file called stonith.xml containing a primitive resource with a class of stonith, a type of {type} and a parameter for each of the values returned in step 2
Create a clone from the primitive resource if the device can shoot more than one node and supports multiple simultaneous connections.
Upload it into the CIB using cibadmin: cibadmin -C -o resources --xml-file stonith.xml (a sketch of such a file follows this list)
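For reference, here is a minimal sketch of what such a stonith.xml might contain, reusing the external/ibmrsa values from the example below. The id attributes are arbitrary labels chosen here for illustration, and the parameter names must match whatever your device reported in step 2:
cat > stonith.xml <<'EOF'
<primitive id="rsa-fencing" class="stonith" type="external/ibmrsa">
  <instance_attributes id="rsa-fencing-params">
    <nvpair id="rsa-fencing-hostname" name="hostname" value="pcmk-1 pcmk-2"/>
    <nvpair id="rsa-fencing-ipaddr" name="ipaddr" value="192.168.122.31"/>
    <nvpair id="rsa-fencing-userid" name="userid" value="mgmt"/>
    <nvpair id="rsa-fencing-passwd" name="passwd" value="abc123"/>
    <nvpair id="rsa-fencing-type" name="type" value="ibm"/>
  </instance_attributes>
  <operations>
    <op id="rsa-fencing-monitor" name="monitor" interval="60s"/>
  </operations>
</primitive>
EOF
cibadmin -C -o resources --xml-file stonith.xml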
Example
Assuming we have an IBM BladeCenter containing our two nodes, with the management interface active on 192.168.122.31, we would choose the external/ibmrsa driver in step 2 and obtain the following list of parameters
stonith -t external/ibmrsa -n
[root@pcmk-1 ~]# stonith -t external/ibmrsa -n
hostname ipaddr userid passwd type
Assuming we know the username and password for the management interface, we would create a STONITH resource with the shell
[root@pcmk-1 ~]# crm
crm(live)# cib new stonith
INFO: stonith shadow CIB created
crm(stonith)# configure primitive rsa-fencing stonith::external/ibmrsa \
params hostname="pcmk-1 pcmk-2" ipaddr=192.168.122.31 userid=mgmt passwd=abc123 type=ibm \
op monitor interval="60s"
crm(stonith)# configure clone Fencing rsa-fencing
And finally, since we disabled it earlier, we need to re-enable STONITH
crm(stonith)# configure property stonith-enabled="true"
crm(stonith)# configure show
node pcmk-1
node pcmk-2
primitive WebData ocf:linbit:drbd \
params drbd_resource="wwwdata" \
op monitor interval="60s"
primitive WebFS ocf:heartbeat:Filesystem \
params device="/dev/drbd/by-res/wwwdata" directory="/var/www/html" fstype="gfs2"
primitive WebSite ocf:heartbeat:apache \
params configfile="/etc/httpd/conf/httpd.conf" \
op monitor interval="1min"
primitive ClusterIP ocf:heartbeat:IPaddr2 \
params ip="192.168.122.101" cidr_netmask="32" clusterip_hash="sourceip" \
op monitor interval="30s"
primitive dlm ocf:pacemaker:controld \
op monitor interval="120s"
primitive gfs-control ocf:pacemaker:controld \
params daemon="gfs_controld.pcmk" args="-g 0" \
op monitor interval="120s"
primitive rsa-fencing stonith::external/ibmrsa \
params hostname="pcmk-1 pcmk-2" ipaddr=192.168.122.31 userid=mgmt passwd=abc123 type=ibm \
op monitor interval="60s"
ms WebDataClone WebData \
meta master-max="2" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
clone Fencing rsa-fencing
clone WebFSClone WebFS
clone WebIP ClusterIP \
meta globally-unique="true" clone-max="2" clone-node-max="2"
clone WebSiteClone WebSite
clone dlm-clone dlm \
meta interleave="true"
clone gfs-clone gfs-control \
meta interleave="true"
colocation WebFS-with-gfs-control inf: WebFSClone gfs-clone
colocation WebSite-with-WebFS inf: WebSiteClone WebFSClone
colocation fs_on_drbd inf: WebFSClone WebDataClone:Master
colocation gfs-with-dlm inf: gfs-clone dlm-clone
colocation website-with-ip inf: WebSiteClone WebIP
order WebFS-after-WebData inf: WebDataClone:promote WebFSClone:start
order WebSite-after-WebFS inf: WebFSClone WebSiteClone
order apache-after-ip inf: WebIP WebSiteClone
order start-WebFS-after-gfs-control inf: gfs-clone WebFSClone
order start-gfs-after-dlm inf: dlm-clone gfs-clone
property $id="cib-bootstrap-options" \
- dc-version="1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7" \
+ dc-version="1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f" \
cluster-infrastructure="openais" \
expected-quorum-votes="2" \
stonith-enabled="true" \
no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
resource-stickiness="100"
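Since the resource was created in a shadow CIB, the changes are not yet active on the live cluster. With crmsh, the usual final step is to commit the shadow copy and leave the shell, typically something like:
crm(stonith)# cib commit stonith
crm(stonith)# quit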
diff --git a/doc/Clusters_from_Scratch/en-US/Ch-Tools.xml b/doc/Clusters_from_Scratch/en-US/Ch-Tools.xml
index cef8355d03..23a67cfd1b 100644
--- a/doc/Clusters_from_Scratch/en-US/Ch-Tools.xml
+++ b/doc/Clusters_from_Scratch/en-US/Ch-Tools.xml
@@ -1,122 +1,121 @@
%BOOK_ENTITIES;
]>
Using Pacemaker Tools
In the dark past, configuring Pacemaker required the administrator to read and write XML. In true UNIX style, there were also a number of different commands that specialized in different aspects of querying and updating the cluster.
Since Pacemaker 1.0, this has all changed: we now have an integrated, scriptable cluster shell that hides all the messy XML scaffolding. It even allows you to queue up several changes at once and commit them atomically.
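For instance, here is a minimal sketch of queueing several changes in one interactive session and committing them together; the resource name, address and constraint are purely illustrative:
[root@pcmk-1 ~]# crm
crm(live)# configure
crm(live)configure# primitive ExampleIP ocf:heartbeat:IPaddr2 \
 params ip="192.168.122.200" cidr_netmask="32" \
 op monitor interval="30s"
crm(live)configure# location prefer-pcmk-1 ExampleIP 50: pcmk-1
crm(live)configure# commit
crm(live)configure# quit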
Take some time to familiarize yourself with what it can do.
[root@pcmk-1 ~]# crm --help
usage:
crm [-D display_type]
crm [-D display_type] args
crm [-D display_type] [-f file]
Use crm without arguments for an interactive session.
Supply one or more arguments for a "single-shot" use.
Specify with -f a file which contains a script. Use '-' for
standard input or use pipe/redirection.
crm displays cli format configurations using a color scheme
and/or in uppercase. Pick one of "color" or "uppercase", or
use "-D color,uppercase" if you want colorful uppercase.
Get plain output by "-D plain". The default may be set in
user preferences (options).
Examples:
# crm -f stopapp2.cli
# crm < stopapp2.cli
# crm resource stop global_www
# crm status
The primary tool for monitoring the status of the cluster is crm_mon (also available as crm status). It can be run in a variety of modes and has a number of output options. To find out about any of the tools that come with Pacemaker, simply invoke them with the --help option or consult the included man pages. Both sets of output are generated from the tool itself, so they will always be in sync with each other and with the tool.
Additionally, the Pacemaker version and supported cluster stack(s) are available via the --version option.
[root@pcmk-1 ~]# crm_mon --version
-crm_mon 1.0.5 for OpenAIS and Heartbeat (Build: 462f1569a43740667daf7b0f6b521742e9eb8fa7)
-
+Pacemaker 1.1.5
Written by Andrew Beekhof
[root@pcmk-1 ~]# crm_mon --help
crm_mon - Provides a summary of cluster's current state.
Outputs varying levels of detail in a number of different formats.
Usage: crm_mon mode [options]
Options:
-?, --help This text
-$, --version Version information
-V, --verbose Increase debug output
Modes:
-h, --as-html=value Write cluster status to the named file
-w, --web-cgi Web mode with output suitable for cgi
-s, --simple-status Display the cluster status once as a simple one line output (suitable for nagios)
-S, --snmp-traps=value Send SNMP traps to this station
-T, --mail-to=value Send Mail alerts to this user. See also --mail-from, --mail-host, --mail-prefix
Display Options:
-n, --group-by-node Group resources by node
-r, --inactive Display inactive resources
-f, --failcounts Display resource fail counts
-o, --operations Display resource operation history
-t, --timing-details Display resource operation history with timing details
Additional Options:
-i, --interval=value Update frequency in seconds
-1, --one-shot Display the cluster status once on the console and exit
-N, --disable-ncurses Disable the use of ncurses
-d, --daemonize Run in the background as a daemon
-p, --pid-file=value (Advanced) Daemon pid file location
-F, --mail-from=value Mail alerts should come from the named user
-H, --mail-host=value Mail alerts should be sent via the named host
-P, --mail-prefix=value Subjects for mail alerts should start with this string
-E, --external-agent=value A program to run when resource operations take place.
-e, --external-recipient=value A recipient for your program (assuming you want the program to send something to someone).
Examples:
Display the cluster's status on the console with updates as they occur:
# crm_mon
Display the cluster's status on the console just once then exit:
# crm_mon -1
Display your cluster's status, group resources by node, and include inactive resources in the list:
# crm_mon --group-by-node --inactive
Start crm_mon as a background daemon and have it write the cluster's status to an HTML file:
# crm_mon --daemonize --as-html /path/to/docroot/filename.html
Start crm_mon as a background daemon and have it send email alerts:
# crm_mon --daemonize --mail-to user@example.com --mail-host mail.example.com
Start crm_mon as a background daemon and have it send SNMP alerts:
# crm_mon --daemonize --snmp-traps snmptrapd.example.com
Report bugs to pacemaker@oss.clusterlabs.org
If the SNMP and/or email options are not listed, then Pacemaker was not built to support them. This may have been a choice made by your distribution, or the required libraries may not have been available. Please contact whoever supplied you with the packages for more details.
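A quick way to check whether your build includes them is to filter the help output, for example:
[root@pcmk-1 ~]# crm_mon --help 2>&1 | grep -i -e snmp -e mail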
diff --git a/doc/Clusters_from_Scratch/en-US/Ch-Verification.xml b/doc/Clusters_from_Scratch/en-US/Ch-Verification.xml
index b3b4a3c1f4..b8149eb5e0 100644
--- a/doc/Clusters_from_Scratch/en-US/Ch-Verification.xml
+++ b/doc/Clusters_from_Scratch/en-US/Ch-Verification.xml
@@ -1,140 +1,140 @@
%BOOK_ENTITIES;
]>
Verify Cluster Installation
Verify Corosync Installation
Start Corosync on the first node
[root@pcmk-1 ~]# /etc/init.d/corosync start
Starting Corosync Cluster Engine (corosync): [ OK ]
Check that the cluster started correctly and that an initial membership was able to form
[root@pcmk-1 ~]# grep -e "corosync.*network interface" -e "Corosync Cluster Engine" -e "Successfully read main configuration file" /var/log/messages
Aug 27 09:05:34 pcmk-1 corosync[1540]: [MAIN ] Corosync Cluster Engine ('1.1.0'): started and ready to provide service.
Aug 27 09:05:34 pcmk-1 corosync[1540]: [MAIN ] Successfully read main configuration file '/etc/corosync/corosync.conf'.
[root@pcmk-1 ~]# grep TOTEM /var/log/messages
Aug 27 09:05:34 pcmk-1 corosync[1540]: [TOTEM ] Initializing transport (UDP/IP).
Aug 27 09:05:34 pcmk-1 corosync[1540]: [TOTEM ] Initializing transmit/receive security: libtomcrypt SOBER128/SHA1HMAC (mode 0).
Aug 27 09:05:35 pcmk-1 corosync[1540]: [TOTEM ] The network interface [192.168.122.101] is now up.
Aug 27 09:05:35 pcmk-1 corosync[1540]: [TOTEM ] A processor joined or left the membership and a new membership was formed.
With one node functional, it is now safe to start Corosync on the second node as well.
[root@pcmk-1 ~]# ssh pcmk-2 -- /etc/init.d/corosync start
Starting Corosync Cluster Engine (corosync): [ OK ]
[root@pcmk-1 ~]#
Check that the cluster formed correctly
[root@pcmk-1 ~]# grep TOTEM /var/log/messages
Aug 27 09:05:34 pcmk-1 corosync[1540]: [TOTEM ] Initializing transport (UDP/IP).
Aug 27 09:05:34 pcmk-1 corosync[1540]: [TOTEM ] Initializing transmit/receive security: libtomcrypt SOBER128/SHA1HMAC (mode 0).
Aug 27 09:05:35 pcmk-1 corosync[1540]: [TOTEM ] The network interface [192.168.122.101] is now up.
Aug 27 09:05:35 pcmk-1 corosync[1540]: [TOTEM ] A processor joined or left the membership and a new membership was formed.
Aug 27 09:12:11 pcmk-1 corosync[1540]: [TOTEM ] A processor joined or left the membership and a new membership was formed.
Verify Pacemaker Installation
Now that we have confirmed that Corosync is functional, we can check the rest of the stack.
[root@pcmk-1 ~]# grep pcmk_startup /var/log/messages
Aug 27 09:05:35 pcmk-1 corosync[1540]: [pcmk ] info: pcmk_startup: CRM: Initialized
Aug 27 09:05:35 pcmk-1 corosync[1540]: [pcmk ] Logging: Initialized pcmk_startup
Aug 27 09:05:35 pcmk-1 corosync[1540]: [pcmk ] info: pcmk_startup: Maximum core file size is: 18446744073709551615
Aug 27 09:05:35 pcmk-1 corosync[1540]: [pcmk ] info: pcmk_startup: Service: 9
Aug 27 09:05:35 pcmk-1 corosync[1540]: [pcmk ] info: pcmk_startup: Local hostname: pcmk-1
- Now try starting Pacemaker and the necessary processes have been started
+ Now try starting Pacemaker and check the necessary processes have been started
[root@pcmk-1 ~]# /etc/init.d/pacemaker start
Starting Pacemaker Cluster Manager: [ OK ]
[root@pcmk-1 ~]# grep -e pacemakerd.*get_config_opt -e pacemakerd.*start_child -e "Starting Pacemaker" /var/log/messages
Feb 8 13:31:24 pcmk-1 pacemakerd: [13155]: info: get_config_opt: Found 'pacemaker' for option: name
Feb 8 13:31:24 pcmk-1 pacemakerd: [13155]: info: get_config_opt: Found '1' for option: ver
Feb 8 13:31:24 pcmk-1 pacemakerd: [13155]: info: get_config_opt: Defaulting to 'no' for option: use_logd
Feb 8 13:31:24 pcmk-1 pacemakerd: [13155]: info: get_config_opt: Defaulting to 'no' for option: use_mgmtd
Feb 8 13:31:24 pcmk-1 pacemakerd: [13155]: info: get_config_opt: Found 'on' for option: debug
Feb 8 13:31:24 pcmk-1 pacemakerd: [13155]: info: get_config_opt: Found 'yes' for option: to_logfile
Feb 8 13:31:24 pcmk-1 pacemakerd: [13155]: info: get_config_opt: Found '/var/log/corosync.log' for option: logfile
Feb 8 13:31:24 pcmk-1 pacemakerd: [13155]: info: get_config_opt: Found 'yes' for option: to_syslog
Feb 8 13:31:24 pcmk-1 pacemakerd: [13155]: info: get_config_opt: Found 'daemon' for option: syslog_facility
Feb 8 16:50:38 pcmk-1 pacemakerd: [13990]: info: main: Starting Pacemaker 1.1.5 (Build: 31f088949239+): docbook-manpages publican ncurses trace-logging cman cs-quorum heartbeat corosync snmp libesmtp
Feb 8 16:50:38 pcmk-1 pacemakerd: [13990]: info: start_child: Forked child 14022 for process stonith-ng
Feb 8 16:50:38 pcmk-1 pacemakerd: [13990]: info: start_child: Forked child 14023 for process cib
Feb 8 16:50:38 pcmk-1 pacemakerd: [13990]: info: start_child: Forked child 14024 for process lrmd
Feb 8 16:50:38 pcmk-1 pacemakerd: [13990]: info: start_child: Forked child 14025 for process attrd
Feb 8 16:50:38 pcmk-1 pacemakerd: [13990]: info: start_child: Forked child 14026 for process pengine
Feb 8 16:50:38 pcmk-1 pacemakerd: [13990]: info: start_child: Forked child 14027 for process crmd
[root@pcmk-1 ~]# ps axf
PID TTY STAT TIME COMMAND
2 ? S< 0:00 [kthreadd]
3 ? S< 0:00 \_ [migration/0]
... lots of processes ...
13990 ? S 0:01 pacemakerd
14022 ? Sa 0:00 \_ /usr/lib64/heartbeat/stonithd
14023 ? Sa 0:00 \_ /usr/lib64/heartbeat/cib
14024 ? Sa 0:00 \_ /usr/lib64/heartbeat/lrmd
14025 ? Sa 0:00 \_ /usr/lib64/heartbeat/attrd
14026 ? Sa 0:00 \_ /usr/lib64/heartbeat/pengine
14027 ? Sa 0:00 \_ /usr/lib64/heartbeat/crmd
Next, check for any ERRORs during startup - there shouldn’t be any.
[root@pcmk-1 ~]# grep ERROR: /var/log/messages | grep -v unpack_resources
[root@pcmk-1 ~]#
Repeat on the other node and display the cluster's status.
[root@pcmk-1 ~]# ssh pcmk-2 -- /etc/init.d/pacemaker start
Starting Pacemaker Cluster Manager: [ OK ]
[root@pcmk-1 ~]# crm_mon
============
Last updated: Thu Aug 27 16:54:55 2009
Stack: openais
Current DC: pcmk-1 - partition with quorum
-Version: 1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7
+Version: 1.1.5-bdd89e69ba545404d02445be1f3d72e6a203ba2f
2 Nodes configured, 2 expected votes
0 Resources configured.
============
Online: [ pcmk-1 pcmk-2 ]