Page MenuHomeClusterLabs Projects

No OneTemporary

This file is larger than 256 KB, so syntax highlighting was skipped.
diff --git a/doc/man/ra2refentry.xsl b/doc/man/ra2refentry.xsl
index ac148ef6c..d0535fd36 100644
--- a/doc/man/ra2refentry.xsl
+++ b/doc/man/ra2refentry.xsl
@@ -1,644 +1,642 @@
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
version="1.0">
<xsl:output indent="yes"
doctype-public="-//OASIS//DTD DocBook XML V4.4//EN"
doctype-system="http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd"/>
<!--<xsl:strip-space elements="longdesc shortdesc"/>-->
<!-- Package name. -->
<xsl:param name="package">resource-agents</xsl:param>
<!-- Package version number. Must be passed in. -->
<xsl:param name="version"/>
<!-- RA class -->
<xsl:param name="class">ocf</xsl:param>
<!-- RA provider -->
<xsl:param name="provider">heartbeat</xsl:param>
<!-- Man volume number -->
<xsl:param name="manvolum">7</xsl:param>
<!-- -->
<xsl:param name="variable.prefix"/>
<!-- Separator between different action/@name -->
<xsl:param name="separator"> | </xsl:param>
<xsl:variable name="manpagetitleprefix"><xsl:value-of select="$class"/>_<xsl:value-of select="$provider"/>_</xsl:variable>
<!-- Entry point: wraps the whole result in a single <refentry> element and
     processes the children of the document node in "root" mode. -->
<xsl:template match="/">
<refentry>
<xsl:apply-templates mode="root"/>
</refentry>
</xsl:template>
<!-- Root-mode handler for <resource-agent>: adds an id attribute ("re-ra-"
     followed by the agent name) to the element currently being output, then
     emits the man page sections by re-applying templates to the same node,
     one dedicated mode per section, in the order listed below. -->
<xsl:template match="resource-agent" mode="root">
<xsl:param name="this" select="self::resource-agent"/>
<xsl:attribute name="id">
<xsl:text>re-ra-</xsl:text>
<xsl:value-of select="@name"/>
</xsl:attribute>
<!-- One apply-templates per section; the order here is the section order
     in the generated page. -->
<xsl:apply-templates select="$this" mode="refentryinfo"/>
<xsl:apply-templates select="$this" mode="refmeta"/>
<xsl:apply-templates select="$this" mode="refnamediv"/>
<xsl:apply-templates select="$this" mode="synopsis"/>
<xsl:apply-templates select="$this" mode="description"/>
<xsl:apply-templates select="$this" mode="parameters"/>
<xsl:apply-templates select="$this" mode="actions"/>
<xsl:apply-templates select="$this" mode="examplecrmsh"/>
<xsl:apply-templates select="$this" mode="examplepcs"/>
<xsl:apply-templates select="$this" mode="seealso"/>
</xsl:template>
<!-- Empty Templates -->
<!-- Catch-all templates with no body: any node (root mode) or element (other
     modes) that has no more specific template in that mode produces no
     output. The more specific templates in this stylesheet (e.g.
     match="resource-agent") take precedence over these wildcard matches. -->
<xsl:template match="node()" mode="root"/>
<xsl:template match="*" mode="refmeta"/>
<xsl:template match="*" mode="refnamediv"/>
<xsl:template match="*" mode="synopsis"/>
<xsl:template match="*" mode="description"/>
<xsl:template match="*" mode="parameters"/>
<!-- Mode refentryinfo -->
<xsl:template match="resource-agent" mode="refentryinfo">
<refentryinfo>
<productname><xsl:value-of select="$package"/></productname>
<productnumber><xsl:value-of select="$version"/></productnumber>
- <corpauthor>Linux-HA contributors (see the resource agent source for information about individual authors)</corpauthor>
+ <corpauthor>ClusterLabs contributors (see the resource agent source for information about individual authors)</corpauthor>
</refentryinfo>
</xsl:template>
<!-- Mode refmeta -->
<!-- Emits <refmeta>: the page title ($manpagetitleprefix followed by the
     agent name), the manual volume number ($manvolum) and a fixed manual
     name. -->
<xsl:template match="resource-agent" mode="refmeta">
<refmeta>
<refentrytitle><xsl:value-of select="$manpagetitleprefix"/><xsl:value-of select="@name"/></refentrytitle>
<manvolnum><xsl:value-of select="$manvolum"/></manvolnum>
<refmiscinfo class="manual">OCF resource agents</refmiscinfo>
</refmeta>
</xsl:template>
<!-- Mode refnamediv -->
<!-- Emits <refnamediv>: the prefixed agent name as <refname> and the agent's
     <shortdesc> child (processed in the default mode) as <refpurpose>. -->
<xsl:template match="resource-agent" mode="refnamediv">
<refnamediv>
<refname><xsl:value-of select="$manpagetitleprefix"/><xsl:value-of select="@name"/></refname>
<refpurpose><xsl:apply-templates select="shortdesc"/></refpurpose>
</refnamediv>
</xsl:template>
<!-- Mode synopsis -->
<xsl:template match="resource-agent" mode="synopsis">
<refsynopsisdiv>
<cmdsynopsis sepchar=" ">
<command moreinfo="none">
- <xsl:value-of select="@name"/>
+ <xsl:value-of select="@name"/>
</command>
<xsl:apply-templates select="actions" mode="synopsis"/>
</cmdsynopsis>
</refsynopsisdiv>
</xsl:template>
<!-- Synopsis argument list: an optional <group> holding, in the fixed order
     below, the first <action> of each listed name. Actions with any other
     name are not selected here and therefore do not appear in the
     synopsis. -->
<xsl:template match="actions" mode="synopsis">
<group choice="opt" rep="norepeat">
<!-- [1] keeps only the first action of a given name, so an action that is
     declared several times is listed once. -->
<xsl:apply-templates select="action[@name = 'start'][1]" mode="synopsis"/>
<xsl:apply-templates select="action[@name = 'stop'][1]" mode="synopsis"/>
<xsl:apply-templates select="action[@name = 'status'][1]" mode="synopsis"/>
<xsl:apply-templates select="action[@name = 'monitor'][1]" mode="synopsis"/>
<xsl:apply-templates select="action[@name = 'migrate_to'][1]" mode="synopsis"/>
<xsl:apply-templates select="action[@name = 'migrate_from'][1]" mode="synopsis"/>
<xsl:apply-templates select="action[@name = 'promote'][1]" mode="synopsis"/>
<xsl:apply-templates select="action[@name = 'demote'][1]" mode="synopsis"/>
<xsl:apply-templates select="action[@name = 'meta-data'][1]" mode="synopsis"/>
<xsl:apply-templates select="action[@name = 'validate-all'][1]" mode="synopsis"/>
</group>
</xsl:template>
<!-- Emits one synopsis argument: the action's name as a plain,
     non-repeating <arg>. -->
<xsl:template match="action" mode="synopsis">
<arg choice="plain" rep="norepeat">
<xsl:value-of select="@name"/>
</arg>
</xsl:template>
<!-- Mode Description -->
<!-- break string into <para> elements on linefeeds -->
<!-- would be so much easier with replace(...) -->
<xsl:template name="break_into_para">
<xsl:param name="string" />
<xsl:choose>
<xsl:when test="starts-with($string, '&#xA;') or starts-with($string, ' ')" >
- <!-- trim leading newlines and other witespace -->
- <xsl:variable name="normalized" select="normalize-space($string)" />
- <xsl:variable name="nlen" select="string-length($normalized)" />
- <xsl:if test="$nlen &gt; 0" >
- <xsl:variable name="leading" select="string-length(substring-before($string, substring($normalized, 1, 1)))" />
+ <!-- trim leading newlines and other witespace -->
+ <xsl:variable name="normalized" select="normalize-space($string)" />
+ <xsl:variable name="nlen" select="string-length($normalized)" />
+ <xsl:if test="$nlen &gt; 0" >
+ <xsl:variable name="leading" select="string-length(substring-before($string, substring($normalized, 1, 1)))" />
<xsl:call-template name="break_into_para">
- <xsl:with-param name="string" select="substring($string, $leading + 1)" />
+ <xsl:with-param name="string" select="substring($string, $leading + 1)" />
</xsl:call-template>
- </xsl:if>
+ </xsl:if>
</xsl:when>
<xsl:otherwise>
<xsl:variable name="lf" select="'&#xA;&#xA;'" />
<xsl:variable name="lf_dash" select="'&#xA;-'" />
<xsl:choose>
<xsl:when test="contains($string, $lf)">
<xsl:variable name="first" select="substring-before($string, $lf)" />
<!-- recursively call on remaining string -->
<xsl:call-template name="break_into_para">
- <xsl:with-param name="string" select="$first"/>
+ <xsl:with-param name="string" select="$first"/>
</xsl:call-template>
<xsl:call-template name="break_into_para">
- <xsl:with-param name="string" select="substring-after($string, $lf)" />
+ <xsl:with-param name="string" select="substring-after($string, $lf)" />
</xsl:call-template>
</xsl:when>
<xsl:when test="contains($string, $lf_dash)">
<xsl:variable name="first" select="substring-before($string, $lf_dash)" />
<!-- recursively call on remaining string -->
<xsl:call-template name="break_into_para">
- <xsl:with-param name="string" select="$first"/>
+ <xsl:with-param name="string" select="$first"/>
</xsl:call-template>
<xsl:call-template name="break_into_para">
- <xsl:with-param name="string" select="concat('-',substring-after($string, $lf_dash))" />
+ <xsl:with-param name="string" select="concat('-',substring-after($string, $lf_dash))" />
</xsl:call-template>
</xsl:when>
<xsl:otherwise>
<para>
- <xsl:value-of select="'&#xA;'"/>
- <xsl:value-of select="$string"/>
- <xsl:value-of select="'&#xA;'"/>
+ <xsl:value-of select="'&#xA;'"/>
+ <xsl:value-of select="$string"/>
+ <xsl:value-of select="'&#xA;'"/>
</para>
- <xsl:value-of select="'&#xA;'"/>
+ <xsl:value-of select="'&#xA;'"/>
</xsl:otherwise>
</xsl:choose>
</xsl:otherwise>
</xsl:choose>
</xsl:template>
<!-- Emits the "Description" section; its content comes from processing all
     children of <resource-agent> in "description" mode. -->
<xsl:template match="resource-agent" mode="description">
<refsection>
<title>Description</title>
<xsl:apply-templates mode="description"/>
</refsection>
</xsl:template>
<!-- Text nodes reached in "longdesc" mode are passed to the named template
     break_into_para, with the text node's string value as the "string"
     parameter. -->
<xsl:template match="text()" mode="longdesc">
<xsl:call-template name="break_into_para">
<xsl:with-param name="string" select="." />
</xsl:call-template>
</xsl:template>
<!-- A <longdesc> reached in "description" mode hands its children over to
     "longdesc" mode. -->
<xsl:template match="longdesc" mode="description">
<xsl:apply-templates mode="longdesc"/>
</xsl:template>
<!-- In the description, add a paragraph about native migration when the
     agent declares a migrate_from or migrate_to action; afterwards the
     children of <actions> are processed in "longdesc" mode. -->
<xsl:template match="actions" mode="description">
<xsl:if test="action[@name = 'migrate_from' or @name = 'migrate_to']">
<para>This resource agent may be configured for <emphasis>native
migration</emphasis> if available in the cluster manager. For
Pacemaker, the
<parameter>allow-migrate=&quot;true&quot;</parameter> meta
attribute enables native migration.</para>
</xsl:if>
<xsl:apply-templates mode="longdesc"/>
</xsl:template>
<!-- Mode Parameters -->
<xsl:template match="resource-agent" mode="parameters">
<refsection>
<title>Supported Parameters</title>
<xsl:choose>
- <xsl:when test="parameters">
- <xsl:apply-templates mode="parameters"/>
- </xsl:when>
- <xsl:otherwise>
- <para>
- <xsl:text>This resource agent does not support any parameters.</xsl:text>
- </para>
- </xsl:otherwise>
+ <xsl:when test="parameters">
+ <xsl:apply-templates mode="parameters"/>
+ </xsl:when>
+ <xsl:otherwise>
+ <para>
+ <xsl:text>This resource agent does not support any parameters.</xsl:text>
+ </para>
+ </xsl:otherwise>
</xsl:choose>
</refsection>
</xsl:template>
<!-- The agent-level <shortdesc> and <longdesc> produce no output in
     "parameters" mode. -->
<xsl:template match="resource-agent/shortdesc|resource-agent/longdesc" mode="parameters"/>
<!-- <parameters> becomes a <variablelist>; its children are processed in
     "parameters" mode to produce the entries. -->
<xsl:template match="parameters" mode="parameters">
<variablelist>
<xsl:apply-templates mode="parameters"/>
</variablelist>
</xsl:template>
<xsl:template match="parameter" mode="parameters">
<varlistentry>
<term>
<option><xsl:value-of select="concat($variable.prefix, @name)"/></option>
</term>
<listitem>
- <xsl:apply-templates select="longdesc" mode="parameters"/>
- <para>
- <xsl:apply-templates select="content" mode="parameters"/>
- </para>
+ <xsl:apply-templates select="longdesc" mode="parameters"/>
+ <para>
+ <xsl:apply-templates select="content" mode="parameters"/>
+ </para>
</listitem>
</varlistentry>
</xsl:template>
<!-- A parameter's <longdesc>: all child nodes are processed in "longdesc"
     mode. -->
<xsl:template match="longdesc" mode="parameters">
<xsl:apply-templates select="node()" mode="longdesc"/>
</xsl:template>
<!-- A <shortdesc> in "parameters" mode: only its text-node children are
     processed, staying in "parameters" mode. -->
<xsl:template match="shortdesc" mode="parameters">
<xsl:apply-templates select="text()" mode="parameters"/>
</xsl:template>
<xsl:template match="content" mode="parameters">
<xsl:if test="@type != '' or @default != ''">
<xsl:text> (</xsl:text>
<xsl:if test="../@unique = 1">
- <xsl:text>unique, </xsl:text>
+ <xsl:text>unique, </xsl:text>
</xsl:if>
<xsl:choose>
- <xsl:when test="../@required = 1">
- <xsl:text>required</xsl:text>
- </xsl:when>
- <xsl:otherwise>
- <xsl:text>optional</xsl:text>
- </xsl:otherwise>
+ <xsl:when test="../@required = 1">
+ <xsl:text>required</xsl:text>
+ </xsl:when>
+ <xsl:otherwise>
+ <xsl:text>optional</xsl:text>
+ </xsl:otherwise>
</xsl:choose>
<xsl:text>, </xsl:text>
<xsl:if test="@parameter != ''">
- <xsl:value-of select="@type"/>
- <xsl:text>, </xsl:text>
+ <xsl:value-of select="@type"/>
+ <xsl:text>, </xsl:text>
</xsl:if>
<xsl:if test="@type != ''">
- <xsl:value-of select="@type"/>
- <xsl:text>, </xsl:text>
+ <xsl:value-of select="@type"/>
+ <xsl:text>, </xsl:text>
</xsl:if>
<xsl:choose>
- <xsl:when test="@default != ''">
- <xsl:text>default </xsl:text>
- <xsl:if test="@type = 'string'">
- <xsl:text>&quot;</xsl:text>
- </xsl:if>
- <code>
- <xsl:value-of select="@default"/>
- </code>
- <xsl:if test="@type = 'string'">
- <xsl:text>&quot;</xsl:text>
- </xsl:if>
- </xsl:when>
- <xsl:when test="@type='boolean' and @default = ''">
- <xsl:text>default </xsl:text>
- <code>false</code>
- </xsl:when>
- <xsl:otherwise>
- <xsl:text>no default</xsl:text>
- </xsl:otherwise>
+ <xsl:when test="@default != ''">
+ <xsl:text>default </xsl:text>
+ <xsl:if test="@type = 'string'">
+ <xsl:text>&quot;</xsl:text>
+ </xsl:if>
+ <code>
+ <xsl:value-of select="@default"/>
+ </code>
+ <xsl:if test="@type = 'string'">
+ <xsl:text>&quot;</xsl:text>
+ </xsl:if>
+ </xsl:when>
+ <xsl:when test="@type='boolean' and @default = ''">
+ <xsl:text>default </xsl:text>
+ <code>false</code>
+ </xsl:when>
+ <xsl:otherwise>
+ <xsl:text>no default</xsl:text>
+ </xsl:otherwise>
</xsl:choose>
<xsl:text>)</xsl:text>
</xsl:if>
</xsl:template>
<!-- Mode Actions -->
<xsl:template match="resource-agent" mode="actions">
<refsection>
<title>Supported Actions</title>
<xsl:choose>
- <xsl:when test="actions">
- <xsl:apply-templates select="actions" mode="actions"/>
- </xsl:when>
- <xsl:otherwise>
- <!-- This should actually never happen. Every RA must
- advertise the actions it supports. -->
- <para>
- <xsl:text>This resource agent does not advertise any supported actions.</xsl:text>
- </para>
- </xsl:otherwise>
+ <xsl:when test="actions">
+ <xsl:apply-templates select="actions" mode="actions"/>
+ </xsl:when>
+ <xsl:otherwise>
+ <!-- This should actually never happen. Every RA must
+ advertise the actions it supports. -->
+ <para>
+ <xsl:text>This resource agent does not advertise any supported actions.</xsl:text>
+ </para>
+ </xsl:otherwise>
</xsl:choose>
</refsection>
</xsl:template>
<!-- Body of the "Supported Actions" section: an introductory paragraph
     followed by a <variablelist> with one entry per <action> child. -->
<xsl:template match="actions" mode="actions">
<para>This resource agent supports the following actions (operations):</para>
<variablelist>
<xsl:apply-templates select="action" mode="actions"/>
</variablelist>
</xsl:template>
<xsl:template match="action" mode="actions">
<varlistentry>
<term>
<option>
- <xsl:value-of select="@name"/>
- <xsl:if test="@role != ''">
- <xsl:text> (</xsl:text>
- <xsl:value-of select="@role"/>
- <xsl:text> role)</xsl:text>
- </xsl:if>
+ <xsl:value-of select="@name"/>
+ <xsl:if test="@role != ''">
+ <xsl:text> (</xsl:text>
+ <xsl:value-of select="@role"/>
+ <xsl:text> role)</xsl:text>
+ </xsl:if>
</option>
</term>
<listitem>
<para>
- <xsl:choose>
- <xsl:when test="@name = 'start'">
- <xsl:text>Starts the resource.</xsl:text>
- </xsl:when>
- <xsl:when test="@name = 'stop'">
- <xsl:text>Stops the resource.</xsl:text>
- </xsl:when>
- <xsl:when test="@name = 'status'">
- <xsl:text>Performs a status check.</xsl:text>
- </xsl:when>
- <xsl:when test="@name = 'monitor'">
- <xsl:text>Performs a detailed status check.</xsl:text>
- </xsl:when>
- <xsl:when test="@name = 'promote'">
- <xsl:text>Promotes the resource to the Master role.</xsl:text>
- </xsl:when>
- <xsl:when test="@name = 'demote'">
- <xsl:text>Demotes the resource to the Slave role.</xsl:text>
- </xsl:when>
- <xsl:when test="@name = 'migrate_from'">
- <xsl:text>Executes steps necessary for migrating the
- resource </xsl:text>
- <emphasis>away from</emphasis>
- <xsl:text> the node.</xsl:text>
- </xsl:when>
- <xsl:when test="@name = 'migrate_to'">
- <xsl:text>Executes steps necessary for migrating the
- resource </xsl:text>
- <emphasis>to</emphasis>
- <xsl:text> the node.</xsl:text>
- </xsl:when>
- <xsl:when test="@name = 'validate-all'">
- <xsl:text>Performs a validation of the resource configuration.</xsl:text>
- </xsl:when>
- <xsl:when test="@name = 'meta-data'">
- <xsl:text>Retrieves resource agent metadata (internal use only).</xsl:text>
- </xsl:when>
- </xsl:choose>
- <xsl:if test="@timeout != ''">
- <xsl:text> Suggested minimum timeout: </xsl:text>
- <xsl:value-of select="@timeout"/>
- <xsl:text>.</xsl:text>
- </xsl:if>
- <xsl:if test="@interval != ''">
- <xsl:text> Suggested interval: </xsl:text>
- <xsl:value-of select="@interval"/>
- <xsl:text>.</xsl:text>
- </xsl:if>
+ <xsl:choose>
+ <xsl:when test="@name = 'start'">
+ <xsl:text>Starts the resource.</xsl:text>
+ </xsl:when>
+ <xsl:when test="@name = 'stop'">
+ <xsl:text>Stops the resource.</xsl:text>
+ </xsl:when>
+ <xsl:when test="@name = 'status'">
+ <xsl:text>Performs a status check.</xsl:text>
+ </xsl:when>
+ <xsl:when test="@name = 'monitor'">
+ <xsl:text>Performs a detailed status check.</xsl:text>
+ </xsl:when>
+ <xsl:when test="@name = 'promote'">
+ <xsl:text>Promotes the resource to the Master role.</xsl:text>
+ </xsl:when>
+ <xsl:when test="@name = 'demote'">
+ <xsl:text>Demotes the resource to the Slave role.</xsl:text>
+ </xsl:when>
+ <xsl:when test="@name = 'migrate_from'">
+ <xsl:text>Executes steps necessary for migrating the
+ resource </xsl:text>
+ <emphasis>away from</emphasis>
+ <xsl:text> the node.</xsl:text>
+ </xsl:when>
+ <xsl:when test="@name = 'migrate_to'">
+ <xsl:text>Executes steps necessary for migrating the
+ resource </xsl:text>
+ <emphasis>to</emphasis>
+ <xsl:text> the node.</xsl:text>
+ </xsl:when>
+ <xsl:when test="@name = 'validate-all'">
+ <xsl:text>Performs a validation of the resource configuration.</xsl:text>
+ </xsl:when>
+ <xsl:when test="@name = 'meta-data'">
+ <xsl:text>Retrieves resource agent metadata (internal use only).</xsl:text>
+ </xsl:when>
+ </xsl:choose>
+ <xsl:if test="@timeout != ''">
+ <xsl:text> Suggested minimum timeout: </xsl:text>
+ <xsl:value-of select="@timeout"/>
+ <xsl:text>.</xsl:text>
+ </xsl:if>
+ <xsl:if test="@interval != ''">
+ <xsl:text> Suggested interval: </xsl:text>
+ <xsl:value-of select="@interval"/>
+ <xsl:text>.</xsl:text>
+ </xsl:if>
</para>
</listitem>
</varlistentry>
</xsl:template>
<!-- Mode Example CRM Shell-->
<xsl:template match="resource-agent" mode="examplecrmsh">
<refsection>
<title>Example CRM Shell</title>
<para>
- <xsl:text>The following is an example configuration for a </xsl:text>
- <xsl:value-of select="@name"/>
- <xsl:text> resource using the </xsl:text>
- <citerefentry><refentrytitle>crm</refentrytitle><manvolnum>8</manvolnum></citerefentry>
- <xsl:text> shell:</xsl:text>
+ <xsl:text>The following is an example configuration for a </xsl:text>
+ <xsl:value-of select="@name"/>
+ <xsl:text> resource using the </xsl:text>
+ <citerefentry><refentrytitle>crm</refentrytitle><manvolnum>8</manvolnum></citerefentry>
+ <xsl:text> shell:</xsl:text>
</para>
<programlisting>
- <xsl:text>primitive p_</xsl:text>
- <xsl:value-of select="@name"/>
- <xsl:text> </xsl:text>
- <xsl:value-of select="$class"/>
- <xsl:text>:</xsl:text>
- <xsl:value-of select="$provider"/>
- <xsl:text>:</xsl:text>
- <xsl:choose>
- <xsl:when test="parameters/parameter[@required = 1]">
- <xsl:value-of select="@name"/>
- <xsl:text> \
+ <xsl:text>primitive p_</xsl:text>
+ <xsl:value-of select="@name"/>
+ <xsl:text> </xsl:text>
+ <xsl:value-of select="$class"/>
+ <xsl:text>:</xsl:text>
+ <xsl:value-of select="$provider"/>
+ <xsl:text>:</xsl:text>
+ <xsl:choose>
+ <xsl:when test="parameters/parameter[@required = 1]">
+ <xsl:value-of select="@name"/>
+ <xsl:text> \
params \
</xsl:text>
- <xsl:apply-templates select="parameters" mode="examplecrmsh"/>
- </xsl:when>
- <xsl:otherwise>
- <xsl:value-of select="@name"/><xsl:text> \</xsl:text>
- </xsl:otherwise>
- </xsl:choose>
- <!-- Insert a suggested allow-migrate meta attribute if the
- resource agent supports migration -->
- <xsl:if test="actions/action[@name = 'migrate_from' or @name = 'migrate_to']">
- <xsl:text>
+ <xsl:apply-templates select="parameters" mode="examplecrmsh"/>
+ </xsl:when>
+ <xsl:otherwise>
+ <xsl:value-of select="@name"/><xsl:text> \</xsl:text>
+ </xsl:otherwise>
+ </xsl:choose>
+ <!-- Insert a suggested allow-migrate meta attribute if the
+ resource agent supports migration -->
+ <xsl:if test="actions/action[@name = 'migrate_from' or @name = 'migrate_to']">
+ <xsl:text>
meta allow-migrate="true" \</xsl:text>
- </xsl:if>
- <xsl:apply-templates select="actions" mode="examplecrmsh"/>
+ </xsl:if>
+ <xsl:apply-templates select="actions" mode="examplecrmsh"/>
</programlisting>
<!-- Insert a master/slave set definition if the resource
agent supports promotion and demotion -->
<xsl:if test="actions/action/@name = 'promote' and actions/action/@name = 'demote'">
- <programlisting>
- <xsl:text>ms ms_</xsl:text>
- <xsl:value-of select="@name"/>
- <xsl:text> p_</xsl:text>
- <xsl:value-of select="@name"/>
- <xsl:text> \
+ <programlisting>
+ <xsl:text>ms ms_</xsl:text>
+ <xsl:value-of select="@name"/>
+ <xsl:text> p_</xsl:text>
+ <xsl:value-of select="@name"/>
+ <xsl:text> \
meta notify="true" interleave="true"</xsl:text>
- </programlisting>
+ </programlisting>
</xsl:if>
</refsection>
</xsl:template>
<!-- CRM shell example: only parameters marked required="1" are listed. -->
<xsl:template match="parameters" mode="examplecrmsh">
<xsl:apply-templates select="parameter[@required = 1]" mode="examplecrmsh"/>
</xsl:template>
<!-- Emits one "name=value \" fragment for a parameter; the value part comes
     from the parameter's <content> child processed in the same mode. -->
<xsl:template match="parameter" mode="examplecrmsh">
<xsl:text> </xsl:text>
<xsl:value-of select="@name"/>
<xsl:text>=</xsl:text>
<xsl:apply-templates select="content" mode="examplecrmsh"/>
<xsl:text> \</xsl:text>
<!-- Start a new line only when a later sibling parameter is required, i.e.
     when another fragment will follow this one. -->
<xsl:if test="following-sibling::parameter/@required = 1">
<xsl:text>
</xsl:text>
</xsl:if>
</xsl:template>
<xsl:template match="content" mode="examplecrmsh">
<xsl:choose>
<xsl:when test="@default != ''">
- <xsl:text>"</xsl:text>
- <xsl:value-of select="@default"/>
- <xsl:text>"</xsl:text>
+ <xsl:text>"</xsl:text>
+ <xsl:value-of select="@default"/>
+ <xsl:text>"</xsl:text>
</xsl:when>
<xsl:otherwise>
- <replaceable><xsl:value-of select="@type"/></replaceable>
+ <replaceable><xsl:value-of select="@type"/></replaceable>
</xsl:otherwise>
</xsl:choose>
</xsl:template>
<xsl:template match="actions" mode="examplecrmsh">
<!-- In the CRM shell example, show only the monitor action -->
<xsl:apply-templates select="action[@name = 'monitor']" mode="examplecrmsh"/>
</xsl:template>
<!-- Emits a new line starting with "op", the action name, and then the
     action's attributes as rendered by the attribute template of this
     mode. -->
<xsl:template match="action" mode="examplecrmsh">
<xsl:text>
op </xsl:text>
<xsl:value-of select="@name"/>
<xsl:text> </xsl:text>
<xsl:apply-templates select="@*" mode="examplecrmsh"/>
<!-- A trailing backslash is written only when a later sibling action is
     also named "monitor". -->
<xsl:if test="following-sibling::action/@name = 'monitor'">
<xsl:text>\</xsl:text>
</xsl:if>
</xsl:template>
<xsl:template match="action/@*" mode="examplecrmsh">
<xsl:choose>
<xsl:when test="name() = 'name'"><!-- suppress --></xsl:when>
<xsl:otherwise>
- <xsl:value-of select="name()"/>
- <xsl:text>="</xsl:text>
- <xsl:value-of select="current()"/>
- <xsl:text>" </xsl:text>
+ <xsl:value-of select="name()"/>
+ <xsl:text>="</xsl:text>
+ <xsl:value-of select="current()"/>
+ <xsl:text>" </xsl:text>
</xsl:otherwise>
</xsl:choose>
<xsl:if test="following-sibling::*">
<xsl:text> </xsl:text>
</xsl:if>
</xsl:template>
<!-- Descriptions produce no output in the CRM shell example. -->
<xsl:template match="longdesc" mode="examplecrmsh"/>
<xsl:template match="shortdesc" mode="examplecrmsh"/>
<!-- Mode Example PCS-->
<xsl:template match="resource-agent" mode="examplepcs">
<refsection>
<title>Example PCS</title>
<para>
- <xsl:text>The following is an example configuration for a </xsl:text>
- <xsl:value-of select="@name"/>
- <xsl:text> resource using </xsl:text>
- <citerefentry><refentrytitle>pcs</refentrytitle><manvolnum>8</manvolnum></citerefentry>
+ <xsl:text>The following is an example configuration for a </xsl:text>
+ <xsl:value-of select="@name"/>
+ <xsl:text> resource using </xsl:text>
+ <citerefentry><refentrytitle>pcs</refentrytitle><manvolnum>8</manvolnum></citerefentry>
</para>
<programlisting>
- <xsl:text>pcs resource create p_</xsl:text>
- <xsl:value-of select="@name"/>
- <xsl:text> </xsl:text>
- <xsl:value-of select="$class"/>
- <xsl:text>:</xsl:text>
- <xsl:value-of select="$provider"/>
- <xsl:text>:</xsl:text>
- <xsl:choose>
- <xsl:when test="parameters/parameter[@required = 1]">
- <xsl:value-of select="@name"/>
- <xsl:text> \
+ <xsl:text>pcs resource create p_</xsl:text>
+ <xsl:value-of select="@name"/>
+ <xsl:text> </xsl:text>
+ <xsl:value-of select="$class"/>
+ <xsl:text>:</xsl:text>
+ <xsl:value-of select="$provider"/>
+ <xsl:text>:</xsl:text>
+ <xsl:choose>
+ <xsl:when test="parameters/parameter[@required = 1]">
+ <xsl:value-of select="@name"/>
+ <xsl:text> \
</xsl:text>
- <xsl:apply-templates select="parameters" mode="examplepcs"/>
- </xsl:when>
- <xsl:otherwise>
- <xsl:value-of select="@name"/><xsl:text> \</xsl:text>
- </xsl:otherwise>
- </xsl:choose>
- <xsl:apply-templates select="actions" mode="examplepcs"/>
+ <xsl:apply-templates select="parameters" mode="examplepcs"/>
+ </xsl:when>
+ <xsl:otherwise>
+ <xsl:value-of select="@name"/><xsl:text> \</xsl:text>
+ </xsl:otherwise>
+ </xsl:choose>
+ <xsl:apply-templates select="actions" mode="examplepcs"/>
<!-- Insert a master/slave set definition if the resource
agent supports promotion and demotion -->
<xsl:if test="actions/action/@name = 'promote' and actions/action/@name = 'demote'">
- <xsl:text>--master</xsl:text>
+ <xsl:text>--master</xsl:text>
</xsl:if>
</programlisting>
</refsection>
</xsl:template>
<!-- PCS example: only parameters marked required="1" are listed. -->
<xsl:template match="parameters" mode="examplepcs">
<xsl:apply-templates select="parameter[@required = 1]" mode="examplepcs"/>
</xsl:template>
<!-- Emits one "name=value \" fragment for a parameter; the value part comes
     from the parameter's <content> child processed in the same mode. -->
<xsl:template match="parameter" mode="examplepcs">
<xsl:text> </xsl:text>
<xsl:value-of select="@name"/>
<xsl:text>=</xsl:text>
<xsl:apply-templates select="content" mode="examplepcs"/>
<xsl:text> \</xsl:text>
<!-- Start a new line only when a later sibling parameter is required, i.e.
     when another fragment will follow this one. -->
<xsl:if test="following-sibling::parameter/@required = 1">
<xsl:text>
</xsl:text>
</xsl:if>
</xsl:template>
<xsl:template match="content" mode="examplepcs">
<xsl:choose>
<xsl:when test="@default != ''">
- <xsl:text>"</xsl:text>
- <xsl:value-of select="@default"/>
- <xsl:text>"</xsl:text>
+ <xsl:text>"</xsl:text>
+ <xsl:value-of select="@default"/>
+ <xsl:text>"</xsl:text>
</xsl:when>
<xsl:otherwise>
- <replaceable><xsl:value-of select="@type"/></replaceable>
+ <replaceable><xsl:value-of select="@type"/></replaceable>
</xsl:otherwise>
</xsl:choose>
</xsl:template>
<xsl:template match="actions" mode="examplepcs">
<!-- In the PCS example, show only the monitor action -->
<xsl:apply-templates select="action[@name = 'monitor']" mode="examplepcs"/>
</xsl:template>
<!-- Emits a new line starting with "op", the action name, and then the
     action's attributes as rendered by the attribute template of this
     mode. -->
<xsl:template match="action" mode="examplepcs">
<xsl:text>
op </xsl:text>
<xsl:value-of select="@name"/>
<xsl:text> </xsl:text>
<xsl:apply-templates select="@*" mode="examplepcs"/>
<!-- A trailing backslash is written only when a later sibling action is
     also named "monitor". -->
<xsl:if test="following-sibling::action/@name = 'monitor'">
<xsl:text>\</xsl:text>
</xsl:if>
</xsl:template>
<xsl:template match="action/@*" mode="examplepcs">
<xsl:choose>
<xsl:when test="name() = 'name'"><!-- suppress --></xsl:when>
<xsl:otherwise>
- <xsl:value-of select="name()"/>
- <xsl:text>="</xsl:text>
- <xsl:value-of select="current()"/>
- <xsl:text>" </xsl:text>
+ <xsl:value-of select="name()"/>
+ <xsl:text>="</xsl:text>
+ <xsl:value-of select="current()"/>
+ <xsl:text>" </xsl:text>
</xsl:otherwise>
</xsl:choose>
<xsl:if test="following-sibling::*">
<xsl:text> </xsl:text>
</xsl:if>
</xsl:template>
<!-- Descriptions produce no output in the PCS example. -->
<xsl:template match="longdesc" mode="examplepcs"/>
<xsl:template match="shortdesc" mode="examplepcs"/>
<xsl:template match="resource-agent" mode="seealso">
<refsection>
<title>See also</title>
<para>
- <ulink>
- <xsl:attribute name="url">
- <xsl:text>http://www.linux-ha.org/wiki/</xsl:text>
- <xsl:value-of select="@name"/>
- <xsl:text>_(resource_agent)</xsl:text>
- </xsl:attribute>
- </ulink>
+ <ulink>
+ <xsl:attribute name="url">
+ <xsl:text>http://clusterlabs.org/</xsl:text>
+ </xsl:attribute>
+ </ulink>
</para>
</refsection>
</xsl:template>
</xsl:stylesheet>
diff --git a/heartbeat/Delay b/heartbeat/Delay
index f9d303bf8..ab0796579 100755
--- a/heartbeat/Delay
+++ b/heartbeat/Delay
@@ -1,223 +1,223 @@
#!/bin/sh
#
#
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
# License: GNU General Public License (GPL)
#
# This script is a test resource for introducing delay.
#
# usage: $0 {start|stop|status|monitor|meta-data}
#
# OCF parameters are as below:
# OCF_RESKEY_startdelay
# OCF_RESKEY_stopdelay
# OCF_RESKEY_mondelay
#
#
# OCF_RESKEY_startdelay defaults to 20 (seconds)
# OCF_RESKEY_stopdelay defaults to $OCF_RESKEY_startdelay
# OCF_RESKEY_mondelay defaults to $OCF_RESKEY_startdelay
#
#
# This is really a test resource script.
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
# Print a one-line usage summary (the accepted action names) on stdout.
usage() {
    printf '%s\n' "usage: $0 {start|stop|status|monitor|meta-data|validate-all}"
}
# Print the resource agent metadata (XML) on stdout: the agent description,
# its three delay parameters and the actions it supports.
meta_data() {
# NOTE(review): the metadata below advertises default="30" for stopdelay and
# mondelay, while their descriptions say they default to "startdelay" and the
# script's own defaulting assigns them $OCF_RESKEY_startdelay (20 when unset).
# Confirm which value is intended before changing either side.
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="Delay">
<version>1.0</version>
<longdesc lang="en">
This script is a test resource for introducing delay.
</longdesc>
<shortdesc lang="en">Waits for a defined timespan</shortdesc>
<parameters>
<parameter name="startdelay" unique="0" required="0">
<longdesc lang="en">
How long in seconds to delay on start operation.
</longdesc>
<shortdesc lang="en">Start delay</shortdesc>
<content type="integer" default="20" />
</parameter>
<parameter name="stopdelay" unique="0" required="0">
<longdesc lang="en">
How long in seconds to delay on stop operation.
Defaults to "startdelay" if unspecified.
</longdesc>
<shortdesc lang="en">Stop delay</shortdesc>
<content type="integer" default="30" />
</parameter>
<parameter name="mondelay" unique="0" required="0">
<longdesc lang="en">
How long in seconds to delay on monitor operation.
Defaults to "startdelay" if unspecified.
</longdesc>
<shortdesc lang="en">Monitor delay</shortdesc>
<content type="integer" default="30" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="30" />
<action name="stop" timeout="30" />
<action name="status" depth="0" timeout="30" interval="10" />
<action name="monitor" depth="0" timeout="30" interval="10" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="5" />
</actions>
</resource-agent>
END
}
# Query the state of this instance's pseudo resource; the exit status is
# whatever "ha_pseudo_resource ... monitor" returns. The resource is named
# after $OCF_RESOURCE_INSTANCE.
Delay_stat() {
ha_pseudo_resource Delay_${OCF_RESOURCE_INSTANCE} monitor
}
# Report whether the Delay pseudo resource is running.
# Logs the state and returns $OCF_SUCCESS when Delay_stat succeeds,
# $OCF_NOT_RUNNING otherwise.
Delay_Status() {
    if ! Delay_stat; then
        ocf_log info "Delay is stopped"
        return $OCF_NOT_RUNNING
    fi

    ocf_log info "Delay is running OK"
    return $OCF_SUCCESS
}
# Monitor action: validate the parameters quietly (Delay_Validate_All exits
# the script itself on invalid parameters), wait $OCF_RESKEY_mondelay, then
# return the result of Delay_Status.
Delay_Monitor() {
Delay_Validate_All -q
sleep $OCF_RESKEY_mondelay
Delay_Status
}
# Start action.
# Returns $OCF_SUCCESS straight away when the pseudo resource is already
# running. Otherwise validates the parameters, starts the pseudo resource,
# waits $OCF_RESKEY_startdelay, and returns $OCF_ERR_PERM if the start
# command failed or $OCF_SUCCESS if it did not.
Delay_Start() {
    if Delay_stat; then
        ocf_log info "Delay already running."
        return $OCF_SUCCESS
    fi

    Delay_Validate_All -q
    ha_pseudo_resource Delay_${OCF_RESOURCE_INSTANCE} start
    rc=$?
    # The delay is applied whether or not the start command succeeded.
    sleep $OCF_RESKEY_startdelay
    [ $rc -eq 0 ] || return $OCF_ERR_PERM
    return $OCF_SUCCESS
}
# Stop action.
# Returns $OCF_SUCCESS straight away when the pseudo resource is not
# running. Otherwise validates the parameters, stops the pseudo resource,
# waits $OCF_RESKEY_stopdelay, and returns $OCF_ERR_PERM if the stop command
# failed or $OCF_SUCCESS if it did not.
Delay_Stop() {
    if ! Delay_stat; then
        ocf_log info "Delay already stopped."
        return $OCF_SUCCESS
    fi

    Delay_Validate_All -q
    ha_pseudo_resource Delay_${OCF_RESOURCE_INSTANCE} stop
    rc=$?
    # The delay is applied whether or not the stop command succeeded.
    sleep $OCF_RESKEY_stopdelay
    [ $rc -eq 0 ] || return $OCF_ERR_PERM
    return $OCF_SUCCESS
}
# Check if all the arguments are valid numbers, a string is considered valid if:
# 1. It does not contain any character but digits and period ".";
# 2. The period "." does not occur more than once
# Returns $OCF_SUCCESS when every argument is valid (also when no arguments
# are given), $OCF_ERR_ARGS as soon as one argument is not.
Are_Valid_Numbers() {
    for i in "$@"; do
        # Match with shell patterns instead of "echo $i | grep ...": the
        # unquoted expansion and the unquoted bracket expressions were open
        # to word splitting and pathname expansion, so the outcome could
        # depend on the files present in the current directory.
        case $i in
            *[!0-9.]*)
                # contains something other than digits and "."
                return $OCF_ERR_ARGS
                ;;
            *.*.*)
                # more than one "."
                return $OCF_ERR_ARGS
                ;;
        esac
    done
    return $OCF_SUCCESS
}
# Validate the three delay parameters (start, stop and monitor delay).
# Options:
#   -q  print nothing when validation succeeds
# Returns $OCF_SUCCESS when all parameters are valid numbers. On invalid
# parameters it reports the reason through ocf_exit_reason and _exits_ the
# script with $OCF_ERR_ARGS instead of returning.
Delay_Validate_All() {
    # Be quiet when specified -q option _and_ validation succeeded
    quiet=no
    # getopts keeps its position in the global OPTIND. Reset it so that a
    # repeated call in the same shell parses its own arguments from the
    # start; otherwise a second "-q" would be skipped.
    OPTIND=1
    while getopts "q" option; do
        # Quoted: getopts sets $option to "?" for unknown options, which an
        # unquoted test would expose to pathname expansion.
        if test "$option" = "q"; then
            quiet=yes
        fi
    done
    shift $(($OPTIND - 1))

    if Are_Valid_Numbers $OCF_RESKEY_startdelay $OCF_RESKEY_stopdelay \
        $OCF_RESKEY_mondelay; then
        if test "$quiet" = "no"; then
            echo "Validate OK"
        fi
        # _Return_ on validation success
        return $OCF_SUCCESS
    else
        ocf_exit_reason "Some of the instance parameters are invalid"
        # _Exit_ on validation failure
        exit $OCF_ERR_ARGS
    fi
}
# Exactly one argument, the action name, is expected.
[ $# -eq 1 ] || {
    usage
    exit $OCF_ERR_ARGS
}

# Parameter defaults: the start delay is 20 seconds unless set; the stop and
# monitor delays fall back to the start delay.
: ${OCF_RESKEY_startdelay=20}
: ${OCF_RESKEY_stopdelay=$OCF_RESKEY_startdelay}
: ${OCF_RESKEY_mondelay=$OCF_RESKEY_startdelay}

# Dispatch on the requested action. Informational actions exit on the spot;
# for the others the script exits with the handler's status (final line).
case $1 in
    meta-data)
        meta_data
        exit $OCF_SUCCESS
        ;;
    usage)
        usage
        exit $OCF_SUCCESS
        ;;
    start)
        Delay_Start
        ;;
    stop)
        Delay_Stop
        ;;
    monitor)
        Delay_Monitor
        ;;
    status)
        Delay_Status
        ;;
    validate-all)
        Delay_Validate_All
        ;;
    *)
        usage
        exit $OCF_ERR_ARGS
        ;;
esac
exit $?
diff --git a/heartbeat/EvmsSCC b/heartbeat/EvmsSCC
index 802bac470..21dfc7bde 100755
--- a/heartbeat/EvmsSCC
+++ b/heartbeat/EvmsSCC
@@ -1,216 +1,216 @@
#!/bin/sh
#
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
# License: GNU General Public License (GPL)
#
# EvmsSCC
# Description: Runs evms_activate in a heartbeat cluster to activate a
# EVMS shared cluster container in the cluster.
# Original Author: Jo De Baer (jdebaer@novell.com)
# Original Release: 06 Nov 2006
#
# usage: ./EvmsSCC {start|stop|status|monitor|meta-data}
#
# The goal of this resource agent is to provoke the creation of device files
# in /dev/evms which correspond to EVMS2 volumes that reside in an EVMS2 shared
# cluster container. As such it should be run as a clone resource in the
# cluster. Logic inside the resource agent will make sure that "evms_activate"
# is run on only one node in the cluster, both at cluster startup time as well
# as when a node joins the cluster.
#
# Typically, resources that need to mount EVMS2 volumes should run after this
# resource agent has finished its run. As such those resources should be made
# "dependent" on this resource agent by the cluster administrator. An example
# of resources that should depend on this resource agent are Filesystem resource
# agent that mount OCFS2 volumes that reside on EVMS2 volumes in a shared
# EVMS2 cluster container.
#
# For this resource agent to do its job correctly, evmsd must be running on
# the node where the agent is started. Usually evmsd is started by the cluster
# software via a respawn statement in /etc/ha.d/ha.cf. If you encounter timing
# issues where evmsd is not yet started but where the cluster already starts
# the EvmsSCC clone, then you should comment out the evmsd respawn statement
# in /etc/ha.d/ha.cf and start evmsd on each node in the cluster via a separate
# clone resource agent. The EvmsSCC resource agent cloneset should then be made
# dependent on this evmsd cloneset. This will guarantee that evmsd is running
# before EvmsSCC is started, on each node in the cluster.
#
#######################################################################
# Initialization:
# Default OCF_FUNCTIONS_DIR to ${OCF_ROOT}/lib/heartbeat unless the caller
# already set it (the "=" form keeps an existing value, even an empty one),
# then source the shared OCF shell function library from that directory.
# NOTE(review): helpers used below (ocf_log, check_binary, ocf_deprecated, ...)
# are presumably provided by ocf-shellfuncs -- confirm.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
# Utilities used by this script
# Both are looked up via PATH; their presence is verified with check_binary
# before any non-informational action runs.
CUT=cut
EVMSACTIVATE=evms_activate
# Print the one-line usage summary for this agent on stdout.
usage() {
    printf '%s\n' "usage: $0 {start|stop|status|monitor|meta-data}"
}
# Print the agent's OCF metadata (an XML resource-agent document) on stdout.
# The document declares one parameter (ignore_deprecation) and the actions
# start, stop, notify, status, monitor and meta-data with their timeouts.
# Everything between the heredoc markers is emitted verbatim.
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="EvmsSCC">
<version>1.0</version>
<longdesc lang="en">
Deprecation warning: EVMS is no longer actively maintained and should not be used. This agent is deprecated and may be removed from a future release. --
Resource script for EVMS shared cluster container. It runs evms_activate on one node in the cluster.
</longdesc>
<shortdesc lang="en">Manages EVMS Shared Cluster Containers (SCCs) (deprecated)</shortdesc>
<parameters>
<parameter name="ignore_deprecation">
<longdesc lang="en">
If set to true, suppresses the deprecation warning for this agent.
</longdesc>
<shortdesc lang="en">Suppress deprecation warning</shortdesc>
<content type="boolean" default="false" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="60" />
<action name="stop" timeout="60" />
<action name="notify" timeout="60" />
<action name="status" depth="0" timeout="10" interval="10" />
<action name="monitor" depth="0" timeout="10" interval="10" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}
# status/monitor handler.
# Monitoring of EVMS activations is not supported at the moment, so this
# always answers "not running", which also copes with the monitor call made
# before start.
EvmsSCC_status() {
    return $OCF_NOT_RUNNING
}
# notify handler.
# Only the "pre"/"start" notification triggers work: it runs the shared
# start/notify logic. Every other notification is a no-op.
# Always returns OCF_SUCCESS.
EvmsSCC_notify() {
    # These locals keep their names on purpose: EvmsSCC_start_notify_common
    # reads n_active and n_start from its caller's scope.
    local n_type n_op n_active n_stop n_start
    n_type="$OCF_RESKEY_CRM_meta_notify_type"
    n_op="$OCF_RESKEY_CRM_meta_notify_operation"
    n_active="$OCF_RESKEY_CRM_meta_notify_active_uname"
    n_stop="$OCF_RESKEY_CRM_meta_notify_stop_uname"
    n_start="$OCF_RESKEY_CRM_meta_notify_start_uname"

    if [ "$n_type" = "pre" ] && [ "$n_op" = "start" ]; then
        ocf_log debug "EvmsSCC: Notify: Starting node(s): $n_start."
        EvmsSCC_start_notify_common
    fi
    return $OCF_SUCCESS
}
# start handler.
# Runs the shared start/notify logic and always returns OCF_SUCCESS.
# NOTE(review): the status of EvmsSCC_start_notify_common is discarded, so a
# "not on active list" error from the helper does not fail the start --
# confirm this is intended.
EvmsSCC_start() {
    # These locals keep their names on purpose: EvmsSCC_start_notify_common
    # reads n_active and n_start from its caller's scope.
    local n_type n_op n_active n_stop n_start
    n_type="$OCF_RESKEY_CRM_meta_notify_type"
    n_op="$OCF_RESKEY_CRM_meta_notify_operation"
    n_active="$OCF_RESKEY_CRM_meta_notify_active_uname"
    n_stop="$OCF_RESKEY_CRM_meta_notify_stop_uname"
    n_start="$OCF_RESKEY_CRM_meta_notify_start_uname"

    ocf_log debug "EvmsSCC: Start: starting node(s): $n_start."
    EvmsSCC_start_notify_common
    return $OCF_SUCCESS
}
# stop handler: there is nothing to undo, so this always reports success.
EvmsSCC_stop() {
    return $OCF_SUCCESS
}
# Shared logic for start and pre-start notification.
# Reads n_active and n_start from the caller's scope (and appends n_start to
# n_active there). Returns OCF_ERR_GENERIC when the local node is not in the
# combined list. Otherwise, if the local node is the first entry of the
# starting list, it runs ${EVMSACTIVATE} -q and keeps retrying after a random
# pause of up to 39 seconds until the command succeeds; then returns
# OCF_SUCCESS.
EvmsSCC_start_notify_common() {
    local n_myself n_first

    n_myself=${HA_CURHOST:-$(uname -n | tr A-Z a-z)}
    ocf_log debug "EvmsSCC: Start_Notify: I am node $n_myself."

    n_active="$n_active $n_start"
    case " $n_active " in
        *" $n_myself "*)
            ;;
        *)
            ocf_log err "EvmsSCC: $n_myself (local) not on active list!"
            return $OCF_ERR_GENERIC
            ;;
    esac

    # Pick the first node from the starting list: at cluster boot this is one
    # of the many booting nodes, on a later join it is the joining node.
    n_first=$(echo $n_start | cut -d ' ' -f 1)
    ocf_log debug "EvmsSCC: Start_Notify: First node in starting list is $n_first."

    # Only the elected node performs the activation.
    [ "$n_myself" = "$n_first" ] || return $OCF_SUCCESS

    ocf_log debug "EvmsSCC: Start_Notify: I am running ${EVMSACTIVATE}."
    until ${EVMSACTIVATE} -q 2> /dev/null; do
        SLEEP_TIME=$(($(ocf_maybe_random) % 40))
        ocf_log info "EvmsSCC: Evms call failed - sleeping for $SLEEP_TIME seconds and then trying again."
        sleep $SLEEP_TIME
    done
    return $OCF_SUCCESS
}
# Check the arguments passed to this script: exactly one action name.
[ $# -eq 1 ] || {
    usage
    exit $OCF_ERR_ARGS
}
OP=$1

# Informational actions are answered before the deprecation warning and the
# binary checks.
if [ "$OP" = "meta-data" ]; then
    meta_data
    exit $OCF_SUCCESS
elif [ "$OP" = "usage" ]; then
    usage
    exit $OCF_SUCCESS
fi

# Be obnoxious, log deprecation warning on every invocation (unless
# suppressed by resource configuration).
ocf_deprecated
check_binary $CUT
check_binary $EVMSACTIVATE

# Dispatch the action; the handler's status becomes the exit code.
case "$OP" in
    start)
        EvmsSCC_start
        ;;
    notify)
        EvmsSCC_notify
        ;;
    stop)
        EvmsSCC_stop
        ;;
    status|monitor)
        EvmsSCC_status
        ;;
    *)
        usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac
exit $?
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 9baf14073..d009329cb 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -1,883 +1,883 @@
#!/bin/sh
#
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
# License: GNU General Public License (GPL)
#
# Filesystem
# Description: Manages a Filesystem on a shared storage medium.
# Original Author: Eric Z. Ayers (eric.ayers@compgen.com)
# Original Release: 25 Oct 2000
#
# usage: ./Filesystem {start|stop|status|monitor|validate-all|meta-data}
#
# OCF parameters are as below:
# OCF_RESKEY_device
# OCF_RESKEY_directory
# OCF_RESKEY_fstype
# OCF_RESKEY_options
# OCF_RESKEY_statusfile_prefix
# OCF_RESKEY_run_fsck
# OCF_RESKEY_fast_stop
# OCF_RESKEY_force_clones
#
#OCF_RESKEY_device : name of block device for the filesystem. e.g. /dev/sda1, /dev/md0
# Or a -U or -L option for mount, or an NFS mount specification
#OCF_RESKEY_directory : the mount point for the filesystem
#OCF_RESKEY_fstype : optional name of the filesystem type. e.g. ext2
#OCF_RESKEY_options : options to be given to the mount command via -o
#OCF_RESKEY_statusfile_prefix : the prefix used for a status file for monitoring
#OCF_RESKEY_run_fsck : fsck execution mode: auto(default)/force/no
#OCF_RESKEY_fast_stop : fast stop: yes(default)/no
#OCF_RESKEY_force_clones : allow running the resource as clone. e.g. local xfs mounts
# for each brick in a glusterfs setup
#
#
# This assumes you want to manage a filesystem on a shared (SCSI) bus,
# on a replicated device (such as DRBD), or a network filesystem (such
# as NFS or Samba).
#
# Do not put this filesystem in /etc/fstab. This script manages all of
# that for you.
#
# NOTE: If 2 or more nodes mount the same file system read-write, and
# that file system is not designed for that specific purpose
# (such as GFS or OCFS2), and is not a network file system like
# NFS or Samba, then the filesystem is going to become
# corrupted.
#
# As a result, you should use this together with the stonith
# option and redundant, independent communications paths.
#
# If you don't do this, don't blame us when you scramble your
# disk.
#######################################################################
# Initialization: locate and source the shared OCF shell functions.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs

# Defaults
DFLT_STATUSDIR=".Filesystem_status/"

# Variables used by multiple methods
HOSTOS=$(uname)

# The status file goes into an extra directory by default. Its name is built
# from the resource instance, the clone number (when set) and the node name.
prefix=${OCF_RESKEY_statusfile_prefix:-$DFLT_STATUSDIR}
suffix="${OCF_RESOURCE_INSTANCE}"
if [ -n "$OCF_RESKEY_CRM_meta_clone" ]; then
    suffix="${suffix}_$OCF_RESKEY_CRM_meta_clone"
fi
suffix="${suffix}_$(uname -n)"
STATUSFILE=${OCF_RESKEY_directory}/$prefix$suffix
#######################################################################
# Print the one-line usage summary for this agent on stdout.
usage() {
    printf '%s\n' "usage: $0 {start|stop|status|monitor|validate-all|meta-data}"
}
# Print the agent's OCF metadata (an XML resource-agent document) on stdout.
# $DFLT_STATUSDIR is expanded into the statusfile_prefix default; everything
# else between the heredoc markers is emitted verbatim.
# force_unmount is declared as a string, not a boolean, because "safe" is one
# of its documented values.
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="Filesystem">
<version>1.1</version>
<longdesc lang="en">
Resource script for Filesystem. It manages a Filesystem on a
shared storage medium.
The standard monitor operation of depth 0 (also known as probe)
checks if the filesystem is mounted. If you want deeper tests,
set OCF_CHECK_LEVEL to one of the following values:
10: read first 16 blocks of the device (raw read)
This doesn't exercise the filesystem at all, but the device on
which the filesystem lives. This is noop for non-block devices
such as NFS, SMBFS, or bind mounts.
20: test if a status file can be written and read
The status file must be writable by root. This is not always the
case with an NFS mount, as NFS exports usually have the
"root_squash" option set. In such a setup, you must either use
read-only monitoring (depth=10), export with "no_root_squash" on
your NFS server, or grant world write permissions on the
directory where the status file is to be placed.
</longdesc>
<shortdesc lang="en">Manages filesystem mounts</shortdesc>
<parameters>
<parameter name="device" required="1">
<longdesc lang="en">
The name of block device for the filesystem, or -U, -L options for mount, or NFS mount specification.
</longdesc>
<shortdesc lang="en">block device</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="directory" required="1">
<longdesc lang="en">
The mount point for the filesystem.
</longdesc>
<shortdesc lang="en">mount point</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="fstype" required="1">
<longdesc lang="en">
The type of filesystem to be mounted.
</longdesc>
<shortdesc lang="en">filesystem type</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="options">
<longdesc lang="en">
Any extra options to be given as -o options to mount.
For bind mounts, add "bind" here and set fstype to "none".
We will do the right thing for options such as "bind,ro".
</longdesc>
<shortdesc lang="en">options</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="statusfile_prefix">
<longdesc lang="en">
The prefix to be used for a status file for resource monitoring
with depth 20. If you don't specify this parameter, all status
files will be created in a separate directory.
</longdesc>
<shortdesc lang="en">status file prefix</shortdesc>
<content type="string" default="$DFLT_STATUSDIR" />
</parameter>
<parameter name="run_fsck">
<longdesc lang="en">
Specify how to decide whether to run fsck or not.
"auto" : decide to run fsck depending on the fstype(default)
"force" : always run fsck regardless of the fstype
"no" : do not run fsck ever.
</longdesc>
<shortdesc lang="en">run_fsck</shortdesc>
<content type="string" default="auto" />
</parameter>
<parameter name="fast_stop">
<longdesc lang="en">
Normally, we expect no users of the filesystem and the stop
operation to finish quickly. If you cannot control the filesystem
users easily and want to prevent the stop action from failing,
then set this parameter to "no" and add an appropriate timeout
for the stop operation.
</longdesc>
<shortdesc lang="en">fast stop</shortdesc>
<content type="boolean" default="yes" />
</parameter>
<parameter name="force_clones">
<longdesc lang="en">
The use of a clone setup for local filesystems is forbidden
by default. For special setups like glusterfs, cloning a mount
of a local device with a filesystem like ext4 or xfs independently
on several nodes is a valid use case.
Only set this to "true" if you know what you are doing!
</longdesc>
<shortdesc lang="en">allow running as a clone, regardless of filesystem type</shortdesc>
<content type="boolean" default="false" />
</parameter>
<parameter name="force_unmount">
<longdesc lang="en">
This option allows specifying how to handle processes that are
currently accessing the mount directory.
"true" : Default value, kill processes accessing mount point
"safe" : Kill processes accessing mount point using methods that
avoid functions that could potentially block during process
detection
"false" : Do not kill any processes.
The 'safe' option uses shell logic to walk the /proc directory
for pids using the mount point while the default option uses the
fuser cli tool. fuser is known to perform operations that can potentially
block if unresponsive nfs mounts are in use on the system.
</longdesc>
<shortdesc lang="en">Kill processes before unmount</shortdesc>
<content type="string" default="true" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="60" />
<action name="stop" timeout="60" />
<action name="notify" timeout="60" />
<action name="monitor" depth="0" timeout="40" interval="20" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}
#
# Ask the kernel to flush the buffers of the device given as $1.
# This is meant to be called after unmounting and before mounting. It may
# not be necessary in 2.4 and later kernels, but it should not hurt either.
#
# The flush only happens when the $BLOCKDEV binary is available and the
# global $blockdevice is "yes"; in that case the status of the flush is
# returned. In every other case the function returns 0.
#
flushbufs() {
    if have_binary $BLOCKDEV && [ "$blockdevice" = "yes" ]; then
        $BLOCKDEV --flushbufs $1
        return $?
    fi
    return 0
}
# Succeeds when the global mount options contain the word "bind".
is_bind_mount() {
    echo "$options" | grep -w bind >/dev/null 2>&1
}

# Print the current mounts, one per line, normalized to
# "dev mountpoint fstype".
# /proc/mounts is used when it exists and this is not a bind mount; otherwise
# a readable /etc/mtab is used; failing both, the output of the portable
# $MOUNT command is reformatted.
list_mounts() {
    local mounts_file=""

    if [ -e /proc/mounts ] && ! is_bind_mount; then
        mounts_file=/proc/mounts
    elif [ -f /etc/mtab ] && [ -r /etc/mtab ]; then
        mounts_file=/etc/mtab
    fi

    if [ -z "$mounts_file" ]; then
        $MOUNT | cut -d' ' -f1,3,5
    else
        cut -d' ' -f1,2,3 < $mounts_file
    fi
}
# Work out the real device behind $MOUNTPOINT from the mount table.
# Does nothing when $blockdevice is already "yes" or when $FSTYPE is one of
# the network/virtual types listed below. Otherwise DEVICE is replaced with
# the device column of the mount table entry for $MOUNTPOINT (the configured
# device name could be a -L or -U option), and blockdevice is set to "yes"
# when that is a block device.
determine_blockdevice() {
    if [ "$blockdevice" = "yes" ]; then
        return
    fi

    # Get the current real device name, if possible.
    # (specified devname could be -L or -U...)
    case "$FSTYPE" in
        nfs4|nfs|smbfs|cifs|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs|none)
            : ;;
        *)
            # -F: the mount point is a literal path, not a regular
            # expression; "." in /mnt/a.b must not match /mnt/axb.
            DEVICE=$(list_mounts | grep -F " $MOUNTPOINT " | cut -d' ' -f1)
            if [ -b "$DEVICE" ]; then
                blockdevice=yes
            fi
            ;;
    esac
}
# List all filesystems mounted below the path given as $1, excluding the
# path itself. Mount points are printed one per line in reverse sort order,
# so deeper mounts come before the mounts that contain them.
list_submounts() {
    # -F: the path is matched literally, not as a regular expression, so
    # /mnt/a.b does not pick up mounts under /mnt/axb.
    list_mounts | grep -F " $1/" | cut -d' ' -f2 | sort -r
}
# kernels < 2.6.26 can't handle bind remounts
# Logs a warning when the mount options contain the word "ro" and `uname -r`
# reports a 2.6.x release whose third version component is below 26.
# NOTE(review): the function's exit status is a by-product of its last test;
# it is non-zero both when "ro" is absent and when no warning was needed.
# bind_mount, the caller visible here, does not examine it.
bind_kernel_check() {
# Without the "ro" option there is nothing to check.
echo "$options" | grep -w ro >/dev/null 2>&1 ||
return
# awk exits with 1 only for 2.6.x kernels older than 2.6.26; any non-numeric
# tail of the third component (e.g. "-rc1") is stripped before comparing.
uname -r | awk -F. '
$1==2 && $2==6 {
sub("[^0-9].*","",$3);
if ($3<26)
exit(1);
}'
# $? is the status of the awk at the end of the pipeline above.
[ $? -ne 0 ] &&
ocf_log warn "kernel `uname -r` cannot handle read only bind mounts"
}
# Second step of a bind mount that carries extra options.
# When the options are a bind mount with anything beyond plain "-o bind",
# the mount point is remounted with "bind" replaced by "remount" so the
# extra options take effect. In all other cases nothing happens and the
# function returns success.
bind_mount() {
    is_bind_mount || return 0
    [ "$options" != "-o bind" ] || return 0

    bind_kernel_check
    bind_opts=$(echo $options | sed 's/bind/remount/')
    $MOUNT $bind_opts $MOUNTPOINT
}
# Succeeds when the mount option named by $1 is present in
# OCF_RESKEY_options, either bare ("ro") or with a value ("loop=/dev/loop0").
# The option list is treated as comma separated and each entry is compared
# as a whole. The previous word-based grep also matched "ro" inside
# "errors=remount-ro", which made read-write mounts look read-only.
is_option() {
    case ",$OCF_RESKEY_options," in
        *",$1,"*|*",$1="*) return 0 ;;
    esac
    return 1
}
# Decide whether fsck should run before mounting.
# OCF_RESKEY_run_fsck: "force" -> yes, "no" -> no, "auto" or empty -> decide
# by $FSTYPE (no for the types listed below, yes for everything else).
# Any other value is logged as invalid, reset to "auto" and then handled
# like "auto".
is_fsck_needed() {
    case $OCF_RESKEY_run_fsck in
        force)
            return 0
            ;;
        no)
            return 1
            ;;
        ""|auto)
            ;;
        *)
            ocf_log warn "Invalid parameter value for fsck: '$OCF_RESKEY_run_fsck'; setting to 'auto'"
            OCF_RESKEY_run_fsck="auto"
            ;;
    esac

    # "auto": these filesystem types are never checked.
    case $FSTYPE in
        ext4|ext4dev|ext3|reiserfs|reiser4|nss|xfs|jfs|vfat|fat|nfs4|nfs|cifs|smbfs|ocfs2|gfs2|none|lustre|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs)
            return 1
            ;;
    esac
    return 0
}
# Check that the kernel can mount filesystems of type $FSTYPE.
# Returns OCF_SUCCESS when:
#   - the host OS is OpenBSD (the /proc/filesystems check is skipped there),
#   - no FSTYPE is given or it is "none",
#   - the type (or "fuse" for fuse-based types) is listed in
#     /proc/filesystems, possibly after loading the module with $MODPROBE.
# Returns OCF_ERR_INSTALLED (with an exit reason) when the module cannot be
# loaded or the type still is not listed after five one-second retries.
fstype_supported()
{
    local support="$FSTYPE"
    local try

    # The comparison used to be inverted ("!="), which returned success on
    # every host except OpenBSD and so skipped the whole check exactly
    # where /proc/filesystems is available.
    if [ "X${HOSTOS}" = "XOpenBSD" ]; then
        # skip checking /proc/filesystems for obsd
        return $OCF_SUCCESS
    fi

    if [ -z "$FSTYPE" ] || [ "$FSTYPE" = none ]; then
        # No FSTYPE specified, rely on the system having the right
        # file-system support already
        return $OCF_SUCCESS
    fi

    # support fuse-filesystems (e.g. GlusterFS)
    case $FSTYPE in
        fuse.*|glusterfs|rozofs) support="fuse";;
    esac

    if grep -w "$support"'$' /proc/filesystems >/dev/null; then
        # found the fs type
        return $OCF_SUCCESS
    fi

    # Not listed yet: try to load the module, then look again.
    if ! $MODPROBE $support >/dev/null; then
        ocf_exit_reason "Couldn't find filesystem $FSTYPE in /proc/filesystems and failed to load kernel module"
        return $OCF_ERR_INSTALLED
    fi

    # It is possible for the module to load and not be completely
    # initialized before we check /proc/filesystems again. Give this a few
    # tries before giving up entirely.
    for try in $(seq 5); do
        if grep -w "$support"'$' /proc/filesystems >/dev/null; then
            # yes. found the filesystem after doing the modprobe
            return $OCF_SUCCESS
        fi
        ocf_log debug "Unable to find support for $FSTYPE in /proc/filesystems after modprobe, trying again"
        sleep 1
    done

    ocf_exit_reason "Couldn't find filesystem $FSTYPE in /proc/filesystems"
    return $OCF_ERR_INSTALLED
}
#
# START: mount the filesystem
#
# Returns OCF_SUCCESS when the filesystem is already mounted or was mounted,
# OCF_ERR_GENERIC when fsck reports a serious error (status >= 4) or the
# mount fails. Exits the script with OCF_ERR_INSTALLED when the filesystem
# type is unsupported, the block device is missing, or the mount point
# cannot be created.
#
Filesystem_start()
{
    local fsck_rc mount_rc

    # Nothing to do when the mount is already in place.
    if Filesystem_status >/dev/null 2>&1 ; then
        ocf_log info "Filesystem $MOUNTPOINT is already mounted."
        return $OCF_SUCCESS
    fi

    if ! fstype_supported; then
        exit $OCF_ERR_INSTALLED
    fi

    # Check the filesystem & auto repair.
    # NOTE: Some filesystem types don't need this step... Please modify
    #       accordingly
    if [ $blockdevice = "yes" ]; then
        if [ "$DEVICE" != "/dev/null" ] && [ ! -b "$DEVICE" ]; then
            ocf_exit_reason "Couldn't find device [$DEVICE]. Expected /dev/??? to exist"
            exit $OCF_ERR_INSTALLED
        fi

        if is_fsck_needed; then
            ocf_log info "Starting filesystem check on $DEVICE"
            if [ -n "$FSTYPE" ]; then
                $FSCK -t $FSTYPE -p $DEVICE
            else
                $FSCK -p $DEVICE
            fi
            fsck_rc=$?

            # NOTE: if any errors at all are detected, fsck returns
            # non-zero; a status >= 4 means there is a big problem.
            if [ $fsck_rc -ge 4 ]; then
                ocf_exit_reason "Couldn't successfully fsck filesystem for $DEVICE"
                return $OCF_ERR_GENERIC
            fi
        fi
    fi

    # Create the mount point on demand, then insist that it exists.
    if [ ! -d "$MOUNTPOINT" ]; then
        ocf_run mkdir -p $MOUNTPOINT
    fi
    if [ ! -d "$MOUNTPOINT" ] ; then
        ocf_exit_reason "Couldn't find directory [$MOUNTPOINT] to use as a mount point"
        exit $OCF_ERR_INSTALLED
    fi

    flushbufs $DEVICE

    # Mount the filesystem. fstype "none" additionally gets the bind
    # remount step; an empty fstype lets mount work the type out itself.
    case "$FSTYPE" in
        none)
            $MOUNT $options $DEVICE $MOUNTPOINT && bind_mount
            ;;
        "")
            $MOUNT $options $DEVICE $MOUNTPOINT
            ;;
        *)
            $MOUNT -t $FSTYPE $options $DEVICE $MOUNTPOINT
            ;;
    esac
    mount_rc=$?

    if [ $mount_rc -ne 0 ]; then
        ocf_exit_reason "Couldn't mount device [$DEVICE] as $MOUNTPOINT"
        return $OCF_ERR_GENERIC
    fi
    return $OCF_SUCCESS
}
# end of Filesystem_start
# Print the ids of processes that use the directory given as $1.
# The method depends on $FORCE_UNMOUNT:
#   true value : fstat on OpenBSD, "$FUSER -m" everywhere else
#   "safe"     : walk /proc for symlinks into the directory and for memory
#                maps that mention it, then print the unique ids
#   otherwise  : print nothing
get_pids()
{
    local mnt=$1
    local link_pids
    local map_pids

    if ocf_is_true "$FORCE_UNMOUNT"; then
        case "X${HOSTOS}" in
            XOpenBSD)
                fstat | grep $mnt | awk '{print $3}'
                ;;
            *)
                $FUSER -m $mnt 2>/dev/null
                ;;
        esac
    elif [ "$FORCE_UNMOUNT" = "safe" ]; then
        link_pids=$(find /proc/[0-9]*/ -type l -lname "${mnt}/*" -or -lname "${mnt}" 2>/dev/null | awk -F/ '{print $3}')
        map_pids=$(grep " ${mnt}" /proc/[0-9]*/maps | awk -F/ '{print $3}')
        printf "${link_pids}\n${map_pids}" | sort | uniq
    fi
}
# Send signal $2 to every process that get_pids reports for directory $1.
# When no process is found an informational message is logged instead.
signal_processes() {
    local dir=$1
    local sig=$2
    local pids pid

    # fuser returns a non-zero return code if none of the specified files
    # is accessed or in case of a fatal error, so only the printed list of
    # process ids is looked at.
    pids=$(get_pids "$dir")

    if [ -n "$pids" ]; then
        for pid in $pids; do
            ocf_log info "sending signal $sig to: $(ps -f $pid | tail -1)"
            kill -s $sig $pid
        done
    else
        ocf_log info "No processes on $dir were signalled. force_unmount is set to '$FORCE_UNMOUNT'"
    fi
}
# Try once to unmount the mount point given as $1.
# The status of $UMOUNT itself is ignored; success is decided by whether the
# mount point still shows up in the mount table afterwards.
# Returns OCF_SUCCESS when it is gone, OCF_ERR_GENERIC when it is still there.
try_umount() {
    local SUB=$1

    $UMOUNT $umount_force $SUB

    # -F: the mount point is matched literally, not as a regular
    # expression, so a remaining /mnt/axb does not count as /mnt/a.b.
    if list_mounts | grep -F -q " $SUB " >/dev/null 2>&1; then
        return $OCF_ERR_GENERIC
    fi

    ocf_log info "unmounted $SUB successfully"
    return $OCF_SUCCESS
}
# Unmount $1, killing its users if necessary, within roughly $2 seconds.
# Half of the time budget is spent retrying with SIGTERM, the other half
# with SIGKILL; each retry is an unmount attempt followed, on failure, by
# signalling the processes and a one-second pause.
# Returns OCF_SUCCESS as soon as an unmount attempt succeeds, otherwise
# OCF_ERR_GENERIC.
fs_stop() {
    local mnt=$1 limit=$2 signame left

    for signame in TERM KILL; do
        left=$((limit / 2)) # try half time with TERM
        until [ $left -le 0 ]; do
            if try_umount $mnt; then
                return $OCF_SUCCESS
            fi
            ocf_exit_reason "Couldn't unmount $mnt; trying cleanup with $signame"
            signal_processes $mnt $signame
            left=$((left - 1))
            sleep 1
        done
    done
    return $OCF_ERR_GENERIC
}
#
# STOP: Unmount the filesystem
#
# When the filesystem is mounted, every filesystem mounted below $MOUNTPOINT
# is unmounted first and $MOUNTPOINT last. The returned status is the one of
# the last unmount attempted; an already unmounted filesystem counts as
# success. The device buffers are flushed in either case.
#
Filesystem_stop()
{
    local timeout

    # See if the device is currently mounted
    Filesystem_status >/dev/null 2>&1
    if [ $? -eq $OCF_NOT_RUNNING ]; then
        # Already unmounted, wonderful.
        rc=$OCF_SUCCESS
    else
        # Wipe the status file, but continue with a warning if
        # removal fails -- the file system might be read only
        if [ $OCF_CHECK_LEVEL -eq 20 ]; then
            rm -f ${STATUSFILE} ||
                ocf_log warn "Failed to remove status file ${STATUSFILE}."
        fi

        # Determine the real blockdevice this is mounted on (if
        # possible) prior to unmounting.
        determine_blockdevice

        # For networked filesystems, there's merit in trying -f:
        case "$FSTYPE" in
            nfs4|nfs|cifs|smbfs)
                umount_force="-f"
                ;;
        esac

        # Umount all sub-filesystems mounted under $MOUNTPOINT/ too.
        for SUB in $(list_submounts $MOUNTPOINT) $MOUNTPOINT; do
            ocf_log info "Trying to unmount $SUB"

            # Fast stop allows 6 seconds; otherwise the operation timeout
            # (milliseconds, default 20000) is converted to seconds.
            timeout=6
            if ! ocf_is_true "$FAST_STOP"; then
                timeout=${OCF_RESKEY_CRM_meta_timeout:="20000"}
                timeout=$((timeout/1000))
            fi

            fs_stop $SUB $timeout
            rc=$?
            [ $rc -eq $OCF_SUCCESS ] ||
                ocf_exit_reason "Couldn't unmount $SUB, giving up!"
        done
    fi

    flushbufs $DEVICE
    return $rc
}
# end of Filesystem_stop
#
# STATUS: is the filesystem mounted or not?
#
# Looks for $MOUNTPOINT in the mount table. Sets the globals rc and msg and
# returns OCF_SUCCESS when mounted, OCF_NOT_RUNNING when not. The result is
# logged only for the "status" operation.
#
Filesystem_status()
{
    # -F: the mount point is matched literally, not as a regular
    # expression, so a mounted /mnt/axb is not reported as /mnt/a.b.
    if list_mounts | grep -F -q " $MOUNTPOINT " >/dev/null 2>&1; then
        rc=$OCF_SUCCESS
        msg="$MOUNTPOINT is mounted (running)"
    else
        rc=$OCF_NOT_RUNNING
        msg="$MOUNTPOINT is unmounted (stopped)"
    fi

    # Special case "monitor" to check whether the UUID cached and
    # on-disk still match?
    case "$OP" in
        status) ocf_log info "$msg";;
    esac
    return $rc
}
# end of Filesystem_status
# Note: the read/write tests below will stall in case the
# underlying block device (or in the case of a NAS mount, the
# NAS server) has gone away. In that case, if I/O does not
# return to normal in time, the operation hits its timeout
# and it is up to the CRM to initiate appropriate recovery
# actions (such as fencing the node).
#
# MONITOR 10: read the device
#
# Reads one 4k block from $DEVICE with direct I/O. For anything that is not
# a block device this is a no-op that logs a warning and succeeds.
# Returns OCF_SUCCESS, or OCF_ERR_GENERIC when the read fails.
#
Filesystem_monitor_10()
{
    case "$blockdevice" in
        no)
            ocf_log warn "$DEVICE is not a block device, monitor 10 is noop"
            return $OCF_SUCCESS
            ;;
    esac

    dd_opts="iflag=direct bs=4k count=1"
    # Only dd's stderr is captured; the data read goes to /dev/null.
    if ! err_output=$(dd if=$DEVICE $dd_opts 2>&1 >/dev/null); then
        ocf_exit_reason "Failed to read device $DEVICE"
        ocf_log err "dd said: $err_output"
        return $OCF_ERR_GENERIC
    fi
    return $OCF_SUCCESS
}
#
# MONITOR 20: write and read a status file
#
# Writes the resource instance name to $STATUSFILE with dd (creating the
# containing directory first if needed), then checks that the file exists
# and can be read back.
# Returns OCF_SUCCESS, or OCF_ERR_GENERIC on the first step that fails.
#
Filesystem_monitor_20()
{
    case "$blockdevice" in
        no)
            # O_DIRECT not supported on cifs/smbfs
            dd_opts="oflag=sync bs=4k conv=fsync,sync"
            ;;
        *)
            # Writing to the device in O_DIRECT mode is imperative
            # to bypass caches.
            dd_opts="oflag=direct,sync bs=4k conv=fsync,sync"
            ;;
    esac

    status_dir=$(dirname $STATUSFILE)
    [ -d "$status_dir" ] || mkdir -p "$status_dir"

    if ! err_output=$(echo "${OCF_RESOURCE_INSTANCE}" | dd of=${STATUSFILE} $dd_opts 2>&1); then
        ocf_exit_reason "Failed to write status file ${STATUSFILE}"
        ocf_log err "dd said: $err_output"
        return $OCF_ERR_GENERIC
    fi

    if ! test -f ${STATUSFILE}; then
        ocf_exit_reason "Cannot stat the status file ${STATUSFILE}"
        return $OCF_ERR_GENERIC
    fi

    if ! cat ${STATUSFILE} > /dev/null; then
        ocf_exit_reason "Cannot read the status file ${STATUSFILE}"
        return $OCF_ERR_GENERIC
    fi
    return $OCF_SUCCESS
}
# MONITOR: check the mount and, for OCF_CHECK_LEVEL above 0, run the deeper
# test of that level (10: read the device, 20: status file).
# Returns the status of Filesystem_status when the filesystem is not
# mounted, the status of the deeper test when one ran, and
# OCF_ERR_CONFIGURED for an unsupported level.
Filesystem_monitor()
{
    Filesystem_status
    rc=$?
    [ $rc -eq $OCF_SUCCESS ] || return $rc

    if [ $OCF_CHECK_LEVEL -gt 0 ]; then
        case "$OCF_CHECK_LEVEL" in
            10)
                Filesystem_monitor_10
                rc=$?
                ;;
            20)
                Filesystem_monitor_20
                rc=$?
                ;;
            *)
                ocf_exit_reason "unsupported monitor level $OCF_CHECK_LEVEL"
                rc=$OCF_ERR_CONFIGURED
                ;;
        esac
    fi
    return $rc
}
# end of Filesystem_monitor
#
# VALIDATE_ALL: Are the instance parameters valid?
# FIXME!! The only part that's useful is the return code.
# This code always returns $OCF_SUCCESS (!)
#
# Problems found along the way are only logged: a missing mount point, and
# a filesystem type found neither in /proc/filesystems nor in modules.dep.
#
Filesystem_validate_all()
{
    [ -z "$MOUNTPOINT" ] || [ -d "$MOUNTPOINT" ] ||
        ocf_log warn "Mountpoint $MOUNTPOINT does not exist"

    # Check if the $FSTYPE is workable
    # NOTE: Without inserting the $FSTYPE module, this step may be imprecise
    # TODO: This is Linux specific crap.
    if [ -n "$FSTYPE" ] && [ "$FSTYPE" != none ]; then
        if ! cut -f2 /proc/filesystems | grep -q ^$FSTYPE$; then
            modpath=/lib/modules/$(uname -r)
            moddep=$modpath/modules.dep
            # Do we have $FSTYPE in modules.dep?
            if ! cut -d' ' -f1 $moddep | grep -q "^$modpath.*$FSTYPE\.k\?o:$"; then
                ocf_log info "It seems we do not have $FSTYPE support"
            fi
        fi
    fi

    # If we are supposed to do monitoring with status files, then
    # we need a utility to write in O_DIRECT mode.
    if [ $OCF_CHECK_LEVEL -gt 0 ]; then
        check_binary dd
        # Note: really old coreutils version do not support
        # the "oflag" option for dd. We don't check for that
        # here. In case dd does not support oflag, monitor is
        # bound to fail, with dd spewing an error message to
        # the logs. On such systems, we must do without status
        # file monitoring.
    fi

    #TODO: How to check the $options ?
    return $OCF_SUCCESS
}
#
# Set the global blockdevice variable to "no" or "yes".
#
# It stays "no" for network/virtual filesystem types, for loop mounts, for
# a device given as a mount option (leading "-") and for a device that is a
# directory. It becomes "yes" for /dev/null and for any other device that is
# not a directory. A warning is logged when the device is neither a block
# device nor a directory, except during start.
#
set_blockdevice_var() {
    blockdevice=no

    # these are definitely not block devices
    case "$FSTYPE" in
        nfs4|nfs|smbfs|cifs|none|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs)
            return
            ;;
    esac

    if is_option "loop"; then
        return
    fi

    case "$DEVICE" in
        -*)
            # Oh... An option to mount instead... Typically -U or -L
            ;;
        /dev/null)
            # Special case for BSC
            blockdevice=yes
            ;;
        *)
            if [ ! -b "$DEVICE" ] && [ ! -d "$DEVICE" ] && [ "X$OP" != Xstart ]; then
                ocf_log warn "Couldn't find device [$DEVICE]. Expected /dev/??? to exist"
            fi
            [ -d "$DEVICE" ] || blockdevice=yes
            ;;
    esac
}
# Check the arguments passed to this script: exactly one action name.
[ $# -eq 1 ] || {
    usage
    exit $OCF_ERR_ARGS
}

# Pick up the OCF_RESKEY_ environment variables.
# force_unmount defaults to "yes" when unset or empty.
FORCE_UNMOUNT=${OCF_RESKEY_force_unmount:-yes}
DEVICE=$OCF_RESKEY_device
FSTYPE=$OCF_RESKEY_fstype
if [ -n "$OCF_RESKEY_options" ]; then
    options="-o $OCF_RESKEY_options"
fi
# The default is also written back to OCF_RESKEY_fast_stop.
FAST_STOP=${OCF_RESKEY_fast_stop:="yes"}
OP=$1

# These operations do not require instance parameters
if [ "$OP" = "meta-data" ]; then
    meta_data
    exit $OCF_SUCCESS
elif [ "$OP" = "usage" ]; then
    usage
    exit $OCF_SUCCESS
fi

# Everything else needs a device.
if [ -z "$DEVICE" ]; then
    ocf_exit_reason "Please set OCF_RESKEY_device to the device to be managed"
    exit $OCF_ERR_CONFIGURED
fi
set_blockdevice_var

# Normalize instance parameters:
# It is possible that OCF_RESKEY_directory has one or even multiple trailing "/".
# But the output of `mount` and /proc/mounts do not.
if [ -n "$OCF_RESKEY_directory" ]; then
    MOUNTPOINT=$(echo $OCF_RESKEY_directory | sed 's/\/*$//')
    : ${MOUNTPOINT:=/}
    # At this stage, $MOUNTPOINT does not contain trailing "/" unless it is "/"
    # TODO: / mounted via Filesystem sounds dangerous. On stop, we'll
    # kill the whole system. Is that a good idea?
elif [ X$OP = "Xstart" ] || [ $blockdevice = "no" ]; then
    # A directory is mandatory for start and for non-block devices.
    ocf_exit_reason "Please specify the directory"
    exit $OCF_ERR_CONFIGURED
fi

# Check to make sure the utilities are found
if [ "X${HOSTOS}" != "XOpenBSD" ];then
    check_binary $MODPROBE
    check_binary $FUSER
fi
check_binary $FSCK
check_binary $MOUNT
check_binary $UMOUNT

# Monitor runs often; keep it out of the log.
[ "$OP" = "monitor" ] ||
    ocf_log info "Running $OP for $DEVICE on $MOUNTPOINT"

# Operations that do not need the clone safety checks end here.
case "$OP" in
    status)
        Filesystem_status
        exit $?
        ;;
    monitor)
        Filesystem_monitor
        exit $?
        ;;
    validate-all)
        Filesystem_validate_all
        exit $?
        ;;
    stop)
        Filesystem_stop
        exit $?
        ;;
esac
# Rate how safe it is to run this mount as a clone:
#   0 = not allowed, 1 = considered safe, 2 = allowed with a warning.
# A read-only mount starts at 2, everything else at 0; the filesystem type
# may then override that.
if is_option "ro"; then
    CLUSTERSAFE=2
else
    CLUSTERSAFE=0
fi

case "$FSTYPE" in
    nfs4|nfs|smbfs|cifs|none|gfs2|glusterfs|ceph|ocfs2|overlay|overlayfs|tmpfs)
        CLUSTERSAFE=1 # this is kind of safe too
        ;;
    # add here CLUSTERSAFE=0 for all filesystems which are not
    # cluster aware and which, even if when mounted read-only,
    # could still modify parts of it such as journal/metadata
    ext4|ext4dev|ext3|reiserfs|reiser4|xfs|jfs)
        CLUSTERSAFE=0 # these are not allowed
        if ocf_is_true "$OCF_RESKEY_force_clones"; then
            CLUSTERSAFE=2
        fi
        ;;
esac

if ocf_is_clone; then
    if [ "$CLUSTERSAFE" = 0 ]; then
        ocf_exit_reason "DANGER! $FSTYPE on $DEVICE is NOT cluster-aware!"
        ocf_log err "DO NOT RUN IT AS A CLONE!"
        ocf_log err "Politely refusing to proceed to avoid data corruption."
        exit $OCF_ERR_CONFIGURED
    elif [ "$CLUSTERSAFE" = 2 ]; then
        ocf_log warn "$FSTYPE on $DEVICE is NOT cluster-aware!"
        if ocf_is_true "$OCF_RESKEY_force_clones"; then
            ocf_log warn "But we'll let it run because we trust _YOU_ verified it's safe to do so."
        else
            ocf_log warn "But we'll let it run because it is mounted read-only."
            ocf_log warn "Please make sure that it's meta data is read-only too!"
        fi
    fi
fi

# Only start is left at this point; anything else is unimplemented.
if [ "$OP" = "start" ]; then
    Filesystem_start
    exit $?
fi
usage
exit $OCF_ERR_UNIMPLEMENTED
diff --git a/heartbeat/ICP b/heartbeat/ICP
index c427e7a39..8f187e082 100755
--- a/heartbeat/ICP
+++ b/heartbeat/ICP
@@ -1,296 +1,296 @@
#!/bin/sh
#
#
# ICP
#
# Description: Manages an ICP Vortex clustered host drive as an HA resource
#
#
# Author: Lars Marowsky-Bree <lmb@suse.de>
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
# License: GNU General Public License (GPL)
# Copyright: (C) 2002 SuSE Linux AG
#
#
# An example usage in /etc/ha.d/haresources:
# node1 10.0.0.170 LinuxSCSI::0:0 ICP::c0h1::/dev/sdb1 LVM::myvolname
#
# Notice that you will need to get the utility "icpclucon" from the ICP
# support to use this.
#
# See usage() function below for more details...
#
# OCF parameters are as below:
# OCF_RESKEY_driveid
# OCF_RESKEY_device
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
#
# Path of the "icpclucon" utility used for all status/reserve/release calls
# below (the header notes it must be obtained from ICP support).
ICPCLUCON=/usr/sbin/icpclucon
#
# usage: print the supported operations and a short description of each.
# The operation list is taken from ICP_methods, leaving out the "methods"
# entry itself, and joined with "|".
usage() {
    methods=`ICP_methods | grep -v methods`
    methods=`echo $methods | tr ' ' '|'`
    cat <<-!
usage: $0 ($methods)
$0 manages an ICP Vortex clustered host drive.
The 'start' operation reserves the given host drive.
The 'stop' operation releases the given host drive.
The 'status' operation reports whether the host drive is reserved.
The 'monitor' operation reports whether the host drive is reserved.
The 'validate-all' operation reports whether OCF instance parameters are valid.
The 'methods' operation reports on the methods $0 supports
!
}
# meta_data: print the OCF resource-agent metadata (XML) for ICP on stdout.
# The heredoc is emitted verbatim; it contains no shell expansions.
meta_data() {
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="ICP">
<version>1.0</version>
<longdesc lang="en">
Resource script for ICP. It manages an ICP Vortex clustered host drive as an
HA resource.
</longdesc>
<shortdesc lang="en">Manages an ICP Vortex clustered host drive</shortdesc>
<parameters>
<parameter name="driveid" unique="0" required="1">
<longdesc lang="en">
The ICP cluster drive ID.
</longdesc>
<shortdesc lang="en">ICP cluster drive ID</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="device" unique="0" required="1">
<longdesc lang="en">
The device name.
</longdesc>
<shortdesc lang="en">device</shortdesc>
<content type="string" default="" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="20" />
<action name="stop" timeout="20" />
<action name="status" depth="0" timeout="20" interval="10" />
<action name="monitor" depth="0" timeout="20" interval="10" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}
#
# methods: What methods/operations do we support?
#
# ICP_methods: list every supported operation name, one per line.
ICP_methods() {
    printf '%s\n' \
        start \
        stop \
        status \
        monitor \
        methods \
        validate-all \
        meta-data \
        usage
}
# ICP_status DRIVEID
# Ask icpclucon for the reservation state of the host drive.
# Returns:
#   $OCF_SUCCESS      the output says the drive is reserved by this host
#   $OCF_NOT_RUNNING  the drive is reserved by nobody, or the output is not recognised
#   $OCF_ERR_GENERIC  the icpclucon call itself failed
ICP_status() {
    local icp_out

    icp_out=$($ICPCLUCON -v -status $1) || {
        ocf_log "err" "Hostdrive not reserved by us."
        return $OCF_ERR_GENERIC
    }

    if expr match "$icp_out" \
        '.*Drive is reserved by this host.*' >/dev/null 2>&1 ; then
        ocf_log "info" "Volume $1 is reserved by us."
        return $OCF_SUCCESS
    fi

    # Both remaining outcomes mean "not running"; only the log text differs.
    if expr match "$icp_out" \
        '.*Drive is not reserved by any host.*' >/dev/null 2>&1 ; then
        ocf_log "err" "Volume $1 not reserved by any host."
    else
        ocf_log "err" "Unknown output from icpclucon. Assuming we do not have a reservation:"
        ocf_log "err" "$icp_out"
    fi
    return $OCF_NOT_RUNNING
}
# ICP_report_status DRIVEID
# Print "<driveid>: running" or "<driveid>: not running" based on ICP_status
# and return $OCF_SUCCESS or $OCF_NOT_RUNNING accordingly.
ICP_report_status() {
    ICP_status $1 || {
        echo "$1: not running"
        return $OCF_NOT_RUNNING
    }
    echo "$1: running"
    return $OCF_SUCCESS
}
#
# Monitor the host drive - does it really seem to be working?
#
#
# ICP_monitor DRIVEID
# Returns 0 when ICP_status succeeds; any ICP_status failure is logged and
# reported as $OCF_NOT_RUNNING.
ICP_monitor() {
    if ! ICP_status $1; then
        ocf_log "err" "ICP host drive $1 is offline"
        return $OCF_NOT_RUNNING
    fi
    # ICP_status succeeded, so its exit status was 0.
    return 0
}
# Clear_bufs DEVICE
# Run "$BLOCKDEV --flushbufs" on the device and hand back its exit status.
Clear_bufs() {
    $BLOCKDEV --flushbufs $1
    return $?
}
#
# Enable ICP host drive
#
# ICP_start DRIVEID
# Reserve the host drive; if a plain reserve fails, force the reservation.
# Returns $OCF_SUCCESS once ICP_status confirms the reservation, otherwise
# $OCF_ERR_GENERIC.
ICP_start() {
    ocf_log "info" "Activating host drive $1"
    if ! ocf_run $ICPCLUCON -v -reserve $1; then
        ocf_log "info" "Forcing reservation of $1"
        ocf_run $ICPCLUCON -v -force $1 || return $OCF_ERR_GENERIC
    fi

    if ! ICP_status $1; then
        ocf_log "err" "ICP: $1 was not reserved correctly"
        return $OCF_ERR_GENERIC
    fi

    # A reservation isn't as prompt as it should be
    sleep 3
    return $OCF_SUCCESS
}
#
# Release the ICP host drive
#
# ICP_stop DRIVEID
# Release the host drive and verify that it is no longer reserved by us.
# Returns $OCF_ERR_GENERIC if the release command fails or the drive still
# shows as reserved afterwards, $OCF_SUCCESS otherwise.
ICP_stop() {
    ocf_log "info" "Releasing ICP host drive $1"
    if ! ocf_run $ICPCLUCON -v -release $1; then
        return $OCF_ERR_GENERIC
    fi

    ocf_log "info" "Verifying reservation"
    # A failing status check is the expected outcome after a release.
    ICP_status $1 || return $OCF_SUCCESS

    ocf_log "err" "ICP: $1 was not released correctly"
    return $OCF_ERR_GENERIC
}
# ICP_validate_all
# Check the required binaries and the $driveid / $device globals.
# Exits with $OCF_ERR_ARGS when icpclucon rejects the drive id or when
# $device is not a block device; returns $OCF_SUCCESS otherwise.
ICP_validate_all() {
    check_binary $BLOCKDEV
    check_binary $ICPCLUCON

    if ! $ICPCLUCON -v -status $driveid >/dev/null 2>&1; then
        ocf_log err "Invalid driveid $driveid"
        exit $OCF_ERR_ARGS
    fi

    if [ ! -b $device ]; then
        ocf_log err "Device $device is not a block device"
        exit $OCF_ERR_ARGS
    fi

    # Do not know how to check the association of $device with $driveid.
    return $OCF_SUCCESS
}
#
# 'main' starts here...
#
# Exactly one argument (the operation name) is required.
if [ $# -ne 1 ]; then
    usage
    exit $OCF_ERR_ARGS
fi

# These operations do not require OCF instance parameters to be set
case "$1" in
    meta-data)
        meta_data
        exit $OCF_SUCCESS;;
    methods)
        ICP_methods
        exit $OCF_SUCCESS;;
    usage)
        usage
        exit $OCF_SUCCESS;;
    *)
        ;;
esac

# Every remaining operation needs both instance parameters.
if [ -z "$OCF_RESKEY_driveid" ]; then
    ocf_log err "Please specify OCF_RESKEY_driveid"
    exit $OCF_ERR_ARGS
fi
if [ -z "$OCF_RESKEY_device" ]; then
    ocf_log err "Please specify OCF_RESKEY_device"
    exit $OCF_ERR_ARGS
fi
driveid=$OCF_RESKEY_driveid
device=$OCF_RESKEY_device

# What kind of method was invoked?
case "$1" in
    start)
        ICP_validate_all
        ICP_start $driveid
        rc=$?
        # Buffers are still flushed after a failed start, but the failure of
        # ICP_start must not be hidden by the exit status of Clear_bufs.
        Clear_bufs $device
        flush_rc=$?
        [ $rc -eq 0 ] && rc=$flush_rc
        exit $rc;;
    stop)
        ICP_stop $driveid
        rc=$?
        # Same rule as for start: report the ICP_stop failure first.
        Clear_bufs $device
        flush_rc=$?
        [ $rc -eq 0 ] && rc=$flush_rc
        exit $rc;;
    status)
        ICP_report_status $driveid
        exit $?;;
    monitor)
        ICP_monitor $driveid
        exit $?;;
    validate-all)
        ICP_validate_all
        exit $?;;
    *)
        usage
        exit $OCF_ERR_UNIMPLEMENTED;;
esac
diff --git a/heartbeat/IPaddr b/heartbeat/IPaddr
index 8ada6c4d2..8c9fb20f3 100755
--- a/heartbeat/IPaddr
+++ b/heartbeat/IPaddr
@@ -1,892 +1,892 @@
#!/bin/sh
#
# License: GNU General Public License (GPL)
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
#
# This script manages IP alias IP addresses
#
# It can add an IP alias, or remove one.
#
# usage: $0 {start|stop|status|monitor|validate-all|meta-data}
#
# The "start" arg adds an IP alias.
#
# Surprisingly, the "stop" arg removes one. :-)
#
# OCF parameters are as below
# OCF_RESKEY_ip
# OCF_RESKEY_broadcast
# OCF_RESKEY_nic
# OCF_RESKEY_cidr_netmask
# OCF_RESKEY_lvs_support ( e.g. true, on, 1 )
# OCF_RESKEY_ARP_INTERVAL_MS
# OCF_RESKEY_ARP_REPEAT
# OCF_RESKEY_ARP_BACKGROUND (e.g. yes )
# OCF_RESKEY_ARP_NETMASK
# OCF_RESKEY_local_start_script
# OCF_RESKEY_local_stop_script
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Helper binaries, both looked up under $HA_BIN.
SENDARP=$HA_BIN/send_arp
FINDIF=$HA_BIN/findif
# Directory ($HA_RSCTMP) holding the saved-loopback files and the
# per-interface "IPaddr-<nic>" lock files.
VLDIR=$HA_RSCTMP
# send_arp is told to write its pid file here (passed via -p in ip_start).
SENDARPPIDDIR=$HA_RSCTMP
SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$OCF_RESKEY_ip"
USAGE="usage: $0 {start|stop|status|monitor|validate-all|meta-data}";
#######################################################################
# SYSTYPE selects the platform-specific branches (SunOS / *BSD / default)
# in the functions below.
SYSTYPE="`uname -s`"
case "$SYSTYPE" in
SunOS)
# `uname -r` = 5.9 -> SYSVERSION = 9
SYSVERSION="`uname -r | cut -d. -f 2`"
;;
Darwin)
# Treat Darwin the same as the other BSD variants (matched as *BSD)
# SYSTYPE becomes "DarwinBSD", which ip_start also tests for literally.
SYSTYPE="${SYSTYPE}BSD"
;;
*)
;;
esac
# meta_data: print the OCF resource-agent metadata (XML) for IPaddr on
# stdout, then exit the script with $OCF_SUCCESS.
# The heredoc contains no shell expansions.
meta_data() {
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="IPaddr">
<version>1.0</version>
<longdesc lang="en">
This script manages IP alias IP addresses
It can add an IP alias, or remove one.
</longdesc>
<shortdesc lang="en">Manages virtual IPv4 addresses (portable version)</shortdesc>
<parameters>
<parameter name="ip" unique="1" required="1">
<longdesc lang="en">
The IPv4 address to be configured in dotted quad notation, for example
"192.168.1.1".
</longdesc>
<shortdesc lang="en">IPv4 address</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="nic" unique="0">
<longdesc lang="en">
The base network interface on which the IP address will be brought
online.
If left empty, the script will try and determine this from the
routing table.
Do NOT specify an alias interface in the form eth0:1 or anything here;
rather, specify the base interface only.
Prerequisite:
There must be at least one static IP address, which is not managed by
the cluster, assigned to the network interface.
If you can not assign any static IP address on the interface,
modify this kernel parameter:
sysctl -w net.ipv4.conf.all.promote_secondaries=1
(or per device)
</longdesc>
<shortdesc lang="en">Network interface</shortdesc>
<content type="string" default="eth0"/>
</parameter>
<parameter name="cidr_netmask">
<longdesc lang="en">
The netmask for the interface in CIDR format (e.g., 24), or in
dotted quad notation (e.g., 255.255.255.0).
If unspecified, the script will also try to determine this from the
routing table.
</longdesc>
<shortdesc lang="en">Netmask</shortdesc>
<content type="string" default=""/>
</parameter>
<parameter name="broadcast">
<longdesc lang="en">
Broadcast address associated with the IP. If left empty, the script will
determine this from the netmask.
</longdesc>
<shortdesc lang="en">Broadcast address</shortdesc>
<content type="string" default=""/>
</parameter>
<parameter name="iflabel">
<longdesc lang="en">
You can specify an additional label for your IP address here.
</longdesc>
<shortdesc lang="en">Interface label</shortdesc>
<content type="string" default=""/>
</parameter>
<parameter name="lvs_support">
<longdesc lang="en">
Enable support for LVS Direct Routing configurations. In case an IP
address is stopped, only move it to the loopback device to allow the
local node to continue to service requests, but no longer advertise it
on the network.
</longdesc>
<shortdesc lang="en">Enable support for LVS DR</shortdesc>
<content type="boolean" default="false"/>
</parameter>
<parameter name="local_stop_script">
<longdesc lang="en">
Script called when the IP is released
</longdesc>
<shortdesc lang="en">Script called when the IP is released</shortdesc>
<content type="string" default=""/>
</parameter>
<parameter name="local_start_script">
<longdesc lang="en">
Script called when the IP is added
</longdesc>
<shortdesc lang="en">Script called when the IP is added</shortdesc>
<content type="string" default=""/>
</parameter>
<parameter name="ARP_INTERVAL_MS">
<longdesc lang="en">
milliseconds between ARPs
</longdesc>
<shortdesc lang="en">milliseconds between gratuitous ARPs</shortdesc>
<content type="integer" default="500"/>
</parameter>
<parameter name="ARP_REPEAT">
<longdesc lang="en">
How many gratuitous ARPs to send out when bringing up a new address
</longdesc>
<shortdesc lang="en">repeat count</shortdesc>
<content type="integer" default="10"/>
</parameter>
<parameter name="ARP_BACKGROUND">
<longdesc lang="en">
run in background (no longer any reason to do this)
</longdesc>
<shortdesc lang="en">run in background</shortdesc>
<content type="boolean" default="yes"/>
</parameter>
<parameter name="ARP_NETMASK">
<longdesc lang="en">
netmask for ARP - in nonstandard hexadecimal format.
</longdesc>
<shortdesc lang="en">netmask for ARP</shortdesc>
<content type="string" default="ffffffffffff"/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="20s" />
<action name="stop" timeout="20s" />
<action name="monitor" depth="0" timeout="20s" interval="5s" />
<action name="validate-all" timeout="20s" />
<action name="meta-data" timeout="5s" />
</actions>
</resource-agent>
END
    exit $OCF_SUCCESS
}
# The 'ping' command takes highly OS-dependent arguments, so this
# function creates a suitable argument list for the host OS's 'ping'.
# We use a subset of its functionality:
# 1. single packet
# 2. reasonable timeout (say 1 second)
#
# arguments:
# $1: IP address to ping
# result string:
# arguments for ping command
#
# If more flexibility is needed, they could be specified in the environment
# to this function, to adjust the resulting 'ping' arguments.
# David Lee <t.d.lee@durham.ac.uk> May 2007
# pingargs IP
# Print the argument list for a single-packet 'ping' of IP on this OS.
# The OS is taken from `uname -s`; Linux and SunOS get their own forms and
# everything else gets "-c <count> <ip>".
pingargs() {
    _baseip=$1
    _timeout=1 # seconds
    _pktcount=1
    _systype="`uname -s`"
    if [ "$_systype" = "Linux" ]; then
        # Default is perpetual ping: need "-c $_pktcount".
        # -c count -t timetolive -q(uiet) -n(umeric) -W timeout
        _pingargs="-c $_pktcount -q -n $_baseip"
    elif [ "$_systype" = "SunOS" ]; then
        # Default is immediate (or timeout) return.
        _pingargs="$_baseip $_timeout"
    else
        _pingargs="-c $_pktcount $_baseip"
    fi
    echo "$_pingargs"
}
# On Linux systems the (hidden) loopback interface may
# conflict with the requested IP address. If so, this
# unoriginal code will remove the offending loopback address
# and save it in VLDIR so it can be added back in later
# when the IPaddr is released.
#
# lvs_remove_conflicting_loopback IP IFNAME
# Record IFNAME in $VLDIR/<IP> so lvs_restore_loopback can put it back,
# run the optional local stop script, then remove the interface and the
# host route for IP.
lvs_remove_conflicting_loopback() {
    ipaddr="$1"
    ifname="$2"
    ocf_log info "Removing conflicting loopback $ifname."

    # Failing to save the name is only logged; the removal still goes ahead.
    echo $ifname > "$VLDIR/$ipaddr" ||
        ocf_log err "Could not save conflicting loopback $ifname." \
        "it will not be restored."

    if [ ! -z "${OCF_RESKEY_local_stop_script}" ] &&
       [ -x "${OCF_RESKEY_local_stop_script}" ]; then
        ${OCF_RESKEY_local_stop_script} $*
    fi

    delete_interface "$ifname" "$ipaddr"
    # Forcibly remove the route (if it exists) to the loopback.
    delete_route "$ipaddr"
}
#
# On Linux systems the (hidden) loopback interface may
# need to be restored if it has been taken down previously
# by lvs_remove_conflicting_loopback()
#
# lvs_restore_loopback IP
# If lvs_remove_conflicting_loopback saved an interface name for IP in
# $VLDIR/<IP>, re-add IP on that interface and delete the saved file.
# Returns silently when nothing was saved.
# Exits with $OCF_ERR_GENERIC when findif fails.
lvs_restore_loopback() {
    ipaddr="$1"
    if [ ! -s "$VLDIR/$ipaddr" ]; then
        return
    fi
    ifname=`cat "$VLDIR/$ipaddr"`
    ocf_log info "Restoring loopback IP Address $ipaddr on $ifname."

    # findif is driven through environment variables, hence the eval.
    CMD="OCF_RESKEY_cidr_netmask=32 OCF_RESKEY_ip=$1 OCF_RESKEY_nic=$ifname $FINDIF"
    NICINFO=`eval $CMD`
    # Capture findif's own status right away: testing the status of the
    # whitespace-normalising pipeline below would always look like success.
    rc=$?
    if [ $rc -ne 0 ]; then
        echo "ERROR: $CMD failed (rc=$rc)"
        exit $OCF_ERR_GENERIC
    fi

    # Collapse tabs and repeated blanks so that cut can split on one space.
    NICINFO=`echo $NICINFO | tr '\t' ' ' | tr -s " "`
    netmask_text=`echo "$NICINFO" | cut -f3 -d " "`
    broadcast=`echo "$NICINFO" | cut -f5 -d " "`

    add_interface "$ipaddr" "$ifname" "$ifname" $netmask_text $broadcast
    rm -f "$VLDIR/$ipaddr"
}
#
# Find out which alias serves the given IP address
# The argument is an IP address, and its output
# is an aliased interface name (e.g., "eth0:0").
#
# find_interface_solaris IP
# Scan the ifconfig listing for the interface that carries IP and echo its
# name with the trailing ":" removed.
# The awk stage puts an empty line in front of every line that contains
# ": " (except the very first line), so each interface stanza ends with a
# blank line, which is what the inner skip loop looks for.
# NOTE(review): the while loop is the last stage of a pipeline; in shells
# that run it in a subshell, the "return" inside the loop only ends that
# subshell and the function then falls through to "return $OCF_ERR_GENERIC".
# The caller visible in this file uses the echoed name, not the status.
find_interface_solaris() {
ipaddr="$1"
$IFCONFIG $IFCONFIG_A_OPT | $AWK '{if ($0 ~ /.*: / && NR > 1) {print "\n"$0} else {print}}' |
while read ifname linkstuff
do
# First line of a stanza: its first word is the interface name.
: ifname = $ifname
# Second line of a stanza: "<inet> <addr> ...".
read inet addr junk
: inet = $inet addr = $addr
# Skip the remaining lines of this stanza up to the blank separator.
while
read line && [ "X$line" != "X" ]
do
: Nothing
done
# Only names containing ":" are considered.
case $ifname in
*:*) ;;
*) continue;;
esac
# This doesn't look right for a box with multiple NICs.
# It looks like it always selects the first interface on
# a machine. Yet, we appear to use the results for this case too...
ifname=`echo "$ifname" | sed s'%:$%%'`
# The address field is accepted with or without an "addr:" prefix.
case $addr in
addr:$ipaddr) echo $ifname; return $OCF_SUCCESS;;
$ipaddr) echo $ifname; return $OCF_SUCCESS;;
esac
done
return $OCF_ERR_GENERIC
}
# find_interface_bsd IP
# Echo the name of the interface whose "inet" line carries IP.
# The awk program remembers the name from the most recent interface header
# line (one containing "UP," and starting with "<letters><digit>:") and
# prints it when an "inet <IP>" line follows.
find_interface_bsd() {
    # Take the address from the argument, like the other find_interface_*
    # helpers, instead of depending on a global set by the caller.
    ipaddr="$1"
    $IFCONFIG $IFCONFIG_A_OPT | awk -v ip_addr="$ipaddr" '
/UP,/ && $0 ~ /^[a-z]+[0-9]:/ {
if_name=$1; sub(":$","",if_name);
}
$1 == "inet" && $2 == ip_addr {
print if_name
exit(0)
}'
}
#
# Find out which alias serves the given IP address
# The argument is an IP address, and its output
# is an aliased interface name (e.g., "eth0:0").
#
# find_interface_generic IP
# Scan the ifconfig listing stanza by stanza (stanzas are expected to be
# separated by blank lines) and echo the alias interface name whose second
# line carries IP.
# NOTE(review): the while loop is the last stage of a pipeline; in shells
# that run it in a subshell, the "return" inside the loop only ends that
# subshell and the function then falls through to "return $OCF_ERR_GENERIC".
# The caller visible in this file uses the echoed name, not the status.
find_interface_generic() {
ipaddr="$1"
$IFCONFIG $IFCONFIG_A_OPT |
while read ifname linkstuff
do
# First line of a stanza: its first word is the interface name.
: Read gave us ifname = $ifname
# Second line of a stanza: "<inet> <addr> ...".
read inet addr junk
: Read gave us inet = $inet addr = $addr
# Skip the remaining lines of this stanza up to the blank separator.
while
read line && [ "X$line" != "X" ]
do
: Nothing
done
# Only names containing ":" are considered; a trailing ":" is stripped.
case $ifname in
*:*) ifname=`echo $ifname | sed 's/:$//'`;;
*) continue;;
esac
: "comparing $ipaddr to $addr (from ifconfig)"
# The address field is accepted with or without an "addr:" prefix.
case $addr in
addr:$ipaddr) echo $ifname; return $OCF_SUCCESS;;
$ipaddr) echo $ifname; return $OCF_SUCCESS;;
esac
done
return $OCF_ERR_GENERIC
}
#
# Find out which alias serves the given IP address
# The argument is an IP address, and its output
# is an aliased interface name (e.g., "eth0:0").
#
# find_interface IP
# Dispatch to the platform-specific lookup chosen by $SYSTYPE and echo
# whatever interface name it printed (possibly nothing).
# Always returns $OCF_SUCCESS; callers test the output for emptiness.
find_interface() {
    ipaddr="$1"
    case "$SYSTYPE" in
        SunOS) finder=find_interface_solaris;;
        *BSD)  finder=find_interface_bsd;;
        *)     finder=find_interface_generic;;
    esac
    NIC=`$finder $ipaddr`
    echo $NIC
    return $OCF_SUCCESS;
}
#
# Find an unused interface/alias name for us to use for new IP alias
# The argument is the base interface name (e.g., "eth0"), and the output
# is an aliased interface name (e.g., "eth0:0", "dc0", "le0:0").
#
# find_free_interface NIC
# Echo an interface/alias name on NIC that can be used for a new address.
# On *BSD the base name itself is echoed. Elsewhere the aliases "NIC:<j>"
# are tried in turn (j below 512): a candidate is taken when ifconfig does
# not list it and a hard link "$VLDIR/IPaddr-NIC:<j>" to the base lock file
# can be created.
# Returns $OCF_SUCCESS with the name on stdout, or $OCF_ERR_GENERIC when NIC
# is empty or no alias could be claimed.
find_free_interface() {
NIC="$1"
if [ "X$NIC" = "X" ]; then
ocf_log err "No free interface found for $OCF_RESKEY_ip"
return $OCF_ERR_GENERIC;
fi
# Base lock file; the per-alias locks are hard links to it.
NICBASE="$VLDIR/IPaddr-$NIC"
touch "$NICBASE"
case "$SYSTYPE" in
*BSD)
echo $NIC;
return $OCF_SUCCESS;;
SunOS)
# Alias numbering starts at 1 here; IFLIST holds the aliases ifconfig reports.
j=1
IFLIST=`$IFCONFIG $IFCONFIG_A_OPT | \
grep "^$NIC:[0-9]" | sed 's%: .*%%'`;;
*)
j=0
IFLIST=`$IFCONFIG $IFCONFIG_A_OPT | \
grep "^$NIC:[0-9]" | sed 's% .*%%'`
# Number of existing alias lock files. If no lock file with exactly that
# index exists, start the search there instead of at 0.
TRYADRCNT=`ls "${NICBASE}:"* 2>/dev/null | wc -w | tr -d ' '`
if [ -f "${NICBASE}:${TRYADRCNT}" ]; then
: OK
else
j="${TRYADRCNT}"
fi
;;
esac
# Normalise to a single space-separated list padded with blanks so that
# " NIC:j " can be matched as a whole word below.
IFLIST=" `echo $IFLIST` "
while
[ $j -lt 512 ]
do
case $IFLIST in
*" "$NIC:$j" "*)
# Alias already configured according to ifconfig: try the next index.
;;
*)
NICLINK="$NICBASE:$j"
# ln fails if the link already exists, so a successful ln claims index j.
if
ln "$NICBASE" "$NICLINK" 2>/dev/null
then
echo "$NIC:$j"
return $OCF_SUCCESS
fi
;;
esac
j=`expr $j + 1`
done
return $OCF_ERR_GENERIC
}
# delete_route IP
# Remove the host route for IP. Nothing is done on SunOS (returns 0);
# *BSD uses "delete", every other platform "del".
# Returns the exit status of the route command.
delete_route () {
    ipaddr="$1"
    if [ "$SYSTYPE" = "SunOS" ]; then
        return 0
    fi
    case "$SYSTYPE" in
        *BSD) verb=delete;;
        *)    verb=del;;
    esac
    CMD="$ROUTE -n $verb -host $ipaddr"
    $CMD
    return $?
}
# delete_interface IFNAME IP
# Build the platform-specific ifconfig command that takes IP / IFNAME down,
# log it at info level, run it and return its exit status.
delete_interface () {
    ifname="$1"
    ipaddr="$2"
    # Only the tail of the command differs between platforms.
    case "$SYSTYPE" in
        SunOS)
            if [ "$SYSVERSION" -ge 8 ] ; then
                ifargs="unplumb"
            else
                ifargs="0 down"
            fi;;
        Darwin*)
            ifargs="$ipaddr delete";;
        *BSD)
            ifargs="inet $ipaddr delete";;
        *)
            ifargs="down";;
    esac
    CMD="$IFCONFIG $ifname $ifargs"
    ocf_log info "$CMD"
    $CMD
    return $?
}
# add_interface IP BASE_IFACE IFACE NETMASK BROADCAST
# Configure IP on IFACE with the platform-specific ifconfig invocation.
# Exactly five arguments are required, otherwise the script exits with
# $OCF_ERR_ARGS. Returns the exit status of the ifconfig command(s).
add_interface () {
    ipaddr="$1"
    iface_base="$2"
    iface="$3"
    netmask="$4"
    broadcast="$5"

    if [ $# != 5 ]; then
        ocf_log err "Insufficient arguments to add_interface: $*"
        exit $OCF_ERR_ARGS
    fi

    case "$SYSTYPE" in
        SunOS)
            # From version 8 on, the interface has to be plumbed first.
            if [ "$SYSVERSION" -ge 8 ] ; then
                $IFCONFIG $iface plumb
                rc=$?
                if [ $rc -ne 0 ] ; then
                    echo "ERROR: '$IFCONFIG $iface plumb' failed."
                    return $rc
                fi
            fi
            # At Solaris 10, this single-command version sometimes broke.
            # Almost certainly an S10 bug.
            # CMD="$IFCONFIG $iface inet $ipaddr $text up"
            # So hack the following workaround:
            CMD="$IFCONFIG $iface inet $ipaddr"
            CMD="$CMD && $IFCONFIG $iface netmask $netmask"
            CMD="$CMD && $IFCONFIG $iface up"
            ;;
        *BSD)
            # netmask is always set to 255.255.255.255 for an alias
            CMD="$IFCONFIG $iface inet $ipaddr netmask 255.255.255.255 alias"
            ;;
        *)
            CMD="$IFCONFIG $iface $ipaddr netmask $netmask broadcast $broadcast"
            ;;
    esac

    # Use "eval $CMD" (not "$CMD"): it might be a chain of two or more commands.
    ocf_log info "eval $CMD"
    eval $CMD
    rc=$?
    if [ $rc != 0 ]; then
        echo "ERROR: eval $CMD failed (rc=$rc)"
    fi
    return $rc
}
#
# Remove the IP alias for the requested IP address...
#
# ip_stop
# Release $OCF_RESKEY_ip: kill a recorded send_arp process, remove the route
# and the interface, restore a saved loopback (LVS only) and drop the
# interface lock file.
# Returns $OCF_SUCCESS when the address is not configured, when it only
# sits on a loopback device in LVS mode, or when removal worked;
# $OCF_ERR_GENERIC when the interface could not be removed.
ip_stop() {
    SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$OCF_RESKEY_ip"
    NIC=`find_interface $OCF_RESKEY_ip`

    # Stop a send_arp whose pid was recorded in the pid file.
    if [ -f "$SENDARPPIDFILE" ]; then
        xargs kill < "$SENDARPPIDFILE"
        rm -f "$SENDARPPIDFILE"
    fi

    # Requested interface not in use
    [ -z "$NIC" ] && return $OCF_SUCCESS

    if [ ${OCF_RESKEY_lvs_support} = 1 ]; then
        case $NIC in
            lo*)
                # Requested interface is on loopback
                return $OCF_SUCCESS;;
        esac
    fi

    delete_route "$OCF_RESKEY_ip"
    delete_interface "$NIC" "$OCF_RESKEY_ip"
    rc=$?

    if [ ${OCF_RESKEY_lvs_support} = 1 ]; then
        lvs_restore_loopback "$OCF_RESKEY_ip"
    fi

    # remove lock file...
    rm -f "$VLDIR/IPaddr-$NIC"

    if [ $rc != 0 ]; then
        ocf_log warn "IP Address $OCF_RESKEY_ip NOT released: rc=$rc"
        return $OCF_ERR_GENERIC
    fi
    return $OCF_SUCCESS
}
#
# Add an IP alias for the requested IP address...
#
# It could be that we already have taken it, in which case it should
# do nothing.
#
# ip_start
# Bring up $OCF_RESKEY_ip on a free alias of $OCF_RESKEY_nic, announce it
# with send_arp when that binary is executable, and return the result of a
# final ip_status_internal check.
# Returns $OCF_SUCCESS immediately if the address is already active,
# $OCF_ERR_GENERIC if no free alias is found, or add_interface's status if
# configuring the address fails.
ip_start() {
#
# Do we already service this IP address?
#
ip_status_internal
if [ $? = $OCF_SUCCESS ]; then
# Nothing to do, the IP is already active
return $OCF_SUCCESS;
fi
# Claim a free alias name on the configured NIC.
NIC_unique=`find_free_interface $OCF_RESKEY_nic`
if [ -n "$NIC_unique" ]; then
: OK got interface [$NIC_unique] for $OCF_RESKEY_ip
else
return $OCF_ERR_GENERIC
fi
# This logic is mostly to support LVS (If I understand it correctly)
if [ ${OCF_RESKEY_lvs_support} = 1 ]; then
NIC_current=`find_interface $OCF_RESKEY_ip`
case $NIC_unique in
lo*)
if [ x"$NIC_unique" = x"$NIC_current" ]; then
# Its already "running" and not moving, nothing to do.
ocf_log err "Could not find a non-loopback device to move $OCF_RESKEY_ip to"
return $OCF_ERR_GENERIC
fi;;
# NOTE(review): this branch runs for every non-loopback target, also when
# $NIC_current is empty - confirm that is intended.
*) lvs_remove_conflicting_loopback "$OCF_RESKEY_ip" "$NIC_current";;
esac
fi
# Optional user hook, run only if it is set and executable.
if [ ! -z "${OCF_RESKEY_local_start_script}" ]; then
if [ -x "${OCF_RESKEY_local_start_script}" ]; then
${OCF_RESKEY_local_start_script} $*
fi
fi
add_interface "$OCF_RESKEY_ip" "$OCF_RESKEY_nic" "$NIC_unique" \
"$OCF_RESKEY_cidr_netmask" "$OCF_RESKEY_broadcast"
rc=$?
if [ $rc != 0 ]; then
ocf_log err "Could not add $OCF_RESKEY_ip to $OCF_RESKEY_nic: rc=$rc"
return $rc
fi
# The address is active, now notify others about it using sendarp
if [ "$SYSTYPE" = "DarwinBSD" -a "$NIC_unique" = "lo0" ]; then
# Darwin can't send ARPs on loopback devices
SENDARP="x$SENDARP" # Prevent the binary from being found
fi
if [ -x $SENDARP ]; then
# send_arp gets the base interface: everything from the first ":" is cut.
TARGET_INTERFACE=`echo $NIC_unique | sed 's%:.*%%'`
SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$OCF_RESKEY_ip"
ARGS="-i $OCF_RESKEY_ARP_INTERVAL_MS -r $OCF_RESKEY_ARP_REPEAT"
ARGS="$ARGS -p $SENDARPPIDFILE $TARGET_INTERFACE $OCF_RESKEY_ip"
ARGS="$ARGS auto $OCF_RESKEY_ip $OCF_RESKEY_ARP_NETMASK"
ocf_log debug "Sending Gratuitous Arp for $OCF_RESKEY_ip on $NIC_unique [$TARGET_INTERFACE]"
# Only the literal value "yes" selects background mode; a send_arp failure
# is logged in both modes but does not fail the start.
case $OCF_RESKEY_ARP_BACKGROUND in
yes) ($SENDARP $ARGS || ocf_log err "Could not send gratuitous arps. rc=$?" & ) >&2 ;;
*) $SENDARP $ARGS || ocf_log err "Could not send gratuitous arps. rc=$?";;
esac
fi
# Report what the status check now sees.
ip_status_internal
return $?
}
# ip_status_internal
# Decide whether $OCF_RESKEY_ip is currently served by this node.
# Returns:
#   $OCF_NOT_RUNNING  no interface carries the address, or (LVS mode) it
#                     only sits on a loopback device
#   $OCF_ERR_GENERIC  the address is on a different base interface than
#                     the configured $OCF_RESKEY_nic
#   $OCF_SUCCESS      otherwise
ip_status_internal() {
    NIC=`find_interface "$OCF_RESKEY_ip"`

    if [ "x$NIC" = x ]; then
        return $OCF_NOT_RUNNING
    fi

    if [ "${OCF_RESKEY_lvs_support}" = "1" ]; then
        case $NIC in
            lo*) return $OCF_NOT_RUNNING;;
            *) return $OCF_SUCCESS;;
        esac
    fi

    # Compare base interface names only (alias suffix after ":" dropped).
    if [ x$OCF_RESKEY_nic != x ]; then
        simple_OCF_NIC=`echo $OCF_RESKEY_nic | awk -F: '{print $1}'`
        simple_NIC=`echo $NIC | awk -F: '{print $1}'`
        if [ $simple_OCF_NIC != $simple_NIC ]; then
            ocf_log err "$OCF_RESKEY_ip is running an interface ($simple_NIC) instead of the configured one ($simple_OCF_NIC)"
            return $OCF_ERR_GENERIC
        fi
    fi
    return $OCF_SUCCESS
}
# ip_status
# Print "running", "stopped" or "unknown" according to ip_status_internal
# and return that function's status unchanged.
ip_status() {
    ip_status_internal
    rc=$?
    case $rc in
        $OCF_SUCCESS)     echo "running";;
        $OCF_NOT_RUNNING) echo "stopped";;
        *)                echo "unknown";;
    esac
    return $rc;
}
#
# Determine if this IP address is really being served, or not.
# Note that we must distinguish if *we're* serving it locally...
#
# ip_monitor
# Return the ip_status_internal result directly when OCF_CHECK_LEVEL is 0
# or the status is non-zero. Otherwise ping the address (up to ten
# attempts) and return $OCF_SUCCESS on the first reply; if every attempt
# fails, log the last ping output and return $OCF_ERR_GENERIC.
ip_monitor() {
    ip_status_internal
    rc=$?
    if [ $OCF_CHECK_LEVEL = 0 -o $rc != 0 ]; then
        return $rc
    fi

    ocf_log info "Checking IP stack"
    PINGARGS="`pingargs $OCF_RESKEY_ip`"
    for j in 1 2 3 4 5 6 7 8 9 10; do
        if MSG=`$PING $PINGARGS 2>&1`; then
            return $OCF_SUCCESS
        fi
    done

    ocf_log err "$MSG"
    return $OCF_ERR_GENERIC
}
# is_positive_integer VALUE
# NOTE: the return value is inverted with respect to the name. It returns 1
# when VALUE is a decimal number >= 1 and 0 otherwise, so callers written as
# "if is_positive_integer X; then <report error>" take the error branch for
# invalid values.
is_positive_integer() {
    if ocf_is_decimal $1 && [ $1 -ge 1 ]; then
        return 1
    fi
    return 0
}
# ip_validate_all
# Apply defaults, check required binaries and validate the instance
# parameters. As a side effect it fills in OCF_RESKEY_nic,
# OCF_RESKEY_netmask / OCF_RESKEY_cidr_netmask and OCF_RESKEY_broadcast
# from the values that findif calculates.
# Returns $OCF_SUCCESS, or $OCF_ERR_ARGS / $OCF_ERR_CONFIGURED /
# $OCF_ERR_GENERIC depending on which check fails.
ip_validate_all() {
# Defaults for the ARP parameters (only applied when the variable is unset).
: ${OCF_RESKEY_ARP_BACKGROUND=yes}
: ${OCF_RESKEY_ARP_NETMASK=ffffffffffff}
: ${OCF_RESKEY_ARP_INTERVAL_MS=500}
: ${OCF_RESKEY_ARP_REPEAT=10}
check_binary $AWK
check_binary $IFCONFIG
check_binary $ROUTE
check_binary $PING
# is_positive_integer returns 0 (shell "true") for an INVALID value, so the
# "then" branches below are the error paths.
if is_positive_integer $OCF_RESKEY_ARP_INTERVAL_MS
then
ocf_log err "Invalid parameter value: ARP_INTERVAL_MS [$OCF_RESKEY_ARP_INTERVAL_MS]"
return $OCF_ERR_ARGS
fi
if is_positive_integer $OCF_RESKEY_ARP_REPEAT
then
ocf_log err "Invalid parameter value: ARP_REPEAT [$OCF_RESKEY_ARP_REPEAT]"
return $OCF_ERR_ARGS
fi
: ${OCF_RESKEY_lvs_support=0}
# LVS support is only accepted on Linux and SunOS.
if [ "$SYSTYPE" = "Linux" -o "$SYSTYPE" = "SunOS" ]; then
:
else
if [ "${OCF_RESKEY_lvs_support}" = "1" ]; then
ocf_log err "$SYSTYPE does not support LVS"
return $OCF_ERR_GENERIC
fi
fi
# Shape check only: a shell glob for four dot-separated groups, not a full
# IPv4 validation.
case $OCF_RESKEY_ip in
"") ocf_log err "Required parameter OCF_RESKEY_ip is missing"
return $OCF_ERR_CONFIGURED;;
[0-9]*.[0-9]*.[0-9]*.*[0-9]) : OK;;
*) ocf_log err "Parameter OCF_RESKEY_ip [$OCF_RESKEY_ip] not an IP address"
return $OCF_ERR_CONFIGURED;;
esac
# Unconditionally do this?
# Strip an alias suffix ("eth0:1" -> "eth0") from the configured nic.
case $OCF_RESKEY_nic in
*:*)
OCF_RESKEY_nic=`echo $OCF_RESKEY_nic | sed 's/:.*//'`
;;
esac
# findif output is split with cut below: tab-separated fields, and the 2nd
# and 3rd fields are further split on a space with the value taken from the
# second word.
NICINFO=`$FINDIF`
rc=$?
if [ $rc != 0 ]; then
ocf_log err "$FINDIF failed [rc=$rc]."
return $OCF_ERR_GENERIC
fi
# Field 1: interface name. Adopt it when none was configured, otherwise it
# has to match the configured one.
tmp=`echo "$NICINFO" | cut -f1`
if
[ "x$OCF_RESKEY_nic" = "x" ]
then
ocf_log info "Using calculated nic for ${OCF_RESKEY_ip}: $tmp"
OCF_RESKEY_nic=$tmp
elif
[ x$tmp != x${OCF_RESKEY_nic} ]
then
ocf_log err "Invalid parameter value: nic [$OCF_RESKEY_nic] Calculated nic: [$tmp]"
return $OCF_ERR_ARGS
fi
# Field 2: netmask.
tmp=`echo "$NICINFO" | cut -f2 | cut -d ' ' -f2`
if
[ "x$OCF_RESKEY_cidr_netmask" != "x$tmp" ]
then
ocf_log info "Using calculated netmask for ${OCF_RESKEY_ip}: $tmp"
fi
# Always use the calculated version because it might have been specified
# using CIDR notation which not every system accepts
OCF_RESKEY_netmask=$tmp
OCF_RESKEY_cidr_netmask=$tmp; export OCF_RESKEY_cidr_netmask
# Field 3: broadcast. Adopt it when none was configured, otherwise it has to
# match the configured one.
tmp=`echo "$NICINFO" | cut -f3 | cut -d ' ' -f2`
if
[ "x$OCF_RESKEY_broadcast" = "x" ]
then
ocf_log debug "Using calculated broadcast for ${OCF_RESKEY_ip}: $tmp"
OCF_RESKEY_broadcast=$tmp
elif [ x$tmp != x${OCF_RESKEY_broadcast} ]; then
ocf_log err "Invalid parameter value: broadcast [$OCF_RESKEY_broadcast] Calculated broadcast: [$tmp]"
return $OCF_ERR_ARGS
fi
return $OCF_SUCCESS
}
# usage RC
# Print the usage line on stderr and return RC. It does not exit; the
# caller decides what to do with the status.
usage() {
    echo $USAGE 1>&2
    return $1
}
# Exactly one argument (the operation name) is required. usage() only
# returns its argument, so the script has to exit explicitly; otherwise it
# would carry on and run the operation despite the bad invocation.
if [ $# -ne 1 ]; then
    usage $OCF_ERR_ARGS
    exit $?
fi

: ${OCF_RESKEY_lvs_support=0}
# Normalize the value of lvs_support
case "${OCF_RESKEY_lvs_support}" in
    true|on|yes|1) OCF_RESKEY_lvs_support=1;;
    *)             OCF_RESKEY_lvs_support=0;;
esac

# Note: We had a version out there for a while which used
# netmask instead of cidr_netmask. So, don't remove this aliasing code!
if [ ! -z "$OCF_RESKEY_netmask" -a -z "$OCF_RESKEY_cidr_netmask" ]; then
    OCF_RESKEY_cidr_netmask=$OCF_RESKEY_netmask
    export OCF_RESKEY_cidr_netmask
fi

# Dispatch; the script exits with the status of the selected handler.
case $1 in
    meta-data)    meta_data;;
    start)        ip_validate_all && ip_start;;
    stop)         ip_stop;;
    status)       ip_status;;
    monitor)      ip_monitor;;
    validate-all) ip_validate_all;;
    usage)        usage $OCF_SUCCESS;;
    *)            usage $OCF_ERR_UNIMPLEMENTED;;
esac
exit $?
diff --git a/heartbeat/IPsrcaddr b/heartbeat/IPsrcaddr
index 33c5be62f..08fd8a623 100755
--- a/heartbeat/IPsrcaddr
+++ b/heartbeat/IPsrcaddr
@@ -1,503 +1,503 @@
#!/bin/sh
#
# Description: IPsrcaddr - Preferred source address modification
#
# Author: John Sutton <john@scl.co.uk>
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
# License: GNU General Public License (GPL)
# Copyright: SCL Internet
#
# Based on the IPaddr script.
#
# This script manages the preferred source address associated with
# packets which originate on the localhost and are routed through the
# default route. By default, i.e. without the use of this script or
# similar, these packets will carry the IP of the primary i.e. the
# non-aliased interface. This can be a nuisance if you need to ensure
# that such packets carry the same IP irrespective of which host in
# a redundant cluster they actually originate from.
#
# It can add a preferred source address, or remove one.
#
# usage: IPsrcaddr {start|stop|status|monitor|validate-all|meta-data}
#
# The "start" arg adds a preferred source address.
#
# Surprisingly, the "stop" arg removes it. :-)
#
# NOTES:
#
# 1) There must be one and not more than 1 default route! Mainly because
# I can't see why you should have more than one. And if there is more
# than one, we would have to box clever to find out which one is to be
# modified, or we would have to pass its identity as an argument.
#
# 2) The script depends on Alexey Kuznetsov's ip utility from the
# iproute aka iproute2 package.
#
# 3) No checking is done to see if the passed in IP address can
# reasonably be associated with the interface on which the default
# route exists. So unless you want to deliberately spoof your source IP,
# check it! Normally, I would expect that your haresources looks
# something like:
#
# nodename ip1 ip2 ... ipN IPsrcaddr::ipX
#
# where ipX is one of the ip1 to ipN.
#
# OCF parameters are as below:
# OCF_RESKEY_ipaddress
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
# Usage line printed by usage().
USAGE="usage: $0 {start|stop|status|monitor|validate-all|meta-data}";
# Command that shows the default route, and the command prefix used to
# change a route (the route description and "src <ip>" are appended).
CMDSHOW="$IP2UTIL route show to exact 0.0.0.0/0"
CMDCHANGE="$IP2UTIL route change to "
SYSTYPE="`uname -s`"
# usage: print the usage line on stderr.
usage() {
    echo $USAGE 1>&2
}
# meta_data: print the OCF resource-agent metadata (XML) for IPsrcaddr on
# stdout. The heredoc contains no shell expansions.
meta_data() {
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="IPsrcaddr">
<version>1.0</version>
<longdesc lang="en">
Resource script for IPsrcaddr. It manages the preferred source address
modification.
</longdesc>
<shortdesc lang="en">Manages the preferred source address for outgoing IP packets</shortdesc>
<parameters>
<parameter name="ipaddress" unique="0" required="1">
<longdesc lang="en">
The IP address.
</longdesc>
<shortdesc lang="en">IP address</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="cidr_netmask">
<longdesc lang="en">
The netmask for the interface in CIDR format (e.g., 24), or in
dotted quad notation (e.g., 255.255.255.0).
</longdesc>
<shortdesc lang="en">Netmask</shortdesc>
<content type="string" default=""/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="20s" />
<action name="stop" timeout="20s" />
<action name="monitor" depth="0" timeout="20s" interval="10" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}
# errorexit MESSAGE...
# Record MESSAGE as the exit reason and terminate the script with
# $OCF_ERR_GENERIC.
errorexit() {
    ocf_exit_reason "$*"
    exit "$OCF_ERR_GENERIC"
}
#
# We can distinguish 3 cases: no preferred source address, a
# preferred source address exists which matches that specified, and one
# exists but doesn't match that specified. srca_read() returns 1,0,2
# respectively.
#
# The output of route show is something along the lines of:
#
# default via X.X.X.X dev eth1 src Y.Y.Y.Y
#
# where the src clause "src Y.Y.Y.Y" may or may not be present
# Regular-expression fragments (sed basic regex syntax) used by srca_read to
# split the default route into "everything but the src clause" and the
# source address.
# WS matches one blank (space or tab). The POSIX class is used instead of
# `echo -en ' \t'`: the script runs under /bin/sh, where echo may not
# understand -en and would put the letters "-en" into the bracket expression.
WS="[[:blank:]]"
OCTET="[0-9]\{1,3\}"
IPADDR="\($OCTET\.\)\{3\}$OCTET"
SRCCLAUSE="src$WS$WS*\($IPADDR\)"
# Groups: \1 = text before the src clause, \3 = the source address,
# \5 = text after the src clause.
MATCHROUTE="\(.*${WS}\)\($SRCCLAUSE\)\($WS.*\|$\)"
FINDIF=$HA_BIN/findif
# findif needs that to be set
export OCF_RESKEY_ip=$OCF_RESKEY_ipaddress
# Inspect the default route and compare its preferred source address with $1.
# Side effects: sets the globals DEFROUTE (raw route line), SRCIP (source
# address found, possibly empty) and ROUTE_WO_SRC (route with the src clause
# removed), which srca_start/srca_stop reuse.
# Returns 0 when the src address equals $1, 1 when there is no src clause,
# 2 when a different src address is set. Calls errorexit (which exits) when
# the route cannot be read, is missing, or is not unique.
srca_read() {
# Capture the default route - doublequotes prevent word splitting...
DEFROUTE="`$CMDSHOW`" || errorexit "command '$CMDSHOW' failed"
# ... so we can make sure there is only 1 default route
# (an empty DEFROUTE still counts as one line here; that case is caught below)
[ 1 -eq `echo "$DEFROUTE" | wc -l` ] || \
errorexit "more than 1 default route exists"
# But there might still be no default route
[ -z "$DEFROUTE" ] && errorexit "no default route exists"
# Sed out the source ip address if it exists
# (-n with the p flag prints only when MATCHROUTE matched; \3 is the address)
SRCIP=`echo $DEFROUTE | sed -n "s/$MATCHROUTE/\3/p"`
# and what remains after stripping out the source ip address clause
# (\1 is the text before the clause, \5 the text after it)
ROUTE_WO_SRC=`echo $DEFROUTE | sed "s/$MATCHROUTE/\1\5/"`
[ -z "$SRCIP" ] && return 1
[ $SRCIP = $1 ] && return 0
return 2
}
#
# Add (or change if it already exists) the preferred source address
# The exit code should conform to LSB exit codes.
#
# Make $1 the preferred source address.
# When srca_read reports that $1 is already set, only logs and returns
# $OCF_SUCCESS. Otherwise rewrites the $NETWORK route on $INTERFACE and the
# default route so both carry "src $1"; either command failing ends the agent
# through errorexit.
srca_start() {
srca_read $1
rc=$?
if [ $rc = 0 ]; then
rc=$OCF_SUCCESS
ocf_log info "The ip route has been already set.($NETWORK, $INTERFACE, $ROUTE_WO_SRC)"
else
ip route replace $NETWORK dev $INTERFACE src $1 || \
errorexit "command 'ip route replace $NETWORK dev $INTERFACE src $1' failed"
$CMDCHANGE $ROUTE_WO_SRC src $1 || \
errorexit "command '$CMDCHANGE $ROUTE_WO_SRC src $1' failed"
# Only reached when the command above succeeded (errorexit exits), so rc is 0.
rc=$?
fi
return $rc
}
#
# Remove (if it exists) the preferred source address.
# If one exists but it's not the same as the one specified, that's
# an error. Maybe that's the wrong behaviour because if this fails
# then when IPaddr releases the associated interface (if there is one)
# your default route will also get dropped ;-(
# The exit code should conform to LSB exit codes.
#
# Remove the preferred source address $1.
# Exits with $OCF_SUCCESS when no source address is set, and fails through
# errorexit when a different address is set. Otherwise rewrites the $NETWORK
# route and the default route without the src clause.
srca_stop() {
srca_read $1
rc=$?
if [ $rc = 1 ]; then
# We do not have a preferred source address for now
ocf_log info "No preferred source address defined, nothing to stop"
exit $OCF_SUCCESS
fi
[ $rc = 2 ] && errorexit "The address you specified to stop does not match the preferred source address"
ip route replace $NETWORK dev $INTERFACE || \
errorexit "command 'ip route replace $NETWORK dev $INTERFACE' failed"
$CMDCHANGE $ROUTE_WO_SRC || \
errorexit "command '$CMDCHANGE $ROUTE_WO_SRC' failed"
# Only reached when the command above succeeded, so this returns 0.
return $?
}
srca_status() {
	# Translate the srca_read verdict for address $1 into a message on
	# stdout plus the matching OCF return code.
	local verdict
	srca_read $1
	verdict=$?
	if [ $verdict -eq 0 ]; then
		echo "OK"
		return $OCF_SUCCESS
	elif [ $verdict -eq 1 ]; then
		echo "No preferred source address defined"
		return $OCF_NOT_RUNNING
	elif [ $verdict -eq 2 ]; then
		echo "Preferred source address has incorrect value"
		return $OCF_ERR_GENERIC
	fi
}
# A not reliable IP address checking function, which only picks up those _obvious_ violations...
#
# It accepts IPv4 address in dotted quad notation, for example "192.168.1.1"
#
# 100% confidence whenever it reports "negative",
# but may get false "positive" answer.
#
# Sanity-check a dotted-quad IPv4 address given as $1.
# Returns 0 when the address looks plausible and non-zero when it is
# obviously malformed (illegal characters, misplaced or wrong number of
# dots, an octet above 255). Exits the agent with $OCF_ERR_CONFIGURED when
# the address is in the 127.0.0.0/8 loopback range.
# Side effect: leaves the address in the global variable "ip".
CheckIP() {
	ip="$1"
	case $ip in
	*[!0-9.]*) #got invalid char
		false;;
	.*|*.) #begin or end by ".", which is invalid
		false;;
	*..*) #consecutive ".", which is invalid
		false;;
	*.*.*.*.*) #four decimal dots, which is too many
		false;;
	*.*.*.*) #exactly three decimal dots, candidate, evaluate each field
		# Split the address into $1..$4 on the dots.
		local IFS=.
		set -- $ip
		if [ $1 -le 255 ] && [ $2 -le 255 ] && [ $3 -le 255 ] && [ $4 -le 255 ]
		then
			# Every octet is in range: the only thing left to refuse is
			# loopback. A non-loopback address leaves the "if" with status 0.
			if [ $1 -eq 127 ]; then
				ocf_exit_reason "IP address [$ip] is a loopback address, thus can not be preferred source address"
				exit $OCF_ERR_CONFIGURED
			fi
		else
			# An octet above 255 (or one too large to compare) can never
			# be part of a valid address.
			false
		fi
		;;
	*) #less than three decimal dots
		false;;
	esac
	return $? # This return is unnecessary, this comment too :)
}
#
# Find out which interface or alias serves the given IP address
# The argument is an IP address, and its output
# is an (aliased) interface name (e.g., "eth0" and "eth0:0").
#
# Scan the "$IFCONFIG $IFCONFIG_A_OPT" listing for the interface that carries
# $BASEIP. Prints the interface name and returns $OCF_SUCCESS on a match,
# otherwise returns $OCF_ERR_GENERIC.
# NOTE(review): the "while" runs on the receiving end of a pipeline, so in
# shells that run it in a subshell the "return" inside the loop only ends
# that subshell and the trailing "return $OCF_ERR_GENERIC" still runs - confirm.
find_interface_solaris() {
# awk inserts a blank line before every interface header except the first,
# so each interface forms a blank-line-terminated stanza.
$IFCONFIG $IFCONFIG_A_OPT | $AWK '{if ($0 ~ /.*: / && NR > 1) {print "\n"$0} else {print}}' |
while read ifname linkstuff
do
# The ":" lines are no-ops; they only show the values when tracing (set -x).
: ifname = $ifname
read inet addr junk
: inet = $inet addr = $addr
# Skip the remaining lines of this stanza, up to the blank separator.
while
read line && [ "X$line" != "X" ]
do
: Nothing
done
# This doesn't look right for a box with multiple NICs.
# It looks like it always selects the first interface on
# a machine. Yet, we appear to use the results for this case too...
# Strip trailing colons from the interface name.
ifname=`echo "$ifname" | sed s'%:*$%%'`
case $addr in
addr:$BASEIP) echo $ifname; return $OCF_SUCCESS;;
$BASEIP) echo $ifname; return $OCF_SUCCESS;;
esac
done
return $OCF_ERR_GENERIC
}
#
# Find out which interface or alias serves the given IP address
# The argument is an IP address, and its output
# is an (aliased) interface name (e.g., "eth0" and "eth0:0").
#
find_interface_generic() {
	# Take the second space-separated field of every one-line address
	# record that mentions $BASEIP, ignoring ipsecN pseudo interfaces.
	local matched=$($IP2UTIL -o -f inet addr show | grep "\ $BASEIP" \
		| cut -d ' ' -f2 | grep -v '^ipsec[0-9][0-9]*$')
	# Nothing serves the address: report failure without printing anything.
	[ -n "$matched" ] || return $OCF_ERR_GENERIC
	echo $matched
	return $OCF_SUCCESS
}
#
# Find out which interface or alias serves the given IP address
# The argument is an IP address, and its output
# is an (aliased) interface name (e.g., "eth0" and "eth0:0").
#
find_interface() {
	# Pick the platform-specific lookup; SunOS has its own ifconfig parser.
	if [ "$SYSTYPE" = "SunOS" ]; then
		IF=$(find_interface_solaris $BASEIP)
	else
		IF=$(find_interface_generic $BASEIP)
	fi
	# Print whatever was found (possibly nothing); always reports success.
	echo $IF
	return $OCF_SUCCESS
}
# Report whether this host currently serves the address $1.
# Sets the global BASEIP to $1 (and, on Darwin, rewrites SYSTYPE).
# Returns $OCF_SUCCESS when the address is found, $OCF_NOT_RUNNING when it is
# not, and exits with $OCF_ERR_CONFIGURED when it sits on a loopback interface.
ip_status() {
BASEIP="$1"
case "$SYSTYPE" in
Darwin)
# Treat Darwin the same as the other BSD variants (matched as *BSD)
SYSTYPE="${SYSTYPE}BSD"
;;
*)
;;
esac
case "$SYSTYPE" in
*BSD)
# Look for the address in the ifconfig listing, preceded by ":" or a space
# and followed by a space.
$IFCONFIG $IFCONFIG_A_OPT | grep "inet.*[: ]$BASEIP " >/dev/null 2>&1
if [ $? = 0 ]; then
return $OCF_SUCCESS
else
return $OCF_NOT_RUNNING
fi;;
Linux|SunOS)
IF=`find_interface "$BASEIP"`
if [ -z "$IF" ]; then
return $OCF_NOT_RUNNING
fi
case $IF in
lo*)
ocf_exit_reason "IP address [$BASEIP] is served by loopback, thus can not be preferred source address"
exit $OCF_ERR_CONFIGURED
;;
*)return $OCF_SUCCESS;;
esac
;;
*)
# NOTE(review): IF is not assigned on this path within this function, so the
# result depends on whatever value IF already holds - confirm intent.
if [ -z "$IF" ]; then
return $OCF_NOT_RUNNING
else
return $OCF_SUCCESS
fi;;
esac
}
# Validate the agent configuration.
# Returns $OCF_ERR_CONFIGURED when OCF_RESKEY_ipaddress is unset or the
# address fails CheckIP, $OCF_ERR_INSTALLED when this host does not serve the
# address, and $OCF_SUCCESS otherwise. On non-Linux systems only the
# "parameter is set" check is performed.
srca_validate_all() {
if [ -z "$OCF_RESKEY_ipaddress" ]; then
# usage
ocf_exit_reason "Please set OCF_RESKEY_ipaddress to the preferred source IP address!"
return $OCF_ERR_CONFIGURED
fi
if ! [ "x$SYSTYPE" = "xLinux" ]; then
# checks after this point are only relevant for linux.
return $OCF_SUCCESS
fi
check_binary $AWK
check_binary $IFCONFIG
# The IP address should be in good shape
# ($ipaddress is the global assigned in the main section before this runs)
if CheckIP "$ipaddress"; then
:
else
ocf_exit_reason "Invalid IP address [$ipaddress]"
return $OCF_ERR_CONFIGURED
fi
# During a probe, skip the check that the address is actually being served.
if ocf_is_probe; then
return $OCF_SUCCESS
fi
# We should serve this IP address of course
if ip_status "$ipaddress"; then
:
else
ocf_exit_reason "We are not serving [$ipaddress], hence can not make it a preferred source address"
return $OCF_ERR_INSTALLED
fi
return $OCF_SUCCESS
}
# Main: exactly one argument (the action name) is required.
if
( [ $# -ne 1 ] )
then
usage
exit $OCF_ERR_ARGS
fi
# These operations do not require the OCF instance parameters to be set
case $1 in
meta-data) meta_data
exit $OCF_SUCCESS
;;
usage) usage
exit $OCF_SUCCESS
;;
*)
;;
esac
# Every remaining action is validated first.
ipaddress="$OCF_RESKEY_ipaddress"
srca_validate_all
rc=$?
if [ $rc -ne $OCF_SUCCESS ]; then
case $1 in
# If we can't validate the configuration during a stop, that
# means the resource isn't configured correctly. There's no way
# to actually stop the resource in this situation because there's
# no way it could have even started. Return success here
# to indicate that the resource is not running, otherwise the
# stop action will fail, causing the node to be fenced just because
# of a misconfiguration.
stop) exit $OCF_SUCCESS;;
*) exit $rc;;
esac
fi
# Ask findif for the interface; the first word of its output is used.
findif_out=`$FINDIF -C`
rc=$?
[ $rc -ne 0 ] && {
ocf_exit_reason "[$FINDIF -C] failed"
exit $rc
}
INTERFACE=`echo $findif_out | awk '{print $1}'`
# First field of the link-scope route on that interface that matches the address.
NETWORK=`ip route list dev $INTERFACE scope link match $ipaddress|grep -o '^[^ ]*'`
# Dispatch; the handler's status becomes the exit status via "exit $?" below.
case $1 in
start) srca_start $ipaddress
;;
stop) srca_stop $ipaddress
;;
status) srca_status $ipaddress
;;
monitor) srca_status $ipaddress
;;
validate-all) srca_validate_all
;;
*) usage
exit $OCF_ERR_UNIMPLEMENTED
;;
esac
exit $?
#
# Version 0.3 2002/11/04 17:00:00 John Sutton <john@scl.co.uk>
# Name changed from IPsrcroute to IPsrcaddr and now reports errors
# using ha_log rather than on stderr.
#
# Version 0.2 2002/11/02 17:00:00 John Sutton <john@scl.co.uk>
# Changed status output to "OK" to satisfy ResourceManager's
# we_own_resource() function.
#
# Version 0.1 2002/11/01 17:00:00 John Sutton <john@scl.co.uk>
# First effort but does the job?
#
diff --git a/heartbeat/LVM b/heartbeat/LVM
index 75cd6fae8..79c279127 100755
--- a/heartbeat/LVM
+++ b/heartbeat/LVM
@@ -1,713 +1,713 @@
#!/bin/sh
#
#
# LVM
#
# Description: Manages an LVM volume as an HA resource
#
#
# Author: Alan Robertson
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
# License: GNU General Public License (GPL)
# Copyright: (C) 2002 - 2005 International Business Machines, Inc.
#
# This code significantly inspired by the LVM resource
# in FailSafe by Lars Marowsky-Bree
#
#
# An example usage in /etc/ha.d/haresources:
# node1 10.0.0.170 ServeRAID::1::1 LVM::myvolname
#
# See usage() function below for more details...
#
# OCF parameters are as below:
# OCF_RESKEY_volgrpname
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
# Print the usage text on stdout. The list of operations is taken from
# LVM_methods and joined with "|".
usage() {
methods=`LVM_methods`
# Unquoted on purpose: word splitting turns the newline-separated list into
# space-separated words before tr replaces the spaces.
methods=`echo $methods | tr ' ' '|'`
cat <<EOF
usage: $0 $methods
$0 manages an Linux Volume Manager volume (LVM) as an HA resource
The 'start' operation brings the given volume online
The 'stop' operation takes the given volume offline
The 'status' operation reports whether the volume is available
The 'monitor' operation reports whether the volume seems present
The 'validate-all' operation checks whether the OCF parameters are valid
The 'meta-data' operation show meta data
The 'methods' operation reports on the methods $0 supports
EOF
}
# Default for "tag": the VG tag used for tagged-exclusive activation.
# The main section replaces it with OCF_RESKEY_tag when that parameter is set.
OUR_TAG="pacemaker"
# Print the agent's OCF metadata on stdout: the XML document below, which
# declares the volgrpname, exclusive, tag and partial_activation parameters
# and the supported actions. The current value of OUR_TAG is substituted as
# the default of the "tag" parameter.
# Fix: the "exclusive" description referred to the configuration file as
# "lvm.con"; the file is lvm.conf, as every other message in this agent says.
meta_data() {
cat <<EOF
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="LVM">
<version>1.0</version>
<longdesc lang="en">
Resource script for LVM. It manages an Linux Volume Manager volume (LVM)
as an HA resource.
</longdesc>
<shortdesc lang="en">Controls the availability of an LVM Volume Group</shortdesc>
<parameters>
<parameter name="volgrpname" unique="1" required="1">
<longdesc lang="en">
The name of volume group.
</longdesc>
<shortdesc lang="en">Volume group name</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="exclusive" unique="0" required="0">
<longdesc lang="en">
If set, the volume group will be activated exclusively. This option works one of
two ways. If the volume group has the cluster attribute set, then the volume group
will be activated exclusively using clvmd across the cluster. If the cluster attribute
is not set, the volume group will be activated exclusively using a tag and the volume_list
filter. When the tag option is in use, the volume_list in lvm.conf must be initialized. This
can be as simple as setting 'volume_list = []' depending on your setup.
</longdesc>
<shortdesc lang="en">Exclusive activation</shortdesc>
<content type="boolean" default="false" />
</parameter>
<parameter name="tag" unique="0" required="0">
<longdesc lang="en">
If "exclusive" is set on a non clustered volume group, this overrides the tag to be used.
</longdesc>
<shortdesc lang="en">Exclusive activation tag</shortdesc>
<content type="string" default="$OUR_TAG" />
</parameter>
<parameter name="partial_activation" unique="0" required="0">
<longdesc lang="en">
If set, the volume group will be activated partially even with some
physical volumes missing. It helps to set to true when using mirrored
logical volumes.
</longdesc>
<shortdesc lang="en">Activate VG partially when missing PVs</shortdesc>
<content type="string" default="false" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="30" />
<action name="stop" timeout="30" />
<action name="status" timeout="30" />
<action name="monitor" depth="0" timeout="30" interval="10" />
<action name="methods" timeout="5" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="5" />
</actions>
</resource-agent>
EOF
}
#
# methods: What methods/operations do we support?
#
LVM_methods() {
	# One supported operation per line, in the order they are advertised.
	printf '%s\n' \
		start \
		stop \
		status \
		monitor \
		methods \
		validate-all \
		meta-data \
		usage
}
##
# returns mode
#
# 0 = normal (non-exclusive) local activation
# 1 = tagged-exclusive activation
# 2 = clvm-exclusive activation
##
VG_MODE=
get_vg_mode()
{
	# Work out the mode only when no value is stored yet; a stored value
	# is echoed unchanged.
	if [ -z "$VG_MODE" ]; then
		VG_MODE=0
		if ocf_is_true "$OCF_RESKEY_exclusive"; then
			# A "c" in the sixth position of the VG attribute string
			# selects clvm-exclusive mode; anything else means tagging.
			case $(vgs -o attr --noheadings $OCF_RESKEY_volgrpname | tr -d ' ') in
			?????c*) VG_MODE=2 ;;
			*) VG_MODE=1 ;;
			esac
		fi
	fi
	echo "$VG_MODE"
}
##
# Verify tags setup
##
# Check that lvm.conf is usable for tag-based exclusive activation.
# Returns $OCF_ERR_GENERIC when "lvm dumpconfig activation/volume_list" fails
# or when its output contains the quoted cluster tag or the quoted volume
# group name; returns $OCF_SUCCESS otherwise.
# NOTE(review): the output of both "lvm dumpconfig" calls (and of grep) is not
# redirected, so it ends up on the agent's stdout - confirm this is intended.
verify_tags_environment()
{
##
# The volume_list must be initialized to something in order to
# guarantee our tag will be filtered on startup
##
if ! lvm dumpconfig activation/volume_list; then
ocf_log err "LVM: Improper setup detected"
ocf_exit_reason "The volume_list filter must be initialized in lvm.conf for exclusive activation without clvmd"
return $OCF_ERR_GENERIC
fi
##
# Our tag must _NOT_ be in the volume_list. This agent
# overrides the volume_list during activation using the
# special tag reserved for cluster activation
##
if lvm dumpconfig activation/volume_list | grep -e "\"@$OUR_TAG\"" -e "\"${OCF_RESKEY_volgrpname}\""; then
ocf_log err "LVM: Improper setup detected"
ocf_exit_reason "The volume_list in lvm.conf must not contain the cluster tag, \"$OUR_TAG\", or volume group, $OCF_RESKEY_volgrpname"
return $OCF_ERR_GENERIC
fi
return $OCF_SUCCESS
}
# Warn (never fail) when no image under /boot is newer than
# /etc/lvm/lvm.conf, because a stale initrd could activate the VG at boot
# outside the control of pacemaker. Returns 0 in every case.
check_initrd_warning()
{
	# First check to see if there is an initrd img we can safely
	# compare timestamps against. If not, don't even bother with
	# this check. This is known to work in rhel/fedora distros.
	# The wildcards must stay outside the quotes: a quoted "*" is taken
	# literally, ls then always fails and the check below is never reached.
	ls /boot/*"$(uname -r)"*.img > /dev/null 2>&1
	if [ $? -ne 0 ]; then
		return
	fi
	##
	# Now check to see if the initrd has been updated.
	# If not, the machine could boot and activate the VG outside
	# the control of pacemaker
	##
	# The -name pattern is quoted so the shell hands it to find untouched
	# instead of expanding it against the current directory.
	if [ "$(find /boot -name '*.img' -newer /etc/lvm/lvm.conf)" = "" ]; then
		ocf_log warn "LVM: Improper setup detected"
		ocf_log warn "* initrd image needs to be newer than lvm.conf"
		# While dangerous if not done the first time, there are many
		# cases where we don't simply want to fail here. Instead,
		# keep warning until the user remakes the initrd - or has
		# it done for them by upgrading the kernel.
		#
		# initrd can be updated using this command.
		# dracut -H -f /boot/initramfs-$(uname -r).img $(uname -r)
		#
	fi
}
##
# does this vg have our tag
##
check_tags()
{
	# Tags currently set on the volume group, with all blanks removed.
	local current_tags=$(vgs -o tags --noheadings $OCF_RESKEY_volgrpname | tr -d ' ')
	case "$current_tags" in
	"")
		# No-one owns this VG yet
		return 1 ;;
	"$OUR_TAG")
		# yep, this is ours
		return 0 ;;
	*)
		# some other tag is set on this vg
		return 2 ;;
	esac
}
# Remove every tag from the volume group.
# Returns $OCF_SUCCESS when no tag is left afterwards, $OCF_ERR_GENERIC
# otherwise.
strip_tags()
{
	local i
	# vgs prints the tags comma-separated; turn the commas into blanks so
	# the loop sees one tag per word.
	for i in `vgs --noheadings -o tags $OCF_RESKEY_volgrpname | sed s/","/" "/g`; do
		ocf_log info "Stripping tag, $i"
		# LVM version 2.02.98 allows changing tags if PARTIAL
		vgchange --deltag $i $OCF_RESKEY_volgrpname
	done
	# The substitution is quoted so that leftover output consisting of
	# several words is still seen as "non-empty" instead of breaking the
	# test expression and being mistaken for success.
	if [ -n "$(vgs -o tags --noheadings $OCF_RESKEY_volgrpname | tr -d ' ')" ]; then
		ocf_exit_reason "Failed to remove ownership tags from $OCF_RESKEY_volgrpname"
		return $OCF_ERR_GENERIC
	fi
	return $OCF_SUCCESS
}
set_tags()
{
	local tag_state
	check_tags
	tag_state=$?
	# we already own it.
	if [ $tag_state -eq 0 ]; then
		return $OCF_SUCCESS
	fi
	# other tags are set, strip them before setting
	if [ $tag_state -eq 2 ]; then
		strip_tags || return $OCF_ERR_GENERIC
	fi
	# Either the VG was untagged or it has just been cleaned: claim it.
	if ! vgchange --addtag $OUR_TAG $OCF_RESKEY_volgrpname; then
		ocf_exit_reason "Failed to add ownership tag to $OCF_RESKEY_volgrpname"
		return $OCF_ERR_GENERIC
	fi
	ocf_log info "New tag \"$OUR_TAG\" added to $OCF_RESKEY_volgrpname"
	return $OCF_SUCCESS
}
#
# Return LVM status (silently)
#
# Report whether volume group $1 is active.
# $2 empty: the result is only returned, and the "not available" message is
#   logged at err, or at warn during a probe, or at info when OP_METHOD is "stop".
# $2 non-empty: the message is logged at debug and a human-readable line is
#   also printed on stdout.
# Returns 0 when /dev/$1 is a non-empty directory and the tag checks (mode 1
# only) pass, $OCF_NOT_RUNNING when the VG is not active, and
# $OCF_ERR_GENERIC when the tag checks fail.
LVM_status() {
local rc=1
loglevel="debug"
# Set the log level of the error message
if [ "X${2}" = "X" ]; then
loglevel="err"
if ocf_is_probe; then
loglevel="warn"
else
if [ ${OP_METHOD} = "stop" ]; then
loglevel="info"
fi
fi
fi
# The VG counts as active when /dev/<vg> exists and lists at least one entry.
if [ -d /dev/$1 ]; then
test "`cd /dev/$1 && ls`" != ""
rc=$?
if [ $rc -ne 0 ]; then
ocf_exit_reason "VG $1 with no logical volumes is not supported by this RA!"
fi
fi
if [ $rc -ne 0 ]; then
ocf_log $loglevel "LVM Volume $1 is not available (stopped)"
rc=$OCF_NOT_RUNNING
else
case $(get_vg_mode) in
1) # exclusive with tagging.
# If vg is running, make sure the correct tag is present. Otherwise we
# can not guarantee exclusive activation.
if ! check_tags; then
ocf_exit_reason "WARNING: $OCF_RESKEY_volgrpname is active without the cluster tag, \"$OUR_TAG\""
rc=$OCF_ERR_GENERIC
fi
# make sure the environment for tags activation is still valid
if ! verify_tags_environment; then
rc=$OCF_ERR_GENERIC
fi
# let the user know if their initrd is older than lvm.conf.
check_initrd_warning
;;
*)
: ;;
esac
fi
if [ "X${2}" = "X" ]; then
# No second argument: return the result without printing to stdout
return $rc
fi
# Report on LVM volume status to stdout...
if [ $rc -eq 0 ]; then
echo "Volume $1 is available (running)"
else
echo "Volume $1 is not available (stopped)"
fi
return $rc
}
get_activate_options()
{
	local mode
	local opts
	mode=$(get_vg_mode)
	# Base activation flag, chosen by the activation mode.
	if [ "$mode" = "0" ]; then
		opts="-aly"
	elif [ "$mode" = "1" ]; then
		opts="-ay --config activation{volume_list=[\"@${OUR_TAG}\"]}"
	elif [ "$mode" = "2" ]; then
		opts="-aey"
	else
		opts="-a"
	fi
	ocf_is_true "$OCF_RESKEY_partial_activation" && opts="${opts} --partial"
	# for clones (clustered volume groups), we'll also have to force
	# monitoring, even if disabled in lvm.conf.
	ocf_is_clone && opts="$opts --monitor y"
	echo $opts
}
##
# Attempt to deactivate vg cluster wide and then start the vg exclusively
##
# Deactivate every logical volume of the group, then run the activating
# vgchange again. Returns $OCF_ERR_GENERIC when an LV is open or cannot be
# deactivated; otherwise returns the status of the final ocf_run vgchange.
retry_exclusive_start()
{
local vgchange_options="$(get_activate_options)"
# Deactivate each LV in the group one by one cluster wide
# The positional parameters become alternating "name attr" pairs.
set -- $(lvs -o name,attr --noheadings $OCF_RESKEY_volgrpname 2> /dev/null)
while [ $# -ge 2 ]; do
case $2 in
????ao*)
# open LVs cannot be deactivated.
return $OCF_ERR_GENERIC;;
*)
if ! lvchange -an $OCF_RESKEY_volgrpname/$1; then
ocf_exit_reason "Unable to perform required deactivation of $OCF_RESKEY_volgrpname/$1 before starting"
return $OCF_ERR_GENERIC
fi
;;
esac
# Move on to the next name/attr pair.
shift 2
done
ocf_run vgchange $vgchange_options $OCF_RESKEY_volgrpname
}
#
# Enable LVM volume
#
# Activate volume group $1.
# Returns $OCF_SUCCESS when LVM_status confirms the VG afterwards,
# $OCF_ERR_GENERIC when tagging or activation fails, and $OCF_NOT_RUNNING
# when activation appeared to work but the VG is not reported active.
LVM_start() {
local vgchange_options="$(get_activate_options)"
local vg=$1
local clvmd=0
# TODO: This MUST run vgimport as well
ocf_log info "Activating volume group $vg"
# With LVM major version 1 the VG name is passed to vgscan; otherwise vgscan
# runs without arguments.
if [ "$LVM_MAJOR" -eq "1" ]; then
ocf_run vgscan $vg
else
ocf_run vgscan
fi
case $(get_vg_mode) in
2)
# clvm-exclusive: remember it so a failed activation is retried below
clvmd=1
;;
1)
# tagged-exclusive: the VG must carry our tag before activation
if ! set_tags; then
return $OCF_ERR_GENERIC
fi
;;
*)
: ;;
esac
if ! ocf_run vgchange $vgchange_options $vg; then
if [ $clvmd -eq 0 ]; then
return $OCF_ERR_GENERIC
fi
# Failure to exclusively activate cluster vg.:
# This could be caused by a remotely active LV, Attempt
# to disable volume group cluster wide and try again.
# Allow for some settling
sleep 5
if ! retry_exclusive_start; then
return $OCF_ERR_GENERIC
fi
fi
if LVM_status $vg; then
: OK Volume $vg activated just fine!
return $OCF_SUCCESS
else
ocf_exit_reason "LVM: $vg did not activate correctly"
return $OCF_NOT_RUNNING
fi
}
#
# Disable the LVM volume
#
# Deactivate volume group $1, trying up to ten times.
# Returns $OCF_SUCCESS when the VG does not exist or was deactivated (and, in
# tagged-exclusive mode, its tags were stripped); otherwise $OCF_ERR_GENERIC
# or the status of strip_tags.
LVM_stop() {
local res=$OCF_ERR_GENERIC
local vgchange_options="-aln"
local vg=$1
if ! vgs $vg > /dev/null 2>&1; then
ocf_log info "Volume group $vg not found"
return $OCF_SUCCESS
fi
ocf_log info "Deactivating volume group $vg"
# Tagged-exclusive mode uses "-an" instead of the default "-aln".
case $(get_vg_mode) in
1) vgchange_options="-an" ;;
esac
for i in $(seq 10)
do
ocf_run vgchange $vgchange_options $vg
res=$?
# A VG that still reports as active overrides a successful vgchange.
if LVM_status $vg; then
ocf_exit_reason "LVM: $vg did not stop correctly"
res=1
fi
if [ $res -eq 0 ]; then
break
fi
res=$OCF_ERR_GENERIC
ocf_log warn "$vg still Active"
ocf_log info "Retry deactivating volume group $vg"
sleep 1
# When udevadm is available, wait (up to 5 seconds) for udev to settle
# before the next attempt.
which udevadm > /dev/null 2>&1 && udevadm settle --timeout=5
done
case $(get_vg_mode) in
1)
# Release ownership only after a successful deactivation.
if [ $res -eq 0 ]; then
strip_tags
res=$?
fi
;;
esac
return $res
}
#
# Check whether the OCF instance parameters are valid
#
# Validate the configuration for volume group $VOLUME.
# Returns $OCF_SUCCESS when all checks pass. On a failed check the function
# does not return: it exits the agent with $OCF_ERR_GENERIC, or with
# $OCF_ERR_CONFIGURED when exclusive activation is requested for a clone.
LVM_validate_all() {
check_binary $AWK
##
# lvmetad is a daemon that caches lvm metadata to improve the
# performance of LVM commands. This daemon should never be used when
# volume groups exist that are being managed by the cluster. The lvmetad
# daemon introduces a response lag, where certain LVM commands look like
# they have completed (like vg activation) when in fact the command
# is still in progress by the lvmetad. This can cause reliability issues
# when managing volume groups in the cluster. For Example, if you have a
# volume group that is a dependency for another application, it is possible
# the cluster will think the volume group is activated and attempt to start
# the application before volume group is really accessible... lvmetad is bad.
##
lvm dumpconfig global/use_lvmetad | grep 'use_lvmetad.*=.*1' > /dev/null 2>&1
if [ $? -eq 0 ]; then
# for now warn users that lvmetad is enabled and that they should disable it. In the
# future we may want to consider refusing to start, or killing the lvmetad daemon.
ocf_log warn "Disable lvmetad in lvm.conf. lvmetad should never be enabled in a clustered environment. Set use_lvmetad=0 and kill the lvmetad process"
fi
##
# Off-the-shelf tests...
##
VGOUT=`vgck ${VOLUME} 2>&1`
if [ $? -ne 0 ]; then
# Inconsistency might be due to missing physical volumes, which doesn't
# automatically mean we should fail. If partial_activation=true then
# we should let start try to handle it, or if no PVs are listed as
# "unknown device" then another node may have marked a device missing
# where we have access to all of them and can start without issue.
# An "m" in the pv_attr output is what selects the missing-device handling below.
if vgs -o pv_attr --noheadings $OCF_RESKEY_volgrpname 2>/dev/null | grep 'm' > /dev/null 2>&1; then
if vgs -o pv_name --noheadings $OCF_RESKEY_volgrpname 2>/dev/null | grep -E "unknown device|Couldn't find device|Device mismatch detected" > /dev/null 2>&1; then
if ! ocf_is_true "$OCF_RESKEY_partial_activation" ; then
# We are missing devices and cannot activate partially
ocf_exit_reason "Volume group [$VOLUME] has devices missing. Consider partial_activation=true to attempt to activate partially"
exit $OCF_ERR_GENERIC
else
# We are missing devices but are allowed to activate partially.
# Assume that caused the vgck failure and carry on
ocf_log warn "Volume group inconsistency detected with missing device(s) and partial_activation enabled. Proceeding with requested action."
fi
fi
# else the vg is partial but all devices are accounted for, so another
# node must have marked the device missing. Proceed.
else
# vgck failure was for something other than missing devices
ocf_exit_reason "Volume group [$VOLUME] does not exist or contains error! ${VGOUT}"
exit $OCF_ERR_GENERIC
fi
fi
##
# Does the Volume Group exist?
##
if [ "$LVM_MAJOR" = "1" ]; then
VGOUT=`vgdisplay ${VOLUME} 2>&1`
else
VGOUT=`vgdisplay -v ${VOLUME} 2>&1`
fi
if [ $? -ne 0 ]; then
ocf_exit_reason "Volume group [$VOLUME] does not exist or contains error! ${VGOUT}"
exit $OCF_ERR_GENERIC
fi
##
# If exclusive activation is not enabled, then
# further checking of proper setup is not necessary
##
if ! ocf_is_true "$OCF_RESKEY_exclusive"; then
return $OCF_SUCCESS;
fi
##
# Having cloned lvm resources with exclusive vg activation makes no sense at all.
##
if ocf_is_clone; then
ocf_exit_reason "cloned lvm resources can not be activated exclusively"
exit $OCF_ERR_CONFIGURED
fi
##
# Make sure the cluster attribute is set and clvmd is up when exclusive
# activation is enabled. Otherwise we can't exclusively activate the volume group.
##
case $(get_vg_mode) in
1) # exclusive activation using tags
if ! verify_tags_environment; then
exit $OCF_ERR_GENERIC
fi
;;
2) # exclusive activation with clvmd
##
# verify is clvmd running
##
if ! ps -C clvmd > /dev/null 2>&1; then
ocf_exit_reason "$OCF_RESKEY_volgrpname has the cluster attribute set, but 'clvmd' is not running"
exit $OCF_ERR_GENERIC
fi
;;
*)
: ;;
esac
return $OCF_SUCCESS
}
#
# 'main' starts here...
#
# Exactly one argument (the action name) is required.
if
[ $# -ne 1 ]
then
usage
exit $OCF_ERR_ARGS
fi
# Actions that need neither the volume group name nor the LVM tools.
case $1 in
meta-data) meta_data
exit $OCF_SUCCESS;;
methods) LVM_methods
exit $?;;
usage) usage
exit $OCF_SUCCESS;;
*) ;;
esac
if
[ -z "$OCF_RESKEY_volgrpname" ]
then
ocf_exit_reason "You must identify the volume group name!"
exit $OCF_ERR_CONFIGURED
fi
# Get the LVM version number, for this to work we assume(thanks to panjiam):
#
# LVM1 outputs like this
#
# # vgchange --version
# vgchange: Logical Volume Manager 1.0.3
# Heinz Mauelshagen, Sistina Software 19/02/2002 (IOP 10)
#
# LVM2 and higher versions output in this format
#
# # vgchange --version
# LVM version: 2.00.15 (2004-04-19)
# Library version: 1.00.09-ioctl (2004-03-31)
# Driver version: 4.1.0
LVM_VERSION=`vgchange --version 2>&1 | \
$AWK '/Logical Volume Manager/ {print $5"\n"; exit; }
/LVM version:/ {printf $3"\n"; exit;}'`
# rc is the exit status of the pipeline, i.e. of the awk command.
rc=$?
if
( [ $rc -ne 0 ] || [ -z "$LVM_VERSION" ] )
then
ocf_exit_reason "LVM: $1 could not determine LVM version. Try 'vgchange --version' manually and modify $0 ?"
exit $OCF_ERR_INSTALLED
fi
# Major version: everything before the first dot.
LVM_MAJOR="${LVM_VERSION%%.*}"
VOLUME=$OCF_RESKEY_volgrpname
# Remembered so LVM_status can pick a quieter log level during "stop".
OP_METHOD=$1
if [ -n "$OCF_RESKEY_tag" ]; then
OUR_TAG=$OCF_RESKEY_tag
fi
# What kind of method was invoked?
case "$1" in
start)
LVM_validate_all
LVM_start $VOLUME
exit $?;;
stop) LVM_stop $VOLUME
exit $?;;
status) LVM_status $VOLUME $1
exit $?;;
monitor) LVM_status $VOLUME
exit $?;;
# No explicit exit here: the script ends after this case statement, so the
# status of LVM_validate_all becomes the exit status.
validate-all) LVM_validate_all
;;
*) usage
exit $OCF_ERR_UNIMPLEMENTED;;
esac
diff --git a/heartbeat/LinuxSCSI b/heartbeat/LinuxSCSI
index ce033c00b..89fed6b74 100755
--- a/heartbeat/LinuxSCSI
+++ b/heartbeat/LinuxSCSI
@@ -1,314 +1,314 @@
#!/bin/sh
#
#
# LinuxSCSI
#
# Description: Enables/Disables SCSI devices to protect them from being
# used by mistake
#
#
# Author: Alan Robertson
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
# License: GNU General Public License (GPL)
# Copyright: (C) 2002 - 2005 IBM
#
# CAVEATS: See the usage message for some important warnings
#
# usage: ./LinuxSCSI (start|stop|status|monitor|meta-data|validate-all|methods)
#
# OCF parameters are as below:
# OCF_RESKEY_scsi
#
# An example usage in /etc/ha.d/haresources:
# node1 10.0.0.170 LinuxSCSI:0:0:11
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
# Pattern substituted for a numeric 0 in scsi_status: matches "00" or " 0".
zeropat="[ 0]0"
# Kernel SCSI control file: scsi_status reads it, scsi_start/scsi_stop append commands to it.
PROCSCSI=/proc/scsi/scsi
# Print the long usage text, including the kernel's warnings about SCSI
# add/remove-single-device, on stdout.
usage() {
cat <<EOF
usage: $0 (start|stop|status|monitor|meta-data|validate-all|methods)
$0 manages the availability of a SCSI device from the point
of view of the linux kernel. It make Linux believe the
device has gone away, and it can make it come back again.
The purpose of this resource script is to keep admins from
accidentally messing with a shared disk that is managed by the
HA subsystem and is currently owned by the other side.
To get maximum benefit from this feature, you should (manually)
disable the resources on boot, and let your HA software enable
them when it wants to acquire the disk.
The kernel code says this is potentially dangerous. DO NOT USE
IT ON AN ACTIVE DEVICE. If the device is inactive, this script
will make it stay inactive, when given "off". If you inactivate
the wrong device, you may have to reboot your machine, and your
data may take a hit.
On the other hand, at least one RAID controller requires the
use of this technique for it to work correctly in a failover
environment - so it is believed that it is more stable in this
usage than the comments in the code imply.
Here are the warnings from the kernel source about the "stop"
operation as of 2.4.10:
------------------------------
Consider this feature pre-BETA.
CAUTION: This is not for hotplugging your peripherals. As
SCSI was not designed for this, you could damage your
hardware and thoroughly confuse the SCSI subsystem.
Similar warnings apply to the "start" operation...
Consider this feature BETA.
CAUTION: This is not for hotplugging your peripherals.
As SCSI was not designed for this you could damage your
hardware !
However perhaps it is legal to switch on an already connected
device. It is perhaps not guaranteed this device doesn't corrupt
an ongoing data transfer.
-------------------------
So, Caveat Emptor, and test this feature thoroughly on
your kernel and your configuration with real load on the SCSI
bus before using it in production!
Another potential trouble spot...
The order in which you bring up LinuxSCSI resources determines which
SCSI device they show up as on Linux. If you have two SCSI devices
in different resource groups they will be brought up asyncronously
resulting in indeterminate device name assignments. This usually
happens in an active-active configuration.
To solve this you probably should use LVM or EVMS to manage these
volumes. LVM and EVMS solve this problem for you by labels they
keep in the volumes. If you don't use a reasonable volume manager,
then you'll have to mount by UUID.
EOF
}
# Print the agent's OCF metadata on stdout: the XML document below, which
# declares the "scsi" and "ignore_deprecation" parameters and the supported
# actions with their timeouts.
meta_data() {
cat <<EOF
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="LinuxSCSI">
<version>1.0</version>
<longdesc lang="en">
Deprecation warning: This agent makes use of Linux SCSI hot-plug
functionality which has been superseded by SCSI reservations. It is
deprecated and may be removed from a future release. See the
scsi2reservation and sfex agents for alternatives. --
This is a resource agent for LinuxSCSI. It manages the availability of a
SCSI device from the point of view of the linux kernel. It make Linux
believe the device has gone away, and it can make it come back again.
</longdesc>
<shortdesc lang="en">Enables and disables SCSI devices through the
kernel SCSI hot-plug subsystem (deprecated)</shortdesc>
<parameters>
<parameter name="scsi" unique="0" required="1">
<longdesc lang="en">
The SCSI instance to be managed.
</longdesc>
<shortdesc lang="en">SCSI instance</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="ignore_deprecation">
<longdesc lang="en">
If set to true, suppresses the deprecation warning for this agent.
</longdesc>
<shortdesc lang="en">Suppress deprecation warning</shortdesc>
<content type="boolean" default="false" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="20s" />
<action name="stop" timeout="20s" />
<action name="methods" timeout="5" />
<action name="status" depth="0" timeout="20s" interval="10" />
<action name="monitor" depth="0" timeout="20s" interval="10" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="5" />
</actions>
</resource-agent>
EOF
}
scsi_methods() {
	# One supported operation per line.
	printf '%s\n' \
		start \
		stop \
		status \
		monitor \
		validate-all \
		methods
}
# Split a SCSI instance "host:channel:target[:lun]" given as $1 into the
# globals host, channel, target and lun (lun defaults to 0 when omitted).
# Logs an error and exits with $OCF_ERR_ARGS when $1 has neither form.
parseinst() {
	lun=0
	case "$1" in
	# The four-field form has to be tested first: in a glob "*" also
	# matches ":", so the three-field pattern accepts four-field input
	# as well and would otherwise hide this branch, leaving lun at 0.
	[0-9]*:[0-9]*:[0-9]*:[0-9]*)
		lun=`echo "$1" | cut -d: -f4`;;
	[0-9]*:[0-9]*:[0-9]*);;
	*)	#host=error
		#channel=error
		#target=error
		#lun=error
		ocf_log err "Invalid SCSI instance $1"
		exit $OCF_ERR_ARGS
	esac
	host=`echo "$1" | cut -d: -f1`
	channel=`echo "$1" | cut -d: -f2`
	target=`echo "$1" | cut -d: -f3`
}
#
# start: Enable the given SCSI device in the kernel
#
scsi_start() {
	parseinst "$1"
	# Ask the kernel to attach the device, then verify that it showed up.
	echo "scsi add-single-device $host $channel $target $lun" >>$PROCSCSI
	scsi_status "$1" && return $OCF_SUCCESS
	ocf_log err "SCSI device $1 not active!"
	return $OCF_ERR_GENERIC
}
#
# stop: Disable the given SCSI device in the kernel
#
scsi_stop() {
	parseinst "$1"
	# Ask the kernel to detach the device, then verify that it is gone.
	echo "scsi remove-single-device $host $channel $target $lun" >>$PROCSCSI
	scsi_status "$1" || return $OCF_SUCCESS
	ocf_log err "SCSI device $1 still active!"
	return $OCF_ERR_GENERIC
}
#
# status: is the given device now available?
#
scsi_status() {
	parseinst "$1"
	# A numeric zero is replaced by a pattern that accepts both "00"
	# and " 0".
	if [ $channel -eq 0 ]; then channel=$zeropat; fi
	if [ $target -eq 0 ]; then target=$zeropat; fi
	if [ $lun -eq 0 ]; then lun=$zeropat; fi
	greppat="Host: *scsi$host *Channel: *$channel *Id: *$target *Lun: *$lun"
	# Present in the kernel's device list means running.
	if grep -i "$greppat" $PROCSCSI >/dev/null; then
		return $OCF_SUCCESS
	fi
	return $OCF_NOT_RUNNING
}
#
# validate_all: Check the OCF instance parameters
#
scsi_validate_all() {
	# parseinst exits the agent itself on a malformed instance, so getting
	# past it means the configured value is acceptable.
	parseinst $instance
	return $OCF_SUCCESS
}
# Exactly one argument (the action name) is accepted.
if [ $# -ne 1 ]; then
    ocf_log err "Parameter number error."
    usage
    exit $OCF_ERR_GENERIC
fi

# Informational actions are answered right away; they do not need the
# OCF_RESKEY_scsi instance parameter.
case $1 in
methods)
    scsi_methods
    exit $OCF_SUCCESS
    ;;
meta-data)
    meta_data
    exit $OCF_SUCCESS
    ;;
usage)
    usage
    exit $OCF_SUCCESS
    ;;
esac
# Be obnoxious, log deprecation warning on every invocation (unless
# suppressed by resource configuration).
ocf_deprecated

# Every remaining action operates on a concrete device.
if [ -z "$OCF_RESKEY_scsi" ]; then
    ocf_log err "You have to set a valid scsi id at least!"
    exit $OCF_ERR_GENERIC
fi

instance=$OCF_RESKEY_scsi

case $1 in
start)
    scsi_start $instance
    ;;
stop)
    scsi_stop $instance
    ;;
status|monitor)
    if scsi_status $instance; then
        ocf_log info "SCSI device $instance is running"
        # This is the top level of the script, not a function body, so
        # the result must be delivered with exit; "return" is only valid
        # inside a function or a sourced file.
        exit $OCF_SUCCESS
    else
        ocf_log info "SCSI device $instance is stopped"
        exit $OCF_NOT_RUNNING
    fi
    ;;
validate-all)
    scsi_validate_all
    ;;
*)
    usage
    exit $OCF_ERR_UNIMPLEMENTED
    ;;
esac
# start, stop and validate-all fall through to here with their status.
exit $?
diff --git a/heartbeat/Raid1 b/heartbeat/Raid1
index 7cf658b59..bef2606cf 100755
--- a/heartbeat/Raid1
+++ b/heartbeat/Raid1
@@ -1,556 +1,556 @@
#!/bin/sh
#
#
# License: GNU General Public License (GPL)
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
#
# Raid1
# Description: Manages a Linux software RAID device on a shared storage medium.
# Original Author: Eric Z. Ayers (eric.ayers@compgen.com)
# Original Release: 25 Oct 2000
# RAID patches: http://people.redhat.com/mingo/raid-patches/
# Word to the Wise: http://lwn.net/2000/0810/a/raid-faq.php3
# Sympathetic Ear: mailto:linux-raid@vger.kernel.org
#
# usage: $0 {start|stop|status|monitor|validate-all|usage|meta-data}
#
#
# EXAMPLE config file /etc/raidtab.md0
# This file must exist on both machines!
#
# raiddev /dev/md0
# raid-level 1
# nr-raid-disks 2
# chunk-size 64k
# persistent-superblock 1
# #nr-spare-disks 0
# device /dev/sda1
# raid-disk 0
# device /dev/sdb1
# raid-disk 1
#
# EXAMPLE config file /etc/mdadm.conf (for more info:man mdadm.conf)
#
# DEVICE /dev/sdb1 /dev/sdc1
# ARRAY /dev/md0 UUID=4a865b55:ba27ef8d:29cd5701:6fb42799
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
# Print the one-line command synopsis on stdout.
usage() {
    printf '%s\n' "usage: $0 {start|stop|status|monitor|validate-all|usage|meta-data}"
}
# Print the OCF resource-agent metadata (XML) on stdout.
# The heredoc is unquoted, so $PROC_CLEANUP_TIME in the force_stop
# description is expanded when the function runs.
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="Raid1">
<version>1.0</version>
<longdesc lang="en">
This resource agent manages Linux software RAID (MD) devices on
a shared storage medium. It uses mdadm(8) to start, stop, and
monitor the MD devices. Raidtools are supported, but deprecated.
See https://raid.wiki.kernel.org/index.php/Linux_Raid for more
information.
</longdesc>
<shortdesc lang="en">Manages Linux software RAID (MD) devices on shared storage</shortdesc>
<parameters>
<parameter name="raidconf" unique="0" required="1">
<longdesc lang="en">
The RAID configuration file, e.g. /etc/mdadm.conf.
</longdesc>
<shortdesc lang="en">RAID config file</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="raiddev" unique="0" required="1">
<longdesc lang="en">
One or more block devices to use, space separated. Alternatively,
set to "auto" to manage all devices specified in raidconf.
</longdesc>
<shortdesc lang="en">block device</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="homehost" unique="0" required="0">
<longdesc lang="en">
The value for the homehost directive; this is an mdadm feature to
protect RAIDs against being activated by accident. It is recommended to
create RAIDs managed by the cluster with "homehost" set to a special
value, so they are not accidentally auto-assembled by nodes not
supposed to own them.
</longdesc>
<shortdesc lang="en">Homehost for mdadm</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="force_stop" unique="0" required="0">
<longdesc lang="en">
If processes or kernel threads are using the array, it cannot be
stopped. We will try to stop processes, first by sending TERM and
then, if that doesn't help in $PROC_CLEANUP_TIME seconds, using KILL.
The lsof(8) program is required to get the list of array users.
Of course, the kernel threads cannot be stopped this way.
If the processes are critical for data integrity, then set this
parameter to false. Note that in that case the stop operation
will fail and the node will be fenced.
</longdesc>
<shortdesc lang="en">force stop processes using the array</shortdesc>
<content type="boolean" default="true" />
</parameter>
<parameter name="udev" unique="0" required="0">
<longdesc lang="en">
Wait until udevd creates a device in the start operation. On a
normally loaded host this should happen quickly, but you may be
unlucky. If you are not using udev set this to "no".
</longdesc>
<shortdesc lang="en">udev</shortdesc>
<content type="boolean" default="true" />
</parameter>
<parameter name="force_clones" unique="0" required="0">
<longdesc lang="en">
Activating the same md RAID array on multiple nodes at the same time
will result in data corruption and thus is forbidden by default.
A safe example could be an array that is only named identically across
all nodes, but is in fact distinct.
Only set this to "true" if you know what you are doing!
</longdesc>
<shortdesc lang="en">force ability to run as a clone</shortdesc>
<content type="boolean" default="false" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="20s" />
<action name="stop" timeout="20s" />
<action name="status" depth="0" timeout="20s" interval="10" />
<action name="monitor" depth="0" timeout="20s" interval="10" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}
# Run "udevadm settle" with the given arguments, but only when waiting
# for udev is enabled ($WAIT_FOR_UDEV is true).
# Arguments are forwarded with "$@" so that each one reaches udevadm
# unchanged, even if it contains whitespace or glob characters.
udev_settle() {
    if ocf_is_true $WAIT_FOR_UDEV; then
        udevadm settle "$@"
    fi
}
# Print the second field of every line of $RAIDCONF that starts with
# "ARRAY", one per line. Exits the script with $OCF_ERR_GENERIC when
# $RAIDCONF is not a regular file.
list_conf_arrays() {
    if ! test -f $RAIDCONF; then
        ocf_exit_reason "$RAIDCONF gone missing!"
        exit $OCF_ERR_GENERIC
    fi
    grep ^ARRAY $RAIDCONF | awk '{print $2}'
}
# Call function $1 once for every device in $RAIDDEVS, passing the
# device as the only argument, and return the bitwise OR of the exit
# codes collected so far.
# When $2 is "all" every device is visited; otherwise iteration stops
# after the first call that makes the accumulated code non-zero.
forall() {
    local action=$1
    local mode=$2
    local dev status=0
    for dev in $RAIDDEVS; do
        $action $dev
        status=$(($status | $?))
        if [ "$mode" != all ] && [ $status -ne 0 ]; then
            return $status
        fi
    done
    return $status
}
# Succeed only when raid1_monitor_one reports $OCF_NOT_RUNNING for every
# device in $RAIDDEVS. Checking stops at the first device that reports
# anything else.
are_arrays_stopped() {
    local state dev
    for dev in $RAIDDEVS; do
        raid1_monitor_one $dev
        state=$?
        if [ $state -ne $OCF_NOT_RUNNING ]; then
            break
        fi
    done
    [ $state -eq $OCF_NOT_RUNNING ]
}
# Assemble the array $1 with $MDADM using $RAIDCONF (plus the optional
# $MDADM_HOMEHOST argument), then let udev_settle wait for the device
# node. The return value is that of udev_settle.
md_assemble() {
    local array=$1
    $MDADM --assemble $array --config=$RAIDCONF $MDADM_HOMEHOST
    udev_settle --exit-if-exists=$array
}
#
# START: Start up the RAID device
#
# Returns $OCF_SUCCESS when the arrays are already online or come online,
# and $OCF_ERR_GENERIC when they are in a state other than "not running"
# beforehand or are still not healthy afterwards.
#
raid1_start() {
    local state

    raid1_monitor
    state=$?

    # md already online, nothing to do.
    [ $state -eq $OCF_SUCCESS ] && return $OCF_SUCCESS

    # Anything other than "cleanly stopped" is a broken state that this
    # agent doesn't know how to repair.
    if [ $state -ne $OCF_NOT_RUNNING ]; then
        ocf_exit_reason "$RAIDDEVS in a broken state; cannot start (rc=$state)"
        return $OCF_ERR_GENERIC
    fi

    if [ $HAVE_RAIDTOOLS = "true" ]; then
        # Run raidstart to start up the RAID array
        $RAIDSTART --configfile $RAIDCONF $MDDEV
    else
        forall md_assemble all
    fi

    # The monitor result, not the start command's status, decides.
    raid1_monitor
    if [ $? -ne $OCF_SUCCESS ]; then
        ocf_exit_reason "Couldn't start RAID for $RAIDDEVS"
        return $OCF_ERR_GENERIC
    fi
    return $OCF_SUCCESS
}
#
# STOP: stop the RAID device
#
# Switch array $1 to read-only with $MDADM and return mdadm's exit code.
# A non-zero code is also recorded through ocf_exit_reason.
mark_readonly() {
    local array=$1
    local status
    ocf_log info "Attempting to mark array $array readonly"
    $MDADM --readonly $array --config=$RAIDCONF
    status=$?
    [ $status -eq 0 ] || ocf_exit_reason "Failed to set $array readonly (rc=$status)"
    return $status
}
# Stop array $1 when its block device file is missing: create a
# temporary block device node (major 9, minor taken from the digits in
# $1) under $HA_RSCTMP, stop the array through that node and remove the
# node again. Returns 1 when no minor number can be derived from $1,
# otherwise mdadm's exit code.
mknod_raid1_stop() {
    local status minor node
    # drop the leading run of non-digit characters
    minor=$(echo $1 | sed 's/[^0-9]*//')
    ocf_is_decimal "$minor" || {
        ocf_log warn "could not get the minor device number from $1"
        return 1
    }
    node="$HA_RSCTMP/${OCF_RESOURCE_INSTANCE}-$(basename $1)"
    rm -f $node
    ocf_log info "block device file $1 missing, creating one in order to stop the array"
    mknod $node b 9 $minor
    $MDADM --stop $node --config=$RAIDCONF --wait-clean -W
    status=$?
    rm -f $node
    return $status
}
# Stop the single array $1 and return the status of the last stop
# attempt. With a block device file present, mdadm is run on it
# directly. Without one, mdadm is first given the basename (newer mdadm
# releases accept that); if that fails, mknod_raid1_stop creates a
# temporary device node and stops the array through it.
raid1_stop_one() {
    ocf_log info "Stopping array $1"
    if [ -b "$1" ]; then
        $MDADM --stop $1 --config=$RAIDCONF --wait-clean -W
        return
    fi
    $MDADM --stop $(basename $1) --config=$RAIDCONF --wait-clean -W &&
        return
    mknod_raid1_stop $1
}
# Print the unique PIDs (second lsof column) of the processes lsof
# reports for device $1, and pass every lsof output line to ocf_log at
# warn level. The lsof header line is skipped.
get_users_pids() {
    local array=$1
    local listing line
    ocf_log debug "running lsof to list $array users..."
    listing=$(lsof $array | tail -n +2)
    echo "$listing" | awk '{print $2}' | sort -u
    echo "$listing" | while read line; do
        ocf_log warn "$line"
    done
}
# Stop the processes that hold any of the arrays open, using
# ocf_stop_processes with TERM and a $PROC_CLEANUP_TIME timeout.
# Returns 2 when lsof reports no users at all, otherwise the status of
# ocf_stop_processes.
stop_raid_users() {
    local pids
    pids=$(forall get_users_pids all | sort -u)
    if [ -n "$pids" ]; then
        ocf_stop_processes TERM $PROC_CLEANUP_TIME $pids
    else
        ocf_log warn "lsof reported no users holding arrays"
        return 2
    fi
}
# Stop all managed arrays: with raidtools via $RAIDSTOP on $MDDEV,
# otherwise by running raid1_stop_one on every device in $RAIDDEVS.
# The return value is that of the command that was run.
stop_arrays() {
    case $HAVE_RAIDTOOLS in
    true)
        $RAIDSTOP --configfile $RAIDCONF $MDDEV
        ;;
    *)
        forall raid1_stop_one all
        ;;
    esac
}
# Log who is using each device given as an argument: the lsof output
# (when lsof is installed) and the contents of the device's sysfs
# "holders" directory (when it exists).
showusers() {
    local disk name
    for disk; do
        if have_binary lsof; then
            ocf_log info "running lsof to list $disk users..."
            ocf_run -warn lsof $disk
        fi
        # The caller passes device paths such as /dev/md0, while the
        # entries below /sys/block are plain device names (md0), so the
        # directory part must be removed before building the sysfs path.
        name=`basename $disk`
        if [ -d /sys/block/$name/holders ]; then
            ocf_log info "ls -l /sys/block/$name/holders"
            ocf_run -warn ls -l /sys/block/$name/holders
        fi
    done
}
# Stop every array in $RAIDDEVS.
# Returns $OCF_SUCCESS when the arrays already were, or now are, stopped
# and $OCF_ERR_GENERIC otherwise.
raid1_stop() {
local rc
# See if the MD device is already cleanly stopped:
if are_arrays_stopped; then
return $OCF_SUCCESS
fi
# Turn off raid
# If the first attempt fails and force_stop is enabled, the processes
# reported by lsof are stopped and stop_arrays is run a second time.
# Every dead end below ends in "false" so that the exit status of the
# whole if statement, which is captured in rc right after it, is
# non-zero. When the first stop_arrays succeeds no branch runs and the
# if statement's status is 0.
if ! stop_arrays; then
if ocf_is_true $FORCESTOP; then
if have_binary lsof; then
stop_raid_users
case $? in
# 2 means lsof found no users, so retrying would not help
2) false;;
*) stop_arrays;;
esac
else
ocf_log warn "install lsof(8) to list users holding the disk"
false
fi
else
false
fi
fi
# must directly follow the if statement above (see comment there)
rc=$?
if [ $rc -ne 0 ]; then
ocf_log warn "Couldn't stop RAID for $RAIDDEVS (rc=$rc)"
showusers $RAIDDEVS
# mdadm only: as a fallback, try to mark the arrays read-only
if [ $HAVE_RAIDTOOLS != "true" ]; then
forall mark_readonly all
fi
return $OCF_ERR_GENERIC
fi
# The stop commands reported success; verify the result independently.
if are_arrays_stopped; then
return $OCF_SUCCESS
fi
ocf_exit_reason "RAID $RAIDDEVS still active after stop command!"
return $OCF_ERR_GENERIC
}
#
# monitor: a less noisy status
#
# Check the single array $1 (a /dev path).
# Returns $OCF_NOT_RUNNING when the device node is missing (not checked
# during stop) or the array is not listed in /proc/mdstat,
# $OCF_ERR_GENERIC when mdadm reports a failure or a direct read from
# the device fails, and $OCF_SUCCESS otherwise.
#
raid1_monitor_one() {
    local mddev=$1
    local md=$(echo $mddev | sed 's,/dev/,,')
    local mdadm_rc
    local readd=0
    local blocksize

    # check if the md device exists first
    # but not if we are in the stop operation
    # device existence is important only for the running arrays
    if [ "$__OCF_ACTION" != "stop" -a ! -b $mddev ]; then
        ocf_log info "$mddev is not a block device"
        return $OCF_NOT_RUNNING
    fi

    grep -e "^$md[ \t:]" /proc/mdstat >/dev/null || {
        ocf_log info "$md not found in /proc/mdstat"
        return $OCF_NOT_RUNNING
    }

    if [ $HAVE_RAIDTOOLS != "true" ]; then
        $MDADM --detail --test $mddev >/dev/null 2>&1
        mdadm_rc=$?
        case $mdadm_rc in
        0)
            ;;
        1)
            # degraded but usable; remember to try a re-add below
            ocf_log warn "$mddev has at least one failed device."
            readd=1
            ;;
        2)
            ocf_exit_reason "$mddev has failed."
            return $OCF_ERR_GENERIC
            ;;
        4)
            ocf_exit_reason "mdadm failed on $mddev."
            return $OCF_ERR_GENERIC
            ;;
        *)
            ocf_exit_reason "mdadm returned an unknown result ($mdadm_rc)."
            return $OCF_ERR_GENERIC
            ;;
        esac
    fi

    # Recovery is attempted only from a recurring monitor (interval not
    # 0) with OCF_CHECK_LEVEL above 0, and only for a degraded array.
    if [ "$__OCF_ACTION" = "monitor" -a "$OCF_RESKEY_CRM_meta_interval" != 0 \
        -a $readd -eq 1 -a $OCF_CHECK_LEVEL -gt 0 ]; then
        ocf_log info "Attempting recovery sequence to re-add devices on $mddev:"
        $MDADM $mddev --fail detached
        $MDADM $mddev --remove failed
        $MDADM $mddev --re-add missing
        # TODO: At this stage, there's nothing to actually do
        # here. Either this worked or it did not.
    fi

    # Read one block straight from the device as a final health check.
    blocksize=$( (blockdev --getpbsz $mddev || stat -c "%o" $mddev) 2>/dev/null)
    if [ -z "$blocksize" ]; then
        ocf_log warn "both blockdev and stat could not get the block size (will use 4k)"
        blocksize=4096 # try with 4k
    fi
    dd if=$mddev count=1 bs=$blocksize of=/dev/null \
        iflag=direct >/dev/null 2>&1 || {
        ocf_exit_reason "$mddev: I/O error on read"
        return $OCF_ERR_GENERIC
    }
    return $OCF_SUCCESS
}
# Monitor all arrays in $RAIDDEVS through forall. No "all" argument is
# passed, so forall stops at the first array whose check is non-zero and
# returns the accumulated code.
raid1_monitor() {
forall raid1_monitor_one
}
#
# STATUS: is the raid device online or offline?
#
# Prints "running" when raid1_monitor returns $OCF_SUCCESS and
# "stopped" otherwise; the monitor's return code is passed through.
#
raid1_status() {
    local state
    raid1_monitor
    state=$?
    if [ $state -eq $OCF_SUCCESS ]; then
        echo "running"
    else
        echo "stopped"
    fi
    return $state
}
# validate-all handler. It performs no checks of its own; the parameter
# checks in the main part of the script run before the action dispatch
# and exit on error, so reaching this function means they passed.
raid1_validate_all() {
return $OCF_SUCCESS
}
# Timeout handed to ocf_stop_processes; also quoted in the meta-data
# text, so it has to be set before meta_data can be called.
PROC_CLEANUP_TIME=3

# Exactly one argument (the action name) is accepted.
if [ $# -ne 1 ]; then
    usage
    exit $OCF_ERR_ARGS
fi

# Informational actions are answered before any parameter is examined.
case "$1" in
meta-data)
    meta_data
    exit $OCF_SUCCESS
    ;;
usage)
    usage
    exit $OCF_SUCCESS
    ;;
esac
# Instance parameters. force_stop and udev default to enabled.
RAIDCONF="$OCF_RESKEY_raidconf"
MDDEV="$OCF_RESKEY_raiddev"
FORCESTOP="${OCF_RESKEY_force_stop:-1}"
WAIT_FOR_UDEV="${OCF_RESKEY_udev:-1}"

if [ -z "$RAIDCONF" ] ; then
    ocf_exit_reason "Please set OCF_RESKEY_raidconf!"
    exit $OCF_ERR_CONFIGURED
fi

if [ ! -r "$RAIDCONF" ] ; then
    ocf_exit_reason "Configuration file [$RAIDCONF] does not exist, or can not be opened!"
    exit $OCF_ERR_INSTALLED
fi

if [ -z "$MDDEV" ] ; then
    ocf_exit_reason "Please set OCF_RESKEY_raiddev to the Raid device you want to control!"
    exit $OCF_ERR_CONFIGURED
fi

# Running as a clone is refused unless force_clones is set.
if ocf_is_clone && ! ocf_is_true "$OCF_RESKEY_force_clones"; then
    ocf_exit_reason "md RAID arrays are NOT safe to run as a clone!"
    ocf_log err "Please read the comment on the force_clones parameter."
    exit $OCF_ERR_CONFIGURED
fi

# Without udevadm there is nothing to wait with: turn the udev wait off
# (logging this only during start).
if ocf_is_true $WAIT_FOR_UDEV && ! have_binary udevadm; then
    if [ "$__OCF_ACTION" = "start" ]; then
        ocf_log warn "either install udevadm or set udev to false"
        ocf_log info "setting udev to false!"
    fi
    WAIT_FOR_UDEV=0
fi

if ! ocf_is_true $WAIT_FOR_UDEV; then
    export MDADM_NO_UDEV=1
fi

if ocf_is_true $FORCESTOP && ! have_binary lsof; then
    ocf_log warn "Please install lsof(8), we may need it when stopping Raid device! Now continuing anyway ..."
fi

# Prefer mdadm; fall back to raidtools when mdadm is not available.
HAVE_RAIDTOOLS=false
if have_binary $MDADM >/dev/null 2>&1 ; then
    if [ -n "$OCF_RESKEY_homehost" ]; then
        MDADM_HOMEHOST="--homehost=${OCF_RESKEY_homehost}"
    else
        MDADM_HOMEHOST=""
    fi
else
    check_binary $RAIDSTART
    HAVE_RAIDTOOLS=true
fi

# raidtools can handle neither "auto" nor more than one device.
if [ $HAVE_RAIDTOOLS = true ]; then
    if [ "$MDDEV" = "auto" ]; then
        ocf_exit_reason "autoconf supported only with mdadm!"
        exit $OCF_ERR_INSTALLED
    elif [ `echo $MDDEV|wc -w` -gt 1 ]; then
        ocf_exit_reason "multiple devices supported only with mdadm!"
        exit $OCF_ERR_INSTALLED
    fi
fi

# "auto" expands to every ARRAY listed in the configuration file.
if [ "$MDDEV" = "auto" ]; then
    RAIDDEVS=`list_conf_arrays`
else
    RAIDDEVS="$MDDEV"
fi
# At this stage,
# [ $HAVE_RAIDTOOLS = false ] <=> we have $MDADM,
# otherwise we have raidtools (raidstart and raidstop)

# Run the requested action; its return code becomes the exit code.
case "$1" in
start)        raid1_start ;;
stop)         raid1_stop ;;
status)       raid1_status ;;
monitor)      raid1_monitor ;;
validate-all) raid1_validate_all ;;
*)
    usage
    exit $OCF_ERR_UNIMPLEMENTED
    ;;
esac
exit $?
diff --git a/heartbeat/SendArp b/heartbeat/SendArp
index b67404f24..9d0b48726 100755
--- a/heartbeat/SendArp
+++ b/heartbeat/SendArp
@@ -1,267 +1,267 @@
#!/bin/sh
#
#
# Copyright (c) 2006, Huang Zhen <zhen.huang@gmail.com>
# Converting original heartbeat RA to OCF RA.
#
# Copyright (C) 2004 Horms <horms@verge.net.au>
#
# Based on IPaddr2: Copyright (C) 2003 Tuomo Soini <tis@foobar.fi>
#
# License: GNU General Public License (GPL)
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
#
# This script send out gratuitous Arp for an IP address
#
# It can be used _instead_ of the IPaddr2 or IPaddr resource
# to send gratuitous arp for an IP address on a given interface,
# without adding the address to that interface. I.e. if for
# some reason you want to send gratuitous arp for addresses
# managed by IPaddr2 or IPaddr on an additional interface.
#
# OCF parameters are as below:
# OCF_RESKEY_ip
# OCF_RESKEY_nic
#
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Helper binary that sends the ARP packets, and the directory holding
# its pid file.
SENDARP=$HA_BIN/send_arp
SENDARPPIDDIR=${HA_RSCTMP}
# Instance parameters: address to announce and interface to use.
BASEIP="$OCF_RESKEY_ip"
INTERFACE="$OCF_RESKEY_nic"
# Always empty in this script; sendarp_validate rejects a non-empty value.
RESIDUAL=""
# One pid file per announced address.
SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$BASEIP"
BACKGROUND=${OCF_RESKEY_background:-"yes"}
# Set default values
# ("=" without ":" assigns only when the variable is unset, so values
# already present in the environment are kept)
: ${ARP_INTERVAL_MS=200} # milliseconds between ARPs
: ${ARP_REPEAT=5} # repeat count
: ${ARP_BACKGROUND=$BACKGROUND} # no to run in foreground
: ${ARP_NETMASK=ffffffffffff} # netmask for ARP
#######################################################################
# Print the OCF resource-agent metadata (XML) on stdout.
sendarp_meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="SendArp" version="1.0">
<version>1.0</version>
<longdesc lang="en">
This RA can be used _instead_ of the IPaddr2 or IPaddr RA to
send gratuitous ARP for an IP address on a given interface,
without adding the address to that interface. For example,
if for some reason you wanted to send gratuitous ARP for
addresses managed by IPaddr2 or IPaddr on an additional
interface.
</longdesc>
<shortdesc lang="en">Broadcasts unsolicited ARP announcements</shortdesc>
<parameters>
<parameter name="ip" unique="0" required="1">
<longdesc lang="en">
The IP address for sending ARP packet.
</longdesc>
<shortdesc lang="en">IP address</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="nic" unique="0" required="1">
<longdesc lang="en">
The NIC for sending ARP packet.
</longdesc>
<shortdesc lang="en">NIC</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="background" unique="0" required="0">
<longdesc lang="en">
Send ARPs in background. Set to false if you want to test if
sending ARPs succeeded.
</longdesc>
<shortdesc lang="en">Send ARPs in background</shortdesc>
<content type="boolean" default="true" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="20s" />
<action name="stop" timeout="20s" />
<action name="monitor" depth="0" timeout="20" interval="10" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="20s" />
</actions>
</resource-agent>
END
}
#######################################################################
# Print the command synopsis on stdout. The heredoc is unquoted, so $0
# is expanded when the function runs.
sendarp_usage() {
cat <<END
usage: $0 {start|stop|monitor|validate-all|meta-data}
Expects to have a fully populated OCF RA-compliant environment set.
END
}
#
# Send gratuitous arp
#
# Returns $OCF_ERR_CONFIGURED when validation fails, $OCF_SUCCESS when
# the resource is already started or the ARPs were sent (or handed to a
# background job), and $OCF_ERR_GENERIC when a foreground send fails.
#
sendarp_start() {
    local rc
    local cfg_if

    sendarp_validate
    if [ $? = $OCF_ERR_CONFIGURED ]; then
        return $OCF_ERR_CONFIGURED
    fi

    sendarp_monitor
    if [ $? = $OCF_SUCCESS ]; then
        return $OCF_SUCCESS
    fi

    # Optional overrides of the ARP_* settings: one global file and one
    # per-interface file. This script never assigns TARGET_INTERFACE, so
    # without a fallback the per-interface lookup would always be for
    # "arp_config:" and could not match the configured NIC. A value
    # supplied through the environment is still honoured.
    cfg_if="${TARGET_INTERFACE:-$INTERFACE}"
    [ -r ${HA_CONFDIR}/arp_config ] && . ${HA_CONFDIR}/arp_config
    if [ -r "${HA_CONFDIR}/arp_config:${cfg_if}" ]; then
        . "${HA_CONFDIR}/arp_config:${cfg_if}"
    fi

    ARGS="-i $ARP_INTERVAL_MS -r $ARP_REPEAT -p $SENDARPPIDFILE $INTERFACE $BASEIP auto $BASEIP $ARP_NETMASK"
    ocf_log debug "$SENDARP $ARGS"

    rc=$OCF_SUCCESS
    if ocf_is_true $ARP_BACKGROUND; then
        # not possible to check the status without wait! we can
        # just log the outcome
        # and wait-ing would be equal to not running in
        # background
        ($SENDARP $ARGS ||
            ocf_exit_reason "Could not send gratuitous arps") &
    else
        $SENDARP $ARGS || {
            ocf_exit_reason "Could not send gratuitous arps"
            rc=$OCF_ERR_GENERIC
        }
    fi

    # Record the resource as started only when nothing failed so far.
    if [ $rc -eq $OCF_SUCCESS ]; then
        ha_pseudo_resource SendArp_${OCF_RESOURCE_INSTANCE} start
    fi
    return $rc
}
#
# Stop sending gratuitous arp
#
# Returns $OCF_SUCCESS when the resource is not running or was released,
# and $OCF_ERR_GENERIC when a send_arp process recorded in the pid file
# could not be killed.
#
sendarp_stop() {
    # kept local, as in sendarp_start, so the function does not
    # overwrite a global rc
    local rc

    sendarp_monitor
    if [ $? -eq $OCF_NOT_RUNNING ]; then
        return $OCF_SUCCESS
    fi

    rc=$OCF_SUCCESS

    if [ -f "$SENDARPPIDFILE" ]; then
        kill `cat "$SENDARPPIDFILE"`
        rc=$?
        case $rc in
        0)
            ocf_log info "killed previously running send_arp for $BASEIP"
            rm -f "$SENDARPPIDFILE"
            rc=$OCF_SUCCESS
            ;;
        *)
            ocf_log warn "Could not kill previously running send_arp for $BASEIP"
            rc=$OCF_ERR_GENERIC
            ;;
        esac
    fi

    case $rc in
    $OCF_SUCCESS)
        ocf_log info "SendArp for $BASEIP/$INTERFACE released"
        ha_pseudo_resource SendArp_${OCF_RESOURCE_INSTANCE} stop
        ;;
    *)
        ocf_log warn "SendArp for $BASEIP/$INTERFACE NOT released"
        ;;
    esac

    return $rc
}
#
# monitor: $OCF_SUCCESS while the send_arp pid file exists; otherwise
# whatever ha_pseudo_resource reports for this resource.
#
sendarp_monitor() {
    if [ ! -f "$SENDARPPIDFILE" ]; then
        ha_pseudo_resource SendArp_${OCF_RESOURCE_INSTANCE} monitor
        return
    fi
    return $OCF_SUCCESS
}
# Check the instance parameters: both $INTERFACE and $BASEIP must be
# set and $RESIDUAL must be empty.
# Returns $OCF_SUCCESS or $OCF_ERR_CONFIGURED.
sendarp_validate() {
    if [ -z "$INTERFACE" ] || [ -z "$BASEIP" ] || [ -n "$RESIDUAL" ]; then
        return $OCF_ERR_CONFIGURED
    fi
    return $OCF_SUCCESS
}
# Run the requested action. start, stop, monitor and validate-all leave
# their return code as the script's exit status; the other branches exit
# explicitly.
case $__OCF_ACTION in
meta-data)
    sendarp_meta_data
    exit $OCF_SUCCESS
    ;;
start)
    sendarp_start
    ;;
stop)
    sendarp_stop
    ;;
monitor)
    sendarp_monitor
    ;;
status)
    # same check as monitor, plus a human-readable answer on stdout
    sendarp_monitor
    if [ $? != $OCF_SUCCESS ]; then
        echo "stopped"
        exit $OCF_NOT_RUNNING;
    fi
    echo "running"
    exit $OCF_SUCCESS;
    ;;
validate-all)
    sendarp_validate
    ;;
usage|help)
    sendarp_usage
    exit $OCF_SUCCESS
    ;;
*)
    sendarp_usage
    exit $OCF_ERR_UNIMPLEMENTED
    ;;
esac
diff --git a/heartbeat/ServeRAID b/heartbeat/ServeRAID
index a66084da0..4308813c8 100755
--- a/heartbeat/ServeRAID
+++ b/heartbeat/ServeRAID
@@ -1,419 +1,419 @@
#!/bin/sh
#
#
# ServeRAID
#
# Description: Enables/Disables shared ServeRAID merge groups
#
# Author: Alan Robertson, Renzo Alejandro Granados
#
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
#
# License: GNU General Public License (GPL)
#
# Copyright: (C) 2002-2005 International Business Machines
# (C) 2002 Renzo Alejandro Granados
#
# usage: ./ServeRAID (start|stop|status|monitor|validate-all|meta-data)
#
# OCF parameters are as below:
# OCF_RESKEY_serveraid
# (Adapter number of the ServeRAID adapter)
# OCF_RESKEY_mergegroup
# (MergeGroup # of the logical drive under consideration)
#
# The ServeRAID clustering model is a bit odd, and its terminology needs
# a little explanation
#
# Logical Volume - a particular SCSI id {target id and LUN} on
# a particular controller.
#
# Merge Group - when active on one side or the other of the ServeRAID
# configuration it corresponds with a logical drive.
# Merge group numbers are permanently assigned to a particular
# chunk of storage. Shared merge groups are in the
# range of 1 to 8, and are largely arbitrary.
# Unshared merge groups start at 200.
# We can only deal with shared merge groups. When a merge
# group is activated on one of the controllers, it becomes
# a logical volume on that system. NOTE: The order in
# which the Merge Groups are activated determines which
# SCSI Ids they become. This makes for extra headaches
# for this script to deal with. It also means that if
# you have more than one shared ServeRAID merge group on
# a particular controller, that the SCSI IDs will not
# be constant. This requires mounting by uuid or label.
#
# One of the ServeRAID controllers has to be configured with
# SCSI initiator ID 6, and the other with SCSI initiator ID 7.
#
# At this time, the ServeRAID clustering solution only works with
# RAID 1 setups. It does NOT support RAID 5. This is a firmware
# bug in the ServeRAID where it doesn't fail over correctly
# if the RAID5 array is in a critical state...
#
# Note that this script requires ServeRAID software version 6.10 or
# later. This software is now available from IBM.
#
# An example usage in /etc/ha.d/haresources:
# node1 10.0.0.170 ServeRAID::1::1
#
# Older ServeRAID utility returns 1 when it succeeds (weird)
# BUT - the newly released version is more normal...
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
srsuccess=0
SCSI="scsi "
# Print the usage text, including the required environment variables
# and the clustering prerequisites, on stdout.
usage() {
cat <<-EOF
usage: $0 (start|stop|status|monitor|validate-all|meta-data)
You have to set the following environment variables before running $0 :
OCF_RESKEY_serveraid
(Adapter number of the ServeRAID adapter)
OCF_RESKEY_mergegroup
(MergeGroup # of the logical drive under consideration)
ServeRAID adapters are numbered starting from 1.
The shared merge group number is a number between 1 and 8 inclusive.
It indicates to the controller which logical disk to fail over.
node1 10.0.0.170 ServeRAID::1::1
PREREQUISITES:
You must configure your ServeRAID adapters for clustering for this
to work.
To do this, you must use the bootable "ServeRAID Support CD" and right
click your controller and pick "configure for clustering". The Linux
version of the ServeRAID manager does not have the "configure for
clustering" option.
You will need at least version 6.10 (~July 2003 release) of the ipssend
command for this script to work.
EOF
}
# Print the OCF resource-agent metadata (XML) on stdout.
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="ServeRAID">
<version>1.0</version>
<longdesc lang="en">
Resource script for ServeRAID. It enables/disables shared ServeRAID merge groups.
</longdesc>
<shortdesc lang="en">Enables and disables shared ServeRAID merge groups</shortdesc>
<parameters>
<parameter name="serveraid" unique="0" required="1">
<longdesc lang="en">
The adapter number of the ServeRAID adapter.
</longdesc>
<shortdesc lang="en">serveraid</shortdesc>
<content type="integer" default="" />
</parameter>
<parameter name="mergegroup" unique="0" required="1">
<longdesc lang="en">
The logical drive under consideration.
</longdesc>
<shortdesc lang="en">mergegroup</shortdesc>
<content type="integer" default="" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="40" />
<action name="stop" timeout="40" />
<action name="status" depth="0" timeout="20" interval="10" />
<action name="monitor" depth="0" timeout="20" interval="10" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
<action name="methods" timeout="5" />
</actions>
</resource-agent>
END
}
# Print the names of the actions this agent implements, one per line.
# "monitor" is listed because the action dispatcher handles it together
# with "status" and the meta-data advertises it.
ServeRAID_methods() {
cat <<-!
start
stop
status
monitor
validate-all
methods
usage
meta-data
!
}
# Directory in which parseinst looks for one file per ServeRAID host id.
ServeRAIDSCSI="/proc/scsi/ips"
# ServeRAID command line utility.
IPS=ipssend
# Kernel SCSI device list; also the file add/remove commands are written to.
proc_scsi=/proc/scsi/scsi
# parseinst ADAPTER MERGEGROUP
# Validate the two instance arguments and publish them in the globals
# $sr_adapter and $sr_mergegroup; $hostid receives the SCSI host id of
# the selected adapter. Exits with $OCF_ERR_ARGS on a wrong argument
# count, an unknown adapter or a merge group outside 1-8. Finishes with
# CheckRaidLevel, whose status is returned.
parseinst() {
    # start from a known-bad state so stale values are never reused
    sr_adapter=error
    sr_mergegroup=error
    hostid=error
    sr_logicaldrivenumber=error

    if [ $# -ne 2 ]; then
        ocf_log err "Invalid ServeRAID instance: $*"
        exit $OCF_ERR_ARGS
    fi

    # For each "Host" line joined with the two lines after it, print the
    # scsi host number when the joined text mentions SERVERAID and Proces.
    PerlScript='next unless /^Host/; $_ .= <>.<>; print "$1 " if /SERVERAID/ and /Proces/ and /scsi(\d+)/'
    # Get the list of host ids of the ServeRAID host adapters
    hostlist=$($PERL -ne "${PerlScript}" <$proc_scsi)
    # Figure the host id of the desired ServeRAID adapter
    hostid=$(echo $hostlist | cut -d' ' -f$1)

    [ -f "$ServeRAIDSCSI/$hostid" ] || {
        ocf_log err "No such ServeRAID adapter: $1"
        exit $OCF_ERR_ARGS
    }

    case $2 in
    [1-8])
        ;;
    *)
        ocf_log err "Invalid Shared Merge Group Number: $2"
        exit $OCF_ERR_ARGS
        ;;
    esac

    sr_adapter=$1
    sr_mergegroup=$2
    CheckRaidLevel
}
# Print the logical-drive configuration of adapter $sr_adapter as
# reported by "$IPS getconfig ... ld".
SRLogicalDriveConfig() {
$IPS getconfig $sr_adapter ld
}
# Print the SCSI target id that belongs to merge group $sr_mergegroup.
# The Perl filter remembers the most recent "logical drive number N" in
# the SRLogicalDriveConfig output and, on a "part of merge group" line
# naming $sr_mergegroup, prints N - 1.
# Returns 0 when the result starts with a digit, 1 otherwise (nothing
# is printed in that case).
MergeGroupToSCSI_ID() {
# \$ keeps the Perl variables away from the shell; $sr_mergegroup is
# deliberately expanded by the shell.
PerlScript="while (<>) {
/logical drive number *([0-9]+)/i && (\$ld=\$1);
/part of merge group *: *$sr_mergegroup *\$/i && print \$ld - 1, \"\n\";
}"
ID=`SRLogicalDriveConfig | $PERL -e "$PerlScript"`
case $ID in
[0-9]*) echo "$ID"; return 0;;
*) return 1;;
esac
}
# Print the RAID level of merge group $sr_mergegroup.
# The Perl filter remembers the most recent "RAID level : X" value in
# the SRLogicalDriveConfig output and prints it on a "part of merge
# group" line naming $sr_mergegroup.
# Returns 0 when something was found, 1 otherwise.
MergeGroupRaidLevel() {
# \$ keeps the Perl variables away from the shell; $sr_mergegroup is
# deliberately expanded by the shell.
PerlScript="while (<>) {
/RAID level *: *([0-9]+[A-Za-z]*)/i && (\$ld=\$1);
/part of merge group *: *$sr_mergegroup *\$/i && print \$ld, \"\n\";
}"
Level=`SRLogicalDriveConfig | $PERL -e "$PerlScript"`
case $Level in
?*) echo "$Level"; return 0;;
*) return 1;;
esac
}
# Refuse merge groups whose RAID level contains a "5": log two error
# messages and exit the script with $OCF_ERR_GENERIC.
# Any other level (including an empty result) returns $OCF_SUCCESS.
CheckRaidLevel() {
    RAIDlevel=$(MergeGroupRaidLevel)
    case $RAIDlevel in
    *5*)
        ;;
    *)
        return $OCF_SUCCESS
        ;;
    esac
    ocf_log err "ServeRAID device $sr_adapter $sr_mergegroup is RAID level $RAIDlevel"
    ocf_log err "This level of ServeRAID RAID is not supported for failover by the firmware."
    exit $OCF_ERR_GENERIC
}
# Ask the kernel to detach the SCSI device that backs the current merge
# group by writing a remove-single-device command to $proc_scsi.
# Returns non-zero without writing anything when the SCSI id of the
# merge group cannot be determined; a command with an empty target
# field would be malformed.
ReleaseSCSI() {
    targetid=`MergeGroupToSCSI_ID` || {
        ocf_log debug "No SCSI id found for merge group $sr_mergegroup, nothing to remove"
        return 1
    }
    echo "${SCSI}remove-single-device $hostid 0 $targetid 0" > $proc_scsi
}
# Ask the kernel to attach the SCSI device that backs the current merge
# group by writing an add-single-device command to $proc_scsi.
# Returns non-zero without writing anything when the SCSI id of the
# merge group cannot be determined; a command with an empty target
# field would be malformed.
AddSCSI() {
    targetid=`MergeGroupToSCSI_ID` || {
        ocf_log warn "No SCSI id found for merge group $sr_mergegroup, cannot add the device"
        return 1
    }
    echo "${SCSI}add-single-device $hostid 0 $targetid 0" > $proc_scsi
}
#
# start: Enable the given ServeRAID device
#
# Called with the adapter number and the merge group as $1 and $2.
# Returns $OCF_SUCCESS when the merge group is, or becomes, active here
# and $OCF_ERR_GENERIC when neither merge command succeeds. When the
# final status check fails the script is terminated with exit.
#
ServeRAID_start() {
if
# NOTE(review): this first check reads the globals $serveraid and
# $mergegroup, whereas the final check below uses "$@"; the dispatcher
# in this script passes the same two values, so they agree there.
ServeRAID_status $serveraid $mergegroup
then
ocf_log debug "ServeRAID merge group $serveraid $mergegroup is running."
return $OCF_SUCCESS
else
# The condition of this if is everything up to the matching "then":
# the assignment followed by the inner if. Its result is therefore
# that of the inner if, i.e. of ocf_log after a successful PARTNER
# merge, or of the OWN merge that is tried as a fallback.
if
#
# Normally we do a MERGE PARTNER, but if we still own the drive for
# some reason, then we'll need to do a MERGE OWN instead...
#
out=`$IPS MERGE $sr_adapter $sr_mergegroup PARTNER 2>&1`
if
# $? is the exit status of the ipssend call captured in "out"
[ $? -eq $srsuccess ]
then
ocf_log info "$out"
else
ocf_run $IPS MERGE $sr_adapter $sr_mergegroup OWN
fi
then
: OK All is well!
targetid=`MergeGroupToSCSI_ID`
sr_logicaldrivenumber=`expr $targetid + 1`
#run $IPS SYNCH $sr_adapter $sr_logicaldrivenumber &
# This version of the SYNCH command requires the 6.10 or later
# ServeRAID support CD.
# To avoid issues when called by lrmd, redirect stdout->stderr.
# Use () to create a subshell to make the redirection be synchronized.
( ocf_run $IPS SYNCH $sr_adapter $sr_mergegroup & ) >&2
AddSCSI
else
return $OCF_ERR_GENERIC
fi
fi
# The merge commands reported success; verify the result independently.
if
ServeRAID_status "$@"
then
return $OCF_SUCCESS
else
ocf_log err "ServeRAID device $1 not active!"
# terminates the script directly instead of returning to the caller
exit $OCF_ERR_GENERIC
fi
}
#
# stop: Disable the given ServeRAID device
#
# Called with the adapter number and the merge group as $1 and $2.
# Returns $OCF_SUCCESS when the merge group is no longer active here
# and $OCF_ERR_GENERIC when it still is.
#
ServeRAID_stop() {
    parseinst "$@"
    ReleaseSCSI
    # The outcome of UNMERGE itself is not acted upon; the status check
    # below decides whether the stop worked.
    ocf_run $IPS UNMERGE $sr_adapter $sr_mergegroup
    if ! ServeRAID_status "$@"; then
        return $OCF_SUCCESS
    fi
    ocf_log err "ServeRAID device $* is still active!"
    return $OCF_ERR_GENERIC
}
#
# status: is the given device now available?
#
# Called with the adapter number and the merge group as $1 and $2.
# Succeeds when the logical-drive configuration of the adapter contains
# a "Part of merge group" line naming the merge group; the exit status
# is that of grep.
#
ServeRAID_status() {
parseinst "$@"
#
# The output we're looking for
# Part of merge group : 2
#
SRLogicalDriveConfig \
| grep -i "part of merge group[ ]*: *$sr_mergegroup *\$" >/dev/null
}
#
# validate_all: are the OCF instance parameters valid?
#
# Called with the adapter number and the merge group as $1 and $2.
# Requires $PERL (via check_binary); everything else is checked by
# parseinst, whose status is returned.
#
ServeRAID_validate_all() {
    check_binary $PERL
    parseinst "$@"
}
# Exactly one argument (the action name) is accepted.
if [ $# -ne 1 ]; then
    usage
    exit $OCF_ERR_ARGS
fi

# These operations don't require OCF instance parameters to be set
case "$1" in
meta-data)
    meta_data
    exit $OCF_SUCCESS
    ;;
methods)
    # methods: What methods do we support?
    ServeRAID_methods
    exit $?
    ;;
usage)
    usage
    exit $OCF_SUCCESS
    ;;
esac
# Every remaining action needs both instance parameters.
if [ -z "$OCF_RESKEY_serveraid" ] || [ -z "$OCF_RESKEY_mergegroup" ]; then
    # One-line message: the previous text contained a literal "\n"
    # followed by a line break, and misspelt "environment variables".
    ocf_log err "You have to set the OCF_RESKEY_serveraid and OCF_RESKEY_mergegroup environment variables before running $0 !"
    exit $OCF_ERR_GENERIC
fi

# Right number of arguments and both parameters present.
serveraid=$OCF_RESKEY_serveraid
mergegroup=$OCF_RESKEY_mergegroup
# Look for the start, stop, status, or methods calls...
# Every branch terminates the script with the result of its action.
case "$1" in
stop)
    ServeRAID_stop $serveraid $mergegroup
    exit $?
    ;;
start)
    ServeRAID_start $serveraid $mergegroup
    exit $?
    ;;
status|monitor)
    # Both outcomes exit here, so no trailing exit is needed in this
    # branch.
    if ServeRAID_status $serveraid $mergegroup; then
        ocf_log debug "ServeRAID merge group $serveraid $mergegroup is running."
        exit $OCF_SUCCESS
    else
        ocf_log debug "ServeRAID merge group $serveraid $mergegroup is stopped."
        exit $OCF_NOT_RUNNING
    fi
    ;;
validate-all)
    ServeRAID_validate_all $serveraid $mergegroup
    exit $?
    ;;
*)
    usage
    exit $OCF_ERR_UNIMPLEMENTED
    ;;
esac
diff --git a/heartbeat/VIPArip b/heartbeat/VIPArip
index 01c6c994f..12804dffb 100755
--- a/heartbeat/VIPArip
+++ b/heartbeat/VIPArip
@@ -1,302 +1,302 @@
#!/bin/sh
#
# License: GNU General Public License (GPL)
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
# Author: Huang Zhen <zhenhltc@cn.ibm.com>
# Copyright (c) 2006 International Business Machines
#
# Virtual IP Address by RIP2 protocol.
# This script manages IP alias in different subnet with quagga/ripd.
# It can add an IP alias, or remove one.
#
# The quagga package should be installed to run this RA
#
# usage: $0 {start|stop|status|monitor|validate-all|meta-data}
#
# The "start" arg adds an IP alias.
# Surprisingly, the "stop" arg removes one. :-)
#
# OCF parameters are as below
# OCF_RESKEY_ip The IP address in different subnet
# OCF_RESKEY_nic The NIC used to broadcast the route information
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# ripd configuration file generated and edited by this agent.
RIPDCONF="${HA_RSCTMP}/VIPArip-ripd.conf"
# Default daemon locations; replaced further down when the
# zebra_binary / ripd_binary parameters are set.
ZEBRA="/usr/sbin/zebra"
RIPD="/usr/sbin/ripd"
USAGE="usage: $0 {start|stop|status|monitor|validate-all|meta-data}"
#######################################################################
meta_data() {
# Print the OCF resource agent metadata (XML) on stdout and exit the
# script with $OCF_SUCCESS. $ZEBRA and $RIPD are expanded into the
# advertised parameter defaults.
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="VIPArip">
<version>1.0</version>
<longdesc lang="en">
Virtual IP Address by RIP2 protocol.
This script manages IP alias in different subnet with quagga/ripd.
It can add an IP alias, or remove one.
</longdesc>
<shortdesc lang="en">Manages a virtual IP address through RIP2</shortdesc>
<parameters>
<parameter name="ip" unique="1" required="1">
<longdesc lang="en">
The IPv4 address in different subnet, for example "192.168.1.1".
</longdesc>
<shortdesc lang="en">The IP address in different subnet</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="nic" unique="0">
<longdesc lang="en">
The nic for broadcast the route information.
The ripd uses this nic to broadcast the route information to others
</longdesc>
<shortdesc lang="en">The nic for broadcast the route information</shortdesc>
<content type="string" default="eth0"/>
</parameter>
<parameter name="zebra_binary" unique="0">
<longdesc lang="en">
Absolute path to the zebra binary.
</longdesc>
<shortdesc lang="en">zebra binary</shortdesc>
<content type="string" default="$ZEBRA"/>
</parameter>
<parameter name="ripd_binary" unique="0">
<longdesc lang="en">
Absolute path to the ripd binary.
</longdesc>
<shortdesc lang="en">ripd binary</shortdesc>
<content type="string" default="$RIPD"/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="20s" />
<action name="stop" timeout="20s" />
<action name="monitor" depth="0" timeout="20s" interval="5s" />
<action name="validate-all" timeout="20s" />
<action name="meta-data" timeout="5s" />
</actions>
</resource-agent>
END
exit $OCF_SUCCESS
}
usage() {
    # Usage goes to stderr so it never mixes with regular output.
    echo "$USAGE" 1>&2
}
new_config_file() {
# Write a fresh ripd configuration to $RIPDCONF, overwriting any previous
# content.
#   $1  IP address; it is permitted as <ip>/32 in the "private" access list
#   $2  interface name; used for network/passive-interface/distribute-list
#   $3  metric for "redistribute connected"
# The "!nic_tag", "!metric_tag" and "!ip_tag" lines are markers that the
# sed-based helpers add_nic and add_ip key on, so they must stay in place.
echo new_config_file $1 $2 $3
cat >$RIPDCONF <<END
hostname ripd
password zebra
debug rip events
debug rip packet
debug rip zebra
log file /var/log/quagga/quagga.log
router rip
!nic_tag
no passive-interface $2
network $2
distribute-list private out $2
distribute-list private in $2
!metric_tag
redistribute connected metric $3
!ip_tag
access-list private permit $1/32
access-list private deny any
END
}
check_params() {
    # The "ip" parameter is mandatory; abort the whole agent when it is unset.
    if [ -n "$OCF_RESKEY_ip" ]
    then
        return 0
    fi
    ocf_log err "ip is a required parameter"
    exit $OCF_ERR_CONFIGURED
}
set_metric() {
    # Rewrite the "redistribute connected metric" line of $RIPDCONF so that
    # it carries metric $1. The edit goes through $RIPDCONF.tmp, which is
    # then copied back over the original.
    echo set_metric $1
    sed "s/redistribute connected metric .*/redistribute connected metric $1/g" \
        $RIPDCONF > $RIPDCONF.tmp
    cp $RIPDCONF.tmp $RIPDCONF
}
add_ip() {
    # Insert an "access-list private permit <ip>/32" line for $1 right after
    # the ip_tag marker in $RIPDCONF (edited via $RIPDCONF.tmp).
    echo add_ip $1
    sed "s/ip_tag/ip_tag\naccess-list private permit $1\/32/g" \
        $RIPDCONF > $RIPDCONF.tmp
    cp $RIPDCONF.tmp $RIPDCONF
}
del_ip() {
    # Remove the access-list entry for IP $1 from $RIPDCONF.
    # If other permitted IPs remain, ripd is told to reload its config;
    # otherwise quagga is stopped and the config files are removed.
    echo del_ip $1
    # The dots of the address must be matched literally and the entry must
    # end in /32; otherwise removing 10.0.0.1 would also drop 10.0.0.10/32
    # (or any line where '.' happens to match another character).
    _del_ip_re=$(printf '%s\n' "$1" | sed 's/\./\\./g')
    sed "/access-list private permit ${_del_ip_re}\/32[[:space:]]*\$/d" $RIPDCONF > $RIPDCONF.tmp
    cp $RIPDCONF.tmp $RIPDCONF
    if $GREP "access-list private permit" $RIPDCONF >/dev/null
    then
        echo some other IP is running
        reload_config
    else
        stop_quagga
        echo remove $RIPDCONF
        # Also drop the scratch copy so no stale file is left behind.
        rm -f $RIPDCONF $RIPDCONF.tmp
    fi
}
add_nic() {
    # Add the interface $1 to $RIPDCONF (after the nic_tag marker) unless a
    # matching "network" line is already present.
    echo add_nic $1
    if ! $GREP "network $1" $RIPDCONF >/dev/null
    then
        sed "s/nic_tag/nic_tag\n no passive-interface $1\n network $1\n distribute-list private out $1\n distribute-list private in $1/g" $RIPDCONF > $RIPDCONF.tmp
        cp $RIPDCONF.tmp $RIPDCONF
    else
        echo the nic is already in the config file
    fi
}
reload_config() {
    # Print the current ripd configuration, then signal ripd with SIGHUP
    # (intended to make it re-read the file).
    echo reload_config
    echo "$RIPDCONF:"
    cat $RIPDCONF
    echo killall -SIGHUP ripd
    killall -SIGHUP ripd
}
start_quagga() {
    # Print the configuration, then start zebra followed by ripd as daemons;
    # ripd is pointed at the generated configuration file.
    echo start_quagga
    echo "$RIPDCONF:"
    cat $RIPDCONF
    echo $ZEBRA -d
    $ZEBRA -d
    echo $RIPD -d -f $RIPDCONF
    $RIPD -d -f $RIPDCONF
}
stop_quagga() {
    # Print the configuration, then terminate ripd and zebra (in that order)
    # with SIGTERM, echoing each command before running it.
    echo stop_quagga
    echo "$RIPDCONF:"
    cat $RIPDCONF
    for _quagga_daemon in ripd zebra
    do
        echo killall -SIGTERM $_quagga_daemon
        killall -SIGTERM $_quagga_daemon
    done
}
start_rip_ip() {
    # Bring the virtual IP up on lo and announce it through ripd.
    # Exits early with $OCF_SUCCESS when the resource is already running.
    echo start_rip_ip
    check_params
    if [ -z "$OCF_RESKEY_nic" ]
    then
        echo OCF_RESKEY_nic is null, set to eth0
        OCF_RESKEY_nic="eth0"
    fi

    status_rip_ip
    case $? in
        $OCF_SUCCESS)
            ocf_log info "already running"
            exit $OCF_SUCCESS
            ;;
        $OCF_NOT_RUNNING)
            ;;
        *)
            ocf_log info "state undefined, stopping first"
            stop_rip_ip
            ;;
    esac

    $IP2UTIL addr add $OCF_RESKEY_ip/32 dev lo

    if [ ! -f "$RIPDCONF" ]
    then
        # No config yet: create one with metric 1 and start the daemons.
        new_config_file $OCF_RESKEY_ip $OCF_RESKEY_nic 1
        start_quagga
    else
        # there is a config file, add new data(IP,nic,metric)
        # to the existing config file.
        add_ip $OCF_RESKEY_ip
        add_nic $OCF_RESKEY_nic
        set_metric 1
        reload_config
    fi

    # Common tail of both paths: wait, then switch to metric 3 and reload.
    echo sleep 3
    sleep 3
    set_metric 3
    reload_config
    return $OCF_SUCCESS
}
stop_rip_ip() {
    # Take the virtual IP down: remove the address from lo and drop it from
    # the ripd configuration. Exits with $OCF_SUCCESS right away when the
    # resource is not running.
    echo stop_rip_ip
    check_params
    status_rip_ip
    if [ $? = $OCF_NOT_RUNNING ]
    then
        exit $OCF_SUCCESS
    fi
    # Delete with the same /32 prefix that start used; omitting the prefix
    # makes iproute2 fall back to its deprecated "wildcard deletion".
    $IP2UTIL addr del $OCF_RESKEY_ip/32 dev lo
    echo sleep 2
    sleep 2
    del_ip $OCF_RESKEY_ip
    return $OCF_SUCCESS
}
status_rip_ip() {
    # Report the resource state:
    #   $OCF_NOT_RUNNING  the IP is not configured on this host
    #   $OCF_SUCCESS      IP configured, listed in $RIPDCONF, ripd running
    #   $OCF_ERR_GENERIC  IP configured but config entry or ripd missing
    check_params
    # Fixed-string matches with explicit delimiters: a regex match on the
    # bare address would treat '.' as a wildcard and would also accept
    # longer addresses (10.0.0.1 vs 10.0.0.10).
    if ! $IP2UTIL addr | $GREP -F "inet $OCF_RESKEY_ip/" >/dev/null
    then
        return $OCF_NOT_RUNNING
    fi
    # stderr is silenced because $RIPDCONF may legitimately not exist yet.
    if $GREP -F "permit $OCF_RESKEY_ip/32" $RIPDCONF >/dev/null 2>&1 &&
       pidof ripd >/dev/null
    then
        return $OCF_SUCCESS
    fi
    return $OCF_ERR_GENERIC
}
# Main dispatch: exactly one argument (the action) is accepted.
if [ $# -ne 1 ]
then
    usage
    exit $OCF_ERR_ARGS
fi

# Optional overrides for the daemon binaries.
if [ -n "$OCF_RESKEY_zebra_binary" ]
then
    ZEBRA=$OCF_RESKEY_zebra_binary
fi
if [ -n "$OCF_RESKEY_ripd_binary" ]
then
    RIPD=$OCF_RESKEY_ripd_binary
fi

case $1 in
    start)
        start_rip_ip;;
    stop)
        stop_rip_ip;;
    status|monitor)
        status_rip_ip;;
    validate-all)
        check_binary $IP2UTIL
        exit $OCF_SUCCESS;;
    meta-data)
        meta_data;;
    usage)
        usage
        exit $OCF_SUCCESS;;
    *)
        usage
        exit $OCF_ERR_UNIMPLEMENTED;;
esac
diff --git a/heartbeat/VirtualDomain b/heartbeat/VirtualDomain
index 188b7c3ab..0b614f5f3 100755
--- a/heartbeat/VirtualDomain
+++ b/heartbeat/VirtualDomain
@@ -1,842 +1,842 @@
#!/bin/sh
#
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
# License: GNU General Public License (GPL)
#
# Resource Agent for domains managed by the libvirt API.
# Requires a running libvirt daemon (libvirtd).
#
# (c) 2008-2010 Florian Haas, Dejan Muhamedagic,
# and Linux-HA contributors
#
# usage: $0 {start|stop|status|monitor|migrate_to|migrate_from|meta-data|validate-all}
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Defaults: each parameter falls back to its *_default value when unset.
OCF_RESKEY_migration_downtime_default=0
: ${OCF_RESKEY_migration_downtime=${OCF_RESKEY_migration_downtime_default}}

OCF_RESKEY_migration_speed_default=0
: ${OCF_RESKEY_migration_speed=${OCF_RESKEY_migration_speed_default}}

OCF_RESKEY_force_stop_default=0
: ${OCF_RESKEY_force_stop=${OCF_RESKEY_force_stop_default}}

OCF_RESKEY_autoset_utilization_cpu_default="true"
: ${OCF_RESKEY_autoset_utilization_cpu=${OCF_RESKEY_autoset_utilization_cpu_default}}

OCF_RESKEY_autoset_utilization_hv_memory_default="true"
: ${OCF_RESKEY_autoset_utilization_hv_memory=${OCF_RESKEY_autoset_utilization_hv_memory_default}}

# Pick a port in the range 49152-49215.
OCF_RESKEY_migrateport_default=$(( 49152 + $(ocf_maybe_random) % 64 ))
: ${OCF_RESKEY_migrateport=${OCF_RESKEY_migrateport_default}}

OCF_RESKEY_CRM_meta_timeout_default=90000
: ${OCF_RESKEY_CRM_meta_timeout=${OCF_RESKEY_CRM_meta_timeout_default}}

OCF_RESKEY_save_config_on_stop_default=false
: ${OCF_RESKEY_save_config_on_stop=${OCF_RESKEY_save_config_on_stop_default}}

OCF_RESKEY_sync_config_on_stop_default=false
: ${OCF_RESKEY_sync_config_on_stop=${OCF_RESKEY_sync_config_on_stop_default}}

# Syncing the config implies saving it first.
if ocf_is_true ${OCF_RESKEY_sync_config_on_stop}
then
    OCF_RESKEY_save_config_on_stop="true"
fi
#######################################################################
## I'd very much suggest to make this RA use bash,
## and then use magic $SECONDS.
## But for now:
NOW=$(date +%s)
usage() {
    # One-line synopsis of the supported actions, printed on stdout.
    cat <<EOF
usage: $0 {start|stop|status|monitor|migrate_to|migrate_from|meta-data|validate-all}
EOF
}
VirtualDomain_meta_data() {
# Print the OCF resource agent metadata (XML) on stdout.
# Advertised defaults are taken from the OCF_RESKEY_*_default variables so
# the metadata cannot drift from the values the agent actually applies.
cat <<EOF
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="VirtualDomain">
<version>1.1</version>
<longdesc lang="en">
Resource agent for a virtual domain (a.k.a. domU, virtual machine,
virtual environment etc., depending on context) managed by libvirtd.
</longdesc>
<shortdesc lang="en">Manages virtual domains through the libvirt virtualization framework</shortdesc>
<parameters>
<parameter name="config" unique="1" required="1">
<longdesc lang="en">
Absolute path to the libvirt configuration file,
for this virtual domain.
</longdesc>
<shortdesc lang="en">Virtual domain configuration file</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="hypervisor" unique="0" required="0">
<longdesc lang="en">
Hypervisor URI to connect to. See the libvirt documentation for
details on supported URI formats. The default is system dependent.
Determine the system's default uri by running 'virsh --quiet uri'.
</longdesc>
<shortdesc lang="en">Hypervisor URI</shortdesc>
<content type="string"/>
</parameter>
<parameter name="force_stop" unique="0" required="0">
<longdesc lang="en">
Always forcefully shut down ("destroy") the domain on stop. The default
behavior is to resort to a forceful shutdown only after a graceful
shutdown attempt has failed. You should only set this to true if
your virtual domain (or your virtualization backend) does not support
graceful shutdown.
</longdesc>
<shortdesc lang="en">Always force shutdown on stop</shortdesc>
<content type="boolean" default="${OCF_RESKEY_force_stop_default}" />
</parameter>
<parameter name="migration_transport" unique="0" required="0">
<longdesc lang="en">
Transport used to connect to the remote hypervisor while
migrating. Please refer to the libvirt documentation for details on
transports available. If this parameter is omitted, the resource will
use libvirt's default transport to connect to the remote hypervisor.
</longdesc>
<shortdesc lang="en">Remote hypervisor transport</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="migration_downtime" unique="0" required="0">
<longdesc lang="en">
Define max downtime during live migration in milliseconds
</longdesc>
<shortdesc lang="en">Live migration downtime</shortdesc>
<content type="integer" default="${OCF_RESKEY_migration_downtime_default}" />
</parameter>
<parameter name="migration_speed" unique="0" required="0">
<longdesc lang="en">
Define live migration speed per resource in MiB/s
</longdesc>
<shortdesc lang="en">Live migration speed</shortdesc>
<content type="integer" default="${OCF_RESKEY_migration_speed_default}" />
</parameter>
<parameter name="migration_network_suffix" unique="0" required="0">
<longdesc lang="en">
Use a dedicated migration network. The migration URI is composed by
adding this parameters value to the end of the node name. If the node
name happens to be an FQDN (as opposed to an unqualified host name),
insert the suffix immediately prior to the first period (.) in the FQDN.
At the moment Qemu/KVM and Xen migration via a dedicated network is supported.
Note: Be sure this composed host name is locally resolvable and the
associated IP is reachable through the favored network.
See also the migrate_options parameter below.
</longdesc>
<shortdesc lang="en">Migration network host name suffix</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="migrate_options" unique="0" required="0">
<longdesc lang="en">
Extra virsh options for the guest live migration. You can also specify
here --migrateuri if the calculated migrate URI is unsuitable for your
environment. If --migrateuri is set then migration_network_suffix
and migrateport are effectively ignored. Use "%n" as the placeholder
for the target node name.
Please refer to the libvirt documentation for details on guest
migration.
</longdesc>
<shortdesc lang="en">live migrate options</shortdesc>
<content type="string" />
</parameter>
<parameter name="monitor_scripts" unique="0" required="0">
<longdesc lang="en">
To additionally monitor services within the virtual domain, add this
parameter with a list of scripts to monitor.
Note: when monitor scripts are used, the start and migrate_from operations
will complete only when all monitor scripts have completed successfully.
Be sure to set the timeout of these operations to accommodate this delay.
</longdesc>
<shortdesc lang="en">space-separated list of monitor scripts</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="autoset_utilization_cpu" unique="0" required="0">
<longdesc lang="en">
If set true, the agent will detect the number of domainU's vCPUs from virsh, and put it
into the CPU utilization of the resource when the monitor is executed.
</longdesc>
<shortdesc lang="en">Enable auto-setting the CPU utilization of the resource</shortdesc>
<content type="boolean" default="${OCF_RESKEY_autoset_utilization_cpu_default}" />
</parameter>
<parameter name="autoset_utilization_hv_memory" unique="0" required="0">
<longdesc lang="en">
If set true, the agent will detect the number of *Max memory* from virsh, and put it
into the hv_memory utilization of the resource when the monitor is executed.
</longdesc>
<shortdesc lang="en">Enable auto-setting the hv_memory utilization of the resource</shortdesc>
<content type="boolean" default="${OCF_RESKEY_autoset_utilization_hv_memory_default}" />
</parameter>
<parameter name="migrateport" unique="0" required="0">
<longdesc lang="en">
This port will be used in the qemu migrateuri. If unset, the port will be a random highport.
</longdesc>
<shortdesc lang="en">Port for migrateuri</shortdesc>
<content type="integer" />
</parameter>
<parameter name="save_config_on_stop" unique="0" required="0">
<longdesc lang="en">
Changes to a running VM's config are normally lost on stop.
This parameter instructs the RA to save the configuration back to the xml file provided in the "config" parameter.
</longdesc>
<shortdesc lang="en">Save running VM's config back to its config file</shortdesc>
<content type="boolean" />
</parameter>
<parameter name="sync_config_on_stop" unique="0" required="0">
<longdesc lang="en">
Setting this automatically enables save_config_on_stop.
When enabled this parameter instructs the RA to
call csync2 -x to synchronize the file to all nodes.
csync2 must be properly set up for this to work.
</longdesc>
<shortdesc lang="en">Sync the saved VM config file to all nodes using csync2</shortdesc>
<content type="boolean" />
</parameter>
<parameter name="snapshot">
<longdesc lang="en">
Path to the snapshot directory where the virtual machine image will be stored. When this
parameter is set, the virtual machine's RAM state will be saved to a file in the snapshot
directory when stopped. If on start a state file is present for the domain, the domain
will be restored to the same state it was in right before it stopped last. This option
is incompatible with the 'force_stop' option.
</longdesc>
<shortdesc lang="en">
Restore state on start/stop
</shortdesc>
<content type="string" default=""/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="90" />
<action name="stop" timeout="90" />
<action name="status" depth="0" timeout="30" interval="10" />
<action name="monitor" depth="0" timeout="30" interval="10" />
<action name="migrate_from" timeout="60" />
<action name="migrate_to" timeout="120" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="5" />
</actions>
</resource-agent>
EOF
}
set_util_attr() {
    # Set utilization attribute $1 of this resource to value $2 through
    # crm_resource, but only when the stored value differs.
    local attr=$1 val=$2
    local cval outp

    # Read the current value; on failure with no output, check whether the
    # reason is an unreachable CIB and give up quietly in that case.
    if ! cval=$(crm_resource -Q -r $OCF_RESOURCE_INSTANCE -z -g $attr 2>/dev/null) &&
       [ -z "$cval" ]; then
        if crm_resource -Q -r $OCF_RESOURCE_INSTANCE -z -g $attr 2>&1 |
           grep -e "not connected" > /dev/null 2>&1; then
            ocf_log debug "Unable to set utilization attribute, cib is not available"
            return
        fi
    fi

    if [ "$cval" = "$val" ]; then
        return 0
    fi
    outp=$(crm_resource -r $OCF_RESOURCE_INSTANCE -z -p $attr -v $val 2>&1) ||
        ocf_log warn "crm_resource failed to set utilization attribute $attr: $outp"
}
update_utilization() {
    # Publish the domain's vCPU count and maximum memory as the resource's
    # utilization attributes, each only when its autoset_* switch is true.
    local dom_cpu
    local dom_mem

    if ocf_is_true "$OCF_RESKEY_autoset_utilization_cpu"; then
        # Second field of the "CPU(s)" line of dominfo.
        dom_cpu=$(LANG=C virsh $VIRSH_OPTIONS dominfo ${DOMAIN_NAME} 2>/dev/null |
            awk '/CPU\(s\)/{print $2}')
        [ -n "$dom_cpu" ] && set_util_attr cpu $dom_cpu
    fi

    if ocf_is_true "$OCF_RESKEY_autoset_utilization_hv_memory"; then
        # Third field of the "Max memory" line, divided by 1024 and
        # truncated to an integer.
        dom_mem=$(LANG=C virsh $VIRSH_OPTIONS dominfo ${DOMAIN_NAME} 2>/dev/null |
            awk '/Max memory/{printf("%d", $3/1024)}')
        [ -n "$dom_mem" ] && set_util_attr hv_memory "$dom_mem"
    fi
}
get_emulator()
{
    # Print the basename of the domain's emulator binary, or nothing when it
    # cannot be determined. Sources are tried in this order: the live
    # domain XML, the cached state file, the configuration file.
    local emulator=""
    local extract='s/^.*<emulator>\(.*\)<\/emulator>.*$/\1/p'

    emulator=$(virsh $VIRSH_OPTIONS dumpxml $DOMAIN_NAME 2>/dev/null | sed -n -e "$extract")
    if [ -z "$emulator" ] && [ -e "$EMULATOR_STATE" ]; then
        emulator=$(cat $EMULATOR_STATE)
    fi
    if [ -z "$emulator" ]; then
        emulator=$(cat ${OCF_RESKEY_config} | sed -n -e "$extract")
    fi

    if [ -n "$emulator" ]; then
        basename $emulator
    fi
}
update_emulator_cache()
{
    # Remember the emulator name in $EMULATOR_STATE so that get_emulator can
    # fall back to it later. Nothing is written when the name is unknown.
    local emulator
    emulator=$(get_emulator)
    if [ -z "$emulator" ]; then
        return 0
    fi
    echo $emulator > $EMULATOR_STATE
}
# attempt to check domain status outside of libvirt using the emulator process
pid_status()
{
    # Determine the domain state without libvirt, by looking at the emulator
    # process (or the Xen tools). Returns $OCF_SUCCESS, $OCF_NOT_RUNNING, or
    # $OCF_ERR_GENERIC when no method was applicable.
    local rc=$OCF_ERR_GENERIC
    local emulator=$(get_emulator)
    # An emulator is not required, so only report message in debug mode
    local loglevel="debug"
    if ocf_is_probe; then
        loglevel="notice"
    fi

    case "$emulator" in
        qemu-kvm|qemu-dm|qemu-system-*)
            if ps awx | grep -E "[q]emu-(kvm|dm|system).*-name $DOMAIN_NAME " > /dev/null 2>&1; then
                rc=$OCF_SUCCESS
            else
                rc=$OCF_NOT_RUNNING
            fi
            ;;
        libvirt_lxc)
            if ps awx | grep -E "[l]ibvirt_lxc.*-name $DOMAIN_NAME " > /dev/null 2>&1; then
                rc=$OCF_SUCCESS
            else
                rc=$OCF_NOT_RUNNING
            fi
            ;;
        # This can be expanded to check for additional emulators
        *)
            # We may be running xen with PV domains, they don't
            # have an emulator set. try xl list or xen-lists
            if have_binary xl; then
                if xl list $DOMAIN_NAME >/dev/null 2>&1; then
                    rc=$OCF_SUCCESS
                else
                    rc=$OCF_NOT_RUNNING
                fi
            elif have_binary xen-list; then
                if xen-list $DOMAIN_NAME 2>/dev/null | grep -qs "State.*[-r][-b][-p]--" 2>/dev/null; then
                    rc=$OCF_SUCCESS
                else
                    rc=$OCF_NOT_RUNNING
                fi
            else
                ocf_log $loglevel "Unable to determine emulator for $DOMAIN_NAME"
            fi
            ;;
    esac

    case $rc in
        $OCF_SUCCESS)
            ocf_log debug "Virtual domain $DOMAIN_NAME is currently running."
            ;;
        $OCF_NOT_RUNNING)
            ocf_log debug "Virtual domain $DOMAIN_NAME is currently not running."
            ;;
    esac
    return $rc
}
VirtualDomain_status() {
# Query libvirt for the domain state and map it to an OCF return code:
#   $OCF_SUCCESS      running, paused, idle, blocked or in shutdown
#   $OCF_NOT_RUNNING  shut off, or the domain is unknown to libvirt
#   $OCF_ERR_GENERIC  unexpected output, or no state during a stop action
# While virsh yields no usable state the query is retried once per second.
# NOTE: "rc" and "status" are not declared local here, so they are shared
# with the caller (or with a caller's own local of the same name).
local try=0
rc=$OCF_ERR_GENERIC
status="no state"
while [ "$status" = "no state" ]; do
try=$(($try + 1 ))
# Lower-case the output so the patterns below need only one spelling.
status=$(LANG=C virsh $VIRSH_OPTIONS domstate $DOMAIN_NAME 2>&1 | tr 'A-Z' 'a-z')
case "$status" in
*"error:"*"domain not found"|*"error:"*"failed to get domain"*|"shut off")
# shut off: domain is defined, but not started, will not happen if
# domain is created but not defined
# "Domain not found" or "failed to get domain": domain is not defined
# and thus not started
ocf_log debug "Virtual domain $DOMAIN_NAME is not running: $(echo $status | sed s/error://g)"
rc=$OCF_NOT_RUNNING
;;
running|paused|idle|blocked|"in shutdown")
# running: domain is currently actively consuming cycles
# paused: domain is paused (suspended)
# idle: domain is running but idle
# blocked: synonym for idle used by legacy Xen versions
# in shutdown: the domain is in process of shutting down, but has not completely shutdown or crashed.
ocf_log debug "Virtual domain $DOMAIN_NAME is currently $status."
rc=$OCF_SUCCESS
;;
""|*"failed to "*"connect to the hypervisor"*|"no state")
# Empty string may be returned when virsh does not
# receive a reply from libvirtd.
# "no state" may occur when the domain is currently
# being migrated (on the migration target only), or
# whenever virsh can't reliably obtain the domain
# state.
# Normalising to "no state" keeps the while loop going.
status="no state"
if [ "$__OCF_ACTION" = "stop" ] && [ $try -ge 3 ]; then
# During the stop operation, we want to bail out
# quickly, so as to be able to force-stop (destroy)
# the domain if necessary.
ocf_log error "Virtual domain $DOMAIN_NAME has no state during stop operation, bailing out."
return $OCF_ERR_GENERIC;
elif [ "$__OCF_ACTION" = "monitor" ]; then
# Fall back to checking the emulator process directly.
pid_status
rc=$?
if [ $rc -ne $OCF_ERR_GENERIC ]; then
# we've successfully determined the domains status outside of libvirt
return $rc
fi
else
# During all other actions, we just wait and try
# again, relying on the CRM/LRM to time us out if
# this takes too long.
ocf_log info "Virtual domain $DOMAIN_NAME currently has no state, retrying."
fi
sleep 1
;;
*)
# any other output is unexpected.
# $status is left untouched, so the loop ends and rc stays at its
# initial $OCF_ERR_GENERIC.
ocf_log error "Virtual domain $DOMAIN_NAME has unknown status \"$status\"!"
sleep 1
;;
esac
done
return $rc
}
# virsh undefine removes configuration files if they are in
# directories which are managed by libvirt. such directories
# include also subdirectories of /etc (for instance
# /etc/libvirt/*) which may be surprising. VirtualDomain didn't
# include the undefine call before, hence this wasn't an issue
# before.
#
# There seems to be no way to find out which directories are
# managed by libvirt.
#
verify_undefined() {
    # Make sure the domain is not defined in libvirt. Because "virsh
    # undefine" may delete the configuration file, a copy is taken first
    # and put back if the file disappeared.
    local tmpf

    if ! virsh --connect=${OCF_RESKEY_hypervisor} list --all --name 2>/dev/null | grep -wqs "$DOMAIN_NAME"
    then
        # Not defined: nothing to do.
        return 0
    fi

    tmpf=$(mktemp -t vmcfgsave.XXXXXX)
    if [ -r "$tmpf" ]; then
        cp -p $OCF_RESKEY_config $tmpf
        virsh $VIRSH_OPTIONS undefine $DOMAIN_NAME > /dev/null 2>&1
        [ -f $OCF_RESKEY_config ] || cp -f $tmpf $OCF_RESKEY_config
        rm -f $tmpf
    else
        ocf_log warn "unable to create temp file, disk full?"
        # we must undefine the domain
        virsh $VIRSH_OPTIONS undefine $DOMAIN_NAME > /dev/null 2>&1
    fi
}
VirtualDomain_start() {
    # Start the domain. If a snapshot state file exists it is restored
    # instead of booting from the configuration file.
    # Returns $OCF_SUCCESS or $OCF_ERR_GENERIC.
    local snapshotimage

    if VirtualDomain_status; then
        ocf_log info "Virtual domain $DOMAIN_NAME already running."
        return $OCF_SUCCESS
    fi

    snapshotimage="$OCF_RESKEY_snapshot/${DOMAIN_NAME}.state"
    if [ -n "$OCF_RESKEY_snapshot" ] && [ -f "$snapshotimage" ]; then
        # Use $VIRSH_OPTIONS like every other virsh call so the restore
        # goes to the configured hypervisor, not virsh's default URI.
        if virsh $VIRSH_OPTIONS restore "$snapshotimage"; then
            rm -f "$snapshotimage"
            return $OCF_SUCCESS
        fi
        ocf_exit_reason "Failed to restore ${DOMAIN_NAME} from state file in ${OCF_RESKEY_snapshot} directory."
        return $OCF_ERR_GENERIC
    fi

    # Make sure domain is undefined before creating.
    # The 'create' command guarantees that the domain will be
    # undefined on shutdown, but requires the domain to be undefined.
    # if a user defines the domain
    # outside of this agent, we have to ensure that the domain
    # is restored to an 'undefined' state before creating.
    verify_undefined

    virsh $VIRSH_OPTIONS create ${OCF_RESKEY_config}
    rc=$?
    if [ $rc -ne 0 ]; then
        ocf_exit_reason "Failed to start virtual domain ${DOMAIN_NAME}."
        return $OCF_ERR_GENERIC
    fi

    # Wait until the domain (and any monitor scripts) report healthy; the
    # cluster's operation timeout bounds this loop.
    while ! VirtualDomain_monitor; do
        sleep 1
    done
    return $OCF_SUCCESS
}
force_stop()
{
    # Destroy the domain and wait until it is reported as not running.
    # Returns $OCF_SUCCESS, or $OCF_ERR_GENERIC if the destroy failed.
    local out ex translate
    local status=0

    ocf_log info "Issuing forced shutdown (destroy) request for domain ${DOMAIN_NAME}."
    out=$(LANG=C virsh $VIRSH_OPTIONS destroy ${DOMAIN_NAME} 2>&1)
    ex=$?
    translate=$(echo $out | tr 'A-Z' 'a-z')
    echo "$translate" >&2

    # Match on exit code and lower-cased output glued together; the
    # "already gone" messages must be tested before the exit code.
    case $ex$translate in
        *"error:"*"domain is not running"*|*"error:"*"domain not found"*|*"error:"*"failed to get domain"*)
            # unexpected path to the intended outcome, all is well
            :
            ;;
        [!0]*)
            ocf_exit_reason "forced stop failed"
            return $OCF_ERR_GENERIC
            ;;
        0*)
            until [ $status = $OCF_NOT_RUNNING ]; do
                VirtualDomain_status
                status=$?
            done
            ;;
    esac
    return $OCF_SUCCESS
}
sync_config(){
    # Distribute the configuration file with csync2; a failure is only
    # logged as a warning.
    ocf_log info "Syncing $DOMAIN_NAME config file with csync2 -x ${OCF_RESKEY_config}"
    csync2 -x ${OCF_RESKEY_config} ||
        ocf_log warn "Syncing ${OCF_RESKEY_config} failed."
}
save_config(){
    # Dump the domain's persistent XML and, if it differs from the config
    # file and validates, write it back to ${OCF_RESKEY_config} (optionally
    # syncing it to other nodes). Best effort: problems are logged as
    # warnings and the function still returns 0.
    local cfgtmp

    # Same template as verify_undefined; several mktemp implementations
    # want a longer run of X's than three.
    cfgtmp=$(mktemp -t vmcfgsave.XXXXXX)
    if [ -z "$cfgtmp" ] || [ ! -w "$cfgtmp" ]; then
        ocf_log warn "Unable to create temporary file for domain $DOMAIN_NAME config. Skipping config update."
        return 0
    fi

    virsh $VIRSH_OPTIONS dumpxml --inactive --security-info ${DOMAIN_NAME} > "$cfgtmp"

    if [ ! -s "$cfgtmp" ]; then
        ocf_log warn "Domain $DOMAIN_NAME config has 0 size. Skipping config update."
    elif cmp -s "$cfgtmp" "${OCF_RESKEY_config}"; then
        # Identical to what is already on disk: nothing to do.
        :
    elif ! virt-xml-validate "$cfgtmp" domain 2>/dev/null; then
        ocf_log warn "Domain $DOMAIN_NAME config failed to validate after dump. Skipping config update."
    else
        ocf_log info "Saving domain $DOMAIN_NAME to ${OCF_RESKEY_config}. Please make sure it's present on all nodes or sync_config_on_stop is on."
        # cat into the file (rather than mv) rewrites it in place.
        if cat "$cfgtmp" > "${OCF_RESKEY_config}"; then
            ocf_log info "Saved $DOMAIN_NAME domain's configuration to ${OCF_RESKEY_config}."
            if ocf_is_true "$OCF_RESKEY_sync_config_on_stop"; then
                sync_config
            fi
        else
            ocf_log warn "Moving ${cfgtmp} to ${OCF_RESKEY_config} failed."
        fi
    fi
    rm -f "$cfgtmp"
}
VirtualDomain_stop() {
    # Stop the domain: graceful shutdown (or snapshot save) first, then a
    # forced destroy if it is still around when the timeout is reached or
    # its state cannot be determined.
    local status
    local shutdown_timeout
    local needshutdown=1

    VirtualDomain_status
    status=$?
    case $status in
        $OCF_SUCCESS)
            if ocf_is_true $OCF_RESKEY_force_stop; then
                # if force stop, don't bother attempting graceful shutdown.
                force_stop
                return $?
            fi
            ocf_log info "Issuing graceful shutdown request for domain ${DOMAIN_NAME}."
            if [ -n "$OCF_RESKEY_snapshot" ]; then
                # $VIRSH_OPTIONS makes the save go to the configured
                # hypervisor, like every other virsh call in this agent.
                if virsh $VIRSH_OPTIONS save $DOMAIN_NAME "$OCF_RESKEY_snapshot/${DOMAIN_NAME}.state"; then
                    needshutdown=0
                else
                    ocf_log error "Failed to save snapshot state of ${DOMAIN_NAME} on stop"
                fi
            fi
            # save config if needed
            if ocf_is_true "$OCF_RESKEY_save_config_on_stop"; then
                save_config
            fi
            # issue the shutdown if save state didn't shutdown for us
            if [ $needshutdown -eq 1 ]; then
                # Issue a graceful shutdown request
                virsh $VIRSH_OPTIONS shutdown ${DOMAIN_NAME}
            fi
            # The "shutdown_timeout" we use here is the operation
            # timeout specified in the CIB, minus 5 seconds
            shutdown_timeout=$(( $NOW + ($OCF_RESKEY_CRM_meta_timeout/1000) -5 ))
            # Loop on status until we reach $shutdown_timeout
            while [ $NOW -lt $shutdown_timeout ]; do
                VirtualDomain_status
                status=$?
                case $status in
                    $OCF_NOT_RUNNING)
                        # This was a graceful shutdown.
                        return $OCF_SUCCESS
                        ;;
                    $OCF_SUCCESS)
                        # Domain is still running, keep
                        # waiting (until shutdown_timeout
                        # expires)
                        sleep 1
                        ;;
                    *)
                        # Something went wrong. Bail out and
                        # resort to forced stop (destroy).
                        break
                        ;;
                esac
                NOW=$(date +%s)
            done
            ;;
        $OCF_NOT_RUNNING)
            ocf_log info "Domain $DOMAIN_NAME already stopped."
            return $OCF_SUCCESS
            ;;
    esac
    # OK. Now if the above graceful shutdown hasn't worked, kill
    # off the domain with destroy. If that too does not work,
    # have the LRM time us out.
    force_stop
}
mk_migrateuri() {
    # Print the migration URI for a dedicated migration network, or print
    # nothing when no migration_network_suffix is configured.
    local target_node migrate_target hypervisor

    target_node="$OCF_RESKEY_CRM_meta_migrate_target"

    # A typical migration URI via a special migration network looks
    # like "tcp://bar-mig:49152". The port would be randomly chosen
    # by libvirt from the range 49152-49215 if omitted, at least since
    # version 0.7.4 ...
    if [ -z "${OCF_RESKEY_migration_network_suffix}" ]; then
        return 0
    fi

    # Driver part of the hypervisor URI: everything before the first '+' or ':'.
    hypervisor="${OCF_RESKEY_hypervisor%%[+:]*}"
    # Hostname might be a FQDN: the suffix goes in front of the first dot.
    migrate_target=$(echo ${target_node} | sed -e "s,^\([^.]\+\),\1${OCF_RESKEY_migration_network_suffix},")

    case $hypervisor in
        qemu)
            # For quite ancient libvirt versions a migration port is needed
            # and the URI must not contain the "//". Newer versions can handle
            # the "bad" URI.
            echo "tcp:${migrate_target}:${OCF_RESKEY_migrateport}"
            ;;
        xen)
            echo "xenmigr://${migrate_target}"
            ;;
        *)
            ocf_log warn "$DOMAIN_NAME: Migration via dedicated network currently not supported for ${hypervisor}."
            ;;
    esac
}
VirtualDomain_migrate_to() {
# Live-migrate the domain to the node named in
# $OCF_RESKEY_CRM_meta_migrate_target.
# Returns $OCF_SUCCESS when virsh migrate exits 0, $OCF_ERR_GENERIC when it
# fails or when the domain is not active on this node.
local rc
local target_node
local remoteuri
local transport_suffix
local migrateuri
local migrate_opts
local migrate_pid
target_node="$OCF_RESKEY_CRM_meta_migrate_target"
if VirtualDomain_status; then
# Find out the remote hypervisor to connect to. That is, turn
# something like "qemu://foo:9999/system" into
# "qemu+tcp://bar:9999/system"
if [ -n "${OCF_RESKEY_migration_transport}" ]; then
transport_suffix="+${OCF_RESKEY_migration_transport}"
fi
# User defined migrateuri or do we make one?
migrate_opts="$OCF_RESKEY_migrate_options"
if echo "$migrate_opts" | fgrep -qs -- "--migrateuri="; then
# Pull the URI out of the options (replacing %n with the target node),
# then strip the --migrateuri=... option from the option string.
migrateuri=`echo "$migrate_opts" |
sed "s/.*--migrateuri=\([^ ]*\).*/\1/;s/%n/$target_node/g"`
migrate_opts=`echo "$migrate_opts" |
sed "s/\(.*\)--migrateuri=[^ ]*\(.*\)/\1\2/"`
else
migrateuri=`mk_migrateuri`
fi
# Scared of that sed expression? So am I. :-)
# It swaps the host part of the hypervisor URI for the target node and
# appends the transport suffix to the scheme, keeping port and path.
remoteuri=$(echo ${OCF_RESKEY_hypervisor} | sed -e "s,\(.*\)://[^/:]*\(:\?[0-9]*\)/\(.*\),\1${transport_suffix}://${target_node}\2/\3,")
# save config if needed
if ocf_is_true "$OCF_RESKEY_save_config_on_stop"; then
save_config
fi
# Live migration speed limit
if [ ${OCF_RESKEY_migration_speed} -ne 0 ]; then
ocf_log info "$DOMAIN_NAME: Setting live migration speed limit for $DOMAIN_NAME (using: virsh ${VIRSH_OPTIONS} migrate-setspeed $DOMAIN_NAME ${OCF_RESKEY_migration_speed})."
virsh ${VIRSH_OPTIONS} migrate-setspeed $DOMAIN_NAME ${OCF_RESKEY_migration_speed}
fi
# OK, we know where to connect to. Now do the actual migration.
# The migration runs in the background so the downtime can be set while
# it is in progress; its exit status is collected by "wait" below.
ocf_log info "$DOMAIN_NAME: Starting live migration to ${target_node} (using: virsh ${VIRSH_OPTIONS} migrate --live $migrate_opts $DOMAIN_NAME $remoteuri $migrateuri)."
virsh ${VIRSH_OPTIONS} migrate --live $migrate_opts $DOMAIN_NAME $remoteuri $migrateuri &
migrate_pid=${!}
# Live migration downtime interval
# Note: You can set downtime only while live migration is in progress
if [ ${OCF_RESKEY_migration_downtime} -ne 0 ]; then
sleep 2
ocf_log info "$DOMAIN_NAME: Setting live migration downtime for $DOMAIN_NAME (using: virsh ${VIRSH_OPTIONS} migrate-setmaxdowntime $DOMAIN_NAME ${OCF_RESKEY_migration_downtime})."
virsh ${VIRSH_OPTIONS} migrate-setmaxdowntime $DOMAIN_NAME ${OCF_RESKEY_migration_downtime}
fi
wait ${migrate_pid}
rc=$?
if [ $rc -ne 0 ]; then
ocf_exit_reason "$DOMAIN_NAME: live migration to ${target_node} failed: $rc"
return $OCF_ERR_GENERIC
else
ocf_log info "$DOMAIN_NAME: live migration to ${target_node} succeeded."
return $OCF_SUCCESS
fi
else
ocf_exit_reason "$DOMAIN_NAME: migrate_to: Not active locally!"
return $OCF_ERR_GENERIC
fi
}
VirtualDomain_migrate_from() {
    # Runs on the migration target: poll once per second until the domain
    # passes the monitor check, then optionally save its configuration.
    until VirtualDomain_monitor; do
        sleep 1
    done
    ocf_log info "$DOMAIN_NAME: live migration from ${OCF_RESKEY_CRM_meta_migrate_source} succeeded."
    # save config if needed
    ocf_is_true "$OCF_RESKEY_save_config_on_stop" && save_config
    return $OCF_SUCCESS
}
VirtualDomain_monitor() {
    # Full health check: the libvirt domain state first, then every
    # user-supplied monitor script. Also refreshes the emulator cache and
    # the utilization attributes, and optionally saves the configuration.
    # Returns the status code, or $OCF_ERR_GENERIC if a script failed.
    VirtualDomain_status
    rc=$?

    # Monitor scripts are only consulted when the domain itself is fine.
    if [ ${rc} -eq ${OCF_SUCCESS} ]; then
        for script in ${OCF_RESKEY_monitor_scripts}; do
            script_output="$($script 2>&1)"
            script_rc=$?
            if [ ${script_rc} -eq ${OCF_SUCCESS} ]; then
                ocf_log debug "Monitor command \"${script}\" for domain ${DOMAIN_NAME} completed successfully with output: ${script_output}"
                continue
            fi
            # A monitor script returned a non-success exit code: record the
            # reason, stop iterating and propagate $OCF_ERR_GENERIC.
            ocf_exit_reason "Monitor command \"${script}\" for domain ${DOMAIN_NAME} returned ${script_rc} with output: ${script_output}"
            rc=$OCF_ERR_GENERIC
            break
        done
    fi

    update_emulator_cache
    update_utilization

    # Save configuration on monitor as well, so we will have a better chance of
    # having fresh and up to date config files on all nodes.
    if ocf_is_true "$OCF_RESKEY_save_config_on_stop"; then
        save_config
    fi
    return ${rc}
}
VirtualDomain_validate_all() {
    # Validate the instance parameters.
    # Returns $OCF_SUCCESS, $OCF_ERR_CONFIGURED for contradictory or
    # malformed parameters, or $OCF_ERR_INSTALLED when the config file is
    # unreadable or no domain name could be extracted from it.
    # All operands are quoted: an empty or multi-word value must not turn
    # a test into a one-argument or too-many-arguments test.
    if ocf_is_true "$OCF_RESKEY_force_stop" && [ -n "$OCF_RESKEY_snapshot" ]; then
        ocf_exit_reason "The 'force_stop' and 'snapshot' options can not be used together."
        return $OCF_ERR_CONFIGURED
    fi

    # check if we can read the config file (otherwise we're unable to
    # deduce $DOMAIN_NAME from it, see below)
    if [ ! -r "$OCF_RESKEY_config" ]; then
        if ocf_is_probe; then
            ocf_log info "Configuration file $OCF_RESKEY_config not readable during probe."
        elif [ "$__OCF_ACTION" = "stop" ]; then
            ocf_log info "Configuration file $OCF_RESKEY_config not readable, resource considered stopped."
        else
            ocf_exit_reason "Configuration file $OCF_RESKEY_config does not exist or not readable."
        fi
        return $OCF_ERR_INSTALLED
    fi

    if [ -z "$DOMAIN_NAME" ]; then
        ocf_exit_reason "Unable to determine domain name."
        return $OCF_ERR_INSTALLED
    fi

    # Check if csync2 is available when config tells us we might need it.
    if ocf_is_true "$OCF_RESKEY_sync_config_on_stop"; then
        check_binary csync2
    fi

    # Check if migration_speed is a decimal value
    if ! ocf_is_decimal "${OCF_RESKEY_migration_speed}"; then
        ocf_exit_reason "migration_speed has to be a decimal value"
        return $OCF_ERR_CONFIGURED
    fi

    # Check if migration_downtime is a decimal value
    if ! ocf_is_decimal "${OCF_RESKEY_migration_downtime}"; then
        ocf_exit_reason "migration_downtime has to be a decimal value"
        return $OCF_ERR_CONFIGURED
    fi

    return $OCF_SUCCESS
}
VirtualDomain_getconfig() {
    # Derive the globals the other functions rely on: VIRSH_OPTIONS,
    # DOMAIN_NAME and EMULATOR_STATE.

    # Grab the virsh uri default, but only if hypervisor isn't set
    : ${OCF_RESKEY_hypervisor=$(virsh --quiet uri 2>/dev/null)}

    # Set options to be passed to virsh:
    VIRSH_OPTIONS="--connect=${OCF_RESKEY_hypervisor} --quiet"

    # Retrieve the domain name from the xml file.
    DOMAIN_NAME=$(egrep '[[:space:]]*<name>.*</name>[[:space:]]*$' ${OCF_RESKEY_config} 2>/dev/null |
        sed -e 's/[[:space:]]*<name>\(.*\)<\/name>[[:space:]]*$/\1/')

    # Per-domain cache file for the emulator name.
    EMULATOR_STATE="${HA_RSCTMP}/VirtualDomain-${DOMAIN_NAME}-emu.state"
}
# Requirements declared for ocf_rarun before it dispatches the action.
OCF_REQUIRED_PARAMS="config"
OCF_REQUIRED_BINARIES="virsh sed"
# "$@" passes the arguments through unchanged; unquoted $* would re-split
# them and expand glob characters.
ocf_rarun "$@"
diff --git a/heartbeat/WAS b/heartbeat/WAS
index a46cdd9be..3c7469328 100755
--- a/heartbeat/WAS
+++ b/heartbeat/WAS
@@ -1,572 +1,572 @@
#!/bin/sh
#
#
# WAS
#
# Description: Manages a Websphere Application Server as an HA resource
#
#
# Author: Alan Robertson
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
# License: GNU General Public License (GPL)
# Copyright: (C) 2002 - 2005 International Business Machines, Inc.
#
#
# An example usage in /etc/ha.d/haresources:
# node1 10.0.0.170 WAS::/opt/WebSphere/ApplicationServer/config/server-cfg.xml
#
# See usage() function below for more details...
#
# OCF parameters are as below:
# OCF_RESKEY_config
# (WAS-configuration file, used for the single server edition of WAS)
# OCF_RESKEY_port
# (WAS-<snoop>-port-number, used for the advanced edition of WAS)
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
# Locate the WebSphere installation: /opt is preferred, /usr is the fallback.
WASDIR=/opt/WebSphere/AppServer
[ -d $WASDIR ] || WASDIR=/usr/WebSphere/AppServer

# Number of one-second polls WAS_start grants the server (5 minutes)
STARTTIME=300
# Port list used when neither a config file nor a port is supplied
DEFAULT_WASPORTS="9080"

# Paths derived from the installation directory
WASBIN=$WASDIR/bin
DEFAULT=$WASDIR/config/server-cfg.xml
#
# Print usage message
#
#
# usage: print the help text to stdout.
# The operation list is taken from WAS_methods (minus "methods" itself)
# and joined with "|".
#
usage() {
    methods=`WAS_methods | grep -v methods`
    methods=`echo $methods | tr ' ' '|'`
    # The here-document body stays unindented so its output is unchanged.
    cat <<-END
usage: $0 ($methods)
For the single server edition of WAS, you have to set the following
environment variable:
OCF_RESKEY_config
(WAS-configuration file)
For the advanced edition of WAS, you have to set the following
environment variable:
OCF_RESKEY_port
(WAS-<snoop>-port-number)
$0 manages a Websphere Application Server (WAS) as an HA resource
The 'start' operation starts WAS.
The 'stop' operation stops WAS.
The 'status' operation reports whether WAS is running
The 'monitor' operation reports whether the WAS seems to be working
(httpd also needs to be working for this case)
The 'validate-all' operation reports whether the OCF instance parameter (OCF_RESKEY_config or OCF_RESKEY_port) is valid
The 'methods' operation reports on the methods $0 supports
This is known to work with the Single Server edition of Websphere,
and is believed to work with the Advanced edition too.
Since the Advanced Edition has no configuration file (it's in a
database) you need to give a port number instead of a
configuration file for this config parameter.
The default configuration file for the single server edition is:
$DEFAULT
The default snoop-port for the advanced edition is: $DEFAULT_WASPORTS
The start and stop operations must be run as root.
The status operation will report a pid of "-" for the
WAS root process unless it is run as root.
If you don't have xmllint on your system, parsing of WAS
configuration files is very primitive.
In this case, the port specification we need from the XML
config file has to be on the same line as the
first part of the <transports/> tag.
We run servlet/snoop on the first transport port listed in
the config file for the "monitor" operation.
END
}
#
# meta_data: print the OCF resource agent metadata (XML) to stdout.
# $DEFAULT and $DEFAULT_WASPORTS are expanded into the parameter defaults.
#
meta_data() {
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="WAS">
<version>1.0</version>
<longdesc lang="en">
Resource script for WAS. It manages a Websphere Application Server (WAS) as
an HA resource.
</longdesc>
<shortdesc lang="en">Manages a WebSphere Application Server instance</shortdesc>
<parameters>
<parameter name="config" unique="0" required="0">
<longdesc lang="en">
The WAS-configuration file.
</longdesc>
<shortdesc lang="en">configuration file</shortdesc>
<content type="string" default="$DEFAULT" />
</parameter>
<parameter name="port" unique="0">
<longdesc lang="en">
The WAS-(snoop)-port-number.
</longdesc>
<shortdesc lang="en">port</shortdesc>
<content type="integer" default="$DEFAULT_WASPORTS" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="300" />
<action name="stop" timeout="300" />
<action name="status" depth="0" timeout="30" interval="10" />
<action name="monitor" depth="0" timeout="30" interval="10" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
<action name="methods" timeout="5" />
</actions>
</resource-agent>
END
}
#
# Reformat the XML document in a sort of canonical form
# if we can. If we don't have xmllint, we just cat it out
# and hope for the best ;-)
#
#
# xmlcat FILE...: write each file to stdout, through
# "xmllint --recover --format" when xmllint is installed, else plain cat.
# The chosen command is cached in $XMLcat after the first call.
#
xmlcat() {
    if [ -z "$XMLcat" ]; then
        XMLcat=$(which xmllint 2>/dev/null)
        if [ -n "${XMLcat}" ] && [ -x "${XMLcat}" ]; then
            XMLcat="$XMLcat --recover --format"
        else
            XMLcat=cat
        fi
    fi
    # $XMLcat is deliberately unquoted: it may carry options
    for j in "$@"; do
        ${XMLcat} "$j"
    done
}
#
#This is a bit skanky, but it works anyway...
#
#<transports xmi:type="applicationserver:HTTPTransport" xmi:id="HttpTransport_1" hostname="*" port="9080"/>
#<transports xmi:type="applicationserver:HTTPTransport" xmi:id="HttpTransport_2" hostname="*" port="9443" sslEnabled="true"/>
#<transports xmi:type="applicationserver:HTTPTransport" xmi:id="HttpTransport_3" hostname="*" port="9090" external="false"/>
#
# It's not really skanky if we can find xmllint on the system, because it
# reformats tags so they are all on one line, which is all we need...
#
#
# Get the numbers of the ports WAS should be listening on...
#
# If we don't have xmllint around, then the applicationserver and the
# port= specification have to be on the same line in the XML config file.
#
#
# GetWASPorts ARG: print the WAS port numbers, one per line.
# ARG is either a comma separated port list (anything starting with a
# digit) or the path of a WAS configuration file.
#
GetWASPorts() {
    case $1 in
        [0-9]*)
            # Already a port list: turn commas into newlines
            echo "$1" | tr ',' '\n'
            ;;
        *)
            # Keep the HTTPTransport lines that carry a port= attribute, then:
            #  - delete up to port=, plus optional quote and white space
            #  - throw away everything from the first non-digit onwards
            # which should leave the port number all by itself.
            xmlcat $1 |
                grep -i 'transports.*applicationserver:HTTPTransport' |
                grep port= |
                sed -e 's%.*port= *"* *%%' -e 's%[^0-9][^0-9]*.*$%%'
            ;;
    esac
}
#
# We assume that the first port listed in the <transports/>
# is the one we should run servlet/snoop on.
#
# Print only the first port reported by GetWASPorts for the given argument.
GetWASSnoopPort() {
    GetWASPorts "$@" | sed -n '1p'
}
#
# Return information on the processname/id for the WAS ports
#
# pid/java is the expected output. Several lines, one per port...
#
#
#
# WASPortInfo PORT...: for every given port that has a TCP listener,
# print what netstat shows after the LISTEN column (pid/program name).
# Prints nothing when no port is given.
#
WASPortInfo() {
    pat=""
    for j in $*
    do
        case $pat in
            "") pat="$j";;
            *)  pat="$pat|$j";;
        esac
    done
    # Without any port the pattern would degenerate to "() .*LISTEN",
    # which matches every listening socket on the host.
    [ -n "$pat" ] || return 0
    # The leading ":" anchors the match to a complete port number, so that
    # e.g. 9080 no longer matches a listener on 19080.
    netstat -ltnp 2>/dev/null | grep -E -i ":($pat) .*LISTEN" | sed 's%.*LISTEN *%%'
}
#
# Return the number of WAS ports which are open
#
# Print how many lines WASPortInfo reports for the given ports.
CheckWASPortsInUse() {
    # strip the padding some wc implementations put around the number
    count=$(WASPortInfo "$@" | wc -l | tr -d ' \t')
    echo "$count"
}
#
# Return the pid(s) of the processes that have WAS ports open
#
# Print the distinct WASPortInfo entries for the given ports, reduced to
# the part in front of the first "/" (the pid).
WASPIDs() {
    WASPortInfo "$@" | sort -u | sed -e 's%/.*$%%'
}
#
# The version of ps that returns all processes and their (long) args
# It's only used by WAS_procs, which isn't used for anything ;-)
#
ps_long() {
# Single place that fixes the ps invocation used by WAS_procs.
# NOTE(review): "axww" is BSD-style option syntax - presumably all
# processes with untruncated command lines; confirm on non-Linux ps.
ps axww
}
#
# The total set of WAS processes (single server only)
#
#
# WAS_procs CONFIG: print the pids of the java processes whose command
# line mentions "config=CONFIG" (case-insensitive).
#
WAS_procs() {
    # awk splits on runs of blanks, so right-aligned (space padded) pids
    # are returned too; cut -d' ' -f1 yields an empty field for those.
    ps_long | grep -i "config=$1" | grep -i java | awk '{ print $1 }'
}
#
# methods: What methods/operations do we support?
#
#
# WAS_methods: print the supported operations, one per line.
# "monitor" is listed only when the $WGET binary is available.
#
WAS_methods() {
    printf '%s\n' start stop status methods validate-all meta-data usage
    if have_binary $WGET; then
        echo monitor
    fi
}
#
# Return WAS status (silently)
#
#
# WAS_status ARG: succeed when the count of WAS ports in use is not 0.
# Prints nothing; sets the globals WASPorts and PortsInUse.
#
WAS_status() {
    WASPorts=$(GetWASPorts $1)
    PortsInUse=$(CheckWASPortsInUse $WASPorts)
    [ "$PortsInUse" != 0 ]
}
#
# Report on WAS status to stdout...
#
#
# WAS_report_status ARG: log (at debug level) whether WAS is running.
# Returns $OCF_NOT_RUNNING when no WAS port is in use, $OCF_SUCCESS
# otherwise - also when only some of the ports are listening.
#
WAS_report_status() {
    WASPorts=$(GetWASPorts $1)
    # number of configured ports; the second echo trims wc's padding
    PortCount=$(echo $WASPorts | wc -w)
    PortCount=$(echo $PortCount)
    PortsInUse=$(CheckWASPortsInUse $WASPorts)

    if [ "$PortsInUse" = 0 ]; then
        ocf_log debug "WAS: server $1 is stopped."
        return $OCF_NOT_RUNNING
    fi

    pids=$(WASPIDs $WASPorts)
    if [ $PortsInUse -ge $PortCount ]; then
        ocf_log debug "WAS: server $1 is running (pid" $pids "et al)."
    else
        ocf_log debug "WAS: server $1 is running (pid $pids et al) but not listening on all ports."
    fi
    return $OCF_SUCCESS
}
#
# Monitor WAS - does it really seem to be working?
#
# For this we invoke the snoop applet via wget.
#
# This is actually faster than WAS_status above...
#
#
# WAS_monitor ARG: fetch /servlet/snoop from the snoop port with $WGET
# and check that the reply echoes a Wget user-agent.
# Returns 0 on success, 1 when no temp file can be created and
# $OCF_ERR_GENERIC when the fetch or the check fails.
#
WAS_monitor() {
    # remove the temp file (if one was created) when the shell exits
    trap '[ -z "$tmpfile" ] || rmtempfile "$tmpfile"' 0
    tmpfile=$(maketempfile) || return 1
    SnoopPort=$(GetWASSnoopPort $1)
    output=$($WGET -nv -O$tmpfile http://localhost:$SnoopPort/servlet/snoop 2>&1)
    rc=$?
    if [ $rc -ne 0 ]; then
        ocf_log "err" "WAS: $1: wget failure: $output"
        rc=$OCF_ERR_GENERIC
    elif ! grep -i 'user-agent.*Wget' $tmpfile >/dev/null; then
        ocf_log "err" "WAS: $1: no user-agent from snoop application"
        rc=$OCF_ERR_GENERIC
    fi
    return $rc
}
#
# Start WAS instance
#
#
# WAS_start ARG: launch WAS and wait up to $STARTTIME polls for it.
# Returns $OCF_SUCCESS once WAS_wait_4_start succeeds, $OCF_ERR_GENERIC
# when the launch command fails or the server never comes up.
#
WAS_start() {
    # Launch Arguments:
    #
    # -configFile <configFile>
    # -nodeName <nodeName>
    # -serverName <serverName>
    # -oltEnabled
    # -oltHost <hostname>
    # -oltPort <port>
    # -debugEnabled
    # -jdwpPort <port>
    # -debugSource <sourcePath>
    # -serverTrace <traceString>
    # -serverTraceFile <traceFile>
    # -script [<scriptFile>]
    # -platform <platformName>
    # -noExecute
    # -help

    # startServer.sh takes the config file; startupServer.sh is the fallback
    cmd="$WASBIN/startupServer.sh"
    if [ -x $WASBIN/startServer.sh ]; then
        cmd="$WASBIN/startServer.sh -configFile $1"
    fi

    ocf_run $cmd || return $OCF_ERR_GENERIC

    if WAS_wait_4_start $STARTTIME "$@"; then
        return $OCF_SUCCESS
    fi
    ocf_log "err" "WAS server $1 did not start correctly"
    return $OCF_ERR_GENERIC
}
#
# Wait for WAS to actually start up.
#
# It seems to take between 30 and 60 seconds for it to
# start up on a trivial WAS instance.
#
#
# WAS_wait_4_start MAX ARG...: poll WAS_status once per second, at most
# MAX times. Returns $OCF_SUCCESS as soon as WAS is up; otherwise the
# result of one final WAS_status call.
#
WAS_wait_4_start() {
    max=$1
    shift
    retries=0
    while [ $retries -lt $max ]; do
        WAS_status "$@" && return $OCF_SUCCESS
        sleep 1
        retries=$((retries + 1))
    done
    WAS_status "$@"
}
#
# Shut down WAS
#
#
# WAS_stop ARG: stop WAS with stopServer.sh when it is executable,
# otherwise kill the processes holding the WAS ports.
# Returns $OCF_SUCCESS when WAS_status reports it down afterwards,
# $OCF_ERR_GENERIC otherwise.
#
WAS_stop() {
    # They don't return good return codes...
    # And, they seem to allow anyone to stop WAS (!)
    if [ ! -x $WASBIN/stopServer.sh ]; then
        WASPorts=$(GetWASPorts $1)
        kill $(WASPIDs $WASPorts)
    else
        ocf_run $WASBIN/stopServer.sh -configFile $1
    fi

    # The result is judged by the ports only, not by the exit codes above
    if WAS_status $1; then
        ocf_log "err" "WAS: $1 did not stop correctly"
        return $OCF_ERR_GENERIC
    fi
    return $OCF_SUCCESS
}
#
# Check if the port is valid
#
# CheckPort VALUE: succeed when VALUE is a decimal number greater than 0.
CheckPort() {
    # a bare "return" passes on the status of the failed ocf_is_decimal
    ocf_is_decimal "$1" || return
    test $1 -gt 0
}
#
# WAS_validate_all: validate the global $arg (config file or port).
#
# With startServer.sh installed, $arg must be an existing config file
# naming at least one valid port. With only startupServer.sh installed,
# $arg must itself be a valid port. Exits with $OCF_ERR_ARGS or
# $OCF_ERR_CONFIGURED on failure; returns 0 when validation passes or
# cannot be performed.
#
WAS_validate_all() {
    if [ -x $WASBIN/startServer.sh ]; then
        # $arg should be config file
        if [ ! -f "$arg" ]; then
            ocf_log err "Configuration file [$arg] does not exist"
            exit $OCF_ERR_ARGS
        fi

        # $arg should specify a valid port number at the very least
        local WASPorts=`GetWASPorts $arg`
        if [ -z "$WASPorts" ]; then
            ocf_log err "No port number specified in configuration file [$arg]"
            exit $OCF_ERR_CONFIGURED
        fi

        local port
        local have_valid_port=false
        for port in $WASPorts; do
            if CheckPort $port; then
                have_valid_port=true
                break
            fi
        done
        if [ "false" = "$have_valid_port" ]; then
            ocf_log err "No valid port number specified in configuration file [$arg]"
            exit $OCF_ERR_CONFIGURED
        fi
    elif [ -x $WASBIN/startupServer.sh ]; then
        # $arg should be port number.
        # The check is negated: CheckPort succeeds for a VALID port, so the
        # error must only be raised when it fails.
        if ! CheckPort "$arg"; then
            ocf_log err "Port number is required but [$arg] is not valid port number"
            exit $OCF_ERR_ARGS
        fi
    else
        # Do not know how to validate_all
        ocf_log warn "Do not know how to validate-all, assuming validation OK"
        return $OCF_SUCCESS
    fi
}
#
# 'main' starts here...
#
# Exactly one argument (the operation) is expected
if [ $# -ne 1 ]; then
    usage
    exit $OCF_ERR_ARGS
fi

# These operations don't require OCF instance parameters to be set, so
# they are answered before the configuration checks below can reject the
# call (e.g. meta-data on a node where the config file is missing).
case "$1" in
    meta-data)
        meta_data
        exit $OCF_SUCCESS;;
    usage)
        usage
        exit $OCF_SUCCESS;;
    methods)
        WAS_methods
        exit $?;;
esac

#
# Supply default configuration parameter(s)
#
# Tests are quoted so that empty values or values with blanks cannot
# turn them into malformed test expressions.
if [ -z "$OCF_RESKEY_config" ] && [ -z "$OCF_RESKEY_port" ]; then
    if [ -f "$DEFAULT" ]; then
        arg=$DEFAULT
    else
        arg=$DEFAULT_WASPORTS
    fi
elif [ -n "$OCF_RESKEY_config" ]; then
    arg=$OCF_RESKEY_config
else
    arg=$OCF_RESKEY_port
fi

if [ ! -f "$arg" ]; then
    case $arg in
        [0-9]*) ;; # ignore port numbers...
        *)
            ocf_log "err" "WAS configuration file $arg does not exist!"
            usage
            exit $OCF_ERR_ARGS;;
    esac
fi

# What kind of method was invoked?
case "$1" in
    start)
        WAS_start $arg
        exit $?;;
    stop)
        WAS_stop $arg
        exit $?;;
    status)
        WAS_report_status $arg
        exit $?;;
    monitor)
        WAS_monitor $arg
        exit $?;;
    validate-all)
        WAS_validate_all $arg
        exit $?;;
    *)
        usage
        exit $OCF_ERR_UNIMPLEMENTED;;
esac
diff --git a/heartbeat/WAS6 b/heartbeat/WAS6
index 560ae602f..b3e7e2245 100755
--- a/heartbeat/WAS6
+++ b/heartbeat/WAS6
@@ -1,546 +1,546 @@
#!/bin/sh
# WAS6
#
# Description: Manages a Websphere Application Server as an HA resource
#
#
# Author: Ru Xiang Min
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
# License: GNU General Public License (GPL)
# Copyright: (C) 2006 International Business Machines China, Ltd., Inc.
#
#
# An example usage in /etc/ha.d/haresources:
# node1 10.0.0.170 WAS::/opt/IBM/WebSphere/AppServer/profiles/default/config/cells/Node01Cell/nodes/Node01/serverindex.xml
#
# See usage() function below for more details...
#
# OCF parameters are as below:
# OCF_RESKEY_profile
# (WAS profile name, used for the single server edition of WAS6)
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
# Locate the WebSphere 6 installation: /opt is preferred, /usr is the fallback.
WAS_DIR=/opt/IBM/WebSphere/AppServer
[ -d $WAS_DIR ] || WAS_DIR=/usr/IBM/WebSphere/AppServer

# Number of one-second polls WAS_start grants the server (5 minutes)
STARTTIME=300
DEFAULT_WASPORTS="9080"

WAS_BIN=$WAS_DIR/bin
# Profile name used when OCF_RESKEY_profile is not set
DEFAULT=default
#
# Print usage message
#
#
# usage: print the help text to stdout.
# The operation list is taken from WAS_methods (minus "methods" itself)
# and joined with "|".
#
usage() {
    methods=`WAS_methods | grep -v methods`
    methods=`echo $methods | tr ' ' '|'`
    # The here-document body stays unindented so its output is unchanged.
    # The parameter this agent reads is OCF_RESKEY_profile; the text used
    # to name a non-existent OCF_RESKEY_profileName.
    cat <<-END
usage: $0 ($methods)
For the single server edition of WAS6, you have to set the following
environment variable:
OCF_RESKEY_profile
(WAS profile name)
$0 manages a Websphere Application Server 6(WAS6) as an HA resource
The 'start' operation starts WAS6.
The 'stop' operation stops WAS6.
The 'status' operation reports whether WAS6 is running
The 'monitor' operation reports whether the WAS6 seems to be working
(httpd also needs to be working for this case)
The 'validate-all' operation reports whether the OCF instance parameter (OCF_RESKEY_profile) is valid
The 'methods' operation reports on the methods $0 supports
This is known to work with the Single Server edition of Websphere.
The default profile name for the single server edition is:
$DEFAULT
The start and stop operations must be run as root.
The status operation will report a pid of "-" for the
WAS root process unless it is run as root.
If you don't have xmllint on your system, parsing of WAS
configuration files is very primitive.
We run servlet/snoop on the seventh transport port listed in
the config file for the "monitor" operation.
END
}
meta_data() {
# Print the OCF resource agent metadata (XML) to stdout.
# The here-document is unquoted, so $DEFAULT is expanded into the
# default value of the "profile" parameter.
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="WAS6">
<version>1.0</version>
<longdesc lang="en">
Resource script for WAS6. It manages a Websphere Application Server (WAS6) as
an HA resource.
</longdesc>
<shortdesc lang="en">Manages a WebSphere Application Server 6 instance</shortdesc>
<parameters>
<parameter name="profile" unique="0" required="0">
<longdesc lang="en">
The WAS profile name.
</longdesc>
<shortdesc lang="en">profile name</shortdesc>
<content type="string" default="$DEFAULT" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="300" />
<action name="stop" timeout="300" />
<action name="status" depth="0" timeout="30" interval="10" />
<action name="monitor" depth="0" timeout="30" interval="10" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
<action name="methods" timeout="5" />
</actions>
</resource-agent>
END
}
#
# Reformat the XML document in a sort of canonical form
# if we can. If we don't have xmllint, we just cat it out
# and hope for the best ;-)
#
#
# xmlcat FILE...: write each file to stdout, through
# "xmllint --recover --format" when xmllint is installed, else plain cat.
# The chosen command is cached in $XMLcat after the first call.
#
xmlcat() {
    if [ -z "$XMLcat" ]; then
        XMLcat=$(which xmllint 2>/dev/null)
        if [ -n "${XMLcat}" ] && [ -x "${XMLcat}" ]; then
            XMLcat="$XMLcat --recover --format"
        else
            XMLcat=cat
        fi
    fi
    # $XMLcat is deliberately unquoted: it may carry options
    for j in "$@"; do
        ${XMLcat} "$j"
    done
}
#
#This is a bit skanky, but it works anyway...
#
# It's not really skanky if we can find xmllint on the system, because it
# reformats tags so they are all on one line, which is all we need...
#
#
# Get the numbers of the ports WAS should be listening on...
#
# If we don't have xmllint around, then the applicationserver and the
# port= specification have to be on the same line in the XML config file.
#
#
# GetWASPorts ARG: print the WAS port numbers, one per line.
# When ARG starts with a digit it is taken as a comma separated port
# list. Otherwise the ports are read from the serverindex.xml selected
# by WAS_DIR, WAS_PROFILE_NAME, WAS_CELL and WAS_NODE (ARG is not used).
#
GetWASPorts() {
    case $1 in
        [0-9]*)
            # Already a port list: turn commas into newlines
            echo "$1" | tr ',' '\n'
            ;;
        *)
            # For every line with a port= attribute:
            #  - delete up to port=, plus optional quote and white space
            #  - throw away everything from the first non-digit onwards
            # which should leave the port number all by itself.
            xmlcat ${WAS_DIR}/profiles/${WAS_PROFILE_NAME}/config/cells/${WAS_CELL}/nodes/${WAS_NODE}/serverindex.xml |
                grep port= |
                sed -e 's%.*port= *"* *%%' -e 's%[^0-9][^0-9]*.*$%%'
            ;;
    esac
}
#
# We assume that the seventh port listed in the serverindex.xml
# is the one we should run servlet/snoop on.
#
# Print only the seventh port reported by GetWASPorts (nothing when
# fewer than seven ports are listed).
GetWASSnoopPort() {
    GetWASPorts "$@" | awk 'NR == 7'
}
#
# Return information on the processname/id for the WAS ports
#
# pid/java is the expected output. Several lines, one per port...
#
#
#
# WASPortInfo PORT...: for every given port that has a TCP listener,
# print what netstat shows after the LISTEN column (pid/program name).
# Prints nothing when no port is given.
#
WASPortInfo() {
    pat=""
    for j in $*
    do
        case $pat in
            "") pat="$j";;
            *)  pat="$pat|$j";;
        esac
    done
    # Without any port the pattern would degenerate to "() .*LISTEN",
    # which matches every listening socket on the host.
    [ -n "$pat" ] || return 0
    # The leading ":" anchors the match to a complete port number, so that
    # e.g. 9080 no longer matches a listener on 19080.
    netstat -ltnp 2>/dev/null | grep -E -i ":($pat) .*LISTEN" | sed 's%.*LISTEN *%%'
}
#
# Return the number of WAS ports which are open
#
# Print how many lines WASPortInfo reports for the given ports.
CheckWASPortsInUse() {
    # strip the padding some wc implementations put around the number
    count=$(WASPortInfo "$@" | wc -l | tr -d ' \t')
    echo "$count"
}
#
# Return the pid(s) of the processes that have WAS ports open
#
# Print the distinct WASPortInfo entries for the given ports, reduced to
# the part in front of the first "/" (the pid).
WASPIDs() {
    WASPortInfo "$@" | sort -u | sed -e 's%/.*$%%'
}
#
# The version of ps that returns all processes and their (long) args
# It's only used by WAS_procs, which isn't used for anything ;-)
#
ps_long() {
# Single place that fixes the ps invocation used by WAS_procs.
# NOTE(review): "axww" is BSD-style option syntax - presumably all
# processes with untruncated command lines; confirm on non-Linux ps.
ps axww
}
#
# The total set of WAS processes (single server only)
#
#
# WAS_procs CONFIG: print the pids of the java processes whose command
# line mentions "config=CONFIG" (case-insensitive).
#
WAS_procs() {
    # awk splits on runs of blanks, so right-aligned (space padded) pids
    # are returned too; cut -d' ' -f1 yields an empty field for those.
    ps_long | grep -i "config=$1" | grep -i java | awk '{ print $1 }'
}
#
# methods: What methods/operations do we support?
#
#
# WAS_methods: print the supported operations, one per line.
# " monitor" (with its leading blank) is added only when the $WGET
# binary is available.
#
WAS_methods() {
    printf '%s\n' start stop status methods validate-all meta-data usage
    if have_binary $WGET; then
        echo " monitor"
    fi
}
#
# Return WAS status (silently)
#
#
# WAS_status ARG: succeed when the count of WAS ports in use is not 0.
# Prints nothing; sets the globals WASPorts and PortsInUse.
#
WAS_status() {
    WASPorts=$(GetWASPorts $1)
    PortsInUse=$(CheckWASPortsInUse $WASPorts)
    [ "$PortsInUse" != 0 ]
}
#
# Report on WAS status to stdout...
#
#
# WAS_report_status ARG: log (at debug level) whether WAS is running.
# Returns $OCF_NOT_RUNNING when no WAS port is in use, $OCF_SUCCESS
# otherwise - also when only some of the ports are listening.
#
WAS_report_status() {
    WASPorts=$(GetWASPorts $1)
    # number of configured ports; the second echo trims wc's padding
    PortCount=$(echo $WASPorts | wc -w)
    PortCount=$(echo $PortCount)
    PortsInUse=$(CheckWASPortsInUse $WASPorts)

    if [ "$PortsInUse" = 0 ]; then
        ocf_log debug "WAS: server $1 is stopped."
        return $OCF_NOT_RUNNING
    fi

    pids=$(WASPIDs $WASPorts)
    if [ $PortsInUse -ge $PortCount ]; then
        ocf_log debug "WAS: server $1 is running (pid" $pids "et al)."
    else
        ocf_log debug "WAS: server $1 is running (pid $pids et al) but not listening on all ports."
    fi
    return $OCF_SUCCESS
}
#
# Monitor WAS - does it really seem to be working?
#
# For this we invoke the snoop applet via wget.
#
# This is actually faster than WAS_status above...
#
#
# WAS_monitor ARG: fetch /snoop from the snoop port with $WGET and check
# that the reply echoes a Wget user-agent.
# Returns 0 on success and $OCF_ERR_GENERIC when the fetch or the check
# fails; exits with 1 when no temp file can be created.
#
WAS_monitor() {
    # Remove the temp file (if one was created) when the shell exits.
    # The closing "]" was missing here, which made every exit print
    # "[: missing ]" and call rmtempfile even with an empty file name.
    trap '[ -z "$tmpfile" ] || rmtempfile "$tmpfile"' 0
    tmpfile=`maketempfile` || exit 1
    SnoopPort=`GetWASSnoopPort $1`
    output=`$WGET -nv -O$tmpfile http://localhost:$SnoopPort/snoop 2>&1`
    rc=$?
    if [ $rc -eq 0 ]; then
        if grep -i 'user-agent.*Wget' $tmpfile >/dev/null; then
            : OK
        else
            ocf_log "err" "WAS: $1: no user-agent from snoop application"
            rc=$OCF_ERR_GENERIC
        fi
    else
        ocf_log "err" "WAS: $1: wget failure: $output"
        rc=$OCF_ERR_GENERIC
    fi
    return $rc
}
#
# Start WAS instance
#
#
# WAS_start PROFILE: launch server1 of the given profile and wait up to
# $STARTTIME polls for it to come up.
# Returns $OCF_ERR_INSTALLED when startServer.sh is missing,
# $OCF_SUCCESS once WAS_wait_4_start succeeds, $OCF_ERR_GENERIC otherwise.
#
WAS_start() {
    # Launch Arguments:
    # -nowait
    # -quiet
    # -logfile <filename>
    # -replacelog
    # -trace
    # -script [<script filename >] [-background]
    # -timeout <seconds>
    # -statusport <portnumber>
    # -profileName <profile>
    # -help

    # Without the launcher there is nothing to run: fail right away
    # instead of running an empty command and polling for the full
    # $STARTTIME.
    if [ ! -x $WAS_BIN/startServer.sh ]; then
        ocf_log "err" "WAS start script $WAS_BIN/startServer.sh not found or not executable"
        return $OCF_ERR_INSTALLED
    fi
    cmd="$WAS_BIN/startServer.sh server1 -profileName $1"

    # The launcher's exit code is deliberately not trusted (both outcomes
    # were handled identically before); only the port check decides.
    ocf_run $cmd

    if WAS_wait_4_start $STARTTIME "$@"; then
        return $OCF_SUCCESS
    fi
    ocf_log "err" "WAS server $1 did not start correctly"
    return $OCF_ERR_GENERIC
}
#
# Wait for WAS to actually start up.
#
# It seems to take between 30 and 60 seconds for it to
# start up on a trivial WAS instance.
#
#
# WAS_wait_4_start MAX ARG...: poll WAS_status once per second, at most
# MAX times. Returns $OCF_SUCCESS as soon as WAS is up; otherwise the
# result of one final WAS_status call.
#
WAS_wait_4_start() {
    max=$1
    shift
    retries=0
    while [ $retries -lt $max ]; do
        WAS_status "$@" && return $OCF_SUCCESS
        sleep 1
        retries=$((retries + 1))
    done
    WAS_status "$@"
}
#
# Shut down WAS
#
#
# WAS_stop PROFILE: stop server1 with stopServer.sh when it is
# executable, otherwise kill the processes holding the WAS ports.
# Returns $OCF_SUCCESS when WAS_status reports it down afterwards,
# $OCF_ERR_GENERIC otherwise.
#
WAS_stop() {
    # They don't return good return codes...
    # And, they seem to allow anyone to stop WAS (!)
    if [ ! -x $WAS_BIN/stopServer.sh ]; then
        WASPorts=$(GetWASPorts $1)
        kill $(WASPIDs $WASPorts)
    else
        ocf_run $WAS_BIN/stopServer.sh server1 -profileName $1
    fi

    # The result is judged by the ports only, not by the exit codes above
    if WAS_status $1; then
        ocf_log "err" "WAS: $1 did not stop correctly"
        return $OCF_ERR_GENERIC
    fi
    return $OCF_SUCCESS
}
#
# Check if the port is valid
#
# CheckPort VALUE: succeed when VALUE is a decimal number greater than 0.
CheckPort() {
    # a bare "return" passes on the status of the failed ocf_is_decimal
    ocf_is_decimal "$1" || return
    test $1 -gt 0
}
#
# WAS_validate_all: validate the global $arg (profile name or port).
#
# With startServer.sh installed, $arg must name a profile that has a
# serverindex.xml listing at least one valid port. With only
# startupServer.sh installed, $arg must itself be a valid port. Exits
# with $OCF_ERR_ARGS or $OCF_ERR_CONFIGURED on failure; returns 0 when
# validation passes or cannot be performed.
#
WAS_validate_all() {
    if [ -x $WAS_BIN/startServer.sh ]; then
        # $arg should be profile name
        if [ ! -f "${WAS_DIR}/profiles/${arg}/config/cells/${WAS_CELL}/nodes/${WAS_NODE}/serverindex.xml" ]; then
            ocf_log err "profile [$arg] does not exist"
            exit $OCF_ERR_ARGS
        fi

        # $arg should specify a valid port number at the very least
        local WASPorts=`GetWASPorts $arg`
        if [ -z "$WASPorts" ]; then
            ocf_log err "No port number specified in configuration file of profile [$arg]"
            exit $OCF_ERR_CONFIGURED
        fi

        local port
        local have_valid_port=false
        for port in $WASPorts; do
            if CheckPort $port; then
                have_valid_port=true
                break
            fi
        done
        if [ "false" = "$have_valid_port" ]; then
            ocf_log err "No valid port number specified in configuration file of profile [$arg]"
            exit $OCF_ERR_CONFIGURED
        fi
    elif [ -x $WAS_BIN/startupServer.sh ]; then
        # $arg should be port number.
        # The check is negated: CheckPort succeeds for a VALID port, so the
        # error must only be raised when it fails.
        if ! CheckPort "$arg"; then
            ocf_log err "Port number is required but [$arg] is not valid port number"
            exit $OCF_ERR_ARGS
        fi
    else
        # Do not know how to validate_all
        ocf_log warn "Do not know how to validate-all, assuming validation OK"
        return $OCF_SUCCESS
    fi
}
#
# 'main' starts here...
#
# Exactly one argument (the operation) is expected
if [ $# -ne 1 ]; then
    usage
    exit $OCF_ERR_ARGS
fi

# These operations don't require OCF instance parameters to be set
case "$1" in
    meta-data)
        meta_data
        exit $OCF_SUCCESS
        ;;
    usage)
        usage
        exit $OCF_SUCCESS
        ;;
    methods)
        WAS_methods
        exit $?
        ;;
    *)
        ;;
esac

#
# Supply default configuration parameter(s)
#
if [ -z $OCF_RESKEY_profile ]; then
    arg=$DEFAULT
else
    arg=$OCF_RESKEY_profile
fi

# The profile must exist below the installation directory
if [ ! -d ${WAS_DIR}/profiles/$arg ]; then
    ocf_log "err" "WAS profile $arg does not exist!"
    usage
    exit $OCF_ERR_ARGS
fi

# Source the per-profile fsdb script when there is one, then the user
# script when WAS_USER_SCRIPT names one. The ":=" expansions leave the
# variables defined (empty) when they were unset.
WAS_PROFILE_NAME=$arg
if [ -n "${WAS_PROFILE_NAME:=}" ]; then
    WAS_PROFILE_FSDB_SCRIPT=${WAS_DIR}/properties/fsdb/${WAS_PROFILE_NAME}.sh
fi
if [ -n "${WAS_PROFILE_FSDB_SCRIPT:=}" ] && [ -f ${WAS_PROFILE_FSDB_SCRIPT} ]; then
    . ${WAS_PROFILE_FSDB_SCRIPT}
fi
if [ -n "${WAS_USER_SCRIPT:=}" ]; then
    . ${WAS_USER_SCRIPT}
fi

# What kind of method was invoked?
case "$1" in
    start)
        WAS_start $arg
        exit $?
        ;;
    stop)
        WAS_stop $arg
        exit $?
        ;;
    status)
        WAS_report_status $arg
        exit $?
        ;;
    monitor)
        WAS_monitor $arg
        exit $?
        ;;
    validate-all)
        WAS_validate_all $arg
        exit $?
        ;;
    *)
        usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac
diff --git a/heartbeat/Xen b/heartbeat/Xen
index e273f6ec4..92291ee15 100755
--- a/heartbeat/Xen
+++ b/heartbeat/Xen
@@ -1,572 +1,572 @@
#!/bin/sh
#
#
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
# License: GNU General Public License (GPL)
#
# Resource Agent for the Xen Hypervisor.
# Manages Xen virtual machine instances by
# mapping cluster resource start and stop,
# to Xen create and shutdown, respectively.
#
# usage: $0 {start|stop|status|monitor|meta-data}
#
# OCF parameters are as below:
# OCF_RESKEY_xmfile
# Absolute path to the Xen control file,
# for this virtual machine.
# OCF_RESKEY_allow_mem_management
# Change memory usage on start/stop/migration
# of virtual machine
# OCF_RESKEY_reserved_Dom0_memory
# minimum memory reserved for domain 0
# OCF_RESKEY_monitor_scripts
# scripts to monitor services within the
# virtual domain
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
# usage: print the one-line synopsis of the agent to stdout.
usage() {
    printf '%s\n' "usage: $0 {start|stop|status|monitor|meta-data|validate-all}"
}
# Defaults for instance parameters; values already present in the
# environment (even empty ones) are left untouched.
: "${OCF_RESKEY_xmfile=/etc/xen/vm/MyDomU}"
: "${OCF_RESKEY_shutdown_acpi=0}"
: "${OCF_RESKEY_allow_mem_management=0}"
: "${OCF_RESKEY_reserved_Dom0_memory=512}"

# Management tool used for all domain operations: prefer xl, else xm
xentool=$(which xl 2> /dev/null || which xm)
meta_data() {
# Print the OCF resource agent metadata (XML) to stdout.
# The document describes the parameters xmfile, name, shutdown_timeout,
# shutdown_acpi, allow_mem_management, node_ip_attribute,
# reserved_Dom0_memory and monitor_scripts, and the actions with their
# default timeouts. It contains no shell expansions.
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="Xen">
<version>1.0</version>
<longdesc lang="en">
Resource Agent for the Xen Hypervisor.
Manages Xen virtual machine instances by mapping cluster resource
start and stop, to Xen create and shutdown, respectively.
A note on names
We will try to extract the name from the config file (the xmfile
attribute). If you use a simple assignment statement, then you
should be fine. Otherwise, if there's some python acrobacy
involved such as dynamically assigning names depending on other
variables, and we will try to detect this, then please set the
name attribute. You should also do that if there is any chance of
a pathological situation where a config file might be missing,
for example if it resides on a shared storage. If all fails, we
finally fall back to the instance id to preserve backward
compatibility.
Para-virtualized guests can also be migrated by enabling the
meta_attribute allow-migrate.
</longdesc>
<shortdesc lang="en">Manages Xen unprivileged domains (DomUs)</shortdesc>
<parameters>
<parameter name="xmfile" unique="0" required="1">
<longdesc lang="en">
Absolute path to the Xen control file,
for this virtual machine.
</longdesc>
<shortdesc lang="en">Xen control file</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="name" unique="0" required="0">
<longdesc lang="en">
Name of the virtual machine.
</longdesc>
<shortdesc lang="en">Xen DomU name</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="shutdown_timeout">
<longdesc lang="en">
The Xen agent will first try an orderly shutdown using xl shutdown.
Should this not succeed within this timeout, the agent will escalate to
xl destroy, forcibly killing the node.
If this is not set, it will default to two-third of the stop action
timeout.
Setting this value to 0 forces an immediate destroy.
</longdesc>
<shortdesc lang="en">Shutdown escalation timeout</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="shutdown_acpi" unique="0" required="0">
<longdesc lang="en">
Handle shutdown by simulating an ACPI power button event.
Enable this to allow graceful shutdown for HVM domains
without installed PV drivers.
</longdesc>
<shortdesc lang="en">Simulate power button event on shutdown</shortdesc>
<content type="boolean" default="0" />
</parameter>
<parameter name="allow_mem_management" unique="0" required="0">
<longdesc lang="en">
This parameter enables dynamic adjustment of memory for start
and stop actions used for Dom0 and the DomUs. The default is
to not adjust memory dynamically.
</longdesc>
<shortdesc lang="en">Use dynamic memory management</shortdesc>
<content type="boolean" default="0" />
</parameter>
<parameter name="node_ip_attribute">
<longdesc lang="en">
In case of a live migration, the system will default to using the IP
address associated with the hostname via DNS or /etc/hosts.
This parameter allows you to specify a node attribute that will be
queried instead for the target node, overriding the IP address. This
allows you to use a dedicated network for live migration traffic to a
specific node.
Warning: make very sure the IP address does point to the right node. Or
else the live migration will end up somewhere else, greatly confusing
the cluster and causing havoc.
</longdesc>
<shortdesc lang="en">Node attribute containing target IP address</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="reserved_Dom0_memory" unique="0" required="0">
<longdesc lang="en">
In case memory management is used, this parameter
defines the minimum amount of memory to be reserved
for the dom0. The default minimum memory is 512MB.
</longdesc>
<shortdesc lang="en">Minimum Dom0 memory</shortdesc>
<content type="string" default="512" />
</parameter>
<parameter name="monitor_scripts" unique="0" required="0">
<longdesc lang="en">
To additionally monitor services within the unprivileged domain,
add this parameter with a list of scripts to monitor.
</longdesc>
<shortdesc lang="en">list of space separated monitor scripts</shortdesc>
<content type="string" default="" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="60" />
<action name="stop" timeout="40" />
<action name="migrate_from" timeout="120" />
<action name="migrate_to" timeout="120" />
<action name="monitor" depth="0" timeout="30" interval="10" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="5" />
</actions>
</resource-agent>
END
}
#
# Xen_Status DOMAIN: return $OCF_SUCCESS when the domain is reported by
# the management tool, $OCF_NOT_RUNNING otherwise.
# Three probes are tried in order: "xl list", the xen-list binary, and
# the parsed output of "$xentool list --long".
#
Xen_Status() {
    # xl: the exit code of "list DOMAIN" is all we need
    case "x$xentool" in
        x*xl*)
            if $xentool list $1 >/dev/null 2>&1; then
                return $OCF_SUCCESS
            fi
            return $OCF_NOT_RUNNING
            ;;
    esac

    # xen-list, when installed: look at the state flags
    if have_binary xen-list; then
        if xen-list $1 2>/dev/null | grep -qs "State.*[-r][-b][-p]--" 2>/dev/null; then
            return $OCF_SUCCESS
        fi
        return $OCF_NOT_RUNNING
    fi

    STATUS=$($xentool list --long $1 2>/dev/null | grep status 2>/dev/null)
    if [ -n "${STATUS}" ]; then
        # we have Xen 3.0.4 or higher
        STATUS_NOSPACES=$(echo "$STATUS" | awk '{ print $1,$2}')
        case "$STATUS_NOSPACES" in
            "(status 2)"|"(status 1)") return $OCF_SUCCESS;;
        esac
        return $OCF_NOT_RUNNING
    fi

    # we have Xen 3.0.3 or lower
    STATUS=$($xentool list --long $1 2>/dev/null | grep state 2>/dev/null)
    if echo "${STATUS}" | grep -qs "[-r][-b][-p]---"; then
        return $OCF_SUCCESS
    fi
    return $OCF_NOT_RUNNING
}
# If the guest is rebooting, it may completely disappear from the
# list of defined guests, thus xl/xen-list would return with not
# running; apparently, this period lasts only for a second or
# two
# If a status returns not running, then test status
# again for 5 times (perhaps it'll show up)
#
# Xen_Status_with_Retry DOMAIN: like Xen_Status, but a "not running"
# answer is re-checked up to 5 times, one second apart, before it is
# believed. Returns the last Xen_Status result.
#
Xen_Status_with_Retry() {
    local rc cnt=5

    Xen_Status $1
    rc=$?
    while [ $rc -eq $OCF_NOT_RUNNING -a $cnt -gt 0 ]; do
        # only stop and monitor are expected to get here
        if [ "$__OCF_ACTION" = "stop" ]; then
            ocf_log debug "domain $1 reported as not running, waiting $cnt seconds ..."
        elif [ "$__OCF_ACTION" = "monitor" ]; then
            ocf_log warn "domain $1 reported as not running, but it is expected to be running! Retrying for $cnt seconds ..."
        fi
        sleep 1
        Xen_Status $1
        rc=$?
        cnt=$((cnt - 1))
    done
    return $rc
}
#
# Xen_Adjust_Memory DELTA: when allow_mem_management is enabled, share
# the memory left after the Dom0 reservation equally between the running
# domains plus DELTA, and apply it to every running domain with mem-set.
# Sets the globals CNTNEW, RUNNING, RUNCNT, MAXMEM and NEWMEM.
#
Xen_Adjust_Memory() {
    ocf_is_true "${OCF_RESKEY_allow_mem_management}" || return 0

    CNTNEW=$1
    RUNNING=$(Xen_List_running)
    RUNCNT=$(Xen_Count_running)
    MAXMEM=$(Xen_Total_Memory)

    # avoid a division by zero when nothing runs and nothing is added
    if [ ${RUNCNT} -eq 0 -a ${CNTNEW} -eq 0 ]; then
        RUNCNT=1
    fi

    #NEWMEM=`echo "(${MAXMEM}-${OCF_RESKEY_reserved_Dom0_memory})/(${RUNCNT}+${CNTNEW})"|bc`
    NEWMEM=$(( (${MAXMEM} - ${OCF_RESKEY_reserved_Dom0_memory}) / (${RUNCNT} + ${CNTNEW} ) ))

    # do not rely on ballooning add dom0_mem=512 instead to force memory for dom0
    #$xentool mem-set Domain-0 ${OCF_RESKEY_reserved_Dom0_memory}
    for DOM in ${RUNNING}; do
        $xentool mem-set ${DOM} ${NEWMEM}
    done
    ocf_log info "Adjusted memory to: $NEWMEM, for the following $RUNCNT domains: $RUNNING"
}
# Print the first column of "$xentool list" for every line that mentions
# neither "Name" (the header) nor "Domain-0".
Xen_List_all() {
    $xentool list | awk '!/Name/ && !/Domain-0/ {print $1}'
}
# Print the name (followed by a space) of every listed domain for which
# Xen_Status succeeds, one per line.
Xen_List_running() {
    ALL_DOMS=`Xen_List_all`
    for DOM in ${ALL_DOMS}; do
        if ! Xen_Status $DOM; then
            continue
        fi
        echo "${DOM} "
    done
}
# Print the number of running domains (word count of Xen_List_running).
Xen_Count_running() {
Xen_List_running | wc -w
}
# Monitor action for $DOMAIN_NAME.
# A probe checks the status once; a regular monitor uses the retrying check.
# Returns $OCF_NOT_RUNNING when the domain is not running, $OCF_ERR_GENERIC
# when one of OCF_RESKEY_monitor_scripts fails, $OCF_SUCCESS otherwise.
Xen_Monitor() {
    local rc
    if ocf_is_probe; then
        Xen_Status ${DOMAIN_NAME}
        rc=$?
    else
        Xen_Status_with_Retry ${DOMAIN_NAME}
        rc=$?
    fi
    if [ $rc -eq ${OCF_NOT_RUNNING} ]; then
        # a stopped domain is not an error during a probe
        if ! ocf_is_probe; then
            ocf_log err "Xen domain $DOMAIN_NAME stopped"
        fi
        return ${OCF_NOT_RUNNING}
    fi
    # with no monitor scripts configured the loop body never runs
    for SCRIPT in ${OCF_RESKEY_monitor_scripts}; do
        $SCRIPT || return ${OCF_ERR_GENERIC}
    done
    return ${OCF_SUCCESS}
}
# Print the third field of the "total_memory" line of "$xentool info".
Xen_Total_Memory() {
    $xentool info | awk '/^total_memory/ {print $3}'
}
# Start action for $DOMAIN_NAME.
# Returns $OCF_SUCCESS if the domain is already running or once Xen_Monitor
# succeeds after creation, $OCF_ERR_INSTALLED if the config file is missing,
# $OCF_ERR_GENERIC if "$xentool create" fails.
# The final wait loop has no limit of its own; it polls once per second
# until Xen_Monitor succeeds.
Xen_Start() {
if Xen_Status ${DOMAIN_NAME}; then
ocf_log info "Xen domain $DOMAIN_NAME already running."
return $OCF_SUCCESS
fi
if [ ! -f "${OCF_RESKEY_xmfile}" ]; then
ocf_log err "Config file ${OCF_RESKEY_xmfile} for $DOMAIN_NAME does not exist."
return $OCF_ERR_INSTALLED
fi
if ocf_is_true "${OCF_RESKEY_allow_mem_management}"; then
# Xen_Adjust_Memory sets the global NEWMEM used below
Xen_Adjust_Memory 1
ocf_log info "New memory for virtual domains: ${NEWMEM}"
# rewrite the memory= line of the config file in place
sed -i -e "/^memory=/ s/^memory=.*/memory=${NEWMEM}/" ${OCF_RESKEY_xmfile}
# NOTE(review): the domain has not been created yet at this point - confirm
# whether this mem-set is expected to have any effect
$xentool mem-set ${DOMAIN_NAME} ${NEWMEM}
fi
# the latest xl management tool is squeamish about some
# characters in a name (the vm name is xen-f):
# /etc/xen/vm/xen-f:15: config parsing error near `xen':
# syntax error, unexpected IDENT, expecting STRING or NUMBER
# or '['
# /etc/xen/vm/xen-f:15: config parsing error near `-f': lexical error
#
# the older xm management tool cannot digest quotes (see
# https://developerbugs.linuxfoundation.org/show_bug.cgi?id=2671)
#
# hence the following
if expr "x$xentool" : "x.*xl" >/dev/null; then
$xentool create ${OCF_RESKEY_xmfile} name=\"$DOMAIN_NAME\"
else
$xentool create ${OCF_RESKEY_xmfile} name="$DOMAIN_NAME"
fi
rc=$?
if [ $rc -ne 0 ]; then
return $OCF_ERR_GENERIC
else
if ocf_is_true "${OCF_RESKEY_allow_mem_management}"; then
$xentool mem-set ${DOMAIN_NAME} ${NEWMEM}
fi
fi
# poll until the monitor reports the domain as up
while sleep 1; do
Xen_Monitor && return $OCF_SUCCESS
done
}
# Shut down the domain $1, destroying it if the orderly shutdown does not
# complete in time.
# The grace period (seconds) is OCF_RESKEY_shutdown_timeout if set, else 2/3
# of the action timeout, else 60. A grace period of 0 skips the orderly
# shutdown and goes straight to destroying the domain.
xen_domain_stop() {
local dom=$1
local timeout
if [ -n "$OCF_RESKEY_shutdown_timeout" ]; then
timeout=$OCF_RESKEY_shutdown_timeout
elif [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
# Allow 2/3 of the action timeout for the orderly shutdown
# (The origin unit is ms, hence the conversion)
timeout=$((OCF_RESKEY_CRM_meta_timeout/1500))
else
timeout=60
fi
if [ "$timeout" -gt 0 ]; then
ocf_log info "Xen domain $dom will be stopped (timeout: ${timeout}s)"
if ocf_is_true "${OCF_RESKEY_shutdown_acpi}"; then
$xentool trigger $dom power
else
$xentool shutdown $dom
fi
# wait (one second per iteration) for the domain to go away
while Xen_Status $dom && [ "$timeout" -gt 0 ]; do
ocf_log debug "$dom still not stopped. Waiting..."
timeout=$((timeout-1))
sleep 1
done
fi
# timeout is 0 either because it was configured so or because the
# grace period above ran out
if [ "$timeout" -eq 0 ]; then
while Xen_Status $dom; do
ocf_log warn "Xen domain $dom will be destroyed!"
$xenkill $dom
sleep 1
done
# Note: This does not give up. stop isn't allowed to fail.
# If $xentool destroy fails, stop will eventually timeout.
# This is the correct behaviour.
fi
ocf_log info "Xen domain $dom stopped."
}
# Stop action for $DOMAIN_NAME. Handles the domain under its own name or
# under the "migrating-" prefix, rebalances memory afterwards and always
# returns $OCF_SUCCESS.
Xen_Stop() {
    local target=""
    if Xen_Status_with_Retry ${DOMAIN_NAME}; then
        target=${DOMAIN_NAME}
    elif Xen_Status migrating-${DOMAIN_NAME}; then
        ocf_log info "Xen domain $DOMAIN_NAME is migrating"
        target="migrating-${DOMAIN_NAME}"
    fi
    if [ -n "$target" ]; then
        xen_domain_stop $target
    else
        ocf_log info "Xen domain $DOMAIN_NAME already stopped."
        # It is supposed to be gone, but there have been situations where
        # $xentool list / xen-list showed it as stopped but it was still
        # instantiated. Nuke it once more to make sure:
        $xenkill ${DOMAIN_NAME}
    fi
    Xen_Adjust_Memory 0
    return $OCF_SUCCESS
}
# migrate_to action: migrate $DOMAIN_NAME to the node named in
# OCF_RESKEY_CRM_meta_migrate_target. If OCF_RESKEY_node_ip_attribute is set
# and the target node carries that attribute, its value is used as the
# migration address instead of the node name.
# Returns $OCF_SUCCESS on success, $OCF_ERR_GENERIC if the domain is not
# running locally or the migration command fails.
Xen_Migrate_To() {
    target_node="$OCF_RESKEY_CRM_meta_migrate_target"
    target_attr="$OCF_RESKEY_node_ip_attribute"
    target_addr="$target_node"
    if ! Xen_Status ${DOMAIN_NAME}; then
        ocf_log err "$DOMAIN_NAME: migrate_to: Not active locally!"
        return $OCF_ERR_GENERIC
    fi
    ocf_log info "$DOMAIN_NAME: Starting $xentool migrate to $target_node"
    if [ -n "$target_attr" ]; then
        nodevalue=`crm_attribute --type nodes --node-uname $target_node --attr-name $target_attr --get-value -q`
        if [ -n "${nodevalue}" ] && [ "${nodevalue}" != "(null)" ]; then
            target_addr="$nodevalue"
            ocf_log info "$DOMAIN_NAME: $target_node is using address $target_addr"
        fi
    fi
    # only the xm tool is given the --live flag
    if expr "x$xentool" : "x.*xm" >/dev/null; then
        $xentool migrate --live $DOMAIN_NAME $target_addr
    else
        $xentool migrate $DOMAIN_NAME $target_addr
    fi
    rc=$?
    if [ $rc -eq 0 ]; then
        Xen_Adjust_Memory 0
        ocf_log info "$DOMAIN_NAME: $xentool migrate to $target_node succeeded."
        return $OCF_SUCCESS
    fi
    ocf_log err "$DOMAIN_NAME: $xentool migrate to $target_node failed: $rc"
    return $OCF_ERR_GENERIC
}
# migrate_from action: wait for $DOMAIN_NAME to become active locally.
# Waits up to 2/3 of the action timeout (10 seconds when no action timeout
# is known), polling once per second.
# Returns $OCF_SUCCESS if the domain is active, $OCF_ERR_GENERIC otherwise.
Xen_Migrate_From() {
    timeout=10 # should be plenty
    if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
        # Allow 2/3 of the action timeout for status to stabilize
        # (The origin unit is ms, hence the conversion)
        timeout=$((OCF_RESKEY_CRM_meta_timeout/1500))
    fi
    until Xen_Status ${DOMAIN_NAME} || ! [ $timeout -gt 0 ]; do
        ocf_log debug "$DOMAIN_NAME: Not yet active locally, waiting (timeout: ${timeout}s)"
        timeout=$((timeout-1))
        sleep 1
    done
    if ! Xen_Status ${DOMAIN_NAME}; then
        ocf_log err "$DOMAIN_NAME: Not active locally, migration failed!"
        return $OCF_ERR_GENERIC
    fi
    Xen_Adjust_Memory 0
    ocf_log info "$DOMAIN_NAME: Active locally, migration successful"
    return $OCF_SUCCESS
}
# validate-all action: performs no checks and always returns $OCF_SUCCESS.
Xen_Validate_All() {
return $OCF_SUCCESS
}
# Exactly one argument (the action) is expected.
if [ $# -ne 1 ]; then
    usage
    exit $OCF_ERR_ARGS
fi
# meta-data and usage need neither parameters nor external tools
case $1 in
    meta-data)
        meta_data
        exit $OCF_SUCCESS
        ;;
    usage)
        usage
        exit $OCF_SUCCESS
        ;;
esac
# sed and awk are used right below to derive the domain name (and later by
# the actions), so check for them before their first use.
for binary in sed awk; do
    check_binary $binary
done
# the name business:
#
# 1. use the name attribute, or
# 2. find the name in the config file (if it exists) and use that
# unless it contains funny characters such as '%' or space, or
# 3. use the OCF_RESOURCE_INSTANCE
if [ x"${OCF_RESKEY_name}" != x ]; then
    DOMAIN_NAME="${OCF_RESKEY_name}"
else
    if [ -f "${OCF_RESKEY_xmfile}" ]; then
        # the path is quoted so that it matches the quoted -f test above
        DOMAIN_NAME=`awk '$1~/^name(=|$)/{print}' "${OCF_RESKEY_xmfile}" | sed 's/.*=[[:space:]]*//' | tr -d "[\"']"`
        if echo "$DOMAIN_NAME" | grep -qs '[%[:space:]]'; then
            DOMAIN_NAME=""
        fi
    fi
    DOMAIN_NAME=${DOMAIN_NAME:-${OCF_RESOURCE_INSTANCE}}
fi
# prefer the dedicated xen-destroy helper for killing a domain
if have_binary xen-destroy ; then
    xenkill="xen-destroy"
else
    xenkill="$xentool destroy"
fi
if [ -n "$OCF_RESKEY_shutdown_timeout" ]; then
    ocf_is_decimal "$OCF_RESKEY_shutdown_timeout" || {
        ocf_log err "shutdown_timeout must be a number"
        exit $OCF_ERR_CONFIGURED
    }
fi
# dispatch the requested action; its status becomes the exit code
case $1 in
    start)
        Xen_Start
        ;;
    stop)
        Xen_Stop
        ;;
    migrate_to)
        Xen_Migrate_To
        ;;
    migrate_from)
        Xen_Migrate_From
        ;;
    monitor)
        Xen_Monitor
        ;;
    status)
        Xen_Status ${DOMAIN_NAME}
        ;;
    validate-all)
        Xen_Validate_All
        ;;
    *)
        usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac
exit $?
diff --git a/heartbeat/Xinetd b/heartbeat/Xinetd
index b6a7b56e2..78d021ea2 100755
--- a/heartbeat/Xinetd
+++ b/heartbeat/Xinetd
@@ -1,250 +1,250 @@
#!/bin/sh
#
# Startup/shutdown script for services managed by xinetd.
#
# Copyright (C) 2003 Charlie Brooks
# Copyright (C) 2011 Ulrich Windl
#
# WARNING: Tested ONLY on SLES11 SP1 at this time.
#
# Author: Charlie Brooks <ha@HBCS.Org>
# Description: given parameters of a service name and start|stop|status,
# will enable, disable or report on a specified xinetd service
# Config: all services must have a descriptor file in /etc/xinetd.d
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
# License: GNU General Public License (GPL)
#
# OCF parameters are as below:
# OCF_RESKEY_service
#######################################################################
# Initialization:
# Default the functions directory (only when unset) and load the OCF
# shell function library.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Name of the managed xinetd service and its descriptor file.
service=$OCF_RESKEY_service
SVCDEF=/etc/xinetd.d/$service
#######################################################################
# Print the resource agent meta-data (XML) on stdout.
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="Xinetd">
<version>1.0</version>
<longdesc lang="en">
Resource script for Xinetd. It starts/stops services managed
by xinetd by enabling or disabling them in the configuration file.
The xinetd daemon itself must be running: we are not going to start or
stop it ourselves.
All services should have a line saying either "disable=yes" or "disable=no".
The script just changes those settings before reloading xinetd.
Important: in case the services managed by the cluster are the
only ones enabled, you should specify the -stayalive option for
xinetd or it will exit on Heartbeat stop. Alternatively, you may
enable some internal service such as echo.
</longdesc>
<shortdesc lang="en">Manages a service of Xinetd</shortdesc>
<parameters>
<parameter name="service" unique="0" required="1">
<longdesc lang="en">
The name of the service managed by xinetd.
</longdesc>
<shortdesc lang="en">service name</shortdesc>
<content type="string" default="" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="20s" />
<action name="stop" timeout="20s" />
<action name="restart" timeout="20s" />
<action name="status" depth="0" timeout="10" interval="10" />
<action name="monitor" depth="0" timeout="10" interval="10" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}
# Print the PID of every process whose command name is exactly "xinetd"
# (nothing is printed when there is none).
get_xinetd_pid() {
ps -e -o pid,comm | $AWK '$2 == "xinetd" { print $1 }'
}
# force xinetd to reload the service descriptions
hup_inetd () {
    # The pid file is not trusted; xinetd is looked up in the process list.
    # Exits with $OCF_ERR_GENERIC when xinetd is not found or cannot be
    # signalled.
    local pid
    pid=`get_xinetd_pid`
    if [ -z "$pid" ]; then
        ocf_exit_reason "xinetd process not found!"
        exit $OCF_ERR_GENERIC
    fi
    if ! kill -s HUP $pid; then
        ocf_exit_reason "could not send SIGHUP to process $pid!"
        exit $OCF_ERR_GENERIC
    fi
    ocf_log info "asked xinetd to reload by sending SIGHUP to process $pid!"
}
# check "disable = X", printing X
# Print the value of the "disable = X" setting found in the file $1
# (an empty line is printed when the file has no such setting).
check_service()
{
    # declared separately from the assignment: the command substitution is
    # then a plain assignment, not an argument to "local"
    local result
    ocf_log "info" "checking \"disable\" in $1"
    # the file name is quoted so a path containing blanks stays one operand
    result=$(sed -nre 's/^[ ]*disable[ ]*=[ ]*([^ ]+)[# ]*/\1/p' "$1")
    echo "$result"
}
# change "disable = X" to desired value
# Rewrite the "disable = X" setting in the file $2 so that X becomes $1.
# Returns 0 on success, 1 if sed could not edit the file.
change_service()
{
    ocf_log "info" "setting \"disable = $1\" in $2"
    # the file name is quoted so a path containing blanks stays one operand
    if ! sed -i -re 's/^([ ]*disable[ ]*=[ ]*)([^ ]+)([# ]*)/\1'"$1"'\3/' "$2"
    then
        ocf_log "err" "could not edit $2"
        return 1
    fi
    return 0
}
# Report the state of the service from its "disable" setting in $SVCDEF.
# Prints running/stopped/unknown and returns $OCF_SUCCESS,
# $OCF_NOT_RUNNING or $OCF_ERR_CONFIGURED respectively.
# A missing "disable" setting is treated as "no" (i.e. running).
xup_status () {
    local disabled="$(check_service $SVCDEF)"
    case "${disabled:=no}" in
        no)
            echo running
            return $OCF_SUCCESS
            ;;
        yes)
            echo stopped
            return $OCF_NOT_RUNNING
            ;;
        *)
            echo unknown
            return $OCF_ERR_CONFIGURED
            ;;
    esac
}
# Start action: enable the service in $SVCDEF and ask xinetd to reload.
# Exits with $OCF_SUCCESS if the service is already enabled.
# Returns $OCF_ERR_GENERIC if the descriptor file could not be edited.
xup_start () {
    if [ "running" = "`xup_status`" ]; then
        ocf_log info "service $service already started"
        exit $OCF_SUCCESS
    fi
    ocf_log "info" "enabling in $SVCDEF"
    # an "if" without "else" yields 0 when its condition fails, so a failed
    # edit must be turned into an error explicitly
    if ! change_service "no" $SVCDEF; then
        return $OCF_ERR_GENERIC
    fi
    hup_inetd
}
# Stop action: disable the service in $SVCDEF and ask xinetd to reload.
# Exits with $OCF_SUCCESS if the service is already disabled.
# Returns $OCF_ERR_GENERIC if the descriptor file could not be edited.
xup_stop () {
    if [ "stopped" = "`xup_status`" ]; then
        ocf_log info "service $service already stopped"
        exit $OCF_SUCCESS
    fi
    ocf_log "info" "disabling in $SVCDEF"
    # an "if" without "else" yields 0 when its condition fails, so a failed
    # edit must be turned into an error explicitly
    if ! change_service "yes" $SVCDEF; then
        return $OCF_ERR_GENERIC
    fi
    hup_inetd
}
# Print the list of supported actions on stdout; always returns 0.
xup_usage () {
echo "Usage: $0 {start|stop|restart|status|monitor|validate-all|meta-data}"
return 0
}
# validate-all action: the service descriptor file must exist.
# Returns $OCF_SUCCESS if it does, $OCF_ERR_INSTALLED otherwise.
xup_validate_all () {
    [ -f "$SVCDEF" ] && return $OCF_SUCCESS
    ocf_exit_reason "service $service missing $SVCDEF"
    return $OCF_ERR_INSTALLED
}
# Exactly one argument (the action) is expected.
if [ $# -ne 1 ]; then
xup_usage
exit $OCF_ERR_ARGS
fi
# These operations do not require OCF instance parameters to be set
case "$1" in
meta-data)
meta_data
exit $OCF_SUCCESS
;;
usage)
xup_usage
exit $OCF_SUCCESS
;;
esac
# Without a service name only "start" is a configuration error; every
# other action reports the resource as not running.
if [ -z "$OCF_RESKEY_service" ]; then
ocf_exit_reason "please define \"service\" parameter"
if [ "$1" = "start" ]; then
exit $OCF_ERR_CONFIGURED
else
exit $OCF_NOT_RUNNING
fi
fi
# Is xinetd running at all
if [ -z "`get_xinetd_pid`" ]; then
case "$1" in
stop) exit $OCF_SUCCESS;;
start)
ocf_exit_reason "xinetd not running, we manage just xinetd services, not the daemon itself"
exit $OCF_ERR_INSTALLED
;;
status|monitor)
# a probe may legitimately find xinetd stopped; a regular monitor may not
if ocf_is_probe; then
exit $OCF_NOT_RUNNING
else
ocf_exit_reason "xinetd stopped"
exit $OCF_ERR_GENERIC
fi
;;
esac
fi
# Make sure the OCF_RESKEY_service is a valid xinetd service name
if [ ! -f $SVCDEF ]; then
ocf_exit_reason "service definition $SVCDEF not found!"
if [ "$1" = "start" ]; then
exit $OCF_ERR_INSTALLED
else
exit $OCF_NOT_RUNNING
fi
fi
# See how we were called.
case "$1" in
start)
xup_start
;;
stop)
xup_stop
;;
restart)
# re-invoke this script; the exit code is that of the start
$0 stop
$0 start
;;
status)
xup_status
;;
monitor)
# same check as status, without the textual output
xup_status > /dev/null
;;
validate-all)
xup_validate_all
;;
*)
xup_usage
exit $OCF_ERR_UNIMPLEMENTED
esac
exit $?
diff --git a/heartbeat/apache b/heartbeat/apache
index 0cb110142..9f02fb54c 100755
--- a/heartbeat/apache
+++ b/heartbeat/apache
@@ -1,698 +1,698 @@
#!/bin/sh
#
# High-Availability Apache/IBMhttp control script
#
# apache (aka IBMhttpd)
#
# Description: starts/stops apache web servers.
#
# Author: Alan Robertson
# Sun Jiang Dong
#
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
#
# License: GNU General Public License (GPL)
#
# Copyright: (C) 2002-2005 International Business Machines
#
#
# An example usage in /etc/ha.d/haresources:
# node1 10.0.0.170 apache::/opt/IBMHTTPServer/conf/httpd.conf
# node1 10.0.0.170 IBMhttpd
#
# Our parsing of the Apache config files is very rudimentary.
# It'll work with lots of different configurations - but not every
# possible configuration.
#
# Patches are being accepted ;-)
#
# OCF parameters:
# OCF_RESKEY_configfile
# OCF_RESKEY_httpd
# OCF_RESKEY_port
# OCF_RESKEY_statusurl
# OCF_RESKEY_options
# OCF_RESKEY_testregex
# OCF_RESKEY_client
# OCF_RESKEY_testurl
# OCF_RESKEY_testregex10
# OCF_RESKEY_testconffile
# OCF_RESKEY_testname
# OCF_RESKEY_envfiles
# Default the functions directory (only when unset) and load the OCF shell
# function library plus the apache and http monitoring helpers.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
. ${OCF_FUNCTIONS_DIR}/apache-conf.sh
. ${OCF_FUNCTIONS_DIR}/http-mon.sh
HA_VARRUNDIR=${HA_VARRUN}
#######################################################################
#
# Configuration options - usually you don't need to change these
#
#######################################################################
#
IBMHTTPD=/opt/IBMHTTPServer/bin/httpd
# candidate httpd binaries, tried in this order by find_httpd_prog
HTTPDLIST="/sbin/httpd2 /usr/sbin/httpd2 /usr/sbin/apache2 /sbin/httpd /usr/sbin/httpd /usr/sbin/apache $IBMHTTPD"
MPM=/usr/share/apache2/find_mpm
# if the find_mpm helper is executable, append its output to the candidates
if [ -x $MPM ]; then
HTTPDLIST="$HTTPDLIST `$MPM 2>/dev/null`"
fi
LOCALHOST="http://localhost"
HTTPDOPTS="-DSTATUS"
DEFAULT_IBMCONFIG=/opt/IBMHTTPServer/conf/httpd.conf
DEFAULT_SUSECONFIG="/etc/apache2/httpd.conf"
DEFAULT_RHELCONFIG="/etc/httpd/conf/httpd.conf"
DEFAULT_DEBIANCONFIG="/etc/apache2/apache2.conf"
#
# You can also set
# HTTPD
# PORT
# STATUSURL
# CONFIGFILE
# in this section if what we're doing doesn't work for you...
#
# End of Configuration options
#######################################################################
# name this script was invoked as; used in log messages
CMD=`basename $0`
# The config-file-pathname is the pathname to the configuration
# file for this web server. Various appropriate defaults are
# assumed if no config file is specified. If this command is
# invoked as *IBM*, then the default config file name is
# $DEFAULT_IBMCONFIG, otherwise the default config file
# will be $DEFAULT_SUSECONFIG, $DEFAULT_DEBIANCONFIG or
# $DEFAULT_RHELCONFIG, depending on which is detected.
# Print the usage text (supported actions) on stdout.
usage() {
cat <<-END
usage: $0 action
action:
start start the web server
stop stop the web server
status return the status of web server, run or down
monitor return TRUE if the web server appears to be working.
For this to be supported you must configure mod_status
and give it a server-status URL. You have to have
installed either curl or wget for this to work.
meta-data show meta data message
validate-all validate the instance parameters
END
}
# Print the contents of the pid file named by $PidFile.
# Returns non-zero (printing nothing) when the pid file does not exist.
get_pid() {
    # $PidFile is quoted: with an empty value an unquoted "[ -f ]" is true
    # and a bare "cat" would read from stdin
    if [ -f "$PidFile" ]; then
        cat "$PidFile"
    else
        false
    fi
}
#
# return TRUE if a process with given PID is running
#
# Succeeds if a process with the PID given in $1 exists.
ProcessRunning() {
    local target=$1
    # Use /proc if it looks like it's here...
    if [ -d /proc ] && [ -d /proc/1 ]; then
        test -d /proc/$target
        return
    fi
    # This assumes we're running as root...
    kill -s 0 "$target" >/dev/null 2>&1
}
# Succeeds if get_pid yields a PID and that process is running; prints
# nothing itself.
silent_status() {
    local pid
    pid=`get_pid`
    # No pid file (or an empty one): not running
    [ -n "$pid" ] || return 1
    ProcessRunning $pid
}
# May be useful to add other distros in future
# Run the distribution-specific default-config validation, selected by the
# presence of /etc/SuSE-release or /etc/debian_version. Returns 0 on any
# other system.
validate_default_config() {
    if [ -e /etc/SuSE-release ]; then
        validate_default_suse_config
        return $?
    fi
    [ -e /etc/debian_version ] || return 0
    validate_default_debian_config
}
# When using the default /etc/apache2/httpd.conf on SUSE, the file
# /etc/apache2/sysconfig.d/include.conf is required to be present,
# but this is only generated if you run the apache init script
# (with contents derived from /etc/sysconfig/apache2). So, here,
# if we're using the default system config file and it requires
# that include, we run "/etc/init.d/apache2 configtest" to ensure
# the relevant config is generated and valid. We're also taking
# this opportunity to enable mod_status if it's not present.
# Only acts when $CONFIGFILE is the SUSE default and includes
# /etc/apache2/sysconfig.d/include.conf: enables mod_status when a2enmod is
# available, then runs a config test. Returns 1 if the config test fails,
# 0 otherwise.
validate_default_suse_config() {
    [ "$CONFIGFILE" = "$DEFAULT_SUSECONFIG" ] || return 0
    grep -Eq '^Include[[:space:]]+/etc/apache2/sysconfig.d/include.conf' "$CONFIGFILE" || return 0
    if [ -x "/usr/sbin/a2enmod" ]; then
        ocf_run -q /usr/sbin/a2enmod status
    fi
    if [ -e "/etc/init.d/apache2" ]; then
        # init script style, for crusty old SUSE
        ocf_run -q /etc/init.d/apache2 configtest || return 1
    elif [ -e "/usr/sbin/start_apache2" ]; then
        # systemd style, for shiny new SUSE
        ocf_run -q /usr/sbin/start_apache2 -t || return 1
    fi
    return 0
}
# Debian's default configuration uses a lock directory /var/lock/apache2
# which is only created when the LSB init script runs configtest. To
# ensure these default directories are present it's useful to run a configtest
# prior to the resource startup, which will create the needed directories
#
# To support multiple apache instances the debian scripts and configs
# obey apache2/envvars. (copy /etc/apache2 -> /etc/apache2-instance)
# adjust (SUFFIX) envvars and set OCF_RESKEY_envfiles
# Only acts when $CONFIGFILE matches one of the apache2.conf files found
# under /etc/apache2*: exports APACHE_CONFDIR, enables mod_status when
# a2enmod is available and runs "apache2ctl configtest".
# Returns 1 if the config test fails, 0 otherwise.
validate_default_debian_config() {
    find /etc/apache2* -name apache2.conf | grep -q "$CONFIGFILE" || return 0
    export APACHE_CONFDIR=$(dirname $CONFIGFILE)
    if [ -x "/usr/sbin/a2enmod" ]; then
        ocf_run -q /usr/sbin/a2enmod status
    fi
    ocf_run -q /usr/sbin/apache2ctl configtest || return 1
    return 0
}
# Start action: launch httpd and wait until the monitor stops reporting
# $OCF_NOT_RUNNING.
# Returns $OCF_SUCCESS if already running, $OCF_ERR_CONFIGURED if the
# default-config validation fails, otherwise the result of the last
# apache_monitor call. The wait loop has no limit of its own.
apache_start() {
if
silent_status
then
ocf_log info "$CMD already running (pid `get_pid`)"
return $OCF_SUCCESS
fi
validate_default_config || return $OCF_ERR_CONFIGURED
# pass an explicit PidFile directive only when PIDFILE_DIRECTIVE is set
if [ -z $PIDFILE_DIRECTIVE ]; then
ocf_run $HTTPD $HTTPDOPTS $OPTIONS -f $CONFIGFILE
else
ocf_run $HTTPD $HTTPDOPTS $OPTIONS -f $CONFIGFILE -c "PidFile $PidFile"
fi
# tries is only counted here; nothing in this function reads it
tries=0
while : # wait until the user set timeout
do
apache_monitor
ec=$?
if [ $ec -eq $OCF_NOT_RUNNING ]
then
tries=`expr $tries + 1`
ocf_log info "waiting for apache $CONFIGFILE to come up"
sleep 1
else
break
fi
done
# the server came up but the monitor failed: stop it again
if [ $ec -ne 0 ] && silent_status; then
apache_stop
fi
return $ec
}
# Signal any processes still matching "$HTTPD.*$CONFIGFILE" on their command
# line, escalating through SIGTERM, SIGHUP and SIGKILL (one second apart)
# and stopping as soon as no matching process is left.
signal_children()
{
    for sig in SIGTERM SIGHUP SIGKILL ; do
        # the pattern is quoted so the shell passes it to pgrep/pkill
        # verbatim instead of glob-expanding or word-splitting it
        if pgrep -f "$HTTPD.*$CONFIGFILE" >/dev/null ; then
            pkill -$sig -f "$HTTPD.*$CONFIGFILE" >/dev/null
            ocf_log info "signal $sig sent to apache children"
            sleep 1
        else
            break
        fi
    done
}
# Ask the apache process $1 to stop gracefully (SIGWINCH) and wait for it
# to exit, polling once per second.
# Waits for half of the action timeout when OCF_RESKEY_CRM_meta_timeout is
# set, 10 seconds otherwise.
# Returns 0 if the process is gone, 1 if it is still running.
graceful_stop()
{
    local tries=10
    local pid=$1
    # Try graceful stop for half timeout period if timeout period is present
    if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
        tries=$((($OCF_RESKEY_CRM_meta_timeout/1000) / 2))
    fi
    ocf_log info "Attempting graceful stop of apache PID $pid"
    kill -WINCH $pid >/dev/null
    while
        ProcessRunning $pid &&
        [ $tries -gt 0 ]
    do
        sleep 1
        tries=`expr $tries - 1`
    done
    # Judge by the process itself rather than by the counter: the process
    # may have exited during the very last wait.
    if ProcessRunning $pid; then
        # graceful stop didn't work, process still up.
        return 1
    fi
    return 0
}
# Send the default kill signal to the apache process $1, up to 10 times
# and one second apart, for as long as the process is still running.
kill_stop()
{
    local attempts=0
    local pid=$1
    ocf_log info "Killing apache PID $pid"
    while ProcessRunning $pid && [ $attempts -lt 10 ]; do
        # don't sleep on the first try
        [ $attempts -eq 0 ] || sleep 1
        kill $pid >/dev/null
        attempts=$((attempts + 1))
    done
}
# Stop action: graceful stop first, then kill, then signal any leftover
# child processes.
# Returns $OCF_SUCCESS unless the process survives the kill attempts, in
# which case $OCF_ERR_GENERIC is returned.
apache_stop() {
    local rc=$OCF_SUCCESS
    local pid
    if ! silent_status; then
        ocf_log info "$CMD is not running."
        signal_children
        return $rc
    fi
    pid=`get_pid`
    if ! graceful_stop $pid; then
        kill_stop $pid
        if ProcessRunning $pid; then
            ocf_exit_reason "$CMD still running ($pid). Killing pid failed."
            rc=$OCF_ERR_GENERIC
        fi
    fi
    if [ $rc -eq 0 ]; then
        ocf_log info "$CMD stopped."
    fi
    signal_children
    return $rc
}
# Extended monitor: fetch the test URL and match it against the test regex
# (case-insensitively). URL and regex come from $TESTCONFFILE when that is
# a readable file, otherwise from $TESTURL and $TESTREGEX10.
# Returns $OCF_SUCCESS on a match, $OCF_ERR_CONFIGURED if the test
# configuration is not sane, $OCF_ERR_GENERIC otherwise.
apache_monitor_10() {
    if [ -f "$TESTCONFFILE" ] && [ -r "$TESTCONFFILE" ]; then
        readtestconf < $TESTCONFFILE
    else
        test_url="$TESTURL"
        test_regex="$TESTREGEX10"
    fi
    whattorun=`gethttpclient`
    fixtesturl
    is_testconf_sane || return $OCF_ERR_CONFIGURED
    if $whattorun "$test_url" | grep -Ei "$test_regex" > /dev/null; then
        return $OCF_SUCCESS
    fi
    # a failing probe is not worth an exit reason
    ocf_is_probe || ocf_exit_reason "Failed to access httpd status page."
    return $OCF_ERR_GENERIC
}
# If the user has not provided any basic monitoring
# information, allow the agent to verify the server is
# healthy and capable of processing requests by requesting
# the http header of website's index
# Fallback health check: request the HTTP header of the local index page.
# Only attempted when none of testregex, testregex10, testurl, statusurl
# and testconffile is configured; returns 1 otherwise.
# Returns 0 if the header was retrieved, $OCF_ERR_GENERIC if not.
attempt_index_monitor_request() {
    local indexpage=""
    local setting
    for setting in \
        "$OCF_RESKEY_testregex" \
        "$OCF_RESKEY_testregex10" \
        "$OCF_RESKEY_testurl" \
        "$OCF_RESKEY_statusurl" \
        "$OCF_RESKEY_testconffile"
    do
        # any explicit monitoring setting rules the fallback out
        [ -z "$setting" ] || return 1
    done
    indexpage=$(buildlocalurl)
    request_url_header $indexpage || return $OCF_ERR_GENERIC
    ocf_log debug "Successfully retrieved http header at $indexpage"
    return 0
}
# Basic monitor: fetch $STATUSURL with the detected http client and match
# it against $TESTREGEX (case-insensitively); if that fails, fall back to
# the index page header request.
# Returns $OCF_SUCCESS if either check passes, $OCF_ERR_GENERIC otherwise.
apache_monitor_basic() {
    ${ourhttpclient}_func "$STATUSURL" | grep -Ei "$TESTREGEX" > /dev/null &&
        return $OCF_SUCCESS
    attempt_index_monitor_request && return $OCF_SUCCESS
    # a failing probe is not worth an exit reason
    ocf_is_probe || ocf_exit_reason "Failed to access httpd status page."
    return $OCF_ERR_GENERIC
}
# Monitor action.
# Returns $OCF_NOT_RUNNING when the server process is not running,
# $OCF_ERR_INSTALLED when no http client is available, otherwise the
# result of the basic (level 0) or extended (level 10) check.
apache_monitor() {
    local level
    if ! silent_status; then
        ocf_log info "$CMD not running"
        return $OCF_NOT_RUNNING
    fi
    ourhttpclient=`findhttpclient` # we'll need one
    if [ -z "$ourhttpclient" ]; then
        ocf_exit_reason "could not find a http client; make sure that either wget or curl is available"
        return $OCF_ERR_INSTALLED
    fi
    level=`ocf_check_level 10`
    if [ "$level" = 0 ]; then
        apache_monitor_basic
    elif [ "$level" = 10 ]; then
        apache_monitor_10
    fi
}
# Print the default configuration file for this system: the SUSE default
# if that file exists, else the Debian default if that exists, else the
# RHEL default (printed without checking for existence).
detect_default_config()
{
    if [ -f $DEFAULT_SUSECONFIG ]; then
        echo $DEFAULT_SUSECONFIG
        return
    fi
    if [ -f $DEFAULT_DEBIANCONFIG ]; then
        echo $DEFAULT_DEBIANCONFIG
        return
    fi
    echo $DEFAULT_RHELCONFIG
}
# Print the resource agent meta-data (XML) on stdout and return
# $OCF_SUCCESS. The heredoc is unquoted, so $(detect_default_config) is
# expanded when the text is printed.
apache_meta_data(){
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="apache">
<version>1.0</version>
<longdesc lang="en">
This is the resource agent for the Apache Web server.
This resource agent operates both version 1.x and version 2.x Apache
servers.
The start operation ends with a loop in which monitor is
repeatedly called to make sure that the server started and that
it is operational. Hence, if the monitor operation does not
succeed within the start operation timeout, the apache resource
will end with an error status.
The monitor operation by default loads the server status page
which depends on the mod_status module and the corresponding
configuration file (usually /etc/apache2/mod_status.conf).
Make sure that the server status page works and that the access
is allowed *only* from localhost (address 127.0.0.1).
See the statusurl and testregex attributes for more details.
See also http://httpd.apache.org/
</longdesc>
<shortdesc lang="en">Manages an Apache Web server instance</shortdesc>
<parameters>
<parameter name="configfile" required="0" unique="1">
<longdesc lang="en">
The full pathname of the Apache configuration file.
This file is parsed to provide defaults for various other
resource agent parameters.
</longdesc>
<shortdesc lang="en">configuration file path</shortdesc>
<content type="string" default="$(detect_default_config)" />
</parameter>
<parameter name="httpd">
<longdesc lang="en">
The full pathname of the httpd binary (optional).
</longdesc>
<shortdesc lang="en">httpd binary path</shortdesc>
<content type="string" default="/usr/sbin/httpd" />
</parameter>
<parameter name="port" >
<longdesc lang="en">
A port number that we can probe for status information
using the statusurl.
This will default to the port number found in the
configuration file, or 80, if none can be found
in the configuration file.
</longdesc>
<shortdesc lang="en">httpd port</shortdesc>
<content type="integer" />
</parameter>
<parameter name="statusurl">
<longdesc lang="en">
The URL to monitor (the apache server status page by default).
If left unspecified, it will be inferred from
the apache configuration file.
If you set this, make sure that it succeeds *only* from the
localhost (127.0.0.1). Otherwise, it may happen that the cluster
complains about the resource being active on multiple nodes.
</longdesc>
<shortdesc lang="en">url name</shortdesc>
<content type="string" />
</parameter>
<parameter name="testregex">
<longdesc lang="en">
Regular expression to match in the output of statusurl.
Case insensitive.
</longdesc>
<shortdesc lang="en">monitor regular expression</shortdesc>
<content type="string" default="exists, but impossible to show in a human readable format (try grep testregex)"/>
</parameter>
<parameter name="client">
<longdesc lang="en">
Client to use to query to Apache. If not specified, the RA will
try to find one on the system. Currently, wget and curl are
supported. For example, you can set this parameter to "curl" if
you prefer that to wget.
</longdesc>
<shortdesc lang="en">http client</shortdesc>
<content type="string" default=""/>
</parameter>
<parameter name="testurl">
<longdesc lang="en">
URL to test. If it does not start with "http", then it's
considered to be relative to the Listen address.
</longdesc>
<shortdesc lang="en">test url</shortdesc>
<content type="string" />
</parameter>
<parameter name="testregex10">
<longdesc lang="en">
Regular expression to match in the output of testurl.
Case insensitive.
</longdesc>
<shortdesc lang="en">extended monitor regular expression</shortdesc>
<content type="string" />
</parameter>
<parameter name="testconffile">
<longdesc lang="en">
A file which contains test configuration. Could be useful if
you have to check more than one web application or in case sensitive
info should be passed as arguments (passwords). Furthermore,
using a config file is the only way to specify certain
parameters.
Please see README.webapps for examples and file description.
</longdesc>
<shortdesc lang="en">test configuration file</shortdesc>
<content type="string" />
</parameter>
<parameter name="testname">
<longdesc lang="en">
Name of the test within the test configuration file.
</longdesc>
<shortdesc lang="en">test name</shortdesc>
<content type="string" />
</parameter>
<parameter name="options">
<longdesc lang="en">
Extra options to apply when starting apache. See man httpd(8).
</longdesc>
<shortdesc lang="en">command line options</shortdesc>
<content type="string" />
</parameter>
<parameter name="envfiles">
<longdesc lang="en">
Files (one or more) which contain extra environment variables.
If you want to prevent script from reading the default file, set
this parameter to empty string.
</longdesc>
<shortdesc lang="en">environment settings files</shortdesc>
<content type="string" default="/etc/apache2/envvars"/>
</parameter>
<parameter name="use_ipv6">
<longdesc lang="en">
We will try to detect if the URL (for monitor) is IPv6, but if
that doesn't work set this to true to enforce IPv6.
</longdesc>
<shortdesc lang="en">use ipv6 with http clients</shortdesc>
<content type="boolean" default="false"/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="40s" />
<action name="stop" timeout="60s" />
<action name="status" timeout="30s" />
<action name="monitor" depth="0" timeout="20s" interval="10" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="5" />
</actions>
</resource-agent>
END
return $OCF_SUCCESS
}
# validate-all action: check the httpd binary, the configuration file and
# the extended-monitor settings, then make sure the pid file directory
# exists.
# Returns $OCF_SUCCESS, $OCF_ERR_INSTALLED (missing binary, config file,
# test config file or state directory) or $OCF_ERR_CONFIGURED (blank testurl).
apache_validate_all() {
    local temp
    if [ -z "$HTTPD" ]; then
        ocf_exit_reason "apache httpd program not found"
        return $OCF_ERR_INSTALLED
    fi
    if [ ! -x "$HTTPD" ]; then
        ocf_exit_reason "HTTPD $HTTPD not found or is not an executable!"
        return $OCF_ERR_INSTALLED
    fi
    # quoted: an empty $CONFIGFILE must fail this test, not slip through it
    if [ ! -f "$CONFIGFILE" ]; then
        ocf_exit_reason "Configuration file $CONFIGFILE not found!"
        return $OCF_ERR_INSTALLED
    fi
    # validate testconffile/testurl before apache_monitor_10()
    if [ -n "$TESTCONFFILE" ]; then
        if [ ! -f "$TESTCONFFILE" ] || [ ! -r "$TESTCONFFILE" ]; then
            ocf_exit_reason "Configuration file $TESTCONFFILE not found, or not readable."
            return $OCF_ERR_INSTALLED
        fi
    else
        if [ -n "$TESTURL" ]; then
            # remove leading or trailing spaces/tabs
            # ('%s' keeps any % in the URL from being read as a printf format)
            temp=$(printf '%s' "$TESTURL" | sed -e 's/^[ \t]*//g' -e 's/[ \t]*$//g')
            if [ -z "$temp" ]; then
                ocf_exit_reason "testurl: \"$TESTURL\" seems to be an empty string?"
                return $OCF_ERR_CONFIGURED
            fi
        fi
        # FIXME: validate TESTREGEX10 will be needed if empty regex is not allow.
    fi
    ocf_mkstatedir root 755 `dirname $PidFile` || return $OCF_ERR_INSTALLED
    return $OCF_SUCCESS
}
# Choose the httpd binary (HTTPD) and the default configuration file
# (DefaultConfig). When invoked under a name containing "IBM" the IBM
# defaults are used; otherwise the first regular, executable file in
# $HTTPDLIST is taken (HTTPD stays empty if there is none).
find_httpd_prog() {
    case $0 in
    *IBM*)
        HTTPD=$IBMHTTPD
        DefaultConfig=$DEFAULT_IBMCONFIG
        return
        ;;
    esac
    HTTPD=
    for h in $HTTPDLIST; do
        [ -f $h ] && [ -x $h ] || continue
        HTTPD=$h
        break
    done
    # Let the user know that the $HTTPD used is not the one (s)he specified via $OCF_RESKEY_httpd
    if [ -n "$OCF_RESKEY_httpd" ] && [ -n "$HTTPD" ]; then
        ocf_log info "Using $HTTPD as HTTPD"
    fi
    DefaultConfig=$(detect_default_config)
}
# Populate the global settings from the OCF_RESKEY_* parameters, source the
# environment files, pick an httpd binary and config file when none usable
# was given, and read further parameters from the apache config file.
apache_getconfig() {
# these variables are global
HTTPD="$OCF_RESKEY_httpd"
PORT="$OCF_RESKEY_port"
STATUSURL="$OCF_RESKEY_statusurl"
CONFIGFILE="$OCF_RESKEY_configfile"
OPTIONS="$OCF_RESKEY_options"
CLIENT=${OCF_RESKEY_client}
# default regex matches a closing html tag
TESTREGEX=${OCF_RESKEY_testregex:-'</ *html *>'}
TESTURL="$OCF_RESKEY_testurl"
TESTREGEX10=${OCF_RESKEY_testregex10}
TESTCONFFILE="$OCF_RESKEY_testconffile"
TESTNAME="$OCF_RESKEY_testname"
# the default applies only when envfiles is unset; an explicitly empty
# value is kept and results in no file being sourced
: ${OCF_RESKEY_envfiles="/etc/apache2/envvars"}
source_envfiles $OCF_RESKEY_envfiles
# fall back to auto-detection when no usable binary was configured
if [ "X$HTTPD" = X -o ! -f "$HTTPD" -o ! -x "$HTTPD" ]; then
find_httpd_prog
fi
CONFIGFILE=${CONFIGFILE:-$DefaultConfig}
if [ -n "$HTTPD" ]; then
httpd_basename=`basename $HTTPD`
# strip everything from the first "-" in the binary name
case $httpd_basename in
*-*) httpd_basename=`echo "$httpd_basename" | sed -e 's%\-.*%%'`;;
esac
fi
GetParams $CONFIGFILE
}
# No required parameters or binaries are declared; the command-line
# arguments are handed to ocf_rarun.
OCF_REQUIRED_PARAMS=""
OCF_REQUIRED_BINARIES=""
ocf_rarun $*
diff --git a/heartbeat/apache-conf.sh b/heartbeat/apache-conf.sh
index d94dbd3e8..291821716 100644
--- a/heartbeat/apache-conf.sh
+++ b/heartbeat/apache-conf.sh
@@ -1,196 +1,196 @@
#
# Common apache code
# (sourced by apache)
#
# Author: Alan Robertson
# Sun Jiang Dong
#
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
#
# License: GNU General Public License (GPL)
#
# Copyright: (C) 2002-2005 International Business Machines
#
# Source every argument that is a readable regular file into the current
# shell; other arguments are skipped.
source_envfiles() {
    local envfile
    for envfile in "$@"; do
        [ -f "$envfile" ] && [ -r "$envfile" ] &&
            . "$envfile"
    done
}
# Print the apache configuration file $1 with Include directives expanded
# recursively (directories are read via find, other specs via ls so that
# wildcards work), then strip comments, leading/trailing blanks and empty
# lines. Relative include paths are resolved against the most recently
# seen ServerRoot.
apachecat() {
awk '
function procline() {
split($0,a);
if( a[1]~/^[Ii]nclude$/ ) {
includedir=a[2];
gsub("\"","",includedir);
procinclude(includedir);
} else {
if( a[1]=="ServerRoot" ) {
rootdir=a[2];
gsub("\"","",rootdir);
}
print;
}
}
function printfile(infile, a) {
while( (getline<infile) > 0 ) {
procline();
}
close(infile);
}
function allfiles(dir, cmd,f) {
cmd="find -L "dir" -type f";
while( ( cmd | getline f ) > 0 ) {
printfile(f);
}
close(cmd);
}
function listfiles(pattern, cmd,f) {
cmd="ls "pattern" 2>/dev/null";
while( ( cmd | getline f ) > 0 ) {
printfile(f);
}
close(cmd);
}
function procinclude(spec) {
if( rootdir!="" && spec!~/^\// ) {
spec=rootdir"/"spec;
}
if( isdir(spec) ) {
allfiles(spec); # read all files in a directory (and subdirs)
} else {
listfiles(spec); # there could be jokers
}
}
function isdir(s) {
return !system("test -d \""s"\"");
}
{ procline(); }
' $1 |
sed 's/#.*//;s/[[:blank:]]*$//;s/^[[:blank:]]*//' |
grep -v '^$'
}
#
# set parameters (as shell vars) from our apache config file
#
get_apache_params() {
# $1 is the config file; the remaining arguments are directive names.
# For each name, the first line of the expanded config (apachecat output)
# whose first word matches it case-insensitively yields NAME=<second word>;
# later occurrences are ignored because the lowercase key is deleted.
# The collected assignments are eval'ed into the current shell.
# NOTE(review): the values come straight from the config file and are
# eval'ed unquoted, so the config file has to be trusted.
configfile=$1
shift 1
# Directive names are handed to awk as one comma-separated string.
vars=$(echo "$@" | sed 's/ /,/g')
eval `
apachecat $configfile | awk -v vars="$vars" '
BEGIN{
split(vars,v,",");
for( i in v )
vl[i]=tolower(v[i]);
}
{
for( i in v )
if( tolower($1)==vl[i] ) {
print v[i]"="$2
delete vl[i]
break
}
}
'`
}
#
# Return the location(s) that are handled by the given handler
#
FindLocationForHandler() {
# $1: Apache config file, $2: handler name.
# The Perl script remembers the path of the most recent <Location ...>
# line and prints it for every line matching "SetHandler +$2"
# (case-insensitive). $2 is spliced into the Perl regex as-is by the shell.
PerlScript='while (<>) {
/<Location "?([^ >"]+)/i && ($loc=$1);
'"/SetHandler +$2"'/i && print "$loc\n";
}'
apachecat $1 | perl -e "$PerlScript"
}
#
# Check if the port is valid
#
CheckPort() {
	# Succeeds only when $1 passes ocf_is_decimal and is greater than zero.
	# A failing ocf_is_decimal status is passed through unchanged.
	ocf_is_decimal "$1" || return
	[ $1 -gt 0 ]
}
buildlocalurl() {
	# Print the base URL of the local server: built from $Listen when it
	# is set, otherwise from $LOCALHOST and $PORT.
	if [ -n "$Listen" ]; then
		echo "http://${Listen}"
	else
		echo "${LOCALHOST}:${PORT}"
	fi
}
# the test url may need a local prefix (as specified in the
# apache Listen directive)
fixtesturl() {
	# Leave $test_url alone when it already starts with "http";
	# otherwise prepend the local base URL from buildlocalurl.
	if echo $test_url | grep -qs "^http"; then
		return
	fi
	test_url="$(buildlocalurl)$test_url"
}
#
# Get all the parameters we need from the Apache config file
#
GetParams() {
	# Read ServerRoot, PidFile, Port and Listen from the Apache config
	# file $1 and derive PidFile, PORT, Listen and STATUSURL from them.
	# Returns $OCF_ERR_INSTALLED when the config file is missing (or no
	# file name was given) or no PidFile could be determined, and
	# $OCF_SUCCESS otherwise.
	ConfigFile=$1
	# Quoted so that an empty file name fails the test instead of
	# degenerating into "[ ! -f ]", which is always false.
	if [ ! -f "$ConfigFile" ]; then
		return $OCF_ERR_INSTALLED
	fi
	get_apache_params "$ConfigFile" ServerRoot PidFile Port Listen
	case $PidFile in
		/*)	;;
		[[:alnum:]]*)	PidFile=$ServerRoot/$PidFile;;
		*)
			# If the PidFile is not set in the config, set
			# a default location.
			PidFile=$HA_VARRUNDIR/${httpd_basename}.pid
			# Force the daemon to use this location by using
			# the -c option, which adds the PidFile directive
			# as if it was in the configuration file to begin with.
			PIDFILE_DIRECTIVE="true"
			;;
	esac
	# First valid candidate wins: a preset PORT, then the Port
	# directive, then 80.
	for p in "$PORT" "$Port" 80; do
		if CheckPort "$p"; then
			PORT="$p"
			break
		fi
	done
	# Listen could be just a port spec; prefix it with a host in that
	# case. An empty Listen (no directive found) is left empty so that
	# buildlocalurl falls back to ${LOCALHOST}:${PORT} instead of
	# producing "http://localhost:" and ignoring PORT.
	if [ -n "$Listen" ] && ! echo "$Listen" | grep ':' >/dev/null; then
		Listen="localhost:$Listen"
	fi
	#
	# It's difficult to figure out whether the server supports
	# the status operation.
	# (we start our server with -DSTATUS - just in case :-))
	#
	# Typically (but not necessarily) the status URL is /server-status
	#
	# For us to think status will work, we have to have the following things:
	#
	# - The server-status handler has to be mapped to some URL somewhere
	#
	# We assume that:
	#
	# - the "main" web server at $PORT will also support it if we can find it
	#   somewhere in the file
	# - it will be supported at the same URL as the one we find in the file
	#
	# If this doesn't work for you, then set the statusurl attribute.
	#
	if
		[ "X$STATUSURL" = "X" ]
	then
		StatusURL=`FindLocationForHandler $1 server-status | tail -1`
		STATUSURL="`buildlocalurl`$StatusURL"
	fi
	if ! test "$PidFile"; then
		return $OCF_ERR_INSTALLED
	else
		return $OCF_SUCCESS
	fi
}
diff --git a/heartbeat/asterisk b/heartbeat/asterisk
index e6318fc1d..c6586a32b 100755
--- a/heartbeat/asterisk
+++ b/heartbeat/asterisk
@@ -1,484 +1,484 @@
#!/bin/sh
#
#
# Asterisk
#
# Description: Manages an Asterisk PBX as an HA resource
#
# Authors: Martin Gerhard Loschwitz
# Florian Haas
#
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
# License: GNU General Public License (GPL)
#
# (c) 2011 hastexo Professional Services GmbH
#
# This resource agent is loosely derived from the MySQL resource
# agent, which itself is made available to the public under the
# following copyright:
#
# (c) 2002-2005 International Business Machines, Inc.
# 2005-2010 Linux-HA contributors
#
# See usage() function below for more details ...
#
# OCF instance parameters:
# OCF_RESKEY_binary
# OCF_RESKEY_canary_binary
# OCF_RESKEY_config
# OCF_RESKEY_user
# OCF_RESKEY_group
# OCF_RESKEY_additional_parameters
# OCF_RESKEY_realtime
# OCF_RESKEY_maxfiles
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
# Fill in some defaults if no values are specified
# The service account is named differently on OpenBSD.
HOSTOS=$(uname)
case "${HOSTOS}" in
	OpenBSD)
		OCF_RESKEY_user_default="_asterisk"
		OCF_RESKEY_group_default="_asterisk"
		;;
	*)
		OCF_RESKEY_user_default="asterisk"
		OCF_RESKEY_group_default="asterisk"
		;;
esac
OCF_RESKEY_binary_default="asterisk"
OCF_RESKEY_canary_binary_default="astcanary"
OCF_RESKEY_config_default="/etc/asterisk/asterisk.conf"
OCF_RESKEY_additional_parameters_default="-g -vvv"
OCF_RESKEY_realtime_default="false"
OCF_RESKEY_maxfiles_default="8192"

# Apply a default only where the parameter is unset
# (an explicitly empty value is kept).
: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
: ${OCF_RESKEY_canary_binary=${OCF_RESKEY_canary_binary_default}}
: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}}
: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}}
: ${OCF_RESKEY_group=${OCF_RESKEY_group_default}}
: ${OCF_RESKEY_additional_parameters=${OCF_RESKEY_additional_parameters_default}}
: ${OCF_RESKEY_realtime=${OCF_RESKEY_realtime_default}}
: ${OCF_RESKEY_maxfiles=${OCF_RESKEY_maxfiles_default}}
#######################################################################
usage() {
	# Print the command-line help. The wording used to talk about a
	# "database" (left over from the MySQL agent this one derives from);
	# it now names the Asterisk PBX.
	cat <<UEND
usage: $0 (start|stop|validate-all|meta-data|status|monitor)
$0 manages an Asterisk PBX as an HA resource.
The 'start' operation starts the Asterisk PBX.
The 'stop' operation stops the Asterisk PBX.
The 'validate-all' operation reports whether the parameters are valid
The 'meta-data' operation reports this RA's meta-data information
The 'status' operation reports whether the Asterisk PBX is running
The 'monitor' operation reports whether the Asterisk PBX seems to be working
UEND
}
meta_data() {
# Print the agent's OCF meta-data XML on stdout. The here-document is
# unquoted, so the default="..." attributes are filled in from the
# OCF_RESKEY_*_default shell variables at run time.
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="asterisk">
<version>1.0</version>
<longdesc lang="en">
Resource agent for the Asterisk PBX.
May manage an Asterisk PBX telephony system or a clone set that
forms an Asterisk distributed device setup.
</longdesc>
<shortdesc lang="en">Manages an Asterisk PBX</shortdesc>
<parameters>
<parameter name="binary" unique="0" required="0">
<longdesc lang="en">
Location of the Asterisk PBX server binary
</longdesc>
<shortdesc lang="en">Asterisk PBX server binary</shortdesc>
<content type="string" default="${OCF_RESKEY_binary_default}" />
</parameter>
<parameter name="canary_binary" unique="0" required="0">
<longdesc lang="en">
Location of the Asterisk PBX Canary server binary
</longdesc>
<shortdesc lang="en">Asterisk PBX Canary server binary</shortdesc>
<content type="string" default="${OCF_RESKEY_canary_binary_default}" />
</parameter>
<parameter name="config" unique="0" required="0">
<longdesc lang="en">
The Asterisk PBX configuration file
</longdesc>
<shortdesc lang="en">Asterisk PBX config</shortdesc>
<content type="string" default="${OCF_RESKEY_config_default}" />
</parameter>
<parameter name="user" unique="0" required="0">
<longdesc lang="en">
User running Asterisk PBX daemon
</longdesc>
<shortdesc lang="en">Asterisk PBX user</shortdesc>
<content type="string" default="${OCF_RESKEY_user_default}" />
</parameter>
<parameter name="group" unique="0" required="0">
<longdesc lang="en">
Group running Asterisk PBX daemon (for logfile and directory permissions)
</longdesc>
<shortdesc lang="en">Asterisk PBX group</shortdesc>
<content type="string" default="${OCF_RESKEY_group_default}" />
</parameter>
<parameter name="additional_parameters" unique="0" required="0">
<longdesc lang="en">
Additional parameters which are passed to the Asterisk PBX on
startup (e.g. -L &lt;load&gt; or -M &lt;value&gt;).
</longdesc>
<shortdesc lang="en">Additional parameters to pass to the Asterisk PBX</shortdesc>
<content type="string" default="${OCF_RESKEY_additional_parameters_default}" />
</parameter>
<parameter name="realtime" unique="0" required="0">
<longdesc lang="en">
Determines whether the Asterisk PBX daemon will be run with
realtime priority or not.
</longdesc>
<shortdesc lang="en">Asterisk PBX realtime priority</shortdesc>
<content type="boolean" default="${OCF_RESKEY_realtime_default}" />
</parameter>
<parameter name="maxfiles" unique="0" required="0">
<longdesc lang="en">
Determines how many files the Asterisk PBX is allowed to open at
a time. Helps to fix the 'Too many open files' error message.
</longdesc>
<shortdesc lang="en">Asterisk PBX allowed MAXFILES</shortdesc>
<content type="integer" default="${OCF_RESKEY_maxfiles_default}" />
</parameter>
<parameter name="monitor_sipuri" unique="0" required="0">
<longdesc lang="en">
A SIP URI to check when monitoring. During monitor, the agent will
attempt to do a SIP OPTIONS request against this URI.
Requires the sipsak utility to be present and executable.
If unset, the agent does no SIP URI monitoring.
</longdesc>
<shortdesc lang="en">SIP URI to check when monitoring</shortdesc>
<content type="string" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="20" />
<action name="stop" timeout="20" />
<action name="status" timeout="20" />
<action name="monitor" timeout="30" interval="20" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}
#######################################################################
# Convenience functions
asterisk_rx() {
# Run the console command $1 against the Asterisk instance whose control
# socket is $ASTRUNDIR/asterisk.ctl; the exit status is that of ocf_run.
# if $HOME is set, asterisk -rx writes a .asterisk_history there
# The subshell keeps the "unset HOME" from affecting the caller.
(
unset HOME
ocf_run $OCF_RESKEY_binary -r -s $ASTRUNDIR/asterisk.ctl -x "$1"
)
}
#######################################################################
# Functions invoked by resource manager actions
asterisk_validate() {
	# Check the required binaries, the configuration file and the
	# configured user and group. Returns $OCF_ERR_INSTALLED on the first
	# problem found, 0 otherwise.
	check_binary $OCF_RESKEY_binary
	check_binary pgrep
	[ -z "$OCF_RESKEY_monitor_sipuri" ] || check_binary sipsak

	# A config file on shared storage that is not available
	# during probes is OK.
	if [ ! -f $OCF_RESKEY_config ]; then
		if ocf_is_probe; then
			ocf_log warn "Config $OCF_RESKEY_config not available during a probe"
		else
			ocf_log err "Config $OCF_RESKEY_config doesn't exist"
			return $OCF_ERR_INSTALLED
		fi
	fi

	if ! getent passwd $OCF_RESKEY_user >/dev/null 2>&1; then
		ocf_log err "User $OCF_RESKEY_user doesn't exist"
		return $OCF_ERR_INSTALLED
	fi

	if ! getent group $OCF_RESKEY_group >/dev/null 2>&1; then
		ocf_log err "Group $OCF_RESKEY_group doesn't exist"
		return $OCF_ERR_INSTALLED
	fi

	return 0
}
asterisk_status() {
	# Report whether the Asterisk process named in
	# $ASTRUNDIR/asterisk.pid is alive.
	# Returns:
	#   $OCF_NOT_RUNNING  no PID file, or the PID does not answer signal 0
	#                     (the stale PID file is removed in that case)
	#   $OCF_ERR_GENERIC  realtime mode is on but no astcanary was found
	#   $OCF_SUCCESS      otherwise
	local pid
	local rc
	# Kept local so the canary PID list does not leak into the caller.
	local astcanary_pid

	if [ ! -f $ASTRUNDIR/asterisk.pid ]; then
		ocf_log info "Asterisk PBX is not running"
		return $OCF_NOT_RUNNING
	fi

	pid=`cat $ASTRUNDIR/asterisk.pid`
	ocf_run kill -s 0 $pid
	rc=$?
	if [ $rc -ne 0 ]; then
		ocf_log info "Asterisk PBX not running: removing old PID file"
		rm -f $ASTRUNDIR/asterisk.pid
		return $OCF_NOT_RUNNING
	fi

	if ocf_is_true "$OCF_RESKEY_realtime"; then
		astcanary_pid=`pgrep -d " " -f "astcanary $ASTRUNDIR/alt.asterisk.canary.tweet.tweet.tweet"`
		if [ ! "$astcanary_pid" ]; then
			ocf_log err "Asterisk PBX is running but astcanary is not although it should"
			return $OCF_ERR_GENERIC
		fi
	fi

	# Explicit success; this path used to rely on the implicit status
	# of falling off the end of the function.
	return $OCF_SUCCESS
}
asterisk_monitor() {
	local rc

	# The process-level check comes first; any failure is passed
	# straight back to the caller.
	asterisk_status
	rc=$?
	[ $rc -eq $OCF_SUCCESS ] || return $rc

	# Check whether connecting to asterisk is possible
	if ! asterisk_rx 'core show channels count'; then
		if [ "$__OCF_ACTION" != "start" ]; then
			ocf_log err "Failed to connect to the Asterisk PBX"
			return $OCF_ERR_GENERIC
		fi
		ocf_log info "Asterisk PBX not running yet"
		return $OCF_NOT_RUNNING
	fi

	# Optionally check the monitor URI with sipsak
	# The return values:
	# 0 means that a 200 was received.
	# 1 means something else than 1xx or 2xx was received.
	# 2 will be returned on local errors like non resolvable names
	#   or wrong options combination.
	# 3 will be returned on remote errors like socket errors
	#   (e.g. icmp error), redirects without a contact header or
	#   simply no answer (timeout).
	#   This can also happen if sipsak is run too early after asterisk
	#   start.
	# Any other status falls through to success, as before.
	if [ -n "$OCF_RESKEY_monitor_sipuri" ]; then
		ocf_run sipsak -s "$OCF_RESKEY_monitor_sipuri"
		rc=$?
		if [ "$rc" = "1" ] || [ "$rc" = "2" ]; then
			return $OCF_ERR_GENERIC
		elif [ "$rc" = "3" ]; then
			return $OCF_NOT_RUNNING
		fi
	fi

	ocf_log debug "Asterisk PBX monitor succeeded"
	return $OCF_SUCCESS
}
asterisk_start() {
	# Start the Asterisk PBX and wait until asterisk_monitor succeeds.
	# Returns $OCF_SUCCESS when Asterisk is (already) running; exits
	# with $OCF_ERR_GENERIC or $OCF_ERR_PERM on failure. There is no
	# internal timeout: the wait loop relies on the cluster manager's
	# operation timeout.
	local asterisk_extra_params
	local astcanary_pid
	local dir
	local i
	local rc

	asterisk_status
	rc=$?
	if [ $rc -eq $OCF_SUCCESS ]; then
		ocf_log info "Asterisk PBX already running"
		return $OCF_SUCCESS
	fi

	# If Asterisk is not already running, make sure there is no
	# old astcanary instance when the new asterisk starts. To
	# achieve this, kill old astcanary instances belonging to
	# this $ASTRUNDIR.
	# Find out PIDs of running astcanaries
	astcanary_pid=`pgrep -d " " -f "astcanary $ASTRUNDIR/alt.asterisk.canary.tweet.tweet.tweet"`
	# If there are astcanaries running that belong to $ASTRUNDIR,
	# kill them, one PID per kill. (The loop used to pass the whole
	# list on every iteration, re-killing PIDs that were already gone.)
	if [ "$astcanary_pid" ]; then
		for i in $astcanary_pid; do ocf_run kill -s KILL $i; done
	fi

	for dir in $ASTRUNDIR $ASTLOGDIR $ASTLOGDIR/cdr-csv $ASTLOGDIR/cdr-custom; do
		if [ ! -d "$dir" ]; then
			ocf_run install -d -o $OCF_RESKEY_user -g $OCF_RESKEY_group $dir \
				|| exit $OCF_ERR_GENERIC
		fi
		# Regardless of whether we just created the directory or it
		# already existed, check whether it is writable by the configured
		# user
		if ! su -s /bin/sh - $OCF_RESKEY_user -c "test -w $dir"; then
			ocf_log err "Directory $dir is not writable by $OCF_RESKEY_user"
			exit $OCF_ERR_PERM
		fi
	done

	# set MAXFILES
	ulimit -n $OCF_RESKEY_maxfiles

	# Determine whether Asterisk PBX is supposed to run in Realtime mode
	# or not and make asterisk daemonize automatically
	if ocf_is_true "$OCF_RESKEY_realtime"; then
		asterisk_extra_params="-F -p"
	else
		asterisk_extra_params="-F"
	fi

	ocf_run ${OCF_RESKEY_binary} -G $OCF_RESKEY_group -U $OCF_RESKEY_user \
		-C $OCF_RESKEY_config \
		$OCF_RESKEY_additional_parameters \
		$asterisk_extra_params
	rc=$?
	if [ $rc -ne 0 ]; then
		ocf_log err "Asterisk PBX start command failed: $rc"
		exit $OCF_ERR_GENERIC
	fi

	# Spin waiting for the server to come up.
	# Let the CRM/LRM time us out if required
	while true; do
		asterisk_monitor
		rc=$?
		[ $rc -eq $OCF_SUCCESS ] && break
		if [ $rc -ne $OCF_NOT_RUNNING ]; then
			ocf_log err "Asterisk PBX start failed"
			exit $OCF_ERR_GENERIC
		fi
		sleep 2
	done

	ocf_log info "Asterisk PBX started"
	return $OCF_SUCCESS
}
asterisk_stop() {
	# Stop the Asterisk PBX: send SIGTERM, wait up to shutdown_timeout
	# seconds, then fall back to SIGKILL. In realtime mode any remaining
	# astcanary processes for this $ASTRUNDIR are killed as well.
	# Returns $OCF_SUCCESS; exits with $OCF_ERR_GENERIC when the
	# initial SIGTERM cannot be delivered.
	local pid
	local astcanary_pid
	local rc
	local i
	local count
	local shutdown_timeout

	asterisk_status
	rc=$?
	if [ $rc -eq $OCF_NOT_RUNNING ]; then
		ocf_log info "Asterisk PBX already stopped"
		return $OCF_SUCCESS
	fi

	pid=`cat $ASTRUNDIR/asterisk.pid`
	ocf_run kill -s TERM $pid
	rc=$?
	if [ $rc -ne 0 ]; then
		ocf_log err "Asterisk PBX couldn't be stopped"
		exit $OCF_ERR_GENERIC
	fi

	# stop waiting
	# Default wait is 15s; when the operation timeout is known (it is
	# divided by 1000 here), leave 5 seconds of headroom for SIGKILL.
	shutdown_timeout=15
	if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
		shutdown_timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000)-5))
	fi
	count=0
	while [ $count -lt $shutdown_timeout ]; do
		asterisk_status
		rc=$?
		if [ $rc -eq $OCF_NOT_RUNNING ]; then
			break
		fi
		count=`expr $count + 1`
		sleep 1
		ocf_log debug "Asterisk PBX still hasn't stopped yet. Waiting ..."
	done

	asterisk_status
	rc=$?
	if [ $rc -ne $OCF_NOT_RUNNING ]; then
		# SIGTERM didn't help either, try SIGKILL
		ocf_log info "Asterisk PBX failed to stop after ${shutdown_timeout}s using SIGTERM. Trying SIGKILL ..."
		ocf_run kill -s KILL $pid
	fi

	# After killing asterisk, stop astcanary
	if ocf_is_true "$OCF_RESKEY_realtime"; then
		astcanary_pid=`pgrep -d " " -f "astcanary $ASTRUNDIR/alt.asterisk.canary.tweet.tweet.tweet"`
		if [ "$astcanary_pid" ]; then
			# One PID per kill. (The loop used to pass the whole
			# list on every iteration.)
			for i in $astcanary_pid; do ocf_run kill -s KILL $i; done
		fi
	fi

	ocf_log info "Asterisk PBX stopped"
	return $OCF_SUCCESS
}
#######################################################################
# meta-data and usage/help are answered before any validation takes place.
case "$1" in
	meta-data)
		meta_data
		exit $OCF_SUCCESS
		;;
	usage|help)
		usage
		exit $OCF_SUCCESS
		;;
esac

# Anything except meta-data and help must pass validation
asterisk_validate || exit $?

# Now that validate has passed and we can be sure to be able to read
# the config file, set convenience variables
ASTRUNDIR=$(grep astrundir $OCF_RESKEY_config | awk '/^astrundir/ {print $3}')
ASTLOGDIR=$(grep astlogdir $OCF_RESKEY_config | awk '/^astlogdir/ {print $3}')

# What kind of method was invoked?
case "$1" in
	start)
		asterisk_start
		;;
	stop)
		asterisk_stop
		;;
	status)
		asterisk_status
		;;
	monitor)
		asterisk_monitor
		;;
	validate-all)
		# validation has already run above
		;;
	*)
		usage
		exit $OCF_ERR_UNIMPLEMENTED
		;;
esac
diff --git a/heartbeat/dnsupdate b/heartbeat/dnsupdate
index 2db2aa45c..d8347996a 100755
--- a/heartbeat/dnsupdate
+++ b/heartbeat/dnsupdate
@@ -1,276 +1,276 @@
#!/bin/bash
#
#
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
# License: GNU General Public License v2
#
# Copyright (c) 2014 SUSE Linux Products GmbH, Lars Marowsky-Brée
# All Rights Reserved.
#
#######################################################################
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
# TODO:
# - Should setting CNAMEs be supported?
# - Should multiple A records be supported?
usage() {
	# One-line summary of the supported actions.
	echo "usage: $0 {start|stop|status|monitor|meta-data|validate-all}"
}
meta_data() {
# Print the agent's OCF meta-data XML on stdout. The here-document
# contains no shell expansions, so the text is emitted verbatim.
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="dnsupdate">
<version>1.0</version>
<longdesc lang="en">
This resource agent manages IP take-over via dynamic DNS updates.
</longdesc>
<shortdesc lang="en">IP take-over via dynamic DNS update</shortdesc>
<parameters>
<parameter name="hostname" unique="1" required="1">
<longdesc lang="en">
The hostname whose IP address will need to be updated.
</longdesc>
<shortdesc lang="en">Hostname to update</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="ip" unique="0" required="1">
<longdesc lang="en">
IP address to set.
</longdesc>
<shortdesc lang="en">IP address to set</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="ttl" unique="0" required="0">
<longdesc lang="en">
Time to live, in seconds, for the DNS record. This
affects how soon DNS updates propagate. It should be
a reasonable compromise between update speed and DNS
server load.
If using booth, the ticket timeout is a good start.
</longdesc>
<shortdesc lang="en">TTL for the DNS record</shortdesc>
<content type="integer" default="300" />
</parameter>
<parameter name="keyfile" unique="0" required="0">
<longdesc lang="en">
The file containing the shared secret needed to update
the DNS record. Please see the nsupdate man page for
the exact syntax.
</longdesc>
<shortdesc lang="en">nsupdate key file</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="server" unique="0" required="0">
<longdesc lang="en">
Which DNS server to send these updates for. When no
server is provided, this defaults to the master server
for the correct zone.
</longdesc>
<shortdesc lang="en">DNS server to contact</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="serverport" unique="0" required="0">
<longdesc lang="en">
Port number on the DNS server.
Note: due to a limitation in the nsupdate command, this option will only
take effect if you also specify the DNS server!
</longdesc>
<shortdesc lang="en">Port number on the DNS server</shortdesc>
<content type="integer" default="53" />
</parameter>
<parameter name="nsupdate_opts" unique="0" required="0">
<longdesc lang="en">
Additional options to be passed to nsupdate.
</longdesc>
<shortdesc lang="en">Additional nsupdate options</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="unregister_on_stop" unique="0" required="0">
<longdesc lang="en">
Whether or not to actively remove records on stop. This is not needed
for normal operation, since the site taking over the IP address will
delete all previous records.
</longdesc>
<shortdesc lang="en">Remove A record on stop</shortdesc>
<content type="boolean" default="false" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="30" />
<action name="stop" timeout="30" />
<action name="status" depth="0" timeout="30" interval="10" />
<action name="monitor" depth="0" timeout="30" interval="10" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="5" />
</actions>
</resource-agent>
END
}
dnsupdate_status() {
	# The resource counts as active only when the short-form A lookup
	# for "$hostname." returns exactly $ip and nothing else.
	local answer
	answer=$(dig ${dig_opts} ${hostname}. A +short 2>/dev/null)
	[ "$answer" = "$ip" ] && return $OCF_SUCCESS
	return $OCF_NOT_RUNNING
}
dnsupdate_monitor() {
	# A probe is always answered with "not running"; every other
	# monitor call defers to dnsupdate_status.
	ocf_is_probe && return $OCF_NOT_RUNNING
	dnsupdate_status
}
dnsupdate_start() {
	# Nothing to do when the name already resolves to our address.
	if dnsupdate_status ; then
		ocf_log info "$hostname already resolves to $ip"
		return $OCF_SUCCESS
	fi

	ocf_log info "Updating DNS records for $hostname"
	# Build the nsupdate script: optional server line, drop all
	# existing A records for the name, add ours, send.
	{
		[ -z "$dns_server" ] || echo "server ${dns_server} ${dns_serverport}"
		echo "update delete $hostname A"
		echo "update add $hostname ${OCF_RESKEY_ttl} A $ip"
		echo "send"
	} | nsupdate ${nsupdate_opts}

	# The result of the start is whatever monitor reports afterwards.
	dnsupdate_monitor
}
dnsupdate_stop() {
	# Stop always reports success. The record is only removed when
	# unregister_on_stop is true and the name currently resolves to $ip.
	if ocf_is_true "${OCF_RESKEY_unregister_on_stop}"; then
		if dnsupdate_status ; then
			ocf_log info "Unregistering $hostname with $ip from DNS server"
			{
				[ -z "$dns_server" ] || echo "server ${dns_server} ${dns_serverport}"
				echo "update delete $hostname A $ip"
				echo "send"
			} | nsupdate ${nsupdate_opts}

			# There's no point in invoking a stop failure
			# here. If another site takes over the record,
			# it'll delete all previous entries anyway.
			dnsupdate_monitor
			[ $? -eq $OCF_NOT_RUNNING ] || ocf_log warn "Unregistering failed!"
		fi
	fi
	return $OCF_SUCCESS
}
dnsupdate_validate() {
	# Copy the resource parameters into the globals used by the other
	# actions, apply defaults, and exit with $OCF_ERR_CONFIGURED on
	# invalid input.
	hostname=${OCF_RESKEY_hostname}
	ip=${OCF_RESKEY_ip}
	dig_opts=""
	dns_server=${OCF_RESKEY_server}
	: ${OCF_RESKEY_serverport:="53"}
	dns_serverport=${OCF_RESKEY_serverport}
	: ${OCF_RESKEY_ttl:="300"}
	nsupdate_opts=${OCF_RESKEY_nsupdate_opts}

	# Fall back to the unadvertised "opts" parameter, with a warning.
	if [ -z "$nsupdate_opts" ] && [ -n "$OCF_RESKEY_opts" ]; then
		nsupdate_opts=${OCF_RESKEY_opts}
		ocf_log warn "opts was never an advertised parameter, please use nsupdate_opts"
	fi

	[ -n "$hostname" ] || {
		ocf_log err "No hostname specified."
		exit $OCF_ERR_CONFIGURED
	}
	[ -n "$ip" ] || {
		ocf_log err "No IP specified."
		exit $OCF_ERR_CONFIGURED
	}
	ocf_is_decimal $OCF_RESKEY_ttl || {
		ocf_log err "ttl $OCF_RESKEY_ttl is not valid"
		exit $OCF_ERR_CONFIGURED
	}
	ocf_is_decimal $dns_serverport || {
		ocf_log err "serverport $dns_serverport is not valid"
		exit $OCF_ERR_CONFIGURED
	}

	# dig always gets the port; the server only when one is configured.
	dig_opts="${dig_opts} -p ${dns_serverport}"
	if [ -n "$dns_server" ]; then
		dig_opts="${dig_opts} @${dns_server}"
	fi

	if [ -n "$OCF_RESKEY_keyfile" ]; then
		if [ ! -f ${OCF_RESKEY_keyfile} ]; then
			ocf_log err "keyfile $OCF_RESKEY_keyfile does not exist"
			exit $OCF_ERR_CONFIGURED
		fi
		nsupdate_opts="${nsupdate_opts} -k $OCF_RESKEY_keyfile"
	fi
}
# Exactly one argument (the action) is expected.
[ $# -eq 1 ] || {
	usage
	exit $OCF_ERR_ARGS
}

# These two actions need neither the binaries nor valid parameters.
case $1 in
	meta-data)
		meta_data
		exit $OCF_SUCCESS
		;;
	usage)
		usage
		exit $OCF_SUCCESS
		;;
esac

check_binary dig
check_binary nsupdate
dnsupdate_validate

case $1 in
	start)
		dnsupdate_start
		;;
	stop)
		dnsupdate_stop
		;;
	monitor)
		dnsupdate_monitor
		;;
	status)
		dnsupdate_status
		;;
	validate-all)
		# We've already run this
		exit $OCF_SUCCESS
		;;
	*)
		usage
		exit $OCF_ERR_UNIMPLEMENTED
		;;
esac
# Propagate the status of whichever action ran.
exit $?
diff --git a/heartbeat/http-mon.sh b/heartbeat/http-mon.sh
index 46a482099..ce13ccd39 100644
--- a/heartbeat/http-mon.sh
+++ b/heartbeat/http-mon.sh
@@ -1,140 +1,140 @@
#
# General http monitor code
# (sourced by apache and httpmon)
#
# Author: Alan Robertson
# Sun Jiang Dong
#
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
#
# License: GNU General Public License (GPL)
#
# Copyright: (C) 2002-2005 International Business Machines
#
# default options for http clients
# NB: We _always_ test a local resource, so it should be
# safe to connect from the local interface.
# Use the IPv6 loopback when use_ipv6 is set or the status URL contains
# "::"; otherwise stay on the IPv4 loopback.
if ocf_is_true "$OCF_RESKEY_use_ipv6" || echo "$STATUSURL" | grep -qs "::"; then
	bind_address="::1"
	curl_ipv6_opts="-g"
else
	bind_address="127.0.0.1"
	curl_ipv6_opts=""
fi
WGETOPTS="-O- -q -L --no-proxy --bind-address=$bind_address"
CURLOPTS="-o - -Ss -L --interface lo $curl_ipv6_opts"
request_url_header() {
	# Check that the server answers a header request for URL $1.
	# curl is used when available, otherwise wget.
	# Returns:
	#   with curl: curl's exit status
	#   with wget: $OCF_SUCCESS when wget succeeds; otherwise the status
	#              of a grep for an "HTTP/1.1 4xx" line in wget's output
	#   $OCF_ERR_GENERIC when neither client is installed
	local header

	if which curl >/dev/null 2>&1; then
		curl -IL --connect-timeout 5 --interface lo $curl_ipv6_opts "$1" > /dev/null 2>&1
		return $?
	fi

	if which wget >/dev/null 2>&1; then
		# Assign separately from the "local" declaration: a combined
		# "local header=..." makes $? the status of "local" (always 0),
		# which used to hide every wget failure.
		header=$(wget --server-response --spider --timeout=5 --tries=2 "$1" 2>&1)
		if [ $? -eq 0 ]; then
			return $OCF_SUCCESS
		fi
		# a 4xx error is still a server response.
		echo "$header" | grep "HTTP/1.1 4.. " > /dev/null 2>&1
		return $?
	fi

	return $OCF_ERR_GENERIC
}
#
# run the http client
#
curl_func() {
	# Fetch URL $1 with curl. When a test user is configured, the
	# credentials are fed to curl as a config file on stdin (-K -)
	# instead of being placed on the command line.
	cl_opts="$CURLOPTS $test_httpclient_opts"
	if [ -z "$test_user" ]; then
		curl $cl_opts "$1"
		return
	fi
	echo "-u $test_user:$test_password" | curl -K - $cl_opts "$1"
}
wget_func() {
	# Fetch URL $1 with wget, adding HTTP authentication options when a
	# test user is configured.
	cl_opts="$WGETOPTS $test_httpclient_opts"
	if [ -n "$test_user" ]; then
		auth="--http-user=$test_user --http-passwd=$test_password"
	else
		auth=""
	fi
	wget $auth $cl_opts "$1"
}
#
# rely on whatever the user provided
userdefined() {
# Run the user-configured client command with its configured options
# (both expanded unquoted, so they are word-split) and URL $1 as the
# final argument; the exit status is that of the client.
$test_httpclient $test_httpclient_opts "$1"
}
#
# find a good http client
#
findhttpclient() {
	# Print the name of the first usable client: the configured $CLIENT
	# if it is set and found by "which", then wget (preferred for
	# historical reasons), then curl. Returns 1 when none is available.
	local candidate
	for candidate in "$CLIENT" wget curl; do
		[ -n "$candidate" ] || continue
		if which "$candidate" >/dev/null 2>&1; then
			echo "$candidate"
			return
		fi
	done
	return 1
}
gethttpclient() {
	# Print the name of the function that runs the test client:
	# curl_func or wget_func for the supported clients, "userdefined"
	# for anything else. An empty $test_httpclient defaults to
	# $ourhttpclient.
	: "${test_httpclient:=$ourhttpclient}"
	if [ "$test_httpclient" = "curl" ] || [ "$test_httpclient" = "wget" ]; then
		echo ${test_httpclient}_func
	else
		echo userdefined
	fi
}
# test configuration good?
is_testconf_sane() {
	# Both the regular expression and the URL are mandatory.
	if [ -z "$test_regex" ] || [ -z "$test_url" ]; then
		ocf_log err "test regular expression or test url empty"
		return 1
	fi
	# User and password must be given together or not at all.
	if [ -n "$test_user$test_password" ]; then
		if [ -z "$test_user" ] || [ -z "$test_password" ]; then
			ocf_log err "bad user authentication for extended test"
			return 1
		fi
	fi
	return 0
}
#
# read the test definition from the config
#
readtestconf() {
# Parse a test definition from stdin into the test_* variables.
# Lines are "key value" pairs. Parsing starts after a "test <name>" line
# (any "test" line when $1 is empty) and stops at an "end" line.
# An unknown keyword inside the definition is logged and returns 1.
# NOTE(review): otherwise the return status is that of the while loop,
# i.e. of the last command run in its body - confirm callers do not rely
# on it.
test_name="$1" # we look for this one or the first one if empty
lcnt=0
readdef=""
test_url="" test_regex=""
test_user="" test_password=""
test_httpclient="" test_httpclient_opts=""
while read key value; do
lcnt=$((lcnt+1))
if [ "$readdef" ]; then
case "$key" in
"url") test_url="$value" ;;
"user") test_user="$value" ;;
"password") test_password="$value" ;;
"client") test_httpclient="$value" ;;
"client_opts") test_httpclient_opts="$value" ;;
"match") test_regex="$value" ;;
"end") break ;;
"#"*|"") ;;
*) ocf_log err "$lcnt: $key: unknown keyword"; return 1 ;;
esac
else
# Not inside a definition yet: look for the matching "test" line.
[ "$key" = "test" ] &&
[ -z "$test_name" -o "$test_name" = "$value" ] &&
readdef=1
fi
done
}
diff --git a/heartbeat/ids b/heartbeat/ids
index 740f4500b..f2329336d 100755
--- a/heartbeat/ids
+++ b/heartbeat/ids
@@ -1,738 +1,738 @@
#!/bin/sh
#
#
# ids
#
# Description:
#
# OCF resource agent that manages an
# IBM Informix Dynamic Server (IDS) instance
# as a High-Availability resource.
####
#
# Author: Lars D. Forseth, <lars.forseth@de.ibm.com> or <lars@forseth.de>
# Created: May 25th 2007
# Last Modified: July 30th 2007
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
# License: GNU General Public License (GPL), Version 2 or later
# Copyright: (c) 2002 - 2007 International Business Machines, Inc.
#
# This code is inspired by the db2 OCF resource agent
# written by Alan Robertson, <alanr@unix.sh>
####
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
####
#
# Example usage as it would appear in /etc/ha.d/haresources:
# node1 192.168.0.1 ids::/informix::ids1::onconfig.ids1
#
#
# --> Note that passing dbname and sqltestquery in heartbeat version 1 style is not supported!
#
# See usage() function below for more details...
####
#
# OCF instance parameters:
# OCF_RESKEY_informixdir
# OCF_RESKEY_informixserver
# OCF_RESKEY_onconfig
# OCF_RESKEY_dbname
# OCF_RESKEY_sqltestquery
####
#
# Include general OCF functions and variables (such as OCF return codes).
#
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#
# Function that displays the usage of this script.
#
ids_usage() {
# Print the usage text. The method list comes from ids_methods; the
# unquoted echo collapses it onto one line and tr turns the separating
# spaces into '|'.
methods=`ids_methods`
methods=`echo $methods | tr ' ' '|'`
echo "
usage: $0 ($methods)
$0 manages an IBM Informix Dynamic Server (IDS) instance as an High-Availability resource.
The 'start' operation starts the database.
The 'stop' operation stops the database.
The 'status' operation reports whether the database is running
The 'monitor' operation reports whether the database seems to be working
The 'validate-all' operation reports whether the parameters are valid
The 'methods' operation lists the methods $0 supports
The 'usage' operation displays this text
The 'meta-data' operation returns the meta-data (in XML) of this resource script
"
}
#
# Function that displays the possible methods this script supports.
#
ids_methods() {
# Print the names of the supported methods, one per line. The string
# starts and ends with a newline, so the list is framed by blank lines.
echo "
start
stop
status
monitor
validate-all
methods
usage
meta-data
"
}
#
# Function that displays the meta-data of this OCF resource agent.
#
ids_meta_data() {
# Print the agent's OCF meta-data XML on stdout.
# The here-document delimiter is unquoted, so the shell expands "$"
# references inside it. The literal path '$INFORMIXDIR/etc/$ONCONFIG' in
# the onconfig description is therefore backslash-escaped; unescaped it
# was replaced by the (usually empty) values of those variables.
cat <<-!
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="ids">
<version>1.0</version>
<longdesc lang="en">
OCF resource agent to manage an IBM Informix Dynamic Server (IDS) instance as an High-Availability resource.
</longdesc>
<shortdesc lang="en">Manages an Informix Dynamic Server (IDS) instance</shortdesc>
<parameters>
<parameter name="informixdir" required="0">
<longdesc lang="en">
The value the environment variable INFORMIXDIR has after a typical installation of IDS.
Or in other words: the path (without trailing '/') where IDS was installed to.
If this parameter is unspecified the script will try to get the value from the shell environment.
</longdesc>
<shortdesc lang="en">
INFORMIXDIR environment variable
</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="informixserver" required="0">
<longdesc lang="en">
The value the environment variable INFORMIXSERVER has after a typical installation of IDS.
Or in other words: the name of the IDS server instance to manage.
If this parameter is unspecified the script will try to get the value from the shell environment.
</longdesc>
<shortdesc lang="en">
INFORMIXSERVER environment variable
</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="onconfig" required="0">
<longdesc lang="en">
The value the environment variable ONCONFIG has after a typical installation of IDS.
Or in other words: the name of the configuration file for the IDS instance specified in INFORMIXSERVER.
The specified configuration file will be searched at '\$INFORMIXDIR/etc/\$ONCONFIG'.
If this parameter is unspecified the script will try to get the value from the shell environment.
</longdesc>
<shortdesc lang="en">
ONCONFIG environment variable
</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="dbname" required="0">
<longdesc lang="en">
This parameter defines which database to use in order to monitor the IDS instance.
If this parameter is unspecified the script will use the 'sysmaster' database as a default.
</longdesc>
<shortdesc lang="en">
database to use for monitoring, defaults to 'sysmaster'
</shortdesc>
<content type="string" default="sysmaster" />
</parameter>
<parameter name="sqltestquery" required="0">
<longdesc lang="en">
SQL test query to run on the database specified by the parameter 'dbname'
in order to monitor the IDS instance and determine if it's functional or not.
If this parameter is unspecified the script will use 'SELECT COUNT(*) FROM systables;' as a default.
</longdesc>
<shortdesc lang="en">
SQL test query to use for monitoring, defaults to 'SELECT COUNT(*) FROM systables;'
</shortdesc>
<content type="string" default="SELECT COUNT(*) FROM systables;" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="120" />
<action name="stop" timeout="120" />
<action name="status" timeout="60" />
<action name="monitor" depth="0" timeout="30" interval="10" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
<action name="methods" timeout="5" />
<action name="usage" timeout="5" />
</actions>
</resource-agent>
!
}
#
# Function that either forwards log messages to the ocf_log function
# provided by heartbeat or simply prints them to standard out via echo.
# This is determined by setting the variable "idslogger" to "echo" or "ocf".
# The default for "idslogger" is "ocf".
#
ids_log() {
	# Where should the passed log messages go: to standard output via
	# the echo command ("echo") or to the ocf_log function ("ocf")?
	# Default is "ocf".
	idslogger="ocf"
	# Unless "idsdebug" is "true", only messages of type "error" are
	# emitted; this keeps the decision about chattiness in one place
	# instead of scattering if-statements over the other functions.
	idsdebug=false

	# Exactly two non-empty arguments (type and message) are required.
	[ $# -eq 2 ] && [ -n "$1" ] && [ -n "$2" ] || return 0
	# Drop non-error messages unless debugging is switched on.
	[ "$idsdebug" = "true" ] || [ "$1" = "error" ] || return 0

	if [ "$idslogger" = "echo" ]; then
		# Print messages to stdout via echo command.
		echo "$(date +'%b %d %H:%M:%S'): [$1] $2"
	else
		# Pass messages to ocf_log function.
		ocf_log "$1" "$2"
	fi
}
#
# Function that prints the current values of important environment variables
# needed by the script and the IDS instance itself. The just mentioned variables are:
# - INFORMIXDIR
# - INFORMIXSERVER
# - ONCONFIG
# - PATH
# - LD_LIBRARY_PATH
#
ids_debug() {
	# Dump the variables this script works with, plus the invoking user
	# and working directory, as "info" messages through ids_log.
	ids_log info "called ids_debug"

	ids_log info "INFORMIXDIR=$INFORMIXDIR"
	ids_log info "OCF_RESKEY_informixdir=$OCF_RESKEY_informixdir"

	ids_log info "INFORMIXSERVER=$INFORMIXSERVER"
	ids_log info "OCF_RESKEY_informixserver=$OCF_RESKEY_informixserver"

	ids_log info "ONCONFIG=$ONCONFIG"
	ids_log info "OCF_RESKEY_onconfig=$OCF_RESKEY_onconfig"

	ids_log info "PATH=$PATH"
	ids_log info "LD_LIBRARY_PATH=$LD_LIBRARY_PATH"

	ids_log info "dbname=$OCF_RESKEY_dbname"
	ids_log info "sqltestquery=$OCF_RESKEY_sqltestquery"

	ids_log info "this script is run as user: $(id)"
	ids_log info "...in the current working directory: $(pwd)"
}
#
# Function that validates if the passed parameters are valid and sets them if valid.
# If the first three parameters have not been passed,
# this function checks whether they have been already set in the parent's shell environment.
# The variables that are checked and set (only the capitalized ones are set) are:
# - INFORMIXDIR
# - INFORMIXSERVER
# - ONCONFIG
# - PATH
# - LD_LIBRARY_PATH
# - dbname
# - sqltestquery
#
ids_validate() {
	# Validates the resource configuration and prepares the environment.
	#
	# Reads:    OCF_RESKEY_informixdir, OCF_RESKEY_informixserver,
	#           OCF_RESKEY_onconfig, OCF_RESKEY_dbname, OCF_RESKEY_sqltestquery
	#           and the inherited INFORMIXDIR, INFORMIXSERVER, ONCONFIG, PATH,
	#           LD_LIBRARY_PATH.
	# Exports:  INFORMIXDIR, INFORMIXSERVER, ONCONFIG, PATH, LD_LIBRARY_PATH,
	#           OCF_RESKEY_dbname, OCF_RESKEY_sqltestquery.
	# Returns:  $OCF_SUCCESS if everything is valid, $OCF_ERR_ARGS for an
	#           invalid INFORMIXDIR/INFORMIXSERVER/ONCONFIG, $OCF_ERR_PERM if a
	#           required IDS binary is missing or not executable.
	#
	# The checks form a chain: once one of them fails, the remaining
	# environment checks are skipped and the first error code is returned
	# unchanged (it is never reset to success by a later stage).
	ids_log info "called ids_validate"
	rc=$OCF_SUCCESS

	# If all three OCF parameters were passed they take precedence over
	# whatever the parent shell environment provides.
	if [ -n "$OCF_RESKEY_informixdir" -a -n "$OCF_RESKEY_informixserver" -a -n "$OCF_RESKEY_onconfig" ]; then
		ids_log info "ids_validate: passed vars not empty"
		INFORMIXDIR=$OCF_RESKEY_informixdir
		export INFORMIXDIR
		INFORMIXSERVER=$OCF_RESKEY_informixserver
		export INFORMIXSERVER
		ONCONFIG=$OCF_RESKEY_onconfig
		export ONCONFIG
	fi

	# INFORMIXDIR must be non-empty and name an existing directory.
	if [ -n "$INFORMIXDIR" -a -d "$INFORMIXDIR" ]; then
		ids_log info "ids_validate: INFORMIXDIR is valid: $INFORMIXDIR"
	else
		ids_log error "ids_validate: INFORMIXDIR is invalid: $INFORMIXDIR"
		rc=$OCF_ERR_ARGS
	fi

	# INFORMIXSERVER must be non-empty.
	if [ $rc -eq $OCF_SUCCESS ]; then
		if [ -n "$INFORMIXSERVER" ]; then
			ids_log info "ids_validate: INFORMIXSERVER is valid: $INFORMIXSERVER"
		else
			ids_log error "ids_validate: INFORMIXSERVER is invalid: $INFORMIXSERVER"
			rc=$OCF_ERR_ARGS
		fi
	fi

	# ONCONFIG must name a non-empty file below $INFORMIXDIR/etc. If it is
	# not set at all, fall back to "onconfig" and then to "onconfig.std".
	if [ $rc -eq $OCF_SUCCESS ]; then
		if [ -n "$ONCONFIG" -a -s "$INFORMIXDIR/etc/$ONCONFIG" ]; then
			ids_log info "ids_validate: ONCONFIG is a non-empty file in: \$INFORMIXDIR/etc/\$ONCONFIG where ONCONFIG=$ONCONFIG"
		elif [ -z "$ONCONFIG" -a -s "$INFORMIXDIR/etc/onconfig" ]; then
			ONCONFIG="onconfig"
			export ONCONFIG
			ids_log info "ids_validate: ONCONFIG is a non-empty file in: \$INFORMIXDIR/etc/\$ONCONFIG where ONCONFIG=$ONCONFIG"
		elif [ -z "$ONCONFIG" -a -s "$INFORMIXDIR/etc/onconfig.std" ]; then
			ONCONFIG="onconfig.std"
			export ONCONFIG
			ids_log info "ids_validate: ONCONFIG is a non-empty file in: \$INFORMIXDIR/etc/\$ONCONFIG where ONCONFIG=$ONCONFIG"
		else
			ids_log error "ids_validate: ONCONFIG is invalid, searched for it in: \$INFORMIXDIR/etc/\$ONCONFIG where ONCONFIG=$ONCONFIG"
			rc=$OCF_ERR_ARGS
		fi
	fi

	# oninit, onstat, onmode and dbaccess must exist in $INFORMIXDIR/bin
	# and be executable.
	if [ $rc -eq $OCF_SUCCESS ]; then
		if [ -x "$INFORMIXDIR/bin/oninit" -a -x "$INFORMIXDIR/bin/onstat" -a -x "$INFORMIXDIR/bin/onmode" -a -x "$INFORMIXDIR/bin/dbaccess" ]; then
			ids_log info "ids_validate: oninit, onstat, onmode and dbaccess exist and are executable in: \$INFORMIXDIR/bin/"
		else
			ids_log error "ids_validate: oninit, onstat, onmode or dbaccess don't exist or they are not executable in: \$INFORMIXDIR/bin/"
			rc=$OCF_ERR_PERM
		fi
	fi

	# Extend PATH and LD_LIBRARY_PATH for the IDS tools, but only if
	# INFORMIXDIR is not in there yet; otherwise both would keep growing on
	# every invocation of the agent. grep -F matches the directory as a
	# literal string, and the quoting keeps paths with special characters
	# in one piece.
	if [ $rc -eq $OCF_SUCCESS ]; then
		echo "$PATH" | grep -F -e "$INFORMIXDIR" > /dev/null 2>&1
		inpath=$?
		if [ $inpath -ne 0 ]; then
			PATH="${INFORMIXDIR}/bin:${PATH}"
			export PATH
			ids_log info "ids_validate: PATH did not contain INFORMIXDIR, added \$INFORMIXDIR/bin"
		else
			ids_log info "ids_validate: INFORMIXDIR already in PATH, where PATH=$PATH"
		fi

		echo "$LD_LIBRARY_PATH" | grep -F -e "$INFORMIXDIR" > /dev/null 2>&1
		inldlibpath=$?
		if [ $inldlibpath -ne 0 ]; then
			# Prepend the IDS library directories and keep any entries
			# that were already configured.
			LD_LIBRARY_PATH="${INFORMIXDIR}/lib:${INFORMIXDIR}/lib/esql${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
			export LD_LIBRARY_PATH
			ids_log info "ids_validate: LD_LIBRARY_PATH did not contain INFORMIXDIR, added \$INFORMIXDIR/lib and \$INFORMIXDIR/lib/esql, added them"
		else
			ids_log info "ids_validate: INFORMIXDIR already in LD_LIBRARY_PATH, where LD_LIBRARY_PATH=$LD_LIBRARY_PATH"
		fi
	fi

	# dbname is optional: an empty value is replaced by the default.
	# This never touches rc, so an earlier error stays visible.
	if [ -n "$OCF_RESKEY_dbname" ]; then
		ids_log info "ids_validate: dbname is valid: $OCF_RESKEY_dbname"
	else
		ids_log info "ids_validate: dbname is invalid: $OCF_RESKEY_dbname"
		ids_log info "ids_validate: using 'sysmaster' as default..."
		OCF_RESKEY_dbname="sysmaster"
		export OCF_RESKEY_dbname
	fi

	# sqltestquery is optional as well and handled the same way.
	if [ -n "$OCF_RESKEY_sqltestquery" ]; then
		ids_log info "ids_validate: sqltestquery is valid: $OCF_RESKEY_sqltestquery"
	else
		ids_log info "ids_validate: sqltestquery is invalid: $OCF_RESKEY_sqltestquery"
		ids_log info "ids_validate: using 'SELECT COUNT(*) FROM systables;' as default..."
		OCF_RESKEY_sqltestquery="SELECT COUNT(*) FROM systables;"
		export OCF_RESKEY_sqltestquery
	fi

	# Return exit status code.
	return $rc
}
#
# Function that starts the IDS instance and reports any error that
# may occur while starting.
#
ids_start() {
	# Starts the IDS instance with 'oninit' unless it is already running.
	#
	# Returns: $OCF_SUCCESS when the instance is (or already was) online,
	#          $OCF_ERR_GENERIC when it is in an undefined state or oninit
	#          failed, $OCF_ERR_UNIMPLEMENTED for an unexpected status code.
	ids_log info "called ids_start"

	# Get current status of IDS instance.
	ids_status
	stat=$?

	case $stat in
	# IDS instance already running - exit with success.
	$OCF_SUCCESS)
		ids_log info "ids_start: IDS instance already running: $stat"
		rc=$OCF_SUCCESS;;

	# IDS instance in undefined state - exit with error.
	$OCF_ERR_GENERIC)
		ids_log error "ids_start: IDS instance in undefined state: $stat"
		ids_debug
		rc=$OCF_ERR_GENERIC;;

	# IDS instance not running - try to start it.
	$OCF_NOT_RUNNING)
		ids_log info "ids_start: executing 'oninit' now..."
		oninit
		stat=$?
		ids_log info "ids_start: done executing 'oninit': $stat"

		if [ $stat -eq 0 ]; then
			# oninit succeeded - wait until IDS is completely online.
			# The loop has no exit condition of its own: if IDS takes
			# too long or hangs, the operation timeout of the cluster
			# manager cancels the start and thereby ends the loop.
			# Pause between polls so that waiting neither spins the CPU
			# nor floods the log with status messages.
			while :; do
				ids_status
				stat=$?
				[ $stat -eq $OCF_SUCCESS ] && break
				sleep 1
			done
			# IDS is running now - success.
			ids_log info "ids_start: IDS instance successfully started: $stat"
			rc=$OCF_SUCCESS
		else
			# oninit terminated with an error - starting failed.
			ids_log error "ids_start: starting IDS instance failed: $stat"
			ids_debug
			rc=$OCF_ERR_GENERIC
		fi
		;;

	# Unexpected state - return OCF_ERR_UNIMPLEMENTED error.
	*)
		ids_log error "ids_start: unexpected state returned from ids_status: $stat"
		ids_debug
		rc=$OCF_ERR_UNIMPLEMENTED;;
	esac

	# Return exit status code.
	return $rc
}
#
# Function that stops the IDS instance and reports any error that
# may occur while stopping.
#
ids_stop() {
	# Stops the IDS instance with 'onmode -kuy' unless it is already down.
	#
	# Returns: $OCF_SUCCESS when the instance is (or already was) stopped,
	#          $OCF_ERR_GENERIC when it is in an undefined state, onmode
	#          failed or the instance is not down afterwards,
	#          $OCF_ERR_UNIMPLEMENTED for an unexpected status code.
	ids_log info "called ids_stop"

	ids_status
	stat=$?

	case $stat in
	# IDS instance is not running - nothing to do.
	$OCF_NOT_RUNNING)
		ids_log info "ids_stop: IDS instance is not running: $stat"
		rc=$OCF_SUCCESS;;

	# IDS instance is in an undefined state - exit with error.
	$OCF_ERR_GENERIC)
		ids_log error "ids_stop: IDS instance in undefined state: $stat"
		ids_debug
		rc=$OCF_ERR_GENERIC;;

	# IDS instance is running - try to stop it.
	$OCF_SUCCESS)
		ids_log info "ids_stop: running 'onmode -kuy' now..."
		onmode -kuy
		stat=$?
		ids_log info "ids_stop: done running 'onmode -kuy' now: $stat"

		if [ $stat -ne 0 ]; then
			# The onmode command itself failed.
			ids_log error "ids_stop: stopping IDS instance (by executing 'onmode -kuy') failed: $stat"
			ids_debug
			rc=$OCF_ERR_GENERIC
		else
			# onmode succeeded - verify the new state of the instance.
			ids_status
			stat=$?
			if [ $stat -eq $OCF_NOT_RUNNING ]; then
				ids_log info "ids_stop: IDS instance successfully stopped: $stat"
				rc=$OCF_SUCCESS
			else
				# Still running or even undefined - failure!
				ids_log error "ids_stop: stopping IDS instance failed: $stat"
				ids_debug
				rc=$OCF_ERR_GENERIC
			fi
		fi
		;;

	# Unexpected state - return OCF_ERR_UNIMPLEMENTED error.
	*)
		ids_log error "ids_stop: unexpected state returned from ids_status: $stat"
		ids_debug
		rc=$OCF_ERR_UNIMPLEMENTED;;
	esac

	# Return exit status code indicating whether IDS was successfully stopped or not.
	return $rc
}
#
# Function that determines the current status/state of the IDS instance,
# meaning whether it is running (the output of "onstat -" contains "On-Line"),
# not running (the output of "onstat -" contains "shared memory not initialized")
# or in an undefined state (the output of "onstat -" contains "Quiescent", "Single-User", or anything else).
# If the IDS instance is found to be running, the exit status code indicates success; otherwise it indicates failure.
#
ids_status() {
	# Classifies the instance state from the text printed by "onstat -".
	# The patterns are tried in order, so "On-Line" wins over the
	# "not initialized" message, and everything else counts as undefined.
	ids_log info "called ids_status"

	# Capture the status line(s) reported by the onstat tool.
	stat=$(onstat -)

	case "$stat" in
	*"On-Line"*)
		# Instance is up.
		ids_log info "ids_status: IDS instance running: $stat"
		rc=$OCF_SUCCESS
		;;
	*"shared memory not initialized"*)
		# Instance is down.
		ids_log info "ids_status: IDS instance not running: $stat"
		rc=$OCF_NOT_RUNNING
		;;
	*)
		# Anything else is treated as an undefined state.
		ids_log error "ids_status: IDS instance status undefined: $stat"
		rc=$OCF_ERR_GENERIC
		;;
	esac

	# Hand the classification back to the caller as the exit status.
	return $rc
}
#
# Function that monitors the current status _and_ functionality of the IDS instance.
# First the state of the instance is determined. If it is running, an SQL test query is
# executed on the database. If the SQL test query executes successfully, the instance's
# status is rechecked and, if it is still running, the function returns an exit
# status code indicating success. If any of the steps described above fails,
# the function returns an error.
#
ids_monitor() {
	# Checks that the IDS instance is running and answers an SQL query.
	#
	# Reads:   OCF_RESKEY_sqltestquery (query text), OCF_RESKEY_dbname
	#          (database the query is run against).
	# Returns: $OCF_SUCCESS when the instance is online before and after a
	#          successful test query, $OCF_NOT_RUNNING when it is down,
	#          $OCF_ERR_GENERIC for an undefined state or a failed query,
	#          $OCF_ERR_UNIMPLEMENTED for an unexpected status code.
	ids_log info "called ids_monitor"

	ids_status
	stat=$?

	case $stat in
	# IDS instance is not running - monitoring failed.
	$OCF_NOT_RUNNING)
		ids_log info "ids_monitor: IDS instance is not running: $stat"
		rc=$OCF_NOT_RUNNING;;

	# IDS instance in an undefined state - exit with error.
	$OCF_ERR_GENERIC)
		ids_log error "ids_monitor: IDS instance in undefined state: $stat"
		ids_debug
		rc=$OCF_ERR_GENERIC;;

	# IDS instance is running - try to execute the sql test query and recheck state.
	$OCF_SUCCESS)
		ids_log info "ids_monitor: IDS instance is running (before executing sql test query)"
		ids_log info "ids_monitor: running sql test query now..."
		# The query must be passed on verbatim: unquoted, the shell would
		# apply word splitting and pathname expansion to it, so a '*' in
		# the SQL text could be replaced by file names from the current
		# working directory.
		printf '%s\n' "$OCF_RESKEY_sqltestquery" | dbaccess "$OCF_RESKEY_dbname" - > /dev/null 2>&1
		stat=$?
		ids_log info "ids_monitor: done running sql test query now: $stat"

		if [ $stat -eq 0 ]; then
			# The query succeeded - make sure the instance is still online.
			ids_status
			stat=$?
			if [ $stat -eq $OCF_SUCCESS ]; then
				ids_log info "ids_monitor: successfully ran sql test query on IDS instance: $stat"
				rc=$OCF_SUCCESS
			else
				# New state is: not running or even undefined - failure!
				ids_log error "ids_monitor: running sql test query on IDS instance failed: $stat"
				ids_debug
				rc=$OCF_ERR_GENERIC
			fi
		else
			# The sql test query terminated with an error.
			ids_log error "ids_monitor: running sql test query on IDS instance failed: $stat"
			ids_debug
			rc=$OCF_ERR_GENERIC
		fi
		;;

	# Unexpected state - return OCF_ERR_UNIMPLEMENTED error!
	*)
		ids_log error "ids_monitor: unexpected state returned from ids_status: $stat"
		ids_debug
		rc=$OCF_ERR_UNIMPLEMENTED;;
	esac

	# Return exit status code indicating whether IDS is running and functional or not.
	return $rc
}
###
#
# M A I N S E C T I O N
#
###
# Purely informational methods are answered right away; they do not need a
# validated configuration.
case "$1" in
	usage)     ids_usage;     exit $?;;
	meta-data) ids_meta_data; exit $?;;
esac

# All other methods require a valid configuration before they are processed.
# "validate-all" is the exception: it runs the validation itself further
# down, so checking here as well would only do the work twice.
case "$1" in
	validate-all)
		;;
	*)
		ids_validate
		valid=$?
		ids_log info "main section: validated ids RA configuration, result: $valid"
		# Configuration invalid - terminate with error message.
		if [ $valid -ne $OCF_SUCCESS ]; then
			ids_log error "main section: terminating script due to invalid configuration"
			ids_debug
			exit $OCF_ERR_ARGS
		fi
		;;
esac

# Dispatch the requested method; the exit status of the handler becomes the
# exit status of the script.
case "$1" in
	start)        ids_start;    exit $?;;
	stop)         ids_stop;     exit $?;;
	status)       ids_status;   exit $?;;
	monitor)      ids_monitor;  exit $?;;
	validate-all) ids_validate; exit $?;;
	methods)      ids_methods;  exit $?;;
	*)
		# Missing or unknown method.
		ids_log error "mainsection: no or invalid command supplied: $1"
		exit $OCF_ERR_UNIMPLEMENTED;;
esac
###############################################################################
diff --git a/heartbeat/lxc b/heartbeat/lxc
index dca5fe413..d2f544f27 100755
--- a/heartbeat/lxc
+++ b/heartbeat/lxc
@@ -1,373 +1,374 @@
#!/bin/bash
-# Should now conform to guidelines: http://www.linux-ha.org/doc/dev-guides/ra-dev-guide.html
+# Should now conform to guidelines:
+# https://github.com/ClusterLabs/resource-agents/blob/master/doc/dev-guides/ra-dev-guide.asc
#
-# LXC (Linux Containers) OCF RA.
+# LXC (Linux Containers) OCF RA.
# Used to cluster enable the start, stop and monitoring of a LXC container.
#
# Copyright (c) 2011 AkurIT.com.au, Darren Thompson
# All Rights Reserved.
#
# Without limiting the rights of the original copyright holders
# This resource is licensed under GPL version 2
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
# OCF instance parameters
# OCF_RESKEY_container
# OCF_RESKEY_config
# OCF_RESKEY_log
# OCF_RESKEY_use_screen
# Initialization:
# Locate the OCF shell function library (the directory can be overridden by
# setting OCF_FUNCTIONS_DIR beforehand) and source it.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Defaults
# The *_default values are also substituted into the meta-data output.
# HA_RSCTMP and OCF_RESOURCE_INSTANCE are not set in this script; presumably
# they come from the sourced library and the cluster manager - confirm.
OCF_RESKEY_log_default="${HA_RSCTMP}/${OCF_RESOURCE_INSTANCE}.log"
OCF_RESKEY_use_screen_default="false"
# "${VAR=default}" assigns only when VAR is unset, so a parameter that was
# explicitly passed as an empty string is left empty.
: ${OCF_RESKEY_log=${OCF_RESKEY_log_default}}
: ${OCF_RESKEY_use_screen=${OCF_RESKEY_use_screen_default}}
# Set default TRANS_RES_STATE (temporary file to "flag" if the resource was started but not stopped)
TRANS_RES_STATE="${HA_RSCTMP}/${OCF_RESOURCE_INSTANCE}.state"
# Print the resource agent meta-data (XML) to stdout.
# The here-document delimiter is unquoted, so ${OCF_RESKEY_log_default} and
# ${OCF_RESKEY_use_screen_default} are expanded into the emitted defaults.
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="lxc" version="0.1">
<version>0.1</version>
<longdesc lang="en">Allows LXC containers to be managed by the cluster.
If the container is running "init" it will also perform an orderly shutdown.
It is 'assumed' that the 'init' system will do an orderly shudown if presented with a 'kill -PWR' signal.
On a 'sysvinit' this would require the container to have an inittab file containing "p0::powerfail:/sbin/init 0"
I have absolutly no idea how this is done with 'upstart' or 'systemd', YMMV if your container is using one of them.</longdesc>
<shortdesc lang="en">Manages LXC containers</shortdesc>
<parameters>
<parameter name="container" required="1" unique="1">
<longdesc lang="en">The unique name for this 'Container Instance' e.g. 'test1'.</longdesc>
<shortdesc lang="en">Container Name</shortdesc>
<content type="string" default=""/>
</parameter>
<parameter name="config" required="1" unique="0">
<longdesc lang="en">Absolute path to the file holding the specific configuration for this container e.g. '/etc/lxc/test1/config'.</longdesc>
<shortdesc lang="en">The LXC config file.</shortdesc>
<content type="string" default=""/>
</parameter>
<parameter name="log" required="0" unique="0">
<longdesc lang="en">Absolute path to the container log file</longdesc>
<shortdesc lang="en">Container log file</shortdesc>
<content type="string" default="${OCF_RESKEY_log_default}"/>
</parameter>
<parameter name="use_screen" required="0" unique="0">
<longdesc lang="en">Provides the option of capturing the 'root console' from the container and showing it on a separate screen.
To see the screen output run 'screen -r {container name}'
The default value is set to 'false', change to 'true' to activate this option</longdesc>
<shortdesc lang="en">Use 'screen' for container 'root console' output</shortdesc>
<content type="boolean" default="${OCF_RESKEY_use_screen_default}"/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="10" />
<action name="stop" timeout="30" />
<action name="monitor" timeout="20" interval="60" depth="0"/>
<action name="validate-all" timeout="20" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}
# Print a short usage message to stdout; $0 is expanded to the name the
# script was invoked with.
LXC_usage() {
cat <<END
usage: $0 {start|stop|monitor|validate-all|meta-data}
Expects to have a fully populated OCF RA-compliant environment set.
END
}
# Print the version of the installed LXC tools to stdout.
# The return status is that of the command which produced the version.
lxc_version() {
	if ! have_binary lxc-version ; then
		# since LXC 1.0.0 all commands know about --version
		lxc-info --version
		return
	fi

	# Older releases ship a dedicated tool; the version is the third
	# space-separated field of its output.
	lxc-version | cut -d' ' -f 3
}
# Make sure a cgroup file system is mounted, mounting one if required, and
# enable notify_on_release on it.
#
# Sets the globals CGROUP_MOUNT_POINT, CGROUP_MOUNT_NAME and CGROUP_MOUNTED
# (plus the scratch variables cgmp and cgmn). Always returns 0.
cgroup_mounted() {
	# test cgroup_mounted, mount if required

	# Various possible overrides to cgroup mount point.
	# If kernel supplies cgroup mount point, prefer it.
	CGROUP_MOUNT_POINT=/var/run/lxc/cgroup
	CGROUP_MOUNT_NAME=lxc
	CGROUP_MOUNTED=false
	[[ -d /sys/fs/cgroup ]] && CGROUP_MOUNT_POINT=/sys/fs/cgroup CGROUP_MOUNT_NAME=cgroup

	# If cgroup already mounted, use it no matter where it is.
	# If multiple cgroup mounts, prefer the one named lxc if any.
	# The awk script remembers name and mount point of every cgroup entry in
	# /proc/mounts and stops scanning as soon as it has seen the one named
	# "lxc". The test has to be the comparison "==": the assignment "=" is
	# always true and would stop at the first cgroup mount, whatever its name.
	eval `awk 'BEGIN{P="";N=""}END{print("cgmp="P" cgmn="N)}($3=="cgroup"){N=$1;P=$2;if($1=="lxc")exit}' /proc/mounts`
	[[ "$cgmn" && "$cgmp" && -d "$cgmp" ]] && CGROUP_MOUNT_POINT=$cgmp CGROUP_MOUNT_NAME=$cgmn CGROUP_MOUNTED=true

	# Nothing usable mounted yet: create the mount point if needed and mount.
	$CGROUP_MOUNTED || {
		[[ -d $CGROUP_MOUNT_POINT ]] || ocf_run mkdir -p $CGROUP_MOUNT_POINT
		ocf_run mount -t cgroup $CGROUP_MOUNT_NAME $CGROUP_MOUNT_POINT
	}

	echo 1 >${CGROUP_MOUNT_POINT}/notify_on_release
	return 0
}
# Start the container and wait until it is reported as running.
#
# Reads:   OCF_RESKEY_container, OCF_RESKEY_config, OCF_RESKEY_log,
#          OCF_RESKEY_use_screen, TRANS_RES_STATE.
# Returns: $OCF_SUCCESS once the container runs (or already ran).
# Exits:   with $OCF_ERR_GENERIC if the cgroup setup, the start command or
#          the creation of the state file fails.
LXC_start() {
	# The start command is kept in an array so that every argument stays
	# one word, even if the config path, the log path or the container
	# name contains whitespace or glob characters.
	local -a startcmd
	if ocf_is_true "$OCF_RESKEY_use_screen"; then
		# Run in the foreground inside a detached screen session named
		# after the container, so the root console can be attached later.
		startcmd=(screen -dmS "${OCF_RESKEY_container}" lxc-start -f "${OCF_RESKEY_config}" -n "${OCF_RESKEY_container}" -o "${OCF_RESKEY_log}")
	else
		# Let lxc-start daemonize itself.
		startcmd=(lxc-start -f "${OCF_RESKEY_config}" -n "${OCF_RESKEY_container}" -o "${OCF_RESKEY_log}" -d)
	fi

	LXC_status
	if [ $? -eq $OCF_SUCCESS ]; then
		ocf_log debug "Resource $OCF_RESOURCE_INSTANCE is already running"
		# Make sure the "started" flag exists even if it was lost.
		ocf_run touch "${TRANS_RES_STATE}" || exit $OCF_ERR_GENERIC
		return $OCF_SUCCESS
	fi

	cgroup_mounted
	if [ $? -ne 0 ]; then
		ocf_log err "Unable to find cgroup mount"
		exit $OCF_ERR_GENERIC
	fi

	ocf_log info "Starting" "${OCF_RESKEY_container}"
	ocf_run "${startcmd[@]}" || exit $OCF_ERR_GENERIC

	# Spin on status, wait for the cluster manager to time us out if
	# we fail
	while ! LXC_status; do
		ocf_log info "Container ${OCF_RESKEY_container} has not started, waiting"
		sleep 1
	done

	# Flag the resource as started; LXC_monitor uses this file to tell a
	# crashed container from one that was never started.
	ocf_run touch "${TRANS_RES_STATE}" || exit $OCF_ERR_GENERIC
	return $OCF_SUCCESS
}
# Stop the container: first ask its init process for an orderly shutdown,
# wait for it until shortly before the operation timeout, then stop the
# container with lxc-stop.
#
# Returns: $OCF_SUCCESS once the container is stopped (or already was).
# Exits:   with $OCF_ERR_GENERIC if the container reports no PID or the
#          final lxc-stop fails.
LXC_stop() {
local shutdown_timeout
local now
LXC_status
if [ $? -eq $OCF_NOT_RUNNING ]; then
ocf_log debug "Resource $OCF_RESOURCE_INSTANCE is already stopped"
# Remove the "started" flag in case it was left behind.
ocf_run rm -f $TRANS_RES_STATE
return $OCF_SUCCESS
fi
cgroup_mounted
if [ $? -ne 0 ]; then
ocf_log err "Unable to find cgroup mount"
exit $OCF_ERR_GENERIC
fi
# If the container is running "init" and is able to perform an orderly shutdown, then it should be done.
# It is 'assumed' that the 'init' system will do an orderly shutdown if presented with a 'kill -PWR' signal.
# On a 'sysvinit' this would require the container to have an inittab file containing "p0::powerfail:/sbin/init 0"
# PID is declared as an integer; both variables are local to this function
# because "declare" is used inside a function body.
declare -i PID=0
declare CMD=
# LXC prior 1.0.0
# NOTE(review): relies on ocf_version_cmp returning success only when the
# first version is older than the second - confirm against ocf-shellfuncs.
if ocf_version_cmp "`lxc_version`" 1.0.0 ; then
# This should work for traditional 'sysvinit' and 'upstart'
# Each loop runs on the right-hand side of a pipeline, so the PID and CMD
# values read there are not visible after the loop.
lxc-ps --name "${OCF_RESKEY_container}" -- -C init -o pid,comm |while read CN PID CMD ;do
[ $PID -gt 1 ] || continue
[ "$CMD" = "init" ] || continue
ocf_log info "Sending \"OS shut down\" instruction to" ${OCF_RESKEY_container} "as it was found to be using \"sysV init\" or \"upstart\""
kill -PWR $PID
done
# This should work for containers using 'systemd' instead of 'init'
lxc-ps --name "${OCF_RESKEY_container}" -- -C systemd -o pid,comm |while read CN PID CMD ;do
[ $PID -gt 1 ] || continue
[ "$CMD" = "systemd" ] || continue
ocf_log info "Sending \"OS shut down\" instruction to" ${OCF_RESKEY_container} "as it was found to be using \"systemd\""
kill -PWR $PID
done
else
# LXC 1.0.0 or later: lxc-info reports the PID of the container's init.
PID=$(lxc-info --name "${OCF_RESKEY_container}" -p -H)
# If there is no PID the container seems to be down which
# shouldn't happen.
if [ $PID -eq 0 ]; then
ocf_log err "${OCF_RESKEY_container} seems to run, but has no PID."
exit $OCF_ERR_GENERIC
fi
# Rescue me.
# PID 1 is never signalled: clearing PID and CMD makes both checks below
# fall through, so only the forced stop at the end applies.
if [ $PID -eq 1 ]; then
ocf_log err "${OCF_RESKEY_container} seems to run with PID 1 which cannot be."
PID=0
CMD=
else
CMD=$(ps -o comm= -p $PID)
fi
# This should work for traditional 'sysvinit' and 'upstart'
if [ "$CMD" = "init" ]; then
ocf_log info "Sending \"OS shut down\" instruction to" ${OCF_RESKEY_container} "as it was found to be using \"sysV init\" or \"upstart\""
kill -PWR $PID
fi
# This should work for containers using 'systemd' instead of 'init'
if [ "$CMD" = "systemd" ]; then
ocf_log info "Sending \"OS shut down\" instruction to" ${OCF_RESKEY_container} "as it was found to be using \"systemd\""
kill -PWR $PID
fi
fi
# The "shutdown_timeout" we use here is the operation
# timeout specified in the CIB, minus 5 seconds
# OCF_RESKEY_CRM_meta_timeout is divided by 1000 before it is added to the
# epoch seconds (presumably it is given in milliseconds - confirm).
now=$(date +%s)
shutdown_timeout=$(( $now + ($OCF_RESKEY_CRM_meta_timeout/1000) -5 ))
# Loop on status until we reach $shutdown_timeout
while [ $now -lt $shutdown_timeout ]; do
LXC_status
status=$?
case $status in
"$OCF_NOT_RUNNING")
# The orderly shutdown completed in time.
ocf_run rm -f $TRANS_RES_STATE
return $OCF_SUCCESS
;;
"$OCF_SUCCESS")
# Container is still running, keep waiting (until
# shutdown_timeout expires)
sleep 1
;;
*)
# Something went wrong. Bail out and
# resort to forced stop (destroy).
break;
esac
now=$(date +%s)
done
# If the container is still running, it will be stopped now. regardless of state!
# LXC prior 1.0.0
if ocf_version_cmp "`lxc_version`" 1.0.0 ; then
ocf_run lxc-stop -n ${OCF_RESKEY_container} || exit $OCF_ERR_GENERIC
else
# Newer lxc-stop is called with -k here.
ocf_run lxc-stop -n ${OCF_RESKEY_container} -k || exit $OCF_ERR_GENERIC
fi
ocf_log info "Container" ${OCF_RESKEY_container} "stopped"
ocf_run rm -f $TRANS_RES_STATE
return $OCF_SUCCESS
}
# Report whether the container is running.
# Returns $OCF_SUCCESS if the last word printed by lxc-info is "RUNNING",
# $OCF_NOT_RUNNING otherwise. The raw lxc-info output is left in the
# global variable S.
LXC_status() {
	local lxc_info_opt

	# lxc-info is called with the -s option for LXC 0.7.5 or later and
	# without it for older releases.
	if ocf_version_cmp "$(lxc_version)" 0.7.5; then
		lxc_info_opt=""
	else
		lxc_info_opt="-s"
	fi

	S=$(lxc-info $lxc_info_opt -n ${OCF_RESKEY_container})
	ocf_log debug "State of ${OCF_RESKEY_container}: $S"

	# Only the text after the last space is compared.
	[[ "${S##* }" = "RUNNING" ]] || return $OCF_NOT_RUNNING
	return $OCF_SUCCESS
}
# Monitor the container.
# Returns $OCF_SUCCESS while it is running and $OCF_NOT_RUNNING when it is
# cleanly stopped. Exits with $OCF_ERR_GENERIC when it is not running
# although the state file still exists.
LXC_monitor() {
	if LXC_status; then
		return $OCF_SUCCESS
	fi

	# Not running: without the state file this is a clean stop.
	[ -f $TRANS_RES_STATE ] || return $OCF_NOT_RUNNING

	# The state file is still there, so the container went away without a stop.
	ocf_log err "${OCF_RESKEY_container} is not running, but state file ${TRANS_RES_STATE} exists."
	exit $OCF_ERR_GENERIC
}
# Validate the resource configuration.
# Returns $OCF_SUCCESS when everything is in place. Exits with
# $OCF_ERR_CONFIGURED when a required parameter is missing and with
# $OCF_ERR_INSTALLED when the configuration file does not exist;
# check_binary is called for every tool the agent needs.
LXC_validate() {
	# Required parameters must be set, even for probes.
	[ -n "${OCF_RESKEY_container}" ] || {
		ocf_log err "LXC container name not set!"
		exit $OCF_ERR_CONFIGURED
	}
	[ -n "${OCF_RESKEY_config}" ] || {
		ocf_log err "LXC configuration filename name not set!"
		exit $OCF_ERR_CONFIGURED
	}

	# Everything below applies to non-probes only.
	if ocf_is_probe; then
		return $OCF_SUCCESS
	fi

	[ -f "${OCF_RESKEY_config}" ] || {
		ocf_log err "LXC configuration file \"${OCF_RESKEY_config}\" missing or not found!"
		exit $OCF_ERR_INSTALLED
	}

	# screen is only needed when the root console is to be captured.
	ocf_is_true $OCF_RESKEY_use_screen && check_binary screen

	check_binary lxc-start
	check_binary lxc-stop
	# lxc-ps is only looked for with LXC releases prior to 1.0.0.
	ocf_version_cmp "`lxc_version`" 1.0.0 && check_binary lxc-ps
	check_binary lxc-info

	return $OCF_SUCCESS
}
# Exactly one argument (the action) is expected.
[ $# -eq 1 ] || {
	LXC_usage
	exit $OCF_ERR_ARGS
}

# Informational actions are answered before any validation takes place.
case "$__OCF_ACTION" in
	meta-data)
		meta_data
		exit $OCF_SUCCESS
		;;
	usage|help)
		LXC_usage
		exit $OCF_SUCCESS
		;;
esac

# Everything except usage and meta-data must pass the validate test
LXC_validate

# Dispatch the action. The status of the handler is picked up right after
# the case statement; validate-all has nothing left to do at this point.
case "$__OCF_ACTION" in
	start)
		LXC_start
		;;
	stop)
		LXC_stop
		;;
	status)
		LXC_status
		;;
	monitor)
		LXC_monitor
		;;
	validate-all)
		;;
	*)
		LXC_usage
		ocf_log err "$0 was called with unsupported arguments: $*"
		exit $OCF_ERR_UNIMPLEMENTED
		;;
esac
rc=$?

ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
exit $rc
diff --git a/heartbeat/mysql b/heartbeat/mysql
index be914d3b2..e76213b0d 100755
--- a/heartbeat/mysql
+++ b/heartbeat/mysql
@@ -1,1045 +1,1045 @@
#!/bin/sh
#
#
# MySQL
#
# Description: Manages a MySQL database as Linux-HA resource
#
# Authors: Alan Robertson: DB2 Script
# Jakub Janczak: rewrite as MySQL
# Andrew Beekhof: cleanup and import
# Sebastian Reitenbach: add OpenBSD defaults, more cleanup
# Narayan Newton: add Gentoo/Debian defaults
# Marian Marinov, Florian Haas: add replication capability
# Yves Trudeau, Baron Schwartz: add VIP support and improve replication
#
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
# License: GNU General Public License (GPL)
#
# (c) 2002-2005 International Business Machines, Inc.
# 2005-2010 Linux-HA contributors
#
# An example usage in /etc/ha.d/haresources:
# node1 10.0.0.170 mysql
#
# See usage() function below for more details...
#
# OCF instance parameters:
# OCF_RESKEY_binary
# OCF_RESKEY_client_binary
# OCF_RESKEY_config
# OCF_RESKEY_datadir
# OCF_RESKEY_user
# OCF_RESKEY_group
# OCF_RESKEY_test_table
# OCF_RESKEY_test_user
# OCF_RESKEY_test_passwd
# OCF_RESKEY_enable_creation
# OCF_RESKEY_additional_parameters
# OCF_RESKEY_log
# OCF_RESKEY_pid
# OCF_RESKEY_socket
# OCF_RESKEY_replication_user
# OCF_RESKEY_replication_passwd
# OCF_RESKEY_replication_port
# OCF_RESKEY_max_slave_lag
# OCF_RESKEY_evict_outdated_slaves
# OCF_RESKEY_reader_attribute
#######################################################################
# Initialization:
# Default OCF_FUNCTIONS_DIR to ${OCF_ROOT}/lib/heartbeat only when it is unset.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
# Source the shared OCF shell helpers, then the MySQL-specific common code.
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
. ${OCF_FUNCTIONS_DIR}/mysql-common.sh
#######################################################################
# Print a short help text describing the supported operations.
# The synopsis lists every operation the dispatcher at the end of this
# script accepts, including 'status'.
usage() {
    cat <<UEND
usage: $0 (start|stop|status|validate-all|meta-data|monitor|promote|demote|notify)
$0 manages a MySQL Database as an HA resource.
The 'start' operation starts the database.
The 'stop' operation stops the database.
The 'status' operation reports whether the database is running
The 'monitor' operation reports whether the database seems to be working
The 'promote' operation makes this mysql server run as master
The 'demote' operation makes this mysql server run as slave
The 'notify' operation processes master/slave promotion and demotion notifications
The 'validate-all' operation reports whether the parameters are valid
The 'meta-data' operation prints the resource agent metadata
UEND
}
# Print the OCF resource agent metadata (XML) on standard output.
# The here-document delimiter is unquoted, so every ${OCF_RESKEY_*_default}
# reference below is expanded by the shell when the function runs, while
# \${INSTANCE_ATTR_NAME} is emitted literally because its dollar sign is escaped.
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="mysql">
<version>1.0</version>
<longdesc lang="en">
Resource script for MySQL.
May manage a standalone MySQL database, a clone set with externally
managed replication, or a complete master/slave replication setup.
Note, when master/slave replication is in use, the resource must
be setup to use notifications. Set 'notify=true' in the metadata
attributes when defining a MySQL master/slave instance.
While managing replication, the default behavior is to use uname -n
values in the change master to command. Other IPs can be specified
manually by adding a node attribute \${INSTANCE_ATTR_NAME}_mysql_master_IP
giving the IP to use for replication. For example, if the mysql primitive
you are using is p_mysql, the attribute to set will be
p_mysql_mysql_master_IP.
</longdesc>
<shortdesc lang="en">Manages a MySQL database instance</shortdesc>
<parameters>
<parameter name="binary" unique="0" required="0">
<longdesc lang="en">
Location of the MySQL server binary
</longdesc>
<shortdesc lang="en">MySQL server binary</shortdesc>
<content type="string" default="${OCF_RESKEY_binary_default}" />
</parameter>
<parameter name="client_binary" unique="0" required="0">
<longdesc lang="en">
Location of the MySQL client binary
</longdesc>
<shortdesc lang="en">MySQL client binary</shortdesc>
<content type="string" default="${OCF_RESKEY_client_binary_default}" />
</parameter>
<parameter name="config" unique="0" required="0">
<longdesc lang="en">
Configuration file
</longdesc>
<shortdesc lang="en">MySQL config</shortdesc>
<content type="string" default="${OCF_RESKEY_config_default}" />
</parameter>
<parameter name="datadir" unique="0" required="0">
<longdesc lang="en">
Directory containing databases
</longdesc>
<shortdesc lang="en">MySQL datadir</shortdesc>
<content type="string" default="${OCF_RESKEY_datadir_default}" />
</parameter>
<parameter name="user" unique="0" required="0">
<longdesc lang="en">
User running MySQL daemon
</longdesc>
<shortdesc lang="en">MySQL user</shortdesc>
<content type="string" default="${OCF_RESKEY_user_default}" />
</parameter>
<parameter name="group" unique="0" required="0">
<longdesc lang="en">
Group running MySQL daemon (for logfile and directory permissions)
</longdesc>
<shortdesc lang="en">MySQL group</shortdesc>
<content type="string" default="${OCF_RESKEY_group_default}"/>
</parameter>
<parameter name="log" unique="0" required="0">
<longdesc lang="en">
The logfile to be used for mysqld.
</longdesc>
<shortdesc lang="en">MySQL log file</shortdesc>
<content type="string" default="${OCF_RESKEY_log_default}"/>
</parameter>
<parameter name="pid" unique="0" required="0">
<longdesc lang="en">
The pidfile to be used for mysqld.
</longdesc>
<shortdesc lang="en">MySQL pid file</shortdesc>
<content type="string" default="${OCF_RESKEY_pid_default}"/>
</parameter>
<parameter name="socket" unique="0" required="0">
<longdesc lang="en">
The socket to be used for mysqld.
</longdesc>
<shortdesc lang="en">MySQL socket</shortdesc>
<content type="string" default="${OCF_RESKEY_socket_default}"/>
</parameter>
<parameter name="test_table" unique="0" required="0">
<longdesc lang="en">
Table to be tested in monitor statement (in database.table notation)
</longdesc>
<shortdesc lang="en">MySQL test table</shortdesc>
<content type="string" default="${OCF_RESKEY_test_table_default}" />
</parameter>
<parameter name="test_user" unique="0" required="0">
<longdesc lang="en">
MySQL test user, must have select privilege on test_table
</longdesc>
<shortdesc lang="en">MySQL test user</shortdesc>
<content type="string" default="${OCF_RESKEY_test_user_default}" />
</parameter>
<parameter name="test_passwd" unique="0" required="0">
<longdesc lang="en">
MySQL test user password
</longdesc>
<shortdesc lang="en">MySQL test user password</shortdesc>
<content type="string" default="${OCF_RESKEY_test_passwd_default}" />
</parameter>
<parameter name="enable_creation" unique="0" required="0">
<longdesc lang="en">
If the MySQL database does not exist, it will be created
</longdesc>
<shortdesc lang="en">Create the database if it does not exist</shortdesc>
<content type="boolean" default="${OCF_RESKEY_enable_creation_default}"/>
</parameter>
<parameter name="additional_parameters" unique="0" required="0">
<longdesc lang="en">
Additional parameters which are passed to the mysqld on startup.
(e.g. --skip-external-locking or --skip-grant-tables)
</longdesc>
<shortdesc lang="en">Additional parameters to pass to mysqld</shortdesc>
<content type="string" default="${OCF_RESKEY_additional_parameters_default}"/>
</parameter>
<parameter name="replication_user" unique="0" required="0">
<longdesc lang="en">
MySQL replication user. This user is used for starting and stopping
MySQL replication, for setting and resetting the master host, and for
setting and unsetting read-only mode. Because of that, this user must
have SUPER, REPLICATION SLAVE, REPLICATION CLIENT, PROCESS and RELOAD
privileges on all nodes within the cluster. Mandatory if you define a
master-slave resource.
</longdesc>
<shortdesc lang="en">MySQL replication user</shortdesc>
<content type="string" default="${OCF_RESKEY_replication_user_default}" />
</parameter>
<parameter name="replication_passwd" unique="0" required="0">
<longdesc lang="en">
MySQL replication password. Used for replication client and slave.
Mandatory if you define a master-slave resource.
</longdesc>
<shortdesc lang="en">MySQL replication user password</shortdesc>
<content type="string" default="${OCF_RESKEY_replication_passwd_default}" />
</parameter>
<parameter name="replication_port" unique="0" required="0">
<longdesc lang="en">
The port on which the Master MySQL instance is listening.
</longdesc>
<shortdesc lang="en">MySQL replication port</shortdesc>
<content type="string" default="${OCF_RESKEY_replication_port_default}" />
</parameter>
<parameter name="max_slave_lag" unique="0" required="0">
<longdesc lang="en">
The maximum number of seconds a replication slave is allowed to lag
behind its master. Do not set this to zero. What the cluster manager
does in case a slave exceeds this maximum lag is determined by the
evict_outdated_slaves parameter.
</longdesc>
<shortdesc lang="en">Maximum time (seconds) a MySQL slave is allowed
to lag behind a master</shortdesc>
<content type="integer" default="${OCF_RESKEY_max_slave_lag_default}"/>
</parameter>
<parameter name="evict_outdated_slaves" unique="0" required="0">
<longdesc lang="en">
If set to true, any slave which is more than max_slave_lag seconds
behind the master has its MySQL instance shut down. If this parameter
is set to false in a primitive or clone resource, it is simply
ignored. If set to false in a master/slave resource, then exceeding
the maximum slave lag will merely push down the master preference so
the lagging slave is never promoted to the new master.
</longdesc>
<shortdesc lang="en">Determines whether to shut down badly lagging
slaves</shortdesc>
<content type="boolean" default="${OCF_RESKEY_evict_outdated_slaves_default}" />
</parameter>
<parameter name="reader_attribute" unique="1" required="0">
<longdesc lang="en">
An attribute that the RA can manage to specify whether a node
can be read from. This node attribute will be 1 if it's fine to
read from the node, and 0 otherwise (for example, when a slave
has lagged too far behind the master).
A typical example for the use of this attribute would be to tie
a set of IP addresses to MySQL slaves that can be read from.
This parameter is only meaningful in master/slave set configurations.
</longdesc>
<shortdesc lang="en">Sets the node attribute that determines
whether a node is usable for clients to read from.</shortdesc>
<content type="string" default="${OCF_RESKEY_reader_attribute_default}" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="120" />
<action name="stop" timeout="120" />
<action name="status" timeout="60" />
<action name="monitor" depth="0" timeout="30" interval="20" />
<action name="monitor" role="Master" depth="0" timeout="30" interval="10" />
<action name="monitor" role="Slave" depth="0" timeout="30" interval="30" />
<action name="promote" timeout="120" />
<action name="demote" timeout="120" />
<action name="notify" timeout="90" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}
# Convenience functions
set_read_only() {
    # Sets or unsets read-only mode. Accepts one boolean as its
    # optional argument. If invoked without any arguments (or with an
    # empty one), defaults to enabling read only mode. Should only be
    # set in master/slave setups.
    # Returns the status reported by ocf_run for the SET GLOBAL statement.
    local ro_val

    # ${1:-on} implements the documented default: without it an absent
    # argument is not "true" and read-only mode would be switched off.
    if ocf_is_true "${1:-on}"; then
        ro_val="on"
    else
        ro_val="off"
    fi
    ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
        -e "SET GLOBAL read_only=${ro_val}"
}
get_read_only() {
    # Succeeds (status 0) when the server reports read_only as "ON" in
    # SHOW VARIABLES, fails (status 1) otherwise.
    local ro_flag

    ro_flag=$($MYSQL $MYSQL_OPTIONS_REPL \
        -e "SHOW VARIABLES" | grep -w read_only | awk '{print $2}')

    # The status of this comparison is the function's return value.
    [ "$ro_flag" = "ON" ]
}
is_slave() {
    # Decide whether this instance currently acts as a MySQL replication
    # slave. Returns 0 only when the resource is a master/slave resource,
    # the server is read-only, get_slave_info succeeds and a
    # master_log_file is known; returns 1 in every other case.
    local rc tmpfile

    # A non master/slave resource or a writable server cannot be a slave.
    if ! ocf_is_ms || ! get_read_only; then
        return 1
    fi

    get_slave_info
    rc=$?
    # get_slave_info may leave its temporary file behind; remove it here.
    rm -f $tmpfile

    # A failing get_slave_info means "SHOW SLAVE STATUS" was empty, i.e.
    # the instance is not a replication slave.
    [ $rc -eq 0 ] || return 1

    # master_log_file is deleted by "reset slave", so an empty value
    # means no master is configured.
    [ "$master_log_file" ] || return 1
    return 0
}
parse_slave_info() {
    # Extracts field $1 from result of "SHOW SLAVE STATUS\G" stored in file $2.
    # Prints the text following "<field>: " for every line where the field
    # name is preceded by a space; prints nothing when the field is absent.
    # The file name is quoted so that a path containing whitespace is
    # still opened as a single file.
    sed -ne "s/^.* $1: \(.*\)$/\1/p" < "$2"
}
get_slave_info() {
    # Loads the replication state from "SHOW SLAVE STATUS\G" into the
    # variables master_host, master_user, master_port, master_log_file,
    # master_log_pos, slave_sql, slave_io, last_errno and secs_behind.
    #
    # Warning: this sets $tmpfile and LEAVES this file behind! The caller
    # must delete it after use.
    #
    # Returns $OCF_SUCCESS when the values are available (either freshly
    # read or already set by an earlier call), $OCF_ERR_GENERIC when the
    # status output is empty.
    if [ "$master_log_file" -a "$master_host" ]; then
        # variables are already defined, get_slave_info has been run before
        return $OCF_SUCCESS
    fi

    tmpfile=`mktemp "${HA_RSCTMP}/check_slave.${OCF_RESOURCE_INSTANCE}.XXXXXX"`
    $MYSQL $MYSQL_OPTIONS_REPL \
        -e 'SHOW SLAVE STATUS\G' > "$tmpfile"

    # $tmpfile is quoted on purpose: if mktemp failed and left it empty,
    # an unquoted `[ -s ]` would be true and empty data would be reported
    # as success.
    if [ ! -s "$tmpfile" ]; then
        # Instance produced an empty "SHOW SLAVE STATUS" output --
        # instance is not a slave
        ocf_exit_reason "check_slave invoked on an instance that is not a replication slave."
        return $OCF_ERR_GENERIC
    fi

    master_host=`parse_slave_info Master_Host "$tmpfile"`
    master_user=`parse_slave_info Master_User "$tmpfile"`
    master_port=`parse_slave_info Master_Port "$tmpfile"`
    master_log_file=`parse_slave_info Master_Log_File "$tmpfile"`
    master_log_pos=`parse_slave_info Read_Master_Log_Pos "$tmpfile"`
    slave_sql=`parse_slave_info Slave_SQL_Running "$tmpfile"`
    slave_io=`parse_slave_info Slave_IO_Running "$tmpfile"`
    last_errno=`parse_slave_info Last_Errno "$tmpfile"`
    secs_behind=`parse_slave_info Seconds_Behind_Master "$tmpfile"`
    ocf_log debug "MySQL instance running as a replication slave"
    return $OCF_SUCCESS
}
check_slave() {
# Checks slave status
# Evaluates the values loaded by get_slave_info and updates the reader
# attribute (set_reader_attr) and, for master/slave resources, the master
# preference ($CRM_MASTER). Note that the failure paths below call `exit`,
# ending the whole agent, rather than returning to the caller.
local rc new_master
get_slave_info
rc=$?
if [ $rc -eq 0 ]; then
# Did we receive an error other than max_connections?
if [ $last_errno -ne 0 -a $last_errno -ne "$MYSQL_TOO_MANY_CONN_ERR" ]; then
# Whoa. Replication ran into an error. This slave has
# diverged from its master. Make sure this resource
# doesn't restart in place.
ocf_exit_reason "MySQL instance configured for replication, but replication has failed."
ocf_log err "See $tmpfile for details"
# Just pull the reader VIP away, killing MySQL here would be pretty evil
# on a loaded server
set_reader_attr 0
exit $OCF_SUCCESS
fi
# If we got max_connections, let's remove the vip
if [ $last_errno -eq "$MYSQL_TOO_MANY_CONN_ERR" ]; then
set_reader_attr 0
exit $OCF_SUCCESS
fi
if [ "$slave_io" != 'Yes' ]; then
# Not necessarily a bad thing. The master may have
# temporarily shut down, and the slave may just be
# reconnecting. A warning can't hurt, though.
ocf_log warn "MySQL Slave IO threads currently not running."
# Sanity check, are we at least on the right master
# (the expected master is the first '|'-separated field of the
# value stored behind $CRM_ATTR_REPL_INFO)
new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1`
if [ "$master_host" != "$new_master" ]; then
# Not pointing to the right master, not good, removing the VIPs
set_reader_attr 0
exit $OCF_SUCCESS
fi
fi
if [ "$slave_sql" != 'Yes' ]; then
# We don't have a replication SQL thread running. Not a
# good thing. Try to recover by restarting the SQL thread
# and remove reader vip. Prevent MySQL restart.
ocf_exit_reason "MySQL Slave SQL threads currently not running."
ocf_log err "See $tmpfile for details"
# Remove reader vip
set_reader_attr 0
# try to restart slave
ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
-e "START SLAVE"
# Return success to prevent a restart
exit $OCF_SUCCESS
fi
if ocf_is_true $OCF_RESKEY_evict_outdated_slaves; then
# We're supposed to bail out if we lag too far
# behind. Let's check our lag.
if [ "$secs_behind" = "NULL" ] || [ $secs_behind -gt $OCF_RESKEY_max_slave_lag ]; then
ocf_exit_reason "MySQL Slave is $secs_behind seconds behind master (allowed maximum: $OCF_RESKEY_max_slave_lag)."
ocf_log err "See $tmpfile for details"
# Remove reader vip
set_reader_attr 0
exit $OCF_ERR_INSTALLED
fi
elif ocf_is_ms; then
# Even if we're not set to evict lagging slaves, we can
# still use the seconds behind master value to set our
# master preference.
# NOTE(review): secs_behind can be the literal string NULL (the
# neighbouring tests check for it); this arithmetic does not guard
# against that value -- confirm how the shell in use evaluates it.
local master_pref
master_pref=$((${OCF_RESKEY_max_slave_lag}-${secs_behind}))
if [ $master_pref -lt 0 ]; then
# Sanitize a below-zero preference to just zero
master_pref=0
fi
$CRM_MASTER -v $master_pref
fi
# is the slave ok to have a VIP on it
if [ "$secs_behind" = "NULL" ] || [ $secs_behind -gt $OCF_RESKEY_max_slave_lag ]; then
set_reader_attr 0
else
set_reader_attr 1
fi
ocf_log debug "MySQL instance running as a replication slave"
# The status dump is only removed on this success path; the exit paths
# above leave it in place and point the log at it.
rm -f $tmpfile
else
# Instance produced an empty "SHOW SLAVE STATUS" output --
# instance is not a slave
# TODO: Needs to handle when get_slave_info will return too many connections error
rm -f $tmpfile
ocf_exit_reason "check_slave invoked on an instance that is not a replication slave."
exit $OCF_ERR_GENERIC
fi
}
set_master() {
# Points this MySQL server at the master recorded in the cluster.
# Takes no arguments: the new master host (and, when needed, the binlog
# file and position) are read from the '|'-separated value stored behind
# $CRM_ATTR_REPL_INFO (fields 1, 2 and 3).
# master_log_file and master_log_pos are declared local here, so the
# values get_slave_info loads into them do not outlive this function.
local new_master master_log_file master_log_pos
local master_params
new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1`
# Keep replication position
get_slave_info
if [ "$master_log_file" -a "$new_master" = "$master_host" ]; then
# Already replicating from the requested master: keep the current
# position and leave the configuration untouched.
# master_params=", MASTER_LOG_FILE='$master_log_file', \
# MASTER_LOG_POS=$master_log_pos"
ocf_log info "Kept master pos for $master_host : $master_log_file:$master_log_pos"
rm -f $tmpfile
return
else
# Different (or no) master so far: restore the position published by
# the master, if both file and position are available.
master_log_file=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f2`
master_log_pos=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f3`
if [ -n "$master_log_file" -a -n "$master_log_pos" ]; then
master_params=", MASTER_LOG_FILE='$master_log_file', \
MASTER_LOG_POS=$master_log_pos"
ocf_log info "Restored master pos for $new_master : $master_log_file:$master_log_pos"
fi
fi
# Informs the MySQL server of the master to replicate
# from, using the host name read into $new_master above. The master
# must either be unchanged from the last master the slave replicated
# from, or freshly reset with RESET MASTER.
ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
-e "CHANGE MASTER TO MASTER_HOST='$new_master', \
MASTER_PORT=$OCF_RESKEY_replication_port, \
MASTER_USER='$OCF_RESKEY_replication_user', \
MASTER_PASSWORD='$OCF_RESKEY_replication_passwd' $master_params"
# Remove the status dump left behind by get_slave_info.
rm -f $tmpfile
}
unset_master(){
# Instructs the MySQL server to stop replicating from a master
# host.
# Returns $OCF_SUCCESS when the instance is not a slave; otherwise returns
# the status of the final test after RESET SLAVE, and exits the agent with
# $OCF_ERR_GENERIC if any of the STOP/RESET statements fails.
# If we're currently not configured to be replicating from any
# host, then there's nothing to do. But we do log a warning as
# no-one but the CRM should be touching the MySQL master/slave
# configuration.
if ! is_slave; then
ocf_log warn "Attempted to unset the replication master on an instance that is not configured as a replication slave"
return $OCF_SUCCESS
fi
local tmpfile
tmpfile=`mktemp ${HA_RSCTMP}/unset_master.${OCF_RESOURCE_INSTANCE}.XXXXXX`
# At this point, the master is read only so there should not be much binlogs to transfer
# Let's wait for the last bits
# The loop polls SHOW PROCESSLIST once per second and has no upper bound
# of its own; it ends only when one of the states below is seen.
while true; do
$MYSQL $MYSQL_OPTIONS_REPL \
-e 'SHOW PROCESSLIST\G' > $tmpfile
if grep -i 'Waiting for master to send event' $tmpfile >/dev/null; then
ocf_log info "MySQL slave has finished reading master binary log"
break
fi
if grep -i 'Reconnecting after a failed master event read' $tmpfile >/dev/null; then
ocf_log info "Master is down, no more binary logs to come"
break
fi
if grep -i 'Connecting to master' $tmpfile >/dev/null; then
ocf_log info "Master is down, no more binary logs to come"
break
fi
# No 'system user' entry in the process list: no slave thread is listed.
if ! grep 'system user' $tmpfile >/dev/null; then
ocf_log info "Slave is not running - not waiting to finish"
break
fi
sleep 1
done
# Now, stop the slave I/O thread and wait for relay log
# processing to complete
ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
-e "STOP SLAVE IO_THREAD"
if [ $? -gt 0 ]; then
# Note: $tmpfile is not removed on this exit path.
ocf_exit_reason "Error stopping slave IO thread"
exit $OCF_ERR_GENERIC
fi
# Second unbounded polling loop: wait until the relay log has been applied.
while true; do
$MYSQL $MYSQL_OPTIONS_REPL \
-e 'SHOW PROCESSLIST\G' > $tmpfile
if grep -i 'Has read all relay log' $tmpfile >/dev/null; then
ocf_log info "MySQL slave has finished processing relay log"
break
fi
if ! grep -q 'system user' $tmpfile; then
ocf_log info "Slave not runnig - not waiting to finish"
break
fi
ocf_log info "Waiting for MySQL slave to finish processing relay log"
sleep 1
done
rm -f $tmpfile
# Now, stop all slave activity and unset the master host
ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
-e "STOP SLAVE"
if [ $? -gt 0 ]; then
ocf_exit_reason "Error stopping rest slave threads"
exit $OCF_ERR_GENERIC
fi
# The ALL keyword sits in a MySQL version-conditional comment (50516).
ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
-e "RESET SLAVE /*!50516 ALL */;"
if [ $? -gt 0 ]; then
ocf_exit_reason "Failed to reset slave"
exit $OCF_ERR_GENERIC
fi
}
# Start the replication slave threads on this instance.
# Returns whatever ocf_run reports for the START SLAVE statement.
start_slave() {
    ocf_run $MYSQL $MYSQL_OPTIONS_REPL -e "START SLAVE"
}
# Set the attribute controlling the readers VIP
# Arguments:
# $1 The value to store in the node attribute named by
#    OCF_RESKEY_reader_attribute.
# The attribute is only written when the stored value differs from $1.
set_reader_attr() {
    local curr_attr_value

    curr_attr_value=$(get_reader_attr)
    # Compare as strings: a numeric -ne raises a test error when the
    # current value is empty or not a number, which would silently skip
    # the update.
    if [ "$curr_attr_value" != "$1" ]; then
        $CRM_ATTR -l reboot --name ${OCF_RESKEY_reader_attribute} -v $1
    fi
}
# Print the current value of the node attribute controlling the readers VIP.
# Prints -1 when the attribute query fails.
get_reader_attr() {
    local reader_val

    if reader_val=$($CRM_ATTR -l reboot --name ${OCF_RESKEY_reader_attribute} --query -q); then
        echo $reader_val
    else
        echo -1
    fi
}
# Stores data for MASTER STATUS from MySQL
# The output of "SHOW MASTER STATUS\G" is written to the file named by the
# global variable master_status_file, which get_master_status reads.
update_data_master_status() {
    master_status_file="${HA_RSCTMP}/master_status.${OCF_RESOURCE_INSTANCE}"
    # Quote the target, as get_master_status does when reading it, so a
    # path containing whitespace is treated as one file name.
    $MYSQL $MYSQL_OPTIONS_REPL -e "SHOW MASTER STATUS\G" > "$master_status_file"
}
# Prints one value from the stored copy of SHOW MASTER STATUS.
# Must be called after update_data_master_status, which creates the file
# named by $master_status_file.
# Arguments:
# $1 The name of the field to print (matched against "<name>:").
get_master_status() {
    awk -v name="$1" '
        $1 == (name ":") {
            start = index($0, ":") + 2
            print substr($0, start)
        }
    ' "$master_status_file"
}
# Prints the address this host should be reached at for replication.
# The value comes from the permanent node attribute
# ${INSTANCE_ATTR_NAME}_mysql_master_IP, queried through $CRM_ATTR; it is
# the address used for the change master to command.
# The (non-quiet) output of the crm_attribute command looks like this:
# scope=nodes name=IP value=10.2.2.161
# If the attribute query fails, the fallback is the output of uname -n.
get_local_ip() {
    local node_ip

    if node_ip=$($CRM_ATTR -l forever -n ${INSTANCE_ATTR_NAME}_mysql_master_IP -q -G); then
        echo $node_ip
    else
        uname -n
    fi
}
#######################################################################
# Functions invoked by resource manager actions
mysql_monitor() {
    # Monitor action.
    # Returns the status of mysql_common_status when that is not
    # $OCF_SUCCESS, $OCF_ERR_GENERIC when the test table cannot be queried,
    # $OCF_RUNNING_MASTER for a writable instance of a master/slave
    # resource, and $OCF_SUCCESS otherwise.
    local rc
    local status_loglevel="err"

    # Set loglevel to info during probe
    if ocf_is_probe; then
        status_loglevel="info"
    fi

    mysql_common_status $status_loglevel
    rc=$?

    # TODO: check max connections error

    # If status returned an error, return that immediately
    if [ $rc -ne $OCF_SUCCESS ]; then
        return $rc
    fi

    # An unset OCF_CHECK_LEVEL is treated as 0 instead of producing a
    # test syntax error.
    if [ "${OCF_CHECK_LEVEL:-0}" -gt 0 -a -n "$OCF_RESKEY_test_table" ]; then
        # Check if this instance is configured as a slave, and if so
        # check slave status
        if is_slave; then
            check_slave
        fi

        # Check for test table
        ocf_run -q $MYSQL $MYSQL_OPTIONS_TEST \
            -e "SELECT COUNT(*) FROM $OCF_RESKEY_test_table"
        rc=$?

        if [ $rc -ne 0 ]; then
            # Report the configured table name; no variable called
            # test_table exists in this function.
            ocf_exit_reason "Failed to select from $OCF_RESKEY_test_table";
            return $OCF_ERR_GENERIC;
        fi
    fi

    if ocf_is_ms && ! get_read_only; then
        ocf_log debug "MySQL monitor succeeded (master)";
        return $OCF_RUNNING_MASTER
    else
        ocf_log debug "MySQL monitor succeeded";
        return $OCF_SUCCESS
    fi
}
mysql_start() {
# Start action.
# Returns $OCF_SUCCESS when MySQL is already running or was started and
# passed the initial monitor; otherwise returns the failing status of
# mysql_common_start or mysql_monitor, or $OCF_ERR_GENERIC when the slave
# threads cannot be started.
local rc
if ocf_is_ms; then
# Initialize the ReaderVIP attribute, monitor will enable it
set_reader_attr 0
fi
mysql_common_status info
if [ $? = $OCF_SUCCESS ]; then
ocf_log info "MySQL already running"
return $OCF_SUCCESS
fi
mysql_common_prepare_dirs
# Uncomment to perform permission cleansing
# - not convinced this should be enabled by default
#
#chmod 0755 $OCF_RESKEY_datadir
#chown -R $OCF_RESKEY_user $OCF_RESKEY_datadir
#chgrp -R $OCF_RESKEY_group $OCF_RESKEY_datadir
# For master/slave resources the server is started with
# --skip-slave-start; replication is configured explicitly further down.
mysql_extra_params=
if ocf_is_ms; then
mysql_extra_params="--skip-slave-start"
fi
mysql_common_start $mysql_extra_params
rc=$?
if [ $rc != $OCF_SUCCESS ]; then
return $rc
fi
if ocf_is_ms; then
# We're configured as a stateful resource. We must start as
# slave by default. At this point we don't know if the CRM has
# already promoted a master. So, we simply start in read only
# mode.
set_read_only on
# Now, let's see whether there is a master. We might be a new
# node that is just joining the cluster, and the CRM may have
# promoted a master before.
master_host=`echo $OCF_RESKEY_CRM_meta_notify_master_uname|tr -d " "`
if [ "$master_host" -a "$master_host" != ${NODENAME} ]; then
ocf_log info "Changing MySQL configuration to replicate from $master_host."
set_master
start_slave
if [ $? -ne 0 ]; then
ocf_exit_reason "Failed to start slave"
return $OCF_ERR_GENERIC
fi
else
ocf_log info "No MySQL master present - clearing replication state"
unset_master
fi
# We also need to set a master preference, otherwise Pacemaker
# won't ever promote us in the absence of any explicit
# preference set by the administrator. We choose a low
# greater-than-zero preference.
$CRM_MASTER -v 1
fi
# Initial monitor action
# When test table, user and password are all configured, raise
# OCF_CHECK_LEVEL so that mysql_monitor also queries the test table.
if [ -n "$OCF_RESKEY_test_table" -a -n "$OCF_RESKEY_test_user" -a -n "$OCF_RESKEY_test_passwd" ]; then
OCF_CHECK_LEVEL=10
fi
mysql_monitor
rc=$?
# Both plain success and "running as master" count as a successful start.
if [ $rc != $OCF_SUCCESS -a $rc != $OCF_RUNNING_MASTER ]; then
ocf_exit_reason "Failed initial monitor action"
return $rc
fi
ocf_log info "MySQL started"
return $OCF_SUCCESS
}
mysql_stop() {
    # Stop action. Returns the status of mysql_common_stop.
    # For a master/slave resource the master preference is deleted and the
    # reader attribute is cleared before the server is stopped.
    ocf_is_ms && {
        # clear preference for becoming master
        $CRM_MASTER -D
        # Remove VIP capability
        set_reader_attr 0
    }

    mysql_common_stop
}
mysql_promote() {
# Promote action: turn this instance into the replication master.
# Returns $OCF_NOT_RUNNING when MySQL is not running, $OCF_ERR_GENERIC when
# read-only mode cannot be switched off, $OCF_SUCCESS otherwise.
local master_info
# The status check runs in a subshell; only its exit status is used.
if ( ! mysql_common_status err ); then
return $OCF_NOT_RUNNING
fi
ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
-e "STOP SLAVE"
# Set Master Info in CIB, cluster level attribute
# The published value is "<ip>|<binlog file>|<binlog position>".
update_data_master_status
master_info="$(get_local_ip)|$(get_master_status File)|$(get_master_status Position)"
${CRM_ATTR_REPL_INFO} -v "$master_info"
# NOTE(review): nothing in this function assigns tmpfile; the file written
# by update_data_master_status is $master_status_file -- confirm which file
# was meant to be removed here.
rm -f $tmpfile
set_read_only off || return $OCF_ERR_GENERIC
# Existing master gets a higher-than-default master preference, so
# the cluster manager does not shuffle the master role around
# unnecessarily
$CRM_MASTER -v $((${OCF_RESKEY_max_slave_lag}+1))
# A master can accept reads
set_reader_attr 1
return $OCF_SUCCESS
}
mysql_demote() {
    # Demote action.
    # Returns $OCF_NOT_RUNNING when MySQL is not running; otherwise the
    # status of resetting the master preference.
    mysql_common_status err || return $OCF_NOT_RUNNING

    # Return master preference to default, so the cluster manager gets
    # a chance to select a new master
    $CRM_MASTER -v 1
}
mysql_notify() {
    # Notify action: reacts to promotion/demotion notifications of a
    # master/slave resource. The notification is identified as
    # "<notify_type>-<notify_operation>" built from the
    # OCF_RESKEY_CRM_meta_notify_* variables.
    # Returns $OCF_SUCCESS, $OCF_ERR_GENERIC on failure, or for
    # post-demote the status of unset_master.

    # If not configured as a Stateful resource, we make no sense of
    # notifications.
    if ! ocf_is_ms; then
        ocf_log info "This agent makes no use of notifications unless running in master/slave mode."
        return $OCF_SUCCESS
    fi

    local type_op
    type_op="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}"

    ocf_log debug "Received $type_op notification."

    case "$type_op" in
        'pre-promote')
            # Nothing to do now here, new replication info not yet published
            ;;
        'post-promote')
            # The master has completed its promotion. Now is a good
            # time to check whether our replication slave is working
            # correctly.
            master_host=`echo $OCF_RESKEY_CRM_meta_notify_promote_uname|tr -d " "`
            if [ "$master_host" = "${NODENAME}" ]; then
                ocf_log info "This will be the new master, ignoring post-promote notification."
            else
                ocf_log info "Resetting replication"
                unset_master
                if [ $? -ne 0 ]; then
                    return $OCF_ERR_GENERIC
                fi

                ocf_log info "Changing MySQL configuration to replicate from $master_host"
                set_master
                if [ $? -ne 0 ]; then
                    return $OCF_ERR_GENERIC
                fi

                start_slave
                if [ $? -ne 0 ]; then
                    ocf_exit_reason "Failed to start slave"
                    return $OCF_ERR_GENERIC
                fi
            fi
            return $OCF_SUCCESS
            ;;
        'pre-demote')
            demote_host=`echo $OCF_RESKEY_CRM_meta_notify_demote_uname|tr -d " "`
            # Both operands are quoted so an empty uname is a plain
            # mismatch rather than a test syntax error.
            if [ "$demote_host" = "${NODENAME}" ]; then
                ocf_log info "pre-demote notification for $demote_host"
                set_read_only on
                if [ $? -ne 0 ]; then
                    ocf_exit_reason "Failed to set read-only";
                    return $OCF_ERR_GENERIC;
                fi

                # Must kill all existing user threads because they are still Read/write
                # in order for the slaves to complete the read of binlogs
                local tmpfile
                tmpfile=`mktemp "${HA_RSCTMP}/threads.${OCF_RESOURCE_INSTANCE}.XXXXXX"`
                $MYSQL $MYSQL_OPTIONS_REPL \
                    -e "SHOW PROCESSLIST" > "$tmpfile"
                # Only rows starting with a thread id are considered;
                # replication, scheduler and our own listing are skipped.
                for thread in `awk '$0 !~ /Binlog Dump|system user|event_scheduler|SHOW PROCESSLIST/ && $0 ~ /^[0-9]/ {print $1}' "$tmpfile"`
                do
                    ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
                        -e "KILL ${thread}"
                done
                # The process list dump is no longer needed; do not leave
                # it behind in the temporary directory.
                rm -f "$tmpfile"
            else
                ocf_log info "Ignoring pre-demote notification except for my own demotion."
            fi
            return $OCF_SUCCESS
            ;;
        'post-demote')
            demote_host=`echo $OCF_RESKEY_CRM_meta_notify_demote_uname|tr -d " "`
            if [ "$demote_host" = "${NODENAME}" ]; then
                ocf_log info "Ignoring post-demote notification for my own demotion."
                return $OCF_SUCCESS
            fi
            ocf_log info "post-demote notification for $demote_host."
            # The former master has just been gracefully demoted.
            unset_master
            ;;
        *)
            return $OCF_SUCCESS
            ;;
    esac
}
#######################################################################
##########################################################################
# Optional debug logging. When the file named by DEBUG_LOG exists, is
# writable and is not a symlink, and its parent directory exists, stderr is
# redirected (through file descriptor 9) to that file, the date, the
# command-line arguments and the OCF_ environment are appended to it, and
# shell tracing (set -x) is switched on.
# NOTE(review): an earlier version of this comment stated that output is
# otherwise redirected to /dev/null and that the log directory must be
# owned by root with permissions 0700. The code below performs no
# ownership or permission check, and only opens /dev/null on descriptor 9
# in the inner else branch -- confirm the intended behaviour.
##########################################################################
DEBUG_LOG="/tmp/mysql.ocf.ra.debug/log"
if [ "${DEBUG_LOG}" -a -w "${DEBUG_LOG}" -a ! -L "${DEBUG_LOG}" ]; then
# Parent directory of the log file (everything before the last slash).
DEBUG_LOG_DIR="${DEBUG_LOG%/*}"
if [ -d "${DEBUG_LOG_DIR}" ]; then
exec 9>>"$DEBUG_LOG"
exec 2>&9
date >&9
echo "$*" >&9
env | grep OCF_ | sort >&9
set -x
else
exec 9>/dev/null
fi
fi
# Actions that are served without validating the environment first.
case "$1" in
meta-data) meta_data
exit $OCF_SUCCESS;;
usage|help) usage
exit $OCF_SUCCESS;;
esac
mysql_common_validate
rc=$?
# Exit code used for the 'status' action when validation failed.
LSB_STATUS_STOPPED=3
if [ $rc -ne 0 ]; then
# Validation failed: decide per action whether to continue or exit.
case "$1" in
# stop falls through to the dispatcher below despite the failed validation.
stop) ;;
monitor)
mysql_common_status "info"
if [ $? -eq $OCF_SUCCESS ]; then
# if validation fails and pid is active, always treat this as an error
ocf_exit_reason "environment validation failed, active pid is in unknown state."
exit $OCF_ERR_GENERIC
fi
# validation failed and pid is not active, it's safe to say this instance is inactive.
exit $OCF_NOT_RUNNING;;
status) exit $LSB_STATUS_STOPPED;;
*) exit $rc;;
esac
fi
# What kind of method was invoked?
# The script ends after this case statement, so the status of the selected
# function becomes the exit status of the agent.
case "$1" in
start) mysql_start;;
stop) mysql_stop;;
status) mysql_common_status err;;
monitor) mysql_monitor;;
promote) mysql_promote;;
demote) mysql_demote;;
notify) mysql_notify;;
validate-all) exit $OCF_SUCCESS;;
*) usage
exit $OCF_ERR_UNIMPLEMENTED;;
esac
# vi:sw=4:ts=4:et:
diff --git a/heartbeat/nginx b/heartbeat/nginx
index 532eb81b2..d28c5a108 100755
--- a/heartbeat/nginx
+++ b/heartbeat/nginx
@@ -1,947 +1,947 @@
#!/bin/sh
#
# High-Availability nginx OCF resource agent
#
# nginx
#
# Description: starts/stops nginx servers.
#
# Author: Alan Robertson
# Dejan Muhamedagic
# This code is based significantly on the apache resource agent
#
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
#
# License: GNU General Public License (GPL)
#
# Copyright: (C) 2002-2010 International Business Machines
#
#
# Our parsing of the nginx config files is very rudimentary.
# It'll work with lots of different configurations - but not every
# possible configuration.
#
# Patches are being accepted ;-)
#
# OCF parameters:
# OCF_RESKEY_configfile
# OCF_RESKEY_httpd
# OCF_RESKEY_port
# OCF_RESKEY_options
# OCF_RESKEY_status10regex
# OCF_RESKEY_status10url
# OCF_RESKEY_client
# OCF_RESKEY_test20url
# OCF_RESKEY_test20regex
# OCF_RESKEY_test20conffile
# OCF_RESKEY_test20name
# OCF_RESKEY_external_monitor30_cmd
#
#
# TO DO:
# More extensive tests of extended monitor actions
# Look at the --with-http_stub_status_module for validating
# the configuration? (or is that automatically done?)
# Checking could certainly result in better error
# messages.
# Allow for the fact that the config file and so on might all be
# on shared disks - this affects the validate-all option.
# Load the shared OCF shell function library (default location under
# $OCF_ROOT unless OCF_FUNCTIONS_DIR is already set).
: ${OCF_FUNCTIONS_DIR=$OCF_ROOT/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
HA_VARRUNDIR=${HA_VARRUN}
#######################################################################
#
# Configuration options - usually you don't need to change these
#
#######################################################################
#
# Candidate nginx binaries, tried in order when no usable binary was
# supplied through the resource parameters.
NGINXDLIST="/usr/sbin/nginx /usr/local/sbin/nginx"
# default options for http clients
# NB: We _always_ test a local resource, so it should be
# safe to connect from the local interface.
WGETOPTS="-O- -q -L --no-proxy --bind-address=127.0.0.1"
CURLOPTS="-o - -Ss -L --interface lo"
# Base URL used when the configuration file provides no listen address.
LOCALHOST="http://localhost"
# Extra options placed before $OPTIONS when nginx is started.
NGINXDOPTS=""
#
#
# End of Configuration options
#######################################################################
# Name this script was invoked as; used as a prefix in log messages.
CMD=`basename $0`
# The config-file-pathname is the pathname to the configuration
# file for this web server. Various appropriate defaults are
# assumed if no config file is specified.
# Print the usage text on stdout and terminate the script.
#   $1 - exit status handed straight to "exit"
usage() {
cat <<-EOF
usage: $0 action
action:
start start nginx
stop stop nginx
reload reload the nginx configuration
status return the status of web server, running or stopped
monitor return TRUE if the web server appears to be working.
For this to be supported you must configure mod_status
and give it a server-status URL - or configure what URL
you wish to be monitored. You have to have installed
either curl or wget for this to work.
meta-data show meta data message
validate-all validate the instance parameters
EOF
exit $1
}
#
# run the http client
#
# Fetch the URL given as $1 with curl; the response goes to stdout.
# Options are $CURLOPTS followed by the per-test $test_httpclient_opts.
# When $test_user is set, the credentials are passed as a curl config
# read from stdin (-K -) instead of being placed on the command line.
curl_func() {
    cl_opts="$CURLOPTS $test_httpclient_opts"
    if [ -z "$test_user" ]; then
        curl $cl_opts "$1"
    else
        echo "-u $test_user:$test_password" | curl -K - $cl_opts "$1"
    fi
}
# Fetch the URL given as $1 with wget; the response goes to stdout.
# Options are $WGETOPTS followed by the per-test $test_httpclient_opts,
# preceded by HTTP authentication options when $test_user is set.
wget_func() {
    cl_opts="$WGETOPTS $test_httpclient_opts"
    if [ -n "$test_user" ]; then
        auth="--http-user=$test_user --http-passwd=$test_password"
    else
        auth=""
    fi
    wget $auth $cl_opts "$1"
}
#
# rely on whatever the user provided
# Run the user-configured HTTP client ($test_httpclient) with its options
# ($test_httpclient_opts) against the URL given as $1. Both variables are
# expanded unquoted, so they are split into separate words.
userdefined() {
$test_httpclient $test_httpclient_opts "$1"
}
#
# find a good http client
#
# Print the name of the HTTP client to use: $CLIENT when configured,
# otherwise curl if it is on the PATH, otherwise wget.
# Returns 1 (and prints nothing) when no client can be found.
findhttpclient() {
    if [ -n "$CLIENT" ]; then
        echo "$CLIENT"
    elif which curl >/dev/null 2>&1; then
        # curl is preferred over wget
        echo "curl"
    elif which wget >/dev/null 2>&1; then
        echo "wget"
    else
        return 1
    fi
}
# Print the name of the shell function that performs the HTTP request.
# $test_httpclient falls back to $ourhttpclient when empty; curl and wget
# map to curl_func / wget_func, anything else to "userdefined".
gethttpclient() {
    : "${test_httpclient:=$ourhttpclient}"
    if [ "$test_httpclient" = curl ] || [ "$test_httpclient" = wget ]; then
        # these are supported
        echo ${test_httpclient}_func
    else
        echo userdefined
    fi
}
# test configuration good?
# Sanity-check the currently loaded extended test definition.
# Returns 1 (after logging an error) when the URL or the regular
# expression is missing, or when only one of user/password is set.
# Returns 0 otherwise.
is_testconf_sane() {
    if [ -z "$test_regex" ] || [ -z "$test_url" ]; then
        ocf_log err "test regular expression or test url empty"
        return 1
    fi
    # Credentials are all-or-nothing: both set or both empty.
    if [ -n "$test_user$test_password" ] &&
       { [ -z "$test_user" ] || [ -z "$test_password" ]; }
    then
        ocf_log err "bad user authentication for extended test"
        return 1
    fi
    return 0
}
#
# read the test definition from the config
#
# Read one test definition from stdin into the test_* shell variables.
#   $1 - name of the test to load; when empty, the first "test" stanza
#        found is used.
# Each input line is split into a keyword and the rest of the line. Lines
# are skipped until a "test <name>" line matches; from then on url, user,
# password, client, client_opts and match fill the corresponding test_*
# variable, "end" stops reading, comment and empty lines are ignored.
# Returns 1 after logging an error when an unknown keyword is met inside
# the selected stanza.
readtestconf() {
test_name="$1" # we look for this one or the first one if empty
lcnt=0
readdef=""
test_url="" test_regex=""
test_user="" test_password=""
test_httpclient="" test_httpclient_opts=""
while
read key value
do
# lcnt is the current line number, used only in the error message.
lcnt=$((lcnt+1))
if
[ "$readdef" ]
then
case "$key" in
"url") test_url="$value" ;;
"user") test_user="$value" ;;
"password") test_password="$value" ;;
"client") test_httpclient="$value" ;;
"client_opts") test_httpclient_opts="$value" ;;
"match") test_regex="$value" ;;
"end") break ;;
"#"*|"") ;;
*) ocf_log err "$lcnt: $key: unknown keyword"; return 1 ;;
esac
else
# Still looking for the start of the wanted stanza.
[ "$key" = "test" ] &&
[ -z "$test_name" -o "$test_name" = "$value" ] &&
readdef=1
fi
done
}
# Print the nginx configuration file $1 with include directives expanded.
# The awk program prints every line except those whose first field is
# "include"/"Include"; for those it prints the referenced files instead:
#   - a directory is read recursively (find -L ... -type f),
#   - anything else is expanded through "ls", so wildcards work.
# A relative include is prefixed with the value of the most recent "root"
# directive (double quotes removed) once one has been seen.
# The result is then stripped of "#" comments, of leading and trailing
# blanks, and of empty lines.
nginxcat() {
awk '
function procline() {
split($0,a);
if( a[1]~/^[Ii]nclude$/ ) {
procinclude(a[2]);
} else {
if( a[1]=="root" ) {
rootdir=a[2];
gsub("\"","",rootdir);
}
print;
}
}
function printfile(infile, a) {
while( (getline<infile) > 0 ) {
procline();
}
close(infile);
}
function allfiles(dir, cmd,f) {
cmd="find -L "dir" -type f";
while( ( cmd | getline f ) > 0 ) {
printfile(f);
}
close(cmd);
}
function listfiles(pattern, cmd,f) {
cmd="ls "pattern" 2>/dev/null";
while( ( cmd | getline f ) > 0 ) {
printfile(f);
}
close(cmd);
}
function procinclude(spec) {
if( rootdir!="" && spec!~/^\// ) {
spec=rootdir"/"spec;
}
if( isdir(spec) ) {
allfiles(spec); # read all files in a directory (and subdirs)
} else {
listfiles(spec); # there could be jokers
}
}
function isdir(s) {
return !system("test -d \""s"\"");
}
{ procline(); }
' $1 |
sed 's/#.*//;s/[[:blank:]]*$//;s/^[[:blank:]]*//' |
grep -v '^$'
}
#
# set parameters (as shell vars) from our nginx config file
#
# Import selected directives of an nginx configuration as shell variables.
#   $1     - configuration file (expanded through nginxcat)
#   $2...  - directive names to look for; each becomes a shell variable
# For every requested name, the first line whose first field matches it
# (case-insensitively) produces "name=<second field>", and the collected
# assignments are evaluated in the current shell.
# NOTE(review): the values come from the configuration file and are passed
# through eval unescaped, so that file must be trusted.
get_nginx_params() {
configfile=$1
shift 1
# Comma-separated list of the requested names, handed to awk.
vars=`echo "$@" | sed 's/ /,/g'`
eval `
nginxcat $configfile | awk -v vars="$vars" '
BEGIN{
split(vars,v,",");
for( i in v )
vl[i]=tolower(v[i]);
}
{
for( i in v )
if( tolower($1)==vl[i] ) {
print v[i]"="$2
delete vl[i]
break
}
}
'`
}
#
# Return the location(s) that are handled by the given handler
#
# Print the location path(s) of the configuration file $1 under which
# "stub_status on;" appears: the Perl script remembers the argument of the
# most recent "location ... {" line and prints it for every matching
# stub_status line.
# NOTE(review): PerlScript is single-quoted, so the "$2" in the second
# pattern is a Perl variable, not this function's second argument; the
# handler name passed by callers appears to be unused - confirm.
FindLocationForHandler() {
PerlScript='while (<>) {
/^\s*location\s+([^ \s{]+)\s*{/i && ($loc=$1);
/^\s*stub_status\s+on\s*;$2/i && print "$loc\n";
}'
nginxcat $1 | perl -e "$PerlScript"
}
#
# Check if the port is valid
#
# Succeed when $1 names a valid TCP port.
#   $1 - either a bare port number or "host:port"; in the latter case
#        everything up to and including the first colon is dropped.
# The port must be a decimal number in the range 1-65535. The upper bound
# used to be tested with "-lt 65537", which wrongly accepted 65536.
# Side effect: leaves the extracted port in the global $lclport.
CheckPort() {
    lclport="$1"
    case "$lclport" in
    *:[0-9]*) lclport=`echo "$lclport" | sed 's%^[^:][^:]*:%%'`;;
    esac
    ocf_is_decimal "$lclport" &&
        [ "$lclport" -gt 0 ] && [ "$lclport" -lt 65536 ]
}
# Print the base URL of the local server: "http://$listen" when a listen
# address is known, otherwise "$LOCALHOST:$PORT".
buildlocalurl() {
    if [ -n "$listen" ]; then
        echo "http://${listen}"
    else
        echo "${LOCALHOST}:${PORT}"
    fi
}
#
# Get all the parameters we need from the Nginx config file
#
# Derive the runtime parameters from the nginx configuration file.
#   $1 - path of the nginx configuration file
# Sets: ConfigFile, DEFAULT_PID (from --pid-path in $NGINX_CONFIGURATION),
#       root/pid/listen (directives read from the file), PidFile, PORT,
#       and STATUSURL when it was not configured explicitly.
# Returns non-zero when the file does not exist or when no pid file
# location could be determined.
GetParams() {
    ConfigFile=$1
    DEFAULT_PID=`echo "$NGINX_CONFIGURATION" | sed -e 's%.*--pid-path=%%' -e 's% *--.*%%'`
    if [ ! -f "$ConfigFile" ]; then
        return 1
    fi

    get_nginx_params "$ConfigFile" root pid listen

    # The pid directive wins; otherwise use the compiled-in pid path.
    PidFile="$pid"
    case $PidFile in
    "") PidFile=$DEFAULT_PID ;;
    *)  ;;
    esac

    # First valid candidate wins: configured port, listen directive, 80.
    for p in "$PORT" "$listen" 80
    do
        if CheckPort "$p"; then
            PORT="$p"
            break
        fi
    done

    # The listen directive may be a bare port; prefix it with a host so it
    # can be used in a URL. When there is no listen directive at all it is
    # left empty so that buildlocalurl falls back to $LOCALHOST:$PORT
    # (it used to become "localhost:", which dropped $PORT from the URL).
    if [ -n "$listen" ]; then
        echo "$listen" | grep ':' >/dev/null ||
            listen="localhost:$listen"
    fi

    #
    # It's difficult to figure out whether the server supports
    # the status operation.
    #
    # Typically (but not necessarily) the status URL is /nginx_status
    #
    # We assume that the "main" web server will serve the status page at
    # the location found in the configuration file.
    #
    # If this doesn't work for you, then set the status10url attribute.
    #
    if [ "X$STATUSURL" = "X" ]; then
        StatusURL=`FindLocationForHandler $1 nginx_status | tail -1`
        STATUSURL="`buildlocalurl`$StatusURL"
    fi
    test ! -z "$PidFile"
}
#
# return TRUE if a process with given PID is running
#
# Succeed when a process with PID $1 exists.
# Side effect: stores the PID in the global $NginxPID.
ProcessRunning() {
    NginxPID=$1
    if ! { [ -d /proc ] && [ -d /proc/1 ]; }; then
        # No usable /proc: probe with signal 0.
        # This assumes we're running as root...
        kill -0 "$NginxPID" >/dev/null 2>&1
        return
    fi
    # /proc looks usable: a directory per live process.
    [ -d /proc/$NginxPID ]
}
# Succeed when the pid file named by $PidFile exists, is non-empty, holds
# a decimal number and a process with that PID is running. Logs nothing.
# $PidFile is quoted throughout: unquoted, an empty value turned the file
# test into "[ -f -a -s ]" (which is true) and made "cat" read stdin.
silent_status() {
    if [ -f "$PidFile" ] && [ -s "$PidFile" ] &&
       ocf_is_decimal "$(cat "$PidFile")"
    then
        ProcessRunning "$(cat "$PidFile")"
    else
        : No pid file
        false
    fi
}
# Start nginx and wait until the monitor action stops reporting
# "not running".
# Returns OCF_SUCCESS when nginx is already running, OCF_ERR_CONFIGURED
# when the configuration test (-t) fails, OCF_ERR_GENERIC when the binary
# fails to start, and otherwise the first monitor result that differs
# from OCF_NOT_RUNNING.
start_nginx() {
if
silent_status
then
ocf_log info "$CMD already running (pid $NginxPID)"
return $OCF_SUCCESS
fi
# Refuse to start with a configuration nginx itself rejects.
if
ocf_run $NGINXD $OPTIONS -t -c $CONFIGFILE
then
: Configuration file $CONFIGFILE looks OK
else
return $OCF_ERR_CONFIGURED
fi
NGINX_VERSION=`$NGINXD -v 2>&1`
ocf_log info "Starting $NGINXD - $NGINX_VERSION"
ocf_log info "$NGINXD build configuration: $NGINX_CONFIGURATION"
if
ocf_run $NGINXD $NGINXDOPTS $OPTIONS -c $CONFIGFILE
then
: $NGINXD started without errors!
else
return $OCF_ERR_GENERIC
fi
tries=0
# This looks like a potential infinite loop - but it's not in practice
# The LRM will time us out and kill us if nginx never starts working.
# The loop condition is the whole command list up to "do": it polls the
# monitor and keeps looping only while the result is OCF_NOT_RUNNING.
while
monitor_nginx
ec=$?
if
[ $ec -eq $OCF_NOT_RUNNING ]
then
tries=`expr $tries + 1`
ocf_log info "Waiting for $NGINXD $OPTIONS -c $CONFIGFILE to come up (try $tries)"
true
else
false
fi
do
sleep 1
done
return $ec
}
# Stop nginx.
# The master process named in the pid file is sent the default kill
# signal, which is repeated once per second for up to 10 tries while the
# process is still there. Afterwards any process whose command line still
# matches "$NGINXD.*$CONFIGFILE" is signalled with SIGTERM, SIGHUP and
# SIGKILL in turn, one second apart, until none is left.
# The pkill pattern is quoted like the pgrep pattern: unquoted it was
# subject to word splitting and pathname expansion by the shell.
stop_nginx() {
    if silent_status; then
        if kill $NginxPID; then
            tries=0
            while ProcessRunning $NginxPID && [ $tries -lt 10 ]
            do
                sleep 1
                kill $NginxPID >/dev/null
                ocf_log info "Killing nginx PID $NginxPID"
                tries=`expr $tries + 1`
            done
        else
            ocf_log warn "Killing nginx PID $NginxPID FAILED."
        fi
        if ProcessRunning $NginxPID; then
            ocf_log info "$CMD still running ($NginxPID)."
            false
        else
            ocf_log info "$CMD stopped."
        fi
    else
        ocf_log info "$CMD is not running."
    fi

    #
    # I'm not convinced this is a wonderful idea (AlanR)
    #
    for sig in SIGTERM SIGHUP SIGKILL
    do
        if pgrep -f "$NGINXD.*$CONFIGFILE" >/dev/null; then
            pkill -$sig -f "$NGINXD.*$CONFIGFILE" >/dev/null
            ocf_log info "nginxd children were signalled ($sig)"
            sleep 1
        else
            break
        fi
    done
}
# Reload the nginx configuration by sending signal 1 (HUP) to the running
# master process; when nginx is not running it is started instead.
# Returns OCF_SUCCESS / OCF_ERR_GENERIC for the signal, or the result of
# start_nginx.
reload_nginx() {
    if ! silent_status; then
        start_nginx
        return
    fi
    if kill -1 $NginxPID; then
        : $NGINX reload signal to $NginxPID succeeded
        return $OCF_SUCCESS
    fi
    return $OCF_ERR_GENERIC
}
# Report whether nginx is running, based on the pid file only.
# Logs the state and returns OCF_SUCCESS or OCF_NOT_RUNNING; the raw
# result of silent_status is left in the global $rc.
status_nginx() {
    silent_status
    rc=$?
    case $rc in
    0)
        ocf_log info "$CMD is running (pid $NginxPID)."
        return $OCF_SUCCESS
        ;;
    *)
        ocf_log info "$CMD is stopped."
        return $OCF_NOT_RUNNING
        ;;
    esac
}
# Make $test_url absolute: a URL that does not start with "http" is
# prefixed with the local server's base URL.
fixtesturl() {
    if echo $test_url | grep -qs "^http"; then
        return 0
    fi
    test_url="`buildlocalurl`$test_url"
}
# Level 30 monitor: run the external command configured in $EXTMONITOR.
# Returns OCF_ERR_CONFIGURED when no command is configured or when its
# executable cannot be found, OCF_NOT_RUNNING when the command fails and
# OCF_SUCCESS when it succeeds.
# Fixes over the previous version:
#   - the error messages referenced an undefined variable ($External),
#     which silently removed the first word of the message;
#   - only the first word of the command string was executed, so any
#     configured arguments were dropped. The whole string is run now.
monitor_nginx_external() {
    if [ -z "$EXTMONITOR" ]; then
        ocf_log err "External level 30 Monitor Command not configured."
        return $OCF_ERR_CONFIGURED
    fi

    # First word of the command string: the executable to look for.
    extbase=`echo $EXTMONITOR | sed 's% .*%%'`
    if
        case "$extbase" in
        /*) test -f "$extbase" -a -x "$extbase";;
        *)  which "$extbase" >/dev/null 2>&1
        esac
    then
        : OK - $extbase seems to be there...
    else
        ocf_log err "External monitor command [$extbase] is not installed."
        return $OCF_ERR_CONFIGURED
    fi

    # Deliberately unquoted: the command string is split into words.
    if $EXTMONITOR; then
        : OK - $extbase succeeded
    else
        ocf_log err "$extbase reported failure [rc=$?]"
        return $OCF_NOT_RUNNING
    fi
    return $OCF_SUCCESS
}
# Level 20 monitor: fetch a test URL and match a regular expression
# (case-insensitively) against the response.
# The test definition comes from the test configuration file when it is
# readable, otherwise from the test20url / test20regex parameters.
# Returns OCF_ERR_CONFIGURED for an unusable test definition, otherwise
# the exit status of the final grep.
# The configured test name ($TESTNAME) is now passed to readtestconf; it
# used to be ignored, so the first stanza of the file was always used.
# An empty name still selects the first stanza.
monitor_nginx_extended() {
    if [ -f "$TESTCONFFILE" -a -r "$TESTCONFFILE" ]; then
        readtestconf "$TESTNAME" < "$TESTCONFFILE"
    else
        test_url="$TESTURL"
        test_regex="$TESTREGEX20"
    fi
    whattorun=`gethttpclient`
    fixtesturl
    is_testconf_sane || return $OCF_ERR_CONFIGURED
    $whattorun "$test_url" | grep -Ei "$test_regex" > /dev/null
}
# Level 10 monitor: fetch $STATUSURL with the detected HTTP client and
# match $TESTREGEX (case-insensitively) against the response.
# Returns OCF_ERR_CONFIGURED when the URL or the client is missing,
# otherwise the exit status of the final grep.
monitor_nginx_basic() {
    if [ -z "$STATUSURL" ]; then
        ocf_log err "status10url parameter empty"
        return $OCF_ERR_CONFIGURED
    fi
    if [ -z "$ourhttpclient" ]; then
        ocf_log err "could not find a http client; make sure that either wget or curl is available"
        return $OCF_ERR_CONFIGURED
    fi
    ${ourhttpclient}_func "$STATUSURL" | grep -Ei "$TESTREGEX" > /dev/null
}
# Monitor entry point; the depth is selected by $OCF_CHECK_LEVEL.
#   unset or below 10 - the process check alone decides
#   10..19            - basic status URL check
#   20..29            - extended test
#   anything else     - external command
# Returns OCF_NOT_RUNNING when the process check fails.
monitor_nginx() {
    if ! silent_status; then
        ocf_log info "$CMD not running"
        return $OCF_NOT_RUNNING
    fi

    if [ -z "$OCF_CHECK_LEVEL" ] || [ "$OCF_CHECK_LEVEL" -lt 10 ]; then
        return 0
    fi

    # we'll need one
    ourhttpclient=`findhttpclient`
    if [ "$OCF_CHECK_LEVEL" -lt 20 ]; then
        monitor_nginx_basic
    elif [ "$OCF_CHECK_LEVEL" -lt 30 ]; then
        monitor_nginx_extended
    else
        monitor_nginx_external
    fi
}
# Print the OCF resource agent meta-data (XML) on stdout and exit with
# OCF_SUCCESS.
# Text fixes over the previous version: the "httpd" parameter is described
# as the nginx binary (it was documented as the Apache httpd binary with
# default /usr/sbin/httpd, although the agent searches /usr/sbin/nginx
# first), plus the typos "paramter" and "query to Nginx".
metadata_nginx(){
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="nginx">
<version>1.0</version>
<longdesc lang="en">
This is the resource agent for the Nginx web/proxy server.
This resource agent does not monitor POP or IMAP servers, as
we don't know how to determine meaningful status for them.
The start operation ends with a loop in which monitor is
repeatedly called to make sure that the server started and that
it is operational. Hence, if the monitor operation does not
succeed within the start operation timeout, the nginx resource
will end with an error status.
The default monitor operation will verify that nginx is running.
The level 10 monitor operation by default will try and fetch the /nginx_status
page - which is commented out in sample nginx configurations.
Make sure that the /nginx_status page works and that the access
is restricted to localhost (address 127.0.0.1) plus whatever
places _outside the cluster_ you want to monitor the server from.
See the status10url and status10regex attributes for more details.
The level 20 monitor operation will perform a more complex set of tests
from a configuration file.
The level 30 monitor operation will run an external command to perform
an arbitrary monitoring operation.
</longdesc>
<shortdesc lang="en">Manages an Nginx web/proxy server instance</shortdesc>
<parameters>
<parameter name="configfile" required="0" unique="1">
<longdesc lang="en">
The full pathname of the Nginx configuration file.
This file is parsed to provide defaults for various other
resource agent parameters.
</longdesc>
<shortdesc lang="en">configuration file path</shortdesc>
<content type="string"/>
</parameter>
<parameter name="httpd">
<longdesc lang="en">
The full pathname of the nginx binary (optional).
</longdesc>
<shortdesc lang="en">nginx binary path</shortdesc>
<content type="string" default="/usr/sbin/nginx" />
</parameter>
<parameter name="port" >
<longdesc lang="en">
A port number that we can probe for status information
using the statusurl.
This will default to the port number found in the
configuration file, or 80, if none can be found
in the configuration file.
</longdesc>
<shortdesc lang="en">httpd port</shortdesc>
<content type="integer" />
</parameter>
<parameter name="status10url">
<longdesc lang="en">
The URL to monitor (the nginx server status page by default) when given a level 10 monitor operation.
If left unspecified, it will be inferred from
the nginx configuration file, or defaulted to /nginx_status.
If you set this, make sure that it succeeds *only* from the
localhost (127.0.0.1) and no other cluster nodes.
Otherwise, the cluster software may complain
about it being active on multiple nodes.
</longdesc>
<shortdesc lang="en">url name</shortdesc>
<content type="string" />
</parameter>
<parameter name="status10regex">
<longdesc lang="en">
Regular expression to match in the output of status10url.
Case insensitive.
</longdesc>
<shortdesc lang="en">monitor regular expression</shortdesc>
<content type="string" default="Reading: [0-9]+ Writing: [0-9]+ Waiting: [0-9]+"/>
</parameter>
<parameter name="testclient">
<longdesc lang="en">
Client to use to query Nginx for level 10 and level 20 tests.
If not specified, the RA will try to find one on the system.
Currently, wget and curl are supported, with curl being preferred.
For example, you can set this parameter to "wget" if you prefer that to curl.
</longdesc>
<shortdesc lang="en">http client</shortdesc>
<content type="string" />
</parameter>
<parameter name="test20url">
<longdesc lang="en">
URL to test. If it does not start with "http", then it's
considered to be relative to the document root address.
</longdesc>
<shortdesc lang="en">Level 20 monitor url</shortdesc>
<content type="string" />
</parameter>
<parameter name="test20regex">
<longdesc lang="en">
Regular expression to match in the output of test20url.
Case insensitive.
</longdesc>
<shortdesc lang="en">Level 20 monitor regular expression</shortdesc>
<content type="string" />
</parameter>
<parameter name="test20conffile">
<longdesc lang="en">
A file which contains a more complex test configuration. Could be useful if
you have to check more than one web application or in case sensitive
info should be passed as arguments (passwords). Furthermore,
using a config file is the only way to specify certain parameters.
Please see README.webapps for examples and file description.
</longdesc>
<shortdesc lang="en">Level 20 test configuration file</shortdesc>
<content type="string" />
</parameter>
<parameter name="test20name">
<longdesc lang="en">
Name of the test within the test configuration file.
</longdesc>
<shortdesc lang="en">Level 20 test name</shortdesc>
<content type="string" />
</parameter>
<parameter name="external_monitor30_cmd">
<longdesc lang="en">
Command string to run which implements level 30 monitoring.
</longdesc>
<shortdesc lang="en">Level 30 test string</shortdesc>
<content type="string" />
</parameter>
<parameter name="options">
<longdesc lang="en">
Extra options to apply when starting nginx.
</longdesc>
<shortdesc lang="en">nginx start options</shortdesc>
<content type="string" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="40s" />
<action name="stop" timeout="60s" />
<action name="reload" timeout="40s" />
<action name="status" timeout="30s" />
<action name="monitor" timeout="30s" depth="0" interval="10s" />
<action name="monitor" timeout="30s" depth="10" interval="30s" />
<action name="monitor" timeout="45s" depth="20" />
<action name="monitor" timeout="60s" depth="30" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="5" />
</actions>
</resource-agent>
END
exit $OCF_SUCCESS
}
# validate-all action: check the port, the status URL, the nginx binary
# and the configuration file, then let nginx test the configuration.
# Exits with OCF_ERR_ARGS or OCF_ERR_CONFIGURED on the static checks,
# returns OCF_ERR_CONFIGURED when "nginx -t" fails and OCF_SUCCESS
# otherwise.
# All operands of the "[ ]" tests are quoted now; unquoted, an empty or
# blank-containing value changed the number of arguments seen by "[" and
# the checks only worked by accident.
validate_all_nginx() {
    # $PORT was already forced to a valid value in GetParams(), so this
    # check is expected to pass.
    if CheckPort "$PORT"; then
        : OK
    else
        ocf_log err "Port number $PORT is invalid!"
        exit $OCF_ERR_ARGS
    fi

    if [ -z "$STATUSURL" ]; then
        : OK to be empty
    else
        case "$STATUSURL" in
        http://*/*) ;;
        *)  ocf_log err "Invalid STATUSURL $STATUSURL"
            exit $OCF_ERR_ARGS ;;
        esac
    fi

    if [ ! -x "$NGINXD" ]; then
        ocf_log err "NGINXD $NGINXD not found or is not an executable!"
        exit $OCF_ERR_ARGS
    fi

    # The file has normally been parsed before we get here, so this check
    # is expected to pass as well.
    if [ ! -f "$CONFIGFILE" ]; then
        ocf_log err "Configuration file $CONFIGFILE not found!"
        exit $OCF_ERR_CONFIGURED
    fi

    if ocf_run $NGINXD $OPTIONS -t -c $CONFIGFILE; then
        : Cool $NGINXD likes $CONFIGFILE
    else
        ocf_log err "$NGINXD $OPTIONS -t -c $CONFIGFILE reported a configuration error."
        return $OCF_ERR_CONFIGURED
    fi
    return $OCF_SUCCESS
}
# Import the resource parameters. Exactly one argument (the action) is
# expected; anything else prints the usage text and exits OCF_ERR_ARGS.
if [ $# -eq 1 ]; then
    COMMAND=$1
    NGINXD="$OCF_RESKEY_httpd"
    PORT="$OCF_RESKEY_port"
    STATUSURL="$OCF_RESKEY_status10url"
    CONFIGFILE="$OCF_RESKEY_configfile"
    OPTIONS="$OCF_RESKEY_options"
    # The meta-data advertises this parameter as "testclient" while only
    # "client" used to be read, so a configured testclient had no effect.
    # "client" still wins when both are set.
    CLIENT=${OCF_RESKEY_client:-$OCF_RESKEY_testclient}
    TESTREGEX=${OCF_RESKEY_status10regex:-'Reading: [0-9]+ Writing: [0-9]+ Waiting: [0-9]+'}
    TESTURL="$OCF_RESKEY_test20url"
    TESTREGEX20=${OCF_RESKEY_test20regex}
    TESTCONFFILE="$OCF_RESKEY_test20conffile"
    TESTNAME="$OCF_RESKEY_test20name"
    EXTMONITOR="$OCF_RESKEY_external_monitor30_cmd"
else
    usage $OCF_ERR_ARGS
fi
# Exit code used for the "status" action when nginx is stopped.
LSB_STATUS_STOPPED=3
# Locate a usable nginx binary. When the configured one is missing or not
# executable, fall back to the first usable entry of $NGINXDLIST.
if
[ "X$NGINXD" = X -o ! -f "$NGINXD" -o ! -x "$NGINXD" ]
then
NGINXD=
for h in $NGINXDLIST
do
if
[ -f "$h" -a -x "$h" ]
then
NGINXD="$h"
break
fi
done
# It is possible that we still do not have a valid httpd at this stage
if
[ -z "$NGINXD" ]
then
# Without a binary nginx cannot be running, so the actions below can
# still be answered; everything else is an installation error.
case $COMMAND in
stop) exit $OCF_SUCCESS;;
monitor) exit $OCF_NOT_RUNNING;;
status) exit $LSB_STATUS_STOPPED;;
meta-data) metadata_nginx;;
esac
ocf_log err "nginx binary not found! Please verify you've installed it"
exit $OCF_ERR_INSTALLED
fi
# A binary was requested via $OCF_RESKEY_httpd but was not usable: tell
# the user which binary from $NGINXDLIST is used instead.
if
[ ! -z "$OCF_RESKEY_httpd" ]
then
ocf_log info "Using $NGINXD as nginx"
fi
fi
# Base name of the binary with anything after the first "-" removed.
# NOTE(review): httpd_basename is not referenced again in this part of the
# file - confirm whether it is still needed.
httpd_basename=`basename $NGINXD`
case $httpd_basename in
*-*) httpd_basename=`echo "$httpd_basename" | sed -e 's%\-.*%%'`;;
esac
# The build-time configure arguments supply the default configuration
# file (--conf-path) when none was configured.
NGINX_CONFIGURATION=`$NGINXD -V 2>&1 |grep 'configure arguments:'`
DEFAULT_CONFIG=`echo "$NGINX_CONFIGURATION" | sed -e 's%.*--conf-path=%%' -e 's% *--.*%%'`
case "$CONFIGFILE" in
"") CONFIGFILE=$DEFAULT_CONFIG;;
*) ;;
esac
# Without a configuration file nginx is considered stopped for the
# stop, monitor and status actions.
if
[ ! -f "$CONFIGFILE" ]
then
case $COMMAND in
stop) ocf_log warn "$CONFIGFILE not found - nginx considered stopped"
exit $OCF_SUCCESS;;
monitor) exit $OCF_NOT_RUNNING;;
status) exit $LSB_STATUS_STOPPED;;
esac
fi
# Every action except meta-data needs the parsed configuration.
if
[ "X$COMMAND" = Xmeta-data ] || GetParams $CONFIGFILE
then
: OK
else
ocf_log err "Cannot parse config file [$CONFIGFILE]"
exit $OCF_ERR_CONFIGURED
fi
# Dispatch the requested action.
case $COMMAND in
start) start_nginx;;
stop) stop_nginx;;
reload) reload_nginx;;
status) status_nginx;;
monitor) monitor_nginx;;
meta-data) metadata_nginx;;
validate-all) validate_all_nginx;;
*) usage $OCF_ERR_UNIMPLEMENTED;;
esac
diff --git a/heartbeat/ora-common.sh b/heartbeat/ora-common.sh
index 728caafec..b238d12ff 100644
--- a/heartbeat/ora-common.sh
+++ b/heartbeat/ora-common.sh
@@ -1,88 +1,88 @@
# ora-common.sh
#
# Description: Common code for oracle and oralsnr resource agents
#
#
# Author: Dejan Muhamedagic
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
# License: GNU General Public License (GPL)
# Copyright: (C) 2012 Dejan Muhamedagic, SUSE/Attachmate
#
# Gather up information about our oracle instance
# Remove the temporary files listed in $TMPFILES. The variable is expanded
# unquoted, so it may hold several whitespace-separated paths.
rmtmpfiles() {
rm -f $TMPFILES
}
# Set up and export the Oracle environment for the given instance.
#   $1 - ORACLE_SID
#   $2 - ORACLE_HOME (optional; looked up in /etc/oratab by SID)
#   $3 - ORACLE_OWNER (optional; owner of the ORACLE_HOME directory)
#   $4 - TNS_ADMIN (optional; $ORACLE_HOME/network/admin; only the
#        oralsnr agent may provide and use this one)
# Also writes the environment to a temporary file ($ORA_ENVF, mode 644),
# records it in $TMPFILES and installs an EXIT trap that removes it.
ora_common_getconfig() {
    ORACLE_SID=$1
    ORACLE_HOME=$2
    ORACLE_OWNER=$3
    TNS_ADMIN=$4

    # ORACLE_HOME: fall back to the oratab entry of this SID.
    if [ -z "$ORACLE_HOME" ]; then
        ORACLE_HOME=`awk -F: "/^$ORACLE_SID:/"'{print $2}' /etc/oratab`
    fi

    # ORACLE_OWNER: fall back to whoever owns the home directory.
    if [ -z "$ORACLE_OWNER" ]; then
        ORACLE_OWNER=`ls -ld $ORACLE_HOME/. 2>/dev/null | awk 'NR==1{print $3}'`
    fi

    # TNS_ADMIN: users may supply a custom path; otherwise use the
    # default location below ORACLE_HOME.
    if [ -z "$TNS_ADMIN" ]; then
        TNS_ADMIN=$ORACLE_HOME/network/admin
    fi

    LD_LIBRARY_PATH=$ORACLE_HOME/lib
    LIBPATH=$ORACLE_HOME/lib
    PATH=$ORACLE_HOME/bin:$ORACLE_HOME/dbs:$PATH
    export ORACLE_SID ORACLE_HOME ORACLE_OWNER TNS_ADMIN
    export LD_LIBRARY_PATH LIBPATH

    # Dump the environment to a file other users can read.
    ORA_ENVF=`mktemp`
    dumporaenv > $ORA_ENVF
    chmod 644 $ORA_ENVF
    TMPFILES="$ORA_ENVF"
    trap "rmtmpfiles" EXIT
}
# Check the prerequisites shared by the Oracle agents.
# Returns OCF_ERR_INSTALLED when ORACLE_HOME or ORACLE_OWNER is unset,
# OCF_ERR_PERM when the caller is neither root nor the Oracle owner, and
# 0 otherwise.
# Side effect: stores the invoking user's name in the global $US.
ora_common_validate_all() {
    # Let's make sure a few important things are set...
    if [ -z "$ORACLE_HOME" ]; then
        ocf_log info "ORACLE_HOME not set"
        return $OCF_ERR_INSTALLED
    fi
    if [ -z "$ORACLE_OWNER" ]; then
        ocf_log info "ORACLE_OWNER not set"
        return $OCF_ERR_INSTALLED
    fi

    US=`id -u -n`
    if [ $US != root -a $US != $ORACLE_OWNER ]; then
        ocf_exit_reason "$0 must be run as root or $ORACLE_OWNER"
        return $OCF_ERR_PERM
    fi
    return 0
}
# Print shell code on stdout that re-creates the Oracle environment
# (PATH, ORACLE_*, library paths, TNS_ADMIN) when sourced.
dumporaenv() {
    printf '%s\n' \
        "PATH=$ORACLE_HOME/bin:$ORACLE_HOME/dbs:$PATH" \
        "ORACLE_SID=$ORACLE_SID" \
        "ORACLE_HOME=$ORACLE_HOME" \
        "ORACLE_OWNER=$ORACLE_OWNER" \
        "LD_LIBRARY_PATH=$ORACLE_HOME/lib" \
        "LIBPATH=$ORACLE_HOME/lib" \
        "TNS_ADMIN=$TNS_ADMIN" \
        "export ORACLE_SID ORACLE_HOME ORACLE_OWNER TNS_ADMIN" \
        "export LD_LIBRARY_PATH LIBPATH"
}
# vim:tabstop=4:shiftwidth=4:textwidth=0:wrapmargin=0
diff --git a/heartbeat/oracle b/heartbeat/oracle
index 6fad5bc6f..67e438594 100755
--- a/heartbeat/oracle
+++ b/heartbeat/oracle
@@ -1,770 +1,770 @@
#!/bin/sh
#
#
# oracle
#
# Description: Manages an Oracle Database as a High-Availability
# resource
#
#
# Author: Dejan Muhamedagic
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
# License: GNU General Public License (GPL)
# Copyright: (C) 2006 International Business Machines, Inc.
#
# This code inspired by the DB2 resource script
# written by Alan Robertson
#
# An example usage in /etc/ha.d/haresources:
# node1 10.0.0.170 oracle::RK1::/oracle/10.2::orark1
#
# See oracle_usage() function below for more details...
#
# OCF instance parameters:
# OCF_RESKEY_sid
# OCF_RESKEY_home (optional; else read it from /etc/oratab)
# OCF_RESKEY_user (optional; figure it out by checking file ownership)
# OCF_RESKEY_ipcrm (optional; defaults to "instance")
# OCF_RESKEY_clear_backupmode (optional; default to "false")
# OCF_RESKEY_shutdown_method (optional; default to "checkpoint/abort")
# OCF_RESKEY_monuser (optional; defaults to "OCFMON")
# OCF_RESKEY_monpassword (optional; defaults to "OCFMON")
# OCF_RESKEY_monprofile (optional; defaults to "OCFMONPROFILE")
#
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
. ${OCF_FUNCTIONS_DIR}/ora-common.sh
#######################################################################
oracle_usage() {
methods=`oracle_methods`
methods=`echo $methods | tr ' ' '|'`
cat <<-!
usage: $0 {$methods}
$0 manages an Oracle Database instance as an HA resource.
The 'start' operation starts the database.
The 'stop' operation stops the database.
The 'status' operation reports whether the database is running
The 'monitor' operation reports whether the database seems to be working
The 'dumpinstipc' operation prints IPC resources used by the instance
The 'cleanup' operation tries to clean up after Oracle was brutally stopped
The 'validate-all' operation reports whether the parameters are valid
The 'methods' operation reports on the methods $0 supports
!
}
# Defaults
# Default values for the monitoring user, its password and its profile;
# they are interpolated into the meta-data below.
OCF_RESKEY_monuser_default="OCFMON"
OCF_RESKEY_monpassword_default="OCFMON"
OCF_RESKEY_monprofile_default="OCFMONPROFILE"
# Print the OCF resource agent meta-data (XML) on stdout. The defaults of
# monuser, monpassword and monprofile are interpolated from the
# OCF_RESKEY_*_default variables.
# Text fixes over the previous version: the "user" description now matches
# the lookup done in ora-common.sh (owner of the ORACLE_HOME directory,
# not of an .ora file), plus the typos "explicitely" and "peoples".
oracle_meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="oracle">
<version>1.0</version>
<longdesc lang="en">
Resource script for oracle. Manages an Oracle Database instance
as an HA resource.
</longdesc>
<shortdesc lang="en">Manages an Oracle Database instance</shortdesc>
<parameters>
<parameter name="sid" unique="1" required="1">
<longdesc lang="en">
The Oracle SID (aka ORACLE_SID).
</longdesc>
<shortdesc lang="en">sid</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="home" unique="0">
<longdesc lang="en">
The Oracle home directory (aka ORACLE_HOME).
If not specified, then the SID along with its home should be listed in
/etc/oratab.
</longdesc>
<shortdesc lang="en">home</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="user" unique="0">
<longdesc lang="en">
The Oracle owner (aka ORACLE_OWNER).
If not specified, then it is set to the owner of
the \$ORACLE_HOME directory.
If this does not work for you, just set it explicitly.
</longdesc>
<shortdesc lang="en">user</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="monuser" unique="0">
<longdesc lang="en">
Monitoring user name. Every connection as
sysdba is logged in an audit log. This can
result in a large number of new files created.
A new user is created (if it doesn't exist) in
the start action and subsequently used in monitor.
It should have very limited rights. Make sure
that the password for this user does not expire.
</longdesc>
<shortdesc lang="en">monuser</shortdesc>
<content type="string" default="$OCF_RESKEY_monuser_default" />
</parameter>
<parameter name="monpassword" unique="0">
<longdesc lang="en">
Password for the monitoring user. Make sure
that the password for this user does not expire.
</longdesc>
<shortdesc lang="en">monpassword</shortdesc>
<content type="string" default="$OCF_RESKEY_monpassword_default" />
</parameter>
<parameter name="monprofile" unique="0">
<longdesc lang="en">
Profile used by the monitoring user. If the
profile does not exist, it will be created
with a non-expiring password.
</longdesc>
<shortdesc lang="en">monprofile</shortdesc>
<content type="string" default="$OCF_RESKEY_monprofile_default" />
</parameter>
<parameter name="ipcrm" unique="0">
<longdesc lang="en">
Sometimes IPC objects (shared memory segments and semaphores)
belonging to an Oracle instance might be left behind which
prevents the instance from starting. It is not easy to figure out
which shared segments belong to which instance, in particular when
more instances are running as same user.
What we use here is the "oradebug" feature and its "ipc" trace
utility. It is not optimal to parse the debugging information, but
I am not aware of any other way to find out about the IPC
information. In case the format or wording of the trace report
changes, parsing might fail. There are some precautions, however,
to prevent stepping on other people's toes. There is also a
dumpinstipc option which will make us print the IPC objects which
belong to the instance. Use it to see if we parse the trace file
correctly.
Three settings are possible:
- none: don't mess with IPC and hope for the best (beware: you'll
probably be out of luck, sooner or later)
- instance: try to figure out the IPC stuff which belongs to the
instance and remove only those (default; should be safe)
- orauser: remove all IPC belonging to the user which runs the
instance (don't use this if you run more than one instance as same
user or if other apps running as this user use IPC)
The default setting "instance" should be safe to use, but in that
case we cannot guarantee that the instance will start. In case IPC
objects were already left around, because, for instance, someone
mercilessly killing Oracle processes, there is no way any more to
find out which IPC objects should be removed. In that case, human
intervention is necessary, and probably _all_ instances running as
same user will have to be stopped. The third setting, "orauser",
guarantees IPC objects removal, but it does that based only on IPC
objects ownership, so you should use that only if every instance
runs as separate user.
Please report any problems. Suggestions/fixes welcome.
</longdesc>
<shortdesc lang="en">ipcrm</shortdesc>
<content type="string" default="instance" />
</parameter>
<parameter name="clear_backupmode" unique="0" required="0">
<longdesc lang="en">
The clear of the backup mode of ORACLE.
</longdesc>
<shortdesc lang="en">clear_backupmode</shortdesc>
<content type="boolean" default="false" />
</parameter>
<parameter name="shutdown_method" unique="0" required="0">
<longdesc lang="en">
How to stop Oracle is a matter of taste it seems. The default
method ("checkpoint/abort") is:
alter system checkpoint;
shutdown abort;
This should be the fastest safe way bring the instance down. If
you find "shutdown abort" distasteful, set this attribute to
"immediate" in which case we will
shutdown immediate;
If you still think that there's even better way to shutdown an
Oracle instance we are willing to listen.
</longdesc>
<shortdesc lang="en">shutdown_method</shortdesc>
<content type="string" default="checkpoint/abort" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="120" />
<action name="stop" timeout="120" />
<action name="status" timeout="5" />
<action name="monitor" depth="0" timeout="30" interval="120" />
<action name="validate-all" timeout="5" />
<action name="methods" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}
#
# methods: What methods/operations do we support?
#
# Print the names of all supported actions, one per line.
oracle_methods() {
    printf '%s\n' \
        start \
        stop \
        status \
        monitor \
        dumpinstipc \
        showdbstat \
        cleanup \
        validate-all \
        methods \
        meta-data \
        usage
}
#
# Run commands as the Oracle owner...
#
# Run "sqlplus -S /nolog" as the Oracle owner, reading SQL from stdin.
# When the current user ($US) is not the owner, switch to the owner with
# su and source the saved environment file first.
execsql() {
    if [ "$US" != "$ORACLE_OWNER" ]; then
        su - $ORACLE_OWNER -s /bin/sh -c ". $ORA_ENVF; sqlplus -S /nolog"
    else
        sqlplus -S /nolog
    fi
}
#
# Run commands in the oracle admin sqlplus...
#
# Print the sqlplus settings sent ahead of every query: no feedback
# lines, no column headings, no pagination.
common_sql_opts() {
    printf '%s\n' \
        'set feedback off' \
        'set heading off' \
        'set pagesize 0'
}
# Filter sqlplus output read from stdin: drop connection banners,
# "ENV MSG:" lines and password-expiry warnings.
common_sql_filter() {
    grep -v \
        -e '^Connected' \
        -e '^ENV MSG:' \
        -e 'Your password will expire in'
}
# Build a sqlplus script and run it as the Oracle owner.
#   $1    - the connect statement
#   $2... - names of functions that print SQL; they are called in order
# The script consists of the connect statement, the common sqlplus
# options and the generated SQL; the filtered output goes to stdout.
runsql() {
    local connect_stmt="$1"
    local generator
    shift 1
    {
        echo "$connect_stmt"
        common_sql_opts
        for generator in "$@"; do
            $generator
        done
    } | execsql | common_sql_filter
}
# Run the given SQL generator functions in a sqlplus session connected
# "/ as sysdba". The arguments are passed on unquoted ($*).
dbasql() {
runsql "connect / as sysdba" $*
}
# Run the given SQL generator functions in a sqlplus session connected as
# the monitoring user ($MONUSR / $MONPWD, both double-quoted in the
# connect statement). The arguments are passed on unquoted ($*).
monsql() {
runsql "connect \"$MONUSR\"/\"$MONPWD\"" $*
}
# use dbasql_one if the query should result in a single line output
# at times people stuff commands in oracle .profile
# which may produce extra output
# Like dbasql, but keep only the last line of the output; use it for
# queries that should produce a single line.
dbasql_one() {
dbasql $* | tail -1
}
# Like monsql, but keep only the last line of the output; use it for
# queries that should produce a single line.
monsql_one() {
monsql $* | tail -1
}
#
# various interesting sql
#
# SQL: query the instance status.
dbstat() {
    printf '%s\n' 'select status from v$instance;'
}
# SQL: mount the database.
dbmount() {
    printf '%s\n' 'alter database mount;'
}
# SQL: open the database.
dbopen() {
    printf '%s\n' 'alter database open;'
}
# sqlplus command: shut the instance down with "shutdown immediate".
dbstop_immediate() {
    printf '%s\n' 'shutdown immediate'
}
# sqlplus commands: force a checkpoint, then "shutdown abort".
dbstop_checkpoint_abort() {
    printf '%s\n' \
        'alter system checkpoint;' \
        'shutdown abort'
}
# Print the shutdown commands selected by $shutdown_method
# ("immediate" or "checkpoint/abort"); any other value prints nothing.
dbstop() {
    if [ "${shutdown_method}" = "immediate" ]; then
        dbstop_immediate
    elif [ "${shutdown_method}" = "checkpoint/abort" ]; then
        dbstop_checkpoint_abort
    fi
}
# sqlplus command: start the instance.
dbstart() {
    printf '%s\n' 'startup'
}
# sqlplus command: start the instance and mount the database.
dbstart_mount() {
    printf '%s\n' 'startup mount'
}
# SQL: take the database out of backup mode.
dbendbackup() {
    printf '%s\n' 'alter database end backup;'
}
# SQL: count the v$backup rows with status ACTIVE; the result is
# prefixed with the literal COUNT.
db_backup_mode() {
    printf '%s\n' "select 'COUNT'||count(*) from v\$backup where status='ACTIVE';"
}
# Succeed when $clear_backupmode is exactly "true".
is_clear_backupmode_set(){
    test "${clear_backupmode}" = "true"
}
# Succeed unless the backup-mode query reports exactly 0 active rows
# (any other answer, including an empty one, counts as backup mode).
is_instance_in_backup_mode() {
    local active
    active="`dbasql_one db_backup_mode | sed 's/COUNT//'`"
    test "$active" != "0"
}
# End backup mode as sysdba and log whatever sqlplus answered.
clear_backup_mode() {
    local reply
    reply="`dbasql dbendbackup`"
    ocf_log info "Oracle instance $ORACLE_SID alter database end backup: $reply"
}
# SQL: query the value of the user_dump_dest parameter.
getdumpdest() {
    printf '%s\n' "select value from v\$parameter where name = 'user_dump_dest';"
}
getipc() {
	# oradebug commands; dumpinstipc expects the trace file name on the
	# second line of their output.
	printf '%s\n' \
		"oradebug setmypid" \
		"oradebug tracefile_name" \
		"oradebug ipc"
}
show_mon_profile() {
	# SQL: list the monitoring profile if it exists.
	local query
	query="select PROFILE from dba_profiles where PROFILE='$MONPROFILE';"
	echo "$query"
}
mk_mon_profile() {
	# SQL: create the monitoring profile with unlimited failed logins and
	# password lifetime.
	printf '%s\n' "create profile \"$MONPROFILE\" limit FAILED_LOGIN_ATTEMPTS UNLIMITED PASSWORD_LIFE_TIME UNLIMITED;"
}
show_mon_user() {
	# SQL: list the monitoring user together with its account status.
	local query
	query="select USERNAME, ACCOUNT_STATUS from dba_users where USERNAME='$MONUSR';"
	echo "$query"
}
mk_mon_user() {
	# SQL: create the monitoring user and grant it what the monitor query
	# needs (a session and select on v_$instance).
	printf '%s\n' \
		"create user \"$MONUSR\" identified by \"$MONPWD\" profile \"$MONPROFILE\";" \
		"grant create session to \"$MONUSR\";" \
		"grant select on v_\$instance to \"$MONUSR\";"
}
show_mon_user_profile() {
	# SQL: show which profile the monitoring user is assigned to.
	local query
	query="select PROFILE from dba_users where USERNAME='$MONUSR';"
	echo "$query"
}
set_mon_user_profile() {
	# SQL: assign the monitoring profile to the monitoring user.
	# Both names are emitted as double-quoted identifiers, the same way
	# mk_mon_user and mk_mon_profile create them. Previously the inner
	# quotes terminated the shell string (leaving $MONUSR unquoted) and
	# the profile was written as a single-quoted string literal.
	printf '%s\n' "alter user \"$MONUSR\" profile \"$MONPROFILE\";"
}
reset_mon_user_password() {
	# SQL: set the monitoring user's password again (used when the
	# account shows up as EXPIRED).
	# User and password are emitted double-quoted, consistent with
	# mk_mon_user and the connect string in monsql. Previously the inner
	# quotes terminated the shell string, so $MONPWD was word-split and
	# globbed by the shell and reached SQL unquoted.
	printf '%s\n' "alter user \"$MONUSR\" identified by \"$MONPWD\";"
}
check_mon_profile() {
	# Make sure the monitoring profile exists, creating it if needed.
	# Returns 0 when the profile is present, $OCF_ERR_CONFIGURED when
	# sqlplus reports ORA-65140, and 1 on any other failure.
	local sqlout
	sqlout=$(dbasql show_mon_profile)
	echo "$sqlout" | grep -iw "^$MONPROFILE" >/dev/null && return 0

	# not there yet: create it and look again
	sqlout=$(dbasql mk_mon_profile show_mon_profile)
	if echo "$sqlout" | grep -iw "^$MONPROFILE" >/dev/null; then
		return 0
	fi
	if echo "$sqlout" | grep ORA-65140 >/dev/null 2>&1; then
		ocf_exit_reason "monprofile must start with C## for container databases"
		return $OCF_ERR_CONFIGURED
	fi
	ocf_exit_reason "could not create $MONPROFILE oracle profile"
	ocf_log err "sqlplus output: $sqlout"
	return 1
}
# Make sure the monitoring user exists and is assigned the monitoring
# profile. Returns 0 on success, $OCF_ERR_CONFIGURED when sqlplus reports
# ORA-65096 while creating the user, and 1 on any other failure.
check_mon_user() {
local output
local output2
output=`dbasql show_mon_user`
if echo "$output" | grep -iw "^$MONUSR" >/dev/null; then
# the user exists; reset the password if the account status says EXPIRED
if echo "$output" | grep -w "EXPIRED" >/dev/null; then
dbasql reset_mon_user_password
fi
# check the assigned profile and try to fix it once if it differs
output=`dbasql show_mon_user_profile`
if echo "$output" | grep -iw "^$MONPROFILE" >/dev/null; then
return 0
else
output=`dbasql set_mon_user_profile`
output2=`dbasql show_mon_user_profile`
if echo "$output2" | grep -iw "^$MONPROFILE" >/dev/null; then
return 0
fi
ocf_exit_reason "could not set profile for $MONUSR oracle user"
ocf_log err "sqlplus output: $output( $output2 )"
return 1
fi
fi
# the user does not exist: create it and verify
output=`dbasql mk_mon_user show_mon_user`
if echo "$output" | grep -iw "^$MONUSR" >/dev/null; then
return 0
elif echo "$output" | grep ORA-65096 >/dev/null 2>&1; then
ocf_exit_reason "monuser must start with C## for container databases"
return $OCF_ERR_CONFIGURED
else
ocf_exit_reason "could not create $MONUSR oracle user"
ocf_log err "sqlplus output: $output"
return 1
fi
}
#
# print the output of dbstat (for debugging)
#
showdbstat() {
	# Show the status query result twice: raw from execsql, then as
	# returned by dbasql (wrapped in angle brackets).
	local stripped
	printf '%s\n' "Full output:"
	dbstat | execsql
	printf '%s\n' "Stripped output:"
	stripped=$(dbasql dbstat)
	echo "<$stripped>"
}
#
# IPC stuff: not overly complex, but quite involved :-/
#
# Part 1: Oracle
other_trace_junk() {
	# Print the path in $1 with a trailing "trc" replaced by "trm".
	# $1 is quoted so that a path containing whitespace or glob
	# characters is passed through unchanged.
	printf '%s\n' "$1" | sed 's/trc$/trm/'
}
dumpinstipc() {
	# Run the oradebug commands from getipc and print the trace file
	# path. Logs a warning and returns 1 when the second output line is
	# not an absolute path ending in "trc".
	local sqlout tracefile
	sqlout=$(dbasql getipc) # filename in the 2nd line
	tracefile=$(echo "$sqlout" | awk 'NR==2' | grep '^/.*trc$')
	if [ -z "$tracefile" ]; then
		ocf_log warn "'dbasql getipc' failed: $sqlout"
		return 1
	fi
	echo $tracefile
}
# Extract IPC ids from the trace file given as $1 and print them one per
# line: "m:<id>" for ids found under a Shmid header, "s:<id>" for ids
# found in the semaphore list. Returns 1 if the file does not exist.
parseipc() {
local inf=$1
if [ ! -f "$1" ]; then
ocf_log warn "$1: no such ipc trace file"
return 1
fi
# a line whose 3rd field is "Shmid" announces the id, which is taken from
# the 3rd field of the following line if that field is numeric
awk '
$3 == "Shmid" {n=1;next}
n {
if( $3~/^[0-9]+$/ ) print $3;
n=0
}
' $inf |
sort -u | sed 's/^/m:/'
# every numeric field after the "Semaphore List" line, up to and including
# the line matching "system semaphore information"
awk '
/Semaphore List/ {insems=1;next}
insems {
for( i=1; i<=NF; i++ )
if( $i~/^[0-9]+$/ ) print $i;
}
/system semaphore information/ {exit}
' $inf |
sort -u | sed 's/^/s:/'
# record the trace file and its companion file in TMPFILES
# (presumably removed later by code outside this section -- confirm)
TMPFILES="$TMPFILES $inf `other_trace_junk $inf`"
}
# Part 2: OS (ipcs,ipcrm)
# Read ipcs output on stdin, keep the lines containing $ORACLE_OWNER as a
# whole word and print their second field.
filteroraipc() { # this portable?
grep -w $ORACLE_OWNER | awk '{print $2}'
}
ipcdesc() {
	# Translate an ipcs/ipcrm type letter ($1) into a description for
	# log messages; unknown letters print nothing.
	if [ "$1" = m ]; then
		echo "shared memory segment"
	elif [ "$1" = s ]; then
		echo "semaphore"
	elif [ "$1" = q ]; then
		echo "message queue"
	fi
}
rmipc() {
	# Remove IPC object $2 of type $1 (m, s or q), but only if ipcs still
	# lists it for the Oracle owner; otherwise return without doing anything.
	local kind=$1 ipcid=$2
	ipcs -$kind | filteroraipc | grep -iw $ipcid >/dev/null 2>&1 ||
		return
	ocf_log info "Removing $(ipcdesc $kind) $ipcid."
	ipcrm -$kind $ipcid
}
ipcrm_orauser() {
	# Remove every shared memory segment, semaphore and message queue
	# that ipcs lists for the Oracle owner.
	local kind ipcid
	for kind in m s q; do
		for ipcid in $(ipcs -$kind | filteroraipc); do
			rmipc $kind $ipcid
		done
	done
}
ipcrm_instance() {
	# Each argument is a "<type>:<id>" pair as printed by parseipc; split
	# it at the first colon and hand both parts to rmipc.
	local entry
	for entry in "$@"; do
		rmipc $(echo $entry | sed 's/:/ /')
	done
}
#
# oracle_status: is the Oracle instance running?
#
# quick check to see if the instance is up
is_proc_running() {
# succeeds if the process list has an entry matching "_pmon_$ORACLE_SID"
# (whole word, case-insensitive)
ps -ef | grep -wiqs "[^ ]*[_]pmon_${ORACLE_SID}"
}
# instance in OPEN state?
instance_live() {
	# Returns 0 if the instance reports status OPEN, asking first as the
	# monitoring user and then as sysdba; logs the last status seen and
	# returns 1 otherwise.
	#
	# The variable is declared before it is assigned: with
	# "local status=`cmd`" some shells (e.g. dash) field-split the
	# unquoted substitution, so a multi-word answer such as an ORA-
	# error line turns into extra, invalid arguments to "local".
	local status
	status=$(monsql_one dbstat)
	[ "$status" = OPEN ] && return 0
	status=$(dbasql_one dbstat)
	if [ "$status" = OPEN ]; then
		return 0
	else
		ocf_log info "$ORACLE_SID instance state is not OPEN (dbstat output: $status)"
		return 1
	fi
}
ora_cleanup() {
	# Remove the instance's lk* files under $ORACLE_HOME/dbs, then clean
	# up IPC objects as selected by $IPCRM. Arguments are passed on to
	# the ipcrm_* helper.
	#rm -fr /tmp/.oracle #???
	rm -f $(ls $ORACLE_HOME/dbs/lk* | grep -i "$ORACLE_SID\$")
	#return
	if [ "$IPCRM" = "instance" ]; then
		ipcrm_instance $*
	elif [ "$IPCRM" = "orauser" ]; then
		ipcrm_orauser $*
	fi
}
# Read the resource parameters: sid, home and user are handed to
# ora_common_getconfig; the remaining settings get their defaults here.
oracle_getconfig() {
ora_common_getconfig "$OCF_RESKEY_sid" "$OCF_RESKEY_home" "$OCF_RESKEY_user"
# defaults: clear_backupmode=false, shutdown_method=checkpoint/abort, ipcrm=instance
clear_backupmode=${OCF_RESKEY_clear_backupmode:-"false"}
shutdown_method=${OCF_RESKEY_shutdown_method:-"checkpoint/abort"}
IPCRM=${OCF_RESKEY_ipcrm:-"instance"}
}
#
# oracle_start: Start the Oracle instance
#
# NOTE: We handle instance in the MOUNTED and STARTED states
# efficiently
# We *do not* handle instance in the restricted or read-only
# mode, i.e. it appears as running, but its availability is
# "not for general use"
#
# Returns $OCF_SUCCESS when the instance ends up OPEN (or already was),
# $OCF_ERR_GENERIC otherwise.
oracle_start() {
local status output
# step 1: bring the instance to the MOUNTED state
if is_proc_running; then
status="`dbasql_one dbstat`"
case "$status" in
"OPEN")
: nothing to be done, we can leave right now
ocf_log info "Oracle instance $ORACLE_SID already running"
return $OCF_SUCCESS
;;
"STARTED")
output=`dbasql dbmount`
;;
"MOUNTED")
: we proceed if mounted
;;
*) # status unknown
# shut down with the configured method, then start up mounted
output=`dbasql dbstop dbstart_mount`
;;
esac
else
output="`dbasql dbstart_mount`"
# try to cleanup in case of
# ORA-01081: cannot start already-running ORACLE - shut it down first
if echo "$output" | grep ORA-01081 >/dev/null 2>&1; then
ocf_log info "ORA-01081 error found, trying to cleanup oracle (dbstart_mount output: $output)"
ora_cleanup
output=`dbasql dbstop_immediate`
output=`dbasql dbstart_mount`
fi
fi
# oracle instance should be mounted.
status="`dbasql_one dbstat`"
case "$status" in
"MOUNTED")
;;
*)
: error!!
ocf_exit_reason "oracle $ORACLE_SID can not be mounted (status: $status)"
return $OCF_ERR_GENERIC
;;
esac
# It is examined whether mode is "online backup mode",
# and if it is true, makes clear the mode.
# Afterwards, DB is opened.
if is_clear_backupmode_set && is_instance_in_backup_mode; then
clear_backup_mode
fi
# step 2: open the database
output=`dbasql dbopen`
# check/create the monitor profile
if ! check_mon_profile; then
return $OCF_ERR_GENERIC
fi
# check/create the monitor user
if ! check_mon_user; then
return $OCF_ERR_GENERIC
fi
# step 3: final verification; $output still holds the dbopen result
if ! is_proc_running; then
ocf_exit_reason "oracle process not running: $output"
return $OCF_ERR_GENERIC
elif ! instance_live; then
ocf_exit_reason "oracle instance $ORACLE_SID not started: $output"
return $OCF_ERR_GENERIC
else
: cool, we are up and running
ocf_log info "Oracle instance $ORACLE_SID started: $output"
return $OCF_SUCCESS
fi
}
#
# oracle_stop: Stop the Oracle instance
#
oracle_stop() {
	# Returns $OCF_SUCCESS when no pmon process is left (or none was
	# running to begin with), $OCF_ERR_GENERIC otherwise.
	local status output ipc=""
	if ! is_proc_running; then
		ocf_log info "Oracle instance $ORACLE_SID already stopped"
		return $OCF_SUCCESS
	fi
	# collect the instance's IPC ids while it is still up
	[ "$IPCRM" = "instance" ] && ipc=$(parseipc $(dumpinstipc))
	output=$(dbasql dbstop)
	ocf_stop_processes TERM $PROCS_CLEANUP_TIME $(proc_pids) # kill the procs if they hanged
	if ! is_proc_running; then
		ocf_log info "Oracle instance $ORACLE_SID stopped: $output"
		sleep 1 # give em a chance to cleanup
		ocf_log info "Cleaning up for $ORACLE_SID"
		ora_cleanup "$ipc"
		return $OCF_SUCCESS
	fi
	ocf_exit_reason "Oracle instance $ORACLE_SID not stopped: $output"
	return $OCF_ERR_GENERIC
}
#
# oracle_monitor: Can the Oracle instance do anything useful?
#
oracle_monitor() {
	# $OCF_NOT_RUNNING without a pmon process, $OCF_ERR_GENERIC when the
	# process exists but the instance is not OPEN, $OCF_SUCCESS otherwise.
	if is_proc_running; then
		if instance_live; then
			#ocf_log info "Oracle instance $ORACLE_SID is alive"
			return $OCF_SUCCESS
		fi
		ocf_exit_reason "oracle instance $ORACLE_SID is down"
		return $OCF_ERR_GENERIC
	fi
	ocf_log info "oracle process not running"
	return $OCF_NOT_RUNNING
}
# other supported actions
oracle_status() {
	# Report on stdout whether the pmon process exists and exit (not
	# return) with the matching OCF code.
	is_proc_running || {
		echo Oracle instance $ORACLE_SID is stopped
		exit $OCF_NOT_RUNNING
	}
	echo Oracle instance $ORACLE_SID is running
	exit $OCF_SUCCESS
}
# Print the instance's IPC ids (parseipc format), but only if the
# instance process is running.
oracle_dumpinstipc() {
is_proc_running && parseipc `dumpinstipc`
}
# Action entry point that simply calls showdbstat.
oracle_showdbstat() {
showdbstat
}
oracle_cleanup() {
	# Run ora_cleanup; in "instance" mode it is handed the IPC ids
	# parsed from a fresh trace dump.
	if [ "$IPCRM" != "instance" ]; then
		ora_cleanup
		return
	fi
	ora_cleanup $(parseipc $(dumpinstipc))
}
oracle_validate_all() {
	# Reject unknown shutdown_method / ipcrm values with
	# $OCF_ERR_CONFIGURED, then defer to ora_common_validate_all.
	case "${shutdown_method}" in
	"immediate"|"checkpoint/abort")
		;;
	*)
		ocf_exit_reason "unsupported shutdown_method, please read meta-data"
		return $OCF_ERR_CONFIGURED
		;;
	esac
	if [ "${IPCRM}" != "none" ] &&
	   [ "${IPCRM}" != "instance" ] &&
	   [ "${IPCRM}" != "orauser" ]; then
		ocf_exit_reason "unsupported ipcrm setting, please read meta-data"
		return $OCF_ERR_CONFIGURED
	fi
	ora_common_validate_all
}
# used in ora-common.sh
show_procs() {
# list "pid args" of processes whose command line ends with
# "ora<letters, digits, underscores>$ORACLE_SID" (case-insensitive);
# the [o] keeps grep from matching its own process entry
ps -e -o pid,args | grep -i "[o]ra[a-zA-Z0-9_]*$ORACLE_SID$"
}
# Print only the pid column of show_procs.
proc_pids() { show_procs | awk '{print $1}'; }
# value handed to ocf_stop_processes in oracle_stop
PROCS_CLEANUP_TIME="30"
# monitoring user, password and profile: resource parameter if set,
# otherwise the corresponding *_default value
MONUSR=${OCF_RESKEY_monuser:-$OCF_RESKEY_monuser_default}
MONPWD=${OCF_RESKEY_monpassword:-$OCF_RESKEY_monpassword_default}
MONPROFILE=${OCF_RESKEY_monprofile:-$OCF_RESKEY_monprofile_default}
# user and profile names are upper-cased before use
MONUSR=$(echo "$MONUSR" | awk '{print toupper($0)}')
MONPROFILE=$(echo "$MONPROFILE" | awk '{print toupper($0)}')
# NOTE(review): presumably consumed by ocf_rarun to check required
# parameters and binaries -- confirm in the ocf_rarun implementation
OCF_REQUIRED_PARAMS="sid"
OCF_REQUIRED_BINARIES="sqlplus"
ocf_rarun $*
#
# vim:tabstop=4:shiftwidth=4:textwidth=0:wrapmargin=0
diff --git a/heartbeat/oralsnr b/heartbeat/oralsnr
index 622138c6f..83c512806 100755
--- a/heartbeat/oralsnr
+++ b/heartbeat/oralsnr
@@ -1,281 +1,281 @@
#!/bin/sh
#
#
# oralsnr
#
# Description: Manages an Oracle Listener as a High-Availability
# resource
#
#
# Author: Dejan Muhamedagic
-# Support: linux-ha@lists.linux-ha.org
+# Support: users@clusterlabs.org
# License: GNU General Public License (GPL)
# Copyright: (C) 2006 International Business Machines, Inc.
#
# This code inspired by the DB2 resource script
# written by Alan Robertson
#
# An example usage in /etc/ha.d/haresources:
# node1 10.0.0.170 oralsnr::sid::home::user::listener
#
# See oralsnr_usage() function below for more details...
#
# OCF instance parameters:
# OCF_RESKEY_sid (mandatory; for the monitor op)
# OCF_RESKEY_home (optional; else read it from /etc/oratab)
# OCF_RESKEY_user (optional; user to run the listener)
# OCF_RESKEY_listener (optional; defaults to LISTENER)
#
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
. ${OCF_FUNCTIONS_DIR}/ora-common.sh
#######################################################################
# shell used by runasdba, either directly or as the "su -s" shell
SH=/bin/sh
# Print the usage text. The list of actions comes from oralsnr_methods,
# joined with "|". The wording describes the listener; it previously
# described a database instance, copied from the database agent.
oralsnr_usage() {
	methods=`oralsnr_methods`
	methods=`echo $methods | tr ' ' '|'`
	cat <<-!
usage: $0 ($methods)
$0 manages an Oracle Listener as an HA resource.
The 'start' operation starts the listener.
The 'stop' operation stops the listener.
The 'status' operation reports whether the listener is running
The 'monitor' operation reports whether the listener seems to be working
The 'validate-all' operation reports whether the parameters are valid
The 'methods' operation reports on the methods $0 supports
!
}
# Print the agent's meta-data XML (parameters sid, home, user, listener
# and tns_admin, plus the supported actions with their timeouts).
# The here-document is emitted as is; it contains no shell expansions.
oralsnr_meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="oralsnr">
<version>1.0</version>
<longdesc lang="en">
Resource script for Oracle Listener. It manages an
Oracle Listener instance as an HA resource.
</longdesc>
<shortdesc lang="en">Manages an Oracle TNS listener</shortdesc>
<parameters>
<parameter name="sid" unique="1" required="1">
<longdesc lang="en">
The Oracle SID (aka ORACLE_SID). Necessary for the monitor op,
i.e. to do tnsping SID.
</longdesc>
<shortdesc lang="en">sid</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="home" unique="0">
<longdesc lang="en">
The Oracle home directory (aka ORACLE_HOME).
If not specified, then the SID should be listed in /etc/oratab.
</longdesc>
<shortdesc lang="en">home</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="user" unique="0">
<longdesc lang="en">
Run the listener as this user.
</longdesc>
<shortdesc lang="en">user</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="listener" unique="1">
<longdesc lang="en">
Listener instance to be started (as defined in listener.ora).
Defaults to LISTENER.
</longdesc>
<shortdesc lang="en">listener</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="tns_admin" required="0" unique="1">
<longdesc lang="en">
Full path to the directory that contains the Oracle
listener tnsnames.ora configuration file. The shell
variable TNS_ADMIN is set to the value provided.
</longdesc>
<shortdesc lang="en">
Full path to the directory containing tnsnames.ora
</shortdesc>
<content type="string"/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="120" />
<action name="stop" timeout="120" />
<action name="status" timeout="60" />
<action name="monitor" depth="0" timeout="30" interval="10" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
<action name="methods" timeout="5" />
</actions>
</resource-agent>
END
}
#
# methods: What methods/operations do we support?
#
oralsnr_methods() {
	# Print every action name this agent accepts, one per line.
	printf '%s\n' \
		start stop status monitor \
		validate-all methods meta-data usage
}
#
# Run commands as the Oracle owner...
#
runasdba() {
	# Execute the commands read from stdin with $SH. When the current
	# user ($US) is not the Oracle owner, run the shell through su and
	# source the environment file before the commands.
	if [ "$US" != "$ORACLE_OWNER" ]; then
		{
			echo ". $ORA_ENVF"
			cat
		} | su -s $SH - $ORACLE_OWNER
		return
	fi
	$SH
}
#
# oralsnr_start: Start the Oracle listener instance
#
oralsnr_start() {
	# Start the listener unless it is already running and answering
	# tnsping; success is decided by a tnsping after the start attempt.
	is_proc_running && test_tnsping && {
		: nothing to be done, we can leave right now
		ocf_log info "Listener $listener already running"
		return $OCF_SUCCESS
	}
	output=$(echo lsnrctl start $listener | runasdba)
	if ! test_tnsping; then
		ocf_exit_reason "Listener $listener appears to have started, but is not running properly: $output"
		ocf_log err "Probable Oracle configuration error"
		return $OCF_ERR_GENERIC
	fi
	: cool, we are up and running
	ocf_log info "Listener $listener running: $output"
	return $OCF_SUCCESS
}
#
# oralsnr_stop: Stop the Oracle listener instance
#
oralsnr_stop() {
	# Returns $OCF_SUCCESS when no listener process is left (or none was
	# running to begin with), $OCF_ERR_GENERIC otherwise.
	if ! is_proc_running; then
		ocf_log info "Listener $listener already stopped"
		return $OCF_SUCCESS
	fi
	output=$(echo lsnrctl stop $listener | runasdba)
	ocf_stop_processes TERM $PROCS_CLEANUP_TIME $(proc_pids) # kill the procs if they hanged
	if ! is_proc_running; then
		ocf_log info "Listener $listener stopped: $output"
		return $OCF_SUCCESS
	fi
	ocf_exit_reason "Listener $listener not stopped: $output"
	return $OCF_ERR_GENERIC
}
#
# is_proc_running: is the listener running?
#
is_proc_running() {
# succeeds if show_procs prints at least one non-empty line
show_procs | grep "." > /dev/null
}
# the following two should be run only if the process is running
test_listener() {
	# "lsnrctl status" counts as healthy when the last line of its
	# output contains 'completed successfully'.
	local report
	report=$(lsnrctl status $listener)
	echo "$report" | tail -1 | grep -qs 'completed successfully' &&
		return $OCF_SUCCESS
	ocf_exit_reason "$listener status failed: $report"
	return $OCF_ERR_GENERIC
}
# and does it work?
test_tnsping() {
	# tnsping counts as successful when the last line of its output
	# starts with OK.
	local report
	report=$(tnsping $ORACLE_SID)
	echo "$report" | tail -1 | grep -qs '^OK' &&
		return $OCF_SUCCESS
	ocf_exit_reason "tnsping $ORACLE_SID failed: $report"
	return $OCF_ERR_GENERIC
}
#
# oralsnr_monitor: Can we connect to the listener?
#
oralsnr_monitor() {
	# $OCF_NOT_RUNNING without a listener process; otherwise the result
	# of the lsnrctl status check followed by the tnsping check.
	is_proc_running || return $OCF_NOT_RUNNING
	test_listener && test_tnsping
}
oralsnr_status() {
	# Report on stdout whether a listener process exists and exit (not
	# return) with the matching OCF code.
	is_proc_running || {
		echo Listener $listener is stopped
		exit $OCF_NOT_RUNNING
	}
	echo Listener $listener is running
	exit $OCF_SUCCESS
}
# Read the resource parameters: sid, home, user and tns_admin are handed
# to ora_common_getconfig; the listener name defaults to LISTENER.
oralsnr_getconfig() {
ora_common_getconfig "$OCF_RESKEY_sid" "$OCF_RESKEY_home" "$OCF_RESKEY_user" "$OCF_RESKEY_tns_admin"
listener=${OCF_RESKEY_listener:-"LISTENER"}
}
# No listener-specific checks; validation is left to ora_common_validate_all.
oralsnr_validate_all() {
ora_common_validate_all
}
# used in ora-common.sh
show_procs() {
# list "pid user args" of the Oracle owner's tnslsnr processes whose entry
# contains the listener name as a whole word (case-insensitive);
# the [t] keeps grep from matching its own process entry
ps -U "$ORACLE_OWNER" -o pid,user,args |
grep '[t]nslsnr' | grep -i -w "$listener"
}
# Print only the pid column of show_procs.
proc_pids() { show_procs | awk '{print $1}'; }
# value handed to ocf_stop_processes in oralsnr_stop
PROCS_CLEANUP_TIME="10"
# NOTE(review): presumably consumed by ocf_rarun to check required
# parameters and binaries -- confirm in the ocf_rarun implementation
OCF_REQUIRED_PARAMS="sid"
OCF_REQUIRED_BINARIES="lsnrctl tnsping"
ocf_rarun $*
#
# vim:tabstop=4:shiftwidth=4:textwidth=0:wrapmargin=0
diff --git a/ldirectord/OCF/ldirectord.in b/ldirectord/OCF/ldirectord.in
index f7c639eba..aea462606 100644
--- a/ldirectord/OCF/ldirectord.in
+++ b/ldirectord/OCF/ldirectord.in
@@ -1,269 +1,269 @@
#!/bin/sh
#
# ldirectord OCF RA. Wrapper around @sbindir@/ldirectord to
# be OCF RA compliant and therefore to get the possibility
# to monitor ldirectord by HAv2.
# Tested on SuSE Linux Enterprise Server 10.
#
# Should conform to the specification found at
-# http://www.linux-ha.org/OCFResourceAgent
+# https://github.com/ClusterLabs/resource-agents/blob/master/doc/dev-guides/ra-dev-guide.asc
# and
-# http://www.opencf.org/cgi-bin/viewcvs.cgi/specs/ra/resource-agent-api.txt?rev=HEAD
+# https://github.com/ClusterLabs/OCF-spec/blob/master/ra/resource-agent-api.md
#
# ToDo: Add parameter to start several instances of ldirectord
# with different config files.
#
# Copyright (c) 2007 Andreas Mock (andreas.mock@web.de)
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#######################################################################
#
# OCF Parameters
# OCF_RESKEY_configfile
# OCF_RESKEY_ldirectord
#
#######################################################################
# Initialization:
# HA_VARRUNDIR is set before ocf-shellfuncs is sourced
# (NOTE(review): presumably read by the sourced library -- confirm)
HA_VARRUNDIR=${HA_VARRUN}
. ${OCF_ROOT}/lib/heartbeat/ocf-shellfuncs
# configuration file and binary: resource parameter if set, otherwise the
# build-time default (@...@ placeholders are substituted at build time)
LDIRCONF=${OCF_RESKEY_configfile:-@sysconfdir@/ha.d/ldirectord.cf}
LDIRECTORD=${OCF_RESKEY_ldirectord:-@sbindir@/ldirectord}
# Print the agent's meta-data XML (parameters configfile and ldirectord,
# plus the supported actions with their timeouts).
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="ldirectord" version="0.9">
<version>1.0</version>
<longdesc lang="en">
It's a simple OCF RA wrapper for ldirectord and uses the ldirectord interface
to create the OCF compliant interface. You win monitoring of ldirectord.
Be warned: Asking ldirectord status is an expensive action.
</longdesc>
<shortdesc lang="en">Wrapper OCF Resource Agent for ldirectord</shortdesc>
<parameters>
<parameter name="configfile" unique="1">
<longdesc lang="en">
The full pathname of the ldirectord configuration file.
</longdesc>
<shortdesc lang="en">configuration file path</shortdesc>
<content type="string" default="@sysconfdir@/ha.d/ldirectord.cf" />
</parameter>
<parameter name="ldirectord">
<longdesc lang="en">
The full pathname of the ldirectord.
</longdesc>
<shortdesc lang="en">ldirectord binary path</shortdesc>
<content type="string" default="@sbindir@/ldirectord" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="15" />
<action name="stop" timeout="15" />
<action name="monitor" depth="0" timeout="10" interval="20" />
<action name="meta-data" timeout="10" />
<action name="validate-all" timeout="10" />
</actions>
</resource-agent>
END
}
#######################################################################
ldir_init() {
	# check the supplied parameters exist enough that we can do all the other
	# operations
	# Exits with $OCF_ERR_INSTALLED if the configuration file is missing
	# or the ldirectord binary is not executable.
	# Both paths are quoted: unquoted, a path containing whitespace makes
	# "[" fail with a syntax error and an empty value passes the test, so
	# the check was skipped in both cases.
	if [ ! -f "$LDIRCONF" ]; then
		ocf_log warn "$LDIRCONF not found, ldirectord not installed"
		exit $OCF_ERR_INSTALLED
	fi
	if [ ! -x "$LDIRECTORD" ]; then
		ocf_log warn "$LDIRECTORD not found, ldirectord not installed"
		exit $OCF_ERR_INSTALLED
	fi
}
ldirectord_usage() {
	# Print the two-line usage text.
	printf '%s\n' \
		"usage: $0 {start|stop|monitor|validate-all|meta-data}" \
		"Expects to have a fully populated OCF RA-compliant environment set."
}
# Terminate the script with the exit code given as $1.
ldirectord_exit() {
exit $1
}
ldirectord_start() {
	# Start ldirectord unless the status check already reports it running
	# (or reports an error, which is passed on). The final result is that
	# of a second status check.
	ldirectord_status
	RET=$?
	# if ldirectord is running or there's an error, pass on this return code
	[ $RET -eq $OCF_NOT_RUNNING ] || return $RET
	ocf_log info "Starting ldirectord"
	echo $LDIRECTORD $LDIRCONF start
	$LDIRECTORD $LDIRCONF start
	RET=$?
	[ $RET -eq 0 ] || return $OCF_ERR_GENERIC
	# call status again to make sure we're running properly
	ldirectord_status
}
# Stop ldirectord. If the status check returns $OCF_ERR_GENERIC the
# process is looked up with pgrep and killed (SIGTERM, then SIGKILL);
# otherwise "ldirectord <config> stop" is issued.
# Returns $OCF_SUCCESS/0 when stopped, $OCF_ERR_GENERIC or 1 otherwise.
ldirectord_stop() {
# a status check is simple enough to see if ldirectord is running or not.
# It will also error out if there's configuration parsing errors so we
# can try kill ldirectord even if the config is broken.
ldirectord_status
RET=$?
# ldirectord may be running, hard to tell when status returns an error
if [ $RET -eq $OCF_ERR_GENERIC ]; then
# get the PID of the right ldirectord process
PID=`pgrep -f "$LDIRECTORD $LDIRCONF start" 2>&1`
RET=$?
if [ $RET -eq 0 ]; then
ocf_log warn "Killing ldirectord($PID) with SIGTERM"
kill $PID
fi
# NOTE(review): this check runs right after the SIGTERM, without waiting
pgrep -f "$LDIRECTORD $LDIRCONF start" >/dev/null 2>&1
RET=$?
# if ldirectord is not running any more, we've (kind of) successfully
# stopped it
if [ $RET -eq 1 ]; then
return $OCF_SUCCESS
else
# ldirectord is still running? Kill it badly
ocf_log warn "Killing ldirectord($PID) with SIGKILL"
kill -9 $PID
pgrep -f "$LDIRECTORD $LDIRCONF start" >/dev/null 2>&1
RET=$?
# if it's not dead after here, we can't really do anything more
if [ $RET -eq 1 ]; then
return $OCF_SUCCESS
fi
fi
# if none of our kills work, return an error. This should force the
# resource unmanaged
# on this node, requiring manual intervention.
return $OCF_ERR_GENERIC
else
ocf_log info "Stopping ldirectord"
# if ldirectord status is not an error, issue a stop. Multiple stops
# will return 0
$LDIRECTORD $LDIRCONF stop
RET=$?
# any non-zero exit code from the stop command is reported as 1
case $RET in
0) return $RET;;
*) return 1;;
esac
fi
}
# simple check to see if ldirectord is running, returns the proper OCF codes.
ldirectord_status() {
	# Map the exit code of "ldirectord <config> status" to OCF codes:
	#   0 -> $OCF_SUCCESS
	#   1 -> $OCF_NOT_RUNNING if the output mentions a stale pid file,
	#        $OCF_ERR_GENERIC otherwise
	#   2 -> $OCF_ERR_GENERIC (configuration errors, output is printed)
	#   3 -> $OCF_NOT_RUNNING
	#   * -> $OCF_ERR_GENERIC (output is printed)
	# The stale-pid-file check uses a case pattern rather than
	# "expr match", which is a GNU extension and not part of POSIX expr.
	# $OUTPUT is printed quoted so it is neither glob-expanded nor
	# collapsed onto one line.
	OUTPUT=`$LDIRECTORD $LDIRCONF status 2>&1`
	case $? in
	0)
		return $OCF_SUCCESS
		;;
	1)
		case "$OUTPUT" in
		*"ldirectord stale pid file"*)
			return $OCF_NOT_RUNNING
			;;
		*)
			return $OCF_ERR_GENERIC
			;;
		esac
		;;
	2)
		ocf_log err "$LDIRCONF has configuration errors"
		echo "$OUTPUT"
		return $OCF_ERR_GENERIC
		;;
	3)
		return $OCF_NOT_RUNNING
		;;
	*)
		echo "$OUTPUT"
		return $OCF_ERR_GENERIC
		;;
	esac
}
ldirectord_monitor() {
	# Pass on any status other than $OCF_SUCCESS; otherwise return 0.
	# check if the process is running first
	ldirectord_status
	RET=$?
	[ $RET -eq $OCF_SUCCESS ] || return $RET
	# do more advanced checks here for high OCF_CHECK_LEVELs. Don't know what
	# more we can do at this time,
	# a status call already hits LVS in the kernel.
	return 0
}
# Always returns 0.
ldirectord_validate() {
#ldir_init is already called, there's nothing more we can validate unless
#we add more attributes
return 0
}
# meta-data, usage and help must work without a config file or binary,
# so ldir_init is skipped for them
case $1 in
meta-data|usage|help) : ;;
*) ldir_init $@ ;;
esac
# dispatch on the requested action
# NOTE(review): the check above uses $1 while this one uses $__OCF_ACTION;
# presumably ocf-shellfuncs sets it from $1 -- confirm
case $__OCF_ACTION in
meta-data) meta_data
exit $OCF_SUCCESS
;;
start) ldirectord_start
ldirectord_exit $?
;;
stop) ldirectord_stop
ldirectord_exit $?
;;
monitor) ldirectord_monitor
ldirectord_exit $?
;;
validate-all) ldirectord_validate
ldirectord_exit $?
;;
usage|help) ldirectord_usage
exit $OCF_SUCCESS
;;
*) ldirectord_usage
exit $OCF_ERR_UNIMPLEMENTED
;;
esac
diff --git a/resource-agents.spec.in b/resource-agents.spec.in
index 55a2ef968..553221c81 100644
--- a/resource-agents.spec.in
+++ b/resource-agents.spec.in
@@ -1,347 +1,347 @@
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
# upon. The license for this file, and modifications and additions to the
# file, is the same license as for the pristine package itself (unless the
# license for the pristine package is not an Open Source License, in which
# case the license is the MIT License). An "Open Source License" is a
# license that conforms to the Open Source Definition (Version 1.9)
# published by the Open Source Initiative.
#
%global rcver @rcver@
%global alphatag @alphatag@
%global numcomm @numcomm@
%global dirty @dirty@
#
# Since this spec file supports multiple distributions, ensure we
# use the correct group for each.
#
# SSLeay (required by ldirectord)
%if 0%{?suse_version}
%global SSLeay perl-Net_SSLeay
%else
%global SSLeay perl-Net-SSLeay
%endif
# determine the ras-set to process based on configure invocation
%bcond_@rgmanager@ rgmanager
%bcond_@linux-ha@ linuxha
# build with HA_BIN compatibility for the existing Heartbeat stack
%bcond_@compat-habindir@ compat_habindir
%if %{with compat_habindir}
%global _libexecdir %{_libdir}
%endif
Name: resource-agents
Summary: Open Source HA Reusable Cluster Resource Scripts
Version: @version@
Release: @specver@%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
License: GPLv2+ and LGPLv2+
URL: https://github.com/ClusterLabs/resource-agents
%if 0%{?fedora} || 0%{?centos_version} || 0%{?rhel}
Group: System Environment/Base
%else
Group: Productivity/Clustering/HA
%endif
Source0: %{name}-%{version}%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:-%{alphatag}}%{?dirty:-%{dirty}}.tar.bz2
Obsoletes: heartbeat-resources <= %{version}
Provides: heartbeat-resources = %{version}
## Setup/build bits
BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX)
# Build dependencies
BuildRequires: automake autoconf pkgconfig
BuildRequires: perl python-devel
BuildRequires: libxslt glib2-devel
BuildRequires: which
%if 0%{?fedora} || 0%{?centos_version} || 0%{?rhel}
BuildRequires: cluster-glue-libs-devel
BuildRequires: docbook-style-xsl docbook-dtds
%if 0%{?rhel} == 0
BuildRequires: libnet-devel
%endif
%endif
%if 0%{?suse_version}
%if 0%{?suse_version} >= 1140
BuildRequires: libnet1
%else
BuildRequires: libnet
%endif
BuildRequires: libglue-devel
BuildRequires: libxslt docbook_4 docbook-xsl-stylesheets
%endif
## Runtime deps
## These apply to rgmanager agents only to guarantee agents
## are functional
%if %{with rgmanager}
# system tools shared by several agents
Requires: /bin/bash /bin/grep /bin/sed /bin/gawk
Requires: /bin/ps /usr/bin/pkill /bin/hostname /bin/netstat
Requires: /sbin/fuser
Requires: /sbin/findfs /bin/mount
# fs.sh
Requires: /sbin/quotaon /sbin/quotacheck
Requires: /sbin/fsck
Requires: /sbin/fsck.ext2 /sbin/fsck.ext3 /sbin/fsck.ext4
Requires: /sbin/fsck.xfs
# ip.sh
Requires: /sbin/ip /usr/sbin/ethtool
Requires: /sbin/rdisc /usr/sbin/arping /bin/ping /bin/ping6
# lvm.sh
Requires: /sbin/lvm
# netfs.sh
Requires: /sbin/mount.nfs /sbin/mount.nfs4 /sbin/mount.cifs
Requires: /usr/sbin/rpc.nfsd /sbin/rpc.statd /usr/sbin/rpc.mountd
%endif
%description
A set of scripts to interface with several services to operate in a
High Availability environment for both Pacemaker and rgmanager
service managers.
%if %{with linuxha}
%package -n ldirectord
License: GPLv2+
Summary: A Monitoring Daemon for Maintaining High Availability Resources
%if 0%{?fedora} || 0%{?centos_version} || 0%{?rhel}
Group: System Environment/Daemons
%else
Group: Productivity/Clustering/HA
%endif
Obsoletes: heartbeat-ldirectord <= %{version}
Provides: heartbeat-ldirectord = %{version}
%if 0%{?fedora} > 18 || 0%{?centos_version} > 6 || 0%{?rhel} > 6
BuildRequires: perl-podlators
%endif
Requires: %{SSLeay} perl-libwww-perl perl-MailTools
Requires: ipvsadm logrotate
%if 0%{?fedora_version}
Requires: perl-Net-IMAP-Simple-SSL
Requires(post): /sbin/chkconfig
Requires(preun):/sbin/chkconfig
%endif
%if %{defined systemd_requires}
BuildRequires: systemd
%{?systemd_requires}
%endif
%description -n ldirectord
The Linux Director Daemon (ldirectord) was written by Jacob Rief.
<jacob.rief@tiscover.com>
ldirectord is a stand alone daemon for monitoring the services on real
servers. Currently, HTTP, HTTPS, and FTP services are supported.
-lditrecord is simple to install and works with the heartbeat code
-(http://www.linux-ha.org/).
+ldirectord is simple to install and works with Pacemaker
+(http://clusterlabs.org/).
See 'ldirectord -h' and linux-ha/doc/ldirectord for more information.
%endif
%prep
%if 0%{?suse_version} == 0 && 0%{?fedora} == 0 && 0%{?centos_version} == 0 && 0%{?rhel} == 0
%{error:Unable to determine the distribution/version. This is generally caused by missing /etc/rpm/macros.dist. Please install the correct build packages or define the required macros manually.}
exit 1
%endif
%setup -q -n %{name}-%{version}%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:-%{alphatag}}%{?dirty:-%{dirty}}
%build
# Generate the configure script when the source tree does not ship one.
if [ ! -f configure ]; then
./autogen.sh
fi
# Fedora 11 and later, and CentOS/RHEL above 5, take CFLAGS from the
# distribution optflags and turn fatal warnings off. Everything else appends
# RPM_OPT_FLAGS to the existing CFLAGS and keeps fatal warnings on.
%if 0%{?fedora} >= 11 || 0%{?centos_version} > 5 || 0%{?rhel} > 5
CFLAGS="$(echo '%{optflags}')"
%global conf_opt_fatal "--enable-fatal-warnings=no"
%else
CFLAGS="${CFLAGS} ${RPM_OPT_FLAGS}"
%global conf_opt_fatal "--enable-fatal-warnings=yes"
%endif
# Choose the agent set passed to configure: rgmanager, linux-ha, or all when
# both build conditionals are enabled (the last matching definition wins).
%if %{with rgmanager}
%global rasset rgmanager
%endif
%if %{with linuxha}
%global rasset linux-ha
%endif
%if %{with rgmanager} && %{with linuxha}
%global rasset all
%endif
export CFLAGS
%configure \
%{?conf_opt_rsctmpdir:%conf_opt_rsctmpdir} \
%{conf_opt_fatal} \
%if %{defined _unitdir}
--with-systemdsystemunitdir=%{_unitdir} \
%endif
--with-pkg-name=%{name} \
--with-ras-set=%{rasset}
# Parallelism: an explicit jobs macro takes precedence over the default
# smp make flags.
%if %{defined jobs}
JFLAGS="$(echo '-j%{jobs}')"
%else
JFLAGS="$(echo '%{_smp_mflags}')"
%endif
make $JFLAGS
%install
# Start from an empty buildroot, then stage the build into it.
rm -rf %{buildroot}
make install DESTDIR=%{buildroot}
## tree fixup
# remove docs (there is only one and they should come from doc sections in files)
rm -rf %{buildroot}/usr/share/doc/resource-agents
%if %{with linuxha}
%if 0%{?suse_version}
# SUSE only: create the /sbin/rcldirectord link inside the buildroot. When the
# unitdir macro is defined it points at /usr/sbin/service, otherwise at the
# ldirectord init script. The trailing "|| true" ignores a failing subshell.
test -d %{buildroot}/sbin || mkdir %{buildroot}/sbin
(
cd %{buildroot}/sbin
%if %{defined _unitdir}
ln -s /usr/sbin/service rcldirectord
%else
ln -sf /%{_sysconfdir}/init.d/ldirectord rcldirectord
%endif
) || true
%endif
%endif
%clean
# Remove the staged buildroot.
rm -rf %{buildroot}
%files
%defattr(-,root,root)
%doc AUTHORS COPYING COPYING.GPLv3 COPYING.LGPL ChangeLog
# linux-ha builds also package the webapps README and the RA API DTD as docs.
%if %{with linuxha}
%doc doc/README.webapps
%doc %{_datadir}/%{name}/ra-api-1.dtd
%endif
# rgmanager builds ship the cluster data directory and rhev-check.sh.
%if %{with rgmanager}
%{_datadir}/cluster
%{_sbindir}/rhev-check.sh
%endif
# linux-ha builds: OCF directory tree with the heartbeat library and agents.
%if %{with linuxha}
%dir /usr/lib/ocf
%dir /usr/lib/ocf/resource.d
%dir /usr/lib/ocf/lib
/usr/lib/ocf/lib/heartbeat
/usr/lib/ocf/resource.d/heartbeat
%if %{with rgmanager}
/usr/lib/ocf/resource.d/redhat
%endif
# ocft files below the package data directory.
%dir %{_datadir}/%{name}
%dir %{_datadir}/%{name}/ocft
%{_datadir}/%{name}/ocft/configs
%{_datadir}/%{name}/ocft/caselib
%{_datadir}/%{name}/ocft/helpers.sh
%{_datadir}/%{name}/ocft/runocft
%{_datadir}/%{name}/ocft/runocft.prereq
%{_datadir}/%{name}/ocft/README
%{_datadir}/%{name}/ocft/README.zh_CN
# Tools installed into sbindir and the heartbeat headers.
%{_sbindir}/ocf-tester
%{_sbindir}/ocft
%{_sbindir}/sfex_init
%{_sbindir}/sfex_stat
%{_includedir}/heartbeat
# Run directory owned by root with mode 1755 (sticky bit set).
%dir %attr (1755, root, root) %{_var}/run/resource-agents
# Manual pages.
%{_mandir}/man7/*.7*
%{_mandir}/man8/ocf-tester.8*
%{_mandir}/man8/sfex_init.8*
# For compatibility with pre-existing agents
%dir %{_sysconfdir}/ha.d
%{_sysconfdir}/ha.d/shellfuncs
%{_libexecdir}/heartbeat
%post -n resource-agents
# When the scriptlet argument is 2 (an upgrade, per the RPM scriptlet
# convention), copy whatever is left in the old heartbeat rsctmp directory
# into the resource-agents run directory and then remove the old directory.
# The cp output is discarded and its exit status is not checked.
if [ $1 = 2 ]; then
if [ -d %{_var}/run/heartbeat/rsctmp ]; then
cp -fpr %{_var}/run/heartbeat/rsctmp/* %{_var}/run/resource-agents/ 1>/dev/null 2>&1
rm -fr %{_var}/run/heartbeat/rsctmp
fi
fi
%if %{with rgmanager}
# Run ccs_update_schema; its output and exit status are deliberately ignored.
ccs_update_schema > /dev/null 2>&1 ||:
%endif
%if 0%{?suse_version}
%preun -n ldirectord
# Before removal: service_del_preun when the unitdir macro is defined
# (systemd builds), stop_on_removal otherwise.
%if %{defined _unitdir}
%service_del_preun ldirectord.service
%else
%stop_on_removal ldirectord
%endif
%postun -n ldirectord
# After removal: service_del_postun when the unitdir macro is defined,
# insserv_cleanup otherwise.
%if %{defined _unitdir}
%service_del_postun ldirectord.service
%else
%insserv_cleanup
%endif
%post -n ldirectord
# After installation: service_add_post runs only when the unitdir macro is
# defined; there is no non-systemd branch here.
%if %{defined _unitdir}
%service_add_post ldirectord.service
%endif
%pre -n ldirectord
# Before installation: service_add_pre runs only when the unitdir macro is
# defined; there is no non-systemd branch here.
%if %{defined _unitdir}
%service_add_pre ldirectord.service
%endif
%endif
%if 0%{?fedora}
%preun -n ldirectord
# Unregister the init script only on final removal. The scriptlet argument is
# 0 on erase and 1 on upgrade. During an upgrade the preun of the old package
# runs after the post of the new one, so an unconditional delete would undo
# the registration made by the post scriptlet and leave the service disabled.
if [ "$1" -eq 0 ]; then
    /sbin/chkconfig --del ldirectord
fi
%postun -n ldirectord -p /sbin/ldconfig
%post -n ldirectord
# Register the init script with chkconfig.
/sbin/chkconfig --add ldirectord
%endif
%files -n ldirectord
%defattr(-,root,root)
# Daemon, man page, and ldirectord.cf plus COPYING packaged as documentation.
%{_sbindir}/ldirectord
%doc ldirectord/ldirectord.cf COPYING
%{_mandir}/man8/ldirectord.8*
# noreplace keeps a locally modified logrotate snippet on upgrade.
%config(noreplace) %{_sysconfdir}/logrotate.d/ldirectord
%dir %{_sysconfdir}/ha.d
%dir %{_sysconfdir}/ha.d/resource.d
%{_sysconfdir}/ha.d/resource.d/ldirectord
# When the unitdir macro is defined, ship the systemd unit and leave the init
# script out of the package; otherwise ship the init script.
%if %{defined _unitdir}
%{_unitdir}/ldirectord.service
%exclude %{_sysconfdir}/init.d/ldirectord
%else
%{_sysconfdir}/init.d/ldirectord
%endif
# SUSE: the rcldirectord link.
%if 0%{?suse_version}
/sbin/rcldirectord
%endif
# Fedora: the ldirectord OCF agent is listed in this subpackage as well.
%if 0%{?fedora}
/usr/lib/ocf/resource.d/heartbeat/ldirectord
%endif
%endif
%changelog
* @date@ Autotools generated version <nobody@nowhere.org> - @version@-@specver@-@numcomm@.@alphatag@.@dirty@
- Autotools generated version

File Metadata

Mime Type
text/x-diff
Expires
Wed, Oct 15, 11:53 PM (2 h, 54 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2530589
Default Alt Text
(442 KB)

Event Timeline