Page MenuHomeClusterLabs Projects

No OneTemporary

diff --git a/heartbeat/storage-mon.in b/heartbeat/storage-mon.in
index 5b289fe55..875095670 100644
--- a/heartbeat/storage-mon.in
+++ b/heartbeat/storage-mon.in
@@ -1,263 +1,263 @@
#!@BASH_SHELL@
#
# Copyright (C) 2021 Red Hat, Inc. All rights reserved.
#
# Authors: Christine Caulfield <ccaulfie@redhat.com>
# Fabio M. Di Nitto <fdinitto@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#
# Checks storage I/O status of all given drives and writes the #health-storage
# status into the CIB
# Implementation is heavily based on ocf:pacemaker:HealthSMART
#
# It sends a single block of I/O to a random location on the device and reports any errors returned.
# If the I/O hangs, that will also be reported (bear in mind that this may also hang the C app in some
# instances).
#
# It's worth making a note in the RA description that the smartmon RA is also recommended (this
# does not replace it), and that Pacemaker health checking should be configured.
#
# https://clusterlabs.org/pacemaker/doc/2.1/Pacemaker_Explained/singlehtml/index.html#tracking-node-health
#######################################################################
#######################################################################
# Initialization:
# Locate and source the OCF shell function library. The expansions are quoted
# so that a directory containing whitespace or glob characters is used verbatim.
# (The library presumably provides ocf_log and the OCF_* return codes used
# throughout this agent -- it is not part of this file.)
: "${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}"
. "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"
#
# Paths of the external tools this agent runs.
STORAGEMON="${HA_BIN}/storage_mon"
ATTRDUP=/usr/sbin/attrd_updater
# Default values for the resource parameters (also advertised by meta_data).
OCF_RESKEY_CRM_meta_interval_default="0"
OCF_RESKEY_io_timeout_default="10"
OCF_RESKEY_inject_errors_default=""
OCF_RESKEY_state_file_default="${HA_RSCTMP%%/}/storage-mon-${OCF_RESOURCE_INSTANCE}.state"
# Explicitly list all environment variables used, to make static analysis happy.
# Each ':' argument is quoted so the expanded value is never word-split or
# globbed; the ':=' form still assigns the default when the variable is unset
# or empty.
: "${OCF_RESKEY_CRM_meta_interval:=${OCF_RESKEY_CRM_meta_interval_default}}"
: "${OCF_RESKEY_drives:=}"
: "${OCF_RESKEY_io_timeout:=${OCF_RESKEY_io_timeout_default}}"
: "${OCF_RESKEY_inject_errors:=${OCF_RESKEY_inject_errors_default}}"
: "${OCF_RESKEY_state_file:=${OCF_RESKEY_state_file_default}}"
#######################################################################
# Print the agent's OCF metadata (XML) on stdout and return $OCF_SUCCESS.
# The parameter defaults are interpolated from the OCF_RESKEY_*_default
# variables, so the heredoc delimiter is intentionally left unquoted.
# The heredoc body is kept at column 0 because it is emitted verbatim.
meta_data() {
	cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="storage-mon">
<version>1.0</version>
<longdesc lang="en">
System health agent that checks the storage I/O status of the given drives and
updates the #health-storage attribute. Usage is highly recommended in combination
with the HealthSMART monitoring agent. The agent currently supports a maximum of 25
devices per instance.
</longdesc>
<shortdesc lang="en">storage I/O health status</shortdesc>
<parameters>
<parameter name="state_file" unique="1">
<longdesc lang="en">
Location to store the resource state in.
</longdesc>
<shortdesc lang="en">State file</shortdesc>
<content type="string" default="${OCF_RESKEY_state_file_default}" />
</parameter>
<parameter name="drives" unique="1" required="1">
<longdesc lang="en">
The drive(s) to check as a SPACE separated list. Enter the full path to the device, e.g. "/dev/sda".
</longdesc>
<shortdesc lang="en">Drives to check</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="io_timeout" unique="0">
<longdesc lang="en">
Specify disk I/O timeout in seconds. Minimum 1, recommended 10 (default).
</longdesc>
<shortdesc lang="en">Disk I/O timeout</shortdesc>
<content type="integer" default="${OCF_RESKEY_io_timeout_default}" />
</parameter>
<parameter name="inject_errors" unique="0">
<longdesc lang="en">
Used only for testing! Specify % of I/O errors to simulate drives failures.
</longdesc>
<shortdesc lang="en">Specify % of I/O errors to simulate drives failures</shortdesc>
<content type="integer" default="${OCF_RESKEY_inject_errors_default}" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="10s" />
<action name="stop" timeout="120s" />
<action name="monitor" timeout="120s" interval="30s" start-delay="0s" />
<action name="meta-data" timeout="5s" />
<action name="validate-all" timeout="10s" />
</actions>
</resource-agent>
END
	return $OCF_SUCCESS
}
#######################################################################
# Print a two-line usage summary on stdout.
# Arguments: $1 - status code to return to the caller.
storage-mon_usage() {
	printf '%s\n' \
		"usage: $0 {start|stop|monitor|validate-all|meta-data}" \
		"Expects to have a fully populated OCF RA-compliant environment set."
	return $1
}
# Return 0 when $1 is a value the shell's numeric test accepts as an integer.
# An empty or non-numeric value makes '[ -eq ]' fail, which yields non-zero.
storage-mon_is_integer() {
	[ "$1" -eq "$1" ] 2>/dev/null
}

# Validate the runtime environment and the resource parameters.
# Globals read: STORAGEMON, OCF_RESKEY_drives, OCF_RESKEY_io_timeout,
#               OCF_RESKEY_inject_errors, OCF_ERR_INSTALLED, OCF_ERR_CONFIGURED
# Does not return on failure: it logs the reason and EXITS the script with
#   $OCF_ERR_INSTALLED  - helper binary or a configured drive is missing
#   $OCF_ERR_CONFIGURED - a parameter value is invalid
# Returns 0 when everything checks out.
storage-mon_init() {
	local drive
	local count=0

	# Test for presence of the storage_mon helper
	if [ ! -x "$STORAGEMON" ]; then
		ocf_log err "${STORAGEMON} not installed."
		exit $OCF_ERR_INSTALLED
	fi

	# The drive list is deliberately expanded unquoted: it is a SPACE
	# separated list and each word is one device path.
	for drive in ${OCF_RESKEY_drives}; do
		if [ ! -e "$drive" ]; then
			ocf_log err "${drive} not found on the system"
			exit $OCF_ERR_INSTALLED
		fi
		count=$((count + 1))
	done

	# "drives" is declared required in the metadata; an empty list would
	# leave nothing to check.
	if [ "$count" -eq 0 ]; then
		ocf_log err "No drives configured. At least one drive is required."
		exit $OCF_ERR_CONFIGURED
	fi
	if [ "$count" -gt 25 ]; then
		ocf_log err "Too many drives ($count) configured for this agent. Max 25."
		exit $OCF_ERR_CONFIGURED
	fi

	# A non-integer value would make the numeric comparison itself fail,
	# which reads as "false" and would let the bad value through, so the
	# integer check has to come first.
	if ! storage-mon_is_integer "${OCF_RESKEY_io_timeout}"; then
		ocf_log err "io_timeout (${OCF_RESKEY_io_timeout}) is not an integer."
		exit $OCF_ERR_CONFIGURED
	fi
	if [ "${OCF_RESKEY_io_timeout}" -lt 1 ]; then
		ocf_log err "Minimum timeout is 1. Recommended 10 (default)."
		exit $OCF_ERR_CONFIGURED
	fi

	# inject_errors is optional; only validate it when it is set.
	if [ -n "${OCF_RESKEY_inject_errors}" ]; then
		if ! storage-mon_is_integer "${OCF_RESKEY_inject_errors}" ||
			[ "${OCF_RESKEY_inject_errors}" -lt 1 ] ||
			[ "${OCF_RESKEY_inject_errors}" -gt 100 ]; then
			ocf_log err "Inject errors % has to be a value between 1 and 100."
			exit $OCF_ERR_CONFIGURED
		fi
	fi

	return 0
}
# Validate the configuration and check that the directory holding the state
# file is writable.
# NOTE: a function with this same name is defined again further down in this
# file; when the script is read top to bottom that later definition replaces
# this one.
# Returns $OCF_SUCCESS, or $OCF_ERR_CONFIGURED when a probe file cannot be
# created next to the state file. (storage-mon_init exits on invalid config.)
storage-mon_validate() {
	local probe

	storage-mon_init

	# Writability probe: create, then remove, a file named after our PID.
	probe="$(dirname "$OCF_RESKEY_state_file")/$$"
	if ! touch "$probe"; then
		return $OCF_ERR_CONFIGURED
	fi
	rm "$probe"

	return $OCF_SUCCESS
}
# Run one I/O health check over all configured drives and publish the result
# as the node attribute "#health-${OCF_RESOURCE_INSTANCE}" ("green" or "red").
# Globals read: STORAGEMON, ATTRDUP, OCF_RESOURCE_INSTANCE,
#               OCF_RESKEY_state_file, OCF_RESKEY_drives,
#               OCF_RESKEY_io_timeout, OCF_RESKEY_inject_errors
# Returns:
#   $OCF_NOT_RUNNING - the state file does not exist (resource is stopped)
#   $OCF_SUCCESS     - the check was run; its outcome is carried by the
#                      attribute value, not by the return code
# (storage-mon_init exits the script on an invalid configuration.)
storage-mon_monitor() {
	local drive status
	local -a helper_args=()

	storage-mon_init

	# Monitor _MUST!_ differentiate correctly between running
	# (SUCCESS), failed (ERROR) or _cleanly_ stopped (NOT RUNNING).
	# That is THREE states, not just yes/no.
	if [ ! -f "${OCF_RESKEY_state_file}" ]; then
		return $OCF_NOT_RUNNING
	fi

	# Build the helper's argument list as an array so that each device path
	# reaches the helper as exactly one argument. The drive list itself is
	# expanded unquoted on purpose: it is a SPACE separated list.
	for drive in ${OCF_RESKEY_drives}; do
		helper_args+=(--device "$drive" --score 1)
	done
	helper_args+=(--timeout "${OCF_RESKEY_io_timeout}")
	if [ -n "${OCF_RESKEY_inject_errors}" ]; then
		helper_args+=(--inject-errors-percent "${OCF_RESKEY_inject_errors}")
	fi

	# Any non-zero exit from the helper marks the storage as unhealthy.
	if "$STORAGEMON" "${helper_args[@]}"; then
		status="green"
	else
		status="red"
	fi

	# Publishing the attribute stays best-effort (the return code is
	# unaffected), but a failure is logged instead of being dropped silently.
	if ! "$ATTRDUP" -n "#health-${OCF_RESOURCE_INSTANCE}" -U "$status" -d "5s"; then
		ocf_log warn "Failed to update #health-${OCF_RESOURCE_INSTANCE} to ${status}"
	fi

	return $OCF_SUCCESS
}
# Start the resource: mark it as running by creating the state file.
# When the monitor already reports success there is nothing to do.
# Returns $OCF_SUCCESS if already running, otherwise the exit status of the
# touch that creates the state file.
storage-mon_start() {
	local probe_rc

	storage-mon_monitor
	probe_rc=$?

	# Already up: report success without touching the state file again.
	[ "$probe_rc" -eq "$OCF_SUCCESS" ] && return $OCF_SUCCESS

	touch "${OCF_RESKEY_state_file}"
}
# Stop the resource by removing its state file.
# Stop deliberately does NOT run storage-mon_monitor: the monitor performs
# real disk I/O and goes through storage-mon_init, which exits the script with
# an error when a configured drive is missing. That would make stop fail in
# exactly the situation this agent exists to detect, and it would leave the
# state file in place whenever the health check itself could not be run.
# Stop is idempotent: an already-absent state file is not an error.
# Globals read: OCF_RESKEY_state_file, OCF_SUCCESS, OCF_ERR_GENERIC
# Returns $OCF_SUCCESS, or $OCF_ERR_GENERIC when the state file exists but
# cannot be removed.
storage-mon_stop() {
	if ! rm -f -- "${OCF_RESKEY_state_file}"; then
		ocf_log err "Unable to remove state file ${OCF_RESKEY_state_file}"
		return $OCF_ERR_GENERIC
	fi
	return $OCF_SUCCESS
}
# Validate the configuration and confirm the state file's directory is
# writable.
# NOTE: this re-defines a function of the same name that appears earlier in
# this file; being the later definition, this is the one in effect when the
# action dispatch at the bottom of the script runs.
# Returns $OCF_SUCCESS, or $OCF_ERR_CONFIGURED if a scratch file cannot be
# created in that directory. (storage-mon_init exits on invalid config.)
storage-mon_validate() {
	local dir marker

	storage-mon_init

	# Is the state directory writable? Try to drop a PID-named marker in it.
	dir=$(dirname "${OCF_RESKEY_state_file}")
	marker="$dir/$$"
	touch "$marker" || return $OCF_ERR_CONFIGURED
	rm "$marker"

	return $OCF_SUCCESS
}
# Entry point: run the handler for the requested OCF action, log the outcome
# at debug level and exit with the handler's status. Unknown actions print
# the usage text and report $OCF_ERR_UNIMPLEMENTED.
case "$__OCF_ACTION" in
	start)
		storage-mon_start
		;;
	stop)
		storage-mon_stop
		;;
	monitor)
		storage-mon_monitor
		;;
	validate-all)
		storage-mon_validate
		;;
	meta-data)
		meta_data
		;;
	usage | help)
		storage-mon_usage $OCF_SUCCESS
		;;
	*)
		storage-mon_usage $OCF_ERR_UNIMPLEMENTED
		;;
esac
# Must directly follow 'esac' so that it captures the handler's status.
rc=$?
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
exit $rc
# vim: set filetype=sh:

File Metadata

Mime Type
text/x-diff
Expires
Tue, Jul 8, 6:43 PM (1 h, 48 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1968805
Default Alt Text
(8 KB)

Event Timeline