diff --git a/extra/resources/controld b/extra/resources/controld index a0eb8f2d68..7d7531964f 100644 --- a/extra/resources/controld +++ b/extra/resources/controld @@ -1,309 +1,313 @@ #!/bin/sh # # Resource Agent for managing the DLM controld process. # # Copyright (c) 2009 Novell, Inc # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs} . ${OCF_FUNCTIONS} : ${__OCF_ACTION=$1} ####################################################################### if [ -e "$OCF_ROOT/resource.d/heartbeat/controld" ]; then ocf_log info "Using heartbeat controld agent" $OCF_ROOT/resource.d/heartbeat/controld $1 exit $? fi meta_data() { cat < 1.0 This Resource Agent can control the dlm_controld services needed by cluster-aware file systems. It assumes that dlm_controld is in your default PATH. In most cases, it should be run as an anonymous clone. DLM Agent for cluster file systems Any additional options to start the dlm_controld service with DLM Options The location where configfs is or should be mounted Location of configfs The daemon to start - supports gfs_controld(.pcmk) and dlm_controld(.pcmk) The daemon to start Allow DLM start-up even if STONITH/fencing is disabled in the cluster. Setting this option to true will cause cluster malfunction and hangs on fail-over for DLM clients that require fencing (such as GFS2, OCFS2, and cLVM2). This option is advanced use only. Allow start-up even without STONITH/fencing END } ####################################################################### DLM_SYSFS_DIR="/sys/kernel/dlm" controld_usage() { cat <&1) if [ $? -eq 0 ]; then if [ -n "$tmp" ]; then ocf_log err "Uncontrolled lockspace exists, system must reboot. Executing suicide fencing" stonith_admin --reboot=$(crm_node -n) --tag controld exit $OCF_ERR_GENERIC fi fi } controld_start() { controld_monitor; rc=$? case $rc in $OCF_SUCCESS) return $OCF_SUCCESS;; $OCF_NOT_RUNNING) ;; *) return $OCF_ERR_GENERIC;; esac if [ ! -e $OCF_RESKEY_configdir ]; then modprobe configfs if [ ! -e $OCF_RESKEY_configdir ]; then ocf_log err "$OCF_RESKEY_configdir not available" return $OCF_ERR_INSTALLED fi fi mount | grep "type configfs" > /dev/null if [ $? != 0 ]; then mount -t configfs none $OCF_RESKEY_configdir fi if [ ! -e $OCF_RESKEY_configdir/dlm ]; then modprobe dlm if [ ! -e $OCF_RESKEY_configdir/dlm ]; then ocf_log err "$OCF_RESKEY_configdir/dlm not available" return $OCF_ERR_INSTALLED fi fi if ! ocf_is_true "$OCF_RESKEY_allow_stonith_disabled" && \ ! ocf_is_true "`crm_attribute --type=crm_config --name=stonith-enabled --query --quiet --default=true`"; then ocf_log err "The cluster property stonith-enabled may not be deactivated to use the DLM" return $OCF_ERR_CONFIGURED fi ${OCF_RESKEY_daemon} $OCF_RESKEY_args while true do sleep 1 controld_monitor; rc=$? case $rc in $OCF_SUCCESS) local addr_list addr_list="$(cat /sys/kernel/config/dlm/cluster/comms/*/addr_list 2>/dev/null)" if [ $? -eq 0 ] && [ -n "$addr_list" ]; then return $OCF_SUCCESS fi ;; $OCF_NOT_RUNNING) return $OCF_NOT_RUNNING ;; *) return $OCF_ERR_GENERIC ;; esac ocf_log debug "Waiting for ${OCF_RESKEY_daemon} to be ready" done } controld_stop() { controld_monitor; rc=$? if [ $rc = $OCF_NOT_RUNNING ]; then return $OCF_SUCCESS fi killall -TERM ${OCF_RESKEY_daemon}; rc=$? if [ $rc != 0 ]; then return $OCF_ERR_GENERIC fi rc=$OCF_SUCCESS while [ $rc = $OCF_SUCCESS ]; do controld_monitor; rc=$? sleep 1 done if [ $rc = $OCF_NOT_RUNNING ]; then rc=$OCF_SUCCESS fi return $rc } controld_monitor() { local rc killall -0 ${OCF_RESKEY_daemon} >/dev/null 2>&1 ; rc=$? case $rc in - 0) dlm_tool ls | grep -q "wait fencing" - if [ $? -eq 0 ]; then + 0) smw=$(dlm_tool status -v | grep "stateful_merge_wait=" | cut -d= -f2) + if [ $smw -eq 1 ]; then + ocf_log err "DLM status is: stateful_merge_wait" + rc=$OCF_ERR_GENERIC + elif [ -z "$smw" ] && dlm_tool ls | grep -q "wait fencing" && \ + ! stonith_admin -H -V | grep -q "wishes to"; then ocf_log err "DLM status is: wait fencing" rc=$OCF_ERR_GENERIC else rc=$OCF_SUCCESS fi ;; 1) rc=$OCF_NOT_RUNNING;; *) rc=$OCF_ERR_GENERIC;; esac # if the dlm is not successfully running, but # dlm lockspace bits are left over, we self must fence. if [ $rc -ne $OCF_SUCCESS ]; then check_uncontrolled_locks fi return $rc } controld_validate() { check_binary killall check_binary ${OCF_RESKEY_daemon} case ${OCF_RESKEY_CRM_meta_globally_unique} in yes|Yes|true|True|1) ocf_log err "$OCF_RESOURCE_INSTANCE must be configured with the globally_unique=false meta attribute" exit $OCF_ERR_CONFIGURED ;; esac [ -d /var/run/cluster ] || mkdir /var/run/cluster return $OCF_SUCCESS } : ${OCF_RESKEY_sctp=false} : ${OCF_RESKEY_configdir=/sys/kernel/config} : ${OCF_RESKEY_CRM_meta_globally_unique:="false"} case "$HA_quorum_type" in pcmk) daemon_ext=".pcmk";; *) daemon_ext="";; esac case "$OCF_RESOURCE_INSTANCE" in *[gG][fF][sS]*) : ${OCF_RESKEY_args=-g 0} : ${OCF_RESKEY_daemon=gfs_controld${daemon_ext}} ;; *[dD][lL][mM]*) : ${OCF_RESKEY_args=-s 0} : ${OCF_RESKEY_daemon=dlm_controld${daemon_ext}} ;; *) : ${OCF_RESKEY_args=-s 0} : ${OCF_RESKEY_daemon=dlm_controld${daemon_ext}} esac case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; start) controld_validate; controld_start;; stop) controld_stop;; monitor) controld_validate; controld_monitor;; validate-all) controld_validate;; usage|help) controld_usage exit $OCF_SUCCESS ;; *) controld_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? exit $rc