diff --git a/heartbeat/nfsserver b/heartbeat/nfsserver index 3cec5c8e7..4b6767bfb 100755 --- a/heartbeat/nfsserver +++ b/heartbeat/nfsserver @@ -1,660 +1,692 @@ #!/bin/sh # nfsserver # # Description: Manages nfs server as OCF resource # by hxinwei@gmail.com # License: GNU General Public License v2 (GPLv2) and later if [ -n "$OCF_DEBUG_LIBRARY" ]; then . $OCF_DEBUG_LIBRARY else : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs fi if is_redhat_based; then . ${OCF_FUNCTIONS_DIR}/nfsserver-redhat.sh fi DEFAULT_INIT_SCRIPT="/etc/init.d/nfsserver" if ! [ -f $DEFAULT_INIT_SCRIPT ]; then # On some systems, the script is just called nfs DEFAULT_INIT_SCRIPT="/etc/init.d/nfs" fi DEFAULT_NOTIFY_CMD=`which sm-notify` DEFAULT_NOTIFY_CMD=${DEFAULT_NOTIFY_CMD:-"/sbin/sm-notify"} DEFAULT_NOTIFY_FOREGROUND="false" DEFAULT_RPCPIPEFS_DIR="/var/lib/nfs/rpc_pipefs" EXEC_MODE=0 SELINUX_ENABLED=-1 STATD_PATH="/var/lib/nfs" STATD_DIR="" nfsserver_meta_data() { cat < 1.0 Nfsserver helps to manage the Linux nfs server as a failover-able resource in Linux-HA. It depends on Linux specific NFS implementation details, so is considered not portable to other platforms yet. Manages an NFS server The default init script shipped with the Linux distro. The nfsserver resource agent offloads the start/stop/monitor work to the init script because the procedure to start/stop/monitor nfsserver varies on different Linux distro. In the event that this option is not set, this agent will attempt to use an init script at this location, ${DEFAULT_INIT_SCRIPT}, or detect a systemd unit-file to use in the event that no init script is detected. Init script for nfsserver Do not send reboot notifications to NFSv3 clients during server startup. Disable NFSv3 server reboot notifications Keeps the sm-notify attached to its controlling terminal and running in the foreground. Keeps the notify tool running in the foreground. Specifies the length of sm-notify retry time, in minutes, to continue retrying notifications to unresponsive hosts. If this option is not specified, sm-notify attempts to send notifications for 15 minutes. Specifying a value of 0 causes sm-notify to continue sending notifications to unresponsive peers until it is manually killed. Specifies the length of sm-notify retry time (minutes). Comma separated list of floating IP addresses used to access the nfs service IP addresses. The nfsserver resource agent will save nfs related information in this specific directory. And this directory must be able to fail-over before nfsserver itself. Directory to store nfs server related information. The mount point for the sunrpc file system. Default is $DEFAULT_RPCPIPEFS_DIR. This script will mount (bind) nfs_shared_infodir on /var/lib/nfs/ (cannot be changed), and this script will mount the sunrpc file system on $DEFAULT_RPCPIPEFS_DIR (default, can be changed by this parameter). If you want to move only rpc_pipefs/ (e.g. to keep rpc_pipefs/ local) from default, please set this value. The mount point for the sunrpc file system. $( is_redhat_based && nfsserver_redhat_meta_data ) END return $OCF_SUCCESS } nfsserver_usage() { cat < /dev/null 2>&1 && selinuxenabled SELINUX_ENABLED=$? if [ $SELINUX_ENABLED -eq 0 ]; then export SELINUX_LABEL="$(ls -ldZ $STATD_PATH | cut -f4 -d' ')" fi ## # EXEC_MODE values # 1 user init script or default init script # 2 systemd (with nfs-lock.service) # 3 systemd (with rpc-statd.service) # # On error, this function will terminate the process # with error code $OCF_ERR_INSTALLED ## set_exec_mode() { ## # If EXEC_MODE is already set, we don't need to run this function again. ## if [ $EXEC_MODE -ne 0 ]; then return 0; fi ## # If the user defined an init script, It must exist for us to continue ## if [ -n "$OCF_RESKEY_nfs_init_script" ]; then # check_binary will exit the process if init script does not exist check_binary ${OCF_RESKEY_nfs_init_script} EXEC_MODE=1 return 0 fi ## # Check to see if the default init script exists, if so we'll use that. ## if which $DEFAULT_INIT_SCRIPT > /dev/null 2>&1; then OCF_RESKEY_nfs_init_script=$DEFAULT_INIT_SCRIPT EXEC_MODE=1 return 0 fi ## # Attempt systemd (with nfs-lock.service). ## if which systemctl > /dev/null 2>&1; then if systemctl list-unit-files | grep nfs-server > /dev/null && systemctl list-unit-files | grep nfs-lock > /dev/null; then EXEC_MODE=2 # when using systemd, the nfs-lock service file handles nfsv3 locking daemons for us. return 0 fi fi ## # Attempt systemd (with rpc-statd.service). ## if which systemctl > /dev/null 2>&1; then if systemctl list-unit-files | grep nfs-server > /dev/null && systemctl list-unit-files | grep rpc-statd > /dev/null; then EXEC_MODE=3 return 0 fi fi ocf_exit_reason "No init script or systemd unit file detected for nfs server" exit $OCF_ERR_INSTALLED } ## # wrapper for init script and systemd calls. ## nfs_exec() { local cmd=$1 set_exec_mode case $EXEC_MODE in 1) ${OCF_RESKEY_nfs_init_script} $cmd;; 2) systemctl $cmd nfs-server.service ;; 3) systemctl $cmd nfs-server.service ;; esac } v3locking_exec() { local cmd=$1 set_exec_mode if [ $EXEC_MODE -eq 2 ]; then systemctl $cmd nfs-lock.service elif [ $EXEC_MODE -eq 3 ]; then systemctl $cmd rpc-statd.service else case $cmd in start) locking_start;; stop) locking_stop;; status) locking_status;; esac fi } +nfsserver_systemd_monitor() +{ + local threads_num + local rc + + nfs_exec is-active + rc=$? + + # Now systemctl is-active can't detect the failure of kernel process like nfsd. + # So, if the return value of systemctl is-active is 0, check the threads number + # to make sure the process is running really. + # /proc/fs/nfsd/threads has the numbers of the nfsd threads. + if [ $rc -eq 0 ]; then + threads_num=`cat /proc/fs/nfsd/threads 2>/dev/null` + if [ $? -eq 0 ]; then + if [ $threads_num -gt 0 ]; then + return $OCF_SUCCESS + else + return 3 + fi + else + return $OCF_ERR_GENERIC + fi + fi + + return $rc +} + nfsserver_monitor () { + set_exec_mode fn=`mktemp` - nfs_exec status > $fn 2>&1 + case $EXEC_MODE in + 1) nfs_exec status > $fn 2>&1;; + [23]) nfsserver_systemd_monitor > $fn 2>&1;; + esac rc=$? ocf_log debug "$(cat $fn)" rm -f $fn #Adapte LSB status code to OCF return code if [ $rc -eq 0 ]; then # don't report success if nfs servers are up # without locking daemons. v3locking_exec "status" rc=$? if [ $rc -ne 0 ]; then ocf_exit_reason "NFS server is up, but the locking daemons are down" rc=$OCF_ERR_GENERIC fi return $rc elif [ $rc -eq 3 ]; then return $OCF_NOT_RUNNING else return $OCF_ERR_GENERIC fi } prepare_directory () { if [ -z "$fp" ]; then return fi [ -d "$fp" ] || mkdir -p $fp [ -d "$rpcpipefs_make_dir" ] || mkdir -p $rpcpipefs_make_dir [ -d "$fp/v4recovery" ] || mkdir -p $fp/v4recovery [ -d "$fp/$STATD_DIR" ] || mkdir -p "$fp/$STATD_DIR" [ -d "$fp/$STATD_DIR/sm" ] || mkdir -p "$fp/$STATD_DIR/sm" [ -d "$fp/$STATD_DIR/sm.ha" ] || mkdir -p "$fp/$STATD_DIR/sm.ha" [ -d "$fp/$STATD_DIR/sm.bak" ] || mkdir -p "$fp/$STATD_DIR/sm.bak" [ -n "`id -u rpcuser 2>/dev/null`" -a "`id -g rpcuser 2>/dev/null`" ] && chown -R rpcuser.rpcuser "$fp/$STATD_DIR" [ -f "$fp/etab" ] || touch "$fp/etab" [ -f "$fp/xtab" ] || touch "$fp/xtab" [ -f "$fp/rmtab" ] || touch "$fp/rmtab" dd if=/dev/urandom of=$fp/$STATD_DIR/state bs=1 count=4 >/dev/null 2>&1 [ -n "`id -u rpcuser 2>/dev/null`" -a "`id -g rpcuser 2>/dev/null`" ] && chown rpcuser.rpcuser "$fp/$STATD_DIR/state" [ $SELINUX_ENABLED -eq 0 ] && chcon -R "$SELINUX_LABEL" "$fp" } is_bound () { if mount | grep -q "on $1 type"; then return 0 fi return 1 } bind_tree () { if [ -z "$fp" ]; then return fi if is_bound /var/lib/nfs; then ocf_log debug "$fp is already bound to /var/lib/nfs" return 0 fi mount --bind $fp /var/lib/nfs [ $SELINUX_ENABLED -eq 0 ] && restorecon /var/lib/nfs } unbind_tree () { if `mount | grep -q " on $rpcpipefs_umount_dir"`; then umount -t rpc_pipefs $rpcpipefs_umount_dir fi if is_bound /var/lib/nfs; then umount /var/lib/nfs fi } binary_status() { local binary=$1 local pid pid=$(pgrep ${binary}) case $? in 0) echo "$pid" return $OCF_SUCCESS;; 1) return $OCF_NOT_RUNNING;; *) return $OCF_ERR_GENERIC;; esac } locking_status() { binary_status "rpc.statd" > /dev/null 2>&1 } locking_start() { local ret=$OCF_SUCCESS ocf_log info "Starting rpc.statd." rpc.statd $STATDARG ret=$? if [ $ret -ne 0 ]; then ocf_log err "Failed to start rpc.statd" return $ret fi [ -d /var/lock/subsys ] && touch /var/lock/subsys/nfslock return $ret } terminate() { local pids local i=0 while : ; do pids=$(binary_status $1) [ -z "$pids" ] && return 0 kill $pids sleep 1 i=$((i + 1)) [ $i -gt 3 ] && return 1 done } killkill() { local pids local i=0 while : ; do pids=$(binary_status $1) [ -z "$pids" ] && return 0 kill -9 $pids sleep 1 i=$((i + 1)) [ $i -gt 3 ] && return 1 done } stop_process() { local process=$1 ocf_log info "Stopping $process" if terminate $process; then ocf_log debug "$process is stopped" else if killkill $process; then ocf_log debug "$process is stopped" else ocf_log debug "Failed to stop $process" return 1 fi fi return 0 } locking_stop() { ret=0 # sm-notify can prevent umount of /var/lib/nfs/statd if # it is still trying to notify unresponsive clients. stop_process sm-notify if [ $? -ne 0 ]; then ret=$OCF_ERR_GENERIC fi stop_process rpc.statd if [ $? -ne 0 ]; then ret=$OCF_ERR_GENERIC fi return $ret } notify_locks() { if ocf_is_true "$OCF_RESKEY_nfs_no_notify"; then # we've been asked not to notify clients return; fi # run in foreground, if requested if ocf_is_true "$OCF_RESKEY_nfs_notify_foreground"; then opts="-d" fi if [ -n "$OCF_RESKEY_nfs_smnotify_retry_time" ]; then opts="$opts -m $OCF_RESKEY_nfs_smnotify_retry_time" fi if [ -n "$OCF_RESKEY_statd_outgoing_port" ]; then opts="$opts -p $OCF_RESKEY_statd_outgoing_port" fi # forces re-notificaiton regardless if notifies have already gone out opts="$opts -f" ocf_log info "executing sm-notify" if [ -n "$OCF_RESKEY_nfs_ip" ]; then for ip in `echo ${OCF_RESKEY_nfs_ip} | sed 's/,/ /g'`; do cp -rpfn $STATD_PATH/sm.ha/* $STATD_PATH/ > /dev/null 2>&1 sm-notify $opts -v $ip done else sm-notify $opts fi } nfsserver_start () { local rc; if nfsserver_monitor; then ocf_log debug "NFS server is already started" return $OCF_SUCCESS fi is_redhat_based && set_env_args prepare_directory bind_tree # remove the sm-notify pid so sm-notify will be allowed to run again without requiring a reboot. rm -f /var/run/sm-notify.pid # # Synchronize these before starting statd # cp -rpfn $STATD_PATH/sm.ha/* $STATD_PATH/ > /dev/null 2>&1 rm -rf $STATD_PATH/sm.ha/* > /dev/null 2>&1 cp -rpf $STATD_PATH/sm $STATD_PATH/sm.bak /var/lib/nfs/state $STATD_PATH/sm.ha > /dev/null 2>&1 ocf_log info "Starting NFS server ..." # mounts /proc/fs/nfsd for us lsmod | grep -q nfsd if [ $? -ne 0 ]; then modprobe nfsd fi # check to see if we need to start rpc.statd v3locking_exec "status" if [ $? -ne $OCF_SUCCESS ]; then v3locking_exec "start" rc=$? if [ $rc -ne 0 ]; then ocf_exit_reason "Failed to start NFS server locking daemons" return $rc fi else ocf_log info "rpc.statd already up" fi fn=`mktemp` nfs_exec start > $fn 2>&1 rc=$? ocf_log debug "$(cat $fn)" rm -f $fn if [ $rc -ne 0 ]; then ocf_exit_reason "Failed to start NFS server" return $rc fi notify_locks ocf_log info "NFS server started" return $OCF_SUCCESS } nfsserver_stop () { ocf_log info "Stopping NFS server ..." # backup the current sm state information to the ha folder before stopping. # the ha folder will be synced after startup, restoring the statd client state rm -rf $STATD_PATH/sm.ha/* > /dev/null 2>&1 cp -rpf $STATD_PATH/sm $STATD_PATH/sm.bak /var/lib/nfs/state $STATD_PATH/sm.ha > /dev/null 2>&1 fn=`mktemp` nfs_exec stop > $fn 2>&1 rc=$? ocf_log debug "$(cat $fn)" rm -f $fn v3locking_exec "stop" if [ $? -ne 0 ]; then ocf_exit_reason "Failed to stop NFS locking daemons" rc=$OCF_ERR_GENERIC fi if [ $rc -eq 0 ]; then unbind_tree ocf_log info "NFS server stopped" else ocf_exit_reason "Failed to stop NFS server" fi return $rc } nfsserver_validate () { ## # set_exec_mode will exit if nfs server is not installed ## set_exec_mode check_binary ${OCF_RESKEY_nfs_notify_cmd} if [ -n "$OCF_RESKEY_CRM_meta_clone" ] && [ -n "$OCF_RESKEY_nfs_shared_infodir" ]; then ocf_exit_reason "This RA does not support clone mode when a shared info directory is in use." exit $OCF_ERR_CONFIGURED fi if [ -n "$OCF_RESKEY_nfs_smnotify_retry_time" ]; then if ! ocf_is_decimal "$OCF_RESKEY_nfs_smnotify_retry_time"; then ocf_exit_reason "Invalid nfs_smnotify_retry_time [$OCF_RESKEY_nfs_smnotify_retry_time]" exit $OCF_ERR_CONFIGURED fi fi case ${OCF_RESKEY_nfs_notify_cmd##*/} in sm-notify|rpc.statd) ;; *) ocf_exit_reason "Invalid nfs_notify_cmd [$OCF_RESKEY_nfs_notify_cmd]" exit $OCF_ERR_CONFIGURED ;; esac return $OCF_SUCCESS } nfsserver_validate case $__OCF_ACTION in start) nfsserver_start ;; stop) nfsserver_stop ;; monitor) nfsserver_monitor ;; validate-all) exit $OCF_SUCCESS ;; *) nfsserver_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac