diff --git a/heartbeat/CTDB b/heartbeat/CTDB index bb0164671..62cc97d24 100755 --- a/heartbeat/CTDB +++ b/heartbeat/CTDB @@ -1,478 +1,477 @@ #!/bin/sh # # OCF Resource Agent for managing CTDB # # Copyright (c) 2009-2010 Novell Inc., Tim Serong # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # # TODO: # - Verify timeouts are sane # - Monitor differentiate between error and not running? # - Do we need to verify globally unique setting? # - Should set CTDB_NODES to ${HA_RSCTMP}/ctdb (generated based on # current nodes) # - Be more clever about monitor op: # ctdb --socket=/tmp/ctdb.socket status # Number of nodes:2 # pnn:0 192.168.101.14 DISABLED (THIS NODE) # pnn:1 192.168.101.15 DISABLED # Generation:665993634 # Size:2 # hash:0 lmaster:0 # hash:1 lmaster:1 # Recovery mode:NORMAL (0) # Recovery master:1 # ^ if this says pnn:0...DISABLED|UNHEALTHY, there is a problem, # e.g. ctdb socket not specified in smb.conf. # - Lots of "No public addresses file found. Nothing to do for # 10.interfaces" junk in ctdb log file. 
#   Can we fix/suppress this?
# - Look at enabling set_ctdb_variables() if necessary.
# - Probably possible for sysconfig file to not be restored if
#   CTDB dies unexpectedly.
#
#######################################################################
# Initialization:

: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/resource.d/heartbeat}
. ${OCF_FUNCTIONS_DIR}/.ocf-shellfuncs

#######################################################################
# Default parameter values:

: ${OCF_RESKEY_ctdb_config_dir:=/etc/ctdb}
: ${OCF_RESKEY_ctdb_binary:=/usr/bin/ctdb}
: ${OCF_RESKEY_ctdbd_binary:=/usr/sbin/ctdbd}
: ${OCF_RESKEY_ctdb_socket:=/var/lib/ctdb/ctdb.socket}
: ${OCF_RESKEY_ctdb_dbdir:=/var/lib/ctdb}
: ${OCF_RESKEY_ctdb_logfile:=/var/log/ctdb/log.ctdb}
: ${OCF_RESKEY_ctdb_debuglevel:=2}
: ${OCF_RESKEY_smb_conf:=/etc/samba/smb.conf}

#######################################################################

# Print the resource agent metadata (XML) on stdout.
#
# NOTE(review): the XML markup of this heredoc was missing from the text
# under review; only the description strings and the version were
# visible.  The element structure below was rebuilt in the usual OCF
# resource agent metadata layout.  Parameter names come from the
# OCF_RESKEY_* variables this script reads, defaults from the
# "Default parameter values" section above, and the action list from
# the action dispatch at the end of the script.  The "unique" flags,
# content types and action timeouts/intervals are assumptions - confirm
# them against the upstream agent before relying on them.
meta_data() {
	cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="CTDB">
<version>1.0</version>

<longdesc lang="en">
This resource agent manages CTDB, allowing one to use Clustered Samba in a
Linux-HA/Pacemaker cluster.  You need a shared filesystem (e.g. OCFS2) on
which the CTDB lock will be stored.  Configure shares in smb.conf on all
nodes, and create /etc/ctdb/nodes containing a list of private IP addresses
of each node in the cluster.  Configure this RA as a clone, and it will
take care of the rest.

For more information see http://linux-ha.org/wiki/CTDB_(resource_agent)
</longdesc>
<shortdesc lang="en">CTDB Resource Agent</shortdesc>

<parameters>

<parameter name="ctdb_recovery_lock" unique="1" required="1">
<longdesc lang="en">
The location of a shared lock file, common across all nodes.
This must be on shared storage, e.g.: /shared-fs/samba/ctdb.lock
</longdesc>
<shortdesc lang="en">CTDB shared lock file</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="smb_private_dir" unique="1" required="0">
<longdesc lang="en">
The directory for smbd to use for storing such files as
smbpasswd and secrets.tdb.  Old versions of CTBD (prior to 1.0.50)
required this to be on shared storage.  This parameter should not
be set for current versions of CTDB, and only remains in the RA
for backwards compatibility.
</longdesc>
<shortdesc lang="en">Samba private dir (deprecated)</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="ctdb_config_dir" unique="0" required="0">
<longdesc lang="en">
The directory containing various CTDB configuration files.
The "nodes" and "notify.sh" scripts are expected to be
in this directory, as is the "events.d" subdirectory.
</longdesc>
<shortdesc lang="en">CTDB config file directory</shortdesc>
<content type="string" default="/etc/ctdb" />
</parameter>

<parameter name="ctdb_binary" unique="0" required="0">
<longdesc lang="en">
Full path to the CTDB binary.
</longdesc>
<shortdesc lang="en">CTDB binary path</shortdesc>
<content type="string" default="/usr/bin/ctdb" />
</parameter>

<parameter name="ctdbd_binary" unique="0" required="0">
<longdesc lang="en">
Full path to the CTDB cluster daemon binary.
</longdesc>
<shortdesc lang="en">CTDB Daemon binary path</shortdesc>
<content type="string" default="/usr/sbin/ctdbd" />
</parameter>

<parameter name="ctdb_socket" unique="1" required="0">
<longdesc lang="en">
Full path to the domain socket that ctdbd will create, used for
local clients to attach and communicate with the ctdb daemon.
</longdesc>
<shortdesc lang="en">CTDB socket location</shortdesc>
<content type="string" default="/var/lib/ctdb/ctdb.socket" />
</parameter>

<parameter name="ctdb_dbdir" unique="1" required="0">
<longdesc lang="en">
The directory to put the local CTDB database files in.
Persistent database files will be put in ctdb_dbdir/persistent.
</longdesc>
<shortdesc lang="en">CTDB database directory</shortdesc>
<content type="string" default="/var/lib/ctdb" />
</parameter>

<parameter name="ctdb_logfile" unique="0" required="0">
<longdesc lang="en">
Full path to log file.  To log to syslog instead, use the
value "syslog".
</longdesc>
<shortdesc lang="en">CTDB log file location</shortdesc>
<content type="string" default="/var/log/ctdb/log.ctdb" />
</parameter>

<parameter name="ctdb_debuglevel" unique="0" required="0">
<longdesc lang="en">
What debug level to run at (0-10).  Higher means more verbose.
</longdesc>
<shortdesc lang="en">CTDB debug level</shortdesc>
<content type="integer" default="2" />
</parameter>

<parameter name="smb_conf" unique="0" required="0">
<longdesc lang="en">
Path to default samba config file.
</longdesc>
<shortdesc lang="en">Path to smb.conf</shortdesc>
<content type="string" default="/etc/samba/smb.conf" />
</parameter>

</parameters>

<actions>
<action name="start"        timeout="90" />
<action name="stop"         timeout="100" />
<action name="monitor"      timeout="20" interval="10" depth="0" />
<action name="meta-data"    timeout="5" />
<action name="validate-all" timeout="30" />
</actions>
</resource-agent>
END
}

#######################################################################

# Figure out path to /etc/sysconfig/ctdb (same logic as
# loadconfig() from /etc/ctdb/functions).  CTDB_SYSCONFIG is left unset
# when none of the candidate files exists.
if [ -f /etc/sysconfig/ctdb ]; then
	CTDB_SYSCONFIG=/etc/sysconfig/ctdb
elif [ -f /etc/default/ctdb ]; then
	CTDB_SYSCONFIG=/etc/default/ctdb
elif [ -f "$OCF_RESKEY_ctdb_config_dir/ctdb" ]; then
	CTDB_SYSCONFIG=$OCF_RESKEY_ctdb_config_dir/ctdb
fi

# Backup paths
CTDB_SYSCONFIG_BACKUP=${HA_RSCTMP}/ctdb-${OCF_RESOURCE_INSTANCE}

# This function has no effect (currently no way to set CTDB_SET_*)
# but remains here in case we need it in future.
#
# For every shell variable named CTDB_SET_<name>, run
# "ctdb setvar <name> <value>" against the configured socket.
#
# Globals:  OCF_RESKEY_ctdb_binary, OCF_RESKEY_ctdb_socket (read)
# Returns:  $OCF_SUCCESS if every setvar call succeeded (or there was
#           nothing to set), $OCF_ERR_GENERIC if at least one failed.
set_ctdb_variables() {
	local rv
	rv=$OCF_SUCCESS
	# Every stage of a pipeline runs in a subshell, so a failure seen
	# inside the loop cannot be recorded in a variable of this shell.
	# Track it in the subshell and hand it back as the exit status of
	# the brace group, which is the exit status of the pipeline.
	set | grep '^CTDB_SET_' | cut -d_ -f3- | {
		failed=0
		while IFS= read -r v; do
			# Split on the first "=" only, so that a value which
			# itself contains "=" is passed through intact.
			varname=${v%%=*}
			value=${v#*=}
			$OCF_RESKEY_ctdb_binary --socket=$OCF_RESKEY_ctdb_socket \
				setvar "$varname" "$value" || failed=1
		done
		[ "$failed" -eq 0 ]
	} || rv=$OCF_ERR_GENERIC
	return $rv
}

# Add necessary settings to /etc/samba/smb.conf.  In a perfect world,
# we'd be able to generate a new, temporary, smb.conf file somewhere,
# something like:
#     include = /etc/samba/smb.conf
#     [global]
#     clustering = yes
#     # ...etc...
# Unfortunately, we can't do this, because there's no way to tell the # smb init script where the temporary config is, so we just edit # the default config file. init_smb_conf() { local private_dir [ -n "$OCF_RESKEY_smb_private_dir" ] && private_dir="\tprivate dir = $OCF_RESKEY_smb_private_dir\n" grep -Eiv \ '^[[:space:]]*(# CTDB-RA:|passdb backend|clustering|idmap backend|private dir|ctdbd socket)' \ $OCF_RESKEY_smb_conf | sed "/^[[:space:]]*\[global\]/ a\\ \t# CTDB-RA: Begin auto-generated section (do not change below)\n\ \tpassdb backend = tdbsam\n\ \tclustering = yes\n\ \tidmap backend = tdb2\n\ \tctdbd socket = $OCF_RESKEY_ctdb_socket\n$private_dir\ \t# CTDB-RA: End auto-generated section (do not change above)" > $OCF_RESKEY_smb_conf.$$ mv -f $OCF_RESKEY_smb_conf.$$ $OCF_RESKEY_smb_conf } # Get rid of that section we added cleanup_smb_conf() { sed '/# CTDB-RA: Begin/,/# CTDB-RA: End/d' $OCF_RESKEY_smb_conf > $OCF_RESKEY_smb_conf.$$ mv -f $OCF_RESKEY_smb_conf.$$ $OCF_RESKEY_smb_conf } # Save current CTDB config file and generate a new, minimal version # that is just enough to get Samba running. save_ctdb_sysconfig() { # If one of our auto-generated config files is already present, return immediately grep -qa '# CTDB-RA: Auto-generated' $CTDB_SYSCONFIG && return # Otherwise, backup... cp -p $CTDB_SYSCONFIG $CTDB_SYSCONFIG_BACKUP if [ $? -eq 0 ]; then ocf_log info "Saved $CTDB_SYSCONFIG to $CTDB_SYSCONFIG_BACKUP, generating new runtime $CTDB_SYSCONFIG" # ...and generate cat >$CTDB_SYSCONFIG </dev/null for pdbase in $(ls $persistent_db_dir/*.tdb.[0-9] 2>/dev/null$) ; do /usr/bin/tdbdump $pdbase >/dev/null 2>/dev/null || { ocf_log err "Persistent database $pdbase is corrupted! CTDB will not start." return $OCF_ERR_GENERIC } done # Add necessary configuration to smb.conf init_smb_conf if [ $? -ne 0 ]; then ocf_log err "Failed to update $OCF_RESKEY_smb_conf." 
return $OCF_ERR_GENERIC fi # Save sysconfig (we're going to generate a minimal one # in place of what's there) save_ctdb_sysconfig # Use logfile by default, or syslog if asked for log_option="--logfile=$OCF_RESKEY_ctdb_logfile" [ "$OCF_RESKEY_ctdb_logfile" = "syslog" ] && log_option="--syslog" # Start her up $OCF_RESKEY_ctdbd_binary \ --reclock=$OCF_RESKEY_ctdb_recovery_lock \ --nlist=$OCF_RESKEY_ctdb_config_dir/nodes \ --socket=$OCF_RESKEY_ctdb_socket \ --dbdir=$OCF_RESKEY_ctdb_dbdir \ --dbdir-persistent=$OCF_RESKEY_ctdb_dbdir/persistent \ --event-script-dir=$OCF_RESKEY_ctdb_config_dir/events.d \ --notification-script=$OCF_RESKEY_ctdb_config_dir/notify.sh \ --transport=tcp \ --start-as-disabled \ $log_option \ -d $OCF_RESKEY_ctdb_debuglevel if [ $? -ne 0 ]; then # restore sysconfig & cleanup smb.conf restore_ctdb_sysconfig cleanup_smb_conf ocf_log err "Failed to execute $OCF_RESKEY_ctdbd_binary." return $OCF_ERR_GENERIC else # Wait a bit for CTDB to stabilize - # (can be broken if, e.g. ctdb socket wrong - # or not specified in smb.conf) - for i in `seq 30`; do + # (until start times out if necessary) + while true; do # Initial sleep is intentional (ctdb init script # has sleep after ctdbd start, but before invoking # ctdb to talk to it) sleep 1 status=$($OCF_RESKEY_ctdb_binary --socket=$OCF_RESKEY_ctdb_socket status 2>/dev/null) if [ $? -ne 0 ]; then # CTDB will be running, kill it before returning ctdb_stop ocf_log err "Can't invoke $OCF_RESKEY_ctdb_binary --socket=$OCF_RESKEY_ctdb_socket status" return $OCF_ERR_GENERIC fi if ! echo $status | grep -qs 'UNHEALTHY (THIS'; then # Status does not say this node is unhealthy, # so we're good to go - set up any extra # variables and (hopefully) return success set_ctdb_variables return $? 
fi done fi # ctdbd will (or can) actually still be running at this point, so kill it ctdb_stop ocf_log err "Timeout waiting for CTDB to stabilize" return $OCF_ERR_GENERIC } ctdb_stop() { # Do nothing if already stopped pkill -0 -f $OCF_RESKEY_ctdbd_binary || return $OCF_SUCCESS # Tell it to die nicely $OCF_RESKEY_ctdb_binary --socket=$OCF_RESKEY_ctdb_socket shutdown >/dev/null 2>&1 rv=$? # No more Mr. Nice Guy count=0 while pkill -0 -f $OCF_RESKEY_ctdbd_binary ; do sleep 1 count=$(($count + 1)) [ $count -gt 10 ] && { ocf_log info "killing ctdbd " pkill -9 -f $OCF_RESKEY_ctdbd_binary pkill -9 -f ${OCF_RESKEY_ctdb_config_dir}/events.d/ } done # Restore saved sysconfig & cleanup smb.conf restore_ctdb_sysconfig cleanup_smb_conf # Be paranoid about return codes [ $rv -eq $OCF_SUCCESS ] && return $OCF_SUCCESS return $OCF_ERR_GENERIC } ctdb_monitor() { $OCF_RESKEY_ctdb_binary --socket=$OCF_RESKEY_ctdb_socket ping > /dev/null 2>&1 && return $OCF_SUCCESS return $OCF_NOT_RUNNING } ctdb_validate() { if [ -z "$CTDB_SYSCONFIG" ]; then ocf_log err "Can't find CTDB config file (expecting /etc/sysconfig/ctdb, /etc/default/ctdb or similar)" return $OCF_ERR_INSTALLED fi if [ ! -f "$OCF_RESKEY_smb_conf" ]; then ocf_log err "Samba config file '$OCF_RESKEY_smb_conf' does not exist." return $OCF_ERR_INSTALLED fi if [ -f "${OCF_RESKEY_ctdb_config_dir}/public_addresses" ]; then ocf_log warn "CTDB file '${OCF_RESKEY_ctdb_config_dir}/public_addresses' exists - CTDB will try to manage IP failover!" fi if [ -z "$OCF_RESKEY_ctdb_recovery_lock" ]; then ocf_log err "ctdb_recovery_lock not specified." return $OCF_ERR_ARGS fi lock_dir=$(dirname "$OCF_RESKEY_ctdb_recovery_lock") touch "$lock_dir/$$" 2>/dev/null if [ $? != 0 ]; then ocf_log err "Directory for lock file '$OCF_RESKEY_ctdb_recovery_lock' does not exist, or is not writable." 
return $OCF_ERR_ARGS fi rm "$lock_dir/$$" return $OCF_SUCCESS } case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; start) ctdb_start;; stop) ctdb_stop;; monitor) ctdb_monitor;; validate-all) ctdb_validate;; usage|help) ctdb_usage exit $OCF_SUCCESS ;; *) ctdb_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc