diff --git a/heartbeat/CTDB b/heartbeat/CTDB index 0355a48d7..322a355be 100755 --- a/heartbeat/CTDB +++ b/heartbeat/CTDB @@ -1,730 +1,736 @@ #!/bin/sh # # OCF Resource Agent for managing CTDB # # Copyright (c) 2009-2010 Novell Inc., Tim Serong # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # # # OVERVIEW # # When run by itself, CTDB can handle IP failover and includes scripts # to manage various services (Samba, Winbind, HTTP, etc.). When run as # a resource in a Pacemaker cluster, this additional functionality # should not be used; instead one should define separate resources for # CTDB, Samba, Winbind, IP addresses, etc. # # As of 2010-11-17, there is no separate OCF Samba or Winbind RA, so # it is still possible to configure CTDB so that it manages these # resources itself. In future, once Samba and Winbind RAs are # available, this ability will be deprecated and ultimately removed. # # This RA intentionally provides no ability to configure CTDB such that # it manages IP failover, HTTP, NFS, etc. # # # TODO: # - ctdb_stop doesn't really support multiple independent CTDB instances, # unless they're running from distinct ctdbd binaries (it uses pkill # $OCF_RESKEY_ctdbd_binary if "ctdb stop" doesn't work, which it might # not under heavy load - this will kill all ctdbd instances on the # system). OTOH, running multiple CTDB instances per node is, well, # AFAIK, completely crazy. Can't run more than one in a vanilla CTDB # cluster, with the CTDB init script. So it might be nice to address # this for complete semantic correctness of the RA, but shouldn't # actually cause any trouble in real life. # - As much as possible, get rid of auto config generation # - Especially smb.conf # - Verify timeouts are sane # - Monitor differentiate between error and not running? # - Do we need to verify globally unique setting? # - Should set CTDB_NODES to ${HA_RSCTMP}/ctdb (generated based on # current nodes) # - Look at enabling set_ctdb_variables() if necessary. # - Probably possible for sysconfig file to not be restored if # CTDB dies unexpectedly. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### # Default parameter values: # Some distro's ctdb package stores the persistent db in /var/lib/ctdb, # others store in /var/ctdb. This attempts to detect the correct default # directory. var_prefix="/var/lib/ctdb" if [ ! -d "$var_prefix" ] && [ -d "/var/ctdb" ]; then var_prefix="/var/ctdb" fi : ${OCF_RESKEY_ctdb_manages_samba:=no} : ${OCF_RESKEY_ctdb_manages_winbind:=no} : ${OCF_RESKEY_ctdb_service_smb:=""} : ${OCF_RESKEY_ctdb_service_nmb:=""} : ${OCF_RESKEY_ctdb_service_winbind:=""} : ${OCF_RESKEY_ctdb_samba_skip_share_check:=yes} : ${OCF_RESKEY_ctdb_monitor_free_memory:=100} : ${OCF_RESKEY_ctdb_start_as_disabled:=no} : ${OCF_RESKEY_ctdb_config_dir:=/etc/ctdb} : ${OCF_RESKEY_ctdb_binary:=/usr/bin/ctdb} : ${OCF_RESKEY_ctdbd_binary:=/usr/sbin/ctdbd} : ${OCF_RESKEY_ctdb_socket:=${var_prefix}/ctdb.socket} : ${OCF_RESKEY_ctdb_dbdir:=${var_prefix}} : ${OCF_RESKEY_ctdb_logfile:=/var/log/ctdb/log.ctdb} : ${OCF_RESKEY_ctdb_debuglevel:=2} : ${OCF_RESKEY_smb_conf:=/etc/samba/smb.conf} : ${OCF_RESKEY_smb_passdb_backend:=tdbsam} : ${OCF_RESKEY_smb_idmap_backend:=tdb2} ####################################################################### meta_data() { cat < 1.0 This resource agent manages CTDB, allowing one to use Clustered Samba in a Linux-HA/Pacemaker cluster. You need a shared filesystem (e.g. OCFS2) on which the CTDB lock will be stored. Create /etc/ctdb/nodes containing a list of private IP addresses of each node in the cluster, then configure this RA as a clone. To have CTDB manage Samba, set ctdb_manages_samba="yes". Note that this option will be deprecated in future, in favour of configuring a separate Samba resource. For more information see http://linux-ha.org/wiki/CTDB_(resource_agent) CTDB Resource Agent The location of a shared lock file, common across all nodes. This must be on shared storage, e.g.: /shared-fs/samba/ctdb.lock CTDB shared lock file Should CTDB manage starting/stopping the Samba service for you? This will be deprecated in future, in favor of configuring a separate Samba resource. Should CTDB manage Samba? Should CTDB manage starting/stopping the Winbind service for you? This will be deprecated in future, in favor of configuring a separate Winbind resource. Should CTDB manage Winbind? Name of smb init script. Only necessary if CTDB is managing Samba directly. Will usually be auto-detected. Name of smb init script Name of nmb init script. Only necessary if CTDB is managing Samba directly. Will usually be auto-detected. Name of nmb init script Name of winbind init script. Only necessary if CTDB is managing Winbind directly. Will usually be auto-detected. Name of winbind init script If there are very many shares it may not be feasible to check that all of them are available during each monitoring interval. In that case this check can be disabled. Skip share check during monitor? If the amount of free memory drops below this value the node will become unhealthy and ctdb and all managed services will be shutdown. Once this occurs, the administrator needs to find the reason for the OOM situation, rectify it and restart ctdb with "service ctdb start". Minimum amount of free memory (MB) When set to yes, the CTDB node will start in DISABLED mode and not host any public ip addresses. Start CTDB disabled? The directory containing various CTDB configuration files. The "nodes" and "notify.sh" scripts are expected to be in this directory, as is the "events.d" subdirectory. CTDB config file directory Full path to the CTDB binary. CTDB binary path Full path to the CTDB cluster daemon binary. CTDB Daemon binary path Full path to the domain socket that ctdbd will create, used for local clients to attach and communicate with the ctdb daemon. CTDB socket location The directory to put the local CTDB database files in. Persistent database files will be put in ctdb_dbdir/persistent. CTDB database directory Full path to log file. To log to syslog instead, use the value "syslog". CTDB log file location What debug level to run at (0-10). Higher means more verbose. CTDB debug level Path to default samba config file. Only necessary if CTDB is managing Samba. Path to smb.conf The directory for smbd to use for storing such files as smbpasswd and secrets.tdb. Old versions of CTBD (prior to 1.0.50) required this to be on shared storage. This parameter should not be set for current versions of CTDB, and only remains in the RA for backwards compatibility. Samba private dir (deprecated) Which backend to use for storing user and possibly group information. Only necessary if CTDB is managing Samba. Samba passdb backend Which backend to use for SID/uid/gid mapping. Only necessary if CTDB is managing Samba. Samba idmap backend Which fileid:algorithm to use with vfs_fileid. The correct value depends on which clustered filesystem is in use, e.g.: for OCFS2, this should be set to "fsid". Only necessary if CTDB is managing Samba. Samba VFS fileid algorithm END } ####################################################################### # Figure out path to /etc/sysconfig/ctdb (same logic as # loadconfig() from /etc/ctdb/functions if [ -f /etc/sysconfig/ctdb ]; then CTDB_SYSCONFIG=/etc/sysconfig/ctdb elif [ -f /etc/default/ctdb ]; then CTDB_SYSCONFIG=/etc/default/ctdb elif [ -f $OCF_RESKEY_ctdb_config_dir/ctdb ]; then CTDB_SYSCONFIG=$OCF_RESKEY_ctdb_config_dir/ctdb fi # Backup paths CTDB_SYSCONFIG_BACKUP=${CTDB_SYSCONFIG}.ctdb-ra-orig invoke_ctdb() { # CTDB's defaults are: local timeout=3 local timelimit=120 # ...but we override with the timeout for the current op: if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then timeout=$((OCF_RESKEY_CRM_meta_timeout/1000)) timelimit=$((OCF_RESKEY_CRM_meta_timeout/1000)) fi $OCF_RESKEY_ctdb_binary --socket=$OCF_RESKEY_ctdb_socket \ -t $timeout -T $timelimit \ "$@" } # Enable any event scripts that are explicitly required. # Any others will ultimately be invoked or not based on how they ship # with CTDB, but will generally have no effect, beacuase the relevant # CTDB_MANAGES_* options won't be set in /etc/sysconfig/ctdb. enable_event_scripts() { local event_dir=$OCF_RESKEY_ctdb_config_dir/events.d if [ -f "${OCF_RESKEY_ctdb_config_dir}/public_addresses" ]; then chmod u+x $event_dir/10.interface else chmod a-x $event_dir/10.interface fi if [ -f "${OCF_RESKEY_ctdb_config_dir}/static-routes" ]; then chmod u+x $event_dir/11.routing else chmod a-x $event_dir/11.routing fi if ocf_is_true "$OCF_RESKEY_ctdb_manages_samba" || \ ocf_is_true "$OCF_RESKEY_ctdb_manages_winbind"; then chmod u+x $event_dir/50.samba else chmod a-x $event_dir/50.samba fi } # This function has no effect (currently no way to set CTDB_SET_*) # but remains here in case we need it in future. set_ctdb_variables() { rv=$OCF_SUCCESS set | grep ^CTDB_SET_ | cut -d_ -f3- | while read v; do varname=`echo $v | cut -d= -f1` value=`echo $v | cut -d= -f2` invoke_ctdb setvar $varname $value || rv=$OCF_ERR_GENERIC done || rv=$OCF_ERR_GENERIC return $rv } # Add necessary settings to /etc/samba/smb.conf. In a perfect world, # we'd be able to generate a new, temporary, smb.conf file somewhere, # something like: # include = /etc/samba/smb.conf # [global] # clustering = yes # # ...etc... # Unfortunately, we can't do this, because there's no way to tell the # smb init script where the temporary config is, so we just edit # the default config file. init_smb_conf() { # Don't screw around with the config if CTDB isn't managing Samba! ocf_is_true "$OCF_RESKEY_ctdb_manages_samba" || return 0 # replace these things in smb.conf local repl='# CTDB-RA:|passdb backend|clustering|idmap backend|private dir|ctdbd socket' local private_dir [ -n "$OCF_RESKEY_smb_private_dir" ] && private_dir="\tprivate dir = $OCF_RESKEY_smb_private_dir\n" local vfs_fileid local do_vfs=0 if [ -n "$OCF_RESKEY_smb_fileid_algorithm" ]; then repl="${repl}|fileid:algorithm|fileid:mapping" vfs_fileid="\tfileid:algorithm = $OCF_RESKEY_smb_fileid_algorithm\n" if sed -n '/^[[:space:]]*\[global\]/,/^[[:space:]]*\[/p' $OCF_RESKEY_smb_conf | \ grep -Eq '^[[:space:]]*vfs objects'; then # vfs objects already specified, will append fileid to existing line do_vfs=1 else vfs_fileid="$vfs_fileid\tvfs objects = fileid\n" fi fi awk ' /^[[:space:]]*\[/ { global = 0 } /^[[:space:]]*\[global\]/ { global = 1 } { if(global) { if ('$do_vfs' && $0 ~ /^[[:space:]]vfs objects/ && $0 !~ /fileid/) { print $0" fileid" } else if ($0 !~ /^[[:space:]]*('"$repl"')/) { print } } else { print } }' $OCF_RESKEY_smb_conf | sed "/^[[:space:]]*\[global\]/ a\\ \t# CTDB-RA: Begin auto-generated section (do not change below)\n\ \tpassdb backend = $OCF_RESKEY_smb_passdb_backend\n\ \tclustering = yes\n\ \tidmap backend = $OCF_RESKEY_smb_idmap_backend\n\ \tctdbd socket = $OCF_RESKEY_ctdb_socket\n$private_dir$vfs_fileid\ \t# CTDB-RA: End auto-generated section (do not change above)" > $OCF_RESKEY_smb_conf.$$ mv -f $OCF_RESKEY_smb_conf.$$ $OCF_RESKEY_smb_conf } # Get rid of that section we added cleanup_smb_conf() { ocf_is_true "$OCF_RESKEY_ctdb_manages_samba" || return 0 sed '/# CTDB-RA: Begin/,/# CTDB-RA: End/d' $OCF_RESKEY_smb_conf > $OCF_RESKEY_smb_conf.$$ mv -f $OCF_RESKEY_smb_conf.$$ $OCF_RESKEY_smb_conf } append_ctdb_sysconfig() { [ -n "$2" ] && echo "$1=$2" >> $CTDB_SYSCONFIG } # Generate a new, minimal CTDB config file that's just enough # to get CTDB running as configured by the RA parameters. generate_ctdb_sysconfig() { # Backup existing sysconfig if we're not already using an auto-generated one grep -qa '# CTDB-RA: Auto-generated' $CTDB_SYSCONFIG || cp -p $CTDB_SYSCONFIG $CTDB_SYSCONFIG_BACKUP if [ $? -ne 0 ]; then ocf_log warn "Unable to backup $CTDB_SYSCONFIG to $CTDB_SYSCONFIG_BACKUP" fi ocf_log info "Generating new $CTDB_SYSCONFIG" # Note to maintainers and other random hackers: # Parameters may need to be set here, for CTDB event # scripts to pick up, or may need to be passed to ctdbd # when starting, or both. Be careful. The CTDB source # tree and manpages are your friends. As a concrete # example, setting CTDB_START_AS_DISABLED here is # completely useless, as this is actually a command line # argument for ctdbd; it's not used anywhere else. cat >$CTDB_SYSCONFIG </dev/null for pdbase in $(ls $persistent_db_dir/*.tdb.[0-9] 2>/dev/null$) ; do /usr/bin/tdbdump $pdbase >/dev/null 2>/dev/null || { ocf_log err "Persistent database $pdbase is corrupted! CTDB will not start." return $OCF_ERR_GENERIC } done # Add necessary configuration to smb.conf init_smb_conf if [ $? -ne 0 ]; then ocf_log err "Failed to update $OCF_RESKEY_smb_conf." return $OCF_ERR_GENERIC fi # Generate new CTDB sysconfig generate_ctdb_sysconfig enable_event_scripts # Use logfile by default, or syslog if asked for local log_option="--logfile=$OCF_RESKEY_ctdb_logfile" - [ "$OCF_RESKEY_ctdb_logfile" = "syslog" ] && log_option="--syslog" + if [ "$OCF_RESKEY_ctdb_logfile" = "syslog" ]; then + log_option="--syslog" + elif [ ! -d "$(dirname $OCF_RESKEY_ctdb_logfile)" ]; then + # ensure the logfile's directory exists, otherwise ctdb will fail to start + mkdir -p $(dirname $OCF_RESKEY_ctdb_logfile) + fi + # public addresses file (should not be present, but need to set for correctness if it is) local pub_addr_option="" [ -f "${OCF_RESKEY_ctdb_config_dir}/public_addresses" ] && \ pub_addr_option="--public-addresses=${OCF_RESKEY_ctdb_config_dir}/public_addresses" # start as disabled local start_as_disabled="--start-as-disabled" ocf_is_true "$OCF_RESKEY_ctdb_start_as_disabled" || start_as_disabled="" # Start her up $OCF_RESKEY_ctdbd_binary \ --reclock=$OCF_RESKEY_ctdb_recovery_lock \ --nlist=$OCF_RESKEY_ctdb_config_dir/nodes \ --socket=$OCF_RESKEY_ctdb_socket \ --dbdir=$OCF_RESKEY_ctdb_dbdir \ --dbdir-persistent=$OCF_RESKEY_ctdb_dbdir/persistent \ --event-script-dir=$OCF_RESKEY_ctdb_config_dir/events.d \ --notification-script=$OCF_RESKEY_ctdb_config_dir/notify.sh \ --transport=tcp \ $start_as_disabled $log_option $pub_addr_option \ -d $OCF_RESKEY_ctdb_debuglevel if [ $? -ne 0 ]; then # cleanup smb.conf cleanup_smb_conf ocf_log err "Failed to execute $OCF_RESKEY_ctdbd_binary." return $OCF_ERR_GENERIC else # Wait a bit for CTDB to stabilize # (until start times out if necessary) while true; do # Initial sleep is intentional (ctdb init script # has sleep after ctdbd start, but before invoking # ctdb to talk to it) sleep 1 status=$(invoke_ctdb status 2>/dev/null) if [ $? -ne 0 ]; then # CTDB will be running, kill it before returning ctdb_stop ocf_log err "Can't invoke $OCF_RESKEY_ctdb_binary --socket=$OCF_RESKEY_ctdb_socket status" return $OCF_ERR_GENERIC fi if ! echo $status | grep -qs 'UNHEALTHY (THIS'; then # Status does not say this node is unhealthy, # so we're good to go. Do a bit of final # setup and (hopefully) return success. set_ctdb_variables return $? fi done fi # ctdbd will (or can) actually still be running at this point, so kill it ctdb_stop ocf_log err "Timeout waiting for CTDB to stabilize" return $OCF_ERR_GENERIC } ctdb_stop() { # Do nothing if already stopped pkill -0 -f $OCF_RESKEY_ctdbd_binary || return $OCF_SUCCESS # Tell it to die nicely invoke_ctdb shutdown >/dev/null 2>&1 rv=$? # No more Mr. Nice Guy count=0 while pkill -0 -f $OCF_RESKEY_ctdbd_binary ; do sleep 1 count=$(($count + 1)) [ $count -gt 10 ] && { ocf_log info "killing ctdbd " pkill -9 -f $OCF_RESKEY_ctdbd_binary pkill -9 -f ${OCF_RESKEY_ctdb_config_dir}/events.d/ } done # Cleanup smb.conf cleanup_smb_conf # It was a clean shutdown, return success [ $rv -eq $OCF_SUCCESS ] && return $OCF_SUCCESS # Unclean shutdown, return success if there's no ctdbds left (we # killed them forcibly, but at least they're good and dead). pkill -0 -f $OCF_RESKEY_ctdbd_binary || return $OCF_SUCCESS # Problem: ctdb shutdown didn't work and neither did some vigorous # kill -9ing. Only thing to do is report failure. return $OCF_ERR_GENERIC } ctdb_monitor() { local status # "ctdb status" exits non-zero if CTDB isn't running. # It can also exit non-zero if there's a timeout (ctdbd blocked, # stalled, massive load, or otherwise wedged). If it's actually # not running, STDERR will say "Errno:Connection refused(111)", # whereas if it's wedged, it'll say various other unpleasant things. status=$(invoke_ctdb status 2>&1) if [ $? -ne 0 ]; then if echo $status | grep -qs 'Connection refused'; then return $OCF_NOT_RUNNING else ocf_log err "CTDB status call failed: $status" return $OCF_ERR_GENERIC fi fi if echo $status | grep -Eqs '(OK|DISABLED) \(THIS'; then return $OCF_SUCCESS fi ocf_log err "CTDB status is bad: $status" return $OCF_ERR_GENERIC } ctdb_validate() { # Required binaries (full path to tdbdump is intentional, as that's # what's used in ctdb_start, which was lifted from the init script) for binary in pkill /usr/bin/tdbdump; do check_binary $binary done if [ -z "$CTDB_SYSCONFIG" ]; then ocf_log err "Can't find CTDB config file (expecting /etc/sysconfig/ctdb, /etc/default/ctdb or similar)" return $OCF_ERR_INSTALLED fi if ocf_is_true "$OCF_RESKEY_ctdb_manages_samba" && [ ! -f "$OCF_RESKEY_smb_conf" ]; then ocf_log err "Samba config file '$OCF_RESKEY_smb_conf' does not exist." return $OCF_ERR_INSTALLED fi if [ -f "${OCF_RESKEY_ctdb_config_dir}/public_addresses" ]; then ocf_log warn "CTDB file '${OCF_RESKEY_ctdb_config_dir}/public_addresses' exists - CTDB will try to manage IP failover!" fi if [ ! -f "$OCF_RESKEY_ctdb_config_dir/nodes" ]; then ocf_log err "$OCF_RESKEY_ctdb_config_dir/nodes does not exist." return $OCF_ERR_ARGS fi if [ -z "$OCF_RESKEY_ctdb_recovery_lock" ]; then ocf_log err "ctdb_recovery_lock not specified." return $OCF_ERR_CONFIGURED fi lock_dir=$(dirname "$OCF_RESKEY_ctdb_recovery_lock") touch "$lock_dir/$$" 2>/dev/null if [ $? != 0 ]; then ocf_log err "Directory for lock file '$OCF_RESKEY_ctdb_recovery_lock' does not exist, or is not writable." return $OCF_ERR_ARGS fi rm "$lock_dir/$$" return $OCF_SUCCESS } case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; start) ctdb_start;; stop) ctdb_stop;; monitor) ctdb_monitor;; validate-all) ctdb_validate;; usage|help) ctdb_usage exit $OCF_SUCCESS ;; *) ctdb_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc