diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem index 88fe2e2eb..8e0127531 100755 --- a/heartbeat/Filesystem +++ b/heartbeat/Filesystem @@ -1,1187 +1,1187 @@ #!/bin/sh # # Support: users@clusterlabs.org # License: GNU General Public License (GPL) # # Filesystem # Description: Manages a Filesystem on a shared storage medium. # Original Author: Eric Z. Ayers (eric.ayers@compgen.com) # Original Release: 25 Oct 2000 # # usage: ./Filesystem {start|stop|status|monitor|validate-all|meta-data} # # OCF parameters are as below: # OCF_RESKEY_device # OCF_RESKEY_directory # OCF_RESKEY_fstype # OCF_RESKEY_options # OCF_RESKEY_statusfile_prefix # OCF_RESKEY_run_fsck # OCF_RESKEY_fast_stop # OCF_RESKEY_force_clones # #OCF_RESKEY_device : name of block device for the filesystem. e.g. /dev/sda1, /dev/md0 # Or a -U or -L option for mount, or an NFS mount specification #OCF_RESKEY_directory : the mount point for the filesystem #OCF_RESKEY_fstype : optional name of the filesystem type. e.g. ext2 #OCF_RESKEY_options : options to be given to the mount command via -o #OCF_RESKEY_statusfile_prefix : the prefix used for a status file for monitoring #OCF_RESKEY_run_fsck : fsck execution mode: auto(default)/force/no #OCF_RESKEY_fast_stop : fast stop: yes(default)/no #OCF_RESKEY_force_clones : allow running the resource as clone. e.g. local xfs mounts # for each brick in a glusterfs setup # # # This assumes you want to manage a filesystem on a shared (SCSI) bus, # on a replicated device (such as DRBD), or a network filesystem (such # as NFS or Samba). # # Do not put this filesystem in /etc/fstab. This script manages all of # that for you. # # NOTE: If 2 or more nodes mount the same file system read-write, and # that file system is not designed for that specific purpose # (such as GFS or OCFS2), and is not a network file system like # NFS or Samba, then the filesystem is going to become # corrupted. # # As a result, you should use this together with the stonith # option and redundant, independent communications paths. # # If you don't do this, don't blame us when you scramble your # disk. ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Defaults DFLT_STATUSDIR=".Filesystem_status/" # Parameter defaults OCF_RESKEY_device_default="" OCF_RESKEY_directory_default="" OCF_RESKEY_fstype_default="" OCF_RESKEY_options_default="" OCF_RESKEY_statusfile_prefix_default="${DFLT_STATUSDIR}" OCF_RESKEY_run_fsck_default="auto" OCF_RESKEY_fast_stop_default="no" OCF_RESKEY_force_clones_default="false" OCF_RESKEY_force_unmount_default="true" OCF_RESKEY_term_signals_default="TERM" OCF_RESKEY_kill_signals_default="KILL" OCF_RESKEY_signal_delay_default="1" # RHEL specific defaults if is_redhat_based; then get_os_ver ocf_version_cmp "$VER" "9.0" 2>/dev/null case "$?" in # RHEL >= 9 1|2) OCF_RESKEY_force_unmount_default="safe";; # RHEL < 9 and fallback if ocf_version_cmp() fails *) OCF_RESKEY_fast_stop_default="yes";; esac fi : ${OCF_RESKEY_device=${OCF_RESKEY_device_default}} : ${OCF_RESKEY_directory=${OCF_RESKEY_directory_default}} : ${OCF_RESKEY_fstype=${OCF_RESKEY_fstype_default}} : ${OCF_RESKEY_options=${OCF_RESKEY_options_default}} : ${OCF_RESKEY_statusfile_prefix=${OCF_RESKEY_statusfile_prefix_default}} : ${OCF_RESKEY_run_fsck=${OCF_RESKEY_run_fsck_default}} if [ -z "${OCF_RESKEY_fast_stop}" ]; then case "$OCF_RESKEY_fstype" in gfs2) OCF_RESKEY_fast_stop="no";; *) OCF_RESKEY_fast_stop=${OCF_RESKEY_fast_stop_default};; esac fi : ${OCF_RESKEY_force_clones=${OCF_RESKEY_force_clones_default}} : ${OCF_RESKEY_force_unmount=${OCF_RESKEY_force_unmount_default}} : ${OCF_RESKEY_term_signals=${OCF_RESKEY_term_signals_default}} : ${OCF_RESKEY_kill_signals=${OCF_RESKEY_kill_signals_default}} : ${OCF_RESKEY_signal_delay=${OCF_RESKEY_signal_delay_default}} # Variables used by multiple methods HOSTOS=$(uname) TAB=' ' # The status file is going to an extra directory, by default # prefix=${OCF_RESKEY_statusfile_prefix} : ${prefix:=$DFLT_STATUSDIR} suffix="${OCF_RESOURCE_INSTANCE}" [ "$OCF_RESKEY_CRM_meta_clone" ] && suffix="${suffix}_$OCF_RESKEY_CRM_meta_clone" suffix="${suffix}_$(uname -n)" STATUSFILE="${OCF_RESKEY_directory}/$prefix$suffix" ####################################################################### usage() { cat <<-EOT usage: $0 {start|stop|status|monitor|validate-all|meta-data} EOT } meta_data() { cat < 1.0 Resource script for Filesystem. It manages a Filesystem on a shared storage medium. The standard monitor operation of depth 0 (also known as probe) checks if the filesystem is mounted. If you want deeper tests, set OCF_CHECK_LEVEL to one of the following values: 10: read first 16 blocks of the device (raw read) This doesn't exercise the filesystem at all, but the device on which the filesystem lives. This is noop for non-block devices such as NFS, SMBFS, or bind mounts. 20: test if a status file can be written and read The status file must be writable by root. This is not always the case with an NFS mount, as NFS exports usually have the "root_squash" option set. In such a setup, you must either use read-only monitoring (depth=10), export with "no_root_squash" on your NFS server, or grant world write permissions on the directory where the status file is to be placed. Manages filesystem mounts The name of block device for the filesystem, or -U, -L options for mount, or NFS mount specification. NOTE: On Linux /dev/disk/by-{uuid,label}/ are preferred to -U/-L. block device The mount point for the filesystem. mount point The type of filesystem to be mounted. filesystem type Any extra options to be given as -o options to mount. For bind mounts, add "bind" here and set fstype to "none". We will do the right thing for options such as "bind,ro". options The prefix to be used for a status file for resource monitoring with depth 20. If you don't specify this parameter, all status files will be created in a separate directory. status file prefix Specify how to decide whether to run fsck or not. "auto" : decide to run fsck depending on the fstype(default) "force" : always run fsck regardless of the fstype "no" : do not run fsck ever. run_fsck Normally, we expect no users of the filesystem and the stop operation to finish quickly. If you cannot control the filesystem users easily and want to prevent the stop action from failing, then set this parameter to "no" and add an appropriate timeout for the stop operation. This defaults to "no" for GFS2 filesystems. fast stop The use of a clone setup for local filesystems is forbidden by default. For special setups like glusterfs, cloning a mount of a local device with a filesystem like ext4 or xfs independently on several nodes is a valid use case. Only set this to "true" if you know what you are doing! allow running as a clone, regardless of filesystem type This option allows specifying how to handle processes that are currently accessing the mount directory. "true" : Kill processes accessing mount point "safe" : Kill processes accessing mount point using methods that avoid functions that could potentially block during process detection "false" : Do not kill any processes. The 'safe' option uses shell logic to walk the /procs/ directory for pids using the mount point while the default option uses the fuser cli tool. fuser is known to perform operations that can potentially block if unresponsive nfs mounts are in use on the system. Kill processes before unmount Signals (names or numbers, whitespace separated) to send processes during graceful termination phase in stop-action. Signals (names or numbers, whitespace separated) to send processes during graceful termination phase in stop-action Signals (names or numbers, whitespace separated) to send processes during forceful killing phase in stop-action. Signals (names or numbers, whitespace separated) to send processes during forceful killing phase in stop-action How many seconds to wait after sending term/kill signals to processes in stop-action. How many seconds to wait after sending term/kill signals to processes in stop-action END } # # Make sure the kernel does the right thing with the FS buffers # This function should be called after unmounting and before mounting # It may not be necessary in 2.4 and later kernels, but it shouldn't hurt # anything either... # # It's really a bug that you have to do this at all... # flushbufs() { if have_binary $BLOCKDEV ; then if [ "$blockdevice" = "yes" ] ; then $BLOCKDEV --flushbufs $1 return $? fi fi return 0 } # Take advantage of /etc/mtab if present, use portable mount command # otherwise. Normalize format to "dev mountpoint fstype". is_bind_mount() { echo "$options" | grep -w bind >/dev/null 2>&1 } list_mounts() { local inpf="" local mount_list="" local check_list="x" if [ -e "/proc/mounts" ] && ! is_bind_mount; then inpf=/proc/mounts elif [ -f "/etc/mtab" -a -r "/etc/mtab" ]; then inpf=/etc/mtab fi # Make sure that the mount list has not been changed while reading. while [ "$mount_list" != "$check_list" ]; do check_list="$mount_list" if [ "$inpf" ]; then # ... # Spaces in device or mountpoint are octal \040 in $inpf # Convert literal spaces (field separators) to tabs mount_list=$(cut -d' ' -f1,2,3 < $inpf | tr ' ' "$TAB") else # on type ... # Use tabs as field separators match_string='\(.*\) on \(.*\) type \([^[:space:]]\+\) .*' replace_string="\\1${TAB}\\2${TAB}\\3" mount_list=$($MOUNT | sed "s/$match_string/$replace_string/g") fi done # Convert octal \040 to space characters printf "$mount_list" } determine_blockdevice() { if [ $blockdevice = "yes" ]; then return fi # Get the current real device name, if possible. # (specified devname could be -L or -U...) case "$FSTYPE" in nfs4|nfs|efs|smbfs|cifs|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs|zfs|cvfs|none|lustre) : ;; *) match_string="${TAB}${CANONICALIZED_MOUNTPOINT}${TAB}" DEVICE=$(list_mounts | grep "$match_string" | cut -d"$TAB" -f1) if [ -b "$DEVICE" ]; then blockdevice=yes fi ;; esac } # Lists all filesystems potentially mounted under a given path, # excluding the path itself. list_submounts() { list_mounts | grep "${TAB}${1}/" | cut -d"$TAB" -f2 | sort -r } # Lists all bind mounts of a given file system, # excluding the path itself. list_bindmounts() { if is_bind_mount; then # skip bind mount # we should not umount the original file system via a bind mount return fi match_string="${TAB}${1}${TAB}" if list_mounts | grep "$match_string" >/dev/null 2>&1; then mount_disk=$(list_mounts | grep "$match_string" | cut -d"$TAB" -f1) else return fi if [ -b "$mount_disk" ]; then list_mounts | grep "$mount_disk" | grep -v "$match_string" | cut -d"$TAB" -f2 | sort -r fi } # kernels < 2.6.26 can't handle bind remounts bind_kernel_check() { echo "$options" | grep -w ro >/dev/null 2>&1 || return uname -r | awk -F. ' $1==2 && $2==6 { sub("[^0-9].*","",$3); if ($3<26) exit(1); }' [ $? -ne 0 ] && ocf_log warn "kernel $(uname -r) cannot handle read only bind mounts" } bind_root_mount_check() { if [ "$(df -P "$1" | awk 'END{print $6}')" = "/" ]; then return 1 else return 0 fi } bind_mount() { if is_bind_mount && [ "$options" != "-o bind" ] then bind_kernel_check bind_opts=$(echo "$options" | sed 's/bind/remount/') $MOUNT $bind_opts "$MOUNTPOINT" else true # make sure to return OK fi } is_option() { echo "$OCF_RESKEY_options" | grep -w "$1" >/dev/null 2>&1 } is_fsck_needed() { case $OCF_RESKEY_run_fsck in force) true;; no) false;; ""|auto) case "$FSTYPE" in ext4|ext4dev|ext3|reiserfs|reiser4|nss|xfs|jfs|vfat|fat|nfs4|nfs|efs|cifs|smbfs|ocfs2|gfs2|none|lustre|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs|zfs|cvfs) false;; *) true;; esac;; *) ocf_log warn "Invalid parameter value for fsck: '$OCF_RESKEY_run_fsck'; setting to 'auto'" OCF_RESKEY_run_fsck="auto" is_fsck_needed;; esac } fstype_supported() { local support="$FSTYPE" local rc if [ "X${HOSTOS}" = "XOpenBSD" ];then # skip checking /proc/filesystems for obsd return $OCF_SUCCESS fi if [ -z "$FSTYPE" -o "$FSTYPE" = none ]; then : No FSTYPE specified, rely on the system has the right file-system support already return $OCF_SUCCESS fi # support fuse-filesystems (e.g. GlusterFS) and Amazon Elastic File # System (EFS) case "$FSTYPE" in fuse.*|glusterfs|rozofs) support="fuse";; efs) check_binary "mount.efs"; support="nfs4";; esac if [ "$support" != "$FSTYPE" ]; then ocf_log info "Checking support for $FSTYPE as \"$support\"" fi grep -w "$support"'$' /proc/filesystems >/dev/null if [ $? -eq 0 ]; then # found the fs type return $OCF_SUCCESS fi # if here, we should attempt to load the module and then # check the if the filesystem support exists again. $MODPROBE $support >/dev/null if [ $? -ne 0 ]; then ocf_exit_reason "Couldn't find filesystem $support in /proc/filesystems and failed to load kernel module" return $OCF_ERR_INSTALLED fi # It is possible for the module to load and not be complete initialized # before we check /proc/filesystems again. Give this a few trys before # giving up entirely. for try in $(seq 5); do grep -w "$support"'$' /proc/filesystems >/dev/null if [ $? -eq 0 ] ; then # yes. found the filesystem after doing the modprobe return $OCF_SUCCESS fi ocf_log debug "Unable to find support for $support in /proc/filesystems after modprobe, trying again" sleep 1 done ocf_exit_reason "Couldn't find filesystem $support in /proc/filesystems" return $OCF_ERR_INSTALLED } # # In the case a fresh filesystem is just created from another # node on the shared storage, and is not visible yet. Then try # partprobe to refresh /dev/disk/by-{label,uuid}/* up to date. # # DEVICE can be /dev/xxx, -U, -L # trigger_udev_rules_if_needed() { local refresh_flag="no" local tmp local timeout if [ $blockdevice = "yes" ]; then tmp="$DEVICE" if [ "$DEVICE" != "/dev/null" -a ! -b "$DEVICE" ] ; then refresh_flag="yes" fi else tmp="$(echo $DEVICE|awk '{$1=""; print substr($0,2)}')" case "$DEVICE" in -U*|--uuid*) tmp="/dev/disk/by-uuid/$tmp" ;; -L*|--label*) tmp="/dev/disk/by-label/$tmp" ;; *) # bind mount? return ;; esac [ ! -b "$tmp" ] && refresh_flag="yes" fi [ "$refresh_flag" = "no" ] && return have_binary partprobe && partprobe >/dev/null 2>&1 timeout=${OCF_RESKEY_CRM_meta_timeout:="60000"} timeout=$((timeout/1000)) have_binary udevadm && udevadm settle -t $timeout --exit-if-exists=$tmp return $? } # # START: Start up the filesystem # Filesystem_start() { # Check if there are any mounts mounted under the mountpoint match_string="${TAB}${CANONICALIZED_MOUNTPOINT}" if list_mounts | grep -E "$match_string/\w+" >/dev/null 2>&1; then ocf_log err "There is one or more mounts mounted under $MOUNTPOINT." return $OCF_ERR_CONFIGURED fi # See if the device is already mounted. Filesystem_status case "$?" in $OCF_SUCCESS) ocf_log info "Filesystem $MOUNTPOINT is already mounted." return $OCF_SUCCESS ;; $OCF_ERR_CONFIGURED) return $OCF_ERR_CONFIGURED ;; esac fstype_supported || exit $OCF_ERR_INSTALLED # Check the filesystem & auto repair. # NOTE: Some filesystem types don't need this step... Please modify # accordingly trigger_udev_rules_if_needed if [ $blockdevice = "yes" ]; then if [ "$DEVICE" != "/dev/null" -a ! -b "$DEVICE" ] ; then ocf_exit_reason "Couldn't find device [$DEVICE]. Expected /dev/??? to exist" exit $OCF_ERR_INSTALLED fi if is_fsck_needed; then ocf_log info "Starting filesystem check on $DEVICE" if [ -z "$FSTYPE" ]; then $FSCK -p "$DEVICE" else $FSCK -t "$FSTYPE" -p "$DEVICE" fi # NOTE: if any errors at all are detected, it returns non-zero # if the error is >= 4 then there is a big problem if [ $? -ge 4 ]; then ocf_exit_reason "Couldn't successfully fsck filesystem for $DEVICE" return $OCF_ERR_GENERIC fi fi fi [ -d "$MOUNTPOINT" ] || ocf_run mkdir -p "$MOUNTPOINT" if [ ! -d "$MOUNTPOINT" ] ; then ocf_exit_reason "Couldn't find directory [$MOUNTPOINT] to use as a mount point" exit $OCF_ERR_INSTALLED fi flushbufs "$DEVICE" # Mount the filesystem. case "$FSTYPE" in none) $MOUNT $options $device_opt "$DEVICE" "$MOUNTPOINT" && bind_mount ;; "") $MOUNT $options $device_opt "$DEVICE" "$MOUNTPOINT" ;; *) $MOUNT -t "$FSTYPE" $options $device_opt "$DEVICE" "$MOUNTPOINT" ;; esac if [ $? -ne 0 ]; then ocf_exit_reason "Couldn't mount device [$DEVICE] as $MOUNTPOINT" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } # end of Filesystem_start get_pids() { local dir=$1 local procs local mmap_procs if is_bind_mount && ocf_is_true "$FORCE_UNMOUNT" && ! bind_root_mount_check "$DEVICE"; then ocf_log debug "Change force_umount from '$FORCE_UNMOUNT' to 'safe'" FORCE_UNMOUNT=safe fi if ocf_is_true "$FORCE_UNMOUNT"; then if [ "X${HOSTOS}" = "XOpenBSD" ]; then fstat | grep $dir | awk '{print $3}' elif [ "X${HOSTOS}" = "XFreeBSD" ]; then $FUSER -c $dir 2>/dev/null else $FUSER -Mm $dir 2>/dev/null fi elif [ "$FORCE_UNMOUNT" = "safe" ]; then procs=$(find /proc/[0-9]*/ -type l -lname "${dir}/*" -or -lname "${dir}" 2>/dev/null | awk -F/ '{print $3}') mmap_procs=$(grep " ${dir}/" /proc/[0-9]*/maps | awk -F/ '{print $3}') printf "${procs}\n${mmap_procs}" | sort | uniq fi } signal_processes() { local dir=$1 local sig=$2 local pids pid # fuser returns a non-zero return code if none of the # specified files is accessed or in case of a fatal # error. pids=$(get_pids "$dir") if [ -z "$pids" ]; then ocf_log info "No processes on $dir were signalled. force_unmount is set to '$FORCE_UNMOUNT'" return fi for pid in $pids; do ocf_log info "sending signal $sig to: $(ps -f $pid | tail -1)" kill -s $sig $pid done } try_umount() { local SUB="$1" $UMOUNT $umount_force "$SUB" list_mounts | grep "${TAB}${SUB}${TAB}" >/dev/null 2>&1 || { ocf_log info "unmounted $SUB successfully" return $OCF_SUCCESS } return $OCF_ERR_GENERIC } timeout_child() { local pid="$1" timeout="$2" killer ret # start job in the background that will KILL the given process after timeout expires sleep $timeout && kill -s KILL $pid & killer=$! # block until the child process either exits on its own or gets killed by the above killer pipeline wait $pid ret=$? # ret would be 127 + child exit code if the timeout expired (see "man wait" for more info) [ $ret -lt 128 ] && kill -s KILL $killer return $ret } fs_stop_loop() { local SUB="$1" signals="$2" sig while true; do for sig in $signals; do signal_processes "$SUB" $sig done sleep $OCF_RESKEY_signal_delay try_umount "$SUB" && return $OCF_SUCCESS done } fs_stop() { local SUB="$1" timeout=$2 grace_time ret grace_time=$((timeout/2)) # try gracefully terminating processes for up to half of the configured timeout fs_stop_loop "$SUB" "$OCF_RESKEY_term_signals" & timeout_child $! $grace_time ret=$? [ $ret -eq $OCF_SUCCESS ] && return $ret # try killing them for the rest of the timeout fs_stop_loop "$SUB" "$OCF_RESKEY_kill_signals" & timeout_child $! $grace_time ret=$? [ $ret -eq $OCF_SUCCESS ] && return $ret # timeout expired ocf_exit_reason "Couldn't unmount $SUB within given timeout" return $OCF_ERR_GENERIC } # # STOP: Unmount the filesystem # Filesystem_stop() { # See if the device is currently mounted Filesystem_status >/dev/null 2>&1 if [ $? -eq $OCF_NOT_RUNNING ]; then # Already unmounted, wonderful. rc=$OCF_SUCCESS else # Wipe the status file, but continue with a warning if # removal fails -- the file system might be read only if [ $OCF_CHECK_LEVEL -eq 20 ]; then rm -f "${STATUSFILE}" if [ $? -ne 0 ]; then ocf_log warn "Failed to remove status file ${STATUSFILE}." fi fi # Determine the real blockdevice this is mounted on (if # possible) prior to unmounting. determine_blockdevice # For networked filesystems, there's merit in trying -f: case "$FSTYPE" in nfs4|nfs|efs|cifs|smbfs) umount_force="-f" ;; esac # Umount all sub-filesystems mounted under $MOUNTPOINT/ too. local timeout while read SUB; do ocf_log info "Trying to unmount $SUB" if ocf_is_true "$FAST_STOP"; then timeout=6 else timeout=${OCF_RESKEY_CRM_meta_timeout:="20000"} timeout=$((timeout/1000)) fi fs_stop "$SUB" $timeout rc=$? if [ $rc -ne $OCF_SUCCESS ]; then ocf_exit_reason "Couldn't unmount $SUB, giving up!" fi done <<-EOF $(list_submounts "$CANONICALIZED_MOUNTPOINT"; \ list_bindmounts "$CANONICALIZED_MOUNTPOINT"; \ echo $CANONICALIZED_MOUNTPOINT) EOF fi flushbufs "$DEVICE" return $rc } # end of Filesystem_stop # # STATUS: is the filesystem mounted or not? # Filesystem_status() { local match_string="${TAB}${CANONICALIZED_MOUNTPOINT}${TAB}" local mounted_device=$(list_mounts | grep "$match_string" | awk '{print $1}') if [ -n "$mounted_device" ]; then if [ "X$blockdevice" = "Xyes" ]; then if [ -e "$DEVICE" ] ; then local canonicalized_device="$(readlink -f "$DEVICE")" if [ $? -ne 0 ]; then ocf_exit_reason "Could not canonicalize $DEVICE because readlink failed" exit $OCF_ERR_GENERIC fi else local canonicalized_device="$DEVICE" fi if [ -e "$mounted_device" ] ; then local canonicalized_mounted_device="$(readlink -f "$mounted_device")" if [ $? -ne 0 ]; then ocf_exit_reason "Could not canonicalize $mounted_device because readlink failed" exit $OCF_ERR_GENERIC fi else local canonicalized_mounted_device="$mounted_device" fi if [ "$canonicalized_device" != "$canonicalized_mounted_device" ]; then if ocf_is_probe || [ "$__OCF_ACTION" = "stop" ]; then ocf_log debug "Another device ($mounted_device) is already mounted on $MOUNTPOINT" rc=$OCF_NOT_RUNNING else ocf_exit_reason "Another device ($mounted_device) is already mounted on $MOUNTPOINT" rc=$OCF_ERR_CONFIGURED fi fi else rc=$OCF_SUCCESS msg="$MOUNTPOINT is mounted (running)" fi else rc=$OCF_NOT_RUNNING msg="$MOUNTPOINT is unmounted (stopped)" fi # Special case "monitor" to check whether the UUID cached and # on-disk still match? case "$OP" in status) ocf_log info "$msg";; esac return $rc } # end of Filesystem_status # Note: the read/write tests below will stall in case the # underlying block device (or in the case of a NAS mount, the # NAS server) has gone away. In that case, if I/O does not # return to normal in time, the operation hits its timeout # and it is up to the CRM to initiate appropriate recovery # actions (such as fencing the node). # # MONITOR 10: read the device # Filesystem_monitor_10() { if [ "$blockdevice" = "no" ] ; then ocf_log warn "$DEVICE is not a block device, monitor 10 is noop" return $OCF_SUCCESS fi dd_opts="iflag=direct bs=4k count=1" err_output=$(dd if="$DEVICE" $dd_opts 2>&1 >/dev/null) if [ $? -ne 0 ]; then ocf_exit_reason "Failed to read device $DEVICE" ocf_log err "dd said: $err_output" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } # # MONITOR 20: write and read a status file # Filesystem_monitor_20() { if [ "$blockdevice" = "no" ] ; then # O_DIRECT not supported on cifs/smbfs dd_opts="oflag=sync bs=4k conv=fsync,sync" else # Writing to the device in O_DIRECT mode is imperative # to bypass caches. dd_opts="oflag=direct,sync bs=4k conv=fsync,sync" fi status_dir=$(dirname "$STATUSFILE") [ -d "$status_dir" ] || mkdir -p "$status_dir" err_output=$(echo "${OCF_RESOURCE_INSTANCE}" | dd of="${STATUSFILE}" $dd_opts 2>&1) if [ $? -ne 0 ]; then ocf_exit_reason "Failed to write status file ${STATUSFILE}" ocf_log err "dd said: $err_output" return $OCF_ERR_GENERIC fi test -f "${STATUSFILE}" if [ $? -ne 0 ]; then ocf_exit_reason "Cannot stat the status file ${STATUSFILE}" return $OCF_ERR_GENERIC fi cat "${STATUSFILE}" > /dev/null if [ $? -ne 0 ]; then ocf_exit_reason "Cannot read the status file ${STATUSFILE}" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } Filesystem_monitor() { Filesystem_status rc=$? if [ $rc -ne $OCF_SUCCESS ]; then return $rc fi if [ $rc -eq $OCF_SUCCESS -a $OCF_CHECK_LEVEL -gt 0 ]; then case "$OCF_CHECK_LEVEL" in 10) Filesystem_monitor_10; rc=$?;; 20) Filesystem_monitor_20; rc=$?;; *) ocf_exit_reason "unsupported monitor level $OCF_CHECK_LEVEL" rc=$OCF_ERR_CONFIGURED ;; esac fi return $rc } # end of Filesystem_monitor # # VALIDATE_ALL: Are the instance parameters valid? # FIXME!! The only part that's useful is the return code. # This code always returns $OCF_SUCCESS (!) # FIXME!! Needs some tuning to match fstype_supported() (e.g., for # fuse). Can we just call fstype_supported() with a flag like # "no_modprobe" instead? # Filesystem_validate_all() { # Check if the $FSTYPE is workable # NOTE: Without inserting the $FSTYPE module, this step may be imprecise # TODO: This is Linux specific crap. if [ ! -z "$FSTYPE" -a "$FSTYPE" != none ]; then cut -f2 /proc/filesystems | grep "^${FSTYPE}$" >/dev/null 2>&1 if [ $? -ne 0 ]; then modpath=/lib/modules/$(uname -r) moddep=$modpath/modules.dep # Do we have $FSTYPE in modules.dep? cut -d' ' -f1 $moddep \ | grep "^${modpath}.*${FSTYPE}\.k\?o:$" >/dev/null 2>&1 if [ $? -ne 0 ]; then ocf_log info "It seems we do not have $FSTYPE support" fi fi fi # If we are supposed to do monitoring with status files, then # we need a utility to write in O_DIRECT mode. if [ $OCF_CHECK_LEVEL -gt 0 ]; then check_binary dd # Note: really old coreutils version do not support # the "oflag" option for dd. We don't check for that # here. In case dd does not support oflag, monitor is # bound to fail, with dd spewing an error message to # the logs. On such systems, we must do without status # file monitoring. fi #TODO: How to check the $options ? return $OCF_SUCCESS } # # set the blockdevice variable to "no" or "yes" # set_blockdevice_var() { blockdevice=no # these are definitely not block devices case "$FSTYPE" in nfs4|nfs|efs|smbfs|cifs|none|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs|zfs|cvfs|lustre) return;; esac if $(is_option "loop"); then return fi case "$DEVICE" in --uuid=*|--uuid\ *|--label=*|--label\ *) device_opt=$(echo $DEVICE | sed "s/\([[:blank:]]\|=\).*//") DEVICE=$(echo $DEVICE | sed -E "s/$device_opt([[:blank:]]*|=)//") ;; -U*|-L*) # short versions of --uuid/--label device_opt=$(echo $DEVICE | cut -c1-2) DEVICE=$(echo $DEVICE | sed "s/$device_opt[[:blank:]]*//") ;; /dev/null) # Special case for BSC blockdevice=yes ;; *) - if [ ! -b "$DEVICE" -a ! -d "$DEVICE" -a "X$OP" != Xstart ] ; then + if [ ! -b "$DEVICE" -a ! -d "$DEVICE" -a "X$OP" != Xstart ] && ! ocf_is_probe ; then ocf_log warn "Couldn't find device [$DEVICE]. Expected /dev/??? to exist" fi if [ ! -d "$DEVICE" ]; then blockdevice=yes fi ;; esac } # Check the arguments passed to this script if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi # Check the OCF_RESKEY_ environment variables... FORCE_UNMOUNT="yes" if [ -n "${OCF_RESKEY_force_unmount}" ]; then FORCE_UNMOUNT=$OCF_RESKEY_force_unmount fi DEVICE="$OCF_RESKEY_device" case "$DEVICE" in [[:space:]]*|*[[:space:]]) ocf_exit_reason "device parameter does not accept leading or trailing whitespace characters" exit $OCF_ERR_CONFIGURED ;; esac FSTYPE=$OCF_RESKEY_fstype if [ ! -z "$OCF_RESKEY_options" ]; then options="-o $OCF_RESKEY_options" fi FAST_STOP=${OCF_RESKEY_fast_stop:="yes"} OP=$1 # These operations do not require instance parameters case $OP in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; esac if [ x = x"$DEVICE" ]; then ocf_exit_reason "Please set OCF_RESKEY_device to the device to be managed" exit $OCF_ERR_CONFIGURED fi set_blockdevice_var # Normalize instance parameters: # It is possible that OCF_RESKEY_directory has one or even multiple trailing "/". # But the output of `mount` and /proc/mounts do not. if [ -z "$OCF_RESKEY_directory" ]; then if [ X$OP = "Xstart" -o $blockdevice = "no" ]; then ocf_exit_reason "Please specify the directory" exit $OCF_ERR_CONFIGURED fi else MOUNTPOINT="$(echo "$OCF_RESKEY_directory" | sed 's/\/*$//')" : ${MOUNTPOINT:=/} case "$MOUNTPOINT" in [[:space:]]*|*[[:space:]]) ocf_exit_reason "directory parameter does not accept leading or trailing whitespace characters" exit $OCF_ERR_CONFIGURED ;; esac if [ -e "$MOUNTPOINT" ] ; then CANONICALIZED_MOUNTPOINT="$(readlink -f "$MOUNTPOINT")" if [ $? -ne 0 ]; then ocf_exit_reason "Could not canonicalize $MOUNTPOINT because readlink failed" exit $OCF_ERR_GENERIC fi else CANONICALIZED_MOUNTPOINT="$MOUNTPOINT" fi if echo "$CANONICALIZED_MOUNTPOINT" | grep -q "^\s*/\s*$"; then if ocf_is_probe; then ocf_log debug "/ cannot be managed in a cluster" exit $OCF_NOT_RUNNING elif [ "$__OCF_ACTION" = "start" ] || [ "$__OCF_ACTION" = "monitor" ] || [ "$__OCF_ACTION" = "status" ]; then ocf_exit_reason "/ cannot be managed in a cluster" exit $OCF_ERR_CONFIGURED elif [ "$__OCF_ACTION" = "stop" ]; then exit $OCF_SUCCESS fi fi fi # Check to make sure the utilites are found if [ "X${HOSTOS}" != "XOpenBSD" ];then check_binary $MODPROBE check_binary $FUSER fi check_binary $FSCK check_binary $MOUNT check_binary $UMOUNT if [ "$OP" != "monitor" ]; then ocf_log info "Running $OP for $DEVICE on $MOUNTPOINT" fi case $OP in status) Filesystem_status exit $? ;; monitor) Filesystem_monitor exit $? ;; validate-all) Filesystem_validate_all exit $? ;; stop) Filesystem_stop exit $? ;; esac CLUSTERSAFE=0 is_option "ro" && CLUSTERSAFE=2 case "$FSTYPE" in nfs4|nfs|efs|smbfs|cifs|none|gfs2|glusterfs|ceph|ocfs2|overlay|overlayfs|tmpfs|cvfs|lustre) CLUSTERSAFE=1 # this is kind of safe too systemd_drop_in "99-Filesystem-remote" "After" "remote-fs.target" ;; # add here CLUSTERSAFE=0 for all filesystems which are not # cluster aware and which, even if when mounted read-only, # could still modify parts of it such as journal/metadata ext4|ext4dev|ext3|reiserfs|reiser4|xfs|jfs) if ocf_is_true "$OCF_RESKEY_force_clones"; then CLUSTERSAFE=2 systemd_drop_in "99-Filesystem-remote" "After" "remote-fs.target" else CLUSTERSAFE=0 # these are not allowed fi ;; esac if ocf_is_clone; then case $CLUSTERSAFE in 0) ocf_exit_reason "DANGER! $FSTYPE on $DEVICE is NOT cluster-aware!" ocf_log err "DO NOT RUN IT AS A CLONE!" ocf_log err "Politely refusing to proceed to avoid data corruption." exit $OCF_ERR_CONFIGURED ;; 2) ocf_log warn "$FSTYPE on $DEVICE is NOT cluster-aware!" if ocf_is_true "$OCF_RESKEY_force_clones"; then ocf_log warn "But we'll let it run because we trust _YOU_ verified it's safe to do so." else ocf_log warn "But we'll let it run because it is mounted read-only." ocf_log warn "Please make sure that it's meta data is read-only too!" fi ;; esac fi case $OP in start) Filesystem_start ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? diff --git a/heartbeat/ocf-shellfuncs.in b/heartbeat/ocf-shellfuncs.in index 4616375aa..96f612095 100644 --- a/heartbeat/ocf-shellfuncs.in +++ b/heartbeat/ocf-shellfuncs.in @@ -1,1104 +1,1104 @@ # # # Common helper functions for the OCF Resource Agents supplied by # heartbeat. # # Copyright (c) 2004 SUSE LINUX AG, Lars Marowsky-Brée # All Rights Reserved. # # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # Build version: $Format:%H$ # TODO: Some of this should probably split out into a generic OCF # library for shell scripts, but for the time being, we'll just use it # ourselves... # # TODO wish-list: # - Generic function for evaluating version numbers # - Generic function(s) to extract stuff from our own meta-data # - Logging function which automatically adds resource identifier etc # prefixes # TODO: Move more common functionality for OCF RAs here. # # This was common throughout all legacy Heartbeat agents unset LC_ALL; export LC_ALL unset LANGUAGE; export LANGUAGE : ${HA_SBIN_DIR:=@sbindir@} __SCRIPT_NAME=`basename $0` if [ -z "$OCF_ROOT" ]; then : ${OCF_ROOT=@OCF_ROOT_DIR@} fi if [ "$OCF_FUNCTIONS_DIR" = ${OCF_ROOT}/resource.d/heartbeat ]; then # old unset OCF_FUNCTIONS_DIR fi : ${OCF_FUNCTIONS_DIR:=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-binaries . ${OCF_FUNCTIONS_DIR}/ocf-returncodes . ${OCF_FUNCTIONS_DIR}/ocf-directories . ${OCF_FUNCTIONS_DIR}/ocf-rarun . ${OCF_FUNCTIONS_DIR}/ocf-distro # Define OCF_RESKEY_CRM_meta_interval in case it isn't already set, # to make sure that ocf_is_probe() always works : ${OCF_RESKEY_CRM_meta_interval=0} ocf_is_root() { if [ X`id -u` = X0 ]; then true else false fi } ocf_maybe_random() { if test -c /dev/urandom; then od -An -N4 -tu4 /dev/urandom | tr -d '[:space:]' else awk -v pid=$$ 'BEGIN{srand(pid); print rand()}' | sed 's/^.*[.]//' fi } # Portability comments: # o The following rely on Bourne "sh" pattern-matching, which is usually # that for filename generation (note: not regexp). # o The "*) true ;;" clause is probably unnecessary, but is included # here for completeness. # o The negation in the pattern uses "!". This seems to be common # across many OSes (whereas the alternative "^" fails on some). # o If an OS is encountered where this negation fails, then a possible # alternative would be to replace the function contents by (e.g.): # [ -z "`echo $1 | tr -d '[0-9]'`" ] # ocf_is_decimal() { case "$1" in ""|*[!0-9]*) # empty, or at least one non-decimal false ;; *) true ;; esac } ocf_is_true() { case "$1" in yes|true|1|YES|TRUE|True|ja|on|ON) true ;; *) false ;; esac } ocf_is_hex() { case "$1" in ""|*[!0-9a-fA-F]*) # empty, or at least one non-hex false ;; *) true ;; esac } ocf_is_octal() { case "$1" in ""|*[!0-7]*) # empty, or at least one non-octal false ;; *) true ;; esac } __ocf_set_defaults() { __OCF_ACTION="$1" # Return to sanity for the agents... unset LANG LC_ALL=C export LC_ALL # TODO: Review whether we really should source this. Or rewrite # to match some emerging helper function syntax...? This imports # things which no OCF RA should be using... # Strip the OCF_RESKEY_ prefix from this particular parameter if [ -z "$OCF_RESKEY_OCF_CHECK_LEVEL" ]; then : ${OCF_CHECK_LEVEL:=0} else : ${OCF_CHECK_LEVEL:=$OCF_RESKEY_OCF_CHECK_LEVEL} fi if [ ! -d "$OCF_ROOT" ]; then ha_log "ERROR: OCF_ROOT points to non-directory $OCF_ROOT." exit $OCF_ERR_GENERIC fi if [ -z "$OCF_RESOURCE_TYPE" ]; then : ${OCF_RESOURCE_TYPE:=$__SCRIPT_NAME} fi if [ "x$__OCF_ACTION" = "xmeta-data" ]; then : ${OCF_RESOURCE_INSTANCE:="RESOURCE_ID"} fi if [ -z "$OCF_RA_VERSION_MAJOR" ]; then : We are being invoked as an init script. : Fill in some things with reasonable values. : ${OCF_RESOURCE_INSTANCE:="default"} return 0 fi if [ -z "$OCF_RESOURCE_INSTANCE" ]; then ha_log "ERROR: Need to tell us our resource instance name." exit $OCF_ERR_ARGS fi } hadate() { date "+${HA_DATEFMT}" } set_logtag() { if [ -z "$HA_LOGTAG" ]; then if [ -n "$OCF_RESOURCE_INSTANCE" ]; then HA_LOGTAG="$__SCRIPT_NAME($OCF_RESOURCE_INSTANCE)[$$]" else HA_LOGTAG="$__SCRIPT_NAME[$$]" fi fi } __ha_log() { local ignore_stderr=false local loglevel [ "x$1" = "x--ignore-stderr" ] && ignore_stderr=true && shift [ none = "$HA_LOGFACILITY" ] && HA_LOGFACILITY="" # if we're connected to a tty, then output to stderr if tty >/dev/null; then if [ "x$HA_debug" = "x0" -a "x$loglevel" = xdebug ] ; then return 0 elif [ "$ignore_stderr" = "true" ]; then # something already printed this error to stderr, so ignore return 0 fi if [ "$HA_LOGTAG" ]; then echo "$HA_LOGTAG: $*" else echo "$*" fi >&2 return 0 fi set_logtag if [ "x${HA_LOGD}" = "xyes" ] ; then ha_logger -t "${HA_LOGTAG}" "$@" if [ "$?" -eq "0" ] ; then return 0 fi fi if [ -n "$HA_LOGFACILITY" ] then : logging through syslog # loglevel is unknown, use 'notice' for now loglevel=notice case "${*}" in *ERROR*) loglevel=err;; *WARN*) loglevel=warning;; *INFO*|info) loglevel=info;; esac logger -t "$HA_LOGTAG" -p ${HA_LOGFACILITY}.${loglevel} "${*}" fi if [ -n "$HA_LOGFILE" ] then : appending to $HA_LOGFILE echo `hadate`" $HA_LOGTAG: ${*}" >> $HA_LOGFILE fi if [ -z "$HA_LOGFACILITY" -a -z "$HA_LOGFILE" ] && ! [ "$ignore_stderr" = "true" ] then : appending to stderr echo `hadate`"${*}" >&2 fi if [ -n "$HA_DEBUGLOG" ] then : appending to $HA_DEBUGLOG if [ "$HA_LOGFILE"x != "$HA_DEBUGLOG"x ]; then echo "$HA_LOGTAG: "`hadate`"${*}" >> $HA_DEBUGLOG fi fi } ha_log() { __ha_log "$@" } ha_debug() { if [ "x${HA_debug}" = "x0" ] || [ -z "${HA_debug}" ] ; then return 0 fi if tty >/dev/null; then if [ "$HA_LOGTAG" ]; then echo "$HA_LOGTAG: $*" else echo "$*" fi >&2 return 0 fi set_logtag if [ "x${HA_LOGD}" = "xyes" ] ; then ha_logger -t "${HA_LOGTAG}" -D "ha-debug" "$@" if [ "$?" -eq "0" ] ; then return 0 fi fi [ none = "$HA_LOGFACILITY" ] && HA_LOGFACILITY="" if [ -n "$HA_LOGFACILITY" ] then : logging through syslog logger -t "$HA_LOGTAG" -p "${HA_LOGFACILITY}.debug" "${*}" fi if [ -n "$HA_DEBUGLOG" ] then : appending to $HA_DEBUGLOG echo "$HA_LOGTAG: "`hadate`"${*}" >> $HA_DEBUGLOG fi if [ -z "$HA_LOGFACILITY" -a -z "$HA_DEBUGLOG" ] then : appending to stderr echo "$HA_LOGTAG: `hadate`${*}: ${HA_LOGFACILITY}" >&2 fi } ha_parameter() { local VALUE VALUE=`sed -e 's%[ ][ ]*% %' -e 's%^ %%' -e 's%#.*%%' $HA_CF | grep -i "^$1 " | sed 's%[^ ]* %%'` if [ "X$VALUE" = X ] then case $1 in keepalive) VALUE=2;; deadtime) ka=`ha_parameter keepalive` VALUE=`expr $ka '*' 2 '+' 1`;; esac fi echo $VALUE } ocf_log() { # TODO: Revisit and implement internally. if [ $# -lt 2 ] then ocf_log err "Not enough arguments [$#] to ocf_log." fi __OCF_PRIO="$1" shift __OCF_MSG="$*" case "${__OCF_PRIO}" in crit) __OCF_PRIO="CRIT";; err) __OCF_PRIO="ERROR";; warn) __OCF_PRIO="WARNING";; info) __OCF_PRIO="INFO";; debug)__OCF_PRIO="DEBUG";; *) __OCF_PRIO=`echo ${__OCF_PRIO}| tr '[a-z]' '[A-Z]'`;; esac if [ "${__OCF_PRIO}" = "DEBUG" ]; then ha_debug "${__OCF_PRIO}: $__OCF_MSG" else ha_log "${__OCF_PRIO}: $__OCF_MSG" fi } # # ocf_exit_reason: print exit error string to stderr # Usage: Allows the OCF script to provide a string # describing why the exit code was returned. # Arguments: reason - required, The string that represents why the error # occured. # ocf_exit_reason() { local cookie="$OCF_EXIT_REASON_PREFIX" local fmt local msg # No argument is likely not intentional. # Just one argument implies a printf format string of just "%s". # "Least surprise" in case some interpolated string from variable # expansion or other contains a percent sign. # More than one argument: first argument is going to be the format string. case $# in 0) ocf_log err "Not enough arguments to ocf_log_exit_msg." ;; 1) fmt="%s" ;; *) fmt=$1 shift case $fmt in *%*) : ;; # ok, does look like a format string *) ocf_log warn "Does not look like format string: [$fmt]" ;; esac ;; esac if [ -z "$cookie" ]; then # use a default prefix cookie="ocf-exit-reason:" fi msg=$(printf "${fmt}" "$@") printf >&2 "%s%s\n" "$cookie" "$msg" __ha_log --ignore-stderr "ERROR: $msg" } # # ocf_deprecated: Log a deprecation warning # Usage: ocf_deprecated [param-name] # Arguments: param-name optional, name of a boolean resource # parameter that can be used to suppress # the warning (default # "ignore_deprecation") ocf_deprecated() { local param param=${1:-ignore_deprecation} # don't use ${!param} here, it's a bashism if ! ocf_is_true $(eval echo \$OCF_RESKEY_$param); then ocf_log warn "This resource agent is deprecated" \ "and may be removed in a future release." \ "See the man page for details." \ "To suppress this warning, set the \"${param}\"" \ "resource parameter to true." fi } # # Ocf_run: Run a script, and log its output. # Usage: ocf_run [-q] [-info|-warn|-err] # -q: don't log the output of the command if it succeeds # -info|-warn|-err: log the output of the command at given # severity if it fails (defaults to err) # ocf_run() { local rc local output local verbose=1 local loglevel=err local var for var in 1 2 do case "$1" in "-q") verbose="" shift 1;; - "-info"|"-warn"|"-err") - loglevel=`echo $1 | sed -e s/-//g` + "-info"|"-warn"|"-err"|"-debug") + loglevel=${1#-} shift 1;; *) ;; esac done output=`"$@" 2>&1` rc=$? [ -n "$output" ] && output="$(echo "$output" | tr -s ' \t\r\n' ' ')" if [ $rc -eq 0 ]; then if [ "$verbose" -a ! -z "$output" ]; then ocf_log info "$output" fi else if [ ! -z "$output" ]; then ocf_log $loglevel "$output" else ocf_log $loglevel "command failed: $*" fi fi return $rc } ocf_pidfile_status() { local pid pidfile="$1" if [ ! -e "$pidfile" ]; then # Not exists return 2 fi pid=$(cat "$pidfile") kill -0 "$pid" > /dev/null 2>&1 if [ $? = 0 ]; then return 0 fi # Stale return 1 } # mkdir(1) based locking # first the directory is created with the name given as $1 # then a file named "pid" is created within that directory with # the process PID # stale locks are handled carefully, the inode of a directory # needs to match before and after test if the process is running # empty directories are also handled appropriately # we relax (sleep) occasionally to allow for other processes to # finish managing the lock in case they are in the middle of the # business relax() { sleep 0.5; } ocf_get_stale_pid() { local piddir pid dir_inode piddir="$1" [ -z "$piddir" ] && return 2 dir_inode="`ls -di $piddir 2>/dev/null`" [ -z "$dir_inode" ] && return 1 pid=`cat $piddir/pid 2>/dev/null` if [ -z "$pid" ]; then # empty directory? relax if [ "$dir_inode" = "`ls -di $piddir 2>/dev/null`" ]; then echo $dir_inode else return 1 fi elif kill -0 $pid >/dev/null 2>&1; then return 1 elif relax && [ -e "$piddir/pid" ] && [ "$dir_inode" = "`ls -di $piddir 2>/dev/null`" ]; then echo $pid else return 1 fi } # There is a race when the following two functions to manage the # lock file (mk and rm) are invoked in parallel by different # instances. It is up to the caller to reduce probability of that # taking place (see ocf_take_lock() below). ocf_mk_pid() { mkdir $1 2>/dev/null && echo $$ > $1/pid } ocf_rm_pid() { rm -f $1/pid rmdir $1 2>/dev/null } # Testing and subsequently removing a stale lock (containing the # process pid) is inherently difficult to do in such a way as to # prevent a race between creating a pid file and removing it and # its directory. We reduce the probability of that happening by # checking if the stale lock persists over a random period of # time. ocf_take_lock() { local lockdir=$1 local rnd local stale_pid # we don't want it too short, so strip leading zeros rnd=$(ocf_maybe_random | sed 's/^0*//') stale_pid=`ocf_get_stale_pid $lockdir` if [ -n "$stale_pid" ]; then sleep 0.$rnd # remove "stale pid" only if it persists [ "$stale_pid" = "`ocf_get_stale_pid $lockdir`" ] && ocf_rm_pid $lockdir fi while ! ocf_mk_pid $lockdir; do ocf_log info "Sleeping until $lockdir is released..." sleep 0.$rnd done } ocf_release_lock_on_exit() { trap "ocf_rm_pid $1" EXIT } # returns true if the CRM is currently running a probe. A probe is # defined as a monitor operation with a monitoring interval of zero. ocf_is_probe() { [ "$__OCF_ACTION" = "monitor" -a "$OCF_RESKEY_CRM_meta_interval" = 0 ] } # returns true if the resource is configured as a clone. This is # defined as a resource where the clone-max meta attribute is present. ocf_is_clone() { [ ! -z "${OCF_RESKEY_CRM_meta_clone_max}" ] } # returns true if the resource is configured as a multistate # (master/slave) resource. This is defined as a resource where the # master-max meta attribute is present, and set to greater than zero. ocf_is_ms() { [ "${OCF_RESKEY_CRM_meta_promotable}" = "true" ] || { [ ! -z "${OCF_RESKEY_CRM_meta_master_max}" ] && [ "${OCF_RESKEY_CRM_meta_master_max}" -gt 0 ]; } } # version check functions # allow . and - to delimit version numbers # max version number is 999 # ocf_is_ver() { echo $1 | grep '^[0-9][0-9.-]*[0-9A-Za-z.\+-]*$' >/dev/null 2>&1 } # usage: ocf_version_cmp VER1 VER2 # version strings can contain digits, dots, and dashes # must start and end with a digit # returns: # 0: VER1 smaller (older) than VER2 # 1: versions equal # 2: VER1 greater (newer) than VER2 # 3: bad format ocf_version_cmp() { ocf_is_ver "$1" || return 3 ocf_is_ver "$2" || return 3 local v1=$1 local v2=$2 sort_version="sort -t. -k 1,1n -k 2,2n -k 3,3n -k 4,4n" older=$( (echo "$v1"; echo "$v2") | $sort_version | head -1 ) if [ "$v1" = "$v2" ]; then return 1 elif [ "$v1" = "$older" ]; then return 0 else return 2 # -1 would look funny in shell ;-) fi } ocf_local_nodename() { # use crm_node -n for pacemaker > 1.1.8 which pacemakerd > /dev/null 2>&1 if [ $? -eq 0 ]; then local version=$(pacemakerd -$ | awk '/^Pacemaker /{ print $2 }') version=$(echo $version | awk -F- '{ print $1 }') ocf_version_cmp "$version" "1.1.8" if [ $? -eq 2 ]; then which crm_node > /dev/null 2>&1 if [ $? -eq 0 ]; then crm_node -n return fi fi fi # otherwise use uname -n uname -n } # usage: dirname DIR dirname() { local a local b [ $# = 1 ] || return 1 a="$1" while [ 1 ]; do b="${a%/}" [ "$a" = "$b" ] && break a="$b" done b=${a%/*} [ -z "$b" -o "$a" = "$b" ] && b="." echo "$b" return 0 } # usage: systemd_is_running # returns: # 0 PID 1 is systemd # 1 otherwise systemd_is_running() { [ "$(cat /proc/1/comm 2>/dev/null)" = "systemd" ] } # usage: systemd_drop_in systemd_drop_in() { local conf_file if [ $# -ne 3 ]; then ocf_log err "Incorrect number of arguments [$#] for systemd_drop_in." fi systemdrundir="/run/systemd/system/resource-agents-deps.target.d" mkdir -p "$systemdrundir" conf_file="$systemdrundir/$1.conf" cat >"$conf_file" <&1) rc=$? ocf_log debug "result: $result" [ $rc -eq 0 ] && break sleep $sleep done if [ $rc -ne 0 ]; then ocf_exit_reason "curl $args failed $tries tries" exit $OCF_ERR_GENERIC fi echo "$result" return $rc } # move process to root cgroup if realtime scheduling is enabled ocf_move_to_root_cgroup_if_rt_enabled() { if [ -e "/sys/fs/cgroup/cpu/cpu.rt_runtime_us" ]; then echo $$ >> /sys/fs/cgroup/cpu/tasks if [ "$?" -ne "0" ]; then ocf_log warn "Unable to move PID $$ to the root cgroup" fi fi } # usage: crm_mon_no_validation args... # run crm_mon without any cib schema validation # This is useful when an agent runs in a bundle to avoid potential # schema validation errors when host and bundle are not perfectly aligned # To be used, your shell must support on process substitution (e.g. bash) # returns: # crm_mon_no_validation() { # The subshell prevents parsing error with incompatible shells "$SHELL" -c "CIB_file=<(${HA_SBIN_DIR}/cibadmin -Q | sed 's/validate-with=\"[^\"]*\"/validate-with=\"none\"/') \ ${HA_SBIN_DIR}/crm_mon \$*" -- $* } # # pseudo_resource status tracking function... # # This allows pseudo resources to give correct status information. As we add # resource monitoring, and better resource tracking in general, this will # become essential. # # These scripts work because ${HA_RSCTMP} is cleaned on node reboot. # # We create "resource-string" tracking files under ${HA_RSCTMP} in a # very simple way: # # Existence of "${HA_RSCTMP}/resource-string" means that we consider # the resource named by "resource-string" to be running. # # Note that "resource-string" needs to be unique. Using the resource type # plus the resource instance arguments to make up the resource string # is probably sufficient... # # usage: ha_pseudo_resource resource-string op [tracking_file] # where op is {start|stop|monitor|status|restart|reload|print} # print is a special op which just prints the tracking file location # user can override our choice of the tracking file location by # specifying it as the third arg # Note that all operations are silent... # ha_pseudo_resource() { local ha_resource_tracking_file="${3:-${HA_RSCTMP}/$1}" case $2 in start|restart|reload) touch "$ha_resource_tracking_file";; stop) rm -f "$ha_resource_tracking_file";; status|monitor) if [ -f "$ha_resource_tracking_file" ] then return 0 else case $2 in status) return 3;; *) return 7;; esac fi;; print) echo "$ha_resource_tracking_file";; *) return 3;; esac } # usage: rmtempdir TMPDIR rmtempdir() { [ $# = 1 ] || return 1 if [ -e "$1" ]; then rmdir "$1" || return 1 fi return 0 } # usage: maketempfile [-d] maketempfile() { if [ $# = 1 -a "$1" = "-d" ]; then mktemp -d return 0 elif [ $# != 0 ]; then return 1 fi mktemp return 0 } # usage: rmtempfile TMPFILE rmtempfile () { [ $# = 1 ] || return 1 if [ -e "$1" ]; then rm "$1" || return 1 fi return 0 } # echo the first lower supported check level # pass set of levels supported by the agent # (in increasing order, 0 is optional) ocf_check_level() { local lvl prev lvl=0 prev=0 if ocf_is_decimal "$OCF_CHECK_LEVEL"; then # the level list should be very short for lvl; do if [ "$lvl" -eq "$OCF_CHECK_LEVEL" ]; then break elif [ "$lvl" -gt "$OCF_CHECK_LEVEL" ]; then lvl=$prev # the previous one break fi prev=$lvl done fi echo $lvl } # usage: ocf_stop_processes SIGNALS WAIT_TIME PIDS # # we send signals (use quotes for more than one!) in the order # given; if one or more processes are still running we try KILL; # the wait_time is the _total_ time we'll spend in this function # this time may be slightly exceeded if the processes won't leave # # returns: # 0: all processes left # 1: some processes still running # # example: # # ocf_stop_processes TERM 5 $pids # ocf_stop_processes() { local signals="$1" local wait_time="$(($2/`echo $signals|wc -w`))" shift 2 local pids="$*" local sig i test -z "$pids" && return 0 for sig in $signals KILL; do kill -s $sig $pids 2>/dev/null # try to leave early, and yet leave processes time to exit sleep 0.2 for i in `seq $wait_time`; do kill -s 0 $pids 2>/dev/null || return 0 sleep 1 done done return 1 } # # create a given status directory # if the directory path doesn't start with $HA_VARRUN, then # we return with error (most of the calls would be with the user # supplied configuration, hence we need to do necessary # protection) # used mostly for PID files # # usage: ocf_mkstatedir owner permissions path # # owner: user.group # permissions: permissions # path: directory path # # example: # ocf_mkstatedir named 755 `dirname $pidfile` # ocf_mkstatedir() { local owner local perms local path owner=$1 perms=$2 path=$3 test -d $path && return 0 [ $(id -u) = 0 ] || return 1 case $path in ${HA_VARRUN%/}/*) : this path is ok ;; *) ocf_log err "cannot create $path (does not start with $HA_VARRUN)" return 1 ;; esac mkdir -p $path && chown $owner $path && chmod $perms $path } # # create a unique status directory in $HA_VARRUN # used mostly for PID files # the directory is by default set to # $HA_VARRUN/$OCF_RESOURCE_INSTANCE # the directory name is printed to stdout # # usage: ocf_unique_rundir owner permissions name # # owner: user.group (default: "root") # permissions: permissions (default: "755") # name: some unique string (default: "$OCF_RESOURCE_INSTANCE") # # to use the default either don't set the parameter or set it to # empty string ("") # example: # # STATEDIR=`ocf_unique_rundir named "" myownstatedir` # ocf_unique_rundir() { local path local owner local perms local name owner=${1:-"root"} perms=${2:-"755"} name=${3:-"$OCF_RESOURCE_INSTANCE"} path=$HA_VARRUN/$name if [ ! -d $path ]; then [ $(id -u) = 0 ] || return 1 mkdir -p $path && chown $owner $path && chmod $perms $path || return 1 fi echo $path } # # RA tracing may be turned on by setting OCF_TRACE_RA # the trace output will be saved to OCF_TRACE_FILE, if set, or # by default to # $HA_VARLIB/trace_ra//.. # e.g. $HA_VARLIB/trace_ra/oracle/db.start.2012-11-27.08:37:08 # # OCF_TRACE_FILE: # - FD (small integer [3-9]) in that case it is up to the callers # to capture output; the FD _must_ be open for writing # - absolute path # # NB: FD 9 may be used for tracing with bash >= v4 in case # OCF_TRACE_FILE is set to a path. # ocf_bash_has_xtracefd() { [ -n "$BASH_VERSION" ] && [ ${BASH_VERSINFO[0]} -ge 4 ] } # for backwards compatibility ocf_is_bash4() { ocf_bash_has_xtracefd } ocf_trace_redirect_to_file() { local dest=$1 if ocf_bash_has_xtracefd; then exec 9>$dest BASH_XTRACEFD=9 else exec 2>$dest fi } ocf_trace_redirect_to_fd() { local fd=$1 if ocf_bash_has_xtracefd; then BASH_XTRACEFD=$fd else exec 2>&$fd fi } __ocf_test_trc_dest() { local dest=$1 if ! touch $dest; then ocf_log warn "$dest not writable, trace not going to happen" __OCF_TRC_DEST="" __OCF_TRC_MANAGE="" return 1 fi return 0 } ocf_default_trace_dest() { tty >/dev/null && return if [ -n "$OCF_RESOURCE_TYPE" -a \ -n "$OCF_RESOURCE_INSTANCE" -a -n "$__OCF_ACTION" ]; then local ts=`date +%F.%T` __OCF_TRC_DEST=${OCF_RESKEY_trace_dir}/${OCF_RESOURCE_TYPE}/${OCF_RESOURCE_INSTANCE}.${__OCF_ACTION}.$ts __OCF_TRC_MANAGE="1" fi } ocf_start_trace() { export __OCF_TRC_DEST="" __OCF_TRC_MANAGE="" case "$OCF_TRACE_FILE" in [3-9]) ocf_trace_redirect_to_fd "$OCF_TRACE_FILE" ;; /*/*) __OCF_TRC_DEST=$OCF_TRACE_FILE ;; "") ocf_default_trace_dest ;; *) ocf_log warn "OCF_TRACE_FILE must be set to either FD (open for writing) or absolute file path" ocf_default_trace_dest ;; esac if [ "$__OCF_TRC_DEST" ]; then mkdir -p `dirname $__OCF_TRC_DEST` __ocf_test_trc_dest $__OCF_TRC_DEST || return ocf_trace_redirect_to_file "$__OCF_TRC_DEST" fi if [ -n "$BASH_VERSION" ]; then PS4='+ `date +"%T"`: ${FUNCNAME[0]:+${FUNCNAME[0]}:}${LINENO}: ' fi set -x env=$( echo; printenv | sort ) } ocf_stop_trace() { set +x } # Helper functions to map from nodename/bundle-name and physical hostname # list_index_for_word "node0 node1 node2 node3 node4 node5" node4 --> 5 # list_word_at_index "NA host1 host2 host3 host4 host5" 3 --> host2 # list_index_for_word "node1 node2 node3 node4 node5" node7 --> "" # list_word_at_index "host1 host2 host3 host4 host5" 8 --> "" # attribute_target node1 --> host1 list_index_for_word() { echo $1 | tr ' ' '\n' | awk -v x="$2" '$0~x {print NR}' } list_word_at_index() { echo $1 | tr ' ' '\n' | awk -v n="$2" 'n == NR' } ocf_attribute_target() { if [ x$1 = x ]; then if [ x$OCF_RESKEY_CRM_meta_container_attribute_target = xhost -a x$OCF_RESKEY_CRM_meta_physical_host != x ]; then echo $OCF_RESKEY_CRM_meta_physical_host else if [ x$OCF_RESKEY_CRM_meta_on_node != x ]; then echo $OCF_RESKEY_CRM_meta_on_node else ocf_local_nodename fi fi return elif [ x"$OCF_RESKEY_CRM_meta_notify_all_uname" != x ]; then index=$(list_index_for_word "$OCF_RESKEY_CRM_meta_notify_all_uname" $1) mapping="" if [ x$index != x ]; then mapping=$(list_word_at_index "$OCF_RESKEY_CRM_meta_notify_all_hosts" $index) fi if [ x$mapping != x -a x$mapping != xNA ]; then echo $mapping return fi fi echo $1 } ocf_promotion_score() { ocf_version_cmp "$OCF_RESKEY_crm_feature_set" "3.10.0" res=$? if [ $res -eq 2 ] || [ $res -eq 1 ] || ! have_binary "crm_master"; then ${HA_SBIN_DIR}/crm_attribute -p ${OCF_RESOURCE_INSTANCE} $@ else ${HA_SBIN_DIR}/crm_master -l reboot $@ fi } __ocf_set_defaults "$@" : ${OCF_TRACE_RA:=$OCF_RESKEY_trace_ra} : ${OCF_RESKEY_trace_dir:="$HA_VARLIB/trace_ra"} ocf_is_true "$OCF_TRACE_RA" && ocf_start_trace # pacemaker sets HA_use_logd, some others use HA_LOGD :/ if ocf_is_true "$HA_use_logd"; then : ${HA_LOGD:=yes} fi diff --git a/heartbeat/pgsql b/heartbeat/pgsql index 532063ac5..69384f155 100755 --- a/heartbeat/pgsql +++ b/heartbeat/pgsql @@ -1,2263 +1,2263 @@ #!/bin/sh # # Description: Manages a PostgreSQL Server as an OCF High-Availability # resource # # Authors: Serge Dubrouski (sergeyfd@gmail.com) -- original RA # Florian Haas (florian@linbit.com) -- makeover # Takatoshi MATSUO (matsuo.tak@gmail.com) -- support replication # David Corlette (dcorlette@netiq.com) -- add support for non-standard library locations and non-standard port # # Copyright: 2006-2012 Serge Dubrouski # and other Linux-HA contributors # License: GNU General Public License (GPL) # ############################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Use runuser if available for SELinux. if [ -x /sbin/runuser ]; then SU=runuser else SU=su fi # # Get PostgreSQL Configuration parameter # get_pgsql_param() { local param_name param_name=$1 perl_code="if (/^\s*$param_name[\s=]+\s*(.*)$/) { \$dir=\$1; \$dir =~ s/\s*\#.*//; \$dir =~ s/^'(\S*)'/\$1/; print \$dir;}" perl -ne "$perl_code" < $OCF_RESKEY_config } # Defaults OCF_RESKEY_pgctl_default=/usr/bin/pg_ctl OCF_RESKEY_psql_default=/usr/bin/psql OCF_RESKEY_pgdata_default=/var/lib/pgsql/data OCF_RESKEY_pgdba_default=postgres OCF_RESKEY_pghost_default="" OCF_RESKEY_pgport_default=5432 OCF_RESKEY_pglibs_default=/usr/lib OCF_RESKEY_start_opt_default="" OCF_RESKEY_ctl_opt_default="" OCF_RESKEY_pgdb_default=template1 OCF_RESKEY_logfile_default=/dev/null OCF_RESKEY_socketdir_default="" OCF_RESKEY_stop_escalate_default=90 OCF_RESKEY_monitor_user_default="" OCF_RESKEY_monitor_password_default="" OCF_RESKEY_monitor_sql_default="select now();" OCF_RESKEY_check_wal_receiver_default="false" # Defaults for replication OCF_RESKEY_rep_mode_default=none OCF_RESKEY_node_list_default="" OCF_RESKEY_restore_command_default="" OCF_RESKEY_archive_cleanup_command_default="" OCF_RESKEY_recovery_end_command_default="" OCF_RESKEY_master_ip_default="" OCF_RESKEY_repuser_default="postgres" OCF_RESKEY_primary_conninfo_opt_default="" OCF_RESKEY_restart_on_promote_default="false" OCF_RESKEY_tmpdir_default="/var/lib/pgsql/tmp" OCF_RESKEY_xlog_check_count_default="3" OCF_RESKEY_crm_attr_timeout_default="5" OCF_RESKEY_stop_escalate_in_slave_default=90 OCF_RESKEY_replication_slot_name_default="" : ${OCF_RESKEY_pgctl=${OCF_RESKEY_pgctl_default}} : ${OCF_RESKEY_psql=${OCF_RESKEY_psql_default}} : ${OCF_RESKEY_pgdata=${OCF_RESKEY_pgdata_default}} : ${OCF_RESKEY_pgdba=${OCF_RESKEY_pgdba_default}} : ${OCF_RESKEY_pghost=${OCF_RESKEY_pghost_default}} : ${OCF_RESKEY_pgport=${OCF_RESKEY_pgport_default}} : ${OCF_RESKEY_pglibs=${OCF_RESKEY_pglibs_default}} : ${OCF_RESKEY_config=${OCF_RESKEY_pgdata}/postgresql.conf} : ${OCF_RESKEY_start_opt=${OCF_RESKEY_start_opt_default}} : ${OCF_RESKEY_ctl_opt=${OCF_RESKEY_ctl_opt_default}} : ${OCF_RESKEY_pgdb=${OCF_RESKEY_pgdb_default}} : ${OCF_RESKEY_logfile=${OCF_RESKEY_logfile_default}} : ${OCF_RESKEY_socketdir=${OCF_RESKEY_socketdir_default}} : ${OCF_RESKEY_stop_escalate=${OCF_RESKEY_stop_escalate_default}} : ${OCF_RESKEY_monitor_user=${OCF_RESKEY_monitor_user_default}} : ${OCF_RESKEY_monitor_password=${OCF_RESKEY_monitor_password_default}} : ${OCF_RESKEY_monitor_sql=${OCF_RESKEY_monitor_sql_default}} : ${OCF_RESKEY_check_wal_receiver=${OCF_RESKEY_check_wal_receiver_default}} # for replication : ${OCF_RESKEY_rep_mode=${OCF_RESKEY_rep_mode_default}} : ${OCF_RESKEY_node_list=${OCF_RESKEY_node_list_default}} : ${OCF_RESKEY_restore_command=${OCF_RESKEY_restore_command_default}} : ${OCF_RESKEY_archive_cleanup_command=${OCF_RESKEY_archive_cleanup_command_default}} : ${OCF_RESKEY_recovery_end_command=${OCF_RESKEY_recovery_end_command_default}} : ${OCF_RESKEY_master_ip=${OCF_RESKEY_master_ip_default}} : ${OCF_RESKEY_repuser=${OCF_RESKEY_repuser_default}} : ${OCF_RESKEY_primary_conninfo_opt=${OCF_RESKEY_primary_conninfo_opt_default}} : ${OCF_RESKEY_restart_on_promote=${OCF_RESKEY_restart_on_promote_default}} : ${OCF_RESKEY_tmpdir=${OCF_RESKEY_tmpdir_default}} : ${OCF_RESKEY_xlog_check_count=${OCF_RESKEY_xlog_check_count_default}} : ${OCF_RESKEY_crm_attr_timeout=${OCF_RESKEY_crm_attr_timeout_default}} : ${OCF_RESKEY_stop_escalate_in_slave=${OCF_RESKEY_stop_escalate_in_slave_default}} : ${OCF_RESKEY_replication_slot_name=${OCF_RESKEY_replication_slot_name_default}} usage() { cat < 1.0 Resource script for PostgreSQL. It manages a PostgreSQL as an HA resource. Manages a PostgreSQL database instance Path to pg_ctl command. pgctl Start options (-o start_opt in pg_ctl). "-i -p 5432" for example. start_opt Additional pg_ctl options (-w, -W etc..). ctl_opt Path to psql command. psql Path to PostgreSQL data directory. pgdata User that owns PostgreSQL. pgdba Hostname/IP address where PostgreSQL is listening pghost Port where PostgreSQL is listening pgport Custom location of the Postgres libraries. If not set, the standard location will be used. pglibs PostgreSQL user that pgsql RA will user for monitor operations. If it's not set pgdba user will be used. monitor_user Password for monitor user. monitor_password SQL script that will be used for monitor operations. monitor_sql Path to the PostgreSQL configuration file for the instance. Configuration file Database that will be used for monitoring. pgdb Path to PostgreSQL server log output file. logfile Unix socket directory for PostgreSQL. If you use PostgreSQL 9.3 or higher and define unix_socket_directories in the postgresql.conf, then you must set socketdir to determine which directory is used for psql command. socketdir Number of seconds to wait for stop (using -m fast) before resorting to -m immediate stop escalation Replication mode may be set to "async" or "sync" or "slave". They require PostgreSQL 9.1 or later. Once set, "async" and "sync" require node_list, master_ip, and restore_command parameters,as well as configuring PostgreSQL for replication (in postgresql.conf and pg_hba.conf). "slave" means that RA only makes recovery.conf before starting to connect to primary which is running somewhere. It doesn't need master/slave setting. It requires master_ip restore_command parameters. rep_mode All node names. Please separate each node name with a space. This is optional for replication. Defaults to all nodes in the cluster node list restore_command for recovery.conf. This is required for replication. restore_command archive_cleanup_command for recovery.conf. This is used for replication and is optional. archive_cleanup_command recovery_end_command for recovery.conf. This is used for replication and is optional. recovery_end_command Master's floating IP address to be connected from hot standby. This parameter is used for "primary_conninfo" in recovery.conf. This is required for replication. master ip User used to connect to the master server. This parameter is used for "primary_conninfo" in recovery.conf. This is required for replication. repuser primary_conninfo options of recovery.conf except host, port, user and application_name. This is optional for replication. primary_conninfo_opt If this is true, RA deletes recovery.conf and restarts PostgreSQL on promote to keep Timeline ID. It probably makes fail-over slower. It's recommended to set on-fail of promote up as fence. This is optional for replication. restart_on_promote Set this option when using replication slots. Can only use lower case letters, numbers and underscore for replication_slot_name. The replication slots would be created for each node, with the name adding the node name as postfix. For example, replication_slot_name is "sample" and 2 slaves which are "node1" and "node2" connect to their slots, the slots names are "sample_node1" and "sample_node2". If the node name contains a upper case letter, hyphen and dot, those characters will be converted to a lower case letter or an underscore. For example, Node-1.example.com to node_1_example_com. pgsql RA doesn't monitor and delete the replication slot. When the slave node has been disconnected in failure or the like, execute one of the following manually. Otherwise it may eventually cause a disk full because the master node will continue to accumulate the unsent WAL. 1. recover and reconnect the slave node to the master node as soon as possible. 2. delete the slot on the master node by following psql command. $ select pg_drop_replication_slot('replication_slot_name'); replication_slot_name Path to temporary directory. This is optional for replication. tmpdir Number of checks of xlog on monitor before promote. This is optional for replication. Note: For backward compatibility, the terms are unified with PostgreSQL 9. If you are using PostgreSQL 10 or later, replace "xlog" with "wal". Likewise, replacing "location" with "lsn". xlog check count The timeout of crm_attribute forever update command. Default value is 5 seconds. This is optional for replication. The timeout of crm_attribute forever update command. Number of seconds to wait for stop (using -m fast) before resorting to -m immediate in slave state. This is optional for replication. stop escalation_in_slave If this is true, RA checks wal_receiver process on monitor and notifies its status using "(resource name)-receiver-status" attribute. It's useful for checking whether PostgreSQL (hot standby) connects to primary. The attribute shows status as "normal" or "normal (master)" or "ERROR". Note that if you configure PostgreSQL as master/slave resource, then wal receiver is not running in the master and the attribute shows status as "normal (master)" consistently because it is normal status. check_wal_receiver EOF } # # Run the given command in the Resource owner environment... # runasowner() { local quietrun="" local loglevel="-err" local var for var in 1 2 do case "$1" in "-q") quietrun="-q" shift 1;; - "info"|"warn"|"err") + "info"|"warn"|"err"|"debug") loglevel="-$1" shift 1;; *) ;; esac done ocf_run $quietrun $loglevel $SU $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; $*" } # # Shell escape # escape_string() { echo "$*" | sed -e "s/'/'\\\\''/g" } # # methods: What methods/operations do we support? # pgsql_methods() { cat </dev/null 2>&1" + runasowner -q debug "kill -s 0 $PID" return $? fi # No PID file false } pgsql_wal_receiver_status() { local PID local receiver_parent_pids local pgsql_real_monitor_status=$1 PID=`head -n 1 $PIDFILE` receiver_parent_pids=`ps -ef | tr -s " " | grep "[w]al\s*receiver" | cut -d " " -f 3` if echo "$receiver_parent_pids" | grep -q -w "$PID" ; then attrd_updater -n "$PGSQL_WAL_RECEIVER_STATUS_ATTR" -v "normal" return 0 fi if [ $pgsql_real_monitor_status -eq "$OCF_RUNNING_MASTER" ]; then attrd_updater -n "$PGSQL_WAL_RECEIVER_STATUS_ATTR" -v "normal (master)" return 0 fi attrd_updater -n "$PGSQL_WAL_RECEIVER_STATUS_ATTR" -v "ERROR" ocf_log warn "wal receiver process is not running" return 1 } # # pgsql_real_monitor # pgsql_real_monitor() { local loglevel local rc local output # Set the log level of the error message loglevel=${1:-err} if ! pgsql_status then ocf_log info "PostgreSQL is down" return $OCF_NOT_RUNNING fi if is_replication; then #Check replication state output=`exec_sql "${CHECK_MS_SQL}"` rc=$? if [ $rc -ne 0 ]; then report_psql_error $rc $loglevel "Can't get PostgreSQL recovery status." return $OCF_ERR_GENERIC fi case "$output" in f) ocf_log debug "PostgreSQL is running as a primary." if [ "$OCF_RESKEY_monitor_sql" = "$OCF_RESKEY_monitor_sql_default" ]; then if ocf_is_probe; then # Set initial score for primary. exec_with_retry 0 ocf_promotion_score -v $PROMOTE_ME fi return $OCF_RUNNING_MASTER fi ;; t) ocf_log debug "PostgreSQL is running as a hot standby." if ocf_is_probe; then # Set initial score for hot standby. exec_with_retry 0 ocf_promotion_score -v $CAN_NOT_PROMOTE fi return $OCF_SUCCESS;; *) ocf_exit_reason "$CHECK_MS_SQL output is $output" return $OCF_ERR_GENERIC;; esac fi OCF_RESKEY_monitor_sql=`escape_string "$OCF_RESKEY_monitor_sql"` runasowner -q $loglevel "$OCF_RESKEY_psql $psql_options \ -c '$OCF_RESKEY_monitor_sql'" rc=$? if [ $rc -ne 0 ]; then report_psql_error $rc $loglevel "PostgreSQL $OCF_RESKEY_pgdb isn't running." return $OCF_ERR_GENERIC fi if is_replication; then return $OCF_RUNNING_MASTER fi return $OCF_SUCCESS } pgsql_replication_monitor() { local rc rc=$1 if [ $rc -ne $OCF_SUCCESS -a $rc -ne "$OCF_RUNNING_MASTER" ]; then return $rc fi # If I am Master if [ $rc -eq $OCF_RUNNING_MASTER ]; then change_data_status "$NODENAME" "LATEST" change_pgsql_status "$NODENAME" "PRI" control_slave_status || return $OCF_ERR_GENERIC if [ "$RE_CONTROL_SLAVE" = "true" ]; then sleep 2 ocf_log info "re-controlling slave status." RE_CONTROL_SLAVE="none" control_slave_status || return $OCF_ERR_GENERIC fi return $rc fi # I can't get master node name from $OCF_RESKEY_CRM_meta_notify_master_uname on monitor, # so I will get master node name using crm_mon -n print_crm_mon | grep -q -i -E "/dev/null` if [ $? -eq 0 ]; then my_master_baseline=`$CRM_ATTR_REBOOT -N "$NODENAME" -n \ "$PGSQL_MASTER_BASELINE" -G -q 2>/dev/null` # get older location cmp_location=`printf "$master_baseline\n$my_master_baseline\n" |\ sort | head -1` if [ "$cmp_location" != "$my_master_baseline" ]; then # We used to set the failcount to INF for the resource here in # order to move the master to the other node. However, setting # the failcount should be done only by the CRM and so this use # got deprecated in pacemaker version 1.1.17. Now we do the # "ban resource from the node". ocf_exit_reason "My data is newer than new master's one. New master's location : $master_baseline" exec_with_retry 0 $CRM_RESOURCE -B -r $OCF_RESOURCE_INSTANCE -N $NODENAME -Q return $OCF_ERR_GENERIC fi fi fi return $OCF_SUCCESS } pgsql_notify() { local type="${OCF_RESKEY_CRM_meta_notify_type}" local op="${OCF_RESKEY_CRM_meta_notify_operation}" local rc if ! is_replication; then return $OCF_SUCCESS fi ocf_log debug "notify: ${type} for ${op}" case $type in pre) case $op in promote) pgsql_pre_promote return $? ;; esac ;; post) case $op in promote) delete_xlog_location PROMOTE_NODE=`echo $OCF_RESKEY_CRM_meta_notify_promote_uname | \ sed "s/ /\n/g" | head -1 | tr '[A-Z]' '[a-z]'` if [ "$PROMOTE_NODE" != "$NODENAME" ]; then delete_master_baseline fi return $OCF_SUCCESS ;; demote) pgsql_post_demote return $? ;; start|stop) MASTER_NODE=`echo $OCF_RESKEY_CRM_meta_notify_master_uname | \ sed "s/ /\n/g" | head -1 | tr '[A-Z]' '[a-z]'` if [ "$NODENAME" = "$MASTER_NODE" ]; then control_slave_status fi return $OCF_SUCCESS ;; esac ;; esac return $OCF_SUCCESS } control_slave_status() { local rc local data_status local target local all_data_status local tmp_data_status local number_of_nodes all_data_status=`exec_sql "${CHECK_REPLICATION_STATE_SQL}"` rc=$? if [ $rc -eq 0 ]; then if [ -n "$all_data_status" ]; then all_data_status=`echo $all_data_status | sed "s/\n/ /g"` fi else report_psql_error $rc err "Can't get PostgreSQL replication status." return 1 fi number_of_nodes=`echo $NODE_LIST | wc -w` for target in $NODE_LIST; do if [ "$target" = "$NODENAME" ]; then continue fi data_status="DISCONNECT" if [ -n "$all_data_status" ]; then for tmp_data_status in $all_data_status; do if ! echo $tmp_data_status | grep -q "^${target}|"; then continue fi data_status=`echo $tmp_data_status | cut -d "|" -f 2,3` ocf_log debug "node_name and data_status is $tmp_data_status" break done fi case "$data_status" in "STREAMING|SYNC") change_data_status "$target" "$data_status" change_master_score "$target" "$CAN_PROMOTE" change_pgsql_status "$target" "HS:sync" ;; "STREAMING|ASYNC") change_data_status "$target" "$data_status" if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then change_master_score "$target" "$CAN_NOT_PROMOTE" set_sync_mode "$target" else if [ $number_of_nodes -le 2 ]; then change_master_score "$target" "$CAN_PROMOTE" else # I can't determine which slave's data is newest in async mode. change_master_score "$target" "$CAN_NOT_PROMOTE" fi fi change_pgsql_status "$target" "HS:async" ;; "STREAMING|POTENTIAL") change_data_status "$target" "$data_status" change_master_score "$target" "$CAN_NOT_PROMOTE" change_pgsql_status "$target" "HS:potential" ;; "DISCONNECT") change_data_status "$target" "$data_status" change_master_score "$target" "$CAN_NOT_PROMOTE" if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then set_async_mode "$target" fi ;; *) change_data_status "$target" "$data_status" change_master_score "$target" "$CAN_NOT_PROMOTE" if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then set_async_mode "$target" fi change_pgsql_status "$target" "HS:connected" ;; esac done return 0 } have_master_right() { local old local new local output local data_status local node local mylocation local count local newestXlog local oldfile local newfile ocf_log debug "Checking if I have a master right." data_status=`$CRM_ATTR_FOREVER -N "$NODENAME" -n \ "$PGSQL_DATA_STATUS_ATTR" -G -q 2>/dev/null` if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then if [ -n "$data_status" -a "$data_status" != "STREAMING|SYNC" -a \ "$data_status" != "LATEST" ]; then ocf_log warn "My data is out-of-date. status=$data_status" return 1 fi else if [ -n "$data_status" -a "$data_status" != "STREAMING|SYNC" -a \ "$data_status" != "STREAMING|ASYNC" -a \ "$data_status" != "LATEST" ]; then ocf_log warn "My data is out-of-date. status=$data_status" return 1 fi fi ocf_log info "My data status=$data_status." show_xlog_location if [ $? -ne 0 ]; then ocf_exit_reason "Failed to show my xlog location." exit $OCF_ERR_GENERIC fi old=0 for count in `seq $OCF_RESKEY_xlog_check_count`; do if [ -f ${XLOG_NOTE_FILE}.$count ]; then old=$count continue fi break done new=`expr $old + 1` # get xlog locations of all nodes for node in ${NODE_LIST}; do output=`$CRM_ATTR_REBOOT -N "$node" -n \ "$PGSQL_XLOG_LOC_NAME" -G -q 2>/dev/null` if [ $? -ne 0 ]; then ocf_log warn "Can't get $node xlog location." continue else ocf_log info "$node xlog location : $output" echo "$node $output" >> ${XLOG_NOTE_FILE}.${new} if [ "$node" = "$NODENAME" ]; then mylocation=$output fi fi done oldfile=`cat ${XLOG_NOTE_FILE}.${old} 2>/dev/null` newfile=`cat ${XLOG_NOTE_FILE}.${new} 2>/dev/null` if [ "$oldfile" != "$newfile" ]; then # reset counter rm -f ${XLOG_NOTE_FILE}.* printf "$newfile\n" > ${XLOG_NOTE_FILE}.0 return 1 fi if [ "$new" -ge "$OCF_RESKEY_xlog_check_count" ]; then newestXlog=`printf "$newfile\n" | sort -t " " -k 2,3 -r | \ head -1 | cut -d " " -f 2` if [ "$newestXlog" = "$mylocation" ]; then ocf_log info "I have a master right." exec_with_retry 5 ocf_promotion_score -v $PROMOTE_ME return 0 fi change_data_status "$NODENAME" "DISCONNECT" ocf_log info "I don't have correct master data." # reset counter rm -f ${XLOG_NOTE_FILE}.* printf "$newfile\n" > ${XLOG_NOTE_FILE}.0 fi return 1 } is_replication() { if [ "$OCF_RESKEY_rep_mode" != "none" -a "$OCF_RESKEY_rep_mode" != "slave" ]; then return 0 fi return 1 } use_replication_slot() { if [ -n "$OCF_RESKEY_replication_slot_name" ]; then return 0 fi return 1 } create_replication_slot_name() { local number_of_nodes=0 local target local replication_slot_name local replication_slot_name_list_tmp local replication_slot_name_list if [ -n "$NODE_LIST" ]; then number_of_nodes=`echo $NODE_LIST | wc -w` fi if [ $number_of_nodes -le 0 ]; then replication_slot_name_list="" # The Master node should have some slots equal to the number of Slaves, and # the Slave nodes connect to their dedicated slot on the Master. # To ensuring that the slots name are each unique, add postfix to $OCF_RESKEY_replication_slot. # The postfix is "_$target". else for target in $NODE_LIST do if [ "$target" != "$NODENAME" ]; then # The Uppercase, "-" and "." don't allow to use in slot_name. # If the NODENAME contains them, convert upper case to lower case and "_" and "." to "_". target=`echo "$target" | tr 'A-Z.-' 'a-z__'` replication_slot_name="$OCF_RESKEY_replication_slot_name"_"$target" replication_slot_name_list_tmp="$replication_slot_name_list" replication_slot_name_list="$replication_slot_name_list_tmp $replication_slot_name" fi done fi echo $replication_slot_name_list } delete_replication_slot(){ DELETE_REPLICATION_SLOT_sql="SELECT pg_drop_replication_slot('$1');" output=`exec_sql "$DELETE_REPLICATION_SLOT_sql"` return $? } delete_replication_slots() { local replication_slot_name_list local replication_slot_name replication_slot_name_list=`create_replication_slot_name` ocf_log debug "replication slot names are $replication_slot_name_list." for replication_slot_name in $replication_slot_name_list do if [ `check_replication_slot $replication_slot_name` = "1" ]; then delete_replication_slot $replication_slot_name if [ $? -eq 0 ]; then ocf_log info "PostgreSQL delete the replication slot($replication_slot_name)." else ocf_exit_reason "$output" return $OCF_ERR_GENERIC fi fi done } create_replication_slots() { local replication_slot_name local replication_slot_name_list local output local rc local CREATE_REPLICATION_SLOT_sql local DELETE_REPLICATION_SLOT_sql replication_slot_name_list=`create_replication_slot_name` ocf_log debug "replication slot names are $replication_slot_name_list." for replication_slot_name in $replication_slot_name_list do # If the same name slot is already exists, initialize(delete and create) the slot. if [ `check_replication_slot $replication_slot_name` = "1" ]; then delete_replication_slot $replication_slot_name if [ $? -eq 0 ]; then ocf_log info "PostgreSQL delete the replication slot($replication_slot_name)." else ocf_exit_reason "$output" return $OCF_ERR_GENERIC fi fi CREATE_REPLICATION_SLOT_sql="SELECT pg_create_physical_replication_slot('$replication_slot_name');" output=`exec_sql "$CREATE_REPLICATION_SLOT_sql"` rc=$? if [ $rc -eq 0 ]; then ocf_log info "PostgreSQL creates the replication slot($replication_slot_name)." else ocf_exit_reason "$output" return $OCF_ERR_GENERIC fi done return 0 } # This function check the replication slot does exists. check_replication_slot(){ local replication_slot_name=$1 local output local CHECK_REPLICATION_SLOT_sql="SELECT count(*) FROM pg_replication_slots WHERE slot_name = '$replication_slot_name'" output=`exec_sql "$CHECK_REPLICATION_SLOT_sql"` echo "$output" } # On postgreSQL 10 or later, "location" means "lsn". get_my_location() { local rc local output local replay_loc local receive_loc local output1 local output2 local log1 local log2 local newer_location output=`exec_sql "$CHECK_XLOG_LOC_SQL"` rc=$? if [ $rc -ne 0 ]; then report_psql_error $rc err "Can't get my xlog location." return 1 fi replay_loc=`echo $output | cut -d "|" -f 1` receive_loc=`echo $output | cut -d "|" -f 2` output1=`echo "$replay_loc" | cut -d "/" -f 1` output2=`echo "$replay_loc" | cut -d "/" -f 2` log1=`printf "%08s\n" $output1 | sed "s/ /0/g"` log2=`printf "%08s\n" $output2 | sed "s/ /0/g"` replay_loc="${log1}${log2}" output1=`echo "$receive_loc" | cut -d "/" -f 1` output2=`echo "$receive_loc" | cut -d "/" -f 2` log1=`printf "%08s\n" $output1 | sed "s/ /0/g"` log2=`printf "%08s\n" $output2 | sed "s/ /0/g"` receive_loc="${log1}${log2}" newer_location=`printf "$replay_loc\n$receive_loc" | sort -r | head -1` echo "$newer_location" return 0 } # On postgreSQL 10 or later, "xlog_location" means "wal_lsn". show_xlog_location() { local location location=`get_my_location` || return 1 exec_with_retry 0 $CRM_ATTR_REBOOT -N "$NODENAME" -n "$PGSQL_XLOG_LOC_NAME" -v "$location" } # On postgreSQL 10 or later, "xlog_location" means "wal_lsn". delete_xlog_location() { exec_with_retry 5 $CRM_ATTR_REBOOT -N "$NODENAME" -n "$PGSQL_XLOG_LOC_NAME" -D } show_master_baseline() { local rc local location location=`get_my_location` ocf_log info "My master baseline : $location." exec_with_retry 0 $CRM_ATTR_REBOOT -N "$NODENAME" -n "$PGSQL_MASTER_BASELINE" -v "$location" } delete_master_baseline() { exec_with_retry 5 $CRM_ATTR_REBOOT -N "$NODENAME" -n "$PGSQL_MASTER_BASELINE" -D } set_async_mode_all() { [ "$OCF_RESKEY_rep_mode" = "sync" ] || return 0 ocf_log info "Set all nodes into async mode." runasowner -q err "echo \"synchronous_standby_names = ''\" > \"$REP_MODE_CONF\"" if [ $? -ne 0 ]; then ocf_exit_reason "Can't set all nodes into async mode." return 1 fi return 0 } set_async_mode() { cat $REP_MODE_CONF | grep -q -E "(\"$1\")|([,' ]$1[,' ])" if [ $? -eq 0 ]; then ocf_log info "Setup $1 into async mode." runasowner -q err "echo \"synchronous_standby_names = ''\" > \"$REP_MODE_CONF\"" else ocf_log debug "$1 is already in async mode." return 0 fi exec_with_retry 0 reload_conf } set_sync_mode() { local sync_node_in_conf sync_node_in_conf=`cat $REP_MODE_CONF | cut -d "'" -f 2` if [ -n "$sync_node_in_conf" ]; then ocf_log debug "$sync_node_in_conf is already sync mode." else ocf_log info "Setup $1 into sync mode." runasowner -q err "echo \"synchronous_standby_names = '\\\"$1\\\"'\" > \"$REP_MODE_CONF\"" [ "$RE_CONTROL_SLAVE" = "false" ] && RE_CONTROL_SLAVE="true" exec_with_retry 0 reload_conf fi } reload_conf() { # Invoke pg_ctl runasowner "$OCF_RESKEY_pgctl -D $OCF_RESKEY_pgdata reload" if [ $? -eq 0 ]; then ocf_log info "Reload configuration file." else ocf_exit_reason "Can't reload configuration file." return 1 fi return 0 } user_recovery_conf() { local nodename_tmp # put archive_cleanup_command and recovery_end_command only when defined by user if [ -n "$OCF_RESKEY_archive_cleanup_command" ]; then echo "archive_cleanup_command = '${OCF_RESKEY_archive_cleanup_command}'" fi if [ -n "$OCF_RESKEY_recovery_end_command" ]; then echo "recovery_end_command = '${OCF_RESKEY_recovery_end_command}'" fi if use_replication_slot; then nodename_tmp=`echo "$NODENAME" | tr 'A-Z.-' 'a-z__'` echo "primary_slot_name = '${OCF_RESKEY_replication_slot_name}_$nodename_tmp'" fi } make_recovery_conf() { runasowner "touch $RECOVERY_CONF" if [ $? -ne 0 ]; then ocf_exit_reason "Can't create recovery.conf." return 1 fi cat > $RECOVERY_CONF <> $RECOVERY_CONF <> $RECOVERY_CONF ocf_log debug "Created recovery.conf. host=${OCF_RESKEY_master_ip}, user=${OCF_RESKEY_repuser}" return 0 } # change pgsql-status. # arg1:node, arg2: value change_pgsql_status() { local output if ! is_node_online $1; then return 0 fi output=`$CRM_ATTR_REBOOT -N "$1" -n "$PGSQL_STATUS_ATTR" -G -q 2>/dev/null` if [ "$output" != "$2" ]; then # If slave's disk is broken, RA cannot read PID file # and misjudges the PostgreSQL as down while it is running. # It causes overwriting of pgsql-status by Master because replication is still connected. if [ "$output" = "STOP" -o "$output" = "UNKNOWN" ]; then if [ "$1" != "$NODENAME" ]; then ocf_log warn "Changing $PGSQL_STATUS_ATTR on $1 : $output->$2 by $NODENAME is prohibited." return 0 fi fi ocf_log info "Changing $PGSQL_STATUS_ATTR on $1 : $output->$2." exec_with_retry 0 $CRM_ATTR_REBOOT -N "$1" -n "$PGSQL_STATUS_ATTR" -v "$2" fi return 0 } # change pgsql-data-status. # arg1:node, arg2: value change_data_status() { local output if ! node_exist $1; then return 0 fi while : do output=`$CRM_ATTR_FOREVER -N "$1" -n "$PGSQL_DATA_STATUS_ATTR" -G -q 2>/dev/null` if [ "$output" != "$2" ]; then ocf_log info "Changing $PGSQL_DATA_STATUS_ATTR on $1 : $output->$2." exec_with_retry 0 exec_with_timeout 0 "$CRM_ATTR_FOREVER" -N $1 -n $PGSQL_DATA_STATUS_ATTR -v "$2" else break fi done return 0 } # set master-score # arg1:node, arg2: score, arg3: resoure set_master_score() { local current_score current_score=`$CRM_ATTR_REBOOT -N "$1" -n "master-$3" -G -q 2>/dev/null` if [ -n "$current_score" -a "$current_score" != "$2" ]; then ocf_log info "Changing $3 master score on $1 : $current_score->$2." exec_with_retry 0 $CRM_ATTR_REBOOT -N "$1" -n "master-$3" -v "$2" fi return 0 } # change master-score # arg1:node, arg2: score change_master_score() { local instance if ! is_node_online $1; then return 0 fi if echo $OCF_RESOURCE_INSTANCE | grep -q ":"; then # If Pacemaker version is 1.0.x instance=0 while : do if [ "$instance" -ge "$OCF_RESKEY_CRM_meta_clone_max" ]; then break fi if [ "${RESOURCE_NAME}:${instance}" = "$OCF_RESOURCE_INSTANCE" ]; then instance=`expr $instance + 1` continue fi set_master_score $1 $2 "${RESOURCE_NAME}:${instance}" || return 1 instance=`expr $instance + 1` done else # If globally-unique=false and Pacemaker version is 1.1.8 or higher # Master/Slave resource has no instance number set_master_score $1 $2 ${RESOURCE_NAME} || return 1 fi return 0 } report_psql_error() { local rc local loglevel local message rc=$1 loglevel=${2:-err} message="$3" ocf_log $loglevel "$message rc=$rc" if [ $rc -eq 1 ]; then ocf_exit_reason "Fatal error (out of memory, file not found, etc.) occurred while executing the psql command." elif [ $rc -eq 2 ]; then ocf_log $loglevel "Connection error (connection to the server went bad and the session was not interactive) occurred while executing the psql command." elif [ $rc -eq 3 ]; then ocf_exit_reason "Script error (the variable ON_ERROR_STOP was set) occurred while executing the psql command." fi } # # timeout management function # arg1 timeout >= 0 (if arg1 is 0, OCF_RESKEY_crm_attr_timeout is used.) # arg2 : command # arg3 : command's args exec_with_timeout() { local func_pid local count=$OCF_RESKEY_crm_attr_timeout local rc if [ "$1" -ne 0 ]; then count=$1 fi shift $* & func_pid=$! sleep .1 while kill -s 0 $func_pid >/dev/null 2>&1; do sleep 1 count=`expr $count - 1` if [ $count -le 0 ]; then ocf_exit_reason "\"$*\" (pid=$func_pid) timed out." kill -s 9 $func_pid >/dev/null 2>&1 return 1 fi ocf_log info "Waiting($count). \"$*\" (pid=$func_pid)." done wait $func_pid } # retry command when command doesn't return 0 # arg1 : count >= 0 (if arg1 is 0, it retries command in infinitum(1day)) # arg2..argN : command and args exec_with_retry() { local count="86400" local output local rc if [ "$1" -ne 0 ]; then count=$1 fi shift while [ $count -gt 0 ]; do output=`$*` rc=$? if [ $rc -ne 0 ]; then ocf_log warn "Retrying(remain $count). \"$*\" failed. rc=$rc. stdout=\"$output\"." count=`expr $count - 1` sleep 1 else printf "${output}" return 0 fi done ocf_exit_reason "giving up executing \"$*\"" return $rc } is_node_online() { print_crm_mon | grep -q -i "> $OCF_RESKEY_config fi fi fi if [ ! -n "$OCF_RESKEY_master_ip" ]; then ocf_exit_reason "master_ip can't be empty." return $OCF_ERR_CONFIGURED fi fi if is_replication; then REP_MODE_CONF=${OCF_RESKEY_tmpdir}/rep_mode.conf PGSQL_LOCK=${OCF_RESKEY_tmpdir}/PGSQL.lock XLOG_NOTE_FILE=${OCF_RESKEY_tmpdir}/xlog_note CRM_ATTR_REBOOT="${HA_SBIN_DIR}/crm_attribute -l reboot" CRM_ATTR_FOREVER="${HA_SBIN_DIR}/crm_attribute -l forever" CRM_RESOURCE="${HA_SBIN_DIR}/crm_resource" CAN_NOT_PROMOTE="-INFINITY" CAN_PROMOTE="100" PROMOTE_ME="1000" CHECK_MS_SQL="select pg_is_in_recovery()" CHECK_SYNCHRONOUS_STANDBY_NAMES_SQL="show synchronous_standby_names" ocf_version_cmp "$version" "10" rc=$? if [ $rc -eq 1 ]||[ $rc -eq 2 ]; then CHECK_XLOG_LOC_SQL="select pg_last_wal_replay_lsn(),pg_last_wal_receive_lsn()" else CHECK_XLOG_LOC_SQL="select pg_last_xlog_replay_location(),pg_last_xlog_receive_location()" fi CHECK_REPLICATION_STATE_SQL="select application_name,upper(state),upper(sync_state) from pg_stat_replication" PGSQL_STATUS_ATTR="${RESOURCE_NAME}-status" PGSQL_DATA_STATUS_ATTR="${RESOURCE_NAME}-data-status" PGSQL_XLOG_LOC_NAME="${RESOURCE_NAME}-xlog-loc" PGSQL_MASTER_BASELINE="${RESOURCE_NAME}-master-baseline" NODE_LIST=`echo $OCF_RESKEY_node_list | tr '[A-Z]' '[a-z]'` RE_CONTROL_SLAVE="false" if ! ocf_is_ms; then ocf_exit_reason "Replication(rep_mode=async or sync) requires Master/Slave configuration." return $OCF_ERR_CONFIGURED fi if [ ! "$OCF_RESKEY_rep_mode" = "sync" -a ! "$OCF_RESKEY_rep_mode" = "async" ]; then ocf_exit_reason "Invalid rep_mode : $OCF_RESKEY_rep_mode" return $OCF_ERR_CONFIGURED fi if [ ! -n "$NODE_LIST" ]; then ocf_exit_reason "node_list can't be empty." return $OCF_ERR_CONFIGURED fi if [ $check_config_rc -eq 0 ]; then rep_mode_string="include '$REP_MODE_CONF' # added by pgsql RA" if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then if ! grep -q "^[[:space:]]*$rep_mode_string" $OCF_RESKEY_config; then ocf_log info "adding include directive into $OCF_RESKEY_config" echo "$rep_mode_string" >> $OCF_RESKEY_config fi else if grep -q "$rep_mode_string" $OCF_RESKEY_config; then ocf_log info "deleting include directive from $OCF_RESKEY_config" rep_mode_string=`echo $rep_mode_string | sed -e 's|/|\\\\/|g'` sed -i "/$rep_mode_string/d" $OCF_RESKEY_config fi fi fi if ! mkdir -p $OCF_RESKEY_tmpdir || ! chown $OCF_RESKEY_pgdba $OCF_RESKEY_tmpdir || ! chmod 700 $OCF_RESKEY_tmpdir; then ocf_exit_reason "Can't create directory $OCF_RESKEY_tmpdir or it is not readable by $OCF_RESKEY_pgdba" return $OCF_ERR_PERM fi fi if [ "$OCF_RESKEY_rep_mode" = "slave" ]; then if ocf_is_ms; then ocf_exit_reason "Replication(rep_mode=slave) does not support Master/Slave configuration." return $OCF_ERR_CONFIGURED fi fi if use_replication_slot; then ocf_version_cmp "$version" "9.4" rc=$? if [ $rc -eq 0 ]||[ $rc -eq 3 ]; then ocf_exit_reason "Replication slot needs PostgreSQL 9.4 or higher." return $OCF_ERR_CONFIGURED fi echo "$OCF_RESKEY_replication_slot_name" | grep -q -e '[^a-z0-9_]' if [ $? -eq 0 ]; then ocf_exit_reason "Invalid replication_slot_name($OCF_RESKEY_replication_slot_name). only use lower case letters, numbers, and the underscore character." return $OCF_ERR_CONFIGURED fi fi return $OCF_SUCCESS } # Validate most critical parameters pgsql_validate_all() { local rc getent passwd $OCF_RESKEY_pgdba >/dev/null 2>&1 if [ ! $? -eq 0 ]; then ocf_exit_reason "User $OCF_RESKEY_pgdba doesn't exist"; return $OCF_ERR_INSTALLED; fi if [ -n "$OCF_RESKEY_monitor_user" ] && [ -z "$OCF_RESKEY_monitor_password" ]; then ocf_exit_reason "monitor password can't be empty" return $OCF_ERR_CONFIGURED fi if [ -z "$OCF_RESKEY_monitor_user" ] && [ -n "$OCF_RESKEY_monitor_password" ]; then ocf_exit_reason "monitor_user has to be set if monitor_password is set" return $OCF_ERR_CONFIGURED fi if [ "$OCF_CHECK_LEVEL" -eq 10 ]; then validate_ocf_check_level_10 rc=$? [ $rc -ne "$OCF_SUCCESS" ] && exit $rc fi return $OCF_SUCCESS } # # Check if we need to create a log file # check_log_file() { if [ ! -e "$1" ] then touch $1 > /dev/null 2>&1 chown $OCF_RESKEY_pgdba:`getent passwd $OCF_RESKEY_pgdba | cut -d ":" -f 4` $1 fi #Check if $OCF_RESKEY_pgdba can write to the log file if ! runasowner "test -w $1" then return 1 fi return 0 } # # Check if we need to create stats temp directory in tmpfs # check_stat_temp_directory() { local stats_temp stats_temp=`get_pgsql_param stats_temp_directory` if [ -z "$stats_temp" ]; then return fi if [ "${stats_temp#/}" = "$stats_temp" ]; then stats_temp="$OCF_RESKEY_pgdata/$stats_temp" fi if [ -d "$stats_temp" ]; then return fi if ! mkdir -p "$stats_temp"; then ocf_exit_reason "Can't create directory $stats_temp" exit $OCF_ERR_PERM fi if ! chown $OCF_RESKEY_pgdba: "$stats_temp"; then ocf_exit_reason "Can't change ownership for $stats_temp" exit $OCF_ERR_PERM fi if ! chmod 700 "$stats_temp"; then ocf_exit_reason "Can't change permissions for $stats_temp" exit $OCF_ERR_PERM fi } # # Check socket directory # check_socket_dir() { if [ ! -d "$OCF_RESKEY_socketdir" ]; then if ! mkdir "$OCF_RESKEY_socketdir"; then ocf_exit_reason "Can't create directory $OCF_RESKEY_socketdir" exit $OCF_ERR_PERM fi if ! chown $OCF_RESKEY_pgdba:`getent passwd \ $OCF_RESKEY_pgdba | cut -d ":" -f 4` "$OCF_RESKEY_socketdir" then ocf_exit_reason "Can't change ownership for $OCF_RESKEY_socketdir" exit $OCF_ERR_PERM fi if ! chmod 2775 "$OCF_RESKEY_socketdir"; then ocf_exit_reason "Can't change permissions for $OCF_RESKEY_socketdir" exit $OCF_ERR_PERM fi else if ! runasowner "touch $OCF_RESKEY_socketdir/test.$$"; then ocf_exit_reason "$OCF_RESKEY_pgdba can't create files in $OCF_RESKEY_socketdir" exit $OCF_ERR_PERM fi rm $OCF_RESKEY_socketdir/test.$$ fi } print_crm_mon() { if [ -z "$CRM_MON_OUTPUT" ]; then ocf_version_cmp "$OCF_RESKEY_crm_feature_set" "3.1.0" res=$? if [ -z "$OCF_RESKEY_crm_feature_set" ] || [ $res -eq 2 ]; then XMLOPT="--output-as=xml" ocf_version_cmp "$OCF_RESKEY_crm_feature_set" "3.2.0" if [ $? -eq 1 ]; then crm_mon -1 $XMLOPT >/dev/null 2>&1 if [ $? -ne 0 ]; then XMLOPT="--as-xml" fi fi else XMLOPT="--as-xml" fi CRM_MON_OUTPUT=`exec_with_retry 0 crm_mon -1 $XMLOPT` fi printf "${CRM_MON_OUTPUT}\n" } # # 'main' starts here... # if [ $# -ne 1 ] then usage exit $OCF_ERR_GENERIC fi PIDFILE=${OCF_RESKEY_pgdata}/postmaster.pid BACKUPLABEL=${OCF_RESKEY_pgdata}/backup_label RESOURCE_NAME=`echo $OCF_RESOURCE_INSTANCE | cut -d ":" -f 1` PGSQL_WAL_RECEIVER_STATUS_ATTR="${RESOURCE_NAME}-receiver-status" RECOVERY_CONF=${OCF_RESKEY_pgdata}/recovery.conf NODENAME=$(ocf_local_nodename | tr '[A-Z]' '[a-z]') USE_STANDBY_SIGNAL=false case "$1" in methods) pgsql_methods exit $?;; meta-data) meta_data exit $OCF_SUCCESS;; esac [ "$__OCF_ACTION" != "validate-all" ] && OCF_CHECK_LEVEL=10 pgsql_validate_all rc=$? [ "$1" = "validate-all" ] && exit $rc if [ $rc -ne 0 ] then case "$1" in stop) if is_replication; then change_pgsql_status "$NODENAME" "UNKNOWN" fi exit $OCF_SUCCESS;; monitor) exit $OCF_NOT_RUNNING;; status) exit $OCF_NOT_RUNNING;; *) exit $rc;; esac fi US=`id -u -n` if [ $US != root -a $US != $OCF_RESKEY_pgdba ] then ocf_exit_reason "$0 must be run as root or $OCF_RESKEY_pgdba" exit $OCF_ERR_GENERIC fi # make psql command options if [ -n "$OCF_RESKEY_monitor_user" ]; then PGUSER=$OCF_RESKEY_monitor_user; export PGUSER PGPASSWORD=$OCF_RESKEY_monitor_password; export PGPASSWORD psql_options="-p $OCF_RESKEY_pgport $OCF_RESKEY_pgdb" else psql_options="-p $OCF_RESKEY_pgport -U $OCF_RESKEY_pgdba $OCF_RESKEY_pgdb" fi if [ -n "$OCF_RESKEY_pghost" ]; then psql_options="$psql_options -h $OCF_RESKEY_pghost" else if [ -n "$OCF_RESKEY_socketdir" ]; then psql_options="$psql_options -h $OCF_RESKEY_socketdir" fi fi if [ -n "$OCF_RESKEY_pgport" ]; then export PGPORT=$OCF_RESKEY_pgport fi if [ -n "$OCF_RESKEY_pglibs" ]; then if [ -n "$LD_LIBRARY_PATH" ]; then export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$OCF_RESKEY_pglibs else export LD_LIBRARY_PATH=$OCF_RESKEY_pglibs fi fi # What kind of method was invoked? case "$1" in status) if pgsql_status then ocf_log info "PostgreSQL is up" exit $OCF_SUCCESS else ocf_log info "PostgreSQL is down" exit $OCF_NOT_RUNNING fi;; monitor) pgsql_monitor exit $?;; start) pgsql_start exit $?;; promote) pgsql_promote exit $?;; demote) pgsql_demote exit $?;; notify) pgsql_notify exit $?;; stop) pgsql_stop exit $?;; *) exit $OCF_ERR_UNIMPLEMENTED;; esac