diff --git a/heartbeat/garbd b/heartbeat/garbd index aa0cea45f..b88d448fb 100755 --- a/heartbeat/garbd +++ b/heartbeat/garbd @@ -1,417 +1,430 @@ #!/bin/sh # # Copyright (c) 2015 Damien Ciabrini # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ## # README. # # Resource agent for garbd, the Galera arbitrator # # You can use this agent if you run an even number of galera nodes, # and you want an additional node to avoid split-brain situations. 
# # garbd requires that a Galera cluster is running, so make sure to # add a proper ordering constraint to the cluster, e.g.: # # pcs constraint order galera-master then garbd # # If you add garbd to the cluster while Galera is not running, you # might want to disable it before setting up ordering constraint, e.g.: # # pcs resource create garbd garbd \ # wsrep_cluster_address=gcomm://node1:4567,node2:4567 \ # meta target-role=stopped # # Use location constraints to avoid running galera and garbd on # the same node, e.g.: # # pcs constraint colocation add garbd with galera-master -INFINITY # pcs constraint location garbd prefers node3=INFINITY # ## ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### # Set default paramenter values OCF_RESKEY_binary_default="/usr/sbin/garbd" OCF_RESKEY_log_default="/var/log/garbd.log" OCF_RESKEY_pid_default="/var/run/garbd.pid" OCF_RESKEY_user_default="mysql" if [ "X${HOSTOS}" = "XOpenBSD" ];then OCF_RESKEY_group_default="_mysql" else OCF_RESKEY_group_default="mysql" fi : ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} : ${OCF_RESKEY_log=${OCF_RESKEY_log_default}} : ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}} : ${OCF_RESKEY_user=${OCF_RESKEY_user_default}} : ${OCF_RESKEY_group=${OCF_RESKEY_group_default}} usage() { cat < 1.0 Resource script for managing Galera arbitrator. Manages a galera arbitrator instance Location of the Galera arbitrator binary garbd server binary User running the garbd process garbd user Group running garbd (for logfile permissions) garbd group The logfile to be used for garbd. Galera arbitrator log file The pidfile to be used for garbd. Galera arbitrator pidfile Additional parameters which are passed to garbd on startup. Additional parameters to pass to garbd The galera cluster address. 
#######################################################################
# garbd_start
#
# Start the arbitrator as $OCF_RESKEY_user, record its pid in
# $OCF_RESKEY_pid and wait until garbd_monitor reports success.
#
# Returns:
#   $OCF_SUCCESS      garbd is running and connected to the cluster
#   $OCF_ERR_GENERIC  garbd is already running, could not be launched,
#                     its pidfile could not be written, or it stopped
#                     before connecting
#######################################################################
garbd_start()
{
    local rc
    local pid
    local garbd_params

    garbd_status info
    rc=$?
    if [ $rc -eq $OCF_SUCCESS ]; then
        ocf_exit_reason "garbd started outside of the cluster's control"
        return $OCF_ERR_GENERIC
    fi

    # Prepare a log file the unprivileged garbd process can write to.
    touch "$OCF_RESKEY_log"
    chown "$OCF_RESKEY_user:$OCF_RESKEY_group" "$OCF_RESKEY_log"
    chmod 0640 "$OCF_RESKEY_log"
    [ -x /sbin/restorecon ] && /sbin/restorecon "$OCF_RESKEY_log"

    garbd_params="--address=${OCF_RESKEY_wsrep_cluster_address}"
    garbd_params="${garbd_params} --group ${OCF_RESKEY_wsrep_cluster_name}"
    garbd_params="${garbd_params} --log ${OCF_RESKEY_log}"
    if [ -n "${OCF_RESKEY_options}" ]; then
        garbd_params="${garbd_params} --options=${OCF_RESKEY_options}"
    fi

    # garbd has no parameter to run as a specific user,
    # so we need to start it by our own means
    pid=$(su - -s /bin/sh "$OCF_RESKEY_user" -c "${OCF_RESKEY_binary} ${garbd_params} >/dev/null 2>&1 & echo \$!")
    if [ -z "$pid" ]; then
        # su printed no pid, so nothing was launched and there is
        # nothing worth recording in the pidfile.
        ocf_exit_reason "Could not run ${OCF_RESKEY_binary} as user ${OCF_RESKEY_user}"
        return $OCF_ERR_GENERIC
    fi

    # garbd doesn't create a pidfile either, so we create our own
    echo "$pid" > "$OCF_RESKEY_pid"
    rc=$?
    if [ $rc -ne 0 ]; then
        # rc was saved right after the write: $? would otherwise hold
        # the status of the test above, not of the failed write.
        ocf_exit_reason "Cannot create pidfile for garbd at $OCF_RESKEY_pid (rc=$rc), please check your installation"
        return $OCF_ERR_GENERIC
    fi

    # Spin waiting for garbd to connect to the cluster.
    # Let the CRM/LRM time us out if required.
    while :; do
        garbd_monitor info
        rc=$?
        if [ $rc -eq $OCF_NOT_RUNNING ]; then
            ocf_exit_reason "garbd failed to start (pid=$pid), check logs in ${OCF_RESKEY_log}"
            return $OCF_ERR_GENERIC
        elif [ $rc -eq $OCF_SUCCESS ]; then
            break
        fi
        sleep 2
    done

    ocf_log info "garbd connected to cluster \"${OCF_RESKEY_wsrep_cluster_name}\""
    return $OCF_SUCCESS
}

#######################################################################
# garbd_status LOGLEVEL
#
# Report whether the process recorded in $OCF_RESKEY_pid is running,
# based on the result of ocf_pidfile_status.
#
# Arguments:
#   $1  log level passed to ocf_log when a stale pidfile is removed
# Returns:
#   $OCF_SUCCESS      ocf_pidfile_status returned 0
#   $OCF_NOT_RUNNING  ocf_pidfile_status returned 2
#   $OCF_ERR_GENERIC  any other result; on 1 (stale pidfile) the
#                     pidfile is removed first
#######################################################################
garbd_status()
{
    local loglevel=$1
    local rc

    ocf_pidfile_status "$OCF_RESKEY_pid"
    rc=$?

    case $rc in
        0)  return $OCF_SUCCESS;;
        2)  return $OCF_NOT_RUNNING;;
        1)  # clean up if pidfile is stale
            ocf_log "$loglevel" "garbd not running: removing old PID file"
            rm -f "$OCF_RESKEY_pid";;
    esac
    return $OCF_ERR_GENERIC
}

#######################################################################
# _port_by_pid PID
#
# Succeed when process PID owns at least one established TCP
# connection, as listed by netstat when available and by ss otherwise.
#
# Arguments:
#   $1  process id; an empty or non-numeric value never matches
# Returns:
#   0 when a matching connection is listed, non-zero otherwise
#######################################################################
_port_by_pid()
{
    local pid
    pid="$1"

    # The pid is interpolated into a regular expression below: refuse
    # anything that is not a plain number. An empty pid would otherwise
    # match the connections of every process.
    case "$pid" in
        ''|*[!0-9]*) return 1;;
    esac

    # Both patterns delimit the pid on each side so that, e.g., pid 123
    # does not match a connection owned by pid 4123 or pid 1234.
    if have_binary "netstat"; then
        netstat -tnp 2>/dev/null | grep -s -q "ESTABLISHED.*[[:space:]]${pid}/"
    else
        ss -Htnp 2>/dev/null | grep -s -q "^ESTAB.*pid=${pid}[^0-9]"
    fi
}

#######################################################################
# garbd_monitor LOGLEVEL
#
# Check that garbd is running and still connected to the cluster.
#
# Arguments:
#   $1  log level for ocf_log; forced to "info" during a probe
# Returns:
#   $OCF_SUCCESS      running with an established TCP connection
#   $OCF_NOT_RUNNING  not running (any non-success status is mapped
#                     to this value during a probe)
#   $OCF_ERR_GENERIC  running without an established connection, or
#                     garbd_status reported an error
#######################################################################
garbd_monitor()
{
    local rc
    local pid
    local loglevel=$1

    # Set loglevel to info during probe
    if ocf_is_probe; then
        loglevel="info"
    fi

    garbd_status "$loglevel"
    rc=$?

    # probe just wants to know if garbd is running or not
    if ocf_is_probe && [ $rc -ne $OCF_SUCCESS ]; then
        rc=$OCF_NOT_RUNNING
    fi

    # Consider garbd is working if it's connected to at least
    # one node in the galera cluster.
    # Note: a Galera node in Non-Primary state will be
    # stopped by the galera RA. So we can assume that
    # garbd will always be connected to the right partition
    if [ $rc -eq $OCF_SUCCESS ]; then
        pid=$(cat "$OCF_RESKEY_pid" 2>/dev/null)
        if ! _port_by_pid "$pid"; then
            ocf_log "$loglevel" "garbd disconnected from cluster \"${OCF_RESKEY_wsrep_cluster_name}\""
            rc=$OCF_ERR_GENERIC
        fi
    fi

    return $rc
}

#######################################################################
# garbd_stop
#
# Stop the process recorded in $OCF_RESKEY_pid via ocf_stop_processes
# and remove the pidfile once it is gone.
#
# Returns:
#   $OCF_SUCCESS      no pidfile exists, or the process was stopped
#   $OCF_ERR_GENERIC  ocf_stop_processes reported a failure
#######################################################################
garbd_stop()
{
    local pid

    if [ ! -f "$OCF_RESKEY_pid" ]; then
        ocf_log info "garbd is not running"
        return $OCF_SUCCESS
    fi

    pid=$(cat "$OCF_RESKEY_pid" 2>/dev/null)

    ocf_log info "stopping garbd"

    # make sure the process is stopped; an empty pidfile names no
    # process, so there is nothing to signal in that case
    if [ -n "$pid" ] && ! ocf_stop_processes TERM 10 "$pid"; then
        return $OCF_ERR_GENERIC
    fi

    rm -f "$OCF_RESKEY_pid"
    ocf_log info "garbd stopped"
    return $OCF_SUCCESS
}

#######################################################################
# garbd_validate
#
# Check the installation and the resource parameters.
#
# Returns:
#   $OCF_SUCCESS         everything checked out
#   $OCF_ERR_INSTALLED   the garbd binary, both netstat and ss, the
#                        user or the group is missing
#   $OCF_ERR_CONFIGURED  wsrep_cluster_address or wsrep_cluster_name is
#                        empty, a cluster node lacks a port, or SSL is
#                        configured without socket.ssl_cipher
#######################################################################
garbd_validate()
{
    local node

    if ! have_binary "$OCF_RESKEY_binary"; then
        ocf_exit_reason "Setup problem: couldn't find command: $OCF_RESKEY_binary"
        return $OCF_ERR_INSTALLED
    fi

    # Either tool can list the TCP connections of the garbd process.
    if ! have_binary "netstat" && ! have_binary "ss"; then
        ocf_exit_reason "Setup problem: couldn't find command: netstat or ss"
        return $OCF_ERR_INSTALLED
    fi

    if [ -z "$OCF_RESKEY_wsrep_cluster_address" ]; then
        ocf_exit_reason "garbd must be configured with a wsrep_cluster_address value."
        return $OCF_ERR_CONFIGURED
    fi

    # unlike galera RA, ports must be set in cluster address for garbd
    # https://github.com/codership/galera/issues/98
    for node in $(printf '%s\n' "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' '); do
        if ! printf '%s\n' "$node" | grep -s -q ':[1-9][0-9]*$'; then
            ocf_exit_reason "wsrep_cluster_address must specify ports (gcomm://node1:port,node2:port)."
            return $OCF_ERR_CONFIGURED
        fi
    done

    # Ensure that the encryption method is set if garbd is configured
    # to use SSL.
    if printf '%s\n' "$OCF_RESKEY_options" | grep -s -q -i -E '\bsocket.ssl_(key|cert)='; then
        if ! printf '%s\n' "$OCF_RESKEY_options" | grep -s -q -i -E '\bsocket.ssl_cipher='; then
            ocf_exit_reason "option socket.ssl_cipher must be set if SSL is enabled."
            return $OCF_ERR_CONFIGURED
        fi
    fi

    if [ -z "$OCF_RESKEY_wsrep_cluster_name" ]; then
        ocf_exit_reason "garbd must be configured with a wsrep_cluster_name value."
        return $OCF_ERR_CONFIGURED
    fi

    if ! getent passwd "$OCF_RESKEY_user" >/dev/null 2>&1; then
        ocf_exit_reason "User $OCF_RESKEY_user doesn't exist"
        return $OCF_ERR_INSTALLED
    fi

    if ! getent group "$OCF_RESKEY_group" >/dev/null 2>&1; then
        ocf_exit_reason "Group $OCF_RESKEY_group doesn't exist"
        return $OCF_ERR_INSTALLED
    fi

    return $OCF_SUCCESS
}

# Informational actions are answered before any validation takes place.
case "$1" in
    meta-data)  meta_data
                exit $OCF_SUCCESS;;
    usage|help) usage
                exit $OCF_SUCCESS;;
esac

garbd_validate
rc=$?
# Trap configuration errors early, but don't block stop in such cases.
# $rc holds the status of the garbd_validate call made just before.
LSB_STATUS_STOPPED=3
if [ $rc -ne 0 ]; then
    case "$1" in
        stop)   exit $OCF_SUCCESS;;
        status) exit $LSB_STATUS_STOPPED;;
        *)      exit $rc;;
    esac
fi

# What kind of method was invoked?
# promote and demote are deliberately not dispatched: this file defines
# no garbd_promote or garbd_demote function, so they are reported as
# unimplemented like any other unknown action.
case "$1" in
    start)        garbd_start;;
    stop)         garbd_stop;;
    status)       garbd_status err;;
    monitor)      garbd_monitor err;;
    validate-all) exit $OCF_SUCCESS;;
    *)            usage
                  exit $OCF_ERR_UNIMPLEMENTED;;
esac