diff --git a/heartbeat/clvm b/heartbeat/clvm index 23e6f9faf..94a43927a 100755 --- a/heartbeat/clvm +++ b/heartbeat/clvm @@ -1,428 +1,428 @@ #!/bin/bash # -# Copyright (c) 2014 David Vossel +# Copyright (c) 2014 David Vossel # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs . ${OCF_FUNCTIONS_DIR}/ocf-directories ####################################################################### meta_data() { cat < 1.0 This agent manages the clvmd daemon. clvmd Start with cmirrord (cluster mirror log daemon). activate cmirrord Options to clvmd. Refer to clvmd.8 for detailed descriptions. Daemon Options Whether or not to activate all cluster volume groups after starting clvmd. Note that clustered volume groups will always be deactivated before clvmd stops regardless of what this option is set to. Activate volume groups END } ####################################################################### : ${OCF_RESKEY_daemon_options:="-d0"} : ${OCF_RESKEY_activate_vgs:="true"} sbindir=$HA_SBIN_DIR if [ -z $sbindir ]; then sbindir=/usr/sbin fi DAEMON="clvmd" CMIRROR="cmirrord" DAEMON_PATH="${sbindir}/clvmd" CMIRROR_PATH="${sbindir}/cmirrord" LVMCONF="${sbindir}/lvmconf" LOCK_FILE="/var/lock/subsys/$DAEMON" # attempt to detect where the vg tools are located # for some reason this isn't consistent with sbindir # in some distros. vgtoolsdir=$(dirname $(which vgchange 2> /dev/null) 2> /dev/null) if [ -z "$vgtoolsdir" ]; then vgtoolsdir="$sbindir" fi LVM_VGCHANGE=${vgtoolsdir}/vgchange LVM_VGDISPLAY=${vgtoolsdir}/vgdisplay LVM_VGSCAN=${vgtoolsdir}/vgscan # Leaving this in for legacy. We do not want to advertise # that the ability to set options in the sysconfig exists; we want # to expand the OCF style options as necessary instead. [ -f /etc/sysconfig/cluster ] && . /etc/sysconfig/cluster [ -f /etc/sysconfig/$DAEMON ] && . /etc/sysconfig/$DAEMON CLVMD_TIMEOUT="90" if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then CLVMD_TIMEOUT=$(($OCF_RESKEY_CRM_meta_timeout/1000)) fi clvmd_usage() { cat </dev/null | grep -a "${binary}" > /dev/null 2>&1 if [ $? -eq 0 ];then # shortcut without requiring pgrep to search through all procs return $OCF_SUCCESS fi fi pid=$(pgrep ${binary}) case $? in 0) ocf_log info "PID file (pid:${pid} at $pidfile) created for ${binary}." 
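# Cache the pid discovered by pgrep so that subsequent monitor calls can
# take the cheap /proc/<pid>/cmdline shortcut above instead of paying for
# a full pgrep scan every time.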
echo "$pid" > $pidfile return $OCF_SUCCESS;; 1) rm -f "$pidfile" > /dev/null 2>&1 ocf_log info "$binary is not running" return $OCF_NOT_RUNNING;; *) rm -f "$pidfile" > /dev/null 2>&1 ocf_exit_reason "Error encountered detecting pid status of $binary" return $OCF_ERR_GENERIC;; esac } clvmd_status() { local rc local mirror_rc clvmd_validate if [ $? -ne $OCF_SUCCESS ]; then ocf_exit_reason "Unable to monitor, Environment validation failed." return $? fi check_process $DAEMON rc=$? mirror_rc=$rc if ocf_is_true $OCF_RESKEY_with_cmirrord; then check_process $CMIRROR mirror_rc=$? fi # If these ever don't match, return error to force recovery if [ $mirror_rc -ne $rc ]; then return $OCF_ERR_GENERIC fi return $rc } # NOTE: replace this with vgs, once display filter per attr is implemented. clustered_vgs() { ${LVM_VGDISPLAY} 2>/dev/null | awk 'BEGIN {RS="VG Name"} {if (/Clustered/) print $1;}' } wait_for_process() { local binary=$1 local timeout=$2 local count=0 ocf_log info "Waiting for $binary to exit" while [ $count -le $timeout ]; do check_process $binary if [ $? -eq $OCF_NOT_RUNNING ]; then ocf_log info "$binary terminated" return $OCF_SUCCESS fi sleep 1 count=$((count+1)) done return $OCF_ERR_GENERIC } time_left() { local end=$1 local default=$2 local now=$SECONDS local result=0 result=$(( $end - $now )) if [ $result -lt $default ]; then return $default fi return $result } clvmd_stop() { local LVM_VGS local rc=$OCF_SUCCESS local end=$(( $SECONDS + $CLVMD_TIMEOUT )) clvmd_status if [ $? -eq $OCF_NOT_RUNNING ]; then return $OCF_SUCCESS fi check_process $DAEMON if [ $? -ne $OCF_NOT_RUNNING ]; then LVM_VGS="$(clustered_vgs)" if [ -n "$LVM_VGS" ]; then ocf_log info "Deactivating clustered VG(s):" ocf_run ${LVM_VGCHANGE} -anl $LVM_VGS if [ $? -ne 0 ]; then ocf_exit_reason "Failed to deactivate volume groups, cluster vglist = $LVM_VGS" return $OCF_ERR_GENERIC fi fi ocf_log info "Signaling $DAEMON to exit" killall -TERM $DAEMON if [ $? != 0 ]; then ocf_exit_reason "Failed to signal -TERM to $DAEMON" return $OCF_ERR_GENERIC fi wait_for_process $DAEMON $CLVMD_TIMEOUT rc=$? if [ $rc -ne $OCF_SUCCESS ]; then ocf_exit_reason "$DAEMON failed to exit" return $rc fi rm -f $LOCK_FILE fi check_process $CMIRROR if [ $? -ne $OCF_NOT_RUNNING ] && ocf_is_true $OCF_RESKEY_with_cmirrord; then local timeout ocf_log info "Signaling $CMIRROR to exit" killall -INT $CMIRROR time_left $end 10; timeout=$? wait_for_process $CMIRROR $timeout rc=$? if [ $rc -ne $OCF_SUCCESS ]; then killall -KILL $CMIRROR time_left $end 10; timeout=$? wait_for_process $CMIRROR $(time_left $end 10) rc=$? fi fi return $rc } start_process() { local binary_path=$1 local opts=$2 check_process "$(basename $binary_path)" if [ $? -ne $OCF_SUCCESS ]; then ocf_log info "Starting $binary_path: " ocf_run $binary_path $opts rc=$? if [ $rc -ne 0 ]; then ocf_exit_reason "Failed to launch $binary_path, exit code $rc" exit $OCF_ERR_GENERIC fi fi return $OCF_SUCCESS } clvmd_activate_all() { if ! ocf_is_true "$OCF_RESKEY_activate_vgs"; then ocf_log info "skipping vg activation, activate_vgs is set to $OCF_RESKEY_activate_vgs" return $OCF_SUCCESS fi # Activate all volume groups by leaving the # "volume group name" parameter empty ocf_run ${LVM_VGCHANGE} -aay if [ $? -ne 0 ]; then ocf_log info "Failed to activate VG(s):" clvmd_stop return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } clvmd_start() { local rc=0 local CLVMDOPTS="-T${CLVMD_TIMEOUT} $OCF_RESKEY_daemon_options" clvmd_validate if [ $? 
-ne $OCF_SUCCESS ]; then ocf_exit_reason "Unable to start, Environment validation failed." return $? fi clvmd_status if [ $? -eq $OCF_SUCCESS ]; then ocf_log debug "$DAEMON already started" clvmd_activate_all return $?; fi # autoset locking type to clustered when the lvmconf tool is available if [ -x "$LVMCONF" ]; then $LVMCONF --enable-cluster > /dev/null 2>&1 fi # if either of these fails, the script will exit OCF_ERR_GENERIC if ocf_is_true $OCF_RESKEY_with_cmirrord; then start_process $CMIRROR_PATH fi start_process $DAEMON_PATH $CLVMDOPTS # Refresh local cache. # # It's possible that new PVs were added to this or other VGs # while this node was down. So we run vgscan here to avoid # any potential "Missing UUID" messages with subsequent # LVM commands. # The following step would be better and more informative to the user: # 'action "Refreshing VG(s) local cache:" ${LVM_VGSCAN}' # but it could show warnings such as: # 'clvmd not running on node x-y-z Unable to obtain global lock.' # and the action would be shown as FAILED when in reality it didn't fail. # Ideally vgscan should have a startup mode that would not print # unnecessary warnings. ${LVM_VGSCAN} > /dev/null 2>&1 touch $LOCK_FILE clvmd_activate_all clvmd_status return $? } case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS;; start) clvmd_start;; stop) clvmd_stop;; monitor) clvmd_status;; validate-all) clvmd_validate;; usage|help) clvmd_usage;; *) clvmd_usage exit $OCF_ERR_UNIMPLEMENTED;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc diff --git a/heartbeat/docker b/heartbeat/docker index 7cf10b253..00ef0da16 100755 --- a/heartbeat/docker +++ b/heartbeat/docker @@ -1,436 +1,436 @@ #!/bin/sh # # The docker HA resource agent creates and launches a docker container # based off a supplied docker image. Containers managed by this agent # are both created and removed upon the agent's start and stop actions. # -# Copyright (c) 2014 David Vossel +# Copyright (c) 2014 David Vossel # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### meta_data() { cat < 1.0 The docker HA resource agent creates and launches a docker container based off a supplied docker image. Containers managed by this agent are both created and removed upon the agent's start and stop actions. Docker container resource agent. 
The docker image to base this container off of. docker image The name to give the created container. By default this will be the resource's instance name. docker container name Allow the image to be pulled from the configured docker registry when the image does not exist locally. NOTE: this can drastically increase the time required to start the container if the image repository is pulled over the network. Allow pulling non-local images Add options to be appended to the 'docker run' command which is used when creating the container during the start action. This option allows users to do things such as setting a custom entry point and injecting environment variables into the newly created container. Note the '-d' option is supplied regardless of this value to force containers to run in the background. NOTE: Do not explicitly specify the --name argument in the run_opts. This agent will set --name using either the resource's instance name or the name provided in the 'name' argument of this agent. run options Specify a command to launch within the container once it has initialized. run command Specify the full path of a command to launch within the container to check the health of the container. This command must return 0 to indicate that the container is healthy. A non-zero return code will indicate that the container has failed and should be recovered. The command is executed using nsenter. In the future 'docker exec' will be used once it is more widely supported. monitor command Kill a container immediately rather than waiting for it to shut down gracefully force kill Allow the container to be reused after stopping. By default containers are removed after stop. With the reuse option, containers will persist after they stop. reuse container END } ####################################################################### REQUIRE_IMAGE_PULL=0 docker_usage() { cat <&1) rc=$? if [ $rc -ne 0 ]; then ocf_log info "monitor cmd exit code = $rc" ocf_log info "stdout/stderr: $out" if [ $rc -eq 127 ]; then ocf_exit_reason "monitor_cmd, ${OCF_RESKEY_monitor_cmd} , not found within container." # there is no recovering from this, exit immediately exit $OCF_ERR_ARGS fi rc=$OCF_ERR_GENERIC else ocf_log info "monitor cmd passed: exit code = $rc" fi return $rc } container_exists() { docker inspect --format {{.State.Running}} $CONTAINER | egrep '(true|false)' >/dev/null 2>&1 } remove_container() { if ocf_is_true "$OCF_RESKEY_reuse"; then # never remove the container if we have reuse enabled. return 0 fi container_exists if [ $? -ne 0 ]; then # don't attempt to remove a container that doesn't exist return 0 fi ocf_log notice "Cleaning up inactive container, ${CONTAINER}." ocf_run docker rm $CONTAINER } docker_simple_status() { local val container_exists if [ $? -ne 0 ]; then return $OCF_NOT_RUNNING fi # retrieve the 'Running' attribute for the container val=$(docker inspect --format {{.State.Running}} $CONTAINER 2>/dev/null) if [ $? -ne 0 ]; then #not running as a result of container not being found return $OCF_NOT_RUNNING fi if ocf_is_true "$val"; then # container exists and is running return $OCF_SUCCESS fi return $OCF_NOT_RUNNING } docker_monitor() { local rc=0 docker_simple_status rc=$? if [ $rc -ne 0 ]; then return $rc fi monitor_cmd_exec } docker_start() { local run_opts="-d --name=${CONTAINER}" # check to see if the container has already started docker_simple_status if [ $? 
-eq $OCF_SUCCESS ]; then return $OCF_SUCCESS fi if [ -n "$OCF_RESKEY_run_opts" ]; then run_opts="$run_opts $OCF_RESKEY_run_opts" fi if [ $REQUIRE_IMAGE_PULL -eq 1 ]; then ocf_log notice "Beginning pull of image, ${OCF_RESKEY_image}" docker pull "${OCF_RESKEY_image}" if [ $? -ne 0 ]; then ocf_exit_reason "failed to pull image ${OCF_RESKEY_image}" return $OCF_ERR_GENERIC fi fi if ocf_is_true "$OCF_RESKEY_reuse" && container_exists; then ocf_log info "starting existing container $CONTAINER." ocf_run docker start $CONTAINER else # make sure any previous container matching our container name is cleaned up first. # we already know at this point it wouldn't be running remove_container ocf_log info "running container $CONTAINER for the first time" ocf_run docker run $run_opts $OCF_RESKEY_image $OCF_RESKEY_run_cmd fi if [ $? -ne 0 ]; then ocf_exit_reason "docker failed to launch container" return $OCF_ERR_GENERIC fi # wait for monitor to pass before declaring that the container is started while true; do docker_simple_status if [ $? -ne $OCF_SUCCESS ]; then ocf_exit_reason "Newly created docker container exited after start" return $OCF_ERR_GENERIC fi monitor_cmd_exec if [ $? -eq $OCF_SUCCESS ]; then ocf_log notice "Container $CONTAINER started successfully" return $OCF_SUCCESS fi ocf_exit_reason "waiting on monitor_cmd to pass after start" sleep 1 done } docker_stop() { local timeout=60 docker_simple_status if [ $? -eq $OCF_NOT_RUNNING ]; then remove_container return $OCF_SUCCESS fi if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000) -10 )) if [ $timeout -lt 10 ]; then timeout=10 fi fi if ocf_is_true "$OCF_RESKEY_force_kill"; then ocf_run docker kill $CONTAINER else ocf_log debug "waiting $timeout second[s] before killing container" ocf_run docker stop -t=$timeout $CONTAINER fi if [ $? -ne 0 ]; then ocf_exit_reason "Failed to stop container, ${CONTAINER}, based on image, ${OCF_RESKEY_image}." return $OCF_ERR_GENERIC fi remove_container if [ $? -ne 0 ]; then ocf_exit_reason "Failed to remove stopped container, ${CONTAINER}, based on image, ${OCF_RESKEY_image}." return $OCF_ERR_GENERIC fi return $OCF_SUCCESS } image_exists() { # assume that OCF_RESKEY_name has been validated local IMAGE_NAME="$(echo ${OCF_RESKEY_name} | awk -F':' '{print $1}')" # if no tag was specified, use default "latest" local COLON_FOUND=0 local IMAGE_TAG="latest" COLON_FOUND="$(echo "${OCF_RESKEY_name}" | grep -o ':' | grep -c .)" if [ ${COLON_FOUND} -ne 0 ]; then IMAGE_TAG="$(echo ${OCF_RESKEY_name} | awk -F':' '{print $NF}')" fi # IMAGE_NAME might be in one of the following formats: # - image # - repository/image # - docker.io/image (some distros display "docker.io/" as a prefix) docker images | awk '{print $1 ":" $2}' | egrep -q -s "^(docker.io\/)?${IMAGE_NAME}:${IMAGE_TAG}\$" if [ $? -eq 0 ]; then # image found return 0 fi if ocf_is_true "$OCF_RESKEY_allow_pull"; then REQUIRE_IMAGE_PULL=1 ocf_log notice "Image (${OCF_RESKEY_image}) does not exist locally but will be pulled during start" return 0 fi # image not found. return 1 } docker_validate() { check_binary docker if [ -z "$OCF_RESKEY_image" ]; then ocf_exit_reason "'image' option is required" exit $OCF_ERR_CONFIGURED fi if [ -n "$OCF_RESKEY_monitor_cmd" ]; then ocf_log info "checking for nsenter, which is required when 'monitor_cmd' is specified" check_binary nsenter fi image_exists if [ $? -ne 0 ]; then ocf_exit_reason "base image, ${OCF_RESKEY_image}, could not be found." 
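# As an illustration of the name/tag split image_exists() performs above, a
# hypothetical value "myregistry/app:1.2" yields IMAGE_NAME="myregistry/app"
# and IMAGE_TAG="1.2", while a bare "app" falls back to IMAGE_TAG="latest".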
exit $OCF_ERR_CONFIGURED fi return $OCF_SUCCESS } : ${OCF_RESKEY_name=${OCF_RESOURCE_INSTANCE}} if [ -n "$OCF_RESKEY_container" ]; then # we'll keep the container attribute around for a bit in order not to break # any existing deployments. The 'name' attribute is preferred now though. CONTAINER=$OCF_RESKEY_container ocf_log warn "The 'container' attribute is deprecated" else CONTAINER=$OCF_RESKEY_name fi case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS;; start) docker_validate docker_start;; stop) docker_stop;; monitor) docker_monitor;; validate-all) docker_validate;; usage|help) docker_usage exit $OCF_SUCCESS ;; *) docker_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc diff --git a/heartbeat/galera b/heartbeat/galera index d74a70daa..920507bc0 100755 --- a/heartbeat/galera +++ b/heartbeat/galera @@ -1,721 +1,721 @@ #!/bin/sh # -# Copyright (c) 2014 David Vossel +# Copyright (c) 2014 David Vossel # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ## # README. # # This agent only supports being configured as a multistate Master # resource. # # Slave vs Master role: # # During the 'Slave' role, galera instances are in read-only mode and # will not attempt to connect to the cluster. This role exists only as # a means to determine which galera instance is the most up-to-date. The # most up-to-date node will be used to bootstrap a galera cluster that # has no current members. # # The galera instances will only begin to be promoted to the Master role # once all the nodes in the 'wsrep_cluster_address' connection address # have entered read-only mode. At that point the node containing the # database that is most current will be promoted to Master. Once the first # Master instance bootstraps the galera cluster, the other nodes will be # promoted to Master as well. # # Example: Create a galera cluster using nodes rhel7-auto1 rhel7-auto2 rhel7-auto3 # # pcs resource create db galera enable_creation=true \ # wsrep_cluster_address="gcomm://rhel7-auto1,rhel7-auto2,rhel7-auto3" meta master-max=3 --master # # By setting the 'enable_creation' option, the database will be automatically # generated at startup. The meta attribute 'master-max=3' means that all 3 # nodes listed in the wsrep_cluster_address list will be allowed to connect # to the galera cluster and perform replication. 
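#
# If the pacemaker cluster contains nodes that should never run galera, a
# location constraint can keep the instances off of them, e.g. (illustrative,
# assuming a fourth node named rhel7-auto4 and the default master resource
# name db-master):
#
# pcs constraint location db-master avoids rhel7-auto4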
# # NOTE: If you have more nodes in the pacemaker cluster than you wish # to have in the galera cluster, make sure to use location constraints to prevent # pacemaker from attempting to place a galera instance on a node that is # not in the 'wsrep_cluster_address' list. # ## ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs . ${OCF_FUNCTIONS_DIR}/mysql-common.sh # It is common for some galera instances to store the # check user that can be used to query status # in this file if [ -f "/etc/sysconfig/clustercheck" ]; then . /etc/sysconfig/clustercheck fi ####################################################################### usage() { cat < 1.0 Resource script for managing the galera database. Manages a galera instance Location of the MySQL server binary MySQL server binary Location of the MySQL client binary MySQL client binary Configuration file MySQL config Directory containing databases MySQL datadir User running MySQL daemon MySQL user Group running MySQL daemon (for logfile and directory permissions) MySQL group The logfile to be used for mysqld. MySQL log file The pidfile to be used for mysqld. MySQL pid file The socket to be used for mysqld. MySQL socket If the MySQL database does not exist, it will be created Create the database if it does not exist Additional parameters which are passed to the mysqld on startup. (e.g. --skip-external-locking or --skip-grant-tables) Additional parameters to pass to mysqld The galera cluster address. This takes the form of: gcomm://node,node,node Only nodes present in this node list will be allowed to start a galera instance. It is expected that the galera node names listed in this address match valid pacemaker node names. Galera cluster address Cluster check user. MySQL test user Cluster check user password check password END } get_option_variable() { local key=$1 $MYSQL $MYSQL_OPTIONS_CHECK -e "SHOW VARIABLES like '$key';" | tail -1 } get_status_variable() { local key=$1 $MYSQL $MYSQL_OPTIONS_CHECK -e "show status like '$key';" | tail -1 } set_bootstrap_node() { local node=$1 ${HA_SBIN_DIR}/crm_attribute -N $node -l reboot --name "${INSTANCE_ATTR_NAME}-bootstrap" -v "true" } clear_bootstrap_node() { ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-bootstrap" -D } is_bootstrap() { ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-bootstrap" -Q 2>/dev/null } clear_last_commit() { ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" -D } set_last_commit() { ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" -v $1 } get_last_commit() { local node=$1 if [ -z "$node" ]; then ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" -Q 2>/dev/null else ${HA_SBIN_DIR}/crm_attribute -N $node -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" -Q 2>/dev/null fi } wait_for_sync() { local state=$(get_status_variable "wsrep_local_state") ocf_log info "Waiting for database to sync with the cluster. " while [ "$state" != "4" ]; do sleep 1 state=$(get_status_variable "wsrep_local_state") done ocf_log info "Database synced." 
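# (wsrep_local_state 4 is galera's "Synced" state; the loop above waits out
# the transient states 1 "Joining", 2 "Donor/Desynced" and 3 "Joined".)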
} is_primary() { cluster_status=$(get_status_variable "wsrep_cluster_status") if [ "$cluster_status" = "Primary" ]; then return 0 fi if [ -z "$cluster_status" ]; then ocf_exit_reason "Unable to retrieve wsrep_cluster_status, verify check_user '$OCF_RESKEY_check_user' has permissions to view status" else ocf_log info "Galera instance wsrep_cluster_status=${cluster_status}" fi return 1 } is_readonly() { local res=$(get_option_variable "read_only") if ! ocf_is_true "$res"; then return 1 fi cluster_status=$(get_status_variable "wsrep_cluster_status") if ! [ "$cluster_status" = "Disconnected" ]; then return 1 fi return 0 } master_exists() { if [ "$__OCF_ACTION" = "demote" ]; then # We don't want to detect master instances during demote. # 1. we could be detecting ourselves as being master, which is no longer the case. # 2. we could be detecting other master instances that are in the process of shutting down. # by not detecting other master instances in "demote" we are deferring this check # to the next recurring monitor operation which will be much more accurate return 1 fi # determine if a master instance is already up and is healthy crm_mon --as-xml | grep "resource.*id=\"${OCF_RESOURCE_INSTANCE}\".*role=\"Master\".*active=\"true\".*orphaned=\"false\".*failed=\"false\"" > /dev/null 2>&1 return $? } clear_master_score() { local node=$1 if [ -z "$node" ]; then $CRM_MASTER -D else $CRM_MASTER -D -N $node fi } set_master_score() { local node=$1 if [ -z "$node" ]; then $CRM_MASTER -v 100 else $CRM_MASTER -N $node -v 100 fi } promote_everyone() { for node in $(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' '); do set_master_score $node done } greater_than_equal_long() { # there are values we need to compare in this script # that are too large for shell -gt to process echo | awk -v n1="$1" -v n2="$2" '{if (n1>=n2) printf ("true"); else printf ("false");}' | grep -q "true" } detect_first_master() { local best_commit=0 local best_node="$NODENAME" local last_commit=0 local missing_nodes=0 for node in $(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' '); do last_commit=$(get_last_commit $node) if [ -z "$last_commit" ]; then ocf_log info "Waiting on node <${node}> to report database status before Master instances can start." missing_nodes=1 continue fi # this means -1, or that no commit has occurred yet. if [ "$last_commit" = "18446744073709551615" ]; then last_commit="0" fi greater_than_equal_long "$last_commit" "$best_commit" if [ $? -eq 0 ]; then best_node=$node best_commit=$last_commit fi done if [ $missing_nodes -eq 1 ]; then return fi ocf_log info "Promoting $best_node to be our bootstrap node" set_master_score $best_node set_bootstrap_node $best_node } # For galera, promote is really start galera_promote() { local rc local extra_opts local bootstrap master_exists if [ $? -eq 0 ]; then # join without bootstrapping extra_opts="--wsrep-cluster-address=${OCF_RESKEY_wsrep_cluster_address}" else bootstrap=$(is_bootstrap) if ocf_is_true $bootstrap; then ocf_log info "Node <${NODENAME}> is bootstrapping the cluster" extra_opts="--wsrep-cluster-address=gcomm://" else ocf_exit_reason "Failure: attempted to promote Master instance of $OCF_RESOURCE_INSTANCE before the bootstrap node has been detected." clear_last_commit return $OCF_ERR_GENERIC fi fi galera_monitor if [ $? 
-eq $OCF_RUNNING_MASTER ]; then if ocf_is_true $bootstrap; then promote_everyone clear_bootstrap_node ocf_log info "bootstrap node already up, promoting the rest of the galera instances." fi clear_last_commit return $OCF_SUCCESS fi # last commit is no longer relevant once promoted clear_last_commit mysql_common_prepare_dirs mysql_common_start "$extra_opts" rc=$? if [ $rc != $OCF_SUCCESS ]; then return $rc fi galera_monitor rc=$? if [ $rc != $OCF_SUCCESS -a $rc != $OCF_RUNNING_MASTER ]; then ocf_exit_reason "Failed initial monitor action" return $rc fi is_readonly if [ $? -eq 0 ]; then ocf_exit_reason "Failure. Master instance started in read-only mode, check configuration." return $OCF_ERR_GENERIC fi is_primary if [ $? -ne 0 ]; then ocf_exit_reason "Failure. Master instance started, but is not in Primary mode." return $OCF_ERR_GENERIC fi if ocf_is_true $bootstrap; then promote_everyone clear_bootstrap_node ocf_log info "Bootstrap complete, promoting the rest of the galera instances." else # if this is not the bootstrap node, make sure this instance # syncs with the rest of the cluster before promotion returns. wait_for_sync fi ocf_log info "Galera started" return $OCF_SUCCESS } galera_demote() { mysql_common_stop rc=$? if [ $rc -ne $OCF_SUCCESS ] && [ $rc -ne $OCF_NOT_RUNNING ]; then ocf_exit_reason "Failed to stop Master galera instance during demotion" return $rc fi # if this node was previously a bootstrap node, that is no longer the case. clear_bootstrap_node clear_last_commit # record last commit by "starting" galera. Start is just detection of the last sequence number galera_start } galera_start() { local last_commit echo $OCF_RESKEY_wsrep_cluster_address | grep -q $NODENAME if [ $? -ne 0 ]; then ocf_exit_reason "local node <${NODENAME}> must be a member of the wsrep_cluster_address <${OCF_RESKEY_wsrep_cluster_address}> to start this galera instance" return $OCF_ERR_CONFIGURED fi galera_monitor if [ $? -eq $OCF_RUNNING_MASTER ]; then ocf_exit_reason "master galera instance started outside of the cluster's control" return $OCF_ERR_GENERIC fi mysql_common_prepare_dirs ocf_log info "attempting to detect last commit version by reading ${OCF_RESKEY_datadir}/grastate.dat" last_commit="$(cat ${OCF_RESKEY_datadir}/grastate.dat | sed -n 's/^seqno.\s*\(.*\)\s*$/\1/p')" if [ -z "$last_commit" ] || [ "$last_commit" = "-1" ]; then ocf_log info "now attempting to detect last commit version using 'mysqld_safe --wsrep-recover'" local tmp=$(mktemp) ${OCF_RESKEY_binary} --defaults-file=$OCF_RESKEY_config \ --pid-file=$OCF_RESKEY_pid \ --socket=$OCF_RESKEY_socket \ --datadir=$OCF_RESKEY_datadir \ --user=$OCF_RESKEY_user \ --wsrep-recover > $tmp 2>&1 last_commit="$(cat $tmp | sed -n 's/.*WSREP\:\s*[R|r]ecovered\s*position.*\:\(.*\)\s*$/\1/p')" rm -f $tmp if [ "$last_commit" = "-1" ]; then last_commit="0" fi fi if [ -z "$last_commit" ]; then ocf_exit_reason "Unable to detect last known write sequence number" clear_last_commit return $OCF_ERR_GENERIC fi ocf_log info "Last commit version found: $last_commit" set_last_commit $last_commit master_exists if [ $? -eq 0 ]; then ocf_log info "Master instances are already up, setting master score so this instance will join galera cluster." set_master_score $NODENAME else clear_master_score detect_first_master fi return $OCF_SUCCESS } galera_monitor() { local rc local status_loglevel="err" # Set loglevel to info during probe if ocf_is_probe; then status_loglevel="info" fi mysql_common_status $status_loglevel rc=$? 
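# For a multistate resource, monitor reports the role through its exit
# code: OCF_SUCCESS (0) means running as Slave, OCF_RUNNING_MASTER (8)
# means running as Master, and OCF_NOT_RUNNING (7) means stopped. The
# branches below map onto exactly those three cases.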
if [ $rc -eq $OCF_NOT_RUNNING ]; then last_commit=$(get_last_commit) if [ -n "$last_commit" ]; then # if last commit is set, this instance is considered started in slave mode rc=$OCF_SUCCESS master_exists if [ $? -ne 0 ]; then detect_first_master else # a master instance exists and is healthy; promote this # local read-only instance # so it can join the galera cluster. set_master_score fi fi return $rc elif [ $rc -ne $OCF_SUCCESS ]; then return $rc fi # if we make it here, mysql is running. Check cluster status now. echo $OCF_RESKEY_wsrep_cluster_address | grep -q $NODENAME if [ $? -ne 0 ]; then ocf_exit_reason "local node <${NODENAME}> is started, but is not a member of the wsrep_cluster_address <${OCF_RESKEY_wsrep_cluster_address}>" return $OCF_ERR_GENERIC fi is_primary if [ $? -eq 0 ]; then if ocf_is_probe; then # restore master score during probe # if we detect this is a master instance set_master_score fi rc=$OCF_RUNNING_MASTER else ocf_exit_reason "local node <${NODENAME}> is started, but not in primary mode. Unknown state." rc=$OCF_ERR_GENERIC fi return $rc } galera_stop() { local rc # make sure the process is stopped mysql_common_stop rc=$? clear_last_commit clear_master_score clear_bootstrap_node return $rc } galera_validate() { if ! ocf_is_ms; then ocf_exit_reason "Galera must be configured as a multistate Master/Slave resource." return $OCF_ERR_CONFIGURED fi if [ -z "$OCF_RESKEY_wsrep_cluster_address" ]; then ocf_exit_reason "Galera must be configured with a wsrep_cluster_address value." return $OCF_ERR_CONFIGURED fi mysql_common_validate } case "$1" in meta-data) meta_data exit $OCF_SUCCESS;; usage|help) usage exit $OCF_SUCCESS;; esac galera_validate rc=$? LSB_STATUS_STOPPED=3 if [ $rc -ne 0 ]; then case "$1" in stop) exit $OCF_SUCCESS;; monitor) exit $OCF_NOT_RUNNING;; status) exit $LSB_STATUS_STOPPED;; *) exit $rc;; esac fi if [ -z "${OCF_RESKEY_check_passwd}" ]; then # This value is automatically sourced from /etc/sysconfig/clustercheck if available OCF_RESKEY_check_passwd=${MYSQL_PASSWORD} fi if [ -z "${OCF_RESKEY_check_user}" ]; then # This value is automatically sourced from /etc/sysconfig/clustercheck if available OCF_RESKEY_check_user=${MYSQL_USERNAME} fi : ${OCF_RESKEY_check_user="root"} MYSQL_OPTIONS_CHECK="-nNE --user=${OCF_RESKEY_check_user}" if [ -n "${OCF_RESKEY_check_passwd}" ]; then MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK --password=${OCF_RESKEY_check_passwd}" fi # What kind of method was invoked? case "$1" in start) galera_start;; stop) galera_stop;; status) mysql_common_status err;; monitor) galera_monitor;; promote) galera_promote;; demote) galera_demote;; validate-all) exit $OCF_SUCCESS;; *) usage exit $OCF_ERR_UNIMPLEMENTED;; esac # vi:sw=4:ts=4:et: diff --git a/heartbeat/nfsnotify b/heartbeat/nfsnotify index 5f72d586a..b8dc1e408 100755 --- a/heartbeat/nfsnotify +++ b/heartbeat/nfsnotify @@ -1,315 +1,315 @@ #!/bin/bash # -# Copyright (c) 2014 David Vossel +# Copyright (c) 2014 David Vossel # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. 
Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs . ${OCF_FUNCTIONS_DIR}/ocf-directories ####################################################################### sbindir=$HA_SBIN_DIR if [ -z "$sbindir" ]; then sbindir=/usr/sbin fi SELINUX_ENABLED=-1 NFSNOTIFY_TMP_DIR="${HA_RSCTMP}/nfsnotify_${OCF_RESOURCE_INSTANCE}/" HA_STATD_PIDFILE="$NFSNOTIFY_TMP_DIR/rpc.statd_${OCF_RESOURCE_INSTANCE}.pid" HA_STATD_PIDFILE_PREV="$NFSNOTIFY_TMP_DIR/rpc.statd_${OCF_RESOURCE_INSTANCE}.pid.prev" STATD_PATH="/var/lib/nfs/statd" SM_NOTIFY_BINARY="${sbindir}/sm-notify" IS_RENOTIFY=0 meta_data() { cat < 1.0 This agent sends NFSv3 reboot notifications that tell clients to reclaim their locks. sm-notify reboot notifications Comma-separated list of floating IP addresses or host names that clients use to access the nfs service. This will be used to set the source address and mon_name of the SM_NOTIFY reboot notifications. source IP addresses Additional arguments to send to the sm-notify command. By default this agent will always set sm-notify's '-f' option. When the source_host option is set, the '-v' option will be used automatically to set the proper source address. Any additional sm-notify arguments set with this option will be used in addition to the previous default arguments. sm-notify arguments END } v3notify_usage() { cat < /dev/null 2>&1 if [ $? -eq 0 ]; then # it is useful to know if sm-notify processes were actually left around # or not during the stop/start operation. Whether this condition is true # or false does not indicate a failure. It does indicate that # there are probably some unresponsive nfs clients out there that are keeping # the sm-notify processes retrying. ocf_log info "previous sm-notify processes terminated before $__OCF_ACTION action." fi } v3notify_stop() { killall_smnotify rm -f $HA_STATD_PIDFILE_PREV > /dev/null 2>&1 mv $HA_STATD_PIDFILE $HA_STATD_PIDFILE_PREV > /dev/null 2>&1 return $OCF_SUCCESS } check_statd_pidfile() { local binary="rpc.statd" local pidfile="$HA_STATD_PIDFILE" ocf_log debug "Checking status for ${binary}." if [ -e "$pidfile" ]; then cat /proc/$(cat $pidfile)/cmdline 2>/dev/null | grep -a "${binary}" > /dev/null 2>&1 if [ $? -eq 0 ]; then return $OCF_SUCCESS fi ocf_exit_reason "$(cat $pidfile) for $binary is no longer running, sm-notify needs to re-notify clients" return $OCF_ERR_GENERIC fi # if we don't have a pid file for rpc.statd, we have not yet sent the notifications return $OCF_NOT_RUNNING } write_statd_pid() { local binary="rpc.statd" local pidfile="$HA_STATD_PIDFILE" local pid pid=$(pgrep ${binary}) case $? in 0) ocf_log info "PID file (pid:${pid} at $pidfile) created for ${binary}." 
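# pgrep's exit status drives this case statement: 0 means one or more
# processes matched, 1 means nothing matched, and anything else means
# pgrep itself failed.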
mkdir -p $(dirname $pidfile) echo "$pid" > $pidfile return $OCF_SUCCESS;; 1) rm -f "$pidfile" > /dev/null 2>&1 ocf_log info "$binary is not running" return $OCF_NOT_RUNNING;; *) rm -f "$pidfile" > /dev/null 2>&1 ocf_exit_reason "Error encountered detecting pid status of $binary" return $OCF_ERR_GENERIC;; esac } copy_statd() { local src=$1 local dest=$2 if ! [ -d "$dest" ]; then mkdir -p "$dest" fi cp -rpfn $src/sm $src/sm.bak $src/state $dest > /dev/null 2>&1 # make sure folder ownership and SELinux labels stay consistent [ -n "`id -u rpcuser`" -a "`id -g rpcuser`" ] && chown rpcuser:rpcuser "$dest" [ $SELINUX_ENABLED -eq 0 ] && chcon -R "$SELINUX_LABEL" "$dest" } v3notify_start() { local rc=$OCF_SUCCESS local cur_statd local statd_backup local is_renotify=0 # monitor, see if we need to notify or not v3notify_monitor if [ $? -eq 0 ]; then return $OCF_SUCCESS fi # kill off any other sm-notify processes that might already be running. killall_smnotify # record the pid of rpc.statd. if this pid ever changes, we have to re-notify write_statd_pid rc=$? if [ $rc -ne 0 ]; then return $rc fi # if the last time we ran nfsnotify, it was with the same statd process, # consider this a re-notification. During re-notifications we do not let the # sm-notify binary have access to the real statd directory. if [ "$(cat $HA_STATD_PIDFILE)" = "$(cat $HA_STATD_PIDFILE_PREV 2>/dev/null)" ]; then ocf_log info "Renotifying clients" is_renotify=1 fi statd_backup="$STATD_PATH/nfsnotify.bu" copy_statd "$STATD_PATH" "$statd_backup" if [ -z "$OCF_RESKEY_source_host" ]; then if [ "$is_renotify" -eq 0 ]; then cur_statd="$STATD_PATH" else cur_statd="$statd_backup" fi ocf_log info "sending notifications on default source address." $SM_NOTIFY_BINARY -f $OCF_RESKEY_notify_args -P $cur_statd if [ $? -ne 0 ]; then ocf_exit_reason "sm-notify execution failed, view syslog for more information" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS fi # do sm-notify for each ip for ip in `echo ${OCF_RESKEY_source_host} | sed 's/,/ /g'`; do # have the first sm-notify use the actual statd directory so the # notify list can be managed properly. if [ "$is_renotify" -eq 0 ]; then cur_statd="$STATD_PATH" # everything after the first notify is considered a renotification, # which means we don't use the real statd directory. is_renotify=1 else # use our copied statd directory for the remaining ip addresses cur_statd="$STATD_PATH/nfsnotify_${OCF_RESOURCE_INSTANCE}_${ip}" copy_statd "$statd_backup" "$cur_statd" fi ocf_log info "sending notifications with source address $ip" $SM_NOTIFY_BINARY -f $OCF_RESKEY_notify_args -v $ip -P "$cur_statd" if [ $? -ne 0 ]; then ocf_exit_reason "sm-notify with source host set to [ $ip ] failed. View syslog for more information" return $OCF_ERR_GENERIC fi done return $OCF_SUCCESS } v3notify_monitor() { # verify rpc.statd is up, and that the rpc.statd pid is the same one we # found during the start. otherwise rpc.statd recovered and we need to notify # again. check_statd_pidfile } case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS;; usage|help) v3notify_usage exit $OCF_SUCCESS;; *) ;; esac which restorecon > /dev/null 2>&1 && selinuxenabled SELINUX_ENABLED=$? if [ $SELINUX_ENABLED -eq 0 ]; then export SELINUX_LABEL="$(ls -ldZ $STATD_PATH | cut -f4 -d' ')" fi case $__OCF_ACTION in start) v3notify_start;; stop) v3notify_stop;; monitor) v3notify_monitor;; validate-all) v3notify_validate;; *) v3notify_usage exit $OCF_ERR_UNIMPLEMENTED;; esac rc=$? 
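# As an illustration, with source_host="10.0.0.5" the per-IP loop in
# v3notify_start() ends up invoking something like (hypothetical instance
# name "nfs1"):
#   sm-notify -f -v 10.0.0.5 -P /var/lib/nfs/statd/nfsnotify_nfs1_10.0.0.5
# where -f forces notification, -v sets the source address / mon_name, and
# -P points sm-notify at the copied statd state directory.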
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster index 5b55f690b..cc45f09ad 100755 --- a/heartbeat/rabbitmq-cluster +++ b/heartbeat/rabbitmq-cluster @@ -1,370 +1,370 @@ #!/bin/sh # -# Copyright (c) 2014 David Vossel +# Copyright (c) 2014 David Vossel # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### RMQ_SERVER=/usr/sbin/rabbitmq-server RMQ_CTL=/usr/sbin/rabbitmqctl RMQ_DATA_DIR="/var/lib/rabbitmq/mnesia" RMQ_PID_DIR="/var/run/rabbitmq" RMQ_PID_FILE="/var/run/rabbitmq/rmq.pid" RMQ_LOG_DIR="/var/log/rabbitmq" NODENAME=$(ocf_local_nodename) RMQ_CRM_ATTR_COOKIE="rmq-node-attr-${OCF_RESOURCE_INSTANCE}" meta_data() { cat < 1.0 Starts cloned rabbitmq cluster instance rabbitmq clustered Policy string to pass to 'rabbitmqctl set_policy' right after bootstrapping the first rabbitmq instance. rabbitmqctl set_policy args END } ####################################################################### rmq_usage() { cat < /dev/null 2>&1 } rmq_local_node() { local node_name=$(rabbitmqctl status 2>&1 | sed -n -e "s/^.*[S|s]tatus of node \(.*\)\s.*$/\1/p" | tr -d "'") if [ -z "$node_name" ]; then node_name=$(cat /etc/rabbitmq/rabbitmq-env.conf 2>/dev/null | grep "\s*RABBITMQ_NODENAME=" | awk -F= '{print $2}') fi echo "$node_name" } rmq_join_list() { cibadmin -Q --xpath "//node_state[@crmd='online']//nvpair[@name='$RMQ_CRM_ATTR_COOKIE']" | grep "$RMQ_CRM_ATTR_COOKIE" | sed -n -e "s/^.*value=.\(.*\)\".*$/\1/p" } rmq_write_nodename() { local node_name=$(rmq_local_node) if [ -z "$node_name" ]; then ocf_log err "Failed to determine rabbitmq node name, exiting" exit $OCF_ERR_GENERIC fi # store the pcmknode to rmq node mapping as an attribute ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$RMQ_CRM_ATTR_COOKIE" -v "$node_name" } rmq_delete_nodename() { # remove node-name ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$RMQ_CRM_ATTR_COOKIE" -D } prepare_dir () { if [ ! -d ${1} ] ; then mkdir -p ${1} chown -R rabbitmq:rabbitmq ${1} chmod 755 ${1} fi } remove_pid () { rm -f ${RMQ_PID_FILE} > /dev/null 2>&1 } rmq_monitor() { local rc $RMQ_CTL cluster_status > /dev/null 2>&1 rc=$? 
case "$rc" in 0) ocf_log debug "RabbitMQ server is running normally" rmq_write_nodename return $OCF_SUCCESS ;; 2) ocf_log info "RabbitMQ server is not running" rmq_delete_nodename return $OCF_NOT_RUNNING ;; *) ocf_log err "Unexpected return code from '$RMQ_CTL cluster status' exit code: $rc" rmq_delete_nodename return $OCF_ERR_GENERIC ;; esac } rmq_init_and_wait() { local rc prepare_dir $RMQ_PID_DIR prepare_dir $RMQ_LOG_DIR remove_pid # the server startup script uses this environment variable export RABBITMQ_PID_FILE="$RMQ_PID_FILE" setsid sh -c "$RMQ_SERVER > ${RMQ_LOG_DIR}/startup_log 2> ${RMQ_LOG_DIR}/startup_err" & ocf_log info "Waiting for server to start" $RMQ_CTL wait $RMQ_PID_FILE rc=$? if [ $rc -ne $OCF_SUCCESS ]; then remove_pid ocf_log info "rabbitmq-server start failed: $rc" return $OCF_ERR_GENERIC fi rmq_monitor return $? } rmq_set_policy() { $RMQ_CTL set_policy $@ > /dev/null 2>&1 } rmq_start_first() { local rc ocf_log info "Bootstrapping rabbitmq cluster" rmq_wipe_data rmq_init_and_wait rc=$? if [ $rc -eq 0 ]; then rc=$OCF_SUCCESS ocf_log info "cluster bootstrapped" if [ -n "$OCF_RESKEY_set_policy" ]; then # do not quote set_policy, we are passing in arguments rmq_set_policy $OCF_RESKEY_set_policy > /dev/null 2>&1 if [ $? -ne 0 ]; then ocf_log err "Failed to set policy: $OCF_RESKEY_set_policy" rc=$OCF_ERR_GENERIC else ocf_log info "Policy set: $OCF_RESKEY_set_policy" fi fi else ocf_log info "failed to bootstrap cluster. Check SELINUX policy" rc=$OCF_ERR_GENERIC fi return $rc } rmq_join_existing() { local join_list="$1" local rc=$OCF_ERR_GENERIC ocf_log info "Joining existing cluster with [ $(echo $join_list | tr '\n' ' ') ] nodes." rmq_init_and_wait if [ $? -ne 0 ]; then return $OCF_ERR_GENERIC fi # unconditionally join the cluster $RMQ_CTL stop_app > /dev/null 2>&1 for node in $(echo "$join_list"); do ocf_log info "Attempting to join cluster with target node $node" $RMQ_CTL join_cluster $node if [ $? -eq 0 ]; then ocf_log info "Joined cluster by connecting to node $node, starting app" $RMQ_CTL start_app rc=$? if [ $rc -ne 0 ]; then ocf_log err "'$RMQ_CTL start_app' failed" fi break; fi done if [ "$rc" -ne 0 ]; then ocf_log info "Join process incomplete, shutting down." return $OCF_ERR_GENERIC fi ocf_log info "Successfully joined existing rabbitmq cluster" return $OCF_SUCCESS } rmq_start() { local join_list="" local rc rmq_monitor if [ $? -eq $OCF_SUCCESS ]; then return $OCF_SUCCESS fi join_list=$(rmq_join_list) # No join list means no active instances are up. This instance # is the first, so it needs to bootstrap the rest if [ -z "$join_list" ]; then rmq_start_first rc=$? return $rc fi # first try to join without wiping mnesia data rmq_join_existing "$join_list" if [ $? -ne 0 ]; then ocf_log info "node failed to join, wiping data directory and trying again" # if the graceful join fails, use the hammer and reset all the data. rmq_stop rmq_wipe_data rmq_join_existing "$join_list" if [ $? -ne 0 ]; then ocf_log info "node failed to join even after reseting local data. Check SELINUX policy" return $OCF_ERR_GENERIC fi fi return $OCF_SUCCESS } rmq_stop() { rmq_monitor if [ $? -eq $OCF_NOT_RUNNING ]; then return $OCF_SUCCESS fi $RMQ_CTL stop rc=$? if [ $rc -ne 0 ]; then ocf_log err "rabbitmq-server stop command failed: $RMQ_CTL stop, $rc" return $rc fi #TODO add kill logic stop_wait=1 while [ $stop_wait = 1 ]; do rmq_monitor rc=$? 
if [ "$rc" -eq $OCF_NOT_RUNNING ]; then stop_wait=0 break elif [ "$rc" -ne $OCF_SUCCESS ]; then ocf_log info "rabbitmq-server stop failed: $rc" exit $OCF_ERR_GENERIC fi sleep 1 done remove_pid return $OCF_SUCCESS } rmq_validate() { check_binary $RMQ_SERVER check_binary $RMQ_CTL # This resource only makes sense as a clone right now. at some point # we may want to verify the following. #TODO verify cloned #TODO verify ordered=true # Given that this resource does the cluster join explicitly, # having a cluster_nodes list in the static config file will # likely conflict with this agent. #TODO verify no cluster list in rabbitmq conf #cat /etc/rabbitmq/rabbitmq.config | grep "cluster_nodes" return $OCF_SUCCESS } case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; start) rmq_start;; stop) rmq_stop;; monitor) rmq_monitor;; validate-all) rmq_validate;; usage|help) rmq_usage exit $OCF_SUCCESS ;; *) rmq_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc diff --git a/rgmanager/src/resources/db2.sh b/rgmanager/src/resources/db2.sh index f396ff62f..57991f926 100755 --- a/rgmanager/src/resources/db2.sh +++ b/rgmanager/src/resources/db2.sh @@ -1,133 +1,133 @@ #!/bin/bash # # Copyright (c) 2011 Holger Teutsch -# Copyright (c) 2014 David Vossel +# Copyright (c) 2014 David Vossel # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # # NOTE: # # This agent is a wrapper around the heartbeat/db2 agent which limits the heartbeat # db2 agent to Standard role support. This allows cluster managers such as rgmanager # which do not have multi-state resource support to manage db2 instances with # a limited feature set. # export LC_ALL=C export LANG=C export PATH=/bin:/sbin:/usr/bin:/usr/sbin . $(dirname $0)/ocf-shellfuncs meta_data() { cat < 1.0 Resource Agent that manages an IBM DB2 LUW databases in Standard role. Multiple partitions are supported. When partitions are in use, each partition must be configured as a separate primitive resource. Resource Agent that manages an IBM DB2 LUW databases in Standard role with multiple partition support. The instance of the database(s). instance List of databases to be managed, e.g "db1 db2". Defaults to all databases in the instance. List of databases to be managed The number of the partion (DBPARTITIONNUM) to be managed. database partition number (DBPARTITIONNUM) END } heartbeat_db2_wrapper() { # default heartbeat agent ocf root. export OCF_ROOT=/usr/lib/ocf heartbeat_db2="${OCF_ROOT}/resource.d/heartbeat/db2" if ! [ -a $heartbeat_db2 ]; then echo "heartbeat db2 agent not found at '${heartbeat_db2}'" exit $OCF_ERR_INSTALLED fi $heartbeat_db2 $1 } case $1 in meta-data) meta_data exit 0 ;; validate-all) heartbeat_db2_wrapper $1 exit $? ;; start) heartbeat_db2_wrapper $1 exit $? ;; stop) heartbeat_db2_wrapper $1 exit $? 
;; status|monitor) heartbeat_db2_wrapper "monitor" exit $? ;; restart) heartbeat_db2_wrapper "stop" rc=$? if [ $rc -ne 0 ]; then exit $rc fi heartbeat_db2_wrapper "start" exit $? ;; *) echo "Usage: db2.sh {start|stop|status|monitor|restart|validate-all|meta-data}" exit $OCF_ERR_UNIMPLEMENTED ;; esac
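# A minimal sketch of exercising one of these agents by hand outside the
# cluster manager, using the standard OCF environment-variable convention;
# the resource name and parameter value here are hypothetical:
#
#   export OCF_ROOT=/usr/lib/ocf
#   export OCF_RESOURCE_INSTANCE=test-docker
#   export OCF_RESKEY_image=centos:7
#   /usr/lib/ocf/resource.d/heartbeat/docker validate-all; echo "rc=$?"
#
# The ocf-tester utility shipped with resource-agents wraps the same idea:
#   ocf-tester -n test-docker -o image=centos:7 /usr/lib/ocf/resource.d/heartbeat/docker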