diff --git a/heartbeat/Xen b/heartbeat/Xen index 38ceeb9c5..002d36465 100755 --- a/heartbeat/Xen +++ b/heartbeat/Xen @@ -1,572 +1,572 @@ #!/bin/sh # # # Support: users@clusterlabs.org # License: GNU General Public License (GPL) # # Resource Agent for the Xen Hypervisor. # Manages Xen virtual machine instances by # mapping cluster resource start and stop, # to Xen create and shutdown, respectively. # # usage: $0 {start|stop|status|monitor|meta-data} # # OCF parameters are as below: # OCF_RESKEY_xmfile # Absolute path to the Xen control file, # for this virtual machine. # OCF_RESKEY_allow_mem_management # Change memory usage on start/stop/migration # of virtual machine # OCF_RESKEY_reserved_Dom0_memory # minimum memory reserved for domain 0 # OCF_RESKEY_monitor_scripts # scripts to monitor services within the # virtual domain ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### usage() { cat <<-END usage: $0 {start|stop|status|monitor|meta-data|validate-all} END } : ${OCF_RESKEY_xmfile=/etc/xen/vm/MyDomU} : ${OCF_RESKEY_shutdown_acpi=0} : ${OCF_RESKEY_allow_mem_management=0} : ${OCF_RESKEY_reserved_Dom0_memory=512} # prefer xl xentool=$(which xl 2> /dev/null || which xm) meta_data() { cat < 1.0 Resource Agent for the Xen Hypervisor. Manages Xen virtual machine instances by mapping cluster resource start and stop, to Xen create and shutdown, respectively. A note on names We will try to extract the name from the config file (the xmfile attribute). If you use a simple assignment statement, then you should be fine. Otherwise, if there's some python acrobacy involved such as dynamically assigning names depending on other variables, and we will try to detect this, then please set the name attribute. 
You should also do that if there is any chance of a pathological situation where a config file might be missing, for example if it resides on a shared storage. If all fails, we finally fall back to the instance id to preserve backward compatibility. Para-virtualized guests can also be migrated by enabling the meta_attribute allow-migrate. Manages Xen unprivileged domains (DomUs) Absolute path to the Xen control file, for this virtual machine. Xen control file Name of the virtual machine. Xen DomU name The Xen agent will first try an orderly shutdown using xl shutdown. Should this not succeed within this timeout, the agent will escalate to xl destroy, forcibly killing the node. If this is not set, it will default to two-third of the stop action timeout. Setting this value to 0 forces an immediate destroy. Shutdown escalation timeout Handle shutdown by simulating an ACPI power button event. Enable this to allow graceful shutdown for HVM domains without installed PV drivers. Simulate power button event on shutdown This parameter enables dynamic adjustment of memory for start and stop actions used for Dom0 and the DomUs. The default is to not adjust memory dynamically. Use dynamic memory management In case of a live migration, the system will default to using the IP address associated with the hostname via DNS or /etc/hosts. This parameter allows you to specify a node attribute that will be queried instead for the target node, overriding the IP address. This allows you to use a dedicated network for live migration traffic to a specific node. Warning: make very sure the IP address does point to the right node. Or else the live migration will end up somewhere else, greatly confusing the cluster and causing havoc. Node attribute containing target IP address In case memory management is used, this parameter defines the minimum amount of memory to be reserved for the dom0. The default minimum memory is 512MB. 
Minimum Dom0 memory To additionally monitor services within the unprivileged domain, add this parameter with a list of scripts to monitor. list of space separated monitor scripts END } Xen_Status() { if expr "x$xentool" : "x.*xl" >/dev/null; then $xentool list $1 >/dev/null 2>&1 if [ $? -ne 0 ]; then return $OCF_NOT_RUNNING else return $OCF_SUCCESS fi fi if have_binary xen-list; then xen-list $1 2>/dev/null | grep -qs "State.*[-r][-b][-p]--" 2>/dev/null if [ $? -ne 0 ]; then return $OCF_NOT_RUNNING else return $OCF_SUCCESS fi fi STATUS=`$xentool list --long $1 2>/dev/null | grep status 2>/dev/null` if [ "X${STATUS}" != "X" ]; then # we have Xen 3.0.4 or higher STATUS_NOSPACES=`echo "$STATUS" | awk '{ print $1,$2}'` if [ "$STATUS_NOSPACES" = "(status 2)" -o "$STATUS_NOSPACES" = "(status 1)" ]; then return $OCF_SUCCESS else return $OCF_NOT_RUNNING fi else # we have Xen 3.0.3 or lower STATUS=`$xentool list --long $1 2>/dev/null | grep state 2>/dev/null` echo "${STATUS}" | grep -qs "[-r][-b][-p]---" if [ $? -ne 0 ]; then return $OCF_NOT_RUNNING else return $OCF_SUCCESS fi fi } # If the guest is rebooting, it may completely disappear from the # list of defined guests, thus xl/xen-list would return with not # running; apparently, this period lasts only for a second or # two # If a status returns not running, then test status # again for 5 times (perhaps it'll show up) Xen_Status_with_Retry() { local rc cnt=5 Xen_Status $1 rc=$? while [ $rc -eq $OCF_NOT_RUNNING -a $cnt -gt 0 ]; do case "$__OCF_ACTION" in stop) ocf_log debug "domain $1 reported as not running, waiting $cnt seconds ..." ;; monitor) ocf_log warn "domain $1 reported as not running, but it is expected to be running! Retrying for $cnt seconds ..." ;; *) : not reachable ;; esac sleep 1 Xen_Status $1 rc=$? 
cnt=$((cnt-1)) done return $rc } Xen_Adjust_Memory() { if ocf_is_true "${OCF_RESKEY_allow_mem_management}"; then CNTNEW=$1 RUNNING=`Xen_List_running` RUNCNT=`Xen_Count_running` MAXMEM=`Xen_Total_Memory` if [ ${RUNCNT} -eq 0 -a ${CNTNEW} -eq 0 ]; then RUNCNT=1 fi #NEWMEM=`echo "(${MAXMEM}-${OCF_RESKEY_reserved_Dom0_memory})/(${RUNCNT}+${CNTNEW})"|bc` NEWMEM=$(( (${MAXMEM} - ${OCF_RESKEY_reserved_Dom0_memory}) / (${RUNCNT} + ${CNTNEW} ) )) # do not rely on ballooning add dom0_mem=512 instead to force memory for dom0 #$xentool mem-set Domain-0 ${OCF_RESKEY_reserved_Dom0_memory} for DOM in ${RUNNING}; do $xentool mem-set ${DOM} ${NEWMEM} done ocf_log info "Adjusted memory to: $NEWMEM, for the following $RUNCNT domains: $RUNNING" fi } Xen_List_all() { $xentool list | grep -v -e "Name" -e "Domain-0" | awk '{print $1}' } Xen_List_running() { ALL_DOMS=`Xen_List_all` for DOM in ${ALL_DOMS}; do if Xen_Status $DOM; then echo "${DOM} " fi done } Xen_Count_running() { Xen_List_running | wc -w } Xen_Monitor() { if ocf_is_probe; then Xen_Status ${DOMAIN_NAME} else Xen_Status_with_Retry ${DOMAIN_NAME} fi if [ $? -eq ${OCF_NOT_RUNNING} ]; then ocf_is_probe || ocf_log err "Xen domain $DOMAIN_NAME stopped" return ${OCF_NOT_RUNNING} fi if [ "X${OCF_RESKEY_monitor_scripts}" = "X" ]; then return ${OCF_SUCCESS} fi for SCRIPT in ${OCF_RESKEY_monitor_scripts}; do $SCRIPT if [ $? -ne 0 ]; then return ${OCF_ERR_GENERIC} fi done return ${OCF_SUCCESS} } Xen_Total_Memory() { $xentool info | grep "^total_memory" | awk '{print $3}' } Xen_Start() { if Xen_Status ${DOMAIN_NAME}; then ocf_log info "Xen domain $DOMAIN_NAME already running." return $OCF_SUCCESS fi if [ ! -f "${OCF_RESKEY_xmfile}" ]; then ocf_log err "Config file ${OCF_RESKEY_xmfile} for $DOMAIN_NAME does not exist." 
return $OCF_ERR_INSTALLED fi if ocf_is_true "${OCF_RESKEY_allow_mem_management}"; then Xen_Adjust_Memory 1 ocf_log info "New memory for virtual domains: ${NEWMEM}" sed -i -e "/^memory=/ s/^memory=.*/memory=${NEWMEM}/" ${OCF_RESKEY_xmfile} $xentool mem-set ${DOMAIN_NAME} ${NEWMEM} fi # the latest xl management tool is squeamish about some # characters in a name (the vm name is xen-f): # /etc/xen/vm/xen-f:15: config parsing error near `xen': # syntax error, unexpected IDENT, expecting STRING or NUMBER # or '[' # /etc/xen/vm/xen-f:15: config parsing error near `-f': lexical error # # the older xm management tool cannot digest quotes (see # https://developerbugs.linuxfoundation.org/show_bug.cgi?id=2671) # # hence the following if expr "x$xentool" : "x.*xl" >/dev/null; then $xentool create ${OCF_RESKEY_xmfile} name=\"$DOMAIN_NAME\" else $xentool create ${OCF_RESKEY_xmfile} name="$DOMAIN_NAME" fi rc=$? if [ $rc -ne 0 ]; then return $OCF_ERR_GENERIC else if ocf_is_true "${OCF_RESKEY_allow_mem_management}"; then $xentool mem-set ${DOMAIN_NAME} ${NEWMEM} fi fi while sleep 1; do Xen_Monitor && return $OCF_SUCCESS done } xen_domain_stop() { local dom=$1 local timeout if [ -n "$OCF_RESKEY_shutdown_timeout" ]; then timeout=$OCF_RESKEY_shutdown_timeout elif [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then # Allow 2/3 of the action timeout for the orderly shutdown # (The origin unit is ms, hence the conversion) timeout=$((OCF_RESKEY_CRM_meta_timeout/1500)) else timeout=60 fi if [ "$timeout" -gt 0 ]; then ocf_log info "Xen domain $dom will be stopped (timeout: ${timeout}s)" if ocf_is_true "${OCF_RESKEY_shutdown_acpi}"; then $xentool trigger $dom power else $xentool shutdown $dom fi while Xen_Status $dom && [ "$timeout" -gt 0 ]; do ocf_log debug "$dom still not stopped. Waiting..." timeout=$((timeout-1)) sleep 1 done fi if [ "$timeout" -eq 0 ]; then while Xen_Status $dom; do ocf_log warn "Xen domain $dom will be destroyed!" $xenkill $dom sleep 1 done # Note: This does not give up. 
stop isn't allowed to to fail. # If $xentool destroy fails, stop will eventually timeout. # This is the correct behaviour. fi ocf_log info "Xen domain $dom stopped." } Xen_Stop() { local vm if Xen_Status_with_Retry ${DOMAIN_NAME}; then vm=${DOMAIN_NAME} elif Xen_Status migrating-${DOMAIN_NAME}; then ocf_log info "Xen domain $DOMAIN_NAME is migrating" vm="migrating-${DOMAIN_NAME}" else ocf_log info "Xen domain $DOMAIN_NAME already stopped." fi if [ "$vm" ]; then xen_domain_stop $vm else # It is supposed to be gone, but there have been situations where # $xentool list / xen-list showed it as stopped but it was still # instantiated. Nuke it once more to make sure: $xenkill ${DOMAIN_NAME} fi Xen_Adjust_Memory 0 return $OCF_SUCCESS } Xen_Migrate_To() { target_node="$OCF_RESKEY_CRM_meta_migrate_target" target_attr="$OCF_RESKEY_node_ip_attribute" target_addr="$target_node" if Xen_Status ${DOMAIN_NAME}; then ocf_log info "$DOMAIN_NAME: Starting $xentool migrate to $target_node" if [ -n "$target_attr" ]; then - nodevalue=`crm_attribute --type nodes --node $target_node --attr-name $target_attr --get-value -q` + nodevalue=`crm_attribute --type nodes --node $target_node --attr-name $target_attr -G -q` if [ -n "${nodevalue}" -a "${nodevalue}" != "(null)" ]; then target_addr="$nodevalue" ocf_log info "$DOMAIN_NAME: $target_node is using address $target_addr" fi fi if expr "x$xentool" : "x.*xm" >/dev/null; then $xentool migrate --live $DOMAIN_NAME $target_addr else $xentool migrate $DOMAIN_NAME $target_addr fi rc=$? if [ $rc -ne 0 ]; then ocf_log err "$DOMAIN_NAME: $xentool migrate to $target_node failed: $rc" return $OCF_ERR_GENERIC else Xen_Adjust_Memory 0 ocf_log info "$DOMAIN_NAME: $xentool migrate to $target_node succeeded." return $OCF_SUCCESS fi else ocf_log err "$DOMAIN_NAME: migrate_to: Not active locally!" 
return $OCF_ERR_GENERIC fi } Xen_Migrate_From() { if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then # Allow 2/3 of the action timeout for status to stabilize # (The origin unit is ms, hence the conversion) timeout=$((OCF_RESKEY_CRM_meta_timeout/1500)) else timeout=10 # should be plenty fi while ! Xen_Status ${DOMAIN_NAME} && [ $timeout -gt 0 ]; do ocf_log debug "$DOMAIN_NAME: Not yet active locally, waiting (timeout: ${timeout}s)" timeout=$((timeout-1)) sleep 1 done if Xen_Status ${DOMAIN_NAME}; then Xen_Adjust_Memory 0 ocf_log info "$DOMAIN_NAME: Active locally, migration successful" return $OCF_SUCCESS else ocf_log err "$DOMAIN_NAME: Not active locally, migration failed!" return $OCF_ERR_GENERIC fi } Xen_Validate_All() { return $OCF_SUCCESS } if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi case $1 in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; esac # the name business: # # 1. use the name attribute, or # 2. find the name in the config file (if it exists) and use that # unless it contains funny characters such as '%' or space, or # 3. 
#    use the OCF_RESOURCE_INSTANCE

if [ x"${OCF_RESKEY_name}" != x ]; then
	DOMAIN_NAME="${OCF_RESKEY_name}"
else
	if [ -f "${OCF_RESKEY_xmfile}" ]; then
		# take the right-hand side of the "name = ..." line, without quotes
		DOMAIN_NAME=`awk '$1~/^name(=|$)/{print}' ${OCF_RESKEY_xmfile} | sed 's/.*=[[:space:]]*//' | tr -d "[\"']"`
		# a '%' or whitespace in the result means it is not a plain name
		if echo "$DOMAIN_NAME" | grep -qs '[%[:space:]]'; then
			DOMAIN_NAME=""
		fi
	fi
	DOMAIN_NAME=${DOMAIN_NAME:-${OCF_RESOURCE_INSTANCE}}
fi

for binary in sed awk; do
	check_binary $binary
done

# Command used for a forced destroy: xen-destroy when installed, else
# the management tool's "destroy" subcommand.
if have_binary xen-destroy ; then
	xenkill="xen-destroy"
else
	xenkill="$xentool destroy"
fi

if [ -n "$OCF_RESKEY_shutdown_timeout" ]; then
	ocf_is_decimal "$OCF_RESKEY_shutdown_timeout" || {
		ocf_log err "shutdown_timeout must be a number"
		exit $OCF_ERR_CONFIGURED
	}
fi

# Dispatch the requested action; its status becomes the exit code.
case $1 in
start)
	Xen_Start
	;;
stop)
	Xen_Stop
	;;
migrate_to)
	Xen_Migrate_To
	;;
migrate_from)
	Xen_Migrate_From
	;;
monitor)
	Xen_Monitor
	;;
status)
	Xen_Status ${DOMAIN_NAME}
	;;
validate-all)
	Xen_Validate_All
	;;
*)
	usage
	exit $OCF_ERR_UNIMPLEMENTED
	;;
esac
exit $?
diff --git a/heartbeat/redis b/heartbeat/redis
index 9aae69cda..fcd8c2345 100755
--- a/heartbeat/redis
+++ b/heartbeat/redis
@@ -1,626 +1,626 @@
#!/bin/bash
#
# Resource agent script for redis server.
#
# Copyright (c) 2013 Patrick Hemmer
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file.
Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs : ${OCF_RESKEY_bin:=/usr/bin/redis-server} : ${OCF_RESKEY_client_bin:=/usr/bin/redis-cli} : ${OCF_RESKEY_user:=redis} : ${OCF_RESKEY_rundir:=/var/run/redis} : ${OCF_RESKEY_pidfile_name:=redis-server.pid} : ${OCF_RESKEY_socket_name:=redis.sock} : ${OCF_RESKEY_port:=6379} if [ -z "$OCF_RESKEY_config" ]; then if [ -f "/etc/redis.conf" ]; then OCF_RESKEY_config="/etc/redis.conf" else OCF_RESKEY_config="/etc/redis/redis.conf" fi fi CHECK_SLAVE_STATE=0 REDIS_CHECK_DUMP="/usr/bin/redis-check-dump" REDIS_SERVER="$OCF_RESKEY_bin" REDIS_CLIENT="$OCF_RESKEY_client_bin" REDIS_CONFIG="$OCF_RESKEY_config" REDIS_USER="$OCF_RESKEY_user" REDIS_RUNDIR="$OCF_RESKEY_rundir" REDIS_PIDFILE="$OCF_RESKEY_rundir/$OCF_RESKEY_pidfile_name" REDIS_SOCKET="$OCF_RESKEY_rundir/$OCF_RESKEY_socket_name" REDIS_REPLICATION_PORT="$OCF_RESKEY_port" if ! [ -f $REDIS_CHECK_DUMP ]; then REDIS_CHECK_DUMP="$(which redis-check-dump 2>/dev/null)" fi if [ -z "$REDIS_CHECK_DUMP" ]; then REDIS_CHECK_DUMP="$(which redis-check-rdb 2>/dev/null)" fi if [ -r "$REDIS_CONFIG" ]; then REDIS_DUMP_DIR="$(grep "^\s*dir\s" < "$REDIS_CONFIG" | awk '{ print $2 }' 2>/dev/null)" REDIS_DUMP_FILE="$(grep "^\s*dbfilename\s" < "$REDIS_CONFIG" | awk '{ print $2 }' 2>/dev/null)" fi : ${REDIS_DUMP_DIR:=/var/lib/redis/} : ${REDIS_DUMP_FILE:=dump.rdb} redis_meta_data() { cat < 1.0 Resource agent script for redis server. This resource fully supports master/slave replication. 
The master preference of a node is determined by the 'slave_priority' parameter of the redis config. When taking the resource from 'unmanaged' to 'managed', the currently active master will be given a priority of 1000 (plus 1 for each active connection). The default 'slave_priority' is 100, so the master will stay master. For a slave to become master after converting the resource to managed, set a slave_priority greater than 1000. Redis server Path to \`redis-server\` Path to \`redis-server\` Path to \`redis-cli\` Path to \`redis-cli\` Path to 'redis.conf' Path to 'redis.conf' User to run redis as Redis user Directory to store socket and pid file in Redis var/run dir The filename to use for the pidfile. Will be created in the rundir. Should only be a basename, not a full path. Redis pidfile name The filename to use for the socket. Will be crated in the rundir. Should only be a basename, not a full path. Redis socket name Port for replication client to connect to on remote server Replication port During redis cluster bootstrap, wait for the last known master to be promoted before allowing any other instances in the cluster to be promoted. This lessens the risk of data loss when persistent data is in use. Wait for last known master EOI } INSTANCE_ATTR_NAME=$(echo "${OCF_RESOURCE_INSTANCE}" | awk -F : '{print $1}') CRM_ATTR_REPL_INFO="${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${INSTANCE_ATTR_NAME}_REPL_INFO -s redis_replication" MASTER_HOST="" MASTER_ACTIVE_CACHED="" MASTER_ACTIVE="" master_is_active() { if [ -z "$MASTER_ACTIVE_CACHED" ]; then # determine if a master instance is already up and is healthy crm_mon --as-xml | grep "resource.*id=\"${OCF_RESOURCE_INSTANCE}\".*role=\"Master\".*active=\"true\".*orphaned=\"false\".*failed=\"false\"" > /dev/null 2>&1 MASTER_ACTIVE=$? 
MASTER_ACTIVE_CACHED="true" fi return $MASTER_ACTIVE } set_master() { MASTER_HOST="$1" ${CRM_ATTR_REPL_INFO} -v "$1" -q } last_known_master() { if [ -z "$MASTER_HOST" ]; then MASTER_HOST="$(${CRM_ATTR_REPL_INFO} --query -q 2>/dev/null)" fi echo "$MASTER_HOST" } crm_master_reboot() { local node node=$(ocf_attribute_target) "${HA_SBIN_DIR}/crm_master" -N "$node" -l reboot "$@" } calculate_score() { perf_score="$1" connected_clients="$2" if ocf_is_true "$OCF_RESKEY_wait_last_known_master"; then # only set perferred score by slave_priority if # we are not waiting for the last known master. Otherwise # we want the agent to have complete control over the scoring. perf_score="" connected_clients="0" fi if [[ -z "$perf_score" ]]; then if [[ "$(last_known_master)" == "$NODENAME" ]]; then perf_score=1000 else perf_score=1 fi fi perf_score=$(( perf_score + connected_clients )) echo "$perf_score" } set_score() { local score local last_master score="$1" if ocf_is_true "$OCF_RESKEY_wait_last_known_master" && ! master_is_active; then last_master="$(last_known_master)" if [ -n "$last_master" ] && [[ "$last_master" != "$NODENAME" ]]; then ocf_log info "Postponing setting master score for ${NODENAME} until last known master instance [${last_master}] is promoted" return fi fi ocf_log debug "monitor: Setting master score to '$score'" crm_master_reboot -v "$score" } redis_client() { ocf_log debug "redis_client: '$REDIS_CLIENT' -s '$REDIS_SOCKET' $*" if [ -n "$clientpasswd" ]; then "$REDIS_CLIENT" -s "$REDIS_SOCKET" -a "$clientpasswd" "$@" | sed 's/\r//' else "$REDIS_CLIENT" -s "$REDIS_SOCKET" "$@" | sed 's/\r//' fi } simple_status() { local pid if ! [ -f "$REDIS_PIDFILE" ]; then return $OCF_NOT_RUNNING fi pid="$(<"$REDIS_PIDFILE")" pidof "$REDIS_SERVER" | grep -q "\<$pid\>" || return $OCF_NOT_RUNNING ocf_log debug "monitor: redis-server running under pid $pid" return $OCF_SUCCESS } redis_monitor() { local res simple_status res=$? 
if (( res != OCF_SUCCESS )); then return $res fi typeset -A info while read line; do [[ "$line" == "#"* ]] && continue [[ "$line" != *":"* ]] && continue IFS=':' read -r key value <<< "$line" info[$key]="$value" done < <(redis_client info) if [[ -z "${info[role]}" ]]; then ocf_log err "monitor: Could not get role from \`$REDIS_CLIENT -s $REDIS_SOCKET info\`" return $OCF_ERR_GENERIC fi if ocf_is_ms; then # Here we see if a score has already been set. # If score isn't set we the redis setting 'slave_priority'. # If that isn't set, we default to 1000 for a master, and 1 for slave. # We then add 1 for each connected client - score="$(crm_master_reboot --get-value --quiet 2>/dev/null)" + score="$(crm_master_reboot -G --quiet 2>/dev/null)" if [[ -z "$score" ]]; then score=$(calculate_score "${info[slave_priority]}" "${info[connected_clients]}") set_score "$score" fi if [[ "${info[role]}" == "master" ]]; then if ocf_is_probe; then set_master "$NODENAME" fi return $OCF_RUNNING_MASTER fi if [ "$CHECK_SLAVE_STATE" -eq 1 ]; then if [[ "${info[master_link_status]}" != "up" ]]; then ocf_log info "monitor: Slave mode link has not yet been established (link=${info[master_link_status]})" return $OCF_ERR_GENERIC fi if [[ "${info[master_host]}" != "$(last_known_master)" ]]; then ocf_log err "monitor: Slave mode current master does not match running master. current=${info[master_host]}, running=$(last_known_master)" return $OCF_ERR_GENERIC fi fi fi return $OCF_SUCCESS } check_dump_file() { if ! have_binary "$REDIS_CHECK_DUMP"; then return 0 fi $REDIS_CHECK_DUMP ${REDIS_DUMP_DIR}/${REDIS_DUMP_FILE} 2>&1 } redis_start() { local size redis_monitor status=$? if (( status == OCF_SUCCESS )) || (( status == OCF_RUNNING_MASTER )); then ocf_log info "start: redis is already running" return $OCF_SUCCESS fi [[ ! 
-d "$REDIS_RUNDIR" ]] && mkdir -p "$REDIS_RUNDIR" chown -R "$REDIS_USER" "$REDIS_RUNDIR" if have_binary "restorecon"; then restorecon -Rv "$REDIS_RUNDIR" fi # check for 0 byte database dump file. This is an unrecoverable start # condition that we can avoid by deleting the 0 byte database file. if [ -f "${REDIS_DUMP_DIR}/${REDIS_DUMP_FILE}" ]; then size="$(stat --format "%s" ${REDIS_DUMP_DIR}/${REDIS_DUMP_FILE})" if [ "$?" -eq "0" ] && [ "$size" -eq "0" ]; then ocf_log notice "Detected 0 byte ${REDIS_DUMP_FILE}, deleting zero length file to avoid start failure." rm -f "${REDIS_DUMP_DIR}/${REDIS_DUMP_FILE}" fi fi ocf_log info "start: $REDIS_SERVER --daemonize yes --unixsocket '$REDIS_SOCKET' --pidfile '$REDIS_PIDFILE'" output="$(su "$REDIS_USER" -s /bin/sh -c "cd '$REDIS_RUNDIR'; exec '$REDIS_SERVER' '$REDIS_CONFIG' --daemonize yes --unixsocket '$REDIS_SOCKET' --pidfile '$REDIS_PIDFILE'" 2>&1)" while true; do # wait for redis to start typeset -A info while read line; do [[ "$line" == "#"* ]] && continue [[ "$line" != *":"* ]] && continue IFS=':' read -r key value <<< "$line" info[$key]="$value" done < <(redis_client info) if (( info[loading] == 0 )); then break elif (( info[loading] == 1 )); then sleep "${info[loading_eta_seconds]}" elif pidof "$REDIS_SERVER" >/dev/null; then # unknown error, but the process still exists. # This check is mainly because redis daemonizes before it starts listening, causing `redis-cli` to fail # See https://github.com/antirez/redis/issues/2368 # It's possible that the `pidof` will pick up a different redis, but in that case, the start operation will just time out sleep 1 else check_output="$(check_dump_file)" ocf_log err "start: Unknown error waiting for redis to start. redis-check-dump output=${check_output//$'\n'/; }" return $OCF_ERR_GENERIC fi done while ! 
[ -s "$REDIS_PIDFILE" ]; do ocf_log debug "start: Waiting for pid file '$REDIS_PIDFILE' to appear" sleep 1 done ocf_is_ms && redis_demote # pacemaker expects resources to start in slave mode redis_monitor status=$? if (( status == OCF_SUCCESS )) || (( status == OCF_RUNNING_MASTER )); then return $OCF_SUCCESS fi check_output="$(check_dump_file)" ocf_log err "start: Unknown error starting redis. redis-server output=${output//$'\n'/; } redis-check-dump output=${check_output//$'\n'/; }" return $status } redis_stop() { redis_monitor status=$? if (( status == OCF_NOT_RUNNING )); then ocf_log info "stop: redis is already stopped" crm_master_reboot -D return $OCF_SUCCESS fi pid="$(<"$REDIS_PIDFILE")" kill -TERM "$pid" while true; do simple_status status=$? if (( status == OCF_NOT_RUNNING )); then crm_master_reboot -D return $OCF_SUCCESS fi sleep 1 done } redis_promote() { redis_monitor status=$? if (( status == OCF_RUNNING_MASTER )); then ocf_log info "promote: Already running as master" set_master "$NODENAME" return $OCF_SUCCESS elif (( status != OCF_SUCCESS )); then ocf_log err "promote: Node is not running as a slave" return $OCF_ERR_GENERIC fi redis_client slaveof no one redis_monitor status=$? if (( status == OCF_RUNNING_MASTER )); then set_master "$NODENAME" return $OCF_SUCCESS fi ocf_log err "promote: Unknown error while promoting to master (status=$status)" return $OCF_ERR_GENERIC } redis_demote() { local master_host local master_port # client kill is only supported in Redis 2.8.12 or greater version=$(redis_client -v | awk '{print $NF}') ocf_version_cmp "$version" "2.8.11" client_kill=$? CHECK_SLAVE_STATE=1 redis_monitor status=$? if (( status == OCF_SUCCESS )); then ocf_log info "demote: Already running as slave" return $OCF_SUCCESS elif (( status == OCF_NOT_RUNNING )); then ocf_log err "demote: Failed to demote, redis not running." 
return $OCF_NOT_RUNNING fi master_host="$(last_known_master)" master_port="${REDIS_REPLICATION_PORT}" # The elected master has to remain a slave during startup. # During this period a placeholder master host is assigned. if [ -z "$master_host" ] || [[ "$master_host" == "$NODENAME" ]]; then CHECK_SLAVE_STATE=0 master_host="no-such-master" elif ! master_is_active; then # no master has been promoted yet. we'll be notified when the # master starts. CHECK_SLAVE_STATE=0 master_host="no-such-master" fi ocf_log info "demote: Setting master to '$master_host'" redis_client slaveof "$master_host" "$master_port" # Wait forever for the slave to connect to the master and finish the # sync. Timeout is controlled by Pacemaker "op start timeout=XX". # # hint: redis master_link_status will only come "up" when # the SYNC with the master has completed. # This can take an arbitraty time (data) and should # only be parametrized by the start operation timeout # by the administrator, not by this resource agent code while true; do # Wait infinite if replication is syncing # Then start/demote operation timeout determines timeout if [ "$client_kill" -eq 2 ]; then redis_client CLIENT PAUSE 2000 fi redis_monitor status=$? if (( status == OCF_SUCCESS )); then if [ "$client_kill" -eq 2 ]; then redis_client CLIENT KILL type normal fi return $OCF_SUCCESS fi sleep 1 done ocf_log err "demote: Unexpected error setting slave mode (status=$status)" return $OCF_ERR_GENERIC } redis_notify() { mode="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}" case "$mode" in post-demote|post-promote) # change the master redis_monitor status=$? if (( status == OCF_SUCCESS )); then # were a slave # calling demote updates the slave's connection # to the newly appointed Master instance. 
redis_demote fi ;; esac return $OCF_SUCCESS } redis_validate() { if [[ -x "$REDIS_SERVER" ]]; then ocf_log err "validate: $REDIS_SERVER does not exist or is not executable" return $OCF_ERR_INSTALLED fi if [[ -x "$REDIS_CLIENT" ]]; then ocf_log err "validate: $REDIS_CLIENT does not exist or is not executable" return $OCF_ERR_INSTALLED fi if [[ -f "$REDIS_CONFIG" ]]; then ocf_log err "validate: $REDIS_CONFIG does not exist" return $OCF_ERR_CONFIGURED fi if ! getent passwd "$REDIS_USER" &>/dev/null; then ocf_log err "validate: $REDIS_USER is not a valid user" return $OCF_ERR_CONFIGURED fi } NODENAME=$(ocf_attribute_target) if [ -r "$REDIS_CONFIG" ]; then clientpasswd="$(sed -n -e 's/^\s*requirepass\s*\(.*\)\s*$/\1/p' < $REDIS_CONFIG | tail -n 1)" fi ocf_log debug "action=${1:-$__OCF_ACTION} notify_type=${OCF_RESKEY_CRM_meta_notify_type} notify_operation=${OCF_RESKEY_CRM_meta_notify_operation} master_host=${OCF_RESKEY_CRM_meta_notify_master_uname} slave_host=${OCF_RESKEY_CRM_meta_notify_slave_uname} promote_host=${OCF_RESKEY_CRM_meta_notify_promote_uname} demote_host=${OCF_RESKEY_CRM_meta_notify_demote_uname}; params: bin=${OCF_RESKEY_bin} client_bin=${OCF_RESKEY_client_bin} config=${OCF_RESKEY_config} user=${OCF_RESKEY_user} rundir=${OCF_RESKEY_rundir} port=${OCF_RESKEY_port}" case "${1:-$__OCF_ACTION}" in status|monitor) redis_monitor ;; start) redis_start ;; stop) redis_stop ;; restart) redis_stop && redis_start ;; promote) redis_promote ;; demote) redis_demote ;; notify) redis_notify ;; meta-data) redis_meta_data ;; validate-all) redis_validate ;; *) echo "Usage: $0 {monitor|start|stop|restart|promote|demote|notify|validate-all|meta-data}" exit $OCF_ERR_UNIMPLEMENTED ;; esac status=$? ocf_log debug "exit_status=$status" exit $status