#!/bin/bash
#
# OCF resource agent for redis-server with master/slave replication support.
#
# The agent is driven by the cluster manager through the action passed as $1
# (or $__OCF_ACTION): monitor/status, start, stop, restart, promote, demote,
# notify, validate-all and meta-data.
#
# Parameters are read from OCF_RESKEY_* environment variables:
#   bin, client_bin, config, user, rundir, pidfile_name, socket_name, port,
#   wait_last_known_master

. "${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs"

: "${OCF_RESKEY_bin:=/usr/bin/redis-server}"
: "${OCF_RESKEY_client_bin:=/usr/bin/redis-cli}"
: "${OCF_RESKEY_user:=redis}"
: "${OCF_RESKEY_rundir:=/var/run/redis}"
: "${OCF_RESKEY_pidfile_name:=redis-server.pid}"
: "${OCF_RESKEY_socket_name:=redis.sock}"
: "${OCF_RESKEY_port:=6379}"

# The config file location differs between installations; prefer
# /etc/redis.conf when it exists, otherwise fall back to /etc/redis/redis.conf.
if [ -z "$OCF_RESKEY_config" ]; then
    if [ -f "/etc/redis.conf" ]; then
        OCF_RESKEY_config="/etc/redis.conf"
    else
        OCF_RESKEY_config="/etc/redis/redis.conf"
    fi
fi

# When set to 1, monitor() additionally verifies the replication link of a
# slave (link status and configured master host).
CHECK_SLAVE_STATE=0

REDIS_SERVER="$OCF_RESKEY_bin"
REDIS_CLIENT="$OCF_RESKEY_client_bin"
REDIS_CONFIG="$OCF_RESKEY_config"
REDIS_USER="$OCF_RESKEY_user"
REDIS_RUNDIR="$OCF_RESKEY_rundir"
REDIS_PIDFILE="$OCF_RESKEY_rundir/$OCF_RESKEY_pidfile_name"
REDIS_SOCKET="$OCF_RESKEY_rundir/$OCF_RESKEY_socket_name"
REDIS_REPLICATION_PORT="$OCF_RESKEY_port"

# Print the resource agent metadata (XML) on stdout.
#
# NOTE(review): the XML markup below was reconstructed around the description
# texts following the OCF resource-agent metadata layout. Parameter content
# types/defaults and the action timeouts/intervals should be confirmed against
# the authoritative copy of this agent before release.
function meta_data() {
    cat <<EOI
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="redis">
<version>1.0</version>

<longdesc lang="en">
Resource agent script for redis server.

This resource fully supports master/slave replication. The master preference of a node is determined by the 'slave_priority' parameter of the redis config.
When taking the resource from 'unmanaged' to 'managed', the currently active master will be given a priority of 1000 (plus 1 for each active connection). The default 'slave_priority' is 100, so the master will stay master. For a slave to become master after converting the resource to managed, set a slave_priority greater than 1000.
</longdesc>

<shortdesc lang="en">Redis server</shortdesc>

<parameters>
<parameter name="bin" unique="0" required="0">
<longdesc lang="en">
Path to \`redis-server\`
</longdesc>
<shortdesc lang="en">Path to \`redis-server\`</shortdesc>
<content type="string" default="/usr/bin/redis-server" />
</parameter>

<parameter name="client_bin" unique="0" required="0">
<longdesc lang="en">
Path to \`redis-cli\`
</longdesc>
<shortdesc lang="en">Path to \`redis-cli\`</shortdesc>
<content type="string" default="/usr/bin/redis-cli" />
</parameter>

<parameter name="config" unique="1" required="0">
<longdesc lang="en">
Path to 'redis.conf'
</longdesc>
<shortdesc lang="en">Path to 'redis.conf'</shortdesc>
<content type="string" default="/etc/redis/redis.conf" />
</parameter>

<parameter name="user" unique="0" required="0">
<longdesc lang="en">
User to run redis as
</longdesc>
<shortdesc lang="en">Redis user</shortdesc>
<content type="string" default="redis" />
</parameter>

<parameter name="rundir" unique="1" required="0">
<longdesc lang="en">
Directory to store socket and pid file in
</longdesc>
<shortdesc lang="en">Redis var/run dir</shortdesc>
<content type="string" default="/var/run/redis" />
</parameter>

<parameter name="pidfile_name" unique="0" required="0">
<longdesc lang="en">
The filename to use for the pidfile. Will be created in the rundir.
Should only be a basename, not a full path.
</longdesc>
<shortdesc lang="en">Redis pidfile name</shortdesc>
<content type="string" default="redis-server.pid" />
</parameter>

<parameter name="socket_name" unique="0" required="0">
<longdesc lang="en">
The filename to use for the socket. Will be created in the rundir.
Should only be a basename, not a full path.
</longdesc>
<shortdesc lang="en">Redis socket name</shortdesc>
<content type="string" default="redis.sock" />
</parameter>

<parameter name="port" unique="0" required="0">
<longdesc lang="en">
Port for replication client to connect to on remote server
</longdesc>
<shortdesc lang="en">Replication port</shortdesc>
<content type="string" default="6379" />
</parameter>

<parameter name="wait_last_known_master" unique="0" required="0">
<longdesc lang="en">
During redis cluster bootstrap, wait for the last known master to be
promoted before allowing any other instances in the cluster to be
promoted. This lessens the risk of data loss when persistent data
is in use.
</longdesc>
<shortdesc lang="en">Wait for last known master</shortdesc>
<content type="boolean" default="false" />
</parameter>
</parameters>

<actions>
<action name="start" timeout="120" />
<action name="stop" timeout="120" />
<action name="status" timeout="60" />
<action name="monitor" depth="0" timeout="60" interval="45" />
<action name="monitor" role="Master" depth="0" timeout="60" interval="20" />
<action name="monitor" role="Slave" depth="0" timeout="60" interval="60" />
<action name="promote" timeout="120" />
<action name="demote" timeout="120" />
<action name="notify" timeout="90" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
EOI
}

# Resource name without the clone instance suffix (text before the first ':').
INSTANCE_ATTR_NAME="${OCF_RESOURCE_INSTANCE%%:*}"

# crm_attribute invocation used to persist / query the last known master in
# the cluster configuration. Kept as an array so it can be executed without
# relying on word splitting.
CRM_ATTR_REPL_INFO=(
    "${HA_SBIN_DIR}/crm_attribute"
    --type crm_config
    --name "${INSTANCE_ATTR_NAME}_REPL_INFO"
    -s redis_replication
)

MASTER_HOST=""           # cached result of last_known_master
MASTER_ACTIVE_CACHED=""  # non-empty once MASTER_ACTIVE has been computed
MASTER_ACTIVE=""         # cached exit status of the crm_mon master lookup

# Return 0 when crm_mon reports an active, non-orphaned, non-failed Master
# instance of this resource. The crm_mon lookup is done once per invocation of
# the agent and its exit status is cached.
master_is_active()
{
    if [ -z "$MASTER_ACTIVE_CACHED" ]; then
        # determine if a master instance is already up and is healthy
        crm_mon --as-xml | grep "resource.*id=\"${OCF_RESOURCE_INSTANCE}\".*role=\"Master\".*active=\"true\".*orphaned=\"false\".*failed=\"false\"" > /dev/null 2>&1
        MASTER_ACTIVE=$?
        MASTER_ACTIVE_CACHED="true"
    fi
    return "$MASTER_ACTIVE"
}

# Record $1 as the last known master, both in MASTER_HOST and in the cluster
# configuration attribute.
function set_master()
{
    MASTER_HOST="$1"
    "${CRM_ATTR_REPL_INFO[@]}" -v "$1" -q
}

# Print the last known master host. The value is queried from the cluster
# configuration attribute unless MASTER_HOST is already set.
function last_known_master()
{
    if [ -z "$MASTER_HOST" ]; then
        MASTER_HOST="$("${CRM_ATTR_REPL_INFO[@]}" --query -q 2>/dev/null)"
    fi
    echo "$MASTER_HOST"
}

# Run crm_master with a 'reboot' lifetime and the given extra arguments.
function crm_master_reboot() {
    "${HA_SBIN_DIR}/crm_master" -l reboot "$@"
}

# Print the master score for this node.
# Arguments:
#   $1 - preferred base score (redis 'slave_priority'), may be empty
#   $2 - number of connected clients, may be empty (treated as 0)
function calculate_score()
{
    local perf_score="$1"
    local connected_clients="${2:-0}"

    if ocf_is_true "$OCF_RESKEY_wait_last_known_master"; then
        # only set preferred score by slave_priority if
        # we are not waiting for the last known master. Otherwise
        # we want the agent to have complete control over the scoring.
        perf_score=""
        connected_clients="0"
    fi

    if [[ -z "$perf_score" ]]; then
        if [[ "$(last_known_master)" == "$NODENAME" ]]; then
            perf_score=1000
        else
            perf_score=1
        fi
    fi
    perf_score=$(( perf_score + connected_clients ))
    echo "$perf_score"
}

# Set the master score of this node to $1.
# When wait_last_known_master is enabled and no master is active yet, the
# score is withheld on every node except the last known master.
function set_score()
{
    local score="$1"
    local last_master

    if ocf_is_true "$OCF_RESKEY_wait_last_known_master" && ! master_is_active; then
        last_master="$(last_known_master)"
        if [ -n "$last_master" ] && [[ "$last_master" != "$NODENAME" ]]; then
            ocf_log info "Postponing setting master score for ${NODENAME} until last known master instance [${last_master}] is promoted"
            return
        fi
    fi

    ocf_log debug "monitor: Setting master score to '$score'"
    crm_master_reboot -v "$score"
}

# Run redis-cli against the local unix socket with the given arguments and
# strip carriage returns from its output.
function redis_client() {
    # "$*" keeps the log message a single argument.
    ocf_log debug "redis_client: '$REDIS_CLIENT' -s '$REDIS_SOCKET' $*"
    "$REDIS_CLIENT" -s "$REDIS_SOCKET" "$@" | sed 's/\r//'
}

# Fill the caller's associative array 'info' with the key:value pairs printed
# by `redis-cli info`. The array is cleared first so that values from an
# earlier call cannot linger. The caller must have declared 'info' with
# `typeset -A info`.
function load_redis_info() {
    local line key value

    info=()
    while read -r line; do
        [[ "$line" == "#"* ]] && continue
        [[ "$line" != *":"* ]] && continue
        IFS=':' read -r key value <<< "$line"
        info[$key]="$value"
    done < <(redis_client info)
}

# Process-level check only: OCF_SUCCESS when the pid stored in the pidfile
# belongs to a running redis-server, OCF_NOT_RUNNING otherwise.
function simple_status() {
    local pid

    if ! [ -f "$REDIS_PIDFILE" ]; then
        return "$OCF_NOT_RUNNING"
    fi

    pid="$(<"$REDIS_PIDFILE")"
    # An empty pidfile cannot identify a process.
    [[ -n "$pid" ]] || return "$OCF_NOT_RUNNING"
    pidof "$REDIS_SERVER" | grep -q "\<$pid\>" || return "$OCF_NOT_RUNNING"

    ocf_log debug "monitor: redis-server running under pid $pid"

    return "$OCF_SUCCESS"
}

# Full health check.
# Returns:
#   OCF_NOT_RUNNING     - no redis-server process for the pidfile
#   OCF_ERR_GENERIC     - role could not be read, or (with CHECK_SLAVE_STATE=1)
#                         the slave link is down / points at the wrong master
#   OCF_RUNNING_MASTER  - master/slave resource and redis reports role=master
#   OCF_SUCCESS         - otherwise
function monitor() {
    local res
    local score

    simple_status
    res=$?
    if (( res != OCF_SUCCESS )); then
        return "$res"
    fi

    typeset -A info
    load_redis_info

    if [[ -z "${info[role]}" ]]; then
        ocf_log err "monitor: Could not get role from \`$REDIS_CLIENT -s $REDIS_SOCKET info\`"
        return "$OCF_ERR_GENERIC"
    fi

    if ocf_is_ms; then
        # Here we see if a score has already been set.
        # If score isn't set we use the redis setting 'slave_priority'.
        # If that isn't set, we default to 1000 for a master, and 1 for slave.
        # We then add 1 for each connected client
        score="$(crm_master_reboot --get-value --quiet 2>/dev/null)"
        if [[ -z "$score" ]]; then
            score=$(calculate_score "${info[slave_priority]}" "${info[connected_clients]}")
            set_score "$score"
        fi

        if [[ "${info[role]}" == "master" ]]; then
            if ocf_is_probe; then
                set_master "$NODENAME"
            fi
            return "$OCF_RUNNING_MASTER"
        fi

        if [ "$CHECK_SLAVE_STATE" -eq 1 ]; then
            if [[ "${info[master_link_status]}" != "up" ]]; then
                ocf_log info "monitor: Slave mode link has not yet been established (link=${info[master_link_status]})"
                return "$OCF_ERR_GENERIC"
            fi
            if [[ "${info[master_host]}" != "$(last_known_master)" ]]; then
                ocf_log err "monitor: Slave mode current master does not match running master. current=${info[master_host]}, running=$(last_known_master)"
                return "$OCF_ERR_GENERIC"
            fi
        fi
    fi
    return "$OCF_SUCCESS"
}

# Start redis-server as REDIS_USER, wait until it has finished loading its
# data set, and (for master/slave resources) put it into slave mode.
function start() {
    local rc
    local output

    monitor
    rc=$?

    if (( rc == OCF_SUCCESS )) || (( rc == OCF_RUNNING_MASTER )); then
        ocf_log info "start: redis is already running"
        return "$OCF_SUCCESS"
    fi

    [[ ! -d "$REDIS_RUNDIR" ]] && mkdir -p "$REDIS_RUNDIR"
    chown -R "$REDIS_USER" "$REDIS_RUNDIR"

    ocf_log info "start: $REDIS_SERVER --daemonize yes --unixsocket '$REDIS_SOCKET' --pidfile '$REDIS_PIDFILE'"
    output="$(su "$REDIS_USER" -s /bin/sh -c "cd '$REDIS_RUNDIR'; exec '$REDIS_SERVER' '$REDIS_CONFIG' --daemonize yes --unixsocket '$REDIS_SOCKET' --pidfile '$REDIS_PIDFILE'" 2>&1)"

    typeset -A info
    while true; do
        # wait for redis to start
        load_redis_info

        # Compare as strings: an unset 'loading' (redis-cli could not connect
        # yet) must not be mistaken for "loading finished", which is what an
        # arithmetic comparison with 0 would do.
        if [[ "${info[loading]}" == "0" ]]; then
            break
        elif [[ "${info[loading]}" == "1" ]]; then
            sleep "${info[loading_eta_seconds]:-1}"
        elif pidof "$REDIS_SERVER" >/dev/null; then
            # unknown error, but the process still exists.
            # This check is mainly because redis daemonizes before it starts listening, causing `redis-cli` to fail
            #   See https://github.com/antirez/redis/issues/2368
            # It's possible that the `pidof` will pick up a different redis, but in that case, the start operation will just time out
            sleep 1
        else
            ocf_log err "start: Unknown error waiting for redis to start"
            return "$OCF_ERR_GENERIC"
        fi
    done

    ocf_is_ms && demote # pacemaker expects resources to start in slave mode

    monitor
    rc=$?
    if (( rc == OCF_SUCCESS )) || (( rc == OCF_RUNNING_MASTER )); then
        return "$OCF_SUCCESS"
    fi

    ocf_log err "start: Unknown error starting redis. output=${output//$'\n'/; }"
    return "$rc"
}

# Send SIGTERM to redis-server and wait until the process is gone, then drop
# this node's master score. There is no internal timeout; the loop runs until
# the process has exited.
function stop() {
    local rc
    local pid

    monitor
    rc=$?

    if (( rc == OCF_NOT_RUNNING )); then
        ocf_log info "stop: redis is already stopped"
        crm_master_reboot -D
        return "$OCF_SUCCESS"
    fi

    pid="$(<"$REDIS_PIDFILE")"
    kill -TERM "$pid"

    while true; do
        # Only the process is checked here: the socket may already be closed
        # while redis is still shutting down.
        simple_status
        rc=$?
        if (( rc == OCF_NOT_RUNNING )); then
            crm_master_reboot -D
            return "$OCF_SUCCESS"
        fi
        sleep 1
    done
}

# Promote the local instance to master (`slaveof no one`) and record this node
# as the last known master.
function promote() {
    local rc

    monitor
    rc=$?

    if (( rc == OCF_RUNNING_MASTER )); then
        ocf_log info "promote: Already running as master"
        set_master "$NODENAME"
        return "$OCF_SUCCESS"
    elif (( rc != OCF_SUCCESS )); then
        ocf_log err "promote: Node is not running as a slave"
        return "$OCF_ERR_GENERIC"
    fi

    redis_client slaveof no one

    monitor
    rc=$?
    if (( rc == OCF_RUNNING_MASTER )); then
        set_master "$NODENAME"
        return "$OCF_SUCCESS"
    fi

    ocf_log err "promote: Unknown error while promoting to master (status=$rc)"
    return "$OCF_ERR_GENERIC"
}

# Put the local instance into slave mode, replicating from the last known
# master, and wait up to 20 seconds for monitor to report success.
function demote() {
    local master_host
    local master_port
    local rc
    local attempt

    CHECK_SLAVE_STATE=1
    monitor
    rc=$?

    if (( rc == OCF_SUCCESS )); then
        ocf_log info "demote: Already running as slave"
        return "$OCF_SUCCESS"
    elif (( rc == OCF_NOT_RUNNING )); then
        ocf_log err "demote: Failed to demote, redis not running."
        return "$OCF_NOT_RUNNING"
    fi

    master_host="$(last_known_master)"
    master_port="${REDIS_REPLICATION_PORT}"

    # The elected master has to remain a slave during startup.
    # During this period a placeholder master host is assigned.
    if [ -z "$master_host" ] || [[ "$master_host" == "$NODENAME" ]]; then
        CHECK_SLAVE_STATE=0
        master_host="no-such-master"
    elif ! master_is_active; then
        # no master has been promoted yet. we'll be notified when the
        # master starts.
        CHECK_SLAVE_STATE=0
        master_host="no-such-master"
    fi

    ocf_log info "demote: Setting master to '$master_host'"

    redis_client slaveof "$master_host" "$master_port"

    # wait briefly for the slave to connect to the master
    for (( attempt = 1; attempt <= 20; attempt++ )); do
        monitor
        rc=$?
        if (( rc == OCF_SUCCESS )); then
            return "$OCF_SUCCESS"
        fi
        sleep 1
    done

    ocf_log err "demote: Unexpected error setting slave mode (status=$rc)"
    return "$OCF_ERR_GENERIC"
}

# Handle cluster notifications. After a promote or demote elsewhere in the
# cluster, a running slave re-runs demote. Always returns OCF_SUCCESS.
function notify() {
    local mode="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}"
    local rc

    case "$mode" in
        post-demote|post-promote) # change the master
            monitor
            rc=$?
            if (( rc == OCF_SUCCESS )); then # we're a slave
                # calling demote updates the slave's connection
                # to the newly appointed Master instance.
                demote
            fi
        ;;
    esac
    return "$OCF_SUCCESS"
}

# Verify that the configured binaries, config file and user exist.
# Returns OCF_ERR_INSTALLED / OCF_ERR_CONFIGURED on the first problem found,
# OCF_SUCCESS otherwise.
function validate() {
    if [[ ! -x "$REDIS_SERVER" ]]; then
        ocf_log err "validate: $REDIS_SERVER does not exist or is not executable"
        return "$OCF_ERR_INSTALLED"
    fi
    if [[ ! -x "$REDIS_CLIENT" ]]; then
        ocf_log err "validate: $REDIS_CLIENT does not exist or is not executable"
        return "$OCF_ERR_INSTALLED"
    fi
    if [[ ! -f "$REDIS_CONFIG" ]]; then
        ocf_log err "validate: $REDIS_CONFIG does not exist"
        return "$OCF_ERR_CONFIGURED"
    fi
    if ! getent passwd "$REDIS_USER" &>/dev/null; then
        ocf_log err "validate: $REDIS_USER is not a valid user"
        return "$OCF_ERR_CONFIGURED"
    fi
    return "$OCF_SUCCESS"
}

NODENAME=$(ocf_local_nodename)

ocf_log debug "action=${1:-$__OCF_ACTION} notify_type=${OCF_RESKEY_CRM_meta_notify_type} notify_operation=${OCF_RESKEY_CRM_meta_notify_operation} master_host=${OCF_RESKEY_CRM_meta_notify_master_uname} slave_host=${OCF_RESKEY_CRM_meta_notify_slave_uname} promote_host=${OCF_RESKEY_CRM_meta_notify_promote_uname} demote_host=${OCF_RESKEY_CRM_meta_notify_demote_uname}; params: bin=${OCF_RESKEY_bin} client_bin=${OCF_RESKEY_client_bin} config=${OCF_RESKEY_config} user=${OCF_RESKEY_user} rundir=${OCF_RESKEY_rundir} port=${OCF_RESKEY_port}"

case "${1:-$__OCF_ACTION}" in
    status|monitor)
        monitor
    ;;
    start)
        start
    ;;
    stop)
        stop
    ;;
    restart)
        stop && start
    ;;
    promote)
        promote
    ;;
    demote)
        demote
    ;;
    notify)
        notify
    ;;
    meta-data)
        meta_data
    ;;
    validate-all)
        validate
    ;;
    *)
        echo "Usage: $0 {monitor|start|stop|restart|promote|demote|notify|validate-all|meta-data}"
        exit $OCF_ERR_UNIMPLEMENTED
    ;;
esac
status=$?
ocf_log debug "exit_status=$status"
exit $status