diff --git a/heartbeat/pgsql b/heartbeat/pgsql index d16c96bb4..adecd46c4 100755 --- a/heartbeat/pgsql +++ b/heartbeat/pgsql @@ -1,1767 +1,1733 @@ #!/bin/sh # # Description: Manages a PostgreSQL Server as an OCF High-Availability # resource # # Authors: Serge Dubrouski (sergeyfd@gmail.com) -- original RA # Florian Haas (florian@linbit.com) -- makeover # Takatoshi MATSUO (matsuo.tak@gmail.com) -- support replication # # Copyright: 2006-2012 Serge Dubrouski # and other Linux-HA contributors # License: GNU General Public License (GPL) # ############################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # # Get PostgreSQL Configuration parameter # get_pgsql_param() { local config local param_name param_name=$1 #Check that config file exists if [ -n "$OCF_RESKEY_config" ]; then config=$OCF_RESKEY_config else config=$OCF_RESKEY_pgdata/postgresql.conf fi check_config "$config" [ $? -eq 0 ] || return perl_code="if (/^\s*$param_name[\s=]+\s*(.*)$/) { \$dir=\$1; \$dir =~ s/\s*\#.*//; \$dir =~ s/^'(\S*)'/\$1/; print \$dir;}" perl -ne "$perl_code" < $config } # Defaults OCF_RESKEY_pgctl_default=/usr/bin/pg_ctl OCF_RESKEY_psql_default=/usr/bin/psql OCF_RESKEY_pgdata_default=/var/lib/pgsql/data OCF_RESKEY_pgdba_default=postgres OCF_RESKEY_pghost_default="" OCF_RESKEY_pgport_default=5432 OCF_RESKEY_config_default="" OCF_RESKEY_start_opt_default="" OCF_RESKEY_pgdb_default=template1 OCF_RESKEY_logfile_default=/dev/null OCF_RESKEY_stop_escalate_default=30 OCF_RESKEY_monitor_user_default="" OCF_RESKEY_monitor_password_default="" OCF_RESKEY_monitor_sql_default="select now();" # Defaults for replication OCF_RESKEY_rep_mode_default=none OCF_RESKEY_node_list_default="" OCF_RESKEY_restore_command_default="" OCF_RESKEY_master_ip_default="" OCF_RESKEY_repuser_default="postgres" OCF_RESKEY_primary_conninfo_opt_default="" OCF_RESKEY_tmpdir_default="/var/lib/pgsql/tmp" OCF_RESKEY_xlog_check_count_default="3" OCF_RESKEY_crm_attr_timeout_default="5" OCF_RESKEY_stop_escalate_in_slave_default=30 : ${OCF_RESKEY_pgctl=${OCF_RESKEY_pgctl_default}} : ${OCF_RESKEY_psql=${OCF_RESKEY_psql_default}} : ${OCF_RESKEY_pgdata=${OCF_RESKEY_pgdata_default}} : ${OCF_RESKEY_pgdba=${OCF_RESKEY_pgdba_default}} : ${OCF_RESKEY_pghost=${OCF_RESKEY_pghost_default}} : ${OCF_RESKEY_pgport=${OCF_RESKEY_pgport_default}} : ${OCF_RESKEY_config=${OCF_RESKEY_config_default}} : ${OCF_RESKEY_start_opt=${OCF_RESKEY_start_opt_default}} : ${OCF_RESKEY_pgdb=${OCF_RESKEY_pgdb_default}} : ${OCF_RESKEY_logfile=${OCF_RESKEY_logfile_default}} : ${OCF_RESKEY_stop_escalate=${OCF_RESKEY_stop_escalate_default}} : ${OCF_RESKEY_monitor_user=${OCF_RESKEY_monitor_user_default}} : ${OCF_RESKEY_monitor_password=${OCF_RESKEY_monitor_password_default}} : ${OCF_RESKEY_monitor_sql=${OCF_RESKEY_monitor_sql_default}} # for replication : ${OCF_RESKEY_rep_mode=${OCF_RESKEY_rep_mode_default}} : ${OCF_RESKEY_node_list=${OCF_RESKEY_node_list_default}} : ${OCF_RESKEY_restore_command=${OCF_RESKEY_restore_command_default}} : ${OCF_RESKEY_master_ip=${OCF_RESKEY_master_ip_default}} : ${OCF_RESKEY_repuser=${OCF_RESKEY_repuser_default}} : ${OCF_RESKEY_primary_conninfo_opt=${OCF_RESKEY_primary_conninfo_opt_default}} : ${OCF_RESKEY_tmpdir=${OCF_RESKEY_tmpdir_default}} : ${OCF_RESKEY_xlog_check_count=${OCF_RESKEY_xlog_check_count_default}} : ${OCF_RESKEY_crm_attr_timeout=${OCF_RESKEY_crm_attr_timeout_default}} : ${OCF_RESKEY_stop_escalate_in_slave=${OCF_RESKEY_stop_escalate_in_slave_default}} usage() { cat < 1.0 Resource script for PostgreSQL. It manages a PostgreSQL as an HA resource. Manages a PostgreSQL database instance Path to pg_ctl command. pgctl Start options (-o start_opt in pg_ctl). "-i -p 5432" for example. start_opt Additional pg_ctl options (-w, -W etc..). ctl_opt Path to psql command. psql Path to PostgreSQL data directory. pgdata User that owns PostgreSQL. pgdba Hostname/IP address where PostgreSQL is listening pghost Port where PostgreSQL is listening pgport PostgreSQL user that pgsql RA will user for monitor operations. If it's not set pgdba user will be used. monitor_user Password for monitor user. monitor_password SQL script that will be used for monitor operations. monitor_sql Path to the PostgreSQL configuration file for the instance Configuration file Database that will be used for monitoring. pgdb Path to PostgreSQL server log output file. logfile Unix socket directory for PostgeSQL socketdir Number of shutdown retries (using -m fast) before resorting to -m immediate stop escalation Replication mode(none(default)/async/sync). "async" and "sync" require PostgreSQL 9.1 or later. If you use async or sync, it requires node_list, master_ip, restore_command parameters, and needs setting postgresql.conf, pg_hba.conf up for replication. Please delete "include /../../rep_mode.conf" line in postgresql.conf when you switch from sync to async. rep_mode All node names. Please separate each node name with a space. This is required for replication. node list restore_command for recovery.conf. This is required for replication. restore_command Master's floating IP address to be connected from hot standby. This parameter is used for "primary_conninfo" in recovery.conf. This is required for replication. master ip User used to connect to the master server. This parameter is used for "primary_conninfo" in recovery.conf. This is required for replication. repuser primary_conninfo options of recovery.conf except host, port, user and application_name. This is optional for replication. primary_conninfo_opt Path to temporary directory. This is optional for replication. tmpdir Number of checking xlog on monitor before promote. This is optional for replication. xlog check count The timeout of crm_attribute forever update command. Default value is 5 seconds. This is optional for replication. The timeout of crm_attribute forever update command. Number of shutdown retries (using -m fast) before resorting to -m immediate in Slave state. This is optional for replication. stop escalation_in_slave EOF } # # Run the given command in the Resource owner environment... # runasowner() { local quietrun="" local loglevel="-err" local var for var in 1 2 do case "$1" in "-q") quietrun="-q" shift 1;; "warn"|"err") loglevel="-$1" shift 1;; *) ;; esac done ocf_run $quietrun $loglevel su $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; $*" } # # Shell escape # escape_string() { echo "$*" | sed -e "s/'/'\\\\''/g" } # # methods: What methods/operations do we support? # pgsql_methods() { cat </dev/null 2>&1" return $? fi # No PID file false } # # pgsql_real_monitor # pgsql_real_monitor() { local loglevel local rc local output # Set the log level of the error message loglevel=${1:-err} if ! pgsql_status then ocf_log info "PostgreSQL is down" return $OCF_NOT_RUNNING fi if is_replication; then #Check replication state output=`su $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; \ $OCF_RESKEY_psql $psql_options -U $OCF_RESKEY_pgdba \ -Atc \"${CHECK_MS_SQL}\""` rc=$? if [ $rc -ne 0 ]; then report_psql_error $rc $loglevel return $OCF_ERR_GENERIC fi case "$output" in f) ocf_log debug "PostgreSQL is running as a primary." if [ "$OCF_RESKEY_monitor_sql" = "$OCF_RESKEY_monitor_sql_default" ]; then return $OCF_RUNNING_MASTER fi ;; t) ocf_log debug "PostgreSQL is running as a hot standby." return $OCF_SUCCESS;; *) ocf_log err "$CHECK_MS_SQL output is $output" return $OCF_ERR_GENERIC;; esac fi OCF_RESKEY_monitor_sql=`escape_string "$OCF_RESKEY_monitor_sql"` runasowner -q $loglevel "$OCF_RESKEY_psql $psql_options \ -c '$OCF_RESKEY_monitor_sql'" rc=$? if [ $rc -ne 0 ]; then report_psql_error $rc $loglevel return $OCF_ERR_GENERIC fi if is_replication; then return $OCF_RUNNING_MASTER fi return $OCF_SUCCESS } pgsql_replication_monitor() { local rc local rsc local instance local my_status local data_status local is_master="" rc=$1 if [ $rc -ne $OCF_SUCCESS -a $rc -ne "$OCF_RUNNING_MASTER" ]; then return $rc fi # If I am Master if [ $rc -eq $OCF_RUNNING_MASTER ]; then change_data_status "$NODENAME" "LATEST" change_pgsql_status "$NODENAME" "PRI" control_slave_status || return $OCF_ERR_GENERIC return $rc fi # I can't get master node name from $OCF_RESKEY_CRM_meta_notify_master_uname on monitor, # so I will get master node name using crm_mon -n if output=`crm_mon -n1 | grep " Master"`; then rsc=`echo $OCF_RESOURCE_INSTANCE | cut -d ":" -f 1` instance=0 while : do if [ "$instance" -ge "$OCF_RESKEY_CRM_meta_clone_max" ]; then break fi if echo "$output" | grep "${rsc}:${instance}"; then is_master="yes" break fi instance=`expr $instance + 1` done fi if [ ! -n "$is_master" ]; then # If I am Slave and Master is not exist ocf_log info "Master does not exist." change_pgsql_status "$NODENAME" "HS:alone" have_master_right if [ $? -eq 0 ]; then rm -f ${XLOG_NOTE_FILE}.* fi else output=`$CRM_ATTR_FOREVER -N "$NODENAME" \ -n "$PGSQL_DATA_STATUS_ATTR" -G -q` if [ "$output" = "DISCONNECT" ]; then change_pgsql_status "$NODENAME" "HS:alone" fi fi return $rc } #pgsql_monitor: pgsql_real_monitor() wrapper for replication pgsql_monitor() { local rc pgsql_real_monitor rc=$? if ! is_replication; then return $rc else pgsql_replication_monitor $rc return $? fi } # pgsql_post_demote pgsql_post_demote() { DEMOTE_NODE=`echo $OCF_RESKEY_CRM_meta_notify_demote_uname | sed "s/ /\n/g" | head -1` ocf_log debug "post-demote called. Demote uname is $DEMOTE_NODE" if [ "$DEMOTE_NODE" != "$NODENAME" ]; then if ! echo $OCF_RESKEY_CRM_meta_notify_master_uname | grep $NODENAME; then show_master_baseline change_pgsql_status "$NODENAME" "HS:alone" fi fi return $OCF_SUCCESS } pgsql_pre_promote() { local master_baseline local my_master_baseline local cmp_location local number_of_nodes # If my data is newer than new master's one, I fail my resource. PROMOTE_NODE=`echo $OCF_RESKEY_CRM_meta_notify_promote_uname | \ sed "s/ /\n/g" | head -1` - number_of_nodes=`echo $OCF_RESKEY_node_list | \ - sed -e "s/ */ /g" | sed -e "s/^ \| $//g" | \ - sed -e "s/ /\n/g" | wc -l` + number_of_nodes=`echo $OCF_RESKEY_node_list | wc -w` if [ $number_of_nodes -ge 3 -a \ "$OCF_RESKEY_rep_mode" = "sync" -a \ "$PROMOTE_NODE" != "$NODENAME" ]; then master_baseline=`$CRM_ATTR_REBOOT -N "$PROMOTE_NODE" -n \ "$PGSQL_MASTER_BASELINE" -G -q 2>/dev/null` if [ $? -eq 0 ]; then my_master_baseline=`$CRM_ATTR_REBOOT -N "$NODENAME" -n \ "$PGSQL_MASTER_BASELINE" -G -q 2>/dev/null` # get older location cmp_location=`printf "$master_baseline\n$my_master_baseline\n" |\ sort | head -1` if [ "$cmp_location" != "$my_master_baseline" ]; then ocf_log err "My data is newer than new master's one. New master's location : $master_baseline" $CRM_FAILCOUNT -r $OCF_RESOURCE_INSTANCE -U $NODENAME -v INFINITY return $OCF_ERR_GENERIC fi fi fi return $OCF_SUCCESS } pgsql_notify() { local type="${OCF_RESKEY_CRM_meta_notify_type}" local op="${OCF_RESKEY_CRM_meta_notify_operation}" local rc if ! is_replication; then return $OCF_SUCCESS fi ocf_log debug "notify: ${type} for ${op}" case $type in pre) case $op in promote) pgsql_pre_promote return $? ;; esac ;; post) case $op in promote) delete_xlog_location PROMOTE_NODE=`echo $OCF_RESKEY_CRM_meta_notify_promote_uname | \ sed "s/ /\n/g" | head -1` if [ "$PROMOTE_NODE" != "$NODENAME" ]; then delete_master_baseline fi return $OCF_SUCCESS ;; demote) pgsql_post_demote return $? ;; start|stop) if [ "$NODENAME " = "$OCF_RESKEY_CRM_meta_notify_master_uname" ]; then control_slave_status fi return $OCF_SUCCESS ;; esac ;; esac return $OCF_SUCCESS } control_slave_status() { local rc local data_status local target local all_data_status local tmp_data_status local node_name local number_of_nodes all_data_status=`su $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; \ $OCF_RESKEY_psql $psql_options -U $OCF_RESKEY_pgdba \ -Atc \"${CHECK_REPLICATION_STATE_SQL}\""` rc=$? if [ $rc -eq 0 ]; then if [ -n "$all_data_status" ]; then all_data_status=`echo $all_data_status | sed "s/\n/ /g"` fi else report_psql_error $rc warn return 1 fi - number_of_nodes=`echo $OCF_RESKEY_node_list | \ - sed -e "s/ */ /g" | sed -e "s/^ \| $//g" | \ - sed -e "s/ /\n/g" | wc -l` + number_of_nodes=`echo $OCF_RESKEY_node_list | wc -w` for target in $OCF_RESKEY_node_list; do if [ "$target" = "$NODENAME" ]; then continue fi data_status="DISCONNECT" if [ -n "$all_data_status" ]; then for tmp_data_status in $all_data_status; do node_name=`echo $tmp_data_status | cut -d "|" -f 1` state=`echo $tmp_data_status | cut -d "|" -f 2` sync_state=`echo $tmp_data_status | cut -d "|" -f 3` ocf_log debug "node=$node_name, state=$state, sync_state=$sync_state" if [ "$node_name" = "$target" ];then data_status="$state|$sync_state" break fi done fi case "$data_status" in "STREAMING|SYNC") change_data_status "$target" "$data_status" change_master_score "$target" "$CAN_PROMOTE" change_pgsql_status "$target" "HS:sync" ;; "STREAMING|ASYNC") change_data_status "$target" "$data_status" if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then change_master_score "$target" "$CAN_NOT_PROMOTE" if ! is_sync_mode "$target"; then set_sync_mode "$target" fi else if [ $number_of_nodes -le 2 ]; then change_master_score "$target" "$CAN_PROMOTE" else # I can't determine which slave's data is newest in async mode. change_master_score "$target" "$CAN_NOT_PROMOTE" fi fi change_pgsql_status "$target" "HS:async" ;; "STREAMING|POTENTIAL") change_data_status "$target" "$data_status" change_master_score "$target" "$CAN_NOT_PROMOTE" change_pgsql_status "$target" "HS:potential" ;; "DISCONNECT") change_data_status "$target" "$data_status" change_master_score "$target" "$CAN_NOT_PROMOTE" if [ "$OCF_RESKEY_rep_mode" = "sync" ] && \ is_sync_mode "$target"; then set_async_mode "$target" fi ;; *) change_data_status "$target" "$data_status" change_master_score "$target" "$CAN_NOT_PROMOTE" if [ "$OCF_RESKEY_rep_mode" = "sync" ] && \ is_sync_mode "$target"; then set_async_mode "$target" fi change_pgsql_status "$target" "HS:connected" ;; esac done return 0 } have_master_right() { local old local new local output local data_status local node local mylocation local count local newestXlog local oldfile local newfile ocf_log debug "Checking if I have a master right." data_status=`$CRM_ATTR_FOREVER -N "$NODENAME" -n \ "$PGSQL_DATA_STATUS_ATTR" -G -q` if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then if [ -n "$data_status" -a "$data_status" != "STREAMING|SYNC" -a \ "$data_status" != "LATEST" ]; then ocf_log warn "My data is out-of-date. status=$data_status" return 1 fi else if [ -n "$data_status" -a "$data_status" != "STREAMING|SYNC" -a \ "$data_status" != "STREAMING|ASYNC" -a \ "$data_status" != "LATEST" ]; then ocf_log warn "My data is out-of-date. status=$data_status" return 1 fi fi ocf_log info "My data status=$data_status." show_xlog_location if [ $? -ne 0 ]; then ocf_log err "Failed to show my xlog location." exit $OCF_ERR_GENERIC fi old=0 for count in `seq $OCF_RESKEY_xlog_check_count`; do if [ -f ${XLOG_NOTE_FILE}.$count ]; then old=$count continue fi break done new=`expr $old + 1` # get xlog locations of all nodes for node in ${OCF_RESKEY_node_list}; do output=`$CRM_ATTR_REBOOT -N "$node" -n \ "$PGSQL_XLOG_LOC_NAME" -G -q 2>/dev/null` if [ $? -ne 0 ]; then ocf_log warn "Can't get $node xlog location." continue else ocf_log info "$node xlog location : $output" echo "$node $output" >> ${XLOG_NOTE_FILE}.${new} if [ "$node" = "$NODENAME" ]; then mylocation=$output fi fi done oldfile=`cat ${XLOG_NOTE_FILE}.${old} 2>/dev/null` newfile=`cat ${XLOG_NOTE_FILE}.${new} 2>/dev/null` if [ "$oldfile" != "$newfile" ]; then # reset counter rm -f ${XLOG_NOTE_FILE}.* printf "$newfile\n" > ${XLOG_NOTE_FILE}.0 return 1 fi if [ "$new" -ge "$OCF_RESKEY_xlog_check_count" ]; then newestXlog=`printf "$newfile\n" | sort -t " " -k 2,3 -r | \ head -1 | cut -d " " -f 2` if [ "$newestXlog" = "$mylocation" ]; then ocf_log info "I have a master right." $CRM_MASTER -v $PROMOTE_ME return 0 fi change_data_status "$NODENAME" "DISCONNECT" ocf_log info "I don't have correct master data." # reset counter rm -f ${XLOG_NOTE_FILE}.* printf "$newfile\n" > ${XLOG_NOTE_FILE}.0 fi return 1 } is_replication() { if [ "$OCF_RESKEY_rep_mode" != "none" ]; then return 0 fi return 1 } get_my_location() { local rc local output local replay_loc local receive_loc local output1 local output2 local log1 local log2 local newer_location output=`su $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; \ $OCF_RESKEY_psql $psql_options -U $OCF_RESKEY_pgdba \ -Atc \"${CHECK_XLOG_LOC_SQL}\""` rc=$? if [ $rc -ne 0 ]; then report_psql_error $rc warn ocf_log err "Can't get my xlog location." return 1 fi replay_loc=`echo $output | cut -d "|" -f 1` receive_loc=`echo $output | cut -d "|" -f 2` output1=`echo "$replay_loc" | cut -d "/" -f 1` output2=`echo "$replay_loc" | cut -d "/" -f 2` log1=`printf "%08s\n" $output1 | sed "s/ /0/g"` log2=`printf "%08s\n" $output2 | sed "s/ /0/g"` replay_loc="${log1}${log2}" output1=`echo "$receive_loc" | cut -d "/" -f 1` output2=`echo "$receive_loc" | cut -d "/" -f 2` log1=`printf "%08s\n" $output1 | sed "s/ /0/g"` log2=`printf "%08s\n" $output2 | sed "s/ /0/g"` receive_loc="${log1}${log2}" newer_location=`printf "$replay_loc\n$receive_loc" | sort -r | head -1` echo "$newer_location" return 0 } show_xlog_location() { local location location=`get_my_location` || return 1 $CRM_ATTR_REBOOT -N "$NODENAME" -n "$PGSQL_XLOG_LOC_NAME" -v "$location" - return $? } delete_xlog_location() { $CRM_ATTR_REBOOT -N "$NODENAME" -n "$PGSQL_XLOG_LOC_NAME" -D - return $? } show_master_baseline() { local rc local location runasowner -q err "$OCF_RESKEY_psql $psql_options \ -U $OCF_RESKEY_pgdba -c 'CHECKPOINT'" rc=$? if [ $rc -ne 0 ]; then report_psql_error $rc warn fi location=`get_my_location` ocf_log info "My master baseline : $location." $CRM_ATTR_REBOOT -N "$NODENAME" -n "$PGSQL_MASTER_BASELINE" -v "$location" - return $? } delete_master_baseline() { $CRM_ATTR_REBOOT -N "$NODENAME" -n "$PGSQL_MASTER_BASELINE" -D - return $? } set_async_mode_all() { [ "$OCF_RESKEY_rep_mode" = "sync" ] || return 0 ocf_log info "Set all nodes into async mode." runasowner -q err "echo "" > \"$REP_MODE_CONF\"" if [ $? -ne 0 ]; then ocf_log err "Can't set all nodes into async mode." return 1 fi return 0 } set_async_mode() { local sync_node_in_conf sync_node_in_conf=`cat $REP_MODE_CONF | cut -d "'" -f 2` if [ -n "$sync_node_in_conf" ]; then - if ! echo $sync_node_in_conf | grep "$1"; then - ocf_log info "$1 is already in async mode." - return 0 + ocf_log info "Setup $1 into async mode." + sync_node_in_conf=`echo $sync_node_in_conf | sed "s/$1//g" |\ + sed "s/^,//g" | sed "s/,,/,/g" | sed "s/,$//g"` + if [ -n $sync_node_in_conf ]; then + echo "synchronous_standby_names = '$sync_node_in_conf'" > "$REP_MODE_CONF" else - ocf_log info "Setup $1 into async mode." - sync_node_in_conf=`echo $sync_node_in_conf | sed "s/$1//g" |\ - sed "s/^,//g" | sed "s/,,/,/g" | sed "s/,$//g"` - if [ -n $sync_node_in_conf ]; then - echo "synchronous_standby_names = '$sync_node_in_conf'" > "$REP_MODE_CONF" - else - echo "" > "$REP_MODE_CONF" - fi + echo "" > "$REP_MODE_CONF" fi else ocf_log info "$1 is already in async mode." return 0 fi ocf_log info "All synced nodes : \"$sync_node_in_conf\"" reload_conf - return $? } set_sync_mode() { local sync_node_in_conf sync_node_in_conf=`cat $REP_MODE_CONF | cut -d "'" -f 2` if [ -n "$sync_node_in_conf" ]; then - if echo "$sync_node_in_conf" | grep "$1"; then - ocf_log info "$1 is already in sync mode." - return 0 - else - ocf_log info "Setup $1 into sync mode." - echo "synchronous_standby_names = '$sync_node_in_conf,$1'" > "$REP_MODE_CONF" - fi + ocf_log info "Setup $1 into sync mode." + echo "synchronous_standby_names = '$sync_node_in_conf,$1'" > "$REP_MODE_CONF" else ocf_log info "Setup $1 into sync mode." echo "synchronous_standby_names = '$1'" > "$REP_MODE_CONF" fi sync_node_in_conf=`cat $REP_MODE_CONF | cut -d "'" -f 2` ocf_log info "All synced nodes : \"$sync_node_in_conf\"" reload_conf - return $? } is_sync_mode() { - local target - - sync_node_in_conf=`cat $REP_MODE_CONF | cut -d "'" -f 2 | sed "s/,/ /g"` - if [ -n "$sync_node_in_conf" ]; then - for target in $sync_node_in_conf; do - if [ "$target" = "$1" ];then - return 0 - fi - done - fi - return 1 + cat $REP_MODE_CONF | grep -q -e "[,' ]$1[,' ]" } reload_conf() { # Invoke pg_ctl runasowner "$OCF_RESKEY_pgctl -D $OCF_RESKEY_pgdata reload" if [ $? -eq 0 ]; then ocf_log info "Reload configuration file." else ocf_log err "Can't reload configuration file." return 1 fi return 0 } make_recovery_conf() { runasowner "touch $RECOVERY_CONF" if [ $? -ne 0 ]; then ocf_log err "Can't create recovery.conf." return 1 fi cat > $RECOVERY_CONF </dev/null` if [ "$output" != "$2" ]; then # If slave's disk is broken, RA cannot read PID file # and misjudges the PostgreSQL as down while it is running. # It causes overwriting of pgsql-status by Master because replication is still connected. if [ "$output" = "STOP" -o "$output" = "UNKNOWN" ]; then if [ "$1" != "$NODENAME" ]; then ocf_log warn "Changing $PGSQL_STATUS_ATTR on $1 : $output->$2 by $NODENAME is prohibited." return 0 fi fi ocf_log info "Changing $PGSQL_STATUS_ATTR on $1 : $output->$2." $CRM_ATTR_REBOOT -N "$1" -n "$PGSQL_STATUS_ATTR" -v "$2" if [ $? -ne 0 ]; then ocf_log err "Can't change $PGSQL_STATUS_ATTR." return 1 fi fi return 0 } # change pgsql-data-status. # arg1:node, arg2: value change_data_status() { local output if ! node_exist $1; then return 0 fi while : do output=`$CRM_ATTR_FOREVER -N "$1" -n "$PGSQL_DATA_STATUS_ATTR" -G -q 2>/dev/null` if [ "$output" != "$2" ]; then ocf_log info "Changing $PGSQL_DATA_STATUS_ATTR on $1 : $output->$2." exec_func_with_timeout "$CRM_ATTR_FOREVER" "-N $1 -n \ $PGSQL_DATA_STATUS_ATTR -v \"$2\"" \ $OCF_RESKEY_crm_attr_timeout if [ $? -ne 0 ]; then ocf_log err "Can't change $PGSQL_DATA_STATUS_ATTR." return 1 fi else break fi done return 0 } # change master-score # arg1:node, arg2: score change_master_score() { local rsc local instance local current_score if ! is_node_online $1; then return 0 fi rsc=`echo $OCF_RESOURCE_INSTANCE | cut -d ":" -f 1` instance=0 while : do if [ "$instance" -ge "$OCF_RESKEY_CRM_meta_clone_max" ]; then break fi if [ "${rsc}:${instance}" = "$OCF_RESOURCE_INSTANCE" ]; then instance=`expr $instance + 1` continue fi current_score=`$CRM_ATTR_REBOOT -N "$1" -n "master-${rsc}:${instance}" -G -q 2>/dev/null` if [ -n "$current_score" -a "$current_score" != "$2" ]; then ocf_log info "Changing ${rsc}:${instance} master score on $1 : $current_score->$2." $CRM_ATTR_REBOOT -N "$target" -n "master-${rsc}:${instance}" -v "$2" if [ $? -ne 0 ]; then ocf_log err "Can't change master score." return 1 fi fi instance=`expr $instance + 1` done return 0 } report_psql_error() { local rc local loglevel rc=$1 loglevel=${2:-err} ocf_log $loglevel "PostgreSQL $OCF_RESKEY_pgdb isn't running" if [ $rc -eq 1 ]; then ocf_log err "Fatal error (out of memory, file not found, etc.) occurred while executing the psql command." elif [ $rc -eq 2 ]; then ocf_log $loglevel "Connection error (connection to the server went bad and the session was not interactive) occurred while executing the psql command." elif [ $rc -eq 3 ]; then ocf_log err "Script error (the variable ON_ERROR_STOP was set) occurred while executing the psql command." fi } # # timeout management function # arg1 : command # arg2 : command's args # arg3 : timeout(s) # exec_func_with_timeout() { local func_pid local count local rc $1 `eval echo $2` & func_pid=$! count=0 while kill -s 0 $func_pid >/dev/null 2>&1; do sleep 1 count=`expr $count + 1` if [ $count -ge $3 ]; then ocf_log debug "Execute $1 time out." kill -s 9 $func_pid >/dev/null 2>&1 return 0 fi done wait $func_pid - rc=$? - return $rc } is_node_online() { crm_mon -1 -n | grep -e "^Node $1 " -e "^Node $1:" | grep -q -v "OFFLINE" - return $? } node_exist() { crm_mon -1 -n | grep -q "^Node $1" - return $? } check_binary2() { if ! have_binary "$1"; then ocf_log err "Setup problem: couldn't find command: $1" return 1 fi return 0 } check_config() { local rc=0 if [ ! -f "$1" ]; then if ocf_is_probe; then ocf_log info "Configuration file is $1 not readable during probe." rc=1 else ocf_log err "Configuration file $1 doesn't exist" rc=2 fi fi return $rc } # Validate most critical parameters pgsql_validate_all() { local version if ! check_binary2 "$OCF_RESKEY_pgctl" || ! check_binary2 "$OCF_RESKEY_psql"; then return $OCF_ERR_INSTALLED fi if [ -n "$OCF_RESKEY_config" -a ! -f "$OCF_RESKEY_config" ]; then check_config "$OCF_RESKEY_config" [ $? -eq 2 ] && return $OCF_ERR_INSTALLED fi getent passwd $OCF_RESKEY_pgdba >/dev/null 2>&1 if [ ! $? -eq 0 ]; then ocf_log err "User $OCF_RESKEY_pgdba doesn't exist"; return $OCF_ERR_INSTALLED; fi if ocf_is_probe; then ocf_log info "Don't check $OCF_RESKEY_pgdata during probe" else if ! runasowner "test -w $OCF_RESKEY_pgdata"; then ocf_log err "Directory $OCF_RESKEY_pgdata is not writable by $OCF_RESKEY_pgdba" return $OCF_ERR_PERM; fi fi if [ -n "$OCF_RESKEY_monitor_user" -a ! -n "$OCF_RESKEY_monitor_password" ] then ocf_log err "monitor password can't be empty" return $OCF_ERR_CONFIGURED fi if [ ! -n "$OCF_RESKEY_monitor_user" -a -n "$OCF_RESKEY_monitor_password" ] then ocf_log err "monitor_user has to be set if monitor_password is set" return $OCF_ERR_CONFIGURED fi if is_replication; then version=`cat $OCF_RESKEY_pgdata/PG_VERSION` if [ `printf "$version\n9.1" | sort -n | head -1` != "9.1" ]; then ocf_log err "Replication mode needs PostgreSQL 9.1 or higher." return $OCF_ERR_INSTALLED fi if ! ocf_is_ms; then ocf_log err "Replication requires Master/Slave configuration." return $OCF_ERR_CONFIGURED fi if [ ! "$OCF_RESKEY_rep_mode" = "sync" -a ! "$OCF_RESKEY_rep_mode" = "async" ]; then ocf_log err "Invalid rep_mode : $OCF_RESKEY_rep_mode" return $OCF_ERR_CONFIGURED fi if [ ! -n "$OCF_RESKEY_master_ip" ]; then ocf_log err "master_ip can't be empty." return $OCF_ERR_CONFIGURED fi if [ ! -n "$OCF_RESKEY_node_list" ]; then ocf_log err "node_list can't be empty." return $OCF_ERR_CONFIGURED fi if [ "$OCF_RESKEY_rep_mode" = "sync" ]; then if ! grep -q "include '$REP_MODE_CONF' # added by pgsql RA" $OCF_RESKEY_pgdata/postgresql.conf; then echo "include '$REP_MODE_CONF' # added by pgsql RA" >> $OCF_RESKEY_pgdata/postgresql.conf fi fi if ! mkdir -p $OCF_RESKEY_tmpdir || ! chown $OCF_RESKEY_pgdba $OCF_RESKEY_tmpdir || ! chmod 700 $OCF_RESKEY_tmpdir; then ocf_log err "Can't create directory $OCF_RESKEY_tmpdir or it is not readable by $OCF_RESKEY_pgdba" return $OCF_ERR_PERM fi fi return $OCF_SUCCESS } # # Check if we need to create a log file # check_log_file() { if [ ! -f "$1" ] then touch $1 > /dev/null 2>&1 chown $OCF_RESKEY_pgdba:`getent passwd $OCF_RESKEY_pgdba | cut -d ":" -f 4` $1 fi #Check if $OCF_RESKEY_pgdba can write to the log file if ! runasowner "test -w $1" then return 1 fi return 0 } # # Check socket directory # check_socket_dir() { if [ ! -d "$OCF_RESKEY_socketdir" ]; then if ! mkdir "$OCF_RESKEY_socketdir"; then ocf_log err "Can't create directory $OCF_RESKEY_socketdir" exit $OCF_ERR_PERM fi if ! chown $OCF_RESKEY_pgdba:`getent passwd \ $OCF_RESKEY_pgdba | cut -d ":" -f 4` "$OCF_RESKEY_socketdir" then ocf_log err "Can't change ownership for $OCF_RESKEY_socketdir" exit $OCF_ERR_PERM fi if ! chmod 2775 "$OCF_RESKEY_socketdir"; then ocf_log err "Can't change permissions for $OCF_RESKEY_socketdir" exit $OCF_ERR_PERM fi else if ! runasowner "touch $OCF_RESKEY_socketdir/test.$$"; then ocf_log err "$OCF_RESKEY_pgdba can't create files in $OCF_RESKEY_socketdir" exit $OCF_ERR_PERM fi rm $OCF_RESKEY_socketdir/test.$$ fi } # # 'main' starts here... # if [ $# -ne 1 ] then usage exit $OCF_ERR_GENERIC fi PIDFILE=${OCF_RESKEY_pgdata}/postmaster.pid BACKUPLABEL=${OCF_RESKEY_pgdata}/backup_label if is_replication; then RECOVERY_CONF=${OCF_RESKEY_pgdata}/recovery.conf REP_MODE_CONF=${OCF_RESKEY_tmpdir}/rep_mode.conf PGSQL_LOCK=${OCF_RESKEY_tmpdir}/PGSQL.lock XLOG_NOTE_FILE=${OCF_RESKEY_tmpdir}/xlog_note CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot" CRM_ATTR_REBOOT="${HA_SBIN_DIR}/crm_attribute -l reboot" CRM_ATTR_FOREVER="${HA_SBIN_DIR}/crm_attribute -l forever" CRM_FAILCOUNT="${HA_SBIN_DIR}/crm_failcount" CAN_NOT_PROMOTE="-INFINITY" CAN_PROMOTE="100" PROMOTE_ME="1000" CHECK_MS_SQL="select pg_is_in_recovery()" CHECK_XLOG_LOC_SQL="select pg_last_xlog_replay_location(),pg_last_xlog_receive_location()" CHECK_REPLICATION_STATE_SQL="select application_name,upper(state),upper(sync_state) from pg_stat_replication" PGSQL_STATUS_ATTR="pgsql-status" PGSQL_DATA_STATUS_ATTR="pgsql-data-status" PGSQL_XLOG_LOC_NAME="pgsql-xlog-loc" PGSQL_MASTER_BASELINE="pgsql-master-baseline" NODENAME=`uname -n` OPERATION=$1 fi case "$1" in methods) pgsql_methods exit $?;; meta-data) meta_data exit $OCF_SUCCESS;; esac # $OCF_RESKEY_pgdata has to be initialized at this momemnt : ${OCF_RESKEY_socketdir=`get_pgsql_param unix_socket_directory`} pgsql_validate_all rc=$? [ "$1" = "validate-all" ] && exit $rc if [ $rc -ne 0 ] then case "$1" in stop) exit $OCF_SUCCESS;; monitor) exit $OCF_NOT_RUNNING;; status) exit $OCF_NOT_RUNNING;; *) exit $rc;; esac fi US=`id -u -n` if [ $US != root -a $US != $OCF_RESKEY_pgdba ] then ocf_log err "$0 must be run as root or $OCF_RESKEY_pgdba" exit $OCF_ERR_GENERIC fi # make psql command options if [ -n "$OCF_RESKEY_monitor_user" ]; then PGUSER=$OCF_RESKEY_monitor_user; export PGUSER PGPASSWORD=$OCF_RESKEY_monitor_password; export PGPASSWORD psql_options="-p $OCF_RESKEY_pgport $OCF_RESKEY_pgdb" else psql_options="-p $OCF_RESKEY_pgport -U $OCF_RESKEY_pgdba $OCF_RESKEY_pgdb" fi if [ -n "$OCF_RESKEY_pghost" ]; then psql_options="$psql_options -h $OCF_RESKEY_pghost" else if [ -n "$OCF_RESKEY_socketdir" ]; then psql_options="$psql_options -h $OCF_RESKEY_socketdir" fi fi # What kind of method was invoked? case "$1" in status) if pgsql_status then ocf_log info "PostgreSQL is up" exit $OCF_SUCCESS else ocf_log info "PostgreSQL is down" exit $OCF_NOT_RUNNING fi;; monitor) pgsql_monitor exit $?;; start) pgsql_start exit $?;; promote) pgsql_promote exit $?;; demote) pgsql_demote exit $?;; notify) pgsql_notify exit $?;; stop) pgsql_stop exit $?;; *) exit $OCF_ERR_UNIMPLEMENTED;; esac