diff --git a/heartbeat/galera.in b/heartbeat/galera.in index b518595cb..b29d68bf7 100755 --- a/heartbeat/galera.in +++ b/heartbeat/galera.in @@ -1,1106 +1,1105 @@ #!@BASH_SHELL@ # # Copyright (c) 2014 David Vossel # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ## # README. # # This agent only supports being configured as a multistate Promoted # resource. # # Unpromoted vs Promoted role: # # During the 'Unpromoted' role, galera instances are in read-only mode and # will not attempt to connect to the cluster. This role exists only as # a means to determine which galera instance is the most up-to-date. The # most up-to-date node will be used to bootstrap a galera cluster that # has no current members. # # The galera instances will only begin to be promoted to the Promoted role # once all the nodes in the 'wsrep_cluster_address' connection address # have entered read-only mode. At that point the node containing the # database that is most current will be promoted to Promoted. 
Once the first # Promoted instance bootstraps the galera cluster, the other nodes will be # promoted to Promoted as well. # # Example: Create a galera cluster using nodes rhel7-auto1 rhel7-auto2 rhel7-auto3 # # pcs resource create db galera enable_creation=true \ # wsrep_cluster_address="gcomm://rhel7-auto1,rhel7-auto2,rhel7-auto3" meta promoted-max=3 --promoted # # By setting the 'enable_creation' option, the database will be automatically # generated at startup. The meta attribute 'promoted-max=3' means that all 3 # nodes listed in the wsrep_cluster_address list will be allowed to connect # to the galera cluster and perform replication. # # NOTE: If you have more nodes in the pacemaker cluster than you wish # to have in the galera cluster, make sure to use location constraints to prevent # pacemaker from attempting to place a galera instance on a node that is # not in the 'wsrep_cluster_address' list. # ## ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs if [ "$__OCF_ACTION" != "meta-data" ]; then . ${OCF_FUNCTIONS_DIR}/mysql-common.sh NODENAME=$(ocf_attribute_target) fi # It is common for some galera instances to store # check user that can be used to query status # in this file if [ -f "/etc/sysconfig/clustercheck" ]; then . /etc/sysconfig/clustercheck elif [ -f "/etc/default/clustercheck" ]; then . 
/etc/default/clustercheck fi # Parameter defaults OCF_RESKEY_wsrep_cluster_address_default="" OCF_RESKEY_cluster_host_map_default="" OCF_RESKEY_check_user_default="" OCF_RESKEY_check_passwd_default="" OCF_RESKEY_two_node_mode_default="false" : ${OCF_RESKEY_wsrep_cluster_address=${OCF_RESKEY_wsrep_cluster_address_default}} : ${OCF_RESKEY_cluster_host_map=${OCF_RESKEY_cluster_host_map_default}} : ${OCF_RESKEY_check_user=${OCF_RESKEY_check_user_default}} : ${OCF_RESKEY_check_passwd=${OCF_RESKEY_check_passwd_default}} : ${OCF_RESKEY_two_node_mode=${OCF_RESKEY_two_node_mode_default}} ####################################################################### # Defaults: OCF_RESKEY_check_passwd_use_empty_default=0 : ${OCF_RESKEY_check_passwd_use_empty=${OCF_RESKEY_check_passwd_use_empty_default}} ####################################################################### usage() { cat < 1.0 Resource script for managing galera database. Manages a galera instance Location of the MySQL server binary MySQL server binary Location of the MySQL client binary MySQL client binary Configuration file MySQL config Directory containing databases MySQL datadir User running MySQL daemon MySQL user Group running MySQL daemon (for logfile and directory permissions) MySQL group The logfile to be used for mysqld. MySQL log file The pidfile to be used for mysqld. MySQL pid file The socket to be used for mysqld. MySQL socket If the MySQL database does not exist, it will be created Create the database if it does not exist Additional parameters which are passed to the mysqld on startup. (e.g. --skip-external-locking or --skip-grant-tables) Additional parameters to pass to mysqld The galera cluster address. This takes the form of: gcomm://node,node,node Only nodes present in this node list will be allowed to start a galera instance. The galera node names listed in this address are expected to match valid pacemaker node names. 
If both names need to differ, you must provide a mapping in option cluster_host_map. Galera cluster address A mapping of pacemaker node names to galera node names. To be used when both pacemaker and galera names need to differ, (e.g. when galera names map to IP from a specific network interface) This takes the form of: pcmk1:node.1.galera;pcmk2:node.2.galera;pcmk3:node.3.galera where the galera resource started on node pcmk1 would be named node.1.galera in the wsrep_cluster_address Pacemaker to Galera name mapping Cluster check user. MySQL test user Cluster check user password. Empty passwords are ignored unless the parameter "check_passwd_use_empty" is set to 1. check password Use an empty "check_passwd" password. If this parameter is set to 1, "check_passwd" will be ignored and an empty password is used when calling the "mysql" client binary. check password use empty If running in a 2-node pacemaker cluster, rely on pacemaker quorum to allow automatic recovery even when the other node is unreachable. Use it with caution! 
(and fencing) Special recovery when running on a 2-node cluster - END } get_option_variable() { local key=$1 $MYSQL $MYSQL_OPTIONS_CHECK -e "SHOW VARIABLES like '$key';" | tail -1 } get_status_variable() { local key=$1 $MYSQL $MYSQL_OPTIONS_CHECK -e "show status like '$key';" | tail -1 } set_bootstrap_node() { local node=$(ocf_attribute_target $1) ${HA_SBIN_DIR}/crm_attribute -N $node -l reboot --name "${INSTANCE_ATTR_NAME}-bootstrap" -v "true" } clear_bootstrap_node() { ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-bootstrap" -D } is_bootstrap() { ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-bootstrap" --quiet 2>/dev/null } set_no_grastate() { ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-no-grastate" -v "true" } clear_no_grastate() { ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-no-grastate" -D } is_no_grastate() { local node=$(ocf_attribute_target $1) ${HA_SBIN_DIR}/crm_attribute -N $node -l reboot --name "${INSTANCE_ATTR_NAME}-no-grastate" --quiet 2>/dev/null } clear_last_commit() { ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" -D } set_last_commit() { ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" -v $1 } get_last_commit() { local node=$(ocf_attribute_target $1) if [ -z "$node" ]; then ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" --quiet 2>/dev/null else ${HA_SBIN_DIR}/crm_attribute -N $node -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" --quiet 2>/dev/null fi } clear_safe_to_bootstrap() { ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-safe-to-bootstrap" -D } set_safe_to_bootstrap() { ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-safe-to-bootstrap" -v $1 } get_safe_to_bootstrap() { 
local node=$(ocf_attribute_target $1) if [ -z "$node" ]; then ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-safe-to-bootstrap" --quiet 2>/dev/null else ${HA_SBIN_DIR}/crm_attribute -N $node -l reboot --name "${INSTANCE_ATTR_NAME}-safe-to-bootstrap" --quiet 2>/dev/null fi } wait_for_sync() { local state=$(get_status_variable "wsrep_local_state") ocf_log info "Waiting for database to sync with the cluster. " while [ "$state" != "4" ]; do sleep 1 state=$(get_status_variable "wsrep_local_state") done ocf_log info "Database synced." } is_primary() { cluster_status=$(get_status_variable "wsrep_cluster_status") if [ "$cluster_status" = "Primary" ]; then return 0 fi if [ -z "$cluster_status" ]; then ocf_exit_reason "Unable to retrieve wsrep_cluster_status, verify check_user '$OCF_RESKEY_check_user' has permissions to view status" else ocf_log info "Galera instance wsrep_cluster_status=${cluster_status}" fi return 1 } is_readonly() { local res=$(get_option_variable "read_only") if ! ocf_is_true "$res"; then return 1 fi cluster_status=$(get_status_variable "wsrep_cluster_status") if ! [ "$cluster_status" = "Disconnected" ]; then return 1 fi return 0 } is_two_node_mode_active() { # crm_node or corosync-quorumtool cannot access various corosync # flags when running inside a bundle, so only count the cluster # members ocf_is_true "$OCF_RESKEY_two_node_mode" && crm_mon_no_validation -1X | xmllint --xpath "count(//nodes/node[@type='member'])" - | grep -q -w 2 } is_last_node_in_quorate_partition() { # when a network split occurs in a 2-node cluster, pacemaker # fences the other node and try to retain quorum. So until # the fencing is resolved (and the status of the peer node # is clean), we shouldn't consider ourself quorate. 
local partition_members=$(${HA_SBIN_DIR}/crm_node -p | wc -w) local quorate=$(${HA_SBIN_DIR}/crm_node -q) local clean_members=$(crm_mon_no_validation -1X | xmllint --xpath 'count(//nodes/node[@type="member" and @unclean="false"])' -) [ "$partition_members" = 1 ] && [ "$quorate" = 1 ] && [ "$clean_members" = 2 ] } master_exists() { if [ "$__OCF_ACTION" = "demote" ]; then # We don't want to detect master instances during demote. # 1. we could be detecting ourselves as being master, which is no longer the case. # 2. we could be detecting other master instances that are in the process of shutting down. # by not detecting other master instances in "demote" we are deferring this check # to the next recurring monitor operation which will be much more accurate return 1 fi # determine if a master instance is already up and is healthy ocf_version_cmp "$OCF_RESKEY_crm_feature_set" "3.1.0" res=$? if [ -z "$OCF_RESKEY_crm_feature_set" ] || [ $res -eq 2 ]; then XMLOPT="--output-as=xml" ocf_version_cmp "$OCF_RESKEY_crm_feature_set" "3.2.0" if [ $? -eq 1 ]; then crm_mon_no_validation -1 $XMLOPT >/dev/null 2>&1 if [ $? -ne 0 ]; then XMLOPT="--as-xml" fi fi else XMLOPT="--as-xml" fi crm_mon_no_validation -1 $XMLOPT | grep -q -i -E "resource.*id=\"${INSTANCE_ATTR_NAME}\".*role=\"(Promoted|Master)\".*active=\"true\".*orphaned=\"false\".*failed=\"false\"" return $? } clear_master_score() { local node=$(ocf_attribute_target $1) if [ -z "$node" ]; then ocf_promotion_score -D else ocf_promotion_score -D -N $node fi } set_master_score() { local node=$(ocf_attribute_target $1) if [ -z "$node" ]; then ocf_promotion_score -v 100 else ocf_promotion_score -N $node -v 100 fi } promote_everyone() { for node in $(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' '); do local pcmk_node=$(galera_to_pcmk_name $node) if [ -z "$pcmk_node" ]; then ocf_log err "Could not determine pacemaker node from galera name <${node}>." 
return else node=$pcmk_node fi set_master_score $node done } greater_than_equal_long() { # there are values we need to compare in this script # that are too large for shell -gt to process echo | awk -v n1="$1" -v n2="$2" '{if (n1>=n2) printf ("true"); else printf ("false");}' | grep -q "true" } galera_to_pcmk_name() { local galera=$1 if [ -z "$OCF_RESKEY_cluster_host_map" ]; then echo $galera else echo "$OCF_RESKEY_cluster_host_map" | tr ';' '\n' | tr -d ' ' | sed 's/:/ /' | awk -F' ' '$2=="'"$galera"'" {print $1;exit}' fi } pcmk_to_galera_name() { local pcmk=$1 if [ -z "$OCF_RESKEY_cluster_host_map" ]; then echo $pcmk else echo "$OCF_RESKEY_cluster_host_map" | tr ';' '\n' | tr -d ' ' | sed 's/:/ /' | awk -F' ' '$1=="'"$pcmk"'" {print $2;exit}' fi } detect_first_master() { local best_commit=0 local last_commit=0 local missing_nodes=0 local nodes="" local nodes_recovered="" local all_nodes local best_node_gcomm local best_node local safe_to_bootstrap all_nodes=$(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' ') best_node_gcomm=$(echo "$all_nodes" | sed 's/^.* \(.*\)$/\1/') best_node=$(galera_to_pcmk_name $best_node_gcomm) if [ -z "$best_node" ]; then ocf_log err "Could not determine initial best node from galera name <${best_node_gcomm}>." return fi # avoid selecting a recovered node as bootstrap if possible for node in $all_nodes; do local pcmk_node=$(galera_to_pcmk_name $node) if [ -z "$pcmk_node" ]; then ocf_log err "Could not determine pacemaker node from galera name <${node}>." 
return else node=$pcmk_node fi if is_no_grastate $node; then nodes_recovered="$nodes_recovered $node" else nodes="$nodes $node" fi done for node in $nodes_recovered $nodes; do # On clean shutdown, galera sets the last stopped node as 'safe to bootstrap', # so use this hint when we can safe_to_bootstrap=$(get_safe_to_bootstrap $node) # Special case for 2-node clusters: during a network split, rely on # pacemaker's quorum to check whether we can restart galera if [ "$safe_to_bootstrap" != "1" ] && [ "$node" = "$NODENAME" ] && is_two_node_mode_active; then is_last_node_in_quorate_partition if [ $? -eq 0 ]; then ocf_log warn "Survived a split in a 2-node cluster, considering ourselves safe to bootstrap" safe_to_bootstrap=1 fi fi if [ "$safe_to_bootstrap" = "1" ]; then # Galera marked the node as safe to boostrap during shutdown. Let's just # pick it as our bootstrap node. ocf_log info "Node <${node}> is marked as safe to bootstrap." best_node=$node # We don't need to wait for the other nodes to report state in this case missing_nodes=0 break fi last_commit=$(get_last_commit $node) if [ -z "$last_commit" ]; then ocf_log info "Waiting on node <${node}> to report database status before Master instances can start." missing_nodes=1 continue fi # this means -1, or that no commit has occured yet. if [ "$last_commit" = "18446744073709551615" ]; then last_commit="0" fi greater_than_equal_long "$last_commit" "$best_commit" if [ $? 
-eq 0 ]; then best_node=$(ocf_attribute_target $node) best_commit=$last_commit fi done if [ $missing_nodes -eq 1 ]; then return fi ocf_log info "Promoting $best_node to be our bootstrap node" set_bootstrap_node $best_node set_master_score $best_node } detect_safe_to_bootstrap() { local safe_to_bootstrap="" local uuid="" local seqno="" if [ -f ${OCF_RESKEY_datadir}/grastate.dat ]; then ocf_log info "attempting to read safe_to_bootstrap flag from ${OCF_RESKEY_datadir}/grastate.dat" safe_to_bootstrap=$(sed -n 's/^safe_to_bootstrap:\s*\(.*\)$/\1/p' < ${OCF_RESKEY_datadir}/grastate.dat) uuid=$(sed -n 's/^uuid:\s*\(.*\)$/\1/p' < ${OCF_RESKEY_datadir}/grastate.dat) seqno=$(sed -n 's/^seqno:\s*\(.*\)$/\1/p' < ${OCF_RESKEY_datadir}/grastate.dat) fi if [ -z "$uuid" ] || \ [ "$uuid" = "00000000-0000-0000-0000-000000000000" ]; then clear_safe_to_bootstrap return fi if [ "$safe_to_bootstrap" = "1" ]; then if [ -z "$seqno" ] || [ "$seqno" = "-1" ]; then clear_safe_to_bootstrap return fi fi if [ "$safe_to_bootstrap" = "1" ] || [ "$safe_to_bootstrap" = "0" ]; then set_safe_to_bootstrap $safe_to_bootstrap else clear_safe_to_bootstrap fi } detect_last_commit() { local last_commit local recover_args="--defaults-file=$OCF_RESKEY_config \ --pid-file=$OCF_RESKEY_pid \ --socket=$OCF_RESKEY_socket \ --datadir=$OCF_RESKEY_datadir" local recovery_file_regex='s/.*WSREP\:.*position\s*recovery.*--log_error='\''\([^'\'']*\)'\''.*/\1/p' local recovered_position_regex='s/.*WSREP\:\s*[R|r]ecovered\s*position.*\:\(.*\)\s*$/\1/p' # codership/galera#354 # Some ungraceful shutdowns can leave an empty gvwstate.dat on # disk. This will prevent galera to join the cluster if it is # configured to attempt PC recovery. Removing that file makes the # node fall back to the normal, unoptimized joining process. if [ -f ${OCF_RESKEY_datadir}/gvwstate.dat ] && \ [ ! 
-s ${OCF_RESKEY_datadir}/gvwstate.dat ]; then ocf_log warn "empty ${OCF_RESKEY_datadir}/gvwstate.dat detected, removing it to prevent PC recovery failure at next restart" rm -f ${OCF_RESKEY_datadir}/gvwstate.dat fi ocf_log info "attempting to detect last commit version by reading ${OCF_RESKEY_datadir}/grastate.dat" last_commit="$(cat ${OCF_RESKEY_datadir}/grastate.dat | sed -n 's/^seqno.\s*\(.*\)\s*$/\1/p')" if [ -z "$last_commit" ] || [ "$last_commit" = "-1" ]; then local tmp=$(mktemp) chown $OCF_RESKEY_user:$OCF_RESKEY_group $tmp # if we pass here because grastate.dat doesn't exist, # try not to bootstrap from this node if possible if [ ! -f ${OCF_RESKEY_datadir}/grastate.dat ]; then set_no_grastate fi ocf_log info "now attempting to detect last commit version using 'mysqld_safe --wsrep-recover'" $SU - $OCF_RESKEY_user -s /bin/sh -c \ "${OCF_RESKEY_binary} $recover_args --wsrep-recover --log-error=$tmp 2>/dev/null" last_commit="$(cat $tmp | sed -n $recovered_position_regex | tail -1)" if [ -z "$last_commit" ]; then # Galera uses InnoDB's 2pc transactions internally. If # server was stopped in the middle of a replication, the # recovery may find a "prepared" XA transaction in the # redo log, and mysql won't recover automatically local recovery_file="$(cat $tmp | sed -n $recovery_file_regex)" if [ -e $recovery_file ]; then cat $recovery_file | grep -q -E '\[ERROR\]\s+Found\s+[0-9]+\s+prepared\s+transactions!' 2>/dev/null if [ $? -eq 0 ]; then # we can only rollback the transaction, but that's OK # since the DB will get resynchronized anyway ocf_log warn "local node <${NODENAME}> was not shutdown properly. Rollback stuck transaction with --tc-heuristic-recover" $SU - $OCF_RESKEY_user -s /bin/sh -c \ "${OCF_RESKEY_binary} $recover_args --wsrep-recover \ --tc-heuristic-recover=rollback --log-error=$tmp 2>/dev/null" last_commit="$(cat $tmp | sed -n $recovered_position_regex | tail -1)" if [ ! -z "$last_commit" ]; then ocf_log warn "State recovered. 
force SST at next restart for full resynchronization" rm -f ${OCF_RESKEY_datadir}/grastate.dat # try not to bootstrap from this node if possible set_no_grastate fi fi fi fi rm -f $tmp fi if [ ! -z "$last_commit" ]; then ocf_log info "Last commit version found: $last_commit" set_last_commit $last_commit return $OCF_SUCCESS else ocf_exit_reason "Unable to detect last known write sequence number" clear_last_commit return $OCF_ERR_GENERIC fi } # For galera, promote is really start galera_promote() { local rc local extra_opts local bootstrap local safe_to_bootstrap master_exists if [ $? -eq 0 ]; then # join without bootstrapping extra_opts="--wsrep-cluster-address=${OCF_RESKEY_wsrep_cluster_address}" else bootstrap=$(is_bootstrap) if ocf_is_true $bootstrap; then # The best node for bootstrapping wasn't cleanly shutdown. Allow # bootstrapping anyways if [ "$(get_safe_to_bootstrap)" = "0" ]; then sed -ie 's/^\(safe_to_bootstrap:\) 0/\1 1/' ${OCF_RESKEY_datadir}/grastate.dat ocf_log info "safe_to_bootstrap in ${OCF_RESKEY_datadir}/grastate.dat set to 1 on node ${NODENAME}" fi ocf_log info "Node <${NODENAME}> is bootstrapping the cluster" extra_opts="--wsrep-cluster-address=gcomm://" else # We are being promoted without having the bootstrap # attribute in the CIB, which means we are supposed to # join a cluster; however if we end up here, there is no # Master remaining right now, which means there is no # cluster to join anymore. So force a demotion, and and # let the RA decide later which node should be the next # bootstrap node. ocf_log warn "There is no running cluster to join, demoting ourself" clear_master_score return $OCF_SUCCESS fi fi galera_monitor if [ $? -eq $OCF_RUNNING_MASTER ]; then if ocf_is_true $bootstrap; then promote_everyone clear_bootstrap_node ocf_log info "boostrap node already up, promoting the rest of the galera instances." 
fi clear_safe_to_bootstrap clear_last_commit return $OCF_SUCCESS fi # last commit/safe_to_bootstrap flag are no longer relevant once promoted clear_last_commit clear_safe_to_bootstrap mysql_common_prepare_dirs mysql_common_start "$extra_opts" rc=$? if [ $rc != $OCF_SUCCESS ]; then return $rc fi # At this point, the mysql pidfile is created on disk and the # mysql server is reacheable via its UNIX socket. If we are a # joiner, SST transfers (rsync) have finished, but an IST may # still be requested or ongoing galera_monitor rc=$? if [ $rc != $OCF_SUCCESS -a $rc != $OCF_RUNNING_MASTER ]; then ocf_exit_reason "Failed initial monitor action" return $rc fi is_readonly if [ $? -eq 0 ]; then ocf_exit_reason "Failure. Master instance started in read-only mode, check configuration." return $OCF_ERR_GENERIC fi if ocf_is_true $bootstrap; then promote_everyone clear_bootstrap_node # clear attribute no-grastate. if last shutdown was # not clean, we cannot be extra-cautious by requesting a SST # since this is the bootstrap node clear_no_grastate ocf_log info "Bootstrap complete, promoting the rest of the galera instances." else # if this is not the bootstrap node, make sure this instance # syncs with the rest of the cluster before promotion returns. wait_for_sync # sync is done, clear info about last startup clear_no_grastate fi ocf_log info "Galera started" return $OCF_SUCCESS } galera_demote() { mysql_common_stop rc=$? if [ $rc -ne $OCF_SUCCESS ] && [ $rc -ne $OCF_NOT_RUNNING ]; then ocf_exit_reason "Failed to stop Master galera instance during demotion to Master" return $rc fi # if this node was previously a bootstrap node, that is no longer the case. clear_bootstrap_node clear_last_commit clear_no_grastate clear_safe_to_bootstrap # Clear master score here rather than letting pacemaker do so once # demote finishes. This way a promote cannot take place right # after this demote even if pacemaker is requested to do so. 
It # will first have to run a start/monitor op, to reprobe the state # of the other galera nodes and act accordingly. clear_master_score # record last commit for next promotion detect_safe_to_bootstrap detect_last_commit rc=$? return $rc } galera_start() { local rc local galera_node galera_node=$(pcmk_to_galera_name $NODENAME) if [ -z "$galera_node" ]; then ocf_exit_reason "Could not determine galera name from pacemaker node <${NODENAME}>." return $OCF_ERR_CONFIGURED fi echo $OCF_RESKEY_wsrep_cluster_address | grep -q -F $galera_node if [ $? -ne 0 ]; then ocf_exit_reason "local node <${NODENAME}> (galera node <${galera_node}>) must be a member of the wsrep_cluster_address <${OCF_RESKEY_wsrep_cluster_address}> to start this galera instance" return $OCF_ERR_CONFIGURED fi galera_monitor if [ $? -eq $OCF_RUNNING_MASTER ]; then ocf_exit_reason "master galera instance started outside of the cluster's control" return $OCF_ERR_GENERIC fi mysql_common_prepare_dirs detect_safe_to_bootstrap detect_last_commit rc=$? if [ $rc -ne $OCF_SUCCESS ]; then return $rc fi master_exists if [ $? -eq 0 ]; then ocf_log info "Master instances are already up, setting master score so this instance will join galera cluster." set_master_score $NODENAME else clear_master_score detect_first_master fi return $OCF_SUCCESS } galera_monitor() { local rc local galera_node local status_loglevel="err" # Set loglevel to info during probe if ocf_is_probe; then status_loglevel="info" fi mysql_common_status $status_loglevel rc=$? if [ $rc -eq $OCF_NOT_RUNNING ]; then last_commit=$(get_last_commit $node) if [ -n "$last_commit" ]; then # if last commit is set, this instance is considered started in slave mode rc=$OCF_SUCCESS master_exists if [ $? -ne 0 ]; then detect_first_master else # a master instance exists and is healthy, promote this # local read only instance # so it can join the master galera cluster. 
set_master_score fi fi return $rc elif [ $rc -ne $OCF_SUCCESS ]; then return $rc fi # if we make it here, mysql is running. Check cluster status now. galera_node=$(pcmk_to_galera_name $NODENAME) if [ -z "$galera_node" ]; then ocf_exit_reason "Could not determine galera name from pacemaker node <${NODENAME}>." return $OCF_ERR_CONFIGURED fi echo $OCF_RESKEY_wsrep_cluster_address | grep -q -F $galera_node if [ $? -ne 0 ]; then ocf_exit_reason "local node <${NODENAME}> (galera node <${galera_node}>) is started, but is not a member of the wsrep_cluster_address <${OCF_RESKEY_wsrep_cluster_address}>" return $OCF_ERR_GENERIC fi is_primary if [ $? -eq 0 ]; then if ocf_is_probe; then # restore master score during probe # if we detect this is a master instance set_master_score fi rc=$OCF_RUNNING_MASTER else # It seems that with recent galera (26.4+), a joiner that is # connected to a Primary component and is preparing its IST # request might still temporarily report its state as # Non-Primary. Do not fail in this case as the promote # operation will loop until the IST finishes or the promote # times out. if [ "$__OCF_ACTION" = "promote" ] && ! ocf_is_true $(is_bootstrap); then ocf_log info "local node <${NODENAME}> is receiving a State Transfer." else ocf_exit_reason "local node <${NODENAME}> is started, but not in primary mode. Unknown state." rc=$OCF_ERR_GENERIC fi fi return $rc } galera_stop() { local rc # make sure the process is stopped mysql_common_stop rc=$1 clear_safe_to_bootstrap clear_last_commit clear_master_score clear_bootstrap_node clear_no_grastate return $rc } galera_validate() { if [ "$OCF_CHECK_LEVEL" -eq 10 ]; then if ! ocf_is_ms; then ocf_exit_reason "Galera must be configured as a multistate Master/Slave resource." return $OCF_ERR_CONFIGURED fi fi if [ -z "$OCF_RESKEY_wsrep_cluster_address" ]; then ocf_exit_reason "Galera must be configured with a wsrep_cluster_address value." 
return $OCF_ERR_CONFIGURED fi mysql_common_validate } case "$1" in meta-data) meta_data exit $OCF_SUCCESS;; usage|help) usage exit $OCF_SUCCESS;; esac [ "$__OCF_ACTION" = "start" ] && OCF_CHECK_LEVEL=10 galera_validate rc=$? LSB_STATUS_STOPPED=3 if [ $rc -ne 0 ]; then case "$1" in stop) exit $OCF_SUCCESS;; monitor) exit $OCF_NOT_RUNNING;; status) exit $LSB_STATUS_STOPPED;; *) exit $rc;; esac fi if [ -z "${OCF_RESKEY_check_passwd}" ]; then # This value is automatically sourced from /etc/sysconfig/checkcluster if available OCF_RESKEY_check_passwd=${MYSQL_PASSWORD} fi if [ -z "${OCF_RESKEY_check_user}" ]; then # This value is automatically sourced from /etc/sysconfig/checkcluster if available OCF_RESKEY_check_user=${MYSQL_USERNAME} fi : ${OCF_RESKEY_check_user="root"} MYSQL_OPTIONS_CHECK="-nNE --user=${OCF_RESKEY_check_user}" if ocf_is_true "${OCF_RESKEY_check_passwd_use_empty}"; then MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK --password=" elif [ -n "${OCF_RESKEY_check_passwd}" ]; then MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK --password=${OCF_RESKEY_check_passwd}" fi # This value is automatically sourced from /etc/sysconfig/checkcluster if available if [ -n "${MYSQL_HOST}" ]; then MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK -h ${MYSQL_HOST}" fi # This value is automatically sourced from /etc/sysconfig/checkcluster if available if [ -n "${MYSQL_PORT}" ]; then MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK -P ${MYSQL_PORT}" fi # What kind of method was invoked? 
case "$1" in start) galera_start;; stop) galera_stop;; status) mysql_common_status err;; monitor) galera_monitor;; promote) galera_promote;; demote) galera_demote;; validate-all) exit $OCF_SUCCESS;; *) usage exit $OCF_ERR_UNIMPLEMENTED;; esac # vi:sw=4:ts=4:et: diff --git a/heartbeat/mariadb.in b/heartbeat/mariadb.in index e0f1f3c9f..1dca98ba6 100644 --- a/heartbeat/mariadb.in +++ b/heartbeat/mariadb.in @@ -1,1040 +1,1039 @@ #!@BASH_SHELL@ # # # MariaDB # # Description: Manages a MariaDB Promotable database as Linux-HA resource # # Authors: Alan Robertson: DB2 Script # Jakub Janczak: rewrite as MySQL # Andrew Beekhof: cleanup and import # Sebastian Reitenbach: add OpenBSD defaults, more cleanup # Narayan Newton: add Gentoo/Debian defaults # Marian Marinov, Florian Haas: add replication capability # Yves Trudeau, Baron Schwartz: add VIP support and improve replication # Nils Carlson: add GTID support and semi-sync support # # Support: users@clusterlabs.org # License: GNU General Public License (GPL) # # (c) 2002-2005 International Business Machines, Inc. # 2005-2010 Linux-HA contributors # # See usage() function below for more details... # # OCF instance parameters: # OCF_RESKEY_binary # OCF_RESKEY_client_binary # OCF_RESKEY_config # OCF_RESKEY_datadir # OCF_RESKEY_user # OCF_RESKEY_group # OCF_RESKEY_node_list # OCF_RESKEY_test_table # OCF_RESKEY_test_user # OCF_RESKEY_test_passwd # OCF_RESKEY_enable_creation # OCF_RESKEY_additional_parameters # OCF_RESKEY_log # OCF_RESKEY_pid # OCF_RESKEY_socket # OCF_RESKEY_replication_user # OCF_RESKEY_replication_passwd # OCF_RESKEY_replication_port ####################################################################### # Initialization: OCF_RESKEY_node_list_default="" : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs . ${OCF_FUNCTIONS_DIR}/mysql-common.sh ####################################################################### usage() { cat < 1.0 Resource script for MariaDB. 
Manages a complete promotable replication setup with GTID, for simpler uses look at the mysql resource agent which supports older replication forms which mysql and mariadb have in common. The resource must be setup to use notifications. Set 'notify=true' in the metadata attributes when defining a MariaDB promotable instance. The default behavior is to use uname -n values in the change promoted to command. Other IPs can be specified manually by adding a node attribute \${INSTANCE_ATTR_NAME}_mysql_master_IP giving the IP to use for replication. For example, if the mariadb primitive you are using is p_mariadb, the attribute to set will be p_mariadb_mysql_master_IP. Manages a MariaDB promotable instance Location of the MariaDB server binary MariaDB server binary Location of the MariaDB client binary MariaDB client binary Configuration file MariaDB config Directory containing databases MariaDB datadir User running MariaDB daemon MariaDB user Group running MariaDB daemon (for logfile and directory permissions) MariaDB group The logfile to be used for mysqld. MariaDB log file All node names of nodes that will execute mariadb. Please separate each node name with a space. This is required for the promoted selection to function. node list The pidfile to be used for mysqld. MariaDB pid file The socket to be used for mysqld. MariaDB socket Table to be tested in monitor statement (in database.table notation) MariaDB test table MariaDB test user, must have select privilege on test_table MariaDB test user MariaDB test user password MariaDB test user password If the MariaDB database does not exist, it will be created Create the database if it does not exist Additional parameters which are passed to the mysqld on startup. (e.g. --skip-external-locking or --skip-grant-tables) Additional parameters to pass to mysqld MariaDB replication user. 
This user is used for starting and stopping MariaDB replication, for setting and resetting the promoted host, and for setting and unsetting read-only mode. Because of that, this user must have SUPER, REPLICATION SLAVE, REPLICATION CLIENT, PROCESS and RELOAD privileges on all nodes within the cluster. Mandatory if you define a promotable resource. MariaDB replication user MariaDB replication password. Used for replication client and unpromoted. Mandatory if you define a promotable resource. MariaDB replication user password The port on which the Promoted MariaDB instance is listening. MariaDB replication port - END } # Convenience functions greater_than_equal_long() { # there are values we need to compare in this script # that are too large for shell -gt to process local true=$(echo "$1 > $2" | bc) if [ "$true" -eq "1" ]; then return 0 else return 1 fi } greater_than_gtid() { local gtid1_transaction_id=$(echo $1 | cut -d - -f 3) local gtid2_transaction_id=$(echo $2 | cut -d - -f 3) greater_than_equal_long $gtid1_transaction_id $gtid2_transaction_id return $? } set_gtid() { # Sets the GTID in CIB using attrd_updater for this node. local gtid=$($MYSQL $MYSQL_OPTIONS_REPL \ -s -N -e "show global variables like 'gtid_current_pos'" | cut -f 2) # Ensure that we got somethine like a valid GTID if ! echo $gtid | grep -q '-'; then ocf_exit_reason "Unable to read GTID from MariaDB" ocf_log err "Unable to read GTID from MariaDB" return $OCF_ERR_GENERIC fi ${HA_SBIN_DIR}/attrd_updater -p -n ${OCF_RESOURCE_INSTANCE}-gtid -U $gtid } read_gtid() { local node=$1 local query_result local name local host local value # This produces output of the form 'name="var-name" host="node2" value="val"'. # This should be set at this point, because we have store our own GTID previously. if ! 
query_result=$(${HA_SBIN_DIR}/attrd_updater -p -N $node -n ${OCF_RESOURCE_INSTANCE}-gtid -Q); then ocf_exit_reason "Unable to read GTID from attrd" ocf_log err "Unable to read GTID from attrd" echo "" return fi # Evaluate the query result to place the variables in the local scope. eval ${query_result} echo ${value} } clear_all_gtid() { for node in $OCF_RESKEY_node_list; do ${HA_SBIN_DIR}/attrd_updater -n ${OCF_RESOURCE_INSTANCE}-gtid -N $node -D done } set_waiting_for_first_master() { ${HA_SBIN_DIR}/attrd_updater -p -n ${OCF_RESOURCE_INSTANCE}-waiting-for-first-master -U true } waiting_for_first_master() { local query_result local name local host local value if ! query_result=$(${HA_SBIN_DIR}/attrd_updater -p -n ${OCF_RESOURCE_INSTANCE}-waiting-for-first-master -Q); then ocf_exit_reason "Unable to read waiting-for-first-master from attrd" ocf_log err "Unable to read waiting-for-first-master from attrd" return 1 fi # Evaluate the query result to place the variables in the local scope. eval ${query_result} if [ "$value" = "true" ]; then return 0 else return 1 fi } clear_waiting_for_first_master() { attrd_updater -n ${OCF_RESOURCE_INSTANCE}-waiting-for-first-master -D } have_master_with_priority() { # Go through each node and validate that at least one has # a set priority. Because we unset the priority on reboot # a lack of priority indicates that we need to select a # new master. for node in $OCF_RESKEY_node_list; do ocf_promotion_score -G -N $node >/dev/null 2>&1 rc=$? if [ $rc -eq 0 ]; then return 0 fi done return 1 } attempt_to_set_master() { ocf_log info "Attempting to set master" local expected_node_count if waiting_for_first_master; then # Wait for all nodes to come online expected_node_count=$OCF_RESKEY_CRM_meta_clone_max else # We accept one node being down. This is not arbitrary, # synchronous replication requires acknowledgement from # at least one host, which means only two nodes must have # the latest GTID. 
So a set of n - 1 ensures that we do # not lose any writes. expected_node_count=$(($OCF_RESKEY_CRM_meta_clone_max-1)) fi # Set the gtid for this node, making it available to other nodes set_gtid local node_count=0 local highest_gtid=0 local master_candidate="" for node in $OCF_RESKEY_node_list; do local node_gtid=$(read_gtid $node) if [ -z "$node_gtid" ]; then continue fi # Got a valid gtid, increment node count node_count=$(($node_count+1)) # Check if this is a good master candidate if greater_than_gtid $node_gtid $highest_gtid; then master_candidate=$node highest_gtid=$node_gtid fi done # If we managed to query a sufficient number of nodes # then set a master if [ $node_count -ge $expected_node_count ]; then ocf_log info "Promoting $master_candidate to master, highest gtid $highest_gtid, queried $node_count nodes." ocf_promotion_score -v 100 -N $master_candidate else ocf_log info "Not enough nodes ($node_count) contributed to select a master, need $expected_node_count nodes." fi } set_read_only() { # Sets or unsets read-only mode. Accepts one boolean as its # optional argument. If invoked without any arguments, defaults to # enabling read only mode. Should only be set in master/slave # setups. # Returns $OCF_SUCCESS if the operation succeeds, or # $OCF_ERR_GENERIC if it fails. local ro_val if ocf_is_true $1; then ro_val="on" else ro_val="off" fi ocf_run $MYSQL $MYSQL_OPTIONS_REPL \ -e "SET GLOBAL read_only=${ro_val}" } get_read_only() { # Check if read-only is set local read_only_state read_only_state=$($MYSQL $MYSQL_OPTIONS_REPL \ -e "SHOW VARIABLES" | grep -w read_only | awk '{print $2}') if [ "$read_only_state" = "ON" ]; then return 0 else return 1 fi } is_slave() { # Determine whether the machine is currently running as a MariaDB # slave, as determined per SHOW SLAVE STATUS. Returns 1 if SHOW # SLAVE STATUS creates an empty result set, 0 otherwise. local rc # Check whether this machine should be slave if ! 
get_read_only; then return 1 fi if get_slave_info; then # show slave status is not empty # Is the slave sql thread running, then we are a slave! if [ "$slave_sql" == 'Yes' ]; then return 0 else return 1 fi else # "SHOW SLAVE STATUS" returns an empty set if instance is not a # replication slave return 1 fi } parse_slave_info() { # Extracts field $1 from result of "SHOW SLAVE STATUS\G" from file $2 sed -ne "s/^.* $1: \(.*\)$/\1/p" < $2 } get_slave_info() { if [ "$master_log_file" -a "$master_host" ]; then # variables are already defined, get_slave_info has been run before return $OCF_SUCCESS else local tmpfile=$(mktemp ${HA_RSCTMP}/check_slave.${OCF_RESOURCE_INSTANCE}.XXXXXX) $MYSQL $MYSQL_OPTIONS_REPL \ -e 'SHOW SLAVE STATUS\G' > $tmpfile if [ -s $tmpfile ]; then master_host=$(parse_slave_info Master_Host $tmpfile) master_user=$(parse_slave_info Master_User $tmpfile) master_port=$(parse_slave_info Master_Port $tmpfile) master_using_gtid=$(parse_slave_info Using_Gtid $tmpfile) master_log_file=$(parse_slave_info Master_Log_File $tmpfile) slave_sql=$(parse_slave_info Slave_SQL_Running $tmpfile) slave_io=$(parse_slave_info Slave_IO_Running $tmpfile) last_errno=$(parse_slave_info Last_Errno $tmpfile) last_error=$(parse_slave_info Last_Error $tmpfile) secs_behind=$(parse_slave_info Seconds_Behind_Master $tmpfile) last_io_errno=$(parse_slave_info Last_IO_Errno $tmpfile) last_io_error=$(parse_slave_info Last_IO_Error $tmpfile) ocf_log debug "MariaDB instance running as a replication slave" rm "$tmpfile" else # Instance produced an empty "SHOW SLAVE STATUS" output -- # instance is not a slave rm "$tmpfile" return $OCF_ERR_GENERIC fi return $OCF_SUCCESS fi } check_slave() { # Checks slave status local rc new_master get_slave_info rc=$? 
if [ $rc -eq 0 ]; then # Check normal errors if [ $last_errno -ne 0 ]; then ocf_exit_reason "MariaDB slave replication has failed ($last_errno): $last_error" exit $OCF_ERR_GENERIC fi # Check IO Errors, ignore 2003 which indicates a connection failure to the master if [ $last_io_errno -ne 0 ] && [ $last_io_errno -ne 2003 ]; then ocf_exit_reason "MariaDB slave io has failed ($last_io_errno): $last_io_error" exit $OCF_ERR_GENERIC fi if [ $last_io_errno -eq 2003 ]; then ocf_log warn "MariaDB master not reachable from slave" fi if [ "$slave_io" != 'Yes' ]; then # Not necessarily a bad thing. The master may have # temporarily shut down, and the slave may just be # reconnecting. A warning can't hurt, though. ocf_log warn "MariaDB Slave IO threads currently not running." # Sanity check, are we at least on the right master new_master=$($CRM_ATTR_REPL_INFO --query -q) if [ "$master_host" != "$new_master" ]; then # Not pointing to the right master, not good, removing the VIPs set_reader_attr 0 exit $OCF_SUCCESS fi fi if [ "$slave_sql" != 'Yes' ]; then # We don't have a replication SQL thread running. Not a # good thing. Try to recoved by restarting the SQL thread # and remove reader vip. Prevent MariaDB restart. ocf_exit_reason "MariaDB Slave SQL threads currently not running." # Remove reader vip set_reader_attr 0 # try to restart slave ocf_run $MYSQL $MYSQL_OPTIONS_REPL \ -e "START SLAVE" # Return success to prevent a restart exit $OCF_SUCCESS fi ocf_log debug "MariaDB instance running as a replication slave" else # Instance produced an empty "SHOW SLAVE STATUS" output -- # instance is not a slave # TODO: Needs to handle when get_slave_info will return too many connections error ocf_exit_reason "check_slave invoked on an instance that is not a replication slave." exit $OCF_ERR_GENERIC fi } set_master() { local new_master=$($CRM_ATTR_REPL_INFO --query -q) # Informs the MariaDB server of the master to replicate # from. 
Accepts one mandatory argument which must contain the host # name of the new master host. The master must either be unchanged # from the laste master the slave replicated from, or freshly # reset with RESET MASTER. ocf_log info "Changing MariaDB configuration to replicate from $new_master." ocf_run $MYSQL $MYSQL_OPTIONS_REPL \ -e "CHANGE MASTER TO MASTER_HOST='$new_master', \ MASTER_PORT=$OCF_RESKEY_replication_port, \ MASTER_USER='$OCF_RESKEY_replication_user', \ MASTER_PASSWORD='$OCF_RESKEY_replication_passwd', \ MASTER_USE_GTID=current_pos"; } unset_master(){ # Instructs the MariaDB server to stop replicating from a master # host. # If we're currently not configured to be replicating from any # host, then there's nothing to do. But we do log a warning as # no-one but the CRM should be touching the MariaDB master/slave # configuration. if ! is_slave; then ocf_log warn "Attempted to unset the replication master on an instance that is not configured as a replication slave" return $OCF_SUCCESS fi # Stop the slave I/O thread and wait for relay log # processing to complete ocf_run $MYSQL $MYSQL_OPTIONS_REPL \ -e "STOP SLAVE IO_THREAD" if [ $? -gt 0 ]; then ocf_exit_reason "Error stopping slave IO thread" exit $OCF_ERR_GENERIC fi local tmpfile=$(mktemp ${HA_RSCTMP}/threads.${OCF_RESOURCE_INSTANCE}.XXXXXX) while true; do $MYSQL $MYSQL_OPTIONS_REPL \ -e 'SHOW PROCESSLIST\G' > $tmpfile if grep -i 'Has read all relay log' $tmpfile >/dev/null; then ocf_log info "MariaDB slave has finished processing relay log" break fi if ! grep -q 'system user' $tmpfile; then ocf_log info "Slave not runnig - not waiting to finish" break fi ocf_log info "Waiting for MariaDB slave to finish processing relay log" sleep 1 done rm -f $tmpfile # Now, stop all slave activity and unset the master host ocf_run $MYSQL $MYSQL_OPTIONS_REPL \ -e "STOP SLAVE" if [ $? 
-gt 0 ]; then ocf_exit_reason "Error stopping rest slave threads" exit $OCF_ERR_GENERIC fi ocf_run $MYSQL $MYSQL_OPTIONS_REPL \ -e "RESET SLAVE /*!50516 ALL */;" if [ $? -gt 0 ]; then ocf_exit_reason "Failed to reset slave" exit $OCF_ERR_GENERIC fi } # Start replication as slave start_slave() { ocf_run $MYSQL $MYSQL_OPTIONS_REPL \ -e "START SLAVE" } # Set the attribute controlling the readers VIP set_reader_attr() { local curr_attr_value curr_attr_value=$(get_reader_attr) if [ "$curr_attr_value" -ne "$1" ]; then $CRM_ATTR -l reboot --name ${OCF_RESKEY_reader_attribute} -v $1 fi } # get the attribute controlling the readers VIP get_reader_attr() { local attr_value local rc attr_value=$($CRM_ATTR -l reboot --name ${OCF_RESKEY_reader_attribute} --query -q) rc=$? if [ "$rc" -eq "0" ]; then echo $attr_value else echo -1 fi } # Determines what IP address is attached to the current host. The output of the # crm_attribute command looks like this: # scope=nodes name=IP value=10.2.2.161 # If the ${INSTANCE_ATTR_NAME}_MYSQL_MASTER_IP node attribute is not defined, fallback is to uname -n # The ${INSTANCE_ATTR_NAME}_MYSQL_MASTER_IP is the IP address that will be used for the # change master to command. get_local_ip() { local IP IP=$($CRM_ATTR -l forever -n ${INSTANCE_ATTR_NAME}_mysql_master_IP -q -G 2>/dev/null) if [ ! $? -eq 0 ]; then uname -n else echo $IP fi } ####################################################################### # Functions invoked by resource manager actions mysql_monitor() { local rc local status_loglevel="err" # Set loglevel to info during probe if ocf_is_probe; then status_loglevel="info" fi mysql_common_status $status_loglevel rc=$? # If status returned an error, return that immediately if [ $rc -ne $OCF_SUCCESS ]; then return $rc fi # Check if this instance is configured as a slave, and if so # check slave status if is_slave; then if ! 
check_slave; then return $OCF_ERR_GENERIC fi fi if [ -n "$OCF_RESKEY_test_table" ]; then # Check for test table ocf_run -q $MYSQL $MYSQL_OPTIONS_TEST \ -e "SELECT COUNT(*) FROM $OCF_RESKEY_test_table" rc=$? if [ $rc -ne 0 ]; then ocf_exit_reason "Failed to select from $test_table"; return $OCF_ERR_GENERIC; fi fi # Check if we are in read-only mode and there is no master # with priority then we attempt to select a master if get_read_only && ! have_master_with_priority; then attempt_to_set_master fi if ! get_read_only; then ocf_log debug "MariaDB monitor succeeded (master)"; return $OCF_RUNNING_MASTER else ocf_log debug "MariaDB monitor succeeded"; return $OCF_SUCCESS fi } mysql_start() { local rc if ! ocf_is_ms; then ocf_exit_reason "Resource is not configured as master/slave" return $OCF_ERR_GENERIC fi # Initialize the ReaderVIP attribute, monitor will enable it set_reader_attr 0 mysql_common_status info if [ $? = $OCF_SUCCESS ]; then ocf_log info "MariaDB already running" return $OCF_SUCCESS fi mysql_common_prepare_dirs mysql_common_start --skip-slave-start --log-slave-updates rc=$? if [ $rc != $OCF_SUCCESS ]; then return $rc fi # Enable semi-sync ocf_run -q $MYSQL $MYSQL_OPTIONS_TEST \ -e "SET GLOBAL rpl_semi_sync_slave_enabled='ON', \ rpl_semi_sync_master_enabled='ON', \ rpl_semi_sync_master_wait_no_slave='OFF', \ rpl_semi_sync_master_wait_point='AFTER_SYNC', \ gtid_strict_mode='ON', \ sync_binlog=1, \ sync_master_info=1, \ innodb_flush_log_at_trx_commit=1;" rc=$? if [ $rc -ne 0 ]; then ocf_exit_reason "Failed to enable semi-sync and set variables"; return $OCF_ERR_GENERIC; fi # We're configured as a stateful resource. We must start as # slave by default. At this point we don't know if the CRM has # already promoted a master. So, we simply start in read only # mode and make sure our old score is invalidated. set_read_only on ocf_promotion_score -D # Now, let's see whether there is a master. 
We might be a new # node that is just joining the cluster, and the CRM may have # promoted a master before. new_master_host=$(echo $OCF_RESKEY_CRM_meta_notify_master_uname|tr -d " ") if [ "$new_master_host" -a "$new_master_host" != ${NODENAME} ]; then set_master start_slave if [ $? -ne 0 ]; then ocf_exit_reason "Failed to start slave" return $OCF_ERR_GENERIC fi else ocf_log info "No MariaDB master present - clearing replication state, setting gtid in attrd, waiting for first master" unset_master set_waiting_for_first_master fi # Initial monitor action if [ -n "$OCF_RESKEY_test_table" -a -n "$OCF_RESKEY_test_user" -a -n "$OCF_RESKEY_test_passwd" ]; then OCF_CHECK_LEVEL=10 fi mysql_monitor rc=$? if [ $rc != $OCF_SUCCESS -a $rc != $OCF_RUNNING_MASTER ]; then ocf_exit_reason "Failed initial monitor action" return $rc fi ocf_log info "MariaDB started" return $OCF_SUCCESS } mysql_stop() { # clear preference for becoming master ocf_promotion_score -D # Remove VIP capability set_reader_attr 0 mysql_common_stop } mysql_promote() { local master_info if ( ! mysql_common_status err ); then return $OCF_NOT_RUNNING fi ocf_run $MYSQL $MYSQL_OPTIONS_REPL \ -e "STOP SLAVE" set_read_only off || return $OCF_ERR_GENERIC # Force the master to wait for timeout period on slave disconnect ocf_run -q $MYSQL $MYSQL_OPTIONS_TEST \ -e "SET GLOBAL rpl_semi_sync_master_wait_no_slave='ON';" # Set Master Info in CIB, cluster level attribute master_info="$(get_local_ip)" ${CRM_ATTR_REPL_INFO} -v "$master_info" # A master can accept reads set_reader_attr 1 # Clear the gtids in attrd now that there is a master clear_all_gtid return $OCF_SUCCESS } mysql_demote() { if ! mysql_common_status err; then return $OCF_NOT_RUNNING fi # Return to default no wait setting. 
ocf_run -q $MYSQL $MYSQL_OPTIONS_TEST \ -e "SET GLOBAL rpl_semi_sync_master_wait_no_slave='OFF';" # Return master preference to default, so the cluster manager gets # a chance to select a new master ocf_promotion_score -D } mysql_notify() { local type_op type_op="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}" ocf_log debug "Received $type_op notification." case "$type_op" in 'pre-promote') # A master is now being promoted, remove the waiting-for-first-master flag clear_waiting_for_first_master ;; 'post-promote') # The master has completed its promotion. Now is a good # time to check whether our replication slave is working # correctly. new_master_host=$(echo $OCF_RESKEY_CRM_meta_notify_promote_uname|tr -d " ") if [ "$new_master_host" = ${NODENAME} ]; then ocf_log info "This will be the new master, ignoring post-promote notification." else ocf_log info "Resetting replication, uname of master: $new_master_host" unset_master if [ $? -ne 0 ]; then return $OCF_ERR_GENERIC fi set_master if [ $? -ne 0 ]; then return $OCF_ERR_GENERIC fi start_slave if [ $? -ne 0 ]; then ocf_exit_reason "Failed to start slave" return $OCF_ERR_GENERIC fi fi return $OCF_SUCCESS ;; 'pre-demote') demote_host=$(echo $OCF_RESKEY_CRM_meta_notify_demote_uname|tr -d " ") if [ $demote_host = ${NODENAME} ]; then ocf_log info "pre-demote notification for $demote_host" set_read_only on if [ $? 
-ne 0 ]; then ocf_exit_reason "Failed to set read-only"; return $OCF_ERR_GENERIC; fi # Must kill all existing user threads because they are still Read/write # in order for the slaves to complete the read of binlogs local tmpfile=$(mktemp ${HA_RSCTMP}/threads.${OCF_RESOURCE_INSTANCE}.XXXXXX) $MYSQL $MYSQL_OPTIONS_REPL -e "SHOW PROCESSLIST" > $tmpfile for thread in $(awk '$0 !~ /Binlog Dump|system user|event_scheduler|SHOW PROCESSLIST/ && $0 ~ /^[0-9]/ {print $1}' $tmpfile) do ocf_run $MYSQL $MYSQL_OPTIONS_REPL \ -e "KILL ${thread}" done rm -f $tmpfile else ocf_log info "Ignoring post-demote notification execpt for my own demotion." fi return $OCF_SUCCESS ;; 'post-demote') demote_host=$(echo $OCF_RESKEY_CRM_meta_notify_demote_uname|tr -d " ") if [ $demote_host = ${NODENAME} ]; then ocf_log info "Ignoring post-demote notification for my own demotion." return $OCF_SUCCESS fi ocf_log info "post-demote notification for $demote_host." # The former master has just been gracefully demoted. unset_master ;; *) return $OCF_SUCCESS ;; esac } mysql_validate() { check_binary bc } ####################################################################### case "$1" in meta-data) meta_data exit $OCF_SUCCESS;; usage|help) usage exit $OCF_SUCCESS;; esac mysql_common_validate rc=$? LSB_STATUS_STOPPED=3 if [ $rc -ne 0 ]; then case "$1" in stop) ;; monitor) mysql_common_status "info" if [ $? -eq $OCF_SUCCESS ]; then # if validatation fails and pid is active, always treat this as an error ocf_exit_reason "environment validation failed, active pid is in unknown state." exit $OCF_ERR_GENERIC fi # validation failed and pid is not active, it's safe to say this instance is inactive. exit $OCF_NOT_RUNNING;; status) exit $LSB_STATUS_STOPPED;; *) exit $rc;; esac fi # What kind of method was invoked? 
case "$1" in start) mysql_start;; stop) mysql_stop;; status) mysql_common_status err;; monitor) mysql_monitor;; promote) mysql_promote;; demote) mysql_demote;; notify) mysql_notify;; validate-all) mysql_validate;; *) usage exit $OCF_ERR_UNIMPLEMENTED;; esac # vi:sw=4:ts=4:et: diff --git a/heartbeat/mysql b/heartbeat/mysql index 1df2fc0f2..6b00889ff 100755 --- a/heartbeat/mysql +++ b/heartbeat/mysql @@ -1,1074 +1,1073 @@ #!/bin/sh # # # MySQL # # Description: Manages a MySQL database as Linux-HA resource # # Authors: Alan Robertson: DB2 Script # Jakub Janczak: rewrite as MySQL # Andrew Beekhof: cleanup and import # Sebastian Reitenbach: add OpenBSD defaults, more cleanup # Narayan Newton: add Gentoo/Debian defaults # Marian Marinov, Florian Haas: add replication capability # Yves Trudeau, Baron Schwartz: add VIP support and improve replication # # Support: users@clusterlabs.org # License: GNU General Public License (GPL) # # (c) 2002-2005 International Business Machines, Inc. # 2005-2010 Linux-HA contributors # # An example usage in /etc/ha.d/haresources: # node1 10.0.0.170 mysql # # See usage() function below for more details... # # OCF instance parameters: # OCF_RESKEY_binary # OCF_RESKEY_client_binary # OCF_RESKEY_config # OCF_RESKEY_datadir # OCF_RESKEY_user # OCF_RESKEY_group # OCF_RESKEY_test_table # OCF_RESKEY_test_user # OCF_RESKEY_test_passwd # OCF_RESKEY_enable_creation # OCF_RESKEY_additional_parameters # OCF_RESKEY_log # OCF_RESKEY_pid # OCF_RESKEY_socket # OCF_RESKEY_replication_user # OCF_RESKEY_replication_passwd # OCF_RESKEY_replication_port # OCF_RESKEY_max_slave_lag # OCF_RESKEY_evict_outdated_slaves # OCF_RESKEY_reader_attribute ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs . 
${OCF_FUNCTIONS_DIR}/mysql-common.sh ####################################################################### usage() { cat < 1.0 Resource script for MySQL. May manage a standalone MySQL database, a clone set with externally managed replication, or a complete master/slave replication setup. Note, when master/slave replication is in use, the resource must be setup to use notifications. Set 'notify=true' in the metadata attributes when defining a MySQL master/slave instance. While managing replication, the default behavior is to use uname -n values in the change master to command. Other IPs can be specified manually by adding a node attribute \${INSTANCE_ATTR_NAME}_mysql_master_IP giving the IP to use for replication. For example, if the mysql primitive you are using is p_mysql, the attribute to set will be p_mysql_mysql_master_IP. Manages a MySQL database instance Location of the MySQL server binary MySQL server binary Location of the MySQL client binary MySQL client binary Configuration file MySQL config Directory containing databases MySQL datadir User running MySQL daemon MySQL user Group running MySQL daemon (for logfile and directory permissions) MySQL group The logfile to be used for mysqld. MySQL log file The pidfile to be used for mysqld. MySQL pid file The socket to be used for mysqld. MySQL socket Table to be tested in monitor statement (in database.table notation) MySQL test table MySQL test user, must have select privilege on test_table MySQL test user MySQL test user password MySQL test user password If the MySQL database does not exist, it will be created Create the database if it does not exist Additional parameters which are passed to the mysqld on startup. (e.g. --skip-external-locking or --skip-grant-tables) Additional parameters to pass to mysqld MySQL replication user. This user is used for starting and stopping MySQL replication, for setting and resetting the master host, and for setting and unsetting read-only mode. 
Because of that, this user must have SUPER, REPLICATION SLAVE, REPLICATION CLIENT, PROCESS and RELOAD privileges on all nodes within the cluster. Mandatory if you define a master-slave resource. MySQL replication user MySQL replication password. Used for replication client and slave. Mandatory if you define a master-slave resource. MySQL replication user password The port on which the Master MySQL instance is listening. MySQL replication port Enables SSL connection to local MySQL service for replication user. i.e. if REQUIRE SSL for replication user in MySQL set, this should be set to "true". MySQL replication require ssl The SSL CA certificate to be used for replication over SSL. MySQL replication SSL CA certificate The SSL CA certificate to be used for replication over SSL. MySQL replication SSL certificate The SSL certificate key to be used for replication over SSL. MySQL replication SSL certificate key The maximum number of seconds a replication slave is allowed to lag behind its master. Do not set this to zero. What the cluster manager does in case a slave exceeds this maximum lag is determined by the evict_outdated_slaves parameter. Maximum time (seconds) a MySQL slave is allowed to lag behind a master If set to true, any slave which is more than max_slave_lag seconds behind the master has its MySQL instance shut down. If this parameter is set to false in a primitive or clone resource, it is simply ignored. If set to false in a master/slave resource, then exceeding the maximum slave lag will merely push down the master preference so the lagging slave is never promoted to the new master. Determines whether to shut down badly lagging slaves An attribute that the RA can manage to specify whether a node can be read from. This node attribute will be 1 if it's fine to read from the node, and 0 otherwise (for example, when a slave has lagged too far behind the master). 
A typical example for the use of this attribute would be to tie a set of IP addresses to MySQL slaves that can be read from. This parameter is only meaningful in master/slave set configurations. Sets the node attribute that determines whether a node is usable for clients to read from. - END } # Convenience functions set_read_only() { # Sets or unsets read-only mode. Accepts one boolean as its # optional argument. If invoked without any arguments, defaults to # enabling read only mode. Should only be set in master/slave # setups. # Returns $OCF_SUCCESS if the operation succeeds, or # $OCF_ERR_GENERIC if it fails. local ro_val if ocf_is_true $1; then ro_val="on" else ro_val="off" fi ocf_run $MYSQL $MYSQL_OPTIONS_REPL \ -e "SET GLOBAL read_only=${ro_val}" } get_read_only() { # Check if read-only is set local read_only_state read_only_state=`$MYSQL $MYSQL_OPTIONS_REPL \ --skip-column-names -e "SHOW VARIABLES LIKE 'read_only'" | awk '{print $2}'` if [ "$read_only_state" = "ON" ]; then return 0 else return 1 fi } is_slave() { # Determine whether the machine is currently running as a MySQL # slave, as determined per SHOW SLAVE STATUS. Returns 1 if SHOW # SLAVE STATUS creates an empty result set, 0 otherwise. local rc local tmpfile # Check whether this machine should be slave if ! ocf_is_ms || ! get_read_only; then return 1 fi get_slave_info rc=$? rm -f $tmpfile if [ $rc -eq 0 ]; then # show slave status is not empty # Is there a master_log_file defined? (master_log_file is deleted # by reset slave if [ "$master_log_file" ]; then return 0 else return 1 fi else # "SHOW SLAVE STATUS" returns an empty set if instance is not a # replication slave return 1 fi } parse_slave_info() { # Extracts field $1 from result of "SHOW SLAVE STATUS\G" from file $2 sed -ne "s/^.* $1: \(.*\)$/\1/p" < $2 } get_slave_info() { # Warning: this sets $tmpfile and LEAVE this file! You must delete it after use! 
local mysql_options if [ "$master_log_file" -a "$master_host" ]; then # variables are already defined, get_slave_info has been run before return $OCF_SUCCESS else tmpfile=`mktemp ${HA_RSCTMP}/check_slave.${OCF_RESOURCE_INSTANCE}.XXXXXX` $MYSQL $MYSQL_OPTIONS_REPL \ -e 'SHOW SLAVE STATUS\G' > $tmpfile if [ -s $tmpfile ]; then master_host=`parse_slave_info Master_Host $tmpfile` master_user=`parse_slave_info Master_User $tmpfile` master_port=`parse_slave_info Master_Port $tmpfile` master_log_file=`parse_slave_info Master_Log_File $tmpfile` master_log_pos=`parse_slave_info Read_Master_Log_Pos $tmpfile` slave_sql=`parse_slave_info Slave_SQL_Running $tmpfile` slave_io=`parse_slave_info Slave_IO_Running $tmpfile` last_errno=`parse_slave_info Last_Errno $tmpfile` secs_behind=`parse_slave_info Seconds_Behind_Master $tmpfile` ocf_log debug "MySQL instance running as a replication slave" else # Instance produced an empty "SHOW SLAVE STATUS" output -- # instance is not a slave ocf_exit_reason "check_slave invoked on an instance that is not a replication slave." return $OCF_ERR_GENERIC fi return $OCF_SUCCESS fi } check_slave() { # Checks slave status local rc new_master get_slave_info rc=$? if [ $rc -eq 0 ]; then # Did we receive an error other than max_connections? if [ $last_errno -ne 0 -a $last_errno -ne "$MYSQL_TOO_MANY_CONN_ERR" ]; then # Whoa. Replication ran into an error. This slave has # diverged from its master. Make sure this resource # doesn't restart in place. ocf_exit_reason "MySQL instance configured for replication, but replication has failed." ocf_log err "See $tmpfile for details" # Just pull the reader VIP away, killing MySQL here would be pretty evil # on a loaded server set_reader_attr 0 exit $OCF_SUCCESS fi # If we got max_connections, let's remove the vip if [ $last_errno -eq "$MYSQL_TOO_MANY_CONN_ERR" ]; then set_reader_attr 0 exit $OCF_SUCCESS fi if [ "$slave_io" != 'Yes' ]; then # Not necessarily a bad thing. 
The master may have # temporarily shut down, and the slave may just be # reconnecting. A warning can't hurt, though. ocf_log warn "MySQL Slave IO threads currently not running." # Sanity check, are we at least on the right master new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1` if [ "$master_host" != "$new_master" ]; then # Not pointing to the right master, not good, removing the VIPs set_reader_attr 0 exit $OCF_SUCCESS fi fi if [ "$slave_sql" != 'Yes' ]; then # We don't have a replication SQL thread running. Not a # good thing. Try to recoved by restarting the SQL thread # and remove reader vip. Prevent MySQL restart. ocf_exit_reason "MySQL Slave SQL threads currently not running." ocf_log err "See $tmpfile for details" # Remove reader vip set_reader_attr 0 # try to restart slave ocf_run $MYSQL $MYSQL_OPTIONS_REPL \ -e "START SLAVE" # Return success to prevent a restart exit $OCF_SUCCESS fi if ocf_is_true $OCF_RESKEY_evict_outdated_slaves; then # We're supposed to bail out if we lag too far # behind. Let's check our lag. if [ "$secs_behind" = "NULL" ] || [ $secs_behind -gt $OCF_RESKEY_max_slave_lag ]; then ocf_exit_reason "MySQL Slave is $secs_behind seconds behind master (allowed maximum: $OCF_RESKEY_max_slave_lag)." ocf_log err "See $tmpfile for details" # Remove reader vip set_reader_attr 0 exit $OCF_ERR_INSTALLED fi fi # is the slave ok to have a VIP on it if [ "$secs_behind" = "NULL" ] || [ $secs_behind -gt $OCF_RESKEY_max_slave_lag ]; then set_reader_attr 0 else set_reader_attr 1 fi ocf_log debug "MySQL instance running as a replication slave" rm -f $tmpfile else # Instance produced an empty "SHOW SLAVE STATUS" output -- # instance is not a slave # TODO: Needs to handle when get_slave_info will return too many connections error rm -f $tmpfile ocf_exit_reason "check_slave invoked on an instance that is not a replication slave." 
exit $OCF_ERR_GENERIC fi } set_master() { local new_master master_log_file master_log_pos local master_params master_ssl_params new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1` # Keep replication position get_slave_info if [ "$master_log_file" -a "$new_master" = "$master_host" ]; then # master_params=", MASTER_LOG_FILE='$master_log_file', \ # MASTER_LOG_POS=$master_log_pos" ocf_log info "Kept master pos for $master_host : $master_log_file:$master_log_pos" rm -f $tmpfile return else master_log_file=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f2` master_log_pos=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f3` if [ -n "$master_log_file" -a -n "$master_log_pos" ]; then master_params=", MASTER_LOG_FILE='$master_log_file', \ MASTER_LOG_POS=$master_log_pos" ocf_log info "Restored master pos for $new_master : $master_log_file:$master_log_pos" fi fi # Informs the MySQL server of the master to replicate # from. Accepts one mandatory argument which must contain the host # name of the new master host. The master must either be unchanged # from the last master the slave replicated from, or freshly # reset with RESET MASTER. if [ -n "$OCF_RESKEY_replication_master_ssl_ca" ] && [ -n "$OCF_RESKEY_replication_master_ssl_cert" ] && [ -n "$OCF_RESKEY_replication_master_ssl_key" ]; then master_ssl_params=", MASTER_SSL=1, \ MASTER_SSL_CA='$OCF_RESKEY_replication_master_ssl_ca', \ MASTER_SSL_CERT='$OCF_RESKEY_replication_master_ssl_cert', \ MASTER_SSL_KEY='$OCF_RESKEY_replication_master_ssl_key'" fi ocf_run $MYSQL $MYSQL_OPTIONS_REPL \ -e "CHANGE MASTER TO MASTER_HOST='$new_master', \ MASTER_PORT=$OCF_RESKEY_replication_port, \ MASTER_USER='$OCF_RESKEY_replication_user', \ MASTER_PASSWORD='$OCF_RESKEY_replication_passwd' $master_params $master_ssl_params" rm -f $tmpfile } unset_master(){ # Instructs the MySQL server to stop replicating from a master # host. # If we're currently not configured to be replicating from any # host, then there's nothing to do. 
But we do log a warning as # no-one but the CRM should be touching the MySQL master/slave # configuration. if ! is_slave; then ocf_log warn "Attempted to unset the replication master on an instance that is not configured as a replication slave" return $OCF_SUCCESS fi local tmpfile tmpfile=`mktemp ${HA_RSCTMP}/unset_master.${OCF_RESOURCE_INSTANCE}.XXXXXX` # At this point, the master is read only so there should not be much binlogs to transfer # Let's wait for the last bits while true; do $MYSQL $MYSQL_OPTIONS_REPL \ -e 'SHOW PROCESSLIST\G' > $tmpfile if grep -i 'Waiting for master to send event' $tmpfile >/dev/null; then ocf_log info "MySQL slave has finished reading master binary log" break fi if grep -i 'Reconnecting after a failed master event read' $tmpfile >/dev/null; then ocf_log info "Master is down, no more binary logs to come" break fi if grep -i 'Connecting to master' $tmpfile >/dev/null; then ocf_log info "Master is down, no more binary logs to come" break fi if ! grep 'system user' $tmpfile >/dev/null; then ocf_log info "Slave is not running - not waiting to finish" break fi sleep 1 done # Now, stop the slave I/O thread and wait for relay log # processing to complete ocf_run $MYSQL $MYSQL_OPTIONS_REPL \ -e "STOP SLAVE IO_THREAD" if [ $? -gt 0 ]; then ocf_exit_reason "Error stopping slave IO thread" exit $OCF_ERR_GENERIC fi while true; do $MYSQL $MYSQL_OPTIONS_REPL \ -e 'SHOW PROCESSLIST\G' > $tmpfile if grep -i 'Has read all relay log' $tmpfile >/dev/null; then ocf_log info "MySQL slave has finished processing relay log" break fi if ! grep -q 'system user' $tmpfile; then ocf_log info "Slave not runnig - not waiting to finish" break fi ocf_log info "Waiting for MySQL slave to finish processing relay log" sleep 1 done rm -f $tmpfile # Now, stop all slave activity and unset the master host ocf_run $MYSQL $MYSQL_OPTIONS_REPL \ -e "STOP SLAVE" if [ $? 
-gt 0 ]; then ocf_exit_reason "Error stopping rest slave threads" exit $OCF_ERR_GENERIC fi ocf_run $MYSQL $MYSQL_OPTIONS_REPL \ -e "RESET SLAVE /*!50516 ALL */;" if [ $? -gt 0 ]; then ocf_exit_reason "Failed to reset slave" exit $OCF_ERR_GENERIC fi } # Start replication as slave start_slave() { ocf_run $MYSQL $MYSQL_OPTIONS_REPL \ -e "START SLAVE" } # Set the attribute controlling the readers VIP set_reader_attr() { local curr_attr_value curr_attr_value=$(get_reader_attr) if [ "$curr_attr_value" -ne "$1" ]; then $CRM_ATTR -l reboot --name ${OCF_RESKEY_reader_attribute} -v $1 fi } # get the attribute controlling the readers VIP get_reader_attr() { local attr_value local rc attr_value=`$CRM_ATTR -l reboot --name ${OCF_RESKEY_reader_attribute} --query -q` rc=$? if [ "$rc" -eq "0" ]; then echo $attr_value else echo -1 fi } # Stores data for MASTER STATUS from MySQL update_data_master_status() { master_status_file="${HA_RSCTMP}/master_status.${OCF_RESOURCE_INSTANCE}" $MYSQL $MYSQL_OPTIONS_REPL -e "SHOW MASTER STATUS\G" > $master_status_file } # Returns the specified value from the stored copy of SHOW MASTER STATUS. # should be call after update_data_master_status for tmpfile # Arguments: # $1 The value to get. get_master_status() { awk -v var="$1" '$1 == var ":" {print substr($0, index($0, ":") + 2)}' "$master_status_file" } # Determines what IP address is attached to the current host. The output of the # crm_attribute command looks like this: # scope=nodes name=IP value=10.2.2.161 # If the ${INSTANCE_ATTR_NAME}_MYSQL_MASTER_IP node attribute is not defined, fallback is to uname -n # The ${INSTANCE_ATTR_NAME}_MYSQL_MASTER_IP is the IP address that will be used for the # change master to command. get_local_ip() { local IP IP=`$CRM_ATTR -l forever -n ${INSTANCE_ATTR_NAME}_mysql_master_IP -q -G` if [ ! $? 
-eq 0 ]; then uname -n else echo $IP fi } ####################################################################### # Functions invoked by resource manager actions mysql_monitor() { local rc local status_loglevel="err" # Set loglevel to info during probe if ocf_is_probe; then status_loglevel="info" fi if ocf_is_ms; then OCF_CHECK_LEVEL=10 fi mysql_common_status $status_loglevel rc=$? # TODO: check max connections error # If status returned an error, return that immediately if [ $rc -ne $OCF_SUCCESS ]; then if ocf_is_ms ; then # This is a master slave setup but monitored host returned some errors. # Immediately remove it from the pool of possible masters by erasing its master-mysql key # When new mysql master election is started and node got no or negative master-mysql attribute the following is logged # nodename.com pengine: debug: master_color: mysql:0 master score: -1 # If there are NO nodes with positive vaule election of mysql master will fail with # nodename.com pengine: info: master_color: ms_mysql: Promoted 0 instances of a possible 1 to master ocf_promotion_score -D fi return $rc fi if [ $OCF_CHECK_LEVEL -eq 10 ]; then if [ -z "$OCF_RESKEY_test_table" ]; then ocf_exit_reason "test_table not set" return $OCF_ERR_CONFIGURED fi # Check if this instance is configured as a slave, and if so # check slave status if is_slave; then check_slave fi # Check for test table ocf_run -q $MYSQL $MYSQL_OPTIONS_TEST \ -e "SELECT COUNT(*) FROM $OCF_RESKEY_test_table" rc=$? if [ $rc -ne 0 ]; then # We are master/slave and test failed. Delete master score for this node as it is considered unhealthy because of this particular failed check. ocf_is_ms && ocf_promotion_score -D ocf_exit_reason "Failed to select from $test_table"; return $OCF_ERR_GENERIC; fi fi if ocf_is_ms && ! 
get_read_only; then ocf_log debug "MySQL monitor succeeded (master)"; # Always set master score for the master ocf_promotion_score -v $((${OCF_RESKEY_max_slave_lag}+1)) return $OCF_RUNNING_MASTER else ocf_log debug "MySQL monitor succeeded"; ocf_is_ms && ocf_promotion_score -v 1 return $OCF_SUCCESS fi } mysql_start() { local rc if ocf_is_ms; then # Initialize the ReaderVIP attribute, monitor will enable it set_reader_attr 0 fi mysql_common_status info if [ $? = $OCF_SUCCESS ]; then ocf_log info "MySQL already running" return $OCF_SUCCESS fi mysql_common_prepare_dirs # Uncomment to perform permission clensing # - not convinced this should be enabled by default # #chmod 0755 $OCF_RESKEY_datadir #chown -R $OCF_RESKEY_user $OCF_RESKEY_datadir #chgrp -R $OCF_RESKEY_group $OCF_RESKEY_datadir mysql_extra_params= if ocf_is_ms; then mysql_extra_params="--skip-slave-start" fi mysql_common_start $mysql_extra_params rc=$? if [ $rc != $OCF_SUCCESS ]; then return $rc fi if ocf_is_ms; then # We're configured as a stateful resource. We must start as # slave by default. At this point we don't know if the CRM has # already promoted a master. So, we simply start in read only # mode. set_read_only on # Now, let's see whether there is a master. We might be a new # node that is just joining the cluster, and the CRM may have # promoted a master before. master_host=`echo $OCF_RESKEY_CRM_meta_notify_master_uname|tr -d " "` if [ "$master_host" -a "$master_host" != ${NODENAME} ]; then ocf_log info "Changing MySQL configuration to replicate from $master_host." set_master start_slave if [ $? -ne 0 ]; then ocf_exit_reason "Failed to start slave" return $OCF_ERR_GENERIC fi else ocf_log info "No MySQL master present - clearing replication state" unset_master fi # We also need to set a master preference, otherwise Pacemaker # won't ever promote us in the absence of any explicit # preference set by the administrator. We choose a low # greater-than-zero preference. 
ocf_promotion_score -v 1 fi # Initial monitor action if [ -n "$OCF_RESKEY_test_table" -a -n "$OCF_RESKEY_test_user" -a -n "$OCF_RESKEY_test_passwd" ]; then OCF_CHECK_LEVEL=10 fi mysql_monitor rc=$? if [ $rc != $OCF_SUCCESS -a $rc != $OCF_RUNNING_MASTER ]; then ocf_exit_reason "Failed initial monitor action" return $rc fi ocf_log info "MySQL started" return $OCF_SUCCESS } mysql_stop() { if ocf_is_ms; then # clear preference for becoming master ocf_promotion_score -D # Remove VIP capability set_reader_attr 0 fi mysql_common_stop } mysql_promote() { local master_info if ( ! mysql_common_status err ); then return $OCF_NOT_RUNNING fi ocf_run $MYSQL $MYSQL_OPTIONS_REPL \ -e "STOP SLAVE" # Set Master Info in CIB, cluster level attribute update_data_master_status master_info="$(get_local_ip)|$(get_master_status File)|$(get_master_status Position)" ${CRM_ATTR_REPL_INFO} -v "$master_info" rm -f $tmpfile set_read_only off || return $OCF_ERR_GENERIC # Existing master gets a higher-than-default master preference, so # the cluster manager does not shuffle the master role around # unnecessarily ocf_promotion_score -v $((${OCF_RESKEY_max_slave_lag}+1)) # A master can accept reads set_reader_attr 1 return $OCF_SUCCESS } mysql_demote() { if ! mysql_common_status err; then return $OCF_NOT_RUNNING fi # Return master preference to default, so the cluster manager gets # a chance to select a new master ocf_promotion_score -v 1 } mysql_notify() { # If not configured as a Stateful resource, we make no sense of # notifications. if ! ocf_is_ms; then ocf_log info "This agent makes no use of notifications unless running in master/slave mode." return $OCF_SUCCESS fi local type_op type_op="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}" ocf_log debug "Received $type_op notification." case "$type_op" in 'pre-promote') # Nothing to do now here, new replication info not yet published ;; 'post-promote') # The master has completed its promotion. 
Now is a good # time to check whether our replication slave is working # correctly. master_host=`echo $OCF_RESKEY_CRM_meta_notify_promote_uname|tr -d " "` if [ "$master_host" = ${NODENAME} ]; then ocf_log info "This will be the new master, ignoring post-promote notification." else ocf_log info "Resetting replication" unset_master if [ $? -ne 0 ]; then return $OCF_ERR_GENERIC fi ocf_log info "Changing MySQL configuration to replicate from $master_host" set_master if [ $? -ne 0 ]; then return $OCF_ERR_GENERIC fi start_slave if [ $? -ne 0 ]; then ocf_exit_reason "Failed to start slave" return $OCF_ERR_GENERIC fi fi return $OCF_SUCCESS ;; 'pre-demote') demote_host=`echo $OCF_RESKEY_CRM_meta_notify_demote_uname|tr -d " "` if [ $demote_host = ${NODENAME} ]; then ocf_log info "post-demote notification for $demote_host" set_read_only on if [ $? -ne 0 ]; then ocf_exit_reason "Failed to set read-only"; return $OCF_ERR_GENERIC; fi # Must kill all existing user threads because they are still Read/write # in order for the slaves to complete the read of binlogs local tmpfile tmpfile=`mktemp ${HA_RSCTMP}/threads.${OCF_RESOURCE_INSTANCE}.XXXXXX` $MYSQL $MYSQL_OPTIONS_REPL \ -e "SHOW PROCESSLIST" > $tmpfile for thread in `awk '$0 !~ /Binlog Dump|system user|event_scheduler|SHOW PROCESSLIST/ && $0 ~ /^[0-9]/ {print $1}' $tmpfile` do ocf_run $MYSQL $MYSQL_OPTIONS_REPL \ -e "KILL ${thread}" done else ocf_log info "Ignoring post-demote notification execpt for my own demotion." fi return $OCF_SUCCESS ;; 'post-demote') demote_host=`echo $OCF_RESKEY_CRM_meta_notify_demote_uname|tr -d " "` if [ $demote_host = ${NODENAME} ]; then ocf_log info "Ignoring post-demote notification for my own demotion." return $OCF_SUCCESS fi ocf_log info "post-demote notification for $demote_host." # The former master has just been gracefully demoted. 
unset_master
        ;;
    *)
        return $OCF_SUCCESS
        ;;
    esac
}

#######################################################################

# Actions that must work without a validated environment are answered first.
case "$1" in
    meta-data)
        meta_data
        exit $OCF_SUCCESS
        ;;
    usage|help)
        usage
        exit $OCF_SUCCESS
        ;;
esac

mysql_common_validate
rc=$?
LSB_STATUS_STOPPED=3
if [ $rc -ne 0 ]; then
    # Validation failed: only "stop" is allowed to carry on; every other
    # action exits here with a status chosen per action.
    case "$1" in
        stop)
            ;;
        monitor)
            mysql_common_status "info"
            if [ $? -eq $OCF_SUCCESS ]; then
                # if validatation fails and pid is active, always treat this as an error
                ocf_exit_reason "environment validation failed, active pid is in unknown state."
                exit $OCF_ERR_GENERIC
            fi
            # validation failed and pid is not active, it's safe to say this instance is inactive.
            exit $OCF_NOT_RUNNING
            ;;
        status)
            exit $LSB_STATUS_STOPPED
            ;;
        *)
            exit $rc
            ;;
    esac
fi

# What kind of method was invoked?
case "$1" in
    start)
        mysql_start
        ;;
    stop)
        mysql_stop
        ;;
    status)
        mysql_common_status err
        ;;
    monitor)
        mysql_monitor
        ;;
    promote)
        mysql_promote
        ;;
    demote)
        mysql_demote
        ;;
    notify)
        mysql_notify
        ;;
    validate-all)
        exit $OCF_SUCCESS
        ;;
    *)
        usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac

# vi:sw=4:ts=4:et:
diff --git a/heartbeat/redis.in b/heartbeat/redis.in
index 6429477e1..1e541f13d 100755
--- a/heartbeat/redis.in
+++ b/heartbeat/redis.in
@@ -1,783 +1,782 @@
#!@BASH_SHELL@
#
# Resource agent script for redis server.
#
# Copyright (c) 2013 Patrick Hemmer
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file.
# Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#

#######################################################################
# Initialization:

: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs

# Parameter defaults

OCF_RESKEY_bin_default="/usr/bin/redis-server"
OCF_RESKEY_client_bin_default="/usr/bin/redis-cli"
# The configuration file default depends on which of the two paths exists.
if [ -f "/etc/redis.conf" ]; then
    OCF_RESKEY_config_default="/etc/redis.conf"
else
    OCF_RESKEY_config_default="/etc/redis/redis.conf"
fi
OCF_RESKEY_user_default="redis"
OCF_RESKEY_rundir_default="/var/run/redis"
OCF_RESKEY_pidfile_name_default="redis-server.pid"
OCF_RESKEY_socket_name_default="redis.sock"
OCF_RESKEY_port_default="6379"
OCF_RESKEY_tunnel_host_default="127.0.0.1"
OCF_RESKEY_tunnel_port_map_default=""
OCF_RESKEY_wait_last_known_master_default="false"

# Apply each default only when the parameter is not set at all; a parameter
# explicitly set to the empty string is kept as it is.
: ${OCF_RESKEY_bin=${OCF_RESKEY_bin_default}}
: ${OCF_RESKEY_client_bin=${OCF_RESKEY_client_bin_default}}
: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}}
: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}}
: ${OCF_RESKEY_rundir=${OCF_RESKEY_rundir_default}}
: ${OCF_RESKEY_pidfile_name=${OCF_RESKEY_pidfile_name_default}}
: ${OCF_RESKEY_socket_name=${OCF_RESKEY_socket_name_default}}
: ${OCF_RESKEY_port=${OCF_RESKEY_port_default}}
: ${OCF_RESKEY_tunnel_host=${OCF_RESKEY_tunnel_host_default}}
: ${OCF_RESKEY_tunnel_port_map=${OCF_RESKEY_tunnel_port_map_default}}
: ${OCF_RESKEY_wait_last_known_master=${OCF_RESKEY_wait_last_known_master_default}}

# redis_monitor enforces the replication-link checks only while this is 1.
CHECK_SLAVE_STATE=0

# Short names for the resolved parameters.
REDIS_CHECK_DUMP="/usr/bin/redis-check-dump"
REDIS_SERVER="$OCF_RESKEY_bin"
REDIS_CLIENT="$OCF_RESKEY_client_bin"
REDIS_CONFIG="$OCF_RESKEY_config"
REDIS_USER="$OCF_RESKEY_user"
REDIS_RUNDIR="$OCF_RESKEY_rundir"
REDIS_PIDFILE="$OCF_RESKEY_rundir/$OCF_RESKEY_pidfile_name" REDIS_SOCKET="$OCF_RESKEY_rundir/$OCF_RESKEY_socket_name" REDIS_REPLICATION_PORT="$OCF_RESKEY_port" if ! [ -f $REDIS_CHECK_DUMP ]; then REDIS_CHECK_DUMP="$(which redis-check-dump 2>/dev/null)" fi if [ -z "$REDIS_CHECK_DUMP" ]; then REDIS_CHECK_DUMP="$(which redis-check-rdb 2>/dev/null)" fi if [ -r "$REDIS_CONFIG" ]; then REDIS_DUMP_DIR="$(grep "^\s*dir\s" < "$REDIS_CONFIG" | awk '{ print $2 }' 2>/dev/null)" REDIS_DUMP_FILE="$(grep "^\s*dbfilename\s" < "$REDIS_CONFIG" | awk '{ print $2 }' 2>/dev/null)" fi : ${REDIS_DUMP_DIR:=/var/lib/redis/} : ${REDIS_DUMP_FILE:=dump.rdb} redis_meta_data() { cat < 1.0 Resource agent script for redis server. This resource fully supports master/slave replication. The master preference of a node is determined by the 'slave_priority' parameter of the redis config. When taking the resource from 'unmanaged' to 'managed', the currently active master will be given a priority of 1000 (plus 1 for each active connection). The default 'slave_priority' is 100, so the master will stay master. For a slave to become master after converting the resource to managed, set a slave_priority greater than 1000. Redis server Path to \`redis-server\` Path to \`redis-server\` Path to \`redis-cli\` Path to \`redis-cli\` Path to 'redis.conf' Path to 'redis.conf' User to run redis as Redis user Directory to store socket and pid file in Redis var/run dir The filename to use for the pidfile. Will be created in the rundir. Should only be a basename, not a full path. Redis pidfile name The filename to use for the socket. Will be crated in the rundir. Should only be a basename, not a full path. Redis socket name Port for replication client to connect to on remote server Replication port When replication traffic is tunnelled, this is the host to target to forward outgoing traffic to the redis master. The resource agent configures the redis slave to target the master via tunnel_host:tunnel_port. 
Note that in order to enable replication traffic tunneling, parameter {tunnel_port_map} must be populated. Tunnel host for replication traffic A mapping of pacemaker node names to redis port number. To be used when redis servers need to tunnel replication traffic. On every node where the redis resource is running, the redis server listens to a different port. Each redis server can access its peers for replication traffic via a tunnel accessible at {tunnel_host}:port. The mapping the form of: pcmk1-name:port-for-redis1;pcmk2-name:port-for-redis2;pcmk3-name:port-for-redis3 where the redis resource started on node pcmk1-name would listen on port port-for-redis1 Mapping of Redis server name to redis port During redis cluster bootstrap, wait for the last known master to be promoted before allowing any other instances in the cluster to be promoted. This lessens the risk of data loss when persistent data is in use. Wait for last known master - EOI } INSTANCE_ATTR_NAME=$(echo "${OCF_RESOURCE_INSTANCE}" | awk -F : '{print $1}') CRM_ATTR_REPL_INFO="${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${INSTANCE_ATTR_NAME}_REPL_INFO -s redis_replication" MASTER_HOST="" MASTER_ACTIVE_CACHED="" MASTER_ACTIVE="" CLI_HAVE_AUTH_WARNING=0 CLI_HAVE_ARG_NO_AUTH_WARNING=0 CLI_HAVE_ENV_AUTH=0 redis_cli_features() { CLI_VER=$("$REDIS_CLIENT" -v | awk '{print $NF}') # Starting with 4.0.10 there is a warning on stderr when using a pass # Starting with 5.0.0 there is an argument to silence the warning: --no-auth-warning # Starting with 5.0.3 there is an option to use REDISCLI_AUTH evironment variable for password, no warning in this case ocf_version_cmp $CLI_VER 5.0.3 res=$? if [[ res -ge 1 ]]; then CLI_HAVE_ENV_AUTH=1 fi ocf_version_cmp $CLI_VER 5.0.0 res=$? if [[ res -ge 1 ]]; then CLI_HAVE_ARG_NO_AUTH_WARNING=1 fi ocf_version_cmp $CLI_VER 4.0.10 res=$? 
if [[ res -ge 1 ]]; then CLI_HAVE_AUTH_WARNING=1 fi } master_is_active() { if [ -z "$MASTER_ACTIVE_CACHED" ]; then # determine if a master instance is already up and is healthy ocf_version_cmp "$OCF_RESKEY_crm_feature_set" "3.1.0" res=$? if [ -z "$OCF_RESKEY_crm_feature_set" ] || [ $res -eq 2 ]; then XMLOPT="--output-as=xml" ocf_version_cmp "$OCF_RESKEY_crm_feature_set" "3.2.0" if [ $? -eq 1 ]; then crm_mon_no_validation -1 $XMLOPT >/dev/null 2>&1 if [ $? -ne 0 ]; then XMLOPT="--as-xml" fi fi else XMLOPT="--as-xml" fi crm_mon_no_validation -1 $XMLOPT | grep -q -i -E "resource.*id=\"${OCF_RESOURCE_INSTANCE}\".* role=\"(Promoted|Master)\".* active=\"true\".* orphaned=\"false\".* failed=\"false\"" MASTER_ACTIVE=$? MASTER_ACTIVE_CACHED="true" fi return $MASTER_ACTIVE } set_master() { MASTER_HOST="$1" ${CRM_ATTR_REPL_INFO} -v "$1" -q } last_known_master() { if [ -z "$MASTER_HOST" ]; then MASTER_HOST="$(${CRM_ATTR_REPL_INFO} --query -q 2>/dev/null)" fi echo "$MASTER_HOST" } crm_master_reboot() { local node node=$(ocf_attribute_target) "${HA_SBIN_DIR}/crm_master" -N "$node" -l reboot "$@" } calculate_score() { perf_score="$1" connected_clients="$2" if ocf_is_true "$OCF_RESKEY_wait_last_known_master"; then # only set perferred score by slave_priority if # we are not waiting for the last known master. Otherwise # we want the agent to have complete control over the scoring. perf_score="" connected_clients="0" fi if [[ -z "$perf_score" ]]; then if [[ "$(last_known_master)" == "$NODENAME" ]]; then perf_score=1000 else perf_score=1 fi fi perf_score=$(( perf_score + connected_clients )) echo "$perf_score" } set_score() { local score local last_master score="$1" if ocf_is_true "$OCF_RESKEY_wait_last_known_master" && ! 
master_is_active; then last_master="$(last_known_master)" if [ -n "$last_master" ] && [[ "$last_master" != "$NODENAME" ]]; then ocf_log info "Postponing setting master score for ${NODENAME} until last known master instance [${last_master}] is promoted" return fi fi ocf_log debug "monitor: Setting master score to '$score'" crm_master_reboot -v "$score" } redis_client() { ocf_log debug "redis_client: '$REDIS_CLIENT' -s '$REDIS_SOCKET' $*" if [ -n "$clientpasswd" ]; then # Consider redis-cli features to choose optimal password passing method and warning filtering workaround if [[ CLI_HAVE_ENV_AUTH -eq 1 ]]; then REDISCLI_AUTH=$clientpasswd "$REDIS_CLIENT" -s "$REDIS_SOCKET" "$@" | sed 's/\r//' elif [[ CLI_HAVE_ARG_NO_AUTH_WARNING -eq 1 ]]; then "$REDIS_CLIENT" -s "$REDIS_SOCKET" --no-auth-warning -a "$clientpasswd" "$@" | sed 's/\r//' elif [[ CLI_HAVE_AUTH_WARNING -eq 1 ]]; then ("$REDIS_CLIENT" -s "$REDIS_SOCKET" -a "$clientpasswd" "$@" 2>&1 >&3 3>&- | grep -v "Using a password" >&2 3>&-) 3>&1 | sed 's/\r//' else "$REDIS_CLIENT" -s "$REDIS_SOCKET" -a "$clientpasswd" "$@" | sed 's/\r//' fi else "$REDIS_CLIENT" -s "$REDIS_SOCKET" "$@" | sed 's/\r//' fi } simple_status() { local pid if ! [ -f "$REDIS_PIDFILE" ]; then return $OCF_NOT_RUNNING fi pid="$(<"$REDIS_PIDFILE")" pidof $(basename "$REDIS_SERVER") | grep -q "\<$pid\>" || return $OCF_NOT_RUNNING ocf_log debug "monitor: redis-server running under pid $pid" return $OCF_SUCCESS } redis_monitor() { local res local master_name local last_known_master_port simple_status res=$? 
if (( res != OCF_SUCCESS )); then return $res fi typeset -A info while read line; do [[ "$line" == "#"* ]] && continue [[ "$line" != *":"* ]] && continue IFS=':' read -r key value <<< "$line" info[$key]="$value" done < <(redis_client info) if [[ -z "${info[role]}" ]]; then ocf_log err "monitor: Could not get role from \`$REDIS_CLIENT -s $REDIS_SOCKET info\`" return $OCF_ERR_GENERIC fi if ocf_is_ms; then # Here we see if a score has already been set. # If score isn't set we the redis setting 'slave_priority'. # If that isn't set, we default to 1000 for a master, and 1 for slave. # We then add 1 for each connected client score="$(crm_master_reboot -G --quiet 2>/dev/null)" if [[ -z "$score" ]]; then score=$(calculate_score "${info[slave_priority]}" "${info[connected_clients]}") set_score "$score" fi if [[ "${info[role]}" == "master" ]]; then if ocf_is_probe; then set_master "$NODENAME" fi return $OCF_RUNNING_MASTER fi if [ "$CHECK_SLAVE_STATE" -eq 1 ]; then if [[ "${info[master_link_status]}" != "up" ]]; then ocf_log info "monitor: Slave mode link has not yet been established (link=${info[master_link_status]})" return $OCF_ERR_GENERIC fi if [[ "${info[master_host]}" != "$(last_known_master)" ]]; then if [ -n "${OCF_RESKEY_tunnel_port_map}" ]; then master_name=$(port_to_redis_node ${info[master_port]}) last_known_master_port=$(redis_node_to_port $(last_known_master)) if [[ "${info[master_host]}" != "${OCF_RESKEY_tunnel_host}" ]] || [[ "${info[master_port]}" != "${last_known_master_port}" ]]; then ocf_log err "monitor: Slave mode current tunnelled connection to redis server does not match running master. tunnelled='${info[master_host]}:${info[master_port]} (${master_name})', running='$(last_known_master)'" return $OCF_ERR_GENERIC fi else ocf_log err "monitor: Slave mode current master does not match running master. 
current=${info[master_host]}, running=$(last_known_master)" return $OCF_ERR_GENERIC fi fi fi fi return $OCF_SUCCESS } redis_node_to_port() { local node=$1 echo "$OCF_RESKEY_tunnel_port_map" | tr ';' '\n' | tr -d ' ' | sed 's/:/ /' | awk -F' ' '$1=="'"$node"'" {print $2;exit}' } port_to_redis_node() { local port=$1 echo "$OCF_RESKEY_tunnel_port_map" | tr ';' '\n' | tr -d ' ' | sed 's/:/ /' | awk -F' ' '$2=="'"$port"'" {print $1;exit}' } get_tunnel_port_from_master() { local master_name=$1 crm_attribute --node "$master_name" -l forever --name ${INSTANCE_ATTR_NAME}-tunnel-port --query -q 2>/dev/null } get_master_from_tunnel_port() { local master_name=$1 crm_attribute --node "$master_name" -l forever --name ${INSTANCE_ATTR_NAME}-tunnel-port --query -q 2>/dev/null } check_dump_file() { if ! have_binary "$REDIS_CHECK_DUMP"; then return 0 fi $REDIS_CHECK_DUMP ${REDIS_DUMP_DIR}/${REDIS_DUMP_FILE} 2>&1 } redis_start() { local size redis_monitor status=$? if (( status == OCF_SUCCESS )) || (( status == OCF_RUNNING_MASTER )); then ocf_log info "start: redis is already running" return $OCF_SUCCESS fi [[ ! -d "$REDIS_RUNDIR" ]] && mkdir -p "$REDIS_RUNDIR" chown -R "$REDIS_USER" "$REDIS_RUNDIR" if have_binary "restorecon"; then restorecon -Rv "$REDIS_RUNDIR" fi # check for 0 byte database dump file. This is an unrecoverable start # condition that we can avoid by deleting the 0 byte database file. if [ -f "${REDIS_DUMP_DIR}/${REDIS_DUMP_FILE}" ]; then size="$(stat --format "%s" ${REDIS_DUMP_DIR}/${REDIS_DUMP_FILE})" if [ "$?" -eq "0" ] && [ "$size" -eq "0" ]; then ocf_log notice "Detected 0 byte ${REDIS_DUMP_FILE}, deleting zero length file to avoid start failure." 
rm -f "${REDIS_DUMP_DIR}/${REDIS_DUMP_FILE}" fi fi ocf_log info "start: $REDIS_SERVER --daemonize yes --unixsocket '$REDIS_SOCKET' --pidfile '$REDIS_PIDFILE'" output="$(su "$REDIS_USER" -s /bin/sh -c "cd '$REDIS_RUNDIR'; exec '$REDIS_SERVER' '$REDIS_CONFIG' --daemonize yes --unixsocket '$REDIS_SOCKET' --pidfile '$REDIS_PIDFILE'" 2>&1)" while true; do # wait for redis to start typeset -A info while read line; do [[ "$line" == "#"* ]] && continue [[ "$line" != *":"* ]] && continue IFS=':' read -r key value <<< "$line" info[$key]="$value" done < <(redis_client info) if (( info[loading] == 0 )); then break elif (( info[loading] == 1 )); then sleep "${info[loading_eta_seconds]}" elif pidof $(basename "$REDIS_SERVER") >/dev/null; then # unknown error, but the process still exists. # This check is mainly because redis daemonizes before it starts listening, causing `redis-cli` to fail # See https://github.com/antirez/redis/issues/2368 # It's possible that the `pidof` will pick up a different redis, but in that case, the start operation will just time out sleep 1 else check_output="$(check_dump_file)" ocf_log err "start: Unknown error waiting for redis to start. redis-check-dump output=${check_output//$'\n'/; }" return $OCF_ERR_GENERIC fi done while ! [ -s "$REDIS_PIDFILE" ]; do ocf_log debug "start: Waiting for pid file '$REDIS_PIDFILE' to appear" sleep 1 done ocf_is_ms && redis_demote # pacemaker expects resources to start in slave mode redis_monitor status=$? if (( status == OCF_SUCCESS )) || (( status == OCF_RUNNING_MASTER )); then return $OCF_SUCCESS fi check_output="$(check_dump_file)" ocf_log err "start: Unknown error starting redis. redis-server output=${output//$'\n'/; } redis-check-dump output=${check_output//$'\n'/; }" return $status } redis_stop() { redis_monitor status=$? 
if (( status == OCF_NOT_RUNNING )); then ocf_log info "stop: redis is already stopped" crm_master_reboot -D return $OCF_SUCCESS fi pid="$(<"$REDIS_PIDFILE")" kill -TERM "$pid" while true; do simple_status status=$? if (( status == OCF_NOT_RUNNING )); then crm_master_reboot -D return $OCF_SUCCESS fi sleep 1 done } redis_promote() { redis_monitor status=$? if (( status == OCF_RUNNING_MASTER )); then ocf_log info "promote: Already running as master" set_master "$NODENAME" return $OCF_SUCCESS elif (( status != OCF_SUCCESS )); then ocf_log err "promote: Node is not running as a slave" return $OCF_ERR_GENERIC fi redis_client slaveof no one redis_monitor status=$? if (( status == OCF_RUNNING_MASTER )); then set_master "$NODENAME" return $OCF_SUCCESS fi ocf_log err "promote: Unknown error while promoting to master (status=$status)" return $OCF_ERR_GENERIC } redis_demote() { local master_host local master_port local tunnel_port # client kill is only supported in Redis 2.8.12 or greater version=$(redis_client -v | awk '{print $NF}') ocf_version_cmp "$version" "2.8.11" client_kill=$? CHECK_SLAVE_STATE=1 redis_monitor status=$? if (( status == OCF_SUCCESS )); then ocf_log info "demote: Already running as slave" return $OCF_SUCCESS elif (( status == OCF_NOT_RUNNING )); then ocf_log err "demote: Failed to demote, redis not running." return $OCF_NOT_RUNNING fi master_host="$(last_known_master)" master_port="${REDIS_REPLICATION_PORT}" # The elected master has to remain a slave during startup. # During this period a placeholder master host is assigned. if [ -z "$master_host" ] || [[ "$master_host" == "$NODENAME" ]]; then CHECK_SLAVE_STATE=0 master_host="no-such-master" elif ! master_is_active; then # no master has been promoted yet. we'll be notified when the # master starts. CHECK_SLAVE_STATE=0 master_host="no-such-master" fi if [ -n "${OCF_RESKEY_tunnel_port_map}" ]; then # master_host can be the special marker "no-such-master" # while a master is being selected. 
In this case, no # tunnel port is returned, but this is not fatal. tunnel_port=$(redis_node_to_port "$master_host") if [ -n "$tunnel_port" ]; then ocf_log info "demote: Setting master to '$master_host' via local tunnel '${OCF_RESKEY_tunnel_host}' on port '$tunnel_port'" master_host="${OCF_RESKEY_tunnel_host}" master_port="$tunnel_port" fi else ocf_log info "demote: Setting master to '$master_host'" fi redis_client slaveof "$master_host" "$master_port" # Wait forever for the slave to connect to the master and finish the # sync. Timeout is controlled by Pacemaker "op start timeout=XX". # # hint: redis master_link_status will only come "up" when # the SYNC with the master has completed. # This can take an arbitraty time (data) and should # only be parametrized by the start operation timeout # by the administrator, not by this resource agent code while true; do # Wait infinite if replication is syncing # Then start/demote operation timeout determines timeout if [ "$client_kill" -eq 2 ]; then redis_client CLIENT PAUSE 2000 fi redis_monitor status=$? if (( status == OCF_SUCCESS )); then if [ "$client_kill" -eq 2 ]; then redis_client CLIENT KILL type normal fi return $OCF_SUCCESS fi sleep 1 done ocf_log err "demote: Unexpected error setting slave mode (status=$status)" return $OCF_ERR_GENERIC } redis_notify() { mode="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}" case "$mode" in post-demote|post-promote) # change the master redis_monitor status=$? if (( status == OCF_SUCCESS )); then # were a slave # calling demote updates the slave's connection # to the newly appointed Master instance. redis_demote fi ;; esac return $OCF_SUCCESS } redis_validate() { if [[ ! -x "$REDIS_SERVER" ]]; then ocf_log err "validate: $REDIS_SERVER does not exist or is not executable" return $OCF_ERR_INSTALLED fi if [[ ! -x "$REDIS_CLIENT" ]]; then ocf_log err "validate: $REDIS_CLIENT does not exist or is not executable" return $OCF_ERR_INSTALLED fi if [[ ! 
-f "$REDIS_CONFIG" ]]; then ocf_log err "validate: $REDIS_CONFIG does not exist" return $OCF_ERR_CONFIGURED fi if ! getent passwd "$REDIS_USER" &>/dev/null; then ocf_log err "validate: $REDIS_USER is not a valid user" return $OCF_ERR_CONFIGURED fi } if [ "$__OCF_ACTION" != "meta-data" ]; then NODENAME=$(ocf_attribute_target) fi if [ -r "$REDIS_CONFIG" ]; then clientpasswd="$(sed -n -e 's/^\s*requirepass\s*\(.*\)\s*$/\1/p' < $REDIS_CONFIG | tail -n 1)" fi if [ "$__OCF_ACTION" = "start" ]; then redis_validate || exit $? fi redis_cli_features ocf_log debug "action=${1:-$__OCF_ACTION} notify_type=${OCF_RESKEY_CRM_meta_notify_type} notify_operation=${OCF_RESKEY_CRM_meta_notify_operation} master_host=${OCF_RESKEY_CRM_meta_notify_master_uname} slave_host=${OCF_RESKEY_CRM_meta_notify_slave_uname} promote_host=${OCF_RESKEY_CRM_meta_notify_promote_uname} demote_host=${OCF_RESKEY_CRM_meta_notify_demote_uname}; params: bin=${OCF_RESKEY_bin} client_bin=${OCF_RESKEY_client_bin} config=${OCF_RESKEY_config} user=${OCF_RESKEY_user} rundir=${OCF_RESKEY_rundir} port=${OCF_RESKEY_port}" case "${1:-$__OCF_ACTION}" in status|monitor) redis_monitor ;; start) redis_start ;; stop) redis_stop ;; restart) redis_stop && redis_start ;; promote) redis_promote ;; demote) redis_demote ;; notify) redis_notify ;; meta-data) redis_meta_data ;; validate-all) redis_validate ;; *) echo "Usage: $0 {monitor|start|stop|restart|promote|demote|notify|validate-all|meta-data}" exit $OCF_ERR_UNIMPLEMENTED ;; esac status=$? ocf_log debug "exit_status=$status" exit $status