diff --git a/heartbeat/dnsupdate b/heartbeat/dnsupdate index 50ed8ec7a..2db2aa45c 100755 --- a/heartbeat/dnsupdate +++ b/heartbeat/dnsupdate @@ -1,272 +1,276 @@ #!/bin/bash # # # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License v2 # # Copyright (c) 2014 SUSE Linux Products GmbH, Lars Marowsky-Brée # All Rights Reserved. # ####################################################################### : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### # TODO: # - Should setting CNAMEs be supported? # - Should multiple A records be supported? usage() { cat <<-! usage: $0 {start|stop|status|monitor|meta-data|validate-all} ! } meta_data() { cat < 1.0 This resource agent manages IP take-over via dynamic DNS updates. IP take-over via dynamic DNS update The hostname whose IP address will need to be updated. Hostname to update IP address to set. IP address to set Time to live, in seconds, for the DNS record. This affects how soon DNS updates propagate. It should be a reasonable compromise between update speed and DNS server load. If using booth, the ticket timeout is a good start. TTL for the DNS record The file containing the shared secret needed to update the DNS record. Please see the nsupdate man page for the exact syntax. nsupdate key file Which DNS server to send these updates for. When no server is provided, this defaults to the master server for the correct zone. DNS server to contact Port number on the DNS server. Note: due to a limitation in the nsupdate command, this option will only take effect if you also specify the DNS server! Port number on the DNS server Additional options to be passed to nsupdate. Additional nsupdate options Whether or not to actively remove records on stop. This is not needed for normal operation, since the site taking over the IP address will delete all previous records. Remove A record on stop END } dnsupdate_status() { # The resource is considered active if the current IP # address is returned as the only response. local record=$(dig ${dig_opts} ${hostname}. A +short 2>/dev/null) if [ "$record" = "$ip" ]; then return $OCF_SUCCESS fi return $OCF_NOT_RUNNING } dnsupdate_monitor() { if ocf_is_probe ; then # return $OCF_NOT_RUNNING fi dnsupdate_status } dnsupdate_start() { if dnsupdate_status ; then ocf_log info "$hostname already resolves to $ip" return $OCF_SUCCESS fi ocf_log info "Updating DNS records for $hostname" ( if [ -n "$dns_server" ]; then echo "server ${dns_server} ${dns_serverport}" fi echo "update delete $hostname A" echo "update add $hostname ${OCF_RESKEY_ttl} A $ip" echo "send" ) | nsupdate ${nsupdate_opts} dnsupdate_monitor return $? } dnsupdate_stop() { if ocf_is_true "${OCF_RESKEY_unregister_on_stop}" && dnsupdate_status ; then ocf_log info "Unregistering $hostname with $ip from DNS server" ( if [ -n "$dns_server" ]; then echo "server ${dns_server} ${dns_serverport}" fi echo "update delete $hostname A $ip" echo "send" ) | nsupdate ${nsupdate_opts} dnsupdate_monitor if [ $? -ne $OCF_NOT_RUNNING ]; then ocf_log warn "Unregistering failed!" # There's no point in invoking a stop failure # here. If another site takes over the record, # it'll delete all previous entries anyway. fi fi return $OCF_SUCCESS } dnsupdate_validate() { hostname=${OCF_RESKEY_hostname} ip=${OCF_RESKEY_ip} dig_opts="" dns_server=${OCF_RESKEY_server} : ${OCF_RESKEY_serverport:="53"} dns_serverport=${OCF_RESKEY_serverport} : ${OCF_RESKEY_ttl:="300"} - nsupdate_opts=${OCF_RESKEY_opts} + nsupdate_opts=${OCF_RESKEY_nsupdate_opts} + if [ -z "$nsupdate_opts" -a -n "$OCF_RESKEY_opts" ]; then + nsupdate_opts=${OCF_RESKEY_opts} + ocf_log warn "opts was never an advertised parameter, please use nsupdate_opts" + fi if [ -z "$hostname" ]; then ocf_log err "No hostname specified." exit $OCF_ERR_CONFIGURED fi if [ -z "$ip" ]; then ocf_log err "No IP specified." exit $OCF_ERR_CONFIGURED fi if ! ocf_is_decimal $OCF_RESKEY_ttl ; then ocf_log err "ttl $OCF_RESKEY_ttl is not valid" exit $OCF_ERR_CONFIGURED fi if ! ocf_is_decimal $dns_serverport ; then ocf_log err "serverport $dns_serverport is not valid" exit $OCF_ERR_CONFIGURED fi dig_opts+=" -p ${dns_serverport}" if [ -n "$dns_server" ]; then dig_opts+=" @${dns_server}" fi if [ -n "$OCF_RESKEY_keyfile" ]; then if [ ! -f ${OCF_RESKEY_keyfile} ]; then ocf_log err "keyfile $OCF_RESKEY_keyfile does not exist" exit $OCF_ERR_CONFIGURED fi nsupdate_opts+=" -k $OCF_RESKEY_keyfile" fi } if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi case $1 in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; esac check_binary dig check_binary nsupdate dnsupdate_validate case $1 in start) dnsupdate_start ;; stop) dnsupdate_stop ;; monitor) dnsupdate_monitor ;; status) dnsupdate_status ;; validate-all) # We've already run this exit $OCF_SUCCESS ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? diff --git a/heartbeat/ora-common.sh b/heartbeat/ora-common.sh index 5bbb163e6..1580babe1 100644 --- a/heartbeat/ora-common.sh +++ b/heartbeat/ora-common.sh @@ -1,84 +1,88 @@ # ora-common.sh # # Description: Common code for oracle and oralsnr resource agents # # # Author: Dejan Muhamedagic # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # Copyright: (C) 2012 Dejan Muhamedagic, SUSE/Attachmate # # Gather up information about our oracle instance rmtmpfiles() { rm -f $TMPFILES } ora_common_getconfig() { ORACLE_SID=$1 + # optional, defaults to whatever is in oratab ORACLE_HOME=$2 + # optional, defaults to the owner of ORACLE_HOME ORACLE_OWNER=$3 + # optional, defaults to $ORACLE_HOME/network/admin + # (only the oralsnr may provide and use this one) TNS_ADMIN=$4 # get ORACLE_HOME from /etc/oratab if not set [ x = "x$ORACLE_HOME" ] && ORACLE_HOME=`awk -F: "/^$ORACLE_SID:/"'{print $2}' /etc/oratab` # there a better way to find out ORACLE_OWNER? [ x = "x$ORACLE_OWNER" ] && ORACLE_OWNER=`ls -ld $ORACLE_HOME/. 2>/dev/null | awk 'NR==1{print $3}'` # There are use-cases were users want to be able to set a custom TMS_ADMIN path. # When TNS_ADMIN is not provided, use the default path. [ x = "x$TNS_ADMIN" ] && TNS_ADMIN=$ORACLE_HOME/network/admin LD_LIBRARY_PATH=$ORACLE_HOME/lib LIBPATH=$ORACLE_HOME/lib PATH=$ORACLE_HOME/bin:$ORACLE_HOME/dbs:$PATH export ORACLE_SID ORACLE_HOME ORACLE_OWNER TNS_ADMIN export LD_LIBRARY_PATH LIBPATH ORA_ENVF=`mktemp` dumporaenv > $ORA_ENVF chmod 644 $ORA_ENVF TMPFILES="$ORA_ENVF" trap "rmtmpfiles" EXIT } ora_common_validate_all() { # Let's make sure a few important things are set... if [ x = "x$ORACLE_HOME" ]; then ocf_log info "ORACLE_HOME not set" return $OCF_ERR_INSTALLED fi if [ x = "x$ORACLE_OWNER" ]; then ocf_log info "ORACLE_OWNER not set" return $OCF_ERR_INSTALLED fi US=`id -u -n` if [ $US != root -a $US != $ORACLE_OWNER ] then ocf_log err "$0 must be run as root or $ORACLE_OWNER" return $OCF_ERR_PERM fi return 0 } dumporaenv() { cat< 1.0 Resource script for oracle. Manages an Oracle Database instance as an HA resource. Manages an Oracle Database instance The Oracle SID (aka ORACLE_SID). sid The Oracle home directory (aka ORACLE_HOME). If not specified, then the SID along with its home should be listed in /etc/oratab. home The Oracle owner (aka ORACLE_OWNER). If not specified, then it is set to the owner of file \$ORACLE_HOME/dbs/*\${ORACLE_SID}.ora. If this does not work for you, just set it explicitely. user Monitoring user name. Every connection as sysdba is logged in an audit log. This can result in a large number of new files created. A new user is created (if it doesn't exist) in the start action and subsequently used in monitor. It should have very limited rights. Make sure that the password for this user does not expire. monuser Password for the monitoring user. Make sure that the password for this user does not expire. monpassword Profile used by the monitoring user. If the profile does not exist, it will be created with a non-expiring password. monprofile Sometimes IPC objects (shared memory segments and semaphores) belonging to an Oracle instance might be left behind which prevents the instance from starting. It is not easy to figure out which shared segments belong to which instance, in particular when more instances are running as same user. What we use here is the "oradebug" feature and its "ipc" trace utility. It is not optimal to parse the debugging information, but I am not aware of any other way to find out about the IPC information. In case the format or wording of the trace report changes, parsing might fail. There are some precautions, however, to prevent stepping on other peoples toes. There is also a dumpinstipc option which will make us print the IPC objects which belong to the instance. Use it to see if we parse the trace file correctly. Three settings are possible: - none: don't mess with IPC and hope for the best (beware: you'll probably be out of luck, sooner or later) - instance: try to figure out the IPC stuff which belongs to the instance and remove only those (default; should be safe) - orauser: remove all IPC belonging to the user which runs the instance (don't use this if you run more than one instance as same user or if other apps running as this user use IPC) The default setting "instance" should be safe to use, but in that case we cannot guarantee that the instance will start. In case IPC objects were already left around, because, for instance, someone mercilessly killing Oracle processes, there is no way any more to find out which IPC objects should be removed. In that case, human intervention is necessary, and probably _all_ instances running as same user will have to be stopped. The third setting, "orauser", guarantees IPC objects removal, but it does that based only on IPC objects ownership, so you should use that only if every instance runs as separate user. Please report any problems. Suggestions/fixes welcome. ipcrm The clear of the backup mode of ORACLE. clear_backupmode How to stop Oracle is a matter of taste it seems. The default method ("checkpoint/abort") is: alter system checkpoint; shutdown abort; This should be the fastest safe way bring the instance down. If you find "shutdown abort" distasteful, set this attribute to "immediate" in which case we will shutdown immediate; If you still think that there's even better way to shutdown an Oracle instance we are willing to listen. shutdown_method END } # # methods: What methods/operations do we support? # oracle_methods() { cat <<-! start stop status monitor dumpinstipc showdbstat cleanup validate-all methods meta-data usage ! } # # Run commands as the Oracle owner... # execsql() { if [ "$US" = "$ORACLE_OWNER" ]; then sqlplus -S /nolog else su - $ORACLE_OWNER -s /bin/sh -c ". $ORA_ENVF; sqlplus -S /nolog" fi } # # Run commands in the oracle admin sqlplus... # common_sql_opts() { cat</dev/null; then return 0 fi output=`dbasql mk_mon_profile show_mon_profile` if echo "$output" | grep -iw "^$MONPROFILE" >/dev/null; then return 0 else ocf_log err "could not create $MONPROFILE oracle profile" ocf_log err "sqlplus output: $output" return 1 fi } check_mon_user() { local output local output2 output=`dbasql show_mon_user` if echo "$output" | grep -iw "^$MONUSR" >/dev/null; then if echo "$output" | grep -w "EXPIRED" >/dev/null; then dbasql reset_mon_user_password fi output=`dbasql show_mon_user_profile` if echo "$output" | grep -iw "^$MONPROFILE" >/dev/null; then return 0 else output=`dbasql set_mon_user_profile` output2=`dbasql show_mon_user_profile` if echo "$output2" | grep -iw "^$MONPROFILE" >/dev/null; then return 0 fi ocf_log err "could not set profile for $MONUSR oracle user" ocf_log err "sqlplus output: $output( $output2 )" return 1 fi fi output=`dbasql mk_mon_user show_mon_user` if echo "$output" | grep -iw "^$MONUSR" >/dev/null; then return 0 else ocf_log err "could not create $MONUSR oracle user" ocf_log err "sqlplus output: $output" return 1 fi } # # print the output of dbstat (for debugging) # showdbstat() { echo "Full output:" dbstat | execsql echo "Stripped output:" echo "<`dbasql dbstat`>" } # # IPC stuff: not overly complex, but quite involved :-/ # # Part 1: Oracle other_trace_junk() { echo $1 | sed 's/trc$/trm/' } dumpinstipc() { local output tracef output=`dbasql getipc` # filename in the 2nd line tracef=`echo "$output" | awk 'NR==2' | grep '^/.*trc$'` if [ "$tracef" ]; then echo $tracef else ocf_log warn "'dbasql getipc' failed: $output" return 1 fi } parseipc() { local inf=$1 if [ ! -f "$1" ]; then ocf_log warn "$1: no such ipc trace file" return 1 fi awk ' $3 == "Shmid" {n=1;next} n { if( $3~/^[0-9]+$/ ) print $3; n=0 } ' $inf | sort -u | sed 's/^/m:/' awk ' /Semaphore List/ {insems=1;next} insems { for( i=1; i<=NF; i++ ) if( $i~/^[0-9]+$/ ) print $i; } /system semaphore information/ {exit} ' $inf | sort -u | sed 's/^/s:/' TMPFILES="$TMPFILES $inf `other_trace_junk $inf`" } # Part 2: OS (ipcs,ipcrm) filteroraipc() { # this portable? grep -w $ORACLE_OWNER | awk '{print $2}' } ipcdesc() { local what=$1 case $what in m) echo "shared memory segment";; s) echo "semaphore";; q) echo "message queue";; esac } rmipc() { local what=$1 id=$2 ipcs -$what | filteroraipc | grep -iw $id >/dev/null 2>&1 || return ocf_log info "Removing `ipcdesc $what` $id." ipcrm -$what $id } ipcrm_orauser() { local what id for what in m s q; do for id in `ipcs -$what | filteroraipc`; do rmipc $what $id done done } ipcrm_instance() { local ipcobj for ipcobj; do rmipc `echo $ipcobj | sed 's/:/ /'` done } # # oracle_status: is the Oracle instance running? # # quick check to see if the instance is up is_proc_running() { ps -ef | grep -wiqs "[^ ]*[_]pmon_${ORACLE_SID}" } # instance in OPEN state? instance_live() { local status=`monsql_one dbstat` [ "$status" = OPEN ] && return 0 status=`dbasql_one dbstat` if [ "$status" = OPEN ]; then return 0 else ocf_log info "$ORACLE_SID instance state is not OPEN (dbstat output: $status)" return 1 fi } ora_cleanup() { #rm -fr /tmp/.oracle #??? rm -f `ls $ORACLE_HOME/dbs/lk* | grep -i "$ORACLE_SID\$"` #return case $IPCRM in none) ;; instance) ipcrm_instance $* ;; orauser) ipcrm_orauser $* ;; esac } oracle_getconfig() { - ora_common_getconfig "$OCF_RESKEY_sid" "$OCF_RESKEY_home" "$OCF_RESKEY_user" "$OCF_RESKEY_tns_admin" + ora_common_getconfig "$OCF_RESKEY_sid" "$OCF_RESKEY_home" "$OCF_RESKEY_user" clear_backupmode=${OCF_RESKEY_clear_backupmode:-"false"} shutdown_method=${OCF_RESKEY_shutdown_method:-"checkpoint/abort"} IPCRM=${OCF_RESKEY_ipcrm:-"instance"} } # # oracle_start: Start the Oracle instance # # NOTE: We handle instance in the MOUNTED and STARTED states # efficiently # We *do not* handle instance in the restricted or read-only # mode, i.e. it appears as running, but its availability is # "not for general use" # oracle_start() { local status output if is_proc_running; then status="`dbasql_one dbstat`" case "$status" in "OPEN") : nothing to be done, we can leave right now ocf_log info "Oracle instance $ORACLE_SID already running" return $OCF_SUCCESS ;; "STARTED") output=`dbasql dbmount` ;; "MOUNTED") : we proceed if mounted ;; *) # status unknown output=`dbasql dbstop dbstart_mount` ;; esac else output="`dbasql dbstart_mount`" # try to cleanup in case of # ORA-01081: cannot start already-running ORACLE - shut it down first if echo "$output" | grep ORA-01081 >/dev/null 2>&1; then ocf_log info "ORA-01081 error found, trying to cleanup oracle (dbstart_mount output: $output)" ora_cleanup output=`dbasql dbstart_mount` fi fi # oracle instance should be mounted. status="`dbasql_one dbstat`" case "$status" in "MOUNTED") ;; *) : error!! ocf_log err "oracle $ORACLE_SID can not be mounted (status: $status)" return $OCF_ERR_GENERIC ;; esac # It is examined whether mode is "online backup mode", # and if it is true, makes clear the mode. # Afterwards, DB is opened. if is_clear_backupmode_set && is_instance_in_backup_mode; then clear_backup_mode fi output=`dbasql dbopen` # check/create the monitor profile if ! check_mon_profile; then return $OCF_ERR_GENERIC fi # check/create the monitor user if ! check_mon_user; then return $OCF_ERR_GENERIC fi if ! is_proc_running; then ocf_log err "oracle process not running: $output" return $OCF_ERR_GENERIC elif ! instance_live; then ocf_log err "oracle instance $ORACLE_SID not started: $output" return $OCF_ERR_GENERIC else : cool, we are up and running ocf_log info "Oracle instance $ORACLE_SID started: $output" return $OCF_SUCCESS fi } # # oracle_stop: Stop the Oracle instance # oracle_stop() { local status output ipc="" if is_proc_running; then [ "$IPCRM" = "instance" ] && ipc=$(parseipc `dumpinstipc`) output=`dbasql dbstop` else ocf_log info "Oracle instance $ORACLE_SID already stopped" return $OCF_SUCCESS fi ocf_stop_processes TERM $PROCS_CLEANUP_TIME `proc_pids` # kill the procs if they hanged if is_proc_running; then ocf_log err "Oracle instance $ORACLE_SID not stopped: $output" return $OCF_ERR_GENERIC else ocf_log info "Oracle instance $ORACLE_SID stopped: $output" sleep 1 # give em a chance to cleanup ocf_log info "Cleaning up for $ORACLE_SID" ora_cleanup "$ipc" return $OCF_SUCCESS fi } # # oracle_monitor: Can the Oracle instance do anything useful? # oracle_monitor() { if ! is_proc_running; then ocf_log info "oracle process not running" return $OCF_NOT_RUNNING fi if ! instance_live; then ocf_log err "oracle instance $ORACLE_SID is down" return $OCF_ERR_GENERIC fi #ocf_log info "Oracle instance $ORACLE_SID is alive" return $OCF_SUCCESS } # other supported actions oracle_status() { if is_proc_running then echo Oracle instance $ORACLE_SID is running exit $OCF_SUCCESS else echo Oracle instance $ORACLE_SID is stopped exit $OCF_NOT_RUNNING fi } oracle_dumpinstipc() { is_proc_running && parseipc `dumpinstipc` } oracle_showdbstat() { showdbstat } oracle_cleanup() { if [ "$IPCRM" = "instance" ]; then ora_cleanup $(parseipc `dumpinstipc`) else ora_cleanup fi } oracle_validate_all() { case "${shutdown_method}" in "immediate") ;; "checkpoint/abort") ;; *) ocf_log err "unsupported shutdown_method, please read meta-data" return $OCF_ERR_CONFIGURED ;; esac case "${IPCRM}" in "none"|"instance"|"orauser") ;; *) ocf_log err "unsupported ipcrm setting, please read meta-data" return $OCF_ERR_CONFIGURED ;; esac ora_common_validate_all } # used in ora-common.sh show_procs() { ps -e -o pid,args | grep -i "[o]ra[a-zA-Z0-9_]*$ORACLE_SID$" } proc_pids() { show_procs | awk '{print $1}'; } PROCS_CLEANUP_TIME="30" MONUSR=${OCF_RESKEY_monuser:-$OCF_RESKEY_monuser_default} MONPWD=${OCF_RESKEY_monpassword:-$OCF_RESKEY_monpassword_default} MONPROFILE=${OCF_RESKEY_monprofile_default:-$OCF_RESKEY_monprofile_default} MONUSR=$(echo $MONUSR | awk '{print toupper($0)}') MONPROFILE=$(echo $MONPROFILE | awk '{print toupper($0)}') OCF_REQUIRED_PARAMS="sid" OCF_REQUIRED_BINARIES="sqlplus" ocf_rarun $* # # vim:tabstop=4:shiftwidth=4:textwidth=0:wrapmargin=0 diff --git a/heartbeat/sg_persist b/heartbeat/sg_persist index c4af069dd..aa34ed898 100755 --- a/heartbeat/sg_persist +++ b/heartbeat/sg_persist @@ -1,673 +1,673 @@ #!/bin/bash # # # OCF Resource Agent compliant PERSISTENT SCSI RESERVATION resource script. # # # Copyright (c) 2011 Evgeny Nifontov and lwang@suse.com All Rights Reserved. # # "Heartbeat drbd OCF Resource Agent: 2007, Lars Marowsky-Bree" was used # as example of multistate OCF Resource Agent. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # # # OCF instance parameters -# OCF_RESKEY_sg_persist_binary +# OCF_RESKEY_binary # OCF_RESKEY_devs # OCF_RESKEY_required_devs_nof # OCF_RESKEY_reservation_type # OCF_RESKEY_master_score_base # OCF_RESKEY_master_score_dev_factor # OCF_RESKEY_master_score_delay # # TODO # # 1) PROBLEM: devices which were not accessible during 'start' action, will be never registered/reserved # TODO: 'Master' and 'Salve' registers new devs in 'monitor' action # TODO: 'Master' reserves new devs in 'monitor' action ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # set default values : ${sg_persist_binary="sg_persist"} # binary name for the resource : ${devs=""} # device list : ${required_devs_nof=1} # number of required devices : ${reservation_type=1} # reservation type : ${master_score_base=0} # master score base : ${master_score_dev_factor=100} # device factor for master score : ${master_score_delay=30} # delay for master score ####################################################################### meta_data() { cat < 1.1 This resource agent manages SCSI PERSISTENT RESERVATIONS. "sg_persist" from sg3_utils is used, please see its documentation. Should be used as multistate (Master/Slave) resource Slave registers its node id ("crm_node -i") as reservation key ( --param-rk ) on each device in the "devs" list. Master reservs all devices from "devs" list with reservation "--prout-type" value from "reservation_type" parameter. Manages SCSI PERSISTENT RESERVATIONS -The name of the binary that manages the resource +The name of the binary that manages the resource. -the binay name of the resource - +the binary name of the resource + Device list. Multiple devices can be listed with blank space as separator. Shell wildcars are allowed. device list Minimum number of "working" devices from device list 1) existing 2) "sg_persist --read-keys \$device" works (Return code 0) resource actions "start","monitor","promote" and "validate-all" return "\$OCF_ERR_INSTALLED" if the actual number of "working" devices is less then "required_devs_nof". resource actions "stop" and "demote" tries to remove reservations and registration keys from all working devices, but always return "\$OCF_SUCCESS" minimum number of working devices reservation type reservation type master_score_base value "master_score_base" value is used in "master_score" calculation: master_score = \$master_score_base + \$master_score_dev_factor * \$working_devs if set to bigger value in sg_persist resource configuration on some node, this node will be "preferred" for master role. base master_score value Working device factor in master_score calculation each "working" device provides additional value to "master_score", so the node that sees more devices will be preferred for the "Master"-role Setting it to 0 will disable this behavior. working device factor in master_score calculation master/slave decreases/increases its master_score after delay of \$master_score_delay seconds so if some device gets inaccessible, the slave decreases its master_score first and the resource will no be watched and after this device reappears again the master increases its master_score first this can work only if the master_score_delay is bigger then monitor interval on both master and slave Setting it to 0 will disable this behavior. master_score decrease/increase delay time END exit $OCF_SUCCESS } sg_persist_init() { if ! ocf_is_root ; then ocf_log err "You must be root to perform this operation." exit $OCF_ERR_PERM fi - : ${SG_PERSIST:="$sg_persist_binary"} + SG_PERSIST=${OCF_RESKEY_binary:-"$sg_persist_binary"} check_binary $SG_PERSIST ROLE=$OCF_RESKEY_CRM_meta_role NOW=$(date +%s) RESOURCE="${OCF_RESOURCE_INSTANCE}" MASTER_SCORE_VAR_NAME="master-${OCF_RESOURCE_INSTANCE}" PENDING_VAR_NAME="pending-$MASTER_SCORE_VAR_NAME" #only works with corocync CRM_NODE="${HA_SBIN_DIR}/crm_node" NODE_ID_DEC=$($CRM_NODE -i) NODE=$($CRM_NODE -l | $GREP $NODE_ID_DEC) NODE=${NODE#$NODE_ID_DEC } NODE=${NODE% *} MASTER_SCORE_ATTRIBUTE="${HA_SBIN_DIR}/crm_attribute --lifetime=reboot --name=$MASTER_SCORE_VAR_NAME --node=$NODE" CRM_MASTER="${HA_SBIN_DIR}/crm_master --lifetime=reboot" PENDING_ATTRIBUTE="${HA_SBIN_DIR}/crm_attribute --lifetime=reboot --name=$PENDING_VAR_NAME --node=$NODE" NODE_ID_HEX=$(printf '0x%x' $NODE_ID_DEC) if [ -z "$NODE_ID_HEX" ]; then ocf_log err "Couldn't get node id with \"$CRM_NODE\"" exit $OCF_ERR_INSTALLED fi ocf_log debug "$RESOURCE: NODE:$NODE, ROLE:$ROLE, NODE_ID DEC:$NODE_ID_DEC HEX:$NODE_ID_HEX" DEVS=${OCF_RESKEY_devs:=$devs} REQUIRED_DEVS_NOF=${OCF_RESKEY_required_devs_nof:=$required_devs_nof} RESERVATION_TYPE=${OCF_RESKEY_reservation_type:=$reservation_type} MASTER_SCORE_BASE=${OCF_RESKEY_master_score_base:=$master_score_base} MASTER_SCORE_DEV_FACTOR=${OCF_RESKEY_master_score_dev_factor:=$master_score_dev_factor} MASTER_SCORE_DELAY=${OCF_RESKEY_master_score_delay:=$master_score_delay} ocf_log debug "$RESOURCE: DEVS=$DEVS" ocf_log debug "$RESOURCE: REQUIRED_DEVS_NOF=$REQUIRED_DEVS_NOF" ocf_log debug "$RESOURCE: RESERVATION_TYPE=$RESERVATION_TYPE" ocf_log debug "$RESOURCE: MASTER_SCORE_BASE=$MASTER_SCORE_BASE" ocf_log debug "$RESOURCE: MASTER_SCORE_DEV_FACTOR=$MASTER_SCORE_DEV_FACTOR" ocf_log debug "$RESOURCE: MASTER_SCORE_DELAY=$MASTER_SCORE_DELAY" #expand path wildcards DEVS=$(echo $DEVS) if [ -z "$DEVS" ]; then ocf_log err "\"devs\" not defined" exit $OCF_ERR_INSTALLED fi sg_persist_check_devs sg_persist_get_status } sg_persist_action_usage() { cat <&1` if [ $? -eq $OCF_SUCCESS ]; then WORKING_DEVS+=($dev) echo $READ_KEYS | $GREP $NODE_ID_HEX >/dev/null if [ $? -eq 0 ]; then REGISTERED_DEVS+=($dev) READ_RESERVATION=`ocf_run $SG_PERSIST --in --read-reservation $dev 2>&1` if [ $? -eq $OCF_SUCCESS ]; then echo $READ_RESERVATION | $GREP $NODE_ID_HEX >/dev/null if [ $? -eq 0 ]; then RESERVED_DEVS+=($dev) fi reservation_key=`echo $READ_RESERVATION | $GREP -o 'Key=0x[0-9a-f]*' | $GREP -o '0x[0-9a-f]*'` if [ -n "$reservation_key" ]; then DEVS_WITH_RESERVATION+=($dev) RESERVATION_KEYS+=($reservation_key) fi fi fi fi done WORKING_DEVS_NOF=${#WORKING_DEVS[*]} ocf_log debug "$RESOURCE: working devices: `sg_persist_echo_array ${WORKING_DEVS[*]}`" ocf_log debug "$RESOURCE: number of working devices: $WORKING_DEVS_NOF" ocf_log debug "$RESOURCE: registered devices: `sg_persist_echo_array ${REGISTERED_DEVS[*]}`" ocf_log debug "$RESOURCE: reserved devices: `sg_persist_echo_array ${RESERVED_DEVS[*]}`" ocf_log debug "$RESOURCE: devices with reservation: `sg_persist_echo_array ${DEVS_WITH_RESERVATION[*]}`" ocf_log debug "$RESOURCE: reservation keys: `sg_persist_echo_array ${RESERVATION_KEYS[*]}`" MASTER_SCORE=$(($MASTER_SCORE_BASE + $MASTER_SCORE_DEV_FACTOR*$WORKING_DEVS_NOF)) ocf_log debug "$RESOURCE: master_score: $MASTER_SCORE_BASE + $MASTER_SCORE_DEV_FACTOR*$WORKING_DEVS_NOF = $MASTER_SCORE" } sg_persist_check_devs() { for dev in $DEVS do if [ -e "$dev" ]; then EXISTING_DEVS+=($dev) fi done EXISTING_DEVS_NOF=${#EXISTING_DEVS[*]} if [ $EXISTING_DEVS_NOF -lt $REQUIRED_DEVS_NOF ]; then ocf_log err "Number of existing devices=$EXISTING_DEVS_NOF less then required_devs_nof=$REQUIRED_DEVS_NOF" exit $OCF_ERR_INSTALLED fi } sg_persist_is_registered() { for registered_dev in ${REGISTERED_DEVS[*]} do if [ "$registered_dev" == "$1" ]; then return 0 fi done return 1 } sg_persist_get_reservation_key() { for array_index in ${!DEVS_WITH_RESERVATION[*]} do if [ "${DEVS_WITH_RESERVATION[$array_index]}" == "$1" ]; then echo ${RESERVATION_KEYS[$array_index]} return 0 fi done echo "" } sg_persist_echo_array() { str_count=0 arr_str="" for str in "$@" do arr_str="$arr_str[$str_count]:$str " str_count=$(($str_count+1)) done echo $arr_str } sg_persist_parse_act_pending() { ACT_PENDING_TS=0 ACT_PENDING_SCORE=0 if [ -n "$ACT_PENDING" ]; then ACT_PENDING_TS=${ACT_PENDING%%_*} ACT_PENDING_SCORE=${ACT_PENDING##*_} fi } sg_persist_clear_pending() { if [ -n "$ACT_PENDING" ]; then DO_PENDING_UPDATE="YES" NEW_PENDING="" fi } sg_persist_new_master_score() { DO_MASTER_SCORE_UPDATE="YES" NEW_MASTER_SCORE=$1 } sg_persist_new_pending() { DO_PENDING_UPDATE="YES" NEW_PENDING=$1 } # Functions invoked by resource manager actions sg_persist_action_start() { ocf_run $MASTER_SCORE_ATTRIBUTE --update=$MASTER_SCORE ocf_run $PENDING_ATTRIBUTE --update="" if [ $WORKING_DEVS_NOF -lt $REQUIRED_DEVS_NOF ]; then ocf_log err "$RESOURCE: Number of working devices=$WORKING_DEVS_NOF less then required_devs_nof=$REQUIRED_DEVS_NOF" exit $OCF_ERR_GENERIC fi for dev in ${WORKING_DEVS[*]} do if sg_persist_is_registered $dev ; then : OK else ocf_run $SG_PERSIST --out --register --param-rk=0 --param-sark=$NODE_ID_HEX $dev if [ $? -ne $OCF_SUCCESS ] then return $OCF_ERR_GENERIC fi fi done return $OCF_SUCCESS } sg_persist_action_stop() { if [ ${#REGISTERED_DEVS[*]} -eq 0 ]; then ocf_log debug "$RESOURCE stop: already no registrations" else # Clear preference for becoming master ocf_run $MASTER_SCORE_ATTRIBUTE --delete ocf_run $PENDING_ATTRIBUTE --delete for dev in ${REGISTERED_DEVS[*]} do ocf_run $SG_PERSIST --out --register --param-rk=$NODE_ID_HEX --param-sark=0 $dev done fi return $OCF_SUCCESS } sg_persist_action_monitor() { ACT_MASTER_SCORE=`ocf_run -q $MASTER_SCORE_ATTRIBUTE --query --quiet 2>&1` ocf_log debug "$RESOURCE monitor: ACT_MASTER_SCORE=$ACT_MASTER_SCORE" ACT_PENDING=`ocf_run $PENDING_ATTRIBUTE --query --quiet 2>&1` ocf_log debug "$RESOURCE monitor: ACT_PENDING=$ACT_PENDING" sg_persist_parse_act_pending ocf_log debug "$RESOURCE monitor: ACT_PENDING_TS=$ACT_PENDING_TS" ocf_log debug "$RESOURCE monitor: ACT_PENDING_VAL=$ACT_PENDING_SCORE" ocf_log debug "$MASTER_SCORE, $ACT_MASTER_SCORE, $ROLE" DO_MASTER_SCORE_UPDATE="NO" DO_PENDING_UPDATE="NO" if [ -n "$ACT_MASTER_SCORE" ] then if [ $ACT_MASTER_SCORE -eq $MASTER_SCORE ]; then sg_persist_clear_pending else case $ROLE in Master) if [ $MASTER_SCORE -lt $ACT_MASTER_SCORE ]; then if [ -n "$ACT_PENDING" ] then if [ $(($NOW-$ACT_PENDING_TS-$MASTER_SCORE_DELAY)) -ge 0 ]; then sg_persist_new_master_score $MASTER_SCORE sg_persist_clear_pending fi else if [ $MASTER_SCORE_DELAY -eq 0 ]; then sg_persist_new_master_score $MASTER_SCORE sg_persist_clear_pending else sg_persist_new_pending "${NOW}_${MASTER_SCORE}" fi fi else sg_persist_new_master_score $MASTER_SCORE sg_persist_clear_pending fi ;; Slave) if [ $MASTER_SCORE -gt $ACT_MASTER_SCORE ]; then if [ -n "$ACT_PENDING" ]; then if [ $(($NOW-$ACT_PENDING_TS-$MASTER_SCORE_DELAY)) -ge 0 ]; then sg_persist_new_master_score $MASTER_SCORE sg_persist_clear_pending fi else if [ $MASTER_SCORE_DELAY -eq 0 ]; then sg_persist_new_master_score $MASTER_SCORE sg_persist_clear_pending else sg_persist_new_pending "${NOW}_${MASTER_SCORE}" fi fi else sg_persist_new_master_score $MASTER_SCORE sg_persist_clear_pending fi ;; *) ;; esac fi fi if [ $DO_MASTER_SCORE_UPDATE == "YES" ]; then ocf_run $MASTER_SCORE_ATTRIBUTE --update=$NEW_MASTER_SCORE fi if [ $DO_PENDING_UPDATE == "YES" ]; then ocf_run $PENDING_ATTRIBUTE --update=$NEW_PENDING fi if [ ${#REGISTERED_DEVS[*]} -eq 0 ]; then ocf_log debug "$RESOURCE monitor: no registrations" return $OCF_NOT_RUNNING fi if [ ${#RESERVED_DEVS[*]} -eq ${#WORKING_DEVS[*]} ]; then return $OCF_RUNNING_MASTER fi if [ ${#REGISTERED_DEVS[*]} -eq ${#WORKING_DEVS[*]} ]; then if [ $RESERVATION_TYPE -eq 7 ] || [ $RESERVATION_TYPE -eq 8 ]; then if [ ${#DEVS_WITH_RESERVATION[*]} -gt 0 ]; then return $OCF_RUNNING_MASTER else return $OCF_SUCCESS fi else return $OCF_SUCCESS fi fi ocf_log err "$RESOURCE monitor: unexpected state" return $OCF_ERR_GENERIC } sg_persist_action_promote() { if [ ${#RESERVED_DEVS[*]} -gt 0 ]; then ocf_log info "$RESOURCE promote: already master" return $OCF_SUCCESS fi for dev in ${WORKING_DEVS[*]} do reservation_key=`sg_persist_get_reservation_key $dev` case $RESERVATION_TYPE in 1|3|5|6) if [ -z "$reservation_key" ]; then ocf_run $SG_PERSIST --out --reserve --param-rk=$NODE_ID_HEX --prout-type=$RESERVATION_TYPE $dev if [ $? -ne $OCF_SUCCESS ]; then return $OCF_ERR_GENERIC fi else ocf_run $SG_PERSIST --out --preempt --param-sark=$reservation_key --param-rk=$NODE_ID_HEX --prout-type=$RESERVATION_TYPE $dev if [ $? -ne $OCF_SUCCESS ]; then return $OCF_ERR_GENERIC fi fi ;; 7|8) if [ -z "$reservation_key" ]; then ocf_run $SG_PERSIST --out --reserve --param-rk=$NODE_ID_HEX --prout-type=$RESERVATION_TYPE $dev if [ $? -ne $OCF_SUCCESS ] then return $OCF_ERR_GENERIC fi else ocf_log info "$RESOURCE promote: there already exist an reservation holder, all registrants become reservation holders" return $OCF_SUCCESS fi ;; *) return $OCF_ERR_ARGS ;; esac done return $OCF_SUCCESS } sg_persist_action_demote() { case $RESERVATION_TYPE in 1|3|5|6) if [ ${#RESERVED_DEVS[*]} -eq 0 ]; then ocf_log info "$RESOURCE demote: already slave" return $OCF_SUCCESS fi for dev in ${RESERVED_DEVS[*]} do ocf_run $SG_PERSIST --out --release --param-rk=$NODE_ID_HEX --prout-type=$RESERVATION_TYPE $dev if [ $? -ne $OCF_SUCCESS ]; then return $OCF_ERR_GENERIC fi done ;; 7|8) #in case of 7/8, --release won't release the reservation unless unregister the key. if [ ${#REGISTERED_DEVS[*]} -eq 0 ]; then ocf_log info "$RESOURCE demote: already slave" return $OCF_SUCCESS fi for dev in ${REGISTERED_DEVS[*]} do ocf_run $SG_PERSIST --out --register --param-rk=$NODE_ID_HEX --param-sark=0 $dev if [ $? -ne $OCF_SUCCESS ]; then return $OCF_ERR_GENERIC fi done ;; *) return $OCF_ERR_ARGS ;; esac return $OCF_SUCCESS } sg_persist_action_notify() { local n_type="$OCF_RESKEY_CRM_meta_notify_type" local n_op="$OCF_RESKEY_CRM_meta_notify_operation" set -- $OCF_RESKEY_CRM_meta_notify_active_resource local n_active="$#" set -- $OCF_RESKEY_CRM_meta_notify_stop_resource local n_stop="$#" set -- $OCF_RESKEY_CRM_meta_notify_start_resource local n_start="$#" ocf_log debug "$RESOURCE notify: $n_type for $n_op - counts: active $n_active - starting $n_start - stopping $n_stop" return $OCF_SUCCESS } sg_persist_action_validate_all () { if [ "$OCF_RESKEY_CRM_meta_master_max" != "1" ] && [ "$RESERVATION_TYPE" != "7" ] && [ "$RESERVATION_TYPE" != "8" ]; then ocf_log err "Master options misconfigured." exit $OCF_ERR_CONFIGURED fi return $OCF_SUCCESS } if [ $# -ne 1 ]; then echo "Incorrect parameter count." sg_persist_action_usage exit $OCF_ERR_ARGS fi ACTION=$1 case $ACTION in meta-data) meta_data ;; validate-all) sg_persist_init sg_persist_action_validate_all ;; start|promote|monitor|stop|demote) ocf_log debug "$RESOURCE: starting action \"$ACTION\"" sg_persist_init sg_persist_action_$ACTION exit $? ;; notify) sg_persist_action_notify exit $? ;; usage|help) sg_persist_action_usage exit $OCF_SUCCESS ;; *) sg_persist_action_usage exit $OCF_ERR_ARGS ;; esac