diff --git a/heartbeat/SAPDatabase b/heartbeat/SAPDatabase index 641bd4086..14b619fe7 100755 --- a/heartbeat/SAPDatabase +++ b/heartbeat/SAPDatabase @@ -1,341 +1,355 @@ #!/bin/sh # # SAPDatabase # # Description: Manages any type of SAP supported database instance # as a High-Availability OCF compliant resource. # # Author: Alexander Krauth, October 2006 # Support: linux@sap.com # License: GNU General Public License (GPL) # Copyright: (c) 2006, 2007, 2010, 2012 Alexander Krauth # # An example usage: # See usage() function below for more details... # # OCF instance parameters: # OCF_RESKEY_SID # OCF_RESKEY_DIR_EXECUTABLE (optional, well known directories will be searched by default) # OCF_RESKEY_DBTYPE (mandatory, one of the following values: ORA,ADA,DB6,SYB,HDB) # OCF_RESKEY_DBINSTANCE (optional, Database instance name, if not equal to SID) # OCF_RESKEY_DBOSUSER (optional, the Linux user that owns the database processes on operating system level) +# OCF_RESKEY_START_TIMEOUT (optional, Timeout passed to saphostctrl for the StartDatabase action) +# OCF_RESKEY_STOP_TIMEOUT (optional, Timeout passed to saphostctrl for the StopDatabase action) # OCF_RESKEY_STRICT_MONITORING (optional, activate application level monitoring - with Oracle a failover will occur in case of an archiver stuck) # OCF_RESKEY_AUTOMATIC_RECOVER (optional, automatic startup recovery, default is false) # OCF_RESKEY_MONITOR_SERVICES (optional, default is to monitor all database services) # OCF_RESKEY_PRE_START_USEREXIT (optional, lists a script which can be executed before the resource is started) # OCF_RESKEY_POST_START_USEREXIT (optional, lists a script which can be executed after the resource is started) # OCF_RESKEY_PRE_STOP_USEREXIT (optional, lists a script which can be executed before the resource is stopped) # OCF_RESKEY_POST_STOP_USEREXIT (optional, lists a script which can be executed after the resource is stopped) # Deprecated parameters: # OCF_RESKEY_NETSERVICENAME # OCF_RESKEY_DBJ2EE_ONLY # OCF_RESKEY_JAVA_HOME # OCF_RESKEY_DIR_BOOTSTRAP # OCF_RESKEY_DIR_SECSTORE # OCF_RESKEY_DB_JARS # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### SH=/bin/sh usage() { methods=`sapdatabase_methods` methods=`echo $methods | tr ' ' '|'` cat <<-! usage: $0 ($methods) $0 manages a SAP database of any type as an HA resource. Currently Oracle, MaxDB, DB/2 UDB, Sybase ASE and SAP HANA Database are supported. ABAP databases as well as JAVA only databases are supported. The 'start' operation starts the instance. The 'stop' operation stops the instance. The 'status' operation reports whether the instance is running The 'monitor' operation reports whether the instance seems to be working The 'recover' operation tries to recover the instance after a crash (instance will be stopped first!) The 'validate-all' operation reports whether the parameters are valid The 'methods' operation reports on the methods $0 supports ! } meta_data() { cat < 2.14 Manages a SAP database instance as an HA resource. Resource script for SAP databases. It manages a SAP database of any type as an HA resource. The purpose of the resource agent is to start, stop and monitor the database instance of a SAP system. Together with the RDBMS system it will also control the related network service for the database. Like the Oracle Listener and the xserver of MaxDB. The resource agent expects a standard SAP installation of the database and therefore needs less parameters to configure. The resource agent supports the following databases: - Oracle 10.2, 11.2 and 12 - DB/2 UDB for Windows and Unix 9.x - SAP-DB / MaxDB 7.x - Sybase ASE 15.7 - SAP HANA Database since 1.00 - with SAP node 1625203 (http://sdn.sap.com) In fact this resource agent does not run any database commands directly. It uses the SAP standard process SAPHostAgent to control the database. The SAPHostAgent must be installed on each cluster node locally. It will not work, if you try to run the SAPHostAgent also as a HA resource. Please follow SAP note 1031096 for the installation of SAPHostAgent. The required minimum version of SAPHostAgent is: Release: 7.20 Patch Number: 90 or compile time after: Dec 17 2011 The unique database system identifier. e.g. P01 Database system ID The full qualified path where to find saphostexec and saphostctrl. Usually you can leave this empty. Then the default: /usr/sap/hostctrl/exe is used. path of saphostexec and saphostctrl The name of the database vendor you use. Set either: ADA, DB6, ORA, SYB, HDB database vendor Must be used for special database implementations, when database instance name is not equal to the SID (e.g. Oracle DataGuard) Database instance name, if not equal to SID The parameter can be set, if the database processes on operating system level are not executed with the default user of the used database type. Defaults: ADA=taken from /etc/opt/sdb, DB6=db2SID, ORA=oraSID and oracle, SYB=sybSID, HDB=SIDadm the Linux user that owns the database processes on operating system level Deprecated - do not use anymore. This parameter will be deleted in one of the next releases. deprecated - do not use anymore + + + Timeout passed to saphostctrl for function StartDatabase. If is not set will use be default saphostctrl timeout. + StartDatabase timeout parameter + + + + Timeout passed to saphostctrl for function StopDatabase. If is not set will use be default saphostctrl timeout. + StopDatabase timeout parameter + Deprecated - do not use anymore. This parameter will be deleted in one of the next releases. deprecated - do not use anymore Deprecated - do not use anymore. This parameter will be deleted in one of the next releases. deprecated - do not use anymore This controls how the resource agent monitors the database. If set to true, it will use 'saphostctrl -function GetDatabaseStatus' to test the database state. If set to false, only operating system processes are monitored. Activates application level monitoring If you set this to true, 'saphostctrl -function StartDatabase' will always be called with the '-force' option. Enable or disable automatic startup recovery Defines which services are monitored by the SAPDatabase resource agent, if STRICT_MONITORING is set to true. Service names must correspond with the output of the 'saphostctrl -function GetDatabaseStatus' command. Database services to monitor Deprecated - do not use anymore. This parameter will be deleted in one of the next releases. deprecated - do not use anymore Deprecated - do not use anymore. This parameter will be deleted in one of the next releases. deprecated - do not use anymore Deprecated - do not use anymore. This parameter will be deleted in one of the next releases. deprecated - do not use anymore The full qualified path where to find a script or program which should be executed before this resource gets started. path to a pre-start script The full qualified path where to find a script or program which should be executed after this resource got started. path to a post-start script The full qualified path where to find a script or program which should be executed before this resource gets stopped. path to a pre-start script The full qualified path where to find a script or program which should be executed after this resource got stopped. path to a post-start script END } # # methods: What methods/operations do we support? # sapdatabase_methods() { cat <<-! start stop status monitor recover validate-all methods meta-data usage ! } # # sapuserexit : Many SAP customers need some additional processes/tools to run their SAP systems. # This specialties do not allow a totally generic SAP cluster resource agent. # Someone should write a resource agent for each additional process you need, if it # is required to monitor that process within the cluster manager. To enable # you to extent this resource agent without developing a new one, this user exit # was introduced. # sapuserexit() { NAME="$1" VALUE="$2" if [ -n "$VALUE" ] then if have_binary "$VALUE" then ocf_log info "Calling userexit ${NAME} with customer script file ${VALUE}" "$VALUE" >/dev/null 2>&1 ocf_log info "Exiting userexit ${NAME} with customer script file ${VALUE}, returncode: $?" else ocf_log warn "Attribute ${NAME} is set to ${VALUE}, but this file is not executable" fi fi return $OCF_SUCCESS } # # saphostctrl_installed # saphostctrl_installed() { OCF_RESKEY_DIR_EXECUTABLE_default="/usr/sap/hostctrl/exe" : ${OCF_RESKEY_DIR_EXECUTABLE=${OCF_RESKEY_DIR_EXECUTABLE_default}} SAPHOSTCTRL="${OCF_RESKEY_DIR_EXECUTABLE}/saphostctrl" SAPHOSTEXEC="${OCF_RESKEY_DIR_EXECUTABLE}/saphostexec" SAPHOSTSRV="${OCF_RESKEY_DIR_EXECUTABLE}/sapstartsrv" SAPHOSTOSCOL="${OCF_RESKEY_DIR_EXECUTABLE}/saposcol" have_binary $SAPHOSTCTRL && have_binary $SAPHOSTEXEC } # # 'main' starts here... # if ( [ $# -ne 1 ] ) then usage exit $OCF_ERR_ARGS fi # These operations don't require OCF instance parameters to be set case "$1" in meta-data) meta_data exit $OCF_SUCCESS;; usage) usage exit $OCF_SUCCESS;; methods) sapdatabase_methods exit $?;; *);; esac if ! ocf_is_root then ocf_log err "$0 must be run as root" exit $OCF_ERR_PERM fi # mandatory parameter check if [ -z "$OCF_RESKEY_SID" ]; then ocf_log err "Please set OCF_RESKEY_SID to the SAP system id!" exit $OCF_ERR_ARGS fi SID=`echo "$OCF_RESKEY_SID"` +START_TIMEOUT=`echo "$OCF_RESKEY_START_TIMEOUT"` +STOP_TIMEOUT=`echo "$OCF_RESKEY_STOP_TIMEOUT"` if [ -z "$OCF_RESKEY_DBTYPE" ]; then ocf_log err "Please set OCF_RESKEY_DBTYPE to the database vendor specific tag (ADA,DB6,ORA,SYB,HDB)!" exit $OCF_ERR_ARGS fi DBTYPE=`echo "$OCF_RESKEY_DBTYPE" | tr '[:lower:]' '[:upper:]'` # source functions and initialize global variables if saphostctrl_installed; then . ${OCF_FUNCTIONS_DIR}/sapdb.sh else if [ -n "${OCF_RESKEY_DBOSUSER}" ]; then ocf_exit_reason "Usage of parameter OCF_RESKEY_DBOSUSER is not possible without having SAP Host-Agent installed" exit $OCF_ERR_ARGS fi . ${OCF_FUNCTIONS_DIR}/sapdb-nosha.sh fi sapdatabase_init # we always want to fall to the faster status method in case of a probe by the cluster ACTION=$1 if ocf_is_probe then ACTION=status fi # What kind of method was invoked? case "$ACTION" in start|stop|status|recover) sapdatabase_$ACTION exit $?;; monitor) sapdatabase_monitor $OCF_RESKEY_STRICT_MONITORING exit $?;; validate-all) sapdatabase_validate exit $?;; *) sapdatabase_methods exit $OCF_ERR_UNIMPLEMENTED;; esac diff --git a/heartbeat/sapdb.sh b/heartbeat/sapdb.sh index 8661f26c3..d1ffdcbac 100755 --- a/heartbeat/sapdb.sh +++ b/heartbeat/sapdb.sh @@ -1,367 +1,375 @@ # # sapdb.sh - for systems having SAPHostAgent installed # (sourced by SAPDatabase) # # Description: This code is separated from the SAPDatabase agent to # introduce new functions for systems which having # SAPHostAgent installed. # Someday it might be merged back into SAPDatabase agein. # # Author: Alexander Krauth, September 2010 # Support: linux@sap.com # License: GNU General Public License (GPL) # Copyright: (c) 2010, 2012 Alexander Krauth # # # background_check_saphostexec : Run a request to saphostexec in a separat task, to be able to react on a hanging process # background_check_saphostexec() { timeout=600 count=0 $SAPHOSTCTRL -function ListDatabases >/dev/null 2>&1 & pid=$! while kill -0 $pid > /dev/null 2>&1 do sleep 0.1 count=$(( $count + 1 )) if [ $count -ge $timeout ]; then kill -9 $pid >/dev/null 2>&1 ocf_log warn "saphostexec did not respond to the method 'ListDatabases' within 60 seconds" return $OCF_ERR_GENERIC # Timeout fi done # child already has finished, now evaluate it's returncode wait $pid } # # cleanup_saphostexec : make sure to cleanup the SAPHostAgent in case of any # misbehavior # cleanup_saphostexec() { pkill -9 -f "$SAPHOSTEXEC" pkill -9 -f "$SAPHOSTSRV" oscolpid=`pgrep -f "$SAPHOSTOSCOL"` # we check saposcol pid, because it # might not run under control of # saphostexec # cleanup saposcol shared memory, otherwise it will not start again if [ -n "$oscolpid" ];then kill -9 $oscolpid oscolipc=`ipcs -m | grep "4dbe " | awk '{print $2}'` if [ -n "$oscolipc" ]; then ipcrm -m $oscolipc fi fi # removing the unix domain socket file as it might have wrong permissions or # ownership - it will be recreated by saphostexec during next start [ -r /tmp/.sapstream1128 ] && rm -f /tmp/.sapstream1128 } # # check_saphostexec : Before using saphostctrl we make sure that the # saphostexec is running on the current node. # check_saphostexec() { chkrc=$OCF_SUCCESS running=`pgrep -f "$SAPHOSTEXEC" | wc -l` if [ $running -gt 0 ]; then if background_check_saphostexec; then return $OCF_SUCCESS else ocf_log warn "saphostexec did not respond to the method 'ListDatabases' correctly (rc=$?), it will be killed now" running=0 fi fi if [ $running -eq 0 ]; then ocf_log warn "saphostexec is not running on node `hostname`, it will be started now" cleanup_saphostexec output=`$SAPHOSTEXEC -restart 2>&1` # now make sure the daemon has been started and is able to respond srvrc=1 while [ $srvrc -ne 0 -a `pgrep -f "$SAPHOSTEXEC" | wc -l` -gt 0 ] do sleep 1 background_check_saphostexec srvrc=$? done if [ $srvrc -eq 0 ] then ocf_log info "saphostexec on node `hostname` was restarted !" chkrc=$OCF_SUCCESS else ocf_log error "saphostexec on node `hostname` could not be started! - $output" chkrc=$OCF_ERR_GENERIC fi fi return $chkrc } # # sapdatabase_start : Start the SAP database # sapdatabase_start() { check_saphostexec rc=$? if [ $rc -eq $OCF_SUCCESS ] then sapuserexit PRE_START_USEREXIT "$OCF_RESKEY_PRE_START_USEREXIT" DBINST="" if [ -n "$OCF_RESKEY_DBINSTANCE" ] then DBINST="-dbinstance $OCF_RESKEY_DBINSTANCE " fi FORCE="" if ocf_is_true $OCF_RESKEY_AUTOMATIC_RECOVER then FORCE="-force" fi DBOSUSER="" if [ -n "$OCF_RESKEY_DBOSUSER" ] then DBOSUSER="-dbuser $OCF_RESKEY_DBOSUSER " fi - output=`$SAPHOSTCTRL -function StartDatabase -dbname $SID -dbtype $DBTYPE $DBINST $DBOSUSER $FORCE -service` - + if [ -z $START_TIMEOUT ] + then + output=`$SAPHOSTCTRL -function StartDatabase -dbname $SID -dbtype $DBTYPE $DBINST $FORCE -service` + else + output=`$SAPHOSTCTRL -function StartDatabase -dbname $SID -dbtype $DBTYPE $DBINST -timeout $STOP_TIMEOUT $FORCE -service` + fi sapdatabase_monitor 1 rc=$? if [ $rc -eq 0 ] then ocf_log info "SAP database $SID started: $output" rc=$OCF_SUCCESS sapuserexit POST_START_USEREXIT "$OCF_RESKEY_POST_START_USEREXIT" else ocf_log err "SAP database $SID start failed: $output" rc=$OCF_ERR_GENERIC fi fi return $rc } # # sapdatabase_stop: Stop the SAP database # sapdatabase_stop() { check_saphostexec rc=$? if [ $rc -eq $OCF_SUCCESS ] then sapuserexit PRE_STOP_USEREXIT "$OCF_RESKEY_PRE_STOP_USEREXIT" DBINST="" if [ -n "$OCF_RESKEY_DBINSTANCE" ] then DBINST="-dbinstance $OCF_RESKEY_DBINSTANCE " fi DBOSUSER="" if [ -n "$OCF_RESKEY_DBOSUSER" ] then DBOSUSER="-dbuser $OCF_RESKEY_DBOSUSER " fi - output=`$SAPHOSTCTRL -function StopDatabase -dbname $SID -dbtype $DBTYPE $DBINST $DBOSUSER -force -service` - + if [ -z $STOP_TIMEOUT ] + then + output=`$SAPHOSTCTRL -function StopDatabase -dbname $SID -dbtype $DBTYPE $DBINST -force -service` + else + output=`$SAPHOSTCTRL -function StopDatabase -dbname $SID -dbtype $DBTYPE $DBINST -timeout $STOP_TIMEOUT -force -service` + fi if [ $? -eq 0 ] then ocf_log info "SAP database $SID stopped: $output" rc=$OCF_SUCCESS else ocf_log err "SAP database $SID stop failed: $output" rc=$OCF_ERR_GENERIC fi fi sapuserexit POST_STOP_USEREXIT "$OCF_RESKEY_POST_STOP_USEREXIT" return $rc } # # sapdatabase_monitor: Can the given database instance do anything useful? # sapdatabase_monitor() { strict=$1 rc=$OCF_SUCCESS if ! ocf_is_true $strict then sapdatabase_status rc=$? else check_saphostexec rc=$? if [ $rc -eq $OCF_SUCCESS ] then count=0 DBINST="" if [ -n "$OCF_RESKEY_DBINSTANCE" ] then DBINST="-dbinstance $OCF_RESKEY_DBINSTANCE " fi if [ -n "$OCF_RESKEY_DBOSUSER" ] then DBOSUSER="-dbuser $OCF_RESKEY_DBOSUSER " fi output=`$SAPHOSTCTRL -function GetDatabaseStatus -dbname $SID -dbtype $DBTYPE $DBINST $DBOSUSER` # we have to parse the output, because the returncode doesn't tell anything about the instance status for SERVICE in `echo "$output" | grep -i 'Component[ ]*Name *[:=] [A-Za-z][A-Za-z0-9_]* (' | sed 's/^.*Component[ ]*Name *[:=] *\([A-Za-z][A-Za-z0-9_]*\).*$/\1/i'` do COLOR=`echo "$output" | grep -i "Component[ ]*Name *[:=] *$SERVICE (" | sed 's/^.*Status *[:=] *\([A-Za-z][A-Za-z0-9_]*\).*$/\1/i'` STATE=0 case $COLOR in Running) STATE=$OCF_SUCCESS;; *) STATE=$OCF_NOT_RUNNING;; esac SEARCH=`echo "$OCF_RESKEY_MONITOR_SERVICES" | sed 's/\+/\\\+/g' | sed 's/\./\\\./g'` if [ `echo "$SERVICE" | egrep -c "$SEARCH"` -eq 1 ] then if [ $STATE -eq $OCF_NOT_RUNNING ] then ocf_log err "SAP database service $SERVICE is not running with status $COLOR !" rc=$STATE fi count=1 fi done if [ $count -eq 0 -a $rc -eq $OCF_SUCCESS ] then ocf_log err "The resource does not run any services which this RA could monitor!" rc=$OCF_ERR_ARGS fi if [ $rc -ne $OCF_SUCCESS ] then ocf_log err "The SAP database $SID is not running: $output" fi fi fi return $rc } # # sapdatabase_status: Are there any database processes on this host ? # sapdatabase_status() { sid=`echo $SID | tr '[:upper:]' '[:lower:]'` SUSER=${OCF_RESKEY_DBOSUSER:-""} case $DBTYPE in ADA) SEARCH="$SID/db/pgm/kernel" [ -z "$SUSER" ] && SUSER=`grep "^SdbOwner" /etc/opt/sdb | awk -F'=' '{print $2}'` SNUM=2 ;; ORA) DBINST=${OCF_RESKEY_DBINSTANCE} DBINST=${OCF_RESKEY_DBINSTANCE:-${SID}} SEARCH="ora_[a-z][a-z][a-z][a-z]_$DBINST" if [ -z "$SUSER" ]; then id "oracle" > /dev/null 2> /dev/null && SUSER="oracle" id "ora${sid}" > /dev/null 2> /dev/null && SUSER="${SUSER:+${SUSER},}ora${sid}" fi SNUM=4 ;; DB6) SEARCH="db2[a-z][a-z][a-z]" [ -z "$SUSER" ] && SUSER="db2${sid}" SNUM=2 ;; SYB) SEARCH="dataserver" [ -z "$SUSER" ] && SUSER="syb${sid}" SNUM=1 ;; HDB) SEARCH="hdb[a-z]*server" [ -z "$SUSER" ] && SUSER="${sid}adm" SNUM=1 ;; esac [ -z "$SUSER" ] && return $OCF_ERR_INSTALLED cnt=`ps -u $SUSER -o args 2> /dev/null | grep -v grep | grep -c $SEARCH` [ $cnt -ge $SNUM ] && return $OCF_SUCCESS return $OCF_NOT_RUNNING } # # sapdatabase_recover: # sapdatabase_recover() { OCF_RESKEY_AUTOMATIC_RECOVER=1 sapdatabase_stop sapdatabase_start } # # sapdatabase_validate: Check the symantic of the input parameters # sapdatabase_validate() { rc=$OCF_SUCCESS if [ `echo "$SID" | grep -c '^[A-Z][A-Z0-9][A-Z0-9]$'` -ne 1 ] then ocf_log err "Parsing parameter SID: '$SID' is not a valid system ID!" rc=$OCF_ERR_ARGS fi case "$DBTYPE" in ORA|ADA|DB6|SYB|HDB) ;; *) ocf_log err "Parsing parameter DBTYPE: '$DBTYPE' is not a supported database type!" rc=$OCF_ERR_ARGS ;; esac return $rc } # # sapdatabase_init: initialize global variables at the beginning # sapdatabase_init() { OCF_RESKEY_AUTOMATIC_RECOVER_default=0 : ${OCF_RESKEY_AUTOMATIC_RECOVER=${OCF_RESKEY_AUTOMATIC_RECOVER_default}} if [ -z "$OCF_RESKEY_MONITOR_SERVICES" ] then case $DBTYPE in ORA) export OCF_RESKEY_MONITOR_SERVICES="Instance|Database|Listener" ;; ADA) export OCF_RESKEY_MONITOR_SERVICES="Database" ;; DB6) db2sid="db2`echo $SID | tr '[:upper:]' '[:lower:]'`" export OCF_RESKEY_MONITOR_SERVICES="${SID}|${db2sid}" ;; SYB) export OCF_RESKEY_MONITOR_SERVICES="Server" ;; HDB) export OCF_RESKEY_MONITOR_SERVICES="hdbindexserver" ;; esac fi }