diff --git a/rgmanager/src/resources/SAPDatabase b/rgmanager/src/resources/SAPDatabase index 012ee4d6c..b34d67f44 100644 --- a/rgmanager/src/resources/SAPDatabase +++ b/rgmanager/src/resources/SAPDatabase @@ -1,1026 +1,1026 @@ #!/bin/sh # # SAPDatabase # # Description: Manages any type of SAP supported database instance # as a High-Availability OCF compliant resource. # # Author: Alexander Krauth, October 2006 # Support: linux@sap.com # License: GNU General Public License (GPL) # Copyright: (c) 2006, 2007 Alexander Krauth # # An example usage: # See usage() function below for more details... # # OCF instance parameters: # OCF_RESKEY_SID # OCF_RESKEY_DIR_EXECUTABLE (optional, well known directories will be searched by default) # OCF_RESKEY_DBTYPE # OCF_RESKEY_NETSERVICENAME (optional, non standard name of Oracle Listener) # OCF_RESKEY_DBJ2EE_ONLY (optional, default is false) # OCF_RESKEY_JAVA_HOME (optional, only needed if DBJ2EE_ONLY is true and JAVA_HOME enviroment variable is not set) # OCF_RESKEY_STRICT_MONITORING (optional, activate application level monitoring - with Oracle a failover will occur in case of an archiver stuck) # OCF_RESKEY_AUTOMATIC_RECOVER (optional, automatic startup recovery, default is false) # OCF_RESKEY_DIR_BOOTSTRAP (optional, if non standard J2EE server directory) # OCF_RESKEY_DIR_SECSTORE (optional, if non standard J2EE secure store directory) # OCF_RESKEY_DB_JARS (optional, if maintained in bootstrap.properties, mandatory for WebAS Java 7.10) # OCF_RESKEY_PRE_START_USEREXIT (optional, lists a script which can be executed before the resource is started) # OCF_RESKEY_POST_START_USEREXIT (optional, lists a script which can be executed after the resource is started) # OCF_RESKEY_PRE_STOP_USEREXIT (optional, lists a script which can be executed before the resource is stopped) # OCF_RESKEY_POST_STOP_USEREXIT (optional, lists a script which can be executed after the resource is stopped) # # ToDo: # Remove all the database dependend stuff from 
#       the agent and use
#       saphostcontrol daemon as soon as SAP will release it.
#
#######################################################################
# Initialization:

# Source the OCF helper library that lives next to this script. The hidden
# variant (.ocf-shellfuncs) is preferred; for the plain variant the locale and
# PATH are pinned before it is sourced.
if [ -f "$(dirname "$0")/.ocf-shellfuncs" ]; then
  . "$(dirname "$0")/.ocf-shellfuncs"
elif [ -f "$(dirname "$0")/ocf-shellfuncs" ]; then
  LC_ALL=C
  LANG=C
  PATH=/bin:/sbin:/usr/bin:/usr/sbin
  export LC_ALL LANG PATH
  . "$(dirname "$0")/ocf-shellfuncs"
else
  echo Could not find ocf-shellfuncs!
  exit 1
fi

#######################################################################

SH=/bin/sh

#
# usage: print the supported operations and a short description of each
#
usage() {
  methods=$(sapdatabase_methods)
  # $methods is deliberately unquoted: word splitting turns the newline
  # separated list into one space separated line before tr inserts the '|'.
  methods=$(echo $methods | tr ' ' '|')
  cat <<-!
usage: $0 ($methods)
$0 manages a SAP database of any type as an HA resource.
Currently Oracle, MaxDB and DB/2 UDB are supported.
ABAP databases as well as JAVA only databases are supported.
The 'start' operation starts the instance.
The 'stop' operation stops the instance.
The 'status' operation reports whether the instance is running
The 'monitor' operation reports whether the instance seems to be working
The 'recover' operation tries to recover the instance after a crash (instance will be stopped first!)
The 'validate-all' operation reports whether the parameters are valid
The 'methods' operation reports on the methods $0 supports
!
}

#
# meta_data: print the OCF resource agent meta data (XML) on stdout
#
# NOTE: the two DOCTYPE lines below still carry the '-' / '+' markers of the
#       enclosing patch (old and new DTD name); they are reproduced as found.
#
meta_data() {
	cat <<END
<?xml version="1.0"?>
-<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<!DOCTYPE resource-agent SYSTEM "ra-api-1-modified.dtd">
<resource-agent name="SAPDatabase">
<version>1.92.1</version>
<longdesc lang="en">
Resource script for SAP databases. It manages a SAP database of any type as an HA resource.
</longdesc>
<shortdesc lang="en">SAP database resource agent</shortdesc>
<parameters>
 <parameter name="SID" unique="1" required="1" primary="1">
  <longdesc lang="en">The unique SAP system identifier. e.g. P01</longdesc>
  <shortdesc lang="en">SAP system ID</shortdesc>
  <content type="string" default="" />
 </parameter>
 <parameter name="DIR_EXECUTABLE" unique="0" required="0">
  <longdesc lang="en">The full qualified path where to find sapstartsrv and sapcontrol.</longdesc>
  <shortdesc lang="en">path of sapstartsrv and sapcontrol</shortdesc>
  <content type="string" default="" />
 </parameter>
 <parameter name="DBTYPE" unique="0" required="1">
  <longdesc lang="en">The name of the database vendor you use. Set either: ORA,DB6,ADA</longdesc>
  <shortdesc lang="en">database vendor</shortdesc>
  <content type="string" default="" />
 </parameter>
 <parameter name="NETSERVICENAME" unique="0" required="0">
  <longdesc lang="en">The Oracle TNS listener name.</longdesc>
  <shortdesc lang="en">listener name</shortdesc>
  <content type="string" default="" />
 </parameter>
 <parameter name="DBJ2EE_ONLY" unique="0" required="0">
  <longdesc lang="en">If you do not have a ABAP stack installed in the SAP database, set this to TRUE</longdesc>
  <shortdesc lang="en">only JAVA stack installed</shortdesc>
  <content type="boolean" default="false"/>
 </parameter>
 <parameter name="JAVA_HOME" unique="0" required="0">
  <longdesc lang="en">This is only needed if the DBJ2EE_ONLY parameter is set to true. Enter the path to the Java SDK which is used by the SAP WebAS Java</longdesc>
  <shortdesc lang="en">Path to Java SDK</shortdesc>
  <content type="string" default=""/>
 </parameter>
 <parameter name="STRICT_MONITORING" unique="0" required="0">
  <longdesc lang="en">This controls how the resource agent monitors the database. If set to true, it will use SAP tools to test the connect to the database. Do not use with Oracle, because it will result in unwanted failovers in case of an archiver stuck</longdesc>
  <shortdesc lang="en">Activates application level monitoring</shortdesc>
  <content type="boolean" default="false"/>
 </parameter>
 <parameter name="AUTOMATIC_RECOVER" unique="0" required="0">
  <longdesc lang="en">The SAPDatabase resource agent tries to recover a failed start attempt automaticaly one time. This is done by running a forced abort of the RDBMS and/or executing recovery commands.</longdesc>
  <shortdesc lang="en">Enable or disable automatic startup recovery</shortdesc>
  <content type="boolean" default="false"/>
 </parameter>
 <parameter name="DIR_BOOTSTRAP" unique="0" required="0">
  <longdesc lang="en">The full qualified path where to find the J2EE instance bootstrap directory. e.g. /usr/sap/P01/J00/j2ee/cluster/bootstrap</longdesc>
  <shortdesc lang="en">path to j2ee bootstrap directory</shortdesc>
  <content type="string" default="" />
 </parameter>
 <parameter name="DIR_SECSTORE" unique="0" required="0">
  <longdesc lang="en">The full qualified path where to find the J2EE security store directory. e.g. /usr/sap/P01/SYS/global/security/lib/tools</longdesc>
  <shortdesc lang="en">path to j2ee secure store directory</shortdesc>
  <content type="string" default="" />
 </parameter>
 <parameter name="DB_JARS" unique="0" required="0">
  <longdesc lang="en">The full qualified filename of the jdbc driver for the database connection test. It will be automaticaly read from the bootstrap.properties file in Java engine 6.40 and 7.00. For Java engine 7.10 the parameter is mandatory.</longdesc>
  <shortdesc lang="en">file name of the jdbc driver</shortdesc>
  <content type="string" default="" />
 </parameter>
 <parameter name="PRE_START_USEREXIT" unique="0" required="0">
  <longdesc lang="en">The full qualified path where to find a script or program which should be executed before this resource gets started.</longdesc>
  <shortdesc lang="en">path to a pre-start script</shortdesc>
  <content type="string" default="" />
 </parameter>
 <parameter name="POST_START_USEREXIT" unique="0" required="0">
  <longdesc lang="en">The full qualified path where to find a script or program which should be executed after this resource got started.</longdesc>
  <shortdesc lang="en">path to a post-start script</shortdesc>
  <content type="string" default="" />
 </parameter>
 <parameter name="PRE_STOP_USEREXIT" unique="0" required="0">
  <longdesc lang="en">The full qualified path where to find a script or program which should be executed before this resource gets stopped.</longdesc>
  <shortdesc lang="en">path to a pre-stop script</shortdesc>
  <content type="string" default="" />
 </parameter>
 <parameter name="POST_STOP_USEREXIT" unique="0" required="0">
  <longdesc lang="en">The full qualified path where to find a script or program which should be executed after this resource got stopped.</longdesc>
  <shortdesc lang="en">path to a post-stop script</shortdesc>
  <content type="string" default="" />
 </parameter>
</parameters>
<actions>
<action name="start" timeout="1800" />
<action name="stop" timeout="1800" />
<action name="status" depth="0" timeout="60" interval="120" start-delay="180" />
<action name="monitor" depth="0" timeout="60" interval="120" start-delay="180" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
<action name="methods" timeout="5" />
</actions>
</resource-agent>
END
}

#
# trap_handler: signal handler, removes the temp file and exits with
#               OCF_ERR_GENERIC
#
trap_handler() {
  rm -f "$TEMPFILE"
  exit $OCF_ERR_GENERIC
}

#
# do_exit: leave the agent with exit code $1
#
do_exit() {
  # If we've got a tempfile variable and the tempfile exists...
  # ... if the return code is 0 *or* the temp file is empty
  # remove it. (A non-empty temp file is kept when the exit code is non-zero.)
  if [ -n "$TEMPFILE" ] && [ -e "$TEMPFILE" ]; then
    # "! -s" is the portable spelling of "size is 0" for an existing file
    if [ "$1" -eq 0 ] || [ ! -s "$TEMPFILE" ]; then
      rm -f "$TEMPFILE"
    fi
  fi
  exit $1
}

#
# run_tempfile_as: run the script stored in $TEMPFILE as user $1
#   Makes the file executable, hands it over to the user, runs it through
#   "su -" and removes it afterwards. stdout of the script is passed through.
#   Returns the exit status of su (also left in the global retcode).
#
run_tempfile_as() {
  chmod 700 "$TEMPFILE"
  chown "$1" "$TEMPFILE"
  su - "$1" -c "$TEMPFILE"
  retcode=$?
  rm -f "$TEMPFILE"
  return $retcode
}

#
# verify_db_stopped: turn a successful stop script result ($1) into a failure
#   (1) when sapdatabase_status does not report OCF_NOT_RUNNING afterwards;
#   a non-zero $1 is returned unchanged.
#
verify_db_stopped() {
  if [ "$1" -eq 0 ]; then
    sapdatabase_status
    if [ $? -ne $OCF_NOT_RUNNING ]; then
      return 1
    fi
  fi
  return "$1"
}

#
# listener_start: Start the given listener
#
listener_start() {
  orasid="ora$(echo "$SID" | tr '[:upper:]' '[:lower:]')"
  rc=$OCF_SUCCESS
  output=$(echo "lsnrctl start $NETSERVICENAME" | su - "$orasid" 2>&1)
  if [ $? -eq 0 ]
  then
    ocf_log info "Oracle Listener $NETSERVICENAME started: $output"
    rc=$OCF_SUCCESS
  else
    ocf_log err "Oracle Listener $NETSERVICENAME start failed: $output"
    rc=$OCF_ERR_GENERIC
  fi
  return $rc
}

#
# listener_stop: Stop the given listener
#
listener_stop() {
  orasid="ora$(echo "$SID" | tr '[:upper:]' '[:lower:]')"
  rc=$OCF_SUCCESS
  # nothing to do when the listener is not running
  if ! listener_status
  then
    return $OCF_SUCCESS
  fi
  output=$(echo "lsnrctl stop $NETSERVICENAME" | su - "$orasid" 2>&1)
  if [ $? -eq 0 ]
  then
    ocf_log info "Oracle Listener $NETSERVICENAME stopped: $output"
  else
    ocf_log err "Oracle Listener $NETSERVICENAME stop failed: $output"
    rc=$OCF_ERR_GENERIC
  fi
  return $rc
}

#
# listener_status: is the given listener running?
#
listener_status() {
  orasid="ora$(echo "$SID" | tr '[:upper:]' '[:lower:]')"
  # Note: ps cuts off its output at column $COLUMNS, so "ps -ef" can not be used here
  # as the output might be too long.
  cnt=$(ps efo args --user "$orasid" | grep -- "$NETSERVICENAME" | grep -c tnslsnr)
  if [ "$cnt" -eq 1 ]
  then
    rc=$OCF_SUCCESS
  else
    ocf_log info "listener process not running for $NETSERVICENAME for $SID"
    rc=$OCF_ERR_GENERIC
  fi
  return $rc
}

#
# x_server_start: Start the given x_server
#
x_server_start() {
  rc=$OCF_SUCCESS
  output=$(echo "x_server start" | su - "$sidadm" 2>&1)
  if [ $? -eq 0 ]
  then
    ocf_log info "MaxDB x_server start: $output"
    rc=$OCF_SUCCESS
  else
    ocf_log err "MaxDB x_server start failed: $output"
    rc=$OCF_ERR_GENERIC
  fi
  return $rc
}

#
# x_server_stop: Stop the x_server
#
x_server_stop() {
  rc=$OCF_SUCCESS
  output=$(echo "x_server stop" | su - "$sidadm" 2>&1)
  if [ $? -eq 0 ]
  then
    ocf_log info "MaxDB x_server stop: $output"
  else
    ocf_log err "MaxDB x_server stop failed: $output"
    rc=$OCF_ERR_GENERIC
  fi
  return $rc
}

#
# x_server_status: is the x_server running?
#
x_server_status() {
  # the process owner is taken from the SdbOwner entry in /etc/opt/sdb
  sdbuser=$(grep "^SdbOwner" /etc/opt/sdb | awk -F'=' '{print $2}')
  # Note: ps cuts off its output at column $COLUMNS, so "ps -ef" can not be used here
  # as the output might be too long.
  cnt=$(ps efo args --user "$sdbuser" | grep -c vserver)
  if [ "$cnt" -ge 1 ]
  then
    rc=$OCF_SUCCESS
  else
    ocf_log info "x_server process not running"
    rc=$OCF_ERR_GENERIC
  fi
  return $rc
}

#
# oracle_stop: Stop the Oracle database without any condition
#   Writes a sqlplus "shutdown immediate" script to $TEMPFILE, runs it as
#   $sidadm and then confirms through sapdatabase_status that nothing is left.
#
oracle_stop() {
  echo '#!/bin/sh
LOG=$HOME/stopdb.log
date > $LOG
if [ -x "${ORACLE_HOME}/bin/sqlplus" ]
then
  SRVMGRDBA_EXE="${ORACLE_HOME}/bin/sqlplus"
else
  echo "Can not find executable sqlplus" >> $LOG
  exit 1
fi
$SRVMGRDBA_EXE /NOLOG >> $LOG << !
connect / as sysdba
shutdown immediate
exit
!
rc=$?
cat $LOG
exit $rc' > "$TEMPFILE"
  run_tempfile_as "$sidadm"
  verify_db_stopped $?
  retcode=$?
  return $retcode
}

#
# maxdb_stop: Stop the MaxDB database without any condition
#
maxdb_stop() {
  # x_Server must be running to stop database
  x_server_status
  if [ $? -ne $OCF_SUCCESS ]; then x_server_start; fi

  # JAVA only installations use a different xuser key
  if [ "$DBJ2EE_ONLY" -eq 1 ]; then
    userkey=c_J2EE
  else
    userkey=c
  fi

  # double quotes on purpose: $userkey is expanded now, \$... inside the
  # generated script
  echo "#!/bin/sh
LOG=\$HOME/stopdb.log
date > \$LOG
echo \"Stop database with xuserkey >$userkey<\" >> \$LOG
dbmcli -U ${userkey} db_offline >> \$LOG 2>&1
exit \$?" > "$TEMPFILE"
  run_tempfile_as "$sidadm"
  verify_db_stopped $?
  retcode=$?
  return $retcode
}

#
# db6udb_stop: Stop the DB2/UDB database without any condition
#
# NOTE(review): "exit $?" in the generated script reports the status of tee,
#               not of db2stop; only the sapdatabase_status check afterwards
#               detects a database that is still running.
#
db6udb_stop() {
  echo '#!/bin/sh
LOG=$HOME/stopdb.log
date > $LOG
echo "Shut down the database" >> $LOG
$INSTHOME/sqllib/bin/db2 deactivate database $DB2DBDFT |tee -a $LOG 2>&1
$INSTHOME/sqllib/adm/db2stop force |tee -a $LOG 2>&1
exit $?' > "$TEMPFILE"
  run_tempfile_as "$sidadm"
  verify_db_stopped $?
  retcode=$?
  return $retcode
}

#
# oracle_recover: try to clean up oracle after a crash
#   shutdown abort, startup mount, automatic recovery and open, run through
#   sqlplus as $sidadm. Returns the exit status of the generated script.
#
oracle_recover() {
  echo '#!/bin/sh
LOG=$HOME/recover.log
date > $LOG
echo "Logfile written by heartbeat SAPDatabase resource agent" >> $LOG
if [ -x "${ORACLE_HOME}/bin/sqlplus" ]
then
  SRVMGRDBA_EXE="${ORACLE_HOME}/bin/sqlplus"
else
  echo "Can not find executable sqlplus" >> $LOG
  exit 1
fi
$SRVMGRDBA_EXE /NOLOG >> $LOG << !
connect / as sysdba
shutdown abort
startup mount
WHENEVER SQLERROR EXIT SQL.SQLCODE
WHENEVER OSERROR EXIT FAILURE
alter database recover automatic database;
alter database open;
exit
!
rc=$?
cat $LOG
exit $rc' > "$TEMPFILE"
  run_tempfile_as "$sidadm"
  return $retcode
}

#
# maxdb_recover: try to clean up MaxDB after a crash
#
maxdb_recover() {
  # x_Server must be running to stop database
  x_server_status
  if [ $? -ne $OCF_SUCCESS ]; then x_server_start; fi

  if [ "$DBJ2EE_ONLY" -eq 1 ]; then
    userkey=c_J2EE
  else
    userkey=c
  fi

  echo "#!/bin/sh
LOG=\$HOME/recover.log
date > \$LOG
echo \"Logfile written by heartbeat SAPDatabase resource agent\" >> \$LOG
echo \"Cleanup database with xuserkey >$userkey<\" >> \$LOG
echo \"db_stop\" >> \$LOG 2>&1
dbmcli -U ${userkey} db_stop >> \$LOG 2>&1
echo \"db_clear\" >> \$LOG 2>&1
dbmcli -U ${userkey} db_clear >> \$LOG 2>&1
echo \"db_online\" >> \$LOG 2>&1
dbmcli -U ${userkey} db_online >> \$LOG 2>&1
rc=\$?
cat \$LOG
exit \$rc" > "$TEMPFILE"
  run_tempfile_as "$sidadm"
  return $retcode
}

#
# db6udb_recover: try to recover DB/2 after a crash
#   Unlike the stop path this script is run as the db2<sid> user.
#
db6udb_recover() {
  db2sid="db2$(echo "$SID" | tr '[:upper:]' '[:lower:]')"

  echo '#!/bin/sh
LOG=$HOME/recover.log
date > $LOG
echo "Logfile written by heartbeat SAPDatabase resource agent" >> $LOG
$INSTHOME/sqllib/bin/db2_kill >> $LOG 2>&1
$INSTHOME/sqllib/adm/db2start >> $LOG 2>&1
$INSTHOME/sqllib/bin/db2 activate database $DB2DBDFT >> $LOG 2>&1
rc=$?
cat $LOG
exit $rc' > "$TEMPFILE"
  run_tempfile_as "$db2sid"
  return $retcode
}

#
# methods: What methods/operations do we support?
#
sapdatabase_methods() {
  cat <<-!
start
stop
status
monitor
recover
validate-all
methods
meta-data
usage
!
}

#
# sapuserexit : Many SAP customers need some additional processes/tools to run their SAP systems.
#               These specialties do not allow a totally generic SAP cluster resource agent.
#               Someone should write a resource agent for each additional process you need, if it
#               is required to monitor that process within the cluster manager. To enable
#               you to extend this resource agent without developing a new one, this user exit
#               was introduced.
#
#   Arguments: $1 - name of the user exit attribute (only used in log messages)
#              $2 - path of the customer script; nothing is done when empty
#   Returns:   always 0, the exit status of the script is only logged
#
sapuserexit() {
  NAME="$1"
  VALUE="$2"

  if [ -n "$VALUE" ]
  then
    if [ -x "$VALUE" ]
    then
      ocf_log info "Calling userexit ${NAME} with customer script file ${VALUE}"
      # Execute the file itself. The value has just been checked with -x as
      # one path, so it must not be re-parsed by eval (that breaks on blanks
      # and would interpret shell metacharacters contained in the path).
      "$VALUE" > /dev/null 2>&1
      userexit_rc=$?
      ocf_log info "Exiting userexit ${NAME} with customer script file ${VALUE}, returncode: $userexit_rc"
    else
      ocf_log warn "Attribute ${NAME} is set to ${VALUE}, but this file is not executable"
    fi
  fi
  return 0
}

#
# sapdatabase_start : Start the SAP database
#   Runs the pre-start user exit, starts x_server (ADA) or the listener (ORA),
#   then calls $SAPSTARTDB as $sidadm. For JAVA only databases the result of a
#   strict monitor call replaces the start script status. A failed start is
#   followed by one sapdatabase_recover attempt when AUTOMATIC_RECOVER is 1.
#   Returns OCF_SUCCESS or OCF_ERR_GENERIC.
#
sapdatabase_start() {
  sapuserexit PRE_START_USEREXIT "$OCF_RESKEY_PRE_START_USEREXIT"

  case "$DBTYPE" in
    ADA) x_server_start
         ;;
    ORA) listener_start
         ;;
  esac

  output=$(su - "$sidadm" -c "$SAPSTARTDB")
  rc=$?

  if [ "$DBJ2EE_ONLY" -eq 1 ]
  then
    sapdatabase_monitor 1
    rc=$?
  fi

  # two simple tests instead of the obsolescent "[ ... -a ... ]"
  if [ "$rc" -ne 0 ] && [ "${OCF_RESKEY_AUTOMATIC_RECOVER:-0}" -eq 1 ]
  then
    ocf_log warn "SAP database $SID start failed: $output"
    ocf_log warn "Try to recover database $SID"

    output=''
    sapdatabase_recover
    rc=$?
  fi

  if [ "$rc" -eq 0 ]
  then
    ocf_log info "SAP database $SID started: $output"
    rc=$OCF_SUCCESS
    sapuserexit POST_START_USEREXIT "$OCF_RESKEY_POST_START_USEREXIT"
  else
    ocf_log err "SAP database $SID start failed: $output"
    rc=$OCF_ERR_GENERIC
  fi

  return $rc
}

#
# sapdatabase_stop: Stop the SAP database
#   Runs the pre-stop user exit, stops the database with the vendor specific
#   function, then stops the listener (ORA) or x_server (ADA) and runs the
#   post-stop user exit. Returns OCF_SUCCESS or OCF_ERR_GENERIC depending on
#   the database stop only.
#
sapdatabase_stop() {
  sapuserexit PRE_STOP_USEREXIT "$OCF_RESKEY_PRE_STOP_USEREXIT"

  # use of the stopdb kernel script is not possible, because there are too many checks in that
  # script. We want to stop the database regardless of anything.
  #output=`su - $sidadm -c $SAPSTOPDB`

  # stays 0 when DBTYPE matches none of the branches
  sapdb_stop_rc=0
  case "$DBTYPE" in
    ORA) output=$(oracle_stop)
         sapdb_stop_rc=$?
         ;;
    ADA) output=$(maxdb_stop)
         sapdb_stop_rc=$?
         ;;
    DB6) output=$(db6udb_stop)
         sapdb_stop_rc=$?
         ;;
  esac

  # The result is kept in a variable of its own: listener_stop and
  # x_server_stop assign the global "rc", which would otherwise overwrite the
  # outcome of the database stop before it is returned.
  if [ "$sapdb_stop_rc" -eq 0 ]
  then
    ocf_log info "SAP database $SID stopped: $output"
    sapdb_stop_rc=$OCF_SUCCESS
  else
    ocf_log err "SAP database $SID stop failed: $output"
    sapdb_stop_rc=$OCF_ERR_GENERIC
  fi

  case "$DBTYPE" in
    ORA) listener_stop
         ;;
    ADA) x_server_stop
         ;;
  esac

  sapuserexit POST_STOP_USEREXIT "$OCF_RESKEY_POST_STOP_USEREXIT"

  return $sapdb_stop_rc
}

#
# sapdatabase_monitor: Can the given database instance do anything useful?
#
#   Arguments: $1 - 0: only look for database processes (sapdatabase_status)
#                   otherwise: try a real connect, with $SAPDBCONNECT as
#                   $sidadm for ABAP databases or with the JDBC connect test
#                   for JAVA only databases
#   Returns:   OCF_SUCCESS, OCF_NOT_RUNNING, or OCF_ERR_GENERIC when the jar
#              files for the JDBC test cannot be found
#
sapdatabase_monitor() {
  strict=$1
  rc=$OCF_SUCCESS

  # the helper services are (re)started on the fly when they are missing
  case "$DBTYPE" in
    ADA) x_server_status
         if [ $? -ne $OCF_SUCCESS ]; then x_server_start; fi
         ;;
    ORA) listener_status
         if [ $? -ne $OCF_SUCCESS ]; then listener_start; fi
         ;;
  esac

  if [ "$strict" -eq 0 ]
  then
    sapdatabase_status
    rc=$?
  else
    if [ "$DBJ2EE_ONLY" -eq 0 ]
    then
      output=$(echo "$SAPDBCONNECT -d -w /dev/null" | su "$sidadm" 2>&1)
      # exit codes up to 4 of the connect tool are treated as success
      if [ $? -le 4 ]
      then
        rc=$OCF_SUCCESS
      else
        rc=$OCF_NOT_RUNNING
      fi
    else
      MYCP=""
      EXECMD=""

      # WebAS Java 6.40+7.00
      IAIK_JCE="$SECSTORE"/iaik_jce.jar
      IAIK_JCE_EXPORT="$SECSTORE"/iaik_jce_export.jar
      EXCEPTION="$BOOTSTRAP"/exception.jar
      LOGGING="$BOOTSTRAP"/logging.jar
      OPENSQLSTA="$BOOTSTRAP"/opensqlsta.jar
      TC_SEC_SECSTOREFS="$BOOTSTRAP"/tc_sec_secstorefs.jar
      JDDI="$BOOTSTRAP"/../server0/bin/ext/jdbdictionary/jddi.jar
      ANTLR="$BOOTSTRAP"/../server0/bin/ext/antlr/antlr.jar
      FRAME="$BOOTSTRAP"/../server0/bin/system/frame.jar

      # only start jdbcconnect when all jars available
      # (chained simple tests: "[" with more than four operands is unspecified)
      if [ -f "$EXCEPTION" ] && [ -f "$LOGGING" ] && [ -f "$OPENSQLSTA" ] \
         && [ -f "$TC_SEC_SECSTOREFS" ] && [ -f "$JDDI" ] && [ -f "$ANTLR" ] \
         && [ -f "$FRAME" ] && [ -f "$SAPDBCONNECT" ]
      then
        MYCP=".:$FRAME:$ANTLR:$JDDI:$IAIK_JCE_EXPORT:$IAIK_JCE:$EXCEPTION:$LOGGING:$OPENSQLSTA:$TC_SEC_SECSTOREFS:$DB_JARS:$SAPDBCONNECT"
        EXECMD="com.sap.inst.jdbc.connect.JdbcCon -sec $SID:$SID"
      else
        # WebAS Java 7.10
        LAUNCHER=${BOOTSTRAP}/sap.com~tc~bl~offline_launcher~impl.jar
        if [ -f "$DB_JARS" ] && [ -f "$SAPDBCONNECT" ] && [ -f "$LAUNCHER" ]
        then
          MYCP="$LAUNCHER"
          EXECMD="com.sap.engine.offline.OfflineToolStart com.sap.inst.jdbc.connect.JdbcCon ${SAPDBCONNECT}:${SECSTORE}:${DB_JARS}:${BOOTSTRAP} -sec $SID:$SID"
        fi
      fi

      if [ -n "$EXECMD" ]
      then
        # Call java directly instead of through eval, so that paths are not
        # parsed a second time by the shell. $EXECMD is left unquoted on
        # purpose: it carries the class name(s) and their arguments.
        output=$("${JAVA_HOME}/bin/java" -cp "$MYCP" $EXECMD)
        if [ $? -le 0 ]
        then
          rc=$OCF_SUCCESS
        else
          rc=$OCF_NOT_RUNNING
        fi
      else
        output="Cannot find all jar files needed for database monitoring."
        rc=$OCF_ERR_GENERIC
      fi
    fi
  fi

  if [ "$rc" -ne $OCF_SUCCESS ]
  then
    ocf_log err "The SAP database $SID ist not running: $output"
  fi
  return $rc
}

#
# sapdatabase_status: Are there any database processes on this host ?
#   Counts the processes of the database user whose command line matches a
#   vendor specific pattern; the database counts as running when at least
#   SNUM of them are found. Returns OCF_SUCCESS or OCF_NOT_RUNNING.
#
sapdatabase_status() {
  case "$DBTYPE" in
    ADA) SEARCH="$SID/db/pgm/kernel"
         SUSER=$(grep "^SdbOwner" /etc/opt/sdb | awk -F'=' '{print $2}')
         SNUM=2
         ;;
    ORA) SEARCH="ora_[a-z][a-z][a-z][a-z]_"
         SUSER="ora$(echo "$SID" | tr '[:upper:]' '[:lower:]')"
         SNUM=4
         ;;
    DB6) SEARCH="db2[a-z][a-z][a-z][a-z][a-z]"
         SUSER="db2$(echo "$SID" | tr '[:upper:]' '[:lower:]')"
         SNUM=5
         ;;
    *)   # unsupported type: there is no pattern to look for, so report "not
         # running" without running the comparison on unset values
         rc=$OCF_NOT_RUNNING
         return $rc
         ;;
  esac

  # Note: ps cuts off its output at column $COLUMNS, so "ps -ef" can not be used here
  # as the output might be too long.
  cnt=$(ps efo args --user "$SUSER" 2> /dev/null | grep -c "$SEARCH")
  if [ "$cnt" -ge "$SNUM" ]
  then
    rc=$OCF_SUCCESS
  else
    # ocf_log info "Database Instance $SID is not running on `hostname`"
    rc=$OCF_NOT_RUNNING
  fi
  return $rc
}

#
# sapdatabase_recover: run the vendor specific recover function and check the
#   result with a strict monitor call, whose return code is passed on
#
sapdatabase_recover() {
  case "$DBTYPE" in
    ORA) recoutput=$(oracle_recover)
         ;;
    ADA) recoutput=$(maxdb_recover)
         ;;
    DB6) recoutput=$(db6udb_recover)
         ;;
  esac

  sapdatabase_monitor 1
  retcode=$?

  if [ "$retcode" -eq $OCF_SUCCESS ]
  then
    ocf_log info "Recover of SAP database $SID was successful: $recoutput"
  else
    ocf_log err "Recover of SAP database $SID failed: $recoutput"
  fi

  return $retcode
}

#
# sapdatabase_validate: Check the semantics of the input parameters
#   SID must be one upper case letter followed by two upper case letters or
#   digits, DBTYPE one of ORA, ADA, DB6. Returns OCF_SUCCESS or OCF_ERR_ARGS.
#
sapdatabase_validate() {
  rc=$OCF_SUCCESS
  sid_matches=$(echo "$SID" | grep -c '^[A-Z][A-Z0-9][A-Z0-9]$')
  if [ "$sid_matches" -ne 1 ]
  then
    ocf_log err "Parsing parameter SID: '$SID' is not a valid system ID!"
    rc=$OCF_ERR_ARGS
  fi

  case "$DBTYPE" in
    ORA|ADA|DB6) ;;
    *) ocf_log err "Parsing parameter DBTYPE: '$DBTYPE' is not a supported database type!"
       rc=$OCF_ERR_ARGS ;;
  esac

  return $rc
}

#
# 'main' starts here...
#
# Exactly one argument (the operation) is expected. Operations that need no
# instance parameters are answered first; for everything else the OCF_RESKEY_*
# parameters are checked and normalised before the operation is dispatched.
#

if [ $# -ne 1 ]
then
  usage
  exit $OCF_ERR_ARGS
fi

# These operations don't require OCF instance parameters to be set
case "$1" in
  meta-data)    meta_data
                exit $OCF_SUCCESS;;

  usage)        usage
                exit $OCF_SUCCESS;;

  methods)      sapdatabase_methods
                exit $?;;

  *);;
esac

# Set a tempfile and make sure to clean it up again
TEMPFILE="$(mktemp /tmp/SAPDatabase.tmp.XXXXXX)"
trap trap_handler INT TERM

# Everything after here must call do_exit to remove temp file

US=$(id -u -n)
US=$(echo $US)
if [ "$US" != root ]
then
  ocf_log err "$0 must be run as root"
  do_exit $OCF_ERR_PERM
fi

# mandatory parameter check
if [ -z "$OCF_RESKEY_SID" ]; then
  ocf_log err "Please set OCF_RESKEY_SID to the SAP system id!"
  do_exit $OCF_ERR_ARGS
fi
SID=$OCF_RESKEY_SID

if [ -z "$OCF_RESKEY_DBTYPE" ]; then
  ocf_log err "Please set OCF_RESKEY_DBTYPE to the database vendor specific tag (ORA,ADA,DB6)!"
  do_exit $OCF_ERR_ARGS
fi
DBTYPE=$(echo "$OCF_RESKEY_DBTYPE" | tr '[:lower:]' '[:upper:]')

# optional OCF parameters, we try to guess which directories are correct
EXESTARTDB="startdb"
EXESTOPDB="stopdb"
EXEDBCONNECT="R3trans"
if [ -z "$OCF_RESKEY_DBJ2EE_ONLY" ]; then
  DBJ2EE_ONLY=0
else
  case "$OCF_RESKEY_DBJ2EE_ONLY" in
    1|true|TRUE|yes|YES) DBJ2EE_ONLY=1
                         EXESTARTDB="startj2eedb"
                         EXESTOPDB="stopj2eedb"
                         EXEDBCONNECT="jdbcconnect.jar"
                         ;;
    0|false|FALSE|no|NO) DBJ2EE_ONLY=0;;
    # report the value that was actually passed in; DBJ2EE_ONLY itself has
    # not been assigned in this branch
    *) ocf_log err "Parsing parameter DBJ2EE_ONLY: '$OCF_RESKEY_DBJ2EE_ONLY' is not a boolean value!"
       do_exit $OCF_ERR_ARGS ;;
  esac
fi

if [ -z "$OCF_RESKEY_NETSERVICENAME" ]; then
  case "$DBTYPE" in
    ORA|ora) NETSERVICENAME="LISTENER";;
    *)       NETSERVICENAME="";;
  esac
else
  NETSERVICENAME="$OCF_RESKEY_NETSERVICENAME"
fi

if [ -z "$OCF_RESKEY_STRICT_MONITORING" ]; then
  OCF_RESKEY_STRICT_MONITORING=0
else
  case "$OCF_RESKEY_STRICT_MONITORING" in
    1|true|TRUE|yes|YES) OCF_RESKEY_STRICT_MONITORING=1;;
    0|false|FALSE|no|NO) OCF_RESKEY_STRICT_MONITORING=0;;
    *) ocf_log err "Parsing parameter STRICT_MONITORING: '$OCF_RESKEY_STRICT_MONITORING' is not a boolean value!"
       do_exit $OCF_ERR_ARGS ;;
  esac
fi

# Candidate directories for the SAP executables. The list is expanded without
# quotes on purpose, so that it is split into words and the '*' is globbed.
PATHLIST="
$OCF_RESKEY_DIR_EXECUTABLE
/usr/sap/$SID/*/exe
/usr/sap/$SID/SYS/exe/run
/sapmnt/$SID/exe
"
DIR_EXECUTABLE=""
for EXEPATH in $PATHLIST
do
  # the first directory that holds all three executables wins
  if [ -x "$EXEPATH/$EXESTARTDB" ] && [ -x "$EXEPATH/$EXESTOPDB" ] && [ -x "$EXEPATH/$EXEDBCONNECT" ]
  then
    DIR_EXECUTABLE=$EXEPATH
    SAPSTARTDB=$EXEPATH/$EXESTARTDB
    SAPSTOPDB=$EXEPATH/$EXESTOPDB
    SAPDBCONNECT=$EXEPATH/$EXEDBCONNECT
    break
  fi
done
if [ -z "$DIR_EXECUTABLE" ]
then
  ocf_log warn "Cannot find $EXESTARTDB,$EXESTOPDB and $EXEDBCONNECT executable, please set DIR_EXECUTABLE parameter!"
  do_exit $OCF_NOT_RUNNING
fi

if [ "$DBJ2EE_ONLY" -eq 1 ]
then
  if [ -n "$OCF_RESKEY_DIR_BOOTSTRAP" ]
  then
    BOOTSTRAP="$OCF_RESKEY_DIR_BOOTSTRAP"
  else
    # Take the first matching bootstrap directory. ("echo glob | head -1"
    # prints all matches on a single line, so head never selected one.)
    # Without a match the unexpanded pattern is kept, as before.
    for BOOTSTRAP in /usr/sap/$SID/*/j2ee/cluster/bootstrap
    do
      break
    done
  fi

  if [ -n "$OCF_RESKEY_DIR_SECSTORE" ]
  then
    SECSTORE="$OCF_RESKEY_DIR_SECSTORE"
  else
    SECSTORE=/usr/sap/$SID/SYS/global/security/lib/tools
  fi

  if [ -n "$OCF_RESKEY_JAVA_HOME" ]
  then
    JAVA_HOME="$OCF_RESKEY_JAVA_HOME"
    PATH=$JAVA_HOME/bin:$PATH
  else
    if [ -n "$JAVA_HOME" ]
    then
      PATH=$JAVA_HOME/bin:$PATH
    else
      ocf_log err "Cannot find JAVA_HOME directory, please set JAVA_HOME parameter!"
      do_exit $OCF_NOT_RUNNING
    fi
  fi

  if [ -n "$OCF_RESKEY_DB_JARS" ]
  then
    DB_JARS=$OCF_RESKEY_DB_JARS
  else
    if [ -f "$BOOTSTRAP"/bootstrap.properties ]; then
      # sed turns the escaped "\:" of the properties file into ":". Inside
      # $( ) the backslashes reach sed as written, so two are needed here
      # where the former backtick form needed three.
      DB_JARS=$(grep -i rdbms.driverLocation "$BOOTSTRAP/bootstrap.properties" | sed -e 's/\\:/:/g' | awk -F= '{print $2}')
    fi
  fi
fi

if [ -z "$OCF_RESKEY_AUTOMATIC_RECOVER" ]
then
  OCF_RESKEY_AUTOMATIC_RECOVER=0
else
  case "$OCF_RESKEY_AUTOMATIC_RECOVER" in
    1|true|TRUE|yes|YES) OCF_RESKEY_AUTOMATIC_RECOVER=1;;
    0|false|FALSE|no|NO) OCF_RESKEY_AUTOMATIC_RECOVER=0;;
    # The value is compared numerically later on; anything else would only
    # produce a shell error there. Recovery stays disabled, as it effectively
    # was for such values.
    *) ocf_log warn "Parsing parameter AUTOMATIC_RECOVER: '$OCF_RESKEY_AUTOMATIC_RECOVER' is not a boolean value, automatic recovery is disabled!"
       OCF_RESKEY_AUTOMATIC_RECOVER=0;;
  esac
fi

# as root user we need the library path to the SAP kernel to be able to call executables
if [ "$(echo "$LD_LIBRARY_PATH" | grep -c "^$DIR_EXECUTABLE\>")" -eq 0 ]; then
  LD_LIBRARY_PATH=$DIR_EXECUTABLE${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH
  export LD_LIBRARY_PATH
fi
sidadm="$(echo "$SID" | tr '[:upper:]' '[:lower:]')adm"

# What kind of method was invoked?
case "$1" in

  start)        sapdatabase_start
                do_exit $?;;

  stop)         sapdatabase_stop
                do_exit $?;;

  monitor)      sapdatabase_monitor $OCF_RESKEY_STRICT_MONITORING
                do_exit $?;;

  status)       sapdatabase_status
                do_exit $?;;

  recover)      sapdatabase_recover
                do_exit $?;;

  validate-all) sapdatabase_validate
                do_exit $?;;

  *)            sapdatabase_methods
                do_exit $OCF_ERR_UNIMPLEMENTED;;
esac

# End of SAPDatabase. The patch header and the banner of the next file
# (SAPInstance) follow as found in the patch.
diff --git a/rgmanager/src/resources/SAPInstance b/rgmanager/src/resources/SAPInstance
index 134222323..8e7beee13 100644
--- a/rgmanager/src/resources/SAPInstance
+++ b/rgmanager/src/resources/SAPInstance
@@ -1,592 +1,592 @@
#!/bin/sh
#
# SAPInstance
#
# Description:  Manages a single SAP Instance as a High-Availability
#               resource. One SAP Instance is defined by one
#               SAP Instance-Profile. start/stop handles all services
#               of the START-Profile, status and monitor care only
#               about essential services.
#
# Author:       Alexander Krauth, June 2006
# Support:      linux@sap.com
# License:      GNU General Public License (GPL)
# Copyright:    (c) 2006, 2007 Alexander Krauth
#
# An example usage:
#      See usage() function below for more details...
# # OCF instance parameters: # OCF_RESKEY_InstanceName # OCF_RESKEY_DIR_EXECUTABLE (optional, well known directories will be searched by default) # OCF_RESKEY_DIR_PROFILE (optional, well known directories will be searched by default) # OCF_RESKEY_START_PROFILE (optional, well known directories will be searched by default) # OCF_RESKEY_START_WAITTIME (optional, to solve timing problems during J2EE-Addin start) # OCF_RESKEY_AUTOMATIC_RECOVER (optional, automatic startup recovery using cleanipc, default is false) # OCF_RESKEY_PRE_START_USEREXIT (optional, lists a script which can be executed before the resource is started) # OCF_RESKEY_POST_START_USEREXIT (optional, lists a script which can be executed after the resource is started) # OCF_RESKEY_PRE_STOP_USEREXIT (optional, lists a script which can be executed before the resource is stopped) # OCF_RESKEY_POST_STOP_USEREXIT (optional, lists a script which can be executed after the resource is stopped) # ####################################################################### # Initialization: if [ -f $(dirname $0)/.ocf-shellfuncs ]; then . $(dirname $0)/.ocf-shellfuncs elif [ -f $(dirname $0)/ocf-shellfuncs ]; then LC_ALL=C LANG=C PATH=/bin:/sbin:/usr/bin:/usr/sbin export LC_ALL LANG PATH . $(dirname $0)/ocf-shellfuncs else echo Could not find ocf-shellfuncs! exit 1 fi ####################################################################### SH=/bin/sh usage() { methods=`sapinstance_methods` methods=`echo $methods | tr ' ' '|'` cat <<-! usage: $0 ($methods) $0 manages a SAP Instance as an HA resource. The 'start' operation starts the instance. The 'stop' operation stops the instance. The 'status' operation reports whether the instance is running The 'monitor' operation reports whether the instance seems to be working The 'validate-all' operation reports whether the parameters are valid The 'methods' operation reports on the methods $0 supports ! 
#
# meta_data: Print the OCF resource agent meta-data (XML) for SAPInstance
#            on stdout.
#
# Takes no arguments. The XML below is emitted verbatim; it contains no shell
# expansions. The short descriptions of the two *_STOP_USEREXIT parameters
# describe the stop hooks (they used to repeat the start-hook wording).
#
meta_data() {
	cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1-modified.dtd">
<resource-agent name="SAPInstance">
<version>1.92.1</version>

<longdesc lang="en">
Resource script for SAP. It manages a SAP Instance as an HA resource.
</longdesc>
<shortdesc lang="en">SAP instance resource agent</shortdesc>

<parameters>
 <parameter name="InstanceName" unique="1" required="1" primary="1">
  <longdesc lang="en">The full qualified SAP instance name. e.g. P01_DVEBMGS00_sapp01ci</longdesc>
  <shortdesc lang="en">instance name: SID_INSTANCE_VIR-HOSTNAME</shortdesc>
  <content type="string" default="" />
 </parameter>
 <parameter name="DIR_EXECUTABLE" unique="0" required="0">
  <longdesc lang="en">The full qualified path where to find sapstartsrv and sapcontrol.</longdesc>
  <shortdesc lang="en">path of sapstartsrv and sapcontrol</shortdesc>
  <content type="string" default="" />
 </parameter>
 <parameter name="DIR_PROFILE" unique="0" required="0">
  <longdesc lang="en">The full qualified path where to find the SAP START profile.</longdesc>
  <shortdesc lang="en">path of start profile</shortdesc>
  <content type="string" default="" />
 </parameter>
 <parameter name="START_PROFILE" unique="0" required="0">
  <longdesc lang="en">The name of the SAP START profile.</longdesc>
  <shortdesc lang="en">start profile name</shortdesc>
  <content type="string" default="" />
 </parameter>
 <parameter name="START_WAITTIME" unique="0" required="0">
  <longdesc lang="en">After that time in seconds a monitor operation is executed by the resource agent. Does the monitor return SUCCESS, the start is handled as SUCCESS. This is useful to resolve timing problems with e.g. the J2EE-Addin instance.</longdesc>
  <shortdesc lang="en">Check the successful start after that time (do not wait for J2EE-Addin)</shortdesc>
  <content type="string" default="3600" />
 </parameter>
 <parameter name="AUTOMATIC_RECOVER" unique="0" required="0">
  <longdesc lang="en">The SAPInstance resource agent tries to recover a failed start attempt automaticaly one time. This is done by killing runing instance processes and executing cleanipc.</longdesc>
  <shortdesc lang="en">Enable or disable automatic startup recovery</shortdesc>
  <content type="boolean" default="false"/>
 </parameter>
 <parameter name="PRE_START_USEREXIT" unique="0" required="0">
  <longdesc lang="en">The full qualified path where to find a script or program which should be executed before this resource gets started.</longdesc>
  <shortdesc lang="en">path to a pre-start script</shortdesc>
  <content type="string" default="" />
 </parameter>
 <parameter name="POST_START_USEREXIT" unique="0" required="0">
  <longdesc lang="en">The full qualified path where to find a script or program which should be executed after this resource got started.</longdesc>
  <shortdesc lang="en">path to a post-start script</shortdesc>
  <content type="string" default="" />
 </parameter>
 <parameter name="PRE_STOP_USEREXIT" unique="0" required="0">
  <longdesc lang="en">The full qualified path where to find a script or program which should be executed before this resource gets stopped.</longdesc>
  <shortdesc lang="en">path to a pre-stop script</shortdesc>
  <content type="string" default="" />
 </parameter>
 <parameter name="POST_STOP_USEREXIT" unique="0" required="0">
  <longdesc lang="en">The full qualified path where to find a script or program which should be executed after this resource got stopped.</longdesc>
  <shortdesc lang="en">path to a post-stop script</shortdesc>
  <content type="string" default="" />
 </parameter>
</parameters>

<actions>
<action name="start" timeout="180" />
<action name="stop" timeout="240" />
<action name="status" timeout="60" depth="0" interval="120" start-delay="240" />
<action name="monitor" depth="0" timeout="60" interval="120" start-delay="240" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
<action name="methods" timeout="5" />
</actions>
</resource-agent>
END
}
#
# sapuserexit : Many SAP customers need some additional processes/tools to run their SAP systems.
#               These specialties do not allow a totally generic SAP cluster resource agent.
#               To let you extend this resource agent without developing a new one,
#               this user exit hook was introduced.
#
# Arguments: $1 - name of the user exit (only used in log messages)
#            $2 - path of the script/program to run; an empty value disables the hook
# Returns:   always 0. The exit status of the customer script is logged, never propagated.
#
sapuserexit() {
  NAME="$1"
  VALUE="$2"

  if [ -n "$VALUE" ]
  then
    if [ -x "$VALUE" ]
    then
      ocf_log info "Calling userexit ${NAME} with customer script file ${VALUE}"
      # Run the file itself rather than eval'ing the string: the -x test above
      # already requires VALUE to name a single file, and eval would re-parse the
      # path (breaking on whitespace and executing embedded shell syntax).
      "$VALUE" > /dev/null 2>&1
      # Dedicated variable: callers keep their own status in the global "rc".
      userexit_rc=$?
      ocf_log info "Exiting userexit ${NAME} with customer script file ${VALUE}, returncode: $userexit_rc"
    else
      ocf_log warn "Attribute ${NAME} is set to ${VALUE}, but this file is not executable"
    fi
  fi
  return 0
}
#
# sapinstance_stop: Stop the SAP instance
#
# Runs the PRE_STOP user exit, makes sure sapstartsrv is reachable, asks
# sapcontrol to stop the instance and waits for it to report "stopped".
# The POST_STOP user exit is run regardless of the outcome.
#
# Returns: $OCF_SUCCESS when both sapcontrol calls succeed,
#          $OCF_ERR_GENERIC otherwise.
#
sapinstance_stop() {
  sapuserexit PRE_STOP_USEREXIT "$OCF_RESKEY_PRE_STOP_USEREXIT"

  check_sapstartsrv

  # The wait is only attempted when the Stop request itself was accepted;
  # "output" always holds the text of the last sapcontrol call that ran.
  if output=$($SAPCONTROL -nr $InstanceNr -function Stop) &&
     output=$($SAPCONTROL -nr $InstanceNr -function WaitforStopped 3600 1)
  then
    ocf_log info "SAP Instance $SID-$InstanceName stopped: $output"
    rc=$OCF_SUCCESS
  else
    ocf_log err "SAP Instance $SID-$InstanceName stop failed: $output"
    rc=$OCF_ERR_GENERIC
  fi

  sapuserexit POST_STOP_USEREXIT "$OCF_RESKEY_POST_STOP_USEREXIT"

  return $rc
}
#
# sapinstance_validate: Check the semantics of the input parameters
#
# Verifies the four values derived from the instance profile name
# (SID, InstanceName, InstanceNr, SAPVIRHOST) against their expected
# patterns. Every failing value is logged; checking continues so that all
# problems are reported in one run.
#
# Returns: $OCF_SUCCESS if every value matches, $OCF_ERR_ARGS otherwise.
#
sapinstance_validate() {
  rc=$OCF_SUCCESS

  # Table of checks: value, regular expression, description used in the message.
  # The function takes no arguments, so its positional parameters are free.
  set -- \
    "$SID"          '^[A-Z][A-Z0-9][A-Z0-9]$'   "system ID" \
    "$InstanceName" '^[A-Z].*[0-9][0-9]$'       "instance name" \
    "$InstanceNr"   '^[0-9][0-9]$'              "instance number" \
    "$SAPVIRHOST"   '^[A-Za-z][A-Za-z0-9_-]*$'  "hostname"

  while [ $# -ge 3 ]
  do
    # Exactly one matching line is required.
    if [ "$(echo "$1" | grep -c "$2")" -ne 1 ]
    then
      ocf_log err "Parsing instance profile name: '$1' is not a valid $3!"
      rc=$OCF_ERR_ARGS
    fi
    shift 3
  done

  return $rc
}
#

# Exactly one argument (the requested operation) is expected.
if [ $# -ne 1 ]
then
  usage
  exit $OCF_ERR_ARGS
fi

# These operations don't require OCF instance parameters to be set
case "$1" in
  meta-data)
    meta_data
    exit $OCF_SUCCESS;;
  usage)
    usage
    exit $OCF_SUCCESS;;
  methods)
    sapinstance_methods
    exit $?;;
  *);;
esac

US=`id -u -n`
US=`echo $US`
# Quoted on purpose: with an empty $US the unquoted test is a syntax error,
# which evaluates as "false" and would let a non-root caller slip through.
if [ "$US" != root ]
then
  ocf_log err "$0 must be run as root"
  exit $OCF_ERR_PERM
fi

# parameter check
if [ -z "$OCF_RESKEY_InstanceName" ]
then
  ocf_log err "Please set OCF_RESKEY_InstanceName to the name to the SAP instance profile!"
  exit $OCF_ERR_ARGS
fi

# InstanceName has the form SID_INSTANCE_VIRHOSTNAME, e.g. P01_DVEBMGS00_sapp01ci;
# the instance number is the trailing two digits of the INSTANCE part.
SID=`echo "$OCF_RESKEY_InstanceName" | cut -d_ -f1`
InstanceName=`echo "$OCF_RESKEY_InstanceName" | cut -d_ -f2`
InstanceNr=`echo "$InstanceName" | sed 's/.*\([0-9][0-9]\)$/\1/'`
SAPVIRHOST=`echo "$OCF_RESKEY_InstanceName" | cut -d_ -f3`

# optional OCF parameters, we try to guess which directories are correct
if [ -z "$OCF_RESKEY_DIR_EXECUTABLE" ]
then
  if [ -x "/usr/sap/$SID/$InstanceName/exe/sapstartsrv" ] && [ -x "/usr/sap/$SID/$InstanceName/exe/sapcontrol" ]
  then
    DIR_EXECUTABLE="/usr/sap/$SID/$InstanceName/exe"
    SAPSTARTSRV="/usr/sap/$SID/$InstanceName/exe/sapstartsrv"
    SAPCONTROL="/usr/sap/$SID/$InstanceName/exe/sapcontrol"
  elif [ -x "/usr/sap/$SID/SYS/exe/run/sapstartsrv" ] && [ -x "/usr/sap/$SID/SYS/exe/run/sapcontrol" ]
  then
    DIR_EXECUTABLE="/usr/sap/$SID/SYS/exe/run"
    SAPSTARTSRV="/usr/sap/$SID/SYS/exe/run/sapstartsrv"
    SAPCONTROL="/usr/sap/$SID/SYS/exe/run/sapcontrol"
  else
    ocf_log warn "Cannot find sapstartsrv and sapcontrol executable, please set DIR_EXECUTABLE parameter!"
    exit $OCF_NOT_RUNNING
  fi
else
  DIR_EXECUTABLE="$OCF_RESKEY_DIR_EXECUTABLE"
  SAPSTARTSRV="$OCF_RESKEY_DIR_EXECUTABLE/sapstartsrv"
  SAPCONTROL="$OCF_RESKEY_DIR_EXECUTABLE/sapcontrol"
fi

if [ -z "$OCF_RESKEY_DIR_PROFILE" ]
then
  if [ -d "/usr/sap/$SID/SYS/profile/" ]
  then
    DIR_PROFILE="/usr/sap/$SID/SYS/profile"
  else
    ocf_log warn "Expected /usr/sap/$SID/SYS/profile/ to be a directory, please set DIR_PROFILE parameter!"
    exit $OCF_NOT_RUNNING
  fi
else
  DIR_PROFILE="$OCF_RESKEY_DIR_PROFILE"
fi

if [ -z "$OCF_RESKEY_START_PROFILE" ]
then
  SAPSTARTPROFILE="$DIR_PROFILE/START_${InstanceName}_${SAPVIRHOST}"
  if [ ! -r "$SAPSTARTPROFILE" ]
  then
    ocf_log warn "Expected $SAPSTARTPROFILE to be the instance START profile, please set START_PROFILE parameter!"
    exit $OCF_NOT_RUNNING
  fi
else
  # NOTE(review): START_PROFILE is used as given and is not combined with
  # DIR_PROFILE, so it has to be a usable path on its own - confirm intent.
  SAPSTARTPROFILE="$OCF_RESKEY_START_PROFILE"
fi

if [ -z "$OCF_RESKEY_START_WAITTIME" ]
then
  OCF_RESKEY_START_WAITTIME=3600
fi

# Normalise AUTOMATIC_RECOVER to 0/1; it is compared numerically during start.
if [ -z "$OCF_RESKEY_AUTOMATIC_RECOVER" ]
then
  OCF_RESKEY_AUTOMATIC_RECOVER=0
else
  case "$OCF_RESKEY_AUTOMATIC_RECOVER" in
    1|true|TRUE|yes|YES) OCF_RESKEY_AUTOMATIC_RECOVER=1;;
    0|false|FALSE|no|NO) OCF_RESKEY_AUTOMATIC_RECOVER=0;;
    *)
      # An unrecognised value would otherwise stay non-numeric and break the
      # "-eq 1" test in sapinstance_start; treat it as "disabled".
      ocf_log warn "Unknown value '$OCF_RESKEY_AUTOMATIC_RECOVER' for AUTOMATIC_RECOVER, automatic recovery is disabled"
      OCF_RESKEY_AUTOMATIC_RECOVER=0;;
  esac
fi

# as root user we need the library path to the SAP kernel to be able to call sapcontrol
if [ `echo $LD_LIBRARY_PATH | grep -c "^$DIR_EXECUTABLE\>"` -eq 0 ]; then
  LD_LIBRARY_PATH=$DIR_EXECUTABLE${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH
  export LD_LIBRARY_PATH
fi

# Name of the <sid>adm operating system user, e.g. p01adm for SID P01.
sidadm="`echo $SID | tr '[:upper:]' '[:lower:]'`adm"

# What kind of method was invoked?
case "$1" in
  start)
    sapinstance_start
    exit $?;;
  stop)
    sapinstance_stop
    exit $?;;
  status|monitor)
    sapinstance_monitor
    exit $?;;
  validate-all)
    sapinstance_validate
    exit $?;;
  *)
    sapinstance_methods
    exit $OCF_ERR_UNIMPLEMENTED;;
esac
</shortdesc> <content type="string"/> </parameter> <parameter name="mysqld_options"> <longdesc lang="en"> Other command-line options for mysqld </longdesc> <shortdesc lang="en"> Other command-line options for mysqld </shortdesc> <content type="string" /> </parameter> <parameter name="startup_wait"> <longdesc lang="en"> Wait X seconds for correct end of service startup </longdesc> <shortdesc lang="en"> Wait X seconds for correct end of service startup </shortdesc> - <content type="number" default="30" /> + <content type="integer" default="30" /> </parameter> <parameter name="shutdown_wait"> <longdesc lang="en"> Wait X seconds for correct end of service shutdown </longdesc> <shortdesc lang="en"> Wait X seconds for correct end of service shutdown </shortdesc> <content type="integer" /> </parameter> <parameter name="service_name" inherit="service%name"> <longdesc lang="en"> Inherit the service name. We need to know the service name in order to determine file systems and IPs for this service. </longdesc> <shortdesc lang="en"> Inherit the service name. 
</shortdesc> <content type="string"/> </parameter> </parameters> <actions> <action name="start" timeout="0"/> <action name="stop" timeout="0"/> <!-- Checks to see if it''s mounted in the right place --> <action name="status" interval="1m" timeout="10"/> <action name="monitor" interval="1m" timeout="10"/> <!-- Checks to see if we can read from the mountpoint --> <action name="status" depth="10" timeout="30" interval="5m"/> <action name="monitor" depth="10" timeout="30" interval="5m"/> <action name="meta-data" timeout="0"/> <action name="validate-all" timeout="0"/> </actions> <special tag="rgmanager"> </special> </resource-agent> diff --git a/rgmanager/src/resources/ra-api-1-modified.dtd b/rgmanager/src/resources/ra-api-1-modified.dtd index 9c42d23b2..67eb94d8b 100644 --- a/rgmanager/src/resources/ra-api-1-modified.dtd +++ b/rgmanager/src/resources/ra-api-1-modified.dtd @@ -1,68 +1,68 @@ <?xml version="1.0" encoding="ISO-8859-1" ?> <!-- This is based on the RA-API-1.0 DTD from: http://www.opencf.org/cgi-bin/viewcvs.cgi/specs/ra/ra-api-1.dtd There are additions for rgmanager. These additions should be ignored by other RMs. --> <!ELEMENT resource-agent (version,longdesc,shortdesc,parameters,actions,special*) > <!ATTLIST resource-agent name CDATA #REQUIRED version CDATA #IMPLIED> <!ELEMENT version (#PCDATA)> <!ELEMENT parameters (parameter*)> <!ELEMENT actions (action*)> <!-- Primary and required are for rgmanager use. 
--> <!ELEMENT parameter (longdesc+,shortdesc+,content)> <!ATTLIST parameter name CDATA #REQUIRED primary (1|0) "0" required (1|0) "0" inherit CDATA "" unique (1|0) "0" reconfig (1|0) "0"> <!ELEMENT longdesc ANY> <!ATTLIST longdesc lang NMTOKEN #IMPLIED> <!ELEMENT shortdesc ANY> <!ATTLIST shortdesc lang NMTOKEN #IMPLIED> <!ELEMENT content EMPTY> <!ATTLIST content type (string|integer|boolean) #REQUIRED default CDATA #IMPLIED> <!ELEMENT action EMPTY> <!ATTLIST action - name (start|stop|recover|status|reconfig|monitor|reload|meta-data|validate-all|migrate|methods) #REQUIRED + name (start|stop|recover|status|reconfig|monitor|reload|meta-data|validate-all|verify-all|migrate|methods) #REQUIRED timeout CDATA #REQUIRED interval CDATA #IMPLIED start-delay CDATA #IMPLIED depth CDATA #IMPLIED> <!-- Special tag list for rgmanager --> <!ELEMENT special (attributes*, child*)> <!ATTLIST special tag CDATA #REQUIRED> <!ELEMENT attributes EMPTY> <!ATTLIST attributes maxinstances CDATA "0" init_on_add CDATA "0" destroy_on_delete CDATA "0"> <!ELEMENT child EMPTY> <!ATTLIST child type CDATA #REQUIRED forbid (1|0) "0" start CDATA "100" stop CDATA "0"> diff --git a/rgmanager/src/resources/vm.sh b/rgmanager/src/resources/vm.sh index 117082308..100e34932 100644 --- a/rgmanager/src/resources/vm.sh +++ b/rgmanager/src/resources/vm.sh @@ -1,1087 +1,1087 @@ #!/bin/bash # # Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved. # Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # PATH=/bin:/sbin:/usr/bin:/usr/sbin export PATH . $(dirname $0)/ocf-shellfuncs || exit 1 # # Virtual Machine start/stop script (requires the virsh command) # # Indeterminate state: xend/libvirtd is down. export OCF_APP_ERR_INDETERMINATE=150 meta_data() { cat <<EOT <?xml version="1.0"?> <resource-agent version="rgmanager 2.0" name="vm"> <version>1.0</version> <longdesc lang="en"> Defines a Virtual Machine </longdesc> <shortdesc lang="en"> Defines a Virtual Machine </shortdesc> <parameters> <parameter name="name" primary="1"> <longdesc lang="en"> This is the name of the virtual machine. </longdesc> <shortdesc lang="en"> Name </shortdesc> <content type="string"/> </parameter> <parameter name="domain" reconfig="1"> <longdesc lang="en"> Failover domains define lists of cluster members to try in the event that the host of the virtual machine fails. </longdesc> <shortdesc lang="en"> Cluster failover Domain </shortdesc> <content type="string"/> </parameter> <parameter name="autostart" reconfig="1"> <longdesc lang="en"> If set to yes, this resource group will automatically be started after the cluster forms a quorum. If set to no, this virtual machine will start in the 'disabled' state after the cluster forms a quorum. </longdesc> <shortdesc lang="en"> Automatic start after quorum formation </shortdesc> <content type="boolean" default="1"/> </parameter> <parameter name="exclusive" reconfig="1"> <longdesc lang="en"> If set, this resource group will only relocate to nodes which have no other resource groups running in the event of a failure. If no empty nodes are available, this resource group will not be restarted after a failure. Additionally, resource groups will not automatically relocate to the node running this resource group. 
This option can be overridden by manual start and/or relocate operations. </longdesc> <shortdesc lang="en"> Exclusive resource group </shortdesc> <content type="boolean" default="0"/> </parameter> <parameter name="recovery" reconfig="1"> <longdesc lang="en"> This currently has three possible options: "restart" tries to restart this virtual machine locally before attempting to relocate (default); "relocate" does not bother trying to restart the VM locally; "disable" disables the VM if it fails. </longdesc> <shortdesc lang="en"> Failure recovery policy </shortdesc> <content type="string"/> </parameter> <parameter name="migration_mapping" reconfig="1"> <longdesc lang="en"> Mapping of the hostname of a target cluster member to a different hostname </longdesc> <shortdesc lang="en"> memberhost:targethost,memberhost:targethost .. </shortdesc> <content type="string"/> </parameter> <parameter name="use_virsh"> <longdesc lang="en"> Force use of virsh instead of xm on Xen machines. </longdesc> <shortdesc lang="en"> If set to 1, vm.sh will use the virsh command to manage virtual machines instead of xm. This is required when using non-Xen virtual machines (e.g. qemu / KVM). </shortdesc> <content type="integer" default=""/> </parameter> <parameter name="xmlfile"> <longdesc lang="en"> Full path to libvirt XML file describing the domain. </longdesc> <shortdesc lang="en"> Full path to libvirt XML file describing the domain. </shortdesc> <content type="string"/> </parameter> <parameter name="migrate"> <longdesc lang="en"> Migration type (live or pause, default = live). </longdesc> <shortdesc lang="en"> Migration type (live or pause, default = live). </shortdesc> <content type="string" default="live"/> </parameter> <parameter name="path"> <longdesc lang="en"> Path specification vm.sh will search for the specified VM configuration file. /path1:/path2:... </longdesc> <shortdesc lang="en"> Path to virtual machine configuration files. 
</shortdesc> <content type="string"/> </parameter> <parameter name="snapshot"> <longdesc lang="en"> Path to the snapshot directory where the virtual machine image will be stored. </longdesc> <shortdesc lang="en"> Path to the snapshot directory where the virtual machine image will be stored. </shortdesc> <content type="string" default=""/> </parameter> <parameter name="depend"> <longdesc lang="en"> Service dependency; will not start without the specified service running. </longdesc> <shortdesc lang="en"> Top-level service this depends on, in service:name format. </shortdesc> <content type="string"/> </parameter> <parameter name="depend_mode"> <longdesc lang="en"> Service dependency mode. hard - This service is stopped/started if its dependency is stopped/started soft - This service only depends on the other service for initial startip. If the other service stops, this service is not stopped. </longdesc> <shortdesc lang="en"> Service dependency mode (soft or hard). </shortdesc> <content type="string" default="hard"/> </parameter> <parameter name="max_restarts" reconfig="1"> <longdesc lang="en"> Maximum restarts for this service. </longdesc> <shortdesc lang="en"> Maximum restarts for this service. </shortdesc> <content type="string" default="0"/> </parameter> <parameter name="restart_expire_time" reconfig="1"> <longdesc lang="en"> Restart expiration time. A restart is forgotten after this time. When combined with the max_restarts option, this lets administrators specify a threshold for when to fail over services. If max_restarts is exceeded in this given expiration time, the service is relocated instead of restarted again. </longdesc> <shortdesc lang="en"> Restart expiration time; amount of time before a restart is forgotten. </shortdesc> <content type="string" default="0"/> </parameter> <parameter name="status_program" reconfig="1"> <longdesc lang="en"> Ordinarily, only the presence/health of a virtual machine is checked. 
If specified, the status_program value is executed during a depth 10 check. The intent of this program is to ascertain the status of critical services within a virtual machine. </longdesc> <shortdesc lang="en"> Additional status check program </shortdesc> <content type="string" default=""/> </parameter> <parameter name="hypervisor"> - <shortdesc lang="en"> - Hypervisor - </shortdesc > <longdesc lang="en"> Specify hypervisor tricks to use. Default = auto. Other supported options are xen and qemu. </longdesc> - <content type="string" default="auto" /> + <shortdesc lang="en"> + Hypervisor + </shortdesc > + <content type="string" default="auto"/> </parameter> <parameter name="hypervisor_uri"> <longdesc lang="en"> Hypervisor URI. Generally, this is keyed off of the hypervisor and does not need to be set. </longdesc> <shortdesc lang="en"> Hypervisor URI (normally automatic). </shortdesc > <content type="string" default="auto" /> </parameter> <parameter name="migration_uri"> <longdesc lang="en"> Migration URI. Generally, this is keyed off of the hypervisor and does not need to be set. </longdesc> <shortdesc lang="en"> Migration URI (normally automatic). </shortdesc > <content type="string" default="auto" /> </parameter> </parameters> <actions> <action name="start" timeout="300"/> <action name="stop" timeout="120"/> <action name="status" timeout="10" interval="30"/> <action name="monitor" timeout="10" interval="30"/> <!-- depth 10 calls the status_program --> <action name="status" depth="10" timeout="20" interval="60"/> <action name="monitor" depth="10" timeout="20" interval="60"/> <!-- reconfigure - reconfigure with new OCF parameters. 
#
# get_timeout: print the operation timeout (seconds) to use on stdout.
#
# The rgmanager-provided value (OCF_RESKEY_RGMANAGER_meta_timeout) wins over
# the CRM-provided one (OCF_RESKEY_CRM_meta_timeout). When neither is set, or
# the chosen value is zero or negative, 60 is printed. Always returns 0.
#
get_timeout()
{
	declare -i fallback=60
	declare -i limit=$fallback
	declare requested=${OCF_RESKEY_RGMANAGER_meta_timeout:-$OCF_RESKEY_CRM_meta_timeout}

	if [ -n "$requested" ]; then
		limit=$requested
	fi

	# Zero and negative values are not usable as a timeout.
	if [ $limit -le 0 ]; then
		limit=$fallback
	fi

	echo $limit
	return 0
}
#
# do_virsh_start: start the virtual machine through virsh.
#
# Reads OCF_RESKEY_name, OCF_RESKEY_snapshot and OCF_RESKEY_xmlfile.
#  - If do_status reports the VM as up, nothing is done.
#  - If a snapshot image <snapshot>/<name> exists it is restored and, on
#    success, removed.
#  - Otherwise the domain is created from the XML file when one is given and
#    exists, or started by name.
#
# Returns: 0 on success, 1 if restoring the snapshot failed, otherwise the
#          exit status of the virsh command that was run.
#
do_virsh_start()
{
	declare -a cmd
	declare snapshotimage

	echo -n "Virtual machine $OCF_RESKEY_name is "
	do_status && return 0

	snapshotimage="$OCF_RESKEY_snapshot/$OCF_RESKEY_name"

	if [ -n "$OCF_RESKEY_snapshot" ] && [ -f "$snapshotimage" ]; then
		# Pass the path as one quoted argument; no eval, so whitespace
		# or shell metacharacters in the path are harmless.
		virsh restore "$snapshotimage" || return 1
		rm -f -- "$snapshotimage"
		return 0
	fi

	if [ -n "$OCF_RESKEY_xmlfile" ] && [ -f "$OCF_RESKEY_xmlfile" ]; then
		# TODO: try to use build_virsh_cmdline for the hypervisor_uri
		cmd=(virsh create "$OCF_RESKEY_xmlfile")
	else
		# build_virsh_cmdline returns a flat option string, so word
		# splitting of its output is intended here.
		cmd=(virsh $(build_virsh_cmdline start))
	fi

	ocf_log debug "${cmd[*]}"

	"${cmd[@]}"
	return $?
}
#
# virsh_status: report the state of the domain $OCF_RESKEY_name via virsh.
#
# Prints the state on stdout ("indeterminate" when the management daemons
# are unavailable, otherwise whatever "virsh domstate" printed).
#
# Returns: $OCF_APP_ERR_INDETERMINATE if xend (Xen hypervisor only) or
#          libvirtd is not running,
#          0 for the states running / paused / no state / idle,
#          $OCF_NOT_RUNNING for "shut off",
#          $OCF_ERR_GENERIC for anything else.
#
virsh_status()
{
	declare domstate libvirtd_pid

	if [ "$OCF_RESKEY_hypervisor" = "xen" ]; then
		if ! service xend status &> /dev/null; then
			echo indeterminate
			return $OCF_APP_ERR_INDETERMINATE
		fi
	fi

	#
	# libvirtd is required when using virsh even though
	# not specifically when also using Xen.  This is because
	# libvirtd is required for migration.
	#
	libvirtd_pid=$(pidof libvirtd)
	if [ -z "$libvirtd_pid" ]; then
		echo indeterminate
		return $OCF_APP_ERR_INDETERMINATE
	fi

	domstate=$(virsh domstate $OCF_RESKEY_name)

	echo $domstate

	case "$domstate" in
	running|paused|"no state"|idle)
		return 0
		;;
	"shut off")
		return $OCF_NOT_RUNNING
		;;
	esac

	return $OCF_ERR_GENERIC
}
#
# parse_input: split a delimited list and call a function for each element.
#
# Arguments: $1 - delimiter separating the elements (e.g. ":")
#            $2 - the list to split
#            $3 - command/function to call with each non-empty element as
#                 its single argument
#
# Iteration stops as soon as the callback returns 2 ("found"); any other
# status continues with the next element. Empty elements are skipped.
# Whatever the callback writes to stdout is passed through.
#
# Returns: always 0.
#
parse_input()
{
	declare delim=$1
	declare input=$2
	declare func=$3
	declare value

	while [ -n "$input" ]; do
		# Split literally at the first delimiter. The delimiter is
		# quoted inside the expansions so that neither it nor the
		# element is ever interpreted as a glob pattern (an element
		# such as "/a[b]c" could otherwise never be removed from the
		# list and the loop would not terminate).
		if [ -n "$delim" ] && [ "$input" != "${input#*"$delim"}" ]; then
			value=${input%%"$delim"*}
			input=${input#*"$delim"}
		else
			value=$input
			input=""
		fi

		if [ -n "$value" ]; then
			# Call directly instead of through eval so the element
			# is passed as exactly one, unmodified argument.
			$func "$value"
			if [ $? -eq 2 ]; then
				return 0
			fi
		fi
	done

	return 0
}
# if [ -z "$OCF_RESKEY_hypervisor" ] || [ "$OCF_RESKEY_hypervisor" = "auto" ]; then export OCF_RESKEY_hypervisor="`virsh version | grep \"Running hypervisor:\" | awk '{print $3}' | tr A-Z a-z`" if [ -z "$OCF_RESKEY_hypervisor" ]; then ocf_log err "Could not determine Hypervisor" return $OCF_ERR_ARGS fi echo Hypervisor: $OCF_RESKEY_hypervisor fi # # Xen hypervisor only for when use_virsh = 0. # if [ "$OCF_RESKEY_use_virsh" = "0" ]; then if [ "$OCF_RESKEY_hypervisor" != "xen" ]; then ocf_log err "Cannot use $OCF_RESKEY_hypervisor hypervisor without using virsh" return $OCF_ERR_ARGS fi if [ -n "$OCF_RESKEY_xmlfile" ]; then ocf_log err "Cannot use xmlfile if use_virsh is set to 0" return $OCF_ERR_ARGS fi else # # Virsh path support. # if [ -n "$OCF_RESKEY_path" ] && [ "$OCF_RESKEY_path" != "/etc/xen" ]; then if [ -n "$OCF_RESKEY_xmlfile" ]; then ocf_log warning "Using $OCF_RESKEY_xmlfile instead of searching $OCF_RESKEY_path" else search_config_path if [ $? -ne 0 ]; then ocf_log warning "Could not find $OCF_RESKEY_name or $OCF_RESKEY_name.xml in search path $OCF_RESKEY_path" unset OCF_RESKEY_xmlfile else ocf_log debug "Using $OCF_RESKEY_xmlfile" fi choose_management_tool fi else export OCF_RESKEY_use_virsh=1 fi fi if [ "$OCF_RESKEY_use_virsh" = "0" ]; then echo "Management tool: xm" which xm &> /dev/null if [ $? -ne 0 ]; then ocf_log err "Cannot find 'xm'; is it installed?" return $OCF_ERR_INSTALLED fi if [ "$OCF_RESKEY_hypervisor" != "xen" ]; then ocf_log err "Cannot use $OCF_RESKEY_hypervisor hypervisor without using virsh" return $OCF_ERR_ARGS fi else echo "Management tool: virsh" which virsh &> /dev/null if [ $? -ne 0 ]; then ocf_log err "Cannot find 'virsh'; is it installed?" return $OCF_ERR_INSTALLED fi fi # # Set the hypervisor URI # if [ -z "$OCF_RESKEY_hypervisor_uri" -o "$OCF_RESKEY_hypervisor_uri" = "auto" ] && [ "$OCF_RESKEY_use_virsh" = "1" ]; then # Virsh makes it easier to do this. Really. 
if [ "$OCF_RESKEY_hypervisor" = "qemu" ]; then OCF_RESKEY_hypervisor_uri="qemu:///system" fi # I just need to believe in it more. if [ "$OCF_RESKEY_hypervisor" = "xen" ]; then OCF_RESKEY_hypervisor_uri="xen:///" fi echo Hypervisor URI: $OCF_RESKEY_hypervisor_uri fi # # Set the migration URI # if [ -z "$OCF_RESKEY_migration_uri" -o "$OCF_RESKEY_migration_uri" = "auto" ] && [ "$OCF_RESKEY_use_virsh" = "1" ]; then # Virsh makes it easier to do this. Really. if [ "$OCF_RESKEY_hypervisor" = "qemu" ]; then export OCF_RESKEY_migration_uri="qemu+ssh://%s/system" fi # I just need to believe in it more. if [ "$OCF_RESKEY_hypervisor" = "xen" ]; then export OCF_RESKEY_migration_uri="xenmigr://%s/" fi [ -n "$OCF_RESKEY_migration_uri" ] && echo Migration URI format: $(printf $OCF_RESKEY_migration_uri target_host) fi if [ -z "$OCF_RESKEY_name" ]; then echo No domain name specified return $OCF_ERR_ARGS fi if [ "$OCF_RESKEY_hypervisor" = "qemu" ]; then export migrateuriopt="tcp:%s" fi #virsh list --all | awk '{print $2}' | grep -q "^$OCF_RESKEY_name\$" return $? } virsh_migrate() { declare target=$1 declare rv=1 # # Xen and qemu have different migration mechanisms # if [ "$OCF_RESKEY_hypervisor" = "xen" ]; then cmd="virsh migrate $migrate_opt $OCF_RESKEY_name $OCF_RESKEY_hypervisor_uri $(printf $OCF_RESKEY_migration_uri $target)" ocf_log debug "$cmd" err=$($cmd 2>&1 | head -1; exit ${PIPESTATUS[0]}) rv=$? elif [ "$OCF_RESKEY_hypervisor" = "qemu" ]; then cmd="virsh migrate $migrate_opt $OCF_RESKEY_name $(printf $OCF_RESKEY_migration_uri $target) $(printf $migrateuriopt $target)" ocf_log debug "$cmd" err=$($cmd 2>&1 | head -1; exit ${PIPESTATUS[0]}) rv=$? 
fi if [ $rv -ne 0 ]; then ocf_log err "Migrate $OCF_RESKEY_name to $target failed:" ocf_log err "$err" if [ "$err" != "${err/does not exist/}" ]; then return $OCF_ERR_CONFIGURED fi if [ "$err" != "${err/Domain not found/}" ]; then return $OCF_ERR_CONFIGURED fi return $OCF_ERR_GENERIC fi return $rv } # # XM migrate # xm_migrate() { declare target=$1 declare errstr rv migrate_opt cmd rv=1 if [ "$OCF_RESKEY_migrate" = "live" ]; then migrate_opt="-l" fi # migrate() function sets target using migration_mapping; # no need to do it here anymore cmd="xm migrate $migrate_opt $OCF_RESKEY_name $target" ocf_log debug "$cmd" err=$($cmd 2>&1 | head -1; exit ${PIPESTATUS[0]}) rv=$? if [ $rv -ne 0 ]; then ocf_log err "Migrate $OCF_RESKEY_name to $target failed:" ocf_log err "$err" if [ "$err" != "${err/does not exist/}" ]; then return $OCF_NOT_RUNNING fi if [ "$err" != "${err/Connection refused/}" ]; then return $OCF_ERR_CONFIGURED fi return $OCF_ERR_GENERIC fi return $? } # # Virsh migrate # migrate() { declare target=$1 declare rv migrate_opt if [ "$OCF_RESKEY_migrate" = "live" ]; then migrate_opt="--live" fi # Patch from Marcelo Azevedo to migrate over private # LANs instead of public LANs if [ -n "$OCF_RESKEY_migration_mapping" ] ; then target=${OCF_RESKEY_migration_mapping#*$target:} target=${target%%,*} fi if [ "$OCF_RESKEY_use_virsh" = "1" ]; then virsh_migrate $target rv=$? else xm_migrate $target rv=$? fi return $rv } wait_start() { declare -i timeout_remaining=$(get_timeout) declare -i start_time declare -i end_time declare -i delta declare -i sleep_time if [ -z "$OCF_RESKEY_status_program" ]; then return 0 fi while [ $timeout_remaining -gt 0 ]; do start_time=$(date +%s) bash -c "$OCF_RESKEY_status_program" if [ $? 
-eq 0 ]; then return 0 fi end_time=$(date +%s) delta=$(((end_time - start_time))) sleep_time=$(((5 - delta))) ((timeout_remaining -= $delta)) if [ $sleep_time -gt 0 ]; then sleep $sleep_time ((timeout_remaining -= $sleep_time)) fi done ocf_log err "Start of $OCF_RESOURCE_INSTANCE has failed" ocf_log err "Timeout exceeded while waiting for \"$OCF_RESKEY_status_program\"" return 1 } # # # case $1 in start) validate_all || exit $OCF_ERR_ARGS do_start rv=$? if [ $rv -ne 0 ]; then exit $rv fi wait_start exit $? ;; stop) validate_all || exit $OCF_ERR_ARGS do_stop shutdown destroy exit $? ;; kill) validate_all || exit $OCF_ERR_ARGS do_stop destroy exit $? ;; recover|restart) exit 0 ;; status|monitor) validate_all || exit $OCF_ERR_ARGS echo -n "Virtual machine $OCF_RESKEY_name is " do_status rv=$? if [ $rv -ne 0 ]; then exit $rv fi [ -z "$OCF_RESKEY_status_program" ] && exit 0 [ -z "$OCF_CHECK_LEVEL" ] && exit 0 [ $OCF_CHECK_LEVEL -lt 10 ] && exit 0 bash -c "$OCF_RESKEY_status_program" &> /dev/null exit $? ;; migrate) validate_all || exit $OCF_ERR_ARGS migrate $2 # Send VM to this node rv=$? if [ $rv -eq $OCF_ERR_GENERIC ]; then # Catch-all: If migration failed with # an unhandled error, do a status check # to see if the VM is really dead. # # If the VM is still in good health, return # a value to rgmanager to indicate the # non-critical error # # OCF states that codes 150-199 are reserved # for application use, so we'll use 150 # do_status > /dev/null if [ $? -eq 0 ]; then rv=150 fi fi exit $rv ;; reload) exit 0 ;; reconfig) validate_all || exit $OCF_ERR_ARGS echo "$0 RECONFIGURING $OCF_RESKEY_memory" reconfigure exit $? ;; meta-data) meta_data exit 0 ;; validate-all) validate_all exit $? ;; *) echo "usage: $0 {start|stop|restart|status|reload|reconfig|meta-data|validate-all}" exit 1 ;; esac