diff --git a/heartbeat/VirtualDomain b/heartbeat/VirtualDomain index c86a15409..62952cbc0 100644 --- a/heartbeat/VirtualDomain +++ b/heartbeat/VirtualDomain @@ -1,339 +1,356 @@ #!/bin/sh # # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # # Resource Agent for domains managed by the libvirt API. # Requires a running libvirt daemon (libvirtd). # # usage: $0 {start|stop|status|monitor|migrate_to|migrate_from|meta-data|validate-all} # ####################################################################### # Initialization: . ${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs LC_ALL="C" LANG="C" # Defaults OCF_RESKEY_force_stop_default=0 OCF_RESKEY_hypervisor_default="$(virsh --quiet uri)" : ${OCF_RESKEY_force_stop=${OCF_RESKEY_force_stop_default}} : ${OCF_RESKEY_hypervisor=${OCF_RESKEY_hypervisor_default}} ####################################################################### usage() { echo "usage: $0 {start|stop|status|monitor|migrate_to|migrate_from|meta-data|validate-all}" } meta_data() { cat < 1.0 Resource agent for a virtual domain (a.k.a. domU, virtual machine, virtual environment etc., depending on context) managed by libvirtd. Manages virtual domains Absolute path to the libvirt configuration file, for this virtual domain. Virtual domain configuration file Hypervisor URI to connect to. See the libvirt documentation for details on supported URI formats. The default is system dependent. Hypervisor URI Forcefully shut down ("destroy") the domain on stop. Enable this only if your virtual domain (or your virtualization backend) does not support graceful shutdown. Force shutdown on stop Transport used to connect to the remote hypervisor while migrating. Please refer to the libvirt documentation for details on transports available. If this parameter is omitted, the resource will use libvirt's default transport to connect to the remote hypervisor. Remote hypervisor transport To additionally monitor services within the virtual domain, add this parameter with a list of scripts to monitor. Note: when monitor scripts are used, the start and migrate_from operations will complete only when all monitor scripts have completed successfully. Be sure to set the timeout of these operations to accommodate this delay. space-separated list of monitor scripts EOF } # Set options to be passed to virsh: VIRSH_OPTIONS="--connect=${OCF_RESKEY_hypervisor} --quiet" VirtualDomain_Define() { # Note: passing in the domain name from outside the script is # intended for testing and debugging purposes only. Don't do this # in production, instead let the script figure out the domain name # from the config file. You have been warned. if [ -z "$DOMAIN_NAME" ]; then DOMAIN_NAME="$(virsh ${VIRSH_OPTIONS} define ${OCF_RESKEY_config} | sed -e 's/Domain \(.*\) defined from .*$/\1/')" \ || return $OCF_ERR_GENERIC else ocf_log warn "Domain name ${DOMAIN_NAME} already defined, overriding configuration file ${OCF_RESKEY_config}. You should do this for testing only." fi } VirtualDomain_Status() { rc=$OCF_ERR_GENERIC status="no state" while [ "$status" = "no state" ]; do status="`virsh $VIRSH_OPTIONS domstate $DOMAIN_NAME`" case "$status" in "shut off") # shut off: domain is defined, but not started ocf_log debug "Virtual domain $DOMAIN_NAME is currently $status." rc=$OCF_NOT_RUNNING ;; running|paused|idle|blocked) # running: domain is currently actively consuming cycles # paused: domain is paused (suspended) # idle: domain is running but idle # blocked: synonym for idle used by legacy Xen versions ocf_log debug "Virtual domain $DOMAIN_NAME is currently $status." rc=$OCF_SUCCESS ;; "no state") # "no state" may occur when the domain is currently # being migrated (on the migration target only), or # whenever virsh can't reliably obtain the domain # state. ocf_log info "Virtual domain $DOMAIN_NAME currently has no state, retrying." sleep 1 ;; *) # any other output is unexpected. ocf_log error "Virtual domain $DOMAIN_NAME has unknown status \"$status\"!" ;; esac done return $rc } VirtualDomain_Start() { if VirtualDomain_Status ${DOMAIN_NAME}; then ocf_log info "Virtual domain $DOMAIN_NAME already running." return $OCF_SUCCESS fi virsh $VIRSH_OPTIONS start ${DOMAIN_NAME} rc=$? if [ $rc -ne 0 ]; then ocf_log error "Failed to start virtual domain ${DOMAIN_NAME}." return $OCF_ERR_GENERIC fi while ! VirtualDomain_Monitor; do sleep 1 done return $OCF_SUCCESS } VirtualDomain_Stop() { + local i + local shutdown_timeout if VirtualDomain_Status ${DOMAIN_NAME}; then case $OCF_RESKEY_force_stop in [Yy][Ee][Ss]|[Tt][Rr][Uu][Ee]|[Oo][Nn]|1) - op="destroy" ;; *) - op="shutdown" + # Issue a graceful shutdown request + ocf_log info "Issuing graceful shutdown request for domain ${DOMAIN_NAME}." + virsh $VIRSH_OPTIONS shutdown ${DOMAIN_NAME} + # The "shutdown_timeout" we use here is the operation + # timeout specified in the CIB, minus 5 seconds + shutdown_timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000)-5)) + # Loop on status for $shutdown_timeout seconds + for i in `seq $shutdown_timeout`; do + VirtualDomain_Status || break + sleep 1 + done ;; esac - virsh $VIRSH_OPTIONS $op ${DOMAIN_NAME} - while VirtualDomain_Status; do - sleep 1 - done + # OK. Now if the above graceful shutdown hasn't worked, kill + # off the domain with destroy. If that too does not work, give + # up and have the LRM time us out. + if VirtualDomain_Status; then + ocf_log info "Issuing forced shutdown (destroy) request for domain ${DOMAIN_NAME}." + virsh $VIRSH_OPTIONS destroy ${DOMAIN_NAME} + while VirtualDomain_Status; do + sleep 1 + done + fi return $OCF_SUCCESS else ocf_log info "Domain $DOMAIN_NAME already stopped." return $OCF_SUCCESS fi } VirtualDomain_Migrate_To() { local target_node local remoteuri local transport_suffix target_node="$OCF_RESKEY_CRM_meta_migrate_target" if VirtualDomain_Status ${DOMAIN_NAME}; then # Find out the remote hypervisor to connect to. That is, turn # something like "qemu://foo:9999/system" into # "qemu+tcp://bar:9999/system" if [ -n "${OCF_RESKEY_migration_transport}" ]; then transport_suffix="+${OCF_RESKEY_migration_transport}" fi # Scared of that sed expression? So am I. :-) remoteuri=$(echo ${OCF_RESKEY_hypervisor} | sed -e "s,\(.*\)://[^/:]*\(:\?[0-9]*\)/\(.*\),\1${transport_suffix}://${target_node}\2/\3,") # OK, we know where to connect to. Now do the actual migration. ocf_log info "$DOMAIN_NAME: Starting live migration to ${target_node} (using remote hypervisor URI ${remoteuri})." virsh ${VIRSH_OPTIONS} migrate --live $DOMAIN_NAME ${remoteuri} rc=$? if [ $rc -ne 0 ]; then ocf_log err "$DOMAIN_NAME: live migration to ${remoteuri} failed: $rc" return $OCF_ERR_GENERIC else ocf_log info "$DOMAIN_NAME: live migration to ${target_node} succeeded." return $OCF_SUCCESS fi else ocf_log err "$DOMAIN_NAME: migrate_to: Not active locally!" return $OCF_ERR_GENERIC fi } VirtualDomain_Migrate_From() { while ! VirtualDomain_Monitor; do sleep 1 done ocf_log info "$DOMAIN_NAME: live migration from ${OCF_RESKEY_CRM_meta_migrate_source} succeeded." return $OCF_SUCCESS } VirtualDomain_Monitor() { # First, check the domain status. If that returns anything other # than $OCF_SUCCESS, something is definitely wrong. VirtualDomain_Status ${DOMAIN_NAME} rc=$? if [ ${rc} -eq ${OCF_SUCCESS} ]; then # OK, the generic status check turned out fine. Now, if we # have monitor scripts defined, run them one after another. for script in ${OCF_RESKEY_monitor_scripts}; do script_output="$($script 2>&1)" script_rc=$? if [ ${script_rc} -ne ${OCF_SUCCESS} ]; then # A monitor script returned a non-success exit # code. Stop iterating over the list of scripts, log a # warning message, and propagate $OCF_ERR_GENERIC. ocf_log warn "Monitor command \"${script}\" for domain ${DOMAIN_NAME} returned ${script_rc} with output: ${script_output}" rc=$OCF_ERR_GENERIC break else ocf_log debug "Monitor command \"${script}\" for domain ${DOMAIN_NAME} completed successfully with output: ${script_output}" fi done fi return ${rc} } VirtualDomain_Validate_All() { # Required binaries: for binary in virsh sed; do check_binary $binary done if [ -z $OCF_RESKEY_config ]; then ocf_log error "Missing configuration parameter \"config\"." return $OCF_ERR_CONFIGURED fi # check if we can read the config file (otherwise we're unable to # deduce $DOMAIN_NAME from it, see below) if [ ! -r $OCF_RESKEY_config ]; then ocf_log error "Configuration file $OCF_RESKEY_config does not exist or is not readable." return $OCF_ERR_INSTALLED fi } if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi case $1 in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; esac # Everything except usage and meta-data must pass the validate test VirtualDomain_Validate_All || exit $? # Define the domain on startup, and re-define (harmlessly) # on every invocation. This also sets DOMAIN_NAME. If this fails, # bail out. VirtualDomain_Define || exit $? case $1 in start) VirtualDomain_Start ;; stop) VirtualDomain_Stop ;; migrate_to) VirtualDomain_Migrate_To ;; migrate_from) VirtualDomain_Migrate_From ;; status) VirtualDomain_Status ;; monitor) VirtualDomain_Monitor ;; validate-all) ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $?