diff --git a/heartbeat/VirtualDomain b/heartbeat/VirtualDomain index 43c2b02e2..4f9883ff5 100755 --- a/heartbeat/VirtualDomain +++ b/heartbeat/VirtualDomain @@ -1,747 +1,754 @@ #!/bin/sh # # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # # Resource Agent for domains managed by the libvirt API. # Requires a running libvirt daemon (libvirtd). # # (c) 2008-2010 Florian Haas, Dejan Muhamedagic, # and Linux-HA contributors # # usage: $0 {start|stop|status|monitor|migrate_to|migrate_from|meta-data|validate-all} # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Defaults OCF_RESKEY_force_stop_default=0 OCF_RESKEY_autoset_utilization_cpu_default="true" OCF_RESKEY_autoset_utilization_hv_memory_default="true" OCF_RESKEY_migrateport_default=$(( 49152 + $(ocf_maybe_random) % 64 )) OCF_RESKEY_CRM_meta_timeout_default=90000 OCF_RESKEY_save_config_on_stop_default=false : ${OCF_RESKEY_force_stop=${OCF_RESKEY_force_stop_default}} : ${OCF_RESKEY_autoset_utilization_cpu=${OCF_RESKEY_autoset_utilization_cpu_default}} : ${OCF_RESKEY_autoset_utilization_hv_memory=${OCF_RESKEY_autoset_utilization_hv_memory_default}} : ${OCF_RESKEY_migrateport=${OCF_RESKEY_migrateport_default}} : ${OCF_RESKEY_CRM_meta_timeout=${OCF_RESKEY_CRM_meta_timeout_default}} : ${OCF_RESKEY_save_config_on_stop=${OCF_RESKEY_save_config_on_stop_default}} ####################################################################### ## I'd very much suggest to make this RA use bash, ## and then use magic $SECONDS. ## But for now: NOW=$(date +%s) usage() { echo "usage: $0 {start|stop|status|monitor|migrate_to|migrate_from|meta-data|validate-all}" } meta_data() { cat < 1.1 Resource agent for a virtual domain (a.k.a. domU, virtual machine, virtual environment etc., depending on context) managed by libvirtd. 
Manages virtual domains through the libvirt virtualization framework Absolute path to the libvirt configuration file, for this virtual domain. Virtual domain configuration file Hypervisor URI to connect to. See the libvirt documentation for details on supported URI formats. The default is system dependent. Determine the system's default uri by running 'virsh --quiet uri'. Hypervisor URI Always forcefully shut down ("destroy") the domain on stop. The default behavior is to resort to a forceful shutdown only after a graceful shutdown attempt has failed. You should only set this to true if your virtual domain (or your virtualization backend) does not support graceful shutdown. Always force shutdown on stop Transport used to connect to the remote hypervisor while migrating. Please refer to the libvirt documentation for details on transports available. If this parameter is omitted, the resource will use libvirt's default transport to connect to the remote hypervisor. Remote hypervisor transport Use a dedicated migration network. The migration URI is composed by adding this parameters value to the end of the node name. If the node name happens to be an FQDN (as opposed to an unqualified host name), insert the suffix immediately prior to the first period (.) in the FQDN. At the moment Qemu/KVM and Xen migration via a dedicated network is supported. Note: Be sure this composed host name is locally resolveable and the associated IP is reachable through the favored network. Migration network host name suffix To additionally monitor services within the virtual domain, add this parameter with a list of scripts to monitor. Note: when monitor scripts are used, the start and migrate_from operations will complete only when all monitor scripts have completed successfully. Be sure to set the timeout of these operations to accommodate this delay. 
space-separated list of monitor scripts If set true, the agent will detect the number of domainU's vCPUs from virsh, and put it into the CPU utilization of the resource when the monitor is executed. Enable auto-setting the CPU utilization of the resource If set true, the agent will detect the number of *Max memory* from virsh, and put it into the hv_memory utilization of the resource when the monitor is executed. Enable auto-setting the hv_memory utilization of the resource This port will be used in the qemu migrateuri. If unset, the port will be a random highport. Port for migrateuri Changes to a running VM's config are normally lost on stop. This parameter instructs the RA to save the configuration back to the xml file provided in the "config" parameter. Save running VM's config back to its config file Path to the snapshot directory where the virtual machine image will be stored. When this parameter is set, the virtual machine's RAM state will be saved to a file in the snapshot directory when stopped. If on start a state file is present for the domain, the domain will be restored to the same state it was in right before it stopped last. This option is incompatible with the 'force_stop' option. Restore state on start/stop EOF } set_util_attr() { local attr=$1 val=$2 local cval outp cval=$(crm_resource -Q -r $OCF_RESOURCE_INSTANCE -z -g $attr 2>/dev/null) if [ $? -ne 0 ] && [ -z "$cval" ]; then crm_resource -Q -r $OCF_RESOURCE_INSTANCE -z -g $attr 2>&1 | grep -e "not connected" > /dev/null 2>&1 if [ $? 
-eq 0 ]; then ocf_log debug "Unable to set utilization attribute, cib is not available" return fi fi if [ "$cval" != "$val" ]; then outp=$(crm_resource -r $OCF_RESOURCE_INSTANCE -z -p $attr -v $val 2>&1) || ocf_log warn "crm_resource failed to set utilization attribute $attr: $outp" fi } update_utilization() { local dom_cpu dom_mem if ocf_is_true "$OCF_RESKEY_autoset_utilization_cpu"; then dom_cpu=$(LANG=C virsh $VIRSH_OPTIONS dominfo ${DOMAIN_NAME} 2>/dev/null | awk '/CPU\(s\)/{print $2}') test -n "$dom_cpu" && set_util_attr cpu $dom_cpu fi if ocf_is_true "$OCF_RESKEY_autoset_utilization_hv_memory"; then dom_mem=$(LANG=C virsh $VIRSH_OPTIONS dominfo ${DOMAIN_NAME} 2>/dev/null | awk '/Max memory/{printf("%d", $3/1024)}') test -n "$dom_mem" && set_util_attr hv_memory "$dom_mem" fi } get_emulator() { local emulator="" - local loglevel="error" + # An emulator is not required, so only report message in debug mode + local loglevel="debug" if ocf_is_probe; then loglevel="notice" fi emulator=$(virsh $VIRSH_OPTIONS dumpxml $DOMAIN_NAME 2>/dev/null | sed -n -e 's/^.*\(.*\)<\/emulator>.*$/\1/p') if [ -z "$emulator" ] && [ -a "$EMULATOR_STATE" ]; then emulator=$(cat $EMULATOR_STATE) fi if [ -z "$emulator" ]; then emulator=$(cat ${OCF_RESKEY_config} | sed -n -e 's/^.*\(.*\)<\/emulator>.*$/\1/p') fi if [ -n "$emulator" ]; then basename $emulator else ocf_log $loglevel "Unable to determine emulator for $DOMAIN_NAME" fi } update_emulator_cache() { local emulator emulator=$(get_emulator) if [ -n "$emulator" ]; then echo $emulator > $EMULATOR_STATE fi } # attempt to check domain status outside of libvirt using the emulator process pid_status() { local rc=$OCF_ERR_GENERIC local emulator=$(get_emulator) case "$emulator" in qemu-kvm|qemu-dm|qemu-system-*) rc=$OCF_NOT_RUNNING ps awx | grep -E "[q]emu-(kvm|dm|system).*-name $DOMAIN_NAME " > /dev/null 2>&1 if [ $? 
-eq 0 ]; then rc=$OCF_SUCCESS fi ;; libvirt_lxc) rc=$OCF_NOT_RUNNING ps awx | grep -E "[l]ibvirt_lxc.*-name $DOMAIN_NAME " > /dev/null 2>&1 if [ $? -eq 0 ]; then rc=$OCF_SUCCESS fi ;; # This can be expanded to check for additional emulators *) # We may be running xen with PV domains, they don't - # have an emulator set. try xenstore-ls in this case - if have_binary xenstore-ls; then - xenstore-ls -f /vm | grep -E "/vm.*name = \"$DOMAIN_NAME\"" > /dev/null 2>&1 + # have an emulator set. try xl list or xen-lists + if have_binary xl; then + xl list $DOMAIN_NAME >/dev/null 2>&1 if [ $? -eq 0 ]; then - rc=$OCF_SUCCESS + return $OCF_SUCCESS + fi + fi + if have_binary xen-list; then + xen-list $DOMAIN_NAME 2>/dev/null | grep -qs "State.*[-r][-b][-p]--" 2>/dev/null + if [ $? -eq 0 ]; then + return $OCF_SUCCESS fi fi ;; esac if [ $rc -eq $OCF_SUCCESS ]; then ocf_log debug "Virtual domain $DOMAIN_NAME is currently running." elif [ $rc -eq $OCF_NOT_RUNNING ]; then ocf_log debug "Virtual domain $DOMAIN_NAME is currently not running." fi return $rc } VirtualDomain_Status() { local try=0 rc=$OCF_ERR_GENERIC status="no state" while [ "$status" = "no state" ]; do try=$(($try + 1 )) status=$(virsh $VIRSH_OPTIONS domstate $DOMAIN_NAME 2>&1 | tr 'A-Z' 'a-z') case "$status" in *"error:"*"domain not found"*|"shut off") # shut off: domain is defined, but not started, will not happen if # domain is created but not defined # Domain not found: domain is not defined and thus not started ocf_log debug "Virtual domain $DOMAIN_NAME is not running: $(echo $status | sed s/error://g)" rc=$OCF_NOT_RUNNING ;; running|paused|idle|blocked|"in shutdown") # running: domain is currently actively consuming cycles # paused: domain is paused (suspended) # idle: domain is running but idle # blocked: synonym for idle used by legacy Xen versions # in shutdown: the domain is in process of shutting down, but has not completely shutdown or crashed. 
ocf_log debug "Virtual domain $DOMAIN_NAME is currently $status." rc=$OCF_SUCCESS ;; ""|*"failed to "*"connect to the hypervisor"*|"no state") # Empty string may be returned when virsh does not # receive a reply from libvirtd. # "no state" may occur when the domain is currently # being migrated (on the migration target only), or # whenever virsh can't reliably obtain the domain # state. status="no state" if [ "$__OCF_ACTION" = "stop" ] && [ $try -ge 3 ]; then # During the stop operation, we want to bail out # quickly, so as to be able to force-stop (destroy) # the domain if necessary. ocf_log error "Virtual domain $DOMAIN_NAME has no state during stop operation, bailing out." return $OCF_ERR_GENERIC; elif [ "$__OCF_ACTION" = "monitor" ]; then pid_status rc=$? if [ $rc -ne $OCF_ERR_GENERIC ]; then # we've successfully determined the domains status outside of libvirt return $rc fi else # During all other actions, we just wait and try # again, relying on the CRM/LRM to time us out if # this takes too long. ocf_log info "Virtual domain $DOMAIN_NAME currently has no state, retrying." fi sleep 1 ;; *) # any other output is unexpected. ocf_log error "Virtual domain $DOMAIN_NAME has unknown status \"$status\"!" sleep 1 ;; esac done return $rc } verify_undefined() { for dom in `virsh --connect=${OCF_RESKEY_hypervisor} list --all --name 2>/dev/null`; do if [ "$dom" = "$DOMAIN_NAME" ]; then virsh $VIRSH_OPTIONS undefine $DOMAIN_NAME > /dev/null 2>&1 return fi done } VirtualDomain_Start() { local snapshotimage if VirtualDomain_Status; then ocf_log info "Virtual domain $DOMAIN_NAME already running." return $OCF_SUCCESS fi snapshotimage="$OCF_RESKEY_snapshot/${DOMAIN_NAME}.state" if [ -n "$OCF_RESKEY_snapshot" -a -f "$snapshotimage" ]; then virsh restore $snapshotimage if [ $? -eq 0 ]; then rm -f $snapshotimage return $OCF_SUCCESS fi ocf_exit_reason "Failed to restore ${DOMAIN_NAME} from state file in ${OCF_RESKEY_snapshot} directory." 
return $OCF_ERR_GENERIC fi # Make sure domain is undefined before creating. # The 'create' command guarantees that the domain will be # undefined on shutdown, but requires the domain to be undefined. # if a user defines the domain # outside of this agent, we have to ensure that the domain # is restored to an 'undefined' state before creating. verify_undefined virsh $VIRSH_OPTIONS create ${OCF_RESKEY_config} rc=$? if [ $rc -ne 0 ]; then ocf_exit_reason "Failed to start virtual domain ${DOMAIN_NAME}." return $OCF_ERR_GENERIC fi while ! VirtualDomain_Monitor; do sleep 1 done return $OCF_SUCCESS } force_stop() { local out ex local status=0 ocf_log info "Issuing forced shutdown (destroy) request for domain ${DOMAIN_NAME}." out=$(virsh $VIRSH_OPTIONS destroy ${DOMAIN_NAME} 2>&1|tr 'A-Z' 'a-z') ex=$? echo >&2 "$out" case $ex$out in *"error:"*"domain is not running"*|*"error:"*"domain not found"*) : ;; # unexpected path to the intended outcome, all is well [!0]*) ocf_exit_reason "forced stop failed" return $OCF_ERR_GENERIC ;; 0*) while [ $status != $OCF_NOT_RUNNING ]; do VirtualDomain_Status status=$? done ;; esac return $OCF_SUCCESS } save_config(){ CFGTMP=$(mktemp -t vmcfgsave.XXX) virsh $VIRSH_OPTIONS dumpxml ${DOMAIN_NAME} > ${CFGTMP} if [ -s ${CFGTMP} ]; then if virt-xml-validate ${CFGTMP} domain 2>/dev/null ; then ocf_log info "Saving domain $DOMAIN_NAME to ${OCF_RESKEY_config}. Please make sure it's present on all nodes." if cat ${CFGTMP} > ${OCF_RESKEY_config} ; then ocf_log info "Saved $DOMAIN_NAME domain's configuration to ${OCF_RESKEY_config}." else ocf_log warn "Moving ${CFGTMP} to ${OCF_RESKEY_config} failed." fi else ocf_log warn "Domain $DOMAIN_NAME config failed to validate after dump. Skipping config update." fi else ocf_log warn "Domain $DOMAIN_NAME config has 0 size. Skipping config update." fi rm -f ${CFGTMP} } VirtualDomain_Stop() { local i local status local shutdown_timeout local needshutdown=1 VirtualDomain_Status status=$? 
case $status in $OCF_SUCCESS) if ocf_is_true $OCF_RESKEY_force_stop; then # if force stop, don't bother attempting graceful shutdown. force_stop return $? fi ocf_log info "Issuing graceful shutdown request for domain ${DOMAIN_NAME}." if [ -n "$OCF_RESKEY_snapshot" ]; then virsh save $DOMAIN_NAME "$OCF_RESKEY_snapshot/${DOMAIN_NAME}.state" if [ $? -eq 0 ]; then needshutdown=0 else ocf_log error "Failed to save snapshot state of ${DOMAIN_NAME} on stop" fi fi # save config if needed if ocf_is_true "$OCF_RESKEY_save_config_on_stop"; then save_config fi # issue the shutdown if save state didn't shutdown for us if [ $needshutdown -eq 1 ]; then # Issue a graceful shutdown request virsh $VIRSH_OPTIONS shutdown ${DOMAIN_NAME} fi # The "shutdown_timeout" we use here is the operation # timeout specified in the CIB, minus 5 seconds shutdown_timeout=$(( $NOW + ($OCF_RESKEY_CRM_meta_timeout/1000) -5 )) # Loop on status until we reach $shutdown_timeout while [ $NOW -lt $shutdown_timeout ]; do VirtualDomain_Status status=$? case $status in $OCF_NOT_RUNNING) # This was a graceful shutdown. return $OCF_SUCCESS ;; $OCF_SUCCESS) # Domain is still running, keep # waiting (until shutdown_timeout # expires) sleep 1 ;; *) # Something went wrong. Bail out and # resort to forced stop (destroy). break; esac NOW=$(date +%s) done ;; $OCF_NOT_RUNNING) ocf_log info "Domain $DOMAIN_NAME already stopped." return $OCF_SUCCESS esac # OK. Now if the above graceful shutdown hasn't worked, kill # off the domain with destroy. If that too does not work, # have the LRM time us out. force_stop } VirtualDomain_Migrate_To() { local target_node local remoteuri local transport_suffix local migrateuri local migrateport local migrate_target local hypervisor target_node="$OCF_RESKEY_CRM_meta_migrate_target" if VirtualDomain_Status; then # Find out the remote hypervisor to connect to. 
That is, turn # something like "qemu://foo:9999/system" into # "qemu+tcp://bar:9999/system" if [ -n "${OCF_RESKEY_migration_transport}" ]; then transport_suffix="+${OCF_RESKEY_migration_transport}" fi # A typical migration URI via a special migration network looks # like "tcp://bar-mig:49152". The port would be randomly chosen # by libvirt from the range 49152-49215 if omitted, at least since # version 0.7.4 ... if [ -n "${OCF_RESKEY_migration_network_suffix}" ]; then hypervisor="${OCF_RESKEY_hypervisor%%[+:]*}" # Hostname might be a FQDN migrate_target=$(echo ${target_node} | sed -e "s,^\([^.]\+\),\1${OCF_RESKEY_migration_network_suffix},") case $hypervisor in qemu) # For quiet ancient libvirt versions a migration port is needed # and the URI must not contain the "//". Newer versions can handle # the "bad" URI. migrateuri="tcp:${migrate_target}:${OCF_RESKEY_migrateport}" ;; xen) migrateuri="xenmigr://${migrate_target}" ;; *) ocf_log warn "$DOMAIN_NAME: Migration via dedicated network currently not supported for ${hypervisor}." ;; esac fi # Scared of that sed expression? So am I. :-) remoteuri=$(echo ${OCF_RESKEY_hypervisor} | sed -e "s,\(.*\)://[^/:]*\(:\?[0-9]*\)/\(.*\),\1${transport_suffix}://${target_node}\2/\3,") # save config if needed if ocf_is_true "$OCF_RESKEY_save_config_on_stop"; then save_config fi # OK, we know where to connect to. Now do the actual migration. ocf_log info "$DOMAIN_NAME: Starting live migration to ${target_node} (using remote hypervisor URI ${remoteuri} ${migrateuri})." virsh ${VIRSH_OPTIONS} migrate --live $DOMAIN_NAME ${remoteuri} ${migrateuri} rc=$? if [ $rc -ne 0 ]; then ocf_exit_reason "$DOMAIN_NAME: live migration to ${remoteuri} ${migrateuri} failed: $rc" return $OCF_ERR_GENERIC else ocf_log info "$DOMAIN_NAME: live migration to ${target_node} succeeded." return $OCF_SUCCESS fi else ocf_exit_reason "$DOMAIN_NAME: migrate_to: Not active locally!" return $OCF_ERR_GENERIC fi } VirtualDomain_Migrate_From() { while ! 
VirtualDomain_Monitor; do sleep 1 done ocf_log info "$DOMAIN_NAME: live migration from ${OCF_RESKEY_CRM_meta_migrate_source} succeeded." # save config if needed if ocf_is_true "$OCF_RESKEY_save_config_on_stop"; then save_config fi return $OCF_SUCCESS } VirtualDomain_Monitor() { # First, check the domain status. If that returns anything other # than $OCF_SUCCESS, something is definitely wrong. VirtualDomain_Status rc=$? if [ ${rc} -eq ${OCF_SUCCESS} ]; then # OK, the generic status check turned out fine. Now, if we # have monitor scripts defined, run them one after another. for script in ${OCF_RESKEY_monitor_scripts}; do script_output="$($script 2>&1)" script_rc=$? if [ ${script_rc} -ne ${OCF_SUCCESS} ]; then # A monitor script returned a non-success exit # code. Stop iterating over the list of scripts, log a # warning message, and propagate $OCF_ERR_GENERIC. ocf_exit_reason "Monitor command \"${script}\" for domain ${DOMAIN_NAME} returned ${script_rc} with output: ${script_output}" rc=$OCF_ERR_GENERIC break else ocf_log debug "Monitor command \"${script}\" for domain ${DOMAIN_NAME} completed successfully with output: ${script_output}" fi done fi update_emulator_cache update_utilization return ${rc} } VirtualDomain_Validate_All() { # Required binaries: for binary in virsh sed; do check_binary $binary done if [ -z $OCF_RESKEY_config ]; then ocf_exit_reason "Missing configuration parameter \"config\"." return $OCF_ERR_CONFIGURED fi if ocf_is_true $OCF_RESKEY_force_stop; then if [ -n "$OCF_RESKEY_snapshot" ]; then ocf_exit_reason "The 'force_stop' and 'snapshot' options can not be used together." return $OCF_ERR_CONFIGURED fi fi # check if we can read the config file (otherwise we're unable to # deduce $DOMAIN_NAME from it, see below) if [ ! -r $OCF_RESKEY_config ]; then if ocf_is_probe; then ocf_log info "Configuration file $OCF_RESKEY_config not readable during probe." 
elif [ "$__OCF_ACTION" = "stop" ]; then ocf_log info "Configuration file $OCF_RESKEY_config not readable, resource considered stopped." else ocf_exit_reason "Configuration file $OCF_RESKEY_config does not exist or is not readable." return $OCF_ERR_INSTALLED fi fi } if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi case $1 in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; esac # Grab the virsh uri default, but only if hypervisor isn't set : ${OCF_RESKEY_hypervisor=$(virsh --quiet uri 2>/dev/null)} # Set options to be passed to virsh: VIRSH_OPTIONS="--connect=${OCF_RESKEY_hypervisor} --quiet" # Everything except usage and meta-data must pass the validate test VirtualDomain_Validate_All || exit $? # During a probe, it is permissible for the config file to not be # readable (it might be on shared storage not available during the # probe). In that case, we're # unable to get the domain name. Thus, we also can't check whether the # domain is running. The only thing we can do here is to assume that # it is not running. if [ ! -r $OCF_RESKEY_config ]; then ocf_is_probe && exit $OCF_NOT_RUNNING [ "$__OCF_ACTION" = "stop" ] && exit $OCF_SUCCESS fi # Retrieve the domain name from the xml file. DOMAIN_NAME=`egrep '[[:space:]]*.*[[:space:]]*$' ${OCF_RESKEY_config} | sed -e 's/[[:space:]]*\(.*\)<\/name>[[:space:]]*$/\1/' 2>/dev/null` if [ -z $DOMAIN_NAME ]; then ocf_exit_reason "Unable to determine domain name." exit $OCF_ERR_GENERIC fi EMULATOR_STATE="${HA_RSCTMP}/VirtualDomain-${DOMAIN_NAME}-emu.state" case $1 in start) VirtualDomain_Start ;; stop) VirtualDomain_Stop ;; migrate_to) VirtualDomain_Migrate_To ;; migrate_from) VirtualDomain_Migrate_From ;; status) VirtualDomain_Status ;; monitor) VirtualDomain_Monitor ;; validate-all) ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? 
diff --git a/heartbeat/Xen b/heartbeat/Xen index 66591691c..bc3e8f475 100755 --- a/heartbeat/Xen +++ b/heartbeat/Xen @@ -1,548 +1,557 @@ #!/bin/sh # # # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # # Resource Agent for the Xen Hypervisor. # Manages Xen virtual machine instances by # mapping cluster resource start and stop, # to Xen create and shutdown, respectively. # # usage: $0 {start|stop|status|monitor|meta-data} # # OCF parameters are as below: # OCF_RESKEY_xmfile # Absolute path to the Xen control file, # for this virtual machine. # OCF_RESKEY_allow_mem_management # Change memory usage on start/stop/migration # of virtual machine # OCF_RESKEY_reserved_Dom0_memory # minimum memory reserved for domain 0 # OCF_RESKEY_monitor_scripts # scripts to monitor services within the # virtual domain ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### usage() { - cat <<-END + cat <<-END usage: $0 {start|stop|status|monitor|meta-data|validate-all} - END +END } : ${OCF_RESKEY_xmfile=/etc/xen/vm/MyDomU} : ${OCF_RESKEY_shutdown_acpi=0} : ${OCF_RESKEY_allow_mem_management=0} : ${OCF_RESKEY_reserved_Dom0_memory=512} # prefer xl xentool=$(which xl 2> /dev/null || which xm) meta_data() { cat < 1.0 Resource Agent for the Xen Hypervisor. Manages Xen virtual machine instances by mapping cluster resource -start and stop, to Xen create and shutdown, respectively. +start and stop, to Xen create and shutdown, respectively. A note on names We will try to extract the name from the config file (the xmfile attribute). If you use a simple assignment statement, then you should be fine. 
Otherwise, if there's some python acrobacy involved such as dynamically assigning names depending on other variables, and we will try to detect this, then please set the name attribute. You should also do that if there is any chance of a pathological situation where a config file might be missing, for example if it resides on a shared storage. If all fails, we finally fall back to the instance id to preserve backward compatibility. Para-virtualized guests can also be migrated by enabling the meta_attribute allow-migrate. Manages Xen unprivileged domains (DomUs) Absolute path to the Xen control file, for this virtual machine. Xen control file Name of the virtual machine. Xen DomU name The Xen agent will first try an orderly shutdown using xl shutdown. Should this not succeed within this timeout, the agent will escalate to xl destroy, forcibly killing the node. If this is not set, it will default to two-third of the stop action timeout. Setting this value to 0 forces an immediate destroy. Shutdown escalation timeout Handle shutdown by simulating an ACPI power button event. Enable this to allow graceful shutdown for HVM domains without installed PV drivers. Simulate power button event on shutdown This parameter enables dynamic adjustment of memory for start and stop actions used for Dom0 and the DomUs. The default is to not adjust memory dynamically. Use dynamic memory management In case of a live migration, the system will default to using the IP address associated with the hostname via DNS or /etc/hosts. This parameter allows you to specify a node attribute that will be queried instead for the target node, overriding the IP address. This allows you to use a dedicated network for live migration traffic to a specific node. Warning: make very sure the IP address does point to the right node. Or else the live migration will end up somewhere else, greatly confusing the cluster and causing havoc. 
Node attribute containing target IP address In case memory management is used, this parameter defines the minimum amount of memory to be reserved for the dom0. The default minimum memory is 512MB. Minimum Dom0 memory To additionally monitor services within the unprivileged domain, add this parameter with a list of scripts to monitor. list of space separated monitor scripts END } Xen_Status() { - if have_binary xen-list; then - xen-list $1 2>/dev/null | grep -qs "State.*[-r][-b][-p]--" 2>/dev/null - if [ $? -ne 0 ]; then - return $OCF_NOT_RUNNING - else - return $OCF_SUCCESS - fi - fi - if have_binary xenstore-ls; then - xenstore-ls -f /vm | grep -E "/vm.*name = \"$1\"" > /dev/null 2>&1 - if [ $? -ne 0 ]; then - return $OCF_NOT_RUNNING - else - return $OCF_SUCCESS - fi - fi - STATUS=`$xentool list --long $1 2>/dev/null | grep status 2>/dev/null` - if [ "X${STATUS}" != "X" ]; then - # we have Xen 3.0.4 or higher - STATUS_NOSPACES=`echo "$STATUS" | awk '{ print $1,$2}'` - if [ "$STATUS_NOSPACES" = "(status 2)" -o "$STATUS_NOSPACES" = "(status 1)" ]; then - return $OCF_SUCCESS - else - return $OCF_NOT_RUNNING - fi - else - # we have Xen 3.0.3 or lower - STATUS=`$xentool list --long $1 2>/dev/null | grep state 2>/dev/null` - echo "${STATUS}" | grep -qs "[-r][-b][-p]---" - if [ $? -ne 0 ]; then - return $OCF_NOT_RUNNING - else - return $OCF_SUCCESS - fi - - fi + if expr "x$xentool" : "x.*xl" >/dev/null; then + $xentool list $1 >/dev/null 2>&1 + if [ $? -ne 0 ]; then + return $OCF_NOT_RUNNING + else + return $OCF_SUCCESS + fi + fi + if have_binary xen-list; then + xen-list $1 2>/dev/null | grep -qs "State.*[-r][-b][-p]--" 2>/dev/null + if [ $? 
-ne 0 ]; then + return $OCF_NOT_RUNNING + else + return $OCF_SUCCESS + fi + fi + STATUS=`$xentool list --long $1 2>/dev/null | grep status 2>/dev/null` + if [ "X${STATUS}" != "X" ]; then + # we have Xen 3.0.4 or higher + STATUS_NOSPACES=`echo "$STATUS" | awk '{ print $1,$2}'` + if [ "$STATUS_NOSPACES" = "(status 2)" -o "$STATUS_NOSPACES" = "(status 1)" ]; then + return $OCF_SUCCESS + else + return $OCF_NOT_RUNNING + fi + else + # we have Xen 3.0.3 or lower + STATUS=`$xentool list --long $1 2>/dev/null | grep state 2>/dev/null` + echo "${STATUS}" | grep -qs "[-r][-b][-p]---" + if [ $? -ne 0 ]; then + return $OCF_NOT_RUNNING + else + return $OCF_SUCCESS + fi + fi } # If the guest is rebooting, it may completely disappear from the # list of defined guests, thus xl/xen-list would return with not # running; apparently, this period lasts only for a second or # two # If a status returns not running, then test status # again for 5 times (perhaps it'll show up) Xen_Status_with_Retry() { - local rc cnt=5 + local rc cnt=5 - Xen_Status $1 - rc=$? - while [ $rc -eq $OCF_NOT_RUNNING -a $cnt -gt 0 ]; do - case "$__OCF_ACTION" in - stop) - ocf_log debug "domain $1 reported as not running, waiting $cnt seconds ..." - ;; - monitor) - ocf_log warn "domain $1 reported as not running, but it is expected to be running! Retrying for $cnt seconds ..." - ;; - *) : not reachable - ;; - esac - sleep 1 Xen_Status $1 rc=$? - let cnt=$((cnt-1)) - done - return $rc + while [ $rc -eq $OCF_NOT_RUNNING -a $cnt -gt 0 ]; do + case "$__OCF_ACTION" in + stop) + ocf_log debug "domain $1 reported as not running, waiting $cnt seconds ..." + ;; + monitor) + ocf_log warn "domain $1 reported as not running, but it is expected to be running! Retrying for $cnt seconds ..." + ;; + *) : not reachable + ;; + esac + sleep 1 + Xen_Status $1 + rc=$? 
+ let cnt=$((cnt-1)) + done + return $rc } Xen_Adjust_Memory() { - if ocf_is_true "${OCF_RESKEY_allow_mem_management}"; then - CNTNEW=$1 - RUNNING=`Xen_List_running` - RUNCNT=`Xen_Count_running` - MAXMEM=`Xen_Total_Memory` - if [ ${RUNCNT} -eq 0 -a ${CNTNEW} -eq 0 ]; then - RUNCNT=1 - fi - #NEWMEM=`echo "(${MAXMEM}-${OCF_RESKEY_reserved_Dom0_memory})/(${RUNCNT}+${CNTNEW})"|bc` - NEWMEM=$(( (${MAXMEM} - ${OCF_RESKEY_reserved_Dom0_memory}) / (${RUNCNT} + ${CNTNEW} ) )) - # do not rely on ballooning add dom0_mem=512 instead to force memory for dom0 - #$xentool mem-set Domain-0 ${OCF_RESKEY_reserved_Dom0_memory} - for DOM in ${RUNNING}; do - $xentool mem-set ${DOM} ${NEWMEM} - done - ocf_log info "Adjusted memory to: $NEWMEM, for the following $RUNCNT domains: $RUNNING" - fi + if ocf_is_true "${OCF_RESKEY_allow_mem_management}"; then + CNTNEW=$1 + RUNNING=`Xen_List_running` + RUNCNT=`Xen_Count_running` + MAXMEM=`Xen_Total_Memory` + if [ ${RUNCNT} -eq 0 -a ${CNTNEW} -eq 0 ]; then + RUNCNT=1 + fi + #NEWMEM=`echo "(${MAXMEM}-${OCF_RESKEY_reserved_Dom0_memory})/(${RUNCNT}+${CNTNEW})"|bc` + NEWMEM=$(( (${MAXMEM} - ${OCF_RESKEY_reserved_Dom0_memory}) / (${RUNCNT} + ${CNTNEW} ) )) + # do not rely on ballooning add dom0_mem=512 instead to force memory for dom0 + #$xentool mem-set Domain-0 ${OCF_RESKEY_reserved_Dom0_memory} + for DOM in ${RUNNING}; do + $xentool mem-set ${DOM} ${NEWMEM} + done + ocf_log info "Adjusted memory to: $NEWMEM, for the following $RUNCNT domains: $RUNNING" + fi } Xen_List_all() { $xentool list | grep -v -e "Name" -e "Domain-0" | awk '{print $1}' } Xen_List_running() { ALL_DOMS=`Xen_List_all` for DOM in ${ALL_DOMS}; do if Xen_Status $DOM; then echo "${DOM} " fi done } Xen_Count_running() { Xen_List_running | wc -w } Xen_Monitor() { - if ocf_is_probe; then - Xen_Status ${DOMAIN_NAME} - else - Xen_Status_with_Retry ${DOMAIN_NAME} - fi - if [ $? 
-eq ${OCF_NOT_RUNNING} ]; then - ocf_is_probe || - ocf_log err "Xen domain $DOMAIN_NAME stopped" - return ${OCF_NOT_RUNNING} - fi - if [ "X${OCF_RESKEY_monitor_scripts}" = "X" ]; then - return ${OCF_SUCCESS} - fi - for SCRIPT in ${OCF_RESKEY_monitor_scripts}; do - $SCRIPT - if [ $? -ne 0 ]; then - return ${OCF_ERR_GENERIC} + if ocf_is_probe; then + Xen_Status ${DOMAIN_NAME} + else + Xen_Status_with_Retry ${DOMAIN_NAME} + fi + if [ $? -eq ${OCF_NOT_RUNNING} ]; then + ocf_is_probe || + ocf_log err "Xen domain $DOMAIN_NAME stopped" + return ${OCF_NOT_RUNNING} fi - done - return ${OCF_SUCCESS} + if [ "X${OCF_RESKEY_monitor_scripts}" = "X" ]; then + return ${OCF_SUCCESS} + fi + for SCRIPT in ${OCF_RESKEY_monitor_scripts}; do + $SCRIPT + if [ $? -ne 0 ]; then + return ${OCF_ERR_GENERIC} + fi + done + return ${OCF_SUCCESS} } Xen_Total_Memory() { $xentool info | grep "^total_memory" | awk '{print $3}' } Xen_Start() { - if Xen_Status ${DOMAIN_NAME}; then - ocf_log info "Xen domain $DOMAIN_NAME already running." - return $OCF_SUCCESS - fi - - if [ ! -f "${OCF_RESKEY_xmfile}" ]; then - ocf_log err "Config file ${OCF_RESKEY_xmfile} for $DOMAIN_NAME does not exist." - return $OCF_ERR_INSTALLED - fi - - if ocf_is_true "${OCF_RESKEY_allow_mem_management}"; then - Xen_Adjust_Memory 1 - ocf_log info "New memory for virtual domains: ${NEWMEM}" - sed -i -e "/^memory=/ s/^memory=.*/memory=${NEWMEM}/" ${OCF_RESKEY_xmfile} - $xentool mem-set ${DOMAIN_NAME} ${NEWMEM} - fi - - $xentool create ${OCF_RESKEY_xmfile} name=\"$DOMAIN_NAME\" - rc=$? - if [ $rc -ne 0 ]; then - return $OCF_ERR_GENERIC - else - if ocf_is_true "${OCF_RESKEY_allow_mem_management}"; then - $xentool mem-set ${DOMAIN_NAME} ${NEWMEM} - fi - fi - while sleep 1; do - Xen_Monitor && return $OCF_SUCCESS - done + if Xen_Status ${DOMAIN_NAME}; then + ocf_log info "Xen domain $DOMAIN_NAME already running." + return $OCF_SUCCESS + fi + + if [ ! 
-f "${OCF_RESKEY_xmfile}" ]; then + ocf_log err "Config file ${OCF_RESKEY_xmfile} for $DOMAIN_NAME does not exist." + return $OCF_ERR_INSTALLED + fi + + if ocf_is_true "${OCF_RESKEY_allow_mem_management}"; then + Xen_Adjust_Memory 1 + ocf_log info "New memory for virtual domains: ${NEWMEM}" + sed -i -e "/^memory=/ s/^memory=.*/memory=${NEWMEM}/" ${OCF_RESKEY_xmfile} + $xentool mem-set ${DOMAIN_NAME} ${NEWMEM} + fi + + $xentool create ${OCF_RESKEY_xmfile} name=\"$DOMAIN_NAME\" + rc=$? + if [ $rc -ne 0 ]; then + return $OCF_ERR_GENERIC + else + if ocf_is_true "${OCF_RESKEY_allow_mem_management}"; then + $xentool mem-set ${DOMAIN_NAME} ${NEWMEM} + fi + fi + while sleep 1; do + Xen_Monitor && return $OCF_SUCCESS + done } xen_domain_stop() { - local dom=$1 - local timeout - - if [ -n "$OCF_RESKEY_shutdown_timeout" ]; then - timeout=$OCF_RESKEY_shutdown_timeout - elif [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then - # Allow 2/3 of the action timeout for the orderly shutdown - # (The origin unit is ms, hence the conversion) - timeout=$((OCF_RESKEY_CRM_meta_timeout/1500)) - else - timeout=60 - fi - - if [ "$timeout" -gt 0 ]; then - ocf_log info "Xen domain $dom will be stopped (timeout: ${timeout}s)" - if ocf_is_true "${OCF_RESKEY_shutdown_acpi}"; then - $xentool trigger $dom power - else - $xentool shutdown $dom - fi - - while Xen_Status $dom && [ "$timeout" -gt 0 ]; do - ocf_log debug "$dom still not stopped. Waiting..." - timeout=$((timeout-1)) - sleep 1 - done - fi - - if [ "$timeout" -eq 0 ]; then - while Xen_Status $dom; do - ocf_log warn "Xen domain $dom will be destroyed!" - $xenkill $dom - sleep 1 - done - # Note: This does not give up. stop isn't allowed to to fail. - # If $xentool destroy fails, stop will eventually timeout. - # This is the correct behaviour. - fi - - ocf_log info "Xen domain $dom stopped." 
+ local dom=$1 + local timeout + + if [ -n "$OCF_RESKEY_shutdown_timeout" ]; then + timeout=$OCF_RESKEY_shutdown_timeout + elif [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then + # Allow 2/3 of the action timeout for the orderly shutdown + # (The original unit is ms, hence the conversion) + timeout=$((OCF_RESKEY_CRM_meta_timeout/1500)) + else + timeout=60 + fi + + if [ "$timeout" -gt 0 ]; then + ocf_log info "Xen domain $dom will be stopped (timeout: ${timeout}s)" + if ocf_is_true "${OCF_RESKEY_shutdown_acpi}"; then + $xentool trigger $dom power + else + $xentool shutdown $dom + fi + + while Xen_Status $dom && [ "$timeout" -gt 0 ]; do + ocf_log debug "$dom still not stopped. Waiting..." + timeout=$((timeout-1)) + sleep 1 + done + fi + + if [ "$timeout" -eq 0 ]; then + while Xen_Status $dom; do + ocf_log warn "Xen domain $dom will be destroyed!" + $xenkill $dom + sleep 1 + done + # Note: This does not give up. stop isn't allowed to fail. + # If $xentool destroy fails, stop will eventually time out. + # This is the correct behaviour. + fi + + ocf_log info "Xen domain $dom stopped." } Xen_Stop() { - local vm - if Xen_Status_with_Retry ${DOMAIN_NAME}; then - vm=${DOMAIN_NAME} - elif Xen_Status migrating-${DOMAIN_NAME}; then - ocf_log info "Xen domain $DOMAIN_NAME is migrating" - vm="migrating-${DOMAIN_NAME}" - else - ocf_log info "Xen domain $DOMAIN_NAME already stopped." - fi - - if [ "$vm" ]; then - xen_domain_stop $vm - else - # It is supposed to be gone, but there have been situations where - # $xentool list / xen-list showed it as stopped but it was still - # instantiated. Nuke it once more to make sure: - $xenkill ${DOMAIN_NAME} - fi - - Xen_Adjust_Memory 0 - return $OCF_SUCCESS + local vm + if Xen_Status_with_Retry ${DOMAIN_NAME}; then + vm=${DOMAIN_NAME} + elif Xen_Status migrating-${DOMAIN_NAME}; then + ocf_log info "Xen domain $DOMAIN_NAME is migrating" + vm="migrating-${DOMAIN_NAME}" + else + ocf_log info "Xen domain $DOMAIN_NAME already stopped."
+ fi + + if [ "$vm" ]; then + xen_domain_stop $vm + else + # It is supposed to be gone, but there have been situations where + # $xentool list / xen-list showed it as stopped but it was still + # instantiated. Nuke it once more to make sure: + $xenkill ${DOMAIN_NAME} + fi + + Xen_Adjust_Memory 0 + return $OCF_SUCCESS } Xen_Migrate_To() { - target_node="$OCF_RESKEY_CRM_meta_migrate_target" - target_attr="$OCF_RESKEY_node_ip_attribute" - target_addr="$target_node" - - if Xen_Status ${DOMAIN_NAME}; then - ocf_log info "$DOMAIN_NAME: Starting $xentool migrate to $target_node" - - if [ -n "$target_attr" ]; then - nodevalue=`crm_attribute --type nodes --node-uname $target_node --attr-name $target_attr --get-value -q` - if [ -n "${nodevalue}" -a "${nodevalue}" != "(null)" ]; then - target_addr="$nodevalue" - ocf_log info "$DOMAIN_NAME: $target_node is using address $target_addr" - fi - fi - - if [[ "$xentool" == *xm ]]; then - $xentool migrate --live $DOMAIN_NAME $target_addr - else - $xentool migrate $DOMAIN_NAME $target_addr - fi - rc=$? - if [ $rc -ne 0 ]; then - ocf_log err "$DOMAIN_NAME: $xentool migrate to $target_node failed: $rc" - return $OCF_ERR_GENERIC - else - Xen_Adjust_Memory 0 - ocf_log info "$DOMAIN_NAME: $xentool migrate to $target_node succeeded." - return $OCF_SUCCESS - fi - else - ocf_log err "$DOMAIN_NAME: migrate_to: Not active locally!" 
- return $OCF_ERR_GENERIC - fi + target_node="$OCF_RESKEY_CRM_meta_migrate_target" + target_attr="$OCF_RESKEY_node_ip_attribute" + target_addr="$target_node" + + if Xen_Status ${DOMAIN_NAME}; then + ocf_log info "$DOMAIN_NAME: Starting $xentool migrate to $target_node" + + if [ -n "$target_attr" ]; then + nodevalue=$(crm_attribute --type nodes --node-uname $target_node --attr-name $target_attr --get-value -q) + if [ -n "${nodevalue}" ] && [ "${nodevalue}" != "(null)" ]; then + target_addr="$nodevalue" + ocf_log info "$DOMAIN_NAME: $target_node is using address $target_addr" + fi + fi + + if expr "x$xentool" : 'x.*xm$' >/dev/null; then # --live only when the tool name ends in "xm" + $xentool migrate --live $DOMAIN_NAME $target_addr + else + $xentool migrate $DOMAIN_NAME $target_addr + fi + rc=$? + if [ $rc -ne 0 ]; then + ocf_log err "$DOMAIN_NAME: $xentool migrate to $target_node failed: $rc" + return $OCF_ERR_GENERIC + else + Xen_Adjust_Memory 0 + ocf_log info "$DOMAIN_NAME: $xentool migrate to $target_node succeeded." + return $OCF_SUCCESS + fi + else + ocf_log err "$DOMAIN_NAME: migrate_to: Not active locally!" + return $OCF_ERR_GENERIC + fi } Xen_Migrate_From() { - if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then - # Allow 2/3 of the action timeout for status to stabilize - # (The origin unit is ms, hence the conversion) - timeout=$((OCF_RESKEY_CRM_meta_timeout/1500)) - else - timeout=10 # should be plenty - fi - - while ! Xen_Status ${DOMAIN_NAME} && [ $timeout -gt 0 ]; do - ocf_log debug "$DOMAIN_NAME: Not yet active locally, waiting (timeout: ${timeout}s)" - timeout=$((timeout-1)) - sleep 1 - done - - if Xen_Status ${DOMAIN_NAME}; then - Xen_Adjust_Memory 0 - ocf_log info "$DOMAIN_NAME: Active locally, migration successful" - return $OCF_SUCCESS - else - ocf_log err "$DOMAIN_NAME: Not active locally, migration failed!"
- return $OCF_ERR_GENERIC - fi + if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then + # Allow 2/3 of the action timeout for status to stabilize + # (The origin unit is ms, hence the conversion) + timeout=$((OCF_RESKEY_CRM_meta_timeout/1500)) + else + timeout=10 # should be plenty + fi + + while ! Xen_Status ${DOMAIN_NAME} && [ $timeout -gt 0 ]; do + ocf_log debug "$DOMAIN_NAME: Not yet active locally, waiting (timeout: ${timeout}s)" + timeout=$((timeout-1)) + sleep 1 + done + + if Xen_Status ${DOMAIN_NAME}; then + Xen_Adjust_Memory 0 + ocf_log info "$DOMAIN_NAME: Active locally, migration successful" + return $OCF_SUCCESS + else + ocf_log err "$DOMAIN_NAME: Not active locally, migration failed!" + return $OCF_ERR_GENERIC + fi } Xen_Validate_All() { - return $OCF_SUCCESS + return $OCF_SUCCESS } if [ $# -ne 1 ]; then - usage - exit $OCF_ERR_ARGS + usage + exit $OCF_ERR_ARGS fi case $1 in - meta-data) meta_data - exit $OCF_SUCCESS - ;; - usage) usage - exit $OCF_SUCCESS - ;; + meta-data) + meta_data + exit $OCF_SUCCESS + ;; + usage) + usage + exit $OCF_SUCCESS + ;; esac # the name business: # # 1. use the name attribute, or # 2. find the name in the config file (if it exists) and use that # unless it contains funny characters such as '%' or space, or # 3. 
use the OCF_RESOURCE_INSTANCE if [ x"${OCF_RESKEY_name}" != x ]; then DOMAIN_NAME="${OCF_RESKEY_name}" else if [ -f "${OCF_RESKEY_xmfile}" ]; then DOMAIN_NAME=`awk '$1~/^name(=|$)/{print}' ${OCF_RESKEY_xmfile} | sed 's/.*=[[:space:]]*//' | tr -d "[\"']"` if echo "$DOMAIN_NAME" | grep -qs '[%[:space:]]'; then DOMAIN_NAME="" fi fi DOMAIN_NAME=${DOMAIN_NAME:-${OCF_RESOURCE_INSTANCE}} fi for binary in sed awk; do - check_binary $binary + check_binary $binary done if have_binary xen-destroy ; then - xenkill="xen-destroy" + xenkill="xen-destroy" else - xenkill="$xentool destroy" + xenkill="$xentool destroy" fi if [ -n "$OCF_RESKEY_shutdown_timeout" ]; then ocf_is_decimal "$OCF_RESKEY_shutdown_timeout" || { ocf_log err "shutdown_timeout must be a number" exit $OCF_ERR_CONFIGURED } fi case $1 in - start) Xen_Start - ;; - stop) Xen_Stop - ;; - migrate_to) Xen_Migrate_To - ;; - migrate_from) Xen_Migrate_From - ;; - monitor) Xen_Monitor - ;; - status) Xen_Status ${DOMAIN_NAME} - ;; - validate-all) Xen_Validate_All - ;; - *) usage - exit $OCF_ERR_UNIMPLEMENTED - ;; + start) + Xen_Start + ;; + stop) + Xen_Stop + ;; + migrate_to) + Xen_Migrate_To + ;; + migrate_from) + Xen_Migrate_From + ;; + monitor) + Xen_Monitor + ;; + status) + Xen_Status ${DOMAIN_NAME} + ;; + validate-all) + Xen_Validate_All + ;; + *) + usage + exit $OCF_ERR_UNIMPLEMENTED + ;; esac exit $? # vim:sw=2:ts=4: