diff --git a/heartbeat/Xen b/heartbeat/Xen index abfd43682..d28e96db6 100755 --- a/heartbeat/Xen +++ b/heartbeat/Xen @@ -1,636 +1,653 @@ #!/bin/sh # # # Support: users@clusterlabs.org # License: GNU General Public License (GPL) # # Resource Agent for the Xen Hypervisor. # Manages Xen virtual machine instances by # mapping cluster resource start and stop, # to Xen create and shutdown, respectively. # # usage: $0 {start|stop|status|monitor|meta-data} # # OCF parameters are as below: # OCF_RESKEY_xmfile # Absolute path to the Xen control file, # for this virtual machine. # OCF_RESKEY_allow_mem_management # Change memory usage on start/stop/migration # of virtual machine # OCF_RESKEY_reserved_Dom0_memory # minimum memory reserved for domain 0 # OCF_RESKEY_monitor_scripts # scripts to monitor services within the # virtual domain ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +# Parameter defaults + +OCF_RESKEY_xmfile_default="/etc/xen/vm/MyDomU" +OCF_RESKEY_name_default="" +OCF_RESKEY_shutdown_timeout_default="" +OCF_RESKEY_shutdown_acpi_default="0" +OCF_RESKEY_allow_mem_management_default="0" +OCF_RESKEY_node_ip_attribute_default="" +OCF_RESKEY_reserved_Dom0_memory_default="512" +OCF_RESKEY_autoset_utilization_cpu_default="false" +OCF_RESKEY_autoset_utilization_hv_memory_default="false" +OCF_RESKEY_monitor_scripts_default="" + +: ${OCF_RESKEY_xmfile=${OCF_RESKEY_xmfile_default}} +: ${OCF_RESKEY_name=${OCF_RESKEY_name_default}} +: ${OCF_RESKEY_shutdown_timeout=${OCF_RESKEY_shutdown_timeout_default}} +: ${OCF_RESKEY_shutdown_acpi=${OCF_RESKEY_shutdown_acpi_default}} +: ${OCF_RESKEY_allow_mem_management=${OCF_RESKEY_allow_mem_management_default}} +: ${OCF_RESKEY_node_ip_attribute=${OCF_RESKEY_node_ip_attribute_default}} +: ${OCF_RESKEY_reserved_Dom0_memory=${OCF_RESKEY_reserved_Dom0_memory_default}} +: ${OCF_RESKEY_autoset_utilization_cpu=${OCF_RESKEY_autoset_utilization_cpu_default}} +: ${OCF_RESKEY_autoset_utilization_hv_memory=${OCF_RESKEY_autoset_utilization_hv_memory_default}} +: ${OCF_RESKEY_monitor_scripts=${OCF_RESKEY_monitor_scripts_default}} + ####################################################################### usage() { cat <<-END usage: $0 {start|stop|status|monitor|meta-data|validate-all} END } -: ${OCF_RESKEY_xmfile=/etc/xen/vm/MyDomU} -: ${OCF_RESKEY_shutdown_acpi=0} -: ${OCF_RESKEY_allow_mem_management=0} -: ${OCF_RESKEY_reserved_Dom0_memory=512} -: ${OCF_RESKEY_autoset_utilization_cpu="false"} -: ${OCF_RESKEY_autoset_utilization_hv_memory="false"} - # prefer xl xentool=$(which xl 2> /dev/null || which xm) meta_data() { cat < 1.0 Resource Agent for the Xen Hypervisor. Manages Xen virtual machine instances by mapping cluster resource start and stop, to Xen create and shutdown, respectively. A note on names We will try to extract the name from the config file (the xmfile attribute). If you use a simple assignment statement, then you should be fine. Otherwise, if there's some python acrobacy involved such as dynamically assigning names depending on other variables, and we will try to detect this, then please set the name attribute. You should also do that if there is any chance of a pathological situation where a config file might be missing, for example if it resides on a shared storage. If all fails, we finally fall back to the instance id to preserve backward compatibility. Para-virtualized guests can also be migrated by enabling the meta_attribute allow-migrate. Manages Xen unprivileged domains (DomUs) Absolute path to the Xen control file, for this virtual machine. Xen control file - + Name of the virtual machine. Xen DomU name - + The Xen agent will first try an orderly shutdown using xl shutdown. Should this not succeed within this timeout, the agent will escalate to xl destroy, forcibly killing the node. If this is not set, it will default to two-third of the stop action timeout. Setting this value to 0 forces an immediate destroy. Shutdown escalation timeout - + Handle shutdown by simulating an ACPI power button event. Enable this to allow graceful shutdown for HVM domains without installed PV drivers. Simulate power button event on shutdown - + This parameter enables dynamic adjustment of memory for start and stop actions used for Dom0 and the DomUs. The default is to not adjust memory dynamically. Use dynamic memory management - + In case of a live migration, the system will default to using the IP address associated with the hostname via DNS or /etc/hosts. This parameter allows you to specify a node attribute that will be queried instead for the target node, overriding the IP address. This allows you to use a dedicated network for live migration traffic to a specific node. Warning: make very sure the IP address does point to the right node. Or else the live migration will end up somewhere else, greatly confusing the cluster and causing havoc. Node attribute containing target IP address - + In case memory management is used, this parameter defines the minimum amount of memory to be reserved for the dom0. The default minimum memory is 512MB. Minimum Dom0 memory - + If set true, the agent will detect the number of domain's vCPUs from Xen, and put it into the CPU utilization of the resource when the monitor is executed. Before enabling make sure node utilization is also set (using NodeUtilization agent or manually) or the resource might not be able to start anywhere. Enable auto-setting the CPU utilization of the resource - + If set true, the agent will detect the number of memory from Xen, and put it into the hv_memory utilization of the resource when the monitor is executed. Before enabling make sure node utilization is also set (using NodeUtilization agent or manually) or the resource might not be able to start anywhere. Enable auto-setting the hv_memory utilization of the resource - + To additionally monitor services within the unprivileged domain, add this parameter with a list of scripts to monitor. list of space separated monitor scripts - + END } Xen_Status() { if expr "x$xentool" : "x.*xl" >/dev/null; then $xentool list $1 >/dev/null 2>&1 if [ $? -ne 0 ]; then return $OCF_NOT_RUNNING else return $OCF_SUCCESS fi fi if have_binary xen-list; then xen-list $1 2>/dev/null | grep -qs "State.*[-r][-b][-p]--" 2>/dev/null if [ $? -ne 0 ]; then return $OCF_NOT_RUNNING else return $OCF_SUCCESS fi fi STATUS=`$xentool list --long $1 2>/dev/null | grep status 2>/dev/null` if [ "X${STATUS}" != "X" ]; then # we have Xen 3.0.4 or higher STATUS_NOSPACES=`echo "$STATUS" | awk '{ print $1,$2}'` if [ "$STATUS_NOSPACES" = "(status 2)" -o "$STATUS_NOSPACES" = "(status 1)" ]; then return $OCF_SUCCESS else return $OCF_NOT_RUNNING fi else # we have Xen 3.0.3 or lower STATUS=`$xentool list --long $1 2>/dev/null | grep state 2>/dev/null` echo "${STATUS}" | grep -qs "[-r][-b][-p]---" if [ $? -ne 0 ]; then return $OCF_NOT_RUNNING else return $OCF_SUCCESS fi fi } # If the guest is rebooting, it may completely disappear from the # list of defined guests, thus xl/xen-list would return with not # running; apparently, this period lasts only for a second or # two # If a status returns not running, then test status # again for 5 times (perhaps it'll show up) Xen_Status_with_Retry() { local rc cnt=5 Xen_Status $1 rc=$? while [ $rc -eq $OCF_NOT_RUNNING -a $cnt -gt 0 ]; do case "$__OCF_ACTION" in stop) ocf_log debug "domain $1 reported as not running, waiting $cnt seconds ..." ;; monitor) ocf_log warn "domain $1 reported as not running, but it is expected to be running! Retrying for $cnt seconds ..." ;; *) : not reachable ;; esac sleep 1 Xen_Status $1 rc=$? cnt=$((cnt-1)) done return $rc } set_util_attr() { local attr=$1 val=$2 local cval outp cval=$(crm_resource -Q -r $OCF_RESOURCE_INSTANCE -z -g $attr 2>/dev/null) if [ $? -ne 0 ] && [ -z "$cval" ]; then if crm_resource -Q -r $OCF_RESOURCE_INSTANCE -z -g $attr 2>&1 | grep -q "not connected"; then ocf_log debug "Unable to get utilization attribute $attr: cib is not available" return fi fi if [ "$cval" != "$val" ]; then outp=$(crm_resource -r $OCF_RESOURCE_INSTANCE -z -p $attr -v $val 2>&1) || \ ocf_log warn "Unable to set utilization attribute $attr: $outp" fi } Xen_Update_Utilization() { local dom_status dom_cpu dom_mem dom_status=$($xentool list ${DOMAIN_NAME} | awk 'NR==2 {print $4, $3}') if ocf_is_true "$OCF_RESKEY_autoset_utilization_cpu"; then dom_cpu=${dom_status% *} test -n "$dom_cpu" && set_util_attr cpu $dom_cpu fi if ocf_is_true "$OCF_RESKEY_autoset_utilization_hv_memory"; then dom_mem=${dom_status#* } test -n "$dom_mem" && set_util_attr hv_memory "$dom_mem" fi } Xen_Adjust_Memory() { if ocf_is_true "${OCF_RESKEY_allow_mem_management}"; then CNTNEW=$1 RUNNING=`Xen_List_running` RUNCNT=`Xen_Count_running` MAXMEM=`Xen_Total_Memory` if [ ${RUNCNT} -eq 0 -a ${CNTNEW} -eq 0 ]; then RUNCNT=1 fi #NEWMEM=`echo "(${MAXMEM}-${OCF_RESKEY_reserved_Dom0_memory})/(${RUNCNT}+${CNTNEW})"|bc` NEWMEM=$(( (${MAXMEM} - ${OCF_RESKEY_reserved_Dom0_memory}) / (${RUNCNT} + ${CNTNEW} ) )) # do not rely on ballooning add dom0_mem=512 instead to force memory for dom0 #$xentool mem-set Domain-0 ${OCF_RESKEY_reserved_Dom0_memory} for DOM in ${RUNNING}; do $xentool mem-set ${DOM} ${NEWMEM} done ocf_log info "Adjusted memory to: $NEWMEM, for the following $RUNCNT domains: $RUNNING" fi } Xen_List_all() { $xentool list | grep -v -e "Name" -e "Domain-0" | awk '{print $1}' } Xen_List_running() { ALL_DOMS=`Xen_List_all` for DOM in ${ALL_DOMS}; do if Xen_Status $DOM; then echo "${DOM} " fi done } Xen_Count_running() { Xen_List_running | wc -w } Xen_Monitor() { if ocf_is_probe; then Xen_Status ${DOMAIN_NAME} else Xen_Status_with_Retry ${DOMAIN_NAME} fi if [ $? -eq ${OCF_NOT_RUNNING} ]; then ocf_is_probe || ocf_log err "Xen domain $DOMAIN_NAME stopped" return ${OCF_NOT_RUNNING} fi if ocf_is_true "$OCF_RESKEY_autoset_utilization_cpu" || \ ocf_is_true "$OCF_RESKEY_autoset_utilization_hv_memory" then Xen_Update_Utilization fi if [ "X${OCF_RESKEY_monitor_scripts}" = "X" ]; then return ${OCF_SUCCESS} fi for SCRIPT in ${OCF_RESKEY_monitor_scripts}; do $SCRIPT if [ $? -ne 0 ]; then return ${OCF_ERR_GENERIC} fi done return ${OCF_SUCCESS} } Xen_Total_Memory() { $xentool info | grep "^total_memory" | awk '{print $3}' } Xen_Start() { if Xen_Status ${DOMAIN_NAME}; then ocf_log info "Xen domain $DOMAIN_NAME already running." return $OCF_SUCCESS fi if [ ! -f "${OCF_RESKEY_xmfile}" ]; then ocf_log err "Config file ${OCF_RESKEY_xmfile} for $DOMAIN_NAME does not exist." return $OCF_ERR_INSTALLED fi if ocf_is_true "${OCF_RESKEY_allow_mem_management}"; then Xen_Adjust_Memory 1 ocf_log info "New memory for virtual domains: ${NEWMEM}" sed -i -e "/^memory=/ s/^memory=.*/memory=${NEWMEM}/" ${OCF_RESKEY_xmfile} $xentool mem-set ${DOMAIN_NAME} ${NEWMEM} fi # the latest xl management tool is squeamish about some # characters in a name (the vm name is xen-f): # /etc/xen/vm/xen-f:15: config parsing error near `xen': # syntax error, unexpected IDENT, expecting STRING or NUMBER # or '[' # /etc/xen/vm/xen-f:15: config parsing error near `-f': lexical error # # the older xm management tool cannot digest quotes (see # https://developerbugs.linuxfoundation.org/show_bug.cgi?id=2671) # # hence the following if expr "x$xentool" : "x.*xl" >/dev/null; then $xentool create ${OCF_RESKEY_xmfile} name=\"$DOMAIN_NAME\" else $xentool create ${OCF_RESKEY_xmfile} name="$DOMAIN_NAME" fi rc=$? if [ $rc -ne 0 ]; then return $OCF_ERR_GENERIC else if ocf_is_true "${OCF_RESKEY_allow_mem_management}"; then $xentool mem-set ${DOMAIN_NAME} ${NEWMEM} fi fi while sleep 1; do Xen_Monitor && return $OCF_SUCCESS done } xen_domain_stop() { local dom=$1 local timeout if [ -n "$OCF_RESKEY_shutdown_timeout" ]; then timeout=$OCF_RESKEY_shutdown_timeout elif [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then # Allow 2/3 of the action timeout for the orderly shutdown # (The origin unit is ms, hence the conversion) timeout=$((OCF_RESKEY_CRM_meta_timeout/1500)) else timeout=60 fi if [ "$timeout" -gt 0 ]; then ocf_log info "Xen domain $dom will be stopped (timeout: ${timeout}s)" if ocf_is_true "${OCF_RESKEY_shutdown_acpi}"; then $xentool trigger $dom power else $xentool shutdown $dom fi while Xen_Status $dom && [ "$timeout" -gt 0 ]; do ocf_log debug "$dom still not stopped. Waiting..." timeout=$((timeout-1)) sleep 1 done fi if [ "$timeout" -eq 0 ]; then while Xen_Status $dom; do ocf_log warn "Xen domain $dom will be destroyed!" $xenkill $dom sleep 1 done # Note: This does not give up. stop isn't allowed to to fail. # If $xentool destroy fails, stop will eventually timeout. # This is the correct behaviour. fi ocf_log info "Xen domain $dom stopped." } Xen_Stop() { local vm if Xen_Status_with_Retry ${DOMAIN_NAME}; then vm=${DOMAIN_NAME} elif Xen_Status migrating-${DOMAIN_NAME}; then ocf_log info "Xen domain $DOMAIN_NAME is migrating" vm="migrating-${DOMAIN_NAME}" else ocf_log info "Xen domain $DOMAIN_NAME already stopped." fi if [ "$vm" ]; then xen_domain_stop $vm else # It is supposed to be gone, but there have been situations where # $xentool list / xen-list showed it as stopped but it was still # instantiated. Nuke it once more to make sure: $xenkill ${DOMAIN_NAME} fi Xen_Adjust_Memory 0 return $OCF_SUCCESS } Xen_Migrate_To() { target_node="$OCF_RESKEY_CRM_meta_migrate_target" target_attr="$OCF_RESKEY_node_ip_attribute" target_addr="$target_node" if Xen_Status ${DOMAIN_NAME}; then ocf_log info "$DOMAIN_NAME: Starting $xentool migrate to $target_node" if [ -n "$target_attr" ]; then nodevalue=`crm_attribute --type nodes --node $target_node -n $target_attr -G -q` if [ -n "${nodevalue}" -a "${nodevalue}" != "(null)" ]; then target_addr="$nodevalue" ocf_log info "$DOMAIN_NAME: $target_node is using address $target_addr" fi fi if expr "x$xentool" : "x.*xm" >/dev/null; then $xentool migrate --live $DOMAIN_NAME $target_addr else $xentool migrate $DOMAIN_NAME $target_addr fi rc=$? if [ $rc -ne 0 ]; then ocf_log err "$DOMAIN_NAME: $xentool migrate to $target_node failed: $rc" return $OCF_ERR_GENERIC else Xen_Adjust_Memory 0 ocf_log info "$DOMAIN_NAME: $xentool migrate to $target_node succeeded." return $OCF_SUCCESS fi else ocf_log err "$DOMAIN_NAME: migrate_to: Not active locally!" return $OCF_ERR_GENERIC fi } Xen_Migrate_From() { if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then # Allow 2/3 of the action timeout for status to stabilize # (The origin unit is ms, hence the conversion) timeout=$((OCF_RESKEY_CRM_meta_timeout/1500)) else timeout=10 # should be plenty fi while ! Xen_Status ${DOMAIN_NAME} && [ $timeout -gt 0 ]; do ocf_log debug "$DOMAIN_NAME: Not yet active locally, waiting (timeout: ${timeout}s)" timeout=$((timeout-1)) sleep 1 done if Xen_Status ${DOMAIN_NAME}; then Xen_Adjust_Memory 0 ocf_log info "$DOMAIN_NAME: Active locally, migration successful" return $OCF_SUCCESS else ocf_log err "$DOMAIN_NAME: Not active locally, migration failed!" return $OCF_ERR_GENERIC fi } Xen_Validate_All() { return $OCF_SUCCESS } if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi case $1 in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; esac # the name business: # # 1. use the name attribute, or # 2. find the name in the config file (if it exists) and use that # unless it contains funny characters such as '%' or space, or # 3. use the OCF_RESOURCE_INSTANCE if [ x"${OCF_RESKEY_name}" != x ]; then DOMAIN_NAME="${OCF_RESKEY_name}" else if [ -f "${OCF_RESKEY_xmfile}" ]; then DOMAIN_NAME=`awk '$1~/^name(=|$)/{print}' ${OCF_RESKEY_xmfile} | sed 's/.*=[[:space:]]*//' | tr -d "[\"']"` if echo "$DOMAIN_NAME" | grep -qs '[%[:space:]]'; then DOMAIN_NAME="" fi fi DOMAIN_NAME=${DOMAIN_NAME:-${OCF_RESOURCE_INSTANCE}} fi for binary in sed awk; do check_binary $binary done if have_binary xen-destroy ; then xenkill="xen-destroy" else xenkill="$xentool destroy" fi if [ -n "$OCF_RESKEY_shutdown_timeout" ]; then ocf_is_decimal "$OCF_RESKEY_shutdown_timeout" || { ocf_log err "shutdown_timeout must be a number" exit $OCF_ERR_CONFIGURED } fi case $1 in start) Xen_Start ;; stop) Xen_Stop ;; migrate_to) Xen_Migrate_To ;; migrate_from) Xen_Migrate_From ;; monitor) Xen_Monitor ;; status) Xen_Status ${DOMAIN_NAME} ;; validate-all) Xen_Validate_All ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $?