diff --git a/heartbeat/NodeUtilization b/heartbeat/NodeUtilization index 61969e6f7..1eee5f91f 100755 --- a/heartbeat/NodeUtilization +++ b/heartbeat/NodeUtilization @@ -1,226 +1,226 @@ #!/bin/sh # # # NodeUtilization OCF Resource Agent # # Copyright (c) 2011 SUSE LINUX, John Shi # Copyright (c) 2016 SUSE LINUX, Kristoffer Gronlund # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### NodeUtilization_meta_data() { cat < 1.0 The Node Utilization agent detects system parameters like available CPU, host memory and hypervisor memory availability, and adds them into the CIB for each node using crm_attribute. Run the agent as a clone resource to have it populate these parameters on each node. Note: Setting hv_memory only works with Xen at the moment, using the xl or xm command line tools. Node Utilization If set, parameters will be updated if there are differences between the HA parameters and the system values when running the monitor action. If not set, the parameters will be set once when the resource instance starts. Dynamically update parameters in monitor Enable setting node CPU utilization limit. Set node CPU utilization limit. Subtract this value when setting the CPU utilization parameter. CPU reservation. Enable setting available host memory. Set available host memory. Subtract this value when setting host memory utilization, in MB. Host memory reservation, in MB. Enable setting available hypervisor memory. Set available hypervisor memory. Subtract this value when setting hypervisor memory utilization, in MB. Hypervisor memory reservation, in MB. END } Host_Total_Memory() { local xentool xentool=$(which xl 2> /dev/null || which xm 2> /dev/null) if [ -x $xentool ]; then $xentool info | awk '/total_memory/{printf("%d\n",$3);exit(0)}' else ocf_log warn "Can only set hv_memory for Xen hypervisor" echo "0" fi } set_utilization() { host_name="$(ocf_local_nodename)" if ocf_is_true "$OCF_RESKEY_utilization_cpu"; then sys_cpu=$(( $(grep -c processor /proc/cpuinfo) - $OCF_RESKEY_utilization_cpu_reservation )) - uti_cpu=$(crm_attribute -Q -t nodes -U "$host_name" -z -n cpu 2>/dev/null) + uti_cpu=$(crm_attribute -Q -t nodes --node "$host_name" -z -n cpu 2>/dev/null) if [ "$sys_cpu" != "$uti_cpu" ]; then - if ! crm_attribute -t nodes -U "$host_name" -z -n cpu -v $sys_cpu; then + if ! crm_attribute -t nodes --node "$host_name" -z -n cpu -v $sys_cpu; then ocf_log err "Failed to set the cpu utilization attribute for $host_name using crm_attribute." return 1 fi fi fi if ocf_is_true "$OCF_RESKEY_utilization_host_memory"; then sys_mem=$(( $(awk '/MemTotal/{printf("%d\n",$2/1024);exit(0)}' /proc/meminfo) - $OCF_RESKEY_utilization_host_memory_reservation )) - uti_mem=$(crm_attribute -Q -t nodes -U "$host_name" -z -n host_memory 2>/dev/null) + uti_mem=$(crm_attribute -Q -t nodes --node "$host_name" -z -n host_memory 2>/dev/null) if [ "$sys_mem" != "$uti_mem" ]; then - if ! crm_attribute -t nodes -U "$host_name" -z -n host_memory -v $sys_mem; then + if ! crm_attribute -t nodes --node "$host_name" -z -n host_memory -v $sys_mem; then ocf_log err "Failed to set the host_memory utilization attribute for $host_name using crm_attribute." return 1 fi fi fi if ocf_is_true "$OCF_RESKEY_utilization_hv_memory"; then hv_mem=$(( $(Host_Total_Memory) - OCF_RESKEY_utilization_hv_memory_reservation )) - uti_mem=$(crm_attribute -Q -t nodes -U "$host_name" -z -n hv_memory 2>/dev/null) + uti_mem=$(crm_attribute -Q -t nodes --node "$host_name" -z -n hv_memory 2>/dev/null) [ $hv_mem -lt 0 ] && hv_mem=0 if [ "$hv_mem" != "$uti_mem" ]; then - if ! crm_attribute -t nodes -U "$host_name" -z -n hv_memory -v $hv_mem; then + if ! crm_attribute -t nodes --node "$host_name" -z -n hv_memory -v $hv_mem; then ocf_log err "Failed to set the hv_memory utilization attribute for $host_name using crm_attribute." return 1 fi fi fi } NodeUtilization_usage() { cat < /dev/null || which xm) meta_data() { cat < 1.0 Resource Agent for the Xen Hypervisor. Manages Xen virtual machine instances by mapping cluster resource start and stop, to Xen create and shutdown, respectively. A note on names We will try to extract the name from the config file (the xmfile attribute). If you use a simple assignment statement, then you should be fine. Otherwise, if there's some python acrobacy involved such as dynamically assigning names depending on other variables, and we will try to detect this, then please set the name attribute. You should also do that if there is any chance of a pathological situation where a config file might be missing, for example if it resides on a shared storage. If all fails, we finally fall back to the instance id to preserve backward compatibility. Para-virtualized guests can also be migrated by enabling the meta_attribute allow-migrate. Manages Xen unprivileged domains (DomUs) Absolute path to the Xen control file, for this virtual machine. Xen control file Name of the virtual machine. Xen DomU name The Xen agent will first try an orderly shutdown using xl shutdown. Should this not succeed within this timeout, the agent will escalate to xl destroy, forcibly killing the node. If this is not set, it will default to two-third of the stop action timeout. Setting this value to 0 forces an immediate destroy. Shutdown escalation timeout Handle shutdown by simulating an ACPI power button event. Enable this to allow graceful shutdown for HVM domains without installed PV drivers. Simulate power button event on shutdown This parameter enables dynamic adjustment of memory for start and stop actions used for Dom0 and the DomUs. The default is to not adjust memory dynamically. Use dynamic memory management In case of a live migration, the system will default to using the IP address associated with the hostname via DNS or /etc/hosts. This parameter allows you to specify a node attribute that will be queried instead for the target node, overriding the IP address. This allows you to use a dedicated network for live migration traffic to a specific node. Warning: make very sure the IP address does point to the right node. Or else the live migration will end up somewhere else, greatly confusing the cluster and causing havoc. Node attribute containing target IP address In case memory management is used, this parameter defines the minimum amount of memory to be reserved for the dom0. The default minimum memory is 512MB. Minimum Dom0 memory To additionally monitor services within the unprivileged domain, add this parameter with a list of scripts to monitor. list of space separated monitor scripts END } Xen_Status() { if expr "x$xentool" : "x.*xl" >/dev/null; then $xentool list $1 >/dev/null 2>&1 if [ $? -ne 0 ]; then return $OCF_NOT_RUNNING else return $OCF_SUCCESS fi fi if have_binary xen-list; then xen-list $1 2>/dev/null | grep -qs "State.*[-r][-b][-p]--" 2>/dev/null if [ $? -ne 0 ]; then return $OCF_NOT_RUNNING else return $OCF_SUCCESS fi fi STATUS=`$xentool list --long $1 2>/dev/null | grep status 2>/dev/null` if [ "X${STATUS}" != "X" ]; then # we have Xen 3.0.4 or higher STATUS_NOSPACES=`echo "$STATUS" | awk '{ print $1,$2}'` if [ "$STATUS_NOSPACES" = "(status 2)" -o "$STATUS_NOSPACES" = "(status 1)" ]; then return $OCF_SUCCESS else return $OCF_NOT_RUNNING fi else # we have Xen 3.0.3 or lower STATUS=`$xentool list --long $1 2>/dev/null | grep state 2>/dev/null` echo "${STATUS}" | grep -qs "[-r][-b][-p]---" if [ $? -ne 0 ]; then return $OCF_NOT_RUNNING else return $OCF_SUCCESS fi fi } # If the guest is rebooting, it may completely disappear from the # list of defined guests, thus xl/xen-list would return with not # running; apparently, this period lasts only for a second or # two # If a status returns not running, then test status # again for 5 times (perhaps it'll show up) Xen_Status_with_Retry() { local rc cnt=5 Xen_Status $1 rc=$? while [ $rc -eq $OCF_NOT_RUNNING -a $cnt -gt 0 ]; do case "$__OCF_ACTION" in stop) ocf_log debug "domain $1 reported as not running, waiting $cnt seconds ..." ;; monitor) ocf_log warn "domain $1 reported as not running, but it is expected to be running! Retrying for $cnt seconds ..." ;; *) : not reachable ;; esac sleep 1 Xen_Status $1 rc=$? cnt=$((cnt-1)) done return $rc } Xen_Adjust_Memory() { if ocf_is_true "${OCF_RESKEY_allow_mem_management}"; then CNTNEW=$1 RUNNING=`Xen_List_running` RUNCNT=`Xen_Count_running` MAXMEM=`Xen_Total_Memory` if [ ${RUNCNT} -eq 0 -a ${CNTNEW} -eq 0 ]; then RUNCNT=1 fi #NEWMEM=`echo "(${MAXMEM}-${OCF_RESKEY_reserved_Dom0_memory})/(${RUNCNT}+${CNTNEW})"|bc` NEWMEM=$(( (${MAXMEM} - ${OCF_RESKEY_reserved_Dom0_memory}) / (${RUNCNT} + ${CNTNEW} ) )) # do not rely on ballooning add dom0_mem=512 instead to force memory for dom0 #$xentool mem-set Domain-0 ${OCF_RESKEY_reserved_Dom0_memory} for DOM in ${RUNNING}; do $xentool mem-set ${DOM} ${NEWMEM} done ocf_log info "Adjusted memory to: $NEWMEM, for the following $RUNCNT domains: $RUNNING" fi } Xen_List_all() { $xentool list | grep -v -e "Name" -e "Domain-0" | awk '{print $1}' } Xen_List_running() { ALL_DOMS=`Xen_List_all` for DOM in ${ALL_DOMS}; do if Xen_Status $DOM; then echo "${DOM} " fi done } Xen_Count_running() { Xen_List_running | wc -w } Xen_Monitor() { if ocf_is_probe; then Xen_Status ${DOMAIN_NAME} else Xen_Status_with_Retry ${DOMAIN_NAME} fi if [ $? -eq ${OCF_NOT_RUNNING} ]; then ocf_is_probe || ocf_log err "Xen domain $DOMAIN_NAME stopped" return ${OCF_NOT_RUNNING} fi if [ "X${OCF_RESKEY_monitor_scripts}" = "X" ]; then return ${OCF_SUCCESS} fi for SCRIPT in ${OCF_RESKEY_monitor_scripts}; do $SCRIPT if [ $? -ne 0 ]; then return ${OCF_ERR_GENERIC} fi done return ${OCF_SUCCESS} } Xen_Total_Memory() { $xentool info | grep "^total_memory" | awk '{print $3}' } Xen_Start() { if Xen_Status ${DOMAIN_NAME}; then ocf_log info "Xen domain $DOMAIN_NAME already running." return $OCF_SUCCESS fi if [ ! -f "${OCF_RESKEY_xmfile}" ]; then ocf_log err "Config file ${OCF_RESKEY_xmfile} for $DOMAIN_NAME does not exist." return $OCF_ERR_INSTALLED fi if ocf_is_true "${OCF_RESKEY_allow_mem_management}"; then Xen_Adjust_Memory 1 ocf_log info "New memory for virtual domains: ${NEWMEM}" sed -i -e "/^memory=/ s/^memory=.*/memory=${NEWMEM}/" ${OCF_RESKEY_xmfile} $xentool mem-set ${DOMAIN_NAME} ${NEWMEM} fi # the latest xl management tool is squeamish about some # characters in a name (the vm name is xen-f): # /etc/xen/vm/xen-f:15: config parsing error near `xen': # syntax error, unexpected IDENT, expecting STRING or NUMBER # or '[' # /etc/xen/vm/xen-f:15: config parsing error near `-f': lexical error # # the older xm management tool cannot digest quotes (see # https://developerbugs.linuxfoundation.org/show_bug.cgi?id=2671) # # hence the following if expr "x$xentool" : "x.*xl" >/dev/null; then $xentool create ${OCF_RESKEY_xmfile} name=\"$DOMAIN_NAME\" else $xentool create ${OCF_RESKEY_xmfile} name="$DOMAIN_NAME" fi rc=$? if [ $rc -ne 0 ]; then return $OCF_ERR_GENERIC else if ocf_is_true "${OCF_RESKEY_allow_mem_management}"; then $xentool mem-set ${DOMAIN_NAME} ${NEWMEM} fi fi while sleep 1; do Xen_Monitor && return $OCF_SUCCESS done } xen_domain_stop() { local dom=$1 local timeout if [ -n "$OCF_RESKEY_shutdown_timeout" ]; then timeout=$OCF_RESKEY_shutdown_timeout elif [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then # Allow 2/3 of the action timeout for the orderly shutdown # (The origin unit is ms, hence the conversion) timeout=$((OCF_RESKEY_CRM_meta_timeout/1500)) else timeout=60 fi if [ "$timeout" -gt 0 ]; then ocf_log info "Xen domain $dom will be stopped (timeout: ${timeout}s)" if ocf_is_true "${OCF_RESKEY_shutdown_acpi}"; then $xentool trigger $dom power else $xentool shutdown $dom fi while Xen_Status $dom && [ "$timeout" -gt 0 ]; do ocf_log debug "$dom still not stopped. Waiting..." timeout=$((timeout-1)) sleep 1 done fi if [ "$timeout" -eq 0 ]; then while Xen_Status $dom; do ocf_log warn "Xen domain $dom will be destroyed!" $xenkill $dom sleep 1 done # Note: This does not give up. stop isn't allowed to to fail. # If $xentool destroy fails, stop will eventually timeout. # This is the correct behaviour. fi ocf_log info "Xen domain $dom stopped." } Xen_Stop() { local vm if Xen_Status_with_Retry ${DOMAIN_NAME}; then vm=${DOMAIN_NAME} elif Xen_Status migrating-${DOMAIN_NAME}; then ocf_log info "Xen domain $DOMAIN_NAME is migrating" vm="migrating-${DOMAIN_NAME}" else ocf_log info "Xen domain $DOMAIN_NAME already stopped." fi if [ "$vm" ]; then xen_domain_stop $vm else # It is supposed to be gone, but there have been situations where # $xentool list / xen-list showed it as stopped but it was still # instantiated. Nuke it once more to make sure: $xenkill ${DOMAIN_NAME} fi Xen_Adjust_Memory 0 return $OCF_SUCCESS } Xen_Migrate_To() { target_node="$OCF_RESKEY_CRM_meta_migrate_target" target_attr="$OCF_RESKEY_node_ip_attribute" target_addr="$target_node" if Xen_Status ${DOMAIN_NAME}; then ocf_log info "$DOMAIN_NAME: Starting $xentool migrate to $target_node" if [ -n "$target_attr" ]; then - nodevalue=`crm_attribute --type nodes --node-uname $target_node --attr-name $target_attr --get-value -q` + nodevalue=`crm_attribute --type nodes --node $target_node --attr-name $target_attr --get-value -q` if [ -n "${nodevalue}" -a "${nodevalue}" != "(null)" ]; then target_addr="$nodevalue" ocf_log info "$DOMAIN_NAME: $target_node is using address $target_addr" fi fi if expr "x$xentool" : "x.*xm" >/dev/null; then $xentool migrate --live $DOMAIN_NAME $target_addr else $xentool migrate $DOMAIN_NAME $target_addr fi rc=$? if [ $rc -ne 0 ]; then ocf_log err "$DOMAIN_NAME: $xentool migrate to $target_node failed: $rc" return $OCF_ERR_GENERIC else Xen_Adjust_Memory 0 ocf_log info "$DOMAIN_NAME: $xentool migrate to $target_node succeeded." return $OCF_SUCCESS fi else ocf_log err "$DOMAIN_NAME: migrate_to: Not active locally!" return $OCF_ERR_GENERIC fi } Xen_Migrate_From() { if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then # Allow 2/3 of the action timeout for status to stabilize # (The origin unit is ms, hence the conversion) timeout=$((OCF_RESKEY_CRM_meta_timeout/1500)) else timeout=10 # should be plenty fi while ! Xen_Status ${DOMAIN_NAME} && [ $timeout -gt 0 ]; do ocf_log debug "$DOMAIN_NAME: Not yet active locally, waiting (timeout: ${timeout}s)" timeout=$((timeout-1)) sleep 1 done if Xen_Status ${DOMAIN_NAME}; then Xen_Adjust_Memory 0 ocf_log info "$DOMAIN_NAME: Active locally, migration successful" return $OCF_SUCCESS else ocf_log err "$DOMAIN_NAME: Not active locally, migration failed!" return $OCF_ERR_GENERIC fi } Xen_Validate_All() { return $OCF_SUCCESS } if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi case $1 in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; esac # the name business: # # 1. use the name attribute, or # 2. find the name in the config file (if it exists) and use that # unless it contains funny characters such as '%' or space, or # 3. use the OCF_RESOURCE_INSTANCE if [ x"${OCF_RESKEY_name}" != x ]; then DOMAIN_NAME="${OCF_RESKEY_name}" else if [ -f "${OCF_RESKEY_xmfile}" ]; then DOMAIN_NAME=`awk '$1~/^name(=|$)/{print}' ${OCF_RESKEY_xmfile} | sed 's/.*=[[:space:]]*//' | tr -d "[\"']"` if echo "$DOMAIN_NAME" | grep -qs '[%[:space:]]'; then DOMAIN_NAME="" fi fi DOMAIN_NAME=${DOMAIN_NAME:-${OCF_RESOURCE_INSTANCE}} fi for binary in sed awk; do check_binary $binary done if have_binary xen-destroy ; then xenkill="xen-destroy" else xenkill="$xentool destroy" fi if [ -n "$OCF_RESKEY_shutdown_timeout" ]; then ocf_is_decimal "$OCF_RESKEY_shutdown_timeout" || { ocf_log err "shutdown_timeout must be a number" exit $OCF_ERR_CONFIGURED } fi case $1 in start) Xen_Start ;; stop) Xen_Stop ;; migrate_to) Xen_Migrate_To ;; migrate_from) Xen_Migrate_From ;; monitor) Xen_Monitor ;; status) Xen_Status ${DOMAIN_NAME} ;; validate-all) Xen_Validate_All ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $?