diff --git a/heartbeat/VirtualDomain b/heartbeat/VirtualDomain index 7a2255706..6159ede23 100755 --- a/heartbeat/VirtualDomain +++ b/heartbeat/VirtualDomain @@ -1,755 +1,755 @@ #!/bin/sh # # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # # Resource Agent for domains managed by the libvirt API. # Requires a running libvirt daemon (libvirtd). # # (c) 2008-2010 Florian Haas, Dejan Muhamedagic, # and Linux-HA contributors # # usage: $0 {start|stop|status|monitor|migrate_to|migrate_from|meta-data|validate-all} # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Defaults OCF_RESKEY_force_stop_default=0 OCF_RESKEY_autoset_utilization_cpu_default="true" OCF_RESKEY_autoset_utilization_hv_memory_default="true" OCF_RESKEY_migrateport_default=$(( 49152 + $(ocf_maybe_random) % 64 )) OCF_RESKEY_CRM_meta_timeout_default=90000 OCF_RESKEY_save_config_on_stop_default=false : ${OCF_RESKEY_force_stop=${OCF_RESKEY_force_stop_default}} : ${OCF_RESKEY_autoset_utilization_cpu=${OCF_RESKEY_autoset_utilization_cpu_default}} : ${OCF_RESKEY_autoset_utilization_hv_memory=${OCF_RESKEY_autoset_utilization_hv_memory_default}} : ${OCF_RESKEY_migrateport=${OCF_RESKEY_migrateport_default}} : ${OCF_RESKEY_CRM_meta_timeout=${OCF_RESKEY_CRM_meta_timeout_default}} : ${OCF_RESKEY_save_config_on_stop=${OCF_RESKEY_save_config_on_stop_default}} ####################################################################### ## I'd very much suggest to make this RA use bash, ## and then use magic $SECONDS. ## But for now: NOW=$(date +%s) usage() { echo "usage: $0 {start|stop|status|monitor|migrate_to|migrate_from|meta-data|validate-all}" } meta_data() { cat < 1.1 Resource agent for a virtual domain (a.k.a. domU, virtual machine, virtual environment etc., depending on context) managed by libvirtd. 
Manages virtual domains through the libvirt virtualization framework Absolute path to the libvirt configuration file, for this virtual domain. Virtual domain configuration file Hypervisor URI to connect to. See the libvirt documentation for details on supported URI formats. The default is system dependent. Determine the system's default uri by running 'virsh --quiet uri'. Hypervisor URI Always forcefully shut down ("destroy") the domain on stop. The default behavior is to resort to a forceful shutdown only after a graceful shutdown attempt has failed. You should only set this to true if your virtual domain (or your virtualization backend) does not support graceful shutdown. Always force shutdown on stop Transport used to connect to the remote hypervisor while migrating. Please refer to the libvirt documentation for details on transports available. If this parameter is omitted, the resource will use libvirt's default transport to connect to the remote hypervisor. Remote hypervisor transport Use a dedicated migration network. The migration URI is composed by adding this parameters value to the end of the node name. If the node name happens to be an FQDN (as opposed to an unqualified host name), insert the suffix immediately prior to the first period (.) in the FQDN. At the moment Qemu/KVM and Xen migration via a dedicated network is supported. Note: Be sure this composed host name is locally resolveable and the associated IP is reachable through the favored network. Migration network host name suffix To additionally monitor services within the virtual domain, add this parameter with a list of scripts to monitor. Note: when monitor scripts are used, the start and migrate_from operations will complete only when all monitor scripts have completed successfully. Be sure to set the timeout of these operations to accommodate this delay. 
space-separated list of monitor scripts If set true, the agent will detect the number of domainU's vCPUs from virsh, and put it into the CPU utilization of the resource when the monitor is executed. Enable auto-setting the CPU utilization of the resource If set true, the agent will detect the number of *Max memory* from virsh, and put it into the hv_memory utilization of the resource when the monitor is executed. Enable auto-setting the hv_memory utilization of the resource This port will be used in the qemu migrateuri. If unset, the port will be a random highport. Port for migrateuri Changes to a running VM's config are normally lost on stop. This parameter instructs the RA to save the configuration back to the xml file provided in the "config" parameter. Save running VM's config back to its config file Path to the snapshot directory where the virtual machine image will be stored. When this parameter is set, the virtual machine's RAM state will be saved to a file in the snapshot directory when stopped. If on start a state file is present for the domain, the domain will be restored to the same state it was in right before it stopped last. This option is incompatible with the 'force_stop' option. Restore state on start/stop EOF } set_util_attr() { local attr=$1 val=$2 local cval outp cval=$(crm_resource -Q -r $OCF_RESOURCE_INSTANCE -z -g $attr 2>/dev/null) if [ $? -ne 0 ] && [ -z "$cval" ]; then crm_resource -Q -r $OCF_RESOURCE_INSTANCE -z -g $attr 2>&1 | grep -e "not connected" > /dev/null 2>&1 if [ $? 
-eq 0 ]; then ocf_log debug "Unable to set utilization attribute, cib is not available" return fi fi if [ "$cval" != "$val" ]; then outp=$(crm_resource -r $OCF_RESOURCE_INSTANCE -z -p $attr -v $val 2>&1) || ocf_log warn "crm_resource failed to set utilization attribute $attr: $outp" fi } update_utilization() { local dom_cpu dom_mem if ocf_is_true "$OCF_RESKEY_autoset_utilization_cpu"; then dom_cpu=$(LANG=C virsh $VIRSH_OPTIONS dominfo ${DOMAIN_NAME} 2>/dev/null | awk '/CPU\(s\)/{print $2}') test -n "$dom_cpu" && set_util_attr cpu $dom_cpu fi if ocf_is_true "$OCF_RESKEY_autoset_utilization_hv_memory"; then dom_mem=$(LANG=C virsh $VIRSH_OPTIONS dominfo ${DOMAIN_NAME} 2>/dev/null | awk '/Max memory/{printf("%d", $3/1024)}') test -n "$dom_mem" && set_util_attr hv_memory "$dom_mem" fi } get_emulator() { local emulator="" # An emulator is not required, so only report message in debug mode local loglevel="debug" if ocf_is_probe; then loglevel="notice" fi emulator=$(virsh $VIRSH_OPTIONS dumpxml $DOMAIN_NAME 2>/dev/null | sed -n -e 's/^.*\(.*\)<\/emulator>.*$/\1/p') - if [ -z "$emulator" ] && [ -a "$EMULATOR_STATE" ]; then + if [ -z "$emulator" ] && [ -e "$EMULATOR_STATE" ]; then emulator=$(cat $EMULATOR_STATE) fi if [ -z "$emulator" ]; then emulator=$(cat ${OCF_RESKEY_config} | sed -n -e 's/^.*\(.*\)<\/emulator>.*$/\1/p') fi if [ -n "$emulator" ]; then basename $emulator else ocf_log $loglevel "Unable to determine emulator for $DOMAIN_NAME" fi } update_emulator_cache() { local emulator emulator=$(get_emulator) if [ -n "$emulator" ]; then echo $emulator > $EMULATOR_STATE fi } # attempt to check domain status outside of libvirt using the emulator process pid_status() { local rc=$OCF_ERR_GENERIC local emulator=$(get_emulator) case "$emulator" in qemu-kvm|qemu-dm|qemu-system-*) rc=$OCF_NOT_RUNNING ps awx | grep -E "[q]emu-(kvm|dm|system).*-name $DOMAIN_NAME " > /dev/null 2>&1 if [ $? 
-eq 0 ]; then rc=$OCF_SUCCESS fi ;; libvirt_lxc) rc=$OCF_NOT_RUNNING ps awx | grep -E "[l]ibvirt_lxc.*-name $DOMAIN_NAME " > /dev/null 2>&1 if [ $? -eq 0 ]; then rc=$OCF_SUCCESS fi ;; # This can be expanded to check for additional emulators *) # We may be running xen with PV domains, they don't # have an emulator set. try xl list or xen-lists if have_binary xl; then rc=$OCF_NOT_RUNNING xl list $DOMAIN_NAME >/dev/null 2>&1 if [ $? -eq 0 ]; then rc=$OCF_SUCCESS fi elif have_binary xen-list; then rc=$OCF_NOT_RUNNING xen-list $DOMAIN_NAME 2>/dev/null | grep -qs "State.*[-r][-b][-p]--" 2>/dev/null if [ $? -eq 0 ]; then rc=$OCF_SUCCESS fi fi ;; esac if [ $rc -eq $OCF_SUCCESS ]; then ocf_log debug "Virtual domain $DOMAIN_NAME is currently running." elif [ $rc -eq $OCF_NOT_RUNNING ]; then ocf_log debug "Virtual domain $DOMAIN_NAME is currently not running." fi return $rc } VirtualDomain_Status() { local try=0 rc=$OCF_ERR_GENERIC status="no state" while [ "$status" = "no state" ]; do try=$(($try + 1 )) status=$(virsh $VIRSH_OPTIONS domstate $DOMAIN_NAME 2>&1 | tr 'A-Z' 'a-z') case "$status" in *"error:"*"domain not found"*|"shut off") # shut off: domain is defined, but not started, will not happen if # domain is created but not defined # Domain not found: domain is not defined and thus not started ocf_log debug "Virtual domain $DOMAIN_NAME is not running: $(echo $status | sed s/error://g)" rc=$OCF_NOT_RUNNING ;; running|paused|idle|blocked|"in shutdown") # running: domain is currently actively consuming cycles # paused: domain is paused (suspended) # idle: domain is running but idle # blocked: synonym for idle used by legacy Xen versions # in shutdown: the domain is in process of shutting down, but has not completely shutdown or crashed. ocf_log debug "Virtual domain $DOMAIN_NAME is currently $status." rc=$OCF_SUCCESS ;; ""|*"failed to "*"connect to the hypervisor"*|"no state") # Empty string may be returned when virsh does not # receive a reply from libvirtd. 
# "no state" may occur when the domain is currently # being migrated (on the migration target only), or # whenever virsh can't reliably obtain the domain # state. status="no state" if [ "$__OCF_ACTION" = "stop" ] && [ $try -ge 3 ]; then # During the stop operation, we want to bail out # quickly, so as to be able to force-stop (destroy) # the domain if necessary. ocf_log error "Virtual domain $DOMAIN_NAME has no state during stop operation, bailing out." return $OCF_ERR_GENERIC; elif [ "$__OCF_ACTION" = "monitor" ]; then pid_status rc=$? if [ $rc -ne $OCF_ERR_GENERIC ]; then # we've successfully determined the domains status outside of libvirt return $rc fi else # During all other actions, we just wait and try # again, relying on the CRM/LRM to time us out if # this takes too long. ocf_log info "Virtual domain $DOMAIN_NAME currently has no state, retrying." fi sleep 1 ;; *) # any other output is unexpected. ocf_log error "Virtual domain $DOMAIN_NAME has unknown status \"$status\"!" sleep 1 ;; esac done return $rc } verify_undefined() { for dom in `virsh --connect=${OCF_RESKEY_hypervisor} list --all --name 2>/dev/null`; do if [ "$dom" = "$DOMAIN_NAME" ]; then virsh $VIRSH_OPTIONS undefine $DOMAIN_NAME > /dev/null 2>&1 return fi done } VirtualDomain_Start() { local snapshotimage if VirtualDomain_Status; then ocf_log info "Virtual domain $DOMAIN_NAME already running." return $OCF_SUCCESS fi snapshotimage="$OCF_RESKEY_snapshot/${DOMAIN_NAME}.state" if [ -n "$OCF_RESKEY_snapshot" -a -f "$snapshotimage" ]; then virsh restore $snapshotimage if [ $? -eq 0 ]; then rm -f $snapshotimage return $OCF_SUCCESS fi ocf_exit_reason "Failed to restore ${DOMAIN_NAME} from state file in ${OCF_RESKEY_snapshot} directory." return $OCF_ERR_GENERIC fi # Make sure domain is undefined before creating. # The 'create' command guarantees that the domain will be # undefined on shutdown, but requires the domain to be undefined. 
# if a user defines the domain # outside of this agent, we have to ensure that the domain # is restored to an 'undefined' state before creating. verify_undefined virsh $VIRSH_OPTIONS create ${OCF_RESKEY_config} rc=$? if [ $rc -ne 0 ]; then ocf_exit_reason "Failed to start virtual domain ${DOMAIN_NAME}." return $OCF_ERR_GENERIC fi while ! VirtualDomain_Monitor; do sleep 1 done return $OCF_SUCCESS } force_stop() { local out ex local status=0 ocf_log info "Issuing forced shutdown (destroy) request for domain ${DOMAIN_NAME}." out=$(virsh $VIRSH_OPTIONS destroy ${DOMAIN_NAME} 2>&1|tr 'A-Z' 'a-z') ex=$? echo >&2 "$out" case $ex$out in *"error:"*"domain is not running"*|*"error:"*"domain not found"*) : ;; # unexpected path to the intended outcome, all is well [!0]*) ocf_exit_reason "forced stop failed" return $OCF_ERR_GENERIC ;; 0*) while [ $status != $OCF_NOT_RUNNING ]; do VirtualDomain_Status status=$? done ;; esac return $OCF_SUCCESS } save_config(){ CFGTMP=$(mktemp -t vmcfgsave.XXX) virsh $VIRSH_OPTIONS dumpxml ${DOMAIN_NAME} > ${CFGTMP} if [ -s ${CFGTMP} ]; then if virt-xml-validate ${CFGTMP} domain 2>/dev/null ; then ocf_log info "Saving domain $DOMAIN_NAME to ${OCF_RESKEY_config}. Please make sure it's present on all nodes." if cat ${CFGTMP} > ${OCF_RESKEY_config} ; then ocf_log info "Saved $DOMAIN_NAME domain's configuration to ${OCF_RESKEY_config}." else ocf_log warn "Moving ${CFGTMP} to ${OCF_RESKEY_config} failed." fi else ocf_log warn "Domain $DOMAIN_NAME config failed to validate after dump. Skipping config update." fi else ocf_log warn "Domain $DOMAIN_NAME config has 0 size. Skipping config update." fi rm -f ${CFGTMP} } VirtualDomain_Stop() { local i local status local shutdown_timeout local needshutdown=1 VirtualDomain_Status status=$? case $status in $OCF_SUCCESS) if ocf_is_true $OCF_RESKEY_force_stop; then # if force stop, don't bother attempting graceful shutdown. force_stop return $? 
fi ocf_log info "Issuing graceful shutdown request for domain ${DOMAIN_NAME}." if [ -n "$OCF_RESKEY_snapshot" ]; then virsh save $DOMAIN_NAME "$OCF_RESKEY_snapshot/${DOMAIN_NAME}.state" if [ $? -eq 0 ]; then needshutdown=0 else ocf_log error "Failed to save snapshot state of ${DOMAIN_NAME} on stop" fi fi # save config if needed if ocf_is_true "$OCF_RESKEY_save_config_on_stop"; then save_config fi # issue the shutdown if save state didn't shutdown for us if [ $needshutdown -eq 1 ]; then # Issue a graceful shutdown request virsh $VIRSH_OPTIONS shutdown ${DOMAIN_NAME} fi # The "shutdown_timeout" we use here is the operation # timeout specified in the CIB, minus 5 seconds shutdown_timeout=$(( $NOW + ($OCF_RESKEY_CRM_meta_timeout/1000) -5 )) # Loop on status until we reach $shutdown_timeout while [ $NOW -lt $shutdown_timeout ]; do VirtualDomain_Status status=$? case $status in $OCF_NOT_RUNNING) # This was a graceful shutdown. return $OCF_SUCCESS ;; $OCF_SUCCESS) # Domain is still running, keep # waiting (until shutdown_timeout # expires) sleep 1 ;; *) # Something went wrong. Bail out and # resort to forced stop (destroy). break; esac NOW=$(date +%s) done ;; $OCF_NOT_RUNNING) ocf_log info "Domain $DOMAIN_NAME already stopped." return $OCF_SUCCESS esac # OK. Now if the above graceful shutdown hasn't worked, kill # off the domain with destroy. If that too does not work, # have the LRM time us out. force_stop } VirtualDomain_Migrate_To() { local target_node local remoteuri local transport_suffix local migrateuri local migrateport local migrate_target local hypervisor target_node="$OCF_RESKEY_CRM_meta_migrate_target" if VirtualDomain_Status; then # Find out the remote hypervisor to connect to. 
That is, turn # something like "qemu://foo:9999/system" into # "qemu+tcp://bar:9999/system" if [ -n "${OCF_RESKEY_migration_transport}" ]; then transport_suffix="+${OCF_RESKEY_migration_transport}" fi # A typical migration URI via a special migration network looks # like "tcp://bar-mig:49152". The port would be randomly chosen # by libvirt from the range 49152-49215 if omitted, at least since # version 0.7.4 ... if [ -n "${OCF_RESKEY_migration_network_suffix}" ]; then hypervisor="${OCF_RESKEY_hypervisor%%[+:]*}" # Hostname might be a FQDN migrate_target=$(echo ${target_node} | sed -e "s,^\([^.]\+\),\1${OCF_RESKEY_migration_network_suffix},") case $hypervisor in qemu) # For quiet ancient libvirt versions a migration port is needed # and the URI must not contain the "//". Newer versions can handle # the "bad" URI. migrateuri="tcp:${migrate_target}:${OCF_RESKEY_migrateport}" ;; xen) migrateuri="xenmigr://${migrate_target}" ;; *) ocf_log warn "$DOMAIN_NAME: Migration via dedicated network currently not supported for ${hypervisor}." ;; esac fi # Scared of that sed expression? So am I. :-) remoteuri=$(echo ${OCF_RESKEY_hypervisor} | sed -e "s,\(.*\)://[^/:]*\(:\?[0-9]*\)/\(.*\),\1${transport_suffix}://${target_node}\2/\3,") # save config if needed if ocf_is_true "$OCF_RESKEY_save_config_on_stop"; then save_config fi # OK, we know where to connect to. Now do the actual migration. ocf_log info "$DOMAIN_NAME: Starting live migration to ${target_node} (using remote hypervisor URI ${remoteuri} ${migrateuri})." virsh ${VIRSH_OPTIONS} migrate --live $DOMAIN_NAME ${remoteuri} ${migrateuri} rc=$? if [ $rc -ne 0 ]; then ocf_exit_reason "$DOMAIN_NAME: live migration to ${remoteuri} ${migrateuri} failed: $rc" return $OCF_ERR_GENERIC else ocf_log info "$DOMAIN_NAME: live migration to ${target_node} succeeded." return $OCF_SUCCESS fi else ocf_exit_reason "$DOMAIN_NAME: migrate_to: Not active locally!" return $OCF_ERR_GENERIC fi } VirtualDomain_Migrate_From() { while ! 
VirtualDomain_Monitor; do sleep 1 done ocf_log info "$DOMAIN_NAME: live migration from ${OCF_RESKEY_CRM_meta_migrate_source} succeeded." # save config if needed if ocf_is_true "$OCF_RESKEY_save_config_on_stop"; then save_config fi return $OCF_SUCCESS } VirtualDomain_Monitor() { # First, check the domain status. If that returns anything other # than $OCF_SUCCESS, something is definitely wrong. VirtualDomain_Status rc=$? if [ ${rc} -eq ${OCF_SUCCESS} ]; then # OK, the generic status check turned out fine. Now, if we # have monitor scripts defined, run them one after another. for script in ${OCF_RESKEY_monitor_scripts}; do script_output="$($script 2>&1)" script_rc=$? if [ ${script_rc} -ne ${OCF_SUCCESS} ]; then # A monitor script returned a non-success exit # code. Stop iterating over the list of scripts, log a # warning message, and propagate $OCF_ERR_GENERIC. ocf_exit_reason "Monitor command \"${script}\" for domain ${DOMAIN_NAME} returned ${script_rc} with output: ${script_output}" rc=$OCF_ERR_GENERIC break else ocf_log debug "Monitor command \"${script}\" for domain ${DOMAIN_NAME} completed successfully with output: ${script_output}" fi done fi update_emulator_cache update_utilization return ${rc} } VirtualDomain_Validate_All() { # Required binaries: for binary in virsh sed; do check_binary $binary done if [ -z $OCF_RESKEY_config ]; then ocf_exit_reason "Missing configuration parameter \"config\"." return $OCF_ERR_CONFIGURED fi if ocf_is_true $OCF_RESKEY_force_stop; then if [ -n "$OCF_RESKEY_snapshot" ]; then ocf_exit_reason "The 'force_stop' and 'snapshot' options can not be used together." return $OCF_ERR_CONFIGURED fi fi # check if we can read the config file (otherwise we're unable to # deduce $DOMAIN_NAME from it, see below) if [ ! -r $OCF_RESKEY_config ]; then if ocf_is_probe; then ocf_log info "Configuration file $OCF_RESKEY_config not readable during probe." 
elif [ "$__OCF_ACTION" = "stop" ]; then ocf_log info "Configuration file $OCF_RESKEY_config not readable, resource considered stopped." else ocf_exit_reason "Configuration file $OCF_RESKEY_config does not exist or is not readable." return $OCF_ERR_INSTALLED fi fi } if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi case $1 in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; esac # Grab the virsh uri default, but only if hypervisor isn't set : ${OCF_RESKEY_hypervisor=$(virsh --quiet uri 2>/dev/null)} # Set options to be passed to virsh: VIRSH_OPTIONS="--connect=${OCF_RESKEY_hypervisor} --quiet" # Everything except usage and meta-data must pass the validate test VirtualDomain_Validate_All || exit $? # During a probe, it is permissible for the config file to not be # readable (it might be on shared storage not available during the # probe). In that case, we're # unable to get the domain name. Thus, we also can't check whether the # domain is running. The only thing we can do here is to assume that # it is not running. if [ ! -r $OCF_RESKEY_config ]; then ocf_is_probe && exit $OCF_NOT_RUNNING [ "$__OCF_ACTION" = "stop" ] && exit $OCF_SUCCESS fi # Retrieve the domain name from the xml file. DOMAIN_NAME=`egrep '[[:space:]]*.*[[:space:]]*$' ${OCF_RESKEY_config} | sed -e 's/[[:space:]]*\(.*\)<\/name>[[:space:]]*$/\1/' 2>/dev/null` if [ -z $DOMAIN_NAME ]; then ocf_exit_reason "Unable to determine domain name." exit $OCF_ERR_GENERIC fi EMULATOR_STATE="${HA_RSCTMP}/VirtualDomain-${DOMAIN_NAME}-emu.state" case $1 in start) VirtualDomain_Start ;; stop) VirtualDomain_Stop ;; migrate_to) VirtualDomain_Migrate_To ;; migrate_from) VirtualDomain_Migrate_From ;; status) VirtualDomain_Status ;; monitor) VirtualDomain_Monitor ;; validate-all) ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? 
diff --git a/heartbeat/Xen b/heartbeat/Xen index bc3e8f475..5fd0934c1 100755 --- a/heartbeat/Xen +++ b/heartbeat/Xen @@ -1,557 +1,557 @@ #!/bin/sh # # # Support: linux-ha@lists.linux-ha.org # License: GNU General Public License (GPL) # # Resource Agent for the Xen Hypervisor. # Manages Xen virtual machine instances by # mapping cluster resource start and stop, # to Xen create and shutdown, respectively. # # usage: $0 {start|stop|status|monitor|meta-data} # # OCF parameters are as below: # OCF_RESKEY_xmfile # Absolute path to the Xen control file, # for this virtual machine. # OCF_RESKEY_allow_mem_management # Change memory usage on start/stop/migration # of virtual machine # OCF_RESKEY_reserved_Dom0_memory # minimum memory reserved for domain 0 # OCF_RESKEY_monitor_scripts # scripts to monitor services within the # virtual domain ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### usage() { cat <<-END usage: $0 {start|stop|status|monitor|meta-data|validate-all} END } : ${OCF_RESKEY_xmfile=/etc/xen/vm/MyDomU} : ${OCF_RESKEY_shutdown_acpi=0} : ${OCF_RESKEY_allow_mem_management=0} : ${OCF_RESKEY_reserved_Dom0_memory=512} # prefer xl xentool=$(which xl 2> /dev/null || which xm) meta_data() { cat < 1.0 Resource Agent for the Xen Hypervisor. Manages Xen virtual machine instances by mapping cluster resource start and stop, to Xen create and shutdown, respectively. A note on names We will try to extract the name from the config file (the xmfile attribute). If you use a simple assignment statement, then you should be fine. Otherwise, if there's some python acrobacy involved such as dynamically assigning names depending on other variables, and we will try to detect this, then please set the name attribute. 
You should also do that if there is any chance of a pathological situation where a config file might be missing, for example if it resides on a shared storage. If all fails, we finally fall back to the instance id to preserve backward compatibility. Para-virtualized guests can also be migrated by enabling the meta_attribute allow-migrate. Manages Xen unprivileged domains (DomUs) Absolute path to the Xen control file, for this virtual machine. Xen control file Name of the virtual machine. Xen DomU name The Xen agent will first try an orderly shutdown using xl shutdown. Should this not succeed within this timeout, the agent will escalate to xl destroy, forcibly killing the node. If this is not set, it will default to two-third of the stop action timeout. Setting this value to 0 forces an immediate destroy. Shutdown escalation timeout Handle shutdown by simulating an ACPI power button event. Enable this to allow graceful shutdown for HVM domains without installed PV drivers. Simulate power button event on shutdown This parameter enables dynamic adjustment of memory for start and stop actions used for Dom0 and the DomUs. The default is to not adjust memory dynamically. Use dynamic memory management In case of a live migration, the system will default to using the IP address associated with the hostname via DNS or /etc/hosts. This parameter allows you to specify a node attribute that will be queried instead for the target node, overriding the IP address. This allows you to use a dedicated network for live migration traffic to a specific node. Warning: make very sure the IP address does point to the right node. Or else the live migration will end up somewhere else, greatly confusing the cluster and causing havoc. Node attribute containing target IP address In case memory management is used, this parameter defines the minimum amount of memory to be reserved for the dom0. The default minimum memory is 512MB. 
Minimum Dom0 memory To additionally monitor services within the unprivileged domain, add this parameter with a list of scripts to monitor. list of space separated monitor scripts END } Xen_Status() { if expr "x$xentool" : "x.*xl" >/dev/null; then $xentool list $1 >/dev/null 2>&1 if [ $? -ne 0 ]; then return $OCF_NOT_RUNNING else return $OCF_SUCCESS fi fi if have_binary xen-list; then xen-list $1 2>/dev/null | grep -qs "State.*[-r][-b][-p]--" 2>/dev/null if [ $? -ne 0 ]; then return $OCF_NOT_RUNNING else return $OCF_SUCCESS fi fi STATUS=`$xentool list --long $1 2>/dev/null | grep status 2>/dev/null` if [ "X${STATUS}" != "X" ]; then # we have Xen 3.0.4 or higher STATUS_NOSPACES=`echo "$STATUS" | awk '{ print $1,$2}'` if [ "$STATUS_NOSPACES" = "(status 2)" -o "$STATUS_NOSPACES" = "(status 1)" ]; then return $OCF_SUCCESS else return $OCF_NOT_RUNNING fi else # we have Xen 3.0.3 or lower STATUS=`$xentool list --long $1 2>/dev/null | grep state 2>/dev/null` echo "${STATUS}" | grep -qs "[-r][-b][-p]---" if [ $? -ne 0 ]; then return $OCF_NOT_RUNNING else return $OCF_SUCCESS fi fi } # If the guest is rebooting, it may completely disappear from the # list of defined guests, thus xl/xen-list would return with not # running; apparently, this period lasts only for a second or # two # If a status returns not running, then test status # again for 5 times (perhaps it'll show up) Xen_Status_with_Retry() { local rc cnt=5 Xen_Status $1 rc=$? while [ $rc -eq $OCF_NOT_RUNNING -a $cnt -gt 0 ]; do case "$__OCF_ACTION" in stop) ocf_log debug "domain $1 reported as not running, waiting $cnt seconds ..." ;; monitor) ocf_log warn "domain $1 reported as not running, but it is expected to be running! Retrying for $cnt seconds ..." ;; *) : not reachable ;; esac sleep 1 Xen_Status $1 rc=$? 
- let cnt=$((cnt-1)) + cnt=$((cnt-1)) done return $rc } Xen_Adjust_Memory() { if ocf_is_true "${OCF_RESKEY_allow_mem_management}"; then CNTNEW=$1 RUNNING=`Xen_List_running` RUNCNT=`Xen_Count_running` MAXMEM=`Xen_Total_Memory` if [ ${RUNCNT} -eq 0 -a ${CNTNEW} -eq 0 ]; then RUNCNT=1 fi #NEWMEM=`echo "(${MAXMEM}-${OCF_RESKEY_reserved_Dom0_memory})/(${RUNCNT}+${CNTNEW})"|bc` NEWMEM=$(( (${MAXMEM} - ${OCF_RESKEY_reserved_Dom0_memory}) / (${RUNCNT} + ${CNTNEW} ) )) # do not rely on ballooning add dom0_mem=512 instead to force memory for dom0 #$xentool mem-set Domain-0 ${OCF_RESKEY_reserved_Dom0_memory} for DOM in ${RUNNING}; do $xentool mem-set ${DOM} ${NEWMEM} done ocf_log info "Adjusted memory to: $NEWMEM, for the following $RUNCNT domains: $RUNNING" fi } Xen_List_all() { $xentool list | grep -v -e "Name" -e "Domain-0" | awk '{print $1}' } Xen_List_running() { ALL_DOMS=`Xen_List_all` for DOM in ${ALL_DOMS}; do if Xen_Status $DOM; then echo "${DOM} " fi done } Xen_Count_running() { Xen_List_running | wc -w } Xen_Monitor() { if ocf_is_probe; then Xen_Status ${DOMAIN_NAME} else Xen_Status_with_Retry ${DOMAIN_NAME} fi if [ $? -eq ${OCF_NOT_RUNNING} ]; then ocf_is_probe || ocf_log err "Xen domain $DOMAIN_NAME stopped" return ${OCF_NOT_RUNNING} fi if [ "X${OCF_RESKEY_monitor_scripts}" = "X" ]; then return ${OCF_SUCCESS} fi for SCRIPT in ${OCF_RESKEY_monitor_scripts}; do $SCRIPT if [ $? -ne 0 ]; then return ${OCF_ERR_GENERIC} fi done return ${OCF_SUCCESS} } Xen_Total_Memory() { $xentool info | grep "^total_memory" | awk '{print $3}' } Xen_Start() { if Xen_Status ${DOMAIN_NAME}; then ocf_log info "Xen domain $DOMAIN_NAME already running." return $OCF_SUCCESS fi if [ ! -f "${OCF_RESKEY_xmfile}" ]; then ocf_log err "Config file ${OCF_RESKEY_xmfile} for $DOMAIN_NAME does not exist." 
return $OCF_ERR_INSTALLED fi if ocf_is_true "${OCF_RESKEY_allow_mem_management}"; then Xen_Adjust_Memory 1 ocf_log info "New memory for virtual domains: ${NEWMEM}" sed -i -e "/^memory=/ s/^memory=.*/memory=${NEWMEM}/" ${OCF_RESKEY_xmfile} $xentool mem-set ${DOMAIN_NAME} ${NEWMEM} fi $xentool create ${OCF_RESKEY_xmfile} name=\"$DOMAIN_NAME\" rc=$? if [ $rc -ne 0 ]; then return $OCF_ERR_GENERIC else if ocf_is_true "${OCF_RESKEY_allow_mem_management}"; then $xentool mem-set ${DOMAIN_NAME} ${NEWMEM} fi fi while sleep 1; do Xen_Monitor && return $OCF_SUCCESS done } xen_domain_stop() { local dom=$1 local timeout if [ -n "$OCF_RESKEY_shutdown_timeout" ]; then timeout=$OCF_RESKEY_shutdown_timeout elif [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then # Allow 2/3 of the action timeout for the orderly shutdown # (The origin unit is ms, hence the conversion) timeout=$((OCF_RESKEY_CRM_meta_timeout/1500)) else timeout=60 fi if [ "$timeout" -gt 0 ]; then ocf_log info "Xen domain $dom will be stopped (timeout: ${timeout}s)" if ocf_is_true "${OCF_RESKEY_shutdown_acpi}"; then $xentool trigger $dom power else $xentool shutdown $dom fi while Xen_Status $dom && [ "$timeout" -gt 0 ]; do ocf_log debug "$dom still not stopped. Waiting..." timeout=$((timeout-1)) sleep 1 done fi if [ "$timeout" -eq 0 ]; then while Xen_Status $dom; do ocf_log warn "Xen domain $dom will be destroyed!" $xenkill $dom sleep 1 done # Note: This does not give up. stop isn't allowed to to fail. # If $xentool destroy fails, stop will eventually timeout. # This is the correct behaviour. fi ocf_log info "Xen domain $dom stopped." } Xen_Stop() { local vm if Xen_Status_with_Retry ${DOMAIN_NAME}; then vm=${DOMAIN_NAME} elif Xen_Status migrating-${DOMAIN_NAME}; then ocf_log info "Xen domain $DOMAIN_NAME is migrating" vm="migrating-${DOMAIN_NAME}" else ocf_log info "Xen domain $DOMAIN_NAME already stopped." 
fi if [ "$vm" ]; then xen_domain_stop $vm else # It is supposed to be gone, but there have been situations where # $xentool list / xen-list showed it as stopped but it was still # instantiated. Nuke it once more to make sure: $xenkill ${DOMAIN_NAME} fi Xen_Adjust_Memory 0 return $OCF_SUCCESS } Xen_Migrate_To() { target_node="$OCF_RESKEY_CRM_meta_migrate_target" target_attr="$OCF_RESKEY_node_ip_attribute" target_addr="$target_node" if Xen_Status ${DOMAIN_NAME}; then ocf_log info "$DOMAIN_NAME: Starting $xentool migrate to $target_node" if [ -n "$target_attr" ]; then nodevalue=`crm_attribute --type nodes --node-uname $target_node --attr-name $target_attr --get-value -q` if [ -n "${nodevalue}" -a "${nodevalue}" != "(null)" ]; then target_addr="$nodevalue" ocf_log info "$DOMAIN_NAME: $target_node is using address $target_addr" fi fi if expr "x$xentool" : "x.*xm" >/dev/null; then $xentool migrate --live $DOMAIN_NAME $target_addr else $xentool migrate $DOMAIN_NAME $target_addr fi rc=$? if [ $rc -ne 0 ]; then ocf_log err "$DOMAIN_NAME: $xentool migrate to $target_node failed: $rc" return $OCF_ERR_GENERIC else Xen_Adjust_Memory 0 ocf_log info "$DOMAIN_NAME: $xentool migrate to $target_node succeeded." return $OCF_SUCCESS fi else ocf_log err "$DOMAIN_NAME: migrate_to: Not active locally!" return $OCF_ERR_GENERIC fi } Xen_Migrate_From() { if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then # Allow 2/3 of the action timeout for status to stabilize # (The origin unit is ms, hence the conversion) timeout=$((OCF_RESKEY_CRM_meta_timeout/1500)) else timeout=10 # should be plenty fi while ! Xen_Status ${DOMAIN_NAME} && [ $timeout -gt 0 ]; do ocf_log debug "$DOMAIN_NAME: Not yet active locally, waiting (timeout: ${timeout}s)" timeout=$((timeout-1)) sleep 1 done if Xen_Status ${DOMAIN_NAME}; then Xen_Adjust_Memory 0 ocf_log info "$DOMAIN_NAME: Active locally, migration successful" return $OCF_SUCCESS else ocf_log err "$DOMAIN_NAME: Not active locally, migration failed!" 
return $OCF_ERR_GENERIC fi } Xen_Validate_All() { return $OCF_SUCCESS } if [ $# -ne 1 ]; then usage exit $OCF_ERR_ARGS fi case $1 in meta-data) meta_data exit $OCF_SUCCESS ;; usage) usage exit $OCF_SUCCESS ;; esac # the name business: # # 1. use the name attribute, or # 2. find the name in the config file (if it exists) and use that # unless it contains funny characters such as '%' or space, or # 3. use the OCF_RESOURCE_INSTANCE if [ x"${OCF_RESKEY_name}" != x ]; then DOMAIN_NAME="${OCF_RESKEY_name}" else if [ -f "${OCF_RESKEY_xmfile}" ]; then DOMAIN_NAME=`awk '$1~/^name(=|$)/{print}' ${OCF_RESKEY_xmfile} | sed 's/.*=[[:space:]]*//' | tr -d "[\"']"` if echo "$DOMAIN_NAME" | grep -qs '[%[:space:]]'; then DOMAIN_NAME="" fi fi DOMAIN_NAME=${DOMAIN_NAME:-${OCF_RESOURCE_INSTANCE}} fi for binary in sed awk; do check_binary $binary done if have_binary xen-destroy ; then xenkill="xen-destroy" else xenkill="$xentool destroy" fi if [ -n "$OCF_RESKEY_shutdown_timeout" ]; then ocf_is_decimal "$OCF_RESKEY_shutdown_timeout" || { ocf_log err "shutdown_timeout must be a number" exit $OCF_ERR_CONFIGURED } fi case $1 in start) Xen_Start ;; stop) Xen_Stop ;; migrate_to) Xen_Migrate_To ;; migrate_from) Xen_Migrate_From ;; monitor) Xen_Monitor ;; status) Xen_Status ${DOMAIN_NAME} ;; validate-all) Xen_Validate_All ;; *) usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? # vim:sw=2:ts=4: diff --git a/heartbeat/dhcpd b/heartbeat/dhcpd index ae7e20766..861386d0c 100755 --- a/heartbeat/dhcpd +++ b/heartbeat/dhcpd @@ -1,550 +1,550 @@ #!/bin/sh # # Resource Agent for managing dhcpd resources. # # License: GNU General Public License (GPL) # (c) 2011-2012 Chris Bowlby, # # A fair amount of this script has been pulled from the official 0dhcpd # init script. Those portions have been integrated into this script to # ensure consistent behavior between the resource agent and the # original script. 
The copyrights and original authors are credited # as follows: # # Copyright (c) 1996, 1997, 1998 S.u.S.E. GmbH # Copyright (c) 1998, 1999, 2000, 2001 SuSE GmbH # Copyright (c) 2002, 2003 SuSE Linux AG # Copyright (c) 2004-2008 SUSE LINUX Products GmbH, Nuernberg, Germany. # # Author(s) : Rolf Haberrecker , 1997-1999 # Peter Poeml , 2000-2006 # Marius Tomaschewski , 2006-2010 # # and Linux-HA contributors # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Defaults OCF_RESKEY_binary_default="dhcpd" OCF_RESKEY_pid_default="/var/run/dhcpd.pid" OCF_RESKEY_user_default=dhcpd OCF_RESKEY_group_default=nogroup OCF_RESKEY_config_default="" OCF_RESKEY_chrooted_default="true" OCF_RESKEY_chrooted_path_default="/var/lib/dhcp" OCF_RESKEY_leases_default="/db/dhcpd.leases" OCF_RESKEY_interface_default="" OCF_RESKEY_includes_default="" : ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} : ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}} : ${OCF_RESKEY_user=${OCF_RESKEY_user_default}} : ${OCF_RESKEY_group=${OCF_RESKEY_group_default}} : ${OCF_RESKEY_chrooted=${OCF_RESKEY_chrooted_default}} : ${OCF_RESKEY_chrooted_path=${OCF_RESKEY_chrooted_path_default}} : ${OCF_RESKEY_config=${OCF_RESKEY_config_default}} : ${OCF_RESKEY_leases=${OCF_RESKEY_leases_default}} : ${OCF_RESKEY_interface=${OCF_RESKEY_interface_default}} : ${OCF_RESKEY_includes=${OCF_RESKEY_includes_default}} # To enable support for different versions of dhcp, we need # to know what version we are being run against. DHCP_VERSION_MAJOR=`$OCF_RESKEY_binary --version 2>&1 | awk -F- '{print $3}' | awk -F. '{print $1}' | sed s/^[a-zA-Z]//g` # These files are always copied by default to ensure the chroot environment works. DEFAULT_FILE_LIST="/etc/gai.conf /etc/nsswitch.conf /etc/resolv.conf /etc/host.conf /etc/hosts /etc/localtime /dev/urandom" usage() { cat < 0.1 Manage an ISC DHCP server service in a chroot environment. Chrooted ISC DHCP server resource agent. 
The absolute path to the DHCP server configuration file. Configuration file Configure the dhcpd service to run in a chrooted or non-chrooted mode. Enable chroot mode The absolute path of the chrooted DHCP environment. The chrooted path The binary for the DHCP server process. An absolute path definition is not required, but can be used to override environment path. dhcpd binary The system user the DHCP server process will run as when it is chrooted. dhcpd owner The system group the DHCP server process will run as when it is chrooted. dhcpd group owner The network interface(s) the DHCP server process will bind to. A blank value will bind the process to all interfaces. Network Interface This parameter provides a means to copy include files into the chrooted environment. If a dhcpd.conf file contains a line similar to this: include "/etc/named.keys"; Then an admin also has to tell the dhcpd RA that this file should be pulled into the chrooted environment. This is a space delimited list. Include files The leases database file, relative to chrooted_path. Leases file The path and filename of the PID file. It is relative to chrooted_path. PID file EOF } # Validate most critical parameters dhcpd_validate_all() { check_binary $OCF_RESKEY_binary if ! ocf_is_probe; then # Test for the appropriate configuration files depending on if # chroot mode is enabled. if ocf_is_true $OCF_RESKEY_chrooted ; then if ! test -e "$OCF_RESKEY_chrooted_path"; then ocf_exit_reason "Path $OCF_RESKEY_chrooted_path does not exist." return $OCF_ERR_INSTALLED fi if test -n "$OCF_RESKEY_chrooted_path/$OCF_RESKEY_config" -a ! -r "$OCF_RESKEY_chrooted_path/$OCF_RESKEY_config"; then ocf_exit_reason "Configuration file $OCF_RESKEY_chrooted_path/$OCF_RESKEY_config doesn't exist" return $OCF_ERR_INSTALLED fi else if test -n "$OCF_RESKEY_config" -a ! -r "$OCF_RESKEY_config"; then ocf_exit_reason "Configuration file $OCF_RESKEY_config doesn't exist" return $OCF_ERR_INSTALLED fi fi fi if ! 
# dhcpd_monitor: report whether dhcpd is running, judged solely by its PID
# file (no request is sent to the daemon).
#
# Globals read:    OCF_RESKEY_chrooted, OCF_RESKEY_chrooted_path, OCF_RESKEY_pid
# Globals written: PIDF (path of the PID file that was checked)
# Returns: $OCF_SUCCESS when ocf_pidfile_status succeeds for the PID file,
#          $OCF_NOT_RUNNING otherwise.
dhcpd_monitor() {
    # Assume chrooted mode is being used: the PID file then lives below the
    # chroot directory.
    PIDF="$OCF_RESKEY_chrooted_path/$OCF_RESKEY_pid"

    # In non-chrooted mode the PID file is kept in a "dhcpd" sub-directory of
    # the configured PID directory.  Everything is quoted so that a path
    # containing whitespace is not split into several arguments.
    if ! ocf_is_true "$OCF_RESKEY_chrooted"; then
        PIDF="$(dirname "$OCF_RESKEY_pid")/dhcpd/$(basename "$OCF_RESKEY_pid")"
    fi

    ocf_pidfile_status "$PIDF" >/dev/null 2>&1 || return $OCF_NOT_RUNNING

    return $OCF_SUCCESS
}
[ -f $OCF_RESKEY_config ] ; then ocf_exit_reason "dhcpd has not been configured." return $OCF_ERR_CONFIGURED fi # If the leases file does not exist, create it, as this is a fresh install. if [ ! -e $OCF_RESKEY_chrooted_path/$OCF_RESKEY_leases ]; then touch $OCF_RESKEY_chrooted_path/$OCF_RESKEY_leases fi # Remove the random device. test -e "$OCF_RESKEY_chrooted_path/dev/urandom" && rm -f $OCF_RESKEY_chrooted_path/dev/urandom # Test for the existance of the defined include files, and append # them to the list of files to be copied. for i in $OCF_RESKEY_includes ; do if [ -e $i ] ; then DEFAULT_FILE_LIST="$DEFAULT_FILE_LIST $i" else ocf_exit_reason "include file $i does not exist" return $OCF_ERR_INSTALLED fi done # Ensure all "modified" non-chrooted configuration files are copied into the chrooted environment. for i in $OCF_RESKEY_config $DEFAULT_FILE_LIST; do # First, lets make sure the directory exists within the chrooted environment. if test -d "$i" ; then mkdir -p $OCF_RESKEY_chrooted_path/$i elif test -e "$i" ; then mkdir -p "`dirname $OCF_RESKEY_chrooted_path/$i`" fi # Next, we copy the configuration file into place. cp -aL "$i" "$OCF_RESKEY_chrooted_path/${i%/*}/" > /dev/null 2>&1 || { ocf_exit_reason "could not copy $i to chroot jail"; return $OCF_ERR_GENERIC; } done libdir=$(basename $(echo /var/lib/dhcp/lib*)) if test -x /usr/bin/ldd ; then get_ldd_deps() { - ldd_wl="\/$libdir\/lib" - ldd_bl="\/$libdir\/libc\." + ldd_wl="/$libdir/lib" + ldd_bl="/$libdir/libc\." 
/usr/bin/ldd "$1" | while read a b c d ; do [ -n "$c" ] || continue - [[ $c =~ $ldd_wl ]] || continue - [[ $c =~ $ldd_bl ]] && continue + echo "$c" | grep -q "$ldd_wl" || continue + echo "$c" | grep -q "$ldd_bl" && continue echo $c done } else get_ldd_deps() { :; } fi cplibs=`for i in /$libdir/libresolv.so.* /$libdir/libnss_*.so.* /$libdir/libpthread.so.0 /$libdir/libdl.so.2 do if [ -s "$i" ] ; then echo "$i" get_ldd_deps "$i" fi done | sort -u` for i in $cplibs ; do if [ -s "$i" ]; then cp -pL "$i" "/var/lib/dhcp/$libdir/" || { ocf_exit_reason "could not copy $i to chroot jail"; return $OCF_ERR_GENERIC; } fi done return $OCF_SUCCESS } # Initialize a non-chroot environment dhcpd_initialize() { ## If there is no conf file, we can't start a dhcp service. if ! [ -f $OCF_RESKEY_config ] ; then ocf_exit_reason "dhcpd has not been configured." return $OCF_ERR_CONFIGURED fi # As with the standard DHCP init script, we can still use the # chrooted default path for storing the leases file. This behavior # is consistent with the existing /etc/init.d/dhcpd script. if ! [ -d $OCF_RESKEY_chrooted_path ] ; then ocf_log info "Initializing $OCF_RESKEY_chrooted_path for use." fi # If the leases file does not exist, create it, as this is a fresh install. if [ ! -e $OCF_RESKEY_chrooted_path/$OCF_RESKEY_leases ]; then touch $OCF_RESKEY_chrooted_path/$OCF_RESKEY_leases fi # if the PID storage path does not exist, make it, and setup the permissions. # NOTE: This part of the script has a potential security flaw, in that if someone # puts in /var/run as the path, it will change ownership to the dhcpd user # and group. However, all that would do is allow that user to view the contents # of the files, which they can do now anyway. If this becomes an issue, I can work # in some changes. # We need to append "dhcpd" to the path for the PID file storage folder, because # if /var/run is used, that folders permissions can not be changed, otherwise it affects # more then just one application. if ! 
# Start
#
# dhcpd_start: prepare the (chroot or plain) runtime environment, launch the
# dhcpd binary and wait until dhcpd_monitor reports it as running.
#
# Globals read:    OCF_RESKEY_binary, OCF_RESKEY_config, OCF_RESKEY_chrooted,
#                  OCF_RESKEY_chrooted_path, OCF_RESKEY_leases, OCF_RESKEY_pid,
#                  OCF_RESKEY_user, OCF_RESKEY_group, OCF_RESKEY_interface
# Globals written: DHCPD_ARGS, PIDF
# Returns: $OCF_SUCCESS once dhcpd is running (or already was running);
#          $OCF_ERR_INSTALLED when initialization or the launch itself fails.
#          Exits with the status of dhcpd_validate_all when validation fails.
dhcpd_start() {
    # Lets make sure we are not already running.
    if dhcpd_monitor; then
        ocf_log info "dhcpd already running"
        return $OCF_SUCCESS
    fi

    # Only initialize the chrooted path(s) if chroot mode is enabled.
    if ocf_is_true "$OCF_RESKEY_chrooted"; then
        dhcpd_initialize_chroot || {
            ocf_exit_reason "Could not fully initialize the chroot environment."
            return $OCF_ERR_INSTALLED
        }
    else
        dhcpd_initialize || {
            ocf_exit_reason "Could not fully initialize the runtime environment."
            return $OCF_ERR_INSTALLED
        }
    fi

    dhcpd_validate_all || exit

    # Define an empty string variable, to ensure it exists when needed.
    DHCPD_ARGS=""

    # To ensure consistent behavior with the standard DHCPD init script,
    # use the chrooted default path for storing a leases file, when not in
    # a chrooted enviroment.
    if ocf_is_true "$OCF_RESKEY_chrooted"; then
        DHCPD_ARGS="$DHCPD_ARGS -chroot $OCF_RESKEY_chrooted_path -lf $OCF_RESKEY_leases"
    else
        DHCPD_ARGS="$DHCPD_ARGS -lf $OCF_RESKEY_chrooted_path/$OCF_RESKEY_leases"
    fi

    if [ -n "$OCF_RESKEY_user" ]; then
        DHCPD_ARGS="$DHCPD_ARGS -user $OCF_RESKEY_user"
    fi

    if [ -n "$OCF_RESKEY_group" ]; then
        DHCPD_ARGS="$DHCPD_ARGS -group $OCF_RESKEY_group"
    fi

    # If there is a pid file containing a pid, the machine might have crashed.
    # pid files in /var/run are always cleaned up at boot time, but this is not
    # the case for the pid file in the chroot jail. Therefore, an old pid file
    # may exist. Since dhcpd itself only checks whether the pid is alive or
    # not, any leftover pid file is removed before launching.
    PIDF="$OCF_RESKEY_chrooted_path/$OCF_RESKEY_pid"

    if ocf_is_true "$OCF_RESKEY_chrooted"; then
        ocf_log info "Starting dhcpd [chroot] service."
        # Inside the jail the pid file path is relative to the chroot.
        DHCPD_ARGS="$DHCPD_ARGS -pf $OCF_RESKEY_pid"
    else
        ocf_log info "Starting dhcpd [non-chroot] service."
        PIDF="$(dirname "$OCF_RESKEY_pid")/dhcpd/$(basename "$OCF_RESKEY_pid")"
        DHCPD_ARGS="$DHCPD_ARGS -pf $PIDF"
    fi

    rm -f "$PIDF"

    # DHCPD_ARGS and OCF_RESKEY_interface are intentionally unquoted: both
    # are space separated lists that must be split into separate arguments.
    ocf_run $OCF_RESKEY_binary -cf $OCF_RESKEY_config $DHCPD_ARGS $OCF_RESKEY_interface ||
        return $OCF_ERR_INSTALLED

    # Wait until the monitor actually sees the daemon.  The previous code
    # returned success from inside this loop after the first failed poll,
    # i.e. before dhcpd was confirmed to be up.
    # NOTE(review): there is no local deadline; this relies on the caller's
    # operation timeout to abort a start that never completes -- confirm.
    while ! dhcpd_monitor; do
        ocf_log info "waiting for dhcpd to start"
        sleep 1
    done

    if ocf_is_true "$OCF_RESKEY_chrooted"; then
        ocf_log info "dhcpd [chrooted] has started."
    else
        ocf_log info "dhcpd [non-chrooted] has started."
    fi

    return $OCF_SUCCESS
}
# Actions that need no running service are handled first and always exit
# here.  meta-data and usage always succeed; validate-all exits with the
# result of the validation (it used to exit $OCF_SUCCESS unconditionally,
# which hid every configuration error from the caller).
case "$__OCF_ACTION" in
meta-data)
    dhcpd_meta_data
    exit $OCF_SUCCESS
    ;;
validate-all)
    dhcpd_validate_all
    exit $?
    ;;
usage|help)
    dhcpd_usage
    exit $OCF_SUCCESS
    ;;
esac

# Translate each remaining action into the appropriate function call.  The
# status of the last function called becomes the status of this statement;
# unknown actions print the usage text and exit $OCF_ERR_UNIMPLEMENTED.
case "$__OCF_ACTION" in
start)
    dhcpd_start
    ;;
stop)
    dhcpd_stop
    ;;
restart)
    dhcpd_stop
    dhcpd_start
    ;;
monitor)
    dhcpd_monitor
    ;;
*)
    dhcpd_usage
    exit $OCF_ERR_UNIMPLEMENTED
    ;;
esac
The options to pass to exportfs for the exported directory or directories. Export options. The directory or directories to be exported using NFS. Multiple directories are separated by white space. The directory or directories to export. The fsid option to pass to exportfs. This can be a unique positive integer, a UUID, or the special string "root" which is functionally identical to numeric fsid of 0. If multiple directories are being exported, then they are assigned ids sequentially starting with this fsid (fsid, fsid+1, fsid+2, ...). Obviously, in that case the fsid must be an integer. 0 (root) identifies the export as the root of an NFSv4 pseudofilesystem -- avoid this setting unless you understand its special status. This value will override any fsid provided via the options parameter. Unique fsid within cluster or starting fsid for multiple exports. Relinquish NFS locks associated with this filesystem when the resource stops. Enabling this parameter is highly recommended unless the path exported by this ${__SCRIPT_NAME} resource is also exported by a different resource. Note: Unlocking is only possible on Linux systems where /proc/fs/nfsd/unlock_filesystem exists and is writable. If your system does not fulfill this requirement (on account of having an nonrecent kernel, for example), you may set this parameter to 0 to silence the associated warning. Unlock filesystem on stop? When stopping (unexporting), wait out the NFSv4 lease time. Only after all leases have expired does the NFS kernel server relinquish all server-side handles on the exported filesystem. If this ${__SCRIPT_NAME} resource manages an export that resides on a mount point designed to fail over along with the NFS export itself, then enabling this parameter will ensure such failover is working properly. Note that when this parameter is set, your stop timeout MUST accommodate for the wait period. This parameter is safe to disable if none of your NFS clients are using NFS version 4 or later. 
# restore_rmtab DIR
#
# Merge the rmtab backup kept in DIR (file name taken from
# OCF_RESKEY_rmtab_backup, relative to DIR) into /var/lib/nfs/rmtab.
# The merged, de-duplicated list is built in a temporary file and then
# installed over /var/lib/nfs/rmtab (owner root, mode 644).
#
# This is best effort: every outcome is only logged, and the function's
# status is that of the final ocf_log call, exactly as before.
restore_rmtab() {
	local dir="$1"
	local rmtab_backup tmpf entries

	rmtab_backup="$dir/${OCF_RESKEY_rmtab_backup}"

	if [ ! -r "${rmtab_backup}" ]; then
		ocf_log warn "rmtab backup ${rmtab_backup} not found or not readable."
		return
	fi

	# Declaration and assignment are separate so a mktemp failure is seen.
	tmpf=$(mktemp) || {
		ocf_log warn "Unable to create a temporary file, rmtab not restored from ${rmtab_backup}."
		return
	}

	if sort -u "${rmtab_backup}" /var/lib/nfs/rmtab > "$tmpf" &&
		install -o root -m 644 "$tmpf" /var/lib/nfs/rmtab; then
		# Read the file on stdin so wc prints only the number; the old
		# "wc -l FILE" form put the file name into the message as well.
		entries=$(wc -l < "${rmtab_backup}")
		rm -f "$tmpf"
		ocf_log debug "Restored $((entries)) rmtab entries from ${rmtab_backup}."
	else
		rm -f "$tmpf"
		ocf_log warn "Failed to merge ${rmtab_backup} into /var/lib/nfs/rmtab."
	fi
}
# export_one DIR
#
# Export DIR to OCF_RESKEY_clientspec with the options from
# OCF_RESKEY_options and the fsid reported by get_fsid.
#
# The fsid from get_fsid always wins: an "fsid=..." already present in the
# options is replaced whatever its value is (number, UUID or "root");
# otherwise "fsid=<value>" is appended.  The old substitution only matched
# numeric values (and used the GNU-only "\+"), so e.g. "fsid=root" in the
# options was silently left in place.
#
# Returns $OCF_SUCCESS after a successful export.  If exportfs fails the
# whole agent exits with $OCF_ERR_GENERIC: when any directory fails to
# export there is no point in continuing.
export_one() {
	local dir="$1"
	local opts fsid

	fsid=$(get_fsid)
	opts="$OCF_RESKEY_options"

	case "$opts" in
	*fsid=*)
		# replace fsid in options list (value runs up to the next comma)
		opts=$(printf '%s\n' "$opts" | sed "s/fsid=[^,]*/fsid=${fsid}/g")
		;;
	*)
		# tack the fsid option onto our options list.
		opts="${opts:+${opts},}fsid=${fsid}"
		;;
	esac

	ocf_run exportfs -v -o "$opts" "${OCF_RESKEY_clientspec}:$dir"
	if [ $? -ne 0 ]; then
		ocf_exit_reason "exportfs failed - exportfs -v -o $opts ${OCF_RESKEY_clientspec}:$dir"
		exit $OCF_ERR_GENERIC
	fi

	ocf_log info "directory $dir exported"
	return $OCF_SUCCESS
}
forall export_one # Restore the rmtab to ensure smooth NFS-over-TCP failover if [ ${OCF_RESKEY_rmtab_backup} != "none" ]; then forall restore_rmtab fi } unlock_fs() { local dir=$1 local unlockfile unlockfile=/proc/fs/nfsd/unlock_filesystem if [ -w ${unlockfile} ]; then echo "$dir" > ${unlockfile} ocf_log info "Unlocked NFS export $dir" else ocf_log warn "Unable to unlock NFS export $dir, ${unlockfile} not found or not writable" fi } wait_for_leasetime() { local leasetimefile local sleeptime leasetimefile=/proc/fs/nfsd/nfsv4leasetime if [ -r ${leasetimefile} ]; then sleeptime=$((`cat ${leasetimefile}`+2)) ocf_log info "Sleeping ${sleeptime} seconds to accommodate for NFSv4 lease expiry" sleep ${sleeptime}s else ocf_log warn "Unable to read NFSv4 lease time from ${leasetimefile}, file not found or not readable" fi } cleanup_export_cache() { # see if the cache is blocking unexport local contentfile=/proc/net/rpc/nfsd.export/content local fsid_re local i=1 fsid_re="fsid=(echo `forall get_fsid`|sed 's/ /|/g')," while :; do grep -E -q "$fsid_re" $contentfile || break ocf_log info "Cleanup export cache ... (try $i)" ocf_run exportfs -f sleep 0.5 - let i=$i+1 + i=$((i + 1)) done } unexport_one() { local dir=$1 ocf_run exportfs -v -u ${OCF_RESKEY_clientspec}:$dir } exportfs_stop () { local rc exportfs_monitor if [ $? -eq $OCF_NOT_RUNNING ]; then ocf_log debug "not exported" return $OCF_SUCCESS fi ocf_log info "Un-exporting file system ..." # Backup the rmtab to ensure smooth NFS-over-TCP failover if [ ${OCF_RESKEY_rmtab_backup} != "none" ]; then forall backup_rmtab fi forall unexport_one rc=$? if ocf_is_true ${OCF_RESKEY_unlock_on_stop}; then forall unlock_fs fi if ocf_is_true ${OCF_RESKEY_wait_for_leasetime_on_stop}; then wait_for_leasetime fi if [ $rc -eq 0 ]; then cleanup_export_cache ocf_log info "Un-exported file system(s)" return $OCF_SUCCESS else ocf_exit_reason "Failed to un-export file system(s)" return $OCF_ERR_GENERIC fi } testdir() { if [ ! 
-d $1 ]; then ocf_is_probe || ocf_log err "$1 does not exist or is not a directory" return 1 fi return 0 } exportfs_validate_all () { if [ `echo "$OCF_RESKEY_directory" | wc -w` -gt 1 ] && ! ocf_is_decimal "$OCF_RESKEY_fsid"; then ocf_exit_reason "use integer fsid when exporting multiple directories" return $OCF_ERR_CONFIGURED fi if ! forall testdir; then return $OCF_ERR_INSTALLED fi } # If someone puts a trailing slash at the end of the export directory, # this agent is going to fail in some unexpected ways due to how # export strings are matched. The simplest solution here is to strip off # a trailing '/' in the directory before processing anything. newdir=$(echo "$OCF_RESKEY_directory" | sed -n -e 's/^\(.*\)\/$/\1/p') if [ -n "$newdir" ]; then OCF_RESKEY_directory=$newdir fi OCF_REQUIRED_PARAMS="directory fsid clientspec" OCF_REQUIRED_BINARIES="exportfs" ocf_rarun $* diff --git a/heartbeat/nfsserver b/heartbeat/nfsserver index de1a802b8..a63ff4fff 100755 --- a/heartbeat/nfsserver +++ b/heartbeat/nfsserver @@ -1,786 +1,786 @@ #!/bin/sh # nfsserver # # Description: Manages nfs server as OCF resource # by hxinwei@gmail.com # License: GNU General Public License v2 (GPLv2) and later if [ -n "$OCF_DEBUG_LIBRARY" ]; then . $OCF_DEBUG_LIBRARY else : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs fi DEFAULT_INIT_SCRIPT="/etc/init.d/nfsserver" if ! 
[ -f $DEFAULT_INIT_SCRIPT ]; then # On some systems, the script is just called nfs DEFAULT_INIT_SCRIPT="/etc/init.d/nfs" fi DEFAULT_NOTIFY_CMD=`which sm-notify` DEFAULT_NOTIFY_CMD=${DEFAULT_NOTIFY_CMD:-"/sbin/sm-notify"} DEFAULT_NOTIFY_FOREGROUND="false" DEFAULT_RPCPIPEFS_DIR="/var/lib/nfs/rpc_pipefs" EXEC_MODE=0 SELINUX_ENABLED=-1 STATD_PATH="/var/lib/nfs" STATD_DIR="" NFS_SYSCONFIG="/etc/sysconfig/nfs" NFS_SYSCONFIG_LOCAL_BACKUP="/etc/sysconfig/nfs.ha.bu" NFS_SYSCONFIG_AUTOGEN_TAG="AUTOGENERATED by $0 high availability resource-agent" nfsserver_meta_data() { cat < 1.0 Nfsserver helps to manage the Linux nfs server as a failover-able resource in Linux-HA. It depends on Linux specific NFS implementation details, so is considered not portable to other platforms yet. Manages an NFS server The default init script shipped with the Linux distro. The nfsserver resource agent offloads the start/stop/monitor work to the init script because the procedure to start/stop/monitor nfsserver varies on different Linux distro. In the event that this option is not set, this agent will attempt to use an init script at this location, ${DEFAULT_INIT_SCRIPT}, or detect a systemd unit-file to use in the event that no init script is detected. Init script for nfsserver Do not send reboot notifications to NFSv3 clients during server startup. Disable NFSv3 server reboot notifications Keeps the sm-notify attached to its controlling terminal and running in the foreground. Keeps the notify tool running in the foreground. Specifies the length of sm-notify retry time, in minutes, to continue retrying notifications to unresponsive hosts. If this option is not specified, sm-notify attempts to send notifications for 15 minutes. Specifying a value of 0 causes sm-notify to continue sending notifications to unresponsive peers until it is manually killed. Specifies the length of sm-notify retry time (minutes). Comma separated list of floating IP addresses used to access the nfs service IP addresses. 
Specifies what arguments to pass to the nfs daemon on startup. View the rpc.nfsd man page for information on what arguments are available. Note that setting this value will override all settings placed in the local /etc/sysconfig/nfs file. rpc.nfsd options The udp port lockd should listen on. Note that setting this value will override all settings placed in the local /etc/sysconfig/nfs file. lockd udp port The tcp port lockd should listen on. Note that setting this value will override all settings placed in the local /etc/sysconfig/nfs file. lockd tcp port The source port number sm-notify uses when sending reboot notifications. Note that setting this value will override all settings placed in the local /etc/sysconfig/nfs file. sm-notify source port The port number used for RPC listener sockets. Note that setting this value will override all settings placed in the local /etc/sysconfig/nfs file. rpc.statd listener port The port number used for rpc.mountd listener sockets. Note that setting this value will override all settings placed in the local /etc/sysconfig/nfs file. rpc.mountd listener port The port number used for rpc.rquotad. Note that setting this value will override all settings placed in the local /etc/sysconfig/nfs file. rpc.rquotad port The nfsserver resource agent will save nfs related information in this specific directory. And this directory must be able to fail-over before nfsserver itself. Directory to store nfs server related information. The mount point for the sunrpc file system. Default is $DEFAULT_RPCPIPEFS_DIR. This script will mount (bind) nfs_shared_infodir on /var/lib/nfs/ (cannot be changed), and this script will mount the sunrpc file system on $DEFAULT_RPCPIPEFS_DIR (default, can be changed by this parameter). If you want to move only rpc_pipefs/ (e.g. to keep rpc_pipefs/ local) from default, please set this value. The mount point for the sunrpc file system. 
END return $OCF_SUCCESS } nfsserver_usage() { cat < /dev/null 2>&1 && selinuxenabled SELINUX_ENABLED=$? if [ $SELINUX_ENABLED -eq 0 ]; then export SELINUX_LABEL="$(ls -ldZ $STATD_PATH | cut -f4 -d' ')" fi ## # EXEC_MODE values # 1 user init script or default init script # 2 systemd # # On error, this function will terminate the process # with error code $OCF_ERR_INSTALLED ## set_exec_mode() { ## # If EXEC_MODE is already set, we don't need to run this function again. ## if [ $EXEC_MODE -ne 0 ]; then return 0; fi ## # If the user defined an init script, It must exist for us to continue ## if [ -n "$OCF_RESKEY_nfs_init_script" ]; then # check_binary will exit the process if init script does not exist check_binary ${OCF_RESKEY_nfs_init_script} EXEC_MODE=1 return 0 fi ## # Check to see if the default init script exists, if so we'll use that. ## if which $DEFAULT_INIT_SCRIPT > /dev/null 2>&1; then OCF_RESKEY_nfs_init_script=$DEFAULT_INIT_SCRIPT EXEC_MODE=1 return 0 fi ## # Last of all, attempt systemd. ## if which systemctl > /dev/null 2>&1; then if systemctl list-unit-files | grep nfs-server > /dev/null && systemctl list-unit-files | grep nfs-lock > /dev/null; then EXEC_MODE=2 # when using systemd, the nfs-lock service file handles nfsv3 locking daemons for us. return 0 fi fi ocf_exit_reason "No init script or systemd unit file detected for nfs server" exit $OCF_ERR_INSTALLED } ## # wrapper for init script and systemd calls. ## nfs_exec() { local cmd=$1 set_exec_mode case $EXEC_MODE in 1) ${OCF_RESKEY_nfs_init_script} $cmd;; 2) systemctl $cmd nfs-server.service ;; esac } v3locking_exec() { local cmd=$1 set_exec_mode if [ $EXEC_MODE -eq 2 ]; then systemctl $cmd nfs-lock.service else case $cmd in start) locking_start;; stop) locking_stop;; status) locking_status;; esac fi } nfsserver_monitor () { fn=`mktemp` nfs_exec status > $fn 2>&1 rc=$? 
ocf_log debug "$(cat $fn)" rm -f $fn #Adapte LSB status code to OCF return code if [ $rc -eq 0 ]; then # don't report success if nfs servers are up # without locking daemons. v3locking_exec "status" rc=$? if [ $rc -ne 0 ]; then ocf_exit_reason "NFS server is up, but the locking daemons are down" rc=$OCF_ERR_GENERIC fi return $rc elif [ $rc -eq 3 ]; then return $OCF_NOT_RUNNING else return $OCF_ERR_GENERIC fi } set_arg() { local key="$1" local value="$2" local file="$3" local requires_sysconfig="$4" if [ -z "$value" ]; then return fi # only write to the tmp /etc/sysconfig/nfs if sysconfig exists. # otherwise this distro does not support setting these options. if [ -d "/etc/sysconfig" ]; then echo "${key}=\"${value}\"" >> $file elif [ "$requires_sysconfig" = "true" ]; then ocf_log warn "/etc/sysconfig/nfs not found, unable to set port and nfsd args." fi export ${key}="${value}" } set_env_args() { local tmpconfig=$(mktemp ${HA_RSCTMP}/nfsserver-tmp-XXXXX) local statd_args # nfsd args set_arg "RPCNFSDARGS" "$OCF_RESKEY_nfsd_args" "$tmpconfig" "true" # mountd args if [ -n "$OCF_RESKEY_mountd_port" ]; then set_arg "RPCMOUNTDOPTS" "-p $OCF_RESKEY_mountd_port" "$tmpconfig" "true" fi # statd args. we always want to perform the notify using sm-notify after # both rpc.statd and the nfsd daemons are initialized statd_args="--no-notify" if [ -n "$OCF_RESKEY_statd_outgoing_port" ]; then statd_args="$statd_args -o $OCF_RESKEY_statd_outgoing_port" fi if [ -n "$OCF_RESKEY_statd_port" ]; then statd_args="$statd_args -p $OCF_RESKEY_statd_port" fi set_arg "STATDARG" "$statd_args" "$tmpconfig" "false" # lockd ports set_arg "LOCKD_UDPPORT" "$OCF_RESKEY_lockd_udp_port" "$tmpconfig" "true" set_arg "LOCKD_TCPPORT" "$OCF_RESKEY_lockd_tcp_port" "$tmpconfig" "true" # rquotad_port if [ -n "$OCF_RESKEY_rquotad_port" ]; then set_arg "RPCRQUOTADOPTS" "-p $OCF_RESKEY_rquotad_port" "$tmpconfig" "true" fi # override local nfs config. preserve previous local config though. 
# is_bound MOUNTPOINT
#
# Return 0 when the output of mount(8) contains the text
# "on MOUNTPOINT type", 1 otherwise.
#
# The mount point is matched as a fixed string (grep -F): it is a path, not
# a pattern, and characters such as "." must not act as regex wildcards.
is_bound () {
	if mount | grep -q -F -- "on $1 type"; then
		return 0
	fi

	return 1
}
in 0) echo "$pid" return $OCF_SUCCESS;; 1) return $OCF_NOT_RUNNING;; *) return $OCF_ERR_GENERIC;; esac } locking_status() { binary_status "rpc.statd" > /dev/null 2>&1 } locking_start() { local ret=$OCF_SUCCESS ocf_log info "Starting rpc.statd." rpc.statd $STATDARG ret=$? if [ $ret -ne 0 ]; then ocf_log err "Failed to start rpc.statd" return $ret fi touch /var/lock/subsys/nfslock return $ret } terminate() { - declare pids - declare i=0 + local pids + local i=0 while : ; do pids=$(binary_status $1) [ -z "$pids" ] && return 0 kill $pids sleep 1 - ((i++)) + i=$((i + 1)) [ $i -gt 3 ] && return 1 done } killkill() { - declare pids - declare i=0 + local pids + local i=0 while : ; do pids=$(binary_status $1) [ -z "$pids" ] && return 0 kill -9 $pids sleep 1 - ((i++)) + i=$((i + 1)) [ $i -gt 3 ] && return 1 done } stop_process() { - declare process=$1 + local process=$1 ocf_log info "Stopping $process" if terminate $process; then ocf_log debug "$process is stopped" else if killkill $process; then ocf_log debug "$process is stopped" else ocf_log debug "Failed to stop $process" return 1 fi fi return 0 } locking_stop() { ret=0 # sm-notify can prevent umount of /var/lib/nfs/statd if # it is still trying to notify unresponsive clients. stop_process sm-notify if [ $? -ne 0 ]; then ret=$OCF_ERR_GENERIC fi stop_process rpc.statd if [ $? 
-ne 0 ]; then ret=$OCF_ERR_GENERIC fi return $ret } notify_locks() { if ocf_is_true "$OCF_RESKEY_nfs_no_notify"; then # we've been asked not to notify clients return; fi # run in foreground, if requested if ocf_is_true "$OCF_RESKEY_nfs_notify_foreground"; then opts="-d" fi if [ -n "$OCF_RESKEY_nfs_smnotify_retry_time" ]; then opts="$opts -m $OCF_RESKEY_nfs_smnotify_retry_time" fi if [ -n "$OCF_RESKEY_statd_outgoing_port" ]; then opts="$opts -p $OCF_RESKEY_statd_outgoing_port" fi # forces re-notificaiton regardless if notifies have already gone out opts="$opts -f" ocf_log info "executing sm-notify" if [ -n "$OCF_RESKEY_nfs_ip" ]; then for ip in `echo ${OCF_RESKEY_nfs_ip} | sed 's/,/ /g'`; do cp -rpfn $STATD_PATH/sm.ha/* $STATD_PATH/ > /dev/null 2>&1 sm-notify $opts -v $ip done else sm-notify $opts fi } nfsserver_start () { local rc; if nfsserver_monitor; then ocf_log debug "NFS server is already started" return $OCF_SUCCESS fi set_env_args prepare_directory bind_tree # remove the sm-notify pid so sm-notify will be allowed to run again without requiring a reboot. rm -f /var/run/sm-notify.pid # # Synchronize these before starting statd # cp -rpfn $STATD_PATH/sm.ha/* $STATD_PATH/ > /dev/null 2>&1 rm -rf $STATD_PATH/sm.ha/* > /dev/null 2>&1 cp -rpf $STATD_PATH/sm $STATD_PATH/sm.bak /var/lib/nfs/state $STATD_PATH/sm.ha > /dev/null 2>&1 ocf_log info "Starting NFS server ..." # mounts /proc/fs/nfsd for us lsmod | grep -q nfsd if [ $? -ne 0 ]; then modprobe nfsd fi # check to see if we need to start rpc.statd v3locking_exec "status" if [ $? -ne $OCF_SUCCESS ]; then v3locking_exec "start" rc=$? if [ $rc -ne 0 ]; then ocf_exit_reason "Failed to start NFS server locking daemons" return $rc fi else ocf_log info "rpc.statd already up" fi fn=`mktemp` nfs_exec start > $fn 2>&1 rc=$? 
ocf_log debug "$(cat $fn)" rm -f $fn if [ $rc -ne 0 ]; then ocf_exit_reason "Failed to start NFS server" return $rc fi notify_locks ocf_log info "NFS server started" return $OCF_SUCCESS } nfsserver_stop () { ocf_log info "Stopping NFS server ..." # backup the current sm state information to the ha folder before stopping. # the ha folder will be synced after startup, restoring the statd client state rm -rf $STATD_PATH/sm.ha/* > /dev/null 2>&1 cp -rpf $STATD_PATH/sm $STATD_PATH/sm.bak /var/lib/nfs/state $STATD_PATH/sm.ha > /dev/null 2>&1 fn=`mktemp` nfs_exec stop > $fn 2>&1 rc=$? ocf_log debug "$(cat $fn)" rm -f $fn v3locking_exec "stop" if [ $? -ne 0 ]; then ocf_exit_reason "Failed to stop NFS locking daemons" rc=$OCF_ERR_GENERIC fi if [ $rc -eq 0 ]; then unbind_tree ocf_log info "NFS server stopped" else ocf_exit_reason "Failed to stop NFS server" fi return $rc } nfsserver_validate () { ## # set_exec_mode will exit if nfs server is not installed ## set_exec_mode check_binary ${OCF_RESKEY_nfs_notify_cmd} if [ -n "$OCF_RESKEY_CRM_meta_clone" ] && [ -n "$OCF_RESKEY_nfs_shared_infodir" ]; then ocf_exit_reason "This RA does not support clone mode when a shared info directory is in use." exit $OCF_ERR_CONFIGURED fi if [ -n "$OCF_RESKEY_nfs_smnotify_retry_time" ]; then if ! ocf_is_decimal "$OCF_RESKEY_nfs_smnotify_retry_time"; then ocf_exit_reason "Invalid nfs_smnotify_retry_time [$OCF_RESKEY_nfs_smnotify_retry_time]" exit $OCF_ERR_CONFIGURED fi fi case ${OCF_RESKEY_nfs_notify_cmd##*/} in sm-notify|rpc.statd) ;; *) ocf_exit_reason "Invalid nfs_notify_cmd [$OCF_RESKEY_nfs_notify_cmd]" exit $OCF_ERR_CONFIGURED ;; esac return $OCF_SUCCESS } nfsserver_validate case $__OCF_ACTION in start) nfsserver_start ;; stop) nfsserver_stop ;; monitor) nfsserver_monitor ;; validate-all) exit $OCF_SUCCESS ;; *) nfsserver_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac