diff --git a/heartbeat/lxc.in b/heartbeat/lxc.in index 72a377e24..7a1c62253 100644 --- a/heartbeat/lxc.in +++ b/heartbeat/lxc.in @@ -1,374 +1,354 @@ #!@BASH_SHELL@ # Should now conform to guidelines: # https://github.com/ClusterLabs/resource-agents/blob/master/doc/dev-guides/ra-dev-guide.asc # # LXC (Linux Containers) OCF RA. # Used to cluster enable the start, stop and monitoring of a LXC container. # # Copyright (c) 2011 AkurIT.com.au, Darren Thompson # All Rights Reserved. # # Without limiting the rights of the original copyright holders # This resource is licensed under GPL version 2 # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # OCF instance parameters # OCF_RESKEY_container # OCF_RESKEY_config # OCF_RESKEY_log # OCF_RESKEY_use_screen # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Defaults OCF_RESKEY_log_default="${HA_RSCTMP}/${OCF_RESOURCE_INSTANCE}.log" OCF_RESKEY_use_screen_default="false" : ${OCF_RESKEY_log=${OCF_RESKEY_log_default}} : ${OCF_RESKEY_use_screen=${OCF_RESKEY_use_screen_default}} - # Set default TRANS_RES_STATE (temporary file to "flag" if resource was stated but not stopped) TRANS_RES_STATE="${HA_RSCTMP}/${OCF_RESOURCE_INSTANCE}.state" meta_data() { cat < 0.1 Allows LXC containers to be managed by the cluster. -If the container is running "init" it will also perform an orderly shutdown. +Notes for lxc Versions before 1.0.0, where the Container is stopped using kill -PWR instead of lxc-stop: It is 'assumed' that the 'init' system will do an orderly shudown if presented with a 'kill -PWR' signal. On a 'sysvinit' this would require the container to have an inittab file containing "p0::powerfail:/sbin/init 0" -I have absolutely no idea how this is done with 'upstart' or 'systemd', YMMV if your container is using one of them. + Manages LXC containers The unique name for this 'Container Instance' e.g. 'test1'. Container Name Absolute path to the file holding the specific configuration for this container e.g. '/etc/lxc/test1/config'. The LXC config file. Absolute path to the container log file Container log file Provides the option of capturing the 'root console' from the container and showing it on a separate screen. To see the screen output run 'screen -r {container name}' The default value is set to 'false', change to 'true' to activate this option Use 'screen' for container 'root console' output - END } LXC_usage() { cat <${CGROUP_MOUNT_POINT}/notify_on_release return 0 } LXC_start() { # put this here as it's so long it gets messy later!!! if ocf_is_true $OCF_RESKEY_use_screen; then STARTCMD="screen -dmS ${OCF_RESKEY_container} lxc-start -f ${OCF_RESKEY_config} -n ${OCF_RESKEY_container} -o ${OCF_RESKEY_log}" else STARTCMD="lxc-start -f ${OCF_RESKEY_config} -n ${OCF_RESKEY_container} -o ${OCF_RESKEY_log} -d" fi LXC_status if [ $? -eq $OCF_SUCCESS ]; then ocf_log debug "Resource $OCF_RESOURCE_INSTANCE is already running" ocf_run touch "${TRANS_RES_STATE}" || exit $OCF_ERR_GENERIC return $OCF_SUCCESS fi cgroup_mounted if [ $? -ne 0 ]; then ocf_log err "Unable to find cgroup mount" exit $OCF_ERR_GENERIC fi ocf_log info "Starting" ${OCF_RESKEY_container} ocf_run ${STARTCMD} || exit $OCF_ERR_GENERIC # Spin on status, wait for the cluster manager to time us out if # we fail while ! LXC_status; do ocf_log info "Container ${OCF_RESKEY_container} has not started, waiting" sleep 1 done ocf_run touch "${TRANS_RES_STATE}" || exit $OCF_ERR_GENERIC return $OCF_SUCCESS } LXC_stop() { - local shutdown_timeout - local now + LXC_status if [ $? -eq $OCF_NOT_RUNNING ]; then ocf_log debug "Resource $OCF_RESOURCE_INSTANCE is already stopped" ocf_run rm -f $TRANS_RES_STATE return $OCF_SUCCESS fi cgroup_mounted if [ $? -ne 0 ]; then ocf_log err "Unable to find cgroup mount" exit $OCF_ERR_GENERIC fi - # If the container is running "init" and is able to perform and orderly shutdown, then it should be done. - # It is 'assumed' that the 'init' system will do an orderly shudown if presented with a 'kill -PWR' signal. - # On a 'sysvinit' this would require the container to have an inittab file containing "p0::powerfail:/sbin/init 0" - declare -i PID=0 - declare CMD= + if ! ocf_version_cmp "`lxc_version`" 1.0.0 ; then + # Use lxc-stop if we are newer than 1.0.0 + timeout=$(( ($OCF_RESKEY_CRM_meta_timeout/1000) -5 )) + ocf_log info "Stopping Container ${OCF_RESKEY_container} using lxc-stop" + # lxc-stop will return failure even if it reached the timeout and sucessfully hard-stopped the + # Container so we check below if the Container is really stopped instead of using || exit $OCF_ERR_GENERIC + ocf_run lxc-stop -n "${OCF_RESKEY_container}" -t ${timeout} + + LXC_status + if [ $? -eq $OCF_SUCCESS ]; then + # Try to manually hard-stop if the Container is still running + ocf_run lxc-stop -n "${OCF_RESKEY_container}" -k || exit $OCF_ERR_GENERIC + fi + + else + # Use kill -PWR + # If the container is running "init" and is able to perform and orderly shutdown, then it should be done. + # It is 'assumed' that the 'init' system will do an orderly shudown if presented with a 'kill -PWR' signal. + # On a 'sysvinit' this would require the container to have an inittab file containing "p0::powerfail:/sbin/init 0" + local shutdown_timeout + local now + declare -i PID=0 + declare CMD= - # LXC prior 1.0.0 - if ocf_version_cmp "`lxc_version`" 1.0.0 ; then # This should work for traditional 'sysvinit' and 'upstart' lxc-ps --name "${OCF_RESKEY_container}" -- -C init -o pid,comm |while read CN PID CMD ;do [ $PID -gt 1 ] || continue [ "$CMD" = "init" ] || continue ocf_log info "Sending \"OS shut down\" instruction to" ${OCF_RESKEY_container} "as it was found to be using \"sysV init\" or \"upstart\"" kill -PWR $PID done # This should work for containers using 'systemd' instead of 'init' lxc-ps --name "${OCF_RESKEY_container}" -- -C systemd -o pid,comm |while read CN PID CMD ;do [ $PID -gt 1 ] || continue [ "$CMD" = "systemd" ] || continue ocf_log info "Sending \"OS shut down\" instruction to" ${OCF_RESKEY_container} "as it was found to be using \"systemd\"" kill -PWR $PID done - else - PID=$(lxc-info --name "${OCF_RESKEY_container}" -p -H) - - # If there is no PID the container seems to be down which - # shouldn't happen. - if [ $PID -eq 0 ]; then - ocf_log err "${OCF_RESKEY_container} seems to run, but has no PID." - exit $OCF_ERR_GENERIC - fi - - # Rescue me. - if [ $PID -eq 1 ]; then - ocf_log err "${OCF_RESKEY_container} seems to run with PID 1 which cannot be." - PID=0 - CMD= - else - CMD=$(ps -o comm= -p $PID) - fi - - # This should work for traditional 'sysvinit' and 'upstart' - if [ "$CMD" = "init" ]; then - ocf_log info "Sending \"OS shut down\" instruction to" ${OCF_RESKEY_container} "as it was found to be using \"sysV init\" or \"upstart\"" - kill -PWR $PID - fi - - # This should work for containers using 'systemd' instead of 'init' - if [ "$CMD" = "systemd" ]; then - ocf_log info "Sending \"OS shut down\" instruction to" ${OCF_RESKEY_container} "as it was found to be using \"systemd\"" - kill -PWR $PID - fi - fi - # The "shutdown_timeout" we use here is the operation - # timeout specified in the CIB, minus 5 seconds - now=$(date +%s) - shutdown_timeout=$(( $now + ($OCF_RESKEY_CRM_meta_timeout/1000) -5 )) - # Loop on status until we reach $shutdown_timeout - while [ $now -lt $shutdown_timeout ]; do - LXC_status - status=$? - case $status in - "$OCF_NOT_RUNNING") - ocf_run rm -f $TRANS_RES_STATE - return $OCF_SUCCESS - ;; - "$OCF_SUCCESS") - # Container is still running, keep waiting (until - # shutdown_timeout expires) - sleep 1 - ;; - *) - # Something went wrong. Bail out and - # resort to forced stop (destroy). - break; - esac - now=$(date +%s) - done + + # The "shutdown_timeout" we use here is the operation + # timeout specified in the CIB, minus 5 seconds + now=$(date +%s) + shutdown_timeout=$(( $now + ($OCF_RESKEY_CRM_meta_timeout/1000) -5 )) + # Loop on status until we reach $shutdown_timeout + while [ $now -lt $shutdown_timeout ]; do + LXC_status + status=$? + case $status in + "$OCF_NOT_RUNNING") + ocf_run rm -f $TRANS_RES_STATE + return $OCF_SUCCESS + ;; + "$OCF_SUCCESS") + # Container is still running, keep waiting (until + # shutdown_timeout expires) + sleep 1 + ;; + *) + # Something went wrong. Bail out and + # resort to forced stop (destroy). + break; + esac + now=$(date +%s) + done - # If the container is still running, it will be stopped now. regardless of state! - # LXC prior 1.0.0 - if ocf_version_cmp "`lxc_version`" 1.0.0 ; then + # If the container is still running, it will be stopped now. regardless of state! ocf_run lxc-stop -n ${OCF_RESKEY_container} || exit $OCF_ERR_GENERIC - else - ocf_run lxc-stop -n ${OCF_RESKEY_container} -k || exit $OCF_ERR_GENERIC fi + ocf_log info "Container" ${OCF_RESKEY_container} "stopped" ocf_run rm -f $TRANS_RES_STATE return $OCF_SUCCESS } LXC_status() { # run lxc-info with -s option for LXC-0.7.5 or later local lxc_info_opt="-s" ocf_version_cmp "`lxc_version`" 0.7.5 && lxc_info_opt="" S=`lxc-info $lxc_info_opt -n ${OCF_RESKEY_container}` ocf_log debug "State of ${OCF_RESKEY_container}: $S" if [[ "${S##* }" = "RUNNING" ]] ; then return $OCF_SUCCESS fi return $OCF_NOT_RUNNING } LXC_monitor() { LXC_status && return $OCF_SUCCESS if [ -f $TRANS_RES_STATE ]; then ocf_log err "${OCF_RESKEY_container} is not running, but state file ${TRANS_RES_STATE} exists." exit $OCF_ERR_GENERIC fi return $OCF_NOT_RUNNING } LXC_validate() { # Quick check that all required attributes are set if [ -z "${OCF_RESKEY_container}" ]; then ocf_log err "LXC container name not set!" exit $OCF_ERR_CONFIGURED fi if [ -z "${OCF_RESKEY_config}" ]; then ocf_log err "LXC configuration filename name not set!" exit $OCF_ERR_CONFIGURED fi # Tests that apply only to non-probes if ! ocf_is_probe; then if ! [ -f "${OCF_RESKEY_config}" ]; then ocf_log err "LXC configuration file \"${OCF_RESKEY_config}\" missing or not found!" exit $OCF_ERR_INSTALLED fi if ocf_is_true $OCF_RESKEY_use_screen; then check_binary screen fi check_binary lxc-start check_binary lxc-stop if ocf_version_cmp "`lxc_version`" 1.0.0 ; then check_binary lxc-ps fi check_binary lxc-info fi return $OCF_SUCCESS } if [ $# -ne 1 ]; then LXC_usage exit $OCF_ERR_ARGS fi case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; usage|help) LXC_usage exit $OCF_SUCCESS ;; esac # Everything except usage and meta-data must pass the validate test LXC_validate case $__OCF_ACTION in start) LXC_start;; stop) LXC_stop;; status) LXC_status;; monitor) LXC_monitor;; validate-all) ;; *) LXC_usage ocf_log err "$0 was called with unsupported arguments: $*" exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc