diff --git a/heartbeat/lxc b/heartbeat/lxc index 43e505ab9..53a80df16 100755 --- a/heartbeat/lxc +++ b/heartbeat/lxc @@ -1,323 +1,323 @@ #!/bin/bash # Should now conform to guidelines: http://www.linux-ha.org/doc/dev-guides/ra-dev-guide.html # # LXC (Linux Containers) OCF RA. # Used to cluster enable the start, stop and monitoring of a LXC container. # # Copyright (c) 2011 AkurIT.com.au, Darren Thompson # All Rights Reserved. # # Without limiting the rights of the original copyright holders # This resource is licensed under GPL version 2 # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # OCF instance parameters # OCF_RESKEY_container # OCF_RESKEY_config # OCF_RESKEY_log # OCF_RESKEY_use_screen # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Defaults OCF_RESKEY_log_default="${HA_RSCTMP}/${OCF_RESOURCE_INSTANCE}.log" OCF_RESKEY_use_screen_default="false" : ${OCF_RESKEY_log=${OCF_RESKEY_log_default}} : ${OCF_RESKEY_use_screen=${OCF_RESKEY_use_screen_default}} # Set default TRANS_RES_STATE (temporary file to "flag" if resource was stated but not stopped) TRANS_RES_STATE="${HA_RSCTMP}/${OCF_RESOURCE_INSTANCE}.state" meta_data() { cat < 0.1 Allows LXC containers to be managed by the cluster. If the container is running "init" it will also perform an orderly shutdown. It is 'assumed' that the 'init' system will do an orderly shudown if presented with a 'kill -PWR' signal. On a 'sysvinit' this would require the container to have an inittab file containing "p0::powerfail:/sbin/init 0" I have absolutly no idea how this is done with 'upstart' or 'systemd', YMMV if your container is using one of them. Manages LXC containers The unique name for this 'Container Instance' e.g. 'test1'. Container Name Absolute path to the file holding the specific configuration for this container e.g. '/etc/lxc/test1/config'. The LXC config file. Absolute path to the container log file Container log file Provides the option of capturing the 'root console' from the container and showing it on a separate screen. To see the screen output run 'screen -r {container name}' The default value is set to 'false', change to 'true' to activate this option Use 'screen' for container 'root console' output END } LXC_usage() { cat <${CGROUP_MOUNT_POINT}/notify_on_release return 0 } LXC_start() { # put this here as it's so long it gets messy later!!! if ocf_is_true $OCF_RESKEY_use_screen; then STARTCMD="screen -dmS ${OCF_RESKEY_container} lxc-start -f ${OCF_RESKEY_config} -n ${OCF_RESKEY_container} -o ${OCF_RESKEY_log}" else STARTCMD="lxc-start -f ${OCF_RESKEY_config} -n ${OCF_RESKEY_container} -o ${OCF_RESKEY_log} -d" fi LXC_status if [ $? -eq $OCF_SUCCESS ]; then ocf_log debug "Resource $OCF_RESOURCE_INSTANCE is already running" ocf_run touch "${TRANS_RES_STATE}" || exit $OCF_ERR_GENERIC return $OCF_SUCCESS fi cgroup_mounted if [ $? -ne 0 ]; then ocf_log err "Unable to find cgroup mount" exit $OCF_ERR_GENERIC fi ocf_log info "Starting" ${OCF_RESKEY_container} ocf_run ${STARTCMD} || exit $OCF_ERR_GENERIC # Spin on status, wait for the cluster manager to time us out if # we fail while ! LXC_status; do ocf_log info "Container ${OCF_RESKEY_container} has not started, waiting" sleep 1 done ocf_run touch "${TRANS_RES_STATE}" || exit $OCF_ERR_GENERIC return $OCF_SUCCESS } LXC_stop() { local shutdown_timeout local now LXC_status if [ $? -eq $OCF_NOT_RUNNING ]; then ocf_log debug "Resource $OCF_RESOURCE_INSTANCE is already stopped" ocf_run rm -f $TRANS_RES_STATE return $OCF_SUCCESS fi cgroup_mounted if [ $? -ne 0 ]; then ocf_log err "Unable to find cgroup mount" exit $OCF_ERR_GENERIC fi # If the container is running "init" and is able to perform and orderly shutdown, then it should be done. # It is 'assumed' that the 'init' system will do an orderly shudown if presented with a 'kill -PWR' signal. # On a 'sysvinit' this would require the container to have an inittab file containing "p0::powerfail:/sbin/init 0" typeset -i PID=0 # This should work for traditional 'sysvinit' and 'upstart' - lxc-ps -C init -opid |while read CN PID ;do + lxc-ps --name "${OCF_RESKEY_container}" -- -C init -o pid,comm |while read CN PID CMD ;do [ $PID -gt 1 ] || continue - [ "$CN" = "${OCF_RESKEY_container}" ] || continue + [ "$CMD" = "init" ] || continue ocf_log info "Sending \"OS shut down\" instruction to" ${OCF_RESKEY_container} "as it was found to be using \"sysV init\" or \"upstart\"" kill -PWR $PID done # This should work for containers using 'systemd' instead of 'init' - lxc-ps -C systemd -opid |while read CN PID ;do - [[ $PID -gt 1 ]] || continue - [[ "$CN" = "${OCF_RESKEY_container}" ]] || continue + lxc-ps --name "${OCF_RESKEY_container}" -- -C systemd -o pid,comm |while read CN PID CMD ;do + [ $PID -gt 1 ] || continue + [ "$CMD" = "systemd" ] || continue ocf_log info "Sending \"OS shut down\" instruction to" ${OCF_RESKEY_container} "as it was found to be using \"systemd\"" kill -PWR $PID done # The "shutdown_timeout" we use here is the operation # timeout specified in the CIB, minus 5 seconds now=$(date +%s) shutdown_timeout=$(( $now + ($OCF_RESKEY_CRM_meta_timeout/1000) -5 )) # Loop on status until we reach $shutdown_timeout while [ $now -lt $shutdown_timeout ]; do LXC_status status=$? case $status in "$OCF_NOT_RUNNING") ocf_run rm -f $TRANS_RES_STATE return $OCF_SUCCESS ;; "$OCF_SUCCESS") # Container is still running, keep waiting (until # shutdown_timeout expires) sleep 1 ;; *) # Something went wrong. Bail out and # resort to forced stop (destroy). break; esac now=$(date +%s) done # If the container is still running, it will be stopped now. regardless of state! ocf_run lxc-stop -n ${OCF_RESKEY_container} || exit $OCF_ERR_GENERIC ocf_log info "Container" ${OCF_RESKEY_container} "stopped" ocf_run rm -f $TRANS_RES_STATE return $OCF_SUCCESS } LXC_status() { # run lxc-info with -s option for LXC-0.7.5 or later local lxc_info_opt="-s" ocf_version_cmp "`lxc-version | cut -d' ' -f 3`" 0.7.5 && lxc_info_opt="" S=`lxc-info $lxc_info_opt -n ${OCF_RESKEY_container}` ocf_log debug "State of ${OCF_RESKEY_container}: $S" if [[ "${S##* }" = "RUNNING" ]] ; then return $OCF_SUCCESS fi return $OCF_NOT_RUNNING } LXC_monitor() { LXC_status && return $OCF_SUCCESS if [ -f $TRANS_RES_STATE ]; then ocf_log err "${OCF_RESKEY_container} is not running, but state file ${TRANS_RES_STATE} exists." exit $OCF_ERR_GENERIC fi return $OCF_NOT_RUNNING } LXC_validate() { # Quick check that all required attributes are set if [ -z "${OCF_RESKEY_container}" ]; then ocf_log err "LXC container name not set!" exit $OCF_ERR_CONFIGURED fi if [ -z "${OCF_RESKEY_config}" ]; then ocf_log err "LXC configuration filename name not set!" exit $OCF_ERR_CONFIGURED fi # Tests that apply only to non-probes if ! ocf_is_probe; then if ! [ -f "${OCF_RESKEY_config}" ]; then ocf_log err "LXC configuration file \"${OCF_RESKEY_config}\" missing or not found!" exit $OCF_ERR_INSTALLED fi if ocf_is_true $OCF_RESKEY_use_screen; then check_binary screen fi check_binary lxc-start check_binary lxc-stop check_binary lxc-ps check_binary lxc-info fi return $OCF_SUCCESS } if [ $# -ne 1 ]; then LXC_usage exit $OCF_ERR_ARGS fi case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; usage|help) LXC_usage exit $OCF_SUCCESS ;; esac # Everything except usage and meta-data must pass the validate test LXC_validate case $__OCF_ACTION in start) LXC_start;; stop) LXC_stop;; status) LXC_status;; monitor) LXC_monitor;; validate-all) ;; *) LXC_usage ocf_log err "$0 was called with unsupported arguments: $*" exit $OCF_ERR_UNIMPLEMENTED ;; esac rc=$? ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" exit $rc