diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am index fd3ca4540..35b4b01ea 100644 --- a/heartbeat/Makefile.am +++ b/heartbeat/Makefile.am @@ -1,175 +1,176 @@ # Makefile.am for OCF RAs # # Author: Sun Jing Dong # Copyright (C) 2004 IBM # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in EXTRA_DIST = $(ocf_SCRIPTS) $(ocfcommon_DATA) \ $(common_DATA) $(hb_DATA) $(dtd_DATA) \ README AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/linux-ha halibdir = $(libexecdir)/heartbeat ocfdir = $(OCF_RA_DIR_PREFIX)/heartbeat dtddir = $(datadir)/$(PACKAGE_NAME) dtd_DATA = ra-api-1.dtd metadata.rng if USE_IPV6ADDR_AGENT ocf_PROGRAMS = IPv6addr else ocf_PROGRAMS = endif if IPV6ADDR_COMPATIBLE halib_PROGRAMS = send_ua else halib_PROGRAMS = endif IPv6addr_SOURCES = IPv6addr.c IPv6addr_utils.c send_ua_SOURCES = send_ua.c IPv6addr_utils.c IPv6addr_LDADD = -lplumb $(LIBNETLIBS) send_ua_LDADD = $(LIBNETLIBS) ocf_SCRIPTS = AoEtarget \ AudibleAlarm \ ClusterMon \ CTDB \ Delay \ Dummy \ EvmsSCC \ Evmsd \ Filesystem \ ICP \ IPaddr \ IPaddr2 \ IPsrcaddr \ LVM \ LinuxSCSI \ MailTo \ ManageRAID \ ManageVE \ NodeUtilization \ Pure-FTPd \ Raid1 \ Route \ SAPDatabase \ SAPInstance \ SendArp \ ServeRAID \ SphinxSearchDaemon \ Squid \ Stateful \ SysInfo \ VIPArip \ VirtualDomain \ WAS \ WAS6 \ WinPopup \ Xen \ 
Xinetd \ ZFS \ anything \ apache \ asterisk \ aws-vpc-route53 \ awseip \ awsvip \ clvm \ conntrackd \ db2 \ dhcpd \ dnsupdate \ docker \ eDir88 \ ethmonitor \ exportfs \ fio \ galera \ garbd \ iSCSILogicalUnit \ iSCSITarget \ ids \ iface-bridge \ iface-vlan \ iscsi \ jboss \ kamailio \ lxc \ minio \ mysql \ mysql-proxy \ nagios \ named \ nfsnotify \ nfsserver \ nginx \ oraasm \ oracle \ oralsnr \ pgagent \ pgsql \ pingd \ portblock \ postfix \ pound \ proftpd \ rabbitmq-cluster \ redis \ + rkt \ rsyncd \ rsyslog \ scsi2reservation \ sfex \ sg_persist \ slapd \ symlink \ syslog-ng \ tomcat \ varnish \ vmware \ vsftpd \ zabbixserver ocfcommondir = $(OCF_LIB_DIR_PREFIX)/heartbeat ocfcommon_DATA = ocf-shellfuncs \ ocf-binaries \ ocf-directories \ ocf-returncodes \ ocf-rarun \ ocf-distro \ apache-conf.sh \ http-mon.sh \ sapdb-nosha.sh \ sapdb.sh \ ora-common.sh \ mysql-common.sh \ nfsserver-redhat.sh \ findif.sh # Legacy locations hbdir = $(sysconfdir)/ha.d hb_DATA = shellfuncs check: $(ocf_SCRIPTS:=.check) %.check: % OCF_ROOT=$(abs_srcdir) OCF_FUNCTIONS_DIR=$(abs_srcdir) ./$< meta-data | xmllint --path $(abs_srcdir) --noout --relaxng $(abs_srcdir)/metadata.rng - diff --git a/heartbeat/rkt b/heartbeat/rkt new file mode 100755 index 000000000..666f8855e --- /dev/null +++ b/heartbeat/rkt @@ -0,0 +1,476 @@ +#!/bin/sh +# +# The rkt HA resource agent creates and launches a container based off +# a supplied image. Containers managed by this agent are both created +# and removed upon the agent's start and stop actions. +# +# Copyright (c) 2017 Valentin Vidic +# All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#

#######################################################################
# Initialization:

: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs

#######################################################################

# meta_data: print the OCF resource-agent metadata (XML) on stdout.
#
# NOTE(review): the XML element tags were stripped from the copy of this
# file under review; only the description text survived. The element
# skeleton below is rebuilt around that text. Parameter names, 'required'
# and the boolean content types follow how the OCF_RESKEY_* variables are
# used further down in this script. The action timeouts/interval could NOT
# be recovered from the source and must be confirmed against the upstream
# agent before merging.
meta_data()
{
	cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="rkt">
<version>1.0</version>

<longdesc lang="en">
The rkt HA resource agent creates and launches a container
based off a supplied image. Containers managed by this agent
are both created and removed upon the agent's start and stop actions.
</longdesc>
<shortdesc lang="en">rkt container resource agent.</shortdesc>

<parameters>
<parameter name="image" required="1" unique="0">
<longdesc lang="en">
The image to base this container off of.
</longdesc>
<shortdesc lang="en">image</shortdesc>
<content type="string"/>
</parameter>

<parameter name="name" required="0" unique="0">
<longdesc lang="en">
The name to give the created container. By default this will
be that resource's instance name.
</longdesc>
<shortdesc lang="en">container name</shortdesc>
<content type="string"/>
</parameter>

<parameter name="allow_pull" required="0" unique="0">
<longdesc lang="en">
Allow the image to be pulled from the configured registry when
the image does not exist locally. NOTE, this can drastically increase
the time required to start the container if the image repository is
pulled over the network.
</longdesc>
<shortdesc lang="en">Allow pulling non-local images</shortdesc>
<content type="boolean"/>
</parameter>

<parameter name="run_opts" required="0" unique="0">
<longdesc lang="en">
Add options to be appended to the 'rkt run' command which is used
when creating the container during the start action. This option allows
users to do things such as setting a custom entry point and injecting
environment variables into the newly created container.

NOTE: Do not explicitly specify the --name argument in the run_opts. This
agent will set --name using either the resource's instance or the name
provided in the 'name' argument of this agent.

</longdesc>
<shortdesc lang="en">run options</shortdesc>
<content type="string"/>
</parameter>

<parameter name="run_cmd" required="0" unique="0">
<longdesc lang="en">
Specify a command to launch within the container once
it has initialized.
</longdesc>
<shortdesc lang="en">run command</shortdesc>
<content type="string"/>
</parameter>

<parameter name="mount_points" required="0" unique="0">
<longdesc lang="en">
A comma separated list of directories that the container is expecting to use.
The agent will ensure they exist by running 'mkdir -p'
</longdesc>
<shortdesc lang="en">Required mount points</shortdesc>
<content type="string"/>
</parameter>

<parameter name="monitor_cmd" required="0" unique="0">
<longdesc lang="en">
Specify the full path of a command to launch within the container to check
the health of the container. This command must return 0 to indicate that
the container is healthy. A non-zero return code will indicate that the
container has failed and should be recovered.

Note: Using this method for monitoring processes inside a container
is not recommended, as rkt tries to track processes running
inside the container and does not deal well with many short-lived
processes being spawned. Ensure that your container monitors its
own processes and terminates on fatal error rather than invoking
a command from the outside.
</longdesc>
<shortdesc lang="en">monitor command</shortdesc>
<content type="string"/>
</parameter>

<parameter name="force_kill" required="0" unique="0">
<longdesc lang="en">
Kill a container immediately rather than waiting for it to gracefully
shutdown
</longdesc>
<shortdesc lang="en">force kill</shortdesc>
<content type="boolean"/>
</parameter>
</parameters>

<actions>
<action name="start"        timeout="90s" />
<action name="stop"         timeout="90s" />
<action name="monitor"      timeout="30s" interval="30s" depth="0" />
<action name="meta-data"    timeout="5s" />
<action name="validate-all" timeout="30s" />
</actions>
</resource-agent>
END
}

#######################################################################
# Set to 1 by image_exists() when the image is missing locally but
# allow_pull permits fetching it; rkt_start() then runs 'rkt fetch'.
REQUIRE_IMAGE_PULL=0

# rkt_usage: print a short usage summary on stdout.
#
# NOTE(review): the original usage text was lost from the copy under
# review; the wording below is a placeholder listing the actions the
# dispatcher accepts - confirm against the upstream agent.
rkt_usage()
{
	cat <<END
usage: $0 {start|stop|monitor|validate-all|meta-data}

Expects to have a fully populated OCF RA-compliant environment set.
END
}

# monitor_cmd_exec: run the optional user-supplied health check.
#
# Returns:
#   OCF_SUCCESS      the command exited 0 (or, presumably, no monitor_cmd
#                    is configured - see NOTE below)
#   OCF_ERR_GENERIC  the command exited non-zero
# Exits the agent with OCF_ERR_ARGS when the command's exit status is 127
# (command not found), since retrying cannot fix that.
#
# NOTE(review): everything in this function up to and including the
# 'out=$(... 2>&1)' line was lost from the copy under review and is
# rebuilt from how the surviving tail uses $out/$rc and from rkt_monitor
# calling this function unconditionally. In particular the use of
# 'rkt enter' to run the command inside the container is presumed -
# confirm against the upstream agent.
monitor_cmd_exec()
{
	local rc=$OCF_SUCCESS
	local out
	local uuid

	if [ -z "$OCF_RESKEY_monitor_cmd" ]; then
		return $rc
	fi

	uuid=$(container_uuid)
	out=$(rkt enter $uuid $OCF_RESKEY_monitor_cmd 2>&1)
	rc=$?

	if [ $rc -eq 127 ]; then
		ocf_log err "monitor cmd failed (rc=$rc), output: $out"
		ocf_exit_reason "monitor_cmd, ${OCF_RESKEY_monitor_cmd}, not found within container."
		# there is no recovering from this, exit immediately
		exit $OCF_ERR_ARGS
	elif [ $rc -ne 0 ]; then
		ocf_exit_reason "monitor cmd failed (rc=$rc), output: $out"
		rc=$OCF_ERR_GENERIC
	else
		ocf_log debug "monitor cmd passed: exit code = $rc"
	fi

	return $rc
}

# container_exists: succeed (0) when 'rkt list' has a row whose second
# column equals $CONTAINER, fail (1) otherwise.
#
# The match result is carried in a flag and reported from a plain END rule.
# The previous 'ENDFILE {exit 1}' form only works with gawk: other awk
# implementations read ENDFILE as an unset variable, never run that rule,
# and so reported every container as existing.
container_exists()
{
	rkt list --no-legend | awk -v C="${CONTAINER}" '$2 == C {found = 1; exit} END {exit !found}'
}

# container_uuid: print the first column of the 'rkt list --full' row
# whose second column equals $CONTAINER; prints nothing when no row matches.
container_uuid()
{
	rkt list --no-legend --full | awk -v C="${CONTAINER}" '$2 == C {print $1; exit}'
}

# container_state: print the fourth column of the 'rkt list' row whose
# second column equals $CONTAINER (compared against "running" and
# "preparing" by the callers); prints nothing when no row matches.
container_state()
{
	rkt list --no-legend | awk -v C="${CONTAINER}" '$2 == C {print $4; exit}'
}

# remove_container: 'rkt rm' the container named $CONTAINER if one is
# listed. Returns 0 when there is nothing to remove, otherwise the status
# of the 'rkt rm' call.
remove_container()
{
	local uuid

	container_exists
	if [ $? -ne 0 ]; then
		# don't attempt to remove a container that doesn't exist
		return 0
	fi

	uuid=$(container_uuid)
	ocf_log notice "Cleaning up inactive container, ${CONTAINER}."
	ocf_run rkt rm "$uuid"
}

# rkt_simple_status: report whether the container is listed and in state
# "running", without running monitor_cmd.
# Returns OCF_SUCCESS when running, OCF_NOT_RUNNING in every other case.
rkt_simple_status()
{
	local val

	container_exists
	if [ $? -ne 0 ]; then
		return $OCF_NOT_RUNNING
	fi

	# retrieve the 'STATE' attribute for the container
	val=$(container_state)
	if [ "$val" = "running" ]; then
		# container exists and is running
		return $OCF_SUCCESS
	fi

	ocf_log debug "container ${CONTAINER} state is $val"
	return $OCF_NOT_RUNNING
}

# rkt_monitor: monitor action. The container must be running and, on top
# of that, monitor_cmd_exec must pass. Returns the first non-zero status.
rkt_monitor()
{
	local rc=0

	rkt_simple_status
	rc=$?

	if [ $rc -ne 0 ]; then
		return $rc
	fi

	monitor_cmd_exec
}

# rkt_create_mounts: 'mkdir -p' every directory in the comma separated
# mount_points parameter. IFS is switched to "," only for the duration of
# the loop; the helper variables are local so they do not leak into the
# rest of the agent.
rkt_create_mounts() {
	local oldIFS="$IFS"
	local directory

	IFS=","
	for directory in $OCF_RESKEY_mount_points; do
		mkdir -p "$directory"
	done
	IFS="$oldIFS"
}

# rkt_start: start action.
#
# Creates the mount points, returns early if the container is already
# running, optionally fetches the image, removes any stale container of
# the same name, launches the container through systemd-run, and then
# waits until it is running and monitor_cmd passes.
# Returns OCF_SUCCESS or OCF_ERR_GENERIC. The wait loops have no timeout
# of their own.
rkt_start()
{
	rkt_create_mounts
	local run_opts="--name=${CONTAINER}"

	# check to see if the container has already started
	rkt_simple_status
	if [ $? -eq $OCF_SUCCESS ]; then
		return $OCF_SUCCESS
	fi

	if [ -n "$OCF_RESKEY_run_cmd" ]; then
		run_opts="$run_opts --exec=$OCF_RESKEY_run_cmd"
	fi

	if [ -n "$OCF_RESKEY_run_opts" ]; then
		run_opts="$run_opts $OCF_RESKEY_run_opts"
	fi

	if [ $REQUIRE_IMAGE_PULL -eq 1 ]; then
		ocf_log notice "Beginning pull of image, ${OCF_RESKEY_image}"
		rkt fetch "${OCF_RESKEY_image}"
		if [ $? -ne 0 ]; then
			ocf_exit_reason "failed to pull image ${OCF_RESKEY_image}"
			return $OCF_ERR_GENERIC
		fi
	fi

	# make sure any previous container matching our container name is cleaned up first.
	# we already know at this point it wouldn't be running
	remove_container
	ocf_log info "Starting container, ${CONTAINER}."
	# $run_opts is deliberately unquoted: it carries several options.
	ocf_run systemd-run --slice=machine rkt run $OCF_RESKEY_image $run_opts

	if [ $? -ne 0 ]; then
		ocf_exit_reason "Failed to launch container"
		return $OCF_ERR_GENERIC
	fi

	# '=' rather than '==': this script runs under /bin/sh, and '==' is not
	# a POSIX test operator (dash rejects it, which made this loop fall
	# through while the container was still preparing).
	while ! container_exists || [ "$(container_state)" = "preparing" ] ; do
		ocf_log debug "waiting for container to start"
		sleep 1
	done

	# wait for monitor to pass before declaring that the container is started
	while true; do
		rkt_simple_status
		if [ $? -ne $OCF_SUCCESS ]; then
			ocf_exit_reason "Newly created container exited after start"
			return $OCF_ERR_GENERIC
		fi

		monitor_cmd_exec
		if [ $? -eq $OCF_SUCCESS ]; then
			ocf_log notice "Container ${CONTAINER} started successfully as $(container_uuid)"
			return $OCF_SUCCESS
		fi

		ocf_exit_reason "waiting on monitor_cmd to pass after start"
		sleep 1
	done
}

# rkt_stop: stop action.
#
# A container that is not running is simply removed. Otherwise 'rkt stop'
# (with --force when force_kill is true) is issued, the agent polls once a
# second until the container leaves the running state or the wait budget
# is used up, and the stopped container is removed.
# The wait budget is the operation timeout minus 10 seconds, never less
# than 10 seconds, and 60 seconds when no operation timeout is provided.
# Returns OCF_SUCCESS or OCF_ERR_GENERIC.
rkt_stop()
{
	local timeout=60
	local uuid

	rkt_simple_status
	if [ $? -eq $OCF_NOT_RUNNING ]; then
		remove_container
		return $OCF_SUCCESS
	fi

	if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
		# the value is divided by 1000 to obtain seconds
		timeout=$(( $OCF_RESKEY_CRM_meta_timeout/1000 - 10 ))
		if [ $timeout -lt 10 ]; then
			timeout=10
		fi
	fi

	uuid=$(container_uuid)
	if ocf_is_true "$OCF_RESKEY_force_kill"; then
		ocf_log info "Killing container, ${CONTAINER}."
		ocf_run rkt stop --force "$uuid"
	else
		ocf_log info "Stopping container, ${CONTAINER}."
		ocf_run rkt stop "$uuid"
	fi

	# $? is the status of whichever 'ocf_run rkt stop' branch ran above
	if [ $? -ne 0 ]; then
		ocf_exit_reason "Failed to stop container, ${CONTAINER}, based on image, ${OCF_RESKEY_image}."
		return $OCF_ERR_GENERIC
	fi

	while [ $timeout -gt 0 ]; do
		rkt_simple_status
		if [ $? -eq $OCF_NOT_RUNNING ]; then
			break
		fi

		ocf_log debug "waiting for container to stop"
		timeout=$(( $timeout - 1 ))
		sleep 1
	done

	rkt_simple_status
	if [ $? -eq $OCF_SUCCESS ]; then
		ocf_exit_reason "Failed to stop container, ${CONTAINER}."
		return $OCF_ERR_GENERIC
	fi

	remove_container
	if [ $? -ne 0 ]; then
		ocf_exit_reason "Failed to remove stopped container, ${CONTAINER}, based on image, ${OCF_RESKEY_image}."
		return $OCF_ERR_GENERIC
	fi

	return $OCF_SUCCESS
}

# image_exists: check that the configured image can be used.
#
# Returns 0 when 'rkt image list' has a row whose second column equals the
# image parameter, or when it does not but allow_pull is true (in which
# case REQUIRE_IMAGE_PULL is set so that rkt_start fetches the image).
# Returns 1 otherwise.
#
# As in container_exists, the lookup uses a flag plus a plain END rule
# instead of the gawk-only ENDFILE, so a missing image is detected with
# any awk implementation.
image_exists()
{
	rkt image list --no-legend | awk -v I="${OCF_RESKEY_image}" '$2 == I {found = 1; exit} END {exit !found}'
	if [ $? -eq 0 ]; then
		# image found
		return 0
	fi

	if ocf_is_true "$OCF_RESKEY_allow_pull"; then
		REQUIRE_IMAGE_PULL=1
		ocf_log notice "Image (${OCF_RESKEY_image}) does not exist locally but will be pulled during start"
		return 0
	fi

	# image not found.
	return 1
}

# rkt_validate: validate-all action, also run before start.
#
# Checks that the rkt and systemd-run binaries are present, that 'image'
# is set, that the container name only uses [a-z0-9-], and that the image
# is available (or may be pulled).
# Exits the agent with OCF_ERR_CONFIGURED on the first failed check;
# returns OCF_SUCCESS otherwise.
rkt_validate()
{
	check_binary rkt
	check_binary systemd-run

	if [ -z "$OCF_RESKEY_image" ]; then
		ocf_exit_reason "'image' option is required"
		exit $OCF_ERR_CONFIGURED
	fi

	# The bracket expression is quoted so the shell cannot glob-expand it
	# against files in the current directory before grep sees it.
	if printf '%s\n' "${CONTAINER}" | grep -q '[^a-z0-9-]'; then
		ocf_exit_reason "'name' must contain only lower case alphanumeric characters and -"
		exit $OCF_ERR_CONFIGURED
	fi

	image_exists
	if [ $? -ne 0 ]; then
		ocf_exit_reason "base image, ${OCF_RESKEY_image}, could not be found."
		exit $OCF_ERR_CONFIGURED
	fi

	return $OCF_SUCCESS
}

# TODO :
# When a user starts plural clones in a node in globally-unique, a user cannot appoint plural name parameters.
# When a user appoints reuse, the resource agent cannot connect plural clones with a container.

# Derive the container name.
#
# Without globally-unique clones the name simply defaults to the resource
# instance name. With globally-unique clones an explicitly configured name
# cannot be shared by several instances on one node, so that combination
# is rejected as a configuration error; when no name is configured the
# instance name is used with every ':' turned into '-'.
if ! ocf_is_true "$OCF_RESKEY_CRM_meta_globally_unique"; then
	: ${OCF_RESKEY_name=${OCF_RESOURCE_INSTANCE}}
else
	if [ -n "$OCF_RESKEY_name" ]; then
		if [ -n "$OCF_RESKEY_CRM_meta_clone_node_max" ]; then
			if [ "$OCF_RESKEY_CRM_meta_clone_node_max" -ne 1 ]; then
				ocf_exit_reason "Cannot make plural clones from the same name parameter."
				exit $OCF_ERR_CONFIGURED
			fi
		fi
		if [ -n "$OCF_RESKEY_CRM_meta_master_node_max" ]; then
			if [ "$OCF_RESKEY_CRM_meta_master_node_max" -ne 1 ]; then
				ocf_exit_reason "Cannot make plural master from the same name parameter."
				exit $OCF_ERR_CONFIGURED
			fi
		fi
	fi
	: ${OCF_RESKEY_name=$(echo ${OCF_RESOURCE_INSTANCE} | tr ':' '-')}
fi

CONTAINER=$OCF_RESKEY_name

# Dispatch the requested action. The informational actions exit straight
# away; the others fall through so their status is logged below.
case $__OCF_ACTION in
usage|help)
	rkt_usage
	exit $OCF_SUCCESS
	;;
meta-data)
	meta_data
	exit $OCF_SUCCESS
	;;
validate-all)
	rkt_validate
	;;
monitor)
	rkt_monitor
	;;
stop)
	rkt_stop
	;;
start)
	# rkt_validate exits the agent itself when validation fails
	rkt_validate
	rkt_start
	;;
*)
	rkt_usage
	exit $OCF_ERR_UNIMPLEMENTED
	;;
esac
action_rc=$?
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $action_rc"
exit $action_rc