
diff --git a/heartbeat/podman-etcd b/heartbeat/podman-etcd
new file mode 100755
index 000000000..9fb419a64
--- /dev/null
+++ b/heartbeat/podman-etcd
@@ -0,0 +1,1414 @@
+#!/bin/sh
+#
+# The podman etcd HA resource agent creates and launches an etcd podman
+# container based off a supplied podman image. Containers managed by
+# this agent are both created and removed upon the agent's start and
+# stop actions.
+#
+# Based on the podman resource agent.
+#
+# Copyright (c) 2014 David Vossel <davidvossel@gmail.com>
+# Michele Baldessari <michele@acksyn.org>
+# All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like. Any license provided herein, whether implied or
+# otherwise, applies only to this software file. Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+#
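+#
+# Example resource creation (illustrative only; the node names, IPs and pcs
+# syntax shown are assumptions to adapt to the actual cluster):
+#
+#   pcs resource create etcd ocf:heartbeat:podman-etcd \
+#       node_ip_map="master-0:192.0.2.10;master-1:192.0.2.11" \
+#       promotable notify=true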
+
+#######################################################################
+# Initialization:
+
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+# Parameter defaults
+
+OCF_RESKEY_image_default="quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:e213f49e8500d14652a0f4546fd43e11164cc9cc5d575a208634c268f150c3ea"
+OCF_RESKEY_name_default="etcd"
+OCF_RESKEY_nic_default="br-ex"
+OCF_RESKEY_authfile_default="/run/resource-agents/pull-secret"
+OCF_RESKEY_allow_pull_default="1"
+OCF_RESKEY_reuse_default="0"
+
+: ${OCF_RESKEY_image=${OCF_RESKEY_image_default}}
+: ${OCF_RESKEY_name=${OCF_RESKEY_name_default}}
+: ${OCF_RESKEY_nic=${OCF_RESKEY_nic_default}}
+: ${OCF_RESKEY_authfile=${OCF_RESKEY_authfile_default}}
+: ${OCF_RESKEY_allow_pull=${OCF_RESKEY_allow_pull_default}}
+: ${OCF_RESKEY_reuse=${OCF_RESKEY_reuse_default}}
+
+#######################################################################
+
+meta_data()
+{
+ cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="podman-etcd" version="1.0">
+<version>1.0</version>
+
+<longdesc lang="en">
+The podman-etcd HA resource agent creates and launches an etcd podman
+container based off a supplied podman image. Containers managed by
+this agent are both created and removed upon the agent's start and
+stop actions.
+</longdesc>
+<shortdesc lang="en">Podman etcd container resource agent.</shortdesc>
+
+<parameters>
+<parameter name="image" required="0" unique="0">
+<longdesc lang="en">
+The podman image to base this container off of.
+</longdesc>
+<shortdesc lang="en">podman image</shortdesc>
+<content type="string" default="${OCF_RESKEY_image_default}"/>
+</parameter>
+
+<parameter name="name" required="0" unique="0">
+<longdesc lang="en">
+The name to give the created container. By default this will
+be that resource's instance name.
+</longdesc>
+<shortdesc lang="en">podman container name</shortdesc>
+<content type="string" default="${OCF_RESKEY_name_default}"/>
+</parameter>
+
+<parameter name="node_ip_map" unique="0" required="1">
+<longdesc lang="en">
+A mapping of node names to IPs.
+
+This takes the form of:
+n1:ip1;n2:ip2
+
+where the etcd container on n1 would have IP ip1
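+
+For example (hypothetical node names and addresses):
+master-0:192.0.2.10;master-1:192.0.2.11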
+</longdesc>
+<shortdesc lang="en">Container node name to IP mapping</shortdesc>
+<content type="string"/>
+</parameter>
+
+<parameter name="host_map" unique="0">
+<longdesc lang="en">
+A mapping of host names to the alternate node names used in certificate file names.
+
+This takes the form of:
+host1:alt1;host2:alt2
+
+where host1 would be mapped to alt1 for certificate file names.
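+
+For example (hypothetical values):
+node-1.example.com:master-0;node-2.example.com:master-1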
+</longdesc>
+<shortdesc lang="en">Pacemaker to etcd name mapping</shortdesc>
+<content type="string"/>
+</parameter>
+
+<parameter name="nic" unique="0">
+<longdesc lang="en">
+Network interface used to look up the IP address of the host.
+</longdesc>
+<shortdesc lang="en">Network interface</shortdesc>
+<content type="string" default="${OCF_RESKEY_nic_default}"/>
+</parameter>
+
+<parameter name="authfile" required="0" unique="0">
+<longdesc lang="en">
+Path of the authentication file.
+
+The file is created by podman login.
+</longdesc>
+<shortdesc lang="en">Path of the authentication file </shortdesc>
+<content type="string" default="${OCF_RESKEY_authfile_default}"/>
+</parameter>
+
+<parameter name="allow_pull" unique="0">
+<longdesc lang="en">
+Allow the image to be pulled from the configured podman registry when
+the image does not exist locally. NOTE, this can drastically increase
+the time required to start the container if the image repository is
+pulled over the network.
+</longdesc>
+<shortdesc lang="en">Allow pulling non-local images</shortdesc>
+<content type="boolean" default="${OCF_RESKEY_allow_pull_default}"/>
+</parameter>
+
+<parameter name="run_opts" required="0" unique="0">
+<longdesc lang="en">
+Add options to be appended to the 'podman run' command which is used
+when creating the container during the start action. This option allows
+users to do things such as setting a custom entry point and injecting
+environment variables into the newly created container. Note the '-d'
+option is supplied regardless of this value to force containers to run
+in the background.
+
+NOTE: Do not explicitly specify the --name argument in the run_opts. This
+agent will set --name using either the resource's instance or the name
+provided in the 'name' argument of this agent.
+
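+For example (illustrative): -e SOME_VAR=value --memory=1g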
+</longdesc>
+<shortdesc lang="en">run options</shortdesc>
+<content type="string"/>
+</parameter>
+
+<parameter name="run_cmd" required="0" unique="0">
+<longdesc lang="en">
+Specify a command to launch within the container once
+it has initialized.
+</longdesc>
+<shortdesc lang="en">run command</shortdesc>
+<content type="string"/>
+</parameter>
+
+<parameter name="run_cmd_opts" required="0" unique="0">
+<longdesc lang="en">
+Options to be added to the 'run_cmd'.
+</longdesc>
+<shortdesc lang="en">run command options</shortdesc>
+<content type="string"/>
+</parameter>
+
+<parameter name="mount_points" required="0" unique="0">
+<longdesc lang="en">
+A comma separated list of directories that the container is expecting to use.
+The agent will ensure they exist by running 'mkdir -p'
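+For example: /var/lib/etcd,/etc/kubernetes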
+</longdesc>
+<shortdesc lang="en">Required mount points</shortdesc>
+<content type="string"/>
+</parameter>
+
+<parameter name="monitor_cmd" required="0" unique="0">
+<longdesc lang="en">
+Specify the full path of a command to launch within the container to check
+the health of the container. This command must return 0 to indicate that
+the container is healthy. A non-zero return code will indicate that the
+container has failed and should be recovered.
+
+Note: Using this method for monitoring processes inside a container
+is not recommended, as containerd tries to track processes running
+inside the container and does not deal well with many short-lived
+processes being spawned. Ensure that your container monitors its
+own processes and terminates on fatal error rather than invoking
+a command from the outside.
+</longdesc>
+<shortdesc lang="en">monitor command</shortdesc>
+<content type="string"/>
+</parameter>
+
+<parameter name="force_kill" required="0" unique="0">
+<longdesc lang="en">
+Kill a container immediately rather than waiting for it to gracefully
+shut down.
+</longdesc>
+<shortdesc lang="en">force kill</shortdesc>
+<content type="boolean"/>
+</parameter>
+
+<parameter name="reuse" required="0" unique="0">
+<longdesc lang="en">
+Allow the container to be reused once it is stopped. By default,
+containers get removed once they are stopped. Enable this option
+to have the particular one persist when this happens.
+</longdesc>
+<shortdesc lang="en">reuse container</shortdesc>
+<content type="boolean" default="${OCF_RESKEY_reuse_default}"/>
+</parameter>
+
+<parameter name="drop_in_dependency" required="0" unique="0">
+<longdesc lang="en">
+Use transient drop-in files to add extra dependencies to the systemd
+scopes associated with the container. During reboot, this prevents
+systemd from stopping the container before Pacemaker.
+</longdesc>
+<shortdesc lang="en">drop-in dependency</shortdesc>
+<content type="boolean"/>
+</parameter>
+</parameters>
+
+<actions>
+<action name="start" timeout="600s" />
+<action name="stop" timeout="90s" />
+<action name="monitor" timeout="25s" interval="30s" depth="0" />
+<action name="promote" timeout="300s" />
+<action name="demote" timeout="120s" />
+<action name="meta-data" timeout="5s" />
+<action name="validate-all" timeout="30s" />
+</actions>
+</resource-agent>
+END
+}
+
+#######################################################################
+REQUIRE_IMAGE_PULL=0
+
+podman_usage()
+{
+ cat <<END
+usage: $0 {start|stop|monitor|promote|demote|validate-all|meta-data}
+
+Expects to have a fully populated OCF RA-compliant environment set.
+END
+}
+
+
+monitor_cmd_exec()
+{
+ local rc=$OCF_SUCCESS
+ local out
+
+ out=$(podman exec ${CONTAINER} $OCF_RESKEY_monitor_cmd 2>&1)
+ rc=$?
+ # 125: no container with name or ID ${CONTAINER} found
+ # 126: container state improper (not running)
+ # 127: any other error
+ # 255: podman 2+: container not running
+ case "$rc" in
+ 125|126|255)
+ rc=$OCF_NOT_RUNNING
+ ;;
+ 0)
+ ocf_log debug "monitor cmd passed: exit code = $rc"
+ ;;
+ *)
+ ocf_exit_reason "monitor cmd failed (rc=$rc), output: $out"
+ rc=$OCF_ERR_GENERIC
+ ;;
+ esac
+
+ return $rc
+}
+
+container_exists()
+{
+ local rc
+ local out
+
+ out=$(podman exec ${CONTAINER} $OCF_RESKEY_monitor_cmd 2>&1)
+ rc=$?
+ # 125: no container with name or ID ${CONTAINER} found
+ if [ $rc -ne 125 ]; then
+ return 0
+ fi
+ return 1
+}
+
+remove_container()
+{
+ local rc
+ local execids
+
+ if ocf_is_true "$OCF_RESKEY_reuse"; then
+ # never remove the container if we have reuse enabled.
+ return 0
+ fi
+
+ container_exists
+ if [ $? -ne 0 ]; then
+ # don't attempt to remove a container that doesn't exist
+ return 0
+ fi
+ ocf_log notice "Cleaning up inactive container, ${CONTAINER}."
+ ocf_run podman rm -v $CONTAINER
+ rc=$?
+ if [ $rc -ne 0 ]; then
+ if [ $rc -eq 2 ]; then
+ if podman inspect --format '{{.State.Status}}' $CONTAINER | grep -wq "stopping"; then
+ ocf_log err "Inactive container ${CONTAINER} is stuck in 'stopping' state. Force-remove it."
+ ocf_run podman rm -f $CONTAINER
+ rc=$?
+ fi
+ fi
+ # due to a podman bug (rhbz#1841485), sometimes a stopped
+ # container can still be associated with Exec sessions, in
+ # which case the "podman rm" has to be forced
+ execids=$(podman inspect $CONTAINER --format '{{len .ExecIDs}}')
+ if [ "$execids" -ne "0" ]; then
+ ocf_log warn "Inactive container ${CONTAINER} has lingering exec sessions. Force-remove it."
+ ocf_run podman rm -f $CONTAINER
+ rc=$?
+ fi
+ fi
+ return $rc
+}
+
+podman_simple_status()
+{
+ local rc
+
+ # simple status is implemented via podman exec
+ # everything besides success is considered "not running"
+ monitor_cmd_exec
+ rc=$?
+ if [ $rc -ne $OCF_SUCCESS ]; then
+ rc=$OCF_NOT_RUNNING;
+ fi
+ return $rc
+}
+
+podman_monitor()
+{
+ # We rely on running podman exec to monitor the container
+ # state because that command seems to be less prone to
+ # performance issue under IO load.
+ #
+ # For probes to work, we expect cmd_exec to be able to report
+ # when a container is not running. Here, we're not interested
+ # in distinguishing whether it's stopped or non existing
+ # (there's function container_exists for that)
+ monitor_cmd_exec
+ rc=$?
+ if [ $rc -ne 0 ]; then
+ return $rc
+ fi
+
+ # Failing to cache data and check member list should not cause the
+ # monitor operation to fail.
+ # TODO: move this inside check_peers where we already query the member list json
+ cache_member_id
+ check_peers
+ rc=$?
+ if [ $rc -ne $OCF_SUCCESS ]; then
+ return $OCF_ERR_GENERIC
+ fi
+
+ # TODO: Etcd data comes from the disk, so if it is not available, that is a fatal failure
+ cache_etcd_data
+ return $?
+}
+
+podman_create_mounts() {
+ oldIFS="$IFS"
+ IFS=","
+ for directory in $OCF_RESKEY_mount_points; do
+ mkdir -p "$directory"
+ done
+ IFS="$oldIFS"
+}
+
+podman_container_id()
+{
+ # Retrieve the container ID by doing a "podman ps" rather than
+ # a "podman inspect", because the latter has performance issues
+ # under IO load.
+ # We could have run "podman start $CONTAINER" to get the ID back
+ # but if the container is stopped, the command will return a
+ # name instead of a container ID. This would break us.
+ podman ps --no-trunc --format '{{.ID}} {{.Names}}' | grep -F -w -m1 "$CONTAINER" | cut -d' ' -f1
+}
+
+
+create_transient_drop_in_dependency()
+{
+ local cid=$1
+ local rc=$OCF_SUCCESS
+
+ if [ -z "$cid" ]; then
+ ocf_exit_reason "Container ID not found for \"$CONTAINER\". Not creating drop-in dependency"
+ return $OCF_ERR_GENERIC
+ fi
+
+ ocf_log info "Creating drop-in dependency for \"$CONTAINER\" ($cid)"
+ for scope in "libpod-$cid.scope.d" "libpod-conmon-$cid.scope.d"; do
+ if [ $rc -eq $OCF_SUCCESS ] && [ ! -d /run/systemd/transient/"$scope" ]; then
+ mkdir -p /run/systemd/transient/"$scope" && \
+ printf "[Unit]\nBefore=pacemaker.service" > /run/systemd/transient/"$scope"/dep.conf && \
+ chmod ago+r /run/systemd/transient/"$scope" /run/systemd/transient/"$scope"/dep.conf
+ rc=$?
+ fi
+ done
+
+ if [ $rc -ne $OCF_SUCCESS ]; then
+ ocf_log error "Could not create drop-in dependency for \"$CONTAINER\" ($cid)"
+ else
+ systemctl daemon-reload
+ rc=$?
+ if [ $rc -ne $OCF_SUCCESS ]; then
+ ocf_log error "Could not refresh service definition after creating drop-in for \"$CONTAINER\""
+ fi
+ fi
+
+ return $rc
+}
+
+
+run_new_container()
+{
+ local opts=$1
+ local image=$2
+ local cmd=$3
+ local rc
+
+ ocf_log info "running container $CONTAINER for the first time"
+ out=$(podman run $opts $image $cmd 2>&1)
+ rc=$?
+
+ if [ -n "$out" ]; then
+ out="$(echo "$out" | tr -s ' \t\r\n' ' ')"
+ if [ $rc -eq 0 ]; then
+ ocf_log info "$out"
+ else
+ ocf_log err "$out"
+ fi
+ fi
+
+ if [ $rc -eq 125 ]; then
+ # If an internal podman error occurred, it might be because
+ # the internal storage layer still references an old container
+ # with the same name, even though podman itself thinks there
+ # is no such container. If so, purge the storage layer to try
+ # to clean the corruption and try again.
+ if echo "$out" | grep -q "unknown.*flag"; then
+ ocf_exit_reason "$out"
+ return $rc
+ fi
+
+ ocf_log warn "Internal podman error while creating new container $CONTAINER. Retrying."
+ ocf_run podman rm --storage $CONTAINER
+ ocf_run podman run $opts $image $cmd
+ rc=$?
+ elif [ $rc -eq 127 ]; then
+ # rhbz#1972209: podman 3.0.x seems to be hit by a race
+ # where the cgroup is not yet set up properly when the OCI
+ # runtime configures the container. If that happens, recreate
+ # the container as long as we get the same error code or
+ # until start timeout preempts us.
+ while [ $rc -eq 127 ] && (echo "$out" | grep -q "cgroup.*scope not found") ; do
+ ocf_log warn "Internal podman error while assigning cgroup. Retrying."
+ # Arbitrary sleep to prevent consuming all CPU while looping
+ sleep 1
+ podman rm -f "$CONTAINER"
+ out=$(podman run $opts $image $cmd 2>&1)
+ rc=$?
+ done
+ # Log the created container ID if it succeeded
+ if [ $rc -eq 0 ]; then
+ ocf_log info "$out"
+ fi
+ fi
+
+ return $rc
+}
+
+get_hostname()
+{
+ # TODO: consider using ocf_local_nodename instead
+ local name=$(hostname)
+ if [ -z "$OCF_RESKEY_host_map" ]; then
+ echo $name
+ else
+ echo "$OCF_RESKEY_host_map" | tr ';' '\n' | tr -d ' ' | sed 's/:/ /' | awk -F' ' '$1=="'"$name"'" {print $2;exit}'
+ fi
+}
+
+get_node_ip() {
+ echo "$(ip -brief addr show $OCF_RESKEY_nic | awk '{gsub("/.*", "", $3); print $3}')"
+}
+
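+# prepare_env computes the environment used to launch the etcd container:
+# the local hostname/IP, ETCD_INITIAL_CLUSTER and ALL_ETCD_ENDPOINTS (for a
+# single node when force_new_cluster is set, otherwise from node_ip_map),
+# the TLS certificate paths and the listen URLs.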
+prepare_env() {
+ local name ip standalone_node
+ local nodename=$(ocf_local_nodename)
+
+ THIS_NODE_HOSTNAME="$(get_hostname)"
+ THIS_NODE_IP="$(get_node_ip)"
+
+ if is_force_new_cluster; then
+ ETCD_INITIAL_CLUSTER_STATE="new"
+ ALL_ETCD_ENDPOINTS="https://$THIS_NODE_IP:2379"
+
+ # TODO: double check if it is always hostname, or use ocf_local_nodename instead
+ ETCD_INITIAL_CLUSTER="$THIS_NODE_HOSTNAME=https://$THIS_NODE_IP:2380"
+ else
+ for node in $(echo "$OCF_RESKEY_node_ip_map" | sed "s/\s//g;s/;/ /g"); do
+ name=$(echo "$node" | awk -F":" '{print $1}')
+ ip=$(echo "$node" | awk -F":" '{print $2}')
+ if [ -z "$name" ] || [ -z "$ip" ]; then
+ ocf_exit_reason "name or ip missing for 1 or more nodes"
+ exit $OCF_ERR_CONFIGURED
+ fi
+
+ crm_attribute --type nodes --node $name --name "ip" --update "$ip"
+ [ -z "$ALL_ETCD_ENDPOINTS" ] && ALL_ETCD_ENDPOINTS="https://$ip:2379" || ALL_ETCD_ENDPOINTS="$ALL_ETCD_ENDPOINTS,https://$ip:2379"
+ [ -z "$ETCD_INITIAL_CLUSTER" ] && ETCD_INITIAL_CLUSTER="$name=https://$ip:2380" || ETCD_INITIAL_CLUSTER="$ETCD_INITIAL_CLUSTER,$name=https://$ip:2380"
+ done
+ fi
+
+ ETCDCTL_API="3"
+ ETCD_CIPHER_SUITES="TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256"
+ ETCD_DATA_DIR="/var/lib/etcd"
+ ETCD_ELECTION_TIMEOUT="1000"
+ ETCD_ENABLE_PPROF="true"
+ ETCD_EXPERIMENTAL_MAX_LEARNERS="3"
+ ETCD_EXPERIMENTAL_WARNING_APPLY_DURATION=200ms
+ ETCD_EXPERIMENTAL_WATCH_PROGRESS_NOTIFY_INTERVAL=5s
+ ETCD_HEARTBEAT_INTERVAL="100"
+ ETCD_INITIAL_CLUSTER_STATE=${ETCD_INITIAL_CLUSTER_STATE:-"existing"}
+ ETCD_QUOTA_BACKEND_BYTES="8589934592"
+ ETCD_SOCKET_REUSE_ADDRESS="true"
+
+ SERVER_CACERT="/etc/kubernetes/static-pod-resources/etcd-certs/configmaps/etcd-all-bundles/server-ca-bundle.crt"
+ ETCD_PEER_CERT="/etc/kubernetes/static-pod-resources/etcd-certs/secrets/etcd-all-certs/etcd-peer-${THIS_NODE_HOSTNAME}.crt"
+ ETCD_PEER_KEY="/etc/kubernetes/static-pod-resources/etcd-certs/secrets/etcd-all-certs/etcd-peer-${THIS_NODE_HOSTNAME}.key"
+
+ LISTEN_CLIENT_URLS="0.0.0.0"
+ LISTEN_PEER_URLS="0.0.0.0"
+ LISTEN_METRICS_URLS="0.0.0.0"
+
+ if is_learner; then
+ LISTEN_CLIENT_URLS="$THIS_NODE_IP"
+ LISTEN_PEER_URLS="$THIS_NODE_IP"
+ LISTEN_METRICS_URLS="$THIS_NODE_IP"
+ fi
+}
+
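+# archive_data_folder moves the existing etcd member data directory out of
+# the way (under $HA_RSCTMP) before this node rejoins the cluster as a
+# learner, so it starts from a clean data dir.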
+archive_data_folder()
+{
+ # TODO: use etcd snapshots
+ local dest_dir_name="members-snapshot-$(date +%Y%m%d%H%M%S)"
+ local data_dir="/var/lib/etcd/member"
+ if [ ! -d $data_dir ]; then
+ ocf_log info "no data dir to backup"
+ return $OCF_SUCCESS
+ fi
+ ocf_log info "backing up $data_dir under $HA_RSCTMP/$dest_dir_name"
+ mv $data_dir $HA_RSCTMP/$dest_dir_name
+ sync
+}
+
+podman_start()
+{
+ local cid
+ local rc
+ local etcd_pod_wait_timeout_sec=$((10 * 60))
+ local etcd_pod_poll_interval_sec=10
+ local etcd_pod_poll_retries=$((etcd_pod_wait_timeout_sec/etcd_pod_poll_interval_sec))
+
+ if is_standalone; then
+ if is_learner; then
+ ocf_exit_reason "$(ocf_local_nodename) set both as learner and standalone"
+ return $OCF_ERR_CONFIGURED
+ fi
+ fi
+
+ # ensure the etcd pod is not running before starting the container
+ ocf_log info "Ensure etcd pod is not running (retries:$etcd_pod_poll_retries, interval:$etcd_pod_poll_interval_sec)"
+ for try in $(seq $etcd_pod_poll_retries); do
+ etcd_pod_container_exists
+ if [ $? -ne 0 ]; then
+ break
+ fi
+ ocf_log info "Etcd pod running: retry in $etcd_pod_poll_interval_sec seconds."
+ sleep $etcd_pod_poll_interval_sec
+ done
+ etcd_pod_container_exists
+ if [ $? -eq 0 ]; then
+ ocf_exit_reason "Etcd pod is still running after $etcd_pod_wait_timeout_sec seconds."
+ return $OCF_ERR_GENERIC
+ fi
+
+ podman_create_mounts
+ local run_opts="-d --name=${CONTAINER}"
+ # check to see if the container has already started
+ podman_simple_status
+ if [ $? -eq $OCF_SUCCESS ]; then
+ return $OCF_SUCCESS
+ fi
+
+ local peer_node=$(get_peer_node_name)
+ if is_standalone $peer_node; then
+ local nodename=$(ocf_local_nodename)
+ local wait_timeout_sec=$((2*60))
+ local poll_interval_sec=5
+ local retries=$(( wait_timeout_sec / poll_interval_sec ))
+
+ ocf_log info "wait for the leader node to add $nodename as learner (timeout: $wait_timeout_sec seconds)"
+ for try in $(seq $retries); do
+ learner_node=$(get_learner_node)
+ if [ "$nodename" != "$learner_node" ]; then
+ ocf_log info "$learner_node is not in the member list yet. Retry in $poll_interval_sec seconds."
+ sleep $poll_interval_sec
+ continue
+ fi
+ ocf_log info "learner node $learner_node in the member list"
+ break
+ done
+ if [ "$nodename" != "$(get_learner_node)" ]; then
+ ocf_log error "wait for $nodename to be in the member list timed out"
+ return $OCF_ERR_GENERIC
+ fi
+ fi
+
+ prepare_env
+
+ # add etcd-specific opts
+ run_opts="$run_opts \
+ --network=host \
+ -v /etc/kubernetes:/etc/kubernetes \
+ -v /var/lib/etcd:/var/lib/etcd \
+ --env ALL_ETCD_ENDPOINTS=$ALL_ETCD_ENDPOINTS \
+ --env ETCD_CIPHER_SUITES=$ETCD_CIPHER_SUITES \
+ --env ETCD_DATA_DIR=$ETCD_DATA_DIR \
+ --env ETCD_ELECTION_TIMEOUT=$ETCD_ELECTION_TIMEOUT \
+ --env ETCD_ENABLE_PPROF=$ETCD_ENABLE_PPROF \
+ --env ETCD_EXPERIMENTAL_MAX_LEARNERS=$ETCD_EXPERIMENTAL_MAX_LEARNERS \
+ --env ETCD_EXPERIMENTAL_WARNING_APPLY_DURATION=$ETCD_EXPERIMENTAL_WARNING_APPLY_DURATION \
+ --env ETCD_EXPERIMENTAL_WATCH_PROGRESS_NOTIFY_INTERVAL=$ETCD_EXPERIMENTAL_WATCH_PROGRESS_NOTIFY_INTERVAL \
+ --env ETCD_HEARTBEAT_INTERVAL=$ETCD_HEARTBEAT_INTERVAL \
+ --env ETCD_INITIAL_CLUSTER=$ETCD_INITIAL_CLUSTER \
+ --env ETCD_INITIAL_CLUSTER_STATE=$ETCD_INITIAL_CLUSTER_STATE \
+ --env ETCD_NAME=$THIS_NODE_HOSTNAME \
+ --env ETCD_QUOTA_BACKEND_BYTES=$ETCD_QUOTA_BACKEND_BYTES \
+ --env ETCD_SOCKET_REUSE_ADDRESS=$ETCD_SOCKET_REUSE_ADDRESS \
+ --env ETCDCTL_API=$ETCDCTL_API \
+ --env ETCDCTL_CACERT=$SERVER_CACERT \
+ --env ETCDCTL_CERT=$ETCD_PEER_CERT \
+ --env ETCDCTL_KEY=$ETCD_PEER_KEY \
+ --authfile=$OCF_RESKEY_authfile \
+ --security-opt label=disable"
+ if [ -n "$OCF_RESKEY_run_opts" ]; then
+ run_opts="$run_opts $OCF_RESKEY_run_opts"
+ fi
+
+ OCF_RESKEY_run_cmd="$OCF_RESKEY_run_cmd --logger=zap \
+ --log-level=info \
+ --experimental-initial-corrupt-check=true \
+ --snapshot-count=10000 \
+ --initial-advertise-peer-urls=https://${THIS_NODE_IP}:2380 \
+ --cert-file=/etc/kubernetes/static-pod-resources/etcd-certs/secrets/etcd-all-certs/etcd-serving-${THIS_NODE_HOSTNAME}.crt \
+ --key-file=/etc/kubernetes/static-pod-resources/etcd-certs/secrets/etcd-all-certs/etcd-serving-${THIS_NODE_HOSTNAME}.key \
+ --trusted-ca-file=$SERVER_CACERT \
+ --client-cert-auth=true \
+ --peer-cert-file=$ETCD_PEER_CERT \
+ --peer-key-file=$ETCD_PEER_KEY \
+ --peer-trusted-ca-file=$SERVER_CACERT \
+ --peer-client-cert-auth=true \
+ --advertise-client-urls=https://${THIS_NODE_IP}:2379 \
+ --listen-client-urls=https://${LISTEN_CLIENT_URLS}:2379,unixs://${THIS_NODE_IP}:0 \
+ --listen-peer-urls=https://${LISTEN_PEER_URLS}:2380 \
+ --metrics=extensive \
+ --listen-metrics-urls=https://${LISTEN_METRICS_URLS}:9978"
+ if [ -n "$OCF_RESKEY_run_cmd_opts" ]; then
+ OCF_RESKEY_run_cmd="$OCF_RESKEY_run_cmd $OCF_RESKEY_run_cmd_opts"
+ fi
+
+ if is_force_new_cluster; then
+ OCF_RESKEY_run_cmd="$OCF_RESKEY_run_cmd --force-new-cluster"
+ fi
+
+ if is_learner; then
+ archive_data_folder
+ fi
+
+ if [ $REQUIRE_IMAGE_PULL -eq 1 ]; then
+ ocf_log notice "Beginning pull of image, ${OCF_RESKEY_image}"
+ podman pull --authfile=$OCF_RESKEY_authfile "${OCF_RESKEY_image}"
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "failed to pull image ${OCF_RESKEY_image}"
+ return $OCF_ERR_GENERIC
+ fi
+ else
+ ocf_log notice "Pull image not required, ${OCF_RESKEY_image}"
+ fi
+
+ if ocf_is_true "$OCF_RESKEY_reuse" && container_exists; then
+ ocf_log info "starting existing container $CONTAINER."
+ ocf_run podman start $CONTAINER
+ else
+ # make sure any previous container matching our container name is cleaned up first.
+ # we already know at this point it wouldn't be running
+ remove_container
+ run_new_container "$run_opts" $OCF_RESKEY_image "$OCF_RESKEY_run_cmd"
+ if [ $? -eq 125 ]; then
+ return $OCF_ERR_GENERIC
+ fi
+ fi
+ rc=$?
+
+ # if the container was stopped or didn't exist before, systemd
+ # removed the libpod* scopes. So always try to recreate the drop-ins
+ if [ $rc -eq 0 ] && ocf_is_true "$OCF_RESKEY_drop_in_dependency"; then
+ cid=$(podman_container_id)
+ create_transient_drop_in_dependency "$cid"
+ rc=$?
+ fi
+
+ if [ $rc -ne 0 ]; then
+ ocf_exit_reason "podman failed to launch container (rc: $rc)"
+ return $OCF_ERR_GENERIC
+ fi
+
+ # wait for monitor to pass before declaring that the container is started
+ while true; do
+ podman_simple_status
+ if [ $? -ne $OCF_SUCCESS ]; then
+ ocf_exit_reason "Newly created podman container exited after start"
+ return $OCF_ERR_GENERIC
+ fi
+
+ monitor_cmd_exec
+ if [ $? -eq $OCF_SUCCESS ]; then
+ ocf_log notice "Container $CONTAINER started successfully"
+ is_force_new_cluster
+ if [ $? -eq 0 ]; then
+ clear_force_new_cluster
+
+ local peer_node_name=$(get_peer_node_name)
+ local peer_node_ip=$(get_peer_ip)
+
+ if [ -n "$peer_node_name" -a -n "$peer_node_ip" ]; then
+ add_member_as_learner $peer_node_name $peer_node_ip
+ else
+ ocf_log error "could not add peer as learner (peer node name: $peer_node_name, peer ip: $peer_node_ip)"
+ fi
+ fi
+ return $OCF_SUCCESS
+ fi
+
+ ocf_exit_reason "waiting on monitor_cmd to pass after start"
+ sleep 1
+ done
+}
+
+podman_stop()
+{
+ local timeout=60
+ local rc
+ podman_simple_status
+ if [ $? -eq $OCF_NOT_RUNNING ]; then
+ #remove_container
+ ocf_log info "could not leave members list: etcd container not running"
+ return $OCF_SUCCESS
+ fi
+
+ member_id=$(get_cached_member_id)
+ if [ -z "$member_id" ]; then
+ ocf_log err "error leaving members list: could not get cached member ID"
+ else
+ # TODO: is it worth/possible to check the current status instead of relying on cached attributes?
+ if is_standalone; then
+ ocf_log info "last member. Not leaving the member list"
+ else
+ ocf_log info "leaving members list as member with ID $member_id"
+ endpoint="https://$(get_node_ip):2379"
+ ocf_run podman exec $CONTAINER etcdctl member remove $member_id --endpoints=$endpoint
+ rc=$?
+ if [ $rc -ne 0 ]; then
+ ocf_log err "error leaving members list: error code $rc"
+ fi
+ fi
+ fi
+ clear_member_id
+
+ if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
+ timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000) -10 ))
+ if [ $timeout -lt 10 ]; then
+ timeout=10
+ fi
+ fi
+
+ if ocf_is_true "$OCF_RESKEY_force_kill"; then
+ ocf_run podman kill $CONTAINER
+ rc=$?
+ else
+ ocf_log debug "waiting $timeout second[s] before killing container"
+ ocf_run podman stop -t=$timeout $CONTAINER
+ rc=$?
+ # on stop, systemd will automatically delete any transient
+ # drop-in conf that has been created earlier
+ fi
+
+ if [ $rc -ne 0 ]; then
+ # If the stop failed, it could be because the controlling conmon
+ # process died unexpectedly. If so, a generic error code is returned
+ # but the associated container exit code is -1. If that's the case,
+ # assume there's no failure and continue with the rm as usual.
+ if [ $rc -eq 125 ] && \
+ podman inspect --format '{{.State.Status}}:{{.State.ExitCode}}' $CONTAINER | grep -Eq '^(exited|stopped):-1$'; then
+ ocf_log err "Container ${CONTAINER} had an unexpected stop outcome. Trying to remove it anyway."
+ else
+ ocf_exit_reason "Failed to stop container, ${CONTAINER}, based on image, ${OCF_RESKEY_image}."
+ return $OCF_ERR_GENERIC
+ fi
+ fi
+
+ remove_container
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "Failed to remove stopped container, ${CONTAINER}, based on image, ${OCF_RESKEY_image}."
+ return $OCF_ERR_GENERIC
+ fi
+
+ return $OCF_SUCCESS
+}
+
+image_exists()
+{
+ podman image exists "${OCF_RESKEY_image}"
+ if [ $? -eq 0 ]; then
+ # image found
+ return 0
+ fi
+
+ if ocf_is_true "$OCF_RESKEY_allow_pull"; then
+ REQUIRE_IMAGE_PULL=1
+ ocf_log notice "Image (${OCF_RESKEY_image}) does not exist locally but will be pulled during start"
+ return 0
+ fi
+ # image not found.
+ return 1
+}
+
+podman_validate()
+{
+ check_binary curl
+ check_binary oc
+ check_binary podman
+
+ if [ -z "$OCF_RESKEY_node_ip_map" ]; then
+ ocf_exit_reason "'node_ip_map' option is required"
+ exit $OCF_ERR_CONFIGURED
+ fi
+
+ if [ -z "$OCF_RESKEY_image" ]; then
+ ocf_exit_reason "'image' option is required"
+ exit $OCF_ERR_CONFIGURED
+ fi
+
+ image_exists
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "base image, ${OCF_RESKEY_image}, could not be found."
+ exit $OCF_ERR_CONFIGURED
+ fi
+
+ return $OCF_SUCCESS
+}
+
+podman_notify()
+{
+ if [ "${OCF_RESKEY_CRM_meta_notify_type}" = 'post' ] ; then
+ case "$OCF_RESKEY_CRM_meta_notify_operation" in
+ start)
+ ocf_log info "in start action"
+ ocf_log info "active: ${OCF_RESKEY_CRM_meta_notify_active_uname}"
+ ocf_log info "start: nodes:${OCF_RESKEY_CRM_meta_notify_start_uname}"
+ ocf_log info "start: resources:${OCF_RESKEY_CRM_meta_notify_start_resource}"
+ ocf_log info "stop: nodes:${OCF_RESKEY_CRM_meta_notify_stop_uname}"
+ ocf_log info "stop: resources:${OCF_RESKEY_CRM_meta_notify_stop_resource}"
+ ;;
+ stop)
+ ocf_log info "in stop action"
+ ocf_log info "active: ${OCF_RESKEY_CRM_meta_notify_active_uname}"
+ ocf_log info "start: nodes:${OCF_RESKEY_CRM_meta_notify_start_uname}"
+ ocf_log info "start: resources:${OCF_RESKEY_CRM_meta_notify_start_resource}"
+ ocf_log info "stop: nodes:${OCF_RESKEY_CRM_meta_notify_stop_uname}"
+ ocf_log info "stop: resources:${OCF_RESKEY_CRM_meta_notify_stop_resource}"
+ ;;
+ esac
+ fi
+
+}
+
+etcd_pod_container_exists() {
+ local count_matches
+ # Check whether the etcd pod exists on the same node
+ count_matches=$(crictl pods --label app=etcd -q | xargs -I {} crictl ps --pod {} -o json | jq -r '.containers[].metadata | select ( .name == "etcd" ).name' | wc -l)
+ if [ "$count_matches" -eq 1 ]; then
+ # etcd pod found
+ return 0
+ fi
+ # etcd pod not found
+ return 1
+}
+
+cache_member_id_if_not_cached()
+{
+ member_id=$(get_cached_member_id)
+ if [ -z "$member_id" ]; then
+ ocf_log info "caching member ID"
+ cache_member_id
+ else
+ ocf_log info "cached member id: $member_id"
+ fi
+}
+
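+# cache_member_id queries the local etcd endpoint for this instance's member
+# ID and stores it (in hex, as expected by "etcdctl member" commands) in the
+# "member_id" node attribute, so it is still available during the stop action.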
+cache_member_id()
+{
+ local nodename
+ container_exists
+ if [ $? -ne 0 ]; then
+ # we need a running container to execute etcdctl.
+ return 0
+ fi
+
+ # TODO: use get_member_list_json instead
+ endpoint="https://$(get_node_ip):2379"
+ ID_DEC=$(podman exec ${CONTAINER} etcdctl member list --endpoints=$endpoint -w json | jq -r .header.member_id)
+ rc=$?
+ if [ $rc -ne 0 ]; then
+ ocf_log error "could not get member ID: error code $?"
+ return $rc
+ fi
+ if [ -z "$ID_DEC" ]; then
+ # the container is running, but the instance does not have its member ID yet.
+ return 0
+ fi
+
+ # etcdctl member commands need the HEX format of the member ID.
+ ID_HEX=$(printf "%x" $ID_DEC)
+ nodename=$(ocf_local_nodename)
+ ocf_log info "cache member ID $ID_HEX in node:$nodename"
+ crm_attribute --type nodes --node $nodename --name "member_id" --update $ID_HEX
+ rc=$?
+ if [ $rc -ne 0 ]; then
+ ocf_log error "could not cache member ID: error code $rc"
+ fi
+ return $rc
+}
+
+clear_member_id()
+{
+ local nodename=$(ocf_local_nodename)
+ crm_attribute --type nodes --node $nodename --name "member_id" --delete
+}
+
+get_cached_member_id()
+{
+ local nodename=$(ocf_local_nodename)
+ ocf_log debug "get member ID from in node:$nodename"
+ crm_attribute --query --type nodes --node $nodename --name "member_id" | awk -F"value=" '{print $2}'
+}
+
+add_member_as_learner()
+{
+ local rc
+ local member_name=$1
+ local member_ip=$2
+
+ ocf_log info "add $member_name with $member_ip as learner"
+ out=$(podman exec ${CONTAINER} etcdctl --endpoints="https://$(get_node_ip):2379" member add $member_name --peer-urls=https://$member_ip:2380 --learner)
+ rc=$?
+ if [ $rc -ne 0 ]; then
+ ocf_log error "could not add $member_name as learner: error code $rc"
+ return $rc
+ fi
+ ocf_log info "$out"
+
+ set_learner_node $member_name
+ return $?
+}
+
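+# The "force_new_cluster" cluster attribute records which node must restart
+# etcd with --force-new-cluster after losing its peer. It is set by
+# check_peers when the local endpoint reports "no leader", and cleared by the
+# holder once its container has started again (see podman_start).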
+set_force_new_cluster()
+{
+ local rc
+ local nodename=$(ocf_local_nodename)
+ crm_attribute --name "force_new_cluster" --update "$nodename"
+ rc=$?
+ if [ $rc -ne 0 ]; then
+ ocf_log error "could not set force_new_cluster attribute to $nodename"
+ else
+ ocf_log info "$nodename set force_new_cluster attribute"
+ fi
+ return $rc
+}
+
+get_force_new_cluster()
+{
+ crm_attribute --query --name "force_new_cluster" | awk -F"value=" '{print $2}'
+}
+
+clear_force_new_cluster()
+{
+ local this_node=$(ocf_local_nodename)
+ local force_new_cluster_node=$(get_force_new_cluster)
+ if [ -z "$force_new_cluster_node" ]; then
+ ocf_log info "$this_node: force_new_cluster attribute not set"
+ return $OCF_SUCCESS
+ fi
+
+ # only the holder of the "force_new_cluster" attribute can delete it
+ if [ "$this_node" = "$force_new_cluster_node" ]; then
+ crm_attribute --name "force_new_cluster" --delete
+ rc=$?
+ if [ $rc -ne 0 ]; then
+ ocf_log error "could not clear force_new_cluster attribute: error $rc"
+ else
+ ocf_log info "$this_node: force_new_cluster attribute cleared"
+ fi
+ return $rc
+ else
+ ocf_log info "$this_node does not hold force_new_cluster ($force_new_cluster_node has it)"
+ return $OCF_SUCCESS
+ fi
+
+}
+
+is_force_new_cluster()
+{
+ # Return 0 if 'force_new_cluster' is set and the value matches the current node name, 1 otherwise.
+ local nodename=$(ocf_local_nodename)
+ local value=$(get_force_new_cluster)
+ if [ -z $value ]; then
+ ocf_log info "force_new_cluster attribute is not set"
+ return 1
+ fi
+
+ if [ $value = $nodename ]; then
+ ocf_log info "$nodename has force_new_cluster set"
+ return 0
+ fi
+
+ ocf_log info "force_new_cluster attribute set on peer node $value"
+ return 1
+}
+
+is_standalone()
+{
+ local nodename=${1:-$(ocf_local_nodename)}
+ local standalone_node=$(get_standalone_node)
+ if [ -z "$standalone_node" ]; then
+ # TODO: change the log level to debug when the code is stable enough
+ ocf_log info "no node running standalone"
+ return 1
+ fi
+
+ if [ "$nodename" = "$standalone_node" ]; then
+ # TODO: change the log level to debug when the code is stable enough
+ ocf_log info "$nodename is set as standalone"
+ return 0
+ fi
+ # TODO: change the log level to debug when the code is stable enough
+ ocf_log info "$nodename is set as learner"
+ return 1
+
+}
+
+set_standalone_node()
+{
+ local rc
+ local nodename=$(ocf_local_nodename)
+
+ ocf_log info "add $nodename as standalone"
+ crm_attribute --name "standalone_node" --update $nodename
+ rc=$?
+ if [ $rc -ne 0 ]; then
+ ocf_log error "could not set standalone_node attribute to $nodename"
+ fi
+ return $rc
+}
+
+get_standalone_node()
+{
+ crm_attribute --query --name "standalone_node" | awk -F"value=" '{print $2}'
+}
+
+clear_standalone_node()
+{
+ if crm_attribute --name "standalone_node" --delete; then
+ ocf_log info "standalone_node property cleared"
+ fi
+}
+
+clear_standalone_and_learner_if_not_learners()
+{
+ local rc
+ local member_list_json=$1
+
+ standalone_node=$(get_standalone_node)
+ if [ -z $standalone_node ]; then
+ return $OCF_SUCCESS
+ fi
+
+ number_of_members=$(echo -n $member_list_json | jq -r ".members[].ID" | wc -l)
+ if [ $number_of_members -ne 2 ]; then
+ ocf_log info "could not clear standalone_node, nor learner_node properties: found $number_of_members members, need 2"
+ return $OCF_SUCCESS
+ fi
+
+ id=$(echo -n $member_list_json | jq -r ".members[] | select( .isLearner==true ).ID")
+ rc=$?
+ if [ $rc -ne 0 ]; then
+ ocf_log error "could not get isLearner field from member list: error code $rc"
+ return $rc
+ fi
+
+ if [ -z "$id" ]; then
+ clear_standalone_node
+ rc=$?
+ if [ $rc -ne 0 ]; then
+ ocf_og error "could not clear standalone_node attribute: error code $rc"
+ return $rc
+ fi
+ fi
+ if [ -z "$id" ]; then
+ clear_learner_node
+ rc=$?
+ if [ $rc -ne 0 ]; then
+ ocf_og error "could not clear learner_node attribute: error code $rc"
+ return $rc
+ fi
+ fi
+
+ return $rc
+}
+
+is_learner()
+{
+ # NOTE: do not use the "learner_node" property for this.
+ # Return 0 if the 'standalone_node' attribute is set and names the peer node (i.e. this node is the learner), 1 otherwise.
+ local standalone_node=$(get_standalone_node)
+ if [ -z "$standalone_node" ]; then
+ # TODO: change the log level to debug when the code is stable enough
+ ocf_log info "standalone attribute not set"
+ return 1
+ fi
+
+ local nodename=$(ocf_local_nodename)
+ if [ "$nodename" = "$standalone_node" ]; then
+ # TODO: change the log level to debug when the code is stable enough
+ ocf_log info "$nodename is not set as learner"
+ return 1
+ fi
+ # TODO: change the log level to debug when the code is stable enough
+ ocf_log info "$nodename is set as learner"
+ return 0
+}
+
+set_learner_node()
+{
+ local rc
+ local nodename=$1
+
+ crm_attribute --name "learner_node" --update $nodename
+ rc=$?
+ if [ $rc -ne 0 ]; then
+ ocf_log error "could not set learner_node attribute to $nodename"
+ fi
+ ocf_log info "$nodename set as learner"
+ return $rc
+}
+
+get_learner_node()
+{
+ crm_attribute --query --name "learner_node" | awk -F"value=" '{print $2}'
+}
+
+clear_learner_node()
+{
+ if crm_attribute --name "learner_node" --delete; then
+ ocf_log info "learner_node property cleared"
+ fi
+}
+
+get_peer_node_name() {
+ crm_node -l | awk '{print $2}' | grep -v $(ocf_local_nodename)
+}
+
+get_peer_ip() {
+ local peer_name=$(get_peer_node_name)
+ crm_attribute --query --name "ip" --node $peer_name | awk -F"value=" '{print $2}'
+}
+
+get_all_etcd_endpoints() {
+ for node in $(echo "$OCF_RESKEY_node_ip_map" | sed "s/\s//g;s/;/ /g"); do
+ name=$(echo "$node" | awk -F":" '{print $1}')
+ ip=$(echo "$node" | awk -F":" '{print $2}')
+ if [ -z "$name" ] || [ -z "$ip" ]; then
+ ocf_exit_reason "name or ip missing for 1 or more nodes"
+ exit $OCF_ERR_CONFIGURED
+ fi
+
+ [ -z "$ALL_ETCD_ENDPOINTS" ] && ALL_ETCD_ENDPOINTS="https://$ip:2379" || ALL_ETCD_ENDPOINTS="$ALL_ETCD_ENDPOINTS,https://$ip:2379"
+ done
+ echo $ALL_ETCD_ENDPOINTS
+}
+
+get_endpoint_status_json()
+{
+ # Get the status of all endpoints
+ local all_etcd_endpoints=$(get_all_etcd_endpoints)
+ podman exec ${CONTAINER} etcdctl endpoint status --endpoints=$all_etcd_endpoints -w json
+}
+
+get_member_list_json() {
+ # Get the list of members visible to the current node
+ local this_node_endpoint="https://$(get_node_ip):2379"
+ podman exec ${CONTAINER} etcdctl member list --endpoints=$this_node_endpoint -w json
+}
+
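+# check_peers inspects the locally visible member list: if the peer from
+# node_ip_map is missing, it is re-added as a learner and this node is marked
+# standalone; if the member list cannot be fetched and the single reachable
+# endpoint reports "no leader", this node is flagged to force a new cluster.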
+check_peers()
+{
+ # Check peers endpoint status and locally accessible member list
+ local member_list_json
+ local this_nodename=$(ocf_local_nodename)
+
+ container_exists
+ if [ $? -ne 0 ]; then
+ # we need a running container to execute etcdctl.
+ return $OCF_SUCCESS
+ fi
+
+ member_list_json=$(get_member_list_json)
+ rc=$?
+ ocf_log info "member list: $member_list_json"
+ if [ $rc -ne 0 ]; then
+ ocf_log info "podman failed to get member list: error code $rc"
+
+ endpoint_status_json=$(get_endpoint_status_json)
+ ocf_log info "endpoint status: $endpoint_status_json"
+
+ count_endpoints=$(echo -n $endpoint_status_json | jq -r ".[].Endpoint" | wc -l)
+ if [ $count_endpoints -eq 1 ]; then
+ ocf_log info "one endpoint only: checking status errors"
+ endpoint_status_errors=$(echo -n $endpoint_status_json | jq -r ".[0].Status.errors")
+ if echo "$endpoint_status_errors" | grep -q "no leader"; then
+ set_force_new_cluster
+ set_standalone_node
+ ocf_exit_reason "$this_nodename must force a new cluster"
+ return $OCF_ERR_GENERIC
+ fi
+ if [ "$endpoint_status_errors" != "null" ]; then
+ ocf_log error "unmanaged endpoint status error: $endpoint_status_errors"
+ fi
+ fi
+
+ return $OCF_SUCCESS
+ fi
+
+ # Example of .members[] instance fields in member list json format:
+ # NOTE that "name" is present in voting members only, while "isLearner" in learner members only
+ # and the value is always true (not a string) in that case.
+ # {
+# "ID": <member ID>,
+ # "name": "<node hostname>",
+ # "peerURLs": [
+ # "https://<node IP>:2380"
+ # ],
+ # "clientURLs": [
+ # "https://<node IP>:2379"
+ # ]
+ # }
+ for node in $(echo "$OCF_RESKEY_node_ip_map" | sed "s/\s//g;s/;/ /g"); do
+ name=$(echo "$node" | awk -F":" '{print $1}')
+ # do not check itself
+ if [ "$name" == "$this_nodename" ]; then
+ continue
+ fi
+
+ # Check by IP instead of Name since "learner" members appear only in peerURLs, not by Name.
+ ip=$(echo "$node" | awk -F":" '{print $2}')
+ id=$(echo -n $member_list_json | jq -r ".members[] | select( .peerURLs | map(test(\"$ip\")) | any).ID")
+ if [ -z "$id" ]; then
+ ocf_log info "$name is not in the members list"
+ add_member_as_learner $name $ip
+ set_standalone_node
+ else
+ ocf_log debug "$name is in the members list by IP:$ip"
+ clear_standalone_and_learner_if_not_learners $member_list_json
+ fi
+ done
+ return $OCF_SUCCESS
+}
+
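+# cache_etcd_data records the latest raft index and cluster ID reported in
+# /var/lib/etcd/revision.json as the "revision" and "cluster_id" node
+# attributes.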
+cache_etcd_data() {
+ local rc ip nodename revision cluster_id
+
+ container_exists
+ if [ $? -ne 0 ]; then
+ # we need a running container to execute etcdctl.
+ return $OCF_SUCCESS
+ fi
+
+ ip=$(get_node_ip)
+ nodename=$(ocf_local_nodename)
+
+ revision=$(cat /var/lib/etcd/revision.json | jq -r ".raftIndex.\"https://$ip:2379\"")
+ ocf_log info "cache revision:$revision in node:$nodename"
+ crm_attribute --type nodes --node $nodename --name "revision" --update $revision
+ rc=$?
+ if [ $rc -ne 0 ]; then
+ ocf_log err "could not cache etcd revision: error code $rc"
+ return $rc
+ fi
+
+ cluster_id=$(cat /var/lib/etcd/revision.json | jq -r ".clusterId")
+ ocf_log info "cache cluster_id:$cluster_id in node:$nodename"
+ crm_attribute --type nodes --node $nodename --name "cluster_id" --update $cluster_id
+ rc=$?
+ if [ $rc -ne 0 ]; then
+ ocf_log err "could not cache etcd cluster Id: error code $rc"
+ return $rc
+ fi
+
+ return $OCF_SUCCESS
+}
+
+# TODO:
+# When a user starts multiple clones on a node with globally-unique=true, the user cannot specify multiple name parameters.
+# When a user specifies reuse, the resource agent cannot associate multiple clones with a single container.
+
+if ocf_is_true "$OCF_RESKEY_CRM_meta_globally_unique"; then
+ if [ -n "$OCF_RESKEY_name" ]; then
+ if [ -n "$OCF_RESKEY_CRM_meta_clone_node_max" ] && [ "$OCF_RESKEY_CRM_meta_clone_node_max" -ne 1 ]
+ then
+ ocf_exit_reason "Cannot make plural clones from the same name parameter."
+ exit $OCF_ERR_CONFIGURED
+ fi
+ if [ -n "$OCF_RESKEY_CRM_meta_master_node_max" ] && [ "$OCF_RESKEY_CRM_meta_master_node_max" -ne 1 ]
+ then
+ ocf_exit_reason "Cannot make plural master from the same name parameter."
+ exit $OCF_ERR_CONFIGURED
+ fi
+ fi
+ : ${OCF_RESKEY_name=`echo ${OCF_RESOURCE_INSTANCE} | tr ':' '-'`}
+else
+ : ${OCF_RESKEY_name=${OCF_RESOURCE_INSTANCE}}
+fi
+
+CONTAINER=$OCF_RESKEY_name
+
+# Note: we currently monitor podman containers with the "podman exec"
+# command, so make sure that invocation is always valid by enforcing the
+# exec command to be non-empty
+: ${OCF_RESKEY_monitor_cmd:=/bin/true}
+
+# When OCF_RESKEY_drop_in_dependency is not populated, we
+# look at another file-based way of enabling the option.
+# Otherwise, consider it disabled.
+if [ -z "$OCF_RESKEY_drop_in_dependency" ]; then
+ if [ -f "/etc/sysconfig/podman_drop_in" ] || \
+ [ -f "/etc/default/podman_drop_in" ]; then
+ OCF_RESKEY_drop_in_dependency=yes
+ fi
+fi
+
+case $__OCF_ACTION in
+meta-data) meta_data
+ exit $OCF_SUCCESS;;
+start)
+ podman_validate
+ podman_start;;
+stop) podman_stop;;
+monitor) podman_monitor;;
+promote) podman_promote;;
+demote) podman_demote;;
+validate-all) podman_validate;;
+notify)
+ podman_notify;;
+usage|help) podman_usage
+ exit $OCF_SUCCESS
+ ;;
+*) podman_usage
+ exit $OCF_ERR_UNIMPLEMENTED
+ ;;
+esac
+rc=$?
+ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
+exit $rc
