diff --git a/heartbeat/podman-etcd b/heartbeat/podman-etcd new file mode 100755 index 000000000..9fb419a64 --- /dev/null +++ b/heartbeat/podman-etcd @@ -0,0 +1,1414 @@ +#!/bin/sh +# +# The podman etcd HA resource agent creates and launches a etcd podman +# container based off a supplied podman image. Containers managed by +# this agent are both created and removed upon the agent's start and +# stop actions. +# +# Based on the podman resource agent. +# +# Copyright (c) 2014 David Vossel +# Michele Baldessari +# All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +# + +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +# Parameter defaults + +OCF_RESKEY_image_default="quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:e213f49e8500d14652a0f4546fd43e11164cc9cc5d575a208634c268f150c3ea" +OCF_RESKEY_name_default="etcd" +OCF_RESKEY_nic_default="br-ex" +OCF_RESKEY_authfile_default="/run/resource-agents/pull-secret" +OCF_RESKEY_allow_pull_default="1" +OCF_RESKEY_reuse_default="0" + +: ${OCF_RESKEY_image=${OCF_RESKEY_image_default}} +: ${OCF_RESKEY_name=${OCF_RESKEY_name_default}} +: ${OCF_RESKEY_nic=${OCF_RESKEY_nic_default}} +: ${OCF_RESKEY_authfile=${OCF_RESKEY_authfile_default}} +: ${OCF_RESKEY_allow_pull=${OCF_RESKEY_allow_pull_default}} +: ${OCF_RESKEY_reuse=${OCF_RESKEY_reuse_default}} + +####################################################################### + +meta_data() +{ + cat < + + +1.0 + + +The podman-etcd HA resource agent creates and launches a etcd podman +container based off a supplied podman image. Containers managed by +this agent are both created and removed upon the agent's start and +stop actions. + +Podman etcd container resource agent. + + + + +The podman image to base this container off of. + +podman image + + + + + +The name to give the created container. By default this will +be that resource's instance name. + +podman container name + + + + + +A mapping of node names to IPs. + +This takes the form of: +n1:ip1;n2:ip2 + +where the etcd container on n1 would have IP ip1 + +Container node name to IP mapping + + + + + +A mapping of host to node names used for certificate file names. + +This takes the form of: +host1:alt1;host2:alt2 + +where the host1 would be alt1 for certificate files. + +Pacemaker to etcd name mapping + + + + + +Network interface to lookup interface for host. + +Network interface + + + + + +Path of the authentication file. + +The file is created by podman login. 
+ +Path of the authentication file + + + + + +Allow the image to be pulled from the configured podman registry when +the image does not exist locally. NOTE, this can drastically increase +the time required to start the container if the image repository is +pulled over the network. + +Allow pulling non-local images + + + + + +Add options to be appended to the 'podman run' command which is used +when creating the container during the start action. This option allows +users to do things such as setting a custom entry point and injecting +environment variables into the newly created container. Note the '-d' +option is supplied regardless of this value to force containers to run +in the background. + +NOTE: Do not explicitly specify the --name argument in the run_opts. This +agent will set --name using either the resource's instance or the name +provided in the 'name' argument of this agent. + + +run options + + + + + +Specify a command to launch within the container once +it has initialized. + +run command + + + + + +Options to be added to the 'run_cmd'. + +run command options + + + + + +A comma separated list of directories that the container is expecting to use. +The agent will ensure they exist by running 'mkdir -p' + +Required mount points + + + + + +Specify the full path of a command to launch within the container to check +the health of the container. This command must return 0 to indicate that +the container is healthy. A non-zero return code will indicate that the +container has failed and should be recovered. + +Note: Using this method for monitoring processes inside a container +is not recommended, as containerd tries to track processes running +inside the container and does not deal well with many short-lived +processes being spawned. Ensure that your container monitors its +own processes and terminates on fatal error rather than invoking +a command from the outside. + +monitor command + + + + + +Kill a container immediately rather than waiting for it to gracefully +shutdown + +force kill + + + + + +Allow the container to be reused once it is stopped. By default, +containers get removed once they are stopped. Enable this option +to have the particular one persist when this happens. + +reuse container + + + + + +Use transient drop-in files to add extra dependencies to the systemd +scopes associated to the container. During reboot, this prevents systemd +to stop the container before pacemaker. + +drop-in dependency + + + + + + + + + + + + + + +END +} + +####################################################################### +REQUIRE_IMAGE_PULL=0 + +podman_usage() +{ + cat <&1) + rc=$? + # 125: no container with name or ID ${CONTAINER} found + # 126: container state improper (not running) + # 127: any other error + # 255: podman 2+: container not running + case "$rc" in + 125|126|255) + rc=$OCF_NOT_RUNNING + ;; + 0) + ocf_log debug "monitor cmd passed: exit code = $rc" + ;; + *) + ocf_exit_reason "monitor cmd failed (rc=$rc), output: $out" + rc=$OCF_ERR_GENERIC + ;; + esac + + return $rc +} + +container_exists() +{ + local rc + local out + + out=$(podman exec ${CONTAINER} $OCF_RESKEY_monitor_cmd 2>&1) + rc=$? + # 125: no container with name or ID ${CONTAINER} found + if [ $rc -ne 125 ]; then + return 0 + fi + return 1 +} + +remove_container() +{ + local rc + local execids + + if ocf_is_true "$OCF_RESKEY_reuse"; then + # never remove the container if we have reuse enabled. + return 0 + fi + + container_exists + if [ $? 
-ne 0 ]; then
+        # don't attempt to remove a container that doesn't exist
+        return 0
+    fi
+    ocf_log notice "Cleaning up inactive container, ${CONTAINER}."
+    ocf_run podman rm -v $CONTAINER
+    rc=$?
+    if [ $rc -ne 0 ]; then
+        if [ $rc -eq 2 ]; then
+            if podman inspect --format '{{.State.Status}}' $CONTAINER | grep -wq "stopping"; then
+                ocf_log err "Inactive container ${CONTAINER} is stuck in 'stopping' state. Force-remove it."
+                ocf_run podman rm -f $CONTAINER
+                rc=$?
+            fi
+        fi
+        # due to a podman bug (rhbz#1841485), sometimes a stopped
+        # container can still be associated with Exec sessions, in
+        # which case the "podman rm" has to be forced
+        execids=$(podman inspect $CONTAINER --format '{{len .ExecIDs}}')
+        if [ "$execids" -ne "0" ]; then
+            ocf_log warn "Inactive container ${CONTAINER} has lingering exec sessions. Force-remove it."
+            ocf_run podman rm -f $CONTAINER
+            rc=$?
+        fi
+    fi
+    return $rc
+}
+
+podman_simple_status()
+{
+    local rc
+
+    # simple status is implemented via podman exec
+    # everything besides success is considered "not running"
+    monitor_cmd_exec
+    rc=$?
+    if [ $rc -ne $OCF_SUCCESS ]; then
+        rc=$OCF_NOT_RUNNING
+    fi
+    return $rc
+}
+
+podman_monitor()
+{
+    # We rely on running podman exec to monitor the container
+    # state because that command seems to be less prone to
+    # performance issues under IO load.
+    #
+    # For probes to work, we expect cmd_exec to be able to report
+    # when a container is not running. Here, we're not interested
+    # in distinguishing whether it's stopped or non existing
+    # (there's function container_exists for that)
+    monitor_cmd_exec
+    rc=$?
+    if [ $rc -ne 0 ]; then
+        return $rc
+    fi
+
+    # Failing to cache data and check member list should not cause the
+    # monitor operation to fail.
+    # TODO: move this inside check_peers where we already query the member list JSON
+    cache_member_id
+    check_peers
+    rc=$?
+    if [ $rc -ne $OCF_SUCCESS ]; then
+        return $OCF_ERR_GENERIC
+    fi
+
+    # TODO: etcd data comes from the disk, so if it is not available it is a fatal failure
+    cache_etcd_data
+    return $?
+}
+
+podman_create_mounts() {
+    oldIFS="$IFS"
+    IFS=","
+    for directory in $OCF_RESKEY_mount_points; do
+        mkdir -p "$directory"
+    done
+    IFS="$oldIFS"
+}
+
+podman_container_id()
+{
+    # Retrieve the container ID by doing a "podman ps" rather than
+    # a "podman inspect", because the latter has performance issues
+    # under IO load.
+    # We could have run "podman start $CONTAINER" to get the ID back
+    # but if the container is stopped, the command will return a
+    # name instead of a container ID. This would break us.
+    podman ps --no-trunc --format '{{.ID}} {{.Names}}' | grep -F -w -m1 "$CONTAINER" | cut -d' ' -f1
+}
+
+
+create_transient_drop_in_dependency()
+{
+    local cid=$1
+    local rc=$OCF_SUCCESS
+
+    if [ -z "$cid" ]; then
+        ocf_exit_reason "Container ID not found for \"$CONTAINER\". Not creating drop-in dependency"
+        return $OCF_ERR_GENERIC
+    fi
+
+    ocf_log info "Creating drop-in dependency for \"$CONTAINER\" ($cid)"
+    for scope in "libpod-$cid.scope.d" "libpod-conmon-$cid.scope.d"; do
+        if [ $rc -eq $OCF_SUCCESS ] && [ ! -d /run/systemd/transient/"$scope" ]; then
+            mkdir -p /run/systemd/transient/"$scope" && \
+                printf "[Unit]\nBefore=pacemaker.service" > /run/systemd/transient/"$scope"/dep.conf && \
+                chmod ago+r /run/systemd/transient/"$scope" /run/systemd/transient/"$scope"/dep.conf
+            rc=$?
+        fi
+    done
+
+    if [ $rc -ne $OCF_SUCCESS ]; then
+        ocf_log error "Could not create drop-in dependency for \"$CONTAINER\" ($cid)"
+    else
+        systemctl daemon-reload
+        rc=$?
+ if [ $rc -ne $OCF_SUCCESS ]; then + ocf_log error "Could not refresh service definition after creating drop-in for \"$CONTAINER\"" + fi + fi + + return $rc +} + + +run_new_container() +{ + local opts=$1 + local image=$2 + local cmd=$3 + local rc + + ocf_log info "running container $CONTAINER for the first time" + out=$(podman run $opts $image $cmd 2>&1) + rc=$? + + if [ -n "$out" ]; then + out="$(echo "$out" | tr -s ' \t\r\n' ' ')" + if [ $rc -eq 0 ]; then + ocf_log info "$out" + else + ocf_log err "$out" + fi + fi + + if [ $rc -eq 125 ]; then + # If an internal podman error occurred, it might be because + # the internal storage layer still references an old container + # with the same name, even though podman itself thinks there + # is no such container. If so, purge the storage layer to try + # to clean the corruption and try again. + if echo "$out" | grep -q "unknown.*flag"; then + ocf_exit_reason "$out" + return $rc + fi + + ocf_log warn "Internal podman error while creating new container $CONTAINER. Retrying." + ocf_run podman rm --storage $CONTAINER + ocf_run podman run $opts $image $cmd + rc=$? + elif [ $rc -eq 127 ]; then + # rhbz#1972209: podman 3.0.x seems to be hit by a race + # where the cgroup is not yet set up properly when the OCI + # runtime configures the container. If that happens, recreate + # the container as long as we get the same error code or + # until start timeout preempts us. + while [ $rc -eq 127 ] && (echo "$out" | grep -q "cgroup.*scope not found") ; do + ocf_log warn "Internal podman error while assigning cgroup. Retrying." + # Arbitrary sleep to prevent consuming all CPU while looping + sleep 1 + podman rm -f "$CONTAINER" + out=$(podman run $opts $image $cmd 2>&1) + rc=$? + done + # Log the created container ID if it succeeded + if [ $rc -eq 0 ]; then + ocf_log info "$out" + fi + fi + + return $rc +} + +get_hostname() +{ + # TODO: consider using ocf_local_nodename instead + local name=$(hostname) + if [ -z "$OCF_RESKEY_host_map" ]; then + echo $name + else + echo "$OCF_RESKEY_host_map" | tr ';' '\n' | tr -d ' ' | sed 's/:/ /' | awk -F' ' '$1=="'"$name"'" {print $2;exit}' + fi +} + +get_node_ip() { + echo "$(ip -brief addr show $OCF_RESKEY_nic | awk '{gsub("/.*", "", $3); print $3}')" +} + +prepare_env() { + local name ip standalone_node + local nodename=$(ocf_local_nodename) + + THIS_NODE_HOSTNAME="$(get_hostname)" + THIS_NODE_IP="$(get_node_ip)" + + if is_force_new_cluster; then + ETCD_INITIAL_CLUSTER_STATE="new" + ALL_ETCD_ENDPOINTS="https://$THIS_NODE_IP:2379" + + # TODO: double check if it is always hostname, or use ocf_local_nodename instead + ETCD_INITIAL_CLUSTER="$THIS_NODE_HOSTNAME=https://$THIS_NODE_IP:2380" + else + for node in $(echo "$OCF_RESKEY_node_ip_map" | sed "s/\s//g;s/;/ /g"); do + name=$(echo "$node" | awk -F":" '{print $1}') + ip=$(echo "$node" | awk -F":" '{print $2}') + if [ -z "$name" ] || [ -z "$ip" ]; then + ocf_exit_reason "name or ip missing for 1 or more nodes" + exit $OCF_ERR_CONFIGURED + fi + + crm_attribute --type nodes --node $name --name "ip" --update "$ip" + [ -z "$ALL_ETCD_ENDPOINTS" ] && ALL_ETCD_ENDPOINTS="https://$ip:2379" || ALL_ETCD_ENDPOINTS="$ALL_ETCD_ENDPOINTS,https://$ip:2379" + [ -z "$ETCD_INITIAL_CLUSTER" ] && ETCD_INITIAL_CLUSTER="$name=https://$ip:2380" || ETCD_INITIAL_CLUSTER="$ETCD_INITIAL_CLUSTER,$name=https://$ip:2380" + done + fi + + ETCDCTL_API="3" + 
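+    # Illustration only (hypothetical node names and addresses): with
+    #   node_ip_map="master-0:192.168.111.20;master-1:192.168.111.21"
+    # the loop above yields
+    #   ALL_ETCD_ENDPOINTS="https://192.168.111.20:2379,https://192.168.111.21:2379"
+    #   ETCD_INITIAL_CLUSTER="master-0=https://192.168.111.20:2380,master-1=https://192.168.111.21:2380"
+    # whereas in the force-new-cluster case only the local node's URLs are used.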
ETCD_CIPHER_SUITES="TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256" + ETCD_DATA_DIR="/var/lib/etcd" + ETCD_ELECTION_TIMEOUT="1000" + ETCD_ENABLE_PPROF="true" + ETCD_EXPERIMENTAL_MAX_LEARNERS="3" + ETCD_EXPERIMENTAL_WARNING_APPLY_DURATION=200ms + ETCD_EXPERIMENTAL_WATCH_PROGRESS_NOTIFY_INTERVAL=5s + ETCD_HEARTBEAT_INTERVAL="100" + ETCD_INITIAL_CLUSTER_STATE=${ETCD_INITIAL_CLUSTER_STATE:-"existing"} + ETCD_QUOTA_BACKEND_BYTES="8589934592" + ETCD_SOCKET_REUSE_ADDRESS="true" + + SERVER_CACERT="/etc/kubernetes/static-pod-resources/etcd-certs/configmaps/etcd-all-bundles/server-ca-bundle.crt" + ETCD_PEER_CERT="/etc/kubernetes/static-pod-resources/etcd-certs/secrets/etcd-all-certs/etcd-peer-${THIS_NODE_HOSTNAME}.crt" + ETCD_PEER_KEY="/etc/kubernetes/static-pod-resources/etcd-certs/secrets/etcd-all-certs/etcd-peer-${THIS_NODE_HOSTNAME}.key" + + LISTEN_CLIENT_URLS="0.0.0.0" + LISTEN_PEER_URLS="0.0.0.0" + LISTEN_METRICS_URLS="0.0.0.0" + + if is_learner; then + LISTEN_CLIENT_URLS="$THIS_NODE_IP" + LISTEN_PEER_URLS="$THIS_NODE_IP" + LISTEN_METRICS_URLS="$THIS_NODE_IP" + fi +} + +archive_data_folder() +{ + # TODO: use etcd snapshots + local dest_dir_name="members-snapshot-$(date +%Y%M%d%H%M%S)" + local data_dir="/var/lib/etcd/member" + if [ ! -d $data_dir ]; then + ocf_log info "no data dir to backup" + return $OCF_SUCCESS + fi + ocf_log info "backing up $data_dir under $HA_RSCTMP/$dest_dir_name" + mv $data_dir $HA_RSCTMP/$dest_dir_name + sync +} + +podman_start() +{ + local cid + local rc + local etcd_pod_wait_timeout_sec=$((10 * 60)) + local etcd_pod_poll_interval_sec=10 + local etcd_pod_poll_retries=$((etcd_pod_wait_timeout_sec/etcd_pod_poll_interval_sec)) + + if is_standalone; then + if is_learner; then + ocf_exit_reason "$(ocf_local_nodename) set both as learner and standalone" + return $OCF_ERR_CONFIGURED + fi + fi + + # ensure the etcd pod is not running before starting the container + ocf_log info "Ensure etcd pod is not running (retries:$etcd_pod_poll_retries, interval:$etcd_pod_poll_interval_sec)" + for try in $(seq $etcd_pod_poll_retries); do + etcd_pod_container_exists + if [ $? -eq 0 ]; then + ocf_log info "Etcd pod running: retry in $etcd_pod_poll_interval_sec seconds." + sleep $etcd_pod_poll_interval_sec + fi + done + etcd_pod_container_exists + if [ $? -eq 0 ]; then + ocf_exit_reason "Etcd pod is still running after $etcd_pod_wait_timeout_sec seconds." + return $OCF_ERR_GENERIC + fi + + podman_create_mounts + local run_opts="-d --name=${CONTAINER}" + # check to see if the container has already started + podman_simple_status + if [ $? -eq $OCF_SUCCESS ]; then + return $OCF_SUCCESS + fi + + local peer_node=$(get_peer_node_name) + if is_standalone $peer_node; then + local nodename=$(ocf_local_nodename) + local wait_timeout_sec=$((2*60)) + local poll_interval_sec=5 + local retries=$(( wait_timeout_sec / poll_interval_sec )) + + ocf_log info "wait for the leader node to add $nodename as learner (timeout: $wait_timeout_sec seconds)" + for try in $(seq $retries); do + learner_node=$(get_learner_node) + if [ "$nodename" != "$learner_node" ]; then + ocf_log info "$learner_node is not in the member list yet. Retry in $poll_interval_sec seconds." 
+ sleep $poll_interval_sec + continue + fi + ocf_log info "learner node $learner_node in the member list" + break + done + if [ "$nodename" != "$(get_learner_node)" ]; then + ocf_log error "wait for $nodename to be in the member list timed out" + return $OCF_ERR_GENERIC + fi + fi + + prepare_env + + # add etcd-specific opts + run_opts="$run_opts \ + --network=host \ + -v /etc/kubernetes:/etc/kubernetes \ + -v /var/lib/etcd:/var/lib/etcd \ + --env ALL_ETCD_ENDPOINTS=$ALL_ETCD_ENDPOINTS \ + --env ETCD_CIPHER_SUITES=$ETCD_CIPHER_SUITES \ + --env ETCD_DATA_DIR=$ETCD_DATA_DIR \ + --env ETCD_ELECTION_TIMEOUT=$ETCD_ELECTION_TIMEOUT \ + --env ETCD_ENABLE_PPROF=$ETCD_ENABLE_PPROF \ + --env ETCD_EXPERIMENTAL_MAX_LEARNERS=$ETCD_EXPERIMENTAL_MAX_LEARNERS \ + --env ETCD_EXPERIMENTAL_WARNING_APPLY_DURATION=$ETCD_EXPERIMENTAL_WARNING_APPLY_DURATION \ + --env ETCD_EXPERIMENTAL_WATCH_PROGRESS_NOTIFY_INTERVAL=$ETCD_EXPERIMENTAL_WATCH_PROGRESS_NOTIFY_INTERVAL \ + --env ETCD_HEARTBEAT_INTERVAL=$ETCD_HEARTBEAT_INTERVAL \ + --env ETCD_INITIAL_CLUSTER=$ETCD_INITIAL_CLUSTER \ + --env ETCD_INITIAL_CLUSTER_STATE=$ETCD_INITIAL_CLUSTER_STATE \ + --env ETCD_NAME=$THIS_NODE_HOSTNAME \ + --env ETCD_QUOTA_BACKEND_BYTES=$ETCD_QUOTA_BACKEND_BYTES \ + --env ETCD_SOCKET_REUSE_ADDRESS=$ETCD_SOCKET_REUSE_ADDRESS \ + --env ETCDCTL_API=$ETCDCTL_API \ + --env ETCDCTL_CACERT=$SERVER_CACERT \ + --env ETCDCTL_CERT=$ETCD_PEER_CERT \ + --env ETCDCTL_KEY=$ETCD_PEER_KEY \ + --authfile=$OCF_RESKEY_authfile \ + --security-opt label=disable" + if [ -n "$OCF_RESKEY_run_opts" ]; then + run_opts="$run_opts $OCF_RESKEY_run_opts" + fi + + OCF_RESKEY_run_cmd="$OCF_RESKEY_run_cmd --logger=zap \ + --log-level=info \ + --experimental-initial-corrupt-check=true \ + --snapshot-count=10000 \ + --initial-advertise-peer-urls=https://${THIS_NODE_IP}:2380 \ + --cert-file=/etc/kubernetes/static-pod-resources/etcd-certs/secrets/etcd-all-certs/etcd-serving-${THIS_NODE_HOSTNAME}.crt \ + --key-file=/etc/kubernetes/static-pod-resources/etcd-certs/secrets/etcd-all-certs/etcd-serving-${THIS_NODE_HOSTNAME}.key \ + --trusted-ca-file=$SERVER_CACERT \ + --client-cert-auth=true \ + --peer-cert-file=$ETCD_PEER_CERT \ + --peer-key-file=$ETCD_PEER_KEY \ + --peer-trusted-ca-file=$SERVER_CACERT \ + --peer-client-cert-auth=true \ + --advertise-client-urls=https://${THIS_NODE_IP}:2379 \ + --listen-client-urls=https://${LISTEN_CLIENT_URLS}:2379,unixs://${THIS_NODE_IP}:0 \ + --listen-peer-urls=https://${LISTEN_PEER_URLS}:2380 \ + --metrics=extensive \ + --listen-metrics-urls=https://${LISTEN_METRICS_URLS}:9978" + if [ -n "$OCF_RESKEY_run_cmd_opts" ]; then + OCF_RESKEY_run_cmd="$OCF_RESKEY_run_cmd $OCF_RESKEY_run_cmd_opts" + fi + + if is_force_new_cluster; then + OCF_RESKEY_run_cmd="$OCF_RESKEY_run_cmd --force-new-cluster" + fi + + if is_learner; then + archive_data_folder + fi + + if [ $REQUIRE_IMAGE_PULL -eq 1 ]; then + ocf_log notice "Beginning pull of image, ${OCF_RESKEY_image}" + podman pull --authfile=$OCF_RESKEY_authfile "${OCF_RESKEY_image}" + if [ $? -ne 0 ]; then + ocf_exit_reason "failed to pull image ${OCF_RESKEY_image}" + return $OCF_ERR_GENERIC + fi + else + ocf_log notice "Pull image not required, ${OCF_RESKEY_image}" + fi + + if ocf_is_true "$OCF_RESKEY_reuse" && container_exists; then + ocf_log info "starting existing container $CONTAINER." + ocf_run podman start $CONTAINER + else + # make sure any previous container matching our container name is cleaned up first. 
+        # we already know at this point it wouldn't be running
+        remove_container
+        run_new_container "$run_opts" $OCF_RESKEY_image "$OCF_RESKEY_run_cmd"
+        if [ $? -eq 125 ]; then
+            return $OCF_ERR_GENERIC
+        fi
+    fi
+    rc=$?
+
+    # if the container was stopped or didn't exist before, systemd
+    # removed the libpod* scopes. So always try to recreate the drop-ins
+    if [ $rc -eq 0 ] && ocf_is_true "$OCF_RESKEY_drop_in_dependency"; then
+        cid=$(podman_container_id)
+        create_transient_drop_in_dependency "$cid"
+        rc=$?
+    fi
+
+    if [ $rc -ne 0 ]; then
+        ocf_exit_reason "podman failed to launch container (rc: $rc)"
+        return $OCF_ERR_GENERIC
+    fi
+
+    # wait for monitor to pass before declaring that the container is started
+    while true; do
+        podman_simple_status
+        if [ $? -ne $OCF_SUCCESS ]; then
+            ocf_exit_reason "Newly created podman container exited after start"
+            return $OCF_ERR_GENERIC
+        fi
+
+        monitor_cmd_exec
+        if [ $? -eq $OCF_SUCCESS ]; then
+            ocf_log notice "Container $CONTAINER started successfully"
+            is_force_new_cluster
+            if [ $? -eq 0 ]; then
+                clear_force_new_cluster
+
+                local peer_node_name=$(get_peer_node_name)
+                local peer_node_ip=$(get_peer_ip)
+
+                if [ -n "$peer_node_name" -a -n "$peer_node_ip" ]; then
+                    add_member_as_learner $peer_node_name $peer_node_ip
+                else
+                    ocf_log error "could not add peer as learner (peer node name: $peer_node_name, peer ip: $peer_node_ip)"
+                fi
+            fi
+            return $OCF_SUCCESS
+        fi
+
+        ocf_exit_reason "waiting on monitor_cmd to pass after start"
+        sleep 1
+    done
+}
+
+podman_stop()
+{
+    local timeout=60
+    local rc
+    podman_simple_status
+    if [ $? -eq $OCF_NOT_RUNNING ]; then
+        #remove_container
+        ocf_log info "could not leave members list: etcd container not running"
+        return $OCF_SUCCESS
+    fi
+
+    member_id=$(get_cached_member_id)
+    if [ -z "$member_id" ]; then
+        ocf_log err "error leaving members list: could not get cached member ID"
+    else
+        # TODO: is it worth/possible to check the current status instead of relying on cached attributes?
+        if is_standalone; then
+            ocf_log info "last member. Not leaving the member list"
+        else
+            ocf_log info "leaving members list as member with ID $member_id"
+            endpoint="https://$(get_node_ip):2379"
+            ocf_run podman exec $CONTAINER etcdctl member remove $member_id --endpoints=$endpoint
+            rc=$?
+            if [ $rc -ne 0 ]; then
+                ocf_log err "error leaving members list: error code $rc"
+            fi
+        fi
+    fi
+    clear_member_id
+
+    if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
+        timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000) -10 ))
+        if [ $timeout -lt 10 ]; then
+            timeout=10
+        fi
+    fi
+
+    if ocf_is_true "$OCF_RESKEY_force_kill"; then
+        ocf_run podman kill $CONTAINER
+        rc=$?
+    else
+        ocf_log debug "waiting $timeout second[s] before killing container"
+        ocf_run podman stop -t=$timeout $CONTAINER
+        rc=$?
+        # on stop, systemd will automatically delete any transient
+        # drop-in conf that has been created earlier
+    fi
+
+    if [ $rc -ne 0 ]; then
+        # If the stop failed, it could be because the controlling conmon
+        # process died unexpectedly. If so, a generic error code is returned
+        # but the associated container exit code is -1. If that's the case,
+        # assume there's no failure and continue with the rm as usual.
+        if [ $rc -eq 125 ] && \
+            podman inspect --format '{{.State.Status}}:{{.State.ExitCode}}' $CONTAINER | grep -Eq '^(exited|stopped):-1$'; then
+            ocf_log err "Container ${CONTAINER} had an unexpected stop outcome. Trying to remove it anyway."
+        else
+            ocf_exit_reason "Failed to stop container, ${CONTAINER}, based on image, ${OCF_RESKEY_image}."
+ return $OCF_ERR_GENERIC + fi + fi + + remove_container + if [ $? -ne 0 ]; then + ocf_exit_reason "Failed to remove stopped container, ${CONTAINER}, based on image, ${OCF_RESKEY_image}." + return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +image_exists() +{ + podman image exists "${OCF_RESKEY_image}" + if [ $? -eq 0 ]; then + # image found + return 0 + fi + + if ocf_is_true "$OCF_RESKEY_allow_pull"; then + REQUIRE_IMAGE_PULL=1 + ocf_log notice "Image (${OCF_RESKEY_image}) does not exist locally but will be pulled during start" + return 0 + fi + # image not found. + return 1 +} + +podman_validate() +{ + check_binary curl + check_binary oc + check_binary podman + + if [ -z "$OCF_RESKEY_node_ip_map" ]; then + ocf_exit_reason "'node_ip_map' option is required" + exit $OCF_ERR_CONFIGURED + fi + + if [ -z "$OCF_RESKEY_image" ]; then + ocf_exit_reason "'image' option is required" + exit $OCF_ERR_CONFIGURED + fi + + image_exists + if [ $? -ne 0 ]; then + ocf_exit_reason "base image, ${OCF_RESKEY_image}, could not be found." + exit $OCF_ERR_CONFIGURED + fi + + return $OCF_SUCCESS +} + +podman_notify() +{ + if [ "${OCF_RESKEY_CRM_meta_notify_type}" = 'post' ] ; then + case "$OCF_RESKEY_CRM_meta_notify_operation" in + start) + ocf_log info "in start action" + ocf_log info "active: ${OCF_RESKEY_CRM_meta_notify_active_uname}" + ocf_log info "start: nodes:${OCF_RESKEY_CRM_meta_notify_start_uname}" + ocf_log info "start: resources:${OCF_RESKEY_CRM_meta_notify_start_resource}" + ocf_log info "stop: nodes:${OCF_RESKEY_CRM_meta_notify_stop_uname}" + ocf_log info "stop: resources:${OCF_RESKEY_CRM_meta_notify_stop_resource}" + ;; + stop) + ocf_log info "in stop action" + ocf_log info "active: ${OCF_RESKEY_CRM_meta_notify_active_uname}" + ocf_log info "start: nodes:${OCF_RESKEY_CRM_meta_notify_start_uname}" + ocf_log info "start: resources:${OCF_RESKEY_CRM_meta_notify_start_resource}" + ocf_log info "stop: nodes:${OCF_RESKEY_CRM_meta_notify_stop_uname}" + ocf_log info "stop: resources:${OCF_RESKEY_CRM_meta_notify_stop_resource}" + ;; + esac + fi + +} + +etcd_pod_container_exists() { + local count_matches + # Check whether the etcd pod exists on the same node (header line included) + count_matches=$(crictl pods --label app=etcd -q | xargs -I {} crictl ps --pod {} -o json | jq -r '.containers[].metadata | select ( .name == "etcd" ).name' | wc -l) + if [ "$count_matches" -eq 1 ]; then + # etcd pod found + return 0 + fi + # etcd pod not found + return 1 +} + +cache_member_id_if_not_cached() +{ + cache_member_id + member_id=$(get_cached_member_id) + if [ -z "$member_id" ]; then + ocf_log info "caching member ID" + cache_member_id + else + ocf_log info "cached member id: $member_id" + fi +} + +cache_member_id() +{ + local nodename + container_exists + if [ $? -ne 0 ]; then + # we need a running container to execute etcdctl. + return 0 + fi + + # TODO: use get_member_list_json instead + endpoint="https://$(get_node_ip):2379" + ID_DEC=$(podman exec ${CONTAINER} etcdctl member list --endpoints=$endpoint -w json | jq -r .header.member_id) + rc=$? + if [ $rc -ne 0 ]; then + ocf_log error "could not get member ID: error code $?" + return $rc + fi + if [ -z "$ID_DEC" ]; then + # the container is running, but the instance does not have its member ID yet. + return 0 + fi + + # etcdctl member commands need the HEX format of the member ID. 
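+    # For example (made-up value): a decimal member_id of 1311768467294899695
+    # becomes "1234567890abcdef" after the printf "%x" conversion below.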
+    ID_HEX=$(printf "%x" $ID_DEC)
+    nodename=$(ocf_local_nodename)
+    ocf_log info "cache member ID $ID_HEX in node:$nodename"
+    crm_attribute --type nodes --node $nodename --name "member_id" --update $ID_HEX
+    rc=$?
+    if [ $rc -ne 0 ]; then
+        ocf_log error "could not cache member ID: error code $rc"
+    fi
+    return $rc
+}
+
+clear_member_id()
+{
+    local nodename=$(ocf_local_nodename)
+    crm_attribute --type nodes --node $nodename --name "member_id" --delete
+}
+
+get_cached_member_id()
+{
+    local nodename=$(ocf_local_nodename)
+    ocf_log debug "get member ID from node:$nodename"
+    crm_attribute --query --type nodes --node $nodename --name "member_id" | awk -F"value=" '{print $2}'
+}
+
+add_member_as_learner()
+{
+    local rc
+    local member_name=$1
+    local member_ip=$2
+
+    ocf_log info "add $member_name with $member_ip as learner"
+    out=$(podman exec ${CONTAINER} etcdctl --endpoints="https://$(get_node_ip):2379" member add $member_name --peer-urls=https://$member_ip:2380 --learner)
+    rc=$?
+    if [ $rc -ne 0 ]; then
+        ocf_log error "could not add $member_name as learner: error code $rc"
+        return $rc
+    fi
+    ocf_log info "$out"
+
+    set_learner_node $member_name
+    return $?
+}
+
+set_force_new_cluster()
+{
+    local rc
+    local nodename=$(ocf_local_nodename)
+    crm_attribute --name "force_new_cluster" --update "$nodename"
+    rc=$?
+    if [ $rc -ne 0 ]; then
+        ocf_log error "could not set force_new_cluster attribute to $nodename"
+    else
+        ocf_log info "$nodename set force_new_cluster attribute"
+    fi
+    return $rc
+}
+
+get_force_new_cluster()
+{
+    crm_attribute --query --name "force_new_cluster" | awk -F"value=" '{print $2}'
+}
+
+clear_force_new_cluster()
+{
+    local rc
+    local this_node=$(ocf_local_nodename)
+    local force_new_cluster_node=$(get_force_new_cluster)
+    if [ -z "$force_new_cluster_node" ]; then
+        ocf_log info "$this_node: force_new_cluster attribute not set"
+        return $OCF_SUCCESS
+    fi
+
+    # only the holder of "force_new_cluster" attribute can delete it
+    if [ "$this_node" = "$force_new_cluster_node" ]; then
+        crm_attribute --name "force_new_cluster" --delete
+        rc=$?
+        if [ $rc -ne 0 ]; then
+            ocf_log error "could not clear force_new_cluster attribute: error $rc"
+        else
+            ocf_log info "$this_node: force_new_cluster attribute cleared"
+        fi
+        return $rc
+    else
+        ocf_log info "$this_node does not hold force_new_cluster ($force_new_cluster_node has it)"
+        return $OCF_SUCCESS
+    fi
+}
+
+is_force_new_cluster()
+{
+    # Return 0 if 'force_new_cluster' is set and the value matches the current node name, 1 otherwise.
+    local nodename=$(ocf_local_nodename)
+    local value=$(get_force_new_cluster)
+    if [ -z "$value" ]; then
+        ocf_log info "force_new_cluster attribute is not set"
+        return 1
+    fi
+
+    if [ "$value" = "$nodename" ]; then
+        ocf_log info "$nodename has force_new_cluster set"
+        return 0
+    fi
+
+    ocf_log info "force_new_cluster attribute set on peer node $value"
+    return 1
+}
+
+is_standalone()
+{
+    local nodename=${1:-$(ocf_local_nodename)}
+    local standalone_node=$(get_standalone_node)
+    if [ -z "$standalone_node" ]; then
+        # TODO: change the log level to debug when the code is stable enough
+        ocf_log info "no node running standalone"
+        return 1
+    fi
+
+    if [ "$nodename" = "$standalone_node" ]; then
+        # TODO: change the log level to debug when the code is stable enough
+        ocf_log info "$nodename is set as standalone"
+        return 0
+    fi
+    # TODO: change the log level to debug when the code is stable enough
+    ocf_log info "$nodename is set as learner"
+    return 1
+}
+
+set_standalone_node()
+{
+    local rc
+    local nodename=$(ocf_local_nodename)
+
+    ocf_log info "add $nodename as standalone"
+    crm_attribute --name "standalone_node" --update $nodename
+    rc=$?
+    if [ $rc -ne 0 ]; then
+        ocf_log error "could not set standalone_node attribute to $nodename"
+    fi
+    return $rc
+}
+
+get_standalone_node()
+{
+    crm_attribute --query --name "standalone_node" | awk -F"value=" '{print $2}'
+}
+
+clear_standalone_node()
+{
+    if crm_attribute --name "standalone_node" --delete; then
+        ocf_log info "standalone_node property cleared"
+    fi
+}
+
+clear_standalone_and_learner_if_not_learners()
+{
+    local rc
+    local member_list_json=$1
+
+    standalone_node=$(get_standalone_node)
+    if [ -z "$standalone_node" ]; then
+        return $OCF_SUCCESS
+    fi
+
+    number_of_members=$(echo -n $member_list_json | jq -r ".members[].ID" | wc -l)
+    if [ $number_of_members -ne 2 ]; then
+        ocf_log info "could not clear standalone_node or learner_node properties: found $number_of_members members, need 2"
+        return $OCF_SUCCESS
+    fi
+
+    id=$(echo -n $member_list_json | jq -r ".members[] | select( .isLearner==true ).ID")
+    rc=$?
+    if [ $rc -ne 0 ]; then
+        ocf_log error "could not get isLearner field from member list: error code $rc"
+        return $rc
+    fi
+
+    if [ -z "$id" ]; then
+        clear_standalone_node
+        rc=$?
+        if [ $rc -ne 0 ]; then
+            ocf_log error "could not clear standalone_node attribute: error code $rc"
+            return $rc
+        fi
+    fi
+    if [ -z "$id" ]; then
+        clear_learner_node
+        rc=$?
+        if [ $rc -ne 0 ]; then
+            ocf_log error "could not clear learner_node attribute: error code $rc"
+            return $rc
+        fi
+    fi
+
+    return $rc
+}
+
+is_learner()
+{
+    # NOTE: do not use the "learner_node" property for this.
+    # Return 0 if the 'standalone_node' attribute is set and matches the peer node name, 1 otherwise.
+    local standalone_node=$(get_standalone_node)
+    if [ -z "$standalone_node" ]; then
+        # TODO: change the log level to debug when the code is stable enough
+        ocf_log info "standalone attribute not set"
+        return 1
+    fi
+
+    local nodename=$(ocf_local_nodename)
+    if [ "$nodename" = "$standalone_node" ]; then
+        # TODO: change the log level to debug when the code is stable enough
+        ocf_log info "$nodename is not set as learner"
+        return 1
+    fi
+    # TODO: change the log level to debug when the code is stable enough
+    ocf_log info "$nodename is set as learner"
+    return 0
+}
+
+set_learner_node()
+{
+    local rc
+    local nodename=$1
+
+    crm_attribute --name "learner_node" --update $nodename
+    rc=$?
+ if [ $rc -ne 0 ]; then + ocf_log error "could not set learner_node attribute to $nodename" + fi + ocf_log info "$nodename set as learner" + return $rc +} + +get_learner_node() +{ + crm_attribute --query --name "learner_node" | awk -F"value=" '{print $2}' +} + +clear_learner_node() +{ + if crm_attribute --name "learner_node" --delete; then + ocf_log info "learner_node property cleared" + fi +} + +get_peer_node_name() { + crm_node -l | awk '{print $2}' | grep -v $(ocf_local_nodename) +} + +get_peer_ip() { + local peer_name=$(get_peer_node_name) + crm_attribute --query --name "ip" --node $peer_name | awk -F"value=" '{print $2}' +} + +get_all_etcd_endpoints() { + for node in $(echo "$OCF_RESKEY_node_ip_map" | sed "s/\s//g;s/;/ /g"); do + name=$(echo "$node" | awk -F":" '{print $1}') + ip=$(echo "$node" | awk -F":" '{print $2}') + if [ -z "$name" ] || [ -z "$ip" ]; then + ocf_exit_reason "name or ip missing for 1 or more nodes" + exit $OCF_ERR_CONFIGURED + fi + + [ -z "$ALL_ETCD_ENDPOINTS" ] && ALL_ETCD_ENDPOINTS="https://$ip:2379" || ALL_ETCD_ENDPOINTS="$ALL_ETCD_ENDPOINTS,https://$ip:2379" + done + echo $ALL_ETCD_ENDPOINTS +} + +get_endpoint_status_json() +{ + # Get the status of all endpoints + local all_etcd_endpoints=$(get_all_etcd_endpoints) + podman exec ${CONTAINER} etcdctl endpoint status --endpoints=$all_etcd_endpoints -w json +} + +get_member_list_json() { + # Get the list of members visible to the current node + local this_node_endpoint="https://$(get_node_ip):2379" + podman exec ${CONTAINER} etcdctl member list --endpoints=$this_node_endpoint -w json +} + +check_peers() +{ + # Check peers endpoint status and locally accessible member list + local member_list_json + local this_nodename=$(ocf_local_nodename) + + container_exists + if [ $? -ne 0 ]; then + # we need a running container to execute etcdctl. + return $OCF_SUCCESS + fi + + member_list_json=$(get_member_list_json) + rc=$? + ocf_log info "member list: $member_list_json" + if [ $rc -ne 0 ]; then + ocf_log info "podman failed to get member list: error code $rc" + + endpoint_status_json=$(get_endpoint_status_json) + ocf_log info "endpoint status: $endpoint_status_json" + + count_endpoints=$(echo -n $endpoint_status_json | jq -r ".[].Endpoint" | wc -l) + if [ $count_endpoints -eq 1 ]; then + ocf_log info "one endpoint only: checking status errors" + endpoint_status_errors=$(echo -n $endpoint_status_json | jq -r ".[0].Status.errors") + if echo "$endpoint_status_errors" | grep -q "no leader"; then + set_force_new_cluster + set_standalone_node + ocf_exit_reason "$this_nodename must force a new cluster" + return $OCF_ERR_GENERIC + fi + if [ "$endpoint_status_errors" != "null" ]; then + ocf_log error "unmanaged endpoint status error: $endpoint_status_errors" + fi + fi + + return $OCF_SUCCESS + fi + + # Example of .members[] instance fields in member list json format: + # NOTE that "name" is present in voting members only, while "isLearner" in learner members only + # and the value is always true (not a string) in that case. + # { +# "ID": , + # "name": "", + # "peerURLs": [ + # "https://:2380" + # ], + # "clientURLs": [ + # "https://:2379" + # ] + # } + for node in $(echo "$OCF_RESKEY_node_ip_map" | sed "s/\s//g;s/;/ /g"); do + name=$(echo "$node" | awk -F":" '{print $1}') + # do not check itself + if [ "$name" == "$this_nodename" ]; then + continue + fi + + # Check by IP instead of Name since "learner" members appear only in peerURLs, not by Name. 
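+        # Illustration (hypothetical data): with ip=192.168.111.21 and a member entry
+        #   { "ID": 1311768467294899695, "peerURLs": [ "https://192.168.111.21:2380" ] }
+        # the jq filter below prints that member's ID; an empty result means the peer
+        # is not a member yet and is added as a learner.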
+ ip=$(echo "$node" | awk -F":" '{print $2}') + id=$(echo -n $member_list_json | jq -r ".members[] | select( .peerURLs | map(test(\"$ip\")) | any).ID") + if [ -z "$id" ]; then + ocf_log info "$name is not in the members list" + add_member_as_learner $name $ip + set_standalone_node + else + ocf_log debug "$name is in the members list by IP:$ip" + clear_standalone_and_learner_if_not_learners $member_list_json + fi + done + return $OCF_SUCCESS +} + +cache_etcd_data() { + local rc ip nodename revision cluster_id + + container_exists + if [ $? -ne 0 ]; then + # we need a running container to execute etcdctl. + return $OCF_SUCCESS + fi + + ip=$(get_node_ip) + nodename=$(ocf_local_nodename) + + revision=$(cat /var/lib/etcd/revision.json | jq -r ".raftIndex.\"https://$ip:2379\"") + ocf_log info "cache revision:$revision in node:$nodename" + crm_attribute --type nodes --node $nodename --name "revision" --update $revision + rc=$? + if [ $rc -ne 0 ]; then + ocf_log err "could not cache etcd revision: error code $rc" + return $? + fi + + cluster_id=$(cat /var/lib/etcd/revision.json | jq -r ".clusterId") + ocf_log info "cache cluster_id:$cluster_id in node:$nodename" + crm_attribute --type nodes --node $nodename --name "cluster_id" --update $cluster_id + rc=$? + if [ $rc -ne 0 ]; then + ocf_log err "could not cache etcd cluster Id: error code $rc" + return $? + fi + + return $OCF_SUCCESS +} + +# TODO : +# When a user starts plural clones in a node in globally-unique, a user cannot appoint plural name parameters. +# When a user appoints reuse, the resource agent cannot connect plural clones with a container. + +if ocf_is_true "$OCF_RESKEY_CRM_meta_globally_unique"; then + if [ -n "$OCF_RESKEY_name" ]; then + if [ -n "$OCF_RESKEY_CRM_meta_clone_node_max" ] && [ "$OCF_RESKEY_CRM_meta_clone_node_max" -ne 1 ] + then + ocf_exit_reason "Cannot make plural clones from the same name parameter." + exit $OCF_ERR_CONFIGURED + fi + if [ -n "$OCF_RESKEY_CRM_meta_master_node_max" ] && [ "$OCF_RESKEY_CRM_meta_master_node_max" -ne 1 ] + then + ocf_exit_reason "Cannot make plural master from the same name parameter." + exit $OCF_ERR_CONFIGURED + fi + fi + : ${OCF_RESKEY_name=`echo ${OCF_RESOURCE_INSTANCE} | tr ':' '-'`} +else + : ${OCF_RESKEY_name=${OCF_RESOURCE_INSTANCE}} +fi + +CONTAINER=$OCF_RESKEY_name + +# Note: we currently monitor podman containers by with the "podman exec" +# command, so make sure that invocation is always valid by enforcing the +# exec command to be non-empty +: ${OCF_RESKEY_monitor_cmd:=/bin/true} + +# When OCF_RESKEY_drop_in_dependency is not populated, we +# look at another file-based way of enabling the option. +# Otherwise, consider it disabled. +if [ -z "$OCF_RESKEY_drop_in_dependency" ]; then + if [ -f "/etc/sysconfig/podman_drop_in" ] || \ + [ -f "/etc/default/podman_drop_in" ]; then + OCF_RESKEY_drop_in_dependency=yes + fi +fi + +case $__OCF_ACTION in +meta-data) meta_data + exit $OCF_SUCCESS;; +start) + podman_validate + podman_start;; +stop) podman_stop;; +monitor) podman_monitor;; +promote) podman_promote;; +demote) podman_demote;; +validate-all) podman_validate;; +notify) + podman_notify;; +usage|help) podman_usage + exit $OCF_SUCCESS + ;; +*) podman_usage + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac +rc=$? +ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" +exit $rc