diff --git a/GNUmakefile b/GNUmakefile index b17fb4cb90..f28dea8839 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -1,333 +1,335 @@ # # Copyright (C) 2008 Andrew Beekhof # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # default: $(shell test ! -e configure && echo init) $(shell test -e configure && echo core) -include Makefile PACKAGE ?= pacemaker # Force 'make dist' to be consistent with 'make export' distdir = $(PACKAGE)-$(TAG) TARFILE = $(distdir).tar.gz DIST_ARCHIVES = $(TARFILE) RPM_ROOT = $(shell pwd) RPM_OPTS = --define "_sourcedir $(RPM_ROOT)" \ --define "_specdir $(RPM_ROOT)" \ --define "_srcrpmdir $(RPM_ROOT)" \ MOCK_OPTIONS ?= --resultdir=$(RPM_ROOT)/mock --no-cleanup-after # Default to fedora compliant spec files # SLES: /etc/SuSE-release # openSUSE: /etc/SuSE-release # RHEL: /etc/redhat-release # Fedora: /etc/fedora-release, /etc/redhat-release, /etc/system-release F ?= $(shell test ! -e /etc/fedora-release && echo 0; test -e /etc/fedora-release && rpm --eval %{fedora}) ARCH ?= $(shell test -e /etc/fedora-release && rpm --eval %{_arch}) MOCK_CFG ?= $(shell test -e /etc/fedora-release && echo fedora-$(F)-$(ARCH)) DISTRO ?= $(shell test -e /etc/SuSE-release && echo suse; echo fedora) TAG ?= $(shell git log --pretty="format:%H" -n 1) WITH ?= --without doc #WITH ?= --without=doc --with=gcov LAST_RC ?= $(shell test -e /Volumes || git tag -l | grep Pacemaker | sort -Vr | grep rc | head -n 1) LAST_RELEASE ?= $(shell test -e /Volumes || git tag -l | grep Pacemaker | sort -Vr | grep -v rc | head -n 1) NEXT_RELEASE ?= $(shell echo $(LAST_RELEASE) | awk -F. '/[0-9]+\./{$$3+=1;OFS=".";print $$1,$$2,$$3}') beekhof: echo $(LAST_RELEASE) $(NEXT_RELEASE) BUILD_COUNTER ?= build.counter LAST_COUNT = $(shell test ! -e $(BUILD_COUNTER) && echo 0; test -e $(BUILD_COUNTER) && cat $(BUILD_COUNTER)) COUNT = $(shell expr 1 + $(LAST_COUNT)) +SPECVERSION ?= $(COUNT) + init: ./autogen.sh export: rm -f $(PACKAGE)-dirty.tar.* $(PACKAGE)-tip.tar.* $(PACKAGE)-HEAD.tar.* if [ ! -f $(TARFILE) ]; then \ rm -f $(PACKAGE).tar.*; \ if [ $(TAG) = dirty ]; then \ git commit -m "DO-NOT-PUSH" -a; \ git archive --prefix=$(distdir)/ HEAD | gzip > $(TARFILE); \ git reset --mixed HEAD^; \ else \ git archive --prefix=$(distdir)/ $(TAG) | gzip > $(TARFILE); \ fi; \ echo `date`: Rebuilt $(TARFILE); \ else \ echo `date`: Using existing tarball: $(TARFILE); \ fi $(PACKAGE)-opensuse.spec: $(PACKAGE)-suse.spec cp $^ $@ @echo Rebuilt $@ $(PACKAGE)-suse.spec: $(PACKAGE).spec.in GNUmakefile rm -f $@ if [ x != x"`git ls-files -m | grep pacemaker.spec.in`" ]; then \ cp $(PACKAGE).spec.in $@; \ echo "Rebuilt $@ (local modifications)"; \ elif [ x = x"`git show $(TAG):pacemaker.spec.in 2>/dev/null`" ]; then \ cp $(PACKAGE).spec.in $@; \ echo "Rebuilt $@"; \ else \ git show $(TAG):$(PACKAGE).spec.in >> $@; \ echo "Rebuilt $@ from $(TAG)"; \ fi sed -i s:%{_docdir}/%{name}:%{_docdir}/%{name}-%{version}:g $@ sed -i s:corosynclib:libcorosync:g $@ sed -i s:libexecdir}/lcrso:libdir}/lcrso:g $@ sed -i 's:%{name}-libs:lib%{name}3:g' $@ sed -i s:heartbeat-libs:heartbeat:g $@ sed -i s:cluster-glue-libs:libglue:g $@ sed -i s:libselinux-devel:automake:g $@ sed -i s:lm_sensors-devel:automake:g $@ sed -i s:bzip2-devel:libbz2-devel:g $@ sed -i s:bcond_without\ publican:bcond_with\ publican:g $@ sed -i s:docbook-style-xsl:docbook-xsl-stylesheets:g $@ sed -i s:libtool-ltdl-devel::g $@ sed -i s:dbus-devel:dbus-1-devel:g $@ sed -i s:publican::g $@ sed -i s:byacc::g $@ sed -i s:global\ cs_major.*:global\ cs_major\ 1:g $@ sed -i s:global\ cs_minor.*:global\ cs_minor\ 4:g $@ sed -i s:gnutls-devel:libgnutls-devel:g $@ sed -i s:189:90:g $@ sed -i 's@python-devel@python-devel python-curses python-xml@' $@ sed -i 's@Requires: python@Requires: python python-curses python-xml@' $@ sed -i 's@%systemd_post pacemaker.service@if [ ZZZ -eq 1 ]; then systemctl preset pacemaker.service || : ; fi@' $@ sed -i 's@%systemd_postun_with_restart pacemaker.service@systemctl daemon-reload || : ; if [ ZZZ -ge 1 ]; then systemctl try-restart pacemaker.service || : ; fi@' $@ sed -i 's@%systemd_preun pacemaker.service@if [ ZZZ -eq 0 ]; then systemctl --no-reload disable pacemaker.service || : ; systemctl stop pacemaker.service || : ; fi@' $@ sed -i 's@%systemd_post pacemaker_remote.service@if [ ZZZ -eq 1 ]; then systemctl preset pacemaker_remote.service || : ; fi@' $@ sed -i 's@%systemd_postun_with_restart pacemaker_remote.service@systemctl daemon-reload || : ; if [ ZZZ -ge 1 ]; then systemctl try-restart pacemaker_remote.service || : ; fi@' $@ sed -i 's@%systemd_preun pacemaker_remote.service@if [ ZZZ -eq 0 ]; then systemctl --no-reload disable pacemaker_remote.service || : ; systemctl stop pacemaker_remote.service || : ; fi@' $@ sed -i "s@ZZZ@\o0441@g" $@ @echo "Applied SUSE-specific modifications" # Works for all fedora based distros $(PACKAGE)-%.spec: $(PACKAGE).spec.in rm -f $@ if [ x != x"`git ls-files -m | grep pacemaker.spec.in`" ]; then \ cp $(PACKAGE).spec.in $(PACKAGE)-$*.spec; \ echo "Rebuilt $@ (local modifications)"; \ elif [ x = x"`git show $(TAG):pacemaker.spec.in 2>/dev/null`" ]; then \ cp $(PACKAGE).spec.in $(PACKAGE)-$*.spec; \ echo "Rebuilt $@"; \ else \ git show $(TAG):$(PACKAGE).spec.in >> $(PACKAGE)-$*.spec; \ echo "Rebuilt $@ from $(TAG)"; \ fi srpm-%: export $(PACKAGE)-%.spec rm -f *.src.rpm cp $(PACKAGE)-$*.spec $(PACKAGE).spec if [ -e $(BUILD_COUNTER) ]; then \ echo $(COUNT) > $(BUILD_COUNTER); \ fi - sed -i 's/global\ specversion.*/global\ specversion\ $(COUNT)/' $(PACKAGE).spec + sed -i 's/global\ specversion.*/global\ specversion\ $(SPECVERSION)/' $(PACKAGE).spec sed -i 's/global\ commit.*/global\ commit\ $(TAG)/' $(PACKAGE).spec case "$(WITH)" in \ *pre_release*) \ sed -i 's/Version:.*/Version:\ $(shell echo $(NEXT_RELEASE) | sed -e s:Pacemaker-:: -e s:-.*::)/' $(PACKAGE).spec;;\ *) \ sed -i 's/Version:.*/Version:\ $(shell git describe --tags $(TAG) | sed -e s:Pacemaker-:: -e s:-.*::)/' $(PACKAGE).spec;;\ esac rpmbuild -bs --define "dist .$*" $(RPM_OPTS) $(WITH) $(PACKAGE).spec chroot: mock-$(MOCK_CFG) mock-install-$(MOCK_CFG) mock-sh-$(MOCK_CFG) echo "Done" mock-next: make F=$(shell expr 1 + $(F)) mock mock-rawhide: make F=rawhide mock mock-install-%: echo "Installing packages" mock --root=$* $(MOCK_OPTIONS) --install $(RPM_ROOT)/mock/*.rpm vi sudo valgrind lcov gdb fence-agents psmisc mock-install: mock-install-$(MOCK_CFG) echo "Done" mock-sh: mock-sh-$(MOCK_CFG) echo "Done" mock-sh-%: echo "Connecting" mock --root=$* $(MOCK_OPTIONS) --shell echo "Done" # eg. WITH="--with cman" make rpm mock-%: make srpm-$(firstword $(shell echo $(@:mock-%=%) | tr '-' ' ')) -rm -rf $(RPM_ROOT)/mock @echo "mock --root=$* --rebuild $(WITH) $(MOCK_OPTIONS) $(RPM_ROOT)/*.src.rpm" mock --root=$* --no-cleanup-after --rebuild $(WITH) $(MOCK_OPTIONS) $(RPM_ROOT)/*.src.rpm srpm: srpm-$(DISTRO) echo "Done" mock: mock-$(MOCK_CFG) echo "Done" rpm-dep: $(PACKAGE)-$(DISTRO).spec if [ x != x`which yum-builddep 2>/dev/null` ]; then \ echo "Installing with yum-builddep"; \ sudo yum-builddep $(PACKAGE)-$(DISTRO).spec; \ elif [ x != x`which yum 2>/dev/null` ]; then \ echo -e "Installing: $(shell grep BuildRequires pacemaker.spec.in | sed -e s/BuildRequires:// -e s:\>.*0:: | tr '\n' ' ')\n\n"; \ sudo yum install $(shell grep BuildRequires pacemaker.spec.in | sed -e s/BuildRequires:// -e s:\>.*0:: | tr '\n' ' '); \ elif [ x != x`which zypper` ]; then \ echo -e "Installing: $(shell grep BuildRequires pacemaker.spec.in | sed -e s/BuildRequires:// -e s:\>.*0:: | tr '\n' ' ')\n\n"; \ sudo zypper install $(shell grep BuildRequires pacemaker.spec.in | sed -e s/BuildRequires:// -e s:\>.*0:: | tr '\n' ' ');\ else \ echo "I don't know how to install $(shell grep BuildRequires pacemaker.spec.in | sed -e s/BuildRequires:// -e s:\>.*0:: | tr '\n' ' ')";\ fi rpm: srpm @echo To create custom builds, edit the flags and options in $(PACKAGE).spec first rpmbuild $(RPM_OPTS) $(WITH) --rebuild $(RPM_ROOT)/*.src.rpm release: make TAG=$(LAST_RELEASE) rpm rc: make TAG=$(LAST_RC) rpm dirty: make TAG=dirty mock COVERITY_DIR = $(shell pwd)/coverity-$(TAG) COVFILE = pacemaker-coverity-$(TAG).tgz COVHOST ?= scan5.coverity.com COVPASS ?= password # Public coverity coverity: test -e configure || ./autogen.sh test -e Makefile || ./configure make core-clean rm -rf $(COVERITY_DIR) cov-build --dir $(COVERITY_DIR) make core tar czf $(COVFILE) --transform=s@.*$(TAG)@cov-int@ $(COVERITY_DIR) @echo "Uploading to public Coverity instance..." curl --form file=@$(COVFILE) --form project=$(PACKAGE) --form password=$(COVPASS) --form email=andrew@beekhof.net http://$(COVHOST)/cgi-bin/upload.py rm -rf $(COVFILE) $(COVERITY_DIR) coverity-corp: test -e configure || ./autogen.sh test -e Makefile || ./configure make core-clean rm -rf $(COVERITY_DIR) cov-build --dir $(COVERITY_DIR) make core @echo "Waiting for a corporate Coverity license..." cov-analyze --dir $(COVERITY_DIR) --wait-for-license cov-format-errors --dir $(COVERITY_DIR) --emacs-style > $(TAG).coverity cov-format-errors --dir $(COVERITY_DIR) rsync -avzxlSD --progress $(COVERITY_DIR)/c/output/errors/ root@www.clusterlabs.org:/var/www/html/coverity/$(PACKAGE)/$(TAG) make core-clean # cov-commit-defects --host $(COVHOST) --dir $(COVERITY_DIR) --stream $(PACKAGE) --user auto --password $(COVPASS) rm -rf $(COVERITY_DIR) global: clean-generic gtags -q %.8.html: %.8 echo groff -mandoc `man -w ./$<` -T html > $@ groff -mandoc `man -w ./$<` -T html > $@ rsync -azxlSD --progress $@ root@www.clusterlabs.org:/var/www/html/man/ %.7.html: %.7 echo groff -mandoc `man -w ./$<` -T html > $@ groff -mandoc `man -w ./$<` -T html > $@ rsync -azxlSD --progress $@ root@www.clusterlabs.org:/var/www/html/man/ doxygen: doxygen Doxyfile abi: abi-check pacemaker $(LAST_RELEASE) $(TAG) abi-www: abi-check -u pacemaker $(LAST_RELEASE) $(TAG) www: all global doxygen find . -name "[a-z]*.8" -exec make \{\}.html \; find . -name "[a-z]*.7" -exec make \{\}.html \; htags -sanhIT rsync -avzxlSD --progress HTML/ root@www.clusterlabs.org:/var/www/html/global/$(PACKAGE)/$(TAG) rsync -avzxlSD --progress doc/api/html/ root@www.clusterlabs.org:/var/www/html/doxygen/$(PACKAGE)/$(TAG) make -C doc www make coverity summary: @printf "\n* `date +"%a %b %d %Y"` `git config user.name` <`git config user.email`> $(NEXT_RELEASE)-1" @printf "\n- Update source tarball to revision: `git id`" @printf "\n- Changesets: `git log --pretty=format:'%h' $(LAST_RELEASE)..HEAD | wc -l`" @printf "\n- Diff: " @git diff -r $(LAST_RELEASE)..HEAD --stat include lib mcp pengine/*.c pengine/*.h cib crmd fencing lrmd tools xml | tail -n 1 rc-changes: @make NEXT_RELEASE=$(shell echo $(LAST_RC) | sed s:-rc.*::) LAST_RELEASE=$(LAST_RC) changes changes: summary @printf "\n- Features added since $(LAST_RELEASE)\n" @git log --pretty=format:' +%s' --abbrev-commit $(LAST_RELEASE)..HEAD | grep -e Feature: | sed -e 's@Feature:@@' | sort -uf @printf "\n- Changes since $(LAST_RELEASE)\n" @git log --pretty=format:' +%s' --abbrev-commit $(LAST_RELEASE)..HEAD | grep -e High: -e Fix: -e Bug | sed -e 's@Fix:@@' -e s@High:@@ -e s@Fencing:@fencing:@ -e 's@Bug@ Bug@' -e s@PE:@pengine:@ | sort -uf changelog: @make changes > ChangeLog @printf "\n">> ChangeLog git show $(LAST_RELEASE):ChangeLog >> ChangeLog @echo -e "\033[1;35m -- Don't forget to run the bumplibs.sh script! --\033[0m" indent: find . -name "*.h" -exec ./p-indent \{\} \; find . -name "*.c" -exec ./p-indent \{\} \; git co HEAD crmd/fsa_proto.h lib/gnu rel-tags: tags find . -name TAGS -exec sed -i 's:\(.*\)/\(.*\)/TAGS:\2/TAGS:g' \{\} \; CLANG_analyzer = $(shell which scan-build) CLANG_checkers = check: clang cppcheck # Extra cppcheck options: --enable=all --inconclusive --std=posix cppcheck: for d in $(LIBLTDL_DIR) replace lib mcp attrd pengine cib crmd fencing lrmd tools; do cppcheck -q $$d; done clang: test -e $(CLANG_analyzer) scan-build $(CLANG_checkers:%=-enable-checker %) make # V3 = scandir unsetenv alphasort # V2 = setenv strerror strchrnul strndup # http://www.gnu.org/software/gnulib/manual/html_node/Initial-import.html#Initial-import GNU_MODS = crypto/md5 gnulib-update: -test ! -e gnulib && git clone git://git.savannah.gnu.org/gnulib.git cd gnulib && git pull gnulib/gnulib-tool --source-base=lib/gnu --lgpl=2 --no-vc-files --import $(GNU_MODS) diff --git a/crmd/remote_lrmd_ra.c b/crmd/remote_lrmd_ra.c index f3dedeb359..2f658eece5 100644 --- a/crmd/remote_lrmd_ra.c +++ b/crmd/remote_lrmd_ra.c @@ -1,800 +1,816 @@ /* * Copyright (C) 2013 David Vossel * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #define REMOTE_LRMD_RA "remote" /* The max start timeout before cmd retry */ #define MAX_START_TIMEOUT_MS 10000 typedef struct remote_ra_cmd_s { /*! the local node the cmd is issued from */ char *owner; /*! the remote node the cmd is executed on */ char *rsc_id; /*! the action to execute */ char *action; /*! some string the client wants us to give it back */ char *userdata; /*! start delay in ms */ int start_delay; /*! timer id used for start delay. */ int delay_id; /*! timeout in ms for cmd */ int timeout; int remaining_timeout; /*! recurring interval in ms */ int interval; /*! interval timer id */ int interval_id; int reported_success; int monitor_timeout_id; int takeover_timeout_id; /*! action parameters */ lrmd_key_value_t *params; /*! executed rc */ int rc; int op_status; int call_id; time_t start_time; gboolean cancel; } remote_ra_cmd_t; enum remote_migration_status { expect_takeover = 1, takeover_complete, }; typedef struct remote_ra_data_s { crm_trigger_t *work; remote_ra_cmd_t *cur_cmd; GList *cmds; GList *recurring_cmds; enum remote_migration_status migrate_status; gboolean active; } remote_ra_data_t; static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms); static void handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd); static GList *fail_all_monitor_cmds(GList * list); static void free_cmd(gpointer user_data) { remote_ra_cmd_t *cmd = user_data; if (!cmd) { return; } if (cmd->delay_id) { g_source_remove(cmd->delay_id); } if (cmd->interval_id) { g_source_remove(cmd->interval_id); } if (cmd->monitor_timeout_id) { g_source_remove(cmd->monitor_timeout_id); } if (cmd->takeover_timeout_id) { g_source_remove(cmd->takeover_timeout_id); } free(cmd->owner); free(cmd->rsc_id); free(cmd->action); free(cmd->userdata); lrmd_key_value_freeall(cmd->params); free(cmd); } static int generate_callid(void) { static int remote_ra_callid = 0; remote_ra_callid++; if (remote_ra_callid <= 0) { remote_ra_callid = 1; } return remote_ra_callid; } static gboolean recurring_helper(gpointer data) { remote_ra_cmd_t *cmd = data; lrm_state_t *connection_rsc = NULL; cmd->interval_id = 0; connection_rsc = lrm_state_find(cmd->rsc_id); if (connection_rsc && connection_rsc->remote_ra_data) { remote_ra_data_t *ra_data = connection_rsc->remote_ra_data; ra_data->recurring_cmds = g_list_remove(ra_data->recurring_cmds, cmd); - cmd->call_id = generate_callid(); - ra_data->cmds = g_list_append(ra_data->cmds, cmd); mainloop_set_trigger(ra_data->work); } return FALSE; } static gboolean start_delay_helper(gpointer data) { remote_ra_cmd_t *cmd = data; lrm_state_t *connection_rsc = NULL; cmd->delay_id = 0; connection_rsc = lrm_state_find(cmd->rsc_id); if (connection_rsc && connection_rsc->remote_ra_data) { remote_ra_data_t *ra_data = connection_rsc->remote_ra_data; mainloop_set_trigger(ra_data->work); } return FALSE; } static void report_remote_ra_result(remote_ra_cmd_t * cmd) { lrmd_event_data_t op = { 0, }; op.type = lrmd_event_exec_complete; op.rsc_id = cmd->rsc_id; op.op_type = cmd->action; op.user_data = cmd->userdata; op.timeout = cmd->timeout; op.interval = cmd->interval; op.rc = cmd->rc; op.op_status = cmd->op_status; + op.t_run = cmd->start_time; + op.t_rcchange = cmd->start_time; + if (cmd->reported_success && cmd->rc != PCMK_OCF_OK) { + op.t_rcchange = time(NULL); + /* This edge case will likely never ever occur, but if it does the + * result is that a failure will not be processed correctly. This is only + * remotely possible because we are able to detect a connection resource's tcp + * connection has failed at any moment after start has completed. The actual + * recurring operation is just a connectivity ping. + * + * basically, we are not guaranteed that the first successful monitor op and + * a subsequent failed monitor op will not occur in the same timestamp. We have to + * make it look like the operations occurred at separate times though. */ + if (op.t_rcchange == op.t_run) { + op.t_rcchange++; + } + } + if (cmd->params) { lrmd_key_value_t *tmp; op.params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); for (tmp = cmd->params; tmp; tmp = tmp->next) { g_hash_table_insert(op.params, strdup(tmp->key), strdup(tmp->value)); } } op.call_id = cmd->call_id; op.remote_nodename = cmd->owner; lrm_op_callback(&op); if (op.params) { g_hash_table_destroy(op.params); } } static void update_remaining_timeout(remote_ra_cmd_t * cmd) { cmd->remaining_timeout = ((cmd->timeout / 1000) - (time(NULL) - cmd->start_time)) * 1000; } static gboolean retry_start_cmd_cb(gpointer data) { lrm_state_t *lrm_state = data; remote_ra_data_t *ra_data = lrm_state->remote_ra_data; remote_ra_cmd_t *cmd = NULL; int rc = -1; if (!ra_data || !ra_data->cur_cmd) { return FALSE; } cmd = ra_data->cur_cmd; if (safe_str_neq(cmd->action, "start")) { return FALSE; } update_remaining_timeout(cmd); if (cmd->remaining_timeout > 0) { rc = handle_remote_ra_start(lrm_state, cmd, cmd->remaining_timeout); } if (rc != 0) { cmd->rc = PCMK_OCF_UNKNOWN_ERROR; cmd->op_status = PCMK_LRM_OP_ERROR; report_remote_ra_result(cmd); if (ra_data->cmds) { mainloop_set_trigger(ra_data->work); } ra_data->cur_cmd = NULL; free_cmd(cmd); } else { /* wait for connection event */ } return FALSE; } static gboolean connection_takeover_timeout_cb(gpointer data) { lrm_state_t *lrm_state = NULL; remote_ra_cmd_t *cmd = data; crm_debug("takeover event timed out for node %s", cmd->rsc_id); cmd->takeover_timeout_id = 0; lrm_state = lrm_state_find(cmd->rsc_id); handle_remote_ra_stop(lrm_state, cmd); free_cmd(cmd); return FALSE; } static gboolean monitor_timeout_cb(gpointer data) { lrm_state_t *lrm_state = NULL; remote_ra_cmd_t *cmd = data; crm_debug("Poke async response timed out for node %s", cmd->rsc_id); cmd->monitor_timeout_id = 0; cmd->op_status = PCMK_LRM_OP_TIMEOUT; cmd->rc = PCMK_OCF_UNKNOWN_ERROR; lrm_state = lrm_state_find(cmd->rsc_id); if (lrm_state && lrm_state->remote_ra_data) { remote_ra_data_t *ra_data = lrm_state->remote_ra_data; if (ra_data->cur_cmd == cmd) { ra_data->cur_cmd = NULL; } if (ra_data->cmds) { mainloop_set_trigger(ra_data->work); } } report_remote_ra_result(cmd); free_cmd(cmd); return FALSE; } xmlNode * simple_remote_node_status(const char *node_name, xmlNode * parent, const char *source) { xmlNode *state = create_xml_node(parent, XML_CIB_TAG_STATE); crm_xml_add(state, XML_NODE_IS_REMOTE, "true"); crm_xml_add(state, XML_ATTR_UUID, node_name); crm_xml_add(state, XML_ATTR_UNAME, node_name); crm_xml_add(state, XML_ATTR_ORIGIN, source); return state; } void remote_lrm_op_callback(lrmd_event_data_t * op) { gboolean cmd_handled = FALSE; lrm_state_t *lrm_state = NULL; remote_ra_data_t *ra_data = NULL; remote_ra_cmd_t *cmd = NULL; crm_debug("remote connection event - event_type:%s node:%s action:%s rc:%s op_status:%s", lrmd_event_type2str(op->type), op->remote_nodename, op->op_type ? op->op_type : "none", services_ocf_exitcode_str(op->rc), services_lrm_status_str(op->op_status)); lrm_state = lrm_state_find(op->remote_nodename); if (!lrm_state || !lrm_state->remote_ra_data) { crm_debug("lrm_state info not found for remote lrmd connection event"); return; } ra_data = lrm_state->remote_ra_data; /* Another client has connected to the remote daemon, * determine if this is expected. */ if (op->type == lrmd_event_new_client) { /* great, we new this was coming */ if (ra_data->migrate_status == expect_takeover) { ra_data->migrate_status = takeover_complete; } else { crm_err("Unexpected pacemaker_remote client takeover. Disconnecting"); lrm_state_disconnect(lrm_state); } return; } /* filter all EXEC events up */ if (op->type == lrmd_event_exec_complete) { if (ra_data->migrate_status == takeover_complete) { crm_debug("ignoring event, this connection is taken over by another node"); } else { lrm_op_callback(op); } return; } if ((op->type == lrmd_event_disconnect) && (ra_data->cur_cmd == NULL) && (ra_data->active == TRUE)) { crm_err("Unexpected disconnect on remote-node %s", lrm_state->node_name); ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds); ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds); return; } if (!ra_data->cur_cmd) { crm_debug("no event to match"); return; } cmd = ra_data->cur_cmd; /* Start actions and migrate from actions complete after connection * comes back to us. */ if (op->type == lrmd_event_connect && (safe_str_eq(cmd->action, "start") || safe_str_eq(cmd->action, "migrate_from"))) { if (op->connection_rc < 0) { update_remaining_timeout(cmd); /* There isn't much of a reason to reschedule if the timeout is too small */ if (cmd->remaining_timeout > 3000) { crm_trace("rescheduling start, remaining timeout %d", cmd->remaining_timeout); g_timeout_add(1000, retry_start_cmd_cb, lrm_state); return; } else { crm_trace("can't reschedule start, remaining timeout too small %d", cmd->remaining_timeout); } cmd->op_status = PCMK_LRM_OP_TIMEOUT; cmd->rc = PCMK_OCF_UNKNOWN_ERROR; } else { if (safe_str_eq(cmd->action, "start")) { /* clear PROBED value if it happens to be set after start completes. */ update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, NULL, TRUE); } lrm_state_reset_tables(lrm_state); cmd->rc = PCMK_OCF_OK; cmd->op_status = PCMK_LRM_OP_DONE; ra_data->active = TRUE; } crm_debug("remote lrmd connect event matched %s action. ", cmd->action); report_remote_ra_result(cmd); cmd_handled = TRUE; } else if (op->type == lrmd_event_poke && safe_str_eq(cmd->action, "monitor")) { if (cmd->monitor_timeout_id) { g_source_remove(cmd->monitor_timeout_id); cmd->monitor_timeout_id = 0; } /* Only report success the first time, after that only worry about failures. * For this function, if we get the poke pack, it is always a success. Pokes * only fail if the send fails, or the response times out. */ if (!cmd->reported_success) { cmd->rc = PCMK_OCF_OK; cmd->op_status = PCMK_LRM_OP_DONE; report_remote_ra_result(cmd); cmd->reported_success = 1; } crm_debug("remote lrmd poke event matched %s action. ", cmd->action); /* success, keep rescheduling if interval is present. */ if (cmd->interval && (cmd->cancel == FALSE)) { ra_data->recurring_cmds = g_list_append(ra_data->recurring_cmds, cmd); cmd->interval_id = g_timeout_add(cmd->interval, recurring_helper, cmd); cmd = NULL; /* prevent free */ } cmd_handled = TRUE; } else if (op->type == lrmd_event_disconnect && safe_str_eq(cmd->action, "monitor")) { if (ra_data->active == TRUE && (cmd->cancel == FALSE)) { cmd->rc = PCMK_OCF_UNKNOWN_ERROR; cmd->op_status = PCMK_LRM_OP_ERROR; report_remote_ra_result(cmd); crm_err("remote-node %s unexpectedly disconneced during monitor operation", lrm_state->node_name); } cmd_handled = TRUE; } else if (op->type == lrmd_event_new_client && safe_str_eq(cmd->action, "stop")) { handle_remote_ra_stop(lrm_state, cmd); cmd_handled = TRUE; } else { crm_debug("Event did not match %s action", ra_data->cur_cmd->action); } if (cmd_handled) { ra_data->cur_cmd = NULL; if (ra_data->cmds) { mainloop_set_trigger(ra_data->work); } free_cmd(cmd); } } static void handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd) { remote_ra_data_t *ra_data = NULL; CRM_ASSERT(lrm_state); ra_data = lrm_state->remote_ra_data; if (ra_data->migrate_status != takeover_complete) { /* only clear the status if this stop is not apart of a successful migration */ update_attrd_remote_node_removed(lrm_state->node_name, NULL); } ra_data->active = FALSE; lrm_state_disconnect(lrm_state); cmd->rc = PCMK_OCF_OK; cmd->op_status = PCMK_LRM_OP_DONE; if (ra_data->cmds) { g_list_free_full(ra_data->cmds, free_cmd); } if (ra_data->recurring_cmds) { g_list_free_full(ra_data->recurring_cmds, free_cmd); } ra_data->cmds = NULL; ra_data->recurring_cmds = NULL; ra_data->cur_cmd = NULL; report_remote_ra_result(cmd); } static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms) { const char *server = NULL; lrmd_key_value_t *tmp = NULL; int port = 0; int timeout_used = timeout_ms > MAX_START_TIMEOUT_MS ? MAX_START_TIMEOUT_MS : timeout_ms; for (tmp = cmd->params; tmp; tmp = tmp->next) { if (safe_str_eq(tmp->key, "addr") || safe_str_eq(tmp->key, "server")) { server = tmp->value; } if (safe_str_eq(tmp->key, "port")) { port = atoi(tmp->value); } } return lrm_state_remote_connect_async(lrm_state, server, port, timeout_used); } static gboolean handle_remote_ra_exec(gpointer user_data) { int rc = 0; lrm_state_t *lrm_state = user_data; remote_ra_data_t *ra_data = lrm_state->remote_ra_data; remote_ra_cmd_t *cmd; GList *first = NULL; if (ra_data->cur_cmd) { /* still waiting on previous cmd */ return TRUE; } while (ra_data->cmds) { first = ra_data->cmds; cmd = first->data; if (cmd->delay_id) { /* still waiting for start delay timer to trip */ return TRUE; } ra_data->cmds = g_list_remove_link(ra_data->cmds, first); g_list_free_1(first); if (!strcmp(cmd->action, "start") || !strcmp(cmd->action, "migrate_from")) { ra_data->migrate_status = 0; rc = handle_remote_ra_start(lrm_state, cmd, cmd->timeout); if (rc == 0) { /* take care of this later when we get async connection result */ crm_debug("began remote lrmd connect, waiting for connect event."); ra_data->cur_cmd = cmd; return TRUE; } else { crm_debug("connect failed, not expecting to match any connection event later"); cmd->rc = PCMK_OCF_UNKNOWN_ERROR; cmd->op_status = PCMK_LRM_OP_ERROR; } report_remote_ra_result(cmd); } else if (!strcmp(cmd->action, "monitor")) { if (lrm_state_is_connected(lrm_state) == TRUE) { rc = lrm_state_poke_connection(lrm_state); if (rc < 0) { cmd->rc = PCMK_OCF_UNKNOWN_ERROR; cmd->op_status = PCMK_LRM_OP_ERROR; } } else { rc = -1; cmd->op_status = PCMK_LRM_OP_DONE; cmd->rc = PCMK_OCF_NOT_RUNNING; } if (rc == 0) { crm_debug("poked remote lrmd at node %s, waiting for async response.", cmd->rsc_id); ra_data->cur_cmd = cmd; cmd->monitor_timeout_id = g_timeout_add(cmd->timeout, monitor_timeout_cb, cmd); return TRUE; } report_remote_ra_result(cmd); } else if (!strcmp(cmd->action, "stop")) { if (ra_data->migrate_status == expect_takeover) { /* briefly wait on stop for the takeover event to occur. If the * takeover event does not occur during the wait period, that's fine. * It just means that the remote-node's lrm_status section is going to get * cleared which will require all the resources running in the remote-node * to be explicitly re-detected via probe actions. If the takeover does occur * successfully, then we can leave the status section intact. */ cmd->monitor_timeout_id = g_timeout_add((cmd->timeout/2), connection_takeover_timeout_cb, cmd); ra_data->cur_cmd = cmd; return TRUE; } handle_remote_ra_stop(lrm_state, cmd); } else if (!strcmp(cmd->action, "migrate_to")) { ra_data->migrate_status = expect_takeover; cmd->rc = PCMK_OCF_OK; cmd->op_status = PCMK_LRM_OP_DONE; report_remote_ra_result(cmd); } free_cmd(cmd); } return TRUE; } static void remote_ra_data_init(lrm_state_t * lrm_state) { remote_ra_data_t *ra_data = NULL; if (lrm_state->remote_ra_data) { return; } ra_data = calloc(1, sizeof(remote_ra_data_t)); ra_data->work = mainloop_add_trigger(G_PRIORITY_HIGH, handle_remote_ra_exec, lrm_state); lrm_state->remote_ra_data = ra_data; } void remote_ra_cleanup(lrm_state_t * lrm_state) { remote_ra_data_t *ra_data = lrm_state->remote_ra_data; if (!ra_data) { return; } if (ra_data->cmds) { g_list_free_full(ra_data->cmds, free_cmd); } if (ra_data->recurring_cmds) { g_list_free_full(ra_data->recurring_cmds, free_cmd); } mainloop_destroy_trigger(ra_data->work); free(ra_data); lrm_state->remote_ra_data = NULL; } gboolean is_remote_lrmd_ra(const char *agent, const char *provider, const char *id) { if (agent && provider && !strcmp(agent, REMOTE_LRMD_RA) && !strcmp(provider, "pacemaker")) { return TRUE; } if (id && lrm_state_find(id)) { return TRUE; } return FALSE; } lrmd_rsc_info_t * remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id) { lrmd_rsc_info_t *info = NULL; if ((lrm_state_find(rsc_id))) { info = calloc(1, sizeof(lrmd_rsc_info_t)); info->id = strdup(rsc_id); info->type = strdup(REMOTE_LRMD_RA); info->class = strdup("ocf"); info->provider = strdup("pacemaker"); } return info; } static gboolean is_remote_ra_supported_action(const char *action) { if (!action) { return FALSE; } else if (strcmp(action, "start") && strcmp(action, "stop") && strcmp(action, "migrate_to") && strcmp(action, "migrate_from") && strcmp(action, "monitor")) { return FALSE; } return TRUE; } static GList * fail_all_monitor_cmds(GList * list) { GList *rm_list = NULL; remote_ra_cmd_t *cmd = NULL; GListPtr gIter = NULL; for (gIter = list; gIter != NULL; gIter = gIter->next) { cmd = gIter->data; if (cmd->interval > 0 && safe_str_eq(cmd->action, "monitor")) { rm_list = g_list_append(rm_list, cmd); } } for (gIter = rm_list; gIter != NULL; gIter = gIter->next) { cmd = gIter->data; cmd->rc = PCMK_OCF_UNKNOWN_ERROR; cmd->op_status = PCMK_LRM_OP_ERROR; report_remote_ra_result(cmd); list = g_list_remove(list, cmd); free_cmd(cmd); } /* frees only the list data, not the cmds */ g_list_free(rm_list); return list; } static GList * remove_cmd(GList * list, const char *action, int interval) { remote_ra_cmd_t *cmd = NULL; GListPtr gIter = NULL; for (gIter = list; gIter != NULL; gIter = gIter->next) { cmd = gIter->data; if (cmd->interval == interval && safe_str_eq(cmd->action, action)) { break; } cmd = NULL; } if (cmd) { list = g_list_remove(list, cmd); free_cmd(cmd); } return list; } int remote_ra_cancel(lrm_state_t * lrm_state, const char *rsc_id, const char *action, int interval) { lrm_state_t *connection_rsc = NULL; remote_ra_data_t *ra_data = NULL; connection_rsc = lrm_state_find(rsc_id); if (!connection_rsc || !connection_rsc->remote_ra_data) { return -EINVAL; } ra_data = connection_rsc->remote_ra_data; ra_data->cmds = remove_cmd(ra_data->cmds, action, interval); ra_data->recurring_cmds = remove_cmd(ra_data->recurring_cmds, action, interval); if (ra_data->cur_cmd && (ra_data->cur_cmd->interval == interval) && (safe_str_eq(ra_data->cur_cmd->action, action))) { ra_data->cur_cmd->cancel = TRUE; } return 0; } int remote_ra_exec(lrm_state_t * lrm_state, const char *rsc_id, const char *action, const char *userdata, int interval, /* ms */ int timeout, /* ms */ int start_delay, /* ms */ lrmd_key_value_t * params) { int rc = 0; lrm_state_t *connection_rsc = NULL; remote_ra_cmd_t *cmd = NULL; remote_ra_data_t *ra_data = NULL; if (is_remote_ra_supported_action(action) == FALSE) { rc = -EINVAL; goto exec_done; } connection_rsc = lrm_state_find(rsc_id); if (!connection_rsc) { rc = -EINVAL; goto exec_done; } remote_ra_data_init(connection_rsc); cmd = calloc(1, sizeof(remote_ra_cmd_t)); cmd->owner = strdup(lrm_state->node_name); cmd->rsc_id = strdup(rsc_id); cmd->action = strdup(action); cmd->userdata = strdup(userdata); cmd->interval = interval; cmd->timeout = timeout; cmd->start_delay = start_delay; cmd->params = params; cmd->start_time = time(NULL); cmd->call_id = generate_callid(); if (cmd->start_delay) { cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd); } ra_data = connection_rsc->remote_ra_data; ra_data->cmds = g_list_append(ra_data->cmds, cmd); mainloop_set_trigger(ra_data->work); return cmd->call_id; exec_done: lrmd_key_value_freeall(params); return rc; } diff --git a/crmd/te_utils.c b/crmd/te_utils.c index 66ed1daa68..69c22e3487 100644 --- a/crmd/te_utils.c +++ b/crmd/te_utils.c @@ -1,542 +1,542 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include crm_trigger_t *stonith_reconnect = NULL; GListPtr stonith_cleanup_list = NULL; static gboolean fail_incompletable_stonith(crm_graph_t * graph) { GListPtr lpc = NULL; const char *task = NULL; xmlNode *last_action = NULL; if (graph == NULL) { return FALSE; } for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) { GListPtr lpc2 = NULL; synapse_t *synapse = (synapse_t *) lpc->data; if (synapse->confirmed) { continue; } for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) { crm_action_t *action = (crm_action_t *) lpc2->data; if (action->type != action_type_crm || action->confirmed) { continue; } task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); if (task && safe_str_eq(task, CRM_OP_FENCE)) { action->failed = TRUE; last_action = action->xml; update_graph(graph, action); crm_notice("Failing action %d (%s): STONITHd terminated", action->id, ID(action->xml)); } } } if (last_action != NULL) { crm_warn("STONITHd failure resulted in un-runnable actions"); abort_transition(INFINITY, tg_restart, "Stonith failure", last_action); return TRUE; } return FALSE; } static void tengine_stonith_connection_destroy(stonith_t * st, stonith_event_t * e) { if (is_set(fsa_input_register, R_ST_REQUIRED)) { crm_crit("Fencing daemon connection failed"); mainloop_set_trigger(stonith_reconnect); } else { crm_info("Fencing daemon disconnected"); } /* cbchan will be garbage at this point, arrange for it to be reset */ if(stonith_api) { stonith_api->state = stonith_disconnected; } if (AM_I_DC) { fail_incompletable_stonith(transition_graph); trigger_graph(); } } #if SUPPORT_CMAN # include #endif char *te_client_id = NULL; #ifdef HAVE_SYS_REBOOT_H # include # include #endif static void tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event) { if(te_client_id == NULL) { te_client_id = g_strdup_printf("%s.%d", crm_system_name, getpid()); } if (st_event == NULL) { crm_err("Notify data not found"); return; } if (st_event->result == pcmk_ok && safe_str_eq("on", st_event->action)) { crm_notice("%s was successfully unfenced by %s (at the request of %s)", - st_event->target, st_event->executioner, st_event->origin); + st_event->target, st_event->executioner ? st_event->executioner : "", st_event->origin); /* TODO: Hook up st_event->device */ return; } else if (safe_str_eq("on", st_event->action)) { crm_err("Unfencing of %s by %s failed: %s (%d)", - st_event->target, st_event->executioner, + st_event->target, st_event->executioner ? st_event->executioner : "", pcmk_strerror(st_event->result), st_event->result); return; } else if (st_event->result == pcmk_ok && crm_str_eq(st_event->target, fsa_our_uname, TRUE)) { crm_crit("We were allegedly just fenced by %s for %s!", - st_event->executioner, st_event->origin); /* Dumps blackbox if enabled */ + st_event->executioner ? st_event->executioner : "", st_event->origin); /* Dumps blackbox if enabled */ qb_log_fini(); /* Try to get the above log message to disk - somehow */ /* Get out ASAP and do not come back up. * * Triggering a reboot is also not the worst idea either since * the rest of the cluster thinks we're safely down */ #ifdef RB_HALT_SYSTEM reboot(RB_HALT_SYSTEM); #endif /* * If reboot() fails or is not supported, coming back up will * probably lead to a situation where the other nodes set our * status to 'lost' because of the fencing callback and will * discard subsequent election votes with: * * Election 87 (current: 5171, owner: 103): Processed vote from east-03 (Peer is not part of our cluster) * * So just stay dead, something is seriously messed up anyway. * */ exit(100); /* None of our wrappers since we already called qb_log_fini() */ return; } if (st_event->result == pcmk_ok && safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) { st_fail_count_reset(st_event->target); } crm_notice("Peer %s was%s terminated (%s) by %s for %s: %s (ref=%s) by client %s", st_event->target, st_event->result == pcmk_ok ? "" : " not", st_event->action, st_event->executioner ? st_event->executioner : "", st_event->origin, pcmk_strerror(st_event->result), st_event->id, st_event->client_origin ? st_event->client_origin : ""); #if SUPPORT_CMAN if (st_event->result == pcmk_ok && is_cman_cluster()) { int local_rc = 0; int confirm = 0; char *target_copy = strdup(st_event->target); /* In case fenced hasn't noticed yet * * Any fencing that has been inititated will be completed by way of the fence_pcmk redirect */ local_rc = fenced_external(target_copy); if (local_rc != 0) { crm_err("Could not notify CMAN that '%s' is now fenced: %d", st_event->target, local_rc); } else { crm_notice("Notified CMAN that '%s' is now fenced", st_event->target); } /* In case fenced is already trying to shoot it */ confirm = open("/var/run/cluster/fenced_override", O_NONBLOCK|O_WRONLY); if (confirm > 0) { int ignore = 0; int len = strlen(target_copy); errno = 0; local_rc = write(confirm, target_copy, len); ignore = write(confirm, "\n", 1); if(ignore < 0 && errno == EBADF) { crm_trace("CMAN not expecting %s to be fenced (yet)", st_event->target); } else if (local_rc < len) { crm_perror(LOG_ERR, "Confirmation of CMAN fencing event for '%s' failed: %d", st_event->target, local_rc); } else { fsync(confirm); crm_notice("Confirmed CMAN fencing event for '%s'", st_event->target); } close(confirm); } free(target_copy); } #endif if (st_event->result == pcmk_ok) { crm_node_t *peer = crm_find_peer_full(0, st_event->target, CRM_GET_PEER_REMOTE | CRM_GET_PEER_CLUSTER); const char *uuid = NULL; gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname); if (peer == NULL) { return; } uuid = crm_peer_uuid(peer); crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc); if(AM_I_DC) { /* The DC always sends updates */ send_stonith_update(NULL, st_event->target, uuid); if (st_event->client_origin && safe_str_neq(st_event->client_origin, te_client_id)) { /* Abort the current transition graph if it wasn't us * that invoked stonith to fence someone */ crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target); abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL); } /* Assume it was our leader if we dont currently have one */ } else if (fsa_our_dc == NULL || safe_str_eq(fsa_our_dc, st_event->target)) { crm_notice("Target %s our leader %s (recorded: %s)", fsa_our_dc ? "was" : "may have been", st_event->target, fsa_our_dc ? fsa_our_dc : ""); /* Given the CIB resyncing that occurs around elections, * have one node update the CIB now and, if the new DC is different, * have them do so too after the election */ if (we_are_executioner) { send_stonith_update(NULL, st_event->target, uuid); } stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(st_event->target)); } crmd_peer_down(peer, TRUE); } } gboolean te_connect_stonith(gpointer user_data) { int lpc = 0; int rc = pcmk_ok; if (stonith_api == NULL) { stonith_api = stonith_api_new(); } if (stonith_api->state != stonith_disconnected) { crm_trace("Still connected"); return TRUE; } for (lpc = 0; lpc < 30; lpc++) { crm_debug("Attempting connection to fencing daemon..."); sleep(1); rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL); if (rc == pcmk_ok) { break; } if (user_data != NULL) { crm_err("Sign-in failed: triggered a retry"); mainloop_set_trigger(stonith_reconnect); return TRUE; } crm_err("Sign-in failed: pausing and trying again in 2s..."); sleep(1); } CRM_CHECK(rc == pcmk_ok, return TRUE); /* If not, we failed 30 times... just get out */ stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT, tengine_stonith_connection_destroy); stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_FENCE, tengine_stonith_notify); crm_trace("Connected"); return TRUE; } gboolean stop_te_timer(crm_action_timer_t * timer) { const char *timer_desc = "action timer"; if (timer == NULL) { return FALSE; } if (timer->reason == timeout_abort) { timer_desc = "global timer"; crm_trace("Stopping %s", timer_desc); } if (timer->source_id != 0) { crm_trace("Stopping %s", timer_desc); g_source_remove(timer->source_id); timer->source_id = 0; } else { crm_trace("%s was already stopped", timer_desc); return FALSE; } return TRUE; } gboolean te_graph_trigger(gpointer user_data) { enum transition_status graph_rc = -1; if (transition_graph == NULL) { crm_debug("Nothing to do"); return TRUE; } crm_trace("Invoking graph %d in state %s", transition_graph->id, fsa_state2string(fsa_state)); switch (fsa_state) { case S_STARTING: case S_PENDING: case S_NOT_DC: case S_HALT: case S_ILLEGAL: case S_STOPPING: case S_TERMINATE: return TRUE; break; default: break; } if (transition_graph->complete == FALSE) { int limit = transition_graph->batch_limit; transition_graph->batch_limit = throttle_get_total_job_limit(limit); graph_rc = run_graph(transition_graph); transition_graph->batch_limit = limit; /* Restore the configured value */ /* significant overhead... */ /* print_graph(LOG_DEBUG_3, transition_graph); */ if (graph_rc == transition_active) { crm_trace("Transition not yet complete"); return TRUE; } else if (graph_rc == transition_pending) { crm_trace("Transition not yet complete - no actions fired"); return TRUE; } if (graph_rc != transition_complete) { crm_warn("Transition failed: %s", transition_status(graph_rc)); print_graph(LOG_NOTICE, transition_graph); } } crm_debug("Transition %d is now complete", transition_graph->id); transition_graph->complete = TRUE; notify_crmd(transition_graph); return TRUE; } void trigger_graph_processing(const char *fn, int line) { crm_trace("%s:%d - Triggered graph processing", fn, line); mainloop_set_trigger(transition_trigger); } void abort_transition_graph(int abort_priority, enum transition_action abort_action, const char *abort_text, xmlNode * reason, const char *fn, int line) { int add[] = { 0, 0, 0 }; int del[] = { 0, 0, 0 }; int level = LOG_INFO; xmlNode *diff = NULL; xmlNode *change = NULL; CRM_CHECK(transition_graph != NULL, return); switch (fsa_state) { case S_STARTING: case S_PENDING: case S_NOT_DC: case S_HALT: case S_ILLEGAL: case S_STOPPING: case S_TERMINATE: crm_info("Abort %s suppressed: state=%s (complete=%d)", abort_text, fsa_state2string(fsa_state), transition_graph->complete); return; default: break; } /* Make sure any queued calculations are discarded ASAP */ free(fsa_pe_ref); fsa_pe_ref = NULL; if (transition_graph->complete == FALSE) { if(update_abort_priority(transition_graph, abort_priority, abort_action, abort_text)) { level = LOG_NOTICE; } } if(reason) { xmlNode *search = NULL; for(search = reason; search; search = search->parent) { if (safe_str_eq(XML_TAG_DIFF, TYPE(search))) { diff = search; break; } } if(diff) { xml_patch_versions(diff, add, del); for(search = reason; search; search = search->parent) { if (safe_str_eq(XML_DIFF_CHANGE, TYPE(search))) { change = search; break; } } } } if(reason == NULL) { do_crm_log(level, "Transition aborted: %s (source=%s:%d, %d)", abort_text, fn, line, transition_graph->complete); } else if(change == NULL) { char *local_path = xml_get_path(reason); do_crm_log(level, "Transition aborted by %s.%s: %s (cib=%d.%d.%d, source=%s:%d, path=%s, %d)", TYPE(reason), ID(reason), abort_text, add[0], add[1], add[2], fn, line, local_path, transition_graph->complete); free(local_path); } else { const char *kind = NULL; const char *op = crm_element_value(change, XML_DIFF_OP); const char *path = crm_element_value(change, XML_DIFF_PATH); if(change == reason) { if(strcmp(op, "create") == 0) { reason = reason->children; } else if(strcmp(op, "modify") == 0) { reason = first_named_child(reason, XML_DIFF_RESULT); if(reason) { reason = reason->children; } } } kind = TYPE(reason); if(strcmp(op, "delete") == 0) { const char *shortpath = strrchr(path, '/'); do_crm_log(level, "Transition aborted by deletion of %s: %s (cib=%d.%d.%d, source=%s:%d, path=%s, %d)", shortpath?shortpath+1:path, abort_text, add[0], add[1], add[2], fn, line, path, transition_graph->complete); } else if (safe_str_eq(XML_CIB_TAG_NVPAIR, kind)) { do_crm_log(level, "Transition aborted by %s, %s=%s: %s (%s cib=%d.%d.%d, source=%s:%d, path=%s, %d)", crm_element_value(reason, XML_ATTR_ID), crm_element_value(reason, XML_NVPAIR_ATTR_NAME), crm_element_value(reason, XML_NVPAIR_ATTR_VALUE), abort_text, op, add[0], add[1], add[2], fn, line, path, transition_graph->complete); } else if (safe_str_eq(XML_LRM_TAG_RSC_OP, kind)) { const char *magic = crm_element_value(reason, XML_ATTR_TRANSITION_MAGIC); do_crm_log(level, "Transition aborted by %s '%s' on %s: %s (magic=%s, cib=%d.%d.%d, source=%s:%d, %d)", crm_element_value(reason, XML_LRM_ATTR_TASK_KEY), op, crm_element_value(reason, XML_LRM_ATTR_TARGET), abort_text, magic, add[0], add[1], add[2], fn, line, transition_graph->complete); } else if (safe_str_eq(XML_CIB_TAG_STATE, kind) || safe_str_eq(XML_CIB_TAG_NODE, kind)) { const char *uname = crm_peer_uname(ID(reason)); do_crm_log(level, "Transition aborted by %s '%s' on %s: %s (cib=%d.%d.%d, source=%s:%d, %d)", kind, op, uname ? uname : ID(reason), abort_text, add[0], add[1], add[2], fn, line, transition_graph->complete); } else { do_crm_log(level, "Transition aborted by %s.%s '%s': %s (cib=%d.%d.%d, source=%s:%d, path=%s, %d)", TYPE(reason), ID(reason), op?op:"change", abort_text, add[0], add[1], add[2], fn, line, path, transition_graph->complete); } } if (transition_graph->complete) { if (transition_timer->period_ms > 0) { crm_timer_stop(transition_timer); crm_timer_start(transition_timer); } else { register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL); } return; } mainloop_set_trigger(transition_trigger); } diff --git a/cts/CIB.py b/cts/CIB.py index 6ce38e9ba3..d26efdbbe8 100644 --- a/cts/CIB.py +++ b/cts/CIB.py @@ -1,425 +1,427 @@ '''CTS: Cluster Testing System: CIB generator ''' __copyright__ = ''' Author: Andrew Beekhof Copyright (C) 2008 Andrew Beekhof ''' from UserDict import UserDict import sys, time, types, syslog, os, struct, string, signal, traceback, warnings, socket from cts.CTSvars import * from cts.CTS import ClusterManager class CibBase: def __init__(self, Factory, tag, _id, **kwargs): self.tag = tag self.name = _id self.kwargs = kwargs self.values = [] self.children = [] self.Factory = Factory def __repr__(self): return "%s-%s" % (self.tag, self.name) def add_child(self, child): self.children.append(child) def __setitem__(self, key, value): if value: self.kwargs[key] = value else: self.values.append(key) from cib_xml import * class ConfigBase: cts_cib = None version = "unknown" feature_set = "unknown" Factory = None def __init__(self, CM, factory, tmpfile=None): self.CM = CM self.Factory = factory if not tmpfile: warnings.filterwarnings("ignore") tmpfile = os.tmpnam() warnings.resetwarnings() self.Factory.tmpfile = tmpfile def version(self): return self.version def NextIP(self): ip = self.CM.Env["IPBase"] if ":" in ip: (prefix, sep, suffix) = ip.rpartition(":") suffix = str(hex(int(suffix, 16)+1)).lstrip("0x") else: (prefix, sep, suffix) = ip.rpartition(".") suffix = str(int(suffix)+1) ip = prefix + sep + suffix self.CM.Env["IPBase"] = ip return ip.strip() class CIB11(ConfigBase): feature_set = "3.0" version = "pacemaker-1.1" counter = 1 def _show(self, command=""): output = "" (rc, result) = self.Factory.rsh(self.Factory.target, "HOME=/root CIB_file="+self.Factory.tmpfile+" cibadmin -Ql "+command, None, ) for line in result: output += line self.Factory.debug("Generated Config: "+line) return output def NewIP(self, name=None, standard="ocf"): if self.CM.Env["IPagent"] == "IPaddr2": ip = self.NextIP() if not name: if ":" in ip: (prefix, sep, suffix) = ip.rpartition(":") name = "r"+suffix else: name = "r"+ip r = Resource(self.Factory, name, self.CM.Env["IPagent"], standard) r["ip"] = ip if ":" in ip: r["cidr_netmask"] = "64" r["nic"] = "eth0" else: r["cidr_netmask"] = "32" else: if not name: name = "r%s%d" % (self.CM.Env["IPagent"], self.counter) self.counter = self.counter + 1 r = Resource(self.Factory, name, self.CM.Env["IPagent"], standard) r.add_op("monitor", "5s") return r def install(self, target): old = self.Factory.tmpfile # Force a rebuild self.cts_cib = None self.Factory.tmpfile = CTSvars.CRM_CONFIG_DIR+"/cib.xml" self.contents(target) self.Factory.rsh(self.Factory.target, "chown "+CTSvars.CRM_DAEMON_USER+" "+self.Factory.tmpfile) self.Factory.tmpfile = old def contents(self, target=None): # fencing resource if self.cts_cib: return self.cts_cib if target: self.Factory.target = target self.Factory.rsh(self.Factory.target, "HOME=/root cibadmin --empty %s > %s" % (self.version, self.Factory.tmpfile)) #cib_base = self.cib_template % (self.feature_set, self.version, ''' remote-tls-port='9898' remote-clear-port='9999' ''') nodelist = "" self.num_nodes = 0 for node in self.CM.Env["nodes"]: nodelist += node + " " self.num_nodes = self.num_nodes + 1 no_quorum = "stop" if self.num_nodes < 3: no_quorum = "ignore" self.Factory.log("Cluster only has %d nodes, configuring: no-quroum-policy=ignore" % self.num_nodes) # Fencing resource # Define first so that the shell doesn't reject every update if self.CM.Env["DoFencing"]: st = Resource(self.Factory, "Fencing", ""+self.CM.Env["stonith-type"], "stonith") # Set a threshold for unreliable stonith devices such as the vmware one st.add_meta("migration-threshold", "5") st.add_op("monitor", "120s", timeout="120s") st.add_op("stop", "0", timeout="60s") st.add_op("start", "0", timeout="60s") entries = string.split(self.CM.Env["stonith-params"], ',') for entry in entries: (name, value) = string.split(entry, '=') if name == "hostlist" and value == "all": value = string.join(self.CM.Env["nodes"], " ") elif name == "pcmk_host_list" and value == "all": value = string.join(self.CM.Env["nodes"], " ") st[name] = value st.commit() # Test advanced fencing logic if True: stf_nodes = [] stt_nodes = [] # Create the levels stl = FencingTopology(self.Factory) for node in self.CM.Env["nodes"]: ftype = self.CM.Env.RandomGen.choice(["levels-and", "levels-or ", "broadcast "]) self.CM.log(" - Using %s fencing for node: %s" % (ftype, node)) + # for baremetal remote node tests + stt_nodes.append("remote_%s" % node) if ftype == "levels-and": stl.level(1, node, "FencingPass,Fencing") stt_nodes.append(node) elif ftype == "levels-or ": stl.level(1, node, "FencingFail") stl.level(2, node, "Fencing") stf_nodes.append(node) # Create a Dummy agent that always passes for levels-and if len(stt_nodes): self.CM.install_helper("fence_dummy", destdir="/usr/sbin", sourcedir=CTSvars.Fencing_home) stt = Resource(self.Factory, "FencingPass", "fence_dummy", "stonith") stt["pcmk_host_list"] = string.join(stt_nodes, " ") # Wait this many seconds before doing anything, handy for letting disks get flushed too stt["random_sleep_range"] = "30" stt["mode"] = "pass" stt.commit() # Create a Dummy agent that always fails for levels-or if len(stf_nodes): self.CM.install_helper("fence_dummy", destdir="/usr/sbin", sourcedir=CTSvars.Fencing_home) stf = Resource(self.Factory, "FencingFail", "fence_dummy", "stonith") stf["pcmk_host_list"] = string.join(stf_nodes, " ") # Wait this many seconds before doing anything, handy for letting disks get flushed too stf["random_sleep_range"] = "30" stf["mode"] = "fail" stf.commit() # Now commit the levels themselves stl.commit() o = Option(self.Factory, "stonith-enabled", self.CM.Env["DoFencing"]) o["start-failure-is-fatal"] = "false" o["pe-input-series-max"] = "5000" o["default-action-timeout"] = "90s" o["shutdown-escalation"] = "5min" o["batch-limit"] = "10" o["dc-deadtime"] = "5s" o["no-quorum-policy"] = no_quorum o["expected-quorum-votes"] = self.num_nodes if self.CM.Env["DoBSC"] == 1: o["ident-string"] = "Linux-HA TEST configuration file - REMOVEME!!" o.commit() # Add resources? if self.CM.Env["CIBResource"] == 1: self.add_resources() if self.CM.cluster_monitor == 1: mon = Resource(self.Factory, "cluster_mon", "ocf", "ClusterMon", "pacemaker") mon.add_op("start", "0", requires="nothing") mon.add_op("monitor", "5s", requires="nothing") mon["update"] = "10" mon["extra_options"] = "-r -n" mon["user"] = "abeekhof" mon["htmlfile"] = "/suse/abeekhof/Export/cluster.html" mon.commit() #self._create('''location prefer-dc cluster_mon rule -INFINITY: \#is_dc eq false''') # generate cib self.cts_cib = self._show() if self.Factory.tmpfile != CTSvars.CRM_CONFIG_DIR+"/cib.xml": self.Factory.rsh(self.Factory.target, "rm -f "+self.Factory.tmpfile) return self.cts_cib def add_resources(self): # Per-node resources for node in self.CM.Env["nodes"]: name = "rsc_"+node r = self.NewIP(name) r.prefer(node, "100") r.commit() # Migrator # Make this slightly sticky (since we have no other location constraints) to avoid relocation during Reattach m = Resource(self.Factory, "migrator","Dummy", "ocf", "pacemaker") m.add_meta("resource-stickiness","1") m.add_meta("allow-migrate", "1") m.add_op("monitor", "P10S") m.commit() # Ping the test master p = Resource(self.Factory, "ping-1","ping", "ocf", "pacemaker") p.add_op("monitor", "60s") p["host_list"] = self.CM.Env["cts-master"] p["name"] = "connected" p["debug"] = "true" c = Clone(self.Factory, "Connectivity", p) c["globally-unique"] = "false" c.commit() #master slave resource s = Resource(self.Factory, "stateful-1", "Stateful", "ocf", "pacemaker") s.add_op("monitor", "15s", timeout="60s") s.add_op("monitor", "16s", timeout="60s", role="Master") ms = Master(self.Factory, "master-1", s) ms["clone-max"] = self.num_nodes ms["master-max"] = 1 ms["clone-node-max"] = 1 ms["master-node-max"] = 1 # Require conectivity to run the master r = Rule(self.Factory, "connected", "-INFINITY", op="or") r.add_child(Expression(self.Factory, "m1-connected-1", "connected", "lt", "1")) r.add_child(Expression(self.Factory, "m1-connected-2", "connected", "not_defined", None)) ms.prefer("connected", rule=r) ms.commit() # Group Resource g = Group(self.Factory, "group-1") g.add_child(self.NewIP()) g.add_child(self.NewIP()) g.add_child(self.NewIP()) # Group with the master g.after("master-1", first="promote", then="start") g.colocate("master-1", "INFINITY", withrole="Master") g.commit() # LSB resource lsb_agent = self.CM.install_helper("LSBDummy") lsb = Resource(self.Factory, "lsb-dummy",lsb_agent, "lsb") lsb.add_op("monitor", "5s") # LSB with group lsb.after("group-1") lsb.colocate("group-1") lsb.commit() class CIB12(CIB11): feature_set = "3.0" version = "pacemaker-1.2" class CIB20(CIB11): feature_set = "3.0" version = "pacemaker-2.0" #class HASI(CIB10): # def add_resources(self): # # DLM resource # self._create('''primitive dlm ocf:pacemaker:controld op monitor interval=120s''') # self._create('''clone dlm-clone dlm meta globally-unique=false interleave=true''') # O2CB resource # self._create('''primitive o2cb ocf:ocfs2:o2cb op monitor interval=120s''') # self._create('''clone o2cb-clone o2cb meta globally-unique=false interleave=true''') # self._create('''colocation o2cb-with-dlm INFINITY: o2cb-clone dlm-clone''') # self._create('''order start-o2cb-after-dlm mandatory: dlm-clone o2cb-clone''') class ConfigFactory: def __init__(self, CM): self.CM = CM self.rsh = self.CM.rsh self.register("pacemaker11", CIB11, CM, self) self.register("pacemaker12", CIB12, CM, self) self.register("pacemaker20", CIB20, CM, self) # self.register("hae", HASI, CM, self) self.target = self.CM.Env["nodes"][0] self.tmpfile = None def log(self, args): self.CM.log("cib: %s" % args) def debug(self, args): self.CM.debug("cib: %s" % args) def register(self, methodName, constructor, *args, **kargs): """register a constructor""" _args = [constructor] _args.extend(args) setattr(self, methodName, apply(ConfigFactoryItem,_args, kargs)) def unregister(self, methodName): """unregister a constructor""" delattr(self, methodName) def createConfig(self, name="pacemaker-1.0"): if name == "pacemaker-1.0": name = "pacemaker10"; elif name == "pacemaker-1.1": name = "pacemaker11"; elif name == "pacemaker-1.2": name = "pacemaker12"; elif name == "pacemaker-2.0": name = "pacemaker20"; elif name == "hasi": name = "hae"; if hasattr(self, name): return getattr(self, name)() else: self.CM.log("Configuration variant '%s' is unknown. Defaulting to latest config" % name) return self.pacemaker12() class ConfigFactoryItem: def __init__(self, function, *args, **kargs): assert callable(function), "function should be a callable obj" self._function = function self._args = args self._kargs = kargs def __call__(self, *args, **kargs): """call function""" _args = list(self._args) _args.extend(args) _kargs = self._kargs.copy() _kargs.update(kargs) return apply(self._function,_args,_kargs) # Basic Sanity Testing if __name__ == '__main__': import CTSlab env = CTSlab.LabEnvironment() env["nodes"] = [] env["nodes"].append("pcmk-1") env["nodes"].append("pcmk-2") env["nodes"].append("pcmk-3") env["nodes"].append("pcmk-4") env["CIBResource"] = 1 env["IPBase"] = "fe80::1234:56:7890:1000" env["DoStonith"] = 1 env["stonith-type"] = "fence_xvm" env["stonith-params"] = "pcmk_arg_map=domain:uname" manager = ClusterManager(env) manager.cluster_monitor = False CibFactory = ConfigFactory(manager) cib = CibFactory.createConfig("pacemaker-1.1") print cib.contents() diff --git a/cts/CTStests.py b/cts/CTStests.py index cd5b7ce365..d2b7668ac3 100644 --- a/cts/CTStests.py +++ b/cts/CTStests.py @@ -1,2912 +1,3202 @@ '''CTS: Cluster Testing System: Tests module There are a few things we want to do here: ''' __copyright__ = ''' Copyright (C) 2000, 2001 Alan Robertson Licensed under the GNU GPL. Add RecourceRecover testcase Zhao Kai ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. # # SPECIAL NOTE: # # Tests may NOT implement any cluster-manager-specific code in them. # EXTEND the ClusterManager object to provide the base capabilities # the test needs if you need to do something that the current CM classes # do not. Otherwise you screw up the whole point of the object structure # in CTS. # # Thank you. # import time, os, re, types, string, tempfile, sys from stat import * from cts import CTS from cts.CTSaudits import * from cts.CTSvars import * from cts.patterns import PatternSelector from cts.logging import LogFactory from cts.remote import RemoteFactory from cts.watcher import LogWatcher from cts.environment import EnvFactory AllTestClasses = [ ] class CTSTest: ''' A Cluster test. We implement the basic set of properties and behaviors for a generic cluster test. Cluster tests track their own statistics. We keep each of the kinds of counts we track as separate {name,value} pairs. ''' def __init__(self, cm): #self.name="the unnamed test" self.Stats = {"calls":0 , "success":0 , "failure":0 , "skipped":0 , "auditfail":0} # if not issubclass(cm.__class__, ClusterManager): # raise ValueError("Must be a ClusterManager object") self.CM = cm self.Env = EnvFactory().getInstance() self.rsh = RemoteFactory().getInstance() self.logger = LogFactory() self.templates = PatternSelector(cm["Name"]) self.Audits = [] self.timeout = 120 self.passed = 1 self.is_loop = 0 self.is_unsafe = 0 self.is_docker_unsafe = 0 self.is_experimental = 0 self.is_container = 0 self.is_valgrind = 0 self.benchmark = 0 # which tests to benchmark self.timer = {} # timers def log(self, args): self.logger.log(args) def debug(self, args): self.logger.debug(args) def has_key(self, key): return self.Stats.has_key(key) def __setitem__(self, key, value): self.Stats[key] = value def __getitem__(self, key): return self.Stats[key] def log_mark(self, msg): self.debug("MARK: test %s %s %d" % (self.name,msg,time.time())) return def get_timer(self,key = "test"): try: return self.timer[key] except: return 0 def set_timer(self,key = "test"): self.timer[key] = time.time() return self.timer[key] def log_timer(self,key = "test"): elapsed = 0 if key in self.timer: elapsed = time.time() - self.timer[key] s = key == "test" and self.name or "%s:%s" % (self.name,key) self.debug("%s runtime: %.2f" % (s, elapsed)) del self.timer[key] return elapsed def incr(self, name): '''Increment (or initialize) the value associated with the given name''' if not self.Stats.has_key(name): self.Stats[name] = 0 self.Stats[name] = self.Stats[name]+1 # Reset the test passed boolean if name == "calls": self.passed = 1 def failure(self, reason="none"): '''Increment the failure count''' self.passed = 0 self.incr("failure") self.logger.log(("Test %s" % self.name).ljust(35) + " FAILED: %s" % reason) return None def success(self): '''Increment the success count''' self.incr("success") return 1 def skipped(self): '''Increment the skipped count''' self.incr("skipped") return 1 def __call__(self, node): '''Perform the given test''' raise ValueError("Abstract Class member (__call__)") self.incr("calls") return self.failure() def audit(self): passed = 1 if len(self.Audits) > 0: for audit in self.Audits: if not audit(): self.logger.log("Internal %s Audit %s FAILED." % (self.name, audit.name())) self.incr("auditfail") passed = 0 return passed def setup(self, node): '''Setup the given test''' return self.success() def teardown(self, node): '''Tear down the given test''' return self.success() def create_watch(self, patterns, timeout, name=None): if not name: name = self.name return LogWatcher(self.Env["LogFileName"], patterns, name, timeout, kind=self.Env["LogWatcher"], hosts=self.Env["nodes"]) def local_badnews(self, prefix, watch, local_ignore=[]): errcount = 0 if not prefix: prefix = "LocalBadNews:" ignorelist = [] ignorelist.append(" CTS: ") ignorelist.append(prefix) ignorelist.extend(local_ignore) while errcount < 100: match = watch.look(0) if match: add_err = 1 for ignore in ignorelist: if add_err == 1 and re.search(ignore, match): add_err = 0 if add_err == 1: self.logger.log(prefix + " " + match) errcount = errcount + 1 else: break else: self.logger.log("Too many errors!") watch.end() return errcount def is_applicable(self): return self.is_applicable_common() def is_applicable_common(self): '''Return TRUE if we are applicable in the current test configuration''' #raise ValueError("Abstract Class member (is_applicable)") if self.is_loop and not self.Env["loop-tests"]: return 0 elif self.is_unsafe and not self.Env["unsafe-tests"]: return 0 elif self.is_valgrind and not self.Env["valgrind-tests"]: return 0 elif self.is_experimental and not self.Env["experimental-tests"]: return 0 elif self.is_docker_unsafe and self.Env["docker"]: return 0 elif self.is_container and not self.Env["container-tests"]: return 0 elif self.Env["benchmark"] and self.benchmark == 0: return 0 return 1 def find_ocfs2_resources(self, node): self.r_o2cb = None self.r_ocfs2 = [] (rc, lines) = self.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): r = AuditResource(self.CM, line) if r.rtype == "o2cb" and r.parent != "NA": self.debug("Found o2cb: %s" % self.r_o2cb) self.r_o2cb = r.parent if re.search("^Constraint", line): c = AuditConstraint(self.CM, line) if c.type == "rsc_colocation" and c.target == self.r_o2cb: self.r_ocfs2.append(c.rsc) self.debug("Found ocfs2 filesystems: %s" % repr(self.r_ocfs2)) return len(self.r_ocfs2) def canrunnow(self, node): '''Return TRUE if we can meaningfully run right now''' return 1 def errorstoignore(self): '''Return list of errors which are 'normal' and should be ignored''' return [] class StopTest(CTSTest): '''Stop (deactivate) the cluster manager on a node''' def __init__(self, cm): CTSTest.__init__(self, cm) self.name = "Stop" def __call__(self, node): '''Perform the 'stop' test. ''' self.incr("calls") if self.CM.ShouldBeStatus[node] != "up": return self.skipped() patterns = [] # Technically we should always be able to notice ourselves stopping patterns.append(self.templates["Pat:We_stopped"] % node) #if self.Env["use_logd"]: # patterns.append(self.templates["Pat:Logd_stopped"] % node) # Any active node needs to notice this one left # NOTE: This wont work if we have multiple partitions for other in self.Env["nodes"]: if self.CM.ShouldBeStatus[other] == "up" and other != node: patterns.append(self.templates["Pat:They_stopped"] %(other, self.CM.key_for_node(node))) #self.debug("Checking %s will notice %s left"%(other, node)) watch = self.create_watch(patterns, self.Env["DeadTime"]) watch.setwatch() if node == self.CM.OurNode: self.incr("us") else: if self.CM.upcount() <= 1: self.incr("all") else: self.incr("them") self.CM.StopaCM(node) watch_result = watch.lookforall() failreason = None UnmatchedList = "||" if watch.unmatched: (rc, output) = self.rsh(node, "/bin/ps axf", None) for line in output: self.debug(line) (rc, output) = self.rsh(node, "/usr/sbin/dlm_tool dump", None) for line in output: self.debug(line) for regex in watch.unmatched: self.logger.log ("ERROR: Shutdown pattern not found: %s" % (regex)) UnmatchedList += regex + "||"; failreason = "Missing shutdown pattern" self.CM.cluster_stable(self.Env["DeadTime"]) if not watch.unmatched or self.CM.upcount() == 0: return self.success() if len(watch.unmatched) >= self.CM.upcount(): return self.failure("no match against (%s)" % UnmatchedList) if failreason == None: return self.success() else: return self.failure(failreason) # # We don't register StopTest because it's better when called by # another test... # class StartTest(CTSTest): '''Start (activate) the cluster manager on a node''' def __init__(self, cm, debug=None): CTSTest.__init__(self,cm) self.name = "start" self.debug = debug def __call__(self, node): '''Perform the 'start' test. ''' self.incr("calls") if self.CM.upcount() == 0: self.incr("us") else: self.incr("them") if self.CM.ShouldBeStatus[node] != "down": return self.skipped() elif self.CM.StartaCM(node): return self.success() else: return self.failure("Startup %s on node %s failed" % (self.Env["Name"], node)) # # We don't register StartTest because it's better when called by # another test... # class FlipTest(CTSTest): '''If it's running, stop it. If it's stopped start it. Overthrow the status quo... ''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "Flip" self.start = StartTest(cm) self.stop = StopTest(cm) def __call__(self, node): '''Perform the 'Flip' test. ''' self.incr("calls") if self.CM.ShouldBeStatus[node] == "up": self.incr("stopped") ret = self.stop(node) type = "up->down" # Give the cluster time to recognize it's gone... time.sleep(self.Env["StableTime"]) elif self.CM.ShouldBeStatus[node] == "down": self.incr("started") ret = self.start(node) type = "down->up" else: return self.skipped() self.incr(type) if ret: return self.success() else: return self.failure("%s failure" % type) # Register FlipTest as a good test to run AllTestClasses.append(FlipTest) class RestartTest(CTSTest): '''Stop and restart a node''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "Restart" self.start = StartTest(cm) self.stop = StopTest(cm) self.benchmark = 1 def __call__(self, node): '''Perform the 'restart' test. ''' self.incr("calls") self.incr("node:" + node) ret1 = 1 if self.CM.StataCM(node): self.incr("WasStopped") if not self.start(node): return self.failure("start (setup) failure: "+node) self.set_timer() if not self.stop(node): return self.failure("stop failure: "+node) if not self.start(node): return self.failure("start failure: "+node) return self.success() # Register RestartTest as a good test to run AllTestClasses.append(RestartTest) class StonithdTest(CTSTest): def __init__(self, cm): CTSTest.__init__(self, cm) self.name = "Stonithd" self.startall = SimulStartLite(cm) self.benchmark = 1 def __call__(self, node): self.incr("calls") if len(self.Env["nodes"]) < 2: return self.skipped() ret = self.startall(None) if not ret: return self.failure("Setup failed") is_dc = self.CM.is_node_dc(node) watchpats = [] - watchpats.append("Operation .* for host '%s' with device .* returned: 0" % node) - watchpats.append("tengine_stonith_notify:.*Peer %s was terminated .*: OK" % node) + watchpats.append(self.templates["Pat:FenceOpOK"] % node) + watchpats.append(self.templates["Pat:NodeFenced"] % node) if self.Env["at-boot"] == 0: self.debug("Expecting %s to stay down" % node) self.CM.ShouldBeStatus[node] = "down" else: self.debug("Expecting %s to come up again %d" % (node, self.Env["at-boot"])) watchpats.append("%s.* S_STARTING -> S_PENDING" % node) watchpats.append("%s.* S_PENDING -> S_NOT_DC" % node) watch = self.create_watch(watchpats, 30 + self.Env["DeadTime"] + self.Env["StableTime"] + self.Env["StartTime"]) watch.setwatch() origin = self.Env.RandomGen.choice(self.Env["nodes"]) rc = self.rsh(origin, "stonith_admin --reboot %s -VVVVVV" % node) if rc == 194: # 194 - 256 = -62 = Timer expired # # Look for the patterns, usually this means the required # device was running on the node to be fenced - or that # the required devices were in the process of being loaded # and/or moved # # Effectively the node committed suicide so there will be # no confirmation, but pacemaker should be watching and # fence the node again self.logger.log("Fencing command on %s to fence %s timed out" % (origin, node)) elif origin != node and rc != 0: self.debug("Waiting for the cluster to recover") self.CM.cluster_stable() self.debug("Waiting STONITHd node to come back up") self.CM.ns.WaitForAllNodesToComeUp(self.Env["nodes"], 600) self.logger.log("Fencing command on %s failed to fence %s (rc=%d)" % (origin, node, rc)) elif origin == node and rc != 255: # 255 == broken pipe, ie. the node was fenced as epxected self.logger.log("Logcally originated fencing returned %d" % rc) self.set_timer("fence") matched = watch.lookforall() self.log_timer("fence") self.set_timer("reform") if watch.unmatched: self.logger.log("Patterns not found: " + repr(watch.unmatched)) self.debug("Waiting for the cluster to recover") self.CM.cluster_stable() self.debug("Waiting STONITHd node to come back up") self.CM.ns.WaitForAllNodesToComeUp(self.Env["nodes"], 600) self.debug("Waiting for the cluster to re-stabilize with all nodes") is_stable = self.CM.cluster_stable(self.Env["StartTime"]) if not matched: return self.failure("Didn't find all expected patterns") elif not is_stable: return self.failure("Cluster did not become stable") self.log_timer("reform") return self.success() def errorstoignore(self): return [ self.templates["Pat:Fencing_start"] % ".*", self.templates["Pat:Fencing_ok"] % ".*", "error: native_create_actions: Resource .*stonith::.* is active on 2 nodes attempting recovery", "error: remote_op_done: Operation reboot of .*by .* for stonith_admin.*: Timer expired", ] def is_applicable(self): if not self.is_applicable_common(): return 0 if self.Env.has_key("DoFencing"): return self.Env["DoFencing"] return 1 AllTestClasses.append(StonithdTest) class StartOnebyOne(CTSTest): '''Start all the nodes ~ one by one''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "StartOnebyOne" self.stopall = SimulStopLite(cm) self.start = StartTest(cm) self.ns = CTS.NodeStatus(cm.Env) def __call__(self, dummy): '''Perform the 'StartOnebyOne' test. ''' self.incr("calls") # We ignore the "node" parameter... # Shut down all the nodes... ret = self.stopall(None) if not ret: return self.failure("Test setup failed") failed = [] self.set_timer() for node in self.Env["nodes"]: if not self.start(node): failed.append(node) if len(failed) > 0: return self.failure("Some node failed to start: " + repr(failed)) return self.success() # Register StartOnebyOne as a good test to run AllTestClasses.append(StartOnebyOne) class SimulStart(CTSTest): '''Start all the nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "SimulStart" self.stopall = SimulStopLite(cm) self.startall = SimulStartLite(cm) def __call__(self, dummy): '''Perform the 'SimulStart' test. ''' self.incr("calls") # We ignore the "node" parameter... # Shut down all the nodes... ret = self.stopall(None) if not ret: return self.failure("Setup failed") self.CM.clear_all_caches() if not self.startall(None): return self.failure("Startall failed") return self.success() # Register SimulStart as a good test to run AllTestClasses.append(SimulStart) class SimulStop(CTSTest): '''Stop all the nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "SimulStop" self.startall = SimulStartLite(cm) self.stopall = SimulStopLite(cm) def __call__(self, dummy): '''Perform the 'SimulStop' test. ''' self.incr("calls") # We ignore the "node" parameter... # Start up all the nodes... ret = self.startall(None) if not ret: return self.failure("Setup failed") if not self.stopall(None): return self.failure("Stopall failed") return self.success() # Register SimulStop as a good test to run AllTestClasses.append(SimulStop) class StopOnebyOne(CTSTest): '''Stop all the nodes in order''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "StopOnebyOne" self.startall = SimulStartLite(cm) self.stop = StopTest(cm) def __call__(self, dummy): '''Perform the 'StopOnebyOne' test. ''' self.incr("calls") # We ignore the "node" parameter... # Start up all the nodes... ret = self.startall(None) if not ret: return self.failure("Setup failed") failed = [] self.set_timer() for node in self.Env["nodes"]: if not self.stop(node): failed.append(node) if len(failed) > 0: return self.failure("Some node failed to stop: " + repr(failed)) self.CM.clear_all_caches() return self.success() # Register StopOnebyOne as a good test to run AllTestClasses.append(StopOnebyOne) class RestartOnebyOne(CTSTest): '''Restart all the nodes in order''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "RestartOnebyOne" self.startall = SimulStartLite(cm) def __call__(self, dummy): '''Perform the 'RestartOnebyOne' test. ''' self.incr("calls") # We ignore the "node" parameter... # Start up all the nodes... ret = self.startall(None) if not ret: return self.failure("Setup failed") did_fail = [] self.set_timer() self.restart = RestartTest(self.CM) for node in self.Env["nodes"]: if not self.restart(node): did_fail.append(node) if did_fail: return self.failure("Could not restart %d nodes: %s" % (len(did_fail), repr(did_fail))) return self.success() # Register StopOnebyOne as a good test to run AllTestClasses.append(RestartOnebyOne) class PartialStart(CTSTest): '''Start a node - but tell it to stop before it finishes starting up''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "PartialStart" self.startall = SimulStartLite(cm) self.stopall = SimulStopLite(cm) self.stop = StopTest(cm) #self.is_unsafe = 1 def __call__(self, node): '''Perform the 'PartialStart' test. ''' self.incr("calls") ret = self.stopall(None) if not ret: return self.failure("Setup failed") # FIXME! This should use the CM class to get the pattern # then it would be applicable in general watchpats = [] watchpats.append("crmd.*Connecting to cluster infrastructure") watch = self.create_watch(watchpats, self.Env["DeadTime"]+10) watch.setwatch() self.CM.StartaCMnoBlock(node) ret = watch.lookforall() if not ret: self.logger.log("Patterns not found: " + repr(watch.unmatched)) return self.failure("Setup of %s failed" % node) ret = self.stop(node) if not ret: return self.failure("%s did not stop in time" % node) return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' # We might do some fencing in the 2-node case if we make it up far enough return [ """Executing reboot fencing operation""" ] # Register StopOnebyOne as a good test to run AllTestClasses.append(PartialStart) class StandbyTest(CTSTest): def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "Standby" self.benchmark = 1 self.start = StartTest(cm) self.startall = SimulStartLite(cm) # make sure the node is active # set the node to standby mode # check resources, none resource should be running on the node # set the node to active mode # check resouces, resources should have been migrated back (SHOULD THEY?) def __call__(self, node): self.incr("calls") ret = self.startall(None) if not ret: return self.failure("Start all nodes failed") self.debug("Make sure node %s is active" % node) if self.CM.StandbyStatus(node) != "off": if not self.CM.SetStandbyMode(node, "off"): return self.failure("can't set node %s to active mode" % node) self.CM.cluster_stable() status = self.CM.StandbyStatus(node) if status != "off": return self.failure("standby status of %s is [%s] but we expect [off]" % (node, status)) self.debug("Getting resources running on node %s" % node) rsc_on_node = self.CM.active_resources(node) watchpats = [] watchpats.append("do_state_transition:.*-> S_POLICY_ENGINE") watch = self.create_watch(watchpats, self.Env["DeadTime"]+10) watch.setwatch() self.debug("Setting node %s to standby mode" % node) if not self.CM.SetStandbyMode(node, "on"): return self.failure("can't set node %s to standby mode" % node) self.set_timer("on") ret = watch.lookforall() if not ret: self.logger.log("Patterns not found: " + repr(watch.unmatched)) self.CM.SetStandbyMode(node, "off") return self.failure("cluster didn't react to standby change on %s" % node) self.CM.cluster_stable() status = self.CM.StandbyStatus(node) if status != "on": return self.failure("standby status of %s is [%s] but we expect [on]" % (node, status)) self.log_timer("on") self.debug("Checking resources") bad_run = self.CM.active_resources(node) if len(bad_run) > 0: rc = self.failure("%s set to standby, %s is still running on it" % (node, repr(bad_run))) self.debug("Setting node %s to active mode" % node) self.CM.SetStandbyMode(node, "off") return rc self.debug("Setting node %s to active mode" % node) if not self.CM.SetStandbyMode(node, "off"): return self.failure("can't set node %s to active mode" % node) self.set_timer("off") self.CM.cluster_stable() status = self.CM.StandbyStatus(node) if status != "off": return self.failure("standby status of %s is [%s] but we expect [off]" % (node, status)) self.log_timer("off") return self.success() AllTestClasses.append(StandbyTest) class ValgrindTest(CTSTest): '''Check for memory leaks''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "Valgrind" self.stopall = SimulStopLite(cm) self.startall = SimulStartLite(cm) self.is_valgrind = 1 self.is_loop = 1 def setup(self, node): self.incr("calls") ret = self.stopall(None) if not ret: return self.failure("Stop all nodes failed") # Enable valgrind self.logger.logPat = "/tmp/%s-*.valgrind" % self.name self.Env["valgrind-prefix"] = self.name self.rsh(node, "rm -f %s" % self.logger.logPat, None) ret = self.startall(None) if not ret: return self.failure("Start all nodes failed") for node in self.Env["nodes"]: (rc, output) = self.rsh(node, "ps u --ppid `pidofproc aisexec`", None) for line in output: self.debug(line) return self.success() def teardown(self, node): # Disable valgrind self.Env["valgrind-prefix"] = None # Return all nodes to normal ret = self.stopall(None) if not ret: return self.failure("Stop all nodes failed") return self.success() def find_leaks(self): # Check for leaks leaked = [] self.stop = StopTest(self.CM) for node in self.Env["nodes"]: (rc, ps_out) = self.rsh(node, "ps u --ppid `pidofproc aisexec`", None) rc = self.stop(node) if not rc: self.failure("Couldn't shut down %s" % node) rc = self.rsh(node, "grep -e indirectly.*lost:.*[1-9] -e definitely.*lost:.*[1-9] -e (ERROR|error).*SUMMARY:.*[1-9].*errors %s" % self.logger.logPat, 0) if rc != 1: leaked.append(node) self.failure("Valgrind errors detected on %s" % node) for line in ps_out: self.logger.log(line) (rc, output) = self.rsh(node, "grep -e lost: -e SUMMARY: %s" % self.logger.logPat, None) for line in output: self.logger.log(line) (rc, output) = self.rsh(node, "cat %s" % self.logger.logPat, None) for line in output: self.debug(line) self.rsh(node, "rm -f %s" % self.logger.logPat, None) return leaked def __call__(self, node): leaked = self.find_leaks() if len(leaked) > 0: return self.failure("Nodes %s leaked" % repr(leaked)) return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' return [ """cib:.*readCibXmlFile:""", """HA_VALGRIND_ENABLED""" ] class StandbyLoopTest(ValgrindTest): '''Check for memory leaks by putting a node in and out of standby for an hour''' def __init__(self, cm): ValgrindTest.__init__(self,cm) self.name = "StandbyLoop" def __call__(self, node): lpc = 0 delay = 2 failed = 0 done = time.time() + self.Env["loop-minutes"] * 60 while time.time() <= done and not failed: lpc = lpc + 1 time.sleep(delay) if not self.CM.SetStandbyMode(node, "on"): self.failure("can't set node %s to standby mode" % node) failed = lpc time.sleep(delay) if not self.CM.SetStandbyMode(node, "off"): self.failure("can't set node %s to active mode" % node) failed = lpc leaked = self.find_leaks() if failed: return self.failure("Iteration %d failed" % failed) elif len(leaked) > 0: return self.failure("Nodes %s leaked" % repr(leaked)) return self.success() AllTestClasses.append(StandbyLoopTest) class BandwidthTest(CTSTest): # Tests should not be cluster-manager-specific # If you need to find out cluster manager configuration to do this, then # it should be added to the generic cluster manager API. '''Test the bandwidth which heartbeat uses''' def __init__(self, cm): CTSTest.__init__(self, cm) self.name = "Bandwidth" self.start = StartTest(cm) self.__setitem__("min",0) self.__setitem__("max",0) self.__setitem__("totalbandwidth",0) self.tempfile = tempfile.mktemp(".cts") self.startall = SimulStartLite(cm) def __call__(self, node): '''Perform the Bandwidth test''' self.incr("calls") if self.CM.upcount() < 1: return self.skipped() Path = self.CM.InternalCommConfig() if "ip" not in Path["mediatype"]: return self.skipped() port = Path["port"][0] port = int(port) ret = self.startall(None) if not ret: return self.failure("Test setup failed") time.sleep(5) # We get extra messages right after startup. fstmpfile = "/var/run/band_estimate" dumpcmd = "tcpdump -p -n -c 102 -i any udp port %d > %s 2>&1" \ % (port, fstmpfile) rc = self.rsh(node, dumpcmd) if rc == 0: farfile = "root@%s:%s" % (node, fstmpfile) self.rsh.cp(farfile, self.tempfile) Bandwidth = self.countbandwidth(self.tempfile) if not Bandwidth: self.logger.log("Could not compute bandwidth.") return self.success() intband = int(Bandwidth + 0.5) self.logger.log("...bandwidth: %d bits/sec" % intband) self.Stats["totalbandwidth"] = self.Stats["totalbandwidth"] + Bandwidth if self.Stats["min"] == 0: self.Stats["min"] = Bandwidth if Bandwidth > self.Stats["max"]: self.Stats["max"] = Bandwidth if Bandwidth < self.Stats["min"]: self.Stats["min"] = Bandwidth self.rsh(node, "rm -f %s" % fstmpfile) os.unlink(self.tempfile) return self.success() else: return self.failure("no response from tcpdump command [%d]!" % rc) def countbandwidth(self, file): fp = open(file, "r") fp.seek(0) count = 0 sum = 0 while 1: line = fp.readline() if not line: return None if re.search("udp",line) or re.search("UDP,", line): count = count + 1 linesplit = string.split(line," ") for j in range(len(linesplit)-1): if linesplit[j] == "udp": break if linesplit[j] == "length:": break try: sum = sum + int(linesplit[j+1]) except ValueError: self.logger.log("Invalid tcpdump line: %s" % line) return None T1 = linesplit[0] timesplit = string.split(T1,":") time2split = string.split(timesplit[2],".") time1 = (long(timesplit[0])*60+long(timesplit[1]))*60+long(time2split[0])+long(time2split[1])*0.000001 break while count < 100: line = fp.readline() if not line: return None if re.search("udp",line) or re.search("UDP,", line): count = count+1 linessplit = string.split(line," ") for j in range(len(linessplit)-1): if linessplit[j] == "udp": break if linesplit[j] == "length:": break try: sum = int(linessplit[j+1]) + sum except ValueError: self.logger.log("Invalid tcpdump line: %s" % line) return None T2 = linessplit[0] timesplit = string.split(T2,":") time2split = string.split(timesplit[2],".") time2 = (long(timesplit[0])*60+long(timesplit[1]))*60+long(time2split[0])+long(time2split[1])*0.000001 time = time2-time1 if (time <= 0): return 0 return (sum*8)/time def is_applicable(self): '''BandwidthTest never applicable''' return 0 AllTestClasses.append(BandwidthTest) ################################################################### class MaintenanceMode(CTSTest): ################################################################### def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "MaintenanceMode" self.start = StartTest(cm) self.startall = SimulStartLite(cm) self.max = 30 #self.is_unsafe = 1 self.benchmark = 1 self.action = "asyncmon" self.interval = 0 self.rid = "maintenanceDummy" def toggleMaintenanceMode(self, node, action): pats = [] pats.append(self.templates["Pat:DC_IDLE"]) # fail the resource right after turning Maintenance mode on # verify it is not recovered until maintenance mode is turned off if action == "On": pats.append("Updating failcount for %s on .* after .* %s" % (self.rid, self.action)) else: pats.append(self.templates["Pat:RscOpOK"] % (self.rid, "stop_0")) pats.append(self.templates["Pat:RscOpOK"] % (self.rid, "start_0")) watch = self.create_watch(pats, 60) watch.setwatch() self.debug("Turning maintenance mode %s" % action) self.rsh(node, self.templates["MaintenanceMode%s" % (action)]) if (action == "On"): self.rsh(node, "crm_resource -V -F -r %s -H %s &>/dev/null" % (self.rid, node)) self.set_timer("recover%s" % (action)) watch.lookforall() self.log_timer("recover%s" % (action)) if watch.unmatched: self.debug("Failed to find patterns when turning maintenance mode %s" % action) return repr(watch.unmatched) return "" def insertMaintenanceDummy(self, node): pats = [] pats.append(("%s.*" % node) + (self.templates["Pat:RscOpOK"] % (self.rid, "start_0"))) watch = self.create_watch(pats, 60) watch.setwatch() self.CM.AddDummyRsc(node, self.rid) self.set_timer("addDummy") watch.lookforall() self.log_timer("addDummy") if watch.unmatched: self.debug("Failed to find patterns when adding maintenance dummy resource") return repr(watch.unmatched) return "" def removeMaintenanceDummy(self, node): pats = [] pats.append(self.templates["Pat:RscOpOK"] % (self.rid, "stop_0")) watch = self.create_watch(pats, 60) watch.setwatch() self.CM.RemoveDummyRsc(node, self.rid) self.set_timer("removeDummy") watch.lookforall() self.log_timer("removeDummy") if watch.unmatched: self.debug("Failed to find patterns when removing maintenance dummy resource") return repr(watch.unmatched) return "" def managedRscList(self, node): rscList = [] (rc, lines) = self.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): tmp = AuditResource(self.CM, line) if tmp.managed(): rscList.append(tmp.id) return rscList def verifyResources(self, node, rscList, managed): managedList = list(rscList) managed_str = "managed" if not managed: managed_str = "unmanaged" (rc, lines) = self.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): tmp = AuditResource(self.CM, line) if managed and not tmp.managed(): continue elif not managed and tmp.managed(): continue elif managedList.count(tmp.id): managedList.remove(tmp.id) if len(managedList) == 0: self.debug("Found all %s resources on %s" % (managed_str, node)) return True self.logger.log("Could not find all %s resources on %s. %s" % (managed_str, node, managedList)) return False def __call__(self, node): '''Perform the 'MaintenanceMode' test. ''' self.incr("calls") verify_managed = False verify_unmanaged = False failPat = "" ret = self.startall(None) if not ret: return self.failure("Setup failed") # get a list of all the managed resources. We use this list # after enabling maintenance mode to verify all managed resources # become un-managed. After maintenance mode is turned off, we use # this list to verify all the resources become managed again. managedResources = self.managedRscList(node) if len(managedResources) == 0: self.logger.log("No managed resources on %s" % node) return self.skipped() # insert a fake resource we can fail during maintenance mode # so we can verify recovery does not take place until after maintenance # mode is disabled. failPat = failPat + self.insertMaintenanceDummy(node) # toggle maintenance mode ON, then fail dummy resource. failPat = failPat + self.toggleMaintenanceMode(node, "On") # verify all the resources are now unmanaged if self.verifyResources(node, managedResources, False): verify_unmanaged = True # Toggle maintenance mode OFF, verify dummy is recovered. failPat = failPat + self.toggleMaintenanceMode(node, "Off") # verify all the resources are now managed again if self.verifyResources(node, managedResources, True): verify_managed = True # Remove our maintenance dummy resource. failPat = failPat + self.removeMaintenanceDummy(node) self.CM.cluster_stable() if failPat != "": return self.failure("Unmatched patterns: %s" % (failPat)) elif verify_unmanaged is False: return self.failure("Failed to verify resources became unmanaged during maintenance mode") elif verify_managed is False: return self.failure("Failed to verify resources switched back to managed after disabling maintenance mode") return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' return [ """Updating failcount for %s""" % self.rid, """LogActions: Recover %s""" % self.rid, """Unknown operation: fail""", """(ERROR|error): sending stonithRA op to stonithd failed.""", self.templates["Pat:RscOpOK"] % (self.rid, ("%s_%d" % (self.action, self.interval))), """(ERROR|error): process_graph_event: Action %s_%s_%d .* initiated outside of a transition""" % (self.rid, self.action, self.interval), ] AllTestClasses.append(MaintenanceMode) class ResourceRecover(CTSTest): def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "ResourceRecover" self.start = StartTest(cm) self.startall = SimulStartLite(cm) self.max = 30 self.rid = None self.rid_alt = None #self.is_unsafe = 1 self.benchmark = 1 # these are the values used for the new LRM API call self.action = "asyncmon" self.interval = 0 def __call__(self, node): '''Perform the 'ResourceRecover' test. ''' self.incr("calls") ret = self.startall(None) if not ret: return self.failure("Setup failed") resourcelist = self.CM.active_resources(node) # if there are no resourcelist, return directly if len(resourcelist) == 0: self.logger.log("No active resources on %s" % node) return self.skipped() self.rid = self.Env.RandomGen.choice(resourcelist) self.rid_alt = self.rid rsc = None (rc, lines) = self.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): tmp = AuditResource(self.CM, line) if tmp.id == self.rid: rsc = tmp # Handle anonymous clones that get renamed self.rid = rsc.clone_id break if not rsc: return self.failure("Could not find %s in the resource list" % self.rid) self.debug("Shooting %s aka. %s" % (rsc.clone_id, rsc.id)) pats = [] pats.append("Updating failcount for %s on .* after .* %s" % (self.rid, self.action)) if rsc.managed(): pats.append(self.templates["Pat:RscOpOK"] % (self.rid, "stop_0")) if rsc.unique(): pats.append(self.templates["Pat:RscOpOK"] % (self.rid, "start_0")) else: # Anonymous clones may get restarted with a different clone number pats.append(self.templates["Pat:RscOpOK"] % (".*", "start_0")) watch = self.create_watch(pats, 60) watch.setwatch() self.rsh(node, "crm_resource -V -F -r %s -H %s &>/dev/null" % (self.rid, node)) self.set_timer("recover") watch.lookforall() self.log_timer("recover") self.CM.cluster_stable() recovered = self.CM.ResourceLocation(self.rid) if watch.unmatched: return self.failure("Patterns not found: %s" % repr(watch.unmatched)) elif rsc.unique() and len(recovered) > 1: return self.failure("%s is now active on more than one node: %s"%(self.rid, repr(recovered))) elif len(recovered) > 0: self.debug("%s is running on: %s" % (self.rid, repr(recovered))) elif rsc.managed(): return self.failure("%s was not recovered and is inactive" % self.rid) return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' return [ """Updating failcount for %s""" % self.rid, """LogActions: Recover %s""" % self.rid, """LogActions: Recover %s""" % self.rid_alt, """Unknown operation: fail""", """(ERROR|error): sending stonithRA op to stonithd failed.""", self.templates["Pat:RscOpOK"] % (self.rid, ("%s_%d" % (self.action, self.interval))), """(ERROR|error): process_graph_event: Action %s_%s_%d .* initiated outside of a transition""" % (self.rid, self.action, self.interval), ] AllTestClasses.append(ResourceRecover) class ComponentFail(CTSTest): def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "ComponentFail" # TODO make this work correctly in docker. self.is_docker_unsafe = 1 self.startall = SimulStartLite(cm) self.complist = cm.Components() self.patterns = [] self.okerrpatterns = [] self.is_unsafe = 1 def __call__(self, node): '''Perform the 'ComponentFail' test. ''' self.incr("calls") self.patterns = [] self.okerrpatterns = [] # start all nodes ret = self.startall(None) if not ret: return self.failure("Setup failed") if not self.CM.cluster_stable(self.Env["StableTime"]): return self.failure("Setup failed - unstable") node_is_dc = self.CM.is_node_dc(node, None) # select a component to kill chosen = self.Env.RandomGen.choice(self.complist) while chosen.dc_only == 1 and node_is_dc == 0: chosen = self.Env.RandomGen.choice(self.complist) self.debug("...component %s (dc=%d,boot=%d)" % (chosen.name, node_is_dc,chosen.triggersreboot)) self.incr(chosen.name) if chosen.name != "aisexec" and chosen.name != "corosync": if self.Env["Name"] != "crm-lha" or chosen.name != "pengine": self.patterns.append(self.templates["Pat:ChildKilled"] %(node, chosen.name)) self.patterns.append(self.templates["Pat:ChildRespawn"] %(node, chosen.name)) self.patterns.extend(chosen.pats) if node_is_dc: self.patterns.extend(chosen.dc_pats) # In an ideal world, this next stuff should be in the "chosen" object as a member function if self.Env["Name"] == "crm-lha" and chosen.triggersreboot: # Make sure the node goes down and then comes back up if it should reboot... for other in self.Env["nodes"]: if other != node: self.patterns.append(self.templates["Pat:They_stopped"] %(other, self.CM.key_for_node(node))) self.patterns.append(self.templates["Pat:Slave_started"] % node) self.patterns.append(self.templates["Pat:Local_started"] % node) if chosen.dc_only: # Sometimes these will be in the log, and sometimes they won't... self.okerrpatterns.append("%s .*Process %s:.* exited" % (node, chosen.name)) self.okerrpatterns.append("%s .*I_ERROR.*crmdManagedChildDied" % node) self.okerrpatterns.append("%s .*The %s subsystem terminated unexpectedly" % (node, chosen.name)) self.okerrpatterns.append("(ERROR|error): Client .* exited with return code") else: # Sometimes this won't be in the log... self.okerrpatterns.append(self.templates["Pat:ChildKilled"] %(node, chosen.name)) self.okerrpatterns.append(self.templates["Pat:ChildRespawn"] %(node, chosen.name)) self.okerrpatterns.append(self.templates["Pat:ChildExit"]) if chosen.name == "stonith": # Ignore actions for STONITH resources (rc, lines) = self.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): r = AuditResource(self.CM, line) if r.rclass == "stonith": self.okerrpatterns.append(self.templates["LogActions: Recover.*%s"] % r.id) # supply a copy so self.patterns doesnt end up empty tmpPats = [] tmpPats.extend(self.patterns) self.patterns.extend(chosen.badnews_ignore) # Look for STONITH ops, depending on Env["at-boot"] we might need to change the nodes status stonithPats = [] stonithPats.append(self.templates["Pat:Fencing_ok"] % node) stonith = self.create_watch(stonithPats, 0) stonith.setwatch() # set the watch for stable watch = self.create_watch( tmpPats, self.Env["DeadTime"] + self.Env["StableTime"] + self.Env["StartTime"]) watch.setwatch() # kill the component chosen.kill(node) self.debug("Waiting for the cluster to recover") self.CM.cluster_stable() self.debug("Waiting for any STONITHd node to come back up") self.CM.ns.WaitForAllNodesToComeUp(self.Env["nodes"], 600) self.debug("Waiting for the cluster to re-stabilize with all nodes") self.CM.cluster_stable(self.Env["StartTime"]) self.debug("Checking if %s was shot" % node) shot = stonith.look(60) if shot: self.debug("Found: " + repr(shot)) self.okerrpatterns.append(self.templates["Pat:Fencing_start"] % node) if self.Env["at-boot"] == 0: self.CM.ShouldBeStatus[node] = "down" # If fencing occurred, chances are many (if not all) the expected logs # will not be sent - or will be lost when the node reboots return self.success() # check for logs indicating a graceful recovery matched = watch.lookforall(allow_multiple_matches=1) if watch.unmatched: self.logger.log("Patterns not found: " + repr(watch.unmatched)) self.debug("Waiting for the cluster to re-stabilize with all nodes") is_stable = self.CM.cluster_stable(self.Env["StartTime"]) if not matched: return self.failure("Didn't find all expected %s patterns" % chosen.name) elif not is_stable: return self.failure("Cluster did not become stable after killing %s" % chosen.name) return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' # Note that okerrpatterns refers to the last time we ran this test # The good news is that this works fine for us... self.okerrpatterns.extend(self.patterns) return self.okerrpatterns AllTestClasses.append(ComponentFail) class SplitBrainTest(CTSTest): '''It is used to test split-brain. when the path between the two nodes break check the two nodes both take over the resource''' def __init__(self,cm): CTSTest.__init__(self,cm) self.name = "SplitBrain" self.start = StartTest(cm) self.startall = SimulStartLite(cm) self.is_experimental = 1 def isolate_partition(self, partition): other_nodes = [] other_nodes.extend(self.Env["nodes"]) for node in partition: try: other_nodes.remove(node) except ValueError: self.logger.log("Node "+node+" not in " + repr(self.Env["nodes"]) + " from " +repr(partition)) if len(other_nodes) == 0: return 1 self.debug("Creating partition: " + repr(partition)) self.debug("Everyone else: " + repr(other_nodes)) for node in partition: if not self.CM.isolate_node(node, other_nodes): self.logger.log("Could not isolate %s" % node) return 0 return 1 def heal_partition(self, partition): other_nodes = [] other_nodes.extend(self.Env["nodes"]) for node in partition: try: other_nodes.remove(node) except ValueError: self.logger.log("Node "+node+" not in " + repr(self.Env["nodes"])) if len(other_nodes) == 0: return 1 self.debug("Healing partition: " + repr(partition)) self.debug("Everyone else: " + repr(other_nodes)) for node in partition: self.CM.unisolate_node(node, other_nodes) def __call__(self, node): '''Perform split-brain test''' self.incr("calls") self.passed = 1 partitions = {} ret = self.startall(None) if not ret: return self.failure("Setup failed") while 1: # Retry until we get multiple partitions partitions = {} p_max = len(self.Env["nodes"]) for node in self.Env["nodes"]: p = self.Env.RandomGen.randint(1, p_max) if not partitions.has_key(p): partitions[p] = [] partitions[p].append(node) p_max = len(partitions.keys()) if p_max > 1: break # else, try again self.debug("Created %d partitions" % p_max) for key in partitions.keys(): self.debug("Partition["+str(key)+"]:\t"+repr(partitions[key])) # Disabling STONITH to reduce test complexity for now self.rsh(node, "crm_attribute -V -n stonith-enabled -v false") for key in partitions.keys(): self.isolate_partition(partitions[key]) count = 30 while count > 0: if len(self.CM.find_partitions()) != p_max: time.sleep(10) else: break else: self.failure("Expected partitions were not created") # Target number of partitions formed - wait for stability if not self.CM.cluster_stable(): self.failure("Partitioned cluster not stable") # Now audit the cluster state self.CM.partitions_expected = p_max if not self.audit(): self.failure("Audits failed") self.CM.partitions_expected = 1 # And heal them again for key in partitions.keys(): self.heal_partition(partitions[key]) # Wait for a single partition to form count = 30 while count > 0: if len(self.CM.find_partitions()) != 1: time.sleep(10) count -= 1 else: break else: self.failure("Cluster did not reform") # Wait for it to have the right number of members count = 30 while count > 0: members = [] partitions = self.CM.find_partitions() if len(partitions) > 0: members = partitions[0].split() if len(members) != len(self.Env["nodes"]): time.sleep(10) count -= 1 else: break else: self.failure("Cluster did not completely reform") # Wait up to 20 minutes - the delay is more preferable than # trying to continue with in a messed up state if not self.CM.cluster_stable(1200): self.failure("Reformed cluster not stable") answer = raw_input('Continue? [nY]') if answer and answer == "n": raise ValueError("Reformed cluster not stable") # Turn fencing back on if self.Env["DoFencing"]: self.rsh(node, "crm_attribute -V -D -n stonith-enabled") self.CM.cluster_stable() if self.passed: return self.success() return self.failure("See previous errors") def errorstoignore(self): '''Return list of errors which are 'normal' and should be ignored''' return [ "Another DC detected:", "(ERROR|error): attrd_cib_callback: .*Application of an update diff failed", "crmd_ha_msg_callback:.*not in our membership list", "CRIT:.*node.*returning after partition", ] def is_applicable(self): if not self.is_applicable_common(): return 0 return len(self.Env["nodes"]) > 2 AllTestClasses.append(SplitBrainTest) class Reattach(CTSTest): def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "Reattach" self.startall = SimulStartLite(cm) self.restart1 = RestartTest(cm) self.stopall = SimulStopLite(cm) self.is_unsafe = 0 # Handled by canrunnow() def setup(self, node): attempt = 0 if not self.startall(None): return None # Make sure we are really _really_ stable and that all # resources, including those that depend on transient node # attributes, are started while not self.CM.cluster_stable(double_check=True): if attempt < 5: attempt += 1 self.debug("Not stable yet, re-testing") else: self.logger.log("Cluster is not stable") return None return 1 def teardown(self, node): # Make sure 'node' is up start = StartTest(self.CM) start(node) is_managed = self.rsh(node, "crm_attribute -Q -G -t crm_config -n is-managed-default -d true", 1) is_managed = is_managed[:-1] # Strip off the newline if is_managed != "true": self.logger.log("Attempting to re-enable resource management on %s (%s)" % (node, is_managed)) managed = self.create_watch(["is-managed-default"], 60) managed.setwatch() self.rsh(node, "crm_attribute -V -D -n is-managed-default") if not managed.lookforall(): self.logger.log("Patterns not found: " + repr(managed.unmatched)) self.logger.log("Could not re-enable resource management") return 0 return 1 def canrunnow(self, node): '''Return TRUE if we can meaningfully run right now''' if self.find_ocfs2_resources(node): self.logger.log("Detach/Reattach scenarios are not possible with OCFS2 services present") return 0 return 1 def __call__(self, node): self.incr("calls") pats = [] managed = self.create_watch(["is-managed-default"], 60) managed.setwatch() self.debug("Disable resource management") self.rsh(node, "crm_attribute -V -n is-managed-default -v false") if not managed.lookforall(): self.logger.log("Patterns not found: " + repr(managed.unmatched)) return self.failure("Resource management not disabled") pats = [] pats.append(self.templates["Pat:RscOpOK"] % (".*", "start")) pats.append(self.templates["Pat:RscOpOK"] % (".*", "stop")) pats.append(self.templates["Pat:RscOpOK"] % (".*", "promote")) pats.append(self.templates["Pat:RscOpOK"] % (".*", "demote")) pats.append(self.templates["Pat:RscOpOK"] % (".*", "migrate")) watch = self.create_watch(pats, 60, "ShutdownActivity") watch.setwatch() self.debug("Shutting down the cluster") ret = self.stopall(None) if not ret: self.debug("Re-enable resource management") self.rsh(node, "crm_attribute -V -D -n is-managed-default") return self.failure("Couldn't shut down the cluster") self.debug("Bringing the cluster back up") ret = self.startall(None) time.sleep(5) # allow ping to update the CIB if not ret: self.debug("Re-enable resource management") self.rsh(node, "crm_attribute -V -D -n is-managed-default") return self.failure("Couldn't restart the cluster") if self.local_badnews("ResourceActivity:", watch): self.debug("Re-enable resource management") self.rsh(node, "crm_attribute -V -D -n is-managed-default") return self.failure("Resources stopped or started during cluster restart") watch = self.create_watch(pats, 60, "StartupActivity") watch.setwatch() managed = self.create_watch(["is-managed-default"], 60) managed.setwatch() self.debug("Re-enable resource management") self.rsh(node, "crm_attribute -V -D -n is-managed-default") if not managed.lookforall(): self.logger.log("Patterns not found: " + repr(managed.unmatched)) return self.failure("Resource management not enabled") self.CM.cluster_stable() # Ignore actions for STONITH resources ignore = [] (rc, lines) = self.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): r = AuditResource(self.CM, line) if r.rclass == "stonith": self.debug("Ignoring start actions for %s" % r.id) ignore.append(self.templates["Pat:RscOpOK"] % (r.id, "start_0")) if self.local_badnews("ResourceActivity:", watch, ignore): return self.failure("Resources stopped or started after resource management was re-enabled") return ret def errorstoignore(self): '''Return list of errors which should be ignored''' return [ "resources were active at shutdown", "pingd: .*(ERROR|error): send_ipc_message:", "pingd: .*(ERROR|error): send_update:", "lrmd: .*(ERROR|error): notify_client:", ] def is_applicable(self): if self.Env["Name"] == "crm-lha": return None return 1 AllTestClasses.append(Reattach) class SpecialTest1(CTSTest): '''Set up a custom test to cause quorum failure issues for Andrew''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "SpecialTest1" self.startall = SimulStartLite(cm) self.restart1 = RestartTest(cm) self.stopall = SimulStopLite(cm) def __call__(self, node): '''Perform the 'SpecialTest1' test for Andrew. ''' self.incr("calls") # Shut down all the nodes... ret = self.stopall(None) if not ret: return self.failure("Could not stop all nodes") # Test config recovery when the other nodes come up self.rsh(node, "rm -f "+CTSvars.CRM_CONFIG_DIR+"/cib*") # Start the selected node ret = self.restart1(node) if not ret: return self.failure("Could not start "+node) # Start all remaining nodes ret = self.startall(None) if not ret: return self.failure("Could not start the remaining nodes") return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' # Errors that occur as a result of the CIB being wiped return [ """warning: retrieveCib: Cluster configuration not found:""", """error: cib_perform_op: v1 patchset error, patch failed to apply: Application of an update diff failed""", """error: unpack_resources: Resource start-up disabled since no STONITH resources have been defined""", """error: unpack_resources: Either configure some or disable STONITH with the stonith-enabled option""", """error: unpack_resources: NOTE: Clusters with shared data need STONITH to ensure data integrity""", ] AllTestClasses.append(SpecialTest1) class HAETest(CTSTest): '''Set up a custom test to cause quorum failure issues for Andrew''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "HAETest" self.stopall = SimulStopLite(cm) self.startall = SimulStartLite(cm) self.is_loop = 1 def setup(self, node): # Start all remaining nodes ret = self.startall(None) if not ret: return self.failure("Couldn't start all nodes") return self.success() def teardown(self, node): # Stop everything ret = self.stopall(None) if not ret: return self.failure("Couldn't stop all nodes") return self.success() def wait_on_state(self, node, resource, expected_clones, attempts=240): while attempts > 0: active = 0 (rc, lines) = self.rsh(node, "crm_resource -r %s -W -Q" % resource, stdout=None) # Hack until crm_resource does the right thing if rc == 0 and lines: active = len(lines) if len(lines) == expected_clones: return 1 elif rc == 1: self.debug("Resource %s is still inactive" % resource) elif rc == 234: self.logger.log("Unknown resource %s" % resource) return 0 elif rc == 246: self.logger.log("Cluster is inactive") return 0 elif rc != 0: self.logger.log("Call to crm_resource failed, rc=%d" % rc) return 0 else: self.debug("Resource %s is active on %d times instead of %d" % (resource, active, expected_clones)) attempts -= 1 time.sleep(1) return 0 def find_dlm(self, node): self.r_dlm = None (rc, lines) = self.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): r = AuditResource(self.CM, line) if r.rtype == "controld" and r.parent != "NA": self.debug("Found dlm: %s" % self.r_dlm) self.r_dlm = r.parent return 1 return 0 def find_hae_resources(self, node): self.r_dlm = None self.r_o2cb = None self.r_ocfs2 = [] if self.find_dlm(node): self.find_ocfs2_resources(node) def is_applicable(self): if not self.is_applicable_common(): return 0 if self.Env["Schema"] == "hae": return 1 return None class HAERoleTest(HAETest): def __init__(self, cm): '''Lars' mount/unmount test for the HA extension. ''' HAETest.__init__(self,cm) self.name = "HAERoleTest" def change_state(self, node, resource, target): rc = self.rsh(node, "crm_resource -V -r %s -p target-role -v %s --meta" % (resource, target)) return rc def __call__(self, node): self.incr("calls") lpc = 0 failed = 0 delay = 2 done = time.time() + self.Env["loop-minutes"]*60 self.find_hae_resources(node) clone_max = len(self.Env["nodes"]) while time.time() <= done and not failed: lpc = lpc + 1 self.change_state(node, self.r_dlm, "Stopped") if not self.wait_on_state(node, self.r_dlm, 0): self.failure("%s did not go down correctly" % self.r_dlm) failed = lpc self.change_state(node, self.r_dlm, "Started") if not self.wait_on_state(node, self.r_dlm, clone_max): self.failure("%s did not come up correctly" % self.r_dlm) failed = lpc if not self.wait_on_state(node, self.r_o2cb, clone_max): self.failure("%s did not come up correctly" % self.r_o2cb) failed = lpc for fs in self.r_ocfs2: if not self.wait_on_state(node, fs, clone_max): self.failure("%s did not come up correctly" % fs) failed = lpc if failed: return self.failure("iteration %d failed" % failed) return self.success() AllTestClasses.append(HAERoleTest) class HAEStandbyTest(HAETest): '''Set up a custom test to cause quorum failure issues for Andrew''' def __init__(self, cm): HAETest.__init__(self,cm) self.name = "HAEStandbyTest" def change_state(self, node, resource, target): rc = self.rsh(node, "crm_standby -V -l reboot -v %s" % (target)) return rc def __call__(self, node): self.incr("calls") lpc = 0 failed = 0 done = time.time() + self.Env["loop-minutes"]*60 self.find_hae_resources(node) clone_max = len(self.Env["nodes"]) while time.time() <= done and not failed: lpc = lpc + 1 self.change_state(node, self.r_dlm, "true") if not self.wait_on_state(node, self.r_dlm, clone_max-1): self.failure("%s did not go down correctly" % self.r_dlm) failed = lpc self.change_state(node, self.r_dlm, "false") if not self.wait_on_state(node, self.r_dlm, clone_max): self.failure("%s did not come up correctly" % self.r_dlm) failed = lpc if not self.wait_on_state(node, self.r_o2cb, clone_max): self.failure("%s did not come up correctly" % self.r_o2cb) failed = lpc for fs in self.r_ocfs2: if not self.wait_on_state(node, fs, clone_max): self.failure("%s did not come up correctly" % fs) failed = lpc if failed: return self.failure("iteration %d failed" % failed) return self.success() AllTestClasses.append(HAEStandbyTest) class NearQuorumPointTest(CTSTest): ''' This test brings larger clusters near the quorum point (50%). In addition, it will test doing starts and stops at the same time. Here is how I think it should work: - loop over the nodes and decide randomly which will be up and which will be down Use a 50% probability for each of up/down. - figure out what to do to get into that state from the current state - in parallel, bring up those going up and bring those going down. ''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "NearQuorumPoint" def __call__(self, dummy): '''Perform the 'NearQuorumPoint' test. ''' self.incr("calls") startset = [] stopset = [] stonith = self.CM.prepare_fencing_watcher("NearQuorumPoint") #decide what to do with each node for node in self.Env["nodes"]: action = self.Env.RandomGen.choice(["start","stop"]) #action = self.Env.RandomGen.choice(["start","stop","no change"]) if action == "start" : startset.append(node) elif action == "stop" : stopset.append(node) self.debug("start nodes:" + repr(startset)) self.debug("stop nodes:" + repr(stopset)) #add search patterns watchpats = [ ] for node in stopset: if self.CM.ShouldBeStatus[node] == "up": watchpats.append(self.templates["Pat:We_stopped"] % node) for node in startset: if self.CM.ShouldBeStatus[node] == "down": #watchpats.append(self.templates["Pat:Slave_started"] % node) watchpats.append(self.templates["Pat:Local_started"] % node) else: for stopping in stopset: if self.CM.ShouldBeStatus[stopping] == "up": watchpats.append(self.templates["Pat:They_stopped"] % (node, self.CM.key_for_node(stopping))) if len(watchpats) == 0: return self.skipped() if len(startset) != 0: watchpats.append(self.templates["Pat:DC_IDLE"]) watch = self.create_watch(watchpats, self.Env["DeadTime"]+10) watch.setwatch() #begin actions for node in stopset: if self.CM.ShouldBeStatus[node] == "up": self.CM.StopaCMnoBlock(node) for node in startset: if self.CM.ShouldBeStatus[node] == "down": self.CM.StartaCMnoBlock(node) #get the result if watch.lookforall(): self.CM.cluster_stable() self.CM.fencing_cleanup("NearQuorumPoint", stonith) return self.success() self.logger.log("Warn: Patterns not found: " + repr(watch.unmatched)) #get the "bad" nodes upnodes = [] for node in stopset: if self.CM.StataCM(node) == 1: upnodes.append(node) downnodes = [] for node in startset: if self.CM.StataCM(node) == 0: downnodes.append(node) self.CM.fencing_cleanup("NearQuorumPoint", stonith) if upnodes == [] and downnodes == []: self.CM.cluster_stable() # Make sure they're completely down with no residule for node in stopset: self.rsh(node, self.templates["StopCmd"]) return self.success() if len(upnodes) > 0: self.logger.log("Warn: Unstoppable nodes: " + repr(upnodes)) if len(downnodes) > 0: self.logger.log("Warn: Unstartable nodes: " + repr(downnodes)) return self.failure() def is_applicable(self): if self.Env["Name"] == "crm-cman": return None return 1 AllTestClasses.append(NearQuorumPointTest) class RollingUpgradeTest(CTSTest): '''Perform a rolling upgrade of the cluster''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "RollingUpgrade" self.start = StartTest(cm) self.stop = StopTest(cm) self.stopall = SimulStopLite(cm) self.startall = SimulStartLite(cm) def setup(self, node): # Start all remaining nodes ret = self.stopall(None) if not ret: return self.failure("Couldn't stop all nodes") for node in self.Env["nodes"]: if not self.downgrade(node, None): return self.failure("Couldn't downgrade %s" % node) ret = self.startall(None) if not ret: return self.failure("Couldn't start all nodes") return self.success() def teardown(self, node): # Stop everything ret = self.stopall(None) if not ret: return self.failure("Couldn't stop all nodes") for node in self.Env["nodes"]: if not self.upgrade(node, None): return self.failure("Couldn't upgrade %s" % node) return self.success() def install(self, node, version, start=1, flags="--force"): target_dir = "/tmp/rpm-%s" % version src_dir = "%s/%s" % (self.Env["rpm-dir"], version) self.logger.log("Installing %s on %s with %s" % (version, node, flags)) if not self.stop(node): return self.failure("stop failure: "+node) rc = self.rsh(node, "mkdir -p %s" % target_dir) rc = self.rsh(node, "rm -f %s/*.rpm" % target_dir) (rc, lines) = self.rsh(node, "ls -1 %s/*.rpm" % src_dir, None) for line in lines: line = line[:-1] rc = self.rsh.cp("%s" % (line), "%s:%s/" % (node, target_dir)) rc = self.rsh(node, "rpm -Uvh %s %s/*.rpm" % (flags, target_dir)) if start and not self.start(node): return self.failure("start failure: "+node) return self.success() def upgrade(self, node, start=1): return self.install(node, self.Env["current-version"], start) def downgrade(self, node, start=1): return self.install(node, self.Env["previous-version"], start, "--force --nodeps") def __call__(self, node): '''Perform the 'Rolling Upgrade' test. ''' self.incr("calls") for node in self.Env["nodes"]: if self.upgrade(node): return self.failure("Couldn't upgrade %s" % node) self.CM.cluster_stable() return self.success() def is_applicable(self): if not self.is_applicable_common(): return None if not self.Env.has_key("rpm-dir"): return None if not self.Env.has_key("current-version"): return None if not self.Env.has_key("previous-version"): return None return 1 # Register RestartTest as a good test to run AllTestClasses.append(RollingUpgradeTest) class BSC_AddResource(CTSTest): '''Add a resource to the cluster''' def __init__(self, cm): CTSTest.__init__(self, cm) self.name = "AddResource" self.resource_offset = 0 self.cib_cmd = """cibadmin -C -o %s -X '%s' """ def __call__(self, node): self.incr("calls") self.resource_offset = self.resource_offset + 1 r_id = "bsc-rsc-%s-%d" % (node, self.resource_offset) start_pat = "crmd.*%s_start_0.*confirmed.*ok" patterns = [] patterns.append(start_pat % r_id) watch = self.create_watch(patterns, self.Env["DeadTime"]) watch.setwatch() ip = self.NextIP() if not self.make_ip_resource(node, r_id, "ocf", "IPaddr", ip): return self.failure("Make resource %s failed" % r_id) failed = 0 watch_result = watch.lookforall() if watch.unmatched: for regex in watch.unmatched: self.logger.log ("Warn: Pattern not found: %s" % (regex)) failed = 1 if failed: return self.failure("Resource pattern(s) not found") if not self.CM.cluster_stable(self.Env["DeadTime"]): return self.failure("Unstable cluster") return self.success() def NextIP(self): ip = self.Env["IPBase"] if ":" in ip: fields = ip.rpartition(":") fields[2] = str(hex(int(fields[2], 16)+1)) print str(hex(int(f[2], 16)+1)) else: fields = ip.rpartition('.') fields[2] = str(int(fields[2])+1) ip = fields[0] + fields[1] + fields[3]; self.Env["IPBase"] = ip return ip.strip() def make_ip_resource(self, node, id, rclass, type, ip): self.logger.log("Creating %s::%s:%s (%s) on %s" % (rclass,type,id,ip,node)) rsc_xml=""" """ % (id, rclass, type, id, id, ip) node_constraint = """ """ % (id, id, id, id, node) rc = 0 (rc, lines) = self.rsh(node, self.cib_cmd % ("constraints", node_constraint), None) if rc != 0: self.logger.log("Constraint creation failed: %d" % rc) return None (rc, lines) = self.rsh(node, self.cib_cmd % ("resources", rsc_xml), None) if rc != 0: self.logger.log("Resource creation failed: %d" % rc) return None return 1 def is_applicable(self): if self.Env["DoBSC"]: return 1 return None AllTestClasses.append(BSC_AddResource) class SimulStopLite(CTSTest): '''Stop any active nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "SimulStopLite" def __call__(self, dummy): '''Perform the 'SimulStopLite' setup work. ''' self.incr("calls") self.debug("Setup: " + self.name) # We ignore the "node" parameter... watchpats = [ ] for node in self.Env["nodes"]: if self.CM.ShouldBeStatus[node] == "up": self.incr("WasStarted") watchpats.append(self.templates["Pat:We_stopped"] % node) #if self.Env["use_logd"]: # watchpats.append(self.templates["Pat:Logd_stopped"] % node) if len(watchpats) == 0: self.CM.clear_all_caches() return self.success() # Stop all the nodes - at about the same time... watch = self.create_watch(watchpats, self.Env["DeadTime"]+10) watch.setwatch() self.set_timer() for node in self.Env["nodes"]: if self.CM.ShouldBeStatus[node] == "up": self.CM.StopaCMnoBlock(node) if watch.lookforall(): self.CM.clear_all_caches() # Make sure they're completely down with no residule for node in self.Env["nodes"]: self.rsh(node, self.templates["StopCmd"]) return self.success() did_fail = 0 up_nodes = [] for node in self.Env["nodes"]: if self.CM.StataCM(node) == 1: did_fail = 1 up_nodes.append(node) if did_fail: return self.failure("Active nodes exist: " + repr(up_nodes)) self.logger.log("Warn: All nodes stopped but CTS didnt detect: " + repr(watch.unmatched)) self.CM.clear_all_caches() return self.failure("Missing log message: "+repr(watch.unmatched)) def is_applicable(self): '''SimulStopLite is a setup test and never applicable''' return 0 class SimulStartLite(CTSTest): '''Start any stopped nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "SimulStartLite" def __call__(self, dummy): '''Perform the 'SimulStartList' setup work. ''' self.incr("calls") self.debug("Setup: " + self.name) # We ignore the "node" parameter... node_list = [] for node in self.Env["nodes"]: if self.CM.ShouldBeStatus[node] == "down": self.incr("WasStopped") node_list.append(node) self.set_timer() while len(node_list) > 0: # Repeat until all nodes come up watchpats = [ ] uppat = self.templates["Pat:Slave_started"] if self.CM.upcount() == 0: uppat = self.templates["Pat:Local_started"] watchpats.append(self.templates["Pat:DC_IDLE"]) for node in node_list: watchpats.append(uppat % node) watchpats.append(self.templates["Pat:InfraUp"] % node) watchpats.append(self.templates["Pat:PacemakerUp"] % node) # Start all the nodes - at about the same time... watch = self.create_watch(watchpats, self.Env["DeadTime"]+10) watch.setwatch() stonith = self.CM.prepare_fencing_watcher(self.name) for node in node_list: self.CM.StartaCMnoBlock(node) watch.lookforall() node_list = self.CM.fencing_cleanup(self.name, stonith) # Remove node_list messages from watch.unmatched for node in node_list: self.logger.debug("Dealing with stonith operations for %s" % repr(node_list)) if watch.unmatched: try: watch.unmatched.remove(uppat % node) except: self.debug("Already matched: %s" % (uppat % node)) try: watch.unmatched.remove(self.templates["Pat:InfraUp"] % node) except: self.debug("Already matched: %s" % (self.templates["Pat:InfraUp"] % node)) try: watch.unmatched.remove(self.templates["Pat:PacemakerUp"] % node) except: self.debug("Already matched: %s" % (self.templates["Pat:PacemakerUp"] % node)) if watch.unmatched: for regex in watch.unmatched: self.logger.log ("Warn: Startup pattern not found: %s" %(regex)) if not self.CM.cluster_stable(): return self.failure("Cluster did not stabilize") did_fail = 0 unstable = [] for node in self.Env["nodes"]: if self.CM.StataCM(node) == 0: did_fail = 1 unstable.append(node) if did_fail: return self.failure("Unstarted nodes exist: " + repr(unstable)) unstable = [] for node in self.Env["nodes"]: if not self.CM.node_stable(node): did_fail = 1 unstable.append(node) if did_fail: return self.failure("Unstable cluster nodes exist: " + repr(unstable)) return self.success() def is_applicable(self): '''SimulStartLite is a setup test and never applicable''' return 0 def TestList(cm, audits): result = [] for testclass in AllTestClasses: bound_test = testclass(cm) if bound_test.is_applicable(): bound_test.Audits = audits result.append(bound_test) return result class RemoteLXC(CTSTest): def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "RemoteLXC" self.start = StartTest(cm) self.startall = SimulStartLite(cm) self.num_containers = 2 self.is_container = 1 self.is_docker_unsafe = 1 self.failed = 0 self.fail_string = "" def start_lxc_simple(self, node): # restore any artifacts laying around from a previous test. self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -R &>/dev/null") # generate the containers, put them in the config, add some resources to them pats = [ ] watch = self.create_watch(pats, 120) watch.setwatch() pats.append(self.templates["Pat:RscOpOK"] % ("lxc1", "start_0")) pats.append(self.templates["Pat:RscOpOK"] % ("lxc2", "start_0")) pats.append(self.templates["Pat:RscOpOK"] % ("lxc-ms", "start_0")) pats.append(self.templates["Pat:RscOpOK"] % ("lxc-ms", "promote_0")) self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -g -a -m -s -c %d &>/dev/null" % self.num_containers) self.set_timer("remoteSimpleInit") watch.lookforall() self.log_timer("remoteSimpleInit") if watch.unmatched: self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched)) self.failed = 1 def cleanup_lxc_simple(self, node): pats = [ ] # if the test failed, attempt to clean up the cib and libvirt environment # as best as possible if self.failed == 1: # restore libvirt and cib self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -R &>/dev/null") self.rsh(node, "crm_resource -C -r container1 &>/dev/null") self.rsh(node, "crm_resource -C -r container2 &>/dev/null") self.rsh(node, "crm_resource -C -r lxc1 &>/dev/null") self.rsh(node, "crm_resource -C -r lxc2 &>/dev/null") self.rsh(node, "crm_resource -C -r lxc-ms &>/dev/null") time.sleep(20) return watch = self.create_watch(pats, 120) watch.setwatch() pats.append(self.templates["Pat:RscOpOK"] % ("container1", "stop_0")) pats.append(self.templates["Pat:RscOpOK"] % ("container2", "stop_0")) self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -p &>/dev/null") self.set_timer("remoteSimpleCleanup") watch.lookforall() self.log_timer("remoteSimpleCleanup") if watch.unmatched: self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched)) self.failed = 1 # cleanup libvirt self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -R &>/dev/null") def __call__(self, node): '''Perform the 'RemoteLXC' test. ''' self.incr("calls") ret = self.startall(None) if not ret: return self.failure("Setup failed, start all nodes failed.") rc = self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -v &>/dev/null") if rc == 1: self.log("Environment test for lxc support failed.") return self.skipped() self.start_lxc_simple(node) self.cleanup_lxc_simple(node) self.debug("Waiting for the cluster to recover") self.CM.cluster_stable() if self.failed == 1: return self.failure(self.fail_string) return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' return [ """Updating failcount for ping""", """LogActions: Recover ping""", """LogActions: Recover lxc-ms""", """LogActions: Recover container""", # The orphaned lxc-ms resource causes an expected transition error # that is a result of the pengine not having knowledge that the # ms resource used to be a clone. As a result it looks like that # resource is running in multiple locations when it shouldn't... But in # this instance we know why this error is occurring and that it is expected. """Calculated Transition .* /var/lib/pacemaker/pengine/pe-error""", """Resource lxc-ms .* is active on 2 nodes attempting recovery""", """Unknown operation: fail""", """notice: operation_finished: ping-""", """notice: operation_finished: container""", """notice: operation_finished: .*_monitor_0:.*:stderr""", """(ERROR|error): sending stonithRA op to stonithd failed.""", ] AllTestClasses.append(RemoteLXC) ################################################################### -class RemoteBaremetal(CTSTest): +class RemoteDriver(CTSTest): ################################################################### def __init__(self, cm): CTSTest.__init__(self,cm) - self.name = "RemoteBaremetal" + self.name = "RemoteDriver" self.is_docker_unsafe = 1 self.start = StartTest(cm) self.startall = SimulStartLite(cm) self.stop = StopTest(cm) self.pcmk_started = 0 self.failed = 0 self.fail_string = "" self.remote_node_added = 0 - self.remote_node = "remote1" self.remote_rsc_added = 0 - self.remote_rsc = "remote1-rsc" + self.remote_rsc = "remote-rsc" self.cib_cmd = """cibadmin -C -o %s -X '%s' """ def del_rsc(self, node, rsc): for othernode in self.Env["nodes"]: if othernode == node: # we don't want to try and use the cib that we just shutdown. # find a cluster node that is not our soon to be remote-node. continue rc = self.rsh(othernode, "crm_resource -D -r %s -t primitive" % (rsc)) if rc != 0: self.fail_string = ("Removal of resource '%s' failed" % (rsc)) self.failed = 1 return def add_rsc(self, node, rsc_xml): for othernode in self.CM.Env["nodes"]: if othernode == node: # we don't want to try and use the cib that we just shutdown. # find a cluster node that is not our soon to be remote-node. continue rc = self.rsh(othernode, self.cib_cmd % ("resources", rsc_xml)) if rc != 0: self.fail_string = "resource creation failed" self.failed = 1 return def add_primitive_rsc(self, node): rsc_xml = """ - + - + + """ % (self.remote_rsc) self.add_rsc(node, rsc_xml) if self.failed == 0: self.remote_rsc_added = 1 def add_connection_rsc(self, node): rsc_xml = """ - - - + + + - - + + - """ % (self.remote_node, node) self.add_rsc(node, rsc_xml) if self.failed == 0: self.remote_node_added = 1 - def step1_start_metal(self, node): + def stop_pcmk_remote(self, node): + # disable pcmk remote + for i in range(10): + rc = self.rsh(node, "service pacemaker_remote stop") + if rc != 0: + time.sleep(6) + else: + break + + def start_pcmk_remote(self, node): + for i in range(10): + rc = self.rsh(node, "service pacemaker_remote start") + if rc != 0: + time.sleep(6) + else: + self.pcmk_started = 1 + break + + def start_metal(self, node): pcmk_started = 0 # make sure the resource doesn't already exist for some reason self.rsh(node, "crm_resource -D -r %s -t primitive" % (self.remote_rsc)) self.rsh(node, "crm_resource -D -r %s -t primitive" % (self.remote_node)) if not self.stop(node): self.failed = 1 self.fail_string = "Failed to shutdown cluster node %s" % (node) return - for i in range(10): - rc = self.rsh(node, "service pacemaker_remote start") - if rc != 0: - time.sleep(6) - else: - self.pcmk_started = 1 - break + self.start_pcmk_remote(node) if self.pcmk_started == 0: self.failed = 1 self.fail_string = "Failed to start pacemaker_remote on node %s" % (node) return # convert node to baremetal node now that it has shutdow the cluster stack pats = [ ] watch = self.create_watch(pats, 120) watch.setwatch() pats.append(self.templates["Pat:RscOpOK"] % (self.remote_node, "start")) + pats.append(self.templates["Pat:DC_IDLE"]) self.add_connection_rsc(node) self.set_timer("remoteMetalInit") watch.lookforall() self.log_timer("remoteMetalInit") if watch.unmatched: self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched)) self.failed = 1 - def step2_add_rsc(self, node): + def migrate_connection(self, node): + if self.failed == 1: + return + + pats = [ ] + pats.append(self.templates["Pat:RscOpOK"] % (self.remote_node, "migrate_to")) + pats.append(self.templates["Pat:RscOpOK"] % (self.remote_node, "migrate_from")) + pats.append(self.templates["Pat:DC_IDLE"]) + watch = self.create_watch(pats, 120) + watch.setwatch() + + (rc, lines) = self.rsh(node, "crm_resource -M -r %s" % (self.remote_node), None) + if rc != 0: + self.fail_string = "failed to move remote node connection resource" + self.logger.log(self.fail_string) + self.failed = 1 + return + + self.set_timer("remoteMetalMigrate") + watch.lookforall() + self.log_timer("remoteMetalMigrate") + + if watch.unmatched: + self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched)) + self.logger.log(self.fail_string) + self.failed = 1 + return + + def fail_rsc(self, node): + if self.failed == 1: + return + + watchpats = [ ] + watchpats.append(self.templates["Pat:RscRemoteOpOK"] % (self.remote_rsc, "stop", self.remote_node)) + watchpats.append(self.templates["Pat:RscRemoteOpOK"] % (self.remote_rsc, "start", self.remote_node)) + watchpats.append(self.templates["Pat:DC_IDLE"]) + + watch = self.create_watch(watchpats, 120) + watch.setwatch() + + self.debug("causing dummy rsc to fail.") + + rc = self.rsh(node, "rm -f /var/run/resource-agents/Dummy*") + + self.set_timer("remoteRscFail") + watch.lookforall() + self.log_timer("remoteRscFail") + if watch.unmatched: + self.fail_string = "Unmatched patterns during rsc fail: %s" % (repr(watch.unmatched)) + self.logger.log(self.fail_string) + self.failed = 1 + + def fail_connection(self, node): + if self.failed == 1: + return + + watchpats = [ ] + watchpats.append(self.templates["Pat:FenceOpOK"] % self.remote_node) + watchpats.append(self.templates["Pat:NodeFenced"] % self.remote_node) + + watch = self.create_watch(watchpats, 120) + watch.setwatch() + + # force stop the pcmk remote daemon. this will result in fencing + self.debug("Force stopped active remote node") + self.stop_pcmk_remote(node) + + self.debug("Waiting for remote node to be fenced.") + self.set_timer("remoteMetalFence") + watch.lookforall() + self.log_timer("remoteMetalFence") + if watch.unmatched: + self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched)) + self.logger.log(self.fail_string) + self.failed = 1 + return + + self.debug("Waiting for the remote node to come back up") + self.CM.ns.WaitForNodeToComeUp(node, 120); + + pats = [ ] + watch = self.create_watch(pats, 120) + watch.setwatch() + pats.append(self.templates["Pat:RscOpOK"] % (self.remote_node, "start")) + if self.remote_rsc_added == 1: + pats.append(self.templates["Pat:RscOpOK"] % (self.remote_rsc, "monitor")) + + # start the remote node again watch it integrate back into cluster. + self.start_pcmk_remote(node) + if self.pcmk_started == 0: + self.failed = 1 + self.fail_string = "Failed to start pacemaker_remote on node %s" % (node) + self.logger.log(self.fail_string) + return + + self.debug("Waiting for remote node to rejoin cluster after being fenced.") + self.set_timer("remoteMetalRestart") + watch.lookforall() + self.log_timer("remoteMetalRestart") + if watch.unmatched: + self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched)) + self.failed = 1 + self.logger.log(self.fail_string) + return + + def add_dummy_rsc(self, node): if self.failed == 1: return # verify we can put a resource on the remote node pats = [ ] watch = self.create_watch(pats, 120) watch.setwatch() - pats.append("process_lrm_event:.*Operation %s_start_0.*node=%s, .*confirmed.*true" % (self.remote_rsc, self.remote_node)) + pats.append(self.templates["Pat:RscRemoteOpOK"] % (self.remote_rsc, "start", self.remote_node)) + pats.append(self.templates["Pat:DC_IDLE"]) # Add a resource that must live on remote-node self.add_primitive_rsc(node) - # this crm_resource command actually occurs on the remote node - # which verifies that the ipc proxy works - time.sleep(1) - (rc, lines) = self.rsh(node, "crm_resource -W -r remote1-rsc --quiet", None) + # force that rsc to prefer the remote node. + (rc, line) = self.CM.rsh(node, "crm_resource -M -r %s -N %s -f" % (self.remote_rsc, self.remote_node), None) if rc != 0: - self.fail_string = "Failed to get location of resource remote1-rsc" + self.fail_string = "Failed to place remote resource on remote node." self.failed = 1 return - find = 0 - for line in lines: - if self.remote_node in line.split(): - find = 1 - break - - if find == 0: - rc = self.rsh(node, "crm_resource -M -r remote1-rsc -N %s" % (self.remote_node)) - if rc != 0: - self.fail_string = "Failed to place primitive on remote-node" - self.failed = 1 - return - self.set_timer("remoteMetalRsc") watch.lookforall() self.log_timer("remoteMetalRsc") if watch.unmatched: self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched)) self.failed = 1 - def step3_test_attributes(self, node): + def test_attributes(self, node): if self.failed == 1: return # This verifies permanent attributes can be set on a remote-node. It also # verifies the remote-node can edit it's own cib node section remotely. (rc, line) = self.CM.rsh(node, "crm_attribute -l forever -n testattr -v testval -N %s" % (self.remote_node), None) if rc != 0: self.fail_string = "Failed to set remote-node attribute. rc:%s output:%s" % (rc, line) self.failed = 1 return (rc, line) = self.CM.rsh(node, "crm_attribute -l forever -n testattr -Q -N %s" % (self.remote_node), None) if rc != 0: self.fail_string = "Failed to get remote-node attribute" self.failed = 1 return (rc, line) = self.CM.rsh(node, "crm_attribute -l forever -n testattr -D -N %s" % (self.remote_node), None) if rc != 0: self.fail_string = "Failed to delete remote-node attribute" self.failed = 1 return def cleanup_metal(self, node): if self.pcmk_started == 0: return pats = [ ] watch = self.create_watch(pats, 120) watch.setwatch() if self.remote_rsc_added == 1: pats.append(self.templates["Pat:RscOpOK"] % (self.remote_rsc, "stop")) if self.remote_node_added == 1: pats.append(self.templates["Pat:RscOpOK"] % (self.remote_node, "stop")) self.set_timer("remoteMetalCleanup") if self.remote_rsc_added == 1: - self.rsh(node, "crm_resource -U -r remote1-rsc -N %s" % (self.remote_node)) + self.rsh(node, "crm_resource -U -r %s -N %s" % (self.remote_rsc, self.remote_node)) self.del_rsc(node, self.remote_rsc) if self.remote_node_added == 1: + self.rsh(node, "crm_resource -U -r %s" % (self.remote_node)) self.del_rsc(node, self.remote_node) watch.lookforall() self.log_timer("remoteMetalCleanup") if watch.unmatched: self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched)) self.failed = 1 - # disable pcmk remote - for i in range(10): - rc = self.rsh(node, "service pacemaker_remote stop") - if rc != 0: - time.sleep(6) - else: - break + self.stop_pcmk_remote(node) - def setup_env(self): + def setup_env(self, node): + + self.remote_node = "remote_%s" % (node) sync_key = 0 # we are assuming if all nodes have a key, that it is # the right key... If any node doesn't have a remote # key, we regenerate it everywhere. for node in self.Env["nodes"]: rc = self.rsh(node, "ls /etc/pacemaker/authkey") if rc != 0: sync_key = 1 break if sync_key == 0: return # create key locally os.system("/usr/share/pacemaker/tests/cts/lxc_autogen.sh -k &> /dev/null") # sync key throughout the cluster for node in self.Env["nodes"]: rc = self.rsh(node, "mkdir /etc/pacemaker") self.rsh.cp("/etc/pacemaker/authkey", "%s:/etc/pacemaker/authkey" % (node)) def is_applicable(self): if not self.is_applicable_common(): return False for node in self.Env["nodes"]: rc = self.rsh(node, "type pacemaker_remoted >/dev/null 2>&1") if rc != 0: return False return True def __call__(self, node): '''Perform the 'RemoteBaremetal' test. ''' self.incr("calls") ret = self.startall(None) if not ret: return self.failure("Setup failed, start all nodes failed.") - self.setup_env() - self.step1_start_metal(node) - self.step2_add_rsc(node) - self.step3_test_attributes(node) + self.setup_env(node) + self.start_metal(node) + self.add_dummy_rsc(node) + self.test_attributes(node) self.cleanup_metal(node) self.debug("Waiting for the cluster to recover") self.CM.cluster_stable() if self.failed == 1: return self.failure(self.fail_string) return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' - return [ """is running on remote1 which isn't allowed""", + return [ """is running on remote.*which isn't allowed""", """Connection terminated""", """Failed to send remote""", ] -AllTestClasses.append(RemoteBaremetal) +# Remote driver is called by other tests. + +################################################################### +class RemoteBasic(CTSTest): +################################################################### + def __init__(self, cm): + CTSTest.__init__(self,cm) + self.name = "RemoteBasic" + self.start = StartTest(cm) + self.startall = SimulStartLite(cm) + self.driver = RemoteDriver(cm) + self.is_docker_unsafe = 1 + + def __call__(self, node): + '''Perform the 'RemoteBaremetal' test. ''' + self.incr("calls") + + ret = self.startall(None) + if not ret: + return self.failure("Setup failed, start all nodes failed.") + + self.driver.setup_env(node) + self.driver.start_metal(node) + self.driver.add_dummy_rsc(node) + self.driver.test_attributes(node) + self.driver.cleanup_metal(node) + + self.debug("Waiting for the cluster to recover") + self.CM.cluster_stable() + if self.driver.failed == 1: + return self.failure(self.driver.fail_string) + + return self.success() + + def is_applicable(self): + return self.driver.is_applicable() + + def errorstoignore(self): + return self.driver.errorstoignore() + +AllTestClasses.append(RemoteBasic) + +################################################################### +class RemoteStonithd(CTSTest): +################################################################### + def __init__(self, cm): + CTSTest.__init__(self,cm) + self.name = "RemoteStonithd" + self.start = StartTest(cm) + self.startall = SimulStartLite(cm) + self.driver = RemoteDriver(cm) + self.is_docker_unsafe = 1 + + def __call__(self, node): + '''Perform the 'RemoteStonithd' test. ''' + self.incr("calls") + + ret = self.startall(None) + if not ret: + return self.failure("Setup failed, start all nodes failed.") + + self.driver.setup_env(node) + self.driver.start_metal(node) + self.driver.add_dummy_rsc(node) + + self.driver.fail_connection(node) + self.driver.cleanup_metal(node) + + self.debug("Waiting for the cluster to recover") + self.CM.cluster_stable() + if self.driver.failed == 1: + return self.failure(self.driver.fail_string) + + return self.success() + + def is_applicable(self): + if not self.driver.is_applicable(): + return False + + if self.Env.has_key("DoFencing"): + return self.Env["DoFencing"] + + return True + + def errorstoignore(self): + ignore_pats = [ + """Unexpected disconnect on remote-node""", + """error: process_lrm_event: Operation remote_.*_monitor""", + """LogActions: Recover remote_""", + """Calculated Transition .* /var/lib/pacemaker/pengine/pe-error""", + """error: native_create_actions: Resource .*ocf::.* is active on 2 nodes attempting recovery""", + ] + + ignore_pats.extend(self.driver.errorstoignore()) + return ignore_pats + +AllTestClasses.append(RemoteStonithd) + +################################################################### +class RemoteMigrate(CTSTest): +################################################################### + def __init__(self, cm): + CTSTest.__init__(self,cm) + self.name = "RemoteMigrate" + self.start = StartTest(cm) + self.startall = SimulStartLite(cm) + self.driver = RemoteDriver(cm) + self.is_docker_unsafe = 1 + + def __call__(self, node): + '''Perform the 'RemoteMigrate' test. ''' + self.incr("calls") + + ret = self.startall(None) + if not ret: + return self.failure("Setup failed, start all nodes failed.") + + self.driver.setup_env(node) + self.driver.start_metal(node) + self.driver.add_dummy_rsc(node) + self.driver.migrate_connection(node) + self.driver.cleanup_metal(node) + + self.debug("Waiting for the cluster to recover") + self.CM.cluster_stable() + if self.driver.failed == 1: + return self.failure(self.driver.fail_string) + + return self.success() + + def is_applicable(self): + return self.driver.is_applicable() + + def errorstoignore(self): + return self.driver.errorstoignore() + +AllTestClasses.append(RemoteMigrate) + + +################################################################### +class RemoteRscFailure(CTSTest): +################################################################### + def __init__(self, cm): + + # fail a rsc on a remote node, verify recovery. + CTSTest.__init__(self,cm) + self.name = "RemoteRscFailure" + self.start = StartTest(cm) + self.startall = SimulStartLite(cm) + self.driver = RemoteDriver(cm) + self.is_docker_unsafe = 1 + + def __call__(self, node): + '''Perform the 'RemoteRscFailure' test. ''' + self.incr("calls") + + ret = self.startall(None) + if not ret: + return self.failure("Setup failed, start all nodes failed.") + + self.driver.setup_env(node) + self.driver.start_metal(node) + self.driver.add_dummy_rsc(node) + + # This is an important step. We are migrating the connection + # before failing the resource. This verifies that the migration + # has properly maintained control over the remote-node. + self.driver.migrate_connection(node) + + self.driver.fail_rsc(node) + self.driver.cleanup_metal(node) + + self.debug("Waiting for the cluster to recover") + self.CM.cluster_stable() + if self.driver.failed == 1: + return self.failure(self.driver.fail_string) + + return self.success() + + def is_applicable(self): + return self.driver.is_applicable() + + def errorstoignore(self): + ignore_pats = [ + """LogActions: Recover remote-rsc""", + ] + + ignore_pats.extend(self.driver.errorstoignore()) + return ignore_pats + +AllTestClasses.append(RemoteRscFailure) # vim:ts=4:sw=4:et: diff --git a/cts/patterns.py b/cts/patterns.py index 8d34e1c051..e734f40e0b 100644 --- a/cts/patterns.py +++ b/cts/patterns.py @@ -1,526 +1,529 @@ from UserDict import UserDict import sys, time, types, syslog, os, struct, string, signal, traceback, warnings, socket from cts.CTSvars import * patternvariants = {} class BasePatterns: def __init__(self, name): self.name = name patternvariants[name] = self self.ignore = [] self.BadNews = [] self.components = {} self.commands = { "StatusCmd" : "crmadmin -t 60000 -S %s 2>/dev/null", "CibQuery" : "cibadmin -Ql", "CibAddXml" : "cibadmin --modify -c --xml-text %s", "CibDelXpath" : "cibadmin --delete --xpath %s", # 300,000 == 5 minutes "RscRunning" : CTSvars.CRM_DAEMON_DIR + "/lrmd_test -R -r %s", "CIBfile" : "%s:"+CTSvars.CRM_CONFIG_DIR+"/cib.xml", "TmpDir" : "/tmp", "BreakCommCmd" : "iptables -A INPUT -s %s -j DROP >/dev/null 2>&1", "FixCommCmd" : "iptables -D INPUT -s %s -j DROP >/dev/null 2>&1", # tc qdisc add dev lo root handle 1: cbq avpkt 1000 bandwidth 1000mbit # tc class add dev lo parent 1: classid 1:1 cbq rate "$RATE"kbps allot 17000 prio 5 bounded isolated # tc filter add dev lo parent 1: protocol ip prio 16 u32 match ip dst 127.0.0.1 match ip sport $PORT 0xFFFF flowid 1:1 # tc qdisc add dev lo parent 1: netem delay "$LATENCY"msec "$(($LATENCY/4))"msec 10% 2> /dev/null > /dev/null "ReduceCommCmd" : "", "RestoreCommCmd" : "tc qdisc del dev lo root", "UUIDQueryCmd" : "crmadmin -N", "MaintenanceModeOn" : "cibadmin --modify -c --xml-text ''", "MaintenanceModeOff" : "cibadmin --delete --xpath \"//nvpair[@name='maintenance-mode']\"", "StandbyCmd" : "crm_attribute -VQ -U %s -n standby -l forever -v %s 2>/dev/null", "StandbyQueryCmd" : "crm_attribute -QG -U %s -n standby -l forever -d off 2>/dev/null", } self.search = { "Pat:DC_IDLE" : "crmd.*State transition.*-> S_IDLE", # This wont work if we have multiple partitions "Pat:Local_started" : "%s\W.*The local CRM is operational", "Pat:Slave_started" : "%s\W.*State transition.*-> S_NOT_DC", "Pat:Master_started": "%s\W.*State transition.*-> S_IDLE", "Pat:We_stopped" : "heartbeat.*%s.*Heartbeat shutdown complete", "Pat:Logd_stopped" : "%s\W.*logd:.*Exiting write process", "Pat:They_stopped" : "%s\W.*LOST:.* %s ", "Pat:They_dead" : "node %s.*: is dead", "Pat:TransitionComplete" : "Transition status: Complete: complete", "Pat:Fencing_start" : "Initiating remote operation .* for %s", "Pat:Fencing_ok" : "stonith.*remote_op_done:.*Operation .* of %s by .*: OK", "Pat:RscOpOK" : "process_lrm_event:.*Operation %s_%s.*ok.*confirmed", + "Pat:RscRemoteOpOK" : "process_lrm_event:.*Operation %s_%s.*ok.*node=%s, .*confirmed.*true", + "Pat:NodeFenced" : "tengine_stonith_notify:.*Peer %s was terminated .*: OK", + "Pat:FenceOpOK" : "Operation .* for host '%s' with device .* returned: 0", } def get_component(self, key): if self.components.has_key(key): return self.components[key] print "Unknown component '%s' for %s" % (key, self.name) return [] def get_patterns(self, key): if key == "BadNews": return self.BadNews elif key == "BadNewsIgnore": return self.ignore elif key == "Commands": return self.commands elif key == "Search": return self.search elif key == "Components": return self.components def __getitem__(self, key): if key == "Name": return self.name elif self.commands.has_key(key): return self.commands[key] elif self.search.has_key(key): return self.search[key] else: print "Unknown template '%s' for %s" % (key, self.name) return None class crm_lha(BasePatterns): def __init__(self, name): BasePatterns.__init__(self, name) self.commands.update({ "StartCmd" : "service heartbeat start > /dev/null 2>&1", "StopCmd" : "service heartbeat stop > /dev/null 2>&1", "EpocheCmd" : "crm_node -H -e", "QuorumCmd" : "crm_node -H -q", "ParitionCmd" : "crm_node -H -p", }) self.search.update({ # Patterns to look for in the log files for various occasions... "Pat:ChildKilled" : "%s\W.*heartbeat.*%s.*killed by signal 9", "Pat:ChildRespawn" : "%s\W.*heartbeat.*Respawning client.*%s", "Pat:ChildExit" : "(ERROR|error): Client .* exited with return code", }) self.BadNews = [ r"error:", r"crit:", r"ERROR:", r"CRIT:", r"Shutting down...NOW", r"Timer I_TERMINATE just popped", r"input=I_ERROR", r"input=I_FAIL", r"input=I_INTEGRATED cause=C_TIMER_POPPED", r"input=I_FINALIZED cause=C_TIMER_POPPED", r"input=I_ERROR", r", exiting\.", r"WARN.*Ignoring HA message.*vote.*not in our membership list", r"pengine.*Attempting recovery of resource", r"is taking more than 2x its timeout", r"Confirm not received from", r"Welcome reply not received from", r"Attempting to schedule .* after a stop", r"Resource .* was active at shutdown", r"duplicate entries for call_id", r"Search terminated:", r"No need to invoke the TE", r"global_timer_callback:", r"Faking parameter digest creation", r"Parameters to .* action changed:", r"Parameters to .* changed", ] self.ignore = [ "(ERROR|error): crm_abort:.*crm_glib_handler: ", "(ERROR|error): Message hist queue is filling up", "stonithd.*CRIT: external_hostlist:.*'vmware gethosts' returned an empty hostlist", "stonithd.*(ERROR|error): Could not list nodes for stonith RA external/vmware.", "pengine.*Preventing .* from re-starting", ] class crm_cs_v0(BasePatterns): def __init__(self, name): BasePatterns.__init__(self, name) self.commands.update({ "EpocheCmd" : "crm_node -e --openais", "QuorumCmd" : "crm_node -q --openais", "ParitionCmd" : "crm_node -p --openais", "StartCmd" : "service corosync start", "StopCmd" : "service corosync stop", }) self.search.update({ # The next pattern is too early # "Pat:We_stopped" : "%s.*Service engine unloaded: Pacemaker Cluster Manager", # The next pattern would be preferred, but it doesn't always come out # "Pat:We_stopped" : "%s.*Corosync Cluster Engine exiting with status", "Pat:We_stopped" : "%s\W.*Service engine unloaded: corosync cluster quorum service", "Pat:They_stopped" : "%s\W.*crmd.*Node %s\[.*state is now lost", "Pat:They_dead" : "corosync:.*Node %s is now: lost", "Pat:ChildExit" : "Child process .* exited", "Pat:ChildKilled" : "%s\W.*corosync.*Child process %s terminated with signal 9", "Pat:ChildRespawn" : "%s\W.*corosync.*Respawning failed child process: %s", }) self.ignore = [ r"crm_mon:", r"crmadmin:", r"update_trace_data", r"async_notify:.*strange, client not found", r"Parse error: Ignoring unknown option .*nodename", r"error: log_operation:.*Operation 'reboot' .* with device 'FencingFail' returned:", r"Child process .* terminated with signal 9", r"getinfo response error: 1$", ] self.BadNews = [ r"error:", r"crit:", r"ERROR:", r"CRIT:", r"Shutting down...NOW", r"Timer I_TERMINATE just popped", r"input=I_ERROR", r"input=I_FAIL", r"input=I_INTEGRATED cause=C_TIMER_POPPED", r"input=I_FINALIZED cause=C_TIMER_POPPED", r"input=I_ERROR", r", exiting\.", r"(WARN|warn).*Ignoring HA message.*vote.*not in our membership list", r"pengine.*Attempting recovery of resource", r"is taking more than 2x its timeout", r"Confirm not received from", r"Welcome reply not received from", r"Attempting to schedule .* after a stop", r"Resource .* was active at shutdown", r"duplicate entries for call_id", r"Search terminated:", r":global_timer_callback", r"Faking parameter digest creation", r"Parameters to .* action changed:", r"Parameters to .* changed", r"The .* process .* terminated with signal", r"Child process .* terminated with signal", r"LogActions:.*Recover", r"rsyslogd.* imuxsock lost .* messages from pid .* due to rate-limiting", r"Peer is not part of our cluster", r"We appear to be in an election loop", r"Unknown node -> we will not deliver message", r"crm_write_blackbox", r"pacemakerd.*Could not connect to Cluster Configuration Database API", r"Receiving messages from a node we think is dead", r"share the same cluster nodeid", r"share the same name", #r"crm_ipc_send:.*Request .* failed", #r"crm_ipc_send:.*Sending to .* is disabled until pending reply is received", # Not inherently bad, but worth tracking #r"No need to invoke the TE", #r"ping.*: DEBUG: Updated connected = 0", #r"Digest mis-match:", r"te_graph_trigger:.*Transition failed: terminated", r"process_ping_reply", r"warn.*:retrieveCib", #r"Executing .* fencing operation", #r"fence_pcmk.* Call to fence", #r"fence_pcmk", r"cman killed by node", r"Election storm", r"stalled the FSA with pending inputs", ] self.components["common-ignore"] = [ "Pending action:", "error: crm_log_message_adv:", "resources were active at shutdown", "pending LRM operations at shutdown", "Lost connection to the CIB service", "Connection to the CIB terminated...", "Sending message to CIB service FAILED", "apply_xml_diff:.*Diff application failed!", "crmd.*Action A_RECOVER .* not supported", "unconfirmed_actions:.*Waiting on .* unconfirmed actions", "cib_native_msgready:.*Message pending on command channel", "crmd.*do_exit:.*Performing A_EXIT_1 - forcefully exiting the CRMd", "verify_stopped:.*Resource .* was active at shutdown. You may ignore this error if it is unmanaged.", "error: attrd_connection_destroy:.*Lost connection to attrd", "info: te_fence_node:.*Executing .* fencing operation", "crm_write_blackbox:", # "error: native_create_actions: Resource .*stonith::.* is active on 2 nodes attempting recovery", # "error: process_pe_message: Transition .* ERRORs found during PE processing", ] self.components["corosync-ignore"] = [ r"error: pcmk_cpg_dispatch:.*Connection to the CPG API failed: Library error", r"The .* process .* exited", r"pacemakerd.*error: pcmk_child_exit:.*Child process .* exited", r"cib.*error: cib_cs_destroy:.*Corosync connection lost", r"stonith-ng.*error: stonith_peer_cs_destroy:.*Corosync connection terminated", r"The cib process .* exited: Invalid argument", r"The attrd process .* exited: Transport endpoint is not connected", r"The crmd process .* exited: Link has been severed", r"error: pcmk_child_exit:.*Child process cib .* exited: Invalid argument", r"error: pcmk_child_exit:.*Child process attrd .* exited: Transport endpoint is not connected", r"error: pcmk_child_exit:.*Child process crmd .* exited: Link has been severed", r"lrmd.*error: crm_ipc_read:.*Connection to stonith-ng failed", r"lrmd.*error: mainloop_gio_callback:.*Connection to stonith-ng.* closed", r"lrmd.*error: stonith_connection_destroy_cb:.*LRMD lost STONITH connection", r"crmd.*do_state_transition:.*State transition .* S_RECOVERY", r"crmd.*error: do_log:.*FSA: Input I_ERROR", r"crmd.*error: do_log:.*FSA: Input I_TERMINATE", r"crmd.*error: pcmk_cman_dispatch:.*Connection to cman failed", r"crmd.*error: crmd_fast_exit:.*Could not recover from internal error", r"error: crm_ipc_read:.*Connection to cib_shm failed", r"error: mainloop_gio_callback:.*Connection to cib_shm.* closed", r"error: stonith_connection_failed:.*STONITH connection failed", ] self.components["corosync"] = [ r"pacemakerd.*error: cfg_connection_destroy:.*Connection destroyed", r"pacemakerd.*error: mcp_cpg_destroy:.*Connection destroyed", r"crit: attrd_(cs|cpg)_destroy:.*Lost connection to Corosync service", r"stonith_peer_cs_destroy:.*Corosync connection terminated", r"cib_cs_destroy:.*Corosync connection lost! Exiting.", r"crmd_(cs|quorum)_destroy:.*connection terminated", r"pengine.*Scheduling Node .* for STONITH", r"tengine_stonith_notify:.*Peer .* was terminated .*: OK", ] self.components["cib-ignore"] = [ "lrmd.*Connection to stonith-ng failed", "lrmd.*Connection to stonith-ng.* closed", "lrmd.*LRMD lost STONITH connection", "lrmd.*STONITH connection failed, finalizing .* pending operations", ] self.components["cib"] = [ "State transition .* S_RECOVERY", "Respawning .* crmd", "Respawning .* attrd", "Connection to cib_.* failed", "Connection to cib_.* closed", "Connection to the CIB terminated...", "(Child process|The) crmd .* exited: Generic Pacemaker error", "(Child process|The) attrd .* exited: (Connection reset by peer|Transport endpoint is not connected)", "Lost connection to CIB service", "crmd.*Input I_TERMINATE from do_recover", "crmd.*I_ERROR.*crmd_cib_connection_destroy", "crmd.*Could not recover from internal error", ] self.components["lrmd"] = [ "State transition .* S_RECOVERY", "LRM Connection failed", "Respawning .* crmd", "Connection to lrmd failed", "Connection to lrmd.* closed", "crmd.*I_ERROR.*lrm_connection_destroy", "(Child process|The) crmd .* exited: Generic Pacemaker error", "crmd.*Input I_TERMINATE from do_recover", "crmd.*Could not recover from internal error", ] self.components["lrmd-ignore"] = [] self.components["crmd"] = [ # "WARN: determine_online_status: Node .* is unclean", # "Scheduling Node .* for STONITH", # "Executing .* fencing operation", # Only if the node wasn't the DC: "State transition S_IDLE", "State transition .* -> S_IDLE", ] self.components["crmd-ignore"] = [] self.components["attrd"] = [] self.components["attrd-ignore"] = [] self.components["pengine"] = [ "State transition .* S_RECOVERY", "Respawning .* crmd", "(The|Child process) crmd .* exited: Generic Pacemaker error", "Connection to pengine failed", "Connection to pengine.* closed", "Connection to the Policy Engine failed", "crmd.*I_ERROR.*save_cib_contents", "crmd.*Input I_TERMINATE from do_recover", "crmd.*Could not recover from internal error", ] self.components["pengine-ignore"] = [] self.components["stonith"] = [ "Connection to stonith-ng failed", "LRMD lost STONITH connection", "Connection to stonith-ng.* closed", "Fencing daemon connection failed", "crmd.*stonith_api_add_notification:.*Callback already present", ] self.components["stonith-ignore"] = [ "LogActions: Recover Fencing", "Updating failcount for Fencing", "error: crm_ipc_read: Connection to stonith-ng failed", "error: mainloop_gio_callback: Connection to stonith-ng.*closed (I/O condition=17)", "crit: tengine_stonith_connection_destroy: Fencing daemon connection failed", "error: te_connect_stonith:.*Sign-in failed: triggered a retry", "STONITH connection failed, finalizing .* pending operations.", "process_lrm_event:.*Operation Fencing.* Error", ] self.components["stonith-ignore"].extend(self.components["common-ignore"]) class crm_mcp(crm_cs_v0): ''' The crm version 4 cluster manager class. It implements the things we need to talk to and manipulate crm clusters running on top of native corosync (no plugins) ''' def __init__(self, name): crm_cs_v0.__init__(self, name) self.commands.update({ "StartCmd" : "service corosync start && service pacemaker start", "StopCmd" : "service pacemaker stop; service pacemaker_remote stop; service corosync stop", "EpocheCmd" : "crm_node -e", "QuorumCmd" : "crm_node -q", "ParitionCmd" : "crm_node -p", }) self.search.update({ # Close enough... "Corosync Cluster Engine exiting normally" isn't printed # reliably and there's little interest in doing anything it "Pat:We_stopped" : "%s\W.*Unloading all Corosync service engines", "Pat:They_stopped" : "%s\W.*crmd.*Node %s\[.*state is now lost", "Pat:They_dead" : "crmd.*Node %s\[.*state is now lost", "Pat:ChildExit" : "The .* process exited", "Pat:ChildKilled" : "%s\W.*pacemakerd.*The %s process .* terminated with signal 9", "Pat:ChildRespawn" : "%s\W.*pacemakerd.*Respawning failed child process: %s", "Pat:InfraUp" : "%s\W.*corosync.*Initializing transport", "Pat:PacemakerUp" : "%s\W.*pacemakerd.*Starting Pacemaker", }) # if self.Env["have_systemd"]: # self.update({ # # When systemd is in use, we can look for this instead # "Pat:We_stopped" : "%s.*Stopped Corosync Cluster Engine", # }) class crm_mcp_docker(crm_mcp): ''' The crm version 4 cluster manager class. It implements the things we need to talk to and manipulate crm clusters running on top of native corosync (no plugins) ''' def __init__(self, name): crm_mcp.__init__(self, name) self.commands.update({ "StartCmd" : "pcmk_start", "StopCmd" : "pcmk_stop", }) class crm_cman(crm_cs_v0): ''' The crm version 3 cluster manager class. It implements the things we need to talk to and manipulate crm clusters running on top of openais ''' def __init__(self, name): crm_cs_v0.__init__(self, name) self.commands.update({ "StartCmd" : "service pacemaker start", "StopCmd" : "service pacemaker stop; service pacemaker_remote stop", "EpocheCmd" : "crm_node -e --cman", "QuorumCmd" : "crm_node -q --cman", "ParitionCmd" : "crm_node -p --cman", "Pat:We_stopped" : "%s.*Unloading all Corosync service engines", "Pat:They_stopped" : "%s\W.*crmd.*Node %s\[.*state is now lost", "Pat:They_dead" : "crmd.*Node %s\[.*state is now lost", "Pat:ChildKilled" : "%s\W.*pacemakerd.*The %s process .* terminated with signal 9", "Pat:ChildRespawn" : "%s\W.*pacemakerd.*Respawning failed child process: %s", }) class PatternSelector: def __init__(self, name=None): self.name = name self.base = BasePatterns("crm-base") if not name: crm_cs_v0("crm-plugin-v0") crm_cman("crm-cman") crm_mcp("crm-mcp") crm_lha("crm-lha") elif name == "crm-lha": crm_lha(name) elif name == "crm-plugin-v0": crm_cs_v0(name) elif name == "crm-cman": crm_cman(name) elif name == "crm-mcp": crm_mcp(name) elif name == "crm-mcp-docker": crm_mcp_docker(name) def get_variant(self, variant): if patternvariants.has_key(variant): return patternvariants[variant] print "defaulting to crm-base for %s" % variant return self.base def get_patterns(self, variant, kind): return self.get_variant(variant).get_patterns(kind) def get_template(self, variant, key): v = self.get_variant(variant) return v[key] def get_component(self, variant, kind): return self.get_variant(variant).get_component(kind) def __getitem__(self, key): return self.get_template(self.name, key) # python cts/CTSpatt.py -k crm-mcp -t StartCmd if __name__ == '__main__': pdir=os.path.dirname(sys.path[0]) sys.path.insert(0, pdir) # So that things work from the source directory from cts.CTSvars import * kind=None template=None skipthis=None args=sys.argv[1:] for i in range(0, len(args)): if skipthis: skipthis=None continue elif args[i] == "-k" or args[i] == "--kind": skipthis=1 kind = args[i+1] elif args[i] == "-t" or args[i] == "--template": skipthis=1 template = args[i+1] else: print "Illegal argument " + args[i] print PatternSelector(kind)[template] diff --git a/extra/resources/ping b/extra/resources/ping index b9a69b829b..e7b9973681 100755 --- a/extra/resources/ping +++ b/extra/resources/ping @@ -1,376 +1,386 @@ #!/bin/sh # # # Ping OCF RA that utilizes the system ping # # Copyright (c) 2009 Andrew Beekhof # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs} . ${OCF_FUNCTIONS} : ${__OCF_ACTION=$1} ####################################################################### meta_data() { cat < 1.0 Every time the monitor action is run, this resource agent records (in the CIB) the current number of ping nodes the host can connect to. It is essentially the same as pingd except that it uses the system ping tool to obtain the results. node connectivity PID file PID file The time to wait (dampening) further changes occur Dampening interval The name of the attributes to set. This is the name to be used in the constraints. Attribute name The number by which to multiply the number of connected ping nodes by Value multiplier - + The list of ping nodes to count. Host list Number of ping attempts, per host, before declaring it dead no. of ping attempts - + How long, in seconds, to wait before declaring a ping lost ping timeout in seconds A catch all for any other options that need to be passed to ping. Extra Options Resource is failed if the score is less than failure_score. Default never fails. failure_score + + +Use fping rather than ping, if found. If set to 0, fping +will not be used even if present. + +Use fping if available + + + Enables to use default attrd_updater verbose logging on every call. Verbose logging END } ####################################################################### ping_conditional_log() { level=$1; shift if [ ${OCF_RESKEY_debug} = "true" ]; then ocf_log $level "$*" fi } ping_usage() { cat <$f_out 2>$f_err; rc=$? active=`grep alive $f_out|wc -l` case $rc in 0) ;; 1) for h in `grep unreachable $f_out | awk '{print $1}'`; do ping_conditional_log warn "$h is inactive" done ;; *) ocf_log err "Unexpected result for '$cmd' $rc: `tr '\n' ';' < $f_err`" ;; esac rm -f $f_out $f_err return $active } ping_check() { active=0 for host in $OCF_RESKEY_host_list; do p_exe=ping case `uname` in Linux) p_args="-n -q -W $OCF_RESKEY_timeout -c $OCF_RESKEY_attempts";; Darwin) p_args="-n -q -t $OCF_RESKEY_timeout -c $OCF_RESKEY_attempts -o";; *) ocf_log err "Unknown host type: `uname`"; exit $OCF_ERR_INSTALLED;; esac case $host in *:*) p_exe=ping6 esac p_out=`$p_exe $p_args $OCF_RESKEY_options $host 2>&1`; rc=$? case $rc in 0) active=`expr $active + 1`;; 1) ping_conditional_log warn "$host is inactive: $p_out";; *) ocf_log err "Unexpected result for '$p_exe $p_args $OCF_RESKEY_options $host' $rc: $p_out";; esac done return $active } ping_update() { - if have_binary fping; then + if ocf_is_true "$OCF_RESKEY_use_fping" && have_binary fping; then fping_check active=$? else ping_check active=$? fi score=`expr $active \* $OCF_RESKEY_multiplier` attrd_updater -n $OCF_RESKEY_name -v $score -d $OCF_RESKEY_dampen $attrd_options rc=$? case $rc in 0) ping_conditional_log debug "Updated $OCF_RESKEY_name = $score" ;; *) ocf_log warn "Could not update $OCF_RESKEY_name = $score: rc=$rc";; esac if [ $rc -ne 0 ]; then return $rc fi if [ -n "$OCF_RESKEY_failure_score" -a "$score" -lt "$OCF_RESKEY_failure_score" ]; then ocf_log warn "$OCF_RESKEY_name is less than failure_score($OCF_RESKEY_failure_score)" return 1 fi return 0 } : ${OCF_RESKEY_name:="pingd"} : ${OCF_RESKEY_dampen:="5s"} : ${OCF_RESKEY_attempts:="3"} : ${OCF_RESKEY_multiplier:="1"} : ${OCF_RESKEY_debug:="false"} : ${OCF_RESKEY_failure_score:="0"} +: ${OCF_RESKEY_use_fping:="1"} : ${OCF_RESKEY_CRM_meta_timeout:="20000"} : ${OCF_RESKEY_CRM_meta_globally_unique:="true"} integer=`echo ${OCF_RESKEY_timeout} | egrep -o '[0-9]*'` case ${OCF_RESKEY_timeout} in *[0-9]ms|*[0-9]msec) OCF_RESKEY_timeout=`expr $integer / 1000`;; *[0-9]m|*[0-9]min) OCF_RESKEY_timeout=`expr $integer \* 60`;; *[0-9]h|*[0-9]hr) OCF_RESKEY_timeout=`expr $integer \* 60 \* 60`;; *) OCF_RESKEY_timeout=$integer;; esac if [ -z ${OCF_RESKEY_timeout} ]; then if [ x"$OCF_RESKEY_host_list" != x ]; then host_count=`echo $OCF_RESKEY_host_list | awk '{print NF}'` OCF_RESKEY_timeout=`expr $OCF_RESKEY_CRM_meta_timeout / $host_count / $OCF_RESKEY_attempts` OCF_RESKEY_timeout=`expr $OCF_RESKEY_timeout / 1100` # Convert to seconds and finish 10% early else OCF_RESKEY_timeout=5 fi fi if [ ${OCF_RESKEY_timeout} -lt 1 ]; then OCF_RESKEY_timeout=5 elif [ ${OCF_RESKEY_timeout} -gt 1000 ]; then # ping actually complains if this value is too high, 5 minutes is plenty OCF_RESKEY_timeout=300 fi if [ ${OCF_RESKEY_CRM_meta_globally_unique} = "false" ]; then : ${OCF_RESKEY_pidfile:="$HA_VARRUN/ping-${OCF_RESKEY_name}"} else : ${OCF_RESKEY_pidfile:="$HA_VARRUN/ping-${OCF_RESOURCE_INSTANCE}"} fi attrd_options='-q' if ocf_is_true ${OCF_RESKEY_debug} ; then attrd_options='' fi # Check the debug option case "${OCF_RESKEY_debug}" in true|True|TRUE|1) OCF_RESKEY_debug=true;; false|False|FALSE|0) OCF_RESKEY_debug=false;; *) ocf_log warn "Value for 'debug' is incorrect. Please specify 'true' or 'false' not: ${OCF_RESKEY_debug}" OCF_RESKEY_debug=false ;; esac case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; start) ping_start;; stop) ping_stop;; monitor) ping_monitor;; reload) ping_start;; validate-all) ping_validate;; usage|help) ping_usage exit $OCF_SUCCESS ;; *) ping_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? diff --git a/extra/resources/remote b/extra/resources/remote index 9f141a2289..d79c4c3934 100644 --- a/extra/resources/remote +++ b/extra/resources/remote @@ -1,110 +1,110 @@ #!/bin/sh # # # remote OCF RA. This script provides metadata for the internal # pacemaker remote lrmd connection agent. Outside of acting # as a place holder so the remote ra script can be indexed and # providing metadata, this script should never be invoked. The # actual functionality behind the remote lrmd connection lives # within pacemaker's crmd component. # # Copyright (c) 2013 David Vossel # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs} . ${OCF_FUNCTIONS} : ${__OCF_ACTION=$1} ####################################################################### meta_data() { cat < 0.1 Server location to connect to. This can be an ip address or hostname. Server location tcp port to connect to. tcp port - + - - + + END } ####################################################################### remote_usage() { cat < * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TIMEOUT_MULTIPLY_FACTOR 1.2 typedef struct st_query_result_s { char *host; int devices; /* only try peers for non-topology based operations once */ gboolean tried; GListPtr device_list; GHashTable *custom_action_timeouts; /* Subset of devices that peer has verified connectivity on */ GHashTable *verified_devices; } st_query_result_t; GHashTable *remote_op_list = NULL; void call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer); static void remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup); extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, int call_options); static void report_timeout_period(remote_fencing_op_t * op, int op_timeout); static int get_op_total_timeout(remote_fencing_op_t * op, st_query_result_t * chosen_peer, int default_timeout); static gint sort_strings(gconstpointer a, gconstpointer b) { return strcmp(a, b); } static void free_remote_query(gpointer data) { if (data) { st_query_result_t *query = data; crm_trace("Free'ing query result from %s", query->host); free(query->host); g_list_free_full(query->device_list, free); g_hash_table_destroy(query->custom_action_timeouts); g_hash_table_destroy(query->verified_devices); free(query); } } static void clear_remote_op_timers(remote_fencing_op_t * op) { if (op->query_timer) { g_source_remove(op->query_timer); op->query_timer = 0; } if (op->op_timer_total) { g_source_remove(op->op_timer_total); op->op_timer_total = 0; } if (op->op_timer_one) { g_source_remove(op->op_timer_one); op->op_timer_one = 0; } } static void free_remote_op(gpointer data) { remote_fencing_op_t *op = data; crm_trace("Free'ing op %s for %s", op->id, op->target); crm_log_xml_debug(op->request, "Destroying"); clear_remote_op_timers(op); free(op->id); free(op->action); free(op->target); free(op->client_id); free(op->client_name); free(op->originator); if (op->query_results) { g_list_free_full(op->query_results, free_remote_query); } if (op->request) { free_xml(op->request); op->request = NULL; } if (op->devices_list) { g_list_free_full(op->devices_list, free); op->devices_list = NULL; } if (op->required_list) { g_list_free_full(op->required_list, free); op->required_list = NULL; } free(op); } static xmlNode * create_op_done_notify(remote_fencing_op_t * op, int rc) { xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE); crm_xml_add_int(notify_data, "state", op->state); crm_xml_add_int(notify_data, F_STONITH_RC, rc); crm_xml_add(notify_data, F_STONITH_TARGET, op->target); crm_xml_add(notify_data, F_STONITH_ACTION, op->action); crm_xml_add(notify_data, F_STONITH_DELEGATE, op->delegate); crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, op->id); crm_xml_add(notify_data, F_STONITH_ORIGIN, op->originator); crm_xml_add(notify_data, F_STONITH_CLIENTID, op->client_id); crm_xml_add(notify_data, F_STONITH_CLIENTNAME, op->client_name); return notify_data; } static void bcast_result_to_peers(remote_fencing_op_t * op, int rc) { static int count = 0; xmlNode *bcast = create_xml_node(NULL, T_STONITH_REPLY); xmlNode *notify_data = create_op_done_notify(op, rc); count++; crm_trace("Broadcasting result to peers"); crm_xml_add(bcast, F_TYPE, T_STONITH_NOTIFY); crm_xml_add(bcast, F_SUBTYPE, "broadcast"); crm_xml_add(bcast, F_STONITH_OPERATION, T_STONITH_NOTIFY); crm_xml_add_int(bcast, "count", count); add_message_xml(bcast, F_STONITH_CALLDATA, notify_data); send_cluster_message(NULL, crm_msg_stonith_ng, bcast, FALSE); free_xml(notify_data); free_xml(bcast); return; } static void handle_local_reply_and_notify(remote_fencing_op_t * op, xmlNode * data, int rc) { xmlNode *notify_data = NULL; xmlNode *reply = NULL; if (op->notify_sent == TRUE) { /* nothing to do */ return; } /* Do notification with a clean data object */ notify_data = create_op_done_notify(op, rc); crm_xml_add_int(data, "state", op->state); crm_xml_add(data, F_STONITH_TARGET, op->target); crm_xml_add(data, F_STONITH_OPERATION, op->action); reply = stonith_construct_reply(op->request, NULL, data, rc); crm_xml_add(reply, F_STONITH_DELEGATE, op->delegate); /* Send fencing OP reply to local client that initiated fencing */ do_local_reply(reply, op->client_id, op->call_options & st_opt_sync_call, FALSE); /* bcast to all local clients that the fencing operation happend */ do_stonith_notify(0, T_STONITH_NOTIFY_FENCE, rc, notify_data); /* mark this op as having notify's already sent */ op->notify_sent = TRUE; free_xml(reply); free_xml(notify_data); } static void handle_duplicates(remote_fencing_op_t * op, xmlNode * data, int rc) { GListPtr iter = NULL; for (iter = op->duplicates; iter != NULL; iter = iter->next) { remote_fencing_op_t *other = iter->data; if (other->state == st_duplicate) { /* Ie. it hasn't timed out already */ other->state = op->state; crm_debug("Peforming duplicate notification for %s@%s.%.8s = %s", other->client_name, other->originator, other->id, pcmk_strerror(rc)); remote_op_done(other, data, rc, TRUE); } else { crm_err("Skipping duplicate notification for %s@%s - %d", other->client_name, other->originator, other->state); } } } /*! * \internal * \brief Finalize a remote operation. * * \description This function has two code paths. * * Path 1. This node is the owner of the operation and needs * to notify the cpg group via a broadcast as to the operation's * results. * * Path 2. The cpg broadcast is received. All nodes notify their local * stonith clients the operation results. * * So, The owner of the operation first notifies the cluster of the result, * and once that cpg notify is received back it notifies all the local clients. * * Nodes that are passive watchers of the operation will receive the * broadcast and only need to notify their local clients the operation finished. * * \param op, The fencing operation to finalize * \param data, The xml msg reply (if present) of the last delegated fencing * operation. * \param dup, Is this operation a duplicate, if so treat it a little differently * making sure the broadcast is not sent out. */ static void remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup) { int level = LOG_ERR; const char *subt = NULL; xmlNode *local_data = NULL; op->completed = time(NULL); clear_remote_op_timers(op); if (op->notify_sent == TRUE) { crm_err("Already sent notifications for '%s of %s by %s' (for=%s@%s.%.8s, state=%d): %s", op->action, op->target, op->delegate ? op->delegate : "", op->client_name, op->originator, op->id, op->state, pcmk_strerror(rc)); goto remote_op_done_cleanup; } if (!op->delegate && data) { - xmlNode *ndata = get_xpath_object("//@" F_STONITH_DELEGATE, data, LOG_WARNING); + xmlNode *ndata = get_xpath_object("//@" F_STONITH_DELEGATE, data, LOG_TRACE); if(ndata) { op->delegate = crm_element_value_copy(ndata, F_STONITH_DELEGATE); - } else { - op->delegate = crm_element_value_copy(data, F_ORIG); } } if (data == NULL) { data = create_xml_node(NULL, "remote-op"); local_data = data; } /* Tell everyone the operation is done, we will continue * with doing the local notifications once we receive * the broadcast back. */ subt = crm_element_value(data, F_SUBTYPE); if (dup == FALSE && safe_str_neq(subt, "broadcast")) { /* Defer notification until the bcast message arrives */ bcast_result_to_peers(op, rc); goto remote_op_done_cleanup; } if (rc == pcmk_ok || dup) { level = LOG_NOTICE; } else if (safe_str_neq(op->originator, stonith_our_uname)) { level = LOG_NOTICE; } do_crm_log(level, "Operation %s of %s by %s for %s@%s.%.8s: %s", op->action, op->target, op->delegate ? op->delegate : "", op->client_name, op->originator, op->id, pcmk_strerror(rc)); handle_local_reply_and_notify(op, data, rc); if (dup == FALSE) { handle_duplicates(op, data, rc); } /* Free non-essential parts of the record * Keep the record around so we can query the history */ if (op->query_results) { g_list_free_full(op->query_results, free_remote_query); op->query_results = NULL; } if (op->request) { free_xml(op->request); op->request = NULL; } remote_op_done_cleanup: free_xml(local_data); } static gboolean remote_op_watchdog_done(gpointer userdata) { remote_fencing_op_t *op = userdata; op->op_timer_one = 0; crm_notice("Remote %s operation on %s for %s.%8s assumed complete", op->action, op->target, op->client_name, op->id); op->state = st_done; remote_op_done(op, NULL, pcmk_ok, FALSE); return FALSE; } static gboolean remote_op_timeout_one(gpointer userdata) { remote_fencing_op_t *op = userdata; op->op_timer_one = 0; crm_notice("Remote %s operation on %s for %s.%8s timed out", op->action, op->target, op->client_name, op->id); call_remote_stonith(op, NULL); return FALSE; } static gboolean remote_op_timeout(gpointer userdata) { remote_fencing_op_t *op = userdata; op->op_timer_total = 0; if (op->state == st_done) { crm_debug("Action %s (%s) for %s (%s) already completed", op->action, op->id, op->target, op->client_name); return FALSE; } crm_debug("Action %s (%s) for %s (%s) timed out", op->action, op->id, op->target, op->client_name); op->state = st_failed; remote_op_done(op, NULL, -ETIME, FALSE); return FALSE; } static gboolean remote_op_query_timeout(gpointer data) { remote_fencing_op_t *op = data; op->query_timer = 0; if (op->state == st_done) { crm_debug("Operation %s for %s already completed", op->id, op->target); } else if (op->state == st_exec) { crm_debug("Operation %s for %s already in progress", op->id, op->target); } else if (op->query_results) { crm_debug("Query %s for %s complete: %d", op->id, op->target, op->state); call_remote_stonith(op, NULL); } else { crm_debug("Query %s for %s timed out: %d", op->id, op->target, op->state); if (op->op_timer_total) { g_source_remove(op->op_timer_total); op->op_timer_total = 0; } remote_op_timeout(op); } return FALSE; } static gboolean topology_is_empty(stonith_topology_t *tp) { int i; if (tp == NULL) { return TRUE; } for (i = 0; i < ST_LEVEL_MAX; i++) { if (tp->levels[i] != NULL) { return FALSE; } } return TRUE; } static void add_required_device(remote_fencing_op_t * op, const char *device) { GListPtr match = g_list_find_custom(op->required_list, device, sort_strings); if (match) { /* device already marked required */ return; } op->required_list = g_list_prepend(op->required_list, strdup(device)); /* make sure the required devices is in the current list of devices to be executed */ if (op->devices_list) { GListPtr match = g_list_find_custom(op->devices_list, device, sort_strings); if (match == NULL) { op->devices_list = g_list_append(op->devices_list, strdup(device)); } } } /* deep copy the device list */ static void set_op_device_list(remote_fencing_op_t * op, GListPtr devices) { GListPtr lpc = NULL; if (op->devices_list) { g_list_free_full(op->devices_list, free); op->devices_list = NULL; } for (lpc = devices; lpc != NULL; lpc = lpc->next) { op->devices_list = g_list_append(op->devices_list, strdup(lpc->data)); } /* tack on whatever required devices have not been executed * to the end of the current devices list. This ensures that * the required devices will get executed regardless of what topology * level they exist at. */ for (lpc = op->required_list; lpc != NULL; lpc = lpc->next) { GListPtr match = g_list_find_custom(op->devices_list, lpc->data, sort_strings); if (match == NULL) { op->devices_list = g_list_append(op->devices_list, strdup(lpc->data)); } } op->devices = op->devices_list; } static int stonith_topology_next(remote_fencing_op_t * op) { stonith_topology_t *tp = NULL; if (op->target) { /* Queries don't have a target set */ tp = g_hash_table_lookup(topology, op->target); } if (topology_is_empty(tp)) { return pcmk_ok; } set_bit(op->call_options, st_opt_topology); do { op->level++; } while (op->level < ST_LEVEL_MAX && tp->levels[op->level] == NULL); if (op->level < ST_LEVEL_MAX) { crm_trace("Attempting fencing level %d for %s (%d devices) - %s@%s.%.8s", op->level, op->target, g_list_length(tp->levels[op->level]), op->client_name, op->originator, op->id); set_op_device_list(op, tp->levels[op->level]); return pcmk_ok; } crm_notice("All fencing options to fence %s for %s@%s.%.8s failed", op->target, op->client_name, op->originator, op->id); return -EINVAL; } /*! * \brief Check to see if this operation is a duplicate of another in flight * operation. If so merge this operation into the inflight operation, and mark * it as a duplicate. */ static void merge_duplicates(remote_fencing_op_t * op) { GHashTableIter iter; remote_fencing_op_t *other = NULL; time_t now = time(NULL); g_hash_table_iter_init(&iter, remote_op_list); while (g_hash_table_iter_next(&iter, NULL, (void **)&other)) { crm_node_t *peer = NULL; if (other->state > st_exec) { /* Must be in-progress */ continue; } else if (safe_str_neq(op->target, other->target)) { /* Must be for the same node */ continue; } else if (safe_str_neq(op->action, other->action)) { crm_trace("Must be for the same action: %s vs. ", op->action, other->action); continue; } else if (safe_str_eq(op->client_name, other->client_name)) { crm_trace("Must be for different clients: %s", op->client_name); continue; } else if (safe_str_eq(other->target, other->originator)) { crm_trace("Can't be a suicide operation: %s", other->target); continue; } peer = crm_get_peer(0, other->originator); if(fencing_peer_active(peer) == FALSE) { crm_notice("Failing stonith action %s for node %s originating from %s@%s.%.8s: Originator is dead", other->action, other->target, other->client_name, other->originator, other->id); other->state = st_failed; continue; } else if(other->total_timeout > 0 && now > (other->total_timeout + other->created)) { crm_info("Stonith action %s for node %s originating from %s@%s.%.8s is too old: %d vs. %d + %d", other->action, other->target, other->client_name, other->originator, other->id, now, other->created, other->total_timeout); continue; } /* There is another in-flight request to fence the same host * Piggyback on that instead. If it fails, so do we. */ other->duplicates = g_list_append(other->duplicates, op); if (other->total_timeout == 0) { crm_trace("Making a best-guess as to the timeout used"); other->total_timeout = op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, NULL, op->base_timeout); } crm_notice ("Merging stonith action %s for node %s originating from client %s.%.8s with identical request from %s@%s.%.8s (%ds)", op->action, op->target, op->client_name, op->id, other->client_name, other->originator, other->id, other->total_timeout); report_timeout_period(op, other->total_timeout); op->state = st_duplicate; } } static uint32_t fencing_active_peers(void) { uint32_t count = 0; crm_node_t *entry; GHashTableIter gIter; g_hash_table_iter_init(&gIter, crm_peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { if(fencing_peer_active(entry)) { count++; } } return count; } int stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op) { xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR); op->state = st_done; op->completed = time(NULL); op->delegate = strdup("a human"); crm_notice("Injecting manual confirmation that %s is safely off/down", crm_element_value(dev, F_STONITH_TARGET)); remote_op_done(op, msg, pcmk_ok, FALSE); /* Replies are sent via done_cb->stonith_send_async_reply()->do_local_reply() */ return -EINPROGRESS; } /*! * \internal * \brief Create a new remote stonith op * \param client, he local stonith client id that initaited the operation * \param request, The request from the client that started the operation * \param peer, Is this operation owned by another stonith peer? Operations * owned by other peers are stored on all the stonith nodes, but only the * owner executes the operation. All the nodes get the results to the operation * once the owner finishes executing it. */ void * create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer) { remote_fencing_op_t *op = NULL; xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE); int call_options = 0; if (remote_op_list == NULL) { remote_op_list = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_remote_op); } /* If this operation is owned by another node, check to make * sure we haven't already created this operation. */ if (peer && dev) { const char *op_id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID); CRM_CHECK(op_id != NULL, return NULL); op = g_hash_table_lookup(remote_op_list, op_id); if (op) { crm_debug("%s already exists", op_id); return op; } } op = calloc(1, sizeof(remote_fencing_op_t)); crm_element_value_int(request, F_STONITH_TIMEOUT, (int *)&(op->base_timeout)); if (peer && dev) { op->id = crm_element_value_copy(dev, F_STONITH_REMOTE_OP_ID); } else { op->id = crm_generate_uuid(); } g_hash_table_replace(remote_op_list, op->id, op); CRM_LOG_ASSERT(g_hash_table_lookup(remote_op_list, op->id) != NULL); crm_trace("Created %s", op->id); op->state = st_query; op->replies_expected = fencing_active_peers(); op->action = crm_element_value_copy(dev, F_STONITH_ACTION); op->originator = crm_element_value_copy(dev, F_STONITH_ORIGIN); op->delegate = crm_element_value_copy(dev, F_STONITH_DELEGATE); /* May not be set */ op->created = time(NULL); if (op->originator == NULL) { /* Local or relayed request */ op->originator = strdup(stonith_our_uname); } CRM_LOG_ASSERT(client != NULL); if (client) { op->client_id = strdup(client); } op->client_name = crm_element_value_copy(request, F_STONITH_CLIENTNAME); op->target = crm_element_value_copy(dev, F_STONITH_TARGET); op->request = copy_xml(request); /* TODO: Figure out how to avoid this */ crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); op->call_options = call_options; crm_element_value_int(request, F_STONITH_CALLID, (int *)&(op->client_callid)); crm_trace("%s new stonith op: %s - %s of %s for %s", (peer && dev) ? "Recorded" : "Generated", op->id, op->action, op->target, op->client_name); if (op->call_options & st_opt_cs_nodeid) { int nodeid = crm_atoi(op->target, NULL); crm_node_t *node = crm_get_peer(nodeid, NULL); /* Ensure the conversion only happens once */ op->call_options &= ~st_opt_cs_nodeid; if (node && node->uname) { free(op->target); op->target = strdup(node->uname); } else { crm_warn("Could not expand nodeid '%s' into a host name (%p)", op->target, node); } } /* check to see if this is a duplicate operation of another in-flight operation */ merge_duplicates(op); return op; } remote_fencing_op_t * initiate_remote_stonith_op(crm_client_t * client, xmlNode * request, gboolean manual_ack) { int query_timeout = 0; xmlNode *query = NULL; const char *client_id = NULL; remote_fencing_op_t *op = NULL; if (client) { client_id = client->id; } else { client_id = crm_element_value(request, F_STONITH_CLIENTID); } CRM_LOG_ASSERT(client_id != NULL); op = create_remote_stonith_op(client_id, request, FALSE); op->owner = TRUE; if (manual_ack) { crm_notice("Initiating manual confirmation for %s: %s", op->target, op->id); return op; } CRM_CHECK(op->action, return NULL); if (stonith_topology_next(op) != pcmk_ok) { op->state = st_failed; } switch (op->state) { case st_failed: crm_warn("Initiation of remote operation %s for %s: failed (%s)", op->action, op->target, op->id); remote_op_done(op, NULL, -EINVAL, FALSE); return op; case st_duplicate: crm_info("Initiating remote operation %s for %s: %s (duplicate)", op->action, op->target, op->id); return op; default: crm_notice("Initiating remote operation %s for %s: %s (%d)", op->action, op->target, op->id, op->state); } query = stonith_create_op(op->client_callid, op->id, STONITH_OP_QUERY, NULL, 0); crm_xml_add(query, F_STONITH_REMOTE_OP_ID, op->id); crm_xml_add(query, F_STONITH_TARGET, op->target); crm_xml_add(query, F_STONITH_ACTION, op->action); crm_xml_add(query, F_STONITH_ORIGIN, op->originator); crm_xml_add(query, F_STONITH_CLIENTID, op->client_id); crm_xml_add(query, F_STONITH_CLIENTNAME, op->client_name); crm_xml_add_int(query, F_STONITH_TIMEOUT, op->base_timeout); crm_xml_add_int(query, F_STONITH_CALLOPTS, op->call_options); send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE); free_xml(query); query_timeout = op->base_timeout * TIMEOUT_MULTIPLY_FACTOR; op->query_timer = g_timeout_add((1000 * query_timeout), remote_op_query_timeout, op); return op; } enum find_best_peer_options { /*! Skip checking the target peer for capable fencing devices */ FIND_PEER_SKIP_TARGET = 0x0001, /*! Only check the target peer for capable fencing devices */ FIND_PEER_TARGET_ONLY = 0x0002, /*! Skip peers and devices that are not verified */ FIND_PEER_VERIFIED_ONLY = 0x0004, }; static st_query_result_t * find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer_options options) { GListPtr iter = NULL; gboolean verified_devices_only = (options & FIND_PEER_VERIFIED_ONLY) ? TRUE : FALSE; if (!device && is_set(op->call_options, st_opt_topology)) { return NULL; } for (iter = op->query_results; iter != NULL; iter = iter->next) { st_query_result_t *peer = iter->data; crm_trace("Testing result from %s for %s with %d devices: %d %x", peer->host, op->target, peer->devices, peer->tried, options); if ((options & FIND_PEER_SKIP_TARGET) && safe_str_eq(peer->host, op->target)) { continue; } if ((options & FIND_PEER_TARGET_ONLY) && safe_str_neq(peer->host, op->target)) { continue; } if (is_set(op->call_options, st_opt_topology)) { /* Do they have the next device of the current fencing level? */ GListPtr match = NULL; if (verified_devices_only && !g_hash_table_lookup(peer->verified_devices, device)) { continue; } match = g_list_find_custom(peer->device_list, device, sort_strings); if (match) { crm_trace("Removing %s from %s (%d remaining)", (char *)match->data, peer->host, g_list_length(peer->device_list)); peer->device_list = g_list_remove(peer->device_list, match->data); return peer; } } else if (peer->devices > 0 && peer->tried == FALSE) { if (verified_devices_only && !g_hash_table_size(peer->verified_devices)) { continue; } /* No topology: Use the current best peer */ crm_trace("Simple fencing"); return peer; } } return NULL; } static st_query_result_t * stonith_choose_peer(remote_fencing_op_t * op) { const char *device = NULL; st_query_result_t *peer = NULL; uint32_t active = fencing_active_peers(); do { if (op->devices) { device = op->devices->data; crm_trace("Checking for someone to fence %s with %s", op->target, device); } else { crm_trace("Checking for someone to fence %s", op->target); } peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET|FIND_PEER_VERIFIED_ONLY); if (peer) { crm_trace("Found verified peer %s for %s", peer->host, device?device:""); return peer; } if(op->query_timer != 0 && op->replies < QB_MIN(op->replies_expected, active)) { crm_trace("Waiting before looking for unverified devices to fence %s", op->target); return NULL; } peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET); if (peer) { crm_trace("Found best unverified peer %s", peer->host); return peer; } peer = find_best_peer(device, op, FIND_PEER_TARGET_ONLY); if(peer) { crm_trace("%s will fence itself", peer->host); return peer; } /* Try the next fencing level if there is one */ } while (is_set(op->call_options, st_opt_topology) && stonith_topology_next(op) == pcmk_ok); crm_notice("Couldn't find anyone to fence %s with %s", op->target, device?device:""); return NULL; } static int get_device_timeout(st_query_result_t * peer, const char *device, int default_timeout) { gpointer res; if (!peer || !device) { return default_timeout; } res = g_hash_table_lookup(peer->custom_action_timeouts, device); return res ? GPOINTER_TO_INT(res) : default_timeout; } static int get_peer_timeout(st_query_result_t * peer, int default_timeout) { int total_timeout = 0; GListPtr cur = NULL; for (cur = peer->device_list; cur; cur = cur->next) { total_timeout += get_device_timeout(peer, cur->data, default_timeout); } return total_timeout ? total_timeout : default_timeout; } static int get_op_total_timeout(remote_fencing_op_t * op, st_query_result_t * chosen_peer, int default_timeout) { stonith_topology_t *tp = g_hash_table_lookup(topology, op->target); int total_timeout = 0; if (is_set(op->call_options, st_opt_topology) && tp) { int i; GListPtr device_list = NULL; GListPtr iter = NULL; /* Yep, this looks scary, nested loops all over the place. * Here is what is going on. * Loop1: Iterate through fencing levels. * Loop2: If a fencing level has devices, loop through each device * Loop3: For each device in a fencing level, see what peer owns it * and what that peer has reported the timeout is for the device. */ for (i = 0; i < ST_LEVEL_MAX; i++) { if (!tp->levels[i]) { continue; } for (device_list = tp->levels[i]; device_list; device_list = device_list->next) { for (iter = op->query_results; iter != NULL; iter = iter->next) { st_query_result_t *peer = iter->data; if (g_list_find_custom(peer->device_list, device_list->data, sort_strings)) { total_timeout += get_device_timeout(peer, device_list->data, default_timeout); break; } } /* End Loop3: match device with peer that owns device, find device's timeout period */ } /* End Loop2: iterate through devices at a specific level */ } /*End Loop1: iterate through fencing levels */ } else if (chosen_peer) { total_timeout = get_peer_timeout(chosen_peer, default_timeout); } else { total_timeout = default_timeout; } return total_timeout ? total_timeout : default_timeout; } static void report_timeout_period(remote_fencing_op_t * op, int op_timeout) { GListPtr iter = NULL; xmlNode *update = NULL; const char *client_node = NULL; const char *client_id = NULL; const char *call_id = NULL; if (op->call_options & st_opt_sync_call) { /* There is no reason to report the timeout for a syncronous call. It * is impossible to use the reported timeout to do anything when the client * is blocking for the response. This update is only important for * async calls that require a callback to report the results in. */ return; } else if (!op->request) { return; } crm_trace("Reporting timeout for %s.%.8s", op->client_name, op->id); client_node = crm_element_value(op->request, F_STONITH_CLIENTNODE); call_id = crm_element_value(op->request, F_STONITH_CALLID); client_id = crm_element_value(op->request, F_STONITH_CLIENTID); if (!client_node || !call_id || !client_id) { return; } if (safe_str_eq(client_node, stonith_our_uname)) { /* The client is connected to this node, send the update direclty to them */ do_stonith_async_timeout_update(client_id, call_id, op_timeout); return; } /* The client is connected to another node, relay this update to them */ update = stonith_create_op(op->client_callid, op->id, STONITH_OP_TIMEOUT_UPDATE, NULL, 0); crm_xml_add(update, F_STONITH_REMOTE_OP_ID, op->id); crm_xml_add(update, F_STONITH_CLIENTID, client_id); crm_xml_add(update, F_STONITH_CALLID, call_id); crm_xml_add_int(update, F_STONITH_TIMEOUT, op_timeout); send_cluster_message(crm_get_peer(0, client_node), crm_msg_stonith_ng, update, FALSE); free_xml(update); for (iter = op->duplicates; iter != NULL; iter = iter->next) { remote_fencing_op_t *dup = iter->data; crm_trace("Reporting timeout for duplicate %s.%.8s", dup->client_name, dup->id); report_timeout_period(iter->data, op_timeout); } } void call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer) { const char *device = NULL; int timeout = op->base_timeout; crm_trace("State for %s.%.8s: %s %d", op->target, op->client_name, op->id, op->state); if (peer == NULL && !is_set(op->call_options, st_opt_topology)) { peer = stonith_choose_peer(op); } if (!op->op_timer_total) { int total_timeout = get_op_total_timeout(op, peer, op->base_timeout); op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * total_timeout; op->op_timer_total = g_timeout_add(1000 * op->total_timeout, remote_op_timeout, op); report_timeout_period(op, op->total_timeout); crm_info("Total remote op timeout set to %d for fencing of node %s for %s.%.8s", total_timeout, op->target, op->client_name, op->id); } if (is_set(op->call_options, st_opt_topology) && op->devices) { /* Ignore any preference, they might not have the device we need */ /* When using topology, the stonith_choose_peer function pops off * the peer from the op's query results. Make sure to calculate * the op_timeout before calling this function when topology is in use */ peer = stonith_choose_peer(op); device = op->devices->data; timeout = get_device_timeout(peer, device, op->base_timeout); } if (peer) { int timeout_one = 0; xmlNode *remote_op = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0); crm_xml_add(remote_op, F_STONITH_REMOTE_OP_ID, op->id); crm_xml_add(remote_op, F_STONITH_TARGET, op->target); crm_xml_add(remote_op, F_STONITH_ACTION, op->action); crm_xml_add(remote_op, F_STONITH_ORIGIN, op->originator); crm_xml_add(remote_op, F_STONITH_CLIENTID, op->client_id); crm_xml_add(remote_op, F_STONITH_CLIENTNAME, op->client_name); crm_xml_add_int(remote_op, F_STONITH_TIMEOUT, timeout); crm_xml_add_int(remote_op, F_STONITH_CALLOPTS, op->call_options); if (device) { timeout_one = TIMEOUT_MULTIPLY_FACTOR * get_device_timeout(peer, device, op->base_timeout); crm_info("Requesting that %s perform op %s %s with %s for %s (%ds)", peer->host, op->action, op->target, device, op->client_name, timeout_one); crm_xml_add(remote_op, F_STONITH_DEVICE, device); crm_xml_add(remote_op, F_STONITH_MODE, "slave"); } else { timeout_one = TIMEOUT_MULTIPLY_FACTOR * get_peer_timeout(peer, op->base_timeout); crm_info("Requesting that %s perform op %s %s for %s (%ds)", peer->host, op->action, op->target, op->client_name, timeout_one); crm_xml_add(remote_op, F_STONITH_MODE, "smart"); /* TODO: We should probably look into peer->device_list to verify watchdog is going to be in use */ if(stonith_watchdog_timeout_ms && safe_str_eq(peer->host, op->target) && safe_str_neq(op->action, "on")) { op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op); } } op->state = st_exec; if (op->op_timer_one) { g_source_remove(op->op_timer_one); } if(device && stonith_watchdog_timeout_ms && safe_str_eq(device, "watchdog")) { op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op); } else { op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op); } send_cluster_message(crm_get_peer(0, peer->host), crm_msg_stonith_ng, remote_op, FALSE); peer->tried = TRUE; free_xml(remote_op); return; } else if (op->owner == FALSE) { crm_err("The termination of %s for %s is not ours to control", op->target, op->client_name); } else if (op->query_timer == 0) { /* We've exhausted all available peers */ crm_info("No remaining peers capable of terminating %s for %s (%d)", op->target, op->client_name, op->state); CRM_LOG_ASSERT(op->state < st_done); remote_op_timeout(op); } else if(op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) { int rc = -EHOSTUNREACH; /* if the operation never left the query state, * but we have all the expected replies, then no devices * are available to execute the fencing operation. */ if(stonith_watchdog_timeout_ms && (device == NULL || safe_str_eq(device, "watchdog"))) { crm_notice("Waiting %ds for %s to self-terminate for %s.%.8s (%p)", stonith_watchdog_timeout_ms/1000, op->target, op->client_name, op->id, device); op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op); return; } if (op->state == st_query) { crm_info("None of the %d peers have devices capable of terminating %s for %s (%d)", op->replies, op->target, op->client_name, op->state); rc = -ENODEV; } else { crm_info("None of the %d peers are capable of terminating %s for %s (%d)", op->replies, op->target, op->client_name, op->state); } op->state = st_failed; remote_op_done(op, NULL, rc, FALSE); } else if (device) { crm_info("Waiting for additional peers capable of terminating %s with %s for %s.%.8s", op->target, device, op->client_name, op->id); } else { crm_info("Waiting for additional peers capable of terminating %s for %s%.8s", op->target, op->client_name, op->id); } } static gint sort_peers(gconstpointer a, gconstpointer b) { const st_query_result_t *peer_a = a; const st_query_result_t *peer_b = a; if (peer_a->devices > peer_b->devices) { return -1; } else if (peer_a->devices > peer_b->devices) { return 1; } return 0; } /*! * \internal * \brief Determine if all the devices in the topology are found or not */ static gboolean all_topology_devices_found(remote_fencing_op_t * op) { GListPtr device = NULL; GListPtr iter = NULL; GListPtr match = NULL; stonith_topology_t *tp = NULL; gboolean skip_target = FALSE; int i; tp = g_hash_table_lookup(topology, op->target); if (!tp) { return FALSE; } if (safe_str_eq(op->action, "off") || safe_str_eq(op->action, "reboot")) { /* Don't count the devices on the target node if we are killing * the target node. */ skip_target = TRUE; } for (i = 0; i < ST_LEVEL_MAX; i++) { for (device = tp->levels[i]; device; device = device->next) { match = FALSE; for (iter = op->query_results; iter != NULL; iter = iter->next) { st_query_result_t *peer = iter->data; if (skip_target && safe_str_eq(peer->host, op->target)) { continue; } match = g_list_find_custom(peer->device_list, device->data, sort_strings); } if (!match) { return FALSE; } } } return TRUE; } int process_remote_stonith_query(xmlNode * msg) { int devices = 0; gboolean host_is_target = FALSE; const char *id = NULL; const char *host = NULL; remote_fencing_op_t *op = NULL; st_query_result_t *result = NULL; uint32_t active = fencing_active_peers(); xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR); xmlNode *child = NULL; CRM_CHECK(dev != NULL, return -EPROTO); id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID); CRM_CHECK(id != NULL, return -EPROTO); dev = get_xpath_object("//@st-available-devices", msg, LOG_ERR); CRM_CHECK(dev != NULL, return -EPROTO); crm_element_value_int(dev, "st-available-devices", &devices); op = g_hash_table_lookup(remote_op_list, id); if (op == NULL) { crm_debug("Unknown or expired remote op: %s", id); return -EOPNOTSUPP; } op->replies++; host = crm_element_value(msg, F_ORIG); host_is_target = safe_str_eq(host, op->target); if (devices <= 0) { /* If we're doing 'known' then we might need to fire anyway */ crm_trace("Query result %d of %d from %s for %s/%s (%d devices) %s", op->replies, op->replies_expected, host, op->target, op->action, devices, id); if(op->state == st_query && (op->replies >= op->replies_expected || op->replies >= active)) { crm_info("All queries have arrived, continuing (%d, %d, %d, %s)", op->replies_expected, active, op->replies, id); call_remote_stonith(op, NULL); } return pcmk_ok; } crm_info("Query result %d of %d from %s for %s/%s (%d devices) %s", op->replies, op->replies_expected, host, op->target, op->action, devices, id); result = calloc(1, sizeof(st_query_result_t)); result->host = strdup(host); result->devices = devices; result->custom_action_timeouts = g_hash_table_new_full(crm_str_hash, g_str_equal, free, NULL); result->verified_devices = g_hash_table_new_full(crm_str_hash, g_str_equal, free, NULL); for (child = __xml_first_child(dev); child != NULL; child = __xml_next(child)) { const char *device = ID(child); int action_timeout = 0; int verified = 0; int required = 0; if (device) { result->device_list = g_list_prepend(result->device_list, strdup(device)); crm_element_value_int(child, F_STONITH_ACTION_TIMEOUT, &action_timeout); crm_element_value_int(child, F_STONITH_DEVICE_VERIFIED, &verified); crm_element_value_int(child, F_STONITH_DEVICE_REQUIRED, &required); if (action_timeout) { crm_trace("Peer %s with device %s returned action timeout %d", result->host, device, action_timeout); g_hash_table_insert(result->custom_action_timeouts, strdup(device), GINT_TO_POINTER(action_timeout)); } if (verified) { crm_trace("Peer %s has confirmed a verified device %s", result->host, device); g_hash_table_insert(result->verified_devices, strdup(device), GINT_TO_POINTER(verified)); } if (required) { crm_trace("Peer %s requires device %s to execute for action %s", result->host, device, op->action); /* This matters when executing a topology. Required devices will get * executed regardless of their topology level. We use this for unfencing. */ add_required_device(op, device); } } } CRM_CHECK(devices == g_list_length(result->device_list), crm_err("Mis-match: Query claimed to have %d devices but %d found", devices, g_list_length(result->device_list))); op->query_results = g_list_insert_sorted(op->query_results, result, sort_peers); if (is_set(op->call_options, st_opt_topology)) { /* If we start the fencing before all the topology results are in, * it is possible fencing levels will be skipped because of the missing * query results. */ if (op->state == st_query && all_topology_devices_found(op)) { /* All the query results are in for the topology, start the fencing ops. */ crm_trace("All topology devices found"); call_remote_stonith(op, result); } else if(op->state == st_query && (op->replies >= op->replies_expected || op->replies >= active)) { crm_info("All topology queries have arrived, continuing (%d, %d, %d) ", op->replies_expected, active, op->replies); call_remote_stonith(op, NULL); } } else if (op->state == st_query) { /* We have a result for a non-topology fencing op that looks promising, * go ahead and start fencing before query timeout */ if (host_is_target == FALSE && g_hash_table_size(result->verified_devices)) { /* we have a verified device living on a peer that is not the target */ crm_trace("Found %d verified devices", g_hash_table_size(result->verified_devices)); call_remote_stonith(op, result); } else if(op->replies >= op->replies_expected || op->replies >= active) { crm_info("All queries have arrived, continuing (%d, %d, %d) ", op->replies_expected, active, op->replies); call_remote_stonith(op, NULL); } else { crm_trace("Waiting for more peer results before launching fencing operation"); } } else if (op->state == st_done) { crm_info("Discarding query result from %s (%d devices): Operation is in state %d", result->host, result->devices, op->state); } return pcmk_ok; } int process_remote_stonith_exec(xmlNode * msg) { int rc = 0; const char *id = NULL; const char *device = NULL; remote_fencing_op_t *op = NULL; xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR); CRM_CHECK(dev != NULL, return -EPROTO); id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID); CRM_CHECK(id != NULL, return -EPROTO); dev = get_xpath_object("//@" F_STONITH_RC, msg, LOG_ERR); CRM_CHECK(dev != NULL, return -EPROTO); crm_element_value_int(dev, F_STONITH_RC, &rc); device = crm_element_value(dev, F_STONITH_DEVICE); if (remote_op_list) { op = g_hash_table_lookup(remote_op_list, id); } if (op == NULL && rc == pcmk_ok) { /* Record successful fencing operations */ const char *client_id = crm_element_value(dev, F_STONITH_CLIENTID); op = create_remote_stonith_op(client_id, dev, TRUE); } if (op == NULL) { /* Could be for an event that began before we started */ /* TODO: Record the op for later querying */ crm_info("Unknown or expired remote op: %s", id); return -EOPNOTSUPP; } if (op->devices && device && safe_str_neq(op->devices->data, device)) { crm_err ("Received outdated reply for device %s (instead of %s) to %s node %s. Operation already timed out at remote level.", device, op->devices->data, op->action, op->target); return rc; } if (safe_str_eq(crm_element_value(msg, F_SUBTYPE), "broadcast")) { crm_debug("Marking call to %s for %s on behalf of %s@%s.%.8s: %s (%d)", op->action, op->target, op->client_name, op->id, op->originator, pcmk_strerror(rc), rc); if (rc == pcmk_ok) { op->state = st_done; } else { op->state = st_failed; } remote_op_done(op, msg, rc, FALSE); return pcmk_ok; } else if (safe_str_neq(op->originator, stonith_our_uname)) { /* If this isn't a remote level broadcast, and we are not the * originator of the operation, we should not be receiving this msg. */ crm_err ("%s received non-broadcast fencing result for operation it does not own (device %s targeting %s)", stonith_our_uname, device, op->target); return rc; } if (is_set(op->call_options, st_opt_topology)) { const char *device = crm_element_value(msg, F_STONITH_DEVICE); crm_notice("Call to %s for %s on behalf of %s@%s: %s (%d)", device, op->target, op->client_name, op->originator, pcmk_strerror(rc), rc); /* We own the op, and it is complete. broadcast the result to all nodes * and notify our local clients. */ if (op->state == st_done) { remote_op_done(op, msg, rc, FALSE); return rc; } /* An operation completed succesfully but has not yet been marked as done. * Continue the topology if more devices exist at the current level, otherwise * mark as done. */ if (rc == pcmk_ok) { GListPtr required_match = g_list_find_custom(op->required_list, device, sort_strings); if (op->devices) { /* Success, are there any more? */ op->devices = op->devices->next; } if (required_match) { op->required_list = g_list_remove(op->required_list, required_match->data); } /* if no more devices at this fencing level, we are done, * else we need to contine with executing the next device in the list */ if (op->devices == NULL) { crm_trace("Marking complex fencing op for %s as complete", op->target); op->state = st_done; remote_op_done(op, msg, rc, FALSE); return rc; } } else { /* This device failed, time to try another topology level. If no other * levels are available, mark this operation as failed and report results. */ if (stonith_topology_next(op) != pcmk_ok) { op->state = st_failed; remote_op_done(op, msg, rc, FALSE); return rc; } } } else if (rc == pcmk_ok && op->devices == NULL) { crm_trace("All done for %s", op->target); op->state = st_done; remote_op_done(op, msg, rc, FALSE); return rc; } else if (rc == -ETIME && op->devices == NULL) { /* If the operation timed out don't bother retrying other peers. */ op->state = st_failed; remote_op_done(op, msg, rc, FALSE); return rc; } else { /* fall-through and attempt other fencing action using another peer */ } /* Retry on failure or execute the rest of the topology */ crm_trace("Next for %s on behalf of %s@%s (rc was %d)", op->target, op->originator, op->client_name, rc); call_remote_stonith(op, NULL); return rc; } int stonith_fence_history(xmlNode * msg, xmlNode ** output) { int rc = 0; const char *target = NULL; xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_TRACE); if (dev) { int options = 0; target = crm_element_value(dev, F_STONITH_TARGET); crm_element_value_int(msg, F_STONITH_CALLOPTS, &options); if (target && (options & st_opt_cs_nodeid)) { int nodeid = crm_atoi(target, NULL); crm_node_t *node = crm_get_peer(nodeid, NULL); if (node) { target = node->uname; } } } crm_trace("Looking for operations on %s in %p", target, remote_op_list); *output = create_xml_node(NULL, F_STONITH_HISTORY_LIST); if (remote_op_list) { GHashTableIter iter; remote_fencing_op_t *op = NULL; g_hash_table_iter_init(&iter, remote_op_list); while (g_hash_table_iter_next(&iter, NULL, (void **)&op)) { xmlNode *entry = NULL; if (target && strcmp(op->target, target) != 0) { continue; } rc = 0; crm_trace("Attaching op %s", op->id); entry = create_xml_node(*output, STONITH_OP_EXEC); crm_xml_add(entry, F_STONITH_TARGET, op->target); crm_xml_add(entry, F_STONITH_ACTION, op->action); crm_xml_add(entry, F_STONITH_ORIGIN, op->originator); crm_xml_add(entry, F_STONITH_DELEGATE, op->delegate); crm_xml_add(entry, F_STONITH_CLIENTNAME, op->client_name); crm_xml_add_int(entry, F_STONITH_DATE, op->completed); crm_xml_add_int(entry, F_STONITH_STATE, op->state); } } return rc; } gboolean stonith_check_fence_tolerance(int tolerance, const char *target, const char *action) { GHashTableIter iter; time_t now = time(NULL); remote_fencing_op_t *rop = NULL; crm_trace("tolerance=%d, remote_op_list=%p", tolerance, remote_op_list); if (tolerance <= 0 || !remote_op_list || target == NULL || action == NULL) { return FALSE; } g_hash_table_iter_init(&iter, remote_op_list); while (g_hash_table_iter_next(&iter, NULL, (void **)&rop)) { if (strcmp(rop->target, target) != 0) { continue; } else if (rop->state != st_done) { continue; } else if (strcmp(rop->action, action) != 0) { continue; } else if ((rop->completed + tolerance) < now) { continue; } crm_notice("Target %s was fenced (%s) less than %ds ago by %s on behalf of %s", target, action, tolerance, rop->delegate, rop->originator); return TRUE; } return FALSE; } diff --git a/include/crm/msg_xml.h b/include/crm/msg_xml.h index eb09b89a0d..d3c2643fc5 100644 --- a/include/crm/msg_xml.h +++ b/include/crm/msg_xml.h @@ -1,397 +1,400 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef XML_TAGS__H # define XML_TAGS__H # ifndef F_ORIG # define F_ORIG "src" # endif # ifndef F_SEQ # define F_SEQ "seq" # endif # ifndef F_SUBTYPE # define F_SUBTYPE "subt" # endif # ifndef F_TYPE # define F_TYPE "t" # endif # ifndef F_CLIENTNAME # define F_CLIENTNAME "cn" # endif # ifndef F_XML_TAGNAME # define F_XML_TAGNAME "__name__" # endif # ifndef T_CRM # define T_CRM "crmd" # endif # ifndef T_ATTRD # define T_ATTRD "attrd" # endif # define CIB_OPTIONS_FIRST "cib-bootstrap-options" # define F_CRM_DATA "crm_xml" # define F_CRM_TASK "crm_task" # define F_CRM_HOST_TO "crm_host_to" # define F_CRM_MSG_TYPE F_SUBTYPE # define F_CRM_SYS_TO "crm_sys_to" # define F_CRM_SYS_FROM "crm_sys_from" # define F_CRM_HOST_FROM F_ORIG # define F_CRM_REFERENCE XML_ATTR_REFERENCE # define F_CRM_VERSION XML_ATTR_VERSION # define F_CRM_ORIGIN "origin" # define F_CRM_USER "crm_user" # define F_CRM_JOIN_ID "join_id" # define F_CRM_ELECTION_ID "election-id" # define F_CRM_ELECTION_AGE_S "election-age-sec" # define F_CRM_ELECTION_AGE_US "election-age-nano-sec" # define F_CRM_ELECTION_OWNER "election-owner" # define F_CRM_TGRAPH "crm-tgraph" # define F_CRM_TGRAPH_INPUT "crm-tgraph-in" # define F_CRM_THROTTLE_MODE "crm-limit-mode" # define F_CRM_THROTTLE_MAX "crm-limit-max" /*---- Common tags/attrs */ # define XML_DIFF_MARKER "__crm_diff_marker__" # define XML_ATTR_TAGNAME F_XML_TAGNAME # define XML_TAG_CIB "cib" # define XML_TAG_FAILED "failed" # define XML_ATTR_CRM_VERSION "crm_feature_set" # define XML_ATTR_DIGEST "digest" # define XML_ATTR_VALIDATION "validate-with" # define XML_ATTR_QUORUM_PANIC "no-quorum-panic" # define XML_ATTR_HAVE_QUORUM "have-quorum" # define XML_ATTR_HAVE_WATCHDOG "have-watchdog" # define XML_ATTR_EXPECTED_VOTES "expected-quorum-votes" # define XML_ATTR_GENERATION "epoch" # define XML_ATTR_GENERATION_ADMIN "admin_epoch" # define XML_ATTR_NUMUPDATES "num_updates" # define XML_ATTR_TIMEOUT "timeout" # define XML_ATTR_ORIGIN "crm-debug-origin" # define XML_ATTR_TSTAMP "crm-timestamp" # define XML_CIB_ATTR_WRITTEN "cib-last-written" # define XML_ATTR_VERSION "version" # define XML_ATTR_DESC "description" # define XML_ATTR_ID "id" # define XML_ATTR_IDREF "id-ref" # define XML_ATTR_ID_LONG "long-id" # define XML_ATTR_TYPE "type" # define XML_ATTR_FILTER_TYPE "type-filter" # define XML_ATTR_FILTER_ID "id-filter" # define XML_ATTR_FILTER_PRIORITY "priority-filter" # define XML_ATTR_VERBOSE "verbose" # define XML_ATTR_OP "op" # define XML_ATTR_DC "is_dc" # define XML_ATTR_DC_UUID "dc-uuid" # define XML_ATTR_UPDATE_ORIG "update-origin" # define XML_ATTR_UPDATE_CLIENT "update-client" # define XML_ATTR_UPDATE_USER "update-user" # define XML_BOOLEAN_TRUE "true" # define XML_BOOLEAN_FALSE "false" # define XML_BOOLEAN_YES XML_BOOLEAN_TRUE # define XML_BOOLEAN_NO XML_BOOLEAN_FALSE # define XML_TAG_OPTIONS "options" /*---- top level tags/attrs */ # define XML_MSG_TAG "crm_message" # define XML_MSG_TAG_DATA "msg_data" # define XML_ATTR_REQUEST "request" # define XML_ATTR_RESPONSE "response" # define XML_ATTR_UNAME "uname" # define XML_ATTR_UUID "id" # define XML_ATTR_REFERENCE "reference" # define XML_FAIL_TAG_RESOURCE "failed_resource" # define XML_FAILRES_ATTR_RESID "resource_id" # define XML_FAILRES_ATTR_REASON "reason" # define XML_FAILRES_ATTR_RESSTATUS "resource_status" # define XML_CRM_TAG_PING "ping_response" # define XML_PING_ATTR_STATUS "result" # define XML_PING_ATTR_SYSFROM "crm_subsystem" # define XML_TAG_FRAGMENT "cib_fragment" # define XML_ATTR_RESULT "result" # define XML_ATTR_SECTION "section" # define XML_FAIL_TAG_CIB "failed_update" # define XML_FAILCIB_ATTR_ID "id" # define XML_FAILCIB_ATTR_OBJTYPE "object_type" # define XML_FAILCIB_ATTR_OP "operation" # define XML_FAILCIB_ATTR_REASON "reason" /*---- CIB specific tags/attrs */ # define XML_CIB_TAG_SECTION_ALL "all" # define XML_CIB_TAG_CONFIGURATION "configuration" # define XML_CIB_TAG_STATUS "status" # define XML_CIB_TAG_RESOURCES "resources" # define XML_CIB_TAG_NODES "nodes" # define XML_CIB_TAG_DOMAINS "domains" # define XML_CIB_TAG_CONSTRAINTS "constraints" # define XML_CIB_TAG_CRMCONFIG "crm_config" # define XML_CIB_TAG_OPCONFIG "op_defaults" # define XML_CIB_TAG_RSCCONFIG "rsc_defaults" # define XML_CIB_TAG_ACLS "acls" # define XML_CIB_TAG_STATE "node_state" # define XML_CIB_TAG_NODE "node" # define XML_CIB_TAG_DOMAIN "domain" # define XML_CIB_TAG_CONSTRAINT "constraint" # define XML_CIB_TAG_NVPAIR "nvpair" # define XML_CIB_TAG_PROPSET "cluster_property_set" # define XML_TAG_ATTR_SETS "instance_attributes" # define XML_TAG_META_SETS "meta_attributes" # define XML_TAG_ATTRS "attributes" # define XML_TAG_PARAMS "parameters" # define XML_TAG_PARAM "param" # define XML_TAG_UTILIZATION "utilization" # define XML_TAG_RESOURCE_REF "resource_ref" # define XML_CIB_TAG_RESOURCE "primitive" # define XML_CIB_TAG_GROUP "group" # define XML_CIB_TAG_INCARNATION "clone" # define XML_CIB_TAG_MASTER "master" # define XML_CIB_TAG_RSC_TEMPLATE "template" # define XML_RSC_ATTR_RESTART "restart-type" # define XML_RSC_ATTR_ORDERED "ordered" # define XML_RSC_ATTR_INTERLEAVE "interleave" # define XML_RSC_ATTR_INCARNATION "clone" # define XML_RSC_ATTR_INCARNATION_MAX "clone-max" # define XML_RSC_ATTR_INCARNATION_NODEMAX "clone-node-max" # define XML_RSC_ATTR_MASTER_MAX "master-max" # define XML_RSC_ATTR_MASTER_NODEMAX "master-node-max" # define XML_RSC_ATTR_STATE "clone-state" # define XML_RSC_ATTR_MANAGED "is-managed" # define XML_RSC_ATTR_TARGET_ROLE "target-role" # define XML_RSC_ATTR_UNIQUE "globally-unique" # define XML_RSC_ATTR_NOTIFY "notify" # define XML_RSC_ATTR_STICKINESS "resource-stickiness" # define XML_RSC_ATTR_FAIL_STICKINESS "migration-threshold" # define XML_RSC_ATTR_FAIL_TIMEOUT "failure-timeout" # define XML_RSC_ATTR_MULTIPLE "multiple-active" # define XML_RSC_ATTR_PRIORITY "priority" # define XML_RSC_ATTR_REQUIRES "requires" # define XML_RSC_ATTR_PROVIDES "provides" # define XML_RSC_ATTR_CONTAINER "container" # define XML_RSC_ATTR_INTERNAL_RSC "internal_rsc" # define XML_RSC_ATTR_MAINTENANCE "maintenance" # define XML_RSC_ATTR_REMOTE_NODE "remote-node" # define XML_OP_ATTR_ON_FAIL "on-fail" # define XML_OP_ATTR_START_DELAY "start-delay" # define XML_OP_ATTR_ALLOW_MIGRATE "allow-migrate" # define XML_OP_ATTR_DEPENDENT "dependent-on" # define XML_OP_ATTR_ORIGIN "interval-origin" # define XML_OP_ATTR_PENDING "record-pending" # define XML_CIB_TAG_LRM "lrm" # define XML_LRM_TAG_RESOURCES "lrm_resources" # define XML_LRM_TAG_RESOURCE "lrm_resource" # define XML_LRM_TAG_AGENTS "lrm_agents" # define XML_LRM_TAG_AGENT "lrm_agent" # define XML_LRM_TAG_RSC_OP "lrm_rsc_op" # define XML_AGENT_ATTR_CLASS "class" # define XML_AGENT_ATTR_PROVIDER "provider" # define XML_LRM_TAG_ATTRIBUTES "attributes" # define XML_CIB_ATTR_REPLACE "replace" # define XML_CIB_ATTR_SOURCE "source" # define XML_CIB_ATTR_HEALTH "health" # define XML_CIB_ATTR_WEIGHT "weight" # define XML_CIB_ATTR_PRIORITY "priority" # define XML_CIB_ATTR_CLEAR "clear_on" # define XML_CIB_ATTR_SOURCE "source" # define XML_NODE_JOIN_STATE "join" # define XML_NODE_EXPECTED "expected" # define XML_NODE_IN_CLUSTER "in_ccm" # define XML_NODE_IS_PEER "crmd" # define XML_NODE_IS_REMOTE "remote_node" # define XML_CIB_ATTR_SHUTDOWN "shutdown" # define XML_CIB_ATTR_STONITH "stonith" # define XML_LRM_ATTR_INTERVAL "interval" # define XML_LRM_ATTR_TASK "operation" # define XML_LRM_ATTR_TASK_KEY "operation_key" # define XML_LRM_ATTR_TARGET "on_node" /*! used for remote nodes. * For remote nodes the action is routed to the 'on_node' * node location, and then from there if 'exec_on' is set * the host will execute the action on the remote node * it controls. */ # define XML_LRM_ATTR_ROUTER_NODE "router_node" # define XML_LRM_ATTR_TARGET_UUID "on_node_uuid" # define XML_LRM_ATTR_RSCID "rsc-id" # define XML_LRM_ATTR_OPSTATUS "op-status" # define XML_LRM_ATTR_RC "rc-code" # define XML_LRM_ATTR_CALLID "call-id" # define XML_LRM_ATTR_OP_DIGEST "op-digest" # define XML_LRM_ATTR_OP_RESTART "op-force-restart" # define XML_LRM_ATTR_RESTART_DIGEST "op-restart-digest" # define XML_LRM_ATTR_EXIT_REASON "exit-reason" # define XML_RSC_OP_LAST_CHANGE "last-rc-change" # define XML_RSC_OP_LAST_RUN "last-run" # define XML_RSC_OP_T_EXEC "exec-time" # define XML_RSC_OP_T_QUEUE "queue-time" # define XML_LRM_ATTR_MIGRATE_SOURCE "migrate_source" # define XML_LRM_ATTR_MIGRATE_TARGET "migrate_target" # define XML_TAG_GRAPH "transition_graph" # define XML_GRAPH_TAG_RSC_OP "rsc_op" # define XML_GRAPH_TAG_PSEUDO_EVENT "pseudo_event" # define XML_GRAPH_TAG_CRM_EVENT "crm_event" # define XML_TAG_RULE "rule" # define XML_RULE_ATTR_SCORE "score" # define XML_RULE_ATTR_SCORE_ATTRIBUTE "score-attribute" # define XML_RULE_ATTR_SCORE_MANGLED "score-attribute-mangled" # define XML_RULE_ATTR_ROLE "role" # define XML_RULE_ATTR_RESULT "result" # define XML_RULE_ATTR_BOOLEAN_OP "boolean-op" # define XML_TAG_EXPRESSION "expression" # define XML_EXPR_ATTR_ATTRIBUTE "attribute" # define XML_EXPR_ATTR_OPERATION "operation" # define XML_EXPR_ATTR_VALUE "value" # define XML_EXPR_ATTR_TYPE "type" # define XML_CONS_TAG_RSC_DEPEND "rsc_colocation" # define XML_CONS_TAG_RSC_ORDER "rsc_order" # define XML_CONS_TAG_RSC_LOCATION "rsc_location" # define XML_CONS_TAG_RSC_TICKET "rsc_ticket" # define XML_CONS_TAG_RSC_SET "resource_set" # define XML_CONS_ATTR_SYMMETRICAL "symmetrical" +# define XML_LOCATION_ATTR_DISCOVERY "resource-discovery" + # define XML_COLOC_ATTR_SOURCE "rsc" # define XML_COLOC_ATTR_SOURCE_ROLE "rsc-role" # define XML_COLOC_ATTR_TARGET "with-rsc" # define XML_COLOC_ATTR_TARGET_ROLE "with-rsc-role" # define XML_COLOC_ATTR_NODE_ATTR "node-attribute" # define XML_COLOC_ATTR_SOURCE_INSTANCE "rsc-instance" # define XML_COLOC_ATTR_TARGET_INSTANCE "with-rsc-instance" # define XML_ORDER_ATTR_FIRST "first" # define XML_ORDER_ATTR_THEN "then" # define XML_ORDER_ATTR_FIRST_ACTION "first-action" # define XML_ORDER_ATTR_THEN_ACTION "then-action" # define XML_ORDER_ATTR_FIRST_INSTANCE "first-instance" # define XML_ORDER_ATTR_THEN_INSTANCE "then-instance" # define XML_ORDER_ATTR_KIND "kind" # define XML_TICKET_ATTR_TICKET "ticket" # define XML_TICKET_ATTR_LOSS_POLICY "loss-policy" # define XML_NVPAIR_ATTR_NAME "name" # define XML_NVPAIR_ATTR_VALUE "value" # define XML_NODE_ATTR_STATE "state" +# define XML_NODE_ATTR_RSC_DISCOVERY "resource-discovery-enabled" # define XML_CONFIG_ATTR_DC_DEADTIME "dc-deadtime" # define XML_CONFIG_ATTR_ELECTION_FAIL "election-timeout" # define XML_CONFIG_ATTR_FORCE_QUIT "shutdown-escalation" # define XML_CONFIG_ATTR_RECHECK "cluster-recheck-interval" # define XML_CIB_TAG_GENERATION_TUPPLE "generation_tuple" # define XML_ATTR_TRANSITION_MAGIC "transition-magic" # define XML_ATTR_TRANSITION_KEY "transition-key" # define XML_ATTR_TE_NOWAIT "op_no_wait" # define XML_ATTR_TE_TARGET_RC "op_target_rc" # define XML_ATTR_TE_ALLOWFAIL "op_allow_fail" # define XML_ATTR_LRM_PROBE "lrm-is-probe" # define XML_TAG_TRANSIENT_NODEATTRS "transient_attributes" # define XML_TAG_DIFF_ADDED "diff-added" # define XML_TAG_DIFF_REMOVED "diff-removed" # define XML_ACL_TAG_USER "acl_target" # define XML_ACL_TAG_USERv1 "acl_user" # define XML_ACL_TAG_GROUP "acl_group" # define XML_ACL_TAG_ROLE "acl_role" # define XML_ACL_TAG_PERMISSION "acl_permission" # define XML_ACL_TAG_ROLE_REF "role" # define XML_ACL_TAG_ROLE_REFv1 "role_ref" # define XML_ACL_ATTR_KIND "kind" # define XML_ACL_TAG_READ "read" # define XML_ACL_TAG_WRITE "write" # define XML_ACL_TAG_DENY "deny" # define XML_ACL_ATTR_REF "reference" # define XML_ACL_ATTR_REFv1 "ref" # define XML_ACL_ATTR_TAG "object-type" # define XML_ACL_ATTR_TAGv1 "tag" # define XML_ACL_ATTR_XPATH "xpath" # define XML_ACL_ATTR_ATTRIBUTE "attribute" # define XML_CIB_TAG_TICKETS "tickets" # define XML_CIB_TAG_TICKET_STATE "ticket_state" # define XML_CIB_TAG_TAGS "tags" # define XML_CIB_TAG_TAG "tag" # define XML_CIB_TAG_OBJ_REF "obj_ref" # define XML_TAG_FENCING_TOPOLOGY "fencing-topology" # define XML_TAG_FENCING_LEVEL "fencing-level" # define XML_ATTR_STONITH_INDEX "index" # define XML_ATTR_STONITH_TARGET "target" # define XML_ATTR_STONITH_DEVICES "devices" # define XML_TAG_DIFF "diff" # define XML_DIFF_VERSION "version" # define XML_DIFF_VSOURCE "source" # define XML_DIFF_VTARGET "target" # define XML_DIFF_CHANGE "change" # define XML_DIFF_LIST "change-list" # define XML_DIFF_ATTR "change-attr" # define XML_DIFF_RESULT "change-result" # define XML_DIFF_OP "operation" # define XML_DIFF_PATH "path" # define XML_DIFF_POSITION "position" # include # define ID(x) crm_element_value(x, XML_ATTR_ID) # define INSTANCE(x) crm_element_value(x, XML_CIB_ATTR_INSTANCE) # define TSTAMP(x) crm_element_value(x, XML_ATTR_TSTAMP) # define TYPE(x) crm_element_name(x) # define NAME(x) crm_element_value(x, XML_NVPAIR_ATTR_NAME) # define VALUE(x) crm_element_value(x, XML_NVPAIR_ATTR_VALUE) #endif diff --git a/include/crm/pengine/status.h b/include/crm/pengine/status.h index d0ba85672f..f08a910635 100644 --- a/include/crm/pengine/status.h +++ b/include/crm/pengine/status.h @@ -1,393 +1,397 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef PENGINE_STATUS__H # define PENGINE_STATUS__H # include # include # include typedef struct node_s pe_node_t; typedef struct node_s node_t; typedef struct pe_action_s action_t; typedef struct pe_action_s pe_action_t; typedef struct resource_s resource_t; typedef struct ticket_s ticket_t; typedef enum no_quorum_policy_e { no_quorum_freeze, no_quorum_stop, no_quorum_ignore, no_quorum_suicide } no_quorum_policy_t; enum node_type { node_ping, node_member, node_remote }; enum pe_restart { pe_restart_restart, pe_restart_ignore }; enum pe_find { pe_find_renamed = 0x001, pe_find_clone = 0x004, pe_find_current = 0x008, pe_find_inactive = 0x010, }; # define pe_flag_have_quorum 0x00000001ULL # define pe_flag_symmetric_cluster 0x00000002ULL # define pe_flag_is_managed_default 0x00000004ULL # define pe_flag_maintenance_mode 0x00000008ULL # define pe_flag_stonith_enabled 0x00000010ULL # define pe_flag_have_stonith_resource 0x00000020ULL # define pe_flag_enable_unfencing 0x00000040ULL # define pe_flag_stop_rsc_orphans 0x00000100ULL # define pe_flag_stop_action_orphans 0x00000200ULL # define pe_flag_stop_everything 0x00000400ULL # define pe_flag_start_failure_fatal 0x00001000ULL # define pe_flag_remove_after_stop 0x00002000ULL # define pe_flag_startup_probes 0x00010000ULL # define pe_flag_have_status 0x00020000ULL # define pe_flag_have_remote_nodes 0x00040000ULL # define pe_flag_quick_location 0x00100000ULL typedef struct pe_working_set_s { xmlNode *input; crm_time_t *now; /* options extracted from the input */ char *dc_uuid; node_t *dc_node; const char *stonith_action; const char *placement_strategy; unsigned long long flags; int stonith_timeout; int default_resource_stickiness; no_quorum_policy_t no_quorum_policy; GHashTable *config_hash; GHashTable *tickets; GHashTable *singletons; /* Actions for which there can be only one - ie. fence nodeX */ GListPtr nodes; GListPtr resources; GListPtr placement_constraints; GListPtr ordering_constraints; GListPtr colocation_constraints; GListPtr ticket_constraints; GListPtr actions; xmlNode *failed; xmlNode *op_defaults; xmlNode *rsc_defaults; /* stats */ int num_synapse; int max_valid_nodes; int order_id; int action_id; /* final output */ xmlNode *graph; GHashTable *template_rsc_sets; const char *localhost; GHashTable *tags; } pe_working_set_t; struct node_shared_s { const char *id; const char *uname; /* Make all these flags into a bitfield one day */ gboolean online; gboolean standby; gboolean standby_onfail; gboolean pending; gboolean unclean; gboolean unseen; gboolean shutdown; gboolean expected_up; gboolean is_dc; + gboolean rsc_discovery_enabled; + int num_resources; GListPtr running_rsc; /* resource_t* */ GListPtr allocated_rsc; /* resource_t* */ resource_t *remote_rsc; GHashTable *attrs; /* char* => char* */ enum node_type type; GHashTable *utilization; /*! cache of calculated rsc digests for this node. */ GHashTable *digest_cache; gboolean maintenance; }; struct node_s { int weight; gboolean fixed; + int rsc_discover_mode; int count; struct node_shared_s *details; }; # include # define pe_rsc_orphan 0x00000001ULL # define pe_rsc_managed 0x00000002ULL # define pe_rsc_block 0x00000004ULL /* Further operations are prohibited due to failure policy */ # define pe_rsc_orphan_container_filler 0x00000008ULL # define pe_rsc_notify 0x00000010ULL # define pe_rsc_unique 0x00000020ULL # define pe_rsc_fence_device 0x00000040ULL # define pe_rsc_provisional 0x00000100ULL # define pe_rsc_allocating 0x00000200ULL # define pe_rsc_merging 0x00000400ULL # define pe_rsc_munging 0x00000800ULL # define pe_rsc_try_reload 0x00001000ULL # define pe_rsc_reload 0x00002000ULL # define pe_rsc_failed 0x00010000ULL # define pe_rsc_shutdown 0x00020000ULL # define pe_rsc_runnable 0x00040000ULL # define pe_rsc_start_pending 0x00080000ULL # define pe_rsc_starting 0x00100000ULL # define pe_rsc_stopping 0x00200000ULL # define pe_rsc_migrating 0x00400000ULL # define pe_rsc_allow_migrate 0x00800000ULL # define pe_rsc_failure_ignored 0x01000000ULL # define pe_rsc_unexpectedly_running 0x02000000ULL # define pe_rsc_maintenance 0x04000000ULL # define pe_rsc_needs_quorum 0x10000000ULL # define pe_rsc_needs_fencing 0x20000000ULL # define pe_rsc_needs_unfencing 0x40000000ULL # define pe_rsc_have_unfencing 0x80000000ULL enum pe_graph_flags { pe_graph_none = 0x00000, pe_graph_updated_first = 0x00001, pe_graph_updated_then = 0x00002, pe_graph_disable = 0x00004, }; /* *INDENT-OFF* */ enum pe_action_flags { pe_action_pseudo = 0x00001, pe_action_runnable = 0x00002, pe_action_optional = 0x00004, pe_action_print_always = 0x00008, pe_action_have_node_attrs = 0x00010, pe_action_failure_is_fatal = 0x00020, pe_action_implied_by_stonith = 0x00040, pe_action_migrate_runnable = 0x00080, pe_action_dumped = 0x00100, pe_action_processed = 0x00200, pe_action_clear = 0x00400, pe_action_dangle = 0x00800, pe_action_requires_any = 0x01000, /* This action requires one or mre of its dependancies to be runnable * We use this to clear the runnable flag before checking dependancies */ }; /* *INDENT-ON* */ struct resource_s { char *id; char *clone_name; xmlNode *xml; xmlNode *orig_xml; xmlNode *ops_xml; resource_t *parent; void *variant_opaque; enum pe_obj_types variant; resource_object_functions_t *fns; resource_alloc_functions_t *cmds; enum rsc_recovery_type recovery_type; enum pe_restart restart_type; int priority; int stickiness; int sort_index; int failure_timeout; int effective_priority; int migration_threshold; gboolean is_remote_node; + gboolean exclusive_discover; unsigned long long flags; GListPtr rsc_cons_lhs; /* rsc_colocation_t* */ GListPtr rsc_cons; /* rsc_colocation_t* */ GListPtr rsc_location; /* rsc_to_node_t* */ GListPtr actions; /* action_t* */ GListPtr rsc_tickets; /* rsc_ticket* */ node_t *allocated_to; GListPtr running_on; /* node_t* */ GHashTable *known_on; /* node_t* */ GHashTable *allowed_nodes; /* node_t* */ enum rsc_role_e role; enum rsc_role_e next_role; GHashTable *meta; GHashTable *parameters; GHashTable *utilization; GListPtr children; /* resource_t* */ GListPtr dangling_migrations; /* node_t* */ node_t *partial_migration_target; node_t *partial_migration_source; resource_t *container; GListPtr fillers; char *pending_task; }; struct pe_action_s { int id; int priority; resource_t *rsc; node_t *node; xmlNode *op_entry; char *task; char *uuid; char *cancel_task; enum pe_action_flags flags; enum rsc_start_requirement needs; enum action_fail_response on_fail; enum rsc_role_e fail_role; action_t *pre_notify; action_t *pre_notified; action_t *post_notify; action_t *post_notified; int seen_count; GHashTable *meta; GHashTable *extra; GListPtr actions_before; /* action_warpper_t* */ GListPtr actions_after; /* action_warpper_t* */ }; struct ticket_s { char *id; gboolean granted; time_t last_granted; gboolean standby; GHashTable *state; }; typedef struct tag_s { char *id; GListPtr refs; } tag_t; enum pe_link_state { pe_link_not_dumped, pe_link_dumped, pe_link_dup, }; /* *INDENT-OFF* */ enum pe_ordering { pe_order_none = 0x0, /* deleted */ pe_order_optional = 0x1, /* pure ordering, nothing implied */ pe_order_apply_first_non_migratable = 0x2, /* Only apply this constraint's ordering if first is not migratable. */ pe_order_implies_first = 0x10, /* If 'then' is required, ensure 'first' is too */ pe_order_implies_then = 0x20, /* If 'first' is required, ensure 'then' is too */ pe_order_implies_first_master = 0x40, /* Imply 'first' is required when 'then' is required and then's rsc holds Master role. */ /* first requires then to be both runnable and migrate runnable. */ pe_order_implies_first_migratable = 0x80, pe_order_runnable_left = 0x100, /* 'then' requires 'first' to be runnable */ pe_order_pseudo_left = 0x200, /* 'then' can only be pseudo if 'first' is runnable */ pe_order_implies_then_on_node = 0x400, /* If 'first' is required on 'nodeX', * ensure instances of 'then' on 'nodeX' are too. * Only really useful if 'then' is a clone and 'first' is not */ pe_order_restart = 0x1000, /* 'then' is runnable if 'first' is optional or runnable */ pe_order_stonith_stop = 0x2000, /* only applies if the action is non-pseudo */ pe_order_serialize_only = 0x4000, /* serialize */ pe_order_implies_first_printed = 0x10000, /* Like ..implies_first but only ensures 'first' is printed, not manditory */ pe_order_implies_then_printed = 0x20000, /* Like ..implies_then but only ensures 'then' is printed, not manditory */ pe_order_asymmetrical = 0x100000, /* Indicates asymmetrical one way ordering constraint. */ pe_order_load = 0x200000, /* Only relevant if... */ pe_order_one_or_more = 0x400000, /* 'then' is only runnable if one or more of it's dependancies are too */ pe_order_anti_colocation = 0x800000, pe_order_preserve = 0x1000000, /* Hack for breaking user ordering constraints with container resources */ pe_order_trace = 0x4000000, /* test marker */ }; /* *INDENT-ON* */ typedef struct action_wrapper_s action_wrapper_t; struct action_wrapper_s { enum pe_ordering type; enum pe_link_state state; action_t *action; }; const char *rsc_printable_id(resource_t *rsc); gboolean cluster_status(pe_working_set_t * data_set); void set_working_set_defaults(pe_working_set_t * data_set); void cleanup_calculations(pe_working_set_t * data_set); resource_t *pe_find_resource(GListPtr rsc_list, const char *id_rh); node_t *pe_find_node(GListPtr node_list, const char *uname); node_t *pe_find_node_id(GListPtr node_list, const char *id); node_t *pe_find_node_any(GListPtr node_list, const char *id, const char *uname); GListPtr find_operations(const char *rsc, const char *node, gboolean active_filter, pe_working_set_t * data_set); #endif diff --git a/lib/common/mainloop.c b/lib/common/mainloop.c index ac395ecf3a..f2295ff439 100644 --- a/lib/common/mainloop.c +++ b/lib/common/mainloop.c @@ -1,1217 +1,1217 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include struct mainloop_child_s { pid_t pid; char *desc; unsigned timerid; unsigned watchid; gboolean timeout; void *privatedata; /* Called when a process dies */ void (*callback) (mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode); }; struct trigger_s { GSource source; gboolean running; gboolean trigger; void *user_data; guint id; }; static gboolean crm_trigger_prepare(GSource * source, gint * timeout) { crm_trigger_t *trig = (crm_trigger_t *) source; /* cluster-glue's FD and IPC related sources make use of * g_source_add_poll() but do not set a timeout in their prepare * functions * * This means mainloop's poll() will block until an event for one * of these sources occurs - any /other/ type of source, such as * this one or g_idle_*, that doesn't use g_source_add_poll() is * S-O-L and wont be processed until there is something fd-based * happens. * * Luckily the timeout we can set here affects all sources and * puts an upper limit on how long poll() can take. * * So unconditionally set a small-ish timeout, not too small that * we're in constant motion, which will act as an upper bound on * how long the signal handling might be delayed for. */ *timeout = 500; /* Timeout in ms */ return trig->trigger; } static gboolean crm_trigger_check(GSource * source) { crm_trigger_t *trig = (crm_trigger_t *) source; return trig->trigger; } static gboolean crm_trigger_dispatch(GSource * source, GSourceFunc callback, gpointer userdata) { int rc = TRUE; crm_trigger_t *trig = (crm_trigger_t *) source; if (trig->running) { /* Wait until the existing job is complete before starting the next one */ return TRUE; } trig->trigger = FALSE; if (callback) { rc = callback(trig->user_data); if (rc < 0) { crm_trace("Trigger handler %p not yet complete", trig); trig->running = TRUE; rc = TRUE; } } return rc; } static void crm_trigger_finalize(GSource * source) { crm_trace("Trigger %p destroyed", source); } #if 0 struct _GSourceCopy { gpointer callback_data; GSourceCallbackFuncs *callback_funcs; const GSourceFuncs *source_funcs; guint ref_count; GMainContext *context; gint priority; guint flags; guint source_id; GSList *poll_fds; GSource *prev; GSource *next; char *name; void *priv; }; static int g_source_refcount(GSource * source) { /* Duplicating the contents of private header files is a necessary evil */ if (source) { struct _GSourceCopy *evil = (struct _GSourceCopy*)source; return evil->ref_count; } return 0; } #else static int g_source_refcount(GSource * source) { return 0; } #endif static GSourceFuncs crm_trigger_funcs = { crm_trigger_prepare, crm_trigger_check, crm_trigger_dispatch, crm_trigger_finalize, }; static crm_trigger_t * mainloop_setup_trigger(GSource * source, int priority, int (*dispatch) (gpointer user_data), gpointer userdata) { crm_trigger_t *trigger = NULL; trigger = (crm_trigger_t *) source; trigger->id = 0; trigger->trigger = FALSE; trigger->user_data = userdata; if (dispatch) { g_source_set_callback(source, dispatch, trigger, NULL); } g_source_set_priority(source, priority); g_source_set_can_recurse(source, FALSE); crm_trace("Setup %p with ref-count=%u", source, g_source_refcount(source)); trigger->id = g_source_attach(source, NULL); crm_trace("Attached %p with ref-count=%u", source, g_source_refcount(source)); return trigger; } void mainloop_trigger_complete(crm_trigger_t * trig) { crm_trace("Trigger handler %p complete", trig); trig->running = FALSE; } /* If dispatch returns: * -1: Job running but not complete * 0: Remove the trigger from mainloop * 1: Leave the trigger in mainloop */ crm_trigger_t * mainloop_add_trigger(int priority, int (*dispatch) (gpointer user_data), gpointer userdata) { GSource *source = NULL; CRM_ASSERT(sizeof(crm_trigger_t) > sizeof(GSource)); source = g_source_new(&crm_trigger_funcs, sizeof(crm_trigger_t)); CRM_ASSERT(source != NULL); return mainloop_setup_trigger(source, priority, dispatch, userdata); } void mainloop_set_trigger(crm_trigger_t * source) { if(source) { source->trigger = TRUE; } } gboolean mainloop_destroy_trigger(crm_trigger_t * source) { GSource *gs = NULL; if(source == NULL) { return TRUE; } gs = (GSource *)source; if(g_source_refcount(gs) > 2) { crm_info("Trigger %p is still referenced %u times", gs, g_source_refcount(gs)); } g_source_destroy(gs); /* Remove from mainloop, ref_count-- */ g_source_unref(gs); /* The caller no longer carries a reference to source * * At this point the source should be free'd, * unless we're currently processing said * source, in which case mainloop holds an * additional reference and it will be free'd * once our processing completes */ return TRUE; } typedef struct signal_s { crm_trigger_t trigger; /* must be first */ void (*handler) (int sig); int signal; } crm_signal_t; static crm_signal_t *crm_signals[NSIG]; static gboolean crm_signal_dispatch(GSource * source, GSourceFunc callback, gpointer userdata) { crm_signal_t *sig = (crm_signal_t *) source; if(sig->signal != SIGCHLD) { crm_info("Invoking handler for signal %d: %s", sig->signal, strsignal(sig->signal)); } sig->trigger.trigger = FALSE; if (sig->handler) { sig->handler(sig->signal); } return TRUE; } static void mainloop_signal_handler(int sig) { if (sig > 0 && sig < NSIG && crm_signals[sig] != NULL) { mainloop_set_trigger((crm_trigger_t *) crm_signals[sig]); } } static GSourceFuncs crm_signal_funcs = { crm_trigger_prepare, crm_trigger_check, crm_signal_dispatch, crm_trigger_finalize, }; gboolean crm_signal(int sig, void (*dispatch) (int sig)) { sigset_t mask; struct sigaction sa; struct sigaction old; if (sigemptyset(&mask) < 0) { crm_perror(LOG_ERR, "Call to sigemptyset failed"); return FALSE; } memset(&sa, 0, sizeof(struct sigaction)); sa.sa_handler = dispatch; sa.sa_flags = SA_RESTART; sa.sa_mask = mask; if (sigaction(sig, &sa, &old) < 0) { crm_perror(LOG_ERR, "Could not install signal handler for signal %d", sig); return FALSE; } return TRUE; } gboolean mainloop_add_signal(int sig, void (*dispatch) (int sig)) { GSource *source = NULL; int priority = G_PRIORITY_HIGH - 1; if (sig == SIGTERM) { /* TERM is higher priority than other signals, * signals are higher priority than other ipc. * Yes, minus: smaller is "higher" */ priority--; } if (sig >= NSIG || sig < 0) { crm_err("Signal %d is out of range", sig); return FALSE; } else if (crm_signals[sig] != NULL && crm_signals[sig]->handler == dispatch) { crm_trace("Signal handler for %d is already installed", sig); return TRUE; } else if (crm_signals[sig] != NULL) { crm_err("Different signal handler for %d is already installed", sig); return FALSE; } CRM_ASSERT(sizeof(crm_signal_t) > sizeof(GSource)); source = g_source_new(&crm_signal_funcs, sizeof(crm_signal_t)); crm_signals[sig] = (crm_signal_t *) mainloop_setup_trigger(source, priority, NULL, NULL); CRM_ASSERT(crm_signals[sig] != NULL); crm_signals[sig]->handler = dispatch; crm_signals[sig]->signal = sig; if (crm_signal(sig, mainloop_signal_handler) == FALSE) { crm_signal_t *tmp = crm_signals[sig]; crm_signals[sig] = NULL; mainloop_destroy_trigger((crm_trigger_t *) tmp); return FALSE; } #if 0 /* If we want signals to interrupt mainloop's poll(), instead of waiting for * the timeout, then we should call siginterrupt() below * * For now, just enforce a low timeout */ if (siginterrupt(sig, 1) < 0) { crm_perror(LOG_INFO, "Could not enable system call interruptions for signal %d", sig); } #endif return TRUE; } gboolean mainloop_destroy_signal(int sig) { crm_signal_t *tmp = NULL; if (sig >= NSIG || sig < 0) { crm_err("Signal %d is out of range", sig); return FALSE; } else if (crm_signal(sig, NULL) == FALSE) { crm_perror(LOG_ERR, "Could not uninstall signal handler for signal %d", sig); return FALSE; } else if (crm_signals[sig] == NULL) { return TRUE; } crm_trace("Destroying signal %d", sig); tmp = crm_signals[sig]; crm_signals[sig] = NULL; mainloop_destroy_trigger((crm_trigger_t *) tmp); return TRUE; } static qb_array_t *gio_map = NULL; void mainloop_cleanup(void) { if(gio_map) { qb_array_free(gio_map); } } /* * libqb... */ struct gio_to_qb_poll { int32_t is_used; guint source; int32_t events; void *data; qb_ipcs_dispatch_fn_t fn; enum qb_loop_priority p; }; static gboolean gio_read_socket(GIOChannel * gio, GIOCondition condition, gpointer data) { struct gio_to_qb_poll *adaptor = (struct gio_to_qb_poll *)data; gint fd = g_io_channel_unix_get_fd(gio); crm_trace("%p.%d %d", data, fd, condition); if (condition & G_IO_NVAL) { crm_trace("Marking failed adaptor %p unused", adaptor); adaptor->is_used = QB_FALSE; } return (adaptor->fn(fd, condition, adaptor->data) == 0); } static void gio_poll_destroy(gpointer data) { struct gio_to_qb_poll *adaptor = (struct gio_to_qb_poll *)data; adaptor->is_used = QB_FALSE; adaptor->source = 0; } static int32_t gio_poll_dispatch_update(enum qb_loop_priority p, int32_t fd, int32_t evts, void *data, qb_ipcs_dispatch_fn_t fn, int32_t add) { struct gio_to_qb_poll *adaptor; GIOChannel *channel; int32_t res = 0; res = qb_array_index(gio_map, fd, (void **)&adaptor); if (res < 0) { crm_err("Array lookup failed for fd=%d: %d", fd, res); return res; } crm_trace("Adding fd=%d to mainloop as adaptor %p", fd, adaptor); if (add && adaptor->is_used) { crm_err("Adaptor for descriptor %d is still in-use", fd); return -EEXIST; } if (!add && !adaptor->is_used) { crm_err("Adaptor for descriptor %d is not in-use", fd); return -ENOENT; } /* channel is created with ref_count = 1 */ channel = g_io_channel_unix_new(fd); if (!channel) { crm_err("No memory left to add fd=%d", fd); return -ENOMEM; } /* Because unlike the poll() API, glib doesn't tell us about HUPs by default */ evts |= (G_IO_HUP | G_IO_NVAL | G_IO_ERR); if (adaptor->is_used) { g_source_remove(adaptor->source); } adaptor->fn = fn; adaptor->events = evts; adaptor->data = data; adaptor->p = p; adaptor->is_used = QB_TRUE; adaptor->source = g_io_add_watch_full(channel, G_PRIORITY_DEFAULT, evts, gio_read_socket, adaptor, gio_poll_destroy); /* Now that mainloop now holds a reference to channel, * thanks to g_io_add_watch_full(), drop ours from g_io_channel_unix_new(). * * This means that channel will be free'd by: * g_main_context_dispatch() * -> g_source_destroy_internal() * -> g_source_callback_unref() * shortly after gio_poll_destroy() completes */ g_io_channel_unref(channel); crm_trace("Added to mainloop with gsource id=%d", adaptor->source); if (adaptor->source > 0) { return 0; } return -EINVAL; } static int32_t gio_poll_dispatch_add(enum qb_loop_priority p, int32_t fd, int32_t evts, void *data, qb_ipcs_dispatch_fn_t fn) { return gio_poll_dispatch_update(p, fd, evts, data, fn, QB_TRUE); } static int32_t gio_poll_dispatch_mod(enum qb_loop_priority p, int32_t fd, int32_t evts, void *data, qb_ipcs_dispatch_fn_t fn) { return gio_poll_dispatch_update(p, fd, evts, data, fn, QB_FALSE); } static int32_t gio_poll_dispatch_del(int32_t fd) { struct gio_to_qb_poll *adaptor; crm_trace("Looking for fd=%d", fd); if (qb_array_index(gio_map, fd, (void **)&adaptor) == 0) { crm_trace("Marking adaptor %p unused", adaptor); if (adaptor->source) { g_source_remove(adaptor->source); adaptor->source = 0; } adaptor->is_used = QB_FALSE; } return 0; } struct qb_ipcs_poll_handlers gio_poll_funcs = { .job_add = NULL, .dispatch_add = gio_poll_dispatch_add, .dispatch_mod = gio_poll_dispatch_mod, .dispatch_del = gio_poll_dispatch_del, }; static enum qb_ipc_type pick_ipc_type(enum qb_ipc_type requested) { const char *env = getenv("PCMK_ipc_type"); if (env && strcmp("shared-mem", env) == 0) { return QB_IPC_SHM; } else if (env && strcmp("socket", env) == 0) { return QB_IPC_SOCKET; } else if (env && strcmp("posix", env) == 0) { return QB_IPC_POSIX_MQ; } else if (env && strcmp("sysv", env) == 0) { return QB_IPC_SYSV_MQ; } else if (requested == QB_IPC_NATIVE) { /* We prefer shared memory because the server never blocks on * send. If part of a message fits into the socket, libqb * needs to block until the remainder can be sent also. * Otherwise the client will wait forever for the remaining * bytes. */ return QB_IPC_SHM; } return requested; } qb_ipcs_service_t * mainloop_add_ipc_server(const char *name, enum qb_ipc_type type, struct qb_ipcs_service_handlers * callbacks) { int rc = 0; qb_ipcs_service_t *server = NULL; if (gio_map == NULL) { gio_map = qb_array_create_2(64, sizeof(struct gio_to_qb_poll), 1); } crm_client_init(); server = qb_ipcs_create(name, 0, pick_ipc_type(type), callbacks); #ifdef HAVE_IPCS_GET_BUFFER_SIZE /* All clients should use at least ipc_buffer_max as their buffer size */ qb_ipcs_enforce_buffer_size(server, crm_ipc_default_buffer_size()); #endif qb_ipcs_poll_handlers_set(server, &gio_poll_funcs); rc = qb_ipcs_run(server); if (rc < 0) { crm_err("Could not start %s IPC server: %s (%d)", name, pcmk_strerror(rc), rc); return NULL; } return server; } void mainloop_del_ipc_server(qb_ipcs_service_t * server) { if (server) { qb_ipcs_destroy(server); } } struct mainloop_io_s { char *name; void *userdata; int fd; guint source; crm_ipc_t *ipc; GIOChannel *channel; int (*dispatch_fn_ipc) (const char *buffer, ssize_t length, gpointer userdata); int (*dispatch_fn_io) (gpointer userdata); void (*destroy_fn) (gpointer userdata); }; static gboolean mainloop_gio_callback(GIOChannel * gio, GIOCondition condition, gpointer data) { gboolean keep = TRUE; mainloop_io_t *client = data; CRM_ASSERT(client->fd == g_io_channel_unix_get_fd(gio)); if (condition & G_IO_IN) { if (client->ipc) { long rc = 0; int max = 10; do { rc = crm_ipc_read(client->ipc); if (rc <= 0) { crm_trace("Message acquisition from %s[%p] failed: %s (%ld)", client->name, client, pcmk_strerror(rc), rc); } else if (client->dispatch_fn_ipc) { const char *buffer = crm_ipc_buffer(client->ipc); crm_trace("New message from %s[%p] = %d", client->name, client, rc, condition); if (client->dispatch_fn_ipc(buffer, rc, client->userdata) < 0) { crm_trace("Connection to %s no longer required", client->name); keep = FALSE; } } } while (keep && rc > 0 && --max > 0); } else { crm_trace("New message from %s[%p] %u", client->name, client, condition); if (client->dispatch_fn_io) { if (client->dispatch_fn_io(client->userdata) < 0) { crm_trace("Connection to %s no longer required", client->name); keep = FALSE; } } } } if (client->ipc && crm_ipc_connected(client->ipc) == FALSE) { crm_err("Connection to %s[%p] closed (I/O condition=%d)", client->name, client, condition); keep = FALSE; } else if (condition & (G_IO_HUP | G_IO_NVAL | G_IO_ERR)) { crm_trace("The connection %s[%p] has been closed (I/O condition=%d)", client->name, client, condition); keep = FALSE; } else if ((condition & G_IO_IN) == 0) { /* #define GLIB_SYSDEF_POLLIN =1 #define GLIB_SYSDEF_POLLPRI =2 #define GLIB_SYSDEF_POLLOUT =4 #define GLIB_SYSDEF_POLLERR =8 #define GLIB_SYSDEF_POLLHUP =16 #define GLIB_SYSDEF_POLLNVAL =32 typedef enum { G_IO_IN GLIB_SYSDEF_POLLIN, G_IO_OUT GLIB_SYSDEF_POLLOUT, G_IO_PRI GLIB_SYSDEF_POLLPRI, G_IO_ERR GLIB_SYSDEF_POLLERR, G_IO_HUP GLIB_SYSDEF_POLLHUP, G_IO_NVAL GLIB_SYSDEF_POLLNVAL } GIOCondition; A bitwise combination representing a condition to watch for on an event source. G_IO_IN There is data to read. G_IO_OUT Data can be written (without blocking). G_IO_PRI There is urgent data to read. G_IO_ERR Error condition. G_IO_HUP Hung up (the connection has been broken, usually for pipes and sockets). G_IO_NVAL Invalid request. The file descriptor is not open. */ crm_err("Strange condition: %d", condition); } /* keep == FALSE results in mainloop_gio_destroy() being called * just before the source is removed from mainloop */ return keep; } static void mainloop_gio_destroy(gpointer c) { mainloop_io_t *client = c; char *c_name = strdup(client->name); /* client->source is valid but about to be destroyed (ref_count == 0) in gmain.c * client->channel will still have ref_count > 0... should be == 1 */ crm_trace("Destroying client %s[%p]", c_name, c); if (client->ipc) { crm_ipc_close(client->ipc); } if (client->destroy_fn) { void (*destroy_fn) (gpointer userdata) = client->destroy_fn; client->destroy_fn = NULL; destroy_fn(client->userdata); } if (client->ipc) { crm_ipc_t *ipc = client->ipc; client->ipc = NULL; crm_ipc_destroy(ipc); } crm_trace("Destroyed client %s[%p]", c_name, c); free(client->name); client->name = NULL; free(client); free(c_name); } mainloop_io_t * mainloop_add_ipc_client(const char *name, int priority, size_t max_size, void *userdata, struct ipc_client_callbacks *callbacks) { mainloop_io_t *client = NULL; crm_ipc_t *conn = crm_ipc_new(name, max_size); if (conn && crm_ipc_connect(conn)) { int32_t fd = crm_ipc_get_fd(conn); client = mainloop_add_fd(name, priority, fd, userdata, NULL); client->ipc = conn; client->destroy_fn = callbacks->destroy; client->dispatch_fn_ipc = callbacks->dispatch; } if (conn && client == NULL) { crm_trace("Connection to %s failed", name); crm_ipc_close(conn); crm_ipc_destroy(conn); } return client; } void mainloop_del_ipc_client(mainloop_io_t * client) { mainloop_del_fd(client); } crm_ipc_t * mainloop_get_ipc_client(mainloop_io_t * client) { if (client) { return client->ipc; } return NULL; } mainloop_io_t * mainloop_add_fd(const char *name, int priority, int fd, void *userdata, struct mainloop_fd_callbacks * callbacks) { mainloop_io_t *client = NULL; - if (fd > 0) { + if (fd >= 0) { client = calloc(1, sizeof(mainloop_io_t)); client->name = strdup(name); client->userdata = userdata; if (callbacks) { client->destroy_fn = callbacks->destroy; client->dispatch_fn_io = callbacks->dispatch; } client->fd = fd; client->channel = g_io_channel_unix_new(fd); client->source = g_io_add_watch_full(client->channel, priority, (G_IO_IN | G_IO_HUP | G_IO_NVAL | G_IO_ERR), mainloop_gio_callback, client, mainloop_gio_destroy); /* Now that mainloop now holds a reference to channel, * thanks to g_io_add_watch_full(), drop ours from g_io_channel_unix_new(). * * This means that channel will be free'd by: * g_main_context_dispatch() or g_source_remove() * -> g_source_destroy_internal() * -> g_source_callback_unref() * shortly after mainloop_gio_destroy() completes */ g_io_channel_unref(client->channel); crm_trace("Added connection %d for %s[%p].%d", client->source, client->name, client, fd); } return client; } void mainloop_del_fd(mainloop_io_t * client) { if (client != NULL) { crm_trace("Removing client %s[%p]", client->name, client); if (client->source) { /* Results in mainloop_gio_destroy() being called just * before the source is removed from mainloop */ g_source_remove(client->source); } } } pid_t mainloop_child_pid(mainloop_child_t * child) { return child->pid; } const char * mainloop_child_name(mainloop_child_t * child) { return child->desc; } int mainloop_child_timeout(mainloop_child_t * child) { return child->timeout; } void * mainloop_child_userdata(mainloop_child_t * child) { return child->privatedata; } void mainloop_clear_child_userdata(mainloop_child_t * child) { child->privatedata = NULL; } /* good function name */ static void child_free(mainloop_child_t *child) { if (child->timerid != 0) { crm_trace("Removing timer %d", child->timerid); g_source_remove(child->timerid); child->timerid = 0; } free(child->desc); free(child); } /* terrible function name */ static int child_kill_helper(mainloop_child_t *child) { if (kill(-child->pid, SIGKILL) < 0) { if (errno != ESRCH) { crm_perror(LOG_ERR, "kill(%d, KILL) failed", child->pid); } return -errno; } return 0; } static gboolean child_timeout_callback(gpointer p) { mainloop_child_t *child = p; int rc = 0; child->timerid = 0; if (child->timeout) { crm_crit("%s process (PID %d) will not die!", child->desc, (int)child->pid); return FALSE; } rc = child_kill_helper(child); if (rc == ESRCH) { /* Nothing left to do. pid doesn't exist */ return FALSE; } child->timeout = TRUE; crm_warn("%s process (PID %d) timed out", child->desc, (int)child->pid); child->timerid = g_timeout_add(5000, child_timeout_callback, child); return FALSE; } static GListPtr child_list = NULL; static gboolean child_waitpid(mainloop_child_t *child, int flags) { int rc = 0; int core = 0; int signo = 0; int status = 0; int exitcode = 0; rc = waitpid(child->pid, &status, flags); if(rc == 0) { crm_perror(LOG_DEBUG, "wait(%d) = %d", child->pid, rc); return FALSE; } else if(rc != child->pid) { signo = SIGCHLD; exitcode = 1; status = 1; crm_perror(LOG_ERR, "Call to waitpid(%d) failed", child->pid); } else { crm_trace("Managed process %d exited: %p", child->pid, child); if (WIFEXITED(status)) { exitcode = WEXITSTATUS(status); crm_trace("Managed process %d (%s) exited with rc=%d", child->pid, child->desc, exitcode); } else if (WIFSIGNALED(status)) { signo = WTERMSIG(status); crm_trace("Managed process %d (%s) exited with signal=%d", child->pid, child->desc, signo); } #ifdef WCOREDUMP if (WCOREDUMP(status)) { core = 1; crm_err("Managed process %d (%s) dumped core", child->pid, child->desc); } #endif } if (child->callback) { child->callback(child, child->pid, core, signo, exitcode); } return TRUE; } static void child_death_dispatch(int signal) { GListPtr iter = child_list; gboolean exited; while(iter) { GListPtr saved = NULL; mainloop_child_t *child = iter->data; exited = child_waitpid(child, WNOHANG); saved = iter; iter = iter->next; if (exited == FALSE) { continue; } crm_trace("Removing process entry %p for %d", child, child->pid); child_list = g_list_remove_link(child_list, saved); g_list_free(saved); child_free(child); } } static gboolean child_signal_init(gpointer p) { crm_trace("Installed SIGCHLD handler"); /* Do NOT use g_child_watch_add() and friends, they rely on pthreads */ mainloop_add_signal(SIGCHLD, child_death_dispatch); /* In case they terminated before the signal handler was installed */ child_death_dispatch(SIGCHLD); return FALSE; } int mainloop_child_kill(pid_t pid) { GListPtr iter; mainloop_child_t *child = NULL; mainloop_child_t *match = NULL; /* It is impossible to block SIGKILL, this allows us to * call waitpid without WNOHANG flag.*/ int waitflags = 0, rc = 0; for (iter = child_list; iter != NULL && match == NULL; iter = iter->next) { child = iter->data; if (pid == child->pid) { match = child; } } if (match == NULL) { return FALSE; } rc = child_kill_helper(match); if(rc == -ESRCH) { /* Its gone, but hasn't shown up in waitpid() yet * * Wait until we get SIGCHLD and let child_death_dispatch() * clean it up as normal (so we get the correct return * code/status) * * The blocking alternative would be to call: * child_waitpid(match, 0); */ crm_trace("Waiting for child %d to be reaped by child_death_dispatch()", match->pid); return TRUE; } else if(rc != 0) { /* If KILL for some other reason set the WNOHANG flag since we * can't be certain what happened. */ waitflags = WNOHANG; } if (child_waitpid(match, waitflags) == FALSE) { /* not much we can do if this occurs */ return FALSE; } child_list = g_list_remove(child_list, match); child_free(match); return TRUE; } /* Create/Log a new tracked process * To track a process group, use -pid */ void mainloop_child_add(pid_t pid, int timeout, const char *desc, void *privatedata, void (*callback) (mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)) { static bool need_init = TRUE; mainloop_child_t *child = g_new(mainloop_child_t, 1); child->pid = pid; child->timerid = 0; child->timeout = FALSE; child->privatedata = privatedata; child->callback = callback; if(desc) { child->desc = strdup(desc); } if (timeout) { child->timerid = g_timeout_add(timeout, child_timeout_callback, child); } child_list = g_list_append(child_list, child); if(need_init) { need_init = FALSE; /* SIGCHLD processing has to be invoked from mainloop. * We do not want it to be possible to both add a child pid * to mainloop, and have the pid's exit callback invoked within * the same callstack. */ g_timeout_add(1, child_signal_init, NULL); } } struct mainloop_timer_s { guint id; guint period_ms; bool repeat; char *name; GSourceFunc cb; void *userdata; }; struct mainloop_timer_s mainloop; static gboolean mainloop_timer_cb(gpointer user_data) { int id = 0; bool repeat = FALSE; struct mainloop_timer_s *t = user_data; CRM_ASSERT(t != NULL); id = t->id; t->id = 0; /* Ensure its unset during callbacks so that * mainloop_timer_running() works as expected */ if(t->cb) { crm_trace("Invoking callbacks for timer %s", t->name); repeat = t->repeat; if(t->cb(t->userdata) == FALSE) { crm_trace("Timer %s complete", t->name); repeat = FALSE; } } if(repeat) { /* Restore if repeating */ t->id = id; } return repeat; } bool mainloop_timer_running(mainloop_timer_t *t) { if(t && t->id != 0) { return TRUE; } return FALSE; } void mainloop_timer_start(mainloop_timer_t *t) { mainloop_timer_stop(t); if(t && t->period_ms > 0) { crm_trace("Starting timer %s", t->name); t->id = g_timeout_add(t->period_ms, mainloop_timer_cb, t); } } void mainloop_timer_stop(mainloop_timer_t *t) { if(t && t->id != 0) { crm_trace("Stopping timer %s", t->name); g_source_remove(t->id); t->id = 0; } } guint mainloop_timer_set_period(mainloop_timer_t *t, guint period_ms) { guint last = 0; if(t) { last = t->period_ms; t->period_ms = period_ms; } if(t && t->id != 0 && last != t->period_ms) { mainloop_timer_start(t); } return last; } mainloop_timer_t * mainloop_timer_add(const char *name, guint period_ms, bool repeat, GSourceFunc cb, void *userdata) { mainloop_timer_t *t = calloc(1, sizeof(mainloop_timer_t)); if(t) { if(name) { t->name = g_strdup_printf("%s-%u-%d", name, period_ms, repeat); } else { t->name = g_strdup_printf("%p-%u-%d", t, period_ms, repeat); } t->id = 0; t->period_ms = period_ms; t->repeat = repeat; t->cb = cb; t->userdata = userdata; crm_trace("Created timer %s with %p %p", t->name, userdata, t->userdata); } return t; } void mainloop_timer_del(mainloop_timer_t *t) { if(t) { crm_trace("Destroying timer %s", t->name); mainloop_timer_stop(t); free(t->name); free(t); } } diff --git a/lib/common/xml.c b/lib/common/xml.c index 06de44c840..51d6ada845 100644 --- a/lib/common/xml.c +++ b/lib/common/xml.c @@ -1,6061 +1,6065 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if HAVE_BZLIB_H # include #endif #if HAVE_LIBXML2 # include # include # include #endif #if HAVE_LIBXSLT # include # include #endif #define XML_BUFFER_SIZE 4096 #define XML_PARSER_DEBUG 0 #define BEST_EFFORT_STATUS 0 void xml_log(int priority, const char *fmt, ...) G_GNUC_PRINTF(2, 3); static inline int __get_prefix(const char *prefix, xmlNode *xml, char *buffer, int offset); void xml_log(int priority, const char *fmt, ...) { va_list ap; va_start(ap, fmt); qb_log_from_external_source_va(__FUNCTION__, __FILE__, fmt, priority, __LINE__, 0, ap); va_end(ap); } typedef struct { xmlRelaxNGPtr rng; xmlRelaxNGValidCtxtPtr valid; xmlRelaxNGParserCtxtPtr parser; } relaxng_ctx_cache_t; struct schema_s { int type; float version; char *name; char *location; char *transform; int after_transform; void *cache; }; typedef struct { int found; const char *string; } filter_t; enum xml_private_flags { xpf_none = 0x0000, xpf_dirty = 0x0001, xpf_deleted = 0x0002, xpf_created = 0x0004, xpf_modified = 0x0008, xpf_tracking = 0x0010, xpf_processed = 0x0020, xpf_skip = 0x0040, xpf_moved = 0x0080, xpf_acl_enabled = 0x0100, xpf_acl_read = 0x0200, xpf_acl_write = 0x0400, xpf_acl_deny = 0x0800, xpf_acl_create = 0x1000, xpf_acl_denied = 0x2000, }; typedef struct xml_private_s { long check; uint32_t flags; char *user; GListPtr acls; GListPtr deleted_paths; } xml_private_t; typedef struct xml_acl_s { enum xml_private_flags mode; char *xpath; } xml_acl_t; /* *INDENT-OFF* */ static filter_t filter[] = { { 0, XML_ATTR_ORIGIN }, { 0, XML_CIB_ATTR_WRITTEN }, { 0, XML_ATTR_UPDATE_ORIG }, { 0, XML_ATTR_UPDATE_CLIENT }, { 0, XML_ATTR_UPDATE_USER }, }; /* *INDENT-ON* */ static struct schema_s *known_schemas = NULL; static int xml_schema_max = 0; static xmlNode *subtract_xml_comment(xmlNode * parent, xmlNode * left, xmlNode * right, gboolean * changed); static xmlNode *find_xml_comment(xmlNode * root, xmlNode * search_comment); static int add_xml_comment(xmlNode * parent, xmlNode * target, xmlNode * update); static bool __xml_acl_check(xmlNode *xml, const char *name, enum xml_private_flags mode); const char *__xml_acl_to_text(enum xml_private_flags flags); static int xml_latest_schema_index(void) { return xml_schema_max - 4; } static int xml_minimum_schema_index(void) { static int best = 0; if(best == 0) { int lpc = 0; float target = 0.0; best = xml_latest_schema_index(); target = floor(known_schemas[best].version); for(lpc = best; lpc > 0; lpc--) { if(known_schemas[lpc].version < target) { return best; } else { best = lpc; } } best = xml_latest_schema_index(); } return best; } const char * xml_latest_schema(void) { return get_schema_name(xml_latest_schema_index()); } #define CHUNK_SIZE 1024 static inline bool TRACKING_CHANGES(xmlNode *xml) { if(xml == NULL || xml->doc == NULL || xml->doc->_private == NULL) { return FALSE; } else if(is_set(((xml_private_t *)xml->doc->_private)->flags, xpf_tracking)) { return TRUE; } return FALSE; } #define buffer_print(buffer, max, offset, fmt, args...) do { \ int rc = (max); \ if(buffer) { \ rc = snprintf((buffer) + (offset), (max) - (offset), fmt, ##args); \ } \ if(buffer && rc < 0) { \ crm_perror(LOG_ERR, "snprintf failed at offset %d", offset); \ (buffer)[(offset)] = 0; \ } else if(rc >= ((max) - (offset))) { \ char *tmp = NULL; \ (max) = QB_MAX(CHUNK_SIZE, (max) * 2); \ tmp = realloc((buffer), (max) + 1); \ CRM_ASSERT(tmp); \ (buffer) = tmp; \ } else { \ offset += rc; \ break; \ } \ } while(1); static void insert_prefix(int options, char **buffer, int *offset, int *max, int depth) { if (options & xml_log_option_formatted) { size_t spaces = 2 * depth; if ((*buffer) == NULL || spaces >= ((*max) - (*offset))) { (*max) = QB_MAX(CHUNK_SIZE, (*max) * 2); (*buffer) = realloc((*buffer), (*max) + 1); } memset((*buffer) + (*offset), ' ', spaces); (*offset) += spaces; } } static const char * get_schema_root(void) { static const char *base = NULL; if (base == NULL) { base = getenv("PCMK_schema_directory"); } if (base == NULL || strlen(base) == 0) { base = CRM_DTD_DIRECTORY; } return base; } static char * get_schema_path(const char *name, const char *file) { const char *base = get_schema_root(); if(file) { return g_strdup_printf("%s/%s", base, file); } return g_strdup_printf("%s/%s.rng", base, name); } static int schema_filter(const struct dirent * a) { int rc = 0; float version = 0; if(strstr(a->d_name, "pacemaker-") != a->d_name) { /* crm_trace("%s - wrong prefix", a->d_name); */ } else if(strstr(a->d_name, ".rng") == NULL) { /* crm_trace("%s - wrong suffix", a->d_name); */ } else if(sscanf(a->d_name, "pacemaker-%f.rng", &version) == 0) { /* crm_trace("%s - wrong format", a->d_name); */ } else if(strcmp(a->d_name, "pacemaker-1.1.rng") == 0) { /* crm_trace("%s - hack", a->d_name); */ } else { /* crm_debug("%s - candidate", a->d_name); */ rc = 1; } return rc; } static int schema_sort(const struct dirent ** a, const struct dirent **b) { int rc = 0; float a_version = 0.0; float b_version = 0.0; sscanf(a[0]->d_name, "pacemaker-%f.rng", &a_version); sscanf(b[0]->d_name, "pacemaker-%f.rng", &b_version); if(a_version > b_version) { rc = 1; } else if(a_version < b_version) { rc = -1; } /* crm_trace("%s (%f) vs. %s (%f) : %d", a[0]->d_name, a_version, b[0]->d_name, b_version, rc); */ return rc; } static void __xml_schema_add( int type, float version, const char *name, const char *location, const char *transform, int after_transform) { int last = xml_schema_max; xml_schema_max++; known_schemas = realloc(known_schemas, xml_schema_max*sizeof(struct schema_s)); CRM_ASSERT(known_schemas != NULL); memset(known_schemas+last, 0, sizeof(struct schema_s)); known_schemas[last].type = type; known_schemas[last].after_transform = after_transform; if(version > 0.0) { known_schemas[last].version = version; known_schemas[last].name = g_strdup_printf("pacemaker-%.1f", version); known_schemas[last].location = g_strdup_printf("%s.rng", known_schemas[last].name); } else { char dummy[1024]; CRM_ASSERT(name); CRM_ASSERT(location); sscanf(name, "%[^-]-%f", dummy, &version); known_schemas[last].version = version; known_schemas[last].name = strdup(name); known_schemas[last].location = strdup(location); } if(transform) { known_schemas[last].transform = strdup(transform); } if(after_transform == 0) { after_transform = xml_schema_max; } known_schemas[last].after_transform = after_transform; if(known_schemas[last].after_transform < 0) { crm_debug("Added supported schema %d: %s (%s)", last, known_schemas[last].name, known_schemas[last].location); } else if(known_schemas[last].transform) { crm_debug("Added supported schema %d: %s (%s upgrades to %d with %s)", last, known_schemas[last].name, known_schemas[last].location, known_schemas[last].after_transform, known_schemas[last].transform); } else { crm_debug("Added supported schema %d: %s (%s upgrades to %d)", last, known_schemas[last].name, known_schemas[last].location, known_schemas[last].after_transform); } } static int __xml_build_schema_list(void) { int lpc, max; const char *base = get_schema_root(); struct dirent **namelist = NULL; max = scandir(base, &namelist, schema_filter, schema_sort); __xml_schema_add(1, 0.0, "pacemaker-0.6", "crm.dtd", "upgrade06.xsl", 3); __xml_schema_add(1, 0.0, "transitional-0.6", "crm-transitional.dtd", "upgrade06.xsl", 3); __xml_schema_add(2, 0.0, "pacemaker-0.7", "pacemaker-1.0.rng", NULL, 0); if (max < 0) { crm_notice("scandir(%s) failed: %s (%d)", base, strerror(errno), errno); } else { for (lpc = 0; lpc < max; lpc++) { int next = 0; float version = 0.0; char *transform = NULL; sscanf(namelist[lpc]->d_name, "pacemaker-%f.rng", &version); if((lpc + 1) < max) { float next_version = 0.0; sscanf(namelist[lpc+1]->d_name, "pacemaker-%f.rng", &next_version); if(floor(version) < floor(next_version)) { struct stat s; char *xslt = NULL; transform = g_strdup_printf("upgrade-%.1f.xsl", version); xslt = get_schema_path(NULL, transform); if(stat(xslt, &s) != 0) { crm_err("Transform %s not found", xslt); __xml_schema_add(2, version, NULL, NULL, NULL, -1); break; } else { free(xslt); } } } else { next = -1; } __xml_schema_add(2, version, NULL, NULL, transform, next); free(namelist[lpc]); free(transform); } } /* 1.1 was the old name for -next */ __xml_schema_add(2, 0.0, "pacemaker-1.1", "pacemaker-next.rng", NULL, 0); __xml_schema_add(2, 0.0, "pacemaker-next", "pacemaker-next.rng", NULL, -1); __xml_schema_add(0, 0.0, "none", "N/A", NULL, -1); free(namelist); return TRUE; } static void set_parent_flag(xmlNode *xml, long flag) { for(; xml; xml = xml->parent) { xml_private_t *p = xml->_private; if(p == NULL) { /* During calls to xmlDocCopyNode(), _private will be unset for parent nodes */ } else { p->flags |= flag; /* crm_trace("Setting flag %x due to %s[@id=%s]", flag, xml->name, ID(xml)); */ } } } static void set_doc_flag(xmlNode *xml, long flag) { if(xml && xml->doc && xml->doc->_private){ /* During calls to xmlDocCopyNode(), xml->doc may be unset */ xml_private_t *p = xml->doc->_private; p->flags |= flag; /* crm_trace("Setting flag %x due to %s[@id=%s]", flag, xml->name, ID(xml)); */ } } static void __xml_node_dirty(xmlNode *xml) { set_doc_flag(xml, xpf_dirty); set_parent_flag(xml, xpf_dirty); } static void __xml_node_clean(xmlNode *xml) { xmlNode *cIter = NULL; xml_private_t *p = xml->_private; if(p) { p->flags = 0; } for (cIter = __xml_first_child(xml); cIter != NULL; cIter = __xml_next(cIter)) { __xml_node_clean(cIter); } } static void crm_node_created(xmlNode *xml) { xmlNode *cIter = NULL; xml_private_t *p = xml->_private; if(p && TRACKING_CHANGES(xml)) { if(is_not_set(p->flags, xpf_created)) { p->flags |= xpf_created; __xml_node_dirty(xml); } for (cIter = __xml_first_child(xml); cIter != NULL; cIter = __xml_next(cIter)) { crm_node_created(cIter); } } } static void crm_attr_dirty(xmlAttr *a) { xmlNode *parent = a->parent; xml_private_t *p = NULL; p = a->_private; p->flags |= (xpf_dirty|xpf_modified); p->flags = (p->flags & ~xpf_deleted); /* crm_trace("Setting flag %x due to %s[@id=%s, @%s=%s]", */ /* xpf_dirty, parent?parent->name:NULL, ID(parent), a->name, a->children->content); */ __xml_node_dirty(parent); } int get_tag_name(const char *input, size_t offset, size_t max); int get_attr_name(const char *input, size_t offset, size_t max); int get_attr_value(const char *input, size_t offset, size_t max); gboolean can_prune_leaf(xmlNode * xml_node); void diff_filter_context(int context, int upper_bound, int lower_bound, xmlNode * xml_node, xmlNode * parent); int in_upper_context(int depth, int context, xmlNode * xml_node); int add_xml_object(xmlNode * parent, xmlNode * target, xmlNode * update, gboolean as_diff); static inline const char * crm_attr_value(xmlAttr * attr) { if (attr == NULL || attr->children == NULL) { return NULL; } return (const char *)attr->children->content; } static inline xmlAttr * crm_first_attr(xmlNode * xml) { if (xml == NULL) { return NULL; } return xml->properties; } #define XML_PRIVATE_MAGIC (long) 0x81726354 static void __xml_acl_free(void *data) { if(data) { xml_acl_t *acl = data; free(acl->xpath); free(acl); } } static void __xml_private_clean(xml_private_t *p) { if(p) { CRM_ASSERT(p->check == XML_PRIVATE_MAGIC); free(p->user); p->user = NULL; if(p->acls) { g_list_free_full(p->acls, __xml_acl_free); p->acls = NULL; } if(p->deleted_paths) { g_list_free_full(p->deleted_paths, free); p->deleted_paths = NULL; } } } static void __xml_private_free(xml_private_t *p) { __xml_private_clean(p); free(p); } static void pcmkDeregisterNode(xmlNodePtr node) { __xml_private_free(node->_private); } static void pcmkRegisterNode(xmlNodePtr node) { xml_private_t *p = NULL; switch(node->type) { case XML_ELEMENT_NODE: case XML_DOCUMENT_NODE: case XML_ATTRIBUTE_NODE: case XML_COMMENT_NODE: p = calloc(1, sizeof(xml_private_t)); p->check = XML_PRIVATE_MAGIC; /* Flags will be reset if necessary when tracking is enabled */ p->flags |= (xpf_dirty|xpf_created); node->_private = p; break; case XML_TEXT_NODE: case XML_DTD_NODE: break; default: /* Ignore */ crm_trace("Ignoring %p %d", node, node->type); CRM_LOG_ASSERT(node->type == XML_ELEMENT_NODE); break; } if(p && TRACKING_CHANGES(node)) { /* XML_ELEMENT_NODE doesn't get picked up here, node->doc is * not hooked up at the point we are called */ set_doc_flag(node, xpf_dirty); __xml_node_dirty(node); } } static xml_acl_t * __xml_acl_create(xmlNode * xml, xmlNode *target, enum xml_private_flags mode) { xml_acl_t *acl = NULL; xml_private_t *p = NULL; const char *tag = crm_element_value(xml, XML_ACL_ATTR_TAG); const char *ref = crm_element_value(xml, XML_ACL_ATTR_REF); const char *xpath = crm_element_value(xml, XML_ACL_ATTR_XPATH); if(tag == NULL) { /* Compatability handling for pacemaker < 1.1.12 */ tag = crm_element_value(xml, XML_ACL_ATTR_TAGv1); } if(ref == NULL) { /* Compatability handling for pacemaker < 1.1.12 */ ref = crm_element_value(xml, XML_ACL_ATTR_REFv1); } if(target == NULL || target->doc == NULL || target->doc->_private == NULL){ CRM_ASSERT(target); CRM_ASSERT(target->doc); CRM_ASSERT(target->doc->_private); return NULL; } else if (tag == NULL && ref == NULL && xpath == NULL) { crm_trace("No criteria %p", xml); return NULL; } p = target->doc->_private; acl = calloc(1, sizeof(xml_acl_t)); if (acl) { const char *attr = crm_element_value(xml, XML_ACL_ATTR_ATTRIBUTE); acl->mode = mode; if(xpath) { acl->xpath = strdup(xpath); crm_trace("Using xpath: %s", acl->xpath); } else { int offset = 0; char buffer[XML_BUFFER_SIZE]; if(tag) { offset += snprintf(buffer + offset, XML_BUFFER_SIZE - offset, "//%s", tag); } else { /* Is this even legal xpath syntax? */ offset += snprintf(buffer + offset, XML_BUFFER_SIZE - offset, "*"); } if(ref || attr) { offset += snprintf(buffer + offset, XML_BUFFER_SIZE - offset, "["); } if(ref) { offset += snprintf(buffer + offset, XML_BUFFER_SIZE - offset, "@id='%s'", ref); } if(ref && attr) { offset += snprintf(buffer + offset, XML_BUFFER_SIZE - offset, " and "); } if(attr) { offset += snprintf(buffer + offset, XML_BUFFER_SIZE - offset, "@%s", attr); } if(ref || attr) { offset += snprintf(buffer + offset, XML_BUFFER_SIZE - offset, "]"); } CRM_LOG_ASSERT(offset > 0); acl->xpath = strdup(buffer); crm_trace("Built xpath: %s", acl->xpath); } p->acls = g_list_append(p->acls, acl); } return acl; } static gboolean __xml_acl_parse_entry(xmlNode * acl_top, xmlNode * acl_entry, xmlNode *target) { xmlNode *child = NULL; for (child = __xml_first_child(acl_entry); child; child = __xml_next(child)) { const char *tag = crm_element_name(child); const char *kind = crm_element_value(child, XML_ACL_ATTR_KIND); if (strcmp(XML_ACL_TAG_PERMISSION, tag) == 0){ tag = kind; } crm_trace("Processing %s %p", tag, child); if(tag == NULL) { CRM_ASSERT(tag != NULL); } else if (strcmp(XML_ACL_TAG_ROLE_REF, tag) == 0 || strcmp(XML_ACL_TAG_ROLE_REFv1, tag) == 0) { const char *ref_role = crm_element_value(child, XML_ATTR_ID); if (ref_role) { xmlNode *role = NULL; for (role = __xml_first_child(acl_top); role; role = __xml_next(role)) { if (strcmp(XML_ACL_TAG_ROLE, (const char *)role->name) == 0) { const char *role_id = crm_element_value(role, XML_ATTR_ID); if (role_id && strcmp(ref_role, role_id) == 0) { crm_debug("Unpacking referenced role: %s", role_id); __xml_acl_parse_entry(acl_top, role, target); break; } } } } } else if (strcmp(XML_ACL_TAG_READ, tag) == 0) { __xml_acl_create(child, target, xpf_acl_read); } else if (strcmp(XML_ACL_TAG_WRITE, tag) == 0) { __xml_acl_create(child, target, xpf_acl_write); } else if (strcmp(XML_ACL_TAG_DENY, tag) == 0) { __xml_acl_create(child, target, xpf_acl_deny); } else { crm_warn("Unknown ACL entry: %s/%s", tag, kind); } } return TRUE; } /* */ const char * __xml_acl_to_text(enum xml_private_flags flags) { if(is_set(flags, xpf_acl_deny)) { return "deny"; } if(is_set(flags, xpf_acl_write)) { return "read/write"; } if(is_set(flags, xpf_acl_read)) { return "read"; } return "none"; } static void __xml_acl_apply(xmlNode *xml) { GListPtr aIter = NULL; xml_private_t *p = NULL; xmlXPathObjectPtr xpathObj = NULL; if(xml_acl_enabled(xml) == FALSE) { p = xml->doc->_private; crm_trace("Not applying ACLs for %s", p->user); return; } p = xml->doc->_private; for(aIter = p->acls; aIter != NULL; aIter = aIter->next) { int max = 0, lpc = 0; xml_acl_t *acl = aIter->data; xpathObj = xpath_search(xml, acl->xpath); max = numXpathResults(xpathObj); for(lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); char *path = xml_get_path(match); p = match->_private; crm_trace("Applying %x to %s for %s", acl->mode, path, acl->xpath); #ifdef SUSE_ACL_COMPAT if(is_not_set(p->flags, acl->mode)) { if(is_set(p->flags, xpf_acl_read) || is_set(p->flags, xpf_acl_write) || is_set(p->flags, xpf_acl_deny)) { crm_config_warn("Configuration element %s is matched by multiple ACL rules, only the first applies ('%s' wins over '%s')", path, __xml_acl_to_text(p->flags), __xml_acl_to_text(acl->mode)); free(path); continue; } } #endif p->flags |= acl->mode; free(path); } crm_trace("Now enforcing ACL: %s (%d matches)", acl->xpath, max); freeXpathObject(xpathObj); } p = xml->_private; if(is_not_set(p->flags, xpf_acl_read) && is_not_set(p->flags, xpf_acl_write)) { p->flags |= xpf_acl_deny; p = xml->doc->_private; crm_info("Enforcing default ACL for %s to %s", p->user, crm_element_name(xml)); } } static void __xml_acl_unpack(xmlNode *source, xmlNode *target, const char *user) { #if ENABLE_ACL xml_private_t *p = NULL; if(target == NULL || target->doc == NULL || target->doc->_private == NULL) { return; } p = target->doc->_private; if(pcmk_acl_required(user) == FALSE) { crm_trace("no acls needed for '%s'", user); } else if(p->acls == NULL) { xmlNode *acls = get_xpath_object("//"XML_CIB_TAG_ACLS, source, LOG_TRACE); free(p->user); p->user = strdup(user); if(acls) { xmlNode *child = NULL; for (child = __xml_first_child(acls); child; child = __xml_next(child)) { const char *tag = crm_element_name(child); if (strcmp(tag, XML_ACL_TAG_USER) == 0 || strcmp(tag, XML_ACL_TAG_USERv1) == 0) { const char *id = crm_element_value(child, XML_ATTR_ID); if(id && strcmp(id, user) == 0) { crm_debug("Unpacking ACLs for %s", id); __xml_acl_parse_entry(acls, child, target); } } } } } #endif } static inline bool __xml_acl_mode_test(enum xml_private_flags allowed, enum xml_private_flags requested) { if(is_set(allowed, xpf_acl_deny)) { return FALSE; } else if(is_set(allowed, requested)) { return TRUE; } else if(is_set(requested, xpf_acl_read) && is_set(allowed, xpf_acl_write)) { return TRUE; } else if(is_set(requested, xpf_acl_create) && is_set(allowed, xpf_acl_write)) { return TRUE; } else if(is_set(requested, xpf_acl_create) && is_set(allowed, xpf_created)) { return TRUE; } return FALSE; } /* rc = TRUE if orig_cib has been filtered * That means '*result' rather than 'xml' should be exploited afterwards */ static bool __xml_purge_attributes(xmlNode *xml) { xmlNode *child = NULL; xmlAttr *xIter = NULL; bool readable_children = FALSE; xml_private_t *p = xml->_private; if(__xml_acl_mode_test(p->flags, xpf_acl_read)) { crm_trace("%s is readable", crm_element_name(xml), ID(xml)); return TRUE; } xIter = crm_first_attr(xml); while(xIter != NULL) { xmlAttr *tmp = xIter; const char *prop_name = (const char *)xIter->name; xIter = xIter->next; if (strcmp(prop_name, XML_ATTR_ID) == 0) { continue; } xmlUnsetProp(xml, tmp->name); } child = __xml_first_child(xml); while ( child != NULL ) { xmlNode *tmp = child; child = __xml_next(child); readable_children |= __xml_purge_attributes(tmp); } if(readable_children == FALSE) { free_xml(xml); /* Nothing readable under here, purge completely */ } return readable_children; } bool xml_acl_filtered_copy(const char *user, xmlNode* acl_source, xmlNode *xml, xmlNode ** result) { GListPtr aIter = NULL; xmlNode *target = NULL; xml_private_t *p = NULL; xml_private_t *doc = NULL; *result = NULL; if(xml == NULL || pcmk_acl_required(user) == FALSE) { crm_trace("no acls needed for '%s'", user); return FALSE; } crm_trace("filtering copy of %p for '%s'", xml, user); target = copy_xml(xml); if(target == NULL) { return TRUE; } __xml_acl_unpack(acl_source, target, user); set_doc_flag(target, xpf_acl_enabled); __xml_acl_apply(target); doc = target->doc->_private; for(aIter = doc->acls; aIter != NULL && target; aIter = aIter->next) { int max = 0; xml_acl_t *acl = aIter->data; if(acl->mode != xpf_acl_deny) { /* Nothing to do */ } else if(acl->xpath) { int lpc = 0; xmlXPathObjectPtr xpathObj = xpath_search(target, acl->xpath); max = numXpathResults(xpathObj); for(lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); crm_trace("Purging attributes from %s", acl->xpath); if(__xml_purge_attributes(match) == FALSE && match == target) { crm_trace("No access to the entire document for %s", user); freeXpathObject(xpathObj); return TRUE; } } crm_trace("Enforced ACL %s (%d matches)", acl->xpath, max); freeXpathObject(xpathObj); } } p = target->_private; if(is_set(p->flags, xpf_acl_deny) && __xml_purge_attributes(target) == FALSE) { crm_trace("No access to the entire document for %s", user); return TRUE; } if(doc->acls) { g_list_free_full(doc->acls, __xml_acl_free); doc->acls = NULL; } else { crm_trace("Ordinary user '%s' cannot access the CIB without any defined ACLs", doc->user); free_xml(target); target = NULL; } if(target) { *result = target; } return TRUE; } static void __xml_acl_post_process(xmlNode * xml) { xmlNode *cIter = __xml_first_child(xml); xml_private_t *p = xml->_private; if(is_set(p->flags, xpf_created)) { xmlAttr *xIter = NULL; /* Always allow new scaffolding, ie. node with no attributes or only an 'id' */ for (xIter = crm_first_attr(xml); xIter != NULL; xIter = xIter->next) { const char *prop_name = (const char *)xIter->name; if (strcmp(prop_name, XML_ATTR_ID) == 0) { /* Delay the acl check */ continue; } else if(__xml_acl_check(xml, NULL, xpf_acl_write)) { crm_trace("Creation of %s=%s is allowed", crm_element_name(xml), ID(xml)); break; } else { char *path = xml_get_path(xml); crm_trace("Cannot add new node %s at %s", crm_element_name(xml), path); if(xml != xmlDocGetRootElement(xml->doc)) { xmlUnlinkNode(xml); xmlFreeNode(xml); } free(path); return; } } } while (cIter != NULL) { xmlNode *child = cIter; cIter = __xml_next(cIter); /* In case it is free'd */ __xml_acl_post_process(child); } } bool xml_acl_denied(xmlNode *xml) { if(xml && xml->doc && xml->doc->_private){ xml_private_t *p = xml->doc->_private; return is_set(p->flags, xpf_acl_denied); } return FALSE; } void xml_acl_disable(xmlNode *xml) { if(xml_acl_enabled(xml)) { xml_private_t *p = xml->doc->_private; /* Catch anything that was created but shouldn't have been */ __xml_acl_apply(xml); __xml_acl_post_process(xml); clear_bit(p->flags, xpf_acl_enabled); } } bool xml_acl_enabled(xmlNode *xml) { if(xml && xml->doc && xml->doc->_private){ xml_private_t *p = xml->doc->_private; return is_set(p->flags, xpf_acl_enabled); } return FALSE; } void xml_track_changes(xmlNode * xml, const char *user, xmlNode *acl_source, bool enforce_acls) { xml_accept_changes(xml); crm_trace("Tracking changes%s to %p", enforce_acls?" with ACLs":"", xml); set_doc_flag(xml, xpf_tracking); if(enforce_acls) { if(acl_source == NULL) { acl_source = xml; } set_doc_flag(xml, xpf_acl_enabled); __xml_acl_unpack(acl_source, xml, user); __xml_acl_apply(xml); } } bool xml_tracking_changes(xmlNode * xml) { if(xml == NULL) { return FALSE; } else if(is_set(((xml_private_t *)xml->doc->_private)->flags, xpf_tracking)) { return TRUE; } return FALSE; } bool xml_document_dirty(xmlNode *xml) { if(xml != NULL && xml->doc && xml->doc->_private) { xml_private_t *doc = xml->doc->_private; return is_set(doc->flags, xpf_dirty); } return FALSE; } /* */ static int __xml_offset(xmlNode *xml) { int position = 0; xmlNode *cIter = NULL; for(cIter = xml; cIter->prev; cIter = cIter->prev) { xml_private_t *p = ((xmlNode*)cIter->prev)->_private; if(is_not_set(p->flags, xpf_skip)) { position++; } } return position; } static int __xml_offset_no_deletions(xmlNode *xml) { int position = 0; xmlNode *cIter = NULL; for(cIter = xml; cIter->prev; cIter = cIter->prev) { xml_private_t *p = ((xmlNode*)cIter->prev)->_private; if(is_not_set(p->flags, xpf_deleted)) { position++; } } return position; } static void __xml_build_changes(xmlNode * xml, xmlNode *patchset) { xmlNode *cIter = NULL; xmlAttr *pIter = NULL; xmlNode *change = NULL; xml_private_t *p = xml->_private; if(patchset && is_set(p->flags, xpf_created)) { int offset = 0; char buffer[XML_BUFFER_SIZE]; if(__get_prefix(NULL, xml->parent, buffer, offset) > 0) { int position = __xml_offset_no_deletions(xml); change = create_xml_node(patchset, XML_DIFF_CHANGE); crm_xml_add(change, XML_DIFF_OP, "create"); crm_xml_add(change, XML_DIFF_PATH, buffer); crm_xml_add_int(change, XML_DIFF_POSITION, position); add_node_copy(change, xml); } return; } for (pIter = crm_first_attr(xml); pIter != NULL; pIter = pIter->next) { xmlNode *attr = NULL; p = pIter->_private; if(is_not_set(p->flags, xpf_deleted) && is_not_set(p->flags, xpf_dirty)) { continue; } if(change == NULL) { int offset = 0; char buffer[XML_BUFFER_SIZE]; if(__get_prefix(NULL, xml, buffer, offset) > 0) { change = create_xml_node(patchset, XML_DIFF_CHANGE); crm_xml_add(change, XML_DIFF_OP, "modify"); crm_xml_add(change, XML_DIFF_PATH, buffer); change = create_xml_node(change, XML_DIFF_LIST); } } attr = create_xml_node(change, XML_DIFF_ATTR); crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, (const char *)pIter->name); if(p->flags & xpf_deleted) { crm_xml_add(attr, XML_DIFF_OP, "unset"); } else { const char *value = crm_element_value(xml, (const char *)pIter->name); crm_xml_add(attr, XML_DIFF_OP, "set"); crm_xml_add(attr, XML_NVPAIR_ATTR_VALUE, value); } } if(change) { xmlNode *result = NULL; change = create_xml_node(change->parent, XML_DIFF_RESULT); result = create_xml_node(change, (const char *)xml->name); for (pIter = crm_first_attr(xml); pIter != NULL; pIter = pIter->next) { const char *value = crm_element_value(xml, (const char *)pIter->name); crm_xml_add(result, (const char *)pIter->name, value); } } for (cIter = __xml_first_child(xml); cIter != NULL; cIter = __xml_next(cIter)) { __xml_build_changes(cIter, patchset); } p = xml->_private; if(patchset && is_set(p->flags, xpf_moved)) { int offset = 0; char buffer[XML_BUFFER_SIZE]; crm_trace("%s.%s moved to position %d", xml->name, ID(xml), __xml_offset(xml)); if(__get_prefix(NULL, xml, buffer, offset) > 0) { change = create_xml_node(patchset, XML_DIFF_CHANGE); crm_xml_add(change, XML_DIFF_OP, "move"); crm_xml_add(change, XML_DIFF_PATH, buffer); crm_xml_add_int(change, XML_DIFF_POSITION, __xml_offset_no_deletions(xml)); } } } static void __xml_accept_changes(xmlNode * xml) { xmlNode *cIter = NULL; xmlAttr *pIter = NULL; xml_private_t *p = xml->_private; p->flags = xpf_none; pIter = crm_first_attr(xml); while (pIter != NULL) { const xmlChar *name = pIter->name; p = pIter->_private; pIter = pIter->next; if(p->flags & xpf_deleted) { xml_remove_prop(xml, (const char *)name); } else { p->flags = xpf_none; } } for (cIter = __xml_first_child(xml); cIter != NULL; cIter = __xml_next(cIter)) { __xml_accept_changes(cIter); } } static bool is_config_change(xmlNode *xml) { GListPtr gIter = NULL; xml_private_t *p = NULL; xmlNode *config = first_named_child(xml, XML_CIB_TAG_CONFIGURATION); if(config) { p = config->_private; } if(p && is_set(p->flags, xpf_dirty)) { return TRUE; } if(xml->doc && xml->doc->_private) { p = xml->doc->_private; for(gIter = p->deleted_paths; gIter; gIter = gIter->next) { char *path = gIter->data; if(strstr(path, "/"XML_TAG_CIB"/"XML_CIB_TAG_CONFIGURATION) != NULL) { return TRUE; } } } return FALSE; } static void xml_repair_v1_diff(xmlNode * last, xmlNode * next, xmlNode * local_diff, gboolean changed) { int lpc = 0; xmlNode *cib = NULL; xmlNode *diff_child = NULL; const char *tag = NULL; const char *vfields[] = { XML_ATTR_GENERATION_ADMIN, XML_ATTR_GENERATION, XML_ATTR_NUMUPDATES, }; if (local_diff == NULL) { crm_trace("Nothing to do"); return; } tag = "diff-removed"; diff_child = find_xml_node(local_diff, tag, FALSE); if (diff_child == NULL) { diff_child = create_xml_node(local_diff, tag); } tag = XML_TAG_CIB; cib = find_xml_node(diff_child, tag, FALSE); if (cib == NULL) { cib = create_xml_node(diff_child, tag); } for(lpc = 0; last && lpc < DIMOF(vfields); lpc++){ const char *value = crm_element_value(last, vfields[lpc]); crm_xml_add(diff_child, vfields[lpc], value); if(changed || lpc == 2) { crm_xml_add(cib, vfields[lpc], value); } } tag = "diff-added"; diff_child = find_xml_node(local_diff, tag, FALSE); if (diff_child == NULL) { diff_child = create_xml_node(local_diff, tag); } tag = XML_TAG_CIB; cib = find_xml_node(diff_child, tag, FALSE); if (cib == NULL) { cib = create_xml_node(diff_child, tag); } for(lpc = 0; next && lpc < DIMOF(vfields); lpc++){ const char *value = crm_element_value(next, vfields[lpc]); crm_xml_add(diff_child, vfields[lpc], value); } if (next) { xmlAttrPtr xIter = NULL; for (xIter = next->properties; xIter; xIter = xIter->next) { const char *p_name = (const char *)xIter->name; const char *p_value = crm_element_value(next, p_name); xmlSetProp(cib, (const xmlChar *)p_name, (const xmlChar *)p_value); } } crm_log_xml_explicit(local_diff, "Repaired-diff"); } static xmlNode * xml_create_patchset_v1(xmlNode *source, xmlNode *target, bool config) { xmlNode *patchset = diff_xml_object(source, target, TRUE); if(patchset) { CRM_LOG_ASSERT(xml_document_dirty(target)); xml_repair_v1_diff(source, target, patchset, config); crm_xml_add(patchset, "format", "1"); } return patchset; } static xmlNode * xml_create_patchset_v2(xmlNode *source, xmlNode *target) { int lpc = 0; GListPtr gIter = NULL; xml_private_t *doc = NULL; xmlNode *v = NULL; xmlNode *version = NULL; xmlNode *patchset = NULL; const char *vfields[] = { XML_ATTR_GENERATION_ADMIN, XML_ATTR_GENERATION, XML_ATTR_NUMUPDATES, }; CRM_ASSERT(target); if(xml_document_dirty(target) == FALSE) { return NULL; } CRM_ASSERT(target->doc); doc = target->doc->_private; patchset = create_xml_node(NULL, XML_TAG_DIFF); crm_xml_add_int(patchset, "format", 2); version = create_xml_node(patchset, XML_DIFF_VERSION); v = create_xml_node(version, XML_DIFF_VSOURCE); for(lpc = 0; lpc < DIMOF(vfields); lpc++){ const char *value = crm_element_value(source, vfields[lpc]); if(value == NULL) { value = "1"; } crm_xml_add(v, vfields[lpc], value); } v = create_xml_node(version, XML_DIFF_VTARGET); for(lpc = 0; lpc < DIMOF(vfields); lpc++){ const char *value = crm_element_value(target, vfields[lpc]); if(value == NULL) { value = "1"; } crm_xml_add(v, vfields[lpc], value); } for(gIter = doc->deleted_paths; gIter; gIter = gIter->next) { xmlNode *change = create_xml_node(patchset, XML_DIFF_CHANGE); crm_xml_add(change, XML_DIFF_OP, "delete"); crm_xml_add(change, XML_DIFF_PATH, gIter->data); } __xml_build_changes(target, patchset); return patchset; } static gboolean patch_legacy_mode(void) { static gboolean init = TRUE; static gboolean legacy = FALSE; if(init) { init = FALSE; legacy = daemon_option_enabled("cib", "legacy"); if(legacy) { crm_notice("Enabled legacy mode"); } } return legacy; } xmlNode * xml_create_patchset(int format, xmlNode *source, xmlNode *target, bool *config_changed, bool manage_version, bool with_digest) { int counter = 0; bool config = FALSE; xmlNode *patch = NULL; const char *version = crm_element_value(source, XML_ATTR_CRM_VERSION); xml_acl_disable(target); if(xml_document_dirty(target) == FALSE) { crm_trace("No change %d", format); return NULL; /* No change */ } config = is_config_change(target); if(config_changed) { *config_changed = config; } if(manage_version && config) { crm_trace("Config changed %d", format); crm_xml_add(target, XML_ATTR_NUMUPDATES, "0"); crm_element_value_int(target, XML_ATTR_GENERATION, &counter); crm_xml_add_int(target, XML_ATTR_GENERATION, counter+1); } else if(manage_version) { crm_trace("Status changed %d", format); crm_element_value_int(target, XML_ATTR_NUMUPDATES, &counter); crm_xml_add_int(target, XML_ATTR_NUMUPDATES, counter+1); } if(format == 0) { if(patch_legacy_mode()) { format = 1; } else if(compare_version("3.0.8", version) < 0) { format = 2; } else { format = 1; } crm_trace("Using patch format %d for version: %s", format, version); } switch(format) { case 1: patch = xml_create_patchset_v1(source, target, config); with_digest = TRUE; break; case 2: patch = xml_create_patchset_v2(source, target); break; default: crm_err("Unknown patch format: %d", format); return NULL; } if(patch && with_digest) { char *digest = calculate_xml_versioned_digest(target, FALSE, TRUE, version); crm_xml_add(patch, XML_ATTR_DIGEST, digest); free(digest); } return patch; } static void __xml_log_element(int log_level, const char *file, const char *function, int line, const char *prefix, xmlNode * data, int depth, int options); void xml_log_patchset(uint8_t log_level, const char *function, xmlNode * patchset) { int format = 1; xmlNode *child = NULL; xmlNode *added = NULL; xmlNode *removed = NULL; gboolean is_first = TRUE; int add[] = { 0, 0, 0 }; int del[] = { 0, 0, 0 }; const char *fmt = NULL; const char *digest = NULL; int options = xml_log_option_formatted; static struct qb_log_callsite *patchset_cs = NULL; if (patchset_cs == NULL) { patchset_cs = qb_log_callsite_get(function, __FILE__, "xml-patchset", log_level, __LINE__, 0); } if (patchset == NULL) { crm_trace("Empty patch"); return; } else if (log_level == 0) { /* Log to stdout */ } else if (crm_is_callsite_active(patchset_cs, log_level, 0) == FALSE) { return; } xml_patch_versions(patchset, add, del); fmt = crm_element_value(patchset, "format"); digest = crm_element_value(patchset, XML_ATTR_DIGEST); if (add[2] != del[2] || add[1] != del[1] || add[0] != del[0]) { do_crm_log_alias(log_level, __FILE__, function, __LINE__, "Diff: --- %d.%d.%d %s", del[0], del[1], del[2], fmt); do_crm_log_alias(log_level, __FILE__, function, __LINE__, "Diff: +++ %d.%d.%d %s", add[0], add[1], add[2], digest); } else if (patchset != NULL && (add[0] || add[1] || add[2])) { do_crm_log_alias(log_level, __FILE__, function, __LINE__, "%s: Local-only Change: %d.%d.%d", function ? function : "", add[0], add[1], add[2]); } crm_element_value_int(patchset, "format", &format); if(format == 2) { xmlNode *change = NULL; for (change = __xml_first_child(patchset); change != NULL; change = __xml_next(change)) { const char *op = crm_element_value(change, XML_DIFF_OP); const char *xpath = crm_element_value(change, XML_DIFF_PATH); if(op == NULL) { } else if(strcmp(op, "create") == 0) { int lpc = 0, max = 0; char *prefix = g_strdup_printf("++ %s: ", xpath); max = strlen(prefix); __xml_log_element(log_level, __FILE__, function, __LINE__, prefix, change->children, 0, xml_log_option_formatted|xml_log_option_open); for(lpc = 2; lpc < max; lpc++) { prefix[lpc] = ' '; } __xml_log_element(log_level, __FILE__, function, __LINE__, prefix, change->children, 0, xml_log_option_formatted|xml_log_option_close|xml_log_option_children); free(prefix); } else if(strcmp(op, "move") == 0) { do_crm_log_alias(log_level, __FILE__, function, __LINE__, "+~ %s moved to offset %s", xpath, crm_element_value(change, XML_DIFF_POSITION)); } else if(strcmp(op, "modify") == 0) { xmlNode *clist = first_named_child(change, XML_DIFF_LIST); char buffer_set[XML_BUFFER_SIZE]; char buffer_unset[XML_BUFFER_SIZE]; int o_set = 0; int o_unset = 0; buffer_set[0] = 0; buffer_unset[0] = 0; for (child = __xml_first_child(clist); child != NULL; child = __xml_next(child)) { const char *name = crm_element_value(child, "name"); op = crm_element_value(child, XML_DIFF_OP); if(op == NULL) { } else if(strcmp(op, "set") == 0) { const char *value = crm_element_value(child, "value"); if(o_set > 0) { o_set += snprintf(buffer_set + o_set, XML_BUFFER_SIZE - o_set, ", "); } o_set += snprintf(buffer_set + o_set, XML_BUFFER_SIZE - o_set, "@%s=%s", name, value); } else if(strcmp(op, "unset") == 0) { if(o_unset > 0) { o_unset += snprintf(buffer_unset + o_unset, XML_BUFFER_SIZE - o_unset, ", "); } o_unset += snprintf(buffer_unset + o_unset, XML_BUFFER_SIZE - o_unset, "@%s", name); } } if(o_set) { do_crm_log_alias(log_level, __FILE__, function, __LINE__, "+ %s: %s", xpath, buffer_set); } if(o_unset) { do_crm_log_alias(log_level, __FILE__, function, __LINE__, "-- %s: %s", xpath, buffer_unset); } } else if(strcmp(op, "delete") == 0) { do_crm_log_alias(log_level, __FILE__, function, __LINE__, "-- %s", xpath); } } return; } if (log_level < LOG_DEBUG || function == NULL) { options |= xml_log_option_diff_short; } removed = find_xml_node(patchset, "diff-removed", FALSE); for (child = __xml_first_child(removed); child != NULL; child = __xml_next(child)) { log_data_element(log_level, __FILE__, function, __LINE__, "- ", child, 0, options | xml_log_option_diff_minus); if (is_first) { is_first = FALSE; } else { do_crm_log_alias(log_level, __FILE__, function, __LINE__, " --- "); } } is_first = TRUE; added = find_xml_node(patchset, "diff-added", FALSE); for (child = __xml_first_child(added); child != NULL; child = __xml_next(child)) { log_data_element(log_level, __FILE__, function, __LINE__, "+ ", child, 0, options | xml_log_option_diff_plus); if (is_first) { is_first = FALSE; } else { do_crm_log_alias(log_level, __FILE__, function, __LINE__, " +++ "); } } } void xml_log_changes(uint8_t log_level, const char *function, xmlNode * xml) { GListPtr gIter = NULL; xml_private_t *doc = NULL; CRM_ASSERT(xml); CRM_ASSERT(xml->doc); doc = xml->doc->_private; if(is_not_set(doc->flags, xpf_dirty)) { return; } for(gIter = doc->deleted_paths; gIter; gIter = gIter->next) { do_crm_log_alias(log_level, __FILE__, function, __LINE__, "-- %s", (char*)gIter->data); } log_data_element(log_level, __FILE__, function, __LINE__, "+ ", xml, 0, xml_log_option_formatted|xml_log_option_dirty_add); } void xml_accept_changes(xmlNode * xml) { xmlNode *top = NULL; xml_private_t *doc = NULL; if(xml == NULL) { return; } crm_trace("Accepting changes to %p", xml); doc = xml->doc->_private; top = xmlDocGetRootElement(xml->doc); __xml_private_clean(xml->doc->_private); if(is_not_set(doc->flags, xpf_dirty)) { doc->flags = xpf_none; return; } doc->flags = xpf_none; __xml_accept_changes(top); } /* Simplified version for applying v1-style XML patches */ static void __subtract_xml_object(xmlNode * target, xmlNode * patch) { xmlNode *patch_child = NULL; xmlNode *cIter = NULL; xmlAttrPtr xIter = NULL; char *id = NULL; const char *name = NULL; const char *value = NULL; if (target == NULL || patch == NULL) { return; } if (target->type == XML_COMMENT_NODE) { gboolean dummy; subtract_xml_comment(target->parent, target, patch, &dummy); } name = crm_element_name(target); CRM_CHECK(name != NULL, return); CRM_CHECK(safe_str_eq(crm_element_name(target), crm_element_name(patch)), return); CRM_CHECK(safe_str_eq(ID(target), ID(patch)), return); /* check for XML_DIFF_MARKER in a child */ id = crm_element_value_copy(target, XML_ATTR_ID); value = crm_element_value(patch, XML_DIFF_MARKER); if (value != NULL && strcmp(value, "removed:top") == 0) { crm_trace("We are the root of the deletion: %s.id=%s", name, id); free_xml(target); free(id); return; } for (xIter = crm_first_attr(patch); xIter != NULL; xIter = xIter->next) { const char *p_name = (const char *)xIter->name; xml_remove_prop(target, p_name); } /* Restore the id field, it is never allowed to change */ crm_xml_add(target, XML_ATTR_ID, id); /* changes to child objects */ cIter = __xml_first_child(target); while (cIter) { xmlNode *target_child = cIter; cIter = __xml_next(cIter); if (target_child->type == XML_COMMENT_NODE) { patch_child = find_xml_comment(patch, target_child); } else { patch_child = find_entity(patch, crm_element_name(target_child), ID(target_child)); } __subtract_xml_object(target_child, patch_child); } free(id); } static void __add_xml_object(xmlNode * parent, xmlNode * target, xmlNode * patch) { xmlNode *patch_child = NULL; xmlNode *target_child = NULL; xmlAttrPtr xIter = NULL; const char *id = NULL; const char *name = NULL; const char *value = NULL; if (patch == NULL) { return; } else if (parent == NULL && target == NULL) { return; } /* check for XML_DIFF_MARKER in a child */ value = crm_element_value(patch, XML_DIFF_MARKER); if (target == NULL && value != NULL && strcmp(value, "added:top") == 0) { id = ID(patch); name = crm_element_name(patch); crm_trace("We are the root of the addition: %s.id=%s", name, id); add_node_copy(parent, patch); return; } else if(target == NULL) { id = ID(patch); name = crm_element_name(patch); crm_err("Could not locate: %s.id=%s", name, id); return; } if (target->type == XML_COMMENT_NODE) { add_xml_comment(parent, target, patch); } name = crm_element_name(target); CRM_CHECK(name != NULL, return); CRM_CHECK(safe_str_eq(crm_element_name(target), crm_element_name(patch)), return); CRM_CHECK(safe_str_eq(ID(target), ID(patch)), return); for (xIter = crm_first_attr(patch); xIter != NULL; xIter = xIter->next) { const char *p_name = (const char *)xIter->name; const char *p_value = crm_element_value(patch, p_name); xml_remove_prop(target, p_name); /* Preserve the patch order */ crm_xml_add(target, p_name, p_value); } /* changes to child objects */ for (patch_child = __xml_first_child(patch); patch_child != NULL; patch_child = __xml_next(patch_child)) { if (patch_child->type == XML_COMMENT_NODE) { target_child = find_xml_comment(target, patch_child); } else { target_child = find_entity(target, crm_element_name(patch_child), ID(patch_child)); } __add_xml_object(target, target_child, patch_child); } } bool xml_patch_versions(xmlNode *patchset, int add[3], int del[3]) { int lpc = 0; int format = 1; xmlNode *tmp = NULL; const char *vfields[] = { XML_ATTR_GENERATION_ADMIN, XML_ATTR_GENERATION, XML_ATTR_NUMUPDATES, }; crm_element_value_int(patchset, "format", &format); switch(format) { case 1: tmp = find_xml_node(patchset, "diff-removed", FALSE); tmp = find_xml_node(tmp, "cib", FALSE); if(tmp == NULL) { /* Revert to the diff-removed line */ tmp = find_xml_node(patchset, "diff-removed", FALSE); } break; case 2: tmp = find_xml_node(patchset, "version", FALSE); tmp = find_xml_node(tmp, "source", FALSE); break; default: crm_warn("Unknown patch format: %d", format); return -EINVAL; } - for(lpc = 0; lpc < DIMOF(vfields); lpc++) { - crm_element_value_int(tmp, vfields[lpc], &(del[lpc])); - crm_trace("Got %d for del[%s]", del[lpc], vfields[lpc]); + if (tmp) { + for(lpc = 0; lpc < DIMOF(vfields); lpc++) { + crm_element_value_int(tmp, vfields[lpc], &(del[lpc])); + crm_trace("Got %d for del[%s]", del[lpc], vfields[lpc]); + } } switch(format) { case 1: tmp = find_xml_node(patchset, "diff-added", FALSE); tmp = find_xml_node(tmp, "cib", FALSE); if(tmp == NULL) { /* Revert to the diff-added line */ tmp = find_xml_node(patchset, "diff-added", FALSE); } break; case 2: tmp = find_xml_node(patchset, "version", FALSE); tmp = find_xml_node(tmp, "target", FALSE); break; default: crm_warn("Unknown patch format: %d", format); return -EINVAL; } - for(lpc = 0; lpc < DIMOF(vfields); lpc++) { - crm_element_value_int(tmp, vfields[lpc], &(add[lpc])); - crm_trace("Got %d for add[%s]", add[lpc], vfields[lpc]); + if (tmp) { + for(lpc = 0; lpc < DIMOF(vfields); lpc++) { + crm_element_value_int(tmp, vfields[lpc], &(add[lpc])); + crm_trace("Got %d for add[%s]", add[lpc], vfields[lpc]); + } } return pcmk_ok; } static int xml_patch_version_check(xmlNode *xml, xmlNode *patchset, int format) { int lpc = 0; bool changed = FALSE; int this[] = { 0, 0, 0 }; int add[] = { 0, 0, 0 }; int del[] = { 0, 0, 0 }; const char *vfields[] = { XML_ATTR_GENERATION_ADMIN, XML_ATTR_GENERATION, XML_ATTR_NUMUPDATES, }; for(lpc = 0; lpc < DIMOF(vfields); lpc++) { crm_element_value_int(xml, vfields[lpc], &(this[lpc])); crm_trace("Got %d for this[%s]", this[lpc], vfields[lpc]); if (this[lpc] < 0) { this[lpc] = 0; } } /* Set some defaults in case nothing is present */ add[0] = this[0]; add[1] = this[1]; add[2] = this[2] + 1; for(lpc = 0; lpc < DIMOF(vfields); lpc++) { del[lpc] = this[lpc]; } xml_patch_versions(patchset, add, del); for(lpc = 0; lpc < DIMOF(vfields); lpc++) { if(this[lpc] < del[lpc]) { crm_debug("Current %s is too low (%d < %d)", vfields[lpc], this[lpc], del[lpc]); return -pcmk_err_diff_resync; } else if(this[lpc] > del[lpc]) { crm_info("Current %s is too high (%d > %d)", vfields[lpc], this[lpc], del[lpc]); return -pcmk_err_old_data; } } for(lpc = 0; lpc < DIMOF(vfields); lpc++) { if(add[lpc] > del[lpc]) { changed = TRUE; } } if(changed == FALSE) { crm_notice("Versions did not change in patch %d.%d.%d", add[0], add[1], add[2]); return -pcmk_err_old_data; } crm_debug("Can apply patch %d.%d.%d to %d.%d.%d", add[0], add[1], add[2], this[0], this[1], this[2]); return pcmk_ok; } static int xml_apply_patchset_v1(xmlNode *xml, xmlNode *patchset, bool check_version) { int rc = pcmk_ok; int root_nodes_seen = 0; char *version = crm_element_value_copy(xml, XML_ATTR_CRM_VERSION); xmlNode *child_diff = NULL; xmlNode *added = find_xml_node(patchset, "diff-added", FALSE); xmlNode *removed = find_xml_node(patchset, "diff-removed", FALSE); xmlNode *old = copy_xml(xml); crm_trace("Substraction Phase"); for (child_diff = __xml_first_child(removed); child_diff != NULL; child_diff = __xml_next(child_diff)) { CRM_CHECK(root_nodes_seen == 0, rc = FALSE); if (root_nodes_seen == 0) { __subtract_xml_object(xml, child_diff); } root_nodes_seen++; } if (root_nodes_seen > 1) { crm_err("(-) Diffs cannot contain more than one change set... saw %d", root_nodes_seen); rc = -ENOTUNIQ; } root_nodes_seen = 0; crm_trace("Addition Phase"); if (rc == pcmk_ok) { xmlNode *child_diff = NULL; for (child_diff = __xml_first_child(added); child_diff != NULL; child_diff = __xml_next(child_diff)) { CRM_CHECK(root_nodes_seen == 0, rc = FALSE); if (root_nodes_seen == 0) { __add_xml_object(NULL, xml, child_diff); } root_nodes_seen++; } } if (root_nodes_seen > 1) { crm_err("(+) Diffs cannot contain more than one change set... saw %d", root_nodes_seen); rc = -ENOTUNIQ; } purge_diff_markers(xml); /* Purge prior to checking the digest */ free_xml(old); free(version); return rc; } static xmlNode * __first_xml_child_match(xmlNode *parent, const char *name, const char *id) { xmlNode *cIter = NULL; for (cIter = __xml_first_child(parent); cIter != NULL; cIter = __xml_next(cIter)) { if(strcmp((const char *)cIter->name, name) != 0) { continue; } else if(id) { const char *cid = ID(cIter); if(cid == NULL || strcmp(cid, id) != 0) { continue; } } return cIter; } return NULL; } static xmlNode * __xml_find_path(xmlNode *top, const char *key) { xmlNode *target = (xmlNode*)top->doc; char *id = malloc(XML_BUFFER_SIZE); char *tag = malloc(XML_BUFFER_SIZE); char *section = malloc(XML_BUFFER_SIZE); char *current = strdup(key); char *remainder = malloc(XML_BUFFER_SIZE); int rc = 0; while(current) { rc = sscanf (current, "/%[^/]%s", section, remainder); if(rc <= 0) { crm_trace("Done"); break; } else if(rc > 2) { crm_trace("Aborting on %s", current); target = NULL; break; } else if(tag && section) { int f = sscanf (section, "%[^[][@id='%[^']", tag, id); switch(f) { case 1: target = __first_xml_child_match(target, tag, NULL); break; case 2: target = __first_xml_child_match(target, tag, id); break; default: crm_trace("Aborting on %s", section); target = NULL; break; } if(rc == 1 || target == NULL) { crm_trace("Done"); break; } else { char *tmp = current; current = remainder; remainder = tmp; } } } if(target) { char *path = (char *)xmlGetNodePath(target); crm_trace("Found %s for %s", path, key); free(path); } else { crm_debug("No match for %s", key); } free(remainder); free(current); free(section); free(tag); free(id); return target; } static int xml_apply_patchset_v2(xmlNode *xml, xmlNode *patchset, bool check_version) { int rc = pcmk_ok; xmlNode *change = NULL; for (change = __xml_first_child(patchset); change != NULL; change = __xml_next(change)) { xmlNode *match = NULL; const char *op = crm_element_value(change, XML_DIFF_OP); const char *xpath = crm_element_value(change, XML_DIFF_PATH); crm_trace("Processing %s %s", change->name, op); if(op == NULL) { continue; } #if 0 match = get_xpath_object(xpath, xml, LOG_TRACE); #else match = __xml_find_path(xml, xpath); #endif crm_trace("Performing %s on %s with %p", op, xpath, match); if(match == NULL && strcmp(op, "delete") == 0) { crm_debug("No %s match for %s in %p", op, xpath, xml->doc); continue; } else if(match == NULL) { crm_err("No %s match for %s in %p", op, xpath, xml->doc); rc = -pcmk_err_diff_failed; continue; } else if(strcmp(op, "create") == 0) { int position = 0; xmlNode *child = NULL; xmlNode *match_child = NULL; match_child = match->children; crm_element_value_int(change, XML_DIFF_POSITION, &position); while(match_child && position != __xml_offset(match_child)) { match_child = match_child->next; } child = xmlDocCopyNode(change->children, match->doc, 1); if(match_child) { crm_trace("Adding %s at position %d", child->name, position); xmlAddPrevSibling(match_child, child); } else if(match->last) { /* Add to the end */ crm_trace("Adding %s at position %d (end)", child->name, position); xmlAddNextSibling(match->last, child); } else { crm_trace("Adding %s at position %d (first)", child->name, position); CRM_LOG_ASSERT(position == 0); xmlAddChild(match, child); } crm_node_created(child); } else if(strcmp(op, "move") == 0) { int position = 0; crm_element_value_int(change, XML_DIFF_POSITION, &position); if(position != __xml_offset(match)) { xmlNode *match_child = NULL; int p = position; if(p > __xml_offset(match)) { p++; /* Skip ourselves */ } CRM_ASSERT(match->parent != NULL); match_child = match->parent->children; while(match_child && p != __xml_offset(match_child)) { match_child = match_child->next; } crm_trace("Moving %s to position %d (was %d, prev %p, %s %p)", match->name, position, __xml_offset(match), match->prev, match_child?"next":"last", match_child?match_child:match->parent->last); if(match_child) { xmlAddPrevSibling(match_child, match); } else { CRM_ASSERT(match->parent->last != NULL); xmlAddNextSibling(match->parent->last, match); } } else { crm_trace("%s is already in position %d", match->name, position); } if(position != __xml_offset(match)) { crm_err("Moved %s.%d to position %d instead of %d (%p)", match->name, ID(match), __xml_offset(match), position, match->prev); rc = -pcmk_err_diff_failed; } } else if(strcmp(op, "delete") == 0) { free_xml(match); } else if(strcmp(op, "modify") == 0) { xmlAttr *pIter = crm_first_attr(match); xmlNode *attrs = __xml_first_child(first_named_child(change, XML_DIFF_RESULT)); if(attrs == NULL) { rc = -ENOMSG; continue; } while(pIter != NULL) { const char *name = (const char *)pIter->name; pIter = pIter->next; xml_remove_prop(match, name); } for (pIter = crm_first_attr(attrs); pIter != NULL; pIter = pIter->next) { const char *name = (const char *)pIter->name; const char *value = crm_element_value(attrs, name); crm_xml_add(match, name, value); } } else { crm_err("Unknown operation: %s", op); } } return rc; } int xml_apply_patchset(xmlNode *xml, xmlNode *patchset, bool check_version) { int format = 1; int rc = pcmk_ok; xmlNode *old = NULL; const char *digest = crm_element_value(patchset, XML_ATTR_DIGEST); if(patchset == NULL) { return rc; } xml_log_patchset(LOG_TRACE, __FUNCTION__, patchset); crm_element_value_int(patchset, "format", &format); if(check_version) { rc = xml_patch_version_check(xml, patchset, format); if(rc != pcmk_ok) { return rc; } } if(digest) { /* Make it available for logging if the result doesn't have the expected digest */ old = copy_xml(xml); } if(rc == pcmk_ok) { switch(format) { case 1: rc = xml_apply_patchset_v1(xml, patchset, check_version); break; case 2: rc = xml_apply_patchset_v2(xml, patchset, check_version); break; default: crm_err("Unknown patch format: %d", format); rc = -EINVAL; } } if(rc == pcmk_ok && digest) { static struct qb_log_callsite *digest_cs = NULL; char *new_digest = NULL; char *version = crm_element_value_copy(xml, XML_ATTR_CRM_VERSION); if (digest_cs == NULL) { digest_cs = qb_log_callsite_get(__func__, __FILE__, "diff-digest", LOG_TRACE, __LINE__, crm_trace_nonlog); } new_digest = calculate_xml_versioned_digest(xml, FALSE, TRUE, version); if (safe_str_neq(new_digest, digest)) { crm_info("v%d digest mis-match: expected %s, calculated %s", format, digest, new_digest); rc = -pcmk_err_diff_failed; if (digest_cs && digest_cs->targets) { save_xml_to_file(old, "PatchDigest:input", NULL); save_xml_to_file(xml, "PatchDigest:result", NULL); save_xml_to_file(patchset,"PatchDigest:diff", NULL); } else { crm_trace("%p %0.6x", digest_cs, digest_cs ? digest_cs->targets : 0); } } else { crm_trace("v%d digest matched: expected %s, calculated %s", format, digest, new_digest); } free(new_digest); free(version); } free_xml(old); return rc; } xmlNode * find_xml_node(xmlNode * root, const char *search_path, gboolean must_find) { xmlNode *a_child = NULL; const char *name = "NULL"; if (root != NULL) { name = crm_element_name(root); } if (search_path == NULL) { crm_warn("Will never find "); return NULL; } for (a_child = __xml_first_child(root); a_child != NULL; a_child = __xml_next(a_child)) { if (strcmp((const char *)a_child->name, search_path) == 0) { /* crm_trace("returning node (%s).", crm_element_name(a_child)); */ return a_child; } } if (must_find) { crm_warn("Could not find %s in %s.", search_path, name); } else if (root != NULL) { crm_trace("Could not find %s in %s.", search_path, name); } else { crm_trace("Could not find %s in .", search_path); } return NULL; } xmlNode * find_entity(xmlNode * parent, const char *node_name, const char *id) { xmlNode *a_child = NULL; for (a_child = __xml_first_child(parent); a_child != NULL; a_child = __xml_next(a_child)) { /* Uncertain if node_name == NULL check is strictly necessary here */ if (node_name == NULL || strcmp((const char *)a_child->name, node_name) == 0) { const char *cid = ID(a_child); if (id == NULL || (cid != NULL && strcmp(id, cid) == 0)) { return a_child; } } } crm_trace("node <%s id=%s> not found in %s.", node_name, id, crm_element_name(parent)); return NULL; } void copy_in_properties(xmlNode * target, xmlNode * src) { if (src == NULL) { crm_warn("No node to copy properties from"); } else if (target == NULL) { crm_err("No node to copy properties into"); } else { xmlAttrPtr pIter = NULL; for (pIter = crm_first_attr(src); pIter != NULL; pIter = pIter->next) { const char *p_name = (const char *)pIter->name; const char *p_value = crm_attr_value(pIter); expand_plus_plus(target, p_name, p_value); } } return; } void fix_plus_plus_recursive(xmlNode * target) { /* TODO: Remove recursion and use xpath searches for value++ */ xmlNode *child = NULL; xmlAttrPtr pIter = NULL; for (pIter = crm_first_attr(target); pIter != NULL; pIter = pIter->next) { const char *p_name = (const char *)pIter->name; const char *p_value = crm_attr_value(pIter); expand_plus_plus(target, p_name, p_value); } for (child = __xml_first_child(target); child != NULL; child = __xml_next(child)) { fix_plus_plus_recursive(child); } } void expand_plus_plus(xmlNode * target, const char *name, const char *value) { int offset = 1; int name_len = 0; int int_value = 0; int value_len = 0; const char *old_value = NULL; if (value == NULL || name == NULL) { return; } old_value = crm_element_value(target, name); if (old_value == NULL) { /* if no previous value, set unexpanded */ goto set_unexpanded; } else if (strstr(value, name) != value) { goto set_unexpanded; } name_len = strlen(name); value_len = strlen(value); if (value_len < (name_len + 2) || value[name_len] != '+' || (value[name_len + 1] != '+' && value[name_len + 1] != '=')) { goto set_unexpanded; } /* if we are expanding ourselves, * then no previous value was set and leave int_value as 0 */ if (old_value != value) { int_value = char2score(old_value); } if (value[name_len + 1] != '+') { const char *offset_s = value + (name_len + 2); offset = char2score(offset_s); } int_value += offset; if (int_value > INFINITY) { int_value = (int)INFINITY; } crm_xml_add_int(target, name, int_value); return; set_unexpanded: if (old_value == value) { /* the old value is already set, nothing to do */ return; } crm_xml_add(target, name, value); return; } xmlDoc * getDocPtr(xmlNode * node) { xmlDoc *doc = NULL; CRM_CHECK(node != NULL, return NULL); doc = node->doc; if (doc == NULL) { doc = xmlNewDoc((const xmlChar *)"1.0"); xmlDocSetRootElement(doc, node); xmlSetTreeDoc(node, doc); } return doc; } xmlNode * add_node_copy(xmlNode * parent, xmlNode * src_node) { xmlNode *child = NULL; xmlDoc *doc = getDocPtr(parent); CRM_CHECK(src_node != NULL, return NULL); child = xmlDocCopyNode(src_node, doc, 1); xmlAddChild(parent, child); crm_node_created(child); return child; } int add_node_nocopy(xmlNode * parent, const char *name, xmlNode * child) { add_node_copy(parent, child); free_xml(child); return 1; } static bool __xml_acl_check(xmlNode *xml, const char *name, enum xml_private_flags mode) { CRM_ASSERT(xml); CRM_ASSERT(xml->doc); CRM_ASSERT(xml->doc->_private); #if ENABLE_ACL { if(TRACKING_CHANGES(xml) && xml_acl_enabled(xml)) { int offset = 0; xmlNode *parent = xml; char buffer[XML_BUFFER_SIZE]; xml_private_t *docp = xml->doc->_private; if(docp->acls == NULL) { crm_trace("Ordinary user %s cannot access the CIB without any defined ACLs", docp->user); set_doc_flag(xml, xpf_acl_denied); return FALSE; } offset = __get_prefix(NULL, xml, buffer, offset); if(name) { offset += snprintf(buffer + offset, XML_BUFFER_SIZE - offset, "[@%s]", name); } CRM_LOG_ASSERT(offset > 0); /* Walk the tree upwards looking for xml_acl_* flags * - Creating an attribute requires write permissions for the node * - Creating a child requires write permissions for the parent */ if(name) { xmlAttr *attr = xmlHasProp(xml, (const xmlChar *)name); if(attr && mode == xpf_acl_create) { mode = xpf_acl_write; } } while(parent && parent->_private) { xml_private_t *p = parent->_private; if(__xml_acl_mode_test(p->flags, mode)) { return TRUE; } else if(is_set(p->flags, xpf_acl_deny)) { crm_trace("%x access denied to %s: parent", mode, buffer); set_doc_flag(xml, xpf_acl_denied); return FALSE; } parent = parent->parent; } crm_trace("%x access denied to %s: default", mode, buffer); set_doc_flag(xml, xpf_acl_denied); return FALSE; } } #endif return TRUE; } const char * crm_xml_add(xmlNode * node, const char *name, const char *value) { bool dirty = FALSE; xmlAttr *attr = NULL; CRM_CHECK(node != NULL, return NULL); CRM_CHECK(name != NULL, return NULL); if (value == NULL) { return NULL; } #if XML_PARANOIA_CHECKS { const char *old_value = NULL; old_value = crm_element_value(node, name); /* Could be re-setting the same value */ CRM_CHECK(old_value != value, crm_err("Cannot reset %s with crm_xml_add(%s)", name, value); return value); } #endif if(TRACKING_CHANGES(node)) { const char *old = crm_element_value(node, name); if(old == NULL || value == NULL || strcmp(old, value) != 0) { dirty = TRUE; } } if(dirty && __xml_acl_check(node, name, xpf_acl_create) == FALSE) { crm_trace("Cannot add %s=%s to %s", name, value, node->name); return NULL; } attr = xmlSetProp(node, (const xmlChar *)name, (const xmlChar *)value); if(dirty) { crm_attr_dirty(attr); } CRM_CHECK(attr && attr->children && attr->children->content, return NULL); return (char *)attr->children->content; } const char * crm_xml_replace(xmlNode * node, const char *name, const char *value) { bool dirty = FALSE; xmlAttr *attr = NULL; const char *old_value = NULL; CRM_CHECK(node != NULL, return NULL); CRM_CHECK(name != NULL && name[0] != 0, return NULL); old_value = crm_element_value(node, name); /* Could be re-setting the same value */ CRM_CHECK(old_value != value, return value); if(__xml_acl_check(node, name, xpf_acl_write) == FALSE) { /* Create a fake object linked to doc->_private instead? */ crm_trace("Cannot replace %s=%s to %s", name, value, node->name); return NULL; } else if (old_value != NULL && value == NULL) { xml_remove_prop(node, name); return NULL; } else if (value == NULL) { return NULL; } if(TRACKING_CHANGES(node)) { if(old_value == NULL || value == NULL || strcmp(old_value, value) != 0) { dirty = TRUE; } } attr = xmlSetProp(node, (const xmlChar *)name, (const xmlChar *)value); if(dirty) { crm_attr_dirty(attr); } CRM_CHECK(attr && attr->children && attr->children->content, return NULL); return (char *)attr->children->content; } const char * crm_xml_add_int(xmlNode * node, const char *name, int value) { char *number = crm_itoa(value); const char *added = crm_xml_add(node, name, number); free(number); return added; } xmlNode * create_xml_node(xmlNode * parent, const char *name) { xmlDoc *doc = NULL; xmlNode *node = NULL; if (name == NULL || name[0] == 0) { return NULL; } if (parent == NULL) { doc = xmlNewDoc((const xmlChar *)"1.0"); node = xmlNewDocRawNode(doc, NULL, (const xmlChar *)name, NULL); xmlDocSetRootElement(doc, node); } else { doc = getDocPtr(parent); node = xmlNewDocRawNode(doc, NULL, (const xmlChar *)name, NULL); xmlAddChild(parent, node); } crm_node_created(node); return node; } static inline int __get_prefix(const char *prefix, xmlNode *xml, char *buffer, int offset) { const char *id = ID(xml); if(offset == 0 && prefix == NULL && xml->parent) { offset = __get_prefix(NULL, xml->parent, buffer, offset); } if(id) { offset += snprintf(buffer + offset, XML_BUFFER_SIZE - offset, "/%s[@id='%s']", (const char *)xml->name, id); } else if(xml->name) { offset += snprintf(buffer + offset, XML_BUFFER_SIZE - offset, "/%s", (const char *)xml->name); } return offset; } char * xml_get_path(xmlNode *xml) { int offset = 0; char buffer[XML_BUFFER_SIZE]; if(__get_prefix(NULL, xml, buffer, offset) > 0) { return strdup(buffer); } return NULL; } void free_xml(xmlNode * child) { if (child != NULL) { xmlNode *top = NULL; xmlDoc *doc = child->doc; xml_private_t *p = child->_private; if (doc != NULL) { top = xmlDocGetRootElement(doc); } if (doc != NULL && top == child) { /* Free everything */ xmlFreeDoc(doc); } else if(__xml_acl_check(child, NULL, xpf_acl_write) == FALSE) { int offset = 0; char buffer[XML_BUFFER_SIZE]; __get_prefix(NULL, child, buffer, offset); crm_trace("Cannot remove %s %x", buffer, p->flags); return; } else { if(doc && TRACKING_CHANGES(child) && is_not_set(p->flags, xpf_created)) { int offset = 0; char buffer[XML_BUFFER_SIZE]; if(__get_prefix(NULL, child, buffer, offset) > 0) { crm_trace("Deleting %s %p from %p", buffer, child, doc); p = doc->_private; p->deleted_paths = g_list_append(p->deleted_paths, strdup(buffer)); set_doc_flag(child, xpf_dirty); } } /* Free this particular subtree * Make sure to unlink it from the parent first */ xmlUnlinkNode(child); xmlFreeNode(child); } } } xmlNode * copy_xml(xmlNode * src) { xmlDoc *doc = xmlNewDoc((const xmlChar *)"1.0"); xmlNode *copy = xmlDocCopyNode(src, doc, 1); xmlDocSetRootElement(doc, copy); xmlSetTreeDoc(copy, doc); return copy; } static void crm_xml_err(void *ctx, const char *msg, ...) G_GNUC_PRINTF(2, 3); static void crm_xml_err(void *ctx, const char *msg, ...) { int len = 0; va_list args; char *buf = NULL; static int buffer_len = 0; static char *buffer = NULL; static struct qb_log_callsite *xml_error_cs = NULL; va_start(args, msg); len = vasprintf(&buf, msg, args); if(xml_error_cs == NULL) { xml_error_cs = qb_log_callsite_get( __func__, __FILE__, "xml library error", LOG_TRACE, __LINE__, crm_trace_nonlog); } if (strchr(buf, '\n')) { buf[len - 1] = 0; if (buffer) { crm_err("XML Error: %s%s", buffer, buf); free(buffer); } else { crm_err("XML Error: %s", buf); } if (xml_error_cs && xml_error_cs->targets) { crm_abort(__FILE__, __PRETTY_FUNCTION__, __LINE__, "xml library error", TRUE, TRUE); } buffer = NULL; buffer_len = 0; } else if (buffer == NULL) { buffer_len = len; buffer = buf; buf = NULL; } else { buffer = realloc(buffer, 1 + buffer_len + len); memcpy(buffer + buffer_len, buf, len); buffer_len += len; buffer[buffer_len] = 0; } va_end(args); free(buf); } xmlNode * string2xml(const char *input) { xmlNode *xml = NULL; xmlDocPtr output = NULL; xmlParserCtxtPtr ctxt = NULL; xmlErrorPtr last_error = NULL; if (input == NULL) { crm_err("Can't parse NULL input"); return NULL; } /* create a parser context */ ctxt = xmlNewParserCtxt(); CRM_CHECK(ctxt != NULL, return NULL); /* xmlCtxtUseOptions(ctxt, XML_PARSE_NOBLANKS|XML_PARSE_RECOVER); */ xmlCtxtResetLastError(ctxt); xmlSetGenericErrorFunc(ctxt, crm_xml_err); /* initGenericErrorDefaultFunc(crm_xml_err); */ output = xmlCtxtReadDoc(ctxt, (const xmlChar *)input, NULL, NULL, XML_PARSE_NOBLANKS | XML_PARSE_RECOVER); if (output) { xml = xmlDocGetRootElement(output); } last_error = xmlCtxtGetLastError(ctxt); if (last_error && last_error->code != XML_ERR_OK) { /* crm_abort(__FILE__,__FUNCTION__,__LINE__, "last_error->code != XML_ERR_OK", TRUE, TRUE); */ /* * http://xmlsoft.org/html/libxml-xmlerror.html#xmlErrorLevel * http://xmlsoft.org/html/libxml-xmlerror.html#xmlParserErrors */ crm_warn("Parsing failed (domain=%d, level=%d, code=%d): %s", last_error->domain, last_error->level, last_error->code, last_error->message); if (last_error->code == XML_ERR_DOCUMENT_EMPTY) { CRM_LOG_ASSERT("Cannot parse an empty string"); } else if (last_error->code != XML_ERR_DOCUMENT_END) { crm_err("Couldn't%s parse %d chars: %s", xml ? " fully" : "", (int)strlen(input), input); if (xml != NULL) { crm_log_xml_err(xml, "Partial"); } } else { int len = strlen(input); int lpc = 0; while(lpc < len) { crm_warn("Parse error[+%.3d]: %.80s", lpc, input+lpc); lpc += 80; } CRM_LOG_ASSERT("String parsing error"); } } xmlFreeParserCtxt(ctxt); return xml; } xmlNode * stdin2xml(void) { size_t data_length = 0; size_t read_chars = 0; char *xml_buffer = NULL; xmlNode *xml_obj = NULL; do { size_t next = XML_BUFFER_SIZE + data_length + 1; if(next <= 0) { crm_err("Buffer size exceeded at: %l + %d", data_length, XML_BUFFER_SIZE); break; } xml_buffer = realloc(xml_buffer, next); read_chars = fread(xml_buffer + data_length, 1, XML_BUFFER_SIZE, stdin); data_length += read_chars; } while (read_chars > 0); if (data_length == 0) { crm_warn("No XML supplied on stdin"); free(xml_buffer); return NULL; } xml_buffer[data_length] = '\0'; xml_obj = string2xml(xml_buffer); free(xml_buffer); crm_log_xml_trace(xml_obj, "Created fragment"); return xml_obj; } static char * decompress_file(const char *filename) { char *buffer = NULL; #if HAVE_BZLIB_H int rc = 0; size_t length = 0, read_len = 0; BZFILE *bz_file = NULL; FILE *input = fopen(filename, "r"); if (input == NULL) { crm_perror(LOG_ERR, "Could not open %s for reading", filename); return NULL; } bz_file = BZ2_bzReadOpen(&rc, input, 0, 0, NULL, 0); if (rc != BZ_OK) { BZ2_bzReadClose(&rc, bz_file); return NULL; } rc = BZ_OK; while (rc == BZ_OK) { buffer = realloc(buffer, XML_BUFFER_SIZE + length + 1); read_len = BZ2_bzRead(&rc, bz_file, buffer + length, XML_BUFFER_SIZE); crm_trace("Read %ld bytes from file: %d", (long)read_len, rc); if (rc == BZ_OK || rc == BZ_STREAM_END) { length += read_len; } } buffer[length] = '\0'; if (rc != BZ_STREAM_END) { crm_err("Couldnt read compressed xml from file"); free(buffer); buffer = NULL; } BZ2_bzReadClose(&rc, bz_file); fclose(input); #else crm_err("Cannot read compressed files:" " bzlib was not available at compile time"); #endif return buffer; } void strip_text_nodes(xmlNode * xml) { xmlNode *iter = xml->children; while (iter) { xmlNode *next = iter->next; switch (iter->type) { case XML_TEXT_NODE: /* Remove it */ xmlUnlinkNode(iter); xmlFreeNode(iter); break; case XML_ELEMENT_NODE: /* Search it */ strip_text_nodes(iter); break; default: /* Leave it */ break; } iter = next; } } xmlNode * filename2xml(const char *filename) { xmlNode *xml = NULL; xmlDocPtr output = NULL; const char *match = NULL; xmlParserCtxtPtr ctxt = NULL; xmlErrorPtr last_error = NULL; static int xml_options = XML_PARSE_NOBLANKS | XML_PARSE_RECOVER; /* create a parser context */ ctxt = xmlNewParserCtxt(); CRM_CHECK(ctxt != NULL, return NULL); /* xmlCtxtUseOptions(ctxt, XML_PARSE_NOBLANKS|XML_PARSE_RECOVER); */ xmlCtxtResetLastError(ctxt); xmlSetGenericErrorFunc(ctxt, crm_xml_err); /* initGenericErrorDefaultFunc(crm_xml_err); */ if (filename) { match = strstr(filename, ".bz2"); } if (filename == NULL) { /* STDIN_FILENO == fileno(stdin) */ output = xmlCtxtReadFd(ctxt, STDIN_FILENO, "unknown.xml", NULL, xml_options); } else if (match == NULL || match[4] != 0) { output = xmlCtxtReadFile(ctxt, filename, NULL, xml_options); } else { char *input = decompress_file(filename); output = xmlCtxtReadDoc(ctxt, (const xmlChar *)input, NULL, NULL, xml_options); free(input); } if (output && (xml = xmlDocGetRootElement(output))) { strip_text_nodes(xml); } last_error = xmlCtxtGetLastError(ctxt); if (last_error && last_error->code != XML_ERR_OK) { /* crm_abort(__FILE__,__FUNCTION__,__LINE__, "last_error->code != XML_ERR_OK", TRUE, TRUE); */ /* * http://xmlsoft.org/html/libxml-xmlerror.html#xmlErrorLevel * http://xmlsoft.org/html/libxml-xmlerror.html#xmlParserErrors */ crm_err("Parsing failed (domain=%d, level=%d, code=%d): %s", last_error->domain, last_error->level, last_error->code, last_error->message); if (last_error && last_error->code != XML_ERR_OK) { crm_err("Couldn't%s parse %s", xml ? " fully" : "", filename); if (xml != NULL) { crm_log_xml_err(xml, "Partial"); } } } xmlFreeParserCtxt(ctxt); return xml; } static int write_xml_stream(xmlNode * xml_node, const char *filename, FILE * stream, gboolean compress) { int res = 0; char *buffer = NULL; unsigned int out = 0; static mode_t cib_mode = S_IRUSR | S_IWUSR; CRM_CHECK(stream != NULL, return -1); crm_trace("Writing XML out to %s", filename); if (xml_node == NULL) { crm_err("Cannot write NULL to %s", filename); fclose(stream); return -1; } crm_log_xml_trace(xml_node, "Writing out"); if(strstr(filename, "cib") != NULL) { /* Only CIB's need this field written */ time_t now = time(NULL); char *now_str = ctime(&now); now_str[24] = EOS; /* replace the newline */ crm_xml_add(xml_node, XML_CIB_ATTR_WRITTEN, now_str); /* establish the correct permissions */ fchmod(fileno(stream), cib_mode); } buffer = dump_xml_formatted(xml_node); CRM_CHECK(buffer != NULL && strlen(buffer) > 0, crm_log_xml_warn(xml_node, "dump:failed"); goto bail); if (compress) { #if HAVE_BZLIB_H int rc = BZ_OK; unsigned int in = 0; BZFILE *bz_file = NULL; bz_file = BZ2_bzWriteOpen(&rc, stream, 5, 0, 30); if (rc != BZ_OK) { crm_err("bzWriteOpen failed: %d", rc); } else { BZ2_bzWrite(&rc, bz_file, buffer, strlen(buffer)); if (rc != BZ_OK) { crm_err("bzWrite() failed: %d", rc); } } if (rc == BZ_OK) { BZ2_bzWriteClose(&rc, bz_file, 0, &in, &out); if (rc != BZ_OK) { crm_err("bzWriteClose() failed: %d", rc); out = -1; } else { crm_trace("%s: In: %d, out: %d", filename, in, out); } } #else crm_err("Cannot write compressed files:" " bzlib was not available at compile time"); #endif } if (out <= 0) { res = fprintf(stream, "%s", buffer); if (res < 0) { crm_perror(LOG_ERR, "Cannot write output to %s", filename); goto bail; } } bail: if (fflush(stream) != 0) { crm_perror(LOG_ERR, "fflush for %s failed:", filename); res = -1; } if (fsync(fileno(stream)) < 0) { crm_perror(LOG_ERR, "fsync for %s failed:", filename); res = -1; } fclose(stream); crm_trace("Saved %d bytes to the Cib as XML", res); free(buffer); return res; } int write_xml_fd(xmlNode * xml_node, const char *filename, int fd, gboolean compress) { FILE *stream = NULL; CRM_CHECK(fd > 0, return -1); stream = fdopen(fd, "w"); return write_xml_stream(xml_node, filename, stream, compress); } int write_xml_file(xmlNode * xml_node, const char *filename, gboolean compress) { FILE *stream = NULL; stream = fopen(filename, "w"); return write_xml_stream(xml_node, filename, stream, compress); } xmlNode * get_message_xml(xmlNode * msg, const char *field) { xmlNode *tmp = first_named_child(msg, field); return __xml_first_child(tmp); } gboolean add_message_xml(xmlNode * msg, const char *field, xmlNode * xml) { xmlNode *holder = create_xml_node(msg, field); add_node_copy(holder, xml); return TRUE; } static char * crm_xml_escape_shuffle(char *text, int start, int *length, const char *replace) { int lpc; int offset = strlen(replace) - 1; /* We have space for 1 char already */ *length += offset; text = realloc(text, *length); for (lpc = (*length) - 1; lpc > (start + offset); lpc--) { text[lpc] = text[lpc - offset]; } memcpy(text + start, replace, offset + 1); return text; } char * crm_xml_escape(const char *text) { int index; int changes = 0; int length = 1 + strlen(text); char *copy = strdup(text); /* * When xmlCtxtReadDoc() parses < and friends in a * value, it converts them to their human readable * form. * * If one uses xmlNodeDump() to convert it back to a * string, all is well, because special characters are * converted back to their escape sequences. * * However xmlNodeDump() is randomly dog slow, even with the same * input. So we need to replicate the escapeing in our custom * version so that the result can be re-parsed by xmlCtxtReadDoc() * when necessary. */ for (index = 0; index < length; index++) { switch (copy[index]) { case 0: break; case '<': copy = crm_xml_escape_shuffle(copy, index, &length, "<"); changes++; break; case '>': copy = crm_xml_escape_shuffle(copy, index, &length, ">"); changes++; break; case '"': copy = crm_xml_escape_shuffle(copy, index, &length, """); changes++; break; case '\'': copy = crm_xml_escape_shuffle(copy, index, &length, "'"); changes++; break; case '&': copy = crm_xml_escape_shuffle(copy, index, &length, "&"); changes++; break; case '\t': /* Might as well just expand to a few spaces... */ copy = crm_xml_escape_shuffle(copy, index, &length, " "); changes++; break; case '\n': /* crm_trace("Convert: \\%.3o", copy[index]); */ copy = crm_xml_escape_shuffle(copy, index, &length, "\\n"); changes++; break; case '\r': copy = crm_xml_escape_shuffle(copy, index, &length, "\\r"); changes++; break; /* For debugging... case '\\': crm_trace("Passthrough: \\%c", copy[index+1]); break; */ default: /* Check for and replace non-printing characters with their octal equivalent */ if(copy[index] < ' ' || copy[index] > '~') { char *replace = g_strdup_printf("\\%.3o", copy[index]); /* crm_trace("Convert to octal: \\%.3o", copy[index]); */ copy = crm_xml_escape_shuffle(copy, index, &length, replace); free(replace); changes++; } } } if (changes) { crm_trace("Dumped '%s'", copy); } return copy; } static inline void dump_xml_attr(xmlAttrPtr attr, int options, char **buffer, int *offset, int *max) { char *p_value = NULL; const char *p_name = NULL; CRM_ASSERT(buffer != NULL); if (attr == NULL || attr->children == NULL) { return; } p_name = (const char *)attr->name; p_value = crm_xml_escape((const char *)attr->children->content); buffer_print(*buffer, *max, *offset, " %s=\"%s\"", p_name, p_value); free(p_value); } static void __xml_log_element(int log_level, const char *file, const char *function, int line, const char *prefix, xmlNode * data, int depth, int options) { int max = 0; int offset = 0; const char *name = NULL; const char *hidden = NULL; xmlNode *child = NULL; xmlAttrPtr pIter = NULL; if(data == NULL) { return; } name = crm_element_name(data); if(is_set(options, xml_log_option_open)) { char *buffer = NULL; insert_prefix(options, &buffer, &offset, &max, depth); if(data->type == XML_COMMENT_NODE) { buffer_print(buffer, max, offset, ""); } else { buffer_print(buffer, max, offset, "<%s", name); } hidden = crm_element_value(data, "hidden"); for (pIter = crm_first_attr(data); pIter != NULL; pIter = pIter->next) { xml_private_t *p = pIter->_private; const char *p_name = (const char *)pIter->name; const char *p_value = crm_attr_value(pIter); char *p_copy = NULL; if(is_set(p->flags, xpf_deleted)) { continue; } else if ((is_set(options, xml_log_option_diff_plus) || is_set(options, xml_log_option_diff_minus)) && strcmp(XML_DIFF_MARKER, p_name) == 0) { continue; } else if (hidden != NULL && p_name[0] != 0 && strstr(hidden, p_name) != NULL) { p_copy = strdup("*****"); } else { p_copy = crm_xml_escape(p_value); } buffer_print(buffer, max, offset, " %s=\"%s\"", p_name, p_copy); free(p_copy); } if(xml_has_children(data) == FALSE) { buffer_print(buffer, max, offset, "/>"); } else if(is_set(options, xml_log_option_children)) { buffer_print(buffer, max, offset, ">"); } else { buffer_print(buffer, max, offset, "/>"); } do_crm_log_alias(log_level, file, function, line, "%s %s", prefix, buffer); free(buffer); } if(data->type == XML_COMMENT_NODE) { return; } else if(xml_has_children(data) == FALSE) { return; } else if(is_set(options, xml_log_option_children)) { offset = 0; max = 0; for (child = __xml_first_child(data); child != NULL; child = __xml_next(child)) { __xml_log_element(log_level, file, function, line, prefix, child, depth + 1, options|xml_log_option_open|xml_log_option_close); } } if(is_set(options, xml_log_option_close)) { char *buffer = NULL; insert_prefix(options, &buffer, &offset, &max, depth); buffer_print(buffer, max, offset, "", name); do_crm_log_alias(log_level, file, function, line, "%s %s", prefix, buffer); free(buffer); } } static void __xml_log_change_element(int log_level, const char *file, const char *function, int line, const char *prefix, xmlNode * data, int depth, int options) { xml_private_t *p; char *prefix_m = NULL; xmlNode *child = NULL; xmlAttrPtr pIter = NULL; if(data == NULL) { return; } p = data->_private; prefix_m = strdup(prefix); prefix_m[1] = '+'; if(is_set(p->flags, xpf_dirty) && is_set(p->flags, xpf_created)) { /* Continue and log full subtree */ __xml_log_element(log_level, file, function, line, prefix_m, data, depth, options|xml_log_option_open|xml_log_option_close|xml_log_option_children); } else if(is_set(p->flags, xpf_dirty)) { char *spaces = calloc(80, 1); int s_count = 0, s_max = 80; char *prefix_del = NULL; char *prefix_moved = NULL; const char *flags = prefix; insert_prefix(options, &spaces, &s_count, &s_max, depth); prefix_del = strdup(prefix); prefix_del[0] = '-'; prefix_del[1] = '-'; prefix_moved = strdup(prefix); prefix_moved[1] = '~'; if(is_set(p->flags, xpf_moved)) { flags = prefix_moved; } else { flags = prefix; } __xml_log_element(log_level, file, function, line, flags, data, depth, options|xml_log_option_open); for (pIter = crm_first_attr(data); pIter != NULL; pIter = pIter->next) { const char *aname = (const char*)pIter->name; p = pIter->_private; if(is_set(p->flags, xpf_deleted)) { const char *value = crm_element_value(data, aname); flags = prefix_del; do_crm_log_alias(log_level, file, function, line, "%s %s @%s=%s", flags, spaces, aname, value); } else if(is_set(p->flags, xpf_dirty)) { const char *value = crm_element_value(data, aname); if(is_set(p->flags, xpf_created)) { flags = prefix_m; } else if(is_set(p->flags, xpf_modified)) { flags = prefix; } else if(is_set(p->flags, xpf_moved)) { flags = prefix_moved; } else { flags = prefix; } do_crm_log_alias(log_level, file, function, line, "%s %s @%s=%s", flags, spaces, aname, value); } } free(prefix_moved); free(prefix_del); free(spaces); for (child = __xml_first_child(data); child != NULL; child = __xml_next(child)) { __xml_log_change_element(log_level, file, function, line, prefix, child, depth + 1, options); } __xml_log_element(log_level, file, function, line, prefix, data, depth, options|xml_log_option_close); } else { for (child = __xml_first_child(data); child != NULL; child = __xml_next(child)) { __xml_log_change_element(log_level, file, function, line, prefix, child, depth + 1, options); } } free(prefix_m); } void log_data_element(int log_level, const char *file, const char *function, int line, const char *prefix, xmlNode * data, int depth, int options) { xmlNode *a_child = NULL; char *prefix_m = NULL; if (prefix == NULL) { prefix = ""; } /* Since we use the same file and line, to avoid confusing libqb, we need to use the same format strings */ if (data == NULL) { do_crm_log_alias(log_level, file, function, line, "%s: %s", prefix, "No data to dump as XML"); return; } if(is_set(options, xml_log_option_dirty_add) || is_set(options, xml_log_option_dirty_add)) { __xml_log_change_element(log_level, file, function, line, prefix, data, depth, options); return; } if (is_set(options, xml_log_option_formatted)) { if (is_set(options, xml_log_option_diff_plus) && (data->children == NULL || crm_element_value(data, XML_DIFF_MARKER))) { options |= xml_log_option_diff_all; prefix_m = strdup(prefix); prefix_m[1] = '+'; prefix = prefix_m; } else if (is_set(options, xml_log_option_diff_minus) && (data->children == NULL || crm_element_value(data, XML_DIFF_MARKER))) { options |= xml_log_option_diff_all; prefix_m = strdup(prefix); prefix_m[1] = '-'; prefix = prefix_m; } } if (is_set(options, xml_log_option_diff_short) && is_not_set(options, xml_log_option_diff_all)) { /* Still searching for the actual change */ for (a_child = __xml_first_child(data); a_child != NULL; a_child = __xml_next(a_child)) { log_data_element(log_level, file, function, line, prefix, a_child, depth + 1, options); } return; } __xml_log_element(log_level, file, function, line, prefix, data, depth, options|xml_log_option_open|xml_log_option_close|xml_log_option_children); free(prefix_m); } static void dump_filtered_xml(xmlNode * data, int options, char **buffer, int *offset, int *max) { int lpc; xmlAttrPtr xIter = NULL; static int filter_len = DIMOF(filter); for (lpc = 0; options && lpc < filter_len; lpc++) { filter[lpc].found = FALSE; } for (xIter = crm_first_attr(data); xIter != NULL; xIter = xIter->next) { bool skip = FALSE; const char *p_name = (const char *)xIter->name; for (lpc = 0; skip == FALSE && lpc < filter_len; lpc++) { if (filter[lpc].found == FALSE && strcmp(p_name, filter[lpc].string) == 0) { filter[lpc].found = TRUE; skip = TRUE; break; } } if (skip == FALSE) { dump_xml_attr(xIter, options, buffer, offset, max); } } } static void dump_xml(xmlNode * data, int options, char **buffer, int *offset, int *max, int depth); static void dump_xml_element(xmlNode * data, int options, char **buffer, int *offset, int *max, int depth) { const char *name = NULL; CRM_ASSERT(max != NULL); CRM_ASSERT(offset != NULL); CRM_ASSERT(buffer != NULL); if (data == NULL) { crm_trace("Nothing to dump"); return; } if (*buffer == NULL) { *offset = 0; *max = 0; } name = crm_element_name(data); CRM_ASSERT(name != NULL); insert_prefix(options, buffer, offset, max, depth); buffer_print(*buffer, *max, *offset, "<%s", name); if (options & xml_log_option_filtered) { dump_filtered_xml(data, options, buffer, offset, max); } else { xmlAttrPtr xIter = NULL; for (xIter = crm_first_attr(data); xIter != NULL; xIter = xIter->next) { dump_xml_attr(xIter, options, buffer, offset, max); } } if (data->children == NULL) { buffer_print(*buffer, *max, *offset, "/>"); } else { buffer_print(*buffer, *max, *offset, ">"); } if (options & xml_log_option_formatted) { buffer_print(*buffer, *max, *offset, "\n"); } if (data->children) { xmlNode *xChild = NULL; for (xChild = __xml_first_child(data); xChild != NULL; xChild = __xml_next(xChild)) { dump_xml(xChild, options, buffer, offset, max, depth + 1); } insert_prefix(options, buffer, offset, max, depth); buffer_print(*buffer, *max, *offset, "", name); if (options & xml_log_option_formatted) { buffer_print(*buffer, *max, *offset, "\n"); } } } static void dump_xml_comment(xmlNode * data, int options, char **buffer, int *offset, int *max, int depth) { CRM_ASSERT(max != NULL); CRM_ASSERT(offset != NULL); CRM_ASSERT(buffer != NULL); if (data == NULL) { crm_trace("Nothing to dump"); return; } if (*buffer == NULL) { *offset = 0; *max = 0; } insert_prefix(options, buffer, offset, max, depth); buffer_print(*buffer, *max, *offset, ""); if (options & xml_log_option_formatted) { buffer_print(*buffer, *max, *offset, "\n"); } } static void dump_xml(xmlNode * data, int options, char **buffer, int *offset, int *max, int depth) { #if 0 if (is_not_set(options, xml_log_option_filtered)) { /* Turning this code on also changes the PE tests for some reason * (not just newlines). Figure out why before considering to * enable this permanently. * * It exists to help debug slowness in xmlNodeDump() and * potentially if we ever want to go back to it. * * In theory its a good idea (reuse) but our custom version does * better for the filtered case and avoids the final strdup() for * everything */ time_t now, next; xmlDoc *doc = NULL; xmlBuffer *xml_buffer = NULL; *buffer = NULL; doc = getDocPtr(data); /* doc will only be NULL if data is */ CRM_CHECK(doc != NULL, return); now = time(NULL); xml_buffer = xmlBufferCreate(); CRM_ASSERT(xml_buffer != NULL); /* The default allocator XML_BUFFER_ALLOC_EXACT does far too many * realloc()s and it can take upwards of 18 seconds (yes, seconds) * to dump a 28kb tree which XML_BUFFER_ALLOC_DOUBLEIT can do in * less than 1 second. * * We could also use xmlBufferCreateSize() to start with a * sane-ish initial size and avoid the first few doubles. */ xmlBufferSetAllocationScheme(xml_buffer, XML_BUFFER_ALLOC_DOUBLEIT); *max = xmlNodeDump(xml_buffer, doc, data, 0, (options & xml_log_option_formatted)); if (*max > 0) { *buffer = strdup((char *)xml_buffer->content); } next = time(NULL); if ((now + 1) < next) { crm_log_xml_trace(data, "Long time"); crm_err("xmlNodeDump() -> %dbytes took %ds", *max, next - now); } xmlBufferFree(xml_buffer); return; } #endif switch(data->type) { case XML_ELEMENT_NODE: /* Handle below */ dump_xml_element(data, options, buffer, offset, max, depth); break; case XML_TEXT_NODE: /* Ignore */ return; case XML_COMMENT_NODE: dump_xml_comment(data, options, buffer, offset, max, depth); break; default: crm_warn("Unhandled type: %d", data->type); return; /* XML_ATTRIBUTE_NODE = 2 XML_CDATA_SECTION_NODE = 4 XML_ENTITY_REF_NODE = 5 XML_ENTITY_NODE = 6 XML_PI_NODE = 7 XML_DOCUMENT_NODE = 9 XML_DOCUMENT_TYPE_NODE = 10 XML_DOCUMENT_FRAG_NODE = 11 XML_NOTATION_NODE = 12 XML_HTML_DOCUMENT_NODE = 13 XML_DTD_NODE = 14 XML_ELEMENT_DECL = 15 XML_ATTRIBUTE_DECL = 16 XML_ENTITY_DECL = 17 XML_NAMESPACE_DECL = 18 XML_XINCLUDE_START = 19 XML_XINCLUDE_END = 20 XML_DOCB_DOCUMENT_NODE = 21 */ } } static void fix_digest_buffer(char **buffer, int *offset, int *max, char c) { buffer_print(*buffer, *max, *offset, "%c", c); } static char * dump_xml_for_digest(xmlNode * an_xml_node) { char *buffer = NULL; int offset = 0, max = 0; /* for compatability with the old result which is used for v1 digests */ fix_digest_buffer(&buffer, &offset, &max, ' '); dump_xml(an_xml_node, 0, &buffer, &offset, &max, 0); fix_digest_buffer(&buffer, &offset, &max, '\n'); return buffer; } char * dump_xml_formatted(xmlNode * an_xml_node) { char *buffer = NULL; int offset = 0, max = 0; dump_xml(an_xml_node, xml_log_option_formatted, &buffer, &offset, &max, 0); return buffer; } char * dump_xml_unformatted(xmlNode * an_xml_node) { char *buffer = NULL; int offset = 0, max = 0; dump_xml(an_xml_node, 0, &buffer, &offset, &max, 0); return buffer; } gboolean xml_has_children(const xmlNode * xml_root) { if (xml_root != NULL && xml_root->children != NULL) { return TRUE; } return FALSE; } int crm_element_value_int(xmlNode * data, const char *name, int *dest) { const char *value = crm_element_value(data, name); CRM_CHECK(dest != NULL, return -1); if (value) { *dest = crm_int_helper(value, NULL); return 0; } return -1; } int crm_element_value_const_int(const xmlNode * data, const char *name, int *dest) { return crm_element_value_int((xmlNode *) data, name, dest); } const char * crm_element_value_const(const xmlNode * data, const char *name) { return crm_element_value((xmlNode *) data, name); } char * crm_element_value_copy(xmlNode * data, const char *name) { char *value_copy = NULL; const char *value = crm_element_value(data, name); if (value != NULL) { value_copy = strdup(value); } return value_copy; } void xml_remove_prop(xmlNode * obj, const char *name) { if(__xml_acl_check(obj, NULL, xpf_acl_write) == FALSE) { crm_trace("Cannot remove %s from %s", name, obj->name); } else if(TRACKING_CHANGES(obj)) { /* Leave in place (marked for removal) until after the diff is calculated */ xml_private_t *p = NULL; xmlAttr *attr = xmlHasProp(obj, (const xmlChar *)name); p = attr->_private; set_parent_flag(obj, xpf_dirty); p->flags |= xpf_deleted; /* crm_trace("Setting flag %x due to %s[@id=%s].%s", xpf_dirty, obj->name, ID(obj), name); */ } else { xmlUnsetProp(obj, (const xmlChar *)name); } } void purge_diff_markers(xmlNode * a_node) { xmlNode *child = NULL; CRM_CHECK(a_node != NULL, return); xml_remove_prop(a_node, XML_DIFF_MARKER); for (child = __xml_first_child(a_node); child != NULL; child = __xml_next(child)) { purge_diff_markers(child); } } void save_xml_to_file(xmlNode * xml, const char *desc, const char *filename) { char *f = NULL; if (filename == NULL) { char *uuid = crm_generate_uuid(); f = g_strdup_printf("/tmp/%s", uuid); filename = f; free(uuid); } crm_info("Saving %s to %s", desc, filename); write_xml_file(xml, filename, FALSE); g_free(f); } gboolean apply_xml_diff(xmlNode * old, xmlNode * diff, xmlNode ** new) { gboolean result = TRUE; int root_nodes_seen = 0; static struct qb_log_callsite *digest_cs = NULL; const char *digest = crm_element_value(diff, XML_ATTR_DIGEST); const char *version = crm_element_value(diff, XML_ATTR_CRM_VERSION); xmlNode *child_diff = NULL; xmlNode *added = find_xml_node(diff, "diff-added", FALSE); xmlNode *removed = find_xml_node(diff, "diff-removed", FALSE); CRM_CHECK(new != NULL, return FALSE); if (digest_cs == NULL) { digest_cs = qb_log_callsite_get(__func__, __FILE__, "diff-digest", LOG_TRACE, __LINE__, crm_trace_nonlog); } crm_trace("Substraction Phase"); for (child_diff = __xml_first_child(removed); child_diff != NULL; child_diff = __xml_next(child_diff)) { CRM_CHECK(root_nodes_seen == 0, result = FALSE); if (root_nodes_seen == 0) { *new = subtract_xml_object(NULL, old, child_diff, FALSE, NULL, NULL); } root_nodes_seen++; } if (root_nodes_seen == 0) { *new = copy_xml(old); } else if (root_nodes_seen > 1) { crm_err("(-) Diffs cannot contain more than one change set..." " saw %d", root_nodes_seen); result = FALSE; } root_nodes_seen = 0; crm_trace("Addition Phase"); if (result) { xmlNode *child_diff = NULL; for (child_diff = __xml_first_child(added); child_diff != NULL; child_diff = __xml_next(child_diff)) { CRM_CHECK(root_nodes_seen == 0, result = FALSE); if (root_nodes_seen == 0) { add_xml_object(NULL, *new, child_diff, TRUE); } root_nodes_seen++; } } if (root_nodes_seen > 1) { crm_err("(+) Diffs cannot contain more than one change set..." " saw %d", root_nodes_seen); result = FALSE; } else if (result && digest) { char *new_digest = NULL; purge_diff_markers(*new); /* Purge now so the diff is ok */ new_digest = calculate_xml_versioned_digest(*new, FALSE, TRUE, version); if (safe_str_neq(new_digest, digest)) { crm_info("Digest mis-match: expected %s, calculated %s", digest, new_digest); result = FALSE; crm_trace("%p %0.6x", digest_cs, digest_cs ? digest_cs->targets : 0); if (digest_cs && digest_cs->targets) { save_xml_to_file(old, "diff:original", NULL); save_xml_to_file(diff, "diff:input", NULL); save_xml_to_file(*new, "diff:new", NULL); } } else { crm_trace("Digest matched: expected %s, calculated %s", digest, new_digest); } free(new_digest); } else if (result) { purge_diff_markers(*new); /* Purge now so the diff is ok */ } return result; } static void __xml_diff_object(xmlNode * old, xmlNode * new) { xmlNode *cIter = NULL; xmlAttr *pIter = NULL; CRM_CHECK(new != NULL, return); if(old == NULL) { crm_node_created(new); __xml_acl_post_process(new); /* Check creation is allowed */ return; } else { xml_private_t *p = new->_private; if(p->flags & xpf_processed) { /* Avoid re-comparing nodes */ return; } p->flags |= xpf_processed; } for (pIter = crm_first_attr(new); pIter != NULL; pIter = pIter->next) { xml_private_t *p = pIter->_private; /* Assume everything was just created and take it from there */ p->flags |= xpf_created; } for (pIter = crm_first_attr(old); pIter != NULL; ) { xmlAttr *prop = pIter; xml_private_t *p = NULL; const char *name = (const char *)pIter->name; const char *old_value = crm_element_value(old, name); xmlAttr *exists = xmlHasProp(new, pIter->name); pIter = pIter->next; if(exists == NULL) { p = new->doc->_private; /* Prevent the dirty flag being set recursively upwards */ clear_bit(p->flags, xpf_tracking); exists = xmlSetProp(new, (const xmlChar *)name, (const xmlChar *)old_value); set_bit(p->flags, xpf_tracking); p = exists->_private; p->flags = 0; crm_trace("Lost %s@%s=%s", old->name, name, old_value); xml_remove_prop(new, name); } else { int p_new = __xml_offset((xmlNode*)exists); int p_old = __xml_offset((xmlNode*)prop); const char *value = crm_element_value(new, name); p = exists->_private; p->flags = (p->flags & ~xpf_created); if(strcmp(value, old_value) != 0) { /* Restore the original value, so we can call crm_xml_add() whcih checks ACLs */ char *vcopy = crm_element_value_copy(new, name); crm_trace("Modified %s@%s %s->%s", old->name, name, old_value, vcopy); xmlSetProp(new, prop->name, (const xmlChar *)old_value); crm_xml_add(new, name, vcopy); free(vcopy); } else if(p_old != p_new) { crm_info("Moved %s@%s (%d -> %d)", old->name, name, p_old, p_new); __xml_node_dirty(new); p->flags |= xpf_dirty|xpf_moved; if(p_old > p_new) { p = prop->_private; p->flags |= xpf_skip; } else { p = exists->_private; p->flags |= xpf_skip; } } } } for (pIter = crm_first_attr(new); pIter != NULL; ) { xmlAttr *prop = pIter; xml_private_t *p = pIter->_private; pIter = pIter->next; if(is_set(p->flags, xpf_created)) { char *name = strdup((const char *)prop->name); char *value = crm_element_value_copy(new, name); crm_trace("Created %s@%s=%s", new->name, name, value); /* Remove plus create wont work as it will modify the relative attribute ordering */ if(__xml_acl_check(new, name, xpf_acl_write)) { crm_attr_dirty(prop); } else { xmlUnsetProp(new, prop->name); /* Remove - change not allowed */ } free(value); free(name); } } for (cIter = __xml_first_child(old); cIter != NULL; ) { xmlNode *old_child = cIter; xmlNode *new_child = find_entity(new, crm_element_name(cIter), ID(cIter)); cIter = __xml_next(cIter); if(new_child) { __xml_diff_object(old_child, new_child); } else { xml_private_t *p = old_child->_private; /* Create then free (which will check the acls if necessary) */ xmlNode *candidate = add_node_copy(new, old_child); xmlNode *top = xmlDocGetRootElement(candidate->doc); __xml_node_clean(candidate); __xml_acl_apply(top); /* Make sure any ACLs are applied to 'candidate' */ free_xml(candidate); if(NULL == find_entity(new, crm_element_name(old_child), ID(old_child))) { p->flags |= xpf_skip; } } } for (cIter = __xml_first_child(new); cIter != NULL; ) { xmlNode *new_child = cIter; xmlNode *old_child = find_entity(old, crm_element_name(cIter), ID(cIter)); cIter = __xml_next(cIter); if(old_child == NULL) { xml_private_t *p = new_child->_private; p->flags |= xpf_skip; __xml_diff_object(old_child, new_child); } else { /* Check for movement, we already checked for differences */ int p_new = __xml_offset(new_child); int p_old = __xml_offset(old_child); xml_private_t *p = new_child->_private; if(p_old != p_new) { crm_info("%s.%s moved from %d to %d - %d", new_child->name, ID(new_child), p_old, p_new); p->flags |= xpf_moved; if(p_old > p_new) { p = old_child->_private; p->flags |= xpf_skip; } else { p = new_child->_private; p->flags |= xpf_skip; } } } } } void xml_calculate_changes(xmlNode * old, xmlNode * new) { CRM_CHECK(safe_str_eq(crm_element_name(old), crm_element_name(new)), return); CRM_CHECK(safe_str_eq(ID(old), ID(new)), return); if(xml_tracking_changes(new) == FALSE) { xml_track_changes(new, NULL, NULL, FALSE); } __xml_diff_object(old, new); } xmlNode * diff_xml_object(xmlNode * old, xmlNode * new, gboolean suppress) { xmlNode *tmp1 = NULL; xmlNode *diff = create_xml_node(NULL, "diff"); xmlNode *removed = create_xml_node(diff, "diff-removed"); xmlNode *added = create_xml_node(diff, "diff-added"); crm_xml_add(diff, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); tmp1 = subtract_xml_object(removed, old, new, FALSE, NULL, "removed:top"); if (suppress && tmp1 != NULL && can_prune_leaf(tmp1)) { free_xml(tmp1); } tmp1 = subtract_xml_object(added, new, old, TRUE, NULL, "added:top"); if (suppress && tmp1 != NULL && can_prune_leaf(tmp1)) { free_xml(tmp1); } if (added->children == NULL && removed->children == NULL) { free_xml(diff); diff = NULL; } return diff; } gboolean can_prune_leaf(xmlNode * xml_node) { xmlNode *cIter = NULL; xmlAttrPtr pIter = NULL; gboolean can_prune = TRUE; const char *name = crm_element_name(xml_node); if (safe_str_eq(name, XML_TAG_RESOURCE_REF) || safe_str_eq(name, XML_CIB_TAG_OBJ_REF) || safe_str_eq(name, XML_ACL_TAG_ROLE_REF) || safe_str_eq(name, XML_ACL_TAG_ROLE_REFv1)) { return FALSE; } for (pIter = crm_first_attr(xml_node); pIter != NULL; pIter = pIter->next) { const char *p_name = (const char *)pIter->name; if (strcmp(p_name, XML_ATTR_ID) == 0) { continue; } can_prune = FALSE; } cIter = __xml_first_child(xml_node); while (cIter) { xmlNode *child = cIter; cIter = __xml_next(cIter); if (can_prune_leaf(child)) { free_xml(child); } else { can_prune = FALSE; } } return can_prune; } void diff_filter_context(int context, int upper_bound, int lower_bound, xmlNode * xml_node, xmlNode * parent) { xmlNode *us = NULL; xmlNode *child = NULL; xmlAttrPtr pIter = NULL; xmlNode *new_parent = parent; const char *name = crm_element_name(xml_node); CRM_CHECK(xml_node != NULL && name != NULL, return); us = create_xml_node(parent, name); for (pIter = crm_first_attr(xml_node); pIter != NULL; pIter = pIter->next) { const char *p_name = (const char *)pIter->name; const char *p_value = crm_attr_value(pIter); lower_bound = context; crm_xml_add(us, p_name, p_value); } if (lower_bound >= 0 || upper_bound >= 0) { crm_xml_add(us, XML_ATTR_ID, ID(xml_node)); new_parent = us; } else { upper_bound = in_upper_context(0, context, xml_node); if (upper_bound >= 0) { crm_xml_add(us, XML_ATTR_ID, ID(xml_node)); new_parent = us; } else { free_xml(us); us = NULL; } } for (child = __xml_first_child(us); child != NULL; child = __xml_next(child)) { diff_filter_context(context, upper_bound - 1, lower_bound - 1, child, new_parent); } } int in_upper_context(int depth, int context, xmlNode * xml_node) { if (context == 0) { return 0; } if (xml_node->properties) { return depth; } else if (depth < context) { xmlNode *child = NULL; for (child = __xml_first_child(xml_node); child != NULL; child = __xml_next(child)) { if (in_upper_context(depth + 1, context, child)) { return depth; } } } return 0; } static xmlNode * find_xml_comment(xmlNode * root, xmlNode * search_comment) { xmlNode *a_child = NULL; CRM_CHECK(search_comment->type == XML_COMMENT_NODE, return NULL); for (a_child = __xml_first_child(root); a_child != NULL; a_child = __xml_next(a_child)) { if (a_child->type != XML_COMMENT_NODE) { continue; } if (safe_str_eq((const char *)a_child->content, (const char *)search_comment->content)) { return a_child; } } return NULL; } static xmlNode * subtract_xml_comment(xmlNode * parent, xmlNode * left, xmlNode * right, gboolean * changed) { CRM_CHECK(left != NULL, return NULL); CRM_CHECK(left->type == XML_COMMENT_NODE, return NULL); if (right == NULL || safe_str_neq((const char *)left->content, (const char *)right->content)) { xmlNode *deleted = NULL; deleted = add_node_copy(parent, left); *changed = TRUE; return deleted; } return NULL; } xmlNode * subtract_xml_object(xmlNode * parent, xmlNode * left, xmlNode * right, gboolean full, gboolean * changed, const char *marker) { gboolean dummy = FALSE; gboolean skip = FALSE; xmlNode *diff = NULL; xmlNode *right_child = NULL; xmlNode *left_child = NULL; xmlAttrPtr xIter = NULL; const char *id = NULL; const char *name = NULL; const char *value = NULL; const char *right_val = NULL; int lpc = 0; static int filter_len = DIMOF(filter); if (changed == NULL) { changed = &dummy; } if (left == NULL) { return NULL; } if (left->type == XML_COMMENT_NODE) { return subtract_xml_comment(parent, left, right, changed); } id = ID(left); if (right == NULL) { xmlNode *deleted = NULL; crm_trace("Processing <%s id=%s> (complete copy)", crm_element_name(left), id); deleted = add_node_copy(parent, left); crm_xml_add(deleted, XML_DIFF_MARKER, marker); *changed = TRUE; return deleted; } name = crm_element_name(left); CRM_CHECK(name != NULL, return NULL); CRM_CHECK(safe_str_eq(crm_element_name(left), crm_element_name(right)), return NULL); /* check for XML_DIFF_MARKER in a child */ value = crm_element_value(right, XML_DIFF_MARKER); if (value != NULL && strcmp(value, "removed:top") == 0) { crm_trace("We are the root of the deletion: %s.id=%s", name, id); *changed = TRUE; return NULL; } /* Avoiding creating the full heirarchy would save even more work here */ diff = create_xml_node(parent, name); /* Reset filter */ for (lpc = 0; lpc < filter_len; lpc++) { filter[lpc].found = FALSE; } /* changes to child objects */ for (left_child = __xml_first_child(left); left_child != NULL; left_child = __xml_next(left_child)) { gboolean child_changed = FALSE; if (left_child->type == XML_COMMENT_NODE) { right_child = find_xml_comment(right, left_child); } else { right_child = find_entity(right, crm_element_name(left_child), ID(left_child)); } subtract_xml_object(diff, left_child, right_child, full, &child_changed, marker); if (child_changed) { *changed = TRUE; } } if (*changed == FALSE) { /* Nothing to do */ } else if (full) { xmlAttrPtr pIter = NULL; for (pIter = crm_first_attr(left); pIter != NULL; pIter = pIter->next) { const char *p_name = (const char *)pIter->name; const char *p_value = crm_attr_value(pIter); xmlSetProp(diff, (const xmlChar *)p_name, (const xmlChar *)p_value); } /* We already have everything we need... */ goto done; } else if (id) { xmlSetProp(diff, (const xmlChar *)XML_ATTR_ID, (const xmlChar *)id); } /* changes to name/value pairs */ for (xIter = crm_first_attr(left); xIter != NULL; xIter = xIter->next) { const char *prop_name = (const char *)xIter->name; if (strcmp(prop_name, XML_ATTR_ID) == 0) { continue; } skip = FALSE; for (lpc = 0; skip == FALSE && lpc < filter_len; lpc++) { if (filter[lpc].found == FALSE && strcmp(prop_name, filter[lpc].string) == 0) { filter[lpc].found = TRUE; skip = TRUE; break; } } if (skip) { continue; } right_val = crm_element_value(right, prop_name); if (right_val == NULL) { /* new */ *changed = TRUE; if (full) { xmlAttrPtr pIter = NULL; for (pIter = crm_first_attr(left); pIter != NULL; pIter = pIter->next) { const char *p_name = (const char *)pIter->name; const char *p_value = crm_attr_value(pIter); xmlSetProp(diff, (const xmlChar *)p_name, (const xmlChar *)p_value); } break; } else { const char *left_value = crm_element_value(left, prop_name); xmlSetProp(diff, (const xmlChar *)prop_name, (const xmlChar *)value); crm_xml_add(diff, prop_name, left_value); } } else { /* Only now do we need the left value */ const char *left_value = crm_element_value(left, prop_name); if (strcmp(left_value, right_val) == 0) { /* unchanged */ } else { *changed = TRUE; if (full) { xmlAttrPtr pIter = NULL; crm_trace("Changes detected to %s in <%s id=%s>", prop_name, crm_element_name(left), id); for (pIter = crm_first_attr(left); pIter != NULL; pIter = pIter->next) { const char *p_name = (const char *)pIter->name; const char *p_value = crm_attr_value(pIter); xmlSetProp(diff, (const xmlChar *)p_name, (const xmlChar *)p_value); } break; } else { crm_trace("Changes detected to %s (%s -> %s) in <%s id=%s>", prop_name, left_value, right_val, crm_element_name(left), id); crm_xml_add(diff, prop_name, left_value); } } } } if (*changed == FALSE) { free_xml(diff); return NULL; } else if (full == FALSE && id) { crm_xml_add(diff, XML_ATTR_ID, id); } done: return diff; } static int add_xml_comment(xmlNode * parent, xmlNode * target, xmlNode * update) { CRM_CHECK(update != NULL, return 0); CRM_CHECK(update->type == XML_COMMENT_NODE, return 0); if (target == NULL) { target = find_xml_comment(parent, update); } if (target == NULL) { add_node_copy(parent, update); /* We wont reach here currently */ } else if (safe_str_neq((const char *)target->content, (const char *)update->content)) { xmlFree(target->content); target->content = xmlStrdup(update->content); } return 0; } int add_xml_object(xmlNode * parent, xmlNode * target, xmlNode * update, gboolean as_diff) { xmlNode *a_child = NULL; const char *object_id = NULL; const char *object_name = NULL; #if XML_PARSE_DEBUG crm_log_xml_trace("update:", update); crm_log_xml_trace("target:", target); #endif CRM_CHECK(update != NULL, return 0); if (update->type == XML_COMMENT_NODE) { return add_xml_comment(parent, target, update); } object_name = crm_element_name(update); object_id = ID(update); CRM_CHECK(object_name != NULL, return 0); if (target == NULL && object_id == NULL) { /* placeholder object */ target = find_xml_node(parent, object_name, FALSE); } else if (target == NULL) { target = find_entity(parent, object_name, object_id); } if (target == NULL) { target = create_xml_node(parent, object_name); CRM_CHECK(target != NULL, return 0); #if XML_PARSER_DEBUG crm_trace("Added <%s%s%s/>", crm_str(object_name), object_id ? " id=" : "", object_id ? object_id : ""); } else { crm_trace("Found node <%s%s%s/> to update", crm_str(object_name), object_id ? " id=" : "", object_id ? object_id : ""); #endif } CRM_CHECK(safe_str_eq(crm_element_name(target), crm_element_name(update)), return 0); if (as_diff == FALSE) { /* So that expand_plus_plus() gets called */ copy_in_properties(target, update); } else { /* No need for expand_plus_plus(), just raw speed */ xmlAttrPtr pIter = NULL; for (pIter = crm_first_attr(update); pIter != NULL; pIter = pIter->next) { const char *p_name = (const char *)pIter->name; const char *p_value = crm_attr_value(pIter); /* Remove it first so the ordering of the update is preserved */ xmlUnsetProp(target, (const xmlChar *)p_name); xmlSetProp(target, (const xmlChar *)p_name, (const xmlChar *)p_value); } } for (a_child = __xml_first_child(update); a_child != NULL; a_child = __xml_next(a_child)) { #if XML_PARSER_DEBUG crm_trace("Updating child <%s id=%s>", crm_element_name(a_child), ID(a_child)); #endif add_xml_object(target, NULL, a_child, as_diff); } #if XML_PARSER_DEBUG crm_trace("Finished with <%s id=%s>", crm_str(object_name), crm_str(object_id)); #endif return 0; } gboolean update_xml_child(xmlNode * child, xmlNode * to_update) { gboolean can_update = TRUE; xmlNode *child_of_child = NULL; CRM_CHECK(child != NULL, return FALSE); CRM_CHECK(to_update != NULL, return FALSE); if (safe_str_neq(crm_element_name(to_update), crm_element_name(child))) { can_update = FALSE; } else if (safe_str_neq(ID(to_update), ID(child))) { can_update = FALSE; } else if (can_update) { #if XML_PARSER_DEBUG crm_log_xml_trace(child, "Update match found..."); #endif add_xml_object(NULL, child, to_update, FALSE); } for (child_of_child = __xml_first_child(child); child_of_child != NULL; child_of_child = __xml_next(child_of_child)) { /* only update the first one */ if (can_update) { break; } can_update = update_xml_child(child_of_child, to_update); } return can_update; } int find_xml_children(xmlNode ** children, xmlNode * root, const char *tag, const char *field, const char *value, gboolean search_matches) { int match_found = 0; CRM_CHECK(root != NULL, return FALSE); CRM_CHECK(children != NULL, return FALSE); if (tag != NULL && safe_str_neq(tag, crm_element_name(root))) { } else if (value != NULL && safe_str_neq(value, crm_element_value(root, field))) { } else { if (*children == NULL) { *children = create_xml_node(NULL, __FUNCTION__); } add_node_copy(*children, root); match_found = 1; } if (search_matches || match_found == 0) { xmlNode *child = NULL; for (child = __xml_first_child(root); child != NULL; child = __xml_next(child)) { match_found += find_xml_children(children, child, tag, field, value, search_matches); } } return match_found; } gboolean replace_xml_child(xmlNode * parent, xmlNode * child, xmlNode * update, gboolean delete_only) { gboolean can_delete = FALSE; xmlNode *child_of_child = NULL; const char *up_id = NULL; const char *child_id = NULL; const char *right_val = NULL; CRM_CHECK(child != NULL, return FALSE); CRM_CHECK(update != NULL, return FALSE); up_id = ID(update); child_id = ID(child); if (up_id == NULL || (child_id && strcmp(child_id, up_id) == 0)) { can_delete = TRUE; } if (safe_str_neq(crm_element_name(update), crm_element_name(child))) { can_delete = FALSE; } if (can_delete && delete_only) { xmlAttrPtr pIter = NULL; for (pIter = crm_first_attr(update); pIter != NULL; pIter = pIter->next) { const char *p_name = (const char *)pIter->name; const char *p_value = crm_attr_value(pIter); right_val = crm_element_value(child, p_name); if (safe_str_neq(p_value, right_val)) { can_delete = FALSE; } } } if (can_delete && parent != NULL) { crm_log_xml_trace(child, "Delete match found..."); if (delete_only || update == NULL) { free_xml(child); } else { xmlNode *tmp = copy_xml(update); xmlDoc *doc = tmp->doc; xmlNode *old = NULL; xml_accept_changes(tmp); old = xmlReplaceNode(child, tmp); xml_calculate_changes(old, tmp); xmlDocSetRootElement(doc, old); free_xml(old); } child = NULL; return TRUE; } else if (can_delete) { crm_log_xml_debug(child, "Cannot delete the search root"); can_delete = FALSE; } child_of_child = __xml_first_child(child); while (child_of_child) { xmlNode *next = __xml_next(child_of_child); can_delete = replace_xml_child(child, child_of_child, update, delete_only); /* only delete the first one */ if (can_delete) { child_of_child = NULL; } else { child_of_child = next; } } return can_delete; } void hash2nvpair(gpointer key, gpointer value, gpointer user_data) { const char *name = key; const char *s_value = value; xmlNode *xml_node = user_data; xmlNode *xml_child = create_xml_node(xml_node, XML_CIB_TAG_NVPAIR); crm_xml_add(xml_child, XML_ATTR_ID, name); crm_xml_add(xml_child, XML_NVPAIR_ATTR_NAME, name); crm_xml_add(xml_child, XML_NVPAIR_ATTR_VALUE, s_value); crm_trace("dumped: name=%s value=%s", name, s_value); } void hash2smartfield(gpointer key, gpointer value, gpointer user_data) { const char *name = key; const char *s_value = value; xmlNode *xml_node = user_data; if (isdigit(name[0])) { xmlNode *tmp = create_xml_node(xml_node, XML_TAG_PARAM); crm_xml_add(tmp, XML_NVPAIR_ATTR_NAME, name); crm_xml_add(tmp, XML_NVPAIR_ATTR_VALUE, s_value); } else if (crm_element_value(xml_node, name) == NULL) { crm_xml_add(xml_node, name, s_value); crm_trace("dumped: %s=%s", name, s_value); } else { crm_trace("duplicate: %s=%s", name, s_value); } } void hash2field(gpointer key, gpointer value, gpointer user_data) { const char *name = key; const char *s_value = value; xmlNode *xml_node = user_data; if (crm_element_value(xml_node, name) == NULL) { crm_xml_add(xml_node, name, s_value); } else { crm_trace("duplicate: %s=%s", name, s_value); } } void hash2metafield(gpointer key, gpointer value, gpointer user_data) { char *crm_name = NULL; if (key == NULL || value == NULL) { return; } else if (((char *)key)[0] == '#') { return; } else if (strstr(key, ":")) { return; } crm_name = crm_meta_name(key); hash2field(crm_name, value, user_data); free(crm_name); } GHashTable * xml2list(xmlNode * parent) { xmlNode *child = NULL; xmlAttrPtr pIter = NULL; xmlNode *nvpair_list = NULL; GHashTable *nvpair_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); CRM_CHECK(parent != NULL, return nvpair_hash); nvpair_list = find_xml_node(parent, XML_TAG_ATTRS, FALSE); if (nvpair_list == NULL) { crm_trace("No attributes in %s", crm_element_name(parent)); crm_log_xml_trace(parent, "No attributes for resource op"); } crm_log_xml_trace(nvpair_list, "Unpacking"); for (pIter = crm_first_attr(nvpair_list); pIter != NULL; pIter = pIter->next) { const char *p_name = (const char *)pIter->name; const char *p_value = crm_attr_value(pIter); crm_trace("Added %s=%s", p_name, p_value); g_hash_table_insert(nvpair_hash, strdup(p_name), strdup(p_value)); } for (child = __xml_first_child(nvpair_list); child != NULL; child = __xml_next(child)) { if (strcmp((const char *)child->name, XML_TAG_PARAM) == 0) { const char *key = crm_element_value(child, XML_NVPAIR_ATTR_NAME); const char *value = crm_element_value(child, XML_NVPAIR_ATTR_VALUE); crm_trace("Added %s=%s", key, value); if (key != NULL && value != NULL) { g_hash_table_insert(nvpair_hash, strdup(key), strdup(value)); } } } return nvpair_hash; } typedef struct name_value_s { const char *name; const void *value; } name_value_t; static gint sort_pairs(gconstpointer a, gconstpointer b) { int rc = 0; const name_value_t *pair_a = a; const name_value_t *pair_b = b; CRM_ASSERT(a != NULL); CRM_ASSERT(pair_a->name != NULL); CRM_ASSERT(b != NULL); CRM_ASSERT(pair_b->name != NULL); rc = strcmp(pair_a->name, pair_b->name); if (rc < 0) { return -1; } else if (rc > 0) { return 1; } return 0; } static void dump_pair(gpointer data, gpointer user_data) { name_value_t *pair = data; xmlNode *parent = user_data; crm_xml_add(parent, pair->name, pair->value); } xmlNode * sorted_xml(xmlNode * input, xmlNode * parent, gboolean recursive) { xmlNode *child = NULL; GListPtr sorted = NULL; GListPtr unsorted = NULL; name_value_t *pair = NULL; xmlNode *result = NULL; const char *name = NULL; xmlAttrPtr pIter = NULL; CRM_CHECK(input != NULL, return NULL); name = crm_element_name(input); CRM_CHECK(name != NULL, return NULL); result = create_xml_node(parent, name); for (pIter = crm_first_attr(input); pIter != NULL; pIter = pIter->next) { const char *p_name = (const char *)pIter->name; const char *p_value = crm_attr_value(pIter); pair = calloc(1, sizeof(name_value_t)); pair->name = p_name; pair->value = p_value; unsorted = g_list_prepend(unsorted, pair); pair = NULL; } sorted = g_list_sort(unsorted, sort_pairs); g_list_foreach(sorted, dump_pair, result); g_list_free_full(sorted, free); for (child = __xml_first_child(input); child != NULL; child = __xml_next(child)) { if (recursive) { sorted_xml(child, result, recursive); } else { add_node_copy(result, child); } } return result; } /* "c048eae664dba840e1d2060f00299e9d" */ static char * calculate_xml_digest_v1(xmlNode * input, gboolean sort, gboolean ignored) { char *digest = NULL; char *buffer = NULL; xmlNode *copy = NULL; if (sort) { crm_trace("Sorting xml..."); copy = sorted_xml(input, NULL, TRUE); crm_trace("Done"); input = copy; } buffer = dump_xml_for_digest(input); CRM_CHECK(buffer != NULL && strlen(buffer) > 0, free_xml(copy); free(buffer); return NULL); digest = crm_md5sum(buffer); crm_log_xml_trace(input, "digest:source"); free(buffer); free_xml(copy); return digest; } static char * calculate_xml_digest_v2(xmlNode * source, gboolean do_filter) { char *digest = NULL; char *buffer = NULL; int offset, max; static struct qb_log_callsite *digest_cs = NULL; crm_trace("Begin digest %s", do_filter?"filtered":""); if (do_filter && BEST_EFFORT_STATUS) { /* Exclude the status calculation from the digest * * This doesn't mean it wont be sync'd, we just wont be paranoid * about it being an _exact_ copy * * We don't need it to be exact, since we throw it away and regenerate * from our peers whenever a new DC is elected anyway * * Importantly, this reduces the amount of XML to copy+export as * well as the amount of data for MD5 needs to operate on */ } else { dump_xml(source, do_filter ? xml_log_option_filtered : 0, &buffer, &offset, &max, 0); } CRM_ASSERT(buffer != NULL); digest = crm_md5sum(buffer); if (digest_cs == NULL) { digest_cs = qb_log_callsite_get(__func__, __FILE__, "cib-digest", LOG_TRACE, __LINE__, crm_trace_nonlog); } if (digest_cs && digest_cs->targets) { char *trace_file = crm_concat("/tmp/digest", digest, '-'); crm_trace("Saving %s.%s.%s to %s", crm_element_value(source, XML_ATTR_GENERATION_ADMIN), crm_element_value(source, XML_ATTR_GENERATION), crm_element_value(source, XML_ATTR_NUMUPDATES), trace_file); save_xml_to_file(source, "digest input", trace_file); free(trace_file); } free(buffer); crm_trace("End digest"); return digest; } char * calculate_on_disk_digest(xmlNode * input) { /* Always use the v1 format for on-disk digests * a) its a compatability nightmare * b) we only use this once at startup, all other * invocations are in a separate child process */ return calculate_xml_digest_v1(input, FALSE, FALSE); } char * calculate_operation_digest(xmlNode * input, const char *version) { /* We still need the sorting for parameter digests */ return calculate_xml_digest_v1(input, TRUE, FALSE); } char * calculate_xml_versioned_digest(xmlNode * input, gboolean sort, gboolean do_filter, const char *version) { /* * The sorting associated with v1 digest creation accounted for 23% of * the CIB's CPU usage on the server. v2 drops this. * * The filtering accounts for an additional 2.5% and we may want to * remove it in future. * * v2 also uses the xmlBuffer contents directly to avoid additional copying */ if (version == NULL || compare_version("3.0.5", version) > 0) { crm_trace("Using v1 digest algorithm for %s", crm_str(version)); return calculate_xml_digest_v1(input, sort, do_filter); } crm_trace("Using v2 digest algorithm for %s", crm_str(version)); return calculate_xml_digest_v2(input, do_filter); } static gboolean validate_with_dtd(xmlDocPtr doc, gboolean to_logs, const char *dtd_file) { gboolean valid = TRUE; xmlDtdPtr dtd = NULL; xmlValidCtxtPtr cvp = NULL; CRM_CHECK(doc != NULL, return FALSE); CRM_CHECK(dtd_file != NULL, return FALSE); dtd = xmlParseDTD(NULL, (const xmlChar *)dtd_file); if(dtd == NULL) { crm_err("Could not locate/parse DTD: %s", dtd_file); return TRUE; } cvp = xmlNewValidCtxt(); if(cvp) { if (to_logs) { cvp->userData = (void *)LOG_ERR; cvp->error = (xmlValidityErrorFunc) xml_log; cvp->warning = (xmlValidityWarningFunc) xml_log; } else { cvp->userData = (void *)stderr; cvp->error = (xmlValidityErrorFunc) fprintf; cvp->warning = (xmlValidityWarningFunc) fprintf; } if (!xmlValidateDtd(cvp, doc, dtd)) { valid = FALSE; } xmlFreeValidCtxt(cvp); } else { crm_err("Internal error: No valid context"); } xmlFreeDtd(dtd); return valid; } xmlNode * first_named_child(xmlNode * parent, const char *name) { xmlNode *match = NULL; for (match = __xml_first_child(parent); match != NULL; match = __xml_next(match)) { /* * name == NULL gives first child regardless of name; this is * semantically incorrect in this funciton, but may be necessary * due to prior use of xml_child_iter_filter */ if (name == NULL || strcmp((const char *)match->name, name) == 0) { return match; } } return NULL; } #if 0 static void relaxng_invalid_stderr(void *userData, xmlErrorPtr error) { /* Structure xmlError struct _xmlError { int domain : What part of the library raised this er int code : The error code, e.g. an xmlParserError char * message : human-readable informative error messag xmlErrorLevel level : how consequent is the error char * file : the filename int line : the line number if available char * str1 : extra string information char * str2 : extra string information char * str3 : extra string information int int1 : extra number information int int2 : column number of the error or 0 if N/A void * ctxt : the parser context if available void * node : the node in the tree } */ crm_err("Structured error: line=%d, level=%d %s", error->line, error->level, error->message); } #endif static gboolean validate_with_relaxng(xmlDocPtr doc, gboolean to_logs, const char *relaxng_file, relaxng_ctx_cache_t ** cached_ctx) { int rc = 0; gboolean valid = TRUE; relaxng_ctx_cache_t *ctx = NULL; CRM_CHECK(doc != NULL, return FALSE); CRM_CHECK(relaxng_file != NULL, return FALSE); if (cached_ctx && *cached_ctx) { ctx = *cached_ctx; } else { crm_info("Creating RNG parser context"); ctx = calloc(1, sizeof(relaxng_ctx_cache_t)); xmlLoadExtDtdDefaultValue = 1; ctx->parser = xmlRelaxNGNewParserCtxt(relaxng_file); CRM_CHECK(ctx->parser != NULL, goto cleanup); if (to_logs) { xmlRelaxNGSetParserErrors(ctx->parser, (xmlRelaxNGValidityErrorFunc) xml_log, (xmlRelaxNGValidityWarningFunc) xml_log, GUINT_TO_POINTER(LOG_ERR)); } else { xmlRelaxNGSetParserErrors(ctx->parser, (xmlRelaxNGValidityErrorFunc) fprintf, (xmlRelaxNGValidityWarningFunc) fprintf, stderr); } ctx->rng = xmlRelaxNGParse(ctx->parser); CRM_CHECK(ctx->rng != NULL, crm_err("Could not find/parse %s", relaxng_file); goto cleanup); ctx->valid = xmlRelaxNGNewValidCtxt(ctx->rng); CRM_CHECK(ctx->valid != NULL, goto cleanup); if (to_logs) { xmlRelaxNGSetValidErrors(ctx->valid, (xmlRelaxNGValidityErrorFunc) xml_log, (xmlRelaxNGValidityWarningFunc) xml_log, GUINT_TO_POINTER(LOG_ERR)); } else { xmlRelaxNGSetValidErrors(ctx->valid, (xmlRelaxNGValidityErrorFunc) fprintf, (xmlRelaxNGValidityWarningFunc) fprintf, stderr); } } /* xmlRelaxNGSetValidStructuredErrors( */ /* valid, relaxng_invalid_stderr, valid); */ xmlLineNumbersDefault(1); rc = xmlRelaxNGValidateDoc(ctx->valid, doc); if (rc > 0) { valid = FALSE; } else if (rc < 0) { crm_err("Internal libxml error during validation\n"); } cleanup: if (cached_ctx) { *cached_ctx = ctx; } else { if (ctx->parser != NULL) { xmlRelaxNGFreeParserCtxt(ctx->parser); } if (ctx->valid != NULL) { xmlRelaxNGFreeValidCtxt(ctx->valid); } if (ctx->rng != NULL) { xmlRelaxNGFree(ctx->rng); } free(ctx); } return valid; } void crm_xml_init(void) { static bool init = TRUE; if(init) { init = FALSE; /* The default allocator XML_BUFFER_ALLOC_EXACT does far too many * realloc()s and it can take upwards of 18 seconds (yes, seconds) * to dump a 28kb tree which XML_BUFFER_ALLOC_DOUBLEIT can do in * less than 1 second. */ xmlSetBufferAllocationScheme(XML_BUFFER_ALLOC_DOUBLEIT); /* Populate and free the _private field when nodes are created and destroyed */ xmlDeregisterNodeDefault(pcmkDeregisterNode); xmlRegisterNodeDefault(pcmkRegisterNode); __xml_build_schema_list(); } } void crm_xml_cleanup(void) { int lpc = 0; relaxng_ctx_cache_t *ctx = NULL; crm_info("Cleaning up memory from libxml2"); for (; lpc < xml_schema_max; lpc++) { switch (known_schemas[lpc].type) { case 0: /* None */ break; case 1: /* DTD - Not cached */ break; case 2: /* RNG - Cached */ ctx = (relaxng_ctx_cache_t *) known_schemas[lpc].cache; if (ctx == NULL) { break; } if (ctx->parser != NULL) { xmlRelaxNGFreeParserCtxt(ctx->parser); } if (ctx->valid != NULL) { xmlRelaxNGFreeValidCtxt(ctx->valid); } if (ctx->rng != NULL) { xmlRelaxNGFree(ctx->rng); } free(ctx); known_schemas[lpc].cache = NULL; break; default: break; } free(known_schemas[lpc].name); free(known_schemas[lpc].location); free(known_schemas[lpc].transform); } free(known_schemas); xsltCleanupGlobals(); xmlCleanupParser(); } static gboolean validate_with(xmlNode * xml, int method, gboolean to_logs) { xmlDocPtr doc = NULL; gboolean valid = FALSE; int type = 0; char *file = NULL; if(method < 0) { return FALSE; } type = known_schemas[method].type; if(type == 0) { return TRUE; } CRM_CHECK(xml != NULL, return FALSE); doc = getDocPtr(xml); file = get_schema_path(known_schemas[method].name, known_schemas[method].location); crm_trace("Validating with: %s (type=%d)", crm_str(file), type); switch (type) { case 1: valid = validate_with_dtd(doc, to_logs, file); break; case 2: valid = validate_with_relaxng(doc, to_logs, file, (relaxng_ctx_cache_t **) & (known_schemas[method].cache)); break; default: crm_err("Unknown validator type: %d", type); break; } free(file); return valid; } #include static void dump_file(const char *filename) { FILE *fp = NULL; int ch, line = 0; CRM_CHECK(filename != NULL, return); fp = fopen(filename, "r"); CRM_CHECK(fp != NULL, return); fprintf(stderr, "%4d ", ++line); do { ch = getc(fp); if (ch == EOF) { putc('\n', stderr); break; } else if (ch == '\n') { fprintf(stderr, "\n%4d ", ++line); } else { putc(ch, stderr); } } while (1); fclose(fp); } gboolean validate_xml_verbose(xmlNode * xml_blob) { int fd = 0; xmlDoc *doc = NULL; xmlNode *xml = NULL; gboolean rc = FALSE; char *filename = strdup(CRM_STATE_DIR "/cib-invalid.XXXXXX"); umask(S_IWGRP | S_IWOTH | S_IROTH); fd = mkstemp(filename); write_xml_fd(xml_blob, filename, fd, FALSE); dump_file(filename); doc = xmlParseFile(filename); xml = xmlDocGetRootElement(doc); rc = validate_xml(xml, NULL, FALSE); free_xml(xml); unlink(filename); free(filename); return rc; } gboolean validate_xml(xmlNode * xml_blob, const char *validation, gboolean to_logs) { int version = 0; if (validation == NULL) { validation = crm_element_value(xml_blob, XML_ATTR_VALIDATION); } if (validation == NULL) { int lpc = 0; bool valid = FALSE; validation = crm_element_value(xml_blob, "ignore-dtd"); if (crm_is_true(validation)) { /* Legacy compatibilty */ crm_xml_add(xml_blob, XML_ATTR_VALIDATION, "none"); return TRUE; } /* Work it out */ for (lpc = 0; lpc < xml_schema_max; lpc++) { if(validate_with(xml_blob, lpc, FALSE)) { valid = TRUE; crm_xml_add(xml_blob, XML_ATTR_VALIDATION, known_schemas[lpc].name); crm_info("XML validated against %s", known_schemas[lpc].name); if(known_schemas[lpc].after_transform == 0) { break; } } } return valid; } version = get_schema_version(validation); if (strcmp(validation, "none") == 0) { return TRUE; } else if(version < xml_schema_max) { return validate_with(xml_blob, version, to_logs); } crm_err("Unknown validator: %s", validation); return FALSE; } #if HAVE_LIBXSLT static xmlNode * apply_transformation(xmlNode * xml, const char *transform) { char *xform = NULL; xmlNode *out = NULL; xmlDocPtr res = NULL; xmlDocPtr doc = NULL; xsltStylesheet *xslt = NULL; CRM_CHECK(xml != NULL, return FALSE); doc = getDocPtr(xml); xform = get_schema_path(NULL, transform); xmlLoadExtDtdDefaultValue = 1; xmlSubstituteEntitiesDefault(1); xslt = xsltParseStylesheetFile((const xmlChar *)xform); CRM_CHECK(xslt != NULL, goto cleanup); res = xsltApplyStylesheet(xslt, doc, NULL); CRM_CHECK(res != NULL, goto cleanup); out = xmlDocGetRootElement(res); cleanup: if (xslt) { xsltFreeStylesheet(xslt); } free(xform); return out; } #endif const char * get_schema_name(int version) { if (version < 0 || version >= xml_schema_max) { return "unknown"; } return known_schemas[version].name; } int get_schema_version(const char *name) { int lpc = 0; if(name == NULL) { name = "none"; } for (; lpc < xml_schema_max; lpc++) { if (safe_str_eq(name, known_schemas[lpc].name)) { return lpc; } } return -1; } /* set which validation to use */ #include int update_validation(xmlNode ** xml_blob, int *best, int max, gboolean transform, gboolean to_logs) { xmlNode *xml = NULL; char *value = NULL; int max_stable_schemas = xml_latest_schema_index(); int lpc = 0, match = -1, rc = pcmk_ok; CRM_CHECK(best != NULL, return -EINVAL); CRM_CHECK(xml_blob != NULL, return -EINVAL); CRM_CHECK(*xml_blob != NULL, return -EINVAL); *best = 0; xml = *xml_blob; value = crm_element_value_copy(xml, XML_ATTR_VALIDATION); if (value != NULL) { match = get_schema_version(value); lpc = match; if (lpc >= 0 && transform == FALSE) { lpc++; } else if (lpc < 0) { crm_debug("Unknown validation type"); lpc = 0; } } if (match >= max_stable_schemas) { /* nothing to do */ free(value); *best = match; return pcmk_ok; } while(lpc <= max_stable_schemas) { gboolean valid = TRUE; crm_debug("Testing '%s' validation (%d of %d)", known_schemas[lpc].name ? known_schemas[lpc].name : "", lpc, max_stable_schemas); valid = validate_with(xml, lpc, to_logs); if (valid) { *best = lpc; } else { crm_trace("%s validation failed", known_schemas[lpc].name ? known_schemas[lpc].name : ""); } if (valid && transform) { xmlNode *upgrade = NULL; int next = known_schemas[lpc].after_transform; if (next < 0) { crm_trace("Stopping at %s", known_schemas[lpc].name); break; } else if (max > 0 && lpc == max) { crm_trace("Upgrade limit reached at %s (lpc=%d, next=%d, max=%d)", known_schemas[lpc].name, lpc, next, max); break; } else if (max > 0 && next > max) { crm_debug("Upgrade limit reached at %s (lpc=%d, next=%d, max=%d)", known_schemas[lpc].name, lpc, next, max); break; } else if (known_schemas[lpc].transform == NULL) { crm_notice("%s-style configuration is also valid for %s", known_schemas[lpc].name, known_schemas[next].name); if (validate_with(xml, next, to_logs)) { crm_debug("Configuration valid for schema: %s", known_schemas[next].name); lpc = next; *best = next; rc = pcmk_ok; } else { crm_info("Configuration not valid for schema: %s", known_schemas[next].name); } } else { crm_notice("Upgrading %s-style configuration to %s with %s", known_schemas[lpc].name, known_schemas[next].name, known_schemas[lpc].transform ? known_schemas[lpc].transform : "no-op"); #if HAVE_LIBXSLT upgrade = apply_transformation(xml, known_schemas[lpc].transform); #endif if (upgrade == NULL) { crm_err("Transformation %s failed", known_schemas[lpc].transform); rc = -pcmk_err_transform_failed; } else if (validate_with(upgrade, next, to_logs)) { crm_info("Transformation %s successful", known_schemas[lpc].transform); lpc = next; *best = next; free_xml(xml); xml = upgrade; rc = pcmk_ok; } else { crm_err("Transformation %s did not produce a valid configuration", known_schemas[lpc].transform); crm_log_xml_info(upgrade, "transform:bad"); free_xml(upgrade); rc = -pcmk_err_schema_validation; } } } } if (*best > match) { crm_notice("%s the configuration from %s to %s", transform?"Transformed":"Upgraded", value ? value : "", known_schemas[*best].name); crm_xml_add(xml, XML_ATTR_VALIDATION, known_schemas[*best].name); } *xml_blob = xml; free(value); return rc; } /* * From xpath2.c * * All the elements returned by an XPath query are pointers to * elements from the tree *except* namespace nodes where the XPath * semantic is different from the implementation in libxml2 tree. * As a result when a returned node set is freed when * xmlXPathFreeObject() is called, that routine must check the * element type. But node from the returned set may have been removed * by xmlNodeSetContent() resulting in access to freed data. * * This can be exercised by running * valgrind xpath2 test3.xml '//discarded' discarded * * There is 2 ways around it: * - make a copy of the pointers to the nodes from the result set * then call xmlXPathFreeObject() and then modify the nodes * or * - remove the references from the node set, if they are not namespace nodes, before calling xmlXPathFreeObject(). */ void freeXpathObject(xmlXPathObjectPtr xpathObj) { int lpc, max = numXpathResults(xpathObj); if(xpathObj == NULL) { return; } for(lpc = 0; lpc < max; lpc++) { if (xpathObj->nodesetval->nodeTab[lpc] && xpathObj->nodesetval->nodeTab[lpc]->type != XML_NAMESPACE_DECL) { xpathObj->nodesetval->nodeTab[lpc] = NULL; } } /* _Now_ its safe to free it */ xmlXPathFreeObject(xpathObj); } xmlNode * getXpathResult(xmlXPathObjectPtr xpathObj, int index) { xmlNode *match = NULL; int max = numXpathResults(xpathObj); CRM_CHECK(index >= 0, return NULL); CRM_CHECK(xpathObj != NULL, return NULL); if (index >= max) { crm_err("Requested index %d of only %d items", index, max); return NULL; } else if(xpathObj->nodesetval->nodeTab[index] == NULL) { /* Previously requested */ return NULL; } match = xpathObj->nodesetval->nodeTab[index]; CRM_CHECK(match != NULL, return NULL); if (xpathObj->nodesetval->nodeTab[index]->type != XML_NAMESPACE_DECL) { /* See the comment for freeXpathObject() */ xpathObj->nodesetval->nodeTab[index] = NULL; } if (match->type == XML_DOCUMENT_NODE) { /* Will happen if section = '/' */ match = match->children; } else if (match->type != XML_ELEMENT_NODE && match->parent && match->parent->type == XML_ELEMENT_NODE) { /* reurning the parent instead */ match = match->parent; } else if (match->type != XML_ELEMENT_NODE) { /* We only support searching nodes */ crm_err("We only support %d not %d", XML_ELEMENT_NODE, match->type); match = NULL; } return match; } /* the caller needs to check if the result contains a xmlDocPtr or xmlNodePtr */ xmlXPathObjectPtr xpath_search(xmlNode * xml_top, const char *path) { xmlDocPtr doc = NULL; xmlXPathObjectPtr xpathObj = NULL; xmlXPathContextPtr xpathCtx = NULL; const xmlChar *xpathExpr = (const xmlChar *)path; CRM_CHECK(path != NULL, return NULL); CRM_CHECK(xml_top != NULL, return NULL); CRM_CHECK(strlen(path) > 0, return NULL); doc = getDocPtr(xml_top); xpathCtx = xmlXPathNewContext(doc); CRM_ASSERT(xpathCtx != NULL); xpathObj = xmlXPathEvalExpression(xpathExpr, xpathCtx); xmlXPathFreeContext(xpathCtx); return xpathObj; } gboolean cli_config_update(xmlNode ** xml, int *best_version, gboolean to_logs) { gboolean rc = TRUE; const char *value = crm_element_value(*xml, XML_ATTR_VALIDATION); int version = get_schema_version(value); int min_version = xml_minimum_schema_index(); if (version < min_version) { xmlNode *converted = NULL; converted = copy_xml(*xml); update_validation(&converted, &version, 0, TRUE, to_logs); value = crm_element_value(converted, XML_ATTR_VALIDATION); if (version < min_version) { if (to_logs) { crm_config_err("Your current configuration could only be upgraded to %s... " "the minimum requirement is %s.\n", crm_str(value), get_schema_name(min_version)); } else { fprintf(stderr, "Your current configuration could only be upgraded to %s... " "the minimum requirement is %s.\n", crm_str(value), get_schema_name(min_version)); } free_xml(converted); converted = NULL; rc = FALSE; } else { free_xml(*xml); *xml = converted; if (version < xml_latest_schema_index()) { crm_config_warn("Your configuration was internally updated to %s... " "which is acceptable but not the most recent", get_schema_name(version)); } else if (to_logs) { crm_info("Your configuration was internally updated to the latest version (%s)", get_schema_name(version)); } } } else if (version >= get_schema_version("none")) { if (to_logs) { crm_config_warn("Configuration validation is currently disabled." " It is highly encouraged and prevents many common cluster issues."); } else { fprintf(stderr, "Configuration validation is currently disabled." " It is highly encouraged and prevents many common cluster issues.\n"); } } if (best_version) { *best_version = version; } return rc; } xmlNode * expand_idref(xmlNode * input, xmlNode * top) { const char *tag = NULL; const char *ref = NULL; xmlNode *result = input; char *xpath_string = NULL; if (result == NULL) { return NULL; } else if (top == NULL) { top = input; } tag = crm_element_name(result); ref = crm_element_value(result, XML_ATTR_IDREF); if (ref != NULL) { int xpath_max = 512, offset = 0; xpath_string = calloc(1, xpath_max); offset += snprintf(xpath_string + offset, xpath_max - offset, "//%s[@id='%s']", tag, ref); CRM_LOG_ASSERT(offset > 0); result = get_xpath_object(xpath_string, top, LOG_ERR); if (result == NULL) { char *nodePath = (char *)xmlGetNodePath(top); crm_err("No match for %s found in %s: Invalid configuration", xpath_string, crm_str(nodePath)); free(nodePath); } } free(xpath_string); return result; } xmlNode * get_xpath_object_relative(const char *xpath, xmlNode * xml_obj, int error_level) { int len = 0; xmlNode *result = NULL; char *xpath_full = NULL; char *xpath_prefix = NULL; if (xml_obj == NULL || xpath == NULL) { return NULL; } xpath_prefix = (char *)xmlGetNodePath(xml_obj); len += strlen(xpath_prefix); len += strlen(xpath); xpath_full = strdup(xpath_prefix); xpath_full = realloc(xpath_full, len + 1); strncat(xpath_full, xpath, len); result = get_xpath_object(xpath_full, xml_obj, error_level); free(xpath_prefix); free(xpath_full); return result; } xmlNode * get_xpath_object(const char *xpath, xmlNode * xml_obj, int error_level) { int max; xmlNode *result = NULL; xmlXPathObjectPtr xpathObj = NULL; char *nodePath = NULL; char *matchNodePath = NULL; if (xpath == NULL) { return xml_obj; /* or return NULL? */ } xpathObj = xpath_search(xml_obj, xpath); nodePath = (char *)xmlGetNodePath(xml_obj); max = numXpathResults(xpathObj); if (max < 1) { do_crm_log(error_level, "No match for %s in %s", xpath, crm_str(nodePath)); crm_log_xml_explicit(xml_obj, "Unexpected Input"); } else if (max > 1) { int lpc = 0; do_crm_log(error_level, "Too many matches for %s in %s", xpath, crm_str(nodePath)); for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if(match != NULL) { matchNodePath = (char *)xmlGetNodePath(match); do_crm_log(error_level, "%s[%d] = %s", xpath, lpc, crm_str(matchNodePath)); free(matchNodePath); } } crm_log_xml_explicit(xml_obj, "Bad Input"); } else { result = getXpathResult(xpathObj, 0); } freeXpathObject(xpathObj); free(nodePath); return result; } const char * crm_element_value(xmlNode * data, const char *name) { xmlAttr *attr = NULL; if (data == NULL) { crm_err("Couldn't find %s in NULL", name ? name : ""); CRM_LOG_ASSERT(data != NULL); return NULL; } else if (name == NULL) { crm_err("Couldn't find NULL in %s", crm_element_name(data)); return NULL; } attr = xmlHasProp(data, (const xmlChar *)name); if (attr == NULL || attr->children == NULL) { return NULL; } return (const char *)attr->children->content; } diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index 9bab3037a8..d645c72bc6 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -1,3222 +1,3251 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include CRM_TRACE_INIT_DATA(pe_status); #define set_config_flag(data_set, option, flag) do { \ const char *tmp = pe_pref(data_set->config_hash, option); \ if(tmp) { \ if(crm_is_true(tmp)) { \ set_bit(data_set->flags, flag); \ } else { \ clear_bit(data_set->flags, flag); \ } \ } \ } while(0) gboolean unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, enum action_fail_response *failed, pe_working_set_t * data_set); static gboolean determine_remote_online_status(node_t * this_node); static gboolean is_dangling_container_remote_node(node_t *node) { /* we are looking for a remote-node that was supposed to be mapped to a * container resource, but all traces of that container have disappeared * from both the config and the status section. */ if (is_remote_node(node) && node->details->remote_rsc && node->details->remote_rsc->container == NULL && is_set(node->details->remote_rsc->flags, pe_rsc_orphan_container_filler)) { return TRUE; } return FALSE; } void pe_fence_node(pe_working_set_t * data_set, node_t * node, const char *reason) { CRM_CHECK(node, return); /* fence remote nodes living in a container by marking the container as failed. */ if (is_container_remote_node(node)) { resource_t *rsc = node->details->remote_rsc->container; if (is_set(rsc->flags, pe_rsc_failed) == FALSE) { crm_warn("Remote node %s will be fenced by recovering container resource %s", node->details->uname, rsc->id, reason); set_bit(rsc->flags, pe_rsc_failed); } } else if (is_dangling_container_remote_node(node)) { crm_info("Fencing remote node %s has already occurred, container no longer exists. cleaning up dangling connection resource: %s", node->details->uname, reason); set_bit(node->details->remote_rsc->flags, pe_rsc_failed); } else if (node->details->unclean == FALSE) { if(pe_can_fence(data_set, node)) { crm_warn("Node %s will be fenced %s", node->details->uname, reason); } else { crm_warn("Node %s is unclean %s", node->details->uname, reason); } node->details->unclean = TRUE; } else { crm_trace("Huh? %s %s", node->details->uname, reason); } } gboolean unpack_config(xmlNode * config, pe_working_set_t * data_set) { const char *value = NULL; GHashTable *config_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); xmlXPathObjectPtr xpathObj = NULL; if(is_not_set(data_set->flags, pe_flag_enable_unfencing)) { xpathObj = xpath_search(data_set->input, "//nvpair[@name='provides' and @value='unfencing']"); if(xpathObj && numXpathResults(xpathObj) > 0) { set_bit(data_set->flags, pe_flag_enable_unfencing); } freeXpathObject(xpathObj); } if(is_not_set(data_set->flags, pe_flag_enable_unfencing)) { xpathObj = xpath_search(data_set->input, "//nvpair[@name='requires' and @value='unfencing']"); if(xpathObj && numXpathResults(xpathObj) > 0) { set_bit(data_set->flags, pe_flag_enable_unfencing); } freeXpathObject(xpathObj); } #ifdef REDHAT_COMPAT_6 if(is_not_set(data_set->flags, pe_flag_enable_unfencing)) { xpathObj = xpath_search(data_set->input, "//primitive[@type='fence_scsi']"); if(xpathObj && numXpathResults(xpathObj) > 0) { set_bit(data_set->flags, pe_flag_enable_unfencing); } freeXpathObject(xpathObj); } #endif data_set->config_hash = config_hash; unpack_instance_attributes(data_set->input, config, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, FALSE, data_set->now); verify_pe_options(data_set->config_hash); set_config_flag(data_set, "enable-startup-probes", pe_flag_startup_probes); if(is_not_set(data_set->flags, pe_flag_startup_probes)) { crm_info("Startup probes: disabled (dangerous)"); } value = pe_pref(data_set->config_hash, XML_ATTR_HAVE_WATCHDOG); if (value && crm_is_true(value)) { crm_notice("Relying on watchdog integration for fencing"); set_bit(data_set->flags, pe_flag_have_stonith_resource); } value = pe_pref(data_set->config_hash, "stonith-timeout"); data_set->stonith_timeout = crm_get_msec(value); crm_debug("STONITH timeout: %d", data_set->stonith_timeout); set_config_flag(data_set, "stonith-enabled", pe_flag_stonith_enabled); crm_debug("STONITH of failed nodes is %s", is_set(data_set->flags, pe_flag_stonith_enabled) ? "enabled" : "disabled"); data_set->stonith_action = pe_pref(data_set->config_hash, "stonith-action"); crm_trace("STONITH will %s nodes", data_set->stonith_action); set_config_flag(data_set, "stop-all-resources", pe_flag_stop_everything); crm_debug("Stop all active resources: %s", is_set(data_set->flags, pe_flag_stop_everything) ? "true" : "false"); set_config_flag(data_set, "symmetric-cluster", pe_flag_symmetric_cluster); if (is_set(data_set->flags, pe_flag_symmetric_cluster)) { crm_debug("Cluster is symmetric" " - resources can run anywhere by default"); } value = pe_pref(data_set->config_hash, "default-resource-stickiness"); data_set->default_resource_stickiness = char2score(value); crm_debug("Default stickiness: %d", data_set->default_resource_stickiness); value = pe_pref(data_set->config_hash, "no-quorum-policy"); if (safe_str_eq(value, "ignore")) { data_set->no_quorum_policy = no_quorum_ignore; } else if (safe_str_eq(value, "freeze")) { data_set->no_quorum_policy = no_quorum_freeze; } else if (safe_str_eq(value, "suicide")) { gboolean do_panic = FALSE; crm_element_value_int(data_set->input, XML_ATTR_QUORUM_PANIC, &do_panic); if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) { crm_config_err ("Setting no-quorum-policy=suicide makes no sense if stonith-enabled=false"); } if (do_panic && is_set(data_set->flags, pe_flag_stonith_enabled)) { data_set->no_quorum_policy = no_quorum_suicide; } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE && do_panic == FALSE) { crm_notice("Resetting no-quorum-policy to 'stop': The cluster has never had quorum"); data_set->no_quorum_policy = no_quorum_stop; } } else { data_set->no_quorum_policy = no_quorum_stop; } switch (data_set->no_quorum_policy) { case no_quorum_freeze: crm_debug("On loss of CCM Quorum: Freeze resources"); break; case no_quorum_stop: crm_debug("On loss of CCM Quorum: Stop ALL resources"); break; case no_quorum_suicide: crm_notice("On loss of CCM Quorum: Fence all remaining nodes"); break; case no_quorum_ignore: crm_notice("On loss of CCM Quorum: Ignore"); break; } set_config_flag(data_set, "stop-orphan-resources", pe_flag_stop_rsc_orphans); crm_trace("Orphan resources are %s", is_set(data_set->flags, pe_flag_stop_rsc_orphans) ? "stopped" : "ignored"); set_config_flag(data_set, "stop-orphan-actions", pe_flag_stop_action_orphans); crm_trace("Orphan resource actions are %s", is_set(data_set->flags, pe_flag_stop_action_orphans) ? "stopped" : "ignored"); set_config_flag(data_set, "remove-after-stop", pe_flag_remove_after_stop); crm_trace("Stopped resources are removed from the status section: %s", is_set(data_set->flags, pe_flag_remove_after_stop) ? "true" : "false"); set_config_flag(data_set, "maintenance-mode", pe_flag_maintenance_mode); crm_trace("Maintenance mode: %s", is_set(data_set->flags, pe_flag_maintenance_mode) ? "true" : "false"); if (is_set(data_set->flags, pe_flag_maintenance_mode)) { clear_bit(data_set->flags, pe_flag_is_managed_default); } else { set_config_flag(data_set, "is-managed-default", pe_flag_is_managed_default); } crm_trace("By default resources are %smanaged", is_set(data_set->flags, pe_flag_is_managed_default) ? "" : "not "); set_config_flag(data_set, "start-failure-is-fatal", pe_flag_start_failure_fatal); crm_trace("Start failures are %s", is_set(data_set->flags, pe_flag_start_failure_fatal) ? "always fatal" : "handled by failcount"); node_score_red = char2score(pe_pref(data_set->config_hash, "node-health-red")); node_score_green = char2score(pe_pref(data_set->config_hash, "node-health-green")); node_score_yellow = char2score(pe_pref(data_set->config_hash, "node-health-yellow")); crm_debug("Node scores: 'red' = %s, 'yellow' = %s, 'green' = %s", pe_pref(data_set->config_hash, "node-health-red"), pe_pref(data_set->config_hash, "node-health-yellow"), pe_pref(data_set->config_hash, "node-health-green")); data_set->placement_strategy = pe_pref(data_set->config_hash, "placement-strategy"); crm_trace("Placement strategy: %s", data_set->placement_strategy); return TRUE; } static void destroy_digest_cache(gpointer ptr) { op_digest_cache_t *data = ptr; free_xml(data->params_all); free_xml(data->params_restart); free(data->digest_all_calc); free(data->digest_restart_calc); free(data); } static node_t * create_node(const char *id, const char *uname, const char *type, const char *score, pe_working_set_t * data_set) { node_t *new_node = NULL; if (pe_find_node(data_set->nodes, uname) != NULL) { crm_config_warn("Detected multiple node entries with uname=%s" " - this is rarely intended", uname); } new_node = calloc(1, sizeof(node_t)); if (new_node == NULL) { return NULL; } new_node->weight = char2score(score); new_node->fixed = FALSE; new_node->details = calloc(1, sizeof(struct node_shared_s)); if (new_node->details == NULL) { free(new_node); return NULL; } crm_trace("Creating node for entry %s/%s", uname, id); new_node->details->id = id; new_node->details->uname = uname; new_node->details->online = FALSE; new_node->details->shutdown = FALSE; + new_node->details->rsc_discovery_enabled = TRUE; new_node->details->running_rsc = NULL; new_node->details->type = node_ping; if (safe_str_eq(type, "remote")) { new_node->details->type = node_remote; set_bit(data_set->flags, pe_flag_have_remote_nodes); } else if (type == NULL || safe_str_eq(type, "member") || safe_str_eq(type, NORMALNODE)) { new_node->details->type = node_member; } new_node->details->attrs = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); new_node->details->utilization = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); new_node->details->digest_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_digest_cache); data_set->nodes = g_list_insert_sorted(data_set->nodes, new_node, sort_node_uname); return new_node; } static const char * expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, GHashTable **rsc_name_check) { xmlNode *xml_rsc = NULL; xmlNode *xml_tmp = NULL; xmlNode *attr_set = NULL; xmlNode *attr = NULL; const char *container_id = ID(xml_obj); const char *remote_name = NULL; const char *remote_server = NULL; const char *remote_port = NULL; const char *connect_timeout = "60s"; const char *remote_allow_migrate=NULL; char *tmp_id = NULL; for (attr_set = __xml_first_child(xml_obj); attr_set != NULL; attr_set = __xml_next(attr_set)) { if (safe_str_neq((const char *)attr_set->name, XML_TAG_META_SETS)) { continue; } for (attr = __xml_first_child(attr_set); attr != NULL; attr = __xml_next(attr)) { const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE); const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME); if (safe_str_eq(name, XML_RSC_ATTR_REMOTE_NODE)) { remote_name = value; } else if (safe_str_eq(name, "remote-addr")) { remote_server = value; } else if (safe_str_eq(name, "remote-port")) { remote_port = value; } else if (safe_str_eq(name, "remote-connect-timeout")) { connect_timeout = value; } else if (safe_str_eq(name, "remote-allow-migrate")) { remote_allow_migrate=value; } } } if (remote_name == NULL) { return NULL; } if (*rsc_name_check == NULL) { *rsc_name_check = g_hash_table_new(crm_str_hash, g_str_equal); for (xml_rsc = __xml_first_child(parent); xml_rsc != NULL; xml_rsc = __xml_next(xml_rsc)) { const char *id = ID(xml_rsc); /* avoiding heap allocation here because we know the duration of this hashtable allows us to */ g_hash_table_insert(*rsc_name_check, (char *) id, (char *) id); } } if (g_hash_table_lookup(*rsc_name_check, remote_name)) { crm_err("Naming conflict with remote-node=%s. remote-nodes can not have the same name as a resource.", remote_name); return NULL; } xml_rsc = create_xml_node(parent, XML_CIB_TAG_RESOURCE); crm_xml_add(xml_rsc, XML_ATTR_ID, remote_name); crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, "ocf"); crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, "pacemaker"); crm_xml_add(xml_rsc, XML_ATTR_TYPE, "remote"); xml_tmp = create_xml_node(xml_rsc, XML_TAG_META_SETS); tmp_id = crm_concat(remote_name, XML_TAG_META_SETS, '_'); crm_xml_add(xml_tmp, XML_ATTR_ID, tmp_id); free(tmp_id); attr = create_xml_node(xml_tmp, XML_CIB_TAG_NVPAIR); tmp_id = crm_concat(remote_name, "meta-attributes-container", '_'); crm_xml_add(attr, XML_ATTR_ID, tmp_id); crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, XML_RSC_ATTR_CONTAINER); crm_xml_add(attr, XML_NVPAIR_ATTR_VALUE, container_id); free(tmp_id); attr = create_xml_node(xml_tmp, XML_CIB_TAG_NVPAIR); tmp_id = crm_concat(remote_name, "meta-attributes-internal", '_'); crm_xml_add(attr, XML_ATTR_ID, tmp_id); crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, XML_RSC_ATTR_INTERNAL_RSC); crm_xml_add(attr, XML_NVPAIR_ATTR_VALUE, "true"); free(tmp_id); if (remote_allow_migrate) { attr = create_xml_node(xml_tmp, XML_CIB_TAG_NVPAIR); tmp_id = crm_concat(remote_name, "meta-attributes-container", '_'); crm_xml_add(attr, XML_ATTR_ID, tmp_id); crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, XML_OP_ATTR_ALLOW_MIGRATE); crm_xml_add(attr, XML_NVPAIR_ATTR_VALUE, remote_allow_migrate); free(tmp_id); } xml_tmp = create_xml_node(xml_rsc, "operations"); attr = create_xml_node(xml_tmp, XML_ATTR_OP); tmp_id = crm_concat(remote_name, "monitor-interval-30s", '_'); crm_xml_add(attr, XML_ATTR_ID, tmp_id); crm_xml_add(attr, XML_ATTR_TIMEOUT, "30s"); crm_xml_add(attr, XML_LRM_ATTR_INTERVAL, "30s"); crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, "monitor"); free(tmp_id); if (connect_timeout) { attr = create_xml_node(xml_tmp, XML_ATTR_OP); tmp_id = crm_concat(remote_name, "start-interval-0", '_'); crm_xml_add(attr, XML_ATTR_ID, tmp_id); crm_xml_add(attr, XML_ATTR_TIMEOUT, connect_timeout); crm_xml_add(attr, XML_LRM_ATTR_INTERVAL, "0"); crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, "start"); free(tmp_id); } if (remote_port || remote_server) { xml_tmp = create_xml_node(xml_rsc, XML_TAG_ATTR_SETS); tmp_id = crm_concat(remote_name, XML_TAG_ATTR_SETS, '_'); crm_xml_add(xml_tmp, XML_ATTR_ID, tmp_id); free(tmp_id); if (remote_server) { attr = create_xml_node(xml_tmp, XML_CIB_TAG_NVPAIR); tmp_id = crm_concat(remote_name, "instance-attributes-addr", '_'); crm_xml_add(attr, XML_ATTR_ID, tmp_id); crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, "addr"); crm_xml_add(attr, XML_NVPAIR_ATTR_VALUE, remote_server); free(tmp_id); } if (remote_port) { attr = create_xml_node(xml_tmp, XML_CIB_TAG_NVPAIR); tmp_id = crm_concat(remote_name, "instance-attributes-port", '_'); crm_xml_add(attr, XML_ATTR_ID, tmp_id); crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, "port"); crm_xml_add(attr, XML_NVPAIR_ATTR_VALUE, remote_port); free(tmp_id); } } return remote_name; } static void handle_startup_fencing(pe_working_set_t *data_set, node_t *new_node) { static const char *blind_faith = NULL; static gboolean unseen_are_unclean = TRUE; if ((new_node->details->type == node_remote) && (new_node->details->remote_rsc == NULL)) { /* ignore fencing remote-nodes that don't have a conneciton resource associated * with them. This happens when remote-node entries get left in the nodes section * after the connection resource is removed */ return; } blind_faith = pe_pref(data_set->config_hash, "startup-fencing"); if (crm_is_true(blind_faith) == FALSE) { unseen_are_unclean = FALSE; crm_warn("Blind faith: not fencing unseen nodes"); } if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE || unseen_are_unclean == FALSE) { /* blind faith... */ new_node->details->unclean = FALSE; } else { /* all nodes are unclean until we've seen their * status entry */ new_node->details->unclean = TRUE; } /* We need to be able to determine if a node's status section * exists or not separate from whether the node is unclean. */ new_node->details->unseen = TRUE; } gboolean unpack_nodes(xmlNode * xml_nodes, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; node_t *new_node = NULL; const char *id = NULL; const char *uname = NULL; const char *type = NULL; const char *score = NULL; for (xml_obj = __xml_first_child(xml_nodes); xml_obj != NULL; xml_obj = __xml_next(xml_obj)) { if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_NODE, TRUE)) { new_node = NULL; id = crm_element_value(xml_obj, XML_ATTR_ID); uname = crm_element_value(xml_obj, XML_ATTR_UNAME); type = crm_element_value(xml_obj, XML_ATTR_TYPE); score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE); crm_trace("Processing node %s/%s", uname, id); if (id == NULL) { crm_config_err("Must specify id tag in "); continue; } new_node = create_node(id, uname, type, score, data_set); if (new_node == NULL) { return FALSE; } /* if(data_set->have_quorum == FALSE */ /* && data_set->no_quorum_policy == no_quorum_stop) { */ /* /\* start shutting resources down *\/ */ /* new_node->weight = -INFINITY; */ /* } */ handle_startup_fencing(data_set, new_node); add_node_attrs(xml_obj, new_node, FALSE, data_set); unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_UTILIZATION, NULL, new_node->details->utilization, NULL, FALSE, data_set->now); crm_trace("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME)); } } if (data_set->localhost && pe_find_node(data_set->nodes, data_set->localhost) == NULL) { crm_info("Creating a fake local node"); create_node(data_set->localhost, data_set->localhost, NULL, 0, data_set); } return TRUE; } static void setup_container(resource_t * rsc, pe_working_set_t * data_set) { const char *container_id = NULL; if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; setup_container(child_rsc, data_set); } return; } container_id = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_CONTAINER); if (container_id && safe_str_neq(container_id, rsc->id)) { resource_t *container = pe_find_resource(data_set->resources, container_id); if (container) { rsc->container = container; container->fillers = g_list_append(container->fillers, rsc); pe_rsc_trace(rsc, "Resource %s's container is %s", rsc->id, container_id); } else { pe_err("Resource %s: Unknown resource container (%s)", rsc->id, container_id); } } } gboolean unpack_remote_nodes(xmlNode * xml_resources, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; GHashTable *rsc_name_check = NULL; /* generate remote nodes from resource config before unpacking resources */ for (xml_obj = __xml_first_child(xml_resources); xml_obj != NULL; xml_obj = __xml_next(xml_obj)) { const char *new_node_id = NULL; /* remote rsc can be defined as primitive, or exist within the metadata of another rsc */ if (xml_contains_remote_node(xml_obj)) { new_node_id = ID(xml_obj); /* This check is here to make sure we don't iterate over * an expanded node that has already been added to the node list. */ if (new_node_id && pe_find_node(data_set->nodes, new_node_id) != NULL) { continue; } } else { /* expands a metadata defined remote resource into the xml config * as an actual rsc primitive to be unpacked later. */ new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources, &rsc_name_check); } if (new_node_id) { crm_trace("detected remote node %s", new_node_id); /* only create the remote node entry if the node didn't already exist */ if (pe_find_node(data_set->nodes, new_node_id) == NULL) { create_node(new_node_id, new_node_id, "remote", NULL, data_set); } } } if (rsc_name_check) { g_hash_table_destroy(rsc_name_check); } return TRUE; } /* Call this after all the nodes and resources have been * unpacked, but before the status section is read. * * A remote node's online status is reflected by the state * of the remote node's connection resource. We need to link * the remote node to this connection resource so we can have * easy access to the connection resource during the PE calculations. */ static void link_rsc2remotenode(pe_working_set_t *data_set, resource_t *new_rsc) { node_t *remote_node = NULL; if (new_rsc->is_remote_node == FALSE) { return; } if (is_set(data_set->flags, pe_flag_quick_location)) { /* remote_nodes and remote_resources are not linked in quick location calculations */ return; } print_resource(LOG_DEBUG_3, "Linking remote-node connection resource, ", new_rsc, FALSE); remote_node = pe_find_node(data_set->nodes, new_rsc->id); CRM_CHECK(remote_node != NULL, return;); remote_node->details->remote_rsc = new_rsc; /* If this is a baremetal remote-node (no container resource * associated with it) then we need to handle startup fencing the same way * as cluster nodes. */ if (new_rsc->container == NULL) { handle_startup_fencing(data_set, remote_node); return; } } static void destroy_tag(gpointer data) { tag_t *tag = data; if (tag) { free(tag->id); g_list_free_full(tag->refs, free); free(tag); } } gboolean unpack_resources(xmlNode * xml_resources, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; GListPtr gIter = NULL; data_set->template_rsc_sets = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_tag); for (xml_obj = __xml_first_child(xml_resources); xml_obj != NULL; xml_obj = __xml_next(xml_obj)) { resource_t *new_rsc = NULL; if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_RSC_TEMPLATE, TRUE)) { const char *template_id = ID(xml_obj); if (template_id && g_hash_table_lookup_extended(data_set->template_rsc_sets, template_id, NULL, NULL) == FALSE) { /* Record the template's ID for the knowledge of its existence anyway. */ g_hash_table_insert(data_set->template_rsc_sets, strdup(template_id), NULL); } continue; } crm_trace("Beginning unpack... <%s id=%s... >", crm_element_name(xml_obj), ID(xml_obj)); if (common_unpack(xml_obj, &new_rsc, NULL, data_set)) { data_set->resources = g_list_append(data_set->resources, new_rsc); if (xml_contains_remote_node(xml_obj)) { new_rsc->is_remote_node = TRUE; } print_resource(LOG_DEBUG_3, "Added ", new_rsc, FALSE); } else { crm_config_err("Failed unpacking %s %s", crm_element_name(xml_obj), crm_element_value(xml_obj, XML_ATTR_ID)); if (new_rsc != NULL && new_rsc->fns != NULL) { new_rsc->fns->free(new_rsc); } } } for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; setup_container(rsc, data_set); link_rsc2remotenode(data_set, rsc); } data_set->resources = g_list_sort(data_set->resources, sort_rsc_priority); if (is_set(data_set->flags, pe_flag_quick_location)) { /* Ignore */ } else if (is_set(data_set->flags, pe_flag_stonith_enabled) && is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) { crm_config_err("Resource start-up disabled since no STONITH resources have been defined"); crm_config_err("Either configure some or disable STONITH with the stonith-enabled option"); crm_config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity"); } return TRUE; } gboolean unpack_tags(xmlNode * xml_tags, pe_working_set_t * data_set) { xmlNode *xml_tag = NULL; data_set->tags = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_tag); for (xml_tag = __xml_first_child(xml_tags); xml_tag != NULL; xml_tag = __xml_next(xml_tag)) { xmlNode *xml_obj_ref = NULL; const char *tag_id = ID(xml_tag); if (crm_str_eq((const char *)xml_tag->name, XML_CIB_TAG_TAG, TRUE) == FALSE) { continue; } if (tag_id == NULL) { crm_config_err("Failed unpacking %s: %s should be specified", crm_element_name(xml_tag), XML_ATTR_ID); continue; } for (xml_obj_ref = __xml_first_child(xml_tag); xml_obj_ref != NULL; xml_obj_ref = __xml_next(xml_obj_ref)) { const char *obj_ref = ID(xml_obj_ref); if (crm_str_eq((const char *)xml_obj_ref->name, XML_CIB_TAG_OBJ_REF, TRUE) == FALSE) { continue; } if (obj_ref == NULL) { crm_config_err("Failed unpacking %s for tag %s: %s should be specified", crm_element_name(xml_obj_ref), tag_id, XML_ATTR_ID); continue; } if (add_tag_ref(data_set->tags, tag_id, obj_ref) == FALSE) { return FALSE; } } } return TRUE; } /* The ticket state section: * "/cib/status/tickets/ticket_state" */ static gboolean unpack_ticket_state(xmlNode * xml_ticket, pe_working_set_t * data_set) { const char *ticket_id = NULL; const char *granted = NULL; const char *last_granted = NULL; const char *standby = NULL; xmlAttrPtr xIter = NULL; ticket_t *ticket = NULL; ticket_id = ID(xml_ticket); if (ticket_id == NULL || strlen(ticket_id) == 0) { return FALSE; } crm_trace("Processing ticket state for %s", ticket_id); ticket = g_hash_table_lookup(data_set->tickets, ticket_id); if (ticket == NULL) { ticket = ticket_new(ticket_id, data_set); if (ticket == NULL) { return FALSE; } } for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) { const char *prop_name = (const char *)xIter->name; const char *prop_value = crm_element_value(xml_ticket, prop_name); if (crm_str_eq(prop_name, XML_ATTR_ID, TRUE)) { continue; } g_hash_table_replace(ticket->state, strdup(prop_name), strdup(prop_value)); } granted = g_hash_table_lookup(ticket->state, "granted"); if (granted && crm_is_true(granted)) { ticket->granted = TRUE; crm_info("We have ticket '%s'", ticket->id); } else { ticket->granted = FALSE; crm_info("We do not have ticket '%s'", ticket->id); } last_granted = g_hash_table_lookup(ticket->state, "last-granted"); if (last_granted) { ticket->last_granted = crm_parse_int(last_granted, 0); } standby = g_hash_table_lookup(ticket->state, "standby"); if (standby && crm_is_true(standby)) { ticket->standby = TRUE; if (ticket->granted) { crm_info("Granted ticket '%s' is in standby-mode", ticket->id); } } else { ticket->standby = FALSE; } crm_trace("Done with ticket state for %s", ticket_id); return TRUE; } static gboolean unpack_tickets_state(xmlNode * xml_tickets, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; for (xml_obj = __xml_first_child(xml_tickets); xml_obj != NULL; xml_obj = __xml_next(xml_obj)) { if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_TICKET_STATE, TRUE) == FALSE) { continue; } unpack_ticket_state(xml_obj, data_set); } return TRUE; } /* Compatibility with the deprecated ticket state section: * "/cib/status/tickets/instance_attributes" */ static void get_ticket_state_legacy(gpointer key, gpointer value, gpointer user_data) { const char *long_key = key; char *state_key = NULL; const char *granted_prefix = "granted-ticket-"; const char *last_granted_prefix = "last-granted-"; static int granted_prefix_strlen = 0; static int last_granted_prefix_strlen = 0; const char *ticket_id = NULL; const char *is_granted = NULL; const char *last_granted = NULL; const char *sep = NULL; ticket_t *ticket = NULL; pe_working_set_t *data_set = user_data; if (granted_prefix_strlen == 0) { granted_prefix_strlen = strlen(granted_prefix); } if (last_granted_prefix_strlen == 0) { last_granted_prefix_strlen = strlen(last_granted_prefix); } if (strstr(long_key, granted_prefix) == long_key) { ticket_id = long_key + granted_prefix_strlen; if (strlen(ticket_id)) { state_key = strdup("granted"); is_granted = value; } } else if (strstr(long_key, last_granted_prefix) == long_key) { ticket_id = long_key + last_granted_prefix_strlen; if (strlen(ticket_id)) { state_key = strdup("last-granted"); last_granted = value; } } else if ((sep = strrchr(long_key, '-'))) { ticket_id = sep + 1; state_key = strndup(long_key, strlen(long_key) - strlen(sep)); } if (ticket_id == NULL || strlen(ticket_id) == 0) { free(state_key); return; } if (state_key == NULL || strlen(state_key) == 0) { free(state_key); return; } ticket = g_hash_table_lookup(data_set->tickets, ticket_id); if (ticket == NULL) { ticket = ticket_new(ticket_id, data_set); if (ticket == NULL) { free(state_key); return; } } g_hash_table_replace(ticket->state, state_key, strdup(value)); if (is_granted) { if (crm_is_true(is_granted)) { ticket->granted = TRUE; crm_info("We have ticket '%s'", ticket->id); } else { ticket->granted = FALSE; crm_info("We do not have ticket '%s'", ticket->id); } } else if (last_granted) { ticket->last_granted = crm_parse_int(last_granted, 0); } } /* remove nodes that are down, stopping */ /* create +ve rsc_to_node constraints between resources and the nodes they are running on */ /* anything else? */ gboolean unpack_status(xmlNode * status, pe_working_set_t * data_set) { const char *id = NULL; const char *uname = NULL; xmlNode *state = NULL; xmlNode *lrm_rsc = NULL; node_t *this_node = NULL; crm_trace("Beginning unpack"); if (data_set->tickets == NULL) { data_set->tickets = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_ticket); } for (state = __xml_first_child(status); state != NULL; state = __xml_next(state)) { if (crm_str_eq((const char *)state->name, XML_CIB_TAG_TICKETS, TRUE)) { xmlNode *xml_tickets = state; GHashTable *state_hash = NULL; /* Compatibility with the deprecated ticket state section: * Unpack the attributes in the deprecated "/cib/status/tickets/instance_attributes" if it exists. */ state_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); unpack_instance_attributes(data_set->input, xml_tickets, XML_TAG_ATTR_SETS, NULL, state_hash, NULL, TRUE, data_set->now); g_hash_table_foreach(state_hash, get_ticket_state_legacy, data_set); if (state_hash) { g_hash_table_destroy(state_hash); } /* Unpack the new "/cib/status/tickets/ticket_state"s */ unpack_tickets_state(xml_tickets, data_set); } if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE)) { xmlNode *attrs = NULL; + const char *resource_discovery_enabled = NULL; id = crm_element_value(state, XML_ATTR_ID); uname = crm_element_value(state, XML_ATTR_UNAME); this_node = pe_find_node_any(data_set->nodes, id, uname); if (uname == NULL) { /* error */ continue; } else if (this_node == NULL) { crm_config_warn("Node %s in status section no longer exists", uname); continue; } else if (is_remote_node(this_node)) { /* online state for remote nodes is determined by the rsc state * after all the unpacking is done. */ continue; } crm_trace("Processing node id=%s, uname=%s", id, uname); /* Mark the node as provisionally clean * - at least we have seen it in the current cluster's lifetime */ this_node->details->unclean = FALSE; this_node->details->unseen = FALSE; attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); add_node_attrs(attrs, this_node, TRUE, data_set); if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "standby"))) { crm_info("Node %s is in standby-mode", this_node->details->uname); this_node->details->standby = TRUE; } if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "maintenance"))) { crm_info("Node %s is in maintenance-mode", this_node->details->uname); this_node->details->maintenance = TRUE; } + resource_discovery_enabled = g_hash_table_lookup(this_node->details->attrs, XML_NODE_ATTR_RSC_DISCOVERY); + if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) { + crm_warn("ignoring %s attribute on node %s, disabling resource discovery is not allowed on cluster nodes", + XML_NODE_ATTR_RSC_DISCOVERY, this_node->details->uname); + } + crm_trace("determining node state"); determine_online_status(state, this_node, data_set); if (this_node->details->online && data_set->no_quorum_policy == no_quorum_suicide) { /* Everything else should flow from this automatically * At least until the PE becomes able to migrate off healthy resources */ pe_fence_node(data_set, this_node, "because the cluster does not have quorum"); } } } /* Now that we know all node states, we can safely handle migration ops */ for (state = __xml_first_child(status); state != NULL; state = __xml_next(state)) { if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) { continue; } id = crm_element_value(state, XML_ATTR_ID); uname = crm_element_value(state, XML_ATTR_UNAME); this_node = pe_find_node_any(data_set->nodes, id, uname); if (this_node == NULL) { crm_info("Node %s is unknown", id); continue; } else if (is_remote_node(this_node)) { /* online status of remote node can not be determined until all other * resource status is unpacked. */ continue; } else if (this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) { crm_trace("Processing lrm resource entries on healthy node: %s", this_node->details->uname); lrm_rsc = find_xml_node(state, XML_CIB_TAG_LRM, FALSE); lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE); unpack_lrm_resources(this_node, lrm_rsc, data_set); } } /* now that the rest of the cluster's status is determined * calculate remote-nodes */ unpack_remote_status(status, data_set); return TRUE; } gboolean unpack_remote_status(xmlNode * status, pe_working_set_t * data_set) { const char *id = NULL; const char *uname = NULL; GListPtr gIter = NULL; xmlNode *state = NULL; xmlNode *lrm_rsc = NULL; node_t *this_node = NULL; if (is_set(data_set->flags, pe_flag_have_remote_nodes) == FALSE) { crm_trace("no remote nodes to unpack"); return TRUE; } /* get online status */ for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { this_node = gIter->data; if ((this_node == NULL) || (is_remote_node(this_node) == FALSE)) { continue; } determine_remote_online_status(this_node); } /* process attributes */ for (state = __xml_first_child(status); state != NULL; state = __xml_next(state)) { + const char *resource_discovery_enabled = NULL; xmlNode *attrs = NULL; if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) { continue; } id = crm_element_value(state, XML_ATTR_ID); uname = crm_element_value(state, XML_ATTR_UNAME); this_node = pe_find_node_any(data_set->nodes, id, uname); if ((this_node == NULL) || (is_remote_node(this_node) == FALSE)) { continue; } crm_trace("Processing remote node id=%s, uname=%s", id, uname); this_node->details->unclean = FALSE; this_node->details->unseen = FALSE; attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); add_node_attrs(attrs, this_node, TRUE, data_set); if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "standby"))) { crm_info("Node %s is in standby-mode", this_node->details->uname); this_node->details->standby = TRUE; } + + if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "maintenance"))) { + crm_info("Node %s is in maintenance-mode", this_node->details->uname); + this_node->details->maintenance = TRUE; + } + + resource_discovery_enabled = g_hash_table_lookup(this_node->details->attrs, XML_NODE_ATTR_RSC_DISCOVERY); + if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) { + if (is_baremetal_remote_node(this_node) && is_not_set(data_set->flags, pe_flag_stonith_enabled)) { + crm_warn("ignoring %s attribute on baremetal remote node %s, disabling resource discovery requires stonith to be enabled.", + XML_NODE_ATTR_RSC_DISCOVERY, this_node->details->uname); + } else { + /* if we're here, this is either a baremetal node and fencing is enabled, + * or this is a container node which we don't care if fencing is enabled + * or not on. container nodes are 'fenced' by recovering the container resource + * regardless of whether fencing is enabled. */ + crm_info("Node %s has resource discovery disabled", this_node->details->uname); + this_node->details->rsc_discovery_enabled = FALSE; + } + } } /* process node rsc status */ for (state = __xml_first_child(status); state != NULL; state = __xml_next(state)) { if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) { continue; } id = crm_element_value(state, XML_ATTR_ID); uname = crm_element_value(state, XML_ATTR_UNAME); this_node = pe_find_node_any(data_set->nodes, id, uname); if ((this_node == NULL) || (is_remote_node(this_node) == FALSE)) { continue; } crm_trace("Processing lrm resource entries on healthy remote node: %s", this_node->details->uname); lrm_rsc = find_xml_node(state, XML_CIB_TAG_LRM, FALSE); lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE); unpack_lrm_resources(this_node, lrm_rsc, data_set); } return TRUE; } static gboolean determine_online_status_no_fencing(pe_working_set_t * data_set, xmlNode * node_state, node_t * this_node) { gboolean online = FALSE; const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE); const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER); const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER); const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED); if (!crm_is_true(in_cluster)) { crm_trace("Node is down: in_cluster=%s", crm_str(in_cluster)); } else if (safe_str_eq(is_peer, ONLINESTATUS)) { if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) { online = TRUE; } else { crm_debug("Node is not ready to run resources: %s", join); } } else if (this_node->details->expected_up == FALSE) { crm_trace("CRMd is down: in_cluster=%s", crm_str(in_cluster)); crm_trace("\tis_peer=%s, join=%s, expected=%s", crm_str(is_peer), crm_str(join), crm_str(exp_state)); } else { /* mark it unclean */ pe_fence_node(data_set, this_node, "unexpectedly down"); crm_info("\tin_cluster=%s, is_peer=%s, join=%s, expected=%s", crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state)); } return online; } static gboolean determine_online_status_fencing(pe_working_set_t * data_set, xmlNode * node_state, node_t * this_node) { gboolean online = FALSE; gboolean do_terminate = FALSE; const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE); const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER); const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER); const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED); const char *terminate = g_hash_table_lookup(this_node->details->attrs, "terminate"); /* - XML_NODE_IN_CLUSTER ::= true|false - XML_NODE_IS_PEER ::= true|false|online|offline - XML_NODE_JOIN_STATE ::= member|down|pending|banned - XML_NODE_EXPECTED ::= member|down */ if (crm_is_true(terminate)) { do_terminate = TRUE; } else if (terminate != NULL && strlen(terminate) > 0) { /* could be a time() value */ char t = terminate[0]; if (t != '0' && isdigit(t)) { do_terminate = TRUE; } } crm_trace("%s: in_cluster=%s, is_peer=%s, join=%s, expected=%s, term=%d", this_node->details->uname, crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state), do_terminate); online = crm_is_true(in_cluster); if (safe_str_eq(is_peer, ONLINESTATUS)) { is_peer = XML_BOOLEAN_YES; } if (exp_state == NULL) { exp_state = CRMD_JOINSTATE_DOWN; } if (this_node->details->shutdown) { crm_debug("%s is shutting down", this_node->details->uname); online = crm_is_true(is_peer); /* Slightly different criteria since we cant shut down a dead peer */ } else if (in_cluster == NULL) { pe_fence_node(data_set, this_node, "because the peer has not been seen by the cluster"); } else if (safe_str_eq(join, CRMD_JOINSTATE_NACK)) { pe_fence_node(data_set, this_node, "because it failed the pacemaker membership criteria"); } else if (do_terminate == FALSE && safe_str_eq(exp_state, CRMD_JOINSTATE_DOWN)) { if (crm_is_true(in_cluster) || crm_is_true(is_peer)) { crm_info("- Node %s is not ready to run resources", this_node->details->uname); this_node->details->standby = TRUE; this_node->details->pending = TRUE; } else { crm_trace("%s is down or still coming up", this_node->details->uname); } } else if (do_terminate && safe_str_eq(join, CRMD_JOINSTATE_DOWN) && crm_is_true(in_cluster) == FALSE && crm_is_true(is_peer) == FALSE) { crm_info("Node %s was just shot", this_node->details->uname); online = FALSE; } else if (crm_is_true(in_cluster) == FALSE) { pe_fence_node(data_set, this_node, "because the node is no longer part of the cluster"); } else if (crm_is_true(is_peer) == FALSE) { pe_fence_node(data_set, this_node, "because our peer process is no longer available"); /* Everything is running at this point, now check join state */ } else if (do_terminate) { pe_fence_node(data_set, this_node, "because termination was requested"); } else if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) { crm_info("Node %s is active", this_node->details->uname); } else if (safe_str_eq(join, CRMD_JOINSTATE_PENDING) || safe_str_eq(join, CRMD_JOINSTATE_DOWN)) { crm_info("Node %s is not ready to run resources", this_node->details->uname); this_node->details->standby = TRUE; this_node->details->pending = TRUE; } else { pe_fence_node(data_set, this_node, "because the peer was in an unknown state"); crm_warn("%s: in-cluster=%s, is-peer=%s, join=%s, expected=%s, term=%d, shutdown=%d", this_node->details->uname, crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state), do_terminate, this_node->details->shutdown); } return online; } static gboolean determine_remote_online_status(node_t * this_node) { resource_t *rsc = this_node->details->remote_rsc; resource_t *container = NULL; if (rsc == NULL) { this_node->details->online = FALSE; goto remote_online_done; } container = rsc->container; CRM_ASSERT(rsc != NULL); /* If the resource is currently started, mark it online. */ if (rsc->role == RSC_ROLE_STARTED) { crm_trace("Remote node %s is set to ONLINE. role == started", this_node->details->id); this_node->details->online = TRUE; } /* consider this node shutting down if transitioning start->stop */ if (rsc->role == RSC_ROLE_STARTED && rsc->next_role == RSC_ROLE_STOPPED) { crm_trace("Remote node %s shutdown. transition from start to stop role", this_node->details->id); this_node->details->shutdown = TRUE; } /* Now check all the failure conditions. */ if (is_set(rsc->flags, pe_rsc_failed) || (rsc->role == RSC_ROLE_STOPPED) || (container && is_set(container->flags, pe_rsc_failed)) || (container && container->role == RSC_ROLE_STOPPED)) { crm_trace("Remote node %s is set to OFFLINE. node is stopped or rsc failed.", this_node->details->id); this_node->details->online = FALSE; } remote_online_done: crm_trace("Remote node %s online=%s", this_node->details->id, this_node->details->online ? "TRUE" : "FALSE"); return this_node->details->online; } gboolean determine_online_status(xmlNode * node_state, node_t * this_node, pe_working_set_t * data_set) { gboolean online = FALSE; const char *shutdown = NULL; const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED); if (this_node == NULL) { crm_config_err("No node to check"); return online; } this_node->details->shutdown = FALSE; this_node->details->expected_up = FALSE; shutdown = g_hash_table_lookup(this_node->details->attrs, XML_CIB_ATTR_SHUTDOWN); if (shutdown != NULL && safe_str_neq("0", shutdown)) { this_node->details->shutdown = TRUE; } else if (safe_str_eq(exp_state, CRMD_JOINSTATE_MEMBER)) { this_node->details->expected_up = TRUE; } if (this_node->details->type == node_ping) { this_node->details->unclean = FALSE; online = FALSE; /* As far as resource management is concerned, * the node is safely offline. * Anyone caught abusing this logic will be shot */ } else if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) { online = determine_online_status_no_fencing(data_set, node_state, this_node); } else { online = determine_online_status_fencing(data_set, node_state, this_node); } if (online) { this_node->details->online = TRUE; } else { /* remove node from contention */ this_node->fixed = TRUE; this_node->weight = -INFINITY; } if (online && this_node->details->shutdown) { /* dont run resources here */ this_node->fixed = TRUE; this_node->weight = -INFINITY; } if (this_node->details->type == node_ping) { crm_info("Node %s is not a pacemaker node", this_node->details->uname); } else if (this_node->details->unclean) { pe_proc_warn("Node %s is unclean", this_node->details->uname); } else if (this_node->details->online) { crm_info("Node %s is %s", this_node->details->uname, this_node->details->shutdown ? "shutting down" : this_node->details->pending ? "pending" : this_node->details->standby ? "standby" : this_node->details->maintenance ? "maintenance" : "online"); } else { crm_trace("Node %s is offline", this_node->details->uname); } return online; } char * clone_strip(const char *last_rsc_id) { int lpc = 0; char *zero = NULL; CRM_CHECK(last_rsc_id != NULL, return NULL); lpc = strlen(last_rsc_id); while (--lpc > 0) { switch (last_rsc_id[lpc]) { case 0: crm_err("Empty string: %s", last_rsc_id); return NULL; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': break; case ':': zero = calloc(1, lpc + 1); memcpy(zero, last_rsc_id, lpc); zero[lpc] = 0; return zero; default: goto done; } } done: zero = strdup(last_rsc_id); return zero; } char * clone_zero(const char *last_rsc_id) { int lpc = 0; char *zero = NULL; CRM_CHECK(last_rsc_id != NULL, return NULL); if (last_rsc_id != NULL) { lpc = strlen(last_rsc_id); } while (--lpc > 0) { switch (last_rsc_id[lpc]) { case 0: return NULL; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': break; case ':': zero = calloc(1, lpc + 3); memcpy(zero, last_rsc_id, lpc); zero[lpc] = ':'; zero[lpc + 1] = '0'; zero[lpc + 2] = 0; return zero; default: goto done; } } done: lpc = strlen(last_rsc_id); zero = calloc(1, lpc + 3); memcpy(zero, last_rsc_id, lpc); zero[lpc] = ':'; zero[lpc + 1] = '0'; zero[lpc + 2] = 0; crm_trace("%s -> %s", last_rsc_id, zero); return zero; } static resource_t * create_fake_resource(const char *rsc_id, xmlNode * rsc_entry, pe_working_set_t * data_set) { resource_t *rsc = NULL; xmlNode *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE); copy_in_properties(xml_rsc, rsc_entry); crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id); crm_log_xml_debug(xml_rsc, "Orphan resource"); if (!common_unpack(xml_rsc, &rsc, NULL, data_set)) { return NULL; } if (xml_contains_remote_node(xml_rsc)) { node_t *node; crm_debug("Detected orphaned remote node %s", rsc_id); rsc->is_remote_node = TRUE; node = pe_find_node(data_set->nodes, rsc_id); if (node == NULL) { node = create_node(rsc_id, rsc_id, "remote", NULL, data_set); } link_rsc2remotenode(data_set, rsc); if (node) { crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id); node->details->shutdown = TRUE; } } if (crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER)) { /* This orphaned rsc needs to be mapped to a container. */ crm_trace("Detected orphaned container filler %s", rsc_id); set_bit(rsc->flags, pe_rsc_orphan_container_filler); } set_bit(rsc->flags, pe_rsc_orphan); data_set->resources = g_list_append(data_set->resources, rsc); return rsc; } extern resource_t *create_child_clone(resource_t * rsc, int sub_id, pe_working_set_t * data_set); static resource_t * find_anonymous_clone(pe_working_set_t * data_set, node_t * node, resource_t * parent, const char *rsc_id) { GListPtr rIter = NULL; resource_t *rsc = NULL; gboolean skip_inactive = FALSE; CRM_ASSERT(parent != NULL); CRM_ASSERT(parent->variant == pe_clone || parent->variant == pe_master); CRM_ASSERT(is_not_set(parent->flags, pe_rsc_unique)); /* Find an instance active (or partially active for grouped clones) on the specified node */ pe_rsc_trace(parent, "Looking for %s on %s in %s", rsc_id, node->details->uname, parent->id); for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) { GListPtr nIter = NULL; GListPtr locations = NULL; resource_t *child = rIter->data; child->fns->location(child, &locations, TRUE); if (locations == NULL) { pe_rsc_trace(child, "Resource %s, skip inactive", child->id); continue; } for (nIter = locations; nIter && rsc == NULL; nIter = nIter->next) { node_t *childnode = nIter->data; if (childnode->details == node->details) { /* ->find_rsc() because we might be a cloned group */ rsc = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone); if(rsc) { pe_rsc_trace(rsc, "Resource %s, active", rsc->id); } } /* Keep this block, it means we'll do the right thing if * anyone toggles the unique flag to 'off' */ if (rsc && rsc->running_on) { crm_notice("/Anonymous/ clone %s is already running on %s", parent->id, node->details->uname); skip_inactive = TRUE; rsc = NULL; } } g_list_free(locations); } /* Find an inactive instance */ if (skip_inactive == FALSE) { pe_rsc_trace(parent, "Looking for %s anywhere", rsc_id); for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) { GListPtr locations = NULL; resource_t *child = rIter->data; if (is_set(child->flags, pe_rsc_block)) { pe_rsc_trace(child, "Skip: blocked in stopped state"); continue; } child->fns->location(child, &locations, TRUE); if (locations == NULL) { /* ->find_rsc() because we might be a cloned group */ rsc = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone); pe_rsc_trace(parent, "Resource %s, empty slot", rsc->id); } g_list_free(locations); } } if (rsc == NULL) { /* Create an extra orphan */ resource_t *top = create_child_clone(parent, -1, data_set); /* ->find_rsc() because we might be a cloned group */ rsc = top->fns->find_rsc(top, rsc_id, NULL, pe_find_clone); CRM_ASSERT(rsc != NULL); pe_rsc_debug(parent, "Created orphan %s for %s: %s on %s", top->id, parent->id, rsc_id, node->details->uname); } if (safe_str_neq(rsc_id, rsc->id)) { pe_rsc_debug(rsc, "Internally renamed %s on %s to %s%s", rsc_id, node->details->uname, rsc->id, is_set(rsc->flags, pe_rsc_orphan) ? " (ORPHAN)" : ""); } return rsc; } static resource_t * unpack_find_resource(pe_working_set_t * data_set, node_t * node, const char *rsc_id, xmlNode * rsc_entry) { resource_t *rsc = NULL; resource_t *parent = NULL; crm_trace("looking for %s", rsc_id); rsc = pe_find_resource(data_set->resources, rsc_id); /* no match */ if (rsc == NULL) { /* Even when clone-max=0, we still create a single :0 orphan to match against */ char *tmp = clone_zero(rsc_id); resource_t *clone0 = pe_find_resource(data_set->resources, tmp); if (clone0 && is_not_set(clone0->flags, pe_rsc_unique)) { rsc = clone0; } else { crm_trace("%s is not known as %s either", rsc_id, tmp); } parent = uber_parent(clone0); free(tmp); crm_trace("%s not found: %s", rsc_id, parent ? parent->id : "orphan"); } else if (rsc->variant > pe_native) { crm_trace("%s is no longer a primitve resource, the lrm_resource entry is obsolete", rsc_id); return NULL; } else { parent = uber_parent(rsc); } if (parent && parent->variant > pe_group) { if (is_not_set(parent->flags, pe_rsc_unique)) { char *base = clone_strip(rsc_id); rsc = find_anonymous_clone(data_set, node, parent, base); CRM_ASSERT(rsc != NULL); free(base); } if (rsc && safe_str_neq(rsc_id, rsc->id)) { free(rsc->clone_name); rsc->clone_name = strdup(rsc_id); } } return rsc; } static resource_t * process_orphan_resource(xmlNode * rsc_entry, node_t * node, pe_working_set_t * data_set) { resource_t *rsc = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); crm_debug("Detected orphan resource %s on %s", rsc_id, node->details->uname); rsc = create_fake_resource(rsc_id, rsc_entry, data_set); if (is_set(data_set->flags, pe_flag_stop_rsc_orphans) == FALSE) { clear_bit(rsc->flags, pe_rsc_managed); } else { GListPtr gIter = NULL; print_resource(LOG_DEBUG_3, "Added orphan", rsc, FALSE); CRM_CHECK(rsc != NULL, return NULL); resource_location(rsc, NULL, -INFINITY, "__orphan_dont_run__", data_set); for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; if (node->details->online && get_failcount(node, rsc, NULL, data_set)) { action_t *clear_op = NULL; action_t *ready = NULL; if (is_remote_node(node)) { char *pseudo_op_name = crm_concat(CRM_OP_PROBED, node->details->id, '_'); ready = get_pseudo_op(pseudo_op_name, data_set); free(pseudo_op_name); } else { ready = get_pseudo_op(CRM_OP_PROBED, data_set); } clear_op = custom_action(rsc, crm_concat(rsc->id, CRM_OP_CLEAR_FAILCOUNT, '_'), CRM_OP_CLEAR_FAILCOUNT, node, FALSE, TRUE, data_set); add_hash_param(clear_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); pe_rsc_info(rsc, "Clearing failcount (%d) for orphaned resource %s on %s (%s)", get_failcount(node, rsc, NULL, data_set), rsc->id, node->details->uname, clear_op->uuid); order_actions(clear_op, ready, pe_order_optional); } } } return rsc; } static void process_rsc_state(resource_t * rsc, node_t * node, enum action_fail_response on_fail, xmlNode * migrate_op, pe_working_set_t * data_set) { CRM_ASSERT(rsc); pe_rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s", rsc->id, role2text(rsc->role), node->details->uname, fail2text(on_fail)); /* process current state */ if (rsc->role != RSC_ROLE_UNKNOWN) { resource_t *iter = rsc; while (iter) { if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) { node_t *n = node_copy(node); pe_rsc_trace(rsc, "%s (aka. %s) known on %s", rsc->id, rsc->clone_name, n->details->uname); g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n); } if (is_set(iter->flags, pe_rsc_unique)) { break; } iter = iter->parent; } } if (rsc->role > RSC_ROLE_STOPPED && node->details->online == FALSE && is_set(rsc->flags, pe_rsc_managed)) { char *reason = NULL; gboolean should_fence = FALSE; /* if this is a remote_node living in a container, fence the container * by recovering it. Mark the resource as unmanaged. Once the container * and remote connenction are re-established, the status section will * get reset in the crmd freeing up this resource to run again once we * are sure we know the resources state. */ if (is_container_remote_node(node)) { set_bit(rsc->flags, pe_rsc_failed); should_fence = TRUE; } else if (is_set(data_set->flags, pe_flag_stonith_enabled)) { if (is_baremetal_remote_node(node) && is_not_set(node->details->remote_rsc->flags, pe_rsc_failed)) { /* setting unceen = true means that fencing of the remote node will * only occur if the connection resource is not going to start somewhere. * This allows connection resources on a failed cluster-node to move to * another node without requiring the baremetal remote nodes to be fenced * as well. */ node->details->unseen = TRUE; reason = g_strdup_printf("because %s is active there. Fencing will be revoked if remote-node connection can be re-established on another cluster-node.", rsc->id); } should_fence = TRUE; } if (should_fence) { if (reason == NULL) { reason = g_strdup_printf("because %s is thought to be active there", rsc->id); } pe_fence_node(data_set, node, reason); } g_free(reason); } if (node->details->unclean) { /* No extra processing needed * Also allows resources to be started again after a node is shot */ on_fail = action_fail_ignore; } switch (on_fail) { case action_fail_ignore: /* nothing to do */ break; case action_fail_fence: /* treat it as if it is still running * but also mark the node as unclean */ pe_fence_node(data_set, node, "because of resource failure(s)"); break; case action_fail_standby: node->details->standby = TRUE; node->details->standby_onfail = TRUE; break; case action_fail_block: /* is_managed == FALSE will prevent any * actions being sent for the resource */ clear_bit(rsc->flags, pe_rsc_managed); set_bit(rsc->flags, pe_rsc_block); break; case action_fail_migrate: /* make sure it comes up somewhere else * or not at all */ resource_location(rsc, node, -INFINITY, "__action_migration_auto__", data_set); break; case action_fail_stop: rsc->next_role = RSC_ROLE_STOPPED; break; case action_fail_recover: if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { set_bit(rsc->flags, pe_rsc_failed); stop_action(rsc, node, FALSE); } break; case action_fail_restart_container: set_bit(rsc->flags, pe_rsc_failed); if (rsc->container) { stop_action(rsc->container, node, FALSE); } else if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { stop_action(rsc, node, FALSE); } break; } /* ensure a remote-node connection failure forces an unclean remote-node * to be fenced. By setting unseen = FALSE, the remote-node failure will * result in a fencing operation regardless if we're going to attempt to * reconnect to the remote-node in this transition or not. */ if (is_set(rsc->flags, pe_rsc_failed) && rsc->is_remote_node) { node_t *tmpnode = pe_find_node(data_set->nodes, rsc->id); if (tmpnode && tmpnode->details->unclean) { tmpnode->details->unseen = FALSE; } } if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { if (is_set(rsc->flags, pe_rsc_orphan)) { if (is_set(rsc->flags, pe_rsc_managed)) { crm_config_warn("Detected active orphan %s running on %s", rsc->id, node->details->uname); } else { crm_config_warn("Cluster configured not to stop active orphans." " %s must be stopped manually on %s", rsc->id, node->details->uname); } } native_add_running(rsc, node, data_set); if (on_fail != action_fail_ignore) { set_bit(rsc->flags, pe_rsc_failed); } } else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) { /* Only do this for older status sections that included instance numbers * Otherwise stopped instances will appear as orphans */ pe_rsc_trace(rsc, "Resetting clone_name %s for %s (stopped)", rsc->clone_name, rsc->id); free(rsc->clone_name); rsc->clone_name = NULL; } else { char *key = stop_key(rsc); GListPtr possible_matches = find_actions(rsc->actions, key, node); GListPtr gIter = possible_matches; for (; gIter != NULL; gIter = gIter->next) { action_t *stop = (action_t *) gIter->data; stop->flags |= pe_action_optional; } g_list_free(possible_matches); free(key); } } /* create active recurring operations as optional */ static void process_recurring(node_t * node, resource_t * rsc, int start_index, int stop_index, GListPtr sorted_op_list, pe_working_set_t * data_set) { int counter = -1; const char *task = NULL; const char *status = NULL; GListPtr gIter = sorted_op_list; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index); for (; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; int interval = 0; char *key = NULL; const char *id = ID(rsc_op); const char *interval_s = NULL; counter++; if (node->details->online == FALSE) { pe_rsc_trace(rsc, "Skipping %s/%s: node is offline", rsc->id, node->details->uname); break; /* Need to check if there's a monitor for role="Stopped" */ } else if (start_index < stop_index && counter <= stop_index) { pe_rsc_trace(rsc, "Skipping %s/%s: resource is not active", id, node->details->uname); continue; } else if (counter < start_index) { pe_rsc_trace(rsc, "Skipping %s/%s: old %d", id, node->details->uname, counter); continue; } interval_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL); interval = crm_parse_int(interval_s, "0"); if (interval == 0) { pe_rsc_trace(rsc, "Skipping %s/%s: non-recurring", id, node->details->uname); continue; } status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); if (safe_str_eq(status, "-1")) { pe_rsc_trace(rsc, "Skipping %s/%s: status", id, node->details->uname); continue; } task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); /* create the action */ key = generate_op_key(rsc->id, task, interval); pe_rsc_trace(rsc, "Creating %s/%s", key, node->details->uname); custom_action(rsc, key, task, node, TRUE, TRUE, data_set); } } void calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index) { int counter = -1; int implied_monitor_start = -1; int implied_master_start = -1; const char *task = NULL; const char *status = NULL; GListPtr gIter = sorted_op_list; *stop_index = -1; *start_index = -1; for (; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; counter++; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); if (safe_str_eq(task, CRMD_ACTION_STOP) && safe_str_eq(status, "0")) { *stop_index = counter; } else if (safe_str_eq(task, CRMD_ACTION_START) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) { *start_index = counter; } else if ((implied_monitor_start <= *stop_index) && safe_str_eq(task, CRMD_ACTION_STATUS)) { const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC); if (safe_str_eq(rc, "0") || safe_str_eq(rc, "8")) { implied_monitor_start = counter; } } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE) || safe_str_eq(task, CRMD_ACTION_DEMOTE)) { implied_master_start = counter; } } if (*start_index == -1) { if (implied_master_start != -1) { *start_index = implied_master_start; } else if (implied_monitor_start != -1) { *start_index = implied_monitor_start; } } } static resource_t * unpack_lrm_rsc_state(node_t * node, xmlNode * rsc_entry, pe_working_set_t * data_set) { GListPtr gIter = NULL; int stop_index = -1; int start_index = -1; enum rsc_role_e req_role = RSC_ROLE_UNKNOWN; const char *task = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); resource_t *rsc = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; xmlNode *migrate_op = NULL; xmlNode *rsc_op = NULL; enum action_fail_response on_fail = FALSE; enum rsc_role_e saved_role = RSC_ROLE_UNKNOWN; crm_trace("[%s] Processing %s on %s", crm_element_name(rsc_entry), rsc_id, node->details->uname); /* extract operations */ op_list = NULL; sorted_op_list = NULL; for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next(rsc_op)) { if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) { op_list = g_list_prepend(op_list, rsc_op); } } if (op_list == NULL) { /* if there are no operations, there is nothing to do */ return NULL; } /* find the resource */ rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry); if (rsc == NULL) { rsc = process_orphan_resource(rsc_entry, node, data_set); } CRM_ASSERT(rsc != NULL); /* process operations */ saved_role = rsc->role; on_fail = action_fail_ignore; rsc->role = RSC_ROLE_UNKNOWN; sorted_op_list = g_list_sort(op_list, sort_op_by_callid); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) { migrate_op = rsc_op; } unpack_rsc_op(rsc, node, rsc_op, &on_fail, data_set); } /* create active recurring operations as optional */ calculate_active_ops(sorted_op_list, &start_index, &stop_index); process_recurring(node, rsc, start_index, stop_index, sorted_op_list, data_set); /* no need to free the contents */ g_list_free(sorted_op_list); process_rsc_state(rsc, node, on_fail, migrate_op, data_set); if (get_target_role(rsc, &req_role)) { if (rsc->next_role == RSC_ROLE_UNKNOWN || req_role < rsc->next_role) { pe_rsc_debug(rsc, "%s: Overwriting calculated next role %s" " with requested next role %s", rsc->id, role2text(rsc->next_role), role2text(req_role)); rsc->next_role = req_role; } else if (req_role > rsc->next_role) { pe_rsc_info(rsc, "%s: Not overwriting calculated next role %s" " with requested next role %s", rsc->id, role2text(rsc->next_role), role2text(req_role)); } } if (saved_role > rsc->role) { rsc->role = saved_role; } return rsc; } static void handle_orphaned_container_fillers(xmlNode * lrm_rsc_list, pe_working_set_t * data_set) { xmlNode *rsc_entry = NULL; for (rsc_entry = __xml_first_child(lrm_rsc_list); rsc_entry != NULL; rsc_entry = __xml_next(rsc_entry)) { resource_t *rsc; resource_t *container; const char *rsc_id; const char *container_id; if (safe_str_neq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE)) { continue; } container_id = crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER); rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); if (container_id == NULL || rsc_id == NULL) { continue; } container = pe_find_resource(data_set->resources, container_id); if (container == NULL) { continue; } rsc = pe_find_resource(data_set->resources, rsc_id); if (rsc == NULL || is_set(rsc->flags, pe_rsc_orphan_container_filler) == FALSE || rsc->container != NULL) { continue; } pe_rsc_trace(rsc, "Mapped orphaned rsc %s's container to %s", rsc->id, container_id); rsc->container = container; container->fillers = g_list_append(container->fillers, rsc); } } gboolean unpack_lrm_resources(node_t * node, xmlNode * lrm_rsc_list, pe_working_set_t * data_set) { xmlNode *rsc_entry = NULL; gboolean found_orphaned_container_filler = FALSE; GListPtr unexpected_containers = NULL; GListPtr gIter = NULL; resource_t *remote = NULL; CRM_CHECK(node != NULL, return FALSE); crm_trace("Unpacking resources on %s", node->details->uname); for (rsc_entry = __xml_first_child(lrm_rsc_list); rsc_entry != NULL; rsc_entry = __xml_next(rsc_entry)) { if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) { resource_t *rsc; rsc = unpack_lrm_rsc_state(node, rsc_entry, data_set); if (!rsc) { continue; } if (is_set(rsc->flags, pe_rsc_orphan_container_filler)) { found_orphaned_container_filler = TRUE; } if (is_set(rsc->flags, pe_rsc_unexpectedly_running)) { remote = rsc_contains_remote_node(data_set, rsc); if (remote) { unexpected_containers = g_list_append(unexpected_containers, remote); } } } } /* If a container resource is unexpectedly up... and the remote-node * connection resource for that container is not up, the entire container * must be recovered. */ for (gIter = unexpected_containers; gIter != NULL; gIter = gIter->next) { remote = (resource_t *) gIter->data; if (remote->role != RSC_ROLE_STARTED) { crm_warn("Recovering container resource %s. Resource is unexpectedly running and involves a remote-node.", remote->container->id); set_bit(remote->container->flags, pe_rsc_failed); } } /* now that all the resource state has been unpacked for this node * we have to go back and map any orphaned container fillers to their * container resource */ if (found_orphaned_container_filler) { handle_orphaned_container_fillers(lrm_rsc_list, data_set); } g_list_free(unexpected_containers); return TRUE; } static void set_active(resource_t * rsc) { resource_t *top = uber_parent(rsc); if (top && top->variant == pe_master) { rsc->role = RSC_ROLE_SLAVE; } else { rsc->role = RSC_ROLE_STARTED; } } static void set_node_score(gpointer key, gpointer value, gpointer user_data) { node_t *node = value; int *score = user_data; node->weight = *score; } #define STATUS_PATH_MAX 1024 static xmlNode * find_lrm_op(const char *resource, const char *op, const char *node, const char *source, pe_working_set_t * data_set) { int offset = 0; char xpath[STATUS_PATH_MAX]; offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//node_state[@uname='%s']", node); offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//" XML_LRM_TAG_RESOURCE "[@id='%s']", resource); /* Need to check against transition_magic too? */ if (source && safe_str_eq(op, CRMD_ACTION_MIGRATE)) { offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_target='%s']", op, source); } else if (source && safe_str_eq(op, CRMD_ACTION_MIGRATED)) { offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_source='%s']", op, source); } else { offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "/" XML_LRM_TAG_RSC_OP "[@operation='%s']", op); } CRM_LOG_ASSERT(offset > 0); return get_xpath_object(xpath, data_set->input, LOG_DEBUG); } static void unpack_rsc_migration(resource_t *rsc, node_t *node, xmlNode *xml_op, pe_working_set_t * data_set) { /* * The normal sequence is (now): migrate_to(Src) -> migrate_from(Tgt) -> stop(Src) * * So if a migrate_to is followed by a stop, then we dont need to care what * happended on the target node * * Without the stop, we need to look for a successful migrate_from. * This would also imply we're no longer running on the source * * Without the stop, and without a migrate_from op we make sure the resource * gets stopped on both source and target (assuming the target is up) * */ int stop_id = 0; int task_id = 0; xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP, node->details->id, NULL, data_set); if (stop_op) { crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id); } crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id); if (stop_op == NULL || stop_id < task_id) { int from_rc = 0, from_status = 0; const char *migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); node_t *target = pe_find_node(data_set->nodes, migrate_target); node_t *source = pe_find_node(data_set->nodes, migrate_source); xmlNode *migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, migrate_target, migrate_source, data_set); rsc->role = RSC_ROLE_STARTED; /* can be master? */ if (migrate_from) { crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc); crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, &from_status); pe_rsc_trace(rsc, "%s op on %s exited with status=%d, rc=%d", ID(migrate_from), migrate_target, from_status, from_rc); } if (migrate_from && from_rc == PCMK_OCF_OK && from_status == PCMK_LRM_OP_DONE) { pe_rsc_trace(rsc, "Detected dangling migration op: %s on %s", ID(xml_op), migrate_source); /* all good * just need to arrange for the stop action to get sent * but _without_ affecting the target somehow */ rsc->role = RSC_ROLE_STOPPED; rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node); } else if (migrate_from) { /* Failed */ if (target && target->details->online) { pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target, target->details->online); native_add_running(rsc, target, data_set); } } else { /* Pending or complete but erased */ if (target && target->details->online) { pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target, target->details->online); native_add_running(rsc, target, data_set); if (source && source->details->online) { /* If we make it here we have a partial migration. The migrate_to * has completed but the migrate_from on the target has not. Hold on * to the target and source on the resource. Later on if we detect that * the resource is still going to run on that target, we may continue * the migration */ rsc->partial_migration_target = target; rsc->partial_migration_source = source; } } else { /* Consider it failed here - forces a restart, prevents migration */ set_bit(rsc->flags, pe_rsc_failed); clear_bit(rsc->flags, pe_rsc_allow_migrate); } } } } static void unpack_rsc_migration_failure(resource_t *rsc, node_t *node, xmlNode *xml_op, pe_working_set_t * data_set) { const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); CRM_ASSERT(rsc); if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) { int stop_id = 0; int migrate_id = 0; const char *migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP, migrate_source, NULL, data_set); xmlNode *migrate_op = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATE, migrate_source, migrate_target, data_set); if (stop_op) { crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id); } if (migrate_op) { crm_element_value_int(migrate_op, XML_LRM_ATTR_CALLID, &migrate_id); } /* Get our state right */ rsc->role = RSC_ROLE_STARTED; /* can be master? */ if (stop_op == NULL || stop_id < migrate_id) { node_t *source = pe_find_node(data_set->nodes, migrate_source); if (source && source->details->online) { native_add_running(rsc, source, data_set); } } } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) { int stop_id = 0; int migrate_id = 0; const char *migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP, migrate_target, NULL, data_set); xmlNode *migrate_op = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, migrate_target, migrate_source, data_set); if (stop_op) { crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id); } if (migrate_op) { crm_element_value_int(migrate_op, XML_LRM_ATTR_CALLID, &migrate_id); } /* Get our state right */ rsc->role = RSC_ROLE_STARTED; /* can be master? */ if (stop_op == NULL || stop_id < migrate_id) { node_t *target = pe_find_node(data_set->nodes, migrate_target); pe_rsc_trace(rsc, "Stop: %p %d, Migrated: %p %d", stop_op, stop_id, migrate_op, migrate_id); if (target && target->details->online) { native_add_running(rsc, target, data_set); } } else if (migrate_op == NULL) { /* Make sure it gets cleaned up, the stop may pre-date the migrate_from */ rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node); } } } static const char *get_op_key(xmlNode *xml_op) { const char *key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); if(key == NULL) { key = ID(xml_op); } return key; } static void unpack_rsc_op_failure(resource_t *rsc, node_t *node, int rc, xmlNode *xml_op, enum action_fail_response *on_fail, pe_working_set_t * data_set) { int interval = 0; bool is_probe = FALSE; action_t *action = NULL; const char *key = get_op_key(xml_op); const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION); CRM_ASSERT(rsc); crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval); if(interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) { is_probe = TRUE; pe_rsc_trace(rsc, "is a probe: %s", key); } if (rc != PCMK_OCF_NOT_INSTALLED || is_set(data_set->flags, pe_flag_symmetric_cluster)) { crm_warn("Processing failed op %s for %s on %s: %s (%d)", task, rsc->id, node->details->uname, services_ocf_exitcode_str(rc), rc); crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname); if ((node->details->shutdown == FALSE) || (node->details->online == TRUE)) { add_node_copy(data_set->failed, xml_op); } } else { crm_trace("Processing failed op %s for %s on %s: %s (%d)", task, rsc->id, node->details->uname, services_ocf_exitcode_str(rc), rc); } action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set); if ((action->on_fail <= action_fail_fence && *on_fail < action->on_fail) || (action->on_fail == action_fail_restart_container && *on_fail <= action_fail_recover) || (*on_fail == action_fail_restart_container && action->on_fail >= action_fail_migrate)) { pe_rsc_trace(rsc, "on-fail %s -> %s for %s (%s)", fail2text(*on_fail), fail2text(action->on_fail), action->uuid, key); *on_fail = action->on_fail; } if (safe_str_eq(task, CRMD_ACTION_STOP)) { resource_location(rsc, node, -INFINITY, "__stop_fail__", data_set); } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) { unpack_rsc_migration_failure(rsc, node, xml_op, data_set); } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) { /* * staying in role=master ends up putting the PE/TE into a loop * setting role=slave is not dangerous because no master will be * promoted until the failed resource has been fully stopped */ rsc->next_role = RSC_ROLE_STOPPED; if (action->on_fail == action_fail_block) { rsc->role = RSC_ROLE_MASTER; } else { crm_warn("Forcing %s to stop after a failed demote action", rsc->id); rsc->role = RSC_ROLE_SLAVE; } } else if (compare_version("2.0", op_version) > 0 && safe_str_eq(task, CRMD_ACTION_START)) { crm_warn("Compatibility handling for failed op %s on %s", key, node->details->uname); resource_location(rsc, node, -INFINITY, "__legacy_start__", data_set); } if(is_probe && rc == PCMK_OCF_NOT_INSTALLED) { /* leave stopped */ pe_rsc_trace(rsc, "Leaving %s stopped", rsc->id); rsc->role = RSC_ROLE_STOPPED; } else if (rsc->role < RSC_ROLE_STARTED) { pe_rsc_trace(rsc, "Setting %s active", rsc->id); set_active(rsc); } pe_rsc_trace(rsc, "Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s", rsc->id, role2text(rsc->role), node->details->unclean ? "true" : "false", fail2text(action->on_fail), role2text(action->fail_role)); if (action->fail_role != RSC_ROLE_STARTED && rsc->next_role < action->fail_role) { rsc->next_role = action->fail_role; } if (action->fail_role == RSC_ROLE_STOPPED) { int score = -INFINITY; resource_t *fail_rsc = rsc; if (fail_rsc->parent) { resource_t *parent = uber_parent(fail_rsc); if ((parent->variant == pe_clone || parent->variant == pe_master) && is_not_set(parent->flags, pe_rsc_unique)) { /* for clone and master resources, if a child fails on an operation * with on-fail = stop, all the resources fail. Do this by preventing * the parent from coming up again. */ fail_rsc = parent; } } crm_warn("Making sure %s doesn't come up again", fail_rsc->id); /* make sure it doesnt come up again */ g_hash_table_destroy(fail_rsc->allowed_nodes); fail_rsc->allowed_nodes = node_hash_from_list(data_set->nodes); g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score); } pe_free_action(action); } static int determine_op_status( resource_t *rsc, int rc, int target_rc, node_t * node, xmlNode * xml_op, enum action_fail_response * on_fail, pe_working_set_t * data_set) { int interval = 0; int result = PCMK_LRM_OP_DONE; const char *key = get_op_key(xml_op); const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); bool is_probe = FALSE; CRM_ASSERT(rsc); crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval); if (interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) { is_probe = TRUE; } if (target_rc >= 0 && target_rc != rc) { result = PCMK_LRM_OP_ERROR; pe_rsc_debug(rsc, "%s on %s returned '%s' (%d) instead of the expected value: '%s' (%d)", key, node->details->uname, services_ocf_exitcode_str(rc), rc, services_ocf_exitcode_str(target_rc), target_rc); } /* we could clean this up significantly except for old LRMs and CRMs that * didnt include target_rc and liked to remap status */ switch (rc) { case PCMK_OCF_OK: if (is_probe && target_rc == 7) { result = PCMK_LRM_OP_DONE; set_bit(rsc->flags, pe_rsc_unexpectedly_running); pe_rsc_info(rsc, "Operation %s found resource %s active on %s", task, rsc->id, node->details->uname); /* legacy code for pre-0.6.5 operations */ } else if (target_rc < 0 && interval > 0 && rsc->role == RSC_ROLE_MASTER) { /* catch status ops that return 0 instead of 8 while they * are supposed to be in master mode */ result = PCMK_LRM_OP_ERROR; } break; case PCMK_OCF_NOT_RUNNING: if (is_probe || target_rc == rc) { result = PCMK_LRM_OP_DONE; rsc->role = RSC_ROLE_STOPPED; /* clear any previous failure actions */ *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; } else if (safe_str_neq(task, CRMD_ACTION_STOP)) { result = PCMK_LRM_OP_ERROR; } break; case PCMK_OCF_RUNNING_MASTER: if (is_probe) { result = PCMK_LRM_OP_DONE; pe_rsc_info(rsc, "Operation %s found resource %s active in master mode on %s", task, rsc->id, node->details->uname); } else if (target_rc == rc) { /* nothing to do */ } else if (target_rc >= 0) { result = PCMK_LRM_OP_ERROR; /* legacy code for pre-0.6.5 operations */ } else if (safe_str_neq(task, CRMD_ACTION_STATUS) || rsc->role != RSC_ROLE_MASTER) { result = PCMK_LRM_OP_ERROR; if (rsc->role != RSC_ROLE_MASTER) { crm_err("%s reported %s in master mode on %s", key, rsc->id, node->details->uname); } } rsc->role = RSC_ROLE_MASTER; break; case PCMK_OCF_FAILED_MASTER: rsc->role = RSC_ROLE_MASTER; result = PCMK_LRM_OP_ERROR; break; case PCMK_OCF_NOT_CONFIGURED: result = PCMK_LRM_OP_ERROR_FATAL; break; case PCMK_OCF_NOT_INSTALLED: case PCMK_OCF_INVALID_PARAM: case PCMK_OCF_INSUFFICIENT_PRIV: case PCMK_OCF_UNIMPLEMENT_FEATURE: if (rc == PCMK_OCF_UNIMPLEMENT_FEATURE && interval > 0) { result = PCMK_LRM_OP_NOTSUPPORTED; break; } else if(pe_can_fence(data_set, node) == FALSE && safe_str_eq(task, CRMD_ACTION_STOP)) { /* If a stop fails and we can't fence, there's nothing else we can do */ pe_proc_err("No further recovery can be attempted for %s: %s action failed with '%s' (%d)", rsc->id, task, services_ocf_exitcode_str(rc), rc); clear_bit(rsc->flags, pe_rsc_managed); set_bit(rsc->flags, pe_rsc_block); } result = PCMK_LRM_OP_ERROR_HARD; break; default: if (result == PCMK_LRM_OP_DONE) { crm_info("Treating %s (rc=%d) on %s as an ERROR", key, rc, node->details->uname); result = PCMK_LRM_OP_ERROR; } } return result; } static bool check_operation_expiry(resource_t *rsc, node_t *node, int rc, xmlNode *xml_op, pe_working_set_t * data_set) { bool expired = FALSE; time_t last_failure = 0; int clear_failcount = 0; int interval = 0; const char *key = get_op_key(xml_op); const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); if (rsc->failure_timeout > 0) { int last_run = 0; if (crm_element_value_int(xml_op, XML_RSC_OP_LAST_CHANGE, &last_run) == 0) { time_t now = get_effective_time(data_set); if (now > (last_run + rsc->failure_timeout)) { expired = TRUE; } } } if (expired) { if (rsc->failure_timeout > 0) { int fc = get_failcount_full(node, rsc, &last_failure, FALSE, xml_op, data_set); if(fc) { if (get_failcount_full(node, rsc, &last_failure, TRUE, xml_op, data_set) == 0) { clear_failcount = 1; crm_notice("Clearing expired failcount for %s on %s", rsc->id, node->details->uname); } else { expired = FALSE; } } } } else if (strstr(ID(xml_op), "last_failure") && ((strcmp(task, "start") == 0) || (strcmp(task, "monitor") == 0))) { op_digest_cache_t *digest_data = NULL; digest_data = rsc_action_digest_cmp(rsc, xml_op, node, data_set); if (digest_data->rc == RSC_DIGEST_UNKNOWN) { crm_trace("rsc op %s on node %s does not have a op digest to compare against", rsc->id, key, node->details->id); } else if (digest_data->rc != RSC_DIGEST_MATCH) { clear_failcount = 1; crm_info ("Clearing failcount for %s on %s, %s failed and now resource parameters have changed.", task, rsc->id, node->details->uname); } } if (clear_failcount) { action_t *clear_op = NULL; clear_op = custom_action(rsc, crm_concat(rsc->id, CRM_OP_CLEAR_FAILCOUNT, '_'), CRM_OP_CLEAR_FAILCOUNT, node, FALSE, TRUE, data_set); add_hash_param(clear_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); } crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval); if(expired && interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) { switch(rc) { case PCMK_OCF_OK: case PCMK_OCF_NOT_RUNNING: case PCMK_OCF_RUNNING_MASTER: /* Don't expire probes that return these values */ expired = FALSE; break; } } return expired; } int get_target_rc(xmlNode *xml_op) { int dummy = 0; int target_rc = 0; char *dummy_string = NULL; const char *key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY); if (key == NULL) { return -1; } decode_transition_key(key, &dummy_string, &dummy, &dummy, &target_rc); free(dummy_string); return target_rc; } static enum action_fail_response get_action_on_fail(resource_t *rsc, const char *key, const char *task, pe_working_set_t * data_set) { int result = action_fail_recover; action_t *action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set); result = action->on_fail; pe_free_action(action); return result; } static void update_resource_state(resource_t *rsc, node_t * node, xmlNode * xml_op, const char *task, int rc, enum action_fail_response *on_fail, pe_working_set_t * data_set) { gboolean clear_past_failure = FALSE; CRM_ASSERT(rsc); if (rc == PCMK_OCF_NOT_RUNNING) { clear_past_failure = TRUE; } else if (rc == PCMK_OCF_NOT_INSTALLED) { rsc->role = RSC_ROLE_STOPPED; } else if (safe_str_eq(task, CRMD_ACTION_STATUS)) { clear_past_failure = TRUE; if (rsc->role < RSC_ROLE_STARTED) { set_active(rsc); } } else if (safe_str_eq(task, CRMD_ACTION_START)) { rsc->role = RSC_ROLE_STARTED; clear_past_failure = TRUE; } else if (safe_str_eq(task, CRMD_ACTION_STOP)) { rsc->role = RSC_ROLE_STOPPED; clear_past_failure = TRUE; } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; clear_past_failure = TRUE; } else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) { /* Demote from Master does not clear an error */ rsc->role = RSC_ROLE_SLAVE; } else if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) { rsc->role = RSC_ROLE_STARTED; clear_past_failure = TRUE; } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) { unpack_rsc_migration(rsc, node, xml_op, data_set); } else if (rsc->role < RSC_ROLE_STARTED) { /* migrate_to and migrate_from will land here */ pe_rsc_trace(rsc, "%s active on %s", rsc->id, node->details->uname); set_active(rsc); } /* clear any previous failure actions */ if (clear_past_failure) { switch (*on_fail) { case action_fail_stop: case action_fail_fence: case action_fail_migrate: case action_fail_standby: pe_rsc_trace(rsc, "%s.%s is not cleared by a completed stop", rsc->id, fail2text(*on_fail)); break; case action_fail_block: case action_fail_ignore: case action_fail_recover: *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; break; case action_fail_restart_container: *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; } } } gboolean unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, enum action_fail_response * on_fail, pe_working_set_t * data_set) { int task_id = 0; const char *key = NULL; const char *task = NULL; const char *task_key = NULL; int rc = 0; int status = PCMK_LRM_OP_PENDING-1; int target_rc = get_target_rc(xml_op); int interval = 0; gboolean expired = FALSE; resource_t *parent = rsc; enum action_fail_response failure_strategy = action_fail_recover; CRM_CHECK(rsc != NULL, return FALSE); CRM_CHECK(node != NULL, return FALSE); CRM_CHECK(xml_op != NULL, return FALSE); task_key = get_op_key(xml_op); task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY); crm_element_value_int(xml_op, XML_LRM_ATTR_RC, &rc); crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id); crm_element_value_int(xml_op, XML_LRM_ATTR_OPSTATUS, &status); crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval); CRM_CHECK(task != NULL, return FALSE); CRM_CHECK(status <= PCMK_LRM_OP_NOT_INSTALLED, return FALSE); CRM_CHECK(status >= PCMK_LRM_OP_PENDING, return FALSE); if (safe_str_eq(task, CRMD_ACTION_NOTIFY)) { /* safe to ignore these */ return TRUE; } if (is_not_set(rsc->flags, pe_rsc_unique)) { parent = uber_parent(rsc); } pe_rsc_trace(rsc, "Unpacking task %s/%s (call_id=%d, status=%d, rc=%d) on %s (role=%s)", task_key, task, task_id, status, rc, node->details->uname, role2text(rsc->role)); if (node->details->unclean) { pe_rsc_trace(rsc, "Node %s (where %s is running) is unclean." " Further action depends on the value of the stop's on-fail attribue", node->details->uname, rsc->id); } if (status == PCMK_LRM_OP_ERROR) { /* Older versions set this if rc != 0 but its up to us to decide */ status = PCMK_LRM_OP_DONE; } if(status != PCMK_LRM_OP_NOT_INSTALLED) { expired = check_operation_expiry(rsc, node, rc, xml_op, data_set); } if (expired && target_rc != rc) { const char *magic = crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC); pe_rsc_debug(rsc, "Expired operation '%s' on %s returned '%s' (%d) instead of the expected value: '%s' (%d)", key, node->details->uname, services_ocf_exitcode_str(rc), rc, services_ocf_exitcode_str(target_rc), target_rc); if(interval == 0) { crm_notice("Ignoring expired calculated failure %s (rc=%d, magic=%s) on %s", task_key, rc, magic, node->details->uname); goto done; } else if(node->details->online && node->details->unclean == FALSE) { crm_notice("Re-initiated expired calculated failure %s (rc=%d, magic=%s) on %s", task_key, rc, magic, node->details->uname); /* This is SO horrible, but we don't have access to CancelXmlOp() yet */ crm_xml_add(xml_op, XML_LRM_ATTR_RESTART_DIGEST, "calculated-failure-timeout"); goto done; } } if(status == PCMK_LRM_OP_DONE || status == PCMK_LRM_OP_ERROR) { status = determine_op_status(rsc, rc, target_rc, node, xml_op, on_fail, data_set); } pe_rsc_trace(rsc, "Handling status: %d", status); switch (status) { case PCMK_LRM_OP_CANCELLED: /* do nothing?? */ pe_err("Dont know what to do for cancelled ops yet"); break; case PCMK_LRM_OP_PENDING: if (safe_str_eq(task, CRMD_ACTION_START)) { set_bit(rsc->flags, pe_rsc_start_pending); set_active(rsc); } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE) && node->details->unclean) { /* If a pending migrate_to action is out on a unclean node, * we have to force the stop action on the target. */ const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); node_t *target = pe_find_node(data_set->nodes, migrate_target); if (target) { stop_action(rsc, target, FALSE); } } if (rsc->pending_task == NULL) { if (safe_str_eq(task, CRMD_ACTION_STATUS) && interval == 0) { /* Comment this out until someone requests it */ /* Comment this out until cl#5184 is fixed */ /*rsc->pending_task = strdup("probe");*/ } else { rsc->pending_task = strdup(task); } } break; case PCMK_LRM_OP_DONE: pe_rsc_trace(rsc, "%s/%s completed on %s", rsc->id, task, node->details->uname); update_resource_state(rsc, node, xml_op, task, rc, on_fail, data_set); break; case PCMK_LRM_OP_NOT_INSTALLED: failure_strategy = get_action_on_fail(rsc, task_key, task, data_set); if (failure_strategy == action_fail_ignore) { crm_warn("Cannot ignore failed %s (status=%d, rc=%d) on %s: " "Resource agent doesn't exist", task_key, status, rc, node->details->uname); /* Also for printing it as "FAILED" by marking it as pe_rsc_failed later */ *on_fail = action_fail_migrate; } resource_location(parent, node, -INFINITY, "hard-error", data_set); unpack_rsc_op_failure(rsc, node, rc, xml_op, on_fail, data_set); break; case PCMK_LRM_OP_ERROR: case PCMK_LRM_OP_ERROR_HARD: case PCMK_LRM_OP_ERROR_FATAL: case PCMK_LRM_OP_TIMEOUT: case PCMK_LRM_OP_NOTSUPPORTED: failure_strategy = get_action_on_fail(rsc, task_key, task, data_set); if ((failure_strategy == action_fail_ignore) || (failure_strategy == action_fail_restart_container && safe_str_eq(task, CRMD_ACTION_STOP))) { crm_warn("Pretending the failure of %s (rc=%d) on %s succeeded", task_key, rc, node->details->uname); update_resource_state(rsc, node, xml_op, task, target_rc, on_fail, data_set); crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname); set_bit(rsc->flags, pe_rsc_failure_ignored); if ((node->details->shutdown == FALSE) || (node->details->online == TRUE)) { crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname); add_node_copy(data_set->failed, xml_op); } if (failure_strategy == action_fail_restart_container && *on_fail <= action_fail_recover) { *on_fail = failure_strategy; } } else { unpack_rsc_op_failure(rsc, node, rc, xml_op, on_fail, data_set); if(status == PCMK_LRM_OP_ERROR_HARD) { do_crm_log(rc != PCMK_OCF_NOT_INSTALLED?LOG_ERR:LOG_NOTICE, "Preventing %s from re-starting on %s: operation %s failed '%s' (%d)", parent->id, node->details->uname, task, services_ocf_exitcode_str(rc), rc); resource_location(parent, node, -INFINITY, "hard-error", data_set); } else if(status == PCMK_LRM_OP_ERROR_FATAL) { crm_err("Preventing %s from re-starting anywhere: operation %s failed '%s' (%d)", parent->id, task, services_ocf_exitcode_str(rc), rc); resource_location(parent, NULL, -INFINITY, "fatal-error", data_set); } } break; } done: pe_rsc_trace(rsc, "Resource %s after %s: role=%s", rsc->id, task, role2text(rsc->role)); return TRUE; } gboolean add_node_attrs(xmlNode * xml_obj, node_t * node, gboolean overwrite, pe_working_set_t * data_set) { const char *cluster_name = NULL; g_hash_table_insert(node->details->attrs, strdup("#uname"), strdup(node->details->uname)); g_hash_table_insert(node->details->attrs, strdup("#kind"), strdup(node->details->remote_rsc?"container":"cluster")); g_hash_table_insert(node->details->attrs, strdup("#" XML_ATTR_ID), strdup(node->details->id)); if (safe_str_eq(node->details->id, data_set->dc_uuid)) { data_set->dc_node = node; node->details->is_dc = TRUE; g_hash_table_insert(node->details->attrs, strdup("#" XML_ATTR_DC), strdup(XML_BOOLEAN_TRUE)); } else { g_hash_table_insert(node->details->attrs, strdup("#" XML_ATTR_DC), strdup(XML_BOOLEAN_FALSE)); } cluster_name = g_hash_table_lookup(data_set->config_hash, "cluster-name"); if (cluster_name) { g_hash_table_insert(node->details->attrs, strdup("#cluster-name"), strdup(cluster_name)); } unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_ATTR_SETS, NULL, node->details->attrs, NULL, overwrite, data_set->now); if (g_hash_table_lookup(node->details->attrs, "#site-name") == NULL) { const char *site_name = g_hash_table_lookup(node->details->attrs, "site-name"); if (site_name) { /* Prefix '#' to the key */ g_hash_table_insert(node->details->attrs, strdup("#site-name"), strdup(site_name)); } else if (cluster_name) { /* Default to cluster-name if unset */ g_hash_table_insert(node->details->attrs, strdup("#site-name"), strdup(cluster_name)); } } return TRUE; } static GListPtr extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter) { int counter = -1; int stop_index = -1; int start_index = -1; xmlNode *rsc_op = NULL; GListPtr gIter = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; /* extract operations */ op_list = NULL; sorted_op_list = NULL; for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next(rsc_op)) { if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) { crm_xml_add(rsc_op, "resource", rsc); crm_xml_add(rsc_op, XML_ATTR_UNAME, node); op_list = g_list_prepend(op_list, rsc_op); } } if (op_list == NULL) { /* if there are no operations, there is nothing to do */ return NULL; } sorted_op_list = g_list_sort(op_list, sort_op_by_callid); /* create active recurring operations as optional */ if (active_filter == FALSE) { return sorted_op_list; } op_list = NULL; calculate_active_ops(sorted_op_list, &start_index, &stop_index); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; counter++; if (start_index < stop_index) { crm_trace("Skipping %s: not active", ID(rsc_entry)); break; } else if (counter < start_index) { crm_trace("Skipping %s: old", ID(rsc_op)); continue; } op_list = g_list_append(op_list, rsc_op); } g_list_free(sorted_op_list); return op_list; } GListPtr find_operations(const char *rsc, const char *node, gboolean active_filter, pe_working_set_t * data_set) { GListPtr output = NULL; GListPtr intermediate = NULL; xmlNode *tmp = NULL; xmlNode *status = find_xml_node(data_set->input, XML_CIB_TAG_STATUS, TRUE); node_t *this_node = NULL; xmlNode *node_state = NULL; for (node_state = __xml_first_child(status); node_state != NULL; node_state = __xml_next(node_state)) { if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) { const char *uname = crm_element_value(node_state, XML_ATTR_UNAME); if (node != NULL && safe_str_neq(uname, node)) { continue; } this_node = pe_find_node(data_set->nodes, uname); if(this_node == NULL) { CRM_LOG_ASSERT(this_node != NULL); continue; } else if (is_remote_node(this_node)) { determine_remote_online_status(this_node); } else { determine_online_status(node_state, this_node, data_set); } if (this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) { /* offline nodes run no resources... * unless stonith is enabled in which case we need to * make sure rsc start events happen after the stonith */ xmlNode *lrm_rsc = NULL; tmp = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); tmp = find_xml_node(tmp, XML_LRM_TAG_RESOURCES, FALSE); for (lrm_rsc = __xml_first_child(tmp); lrm_rsc != NULL; lrm_rsc = __xml_next(lrm_rsc)) { if (crm_str_eq((const char *)lrm_rsc->name, XML_LRM_TAG_RESOURCE, TRUE)) { const char *rsc_id = crm_element_value(lrm_rsc, XML_ATTR_ID); if (rsc != NULL && safe_str_neq(rsc_id, rsc)) { continue; } intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter); output = g_list_concat(output, intermediate); } } } } } return output; } diff --git a/pengine/allocate.c b/pengine/allocate.c index 8d02d9b6c2..e708e265f7 100644 --- a/pengine/allocate.c +++ b/pengine/allocate.c @@ -1,2517 +1,2521 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include CRM_TRACE_INIT_DATA(pe_allocate); void set_alloc_actions(pe_working_set_t * data_set); void migrate_reload_madness(pe_working_set_t * data_set); resource_alloc_functions_t resource_class_alloc_functions[] = { { native_merge_weights, native_color, native_create_actions, native_create_probe, native_internal_constraints, native_rsc_colocation_lh, native_rsc_colocation_rh, native_rsc_location, native_action_flags, native_update_actions, native_expand, native_append_meta, }, { group_merge_weights, group_color, group_create_actions, native_create_probe, group_internal_constraints, group_rsc_colocation_lh, group_rsc_colocation_rh, group_rsc_location, group_action_flags, group_update_actions, group_expand, group_append_meta, }, { clone_merge_weights, clone_color, clone_create_actions, clone_create_probe, clone_internal_constraints, clone_rsc_colocation_lh, clone_rsc_colocation_rh, clone_rsc_location, clone_action_flags, clone_update_actions, clone_expand, clone_append_meta, }, { master_merge_weights, master_color, master_create_actions, clone_create_probe, master_internal_constraints, clone_rsc_colocation_lh, master_rsc_colocation_rh, clone_rsc_location, clone_action_flags, clone_update_actions, clone_expand, master_append_meta, } }; static gboolean check_rsc_parameters(resource_t * rsc, node_t * node, xmlNode * rsc_entry, gboolean active_here, pe_working_set_t * data_set) { int attr_lpc = 0; gboolean force_restart = FALSE; gboolean delete_resource = FALSE; gboolean changed = FALSE; const char *value = NULL; const char *old_value = NULL; const char *attr_list[] = { XML_ATTR_TYPE, XML_AGENT_ATTR_CLASS, XML_AGENT_ATTR_PROVIDER }; for (; attr_lpc < DIMOF(attr_list); attr_lpc++) { value = crm_element_value(rsc->xml, attr_list[attr_lpc]); old_value = crm_element_value(rsc_entry, attr_list[attr_lpc]); if (value == old_value /* ie. NULL */ || crm_str_eq(value, old_value, TRUE)) { continue; } changed = TRUE; trigger_unfencing(rsc, node, "Device definition changed", NULL, data_set); if (active_here) { force_restart = TRUE; crm_notice("Forcing restart of %s on %s, %s changed: %s -> %s", rsc->id, node->details->uname, attr_list[attr_lpc], crm_str(old_value), crm_str(value)); } } if (force_restart) { /* make sure the restart happens */ stop_action(rsc, node, FALSE); set_bit(rsc->flags, pe_rsc_start_pending); delete_resource = TRUE; } else if (changed) { delete_resource = TRUE; } return delete_resource; } static void CancelXmlOp(resource_t * rsc, xmlNode * xml_op, node_t * active_node, const char *reason, pe_working_set_t * data_set) { int interval = 0; action_t *cancel = NULL; char *key = NULL; const char *task = NULL; const char *call_id = NULL; const char *interval_s = NULL; CRM_CHECK(xml_op != NULL, return); CRM_CHECK(active_node != NULL, return); task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); call_id = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); interval_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); interval = crm_parse_int(interval_s, "0"); /* we need to reconstruct the key because of the way we used to construct resource IDs */ key = generate_op_key(rsc->id, task, interval); crm_info("Action %s on %s will be stopped: %s", key, active_node->details->uname, reason ? reason : "unknown"); /* TODO: This looks highly dangerous if we ever try to schedule 'key' too */ cancel = custom_action(rsc, strdup(key), RSC_CANCEL, active_node, FALSE, TRUE, data_set); free(cancel->task); free(cancel->cancel_task); cancel->task = strdup(RSC_CANCEL); cancel->cancel_task = strdup(task); add_hash_param(cancel->meta, XML_LRM_ATTR_TASK, task); add_hash_param(cancel->meta, XML_LRM_ATTR_CALLID, call_id); add_hash_param(cancel->meta, XML_LRM_ATTR_INTERVAL, interval_s); custom_action_order(rsc, stop_key(rsc), NULL, rsc, NULL, cancel, pe_order_optional, data_set); free(key); key = NULL; } static gboolean check_action_definition(resource_t * rsc, node_t * active_node, xmlNode * xml_op, pe_working_set_t * data_set) { char *key = NULL; int interval = 0; const char *interval_s = NULL; const op_digest_cache_t *digest_data = NULL; gboolean did_change = FALSE; const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *op_version; CRM_CHECK(active_node != NULL, return FALSE); if (safe_str_eq(task, RSC_STOP)) { return FALSE; } interval_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); interval = crm_parse_int(interval_s, "0"); if (interval > 0) { xmlNode *op_match = NULL; /* we need to reconstruct the key because of the way we used to construct resource IDs */ key = generate_op_key(rsc->id, task, interval); pe_rsc_trace(rsc, "Checking parameters for %s", key); op_match = find_rsc_op_entry(rsc, key); if (op_match == NULL && is_set(data_set->flags, pe_flag_stop_action_orphans)) { CancelXmlOp(rsc, xml_op, active_node, "orphan", data_set); free(key); return TRUE; } else if (op_match == NULL) { pe_rsc_debug(rsc, "Orphan action detected: %s on %s", key, active_node->details->uname); free(key); return TRUE; } free(key); key = NULL; } crm_trace("Testing %s_%s_%d on %s", rsc->id, task, interval, active_node?active_node->details->uname:"N/A"); if (interval == 0 && safe_str_eq(task, RSC_STATUS)) { /* Reload based on the start action not a probe */ task = RSC_START; } else if (interval == 0 && safe_str_eq(task, RSC_MIGRATED)) { /* Reload based on the start action not a migrate */ task = RSC_START; } digest_data = rsc_action_digest_cmp(rsc, xml_op, active_node, data_set); op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION); /* Changes that force a restart */ if (digest_data->rc == RSC_DIGEST_RESTART) { const char *digest_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST); did_change = TRUE; key = generate_op_key(rsc->id, task, interval); crm_log_xml_info(digest_data->params_restart, "params:restart"); pe_rsc_info(rsc, "Parameters to %s on %s changed: was %s vs. now %s (restart:%s) %s", key, active_node->details->uname, crm_str(digest_restart), digest_data->digest_restart_calc, op_version, crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC)); custom_action(rsc, key, task, NULL, FALSE, TRUE, data_set); trigger_unfencing(rsc, NULL, "Device parameters changed", NULL, data_set); } else if ((digest_data->rc == RSC_DIGEST_ALL) || (digest_data->rc == RSC_DIGEST_UNKNOWN)) { /* Changes that can potentially be handled by a reload */ const char *digest_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST); const char *digest_all = crm_element_value(xml_op, XML_LRM_ATTR_OP_DIGEST); did_change = TRUE; trigger_unfencing(rsc, NULL, "Device parameters changed (reload)", NULL, data_set); crm_log_xml_info(digest_data->params_all, "params:reload"); key = generate_op_key(rsc->id, task, interval); pe_rsc_info(rsc, "Parameters to %s on %s changed: was %s vs. now %s (reload:%s) %s", key, active_node->details->uname, crm_str(digest_all), digest_data->digest_all_calc, op_version, crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC)); if (interval > 0) { action_t *op = NULL; #if 0 /* Always reload/restart the entire resource */ op = custom_action(rsc, start_key(rsc), RSC_START, NULL, FALSE, TRUE, data_set); update_action_flags(op, pe_action_allow_reload_conversion); #else /* Re-sending the recurring op is sufficient - the old one will be cancelled automatically */ op = custom_action(rsc, key, task, NULL, FALSE, TRUE, data_set); custom_action_order(rsc, start_key(rsc), NULL, NULL, NULL, op, pe_order_runnable_left, data_set); #endif } else if (digest_restart) { pe_rsc_trace(rsc, "Reloading '%s' action for resource %s", task, rsc->id); /* Allow this resource to reload - unless something else causes a full restart */ set_bit(rsc->flags, pe_rsc_try_reload); /* Create these for now, it keeps the action IDs the same in the regression outputs */ custom_action(rsc, key, task, NULL, TRUE, TRUE, data_set); } else { pe_rsc_trace(rsc, "Resource %s doesn't know how to reload", rsc->id); /* Re-send the start/demote/promote op * Recurring ops will be detected independantly */ custom_action(rsc, key, task, NULL, FALSE, TRUE, data_set); } } return did_change; } extern gboolean DeleteRsc(resource_t * rsc, node_t * node, gboolean optional, pe_working_set_t * data_set); static void check_actions_for(xmlNode * rsc_entry, resource_t * rsc, node_t * node, pe_working_set_t * data_set) { GListPtr gIter = NULL; int offset = -1; int interval = 0; int stop_index = 0; int start_index = 0; const char *task = NULL; const char *interval_s = NULL; xmlNode *rsc_op = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; gboolean is_probe = FALSE; gboolean did_change = FALSE; CRM_CHECK(node != NULL, return); if (is_set(rsc->flags, pe_rsc_orphan)) { resource_t *parent = uber_parent(rsc); if(parent == NULL || parent->variant < pe_clone || is_set(parent->flags, pe_rsc_unique)) { pe_rsc_trace(rsc, "Skipping param check for %s and deleting: orphan", rsc->id); DeleteRsc(rsc, node, FALSE, data_set); } else { pe_rsc_trace(rsc, "Skipping param check for %s (orphan clone)", rsc->id); } return; } else if (pe_find_node_id(rsc->running_on, node->details->id) == NULL) { if (check_rsc_parameters(rsc, node, rsc_entry, FALSE, data_set)) { DeleteRsc(rsc, node, FALSE, data_set); } pe_rsc_trace(rsc, "Skipping param check for %s: no longer active on %s", rsc->id, node->details->uname); return; } pe_rsc_trace(rsc, "Processing %s on %s", rsc->id, node->details->uname); if (check_rsc_parameters(rsc, node, rsc_entry, TRUE, data_set)) { DeleteRsc(rsc, node, FALSE, data_set); } for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next(rsc_op)) { if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) { op_list = g_list_prepend(op_list, rsc_op); } } sorted_op_list = g_list_sort(op_list, sort_op_by_callid); calculate_active_ops(sorted_op_list, &start_index, &stop_index); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; offset++; if (start_index < stop_index) { /* stopped */ continue; } else if (offset < start_index) { /* action occurred prior to a start */ continue; } is_probe = FALSE; did_change = FALSE; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); interval_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL); interval = crm_parse_int(interval_s, "0"); if (interval == 0 && safe_str_eq(task, RSC_STATUS)) { is_probe = TRUE; } if (interval > 0 && (is_set(rsc->flags, pe_rsc_maintenance) || node->details->maintenance)) { CancelXmlOp(rsc, rsc_op, node, "maintenance mode", data_set); } else if (is_probe || safe_str_eq(task, RSC_START) || interval > 0 || safe_str_eq(task, RSC_MIGRATED)) { did_change = check_action_definition(rsc, node, rsc_op, data_set); } if (did_change && get_failcount(node, rsc, NULL, data_set)) { char *key = NULL; action_t *action_clear = NULL; key = generate_op_key(rsc->id, CRM_OP_CLEAR_FAILCOUNT, 0); action_clear = custom_action(rsc, key, CRM_OP_CLEAR_FAILCOUNT, node, FALSE, TRUE, data_set); set_bit(action_clear->flags, pe_action_runnable); } } g_list_free(sorted_op_list); } static GListPtr find_rsc_list(GListPtr result, resource_t * rsc, const char *id, gboolean renamed_clones, gboolean partial, pe_working_set_t * data_set) { GListPtr gIter = NULL; gboolean match = FALSE; if (id == NULL) { return NULL; } else if (rsc == NULL && data_set) { for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; result = find_rsc_list(result, child, id, renamed_clones, partial, NULL); } return result; } else if (rsc == NULL) { return NULL; } if (partial) { if (strstr(rsc->id, id)) { match = TRUE; } else if (renamed_clones && rsc->clone_name && strstr(rsc->clone_name, id)) { match = TRUE; } } else { if (strcmp(rsc->id, id) == 0) { match = TRUE; } else if (renamed_clones && rsc->clone_name && strcmp(rsc->clone_name, id) == 0) { match = TRUE; } } if (match) { result = g_list_prepend(result, rsc); } if (rsc->children) { gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; result = find_rsc_list(result, child, id, renamed_clones, partial, NULL); } } return result; } static void check_actions(pe_working_set_t * data_set) { const char *id = NULL; node_t *node = NULL; xmlNode *lrm_rscs = NULL; xmlNode *status = get_object_root(XML_CIB_TAG_STATUS, data_set->input); xmlNode *node_state = NULL; for (node_state = __xml_first_child(status); node_state != NULL; node_state = __xml_next(node_state)) { if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) { id = crm_element_value(node_state, XML_ATTR_ID); lrm_rscs = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); lrm_rscs = find_xml_node(lrm_rscs, XML_LRM_TAG_RESOURCES, FALSE); node = pe_find_node_id(data_set->nodes, id); if (node == NULL) { continue; /* Still need to check actions for a maintenance node to cancel existing monitor operations */ } else if (can_run_resources(node) == FALSE && node->details->maintenance == FALSE) { crm_trace("Skipping param check for %s: cant run resources", node->details->uname); continue; } crm_trace("Processing node %s", node->details->uname); if (node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) { xmlNode *rsc_entry = NULL; for (rsc_entry = __xml_first_child(lrm_rscs); rsc_entry != NULL; rsc_entry = __xml_next(rsc_entry)) { if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) { if (xml_has_children(rsc_entry)) { GListPtr gIter = NULL; GListPtr result = NULL; const char *rsc_id = ID(rsc_entry); CRM_CHECK(rsc_id != NULL, return); result = find_rsc_list(NULL, NULL, rsc_id, TRUE, FALSE, data_set); for (gIter = result; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; if (rsc->variant != pe_native) { continue; } check_actions_for(rsc_entry, rsc, node, data_set); } g_list_free(result); } } } } } } } static gboolean apply_placement_constraints(pe_working_set_t * data_set) { GListPtr gIter = NULL; crm_trace("Applying constraints..."); for (gIter = data_set->placement_constraints; gIter != NULL; gIter = gIter->next) { rsc_to_node_t *cons = (rsc_to_node_t *) gIter->data; cons->rsc_lh->cmds->rsc_location(cons->rsc_lh, cons); } return TRUE; } static gboolean failcount_clear_action_exists(node_t * node, resource_t * rsc) { gboolean rc = FALSE; char *key = crm_concat(rsc->id, CRM_OP_CLEAR_FAILCOUNT, '_'); GListPtr list = find_actions_exact(rsc->actions, key, node); if (list) { rc = TRUE; } g_list_free(list); free(key); return rc; } static void common_apply_stickiness(resource_t * rsc, node_t * node, pe_working_set_t * data_set) { int fail_count = 0; resource_t *failed = rsc; if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; common_apply_stickiness(child_rsc, node, data_set); } return; } if (is_set(rsc->flags, pe_rsc_managed) && rsc->stickiness != 0 && g_list_length(rsc->running_on) == 1) { node_t *current = pe_find_node_id(rsc->running_on, node->details->id); node_t *match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (current == NULL) { } else if (match != NULL || is_set(data_set->flags, pe_flag_symmetric_cluster)) { resource_t *sticky_rsc = rsc; resource_location(sticky_rsc, node, rsc->stickiness, "stickiness", data_set); pe_rsc_debug(sticky_rsc, "Resource %s: preferring current location" " (node=%s, weight=%d)", sticky_rsc->id, node->details->uname, rsc->stickiness); } else { GHashTableIter iter; node_t *nIter = NULL; pe_rsc_debug(rsc, "Ignoring stickiness for %s: the cluster is asymmetric" " and node %s is not explicitly allowed", rsc->id, node->details->uname); g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&nIter)) { crm_err("%s[%s] = %d", rsc->id, nIter->details->uname, nIter->weight); } } } /* only check failcount here if a failcount clear action * has not already been placed for this resource on the node. * There is no sense in potentially forcing the rsc from this * node if the failcount is being reset anyway. */ if (failcount_clear_action_exists(node, rsc) == FALSE) { fail_count = get_failcount_all(node, rsc, NULL, data_set); } if (fail_count > 0 && rsc->migration_threshold != 0) { if (is_not_set(rsc->flags, pe_rsc_unique)) { failed = uber_parent(rsc); } if (rsc->migration_threshold <= fail_count) { resource_location(failed, node, -INFINITY, "__fail_limit__", data_set); crm_warn("Forcing %s away from %s after %d failures (max=%d)", failed->id, node->details->uname, fail_count, rsc->migration_threshold); } else { crm_info("%s can fail %d more times on %s before being forced off", failed->id, rsc->migration_threshold - fail_count, node->details->uname); } } } static void complex_set_cmds(resource_t * rsc) { GListPtr gIter = rsc->children; rsc->cmds = &resource_class_alloc_functions[rsc->variant]; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; complex_set_cmds(child_rsc); } } void set_alloc_actions(pe_working_set_t * data_set) { GListPtr gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; complex_set_cmds(rsc); } } static void calculate_system_health(gpointer gKey, gpointer gValue, gpointer user_data) { const char *key = (const char *)gKey; const char *value = (const char *)gValue; int *system_health = (int *)user_data; if (!gKey || !gValue || !user_data) { return; } /* Does it start with #health? */ if (0 == strncmp(key, "#health", 7)) { int score; /* Convert the value into an integer */ score = char2score(value); /* Add it to the running total */ *system_health = merge_weights(score, *system_health); } } static gboolean apply_system_health(pe_working_set_t * data_set) { GListPtr gIter = NULL; const char *health_strategy = pe_pref(data_set->config_hash, "node-health-strategy"); if (health_strategy == NULL || safe_str_eq(health_strategy, "none")) { /* Prevent any accidental health -> score translation */ node_score_red = 0; node_score_yellow = 0; node_score_green = 0; return TRUE; } else if (safe_str_eq(health_strategy, "migrate-on-red")) { /* Resources on nodes which have health values of red are * weighted away from that node. */ node_score_red = -INFINITY; node_score_yellow = 0; node_score_green = 0; } else if (safe_str_eq(health_strategy, "only-green")) { /* Resources on nodes which have health values of red or yellow * are forced away from that node. */ node_score_red = -INFINITY; node_score_yellow = -INFINITY; node_score_green = 0; } else if (safe_str_eq(health_strategy, "progressive")) { /* Same as the above, but use the r/y/g scores provided by the user * Defaults are provided by the pe_prefs table */ } else if (safe_str_eq(health_strategy, "custom")) { /* Requires the admin to configure the rsc_location constaints for * processing the stored health scores */ /* TODO: Check for the existance of appropriate node health constraints */ return TRUE; } else { crm_err("Unknown node health strategy: %s", health_strategy); return FALSE; } crm_info("Applying automated node health strategy: %s", health_strategy); for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { int system_health = 0; node_t *node = (node_t *) gIter->data; /* Search through the node hash table for system health entries. */ g_hash_table_foreach(node->details->attrs, calculate_system_health, &system_health); crm_info(" Node %s has an combined system health of %d", node->details->uname, system_health); /* If the health is non-zero, then create a new rsc2node so that the * weight will be added later on. */ if (system_health != 0) { GListPtr gIter2 = data_set->resources; for (; gIter2 != NULL; gIter2 = gIter2->next) { resource_t *rsc = (resource_t *) gIter2->data; - rsc2node_new(health_strategy, rsc, system_health, node, data_set); + rsc2node_new(health_strategy, rsc, system_health, NULL, node, data_set); } } } return TRUE; } gboolean stage0(pe_working_set_t * data_set) { xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set->input); if (data_set->input == NULL) { return FALSE; } if (is_set(data_set->flags, pe_flag_have_status) == FALSE) { crm_trace("Calculating status"); cluster_status(data_set); } set_alloc_actions(data_set); apply_system_health(data_set); unpack_constraints(cib_constraints, data_set); return TRUE; } static void wait_for_probe(resource_t * rsc, const char *action, action_t * probe_complete, pe_working_set_t * data_set) { if (probe_complete == NULL) { return; } if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; wait_for_probe(child, action, probe_complete, data_set); } } else { char *key = NULL; if (safe_str_eq(action, RSC_STOP) && g_list_length(rsc->running_on) == 1) { node_t *node = (node_t *) rsc->running_on->data; /* Stop actions on nodes that are shutting down do not need to wait for probes to complete * Doing so prevents node shutdown in the presence of nodes that are coming up * The purpose of waiting is to not stop resources until we know for sure the * intended destination is able to take them */ if (node && node->details->shutdown) { crm_debug("Skipping %s before %s_%s_0 due to %s shutdown", probe_complete->uuid, rsc->id, action, node->details->uname); return; } } key = generate_op_key(rsc->id, action, 0); custom_action_order(NULL, NULL, probe_complete, rsc, key, NULL, pe_order_optional, data_set); } } /* * Check nodes for resources started outside of the LRM */ gboolean probe_resources(pe_working_set_t * data_set) { action_t *probe_complete = NULL; action_t *probe_node_complete = NULL; action_t *probe_cluster_nodes_complete = NULL; GListPtr gIter = NULL; GListPtr gIter2 = NULL; gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; const char *probed = g_hash_table_lookup(node->details->attrs, CRM_OP_PROBED); if (node->details->online == FALSE) { continue; } else if (node->details->unclean) { continue; } else if (is_remote_node(node) && node->details->shutdown) { /* Don't try and probe a remote node we're shutting down. * It causes constraint conflicts to try and run any sort of action * other that 'stop' on resources living within a remote-node when * it is being shutdown. */ continue; } else if (is_container_remote_node(node)) { /* TODO enable container node probes once ordered probing is implemented. */ continue; + } else if (node->details->rsc_discovery_enabled == FALSE) { + /* resource discovery is disabled for this node */ + continue; + } else if (probe_complete == NULL) { probe_complete = get_pseudo_op(CRM_OP_PROBED, data_set); if (is_set(data_set->flags, pe_flag_have_remote_nodes)) { probe_cluster_nodes_complete = get_pseudo_op(CRM_OP_NODES_PROBED, data_set); } } if (probed != NULL && crm_is_true(probed) == FALSE) { action_t *probe_op = custom_action(NULL, g_strdup_printf("%s-%s", CRM_OP_REPROBE, node->details->uname), CRM_OP_REPROBE, node, FALSE, TRUE, data_set); add_hash_param(probe_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); continue; } probe_node_complete = custom_action(NULL, g_strdup_printf("%s-%s", CRM_OP_PROBED, node->details->uname), CRM_OP_PROBED, node, FALSE, TRUE, data_set); if (crm_is_true(probed)) { crm_trace("unset"); update_action_flags(probe_node_complete, pe_action_optional); } else { crm_trace("set"); update_action_flags(probe_node_complete, pe_action_optional | pe_action_clear); } crm_trace("%s - %d", node->details->uname, probe_node_complete->flags & pe_action_optional); probe_node_complete->priority = INFINITY; add_hash_param(probe_node_complete->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); if (node->details->pending) { update_action_flags(probe_node_complete, pe_action_runnable | pe_action_clear); crm_info("Action %s on %s is unrunnable (pending)", probe_node_complete->uuid, probe_node_complete->node->details->uname); } if (is_remote_node(node)) { order_actions(probe_node_complete, probe_complete, pe_order_runnable_left /*|pe_order_implies_then */ ); } else if (probe_cluster_nodes_complete == NULL) { order_actions(probe_node_complete, probe_complete, pe_order_runnable_left /*|pe_order_implies_then */ ); } else { order_actions(probe_node_complete, probe_cluster_nodes_complete, pe_order_runnable_left /*|pe_order_implies_then */ ); } gIter2 = data_set->resources; for (; gIter2 != NULL; gIter2 = gIter2->next) { resource_t *rsc = (resource_t *) gIter2->data; if (rsc->cmds->create_probe(rsc, node, probe_node_complete, FALSE, data_set)) { update_action_flags(probe_complete, pe_action_optional | pe_action_clear); update_action_flags(probe_node_complete, pe_action_optional | pe_action_clear); if (probe_cluster_nodes_complete && (rsc->is_remote_node || rsc_contains_remote_node(data_set, rsc))) { update_action_flags(probe_cluster_nodes_complete, pe_action_optional | pe_action_clear); /* allow remote connection resources and resources * containing remote connection resources to run after all * cluster nodes are probed */ wait_for_probe(rsc, RSC_START, probe_cluster_nodes_complete, data_set); } else { wait_for_probe(rsc, RSC_START, probe_complete, data_set); } } } } gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; if (rsc->is_remote_node || rsc_contains_remote_node(data_set, rsc)) { /* allow remote connection resources and any resources containing * remote connection resources to run after cluster nodes are probed.*/ wait_for_probe(rsc, RSC_STOP, probe_cluster_nodes_complete, data_set); } else { wait_for_probe(rsc, RSC_STOP, probe_complete, data_set); } } return TRUE; } /* * Count how many valid nodes we have (so we know the maximum number of * colors we can resolve). * * Apply node constraints (ie. filter the "allowed_nodes" part of resources */ gboolean stage2(pe_working_set_t * data_set) { GListPtr gIter = NULL; crm_trace("Applying placement constraints"); gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; if (node == NULL) { /* error */ } else if (node->weight >= 0.0 /* global weight */ && node->details->online && node->details->type != node_ping) { data_set->max_valid_nodes++; } } apply_placement_constraints(data_set); gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { GListPtr gIter2 = NULL; node_t *node = (node_t *) gIter->data; gIter2 = data_set->resources; for (; gIter2 != NULL; gIter2 = gIter2->next) { resource_t *rsc = (resource_t *) gIter2->data; common_apply_stickiness(rsc, node, data_set); } } return TRUE; } /* * Create internal resource constraints before allocation */ gboolean stage3(pe_working_set_t * data_set) { GListPtr gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; rsc->cmds->internal_constraints(rsc, data_set); } return TRUE; } /* * Check for orphaned or redefined actions */ gboolean stage4(pe_working_set_t * data_set) { check_actions(data_set); return TRUE; } static gint sort_rsc_process_order(gconstpointer a, gconstpointer b, gpointer data) { int rc = 0; int r1_weight = -INFINITY; int r2_weight = -INFINITY; const char *reason = "existance"; const GListPtr nodes = (GListPtr) data; resource_t *resource1 = (resource_t *) convert_const_pointer(a); resource_t *resource2 = (resource_t *) convert_const_pointer(b); node_t *node = NULL; GListPtr gIter = NULL; GHashTable *r1_nodes = NULL; GHashTable *r2_nodes = NULL; if (a == NULL && b == NULL) { goto done; } if (a == NULL) { return 1; } if (b == NULL) { return -1; } reason = "priority"; r1_weight = resource1->priority; r2_weight = resource2->priority; if (r1_weight > r2_weight) { rc = -1; goto done; } if (r1_weight < r2_weight) { rc = 1; goto done; } reason = "no node list"; if (nodes == NULL) { goto done; } r1_nodes = rsc_merge_weights(resource1, resource1->id, NULL, NULL, 1, pe_weights_forward | pe_weights_init); dump_node_scores(LOG_TRACE, NULL, resource1->id, r1_nodes); r2_nodes = rsc_merge_weights(resource2, resource2->id, NULL, NULL, 1, pe_weights_forward | pe_weights_init); dump_node_scores(LOG_TRACE, NULL, resource2->id, r2_nodes); /* Current location score */ reason = "current location"; r1_weight = -INFINITY; r2_weight = -INFINITY; if (resource1->running_on) { node = g_list_nth_data(resource1->running_on, 0); node = g_hash_table_lookup(r1_nodes, node->details->id); if (node != NULL) { r1_weight = node->weight; } } if (resource2->running_on) { node = g_list_nth_data(resource2->running_on, 0); node = g_hash_table_lookup(r2_nodes, node->details->id); if (node != NULL) { r2_weight = node->weight; } } if (r1_weight > r2_weight) { rc = -1; goto done; } if (r1_weight < r2_weight) { rc = 1; goto done; } reason = "score"; for (gIter = nodes; gIter != NULL; gIter = gIter->next) { node_t *r1_node = NULL; node_t *r2_node = NULL; node = (node_t *) gIter->data; r1_weight = -INFINITY; if (r1_nodes) { r1_node = g_hash_table_lookup(r1_nodes, node->details->id); } if (r1_node) { r1_weight = r1_node->weight; } r2_weight = -INFINITY; if (r2_nodes) { r2_node = g_hash_table_lookup(r2_nodes, node->details->id); } if (r2_node) { r2_weight = r2_node->weight; } if (r1_weight > r2_weight) { rc = -1; goto done; } if (r1_weight < r2_weight) { rc = 1; goto done; } } done: if (r1_nodes) { g_hash_table_destroy(r1_nodes); } if (r2_nodes) { g_hash_table_destroy(r2_nodes); } crm_trace("%s (%d) %c %s (%d) on %s: %s", resource1->id, r1_weight, rc < 0 ? '>' : rc > 0 ? '<' : '=', resource2->id, r2_weight, node ? node->details->id : "n/a", reason); return rc; } static void allocate_resources(pe_working_set_t * data_set) { GListPtr gIter = NULL; if (is_set(data_set->flags, pe_flag_have_remote_nodes)) { /* Force remote connection resources to be allocated first. This * also forces any colocation dependencies to be allocated as well */ for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; if (rsc->is_remote_node == FALSE) { continue; } pe_rsc_trace(rsc, "Allocating: %s", rsc->id); rsc->cmds->allocate(rsc, NULL, data_set); } } /* now do the rest of the resources */ for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; if (rsc->is_remote_node == TRUE) { continue; } pe_rsc_trace(rsc, "Allocating: %s", rsc->id); rsc->cmds->allocate(rsc, NULL, data_set); } } gboolean stage5(pe_working_set_t * data_set) { GListPtr gIter = NULL; if (safe_str_neq(data_set->placement_strategy, "default")) { GListPtr nodes = g_list_copy(data_set->nodes); nodes = g_list_sort_with_data(nodes, sort_node_weight, NULL); data_set->resources = g_list_sort_with_data(data_set->resources, sort_rsc_process_order, nodes); g_list_free(nodes); } gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; dump_node_capacity(show_utilization ? 0 : utilization_log_level, "Original", node); } crm_trace("Allocating services"); /* Take (next) highest resource, assign it and create its actions */ allocate_resources(data_set); gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; dump_node_capacity(show_utilization ? 0 : utilization_log_level, "Remaining", node); } if (is_set(data_set->flags, pe_flag_startup_probes)) { crm_trace("Calculating needed probes"); /* This code probably needs optimization * ptest -x with 100 nodes, 100 clones and clone-max=100: With probes: ptest[14781]: 2010/09/27_17:56:46 notice: TRACE: do_calculations: pengine.c:258 Calculate cluster status ptest[14781]: 2010/09/27_17:56:46 notice: TRACE: do_calculations: pengine.c:278 Applying placement constraints ptest[14781]: 2010/09/27_17:56:47 notice: TRACE: do_calculations: pengine.c:285 Create internal constraints ptest[14781]: 2010/09/27_17:56:47 notice: TRACE: do_calculations: pengine.c:292 Check actions ptest[14781]: 2010/09/27_17:56:48 notice: TRACE: do_calculations: pengine.c:299 Allocate resources ptest[14781]: 2010/09/27_17:56:48 notice: TRACE: stage5: allocate.c:881 Allocating services ptest[14781]: 2010/09/27_17:56:49 notice: TRACE: stage5: allocate.c:894 Calculating needed probes ptest[14781]: 2010/09/27_17:56:51 notice: TRACE: stage5: allocate.c:899 Creating actions ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: stage5: allocate.c:905 Creating done ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: do_calculations: pengine.c:306 Processing fencing and shutdown cases ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: do_calculations: pengine.c:313 Applying ordering constraints 36s ptest[14781]: 2010/09/27_17:57:28 notice: TRACE: do_calculations: pengine.c:320 Create transition graph Without probes: ptest[14637]: 2010/09/27_17:56:21 notice: TRACE: do_calculations: pengine.c:258 Calculate cluster status ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:278 Applying placement constraints ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:285 Create internal constraints ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:292 Check actions ptest[14637]: 2010/09/27_17:56:23 notice: TRACE: do_calculations: pengine.c:299 Allocate resources ptest[14637]: 2010/09/27_17:56:23 notice: TRACE: stage5: allocate.c:881 Allocating services ptest[14637]: 2010/09/27_17:56:24 notice: TRACE: stage5: allocate.c:899 Creating actions ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: stage5: allocate.c:905 Creating done ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:306 Processing fencing and shutdown cases ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:313 Applying ordering constraints ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:320 Create transition graph */ probe_resources(data_set); } crm_trace("Creating actions"); gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; rsc->cmds->create_actions(rsc, data_set); } crm_trace("Creating done"); return TRUE; } static gboolean is_managed(const resource_t * rsc) { GListPtr gIter = rsc->children; if (is_set(rsc->flags, pe_rsc_managed)) { return TRUE; } for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; if (is_managed(child_rsc)) { return TRUE; } } return FALSE; } static gboolean any_managed_resources(pe_working_set_t * data_set) { GListPtr gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; if (is_managed(rsc)) { return TRUE; } } return FALSE; } /* * Create dependancies for stonith and shutdown operations */ gboolean stage6(pe_working_set_t * data_set) { action_t *dc_down = NULL; action_t *dc_fence = NULL; action_t *stonith_op = NULL; action_t *last_stonith = NULL; gboolean integrity_lost = FALSE; action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); action_t *done = get_pseudo_op(STONITH_DONE, data_set); gboolean need_stonith = TRUE; GListPtr gIter = data_set->nodes; crm_trace("Processing fencing and shutdown cases"); if (any_managed_resources(data_set) == FALSE) { crm_notice("Delaying fencing operations until there are resources to manage"); need_stonith = FALSE; } for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; /* remote-nodes associated with a container resource (such as a vm) are not fenced */ if (is_container_remote_node(node)) { continue; } stonith_op = NULL; if (need_stonith && node->details->unclean && pe_can_fence(data_set, node)) { pe_warn("Scheduling Node %s for STONITH", node->details->uname); stonith_op = pe_fence_op(node, NULL, FALSE, data_set); stonith_constraints(node, stonith_op, data_set); if (node->details->is_dc) { dc_down = stonith_op; dc_fence = stonith_op; } else { if (last_stonith) { order_actions(last_stonith, stonith_op, pe_order_optional); } last_stonith = stonith_op; } } else if (node->details->online && node->details->shutdown && /* TODO define what a shutdown op means for a baremetal remote node. * For now we do not send shutdown operations for remote nodes, but * if we can come up with a good use for this in the future, we will. */ is_remote_node(node) == FALSE) { action_t *down_op = NULL; crm_notice("Scheduling Node %s for shutdown", node->details->uname); down_op = custom_action(NULL, g_strdup_printf("%s-%s", CRM_OP_SHUTDOWN, node->details->uname), CRM_OP_SHUTDOWN, node, FALSE, TRUE, data_set); shutdown_constraints(node, down_op, data_set); add_hash_param(down_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); if (node->details->is_dc) { dc_down = down_op; } } if (node->details->unclean && stonith_op == NULL) { integrity_lost = TRUE; pe_warn("Node %s is unclean!", node->details->uname); } } if (integrity_lost) { if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) { pe_warn("YOUR RESOURCES ARE NOW LIKELY COMPROMISED"); pe_err("ENABLE STONITH TO KEEP YOUR RESOURCES SAFE"); } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE) { crm_notice("Cannot fence unclean nodes until quorum is" " attained (or no-quorum-policy is set to ignore)"); } } if (dc_down != NULL) { GListPtr gIter = NULL; crm_trace("Ordering shutdowns before %s on %s (DC)", dc_down->task, dc_down->node->details->uname); add_hash_param(dc_down->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { action_t *node_stop = (action_t *) gIter->data; if (safe_str_neq(CRM_OP_SHUTDOWN, node_stop->task)) { continue; } else if (node_stop->node->details->is_dc) { continue; } crm_debug("Ordering shutdown on %s before %s on %s", node_stop->node->details->uname, dc_down->task, dc_down->node->details->uname); order_actions(node_stop, dc_down, pe_order_optional); } if (last_stonith && dc_down != last_stonith) { order_actions(last_stonith, dc_down, pe_order_optional); } } if (last_stonith) { order_actions(last_stonith, done, pe_order_implies_then); } else if (dc_fence) { order_actions(dc_down, done, pe_order_implies_then); } order_actions(done, all_stopped, pe_order_implies_then); return TRUE; } /* * Determin the sets of independant actions and the correct order for the * actions in each set. * * Mark dependencies of un-runnable actions un-runnable * */ static GListPtr find_actions_by_task(GListPtr actions, resource_t * rsc, const char *original_key) { GListPtr list = NULL; list = find_actions(actions, original_key, NULL); if (list == NULL) { /* we're potentially searching a child of the original resource */ char *key = NULL; char *tmp = NULL; char *task = NULL; int interval = 0; if (parse_op_key(original_key, &tmp, &task, &interval)) { key = generate_op_key(rsc->id, task, interval); /* crm_err("looking up %s instead of %s", key, original_key); */ /* slist_iter(action, action_t, actions, lpc, */ /* crm_err(" - %s", action->uuid)); */ list = find_actions(actions, key, NULL); } else { crm_err("search key: %s", original_key); } free(key); free(tmp); free(task); } return list; } static void rsc_order_then(action_t * lh_action, resource_t * rsc, order_constraint_t * order) { GListPtr gIter = NULL; GListPtr rh_actions = NULL; action_t *rh_action = NULL; enum pe_ordering type = order->type; CRM_CHECK(rsc != NULL, return); CRM_CHECK(order != NULL, return); rh_action = order->rh_action; crm_trace("Processing RH of ordering constraint %d", order->id); if (rh_action != NULL) { rh_actions = g_list_prepend(NULL, rh_action); } else if (rsc != NULL) { rh_actions = find_actions_by_task(rsc->actions, rsc, order->rh_action_task); } if (rh_actions == NULL) { pe_rsc_trace(rsc, "No RH-Side (%s/%s) found for constraint..." " ignoring", rsc->id, order->rh_action_task); if (lh_action) { pe_rsc_trace(rsc, "LH-Side was: %s", lh_action->uuid); } return; } if (lh_action && lh_action->rsc == rsc && is_set(lh_action->flags, pe_action_dangle)) { pe_rsc_trace(rsc, "Detected dangling operation %s -> %s", lh_action->uuid, order->rh_action_task); clear_bit(type, pe_order_implies_then); } gIter = rh_actions; for (; gIter != NULL; gIter = gIter->next) { action_t *rh_action_iter = (action_t *) gIter->data; if (lh_action) { order_actions(lh_action, rh_action_iter, type); } else if (type & pe_order_implies_then) { update_action_flags(rh_action_iter, pe_action_runnable | pe_action_clear); crm_warn("Unrunnable %s 0x%.6x", rh_action_iter->uuid, type); } else { crm_warn("neither %s 0x%.6x", rh_action_iter->uuid, type); } } g_list_free(rh_actions); } static void rsc_order_first(resource_t * lh_rsc, order_constraint_t * order, pe_working_set_t * data_set) { GListPtr gIter = NULL; GListPtr lh_actions = NULL; action_t *lh_action = order->lh_action; resource_t *rh_rsc = order->rh_rsc; crm_trace("Processing LH of ordering constraint %d", order->id); CRM_ASSERT(lh_rsc != NULL); if (lh_action != NULL) { lh_actions = g_list_prepend(NULL, lh_action); } else if (lh_action == NULL) { lh_actions = find_actions_by_task(lh_rsc->actions, lh_rsc, order->lh_action_task); } if (lh_actions == NULL && lh_rsc != rh_rsc) { char *key = NULL; char *rsc_id = NULL; char *op_type = NULL; int interval = 0; parse_op_key(order->lh_action_task, &rsc_id, &op_type, &interval); key = generate_op_key(lh_rsc->id, op_type, interval); if (lh_rsc->fns->state(lh_rsc, TRUE) == RSC_ROLE_STOPPED && safe_str_eq(op_type, RSC_STOP)) { free(key); pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - ignoring", lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task); } else if (lh_rsc->fns->state(lh_rsc, TRUE) == RSC_ROLE_SLAVE && safe_str_eq(op_type, RSC_DEMOTE)) { free(key); pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - ignoring", lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task); } else { pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - creating", lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task); lh_action = custom_action(lh_rsc, key, op_type, NULL, TRUE, TRUE, data_set); lh_actions = g_list_prepend(NULL, lh_action); } free(op_type); free(rsc_id); } gIter = lh_actions; for (; gIter != NULL; gIter = gIter->next) { action_t *lh_action_iter = (action_t *) gIter->data; if (rh_rsc == NULL && order->rh_action) { rh_rsc = order->rh_action->rsc; } if (rh_rsc) { rsc_order_then(lh_action_iter, rh_rsc, order); } else if (order->rh_action) { order_actions(lh_action_iter, order->rh_action, order->type); } } g_list_free(lh_actions); } extern gboolean update_action(action_t * action); static void apply_remote_node_ordering(pe_working_set_t *data_set) { GListPtr gIter = data_set->actions; if (is_set(data_set->flags, pe_flag_have_remote_nodes) == FALSE) { return; } for (; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; resource_t *remote_rsc = NULL; resource_t *container = NULL; if (action->node == NULL || is_remote_node(action->node) == FALSE || action->rsc == NULL || is_set(action->flags, pe_action_pseudo)) { continue; } remote_rsc = action->node->details->remote_rsc; container = remote_rsc->container; if (safe_str_eq(action->task, "monitor") || safe_str_eq(action->task, "start") || safe_str_eq(action->task, "promote") || safe_str_eq(action->task, CRM_OP_LRM_REFRESH) || safe_str_eq(action->task, CRM_OP_CLEAR_FAILCOUNT) || safe_str_eq(action->task, "delete")) { custom_action_order(remote_rsc, generate_op_key(remote_rsc->id, RSC_START, 0), NULL, action->rsc, NULL, action, pe_order_preserve | pe_order_implies_then | pe_order_runnable_left, data_set); } else if (safe_str_eq(action->task, "demote")) { /* If the connection is being torn down, we don't want * to build a constraint between a resource's demotion and * the connection resource starting... because the connection * resource can not start. The connection might already be up, * but the START action would not be allowed which in turn would * block the demotion of any resournces living in the remote-node. * * In this case, only build the constraint between the demotion and * the connection's stop action. This allows the connection and all the * resources within the remote-node to be torn down properly. */ if (remote_rsc->next_role == RSC_ROLE_STOPPED) { custom_action_order(action->rsc, NULL, action, remote_rsc, generate_op_key(remote_rsc->id, RSC_STOP, 0), NULL, pe_order_preserve | pe_order_implies_first, data_set); } else { custom_action_order(remote_rsc, generate_op_key(remote_rsc->id, RSC_START, 0), NULL, action->rsc, NULL, action, pe_order_preserve | pe_order_implies_then | pe_order_runnable_left, data_set); } } else if (safe_str_eq(action->task, "stop") && container && is_set(container->flags, pe_rsc_failed)) { /* when the container representing a remote node fails, the stop * action for all the resources living in that container is implied * by the container stopping. This is similar to how fencing operations * work for cluster nodes. */ pe_set_action_bit(action, pe_action_pseudo); custom_action_order(container, generate_op_key(container->id, RSC_STOP, 0), NULL, action->rsc, NULL, action, pe_order_preserve | pe_order_implies_then | pe_order_runnable_left, data_set); } else if (safe_str_eq(action->task, "stop")) { gboolean after_start = FALSE; /* handle special case with baremetal remote where stop actions need to be * ordered after the connection resource starts somewhere else. */ if (is_baremetal_remote_node(action->node)) { node_t *cluster_node = remote_rsc->running_on ? remote_rsc->running_on->data : NULL; /* if the current cluster node a baremetal connection resource * is residing on is unclean, we can't process any operations on that * remote node until after it starts somewhere else. */ if (cluster_node && cluster_node->details->unclean == TRUE) { after_start = TRUE; } } if (after_start) { custom_action_order(remote_rsc, generate_op_key(remote_rsc->id, RSC_START, 0), NULL, action->rsc, NULL, action, pe_order_preserve | pe_order_implies_then | pe_order_runnable_left, data_set); } else { custom_action_order(action->rsc, NULL, action, remote_rsc, generate_op_key(remote_rsc->id, RSC_STOP, 0), NULL, pe_order_preserve | pe_order_implies_first, data_set); } } } } gboolean stage7(pe_working_set_t * data_set) { GListPtr gIter = NULL; apply_remote_node_ordering(data_set); crm_trace("Applying ordering constraints"); /* Don't ask me why, but apparently they need to be processed in * the order they were created in... go figure * * Also g_list_prepend() has horrendous performance characteristics * So we need to use g_list_prepend() and then reverse the list here */ data_set->ordering_constraints = g_list_reverse(data_set->ordering_constraints); gIter = data_set->ordering_constraints; for (; gIter != NULL; gIter = gIter->next) { order_constraint_t *order = (order_constraint_t *) gIter->data; resource_t *rsc = order->lh_rsc; crm_trace("Applying ordering constraint: %d", order->id); if (rsc != NULL) { crm_trace("rsc_action-to-*"); rsc_order_first(rsc, order, data_set); continue; } rsc = order->rh_rsc; if (rsc != NULL) { crm_trace("action-to-rsc_action"); rsc_order_then(order->lh_action, rsc, order); } else { crm_trace("action-to-action"); order_actions(order->lh_action, order->rh_action, order->type); } } crm_trace("Updating %d actions", g_list_length(data_set->actions)); gIter = data_set->actions; for (; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; update_action(action); } crm_trace("Processing reloads"); gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; rsc_reload(rsc, data_set); LogActions(rsc, data_set, FALSE); } return TRUE; } static gint sort_notify_entries(gconstpointer a, gconstpointer b) { int tmp; const notify_entry_t *entry_a = a; const notify_entry_t *entry_b = b; if (entry_a == NULL && entry_b == NULL) { return 0; } if (entry_a == NULL) { return 1; } if (entry_b == NULL) { return -1; } if (entry_a->rsc == NULL && entry_b->rsc == NULL) { return 0; } if (entry_a->rsc == NULL) { return 1; } if (entry_b->rsc == NULL) { return -1; } tmp = strcmp(entry_a->rsc->id, entry_b->rsc->id); if (tmp != 0) { return tmp; } if (entry_a->node == NULL && entry_b->node == NULL) { return 0; } if (entry_a->node == NULL) { return 1; } if (entry_b->node == NULL) { return -1; } return strcmp(entry_a->node->details->id, entry_b->node->details->id); } static void expand_list(GListPtr list, char **rsc_list, char **node_list) { GListPtr gIter = NULL; const char *uname = NULL; const char *rsc_id = NULL; const char *last_rsc_id = NULL; if (rsc_list) { *rsc_list = NULL; } if (list == NULL) { if (rsc_list) { *rsc_list = strdup(" "); } if (node_list) { *node_list = strdup(" "); } return; } if (node_list) { *node_list = NULL; } for (gIter = list; gIter != NULL; gIter = gIter->next) { notify_entry_t *entry = (notify_entry_t *) gIter->data; CRM_LOG_ASSERT(entry != NULL); CRM_LOG_ASSERT(entry && entry->rsc != NULL); if(entry == NULL || entry->rsc == NULL) { continue; } /* Uh, why? */ CRM_LOG_ASSERT(node_list == NULL || entry->node != NULL); if(node_list != NULL && entry->node == NULL) { continue; } uname = NULL; rsc_id = entry->rsc->id; CRM_ASSERT(rsc_id != NULL); /* filter dups */ if (safe_str_eq(rsc_id, last_rsc_id)) { continue; } last_rsc_id = rsc_id; if (rsc_list != NULL) { int existing_len = 0; int len = 2 + strlen(rsc_id); /* +1 space, +1 EOS */ if (rsc_list && *rsc_list) { existing_len = strlen(*rsc_list); } crm_trace("Adding %s (%dc) at offset %d", rsc_id, len - 2, existing_len); *rsc_list = realloc(*rsc_list, len + existing_len); sprintf(*rsc_list + existing_len, "%s ", rsc_id); } if (entry->node != NULL) { uname = entry->node->details->uname; } if (node_list != NULL && uname) { int existing_len = 0; int len = 2 + strlen(uname); if (node_list && *node_list) { existing_len = strlen(*node_list); } crm_trace("Adding %s (%dc) at offset %d", uname, len - 2, existing_len); *node_list = realloc(*node_list, len + existing_len); sprintf(*node_list + existing_len, "%s ", uname); } } } static void dup_attr(gpointer key, gpointer value, gpointer user_data) { add_hash_param(user_data, key, value); } static action_t * pe_notify(resource_t * rsc, node_t * node, action_t * op, action_t * confirm, notify_data_t * n_data, pe_working_set_t * data_set) { char *key = NULL; action_t *trigger = NULL; const char *value = NULL; const char *task = NULL; if (op == NULL || confirm == NULL) { pe_rsc_trace(rsc, "Op=%p confirm=%p", op, confirm); return NULL; } CRM_CHECK(rsc != NULL, return NULL); CRM_CHECK(node != NULL, return NULL); if (node->details->online == FALSE) { pe_rsc_trace(rsc, "Skipping notification for %s: node offline", rsc->id); return NULL; } else if (is_set(op->flags, pe_action_runnable) == FALSE) { pe_rsc_trace(rsc, "Skipping notification for %s: not runnable", op->uuid); return NULL; } value = g_hash_table_lookup(op->meta, "notify_type"); task = g_hash_table_lookup(op->meta, "notify_operation"); pe_rsc_trace(rsc, "Creating notify actions for %s: %s (%s-%s)", op->uuid, rsc->id, value, task); key = generate_notify_key(rsc->id, value, task); trigger = custom_action(rsc, key, op->task, node, is_set(op->flags, pe_action_optional), TRUE, data_set); g_hash_table_foreach(op->meta, dup_attr, trigger->meta); g_hash_table_foreach(n_data->keys, dup_attr, trigger->meta); /* pseudo_notify before notify */ pe_rsc_trace(rsc, "Ordering %s before %s (%d->%d)", op->uuid, trigger->uuid, trigger->id, op->id); order_actions(op, trigger, pe_order_optional); order_actions(trigger, confirm, pe_order_optional); return trigger; } static void pe_post_notify(resource_t * rsc, node_t * node, notify_data_t * n_data, pe_working_set_t * data_set) { action_t *notify = NULL; CRM_CHECK(rsc != NULL, return); if (n_data->post == NULL) { return; /* Nothing to do */ } notify = pe_notify(rsc, node, n_data->post, n_data->post_done, n_data, data_set); if (notify != NULL) { notify->priority = INFINITY; } if (n_data->post_done) { GListPtr gIter = rsc->actions; for (; gIter != NULL; gIter = gIter->next) { action_t *mon = (action_t *) gIter->data; const char *interval = g_hash_table_lookup(mon->meta, "interval"); if (interval == NULL || safe_str_eq(interval, "0")) { pe_rsc_trace(rsc, "Skipping %s: interval", mon->uuid); continue; } else if (safe_str_eq(mon->task, RSC_CANCEL)) { pe_rsc_trace(rsc, "Skipping %s: cancel", mon->uuid); continue; } order_actions(n_data->post_done, mon, pe_order_optional); } } } notify_data_t * create_notification_boundaries(resource_t * rsc, const char *action, action_t * start, action_t * end, pe_working_set_t * data_set) { /* Create the pseudo ops that preceed and follow the actual notifications */ /* * Creates two sequences (conditional on start and end being supplied): * pre_notify -> pre_notify_complete -> start, and * end -> post_notify -> post_notify_complete * * 'start' and 'end' may be the same event or ${X} and ${X}ed as per clones */ char *key = NULL; notify_data_t *n_data = NULL; if (is_not_set(rsc->flags, pe_rsc_notify)) { return NULL; } n_data = calloc(1, sizeof(notify_data_t)); n_data->action = action; n_data->keys = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if (start) { /* create pre-event notification wrappers */ key = generate_notify_key(rsc->id, "pre", start->task); n_data->pre = custom_action(rsc, key, RSC_NOTIFY, NULL, is_set(start->flags, pe_action_optional), TRUE, data_set); update_action_flags(n_data->pre, pe_action_pseudo); update_action_flags(n_data->pre, pe_action_runnable); add_hash_param(n_data->pre->meta, "notify_type", "pre"); add_hash_param(n_data->pre->meta, "notify_operation", n_data->action); add_hash_param(n_data->pre->meta, "notify_key_type", "pre"); add_hash_param(n_data->pre->meta, "notify_key_operation", start->task); /* create pre_notify_complete */ key = generate_notify_key(rsc->id, "confirmed-pre", start->task); n_data->pre_done = custom_action(rsc, key, RSC_NOTIFIED, NULL, is_set(start->flags, pe_action_optional), TRUE, data_set); update_action_flags(n_data->pre_done, pe_action_pseudo); update_action_flags(n_data->pre_done, pe_action_runnable); add_hash_param(n_data->pre_done->meta, "notify_type", "pre"); add_hash_param(n_data->pre_done->meta, "notify_operation", n_data->action); add_hash_param(n_data->pre_done->meta, "notify_key_type", "confirmed-pre"); add_hash_param(n_data->pre_done->meta, "notify_key_operation", start->task); order_actions(n_data->pre_done, start, pe_order_optional); order_actions(n_data->pre, n_data->pre_done, pe_order_optional); } if (end) { /* create post-event notification wrappers */ key = generate_notify_key(rsc->id, "post", end->task); n_data->post = custom_action(rsc, key, RSC_NOTIFY, NULL, is_set(end->flags, pe_action_optional), TRUE, data_set); n_data->post->priority = INFINITY; update_action_flags(n_data->post, pe_action_pseudo); if (is_set(end->flags, pe_action_runnable)) { update_action_flags(n_data->post, pe_action_runnable); } else { update_action_flags(n_data->post, pe_action_runnable | pe_action_clear); } add_hash_param(n_data->post->meta, "notify_type", "post"); add_hash_param(n_data->post->meta, "notify_operation", n_data->action); add_hash_param(n_data->post->meta, "notify_key_type", "post"); add_hash_param(n_data->post->meta, "notify_key_operation", end->task); /* create post_notify_complete */ key = generate_notify_key(rsc->id, "confirmed-post", end->task); n_data->post_done = custom_action(rsc, key, RSC_NOTIFIED, NULL, is_set(end->flags, pe_action_optional), TRUE, data_set); n_data->post_done->priority = INFINITY; update_action_flags(n_data->post_done, pe_action_pseudo); if (is_set(end->flags, pe_action_runnable)) { update_action_flags(n_data->post_done, pe_action_runnable); } else { update_action_flags(n_data->post_done, pe_action_runnable | pe_action_clear); } add_hash_param(n_data->post_done->meta, "notify_type", "post"); add_hash_param(n_data->post_done->meta, "notify_operation", n_data->action); add_hash_param(n_data->post_done->meta, "notify_key_type", "confirmed-post"); add_hash_param(n_data->post_done->meta, "notify_key_operation", end->task); order_actions(end, n_data->post, pe_order_implies_then); order_actions(n_data->post, n_data->post_done, pe_order_implies_then); } if (start && end) { order_actions(n_data->pre_done, n_data->post, pe_order_optional); } if (safe_str_eq(action, RSC_STOP)) { action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); order_actions(n_data->post_done, all_stopped, pe_order_optional); } return n_data; } void collect_notification_data(resource_t * rsc, gboolean state, gboolean activity, notify_data_t * n_data) { if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; collect_notification_data(child, state, activity, n_data); } return; } if (state) { notify_entry_t *entry = NULL; entry = calloc(1, sizeof(notify_entry_t)); entry->rsc = rsc; if (rsc->running_on) { /* we only take the first one */ entry->node = rsc->running_on->data; } pe_rsc_trace(rsc, "%s state: %s", rsc->id, role2text(rsc->role)); switch (rsc->role) { case RSC_ROLE_STOPPED: n_data->inactive = g_list_prepend(n_data->inactive, entry); break; case RSC_ROLE_STARTED: n_data->active = g_list_prepend(n_data->active, entry); break; case RSC_ROLE_SLAVE: n_data->slave = g_list_prepend(n_data->slave, entry); break; case RSC_ROLE_MASTER: n_data->master = g_list_prepend(n_data->master, entry); break; default: crm_err("Unsupported notify role"); free(entry); break; } } if (activity) { notify_entry_t *entry = NULL; enum action_tasks task; GListPtr gIter = rsc->actions; for (; gIter != NULL; gIter = gIter->next) { action_t *op = (action_t *) gIter->data; if (is_set(op->flags, pe_action_optional) == FALSE && op->node != NULL) { entry = calloc(1, sizeof(notify_entry_t)); entry->node = op->node; entry->rsc = rsc; task = text2task(op->task); switch (task) { case start_rsc: n_data->start = g_list_prepend(n_data->start, entry); break; case stop_rsc: n_data->stop = g_list_prepend(n_data->stop, entry); break; case action_promote: n_data->promote = g_list_prepend(n_data->promote, entry); break; case action_demote: n_data->demote = g_list_prepend(n_data->demote, entry); break; default: free(entry); break; } } } } } gboolean expand_notification_data(notify_data_t * n_data) { /* Expand the notification entries into a key=value hashtable * This hashtable is later used in action2xml() */ gboolean required = FALSE; char *rsc_list = NULL; char *node_list = NULL; if (n_data->stop) { n_data->stop = g_list_sort(n_data->stop, sort_notify_entries); } expand_list(n_data->stop, &rsc_list, &node_list); if (rsc_list != NULL && safe_str_neq(" ", rsc_list)) { if (safe_str_eq(n_data->action, RSC_STOP)) { required = TRUE; } } g_hash_table_insert(n_data->keys, strdup("notify_stop_resource"), rsc_list); g_hash_table_insert(n_data->keys, strdup("notify_stop_uname"), node_list); if (n_data->start) { n_data->start = g_list_sort(n_data->start, sort_notify_entries); if (rsc_list && safe_str_eq(n_data->action, RSC_START)) { required = TRUE; } } expand_list(n_data->start, &rsc_list, &node_list); g_hash_table_insert(n_data->keys, strdup("notify_start_resource"), rsc_list); g_hash_table_insert(n_data->keys, strdup("notify_start_uname"), node_list); if (n_data->demote) { n_data->demote = g_list_sort(n_data->demote, sort_notify_entries); if (safe_str_eq(n_data->action, RSC_DEMOTE)) { required = TRUE; } } expand_list(n_data->demote, &rsc_list, &node_list); g_hash_table_insert(n_data->keys, strdup("notify_demote_resource"), rsc_list); g_hash_table_insert(n_data->keys, strdup("notify_demote_uname"), node_list); if (n_data->promote) { n_data->promote = g_list_sort(n_data->promote, sort_notify_entries); if (safe_str_eq(n_data->action, RSC_PROMOTE)) { required = TRUE; } } expand_list(n_data->promote, &rsc_list, &node_list); g_hash_table_insert(n_data->keys, strdup("notify_promote_resource"), rsc_list); g_hash_table_insert(n_data->keys, strdup("notify_promote_uname"), node_list); if (n_data->active) { n_data->active = g_list_sort(n_data->active, sort_notify_entries); } expand_list(n_data->active, &rsc_list, &node_list); g_hash_table_insert(n_data->keys, strdup("notify_active_resource"), rsc_list); g_hash_table_insert(n_data->keys, strdup("notify_active_uname"), node_list); if (n_data->slave) { n_data->slave = g_list_sort(n_data->slave, sort_notify_entries); } expand_list(n_data->slave, &rsc_list, &node_list); g_hash_table_insert(n_data->keys, strdup("notify_slave_resource"), rsc_list); g_hash_table_insert(n_data->keys, strdup("notify_slave_uname"), node_list); if (n_data->master) { n_data->master = g_list_sort(n_data->master, sort_notify_entries); } expand_list(n_data->master, &rsc_list, &node_list); g_hash_table_insert(n_data->keys, strdup("notify_master_resource"), rsc_list); g_hash_table_insert(n_data->keys, strdup("notify_master_uname"), node_list); if (n_data->inactive) { n_data->inactive = g_list_sort(n_data->inactive, sort_notify_entries); } expand_list(n_data->inactive, &rsc_list, NULL); g_hash_table_insert(n_data->keys, strdup("notify_inactive_resource"), rsc_list); if (required && n_data->pre) { update_action_flags(n_data->pre, pe_action_optional | pe_action_clear); update_action_flags(n_data->pre_done, pe_action_optional | pe_action_clear); } if (required && n_data->post) { update_action_flags(n_data->post, pe_action_optional | pe_action_clear); update_action_flags(n_data->post_done, pe_action_optional | pe_action_clear); } return required; } void create_notifications(resource_t * rsc, notify_data_t * n_data, pe_working_set_t * data_set) { GListPtr gIter = NULL; action_t *stop = NULL; action_t *start = NULL; enum action_tasks task = text2task(n_data->action); if (rsc->children) { gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; create_notifications(child, n_data, data_set); } return; } /* Copy notification details into standard ops */ gIter = rsc->actions; for (; gIter != NULL; gIter = gIter->next) { action_t *op = (action_t *) gIter->data; if (is_set(op->flags, pe_action_optional) == FALSE && op->node != NULL) { enum action_tasks t = text2task(op->task); switch (t) { case start_rsc: case stop_rsc: case action_promote: case action_demote: g_hash_table_foreach(n_data->keys, dup_attr, op->meta); break; default: break; } } } pe_rsc_trace(rsc, "Creating notificaitons for: %s.%s (%s->%s)", n_data->action, rsc->id, role2text(rsc->role), role2text(rsc->next_role)); stop = find_first_action(rsc->actions, NULL, RSC_STOP, NULL); start = find_first_action(rsc->actions, NULL, RSC_START, NULL); /* stop / demote */ if (rsc->role != RSC_ROLE_STOPPED) { if (task == stop_rsc || task == action_demote) { gIter = rsc->running_on; for (; gIter != NULL; gIter = gIter->next) { node_t *current_node = (node_t *) gIter->data; pe_notify(rsc, current_node, n_data->pre, n_data->pre_done, n_data, data_set); if (task == action_demote || stop == NULL || is_set(stop->flags, pe_action_optional)) { pe_post_notify(rsc, current_node, n_data, data_set); } } } } /* start / promote */ if (rsc->next_role != RSC_ROLE_STOPPED) { if (rsc->allocated_to == NULL) { pe_proc_err("Next role '%s' but %s is not allocated", role2text(rsc->next_role), rsc->id); } else if (task == start_rsc || task == action_promote) { if (task != start_rsc || start == NULL || is_set(start->flags, pe_action_optional)) { pe_notify(rsc, rsc->allocated_to, n_data->pre, n_data->pre_done, n_data, data_set); } pe_post_notify(rsc, rsc->allocated_to, n_data, data_set); } } } void free_notification_data(notify_data_t * n_data) { if (n_data == NULL) { return; } g_list_free_full(n_data->stop, free); g_list_free_full(n_data->start, free); g_list_free_full(n_data->demote, free); g_list_free_full(n_data->promote, free); g_list_free_full(n_data->master, free); g_list_free_full(n_data->slave, free); g_list_free_full(n_data->active, free); g_list_free_full(n_data->inactive, free); g_hash_table_destroy(n_data->keys); free(n_data); } int transition_id = -1; /* * Create a dependency graph to send to the transitioner (via the CRMd) */ gboolean stage8(pe_working_set_t * data_set) { GListPtr gIter = NULL; const char *value = NULL; transition_id++; crm_trace("Creating transition graph %d.", transition_id); data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH); value = pe_pref(data_set->config_hash, "cluster-delay"); crm_xml_add(data_set->graph, "cluster-delay", value); value = pe_pref(data_set->config_hash, "stonith-timeout"); crm_xml_add(data_set->graph, "stonith-timeout", value); crm_xml_add(data_set->graph, "failed-stop-offset", "INFINITY"); if (is_set(data_set->flags, pe_flag_start_failure_fatal)) { crm_xml_add(data_set->graph, "failed-start-offset", "INFINITY"); } else { crm_xml_add(data_set->graph, "failed-start-offset", "1"); } value = pe_pref(data_set->config_hash, "batch-limit"); crm_xml_add(data_set->graph, "batch-limit", value); crm_xml_add_int(data_set->graph, "transition_id", transition_id); value = pe_pref(data_set->config_hash, "migration-limit"); if (crm_int_helper(value, NULL) > 0) { crm_xml_add(data_set->graph, "migration-limit", value); } /* errors... slist_iter(action, action_t, action_list, lpc, if(action->optional == FALSE && action->runnable == FALSE) { print_action("Ignoring", action, TRUE); } ); */ gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; pe_rsc_trace(rsc, "processing actions for rsc=%s", rsc->id); rsc->cmds->expand(rsc, data_set); } crm_log_xml_trace(data_set->graph, "created resource-driven action list"); /* catch any non-resource specific actions */ crm_trace("processing non-resource actions"); gIter = data_set->actions; for (; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; if (action->rsc && action->node && action->node->details->shutdown && is_not_set(action->rsc->flags, pe_rsc_maintenance) && is_not_set(action->flags, pe_action_optional) && is_not_set(action->flags, pe_action_runnable) && crm_str_eq(action->task, RSC_STOP, TRUE) ) { /* Eventually we should just ignore the 'fence' case * But for now its the best way to detect (in CTS) when * CIB resource updates are being lost */ if (is_set(data_set->flags, pe_flag_have_quorum) || data_set->no_quorum_policy == no_quorum_ignore) { crm_crit("Cannot %s node '%s' because of %s:%s%s", action->node->details->unclean ? "fence" : "shut down", action->node->details->uname, action->rsc->id, is_not_set(action->rsc->flags, pe_rsc_managed) ? " unmanaged" : " blocked", is_set(action->rsc->flags, pe_rsc_failed) ? " failed" : ""); } } graph_element_from_action(action, data_set); } crm_log_xml_trace(data_set->graph, "created generic action list"); crm_trace("Created transition graph %d.", transition_id); return TRUE; } void cleanup_alloc_calculations(pe_working_set_t * data_set) { if (data_set == NULL) { return; } crm_trace("deleting %d order cons: %p", g_list_length(data_set->ordering_constraints), data_set->ordering_constraints); pe_free_ordering(data_set->ordering_constraints); data_set->ordering_constraints = NULL; crm_trace("deleting %d node cons: %p", g_list_length(data_set->placement_constraints), data_set->placement_constraints); pe_free_rsc_to_node(data_set->placement_constraints); data_set->placement_constraints = NULL; crm_trace("deleting %d inter-resource cons: %p", g_list_length(data_set->colocation_constraints), data_set->colocation_constraints); g_list_free_full(data_set->colocation_constraints, free); data_set->colocation_constraints = NULL; crm_trace("deleting %d ticket deps: %p", g_list_length(data_set->ticket_constraints), data_set->ticket_constraints); g_list_free_full(data_set->ticket_constraints, free); data_set->ticket_constraints = NULL; cleanup_calculations(data_set); } diff --git a/pengine/constraints.c b/pengine/constraints.c index 1aa848ef08..88e382b83d 100644 --- a/pengine/constraints.c +++ b/pengine/constraints.c @@ -1,2753 +1,2755 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include enum pe_order_kind { pe_order_kind_optional, pe_order_kind_mandatory, pe_order_kind_serialize, }; #define EXPAND_CONSTRAINT_IDREF(__set, __rsc, __name) do { \ __rsc = pe_find_constraint_resource(data_set->resources, __name); \ if(__rsc == NULL) { \ crm_config_err("%s: No resource found for %s", __set, __name); \ return FALSE; \ } \ } while(0) enum pe_ordering get_flags(const char *id, enum pe_order_kind kind, const char *action_first, const char *action_then, gboolean invert); enum pe_ordering get_asymmetrical_flags(enum pe_order_kind kind); gboolean unpack_constraints(xmlNode * xml_constraints, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; xmlNode *lifetime = NULL; for (xml_obj = __xml_first_child(xml_constraints); xml_obj != NULL; xml_obj = __xml_next(xml_obj)) { const char *id = crm_element_value(xml_obj, XML_ATTR_ID); if (id == NULL) { crm_config_err("Constraint <%s...> must have an id", crm_element_name(xml_obj)); continue; } crm_trace("Processing constraint %s %s", crm_element_name(xml_obj), id); lifetime = first_named_child(xml_obj, "lifetime"); if (lifetime) { crm_config_warn("Support for the lifetime tag, used by %s, is deprecated." " The rules it contains should instead be direct decendants of the constraint object", id); } if (test_ruleset(lifetime, NULL, data_set->now) == FALSE) { crm_info("Constraint %s %s is not active", crm_element_name(xml_obj), id); } else if (safe_str_eq(XML_CONS_TAG_RSC_ORDER, crm_element_name(xml_obj))) { unpack_rsc_order(xml_obj, data_set); } else if (safe_str_eq(XML_CONS_TAG_RSC_DEPEND, crm_element_name(xml_obj))) { unpack_rsc_colocation(xml_obj, data_set); } else if (safe_str_eq(XML_CONS_TAG_RSC_LOCATION, crm_element_name(xml_obj))) { unpack_location(xml_obj, data_set); } else if (safe_str_eq(XML_CONS_TAG_RSC_TICKET, crm_element_name(xml_obj))) { unpack_rsc_ticket(xml_obj, data_set); } else { pe_err("Unsupported constraint type: %s", crm_element_name(xml_obj)); } } return TRUE; } static const char * invert_action(const char *action) { if (safe_str_eq(action, RSC_START)) { return RSC_STOP; } else if (safe_str_eq(action, RSC_STOP)) { return RSC_START; } else if (safe_str_eq(action, RSC_PROMOTE)) { return RSC_DEMOTE; } else if (safe_str_eq(action, RSC_DEMOTE)) { return RSC_PROMOTE; } else if (safe_str_eq(action, RSC_PROMOTED)) { return RSC_DEMOTED; } else if (safe_str_eq(action, RSC_DEMOTED)) { return RSC_PROMOTED; } else if (safe_str_eq(action, RSC_STARTED)) { return RSC_STOPPED; } else if (safe_str_eq(action, RSC_STOPPED)) { return RSC_STARTED; } crm_config_warn("Unknown action: %s", action); return NULL; } static enum pe_order_kind get_ordering_type(xmlNode * xml_obj) { enum pe_order_kind kind_e = pe_order_kind_mandatory; const char *kind = crm_element_value(xml_obj, XML_ORDER_ATTR_KIND); if (kind == NULL) { const char *score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE); kind_e = pe_order_kind_mandatory; if (score) { int score_i = char2score(score); if (score_i == 0) { kind_e = pe_order_kind_optional; } /* } else if(rsc_then->variant == pe_native && rsc_first->variant > pe_group) { */ /* kind_e = pe_order_kind_optional; */ } } else if (safe_str_eq(kind, "Mandatory")) { kind_e = pe_order_kind_mandatory; } else if (safe_str_eq(kind, "Optional")) { kind_e = pe_order_kind_optional; } else if (safe_str_eq(kind, "Serialize")) { kind_e = pe_order_kind_serialize; } else { const char *id = crm_element_value(xml_obj, XML_ATTR_ID); crm_config_err("Constraint %s: Unknown type '%s'", id, kind); } return kind_e; } static resource_t * pe_find_constraint_resource(GListPtr rsc_list, const char *id) { GListPtr rIter = NULL; for (rIter = rsc_list; id && rIter; rIter = rIter->next) { resource_t *parent = rIter->data; resource_t *match = parent->fns->find_rsc(parent, id, NULL, pe_find_renamed | pe_find_current); if (match != NULL) { if(safe_str_neq(match->id, id)) { /* We found an instance of a clone instead */ match = uber_parent(match); crm_debug("Found %s for %s", match->id, id); } return match; } } crm_trace("No match for %s", id); return NULL; } static gboolean pe_find_constraint_tag(pe_working_set_t * data_set, const char * id, tag_t ** tag) { gboolean rc = FALSE; *tag = NULL; rc = g_hash_table_lookup_extended(data_set->template_rsc_sets, id, NULL, (gpointer*) tag); if (rc == FALSE) { rc = g_hash_table_lookup_extended(data_set->tags, id, NULL, (gpointer*) tag); if (rc == FALSE) { crm_config_warn("No template/tag named '%s'", id); return FALSE; } else if (*tag == NULL) { crm_config_warn("No resource is tagged with '%s'", id); return FALSE; } } else if (*tag == NULL) { crm_config_warn("No resource is derived from template '%s'", id); return FALSE; } return rc; } static gboolean valid_resource_or_tag(pe_working_set_t * data_set, const char * id, resource_t ** rsc, tag_t ** tag) { gboolean rc = FALSE; if (rsc) { *rsc = NULL; *rsc = pe_find_constraint_resource(data_set->resources, id); if (*rsc) { return TRUE; } } if (tag) { *tag = NULL; rc = pe_find_constraint_tag(data_set, id, tag); } return rc; } static gboolean unpack_simple_rsc_order(xmlNode * xml_obj, pe_working_set_t * data_set) { int order_id = 0; resource_t *rsc_then = NULL; resource_t *rsc_first = NULL; gboolean invert_bool = TRUE; enum pe_order_kind kind = pe_order_kind_mandatory; enum pe_ordering cons_weight = pe_order_optional; const char *id_first = NULL; const char *id_then = NULL; const char *action_then = NULL; const char *action_first = NULL; const char *instance_then = NULL; const char *instance_first = NULL; const char *id = crm_element_value(xml_obj, XML_ATTR_ID); const char *invert = crm_element_value(xml_obj, XML_CONS_ATTR_SYMMETRICAL); crm_str_to_boolean(invert, &invert_bool); if (xml_obj == NULL) { crm_config_err("No constraint object to process."); return FALSE; } else if (id == NULL) { crm_config_err("%s constraint must have an id", crm_element_name(xml_obj)); return FALSE; } id_then = crm_element_value(xml_obj, XML_ORDER_ATTR_THEN); id_first = crm_element_value(xml_obj, XML_ORDER_ATTR_FIRST); action_then = crm_element_value(xml_obj, XML_ORDER_ATTR_THEN_ACTION); action_first = crm_element_value(xml_obj, XML_ORDER_ATTR_FIRST_ACTION); instance_then = crm_element_value(xml_obj, XML_ORDER_ATTR_THEN_INSTANCE); instance_first = crm_element_value(xml_obj, XML_ORDER_ATTR_FIRST_INSTANCE); if (action_first == NULL) { action_first = RSC_START; } if (action_then == NULL) { action_then = action_first; } if (id_then == NULL || id_first == NULL) { crm_config_err("Constraint %s needs two sides lh: %s rh: %s", id, crm_str(id_then), crm_str(id_first)); return FALSE; } rsc_then = pe_find_constraint_resource(data_set->resources, id_then); rsc_first = pe_find_constraint_resource(data_set->resources, id_first); if (rsc_then == NULL) { crm_config_err("Constraint %s: no resource found for name '%s'", id, id_then); return FALSE; } else if (rsc_first == NULL) { crm_config_err("Constraint %s: no resource found for name '%s'", id, id_first); return FALSE; } else if (instance_then && rsc_then->variant < pe_clone) { crm_config_err("Invalid constraint '%s':" " Resource '%s' is not a clone but instance %s was requested", id, id_then, instance_then); return FALSE; } else if (instance_first && rsc_first->variant < pe_clone) { crm_config_err("Invalid constraint '%s':" " Resource '%s' is not a clone but instance %s was requested", id, id_first, instance_first); return FALSE; } if (instance_then) { rsc_then = find_clone_instance(rsc_then, instance_then, data_set); if (rsc_then == NULL) { crm_config_warn("Invalid constraint '%s': No instance '%s' of '%s'", id, instance_then, id_then); return FALSE; } } if (instance_first) { rsc_first = find_clone_instance(rsc_first, instance_first, data_set); if (rsc_first == NULL) { crm_config_warn("Invalid constraint '%s': No instance '%s' of '%s'", id, instance_first, id_first); return FALSE; } } cons_weight = pe_order_optional; kind = get_ordering_type(xml_obj); if (kind == pe_order_kind_optional && rsc_then->restart_type == pe_restart_restart) { crm_trace("Upgrade : recovery - implies right"); cons_weight |= pe_order_implies_then; } if (invert_bool == FALSE) { cons_weight |= get_asymmetrical_flags(kind); } else { cons_weight |= get_flags(id, kind, action_first, action_then, FALSE); } order_id = new_rsc_order(rsc_first, action_first, rsc_then, action_then, cons_weight, data_set); pe_rsc_trace(rsc_first, "order-%d (%s): %s_%s before %s_%s flags=0x%.6x", order_id, id, rsc_first->id, action_first, rsc_then->id, action_then, cons_weight); if (invert_bool == FALSE) { return TRUE; } else if (invert && kind == pe_order_kind_serialize) { crm_config_warn("Cannot invert serialized constraint set %s", id); return TRUE; } else if (kind == pe_order_kind_serialize) { return TRUE; } action_then = invert_action(action_then); action_first = invert_action(action_first); if (action_then == NULL || action_first == NULL) { crm_config_err("Cannot invert rsc_order constraint %s." " Please specify the inverse manually.", id); return TRUE; } cons_weight = pe_order_optional; if (kind == pe_order_kind_optional && rsc_then->restart_type == pe_restart_restart) { crm_trace("Upgrade : recovery - implies left"); cons_weight |= pe_order_implies_first; } cons_weight |= get_flags(id, kind, action_first, action_then, TRUE); order_id = new_rsc_order(rsc_then, action_then, rsc_first, action_first, cons_weight, data_set); pe_rsc_trace(rsc_then, "order-%d (%s): %s_%s before %s_%s flags=0x%.6x", order_id, id, rsc_then->id, action_then, rsc_first->id, action_first, cons_weight); return TRUE; } static gboolean expand_tags_in_sets(xmlNode * xml_obj, xmlNode ** expanded_xml, pe_working_set_t * data_set) { xmlNode *new_xml = NULL; xmlNode *set = NULL; gboolean any_refs = FALSE; const char *cons_id = NULL; *expanded_xml = NULL; if (xml_obj == NULL) { crm_config_err("No constraint object to process."); return FALSE; } new_xml = copy_xml(xml_obj); cons_id = ID(new_xml); for (set = __xml_first_child(new_xml); set != NULL; set = __xml_next(set)) { xmlNode *xml_rsc = NULL; GListPtr tag_refs = NULL; GListPtr gIter = NULL; if (safe_str_neq((const char *)set->name, XML_CONS_TAG_RSC_SET)) { continue; } for (xml_rsc = __xml_first_child(set); xml_rsc != NULL; xml_rsc = __xml_next(xml_rsc)) { resource_t *rsc = NULL; tag_t *tag = NULL; const char *id = ID(xml_rsc); if (safe_str_neq((const char *)xml_rsc->name, XML_TAG_RESOURCE_REF)) { continue; } if (valid_resource_or_tag(data_set, id, &rsc, &tag) == FALSE) { crm_config_err("Constraint '%s': Invalid reference to '%s'", cons_id, id); free_xml(new_xml); return FALSE; } else if (rsc) { continue; } else if (tag) { /* The resource_ref under the resource_set references a template/tag */ xmlNode *last_ref = xml_rsc; /* A sample: Original XML: Now we are appending rsc2 and rsc3 which are tagged with tag1 right after it: */ for (gIter = tag->refs; gIter != NULL; gIter = gIter->next) { const char *obj_ref = (const char *) gIter->data; xmlNode *new_rsc_ref = NULL; new_rsc_ref = xmlNewDocRawNode(getDocPtr(set), NULL, (const xmlChar *)XML_TAG_RESOURCE_REF, NULL); crm_xml_add(new_rsc_ref, XML_ATTR_ID, obj_ref); xmlAddNextSibling(last_ref, new_rsc_ref); last_ref = new_rsc_ref; } any_refs = TRUE; /* Do not directly free ''. That would break the further __xml_next(xml_rsc)) and cause "Invalid read" seen by valgrind. So just record it into a hash table for freeing it later. */ tag_refs = g_list_append(tag_refs, xml_rsc); } } /* Now free '', and finally get: */ for (gIter = tag_refs; gIter != NULL; gIter = gIter->next) { xmlNode *tag_ref = gIter->data; free_xml(tag_ref); } g_list_free(tag_refs); } if (any_refs) { *expanded_xml = new_xml; } else { free_xml(new_xml); } return TRUE; } static gboolean tag_to_set(xmlNode * xml_obj, xmlNode ** rsc_set, const char * attr, gboolean convert_rsc, pe_working_set_t * data_set) { const char *cons_id = NULL; const char *id = NULL; resource_t *rsc = NULL; tag_t *tag = NULL; *rsc_set = NULL; if (xml_obj == NULL) { crm_config_err("No constraint object to process."); return FALSE; } if (attr == NULL) { crm_config_err("No attribute name to process."); return FALSE; } cons_id = crm_element_value(xml_obj, XML_ATTR_ID); if (cons_id == NULL) { crm_config_err("%s constraint must have an id", crm_element_name(xml_obj)); return FALSE; } id = crm_element_value(xml_obj, attr); if (id == NULL) { return TRUE; } if (valid_resource_or_tag(data_set, id, &rsc, &tag) == FALSE) { crm_config_err("Constraint '%s': Invalid reference to '%s'", cons_id, id); return FALSE; } else if (tag) { GListPtr gIter = NULL; /* A template/tag is referenced by the "attr" attribute (first, then, rsc or with-rsc). Add the template/tag's corresponding "resource_set" which contains the resources derived from it or tagged with it under the constraint. */ *rsc_set = create_xml_node(xml_obj, XML_CONS_TAG_RSC_SET); crm_xml_add(*rsc_set, XML_ATTR_ID, id); for (gIter = tag->refs; gIter != NULL; gIter = gIter->next) { const char *obj_ref = (const char *) gIter->data; xmlNode *rsc_ref = NULL; rsc_ref = create_xml_node(*rsc_set, XML_TAG_RESOURCE_REF); crm_xml_add(rsc_ref, XML_ATTR_ID, obj_ref); } /* Set sequential="false" for the resource_set */ crm_xml_add(*rsc_set, "sequential", XML_BOOLEAN_FALSE); } else if (rsc && convert_rsc) { /* Even a regular resource is referenced by "attr", convert it into a resource_set. Because the other side of the constraint could be a template/tag reference. */ xmlNode *rsc_ref = NULL; *rsc_set = create_xml_node(xml_obj, XML_CONS_TAG_RSC_SET); crm_xml_add(*rsc_set, XML_ATTR_ID, id); rsc_ref = create_xml_node(*rsc_set, XML_TAG_RESOURCE_REF); crm_xml_add(rsc_ref, XML_ATTR_ID, id); } else { return TRUE; } /* Remove the "attr" attribute referencing the template/tag */ if (*rsc_set) { xml_remove_prop(xml_obj, attr); } return TRUE; } gboolean unpack_rsc_location(xmlNode * xml_obj, resource_t * rsc_lh, const char * role, const char * score, pe_working_set_t * data_set); static gboolean unpack_simple_location(xmlNode * xml_obj, pe_working_set_t * data_set) { const char *id = crm_element_value(xml_obj, XML_ATTR_ID); const char *value = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE); if(value) { resource_t *rsc_lh = pe_find_constraint_resource(data_set->resources, value); return unpack_rsc_location(xml_obj, rsc_lh, NULL, NULL, data_set); } value = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE"-pattern"); if(value) { regex_t *r_patt = calloc(1, sizeof(regex_t)); bool invert = FALSE; GListPtr rIter = NULL; if(value[0] == '!') { value++; invert = TRUE; } if (regcomp(r_patt, value, REG_EXTENDED)) { crm_config_err("Bad regex '%s' for constraint '%s'\n", value, id); regfree(r_patt); free(r_patt); return FALSE; } for (rIter = data_set->resources; rIter; rIter = rIter->next) { resource_t *r = rIter->data; int status = regexec(r_patt, r->id, 0, NULL, 0); if(invert == FALSE && status == 0) { crm_debug("'%s' matched '%s' for %s", r->id, value, id); unpack_rsc_location(xml_obj, r, NULL, NULL, data_set); } if(invert && status != 0) { crm_debug("'%s' is an inverted match of '%s' for %s", r->id, value, id); unpack_rsc_location(xml_obj, r, NULL, NULL, data_set); } else { crm_trace("'%s' does not match '%s' for %s", r->id, value, id); } } regfree(r_patt); free(r_patt); } return FALSE; } gboolean unpack_rsc_location(xmlNode * xml_obj, resource_t * rsc_lh, const char * role, const char * score, pe_working_set_t * data_set) { gboolean empty = TRUE; rsc_to_node_t *location = NULL; const char *id_lh = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE); const char *id = crm_element_value(xml_obj, XML_ATTR_ID); const char *node = crm_element_value(xml_obj, XML_CIB_TAG_NODE); + const char *discovery = crm_element_value(xml_obj, XML_LOCATION_ATTR_DISCOVERY); if (rsc_lh == NULL) { /* only a warn as BSC adds the constraint then the resource */ crm_config_warn("No resource (con=%s, rsc=%s)", id, id_lh); return FALSE; } if (score == NULL) { score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE); } if (node != NULL && score != NULL) { int score_i = char2score(score); node_t *match = pe_find_node(data_set->nodes, node); if (!match) { return FALSE; } - location = rsc2node_new(id, rsc_lh, score_i, match, data_set); + location = rsc2node_new(id, rsc_lh, score_i, discovery, match, data_set); } else { xmlNode *rule_xml = NULL; for (rule_xml = __xml_first_child(xml_obj); rule_xml != NULL; rule_xml = __xml_next(rule_xml)) { if (crm_str_eq((const char *)rule_xml->name, XML_TAG_RULE, TRUE)) { empty = FALSE; crm_trace("Unpacking %s/%s", id, ID(rule_xml)); generate_location_rule(rsc_lh, rule_xml, data_set); } } if (empty) { crm_config_err("Invalid location constraint %s:" " rsc_location must contain at least one rule", ID(xml_obj)); } } if (role == NULL) { role = crm_element_value(xml_obj, XML_RULE_ATTR_ROLE); } if (location && role) { if (text2role(role) == RSC_ROLE_UNKNOWN) { pe_err("Invalid constraint %s: Bad role %s", id, role); return FALSE; } else { enum rsc_role_e r = text2role(role); switch(r) { case RSC_ROLE_UNKNOWN: case RSC_ROLE_STARTED: case RSC_ROLE_SLAVE: /* Applies to all */ location->role_filter = RSC_ROLE_UNKNOWN; break; default: location->role_filter = r; break; } } } + return TRUE; } static gboolean unpack_location_tags(xmlNode * xml_obj, xmlNode ** expanded_xml, pe_working_set_t * data_set) { const char *id = NULL; const char *id_lh = NULL; const char *state_lh = NULL; resource_t *rsc_lh = NULL; tag_t *tag_lh = NULL; xmlNode *new_xml = NULL; xmlNode *rsc_set_lh = NULL; gboolean any_sets = FALSE; *expanded_xml = NULL; if (xml_obj == NULL) { crm_config_err("No constraint object to process."); return FALSE; } id = crm_element_value(xml_obj, XML_ATTR_ID); if (id == NULL) { crm_config_err("%s constraint must have an id", crm_element_name(xml_obj)); return FALSE; } /* Attempt to expand any template/tag references in possible resource sets. */ expand_tags_in_sets(xml_obj, &new_xml, data_set); if (new_xml) { /* There are resource sets referencing templates. Return with the expanded XML. */ crm_log_xml_trace(new_xml, "Expanded rsc_location..."); *expanded_xml = new_xml; return TRUE; } id_lh = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE); if (id_lh == NULL) { return TRUE; } if (valid_resource_or_tag(data_set, id_lh, &rsc_lh, &tag_lh) == FALSE) { crm_config_err("Constraint '%s': Invalid reference to '%s'", id, id_lh); return FALSE; } else if (rsc_lh) { /* No template is referenced. */ return TRUE; } state_lh = crm_element_value(xml_obj, XML_RULE_ATTR_ROLE); new_xml = copy_xml(xml_obj); /* Convert the template/tag reference in "rsc" into a resource_set under the rsc_location constraint. */ if (tag_to_set(new_xml, &rsc_set_lh, XML_COLOC_ATTR_SOURCE, FALSE, data_set) == FALSE) { free_xml(new_xml); return FALSE; } if (rsc_set_lh) { if (state_lh) { /* A "rsc-role" is specified. Move it into the converted resource_set as a "role"" attribute. */ crm_xml_add(rsc_set_lh, "role", state_lh); xml_remove_prop(new_xml, XML_RULE_ATTR_ROLE); } any_sets = TRUE; } if (any_sets) { crm_log_xml_trace(new_xml, "Expanded rsc_location..."); *expanded_xml = new_xml; } else { free_xml(new_xml); } return TRUE; } static gboolean unpack_location_set(xmlNode * location, xmlNode * set, pe_working_set_t * data_set) { xmlNode *xml_rsc = NULL; resource_t *resource = NULL; const char *set_id = ID(set); const char *role = crm_element_value(set, "role"); const char *local_score = crm_element_value(set, XML_RULE_ATTR_SCORE); if (set == NULL) { crm_config_err("No resource_set object to process."); return FALSE; } if (set_id == NULL) { crm_config_err("resource_set must have an id"); return FALSE; } for (xml_rsc = __xml_first_child(set); xml_rsc != NULL; xml_rsc = __xml_next(xml_rsc)) { if (crm_str_eq((const char *)xml_rsc->name, XML_TAG_RESOURCE_REF, TRUE)) { EXPAND_CONSTRAINT_IDREF(set_id, resource, ID(xml_rsc)); unpack_rsc_location(location, resource, role, local_score, data_set); } } return TRUE; } gboolean unpack_location(xmlNode * xml_obj, pe_working_set_t * data_set) { xmlNode *set = NULL; gboolean any_sets = FALSE; xmlNode *orig_xml = NULL; xmlNode *expanded_xml = NULL; const char *id = crm_element_value(xml_obj, XML_ATTR_ID); gboolean rc = TRUE; if (xml_obj == NULL) { crm_config_err("No rsc_location constraint object to process."); return FALSE; } if (id == NULL) { crm_config_err("%s constraint must have an id", crm_element_name(xml_obj)); return FALSE; } rc = unpack_location_tags(xml_obj, &expanded_xml, data_set); if (expanded_xml) { orig_xml = xml_obj; xml_obj = expanded_xml; } else if (rc == FALSE) { return FALSE; } for (set = __xml_first_child(xml_obj); set != NULL; set = __xml_next(set)) { if (crm_str_eq((const char *)set->name, XML_CONS_TAG_RSC_SET, TRUE)) { any_sets = TRUE; set = expand_idref(set, data_set->input); if (unpack_location_set(xml_obj, set, data_set) == FALSE) { return FALSE; } } } if (expanded_xml) { free_xml(expanded_xml); xml_obj = orig_xml; } if (any_sets == FALSE) { return unpack_simple_location(xml_obj, data_set); } return TRUE; } static int get_node_score(const char *rule, const char *score, gboolean raw, node_t * node) { int score_f = 0; if (score == NULL) { pe_err("Rule %s: no score specified. Assuming 0.", rule); } else if (raw) { score_f = char2score(score); } else { const char *attr_score = g_hash_table_lookup(node->details->attrs, score); if (attr_score == NULL) { crm_debug("Rule %s: node %s did not have a value for %s", rule, node->details->uname, score); score_f = -INFINITY; } else { crm_debug("Rule %s: node %s had value %s for %s", rule, node->details->uname, attr_score, score); score_f = char2score(attr_score); } } return score_f; } rsc_to_node_t * generate_location_rule(resource_t * rsc, xmlNode * rule_xml, pe_working_set_t * data_set) { const char *rule_id = NULL; const char *score = NULL; const char *boolean = NULL; const char *role = NULL; GListPtr gIter = NULL; GListPtr match_L = NULL; gboolean do_and = TRUE; gboolean accept = TRUE; gboolean raw_score = TRUE; rsc_to_node_t *location_rule = NULL; rule_xml = expand_idref(rule_xml, data_set->input); rule_id = crm_element_value(rule_xml, XML_ATTR_ID); boolean = crm_element_value(rule_xml, XML_RULE_ATTR_BOOLEAN_OP); role = crm_element_value(rule_xml, XML_RULE_ATTR_ROLE); crm_trace("Processing rule: %s", rule_id); if (role != NULL && text2role(role) == RSC_ROLE_UNKNOWN) { pe_err("Bad role specified for %s: %s", rule_id, role); return NULL; } score = crm_element_value(rule_xml, XML_RULE_ATTR_SCORE); if (score == NULL) { score = crm_element_value(rule_xml, XML_RULE_ATTR_SCORE_ATTRIBUTE); if (score == NULL) { score = crm_element_value(rule_xml, XML_RULE_ATTR_SCORE_MANGLED); } if (score != NULL) { raw_score = FALSE; } } if (safe_str_eq(boolean, "or")) { do_and = FALSE; } - location_rule = rsc2node_new(rule_id, rsc, 0, NULL, data_set); + location_rule = rsc2node_new(rule_id, rsc, 0, NULL, NULL, data_set); if (location_rule == NULL) { return NULL; } if (role != NULL) { crm_trace("Setting role filter: %s", role); location_rule->role_filter = text2role(role); if (location_rule->role_filter == RSC_ROLE_SLAVE) { /* Any master/slave cannot be promoted without being a slave first * Ergo, any constraint for the slave role applies to every role */ location_rule->role_filter = RSC_ROLE_UNKNOWN; } } if (do_and) { GListPtr gIter = NULL; match_L = node_list_dup(data_set->nodes, TRUE, FALSE); for (gIter = match_L; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; node->weight = get_node_score(rule_id, score, raw_score, node); } } for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { int score_f = 0; node_t *node = (node_t *) gIter->data; accept = test_rule(rule_xml, node->details->attrs, RSC_ROLE_UNKNOWN, data_set->now); crm_trace("Rule %s %s on %s", ID(rule_xml), accept ? "passed" : "failed", node->details->uname); score_f = get_node_score(rule_id, score, raw_score, node); /* if(accept && score_f == -INFINITY) { */ /* accept = FALSE; */ /* } */ if (accept) { node_t *local = pe_find_node_id(match_L, node->details->id); if (local == NULL && do_and) { continue; } else if (local == NULL) { local = node_copy(node); match_L = g_list_append(match_L, local); } if (do_and == FALSE) { local->weight = merge_weights(local->weight, score_f); } crm_trace("node %s now has weight %d", node->details->uname, local->weight); } else if (do_and && !accept) { /* remove it */ node_t *delete = pe_find_node_id(match_L, node->details->id); if (delete != NULL) { match_L = g_list_remove(match_L, delete); crm_trace("node %s did not match", node->details->uname); } free(delete); } } location_rule->node_list_rh = match_L; if (location_rule->node_list_rh == NULL) { crm_trace("No matching nodes for rule %s", rule_id); return NULL; } crm_trace("%s: %d nodes matched", rule_id, g_list_length(location_rule->node_list_rh)); return location_rule; } static gint sort_cons_priority_lh(gconstpointer a, gconstpointer b) { const rsc_colocation_t *rsc_constraint1 = (const rsc_colocation_t *)a; const rsc_colocation_t *rsc_constraint2 = (const rsc_colocation_t *)b; if (a == NULL) { return 1; } if (b == NULL) { return -1; } CRM_ASSERT(rsc_constraint1->rsc_lh != NULL); CRM_ASSERT(rsc_constraint1->rsc_rh != NULL); if (rsc_constraint1->rsc_lh->priority > rsc_constraint2->rsc_lh->priority) { return -1; } if (rsc_constraint1->rsc_lh->priority < rsc_constraint2->rsc_lh->priority) { return 1; } /* Process clones before primitives and groups */ if (rsc_constraint1->rsc_lh->variant > rsc_constraint2->rsc_lh->variant) { return -1; } else if (rsc_constraint1->rsc_lh->variant < rsc_constraint2->rsc_lh->variant) { return 1; } return strcmp(rsc_constraint1->rsc_lh->id, rsc_constraint2->rsc_lh->id); } static gint sort_cons_priority_rh(gconstpointer a, gconstpointer b) { const rsc_colocation_t *rsc_constraint1 = (const rsc_colocation_t *)a; const rsc_colocation_t *rsc_constraint2 = (const rsc_colocation_t *)b; if (a == NULL) { return 1; } if (b == NULL) { return -1; } CRM_ASSERT(rsc_constraint1->rsc_lh != NULL); CRM_ASSERT(rsc_constraint1->rsc_rh != NULL); if (rsc_constraint1->rsc_rh->priority > rsc_constraint2->rsc_rh->priority) { return -1; } if (rsc_constraint1->rsc_rh->priority < rsc_constraint2->rsc_rh->priority) { return 1; } /* Process clones before primitives and groups */ if (rsc_constraint1->rsc_rh->variant > rsc_constraint2->rsc_rh->variant) { return -1; } else if (rsc_constraint1->rsc_rh->variant < rsc_constraint2->rsc_rh->variant) { return 1; } return strcmp(rsc_constraint1->rsc_rh->id, rsc_constraint2->rsc_rh->id); } gboolean rsc_colocation_new(const char *id, const char *node_attr, int score, resource_t * rsc_lh, resource_t * rsc_rh, const char *state_lh, const char *state_rh, pe_working_set_t * data_set) { rsc_colocation_t *new_con = NULL; if (rsc_lh == NULL) { crm_config_err("No resource found for LHS %s", id); return FALSE; } else if (rsc_rh == NULL) { crm_config_err("No resource found for RHS of %s", id); return FALSE; } new_con = calloc(1, sizeof(rsc_colocation_t)); if (new_con == NULL) { return FALSE; } if (state_lh == NULL || safe_str_eq(state_lh, RSC_ROLE_STARTED_S)) { state_lh = RSC_ROLE_UNKNOWN_S; } if (state_rh == NULL || safe_str_eq(state_rh, RSC_ROLE_STARTED_S)) { state_rh = RSC_ROLE_UNKNOWN_S; } new_con->id = id; new_con->rsc_lh = rsc_lh; new_con->rsc_rh = rsc_rh; new_con->score = score; new_con->role_lh = text2role(state_lh); new_con->role_rh = text2role(state_rh); new_con->node_attribute = node_attr; if (node_attr == NULL) { node_attr = "#" XML_ATTR_UNAME; } pe_rsc_trace(rsc_lh, "%s ==> %s (%s %d)", rsc_lh->id, rsc_rh->id, node_attr, score); rsc_lh->rsc_cons = g_list_insert_sorted(rsc_lh->rsc_cons, new_con, sort_cons_priority_rh); rsc_rh->rsc_cons_lhs = g_list_insert_sorted(rsc_rh->rsc_cons_lhs, new_con, sort_cons_priority_lh); data_set->colocation_constraints = g_list_append(data_set->colocation_constraints, new_con); if (score <= -INFINITY) { new_rsc_order(rsc_lh, CRMD_ACTION_STOP, rsc_rh, CRMD_ACTION_START, pe_order_anti_colocation, data_set); new_rsc_order(rsc_rh, CRMD_ACTION_STOP, rsc_lh, CRMD_ACTION_START, pe_order_anti_colocation, data_set); } return TRUE; } /* LHS before RHS */ int new_rsc_order(resource_t * lh_rsc, const char *lh_task, resource_t * rh_rsc, const char *rh_task, enum pe_ordering type, pe_working_set_t * data_set) { char *lh_key = NULL; char *rh_key = NULL; CRM_CHECK(lh_rsc != NULL, return -1); CRM_CHECK(lh_task != NULL, return -1); CRM_CHECK(rh_rsc != NULL, return -1); CRM_CHECK(rh_task != NULL, return -1); /* We no longer need to test if these reference stonith resources * now that stonithd has access to them even when they're not "running" * if (validate_order_resources(lh_rsc, lh_task, rh_rsc, rh_task)) { return -1; } */ lh_key = generate_op_key(lh_rsc->id, lh_task, 0); rh_key = generate_op_key(rh_rsc->id, rh_task, 0); return custom_action_order(lh_rsc, lh_key, NULL, rh_rsc, rh_key, NULL, type, data_set); } static char * task_from_action_or_key(action_t *action, const char *key) { char *res = NULL; char *rsc_id = NULL; char *op_type = NULL; int interval = 0; if (action) { res = strdup(action->task); } else if (key) { int rc = 0; rc = parse_op_key(key, &rsc_id, &op_type, &interval); if (rc == TRUE) { res = op_type; op_type = NULL; } free(rsc_id); free(op_type); } return res; } /* when order constraints are made between two resources start and stop actions * those constraints have to be mirrored against the corresponding * migration actions to ensure start/stop ordering is preserved during * a migration */ static void handle_migration_ordering(order_constraint_t *order, pe_working_set_t *data_set) { char *lh_task = NULL; char *rh_task = NULL; gboolean rh_migratable; gboolean lh_migratable; if (order->lh_rsc == NULL || order->rh_rsc == NULL) { return; } else if (order->lh_rsc == order->rh_rsc) { return; /* don't mess with those constraints built between parent * resources and the children */ } else if (is_parent(order->lh_rsc, order->rh_rsc)) { return; } else if (is_parent(order->rh_rsc, order->lh_rsc)) { return; } lh_migratable = is_set(order->lh_rsc->flags, pe_rsc_allow_migrate); rh_migratable = is_set(order->rh_rsc->flags, pe_rsc_allow_migrate); /* one of them has to be migratable for * the migrate ordering logic to be applied */ if (lh_migratable == FALSE && rh_migratable == FALSE) { return; } /* at this point we have two resources which allow migrations that have an * order dependency set between them. If those order dependencies involve * start/stop actions, we need to mirror the corresponding migrate actions * so order will be preserved. */ lh_task = task_from_action_or_key(order->lh_action, order->lh_action_task); rh_task = task_from_action_or_key(order->rh_action, order->rh_action_task); if (lh_task == NULL || rh_task == NULL) { goto cleanup_order; } if (safe_str_eq(lh_task, RSC_START) && safe_str_eq(rh_task, RSC_START)) { int flags = pe_order_optional; if (lh_migratable && rh_migratable) { /* A start then B start * A migrate_from then B migrate_to */ custom_action_order(order->lh_rsc, generate_op_key(order->lh_rsc->id, RSC_MIGRATED, 0), NULL, order->rh_rsc, generate_op_key(order->rh_rsc->id, RSC_MIGRATE, 0), NULL, flags, data_set); } if (rh_migratable) { if (lh_migratable) { flags |= pe_order_apply_first_non_migratable; } /* A start then B start * A start then B migrate_to... only if A start is not a part of a migration*/ custom_action_order(order->lh_rsc, generate_op_key(order->lh_rsc->id, RSC_START, 0), NULL, order->rh_rsc, generate_op_key(order->rh_rsc->id, RSC_MIGRATE, 0), NULL, flags, data_set); } } else if (rh_migratable == TRUE && safe_str_eq(lh_task, RSC_STOP) && safe_str_eq(rh_task, RSC_STOP)) { int flags = pe_order_optional; if (lh_migratable) { flags |= pe_order_apply_first_non_migratable; } /* rh side is at the bottom of the stack during a stop. If we have a constraint * stop B then stop A, if B is migrating via stop/start, and A is migrating using migration actions, * we need to enforce that A's migrate_to action occurs after B's stop action. */ custom_action_order(order->lh_rsc, generate_op_key(order->lh_rsc->id, RSC_STOP, 0), NULL, order->rh_rsc, generate_op_key(order->rh_rsc->id, RSC_MIGRATE, 0), NULL, flags, data_set); /* We need to build the stop constraint against migrate_from as well * to account for partial migrations. */ if (order->rh_rsc->partial_migration_target) { custom_action_order(order->lh_rsc, generate_op_key(order->lh_rsc->id, RSC_STOP, 0), NULL, order->rh_rsc, generate_op_key(order->rh_rsc->id, RSC_MIGRATED, 0), NULL, flags, data_set); } } cleanup_order: free(lh_task); free(rh_task); } /* LHS before RHS */ int custom_action_order(resource_t * lh_rsc, char *lh_action_task, action_t * lh_action, resource_t * rh_rsc, char *rh_action_task, action_t * rh_action, enum pe_ordering type, pe_working_set_t * data_set) { order_constraint_t *order = NULL; if (lh_rsc == NULL && lh_action) { lh_rsc = lh_action->rsc; } if (rh_rsc == NULL && rh_action) { rh_rsc = rh_action->rsc; } if ((lh_action == NULL && lh_rsc == NULL) || (rh_action == NULL && rh_rsc == NULL)) { crm_config_err("Invalid inputs %p.%p %p.%p", lh_rsc, lh_action, rh_rsc, rh_action); free(lh_action_task); free(rh_action_task); return -1; } order = calloc(1, sizeof(order_constraint_t)); order->id = data_set->order_id++; order->type = type; order->lh_rsc = lh_rsc; order->rh_rsc = rh_rsc; order->lh_action = lh_action; order->rh_action = rh_action; order->lh_action_task = lh_action_task; order->rh_action_task = rh_action_task; if (order->lh_action_task == NULL && lh_action) { order->lh_action_task = strdup(lh_action->uuid); } if (order->rh_action_task == NULL && rh_action) { order->rh_action_task = strdup(rh_action->uuid); } if (order->lh_rsc == NULL && lh_action) { order->lh_rsc = lh_action->rsc; } if (order->rh_rsc == NULL && rh_action) { order->rh_rsc = rh_action->rsc; } data_set->ordering_constraints = g_list_prepend(data_set->ordering_constraints, order); handle_migration_ordering(order, data_set); return order->id; } enum pe_ordering get_asymmetrical_flags(enum pe_order_kind kind) { enum pe_ordering flags = pe_order_optional; if (kind == pe_order_kind_mandatory) { flags |= pe_order_asymmetrical; } else if (kind == pe_order_kind_serialize) { flags |= pe_order_serialize_only; } return flags; } enum pe_ordering get_flags(const char *id, enum pe_order_kind kind, const char *action_first, const char *action_then, gboolean invert) { enum pe_ordering flags = pe_order_optional; if (invert && kind == pe_order_kind_mandatory) { crm_trace("Upgrade %s: implies left", id); flags |= pe_order_implies_first; } else if (kind == pe_order_kind_mandatory) { crm_trace("Upgrade %s: implies right", id); flags |= pe_order_implies_then; if (safe_str_eq(action_first, RSC_START) || safe_str_eq(action_first, RSC_PROMOTE)) { crm_trace("Upgrade %s: runnable", id); flags |= pe_order_runnable_left; } } else if (kind == pe_order_kind_serialize) { flags |= pe_order_serialize_only; } return flags; } static gboolean unpack_order_set(xmlNode * set, enum pe_order_kind kind, resource_t ** rsc, action_t ** begin, action_t ** end, action_t ** inv_begin, action_t ** inv_end, const char *symmetrical, pe_working_set_t * data_set) { xmlNode *xml_rsc = NULL; GListPtr set_iter = NULL; GListPtr resources = NULL; resource_t *last = NULL; resource_t *resource = NULL; int local_kind = kind; gboolean sequential = FALSE; enum pe_ordering flags = pe_order_optional; char *key = NULL; const char *id = ID(set); const char *action = crm_element_value(set, "action"); const char *sequential_s = crm_element_value(set, "sequential"); const char *kind_s = crm_element_value(set, XML_ORDER_ATTR_KIND); /* char *pseudo_id = NULL; char *end_id = NULL; char *begin_id = NULL; */ if (action == NULL) { action = RSC_START; } if (kind_s) { local_kind = get_ordering_type(set); } if (sequential_s == NULL) { sequential_s = "1"; } sequential = crm_is_true(sequential_s); if (crm_is_true(symmetrical)) { flags = get_flags(id, local_kind, action, action, FALSE); } else { flags = get_asymmetrical_flags(local_kind); } for (xml_rsc = __xml_first_child(set); xml_rsc != NULL; xml_rsc = __xml_next(xml_rsc)) { if (crm_str_eq((const char *)xml_rsc->name, XML_TAG_RESOURCE_REF, TRUE)) { EXPAND_CONSTRAINT_IDREF(id, resource, ID(xml_rsc)); resources = g_list_append(resources, resource); } } if (g_list_length(resources) == 1) { crm_trace("Single set: %s", id); *rsc = resource; *end = NULL; *begin = NULL; *inv_end = NULL; *inv_begin = NULL; goto done; } /* pseudo_id = crm_concat(id, action, '-'); end_id = crm_concat(pseudo_id, "end", '-'); begin_id = crm_concat(pseudo_id, "begin", '-'); */ *rsc = NULL; /* *end = get_pseudo_op(end_id, data_set); *begin = get_pseudo_op(begin_id, data_set); free(pseudo_id); free(begin_id); free(end_id); */ set_iter = resources; while (set_iter != NULL) { resource = (resource_t *) set_iter->data; set_iter = set_iter->next; key = generate_op_key(resource->id, action, 0); /* custom_action_order(NULL, NULL, *begin, resource, strdup(key), NULL, flags|pe_order_implies_first_printed, data_set); custom_action_order(resource, strdup(key), NULL, NULL, NULL, *end, flags|pe_order_implies_then_printed, data_set); */ if (local_kind == pe_order_kind_serialize) { /* Serialize before everything that comes after */ GListPtr gIter = NULL; for (gIter = set_iter; gIter != NULL; gIter = gIter->next) { resource_t *then_rsc = (resource_t *) gIter->data; char *then_key = generate_op_key(then_rsc->id, action, 0); custom_action_order(resource, strdup(key), NULL, then_rsc, then_key, NULL, flags, data_set); } } else if (sequential) { if (last != NULL) { new_rsc_order(last, action, resource, action, flags, data_set); } last = resource; } free(key); } if (crm_is_true(symmetrical) == FALSE) { goto done; } else if (symmetrical && local_kind == pe_order_kind_serialize) { crm_config_warn("Cannot invert serialized constraint set %s", id); goto done; } else if (local_kind == pe_order_kind_serialize) { goto done; } last = NULL; action = invert_action(action); /* pseudo_id = crm_concat(id, action, '-'); end_id = crm_concat(pseudo_id, "end", '-'); begin_id = crm_concat(pseudo_id, "begin", '-'); *inv_end = get_pseudo_op(end_id, data_set); *inv_begin = get_pseudo_op(begin_id, data_set); free(pseudo_id); free(begin_id); free(end_id); */ flags = get_flags(id, local_kind, action, action, TRUE); set_iter = resources; while (set_iter != NULL) { resource = (resource_t *) set_iter->data; set_iter = set_iter->next; /* key = generate_op_key(resource->id, action, 0); custom_action_order(NULL, NULL, *inv_begin, resource, strdup(key), NULL, flags|pe_order_implies_first_printed, data_set); custom_action_order(resource, key, NULL, NULL, NULL, *inv_end, flags|pe_order_implies_then_printed, data_set); */ if (sequential) { if (last != NULL) { new_rsc_order(resource, action, last, action, flags, data_set); } last = resource; } } done: g_list_free(resources); return TRUE; } static gboolean order_rsc_sets(const char *id, xmlNode * set1, xmlNode * set2, enum pe_order_kind kind, pe_working_set_t * data_set, gboolean invert, gboolean symmetrical) { xmlNode *xml_rsc = NULL; resource_t *rsc_1 = NULL; resource_t *rsc_2 = NULL; const char *action_1 = crm_element_value(set1, "action"); const char *action_2 = crm_element_value(set2, "action"); const char *sequential_1 = crm_element_value(set1, "sequential"); const char *sequential_2 = crm_element_value(set2, "sequential"); const char *require_all_s = crm_element_value(set1, "require-all"); gboolean require_all = require_all_s ? crm_is_true(require_all_s) : TRUE; enum pe_ordering flags = pe_order_none; if (action_1 == NULL) { action_1 = RSC_START; }; if (action_2 == NULL) { action_2 = RSC_START; }; if (invert) { action_1 = invert_action(action_1); action_2 = invert_action(action_2); } if(safe_str_eq(RSC_STOP, action_1) || safe_str_eq(RSC_DEMOTE, action_1)) { /* Assuming: A -> ( B || C) -> D * The one-or-more logic only applies during the start/promote phase * During shutdown neither B nor can shutdown until D is down, so simply turn require_all back on. */ require_all = TRUE; } if (symmetrical == FALSE) { flags = get_asymmetrical_flags(kind); } else { flags = get_flags(id, kind, action_2, action_1, invert); } /* If we have an un-ordered set1, whether it is sequential or not is irrelevant in regards to set2. */ if (!require_all) { char *task = crm_concat(CRM_OP_RELAXED_SET, ID(set1), ':'); action_t *unordered_action = get_pseudo_op(task, data_set); free(task); update_action_flags(unordered_action, pe_action_requires_any); for (xml_rsc = __xml_first_child(set1); xml_rsc != NULL; xml_rsc = __xml_next(xml_rsc)) { xmlNode *xml_rsc_2 = NULL; if (!crm_str_eq((const char *)xml_rsc->name, XML_TAG_RESOURCE_REF, TRUE)) { continue; } EXPAND_CONSTRAINT_IDREF(id, rsc_1, ID(xml_rsc)); /* Add an ordering constraint between every element in set1 and the pseudo action. * If any action in set1 is runnable the pseudo action will be runnable. */ custom_action_order(rsc_1, generate_op_key(rsc_1->id, action_1, 0), NULL, NULL, NULL, unordered_action, pe_order_one_or_more | pe_order_implies_then_printed, data_set); for (xml_rsc_2 = __xml_first_child(set2); xml_rsc_2 != NULL; xml_rsc_2 = __xml_next(xml_rsc_2)) { if (!crm_str_eq((const char *)xml_rsc_2->name, XML_TAG_RESOURCE_REF, TRUE)) { continue; } EXPAND_CONSTRAINT_IDREF(id, rsc_2, ID(xml_rsc_2)); /* Add an ordering constraint between the pseudo action and every element in set2. * If the pseudo action is runnable, every action in set2 will be runnable */ custom_action_order(NULL, NULL, unordered_action, rsc_2, generate_op_key(rsc_2->id, action_2, 0), NULL, flags | pe_order_runnable_left, data_set); } } return TRUE; } if (crm_is_true(sequential_1)) { if (invert == FALSE) { /* get the last one */ const char *rid = NULL; for (xml_rsc = __xml_first_child(set1); xml_rsc != NULL; xml_rsc = __xml_next(xml_rsc)) { if (crm_str_eq((const char *)xml_rsc->name, XML_TAG_RESOURCE_REF, TRUE)) { rid = ID(xml_rsc); } } EXPAND_CONSTRAINT_IDREF(id, rsc_1, rid); } else { /* get the first one */ for (xml_rsc = __xml_first_child(set1); xml_rsc != NULL; xml_rsc = __xml_next(xml_rsc)) { if (crm_str_eq((const char *)xml_rsc->name, XML_TAG_RESOURCE_REF, TRUE)) { EXPAND_CONSTRAINT_IDREF(id, rsc_1, ID(xml_rsc)); break; } } } } if (crm_is_true(sequential_2)) { if (invert == FALSE) { /* get the first one */ for (xml_rsc = __xml_first_child(set2); xml_rsc != NULL; xml_rsc = __xml_next(xml_rsc)) { if (crm_str_eq((const char *)xml_rsc->name, XML_TAG_RESOURCE_REF, TRUE)) { EXPAND_CONSTRAINT_IDREF(id, rsc_2, ID(xml_rsc)); break; } } } else { /* get the last one */ const char *rid = NULL; for (xml_rsc = __xml_first_child(set2); xml_rsc != NULL; xml_rsc = __xml_next(xml_rsc)) { if (crm_str_eq((const char *)xml_rsc->name, XML_TAG_RESOURCE_REF, TRUE)) { rid = ID(xml_rsc); } } EXPAND_CONSTRAINT_IDREF(id, rsc_2, rid); } } if (rsc_1 != NULL && rsc_2 != NULL) { new_rsc_order(rsc_1, action_1, rsc_2, action_2, flags, data_set); } else if (rsc_1 != NULL) { for (xml_rsc = __xml_first_child(set2); xml_rsc != NULL; xml_rsc = __xml_next(xml_rsc)) { if (crm_str_eq((const char *)xml_rsc->name, XML_TAG_RESOURCE_REF, TRUE)) { EXPAND_CONSTRAINT_IDREF(id, rsc_2, ID(xml_rsc)); new_rsc_order(rsc_1, action_1, rsc_2, action_2, flags, data_set); } } } else if (rsc_2 != NULL) { xmlNode *xml_rsc = NULL; for (xml_rsc = __xml_first_child(set1); xml_rsc != NULL; xml_rsc = __xml_next(xml_rsc)) { if (crm_str_eq((const char *)xml_rsc->name, XML_TAG_RESOURCE_REF, TRUE)) { EXPAND_CONSTRAINT_IDREF(id, rsc_1, ID(xml_rsc)); new_rsc_order(rsc_1, action_1, rsc_2, action_2, flags, data_set); } } } else { for (xml_rsc = __xml_first_child(set1); xml_rsc != NULL; xml_rsc = __xml_next(xml_rsc)) { if (crm_str_eq((const char *)xml_rsc->name, XML_TAG_RESOURCE_REF, TRUE)) { xmlNode *xml_rsc_2 = NULL; EXPAND_CONSTRAINT_IDREF(id, rsc_1, ID(xml_rsc)); for (xml_rsc_2 = __xml_first_child(set2); xml_rsc_2 != NULL; xml_rsc_2 = __xml_next(xml_rsc_2)) { if (crm_str_eq((const char *)xml_rsc_2->name, XML_TAG_RESOURCE_REF, TRUE)) { EXPAND_CONSTRAINT_IDREF(id, rsc_2, ID(xml_rsc_2)); new_rsc_order(rsc_1, action_1, rsc_2, action_2, flags, data_set); } } } } } return TRUE; } static gboolean unpack_order_tags(xmlNode * xml_obj, xmlNode ** expanded_xml, pe_working_set_t * data_set) { const char *id = NULL; const char *id_first = NULL; const char *id_then = NULL; const char *action_first = NULL; const char *action_then = NULL; resource_t *rsc_first = NULL; resource_t *rsc_then = NULL; tag_t *tag_first = NULL; tag_t *tag_then = NULL; xmlNode *new_xml = NULL; xmlNode *rsc_set_first = NULL; xmlNode *rsc_set_then = NULL; gboolean any_sets = FALSE; *expanded_xml = NULL; if (xml_obj == NULL) { crm_config_err("No constraint object to process."); return FALSE; } id = crm_element_value(xml_obj, XML_ATTR_ID); if (id == NULL) { crm_config_err("%s constraint must have an id", crm_element_name(xml_obj)); return FALSE; } /* Attempt to expand any template/tag references in possible resource sets. */ expand_tags_in_sets(xml_obj, &new_xml, data_set); if (new_xml) { /* There are resource sets referencing templates/tags. Return with the expanded XML. */ crm_log_xml_trace(new_xml, "Expanded rsc_order..."); *expanded_xml = new_xml; return TRUE; } id_first = crm_element_value(xml_obj, XML_ORDER_ATTR_FIRST); id_then = crm_element_value(xml_obj, XML_ORDER_ATTR_THEN); if (id_first == NULL || id_then == NULL) { return TRUE; } if (valid_resource_or_tag(data_set, id_first, &rsc_first, &tag_first) == FALSE) { crm_config_err("Constraint '%s': Invalid reference to '%s'", id, id_first); return FALSE; } if (valid_resource_or_tag(data_set, id_then, &rsc_then, &tag_then) == FALSE) { crm_config_err("Constraint '%s': Invalid reference to '%s'", id, id_then); return FALSE; } if (rsc_first && rsc_then) { /* Neither side references any template/tag. */ return TRUE; } action_first = crm_element_value(xml_obj, XML_ORDER_ATTR_FIRST_ACTION); action_then = crm_element_value(xml_obj, XML_ORDER_ATTR_THEN_ACTION); new_xml = copy_xml(xml_obj); /* Convert the template/tag reference in "first" into a resource_set under the order constraint. */ if (tag_to_set(new_xml, &rsc_set_first, XML_ORDER_ATTR_FIRST, TRUE, data_set) == FALSE) { free_xml(new_xml); return FALSE; } if (rsc_set_first) { if (action_first) { /* A "first-action" is specified. Move it into the converted resource_set as an "action" attribute. */ crm_xml_add(rsc_set_first, "action", action_first); xml_remove_prop(new_xml, XML_ORDER_ATTR_FIRST_ACTION); } any_sets = TRUE; } /* Convert the template/tag reference in "then" into a resource_set under the order constraint. */ if (tag_to_set(new_xml, &rsc_set_then, XML_ORDER_ATTR_THEN, TRUE, data_set) == FALSE) { free_xml(new_xml); return FALSE; } if (rsc_set_then) { if (action_then) { /* A "then-action" is specified. Move it into the converted resource_set as an "action" attribute. */ crm_xml_add(rsc_set_then, "action", action_then); xml_remove_prop(new_xml, XML_ORDER_ATTR_THEN_ACTION); } any_sets = TRUE; } if (any_sets) { crm_log_xml_trace(new_xml, "Expanded rsc_order..."); *expanded_xml = new_xml; } else { free_xml(new_xml); } return TRUE; } gboolean unpack_rsc_order(xmlNode * xml_obj, pe_working_set_t * data_set) { gboolean any_sets = FALSE; resource_t *rsc = NULL; /* resource_t *last_rsc = NULL; */ action_t *set_end = NULL; action_t *set_begin = NULL; action_t *set_inv_end = NULL; action_t *set_inv_begin = NULL; xmlNode *set = NULL; xmlNode *last = NULL; xmlNode *orig_xml = NULL; xmlNode *expanded_xml = NULL; /* action_t *last_end = NULL; action_t *last_begin = NULL; action_t *last_inv_end = NULL; action_t *last_inv_begin = NULL; */ const char *id = crm_element_value(xml_obj, XML_ATTR_ID); const char *invert = crm_element_value(xml_obj, XML_CONS_ATTR_SYMMETRICAL); enum pe_order_kind kind = get_ordering_type(xml_obj); gboolean invert_bool = TRUE; gboolean rc = TRUE; if (invert == NULL) { invert = "true"; } invert_bool = crm_is_true(invert); rc = unpack_order_tags(xml_obj, &expanded_xml, data_set); if (expanded_xml) { orig_xml = xml_obj; xml_obj = expanded_xml; } else if (rc == FALSE) { return FALSE; } for (set = __xml_first_child(xml_obj); set != NULL; set = __xml_next(set)) { if (crm_str_eq((const char *)set->name, XML_CONS_TAG_RSC_SET, TRUE)) { any_sets = TRUE; set = expand_idref(set, data_set->input); if (unpack_order_set(set, kind, &rsc, &set_begin, &set_end, &set_inv_begin, &set_inv_end, invert, data_set) == FALSE) { return FALSE; /* Expand orders in order_rsc_sets() instead of via pseudo actions. */ /* } else if(last) { const char *set_action = crm_element_value(set, "action"); const char *last_action = crm_element_value(last, "action"); enum pe_ordering flags = get_flags(id, kind, last_action, set_action, FALSE); if(!set_action) { set_action = RSC_START; } if(!last_action) { last_action = RSC_START; } if(rsc == NULL && last_rsc == NULL) { order_actions(last_end, set_begin, flags); } else { custom_action_order( last_rsc, null_or_opkey(last_rsc, last_action), last_end, rsc, null_or_opkey(rsc, set_action), set_begin, flags, data_set); } if(crm_is_true(invert)) { set_action = invert_action(set_action); last_action = invert_action(last_action); flags = get_flags(id, kind, last_action, set_action, TRUE); if(rsc == NULL && last_rsc == NULL) { order_actions(last_inv_begin, set_inv_end, flags); } else { custom_action_order( last_rsc, null_or_opkey(last_rsc, last_action), last_inv_begin, rsc, null_or_opkey(rsc, set_action), set_inv_end, flags, data_set); } } */ } else if ( /* never called -- Now call it for supporting clones in resource sets */ last) { if (order_rsc_sets(id, last, set, kind, data_set, FALSE, invert_bool) == FALSE) { return FALSE; } if (invert_bool && order_rsc_sets(id, set, last, kind, data_set, TRUE, invert_bool) == FALSE) { return FALSE; } } last = set; /* last_rsc = rsc; last_end = set_end; last_begin = set_begin; last_inv_end = set_inv_end; last_inv_begin = set_inv_begin; */ } } if (expanded_xml) { free_xml(expanded_xml); xml_obj = orig_xml; } if (any_sets == FALSE) { return unpack_simple_rsc_order(xml_obj, data_set); } return TRUE; } static gboolean unpack_colocation_set(xmlNode * set, int score, pe_working_set_t * data_set) { xmlNode *xml_rsc = NULL; resource_t *with = NULL; resource_t *resource = NULL; const char *set_id = ID(set); const char *role = crm_element_value(set, "role"); const char *sequential = crm_element_value(set, "sequential"); const char *ordering = crm_element_value(set, "ordering"); int local_score = score; const char *score_s = crm_element_value(set, XML_RULE_ATTR_SCORE); if (score_s) { local_score = char2score(score_s); } if(ordering == NULL) { ordering = "group"; } if (sequential != NULL && crm_is_true(sequential) == FALSE) { return TRUE; } else if (local_score >= 0 && safe_str_eq(ordering, "group")) { for (xml_rsc = __xml_first_child(set); xml_rsc != NULL; xml_rsc = __xml_next(xml_rsc)) { if (crm_str_eq((const char *)xml_rsc->name, XML_TAG_RESOURCE_REF, TRUE)) { EXPAND_CONSTRAINT_IDREF(set_id, resource, ID(xml_rsc)); if (with != NULL) { pe_rsc_trace(resource, "Colocating %s with %s", resource->id, with->id); rsc_colocation_new(set_id, NULL, local_score, resource, with, role, role, data_set); } with = resource; } } } else if (local_score >= 0) { resource_t *last = NULL; for (xml_rsc = __xml_first_child(set); xml_rsc != NULL; xml_rsc = __xml_next(xml_rsc)) { if (crm_str_eq((const char *)xml_rsc->name, XML_TAG_RESOURCE_REF, TRUE)) { EXPAND_CONSTRAINT_IDREF(set_id, resource, ID(xml_rsc)); if (last != NULL) { pe_rsc_trace(resource, "Colocating %s with %s", last->id, resource->id); rsc_colocation_new(set_id, NULL, local_score, last, resource, role, role, data_set); } last = resource; } } } else { /* Anti-colocating with every prior resource is * the only way to ensure the intuitive result * (ie. that no-one in the set can run with anyone * else in the set) */ for (xml_rsc = __xml_first_child(set); xml_rsc != NULL; xml_rsc = __xml_next(xml_rsc)) { if (crm_str_eq((const char *)xml_rsc->name, XML_TAG_RESOURCE_REF, TRUE)) { xmlNode *xml_rsc_with = NULL; EXPAND_CONSTRAINT_IDREF(set_id, resource, ID(xml_rsc)); for (xml_rsc_with = __xml_first_child(set); xml_rsc_with != NULL; xml_rsc_with = __xml_next(xml_rsc_with)) { if (crm_str_eq((const char *)xml_rsc_with->name, XML_TAG_RESOURCE_REF, TRUE)) { if (safe_str_eq(resource->id, ID(xml_rsc_with))) { break; } else if (resource == NULL) { crm_config_err("%s: No resource found for %s", set_id, ID(xml_rsc_with)); return FALSE; } EXPAND_CONSTRAINT_IDREF(set_id, with, ID(xml_rsc_with)); pe_rsc_trace(resource, "Anti-Colocating %s with %s", resource->id, with->id); rsc_colocation_new(set_id, NULL, local_score, resource, with, role, role, data_set); } } } } } return TRUE; } static gboolean colocate_rsc_sets(const char *id, xmlNode * set1, xmlNode * set2, int score, pe_working_set_t * data_set) { xmlNode *xml_rsc = NULL; resource_t *rsc_1 = NULL; resource_t *rsc_2 = NULL; const char *role_1 = crm_element_value(set1, "role"); const char *role_2 = crm_element_value(set2, "role"); const char *sequential_1 = crm_element_value(set1, "sequential"); const char *sequential_2 = crm_element_value(set2, "sequential"); if (sequential_1 == NULL || crm_is_true(sequential_1)) { /* get the first one */ for (xml_rsc = __xml_first_child(set1); xml_rsc != NULL; xml_rsc = __xml_next(xml_rsc)) { if (crm_str_eq((const char *)xml_rsc->name, XML_TAG_RESOURCE_REF, TRUE)) { EXPAND_CONSTRAINT_IDREF(id, rsc_1, ID(xml_rsc)); break; } } } if (sequential_2 == NULL || crm_is_true(sequential_2)) { /* get the last one */ const char *rid = NULL; for (xml_rsc = __xml_first_child(set2); xml_rsc != NULL; xml_rsc = __xml_next(xml_rsc)) { if (crm_str_eq((const char *)xml_rsc->name, XML_TAG_RESOURCE_REF, TRUE)) { rid = ID(xml_rsc); } } EXPAND_CONSTRAINT_IDREF(id, rsc_2, rid); } if (rsc_1 != NULL && rsc_2 != NULL) { rsc_colocation_new(id, NULL, score, rsc_1, rsc_2, role_1, role_2, data_set); } else if (rsc_1 != NULL) { for (xml_rsc = __xml_first_child(set2); xml_rsc != NULL; xml_rsc = __xml_next(xml_rsc)) { if (crm_str_eq((const char *)xml_rsc->name, XML_TAG_RESOURCE_REF, TRUE)) { EXPAND_CONSTRAINT_IDREF(id, rsc_2, ID(xml_rsc)); rsc_colocation_new(id, NULL, score, rsc_1, rsc_2, role_1, role_2, data_set); } } } else if (rsc_2 != NULL) { for (xml_rsc = __xml_first_child(set1); xml_rsc != NULL; xml_rsc = __xml_next(xml_rsc)) { if (crm_str_eq((const char *)xml_rsc->name, XML_TAG_RESOURCE_REF, TRUE)) { EXPAND_CONSTRAINT_IDREF(id, rsc_1, ID(xml_rsc)); rsc_colocation_new(id, NULL, score, rsc_1, rsc_2, role_1, role_2, data_set); } } } else { for (xml_rsc = __xml_first_child(set1); xml_rsc != NULL; xml_rsc = __xml_next(xml_rsc)) { if (crm_str_eq((const char *)xml_rsc->name, XML_TAG_RESOURCE_REF, TRUE)) { xmlNode *xml_rsc_2 = NULL; EXPAND_CONSTRAINT_IDREF(id, rsc_1, ID(xml_rsc)); for (xml_rsc_2 = __xml_first_child(set2); xml_rsc_2 != NULL; xml_rsc_2 = __xml_next(xml_rsc_2)) { if (crm_str_eq((const char *)xml_rsc_2->name, XML_TAG_RESOURCE_REF, TRUE)) { EXPAND_CONSTRAINT_IDREF(id, rsc_2, ID(xml_rsc_2)); rsc_colocation_new(id, NULL, score, rsc_1, rsc_2, role_1, role_2, data_set); } } } } } return TRUE; } static gboolean unpack_simple_colocation(xmlNode * xml_obj, pe_working_set_t * data_set) { int score_i = 0; const char *id = crm_element_value(xml_obj, XML_ATTR_ID); const char *score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE); const char *id_lh = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE); const char *id_rh = crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET); const char *state_lh = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE_ROLE); const char *state_rh = crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET_ROLE); const char *instance_lh = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE_INSTANCE); const char *instance_rh = crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET_INSTANCE); const char *attr = crm_element_value(xml_obj, XML_COLOC_ATTR_NODE_ATTR); const char *symmetrical = crm_element_value(xml_obj, XML_CONS_ATTR_SYMMETRICAL); resource_t *rsc_lh = pe_find_constraint_resource(data_set->resources, id_lh); resource_t *rsc_rh = pe_find_constraint_resource(data_set->resources, id_rh); if (rsc_lh == NULL) { crm_config_err("Invalid constraint '%s': No resource named '%s'", id, id_lh); return FALSE; } else if (rsc_rh == NULL) { crm_config_err("Invalid constraint '%s': No resource named '%s'", id, id_rh); return FALSE; } else if (instance_lh && rsc_lh->variant < pe_clone) { crm_config_err ("Invalid constraint '%s': Resource '%s' is not a clone but instance %s was requested", id, id_lh, instance_lh); return FALSE; } else if (instance_rh && rsc_rh->variant < pe_clone) { crm_config_err ("Invalid constraint '%s': Resource '%s' is not a clone but instance %s was requested", id, id_rh, instance_rh); return FALSE; } if (instance_lh) { rsc_lh = find_clone_instance(rsc_lh, instance_lh, data_set); if (rsc_lh == NULL) { crm_config_warn("Invalid constraint '%s': No instance '%s' of '%s'", id, instance_lh, id_lh); return FALSE; } } if (instance_rh) { rsc_rh = find_clone_instance(rsc_rh, instance_rh, data_set); if (rsc_rh == NULL) { crm_config_warn("Invalid constraint '%s': No instance '%s' of '%s'", id, instance_rh, id_rh); return FALSE; } } if (crm_is_true(symmetrical)) { crm_config_warn("The %s colocation constraint attribute has been removed." " It didn't do what you think it did anyway.", XML_CONS_ATTR_SYMMETRICAL); } if (score) { score_i = char2score(score); } rsc_colocation_new(id, attr, score_i, rsc_lh, rsc_rh, state_lh, state_rh, data_set); return TRUE; } static gboolean unpack_colocation_tags(xmlNode * xml_obj, xmlNode ** expanded_xml, pe_working_set_t * data_set) { const char *id = NULL; const char *id_lh = NULL; const char *id_rh = NULL; const char *state_lh = NULL; const char *state_rh = NULL; resource_t *rsc_lh = NULL; resource_t *rsc_rh = NULL; tag_t *tag_lh = NULL; tag_t *tag_rh = NULL; xmlNode *new_xml = NULL; xmlNode *rsc_set_lh = NULL; xmlNode *rsc_set_rh = NULL; gboolean any_sets = FALSE; *expanded_xml = NULL; if (xml_obj == NULL) { crm_config_err("No constraint object to process."); return FALSE; } id = crm_element_value(xml_obj, XML_ATTR_ID); if (id == NULL) { crm_config_err("%s constraint must have an id", crm_element_name(xml_obj)); return FALSE; } /* Attempt to expand any template/tag references in possible resource sets. */ expand_tags_in_sets(xml_obj, &new_xml, data_set); if (new_xml) { /* There are resource sets referencing templates/tags. Return with the expanded XML. */ crm_log_xml_trace(new_xml, "Expanded rsc_colocation..."); *expanded_xml = new_xml; return TRUE; } id_lh = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE); id_rh = crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET); if (id_lh == NULL || id_rh == NULL) { return TRUE; } if (valid_resource_or_tag(data_set, id_lh, &rsc_lh, &tag_lh) == FALSE) { crm_config_err("Constraint '%s': Invalid reference to '%s'", id, id_lh); return FALSE; } if (valid_resource_or_tag(data_set, id_rh, &rsc_rh, &tag_rh) == FALSE) { crm_config_err("Constraint '%s': Invalid reference to '%s'", id, id_rh); return FALSE; } if (rsc_lh && rsc_rh) { /* Neither side references any template/tag. */ return TRUE; } if (tag_lh && tag_rh) { /* A colocation constraint between two templates/tags makes no sense. */ crm_config_err("Either LHS or RHS of %s should be a normal resource instead of a template/tag", id); return FALSE; } state_lh = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE_ROLE); state_rh = crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET_ROLE); new_xml = copy_xml(xml_obj); /* Convert the template/tag reference in "rsc" into a resource_set under the colocation constraint. */ if (tag_to_set(new_xml, &rsc_set_lh, XML_COLOC_ATTR_SOURCE, TRUE, data_set) == FALSE) { free_xml(new_xml); return FALSE; } if (rsc_set_lh) { if (state_lh) { /* A "rsc-role" is specified. Move it into the converted resource_set as a "role"" attribute. */ crm_xml_add(rsc_set_lh, "role", state_lh); xml_remove_prop(new_xml, XML_COLOC_ATTR_SOURCE_ROLE); } any_sets = TRUE; } /* Convert the template/tag reference in "with-rsc" into a resource_set under the colocation constraint. */ if (tag_to_set(new_xml, &rsc_set_rh, XML_COLOC_ATTR_TARGET, TRUE, data_set) == FALSE) { free_xml(new_xml); return FALSE; } if (rsc_set_rh) { if (state_rh) { /* A "with-rsc-role" is specified. Move it into the converted resource_set as a "role"" attribute. */ crm_xml_add(rsc_set_rh, "role", state_rh); xml_remove_prop(new_xml, XML_COLOC_ATTR_TARGET_ROLE); } any_sets = TRUE; } if (any_sets) { crm_log_xml_trace(new_xml, "Expanded rsc_colocation..."); *expanded_xml = new_xml; } else { free_xml(new_xml); } return TRUE; } gboolean unpack_rsc_colocation(xmlNode * xml_obj, pe_working_set_t * data_set) { int score_i = 0; xmlNode *set = NULL; xmlNode *last = NULL; gboolean any_sets = FALSE; xmlNode *orig_xml = NULL; xmlNode *expanded_xml = NULL; const char *id = crm_element_value(xml_obj, XML_ATTR_ID); const char *score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE); gboolean rc = TRUE; if (score) { score_i = char2score(score); } rc = unpack_colocation_tags(xml_obj, &expanded_xml, data_set); if (expanded_xml) { orig_xml = xml_obj; xml_obj = expanded_xml; } else if (rc == FALSE) { return FALSE; } for (set = __xml_first_child(xml_obj); set != NULL; set = __xml_next(set)) { if (crm_str_eq((const char *)set->name, XML_CONS_TAG_RSC_SET, TRUE)) { any_sets = TRUE; set = expand_idref(set, data_set->input); if (unpack_colocation_set(set, score_i, data_set) == FALSE) { return FALSE; } else if (last && colocate_rsc_sets(id, last, set, score_i, data_set) == FALSE) { return FALSE; } last = set; } } if (expanded_xml) { free_xml(expanded_xml); xml_obj = orig_xml; } if (any_sets == FALSE) { return unpack_simple_colocation(xml_obj, data_set); } return TRUE; } gboolean rsc_ticket_new(const char *id, resource_t * rsc_lh, ticket_t * ticket, const char *state_lh, const char *loss_policy, pe_working_set_t * data_set) { rsc_ticket_t *new_rsc_ticket = NULL; if (rsc_lh == NULL) { crm_config_err("No resource found for LHS %s", id); return FALSE; } new_rsc_ticket = calloc(1, sizeof(rsc_ticket_t)); if (new_rsc_ticket == NULL) { return FALSE; } if (state_lh == NULL || safe_str_eq(state_lh, RSC_ROLE_STARTED_S)) { state_lh = RSC_ROLE_UNKNOWN_S; } new_rsc_ticket->id = id; new_rsc_ticket->ticket = ticket; new_rsc_ticket->rsc_lh = rsc_lh; new_rsc_ticket->role_lh = text2role(state_lh); if (safe_str_eq(loss_policy, "fence")) { crm_debug("On loss of ticket '%s': Fence the nodes running %s (%s)", new_rsc_ticket->ticket->id, new_rsc_ticket->rsc_lh->id, role2text(new_rsc_ticket->role_lh)); new_rsc_ticket->loss_policy = loss_ticket_fence; } else if (safe_str_eq(loss_policy, "freeze")) { crm_debug("On loss of ticket '%s': Freeze %s (%s)", new_rsc_ticket->ticket->id, new_rsc_ticket->rsc_lh->id, role2text(new_rsc_ticket->role_lh)); new_rsc_ticket->loss_policy = loss_ticket_freeze; } else if (safe_str_eq(loss_policy, "demote")) { crm_debug("On loss of ticket '%s': Demote %s (%s)", new_rsc_ticket->ticket->id, new_rsc_ticket->rsc_lh->id, role2text(new_rsc_ticket->role_lh)); new_rsc_ticket->loss_policy = loss_ticket_demote; } else if (safe_str_eq(loss_policy, "stop")) { crm_debug("On loss of ticket '%s': Stop %s (%s)", new_rsc_ticket->ticket->id, new_rsc_ticket->rsc_lh->id, role2text(new_rsc_ticket->role_lh)); new_rsc_ticket->loss_policy = loss_ticket_stop; } else { if (new_rsc_ticket->role_lh == RSC_ROLE_MASTER) { crm_debug("On loss of ticket '%s': Default to demote %s (%s)", new_rsc_ticket->ticket->id, new_rsc_ticket->rsc_lh->id, role2text(new_rsc_ticket->role_lh)); new_rsc_ticket->loss_policy = loss_ticket_demote; } else { crm_debug("On loss of ticket '%s': Default to stop %s (%s)", new_rsc_ticket->ticket->id, new_rsc_ticket->rsc_lh->id, role2text(new_rsc_ticket->role_lh)); new_rsc_ticket->loss_policy = loss_ticket_stop; } } pe_rsc_trace(rsc_lh, "%s (%s) ==> %s", rsc_lh->id, role2text(new_rsc_ticket->role_lh), ticket->id); rsc_lh->rsc_tickets = g_list_append(rsc_lh->rsc_tickets, new_rsc_ticket); data_set->ticket_constraints = g_list_append(data_set->ticket_constraints, new_rsc_ticket); if (new_rsc_ticket->ticket->granted == FALSE || new_rsc_ticket->ticket->standby) { rsc_ticket_constraint(rsc_lh, new_rsc_ticket, data_set); } return TRUE; } static gboolean unpack_rsc_ticket_set(xmlNode * set, ticket_t * ticket, const char *loss_policy, pe_working_set_t * data_set) { xmlNode *xml_rsc = NULL; resource_t *resource = NULL; const char *set_id = ID(set); const char *role = crm_element_value(set, "role"); if (set == NULL) { crm_config_err("No resource_set object to process."); return FALSE; } if (set_id == NULL) { crm_config_err("resource_set must have an id"); return FALSE; } if (ticket == NULL) { crm_config_err("No dependented ticket specified for '%s'", set_id); return FALSE; } for (xml_rsc = __xml_first_child(set); xml_rsc != NULL; xml_rsc = __xml_next(xml_rsc)) { if (crm_str_eq((const char *)xml_rsc->name, XML_TAG_RESOURCE_REF, TRUE)) { EXPAND_CONSTRAINT_IDREF(set_id, resource, ID(xml_rsc)); pe_rsc_trace(resource, "Resource '%s' depends on ticket '%s'", resource->id, ticket->id); rsc_ticket_new(set_id, resource, ticket, role, loss_policy, data_set); } } return TRUE; } static gboolean unpack_simple_rsc_ticket(xmlNode * xml_obj, pe_working_set_t * data_set) { const char *id = crm_element_value(xml_obj, XML_ATTR_ID); const char *ticket_str = crm_element_value(xml_obj, XML_TICKET_ATTR_TICKET); const char *loss_policy = crm_element_value(xml_obj, XML_TICKET_ATTR_LOSS_POLICY); ticket_t *ticket = NULL; const char *id_lh = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE); const char *state_lh = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE_ROLE); const char *instance_lh = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE_INSTANCE); resource_t *rsc_lh = NULL; if (xml_obj == NULL) { crm_config_err("No rsc_ticket constraint object to process."); return FALSE; } if (id == NULL) { crm_config_err("%s constraint must have an id", crm_element_name(xml_obj)); return FALSE; } if (ticket_str == NULL) { crm_config_err("Invalid constraint '%s': No ticket specified", id); return FALSE; } else { ticket = g_hash_table_lookup(data_set->tickets, ticket_str); } if (ticket == NULL) { crm_config_err("Invalid constraint '%s': No ticket named '%s'", id, ticket_str); return FALSE; } if (id_lh == NULL) { crm_config_err("Invalid constraint '%s': No resource specified", id); return FALSE; } else { rsc_lh = pe_find_constraint_resource(data_set->resources, id_lh); } if (rsc_lh == NULL) { crm_config_err("Invalid constraint '%s': No resource named '%s'", id, id_lh); return FALSE; } else if (instance_lh && rsc_lh->variant < pe_clone) { crm_config_err ("Invalid constraint '%s': Resource '%s' is not a clone but instance %s was requested", id, id_lh, instance_lh); return FALSE; } if (instance_lh) { rsc_lh = find_clone_instance(rsc_lh, instance_lh, data_set); if (rsc_lh == NULL) { crm_config_warn("Invalid constraint '%s': No instance '%s' of '%s'", id, instance_lh, id_lh); return FALSE; } } rsc_ticket_new(id, rsc_lh, ticket, state_lh, loss_policy, data_set); return TRUE; } static gboolean unpack_rsc_ticket_tags(xmlNode * xml_obj, xmlNode ** expanded_xml, pe_working_set_t * data_set) { const char *id = NULL; const char *id_lh = NULL; const char *state_lh = NULL; resource_t *rsc_lh = NULL; tag_t *tag_lh = NULL; xmlNode *new_xml = NULL; xmlNode *rsc_set_lh = NULL; gboolean any_sets = FALSE; *expanded_xml = NULL; if (xml_obj == NULL) { crm_config_err("No constraint object to process."); return FALSE; } id = crm_element_value(xml_obj, XML_ATTR_ID); if (id == NULL) { crm_config_err("%s constraint must have an id", crm_element_name(xml_obj)); return FALSE; } /* Attempt to expand any template/tag references in possible resource sets. */ expand_tags_in_sets(xml_obj, &new_xml, data_set); if (new_xml) { /* There are resource sets referencing templates/tags. Return with the expanded XML. */ crm_log_xml_trace(new_xml, "Expanded rsc_ticket..."); *expanded_xml = new_xml; return TRUE; } id_lh = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE); if (id_lh == NULL) { return TRUE; } if (valid_resource_or_tag(data_set, id_lh, &rsc_lh, &tag_lh) == FALSE) { crm_config_err("Constraint '%s': Invalid reference to '%s'", id, id_lh); return FALSE; } else if (rsc_lh) { /* No template/tag is referenced. */ return TRUE; } state_lh = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE_ROLE); new_xml = copy_xml(xml_obj); /* Convert the template/tag reference in "rsc" into a resource_set under the rsc_ticket constraint. */ if (tag_to_set(new_xml, &rsc_set_lh, XML_COLOC_ATTR_SOURCE, FALSE, data_set) == FALSE) { free_xml(new_xml); return FALSE; } if (rsc_set_lh) { if (state_lh) { /* A "rsc-role" is specified. Move it into the converted resource_set as a "role"" attribute. */ crm_xml_add(rsc_set_lh, "role", state_lh); xml_remove_prop(new_xml, XML_COLOC_ATTR_SOURCE_ROLE); } any_sets = TRUE; } if (any_sets) { crm_log_xml_trace(new_xml, "Expanded rsc_ticket..."); *expanded_xml = new_xml; } else { free_xml(new_xml); } return TRUE; } gboolean unpack_rsc_ticket(xmlNode * xml_obj, pe_working_set_t * data_set) { xmlNode *set = NULL; gboolean any_sets = FALSE; const char *id = crm_element_value(xml_obj, XML_ATTR_ID); const char *ticket_str = crm_element_value(xml_obj, XML_TICKET_ATTR_TICKET); const char *loss_policy = crm_element_value(xml_obj, XML_TICKET_ATTR_LOSS_POLICY); ticket_t *ticket = NULL; xmlNode *orig_xml = NULL; xmlNode *expanded_xml = NULL; gboolean rc = TRUE; if (xml_obj == NULL) { crm_config_err("No rsc_ticket constraint object to process."); return FALSE; } if (id == NULL) { crm_config_err("%s constraint must have an id", crm_element_name(xml_obj)); return FALSE; } if (data_set->tickets == NULL) { data_set->tickets = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_ticket); } if (ticket_str == NULL) { crm_config_err("Invalid constraint '%s': No ticket specified", id); return FALSE; } else { ticket = g_hash_table_lookup(data_set->tickets, ticket_str); } if (ticket == NULL) { ticket = ticket_new(ticket_str, data_set); if (ticket == NULL) { return FALSE; } } rc = unpack_rsc_ticket_tags(xml_obj, &expanded_xml, data_set); if (expanded_xml) { orig_xml = xml_obj; xml_obj = expanded_xml; } else if (rc == FALSE) { return FALSE; } for (set = __xml_first_child(xml_obj); set != NULL; set = __xml_next(set)) { if (crm_str_eq((const char *)set->name, XML_CONS_TAG_RSC_SET, TRUE)) { any_sets = TRUE; set = expand_idref(set, data_set->input); if (unpack_rsc_ticket_set(set, ticket, loss_policy, data_set) == FALSE) { return FALSE; } } } if (expanded_xml) { free_xml(expanded_xml); xml_obj = orig_xml; } if (any_sets == FALSE) { return unpack_simple_rsc_ticket(xml_obj, data_set); } return TRUE; } gboolean is_active(rsc_to_node_t * cons) { return TRUE; } diff --git a/pengine/native.c b/pengine/native.c index 5db8c603c0..0020344550 100644 --- a/pengine/native.c +++ b/pengine/native.c @@ -1,3211 +1,3234 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include /* #define DELETE_THEN_REFRESH 1 // The crmd will remove the resource from the CIB itself, making this redundant */ #define INFINITY_HACK (INFINITY * -100) #define VARIANT_NATIVE 1 #include gboolean update_action(action_t * then); void native_rsc_colocation_rh_must(resource_t * rsc_lh, gboolean update_lh, resource_t * rsc_rh, gboolean update_rh); void native_rsc_colocation_rh_mustnot(resource_t * rsc_lh, gboolean update_lh, resource_t * rsc_rh, gboolean update_rh); void Recurring(resource_t * rsc, action_t * start, node_t * node, pe_working_set_t * data_set); void RecurringOp(resource_t * rsc, action_t * start, node_t * node, xmlNode * operation, pe_working_set_t * data_set); void Recurring_Stopped(resource_t * rsc, action_t * start, node_t * node, pe_working_set_t * data_set); void RecurringOp_Stopped(resource_t * rsc, action_t * start, node_t * node, xmlNode * operation, pe_working_set_t * data_set); void pe_post_notify(resource_t * rsc, node_t * node, action_t * op, notify_data_t * n_data, pe_working_set_t * data_set); gboolean DeleteRsc(resource_t * rsc, node_t * node, gboolean optional, pe_working_set_t * data_set); gboolean StopRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean StartRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean DemoteRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean PromoteRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean RoleError(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean NullOp(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set); /* *INDENT-OFF* */ enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = { /* Current State */ /* Next State: Unknown Stopped Started Slave Master */ /* Unknown */ { RSC_ROLE_UNKNOWN, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, }, /* Stopped */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_SLAVE, RSC_ROLE_SLAVE, }, /* Started */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }, /* Slave */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }, /* Master */ { RSC_ROLE_STOPPED, RSC_ROLE_SLAVE, RSC_ROLE_SLAVE, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }, }; gboolean (*rsc_action_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX])(resource_t*,node_t*,gboolean,pe_working_set_t*) = { /* Current State */ /* Next State: Unknown Stopped Started Slave Master */ /* Unknown */ { RoleError, StopRsc, RoleError, RoleError, RoleError, }, /* Stopped */ { RoleError, NullOp, StartRsc, StartRsc, RoleError, }, /* Started */ { RoleError, StopRsc, NullOp, NullOp, PromoteRsc, }, /* Slave */ { RoleError, StopRsc, StopRsc, NullOp, PromoteRsc, }, /* Master */ { RoleError, DemoteRsc, DemoteRsc, DemoteRsc, NullOp, }, }; /* *INDENT-ON* */ struct capacity_data { node_t *node; resource_t *rsc; gboolean is_enough; }; static action_t * get_first_named_action(resource_t * rsc, const char *action, gboolean only_valid, node_t * current); static void check_capacity(gpointer key, gpointer value, gpointer user_data) { int required = 0; int remaining = 0; struct capacity_data *data = user_data; required = crm_parse_int(value, "0"); remaining = crm_parse_int(g_hash_table_lookup(data->node->details->utilization, key), "0"); if (required > remaining) { CRM_ASSERT(data->rsc); CRM_ASSERT(data->node); pe_rsc_debug(data->rsc, "Node %s has no enough %s for resource %s: required=%d remaining=%d", data->node->details->uname, (char *)key, data->rsc->id, required, remaining); data->is_enough = FALSE; } } static gboolean have_enough_capacity(node_t * node, resource_t * rsc) { struct capacity_data data; data.node = node; data.rsc = rsc; data.is_enough = TRUE; g_hash_table_foreach(rsc->utilization, check_capacity, &data); return data.is_enough; } static gboolean native_choose_node(resource_t * rsc, node_t * prefer, pe_working_set_t * data_set) { /* 1. Sort by weight 2. color.chosen_node = the node (of those with the highest wieght) with the fewest resources 3. remove color.chosen_node from all other colors */ int alloc_details = scores_log_level + 1; GListPtr nodes = NULL; node_t *chosen = NULL; int lpc = 0; int multiple = 0; int length = 0; gboolean result = FALSE; if (safe_str_neq(data_set->placement_strategy, "default")) { GListPtr gIter = NULL; for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; if (have_enough_capacity(node, rsc) == FALSE) { pe_rsc_debug(rsc, "Resource %s cannot be allocated to node %s: none of enough capacity", rsc->id, node->details->uname); resource_location(rsc, node, -INFINITY, "__limit_utilization_", data_set); } } dump_node_scores(alloc_details, rsc, "Post-utilization", rsc->allowed_nodes); } length = g_hash_table_size(rsc->allowed_nodes); if (is_not_set(rsc->flags, pe_rsc_provisional)) { return rsc->allocated_to ? TRUE : FALSE; } if (prefer) { chosen = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id); if (chosen && chosen->weight >= 0 && can_run_resources(chosen)) { pe_rsc_trace(rsc, "Using preferred node %s for %s instead of choosing from %d candidates", chosen->details->uname, rsc->id, length); } else if (chosen && chosen->weight < 0) { pe_rsc_trace(rsc, "Preferred node %s for %s was unavailable", chosen->details->uname, rsc->id); chosen = NULL; } else if (chosen && can_run_resources(chosen)) { pe_rsc_trace(rsc, "Preferred node %s for %s was unsuitable", chosen->details->uname, rsc->id); chosen = NULL; } else { pe_rsc_trace(rsc, "Preferred node %s for %s was unknown", prefer->details->uname, rsc->id); } } if (chosen == NULL && rsc->allowed_nodes) { nodes = g_hash_table_get_values(rsc->allowed_nodes); nodes = g_list_sort_with_data(nodes, sort_node_weight, g_list_nth_data(rsc->running_on, 0)); chosen = g_list_nth_data(nodes, 0); pe_rsc_trace(rsc, "Chose node %s for %s from %d candidates", chosen ? chosen->details->uname : "", rsc->id, length); if (chosen && chosen->weight > 0 && can_run_resources(chosen)) { node_t *running = g_list_nth_data(rsc->running_on, 0); if (running && can_run_resources(running) == FALSE) { pe_rsc_trace(rsc, "Current node for %s (%s) can't run resources", rsc->id, running->details->uname); running = NULL; } for (lpc = 1; lpc < length && running; lpc++) { node_t *tmp = g_list_nth_data(nodes, lpc); if (tmp->weight == chosen->weight) { multiple++; if (tmp->details == running->details) { /* prefer the existing node if scores are equal */ chosen = tmp; } } } } } if (multiple > 1) { int log_level = LOG_INFO; static char score[33]; score2char_stack(chosen->weight, score, sizeof(score)); if (chosen->weight >= INFINITY) { log_level = LOG_WARNING; } do_crm_log(log_level, "%d nodes with equal score (%s) for" " running %s resources. Chose %s.", multiple, score, rsc->id, chosen->details->uname); } result = native_assign_node(rsc, nodes, chosen, FALSE); g_list_free(nodes); return result; } static int node_list_attr_score(GHashTable * list, const char *attr, const char *value) { GHashTableIter iter; node_t *node = NULL; int best_score = -INFINITY; const char *best_node = NULL; if (attr == NULL) { attr = "#" XML_ATTR_UNAME; } g_hash_table_iter_init(&iter, list); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { int weight = node->weight; if (can_run_resources(node) == FALSE) { weight = -INFINITY; } if (weight > best_score || best_node == NULL) { const char *tmp = g_hash_table_lookup(node->details->attrs, attr); if (safe_str_eq(value, tmp)) { best_score = weight; best_node = node->details->uname; } } } if (safe_str_neq(attr, "#" XML_ATTR_UNAME)) { crm_info("Best score for %s=%s was %s with %d", attr, value, best_node ? best_node : "", best_score); } return best_score; } static void node_hash_update(GHashTable * list1, GHashTable * list2, const char *attr, float factor, gboolean only_positive) { int score = 0; int new_score = 0; GHashTableIter iter; node_t *node = NULL; if (attr == NULL) { attr = "#" XML_ATTR_UNAME; } g_hash_table_iter_init(&iter, list1); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { CRM_LOG_ASSERT(node != NULL); if(node == NULL) { continue; }; score = node_list_attr_score(list2, attr, g_hash_table_lookup(node->details->attrs, attr)); new_score = merge_weights(factor * score, node->weight); if (factor < 0 && score < 0) { /* Negative preference for a node with a negative score * should not become a positive preference * * TODO - Decide if we want to filter only if weight == -INFINITY * */ crm_trace("%s: Filtering %d + %f*%d (factor * score)", node->details->uname, node->weight, factor, score); } else if (node->weight == INFINITY_HACK) { crm_trace("%s: Filtering %d + %f*%d (node < 0)", node->details->uname, node->weight, factor, score); } else if (only_positive && new_score < 0 && node->weight > 0) { node->weight = INFINITY_HACK; crm_trace("%s: Filtering %d + %f*%d (score > 0)", node->details->uname, node->weight, factor, score); } else if (only_positive && new_score < 0 && node->weight == 0) { crm_trace("%s: Filtering %d + %f*%d (score == 0)", node->details->uname, node->weight, factor, score); } else { crm_trace("%s: %d + %f*%d", node->details->uname, node->weight, factor, score); node->weight = new_score; } } } GHashTable * node_hash_dup(GHashTable * hash) { /* Hack! */ GListPtr list = g_hash_table_get_values(hash); GHashTable *result = node_hash_from_list(list); g_list_free(list); return result; } GHashTable * native_merge_weights(resource_t * rsc, const char *rhs, GHashTable * nodes, const char *attr, float factor, enum pe_weights flags) { return rsc_merge_weights(rsc, rhs, nodes, attr, factor, flags); } GHashTable * rsc_merge_weights(resource_t * rsc, const char *rhs, GHashTable * nodes, const char *attr, float factor, enum pe_weights flags) { GHashTable *work = NULL; int multiplier = 1; if (factor < 0) { multiplier = -1; } if (is_set(rsc->flags, pe_rsc_merging)) { pe_rsc_info(rsc, "%s: Breaking dependency loop at %s", rhs, rsc->id); return nodes; } set_bit(rsc->flags, pe_rsc_merging); if (is_set(flags, pe_weights_init)) { if (rsc->variant == pe_group && rsc->children) { GListPtr last = rsc->children; while (last->next != NULL) { last = last->next; } pe_rsc_trace(rsc, "Merging %s as a group %p %p", rsc->id, rsc->children, last); work = rsc_merge_weights(last->data, rhs, NULL, attr, factor, flags); } else { work = node_hash_dup(rsc->allowed_nodes); } clear_bit(flags, pe_weights_init); } else if (rsc->variant == pe_group && rsc->children) { GListPtr iter = rsc->children; pe_rsc_trace(rsc, "%s: Combining scores from %d children of %s", rhs, g_list_length(iter), rsc->id); work = node_hash_dup(nodes); for(iter = rsc->children; iter->next != NULL; iter = iter->next) { work = rsc_merge_weights(iter->data, rhs, work, attr, factor, flags); } } else { pe_rsc_trace(rsc, "%s: Combining scores from %s", rhs, rsc->id); work = node_hash_dup(nodes); node_hash_update(work, rsc->allowed_nodes, attr, factor, is_set(flags, pe_weights_positive)); } if (is_set(flags, pe_weights_rollback) && can_run_any(work) == FALSE) { pe_rsc_info(rsc, "%s: Rolling back scores from %s", rhs, rsc->id); g_hash_table_destroy(work); clear_bit(rsc->flags, pe_rsc_merging); return nodes; } if (can_run_any(work)) { GListPtr gIter = NULL; if (is_set(flags, pe_weights_forward)) { gIter = rsc->rsc_cons; crm_trace("Checking %d additional colocation constraints", g_list_length(gIter)); } else if(rsc->variant == pe_group && rsc->children) { GListPtr last = rsc->children; while (last->next != NULL) { last = last->next; } gIter = ((resource_t*)last->data)->rsc_cons_lhs; crm_trace("Checking %d additional optional group colocation constraints from %s", g_list_length(gIter), ((resource_t*)last->data)->id); } else { gIter = rsc->rsc_cons_lhs; crm_trace("Checking %d additional optional colocation constraints %s", g_list_length(gIter), rsc->id); } for (; gIter != NULL; gIter = gIter->next) { resource_t *other = NULL; rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; if (is_set(flags, pe_weights_forward)) { other = constraint->rsc_rh; } else { other = constraint->rsc_lh; } pe_rsc_trace(rsc, "Applying %s (%s)", constraint->id, other->id); work = rsc_merge_weights(other, rhs, work, constraint->node_attribute, multiplier * (float)constraint->score / INFINITY, flags|pe_weights_rollback); dump_node_scores(LOG_TRACE, NULL, rhs, work); } } if (is_set(flags, pe_weights_positive)) { node_t *node = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, work); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (node->weight == INFINITY_HACK) { node->weight = 1; } } } if (nodes) { g_hash_table_destroy(nodes); } clear_bit(rsc->flags, pe_rsc_merging); return work; } node_t * native_color(resource_t * rsc, node_t * prefer, pe_working_set_t * data_set) { GListPtr gIter = NULL; int alloc_details = scores_log_level + 1; if (rsc->parent && is_not_set(rsc->parent->flags, pe_rsc_allocating)) { /* never allocate children on their own */ pe_rsc_debug(rsc, "Escalating allocation of %s to its parent: %s", rsc->id, rsc->parent->id); rsc->parent->cmds->allocate(rsc->parent, prefer, data_set); } if (is_not_set(rsc->flags, pe_rsc_provisional)) { return rsc->allocated_to; } if (is_set(rsc->flags, pe_rsc_allocating)) { pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id); return NULL; } set_bit(rsc->flags, pe_rsc_allocating); print_resource(alloc_details, "Allocating: ", rsc, FALSE); dump_node_scores(alloc_details, rsc, "Pre-allloc", rsc->allowed_nodes); for (gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) { rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; GHashTable *archive = NULL; resource_t *rsc_rh = constraint->rsc_rh; pe_rsc_trace(rsc, "%s: Pre-Processing %s (%s, %d, %s)", rsc->id, constraint->id, rsc_rh->id, constraint->score, role2text(constraint->role_lh)); if (constraint->role_lh >= RSC_ROLE_MASTER || (constraint->score < 0 && constraint->score > -INFINITY)) { archive = node_hash_dup(rsc->allowed_nodes); } rsc_rh->cmds->allocate(rsc_rh, NULL, data_set); rsc->cmds->rsc_colocation_lh(rsc, rsc_rh, constraint); if (archive && can_run_any(rsc->allowed_nodes) == FALSE) { pe_rsc_info(rsc, "%s: Rolling back scores from %s", rsc->id, rsc_rh->id); g_hash_table_destroy(rsc->allowed_nodes); rsc->allowed_nodes = archive; archive = NULL; } if (archive) { g_hash_table_destroy(archive); } } dump_node_scores(alloc_details, rsc, "Post-coloc", rsc->allowed_nodes); for (gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) { rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; rsc->allowed_nodes = constraint->rsc_lh->cmds->merge_weights(constraint->rsc_lh, rsc->id, rsc->allowed_nodes, constraint->node_attribute, (float)constraint->score / INFINITY, pe_weights_rollback); } print_resource(LOG_DEBUG_2, "Allocating: ", rsc, FALSE); if (rsc->next_role == RSC_ROLE_STOPPED) { pe_rsc_trace(rsc, "Making sure %s doesn't get allocated", rsc->id); /* make sure it doesnt come up again */ resource_location(rsc, NULL, -INFINITY, XML_RSC_ATTR_TARGET_ROLE, data_set); } else if(rsc->next_role > rsc->role && is_set(data_set->flags, pe_flag_have_quorum) == FALSE && data_set->no_quorum_policy == no_quorum_freeze) { crm_notice("Resource %s cannot be elevated from %s to %s: no-quorum-policy=freeze", rsc->id, role2text(rsc->role), role2text(rsc->next_role)); rsc->next_role = rsc->role; } dump_node_scores(show_scores ? 0 : scores_log_level, rsc, __FUNCTION__, rsc->allowed_nodes); if (is_set(data_set->flags, pe_flag_stonith_enabled) && is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) { clear_bit(rsc->flags, pe_rsc_managed); } if (is_not_set(rsc->flags, pe_rsc_managed)) { const char *reason = NULL; node_t *assign_to = NULL; rsc->next_role = rsc->role; if (rsc->running_on == NULL) { reason = "inactive"; } else if (rsc->role == RSC_ROLE_MASTER) { assign_to = rsc->running_on->data; reason = "master"; } else if (is_set(rsc->flags, pe_rsc_failed)) { assign_to = rsc->running_on->data; reason = "failed"; } else { assign_to = rsc->running_on->data; reason = "active"; } pe_rsc_info(rsc, "Unmanaged resource %s allocated to %s: %s", rsc->id, assign_to ? assign_to->details->uname : "'nowhere'", reason); native_assign_node(rsc, NULL, assign_to, TRUE); } else if (is_set(data_set->flags, pe_flag_stop_everything)) { pe_rsc_debug(rsc, "Forcing %s to stop", rsc->id); native_assign_node(rsc, NULL, NULL, TRUE); } else if (is_set(rsc->flags, pe_rsc_provisional) && native_choose_node(rsc, prefer, data_set)) { pe_rsc_trace(rsc, "Allocated resource %s to %s", rsc->id, rsc->allocated_to->details->uname); } else if (rsc->allocated_to == NULL) { if (is_not_set(rsc->flags, pe_rsc_orphan)) { pe_rsc_info(rsc, "Resource %s cannot run anywhere", rsc->id); } else if (rsc->running_on != NULL) { pe_rsc_info(rsc, "Stopping orphan resource %s", rsc->id); } } else { pe_rsc_debug(rsc, "Pre-Allocated resource %s to %s", rsc->id, rsc->allocated_to->details->uname); } clear_bit(rsc->flags, pe_rsc_allocating); print_resource(LOG_DEBUG_3, "Allocated ", rsc, TRUE); if (rsc->is_remote_node) { node_t *remote_node = pe_find_node(data_set->nodes, rsc->id); CRM_ASSERT(remote_node != NULL); if (rsc->allocated_to && rsc->next_role != RSC_ROLE_STOPPED) { crm_trace("Setting remote node %s to ONLINE", remote_node->details->id); remote_node->details->online = TRUE; /* We shouldn't consider an unseen remote-node unclean if we are going * to try and connect to it. Otherwise we get an unnecessary fence */ if (remote_node->details->unseen == TRUE) { remote_node->details->unclean = FALSE; } } else { crm_trace("Setting remote node %s to SHUTDOWN. next role = %s, allocated=%s", remote_node->details->id, role2text(rsc->next_role), rsc->allocated_to ? "true" : "false"); remote_node->details->shutdown = TRUE; } } return rsc->allocated_to; } static gboolean is_op_dup(resource_t * rsc, const char *name, const char *interval) { gboolean dup = FALSE; const char *id = NULL; const char *value = NULL; xmlNode *operation = NULL; CRM_ASSERT(rsc); for (operation = __xml_first_child(rsc->ops_xml); operation != NULL; operation = __xml_next(operation)) { if (crm_str_eq((const char *)operation->name, "op", TRUE)) { value = crm_element_value(operation, "name"); if (safe_str_neq(value, name)) { continue; } value = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); if (value == NULL) { value = "0"; } if (safe_str_neq(value, interval)) { continue; } if (id == NULL) { id = ID(operation); } else { crm_config_err("Operation %s is a duplicate of %s", ID(operation), id); crm_config_err ("Do not use the same (name, interval) combination more than once per resource"); dup = TRUE; } } } return dup; } void RecurringOp(resource_t * rsc, action_t * start, node_t * node, xmlNode * operation, pe_working_set_t * data_set) { char *key = NULL; const char *name = NULL; const char *value = NULL; const char *interval = NULL; const char *node_uname = NULL; unsigned long long interval_ms = 0; action_t *mon = NULL; gboolean is_optional = TRUE; GListPtr possible_matches = NULL; /* Only process for the operations without role="Stopped" */ value = crm_element_value(operation, "role"); if (value && text2role(value) == RSC_ROLE_STOPPED) { return; } CRM_ASSERT(rsc); pe_rsc_trace(rsc, "Creating recurring action %s for %s in role %s on %s", ID(operation), rsc->id, role2text(rsc->next_role), node ? node->details->uname : "n/a"); if (node != NULL) { node_uname = node->details->uname; } interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); interval_ms = crm_get_interval(interval); if (interval_ms == 0) { return; } name = crm_element_value(operation, "name"); if (is_op_dup(rsc, name, interval)) { return; } if (safe_str_eq(name, RSC_STOP) || safe_str_eq(name, RSC_START) || safe_str_eq(name, RSC_DEMOTE) || safe_str_eq(name, RSC_PROMOTE) ) { crm_config_err("Invalid recurring action %s wth name: '%s'", ID(operation), name); return; } key = generate_op_key(rsc->id, name, interval_ms); if (find_rsc_op_entry(rsc, key) == NULL) { /* disabled */ free(key); return; } if (start != NULL) { pe_rsc_trace(rsc, "Marking %s %s due to %s", key, is_set(start->flags, pe_action_optional) ? "optional" : "manditory", start->uuid); is_optional = (rsc->cmds->action_flags(start, NULL) & pe_action_optional); } else { pe_rsc_trace(rsc, "Marking %s optional", key); is_optional = TRUE; } /* start a monitor for an already active resource */ possible_matches = find_actions_exact(rsc->actions, key, node); if (possible_matches == NULL) { is_optional = FALSE; pe_rsc_trace(rsc, "Marking %s manditory: not active", key); } else { g_list_free(possible_matches); } if ((rsc->next_role == RSC_ROLE_MASTER && value == NULL) || (value != NULL && text2role(value) != rsc->next_role)) { int log_level = LOG_DEBUG_2; const char *result = "Ignoring"; if (is_optional) { char *local_key = strdup(key); log_level = LOG_INFO; result = "Cancelling"; /* its running : cancel it */ mon = custom_action(rsc, local_key, RSC_CANCEL, node, FALSE, TRUE, data_set); free(mon->task); free(mon->cancel_task); mon->task = strdup(RSC_CANCEL); mon->cancel_task = strdup(name); add_hash_param(mon->meta, XML_LRM_ATTR_INTERVAL, interval); add_hash_param(mon->meta, XML_LRM_ATTR_TASK, name); local_key = NULL; switch (rsc->role) { case RSC_ROLE_SLAVE: case RSC_ROLE_STARTED: if (rsc->next_role == RSC_ROLE_MASTER) { local_key = promote_key(rsc); } else if (rsc->next_role == RSC_ROLE_STOPPED) { local_key = stop_key(rsc); } break; case RSC_ROLE_MASTER: local_key = demote_key(rsc); break; default: break; } if (local_key) { custom_action_order(rsc, NULL, mon, rsc, local_key, NULL, pe_order_runnable_left, data_set); } mon = NULL; } do_crm_log(log_level, "%s action %s (%s vs. %s)", result, key, value ? value : role2text(RSC_ROLE_SLAVE), role2text(rsc->next_role)); free(key); key = NULL; return; } mon = custom_action(rsc, key, name, node, is_optional, TRUE, data_set); key = mon->uuid; if (is_optional) { pe_rsc_trace(rsc, "%s\t %s (optional)", crm_str(node_uname), mon->uuid); } if (start == NULL || is_set(start->flags, pe_action_runnable) == FALSE) { pe_rsc_debug(rsc, "%s\t %s (cancelled : start un-runnable)", crm_str(node_uname), mon->uuid); update_action_flags(mon, pe_action_runnable | pe_action_clear); } else if (node == NULL || node->details->online == FALSE || node->details->unclean) { pe_rsc_debug(rsc, "%s\t %s (cancelled : no node available)", crm_str(node_uname), mon->uuid); update_action_flags(mon, pe_action_runnable | pe_action_clear); } else if (is_set(mon->flags, pe_action_optional) == FALSE) { pe_rsc_info(rsc, " Start recurring %s (%llus) for %s on %s", mon->task, interval_ms / 1000, rsc->id, crm_str(node_uname)); } if (rsc->next_role == RSC_ROLE_MASTER) { char *running_master = crm_itoa(PCMK_OCF_RUNNING_MASTER); add_hash_param(mon->meta, XML_ATTR_TE_TARGET_RC, running_master); free(running_master); } if (node == NULL || is_set(rsc->flags, pe_rsc_managed)) { custom_action_order(rsc, start_key(rsc), NULL, NULL, strdup(key), mon, pe_order_implies_then | pe_order_runnable_left, data_set); if (rsc->next_role == RSC_ROLE_MASTER) { custom_action_order(rsc, promote_key(rsc), NULL, rsc, NULL, mon, pe_order_optional | pe_order_runnable_left, data_set); } else if (rsc->role == RSC_ROLE_MASTER) { custom_action_order(rsc, demote_key(rsc), NULL, rsc, NULL, mon, pe_order_optional | pe_order_runnable_left, data_set); } } } void Recurring(resource_t * rsc, action_t * start, node_t * node, pe_working_set_t * data_set) { if (is_not_set(rsc->flags, pe_rsc_maintenance) && (node == NULL || node->details->maintenance == FALSE)) { xmlNode *operation = NULL; for (operation = __xml_first_child(rsc->ops_xml); operation != NULL; operation = __xml_next(operation)) { if (crm_str_eq((const char *)operation->name, "op", TRUE)) { RecurringOp(rsc, start, node, operation, data_set); } } } } void RecurringOp_Stopped(resource_t * rsc, action_t * start, node_t * node, xmlNode * operation, pe_working_set_t * data_set) { char *key = NULL; const char *name = NULL; const char *role = NULL; const char *interval = NULL; const char *node_uname = NULL; unsigned long long interval_ms = 0; GListPtr possible_matches = NULL; GListPtr gIter = NULL; /* TODO: Support of non-unique clone */ if (is_set(rsc->flags, pe_rsc_unique) == FALSE) { return; } /* Only process for the operations with role="Stopped" */ role = crm_element_value(operation, "role"); if (role == NULL || text2role(role) != RSC_ROLE_STOPPED) { return; } pe_rsc_trace(rsc, "Creating recurring actions %s for %s in role %s on nodes where it'll not be running", ID(operation), rsc->id, role2text(rsc->next_role)); if (node != NULL) { node_uname = node->details->uname; } interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); interval_ms = crm_get_interval(interval); if (interval_ms == 0) { return; } name = crm_element_value(operation, "name"); if (is_op_dup(rsc, name, interval)) { return; } if (safe_str_eq(name, RSC_STOP) || safe_str_eq(name, RSC_START) || safe_str_eq(name, RSC_DEMOTE) || safe_str_eq(name, RSC_PROMOTE) ) { crm_config_err("Invalid recurring action %s wth name: '%s'", ID(operation), name); return; } key = generate_op_key(rsc->id, name, interval_ms); if (find_rsc_op_entry(rsc, key) == NULL) { /* disabled */ free(key); return; } /* if the monitor exists on the node where the resource will be running, cancel it */ if (node != NULL) { possible_matches = find_actions_exact(rsc->actions, key, node); if (possible_matches) { action_t *cancel_op = NULL; char *local_key = strdup(key); g_list_free(possible_matches); cancel_op = custom_action(rsc, local_key, RSC_CANCEL, node, FALSE, TRUE, data_set); free(cancel_op->task); free(cancel_op->cancel_task); cancel_op->task = strdup(RSC_CANCEL); cancel_op->cancel_task = strdup(name); add_hash_param(cancel_op->meta, XML_LRM_ATTR_INTERVAL, interval); add_hash_param(cancel_op->meta, XML_LRM_ATTR_TASK, name); local_key = NULL; if (rsc->next_role == RSC_ROLE_STARTED || rsc->next_role == RSC_ROLE_SLAVE) { /* rsc->role == RSC_ROLE_STOPPED: cancel the monitor before start */ /* rsc->role == RSC_ROLE_STARTED: for a migration, cancel the monitor on the target node before start */ custom_action_order(rsc, NULL, cancel_op, rsc, start_key(rsc), NULL, pe_order_runnable_left, data_set); } pe_rsc_info(rsc, "Cancel action %s (%s vs. %s) on %s", key, role, role2text(rsc->next_role), crm_str(node_uname)); } } for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *stop_node = (node_t *) gIter->data; const char *stop_node_uname = stop_node->details->uname; gboolean is_optional = TRUE; gboolean probe_is_optional = TRUE; gboolean stop_is_optional = TRUE; action_t *stopped_mon = NULL; char *rc_inactive = NULL; GListPtr probe_complete_ops = NULL; GListPtr stop_ops = NULL; GListPtr local_gIter = NULL; char *stop_op_key = NULL; if (node_uname && safe_str_eq(stop_node_uname, node_uname)) { continue; } pe_rsc_trace(rsc, "Creating recurring action %s for %s on %s", ID(operation), rsc->id, crm_str(stop_node_uname)); /* start a monitor for an already stopped resource */ possible_matches = find_actions_exact(rsc->actions, key, stop_node); if (possible_matches == NULL) { pe_rsc_trace(rsc, "Marking %s manditory on %s: not active", key, crm_str(stop_node_uname)); is_optional = FALSE; } else { pe_rsc_trace(rsc, "Marking %s optional on %s: already active", key, crm_str(stop_node_uname)); is_optional = TRUE; g_list_free(possible_matches); } stopped_mon = custom_action(rsc, strdup(key), name, stop_node, is_optional, TRUE, data_set); rc_inactive = crm_itoa(PCMK_OCF_NOT_RUNNING); add_hash_param(stopped_mon->meta, XML_ATTR_TE_TARGET_RC, rc_inactive); free(rc_inactive); probe_complete_ops = find_actions(data_set->actions, CRM_OP_PROBED, NULL); for (local_gIter = probe_complete_ops; local_gIter != NULL; local_gIter = local_gIter->next) { action_t *probe_complete = (action_t *) local_gIter->data; if (probe_complete->node == NULL) { if (is_set(probe_complete->flags, pe_action_optional) == FALSE) { probe_is_optional = FALSE; } if (is_set(probe_complete->flags, pe_action_runnable) == FALSE) { crm_debug("%s\t %s (cancelled : probe un-runnable)", crm_str(stop_node_uname), stopped_mon->uuid); update_action_flags(stopped_mon, pe_action_runnable | pe_action_clear); } if (is_set(rsc->flags, pe_rsc_managed)) { custom_action_order(NULL, NULL, probe_complete, NULL, strdup(key), stopped_mon, pe_order_optional, data_set); } break; } } if (probe_complete_ops) { g_list_free(probe_complete_ops); } stop_op_key = stop_key(rsc); stop_ops = find_actions_exact(rsc->actions, stop_op_key, stop_node); for (local_gIter = stop_ops; local_gIter != NULL; local_gIter = local_gIter->next) { action_t *stop = (action_t *) local_gIter->data; if (is_set(stop->flags, pe_action_optional) == FALSE) { stop_is_optional = FALSE; } if (is_set(stop->flags, pe_action_runnable) == FALSE) { crm_debug("%s\t %s (cancelled : stop un-runnable)", crm_str(stop_node_uname), stopped_mon->uuid); update_action_flags(stopped_mon, pe_action_runnable | pe_action_clear); } if (is_set(rsc->flags, pe_rsc_managed)) { custom_action_order(rsc, strdup(stop_op_key), stop, NULL, strdup(key), stopped_mon, pe_order_implies_then | pe_order_runnable_left, data_set); } } if (stop_ops) { g_list_free(stop_ops); } free(stop_op_key); if (is_optional == FALSE && probe_is_optional && stop_is_optional && is_set(rsc->flags, pe_rsc_managed) == FALSE) { pe_rsc_trace(rsc, "Marking %s optional on %s due to unmanaged", key, crm_str(stop_node_uname)); update_action_flags(stopped_mon, pe_action_optional); } if (is_set(stopped_mon->flags, pe_action_optional)) { pe_rsc_trace(rsc, "%s\t %s (optional)", crm_str(stop_node_uname), stopped_mon->uuid); } if (stop_node->details->online == FALSE || stop_node->details->unclean) { pe_rsc_debug(rsc, "%s\t %s (cancelled : no node available)", crm_str(stop_node_uname), stopped_mon->uuid); update_action_flags(stopped_mon, pe_action_runnable | pe_action_clear); } if (is_set(stopped_mon->flags, pe_action_runnable) && is_set(stopped_mon->flags, pe_action_optional) == FALSE) { crm_notice(" Start recurring %s (%llus) for %s on %s", stopped_mon->task, interval_ms / 1000, rsc->id, crm_str(stop_node_uname)); } } free(key); } void Recurring_Stopped(resource_t * rsc, action_t * start, node_t * node, pe_working_set_t * data_set) { if (is_not_set(rsc->flags, pe_rsc_maintenance) && (node == NULL || node->details->maintenance == FALSE)) { xmlNode *operation = NULL; for (operation = __xml_first_child(rsc->ops_xml); operation != NULL; operation = __xml_next(operation)) { if (crm_str_eq((const char *)operation->name, "op", TRUE)) { RecurringOp_Stopped(rsc, start, node, operation, data_set); } } } } static void handle_migration_actions(resource_t * rsc, node_t *current, node_t *chosen, pe_working_set_t * data_set) { action_t *migrate_to = NULL; action_t *migrate_from = NULL; action_t *start = NULL; action_t *stop = NULL; gboolean partial = rsc->partial_migration_target ? TRUE : FALSE; pe_rsc_trace(rsc, "Processing migration actions %s moving from %s to %s . partial migration = %s", rsc->id, current->details->id, chosen->details->id, partial ? "TRUE" : "FALSE"); start = start_action(rsc, chosen, TRUE); stop = stop_action(rsc, current, TRUE); if (partial == FALSE) { migrate_to = custom_action(rsc, generate_op_key(rsc->id, RSC_MIGRATE, 0), RSC_MIGRATE, current, TRUE, TRUE, data_set); } migrate_from = custom_action(rsc, generate_op_key(rsc->id, RSC_MIGRATED, 0), RSC_MIGRATED, chosen, TRUE, TRUE, data_set); if ((migrate_to && migrate_from) || (migrate_from && partial)) { set_bit(start->flags, pe_action_migrate_runnable); set_bit(stop->flags, pe_action_migrate_runnable); update_action_flags(start, pe_action_pseudo); /* easier than trying to delete it from the graph */ /* order probes before migrations */ if (partial) { set_bit(migrate_from->flags, pe_action_migrate_runnable); migrate_from->needs = start->needs; custom_action_order(rsc, generate_op_key(rsc->id, RSC_STATUS, 0), NULL, rsc, generate_op_key(rsc->id, RSC_MIGRATED, 0), NULL, pe_order_optional, data_set); } else { set_bit(migrate_from->flags, pe_action_migrate_runnable); set_bit(migrate_to->flags, pe_action_migrate_runnable); migrate_to->needs = start->needs; custom_action_order(rsc, generate_op_key(rsc->id, RSC_STATUS, 0), NULL, rsc, generate_op_key(rsc->id, RSC_MIGRATE, 0), NULL, pe_order_optional, data_set); custom_action_order(rsc, generate_op_key(rsc->id, RSC_MIGRATE, 0), NULL, rsc, generate_op_key(rsc->id, RSC_MIGRATED, 0), NULL, pe_order_optional | pe_order_implies_first_migratable, data_set); } custom_action_order(rsc, generate_op_key(rsc->id, RSC_MIGRATED, 0), NULL, rsc, generate_op_key(rsc->id, RSC_STOP, 0), NULL, pe_order_optional | pe_order_implies_first_migratable, data_set); custom_action_order(rsc, generate_op_key(rsc->id, RSC_MIGRATED, 0), NULL, rsc, generate_op_key(rsc->id, RSC_START, 0), NULL, pe_order_optional | pe_order_implies_first_migratable | pe_order_pseudo_left, data_set); } if (migrate_to) { add_hash_param(migrate_to->meta, XML_LRM_ATTR_MIGRATE_SOURCE, current->details->uname); add_hash_param(migrate_to->meta, XML_LRM_ATTR_MIGRATE_TARGET, chosen->details->uname); /* migrate_to takes place on the source node, but can * have an effect on the target node depending on how * the agent is written. Because of this, we have to maintain * a record that the migrate_to occurred incase the source node * loses membership while the migrate_to action is still in-flight. */ add_hash_param(migrate_to->meta, XML_OP_ATTR_PENDING, "true"); } if (migrate_from) { add_hash_param(migrate_from->meta, XML_LRM_ATTR_MIGRATE_SOURCE, current->details->uname); add_hash_param(migrate_from->meta, XML_LRM_ATTR_MIGRATE_TARGET, chosen->details->uname); } } void native_create_actions(resource_t * rsc, pe_working_set_t * data_set) { action_t *start = NULL; node_t *chosen = NULL; node_t *current = NULL; gboolean need_stop = FALSE; gboolean is_moving = FALSE; gboolean allow_migrate = is_set(rsc->flags, pe_rsc_allow_migrate) ? TRUE : FALSE; GListPtr gIter = NULL; int num_active_nodes = 0; enum rsc_role_e role = RSC_ROLE_UNKNOWN; enum rsc_role_e next_role = RSC_ROLE_UNKNOWN; CRM_ASSERT(rsc); chosen = rsc->allocated_to; if (chosen != NULL && rsc->next_role == RSC_ROLE_UNKNOWN) { rsc->next_role = RSC_ROLE_STARTED; pe_rsc_trace(rsc, "Fixed next_role: unknown -> %s", role2text(rsc->next_role)); } else if (rsc->next_role == RSC_ROLE_UNKNOWN) { rsc->next_role = RSC_ROLE_STOPPED; pe_rsc_trace(rsc, "Fixed next_role: unknown -> %s", role2text(rsc->next_role)); } pe_rsc_trace(rsc, "Processing state transition for %s %p: %s->%s", rsc->id, rsc, role2text(rsc->role), role2text(rsc->next_role)); if (rsc->running_on) { current = rsc->running_on->data; } for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { node_t *n = (node_t *) gIter->data; if (rsc->partial_migration_source && (n->details == rsc->partial_migration_source->details)) { current = rsc->partial_migration_source; } num_active_nodes++; } get_rsc_attributes(rsc->parameters, rsc, chosen, data_set); for (gIter = rsc->dangling_migrations; gIter != NULL; gIter = gIter->next) { node_t *current = (node_t *) gIter->data; action_t *stop = stop_action(rsc, current, FALSE); set_bit(stop->flags, pe_action_dangle); pe_rsc_trace(rsc, "Forcing a cleanup of %s on %s", rsc->id, current->details->uname); if (is_set(data_set->flags, pe_flag_remove_after_stop)) { DeleteRsc(rsc, current, FALSE, data_set); } } if (num_active_nodes > 1) { if (num_active_nodes == 2 && chosen && rsc->partial_migration_target && rsc->partial_migration_source && (current->details == rsc->partial_migration_source->details) && (chosen->details == rsc->partial_migration_target->details)) { /* Here the chosen node is still the migration target from a partial * migration. Attempt to continue the migration instead of recovering * by stopping the resource everywhere and starting it on a single node. */ pe_rsc_trace(rsc, "Will attempt to continue with a partial migration to target %s from %s", rsc->partial_migration_target->details->id, rsc->partial_migration_source->details->id); } else { const char *type = crm_element_value(rsc->xml, XML_ATTR_TYPE); const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); if(rsc->partial_migration_target && rsc->partial_migration_source) { crm_notice("Resource %s can no longer migrate to %s. Stopping on %s too", rsc->id, rsc->partial_migration_target->details->uname, rsc->partial_migration_source->details->uname); } else { pe_proc_err("Resource %s (%s::%s) is active on %d nodes %s", rsc->id, class, type, num_active_nodes, recovery2text(rsc->recovery_type)); crm_warn("See %s for more information.", "http://clusterlabs.org/wiki/FAQ#Resource_is_Too_Active"); } if (rsc->recovery_type == recovery_stop_start) { need_stop = TRUE; } /* If by chance a partial migration is in process, * but the migration target is not chosen still, clear all * partial migration data. */ rsc->partial_migration_source = rsc->partial_migration_target = NULL; allow_migrate = FALSE; } } if (is_set(rsc->flags, pe_rsc_start_pending)) { start = start_action(rsc, chosen, TRUE); set_bit(start->flags, pe_action_print_always); } if (current && chosen && current->details != chosen->details) { pe_rsc_trace(rsc, "Moving %s", rsc->id); is_moving = TRUE; need_stop = TRUE; } else if (is_set(rsc->flags, pe_rsc_failed)) { pe_rsc_trace(rsc, "Recovering %s", rsc->id); need_stop = TRUE; } else if (is_set(rsc->flags, pe_rsc_block)) { pe_rsc_trace(rsc, "Block %s", rsc->id); need_stop = TRUE; } else if (rsc->role > RSC_ROLE_STARTED && current != NULL && chosen != NULL) { /* Recovery of a promoted resource */ start = start_action(rsc, chosen, TRUE); if (is_set(start->flags, pe_action_optional) == FALSE) { pe_rsc_trace(rsc, "Forced start %s", rsc->id); need_stop = TRUE; } } pe_rsc_trace(rsc, "Creating actions for %s: %s->%s", rsc->id, role2text(rsc->role), role2text(rsc->next_role)); role = rsc->role; /* Potentiall optional steps on brining the resource down and back up to the same level */ while (role != RSC_ROLE_STOPPED) { next_role = rsc_state_matrix[role][RSC_ROLE_STOPPED]; pe_rsc_trace(rsc, "Down: Executing: %s->%s (%s)%s", role2text(role), role2text(next_role), rsc->id, need_stop ? " required" : ""); if (rsc_action_matrix[role][next_role] (rsc, current, !need_stop, data_set) == FALSE) { break; } role = next_role; } while (rsc->role <= rsc->next_role && role != rsc->role && is_not_set(rsc->flags, pe_rsc_block)) { next_role = rsc_state_matrix[role][rsc->role]; pe_rsc_trace(rsc, "Up: Executing: %s->%s (%s)%s", role2text(role), role2text(next_role), rsc->id, need_stop ? " required" : ""); if (rsc_action_matrix[role][next_role] (rsc, chosen, !need_stop, data_set) == FALSE) { break; } role = next_role; } role = rsc->role; /* Required steps from this role to the next */ while (role != rsc->next_role) { next_role = rsc_state_matrix[role][rsc->next_role]; pe_rsc_trace(rsc, "Role: Executing: %s->%s = (%s)", role2text(role), role2text(rsc->next_role), role2text(next_role), rsc->id); if (rsc_action_matrix[role][next_role] (rsc, chosen, FALSE, data_set) == FALSE) { break; } role = next_role; } if(is_set(rsc->flags, pe_rsc_block)) { pe_rsc_trace(rsc, "No monitor additional ops for blocked resource"); } else if (rsc->next_role != RSC_ROLE_STOPPED || is_set(rsc->flags, pe_rsc_managed) == FALSE) { pe_rsc_trace(rsc, "Monitor ops for active resource"); start = start_action(rsc, chosen, TRUE); Recurring(rsc, start, chosen, data_set); Recurring_Stopped(rsc, start, chosen, data_set); } else { pe_rsc_trace(rsc, "Monitor ops for in-active resource"); Recurring_Stopped(rsc, NULL, NULL, data_set); } /* if we are stuck in a partial migration, where the target * of the partial migration no longer matches the chosen target. * A full stop/start is required */ if (rsc->partial_migration_target && (chosen == NULL || rsc->partial_migration_target->details != chosen->details)) { pe_rsc_trace(rsc, "Not allowing partial migration to continue. %s", rsc->id); allow_migrate = FALSE; } else if (is_moving == FALSE || is_not_set(rsc->flags, pe_rsc_managed) || is_set(rsc->flags, pe_rsc_failed) || is_set(rsc->flags, pe_rsc_start_pending) || (current->details->unclean == TRUE) || rsc->next_role < RSC_ROLE_STARTED) { allow_migrate = FALSE; } if (allow_migrate) { handle_migration_actions(rsc, current, chosen, data_set); } } static void rsc_avoids_remote_nodes(resource_t *rsc) { GHashTableIter iter; node_t *node = NULL; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (node->details->remote_rsc) { node->weight = -INFINITY; } } } void native_internal_constraints(resource_t * rsc, pe_working_set_t * data_set) { /* This function is on the critical path and worth optimizing as much as possible */ resource_t *top = uber_parent(rsc); int type = pe_order_optional | pe_order_implies_then | pe_order_restart; gboolean is_stonith = is_set(rsc->flags, pe_rsc_fence_device); custom_action_order(rsc, generate_op_key(rsc->id, RSC_STOP, 0), NULL, rsc, generate_op_key(rsc->id, RSC_START, 0), NULL, type, data_set); if (top->variant == pe_master || rsc->role > RSC_ROLE_SLAVE) { custom_action_order(rsc, generate_op_key(rsc->id, RSC_DEMOTE, 0), NULL, rsc, generate_op_key(rsc->id, RSC_STOP, 0), NULL, pe_order_implies_first_master, data_set); custom_action_order(rsc, generate_op_key(rsc->id, RSC_START, 0), NULL, rsc, generate_op_key(rsc->id, RSC_PROMOTE, 0), NULL, pe_order_runnable_left, data_set); } if (is_stonith == FALSE && is_set(data_set->flags, pe_flag_enable_unfencing) && is_set(rsc->flags, pe_rsc_needs_unfencing) && is_not_set(rsc->flags, pe_rsc_have_unfencing)) { /* Check if the node needs to be unfenced first */ node_t *node = NULL; GHashTableIter iter; if(rsc != top) { /* Only create these constraints once, rsc is almost certainly cloned */ clear_bit_recursive(top, pe_rsc_have_unfencing); } g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { action_t *unfence = pe_fence_op(node, "on", TRUE, data_set); custom_action_order(top, generate_op_key(top->id, top == rsc?RSC_STOP:RSC_STOPPED, 0), NULL, NULL, strdup(unfence->uuid), unfence, pe_order_optional, data_set); crm_debug("Stopping %s prior to unfencing %s", top->id, unfence->uuid); custom_action_order(NULL, strdup(unfence->uuid), unfence, top, generate_op_key(top->id, RSC_START, 0), NULL, pe_order_implies_then_on_node, data_set); } } if (is_not_set(rsc->flags, pe_rsc_managed)) { pe_rsc_trace(rsc, "Skipping fencing constraints for unmanaged resource: %s", rsc->id); return; } { action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); custom_action_order(rsc, stop_key(rsc), NULL, NULL, strdup(all_stopped->task), all_stopped, pe_order_implies_then | pe_order_runnable_left, data_set); } if (g_hash_table_size(rsc->utilization) > 0 && safe_str_neq(data_set->placement_strategy, "default")) { GHashTableIter iter; node_t *next = NULL; GListPtr gIter = NULL; pe_rsc_trace(rsc, "Creating utilization constraints for %s - strategy: %s", rsc->id, data_set->placement_strategy); for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { node_t *current = (node_t *) gIter->data; char *load_stopped_task = crm_concat(LOAD_STOPPED, current->details->uname, '_'); action_t *load_stopped = get_pseudo_op(load_stopped_task, data_set); if (load_stopped->node == NULL) { load_stopped->node = node_copy(current); update_action_flags(load_stopped, pe_action_optional | pe_action_clear); } custom_action_order(rsc, stop_key(rsc), NULL, NULL, load_stopped_task, load_stopped, pe_order_load, data_set); } g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&next)) { char *load_stopped_task = crm_concat(LOAD_STOPPED, next->details->uname, '_'); action_t *load_stopped = get_pseudo_op(load_stopped_task, data_set); if (load_stopped->node == NULL) { load_stopped->node = node_copy(next); update_action_flags(load_stopped, pe_action_optional | pe_action_clear); } custom_action_order(NULL, strdup(load_stopped_task), load_stopped, rsc, start_key(rsc), NULL, pe_order_load, data_set); custom_action_order(NULL, strdup(load_stopped_task), load_stopped, rsc, generate_op_key(rsc->id, RSC_MIGRATE, 0), NULL, pe_order_load, data_set); free(load_stopped_task); } } if (rsc->container) { resource_t *remote_rsc = NULL; /* find out if the container is associated with remote node connection resource */ if (rsc->container->is_remote_node) { remote_rsc = rsc->container; } else if (rsc->is_remote_node == FALSE) { remote_rsc = rsc_contains_remote_node(data_set, rsc->container); } /* if the container is a remote-node, force the resource within the container * instead of colocating the resource with the container. */ if (remote_rsc) { GHashTableIter iter; node_t *node = NULL; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (node->details->remote_rsc != remote_rsc) { node->weight = -INFINITY; } } } else { crm_trace("Generating order and colocation rules for rsc %s with container %s", rsc->id, rsc->container->id); custom_action_order(rsc->container, generate_op_key(rsc->container->id, RSC_START, 0), NULL, rsc, generate_op_key(rsc->id, RSC_START, 0), NULL, pe_order_implies_then | pe_order_runnable_left, data_set); custom_action_order(rsc, generate_op_key(rsc->id, RSC_STOP, 0), NULL, rsc->container, generate_op_key(rsc->container->id, RSC_STOP, 0), NULL, pe_order_implies_first, data_set); rsc_colocation_new("resource-with-containter", NULL, INFINITY, rsc, rsc->container, NULL, NULL, data_set); } } if (rsc->is_remote_node || is_stonith) { /* don't allow remote nodes to run stonith devices * or remote connection resources.*/ rsc_avoids_remote_nodes(rsc); } /* If this rsc is a remote connection resource associated * with a container ( which will most likely be a virtual guest ) * do not allow the container to live on any remote-nodes. * remote-nodes managing nested remote-nodes should not be allowed. */ if (rsc->is_remote_node && rsc->container) { rsc_avoids_remote_nodes(rsc->container); } } void native_rsc_colocation_lh(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint) { if (rsc_lh == NULL) { pe_err("rsc_lh was NULL for %s", constraint->id); return; } else if (constraint->rsc_rh == NULL) { pe_err("rsc_rh was NULL for %s", constraint->id); return; } pe_rsc_trace(rsc_lh, "Processing colocation constraint between %s and %s", rsc_lh->id, rsc_rh->id); rsc_rh->cmds->rsc_colocation_rh(rsc_lh, rsc_rh, constraint); } enum filter_colocation_res { influence_nothing = 0, influence_rsc_location, influence_rsc_priority, }; static enum filter_colocation_res filter_colocation_constraint(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint) { if (constraint->score == 0) { return influence_nothing; } /* rh side must be allocated before we can process constraint */ if (is_set(rsc_rh->flags, pe_rsc_provisional)) { return influence_nothing; } if ((constraint->role_lh >= RSC_ROLE_SLAVE) && rsc_lh->parent && rsc_lh->parent->variant == pe_master && is_not_set(rsc_lh->flags, pe_rsc_provisional)) { /* LH and RH resources have already been allocated, place the correct * priority oh LH rsc for the given multistate resource role */ return influence_rsc_priority; } if (is_not_set(rsc_lh->flags, pe_rsc_provisional)) { /* error check */ struct node_shared_s *details_lh; struct node_shared_s *details_rh; if ((constraint->score > -INFINITY) && (constraint->score < INFINITY)) { return influence_nothing; } details_rh = rsc_rh->allocated_to ? rsc_rh->allocated_to->details : NULL; details_lh = rsc_lh->allocated_to ? rsc_lh->allocated_to->details : NULL; if (constraint->score == INFINITY && details_lh != details_rh) { crm_err("%s and %s are both allocated" " but to different nodes: %s vs. %s", rsc_lh->id, rsc_rh->id, details_lh ? details_lh->uname : "n/a", details_rh ? details_rh->uname : "n/a"); } else if (constraint->score == -INFINITY && details_lh == details_rh) { crm_err("%s and %s are both allocated" " but to the SAME node: %s", rsc_lh->id, rsc_rh->id, details_rh ? details_rh->uname : "n/a"); } return influence_nothing; } if (constraint->score > 0 && constraint->role_lh != RSC_ROLE_UNKNOWN && constraint->role_lh != rsc_lh->next_role) { crm_trace("LH: Skipping constraint: \"%s\" state filter nextrole is %s", role2text(constraint->role_lh), role2text(rsc_lh->next_role)); return influence_nothing; } if (constraint->score > 0 && constraint->role_rh != RSC_ROLE_UNKNOWN && constraint->role_rh != rsc_rh->next_role) { crm_trace("RH: Skipping constraint: \"%s\" state filter", role2text(constraint->role_rh)); return FALSE; } if (constraint->score < 0 && constraint->role_lh != RSC_ROLE_UNKNOWN && constraint->role_lh == rsc_lh->next_role) { crm_trace("LH: Skipping -ve constraint: \"%s\" state filter", role2text(constraint->role_lh)); return influence_nothing; } if (constraint->score < 0 && constraint->role_rh != RSC_ROLE_UNKNOWN && constraint->role_rh == rsc_rh->next_role) { crm_trace("RH: Skipping -ve constraint: \"%s\" state filter", role2text(constraint->role_rh)); return influence_nothing; } return influence_rsc_location; } static void influence_priority(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint) { const char *rh_value = NULL; const char *lh_value = NULL; const char *attribute = "#id"; int score_multiplier = 1; if (constraint->node_attribute != NULL) { attribute = constraint->node_attribute; } if (!rsc_rh->allocated_to || !rsc_lh->allocated_to) { return; } lh_value = g_hash_table_lookup(rsc_lh->allocated_to->details->attrs, attribute); rh_value = g_hash_table_lookup(rsc_rh->allocated_to->details->attrs, attribute); if (!safe_str_eq(lh_value, rh_value)) { if(constraint->score == INFINITY && constraint->role_lh == RSC_ROLE_MASTER) { rsc_lh->priority = -INFINITY; } return; } if (constraint->role_rh && (constraint->role_rh != rsc_rh->next_role)) { return; } if (constraint->role_lh == RSC_ROLE_SLAVE) { score_multiplier = -1; } rsc_lh->priority = merge_weights(score_multiplier * constraint->score, rsc_lh->priority); } static void colocation_match(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint) { const char *tmp = NULL; const char *value = NULL; const char *attribute = "#id"; GHashTable *work = NULL; gboolean do_check = FALSE; GHashTableIter iter; node_t *node = NULL; if (constraint->node_attribute != NULL) { attribute = constraint->node_attribute; } if (rsc_rh->allocated_to) { value = g_hash_table_lookup(rsc_rh->allocated_to->details->attrs, attribute); do_check = TRUE; } else if (constraint->score < 0) { /* nothing to do: * anti-colocation with something thats not running */ return; } work = node_hash_dup(rsc_lh->allowed_nodes); g_hash_table_iter_init(&iter, work); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { tmp = g_hash_table_lookup(node->details->attrs, attribute); if (do_check && safe_str_eq(tmp, value)) { if (constraint->score < INFINITY) { pe_rsc_trace(rsc_lh, "%s: %s.%s += %d", constraint->id, rsc_lh->id, node->details->uname, constraint->score); node->weight = merge_weights(constraint->score, node->weight); } } else if (do_check == FALSE || constraint->score >= INFINITY) { pe_rsc_trace(rsc_lh, "%s: %s.%s -= %d (%s)", constraint->id, rsc_lh->id, node->details->uname, constraint->score, do_check ? "failed" : "unallocated"); node->weight = merge_weights(-constraint->score, node->weight); } } if (can_run_any(work) || constraint->score <= -INFINITY || constraint->score >= INFINITY) { g_hash_table_destroy(rsc_lh->allowed_nodes); rsc_lh->allowed_nodes = work; work = NULL; } else { static char score[33]; score2char_stack(constraint->score, score, sizeof(score)); pe_rsc_info(rsc_lh, "%s: Rolling back scores from %s (%d, %s)", rsc_lh->id, rsc_rh->id, do_check, score); } if (work) { g_hash_table_destroy(work); } } void native_rsc_colocation_rh(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint) { enum filter_colocation_res filter_results; CRM_ASSERT(rsc_lh); CRM_ASSERT(rsc_rh); filter_results = filter_colocation_constraint(rsc_lh, rsc_rh, constraint); pe_rsc_trace(rsc_lh, "%sColocating %s with %s (%s, weight=%d, filter=%d)", constraint->score >= 0 ? "" : "Anti-", rsc_lh->id, rsc_rh->id, constraint->id, constraint->score, filter_results); switch (filter_results) { case influence_rsc_priority: influence_priority(rsc_lh, rsc_rh, constraint); break; case influence_rsc_location: pe_rsc_trace(rsc_lh, "%sColocating %s with %s (%s, weight=%d)", constraint->score >= 0 ? "" : "Anti-", rsc_lh->id, rsc_rh->id, constraint->id, constraint->score); colocation_match(rsc_lh, rsc_rh, constraint); break; case influence_nothing: default: return; } } static gboolean filter_rsc_ticket(resource_t * rsc_lh, rsc_ticket_t * rsc_ticket) { if (rsc_ticket->role_lh != RSC_ROLE_UNKNOWN && rsc_ticket->role_lh != rsc_lh->role) { pe_rsc_trace(rsc_lh, "LH: Skipping constraint: \"%s\" state filter", role2text(rsc_ticket->role_lh)); return FALSE; } return TRUE; } void rsc_ticket_constraint(resource_t * rsc_lh, rsc_ticket_t * rsc_ticket, pe_working_set_t * data_set) { if (rsc_ticket == NULL) { pe_err("rsc_ticket was NULL"); return; } if (rsc_lh == NULL) { pe_err("rsc_lh was NULL for %s", rsc_ticket->id); return; } if (rsc_ticket->ticket->granted && rsc_ticket->ticket->standby == FALSE) { return; } if (rsc_lh->children) { GListPtr gIter = rsc_lh->children; pe_rsc_trace(rsc_lh, "Processing ticket dependencies from %s", rsc_lh->id); for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; rsc_ticket_constraint(child_rsc, rsc_ticket, data_set); } return; } pe_rsc_trace(rsc_lh, "%s: Processing ticket dependency on %s (%s, %s)", rsc_lh->id, rsc_ticket->ticket->id, rsc_ticket->id, role2text(rsc_ticket->role_lh)); if (rsc_ticket->ticket->granted == FALSE && g_list_length(rsc_lh->running_on) > 0) { GListPtr gIter = NULL; switch (rsc_ticket->loss_policy) { case loss_ticket_stop: resource_location(rsc_lh, NULL, -INFINITY, "__loss_of_ticket__", data_set); break; case loss_ticket_demote: /*Promotion score will be set to -INFINITY in master_promotion_order() */ if (rsc_ticket->role_lh != RSC_ROLE_MASTER) { resource_location(rsc_lh, NULL, -INFINITY, "__loss_of_ticket__", data_set); } break; case loss_ticket_fence: if (filter_rsc_ticket(rsc_lh, rsc_ticket) == FALSE) { return; } resource_location(rsc_lh, NULL, -INFINITY, "__loss_of_ticket__", data_set); for (gIter = rsc_lh->running_on; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; pe_fence_node(data_set, node, "deadman ticket lost"); } break; case loss_ticket_freeze: if (filter_rsc_ticket(rsc_lh, rsc_ticket) == FALSE) { return; } if (g_list_length(rsc_lh->running_on) > 0) { clear_bit(rsc_lh->flags, pe_rsc_managed); set_bit(rsc_lh->flags, pe_rsc_block); } break; } } else if (rsc_ticket->ticket->granted == FALSE) { if (rsc_ticket->role_lh != RSC_ROLE_MASTER || rsc_ticket->loss_policy == loss_ticket_stop) { resource_location(rsc_lh, NULL, -INFINITY, "__no_ticket__", data_set); } } else if (rsc_ticket->ticket->standby) { if (rsc_ticket->role_lh != RSC_ROLE_MASTER || rsc_ticket->loss_policy == loss_ticket_stop) { resource_location(rsc_lh, NULL, -INFINITY, "__ticket_standby__", data_set); } } } enum pe_action_flags native_action_flags(action_t * action, node_t * node) { return action->flags; } enum pe_graph_flags native_update_actions(action_t * first, action_t * then, node_t * node, enum pe_action_flags flags, enum pe_action_flags filter, enum pe_ordering type) { /* flags == get_action_flags(first, then_node) called from update_action() */ enum pe_graph_flags changed = pe_graph_none; enum pe_action_flags then_flags = then->flags; enum pe_action_flags first_flags = first->flags; crm_trace( "Testing %s on %s (0x%.6x) with %s 0x%.6x %x %x", first->uuid, first->node ? first->node->details->uname : "[none]", first->flags, then->uuid, then->flags); if (type & pe_order_asymmetrical) { resource_t *then_rsc = then->rsc; enum rsc_role_e then_rsc_role = then_rsc ? then_rsc->fns->state(then_rsc, TRUE) : 0; if (!then_rsc) { /* ignore */ } else if ((then_rsc_role == RSC_ROLE_STOPPED) && safe_str_eq(then->task, RSC_STOP)) { /* ignore... if 'then' is supposed to be stopped after 'first', but * then is already stopped, there is nothing to be done when non-symmetrical. */ } else if ((then_rsc_role == RSC_ROLE_STARTED) && safe_str_eq(then->task, RSC_START)) { /* ignore... if 'then' is supposed to be started after 'first', but * then is already started, there is nothing to be done when non-symmetrical. */ } else if (!(first->flags & pe_action_runnable)) { /* prevent 'then' action from happening if 'first' is not runnable and * 'then' has not yet occurred. */ pe_clear_action_bit(then, pe_action_runnable); pe_clear_action_bit(then, pe_action_optional); pe_rsc_trace(then->rsc, "Unset optional and runnable on %s", then->uuid); } else { /* ignore... then is allowed to start/stop if it wants to. */ } } if (type & pe_order_implies_first) { if ((filter & pe_action_optional) && (flags & pe_action_optional) == 0) { pe_rsc_trace(first->rsc, "Unset optional on %s because of %s", first->uuid, then->uuid); pe_clear_action_bit(first, pe_action_optional); } if (is_set(flags, pe_action_migrate_runnable) && is_set(then->flags, pe_action_migrate_runnable) == FALSE && is_set(then->flags, pe_action_optional) == FALSE) { pe_rsc_trace(first->rsc, "Unset migrate runnable on %s because of %s", first->uuid, then->uuid); pe_clear_action_bit(first, pe_action_migrate_runnable); } } if (type & pe_order_implies_first_master) { if ((filter & pe_action_optional) && ((then->flags & pe_action_optional) == FALSE) && then->rsc && (then->rsc->role == RSC_ROLE_MASTER)) { pe_clear_action_bit(first, pe_action_optional); if (is_set(first->flags, pe_action_migrate_runnable) && is_set(then->flags, pe_action_migrate_runnable) == FALSE) { pe_rsc_trace(first->rsc, "Unset migrate runnable on %s because of %s", first->uuid, then->uuid); pe_clear_action_bit(first, pe_action_migrate_runnable); } pe_rsc_trace(then->rsc, "Unset optional on %s because of %s", first->uuid, then->uuid); } } if ((type & pe_order_implies_first_migratable) && is_set(filter, pe_action_optional)) { if (((then->flags & pe_action_migrate_runnable) == FALSE) || ((then->flags & pe_action_runnable) == FALSE)) { pe_rsc_trace(then->rsc, "Unset runnable on %s because %s is neither runnable or migratable", first->uuid, then->uuid); pe_clear_action_bit(first, pe_action_runnable); } if ((then->flags & pe_action_optional) == 0) { pe_rsc_trace(then->rsc, "Unset optional on %s because %s is not optional", first->uuid, then->uuid); pe_clear_action_bit(first, pe_action_optional); } } if ((type & pe_order_pseudo_left) && is_set(filter, pe_action_optional)) { if ((first->flags & pe_action_runnable) == FALSE) { pe_clear_action_bit(then, pe_action_migrate_runnable); pe_clear_action_bit(then, pe_action_pseudo); pe_rsc_trace(then->rsc, "Unset pseudo on %s because %s is not runnable", then->uuid, first->uuid); } } if (is_set(type, pe_order_runnable_left) && is_set(filter, pe_action_runnable) && is_set(then->flags, pe_action_runnable) && is_set(flags, pe_action_runnable) == FALSE) { pe_rsc_trace(then->rsc, "Unset runnable on %s because of %s", then->uuid, first->uuid); pe_clear_action_bit(then, pe_action_runnable); pe_clear_action_bit(then, pe_action_migrate_runnable); } if (is_set(type, pe_order_implies_then) && is_set(filter, pe_action_optional) && is_set(then->flags, pe_action_optional) && is_set(flags, pe_action_optional) == FALSE) { /* in this case, treat migrate_runnable as if first is optional */ if (is_set(first->flags, pe_action_migrate_runnable) == FALSE) { pe_rsc_trace(then->rsc, "Unset optional on %s because of %s", then->uuid, first->uuid); pe_clear_action_bit(then, pe_action_optional); } } if (is_set(type, pe_order_restart)) { const char *reason = NULL; CRM_ASSERT(first->rsc && first->rsc->variant == pe_native); CRM_ASSERT(then->rsc && then->rsc->variant == pe_native); if ((filter & pe_action_runnable) && (then->flags & pe_action_runnable) == 0 && (then->rsc->flags & pe_rsc_managed)) { reason = "shutdown"; } if ((filter & pe_action_optional) && (then->flags & pe_action_optional) == 0) { reason = "recover"; } if (reason && is_set(first->flags, pe_action_optional)) { if (is_set(first->flags, pe_action_runnable) || is_not_set(then->flags, pe_action_optional)) { pe_rsc_trace(first->rsc, "Handling %s: %s -> %s", reason, first->uuid, then->uuid); pe_clear_action_bit(first, pe_action_optional); } } if (reason && is_not_set(first->flags, pe_action_optional) && is_not_set(first->flags, pe_action_runnable)) { pe_rsc_trace(then->rsc, "Handling %s: %s -> %s", reason, first->uuid, then->uuid); pe_clear_action_bit(then, pe_action_runnable); } if (reason && is_not_set(first->flags, pe_action_optional) && is_set(first->flags, pe_action_migrate_runnable) && is_not_set(then->flags, pe_action_migrate_runnable)) { pe_clear_action_bit(first, pe_action_migrate_runnable); } } if (then_flags != then->flags) { changed |= pe_graph_updated_then; pe_rsc_trace(then->rsc, "Then: Flags for %s on %s are now 0x%.6x (was 0x%.6x) because of %s 0x%.6x", then->uuid, then->node ? then->node->details->uname : "[none]", then->flags, then_flags, first->uuid, first->flags); if(then->rsc && then->rsc->parent) { /* "X_stop then X_start" doesn't get handled for cloned groups unless we do this */ update_action(then); } } if (first_flags != first->flags) { changed |= pe_graph_updated_first; pe_rsc_trace(first->rsc, "First: Flags for %s on %s are now 0x%.6x (was 0x%.6x) because of %s 0x%.6x", first->uuid, first->node ? first->node->details->uname : "[none]", first->flags, first_flags, then->uuid, then->flags); } return changed; } void native_rsc_location(resource_t * rsc, rsc_to_node_t * constraint) { GListPtr gIter = NULL; GHashTableIter iter; node_t *node = NULL; if (constraint == NULL) { pe_err("Constraint is NULL"); return; } else if (rsc == NULL) { pe_err("LHS of rsc_to_node (%s) is NULL", constraint->id); return; } pe_rsc_trace(rsc, "Applying %s (%s) to %s", constraint->id, role2text(constraint->role_filter), rsc->id); /* take "lifetime" into account */ if (constraint->role_filter > RSC_ROLE_UNKNOWN && constraint->role_filter != rsc->next_role) { pe_rsc_debug(rsc, "Constraint (%s) is not active (role : %s vs. %s)", constraint->id, role2text(constraint->role_filter), role2text(rsc->next_role)); return; } else if (is_active(constraint) == FALSE) { pe_rsc_trace(rsc, "Constraint (%s) is not active", constraint->id); return; } if (constraint->node_list_rh == NULL) { pe_rsc_trace(rsc, "RHS of constraint %s is NULL", constraint->id); return; } for (gIter = constraint->node_list_rh; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; node_t *other_node = NULL; other_node = (node_t *) pe_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (other_node != NULL) { pe_rsc_trace(rsc, "%s + %s: %d + %d", node->details->uname, other_node->details->uname, node->weight, other_node->weight); other_node->weight = merge_weights(other_node->weight, node->weight); } else { - node_t *new_node = node_copy(node); + other_node = node_copy(node); - g_hash_table_insert(rsc->allowed_nodes, (gpointer) new_node->details->id, new_node); + g_hash_table_insert(rsc->allowed_nodes, (gpointer) other_node->details->id, other_node); + } + + if (other_node->rsc_discover_mode < constraint->discover_mode) { + /* exclusive > never > always... always is default */ + other_node->rsc_discover_mode = constraint->discover_mode; } } g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { pe_rsc_trace(rsc, "%s + %s : %d", rsc->id, node->details->uname, node->weight); } } void native_expand(resource_t * rsc, pe_working_set_t * data_set) { GListPtr gIter = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "Processing actions from %s", rsc->id); for (gIter = rsc->actions; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; crm_trace("processing action %d for rsc=%s", action->id, rsc->id); graph_element_from_action(action, data_set); } for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; child_rsc->cmds->expand(child_rsc, data_set); } } #define log_change(fmt, args...) do { \ if(terminal) { \ printf(" * "fmt"\n", ##args); \ } else { \ crm_notice(fmt, ##args); \ } \ } while(0) #define STOP_SANITY_ASSERT(lineno) do { \ if(current && current->details->unclean) { \ /* It will be a pseduo op */ \ } else if(stop == NULL) { \ crm_err("%s:%d: No stop action exists for %s", __FUNCTION__, lineno, rsc->id); \ CRM_ASSERT(stop != NULL); \ } else if(is_set(stop->flags, pe_action_optional)) { \ crm_err("%s:%d: Action %s is still optional", __FUNCTION__, lineno, stop->uuid); \ CRM_ASSERT(is_not_set(stop->flags, pe_action_optional)); \ } \ } while(0) void LogActions(resource_t * rsc, pe_working_set_t * data_set, gboolean terminal) { node_t *next = NULL; node_t *current = NULL; action_t *stop = NULL; action_t *start = NULL; action_t *demote = NULL; action_t *promote = NULL; char *key = NULL; gboolean moving = FALSE; GListPtr possible_matches = NULL; if (rsc->children) { GListPtr gIter = NULL; for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; LogActions(child_rsc, data_set, terminal); } return; } next = rsc->allocated_to; if (rsc->running_on) { if (g_list_length(rsc->running_on) > 1 && rsc->partial_migration_source) { current = rsc->partial_migration_source; } else { current = rsc->running_on->data; } if (rsc->role == RSC_ROLE_STOPPED) { /* * This can occur when resources are being recovered * We fiddle with the current role in native_create_actions() */ rsc->role = RSC_ROLE_STARTED; } } if (current == NULL && is_set(rsc->flags, pe_rsc_orphan)) { /* Don't log stopped orphans */ return; } if (is_not_set(rsc->flags, pe_rsc_managed) || (current == NULL && next == NULL)) { pe_rsc_info(rsc, "Leave %s\t(%s%s)", rsc->id, role2text(rsc->role), is_not_set(rsc->flags, pe_rsc_managed) ? " unmanaged" : ""); return; } if (current != NULL && next != NULL && safe_str_neq(current->details->id, next->details->id)) { moving = TRUE; } key = start_key(rsc); possible_matches = find_actions(rsc->actions, key, next); free(key); if (possible_matches) { start = possible_matches->data; g_list_free(possible_matches); } key = stop_key(rsc); if(start == NULL || is_set(start->flags, pe_action_runnable) == FALSE) { possible_matches = find_actions(rsc->actions, key, NULL); } else { possible_matches = find_actions(rsc->actions, key, current); } free(key); if (possible_matches) { stop = possible_matches->data; g_list_free(possible_matches); } key = promote_key(rsc); possible_matches = find_actions(rsc->actions, key, next); free(key); if (possible_matches) { promote = possible_matches->data; g_list_free(possible_matches); } key = demote_key(rsc); possible_matches = find_actions(rsc->actions, key, next); free(key); if (possible_matches) { demote = possible_matches->data; g_list_free(possible_matches); } if (rsc->role == rsc->next_role) { action_t *migrate_to = NULL; key = generate_op_key(rsc->id, RSC_MIGRATED, 0); possible_matches = find_actions(rsc->actions, key, next); free(key); if (possible_matches) { migrate_to = possible_matches->data; } CRM_CHECK(next != NULL,); if (next == NULL) { } else if (migrate_to && is_set(migrate_to->flags, pe_action_runnable) && current) { log_change("Migrate %s\t(%s %s -> %s)", rsc->id, role2text(rsc->role), current->details->uname, next->details->uname); } else if (is_set(rsc->flags, pe_rsc_reload)) { log_change("Reload %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } else if (start == NULL || is_set(start->flags, pe_action_optional)) { pe_rsc_info(rsc, "Leave %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } else if (start && is_set(start->flags, pe_action_runnable) == FALSE) { log_change("Stop %s\t(%s %s%s)", rsc->id, role2text(rsc->role), current?current->details->uname:"N/A", stop && is_not_set(stop->flags, pe_action_runnable) ? " - blocked" : ""); STOP_SANITY_ASSERT(__LINE__); } else if (moving && current) { log_change("%s %s\t(%s %s -> %s)", is_set(rsc->flags, pe_rsc_failed) ? "Recover" : "Move ", rsc->id, role2text(rsc->role), current->details->uname, next->details->uname); } else if (is_set(rsc->flags, pe_rsc_failed)) { log_change("Recover %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); STOP_SANITY_ASSERT(__LINE__); } else { log_change("Restart %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); /* STOP_SANITY_ASSERT(__LINE__); False positive for migrate-fail-7 */ } g_list_free(possible_matches); return; } if (rsc->role > RSC_ROLE_SLAVE && rsc->role > rsc->next_role) { CRM_CHECK(current != NULL,); if (current != NULL) { gboolean allowed = FALSE; if (demote != NULL && (demote->flags & pe_action_runnable)) { allowed = TRUE; } log_change("Demote %s\t(%s -> %s %s%s)", rsc->id, role2text(rsc->role), role2text(rsc->next_role), current->details->uname, allowed ? "" : " - blocked"); if (stop != NULL && is_not_set(stop->flags, pe_action_optional) && rsc->next_role > RSC_ROLE_STOPPED && moving == FALSE) { if (is_set(rsc->flags, pe_rsc_failed)) { log_change("Recover %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); STOP_SANITY_ASSERT(__LINE__); } else if (is_set(rsc->flags, pe_rsc_reload)) { log_change("Reload %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } else { log_change("Restart %s\t(%s %s)", rsc->id, role2text(rsc->next_role), next->details->uname); STOP_SANITY_ASSERT(__LINE__); } } } } else if (rsc->next_role == RSC_ROLE_STOPPED) { GListPtr gIter = NULL; CRM_CHECK(current != NULL,); key = stop_key(rsc); for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; action_t *stop_op = NULL; gboolean allowed = FALSE; possible_matches = find_actions(rsc->actions, key, node); if (possible_matches) { stop_op = possible_matches->data; g_list_free(possible_matches); } if (stop_op && (stop_op->flags & pe_action_runnable)) { STOP_SANITY_ASSERT(__LINE__); allowed = TRUE; } log_change("Stop %s\t(%s%s)", rsc->id, node->details->uname, allowed ? "" : " - blocked"); } free(key); } if (moving) { log_change("Move %s\t(%s %s -> %s)", rsc->id, role2text(rsc->next_role), current->details->uname, next->details->uname); STOP_SANITY_ASSERT(__LINE__); } if (rsc->role == RSC_ROLE_STOPPED) { gboolean allowed = FALSE; if (start && (start->flags & pe_action_runnable)) { allowed = TRUE; } CRM_CHECK(next != NULL,); if (next != NULL) { log_change("Start %s\t(%s%s)", rsc->id, next->details->uname, allowed ? "" : " - blocked"); } if (allowed == FALSE) { return; } } if (rsc->next_role > RSC_ROLE_SLAVE && rsc->role < rsc->next_role) { gboolean allowed = FALSE; CRM_LOG_ASSERT(next); if (stop != NULL && is_not_set(stop->flags, pe_action_optional) && rsc->role > RSC_ROLE_STOPPED) { if (is_set(rsc->flags, pe_rsc_failed)) { log_change("Recover %s\t(%s %s)", rsc->id, role2text(rsc->role), next?next->details->uname:NULL); STOP_SANITY_ASSERT(__LINE__); } else if (is_set(rsc->flags, pe_rsc_reload)) { log_change("Reload %s\t(%s %s)", rsc->id, role2text(rsc->role), next?next->details->uname:NULL); STOP_SANITY_ASSERT(__LINE__); } else { log_change("Restart %s\t(%s %s)", rsc->id, role2text(rsc->role), next?next->details->uname:NULL); STOP_SANITY_ASSERT(__LINE__); } } if (promote && (promote->flags & pe_action_runnable)) { allowed = TRUE; } log_change("Promote %s\t(%s -> %s %s%s)", rsc->id, role2text(rsc->role), role2text(rsc->next_role), next?next->details->uname:NULL, allowed ? "" : " - blocked"); } } gboolean StopRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set) { GListPtr gIter = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "%s", rsc->id); for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { node_t *current = (node_t *) gIter->data; action_t *stop; if (rsc->partial_migration_target) { if (rsc->partial_migration_target->details == current->details) { pe_rsc_trace(rsc, "Filtered %s -> %s %s", current->details->uname, next->details->uname, rsc->id); continue; } else { pe_rsc_trace(rsc, "Forced on %s %s", current->details->uname, rsc->id); optional = FALSE; } } pe_rsc_trace(rsc, "%s on %s", rsc->id, current->details->uname); stop = stop_action(rsc, current, optional); if (is_not_set(rsc->flags, pe_rsc_managed)) { update_action_flags(stop, pe_action_runnable | pe_action_clear); } if (is_set(data_set->flags, pe_flag_remove_after_stop)) { DeleteRsc(rsc, current, optional, data_set); } } return TRUE; } gboolean StartRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set) { action_t *start = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "%s on %s %d", rsc->id, next ? next->details->uname : "N/A", optional); start = start_action(rsc, next, TRUE); if (is_set(start->flags, pe_action_runnable) && optional == FALSE) { update_action_flags(start, pe_action_optional | pe_action_clear); } return TRUE; } gboolean PromoteRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set) { char *key = NULL; GListPtr gIter = NULL; gboolean runnable = TRUE; GListPtr action_list = NULL; CRM_ASSERT(rsc); CRM_CHECK(next != NULL, return FALSE); pe_rsc_trace(rsc, "%s on %s", rsc->id, next->details->uname); key = start_key(rsc); action_list = find_actions_exact(rsc->actions, key, next); free(key); for (gIter = action_list; gIter != NULL; gIter = gIter->next) { action_t *start = (action_t *) gIter->data; if (is_set(start->flags, pe_action_runnable) == FALSE) { runnable = FALSE; } } g_list_free(action_list); if (runnable) { promote_action(rsc, next, optional); return TRUE; } pe_rsc_debug(rsc, "%s\tPromote %s (canceled)", next->details->uname, rsc->id); key = promote_key(rsc); action_list = find_actions_exact(rsc->actions, key, next); free(key); for (gIter = action_list; gIter != NULL; gIter = gIter->next) { action_t *promote = (action_t *) gIter->data; update_action_flags(promote, pe_action_runnable | pe_action_clear); } g_list_free(action_list); return TRUE; } gboolean DemoteRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set) { GListPtr gIter = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "%s", rsc->id); /* CRM_CHECK(rsc->next_role == RSC_ROLE_SLAVE, return FALSE); */ for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { node_t *current = (node_t *) gIter->data; pe_rsc_trace(rsc, "%s on %s", rsc->id, next ? next->details->uname : "N/A"); demote_action(rsc, current, optional); } return TRUE; } gboolean RoleError(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set) { CRM_ASSERT(rsc); crm_err("%s on %s", rsc->id, next ? next->details->uname : "N/A"); CRM_CHECK(FALSE, return FALSE); return FALSE; } gboolean NullOp(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set) { CRM_ASSERT(rsc); pe_rsc_trace(rsc, "%s", rsc->id); return FALSE; } gboolean DeleteRsc(resource_t * rsc, node_t * node, gboolean optional, pe_working_set_t * data_set) { if (is_set(rsc->flags, pe_rsc_failed)) { pe_rsc_trace(rsc, "Resource %s not deleted from %s: failed", rsc->id, node->details->uname); return FALSE; } else if (node == NULL) { pe_rsc_trace(rsc, "Resource %s not deleted: NULL node", rsc->id); return FALSE; } else if (node->details->unclean || node->details->online == FALSE) { pe_rsc_trace(rsc, "Resource %s not deleted from %s: unrunnable", rsc->id, node->details->uname); return FALSE; } crm_notice("Removing %s from %s", rsc->id, node->details->uname); delete_action(rsc, node, optional); new_rsc_order(rsc, RSC_STOP, rsc, RSC_DELETE, optional ? pe_order_implies_then : pe_order_optional, data_set); new_rsc_order(rsc, RSC_DELETE, rsc, RSC_START, optional ? pe_order_implies_then : pe_order_optional, data_set); return TRUE; } #include <../lib/pengine/unpack.h> #define set_char(x) last_rsc_id[lpc] = x; complete = TRUE; static char * increment_clone(char *last_rsc_id) { int lpc = 0; int len = 0; char *tmp = NULL; gboolean complete = FALSE; CRM_CHECK(last_rsc_id != NULL, return NULL); if (last_rsc_id != NULL) { len = strlen(last_rsc_id); } lpc = len - 1; while (complete == FALSE && lpc > 0) { switch (last_rsc_id[lpc]) { case 0: lpc--; break; case '0': set_char('1'); break; case '1': set_char('2'); break; case '2': set_char('3'); break; case '3': set_char('4'); break; case '4': set_char('5'); break; case '5': set_char('6'); break; case '6': set_char('7'); break; case '7': set_char('8'); break; case '8': set_char('9'); break; case '9': last_rsc_id[lpc] = '0'; lpc--; break; case ':': tmp = last_rsc_id; last_rsc_id = calloc(1, len + 2); memcpy(last_rsc_id, tmp, len); last_rsc_id[++lpc] = '1'; last_rsc_id[len] = '0'; last_rsc_id[len + 1] = 0; complete = TRUE; free(tmp); break; default: crm_err("Unexpected char: %c (%d)", last_rsc_id[lpc], lpc); return NULL; break; } } return last_rsc_id; } static node_t * probe_grouped_clone(resource_t * rsc, node_t * node, pe_working_set_t * data_set) { node_t *running = NULL; resource_t *top = uber_parent(rsc); if (running == NULL && is_set(top->flags, pe_rsc_unique) == FALSE) { /* Annoyingly we also need to check any other clone instances * Clumsy, but it will work. * * An alternative would be to update known_on for every peer * during process_rsc_state() * * This code desperately needs optimization * ptest -x with 100 nodes, 100 clones and clone-max=10: * No probes O(25s) * Detection without clone loop O(3m) * Detection with clone loop O(8m) ptest[32211]: 2010/02/18_14:27:55 CRIT: stage5: Probing for unknown resources ptest[32211]: 2010/02/18_14:33:39 CRIT: stage5: Done ptest[32211]: 2010/02/18_14:35:05 CRIT: stage7: Updating action states ptest[32211]: 2010/02/18_14:35:05 CRIT: stage7: Done */ char *clone_id = clone_zero(rsc->id); resource_t *peer = pe_find_resource(top->children, clone_id); while (peer && running == NULL) { running = pe_hash_table_lookup(peer->known_on, node->details->id); if (running != NULL) { /* we already know the status of the resource on this node */ pe_rsc_trace(rsc, "Skipping active clone: %s", rsc->id); free(clone_id); return running; } clone_id = increment_clone(clone_id); peer = pe_find_resource(data_set->resources, clone_id); } free(clone_id); } return running; } gboolean native_create_probe(resource_t * rsc, node_t * node, action_t * complete, gboolean force, pe_working_set_t * data_set) { char *key = NULL; action_t *probe = NULL; node_t *running = NULL; + node_t *allowed = NULL; resource_t *top = uber_parent(rsc); static const char *rc_master = NULL; static const char *rc_inactive = NULL; if (rc_inactive == NULL) { rc_inactive = crm_itoa(PCMK_OCF_NOT_RUNNING); rc_master = crm_itoa(PCMK_OCF_RUNNING_MASTER); } CRM_CHECK(node != NULL, return FALSE); if (force == FALSE && is_not_set(data_set->flags, pe_flag_startup_probes)) { pe_rsc_trace(rsc, "Skipping active resource detection for %s", rsc->id); return FALSE; } else if (force == FALSE && is_container_remote_node(node)) { pe_rsc_trace(rsc, "Skipping active resource detection for %s on container %s", rsc->id, node->details->id); return FALSE; } if (is_remote_node(node)) { const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); if (safe_str_eq(class, "stonith")) { pe_rsc_trace(rsc, "Skipping probe for %s on node %s, remote-nodes do not run stonith agents.", rsc->id, node->details->id); return FALSE; } else if (rsc_contains_remote_node(data_set, rsc)) { pe_rsc_trace(rsc, "Skipping probe for %s on node %s, remote-nodes can not run resources that contain connection resources.", rsc->id, node->details->id); return FALSE; } else if (rsc->is_remote_node) { pe_rsc_trace(rsc, "Skipping probe for %s on node %s, remote-nodes can not run connection resources", rsc->id, node->details->id); return FALSE; } } if (rsc->children) { GListPtr gIter = NULL; gboolean any_created = FALSE; for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; any_created = child_rsc->cmds->create_probe(child_rsc, node, complete, force, data_set) || any_created; } return any_created; } else if (rsc->container) { pe_rsc_trace(rsc, "Skipping %s: it is within container %s", rsc->id, rsc->container->id); return FALSE; } if (is_set(rsc->flags, pe_rsc_orphan)) { pe_rsc_trace(rsc, "Skipping orphan: %s", rsc->id); return FALSE; } running = g_hash_table_lookup(rsc->known_on, node->details->id); if (running == NULL && is_set(rsc->flags, pe_rsc_unique) == FALSE) { /* Anonymous clones */ if (rsc->parent == top) { running = g_hash_table_lookup(rsc->parent->known_on, node->details->id); } else { /* Grouped anonymous clones need extra special handling */ running = probe_grouped_clone(rsc, node, data_set); } } if (force == FALSE && running != NULL) { /* we already know the status of the resource on this node */ pe_rsc_trace(rsc, "Skipping active: %s on %s", rsc->id, node->details->uname); return FALSE; } + allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id); + if (rsc->exclusive_discover || top->exclusive_discover) { + if (allowed == NULL) { + /* exclusive discover is enabled and this node is not in the allowed list. */ + return FALSE; + } else if (allowed->rsc_discover_mode != discover_exclusive) { + /* exclusive discover is enabled and this node is not marked + * as a node this resource should be discovered on */ + return FALSE; + } + } + if (allowed && allowed->rsc_discover_mode == discover_never) { + /* this resource is marked as not needing to be discovered on this node */ + return FALSE; + } + + key = generate_op_key(rsc->id, RSC_STATUS, 0); probe = custom_action(rsc, key, RSC_STATUS, node, FALSE, TRUE, data_set); update_action_flags(probe, pe_action_optional | pe_action_clear); /* If enabled, require unfencing before probing any fence devices * but ensure it happens after any resources that require * unfencing have been probed. * * Doing it the other way (requiring unfencing after probing * resources that need it) would result in the node being * unfenced, and all its resources being stopped, whenever a new * resource is added. Which would be highly suboptimal. * * So essentially, at the point the fencing device(s) have been * probed, we know the state of all resources that require * unfencing and that unfencing occurred. */ if(is_set(rsc->flags, pe_rsc_fence_device) && is_set(data_set->flags, pe_flag_enable_unfencing)) { trigger_unfencing(NULL, node, "node discovery", probe, data_set); probe->priority = INFINITY; /* Ensure this runs if unfencing succeeds */ } else if(is_set(rsc->flags, pe_rsc_needs_unfencing)) { action_t *unfence = pe_fence_op(node, "on", TRUE, data_set); order_actions(probe, unfence, pe_order_optional); } /* * We need to know if it's running_on (not just known_on) this node * to correctly determine the target rc. */ running = pe_find_node_id(rsc->running_on, node->details->id); if (running == NULL) { add_hash_param(probe->meta, XML_ATTR_TE_TARGET_RC, rc_inactive); } else if (rsc->role == RSC_ROLE_MASTER) { add_hash_param(probe->meta, XML_ATTR_TE_TARGET_RC, rc_master); } pe_rsc_debug(rsc, "Probing %s on %s (%s)", rsc->id, node->details->uname, role2text(rsc->role)); if(is_set(rsc->flags, pe_rsc_fence_device) && is_set(data_set->flags, pe_flag_enable_unfencing)) { /* Normally rsc.start depends on probe complete which depends * on rsc.probe. But this can't be the case in this scenario as * it would create graph loops. * * So instead we explicitly order 'rsc.probe then rsc.start' */ custom_action_order(rsc, NULL, probe, rsc, generate_op_key(rsc->id, RSC_START, 0), NULL, pe_order_optional, data_set); } else { order_actions(probe, complete, pe_order_implies_then); } return TRUE; } static void native_start_constraints(resource_t * rsc, action_t * stonith_op, gboolean is_stonith, pe_working_set_t * data_set) { node_t *target = stonith_op ? stonith_op->node : NULL; GListPtr gIter = NULL; action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); action_t *stonith_done = get_pseudo_op(STONITH_DONE, data_set); for (gIter = rsc->actions; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; if(action->needs == rsc_req_nothing) { } else if (action->needs == rsc_req_stonith) { order_actions(stonith_done, action, pe_order_optional); } else if (target != NULL && safe_str_eq(action->task, RSC_START) && NULL == pe_hash_table_lookup(rsc->known_on, target->details->id)) { /* if known == NULL, then we dont know if * the resource is active on the node * we're about to shoot * * in this case, regardless of action->needs, * the only safe option is to wait until * the node is shot before doing anything * to with the resource * * its analogous to waiting for all the probes * for rscX to complete before starting rscX * * the most likely explaination is that the * DC died and took its status with it */ pe_rsc_debug(rsc, "Ordering %s after %s recovery", action->uuid, target->details->uname); order_actions(all_stopped, action, pe_order_optional | pe_order_runnable_left); } } } static void native_stop_constraints(resource_t * rsc, action_t * stonith_op, gboolean is_stonith, pe_working_set_t * data_set) { char *key = NULL; GListPtr gIter = NULL; GListPtr action_list = NULL; resource_t *top = uber_parent(rsc); key = stop_key(rsc); action_list = find_actions(rsc->actions, key, stonith_op->node); free(key); /* add the stonith OP as a stop pre-req and the mark the stop * as a pseudo op - since its now redundant */ for (gIter = action_list; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; if (action->node->details->online && action->node->details->unclean == FALSE && is_set(rsc->flags, pe_rsc_failed)) { continue; } if (is_set(rsc->flags, pe_rsc_failed)) { crm_notice("Stop of failed resource %s is" " implicit after %s is fenced", rsc->id, action->node->details->uname); } else { crm_info("%s is implicit after %s is fenced", action->uuid, action->node->details->uname); } /* the stop would never complete and is * now implied by the stonith operation */ update_action_flags(action, pe_action_pseudo); update_action_flags(action, pe_action_runnable); update_action_flags(action, pe_action_implied_by_stonith); { enum pe_ordering flags = pe_order_optional; action_t *parent_stop = find_first_action(top->actions, NULL, RSC_STOP, NULL); if(stonith_op->node->details->remote_rsc) { flags |= pe_order_preserve; } order_actions(stonith_op, action, flags); order_actions(stonith_op, parent_stop, flags); } if (is_set(rsc->flags, pe_rsc_notify)) { /* Create a second notification that will be delivered * immediately after the node is fenced * * Basic problem: * - C is a clone active on the node to be shot and stopping on another * - R is a resource that depends on C * * + C.stop depends on R.stop * + C.stopped depends on STONITH * + C.notify depends on C.stopped * + C.healthy depends on C.notify * + R.stop depends on C.healthy * * The extra notification here changes * + C.healthy depends on C.notify * into: * + C.healthy depends on C.notify' * + C.notify' depends on STONITH' * thus breaking the loop */ notify_data_t *n_data = create_notification_boundaries(rsc, RSC_STOP, NULL, stonith_op, data_set); crm_info("Creating secondary notification for %s", action->uuid); collect_notification_data(rsc, TRUE, FALSE, n_data); g_hash_table_insert(n_data->keys, strdup("notify_stop_resource"), strdup(rsc->id)); g_hash_table_insert(n_data->keys, strdup("notify_stop_uname"), strdup(action->node->details->uname)); create_notifications(uber_parent(rsc), n_data, data_set); free_notification_data(n_data); } /* From Bug #1601, successful fencing must be an input to a failed resources stop action. However given group(rA, rB) running on nodeX and B.stop has failed, A := stop healthy resource (rA.stop) B := stop failed resource (pseudo operation B.stop) C := stonith nodeX A requires B, B requires C, C requires A This loop would prevent the cluster from making progress. This block creates the "C requires A" dependency and therefore must (at least for now) be disabled. Instead, run the block above and treat all resources on nodeX as B would be (marked as a pseudo op depending on the STONITH). TODO: Break the "A requires B" dependency in update_action() and re-enable this block } else if(is_stonith == FALSE) { crm_info("Moving healthy resource %s" " off %s before fencing", rsc->id, node->details->uname); * stop healthy resources before the * stonith op * custom_action_order( rsc, stop_key(rsc), NULL, NULL,strdup(CRM_OP_FENCE),stonith_op, pe_order_optional, data_set); */ } g_list_free(action_list); key = demote_key(rsc); action_list = find_actions(rsc->actions, key, stonith_op->node); free(key); for (gIter = action_list; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; if (action->node->details->online == FALSE || action->node->details->unclean == TRUE || is_set(rsc->flags, pe_rsc_failed)) { if (is_set(rsc->flags, pe_rsc_failed)) { pe_rsc_info(rsc, "Demote of failed resource %s is" " implict after %s is fenced", rsc->id, action->node->details->uname); } else { pe_rsc_info(rsc, "%s is implicit after %s is fenced", action->uuid, action->node->details->uname); } /* the stop would never complete and is * now implied by the stonith operation */ crm_trace("here - 1"); update_action_flags(action, pe_action_pseudo); update_action_flags(action, pe_action_runnable); if (is_stonith == FALSE) { order_actions(stonith_op, action, pe_order_preserve|pe_order_optional); } } } g_list_free(action_list); } void rsc_stonith_ordering(resource_t * rsc, action_t * stonith_op, pe_working_set_t * data_set) { gboolean is_stonith = FALSE; if (rsc->children) { GListPtr gIter = NULL; for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; rsc_stonith_ordering(child_rsc, stonith_op, data_set); } return; } if (is_not_set(rsc->flags, pe_rsc_managed)) { pe_rsc_trace(rsc, "Skipping fencing constraints for unmanaged resource: %s", rsc->id); return; } /* Start constraints */ native_start_constraints(rsc, stonith_op, is_stonith, data_set); /* Stop constraints */ if (stonith_op) { native_stop_constraints(rsc, stonith_op, is_stonith, data_set); } } enum stack_activity { stack_stable = 0, stack_starting = 1, stack_stopping = 2, stack_middle = 4, }; static action_t * get_first_named_action(resource_t * rsc, const char *action, gboolean only_valid, node_t * current) { action_t *a = NULL; GListPtr action_list = NULL; char *key = generate_op_key(rsc->id, action, 0); action_list = find_actions(rsc->actions, key, current); if (action_list == NULL || action_list->data == NULL) { crm_trace("%s: no %s action", rsc->id, action); free(key); return NULL; } a = action_list->data; g_list_free(action_list); if (only_valid && is_set(a->flags, pe_action_pseudo)) { crm_trace("%s: pseudo", key); a = NULL; } else if (only_valid && is_not_set(a->flags, pe_action_runnable)) { crm_trace("%s: runnable", key); a = NULL; } free(key); return a; } static void ReloadRsc(resource_t * rsc, action_t * stop, action_t * start, pe_working_set_t * data_set) { action_t *action = NULL; action_t *rewrite = NULL; if (is_not_set(rsc->flags, pe_rsc_try_reload)) { return; } else if (is_not_set(stop->flags, pe_action_optional)) { pe_rsc_trace(rsc, "%s: stop action", rsc->id); return; } else if (is_not_set(start->flags, pe_action_optional)) { pe_rsc_trace(rsc, "%s: start action", rsc->id); return; } pe_rsc_trace(rsc, "%s on %s", rsc->id, stop->node->details->uname); action = get_first_named_action(rsc, RSC_PROMOTE, TRUE, NULL); if (action && is_set(action->flags, pe_action_optional) == FALSE) { update_action_flags(action, pe_action_pseudo); } action = get_first_named_action(rsc, RSC_DEMOTE, TRUE, NULL); if (action && is_set(action->flags, pe_action_optional) == FALSE) { rewrite = action; update_action_flags(stop, pe_action_pseudo); } else { rewrite = start; } pe_rsc_info(rsc, "Rewriting %s of %s on %s as a reload", rewrite->task, rsc->id, stop->node->details->uname); set_bit(rsc->flags, pe_rsc_reload); update_action_flags(rewrite, pe_action_optional | pe_action_clear); free(rewrite->uuid); free(rewrite->task); rewrite->task = strdup("reload"); rewrite->uuid = generate_op_key(rsc->id, rewrite->task, 0); } void rsc_reload(resource_t * rsc, pe_working_set_t * data_set) { GListPtr gIter = NULL; action_t *stop = NULL; action_t *start = NULL; if(is_set(rsc->flags, pe_rsc_munging)) { return; } set_bit(rsc->flags, pe_rsc_munging); if (rsc->children) { for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; rsc_reload(child_rsc, data_set); } return; } else if (rsc->variant > pe_native) { return; } pe_rsc_trace(rsc, "Processing %s", rsc->id); stop = get_first_named_action(rsc, RSC_STOP, TRUE, rsc->running_on ? rsc->running_on->data : NULL); start = get_first_named_action(rsc, RSC_START, TRUE, NULL); if (is_not_set(rsc->flags, pe_rsc_managed) || is_set(rsc->flags, pe_rsc_failed) || is_set(rsc->flags, pe_rsc_start_pending) || rsc->next_role < RSC_ROLE_STARTED) { pe_rsc_trace(rsc, "%s: general resource state: flags=0x%.16llx", rsc->id, rsc->flags); return; } if (stop != NULL && is_set(stop->flags, pe_action_optional) && is_set(rsc->flags, pe_rsc_try_reload)) { ReloadRsc(rsc, stop, start, data_set); } } void native_append_meta(resource_t * rsc, xmlNode * xml) { char *value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION); if (value) { char *name = NULL; name = crm_meta_name(XML_RSC_ATTR_INCARNATION); crm_xml_add(xml, name, value); free(name); } value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_REMOTE_NODE); if (value) { char *name = NULL; name = crm_meta_name(XML_RSC_ATTR_REMOTE_NODE); crm_xml_add(xml, name, value); free(name); } } diff --git a/pengine/pengine.h b/pengine/pengine.h index 653fadf406..87fa1507c4 100644 --- a/pengine/pengine.h +++ b/pengine/pengine.h @@ -1,148 +1,155 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef PENGINE__H # define PENGINE__H typedef struct rsc_to_node_s rsc_to_node_t; typedef struct rsc_colocation_s rsc_colocation_t; typedef struct rsc_ticket_s rsc_ticket_t; typedef struct lrm_agent_s lrm_agent_t; typedef struct order_constraint_s order_constraint_t; # include # include # include # include # include # include # include enum pe_stop_fail { pesf_block, pesf_stonith, pesf_ignore }; enum pe_weights { pe_weights_none = 0x0, pe_weights_init = 0x1, pe_weights_forward = 0x4, pe_weights_positive = 0x8, pe_weights_rollback = 0x10, }; struct rsc_colocation_s { const char *id; const char *node_attribute; resource_t *rsc_lh; resource_t *rsc_rh; int role_lh; int role_rh; int score; }; enum loss_ticket_policy_e { loss_ticket_stop, loss_ticket_demote, loss_ticket_fence, loss_ticket_freeze }; struct rsc_ticket_s { const char *id; resource_t *rsc_lh; ticket_t *ticket; enum loss_ticket_policy_e loss_policy; int role_lh; }; +enum rsc_discover_e { + discover_always = 0, + discover_never, + discover_exclusive, +}; + struct rsc_to_node_s { const char *id; resource_t *rsc_lh; enum rsc_role_e role_filter; + enum rsc_discover_e discover_mode; GListPtr node_list_rh; /* node_t* */ }; struct order_constraint_s { int id; enum pe_ordering type; void *lh_opaque; resource_t *lh_rsc; action_t *lh_action; char *lh_action_task; void *rh_opaque; resource_t *rh_rsc; action_t *rh_action; char *rh_action_task; /* (soon to be) variant specific */ /* int lh_rsc_incarnation; */ /* int rh_rsc_incarnation; */ }; extern gboolean stage0(pe_working_set_t * data_set); extern gboolean probe_resources(pe_working_set_t * data_set); extern gboolean stage2(pe_working_set_t * data_set); extern gboolean stage3(pe_working_set_t * data_set); extern gboolean stage4(pe_working_set_t * data_set); extern gboolean stage5(pe_working_set_t * data_set); extern gboolean stage6(pe_working_set_t * data_set); extern gboolean stage7(pe_working_set_t * data_set); extern gboolean stage8(pe_working_set_t * data_set); extern gboolean summary(GListPtr resources); extern gboolean unpack_constraints(xmlNode * xml_constraints, pe_working_set_t * data_set); extern gboolean update_action_states(GListPtr actions); extern gboolean shutdown_constraints(node_t * node, action_t * shutdown_op, pe_working_set_t * data_set); extern gboolean stonith_constraints(node_t * node, action_t * stonith_op, pe_working_set_t * data_set); extern int custom_action_order(resource_t * lh_rsc, char *lh_task, action_t * lh_action, resource_t * rh_rsc, char *rh_task, action_t * rh_action, enum pe_ordering type, pe_working_set_t * data_set); extern int new_rsc_order(resource_t * lh_rsc, const char *lh_task, resource_t * rh_rsc, const char *rh_task, enum pe_ordering type, pe_working_set_t * data_set); # define order_start_start(rsc1,rsc2, type) \ new_rsc_order(rsc1, CRMD_ACTION_START, rsc2, CRMD_ACTION_START, type, data_set) # define order_stop_stop(rsc1, rsc2, type) \ new_rsc_order(rsc1, CRMD_ACTION_STOP, rsc2, CRMD_ACTION_STOP, type, data_set) extern void graph_element_from_action(action_t * action, pe_working_set_t * data_set); extern gboolean show_scores; extern int scores_log_level; extern gboolean show_utilization; extern int utilization_log_level; extern const char *transition_idle_timeout; #endif diff --git a/pengine/regression.sh b/pengine/regression.sh index bdc7d3a90f..a9a56059cf 100755 --- a/pengine/regression.sh +++ b/pengine/regression.sh @@ -1,772 +1,777 @@ #!/bin/bash # Copyright (C) 2004 Andrew Beekhof # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This software is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # core=`dirname $0` . $core/regression.core.sh || exit 1 create_mode="true" info Generating test outputs for these tests... # do_test file description info Done. echo "" info Performing the following tests from $io_dir create_mode="false" echo "" do_test simple1 "Offline " do_test simple2 "Start " do_test simple3 "Start 2 " do_test simple4 "Start Failed" do_test simple6 "Stop Start " do_test simple7 "Shutdown " #do_test simple8 "Stonith " #do_test simple9 "Lower version" #do_test simple10 "Higher version" do_test simple11 "Priority (ne)" do_test simple12 "Priority (eq)" do_test simple8 "Stickiness" echo "" do_test group1 "Group " do_test group2 "Group + Native " do_test group3 "Group + Group " do_test group4 "Group + Native (nothing)" do_test group5 "Group + Native (move) " do_test group6 "Group + Group (move) " do_test group7 "Group colocation" do_test group13 "Group colocation (cant run)" do_test group8 "Group anti-colocation" do_test group9 "Group recovery" do_test group10 "Group partial recovery" do_test group11 "Group target_role" do_test group14 "Group stop (graph terminated)" do_test group15 "-ve group colocation" do_test bug-1573 "Partial stop of a group with two children" do_test bug-1718 "Mandatory group ordering - Stop group_FUN" do_test bug-lf-2613 "Move group on failure" do_test bug-lf-2619 "Move group on clone failure" do_test group-fail "Ensure stop order is preserved for partially active groups" do_test group-unmanaged "No need to restart r115 because r114 is unmanaged" do_test group-unmanaged-stopped "Make sure r115 is stopped when r114 fails" do_test group-dependants "Account for the location preferences of things colocated with a group" echo "" do_test rsc_dep1 "Must not " do_test rsc_dep3 "Must " do_test rsc_dep5 "Must not 3 " do_test rsc_dep7 "Must 3 " do_test rsc_dep10 "Must (but cant)" do_test rsc_dep2 "Must (running) " do_test rsc_dep8 "Must (running : alt) " do_test rsc_dep4 "Must (running + move)" do_test asymmetric "Asymmetric - require explicit location constraints" echo "" do_test orphan-0 "Orphan ignore" do_test orphan-1 "Orphan stop" do_test orphan-2 "Orphan stop, remove failcount" echo "" do_test params-0 "Params: No change" do_test params-1 "Params: Changed" do_test params-2 "Params: Resource definition" do_test params-4 "Params: Reload" do_test params-5 "Params: Restart based on probe digest" do_test novell-251689 "Resource definition change + target_role=stopped" do_test bug-lf-2106 "Restart all anonymous clone instances after config change" do_test params-6 "Params: Detect reload in previously migrated resource" do_test nvpair-id-ref "Support id-ref in nvpair with optional name" echo "" do_test target-0 "Target Role : baseline" do_test target-1 "Target Role : master" do_test target-2 "Target Role : invalid" echo "" do_test base-score "Set a node's default score for all nodes" echo "" do_test date-1 "Dates" -t "2005-020" do_test date-2 "Date Spec - Pass" -t "2005-020T12:30" do_test date-3 "Date Spec - Fail" -t "2005-020T11:30" do_test origin "Timing of recurring operations" -t "2014-05-07 00:28:00" do_test probe-0 "Probe (anon clone)" do_test probe-1 "Pending Probe" do_test probe-2 "Correctly re-probe cloned groups" do_test probe-3 "Probe (pending node)" do_test probe-4 "Probe (pending node + stopped resource)" --rc 4 do_test standby "Standby" do_test comments "Comments" echo "" do_test one-or-more-0 "Everything starts" do_test one-or-more-1 "Nothing starts because of A" do_test one-or-more-2 "D can start because of C" do_test one-or-more-3 "D cannot start because of B and C" do_test one-or-more-4 "D cannot start because of target-role" do_test one-or-more-5 "Start A and F even though C and D are stopped" do_test one-or-more-6 "Leave A running even though B is stopped" do_test one-or-more-7 "Leave A running even though C is stopped" do_test bug-5140-require-all-false "Allow basegrp:0 to stop" echo "" do_test order1 "Order start 1 " do_test order2 "Order start 2 " do_test order3 "Order stop " do_test order4 "Order (multiple) " do_test order5 "Order (move) " do_test order6 "Order (move w/ restart) " do_test order7 "Order (manditory) " do_test order-optional "Order (score=0) " do_test order-required "Order (score=INFINITY) " do_test bug-lf-2171 "Prevent group start when clone is stopped" do_test order-clone "Clone ordering should be able to prevent startup of dependant clones" do_test order-sets "Ordering for resource sets" do_test order-serialize "Serialize resources without inhibiting migration" do_test order-serialize-set "Serialize a set of resources without inhibiting migration" do_test clone-order-primitive "Order clone start after a primitive" do_test clone-order-16instances "Verify ordering of 16 cloned resources" do_test order-optional-keyword "Order (optional keyword)" do_test order-mandatory "Order (mandatory keyword)" do_test bug-lf-2493 "Don't imply colocation requirements when applying ordering constraints with clones" do_test ordered-set-basic-startup "Constraint set with default order settings." do_test ordered-set-natural "Allow natural set ordering" do_test order-wrong-kind "Order (error)" echo "" do_test coloc-loop "Colocation - loop" do_test coloc-many-one "Colocation - many-to-one" do_test coloc-list "Colocation - many-to-one with list" do_test coloc-group "Colocation - groups" do_test coloc-slave-anti "Anti-colocation with slave shouldn't prevent master colocation" do_test coloc-attr "Colocation based on node attributes" do_test coloc-negative-group "Negative colocation with a group" do_test coloc-intra-set "Intra-set colocation" do_test bug-lf-2435 "Colocation sets with a negative score" do_test coloc-clone-stays-active "Ensure clones don't get stopped/demoted because a dependant must stop" do_test coloc_fp_logic "Verify floating point calculations in colocation are working" do_test colo_master_w_native "cl#5070 - Verify promotion order is affected when colocating master to native rsc." do_test colo_slave_w_native "cl#5070 - Verify promotion order is affected when colocating slave to native rsc." do_test anti-colocation-order "cl#5187 - Prevent resources in an anti-colocation from even temporarily running on a same node" echo "" do_test rsc-sets-seq-true "Resource Sets - sequential=false" do_test rsc-sets-seq-false "Resource Sets - sequential=true" do_test rsc-sets-clone "Resource Sets - Clone" do_test rsc-sets-master "Resource Sets - Master" do_test rsc-sets-clone-1 "Resource Sets - Clone (lf#2404)" #echo "" #do_test agent1 "version: lt (empty)" #do_test agent2 "version: eq " #do_test agent3 "version: gt " echo "" do_test attrs1 "string: eq (and) " do_test attrs2 "string: lt / gt (and)" do_test attrs3 "string: ne (or) " do_test attrs4 "string: exists " do_test attrs5 "string: not_exists " do_test attrs6 "is_dc: true " do_test attrs7 "is_dc: false " do_test attrs8 "score_attribute " do_test per-node-attrs "Per node resource parameters" echo "" do_test mon-rsc-1 "Schedule Monitor - start" do_test mon-rsc-2 "Schedule Monitor - move " do_test mon-rsc-3 "Schedule Monitor - pending start " do_test mon-rsc-4 "Schedule Monitor - move/pending start" echo "" do_test rec-rsc-0 "Resource Recover - no start " do_test rec-rsc-1 "Resource Recover - start " do_test rec-rsc-2 "Resource Recover - monitor " do_test rec-rsc-3 "Resource Recover - stop - ignore" do_test rec-rsc-4 "Resource Recover - stop - block " do_test rec-rsc-5 "Resource Recover - stop - fence " do_test rec-rsc-6 "Resource Recover - multiple - restart" do_test rec-rsc-7 "Resource Recover - multiple - stop " do_test rec-rsc-8 "Resource Recover - multiple - block " do_test rec-rsc-9 "Resource Recover - group/group" do_test monitor-recovery "on-fail=block + resource recovery detected by recurring monitor" do_test stop-failure-no-quorum "Stop failure without quorum" do_test stop-failure-no-fencing "Stop failure without fencing available" do_test stop-failure-with-fencing "Stop failure with fencing available" echo "" do_test quorum-1 "No quorum - ignore" do_test quorum-2 "No quorum - freeze" do_test quorum-3 "No quorum - stop " do_test quorum-4 "No quorum - start anyway" do_test quorum-5 "No quorum - start anyway (group)" do_test quorum-6 "No quorum - start anyway (clone)" do_test bug-cl-5212 "No promotion with no-quorum-policy=freeze" echo "" do_test rec-node-1 "Node Recover - Startup - no fence" do_test rec-node-2 "Node Recover - Startup - fence " do_test rec-node-3 "Node Recover - HA down - no fence" do_test rec-node-4 "Node Recover - HA down - fence " do_test rec-node-5 "Node Recover - CRM down - no fence" do_test rec-node-6 "Node Recover - CRM down - fence " do_test rec-node-7 "Node Recover - no quorum - ignore " do_test rec-node-8 "Node Recover - no quorum - freeze " do_test rec-node-9 "Node Recover - no quorum - stop " do_test rec-node-10 "Node Recover - no quorum - stop w/fence" do_test rec-node-11 "Node Recover - CRM down w/ group - fence " do_test rec-node-12 "Node Recover - nothing active - fence " do_test rec-node-13 "Node Recover - failed resource + shutdown - fence " do_test rec-node-15 "Node Recover - unknown lrm section" do_test rec-node-14 "Serialize all stonith's" echo "" do_test multi1 "Multiple Active (stop/start)" echo "" do_test migrate-begin "Normal migration" do_test migrate-success "Completed migration" do_test migrate-partial-1 "Completed migration, missing stop on source" do_test migrate-partial-2 "Successful migrate_to only" do_test migrate-partial-3 "Successful migrate_to only, target down" do_test migrate-partial-4 "Migrate from the correct host after migrate_to+migrate_from" do_test bug-5186-partial-migrate "Handle partial migration when src node loses membership" do_test migrate-fail-2 "Failed migrate_from" do_test migrate-fail-3 "Failed migrate_from + stop on source" do_test migrate-fail-4 "Failed migrate_from + stop on target - ideally we wouldn't need to re-stop on target" do_test migrate-fail-5 "Failed migrate_from + stop on source and target" do_test migrate-fail-6 "Failed migrate_to" do_test migrate-fail-7 "Failed migrate_to + stop on source" do_test migrate-fail-8 "Failed migrate_to + stop on target - ideally we wouldn't need to re-stop on target" do_test migrate-fail-9 "Failed migrate_to + stop on source and target" do_test migrate-stop "Migration in a stopping stack" do_test migrate-start "Migration in a starting stack" do_test migrate-stop_start "Migration in a restarting stack" do_test migrate-stop-complex "Migration in a complex stopping stack" do_test migrate-start-complex "Migration in a complex starting stack" do_test migrate-stop-start-complex "Migration in a complex moving stack" do_test migrate-shutdown "Order the post-migration 'stop' before node shutdown" do_test migrate-1 "Migrate (migrate)" do_test migrate-2 "Migrate (stable)" do_test migrate-3 "Migrate (failed migrate_to)" do_test migrate-4 "Migrate (failed migrate_from)" do_test novell-252693 "Migration in a stopping stack" do_test novell-252693-2 "Migration in a starting stack" do_test novell-252693-3 "Non-Migration in a starting and stopping stack" do_test bug-1820 "Migration in a group" do_test bug-1820-1 "Non-migration in a group" do_test migrate-5 "Primitive migration with a clone" do_test migrate-fencing "Migration after Fencing" do_test migrate-both-vms "Migrate two VMs that have no colocation" do_test 1-a-then-bm-move-b "Advanced migrate logic. A then B. migrate B." do_test 2-am-then-b-move-a "Advanced migrate logic, A then B, migrate A without stopping B" do_test 3-am-then-bm-both-migrate "Advanced migrate logic. A then B. migrate both" do_test 4-am-then-bm-b-not-migratable "Advanced migrate logic, A then B, B not migratable" do_test 5-am-then-bm-a-not-migratable "Advanced migrate logic. A then B. move both, a not migratable" do_test 6-migrate-group "Advanced migrate logic, migrate a group" do_test 7-migrate-group-one-unmigratable "Advanced migrate logic, migrate group mixed with allow-migrate true/false" do_test 8-am-then-bm-a-migrating-b-stopping "Advanced migrate logic, A then B, A migrating, B stopping" do_test 9-am-then-bm-b-migrating-a-stopping "Advanced migrate logic, A then B, B migrate, A stopping" do_test 10-a-then-bm-b-move-a-clone "Advanced migrate logic, A clone then B, migrate B while stopping A" do_test 11-a-then-bm-b-move-a-clone-starting "Advanced migrate logic, A clone then B, B moving while A is start/stopping" #echo "" #do_test complex1 "Complex " do_test bug-lf-2422 "Dependancy on partially active group - stop ocfs:*" echo "" do_test clone-anon-probe-1 "Probe the correct (anonymous) clone instance for each node" do_test clone-anon-probe-2 "Avoid needless re-probing of anonymous clones" do_test clone-anon-failcount "Merge failcounts for anonymous clones" do_test inc0 "Incarnation start" do_test inc1 "Incarnation start order" do_test inc2 "Incarnation silent restart, stop, move" do_test inc3 "Inter-incarnation ordering, silent restart, stop, move" do_test inc4 "Inter-incarnation ordering, silent restart, stop, move (ordered)" do_test inc5 "Inter-incarnation ordering, silent restart, stop, move (restart 1)" do_test inc6 "Inter-incarnation ordering, silent restart, stop, move (restart 2)" do_test inc7 "Clone colocation" do_test inc8 "Clone anti-colocation" do_test inc9 "Non-unique clone" do_test inc10 "Non-unique clone (stop)" do_test inc11 "Primitive colocation with clones" do_test inc12 "Clone shutdown" do_test cloned-group "Make sure only the correct number of cloned groups are started" do_test cloned-group-stop "Ensure stopping qpidd also stops glance and cinder" do_test clone-no-shuffle "Dont prioritize allocation of instances that must be moved" do_test clone-max-zero "Orphan processing with clone-max=0" do_test clone-anon-dup "Bug LF#2087 - Correctly parse the state of anonymous clones that are active more than once per node" do_test bug-lf-2160 "Dont shuffle clones due to colocation" do_test bug-lf-2213 "clone-node-max enforcement for cloned groups" do_test bug-lf-2153 "Clone ordering constraints" do_test bug-lf-2361 "Ensure clones observe mandatory ordering constraints if the LHS is unrunnable" do_test bug-lf-2317 "Avoid needless restart of primitive depending on a clone" do_test clone-colocate-instance-1 "Colocation with a specific clone instance (negative example)" do_test clone-colocate-instance-2 "Colocation with a specific clone instance" do_test clone-order-instance "Ordering with specific clone instances" do_test bug-lf-2453 "Enforce mandatory clone ordering without colocation" do_test bug-lf-2508 "Correctly reconstruct the status of anonymous cloned groups" do_test bug-lf-2544 "Balanced clone placement" do_test bug-lf-2445 "Redistribute clones with node-max > 1 and stickiness = 0" do_test bug-lf-2574 "Avoid clone shuffle" do_test bug-lf-2581 "Avoid group restart due to unrelated clone (re)start" do_test bug-cl-5168 "Don't shuffle clones" do_test bug-cl-5170 "Prevent clone from starting with on-fail=block" do_test clone-fail-block-colocation "Move colocated group when failed clone has on-fail=block" do_test clone-interleave-1 "Clone-3 cannot start on pcmk-1 due to interleaved ordering (no colocation)" do_test clone-interleave-2 "Clone-3 must stop on pcmk-1 due to interleaved ordering (no colocation)" do_test clone-interleave-3 "Clone-3 must be recovered on pcmk-1 due to interleaved ordering (no colocation)" echo "" do_test unfence-startup "Clean unfencing" do_test unfence-definition "Unfencing when the agent changes" do_test unfence-parameters "Unfencing when the agent parameters changes" echo "" do_test master-0 "Stopped -> Slave" do_test master-1 "Stopped -> Promote" do_test master-2 "Stopped -> Promote : notify" do_test master-3 "Stopped -> Promote : master location" do_test master-4 "Started -> Promote : master location" do_test master-5 "Promoted -> Promoted" do_test master-6 "Promoted -> Promoted (2)" do_test master-7 "Promoted -> Fenced" do_test master-8 "Promoted -> Fenced -> Moved" do_test master-9 "Stopped + Promotable + No quorum" do_test master-10 "Stopped -> Promotable : notify with monitor" do_test master-11 "Stopped -> Promote : colocation" do_test novell-239082 "Demote/Promote ordering" do_test novell-239087 "Stable master placement" do_test master-12 "Promotion based solely on rsc_location constraints" do_test master-13 "Include preferences of colocated resources when placing master" do_test master-demote "Ordering when actions depends on demoting a slave resource" do_test master-ordering "Prevent resources from starting that need a master" do_test bug-1765 "Master-Master Colocation (dont stop the slaves)" do_test master-group "Promotion of cloned groups" do_test bug-lf-1852 "Don't shuffle master/slave instances unnecessarily" do_test master-failed-demote "Dont retry failed demote actions" do_test master-failed-demote-2 "Dont retry failed demote actions (notify=false)" do_test master-depend "Ensure resources that depend on the master don't get allocated until the master does" do_test master-reattach "Re-attach to a running master" do_test master-allow-start "Don't include master score if it would prevent allocation" do_test master-colocation "Allow master instances placemaker to be influenced by colocation constraints" do_test master-pseudo "Make sure promote/demote pseudo actions are created correctly" do_test master-role "Prevent target-role from promoting more than master-max instances" do_test bug-lf-2358 "Master-Master anti-colocation" do_test master-promotion-constraint "Mandatory master colocation constraints" do_test unmanaged-master "Ensure role is preserved for unmanaged resources" do_test master-unmanaged-monitor "Start the correct monitor operation for unmanaged masters" do_test master-demote-2 "Demote does not clear past failure" do_test master-move "Move master based on failure of colocated group" do_test master-probed-score "Observe the promotion score of probed resources" do_test colocation_constraint_stops_master "cl#5054 - Ensure master is demoted when stopped by colocation constraint" do_test colocation_constraint_stops_slave "cl#5054 - Ensure slave is not demoted when stopped by colocation constraint" do_test order_constraint_stops_master "cl#5054 - Ensure master is demoted when stopped by order constraint" do_test order_constraint_stops_slave "cl#5054 - Ensure slave is not demoted when stopped by order constraint" do_test master_monitor_restart "cl#5072 - Ensure master monitor operation will start after promotion." do_test bug-rh-880249 "Handle replacement of an m/s resource with a primitive" do_test bug-5143-ms-shuffle "Prevent master shuffling due to promotion score" do_test master-demote-block "Block promotion if demote fails with on-fail=block" do_test master-dependant-ban "Don't stop instances from being active because a dependant is banned from that host" do_test master-stop "Stop instances due to location constraint with role=Started" do_test master-partially-demoted-group "Allow partially demoted group to finish demoting" do_test bug-cl-5213 "Ensure role colocation with -INFINITY is enforced" do_test bug-cl-5219 "Allow unrelated resources with a common colocation target to remain promoted" echo "" do_test history-1 "Correctly parse stateful-1 resource state" echo "" do_test managed-0 "Managed (reference)" do_test managed-1 "Not managed - down " do_test managed-2 "Not managed - up " do_test bug-5028 "Shutdown should block if anything depends on an unmanaged resource" do_test bug-5028-detach "Ensure detach still works" do_test bug-5028-bottom "Ensure shutdown still blocks if the blocked resource is at the bottom of the stack" do_test unmanaged-stop-1 "cl#5155 - Block the stop of resources if any depending resource is unmanaged " do_test unmanaged-stop-2 "cl#5155 - Block the stop of resources if the first resource in a mandatory stop order is unmanaged " do_test unmanaged-stop-3 "cl#5155 - Block the stop of resources if any depending resource in a group is unmanaged " do_test unmanaged-stop-4 "cl#5155 - Block the stop of resources if any depending resource in the middle of a group is unmanaged " do_test unmanaged-block-restart "Block restart of resources if any dependent resource in a group is unmanaged" echo "" do_test interleave-0 "Interleave (reference)" do_test interleave-1 "coloc - not interleaved" do_test interleave-2 "coloc - interleaved " do_test interleave-3 "coloc - interleaved (2)" do_test interleave-pseudo-stop "Interleaved clone during stonith" do_test interleave-stop "Interleaved clone during stop" do_test interleave-restart "Interleaved clone during dependancy restart" echo "" do_test notify-0 "Notify reference" do_test notify-1 "Notify simple" do_test notify-2 "Notify simple, confirm" do_test notify-3 "Notify move, confirm" do_test novell-239079 "Notification priority" #do_test notify-2 "Notify - 764" echo "" do_test 594 "OSDL #594 - Unrunnable actions scheduled in transition" do_test 662 "OSDL #662 - Two resources start on one node when incarnation_node_max = 1" do_test 696 "OSDL #696 - CRM starts stonith RA without monitor" do_test 726 "OSDL #726 - Attempting to schedule rsc_posic041_monitor_5000 _after_ a stop" do_test 735 "OSDL #735 - Correctly detect that rsc_hadev1 is stopped on hadev3" do_test 764 "OSDL #764 - Missing monitor op for DoFencing:child_DoFencing:1" do_test 797 "OSDL #797 - Assert triggered: task_id_i > max_call_id" do_test 829 "OSDL #829" do_test 994 "OSDL #994 - Stopping the last resource in a resource group causes the entire group to be restarted" do_test 994-2 "OSDL #994 - with a dependant resource" do_test 1360 "OSDL #1360 - Clone stickiness" do_test 1484 "OSDL #1484 - on_fail=stop" do_test 1494 "OSDL #1494 - Clone stability" do_test unrunnable-1 "Unrunnable" do_test stonith-0 "Stonith loop - 1" do_test stonith-1 "Stonith loop - 2" do_test stonith-2 "Stonith loop - 3" do_test stonith-3 "Stonith startup" do_test stonith-4 "Stonith node state" --rc 4 do_test bug-1572-1 "Recovery of groups depending on master/slave" do_test bug-1572-2 "Recovery of groups depending on master/slave when the master is never re-promoted" do_test bug-1685 "Depends-on-master ordering" do_test bug-1822 "Dont promote partially active groups" do_test bug-pm-11 "New resource added to a m/s group" do_test bug-pm-12 "Recover only the failed portion of a cloned group" do_test bug-n-387749 "Don't shuffle clone instances" do_test bug-n-385265 "Don't ignore the failure stickiness of group children - resource_idvscommon should stay stopped" do_test bug-n-385265-2 "Ensure groups are migrated instead of remaining partially active on the current node" do_test bug-lf-1920 "Correctly handle probes that find active resources" do_test bnc-515172 "Location constraint with multiple expressions" do_test colocate-primitive-with-clone "Optional colocation with a clone" do_test use-after-free-merge "Use-after-free in native_merge_weights" do_test bug-lf-2551 "STONITH ordering for stop" do_test bug-lf-2606 "Stonith implies demote" do_test bug-lf-2474 "Ensure resource op timeout takes precedence over op_defaults" do_test bug-suse-707150 "Prevent vm-01 from starting due to colocation/ordering" do_test bug-5014-A-start-B-start "Verify when A starts B starts using symmetrical=false" do_test bug-5014-A-stop-B-started "Verify when A stops B does not stop if it has already started using symmetric=false" do_test bug-5014-A-stopped-B-stopped "Verify when A is stopped and B has not started, B does not start before A using symmetric=false" do_test bug-5014-CthenAthenB-C-stopped "Verify when C then A is symmetrical=true, A then B is symmetric=false, and C is stopped that nothing starts." do_test bug-5014-CLONE-A-start-B-start "Verify when A starts B starts using clone resources with symmetric=false" do_test bug-5014-CLONE-A-stop-B-started "Verify when A stops B does not stop if it has already started using clone resources with symmetric=false." do_test bug-5014-GROUP-A-start-B-start "Verify when A starts B starts when using group resources with symmetric=false." do_test bug-5014-GROUP-A-stopped-B-started "Verify when A stops B does not stop if it has already started using group resources with symmetric=false." do_test bug-5014-GROUP-A-stopped-B-stopped "Verify when A is stopped and B has not started, B does not start before A using group resources with symmetric=false." do_test bug-5014-ordered-set-symmetrical-false "Verify ordered sets work with symmetrical=false" do_test bug-5014-ordered-set-symmetrical-true "Verify ordered sets work with symmetrical=true" do_test bug-5007-masterslave_colocation "Verify use of colocation scores other than INFINITY and -INFINITY work on multi-state resources." do_test bug-5038 "Prevent restart of anonymous clones when clone-max decreases" do_test bug-5025-1 "Automatically clean up failcount after resource config change with reload" do_test bug-5025-2 "Make sure clear failcount action isn't set when config does not change." do_test bug-5025-3 "Automatically clean up failcount after resource config change with restart" do_test bug-5025-4 "Clear failcount when last failure is a start op and rsc attributes changed." do_test failcount "Ensure failcounts are correctly expired" do_test failcount-block "Ensure failcounts are not expired when on-fail=block is present" do_test monitor-onfail-restart "bug-5058 - Monitor failure with on-fail set to restart" do_test monitor-onfail-stop "bug-5058 - Monitor failure wiht on-fail set to stop" do_test bug-5059 "No need to restart p_stateful1:*" do_test bug-5069-op-enabled "Test on-fail=ignore with failure when monitor is enabled." do_test bug-5069-op-disabled "Test on-fail-ignore with failure when monitor is disabled." do_test obsolete-lrm-resource "cl#5115 - Do not use obsolete lrm_resource sections" do_test expire-non-blocked-failure "Ignore failure-timeout only if the failed operation has on-fail=block" do_test ignore_stonith_rsc_order1 "cl#5056- Ignore order constraint between stonith and non-stonith rsc." do_test ignore_stonith_rsc_order2 "cl#5056- Ignore order constraint with group rsc containing mixed stonith and non-stonith." do_test ignore_stonith_rsc_order3 "cl#5056- Ignore order constraint, stonith clone and mixed group" do_test ignore_stonith_rsc_order4 "cl#5056- Ignore order constraint, stonith clone and clone with nested mixed group" do_test honor_stonith_rsc_order1 "cl#5056- Honor order constraint, stonith clone and pure stonith group(single rsc)." do_test honor_stonith_rsc_order2 "cl#5056- Honor order constraint, stonith clone and pure stonith group(multiple rsc)" do_test honor_stonith_rsc_order3 "cl#5056- Honor order constraint, stonith clones with nested pure stonith group." do_test honor_stonith_rsc_order4 "cl#5056- Honor order constraint, between two native stonith rscs." do_test probe-timeout "cl#5099 - Default probe timeout" echo "" do_test systemhealth1 "System Health () #1" do_test systemhealth2 "System Health () #2" do_test systemhealth3 "System Health () #3" do_test systemhealthn1 "System Health (None) #1" do_test systemhealthn2 "System Health (None) #2" do_test systemhealthn3 "System Health (None) #3" do_test systemhealthm1 "System Health (Migrate On Red) #1" do_test systemhealthm2 "System Health (Migrate On Red) #2" do_test systemhealthm3 "System Health (Migrate On Red) #3" do_test systemhealtho1 "System Health (Only Green) #1" do_test systemhealtho2 "System Health (Only Green) #2" do_test systemhealtho3 "System Health (Only Green) #3" do_test systemhealthp1 "System Health (Progessive) #1" do_test systemhealthp2 "System Health (Progessive) #2" do_test systemhealthp3 "System Health (Progessive) #3" echo "" do_test utilization "Placement Strategy - utilization" do_test minimal "Placement Strategy - minimal" do_test balanced "Placement Strategy - balanced" echo "" do_test placement-stickiness "Optimized Placement Strategy - stickiness" do_test placement-priority "Optimized Placement Strategy - priority" do_test placement-location "Optimized Placement Strategy - location" do_test placement-capacity "Optimized Placement Strategy - capacity" echo "" do_test utilization-order1 "Utilization Order - Simple" do_test utilization-order2 "Utilization Order - Complex" do_test utilization-order3 "Utilization Order - Migrate" do_test utilization-order4 "Utilization Order - Live Mirgration (bnc#695440)" do_test utilization-shuffle "Don't displace prmExPostgreSQLDB2 on act2, Start prmExPostgreSQLDB1 on act3" do_test load-stopped-loop "Avoid transition loop due to load_stopped (cl#5044)" echo "" do_test reprobe-target_rc "Ensure correct target_rc for reprobe of inactive resources" do_test node-maintenance-1 "cl#5128 - Node maintenance" do_test node-maintenance-2 "cl#5128 - Node maintenance (coming out of maintenance mode)" do_test rsc-maintenance "Per-resource maintenance" echo "" do_test not-installed-agent "The resource agent is missing" do_test not-installed-tools "Something the resource agent needs is missing" echo "" do_test stopped-monitor-00 "Stopped Monitor - initial start" do_test stopped-monitor-01 "Stopped Monitor - failed started" do_test stopped-monitor-02 "Stopped Monitor - started multi-up" do_test stopped-monitor-03 "Stopped Monitor - stop started" do_test stopped-monitor-04 "Stopped Monitor - failed stop" do_test stopped-monitor-05 "Stopped Monitor - start unmanaged" do_test stopped-monitor-06 "Stopped Monitor - unmanaged multi-up" do_test stopped-monitor-07 "Stopped Monitor - start unmanaged multi-up" do_test stopped-monitor-08 "Stopped Monitor - migrate" do_test stopped-monitor-09 "Stopped Monitor - unmanage started" do_test stopped-monitor-10 "Stopped Monitor - unmanaged started multi-up" do_test stopped-monitor-11 "Stopped Monitor - stop unmanaged started" do_test stopped-monitor-12 "Stopped Monitor - unmanaged started multi-up (targer-role="Stopped")" do_test stopped-monitor-20 "Stopped Monitor - initial stop" do_test stopped-monitor-21 "Stopped Monitor - stopped single-up" do_test stopped-monitor-22 "Stopped Monitor - stopped multi-up" do_test stopped-monitor-23 "Stopped Monitor - start stopped" do_test stopped-monitor-24 "Stopped Monitor - unmanage stopped" do_test stopped-monitor-25 "Stopped Monitor - unmanaged stopped multi-up" do_test stopped-monitor-26 "Stopped Monitor - start unmanaged stopped" do_test stopped-monitor-27 "Stopped Monitor - unmanaged stopped multi-up (target-role="Started")" do_test stopped-monitor-30 "Stopped Monitor - new node started" do_test stopped-monitor-31 "Stopped Monitor - new node stopped" echo"" do_test ticket-primitive-1 "Ticket - Primitive (loss-policy=stop, initial)" do_test ticket-primitive-2 "Ticket - Primitive (loss-policy=stop, granted)" do_test ticket-primitive-3 "Ticket - Primitive (loss-policy-stop, revoked)" do_test ticket-primitive-4 "Ticket - Primitive (loss-policy=demote, initial)" do_test ticket-primitive-5 "Ticket - Primitive (loss-policy=demote, granted)" do_test ticket-primitive-6 "Ticket - Primitive (loss-policy=demote, revoked)" do_test ticket-primitive-7 "Ticket - Primitive (loss-policy=fence, initial)" do_test ticket-primitive-8 "Ticket - Primitive (loss-policy=fence, granted)" do_test ticket-primitive-9 "Ticket - Primitive (loss-policy=fence, revoked)" do_test ticket-primitive-10 "Ticket - Primitive (loss-policy=freeze, initial)" do_test ticket-primitive-11 "Ticket - Primitive (loss-policy=freeze, granted)" do_test ticket-primitive-12 "Ticket - Primitive (loss-policy=freeze, revoked)" do_test ticket-primitive-13 "Ticket - Primitive (loss-policy=stop, standby, granted)" do_test ticket-primitive-14 "Ticket - Primitive (loss-policy=stop, granted, standby)" do_test ticket-primitive-15 "Ticket - Primitive (loss-policy=stop, standby, revoked)" do_test ticket-primitive-16 "Ticket - Primitive (loss-policy=demote, standby, granted)" do_test ticket-primitive-17 "Ticket - Primitive (loss-policy=demote, granted, standby)" do_test ticket-primitive-18 "Ticket - Primitive (loss-policy=demote, standby, revoked)" do_test ticket-primitive-19 "Ticket - Primitive (loss-policy=fence, standby, granted)" do_test ticket-primitive-20 "Ticket - Primitive (loss-policy=fence, granted, standby)" do_test ticket-primitive-21 "Ticket - Primitive (loss-policy=fence, standby, revoked)" do_test ticket-primitive-22 "Ticket - Primitive (loss-policy=freeze, standby, granted)" do_test ticket-primitive-23 "Ticket - Primitive (loss-policy=freeze, granted, standby)" do_test ticket-primitive-24 "Ticket - Primitive (loss-policy=freeze, standby, revoked)" echo"" do_test ticket-group-1 "Ticket - Group (loss-policy=stop, initial)" do_test ticket-group-2 "Ticket - Group (loss-policy=stop, granted)" do_test ticket-group-3 "Ticket - Group (loss-policy-stop, revoked)" do_test ticket-group-4 "Ticket - Group (loss-policy=demote, initial)" do_test ticket-group-5 "Ticket - Group (loss-policy=demote, granted)" do_test ticket-group-6 "Ticket - Group (loss-policy=demote, revoked)" do_test ticket-group-7 "Ticket - Group (loss-policy=fence, initial)" do_test ticket-group-8 "Ticket - Group (loss-policy=fence, granted)" do_test ticket-group-9 "Ticket - Group (loss-policy=fence, revoked)" do_test ticket-group-10 "Ticket - Group (loss-policy=freeze, initial)" do_test ticket-group-11 "Ticket - Group (loss-policy=freeze, granted)" do_test ticket-group-12 "Ticket - Group (loss-policy=freeze, revoked)" do_test ticket-group-13 "Ticket - Group (loss-policy=stop, standby, granted)" do_test ticket-group-14 "Ticket - Group (loss-policy=stop, granted, standby)" do_test ticket-group-15 "Ticket - Group (loss-policy=stop, standby, revoked)" do_test ticket-group-16 "Ticket - Group (loss-policy=demote, standby, granted)" do_test ticket-group-17 "Ticket - Group (loss-policy=demote, granted, standby)" do_test ticket-group-18 "Ticket - Group (loss-policy=demote, standby, revoked)" do_test ticket-group-19 "Ticket - Group (loss-policy=fence, standby, granted)" do_test ticket-group-20 "Ticket - Group (loss-policy=fence, granted, standby)" do_test ticket-group-21 "Ticket - Group (loss-policy=fence, standby, revoked)" do_test ticket-group-22 "Ticket - Group (loss-policy=freeze, standby, granted)" do_test ticket-group-23 "Ticket - Group (loss-policy=freeze, granted, standby)" do_test ticket-group-24 "Ticket - Group (loss-policy=freeze, standby, revoked)" echo"" do_test ticket-clone-1 "Ticket - Clone (loss-policy=stop, initial)" do_test ticket-clone-2 "Ticket - Clone (loss-policy=stop, granted)" do_test ticket-clone-3 "Ticket - Clone (loss-policy-stop, revoked)" do_test ticket-clone-4 "Ticket - Clone (loss-policy=demote, initial)" do_test ticket-clone-5 "Ticket - Clone (loss-policy=demote, granted)" do_test ticket-clone-6 "Ticket - Clone (loss-policy=demote, revoked)" do_test ticket-clone-7 "Ticket - Clone (loss-policy=fence, initial)" do_test ticket-clone-8 "Ticket - Clone (loss-policy=fence, granted)" do_test ticket-clone-9 "Ticket - Clone (loss-policy=fence, revoked)" do_test ticket-clone-10 "Ticket - Clone (loss-policy=freeze, initial)" do_test ticket-clone-11 "Ticket - Clone (loss-policy=freeze, granted)" do_test ticket-clone-12 "Ticket - Clone (loss-policy=freeze, revoked)" do_test ticket-clone-13 "Ticket - Clone (loss-policy=stop, standby, granted)" do_test ticket-clone-14 "Ticket - Clone (loss-policy=stop, granted, standby)" do_test ticket-clone-15 "Ticket - Clone (loss-policy=stop, standby, revoked)" do_test ticket-clone-16 "Ticket - Clone (loss-policy=demote, standby, granted)" do_test ticket-clone-17 "Ticket - Clone (loss-policy=demote, granted, standby)" do_test ticket-clone-18 "Ticket - Clone (loss-policy=demote, standby, revoked)" do_test ticket-clone-19 "Ticket - Clone (loss-policy=fence, standby, granted)" do_test ticket-clone-20 "Ticket - Clone (loss-policy=fence, granted, standby)" do_test ticket-clone-21 "Ticket - Clone (loss-policy=fence, standby, revoked)" do_test ticket-clone-22 "Ticket - Clone (loss-policy=freeze, standby, granted)" do_test ticket-clone-23 "Ticket - Clone (loss-policy=freeze, granted, standby)" do_test ticket-clone-24 "Ticket - Clone (loss-policy=freeze, standby, revoked)" echo"" do_test ticket-master-1 "Ticket - Master (loss-policy=stop, initial)" do_test ticket-master-2 "Ticket - Master (loss-policy=stop, granted)" do_test ticket-master-3 "Ticket - Master (loss-policy-stop, revoked)" do_test ticket-master-4 "Ticket - Master (loss-policy=demote, initial)" do_test ticket-master-5 "Ticket - Master (loss-policy=demote, granted)" do_test ticket-master-6 "Ticket - Master (loss-policy=demote, revoked)" do_test ticket-master-7 "Ticket - Master (loss-policy=fence, initial)" do_test ticket-master-8 "Ticket - Master (loss-policy=fence, granted)" do_test ticket-master-9 "Ticket - Master (loss-policy=fence, revoked)" do_test ticket-master-10 "Ticket - Master (loss-policy=freeze, initial)" do_test ticket-master-11 "Ticket - Master (loss-policy=freeze, granted)" do_test ticket-master-12 "Ticket - Master (loss-policy=freeze, revoked)" do_test ticket-master-13 "Ticket - Master (loss-policy=stop, standby, granted)" do_test ticket-master-14 "Ticket - Master (loss-policy=stop, granted, standby)" do_test ticket-master-15 "Ticket - Master (loss-policy=stop, standby, revoked)" do_test ticket-master-16 "Ticket - Master (loss-policy=demote, standby, granted)" do_test ticket-master-17 "Ticket - Master (loss-policy=demote, granted, standby)" do_test ticket-master-18 "Ticket - Master (loss-policy=demote, standby, revoked)" do_test ticket-master-19 "Ticket - Master (loss-policy=fence, standby, granted)" do_test ticket-master-20 "Ticket - Master (loss-policy=fence, granted, standby)" do_test ticket-master-21 "Ticket - Master (loss-policy=fence, standby, revoked)" do_test ticket-master-22 "Ticket - Master (loss-policy=freeze, standby, granted)" do_test ticket-master-23 "Ticket - Master (loss-policy=freeze, granted, standby)" do_test ticket-master-24 "Ticket - Master (loss-policy=freeze, standby, revoked)" echo "" do_test ticket-rsc-sets-1 "Ticket - Resource sets (1 ticket, initial)" do_test ticket-rsc-sets-2 "Ticket - Resource sets (1 ticket, granted)" do_test ticket-rsc-sets-3 "Ticket - Resource sets (1 ticket, revoked)" do_test ticket-rsc-sets-4 "Ticket - Resource sets (2 tickets, initial)" do_test ticket-rsc-sets-5 "Ticket - Resource sets (2 tickets, granted)" do_test ticket-rsc-sets-6 "Ticket - Resource sets (2 tickets, granted)" do_test ticket-rsc-sets-7 "Ticket - Resource sets (2 tickets, revoked)" do_test ticket-rsc-sets-8 "Ticket - Resource sets (1 ticket, standby, granted)" do_test ticket-rsc-sets-9 "Ticket - Resource sets (1 ticket, granted, standby)" do_test ticket-rsc-sets-10 "Ticket - Resource sets (1 ticket, standby, revoked)" do_test ticket-rsc-sets-11 "Ticket - Resource sets (2 tickets, standby, granted)" do_test ticket-rsc-sets-12 "Ticket - Resource sets (2 tickets, standby, granted)" do_test ticket-rsc-sets-13 "Ticket - Resource sets (2 tickets, granted, standby)" do_test ticket-rsc-sets-14 "Ticket - Resource sets (2 tickets, standby, revoked)" do_test cluster-specific-params "Cluster-specific instance attributes based on rules" do_test site-specific-params "Site-specific instance attributes based on rules" echo "" do_test template-1 "Template - 1" do_test template-2 "Template - 2" do_test template-3 "Template - 3 (merge operations)" do_test template-coloc-1 "Template - Colocation 1" do_test template-coloc-2 "Template - Colocation 2" do_test template-coloc-3 "Template - Colocation 3" do_test template-order-1 "Template - Order 1" do_test template-order-2 "Template - Order 2" do_test template-order-3 "Template - Order 3" do_test template-ticket "Template - Ticket" do_test template-rsc-sets-1 "Template - Resource Sets 1" do_test template-rsc-sets-2 "Template - Resource Sets 2" do_test template-rsc-sets-3 "Template - Resource Sets 3" do_test template-rsc-sets-4 "Template - Resource Sets 4" do_test template-clone-primitive "Cloned primitive from template" do_test template-clone-group "Cloned group from template" do_test location-sets-templates "Resource sets and templates - Location" do_test tags-coloc-order-1 "Tags - Colocation and Order (Simple)" do_test tags-coloc-order-2 "Tags - Colocation and Order (Resource Sets with Templates)" do_test tags-location "Tags - Location" do_test tags-ticket "Tags - Ticket" echo "" do_test container-1 "Container - initial" do_test container-2 "Container - monitor failed" do_test container-3 "Container - stop failed" do_test container-4 "Container - reached migration-threshold" do_test container-group-1 "Container in group - initial" do_test container-group-2 "Container in group - monitor failed" do_test container-group-3 "Container in group - stop failed" do_test container-group-4 "Container in group - reached migration-threshold" do_test container-is-remote-node "Place resource within container when container is remote-node" do_test bug-rh-1097457 "Kill user defined container/contents ordering" echo "" do_test whitebox-fail1 "Fail whitebox container rsc." do_test whitebox-fail2 "Fail whitebox container rsc lrmd connection." do_test whitebox-fail3 "Failed containers should not run nested on remote nodes." do_test whitebox-start "Start whitebox container with resources assigned to it" do_test whitebox-stop "Stop whitebox container with resources assigned to it" do_test whitebox-move "Move whitebox container with resources assigned to it" do_test whitebox-asymmetric "Verify connection rsc opts-in based on container resource" do_test whitebox-ms-ordering "Verify promote/demote can not occur before connection is established" do_test whitebox-orphaned "Properly shutdown orphaned whitebox container" do_test whitebox-orphan-ms "Properly tear down orphan ms resources on remote-nodes" do_test whitebox-unexpectedly-running "Recover container nodes the cluster did not start." do_test whitebox-migrate1 "Migrate both container and connection resource" echo "" do_test remote-startup-probes "Baremetal remote-node startup probes" do_test remote-startup "Startup a newly discovered remote-nodes with no status." do_test remote-fence-unclean "Fence unclean baremetal remote-node" do_test remote-fence-unclean2 "Fence baremetal remote-node after cluster node fails and connection can not be recovered" do_test remote-move "Move remote-node connection resource" do_test remote-disable "Disable a baremetal remote-node" do_test remote-orphaned "Properly shutdown orphaned connection resource" do_test remote-recover "Recover connection resource after cluster-node fails." do_test remote-stale-node-entry "Make sure we properly handle leftover remote-node entries in the node section" + +echo "" +do_test resource-discovery "Exercises resource-discovery location constraint option." +do_test rsc-discovery-per-node "Disable resource discovery per node" + echo "" test_results diff --git a/pengine/test10/resource-discovery.dot b/pengine/test10/resource-discovery.dot new file mode 100644 index 0000000000..efb2434047 --- /dev/null +++ b/pengine/test10/resource-discovery.dot @@ -0,0 +1,185 @@ + digraph "g" { +"FAKE1_monitor_0 18builder" -> "probe_complete 18builder" [ style = bold] +"FAKE1_monitor_0 18builder" [ style=bold color="green" fontcolor="black"] +"FAKE1_monitor_0 18node1" -> "probe_complete 18node1" [ style = bold] +"FAKE1_monitor_0 18node1" [ style=bold color="green" fontcolor="black"] +"FAKE1_monitor_0 18node2" -> "probe_complete 18node2" [ style = bold] +"FAKE1_monitor_0 18node2" [ style=bold color="green" fontcolor="black"] +"FAKE1_monitor_0 18node3" -> "probe_complete 18node3" [ style = bold] +"FAKE1_monitor_0 18node3" [ style=bold color="green" fontcolor="black"] +"FAKE1_monitor_0 18node4" -> "probe_complete 18node4" [ style = bold] +"FAKE1_monitor_0 18node4" [ style=bold color="green" fontcolor="black"] +"FAKE1_monitor_60000 18node2" [ style=bold color="green" fontcolor="black"] +"FAKE1_start_0 18node2" -> "FAKE1_monitor_60000 18node2" [ style = bold] +"FAKE1_start_0 18node2" [ style=bold color="green" fontcolor="black"] +"FAKE2_monitor_60000 18node3" [ style=bold color="green" fontcolor="black"] +"FAKE2_start_0 18node3" -> "FAKE2_monitor_60000 18node3" [ style = bold] +"FAKE2_start_0 18node3" [ style=bold color="green" fontcolor="black"] +"FAKE2_stop_0 18node2" -> "FAKE2_start_0 18node3" [ style = bold] +"FAKE2_stop_0 18node2" -> "all_stopped" [ style = bold] +"FAKE2_stop_0 18node2" [ style=bold color="green" fontcolor="black"] +"FAKE3_monitor_0 18node3" -> "probe_complete 18node3" [ style = bold] +"FAKE3_monitor_0 18node3" [ style=bold color="green" fontcolor="black"] +"FAKE3_monitor_60000 18node3" [ style=bold color="green" fontcolor="black"] +"FAKE3_start_0 18node3" -> "FAKE3_monitor_60000 18node3" [ style = bold] +"FAKE3_start_0 18node3" [ style=bold color="green" fontcolor="black"] +"FAKE3_stop_0 18builder" -> "FAKE3_start_0 18node3" [ style = bold] +"FAKE3_stop_0 18builder" -> "all_stopped" [ style = bold] +"FAKE3_stop_0 18builder" [ style=bold color="green" fontcolor="black"] +"FAKE4_monitor_0 18node4" -> "probe_complete 18node4" [ style = bold] +"FAKE4_monitor_0 18node4" [ style=bold color="green" fontcolor="black"] +"FAKE4_monitor_60000 18node4" [ style=bold color="green" fontcolor="black"] +"FAKE4_start_0 18node4" -> "FAKE4_monitor_60000 18node4" [ style = bold] +"FAKE4_start_0 18node4" [ style=bold color="green" fontcolor="black"] +"FAKE4_stop_0 18node1" -> "FAKE4_start_0 18node4" [ style = bold] +"FAKE4_stop_0 18node1" -> "all_stopped" [ style = bold] +"FAKE4_stop_0 18node1" [ style=bold color="green" fontcolor="black"] +"FAKE5_monitor_0 18builder" -> "probe_complete 18builder" [ style = bold] +"FAKE5_monitor_0 18builder" [ style=bold color="green" fontcolor="black"] +"FAKE5_monitor_0 18node1" -> "probe_complete 18node1" [ style = bold] +"FAKE5_monitor_0 18node1" [ style=bold color="green" fontcolor="black"] +"FAKE5_monitor_0 18node2" -> "probe_complete 18node2" [ style = bold] +"FAKE5_monitor_0 18node2" [ style=bold color="green" fontcolor="black"] +"FAKE5_monitor_0 18node3" -> "probe_complete 18node3" [ style = bold] +"FAKE5_monitor_0 18node3" [ style=bold color="green" fontcolor="black"] +"FAKE5_monitor_0 18node4" -> "probe_complete 18node4" [ style = bold] +"FAKE5_monitor_0 18node4" [ style=bold color="green" fontcolor="black"] +"FAKE5_monitor_0 remote1" -> "probe_complete remote1" [ style = bold] +"FAKE5_monitor_0 remote1" [ style=bold color="green" fontcolor="black"] +"FAKE5_monitor_60000 remote1" [ style=bold color="green" fontcolor="black"] +"FAKE5_start_0 remote1" -> "FAKE5_monitor_60000 remote1" [ style = bold] +"FAKE5_start_0 remote1" [ style=bold color="green" fontcolor="black"] +"FAKECLONE1-clone_running_0" [ style=bold color="green" fontcolor="orange"] +"FAKECLONE1-clone_start_0" -> "FAKECLONE1-clone_running_0" [ style = bold] +"FAKECLONE1-clone_start_0" -> "FAKECLONE1:0_start_0 18builder" [ style = bold] +"FAKECLONE1-clone_start_0" -> "FAKECLONE1:1_start_0 18node1" [ style = bold] +"FAKECLONE1-clone_start_0" -> "FAKECLONE1:2_start_0 18node2" [ style = bold] +"FAKECLONE1-clone_start_0" -> "FAKECLONE1:3_start_0 18node4" [ style = bold] +"FAKECLONE1-clone_start_0" -> "FAKECLONE1:4_start_0 remote1" [ style = bold] +"FAKECLONE1-clone_start_0" -> "FAKECLONE1:5_start_0 18node3" [ style = bold] +"FAKECLONE1-clone_start_0" [ style=bold color="green" fontcolor="orange"] +"FAKECLONE1:0_monitor_60000 18builder" [ style=bold color="green" fontcolor="black"] +"FAKECLONE1:0_start_0 18builder" -> "FAKECLONE1-clone_running_0" [ style = bold] +"FAKECLONE1:0_start_0 18builder" -> "FAKECLONE1:0_monitor_60000 18builder" [ style = bold] +"FAKECLONE1:0_start_0 18builder" [ style=bold color="green" fontcolor="black"] +"FAKECLONE1:1_monitor_60000 18node1" [ style=bold color="green" fontcolor="black"] +"FAKECLONE1:1_start_0 18node1" -> "FAKECLONE1-clone_running_0" [ style = bold] +"FAKECLONE1:1_start_0 18node1" -> "FAKECLONE1:1_monitor_60000 18node1" [ style = bold] +"FAKECLONE1:1_start_0 18node1" [ style=bold color="green" fontcolor="black"] +"FAKECLONE1:2_monitor_60000 18node2" [ style=bold color="green" fontcolor="black"] +"FAKECLONE1:2_start_0 18node2" -> "FAKECLONE1-clone_running_0" [ style = bold] +"FAKECLONE1:2_start_0 18node2" -> "FAKECLONE1:2_monitor_60000 18node2" [ style = bold] +"FAKECLONE1:2_start_0 18node2" [ style=bold color="green" fontcolor="black"] +"FAKECLONE1:3_monitor_60000 18node4" [ style=bold color="green" fontcolor="black"] +"FAKECLONE1:3_start_0 18node4" -> "FAKECLONE1-clone_running_0" [ style = bold] +"FAKECLONE1:3_start_0 18node4" -> "FAKECLONE1:3_monitor_60000 18node4" [ style = bold] +"FAKECLONE1:3_start_0 18node4" [ style=bold color="green" fontcolor="black"] +"FAKECLONE1:4_monitor_0 remote1" -> "probe_complete remote1" [ style = bold] +"FAKECLONE1:4_monitor_0 remote1" [ style=bold color="green" fontcolor="black"] +"FAKECLONE1:4_monitor_60000 remote1" [ style=bold color="green" fontcolor="black"] +"FAKECLONE1:4_start_0 remote1" -> "FAKECLONE1-clone_running_0" [ style = bold] +"FAKECLONE1:4_start_0 remote1" -> "FAKECLONE1:4_monitor_60000 remote1" [ style = bold] +"FAKECLONE1:4_start_0 remote1" [ style=bold color="green" fontcolor="black"] +"FAKECLONE1:5_monitor_60000 18node3" [ style=bold color="green" fontcolor="black"] +"FAKECLONE1:5_start_0 18node3" -> "FAKECLONE1-clone_running_0" [ style = bold] +"FAKECLONE1:5_start_0 18node3" -> "FAKECLONE1:5_monitor_60000 18node3" [ style = bold] +"FAKECLONE1:5_start_0 18node3" [ style=bold color="green" fontcolor="black"] +"FAKECLONE2-clone_running_0" [ style=bold color="green" fontcolor="orange"] +"FAKECLONE2-clone_start_0" -> "FAKECLONE2-clone_running_0" [ style = bold] +"FAKECLONE2-clone_start_0" -> "FAKECLONE2:0_start_0 18builder" [ style = bold] +"FAKECLONE2-clone_start_0" -> "FAKECLONE2:1_start_0 18node1" [ style = bold] +"FAKECLONE2-clone_start_0" -> "FAKECLONE2:2_start_0 18node2" [ style = bold] +"FAKECLONE2-clone_start_0" -> "FAKECLONE2:3_start_0 18node4" [ style = bold] +"FAKECLONE2-clone_start_0" -> "FAKECLONE2:4_start_0 remote1" [ style = bold] +"FAKECLONE2-clone_start_0" -> "FAKECLONE2:5_start_0 18node3" [ style = bold] +"FAKECLONE2-clone_start_0" [ style=bold color="green" fontcolor="orange"] +"FAKECLONE2:0_monitor_0 18builder" -> "probe_complete 18builder" [ style = bold] +"FAKECLONE2:0_monitor_0 18builder" [ style=bold color="green" fontcolor="black"] +"FAKECLONE2:0_monitor_60000 18builder" [ style=bold color="green" fontcolor="black"] +"FAKECLONE2:0_start_0 18builder" -> "FAKECLONE2-clone_running_0" [ style = bold] +"FAKECLONE2:0_start_0 18builder" -> "FAKECLONE2:0_monitor_60000 18builder" [ style = bold] +"FAKECLONE2:0_start_0 18builder" [ style=bold color="green" fontcolor="black"] +"FAKECLONE2:1_monitor_0 18node1" -> "probe_complete 18node1" [ style = bold] +"FAKECLONE2:1_monitor_0 18node1" [ style=bold color="green" fontcolor="black"] +"FAKECLONE2:1_monitor_60000 18node1" [ style=bold color="green" fontcolor="black"] +"FAKECLONE2:1_start_0 18node1" -> "FAKECLONE2-clone_running_0" [ style = bold] +"FAKECLONE2:1_start_0 18node1" -> "FAKECLONE2:1_monitor_60000 18node1" [ style = bold] +"FAKECLONE2:1_start_0 18node1" [ style=bold color="green" fontcolor="black"] +"FAKECLONE2:2_monitor_60000 18node2" [ style=bold color="green" fontcolor="black"] +"FAKECLONE2:2_start_0 18node2" -> "FAKECLONE2-clone_running_0" [ style = bold] +"FAKECLONE2:2_start_0 18node2" -> "FAKECLONE2:2_monitor_60000 18node2" [ style = bold] +"FAKECLONE2:2_start_0 18node2" [ style=bold color="green" fontcolor="black"] +"FAKECLONE2:3_monitor_0 18node4" -> "probe_complete 18node4" [ style = bold] +"FAKECLONE2:3_monitor_0 18node4" [ style=bold color="green" fontcolor="black"] +"FAKECLONE2:3_monitor_60000 18node4" [ style=bold color="green" fontcolor="black"] +"FAKECLONE2:3_start_0 18node4" -> "FAKECLONE2-clone_running_0" [ style = bold] +"FAKECLONE2:3_start_0 18node4" -> "FAKECLONE2:3_monitor_60000 18node4" [ style = bold] +"FAKECLONE2:3_start_0 18node4" [ style=bold color="green" fontcolor="black"] +"FAKECLONE2:4_monitor_0 remote1" -> "probe_complete remote1" [ style = bold] +"FAKECLONE2:4_monitor_0 remote1" [ style=bold color="green" fontcolor="black"] +"FAKECLONE2:4_monitor_60000 remote1" [ style=bold color="green" fontcolor="black"] +"FAKECLONE2:4_start_0 remote1" -> "FAKECLONE2-clone_running_0" [ style = bold] +"FAKECLONE2:4_start_0 remote1" -> "FAKECLONE2:4_monitor_60000 remote1" [ style = bold] +"FAKECLONE2:4_start_0 remote1" [ style=bold color="green" fontcolor="black"] +"FAKECLONE2:5_monitor_0 18node3" -> "probe_complete 18node3" [ style = bold] +"FAKECLONE2:5_monitor_0 18node3" [ style=bold color="green" fontcolor="black"] +"FAKECLONE2:5_monitor_60000 18node3" [ style=bold color="green" fontcolor="black"] +"FAKECLONE2:5_start_0 18node3" -> "FAKECLONE2-clone_running_0" [ style = bold] +"FAKECLONE2:5_start_0 18node3" -> "FAKECLONE2:5_monitor_60000 18node3" [ style = bold] +"FAKECLONE2:5_start_0 18node3" [ style=bold color="green" fontcolor="black"] +"all_stopped" [ style=bold color="green" fontcolor="orange"] +"probe_complete 18builder" -> "probe_nodes_complete" [ style = bold] +"probe_complete 18builder" [ style=bold color="green" fontcolor="black"] +"probe_complete 18node1" -> "probe_nodes_complete" [ style = bold] +"probe_complete 18node1" [ style=bold color="green" fontcolor="black"] +"probe_complete 18node2" -> "probe_nodes_complete" [ style = bold] +"probe_complete 18node2" [ style=bold color="green" fontcolor="black"] +"probe_complete 18node3" -> "probe_nodes_complete" [ style = bold] +"probe_complete 18node3" [ style=bold color="green" fontcolor="black"] +"probe_complete 18node4" -> "probe_nodes_complete" [ style = bold] +"probe_complete 18node4" [ style=bold color="green" fontcolor="black"] +"probe_complete remote1" -> "probe_complete" [ style = bold] +"probe_complete remote1" [ style=bold color="green" fontcolor="black"] +"probe_complete" -> "FAKE1_start_0 18node2" [ style = bold] +"probe_complete" -> "FAKE2_stop_0 18node2" [ style = bold] +"probe_complete" -> "FAKE3_start_0 18node3" [ style = bold] +"probe_complete" -> "FAKE3_stop_0 18builder" [ style = bold] +"probe_complete" -> "FAKE4_start_0 18node4" [ style = bold] +"probe_complete" -> "FAKE4_stop_0 18node1" [ style = bold] +"probe_complete" -> "FAKE5_start_0 remote1" [ style = bold] +"probe_complete" -> "FAKECLONE1:0_start_0 18builder" [ style = bold] +"probe_complete" -> "FAKECLONE1:1_start_0 18node1" [ style = bold] +"probe_complete" -> "FAKECLONE1:2_start_0 18node2" [ style = bold] +"probe_complete" -> "FAKECLONE1:3_start_0 18node4" [ style = bold] +"probe_complete" -> "FAKECLONE1:4_start_0 remote1" [ style = bold] +"probe_complete" -> "FAKECLONE1:5_start_0 18node3" [ style = bold] +"probe_complete" -> "FAKECLONE2:0_start_0 18builder" [ style = bold] +"probe_complete" -> "FAKECLONE2:1_start_0 18node1" [ style = bold] +"probe_complete" -> "FAKECLONE2:2_start_0 18node2" [ style = bold] +"probe_complete" -> "FAKECLONE2:3_start_0 18node4" [ style = bold] +"probe_complete" -> "FAKECLONE2:4_start_0 remote1" [ style = bold] +"probe_complete" -> "FAKECLONE2:5_start_0 18node3" [ style = bold] +"probe_complete" [ style=bold color="green" fontcolor="orange"] +"probe_nodes_complete" -> "remote1_start_0 18builder" [ style = bold] +"probe_nodes_complete" [ style=bold color="green" fontcolor="orange"] +"remote1_monitor_0 18node3" -> "probe_complete 18node3" [ style = bold] +"remote1_monitor_0 18node3" [ style=bold color="green" fontcolor="black"] +"remote1_monitor_0 18node4" -> "probe_complete 18node4" [ style = bold] +"remote1_monitor_0 18node4" [ style=bold color="green" fontcolor="black"] +"remote1_monitor_60000 18builder" [ style=bold color="green" fontcolor="black"] +"remote1_start_0 18builder" -> "FAKE5_monitor_0 remote1" [ style = bold] +"remote1_start_0 18builder" -> "FAKE5_monitor_60000 remote1" [ style = bold] +"remote1_start_0 18builder" -> "FAKE5_start_0 remote1" [ style = bold] +"remote1_start_0 18builder" -> "FAKECLONE1:4_monitor_0 remote1" [ style = bold] +"remote1_start_0 18builder" -> "FAKECLONE1:4_monitor_60000 remote1" [ style = bold] +"remote1_start_0 18builder" -> "FAKECLONE1:4_start_0 remote1" [ style = bold] +"remote1_start_0 18builder" -> "FAKECLONE2:4_monitor_0 remote1" [ style = bold] +"remote1_start_0 18builder" -> "FAKECLONE2:4_monitor_60000 remote1" [ style = bold] +"remote1_start_0 18builder" -> "FAKECLONE2:4_start_0 remote1" [ style = bold] +"remote1_start_0 18builder" -> "remote1_monitor_60000 18builder" [ style = bold] +"remote1_start_0 18builder" [ style=bold color="green" fontcolor="black"] +"shooter_monitor_0 18node3" -> "probe_complete 18node3" [ style = bold] +"shooter_monitor_0 18node3" [ style=bold color="green" fontcolor="black"] +"shooter_monitor_0 18node4" -> "probe_complete 18node4" [ style = bold] +"shooter_monitor_0 18node4" [ style=bold color="green" fontcolor="black"] +} diff --git a/pengine/test10/resource-discovery.exp b/pengine/test10/resource-discovery.exp new file mode 100644 index 0000000000..2770f4ec95 --- /dev/null +++ b/pengine/test10/resource-discovery.exp @@ -0,0 +1,1041 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pengine/test10/resource-discovery.scores b/pengine/test10/resource-discovery.scores new file mode 100644 index 0000000000..e1fa78eaa1 --- /dev/null +++ b/pengine/test10/resource-discovery.scores @@ -0,0 +1,199 @@ +Allocation scores: +clone_color: FAKECLONE1-clone allocation score on 18builder: 0 +clone_color: FAKECLONE1-clone allocation score on 18node1: 0 +clone_color: FAKECLONE1-clone allocation score on 18node2: 0 +clone_color: FAKECLONE1-clone allocation score on 18node3: 0 +clone_color: FAKECLONE1-clone allocation score on 18node4: 0 +clone_color: FAKECLONE1-clone allocation score on remote1: 0 +clone_color: FAKECLONE1:0 allocation score on 18builder: 0 +clone_color: FAKECLONE1:0 allocation score on 18node1: 0 +clone_color: FAKECLONE1:0 allocation score on 18node2: 0 +clone_color: FAKECLONE1:0 allocation score on 18node3: 0 +clone_color: FAKECLONE1:0 allocation score on 18node4: 0 +clone_color: FAKECLONE1:0 allocation score on remote1: 0 +clone_color: FAKECLONE1:1 allocation score on 18builder: 0 +clone_color: FAKECLONE1:1 allocation score on 18node1: 0 +clone_color: FAKECLONE1:1 allocation score on 18node2: 0 +clone_color: FAKECLONE1:1 allocation score on 18node3: 0 +clone_color: FAKECLONE1:1 allocation score on 18node4: 0 +clone_color: FAKECLONE1:1 allocation score on remote1: 0 +clone_color: FAKECLONE1:2 allocation score on 18builder: 0 +clone_color: FAKECLONE1:2 allocation score on 18node1: 0 +clone_color: FAKECLONE1:2 allocation score on 18node2: 0 +clone_color: FAKECLONE1:2 allocation score on 18node3: 0 +clone_color: FAKECLONE1:2 allocation score on 18node4: 0 +clone_color: FAKECLONE1:2 allocation score on remote1: 0 +clone_color: FAKECLONE1:3 allocation score on 18builder: 0 +clone_color: FAKECLONE1:3 allocation score on 18node1: 0 +clone_color: FAKECLONE1:3 allocation score on 18node2: 0 +clone_color: FAKECLONE1:3 allocation score on 18node3: 0 +clone_color: FAKECLONE1:3 allocation score on 18node4: 0 +clone_color: FAKECLONE1:3 allocation score on remote1: 0 +clone_color: FAKECLONE1:4 allocation score on 18builder: 0 +clone_color: FAKECLONE1:4 allocation score on 18node1: 0 +clone_color: FAKECLONE1:4 allocation score on 18node2: 0 +clone_color: FAKECLONE1:4 allocation score on 18node3: 0 +clone_color: FAKECLONE1:4 allocation score on 18node4: 0 +clone_color: FAKECLONE1:4 allocation score on remote1: 0 +clone_color: FAKECLONE1:5 allocation score on 18builder: 0 +clone_color: FAKECLONE1:5 allocation score on 18node1: 0 +clone_color: FAKECLONE1:5 allocation score on 18node2: 0 +clone_color: FAKECLONE1:5 allocation score on 18node3: 0 +clone_color: FAKECLONE1:5 allocation score on 18node4: 0 +clone_color: FAKECLONE1:5 allocation score on remote1: 0 +clone_color: FAKECLONE2-clone allocation score on 18builder: 0 +clone_color: FAKECLONE2-clone allocation score on 18node1: 0 +clone_color: FAKECLONE2-clone allocation score on 18node2: 0 +clone_color: FAKECLONE2-clone allocation score on 18node3: 0 +clone_color: FAKECLONE2-clone allocation score on 18node4: 0 +clone_color: FAKECLONE2-clone allocation score on remote1: 0 +clone_color: FAKECLONE2:0 allocation score on 18builder: 0 +clone_color: FAKECLONE2:0 allocation score on 18node1: 0 +clone_color: FAKECLONE2:0 allocation score on 18node2: 0 +clone_color: FAKECLONE2:0 allocation score on 18node3: 0 +clone_color: FAKECLONE2:0 allocation score on 18node4: 0 +clone_color: FAKECLONE2:0 allocation score on remote1: 0 +clone_color: FAKECLONE2:1 allocation score on 18builder: 0 +clone_color: FAKECLONE2:1 allocation score on 18node1: 0 +clone_color: FAKECLONE2:1 allocation score on 18node2: 0 +clone_color: FAKECLONE2:1 allocation score on 18node3: 0 +clone_color: FAKECLONE2:1 allocation score on 18node4: 0 +clone_color: FAKECLONE2:1 allocation score on remote1: 0 +clone_color: FAKECLONE2:2 allocation score on 18builder: 0 +clone_color: FAKECLONE2:2 allocation score on 18node1: 0 +clone_color: FAKECLONE2:2 allocation score on 18node2: 0 +clone_color: FAKECLONE2:2 allocation score on 18node3: 0 +clone_color: FAKECLONE2:2 allocation score on 18node4: 0 +clone_color: FAKECLONE2:2 allocation score on remote1: 0 +clone_color: FAKECLONE2:3 allocation score on 18builder: 0 +clone_color: FAKECLONE2:3 allocation score on 18node1: 0 +clone_color: FAKECLONE2:3 allocation score on 18node2: 0 +clone_color: FAKECLONE2:3 allocation score on 18node3: 0 +clone_color: FAKECLONE2:3 allocation score on 18node4: 0 +clone_color: FAKECLONE2:3 allocation score on remote1: 0 +clone_color: FAKECLONE2:4 allocation score on 18builder: 0 +clone_color: FAKECLONE2:4 allocation score on 18node1: 0 +clone_color: FAKECLONE2:4 allocation score on 18node2: 0 +clone_color: FAKECLONE2:4 allocation score on 18node3: 0 +clone_color: FAKECLONE2:4 allocation score on 18node4: 0 +clone_color: FAKECLONE2:4 allocation score on remote1: 0 +clone_color: FAKECLONE2:5 allocation score on 18builder: 0 +clone_color: FAKECLONE2:5 allocation score on 18node1: 0 +clone_color: FAKECLONE2:5 allocation score on 18node2: 0 +clone_color: FAKECLONE2:5 allocation score on 18node3: 0 +clone_color: FAKECLONE2:5 allocation score on 18node4: 0 +clone_color: FAKECLONE2:5 allocation score on remote1: 0 +native_color: FAKE1 allocation score on 18builder: 0 +native_color: FAKE1 allocation score on 18node1: 0 +native_color: FAKE1 allocation score on 18node2: 0 +native_color: FAKE1 allocation score on 18node3: 0 +native_color: FAKE1 allocation score on 18node4: 0 +native_color: FAKE1 allocation score on remote1: -INFINITY +native_color: FAKE2 allocation score on 18builder: 0 +native_color: FAKE2 allocation score on 18node1: 0 +native_color: FAKE2 allocation score on 18node2: -INFINITY +native_color: FAKE2 allocation score on 18node3: 0 +native_color: FAKE2 allocation score on 18node4: 0 +native_color: FAKE2 allocation score on remote1: 0 +native_color: FAKE3 allocation score on 18builder: 0 +native_color: FAKE3 allocation score on 18node1: 0 +native_color: FAKE3 allocation score on 18node2: 0 +native_color: FAKE3 allocation score on 18node3: INFINITY +native_color: FAKE3 allocation score on 18node4: 0 +native_color: FAKE3 allocation score on remote1: 0 +native_color: FAKE4 allocation score on 18builder: 0 +native_color: FAKE4 allocation score on 18node1: 0 +native_color: FAKE4 allocation score on 18node2: 0 +native_color: FAKE4 allocation score on 18node3: 0 +native_color: FAKE4 allocation score on 18node4: 0 +native_color: FAKE4 allocation score on remote1: 0 +native_color: FAKE5 allocation score on 18builder: 0 +native_color: FAKE5 allocation score on 18node1: 0 +native_color: FAKE5 allocation score on 18node2: 0 +native_color: FAKE5 allocation score on 18node3: 0 +native_color: FAKE5 allocation score on 18node4: 0 +native_color: FAKE5 allocation score on remote1: 0 +native_color: FAKECLONE1:0 allocation score on 18builder: 0 +native_color: FAKECLONE1:0 allocation score on 18node1: 0 +native_color: FAKECLONE1:0 allocation score on 18node2: 0 +native_color: FAKECLONE1:0 allocation score on 18node3: 0 +native_color: FAKECLONE1:0 allocation score on 18node4: 0 +native_color: FAKECLONE1:0 allocation score on remote1: 0 +native_color: FAKECLONE1:1 allocation score on 18builder: -INFINITY +native_color: FAKECLONE1:1 allocation score on 18node1: 0 +native_color: FAKECLONE1:1 allocation score on 18node2: 0 +native_color: FAKECLONE1:1 allocation score on 18node3: 0 +native_color: FAKECLONE1:1 allocation score on 18node4: 0 +native_color: FAKECLONE1:1 allocation score on remote1: 0 +native_color: FAKECLONE1:2 allocation score on 18builder: -INFINITY +native_color: FAKECLONE1:2 allocation score on 18node1: -INFINITY +native_color: FAKECLONE1:2 allocation score on 18node2: 0 +native_color: FAKECLONE1:2 allocation score on 18node3: 0 +native_color: FAKECLONE1:2 allocation score on 18node4: 0 +native_color: FAKECLONE1:2 allocation score on remote1: 0 +native_color: FAKECLONE1:3 allocation score on 18builder: -INFINITY +native_color: FAKECLONE1:3 allocation score on 18node1: -INFINITY +native_color: FAKECLONE1:3 allocation score on 18node2: -INFINITY +native_color: FAKECLONE1:3 allocation score on 18node3: 0 +native_color: FAKECLONE1:3 allocation score on 18node4: 0 +native_color: FAKECLONE1:3 allocation score on remote1: 0 +native_color: FAKECLONE1:4 allocation score on 18builder: -INFINITY +native_color: FAKECLONE1:4 allocation score on 18node1: -INFINITY +native_color: FAKECLONE1:4 allocation score on 18node2: -INFINITY +native_color: FAKECLONE1:4 allocation score on 18node3: 0 +native_color: FAKECLONE1:4 allocation score on 18node4: -INFINITY +native_color: FAKECLONE1:4 allocation score on remote1: 0 +native_color: FAKECLONE1:5 allocation score on 18builder: -INFINITY +native_color: FAKECLONE1:5 allocation score on 18node1: -INFINITY +native_color: FAKECLONE1:5 allocation score on 18node2: -INFINITY +native_color: FAKECLONE1:5 allocation score on 18node3: 0 +native_color: FAKECLONE1:5 allocation score on 18node4: -INFINITY +native_color: FAKECLONE1:5 allocation score on remote1: -INFINITY +native_color: FAKECLONE2:0 allocation score on 18builder: 0 +native_color: FAKECLONE2:0 allocation score on 18node1: 0 +native_color: FAKECLONE2:0 allocation score on 18node2: 0 +native_color: FAKECLONE2:0 allocation score on 18node3: 0 +native_color: FAKECLONE2:0 allocation score on 18node4: 0 +native_color: FAKECLONE2:0 allocation score on remote1: 0 +native_color: FAKECLONE2:1 allocation score on 18builder: -INFINITY +native_color: FAKECLONE2:1 allocation score on 18node1: 0 +native_color: FAKECLONE2:1 allocation score on 18node2: 0 +native_color: FAKECLONE2:1 allocation score on 18node3: 0 +native_color: FAKECLONE2:1 allocation score on 18node4: 0 +native_color: FAKECLONE2:1 allocation score on remote1: 0 +native_color: FAKECLONE2:2 allocation score on 18builder: -INFINITY +native_color: FAKECLONE2:2 allocation score on 18node1: -INFINITY +native_color: FAKECLONE2:2 allocation score on 18node2: 0 +native_color: FAKECLONE2:2 allocation score on 18node3: 0 +native_color: FAKECLONE2:2 allocation score on 18node4: 0 +native_color: FAKECLONE2:2 allocation score on remote1: 0 +native_color: FAKECLONE2:3 allocation score on 18builder: -INFINITY +native_color: FAKECLONE2:3 allocation score on 18node1: -INFINITY +native_color: FAKECLONE2:3 allocation score on 18node2: -INFINITY +native_color: FAKECLONE2:3 allocation score on 18node3: 0 +native_color: FAKECLONE2:3 allocation score on 18node4: 0 +native_color: FAKECLONE2:3 allocation score on remote1: 0 +native_color: FAKECLONE2:4 allocation score on 18builder: -INFINITY +native_color: FAKECLONE2:4 allocation score on 18node1: -INFINITY +native_color: FAKECLONE2:4 allocation score on 18node2: -INFINITY +native_color: FAKECLONE2:4 allocation score on 18node3: 0 +native_color: FAKECLONE2:4 allocation score on 18node4: -INFINITY +native_color: FAKECLONE2:4 allocation score on remote1: 0 +native_color: FAKECLONE2:5 allocation score on 18builder: -INFINITY +native_color: FAKECLONE2:5 allocation score on 18node1: -INFINITY +native_color: FAKECLONE2:5 allocation score on 18node2: -INFINITY +native_color: FAKECLONE2:5 allocation score on 18node3: 0 +native_color: FAKECLONE2:5 allocation score on 18node4: -INFINITY +native_color: FAKECLONE2:5 allocation score on remote1: -INFINITY +native_color: remote1 allocation score on 18builder: 0 +native_color: remote1 allocation score on 18node1: 0 +native_color: remote1 allocation score on 18node2: 0 +native_color: remote1 allocation score on 18node3: 0 +native_color: remote1 allocation score on 18node4: 0 +native_color: remote1 allocation score on remote1: -INFINITY +native_color: shooter allocation score on 18builder: 0 +native_color: shooter allocation score on 18node1: 0 +native_color: shooter allocation score on 18node2: 0 +native_color: shooter allocation score on 18node3: 0 +native_color: shooter allocation score on 18node4: 0 +native_color: shooter allocation score on remote1: -INFINITY diff --git a/pengine/test10/resource-discovery.summary b/pengine/test10/resource-discovery.summary new file mode 100644 index 0000000000..af0e5b3e70 --- /dev/null +++ b/pengine/test10/resource-discovery.summary @@ -0,0 +1,124 @@ + +Current cluster status: +Online: [ 18builder 18node1 18node2 18node3 18node4 ] +RemoteOFFLINE: [ remote1 ] + + shooter (stonith:fence_xvm): Started 18node1 + remote1 (ocf::pacemaker:remote): Stopped + FAKE1 (ocf::heartbeat:Dummy): Stopped + FAKE2 (ocf::heartbeat:Dummy): Started 18node2 + FAKE3 (ocf::heartbeat:Dummy): Started 18builder + FAKE4 (ocf::heartbeat:Dummy): Started 18node1 + FAKE5 (ocf::heartbeat:Dummy): Stopped + Clone Set: FAKECLONE1-clone [FAKECLONE1] + Stopped: [ 18builder 18node1 18node2 18node3 18node4 remote1 ] + Clone Set: FAKECLONE2-clone [FAKECLONE2] + Stopped: [ 18builder 18node1 18node2 18node3 18node4 remote1 ] + +Transition Summary: + * Start remote1 (18builder) + * Start FAKE1 (18node2) + * Move FAKE2 (Started 18node2 -> 18node3) + * Move FAKE3 (Started 18builder -> 18node3) + * Move FAKE4 (Started 18node1 -> 18node4) + * Start FAKE5 (remote1) + * Start FAKECLONE1:0 (18builder) + * Start FAKECLONE1:1 (18node1) + * Start FAKECLONE1:2 (18node2) + * Start FAKECLONE1:3 (18node4) + * Start FAKECLONE1:4 (remote1) + * Start FAKECLONE1:5 (18node3) + * Start FAKECLONE2:0 (18builder) + * Start FAKECLONE2:1 (18node1) + * Start FAKECLONE2:2 (18node2) + * Start FAKECLONE2:3 (18node4) + * Start FAKECLONE2:4 (remote1) + * Start FAKECLONE2:5 (18node3) + +Executing cluster transition: + * Resource action: shooter monitor on 18node4 + * Resource action: shooter monitor on 18node3 + * Resource action: remote1 monitor on 18node4 + * Resource action: remote1 monitor on 18node3 + * Resource action: FAKE1 monitor on 18node4 + * Resource action: FAKE1 monitor on 18node3 + * Resource action: FAKE1 monitor on 18node2 + * Resource action: FAKE1 monitor on 18node1 + * Resource action: FAKE1 monitor on 18builder + * Resource action: FAKE3 monitor on 18node3 + * Resource action: FAKE4 monitor on 18node4 + * Resource action: FAKE5 monitor on 18node4 + * Resource action: FAKE5 monitor on 18node3 + * Resource action: FAKE5 monitor on 18node2 + * Resource action: FAKE5 monitor on 18node1 + * Resource action: FAKE5 monitor on 18builder + * Pseudo action: FAKECLONE1-clone_start_0 + * Resource action: FAKECLONE2:0 monitor on 18builder + * Resource action: FAKECLONE2:1 monitor on 18node1 + * Resource action: FAKECLONE2:3 monitor on 18node4 + * Resource action: FAKECLONE2:5 monitor on 18node3 + * Pseudo action: FAKECLONE2-clone_start_0 + * Pseudo action: probe_nodes_complete + * Resource action: remote1 start on 18builder + * Resource action: FAKE5 monitor on remote1 + * Resource action: FAKECLONE1:4 monitor on remote1 + * Resource action: FAKECLONE2:4 monitor on remote1 + * Pseudo action: probe_complete + * Resource action: remote1 monitor=60000 on 18builder + * Resource action: FAKE1 start on 18node2 + * Resource action: FAKE2 stop on 18node2 + * Resource action: FAKE3 stop on 18builder + * Resource action: FAKE4 stop on 18node1 + * Resource action: FAKE5 start on remote1 + * Resource action: FAKECLONE1:0 start on 18builder + * Resource action: FAKECLONE1:1 start on 18node1 + * Resource action: FAKECLONE1:2 start on 18node2 + * Resource action: FAKECLONE1:3 start on 18node4 + * Resource action: FAKECLONE1:4 start on remote1 + * Resource action: FAKECLONE1:5 start on 18node3 + * Pseudo action: FAKECLONE1-clone_running_0 + * Resource action: FAKECLONE2:0 start on 18builder + * Resource action: FAKECLONE2:1 start on 18node1 + * Resource action: FAKECLONE2:2 start on 18node2 + * Resource action: FAKECLONE2:3 start on 18node4 + * Resource action: FAKECLONE2:4 start on remote1 + * Resource action: FAKECLONE2:5 start on 18node3 + * Pseudo action: FAKECLONE2-clone_running_0 + * Pseudo action: all_stopped + * Resource action: FAKE1 monitor=60000 on 18node2 + * Resource action: FAKE2 start on 18node3 + * Resource action: FAKE3 start on 18node3 + * Resource action: FAKE4 start on 18node4 + * Resource action: FAKE5 monitor=60000 on remote1 + * Resource action: FAKECLONE1:0 monitor=60000 on 18builder + * Resource action: FAKECLONE1:1 monitor=60000 on 18node1 + * Resource action: FAKECLONE1:2 monitor=60000 on 18node2 + * Resource action: FAKECLONE1:3 monitor=60000 on 18node4 + * Resource action: FAKECLONE1:4 monitor=60000 on remote1 + * Resource action: FAKECLONE1:5 monitor=60000 on 18node3 + * Resource action: FAKECLONE2:0 monitor=60000 on 18builder + * Resource action: FAKECLONE2:1 monitor=60000 on 18node1 + * Resource action: FAKECLONE2:2 monitor=60000 on 18node2 + * Resource action: FAKECLONE2:3 monitor=60000 on 18node4 + * Resource action: FAKECLONE2:4 monitor=60000 on remote1 + * Resource action: FAKECLONE2:5 monitor=60000 on 18node3 + * Resource action: FAKE2 monitor=60000 on 18node3 + * Resource action: FAKE3 monitor=60000 on 18node3 + * Resource action: FAKE4 monitor=60000 on 18node4 + +Revised cluster status: +Online: [ 18builder 18node1 18node2 18node3 18node4 ] +RemoteOnline: [ remote1 ] + + shooter (stonith:fence_xvm): Started 18node1 + remote1 (ocf::pacemaker:remote): Started 18builder + FAKE1 (ocf::heartbeat:Dummy): Started 18node2 + FAKE2 (ocf::heartbeat:Dummy): Started 18node3 + FAKE3 (ocf::heartbeat:Dummy): Started 18node3 + FAKE4 (ocf::heartbeat:Dummy): Started 18node4 + FAKE5 (ocf::heartbeat:Dummy): Started remote1 + Clone Set: FAKECLONE1-clone [FAKECLONE1] + Started: [ 18builder 18node1 18node2 18node3 18node4 remote1 ] + Clone Set: FAKECLONE2-clone [FAKECLONE2] + Started: [ 18builder 18node1 18node2 18node3 18node4 remote1 ] + diff --git a/pengine/test10/resource-discovery.xml b/pengine/test10/resource-discovery.xml new file mode 100644 index 0000000000..5836804b8a --- /dev/null +++ b/pengine/test10/resource-discovery.xml @@ -0,0 +1,185 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pengine/test10/rsc-discovery-per-node.dot b/pengine/test10/rsc-discovery-per-node.dot new file mode 100644 index 0000000000..8f5889a6e1 --- /dev/null +++ b/pengine/test10/rsc-discovery-per-node.dot @@ -0,0 +1,195 @@ + digraph "g" { +"FAKE1_monitor_0 18builder" -> "probe_complete 18builder" [ style = bold] +"FAKE1_monitor_0 18builder" [ style=bold color="green" fontcolor="black"] +"FAKE1_monitor_0 18node1" -> "probe_complete 18node1" [ style = bold] +"FAKE1_monitor_0 18node1" [ style=bold color="green" fontcolor="black"] +"FAKE1_monitor_0 18node2" -> "probe_complete 18node2" [ style = bold] +"FAKE1_monitor_0 18node2" [ style=bold color="green" fontcolor="black"] +"FAKE1_monitor_0 18node3" -> "probe_complete 18node3" [ style = bold] +"FAKE1_monitor_0 18node3" [ style=bold color="green" fontcolor="black"] +"FAKE1_monitor_0 18node4" -> "probe_complete 18node4" [ style = bold] +"FAKE1_monitor_0 18node4" [ style=bold color="green" fontcolor="black"] +"FAKE1_monitor_60000 18node2" [ style=bold color="green" fontcolor="black"] +"FAKE1_start_0 18node2" -> "FAKE1_monitor_60000 18node2" [ style = bold] +"FAKE1_start_0 18node2" [ style=bold color="green" fontcolor="black"] +"FAKE2_monitor_0 18node3" -> "probe_complete 18node3" [ style = bold] +"FAKE2_monitor_0 18node3" [ style=bold color="green" fontcolor="black"] +"FAKE2_monitor_0 18node4" -> "probe_complete 18node4" [ style = bold] +"FAKE2_monitor_0 18node4" [ style=bold color="green" fontcolor="black"] +"FAKE2_monitor_60000 18node3" [ style=bold color="green" fontcolor="black"] +"FAKE2_start_0 18node3" -> "FAKE2_monitor_60000 18node3" [ style = bold] +"FAKE2_start_0 18node3" [ style=bold color="green" fontcolor="black"] +"FAKE2_stop_0 18node2" -> "FAKE2_start_0 18node3" [ style = bold] +"FAKE2_stop_0 18node2" -> "all_stopped" [ style = bold] +"FAKE2_stop_0 18node2" [ style=bold color="green" fontcolor="black"] +"FAKE3_monitor_0 18node3" -> "probe_complete 18node3" [ style = bold] +"FAKE3_monitor_0 18node3" [ style=bold color="green" fontcolor="black"] +"FAKE3_monitor_0 18node4" -> "probe_complete 18node4" [ style = bold] +"FAKE3_monitor_0 18node4" [ style=bold color="green" fontcolor="black"] +"FAKE3_monitor_60000 18node4" [ style=bold color="green" fontcolor="black"] +"FAKE3_start_0 18node4" -> "FAKE3_monitor_60000 18node4" [ style = bold] +"FAKE3_start_0 18node4" [ style=bold color="green" fontcolor="black"] +"FAKE3_stop_0 18builder" -> "FAKE3_start_0 18node4" [ style = bold] +"FAKE3_stop_0 18builder" -> "all_stopped" [ style = bold] +"FAKE3_stop_0 18builder" [ style=bold color="green" fontcolor="black"] +"FAKE4_monitor_0 18node3" -> "probe_complete 18node3" [ style = bold] +"FAKE4_monitor_0 18node3" [ style=bold color="green" fontcolor="black"] +"FAKE4_monitor_0 18node4" -> "probe_complete 18node4" [ style = bold] +"FAKE4_monitor_0 18node4" [ style=bold color="green" fontcolor="black"] +"FAKE4_monitor_60000 remote1" [ style=bold color="green" fontcolor="black"] +"FAKE4_start_0 remote1" -> "FAKE4_monitor_60000 remote1" [ style = bold] +"FAKE4_start_0 remote1" [ style=bold color="green" fontcolor="black"] +"FAKE4_stop_0 18node1" -> "FAKE4_start_0 remote1" [ style = bold] +"FAKE4_stop_0 18node1" -> "all_stopped" [ style = bold] +"FAKE4_stop_0 18node1" [ style=bold color="green" fontcolor="black"] +"FAKE5_monitor_0 18builder" -> "probe_complete 18builder" [ style = bold] +"FAKE5_monitor_0 18builder" [ style=bold color="green" fontcolor="black"] +"FAKE5_monitor_0 18node1" -> "probe_complete 18node1" [ style = bold] +"FAKE5_monitor_0 18node1" [ style=bold color="green" fontcolor="black"] +"FAKE5_monitor_0 18node2" -> "probe_complete 18node2" [ style = bold] +"FAKE5_monitor_0 18node2" [ style=bold color="green" fontcolor="black"] +"FAKE5_monitor_0 18node3" -> "probe_complete 18node3" [ style = bold] +"FAKE5_monitor_0 18node3" [ style=bold color="green" fontcolor="black"] +"FAKE5_monitor_0 18node4" -> "probe_complete 18node4" [ style = bold] +"FAKE5_monitor_0 18node4" [ style=bold color="green" fontcolor="black"] +"FAKE5_monitor_60000 18builder" [ style=bold color="green" fontcolor="black"] +"FAKE5_start_0 18builder" -> "FAKE5_monitor_60000 18builder" [ style = bold] +"FAKE5_start_0 18builder" [ style=bold color="green" fontcolor="black"] +"FAKECLONE1-clone_running_0" [ style=bold color="green" fontcolor="orange"] +"FAKECLONE1-clone_start_0" -> "FAKECLONE1-clone_running_0" [ style = bold] +"FAKECLONE1-clone_start_0" -> "FAKECLONE1:0_start_0 18node1" [ style = bold] +"FAKECLONE1-clone_start_0" -> "FAKECLONE1:1_start_0 18node2" [ style = bold] +"FAKECLONE1-clone_start_0" -> "FAKECLONE1:2_start_0 18node3" [ style = bold] +"FAKECLONE1-clone_start_0" -> "FAKECLONE1:3_start_0 18node4" [ style = bold] +"FAKECLONE1-clone_start_0" -> "FAKECLONE1:4_start_0 remote1" [ style = bold] +"FAKECLONE1-clone_start_0" -> "FAKECLONE1:5_start_0 18builder" [ style = bold] +"FAKECLONE1-clone_start_0" [ style=bold color="green" fontcolor="orange"] +"FAKECLONE1:0_monitor_0 18node1" -> "probe_complete 18node1" [ style = bold] +"FAKECLONE1:0_monitor_0 18node1" [ style=bold color="green" fontcolor="black"] +"FAKECLONE1:0_monitor_60000 18node1" [ style=bold color="green" fontcolor="black"] +"FAKECLONE1:0_start_0 18node1" -> "FAKECLONE1-clone_running_0" [ style = bold] +"FAKECLONE1:0_start_0 18node1" -> "FAKECLONE1:0_monitor_60000 18node1" [ style = bold] +"FAKECLONE1:0_start_0 18node1" [ style=bold color="green" fontcolor="black"] +"FAKECLONE1:1_monitor_0 18node2" -> "probe_complete 18node2" [ style = bold] +"FAKECLONE1:1_monitor_0 18node2" [ style=bold color="green" fontcolor="black"] +"FAKECLONE1:1_monitor_60000 18node2" [ style=bold color="green" fontcolor="black"] +"FAKECLONE1:1_start_0 18node2" -> "FAKECLONE1-clone_running_0" [ style = bold] +"FAKECLONE1:1_start_0 18node2" -> "FAKECLONE1:1_monitor_60000 18node2" [ style = bold] +"FAKECLONE1:1_start_0 18node2" [ style=bold color="green" fontcolor="black"] +"FAKECLONE1:2_monitor_0 18node3" -> "probe_complete 18node3" [ style = bold] +"FAKECLONE1:2_monitor_0 18node3" [ style=bold color="green" fontcolor="black"] +"FAKECLONE1:2_monitor_60000 18node3" [ style=bold color="green" fontcolor="black"] +"FAKECLONE1:2_start_0 18node3" -> "FAKECLONE1-clone_running_0" [ style = bold] +"FAKECLONE1:2_start_0 18node3" -> "FAKECLONE1:2_monitor_60000 18node3" [ style = bold] +"FAKECLONE1:2_start_0 18node3" [ style=bold color="green" fontcolor="black"] +"FAKECLONE1:3_monitor_0 18node4" -> "probe_complete 18node4" [ style = bold] +"FAKECLONE1:3_monitor_0 18node4" [ style=bold color="green" fontcolor="black"] +"FAKECLONE1:3_monitor_60000 18node4" [ style=bold color="green" fontcolor="black"] +"FAKECLONE1:3_start_0 18node4" -> "FAKECLONE1-clone_running_0" [ style = bold] +"FAKECLONE1:3_start_0 18node4" -> "FAKECLONE1:3_monitor_60000 18node4" [ style = bold] +"FAKECLONE1:3_start_0 18node4" [ style=bold color="green" fontcolor="black"] +"FAKECLONE1:4_monitor_60000 remote1" [ style=bold color="green" fontcolor="black"] +"FAKECLONE1:4_start_0 remote1" -> "FAKECLONE1-clone_running_0" [ style = bold] +"FAKECLONE1:4_start_0 remote1" -> "FAKECLONE1:4_monitor_60000 remote1" [ style = bold] +"FAKECLONE1:4_start_0 remote1" [ style=bold color="green" fontcolor="black"] +"FAKECLONE1:5_monitor_0 18builder" -> "probe_complete 18builder" [ style = bold] +"FAKECLONE1:5_monitor_0 18builder" [ style=bold color="green" fontcolor="black"] +"FAKECLONE1:5_monitor_60000 18builder" [ style=bold color="green" fontcolor="black"] +"FAKECLONE1:5_start_0 18builder" -> "FAKECLONE1-clone_running_0" [ style = bold] +"FAKECLONE1:5_start_0 18builder" -> "FAKECLONE1:5_monitor_60000 18builder" [ style = bold] +"FAKECLONE1:5_start_0 18builder" [ style=bold color="green" fontcolor="black"] +"FAKECLONE2-clone_running_0" [ style=bold color="green" fontcolor="orange"] +"FAKECLONE2-clone_start_0" -> "FAKECLONE2-clone_running_0" [ style = bold] +"FAKECLONE2-clone_start_0" -> "FAKECLONE2:0_start_0 18node1" [ style = bold] +"FAKECLONE2-clone_start_0" -> "FAKECLONE2:1_start_0 18node2" [ style = bold] +"FAKECLONE2-clone_start_0" -> "FAKECLONE2:2_start_0 18node3" [ style = bold] +"FAKECLONE2-clone_start_0" -> "FAKECLONE2:3_start_0 18node4" [ style = bold] +"FAKECLONE2-clone_start_0" -> "FAKECLONE2:4_start_0 remote1" [ style = bold] +"FAKECLONE2-clone_start_0" -> "FAKECLONE2:5_start_0 18builder" [ style = bold] +"FAKECLONE2-clone_start_0" [ style=bold color="green" fontcolor="orange"] +"FAKECLONE2:0_monitor_0 18node1" -> "probe_complete 18node1" [ style = bold] +"FAKECLONE2:0_monitor_0 18node1" [ style=bold color="green" fontcolor="black"] +"FAKECLONE2:0_monitor_60000 18node1" [ style=bold color="green" fontcolor="black"] +"FAKECLONE2:0_start_0 18node1" -> "FAKECLONE2-clone_running_0" [ style = bold] +"FAKECLONE2:0_start_0 18node1" -> "FAKECLONE2:0_monitor_60000 18node1" [ style = bold] +"FAKECLONE2:0_start_0 18node1" [ style=bold color="green" fontcolor="black"] +"FAKECLONE2:1_monitor_0 18node2" -> "probe_complete 18node2" [ style = bold] +"FAKECLONE2:1_monitor_0 18node2" [ style=bold color="green" fontcolor="black"] +"FAKECLONE2:1_monitor_60000 18node2" [ style=bold color="green" fontcolor="black"] +"FAKECLONE2:1_start_0 18node2" -> "FAKECLONE2-clone_running_0" [ style = bold] +"FAKECLONE2:1_start_0 18node2" -> "FAKECLONE2:1_monitor_60000 18node2" [ style = bold] +"FAKECLONE2:1_start_0 18node2" [ style=bold color="green" fontcolor="black"] +"FAKECLONE2:2_monitor_0 18node3" -> "probe_complete 18node3" [ style = bold] +"FAKECLONE2:2_monitor_0 18node3" [ style=bold color="green" fontcolor="black"] +"FAKECLONE2:2_monitor_60000 18node3" [ style=bold color="green" fontcolor="black"] +"FAKECLONE2:2_start_0 18node3" -> "FAKECLONE2-clone_running_0" [ style = bold] +"FAKECLONE2:2_start_0 18node3" -> "FAKECLONE2:2_monitor_60000 18node3" [ style = bold] +"FAKECLONE2:2_start_0 18node3" [ style=bold color="green" fontcolor="black"] +"FAKECLONE2:3_monitor_0 18node4" -> "probe_complete 18node4" [ style = bold] +"FAKECLONE2:3_monitor_0 18node4" [ style=bold color="green" fontcolor="black"] +"FAKECLONE2:3_monitor_60000 18node4" [ style=bold color="green" fontcolor="black"] +"FAKECLONE2:3_start_0 18node4" -> "FAKECLONE2-clone_running_0" [ style = bold] +"FAKECLONE2:3_start_0 18node4" -> "FAKECLONE2:3_monitor_60000 18node4" [ style = bold] +"FAKECLONE2:3_start_0 18node4" [ style=bold color="green" fontcolor="black"] +"FAKECLONE2:4_monitor_60000 remote1" [ style=bold color="green" fontcolor="black"] +"FAKECLONE2:4_start_0 remote1" -> "FAKECLONE2-clone_running_0" [ style = bold] +"FAKECLONE2:4_start_0 remote1" -> "FAKECLONE2:4_monitor_60000 remote1" [ style = bold] +"FAKECLONE2:4_start_0 remote1" [ style=bold color="green" fontcolor="black"] +"FAKECLONE2:5_monitor_0 18builder" -> "probe_complete 18builder" [ style = bold] +"FAKECLONE2:5_monitor_0 18builder" [ style=bold color="green" fontcolor="black"] +"FAKECLONE2:5_monitor_60000 18builder" [ style=bold color="green" fontcolor="black"] +"FAKECLONE2:5_start_0 18builder" -> "FAKECLONE2-clone_running_0" [ style = bold] +"FAKECLONE2:5_start_0 18builder" -> "FAKECLONE2:5_monitor_60000 18builder" [ style = bold] +"FAKECLONE2:5_start_0 18builder" [ style=bold color="green" fontcolor="black"] +"all_stopped" [ style=bold color="green" fontcolor="orange"] +"probe_complete 18builder" -> "probe_nodes_complete" [ style = bold] +"probe_complete 18builder" [ style=bold color="green" fontcolor="black"] +"probe_complete 18node1" -> "probe_nodes_complete" [ style = bold] +"probe_complete 18node1" [ style=bold color="green" fontcolor="black"] +"probe_complete 18node2" -> "probe_nodes_complete" [ style = bold] +"probe_complete 18node2" [ style=bold color="green" fontcolor="black"] +"probe_complete 18node3" -> "probe_nodes_complete" [ style = bold] +"probe_complete 18node3" [ style=bold color="green" fontcolor="black"] +"probe_complete 18node4" -> "probe_nodes_complete" [ style = bold] +"probe_complete 18node4" [ style=bold color="green" fontcolor="black"] +"probe_complete" -> "FAKE1_start_0 18node2" [ style = bold] +"probe_complete" -> "FAKE2_start_0 18node3" [ style = bold] +"probe_complete" -> "FAKE2_stop_0 18node2" [ style = bold] +"probe_complete" -> "FAKE3_start_0 18node4" [ style = bold] +"probe_complete" -> "FAKE3_stop_0 18builder" [ style = bold] +"probe_complete" -> "FAKE4_start_0 remote1" [ style = bold] +"probe_complete" -> "FAKE4_stop_0 18node1" [ style = bold] +"probe_complete" -> "FAKE5_start_0 18builder" [ style = bold] +"probe_complete" -> "FAKECLONE1:0_start_0 18node1" [ style = bold] +"probe_complete" -> "FAKECLONE1:1_start_0 18node2" [ style = bold] +"probe_complete" -> "FAKECLONE1:2_start_0 18node3" [ style = bold] +"probe_complete" -> "FAKECLONE1:3_start_0 18node4" [ style = bold] +"probe_complete" -> "FAKECLONE1:4_start_0 remote1" [ style = bold] +"probe_complete" -> "FAKECLONE1:5_start_0 18builder" [ style = bold] +"probe_complete" -> "FAKECLONE2:0_start_0 18node1" [ style = bold] +"probe_complete" -> "FAKECLONE2:1_start_0 18node2" [ style = bold] +"probe_complete" -> "FAKECLONE2:2_start_0 18node3" [ style = bold] +"probe_complete" -> "FAKECLONE2:3_start_0 18node4" [ style = bold] +"probe_complete" -> "FAKECLONE2:4_start_0 remote1" [ style = bold] +"probe_complete" -> "FAKECLONE2:5_start_0 18builder" [ style = bold] +"probe_complete" [ style=bold color="green" fontcolor="orange"] +"probe_nodes_complete" -> "remote1_start_0 18builder" [ style = bold] +"probe_nodes_complete" [ style=bold color="green" fontcolor="orange"] +"remote1_monitor_0 18node3" -> "probe_complete 18node3" [ style = bold] +"remote1_monitor_0 18node3" [ style=bold color="green" fontcolor="black"] +"remote1_monitor_0 18node4" -> "probe_complete 18node4" [ style = bold] +"remote1_monitor_0 18node4" [ style=bold color="green" fontcolor="black"] +"remote1_monitor_60000 18builder" [ style=bold color="green" fontcolor="black"] +"remote1_start_0 18builder" -> "FAKE4_monitor_60000 remote1" [ style = bold] +"remote1_start_0 18builder" -> "FAKE4_start_0 remote1" [ style = bold] +"remote1_start_0 18builder" -> "FAKECLONE1:4_monitor_60000 remote1" [ style = bold] +"remote1_start_0 18builder" -> "FAKECLONE1:4_start_0 remote1" [ style = bold] +"remote1_start_0 18builder" -> "FAKECLONE2:4_monitor_60000 remote1" [ style = bold] +"remote1_start_0 18builder" -> "FAKECLONE2:4_start_0 remote1" [ style = bold] +"remote1_start_0 18builder" -> "remote1_monitor_60000 18builder" [ style = bold] +"remote1_start_0 18builder" [ style=bold color="green" fontcolor="black"] +"shooter_monitor_0 18node3" -> "probe_complete 18node3" [ style = bold] +"shooter_monitor_0 18node3" [ style=bold color="green" fontcolor="black"] +"shooter_monitor_0 18node4" -> "probe_complete 18node4" [ style = bold] +"shooter_monitor_0 18node4" [ style=bold color="green" fontcolor="black"] +} diff --git a/pengine/test10/rsc-discovery-per-node.exp b/pengine/test10/rsc-discovery-per-node.exp new file mode 100644 index 0000000000..8de04cd4c7 --- /dev/null +++ b/pengine/test10/rsc-discovery-per-node.exp @@ -0,0 +1,1103 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pengine/test10/rsc-discovery-per-node.scores b/pengine/test10/rsc-discovery-per-node.scores new file mode 100644 index 0000000000..a0ad11fa9a --- /dev/null +++ b/pengine/test10/rsc-discovery-per-node.scores @@ -0,0 +1,199 @@ +Allocation scores: +clone_color: FAKECLONE1-clone allocation score on 18builder: 0 +clone_color: FAKECLONE1-clone allocation score on 18node1: 0 +clone_color: FAKECLONE1-clone allocation score on 18node2: 0 +clone_color: FAKECLONE1-clone allocation score on 18node3: 0 +clone_color: FAKECLONE1-clone allocation score on 18node4: 0 +clone_color: FAKECLONE1-clone allocation score on remote1: 0 +clone_color: FAKECLONE1:0 allocation score on 18builder: 0 +clone_color: FAKECLONE1:0 allocation score on 18node1: 0 +clone_color: FAKECLONE1:0 allocation score on 18node2: 0 +clone_color: FAKECLONE1:0 allocation score on 18node3: 0 +clone_color: FAKECLONE1:0 allocation score on 18node4: 0 +clone_color: FAKECLONE1:0 allocation score on remote1: 0 +clone_color: FAKECLONE1:1 allocation score on 18builder: 0 +clone_color: FAKECLONE1:1 allocation score on 18node1: 0 +clone_color: FAKECLONE1:1 allocation score on 18node2: 0 +clone_color: FAKECLONE1:1 allocation score on 18node3: 0 +clone_color: FAKECLONE1:1 allocation score on 18node4: 0 +clone_color: FAKECLONE1:1 allocation score on remote1: 0 +clone_color: FAKECLONE1:2 allocation score on 18builder: 0 +clone_color: FAKECLONE1:2 allocation score on 18node1: 0 +clone_color: FAKECLONE1:2 allocation score on 18node2: 0 +clone_color: FAKECLONE1:2 allocation score on 18node3: 0 +clone_color: FAKECLONE1:2 allocation score on 18node4: 0 +clone_color: FAKECLONE1:2 allocation score on remote1: 0 +clone_color: FAKECLONE1:3 allocation score on 18builder: 0 +clone_color: FAKECLONE1:3 allocation score on 18node1: 0 +clone_color: FAKECLONE1:3 allocation score on 18node2: 0 +clone_color: FAKECLONE1:3 allocation score on 18node3: 0 +clone_color: FAKECLONE1:3 allocation score on 18node4: 0 +clone_color: FAKECLONE1:3 allocation score on remote1: 0 +clone_color: FAKECLONE1:4 allocation score on 18builder: 0 +clone_color: FAKECLONE1:4 allocation score on 18node1: 0 +clone_color: FAKECLONE1:4 allocation score on 18node2: 0 +clone_color: FAKECLONE1:4 allocation score on 18node3: 0 +clone_color: FAKECLONE1:4 allocation score on 18node4: 0 +clone_color: FAKECLONE1:4 allocation score on remote1: 0 +clone_color: FAKECLONE1:5 allocation score on 18builder: 0 +clone_color: FAKECLONE1:5 allocation score on 18node1: 0 +clone_color: FAKECLONE1:5 allocation score on 18node2: 0 +clone_color: FAKECLONE1:5 allocation score on 18node3: 0 +clone_color: FAKECLONE1:5 allocation score on 18node4: 0 +clone_color: FAKECLONE1:5 allocation score on remote1: 0 +clone_color: FAKECLONE2-clone allocation score on 18builder: 0 +clone_color: FAKECLONE2-clone allocation score on 18node1: 0 +clone_color: FAKECLONE2-clone allocation score on 18node2: 0 +clone_color: FAKECLONE2-clone allocation score on 18node3: 0 +clone_color: FAKECLONE2-clone allocation score on 18node4: 0 +clone_color: FAKECLONE2-clone allocation score on remote1: 0 +clone_color: FAKECLONE2:0 allocation score on 18builder: 0 +clone_color: FAKECLONE2:0 allocation score on 18node1: 0 +clone_color: FAKECLONE2:0 allocation score on 18node2: 0 +clone_color: FAKECLONE2:0 allocation score on 18node3: 0 +clone_color: FAKECLONE2:0 allocation score on 18node4: 0 +clone_color: FAKECLONE2:0 allocation score on remote1: 0 +clone_color: FAKECLONE2:1 allocation score on 18builder: 0 +clone_color: FAKECLONE2:1 allocation score on 18node1: 0 +clone_color: FAKECLONE2:1 allocation score on 18node2: 0 +clone_color: FAKECLONE2:1 allocation score on 18node3: 0 +clone_color: FAKECLONE2:1 allocation score on 18node4: 0 +clone_color: FAKECLONE2:1 allocation score on remote1: 0 +clone_color: FAKECLONE2:2 allocation score on 18builder: 0 +clone_color: FAKECLONE2:2 allocation score on 18node1: 0 +clone_color: FAKECLONE2:2 allocation score on 18node2: 0 +clone_color: FAKECLONE2:2 allocation score on 18node3: 0 +clone_color: FAKECLONE2:2 allocation score on 18node4: 0 +clone_color: FAKECLONE2:2 allocation score on remote1: 0 +clone_color: FAKECLONE2:3 allocation score on 18builder: 0 +clone_color: FAKECLONE2:3 allocation score on 18node1: 0 +clone_color: FAKECLONE2:3 allocation score on 18node2: 0 +clone_color: FAKECLONE2:3 allocation score on 18node3: 0 +clone_color: FAKECLONE2:3 allocation score on 18node4: 0 +clone_color: FAKECLONE2:3 allocation score on remote1: 0 +clone_color: FAKECLONE2:4 allocation score on 18builder: 0 +clone_color: FAKECLONE2:4 allocation score on 18node1: 0 +clone_color: FAKECLONE2:4 allocation score on 18node2: 0 +clone_color: FAKECLONE2:4 allocation score on 18node3: 0 +clone_color: FAKECLONE2:4 allocation score on 18node4: 0 +clone_color: FAKECLONE2:4 allocation score on remote1: 0 +clone_color: FAKECLONE2:5 allocation score on 18builder: 0 +clone_color: FAKECLONE2:5 allocation score on 18node1: 0 +clone_color: FAKECLONE2:5 allocation score on 18node2: 0 +clone_color: FAKECLONE2:5 allocation score on 18node3: 0 +clone_color: FAKECLONE2:5 allocation score on 18node4: 0 +clone_color: FAKECLONE2:5 allocation score on remote1: 0 +native_color: FAKE1 allocation score on 18builder: 0 +native_color: FAKE1 allocation score on 18node1: 0 +native_color: FAKE1 allocation score on 18node2: 0 +native_color: FAKE1 allocation score on 18node3: 0 +native_color: FAKE1 allocation score on 18node4: 0 +native_color: FAKE1 allocation score on remote1: 0 +native_color: FAKE2 allocation score on 18builder: 0 +native_color: FAKE2 allocation score on 18node1: 0 +native_color: FAKE2 allocation score on 18node2: 0 +native_color: FAKE2 allocation score on 18node3: 0 +native_color: FAKE2 allocation score on 18node4: 0 +native_color: FAKE2 allocation score on remote1: 0 +native_color: FAKE3 allocation score on 18builder: 0 +native_color: FAKE3 allocation score on 18node1: 0 +native_color: FAKE3 allocation score on 18node2: 0 +native_color: FAKE3 allocation score on 18node3: 0 +native_color: FAKE3 allocation score on 18node4: 0 +native_color: FAKE3 allocation score on remote1: 0 +native_color: FAKE4 allocation score on 18builder: 0 +native_color: FAKE4 allocation score on 18node1: 0 +native_color: FAKE4 allocation score on 18node2: 0 +native_color: FAKE4 allocation score on 18node3: 0 +native_color: FAKE4 allocation score on 18node4: 0 +native_color: FAKE4 allocation score on remote1: 0 +native_color: FAKE5 allocation score on 18builder: 0 +native_color: FAKE5 allocation score on 18node1: 0 +native_color: FAKE5 allocation score on 18node2: 0 +native_color: FAKE5 allocation score on 18node3: 0 +native_color: FAKE5 allocation score on 18node4: 0 +native_color: FAKE5 allocation score on remote1: 0 +native_color: FAKECLONE1:0 allocation score on 18builder: 0 +native_color: FAKECLONE1:0 allocation score on 18node1: 0 +native_color: FAKECLONE1:0 allocation score on 18node2: 0 +native_color: FAKECLONE1:0 allocation score on 18node3: 0 +native_color: FAKECLONE1:0 allocation score on 18node4: 0 +native_color: FAKECLONE1:0 allocation score on remote1: 0 +native_color: FAKECLONE1:1 allocation score on 18builder: 0 +native_color: FAKECLONE1:1 allocation score on 18node1: -INFINITY +native_color: FAKECLONE1:1 allocation score on 18node2: 0 +native_color: FAKECLONE1:1 allocation score on 18node3: 0 +native_color: FAKECLONE1:1 allocation score on 18node4: 0 +native_color: FAKECLONE1:1 allocation score on remote1: 0 +native_color: FAKECLONE1:2 allocation score on 18builder: 0 +native_color: FAKECLONE1:2 allocation score on 18node1: -INFINITY +native_color: FAKECLONE1:2 allocation score on 18node2: -INFINITY +native_color: FAKECLONE1:2 allocation score on 18node3: 0 +native_color: FAKECLONE1:2 allocation score on 18node4: 0 +native_color: FAKECLONE1:2 allocation score on remote1: 0 +native_color: FAKECLONE1:3 allocation score on 18builder: 0 +native_color: FAKECLONE1:3 allocation score on 18node1: -INFINITY +native_color: FAKECLONE1:3 allocation score on 18node2: -INFINITY +native_color: FAKECLONE1:3 allocation score on 18node3: -INFINITY +native_color: FAKECLONE1:3 allocation score on 18node4: 0 +native_color: FAKECLONE1:3 allocation score on remote1: 0 +native_color: FAKECLONE1:4 allocation score on 18builder: 0 +native_color: FAKECLONE1:4 allocation score on 18node1: -INFINITY +native_color: FAKECLONE1:4 allocation score on 18node2: -INFINITY +native_color: FAKECLONE1:4 allocation score on 18node3: -INFINITY +native_color: FAKECLONE1:4 allocation score on 18node4: -INFINITY +native_color: FAKECLONE1:4 allocation score on remote1: 0 +native_color: FAKECLONE1:5 allocation score on 18builder: 0 +native_color: FAKECLONE1:5 allocation score on 18node1: -INFINITY +native_color: FAKECLONE1:5 allocation score on 18node2: -INFINITY +native_color: FAKECLONE1:5 allocation score on 18node3: -INFINITY +native_color: FAKECLONE1:5 allocation score on 18node4: -INFINITY +native_color: FAKECLONE1:5 allocation score on remote1: -INFINITY +native_color: FAKECLONE2:0 allocation score on 18builder: 0 +native_color: FAKECLONE2:0 allocation score on 18node1: 0 +native_color: FAKECLONE2:0 allocation score on 18node2: 0 +native_color: FAKECLONE2:0 allocation score on 18node3: 0 +native_color: FAKECLONE2:0 allocation score on 18node4: 0 +native_color: FAKECLONE2:0 allocation score on remote1: 0 +native_color: FAKECLONE2:1 allocation score on 18builder: 0 +native_color: FAKECLONE2:1 allocation score on 18node1: -INFINITY +native_color: FAKECLONE2:1 allocation score on 18node2: 0 +native_color: FAKECLONE2:1 allocation score on 18node3: 0 +native_color: FAKECLONE2:1 allocation score on 18node4: 0 +native_color: FAKECLONE2:1 allocation score on remote1: 0 +native_color: FAKECLONE2:2 allocation score on 18builder: 0 +native_color: FAKECLONE2:2 allocation score on 18node1: -INFINITY +native_color: FAKECLONE2:2 allocation score on 18node2: -INFINITY +native_color: FAKECLONE2:2 allocation score on 18node3: 0 +native_color: FAKECLONE2:2 allocation score on 18node4: 0 +native_color: FAKECLONE2:2 allocation score on remote1: 0 +native_color: FAKECLONE2:3 allocation score on 18builder: 0 +native_color: FAKECLONE2:3 allocation score on 18node1: -INFINITY +native_color: FAKECLONE2:3 allocation score on 18node2: -INFINITY +native_color: FAKECLONE2:3 allocation score on 18node3: -INFINITY +native_color: FAKECLONE2:3 allocation score on 18node4: 0 +native_color: FAKECLONE2:3 allocation score on remote1: 0 +native_color: FAKECLONE2:4 allocation score on 18builder: 0 +native_color: FAKECLONE2:4 allocation score on 18node1: -INFINITY +native_color: FAKECLONE2:4 allocation score on 18node2: -INFINITY +native_color: FAKECLONE2:4 allocation score on 18node3: -INFINITY +native_color: FAKECLONE2:4 allocation score on 18node4: -INFINITY +native_color: FAKECLONE2:4 allocation score on remote1: 0 +native_color: FAKECLONE2:5 allocation score on 18builder: 0 +native_color: FAKECLONE2:5 allocation score on 18node1: -INFINITY +native_color: FAKECLONE2:5 allocation score on 18node2: -INFINITY +native_color: FAKECLONE2:5 allocation score on 18node3: -INFINITY +native_color: FAKECLONE2:5 allocation score on 18node4: -INFINITY +native_color: FAKECLONE2:5 allocation score on remote1: -INFINITY +native_color: remote1 allocation score on 18builder: 0 +native_color: remote1 allocation score on 18node1: 0 +native_color: remote1 allocation score on 18node2: 0 +native_color: remote1 allocation score on 18node3: 0 +native_color: remote1 allocation score on 18node4: 0 +native_color: remote1 allocation score on remote1: -INFINITY +native_color: shooter allocation score on 18builder: 0 +native_color: shooter allocation score on 18node1: 0 +native_color: shooter allocation score on 18node2: 0 +native_color: shooter allocation score on 18node3: 0 +native_color: shooter allocation score on 18node4: 0 +native_color: shooter allocation score on remote1: -INFINITY diff --git a/pengine/test10/rsc-discovery-per-node.summary b/pengine/test10/rsc-discovery-per-node.summary new file mode 100644 index 0000000000..69d5ee3352 --- /dev/null +++ b/pengine/test10/rsc-discovery-per-node.summary @@ -0,0 +1,131 @@ + +Current cluster status: +Online: [ 18builder 18node1 18node2 18node3 18node4 ] +RemoteOFFLINE: [ remote1 ] + + shooter (stonith:fence_xvm): Started 18node1 + remote1 (ocf::pacemaker:remote): Stopped + FAKE1 (ocf::heartbeat:Dummy): Stopped + FAKE2 (ocf::heartbeat:Dummy): Started 18node2 + FAKE3 (ocf::heartbeat:Dummy): Started 18builder + FAKE4 (ocf::heartbeat:Dummy): Started 18node1 + FAKE5 (ocf::heartbeat:Dummy): Stopped + Clone Set: FAKECLONE1-clone [FAKECLONE1] + Stopped: [ 18builder 18node1 18node2 18node3 18node4 remote1 ] + Clone Set: FAKECLONE2-clone [FAKECLONE2] + Stopped: [ 18builder 18node1 18node2 18node3 18node4 remote1 ] + +Transition Summary: + * Start remote1 (18builder) + * Start FAKE1 (18node2) + * Move FAKE2 (Started 18node2 -> 18node3) + * Move FAKE3 (Started 18builder -> 18node4) + * Move FAKE4 (Started 18node1 -> remote1) + * Start FAKE5 (18builder) + * Start FAKECLONE1:0 (18node1) + * Start FAKECLONE1:1 (18node2) + * Start FAKECLONE1:2 (18node3) + * Start FAKECLONE1:3 (18node4) + * Start FAKECLONE1:4 (remote1) + * Start FAKECLONE1:5 (18builder) + * Start FAKECLONE2:0 (18node1) + * Start FAKECLONE2:1 (18node2) + * Start FAKECLONE2:2 (18node3) + * Start FAKECLONE2:3 (18node4) + * Start FAKECLONE2:4 (remote1) + * Start FAKECLONE2:5 (18builder) + +Executing cluster transition: + * Resource action: shooter monitor on 18node4 + * Resource action: shooter monitor on 18node3 + * Resource action: remote1 monitor on 18node4 + * Resource action: remote1 monitor on 18node3 + * Resource action: FAKE1 monitor on 18node4 + * Resource action: FAKE1 monitor on 18node3 + * Resource action: FAKE1 monitor on 18node2 + * Resource action: FAKE1 monitor on 18node1 + * Resource action: FAKE1 monitor on 18builder + * Resource action: FAKE2 monitor on 18node4 + * Resource action: FAKE2 monitor on 18node3 + * Resource action: FAKE3 monitor on 18node4 + * Resource action: FAKE3 monitor on 18node3 + * Resource action: FAKE4 monitor on 18node4 + * Resource action: FAKE4 monitor on 18node3 + * Resource action: FAKE5 monitor on 18node4 + * Resource action: FAKE5 monitor on 18node3 + * Resource action: FAKE5 monitor on 18node2 + * Resource action: FAKE5 monitor on 18node1 + * Resource action: FAKE5 monitor on 18builder + * Resource action: FAKECLONE1:0 monitor on 18node1 + * Resource action: FAKECLONE1:1 monitor on 18node2 + * Resource action: FAKECLONE1:2 monitor on 18node3 + * Resource action: FAKECLONE1:3 monitor on 18node4 + * Resource action: FAKECLONE1:5 monitor on 18builder + * Pseudo action: FAKECLONE1-clone_start_0 + * Resource action: FAKECLONE2:0 monitor on 18node1 + * Resource action: FAKECLONE2:1 monitor on 18node2 + * Resource action: FAKECLONE2:2 monitor on 18node3 + * Resource action: FAKECLONE2:3 monitor on 18node4 + * Resource action: FAKECLONE2:5 monitor on 18builder + * Pseudo action: FAKECLONE2-clone_start_0 + * Pseudo action: probe_nodes_complete + * Pseudo action: probe_complete + * Resource action: remote1 start on 18builder + * Resource action: FAKE1 start on 18node2 + * Resource action: FAKE2 stop on 18node2 + * Resource action: FAKE3 stop on 18builder + * Resource action: FAKE4 stop on 18node1 + * Resource action: FAKE5 start on 18builder + * Resource action: FAKECLONE1:0 start on 18node1 + * Resource action: FAKECLONE1:1 start on 18node2 + * Resource action: FAKECLONE1:2 start on 18node3 + * Resource action: FAKECLONE1:3 start on 18node4 + * Resource action: FAKECLONE1:4 start on remote1 + * Resource action: FAKECLONE1:5 start on 18builder + * Pseudo action: FAKECLONE1-clone_running_0 + * Resource action: FAKECLONE2:0 start on 18node1 + * Resource action: FAKECLONE2:1 start on 18node2 + * Resource action: FAKECLONE2:2 start on 18node3 + * Resource action: FAKECLONE2:3 start on 18node4 + * Resource action: FAKECLONE2:4 start on remote1 + * Resource action: FAKECLONE2:5 start on 18builder + * Pseudo action: FAKECLONE2-clone_running_0 + * Pseudo action: all_stopped + * Resource action: remote1 monitor=60000 on 18builder + * Resource action: FAKE1 monitor=60000 on 18node2 + * Resource action: FAKE2 start on 18node3 + * Resource action: FAKE3 start on 18node4 + * Resource action: FAKE4 start on remote1 + * Resource action: FAKE5 monitor=60000 on 18builder + * Resource action: FAKECLONE1:0 monitor=60000 on 18node1 + * Resource action: FAKECLONE1:1 monitor=60000 on 18node2 + * Resource action: FAKECLONE1:2 monitor=60000 on 18node3 + * Resource action: FAKECLONE1:3 monitor=60000 on 18node4 + * Resource action: FAKECLONE1:4 monitor=60000 on remote1 + * Resource action: FAKECLONE1:5 monitor=60000 on 18builder + * Resource action: FAKECLONE2:0 monitor=60000 on 18node1 + * Resource action: FAKECLONE2:1 monitor=60000 on 18node2 + * Resource action: FAKECLONE2:2 monitor=60000 on 18node3 + * Resource action: FAKECLONE2:3 monitor=60000 on 18node4 + * Resource action: FAKECLONE2:4 monitor=60000 on remote1 + * Resource action: FAKECLONE2:5 monitor=60000 on 18builder + * Resource action: FAKE2 monitor=60000 on 18node3 + * Resource action: FAKE3 monitor=60000 on 18node4 + * Resource action: FAKE4 monitor=60000 on remote1 + +Revised cluster status: +Online: [ 18builder 18node1 18node2 18node3 18node4 ] +RemoteOnline: [ remote1 ] + + shooter (stonith:fence_xvm): Started 18node1 + remote1 (ocf::pacemaker:remote): Started 18builder + FAKE1 (ocf::heartbeat:Dummy): Started 18node2 + FAKE2 (ocf::heartbeat:Dummy): Started 18node3 + FAKE3 (ocf::heartbeat:Dummy): Started 18node4 + FAKE4 (ocf::heartbeat:Dummy): Started remote1 + FAKE5 (ocf::heartbeat:Dummy): Started 18builder + Clone Set: FAKECLONE1-clone [FAKECLONE1] + Started: [ 18builder 18node1 18node2 18node3 18node4 remote1 ] + Clone Set: FAKECLONE2-clone [FAKECLONE2] + Started: [ 18builder 18node1 18node2 18node3 18node4 remote1 ] + diff --git a/pengine/test10/rsc-discovery-per-node.xml b/pengine/test10/rsc-discovery-per-node.xml new file mode 100644 index 0000000000..c111de8188 --- /dev/null +++ b/pengine/test10/rsc-discovery-per-node.xml @@ -0,0 +1,182 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pengine/utils.c b/pengine/utils.c index 7dfaf95fe3..9782cb6144 100644 --- a/pengine/utils.c +++ b/pengine/utils.c @@ -1,502 +1,515 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include void pe_free_ordering(GListPtr constraints) { GListPtr iterator = constraints; while (iterator != NULL) { order_constraint_t *order = iterator->data; iterator = iterator->next; free(order->lh_action_task); free(order->rh_action_task); free(order); } if (constraints != NULL) { g_list_free(constraints); } } void pe_free_rsc_to_node(GListPtr constraints) { GListPtr iterator = constraints; while (iterator != NULL) { rsc_to_node_t *cons = iterator->data; iterator = iterator->next; g_list_free_full(cons->node_list_rh, free); free(cons); } if (constraints != NULL) { g_list_free(constraints); } } rsc_to_node_t * rsc2node_new(const char *id, resource_t * rsc, - int node_weight, node_t * foo_node, pe_working_set_t * data_set) + int node_weight, const char *discover_mode, + node_t * foo_node, pe_working_set_t * data_set) { rsc_to_node_t *new_con = NULL; if (rsc == NULL || id == NULL) { pe_err("Invalid constraint %s for rsc=%p", crm_str(id), rsc); return NULL; } else if (foo_node == NULL) { CRM_CHECK(node_weight == 0, return NULL); } new_con = calloc(1, sizeof(rsc_to_node_t)); if (new_con != NULL) { new_con->id = id; new_con->rsc_lh = rsc; new_con->node_list_rh = NULL; new_con->role_filter = RSC_ROLE_UNKNOWN; + + if (discover_mode == NULL || safe_str_eq(discover_mode, "always")) { + new_con->discover_mode = discover_always; + } else if (safe_str_eq(discover_mode, "never")) { + new_con->discover_mode = discover_never; + } else if (safe_str_eq(discover_mode, "exclusive")) { + new_con->discover_mode = discover_exclusive; + rsc->exclusive_discover = TRUE; + } else { + pe_err("Invalid %s value %s in location constraint", XML_LOCATION_ATTR_DISCOVERY, discover_mode); + } + if (foo_node != NULL) { node_t *copy = node_copy(foo_node); copy->weight = merge_weights(node_weight, foo_node->weight); new_con->node_list_rh = g_list_prepend(NULL, copy); } data_set->placement_constraints = g_list_prepend(data_set->placement_constraints, new_con); rsc->rsc_location = g_list_prepend(rsc->rsc_location, new_con); } return new_con; } gboolean can_run_resources(const node_t * node) { if (node == NULL) { return FALSE; } #if 0 if (node->weight < 0) { return FALSE; } #endif if (node->details->online == FALSE || node->details->shutdown || node->details->unclean || node->details->standby || node->details->maintenance) { crm_trace("%s: online=%d, unclean=%d, standby=%d, maintenance=%d", node->details->uname, node->details->online, node->details->unclean, node->details->standby, node->details->maintenance); return FALSE; } return TRUE; } struct compare_data { const node_t *node1; const node_t *node2; int result; }; static void do_compare_capacity1(gpointer key, gpointer value, gpointer user_data) { int node1_capacity = 0; int node2_capacity = 0; struct compare_data *data = user_data; node1_capacity = crm_parse_int(value, "0"); node2_capacity = crm_parse_int(g_hash_table_lookup(data->node2->details->utilization, key), "0"); if (node1_capacity > node2_capacity) { data->result--; } else if (node1_capacity < node2_capacity) { data->result++; } } static void do_compare_capacity2(gpointer key, gpointer value, gpointer user_data) { int node1_capacity = 0; int node2_capacity = 0; struct compare_data *data = user_data; if (g_hash_table_lookup_extended(data->node1->details->utilization, key, NULL, NULL)) { return; } node1_capacity = 0; node2_capacity = crm_parse_int(value, "0"); if (node1_capacity > node2_capacity) { data->result--; } else if (node1_capacity < node2_capacity) { data->result++; } } /* rc < 0 if 'node1' has more capacity remaining * rc > 0 if 'node1' has less capacity remaining */ static int compare_capacity(const node_t * node1, const node_t * node2) { struct compare_data data; data.node1 = node1; data.node2 = node2; data.result = 0; g_hash_table_foreach(node1->details->utilization, do_compare_capacity1, &data); g_hash_table_foreach(node2->details->utilization, do_compare_capacity2, &data); return data.result; } /* return -1 if 'a' is more preferred * return 1 if 'b' is more preferred */ gint sort_node_weight(gconstpointer a, gconstpointer b, gpointer data) { const node_t *node1 = (const node_t *)a; const node_t *node2 = (const node_t *)b; const node_t *active = (node_t *) data; int node1_weight = 0; int node2_weight = 0; int result = 0; if (a == NULL) { return 1; } if (b == NULL) { return -1; } node1_weight = node1->weight; node2_weight = node2->weight; if (can_run_resources(node1) == FALSE) { node1_weight = -INFINITY; } if (can_run_resources(node2) == FALSE) { node2_weight = -INFINITY; } if (node1_weight > node2_weight) { crm_trace("%s (%d) > %s (%d) : weight", node1->details->uname, node1_weight, node2->details->uname, node2_weight); return -1; } if (node1_weight < node2_weight) { crm_trace("%s (%d) < %s (%d) : weight", node1->details->uname, node1_weight, node2->details->uname, node2_weight); return 1; } crm_trace("%s (%d) == %s (%d) : weight", node1->details->uname, node1_weight, node2->details->uname, node2_weight); if (safe_str_eq(pe_dataset->placement_strategy, "minimal")) { goto equal; } if (safe_str_eq(pe_dataset->placement_strategy, "balanced")) { result = compare_capacity(node1, node2); if (result != 0) { return result; } } /* now try to balance resources across the cluster */ if (node1->details->num_resources < node2->details->num_resources) { crm_trace("%s (%d) < %s (%d) : resources", node1->details->uname, node1->details->num_resources, node2->details->uname, node2->details->num_resources); return -1; } else if (node1->details->num_resources > node2->details->num_resources) { crm_trace("%s (%d) > %s (%d) : resources", node1->details->uname, node1->details->num_resources, node2->details->uname, node2->details->num_resources); return 1; } if (active && active->details == node1->details) { crm_trace("%s (%d) > %s (%d) : active", node1->details->uname, node1->details->num_resources, node2->details->uname, node2->details->num_resources); return -1; } else if (active && active->details == node2->details) { crm_trace("%s (%d) > %s (%d) : active", node1->details->uname, node1->details->num_resources, node2->details->uname, node2->details->num_resources); return 1; } equal: crm_trace("%s = %s", node1->details->uname, node2->details->uname); return strcmp(node1->details->uname, node2->details->uname); } struct calculate_data { node_t *node; gboolean allocate; }; static void do_calculate_utilization(gpointer key, gpointer value, gpointer user_data) { const char *capacity = NULL; char *remain_capacity = NULL; struct calculate_data *data = user_data; capacity = g_hash_table_lookup(data->node->details->utilization, key); if (capacity) { if (data->allocate) { remain_capacity = crm_itoa(crm_parse_int(capacity, "0") - crm_parse_int(value, "0")); } else { remain_capacity = crm_itoa(crm_parse_int(capacity, "0") + crm_parse_int(value, "0")); } g_hash_table_replace(data->node->details->utilization, strdup(key), remain_capacity); } } /* Specify 'allocate' to TRUE when allocating * Otherwise to FALSE when deallocating */ static void calculate_utilization(node_t * node, resource_t * rsc, gboolean allocate) { struct calculate_data data; data.node = node; data.allocate = allocate; g_hash_table_foreach(rsc->utilization, do_calculate_utilization, &data); if (allocate) { dump_rsc_utilization(show_utilization ? 0 : utilization_log_level, __FUNCTION__, rsc, node); } } void native_deallocate(resource_t * rsc) { if (rsc->allocated_to) { node_t *old = rsc->allocated_to; crm_info("Deallocating %s from %s", rsc->id, old->details->uname); set_bit(rsc->flags, pe_rsc_provisional); rsc->allocated_to = NULL; old->details->allocated_rsc = g_list_remove(old->details->allocated_rsc, rsc); old->details->num_resources--; /* old->count--; */ calculate_utilization(old, rsc, FALSE); free(old); } } gboolean native_assign_node(resource_t * rsc, GListPtr nodes, node_t * chosen, gboolean force) { CRM_ASSERT(rsc->variant == pe_native); if (force == FALSE && chosen != NULL && (can_run_resources(chosen) == FALSE || chosen->weight < 0)) { crm_debug("All nodes for resource %s are unavailable" ", unclean or shutting down (%s: %d, %d)", rsc->id, chosen->details->uname, can_run_resources(chosen), chosen->weight); rsc->next_role = RSC_ROLE_STOPPED; chosen = NULL; } /* todo: update the old node for each resource to reflect its * new resource count */ native_deallocate(rsc); clear_bit(rsc->flags, pe_rsc_provisional); if (chosen == NULL) { char *key = NULL; GListPtr gIter = NULL; GListPtr possible_matches = NULL; crm_debug("Could not allocate a node for %s", rsc->id); rsc->next_role = RSC_ROLE_STOPPED; key = generate_op_key(rsc->id, RSC_STOP, 0); possible_matches = find_actions(rsc->actions, key, NULL); for (gIter = possible_matches; gIter != NULL; gIter = gIter->next) { action_t *stop = (action_t *) gIter->data; update_action_flags(stop, pe_action_optional | pe_action_clear); } g_list_free(possible_matches); free(key); key = generate_op_key(rsc->id, RSC_START, 0); possible_matches = find_actions(rsc->actions, key, NULL); for (gIter = possible_matches; gIter != NULL; gIter = gIter->next) { action_t *start = (action_t *) gIter->data; update_action_flags(start, pe_action_runnable | pe_action_clear); } g_list_free(possible_matches); free(key); return FALSE; } crm_debug("Assigning %s to %s", chosen->details->uname, rsc->id); rsc->allocated_to = node_copy(chosen); chosen->details->allocated_rsc = g_list_prepend(chosen->details->allocated_rsc, rsc); chosen->details->num_resources++; chosen->count++; calculate_utilization(chosen, rsc, TRUE); return TRUE; } void log_action(unsigned int log_level, const char *pre_text, action_t * action, gboolean details) { const char *node_uname = NULL; const char *node_uuid = NULL; if (action == NULL) { crm_trace("%s%s: ", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": "); return; } if (is_set(action->flags, pe_action_pseudo)) { node_uname = NULL; node_uuid = NULL; } else if (action->node != NULL) { node_uname = action->node->details->uname; node_uuid = action->node->details->id; } else { node_uname = ""; node_uuid = NULL; } switch (text2task(action->task)) { case stonith_node: case shutdown_crm: crm_trace("%s%s%sAction %d: %s%s%s%s%s%s", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": ", is_set(action->flags, pe_action_pseudo) ? "Pseduo " : is_set(action->flags, pe_action_optional) ? "Optional " : is_set(action->flags, pe_action_runnable) ? is_set(action->flags, pe_action_processed) ? "" : "(Provisional) " : "!!Non-Startable!! ", action->id, action->uuid, node_uname ? "\ton " : "", node_uname ? node_uname : "", node_uuid ? "\t\t(" : "", node_uuid ? node_uuid : "", node_uuid ? ")" : ""); break; default: crm_trace("%s%s%sAction %d: %s %s%s%s%s%s%s", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": ", is_set(action->flags, pe_action_optional) ? "Optional " : is_set(action->flags, pe_action_pseudo) ? "Pseduo " : is_set(action->flags, pe_action_runnable) ? is_set(action->flags, pe_action_processed) ? "" : "(Provisional) " : "!!Non-Startable!! ", action->id, action->uuid, action->rsc ? action->rsc->id : "", node_uname ? "\ton " : "", node_uname ? node_uname : "", node_uuid ? "\t\t(" : "", node_uuid ? node_uuid : "", node_uuid ? ")" : ""); break; } if (details) { GListPtr gIter = NULL; crm_trace("\t\t====== Preceding Actions"); gIter = action->actions_before; for (; gIter != NULL; gIter = gIter->next) { action_wrapper_t *other = (action_wrapper_t *) gIter->data; log_action(log_level + 1, "\t\t", other->action, FALSE); } crm_trace("\t\t====== Subsequent Actions"); gIter = action->actions_after; for (; gIter != NULL; gIter = gIter->next) { action_wrapper_t *other = (action_wrapper_t *) gIter->data; log_action(log_level + 1, "\t\t", other->action, FALSE); } crm_trace("\t\t====== End"); } else { crm_trace("\t\t(seen=%d, before=%d, after=%d)", action->seen_count, g_list_length(action->actions_before), g_list_length(action->actions_after)); } } gboolean can_run_any(GHashTable * nodes) { GHashTableIter iter; node_t *node = NULL; if (nodes == NULL) { return FALSE; } g_hash_table_iter_init(&iter, nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (can_run_resources(node) && node->weight >= 0) { return TRUE; } } return FALSE; } diff --git a/pengine/utils.h b/pengine/utils.h index 1939d40956..5142e68998 100644 --- a/pengine/utils.h +++ b/pengine/utils.h @@ -1,63 +1,64 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef PENGINE_AUTILS__H # define PENGINE_AUTILS__H /* Constraint helper functions */ extern rsc_colocation_t *invert_constraint(rsc_colocation_t * constraint); extern rsc_to_node_t *copy_constraint(rsc_to_node_t * constraint); -extern rsc_to_node_t *rsc2node_new(const char *id, resource_t * rsc, int weight, node_t * node, +extern rsc_to_node_t *rsc2node_new(const char *id, resource_t * rsc, int weight, + const char *discovery_mode, node_t * node, pe_working_set_t * data_set); extern void pe_free_rsc_to_node(GListPtr constraints); extern void pe_free_ordering(GListPtr constraints); extern gboolean rsc_colocation_new(const char *id, const char *node_attr, int score, resource_t * rsc_lh, resource_t * rsc_rh, const char *state_lh, const char *state_rh, pe_working_set_t * data_set); extern gboolean rsc_ticket_new(const char *id, resource_t * rsc_lh, ticket_t * ticket, const char *state_lh, const char *loss_policy, pe_working_set_t * data_set); extern rsc_to_node_t *generate_location_rule(resource_t * rsc, xmlNode * location_rule, pe_working_set_t * data_set); extern gint sort_node_weight(gconstpointer a, gconstpointer b, gpointer data_set); extern gboolean can_run_resources(const node_t * node); extern gboolean native_assign_node(resource_t * rsc, GListPtr candidates, node_t * chosen, gboolean force); void native_deallocate(resource_t * rsc); extern void log_action(unsigned int log_level, const char *pre_text, action_t * action, gboolean details); extern gboolean can_run_any(GHashTable * nodes); extern resource_t *find_compatible_child(resource_t * local_child, resource_t * rsc, enum rsc_role_e filter, gboolean current); # define STONITH_UP "stonith_up" # define STONITH_DONE "stonith_complete" # define ALL_STOPPED "all_stopped" # define LOAD_STOPPED "load_stopped" #endif diff --git a/tools/crm_resource.c b/tools/crm_resource.c index 56583e0df0..ff5effd05d 100644 --- a/tools/crm_resource.c +++ b/tools/crm_resource.c @@ -1,2291 +1,2310 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include bool scope_master = FALSE; gboolean do_force = FALSE; gboolean BE_QUIET = FALSE; const char *attr_set_type = XML_TAG_ATTR_SETS; char *host_id = NULL; const char *rsc_id = NULL; const char *host_uname = NULL; const char *prop_name = NULL; const char *prop_value = NULL; const char *rsc_type = NULL; const char *prop_id = NULL; const char *prop_set = NULL; char *move_lifetime = NULL; char rsc_cmd = 'L'; const char *rsc_long_cmd = NULL; char *our_pid = NULL; crm_ipc_t *crmd_channel = NULL; char *xml_file = NULL; int cib_options = cib_sync_call; int crmd_replies_needed = 1; /* The welcome message */ GMainLoop *mainloop = NULL; gboolean print_pending = FALSE; extern void cleanup_alloc_calculations(pe_working_set_t * data_set); #define CMD_ERR(fmt, args...) do { \ crm_warn(fmt, ##args); \ fprintf(stderr, fmt, ##args); \ } while(0) #define message_timeout_ms 60*1000 static gboolean resource_ipc_timeout(gpointer data) { fprintf(stderr, "No messages received in %d seconds.. aborting\n", (int)message_timeout_ms / 1000); crm_err("No messages received in %d seconds", (int)message_timeout_ms / 1000); return crm_exit(-1); } static void resource_ipc_connection_destroy(gpointer user_data) { crm_info("Connection to CRMd was terminated"); crm_exit(1); } static void start_mainloop(void) { + if (crmd_replies_needed == 0) { + return; + } + mainloop = g_main_new(FALSE); fprintf(stderr, "Waiting for %d replies from the CRMd", crmd_replies_needed); crm_debug("Waiting for %d replies from the CRMd", crmd_replies_needed); g_timeout_add(message_timeout_ms, resource_ipc_timeout, NULL); g_main_run(mainloop); } static int resource_ipc_callback(const char *buffer, ssize_t length, gpointer userdata) { xmlNode *msg = string2xml(buffer); fprintf(stderr, "."); crm_log_xml_trace(msg, "[inbound]"); crmd_replies_needed--; if (crmd_replies_needed == 0) { fprintf(stderr, " OK\n"); crm_debug("Got all the replies we expected"); return crm_exit(pcmk_ok); } free_xml(msg); return 0; } struct ipc_client_callbacks crm_callbacks = { .dispatch = resource_ipc_callback, .destroy = resource_ipc_connection_destroy, }; static int do_find_resource(const char *rsc, resource_t * the_rsc, pe_working_set_t * data_set) { int found = 0; GListPtr lpc = NULL; if (the_rsc == NULL) { the_rsc = pe_find_resource(data_set->resources, rsc); } if (the_rsc == NULL) { return -ENXIO; } if (the_rsc->variant >= pe_clone) { GListPtr gIter = the_rsc->children; for (; gIter != NULL; gIter = gIter->next) { found += do_find_resource(rsc, gIter->data, data_set); } return found; } for (lpc = the_rsc->running_on; lpc != NULL; lpc = lpc->next) { node_t *node = (node_t *) lpc->data; crm_trace("resource %s is running on: %s", rsc, node->details->uname); if (BE_QUIET) { fprintf(stdout, "%s\n", node->details->uname); } else { const char *state = ""; if (the_rsc->variant < pe_clone && the_rsc->fns->state(the_rsc, TRUE) == RSC_ROLE_MASTER) { state = "Master"; } fprintf(stdout, "resource %s is running on: %s %s\n", rsc, node->details->uname, state); } found++; } if (BE_QUIET == FALSE && found == 0) { fprintf(stderr, "resource %s is NOT running\n", rsc); } return 0; } #define cons_string(x) x?x:"NA" static void print_cts_constraints(pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; xmlNode *lifetime = NULL; xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set->input); for (xml_obj = __xml_first_child(cib_constraints); xml_obj != NULL; xml_obj = __xml_next(xml_obj)) { const char *id = crm_element_value(xml_obj, XML_ATTR_ID); if (id == NULL) { continue; } lifetime = first_named_child(xml_obj, "lifetime"); if (test_ruleset(lifetime, NULL, data_set->now) == FALSE) { continue; } if (safe_str_eq(XML_CONS_TAG_RSC_DEPEND, crm_element_name(xml_obj))) { printf("Constraint %s %s %s %s %s %s %s\n", crm_element_name(xml_obj), cons_string(crm_element_value(xml_obj, XML_ATTR_ID)), cons_string(crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE)), cons_string(crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET)), cons_string(crm_element_value(xml_obj, XML_RULE_ATTR_SCORE)), cons_string(crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE_ROLE)), cons_string(crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET_ROLE))); } else if (safe_str_eq(XML_CONS_TAG_RSC_LOCATION, crm_element_name(xml_obj))) { /* unpack_location(xml_obj, data_set); */ } } } static void print_cts_rsc(resource_t * rsc) { GListPtr lpc = NULL; const char *host = NULL; gboolean needs_quorum = TRUE; const char *rtype = crm_element_value(rsc->xml, XML_ATTR_TYPE); const char *rprov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); const char *rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); if (safe_str_eq(rclass, "stonith")) { xmlNode *op = NULL; needs_quorum = FALSE; for (op = __xml_first_child(rsc->ops_xml); op != NULL; op = __xml_next(op)) { if (crm_str_eq((const char *)op->name, "op", TRUE)) { const char *name = crm_element_value(op, "name"); if (safe_str_neq(name, CRMD_ACTION_START)) { const char *value = crm_element_value(op, "requires"); if (safe_str_eq(value, "nothing")) { needs_quorum = FALSE; } break; } } } } if (rsc->running_on != NULL && g_list_length(rsc->running_on) == 1) { node_t *tmp = rsc->running_on->data; host = tmp->details->uname; } printf("Resource: %s %s %s %s %s %s %s %s %d %lld 0x%.16llx\n", crm_element_name(rsc->xml), rsc->id, rsc->clone_name ? rsc->clone_name : rsc->id, rsc->parent ? rsc->parent->id : "NA", rprov ? rprov : "NA", rclass, rtype, host ? host : "NA", needs_quorum, rsc->flags, rsc->flags); for (lpc = rsc->children; lpc != NULL; lpc = lpc->next) { resource_t *child = (resource_t *) lpc->data; print_cts_rsc(child); } } static void print_raw_rsc(resource_t * rsc) { GListPtr lpc = NULL; GListPtr children = rsc->children; if (children == NULL) { printf("%s\n", rsc->id); } for (lpc = children; lpc != NULL; lpc = lpc->next) { resource_t *child = (resource_t *) lpc->data; print_raw_rsc(child); } } static int do_find_resource_list(pe_working_set_t * data_set, gboolean raw) { int found = 0; GListPtr lpc = NULL; int opts = pe_print_printf | pe_print_rsconly; if (print_pending) { opts |= pe_print_pending; } for (lpc = data_set->resources; lpc != NULL; lpc = lpc->next) { resource_t *rsc = (resource_t *) lpc->data; if (is_set(rsc->flags, pe_rsc_orphan) && rsc->fns->active(rsc, TRUE) == FALSE) { continue; } rsc->fns->print(rsc, NULL, opts, stdout); found++; } if (found == 0) { printf("NO resources configured\n"); return -ENXIO; } return 0; } static resource_t * find_rsc_or_clone(const char *rsc, pe_working_set_t * data_set) { resource_t *the_rsc = pe_find_resource(data_set->resources, rsc); if (the_rsc == NULL) { char *as_clone = crm_concat(rsc, "0", ':'); the_rsc = pe_find_resource(data_set->resources, as_clone); free(as_clone); } return the_rsc; } static int dump_resource(const char *rsc, pe_working_set_t * data_set, gboolean expanded) { char *rsc_xml = NULL; resource_t *the_rsc = find_rsc_or_clone(rsc, data_set); int opts = pe_print_printf; if (the_rsc == NULL) { return -ENXIO; } if (print_pending) { opts |= pe_print_pending; } the_rsc->fns->print(the_rsc, NULL, opts, stdout); if (expanded) { rsc_xml = dump_xml_formatted(the_rsc->xml); } else { if (the_rsc->orig_xml) { rsc_xml = dump_xml_formatted(the_rsc->orig_xml); } else { rsc_xml = dump_xml_formatted(the_rsc->xml); } } fprintf(stdout, "%sxml:\n%s\n", expanded ? "" : "raw ", rsc_xml); free(rsc_xml); return 0; } static int dump_resource_attr(const char *rsc, const char *attr, pe_working_set_t * data_set) { int rc = -ENXIO; node_t *current = NULL; GHashTable *params = NULL; resource_t *the_rsc = find_rsc_or_clone(rsc, data_set); const char *value = NULL; if (the_rsc == NULL) { return -ENXIO; } if (g_list_length(the_rsc->running_on) == 1) { current = the_rsc->running_on->data; } else if (g_list_length(the_rsc->running_on) > 1) { CMD_ERR("%s is active on more than one node," " returning the default value for %s\n", the_rsc->id, crm_str(value)); } params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if (safe_str_eq(attr_set_type, XML_TAG_ATTR_SETS)) { get_rsc_attributes(params, the_rsc, current, data_set); } else if (safe_str_eq(attr_set_type, XML_TAG_META_SETS)) { get_meta_attributes(params, the_rsc, current, data_set); } else { unpack_instance_attributes(data_set->input, the_rsc->xml, XML_TAG_UTILIZATION, NULL, params, NULL, FALSE, data_set->now); } crm_debug("Looking up %s in %s", attr, the_rsc->id); value = g_hash_table_lookup(params, attr); if (value != NULL) { fprintf(stdout, "%s\n", value); rc = 0; } else { CMD_ERR("Attribute '%s' not found for '%s'\n", attr, the_rsc->id); } g_hash_table_destroy(params); return rc; } static int find_resource_attr(cib_t * the_cib, const char *attr, const char *rsc, const char *set_type, const char *set_name, const char *attr_id, const char *attr_name, char **value) { int offset = 0; static int xpath_max = 1024; int rc = pcmk_ok; xmlNode *xml_search = NULL; char *xpath_string = NULL; CRM_ASSERT(value != NULL); *value = NULL; if(the_cib == NULL) { return -ENOTCONN; } xpath_string = calloc(1, xpath_max); offset += snprintf(xpath_string + offset, xpath_max - offset, "%s", get_object_path("resources")); offset += snprintf(xpath_string + offset, xpath_max - offset, "//*[@id=\"%s\"]", rsc); if (set_type) { offset += snprintf(xpath_string + offset, xpath_max - offset, "/%s", set_type); if (set_name) { offset += snprintf(xpath_string + offset, xpath_max - offset, "[@id=\"%s\"]", set_name); } } offset += snprintf(xpath_string + offset, xpath_max - offset, "//nvpair["); if (attr_id) { offset += snprintf(xpath_string + offset, xpath_max - offset, "@id=\"%s\"", attr_id); } if (attr_name) { if (attr_id) { offset += snprintf(xpath_string + offset, xpath_max - offset, " and "); } offset += snprintf(xpath_string + offset, xpath_max - offset, "@name=\"%s\"", attr_name); } offset += snprintf(xpath_string + offset, xpath_max - offset, "]"); CRM_LOG_ASSERT(offset > 0); rc = the_cib->cmds->query(the_cib, xpath_string, &xml_search, cib_sync_call | cib_scope_local | cib_xpath); if (rc != pcmk_ok) { goto bail; } crm_log_xml_debug(xml_search, "Match"); if (xml_has_children(xml_search)) { xmlNode *child = NULL; rc = -EINVAL; printf("Multiple attributes match name=%s\n", attr_name); for (child = __xml_first_child(xml_search); child != NULL; child = __xml_next(child)) { printf(" Value: %s \t(id=%s)\n", crm_element_value(child, XML_NVPAIR_ATTR_VALUE), ID(child)); } } else { const char *tmp = crm_element_value(xml_search, attr); if (tmp) { *value = strdup(tmp); } } bail: free(xpath_string); free_xml(xml_search); return rc; } #include "../pengine/pengine.h" static int set_resource_attr(const char *rsc_id, const char *attr_set, const char *attr_id, const char *attr_name, const char *attr_value, bool recursive, cib_t * cib, pe_working_set_t * data_set) { int rc = pcmk_ok; static bool need_init = TRUE; char *local_attr_id = NULL; char *local_attr_set = NULL; xmlNode *xml_top = NULL; xmlNode *xml_obj = NULL; gboolean use_attributes_tag = FALSE; resource_t *rsc = find_rsc_or_clone(rsc_id, data_set); if (rsc == NULL) { return -ENXIO; } if (safe_str_eq(attr_set_type, XML_TAG_ATTR_SETS)) { rc = find_resource_attr(cib, XML_ATTR_ID, rsc_id, XML_TAG_META_SETS, attr_set, attr_id, attr_name, &local_attr_id); if (rc == pcmk_ok) { printf("WARNING: There is already a meta attribute called %s (id=%s)\n", attr_name, local_attr_id); } } rc = find_resource_attr(cib, XML_ATTR_ID, rsc_id, attr_set_type, attr_set, attr_id, attr_name, &local_attr_id); if (rc == pcmk_ok) { crm_debug("Found a match for name=%s: id=%s", attr_name, local_attr_id); attr_id = local_attr_id; } else if (rc != -ENXIO) { free(local_attr_id); return rc; } else { const char *value = NULL; xmlNode *cib_top = NULL; const char *tag = crm_element_name(rsc->xml); cib->cmds->query(cib, "/cib", &cib_top, cib_sync_call | cib_scope_local | cib_xpath | cib_no_children); value = crm_element_value(cib_top, "ignore_dtd"); if (value != NULL) { use_attributes_tag = TRUE; } else { value = crm_element_value(cib_top, XML_ATTR_VALIDATION); if (value && strstr(value, "-0.6")) { use_attributes_tag = TRUE; } } free_xml(cib_top); if (attr_set == NULL) { local_attr_set = crm_concat(rsc_id, attr_set_type, '-'); attr_set = local_attr_set; } if (attr_id == NULL) { local_attr_id = crm_concat(attr_set, attr_name, '-'); attr_id = local_attr_id; } if (use_attributes_tag && safe_str_eq(tag, XML_CIB_TAG_MASTER)) { tag = "master_slave"; /* use the old name */ } xml_top = create_xml_node(NULL, tag); crm_xml_add(xml_top, XML_ATTR_ID, rsc_id); xml_obj = create_xml_node(xml_top, attr_set_type); crm_xml_add(xml_obj, XML_ATTR_ID, attr_set); if (use_attributes_tag) { xml_obj = create_xml_node(xml_obj, XML_TAG_ATTRS); } } xml_obj = create_xml_node(xml_obj, XML_CIB_TAG_NVPAIR); if (xml_top == NULL) { xml_top = xml_obj; } crm_xml_add(xml_obj, XML_ATTR_ID, attr_id); crm_xml_add(xml_obj, XML_NVPAIR_ATTR_NAME, attr_name); crm_xml_add(xml_obj, XML_NVPAIR_ATTR_VALUE, attr_value); crm_log_xml_debug(xml_top, "Update"); rc = cib->cmds->modify(cib, XML_CIB_TAG_RESOURCES, xml_top, cib_options); free_xml(xml_top); free(local_attr_id); free(local_attr_set); if(recursive && safe_str_eq(attr_set_type, XML_TAG_META_SETS)) { GListPtr lpc = NULL; if(need_init) { xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set->input); need_init = FALSE; unpack_constraints(cib_constraints, data_set); for (lpc = data_set->resources; lpc != NULL; lpc = lpc->next) { resource_t *r = (resource_t *) lpc->data; clear_bit(r->flags, pe_rsc_allocating); } } crm_debug("Looking for dependancies %p", rsc->rsc_cons_lhs); set_bit(rsc->flags, pe_rsc_allocating); for (lpc = rsc->rsc_cons_lhs; lpc != NULL; lpc = lpc->next) { rsc_colocation_t *cons = (rsc_colocation_t *) lpc->data; resource_t *peer = cons->rsc_lh; crm_debug("Checking %s %d", cons->id, cons->score); if (cons->score > 0 && is_not_set(peer->flags, pe_rsc_allocating)) { /* Don't get into colocation loops */ crm_debug("Setting %s=%s for dependant resource %s", attr_name, attr_value, peer->id); set_resource_attr(peer->id, NULL, NULL, attr_name, attr_value, recursive, cib, data_set); } } } return rc; } static int delete_resource_attr(const char *rsc_id, const char *attr_set, const char *attr_id, const char *attr_name, cib_t * cib, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; int rc = pcmk_ok; char *local_attr_id = NULL; resource_t *rsc = find_rsc_or_clone(rsc_id, data_set); if (rsc == NULL) { return -ENXIO; } rc = find_resource_attr(cib, XML_ATTR_ID, rsc_id, attr_set_type, attr_set, attr_id, attr_name, &local_attr_id); if (rc == -ENXIO) { return pcmk_ok; } else if (rc != pcmk_ok) { return rc; } if (attr_id == NULL) { attr_id = local_attr_id; } xml_obj = create_xml_node(NULL, XML_CIB_TAG_NVPAIR); crm_xml_add(xml_obj, XML_ATTR_ID, attr_id); crm_xml_add(xml_obj, XML_NVPAIR_ATTR_NAME, attr_name); crm_log_xml_debug(xml_obj, "Delete"); CRM_ASSERT(cib); rc = cib->cmds->delete(cib, XML_CIB_TAG_RESOURCES, xml_obj, cib_options); if (rc == pcmk_ok) { printf("Deleted %s option: id=%s%s%s%s%s\n", rsc_id, local_attr_id, attr_set ? " set=" : "", attr_set ? attr_set : "", attr_name ? " name=" : "", attr_name ? attr_name : ""); } free_xml(xml_obj); free(local_attr_id); return rc; } static int dump_resource_prop(const char *rsc, const char *attr, pe_working_set_t * data_set) { const char *value = NULL; resource_t *the_rsc = pe_find_resource(data_set->resources, rsc); if (the_rsc == NULL) { return -ENXIO; } value = crm_element_value(the_rsc->xml, attr); if (value != NULL) { fprintf(stdout, "%s\n", value); return 0; } return -ENXIO; } static int send_lrm_rsc_op(crm_ipc_t * crmd_channel, const char *op, const char *host_uname, const char *rsc_id, gboolean only_failed, pe_working_set_t * data_set) { char *key = NULL; int rc = -ECOMM; xmlNode *cmd = NULL; xmlNode *xml_rsc = NULL; const char *value = NULL; const char *router_node = host_uname; xmlNode *params = NULL; xmlNode *msg_data = NULL; resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); if (rsc == NULL) { CMD_ERR("Resource %s not found\n", rsc_id); return -ENXIO; } else if (rsc->variant != pe_native) { CMD_ERR("We can only process primitive resources, not %s\n", rsc_id); return -EINVAL; } else if (host_uname == NULL) { CMD_ERR("Please supply a hostname with -H\n"); return -EINVAL; } else { node_t *node = pe_find_node(data_set->nodes, host_uname); if (node && is_remote_node(node)) { if (node->details->remote_rsc == NULL || node->details->remote_rsc->running_on == NULL) { CMD_ERR("No lrmd connection detected to remote node %s", host_uname); return -ENXIO; } node = node->details->remote_rsc->running_on->data; router_node = node->details->uname; } } key = generate_transition_key(0, getpid(), 0, "xxxxxxxx-xrsc-opxx-xcrm-resourcexxxx"); msg_data = create_xml_node(NULL, XML_GRAPH_TAG_RSC_OP); crm_xml_add(msg_data, XML_ATTR_TRANSITION_KEY, key); free(key); crm_xml_add(msg_data, XML_LRM_ATTR_TARGET, host_uname); if (safe_str_neq(router_node, host_uname)) { crm_xml_add(msg_data, XML_LRM_ATTR_ROUTER_NODE, router_node); } xml_rsc = create_xml_node(msg_data, XML_CIB_TAG_RESOURCE); if (rsc->clone_name) { crm_xml_add(xml_rsc, XML_ATTR_ID, rsc->clone_name); crm_xml_add(xml_rsc, XML_ATTR_ID_LONG, rsc->id); } else { crm_xml_add(xml_rsc, XML_ATTR_ID, rsc->id); } value = crm_element_value(rsc->xml, XML_ATTR_TYPE); crm_xml_add(xml_rsc, XML_ATTR_TYPE, value); if (value == NULL) { CMD_ERR("%s has no type! Aborting...\n", rsc_id); return -ENXIO; } value = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, value); if (value == NULL) { CMD_ERR("%s has no class! Aborting...\n", rsc_id); return -ENXIO; } value = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, value); params = create_xml_node(msg_data, XML_TAG_ATTRS); crm_xml_add(params, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); key = crm_meta_name(XML_LRM_ATTR_INTERVAL); crm_xml_add(params, key, "60000"); /* 1 minute */ free(key); cmd = create_request(op, msg_data, router_node, CRM_SYSTEM_CRMD, crm_system_name, our_pid); /* crm_log_xml_warn(cmd, "send_lrm_rsc_op"); */ free_xml(msg_data); if (crm_ipc_send(crmd_channel, cmd, 0, 0, NULL) > 0) { rc = 0; } else { CMD_ERR("Could not send %s op to the crmd", op); rc = -ENOTCONN; } free_xml(cmd); return rc; } static int delete_lrm_rsc(cib_t *cib_conn, crm_ipc_t * crmd_channel, const char *host_uname, resource_t * rsc, pe_working_set_t * data_set) { int rc = pcmk_ok; + node_t *node = NULL; if (rsc == NULL) { return -ENXIO; } else if (rsc->children) { GListPtr lpc = NULL; for (lpc = rsc->children; lpc != NULL; lpc = lpc->next) { resource_t *child = (resource_t *) lpc->data; delete_lrm_rsc(cib_conn, crmd_channel, host_uname, child, data_set); } return pcmk_ok; } else if (host_uname == NULL) { GListPtr lpc = NULL; for (lpc = data_set->nodes; lpc != NULL; lpc = lpc->next) { - node_t *node = (node_t *) lpc->data; + node = (node_t *) lpc->data; if (node->details->online) { delete_lrm_rsc(cib_conn, crmd_channel, node->details->uname, rsc, data_set); } } return pcmk_ok; } - printf("Cleaning up %s on %s\n", rsc->id, host_uname); - rc = send_lrm_rsc_op(crmd_channel, CRM_OP_LRM_DELETE, host_uname, rsc->id, TRUE, data_set); + node = pe_find_node(data_set->nodes, host_uname); + + if (node && node->details->rsc_discovery_enabled) { + printf("Cleaning up %s on %s\n", rsc->id, host_uname); + rc = send_lrm_rsc_op(crmd_channel, CRM_OP_LRM_DELETE, host_uname, rsc->id, TRUE, data_set); + } else { + printf("Resource discovery disabled on %s. Unable to delete lrm state.\n", host_uname); + } if (rc == pcmk_ok) { char *attr_name = NULL; const char *id = rsc->id; - node_t *node = pe_find_node(data_set->nodes, host_uname); - if(node && node->details->remote_rsc == NULL) { + if(node && node->details->remote_rsc == NULL && node->details->rsc_discovery_enabled) { crmd_replies_needed++; } if (rsc->clone_name) { id = rsc->clone_name; } attr_name = crm_concat("fail-count", id, '-'); rc = attrd_update_delegate(NULL, 'D', host_uname, attr_name, NULL, XML_CIB_TAG_STATUS, NULL, NULL, NULL, node ? is_remote_node(node) : FALSE); free(attr_name); } return rc; } static int fail_lrm_rsc(crm_ipc_t * crmd_channel, const char *host_uname, const char *rsc_id, pe_working_set_t * data_set) { crm_warn("Failing: %s", rsc_id); return send_lrm_rsc_op(crmd_channel, CRM_OP_LRM_FAIL, host_uname, rsc_id, FALSE, data_set); } static char * parse_cli_lifetime(const char *input) { char *later_s = NULL; crm_time_t *now = NULL; crm_time_t *later = NULL; crm_time_t *duration = NULL; if (input == NULL) { return NULL; } duration = crm_time_parse_duration(move_lifetime); if (duration == NULL) { CMD_ERR("Invalid duration specified: %s\n", move_lifetime); CMD_ERR("Please refer to" " http://en.wikipedia.org/wiki/ISO_8601#Durations" " for examples of valid durations\n"); return NULL; } now = crm_time_new(NULL); later = crm_time_add(now, duration); crm_time_log(LOG_INFO, "now ", now, crm_time_log_date | crm_time_log_timeofday | crm_time_log_with_timezone); crm_time_log(LOG_INFO, "later ", later, crm_time_log_date | crm_time_log_timeofday | crm_time_log_with_timezone); crm_time_log(LOG_INFO, "duration", duration, crm_time_log_date | crm_time_log_timeofday); later_s = crm_time_as_string(later, crm_time_log_date | crm_time_log_timeofday); printf("Migration will take effect until: %s\n", later_s); crm_time_free(duration); crm_time_free(later); crm_time_free(now); return later_s; } static int ban_resource(const char *rsc_id, const char *host, GListPtr allnodes, cib_t * cib_conn) { char *later_s = NULL; int rc = pcmk_ok; char *id = NULL; xmlNode *fragment = NULL; xmlNode *location = NULL; if(host == NULL) { GListPtr n = allnodes; for(; n && rc == pcmk_ok; n = n->next) { node_t *target = n->data; rc = ban_resource(rsc_id, target->details->uname, NULL, cib_conn); } return rc; } later_s = parse_cli_lifetime(move_lifetime); if(move_lifetime && later_s == NULL) { return -EINVAL; } fragment = create_xml_node(NULL, XML_CIB_TAG_CONSTRAINTS); id = g_strdup_printf("cli-ban-%s-on-%s", rsc_id, host); location = create_xml_node(fragment, XML_CONS_TAG_RSC_LOCATION); crm_xml_add(location, XML_ATTR_ID, id); free(id); if (BE_QUIET == FALSE) { CMD_ERR("WARNING: Creating rsc_location constraint '%s'" " with a score of -INFINITY for resource %s" " on %s.\n", ID(location), rsc_id, host); CMD_ERR("\tThis will prevent %s from %s" " on %s until the constraint is removed using" " the 'crm_resource --clear' command or manually" " with cibadmin\n", rsc_id, scope_master?"being promoted":"running", host); CMD_ERR("\tThis will be the case even if %s is" " the last node in the cluster\n", host); CMD_ERR("\tThis message can be disabled with --quiet\n"); } crm_xml_add(location, XML_COLOC_ATTR_SOURCE, rsc_id); if(scope_master) { crm_xml_add(location, XML_RULE_ATTR_ROLE, RSC_ROLE_MASTER_S); } else { crm_xml_add(location, XML_RULE_ATTR_ROLE, RSC_ROLE_STARTED_S); } if (later_s == NULL) { /* Short form */ crm_xml_add(location, XML_CIB_TAG_NODE, host); crm_xml_add(location, XML_RULE_ATTR_SCORE, MINUS_INFINITY_S); } else { xmlNode *rule = create_xml_node(location, XML_TAG_RULE); xmlNode *expr = create_xml_node(rule, XML_TAG_EXPRESSION); id = g_strdup_printf("cli-ban-%s-on-%s-rule", rsc_id, host); crm_xml_add(rule, XML_ATTR_ID, id); free(id); crm_xml_add(rule, XML_RULE_ATTR_SCORE, MINUS_INFINITY_S); crm_xml_add(rule, XML_RULE_ATTR_BOOLEAN_OP, "and"); id = g_strdup_printf("cli-ban-%s-on-%s-expr", rsc_id, host); crm_xml_add(expr, XML_ATTR_ID, id); free(id); crm_xml_add(expr, XML_EXPR_ATTR_ATTRIBUTE, "#uname"); crm_xml_add(expr, XML_EXPR_ATTR_OPERATION, "eq"); crm_xml_add(expr, XML_EXPR_ATTR_VALUE, host); crm_xml_add(expr, XML_EXPR_ATTR_TYPE, "string"); expr = create_xml_node(rule, "date_expression"); id = g_strdup_printf("cli-ban-%s-on-%s-lifetime", rsc_id, host); crm_xml_add(expr, XML_ATTR_ID, id); free(id); crm_xml_add(expr, "operation", "lt"); crm_xml_add(expr, "end", later_s); } crm_log_xml_notice(fragment, "Modify"); rc = cib_conn->cmds->update(cib_conn, XML_CIB_TAG_CONSTRAINTS, fragment, cib_options); free_xml(fragment); free(later_s); return rc; } static int prefer_resource(const char *rsc_id, const char *host, cib_t * cib_conn) { char *later_s = parse_cli_lifetime(move_lifetime); int rc = pcmk_ok; char *id = NULL; xmlNode *location = NULL; xmlNode *fragment = NULL; if(move_lifetime && later_s == NULL) { return -EINVAL; } if(cib_conn == NULL) { free(later_s); return -ENOTCONN; } fragment = create_xml_node(NULL, XML_CIB_TAG_CONSTRAINTS); id = g_strdup_printf("cli-prefer-%s", rsc_id); location = create_xml_node(fragment, XML_CONS_TAG_RSC_LOCATION); crm_xml_add(location, XML_ATTR_ID, id); free(id); crm_xml_add(location, XML_COLOC_ATTR_SOURCE, rsc_id); if(scope_master) { crm_xml_add(location, XML_RULE_ATTR_ROLE, RSC_ROLE_MASTER_S); } else { crm_xml_add(location, XML_RULE_ATTR_ROLE, RSC_ROLE_STARTED_S); } if (later_s == NULL) { /* Short form */ crm_xml_add(location, XML_CIB_TAG_NODE, host); crm_xml_add(location, XML_RULE_ATTR_SCORE, INFINITY_S); } else { xmlNode *rule = create_xml_node(location, XML_TAG_RULE); xmlNode *expr = create_xml_node(rule, XML_TAG_EXPRESSION); id = crm_concat("cli-prefer-rule", rsc_id, '-'); crm_xml_add(rule, XML_ATTR_ID, id); free(id); crm_xml_add(rule, XML_RULE_ATTR_SCORE, INFINITY_S); crm_xml_add(rule, XML_RULE_ATTR_BOOLEAN_OP, "and"); id = crm_concat("cli-prefer-expr", rsc_id, '-'); crm_xml_add(expr, XML_ATTR_ID, id); free(id); crm_xml_add(expr, XML_EXPR_ATTR_ATTRIBUTE, "#uname"); crm_xml_add(expr, XML_EXPR_ATTR_OPERATION, "eq"); crm_xml_add(expr, XML_EXPR_ATTR_VALUE, host); crm_xml_add(expr, XML_EXPR_ATTR_TYPE, "string"); expr = create_xml_node(rule, "date_expression"); id = crm_concat("cli-prefer-lifetime-end", rsc_id, '-'); crm_xml_add(expr, XML_ATTR_ID, id); free(id); crm_xml_add(expr, "operation", "lt"); crm_xml_add(expr, "end", later_s); } crm_log_xml_info(fragment, "Modify"); rc = cib_conn->cmds->update(cib_conn, XML_CIB_TAG_CONSTRAINTS, fragment, cib_options); free_xml(fragment); free(later_s); return rc; } static int clear_resource(const char *rsc_id, const char *host, GListPtr allnodes, cib_t * cib_conn) { char *id = NULL; int rc = pcmk_ok; xmlNode *fragment = NULL; xmlNode *location = NULL; if(cib_conn == NULL) { return -ENOTCONN; } fragment = create_xml_node(NULL, XML_CIB_TAG_CONSTRAINTS); if(host) { id = g_strdup_printf("cli-ban-%s-on-%s", rsc_id, host); location = create_xml_node(fragment, XML_CONS_TAG_RSC_LOCATION); crm_xml_add(location, XML_ATTR_ID, id); free(id); } else { GListPtr n = allnodes; for(; n; n = n->next) { node_t *target = n->data; id = g_strdup_printf("cli-ban-%s-on-%s", rsc_id, target->details->uname); location = create_xml_node(fragment, XML_CONS_TAG_RSC_LOCATION); crm_xml_add(location, XML_ATTR_ID, id); free(id); } } id = g_strdup_printf("cli-prefer-%s", rsc_id); location = create_xml_node(fragment, XML_CONS_TAG_RSC_LOCATION); crm_xml_add(location, XML_ATTR_ID, id); if(host && do_force == FALSE) { crm_xml_add(location, XML_CIB_TAG_NODE, host); } free(id); crm_log_xml_info(fragment, "Delete"); rc = cib_conn->cmds->delete(cib_conn, XML_CIB_TAG_CONSTRAINTS, fragment, cib_options); if (rc == -ENXIO) { rc = pcmk_ok; } else if (rc != pcmk_ok) { goto bail; } bail: free_xml(fragment); return rc; } static int list_resource_operations(const char *rsc_id, const char *host_uname, gboolean active, pe_working_set_t * data_set) { resource_t *rsc = NULL; int opts = pe_print_printf | pe_print_rsconly | pe_print_suppres_nl; GListPtr ops = find_operations(rsc_id, host_uname, active, data_set); GListPtr lpc = NULL; if (print_pending) { opts |= pe_print_pending; } for (lpc = ops; lpc != NULL; lpc = lpc->next) { xmlNode *xml_op = (xmlNode *) lpc->data; const char *op_rsc = crm_element_value(xml_op, "resource"); const char *last = crm_element_value(xml_op, XML_RSC_OP_LAST_CHANGE); const char *status_s = crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS); const char *op_key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); int status = crm_parse_int(status_s, "0"); rsc = pe_find_resource(data_set->resources, op_rsc); if(rsc) { rsc->fns->print(rsc, "", opts, stdout); } else { fprintf(stdout, "Unknown resource %s", op_rsc); } fprintf(stdout, ": %s (node=%s, call=%s, rc=%s", op_key ? op_key : ID(xml_op), crm_element_value(xml_op, XML_ATTR_UNAME), crm_element_value(xml_op, XML_LRM_ATTR_CALLID), crm_element_value(xml_op, XML_LRM_ATTR_RC)); if (last) { time_t run_at = crm_parse_int(last, "0"); fprintf(stdout, ", last-rc-change=%s, exec=%sms", crm_strip_trailing_newline(ctime(&run_at)), crm_element_value(xml_op, XML_RSC_OP_T_EXEC)); } fprintf(stdout, "): %s\n", services_lrm_status_str(status)); } return pcmk_ok; } static void show_location(resource_t * rsc, const char *prefix) { GListPtr lpc = NULL; GListPtr list = rsc->rsc_location; int offset = 0; if (prefix) { offset = strlen(prefix) - 2; } for (lpc = list; lpc != NULL; lpc = lpc->next) { rsc_to_node_t *cons = (rsc_to_node_t *) lpc->data; GListPtr lpc2 = NULL; for (lpc2 = cons->node_list_rh; lpc2 != NULL; lpc2 = lpc2->next) { node_t *node = (node_t *) lpc2->data; char *score = score2char(node->weight); fprintf(stdout, "%s: Node %-*s (score=%s, id=%s)\n", prefix ? prefix : " ", 71 - offset, node->details->uname, score, cons->id); free(score); } } } static void show_colocation(resource_t * rsc, gboolean dependants, gboolean recursive, int offset) { char *prefix = NULL; GListPtr lpc = NULL; GListPtr list = rsc->rsc_cons; prefix = calloc(1, (offset * 4) + 1); memset(prefix, ' ', offset * 4); if (dependants) { list = rsc->rsc_cons_lhs; } if (is_set(rsc->flags, pe_rsc_allocating)) { /* Break colocation loops */ printf("loop %s\n", rsc->id); free(prefix); return; } set_bit(rsc->flags, pe_rsc_allocating); for (lpc = list; lpc != NULL; lpc = lpc->next) { rsc_colocation_t *cons = (rsc_colocation_t *) lpc->data; char *score = NULL; resource_t *peer = cons->rsc_rh; if (dependants) { peer = cons->rsc_lh; } if (is_set(peer->flags, pe_rsc_allocating)) { if (dependants == FALSE) { fprintf(stdout, "%s%-*s (id=%s - loop)\n", prefix, 80 - (4 * offset), peer->id, cons->id); } continue; } if (dependants && recursive) { show_colocation(peer, dependants, recursive, offset + 1); } score = score2char(cons->score); if (cons->role_rh > RSC_ROLE_STARTED) { fprintf(stdout, "%s%-*s (score=%s, %s role=%s, id=%s)\n", prefix, 80 - (4 * offset), peer->id, score, dependants ? "needs" : "with", role2text(cons->role_rh), cons->id); } else { fprintf(stdout, "%s%-*s (score=%s, id=%s)\n", prefix, 80 - (4 * offset), peer->id, score, cons->id); } show_location(peer, prefix); free(score); if (!dependants && recursive) { show_colocation(peer, dependants, recursive, offset + 1); } } free(prefix); } static GHashTable * generate_resource_params(resource_t * rsc, pe_working_set_t * data_set) { GHashTable *params = NULL; GHashTable *meta = NULL; GHashTable *combined = NULL; GHashTableIter iter; if (!rsc) { crm_err("Resource does not exist in config"); return NULL; } params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); meta = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); combined = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); get_rsc_attributes(params, rsc, NULL /* TODO: Pass in local node */ , data_set); get_meta_attributes(meta, rsc, NULL /* TODO: Pass in local node */ , data_set); if (params) { char *key = NULL; char *value = NULL; g_hash_table_iter_init(&iter, params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { g_hash_table_insert(combined, strdup(key), strdup(value)); } g_hash_table_destroy(params); } if (meta) { char *key = NULL; char *value = NULL; g_hash_table_iter_init(&iter, meta); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { char *crm_name = crm_meta_name(key); g_hash_table_insert(combined, crm_name, strdup(value)); } g_hash_table_destroy(meta); } return combined; } /* *INDENT-OFF* */ static struct crm_option long_options[] = { /* Top-level Options */ {"help", 0, 0, '?', "\t\tThis text"}, {"version", 0, 0, '$', "\t\tVersion information" }, {"verbose", 0, 0, 'V', "\t\tIncrease debug output"}, {"quiet", 0, 0, 'Q', "\t\tPrint only the value on stdout\n"}, {"resource", 1, 0, 'r', "\tResource ID" }, {"-spacer-",1, 0, '-', "\nQueries:"}, {"list", 0, 0, 'L', "\t\tList all cluster resources"}, {"list-raw", 0, 0, 'l', "\tList the IDs of all instantiated resources (no groups/clones/...)"}, {"list-cts", 0, 0, 'c', NULL, 1}, {"list-operations", 0, 0, 'O', "\tList active resource operations. Optionally filtered by resource (-r) and/or node (-N)"}, {"list-all-operations", 0, 0, 'o', "List all resource operations. Optionally filtered by resource (-r) and/or node (-N)"}, {"pending", 0, 0, 'j', "\t\tDisplay pending state if 'record-pending' is enabled\n"}, {"list-standards", 0, 0, 0, "\tList supported standards"}, {"list-ocf-providers", 0, 0, 0, "List all available OCF providers"}, {"list-agents", 1, 0, 0, "List all agents available for the named standard and/or provider."}, {"list-ocf-alternatives", 1, 0, 0, "List all available providers for the named OCF agent\n"}, {"show-metadata", 1, 0, 0, "Show the metadata for the named class:provider:agent"}, {"query-xml", 0, 0, 'q', "\tQuery the definition of a resource (template expanded)"}, {"query-xml-raw", 0, 0, 'w', "\tQuery the definition of a resource (raw xml)"}, {"locate", 0, 0, 'W', "\t\tDisplay the current location(s) of a resource"}, {"stack", 0, 0, 'A', "\t\tDisplay the prerequisites and dependents of a resource"}, {"constraints",0, 0, 'a', "\tDisplay the (co)location constraints that apply to a resource"}, {"-spacer-", 1, 0, '-', "\nCommands:"}, {"cleanup", 0, 0, 'C', "\t\tDelete the resource history and re-check the current state. Optional: --resource"}, {"set-parameter", 1, 0, 'p', "Set the named parameter for a resource. See also -m, --meta"}, {"get-parameter", 1, 0, 'g', "Display the named parameter for a resource. See also -m, --meta"}, {"delete-parameter",1, 0, 'd', "Delete the named parameter for a resource. See also -m, --meta"}, {"get-property", 1, 0, 'G', "Display the 'class', 'type' or 'provider' of a resource", 1}, {"set-property", 1, 0, 'S', "(Advanced) Set the class, type or provider of a resource", 1}, {"-spacer-", 1, 0, '-', "\nResource location:"}, { "move", 0, 0, 'M', "\t\tMove a resource from its current location to the named destination.\n " "\t\t\t\tRequires: --host. Optional: --lifetime, --master\n\n" "\t\t\t\tNOTE: This may prevent the resource from running on the previous location node until the implicit constraints expire or are removed with --unban\n" }, { "ban", 0, 0, 'B', "\t\tPrevent the named resource from running on the named --host. \n" "\t\t\t\tRequires: --resource. Optional: --host, --lifetime, --master\n\n" "\t\t\t\tIf --host is not specified, it defaults to:\n" "\t\t\t\t * the curent location for primitives and groups, or\n\n" "\t\t\t\t * the curent location of the master for m/s resources with master-max=1\n\n" "\t\t\t\tAll other situations result in an error as there is no sane default.\n\n" "\t\t\t\tNOTE: This will prevent the resource from running on this node until the constraint expires or is removed with --clear\n" }, { "clear", 0, 0, 'U', "\t\tRemove all constraints created by the --ban and/or --move commands. \n" "\t\t\t\tRequires: --resource. Optional: --host, --master\n\n" "\t\t\t\tIf --host is not specified, all constraints created by --ban and --move will be removed for the named resource.\n" }, {"lifetime", 1, 0, 'u', "\tLifespan of constraints created by the --ban and --move commands"}, { "master", 0, 0, 0, "\t\tLimit the scope of the --ban, --move and --clear commands to the Master role.\n" "\t\t\t\tFor --ban and --move, the previous master can still remain active in the Slave role." }, {"-spacer-", 1, 0, '-', "\nAdvanced Commands:"}, {"delete", 0, 0, 'D', "\t\t(Advanced) Delete a resource from the CIB"}, {"fail", 0, 0, 'F', "\t\t(Advanced) Tell the cluster this resource has failed"}, {"force-stop", 0, 0, 0, "\t(Advanced) Bypass the cluster and stop a resource on the local node. Additional detail with -V"}, {"force-start",0, 0, 0, "\t(Advanced) Bypass the cluster and start a resource on the local node. Additional detail with -V"}, {"force-check",0, 0, 0, "\t(Advanced) Bypass the cluster and check the state of a resource on the local node. Additional detail with -V\n"}, {"-spacer-", 1, 0, '-', "\nAdditional Options:"}, {"node", 1, 0, 'N', "\tHost uname"}, {"recursive", 0, 0, 0, "\tFollow colocation chains when using --set-parameter"}, {"resource-type", 1, 0, 't', "Resource type (primitive, clone, group, ...)"}, {"parameter-value", 1, 0, 'v', "Value to use with -p or -S"}, {"meta", 0, 0, 'm', "\t\tModify a resource's configuration option rather than one which is passed to the resource agent script. For use with -p, -g, -d"}, {"utilization", 0, 0, 'z', "\tModify a resource's utilization attribute. For use with -p, -g, -d"}, {"set-name", 1, 0, 's', "\t(Advanced) ID of the instance_attributes object to change"}, {"nvpair", 1, 0, 'i', "\t(Advanced) ID of the nvpair object to change/delete"}, {"force", 0, 0, 'f', "\n" /* Is this actually true anymore? "\t\tForce the resource to move by creating a rule for the current location and a score of -INFINITY" "\n\t\tThis should be used if the resource's stickiness and constraint scores total more than INFINITY (Currently 100,000)" "\n\t\tNOTE: This will prevent the resource from running on this node until the constraint is removed with -U or the --lifetime duration expires\n"*/ }, {"xml-file", 1, 0, 'x', NULL, 1},\ /* legacy options */ {"host-uname", 1, 0, 'H', NULL, 1}, {"migrate", 0, 0, 'M', NULL, 1}, {"un-migrate", 0, 0, 'U', NULL, 1}, {"un-move", 0, 0, 'U', NULL, 1}, {"refresh", 0, 0, 'R', NULL, 1}, {"reprobe", 0, 0, 'P', NULL, 1}, {"-spacer-", 1, 0, '-', "\nExamples:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', "List the configured resources:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_resource --list", pcmk_option_example}, {"-spacer-", 1, 0, '-', "List the available OCF agents:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_resource --list-agents ocf", pcmk_option_example}, {"-spacer-", 1, 0, '-', "List the available OCF agents from the linux-ha project:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_resource --list-agents ocf:heartbeat", pcmk_option_example}, {"-spacer-", 1, 0, '-', "Display the current location of 'myResource':", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_resource --resource myResource --locate", pcmk_option_example}, {"-spacer-", 1, 0, '-', "Move 'myResource' to another machine:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_resource --resource myResource --move", pcmk_option_example}, {"-spacer-", 1, 0, '-', "Move 'myResource' to a specific machine:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_resource --resource myResource --move --node altNode", pcmk_option_example}, {"-spacer-", 1, 0, '-', "Allow (but not force) 'myResource' to move back to its original location:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_resource --resource myResource --un-move", pcmk_option_example}, {"-spacer-", 1, 0, '-', "Tell the cluster that 'myResource' failed:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_resource --resource myResource --fail", pcmk_option_example}, {"-spacer-", 1, 0, '-', "Stop a 'myResource' (and anything that depends on it):", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_resource --resource myResource --set-parameter target-role --meta --parameter-value Stopped", pcmk_option_example}, {"-spacer-", 1, 0, '-', "Tell the cluster not to manage 'myResource':", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', "The cluster will not attempt to start or stop the resource under any circumstances."}, {"-spacer-", 1, 0, '-', "Useful when performing maintenance tasks on a resource.", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_resource --resource myResource --set-parameter is-managed --meta --parameter-value false", pcmk_option_example}, {"-spacer-", 1, 0, '-', "Erase the operation history of 'myResource' on 'aNode':", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', "The cluster will 'forget' the existing resource state (including any errors) and attempt to recover the resource."}, {"-spacer-", 1, 0, '-', "Useful when a resource had failed permanently and has been repaired by an administrator.", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_resource --resource myResource --cleanup --node aNode", pcmk_option_example}, {0, 0, 0, 0} }; /* *INDENT-ON* */ int main(int argc, char **argv) { const char *longname = NULL; pe_working_set_t data_set; xmlNode *cib_xml_copy = NULL; cib_t *cib_conn = NULL; bool do_trace = FALSE; bool recursive = FALSE; int rc = pcmk_ok; int option_index = 0; int argerr = 0; int flag; crm_log_cli_init("crm_resource"); crm_set_options(NULL, "(query|command) [options]", long_options, "Perform tasks related to cluster resources.\nAllows resources to be queried (definition and location), modified, and moved around the cluster.\n"); if (argc < 2) { crm_help('?', EX_USAGE); } while (1) { flag = crm_get_option_long(argc, argv, &option_index, &longname); if (flag == -1) break; switch (flag) { case 0: if (safe_str_eq("master", longname)) { scope_master = TRUE; } else if(safe_str_eq(longname, "recursive")) { recursive = TRUE; } else if (safe_str_eq("force-stop", longname) || safe_str_eq("force-start", longname) || safe_str_eq("force-check", longname)) { rsc_cmd = flag; rsc_long_cmd = longname; } else if (safe_str_eq("list-ocf-providers", longname) || safe_str_eq("list-ocf-alternatives", longname) || safe_str_eq("list-standards", longname)) { const char *text = NULL; lrmd_list_t *list = NULL; lrmd_list_t *iter = NULL; lrmd_t *lrmd_conn = lrmd_api_new(); if (safe_str_eq("list-ocf-providers", longname) || safe_str_eq("list-ocf-alternatives", longname)) { rc = lrmd_conn->cmds->list_ocf_providers(lrmd_conn, optarg, &list); text = "OCF providers"; } else if (safe_str_eq("list-standards", longname)) { rc = lrmd_conn->cmds->list_standards(lrmd_conn, &list); text = "standards"; } if (rc > 0) { rc = 0; for (iter = list; iter != NULL; iter = iter->next) { rc++; printf("%s\n", iter->val); } lrmd_list_freeall(list); } else if (optarg) { fprintf(stderr, "No %s found for %s\n", text, optarg); } else { fprintf(stderr, "No %s found\n", text); } lrmd_api_delete(lrmd_conn); return crm_exit(rc); } else if (safe_str_eq("show-metadata", longname)) { char standard[512]; char provider[512]; char type[512]; char *metadata = NULL; lrmd_t *lrmd_conn = lrmd_api_new(); rc = sscanf(optarg, "%[^:]:%[^:]:%s", standard, provider, type); if (rc == 3) { rc = lrmd_conn->cmds->get_metadata(lrmd_conn, standard, provider, type, &metadata, 0); } else if (rc == 2) { rc = lrmd_conn->cmds->get_metadata(lrmd_conn, standard, NULL, provider, &metadata, 0); } else if (rc < 2) { fprintf(stderr, "Please specify standard:type or standard:provider:type, not %s\n", optarg); rc = -EINVAL; } if (metadata) { printf("%s\n", metadata); } else { fprintf(stderr, "Metadata query for %s failed: %d\n", optarg, rc); } lrmd_api_delete(lrmd_conn); return crm_exit(rc); } else if (safe_str_eq("list-agents", longname)) { lrmd_list_t *list = NULL; lrmd_list_t *iter = NULL; char standard[512]; char provider[512]; lrmd_t *lrmd_conn = lrmd_api_new(); rc = sscanf(optarg, "%[^:]:%s", standard, provider); if (rc == 1) { rc = lrmd_conn->cmds->list_agents(lrmd_conn, &list, optarg, NULL); provider[0] = '*'; provider[1] = 0; } else if (rc == 2) { rc = lrmd_conn->cmds->list_agents(lrmd_conn, &list, standard, provider); } if (rc > 0) { rc = 0; for (iter = list; iter != NULL; iter = iter->next) { printf("%s\n", iter->val); rc++; } lrmd_list_freeall(list); rc = 0; } else { fprintf(stderr, "No agents found for standard=%s, provider=%s\n", standard, provider); rc = -1; } lrmd_api_delete(lrmd_conn); return crm_exit(rc); } else { crm_err("Unhandled long option: %s", longname); } break; case 'V': do_trace = TRUE; crm_bump_log_level(argc, argv); break; case '$': case '?': crm_help(flag, EX_OK); break; case 'x': xml_file = strdup(optarg); break; case 'Q': BE_QUIET = TRUE; break; case 'm': attr_set_type = XML_TAG_META_SETS; break; case 'z': attr_set_type = XML_TAG_UTILIZATION; break; case 'u': move_lifetime = strdup(optarg); break; case 'f': do_force = TRUE; break; case 'i': prop_id = optarg; break; case 's': prop_set = optarg; break; case 'r': rsc_id = optarg; break; case 'v': prop_value = optarg; break; case 't': rsc_type = optarg; break; case 'C': case 'R': case 'P': rsc_cmd = 'C'; break; case 'L': case 'c': case 'l': case 'q': case 'w': case 'D': case 'F': case 'W': case 'M': case 'U': case 'B': case 'O': case 'o': case 'A': case 'a': rsc_cmd = flag; break; case 'j': print_pending = TRUE; break; case 'p': case 'g': case 'd': case 'S': case 'G': prop_name = optarg; rsc_cmd = flag; break; case 'h': case 'H': case 'N': crm_trace("Option %c => %s", flag, optarg); host_uname = optarg; break; default: CMD_ERR("Argument code 0%o (%c) is not (?yet?) supported\n", flag, flag); ++argerr; break; } } if (optind < argc && argv[optind] != NULL) { CMD_ERR("non-option ARGV-elements: "); while (optind < argc && argv[optind] != NULL) { CMD_ERR("%s ", argv[optind++]); ++argerr; } CMD_ERR("\n"); } if (optind > argc) { ++argerr; } if (argerr) { crm_help('?', EX_USAGE); } our_pid = calloc(1, 11); if (our_pid != NULL) { snprintf(our_pid, 10, "%d", getpid()); our_pid[10] = '\0'; } if (do_force) { crm_debug("Forcing..."); cib_options |= cib_quorum_override; } set_working_set_defaults(&data_set); if (rsc_cmd != 'P' || rsc_id) { resource_t *rsc = NULL; cib_conn = cib_new(); rc = cib_conn->cmds->signon(cib_conn, crm_system_name, cib_command); if (rc != pcmk_ok) { CMD_ERR("Error signing on to the CIB service: %s\n", pcmk_strerror(rc)); return crm_exit(rc); } if (xml_file != NULL) { cib_xml_copy = filename2xml(xml_file); } else { rc = cib_conn->cmds->query(cib_conn, NULL, &cib_xml_copy, cib_scope_local | cib_sync_call); } if(rc != pcmk_ok) { goto bail; } else if (cli_config_update(&cib_xml_copy, NULL, FALSE) == FALSE) { rc = -ENOKEY; goto bail; } data_set.input = cib_xml_copy; data_set.now = crm_time_new(NULL); cluster_status(&data_set); if (rsc_id) { rsc = find_rsc_or_clone(rsc_id, &data_set); } if (rsc == NULL && rsc_cmd != 'C') { rc = -ENXIO; } } if (rsc_cmd == 'R' || rsc_cmd == 'C' || rsc_cmd == 'F' || rsc_cmd == 'P') { xmlNode *xml = NULL; mainloop_io_t *source = mainloop_add_ipc_client(CRM_SYSTEM_CRMD, G_PRIORITY_DEFAULT, 0, NULL, &crm_callbacks); crmd_channel = mainloop_get_ipc_client(source); if (crmd_channel == NULL) { CMD_ERR("Error signing on to the CRMd service\n"); rc = -ENOTCONN; goto bail; } xml = create_hello_message(our_pid, crm_system_name, "0", "1"); crm_ipc_send(crmd_channel, xml, 0, 0, NULL); free_xml(xml); } if (rsc_cmd == 'L') { rc = pcmk_ok; do_find_resource_list(&data_set, FALSE); } else if (rsc_cmd == 'l') { int found = 0; GListPtr lpc = NULL; rc = pcmk_ok; for (lpc = data_set.resources; lpc != NULL; lpc = lpc->next) { resource_t *rsc = (resource_t *) lpc->data; found++; print_raw_rsc(rsc); } if (found == 0) { printf("NO resources configured\n"); rc = -ENXIO; goto bail; } } else if (rsc_cmd == 0 && rsc_long_cmd) { svc_action_t *op = NULL; const char *rtype = NULL; const char *rprov = NULL; const char *rclass = NULL; const char *action = NULL; GHashTable *params = NULL; resource_t *rsc = pe_find_resource(data_set.resources, rsc_id); if (rsc == NULL) { CMD_ERR("Must supply a resource id with -r\n"); rc = -ENXIO; goto bail; } if (safe_str_eq(rsc_long_cmd, "force-stop")) { action = "stop"; } else if (safe_str_eq(rsc_long_cmd, "force-start")) { action = "start"; if(rsc->variant >= pe_clone) { rc = do_find_resource(rsc_id, NULL, &data_set); if(rc > 0 && do_force == FALSE) { CMD_ERR("It is not safe to start %s here: the cluster claims it is already active", rsc_id); CMD_ERR("Try setting target-role=stopped first or specifying --force"); crm_exit(EPERM); } } } else if (safe_str_eq(rsc_long_cmd, "force-check")) { action = "monitor"; } rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); rprov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); rtype = crm_element_value(rsc->xml, XML_ATTR_TYPE); if(safe_str_eq(rclass, "stonith")){ CMD_ERR("Sorry, --%s doesn't support %s resources yet\n", rsc_long_cmd, rclass); crm_exit(EOPNOTSUPP); } params = generate_resource_params(rsc, &data_set); op = resources_action_create(rsc->id, rclass, rprov, rtype, action, 0, -1, params); if(do_trace) { setenv("OCF_TRACE_RA", "1", 1); } if(op == NULL) { /* Re-run but with stderr enabled so we can display a sane error message */ crm_enable_stderr(TRUE); resources_action_create(rsc->id, rclass, rprov, rtype, action, 0, -1, params); return crm_exit(EINVAL); } else if (services_action_sync(op)) { int more, lpc, last; char *local_copy = NULL; if (op->status == PCMK_LRM_OP_DONE) { printf("Operation %s for %s (%s:%s:%s) returned %d\n", action, rsc->id, rclass, rprov ? rprov : "", rtype, op->rc); } else { printf("Operation %s for %s (%s:%s:%s) failed: %d\n", action, rsc->id, rclass, rprov ? rprov : "", rtype, op->status); } if (op->stdout_data) { local_copy = strdup(op->stdout_data); more = strlen(local_copy); last = 0; for (lpc = 0; lpc < more; lpc++) { if (local_copy[lpc] == '\n' || local_copy[lpc] == 0) { local_copy[lpc] = 0; printf(" > stdout: %s\n", local_copy + last); last = lpc + 1; } } free(local_copy); } if (op->stderr_data) { local_copy = strdup(op->stderr_data); more = strlen(local_copy); last = 0; for (lpc = 0; lpc < more; lpc++) { if (local_copy[lpc] == '\n' || local_copy[lpc] == 0) { local_copy[lpc] = 0; printf(" > stderr: %s\n", local_copy + last); last = lpc + 1; } } free(local_copy); } } rc = op->rc; services_action_free(op); return crm_exit(rc); } else if (rsc_cmd == 'A' || rsc_cmd == 'a') { GListPtr lpc = NULL; resource_t *rsc = pe_find_resource(data_set.resources, rsc_id); xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set.input); if (rsc == NULL) { CMD_ERR("Must supply a resource id with -r\n"); rc = -ENXIO; goto bail; } unpack_constraints(cib_constraints, &data_set); for (lpc = data_set.resources; lpc != NULL; lpc = lpc->next) { resource_t *r = (resource_t *) lpc->data; clear_bit(r->flags, pe_rsc_allocating); } show_colocation(rsc, TRUE, rsc_cmd == 'A', 1); fprintf(stdout, "* %s\n", rsc->id); show_location(rsc, NULL); for (lpc = data_set.resources; lpc != NULL; lpc = lpc->next) { resource_t *r = (resource_t *) lpc->data; clear_bit(r->flags, pe_rsc_allocating); } show_colocation(rsc, FALSE, rsc_cmd == 'A', 1); } else if (rsc_cmd == 'c') { int found = 0; GListPtr lpc = NULL; rc = pcmk_ok; for (lpc = data_set.resources; lpc != NULL; lpc = lpc->next) { resource_t *rsc = (resource_t *) lpc->data; print_cts_rsc(rsc); found++; } print_cts_constraints(&data_set); } else if (rsc_cmd == 'F') { rc = fail_lrm_rsc(crmd_channel, host_uname, rsc_id, &data_set); if (rc == pcmk_ok) { start_mainloop(); } } else if (rsc_cmd == 'O') { rc = list_resource_operations(rsc_id, host_uname, TRUE, &data_set); } else if (rsc_cmd == 'o') { rc = list_resource_operations(rsc_id, host_uname, FALSE, &data_set); } else if (rc == -ENXIO) { CMD_ERR("Resource '%s' not found: %s\n", crm_str(rsc_id), pcmk_strerror(rc)); } else if (rsc_cmd == 'W') { if (rsc_id == NULL) { CMD_ERR("Must supply a resource id with -r\n"); rc = -ENXIO; goto bail; } rc = do_find_resource(rsc_id, NULL, &data_set); } else if (rsc_cmd == 'q') { if (rsc_id == NULL) { CMD_ERR("Must supply a resource id with -r\n"); rc = -ENXIO; goto bail; } rc = dump_resource(rsc_id, &data_set, TRUE); } else if (rsc_cmd == 'w') { if (rsc_id == NULL) { CMD_ERR("Must supply a resource id with -r\n"); rc = -ENXIO; goto bail; } rc = dump_resource(rsc_id, &data_set, FALSE); } else if (rsc_cmd == 'U') { node_t *dest = NULL; if (rsc_id == NULL) { CMD_ERR("No value specified for --resource\n"); rc = -ENXIO; goto bail; } if (host_uname) { dest = pe_find_node(data_set.nodes, host_uname); if (dest == NULL) { CMD_ERR("Unknown node: %s\n", host_uname); rc = -ENXIO; goto bail; } rc = clear_resource(rsc_id, dest->details->uname, NULL, cib_conn); } else { rc = clear_resource(rsc_id, NULL, data_set.nodes, cib_conn); } } else if (rsc_cmd == 'M' && host_uname) { int count = 0; node_t *current = NULL; node_t *dest = pe_find_node(data_set.nodes, host_uname); resource_t *rsc = pe_find_resource(data_set.resources, rsc_id); + gboolean cur_is_dest = FALSE; rc = -EINVAL; if (rsc == NULL) { CMD_ERR("Resource '%s' not moved: not found\n", rsc_id); rc = -ENXIO; goto bail; } else if (scope_master && rsc->variant < pe_master) { resource_t *p = uber_parent(rsc); if(p->variant == pe_master) { CMD_ERR("Using parent '%s' for --move command instead of '%s'.\n", rsc->id, rsc_id); rsc_id = p->id; rsc = p; } else { CMD_ERR("Ignoring '--master' option: not valid for %s resources.\n", get_resource_typename(rsc->variant)); scope_master = FALSE; } } if(rsc->variant == pe_master) { GListPtr iter = NULL; for(iter = rsc->children; iter; iter = iter->next) { resource_t *child = (resource_t *)iter->data; enum rsc_role_e child_role = child->fns->state(child, TRUE); if(child_role == RSC_ROLE_MASTER) { rsc = child; count++; } } if(scope_master == FALSE && count == 0) { count = g_list_length(rsc->running_on); } } else if (rsc->variant > pe_group) { count = g_list_length(rsc->running_on); } else if (g_list_length(rsc->running_on) > 1) { CMD_ERR("Resource '%s' not moved: active on multiple nodes\n", rsc_id); goto bail; } if(dest == NULL) { CMD_ERR("Error performing operation: node '%s' is unknown\n", host_uname); rc = -ENXIO; goto bail; } if(g_list_length(rsc->running_on) == 1) { current = rsc->running_on->data; } if(current == NULL) { /* Nothing to check */ } else if(scope_master && rsc->fns->state(rsc, TRUE) != RSC_ROLE_MASTER) { crm_trace("%s is already active on %s but not in correct state", rsc_id, dest->details->uname); - } else if (safe_str_eq(current->details->uname, dest->details->uname)) { - CMD_ERR("Error performing operation: %s is already %s on %s\n", - rsc_id, scope_master?"promoted":"active", dest->details->uname); - goto bail; + cur_is_dest = TRUE; + if (do_force) { + crm_info("%s is already %s on %s, reinforcing placement with location constraint.\n", + rsc_id, scope_master?"promoted":"active", dest->details->uname); + } else { + CMD_ERR("Error performing operation: %s is already %s on %s\n", + rsc_id, scope_master?"promoted":"active", dest->details->uname); + goto bail; + } } /* Clear any previous constraints for 'dest' */ clear_resource(rsc_id, dest->details->uname, data_set.nodes, cib_conn); /* Record an explicit preference for 'dest' */ rc = prefer_resource(rsc_id, dest->details->uname, cib_conn); crm_trace("%s%s now prefers node %s%s", rsc->id, scope_master?" (master)":"", dest->details->uname, do_force?"(forced)":""); - if(do_force) { + /* only ban the previous location if current location != destination location. + * it is possible to use -M to enforce a location without regard of where the + * resource is currently located */ + if(do_force && (cur_is_dest == FALSE)) { /* Ban the original location if possible */ if(current) { ban_resource(rsc_id, current->details->uname, NULL, cib_conn); } else if(count > 1) { CMD_ERR("Resource '%s' is currently %s in %d locations. One may now move one to %s\n", rsc_id, scope_master?"promoted":"active", count, dest->details->uname); CMD_ERR("You can prevent '%s' from being %s at a specific location with:" " --ban %s--host \n", rsc_id, scope_master?"promoted":"active", scope_master?"--master ":""); } else { crm_trace("Not banning %s from it's current location: not active", rsc_id); } } } else if (rsc_cmd == 'B' && host_uname) { resource_t *rsc = pe_find_resource(data_set.resources, rsc_id); node_t *dest = pe_find_node(data_set.nodes, host_uname); rc = -ENXIO; if (rsc_id == NULL) { CMD_ERR("No value specified for --resource\n"); goto bail; } else if(rsc == NULL) { CMD_ERR("Resource '%s' not moved: unknown\n", rsc_id); } else if (dest == NULL) { CMD_ERR("Error performing operation: node '%s' is unknown\n", host_uname); goto bail; } rc = ban_resource(rsc_id, dest->details->uname, NULL, cib_conn); } else if (rsc_cmd == 'B' || rsc_cmd == 'M') { resource_t *rsc = pe_find_resource(data_set.resources, rsc_id); rc = -ENXIO; if (rsc_id == NULL) { CMD_ERR("No value specified for --resource\n"); goto bail; } rc = -EINVAL; if(rsc == NULL) { CMD_ERR("Resource '%s' not moved: unknown\n", rsc_id); } else if(g_list_length(rsc->running_on) == 1) { node_t *current = rsc->running_on->data; rc = ban_resource(rsc_id, current->details->uname, NULL, cib_conn); } else if(rsc->variant == pe_master) { int count = 0; GListPtr iter = NULL; node_t *current = NULL; for(iter = rsc->children; iter; iter = iter->next) { resource_t *child = (resource_t *)iter->data; enum rsc_role_e child_role = child->fns->state(child, TRUE); if(child_role == RSC_ROLE_MASTER) { count++; current = child->running_on->data; } } if(count == 1 && current) { rc = ban_resource(rsc_id, current->details->uname, NULL, cib_conn); } else { CMD_ERR("Resource '%s' not moved: active in %d locations (promoted in %d).\n", rsc_id, g_list_length(rsc->running_on), count); CMD_ERR("You can prevent '%s' from running on a specific location with: --ban --host \n", rsc_id); CMD_ERR("You can prevent '%s' from being promoted at a specific location with:" " --ban --master --host \n", rsc_id); } } else { CMD_ERR("Resource '%s' not moved: active in %d locations.\n", rsc_id, g_list_length(rsc->running_on)); CMD_ERR("You can prevent '%s' from running on a specific location with: --ban --host \n", rsc_id); } } else if (rsc_cmd == 'G') { if (rsc_id == NULL) { CMD_ERR("Must supply a resource id with -r\n"); rc = -ENXIO; goto bail; } rc = dump_resource_prop(rsc_id, prop_name, &data_set); } else if (rsc_cmd == 'S') { xmlNode *msg_data = NULL; if (prop_value == NULL || strlen(prop_value) == 0) { CMD_ERR("You need to supply a value with the -v option\n"); rc = -EINVAL; goto bail; } else if (cib_conn == NULL) { rc = -ENOTCONN; goto bail; } if (rsc_id == NULL) { CMD_ERR("Must supply a resource id with -r\n"); rc = -ENXIO; goto bail; } CRM_LOG_ASSERT(rsc_type != NULL); CRM_LOG_ASSERT(prop_name != NULL); CRM_LOG_ASSERT(prop_value != NULL); msg_data = create_xml_node(NULL, rsc_type); crm_xml_add(msg_data, XML_ATTR_ID, rsc_id); crm_xml_add(msg_data, prop_name, prop_value); rc = cib_conn->cmds->modify(cib_conn, XML_CIB_TAG_RESOURCES, msg_data, cib_options); free_xml(msg_data); } else if (rsc_cmd == 'g') { if (rsc_id == NULL) { CMD_ERR("Must supply a resource id with -r\n"); rc = -ENXIO; goto bail; } rc = dump_resource_attr(rsc_id, prop_name, &data_set); } else if (rsc_cmd == 'p') { if (rsc_id == NULL) { CMD_ERR("Must supply a resource id with -r\n"); rc = -ENXIO; goto bail; } if (prop_value == NULL || strlen(prop_value) == 0) { CMD_ERR("You need to supply a value with the -v option\n"); rc = -EINVAL; goto bail; } /* coverity[var_deref_model] False positive */ rc = set_resource_attr(rsc_id, prop_set, prop_id, prop_name, prop_value, recursive, cib_conn, &data_set); } else if (rsc_cmd == 'd') { if (rsc_id == NULL) { CMD_ERR("Must supply a resource id with -r\n"); rc = -ENXIO; goto bail; } /* coverity[var_deref_model] False positive */ rc = delete_resource_attr(rsc_id, prop_set, prop_id, prop_name, cib_conn, &data_set); } else if (rsc_cmd == 'C' && rsc_id) { resource_t *rsc = pe_find_resource(data_set.resources, rsc_id); crm_debug("Re-checking the state of %s on %s", rsc_id, host_uname); if(rsc) { crmd_replies_needed = 0; rc = delete_lrm_rsc(cib_conn, crmd_channel, host_uname, rsc, &data_set); } else { rc = -ENODEV; } if (rc == pcmk_ok) { start_mainloop(); } } else if (rsc_cmd == 'C') { #if HAVE_ATOMIC_ATTRD xmlNode *cmd = create_request(CRM_OP_REPROBE, NULL, host_uname, CRM_SYSTEM_CRMD, crm_system_name, our_pid); crm_debug("Re-checking the state of all resources on %s", host_uname?host_uname:"all nodes"); rc = attrd_update_delegate( NULL, 'u', host_uname, "fail-count-*", NULL, XML_CIB_TAG_STATUS, NULL, NULL, NULL, FALSE); if (crm_ipc_send(crmd_channel, cmd, 0, 0, NULL) > 0) { start_mainloop(); } free_xml(cmd); #else GListPtr rIter = NULL; crmd_replies_needed = 0; for (rIter = data_set.resources; rIter; rIter = rIter->next) { resource_t *rsc = rIter->data; delete_lrm_rsc(cib_conn, crmd_channel, host_uname, rsc, &data_set); } start_mainloop(); #endif } else if (rsc_cmd == 'D') { xmlNode *msg_data = NULL; if (rsc_id == NULL) { CMD_ERR("Must supply a resource id with -r\n"); rc = -ENXIO; goto bail; } if (rsc_type == NULL) { CMD_ERR("You need to specify a resource type with -t"); rc = -ENXIO; goto bail; } else if (cib_conn == NULL) { rc = -ENOTCONN; goto bail; } msg_data = create_xml_node(NULL, rsc_type); crm_xml_add(msg_data, XML_ATTR_ID, rsc_id); rc = cib_conn->cmds->delete(cib_conn, XML_CIB_TAG_RESOURCES, msg_data, cib_options); free_xml(msg_data); } else { CMD_ERR("Unknown command: %c\n", rsc_cmd); } bail: if (data_set.input != NULL) { cleanup_alloc_calculations(&data_set); } if (cib_conn != NULL) { cib_conn->cmds->signoff(cib_conn); cib_delete(cib_conn); } if (rc == -pcmk_err_no_quorum) { CMD_ERR("Error performing operation: %s\n", pcmk_strerror(rc)); CMD_ERR("Try using -f\n"); } else if (rc != pcmk_ok) { CMD_ERR("Error performing operation: %s\n", pcmk_strerror(rc)); } return crm_exit(rc); } diff --git a/xml/constraints-next.rng b/xml/constraints-next.rng index 051603d93e..90f5b0f40a 100644 --- a/xml/constraints-next.rng +++ b/xml/constraints-next.rng @@ -1,243 +1,256 @@ + + + + + stop demote fence freeze + + + always + never + exclusive + + + start promote demote stop Stopped Started Master Slave Optional Mandatory Serialize