diff --git a/GNUmakefile b/GNUmakefile index 2d1b47da50..27fd317c95 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -1,281 +1,281 @@ # # Copyright (C) 2008 Andrew Beekhof # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # default: $(shell test ! -e configure && echo init) $(shell test -e configure && echo core) -include Makefile PACKAGE ?= pacemaker # Force 'make dist' to be consistent with 'make export' distprefix = ClusterLabs-$(PACKAGE) distdir = $(distprefix)-$(TAG) TARFILE = $(distdir).tar.gz DIST_ARCHIVES = $(TARFILE) RPM_ROOT = $(shell pwd) RPM_OPTS = --define "_sourcedir $(RPM_ROOT)" \ --define "_specdir $(RPM_ROOT)" \ --define "_srcrpmdir $(RPM_ROOT)" \ MOCK_OPTIONS ?= --resultdir=$(RPM_ROOT)/mock --no-cleanup-after # Default to fedora compliant spec files # SLES: /etc/SuSE-release # openSUSE: /etc/SuSE-release # RHEL: /etc/redhat-release # Fedora: /etc/fedora-release, /etc/redhat-release, /etc/system-release F ?= $(shell test ! -e /etc/fedora-release && echo 0; test -e /etc/fedora-release && rpm --eval %{fedora}) ARCH ?= $(shell test -e /etc/fedora-release && rpm --eval %{_arch}) MOCK_CFG ?= $(shell test -e /etc/fedora-release && echo fedora-$(F)-$(ARCH)) DISTRO ?= $(shell test -e /etc/SuSE-release && echo suse; echo fedora) TAG ?= $(shell git log --pretty="format:%h" -n 1) WITH ?= --without=doc #WITH ?= --without=doc --with=gcov -LAST_RELEASE ?= $(shell test -e /Volumes || git tag -l | grep Pacemaker | sort -Vr | head -n 1) -NEXT_RELEASE ?= $(shell test -e /Volumes || git tag -l | grep Pacemaker | sort -Vr | head -n 1 | awk -F. '/[0-9]+\./{$$3+=1;OFS=".";print $$1,$$2,$$3}') +LAST_RELEASE ?= $(shell test -e /Volumes || git tag -l | grep Pacemaker | grep -v rc | sort -Vr | head -n 1) +NEXT_RELEASE ?= $(shell test -e /Volumes || git tag -l | grep Pacemaker | grep -v rc | sort -Vr | head -n 1 | awk -F. '/[0-9]+\./{$$3+=1;OFS=".";print $$1,$$2,$$3}') BUILD_COUNTER ?= build.counter LAST_COUNT = $(shell test ! -e $(BUILD_COUNTER) && echo 0; test -e $(BUILD_COUNTER) && cat $(BUILD_COUNTER)) COUNT = $(shell expr 1 + $(LAST_COUNT)) init: ./autogen.sh echo "Now run configure with any arguments (eg. --prefix) specific to your system" if [ -e `which rpm` ]; then \ echo "Suggested invocation:"; \ rpm --eval %{configure} | grep -v program-prefix`; \ fi export: rm -f $(PACKAGE)-dirty.tar.* $(PACKAGE)-tip.tar.* $(PACKAGE)-HEAD.tar.* if [ ! -f $(TARFILE) ]; then \ rm -f $(PACKAGE).tar.*; \ if [ $(TAG) = dirty ]; then \ git commit -m "DO-NOT-PUSH" -a; \ git archive --prefix=$(distdir)/ HEAD | gzip > $(TARFILE); \ git reset --mixed HEAD^; \ else \ git archive --prefix=$(distdir)/ $(TAG) | gzip > $(TARFILE); \ fi; \ echo `date`: Rebuilt $(TARFILE); \ else \ echo `date`: Using existing tarball: $(TARFILE); \ fi $(PACKAGE)-opensuse.spec: $(PACKAGE)-suse.spec cp $^ $@ @echo Rebuilt $@ $(PACKAGE)-suse.spec: $(PACKAGE).spec.in GNUmakefile rm -f $@ cp $(PACKAGE).spec.in $@ sed -i.sed s:%{_docdir}/%{name}:%{_docdir}/%{name}-%{version}:g $@ sed -i.sed s:corosynclib:libcorosync:g $@ sed -i.sed s:libexecdir}/lcrso:libdir}/lcrso:g $@ sed -i.sed 's:%{name}-libs:lib%{name}3:g' $@ sed -i.sed s:heartbeat-libs:heartbeat:g $@ sed -i.sed s:cluster-glue-libs:libglue:g $@ sed -i.sed s:libselinux-devel:automake:g $@ sed -i.sed s:lm_sensors-devel:automake:g $@ sed -i.sed s:bzip2-devel:libbz2-devel:g $@ sed -i.sed s:Development/Libraries:Development/Libraries/C\ and\ C++:g $@ sed -i.sed s:System\ Environment/Daemons:Productivity/Clustering/HA:g $@ sed -i.sed s:bcond_without\ publican:bcond_with\ publican:g $@ sed -i.sed s:\#global\ py_sitedir:\%global\ py_sitedir:g $@ sed -i.sed s:docbook-style-xsl:docbook-xsl-stylesheets:g $@ sed -i.sed s:libtool-ltdl-devel::g $@ sed -i.sed s:publican::g $@ sed -i.sed s:byacc::g $@ sed -i.sed s:without\ cman:with\ cman:g $@ sed -i.sed s:.*pacemaker.service.*::g $@ sed -i.sed s:global\ cs_major.*:global\ cs_major\ 1:g $@ sed -i.sed s:global\ cs_minor.*:global\ cs_minor\ 4:g $@ @echo Rebuilt $@ # Works for all fedora based distros $(PACKAGE)-%.spec: $(PACKAGE).spec.in rm -f $@ cp $(PACKAGE).spec.in $(PACKAGE)-$*.spec @echo Rebuilt $@ srpm-%: export $(PACKAGE)-%.spec rm -f *.src.rpm cp $(PACKAGE)-$*.spec $(PACKAGE).spec if [ -e $(BUILD_COUNTER) ]; then \ echo $(COUNT) > $(BUILD_COUNTER); \ fi sed -i.sed 's/Source0:.*/Source0:\ $(TARFILE)/' $(PACKAGE).spec sed -i.sed 's/global\ specversion.*/global\ specversion\ $(COUNT)/' $(PACKAGE).spec sed -i.sed 's/global\ upstream_version.*/global\ upstream_version\ $(TAG)/' $(PACKAGE).spec sed -i.sed 's/global\ upstream_prefix.*/global\ upstream_prefix\ $(distprefix)/' $(PACKAGE).spec case $(TAG) in \ Pacemaker*) sed -i.sed 's/Version:.*/Version:\ $(shell echo $(TAG) | sed s:Pacemaker-::)/' $(PACKAGE).spec;; \ *) sed -i.sed 's/Version:.*/Version:\ $(shell echo $(NEXT_RELEASE) | sed s:Pacemaker-::)/' $(PACKAGE).spec;; \ esac rpmbuild -bs --define "dist .$*" $(RPM_OPTS) $(WITH) $(PACKAGE).spec chroot: mock-$(MOCK_CFG) mock-install-$(MOCK_CFG) mock-sh-$(MOCK_CFG) echo "Done" mock-next: make F=$(shell expr 1 + $(F)) mock mock-rawhide: make F=rawhide mock mock-install-%: echo "Installing packages" mock --root=$* $(MOCK_OPTIONS) --install $(RPM_ROOT)/mock/*.rpm vi sudo valgrind lcov gdb fence-agents mock-sh-%: echo "Connecting" mock --root=$* $(MOCK_OPTIONS) --shell # eg. WITH="--with cman" make rpm mock-%: make srpm-$(firstword $(shell echo $(@:mock-%=%) | tr '-' ' ')) -rm -rf $(RPM_ROOT)/mock @echo "mock --root=$* --rebuild $(WITH) $(MOCK_OPTIONS) $(RPM_ROOT)/*.src.rpm" mock --root=$* --no-cleanup-after --rebuild $(WITH) $(MOCK_OPTIONS) $(RPM_ROOT)/*.src.rpm srpm: srpm-$(DISTRO) echo "Done" mock: mock-$(MOCK_CFG) echo "Done" yumdep: $(PACKAGE)-$(DISTRO).spec sudo yum-builddep $(PACKAGE)-$(DISTRO).spec rpm: srpm @echo To create custom builds, edit the flags and options in $(PACKAGE).spec first rpmbuild $(RPM_OPTS) $(WITH) --rebuild $(RPM_ROOT)/*.src.rpm release: make TAG=$(LAST_RELEASE) rpm dirty: make TAG=dirty mock COVERITY_DIR = $(shell pwd)/coverity-$(TAG) COVFILE = pacemaker-coverity-$(TAG).tgz COVHOST ?= scan5.coverity.com COVPASS ?= password # Public coverity coverity: test -e configure || ./autogen.sh test -e Makefile || ./configure make core-clean rm -rf $(COVERITY_DIR) cov-build --dir $(COVERITY_DIR) make core tar czf $(COVFILE) --transform=s@.*$(TAG)@cov-int@ $(COVERITY_DIR) @echo "Uploading to public Coverity instance..." curl --form file=@$(COVFILE) --form project=$(PACKAGE) --form password=$(COVPASS) --form email=andrew@beekhof.net http://$(COVHOST)/cgi-bin/upload.py coverity-corp: test -e configure || ./autogen.sh test -e Makefile || ./configure make core-clean rm -rf $(COVERITY_DIR) cov-build --dir $(COVERITY_DIR) make core @echo "Waiting for a corporate Coverity license..." cov-analyze --dir $(COVERITY_DIR) --wait-for-license cov-format-errors --dir $(COVERITY_DIR) --emacs-style > $(TAG).coverity cov-format-errors --dir $(COVERITY_DIR) rsync -avzxlSD --progress $(COVERITY_DIR)/c/output/errors/ root@www.clusterlabs.org:/var/www/html/coverity/$(PACKAGE)/$(TAG) make core-clean # cov-commit-defects --host $(COVHOST) --dir $(COVERITY_DIR) --stream $(PACKAGE) --user auto --password $(COVPASS) rm -rf $(COVERITY_DIR) global: clean-generic gtags -q %.8.html: %.8 echo groff -mandoc `man -w ./$<` -T html > $@ groff -mandoc `man -w ./$<` -T html > $@ rsync -azxlSD --progress $@ root@www.clusterlabs.org:/var/www/html/man/ %.7.html: %.7 echo groff -mandoc `man -w ./$<` -T html > $@ groff -mandoc `man -w ./$<` -T html > $@ rsync -azxlSD --progress $@ root@www.clusterlabs.org:/var/www/html/man/ doxygen: doxygen Doxyfile abi: abi-check pacemaker $(LAST_RELEASE) $(TAG) abi-www: abi-check -u pacemaker $(LAST_RELEASE) $(TAG) www: all global doxygen find . -name "[a-z]*.8" -exec make \{\}.html \; find . -name "[a-z]*.7" -exec make \{\}.html \; htags -sanhIT rsync -avzxlSD --progress HTML/ root@www.clusterlabs.org:/var/www/html/global/$(PACKAGE)/$(TAG) rsync -avzxlSD --progress doc/api/html/ root@www.clusterlabs.org:/var/www/html/doxygen/$(PACKAGE)/$(TAG) make -C doc www make coverity summary: @printf "\n* `date +"%a %b %d %Y"` `hg showconfig ui.username` $(NEXT_RELEASE)-1" @printf "\n- Update source tarball to revision: `git id`" @printf "\n- Statistics:\n" @printf " Changesets: `git log --pretty=format:'%h' $(LAST_RELEASE)..HEAD | wc -l`\n" @printf " Diff: " @git diff -r $(LAST_RELEASE)..HEAD --stat | tail -n 1 changes: @make summary @printf "\n- Features added in $(NEXT_RELEASE)\n" @git log --pretty=format:' +%s' --abbrev-commit $(LAST_RELEASE)..HEAD | grep -e Feature: | sed -e 's@Feature:@@' | sort -uf @printf "\n- Changes since $(LAST_RELEASE)\n" @git log --pretty=format:' +%s' --abbrev-commit $(LAST_RELEASE)..HEAD | grep -e High: -e Fix: -e Bug | sed -e 's@Fix:@@' -e s@High:@@ -e s@Fencing:@fencing:@ -e 's@Bug:@ Bug@' -e s@PE:@pengine:@ | sort -uf changelog: @make changes > ChangeLog @printf "\n">> ChangeLog git show $(LAST_RELEASE):ChangeLog >> ChangeLog @echo -e "\033[1;35m -- Don't forget to run the bumplibs.sh script! --\033[0m" indent: find . -name "*.h" -exec ./p-indent \{\} \; find . -name "*.c" -exec ./p-indent \{\} \; git co HEAD crmd/fsa_proto.h lib/gnu rel-tags: tags find . -name TAGS -exec sed -i.sed 's:\(.*\)/\(.*\)/TAGS:\2/TAGS:g' \{\} \; ccc_analyzer=/usr/lib64/clang-analyzer/scan-build/ccc-analyzer clang: test -e $(ccc_analyzer) || echo "CLang Analyiser not available. Install the clang-analyzer package" test -e $(ccc_analyzer) || false make CC=$(ccc_analyzer) check # V3 = scandir unsetenv alphasort # V2 = setenv strerror strchrnul strndup # http://www.gnu.org/software/gnulib/manual/html_node/Initial-import.html#Initial-import GNU_MODS = crypto/md5 gnulib-update: -test ! -e gnulib && git clone git://git.savannah.gnu.org/gnulib.git cd gnulib && git pull gnulib/gnulib-tool --source-base=lib/gnu --lgpl=2 --no-vc-files --import $(GNU_MODS) diff --git a/crmd/te_actions.c b/crmd/te_actions.c index c3e333d3b9..8fadd875e5 100644 --- a/crmd/te_actions.c +++ b/crmd/te_actions.c @@ -1,544 +1,548 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include char *te_uuid = NULL; void send_rsc_command(crm_action_t * action); static void te_start_action_timer(crm_graph_t * graph, crm_action_t * action) { action->timer = calloc(1, sizeof(crm_action_timer_t)); action->timer->timeout = action->timeout; action->timer->reason = timeout_action; action->timer->action = action; action->timer->source_id = g_timeout_add(action->timer->timeout + graph->network_delay, action_timer_callback, (void *)action->timer); CRM_ASSERT(action->timer->source_id != 0); } static gboolean te_pseudo_action(crm_graph_t * graph, crm_action_t * pseudo) { crm_debug("Pseudo action %d fired and confirmed", pseudo->id); pseudo->confirmed = TRUE; update_graph(graph, pseudo); trigger_graph(); return TRUE; } void send_stonith_update(crm_action_t * action, const char *target, const char *uuid) { int rc = pcmk_ok; crm_node_t *peer = NULL; /* zero out the node-status & remove all LRM status info */ xmlNode *node_state = NULL; CRM_CHECK(target != NULL, return); CRM_CHECK(uuid != NULL, return); if (get_node_uuid(0, target) == NULL) { set_node_uuid(target, uuid); } /* Make sure the membership and join caches are accurate */ peer = crm_get_peer(0, target); if (peer->uuid == NULL) { crm_info("Recording uuid '%s' for node '%s'", uuid, target); peer->uuid = strdup(uuid); } crm_update_peer_proc(__FUNCTION__, peer, crm_proc_none, NULL); crm_update_peer_state(__FUNCTION__, peer, CRM_NODE_LOST, 0); crm_update_peer_expected(__FUNCTION__, peer, CRMD_JOINSTATE_DOWN); erase_node_from_join(target); node_state = do_update_node_cib(peer, node_update_cluster | node_update_peer | node_update_join | node_update_expected, NULL, __FUNCTION__); /* Force our known ID */ crm_xml_add(node_state, XML_ATTR_UUID, uuid); rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, node_state, cib_quorum_override | cib_scope_local | cib_can_create); /* Delay processing the trigger until the update completes */ crm_debug("Sending fencing update %d for %s", rc, target); fsa_register_cib_callback(rc, FALSE, strdup(target), cib_fencing_updated); /* Make sure it sticks */ /* fsa_cib_conn->cmds->bump_epoch(fsa_cib_conn, cib_quorum_override|cib_scope_local); */ erase_status_tag(target, XML_CIB_TAG_LRM, cib_scope_local); erase_status_tag(target, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local); free_xml(node_state); return; } static gboolean te_fence_node(crm_graph_t * graph, crm_action_t * action) { int rc = 0; const char *id = NULL; const char *uuid = NULL; const char *target = NULL; const char *type = NULL; gboolean invalid_action = FALSE; enum stonith_call_options options = st_opt_none; id = ID(action->xml); target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); type = crm_meta_value(action->params, "stonith_action"); CRM_CHECK(id != NULL, invalid_action = TRUE); CRM_CHECK(uuid != NULL, invalid_action = TRUE); CRM_CHECK(type != NULL, invalid_action = TRUE); CRM_CHECK(target != NULL, invalid_action = TRUE); if (invalid_action) { crm_log_xml_warn(action->xml, "BadAction"); return FALSE; } crm_notice("Executing %s fencing operation (%s) on %s (timeout=%d)", type, id, target, transition_graph->stonith_timeout); /* Passing NULL means block until we can connect... */ te_connect_stonith(NULL); if (crmd_join_phase_count(crm_join_confirmed) == 1) { options |= st_opt_allow_suicide; } rc = stonith_api->cmds->fence(stonith_api, options, target, type, transition_graph->stonith_timeout / 1000, 0); stonith_api->cmds->register_callback(stonith_api, rc, transition_graph->stonith_timeout / 1000, st_opt_timeout_updates, generate_transition_key(transition_graph->id, action->id, 0, te_uuid), "tengine_stonith_callback", tengine_stonith_callback); return TRUE; } static int get_target_rc(crm_action_t * action) { const char *target_rc_s = crm_meta_value(action->params, XML_ATTR_TE_TARGET_RC); if (target_rc_s != NULL) { return crm_parse_int(target_rc_s, "0"); } return 0; } static gboolean te_crm_command(crm_graph_t * graph, crm_action_t * action) { char *counter = NULL; xmlNode *cmd = NULL; gboolean is_local = FALSE; const char *id = NULL; const char *task = NULL; const char *value = NULL; const char *on_node = NULL; const char *router_node = NULL; gboolean rc = TRUE; gboolean no_wait = FALSE; id = ID(action->xml); task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); if (!router_node) { router_node = on_node; } CRM_CHECK(on_node != NULL && strlen(on_node) != 0, crm_err("Corrupted command (id=%s) %s: no node", crm_str(id), crm_str(task)); return FALSE); crm_info("Executing crm-event (%s): %s on %s%s%s", crm_str(id), crm_str(task), on_node, is_local ? " (local)" : "", no_wait ? " - no waiting" : ""); if (safe_str_eq(router_node, fsa_our_uname)) { is_local = TRUE; } value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT); if (crm_is_true(value)) { no_wait = TRUE; } if (is_local && safe_str_eq(task, CRM_OP_SHUTDOWN)) { /* defer until everything else completes */ crm_info("crm-event (%s) is a local shutdown", crm_str(id)); graph->completion_action = tg_shutdown; graph->abort_reason = "local shutdown"; action->confirmed = TRUE; update_graph(graph, action); trigger_graph(); return TRUE; } else if (safe_str_eq(task, CRM_OP_SHUTDOWN)) { crm_node_t *peer = crm_get_peer(0, router_node); crm_update_peer_expected(__FUNCTION__, peer, CRMD_JOINSTATE_DOWN); } cmd = create_request(task, action->xml, router_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL); counter = generate_transition_key(transition_graph->id, action->id, get_target_rc(action), te_uuid); crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter); rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_crmd, cmd, TRUE); free(counter); free_xml(cmd); if (rc == FALSE) { crm_err("Action %d failed: send", action->id); return FALSE; } else if (no_wait) { action->confirmed = TRUE; update_graph(graph, action); trigger_graph(); } else { if (action->timeout <= 0) { crm_err("Action %d: %s on %s had an invalid timeout (%dms). Using %dms instead", action->id, task, on_node, action->timeout, graph->network_delay); action->timeout = graph->network_delay; } te_start_action_timer(graph, action); } return TRUE; } gboolean cib_action_update(crm_action_t * action, int status, int op_rc) { lrmd_event_data_t *op = NULL; xmlNode *state = NULL; xmlNode *rsc = NULL; xmlNode *xml_op = NULL; xmlNode *action_rsc = NULL; int rc = pcmk_ok; const char *name = NULL; const char *value = NULL; const char *rsc_id = NULL; const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); const char *task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); const char *target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); int call_options = cib_quorum_override | cib_scope_local; int target_rc = get_target_rc(action); if (status == PCMK_LRM_OP_PENDING) { crm_debug("%s %d: Recording pending operation %s on %s", crm_element_name(action->xml), action->id, task_uuid, target); } else { crm_warn("%s %d: %s on %s timed out", crm_element_name(action->xml), action->id, task_uuid, target); } action_rsc = find_xml_node(action->xml, XML_CIB_TAG_RESOURCE, TRUE); if (action_rsc == NULL) { return FALSE; } rsc_id = ID(action_rsc); CRM_CHECK(rsc_id != NULL, crm_log_xml_err(action->xml, "Bad:action"); return FALSE); /* update the CIB */ state = create_xml_node(NULL, XML_CIB_TAG_STATE); crm_xml_add(state, XML_ATTR_UUID, target_uuid); crm_xml_add(state, XML_ATTR_UNAME, target); rsc = create_xml_node(state, XML_CIB_TAG_LRM); crm_xml_add(rsc, XML_ATTR_ID, target_uuid); rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCES); rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCE); crm_xml_add(rsc, XML_ATTR_ID, rsc_id); name = XML_ATTR_TYPE; value = crm_element_value(action_rsc, name); crm_xml_add(rsc, name, value); name = XML_AGENT_ATTR_CLASS; value = crm_element_value(action_rsc, name); crm_xml_add(rsc, name, value); name = XML_AGENT_ATTR_PROVIDER; value = crm_element_value(action_rsc, name); crm_xml_add(rsc, name, value); op = convert_graph_action(NULL, action, status, op_rc); op->call_id = -1; op->user_data = generate_transition_key(transition_graph->id, action->id, target_rc, te_uuid); xml_op = create_operation_update(rsc, op, CRM_FEATURE_SET, target_rc, __FUNCTION__, LOG_INFO); lrmd_free_event(op); crm_trace("Updating CIB with \"%s\" (%s): %s %s on %s", status < 0 ? "new action" : XML_ATTR_TIMEOUT, crm_element_name(action->xml), crm_str(task), rsc_id, target); crm_log_xml_trace(xml_op, "Op"); rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, state, call_options); crm_trace("Updating CIB with %s action %d: %s on %s (call_id=%d)", services_lrm_status_str(status), action->id, task_uuid, target, rc); fsa_register_cib_callback(rc, FALSE, NULL, cib_action_updated); free_xml(state); action->sent_update = TRUE; if (rc < pcmk_ok) { return FALSE; } return TRUE; } static gboolean te_rsc_command(crm_graph_t * graph, crm_action_t * action) { /* never overwrite stop actions in the CIB with * anything other than completed results * * Writing pending stops makes it look like the * resource is running again */ xmlNode *cmd = NULL; xmlNode *rsc_op = NULL; gboolean rc = TRUE; gboolean no_wait = FALSE; gboolean is_local = FALSE; char *counter = NULL; const char *task = NULL; const char *value = NULL; const char *on_node = NULL; const char *router_node = NULL; const char *task_uuid = NULL; CRM_ASSERT(action != NULL); CRM_ASSERT(action->xml != NULL); action->executed = FALSE; on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); CRM_CHECK(on_node != NULL && strlen(on_node) != 0, crm_err("Corrupted command(id=%s) %s: no node", ID(action->xml), crm_str(task)); return FALSE); rsc_op = action->xml; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); router_node = crm_element_value(rsc_op, XML_LRM_ATTR_ROUTER_NODE); if (!router_node) { router_node = on_node; } counter = generate_transition_key(transition_graph->id, action->id, get_target_rc(action), te_uuid); crm_xml_add(rsc_op, XML_ATTR_TRANSITION_KEY, counter); if (safe_str_eq(router_node, fsa_our_uname)) { is_local = TRUE; } value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT); if (crm_is_true(value)) { no_wait = TRUE; } crm_notice("Initiating action %d: %s %s on %s%s%s", action->id, task, task_uuid, on_node, is_local ? " (local)" : "", no_wait ? " - no waiting" : ""); cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, router_node, CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL); if (is_local) { /* shortcut local resource commands */ ha_msg_input_t data = { .msg = cmd, .xml = rsc_op, }; fsa_data_t msg = { .id = 0, .data = &data, .data_type = fsa_dt_ha_msg, .fsa_input = I_NULL, .fsa_cause = C_FSA_INTERNAL, .actions = A_LRM_INVOKE, .origin = __FUNCTION__, }; do_lrm_invoke(A_LRM_INVOKE, C_FSA_INTERNAL, fsa_state, I_NULL, &msg); } else { rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_lrmd, cmd, TRUE); } free(counter); free_xml(cmd); action->executed = TRUE; if (rc == FALSE) { crm_err("Action %d failed: send", action->id); return FALSE; } else if (no_wait) { crm_info("Action %d confirmed - no wait", action->id); action->confirmed = TRUE; update_graph(transition_graph, action); trigger_graph(); } else { if (action->timeout <= 0) { crm_err("Action %d: %s %s on %s had an invalid timeout (%dms). Using %dms instead", action->id, task, task_uuid, on_node, action->timeout, graph->network_delay); action->timeout = graph->network_delay; } te_start_action_timer(graph, action); } value = crm_meta_value(action->params, XML_OP_ATTR_PENDING); - if (crm_is_true(value) && safe_str_neq(task, CRMD_ACTION_CANCEL)) { + if (crm_is_true(value) + && safe_str_neq(task, CRMD_ACTION_CANCEL) + && safe_str_neq(task, CRMD_ACTION_DELETE)) { /* write a "pending" entry to the CIB, inhibit notification */ crm_debug("Recording pending op %s in the CIB", task_uuid); cib_action_update(action, PCMK_LRM_OP_PENDING, PCMK_EXECRA_STATUS_UNKNOWN); } return TRUE; } crm_graph_functions_t te_graph_fns = { te_pseudo_action, te_rsc_command, te_crm_command, te_fence_node }; void notify_crmd(crm_graph_t * graph) { const char *type = "unknown"; enum crmd_fsa_input event = I_NULL; crm_debug("Processing transition completion in state %s", fsa_state2string(fsa_state)); CRM_CHECK(graph->complete, graph->complete = TRUE); switch (graph->completion_action) { case tg_stop: type = "stop"; /* fall through */ case tg_done: type = "done"; if (fsa_state == S_TRANSITION_ENGINE) { event = I_TE_SUCCESS; } break; case tg_restart: type = "restart"; if (fsa_state == S_TRANSITION_ENGINE) { - if (transition_timer->period_ms > 0) { - crm_timer_stop(transition_timer); - crm_timer_start(transition_timer); - } else if (too_many_st_failures() == FALSE) { - event = I_PE_CALC; + if (too_many_st_failures() == FALSE) { + if (transition_timer->period_ms > 0) { + crm_timer_stop(transition_timer); + crm_timer_start(transition_timer); + } else { + event = I_PE_CALC; + } } else { event = I_TE_SUCCESS; } } else if (fsa_state == S_POLICY_ENGINE) { register_fsa_action(A_PE_INVOKE); } break; case tg_shutdown: type = "shutdown"; if (is_set(fsa_input_register, R_SHUTDOWN)) { event = I_STOP; } else { crm_err("We didn't ask to be shut down, yet our" " PE is telling us too."); event = I_TERMINATE; } } crm_debug("Transition %d status: %s - %s", graph->id, type, crm_str(graph->abort_reason)); graph->abort_reason = NULL; graph->completion_action = tg_done; clear_bit(fsa_input_register, R_IN_TRANSITION); if (event != I_NULL) { register_fsa_input(C_FSA_INTERNAL, event, NULL); } else if (fsa_source) { mainloop_set_trigger(fsa_source); } } diff --git a/crmd/te_utils.c b/crmd/te_utils.c index 5d7b73c9f4..ae4c5de396 100644 --- a/crmd/te_utils.c +++ b/crmd/te_utils.c @@ -1,425 +1,427 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include crm_trigger_t *stonith_reconnect = NULL; GListPtr stonith_cleanup_list = NULL; static gboolean fail_incompletable_stonith(crm_graph_t * graph) { GListPtr lpc = NULL; const char *task = NULL; xmlNode *last_action = NULL; if (graph == NULL) { return FALSE; } for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) { GListPtr lpc2 = NULL; synapse_t *synapse = (synapse_t *) lpc->data; if (synapse->confirmed) { continue; } for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) { crm_action_t *action = (crm_action_t *) lpc2->data; if (action->type != action_type_crm || action->confirmed) { continue; } task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); if (task && safe_str_eq(task, CRM_OP_FENCE)) { action->failed = TRUE; last_action = action->xml; update_graph(graph, action); crm_notice("Failing action %d (%s): STONITHd terminated", action->id, ID(action->xml)); } } } if (last_action != NULL) { crm_warn("STONITHd failure resulted in un-runnable actions"); abort_transition(INFINITY, tg_restart, "Stonith failure", last_action); return TRUE; } return FALSE; } static void tengine_stonith_connection_destroy(stonith_t * st, stonith_event_t * e) { if (is_set(fsa_input_register, R_ST_REQUIRED)) { crm_crit("Fencing daemon connection failed"); mainloop_set_trigger(stonith_reconnect); } else { crm_info("Fencing daemon disconnected"); } /* cbchan will be garbage at this point, arrange for it to be reset */ stonith_api->state = stonith_disconnected; if (AM_I_DC) { fail_incompletable_stonith(transition_graph); trigger_graph(); } } #if SUPPORT_CMAN # include #endif static void tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event) { static char *client_id = NULL; if(client_id == NULL) { client_id = g_strdup_printf("%s.%d", crm_system_name, getpid()); } if (st_event == NULL) { crm_err("Notify data not found"); return; } if (st_event->result == pcmk_ok && crm_str_eq(st_event->target, fsa_our_uname, TRUE)) { crm_err("We were alegedly just fenced by %s for %s!", st_event->executioner, st_event->origin); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__); return; } if (st_event->result == pcmk_ok && safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) { reset_st_fail_count(st_event->target); } crm_notice("Peer %s was%s terminated (%s) by %s for %s: %s (ref=%s) by client %s", st_event->target, st_event->result == pcmk_ok ? "" : " not", - st_event->operation, + st_event->action, st_event->executioner ? st_event->executioner : "", st_event->origin, pcmk_strerror(st_event->result), st_event->id, st_event->client_origin ? st_event->client_origin : ""); #if SUPPORT_CMAN if (st_event->result == pcmk_ok && is_cman_cluster()) { int local_rc = 0; char *target_copy = strdup(st_event->target); /* In case fenced hasn't noticed yet * * Any fencing that has been inititated will be completed by way of the fence_pcmk redirect */ local_rc = fenced_external(target_copy); if (local_rc != 0) { crm_err("Could not notify CMAN that '%s' is now fenced: %d", st_event->target, local_rc); } else { crm_notice("Notified CMAN that '%s' is now fenced", st_event->target); } free(target_copy); } #endif if (st_event->result == pcmk_ok) { const char *uuid = get_uuid(st_event->target); gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname); crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc); if(AM_I_DC) { /* The DC always sends updates */ send_stonith_update(NULL, st_event->target, uuid); if (st_event->client_origin && safe_str_neq(st_event->client_origin, client_id)) { /* Abort the current transition graph if it wasn't us * that invoked stonith to fence someone */ crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target); abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL); } /* Assume it was our leader if we dont currently have one */ } else if (fsa_our_dc == NULL || safe_str_eq(fsa_our_dc, st_event->target)) { crm_notice("Target %s our leader %s (recorded: %s)", fsa_our_dc ? "was" : "may have been", st_event->target, fsa_our_dc ? fsa_our_dc : ""); /* Given the CIB resyncing that occurs around elections, * have one node update the CIB now and, if the new DC is different, * have them do so too after the election */ if (we_are_executioner) { send_stonith_update(NULL, st_event->target, uuid); } stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(st_event->target)); } } } gboolean te_connect_stonith(gpointer user_data) { int lpc = 0; int rc = pcmk_ok; if (stonith_api == NULL) { stonith_api = stonith_api_new(); } if (stonith_api->state != stonith_disconnected) { crm_trace("Still connected"); return TRUE; } for (lpc = 0; lpc < 30; lpc++) { crm_debug("Attempting connection to fencing daemon..."); sleep(1); rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL); if (rc == pcmk_ok) { break; } if (user_data != NULL) { crm_err("Sign-in failed: triggered a retry"); mainloop_set_trigger(stonith_reconnect); return TRUE; } crm_err("Sign-in failed: pausing and trying again in 2s..."); sleep(1); } CRM_CHECK(rc == pcmk_ok, return TRUE); /* If not, we failed 30 times... just get out */ stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT, tengine_stonith_connection_destroy); stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_FENCE, tengine_stonith_notify); crm_trace("Connected"); return TRUE; } gboolean stop_te_timer(crm_action_timer_t * timer) { const char *timer_desc = "action timer"; if (timer == NULL) { return FALSE; } if (timer->reason == timeout_abort) { timer_desc = "global timer"; crm_trace("Stopping %s", timer_desc); } if (timer->source_id != 0) { crm_trace("Stopping %s", timer_desc); g_source_remove(timer->source_id); timer->source_id = 0; } else { crm_trace("%s was already stopped", timer_desc); return FALSE; } return TRUE; } gboolean te_graph_trigger(gpointer user_data) { enum transition_status graph_rc = -1; if (transition_graph == NULL) { crm_debug("Nothing to do"); return TRUE; } crm_trace("Invoking graph %d in state %s", transition_graph->id, fsa_state2string(fsa_state)); switch (fsa_state) { case S_STARTING: case S_PENDING: case S_NOT_DC: case S_HALT: case S_ILLEGAL: case S_STOPPING: case S_TERMINATE: return TRUE; break; default: break; } if (transition_graph->complete == FALSE) { graph_rc = run_graph(transition_graph); print_graph(LOG_DEBUG_3, transition_graph); if (graph_rc == transition_active) { crm_trace("Transition not yet complete"); return TRUE; } else if (graph_rc == transition_pending) { crm_trace("Transition not yet complete - no actions fired"); return TRUE; } if (graph_rc != transition_complete) { crm_warn("Transition failed: %s", transition_status(graph_rc)); print_graph(LOG_NOTICE, transition_graph); } } crm_debug("Transition %d is now complete", transition_graph->id); transition_graph->complete = TRUE; notify_crmd(transition_graph); return TRUE; } void trigger_graph_processing(const char *fn, int line) { mainloop_set_trigger(transition_trigger); crm_trace("%s:%d - Triggered graph processing", fn, line); } void abort_transition_graph(int abort_priority, enum transition_action abort_action, const char *abort_text, xmlNode * reason, const char *fn, int line) { const char *magic = NULL; CRM_CHECK(transition_graph != NULL, return); if (reason) { int diff_add_updates = 0; int diff_add_epoch = 0; int diff_add_admin_epoch = 0; int diff_del_updates = 0; int diff_del_epoch = 0; int diff_del_admin_epoch = 0; const char *uname = ""; xmlNode *search = reason; xmlNode *diff = get_xpath_object("//" F_CIB_UPDATE_RESULT "//diff", reason, LOG_DEBUG_2); magic = crm_element_value(reason, XML_ATTR_TRANSITION_MAGIC); while(search) { const char *kind = TYPE(search); if(safe_str_eq(XML_CIB_TAG_STATE, kind) || safe_str_eq(XML_CIB_TAG_NODE, kind)) { uname = get_uname(ID(search)); } search = search->parent; } if (diff) { cib_diff_version_details(diff, &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); if (crm_str_eq(TYPE(reason), XML_CIB_TAG_NVPAIR, TRUE)) { crm_info ("%s:%d - Triggered transition abort (complete=%d, node=%s, tag=%s, id=%s, name=%s, value=%s, magic=%s, cib=%d.%d.%d) : %s", fn, line, transition_graph->complete, uname, TYPE(reason), ID(reason), NAME(reason), VALUE(reason), magic ? magic : "NA", diff_add_admin_epoch, diff_add_epoch, diff_add_updates, abort_text); } else { crm_info ("%s:%d - Triggered transition abort (complete=%d, node=%s, tag=%s, id=%s, magic=%s, cib=%d.%d.%d) : %s", fn, line, transition_graph->complete, uname, TYPE(reason), ID(reason), magic ? magic : "NA", diff_add_admin_epoch, diff_add_epoch, diff_add_updates, abort_text); } } else { crm_info ("%s:%d - Triggered transition abort (complete=%d, node=%s, tag=%s, id=%s, magic=%s) : %s", fn, line, transition_graph->complete, uname, TYPE(reason), ID(reason), magic ? magic : "NA", abort_text); } } else { crm_info("%s:%d - Triggered transition abort (complete=%d) : %s", fn, line, transition_graph->complete, abort_text); } switch (fsa_state) { case S_STARTING: case S_PENDING: case S_NOT_DC: case S_HALT: case S_ILLEGAL: case S_STOPPING: case S_TERMINATE: crm_info("Abort suppressed: state=%s (complete=%d)", fsa_state2string(fsa_state), transition_graph->complete); return; default: break; } if (magic == NULL && reason != NULL) { crm_log_xml_debug(reason, "Cause"); } /* Make sure any queued calculations are discarded ASAP */ free(fsa_pe_ref); fsa_pe_ref = NULL; if (transition_graph->complete) { - if (transition_timer->period_ms > 0) { - crm_timer_stop(transition_timer); - crm_timer_start(transition_timer); - } else if (too_many_st_failures() == FALSE) { - register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL); + if (too_many_st_failures() == FALSE) { + if (transition_timer->period_ms > 0) { + crm_timer_stop(transition_timer); + crm_timer_start(transition_timer); + } else { + register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL); + } } else { register_fsa_input(C_FSA_INTERNAL, I_TE_SUCCESS, NULL); } return; } update_abort_priority(transition_graph, abort_priority, abort_action, abort_text); mainloop_set_trigger(transition_trigger); } diff --git a/doc/Pacemaker_Remote/en-US/Ch-Future.txt b/doc/Pacemaker_Remote/en-US/Ch-Future.txt new file mode 100644 index 0000000000..93c082f328 --- /dev/null +++ b/doc/Pacemaker_Remote/en-US/Ch-Future.txt @@ -0,0 +1,15 @@ += Future Features = + +Basic KVM and Linux container integration was the first phase of development for pacemaker_remote and was completed for Pacemaker v1.1.10. Here are some planned features that expand upon this initial functionality. + +== Libvirt Sandbox Support == + +Once the libvirt-sandbox project is integrated with pacemaker_remote, we will gain the ability to preform per-resource linux container isolation with very little performance impact. This functionality will allow resources living on a single node to be isolated from one another. At that point CPU and memory limits could be set per-resource dynamically just using the cluster config. + +== Bare-metal Support == + +The pacemaker_remote daemon already has the ability to run on bare-metal hardware nodes, but the policy engine logic for integrating bare-metal nodes is not complete. There are some complications involved with understanding a bare-metal node's state that virtual nodes don't have. Once this logic is complete, pacemaker will be able to integrate bare-metal nodes in the same way virtual remote-nodes currently are. Some special considerations for fencing will need to be addressed. + +== KVM Migration Support == + +Pacemaker's policy engine is limited in its ability to perform live migrations of KVM resources when resource dependencies are involved. This limitation affects how resources living within a KVM remote-node are handled when a live migration takes place. Currently when a live migration is performed on a KVM remote-node, all the resources within that remote-node have to be stopped before the migration takes place and started once again after migration has finished. This policy engine limitation is fully explained in this bug report, http://bugs.clusterlabs.org/show_bug.cgi?id=5055#c3 diff --git a/doc/Pacemaker_Remote/en-US/Ch-Intro.txt b/doc/Pacemaker_Remote/en-US/Ch-Intro.txt index a505ea1916..c7b3001f38 100644 --- a/doc/Pacemaker_Remote/en-US/Ch-Intro.txt +++ b/doc/Pacemaker_Remote/en-US/Ch-Intro.txt @@ -1,55 +1,55 @@ = Extending High Availability Cluster into Virtual Nodes = == Overview == -The recent addition of the +pacemaker_remote+ service supported by +Pacemaker version 1.1.10 and greater+ allows nodes not running the cluster stack (pacemaker+corosync) to integrate into the cluster and have the cluster manage their resources just as if they were a real cluster node. This means that pacemaker clusters are now capable of managing both launching virtual environments (KVM/LXC) as well as launching the resources that live withing those virtual environments without requiring the virtual environments to run pacemaker or corosync. +The recent addition of the +pacemaker_remote+ service supported by +Pacemaker version 1.1.10 and greater+ allows nodes not running the cluster stack (pacemaker+corosync) to integrate into the cluster and have the cluster manage their resources just as if they were a real cluster node. This means that pacemaker clusters are now capable of managing both launching virtual environments (KVM/LXC) as well as launching the resources that live within those virtual environments without requiring the virtual environments to run pacemaker or corosync. == Terms == +cluster-node+ - A baremetal hardware node running the High Availability stack (pacemaker + corosync) +remote-node+ - A virtual guest node running the pacemaker_remote service. +pacemaker_remote+ - A service daemon capable of performing remote application management within virtual guests (kvm and lxc) in both pacemaker cluster environments and standalone (non-cluster) environments. This service is an enhanced version of pacemaker's local resource manage daemon (LRMD) that is capable of managing and monitoring LSB, OCF, upstart, and systemd resources on a guest remotely. It also allows for most of pacemaker's cli tools (crm_mon, crm_resource, crm_master, crm_attribute, ect..) to work natively on remote-nodes. +LXC+ - A Linux Container defined by the libvirt-lxc Linux container driver. http://libvirt.org/drvlxc.html == Virtual Machine Use Case == The use of pacemaker_remote in virtual machines solves a deployment scenario that has traditionally been difficult to solve. +"I want a pacemaker cluster to manage virtual machine resources, but I also want pacemaker to be able to manage the resources that live within those virtual machines."+ In the past, users desiring this deployment had to make a decision. They would either have to sacrifice the ability of monitoring resources residing within virtual guests by running the cluster stack on the baremetal nodes, or run another cluster instance on the virtual guests where they potentially run into corosync scalability issues. There is a third scenario where the virtual guests run the cluster stack and join the same network as the baremetal nodes, but that can quickly hit issues with scalability as well. With the pacemaker_remote service we have a new option. * The baremetal cluster-nodes run the cluster stack (paceamaker+corosync). * The virtual remote-nodes run the pacemaker_remote service (nearly zero configuration required on the virtual machine side) * The cluster stack on the cluster-nodes launch the virtual machines and immediately connect to the pacemaker_remote service, allowing the virtual machines to integrate into the cluster just as if they were a real cluster-node. The key difference here between the virtual machine remote-nodes and the cluster-nodes is that the remote-nodes are not running the cluster stack. This means the remote nodes will never become the DC, and they do not take place in quorum. On the hand this also means that the remote-nodes are not bound to the scalability limits associated with the cluster stack either. +No 16 node corosync member limits+ to deal with. That isn't to say remote-nodes can scale indefinitely, but the expectation is that remote-nodes scale horizontally much further than cluster-nodes. Other than the quorum limitation, these remote-nodes behave just like cluster nodes in respects to resource management. The cluster is fully capable of managing and monitoring resources on each remote-node. You can build constraints against remote-nodes, put them in standby, or whatever else you'd expect to be able to do with normal cluster-nodes. They even show up in the crm_mon output as you would expect cluster-nodes to. To solidify the concept, an example cluster deployment integrating remote-nodes could look like this. * 16 cluster-nodes running corosync+pacemaker stack. * 64 pacemaker managed virtual machine resources running pacemaker_remote configured as remote-nodes. * 64 pacemaker managed webserver and database resources configured to run on the 64 remote-nodes. With this deployment you would have 64 webservers and databases running on 64 virtual machines on 16 hardware nodes all of which are managed and monitored by the same pacemaker deployment. == Linux Container Use Case == +I want to isolate and limit the system resources (cpu, memory, filesystem) a cluster resource can consume without using virtual machines.+ Using pacemaker_remote with Linux containers (libvirt-lxc) opens up some interesting possibilities for isolating resources in the cluster without the use of a hypervisor. We now have the ability to both define a contained environment with cpu and memory utilization limits and then assign resources to that contained environment all managed from within pacemaker. The LXC Walk-through section of this document outlines how pacemaker_remote can be used to bring Linux containers into the cluster as remote-nodes capable of executing resources. == Expanding the Cluster Stack == === Traditional HA Stack === image::images/pcmk-ha-cluster-stack.png["The Traditional Pacemaker Corosync HA Stack.",width="17cm",height="9cm",align="center"] === Remote-Node Enabled HA Stack === The stack grows one additional layer vertical so we can go further horizontal. image::images/pcmk-ha-remote-stack.png["Placing Pacemaker Remote into the Traditional HA Stack.",width="20cm",height="10cm",align="center"] diff --git a/doc/Pacemaker_Remote/en-US/Ch-Options.txt b/doc/Pacemaker_Remote/en-US/Ch-Options.txt index 17e8a34faf..9e14b310c4 100644 --- a/doc/Pacemaker_Remote/en-US/Ch-Options.txt +++ b/doc/Pacemaker_Remote/en-US/Ch-Options.txt @@ -1,51 +1,51 @@ = Configuration Explained = The walk-through examples use some of these options, but don't explain exactly what they mean or do. This section is meant to be the go-to resource for all the options available for configuring remote-nodes. == Resource Options == When configuring a virtual machine or lxc resource to act as a remote-node, these are the metadata options available to both enable the resource as a remote-node and define the connection parameters. -.Metadata Options for configurint KVM/LXC resources as remote-nodes +.Metadata Options for configuring KVM/LXC resources as remote-nodes [width="95%",cols="1m,1,4<",options="header",align="center"] |========================================================= |Option |Default |Description |+remote-node+ | |The name of the remote-node this resource defines. This both enables the resource as a remote-node and defines the unique name used to identify the remote-node. If no other parameters are set, this value will also be assumed as the hostname to connect to at port 3121. +WARNING+ This value cannot overlap with any resource or node IDs. |+remote-port+ |3121 |Configure a custom port to use for the guest connection to pacemaker_remote. |+remote-addr+ |+remote-node+ value used as hostname |The ip address or hostname to connect to if remote-node's name is not the hostname of the guest. |+remote-connect-timeout+ |60s |How long before a pending guest connection will time out. |========================================================= == Host and Guest Authentication == Authentication and encryption of the connection between cluster-nodes (pacemaker) to remote-nodes (pacemaker_remote) is achieved using TLS with PSK encryption/authentication on +tcp port 3121+. This means both the cluster-node and remote-node must share the same private key. By default this +key must be placed at "/etc/pacemaker/authkey" on both cluster-nodes and remote-nodes+. == Pacemaker and pacemaker_remote Options == If you need to change the default port or authkey location for either pacemaker or pacemaker_remote, there are environment variables you can set that affect both of those daemons. These environment variables can be enabled by placing them in the /etc/sysconfig/pacemaker file. [source,C] ---- #==#==# Pacemaker Remote # Use a custom directory for finding the authkey. PCMK_authkey_location=/etc/pacemaker/authkey # # Specify a custom port for Pacemaker Remote connections PCMK_remote_port=3121 ---- diff --git a/doc/Pacemaker_Remote/en-US/Pacemaker_Remote.xml b/doc/Pacemaker_Remote/en-US/Pacemaker_Remote.xml index f41181b99f..9ee710c1c0 100644 --- a/doc/Pacemaker_Remote/en-US/Pacemaker_Remote.xml +++ b/doc/Pacemaker_Remote/en-US/Pacemaker_Remote.xml @@ -1,16 +1,17 @@ %BOOK_ENTITIES; ]> + diff --git a/doc/Pacemaker_Remote/en-US/Revision_History.xml b/doc/Pacemaker_Remote/en-US/Revision_History.xml index 0d7829ff10..26d8ab6b48 100644 --- a/doc/Pacemaker_Remote/en-US/Revision_History.xml +++ b/doc/Pacemaker_Remote/en-US/Revision_History.xml @@ -1,19 +1,25 @@ %BOOK_ENTITIES; ]> Revision History 1 Tue Mar 19 2013 DavidVosseldvossel@redhat.com Import from Pages.app + + 2 + Tue May 13 2013 + DavidVosseldvossel@redhat.com + Added Future Features Section + diff --git a/doc/pcs-crmsh-quick-ref.md b/doc/pcs-crmsh-quick-ref.md new file mode 100644 index 0000000000..a28960fce1 --- /dev/null +++ b/doc/pcs-crmsh-quick-ref.md @@ -0,0 +1,159 @@ +## Display the configuration + + crmsh # crm configure show + pcs # pcs cluster cib + +## Display the current status + + crmsh # crm_mon -1 + pcs # pcs status + +## Node standby + + crmsh # crm node standby + pcs # pcs cluster standby pcmk-1 + + crmsh # crm node online + pcs # pcs cluster unstandby pcmk-1 + +## Setting configuration options + + crmsh # crm configure property stonith-enabled=false + pcs # pcs property set stonith-enabled=false + +## Listing available resources + + crmsh # crm ra classes + pcs # pcs resource standards + + crmsh # crm ra list ocf pacemaker + pcs # pcs resource agents ocf:pacemaker + +## Creating a resource + + crmsh # crm configure primitive ClusterIP ocf:heartbeat:IPaddr2 \ + params ip=192.168.122.120 cidr_netmask=32 \ + op monitor interval=30s + pcs # pcs resource create ClusterIP IPaddr2 ip=192.168.0.120 cidr_netmask=32 + +The standard and provider (`ocf:heartbeat`) are determined automatically since `IPaddr2` is unique. +The monitor operation is automatically created based on the agent's metadata. + +## Start a resource + crmsh # crm resource start ClusterIP + pcs # pcs resource start ClusterIP + +## Stop a resource + + crmsh # crm resource stop ClusterIP + pcs # pcs resource stop ClusterIP + +## Remove a resource + + crmsh # crm configure delete ClusterIP + pcs # + +## Update a resource + crmsh # crm configure edit ClusterIP + pcs # pcs resource update ClusterIP clusterip_hash=sourceip + +## Display a resource + + crmsh # + pcs # pcs resource show WebFS + +## Resource defaults + + crmsh # crm configure rsc_defaults resource-stickiness=100 + pcs # pcs rsc defaults resource-stickiness=100 + +Listing the current defaults: + + pcs # pcs rsc defaults + +## Operation defaults + + crmsh # crm configure op_defaults timeout=240s + pcs # pcs resource op defaults timeout=240s + +Listing the current defaults: + pcs # pcs resource op defaults + +## Colocation + + crmsh # crm configure colocation website-with-ip INFINITY: WebSite ClusterIP + pcs # pcs constraint colocation add WebSite ClusterIP INFINITY + +With roles + + crmsh # + pcs # + +## Start/stop ordering + + crmsh # crm configure order apache-after-ip mandatory: ClusterIP WebSite + pcs # pcs constraint order ClusterIP then WebSite + +With roles: + + crmsh # + pcs # + +## Preferred locations + + crmsh # crm configure location prefer-pcmk-1 WebSite 50: pcmk-1 + pcs # pcs constraint location WebSite prefers pcmk-1=50 + +With roles: + + crmsh # + pcs # + +## Moving resources + + crmsh # crm resource move WebSite pcmk-1 + pcs # pcs constraint location WebSite prefers pcmk-1=INFINITY + + crmsh # crm resource unmove WebSite + pcs # pcs constraint rm location-WebSite-pcmk-1-INFINITY + +## Creating a clone + + crmsh # configure clone WebIP ClusterIP meta globally-unique="true" clone-max="2" clone-node-max="2" + pcs # pcs resource clone ClusterIP globally-unique=true clone-max=2 clone-node-max=2 + +## Creating a master/slave clone + + crmsh # crm configure ms WebDataClone WebData \ + meta master-max=1 master-node-max=1 \ + clone-max=2 clone-node-max=1 notify=true + pcs # resource master WebDataClone WebData \ + master-max=1 master-node-max=1 clone-max=2 clone-node-max=1 \ + notify=true + +## ... + crmsh # + pcs # + + crmsh # + pcs # + + +## Batch changes + + crmsh # crm + crmsh # cib new drbd_cfg + crmsh # configure primitive WebData ocf:linbit:drbd params drbd_resource=wwwdata \ + op monitor interval=60s + crmsh # configure ms WebDataClone WebData meta master-max=1 master-node-max=1 \ + clone-max=2 clone-node-max=1 notify=true + crmsh # cib commit drbd_cfg + crmsh # quit + + + pcs # pcs cluster cib drbd_cfg + pcs # pcs -f drbd_cfg resource create WebData ocf:linbit:drbd drbd_resource=wwwdata \ + op monitor interval=60s + pcs # pcs -f drbd_cfg resource master WebDataClone WebData master-max=1 master-node-max=1 \ + clone-max=2 clone-node-max=1 notify=true + pcs # pcs cluster push cib drbd_cfg