diff --git a/.gitignore b/.gitignore
index 847e48490c..03df200f01 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,243 +1,242 @@
# Common
\#*
.\#*
GPATH
GRTAGS
GTAGS
TAGS
Makefile
Makefile.in
.deps
.dirstamp
.libs
*.pc
*.pyc
*.bz2
*.tar.gz
*.tgz
*.la
*.lo
*.o
*~
*.gcda
*.gcno
# Autobuild
aclocal.m4
autoconf
autoheader
autom4te.cache/
automake
build.counter
compile
/confdefs.h
config.guess
config.log
config.status
config.sub
configure
/conftest*
depcomp
install-sh
include/stamp-*
libtool
libtool.m4
ltdl.m4
libltdl
ltmain.sh
missing
py-compile
/m4/argz.m4
/m4/ltargz.m4
/m4/ltoptions.m4
/m4/ltsugar.m4
/m4/ltversion.m4
/m4/lt~obsolete.m4
test-driver
ylwrap
# Configure targets
/cts/CTS.py
/cts/CTSlab.py
/cts/CTSvars.py
/cts/LSBDummy
/cts/OCFIPraTest.py
/cts/benchmark/clubench
/cts/cluster_test
/cts/cts
/cts/cts-cli
/cts/cts-coverage
/cts/cts-exec
/cts/cts-fencing
/cts/cts-log-watcher
/cts/cts-regression
/cts/cts-scheduler
/cts/cts-support
/cts/fence_dummy
/cts/lxc_autogen.sh
/cts/pacemaker-cts-dummyd
/cts/pacemaker-cts-dummyd@.service
/daemons/execd/pacemaker_remote
/daemons/execd/pacemaker_remote.service
/daemons/fenced/fence_legacy
/daemons/pacemakerd/pacemaker
/daemons/pacemakerd/pacemaker.combined.upstart
/daemons/pacemakerd/pacemaker.service
/daemons/pacemakerd/pacemaker.upstart
/doc/Doxyfile
/extra/logrotate/pacemaker
/extra/resources/ClusterMon
/extra/resources/HealthSMART
/extra/resources/SysInfo
/extra/resources/ifspeed
/extra/resources/o2cb
include/config.h
include/config.h.in
include/crm_config.h
publican.cfg
/tools/cibsecret
/tools/crm_error
/tools/crm_failcount
/tools/crm_master
/tools/crm_mon.service
/tools/crm_mon.upstart
/tools/crm_report
/tools/crm_rule
/tools/crm_standby
/tools/pcmk_simtimes
/tools/report.collector
/tools/report.common
# Build targets
*.7
*.7.xml
*.7.html
*.8
*.8.xml
*.8.html
/daemons/attrd/pacemaker-attrd
/daemons/based/pacemaker-based
/daemons/based/cibmon
/daemons/controld/pacemaker-controld
/daemons/execd/cts-exec-helper
/daemons/execd/pacemaker-execd
/daemons/execd/pacemaker-remoted
/daemons/fenced/cts-fence-helper
/daemons/fenced/pacemaker-fenced
/daemons/fenced/pacemaker-fenced.xml
/daemons/pacemakerd/pacemakerd
/daemons/schedulerd/pacemaker-schedulerd
/daemons/schedulerd/pacemaker-schedulerd.xml
/doc/*/tmp/**
/doc/*/publish
/doc/*.build
/doc/*/en-US/Ap-*.xml
/doc/*/en-US/Ch-*.xml
/doc/.ABI-build
/doc/HTML
/doc/abi_dumps
/doc/abi-check
/doc/api/*
/doc/compat_reports
/doc/crm_fencing.html
/doc/publican-catalog*
/doc/shared/en-US/*.xml
/doc/shared/en-US/images/pcmk-*.png
/doc/shared/en-US/images/Policy-Engine-*.png
/doc/sphinx/*/_build
/doc/sphinx/*/conf.py
/lib/common/md5.c
/maint/testcc_helper.cc
/maint/testcc_*_h
/maint/mocked/based
scratch
/tools/attrd_updater
/tools/cibadmin
/tools/crmadmin
/tools/crm_attribute
/tools/crm_diff
/tools/crm_mon
/tools/crm_node
/tools/crm_resource
/tools/crm_shadow
/tools/crm_simulate
/tools/crm_ticket
/tools/crm_verify
/tools/iso8601
/tools/stonith_admin
xml/crm.dtd
xml/pacemaker*.rng
xml/versions.rng
xml/api/api-result*.rng
lib/gnu/libgnu.a
lib/gnu/stdalign.h
*.coverity
# Packager artifacts
*.rpm
/mock
/pacemaker.spec
/rpm/[A-Z]*
# make dist/export working directory
pacemaker-[a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]
# Test detritus
/cts/.regression.failed.diff
/cts/scheduler/*.ref
/cts/scheduler/*.up
/cts/scheduler/*.up.err
/cts/scheduler/bug-rh-1097457.log
/cts/scheduler/bug-rh-1097457.trs
/cts/scheduler/shadow.*
/cts/test-suite.log
/xml/test-*/*.up
/xml/test-*/*.up.err
/xml/assets/*.rng
/xml/assets/diffview.js
/xml/assets/xmlcatalog
# Test results
*.log
*.trs
/lib/common/tests/strings/pcmk__parse_ll_range
/lib/common/tests/strings/pcmk__str_any_of
-/lib/common/tests/strings/pcmk__str_none_of
/lib/common/tests/utils/pcmk_str_is_infinity
/lib/common/tests/utils/pcmk_str_is_minus_infinity
/lib/pengine/tests/rules/pe_cron_range_satisfied
# Release maintenance detritus
/maint/gnulib
# Formerly built files (helps when jumping back and forth in checkout)
/.ABI-build
/Doxyfile
/HTML
/abi_dumps
/abi-check
/compat_reports
/attrd
/cib
/coverage.sh
/crmd
/cts/HBDummy
/doc/Clusters_from_Scratch.txt
/doc/Pacemaker_Explained.txt
/doc/acls.html
/fencing
/lrmd
/mcp
/pacemaker-*.spec
/pengine
#Other
coverity-*
logs
*.patch
*.diff
*.sed
*.orig
*.rej
*.swp
diff --git a/daemons/controld/controld_based.c b/daemons/controld/controld_based.c
index 919f590841..f2b25897c8 100644
--- a/daemons/controld/controld_based.c
+++ b/daemons/controld/controld_based.c
@@ -1,331 +1,331 @@
/*
* Copyright 2004-2020 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <unistd.h> /* sleep */
#include <crm/common/alerts_internal.h>
#include <crm/common/xml.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <pacemaker-controld.h>
int cib_retries = 0;
static void
do_cib_updated(const char *event, xmlNode * msg)
{
if (pcmk__alert_in_patchset(msg, TRUE)) {
mainloop_set_trigger(config_read);
}
}
static void
do_cib_replaced(const char *event, xmlNode * msg)
{
crm_debug("Updating the CIB after a replace: DC=%s", AM_I_DC ? "true" : "false");
if (AM_I_DC == FALSE) {
return;
} else if (fsa_state == S_FINALIZE_JOIN && is_set(fsa_input_register, R_CIB_ASKED)) {
/* no need to restart the join - we asked for this replace op */
return;
}
/* start the join process again so we get everyone's LRM status */
populate_cib_nodes(node_update_quick|node_update_all, __FUNCTION__);
register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
}
/* A_CIB_STOP, A_CIB_START, O_CIB_RESTART */
void
do_cib_control(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
CRM_ASSERT(fsa_cib_conn != NULL);
if (action & A_CIB_STOP) {
if (fsa_cib_conn->state != cib_disconnected && last_resource_update != 0) {
crm_info("Waiting for resource update %d to complete", last_resource_update);
crmd_fsa_stall(FALSE);
return;
}
crm_info("Disconnecting from the CIB manager");
clear_bit(fsa_input_register, R_CIB_CONNECTED);
fsa_cib_conn->cmds->del_notify_callback(fsa_cib_conn, T_CIB_DIFF_NOTIFY, do_cib_updated);
if (fsa_cib_conn->state != cib_disconnected) {
fsa_cib_conn->cmds->set_slave(fsa_cib_conn, cib_scope_local);
fsa_cib_conn->cmds->signoff(fsa_cib_conn);
}
crm_notice("Disconnected from the CIB manager");
}
if (action & A_CIB_START) {
int rc = pcmk_ok;
if (cur_state == S_STOPPING) {
crm_err("Ignoring request to connect to the CIB manager after shutdown");
return;
}
rc = fsa_cib_conn->cmds->signon(fsa_cib_conn, CRM_SYSTEM_CRMD, cib_command_nonblocking);
if (rc != pcmk_ok) {
/* a short wait that usually avoids stalling the FSA */
sleep(1);
rc = fsa_cib_conn->cmds->signon(fsa_cib_conn, CRM_SYSTEM_CRMD, cib_command_nonblocking);
}
if (rc != pcmk_ok) {
crm_info("Could not connect to the CIB manager: %s", pcmk_strerror(rc));
} else if (pcmk_ok !=
fsa_cib_conn->cmds->set_connection_dnotify(fsa_cib_conn,
crmd_cib_connection_destroy)) {
crm_err("Could not set dnotify callback");
} else if (pcmk_ok !=
fsa_cib_conn->cmds->add_notify_callback(fsa_cib_conn, T_CIB_REPLACE_NOTIFY,
do_cib_replaced)) {
crm_err("Could not set CIB notification callback (replace)");
} else if (pcmk_ok !=
fsa_cib_conn->cmds->add_notify_callback(fsa_cib_conn, T_CIB_DIFF_NOTIFY,
do_cib_updated)) {
crm_err("Could not set CIB notification callback (update)");
} else {
set_bit(fsa_input_register, R_CIB_CONNECTED);
cib_retries = 0;
}
if (is_not_set(fsa_input_register, R_CIB_CONNECTED)) {
cib_retries++;
crm_warn("Couldn't complete CIB registration %d"
" times... pause and retry", cib_retries);
if (cib_retries < 30) {
controld_start_timer(wait_timer);
crmd_fsa_stall(FALSE);
} else {
crm_err("Could not complete CIB"
" registration %d times..." " hard error", cib_retries);
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
}
}
}
/*!
* \internal
* \brief Get CIB call options to use local scope if master unavailable
*
* \return CIB call options
*/
int crmd_cib_smart_opt()
{
int call_opt = cib_quorum_override;
if (fsa_state == S_ELECTION || fsa_state == S_PENDING) {
crm_info("Sending update to local CIB in state: %s", fsa_state2string(fsa_state));
call_opt |= cib_scope_local;
}
return call_opt;
}
/*!
* \internal
* \brief Check whether an action type should be recorded in the CIB
*
* \param[in] action Action type
*
* \return TRUE if action should be recorded, FALSE otherwise
*/
bool
controld_action_is_recordable(const char *action)
{
- return pcmk__str_none_of(action, CRMD_ACTION_CANCEL, CRMD_ACTION_DELETE,
+ return !pcmk__str_any_of(action, CRMD_ACTION_CANCEL, CRMD_ACTION_DELETE,
CRMD_ACTION_NOTIFY, CRMD_ACTION_METADATA, NULL);
}
static void
cib_delete_callback(xmlNode *msg, int call_id, int rc, xmlNode *output,
void *user_data)
{
char *desc = user_data;
if (rc == 0) {
crm_debug("Deletion of %s (via CIB call %d) succeeded", desc, call_id);
} else {
crm_warn("Deletion of %s (via CIB call %d) failed: %s " CRM_XS " rc=%d",
desc, call_id, pcmk_strerror(rc), rc);
}
}
// Searches for various portions of node_state to delete
// Match a particular node's node_state (takes node name 1x)
#define XPATH_NODE_STATE "//" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']"
// Node's lrm section (name 1x)
#define XPATH_NODE_LRM XPATH_NODE_STATE "/" XML_CIB_TAG_LRM
// Node's lrm_rsc_op entries and lrm_resource entries without lock (name 2x)
#define XPATH_NODE_LRM_UNLOCKED XPATH_NODE_STATE "//" XML_LRM_TAG_RSC_OP \
"|" XPATH_NODE_STATE \
"//" XML_LRM_TAG_RESOURCE \
"[not(@" XML_CONFIG_ATTR_SHUTDOWN_LOCK ")]"
// Node's transient_attributes section (name 1x)
#define XPATH_NODE_ATTRS XPATH_NODE_STATE "/" XML_TAG_TRANSIENT_NODEATTRS
// Everything under node_state (name 1x)
#define XPATH_NODE_ALL XPATH_NODE_STATE "/*"
// Unlocked history + transient attributes (name 3x)
#define XPATH_NODE_ALL_UNLOCKED XPATH_NODE_LRM_UNLOCKED "|" XPATH_NODE_ATTRS
/*!
* \internal
* \brief Delete subsection of a node's CIB node_state
*
* \param[in] uname Desired node
* \param[in] section Subsection of node_state to delete
* \param[in] options CIB call options to use
*/
void
controld_delete_node_state(const char *uname, enum controld_section_e section,
int options)
{
char *xpath = NULL;
char *desc = NULL;
CRM_CHECK(uname != NULL, return);
switch (section) {
case controld_section_lrm:
xpath = crm_strdup_printf(XPATH_NODE_LRM, uname);
desc = crm_strdup_printf("resource history for node %s", uname);
break;
case controld_section_lrm_unlocked:
xpath = crm_strdup_printf(XPATH_NODE_LRM_UNLOCKED, uname, uname);
desc = crm_strdup_printf("resource history (other than shutdown "
"locks) for node %s", uname);
break;
case controld_section_attrs:
xpath = crm_strdup_printf(XPATH_NODE_ATTRS, uname);
desc = crm_strdup_printf("transient attributes for node %s", uname);
break;
case controld_section_all:
xpath = crm_strdup_printf(XPATH_NODE_ALL, uname);
desc = crm_strdup_printf("all state for node %s", uname);
break;
case controld_section_all_unlocked:
xpath = crm_strdup_printf(XPATH_NODE_ALL_UNLOCKED,
uname, uname, uname);
desc = crm_strdup_printf("all state (other than shutdown locks) "
"for node %s", uname);
break;
}
if (fsa_cib_conn == NULL) {
crm_warn("Unable to delete %s: no CIB connection", desc);
free(desc);
} else {
int call_id;
options |= cib_quorum_override|cib_xpath|cib_multiple;
call_id = fsa_cib_conn->cmds->remove(fsa_cib_conn, xpath, NULL, options);
crm_info("Deleting %s (via CIB call %d) " CRM_XS " xpath=%s",
desc, call_id, xpath);
fsa_register_cib_callback(call_id, FALSE, desc, cib_delete_callback);
// CIB library handles freeing desc
}
free(xpath);
}
// Takes node name and resource ID
#define XPATH_RESOURCE_HISTORY "//" XML_CIB_TAG_STATE \
"[@" XML_ATTR_UNAME "='%s']/" \
XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES \
"/" XML_LRM_TAG_RESOURCE \
"[@" XML_ATTR_ID "='%s']"
// @TODO could add "and @XML_CONFIG_ATTR_SHUTDOWN_LOCK" to limit to locks
/*!
* \internal
* \brief Clear resource history from CIB for a given resource and node
*
* \param[in] rsc_id ID of resource to be cleared
* \param[in] node Node whose resource history should be cleared
* \param[in] user_name ACL user name to use
* \param[in] call_options CIB call options
*
* \return Standard Pacemaker return code
*/
int
controld_delete_resource_history(const char *rsc_id, const char *node,
const char *user_name, int call_options)
{
char *desc = NULL;
char *xpath = NULL;
int rc = pcmk_rc_ok;
CRM_CHECK((rsc_id != NULL) && (node != NULL), return EINVAL);
desc = crm_strdup_printf("resource history for %s on %s", rsc_id, node);
if (fsa_cib_conn == NULL) {
crm_err("Unable to clear %s: no CIB connection", desc);
free(desc);
return ENOTCONN;
}
// Ask CIB to delete the entry
xpath = crm_strdup_printf(XPATH_RESOURCE_HISTORY, node, rsc_id);
rc = cib_internal_op(fsa_cib_conn, CIB_OP_DELETE, NULL, xpath, NULL,
NULL, call_options|cib_xpath, user_name);
if (rc < 0) {
rc = pcmk_legacy2rc(rc);
crm_err("Could not delete resource status of %s on %s%s%s: %s "
CRM_XS " rc=%d", rsc_id, node,
(user_name? " for user " : ""), (user_name? user_name : ""),
pcmk_rc_str(rc), rc);
free(desc);
free(xpath);
return rc;
}
if (is_set(call_options, cib_sync_call)) {
if (is_set(call_options, cib_dryrun)) {
crm_debug("Deletion of %s would succeed", desc);
} else {
crm_debug("Deletion of %s succeeded", desc);
}
free(desc);
} else {
crm_info("Clearing %s (via CIB call %d) " CRM_XS " xpath=%s",
desc, rc, xpath);
fsa_register_cib_callback(rc, FALSE, desc, cib_delete_callback);
// CIB library handles freeing desc
}
free(xpath);
return pcmk_rc_ok;
}
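Editorial note: the functional change in this patch, here and again in controld_execd.c below, is replacing pcmk__str_none_of() with a negated pcmk__str_any_of() call, matching the removal of the pcmk__str_none_of unit-test path from .gitignore above. The following is a minimal, self-contained sketch (not the actual Pacemaker implementation) of a NULL-terminated varargs matcher, showing why the two forms are equivalent; the str_any_of() helper and the literal action strings (standing in for the CRMD_ACTION_* constants) are assumptions made for illustration only.

#include <stdarg.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical stand-in for a NULL-terminated "matches any of" helper */
static bool
str_any_of(const char *s, ...)
{
    va_list ap;
    bool found = false;

    va_start(ap, s);
    for (const char *candidate = va_arg(ap, const char *);
         candidate != NULL;
         candidate = va_arg(ap, const char *)) {
        if ((s != NULL) && (strcmp(s, candidate) == 0)) {
            found = true;
            break;
        }
    }
    va_end(ap);
    return found;
}

int
main(void)
{
    /* "none of the listed actions" is simply the negation of "any of" */
    const char *action = "monitor";
    bool recordable = !str_any_of(action, "cancel", "delete",
                                  "notify", "meta-data", NULL);

    printf("action %s is %srecordable\n", action, recordable ? "" : "not ");
    return 0;
}

Built with any C99 compiler, this prints that a "monitor" action is recordable, mirroring the intent of controld_action_is_recordable() in the hunk above.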
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
index 1407b86f84..0c26202bc7 100644
--- a/daemons/controld/controld_execd.c
+++ b/daemons/controld/controld_execd.c
@@ -1,2855 +1,2855 @@
/*
* Copyright 2004-2020 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <regex.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <crm/crm.h>
#include <crm/lrmd.h> // lrmd_event_data_t, lrmd_rsc_info_t, etc.
#include <crm/services.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/pengine/rules.h>
#include <pacemaker-internal.h>
#include <pacemaker-controld.h>
#define START_DELAY_THRESHOLD 5 * 60 * 1000
#define MAX_LRM_REG_FAILS 30
struct delete_event_s {
int rc;
const char *rsc;
lrm_state_t *lrm_state;
};
static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id);
static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list);
static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data);
static lrmd_event_data_t *construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op,
const char *rsc_id, const char *operation);
static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
const char *operation, xmlNode *msg);
static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state,
int log_level);
static int do_update_resource(const char *node_name, lrmd_rsc_info_t *rsc,
lrmd_event_data_t *op, time_t lock_time);
static void
lrm_connection_destroy(void)
{
if (is_set(fsa_input_register, R_LRM_CONNECTED)) {
crm_crit("Connection to executor failed");
register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
clear_bit(fsa_input_register, R_LRM_CONNECTED);
} else {
crm_info("Disconnected from executor");
}
}
static char *
make_stop_id(const char *rsc, int call_id)
{
return crm_strdup_printf("%s:%d", rsc, call_id);
}
static void
copy_instance_keys(gpointer key, gpointer value, gpointer user_data)
{
if (strstr(key, CRM_META "_") == NULL) {
g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value));
}
}
static void
copy_meta_keys(gpointer key, gpointer value, gpointer user_data)
{
if (strstr(key, CRM_META "_") != NULL) {
g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value));
}
}
/*!
* \internal
* \brief Remove a recurring operation from a resource's history
*
* \param[in,out] history Resource history to modify
* \param[in] op Operation to remove
*
* \return TRUE if the operation was found and removed, FALSE otherwise
*/
static gboolean
history_remove_recurring_op(rsc_history_t *history, const lrmd_event_data_t *op)
{
GList *iter;
for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) {
lrmd_event_data_t *existing = iter->data;
if ((op->interval_ms == existing->interval_ms)
&& crm_str_eq(op->rsc_id, existing->rsc_id, TRUE)
&& safe_str_eq(op->op_type, existing->op_type)) {
history->recurring_op_list = g_list_delete_link(history->recurring_op_list, iter);
lrmd_free_event(existing);
return TRUE;
}
}
return FALSE;
}
/*!
* \internal
* \brief Free all recurring operations in resource history
*
* \param[in,out] history Resource history to modify
*/
static void
history_free_recurring_ops(rsc_history_t *history)
{
GList *iter;
for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) {
lrmd_free_event(iter->data);
}
g_list_free(history->recurring_op_list);
history->recurring_op_list = NULL;
}
/*!
* \internal
* \brief Free resource history
*
* \param[in,out] history Resource history to free
*/
void
history_free(gpointer data)
{
rsc_history_t *history = (rsc_history_t*)data;
if (history->stop_params) {
g_hash_table_destroy(history->stop_params);
}
/* Don't need to free history->rsc.id because it's set to history->id */
free(history->rsc.type);
free(history->rsc.standard);
free(history->rsc.provider);
lrmd_free_event(history->failed);
lrmd_free_event(history->last);
free(history->id);
history_free_recurring_ops(history);
free(history);
}
static void
update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op)
{
int target_rc = 0;
rsc_history_t *entry = NULL;
if (op->rsc_deleted) {
crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type);
controld_delete_resource_history(op->rsc_id, lrm_state->node_name,
NULL, crmd_cib_smart_opt());
return;
}
if (safe_str_eq(op->op_type, RSC_NOTIFY)) {
return;
}
crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type);
entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id);
if (entry == NULL && rsc) {
entry = calloc(1, sizeof(rsc_history_t));
entry->id = strdup(op->rsc_id);
g_hash_table_insert(lrm_state->resource_history, entry->id, entry);
entry->rsc.id = entry->id;
entry->rsc.type = strdup(rsc->type);
entry->rsc.standard = strdup(rsc->standard);
if (rsc->provider) {
entry->rsc.provider = strdup(rsc->provider);
} else {
entry->rsc.provider = NULL;
}
} else if (entry == NULL) {
crm_info("Resource %s no longer exists, not updating cache", op->rsc_id);
return;
}
entry->last_callid = op->call_id;
target_rc = rsc_op_expected_rc(op);
if (op->op_status == PCMK_LRM_OP_CANCELLED) {
if (op->interval_ms > 0) {
crm_trace("Removing cancelled recurring op: " PCMK__OP_FMT,
op->rsc_id, op->op_type, op->interval_ms);
history_remove_recurring_op(entry, op);
return;
} else {
crm_trace("Skipping " PCMK__OP_FMT " rc=%d, status=%d",
op->rsc_id, op->op_type, op->interval_ms, op->rc,
op->op_status);
}
} else if (did_rsc_op_fail(op, target_rc)) {
/* Store failed monitors here, otherwise the block below will cause them
* to be forgotten when a stop happens.
*/
if (entry->failed) {
lrmd_free_event(entry->failed);
}
entry->failed = lrmd_copy_event(op);
} else if (op->interval_ms == 0) {
if (entry->last) {
lrmd_free_event(entry->last);
}
entry->last = lrmd_copy_event(op);
if (op->params && pcmk__str_any_of(op->op_type, CRMD_ACTION_START,
"reload", CRMD_ACTION_STATUS, NULL)) {
if (entry->stop_params) {
g_hash_table_destroy(entry->stop_params);
}
entry->stop_params = crm_str_table_new();
g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params);
}
}
if (op->interval_ms > 0) {
/* Ensure there are no duplicates */
history_remove_recurring_op(entry, op);
crm_trace("Adding recurring op: " PCMK__OP_FMT,
op->rsc_id, op->op_type, op->interval_ms);
entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op));
} else if (entry->recurring_op_list && safe_str_eq(op->op_type, RSC_STATUS) == FALSE) {
crm_trace("Dropping %d recurring ops because of: " PCMK__OP_FMT,
g_list_length(entry->recurring_op_list), op->rsc_id,
op->op_type, op->interval_ms);
history_free_recurring_ops(entry);
}
}
/*!
* \internal
* \brief Send a direct OK ack for a resource task
*
* \param[in] lrm_state LRM connection
* \param[in] input Input message being ack'ed
* \param[in] rsc_id ID of affected resource
* \param[in] rsc Affected resource (if available)
* \param[in] task Operation task being ack'ed
* \param[in] ack_host Name of host to send ack to
* \param[in] ack_sys IPC system name to ack
*/
static void
send_task_ok_ack(lrm_state_t *lrm_state, ha_msg_input_t *input,
const char *rsc_id, lrmd_rsc_info_t *rsc, const char *task,
const char *ack_host, const char *ack_sys)
{
lrmd_event_data_t *op = construct_op(lrm_state, input->xml, rsc_id, task);
op->rc = PCMK_OCF_OK;
op->op_status = PCMK_LRM_OP_DONE;
controld_ack_event_directly(ack_host, ack_sys, rsc, op, rsc_id);
lrmd_free_event(op);
}
static inline const char *
op_node_name(lrmd_event_data_t *op)
{
return op->remote_nodename? op->remote_nodename : fsa_our_uname;
}
void
lrm_op_callback(lrmd_event_data_t * op)
{
CRM_CHECK(op != NULL, return);
switch (op->type) {
case lrmd_event_disconnect:
if (op->remote_nodename == NULL) {
/* If this is the local executor IPC connection, set the right
* bits in the controller when the connection goes down.
*/
lrm_connection_destroy();
}
break;
case lrmd_event_exec_complete:
{
lrm_state_t *lrm_state = lrm_state_find(op_node_name(op));
CRM_ASSERT(lrm_state != NULL);
process_lrm_event(lrm_state, op, NULL, NULL);
}
break;
default:
break;
}
}
/* A_LRM_CONNECT */
void
do_lrm_control(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
/* This only pertains to local executor connections. Remote connections are
* handled as resources within the scheduler. Connecting and disconnecting
* from remote executor instances is handled differently.
*/
lrm_state_t *lrm_state = NULL;
if(fsa_our_uname == NULL) {
return; /* Nothing to do */
}
lrm_state = lrm_state_find_or_create(fsa_our_uname);
if (lrm_state == NULL) {
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
return;
}
if (action & A_LRM_DISCONNECT) {
if (lrm_state_verify_stopped(lrm_state, cur_state, LOG_INFO) == FALSE) {
if (action == A_LRM_DISCONNECT) {
crmd_fsa_stall(FALSE);
return;
}
}
clear_bit(fsa_input_register, R_LRM_CONNECTED);
crm_info("Disconnecting from the executor");
lrm_state_disconnect(lrm_state);
lrm_state_reset_tables(lrm_state, FALSE);
crm_notice("Disconnected from the executor");
}
if (action & A_LRM_CONNECT) {
int ret = pcmk_ok;
crm_debug("Connecting to the executor");
ret = lrm_state_ipc_connect(lrm_state);
if (ret != pcmk_ok) {
if (lrm_state->num_lrm_register_fails < MAX_LRM_REG_FAILS) {
crm_warn("Failed to connect to the executor %d time%s (%d max)",
lrm_state->num_lrm_register_fails,
pcmk__plural_s(lrm_state->num_lrm_register_fails),
MAX_LRM_REG_FAILS);
controld_start_timer(wait_timer);
crmd_fsa_stall(FALSE);
return;
}
}
if (ret != pcmk_ok) {
crm_err("Failed to connect to the executor the max allowed %d time%s",
lrm_state->num_lrm_register_fails,
pcmk__plural_s(lrm_state->num_lrm_register_fails));
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
return;
}
set_bit(fsa_input_register, R_LRM_CONNECTED);
crm_info("Connection to the executor established");
}
if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) {
crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__);
}
}
static gboolean
lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level)
{
int counter = 0;
gboolean rc = TRUE;
const char *when = "lrm disconnect";
GHashTableIter gIter;
const char *key = NULL;
rsc_history_t *entry = NULL;
active_op_t *pending = NULL;
crm_debug("Checking for active resources before exit");
if (cur_state == S_TERMINATE) {
log_level = LOG_ERR;
when = "shutdown";
} else if (is_set(fsa_input_register, R_SHUTDOWN)) {
when = "shutdown... waiting";
}
if (lrm_state->pending_ops && lrm_state_is_connected(lrm_state) == TRUE) {
guint removed = g_hash_table_foreach_remove(
lrm_state->pending_ops, stop_recurring_actions, lrm_state);
guint nremaining = g_hash_table_size(lrm_state->pending_ops);
if (removed || nremaining) {
crm_notice("Stopped %u recurring operation%s at %s (%u remaining)",
removed, pcmk__plural_s(removed), when, nremaining);
}
}
if (lrm_state->pending_ops) {
g_hash_table_iter_init(&gIter, lrm_state->pending_ops);
while (g_hash_table_iter_next(&gIter, NULL, (void **)&pending)) {
/* Ignore recurring actions in the shutdown calculations */
if (pending->interval_ms == 0) {
counter++;
}
}
}
if (counter > 0) {
do_crm_log(log_level, "%d pending executor operation%s at %s",
counter, pcmk__plural_s(counter), when);
if (cur_state == S_TERMINATE || !is_set(fsa_input_register, R_SENT_RSC_STOP)) {
g_hash_table_iter_init(&gIter, lrm_state->pending_ops);
while (g_hash_table_iter_next(&gIter, (gpointer*)&key, (gpointer*)&pending)) {
do_crm_log(log_level, "Pending action: %s (%s)", key, pending->op_key);
}
} else {
rc = FALSE;
}
return rc;
}
if (lrm_state->resource_history == NULL) {
return rc;
}
if (is_set(fsa_input_register, R_SHUTDOWN)) {
/* At this point we're not waiting, we're just shutting down */
when = "shutdown";
}
counter = 0;
g_hash_table_iter_init(&gIter, lrm_state->resource_history);
while (g_hash_table_iter_next(&gIter, NULL, (gpointer*)&entry)) {
if (is_rsc_active(lrm_state, entry->id) == FALSE) {
continue;
}
counter++;
if (log_level == LOG_ERR) {
crm_info("Found %s active at %s", entry->id, when);
} else {
crm_trace("Found %s active at %s", entry->id, when);
}
if (lrm_state->pending_ops) {
GHashTableIter hIter;
g_hash_table_iter_init(&hIter, lrm_state->pending_ops);
while (g_hash_table_iter_next(&hIter, (gpointer*)&key, (gpointer*)&pending)) {
if (crm_str_eq(entry->id, pending->rsc_id, TRUE)) {
crm_notice("%sction %s (%s) incomplete at %s",
pending->interval_ms == 0 ? "A" : "Recurring a",
key, pending->op_key, when);
}
}
}
}
if (counter) {
crm_err("%d resource%s active at %s",
counter, (counter == 1)? " was" : "s were", when);
}
return rc;
}
static char *
build_parameter_list(const lrmd_event_data_t *op,
const struct ra_metadata_s *metadata,
xmlNode *result, enum ra_param_flags_e param_type,
bool invert_for_xml)
{
int len = 0;
int max = 0;
char *list = NULL;
GList *iter = NULL;
/* Newer resource agents support the "private" parameter attribute to
* indicate sensitive parameters. For backward compatibility with older
* agents, this list is used if the agent doesn't specify any as "private".
*/
const char *secure_terms[] = {
"password",
"passwd",
"user",
};
if (is_not_set(metadata->ra_flags, ra_uses_private)
&& (param_type == ra_param_private)) {
max = DIMOF(secure_terms);
}
for (iter = metadata->ra_params; iter != NULL; iter = iter->next) {
struct ra_param_s *param = (struct ra_param_s *) iter->data;
bool accept = FALSE;
if (is_set(param->rap_flags, param_type)) {
accept = TRUE;
} else if (max) {
for (int lpc = 0; lpc < max; lpc++) {
if (safe_str_eq(secure_terms[lpc], param->rap_name)) {
accept = TRUE;
break;
}
}
}
if (accept) {
int start = len;
crm_trace("Attr %s is %s", param->rap_name, ra_param_flag2text(param_type));
len += strlen(param->rap_name) + 2; // include spaces around
list = realloc_safe(list, len + 1); // include null terminator
// spaces before and after make parsing simpler
sprintf(list + start, " %s ", param->rap_name);
} else {
crm_trace("Rejecting %s for %s", param->rap_name, ra_param_flag2text(param_type));
}
if (result && (invert_for_xml? !accept : accept)) {
const char *v = g_hash_table_lookup(op->params, param->rap_name);
if (v != NULL) {
crm_trace("Adding attr %s=%s to the xml result", param->rap_name, v);
crm_xml_add(result, param->rap_name, v);
}
}
}
return list;
}
static void
append_restart_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata,
xmlNode *update, const char *version)
{
char *list = NULL;
char *digest = NULL;
xmlNode *restart = NULL;
CRM_LOG_ASSERT(op->params != NULL);
if (op->interval_ms > 0) {
/* monitors are not reloadable */
return;
}
if (is_set(metadata->ra_flags, ra_supports_reload)) {
restart = create_xml_node(NULL, XML_TAG_PARAMS);
/* Add any parameters with unique="1" to the "op-force-restart" list.
*
* (Currently, we abuse "unique=0" to indicate reloadability. This is
* nonstandard and should eventually be replaced once the OCF standard
* is updated with something better.)
*/
list = build_parameter_list(op, metadata, restart, ra_param_unique,
FALSE);
} else {
/* Resource does not support reloads */
return;
}
digest = calculate_operation_digest(restart, version);
/* Add "op-force-restart" and "op-restart-digest" to indicate the resource supports reload,
* no matter if it actually supports any parameters with unique="1"). */
crm_xml_add(update, XML_LRM_ATTR_OP_RESTART, list? list: "");
crm_xml_add(update, XML_LRM_ATTR_RESTART_DIGEST, digest);
crm_trace("%s: %s, %s", op->rsc_id, digest, list);
crm_log_xml_trace(restart, "restart digest source");
free_xml(restart);
free(digest);
free(list);
}
static void
append_secure_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata,
xmlNode *update, const char *version)
{
char *list = NULL;
char *digest = NULL;
xmlNode *secure = NULL;
CRM_LOG_ASSERT(op->params != NULL);
/*
* To keep XML_LRM_ATTR_OP_SECURE short, we want it to contain the
* secure parameters but XML_LRM_ATTR_SECURE_DIGEST to be based on
* the insecure ones
*/
secure = create_xml_node(NULL, XML_TAG_PARAMS);
list = build_parameter_list(op, metadata, secure, ra_param_private, TRUE);
if (list != NULL) {
digest = calculate_operation_digest(secure, version);
crm_xml_add(update, XML_LRM_ATTR_OP_SECURE, list);
crm_xml_add(update, XML_LRM_ATTR_SECURE_DIGEST, digest);
crm_trace("%s: %s, %s", op->rsc_id, digest, list);
crm_log_xml_trace(secure, "secure digest source");
} else {
crm_trace("%s: no secure parameters", op->rsc_id);
}
free_xml(secure);
free(digest);
free(list);
}
static gboolean
build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op,
const char *node_name, const char *src)
{
int target_rc = 0;
xmlNode *xml_op = NULL;
struct ra_metadata_s *metadata = NULL;
const char *caller_version = NULL;
lrm_state_t *lrm_state = NULL;
if (op == NULL) {
return FALSE;
}
target_rc = rsc_op_expected_rc(op);
/* there is a small risk in formerly mixed clusters that it will
* be sub-optimal.
*
* however with our upgrade policy, the update we send should
* still be completely supported anyway
*/
caller_version = g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION);
CRM_LOG_ASSERT(caller_version != NULL);
if(caller_version == NULL) {
caller_version = CRM_FEATURE_SET;
}
crm_trace("Building %s operation update with originator version: %s", op->rsc_id, caller_version);
xml_op = pcmk__create_history_xml(parent, op, caller_version, target_rc,
fsa_our_uname, src, LOG_DEBUG);
if (xml_op == NULL) {
return TRUE;
}
if ((rsc == NULL) || (op->params == NULL)
|| !crm_op_needs_metadata(rsc->standard, op->op_type)) {
crm_trace("No digests needed for %s action on %s (params=%p rsc=%p)",
op->op_type, op->rsc_id, op->params, rsc);
return TRUE;
}
lrm_state = lrm_state_find(node_name);
if (lrm_state == NULL) {
crm_warn("Cannot calculate digests for operation " PCMK__OP_FMT
" because we have no connection to executor for %s",
op->rsc_id, op->op_type, op->interval_ms, node_name);
return TRUE;
}
metadata = metadata_cache_get(lrm_state->metadata_cache, rsc);
if (metadata == NULL) {
/* For now, we always collect resource agent meta-data via a local,
* synchronous, direct execution of the agent. This has multiple issues:
* the executor should execute agents, not the controller; meta-data for
* Pacemaker Remote nodes should be collected on those nodes, not
* locally; and the meta-data call shouldn't eat into the timeout of the
* real action being performed.
*
* These issues are planned to be addressed by having the scheduler
* schedule a meta-data cache check at the beginning of each transition.
* Once that is working, this block will only be a fallback in case the
* initial collection fails.
*/
char *metadata_str = NULL;
int rc = lrm_state_get_metadata(lrm_state, rsc->standard,
rsc->provider, rsc->type,
&metadata_str, 0);
if (rc != pcmk_ok) {
crm_warn("Failed to get metadata for %s (%s:%s:%s)",
rsc->id, rsc->standard, rsc->provider, rsc->type);
return TRUE;
}
metadata = metadata_cache_update(lrm_state->metadata_cache, rsc,
metadata_str);
free(metadata_str);
if (metadata == NULL) {
crm_warn("Failed to update metadata for %s (%s:%s:%s)",
rsc->id, rsc->standard, rsc->provider, rsc->type);
return TRUE;
}
}
#if ENABLE_VERSIONED_ATTRS
crm_xml_add(xml_op, XML_ATTR_RA_VERSION, metadata->ra_version);
#endif
crm_trace("Including additional digests for %s::%s:%s", rsc->standard, rsc->provider, rsc->type);
append_restart_list(op, metadata, xml_op, caller_version);
append_secure_list(op, metadata, xml_op, caller_version);
return TRUE;
}
static gboolean
is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id)
{
rsc_history_t *entry = NULL;
entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
if (entry == NULL || entry->last == NULL) {
return FALSE;
}
crm_trace("Processing %s: %s.%d=%d", rsc_id, entry->last->op_type,
entry->last->interval_ms, entry->last->rc);
if (entry->last->rc == PCMK_OCF_OK && safe_str_eq(entry->last->op_type, CRMD_ACTION_STOP)) {
return FALSE;
} else if (entry->last->rc == PCMK_OCF_OK
&& safe_str_eq(entry->last->op_type, CRMD_ACTION_MIGRATE)) {
// A stricter check is too complex ... leave that to the scheduler
return FALSE;
} else if (entry->last->rc == PCMK_OCF_NOT_RUNNING) {
return FALSE;
} else if ((entry->last->interval_ms == 0)
&& (entry->last->rc == PCMK_OCF_NOT_CONFIGURED)) {
/* Badly configured resources can't be reliably stopped */
return FALSE;
}
return TRUE;
}
static gboolean
build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list)
{
GHashTableIter iter;
rsc_history_t *entry = NULL;
g_hash_table_iter_init(&iter, lrm_state->resource_history);
while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) {
GList *gIter = NULL;
xmlNode *xml_rsc = create_xml_node(rsc_list, XML_LRM_TAG_RESOURCE);
crm_xml_add(xml_rsc, XML_ATTR_ID, entry->id);
crm_xml_add(xml_rsc, XML_ATTR_TYPE, entry->rsc.type);
crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, entry->rsc.standard);
crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, entry->rsc.provider);
if (entry->last && entry->last->params) {
const char *container = g_hash_table_lookup(entry->last->params, CRM_META"_"XML_RSC_ATTR_CONTAINER);
if (container) {
crm_trace("Resource %s is a part of container resource %s", entry->id, container);
crm_xml_add(xml_rsc, XML_RSC_ATTR_CONTAINER, container);
}
}
build_operation_update(xml_rsc, &(entry->rsc), entry->failed, lrm_state->node_name, __FUNCTION__);
build_operation_update(xml_rsc, &(entry->rsc), entry->last, lrm_state->node_name, __FUNCTION__);
for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIter->next) {
build_operation_update(xml_rsc, &(entry->rsc), gIter->data, lrm_state->node_name, __FUNCTION__);
}
}
return FALSE;
}
static xmlNode *
do_lrm_query_internal(lrm_state_t *lrm_state, int update_flags)
{
xmlNode *xml_state = NULL;
xmlNode *xml_data = NULL;
xmlNode *rsc_list = NULL;
crm_node_t *peer = NULL;
peer = crm_get_peer_full(0, lrm_state->node_name, CRM_GET_PEER_ANY);
CRM_CHECK(peer != NULL, return NULL);
xml_state = create_node_state_update(peer, update_flags, NULL,
__FUNCTION__);
if (xml_state == NULL) {
return NULL;
}
xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM);
crm_xml_add(xml_data, XML_ATTR_ID, peer->uuid);
rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES);
/* Build a list of active (not always running) resources */
build_active_RAs(lrm_state, rsc_list);
crm_log_xml_trace(xml_state, "Current executor state");
return xml_state;
}
xmlNode *
controld_query_executor_state(const char *node_name)
{
lrm_state_t *lrm_state = lrm_state_find(node_name);
if (!lrm_state) {
crm_err("Could not find executor state for node %s", node_name);
return NULL;
}
return do_lrm_query_internal(lrm_state,
node_update_cluster|node_update_peer);
}
/*!
* \internal
* \brief Map standard Pacemaker return code to operation status and OCF code
*
* \param[out] event Executor event whose status and return code should be set
* \param[in] rc Standard Pacemaker return code
*/
void
controld_rc2event(lrmd_event_data_t *event, int rc)
{
switch (rc) {
case pcmk_rc_ok:
event->rc = PCMK_OCF_OK;
event->op_status = PCMK_LRM_OP_DONE;
break;
case EACCES:
event->rc = PCMK_OCF_INSUFFICIENT_PRIV;
event->op_status = PCMK_LRM_OP_ERROR;
break;
default:
event->rc = PCMK_OCF_UNKNOWN_ERROR;
event->op_status = PCMK_LRM_OP_ERROR;
break;
}
}
/*!
* \internal
* \brief Trigger a new transition after CIB status was deleted
*
* If a CIB status delete was not expected (as part of the transition graph),
* trigger a new transition by updating the (arbitrary) "last-lrm-refresh"
* cluster property.
*
* \param[in] from_sys IPC name that requested the delete
* \param[in] rsc_id Resource whose status was deleted (for logging only)
*/
void
controld_trigger_delete_refresh(const char *from_sys, const char *rsc_id)
{
if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) {
char *now_s = crm_strdup_printf("%lld", (long long) time(NULL));
crm_debug("Triggering a refresh after %s cleaned %s", from_sys, rsc_id);
update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG,
NULL, NULL, NULL, NULL, "last-lrm-refresh", now_s,
FALSE, NULL, NULL);
free(now_s);
}
}
static void
notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc)
{
lrmd_event_data_t *op = NULL;
const char *from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM);
const char *from_host = crm_element_value(input->msg, F_CRM_HOST_FROM);
crm_info("Notifying %s on %s that %s was%s deleted",
from_sys, (from_host? from_host : "localhost"), rsc_id,
((rc == pcmk_ok)? "" : " not"));
op = construct_op(lrm_state, input->xml, rsc_id, CRMD_ACTION_DELETE);
controld_rc2event(op, pcmk_legacy2rc(rc));
controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id);
lrmd_free_event(op);
controld_trigger_delete_refresh(from_sys, rsc_id);
}
static gboolean
lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data)
{
struct delete_event_s *event = user_data;
struct pending_deletion_op_s *op = value;
if (crm_str_eq(event->rsc, op->rsc, TRUE)) {
notify_deleted(event->lrm_state, op->input, event->rsc, event->rc);
return TRUE;
}
return FALSE;
}
static gboolean
lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data)
{
const char *rsc = user_data;
active_op_t *pending = value;
if (crm_str_eq(rsc, pending->rsc_id, TRUE)) {
crm_info("Removing op %s:%d for deleted resource %s",
pending->op_key, pending->call_id, rsc);
return TRUE;
}
return FALSE;
}
static void
delete_rsc_entry(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id,
GHashTableIter * rsc_gIter, int rc, const char *user_name)
{
struct delete_event_s event;
CRM_CHECK(rsc_id != NULL, return);
if (rc == pcmk_ok) {
char *rsc_id_copy = strdup(rsc_id);
if (rsc_gIter) {
g_hash_table_iter_remove(rsc_gIter);
} else {
g_hash_table_remove(lrm_state->resource_history, rsc_id_copy);
}
controld_delete_resource_history(rsc_id_copy, lrm_state->node_name,
user_name, crmd_cib_smart_opt());
g_hash_table_foreach_remove(lrm_state->pending_ops, lrm_remove_deleted_op, rsc_id_copy);
free(rsc_id_copy);
}
if (input) {
notify_deleted(lrm_state, input, rsc_id, rc);
}
event.rc = rc;
event.rsc = rsc_id;
event.lrm_state = lrm_state;
g_hash_table_foreach_remove(lrm_state->deletion_ops, lrm_remove_deleted_rsc, &event);
}
/*!
* \internal
* \brief Erase an LRM history entry from the CIB, given the operation data
*
* \param[in] lrm_state LRM state of the desired node
* \param[in] op Operation whose history should be deleted
*/
static void
erase_lrm_history_by_op(lrm_state_t *lrm_state, lrmd_event_data_t *op)
{
xmlNode *xml_top = NULL;
CRM_CHECK(op != NULL, return);
xml_top = create_xml_node(NULL, XML_LRM_TAG_RSC_OP);
crm_xml_add_int(xml_top, XML_LRM_ATTR_CALLID, op->call_id);
crm_xml_add(xml_top, XML_ATTR_TRANSITION_KEY, op->user_data);
if (op->interval_ms > 0) {
char *op_id = pcmk__op_key(op->rsc_id, op->op_type, op->interval_ms);
/* Avoid deleting last_failure too (if it was a result of this recurring op failing) */
crm_xml_add(xml_top, XML_ATTR_ID, op_id);
free(op_id);
}
crm_debug("Erasing resource operation history for " PCMK__OP_FMT " (call=%d)",
op->rsc_id, op->op_type, op->interval_ms, op->call_id);
fsa_cib_conn->cmds->remove(fsa_cib_conn, XML_CIB_TAG_STATUS, xml_top,
cib_quorum_override);
crm_log_xml_trace(xml_top, "op:cancel");
free_xml(xml_top);
}
/* Define xpath to find LRM resource history entry by node and resource */
#define XPATH_HISTORY \
"/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS \
"/" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']" \
"/" XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES \
"/" XML_LRM_TAG_RESOURCE "[@" XML_ATTR_ID "='%s']" \
"/" XML_LRM_TAG_RSC_OP
/* ... and also by operation key */
#define XPATH_HISTORY_ID XPATH_HISTORY \
"[@" XML_ATTR_ID "='%s']"
/* ... and also by operation key and operation call ID */
#define XPATH_HISTORY_CALL XPATH_HISTORY \
"[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_CALLID "='%d']"
/* ... and also by operation key and original operation key */
#define XPATH_HISTORY_ORIG XPATH_HISTORY \
"[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_TASK_KEY "='%s']"
/*!
* \internal
* \brief Erase an LRM history entry from the CIB, given operation identifiers
*
* \param[in] lrm_state LRM state of the node to clear history for
* \param[in] rsc_id Name of resource to clear history for
* \param[in] key Operation key of operation to clear history for
* \param[in] orig_op If specified, delete only if it has this original op
* \param[in] call_id If specified, delete entry only if it has this call ID
*/
static void
erase_lrm_history_by_id(lrm_state_t *lrm_state, const char *rsc_id,
const char *key, const char *orig_op, int call_id)
{
char *op_xpath = NULL;
CRM_CHECK((rsc_id != NULL) && (key != NULL), return);
if (call_id > 0) {
op_xpath = crm_strdup_printf(XPATH_HISTORY_CALL,
lrm_state->node_name, rsc_id, key,
call_id);
} else if (orig_op) {
op_xpath = crm_strdup_printf(XPATH_HISTORY_ORIG,
lrm_state->node_name, rsc_id, key,
orig_op);
} else {
op_xpath = crm_strdup_printf(XPATH_HISTORY_ID,
lrm_state->node_name, rsc_id, key);
}
crm_debug("Erasing resource operation history for %s on %s (call=%d)",
key, rsc_id, call_id);
fsa_cib_conn->cmds->remove(fsa_cib_conn, op_xpath, NULL,
cib_quorum_override | cib_xpath);
free(op_xpath);
}
static inline gboolean
last_failed_matches_op(rsc_history_t *entry, const char *op, guint interval_ms)
{
if (entry == NULL) {
return FALSE;
}
if (op == NULL) {
return TRUE;
}
return (safe_str_eq(op, entry->failed->op_type)
&& (interval_ms == entry->failed->interval_ms));
}
/*!
* \internal
* \brief Clear a resource's last failure
*
* Erase a resource's last failure on a particular node from both the
* LRM resource history in the CIB, and the resource history remembered
* for the LRM state.
*
* \param[in] rsc_id Resource name
* \param[in] node_name Node name
* \param[in] operation If specified, only clear if matching this operation
* \param[in] interval_ms If operation is specified, it has this interval
*/
void
lrm_clear_last_failure(const char *rsc_id, const char *node_name,
const char *operation, guint interval_ms)
{
char *op_key = NULL;
char *orig_op_key = NULL;
lrm_state_t *lrm_state = NULL;
lrm_state = lrm_state_find(node_name);
if (lrm_state == NULL) {
return;
}
/* Erase from CIB */
op_key = pcmk__op_key(rsc_id, "last_failure", 0);
if (operation) {
orig_op_key = pcmk__op_key(rsc_id, operation, interval_ms);
}
erase_lrm_history_by_id(lrm_state, rsc_id, op_key, orig_op_key, 0);
free(op_key);
free(orig_op_key);
/* Remove from memory */
if (lrm_state->resource_history) {
rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history,
rsc_id);
if (last_failed_matches_op(entry, operation, interval_ms)) {
lrmd_free_event(entry->failed);
entry->failed = NULL;
}
}
}
/* Returns: gboolean - cancellation is in progress */
static gboolean
cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, gboolean remove)
{
int rc = pcmk_ok;
char *local_key = NULL;
active_op_t *pending = NULL;
CRM_CHECK(op != 0, return FALSE);
CRM_CHECK(rsc_id != NULL, return FALSE);
if (key == NULL) {
local_key = make_stop_id(rsc_id, op);
key = local_key;
}
pending = g_hash_table_lookup(lrm_state->pending_ops, key);
if (pending) {
if (remove && is_not_set(pending->flags, active_op_remove)) {
set_bit(pending->flags, active_op_remove);
crm_debug("Scheduling %s for removal", key);
}
if (is_set(pending->flags, active_op_cancelled)) {
crm_debug("Operation %s already cancelled", key);
free(local_key);
return FALSE;
}
set_bit(pending->flags, active_op_cancelled);
} else {
crm_info("No pending op found for %s", key);
free(local_key);
return FALSE;
}
crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key);
rc = lrm_state_cancel(lrm_state, pending->rsc_id, pending->op_type,
pending->interval_ms);
if (rc == pcmk_ok) {
crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key);
free(local_key);
return TRUE;
}
crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, key);
/* The caller needs to make sure the entry is
* removed from the pending_ops list
*
* Usually by returning TRUE inside the worker function
* supplied to g_hash_table_foreach_remove()
*
* Not removing the entry from pending_ops will block
* the node from shutting down
*/
free(local_key);
return FALSE;
}
struct cancel_data {
gboolean done;
gboolean remove;
const char *key;
lrmd_rsc_info_t *rsc;
lrm_state_t *lrm_state;
};
static gboolean
cancel_action_by_key(gpointer key, gpointer value, gpointer user_data)
{
gboolean remove = FALSE;
struct cancel_data *data = user_data;
active_op_t *op = value;
if (crm_str_eq(op->op_key, data->key, TRUE)) {
data->done = TRUE;
remove = !cancel_op(data->lrm_state, data->rsc->id, key, op->call_id, data->remove);
}
return remove;
}
static gboolean
cancel_op_key(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *key, gboolean remove)
{
guint removed = 0;
struct cancel_data data;
CRM_CHECK(rsc != NULL, return FALSE);
CRM_CHECK(key != NULL, return FALSE);
data.key = key;
data.rsc = rsc;
data.done = FALSE;
data.remove = remove;
data.lrm_state = lrm_state;
removed = g_hash_table_foreach_remove(lrm_state->pending_ops, cancel_action_by_key, &data);
crm_trace("Removed %u op cache entries, new size: %u",
removed, g_hash_table_size(lrm_state->pending_ops));
return data.done;
}
/*!
* \internal
* \brief Retrieve resource information from LRM
*
* \param[in] lrm_state LRM connection to use
* \param[in] rsc_xml XML containing resource configuration
* \param[in] do_create If true, register resource with LRM if not already
* \param[out] rsc_info Where to store resource information obtained from LRM
*
* \retval pcmk_ok Success (and rsc_info holds newly allocated result)
* \retval -EINVAL Required information is missing from arguments
* \retval -ENOTCONN No active connection to LRM
* \retval -ENODEV Resource not found
* \retval -errno Error communicating with executor when registering resource
*
* \note Caller is responsible for freeing result on success.
*/
static int
get_lrm_resource(lrm_state_t *lrm_state, xmlNode *rsc_xml, gboolean do_create,
lrmd_rsc_info_t **rsc_info)
{
const char *id = ID(rsc_xml);
CRM_CHECK(lrm_state && rsc_xml && rsc_info, return -EINVAL);
CRM_CHECK(id, return -EINVAL);
if (lrm_state_is_connected(lrm_state) == FALSE) {
return -ENOTCONN;
}
crm_trace("Retrieving resource information for %s from the executor", id);
*rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0);
// If resource isn't known by ID, try clone name, if provided
if (!*rsc_info) {
const char *long_id = crm_element_value(rsc_xml, XML_ATTR_ID_LONG);
if (long_id) {
*rsc_info = lrm_state_get_rsc_info(lrm_state, long_id, 0);
}
}
if ((*rsc_info == NULL) && do_create) {
const char *class = crm_element_value(rsc_xml, XML_AGENT_ATTR_CLASS);
const char *provider = crm_element_value(rsc_xml, XML_AGENT_ATTR_PROVIDER);
const char *type = crm_element_value(rsc_xml, XML_ATTR_TYPE);
int rc;
crm_trace("Registering resource %s with the executor", id);
rc = lrm_state_register_rsc(lrm_state, id, class, provider, type,
lrmd_opt_drop_recurring);
if (rc != pcmk_ok) {
fsa_data_t *msg_data = NULL;
crm_err("Could not register resource %s with the executor on %s: %s "
CRM_XS " rc=%d",
id, lrm_state->node_name, pcmk_strerror(rc), rc);
/* Register this as an internal error if this involves the local
* executor. Otherwise, we're likely dealing with an unresponsive
* remote node, which is not an FSA failure.
*/
if (lrm_state_is_local(lrm_state) == TRUE) {
register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
}
return rc;
}
*rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0);
}
return *rsc_info? pcmk_ok : -ENODEV;
}
static void
delete_resource(lrm_state_t * lrm_state,
const char *id,
lrmd_rsc_info_t * rsc,
GHashTableIter * gIter,
const char *sys,
const char *user,
ha_msg_input_t * request,
gboolean unregister)
{
int rc = pcmk_ok;
crm_info("Removing resource %s from executor for %s%s%s",
id, sys, (user? " as " : ""), (user? user : ""));
if (rsc && unregister) {
rc = lrm_state_unregister_rsc(lrm_state, id, 0);
}
if (rc == pcmk_ok) {
crm_trace("Resource %s deleted from executor", id);
} else if (rc == -EINPROGRESS) {
crm_info("Deletion of resource '%s' from executor is pending", id);
if (request) {
struct pending_deletion_op_s *op = NULL;
char *ref = crm_element_value_copy(request->msg, XML_ATTR_REFERENCE);
op = calloc(1, sizeof(struct pending_deletion_op_s));
op->rsc = strdup(rsc->id);
op->input = copy_ha_msg_input(request);
g_hash_table_insert(lrm_state->deletion_ops, ref, op);
}
return;
} else {
crm_warn("Could not delete '%s' from executor for %s%s%s: %s "
CRM_XS " rc=%d", id, sys, (user? " as " : ""),
(user? user : ""), pcmk_strerror(rc), rc);
}
delete_rsc_entry(lrm_state, request, id, gIter, rc, user);
}
static int
get_fake_call_id(lrm_state_t *lrm_state, const char *rsc_id)
{
int call_id = 999999999;
rsc_history_t *entry = NULL;
if(lrm_state) {
entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
}
/* Make sure the call id is greater than the last successful operation,
* otherwise the failure will not result in a possible recovery of the resource
* as it could appear the failure occurred before the successful start */
if (entry) {
call_id = entry->last_callid + 1;
}
if (call_id < 0) {
call_id = 1;
}
return call_id;
}
static void
fake_op_status(lrm_state_t *lrm_state, lrmd_event_data_t *op, int op_status,
enum ocf_exitcode op_exitcode)
{
op->call_id = get_fake_call_id(lrm_state, op->rsc_id);
op->t_run = time(NULL);
op->t_rcchange = op->t_run;
op->op_status = op_status;
op->rc = op_exitcode;
}
static void
force_reprobe(lrm_state_t *lrm_state, const char *from_sys,
const char *from_host, const char *user_name,
gboolean is_remote_node)
{
GHashTableIter gIter;
rsc_history_t *entry = NULL;
crm_info("Clearing resource history on node %s", lrm_state->node_name);
g_hash_table_iter_init(&gIter, lrm_state->resource_history);
while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
/* only unregister the resource during a reprobe if it is not a remote connection
* resource. otherwise unregistering the connection will terminate remote-node
* membership */
gboolean unregister = TRUE;
if (is_remote_lrmd_ra(NULL, NULL, entry->id)) {
lrm_state_t *remote_lrm_state = lrm_state_find(entry->id);
if (remote_lrm_state) {
/* when forcing a reprobe, make sure to clear remote node before
* clearing the remote node's connection resource */
force_reprobe(remote_lrm_state, from_sys, from_host, user_name, TRUE);
}
unregister = FALSE;
}
delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys,
user_name, NULL, unregister);
}
/* Now delete the copy in the CIB */
controld_delete_node_state(lrm_state->node_name, controld_section_lrm,
cib_scope_local);
/* Finally, _delete_ the value in pacemaker-attrd -- setting it to FALSE
* would result in the scheduler sending us back here again
*/
update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, user_name, is_remote_node);
}
/*!
* \internal
* \brief Fail a requested action without actually executing it
*
* For an action that can't be executed, process it similarly to an actual
* execution result, with specified error status (except for notify actions,
* which will always be treated as successful).
*
* \param[in] lrm_state Executor connection that action is for
* \param[in] action Action XML from request
* \param[in] rc Desired return code to use
* \param[in] op_status Desired operation status to use
*/
static void
synthesize_lrmd_failure(lrm_state_t *lrm_state, xmlNode *action,
int op_status, enum ocf_exitcode rc)
{
lrmd_event_data_t *op = NULL;
const char *operation = crm_element_value(action, XML_LRM_ATTR_TASK);
const char *target_node = crm_element_value(action, XML_LRM_ATTR_TARGET);
xmlNode *xml_rsc = find_xml_node(action, XML_CIB_TAG_RESOURCE, TRUE);
if ((xml_rsc == NULL) || (ID(xml_rsc) == NULL)) {
/* @TODO Should we do something else, like direct ack? */
crm_info("Can't fake %s failure (%d) on %s without resource configuration",
crm_element_value(action, XML_LRM_ATTR_TASK_KEY), rc,
target_node);
return;
} else if(operation == NULL) {
/* This probably came from crm_resource -C, nothing to do */
crm_info("Can't fake %s failure (%d) on %s without operation",
ID(xml_rsc), rc, target_node);
return;
}
op = construct_op(lrm_state, action, ID(xml_rsc), operation);
if (safe_str_eq(operation, RSC_NOTIFY)) { // Notifications can't fail
fake_op_status(lrm_state, op, PCMK_LRM_OP_DONE, PCMK_OCF_OK);
} else {
fake_op_status(lrm_state, op, op_status, rc);
}
crm_info("Faking " PCMK__OP_FMT " result (%d) on %s",
op->rsc_id, op->op_type, op->interval_ms, op->rc, target_node);
// Process the result as if it came from the LRM
process_lrm_event(lrm_state, op, NULL, action);
lrmd_free_event(op);
}
/*!
* \internal
* \brief Get target of an LRM operation
*
* \param[in] xml LRM operation data XML
*
* \return LRM operation target node name (local node or Pacemaker Remote node)
*/
static const char *
lrm_op_target(xmlNode *xml)
{
const char *target = NULL;
if (xml) {
target = crm_element_value(xml, XML_LRM_ATTR_TARGET);
}
if (target == NULL) {
target = fsa_our_uname;
}
return target;
}
static void
fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name,
const char *from_host, const char *from_sys)
{
lrmd_event_data_t *op = NULL;
lrmd_rsc_info_t *rsc = NULL;
xmlNode *xml_rsc = find_xml_node(xml, XML_CIB_TAG_RESOURCE, TRUE);
CRM_CHECK(xml_rsc != NULL, return);
/* The executor simply executes operations and reports the results, without
* any concept of success or failure, so to fail a resource, we must fake
* what a failure looks like.
*
* To do this, we create a fake executor operation event for the resource,
* and pass that event to the executor client callback so it will be
* processed as if it came from the executor.
*/
op = construct_op(lrm_state, xml, ID(xml_rsc), "asyncmon");
fake_op_status(lrm_state, op, PCMK_LRM_OP_DONE, PCMK_OCF_UNKNOWN_ERROR);
free((char*) op->user_data);
op->user_data = NULL;
op->interval_ms = 0;
#if ENABLE_ACL
if (user_name && !pcmk__is_privileged(user_name)) {
crm_err("%s does not have permission to fail %s", user_name, ID(xml_rsc));
controld_ack_event_directly(from_host, from_sys, NULL, op, ID(xml_rsc));
lrmd_free_event(op);
return;
}
#endif
if (get_lrm_resource(lrm_state, xml_rsc, TRUE, &rsc) == pcmk_ok) {
crm_info("Failing resource %s...", rsc->id);
op->exit_reason = strdup("Simulated failure");
process_lrm_event(lrm_state, op, NULL, xml);
op->op_status = PCMK_LRM_OP_DONE;
op->rc = PCMK_OCF_OK;
lrmd_free_rsc_info(rsc);
} else {
crm_info("Cannot find/create resource in order to fail it...");
crm_log_xml_warn(xml, "bad input");
}
controld_ack_event_directly(from_host, from_sys, NULL, op, ID(xml_rsc));
lrmd_free_event(op);
}
static void
handle_refresh_op(lrm_state_t *lrm_state, const char *user_name,
const char *from_host, const char *from_sys)
{
int rc = pcmk_ok;
xmlNode *fragment = do_lrm_query_internal(lrm_state, node_update_all);
fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc, user_name);
crm_info("Forced a local resource history refresh: call=%d", rc);
if (safe_str_neq(CRM_SYSTEM_CRMD, from_sys)) {
xmlNode *reply = create_request(CRM_OP_INVOKE_LRM, fragment, from_host,
from_sys, CRM_SYSTEM_LRMD,
fsa_our_uuid);
crm_debug("ACK'ing refresh from %s (%s)", from_sys, from_host);
if (relay_message(reply, TRUE) == FALSE) {
crm_log_xml_err(reply, "Unable to route reply");
}
free_xml(reply);
}
free_xml(fragment);
}
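// Handle a CRM_OP_LRM_QUERY request by replying with the full local resource history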
static void
handle_query_op(xmlNode *msg, lrm_state_t *lrm_state)
{
xmlNode *data = do_lrm_query_internal(lrm_state, node_update_all);
xmlNode *reply = create_reply(msg, data);
if (relay_message(reply, TRUE) == FALSE) {
crm_err("Unable to route reply");
crm_log_xml_err(reply, "reply");
}
free_xml(reply);
free_xml(data);
}
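// Handle a CRM_OP_REPROBE request by forcing all resource states to be re-detected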
static void
handle_reprobe_op(lrm_state_t *lrm_state, const char *from_sys,
const char *from_host, const char *user_name,
gboolean is_remote_node)
{
crm_notice("Forcing the status of all resources to be redetected");
force_reprobe(lrm_state, from_sys, from_host, user_name, is_remote_node);
- if (pcmk__str_none_of(from_sys, CRM_SYSTEM_PENGINE, CRM_SYSTEM_TENGINE, NULL)) {
+ if (!pcmk__str_any_of(from_sys, CRM_SYSTEM_PENGINE, CRM_SYSTEM_TENGINE, NULL)) {
xmlNode *reply = create_request(CRM_OP_INVOKE_LRM, NULL, from_host,
from_sys, CRM_SYSTEM_LRMD,
fsa_our_uuid);
crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host);
if (relay_message(reply, TRUE) == FALSE) {
crm_log_xml_err(reply, "Unable to route reply");
}
free_xml(reply);
}
}
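/*!
 * \internal
 * \brief Cancel a recurring or pending operation as requested
 *
 * \param[in] input      Request XML with the operation's meta-attributes
 * \param[in] lrm_state  Executor connection the operation belongs to
 * \param[in] rsc        Resource the operation is for
 * \param[in] from_host  Host the request originated from
 * \param[in] from_sys   IPC name the request originated from
 *
 * \return TRUE if the request could be processed, FALSE if it was malformed
 */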
static bool do_lrm_cancel(ha_msg_input_t *input, lrm_state_t *lrm_state,
lrmd_rsc_info_t *rsc, const char *from_host, const char *from_sys)
{
char *op_key = NULL;
char *meta_key = NULL;
int call = 0;
const char *call_id = NULL;
const char *op_task = NULL;
guint interval_ms = 0;
gboolean in_progress = FALSE;
xmlNode *params = find_xml_node(input->xml, XML_TAG_ATTRS, TRUE);
CRM_CHECK(params != NULL, return FALSE);
meta_key = crm_meta_name(XML_LRM_ATTR_TASK);
op_task = crm_element_value(params, meta_key);
free(meta_key);
CRM_CHECK(op_task != NULL, return FALSE);
meta_key = crm_meta_name(XML_LRM_ATTR_INTERVAL_MS);
if (crm_element_value_ms(params, meta_key, &interval_ms) != pcmk_ok) {
free(meta_key);
return FALSE;
}
free(meta_key);
op_key = pcmk__op_key(rsc->id, op_task, interval_ms);
meta_key = crm_meta_name(XML_LRM_ATTR_CALLID);
call_id = crm_element_value(params, meta_key);
free(meta_key);
crm_debug("Scheduler requested op %s (call=%s) be cancelled",
op_key, (call_id? call_id : "NA"));
call = crm_parse_int(call_id, "0");
if (call == 0) {
// Normal case when the scheduler cancels a recurring op
in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE);
} else {
// Normal case when the scheduler cancels an orphan op
in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE);
}
// Acknowledge cancellation operation if for a remote connection resource
if (!in_progress || is_remote_lrmd_ra(NULL, NULL, rsc->id)) {
char *op_id = make_stop_id(rsc->id, call);
if (is_remote_lrmd_ra(NULL, NULL, rsc->id) == FALSE) {
crm_info("Nothing known about operation %d for %s", call, op_key);
}
erase_lrm_history_by_id(lrm_state, rsc->id, op_key, NULL, call);
send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task,
from_host, from_sys);
/* needed at least for cancellation of a remote operation */
g_hash_table_remove(lrm_state->pending_ops, op_id);
free(op_id);
} else {
/* No ack is needed since abcdaa8, but peers with older versions
* in a rolling upgrade need one. We didn't bump the feature set
* at that commit, so we can only compare against the previous
* CRM version (3.0.8). If any peers have feature set 3.0.9 but
* not abcdaa8, they will time out waiting for the ack (no
* released versions of Pacemaker are affected).
*/
const char *peer_version = crm_element_value(params, XML_ATTR_CRM_VERSION);
if (compare_version(peer_version, "3.0.8") <= 0) {
crm_info("Sending compatibility ack for %s cancellation to %s (CRM version %s)",
op_key, from_host, peer_version);
send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task,
from_host, from_sys);
}
}
free(op_key);
return TRUE;
}
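/*!
 * \internal
 * \brief Delete a resource's history as requested, unregistering it from the
 *        executor unless a remote connection resource is being cleaned up
 */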
static void
do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state,
lrmd_rsc_info_t *rsc, const char *from_sys, const char *from_host,
bool crm_rsc_delete, const char *user_name)
{
gboolean unregister = TRUE;
#if ENABLE_ACL
int cib_rc = controld_delete_resource_history(rsc->id, lrm_state->node_name,
user_name,
cib_dryrun|cib_sync_call);
if (cib_rc != pcmk_rc_ok) {
lrmd_event_data_t *op = NULL;
op = construct_op(lrm_state, input->xml, rsc->id, CRMD_ACTION_DELETE);
op->op_status = PCMK_LRM_OP_ERROR;
if (cib_rc == EACCES) {
op->rc = PCMK_OCF_INSUFFICIENT_PRIV;
} else {
op->rc = PCMK_OCF_UNKNOWN_ERROR;
}
controld_ack_event_directly(from_host, from_sys, NULL, op, rsc->id);
lrmd_free_event(op);
return;
}
#endif
if (crm_rsc_delete && is_remote_lrmd_ra(NULL, NULL, rsc->id)) {
unregister = FALSE;
}
delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys,
user_name, input, unregister);
}
/* A_LRM_INVOKE */
void
do_lrm_invoke(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
lrm_state_t *lrm_state = NULL;
const char *crm_op = NULL;
const char *from_sys = NULL;
const char *from_host = NULL;
const char *operation = NULL;
ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
const char *user_name = NULL;
const char *target_node = NULL;
gboolean is_remote_node = FALSE;
bool crm_rsc_delete = FALSE;
target_node = lrm_op_target(input->xml);
is_remote_node = safe_str_neq(target_node, fsa_our_uname);
lrm_state = lrm_state_find(target_node);
if ((lrm_state == NULL) && is_remote_node) {
crm_err("Failing action because local node has never had connection to remote node %s",
target_node);
synthesize_lrmd_failure(NULL, input->xml, PCMK_LRM_OP_NOT_CONNECTED,
PCMK_OCF_UNKNOWN_ERROR);
return;
}
CRM_ASSERT(lrm_state != NULL);
#if ENABLE_ACL
user_name = pcmk__update_acl_user(input->msg, F_CRM_USER, NULL);
#endif
crm_op = crm_element_value(input->msg, F_CRM_TASK);
from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM);
if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) {
from_host = crm_element_value(input->msg, F_CRM_HOST_FROM);
}
#if ENABLE_ACL
crm_trace("Executor %s command from %s as user %s",
crm_op, from_sys, user_name);
#else
crm_trace("Executor %s command from %s",
crm_op, from_sys);
#endif
if (safe_str_eq(crm_op, CRM_OP_LRM_DELETE)) {
if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) {
crm_rsc_delete = TRUE; // from crm_resource
}
operation = CRMD_ACTION_DELETE;
} else if (safe_str_eq(crm_op, CRM_OP_LRM_FAIL)) {
fail_lrm_resource(input->xml, lrm_state, user_name, from_host,
from_sys);
return;
} else if (input->xml != NULL) {
operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK);
}
if (safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) {
handle_refresh_op(lrm_state, user_name, from_host, from_sys);
} else if (safe_str_eq(crm_op, CRM_OP_LRM_QUERY)) {
handle_query_op(input->msg, lrm_state);
} else if (safe_str_eq(operation, CRM_OP_PROBED)) {
update_attrd(lrm_state->node_name, CRM_OP_PROBED, XML_BOOLEAN_TRUE,
user_name, is_remote_node);
} else if (pcmk__str_any_of(CRM_OP_REPROBE, operation, crm_op, NULL)) {
handle_reprobe_op(lrm_state, from_sys, from_host, user_name,
is_remote_node);
} else if (operation != NULL) {
lrmd_rsc_info_t *rsc = NULL;
xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE);
gboolean create_rsc = safe_str_neq(operation, CRMD_ACTION_DELETE);
int rc;
// We can't return anything meaningful without a resource ID
CRM_CHECK(xml_rsc && ID(xml_rsc), return);
rc = get_lrm_resource(lrm_state, xml_rsc, create_rsc, &rsc);
if (rc == -ENOTCONN) {
synthesize_lrmd_failure(lrm_state, input->xml,
PCMK_LRM_OP_NOT_CONNECTED,
PCMK_OCF_UNKNOWN_ERROR);
return;
} else if ((rc < 0) && !create_rsc) {
/* Delete of malformed or nonexistent resource
* (deleting something that does not exist is a success)
*/
crm_notice("Not registering resource '%s' for a %s event "
CRM_XS " get-rc=%d (%s) transition-key=%s",
ID(xml_rsc), operation,
rc, pcmk_strerror(rc), ID(input->xml));
delete_rsc_entry(lrm_state, input, ID(xml_rsc), NULL, pcmk_ok,
user_name);
return;
} else if (rc == -EINVAL) {
// Resource operation on malformed resource
crm_err("Invalid resource definition for %s", ID(xml_rsc));
crm_log_xml_warn(input->msg, "invalid resource");
synthesize_lrmd_failure(lrm_state, input->xml, PCMK_LRM_OP_ERROR,
PCMK_OCF_NOT_CONFIGURED); // fatal error
return;
} else if (rc < 0) {
// Error communicating with the executor
crm_err("Could not register resource '%s' with executor: %s "
CRM_XS " rc=%d",
ID(xml_rsc), pcmk_strerror(rc), rc);
crm_log_xml_warn(input->msg, "failed registration");
synthesize_lrmd_failure(lrm_state, input->xml, PCMK_LRM_OP_ERROR,
PCMK_OCF_INVALID_PARAM); // hard error
return;
}
if (safe_str_eq(operation, CRMD_ACTION_CANCEL)) {
if (!do_lrm_cancel(input, lrm_state, rsc, from_host, from_sys)) {
crm_log_xml_warn(input->xml, "Bad command");
}
} else if (safe_str_eq(operation, CRMD_ACTION_DELETE)) {
do_lrm_delete(input, lrm_state, rsc, from_sys, from_host,
crm_rsc_delete, user_name);
} else {
do_lrm_rsc_op(lrm_state, rsc, operation, input->xml);
}
lrmd_free_rsc_info(rsc);
} else {
crm_err("Cannot perform operation %s of unknown type", crm_str(crm_op));
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
}
#if ENABLE_VERSIONED_ATTRS
static void
resolve_versioned_parameters(lrm_state_t *lrm_state, const char *rsc_id,
                             const xmlNode *rsc_op, lrmd_event_data_t *op,
                             GHashTable *params)
{
    /* Resource info *should* already be cached, so this should not trigger
     * another executor call
     */
lrmd_rsc_info_t *rsc = lrm_state_get_rsc_info(lrm_state, rsc_id, 0);
struct ra_metadata_s *metadata;
metadata = metadata_cache_get(lrm_state->metadata_cache, rsc);
if (metadata) {
xmlNode *versioned_attrs = NULL;
GHashTable *hash = NULL;
char *key = NULL;
char *value = NULL;
GHashTableIter iter;
versioned_attrs = first_named_child(rsc_op, XML_TAG_OP_VER_ATTRS);
hash = pe_unpack_versioned_parameters(versioned_attrs, metadata->ra_version);
g_hash_table_iter_init(&iter, hash);
while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) {
g_hash_table_iter_steal(&iter);
g_hash_table_replace(params, key, value);
}
g_hash_table_destroy(hash);
versioned_attrs = first_named_child(rsc_op, XML_TAG_OP_VER_META);
hash = pe_unpack_versioned_parameters(versioned_attrs, metadata->ra_version);
g_hash_table_iter_init(&iter, hash);
while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) {
g_hash_table_replace(params, crm_meta_name(key), strdup(value));
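            /* Timeout and start delay were already taken from the plain
             * meta-attributes, so apply any versioned overrides directly to
             * the event
             */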
if (safe_str_eq(key, XML_ATTR_TIMEOUT)) {
op->timeout = crm_parse_int(value, "0");
} else if (safe_str_eq(key, XML_OP_ATTR_START_DELAY)) {
op->start_delay = crm_parse_int(value, "0");
}
}
g_hash_table_destroy(hash);
versioned_attrs = first_named_child(rsc_op, XML_TAG_RSC_VER_ATTRS);
hash = pe_unpack_versioned_parameters(versioned_attrs, metadata->ra_version);
g_hash_table_iter_init(&iter, hash);
while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) {
g_hash_table_iter_steal(&iter);
g_hash_table_replace(params, key, value);
}
g_hash_table_destroy(hash);
}
lrmd_free_rsc_info(rsc);
}
#endif
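/*!
 * \internal
 * \brief Build a (pending) executor event from resource action XML
 *
 * \param[in] lrm_state  Executor connection the action is for, if any
 * \param[in] rsc_op     Action XML from request (NULL for the stop-all case)
 * \param[in] rsc_id     ID of the resource the action is for
 * \param[in] operation  Name of the action
 *
 * \return Newly allocated executor event
 */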
static lrmd_event_data_t *
construct_op(lrm_state_t *lrm_state, xmlNode *rsc_op, const char *rsc_id,
const char *operation)
{
lrmd_event_data_t *op = NULL;
const char *op_delay = NULL;
const char *op_timeout = NULL;
GHashTable *params = NULL;
const char *transition = NULL;
CRM_ASSERT(rsc_id && operation);
op = lrmd_new_event(rsc_id, operation, 0);
op->type = lrmd_event_exec_complete;
op->op_status = PCMK_LRM_OP_PENDING;
op->rc = -1;
op->timeout = 0;
op->start_delay = 0;
if (rsc_op == NULL) {
CRM_LOG_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation));
op->user_data = NULL;
        /* This is the stop_all_resources() case: by definition there is no DC
         * (otherwise it would be shutting us down), so use our own feature set
         * version here.
         */
op->params = crm_str_table_new();
g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION), strdup(CRM_FEATURE_SET));
crm_trace("Constructed %s op for %s", operation, rsc_id);
return op;
}
params = xml2list(rsc_op);
g_hash_table_remove(params, CRM_META "_op_target_rc");
op_delay = crm_meta_value(params, XML_OP_ATTR_START_DELAY);
op->start_delay = crm_parse_int(op_delay, "0");
op_timeout = crm_meta_value(params, XML_ATTR_TIMEOUT);
op->timeout = crm_parse_int(op_timeout, "0");
if (pcmk__guint_from_hash(params, CRM_META "_" XML_LRM_ATTR_INTERVAL_MS, 0,
&(op->interval_ms)) != pcmk_rc_ok) {
op->interval_ms = 0;
}
#if ENABLE_VERSIONED_ATTRS
if (lrm_state && !is_remote_lrmd_ra(NULL, NULL, rsc_id)
- && pcmk__str_none_of(op_type, CRMD_ACTION_METADATA, CRMD_ACTION_DELETE,
+ && !pcmk__str_any_of(op_type, CRMD_ACTION_METADATA, CRMD_ACTION_DELETE,
NULL)) {
        resolve_versioned_parameters(lrm_state, rsc_id, rsc_op, op, params);
}
#endif
if (safe_str_neq(operation, RSC_STOP)) {
op->params = params;
} else {
rsc_history_t *entry = NULL;
if (lrm_state) {
entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
}
/* If we do not have stop parameters cached, use
* whatever we are given */
if (!entry || !entry->stop_params) {
op->params = params;
} else {
/* Copy the cached parameter list so that we stop the resource
* with the old attributes, not the new ones */
op->params = crm_str_table_new();
g_hash_table_foreach(params, copy_meta_keys, op->params);
g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params);
g_hash_table_destroy(params);
params = NULL;
}
}
/* sanity */
if (op->timeout <= 0) {
op->timeout = op->interval_ms;
}
if (op->start_delay < 0) {
op->start_delay = 0;
}
transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY);
CRM_CHECK(transition != NULL, return op);
op->user_data = strdup(transition);
if (op->interval_ms != 0) {
if (pcmk__str_any_of(operation, CRMD_ACTION_START, CRMD_ACTION_STOP, NULL)) {
crm_err("Start and Stop actions cannot have an interval: %u",
op->interval_ms);
op->interval_ms = 0;
}
}
crm_trace("Constructed %s op for %s: interval=%u",
operation, rsc_id, op->interval_ms);
return op;
}
/*!
* \internal
* \brief Send a (synthesized) event result
*
* Reply with a synthesized event result directly, as opposed to going through
* the executor.
*
* \param[in] to_host Host to send result to
* \param[in] to_sys IPC name to send result to (NULL for transition engine)
* \param[in] rsc Type information about resource the result is for
* \param[in] op Event with result to send
* \param[in] rsc_id ID of resource the result is for
*/
void
controld_ack_event_directly(const char *to_host, const char *to_sys,
lrmd_rsc_info_t *rsc, lrmd_event_data_t *op,
const char *rsc_id)
{
xmlNode *reply = NULL;
xmlNode *update, *iter;
crm_node_t *peer = NULL;
CRM_CHECK(op != NULL, return);
if (op->rsc_id == NULL) {
CRM_ASSERT(rsc_id != NULL);
op->rsc_id = strdup(rsc_id);
}
if (to_sys == NULL) {
to_sys = CRM_SYSTEM_TENGINE;
}
peer = crm_get_peer(0, fsa_our_uname);
update = create_node_state_update(peer, node_update_none, NULL,
__FUNCTION__);
iter = create_xml_node(update, XML_CIB_TAG_LRM);
crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid);
iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES);
iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE);
crm_xml_add(iter, XML_ATTR_ID, op->rsc_id);
build_operation_update(iter, rsc, op, fsa_our_uname, __FUNCTION__);
reply = create_request(CRM_OP_INVOKE_LRM, update, to_host, to_sys, CRM_SYSTEM_LRMD, NULL);
crm_log_xml_trace(update, "[direct ACK]");
crm_debug("ACK'ing resource op " PCMK__OP_FMT " from %s: %s",
op->rsc_id, op->op_type, op->interval_ms, op->user_data,
crm_element_value(reply, XML_ATTR_REFERENCE));
if (relay_message(reply, TRUE) == FALSE) {
crm_log_xml_err(reply, "Unable to route reply");
}
free_xml(update);
free_xml(reply);
}
gboolean
verify_stopped(enum crmd_fsa_state cur_state, int log_level)
{
gboolean res = TRUE;
GList *lrm_state_list = lrm_state_get_list();
GList *state_entry;
for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) {
lrm_state_t *lrm_state = state_entry->data;
if (!lrm_state_verify_stopped(lrm_state, cur_state, log_level)) {
            /* keep iterating through all executor connections even if one fails */
res = FALSE;
}
}
set_bit(fsa_input_register, R_SENT_RSC_STOP);
g_list_free(lrm_state_list); lrm_state_list = NULL;
return res;
}
struct stop_recurring_action_s {
lrmd_rsc_info_t *rsc;
lrm_state_t *lrm_state;
};
static gboolean
stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data)
{
gboolean remove = FALSE;
struct stop_recurring_action_s *event = user_data;
active_op_t *op = value;
if ((op->interval_ms != 0)
&& crm_str_eq(op->rsc_id, event->rsc->id, TRUE)) {
crm_debug("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (char*)key);
remove = !cancel_op(event->lrm_state, event->rsc->id, key, op->call_id, FALSE);
}
return remove;
}
static gboolean
stop_recurring_actions(gpointer key, gpointer value, gpointer user_data)
{
gboolean remove = FALSE;
lrm_state_t *lrm_state = user_data;
active_op_t *op = value;
if (op->interval_ms != 0) {
crm_info("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id,
(const char *) key);
remove = !cancel_op(lrm_state, op->rsc_id, key, op->call_id, FALSE);
}
return remove;
}
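/*!
 * \internal
 * \brief Record an operation as pending in the CIB, if appropriate
 *
 * Only recordable operation types are written, and only when the
 * record-pending meta-attribute (which defaults to true) allows it.
 */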
static void
record_pending_op(const char *node_name, lrmd_rsc_info_t *rsc, lrmd_event_data_t *op)
{
const char *record_pending = NULL;
CRM_CHECK(node_name != NULL, return);
CRM_CHECK(rsc != NULL, return);
CRM_CHECK(op != NULL, return);
// Never record certain operation types as pending
if ((op->op_type == NULL) || (op->params == NULL)
|| !controld_action_is_recordable(op->op_type)) {
return;
}
    // The "record-pending" meta-attribute defaults to true
record_pending = crm_meta_value(op->params, XML_OP_ATTR_PENDING);
if (record_pending && !crm_is_true(record_pending)) {
return;
}
op->call_id = -1;
op->op_status = PCMK_LRM_OP_PENDING;
op->rc = PCMK_OCF_UNKNOWN;
op->t_run = time(NULL);
op->t_rcchange = op->t_run;
/* write a "pending" entry to the CIB, inhibit notification */
crm_debug("Recording pending op " PCMK__OP_FMT " on %s in the CIB",
op->rsc_id, op->op_type, op->interval_ms, node_name);
do_update_resource(node_name, rsc, op, 0);
}
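/*!
 * \internal
 * \brief Ask the executor to perform a resource action
 *
 * \param[in] lrm_state  Executor connection to use
 * \param[in] rsc        Resource information
 * \param[in] operation  Name of the action to perform
 * \param[in] msg        Action XML from request, if any
 */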
static void
do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
const char *operation, xmlNode *msg)
{
int call_id = 0;
char *op_id = NULL;
lrmd_event_data_t *op = NULL;
lrmd_key_value_t *params = NULL;
fsa_data_t *msg_data = NULL;
const char *transition = NULL;
gboolean stop_recurring = FALSE;
bool send_nack = FALSE;
CRM_CHECK(rsc != NULL, return);
CRM_CHECK(operation != NULL, return);
if (msg != NULL) {
transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY);
if (transition == NULL) {
crm_log_xml_err(msg, "Missing transition number");
}
}
op = construct_op(lrm_state, msg, rsc->id, operation);
CRM_CHECK(op != NULL, return);
if (is_remote_lrmd_ra(NULL, NULL, rsc->id)
&& (op->interval_ms == 0)
&& strcmp(operation, CRMD_ACTION_MIGRATE) == 0) {
        /* Pacemaker Remote connections are a special case. We never want to
         * stop monitoring a connection resource until the entire migration has
         * completed, because an unexpectedly severed connection, even during a
         * migration, is an event we must detect.
         */
stop_recurring = FALSE;
} else if ((op->interval_ms == 0)
&& strcmp(operation, CRMD_ACTION_STATUS) != 0
&& strcmp(operation, CRMD_ACTION_NOTIFY) != 0) {
/* stop any previous monitor operations before changing the resource state */
stop_recurring = TRUE;
}
if (stop_recurring == TRUE) {
guint removed = 0;
struct stop_recurring_action_s data;
data.rsc = rsc;
data.lrm_state = lrm_state;
removed = g_hash_table_foreach_remove(
lrm_state->pending_ops, stop_recurring_action_by_rsc, &data);
if (removed) {
crm_debug("Stopped %u recurring operation%s in preparation for "
PCMK__OP_FMT, removed, pcmk__plural_s(removed),
rsc->id, operation, op->interval_ms);
}
}
/* now do the op */
crm_notice("Requesting local execution of %s operation for %s on %s "
CRM_XS " transition_key=%s op_key=" PCMK__OP_FMT,
crm_action_str(op->op_type, op->interval_ms), rsc->id, lrm_state->node_name,
transition, rsc->id, operation, op->interval_ms);
if (is_set(fsa_input_register, R_SHUTDOWN) && safe_str_eq(operation, RSC_START)) {
register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);
send_nack = TRUE;
} else if (fsa_state != S_NOT_DC
&& fsa_state != S_POLICY_ENGINE /* Recalculating */
&& fsa_state != S_TRANSITION_ENGINE
&& safe_str_neq(operation, CRMD_ACTION_STOP)) {
send_nack = TRUE;
}
if(send_nack) {
crm_notice("Discarding attempt to perform action %s on %s in state %s (shutdown=%s)",
operation, rsc->id, fsa_state2string(fsa_state),
is_set(fsa_input_register, R_SHUTDOWN)?"true":"false");
op->rc = PCMK_OCF_UNKNOWN_ERROR;
op->op_status = PCMK_LRM_OP_INVALID;
controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id);
lrmd_free_event(op);
free(op_id);
return;
}
record_pending_op(lrm_state->node_name, rsc, op);
op_id = pcmk__op_key(rsc->id, op->op_type, op->interval_ms);
if (op->interval_ms > 0) {
/* cancel it so we can then restart it without conflict */
cancel_op_key(lrm_state, rsc, op_id, FALSE);
}
if (op->params) {
char *key = NULL;
char *value = NULL;
GHashTableIter iter;
g_hash_table_iter_init(&iter, op->params);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
params = lrmd_key_value_add(params, key, value);
}
}
call_id = lrm_state_exec(lrm_state, rsc->id, op->op_type, op->user_data,
op->interval_ms, op->timeout, op->start_delay,
params);
if (call_id <= 0 && lrm_state_is_local(lrm_state)) {
crm_err("Operation %s on %s failed: %d", operation, rsc->id, call_id);
register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
} else if (call_id <= 0) {
crm_err("Operation %s on resource %s failed to execute on remote node %s: %d",
operation, rsc->id, lrm_state->node_name, call_id);
fake_op_status(lrm_state, op, PCMK_LRM_OP_DONE, PCMK_OCF_UNKNOWN_ERROR);
process_lrm_event(lrm_state, op, NULL, NULL);
} else {
/* record all operations so we can wait
* for them to complete during shutdown
*/
char *call_id_s = make_stop_id(rsc->id, call_id);
active_op_t *pending = NULL;
pending = calloc(1, sizeof(active_op_t));
crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s);
pending->call_id = call_id;
pending->interval_ms = op->interval_ms;
pending->op_type = strdup(operation);
pending->op_key = strdup(op_id);
pending->rsc_id = strdup(rsc->id);
pending->start_time = time(NULL);
pending->user_data = op->user_data? strdup(op->user_data) : NULL;
if (crm_element_value_epoch(msg, XML_CONFIG_ATTR_SHUTDOWN_LOCK,
&(pending->lock_time)) != pcmk_ok) {
pending->lock_time = 0;
}
g_hash_table_replace(lrm_state->pending_ops, call_id_s, pending);
if ((op->interval_ms > 0)
&& (op->start_delay > START_DELAY_THRESHOLD)) {
int target_rc = 0;
crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id);
decode_transition_key(op->user_data, NULL, NULL, NULL, &target_rc);
op->rc = target_rc;
op->op_status = PCMK_LRM_OP_DONE;
controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id);
}
pending->params = op->params;
op->params = NULL;
}
free(op_id);
lrmd_free_event(op);
return;
}
int last_resource_update = 0;
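// CIB callback for resource history updates initiated by do_update_resource()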
static void
cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
switch (rc) {
case pcmk_ok:
case -pcmk_err_diff_failed:
case -pcmk_err_diff_resync:
crm_trace("Resource update %d complete: rc=%d", call_id, rc);
break;
default:
crm_warn("Resource update %d failed: (rc=%d) %s", call_id, rc, pcmk_strerror(rc));
}
if (call_id == last_resource_update) {
last_resource_update = 0;
trigger_fsa(fsa_source);
}
}
/* Only successful stops, and probes that found the resource inactive, get locks
* recorded in the history. This ensures the resource stays locked to the node
* until it is active there again after the node comes back up.
*/
static bool
should_preserve_lock(lrmd_event_data_t *op)
{
if (!controld_shutdown_lock_enabled) {
return false;
}
if (!strcmp(op->op_type, RSC_STOP) && (op->rc == PCMK_OCF_OK)) {
return true;
}
if (!strcmp(op->op_type, RSC_STATUS) && (op->rc == PCMK_OCF_NOT_RUNNING)) {
return true;
}
return false;
}
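/*!
 * \internal
 * \brief Write an action result to the node's resource history in the CIB
 *
 * \param[in] node_name  Name of the node the result is for
 * \param[in] rsc        Resource information (NULL if no longer registered)
 * \param[in] op         Action result to record
 * \param[in] lock_time  Shutdown lock time to record, if any
 *
 * \return CIB call ID of the update (0 or negative if no update was sent)
 */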
static int
do_update_resource(const char *node_name, lrmd_rsc_info_t *rsc,
lrmd_event_data_t *op, time_t lock_time)
{
    /* The update we build has this form:
     *
     *   <status>
     *     <node_state id="<uuid>" uname="<node>">
     *       <lrm id="<uuid>">
     *         <lrm_resources>
     *           <lrm_resource id="<rsc-id>">
     *             <lrm_rsc_op .../>
     */
int rc = pcmk_ok;
xmlNode *update, *iter = NULL;
int call_opt = crmd_cib_smart_opt();
const char *uuid = NULL;
CRM_CHECK(op != NULL, return 0);
iter = create_xml_node(iter, XML_CIB_TAG_STATUS);
update = iter;
iter = create_xml_node(iter, XML_CIB_TAG_STATE);
if (safe_str_eq(node_name, fsa_our_uname)) {
uuid = fsa_our_uuid;
} else {
        /* A Pacemaker Remote node's UUID is the same as its uname */
uuid = node_name;
crm_xml_add(iter, XML_NODE_IS_REMOTE, "true");
}
CRM_LOG_ASSERT(uuid != NULL);
if(uuid == NULL) {
rc = -EINVAL;
goto done;
}
crm_xml_add(iter, XML_ATTR_UUID, uuid);
crm_xml_add(iter, XML_ATTR_UNAME, node_name);
crm_xml_add(iter, XML_ATTR_ORIGIN, __FUNCTION__);
iter = create_xml_node(iter, XML_CIB_TAG_LRM);
crm_xml_add(iter, XML_ATTR_ID, uuid);
iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES);
iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE);
crm_xml_add(iter, XML_ATTR_ID, op->rsc_id);
build_operation_update(iter, rsc, op, node_name, __FUNCTION__);
if (rsc) {
const char *container = NULL;
crm_xml_add(iter, XML_ATTR_TYPE, rsc->type);
crm_xml_add(iter, XML_AGENT_ATTR_CLASS, rsc->standard);
crm_xml_add(iter, XML_AGENT_ATTR_PROVIDER, rsc->provider);
if (lock_time != 0) {
/* Actions on a locked resource should either preserve the lock by
* recording it with the action result, or clear it.
*/
if (!should_preserve_lock(op)) {
lock_time = 0;
}
crm_xml_add_ll(iter, XML_CONFIG_ATTR_SHUTDOWN_LOCK,
(long long) lock_time);
}
if (op->params) {
container = g_hash_table_lookup(op->params, CRM_META"_"XML_RSC_ATTR_CONTAINER);
}
if (container) {
crm_trace("Resource %s is a part of container resource %s", op->rsc_id, container);
crm_xml_add(iter, XML_RSC_ATTR_CONTAINER, container);
}
} else {
crm_warn("Resource %s no longer exists in the executor", op->rsc_id);
controld_ack_event_directly(NULL, NULL, rsc, op, op->rsc_id);
goto cleanup;
}
crm_log_xml_trace(update, __FUNCTION__);
/* make it an asynchronous call and be done with it
*
* Best case:
* the resource state will be discovered during
* the next signup or election.
*
* Bad case:
* we are shutting down and there is no DC at the time,
     * but then why were we shutting down anyway?
* (probably because of an internal error)
*
* Worst case:
* we get shot for having resources "running" that really weren't
*
* the alternative however means blocking here for too long, which
* isn't acceptable
*/
fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, rc, NULL);
if (rc > 0) {
last_resource_update = rc;
}
done:
/* the return code is a call number, not an error code */
crm_trace("Sent resource state update message: %d for %s=%u on %s",
rc, op->op_type, op->interval_ms, op->rsc_id);
fsa_register_cib_callback(rc, FALSE, NULL, cib_rsc_callback);
cleanup:
free_xml(update);
return rc;
}
void
do_lrm_event(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data)
{
CRM_CHECK(FALSE, return);
}
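// Return a copy of a string with "\n" escape sequences replaced by real newlines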
static char *
unescape_newlines(const char *string)
{
char *pch = NULL;
char *ret = NULL;
static const char *escaped_newline = "\\n";
if (!string) {
return NULL;
}
ret = strdup(string);
pch = strstr(ret, escaped_newline);
while (pch != NULL) {
/* Replace newline escape pattern with actual newline (and a space so we
* don't have to shuffle the rest of the buffer)
*/
pch[0] = '\n';
pch[1] = ' ';
pch = strstr(pch, escaped_newline);
}
return ret;
}
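// Check whether a resource's last recorded failure matches the given action and interval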
static bool
did_lrm_rsc_op_fail(lrm_state_t *lrm_state, const char * rsc_id,
const char * op_type, guint interval_ms)
{
rsc_history_t *entry = NULL;
CRM_CHECK(lrm_state != NULL, return FALSE);
CRM_CHECK(rsc_id != NULL, return FALSE);
CRM_CHECK(op_type != NULL, return FALSE);
entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
if (entry == NULL || entry->failed == NULL) {
return FALSE;
}
if (crm_str_eq(entry->failed->rsc_id, rsc_id, TRUE)
&& safe_str_eq(entry->failed->op_type, op_type)
&& entry->failed->interval_ms == interval_ms) {
return TRUE;
}
return FALSE;
}
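/*!
 * \internal
 * \brief Process an executor (or synthesized) action result
 *
 * \param[in] lrm_state   Executor connection the result came from, if any
 * \param[in] op          Action result
 * \param[in] pending     Pending operation entry, if already known
 * \param[in] action_xml  Action XML, if the result was synthesized
 */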
void
process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op,
active_op_t *pending, xmlNode *action_xml)
{
char *op_id = NULL;
char *op_key = NULL;
int update_id = 0;
gboolean remove = FALSE;
gboolean removed = FALSE;
bool need_direct_ack = FALSE;
lrmd_rsc_info_t *rsc = NULL;
const char *node_name = NULL;
CRM_CHECK(op != NULL, return);
CRM_CHECK(op->rsc_id != NULL, return);
// Remap new status codes for older DCs
if (compare_version(fsa_our_dc_version, "3.2.0") < 0) {
switch (op->op_status) {
case PCMK_LRM_OP_NOT_CONNECTED:
op->op_status = PCMK_LRM_OP_ERROR;
op->rc = PCMK_OCF_CONNECTION_DIED;
break;
case PCMK_LRM_OP_INVALID:
op->op_status = PCMK_LRM_OP_ERROR;
op->rc = CRM_DIRECT_NACK_RC;
break;
default:
break;
}
}
op_id = make_stop_id(op->rsc_id, op->call_id);
op_key = pcmk__op_key(op->rsc_id, op->op_type, op->interval_ms);
// Get resource info if available (from executor state or action XML)
if (lrm_state) {
rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0);
}
if ((rsc == NULL) && action_xml) {
xmlNode *xml = find_xml_node(action_xml, XML_CIB_TAG_RESOURCE, TRUE);
const char *standard = crm_element_value(xml, XML_AGENT_ATTR_CLASS);
const char *provider = crm_element_value(xml, XML_AGENT_ATTR_PROVIDER);
const char *type = crm_element_value(xml, XML_ATTR_TYPE);
if (standard && type) {
crm_info("%s agent information not cached, using %s%s%s:%s from action XML",
op->rsc_id, standard,
(provider? ":" : ""), (provider? provider : ""), type);
rsc = lrmd_new_rsc_info(op->rsc_id, standard, provider, type);
} else {
crm_err("Can't process %s result because %s agent information not cached or in XML",
op_key, op->rsc_id);
}
}
// Get node name if available (from executor state or action XML)
if (lrm_state) {
node_name = lrm_state->node_name;
} else if (action_xml) {
node_name = crm_element_value(action_xml, XML_LRM_ATTR_TARGET);
}
if(pending == NULL) {
remove = TRUE;
if (lrm_state) {
pending = g_hash_table_lookup(lrm_state->pending_ops, op_id);
}
}
if (op->op_status == PCMK_LRM_OP_ERROR) {
switch(op->rc) {
case PCMK_OCF_NOT_RUNNING:
case PCMK_OCF_RUNNING_MASTER:
case PCMK_OCF_DEGRADED:
case PCMK_OCF_DEGRADED_MASTER:
// Leave it to the TE/scheduler to decide if this is an error
op->op_status = PCMK_LRM_OP_DONE;
break;
default:
/* Nothing to do */
break;
}
}
if (op->op_status != PCMK_LRM_OP_CANCELLED) {
/* We might not record the result, so directly acknowledge it to the
* originator instead, so it doesn't time out waiting for the result
* (especially important if part of a transition).
*/
need_direct_ack = TRUE;
if (controld_action_is_recordable(op->op_type)) {
if (node_name && rsc) {
// We should record the result, and happily, we can
update_id = do_update_resource(node_name, rsc, op,
pending? pending->lock_time : 0);
need_direct_ack = FALSE;
} else if (op->rsc_deleted) {
/* We shouldn't record the result (likely the resource was
* refreshed, cleaned, or removed while this operation was
* in flight).
*/
crm_notice("Not recording %s result in CIB because "
"resource information was removed since it was initiated",
op_key);
} else {
/* This shouldn't be possible; the executor didn't consider the
* resource deleted, but we couldn't find resource or node
* information.
*/
crm_err("Unable to record %s result in CIB: %s", op_key,
(node_name? "No resource information" : "No node name"));
}
}
} else if (op->interval_ms == 0) {
/* A non-recurring operation was cancelled. Most likely, the
* never-initiated action was removed from the executor's pending
* operations list upon resource removal.
*/
need_direct_ack = TRUE;
} else if (pending == NULL) {
/* This recurring operation was cancelled, but was not pending. No
* transition actions are waiting on it, nothing needs to be done.
*/
} else if (op->user_data == NULL) {
/* This recurring operation was cancelled and pending, but we don't
* have a transition key. This should never happen.
*/
crm_err("Recurring operation %s was cancelled without transition information",
op_key);
} else if (is_set(pending->flags, active_op_remove)) {
/* This recurring operation was cancelled (by us) and pending, and we
* have been waiting for it to finish.
*/
if (lrm_state) {
erase_lrm_history_by_op(lrm_state, op);
}
        /* If the recurring operation had failed, the result is recorded as
         * "last_failure", which erase_lrm_history_by_op() deliberately does
         * not erase from the CIB. In that case, the cancel action would never
         * be confirmed by the DC via process_op_deletion(), and the cluster
         * transition would get stuck waiting for the remaining action timer to
         * time out.
         *
         * Directly acknowledge the cancel operation in this case.
         */
if (did_lrm_rsc_op_fail(lrm_state, pending->rsc_id,
pending->op_type, pending->interval_ms)) {
need_direct_ack = TRUE;
}
} else if (op->rsc_deleted) {
/* This recurring operation was cancelled (but not by us, and the
* executor does not have resource information, likely due to resource
* cleanup, refresh, or removal) and pending.
*/
crm_debug("Recurring op %s was cancelled due to resource deletion",
op_key);
need_direct_ack = TRUE;
} else {
/* This recurring operation was cancelled (but not by us, likely by the
* executor before stopping the resource) and pending. We don't need to
* do anything special.
*/
}
if (need_direct_ack) {
controld_ack_event_directly(NULL, NULL, NULL, op, op->rsc_id);
}
if(remove == FALSE) {
/* The caller will do this afterwards, but keep the logging consistent */
removed = TRUE;
} else if (lrm_state && ((op->interval_ms == 0)
|| (op->op_status == PCMK_LRM_OP_CANCELLED))) {
gboolean found = g_hash_table_remove(lrm_state->pending_ops, op_id);
if (op->interval_ms != 0) {
removed = TRUE;
} else if (found) {
removed = TRUE;
crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed",
op_key, op->call_id, op_id,
g_hash_table_size(lrm_state->pending_ops));
}
}
if (node_name == NULL) {
node_name = "unknown node"; // for logging
}
switch (op->op_status) {
case PCMK_LRM_OP_CANCELLED:
crm_info("Result of %s operation for %s on %s: %s "
CRM_XS " call=%d key=%s confirmed=%s",
crm_action_str(op->op_type, op->interval_ms),
op->rsc_id, node_name,
services_lrm_status_str(op->op_status),
op->call_id, op_key, (removed? "true" : "false"));
break;
case PCMK_LRM_OP_DONE:
crm_notice("Result of %s operation for %s on %s: %s "
CRM_XS " rc=%d call=%d key=%s confirmed=%s cib-update=%d",
crm_action_str(op->op_type, op->interval_ms),
op->rsc_id, node_name,
services_ocf_exitcode_str(op->rc), op->rc,
op->call_id, op_key, (removed? "true" : "false"),
update_id);
break;
case PCMK_LRM_OP_TIMEOUT:
crm_err("Result of %s operation for %s on %s: %s "
CRM_XS " call=%d key=%s timeout=%dms",
crm_action_str(op->op_type, op->interval_ms),
op->rsc_id, node_name,
services_lrm_status_str(op->op_status),
op->call_id, op_key, op->timeout);
break;
default:
crm_err("Result of %s operation for %s on %s: %s "
CRM_XS " call=%d key=%s confirmed=%s status=%d cib-update=%d",
crm_action_str(op->op_type, op->interval_ms),
op->rsc_id, node_name,
services_lrm_status_str(op->op_status), op->call_id, op_key,
(removed? "true" : "false"), op->op_status, update_id);
}
if (op->output) {
char *prefix =
crm_strdup_printf("%s-" PCMK__OP_FMT ":%d", node_name,
op->rsc_id, op->op_type, op->interval_ms,
op->call_id);
if (op->rc) {
crm_log_output(LOG_NOTICE, prefix, op->output);
} else {
crm_log_output(LOG_DEBUG, prefix, op->output);
}
free(prefix);
}
if (lrm_state) {
if (safe_str_neq(op->op_type, RSC_METADATA)) {
crmd_alert_resource_op(lrm_state->node_name, op);
} else if (rsc && (op->rc == PCMK_OCF_OK)) {
char *metadata = unescape_newlines(op->output);
metadata_cache_update(lrm_state->metadata_cache, rsc, metadata);
free(metadata);
}
}
if (op->rsc_deleted) {
crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key);
if (lrm_state) {
delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL);
}
}
/* If a shutdown was escalated while operations were pending,
* then the FSA will be stalled right now... allow it to continue
*/
mainloop_set_trigger(fsa_source);
if (lrm_state && rsc) {
update_history_cache(lrm_state, rsc, op);
}
lrmd_free_rsc_info(rsc);
free(op_key);
free(op_id);
}
diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c
index 95f9176deb..7b4901f61f 100644
--- a/daemons/controld/controld_messages.c
+++ b/daemons/controld/controld_messages.c
@@ -1,1291 +1,1291 @@
/*
* Copyright 2004-2020 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <string.h>
#include <time.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/cluster/internal.h>
#include <crm/cib.h>
#include <crm/common/ipc_internal.h>
#include <pacemaker-controld.h>
GListPtr fsa_message_queue = NULL;
extern void crm_shutdown(int nsig);
static enum crmd_fsa_input handle_message(xmlNode *msg,
enum crmd_fsa_cause cause);
static void handle_response(xmlNode *stored_msg);
static enum crmd_fsa_input handle_request(xmlNode *stored_msg,
enum crmd_fsa_cause cause);
static enum crmd_fsa_input handle_shutdown_request(xmlNode *stored_msg);
static void send_msg_via_ipc(xmlNode * msg, const char *sys);
/* for debug logging only; wrapping around is harmless */
int last_data_id = 0;
void
register_fsa_error_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input,
fsa_data_t * cur_data, void *new_data, const char *raised_from)
{
/* save the current actions if any */
if (fsa_actions != A_NOTHING) {
register_fsa_input_adv(cur_data ? cur_data->fsa_cause : C_FSA_INTERNAL,
I_NULL, cur_data ? cur_data->data : NULL,
fsa_actions, TRUE, __FUNCTION__);
}
/* reset the action list */
crm_info("Resetting the current action list");
fsa_dump_actions(fsa_actions, "Drop");
fsa_actions = A_NOTHING;
/* register the error */
register_fsa_input_adv(cause, input, new_data, A_NOTHING, TRUE, raised_from);
}
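/*!
 * \internal
 * \brief Add an input to the FSA message queue
 *
 * \param[in] cause         What generated the input
 * \param[in] input         FSA input to add
 * \param[in] data          Input data (copied according to \p cause)
 * \param[in] with_actions  FSA actions to associate with the input
 * \param[in] prepend       Whether to add to the front of the queue
 * \param[in] raised_from   Name of the calling function, for logging
 *
 * \return ID assigned to the queued input (0 if nothing was queued)
 */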
int
register_fsa_input_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input,
void *data, long long with_actions,
gboolean prepend, const char *raised_from)
{
unsigned old_len = g_list_length(fsa_message_queue);
fsa_data_t *fsa_data = NULL;
if (raised_from == NULL) {
raised_from = "<unknown>";
}
if (input == I_NULL && with_actions == A_NOTHING /* && data == NULL */ ) {
/* no point doing anything */
crm_err("Cannot add entry to queue: no input and no action");
return 0;
}
if (input == I_WAIT_FOR_EVENT) {
do_fsa_stall = TRUE;
crm_debug("Stalling the FSA pending further input: source=%s cause=%s data=%p queue=%d",
raised_from, fsa_cause2string(cause), data, old_len);
if (old_len > 0) {
fsa_dump_queue(LOG_TRACE);
prepend = FALSE;
}
if (data == NULL) {
fsa_actions |= with_actions;
fsa_dump_actions(with_actions, "Restored");
return 0;
}
/* Store everything in the new event and reset fsa_actions */
with_actions |= fsa_actions;
fsa_actions = A_NOTHING;
}
last_data_id++;
crm_trace("%s %s FSA input %d (%s) due to %s, %s data",
raised_from, (prepend? "prepended" : "appended"), last_data_id,
fsa_input2string(input), fsa_cause2string(cause),
(data? "with" : "without"));
fsa_data = calloc(1, sizeof(fsa_data_t));
fsa_data->id = last_data_id;
fsa_data->fsa_input = input;
fsa_data->fsa_cause = cause;
fsa_data->origin = raised_from;
fsa_data->data = NULL;
fsa_data->data_type = fsa_dt_none;
fsa_data->actions = with_actions;
if (with_actions != A_NOTHING) {
crm_trace("Adding actions %.16llx to input", with_actions);
}
if (data != NULL) {
switch (cause) {
case C_FSA_INTERNAL:
case C_CRMD_STATUS_CALLBACK:
case C_IPC_MESSAGE:
case C_HA_MESSAGE:
CRM_CHECK(((ha_msg_input_t *) data)->msg != NULL,
crm_err("Bogus data from %s", raised_from));
crm_trace("Copying %s data from %s as cluster message data",
fsa_cause2string(cause), raised_from);
fsa_data->data = copy_ha_msg_input(data);
fsa_data->data_type = fsa_dt_ha_msg;
break;
case C_LRM_OP_CALLBACK:
crm_trace("Copying %s data from %s as lrmd_event_data_t",
fsa_cause2string(cause), raised_from);
fsa_data->data = lrmd_copy_event((lrmd_event_data_t *) data);
fsa_data->data_type = fsa_dt_lrm;
break;
case C_TIMER_POPPED:
case C_SHUTDOWN:
case C_UNKNOWN:
case C_STARTUP:
crm_crit("Copying %s data (from %s) is not yet implemented",
fsa_cause2string(cause), raised_from);
crmd_exit(CRM_EX_SOFTWARE);
break;
}
}
/* make sure to free it properly later */
if (prepend) {
fsa_message_queue = g_list_prepend(fsa_message_queue, fsa_data);
} else {
fsa_message_queue = g_list_append(fsa_message_queue, fsa_data);
}
crm_trace("FSA message queue length is %d",
g_list_length(fsa_message_queue));
/* fsa_dump_queue(LOG_TRACE); */
if (old_len == g_list_length(fsa_message_queue)) {
crm_err("Couldn't add message to the queue");
}
if (fsa_source && input != I_WAIT_FOR_EVENT) {
crm_trace("Triggering FSA");
mainloop_set_trigger(fsa_source);
}
return last_data_id;
}
void
fsa_dump_queue(int log_level)
{
int offset = 0;
GListPtr lpc = NULL;
for (lpc = fsa_message_queue; lpc != NULL; lpc = lpc->next) {
fsa_data_t *data = (fsa_data_t *) lpc->data;
do_crm_log_unlikely(log_level,
"queue[%d.%d]: input %s raised by %s(%p.%d)\t(cause=%s)",
offset++, data->id, fsa_input2string(data->fsa_input),
data->origin, data->data, data->data_type,
fsa_cause2string(data->fsa_cause));
}
}
ha_msg_input_t *
copy_ha_msg_input(ha_msg_input_t * orig)
{
ha_msg_input_t *copy = calloc(1, sizeof(ha_msg_input_t));
CRM_ASSERT(copy != NULL);
copy->msg = (orig && orig->msg)? copy_xml(orig->msg) : NULL;
copy->xml = get_message_xml(copy->msg, F_CRM_DATA);
return copy;
}
void
delete_fsa_input(fsa_data_t * fsa_data)
{
lrmd_event_data_t *op = NULL;
xmlNode *foo = NULL;
if (fsa_data == NULL) {
return;
}
crm_trace("About to free %s data", fsa_cause2string(fsa_data->fsa_cause));
if (fsa_data->data != NULL) {
switch (fsa_data->data_type) {
case fsa_dt_ha_msg:
delete_ha_msg_input(fsa_data->data);
break;
case fsa_dt_xml:
foo = fsa_data->data;
free_xml(foo);
break;
case fsa_dt_lrm:
op = (lrmd_event_data_t *) fsa_data->data;
lrmd_free_event(op);
break;
case fsa_dt_none:
if (fsa_data->data != NULL) {
crm_err("Don't know how to free %s data from %s",
fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
crmd_exit(CRM_EX_SOFTWARE);
}
break;
}
crm_trace("%s data freed", fsa_cause2string(fsa_data->fsa_cause));
}
free(fsa_data);
}
/* returns the next message */
fsa_data_t *
get_message(void)
{
fsa_data_t *message = g_list_nth_data(fsa_message_queue, 0);
fsa_message_queue = g_list_remove(fsa_message_queue, message);
crm_trace("Processing input %d", message->id);
return message;
}
void *
fsa_typed_data_adv(fsa_data_t * fsa_data, enum fsa_data_type a_type, const char *caller)
{
void *ret_val = NULL;
if (fsa_data == NULL) {
crm_err("%s: No FSA data available", caller);
} else if (fsa_data->data == NULL) {
crm_err("%s: No message data available. Origin: %s", caller, fsa_data->origin);
} else if (fsa_data->data_type != a_type) {
crm_crit("%s: Message data was the wrong type! %d vs. requested=%d. Origin: %s",
caller, fsa_data->data_type, a_type, fsa_data->origin);
CRM_ASSERT(fsa_data->data_type == a_type);
} else {
ret_val = fsa_data->data;
}
return ret_val;
}
/* A_MSG_ROUTE */
void
do_msg_route(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
route_message(msg_data->fsa_cause, input->msg);
}
void
route_message(enum crmd_fsa_cause cause, xmlNode * input)
{
ha_msg_input_t fsa_input;
enum crmd_fsa_input result = I_NULL;
fsa_input.msg = input;
CRM_CHECK(cause == C_IPC_MESSAGE || cause == C_HA_MESSAGE, return);
/* try passing the buck first */
if (relay_message(input, cause == C_IPC_MESSAGE)) {
return;
}
/* handle locally */
result = handle_message(input, cause);
/* done or process later? */
switch (result) {
case I_NULL:
case I_CIB_OP:
case I_ROUTER:
case I_NODE_JOIN:
case I_JOIN_REQUEST:
case I_JOIN_RESULT:
break;
default:
            /* Deferring local processing of message */
register_fsa_input_later(cause, result, &fsa_input);
return;
}
if (result != I_NULL) {
/* add to the front of the queue */
register_fsa_input(cause, result, &fsa_input);
}
}
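/*!
 * \internal
 * \brief Route a message toward its destination subsystem or node
 *
 * \param[in] msg                  Message XML to route
 * \param[in] originated_locally   Whether the message came from a local client
 *
 * \return FALSE if the caller must still process the message locally,
 *         TRUE if no further processing is needed
 */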
gboolean
relay_message(xmlNode * msg, gboolean originated_locally)
{
int dest = 1;
int is_for_dc = 0;
int is_for_dcib = 0;
int is_for_te = 0;
int is_for_crm = 0;
int is_for_cib = 0;
int is_local = 0;
const char *host_to = crm_element_value(msg, F_CRM_HOST_TO);
const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO);
const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM);
const char *type = crm_element_value(msg, F_TYPE);
const char *task = crm_element_value(msg, F_CRM_TASK);
const char *ref = crm_element_value(msg, XML_ATTR_REFERENCE);
if (ref == NULL) {
ref = "without reference ID";
}
if (msg == NULL) {
crm_warn("Cannot route empty message");
return TRUE;
} else if (safe_str_eq(task, CRM_OP_HELLO)) {
/* quietly ignore */
crm_trace("No routing needed for hello message %s", ref);
return TRUE;
} else if (safe_str_neq(type, T_CRM)) {
crm_warn("Cannot route message %s: Type is '%s' not '" T_CRM "'",
ref, (type? type : "missing"));
crm_log_xml_warn(msg, "[bad message type]");
return TRUE;
} else if (sys_to == NULL) {
crm_warn("Cannot route message %s: No subsystem specified", ref);
crm_log_xml_warn(msg, "[no subsystem]");
return TRUE;
}
is_for_dc = (strcasecmp(CRM_SYSTEM_DC, sys_to) == 0);
is_for_dcib = (strcasecmp(CRM_SYSTEM_DCIB, sys_to) == 0);
is_for_te = (strcasecmp(CRM_SYSTEM_TENGINE, sys_to) == 0);
is_for_cib = (strcasecmp(CRM_SYSTEM_CIB, sys_to) == 0);
is_for_crm = (strcasecmp(CRM_SYSTEM_CRMD, sys_to) == 0);
is_local = 0;
if (pcmk__str_empty(host_to)) {
if (is_for_dc || is_for_te) {
is_local = 0;
} else if (is_for_crm) {
if (pcmk__str_any_of(task, CRM_OP_NODE_INFO,
PCMK__CONTROLD_CMD_NODES, NULL)) {
/* Node info requests do not specify a host, which is normally
* treated as "all hosts", because the whole point is that the
* client may not know the local node name. Always handle these
* requests locally.
*/
is_local = 1;
} else {
is_local = !originated_locally;
}
} else {
is_local = 1;
}
} else if (safe_str_eq(fsa_our_uname, host_to)) {
is_local = 1;
} else if (is_for_crm && safe_str_eq(task, CRM_OP_LRM_DELETE)) {
xmlNode *msg_data = get_message_xml(msg, F_CRM_DATA);
const char *mode = crm_element_value(msg_data, PCMK__XA_MODE);
if (safe_str_eq(mode, XML_TAG_CIB)) {
// Local delete of an offline node's resource history
is_local = 1;
}
}
if (is_for_dc || is_for_dcib || is_for_te) {
if (AM_I_DC && is_for_te) {
crm_trace("Route message %s locally as transition request", ref);
send_msg_via_ipc(msg, sys_to);
} else if (AM_I_DC) {
crm_trace("Route message %s locally as DC request", ref);
return FALSE; // More to be done by caller
- } else if (originated_locally && pcmk__str_none_of(sys_from, CRM_SYSTEM_PENGINE,
- CRM_SYSTEM_TENGINE, NULL)) {
+ } else if (originated_locally && !pcmk__str_any_of(sys_from, CRM_SYSTEM_PENGINE,
+ CRM_SYSTEM_TENGINE, NULL)) {
#if SUPPORT_COROSYNC
if (is_corosync_cluster()) {
dest = text2msg_type(sys_to);
}
#endif
crm_trace("Relay message %s to DC", ref);
send_cluster_message(host_to ? crm_get_peer(0, host_to) : NULL, dest, msg, TRUE);
} else {
/* Neither the TE nor the scheduler should be sending messages
* to DCs on other nodes. By definition, if we are no longer the DC,
* then the scheduler's or TE's data should be discarded.
*/
crm_trace("Discard message %s because we are not DC", ref);
}
} else if (is_local && (is_for_crm || is_for_cib)) {
crm_trace("Route message %s locally as controller request", ref);
return FALSE; // More to be done by caller
} else if (is_local) {
crm_trace("Relay message %s locally to %s",
ref, (sys_to? sys_to : "unknown client"));
crm_log_xml_trace(msg, "[IPC relay]");
send_msg_via_ipc(msg, sys_to);
} else {
crm_node_t *node_to = NULL;
#if SUPPORT_COROSYNC
if (is_corosync_cluster()) {
dest = text2msg_type(sys_to);
if (dest == crm_msg_none || dest > crm_msg_stonith_ng) {
dest = crm_msg_crmd;
}
}
#endif
if (host_to) {
node_to = crm_find_peer(0, host_to);
if (node_to == NULL) {
crm_warn("Cannot route message %s: Unknown node %s",
ref, host_to);
return TRUE;
}
crm_trace("Relay message %s to %s",
ref, (node_to->uname? node_to->uname : "peer"));
} else {
crm_trace("Broadcast message %s to all peers", ref);
}
send_cluster_message(host_to ? node_to : NULL, dest, msg, TRUE);
}
return TRUE; // No further processing of message is needed
}
// Return true if the given field contains a valid (non-negative) version number
static bool
authorize_version(xmlNode *message_data, const char *field,
const char *client_name, const char *ref, const char *uuid)
{
const char *version = crm_element_value(message_data, field);
if (pcmk__str_empty(version)) {
crm_warn("IPC hello from %s rejected: No protocol %s",
CRM_XS " ref=%s uuid=%s",
client_name, field, (ref? ref : "none"), uuid);
return false;
} else {
int version_num = crm_parse_int(version, NULL);
if (version_num < 0) {
crm_warn("IPC hello from %s rejected: Protocol %s '%s' "
"not recognized", CRM_XS " ref=%s uuid=%s",
client_name, field, version, (ref? ref : "none"), uuid);
return false;
}
}
return true;
}
/*!
* \internal
* \brief Check whether a client IPC message is acceptable
*
* If a given client IPC message is a hello, "authorize" it by ensuring it has
* valid information such as a protocol version, and return false indicating
* that nothing further needs to be done with the message. If the message is not
* a hello, just return true to indicate it needs further processing.
*
* \param[in] client_msg XML of IPC message
* \param[in] curr_client If IPC is not proxied, client that sent message
* \param[in] proxy_session If IPC is proxied, the session ID
*
* \return true if message needs further processing, false if it doesn't
*/
bool
controld_authorize_ipc_message(xmlNode *client_msg, pcmk__client_t *curr_client,
const char *proxy_session)
{
xmlNode *message_data = NULL;
const char *client_name = NULL;
const char *op = crm_element_value(client_msg, F_CRM_TASK);
const char *ref = crm_element_value(client_msg, XML_ATTR_REFERENCE);
const char *uuid = (curr_client? curr_client->id : proxy_session);
if (uuid == NULL) {
crm_warn("IPC message from client rejected: No client identifier "
CRM_XS " ref=%s", (ref? ref : "none"));
goto rejected;
}
if (safe_str_neq(CRM_OP_HELLO, op)) {
// Only hello messages need to be authorized
return true;
}
message_data = get_message_xml(client_msg, F_CRM_DATA);
client_name = crm_element_value(message_data, "client_name");
if (pcmk__str_empty(client_name)) {
crm_warn("IPC hello from client rejected: No client name",
CRM_XS " ref=%s uuid=%s", (ref? ref : "none"), uuid);
goto rejected;
}
if (!authorize_version(message_data, "major_version", client_name, ref,
uuid)) {
goto rejected;
}
if (!authorize_version(message_data, "minor_version", client_name, ref,
uuid)) {
goto rejected;
}
crm_trace("Validated IPC hello from client %s", client_name);
if (curr_client) {
curr_client->userdata = strdup(client_name);
}
mainloop_set_trigger(fsa_source);
return false;
rejected:
if (curr_client) {
qb_ipcs_disconnect(curr_client->ipcs);
}
return false;
}
static enum crmd_fsa_input
handle_message(xmlNode *msg, enum crmd_fsa_cause cause)
{
const char *type = NULL;
CRM_CHECK(msg != NULL, return I_NULL);
type = crm_element_value(msg, F_CRM_MSG_TYPE);
if (crm_str_eq(type, XML_ATTR_REQUEST, TRUE)) {
return handle_request(msg, cause);
} else if (crm_str_eq(type, XML_ATTR_RESPONSE, TRUE)) {
handle_response(msg);
return I_NULL;
}
crm_err("Unknown message type: %s", type);
return I_NULL;
}
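// Handle a fail-count clearing request by updating the attribute manager and local executor history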
static enum crmd_fsa_input
handle_failcount_op(xmlNode * stored_msg)
{
const char *rsc = NULL;
const char *uname = NULL;
const char *op = NULL;
char *interval_spec = NULL;
guint interval_ms = 0;
gboolean is_remote_node = FALSE;
xmlNode *xml_op = get_message_xml(stored_msg, F_CRM_DATA);
if (xml_op) {
xmlNode *xml_rsc = first_named_child(xml_op, XML_CIB_TAG_RESOURCE);
xmlNode *xml_attrs = first_named_child(xml_op, XML_TAG_ATTRS);
if (xml_rsc) {
rsc = ID(xml_rsc);
}
if (xml_attrs) {
op = crm_element_value(xml_attrs,
CRM_META "_" XML_RSC_ATTR_CLEAR_OP);
crm_element_value_ms(xml_attrs,
CRM_META "_" XML_RSC_ATTR_CLEAR_INTERVAL,
&interval_ms);
}
}
uname = crm_element_value(xml_op, XML_LRM_ATTR_TARGET);
if ((rsc == NULL) || (uname == NULL)) {
crm_log_xml_warn(stored_msg, "invalid failcount op");
return I_NULL;
}
if (crm_element_value(xml_op, XML_LRM_ATTR_ROUTER_NODE)) {
is_remote_node = TRUE;
}
if (interval_ms) {
interval_spec = crm_strdup_printf("%ums", interval_ms);
}
update_attrd_clear_failures(uname, rsc, op, interval_spec, is_remote_node);
free(interval_spec);
lrm_clear_last_failure(rsc, uname, op, interval_ms);
return I_NULL;
}
static enum crmd_fsa_input
handle_lrm_delete(xmlNode *stored_msg)
{
const char *mode = NULL;
xmlNode *msg_data = get_message_xml(stored_msg, F_CRM_DATA);
CRM_CHECK(msg_data != NULL, return I_NULL);
/* CRM_OP_LRM_DELETE has two distinct modes. The default behavior is to
* relay the operation to the affected node, which will unregister the
* resource from the local executor, clear the resource's history from the
* CIB, and do some bookkeeping in the controller.
*
* However, if the affected node is offline, the client will specify
* mode="cib" which means the controller receiving the operation should
* clear the resource's history from the CIB and nothing else. This is used
* to clear shutdown locks.
*/
mode = crm_element_value(msg_data, PCMK__XA_MODE);
if ((mode == NULL) || strcmp(mode, XML_TAG_CIB)) {
// Relay to affected node
crm_xml_add(stored_msg, F_CRM_SYS_TO, CRM_SYSTEM_LRMD);
return I_ROUTER;
} else {
// Delete CIB history locally (compare with do_lrm_delete())
const char *from_sys = NULL;
const char *user_name = NULL;
const char *rsc_id = NULL;
const char *node = NULL;
xmlNode *rsc_xml = NULL;
int rc = pcmk_rc_ok;
rsc_xml = first_named_child(msg_data, XML_CIB_TAG_RESOURCE);
CRM_CHECK(rsc_xml != NULL, return I_NULL);
rsc_id = ID(rsc_xml);
from_sys = crm_element_value(stored_msg, F_CRM_SYS_FROM);
node = crm_element_value(msg_data, XML_LRM_ATTR_TARGET);
#if ENABLE_ACL
user_name = pcmk__update_acl_user(stored_msg, F_CRM_USER, NULL);
#endif
crm_debug("Handling " CRM_OP_LRM_DELETE " for %s on %s locally%s%s "
"(clearing CIB resource history only)", rsc_id, node,
(user_name? " for user " : ""), (user_name? user_name : ""));
#if ENABLE_ACL
rc = controld_delete_resource_history(rsc_id, node, user_name,
cib_dryrun|cib_sync_call);
#endif
if (rc == pcmk_rc_ok) {
rc = controld_delete_resource_history(rsc_id, node, user_name,
crmd_cib_smart_opt());
}
        /* Notify the requesting client (and the transition engine, which is
         * only notified here when mode is "cib" for CRM_OP_LRM_DELETE)
         */
if (from_sys) {
lrmd_event_data_t *op = NULL;
const char *from_host = crm_element_value(stored_msg,
F_CRM_HOST_FROM);
const char *transition;
if (strcmp(from_sys, CRM_SYSTEM_TENGINE)) {
transition = crm_element_value(msg_data,
XML_ATTR_TRANSITION_KEY);
} else {
transition = crm_element_value(stored_msg,
XML_ATTR_TRANSITION_KEY);
}
crm_info("Notifying %s on %s that %s was%s deleted",
from_sys, (from_host? from_host : "local node"), rsc_id,
((rc == pcmk_rc_ok)? "" : " not"));
op = lrmd_new_event(rsc_id, CRMD_ACTION_DELETE, 0);
op->type = lrmd_event_exec_complete;
op->user_data = strdup(transition? transition : FAKE_TE_ID);
op->params = crm_str_table_new();
g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION),
strdup(CRM_FEATURE_SET));
controld_rc2event(op, rc);
controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id);
lrmd_free_event(op);
controld_trigger_delete_refresh(from_sys, rsc_id);
}
return I_NULL;
}
}
/*!
* \brief Handle a CRM_OP_REMOTE_STATE message by updating remote peer cache
*
* \param[in] msg Message XML
*
* \return Next FSA input
*/
static enum crmd_fsa_input
handle_remote_state(xmlNode *msg)
{
const char *remote_uname = ID(msg);
const char *remote_is_up = crm_element_value(msg, XML_NODE_IN_CLUSTER);
crm_node_t *remote_peer;
CRM_CHECK(remote_uname && remote_is_up, return I_NULL);
remote_peer = crm_remote_peer_get(remote_uname);
CRM_CHECK(remote_peer, return I_NULL);
crm_update_peer_state(__FUNCTION__, remote_peer,
crm_is_true(remote_is_up)?
CRM_NODE_MEMBER : CRM_NODE_LOST, 0);
return I_NULL;
}
/*!
* \brief Handle a CRM_OP_PING message
*
* \param[in] msg Message XML
*
* \return Next FSA input
*/
static enum crmd_fsa_input
handle_ping(xmlNode *msg)
{
const char *value = NULL;
xmlNode *ping = NULL;
// Build reply
ping = create_xml_node(NULL, XML_CRM_TAG_PING);
value = crm_element_value(msg, F_CRM_SYS_TO);
crm_xml_add(ping, XML_PING_ATTR_SYSFROM, value);
// Add controller state
value = fsa_state2string(fsa_state);
crm_xml_add(ping, XML_PING_ATTR_CRMDSTATE, value);
crm_notice("Current ping state: %s", value); // CTS needs this
// Add controller health
// @TODO maybe do some checks to determine meaningful status
crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok");
// Send reply
msg = create_reply(msg, ping);
free_xml(ping);
if (msg) {
(void) relay_message(msg, TRUE);
free_xml(msg);
}
// Nothing further to do
return I_NULL;
}
/*!
* \brief Handle a PCMK__CONTROLD_CMD_NODES message
*
* \return Next FSA input
*/
static enum crmd_fsa_input
handle_node_list(xmlNode *request)
{
GHashTableIter iter;
crm_node_t *node = NULL;
xmlNode *reply = NULL;
xmlNode *reply_data = NULL;
// Create message data for reply
reply_data = create_xml_node(NULL, XML_CIB_TAG_NODES);
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) {
xmlNode *xml = create_xml_node(reply_data, XML_CIB_TAG_NODE);
crm_xml_add_ll(xml, XML_ATTR_ID, (long long) node->id); // uint32_t
crm_xml_add(xml, XML_ATTR_UNAME, node->uname);
crm_xml_add(xml, XML_NODE_IN_CLUSTER, node->state);
}
// Create and send reply
reply = create_reply(request, reply_data);
free_xml(reply_data);
if (reply) {
(void) relay_message(reply, TRUE);
free_xml(reply);
}
// Nothing further to do
return I_NULL;
}
/*!
* \brief Handle a CRM_OP_NODE_INFO request
*
* \param[in] msg Message XML
*
* \return Next FSA input
*/
static enum crmd_fsa_input
handle_node_info_request(xmlNode *msg)
{
const char *value = NULL;
crm_node_t *node = NULL;
int node_id = 0;
xmlNode *reply = NULL;
// Build reply
reply = create_xml_node(NULL, XML_CIB_TAG_NODE);
crm_xml_add(reply, XML_PING_ATTR_SYSFROM, CRM_SYSTEM_CRMD);
// Add whether current partition has quorum
crm_xml_add_boolean(reply, XML_ATTR_HAVE_QUORUM, fsa_has_quorum);
// Check whether client requested node info by ID and/or name
crm_element_value_int(msg, XML_ATTR_ID, &node_id);
if (node_id < 0) {
node_id = 0;
}
value = crm_element_value(msg, XML_ATTR_UNAME);
// Default to local node if none given
if ((node_id == 0) && (value == NULL)) {
value = fsa_our_uname;
}
node = crm_find_peer_full(node_id, value, CRM_GET_PEER_ANY);
if (node) {
crm_xml_add_int(reply, XML_ATTR_ID, node->id);
crm_xml_add(reply, XML_ATTR_UUID, node->uuid);
crm_xml_add(reply, XML_ATTR_UNAME, node->uname);
crm_xml_add(reply, XML_NODE_IS_PEER, node->state);
crm_xml_add_boolean(reply, XML_NODE_IS_REMOTE,
node->flags & crm_remote_node);
}
// Send reply
msg = create_reply(msg, reply);
free_xml(reply);
if (msg) {
(void) relay_message(msg, TRUE);
free_xml(msg);
}
// Nothing further to do
return I_NULL;
}
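/*!
* \brief Compare the DC's feature set with ours, and exit if incompatible
*
* \param[in] msg Message XML (for example, a join offer from the DC)
*/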
static void
verify_feature_set(xmlNode *msg)
{
const char *dc_version = crm_element_value(msg, XML_ATTR_CRM_VERSION);
if (dc_version == NULL) {
/* All we really know is that the DC feature set is older than 3.1.0,
* but that's also all that really matters.
*/
dc_version = "3.0.14";
}
if (feature_set_compatible(dc_version, CRM_FEATURE_SET)) {
crm_trace("Local feature set (%s) is compatible with DC's (%s)",
CRM_FEATURE_SET, dc_version);
} else {
crm_err("Local feature set (%s) is incompatible with DC's (%s)",
CRM_FEATURE_SET, dc_version);
// Nothing is likely to improve without administrator involvement
set_bit(fsa_input_register, R_STAYDOWN);
crmd_exit(CRM_EX_FATAL);
}
}
// DC gets own shutdown all-clear
static enum crmd_fsa_input
handle_shutdown_self_ack(xmlNode *stored_msg)
{
const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM);
if (is_set(fsa_input_register, R_SHUTDOWN)) {
// The expected case -- we initiated our own shutdown sequence
crm_info("Shutting down controller");
return I_STOP;
}
if (safe_str_eq(host_from, fsa_our_dc)) {
// Must be logic error -- DC confirming its own unrequested shutdown
crm_err("Shutting down controller immediately due to "
"unexpected shutdown confirmation");
return I_TERMINATE;
}
if (fsa_state != S_STOPPING) {
// Shouldn't happen -- non-DC confirming unrequested shutdown
crm_err("Starting new DC election because %s is "
"confirming shutdown we did not request",
(host_from? host_from : "another node"));
return I_ELECTION;
}
// Shouldn't happen, but we are already stopping anyway
crm_debug("Ignoring unexpected shutdown confirmation from %s",
(host_from? host_from : "another node"));
return I_NULL;
}
// Non-DC gets shutdown all-clear from DC
static enum crmd_fsa_input
handle_shutdown_ack(xmlNode *stored_msg)
{
const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM);
if (host_from == NULL) {
crm_warn("Ignoring shutdown request without origin specified");
return I_NULL;
}
if ((fsa_our_dc == NULL) || (strcmp(host_from, fsa_our_dc) == 0)) {
if (is_set(fsa_input_register, R_SHUTDOWN)) {
crm_info("Shutting down controller after confirmation from %s",
host_from);
} else {
crm_err("Shutting down controller after unexpected "
"shutdown request from %s", host_from);
set_bit(fsa_input_register, R_STAYDOWN);
}
return I_STOP;
}
crm_warn("Ignoring shutdown request from %s because DC is %s",
host_from, fsa_our_dc);
return I_NULL;
}
static enum crmd_fsa_input
handle_request(xmlNode *stored_msg, enum crmd_fsa_cause cause)
{
xmlNode *msg = NULL;
const char *op = crm_element_value(stored_msg, F_CRM_TASK);
/* Optimize this for the DC - it has the most to do */
if (op == NULL) {
crm_log_xml_warn(stored_msg, "[request without " F_CRM_TASK "]");
return I_NULL;
}
if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) {
const char *from = crm_element_value(stored_msg, F_CRM_HOST_FROM);
crm_node_t *node = crm_find_peer(0, from);
crm_update_peer_expected(__FUNCTION__, node, CRMD_JOINSTATE_DOWN);
if(AM_I_DC == FALSE) {
return I_NULL; /* Done */
}
}
/*========== DC-Only Actions ==========*/
if (AM_I_DC) {
if (strcmp(op, CRM_OP_JOIN_ANNOUNCE) == 0) {
return I_NODE_JOIN;
} else if (strcmp(op, CRM_OP_JOIN_REQUEST) == 0) {
return I_JOIN_REQUEST;
} else if (strcmp(op, CRM_OP_JOIN_CONFIRM) == 0) {
return I_JOIN_RESULT;
} else if (strcmp(op, CRM_OP_SHUTDOWN) == 0) {
return handle_shutdown_self_ack(stored_msg);
} else if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) {
/* a slave wants to shut down */
/* create cib fragment and add to message */
return handle_shutdown_request(stored_msg);
} else if (strcmp(op, CRM_OP_REMOTE_STATE) == 0) {
/* a remote connection host is letting us know the node state */
return handle_remote_state(stored_msg);
}
}
/*========== common actions ==========*/
if (strcmp(op, CRM_OP_NOVOTE) == 0) {
ha_msg_input_t fsa_input;
fsa_input.msg = stored_msg;
register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input,
A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE, __FUNCTION__);
} else if (strcmp(op, CRM_OP_THROTTLE) == 0) {
throttle_update(stored_msg);
if (AM_I_DC && transition_graph != NULL) {
if (transition_graph->complete == FALSE) {
crm_debug("The throttle changed. Trigger a graph.");
trigger_graph();
}
}
return I_NULL;
} else if (strcmp(op, CRM_OP_CLEAR_FAILCOUNT) == 0) {
return handle_failcount_op(stored_msg);
} else if (strcmp(op, CRM_OP_VOTE) == 0) {
/* count the vote and decide what to do after that */
ha_msg_input_t fsa_input;
fsa_input.msg = stored_msg;
register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input,
A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE, __FUNCTION__);
/* Sometimes we _must_ go into S_ELECTION */
if (fsa_state == S_HALT) {
crm_debug("Forcing an election from S_HALT");
return I_ELECTION;
#if 0
} else if (AM_I_DC) {
/* This is the old way of doing things but what is gained? */
return I_ELECTION;
#endif
}
} else if (strcmp(op, CRM_OP_JOIN_OFFER) == 0) {
verify_feature_set(stored_msg);
crm_debug("Raising I_JOIN_OFFER: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID));
return I_JOIN_OFFER;
} else if (strcmp(op, CRM_OP_JOIN_ACKNAK) == 0) {
crm_debug("Raising I_JOIN_RESULT: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID));
return I_JOIN_RESULT;
} else if (strcmp(op, CRM_OP_LRM_DELETE) == 0) {
return handle_lrm_delete(stored_msg);
} else if ((strcmp(op, CRM_OP_LRM_FAIL) == 0)
|| (strcmp(op, CRM_OP_LRM_REFRESH) == 0)
|| (strcmp(op, CRM_OP_REPROBE) == 0)) {
crm_xml_add(stored_msg, F_CRM_SYS_TO, CRM_SYSTEM_LRMD);
return I_ROUTER;
} else if (strcmp(op, CRM_OP_NOOP) == 0) {
return I_NULL;
} else if (strcmp(op, CRM_OP_LOCAL_SHUTDOWN) == 0) {
crm_shutdown(SIGTERM);
/*return I_SHUTDOWN; */
return I_NULL;
} else if (strcmp(op, CRM_OP_PING) == 0) {
return handle_ping(stored_msg);
} else if (strcmp(op, CRM_OP_NODE_INFO) == 0) {
return handle_node_info_request(stored_msg);
} else if (strcmp(op, CRM_OP_RM_NODE_CACHE) == 0) {
int id = 0;
const char *name = NULL;
crm_element_value_int(stored_msg, XML_ATTR_ID, &id);
name = crm_element_value(stored_msg, XML_ATTR_UNAME);
if(cause == C_IPC_MESSAGE) {
msg = create_request(CRM_OP_RM_NODE_CACHE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) {
crm_err("Could not instruct peers to remove references to node %s/%u", name, id);
} else {
crm_notice("Instructing peers to remove references to node %s/%u", name, id);
}
free_xml(msg);
} else {
reap_crm_member(id, name);
/* If we're forgetting this node, also forget any failures to fence
* it, so we don't carry that over to any node added later with the
* same name.
*/
st_fail_count_reset(name);
}
} else if (strcmp(op, CRM_OP_MAINTENANCE_NODES) == 0) {
xmlNode *xml = get_message_xml(stored_msg, F_CRM_DATA);
remote_ra_process_maintenance_nodes(xml);
} else if (strcmp(op, PCMK__CONTROLD_CMD_NODES) == 0) {
return handle_node_list(stored_msg);
/*========== (NOT_DC)-Only Actions ==========*/
} else if (!AM_I_DC) {
if (strcmp(op, CRM_OP_SHUTDOWN) == 0) {
return handle_shutdown_ack(stored_msg);
}
} else {
crm_err("Unexpected request (%s) sent to %s", op, AM_I_DC ? "the DC" : "non-DC node");
crm_log_xml_err(stored_msg, "Unexpected");
}
return I_NULL;
}
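/*!
* \brief Handle a response message, such as a scheduler reply
*
* \param[in] stored_msg Message XML
*/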
static void
handle_response(xmlNode *stored_msg)
{
const char *op = crm_element_value(stored_msg, F_CRM_TASK);
if (op == NULL) {
crm_log_xml_err(stored_msg, "Bad message");
} else if (AM_I_DC && strcmp(op, CRM_OP_PECALC) == 0) {
// Check whether the scheduler answer has been superseded by a subsequent request
const char *msg_ref = crm_element_value(stored_msg, XML_ATTR_REFERENCE);
if (msg_ref == NULL) {
crm_err("%s - Ignoring calculation with no reference", op);
} else if (safe_str_eq(msg_ref, fsa_pe_ref)) {
ha_msg_input_t fsa_input;
controld_stop_sched_timer();
fsa_input.msg = stored_msg;
register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input);
} else {
crm_info("%s calculation %s is obsolete", op, msg_ref);
}
} else if (strcmp(op, CRM_OP_VOTE) == 0
|| strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0 || strcmp(op, CRM_OP_SHUTDOWN) == 0) {
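/* These are expected responses that require no further action */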
} else {
const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM);
crm_err("Unexpected response (op=%s, src=%s) sent to the %s",
op, host_from, AM_I_DC ? "DC" : "controller");
}
}
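/*!
* \brief Handle a node's shutdown request (DC only)
*
* \param[in] stored_msg Message XML
*
* \return Next FSA input
*/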
static enum crmd_fsa_input
handle_shutdown_request(xmlNode * stored_msg)
{
/* handle here to avoid potential version issues
* where the shutdown message/procedure may have
* been changed in later versions.
*
* This way the DC is always in control of the shutdown
*/
char *now_s = NULL;
time_t now = time(NULL);
const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM);
if (host_from == NULL) {
/* No origin given, so the request is our own -- we are the DC and shutting down */
host_from = fsa_our_uname;
}
crm_info("Creating shutdown request for %s (state=%s)", host_from, fsa_state2string(fsa_state));
crm_log_xml_trace(stored_msg, "message");
now_s = crm_itoa(now);
update_attrd(host_from, XML_CIB_ATTR_SHUTDOWN, now_s, NULL, FALSE);
free(now_s);
/* will be picked up by the TE as long as it's running */
return I_NULL;
}
/* msg is deleted by the time this returns */
extern gboolean process_te_message(xmlNode * msg, xmlNode * xml_data);
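/*!
* \brief Deliver a message to a subsystem, either via IPC or by handling it directly
*
* \param[in] msg Message XML to deliver
* \param[in] sys Name of the destination subsystem
*/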
static void
send_msg_via_ipc(xmlNode * msg, const char *sys)
{
pcmk__client_t *client_channel = pcmk__find_client_by_id(sys);
if (crm_element_value(msg, F_CRM_HOST_FROM) == NULL) {
crm_xml_add(msg, F_CRM_HOST_FROM, fsa_our_uname);
}
if (client_channel != NULL) {
/* Transient clients such as crmadmin */
pcmk__ipc_send_xml(client_channel, 0, msg, crm_ipc_server_event);
} else if (sys != NULL && strcmp(sys, CRM_SYSTEM_TENGINE) == 0) {
xmlNode *data = get_message_xml(msg, F_CRM_DATA);
process_te_message(msg, data);
} else if (sys != NULL && strcmp(sys, CRM_SYSTEM_LRMD) == 0) {
fsa_data_t fsa_data;
ha_msg_input_t fsa_input;
fsa_input.msg = msg;
fsa_input.xml = get_message_xml(msg, F_CRM_DATA);
fsa_data.id = 0;
fsa_data.actions = 0;
fsa_data.data = &fsa_input;
fsa_data.fsa_input = I_MESSAGE;
fsa_data.fsa_cause = C_IPC_MESSAGE;
fsa_data.origin = __FUNCTION__;
fsa_data.data_type = fsa_dt_ha_msg;
do_lrm_invoke(A_LRM_INVOKE, C_IPC_MESSAGE, fsa_state, I_MESSAGE, &fsa_data);
} else if (sys != NULL && crmd_is_proxy_session(sys)) {
crmd_proxy_send(sys, msg);
} else {
crm_debug("Unknown Sub-system (%s)... discarding message.", crm_str(sys));
}
}
void
delete_ha_msg_input(ha_msg_input_t * orig)
{
if (orig == NULL) {
return;
}
free_xml(orig->msg);
free(orig);
}
/*!
* \internal
* \brief Notify the DC of a remote node state change
*
* \param[in] node_name Node's name
* \param[in] node_up TRUE if node is up, FALSE if down
*/
void
send_remote_state_message(const char *node_name, gboolean node_up)
{
/* If we don't have a DC, or the message fails, we have a failsafe:
* the DC will eventually pick up the change via the CIB node state.
* The message allows it to happen sooner if possible.
*/
if (fsa_our_dc) {
xmlNode *msg = create_request(CRM_OP_REMOTE_STATE, NULL, fsa_our_dc,
CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);
crm_info("Notifying DC %s of pacemaker_remote node %s %s",
fsa_our_dc, node_name, (node_up? "coming up" : "going down"));
crm_xml_add(msg, XML_ATTR_ID, node_name);
crm_xml_add_boolean(msg, XML_NODE_IN_CLUSTER, node_up);
send_cluster_message(crm_get_peer(0, fsa_our_dc), crm_msg_crmd, msg,
TRUE);
free_xml(msg);
} else {
crm_debug("No DC to notify of pacemaker_remote node %s %s",
node_name, (node_up? "coming up" : "going down"));
}
}
diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c
index 8dead202e9..f243624c70 100644
--- a/daemons/controld/controld_remote_ra.c
+++ b/daemons/controld/controld_remote_ra.c
@@ -1,1298 +1,1298 @@
/*
* Copyright 2013-2020 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/lrmd.h>
#include <crm/services.h>
#include <pacemaker-controld.h>
#define REMOTE_LRMD_RA "remote"
/* The max start timeout before cmd retry */
#define MAX_START_TIMEOUT_MS 10000
typedef struct remote_ra_cmd_s {
/*! the local node the cmd is issued from */
char *owner;
/*! the remote node the cmd is executed on */
char *rsc_id;
/*! the action to execute */
char *action;
/*! some string the client wants us to give it back */
char *userdata;
char *exit_reason; // descriptive text on error
/*! start delay in ms */
int start_delay;
/*! timer id used for start delay. */
int delay_id;
/*! timeout in ms for cmd */
int timeout;
int remaining_timeout;
/*! recurring interval in ms */
guint interval_ms;
/*! interval timer id */
int interval_id;
int reported_success;
int monitor_timeout_id;
int takeover_timeout_id;
/*! action parameters */
lrmd_key_value_t *params;
/*! executed rc */
int rc;
int op_status;
int call_id;
time_t start_time;
gboolean cancel;
} remote_ra_cmd_t;
enum remote_migration_status {
expect_takeover = 1,
takeover_complete,
};
typedef struct remote_ra_data_s {
crm_trigger_t *work;
remote_ra_cmd_t *cur_cmd;
GList *cmds;
GList *recurring_cmds;
enum remote_migration_status migrate_status;
gboolean active;
/* Maintenance mode is difficult to determine from the controller's context,
* so we have it signalled back with the transition from the scheduler.
*/
gboolean is_maintenance;
/* Similarly, whether we are controlling a guest node or a remote node is
* hard to determine here. Fortunately, the transition already carries a
* meta-attribute for this, and since the situation doesn't change over time,
* we can note the information down at resource start for later use when
* the attributes aren't at hand.
*/
gboolean controlling_guest;
} remote_ra_data_t;
static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms);
static void handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd);
static GList *fail_all_monitor_cmds(GList * list);
static void
free_cmd(gpointer user_data)
{
remote_ra_cmd_t *cmd = user_data;
if (!cmd) {
return;
}
if (cmd->delay_id) {
g_source_remove(cmd->delay_id);
}
if (cmd->interval_id) {
g_source_remove(cmd->interval_id);
}
if (cmd->monitor_timeout_id) {
g_source_remove(cmd->monitor_timeout_id);
}
if (cmd->takeover_timeout_id) {
g_source_remove(cmd->takeover_timeout_id);
}
free(cmd->owner);
free(cmd->rsc_id);
free(cmd->action);
free(cmd->userdata);
free(cmd->exit_reason);
lrmd_key_value_freeall(cmd->params);
free(cmd);
}
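/* Return the next call ID to use for a remote RA command (always positive) */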
static int
generate_callid(void)
{
static int remote_ra_callid = 0;
remote_ra_callid++;
if (remote_ra_callid <= 0) {
remote_ra_callid = 1;
}
return remote_ra_callid;
}
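/* Timer callback: move a recurring command from the recurring list back onto
* the execution queue and wake up the worker to run it
*/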
static gboolean
recurring_helper(gpointer data)
{
remote_ra_cmd_t *cmd = data;
lrm_state_t *connection_rsc = NULL;
cmd->interval_id = 0;
connection_rsc = lrm_state_find(cmd->rsc_id);
if (connection_rsc && connection_rsc->remote_ra_data) {
remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
ra_data->recurring_cmds = g_list_remove(ra_data->recurring_cmds, cmd);
ra_data->cmds = g_list_append(ra_data->cmds, cmd);
mainloop_set_trigger(ra_data->work);
}
return FALSE;
}
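/* Timer callback: a command's start delay has expired, so wake up the worker
* to process the queued commands
*/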
static gboolean
start_delay_helper(gpointer data)
{
remote_ra_cmd_t *cmd = data;
lrm_state_t *connection_rsc = NULL;
cmd->delay_id = 0;
connection_rsc = lrm_state_find(cmd->rsc_id);
if (connection_rsc && connection_rsc->remote_ra_data) {
remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
mainloop_set_trigger(ra_data->work);
}
return FALSE;
}
/*!
* \internal
* \brief Handle cluster communication related to pacemaker_remote node joining
*
* \param[in] node_name Name of newly integrated pacemaker_remote node
*/
static void
remote_node_up(const char *node_name)
{
int call_opt, call_id = 0;
xmlNode *update, *state;
crm_node_t *node;
enum controld_section_e section = controld_section_all;
CRM_CHECK(node_name != NULL, return);
crm_info("Announcing pacemaker_remote node %s", node_name);
/* Clear node's entire state (resource history and transient attributes)
* other than shutdown locks. The transient attributes should and normally
* will be cleared when the node leaves, but since remote node state has a
* number of corner cases, clear them here as well, to be sure.
*/
call_opt = crmd_cib_smart_opt();
if (controld_shutdown_lock_enabled) {
section = controld_section_all_unlocked;
}
controld_delete_node_state(node_name, section, call_opt);
/* Clear node's probed attribute */
update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE);
/* Ensure node is in the remote peer cache with member status */
node = crm_remote_peer_get(node_name);
CRM_CHECK(node != NULL, return);
crm_update_peer_state(__FUNCTION__, node, CRM_NODE_MEMBER, 0);
/* pacemaker_remote nodes don't participate in the membership layer,
* so cluster nodes don't automatically get notified when they come and go.
* We send a cluster message to the DC, and update the CIB node state entry,
* so the DC will get it sooner (via message) or later (via CIB refresh),
* and any other interested parties can query the CIB.
*/
send_remote_state_message(node_name, TRUE);
update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
state = create_node_state_update(node, node_update_cluster, update,
__FUNCTION__);
/* Clear the XML_NODE_IS_FENCED flag in the node state. If the node ever
* needs to be fenced, this flag will allow various actions to determine
* whether the fencing has happened yet.
*/
crm_xml_add(state, XML_NODE_IS_FENCED, "0");
/* TODO: If the remote connection drops, and this (async) CIB update either
* failed or has not yet completed, later actions could mistakenly think the
* node has already been fenced (if the XML_NODE_IS_FENCED attribute was
* previously set, because it won't have been cleared). This could prevent
* actual fencing or allow recurring monitor failures to be cleared too
* soon. Ideally, we wouldn't rely on the CIB for the fenced status.
*/
fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
if (call_id < 0) {
crm_perror(LOG_WARNING, "%s CIB node state setup", node_name);
}
free_xml(update);
}
enum down_opts {
DOWN_KEEP_LRM,
DOWN_ERASE_LRM
};
/*!
* \internal
* \brief Handle cluster communication related to pacemaker_remote node leaving
*
* \param[in] node_name Name of lost node
* \param[in] opts Whether to keep or erase LRM history
*/
static void
remote_node_down(const char *node_name, const enum down_opts opts)
{
xmlNode *update;
int call_id = 0;
int call_opt = crmd_cib_smart_opt();
crm_node_t *node;
/* Purge node from attrd's memory */
update_attrd_remote_node_removed(node_name, NULL);
/* Normally, only node attributes should be erased, and the resource history
* should be kept until the node comes back up. However, after a successful
* fence, we want to clear the history as well, so we don't think resources
* are still running on the node.
*/
if (opts == DOWN_ERASE_LRM) {
controld_delete_node_state(node_name, controld_section_all, call_opt);
} else {
controld_delete_node_state(node_name, controld_section_attrs, call_opt);
}
/* Ensure node is in the remote peer cache with lost state */
node = crm_remote_peer_get(node_name);
CRM_CHECK(node != NULL, return);
crm_update_peer_state(__FUNCTION__, node, CRM_NODE_LOST, 0);
/* Notify DC */
send_remote_state_message(node_name, FALSE);
/* Update CIB node state */
update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
create_node_state_update(node, node_update_cluster, update, __FUNCTION__);
fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
if (call_id < 0) {
crm_perror(LOG_ERR, "%s CIB node state update", node_name);
}
free_xml(update);
}
/*!
* \internal
* \brief Handle effects of a remote RA command on node state
*
* \param[in] cmd Completed remote RA command
*/
static void
check_remote_node_state(remote_ra_cmd_t *cmd)
{
/* Only successful actions can change node state */
if (cmd->rc != PCMK_OCF_OK) {
return;
}
if (safe_str_eq(cmd->action, "start")) {
remote_node_up(cmd->rsc_id);
} else if (safe_str_eq(cmd->action, "migrate_from")) {
/* After a successful migration, we don't need to do remote_node_up()
* because the DC already knows the node is up, and we don't want to
* clear LRM history etc. We do need to add the remote node to this
* host's remote peer cache, because (unless it happens to be DC)
* it hasn't been tracking the remote node, and other code relies on
* the cache to distinguish remote nodes from unseen cluster nodes.
*/
crm_node_t *node = crm_remote_peer_get(cmd->rsc_id);
CRM_CHECK(node != NULL, return);
crm_update_peer_state(__FUNCTION__, node, CRM_NODE_MEMBER, 0);
} else if (safe_str_eq(cmd->action, "stop")) {
lrm_state_t *lrm_state = lrm_state_find(cmd->rsc_id);
remote_ra_data_t *ra_data = lrm_state? lrm_state->remote_ra_data : NULL;
if (ra_data) {
if (ra_data->migrate_status != takeover_complete) {
/* Stop means down if we didn't successfully migrate elsewhere */
remote_node_down(cmd->rsc_id, DOWN_KEEP_LRM);
} else if (AM_I_DC == FALSE) {
/* Only the connection host and DC track node state,
* so if the connection migrated elsewhere and we aren't DC,
* un-cache the node, so we don't have stale info
*/
crm_remote_peer_cache_remove(cmd->rsc_id);
}
}
}
/* We don't do anything for successful monitors, which is correct for
* routine recurring monitors, and for monitors on nodes where the
* connection isn't supposed to be (the cluster will stop the connection in
* that case). However, if the initial probe finds the connection already
* active on the node where we want it, we probably should do
* remote_node_up(). Unfortunately, we can't distinguish that case here.
* Given that connections have to be initiated by the cluster, the chance of
* that should be close to zero.
*/
}
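/* Build an executor event from a completed remote RA command and hand it to
* the normal operation callback, updating remote node state first if needed
*/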
static void
report_remote_ra_result(remote_ra_cmd_t * cmd)
{
lrmd_event_data_t op = { 0, };
check_remote_node_state(cmd);
op.type = lrmd_event_exec_complete;
op.rsc_id = cmd->rsc_id;
op.op_type = cmd->action;
op.user_data = cmd->userdata;
op.exit_reason = cmd->exit_reason;
op.timeout = cmd->timeout;
op.interval_ms = cmd->interval_ms;
op.rc = cmd->rc;
op.op_status = cmd->op_status;
op.t_run = (unsigned int) cmd->start_time;
op.t_rcchange = (unsigned int) cmd->start_time;
if (cmd->reported_success && cmd->rc != PCMK_OCF_OK) {
op.t_rcchange = (unsigned int) time(NULL);
/* This edge case should never occur, but if it does, the result is that
* a failure will not be processed correctly. It is only remotely possible
* because we can detect that a connection resource's TCP connection has
* failed at any moment after start has completed; the actual recurring
* operation is just a connectivity ping.
*
* Basically, we are not guaranteed that the first successful monitor op and
* a subsequent failed monitor op won't share the same timestamp, so we have
* to make it look like the operations occurred at separate times. */
if (op.t_rcchange == op.t_run) {
op.t_rcchange++;
}
}
if (cmd->params) {
lrmd_key_value_t *tmp;
op.params = crm_str_table_new();
for (tmp = cmd->params; tmp; tmp = tmp->next) {
g_hash_table_insert(op.params, strdup(tmp->key), strdup(tmp->value));
}
}
op.call_id = cmd->call_id;
op.remote_nodename = cmd->owner;
lrm_op_callback(&op);
if (op.params) {
g_hash_table_destroy(op.params);
}
}
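/* Recalculate how much of the command's timeout (in ms) remains */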
static void
update_remaining_timeout(remote_ra_cmd_t * cmd)
{
cmd->remaining_timeout = ((cmd->timeout / 1000) - (time(NULL) - cmd->start_time)) * 1000;
}
static gboolean
retry_start_cmd_cb(gpointer data)
{
lrm_state_t *lrm_state = data;
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
remote_ra_cmd_t *cmd = NULL;
int rc = -1;
if (!ra_data || !ra_data->cur_cmd) {
return FALSE;
}
cmd = ra_data->cur_cmd;
- if (pcmk__str_none_of(cmd->action, "start", "migrate_from", NULL)) {
+ if (!pcmk__str_any_of(cmd->action, "start", "migrate_from", NULL)) {
return FALSE;
}
update_remaining_timeout(cmd);
if (cmd->remaining_timeout > 0) {
rc = handle_remote_ra_start(lrm_state, cmd, cmd->remaining_timeout);
}
if (rc != 0) {
cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
cmd->op_status = PCMK_LRM_OP_ERROR;
report_remote_ra_result(cmd);
if (ra_data->cmds) {
mainloop_set_trigger(ra_data->work);
}
ra_data->cur_cmd = NULL;
free_cmd(cmd);
} else {
/* wait for connection event */
}
return FALSE;
}
static gboolean
connection_takeover_timeout_cb(gpointer data)
{
lrm_state_t *lrm_state = NULL;
remote_ra_cmd_t *cmd = data;
crm_info("takeover event timed out for node %s", cmd->rsc_id);
cmd->takeover_timeout_id = 0;
lrm_state = lrm_state_find(cmd->rsc_id);
handle_remote_ra_stop(lrm_state, cmd);
free_cmd(cmd);
return FALSE;
}
static gboolean
monitor_timeout_cb(gpointer data)
{
lrm_state_t *lrm_state = NULL;
remote_ra_cmd_t *cmd = data;
lrm_state = lrm_state_find(cmd->rsc_id);
crm_info("Timed out waiting for remote poke response from %s%s",
cmd->rsc_id, (lrm_state? "" : " (no LRM state)"));
cmd->monitor_timeout_id = 0;
cmd->op_status = PCMK_LRM_OP_TIMEOUT;
cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
if (lrm_state && lrm_state->remote_ra_data) {
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
if (ra_data->cur_cmd == cmd) {
ra_data->cur_cmd = NULL;
}
if (ra_data->cmds) {
mainloop_set_trigger(ra_data->work);
}
}
report_remote_ra_result(cmd);
free_cmd(cmd);
if(lrm_state) {
lrm_state_disconnect(lrm_state);
}
return FALSE;
}
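/* Create a fake executor event reporting successful completion of the given
* action, and process it as if the action had really run
*/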
static void
synthesize_lrmd_success(lrm_state_t *lrm_state, const char *rsc_id, const char *op_type)
{
lrmd_event_data_t op = { 0, };
if (lrm_state == NULL) {
/* If lrm_state is not given, assume the local node */
lrm_state = lrm_state_find(fsa_our_uname);
}
CRM_ASSERT(lrm_state != NULL);
op.type = lrmd_event_exec_complete;
op.rsc_id = rsc_id;
op.op_type = op_type;
op.rc = PCMK_OCF_OK;
op.op_status = PCMK_LRM_OP_DONE;
op.t_run = (unsigned int) time(NULL);
op.t_rcchange = op.t_run;
op.call_id = generate_callid();
process_lrm_event(lrm_state, &op, NULL, NULL);
}
void
remote_lrm_op_callback(lrmd_event_data_t * op)
{
gboolean cmd_handled = FALSE;
lrm_state_t *lrm_state = NULL;
remote_ra_data_t *ra_data = NULL;
remote_ra_cmd_t *cmd = NULL;
crm_debug("Processing '%s%s%s' event on remote connection to %s: %s "
"(%d) status=%s (%d)",
(op->op_type? op->op_type : ""), (op->op_type? " " : ""),
lrmd_event_type2str(op->type), op->remote_nodename,
services_ocf_exitcode_str(op->rc), op->rc,
services_lrm_status_str(op->op_status), op->op_status);
lrm_state = lrm_state_find(op->remote_nodename);
if (!lrm_state || !lrm_state->remote_ra_data) {
crm_debug("No state information found for remote connection event");
return;
}
ra_data = lrm_state->remote_ra_data;
if (op->type == lrmd_event_new_client) {
// Another client has connected to the remote daemon
if (ra_data->migrate_status == expect_takeover) {
// Great, we knew this was coming
ra_data->migrate_status = takeover_complete;
} else {
crm_err("Unexpected pacemaker_remote client takeover for %s. Disconnecting", op->remote_nodename);
/* In this case, lrmd_tls_connection_destroy() will be called under the control of mainloop. */
/* Do not free lrm_state->conn yet. */
/* It'll be freed in the following stop action. */
lrm_state_disconnect_only(lrm_state);
}
return;
}
/* filter all EXEC events up */
if (op->type == lrmd_event_exec_complete) {
if (ra_data->migrate_status == takeover_complete) {
crm_debug("ignoring event, this connection is taken over by another node");
} else {
lrm_op_callback(op);
}
return;
}
if ((op->type == lrmd_event_disconnect) && (ra_data->cur_cmd == NULL)) {
if (ra_data->active == FALSE) {
crm_debug("Disconnection from Pacemaker Remote node %s complete",
lrm_state->node_name);
} else if (!remote_ra_is_in_maintenance(lrm_state)) {
crm_err("Lost connection to Pacemaker Remote node %s",
lrm_state->node_name);
ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
} else {
crm_notice("Unmanaged Pacemaker Remote node %s disconnected",
lrm_state->node_name);
/* Do roughly what a 'stop' on the remote-resource would do */
handle_remote_ra_stop(lrm_state, NULL);
remote_node_down(lrm_state->node_name, DOWN_KEEP_LRM);
/* now fake the reply of a successful 'stop' */
synthesize_lrmd_success(NULL, lrm_state->node_name, "stop");
}
return;
}
if (!ra_data->cur_cmd) {
crm_debug("no event to match");
return;
}
cmd = ra_data->cur_cmd;
/* Start actions and migrate from actions complete after connection
* comes back to us. */
if (op->type == lrmd_event_connect && pcmk__str_any_of(cmd->action, "start",
"migrate_from", NULL)) {
if (op->connection_rc < 0) {
update_remaining_timeout(cmd);
if (op->connection_rc == -ENOKEY) {
// Hard error, don't retry
cmd->op_status = PCMK_LRM_OP_ERROR;
cmd->rc = PCMK_OCF_INVALID_PARAM;
cmd->exit_reason = strdup("Authentication key not readable");
} else if (cmd->remaining_timeout > 3000) {
crm_trace("rescheduling start, remaining timeout %d", cmd->remaining_timeout);
g_timeout_add(1000, retry_start_cmd_cb, lrm_state);
return;
} else {
crm_trace("can't reschedule start, remaining timeout too small %d",
cmd->remaining_timeout);
cmd->op_status = PCMK_LRM_OP_TIMEOUT;
cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
}
} else {
lrm_state_reset_tables(lrm_state, TRUE);
cmd->rc = PCMK_OCF_OK;
cmd->op_status = PCMK_LRM_OP_DONE;
ra_data->active = TRUE;
}
crm_debug("Remote connection event matched %s action", cmd->action);
report_remote_ra_result(cmd);
cmd_handled = TRUE;
} else if (op->type == lrmd_event_poke && safe_str_eq(cmd->action, "monitor")) {
if (cmd->monitor_timeout_id) {
g_source_remove(cmd->monitor_timeout_id);
cmd->monitor_timeout_id = 0;
}
/* Only report success the first time; after that, only worry about failures.
* For this function, if we get the poke back, it is always a success. Pokes
* only fail if the send fails or the response times out. */
if (!cmd->reported_success) {
cmd->rc = PCMK_OCF_OK;
cmd->op_status = PCMK_LRM_OP_DONE;
report_remote_ra_result(cmd);
cmd->reported_success = 1;
}
crm_debug("Remote poke event matched %s action", cmd->action);
/* success, keep rescheduling if interval is present. */
if (cmd->interval_ms && (cmd->cancel == FALSE)) {
ra_data->recurring_cmds = g_list_append(ra_data->recurring_cmds, cmd);
cmd->interval_id = g_timeout_add(cmd->interval_ms,
recurring_helper, cmd);
cmd = NULL; /* prevent free */
}
cmd_handled = TRUE;
} else if (op->type == lrmd_event_disconnect && safe_str_eq(cmd->action, "monitor")) {
if (ra_data->active == TRUE && (cmd->cancel == FALSE)) {
cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
cmd->op_status = PCMK_LRM_OP_ERROR;
report_remote_ra_result(cmd);
crm_err("Remote connection to %s unexpectedly dropped during monitor",
lrm_state->node_name);
}
cmd_handled = TRUE;
} else if (op->type == lrmd_event_new_client && safe_str_eq(cmd->action, "stop")) {
handle_remote_ra_stop(lrm_state, cmd);
cmd_handled = TRUE;
} else {
crm_debug("Event did not match %s action", ra_data->cur_cmd->action);
}
if (cmd_handled) {
ra_data->cur_cmd = NULL;
if (ra_data->cmds) {
mainloop_set_trigger(ra_data->work);
}
free_cmd(cmd);
}
}
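/* Handle an intentional stop of the remote connection: discard pending
* operations (or just reset state tables after a successful takeover),
* disconnect, and report the result if a stop command was given
*/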
static void
handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd)
{
remote_ra_data_t *ra_data = NULL;
CRM_ASSERT(lrm_state);
ra_data = lrm_state->remote_ra_data;
if (ra_data->migrate_status != takeover_complete) {
/* delete pending ops whenever the remote connection is intentionally stopped */
g_hash_table_remove_all(lrm_state->pending_ops);
} else {
/* we no longer hold the history if this connection has been migrated;
* however, we keep the metadata cache for future use */
lrm_state_reset_tables(lrm_state, FALSE);
}
ra_data->active = FALSE;
lrm_state_disconnect(lrm_state);
if (ra_data->cmds) {
g_list_free_full(ra_data->cmds, free_cmd);
}
if (ra_data->recurring_cmds) {
g_list_free_full(ra_data->recurring_cmds, free_cmd);
}
ra_data->cmds = NULL;
ra_data->recurring_cmds = NULL;
ra_data->cur_cmd = NULL;
if (cmd) {
cmd->rc = PCMK_OCF_OK;
cmd->op_status = PCMK_LRM_OP_DONE;
report_remote_ra_result(cmd);
}
}
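/* Get the connection address and port from the command parameters and start
* an asynchronous connection to the remote node, capping the timeout at
* MAX_START_TIMEOUT_MS
*/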
static int
handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms)
{
const char *server = NULL;
lrmd_key_value_t *tmp = NULL;
int port = 0;
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
int timeout_used = timeout_ms > MAX_START_TIMEOUT_MS ? MAX_START_TIMEOUT_MS : timeout_ms;
for (tmp = cmd->params; tmp; tmp = tmp->next) {
if (pcmk__str_any_of(tmp->key, XML_RSC_ATTR_REMOTE_RA_ADDR,
XML_RSC_ATTR_REMOTE_RA_SERVER, NULL)) {
server = tmp->value;
} else if (safe_str_eq(tmp->key, XML_RSC_ATTR_REMOTE_RA_PORT)) {
port = atoi(tmp->value);
} else if (safe_str_eq(tmp->key, CRM_META"_"XML_RSC_ATTR_CONTAINER)) {
ra_data->controlling_guest = TRUE;
}
}
return lrm_state_remote_connect_async(lrm_state, server, port, timeout_used);
}
static gboolean
handle_remote_ra_exec(gpointer user_data)
{
int rc = 0;
lrm_state_t *lrm_state = user_data;
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
remote_ra_cmd_t *cmd;
GList *first = NULL;
if (ra_data->cur_cmd) {
/* still waiting on previous cmd */
return TRUE;
}
while (ra_data->cmds) {
first = ra_data->cmds;
cmd = first->data;
if (cmd->delay_id) {
/* still waiting for start delay timer to trip */
return TRUE;
}
ra_data->cmds = g_list_remove_link(ra_data->cmds, first);
g_list_free_1(first);
if (!strcmp(cmd->action, "start") || !strcmp(cmd->action, "migrate_from")) {
ra_data->migrate_status = 0;
rc = handle_remote_ra_start(lrm_state, cmd, cmd->timeout);
if (rc == 0) {
/* take care of this later when we get async connection result */
crm_debug("Initiated async remote connection, %s action will complete after connect event",
cmd->action);
ra_data->cur_cmd = cmd;
return TRUE;
} else {
crm_debug("Could not initiate remote connection for %s action",
cmd->action);
cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
cmd->op_status = PCMK_LRM_OP_ERROR;
}
report_remote_ra_result(cmd);
} else if (!strcmp(cmd->action, "monitor")) {
if (lrm_state_is_connected(lrm_state) == TRUE) {
rc = lrm_state_poke_connection(lrm_state);
if (rc < 0) {
cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
cmd->op_status = PCMK_LRM_OP_ERROR;
}
} else {
rc = -1;
cmd->op_status = PCMK_LRM_OP_DONE;
cmd->rc = PCMK_OCF_NOT_RUNNING;
}
if (rc == 0) {
crm_debug("Poked Pacemaker Remote at node %s, waiting for async response",
cmd->rsc_id);
ra_data->cur_cmd = cmd;
cmd->monitor_timeout_id = g_timeout_add(cmd->timeout, monitor_timeout_cb, cmd);
return TRUE;
}
report_remote_ra_result(cmd);
} else if (!strcmp(cmd->action, "stop")) {
if (ra_data->migrate_status == expect_takeover) {
/* Briefly wait on stop for the takeover event to occur. If the
* takeover event does not occur during the wait period, that's fine;
* it just means that the remote node's lrm_status section is going to get
* cleared, which will require all the resources running on the remote node
* to be explicitly re-detected via probe actions. If the takeover does occur
* successfully, then we can leave the status section intact. */
cmd->takeover_timeout_id = g_timeout_add((cmd->timeout/2), connection_takeover_timeout_cb, cmd);
ra_data->cur_cmd = cmd;
return TRUE;
}
handle_remote_ra_stop(lrm_state, cmd);
} else if (!strcmp(cmd->action, "migrate_to")) {
ra_data->migrate_status = expect_takeover;
cmd->rc = PCMK_OCF_OK;
cmd->op_status = PCMK_LRM_OP_DONE;
report_remote_ra_result(cmd);
} else if (!strcmp(cmd->action, "reload")) {
/* reloads are a no-op right now, add logic here when they become important */
cmd->rc = PCMK_OCF_OK;
cmd->op_status = PCMK_LRM_OP_DONE;
report_remote_ra_result(cmd);
}
free_cmd(cmd);
}
return TRUE;
}
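/* Allocate the connection's remote RA data and worker trigger, if not done yet */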
static void
remote_ra_data_init(lrm_state_t * lrm_state)
{
remote_ra_data_t *ra_data = NULL;
if (lrm_state->remote_ra_data) {
return;
}
ra_data = calloc(1, sizeof(remote_ra_data_t));
ra_data->work = mainloop_add_trigger(G_PRIORITY_HIGH, handle_remote_ra_exec, lrm_state);
lrm_state->remote_ra_data = ra_data;
}
void
remote_ra_cleanup(lrm_state_t * lrm_state)
{
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
if (!ra_data) {
return;
}
if (ra_data->cmds) {
g_list_free_full(ra_data->cmds, free_cmd);
}
if (ra_data->recurring_cmds) {
g_list_free_full(ra_data->recurring_cmds, free_cmd);
}
mainloop_destroy_trigger(ra_data->work);
free(ra_data);
lrm_state->remote_ra_data = NULL;
}
gboolean
is_remote_lrmd_ra(const char *agent, const char *provider, const char *id)
{
if (agent && provider && !strcmp(agent, REMOTE_LRMD_RA) && !strcmp(provider, "pacemaker")) {
return TRUE;
}
if (id && lrm_state_find(id) && safe_str_neq(id, fsa_our_uname)) {
return TRUE;
}
return FALSE;
}
lrmd_rsc_info_t *
remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id)
{
lrmd_rsc_info_t *info = NULL;
if ((lrm_state_find(rsc_id))) {
info = calloc(1, sizeof(lrmd_rsc_info_t));
info->id = strdup(rsc_id);
info->type = strdup(REMOTE_LRMD_RA);
info->standard = strdup(PCMK_RESOURCE_CLASS_OCF);
info->provider = strdup("pacemaker");
}
return info;
}
static gboolean
is_remote_ra_supported_action(const char *action)
{
if (!action) {
return FALSE;
} else if (strcmp(action, "start") &&
strcmp(action, "stop") &&
strcmp(action, "reload") &&
strcmp(action, "migrate_to") &&
strcmp(action, "migrate_from") && strcmp(action, "monitor")) {
return FALSE;
}
return TRUE;
}
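/* Report failure for every recurring monitor command in the list, and return
* the list with those commands removed and freed
*/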
static GList *
fail_all_monitor_cmds(GList * list)
{
GList *rm_list = NULL;
remote_ra_cmd_t *cmd = NULL;
GListPtr gIter = NULL;
for (gIter = list; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
if ((cmd->interval_ms > 0) && safe_str_eq(cmd->action, "monitor")) {
rm_list = g_list_append(rm_list, cmd);
}
}
for (gIter = rm_list; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
cmd->op_status = PCMK_LRM_OP_ERROR;
crm_trace("Pre-emptively failing %s %s (interval=%u, %s)",
cmd->action, cmd->rsc_id, cmd->interval_ms, cmd->userdata);
report_remote_ra_result(cmd);
list = g_list_remove(list, cmd);
free_cmd(cmd);
}
/* frees only the list data, not the cmds */
g_list_free(rm_list);
return list;
}
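/* Remove and free the first command in the list matching the given action and
* interval, returning the updated list
*/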
static GList *
remove_cmd(GList * list, const char *action, guint interval_ms)
{
remote_ra_cmd_t *cmd = NULL;
GListPtr gIter = NULL;
for (gIter = list; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
if ((cmd->interval_ms == interval_ms)
&& safe_str_eq(cmd->action, action)) {
break;
}
cmd = NULL;
}
if (cmd) {
list = g_list_remove(list, cmd);
free_cmd(cmd);
}
return list;
}
int
remote_ra_cancel(lrm_state_t *lrm_state, const char *rsc_id,
const char *action, guint interval_ms)
{
lrm_state_t *connection_rsc = NULL;
remote_ra_data_t *ra_data = NULL;
connection_rsc = lrm_state_find(rsc_id);
if (!connection_rsc || !connection_rsc->remote_ra_data) {
return -EINVAL;
}
ra_data = connection_rsc->remote_ra_data;
ra_data->cmds = remove_cmd(ra_data->cmds, action, interval_ms);
ra_data->recurring_cmds = remove_cmd(ra_data->recurring_cmds, action,
interval_ms);
if (ra_data->cur_cmd &&
(ra_data->cur_cmd->interval_ms == interval_ms) &&
(safe_str_eq(ra_data->cur_cmd->action, action))) {
ra_data->cur_cmd->cancel = TRUE;
}
return 0;
}
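/*!
* \internal
* \brief Merge a new recurring monitor request into an existing monitor command
*
* \param[in] ra_data Connection's remote RA data
* \param[in] interval_ms Monitor interval of the new request
* \param[in] userdata User data of the new request
*
* \return Existing monitor command that was updated, or NULL if none matched
*/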
static remote_ra_cmd_t *
handle_dup_monitor(remote_ra_data_t *ra_data, guint interval_ms,
const char *userdata)
{
GList *gIter = NULL;
remote_ra_cmd_t *cmd = NULL;
/* There are three places a potential duplicate monitor operation
* could exist:
* 1. the recurring_cmds list, where the op is waiting for its next interval
* 2. the cmds list, where the op is queued to get executed immediately
* 3. cur_cmd, which means the monitor op is in flight right now
*/
if (interval_ms == 0) {
return NULL;
}
if (ra_data->cur_cmd &&
ra_data->cur_cmd->cancel == FALSE &&
(ra_data->cur_cmd->interval_ms == interval_ms) &&
safe_str_eq(ra_data->cur_cmd->action, "monitor")) {
cmd = ra_data->cur_cmd;
goto handle_dup;
}
for (gIter = ra_data->recurring_cmds; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
if ((cmd->interval_ms == interval_ms)
&& safe_str_eq(cmd->action, "monitor")) {
goto handle_dup;
}
}
for (gIter = ra_data->cmds; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
if ((cmd->interval_ms == interval_ms)
&& safe_str_eq(cmd->action, "monitor")) {
goto handle_dup;
}
}
return NULL;
handle_dup:
crm_trace("merging duplicate monitor cmd " PCMK__OP_FMT,
cmd->rsc_id, "monitor", interval_ms);
/* update the userdata */
if (userdata) {
free(cmd->userdata);
cmd->userdata = strdup(userdata);
}
/* if we've already reported success, generate a new call id */
if (cmd->reported_success) {
cmd->start_time = time(NULL);
cmd->call_id = generate_callid();
cmd->reported_success = 0;
}
/* If we have an interval_id set, that means we are in the process of
* waiting for this cmd's next interval. Instead of waiting, cancel
* the timer and execute the action immediately. */
if (cmd->interval_id) {
g_source_remove(cmd->interval_id);
cmd->interval_id = 0;
recurring_helper(cmd);
}
return cmd;
}
int
remote_ra_exec(lrm_state_t *lrm_state, const char *rsc_id, const char *action,
const char *userdata, guint interval_ms,
int timeout, /* ms */
int start_delay, /* ms */
lrmd_key_value_t * params)
{
int rc = 0;
lrm_state_t *connection_rsc = NULL;
remote_ra_cmd_t *cmd = NULL;
remote_ra_data_t *ra_data = NULL;
if (is_remote_ra_supported_action(action) == FALSE) {
rc = -EINVAL;
goto exec_done;
}
connection_rsc = lrm_state_find(rsc_id);
if (!connection_rsc) {
rc = -EINVAL;
goto exec_done;
}
remote_ra_data_init(connection_rsc);
ra_data = connection_rsc->remote_ra_data;
cmd = handle_dup_monitor(ra_data, interval_ms, userdata);
if (cmd) {
rc = cmd->call_id;
goto exec_done;
}
cmd = calloc(1, sizeof(remote_ra_cmd_t));
cmd->owner = strdup(lrm_state->node_name);
cmd->rsc_id = strdup(rsc_id);
cmd->action = strdup(action);
cmd->userdata = strdup(userdata);
cmd->interval_ms = interval_ms;
cmd->timeout = timeout;
cmd->start_delay = start_delay;
cmd->params = params;
cmd->start_time = time(NULL);
cmd->call_id = generate_callid();
if (cmd->start_delay) {
cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
}
ra_data->cmds = g_list_append(ra_data->cmds, cmd);
mainloop_set_trigger(ra_data->work);
return cmd->call_id;
exec_done:
lrmd_key_value_freeall(params);
return rc;
}
/*!
* \internal
* \brief Immediately fail all monitors of a remote node, if proxied here
*
* \param[in] node_name Name of pacemaker_remote node
*/
void
remote_ra_fail(const char *node_name)
{
lrm_state_t *lrm_state = lrm_state_find(node_name);
if (lrm_state && lrm_state_is_connected(lrm_state)) {
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
crm_info("Failing monitors on pacemaker_remote node %s", node_name);
ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
}
}
/* A guest node fencing implied by host fencing looks like:
*
* <pseudo_event id="103" operation="stonith" operation_key="stonith-lxc1-off"
* on_node="lxc1" on_node_uuid="lxc1">
* <attributes CRM_meta_master_lxc_ms="10" CRM_meta_on_node="lxc1"
* CRM_meta_on_node_uuid="lxc1" CRM_meta_stonith_action="off"
* crm_feature_set="3.0.12"/>
* <downed>
* <node id="lxc1"/>
* </downed>
* </pseudo_event>
*/
#define XPATH_PSEUDO_FENCE "//" XML_GRAPH_TAG_PSEUDO_EVENT \
"[@" XML_LRM_ATTR_TASK "='stonith']/" XML_GRAPH_TAG_DOWNED \
"/" XML_CIB_TAG_NODE
/*!
* \internal
* \brief Check a pseudo-action for Pacemaker Remote node side effects
*
* \param[in] xml XML of pseudo-action to check
*/
void
remote_ra_process_pseudo(xmlNode *xml)
{
xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_FENCE);
if (numXpathResults(search) == 1) {
xmlNode *result = getXpathResult(search, 0);
/* Normally, we handle the necessary side effects of a guest node stop
* action when reporting the remote agent's result. However, if the stop
* is implied due to fencing, it will be a fencing pseudo-event, and
* there won't be a result to report. Handle that case here.
*
* This will result in a duplicate call to remote_node_down() if the
* guest stop was real instead of implied, but that shouldn't hurt.
*
* There is still one corner case that isn't handled: if a guest node
* isn't running any resources when its host is fenced, it will appear
* to be cleanly stopped, so there will be no pseudo-fence, and our
* peer cache state will be incorrect unless and until the guest is
* recovered.
*/
if (result) {
const char *remote = ID(result);
if (remote) {
remote_node_down(remote, DOWN_ERASE_LRM);
}
}
}
freeXpathObject(search);
}
static void
remote_ra_maintenance(lrm_state_t * lrm_state, gboolean maintenance)
{
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
xmlNode *update, *state;
int call_opt, call_id = 0;
crm_node_t *node;
call_opt = crmd_cib_smart_opt();
node = crm_remote_peer_get(lrm_state->node_name);
CRM_CHECK(node != NULL, return);
update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
state = create_node_state_update(node, node_update_none, update,
__FUNCTION__);
crm_xml_add(state, XML_NODE_IS_MAINTENANCE, maintenance?"1":"0");
fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
if (call_id < 0) {
crm_perror(LOG_WARNING, "%s CIB node state update failed", lrm_state->node_name);
} else {
/* TODO: still not 100% sure that async update will succeed ... */
ra_data->is_maintenance = maintenance;
}
free_xml(update);
}
#define XPATH_PSEUDO_MAINTENANCE "//" XML_GRAPH_TAG_PSEUDO_EVENT \
"[@" XML_LRM_ATTR_TASK "='" CRM_OP_MAINTENANCE_NODES "']/" \
XML_GRAPH_TAG_MAINTENANCE
/*!
* \internal
* \brief Check a pseudo-action holding updates for maintenance state
*
* \param[in] xml XML of pseudo-action to check
*/
void
remote_ra_process_maintenance_nodes(xmlNode *xml)
{
xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_MAINTENANCE);
if (numXpathResults(search) == 1) {
xmlNode *node;
int cnt = 0, cnt_remote = 0;
for (node =
first_named_child(getXpathResult(search, 0), XML_CIB_TAG_NODE);
node; node = __xml_next(node)) {
lrm_state_t *lrm_state = lrm_state_find(ID(node));
cnt++;
if (lrm_state && lrm_state->remote_ra_data &&
((remote_ra_data_t *) lrm_state->remote_ra_data)->active) {
cnt_remote++;
remote_ra_maintenance(lrm_state,
crm_atoi(crm_element_value(node,
XML_NODE_IS_MAINTENANCE), "0"));
}
}
crm_trace("Action holds %d nodes (%d remotes found) "
"adjusting maintenance-mode", cnt, cnt_remote);
}
freeXpathObject(search);
}
gboolean
remote_ra_is_in_maintenance(lrm_state_t * lrm_state)
{
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
return ra_data->is_maintenance;
}
gboolean
remote_ra_controlling_guest(lrm_state_t * lrm_state)
{
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
return ra_data->controlling_guest;
}
diff --git a/daemons/execd/pacemaker-execd.c b/daemons/execd/pacemaker-execd.c
index d2fde151c8..29a6bda235 100644
--- a/daemons/execd/pacemaker-execd.c
+++ b/daemons/execd/pacemaker-execd.c
@@ -1,517 +1,517 @@
/*
* Copyright 2012-2020 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <glib.h>
#include <signal.h>
#include <sys/types.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/services.h>
#include <crm/common/mainloop.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_internal.h>
#include <crm/common/remote_internal.h>
#include <crm/lrmd_internal.h>
#include "pacemaker-execd.h"
#if defined(HAVE_GNUTLS_GNUTLS_H) && defined(SUPPORT_REMOTE)
# define ENABLE_PCMK_REMOTE
#endif
static GMainLoop *mainloop = NULL;
static qb_ipcs_service_t *ipcs = NULL;
static stonith_t *stonith_api = NULL;
int lrmd_call_id = 0;
#ifdef ENABLE_PCMK_REMOTE
/* whether shutdown request has been sent */
static sig_atomic_t shutting_down = FALSE;
/* timer for waiting for acknowledgment of shutdown request */
static guint shutdown_ack_timer = 0;
static gboolean lrmd_exit(gpointer data);
#endif
static void
stonith_connection_destroy_cb(stonith_t * st, stonith_event_t * e)
{
stonith_api->state = stonith_disconnected;
crm_err("Connection to fencer lost");
stonith_connection_failed();
}
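/* Return a connection to the fencer, creating one if needed (NULL on failure) */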
stonith_t *
get_stonith_connection(void)
{
if (stonith_api && stonith_api->state == stonith_disconnected) {
stonith_api_delete(stonith_api);
stonith_api = NULL;
}
if (stonith_api == NULL) {
int rc = pcmk_ok;
stonith_api = stonith_api_new();
if (stonith_api == NULL) {
crm_err("Could not connect to fencer: API memory allocation failed");
return NULL;
}
rc = stonith_api_connect_retry(stonith_api, crm_system_name, 10);
if (rc != pcmk_ok) {
crm_err("Could not connect to fencer in 10 attempts: %s "
CRM_XS " rc=%d", pcmk_strerror(rc), rc);
stonith_api_delete(stonith_api);
stonith_api = NULL;
} else {
stonith_api->cmds->register_notification(stonith_api,
T_STONITH_NOTIFY_DISCONNECT,
stonith_connection_destroy_cb);
}
}
return stonith_api;
}
static int32_t
lrmd_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
crm_trace("Connection %p", c);
if (pcmk__new_client(c, uid, gid) == NULL) {
return -EIO;
}
return 0;
}
static void
lrmd_ipc_created(qb_ipcs_connection_t * c)
{
pcmk__client_t *new_client = pcmk__find_client(c);
crm_trace("Connection %p", c);
CRM_ASSERT(new_client != NULL);
/* Now that the connection is officially established, alert
* the other clients that a new connection exists. */
notify_of_new_client(new_client);
}
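/* Read a client IPC request, tag it with the client ID, client name, and a new
* call ID, then hand it to the executor message processor
*/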
static int32_t
lrmd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size)
{
uint32_t id = 0;
uint32_t flags = 0;
pcmk__client_t *client = pcmk__find_client(c);
xmlNode *request = pcmk__client_data2xml(client, data, &id, &flags);
CRM_CHECK(client != NULL, crm_err("Invalid client");
return FALSE);
CRM_CHECK(client->id != NULL, crm_err("Invalid client: %p", client);
return FALSE);
CRM_CHECK(flags & crm_ipc_client_response, crm_err("Invalid client request: %p", client);
return FALSE);
if (!request) {
return 0;
}
if (!client->name) {
const char *value = crm_element_value(request, F_LRMD_CLIENTNAME);
if (value == NULL) {
client->name = crm_itoa(pcmk__client_pid(c));
} else {
client->name = strdup(value);
}
}
lrmd_call_id++;
if (lrmd_call_id < 1) {
lrmd_call_id = 1;
}
crm_xml_add(request, F_LRMD_CLIENTID, client->id);
crm_xml_add(request, F_LRMD_CLIENTNAME, client->name);
crm_xml_add_int(request, F_LRMD_CALLID, lrmd_call_id);
process_lrmd_message(client, id, request);
free_xml(request);
return 0;
}
/*!
* \internal
* \brief Free a client connection, and exit if appropriate
*
* \param[in] client Client connection to free
*/
void
lrmd_client_destroy(pcmk__client_t *client)
{
pcmk__free_client(client);
#ifdef ENABLE_PCMK_REMOTE
/* If we were waiting to shut down, we can now safely do so
* if there are no more proxied IPC providers
*/
if (shutting_down && (ipc_proxy_get_provider() == NULL)) {
lrmd_exit(NULL);
}
#endif
}
static int32_t
lrmd_ipc_closed(qb_ipcs_connection_t * c)
{
pcmk__client_t *client = pcmk__find_client(c);
if (client == NULL) {
return 0;
}
crm_trace("Connection %p", c);
client_disconnect_cleanup(client->id);
#ifdef ENABLE_PCMK_REMOTE
ipc_proxy_remove_provider(client);
#endif
lrmd_client_destroy(client);
return 0;
}
static void
lrmd_ipc_destroy(qb_ipcs_connection_t * c)
{
lrmd_ipc_closed(c);
crm_trace("Connection %p", c);
}
static struct qb_ipcs_service_handlers lrmd_ipc_callbacks = {
.connection_accept = lrmd_ipc_accept,
.connection_created = lrmd_ipc_created,
.msg_process = lrmd_ipc_dispatch,
.connection_closed = lrmd_ipc_closed,
.connection_destroyed = lrmd_ipc_destroy
};
// \return Standard Pacemaker return code
int
lrmd_server_send_reply(pcmk__client_t *client, uint32_t id, xmlNode *reply)
{
crm_trace("Sending reply (%d) to client (%s)", id, client->id);
switch (client->kind) {
case PCMK__CLIENT_IPC:
return pcmk__ipc_send_xml(client, id, reply, FALSE);
#ifdef ENABLE_PCMK_REMOTE
case PCMK__CLIENT_TLS:
return lrmd_tls_send_msg(client->remote, reply, id, "reply");
#endif
default:
crm_err("Could not send reply: unknown client type %d",
client->kind);
}
return ENOTCONN;
}
// \return Standard Pacemaker return code
int
lrmd_server_send_notify(pcmk__client_t *client, xmlNode *msg)
{
crm_trace("Sending notification to client (%s)", client->id);
switch (client->kind) {
case PCMK__CLIENT_IPC:
if (client->ipcs == NULL) {
crm_trace("Could not notify local client: disconnected");
return ENOTCONN;
}
return pcmk__ipc_send_xml(client, 0, msg, crm_ipc_server_event);
#ifdef ENABLE_PCMK_REMOTE
case PCMK__CLIENT_TLS:
if (client->remote == NULL) {
crm_trace("Could not notify remote client: disconnected");
return ENOTCONN;
} else {
return lrmd_tls_send_msg(client->remote, msg, 0, "notify");
}
#endif
default:
crm_err("Could not notify client: unknown type %d", client->kind);
}
return ENOTCONN;
}
/*!
* \internal
* \brief Clean up and exit immediately
*
* \param[in] data Ignored
*
* \return Doesn't return
* \note This can be used as a timer callback.
*/
static gboolean
lrmd_exit(gpointer data)
{
crm_info("Terminating with %d clients", pcmk__ipc_client_count());
if (stonith_api) {
stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT);
stonith_api->cmds->disconnect(stonith_api);
stonith_api_delete(stonith_api);
}
if (ipcs) {
mainloop_del_ipc_server(ipcs);
}
#ifdef ENABLE_PCMK_REMOTE
lrmd_tls_server_destroy();
ipc_proxy_cleanup();
#endif
pcmk__client_cleanup();
g_hash_table_destroy(rsc_list);
if (mainloop) {
lrmd_drain_alerts(mainloop);
}
crm_exit(CRM_EX_OK);
return FALSE;
}
/*!
* \internal
* \brief Request cluster shutdown if appropriate, otherwise exit immediately
*
* \param[in] nsig Signal that caused invocation (ignored)
*/
static void
lrmd_shutdown(int nsig)
{
#ifdef ENABLE_PCMK_REMOTE
pcmk__client_t *ipc_proxy = ipc_proxy_get_provider();
/* If there are active proxied IPC providers, then we may be running
* resources, so notify the cluster that we wish to shut down.
*/
if (ipc_proxy) {
if (shutting_down) {
crm_notice("Waiting for cluster to stop resources before exiting");
return;
}
crm_info("Sending shutdown request to cluster");
if (ipc_proxy_shutdown_req(ipc_proxy) < 0) {
crm_crit("Shutdown request failed, exiting immediately");
} else {
/* We requested a shutdown. Now, we need to wait for an
* acknowledgement from the proxy host (which ensures the proxy host
* supports shutdown requests), then wait for all proxy hosts to
* disconnect (which ensures that all resources have been stopped).
*/
shutting_down = TRUE;
/* Stop accepting new proxy connections */
lrmd_tls_server_destroy();
/* Older controller versions will never acknowledge our request, so
* set a fairly short timeout to exit quickly in that case. If we
* get the ack, we'll defuse this timer.
*/
shutdown_ack_timer = g_timeout_add_seconds(20, lrmd_exit, NULL);
/* Currently, we let the OS kill us if the clients don't disconnect
* in a reasonable time. We could instead set a long timer here
* (shorter than what the OS is likely to use) and exit immediately
* if it pops.
*/
return;
}
}
#endif
lrmd_exit(NULL);
}
/*!
* \internal
* \brief Defuse short exit timer if shutting down
*/
void handle_shutdown_ack()
{
#ifdef ENABLE_PCMK_REMOTE
if (shutting_down) {
crm_info("Received shutdown ack");
if (shutdown_ack_timer > 0) {
g_source_remove(shutdown_ack_timer);
shutdown_ack_timer = 0;
}
return;
}
#endif
crm_debug("Ignoring unexpected shutdown ack");
}
/*!
* \internal
* \brief Make short exit timer fire immediately
*/
void handle_shutdown_nack()
{
#ifdef ENABLE_PCMK_REMOTE
if (shutting_down) {
crm_info("Received shutdown nack");
if (shutdown_ack_timer > 0) {
g_source_remove(shutdown_ack_timer);
shutdown_ack_timer = g_timeout_add(0, lrmd_exit, NULL);
}
return;
}
#endif
crm_debug("Ignoring unexpected shutdown nack");
}
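/* Shutdown handshake summary: lrmd_shutdown() asks the cluster (via the IPC
 * proxy provider) to stop any resources first and arms a short fallback exit
 * timer; handle_shutdown_ack() defuses that timer once the proxy host
 * acknowledges the request, while handle_shutdown_nack() makes it fire
 * immediately because the request was rejected.
 */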
static pcmk__cli_option_t long_options[] = {
// long option, argument type, storage, short option, description, flags
{
"help", no_argument, NULL, '?',
"\tThis text", pcmk__option_default
},
{
"version", no_argument, NULL, '$',
"\tVersion information", pcmk__option_default
},
{
"verbose", no_argument, NULL, 'V',
"\tIncrease debug output", pcmk__option_default
},
{
"logfile", required_argument, NULL, 'l',
"\tSend logs to the additional named logfile", pcmk__option_default
},
#ifdef ENABLE_PCMK_REMOTE
{
"port", required_argument, NULL, 'p',
"\tPort to listen on", pcmk__option_default
},
#endif
{ 0, 0, 0, 0 }
};
#ifdef ENABLE_PCMK_REMOTE
# define EXECD_TYPE "remote"
# define EXECD_NAME "pacemaker-remoted"
# define EXECD_DESC "resource agent executor daemon for Pacemaker Remote nodes"
#else
# define EXECD_TYPE "local"
# define EXECD_NAME "pacemaker-execd"
# define EXECD_DESC "resource agent executor daemon for Pacemaker cluster nodes"
#endif
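/* Example invocation (illustrative values only):
 *   pacemaker-remoted --port 3121 -V -l /var/log/pacemaker/pacemaker-remoted.log
 * listens on TCP port 3121, bumps the log level once, and sends logs to the
 * named file in addition to the default log targets.
 */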
int
main(int argc, char **argv, char **envp)
{
int flag = 0;
int index = 0;
int bump_log_num = 0;
const char *option = NULL;
#ifdef ENABLE_PCMK_REMOTE
// If necessary, create PID 1 now before any file descriptors are opened
remoted_spawn_pidone(argc, argv, envp);
#endif
crm_log_preinit(EXECD_NAME, argc, argv);
pcmk__set_cli_options(NULL, "[options]", long_options, EXECD_DESC);
while (1) {
flag = pcmk__next_cli_option(argc, argv, &index, NULL);
if (flag == -1) {
break;
}
switch (flag) {
case 'l':
crm_add_logfile(optarg);
break;
case 'p':
setenv("PCMK_remote_port", optarg, 1);
break;
case 'V':
bump_log_num++;
break;
case '?':
case '$':
pcmk__cli_help(flag, CRM_EX_OK);
break;
default:
pcmk__cli_help('?', CRM_EX_USAGE);
break;
}
}
crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
while (bump_log_num > 0) {
crm_bump_log_level(argc, argv);
bump_log_num--;
}
option = pcmk__env_option("logfacility");
- if (option && pcmk__str_none_of(option, "none", "/dev/null", NULL)) {
+ if (option && !pcmk__str_any_of(option, "none", "/dev/null", NULL)) {
setenv("HA_LOGFACILITY", option, 1); /* Used by the ocf_log/ha_log OCF macro */
}
option = pcmk__env_option("logfile");
if(option && safe_str_neq(option, "none")) {
setenv("HA_LOGFILE", option, 1); /* Used by the ocf_log/ha_log OCF macro */
if (pcmk__env_option_enabled(crm_system_name, "debug")) {
setenv("HA_DEBUGLOG", option, 1); /* Used by the ocf_log/ha_debug OCF macro */
}
}
crm_notice("Starting Pacemaker " EXECD_TYPE " executor");
    /* The presence of this variable allegedly controls whether child
     * processes such as httpd will try to use systemd's sd_notify API.
     */
unsetenv("NOTIFY_SOCKET");
/* Used by RAs - Leave owned by root */
crm_build_path(CRM_RSCTMP_DIR, 0755);
rsc_list = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_rsc);
ipcs = mainloop_add_ipc_server(CRM_SYSTEM_LRMD, QB_IPC_SHM, &lrmd_ipc_callbacks);
if (ipcs == NULL) {
crm_err("Failed to create IPC server: shutting down and inhibiting respawn");
crm_exit(CRM_EX_FATAL);
}
#ifdef ENABLE_PCMK_REMOTE
if (lrmd_init_remote_tls_server() < 0) {
crm_err("Failed to create TLS listener: shutting down and staying down");
crm_exit(CRM_EX_FATAL);
}
ipc_proxy_init();
#endif
mainloop_add_signal(SIGTERM, lrmd_shutdown);
mainloop = g_main_loop_new(NULL, FALSE);
crm_notice("Pacemaker " EXECD_TYPE " executor successfully started and accepting connections");
g_main_loop_run(mainloop);
/* should never get here */
lrmd_exit(NULL);
return CRM_EX_OK;
}
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index 9c27d6176f..e287541be3 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -1,2821 +1,2821 @@
/*
* Copyright 2009-2020 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/utsname.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <ctype.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_internal.h>
#include <crm/cluster/internal.h>
#include <crm/common/mainloop.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <crm/common/xml.h>
#include <pacemaker-fenced.h>
GHashTable *device_list = NULL;
GHashTable *topology = NULL;
GList *cmd_list = NULL;
struct device_search_s {
/* target of fence action */
char *host;
/* requested fence action */
char *action;
/* timeout to use if a device is queried dynamically for possible targets */
int per_device_timeout;
/* number of registered fencing devices at time of request */
int replies_needed;
/* number of device replies received so far */
int replies_received;
/* whether the target is eligible to perform requested action (or off) */
bool allow_suicide;
/* private data to pass to search callback function */
void *user_data;
/* function to call when all replies have been received */
void (*callback) (GList * devices, void *user_data);
/* devices capable of performing requested action (or off if remapping) */
GListPtr capable;
};
static gboolean stonith_device_dispatch(gpointer user_data);
static void st_child_done(GPid pid, int rc, const char *output, gpointer user_data);
static void stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer,
const char *client_id);
static void search_devices_record_result(struct device_search_s *search, const char *device,
gboolean can_fence);
typedef struct async_command_s {
int id;
int pid;
int fd_stdout;
int options;
int default_timeout; /* seconds */
int timeout; /* seconds */
int start_delay; /* seconds */
int delay_id;
char *op;
char *origin;
char *client;
char *client_name;
char *remote_op_id;
char *victim;
uint32_t victim_nodeid;
char *action;
char *device;
char *mode;
GListPtr device_list;
GListPtr device_next;
void *internal_user_data;
void (*done_cb) (GPid pid, int rc, const char *output, gpointer user_data);
guint timer_sigterm;
guint timer_sigkill;
/*! If the operation timed out, this is the last signal
* we sent to the process to get it to terminate */
int last_timeout_signo;
stonith_device_t *active_on;
stonith_device_t *activating_on;
} async_command_t;
static xmlNode *stonith_construct_async_reply(async_command_t * cmd, const char *output,
xmlNode * data, int rc);
static gboolean
is_action_required(const char *action, stonith_device_t *device)
{
return device && device->automatic_unfencing && safe_str_eq(action, "on");
}
static int
get_action_delay_max(stonith_device_t * device, const char * action)
{
const char *value = NULL;
int delay_max = 0;
- if (pcmk__str_none_of(action, "off", "reboot", NULL)) {
+ if (!pcmk__str_any_of(action, "off", "reboot", NULL)) {
return 0;
}
value = g_hash_table_lookup(device->params, STONITH_ATTR_DELAY_MAX);
if (value) {
delay_max = crm_parse_interval_spec(value) / 1000;
}
return delay_max;
}
static int
get_action_delay_base(stonith_device_t * device, const char * action)
{
const char *value = NULL;
int delay_base = 0;
- if (pcmk__str_none_of(action, "off", "reboot", NULL)) {
+ if (!pcmk__str_any_of(action, "off", "reboot", NULL)) {
return 0;
}
value = g_hash_table_lookup(device->params, STONITH_ATTR_DELAY_BASE);
if (value) {
delay_base = crm_parse_interval_spec(value) / 1000;
}
return delay_base;
}
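/* Example (illustrative): with pcmk_delay_max="30s" and pcmk_delay_base="5s"
 * configured on the device, crm_parse_interval_spec() yields 30000 and 5000
 * milliseconds respectively, so these helpers return 30 and 5 (seconds) for
 * "off" and "reboot" actions, and 0 for any other action.
 */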
/*!
* \internal
* \brief Override STONITH timeout with pcmk_*_timeout if available
*
* \param[in] device STONITH device to use
* \param[in] action STONITH action name
* \param[in] default_timeout Timeout to use if device does not have
* a pcmk_*_timeout parameter for action
*
* \return Value of pcmk_(action)_timeout if available, otherwise default_timeout
* \note For consistency, it would be nice if reboot/off/on timeouts could be
* set the same way as start/stop/monitor timeouts, i.e. with an
* <operation> entry in the fencing resource configuration. However that
* is insufficient because fencing devices may be registered directly via
* the fencer's register_device() API instead of going through the CIB
* (e.g. stonith_admin uses it for its -R option, and the executor uses it
* to ensure a device is registered when a command is issued). As device
* properties, pcmk_*_timeout parameters can be grabbed by the fencer when
* the device is registered, whether by CIB change or API call.
*/
static int
get_action_timeout(stonith_device_t * device, const char *action, int default_timeout)
{
if (action && device && device->params) {
char buffer[64] = { 0, };
const char *value = NULL;
/* If "reboot" was requested but the device does not support it,
* we will remap to "off", so check timeout for "off" instead
*/
if (safe_str_eq(action, "reboot")
&& is_not_set(device->flags, st_device_supports_reboot)) {
crm_trace("%s doesn't support reboot, using timeout for off instead",
device->id);
action = "off";
}
/* If the device config specified an action-specific timeout, use it */
snprintf(buffer, sizeof(buffer), "pcmk_%s_timeout", action);
value = g_hash_table_lookup(device->params, buffer);
if (value) {
return atoi(value);
}
}
return default_timeout;
}
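/* Example (illustrative): a device configured with pcmk_reboot_timeout=60
 * makes this return 60 for a "reboot" request; if the agent does not
 * advertise reboot support, the request is remapped and pcmk_off_timeout is
 * consulted instead.
 */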
static void
free_async_command(async_command_t * cmd)
{
if (!cmd) {
return;
}
if (cmd->delay_id) {
g_source_remove(cmd->delay_id);
}
cmd_list = g_list_remove(cmd_list, cmd);
g_list_free_full(cmd->device_list, free);
free(cmd->device);
free(cmd->action);
free(cmd->victim);
free(cmd->remote_op_id);
free(cmd->client);
free(cmd->client_name);
free(cmd->origin);
free(cmd->mode);
free(cmd->op);
free(cmd);
}
static async_command_t *
create_async_command(xmlNode * msg)
{
async_command_t *cmd = NULL;
xmlNode *op = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_ERR);
const char *action = crm_element_value(op, F_STONITH_ACTION);
CRM_CHECK(action != NULL, crm_log_xml_warn(msg, "NoAction"); return NULL);
crm_log_xml_trace(msg, "Command");
cmd = calloc(1, sizeof(async_command_t));
crm_element_value_int(msg, F_STONITH_CALLID, &(cmd->id));
crm_element_value_int(msg, F_STONITH_CALLOPTS, &(cmd->options));
crm_element_value_int(msg, F_STONITH_TIMEOUT, &(cmd->default_timeout));
cmd->timeout = cmd->default_timeout;
// Value -1 means disable any static/random fencing delays
crm_element_value_int(msg, F_STONITH_DELAY, &(cmd->start_delay));
cmd->origin = crm_element_value_copy(msg, F_ORIG);
cmd->remote_op_id = crm_element_value_copy(msg, F_STONITH_REMOTE_OP_ID);
cmd->client = crm_element_value_copy(msg, F_STONITH_CLIENTID);
cmd->client_name = crm_element_value_copy(msg, F_STONITH_CLIENTNAME);
cmd->op = crm_element_value_copy(msg, F_STONITH_OPERATION);
cmd->action = strdup(action);
cmd->victim = crm_element_value_copy(op, F_STONITH_TARGET);
cmd->mode = crm_element_value_copy(op, F_STONITH_MODE);
cmd->device = crm_element_value_copy(op, F_STONITH_DEVICE);
CRM_CHECK(cmd->op != NULL, crm_log_xml_warn(msg, "NoOp"); free_async_command(cmd); return NULL);
CRM_CHECK(cmd->client != NULL, crm_log_xml_warn(msg, "NoClient"));
cmd->done_cb = st_child_done;
cmd_list = g_list_append(cmd_list, cmd);
return cmd;
}
static int
get_action_limit(stonith_device_t * device)
{
const char *value = NULL;
int action_limit = 1;
value = g_hash_table_lookup(device->params, STONITH_ATTR_ACTION_LIMIT);
if (value) {
action_limit = crm_parse_int(value, "1");
if (action_limit == 0) {
/* pcmk_action_limit should not be 0. Enforce it to be 1. */
action_limit = 1;
}
}
return action_limit;
}
static int
get_active_cmds(stonith_device_t * device)
{
int counter = 0;
GListPtr gIter = NULL;
GListPtr gIterNext = NULL;
CRM_CHECK(device != NULL, return 0);
for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) {
async_command_t *cmd = gIter->data;
gIterNext = gIter->next;
if (cmd->active_on == device) {
counter++;
}
}
return counter;
}
static void
fork_cb(GPid pid, gpointer user_data)
{
async_command_t *cmd = (async_command_t *) user_data;
stonith_device_t * device =
/* in case of a retry we've done the move from
activating_on to active_on already
*/
cmd->activating_on?cmd->activating_on:cmd->active_on;
CRM_ASSERT(device);
crm_debug("Operation '%s'%s%s on %s now running with pid=%d, timeout=%ds",
cmd->action, cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "",
device->id, pid, cmd->timeout);
cmd->active_on = device;
cmd->activating_on = NULL;
}
static gboolean
stonith_device_execute(stonith_device_t * device)
{
int exec_rc = 0;
const char *action_str = NULL;
const char *host_arg = NULL;
async_command_t *cmd = NULL;
stonith_action_t *action = NULL;
int active_cmds = 0;
int action_limit = 0;
GListPtr gIter = NULL;
GListPtr gIterNext = NULL;
CRM_CHECK(device != NULL, return FALSE);
active_cmds = get_active_cmds(device);
action_limit = get_action_limit(device);
if (action_limit > -1 && active_cmds >= action_limit) {
crm_trace("%s is over its action limit of %d (%u active action%s)",
device->id, action_limit, active_cmds, active_cmds > 1 ? "s" : "");
return TRUE;
}
for (gIter = device->pending_ops; gIter != NULL; gIter = gIterNext) {
async_command_t *pending_op = gIter->data;
gIterNext = gIter->next;
if (pending_op && pending_op->delay_id) {
crm_trace
("Operation '%s'%s%s on %s was asked to run too early, waiting for start_delay timeout of %ds",
pending_op->action, pending_op->victim ? " targeting " : "",
pending_op->victim ? pending_op->victim : "",
device->id, pending_op->start_delay);
continue;
}
device->pending_ops = g_list_remove_link(device->pending_ops, gIter);
g_list_free_1(gIter);
cmd = pending_op;
break;
}
if (cmd == NULL) {
crm_trace("Nothing further to do for %s for now", device->id);
return TRUE;
}
if(safe_str_eq(device->agent, STONITH_WATCHDOG_AGENT)) {
if(safe_str_eq(cmd->action, "reboot")) {
pcmk_panic(__FUNCTION__);
goto done;
} else if(safe_str_eq(cmd->action, "off")) {
pcmk_panic(__FUNCTION__);
goto done;
} else {
crm_info("Faking success for %s watchdog operation", cmd->action);
cmd->done_cb(0, 0, NULL, cmd);
goto done;
}
}
#if SUPPORT_CIBSECRETS
if (pcmk__substitute_secrets(device->id, device->params) != pcmk_rc_ok) {
/* replacing secrets failed! */
if (safe_str_eq(cmd->action,"stop")) {
/* don't fail on stop! */
crm_info("proceeding with the stop operation for %s", device->id);
} else {
crm_err("failed to get secrets for %s, "
"considering resource not configured", device->id);
exec_rc = PCMK_OCF_NOT_CONFIGURED;
cmd->done_cb(0, exec_rc, NULL, cmd);
goto done;
}
}
#endif
action_str = cmd->action;
if (safe_str_eq(cmd->action, "reboot") && is_not_set(device->flags, st_device_supports_reboot)) {
crm_warn("Agent '%s' does not advertise support for 'reboot', performing 'off' action instead", device->agent);
action_str = "off";
}
if (is_set(device->flags, st_device_supports_parameter_port)) {
host_arg = "port";
} else if (is_set(device->flags, st_device_supports_parameter_plug)) {
host_arg = "plug";
}
action = stonith_action_create(device->agent,
action_str,
cmd->victim,
cmd->victim_nodeid,
cmd->timeout, device->params,
device->aliases, host_arg);
    /* For async execution, exec_rc is negative on an early error exit;
     * otherwise, success or failure is handled via the callbacks. */
cmd->activating_on = device;
exec_rc = stonith_action_execute_async(action, (void *)cmd,
cmd->done_cb, fork_cb);
if (exec_rc < 0) {
crm_warn("Operation '%s'%s%s on %s failed: %s (%d)",
cmd->action, cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "",
device->id, pcmk_strerror(exec_rc), exec_rc);
cmd->activating_on = NULL;
cmd->done_cb(0, exec_rc, NULL, cmd);
}
done:
/* Device might get triggered to work by multiple fencing commands
* simultaneously. Trigger the device again to make sure any
* remaining concurrent commands get executed. */
if (device->pending_ops) {
mainloop_set_trigger(device->work);
}
return TRUE;
}
static gboolean
stonith_device_dispatch(gpointer user_data)
{
return stonith_device_execute(user_data);
}
static gboolean
start_delay_helper(gpointer data)
{
async_command_t *cmd = data;
stonith_device_t *device = NULL;
cmd->delay_id = 0;
device = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL;
if (device) {
mainloop_set_trigger(device->work);
}
return FALSE;
}
static void
schedule_stonith_command(async_command_t * cmd, stonith_device_t * device)
{
int delay_max = 0;
int delay_base = 0;
int requested_delay = cmd->start_delay;
CRM_CHECK(cmd != NULL, return);
CRM_CHECK(device != NULL, return);
if (cmd->device) {
free(cmd->device);
}
if (device->include_nodeid && cmd->victim) {
crm_node_t *node = crm_get_peer(0, cmd->victim);
cmd->victim_nodeid = node->id;
}
cmd->device = strdup(device->id);
cmd->timeout = get_action_timeout(device, cmd->action, cmd->default_timeout);
if (cmd->remote_op_id) {
crm_debug("Scheduling '%s' action%s%s on %s for remote peer %s with op id (%s) (timeout=%ds)",
cmd->action,
cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "",
device->id, cmd->origin, cmd->remote_op_id, cmd->timeout);
} else {
crm_debug("Scheduling '%s' action%s%s on %s for %s (timeout=%ds)",
cmd->action,
cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "",
device->id, cmd->client, cmd->timeout);
}
device->pending_ops = g_list_append(device->pending_ops, cmd);
mainloop_set_trigger(device->work);
// Value -1 means disable any static/random fencing delays
if (requested_delay < 0) {
return;
}
delay_max = get_action_delay_max(device, cmd->action);
delay_base = get_action_delay_base(device, cmd->action);
if (delay_max == 0) {
delay_max = delay_base;
}
if (delay_max < delay_base) {
crm_warn("Base-delay (%ds) is larger than max-delay (%ds) "
"for %s on %s - limiting to max-delay",
delay_base, delay_max, cmd->action, device->id);
delay_base = delay_max;
}
if (delay_max > 0) {
// coverity[dont_call] We're not using rand() for security
cmd->start_delay +=
((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0)
+ delay_base;
}
if (cmd->start_delay > 0) {
crm_notice("Delaying '%s' action%s%s on %s for %ds (timeout=%ds, "
"requested_delay=%ds, base=%ds, max=%ds)",
cmd->action,
cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "",
device->id, cmd->start_delay, cmd->timeout,
requested_delay, delay_base, delay_max);
cmd->delay_id =
g_timeout_add_seconds(cmd->start_delay, start_delay_helper, cmd);
}
}
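/* Delay example (illustrative): with pcmk_delay_base="5s" and
 * pcmk_delay_max="30s" on the device (and no requested delay of -1), the
 * scheduled command gets an extra start delay of 5 + (rand() % 25) seconds,
 * i.e. somewhere between 5 and 29 seconds, before start_delay_helper()
 * re-triggers the device.
 */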
static void
free_device(gpointer data)
{
GListPtr gIter = NULL;
stonith_device_t *device = data;
g_hash_table_destroy(device->params);
g_hash_table_destroy(device->aliases);
for (gIter = device->pending_ops; gIter != NULL; gIter = gIter->next) {
async_command_t *cmd = gIter->data;
crm_warn("Removal of device '%s' purged operation '%s'", device->id, cmd->action);
cmd->done_cb(0, -ENODEV, NULL, cmd);
}
g_list_free(device->pending_ops);
g_list_free_full(device->targets, free);
mainloop_destroy_trigger(device->work);
free_xml(device->agent_metadata);
free(device->namespace);
free(device->on_target_actions);
free(device->agent);
free(device->id);
free(device);
}
void free_device_list(void)
{
if (device_list != NULL) {
g_hash_table_destroy(device_list);
device_list = NULL;
}
}
void
init_device_list(void)
{
if (device_list == NULL) {
device_list = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL,
free_device);
}
}
static GHashTable *
build_port_aliases(const char *hostmap, GListPtr * targets)
{
char *name = NULL;
int last = 0, lpc = 0, max = 0, added = 0;
GHashTable *aliases = crm_strcase_table_new();
if (hostmap == NULL) {
return aliases;
}
max = strlen(hostmap);
for (; lpc <= max; lpc++) {
switch (hostmap[lpc]) {
/* Assignment chars */
case '=':
case ':':
if (lpc > last) {
free(name);
name = calloc(1, 1 + lpc - last);
memcpy(name, hostmap + last, lpc - last);
}
last = lpc + 1;
break;
            /* Delimiter chars */
/* case ',': Potentially used to specify multiple ports */
case 0:
case ';':
case ' ':
case '\t':
if (name) {
char *value = NULL;
value = calloc(1, 1 + lpc - last);
memcpy(value, hostmap + last, lpc - last);
crm_debug("Adding alias '%s'='%s'", name, value);
g_hash_table_replace(aliases, name, value);
if (targets) {
*targets = g_list_append(*targets, strdup(value));
}
value = NULL;
name = NULL;
added++;
} else if (lpc > last) {
crm_debug("Parse error at offset %d near '%s'", lpc - last, hostmap + last);
}
last = lpc + 1;
break;
}
if (hostmap[lpc] == 0) {
break;
}
}
if (added == 0) {
crm_info("No host mappings detected in '%s'", hostmap);
}
free(name);
return aliases;
}
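/* Example (illustrative): pcmk_host_map="node1:1;node2:2" produces the alias
 * table { node1 -> "1", node2 -> "2" } and appends "1" and "2" to the
 * device's target list.
 */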
GHashTable *metadata_cache = NULL;
void
free_metadata_cache(void) {
if (metadata_cache != NULL) {
g_hash_table_destroy(metadata_cache);
metadata_cache = NULL;
}
}
static void
init_metadata_cache(void) {
if (metadata_cache == NULL) {
metadata_cache = crm_str_table_new();
}
}
static xmlNode *
get_agent_metadata(const char *agent)
{
xmlNode *xml = NULL;
char *buffer = NULL;
init_metadata_cache();
buffer = g_hash_table_lookup(metadata_cache, agent);
if(safe_str_eq(agent, STONITH_WATCHDOG_AGENT)) {
return NULL;
} else if(buffer == NULL) {
stonith_t *st = stonith_api_new();
int rc;
if (st == NULL) {
crm_warn("Could not get agent meta-data: "
"API memory allocation failed");
return NULL;
}
rc = st->cmds->metadata(st, st_opt_sync_call, agent, NULL, &buffer, 10);
stonith_api_delete(st);
if (rc || !buffer) {
crm_err("Could not retrieve metadata for fencing agent %s", agent);
return NULL;
}
g_hash_table_replace(metadata_cache, strdup(agent), buffer);
}
xml = string2xml(buffer);
return xml;
}
static gboolean
is_nodeid_required(xmlNode * xml)
{
xmlXPathObjectPtr xpath = NULL;
if (stand_alone) {
return FALSE;
}
if (!xml) {
return FALSE;
}
xpath = xpath_search(xml, "//parameter[@name='nodeid']");
if (numXpathResults(xpath) <= 0) {
freeXpathObject(xpath);
return FALSE;
}
freeXpathObject(xpath);
return TRUE;
}
#define MAX_ACTION_LEN 256
static char *
add_action(char *actions, const char *action)
{
int offset = 0;
if (actions == NULL) {
actions = calloc(1, MAX_ACTION_LEN);
} else {
offset = strlen(actions);
}
if (offset > 0) {
offset += snprintf(actions+offset, MAX_ACTION_LEN - offset, " ");
}
offset += snprintf(actions+offset, MAX_ACTION_LEN - offset, "%s", action);
return actions;
}
static void
read_action_metadata(stonith_device_t *device)
{
xmlXPathObjectPtr xpath = NULL;
int max = 0;
int lpc = 0;
if (device->agent_metadata == NULL) {
return;
}
xpath = xpath_search(device->agent_metadata, "//action");
max = numXpathResults(xpath);
if (max <= 0) {
freeXpathObject(xpath);
return;
}
for (lpc = 0; lpc < max; lpc++) {
const char *on_target = NULL;
const char *action = NULL;
xmlNode *match = getXpathResult(xpath, lpc);
CRM_LOG_ASSERT(match != NULL);
if(match == NULL) { continue; };
on_target = crm_element_value(match, "on_target");
action = crm_element_value(match, "name");
if(safe_str_eq(action, "list")) {
set_bit(device->flags, st_device_supports_list);
} else if(safe_str_eq(action, "status")) {
set_bit(device->flags, st_device_supports_status);
} else if(safe_str_eq(action, "reboot")) {
set_bit(device->flags, st_device_supports_reboot);
} else if (safe_str_eq(action, "on")) {
/* "automatic" means the cluster will unfence node when it joins */
const char *automatic = crm_element_value(match, "automatic");
/* "required" is a deprecated synonym for "automatic" */
const char *required = crm_element_value(match, "required");
if (crm_is_true(automatic) || crm_is_true(required)) {
device->automatic_unfencing = TRUE;
}
}
if (action && crm_is_true(on_target)) {
device->on_target_actions = add_action(device->on_target_actions, action);
}
}
freeXpathObject(xpath);
}
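/* Example (illustrative) of agent metadata consumed here:
 *   <actions>
 *     <action name="reboot"/>
 *     <action name="on" automatic="1" on_target="1"/>
 *   </actions>
 * The first entry sets st_device_supports_reboot; the second enables
 * automatic unfencing and records "on" as an on-target action.
 */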
/*!
* \internal
* \brief Set a pcmk_*_action parameter if not already set
*
* \param[in,out] params Device parameters
* \param[in] action Name of action
* \param[in] value Value to use if action is not already set
*/
static void
map_action(GHashTable *params, const char *action, const char *value)
{
char *key = crm_strdup_printf("pcmk_%s_action", action);
if (g_hash_table_lookup(params, key)) {
crm_warn("Ignoring %s='%s', see %s instead",
STONITH_ATTR_ACTION_OP, value, key);
free(key);
} else {
crm_warn("Mapping %s='%s' to %s='%s'",
STONITH_ATTR_ACTION_OP, value, key, value);
g_hash_table_insert(params, key, strdup(value));
}
}
/*!
* \internal
* \brief Create device parameter table from XML
*
* \param[in] name Device name (used for logging only)
* \param[in,out] params Device parameters
*/
static GHashTable *
xml2device_params(const char *name, xmlNode *dev)
{
GHashTable *params = xml2list(dev);
const char *value;
/* Action should never be specified in the device configuration,
* but we support it for users who are familiar with other software
* that worked that way.
*/
value = g_hash_table_lookup(params, STONITH_ATTR_ACTION_OP);
if (value != NULL) {
crm_warn("%s has '%s' parameter, which should never be specified in configuration",
name, STONITH_ATTR_ACTION_OP);
if (*value == '\0') {
crm_warn("Ignoring empty '%s' parameter", STONITH_ATTR_ACTION_OP);
} else if (strcmp(value, "reboot") == 0) {
crm_warn("Ignoring %s='reboot' (see stonith-action cluster property instead)",
STONITH_ATTR_ACTION_OP);
} else if (strcmp(value, "off") == 0) {
map_action(params, "reboot", value);
} else {
map_action(params, "off", value);
map_action(params, "reboot", value);
}
g_hash_table_remove(params, STONITH_ATTR_ACTION_OP);
}
return params;
}
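/* Example (illustrative): a legacy device configuration with action="poweroff"
 * is mapped to pcmk_off_action="poweroff" and pcmk_reboot_action="poweroff"
 * (unless those are already set), while action="reboot" is simply dropped in
 * favour of the stonith-action cluster property.
 */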
static stonith_device_t *
build_device_from_xml(xmlNode * msg)
{
const char *value;
xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, msg, LOG_ERR);
stonith_device_t *device = NULL;
char *agent = crm_element_value_copy(dev, "agent");
CRM_CHECK(agent != NULL, return device);
device = calloc(1, sizeof(stonith_device_t));
CRM_CHECK(device != NULL, {free(agent); return device;});
device->id = crm_element_value_copy(dev, XML_ATTR_ID);
device->agent = agent;
device->namespace = crm_element_value_copy(dev, "namespace");
device->params = xml2device_params(device->id, dev);
value = g_hash_table_lookup(device->params, STONITH_ATTR_HOSTLIST);
if (value) {
device->targets = stonith__parse_targets(value);
}
value = g_hash_table_lookup(device->params, STONITH_ATTR_HOSTMAP);
device->aliases = build_port_aliases(value, &(device->targets));
device->agent_metadata = get_agent_metadata(device->agent);
if (device->agent_metadata) {
read_action_metadata(device);
set_bit(device->flags,
stonith__device_parameter_flags(device->agent_metadata));
}
value = g_hash_table_lookup(device->params, "nodeid");
if (!value) {
device->include_nodeid = is_nodeid_required(device->agent_metadata);
}
value = crm_element_value(dev, "rsc_provides");
if (safe_str_eq(value, "unfencing")) {
device->automatic_unfencing = TRUE;
}
if (is_action_required("on", device)) {
crm_info("The fencing device '%s' requires unfencing", device->id);
}
if (device->on_target_actions) {
crm_info("The fencing device '%s' requires actions (%s) to be executed on the target node",
device->id, device->on_target_actions);
}
device->work = mainloop_add_trigger(G_PRIORITY_HIGH, stonith_device_dispatch, device);
/* TODO: Hook up priority */
return device;
}
static const char *
target_list_type(stonith_device_t * dev)
{
const char *check_type = NULL;
check_type = g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTCHECK);
if (check_type == NULL) {
if (g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTLIST)) {
check_type = "static-list";
} else if (g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTMAP)) {
check_type = "static-list";
} else if(is_set(dev->flags, st_device_supports_list)){
check_type = "dynamic-list";
} else if(is_set(dev->flags, st_device_supports_status)){
check_type = "status";
} else {
check_type = "none";
}
}
return check_type;
}
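/* In short: an explicit pcmk_host_check parameter wins; otherwise a configured
 * host list or map implies "static-list", an advertised "list" action implies
 * "dynamic-list", an advertised "status" action implies "status", and "none"
 * is the fallback.
 */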
static void
schedule_internal_command(const char *origin,
stonith_device_t * device,
const char *action,
const char *victim,
int timeout,
void *internal_user_data,
void (*done_cb) (GPid pid, int rc, const char *output,
gpointer user_data))
{
async_command_t *cmd = NULL;
cmd = calloc(1, sizeof(async_command_t));
cmd->id = -1;
cmd->default_timeout = timeout ? timeout : 60;
cmd->timeout = cmd->default_timeout;
cmd->action = strdup(action);
cmd->victim = victim ? strdup(victim) : NULL;
cmd->device = strdup(device->id);
cmd->origin = strdup(origin);
cmd->client = strdup(crm_system_name);
cmd->client_name = strdup(crm_system_name);
cmd->internal_user_data = internal_user_data;
cmd->done_cb = done_cb; /* cmd, not internal_user_data, is passed to 'done_cb' as the userdata */
schedule_stonith_command(cmd, device);
}
gboolean
string_in_list(GListPtr list, const char *item)
{
int lpc = 0;
int max = g_list_length(list);
for (lpc = 0; lpc < max; lpc++) {
const char *value = g_list_nth_data(list, lpc);
if (safe_str_eq(item, value)) {
return TRUE;
} else {
crm_trace("%d: '%s' != '%s'", lpc, item, value);
}
}
return FALSE;
}
static void
status_search_cb(GPid pid, int rc, const char *output, gpointer user_data)
{
async_command_t *cmd = user_data;
struct device_search_s *search = cmd->internal_user_data;
stonith_device_t *dev = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL;
gboolean can = FALSE;
free_async_command(cmd);
if (!dev) {
search_devices_record_result(search, NULL, FALSE);
return;
}
mainloop_set_trigger(dev->work);
if (rc == 1 /* unknown */ ) {
crm_trace("Host %s is not known by %s", search->host, dev->id);
} else if (rc == 0 /* active */ || rc == 2 /* inactive */ ) {
crm_trace("Host %s is known by %s", search->host, dev->id);
can = TRUE;
} else {
crm_notice("Unknown result when testing if %s can fence %s: rc=%d", dev->id, search->host,
rc);
}
search_devices_record_result(search, dev->id, can);
}
static void
dynamic_list_search_cb(GPid pid, int rc, const char *output, gpointer user_data)
{
async_command_t *cmd = user_data;
struct device_search_s *search = cmd->internal_user_data;
stonith_device_t *dev = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL;
gboolean can_fence = FALSE;
free_async_command(cmd);
/* Host/alias must be in the list output to be eligible to be fenced
*
     * Will cause problems if downed nodes aren't listed or (for virtual nodes)
* if the guest is still listed despite being moved to another machine
*/
if (!dev) {
search_devices_record_result(search, NULL, FALSE);
return;
}
mainloop_set_trigger(dev->work);
/* If we successfully got the targets earlier, don't disable. */
if (rc != 0 && !dev->targets) {
crm_notice("Disabling port list queries for %s (%d): %s", dev->id, rc, output);
/* Fall back to status */
g_hash_table_replace(dev->params, strdup(STONITH_ATTR_HOSTCHECK), strdup("status"));
g_list_free_full(dev->targets, free);
dev->targets = NULL;
} else if (!rc) {
crm_info("Refreshing port list for %s", dev->id);
g_list_free_full(dev->targets, free);
dev->targets = stonith__parse_targets(output);
dev->targets_age = time(NULL);
}
if (dev->targets) {
const char *alias = g_hash_table_lookup(dev->aliases, search->host);
if (!alias) {
alias = search->host;
}
if (string_in_list(dev->targets, alias)) {
can_fence = TRUE;
}
}
search_devices_record_result(search, dev->id, can_fence);
}
/*!
* \internal
 * \brief Return true if any key in the first table is missing from the second table or has a different value there
*/
static int
device_params_diff(GHashTable *first, GHashTable *second) {
char *key = NULL;
char *value = NULL;
GHashTableIter gIter;
g_hash_table_iter_init(&gIter, first);
while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&value)) {
if(strstr(key, "CRM_meta") == key) {
continue;
} else if(strcmp(key, "crm_feature_set") == 0) {
continue;
} else {
char *other_value = g_hash_table_lookup(second, key);
if (!other_value || safe_str_neq(other_value, value)) {
crm_trace("Different value for %s: %s != %s", key, other_value, value);
return 1;
}
}
}
return 0;
}
/*!
* \internal
* \brief Checks to see if an identical device already exists in the device_list
*/
static stonith_device_t *
device_has_duplicate(stonith_device_t * device)
{
stonith_device_t *dup = g_hash_table_lookup(device_list, device->id);
if (!dup) {
crm_trace("No match for %s", device->id);
return NULL;
} else if (safe_str_neq(dup->agent, device->agent)) {
crm_trace("Different agent: %s != %s", dup->agent, device->agent);
return NULL;
}
/* Use calculate_operation_digest() here? */
if (device_params_diff(device->params, dup->params) ||
device_params_diff(dup->params, device->params)) {
return NULL;
}
crm_trace("Match");
return dup;
}
int
stonith_device_register(xmlNode * msg, const char **desc, gboolean from_cib)
{
stonith_device_t *dup = NULL;
stonith_device_t *device = build_device_from_xml(msg);
CRM_CHECK(device != NULL, return -ENOMEM);
dup = device_has_duplicate(device);
if (dup) {
crm_debug("Device '%s' already existed in device list (%d active devices)", device->id,
g_hash_table_size(device_list));
free_device(device);
device = dup;
} else {
stonith_device_t *old = g_hash_table_lookup(device_list, device->id);
if (from_cib && old && old->api_registered) {
/* If the cib is writing over an entry that is shared with a stonith client,
* copy any pending ops that currently exist on the old entry to the new one.
* Otherwise the pending ops will be reported as failures
*/
crm_info("Overwriting an existing entry for %s from the cib", device->id);
device->pending_ops = old->pending_ops;
device->api_registered = TRUE;
old->pending_ops = NULL;
if (device->pending_ops) {
mainloop_set_trigger(device->work);
}
}
g_hash_table_replace(device_list, device->id, device);
crm_notice("Added '%s' to the device list (%d active devices)", device->id,
g_hash_table_size(device_list));
}
if (desc) {
*desc = device->id;
}
if (from_cib) {
device->cib_registered = TRUE;
} else {
device->api_registered = TRUE;
}
return pcmk_ok;
}
int
stonith_device_remove(const char *id, gboolean from_cib)
{
stonith_device_t *device = g_hash_table_lookup(device_list, id);
if (!device) {
crm_info("Device '%s' not found (%d active devices)", id, g_hash_table_size(device_list));
return pcmk_ok;
}
if (from_cib) {
device->cib_registered = FALSE;
} else {
device->verified = FALSE;
device->api_registered = FALSE;
}
if (!device->cib_registered && !device->api_registered) {
g_hash_table_remove(device_list, id);
crm_info("Removed '%s' from the device list (%d active devices)",
id, g_hash_table_size(device_list));
} else {
crm_trace("Not removing '%s' from the device list (%d active devices) "
"- still %s%s_registered", id, g_hash_table_size(device_list),
device->cib_registered?"cib":"", device->api_registered?"api":"");
}
return pcmk_ok;
}
/*!
* \internal
* \brief Return the number of stonith levels registered for a node
*
* \param[in] tp Node's topology table entry
*
* \return Number of non-NULL levels in topology entry
* \note This function is used only for log messages.
*/
static int
count_active_levels(stonith_topology_t * tp)
{
int lpc = 0;
int count = 0;
for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) {
if (tp->levels[lpc] != NULL) {
count++;
}
}
return count;
}
static void
free_topology_entry(gpointer data)
{
stonith_topology_t *tp = data;
int lpc = 0;
for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) {
if (tp->levels[lpc] != NULL) {
g_list_free_full(tp->levels[lpc], free);
}
}
free(tp->target);
free(tp->target_value);
free(tp->target_pattern);
free(tp->target_attribute);
free(tp);
}
void
free_topology_list(void)
{
if (topology != NULL) {
g_hash_table_destroy(topology);
topology = NULL;
}
}
void
init_topology_list(void)
{
if (topology == NULL) {
topology = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL,
free_topology_entry);
}
}
char *stonith_level_key(xmlNode *level, int mode)
{
if(mode == -1) {
mode = stonith_level_kind(level);
}
switch(mode) {
case 0:
return crm_element_value_copy(level, XML_ATTR_STONITH_TARGET);
case 1:
return crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_PATTERN);
case 2:
{
const char *name = crm_element_value(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE);
const char *value = crm_element_value(level, XML_ATTR_STONITH_TARGET_VALUE);
if(name && value) {
return crm_strdup_printf("%s=%s", name, value);
}
}
default:
return crm_strdup_printf("Unknown-%d-%s", mode, ID(level));
}
}
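/* Example (illustrative): a level whose XML carries
 * XML_ATTR_STONITH_TARGET_ATTRIBUTE resolving to "rack" and
 * XML_ATTR_STONITH_TARGET_VALUE resolving to "1" (kind 2) is keyed as
 * "rack=1"; a plain target (kind 0) is keyed by the node name itself.
 */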
int stonith_level_kind(xmlNode * level)
{
int mode = 0;
const char *target = crm_element_value(level, XML_ATTR_STONITH_TARGET);
if(target == NULL) {
mode++;
target = crm_element_value(level, XML_ATTR_STONITH_TARGET_PATTERN);
}
if(stand_alone == FALSE && target == NULL) {
mode++;
if(crm_element_value(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE) == NULL) {
mode++;
} else if(crm_element_value(level, XML_ATTR_STONITH_TARGET_VALUE) == NULL) {
mode++;
}
}
return mode;
}
static stonith_key_value_t *
parse_device_list(const char *devices)
{
int lpc = 0;
int max = 0;
int last = 0;
stonith_key_value_t *output = NULL;
if (devices == NULL) {
return output;
}
max = strlen(devices);
for (lpc = 0; lpc <= max; lpc++) {
if (devices[lpc] == ',' || devices[lpc] == 0) {
char *line = strndup(devices + last, lpc - last);
output = stonith_key_value_add(output, NULL, line);
free(line);
last = lpc + 1;
}
}
return output;
}
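/* Example (illustrative): the string "fence_ipmi1,fence_ipmi2" yields a
 * two-entry key/value list with the values "fence_ipmi1" and "fence_ipmi2".
 */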
/*!
* \internal
* \brief Register a STONITH level for a target
*
* Given an XML request specifying the target name, level index, and device IDs
* for the level, this will create an entry for the target in the global topology
* table if one does not already exist, then append the specified device IDs to
* the entry's device list for the specified level.
*
* \param[in] msg XML request for STONITH level registration
* \param[out] desc If not NULL, will be set to string representation ("TARGET[LEVEL]")
*
* \return pcmk_ok on success, -EINVAL if XML does not specify valid level index
*/
int
stonith_level_register(xmlNode *msg, char **desc)
{
int id = 0;
xmlNode *level;
int mode;
char *target;
stonith_topology_t *tp;
stonith_key_value_t *dIter = NULL;
stonith_key_value_t *devices = NULL;
/* Allow the XML here to point to the level tag directly, or wrapped in
* another tag. If directly, don't search by xpath, because it might give
* multiple hits (e.g. if the XML is the CIB).
*/
if (safe_str_eq(TYPE(msg), XML_TAG_FENCING_LEVEL)) {
level = msg;
} else {
level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_ERR);
}
CRM_CHECK(level != NULL, return -EINVAL);
mode = stonith_level_kind(level);
target = stonith_level_key(level, mode);
crm_element_value_int(level, XML_ATTR_STONITH_INDEX, &id);
if (desc) {
*desc = crm_strdup_printf("%s[%d]", target, id);
}
/* Sanity-check arguments */
if (mode >= 3 || (id <= 0) || (id >= ST_LEVEL_MAX)) {
crm_trace("Could not add %s[%d] (%d) to the topology (%d active entries)", target, id, mode, g_hash_table_size(topology));
free(target);
crm_log_xml_err(level, "Bad topology");
return -EINVAL;
}
/* Find or create topology table entry */
tp = g_hash_table_lookup(topology, target);
if (tp == NULL) {
tp = calloc(1, sizeof(stonith_topology_t));
tp->kind = mode;
tp->target = target;
tp->target_value = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_VALUE);
tp->target_pattern = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_PATTERN);
tp->target_attribute = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE);
g_hash_table_replace(topology, tp->target, tp);
crm_trace("Added %s (%d) to the topology (%d active entries)",
target, mode, g_hash_table_size(topology));
} else {
free(target);
}
if (tp->levels[id] != NULL) {
crm_info("Adding to the existing %s[%d] topology entry",
tp->target, id);
}
devices = parse_device_list(crm_element_value(level, XML_ATTR_STONITH_DEVICES));
for (dIter = devices; dIter; dIter = dIter->next) {
const char *device = dIter->value;
crm_trace("Adding device '%s' for %s[%d]", device, tp->target, id);
tp->levels[id] = g_list_append(tp->levels[id], strdup(device));
}
stonith_key_value_freeall(devices, 1, 1);
crm_info("Target %s has %d active fencing levels",
tp->target, count_active_levels(tp));
return pcmk_ok;
}
int
stonith_level_remove(xmlNode *msg, char **desc)
{
int id = 0;
stonith_topology_t *tp;
char *target;
/* Unlike additions, removal requests should always have one level tag */
xmlNode *level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_ERR);
CRM_CHECK(level != NULL, return -EINVAL);
target = stonith_level_key(level, -1);
crm_element_value_int(level, XML_ATTR_STONITH_INDEX, &id);
if (desc) {
*desc = crm_strdup_printf("%s[%d]", target, id);
}
/* Sanity-check arguments */
if (id >= ST_LEVEL_MAX) {
free(target);
return -EINVAL;
}
tp = g_hash_table_lookup(topology, target);
if (tp == NULL) {
crm_info("Topology for %s not found (%d active entries)",
target, g_hash_table_size(topology));
} else if (id == 0 && g_hash_table_remove(topology, target)) {
crm_info("Removed all %s related entries from the topology (%d active entries)",
target, g_hash_table_size(topology));
} else if (id > 0 && tp->levels[id] != NULL) {
g_list_free_full(tp->levels[id], free);
tp->levels[id] = NULL;
crm_info("Removed level '%d' from topology for %s (%d active levels remaining)",
id, target, count_active_levels(tp));
}
free(target);
return pcmk_ok;
}
/*!
* \internal
* \brief Schedule an (asynchronous) action directly on a stonith device
*
* Handle a STONITH_OP_EXEC API message by scheduling a requested agent action
* directly on a specified device. Only list, monitor, and status actions are
* expected to use this call, though it should work with any agent command.
*
* \param[in] msg API message XML with desired action
* \param[out] output Unused
*
* \return -EINPROGRESS on success, -errno otherwise
* \note If the action is monitor, the device must be registered via the API
* (CIB registration is not sufficient), because monitor should not be
* possible unless the device is "started" (API registered).
*/
static int
stonith_device_action(xmlNode * msg, char **output)
{
xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, msg, LOG_ERR);
xmlNode *op = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_ERR);
const char *id = crm_element_value(dev, F_STONITH_DEVICE);
const char *action = crm_element_value(op, F_STONITH_ACTION);
async_command_t *cmd = NULL;
stonith_device_t *device = NULL;
if ((id == NULL) || (action == NULL)) {
crm_info("Malformed API action request: device %s, action %s",
(id? id : "not specified"),
(action? action : "not specified"));
return -EPROTO;
}
device = g_hash_table_lookup(device_list, id);
if ((device == NULL)
|| (!device->api_registered && !strcmp(action, "monitor"))) {
// Monitors may run only on "started" (API-registered) devices
crm_info("Ignoring API '%s' action request because device %s not found",
action, id);
return -ENODEV;
}
cmd = create_async_command(msg);
if (cmd == NULL) {
return -EPROTO;
}
schedule_stonith_command(cmd, device);
return -EINPROGRESS;
}
static void
search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence)
{
search->replies_received++;
if (can_fence && device) {
search->capable = g_list_append(search->capable, strdup(device));
}
if (search->replies_needed == search->replies_received) {
crm_debug("Finished Search. %d devices can perform action (%s) on node %s",
g_list_length(search->capable),
search->action ? search->action : "<unknown>",
search->host ? search->host : "<anyone>");
search->callback(search->capable, search->user_data);
free(search->host);
free(search->action);
free(search);
}
}
/*!
* \internal
* \brief Check whether the local host is allowed to execute a fencing action
*
* \param[in] device Fence device to check
* \param[in] action Fence action to check
* \param[in] target Hostname of fence target
* \param[in] allow_suicide Whether self-fencing is allowed for this operation
*
* \return TRUE if local host is allowed to execute action, FALSE otherwise
*/
static gboolean
localhost_is_eligible(const stonith_device_t *device, const char *action,
const char *target, gboolean allow_suicide)
{
gboolean localhost_is_target = safe_str_eq(target, stonith_our_uname);
if (device && action && device->on_target_actions
&& strstr(device->on_target_actions, action)) {
if (!localhost_is_target) {
crm_trace("'%s' operation with %s can only be executed for localhost not %s",
action, device->id, target);
return FALSE;
}
} else if (localhost_is_target && !allow_suicide) {
crm_trace("'%s' operation does not support self-fencing", action);
return FALSE;
}
return TRUE;
}
static void
can_fence_host_with_device(stonith_device_t * dev, struct device_search_s *search)
{
gboolean can = FALSE;
const char *check_type = NULL;
const char *host = search->host;
const char *alias = NULL;
CRM_LOG_ASSERT(dev != NULL);
if (dev == NULL) {
goto search_report_results;
} else if (host == NULL) {
can = TRUE;
goto search_report_results;
}
/* Short-circuit query if this host is not allowed to perform the action */
if (safe_str_eq(search->action, "reboot")) {
/* A "reboot" *might* get remapped to "off" then "on", so short-circuit
* only if all three are disallowed. If only one or two are disallowed,
* we'll report that with the results. We never allow suicide for
* remapped "on" operations because the host is off at that point.
*/
if (!localhost_is_eligible(dev, "reboot", host, search->allow_suicide)
&& !localhost_is_eligible(dev, "off", host, search->allow_suicide)
&& !localhost_is_eligible(dev, "on", host, FALSE)) {
goto search_report_results;
}
} else if (!localhost_is_eligible(dev, search->action, host,
search->allow_suicide)) {
goto search_report_results;
}
alias = g_hash_table_lookup(dev->aliases, host);
if (alias == NULL) {
alias = host;
}
check_type = target_list_type(dev);
if (safe_str_eq(check_type, "none")) {
can = TRUE;
} else if (safe_str_eq(check_type, "static-list")) {
/* Presence in the hostmap is sufficient
* Only use if all hosts on which the device can be active can always fence all listed hosts
*/
if (string_in_list(dev->targets, host)) {
can = TRUE;
} else if (g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTMAP)
&& g_hash_table_lookup(dev->aliases, host)) {
can = TRUE;
}
} else if (safe_str_eq(check_type, "dynamic-list")) {
time_t now = time(NULL);
if (dev->targets == NULL || dev->targets_age + 60 < now) {
crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)",
check_type, dev->id, search->host, search->action);
schedule_internal_command(__FUNCTION__, dev, "list", NULL,
search->per_device_timeout, search, dynamic_list_search_cb);
/* we'll respond to this search request async in the cb */
return;
}
if (string_in_list(dev->targets, alias)) {
can = TRUE;
}
} else if (safe_str_eq(check_type, "status")) {
crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)",
check_type, dev->id, search->host, search->action);
schedule_internal_command(__FUNCTION__, dev, "status", search->host,
search->per_device_timeout, search, status_search_cb);
/* we'll respond to this search request async in the cb */
return;
} else {
crm_err("Invalid value for " STONITH_ATTR_HOSTCHECK ": %s", check_type);
check_type = "Invalid " STONITH_ATTR_HOSTCHECK;
}
if (safe_str_eq(host, alias)) {
crm_notice("%s is%s eligible to fence (%s) %s: %s",
dev->id, (can? "" : " not"), search->action, host,
check_type);
} else {
crm_notice("%s is%s eligible to fence (%s) %s (aka. '%s'): %s",
dev->id, (can? "" : " not"), search->action, host, alias,
check_type);
}
search_report_results:
search_devices_record_result(search, dev ? dev->id : NULL, can);
}
static void
search_devices(gpointer key, gpointer value, gpointer user_data)
{
stonith_device_t *dev = value;
struct device_search_s *search = user_data;
can_fence_host_with_device(dev, search);
}
#define DEFAULT_QUERY_TIMEOUT 20
static void
get_capable_devices(const char *host, const char *action, int timeout, bool suicide, void *user_data,
void (*callback) (GList * devices, void *user_data))
{
struct device_search_s *search;
int per_device_timeout = DEFAULT_QUERY_TIMEOUT;
int devices_needing_async_query = 0;
char *key = NULL;
const char *check_type = NULL;
GHashTableIter gIter;
stonith_device_t *device = NULL;
if (!g_hash_table_size(device_list)) {
callback(NULL, user_data);
return;
}
search = calloc(1, sizeof(struct device_search_s));
if (!search) {
callback(NULL, user_data);
return;
}
g_hash_table_iter_init(&gIter, device_list);
while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&device)) {
check_type = target_list_type(device);
if (pcmk__str_any_of(check_type, "status", "dynamic-list", NULL)) {
devices_needing_async_query++;
}
}
/* If we have devices that require an async event in order to know what
* nodes they can fence, we have to give the events a timeout. The total
* query timeout is divided among those events. */
if (devices_needing_async_query) {
per_device_timeout = timeout / devices_needing_async_query;
if (!per_device_timeout) {
crm_err("STONITH timeout %ds is too low; using %ds, but consider raising to at least %ds",
timeout, DEFAULT_QUERY_TIMEOUT,
DEFAULT_QUERY_TIMEOUT * devices_needing_async_query);
per_device_timeout = DEFAULT_QUERY_TIMEOUT;
} else if (per_device_timeout < DEFAULT_QUERY_TIMEOUT) {
crm_notice("STONITH timeout %ds is low for the current configuration;"
" consider raising to at least %ds",
timeout, DEFAULT_QUERY_TIMEOUT * devices_needing_async_query);
}
}
search->host = host ? strdup(host) : NULL;
search->action = action ? strdup(action) : NULL;
search->per_device_timeout = per_device_timeout;
/* We are guaranteed this many replies. Even if a device gets
     * unregistered somehow during the async search, we will get
* the correct number of replies. */
search->replies_needed = g_hash_table_size(device_list);
search->allow_suicide = suicide;
search->callback = callback;
search->user_data = user_data;
/* kick off the search */
crm_debug("Searching through %d devices to see what is capable of action (%s) for target %s",
search->replies_needed,
search->action ? search->action : "<unknown>",
search->host ? search->host : "<anyone>");
g_hash_table_foreach(device_list, search_devices, search);
}
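/* Timeout example (illustrative): with a 20s query timeout and 4 devices that
 * need an asynchronous list/status call, each such query gets 5s; with 25 such
 * devices the integer share would be 0, so the 20s default is used instead and
 * an error is logged suggesting a larger total timeout.
 */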
struct st_query_data {
xmlNode *reply;
char *remote_peer;
char *client_id;
char *target;
char *action;
int call_options;
};
/*!
* \internal
* \brief Add action-specific attributes to query reply XML
*
* \param[in,out] xml XML to add attributes to
* \param[in] action Fence action
* \param[in] device Fence device
*/
static void
add_action_specific_attributes(xmlNode *xml, const char *action,
stonith_device_t *device)
{
int action_specific_timeout;
int delay_max;
int delay_base;
CRM_CHECK(xml && action && device, return);
if (is_action_required(action, device)) {
crm_trace("Action '%s' is required on %s", action, device->id);
crm_xml_add_int(xml, F_STONITH_DEVICE_REQUIRED, 1);
}
action_specific_timeout = get_action_timeout(device, action, 0);
if (action_specific_timeout) {
crm_trace("Action '%s' has timeout %dms on %s",
action, action_specific_timeout, device->id);
crm_xml_add_int(xml, F_STONITH_ACTION_TIMEOUT, action_specific_timeout);
}
delay_max = get_action_delay_max(device, action);
if (delay_max > 0) {
crm_trace("Action '%s' has maximum random delay %dms on %s",
action, delay_max, device->id);
crm_xml_add_int(xml, F_STONITH_DELAY_MAX, delay_max / 1000);
}
delay_base = get_action_delay_base(device, action);
if (delay_base > 0) {
crm_xml_add_int(xml, F_STONITH_DELAY_BASE, delay_base / 1000);
}
if ((delay_max > 0) && (delay_base == 0)) {
crm_trace("Action '%s' has maximum random delay %dms on %s",
action, delay_max, device->id);
} else if ((delay_max == 0) && (delay_base > 0)) {
crm_trace("Action '%s' has a static delay of %dms on %s",
action, delay_base, device->id);
} else if ((delay_max > 0) && (delay_base > 0)) {
crm_trace("Action '%s' has a minimum delay of %dms and a randomly chosen "
"maximum delay of %dms on %s",
action, delay_base, delay_max, device->id);
}
}
/*!
* \internal
* \brief Add "disallowed" attribute to query reply XML if appropriate
*
* \param[in,out] xml XML to add attribute to
* \param[in] action Fence action
* \param[in] device Fence device
* \param[in] target Fence target
* \param[in] allow_suicide Whether self-fencing is allowed
*/
static void
add_disallowed(xmlNode *xml, const char *action, stonith_device_t *device,
const char *target, gboolean allow_suicide)
{
if (!localhost_is_eligible(device, action, target, allow_suicide)) {
crm_trace("Action '%s' on %s is disallowed for local host",
action, device->id);
crm_xml_add(xml, F_STONITH_ACTION_DISALLOWED, XML_BOOLEAN_TRUE);
}
}
/*!
* \internal
* \brief Add child element with action-specific values to query reply XML
*
* \param[in,out] xml XML to add attribute to
* \param[in] action Fence action
* \param[in] device Fence device
* \param[in] target Fence target
* \param[in] allow_suicide Whether self-fencing is allowed
*/
static void
add_action_reply(xmlNode *xml, const char *action, stonith_device_t *device,
const char *target, gboolean allow_suicide)
{
xmlNode *child = create_xml_node(xml, F_STONITH_ACTION);
crm_xml_add(child, XML_ATTR_ID, action);
add_action_specific_attributes(child, action, device);
add_disallowed(child, action, device, target, allow_suicide);
}
static void
stonith_query_capable_device_cb(GList * devices, void *user_data)
{
struct st_query_data *query = user_data;
int available_devices = 0;
xmlNode *dev = NULL;
xmlNode *list = NULL;
GListPtr lpc = NULL;
/* Pack the results into XML */
list = create_xml_node(NULL, __FUNCTION__);
crm_xml_add(list, F_STONITH_TARGET, query->target);
for (lpc = devices; lpc != NULL; lpc = lpc->next) {
stonith_device_t *device = g_hash_table_lookup(device_list, lpc->data);
const char *action = query->action;
if (!device) {
/* It is possible the device got unregistered while
* determining who can fence the target */
continue;
}
available_devices++;
dev = create_xml_node(list, F_STONITH_DEVICE);
crm_xml_add(dev, XML_ATTR_ID, device->id);
crm_xml_add(dev, "namespace", device->namespace);
crm_xml_add(dev, "agent", device->agent);
crm_xml_add_int(dev, F_STONITH_DEVICE_VERIFIED, device->verified);
/* If the originating fencer wants to reboot the node, and we have a
* capable device that doesn't support "reboot", remap to "off" instead.
*/
if (is_not_set(device->flags, st_device_supports_reboot)
&& safe_str_eq(query->action, "reboot")) {
crm_trace("%s doesn't support reboot, using values for off instead",
device->id);
action = "off";
}
/* Add action-specific values if available */
add_action_specific_attributes(dev, action, device);
if (safe_str_eq(query->action, "reboot")) {
/* A "reboot" *might* get remapped to "off" then "on", so after
* sending the "reboot"-specific values in the main element, we add
* sub-elements for "off" and "on" values.
*
* We short-circuited earlier if "reboot", "off" and "on" are all
* disallowed for the local host. However if only one or two are
* disallowed, we send back the results and mark which ones are
* disallowed. If "reboot" is disallowed, this might cause problems
* with older fencer versions, which won't check for it. Older
* versions will ignore "off" and "on", so they are not a problem.
*/
add_disallowed(dev, action, device, query->target,
is_set(query->call_options, st_opt_allow_suicide));
add_action_reply(dev, "off", device, query->target,
is_set(query->call_options, st_opt_allow_suicide));
add_action_reply(dev, "on", device, query->target, FALSE);
}
/* A query without a target wants device parameters */
if (query->target == NULL) {
xmlNode *attrs = create_xml_node(dev, XML_TAG_ATTRS);
g_hash_table_foreach(device->params, hash2field, attrs);
}
}
crm_xml_add_int(list, F_STONITH_AVAILABLE_DEVICES, available_devices);
if (query->target) {
crm_debug("Found %d matching devices for '%s'", available_devices, query->target);
} else {
crm_debug("%d devices installed", available_devices);
}
if (list != NULL) {
crm_log_xml_trace(list, "Add query results");
add_message_xml(query->reply, F_STONITH_CALLDATA, list);
}
stonith_send_reply(query->reply, query->call_options, query->remote_peer, query->client_id);
free_xml(query->reply);
free(query->remote_peer);
free(query->client_id);
free(query->target);
free(query->action);
free(query);
free_xml(list);
g_list_free_full(devices, free);
}
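/*!
* \internal
* \brief Handle a fencing query request
*
* Parses the target and action from the request (requests for the manual_ack
* pseudo-device need no query or reply), then asynchronously asks each
* registered device whether it can fence the target; the reply is sent from
* stonith_query_capable_device_cb() once the results are in.
*/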
static void
stonith_query(xmlNode * msg, const char *remote_peer, const char *client_id, int call_options)
{
struct st_query_data *query = NULL;
const char *action = NULL;
const char *target = NULL;
int timeout = 0;
xmlNode *dev = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_NEVER);
crm_element_value_int(msg, F_STONITH_TIMEOUT, &timeout);
if (dev) {
const char *device = crm_element_value(dev, F_STONITH_DEVICE);
target = crm_element_value(dev, F_STONITH_TARGET);
action = crm_element_value(dev, F_STONITH_ACTION);
if (device && safe_str_eq(device, "manual_ack")) {
/* No query or reply necessary */
return;
}
}
crm_log_xml_debug(msg, "Query");
query = calloc(1, sizeof(struct st_query_data));
query->reply = stonith_construct_reply(msg, NULL, NULL, pcmk_ok);
query->remote_peer = remote_peer ? strdup(remote_peer) : NULL;
query->client_id = client_id ? strdup(client_id) : NULL;
query->target = target ? strdup(target) : NULL;
query->action = action ? strdup(action) : NULL;
query->call_options = call_options;
get_capable_devices(target, action, timeout,
is_set(call_options, st_opt_allow_suicide),
query, stonith_query_capable_device_cb);
}
#define ST_LOG_OUTPUT_MAX 512
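/* Log the result of a device action at a severity based on success or
* failure, including any agent output and, when applicable, the next device
* that will be tried */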
static void
log_operation(async_command_t * cmd, int rc, int pid, const char *next, const char *output, gboolean op_merged)
{
if (rc == 0) {
next = NULL;
}
if (cmd->victim != NULL) {
do_crm_log(rc == 0 ? LOG_NOTICE : LOG_ERR,
"Operation '%s' [%d] (call %d from %s) for host '%s' with device '%s' returned%s: %d (%s)%s%s",
cmd->action, pid, cmd->id, cmd->client_name, cmd->victim,
cmd->device, (op_merged? " (merged)" : ""),
rc, pcmk_strerror(rc),
(next? ", retrying with " : ""), (next ? next : ""));
} else {
do_crm_log_unlikely(rc == 0 ? LOG_DEBUG : LOG_NOTICE,
"Operation '%s' [%d] for device '%s' returned%s: %d (%s)%s%s",
cmd->action, pid, cmd->device,
(op_merged? " (merged)" : ""),
rc, pcmk_strerror(rc),
(next? ", retrying with " : ""), (next ? next : ""));
}
if (output) {
/* Logging the whole string confuses syslog when the string is xml */
char *prefix = crm_strdup_printf("%s:%d", cmd->device, pid);
crm_log_output(rc == 0 ? LOG_DEBUG : LOG_WARNING, prefix, output);
free(prefix);
}
}
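/*!
* \internal
* \brief Send the result of an asynchronous device action
*
* Builds the reply XML and either broadcasts it to the cluster (for
* self-fencing results), directs it to the originating peer, or replies
* locally to the requesting client. In stand-alone mode, notifications are
* also generated here.
*/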
static void
stonith_send_async_reply(async_command_t * cmd, const char *output, int rc, GPid pid, int options)
{
xmlNode *reply = NULL;
gboolean bcast = FALSE;
reply = stonith_construct_async_reply(cmd, output, NULL, rc);
if (safe_str_eq(cmd->action, "metadata")) {
/* Too verbose to log */
crm_trace("Metadata query for %s", cmd->device);
output = NULL;
} else if (crm_str_eq(cmd->action, "monitor", TRUE) ||
crm_str_eq(cmd->action, "list", TRUE) || crm_str_eq(cmd->action, "status", TRUE)) {
crm_trace("Never broadcast '%s' replies", cmd->action);
} else if (!stand_alone && safe_str_eq(cmd->origin, cmd->victim) && safe_str_neq(cmd->action, "on")) {
crm_trace("Broadcast '%s' reply for %s", cmd->action, cmd->victim);
crm_xml_add(reply, F_SUBTYPE, "broadcast");
bcast = TRUE;
}
log_operation(cmd, rc, pid, NULL, output, (options & st_reply_opt_merged ? TRUE : FALSE));
crm_log_xml_trace(reply, "Reply");
if (options & st_reply_opt_merged) {
crm_xml_add(reply, F_STONITH_MERGED, "true");
}
if (bcast) {
crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY);
send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE);
} else if (cmd->origin) {
crm_trace("Directed reply to %s", cmd->origin);
send_cluster_message(crm_get_peer(0, cmd->origin), crm_msg_stonith_ng, reply, FALSE);
} else {
crm_trace("Directed local %ssync reply to %s",
(cmd->options & st_opt_sync_call) ? "" : "a-", cmd->client_name);
do_local_reply(reply, cmd->client, cmd->options & st_opt_sync_call, FALSE);
}
if (stand_alone) {
/* Do notification with a clean data object */
xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);
crm_xml_add_int(notify_data, F_STONITH_RC, rc);
crm_xml_add(notify_data, F_STONITH_TARGET, cmd->victim);
crm_xml_add(notify_data, F_STONITH_OPERATION, cmd->op);
crm_xml_add(notify_data, F_STONITH_DELEGATE, "localhost");
crm_xml_add(notify_data, F_STONITH_DEVICE, cmd->device);
crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id);
crm_xml_add(notify_data, F_STONITH_ORIGIN, cmd->client);
do_stonith_notify(0, T_STONITH_NOTIFY_FENCE, rc, notify_data);
do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL);
}
free_xml(reply);
}
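/* Remove a command from its device's list of pending operations */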
static void
cancel_stonith_command(async_command_t * cmd)
{
stonith_device_t *device;
CRM_CHECK(cmd != NULL, return);
if (!cmd->device) {
return;
}
device = g_hash_table_lookup(device_list, cmd->device);
if (device) {
crm_trace("Cancel scheduled '%s' action on %s", cmd->action, device->id);
device->pending_ops = g_list_remove(device->pending_ops, cmd);
}
}
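/*!
* \internal
* \brief Handle completion of a fence device action
*
* Marks the device as verified after a successful monitor/list/status,
* schedules the next device when one is still required (or when this device
* failed and another may be tried), and otherwise sends the reply and merges
* any pending duplicate requests so the same fencing action is not executed
* twice.
*/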
static void
st_child_done(GPid pid, int rc, const char *output, gpointer user_data)
{
stonith_device_t *device = NULL;
stonith_device_t *next_device = NULL;
async_command_t *cmd = user_data;
GListPtr gIter = NULL;
GListPtr gIterNext = NULL;
CRM_CHECK(cmd != NULL, return);
cmd->active_on = NULL;
/* The device is ready to do something else now */
device = g_hash_table_lookup(device_list, cmd->device);
if (device) {
if (!device->verified && (rc == pcmk_ok) &&
(safe_str_eq(cmd->action, "list") ||
safe_str_eq(cmd->action, "monitor") || safe_str_eq(cmd->action, "status"))) {
device->verified = TRUE;
}
mainloop_set_trigger(device->work);
}
crm_debug("Operation '%s' on '%s' completed with rc=%d (%d remaining)",
cmd->action, cmd->device, rc, g_list_length(cmd->device_next));
if (rc == 0) {
GListPtr iter;
/* see if there are any required devices left to execute for this op */
for (iter = cmd->device_next; iter != NULL; iter = iter->next) {
next_device = g_hash_table_lookup(device_list, iter->data);
if (next_device != NULL && is_action_required(cmd->action, next_device)) {
cmd->device_next = iter->next;
break;
}
next_device = NULL;
}
} else if (rc != 0 && cmd->device_next && (is_action_required(cmd->action, device) == FALSE)) {
/* if this device didn't work out, see if there are any others we can try.
* if the failed device was 'required', we can't pick another device. */
next_device = g_hash_table_lookup(device_list, cmd->device_next->data);
cmd->device_next = cmd->device_next->next;
}
/* this operation requires more fencing, hooray! */
if (next_device) {
log_operation(cmd, rc, pid, next_device->id, output, FALSE);
schedule_stonith_command(cmd, next_device);
/* Prevent cmd from being freed */
cmd = NULL;
goto done;
}
stonith_send_async_reply(cmd, output, rc, pid, st_reply_opt_none);
if (rc != 0) {
goto done;
}
/* Check to see if any operations are scheduled to do the exact
* same thing that just completed. If so, rather than
* performing the same fencing operation twice, return the result
* of this operation for all pending commands it matches. */
for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) {
async_command_t *cmd_other = gIter->data;
gIterNext = gIter->next;
if (cmd == cmd_other) {
continue;
}
/* A pending scheduled command matches the command that just finished if:
* 1. The client connections are different.
* 2. The node victim is the same.
* 3. The fencing action is the same.
* 4. The device scheduled to execute the action is the same.
*/
if (safe_str_eq(cmd->client, cmd_other->client) ||
safe_str_neq(cmd->victim, cmd_other->victim) ||
safe_str_neq(cmd->action, cmd_other->action) ||
safe_str_neq(cmd->device, cmd_other->device)) {
continue;
}
/* Duplicate merging will do the right thing for either type of remapped
* reboot. If the executing fencer remapped an unsupported reboot to
* off, then cmd->action will be reboot and will be merged with any
* other reboot requests. If the originating fencer remapped a
* topology reboot to off then on, we will get here once with
* cmd->action "off" and once with "on", and they will be merged
* separately with similar requests.
*/
crm_notice
("Merging stonith action '%s' targeting %s originating from client %s with identical stonith request from client %s",
cmd_other->action, cmd_other->victim, cmd_other->client_name, cmd->client_name);
cmd_list = g_list_remove_link(cmd_list, gIter);
stonith_send_async_reply(cmd_other, output, rc, pid, st_reply_opt_merged);
cancel_stonith_command(cmd_other);
free_async_command(cmd_other);
g_list_free_1(gIter);
}
done:
free_async_command(cmd);
}
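/* GCompareFunc for g_list_sort(): order fence devices by descending priority */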
static gint
sort_device_priority(gconstpointer a, gconstpointer b)
{
const stonith_device_t *dev_a = a;
const stonith_device_t *dev_b = b;
if (dev_a->priority > dev_b->priority) {
return -1;
} else if (dev_a->priority < dev_b->priority) {
return 1;
}
return 0;
}
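/*!
* \internal
* \brief Schedule fencing on the highest-priority capable device
*
* Callback invoked by get_capable_devices() for a direct fencing request:
* sorts the capable devices by priority and schedules the command on the
* first one, or replies with -ENODEV if no device can fence the target.
*/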
static void
stonith_fence_get_devices_cb(GList * devices, void *user_data)
{
async_command_t *cmd = user_data;
stonith_device_t *device = NULL;
crm_info("Found %d matching devices for '%s'", g_list_length(devices), cmd->victim);
if (devices != NULL) {
/* Order based on priority */
devices = g_list_sort(devices, sort_device_priority);
device = g_hash_table_lookup(device_list, devices->data);
if (device) {
cmd->device_list = devices;
cmd->device_next = devices->next;
devices = NULL; /* list owned by cmd now */
}
}
/* we have a device, schedule it for fencing. */
if (device) {
schedule_stonith_command(cmd, device);
/* in progress */
return;
}
/* no device found! */
stonith_send_async_reply(cmd, NULL, -ENODEV, 0, st_reply_opt_none);
free_async_command(cmd);
g_list_free_full(devices, free);
}
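/*!
* \internal
* \brief Execute a fencing request on this node
*
* If the request names a specific device, schedule it directly; otherwise
* search for capable devices (implicitly allowing self-fencing) and continue
* in stonith_fence_get_devices_cb(). Returns -EINPROGRESS on success because
* the reply is sent asynchronously.
*/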
static int
stonith_fence(xmlNode * msg)
{
const char *device_id = NULL;
stonith_device_t *device = NULL;
async_command_t *cmd = create_async_command(msg);
xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR);
if (cmd == NULL) {
return -EPROTO;
}
device_id = crm_element_value(dev, F_STONITH_DEVICE);
if (device_id) {
device = g_hash_table_lookup(device_list, device_id);
if (device == NULL) {
crm_err("Requested device '%s' is not available", device_id);
return -ENODEV;
}
schedule_stonith_command(cmd, device);
} else {
const char *host = crm_element_value(dev, F_STONITH_TARGET);
if (cmd->options & st_opt_cs_nodeid) {
int nodeid = crm_atoi(host, NULL);
crm_node_t *node = crm_find_known_peer_full(nodeid, NULL, CRM_GET_PEER_ANY);
if (node) {
host = node->uname;
}
}
/* If we get to here, then self-fencing is implicitly allowed */
get_capable_devices(host, cmd->action, cmd->default_timeout,
TRUE, cmd, stonith_fence_get_devices_cb);
}
return -EINPROGRESS;
}
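/*!
* \brief Build a reply for a fencer request
*
* Copies identifying fields (operation, call ID, client, remote op ID, call
* options) from the original request and attaches the result code, any
* command output, and optional call data. If the request is NULL (e.g., an
* operation initiated before this fencer started), the reply lacks the
* client-identifying fields.
*/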
xmlNode *
stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, int rc)
{
xmlNode *reply = NULL;
reply = create_xml_node(NULL, T_STONITH_REPLY);
crm_xml_add(reply, "st_origin", __FUNCTION__);
crm_xml_add(reply, F_TYPE, T_STONITH_NG);
crm_xml_add(reply, "st_output", output);
crm_xml_add_int(reply, F_STONITH_RC, rc);
if (request == NULL) {
/* Most likely, this is the result of a stonith operation that was
* initiated before we came up. Unfortunately that means we lack enough
* information to provide clients with a full result.
*
* @TODO Maybe synchronize this information at start-up?
*/
crm_warn("Missing request information for client notifications for "
"operation with result %d (initiated before we came up?)", rc);
} else {
const char *name = NULL;
const char *value = NULL;
const char *names[] = {
F_STONITH_OPERATION,
F_STONITH_CALLID,
F_STONITH_CLIENTID,
F_STONITH_CLIENTNAME,
F_STONITH_REMOTE_OP_ID,
F_STONITH_CALLOPTS
};
crm_trace("Creating a result reply with%s reply output (rc=%d)",
(data? "" : "out"), rc);
for (int lpc = 0; lpc < DIMOF(names); lpc++) {
name = names[lpc];
value = crm_element_value(request, name);
crm_xml_add(reply, name, value);
}
if (data != NULL) {
add_message_xml(reply, F_STONITH_CALLDATA, data);
}
}
return reply;
}
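/* Build a reply for an asynchronous device action, using the fields recorded
* in the command itself rather than the original request XML */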
static xmlNode *
stonith_construct_async_reply(async_command_t * cmd, const char *output, xmlNode * data, int rc)
{
xmlNode *reply = NULL;
crm_trace("Creating a basic reply");
reply = create_xml_node(NULL, T_STONITH_REPLY);
crm_xml_add(reply, "st_origin", __FUNCTION__);
crm_xml_add(reply, F_TYPE, T_STONITH_NG);
crm_xml_add(reply, F_STONITH_OPERATION, cmd->op);
crm_xml_add(reply, F_STONITH_DEVICE, cmd->device);
crm_xml_add(reply, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id);
crm_xml_add(reply, F_STONITH_CLIENTID, cmd->client);
crm_xml_add(reply, F_STONITH_CLIENTNAME, cmd->client_name);
crm_xml_add(reply, F_STONITH_TARGET, cmd->victim);
crm_xml_add(reply, F_STONITH_ACTION, cmd->op);
crm_xml_add(reply, F_STONITH_ORIGIN, cmd->origin);
crm_xml_add_int(reply, F_STONITH_CALLID, cmd->id);
crm_xml_add_int(reply, F_STONITH_CALLOPTS, cmd->options);
crm_xml_add_int(reply, F_STONITH_RC, rc);
crm_xml_add(reply, "st_output", output);
if (data != NULL) {
crm_info("Attaching reply output");
add_message_xml(reply, F_STONITH_CALLDATA, data);
}
return reply;
}
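/* Check whether a cluster peer is known by name and has an active cluster
* process, i.e. can be expected to handle fencing requests */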
bool fencing_peer_active(crm_node_t *peer)
{
if (peer == NULL) {
return FALSE;
} else if (peer->uname == NULL) {
return FALSE;
} else if (is_set(peer->processes, crm_get_cluster_proc())) {
return TRUE;
}
return FALSE;
}
/*!
* \internal
* \brief Determine whether an alternate node is needed to fence the target
*
* \param[in] target Name of the node to be fenced
*
* \return uname of an alternate host to use, or NULL if none is needed or available
*/
static const char *
check_alternate_host(const char *target)
{
const char *alternate_host = NULL;
crm_trace("Checking if we (%s) can fence %s", stonith_our_uname, target);
if (find_topology_for_host(target) && safe_str_eq(target, stonith_our_uname)) {
GHashTableIter gIter;
crm_node_t *entry = NULL;
g_hash_table_iter_init(&gIter, crm_peer_cache);
while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
crm_trace("Checking for %s.%d != %s", entry->uname, entry->id, target);
if (fencing_peer_active(entry)
&& safe_str_neq(entry->uname, target)) {
alternate_host = entry->uname;
break;
}
}
if (alternate_host == NULL) {
crm_err("No alternate host available to handle complex self fencing request");
g_hash_table_iter_init(&gIter, crm_peer_cache);
while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
crm_notice("Peer[%d] %s", entry->id, entry->uname);
}
}
}
return alternate_host;
}
static void
stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer,
const char *client_id)
{
if (remote_peer) {
send_cluster_message(crm_get_peer(0, remote_peer), crm_msg_stonith_ng, reply, FALSE);
} else {
do_local_reply(reply, client_id, is_set(call_options, st_opt_sync_call), remote_peer != NULL);
}
}
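/*!
* \internal
* \brief Delete a RELAY operation that has been superseded
*
* When a self-fencing request has been relayed to another node and the query
* for the new operation arrives back at the target, remove the original RELAY
* operation (and any duplicate registrations referencing it) so the request
* is not tracked twice.
*/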
static void
remove_relay_op(xmlNode * request)
{
xmlNode *dev = get_xpath_object("//@" F_STONITH_ACTION, request, LOG_TRACE);
const char *relay_op_id = NULL;
const char *op_id = NULL;
const char *client_name = NULL;
const char *target = NULL;
remote_fencing_op_t *relay_op = NULL;
if (dev) {
target = crm_element_value(dev, F_STONITH_TARGET);
}
relay_op_id = crm_element_value(request, F_STONITH_REMOTE_OP_ID_RELAY);
op_id = crm_element_value(request, F_STONITH_REMOTE_OP_ID);
client_name = crm_element_value(request, F_STONITH_CLIENTNAME);
/* Delete RELAY operation. */
if (relay_op_id && target && safe_str_eq(target, stonith_our_uname)) {
relay_op = g_hash_table_lookup(stonith_remote_op_list, relay_op_id);
if (relay_op) {
GHashTableIter iter;
remote_fencing_op_t *list_op = NULL;
g_hash_table_iter_init(&iter, stonith_remote_op_list);
/* If the operation to be deleted is registered as a duplicate, delete the registration. */
while (g_hash_table_iter_next(&iter, NULL, (void **)&list_op)) {
GListPtr dup_iter = NULL;
if (list_op != relay_op) {
for (dup_iter = list_op->duplicates; dup_iter != NULL; dup_iter = dup_iter->next) {
remote_fencing_op_t *other = dup_iter->data;
if (other == relay_op) {
other->duplicates = g_list_remove(other->duplicates, relay_op);
break;
}
}
}
}
crm_info("Delete the relay op : %s - %s of %s for %s.(replaced by op : %s - %s of %s for %s)",
relay_op->id, relay_op->action, relay_op->target, relay_op->client_name,
op_id, relay_op->action, target, client_name);
g_hash_table_remove(stonith_remote_op_list, relay_op_id);
}
}
}
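/*!
* \internal
* \brief Dispatch an incoming fencer request
*
* Handles requests from local IPC clients and cluster peers (register, query,
* fence, device and topology changes, etc.). Returns -EINPROGRESS when the
* result will be reported asynchronously; otherwise a reply is sent (when
* appropriate) before returning.
*/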
static int
handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
xmlNode *request, const char *remote_peer)
{
int call_options = 0;
int rc = -EOPNOTSUPP;
xmlNode *data = NULL;
xmlNode *reply = NULL;
char *output = NULL;
const char *op = crm_element_value(request, F_STONITH_OPERATION);
const char *client_id = crm_element_value(request, F_STONITH_CLIENTID);
crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
if (is_set(call_options, st_opt_sync_call)) {
CRM_ASSERT(client == NULL || client->request_id == id);
}
if (crm_str_eq(op, CRM_OP_REGISTER, TRUE)) {
xmlNode *reply = create_xml_node(NULL, "reply");
CRM_ASSERT(client);
crm_xml_add(reply, F_STONITH_OPERATION, CRM_OP_REGISTER);
crm_xml_add(reply, F_STONITH_CLIENTID, client->id);
pcmk__ipc_send_xml(client, id, reply, flags);
client->request_id = 0;
free_xml(reply);
return 0;
} else if (crm_str_eq(op, STONITH_OP_EXEC, TRUE)) {
rc = stonith_device_action(request, &output);
} else if (crm_str_eq(op, STONITH_OP_TIMEOUT_UPDATE, TRUE)) {
const char *call_id = crm_element_value(request, F_STONITH_CALLID);
const char *client_id = crm_element_value(request, F_STONITH_CLIENTID);
int op_timeout = 0;
crm_element_value_int(request, F_STONITH_TIMEOUT, &op_timeout);
do_stonith_async_timeout_update(client_id, call_id, op_timeout);
return 0;
} else if (crm_str_eq(op, STONITH_OP_QUERY, TRUE)) {
if (remote_peer) {
create_remote_stonith_op(client_id, request, TRUE); /* Record it for the future notification */
}
/* Delete the DC node RELAY operation. */
remove_relay_op(request);
stonith_query(request, remote_peer, client_id, call_options);
return 0;
} else if (crm_str_eq(op, T_STONITH_NOTIFY, TRUE)) {
const char *flag_name = NULL;
CRM_ASSERT(client);
flag_name = crm_element_value(request, F_STONITH_NOTIFY_ACTIVATE);
if (flag_name) {
crm_debug("Setting %s callbacks for %s (%s): ON", flag_name, client->name, client->id);
client->options |= get_stonith_flag(flag_name);
}
flag_name = crm_element_value(request, F_STONITH_NOTIFY_DEACTIVATE);
if (flag_name) {
crm_debug("Setting %s callbacks for %s (%s): off", flag_name, client->name, client->id);
client->options &= ~get_stonith_flag(flag_name); /* deactivate: clear the flag */
}
if (flags & crm_ipc_client_response) {
pcmk__ipc_send_ack(client, id, flags, "ack");
}
return 0;
} else if (crm_str_eq(op, STONITH_OP_RELAY, TRUE)) {
xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE);
crm_notice("Peer %s has received a forwarded fencing request from %s to fence (%s) peer %s",
stonith_our_uname,
client ? client->name : remote_peer,
crm_element_value(dev, F_STONITH_ACTION),
crm_element_value(dev, F_STONITH_TARGET));
if (initiate_remote_stonith_op(NULL, request, FALSE) != NULL) {
rc = -EINPROGRESS;
}
} else if (crm_str_eq(op, STONITH_OP_FENCE, TRUE)) {
if (remote_peer || stand_alone) {
rc = stonith_fence(request);
} else if (call_options & st_opt_manual_ack) {
remote_fencing_op_t *rop = NULL;
xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE);
const char *target = crm_element_value(dev, F_STONITH_TARGET);
crm_notice("Received manual confirmation that %s is fenced", target);
rop = initiate_remote_stonith_op(client, request, TRUE);
rc = stonith_manual_ack(request, rop);
} else {
const char *alternate_host = NULL;
xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE);
const char *target = crm_element_value(dev, F_STONITH_TARGET);
const char *action = crm_element_value(dev, F_STONITH_ACTION);
const char *device = crm_element_value(dev, F_STONITH_DEVICE);
if (client) {
int tolerance = 0;
crm_notice("Client %s.%.8s wants to fence (%s) '%s' with device '%s'",
client->name, client->id, action, target, device ? device : "(any)");
crm_element_value_int(dev, F_STONITH_TOLERANCE, &tolerance);
if (stonith_check_fence_tolerance(tolerance, target, action)) {
rc = 0;
goto done;
}
} else {
crm_notice("Peer %s wants to fence (%s) '%s' with device '%s'",
remote_peer, action, target, device ? device : "(any)");
}
alternate_host = check_alternate_host(target);
if (alternate_host && client) {
const char *client_id = NULL;
remote_fencing_op_t *op = NULL;
crm_notice("Forwarding complex self fencing request to peer %s", alternate_host);
if (client->id) {
client_id = client->id;
} else {
client_id = crm_element_value(request, F_STONITH_CLIENTID);
}
/* Create an operation for RELAY and send the ID in the RELAY message. */
/* When a QUERY response is received, delete the RELAY operation to avoid the existence of duplicate operations. */
op = create_remote_stonith_op(client_id, request, FALSE);
crm_xml_add(request, F_STONITH_OPERATION, STONITH_OP_RELAY);
crm_xml_add(request, F_STONITH_CLIENTID, client->id);
crm_xml_add(request, F_STONITH_REMOTE_OP_ID, op->id);
send_cluster_message(crm_get_peer(0, alternate_host), crm_msg_stonith_ng, request,
FALSE);
rc = -EINPROGRESS;
} else if (initiate_remote_stonith_op(client, request, FALSE) != NULL) {
rc = -EINPROGRESS;
}
}
} else if (crm_str_eq(op, STONITH_OP_FENCE_HISTORY, TRUE)) {
rc = stonith_fence_history(request, &data, remote_peer, call_options);
if (call_options & st_opt_discard_reply) {
/* we don't expect answers to the broadcast
* we might have sent out
*/
free_xml(data);
return pcmk_ok;
}
} else if (crm_str_eq(op, STONITH_OP_DEVICE_ADD, TRUE)) {
const char *device_id = NULL;
rc = stonith_device_register(request, &device_id, FALSE);
do_stonith_notify_device(call_options, op, rc, device_id);
} else if (crm_str_eq(op, STONITH_OP_DEVICE_DEL, TRUE)) {
xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request, LOG_ERR);
const char *device_id = crm_element_value(dev, XML_ATTR_ID);
rc = stonith_device_remove(device_id, FALSE);
do_stonith_notify_device(call_options, op, rc, device_id);
} else if (crm_str_eq(op, STONITH_OP_LEVEL_ADD, TRUE)) {
char *device_id = NULL;
rc = stonith_level_register(request, &device_id);
do_stonith_notify_level(call_options, op, rc, device_id);
free(device_id);
} else if (crm_str_eq(op, STONITH_OP_LEVEL_DEL, TRUE)) {
char *device_id = NULL;
rc = stonith_level_remove(request, &device_id);
do_stonith_notify_level(call_options, op, rc, device_id);
free(device_id);
} else if(safe_str_eq(op, CRM_OP_RM_NODE_CACHE)) {
int node_id = 0;
const char *name = NULL;
crm_element_value_int(request, XML_ATTR_ID, &node_id);
name = crm_element_value(request, XML_ATTR_UNAME);
reap_crm_member(node_id, name);
return pcmk_ok;
} else {
crm_err("Unknown IPC request %s from %s",
op, (client? client->name : remote_peer));
}
done:
/* Always reply unless the request is still in progress.
* If it is, a reply will be sent asynchronously once
* processing finishes */
if (rc != -EINPROGRESS) {
crm_trace("Reply handling: %p %u %u %d %d %s", client, client?client->request_id:0,
id, is_set(call_options, st_opt_sync_call), call_options,
crm_element_value(request, F_STONITH_CALLOPTS));
if (is_set(call_options, st_opt_sync_call)) {
CRM_ASSERT(client == NULL || client->request_id == id);
}
reply = stonith_construct_reply(request, output, data, rc);
stonith_send_reply(reply, call_options, remote_peer, client_id);
}
free(output);
free_xml(data);
free_xml(reply);
return rc;
}
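/* Hand a peer's reply (query result or fencing outcome) to the remote-fencing
* code tracking the corresponding operation */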
static void
handle_reply(pcmk__client_t *client, xmlNode *request, const char *remote_peer)
{
const char *op = crm_element_value(request, F_STONITH_OPERATION);
if (crm_str_eq(op, STONITH_OP_QUERY, TRUE)) {
process_remote_stonith_query(request);
} else if (crm_str_eq(op, T_STONITH_NOTIFY, TRUE)) {
process_remote_stonith_exec(request);
} else if (crm_str_eq(op, STONITH_OP_FENCE, TRUE)) {
/* Reply to a complex fencing op */
process_remote_stonith_exec(request);
} else {
crm_err("Unknown %s reply from %s", op, client ? client->name : remote_peer);
crm_log_xml_warn(request, "UnknownOp");
}
}
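/*!
* \internal
* \brief Handle a message from an IPC client or cluster peer
*
* Entry point for all fencer traffic: distinguishes replies from requests and
* hands them to handle_reply() or handle_request() respectively.
*/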
void
stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags,
xmlNode *request, const char *remote_peer)
{
int call_options = 0;
int rc = 0;
gboolean is_reply = FALSE;
/* Copy op for reporting. The original might get freed by handle_reply()
* before we use it in crm_debug():
* handle_reply()
* |- process_remote_stonith_exec()
* |-- remote_op_done()
* |--- handle_local_reply_and_notify()
* |---- crm_xml_add(...F_STONITH_OPERATION...)
* |--- free_xml(op->request)
*/
char *op = crm_element_value_copy(request, F_STONITH_OPERATION);
if (get_xpath_object("//" T_STONITH_REPLY, request, LOG_NEVER)) {
is_reply = TRUE;
}
crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
crm_debug("Processing %s%s %u from %s (%16x)", op, is_reply ? " reply" : "",
id, client ? client->name : remote_peer, call_options);
if (is_set(call_options, st_opt_sync_call)) {
CRM_ASSERT(client == NULL || client->request_id == id);
}
if (is_reply) {
handle_reply(client, request, remote_peer);
} else {
rc = handle_request(client, id, flags, request, remote_peer);
}
crm_debug("Processed %s%s from %s: %s (%d)", op,
is_reply ? " reply" : "", client ? client->name : remote_peer,
rc > 0 ? "" : pcmk_strerror(rc), rc);
free(op);
}
diff --git a/include/crm/common/strings_internal.h b/include/crm/common/strings_internal.h
index 681a59f433..8aca4ff9de 100644
--- a/include/crm/common/strings_internal.h
+++ b/include/crm/common/strings_internal.h
@@ -1,63 +1,62 @@
/*
* Copyright 2015-2020 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__STRINGS_INTERNAL__H
#define PCMK__STRINGS_INTERNAL__H
#include <stdbool.h> // bool
#include <glib.h> // guint, GList, GHashTable
/* internal generic string functions (from strings.c) */
int pcmk__guint_from_hash(GHashTable *table, const char *key, guint default_val,
guint *result);
bool pcmk__starts_with(const char *str, const char *prefix);
bool pcmk__ends_with(const char *s, const char *match);
bool pcmk__ends_with_ext(const char *s, const char *match);
char *pcmk__add_word(char *list, const char *word);
int pcmk__compress(const char *data, unsigned int length, unsigned int max,
char **result, unsigned int *result_len);
int pcmk__parse_ll_range(const char *srcstring, long long *start, long long *end);
gboolean pcmk__str_in_list(GList *lst, const gchar *s);
bool pcmk__str_any_of(const char *s, ...) G_GNUC_NULL_TERMINATED;
-bool pcmk__str_none_of(const char *s, ...) G_GNUC_NULL_TERMINATED;
/* Correctly displaying singular or plural is complicated; consider "1 node has"
* vs. "2 nodes have". A flexible solution is to pluralize entire strings, e.g.
*
* if (a == 1) {
* crm_info("singular message"):
* } else {
* crm_info("plural message");
* }
*
* though even that's not sufficient for all languages besides English (if we
* ever desire to do translations of output and log messages). But the following
* convenience macros are "good enough" and more concise for many cases.
*/
/* Example:
* crm_info("Found %d %s", nentries,
* pcmk__plural_alt(nentries, "entry", "entries"));
*/
#define pcmk__plural_alt(i, s1, s2) (((i) == 1)? (s1) : (s2))
// Example: crm_info("Found %d node%s", nnodes, pcmk__plural_s(nnodes));
#define pcmk__plural_s(i) pcmk__plural_alt(i, "", "s")
static inline int
pcmk__str_empty(const char *s)
{
return (s == NULL) || (s[0] == '\0');
}
#endif /* PCMK__STRINGS_INTERNAL__H */
diff --git a/lib/common/logging.c b/lib/common/logging.c
index 6f3dc3550f..d2de15e7e3 100644
--- a/lib/common/logging.c
+++ b/lib/common/logging.c
@@ -1,1002 +1,1002 @@
/*
* Copyright 2004-2020 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <sys/utsname.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <limits.h>
#include <ctype.h>
#include <pwd.h>
#include <grp.h>
#include <time.h>
#include <libgen.h>
#include <signal.h>
#include <bzlib.h>
#include <qb/qbdefs.h>
#include <crm/crm.h>
#include <crm/common/mainloop.h>
unsigned int crm_log_priority = LOG_NOTICE;
unsigned int crm_log_level = LOG_INFO;
static gboolean crm_tracing_enabled(void);
unsigned int crm_trace_nonlog = 0;
bool crm_is_daemon = 0;
GLogFunc glib_log_default;
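/* Route messages from glib through Pacemaker logging, mapping glib severities
* to syslog levels; critical messages additionally trigger crm_abort() to
* record how we got here, unless the glib-handler trace callsite is active */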
static void
crm_glib_handler(const gchar * log_domain, GLogLevelFlags flags, const gchar * message,
gpointer user_data)
{
int log_level = LOG_WARNING;
GLogLevelFlags msg_level = (flags & G_LOG_LEVEL_MASK);
static struct qb_log_callsite *glib_cs = NULL;
if (glib_cs == NULL) {
glib_cs = qb_log_callsite_get(__FUNCTION__, __FILE__, "glib-handler", LOG_DEBUG, __LINE__, crm_trace_nonlog);
}
switch (msg_level) {
case G_LOG_LEVEL_CRITICAL:
log_level = LOG_CRIT;
if (crm_is_callsite_active(glib_cs, LOG_DEBUG, 0) == FALSE) {
/* log and record how we got here */
crm_abort(__FILE__, __FUNCTION__, __LINE__, message, TRUE, TRUE);
}
break;
case G_LOG_LEVEL_ERROR:
log_level = LOG_ERR;
break;
case G_LOG_LEVEL_MESSAGE:
log_level = LOG_NOTICE;
break;
case G_LOG_LEVEL_INFO:
log_level = LOG_INFO;
break;
case G_LOG_LEVEL_DEBUG:
log_level = LOG_DEBUG;
break;
case G_LOG_LEVEL_WARNING:
case G_LOG_FLAG_RECURSION:
case G_LOG_FLAG_FATAL:
case G_LOG_LEVEL_MASK:
log_level = LOG_WARNING;
break;
}
do_crm_log(log_level, "%s: %s", log_domain, message);
}
#ifndef NAME_MAX
# define NAME_MAX 256
#endif
/*!
* \internal
* \brief Write out a blackbox (enabling blackboxes if needed)
*
* \param[in] nsig Signal number that was received
*
* \note This is a true signal handler, and so must be async-safe.
*/
static void
crm_trigger_blackbox(int nsig)
{
if(nsig == SIGTRAP) {
/* Turn it on if it wasn't already */
crm_enable_blackbox(nsig);
}
crm_write_blackbox(nsig, NULL);
}
void
crm_log_deinit(void)
{
g_log_set_default_handler(glib_log_default, NULL);
}
#define FMT_MAX 256
static void
set_format_string(int method, const char *daemon)
{
if (method == QB_LOG_SYSLOG) {
// The system log gets a simplified, user-friendly format
crm_extended_logging(method, QB_FALSE);
qb_log_format_set(method, "%g %p: %b");
} else {
// Everything else gets more detail, for advanced troubleshooting
int offset = 0;
char fmt[FMT_MAX];
if (method > QB_LOG_STDERR) {
struct utsname res;
const char *nodename = "localhost";
if (uname(&res) == 0) {
nodename = res.nodename;
}
// If logging to file, prefix with timestamp, node name, daemon ID
offset += snprintf(fmt + offset, FMT_MAX - offset,
"%%t %s %-20s[%lu] ",
nodename, daemon, (unsigned long) getpid());
}
// Add function name (in parentheses)
offset += snprintf(fmt + offset, FMT_MAX - offset, "(%%n");
if (crm_tracing_enabled()) {
// When tracing, add file and line number
offset += snprintf(fmt + offset, FMT_MAX - offset, "@%%f:%%l");
}
offset += snprintf(fmt + offset, FMT_MAX - offset, ")");
// Add tag (if any), severity, and actual message
offset += snprintf(fmt + offset, FMT_MAX - offset, " %%g\t%%p: %%b");
CRM_LOG_ASSERT(offset > 0);
qb_log_format_set(method, fmt);
}
}
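/*!
* \brief Add a file as an additional logging destination
*
* Validates the parent directory, creates the file if needed (fixing
* ownership, group, and mode when running as root), then opens it via libqb.
* A NULL filename selects the default detail log; "none" and "/dev/null"
* disable file logging.
*/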
gboolean
crm_add_logfile(const char *filename)
{
bool is_default = false;
static int default_fd = -1;
static gboolean have_logfile = FALSE;
const char *default_logfile = CRM_LOG_DIR "/pacemaker.log";
struct stat parent;
int fd = 0, rc = 0;
FILE *logfile = NULL;
char *parent_dir = NULL;
char *filename_cp;
if (filename == NULL && have_logfile == FALSE) {
filename = default_logfile;
}
if (filename == NULL) {
return FALSE; /* Nothing to do */
} else if(safe_str_eq(filename, "none")) {
return FALSE; /* Nothing to do */
} else if(safe_str_eq(filename, "/dev/null")) {
return FALSE; /* Nothing to do */
} else if(safe_str_eq(filename, default_logfile)) {
is_default = TRUE;
}
if(is_default && default_fd >= 0) {
return TRUE; /* Nothing to do */
}
/* Check the parent directory */
filename_cp = strdup(filename);
parent_dir = dirname(filename_cp);
rc = stat(parent_dir, &parent);
if (rc != 0) {
crm_err("Directory '%s' does not exist: logging to '%s' is disabled", parent_dir, filename);
free(filename_cp);
return FALSE;
}
free(filename_cp);
errno = 0;
logfile = fopen(filename, "a");
if(logfile == NULL) {
crm_err("%s (%d): Logging to '%s' as uid=%u, gid=%u is disabled",
pcmk_strerror(errno), errno, filename, geteuid(), getegid());
return FALSE;
}
/* Check/Set permissions if we're root */
if (geteuid() == 0) {
struct stat st;
uid_t pcmk_uid = 0;
gid_t pcmk_gid = 0;
gboolean fix = FALSE;
int logfd = fileno(logfile);
const char *modestr;
mode_t filemode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
rc = fstat(logfd, &st);
if (rc < 0) {
crm_perror(LOG_WARNING, "Cannot stat %s", filename);
fclose(logfile);
return FALSE;
}
if (pcmk_daemon_user(&pcmk_uid, &pcmk_gid) == 0) {
if (st.st_gid != pcmk_gid) {
/* Wrong group */
fix = TRUE;
} else if ((st.st_mode & S_IRWXG) != (S_IRGRP | S_IWGRP)) {
/* Not read/writable by the correct group */
fix = TRUE;
}
}
modestr = getenv("PCMK_logfile_mode");
if (modestr) {
long filemode_l = strtol(modestr, NULL, 8);
if (filemode_l != LONG_MIN && filemode_l != LONG_MAX) {
filemode = (mode_t)filemode_l;
}
}
if (fix) {
rc = fchown(logfd, pcmk_uid, pcmk_gid);
if (rc < 0) {
crm_warn("Cannot change the ownership of %s to user %s and gid %d",
filename, CRM_DAEMON_USER, pcmk_gid);
}
}
if (filemode) {
rc = fchmod(logfd, filemode);
if (rc < 0) {
crm_warn("Cannot change the mode of %s to %o", filename, filemode);
}
fprintf(logfile, "Set r/w permissions for uid=%d, gid=%d on %s\n",
pcmk_uid, pcmk_gid, filename);
if (fflush(logfile) < 0 || fsync(logfd) < 0) {
crm_err("Couldn't write out logfile: %s", filename);
}
}
}
/* Close and reopen with libqb */
fclose(logfile);
fd = qb_log_file_open(filename);
if (fd < 0) {
crm_perror(LOG_WARNING, "Couldn't send additional logging to %s", filename);
return FALSE;
}
if(is_default) {
default_fd = fd;
// Some resource agents will log only if environment variable is set
if (pcmk__env_option("logfile") == NULL) {
pcmk__set_env_option("logfile", filename);
}
} else if(default_fd >= 0) {
crm_notice("Switching to %s", filename);
qb_log_ctl(default_fd, QB_LOG_CONF_ENABLED, QB_FALSE);
}
crm_notice("Additional logging available in %s", filename);
qb_log_ctl(fd, QB_LOG_CONF_ENABLED, QB_TRUE);
/* qb_log_ctl(fd, QB_LOG_CONF_FILE_SYNC, 1); Turn on synchronous writes */
#ifdef HAVE_qb_log_conf_QB_LOG_CONF_MAX_LINE_LEN
// Longer than default, for logging long XML lines
qb_log_ctl(fd, QB_LOG_CONF_MAX_LINE_LEN, 800);
#endif
/* Enable callsites */
crm_update_callsites();
have_logfile = TRUE;
return TRUE;
}
static int blackbox_trigger = 0;
static volatile char *blackbox_file_prefix = NULL;
#ifdef QB_FEATURE_LOG_HIRES_TIMESTAMPS
typedef struct timespec *log_time_t;
#else
typedef time_t log_time_t;
#endif
static void
blackbox_logger(int32_t t, struct qb_log_callsite *cs, log_time_t timestamp,
const char *msg)
{
if(cs && cs->priority < LOG_ERR) {
crm_write_blackbox(SIGTRAP, cs); /* Bypass the over-dumping logic */
} else {
crm_write_blackbox(0, cs);
}
}
static void
crm_control_blackbox(int nsig, bool enable)
{
int lpc = 0;
if (blackbox_file_prefix == NULL) {
pid_t pid = getpid();
blackbox_file_prefix = crm_strdup_printf("%s/%s-%lu",
CRM_BLACKBOX_DIR,
crm_system_name,
(unsigned long) pid);
}
if (enable && qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_STATE_GET, 0) != QB_LOG_STATE_ENABLED) {
qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_SIZE, 5 * 1024 * 1024); /* Any size change drops existing entries */
qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_TRUE); /* Setting the size seems to disable it */
/* Enable synchronous logging */
for (lpc = QB_LOG_BLACKBOX; lpc < QB_LOG_TARGET_MAX; lpc++) {
qb_log_ctl(lpc, QB_LOG_CONF_FILE_SYNC, QB_TRUE);
}
crm_notice("Initiated blackbox recorder: %s", blackbox_file_prefix);
/* Save to disk on abnormal termination */
crm_signal_handler(SIGSEGV, crm_trigger_blackbox);
crm_signal_handler(SIGABRT, crm_trigger_blackbox);
crm_signal_handler(SIGILL, crm_trigger_blackbox);
crm_signal_handler(SIGBUS, crm_trigger_blackbox);
crm_signal_handler(SIGFPE, crm_trigger_blackbox);
crm_update_callsites();
blackbox_trigger = qb_log_custom_open(blackbox_logger, NULL, NULL, NULL);
qb_log_ctl(blackbox_trigger, QB_LOG_CONF_ENABLED, QB_TRUE);
crm_trace("Trigger: %d is %d %d", blackbox_trigger,
qb_log_ctl(blackbox_trigger, QB_LOG_CONF_STATE_GET, 0), QB_LOG_STATE_ENABLED);
crm_update_callsites();
} else if (!enable && qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_STATE_GET, 0) == QB_LOG_STATE_ENABLED) {
qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_FALSE);
/* Disable synchronous logging again when the blackbox is disabled */
for (lpc = QB_LOG_BLACKBOX; lpc < QB_LOG_TARGET_MAX; lpc++) {
qb_log_ctl(lpc, QB_LOG_CONF_FILE_SYNC, QB_FALSE);
}
}
}
void
crm_enable_blackbox(int nsig)
{
crm_control_blackbox(nsig, TRUE);
}
void
crm_disable_blackbox(int nsig)
{
crm_control_blackbox(nsig, FALSE);
}
/*!
* \internal
* \brief Write out a blackbox, if blackboxes are enabled
*
* \param[in] nsig Signal that was received
* \param[in] cs libqb callsite
*
* \note This may be called via a true signal handler and so must be async-safe.
* @TODO actually make this async-safe
*/
void
crm_write_blackbox(int nsig, struct qb_log_callsite *cs)
{
static volatile int counter = 1;
static volatile time_t last = 0;
char buffer[NAME_MAX];
time_t now = time(NULL);
if (blackbox_file_prefix == NULL) {
return;
}
switch (nsig) {
case 0:
case SIGTRAP:
/* The graceful case - such as assertion failure or user request */
if (nsig == 0 && now == last) {
/* Prevent over-dumping */
return;
}
snprintf(buffer, NAME_MAX, "%s.%d", blackbox_file_prefix, counter++);
if (nsig == SIGTRAP) {
crm_notice("Blackbox dump requested, please see %s for contents", buffer);
} else if (cs) {
syslog(LOG_NOTICE,
"Problem detected at %s:%d (%s), please see %s for additional details",
cs->function, cs->lineno, cs->filename, buffer);
} else {
crm_notice("Problem detected, please see %s for additional details", buffer);
}
last = now;
qb_log_blackbox_write_to_file(buffer);
/* Flush the existing contents
* A size change would also work
*/
qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_FALSE);
qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_TRUE);
break;
default:
/* Do as little as possible, just try to get what we have out
* We logged the filename when the blackbox was enabled
*/
crm_signal_handler(nsig, SIG_DFL);
qb_log_blackbox_write_to_file((const char *)blackbox_file_prefix);
qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_FALSE);
raise(nsig);
break;
}
}
gboolean
crm_log_cli_init(const char *entity)
{
return crm_log_init(entity, LOG_ERR, FALSE, FALSE, 0, NULL, TRUE);
}
static const char *
crm_quark_to_string(uint32_t tag)
{
const char *text = g_quark_to_string(tag);
if (text) {
return text;
}
return "";
}
static void
crm_log_filter_source(int source, const char *trace_files, const char *trace_fns,
const char *trace_fmts, const char *trace_tags, const char *trace_blackbox,
struct qb_log_callsite *cs)
{
if (qb_log_ctl(source, QB_LOG_CONF_STATE_GET, 0) != QB_LOG_STATE_ENABLED) {
return;
} else if (cs->tags != crm_trace_nonlog && source == QB_LOG_BLACKBOX) {
/* Blackbox gets everything if enabled */
qb_bit_set(cs->targets, source);
} else if (source == blackbox_trigger && blackbox_trigger > 0) {
/* Should this log message result in the blackbox being dumped */
if (cs->priority <= LOG_ERR) {
qb_bit_set(cs->targets, source);
} else if (trace_blackbox) {
char *key = crm_strdup_printf("%s:%d", cs->function, cs->lineno);
if (strstr(trace_blackbox, key) != NULL) {
qb_bit_set(cs->targets, source);
}
free(key);
}
} else if (source == QB_LOG_SYSLOG) { /* No tracing to syslog */
if (cs->priority <= crm_log_priority && cs->priority <= crm_log_level) {
qb_bit_set(cs->targets, source);
}
/* Log file tracing options... */
} else if (cs->priority <= crm_log_level) {
qb_bit_set(cs->targets, source);
} else if (trace_files && strstr(trace_files, cs->filename) != NULL) {
qb_bit_set(cs->targets, source);
} else if (trace_fns && strstr(trace_fns, cs->function) != NULL) {
qb_bit_set(cs->targets, source);
} else if (trace_fmts && strstr(trace_fmts, cs->format) != NULL) {
qb_bit_set(cs->targets, source);
} else if (trace_tags
&& cs->tags != 0
&& cs->tags != crm_trace_nonlog && g_quark_to_string(cs->tags) != NULL) {
qb_bit_set(cs->targets, source);
}
}
static void
crm_log_filter(struct qb_log_callsite *cs)
{
int lpc = 0;
static int need_init = 1;
static const char *trace_fns = NULL;
static const char *trace_tags = NULL;
static const char *trace_fmts = NULL;
static const char *trace_files = NULL;
static const char *trace_blackbox = NULL;
if (need_init) {
need_init = 0;
trace_fns = getenv("PCMK_trace_functions");
trace_fmts = getenv("PCMK_trace_formats");
trace_tags = getenv("PCMK_trace_tags");
trace_files = getenv("PCMK_trace_files");
trace_blackbox = getenv("PCMK_trace_blackbox");
if (trace_tags != NULL) {
uint32_t tag;
char token[500];
const char *offset = NULL;
const char *next = trace_tags;
do {
offset = next;
next = strchrnul(offset, ',');
snprintf(token, sizeof(token), "%.*s", (int)(next - offset), offset);
tag = g_quark_from_string(token);
crm_info("Created GQuark %u from token '%s' in '%s'", tag, token, trace_tags);
if (next[0] != 0) {
next++;
}
} while (next != NULL && next[0] != 0);
}
}
cs->targets = 0; /* Reset then find targets to enable */
for (lpc = QB_LOG_SYSLOG; lpc < QB_LOG_TARGET_MAX; lpc++) {
crm_log_filter_source(lpc, trace_files, trace_fns, trace_fmts, trace_tags, trace_blackbox,
cs);
}
}
gboolean
crm_is_callsite_active(struct qb_log_callsite *cs, uint8_t level, uint32_t tags)
{
gboolean refilter = FALSE;
if (cs == NULL) {
return FALSE;
}
if (cs->priority != level) {
cs->priority = level;
refilter = TRUE;
}
if (cs->tags != tags) {
cs->tags = tags;
refilter = TRUE;
}
if (refilter) {
crm_log_filter(cs);
}
if (cs->targets == 0) {
return FALSE;
}
return TRUE;
}
void
crm_update_callsites(void)
{
static gboolean log = TRUE;
if (log) {
log = FALSE;
crm_debug
("Enabling callsites based on priority=%d, files=%s, functions=%s, formats=%s, tags=%s",
crm_log_level, getenv("PCMK_trace_files"), getenv("PCMK_trace_functions"),
getenv("PCMK_trace_formats"), getenv("PCMK_trace_tags"));
}
qb_log_filter_fn_set(crm_log_filter);
}
static gboolean
crm_tracing_enabled(void)
{
if (crm_log_level == LOG_TRACE) {
return TRUE;
} else if (getenv("PCMK_trace_files") || getenv("PCMK_trace_functions")
|| getenv("PCMK_trace_formats") || getenv("PCMK_trace_tags")) {
return TRUE;
}
return FALSE;
}
static int
crm_priority2int(const char *name)
{
struct syslog_names {
const char *name;
int priority;
};
static struct syslog_names p_names[] = {
{"emerg", LOG_EMERG},
{"alert", LOG_ALERT},
{"crit", LOG_CRIT},
{"error", LOG_ERR},
{"warning", LOG_WARNING},
{"notice", LOG_NOTICE},
{"info", LOG_INFO},
{"debug", LOG_DEBUG},
{NULL, -1}
};
int lpc;
for (lpc = 0; name != NULL && p_names[lpc].name != NULL; lpc++) {
if (crm_str_eq(p_names[lpc].name, name, TRUE)) {
return p_names[lpc].priority;
}
}
return crm_log_priority;
}
static void
crm_identity(const char *entity, int argc, char **argv)
{
if(crm_system_name != NULL) {
/* Nothing to do */
} else if (entity) {
free(crm_system_name);
crm_system_name = strdup(entity);
} else if (argc > 0 && argv != NULL) {
char *mutable = strdup(argv[0]);
char *modified = basename(mutable);
if (strstr(modified, "lt-") == modified) {
modified += 3;
}
free(crm_system_name);
crm_system_name = strdup(modified);
free(mutable);
} else if (crm_system_name == NULL) {
crm_system_name = strdup("Unknown");
}
setenv("PCMK_service", crm_system_name, 1);
}
void
crm_log_preinit(const char *entity, int argc, char **argv)
{
/* Configure libqb logging with nothing turned on */
int lpc = 0;
int32_t qb_facility = 0;
static bool have_logging = FALSE;
if(have_logging == FALSE) {
have_logging = TRUE;
crm_xml_init(); /* Sets buffer allocation strategy */
if (crm_trace_nonlog == 0) {
crm_trace_nonlog = g_quark_from_static_string("Pacemaker non-logging tracepoint");
}
umask(S_IWGRP | S_IWOTH | S_IROTH);
/* Redirect messages from glib functions to our handler */
glib_log_default = g_log_set_default_handler(crm_glib_handler, NULL);
/* and for good measure... - this enum is a bit field (!) */
g_log_set_always_fatal((GLogLevelFlags) 0); /*value out of range */
/* Who do we log as */
crm_identity(entity, argc, argv);
qb_facility = qb_log_facility2int("local0");
qb_log_init(crm_system_name, qb_facility, LOG_ERR);
crm_log_level = LOG_CRIT;
/* Nuke any syslog activity until it's asked for */
qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_FALSE);
#ifdef HAVE_qb_log_conf_QB_LOG_CONF_MAX_LINE_LEN
// Shorter than default, generous for what we *should* send to syslog
qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_MAX_LINE_LEN, 256);
#endif
/* Set format strings and disable threading
* Pacemaker and threads do not mix well (due to the amount of forking)
*/
qb_log_tags_stringify_fn_set(crm_quark_to_string);
for (lpc = QB_LOG_SYSLOG; lpc < QB_LOG_TARGET_MAX; lpc++) {
qb_log_ctl(lpc, QB_LOG_CONF_THREADED, QB_FALSE);
#ifdef HAVE_qb_log_conf_QB_LOG_CONF_ELLIPSIS
// End truncated lines with '...'
qb_log_ctl(lpc, QB_LOG_CONF_ELLIPSIS, QB_TRUE);
#endif
set_format_string(lpc, crm_system_name);
}
}
}
gboolean
crm_log_init(const char *entity, uint8_t level, gboolean daemon, gboolean to_stderr,
int argc, char **argv, gboolean quiet)
{
const char *syslog_priority = NULL;
const char *logfile = pcmk__env_option("logfile");
const char *facility = pcmk__env_option("logfacility");
const char *f_copy = facility;
crm_is_daemon = daemon;
crm_log_preinit(entity, argc, argv);
if (level > LOG_TRACE) {
level = LOG_TRACE;
}
if(level > crm_log_level) {
crm_log_level = level;
}
/* Should we log to syslog */
if (facility == NULL) {
if(crm_is_daemon) {
facility = "daemon";
} else {
facility = "none";
}
pcmk__set_env_option("logfacility", facility);
}
if (safe_str_eq(facility, "none")) {
quiet = TRUE;
} else {
qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_FACILITY, qb_log_facility2int(facility));
}
if (pcmk__env_option_enabled(crm_system_name, "debug")) {
/* Override the default setting */
crm_log_level = LOG_DEBUG;
}
/* What lower threshold do we have for sending to syslog */
syslog_priority = pcmk__env_option("logpriority");
if(syslog_priority) {
int priority = crm_priority2int(syslog_priority);
crm_log_priority = priority;
qb_log_filter_ctl(QB_LOG_SYSLOG, QB_LOG_FILTER_ADD, QB_LOG_FILTER_FILE, "*", priority);
} else {
qb_log_filter_ctl(QB_LOG_SYSLOG, QB_LOG_FILTER_ADD, QB_LOG_FILTER_FILE, "*", LOG_NOTICE);
}
// Log to syslog unless requested to be quiet
if (!quiet) {
qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_TRUE);
}
/* Should we log to stderr */
if (pcmk__env_option_enabled(crm_system_name, "stderr")) {
/* Override the default setting */
to_stderr = TRUE;
}
crm_enable_stderr(to_stderr);
/* Should we log to a file */
if (safe_str_eq("none", logfile)) {
/* No soup^Hlogs for you! */
} else if(crm_is_daemon) {
// Daemons always get a log file, unless explicitly set to "none"
crm_add_logfile(logfile);
} else if(logfile) {
crm_add_logfile(logfile);
}
if (crm_is_daemon && pcmk__env_option_enabled(crm_system_name, "blackbox")) {
crm_enable_blackbox(0);
}
/* Summary */
crm_trace("Quiet: %d, facility %s", quiet, f_copy);
pcmk__env_option("logfile");
pcmk__env_option("logfacility");
crm_update_callsites();
/* Ok, now we can start logging... */
if (quiet == FALSE && crm_is_daemon == FALSE) {
crm_log_args(argc, argv);
}
if (crm_is_daemon) {
const char *user = getenv("USER");
- if (user != NULL && pcmk__str_none_of(user, "root", CRM_DAEMON_USER, NULL)) {
+ if (user != NULL && !pcmk__str_any_of(user, "root", CRM_DAEMON_USER, NULL)) {
crm_trace("Not switching to corefile directory for %s", user);
crm_is_daemon = FALSE;
}
}
if (crm_is_daemon) {
int user = getuid();
const char *base = CRM_CORE_DIR;
struct passwd *pwent = getpwuid(user);
if (pwent == NULL) {
crm_perror(LOG_ERR, "Cannot get name for uid: %d", user);
- } else if (pcmk__str_none_of(pwent->pw_name, "root", CRM_DAEMON_USER, NULL)) {
+ } else if (!pcmk__str_any_of(pwent->pw_name, "root", CRM_DAEMON_USER, NULL)) {
crm_trace("Don't change active directory for regular user: %s", pwent->pw_name);
} else if (chdir(base) < 0) {
crm_perror(LOG_INFO, "Cannot change active directory to %s", base);
} else {
crm_info("Changed active directory to %s", base);
#if 0
{
char path[512];
snprintf(path, 512, "%s-%lu", crm_system_name, (unsigned long) getpid());
mkdir(path, 0750);
chdir(path);
crm_info("Changed active directory to %s/%s/%s", base, pwent->pw_name, path);
}
#endif
}
/* Original meanings from signal(7)
*
* Signal Value Action Comment
* SIGTRAP 5 Core Trace/breakpoint trap
* SIGUSR1 30,10,16 Term User-defined signal 1
* SIGUSR2 31,12,17 Term User-defined signal 2
*
* Our usage is as similar as possible
*/
mainloop_add_signal(SIGUSR1, crm_enable_blackbox);
mainloop_add_signal(SIGUSR2, crm_disable_blackbox);
mainloop_add_signal(SIGTRAP, crm_trigger_blackbox);
}
return TRUE;
}
/* returns the old value */
unsigned int
set_crm_log_level(unsigned int level)
{
unsigned int old = crm_log_level;
if (level > LOG_TRACE) {
level = LOG_TRACE;
}
crm_log_level = level;
crm_update_callsites();
crm_trace("New log level: %d", level);
return old;
}
void
crm_enable_stderr(int enable)
{
if (enable && qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_STATE_GET, 0) != QB_LOG_STATE_ENABLED) {
qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_ENABLED, QB_TRUE);
crm_update_callsites();
} else if (enable == FALSE) {
qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_ENABLED, QB_FALSE);
}
}
void
crm_bump_log_level(int argc, char **argv)
{
static int args = TRUE;
int level = crm_log_level;
if (args && argc > 1) {
crm_log_args(argc, argv);
}
if (qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_STATE_GET, 0) == QB_LOG_STATE_ENABLED) {
set_crm_log_level(level + 1);
}
/* Enable after potentially logging the argstring, not before */
crm_enable_stderr(TRUE);
}
unsigned int
get_crm_log_level(void)
{
return crm_log_level;
}
#define ARGS_FMT "Invoked: %s"
void
crm_log_args(int argc, char **argv)
{
int lpc = 0;
int len = 0;
int existing_len = 0;
int line = __LINE__;
static int logged = 0;
char *arg_string = NULL;
if (argc == 0 || argv == NULL || logged) {
return;
}
logged = 1;
// cppcheck seems not to understand the abort logic in realloc_safe
// cppcheck-suppress memleak
for (; lpc < argc; lpc++) {
if (argv[lpc] == NULL) {
break;
}
len = 2 + strlen(argv[lpc]); /* +1 space, +1 EOS */
arg_string = realloc_safe(arg_string, len + existing_len);
existing_len += sprintf(arg_string + existing_len, "%s ", argv[lpc]);
}
qb_log_from_external_source(__func__, __FILE__, ARGS_FMT, LOG_NOTICE, line, 0, arg_string);
free(arg_string);
}
void
crm_log_output_fn(const char *file, const char *function, int line, int level, const char *prefix,
const char *output)
{
const char *next = NULL;
const char *offset = NULL;
if (level == LOG_NEVER) {
return;
}
if (output == NULL) {
if (level != LOG_STDOUT) {
level = LOG_TRACE;
}
output = "-- empty --";
}
next = output;
do {
offset = next;
next = strchrnul(offset, '\n');
do_crm_log_alias(level, file, function, line, "%s [ %.*s ]", prefix,
(int)(next - offset), offset);
if (next[0] != 0) {
next++;
}
} while (next != NULL && next[0] != 0);
}
diff --git a/lib/common/strings.c b/lib/common/strings.c
index bd68ccf69f..aaea3c9700 100644
--- a/lib/common/strings.c
+++ b/lib/common/strings.c
@@ -1,782 +1,759 @@
/*
* Copyright 2004-2020 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <limits.h>
#include <bzlib.h>
#include <sys/types.h>
char *
crm_itoa_stack(int an_int, char *buffer, size_t len)
{
if (buffer != NULL) {
snprintf(buffer, len, "%d", an_int);
}
return buffer;
}
/*!
* \internal
* \brief Scan a long long integer from a string
*
* \param[in] text String to scan
* \param[out] result If not NULL, where to store scanned value
* \param[out] end_text If not NULL, where to store pointer to just after value
*
* \return Standard Pacemaker return code (also set errno on error)
*/
static int
scan_ll(const char *text, long long *result, char **end_text)
{
long long local_result = -1;
char *local_end_text = NULL;
int rc = pcmk_rc_ok;
errno = 0;
if (text != NULL) {
#ifdef ANSI_ONLY
local_result = (long long) strtol(text, &local_end_text, 10);
#else
local_result = strtoll(text, &local_end_text, 10);
#endif
if (errno == ERANGE) {
rc = errno;
crm_warn("Integer parsed from %s was clipped to %lld",
text, local_result);
} else if (errno != 0) {
rc = errno;
local_result = -1;
crm_err("Could not parse integer from %s (using -1 instead): %s",
text, pcmk_rc_str(rc));
} else if (local_end_text == text) {
rc = EINVAL;
local_result = -1;
crm_err("Could not parse integer from %s (using -1 instead): "
"No digits found", text);
}
if ((end_text == NULL) && (local_end_text != NULL)
&& (local_end_text[0] != '\0')) {
crm_warn("Characters left over after parsing '%s': '%s'",
text, local_end_text);
}
errno = rc;
}
if (end_text != NULL) {
*end_text = local_end_text;
}
if (result != NULL) {
*result = local_result;
}
return rc;
}
/*!
* \brief Parse a long long integer value from a string
*
* \param[in] text The string to parse
* \param[in] default_text Default string to parse if text is NULL
*
* \return Parsed value on success, -1 (and set errno) on error
*/
long long
crm_parse_ll(const char *text, const char *default_text)
{
long long result;
if (text == NULL) {
text = default_text;
if (text == NULL) {
crm_err("No default conversion value supplied");
errno = EINVAL;
return -1;
}
}
scan_ll(text, &result, NULL);
return result;
}
/*!
* \brief Parse an integer value from a string
*
* \param[in] text The string to parse
* \param[in] default_text Default string to parse if text is NULL
*
* \return Parsed value on success, INT_MIN or INT_MAX (and set errno to ERANGE)
* if parsed value is out of integer range, otherwise -1 (and set errno)
*/
int
crm_parse_int(const char *text, const char *default_text)
{
long long result = crm_parse_ll(text, default_text);
if (result < INT_MIN) {
// If errno is ERANGE, crm_parse_ll() has already logged a message
if (errno != ERANGE) {
crm_err("Conversion of %s was clipped: %lld", text, result);
errno = ERANGE;
}
return INT_MIN;
} else if (result > INT_MAX) {
// If errno is ERANGE, crm_parse_ll() has already logged a message
if (errno != ERANGE) {
crm_err("Conversion of %s was clipped: %lld", text, result);
errno = ERANGE;
}
return INT_MAX;
}
return (int) result;
}
/*!
* \internal
* \brief Parse a guint from a string stored in a hash table
*
* \param[in] table Hash table to search
* \param[in] key Hash table key to use to retrieve string
* \param[in] default_val What to use if key has no entry in table
* \param[out] result If not NULL, where to store parsed integer
*
* \return Standard Pacemaker return code
*/
int
pcmk__guint_from_hash(GHashTable *table, const char *key, guint default_val,
guint *result)
{
const char *value;
long long value_ll;
CRM_CHECK((table != NULL) && (key != NULL), return EINVAL);
value = g_hash_table_lookup(table, key);
if (value == NULL) {
if (result != NULL) {
*result = default_val;
}
return pcmk_rc_ok;
}
errno = 0;
value_ll = crm_parse_ll(value, NULL);
if (errno != 0) {
return errno; // Message already logged
}
if ((value_ll < 0) || (value_ll > G_MAXUINT)) {
crm_warn("Could not parse non-negative integer from %s", value);
return ERANGE;
}
if (result != NULL) {
*result = (guint) value_ll;
}
return pcmk_rc_ok;
}
#ifndef NUMCHARS
# define NUMCHARS "0123456789."
#endif
#ifndef WHITESPACE
# define WHITESPACE " \t\n\r\f"
#endif
/*!
* \brief Parse a time+units string and return milliseconds equivalent
*
* \param[in] input String with a number and units (optionally with whitespace
* before and/or after the number)
*
* \return Milliseconds corresponding to string expression, or -1 on error
*/
long long
crm_get_msec(const char *input)
{
const char *num_start = NULL;
const char *units;
long long multiplier = 1000;
long long divisor = 1;
long long msec = -1;
size_t num_len = 0;
char *end_text = NULL;
if (input == NULL) {
return -1;
}
num_start = input + strspn(input, WHITESPACE);
num_len = strspn(num_start, NUMCHARS);
if (num_len < 1) {
return -1;
}
units = num_start + num_len;
units += strspn(units, WHITESPACE);
if (!strncasecmp(units, "ms", 2) || !strncasecmp(units, "msec", 4)) {
multiplier = 1;
divisor = 1;
} else if (!strncasecmp(units, "us", 2) || !strncasecmp(units, "usec", 4)) {
multiplier = 1;
divisor = 1000;
} else if (!strncasecmp(units, "s", 1) || !strncasecmp(units, "sec", 3)) {
multiplier = 1000;
divisor = 1;
} else if (!strncasecmp(units, "m", 1) || !strncasecmp(units, "min", 3)) {
multiplier = 60 * 1000;
divisor = 1;
} else if (!strncasecmp(units, "h", 1) || !strncasecmp(units, "hr", 2)) {
multiplier = 60 * 60 * 1000;
divisor = 1;
} else if ((*units != EOS) && (*units != '\n') && (*units != '\r')) {
return -1;
}
scan_ll(num_start, &msec, &end_text);
if (msec > (LLONG_MAX / multiplier)) {
        /* The multiplication below would overflow. Because multiplier and
         * divisor are mutually exclusive (at most one of them differs from 1),
         * checking the multiplication alone is sufficient.
         */
return LLONG_MAX;
}
msec *= multiplier;
msec /= divisor;
return msec;
}
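/* A minimal usage sketch (illustrative inputs only): seconds are the default
 * unit when none is given, and a string without a leading number is an error.
 *
 *     crm_get_msec("2min");    // 120000
 *     crm_get_msec(" 500ms "); // 500
 *     crm_get_msec("10");      // 10000 (seconds by default)
 *     crm_get_msec("fast");    // -1 (no leading number)
 */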
gboolean
safe_str_neq(const char *a, const char *b)
{
if (a == b) {
return FALSE;
} else if (a == NULL || b == NULL) {
return TRUE;
} else if (strcasecmp(a, b) == 0) {
return FALSE;
}
return TRUE;
}
gboolean
crm_is_true(const char *s)
{
gboolean ret = FALSE;
if (s != NULL) {
crm_str_to_boolean(s, &ret);
}
return ret;
}
int
crm_str_to_boolean(const char *s, int *ret)
{
if (s == NULL) {
return -1;
} else if (strcasecmp(s, "true") == 0
|| strcasecmp(s, "on") == 0
|| strcasecmp(s, "yes") == 0 || strcasecmp(s, "y") == 0 || strcasecmp(s, "1") == 0) {
*ret = TRUE;
return 1;
} else if (strcasecmp(s, "false") == 0
|| strcasecmp(s, "off") == 0
|| strcasecmp(s, "no") == 0 || strcasecmp(s, "n") == 0 || strcasecmp(s, "0") == 0) {
*ret = FALSE;
return 1;
}
return -1;
}
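/* A minimal usage sketch (illustrative only): crm_is_true() accepts the usual
 * true/false spellings case-insensitively, while crm_str_to_boolean() reports
 * unrecognized input without touching the output flag.
 *
 *     crm_is_true("Yes");   // TRUE
 *     crm_is_true("0");     // FALSE
 *     crm_is_true(NULL);    // FALSE
 *
 *     int flag = FALSE;
 *     crm_str_to_boolean("maybe", &flag);  // returns -1, flag unchanged
 */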
char *
crm_strip_trailing_newline(char *str)
{
int len;
if (str == NULL) {
return str;
}
for (len = strlen(str) - 1; len >= 0 && str[len] == '\n'; len--) {
str[len] = '\0';
}
return str;
}
gboolean
crm_str_eq(const char *a, const char *b, gboolean use_case)
{
if (use_case) {
return g_strcmp0(a, b) == 0;
/* TODO - Figure out which calls, if any, really need to be case independent */
} else if (a == b) {
return TRUE;
} else if (a == NULL || b == NULL) {
/* shouldn't be comparing NULLs */
return FALSE;
} else if (strcasecmp(a, b) == 0) {
return TRUE;
}
return FALSE;
}
/*!
* \brief Check whether a string starts with a certain sequence
*
* \param[in] str String to check
* \param[in] prefix Sequence to match against beginning of \p str
*
* \return \c true if \p str begins with match, \c false otherwise
 * \note This is equivalent to !strncmp(str, prefix, strlen(prefix)), but is
 *       likely less efficient when \p prefix is a string literal whose
 *       strlen() the compiler can evaluate at compile time, and more
 *       efficient otherwise.
*/
bool
pcmk__starts_with(const char *str, const char *prefix)
{
const char *s = str;
const char *p = prefix;
if (!s || !p) {
return false;
}
while (*s && *p) {
if (*s++ != *p++) {
return false;
}
}
return (*p == 0);
}
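/* A minimal usage sketch (illustrative only): NULL arguments are handled
 * safely rather than dereferenced.
 *
 *     pcmk__starts_with("remote-node1", "remote-");  // true
 *     pcmk__starts_with("node1", "remote-");         // false
 *     pcmk__starts_with(NULL, "remote-");            // false
 */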
static inline bool
ends_with(const char *s, const char *match, bool as_extension)
{
if (pcmk__str_empty(match)) {
return true;
} else if (s == NULL) {
return false;
} else {
size_t slen, mlen;
/* Besides as_extension, we could also check
!strchr(&match[1], match[0]) but that would be inefficient.
*/
if (as_extension) {
s = strrchr(s, match[0]);
return (s == NULL)? false : !strcmp(s, match);
}
mlen = strlen(match);
slen = strlen(s);
return ((slen >= mlen) && !strcmp(s + slen - mlen, match));
}
}
/*!
* \internal
* \brief Check whether a string ends with a certain sequence
*
* \param[in] s String to check
* \param[in] match Sequence to match against end of \p s
*
* \return \c true if \p s ends case-sensitively with match, \c false otherwise
* \note pcmk__ends_with_ext() can be used if the first character of match
* does not recur in match.
*/
bool
pcmk__ends_with(const char *s, const char *match)
{
return ends_with(s, match, false);
}
/*!
* \internal
* \brief Check whether a string ends with a certain "extension"
*
* \param[in] s String to check
 * \param[in] match Extension to match against the end of \p s; its first
 *                  character must not occur elsewhere in \p match (for
 *                  example, a file extension such as ".html", where the
 *                  last dot is the delimiter); incorrect results may be
 *                  returned otherwise
 *
 * \return \c true if \p s ends (verbatim, i.e., case-sensitively) with the
 *         "extension" designated as \p match (including the empty string),
 *         \c false otherwise
 *
 * \note The main incentive to prefer this function over \c pcmk__ends_with()
 *       where possible is efficiency, at the cost of the added restriction
 *       on \p match stated above; the complexity class remains the same,
 *       though: O(M+N) vs. O(M+2N).
*/
bool
pcmk__ends_with_ext(const char *s, const char *match)
{
return ends_with(s, match, true);
}
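/* A minimal usage sketch (illustrative only): both variants agree on simple
 * extensions, but the "_ext" form may do less work because it can seek
 * directly to the last occurrence of the delimiter character.
 *
 *     pcmk__ends_with("report.html", ".html");         // true
 *     pcmk__ends_with_ext("report.html", ".html");     // true
 *     pcmk__ends_with_ext("report.html.bak", ".html"); // false (ends with ".bak")
 */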
/*
* This re-implements g_str_hash as it was prior to glib2-2.28:
*
* https://gitlab.gnome.org/GNOME/glib/commit/354d655ba8a54b754cb5a3efb42767327775696c
*
* Note that the new g_str_hash is presumably a *better* hash (it's actually
* a correct implementation of DJB's hash), but we need to preserve existing
* behaviour, because the hash key ultimately determines the "sort" order
* when iterating through GHashTables, which affects allocation of scores to
* clone instances when iterating through rsc->allowed_nodes. It (somehow)
* also appears to have some minor impact on the ordering of a few
* pseudo_event IDs in the transition graph.
*/
guint
g_str_hash_traditional(gconstpointer v)
{
const signed char *p;
guint32 h = 0;
for (p = v; *p != '\0'; p++)
h = (h << 5) - h + *p;
return h;
}
/* used with hash tables where case does not matter */
gboolean
crm_strcase_equal(gconstpointer a, gconstpointer b)
{
return crm_str_eq((const char *) a, (const char *) b, FALSE);
}
guint
crm_strcase_hash(gconstpointer v)
{
const signed char *p;
guint32 h = 0;
for (p = v; *p != '\0'; p++)
h = (h << 5) - h + g_ascii_tolower(*p);
return h;
}
static void
copy_str_table_entry(gpointer key, gpointer value, gpointer user_data)
{
if (key && value && user_data) {
g_hash_table_insert((GHashTable*)user_data, strdup(key), strdup(value));
}
}
GHashTable *
crm_str_table_dup(GHashTable *old_table)
{
GHashTable *new_table = NULL;
if (old_table) {
new_table = crm_str_table_new();
g_hash_table_foreach(old_table, copy_str_table_entry, new_table);
}
return new_table;
}
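/* A minimal usage sketch (the table "rsc_params" is hypothetical, not part of
 * this source): duplicate a string-keyed table so the copy can be modified
 * independently of the original.
 *
 *     GHashTable *copy = crm_str_table_dup(rsc_params);
 *     // ... modify copy as needed ...
 *     g_hash_table_destroy(copy);
 */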
/*!
* \internal
* \brief Add a word to a space-separated string list
*
* \param[in,out] list Pointer to beginning of list
* \param[in] word Word to add to list
*
* \return (Potentially new) beginning of list
* \note This dynamically reallocates list as needed.
*/
char *
pcmk__add_word(char *list, const char *word)
{
if (word != NULL) {
size_t len = list? strlen(list) : 0;
list = realloc_safe(list, len + strlen(word) + 2); // 2 = space + EOS
sprintf(list + len, " %s", word);
}
return list;
}
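/* A minimal usage sketch (illustrative only): each word is written with a
 * leading space, so the resulting list begins with one.
 *
 *     char *list = NULL;
 *
 *     list = pcmk__add_word(list, "node1");
 *     list = pcmk__add_word(list, "node2");  // list is now " node1 node2"
 *     free(list);
 */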
/*!
* \internal
* \brief Compress data
*
* \param[in] data Data to compress
* \param[in] length Number of characters of data to compress
* \param[in] max Maximum size of compressed data (or 0 to estimate)
* \param[out] result Where to store newly allocated compressed result
* \param[out] result_len Where to store actual compressed length of result
*
* \return Standard Pacemaker return code
*/
int
pcmk__compress(const char *data, unsigned int length, unsigned int max,
char **result, unsigned int *result_len)
{
int rc;
char *compressed = NULL;
char *uncompressed = strdup(data);
#ifdef CLOCK_MONOTONIC
struct timespec after_t;
struct timespec before_t;
#endif
if (max == 0) {
        // libbz2 requires the output buffer to be at least 1% larger than the
        // input plus 600 bytes; this size is guaranteed to hold the result
        max = (length * 1.01) + 601;
}
#ifdef CLOCK_MONOTONIC
clock_gettime(CLOCK_MONOTONIC, &before_t);
#endif
compressed = calloc((size_t) max, sizeof(char));
CRM_ASSERT(compressed);
*result_len = max;
rc = BZ2_bzBuffToBuffCompress(compressed, result_len, uncompressed, length,
CRM_BZ2_BLOCKS, 0, CRM_BZ2_WORK);
free(uncompressed);
if (rc != BZ_OK) {
crm_err("Compression of %d bytes failed: %s " CRM_XS " bzerror=%d",
length, bz2_strerror(rc), rc);
free(compressed);
return pcmk_rc_error;
}
#ifdef CLOCK_MONOTONIC
clock_gettime(CLOCK_MONOTONIC, &after_t);
crm_trace("Compressed %d bytes into %d (ratio %d:1) in %.0fms",
length, *result_len, length / (*result_len),
(after_t.tv_sec - before_t.tv_sec) * 1000 +
(after_t.tv_nsec - before_t.tv_nsec) / 1e6);
#else
crm_trace("Compressed %d bytes into %d (ratio %d:1)",
length, *result_len, length / (*result_len));
#endif
*result = compressed;
return pcmk_rc_ok;
}
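/* A minimal usage sketch (xml_text is a hypothetical buffer, not part of this
 * source): compress a text buffer, letting the function estimate the output
 * size, and free the result when done.
 *
 *     char *compressed = NULL;
 *     unsigned int compressed_len = 0;
 *
 *     if (pcmk__compress(xml_text, strlen(xml_text), 0,
 *                        &compressed, &compressed_len) == pcmk_rc_ok) {
 *         // use compressed[0 .. compressed_len-1], then free(compressed)
 *     }
 */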
char *
crm_strdup_printf(char const *format, ...)
{
va_list ap;
int len = 0;
char *string = NULL;
va_start(ap, format);
len = vasprintf (&string, format, ap);
CRM_ASSERT(len > 0);
va_end(ap);
return string;
}
int
pcmk__parse_ll_range(const char *srcstring, long long *start, long long *end)
{
char *remainder = NULL;
CRM_ASSERT(start != NULL && end != NULL);
*start = -1;
*end = -1;
crm_trace("Attempting to decode: [%s]", srcstring);
if (srcstring == NULL || strcmp(srcstring, "") == 0 || strcmp(srcstring, "-") == 0) {
return pcmk_rc_unknown_format;
}
    /* String starts with a dash, so this is either a range with
     * no beginning or garbage.
     */
if (*srcstring == '-') {
int rc = scan_ll(srcstring+1, end, &remainder);
if (rc != pcmk_rc_ok || *remainder != '\0') {
return pcmk_rc_unknown_format;
} else {
return pcmk_rc_ok;
}
}
if (scan_ll(srcstring, start, &remainder) != pcmk_rc_ok) {
return pcmk_rc_unknown_format;
}
if (*remainder && *remainder == '-') {
if (*(remainder+1)) {
char *more_remainder = NULL;
int rc = scan_ll(remainder+1, end, &more_remainder);
if (rc != pcmk_rc_ok || *more_remainder != '\0') {
return pcmk_rc_unknown_format;
}
}
} else if (*remainder && *remainder != '-') {
*start = -1;
return pcmk_rc_unknown_format;
} else {
/* The input string contained only one number. Set start and end
* to the same value and return pcmk_rc_ok. This gives the caller
* a way to tell this condition apart from a range with no end.
*/
*end = *start;
}
return pcmk_rc_ok;
}
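/* A minimal usage sketch (illustrative only): -1 marks a missing bound, and a
 * single number yields an equal start and end.
 *
 *     long long start, end;
 *
 *     pcmk__parse_ll_range("3-5", &start, &end); // start = 3, end = 5
 *     pcmk__parse_ll_range("-5", &start, &end);  // start = -1 (no beginning), end = 5
 *     pcmk__parse_ll_range("3-", &start, &end);  // start = 3, end = -1 (no end)
 *     pcmk__parse_ll_range("3", &start, &end);   // start = end = 3
 */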
gboolean
pcmk__str_in_list(GList *lst, const gchar *s)
{
if (lst == NULL) {
return FALSE;
}
if (strcmp(lst->data, "*") == 0 && lst->next == NULL) {
return TRUE;
}
return g_list_find_custom(lst, s, (GCompareFunc) strcmp) != NULL;
}
bool
pcmk__str_any_of(const char *s, ...)
{
bool rc = false;
va_list ap;
va_start(ap, s);
while (1) {
const char *ele = va_arg(ap, const char *);
if (ele == NULL) {
break;
} else if (crm_str_eq(s, ele, FALSE)) {
rc = true;
break;
}
}
va_end(ap);
return rc;
}
-bool
-pcmk__str_none_of(const char *s, ...)
-{
- bool rc = true;
- va_list ap;
-
- va_start(ap, s);
-
- while (1) {
- const char *ele = va_arg(ap, const char *);
-
- if (ele == NULL) {
- break;
- } else if (crm_str_eq(s, ele, FALSE)) {
- rc = false;
- break;
- }
- }
-
- va_end(ap);
- return rc;
-}
-
/*!
* \brief Sort strings, with numeric portions sorted numerically
*
* Sort two strings case-insensitively like strcasecmp(), but with any numeric
* portions of the string sorted numerically. This is particularly useful for
* node names (for example, "node10" will sort higher than "node9" but lower
* than "remotenode9").
*
* \param[in] s1 First string to compare (must not be NULL)
* \param[in] s2 Second string to compare (must not be NULL)
*
* \retval -1 \p s1 comes before \p s2
* \retval 0 \p s1 and \p s2 are equal
* \retval 1 \p s1 comes after \p s2
*/
int
pcmk_numeric_strcasecmp(const char *s1, const char *s2)
{
while (*s1 && *s2) {
if (isdigit(*s1) && isdigit(*s2)) {
// If node names contain a number, sort numerically
char *end1 = NULL;
char *end2 = NULL;
long num1 = strtol(s1, &end1, 10);
long num2 = strtol(s2, &end2, 10);
            // Compare digit-run lengths too, so that e.g. "007" sorts after "7"
size_t len1 = end1 - s1;
size_t len2 = end2 - s2;
if (num1 < num2) {
return -1;
} else if (num1 > num2) {
return 1;
} else if (len1 < len2) {
return -1;
} else if (len1 > len2) {
return 1;
}
s1 = end1;
s2 = end2;
} else {
// Compare non-digits case-insensitively
int lower1 = tolower(*s1);
int lower2 = tolower(*s2);
if (lower1 < lower2) {
return -1;
} else if (lower1 > lower2) {
return 1;
}
++s1;
++s2;
}
}
if (!*s1 && *s2) {
return -1;
} else if (*s1 && !*s2) {
return 1;
}
return 0;
}
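/* A minimal usage sketch (illustrative only): numeric portions compare
 * numerically, so "node10" sorts after "node9" rather than between "node1"
 * and "node2".
 *
 *     pcmk_numeric_strcasecmp("node9", "node10");   // -1
 *     pcmk_numeric_strcasecmp("node10", "Node10");  // 0 (case-insensitive)
 *     pcmk_numeric_strcasecmp("node007", "node7");  // 1 (longer digit run wins a tie)
 */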
diff --git a/lib/common/tests/strings/Makefile.am b/lib/common/tests/strings/Makefile.am
index 047537b227..859559f1f3 100644
--- a/lib/common/tests/strings/Makefile.am
+++ b/lib/common/tests/strings/Makefile.am
@@ -1,21 +1,20 @@
AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_builddir)/include
LDADD = $(top_builddir)/lib/common/libcrmcommon.la
include $(top_srcdir)/mk/glib-tap.mk
# Add each test program here. Each test should be written as a little standalone
# program using the glib unit testing functions. See the documentation for more
# information.
#
# https://developer.gnome.org/glib/unstable/glib-Testing.html
test_programs = pcmk__parse_ll_range \
- pcmk__str_any_of \
- pcmk__str_none_of
+ pcmk__str_any_of
# If any extra data needs to be added to the source distribution, add it to the
# following list.
dist_test_data =
# If any extra data needs to be used by tests but should not be added to the
# source distribution, add it to the following list.
test_data =
diff --git a/lib/common/tests/strings/pcmk__str_none_of.c b/lib/common/tests/strings/pcmk__str_none_of.c
deleted file mode 100644
index 5073e8dabf..0000000000
--- a/lib/common/tests/strings/pcmk__str_none_of.c
+++ /dev/null
@@ -1,37 +0,0 @@
-#include <glib.h>
-
-#include <crm_internal.h>
-
-static void
-empty_input_list(void) {
- g_assert(pcmk__str_none_of("xxx", NULL) == true);
- g_assert(pcmk__str_none_of("", NULL) == true);
-}
-
-static void
-empty_string(void) {
- g_assert(pcmk__str_none_of("", "xxx", "yyy", NULL) == true);
- g_assert(pcmk__str_none_of(NULL, "xxx", "yyy", NULL) == true);
-}
-
-static void
-in_list(void) {
- g_assert(pcmk__str_none_of("xxx", "aaa", "bbb", "xxx", NULL) == false);
- g_assert(pcmk__str_none_of("XXX", "aaa", "bbb", "xxx", NULL) == false);
-}
-
-static void
-not_in_list(void) {
- g_assert(pcmk__str_none_of("xxx", "aaa", "bbb", NULL) == true);
-}
-
-int main(int argc, char **argv) {
- g_test_init(&argc, &argv, NULL);
-
- g_test_add_func("/common/strings/none_of/empty_list", empty_input_list);
- g_test_add_func("/common/strings/none_of/empty_string", empty_string);
- g_test_add_func("/common/strings/none_of/in", in_list);
- g_test_add_func("/common/strings/none_of/not_in", not_in_list);
-
- return g_test_run();
-}
diff --git a/lib/pacemaker/pcmk_sched_transition.c b/lib/pacemaker/pcmk_sched_transition.c
index 8fa232d139..7edcc197ec 100644
--- a/lib/pacemaker/pcmk_sched_transition.c
+++ b/lib/pacemaker/pcmk_sched_transition.c
@@ -1,858 +1,858 @@
/*
* Copyright 2009-2020 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/param.h>
#include <sys/types.h>
#include <dirent.h>
#include <crm/crm.h>
#include <crm/lrmd.h> // lrmd_event_data_t, lrmd_free_event()
#include <crm/cib.h>
#include <crm/common/util.h>
#include <crm/common/iso8601.h>
#include <crm/pengine/status.h>
#include <pacemaker-internal.h>
static bool fake_quiet = FALSE;
static cib_t *fake_cib = NULL;
static GListPtr fake_resource_list = NULL;
static GListPtr fake_op_fail_list = NULL;
gboolean bringing_nodes_online = FALSE;
#define STATUS_PATH_MAX 512
#define quiet_log(fmt, args...) do { \
if(fake_quiet) { \
crm_trace(fmt, ##args); \
} else { \
printf(fmt , ##args); \
} \
} while(0)
#define NEW_NODE_TEMPLATE "//"XML_CIB_TAG_NODE"[@uname='%s']"
#define NODE_TEMPLATE "//"XML_CIB_TAG_STATE"[@uname='%s']"
#define RSC_TEMPLATE "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']"
static void
inject_transient_attr(xmlNode * cib_node, const char *name, const char *value)
{
xmlNode *attrs = NULL;
xmlNode *instance_attrs = NULL;
xmlChar *node_path;
const char *node_uuid = ID(cib_node);
node_path = xmlGetNodePath(cib_node);
quiet_log(" + Injecting attribute %s=%s into %s '%s'\n",
name, value, node_path, ID(cib_node));
free(node_path);
attrs = first_named_child(cib_node, XML_TAG_TRANSIENT_NODEATTRS);
if (attrs == NULL) {
attrs = create_xml_node(cib_node, XML_TAG_TRANSIENT_NODEATTRS);
crm_xml_add(attrs, XML_ATTR_ID, node_uuid);
}
instance_attrs = first_named_child(attrs, XML_TAG_ATTR_SETS);
if (instance_attrs == NULL) {
instance_attrs = create_xml_node(attrs, XML_TAG_ATTR_SETS);
crm_xml_add(instance_attrs, XML_ATTR_ID, node_uuid);
}
crm_create_nvpair_xml(instance_attrs, NULL, name, value);
}
static void
update_failcounts(xmlNode * cib_node, const char *resource, const char *task,
guint interval_ms, int rc)
{
if (rc == 0) {
return;
} else if ((rc == 7) && (interval_ms == 0)) {
return;
} else {
char *name = NULL;
char *now = crm_ttoa(time(NULL));
name = pcmk__failcount_name(resource, task, interval_ms);
inject_transient_attr(cib_node, name, "value++");
free(name);
name = pcmk__lastfailure_name(resource, task, interval_ms);
inject_transient_attr(cib_node, name, now);
free(name);
free(now);
}
}
static void
create_node_entry(cib_t * cib_conn, const char *node)
{
int rc = pcmk_ok;
char *xpath = crm_strdup_printf(NEW_NODE_TEMPLATE, node);
rc = cib_conn->cmds->query(cib_conn, xpath, NULL, cib_xpath | cib_sync_call | cib_scope_local);
if (rc == -ENXIO) {
xmlNode *cib_object = create_xml_node(NULL, XML_CIB_TAG_NODE);
crm_xml_add(cib_object, XML_ATTR_ID, node); // Use node name as ID
crm_xml_add(cib_object, XML_ATTR_UNAME, node);
cib_conn->cmds->create(cib_conn, XML_CIB_TAG_NODES, cib_object,
cib_sync_call | cib_scope_local);
/* Not bothering with subsequent query to see if it exists,
we'll bomb out later in the call to query_node_uuid()... */
free_xml(cib_object);
}
free(xpath);
}
static lrmd_event_data_t *
create_op(xmlNode *cib_resource, const char *task, guint interval_ms,
int outcome)
{
lrmd_event_data_t *op = NULL;
xmlNode *xop = NULL;
op = lrmd_new_event(ID(cib_resource), task, interval_ms);
op->rc = outcome;
op->op_status = 0;
op->params = NULL; /* TODO: Fill me in */
op->t_run = (unsigned int) time(NULL);
op->t_rcchange = op->t_run;
op->call_id = 0;
for (xop = __xml_first_child_element(cib_resource); xop != NULL;
xop = __xml_next_element(xop)) {
int tmp = 0;
crm_element_value_int(xop, XML_LRM_ATTR_CALLID, &tmp);
if (tmp > op->call_id) {
op->call_id = tmp;
}
}
op->call_id++;
return op;
}
static xmlNode *
inject_op(xmlNode * cib_resource, lrmd_event_data_t * op, int target_rc)
{
return pcmk__create_history_xml(cib_resource, op, CRM_FEATURE_SET,
target_rc, NULL, crm_system_name,
LOG_TRACE);
}
static xmlNode *
inject_node_state(cib_t * cib_conn, const char *node, const char *uuid)
{
int rc = pcmk_ok;
xmlNode *cib_object = NULL;
char *xpath = crm_strdup_printf(NODE_TEMPLATE, node);
if (bringing_nodes_online) {
create_node_entry(cib_conn, node);
}
rc = cib_conn->cmds->query(cib_conn, xpath, &cib_object,
cib_xpath | cib_sync_call | cib_scope_local);
if (cib_object && ID(cib_object) == NULL) {
crm_err("Detected multiple node_state entries for xpath=%s, bailing", xpath);
crm_log_xml_warn(cib_object, "Duplicates");
free(xpath);
crm_exit(CRM_EX_SOFTWARE);
return NULL; // not reached, but makes static analysis happy
}
if (rc == -ENXIO) {
char *found_uuid = NULL;
if (uuid == NULL) {
query_node_uuid(cib_conn, node, &found_uuid, NULL);
} else {
found_uuid = strdup(uuid);
}
cib_object = create_xml_node(NULL, XML_CIB_TAG_STATE);
crm_xml_add(cib_object, XML_ATTR_UUID, found_uuid);
crm_xml_add(cib_object, XML_ATTR_UNAME, node);
cib_conn->cmds->create(cib_conn, XML_CIB_TAG_STATUS, cib_object,
cib_sync_call | cib_scope_local);
free_xml(cib_object);
free(found_uuid);
rc = cib_conn->cmds->query(cib_conn, xpath, &cib_object,
cib_xpath | cib_sync_call | cib_scope_local);
crm_trace("injecting node state for %s. rc is %d", node, rc);
}
free(xpath);
CRM_ASSERT(rc == pcmk_ok);
return cib_object;
}
static xmlNode *
modify_node(cib_t * cib_conn, char *node, gboolean up)
{
xmlNode *cib_node = inject_node_state(cib_conn, node, NULL);
if (up) {
crm_xml_add(cib_node, XML_NODE_IN_CLUSTER, XML_BOOLEAN_YES);
crm_xml_add(cib_node, XML_NODE_IS_PEER, ONLINESTATUS);
crm_xml_add(cib_node, XML_NODE_JOIN_STATE, CRMD_JOINSTATE_MEMBER);
crm_xml_add(cib_node, XML_NODE_EXPECTED, CRMD_JOINSTATE_MEMBER);
} else {
crm_xml_add(cib_node, XML_NODE_IN_CLUSTER, XML_BOOLEAN_NO);
crm_xml_add(cib_node, XML_NODE_IS_PEER, OFFLINESTATUS);
crm_xml_add(cib_node, XML_NODE_JOIN_STATE, CRMD_JOINSTATE_DOWN);
crm_xml_add(cib_node, XML_NODE_EXPECTED, CRMD_JOINSTATE_DOWN);
}
crm_xml_add(cib_node, XML_ATTR_ORIGIN, crm_system_name);
return cib_node;
}
static xmlNode *
find_resource_xml(xmlNode * cib_node, const char *resource)
{
xmlNode *match = NULL;
const char *node = crm_element_value(cib_node, XML_ATTR_UNAME);
char *xpath = crm_strdup_printf(RSC_TEMPLATE, node, resource);
match = get_xpath_object(xpath, cib_node, LOG_TRACE);
free(xpath);
return match;
}
static xmlNode *
inject_resource(xmlNode * cib_node, const char *resource, const char *lrm_name,
const char *rclass, const char *rtype, const char *rprovider)
{
xmlNode *lrm = NULL;
xmlNode *container = NULL;
xmlNode *cib_resource = NULL;
char *xpath = NULL;
cib_resource = find_resource_xml(cib_node, resource);
if (cib_resource != NULL) {
/* If an existing LRM history entry uses the resource name,
* continue using it, even if lrm_name is different.
*/
return cib_resource;
}
// Check for history entry under preferred name
if (strcmp(resource, lrm_name)) {
cib_resource = find_resource_xml(cib_node, lrm_name);
if (cib_resource != NULL) {
return cib_resource;
}
}
/* One day, add query for class, provider, type */
if (rclass == NULL || rtype == NULL) {
fprintf(stderr, "Resource %s not found in the status section of %s."
" Please supply the class and type to continue\n", resource, ID(cib_node));
return NULL;
- } else if (pcmk__str_none_of(rclass, PCMK_RESOURCE_CLASS_OCF, PCMK_RESOURCE_CLASS_STONITH,
+ } else if (!pcmk__str_any_of(rclass, PCMK_RESOURCE_CLASS_OCF, PCMK_RESOURCE_CLASS_STONITH,
PCMK_RESOURCE_CLASS_SERVICE, PCMK_RESOURCE_CLASS_UPSTART,
PCMK_RESOURCE_CLASS_SYSTEMD, PCMK_RESOURCE_CLASS_LSB, NULL)) {
fprintf(stderr, "Invalid class for %s: %s\n", resource, rclass);
return NULL;
} else if (is_set(pcmk_get_ra_caps(rclass), pcmk_ra_cap_provider)
&& (rprovider == NULL)) {
fprintf(stderr, "Please specify the provider for resource %s\n", resource);
return NULL;
}
xpath = (char *)xmlGetNodePath(cib_node);
crm_info("Injecting new resource %s into %s '%s'", lrm_name, xpath, ID(cib_node));
free(xpath);
lrm = first_named_child(cib_node, XML_CIB_TAG_LRM);
if (lrm == NULL) {
const char *node_uuid = ID(cib_node);
lrm = create_xml_node(cib_node, XML_CIB_TAG_LRM);
crm_xml_add(lrm, XML_ATTR_ID, node_uuid);
}
container = first_named_child(lrm, XML_LRM_TAG_RESOURCES);
if (container == NULL) {
container = create_xml_node(lrm, XML_LRM_TAG_RESOURCES);
}
cib_resource = create_xml_node(container, XML_LRM_TAG_RESOURCE);
// If we're creating a new entry, use the preferred name
crm_xml_add(cib_resource, XML_ATTR_ID, lrm_name);
crm_xml_add(cib_resource, XML_AGENT_ATTR_CLASS, rclass);
crm_xml_add(cib_resource, XML_AGENT_ATTR_PROVIDER, rprovider);
crm_xml_add(cib_resource, XML_ATTR_TYPE, rtype);
return cib_resource;
}
#define XPATH_MAX 1024
static int
find_ticket_state(cib_t * the_cib, const char *ticket_id, xmlNode ** ticket_state_xml)
{
int offset = 0;
int rc = pcmk_ok;
xmlNode *xml_search = NULL;
char *xpath_string = NULL;
CRM_ASSERT(ticket_state_xml != NULL);
*ticket_state_xml = NULL;
xpath_string = calloc(1, XPATH_MAX);
offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "%s", "/cib/status/tickets");
if (ticket_id) {
offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "/%s[@id=\"%s\"]",
XML_CIB_TAG_TICKET_STATE, ticket_id);
}
CRM_LOG_ASSERT(offset > 0);
rc = the_cib->cmds->query(the_cib, xpath_string, &xml_search,
cib_sync_call | cib_scope_local | cib_xpath);
if (rc != pcmk_ok) {
goto bail;
}
crm_log_xml_debug(xml_search, "Match");
if (xml_has_children(xml_search)) {
if (ticket_id) {
fprintf(stdout, "Multiple ticket_states match ticket_id=%s\n", ticket_id);
}
*ticket_state_xml = xml_search;
} else {
*ticket_state_xml = xml_search;
}
bail:
free(xpath_string);
return rc;
}
static int
set_ticket_state_attr(const char *ticket_id, const char *attr_name,
const char *attr_value, cib_t * cib, int cib_options)
{
int rc = pcmk_ok;
xmlNode *xml_top = NULL;
xmlNode *ticket_state_xml = NULL;
rc = find_ticket_state(cib, ticket_id, &ticket_state_xml);
if (rc == pcmk_ok) {
crm_debug("Found a match state for ticket: id=%s", ticket_id);
xml_top = ticket_state_xml;
} else if (rc != -ENXIO) {
return rc;
} else {
xmlNode *xml_obj = NULL;
xml_top = create_xml_node(NULL, XML_CIB_TAG_STATUS);
xml_obj = create_xml_node(xml_top, XML_CIB_TAG_TICKETS);
ticket_state_xml = create_xml_node(xml_obj, XML_CIB_TAG_TICKET_STATE);
crm_xml_add(ticket_state_xml, XML_ATTR_ID, ticket_id);
}
crm_xml_add(ticket_state_xml, attr_name, attr_value);
crm_log_xml_debug(xml_top, "Update");
rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, xml_top, cib_options);
free_xml(xml_top);
return rc;
}
void
modify_configuration(pe_working_set_t * data_set, cib_t *cib,
const char *quorum, const char *watchdog, GListPtr node_up, GListPtr node_down, GListPtr node_fail,
GListPtr op_inject, GListPtr ticket_grant, GListPtr ticket_revoke,
GListPtr ticket_standby, GListPtr ticket_activate)
{
int rc = pcmk_ok;
GListPtr gIter = NULL;
xmlNode *cib_op = NULL;
xmlNode *cib_node = NULL;
xmlNode *cib_resource = NULL;
lrmd_event_data_t *op = NULL;
if (quorum) {
xmlNode *top = create_xml_node(NULL, XML_TAG_CIB);
quiet_log(" + Setting quorum: %s\n", quorum);
/* crm_xml_add(top, XML_ATTR_DC_UUID, dc_uuid); */
crm_xml_add(top, XML_ATTR_HAVE_QUORUM, quorum);
rc = cib->cmds->modify(cib, NULL, top, cib_sync_call | cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
}
if (watchdog) {
quiet_log(" + Setting watchdog: %s\n", watchdog);
rc = update_attr_delegate(cib, cib_sync_call | cib_scope_local,
XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL,
XML_ATTR_HAVE_WATCHDOG, watchdog, FALSE, NULL, NULL);
CRM_ASSERT(rc == pcmk_ok);
}
for (gIter = node_up; gIter != NULL; gIter = gIter->next) {
char *node = (char *)gIter->data;
quiet_log(" + Bringing node %s online\n", node);
cib_node = modify_node(cib, node, TRUE);
CRM_ASSERT(cib_node != NULL);
rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, cib_node,
cib_sync_call | cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
free_xml(cib_node);
}
for (gIter = node_down; gIter != NULL; gIter = gIter->next) {
char xpath[STATUS_PATH_MAX];
char *node = (char *)gIter->data;
quiet_log(" + Taking node %s offline\n", node);
cib_node = modify_node(cib, node, FALSE);
CRM_ASSERT(cib_node != NULL);
rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, cib_node,
cib_sync_call | cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
free_xml(cib_node);
snprintf(xpath, STATUS_PATH_MAX, "//node_state[@uname='%s']/%s", node, XML_CIB_TAG_LRM);
cib->cmds->remove(cib, xpath, NULL,
cib_xpath | cib_sync_call | cib_scope_local);
snprintf(xpath, STATUS_PATH_MAX, "//node_state[@uname='%s']/%s", node,
XML_TAG_TRANSIENT_NODEATTRS);
cib->cmds->remove(cib, xpath, NULL,
cib_xpath | cib_sync_call | cib_scope_local);
}
for (gIter = node_fail; gIter != NULL; gIter = gIter->next) {
char *node = (char *)gIter->data;
quiet_log(" + Failing node %s\n", node);
cib_node = modify_node(cib, node, TRUE);
crm_xml_add(cib_node, XML_NODE_IN_CLUSTER, XML_BOOLEAN_NO);
CRM_ASSERT(cib_node != NULL);
rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, cib_node,
cib_sync_call | cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
free_xml(cib_node);
}
for (gIter = ticket_grant; gIter != NULL; gIter = gIter->next) {
char *ticket_id = (char *)gIter->data;
quiet_log(" + Granting ticket %s\n", ticket_id);
rc = set_ticket_state_attr(ticket_id, "granted", "true",
cib, cib_sync_call | cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
}
for (gIter = ticket_revoke; gIter != NULL; gIter = gIter->next) {
char *ticket_id = (char *)gIter->data;
quiet_log(" + Revoking ticket %s\n", ticket_id);
rc = set_ticket_state_attr(ticket_id, "granted", "false",
cib, cib_sync_call | cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
}
for (gIter = ticket_standby; gIter != NULL; gIter = gIter->next) {
char *ticket_id = (char *)gIter->data;
quiet_log(" + Making ticket %s standby\n", ticket_id);
rc = set_ticket_state_attr(ticket_id, "standby", "true",
cib, cib_sync_call | cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
}
for (gIter = ticket_activate; gIter != NULL; gIter = gIter->next) {
char *ticket_id = (char *)gIter->data;
quiet_log(" + Activating ticket %s\n", ticket_id);
rc = set_ticket_state_attr(ticket_id, "standby", "false",
cib, cib_sync_call | cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
}
for (gIter = op_inject; gIter != NULL; gIter = gIter->next) {
char *spec = (char *)gIter->data;
int rc = 0;
int outcome = 0;
guint interval_ms = 0;
char *key = NULL;
char *node = NULL;
char *task = NULL;
char *resource = NULL;
const char *rtype = NULL;
const char *rclass = NULL;
const char *rprovider = NULL;
pe_resource_t *rsc = NULL;
quiet_log(" + Injecting %s into the configuration\n", spec);
key = calloc(1, strlen(spec) + 1);
node = calloc(1, strlen(spec) + 1);
rc = sscanf(spec, "%[^@]@%[^=]=%d", key, node, &outcome);
if (rc != 3) {
fprintf(stderr, "Invalid operation spec: %s. Only found %d fields\n", spec, rc);
free(key);
free(node);
continue;
}
parse_op_key(key, &resource, &task, &interval_ms);
rsc = pe_find_resource(data_set->resources, resource);
if (rsc == NULL) {
fprintf(stderr, " - Invalid resource name: %s\n", resource);
} else {
rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
rtype = crm_element_value(rsc->xml, XML_ATTR_TYPE);
rprovider = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER);
cib_node = inject_node_state(cib, node, NULL);
CRM_ASSERT(cib_node != NULL);
update_failcounts(cib_node, resource, task, interval_ms, outcome);
cib_resource = inject_resource(cib_node, resource, resource,
rclass, rtype, rprovider);
CRM_ASSERT(cib_resource != NULL);
op = create_op(cib_resource, task, interval_ms, outcome);
CRM_ASSERT(op != NULL);
cib_op = inject_op(cib_resource, op, 0);
CRM_ASSERT(cib_op != NULL);
lrmd_free_event(op);
rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, cib_node,
cib_sync_call | cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
}
free(task);
free(node);
free(key);
}
}
static gboolean
exec_pseudo_action(crm_graph_t * graph, crm_action_t * action)
{
const char *node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
action->confirmed = TRUE;
quiet_log(" * Pseudo action: %s%s%s\n", task, node ? " on " : "", node ? node : "");
update_graph(graph, action);
return TRUE;
}
static gboolean
exec_rsc_action(crm_graph_t * graph, crm_action_t * action)
{
int rc = 0;
GListPtr gIter = NULL;
lrmd_event_data_t *op = NULL;
int target_outcome = 0;
const char *rtype = NULL;
const char *rclass = NULL;
const char *resource = NULL;
const char *rprovider = NULL;
const char *lrm_name = NULL;
const char *operation = crm_element_value(action->xml, "operation");
const char *target_rc_s = crm_meta_value(action->params, XML_ATTR_TE_TARGET_RC);
xmlNode *cib_node = NULL;
xmlNode *cib_resource = NULL;
xmlNode *action_rsc = first_named_child(action->xml, XML_CIB_TAG_RESOURCE);
char *node = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET);
char *uuid = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET_UUID);
const char *router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
if (pcmk__str_any_of(operation, CRM_OP_PROBED, CRM_OP_REPROBE, NULL)) {
crm_info("Skipping %s op for %s", operation, node);
goto done;
}
if (action_rsc == NULL) {
crm_log_xml_err(action->xml, "Bad");
free(node); free(uuid);
return FALSE;
}
/* Look for the preferred name
* If not found, try the expected 'local' name
* If not found use the preferred name anyway
*/
resource = crm_element_value(action_rsc, XML_ATTR_ID);
CRM_ASSERT(resource != NULL); // makes static analysis happy
lrm_name = resource; // Preferred name when writing history
if (pe_find_resource(fake_resource_list, resource) == NULL) {
const char *longname = crm_element_value(action_rsc, XML_ATTR_ID_LONG);
if (longname && pe_find_resource(fake_resource_list, longname)) {
resource = longname;
}
}
if (pcmk__str_any_of(operation, "delete", RSC_METADATA, NULL)) {
quiet_log(" * Resource action: %-15s %s on %s\n", resource, operation, node);
goto done;
}
rclass = crm_element_value(action_rsc, XML_AGENT_ATTR_CLASS);
rtype = crm_element_value(action_rsc, XML_ATTR_TYPE);
rprovider = crm_element_value(action_rsc, XML_AGENT_ATTR_PROVIDER);
if (target_rc_s != NULL) {
target_outcome = crm_parse_int(target_rc_s, "0");
}
CRM_ASSERT(fake_cib->cmds->query(fake_cib, NULL, NULL, cib_sync_call | cib_scope_local) ==
pcmk_ok);
cib_node = inject_node_state(fake_cib, node, (router_node? node : uuid));
CRM_ASSERT(cib_node != NULL);
cib_resource = inject_resource(cib_node, resource, lrm_name,
rclass, rtype, rprovider);
if (cib_resource == NULL) {
crm_err("invalid resource in transition");
free(node); free(uuid);
free_xml(cib_node);
return FALSE;
}
op = convert_graph_action(cib_resource, action, 0, target_outcome);
if (op->interval_ms) {
quiet_log(" * Resource action: %-15s %s=%u on %s\n",
resource, op->op_type, op->interval_ms, node);
} else {
quiet_log(" * Resource action: %-15s %s on %s\n", resource, op->op_type, node);
}
for (gIter = fake_op_fail_list; gIter != NULL; gIter = gIter->next) {
char *spec = (char *)gIter->data;
char *key = NULL;
const char *match_name = NULL;
// Allow user to specify anonymous clone with or without instance number
key = crm_strdup_printf(PCMK__OP_FMT "@%s=", resource, op->op_type,
op->interval_ms, node);
if (strncasecmp(key, spec, strlen(key)) == 0) {
match_name = resource;
}
free(key);
if ((match_name == NULL) && strcmp(resource, lrm_name)) {
key = crm_strdup_printf(PCMK__OP_FMT "@%s=", lrm_name, op->op_type,
op->interval_ms, node);
if (strncasecmp(key, spec, strlen(key)) == 0) {
match_name = lrm_name;
}
free(key);
}
if (match_name != NULL) {
rc = sscanf(spec, "%*[^=]=%d", (int *) &op->rc);
// ${match_name}_${task}_${interval_in_ms}@${node}=${rc}
if (rc != 1) {
fprintf(stderr,
"Invalid failed operation spec: %s. Result code must be integer\n",
spec);
continue;
}
action->failed = TRUE;
graph->abort_priority = INFINITY;
printf("\tPretending action %d failed with rc=%d\n", action->id, op->rc);
update_failcounts(cib_node, match_name, op->op_type,
op->interval_ms, op->rc);
break;
}
}
inject_op(cib_resource, op, target_outcome);
lrmd_free_event(op);
rc = fake_cib->cmds->modify(fake_cib, XML_CIB_TAG_STATUS, cib_node,
cib_sync_call | cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
done:
free(node); free(uuid);
free_xml(cib_node);
action->confirmed = TRUE;
update_graph(graph, action);
return TRUE;
}
static gboolean
exec_crmd_action(crm_graph_t * graph, crm_action_t * action)
{
const char *node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
xmlNode *rsc = first_named_child(action->xml, XML_CIB_TAG_RESOURCE);
action->confirmed = TRUE;
if(rsc) {
quiet_log(" * Cluster action: %s for %s on %s\n", task, ID(rsc), node);
} else {
quiet_log(" * Cluster action: %s on %s\n", task, node);
}
update_graph(graph, action);
return TRUE;
}
static gboolean
exec_stonith_action(crm_graph_t * graph, crm_action_t * action)
{
const char *op = crm_meta_value(action->params, "stonith_action");
char *target = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET);
quiet_log(" * Fencing %s (%s)\n", target, op);
if(safe_str_neq(op, "on")) {
int rc = 0;
char xpath[STATUS_PATH_MAX];
xmlNode *cib_node = modify_node(fake_cib, target, FALSE);
crm_xml_add(cib_node, XML_ATTR_ORIGIN, __FUNCTION__);
CRM_ASSERT(cib_node != NULL);
rc = fake_cib->cmds->replace(fake_cib, XML_CIB_TAG_STATUS, cib_node,
cib_sync_call | cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
snprintf(xpath, STATUS_PATH_MAX, "//node_state[@uname='%s']/%s", target, XML_CIB_TAG_LRM);
fake_cib->cmds->remove(fake_cib, xpath, NULL,
cib_xpath | cib_sync_call | cib_scope_local);
snprintf(xpath, STATUS_PATH_MAX, "//node_state[@uname='%s']/%s", target,
XML_TAG_TRANSIENT_NODEATTRS);
fake_cib->cmds->remove(fake_cib, xpath, NULL,
cib_xpath | cib_sync_call | cib_scope_local);
free_xml(cib_node);
}
action->confirmed = TRUE;
update_graph(graph, action);
free(target);
return TRUE;
}
int
run_simulation(pe_working_set_t * data_set, cib_t *cib, GListPtr op_fail_list, bool quiet)
{
crm_graph_t *transition = NULL;
enum transition_status graph_rc = -1;
crm_graph_functions_t exec_fns = {
exec_pseudo_action,
exec_rsc_action,
exec_crmd_action,
exec_stonith_action,
};
fake_cib = cib;
fake_quiet = quiet;
fake_op_fail_list = op_fail_list;
quiet_log("\nExecuting cluster transition:\n");
set_graph_functions(&exec_fns);
transition = unpack_graph(data_set->graph, crm_system_name);
print_graph(LOG_DEBUG, transition);
fake_resource_list = data_set->resources;
do {
graph_rc = run_graph(transition);
} while (graph_rc == transition_active);
fake_resource_list = NULL;
if (graph_rc != transition_complete) {
fprintf(stdout, "Transition failed: %s\n", transition_status(graph_rc));
print_graph(LOG_ERR, transition);
}
destroy_graph(transition);
if (graph_rc != transition_complete) {
fprintf(stdout, "An invalid transition was produced\n");
}
if (quiet == FALSE) {
xmlNode *cib_object = NULL;
int rc = fake_cib->cmds->query(fake_cib, NULL, &cib_object, cib_sync_call | cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
pe_reset_working_set(data_set);
data_set->input = cib_object;
}
if (graph_rc != transition_complete) {
return graph_rc;
}
return 0;
}
diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c
index 4eebe33699..f21c397b64 100644
--- a/lib/pengine/utils.c
+++ b/lib/pengine/utils.c
@@ -1,2832 +1,2832 @@
/*
* Copyright 2004-2020 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/common/util.h>
#include <glib.h>
#include <stdbool.h>
#include <crm/pengine/rules.h>
#include <crm/pengine/internal.h>
extern xmlNode *get_object_root(const char *object_type, xmlNode * the_root);
void print_str_str(gpointer key, gpointer value, gpointer user_data);
gboolean ghash_free_str_str(gpointer key, gpointer value, gpointer user_data);
static void unpack_operation(pe_action_t * action, xmlNode * xml_obj, pe_resource_t * container,
pe_working_set_t * data_set, guint interval_ms);
static xmlNode *find_rsc_op_entry_helper(pe_resource_t * rsc, const char *key,
gboolean include_disabled);
#if ENABLE_VERSIONED_ATTRS
pe_rsc_action_details_t *
pe_rsc_action_details(pe_action_t *action)
{
pe_rsc_action_details_t *details;
CRM_CHECK(action != NULL, return NULL);
if (action->action_details == NULL) {
action->action_details = calloc(1, sizeof(pe_rsc_action_details_t));
CRM_CHECK(action->action_details != NULL, return NULL);
}
details = (pe_rsc_action_details_t *) action->action_details;
if (details->versioned_parameters == NULL) {
details->versioned_parameters = create_xml_node(NULL,
XML_TAG_OP_VER_ATTRS);
}
if (details->versioned_meta == NULL) {
details->versioned_meta = create_xml_node(NULL, XML_TAG_OP_VER_META);
}
return details;
}
static void
pe_free_rsc_action_details(pe_action_t *action)
{
pe_rsc_action_details_t *details;
if ((action == NULL) || (action->action_details == NULL)) {
return;
}
details = (pe_rsc_action_details_t *) action->action_details;
if (details->versioned_parameters) {
free_xml(details->versioned_parameters);
}
if (details->versioned_meta) {
free_xml(details->versioned_meta);
}
action->action_details = NULL;
}
#endif
/*!
* \internal
* \brief Check whether we can fence a particular node
*
* \param[in] data_set Working set for cluster
* \param[in] node Name of node to check
*
* \return true if node can be fenced, false otherwise
*/
bool
pe_can_fence(pe_working_set_t *data_set, pe_node_t *node)
{
if (pe__is_guest_node(node)) {
/* Guest nodes are fenced by stopping their container resource. We can
* do that if the container's host is either online or fenceable.
*/
pe_resource_t *rsc = node->details->remote_rsc->container;
for (GList *n = rsc->running_on; n != NULL; n = n->next) {
pe_node_t *container_node = n->data;
if (!container_node->details->online
&& !pe_can_fence(data_set, container_node)) {
return false;
}
}
return true;
} else if(is_not_set(data_set->flags, pe_flag_stonith_enabled)) {
return false; /* Turned off */
} else if (is_not_set(data_set->flags, pe_flag_have_stonith_resource)) {
return false; /* No devices */
} else if (is_set(data_set->flags, pe_flag_have_quorum)) {
return true;
} else if (data_set->no_quorum_policy == no_quorum_ignore) {
return true;
} else if(node == NULL) {
return false;
} else if(node->details->online) {
crm_notice("We can fence %s without quorum because they're in our membership", node->details->uname);
return true;
}
crm_trace("Cannot fence %s", node->details->uname);
return false;
}
/*!
* \internal
* \brief Copy a node object
*
* \param[in] this_node Node object to copy
*
* \return Newly allocated shallow copy of this_node
* \note This function asserts on errors and is guaranteed to return non-NULL.
*/
pe_node_t *
pe__copy_node(const pe_node_t *this_node)
{
pe_node_t *new_node = NULL;
CRM_ASSERT(this_node != NULL);
new_node = calloc(1, sizeof(pe_node_t));
CRM_ASSERT(new_node != NULL);
new_node->rsc_discover_mode = this_node->rsc_discover_mode;
new_node->weight = this_node->weight;
new_node->fixed = this_node->fixed;
new_node->details = this_node->details;
return new_node;
}
/* any node in list1 or list2 and not in the other gets a score of -INFINITY */
void
node_list_exclude(GHashTable * hash, GListPtr list, gboolean merge_scores)
{
GHashTable *result = hash;
pe_node_t *other_node = NULL;
GListPtr gIter = list;
GHashTableIter iter;
pe_node_t *node = NULL;
g_hash_table_iter_init(&iter, hash);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
other_node = pe_find_node_id(list, node->details->id);
if (other_node == NULL) {
node->weight = -INFINITY;
} else if (merge_scores) {
node->weight = pe__add_scores(node->weight, other_node->weight);
}
}
for (; gIter != NULL; gIter = gIter->next) {
pe_node_t *node = (pe_node_t *) gIter->data;
other_node = pe_hash_table_lookup(result, node->details->id);
if (other_node == NULL) {
pe_node_t *new_node = pe__copy_node(node);
new_node->weight = -INFINITY;
g_hash_table_insert(result, (gpointer) new_node->details->id, new_node);
}
}
}
/*!
* \internal
* \brief Create a node hash table from a node list
*
* \param[in] list Node list
*
* \return Hash table equivalent of node list
*/
GHashTable *
pe__node_list2table(GList *list)
{
GHashTable *result = NULL;
result = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free);
for (GList *gIter = list; gIter != NULL; gIter = gIter->next) {
pe_node_t *new_node = pe__copy_node((pe_node_t *) gIter->data);
g_hash_table_insert(result, (gpointer) new_node->details->id, new_node);
}
return result;
}
gint
sort_node_uname(gconstpointer a, gconstpointer b)
{
return pcmk_numeric_strcasecmp(((const pe_node_t *) a)->details->uname,
((const pe_node_t *) b)->details->uname);
}
/*!
* \internal
* \brief Output node weights to stdout
*
* \param[in] rsc Use allowed nodes for this resource
* \param[in] comment Text description to prefix lines with
* \param[in] nodes If rsc is not specified, use these nodes
*/
static void
pe__output_node_weights(pe_resource_t *rsc, const char *comment,
GHashTable *nodes)
{
char score[128]; // Stack-allocated since this is called frequently
// Sort the nodes so the output is consistent for regression tests
GList *list = g_list_sort(g_hash_table_get_values(nodes), sort_node_uname);
for (GList *gIter = list; gIter != NULL; gIter = gIter->next) {
pe_node_t *node = (pe_node_t *) gIter->data;
score2char_stack(node->weight, score, sizeof(score));
if (rsc) {
printf("%s: %s allocation score on %s: %s\n",
comment, rsc->id, node->details->uname, score);
} else {
printf("%s: %s = %s\n", comment, node->details->uname, score);
}
}
g_list_free(list);
}
/*!
* \internal
* \brief Log node weights at trace level
*
* \param[in] file Caller's filename
* \param[in] function Caller's function name
* \param[in] line Caller's line number
* \param[in] rsc Use allowed nodes for this resource
* \param[in] comment Text description to prefix lines with
* \param[in] nodes If rsc is not specified, use these nodes
*/
static void
pe__log_node_weights(const char *file, const char *function, int line,
pe_resource_t *rsc, const char *comment, GHashTable *nodes)
{
GHashTableIter iter;
pe_node_t *node = NULL;
char score[128]; // Stack-allocated since this is called frequently
// Don't waste time if we're not tracing at this point
pcmk__log_else(LOG_TRACE, return);
g_hash_table_iter_init(&iter, nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
score2char_stack(node->weight, score, sizeof(score));
if (rsc) {
qb_log_from_external_source(function, file,
"%s: %s allocation score on %s: %s",
LOG_TRACE, line, 0,
comment, rsc->id,
node->details->uname, score);
} else {
qb_log_from_external_source(function, file, "%s: %s = %s",
LOG_TRACE, line, 0,
comment, node->details->uname,
score);
}
}
}
/*!
* \internal
* \brief Log or output node weights
*
* \param[in] file Caller's filename
* \param[in] function Caller's function name
* \param[in] line Caller's line number
* \param[in] to_log Log if true, otherwise output
* \param[in] rsc Use allowed nodes for this resource
* \param[in] comment Text description to prefix lines with
* \param[in] nodes If rsc is not specified, use these nodes
*/
void
pe__show_node_weights_as(const char *file, const char *function, int line,
bool to_log, pe_resource_t *rsc, const char *comment,
GHashTable *nodes)
{
if (rsc != NULL) {
if (is_set(rsc->flags, pe_rsc_orphan)) {
// Don't show allocation scores for orphans
return;
}
nodes = rsc->allowed_nodes;
}
if (nodes == NULL) {
// Nothing to show
return;
}
if (to_log) {
pe__log_node_weights(file, function, line, rsc, comment, nodes);
} else {
pe__output_node_weights(rsc, comment, nodes);
}
// If this resource has children, repeat recursively for each
if (rsc && rsc->children) {
for (GList *gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
pe__show_node_weights_as(file, function, line, to_log, child,
comment, nodes);
}
}
}
static void
append_dump_text(gpointer key, gpointer value, gpointer user_data)
{
char **dump_text = user_data;
char *new_text = crm_strdup_printf("%s %s=%s",
*dump_text, (char *)key, (char *)value);
free(*dump_text);
*dump_text = new_text;
}
void
dump_node_capacity(int level, const char *comment, pe_node_t * node)
{
char *dump_text = crm_strdup_printf("%s: %s capacity:",
comment, node->details->uname);
g_hash_table_foreach(node->details->utilization, append_dump_text, &dump_text);
if (level == LOG_STDOUT) {
fprintf(stdout, "%s\n", dump_text);
} else {
crm_trace("%s", dump_text);
}
free(dump_text);
}
void
dump_rsc_utilization(int level, const char *comment, pe_resource_t * rsc, pe_node_t * node)
{
char *dump_text = crm_strdup_printf("%s: %s utilization on %s:",
comment, rsc->id, node->details->uname);
g_hash_table_foreach(rsc->utilization, append_dump_text, &dump_text);
switch (level) {
case LOG_STDOUT:
fprintf(stdout, "%s\n", dump_text);
break;
case LOG_NEVER:
break;
default:
crm_trace("%s", dump_text);
}
free(dump_text);
}
gint
sort_rsc_index(gconstpointer a, gconstpointer b)
{
const pe_resource_t *resource1 = (const pe_resource_t *)a;
const pe_resource_t *resource2 = (const pe_resource_t *)b;
if (a == NULL && b == NULL) {
return 0;
}
if (a == NULL) {
return 1;
}
if (b == NULL) {
return -1;
}
if (resource1->sort_index > resource2->sort_index) {
return -1;
}
if (resource1->sort_index < resource2->sort_index) {
return 1;
}
return 0;
}
gint
sort_rsc_priority(gconstpointer a, gconstpointer b)
{
const pe_resource_t *resource1 = (const pe_resource_t *)a;
const pe_resource_t *resource2 = (const pe_resource_t *)b;
if (a == NULL && b == NULL) {
return 0;
}
if (a == NULL) {
return 1;
}
if (b == NULL) {
return -1;
}
if (resource1->priority > resource2->priority) {
return -1;
}
if (resource1->priority < resource2->priority) {
return 1;
}
return 0;
}
static enum pe_quorum_policy
effective_quorum_policy(pe_resource_t *rsc, pe_working_set_t *data_set)
{
enum pe_quorum_policy policy = data_set->no_quorum_policy;
if (is_set(data_set->flags, pe_flag_have_quorum)) {
policy = no_quorum_ignore;
} else if (data_set->no_quorum_policy == no_quorum_demote) {
switch (rsc->role) {
case RSC_ROLE_MASTER:
case RSC_ROLE_SLAVE:
if (rsc->next_role > RSC_ROLE_SLAVE) {
rsc->next_role = RSC_ROLE_SLAVE;
}
policy = no_quorum_ignore;
break;
default:
policy = no_quorum_stop;
break;
}
}
return policy;
}
pe_action_t *
custom_action(pe_resource_t * rsc, char *key, const char *task,
pe_node_t * on_node, gboolean optional, gboolean save_action,
pe_working_set_t * data_set)
{
pe_action_t *action = NULL;
GListPtr possible_matches = NULL;
CRM_CHECK(key != NULL, return NULL);
CRM_CHECK(task != NULL, free(key); return NULL);
if (save_action && rsc != NULL) {
possible_matches = find_actions(rsc->actions, key, on_node);
} else if(save_action) {
#if 0
action = g_hash_table_lookup(data_set->singletons, key);
#else
/* More expensive but takes 'node' into account */
possible_matches = find_actions(data_set->actions, key, on_node);
#endif
}
if(data_set->singletons == NULL) {
data_set->singletons = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, NULL);
}
if (possible_matches != NULL) {
if (pcmk__list_of_multiple(possible_matches)) {
pe_warn("Action %s for %s on %s exists %d times",
task, rsc ? rsc->id : "<NULL>",
on_node ? on_node->details->uname : "<NULL>", g_list_length(possible_matches));
}
action = g_list_nth_data(possible_matches, 0);
pe_rsc_trace(rsc, "Found existing action %d (%s) for %s (%s) on %s",
action->id, action->uuid,
(rsc? rsc->id : "no resource"), task,
(on_node? on_node->details->uname : "no node"));
g_list_free(possible_matches);
}
if (action == NULL) {
if (save_action) {
pe_rsc_trace(rsc, "Creating %s action %d: %s for %s (%s) on %s",
(optional? "optional" : "mandatory"),
data_set->action_id, key,
(rsc? rsc->id : "no resource"), task,
(on_node? on_node->details->uname : "no node"));
}
action = calloc(1, sizeof(pe_action_t));
if (save_action) {
action->id = data_set->action_id++;
} else {
action->id = 0;
}
action->rsc = rsc;
CRM_ASSERT(task != NULL);
action->task = strdup(task);
if (on_node) {
action->node = pe__copy_node(on_node);
}
action->uuid = strdup(key);
if (safe_str_eq(task, CRM_OP_LRM_DELETE)) {
// Resource history deletion for a node can be done on the DC
pe_set_action_bit(action, pe_action_dc);
}
pe_set_action_bit(action, pe_action_runnable);
if (optional) {
pe_set_action_bit(action, pe_action_optional);
} else {
pe_clear_action_bit(action, pe_action_optional);
}
action->extra = crm_str_table_new();
action->meta = crm_str_table_new();
if (save_action) {
data_set->actions = g_list_prepend(data_set->actions, action);
if(rsc == NULL) {
g_hash_table_insert(data_set->singletons, action->uuid, action);
}
}
if (rsc != NULL) {
guint interval_ms = 0;
action->op_entry = find_rsc_op_entry_helper(rsc, key, TRUE);
parse_op_key(key, NULL, NULL, &interval_ms);
unpack_operation(action, action->op_entry, rsc->container, data_set,
interval_ms);
if (save_action) {
rsc->actions = g_list_prepend(rsc->actions, action);
}
}
if (save_action) {
pe_rsc_trace(rsc, "Action %d created", action->id);
}
}
if (!optional && is_set(action->flags, pe_action_optional)) {
pe_rsc_trace(rsc, "Unset optional on action %d", action->id);
pe_clear_action_bit(action, pe_action_optional);
}
if (rsc != NULL) {
enum action_tasks a_task = text2task(action->task);
enum pe_quorum_policy quorum_policy = effective_quorum_policy(rsc, data_set);
int warn_level = LOG_TRACE;
if (save_action) {
warn_level = LOG_WARNING;
}
if (is_set(action->flags, pe_action_have_node_attrs) == FALSE
&& action->node != NULL && action->op_entry != NULL) {
pe_rule_eval_data_t rule_data = {
.node_hash = action->node->details->attrs,
.role = RSC_ROLE_UNKNOWN,
.now = data_set->now,
.match_data = NULL,
.rsc_data = NULL,
.op_data = NULL
};
pe_set_action_bit(action, pe_action_have_node_attrs);
pe__unpack_dataset_nvpairs(action->op_entry, XML_TAG_ATTR_SETS,
&rule_data, action->extra, NULL,
FALSE, data_set);
}
if (is_set(action->flags, pe_action_pseudo)) {
/* leave untouched */
} else if (action->node == NULL) {
pe_rsc_trace(rsc, "Unset runnable on %s", action->uuid);
pe_clear_action_bit(action, pe_action_runnable);
} else if (is_not_set(rsc->flags, pe_rsc_managed)
&& g_hash_table_lookup(action->meta,
XML_LRM_ATTR_INTERVAL_MS) == NULL) {
crm_debug("Action %s (unmanaged)", action->uuid);
pe_rsc_trace(rsc, "Set optional on %s", action->uuid);
pe_set_action_bit(action, pe_action_optional);
/* action->runnable = FALSE; */
} else if (is_not_set(action->flags, pe_action_dc)
&& !(action->node->details->online)
&& (!pe__is_guest_node(action->node)
|| action->node->details->remote_requires_reset)) {
pe_clear_action_bit(action, pe_action_runnable);
do_crm_log(warn_level, "Action %s on %s is unrunnable (offline)",
action->uuid, action->node->details->uname);
if (is_set(action->rsc->flags, pe_rsc_managed)
&& save_action && a_task == stop_rsc
&& action->node->details->unclean == FALSE) {
pe_fence_node(data_set, action->node, "resource actions are unrunnable", FALSE);
}
} else if (is_not_set(action->flags, pe_action_dc)
&& action->node->details->pending) {
pe_clear_action_bit(action, pe_action_runnable);
do_crm_log(warn_level, "Action %s on %s is unrunnable (pending)",
action->uuid, action->node->details->uname);
} else if (action->needs == rsc_req_nothing) {
pe_rsc_trace(rsc, "Action %s does not require anything", action->uuid);
pe_action_set_reason(action, NULL, TRUE);
if (pe__is_guest_node(action->node)
&& !pe_can_fence(data_set, action->node)) {
/* An action that requires nothing usually does not require any
* fencing in order to be runnable. However, there is an
* exception: an action cannot be completed if it is on a guest
* node whose host is unclean and cannot be fenced.
*/
pe_clear_action_bit(action, pe_action_runnable);
crm_debug("%s\t%s (cancelled : host cannot be fenced)",
action->node->details->uname, action->uuid);
} else {
pe_set_action_bit(action, pe_action_runnable);
}
#if 0
/*
* No point checking this
* - if we don't have quorum we can't stonith anyway
*/
} else if (action->needs == rsc_req_stonith) {
crm_trace("Action %s requires only stonith", action->uuid);
action->runnable = TRUE;
#endif
} else if (quorum_policy == no_quorum_stop) {
pe_action_set_flag_reason(__FUNCTION__, __LINE__, action, NULL, "no quorum", pe_action_runnable, TRUE);
crm_debug("%s\t%s (cancelled : quorum)", action->node->details->uname, action->uuid);
} else if (quorum_policy == no_quorum_freeze) {
pe_rsc_trace(rsc, "Check resource is already active: %s %s %s %s", rsc->id, action->uuid, role2text(rsc->next_role), role2text(rsc->role));
if (rsc->fns->active(rsc, TRUE) == FALSE || rsc->next_role > rsc->role) {
pe_action_set_flag_reason(__FUNCTION__, __LINE__, action, NULL, "quorum freeze", pe_action_runnable, TRUE);
pe_rsc_debug(rsc, "%s\t%s (cancelled : quorum freeze)",
action->node->details->uname, action->uuid);
}
} else if(is_not_set(action->flags, pe_action_runnable)) {
pe_rsc_trace(rsc, "Action %s is runnable", action->uuid);
//pe_action_set_reason(action, NULL, TRUE);
pe_set_action_bit(action, pe_action_runnable);
}
if (save_action) {
switch (a_task) {
case stop_rsc:
set_bit(rsc->flags, pe_rsc_stopping);
break;
case start_rsc:
clear_bit(rsc->flags, pe_rsc_starting);
if (is_set(action->flags, pe_action_runnable)) {
set_bit(rsc->flags, pe_rsc_starting);
}
break;
default:
break;
}
}
}
free(key);
return action;
}
static bool
valid_stop_on_fail(const char *value)
{
- return pcmk__str_none_of(value, "standby", "demote", "stop", NULL);
+ return !pcmk__str_any_of(value, "standby", "demote", "stop", NULL);
}
static const char *
unpack_operation_on_fail(pe_action_t * action)
{
const char *name = NULL;
const char *role = NULL;
const char *on_fail = NULL;
const char *interval_spec = NULL;
const char *enabled = NULL;
const char *value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ON_FAIL);
if (safe_str_eq(action->task, CRMD_ACTION_STOP)
&& !valid_stop_on_fail(value)) {
pcmk__config_err("Resetting '" XML_OP_ATTR_ON_FAIL "' for %s stop "
"action to default value because '%s' is not "
"allowed for stop", action->rsc->id, value);
return NULL;
} else if (safe_str_eq(action->task, CRMD_ACTION_DEMOTE) && !value) {
/* demote on_fail defaults to master monitor value if present */
xmlNode *operation = NULL;
CRM_CHECK(action->rsc != NULL, return NULL);
for (operation = __xml_first_child_element(action->rsc->ops_xml);
operation && !value; operation = __xml_next_element(operation)) {
if (!crm_str_eq((const char *)operation->name, "op", TRUE)) {
continue;
}
name = crm_element_value(operation, "name");
role = crm_element_value(operation, "role");
on_fail = crm_element_value(operation, XML_OP_ATTR_ON_FAIL);
enabled = crm_element_value(operation, "enabled");
interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL);
if (!on_fail) {
continue;
} else if (enabled && !crm_is_true(enabled)) {
continue;
} else if (safe_str_neq(name, "monitor") || safe_str_neq(role, "Master")) {
continue;
} else if (crm_parse_interval_spec(interval_spec) == 0) {
continue;
} else if (safe_str_eq(on_fail, "demote")) {
continue;
}
value = on_fail;
}
} else if (safe_str_eq(action->task, CRM_OP_LRM_DELETE)) {
value = "ignore";
} else if (safe_str_eq(value, "demote")) {
name = crm_element_value(action->op_entry, "name");
role = crm_element_value(action->op_entry, "role");
on_fail = crm_element_value(action->op_entry, XML_OP_ATTR_ON_FAIL);
interval_spec = crm_element_value(action->op_entry,
XML_LRM_ATTR_INTERVAL);
if (safe_str_neq(name, CRMD_ACTION_PROMOTE)
&& (safe_str_neq(name, CRMD_ACTION_STATUS)
|| safe_str_neq(role, "Master")
|| (crm_parse_interval_spec(interval_spec) == 0))) {
pcmk__config_err("Resetting '" XML_OP_ATTR_ON_FAIL "' for %s %s "
"action to default value because 'demote' is not "
"allowed for it", action->rsc->id, name);
return NULL;
}
}
return value;
}
static xmlNode *
find_min_interval_mon(pe_resource_t * rsc, gboolean include_disabled)
{
guint interval_ms = 0;
guint min_interval_ms = G_MAXUINT;
const char *name = NULL;
const char *value = NULL;
const char *interval_spec = NULL;
xmlNode *op = NULL;
xmlNode *operation = NULL;
for (operation = __xml_first_child_element(rsc->ops_xml); operation != NULL;
operation = __xml_next_element(operation)) {
if (crm_str_eq((const char *)operation->name, "op", TRUE)) {
name = crm_element_value(operation, "name");
interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL);
value = crm_element_value(operation, "enabled");
if (!include_disabled && value && crm_is_true(value) == FALSE) {
continue;
}
if (safe_str_neq(name, RSC_STATUS)) {
continue;
}
interval_ms = crm_parse_interval_spec(interval_spec);
if (interval_ms && (interval_ms < min_interval_ms)) {
min_interval_ms = interval_ms;
op = operation;
}
}
}
return op;
}
static int
unpack_start_delay(const char *value, GHashTable *meta)
{
int start_delay = 0;
if (value != NULL) {
start_delay = crm_get_msec(value);
if (start_delay < 0) {
start_delay = 0;
}
if (meta) {
g_hash_table_replace(meta, strdup(XML_OP_ATTR_START_DELAY), crm_itoa(start_delay));
}
}
return start_delay;
}
// true if value contains valid, non-NULL interval origin for recurring op
static bool
unpack_interval_origin(const char *value, xmlNode *xml_obj, guint interval_ms,
crm_time_t *now, long long *start_delay)
{
long long result = 0;
guint interval_sec = interval_ms / 1000;
crm_time_t *origin = NULL;
// Ignore unspecified values and non-recurring operations
if ((value == NULL) || (interval_ms == 0) || (now == NULL)) {
return false;
}
// Parse interval origin from text
origin = crm_time_new(value);
if (origin == NULL) {
pcmk__config_err("Ignoring '" XML_OP_ATTR_ORIGIN "' for operation "
"'%s' because '%s' is not valid",
(ID(xml_obj)? ID(xml_obj) : "(missing ID)"), value);
return false;
}
// Get seconds since origin (negative if origin is in the future)
result = crm_time_get_seconds(now) - crm_time_get_seconds(origin);
crm_time_free(origin);
// Calculate seconds from closest interval to now
result = result % interval_sec;
// Calculate seconds remaining until next interval
result = ((result <= 0)? 0 : interval_sec) - result;
crm_info("Calculated a start delay of %llds for operation '%s'",
result,
(ID(xml_obj)? ID(xml_obj) : "(unspecified)"));
if (start_delay != NULL) {
*start_delay = result * 1000; // milliseconds
}
return true;
}
static int
unpack_timeout(const char *value)
{
int timeout = crm_get_msec(value);
if (timeout < 0) {
timeout = crm_get_msec(CRM_DEFAULT_OP_TIMEOUT_S);
}
return timeout;
}
int
pe_get_configured_timeout(pe_resource_t *rsc, const char *action, pe_working_set_t *data_set)
{
xmlNode *child = NULL;
const char *timeout = NULL;
GHashTable *action_meta = NULL;
int timeout_ms = 0;
pe_rule_eval_data_t rule_data = {
.node_hash = NULL,
.role = RSC_ROLE_UNKNOWN,
.now = data_set->now,
.match_data = NULL,
.rsc_data = NULL,
.op_data = NULL
};
for (child = first_named_child(rsc->ops_xml, XML_ATTR_OP);
child != NULL; child = crm_next_same_xml(child)) {
if (safe_str_eq(action, crm_element_value(child, XML_NVPAIR_ATTR_NAME))) {
timeout = crm_element_value(child, XML_ATTR_TIMEOUT);
break;
}
}
if (timeout == NULL && data_set->op_defaults) {
action_meta = crm_str_table_new();
pe__unpack_dataset_nvpairs(data_set->op_defaults, XML_TAG_META_SETS,
&rule_data, action_meta, NULL, FALSE, data_set);
timeout = g_hash_table_lookup(action_meta, XML_ATTR_TIMEOUT);
}
// @TODO check meta-attributes (including versioned meta-attributes)
// @TODO maybe use min-interval monitor timeout as default for monitors
timeout_ms = crm_get_msec(timeout);
if (timeout_ms < 0) {
timeout_ms = crm_get_msec(CRM_DEFAULT_OP_TIMEOUT_S);
}
if (action_meta != NULL) {
g_hash_table_destroy(action_meta);
}
return timeout_ms;
}
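/* Illustrative sketch (not part of this change): the lookups above resolve a
 * configured timeout for, say, a "start" action in this order:
 *   1. the timeout= attribute of the resource's matching <op name="start">
 *   2. a timeout defined in the cluster-wide <op_defaults> meta-attributes
 *   3. CRM_DEFAULT_OP_TIMEOUT_S
 * The winning value goes through crm_get_msec(), so "90s" and "90000ms"
 * both come back as 90000 milliseconds.
 */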
#if ENABLE_VERSIONED_ATTRS
static void
unpack_versioned_meta(xmlNode *versioned_meta, xmlNode *xml_obj,
guint interval_ms, crm_time_t *now)
{
xmlNode *attrs = NULL;
xmlNode *attr = NULL;
for (attrs = __xml_first_child_element(versioned_meta); attrs != NULL;
attrs = __xml_next_element(attrs)) {
for (attr = __xml_first_child_element(attrs); attr != NULL;
attr = __xml_next_element(attr)) {
const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME);
const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE);
if (safe_str_eq(name, XML_OP_ATTR_START_DELAY)) {
int start_delay = unpack_start_delay(value, NULL);
crm_xml_add_int(attr, XML_NVPAIR_ATTR_VALUE, start_delay);
} else if (safe_str_eq(name, XML_OP_ATTR_ORIGIN)) {
long long start_delay = 0;
if (unpack_interval_origin(value, xml_obj, interval_ms, now,
&start_delay)) {
crm_xml_add(attr, XML_NVPAIR_ATTR_NAME,
XML_OP_ATTR_START_DELAY);
crm_xml_add_ll(attr, XML_NVPAIR_ATTR_VALUE, start_delay);
}
} else if (safe_str_eq(name, XML_ATTR_TIMEOUT)) {
int timeout = unpack_timeout(value);
crm_xml_add_int(attr, XML_NVPAIR_ATTR_VALUE, timeout);
}
}
}
}
#endif
/*!
* \brief Unpack operation XML into an action structure
*
* Unpack an operation's meta-attributes (normalizing the interval, timeout,
* and start delay values as integer milliseconds), requirements, and
* failure policy.
*
* \param[in,out] action Action to unpack into
* \param[in] xml_obj Operation XML (or NULL if all defaults)
* \param[in] container Resource that contains affected resource, if any
* \param[in] data_set Cluster state
* \param[in] interval_ms How frequently to perform the operation
*/
static void
unpack_operation(pe_action_t * action, xmlNode * xml_obj, pe_resource_t * container,
pe_working_set_t * data_set, guint interval_ms)
{
int timeout = 0;
char *value_ms = NULL;
const char *value = NULL;
const char *field = XML_LRM_ATTR_INTERVAL;
char *default_timeout = NULL;
#if ENABLE_VERSIONED_ATTRS
pe_rsc_action_details_t *rsc_details = NULL;
#endif
pe_rsc_eval_data_t rsc_rule_data = {
.standard = crm_element_value(action->rsc->xml, XML_AGENT_ATTR_CLASS),
.provider = crm_element_value(action->rsc->xml, XML_AGENT_ATTR_PROVIDER),
.agent = crm_element_value(action->rsc->xml, XML_EXPR_ATTR_TYPE)
};
pe_op_eval_data_t op_rule_data = {
.op_name = action->task,
.interval = interval_ms
};
pe_rule_eval_data_t rule_data = {
.node_hash = NULL,
.role = RSC_ROLE_UNKNOWN,
.now = data_set->now,
.match_data = NULL,
.rsc_data = &rsc_rule_data,
.op_data = &op_rule_data
};
CRM_CHECK(action && action->rsc, return);
// Cluster-wide <op_defaults> <meta_attributes>
pe__unpack_dataset_nvpairs(data_set->op_defaults, XML_TAG_META_SETS, &rule_data,
action->meta, NULL, FALSE, data_set);
// Probe timeouts default differently, so handle timeout default later
default_timeout = g_hash_table_lookup(action->meta, XML_ATTR_TIMEOUT);
if (default_timeout) {
default_timeout = strdup(default_timeout);
g_hash_table_remove(action->meta, XML_ATTR_TIMEOUT);
}
if (xml_obj) {
xmlAttrPtr xIter = NULL;
// <op> <meta_attributes> take precedence over defaults
pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_META_SETS, &rule_data,
action->meta, NULL, TRUE, data_set);
#if ENABLE_VERSIONED_ATTRS
rsc_details = pe_rsc_action_details(action);
pe_eval_versioned_attributes(data_set->input, xml_obj,
XML_TAG_ATTR_SETS, &rule_data,
rsc_details->versioned_parameters,
NULL);
pe_eval_versioned_attributes(data_set->input, xml_obj,
XML_TAG_META_SETS, &rule_data,
rsc_details->versioned_meta,
NULL);
#endif
/* Anything set as an <op> XML property has highest precedence.
* This ensures we use the name and interval from the <op> tag.
*/
for (xIter = xml_obj->properties; xIter; xIter = xIter->next) {
const char *prop_name = (const char *)xIter->name;
const char *prop_value = crm_element_value(xml_obj, prop_name);
g_hash_table_replace(action->meta, strdup(prop_name), strdup(prop_value));
}
}
g_hash_table_remove(action->meta, "id");
// Normalize interval to milliseconds
if (interval_ms > 0) {
value_ms = crm_strdup_printf("%u", interval_ms);
g_hash_table_replace(action->meta, strdup(field), value_ms);
} else if (g_hash_table_lookup(action->meta, field) != NULL) {
g_hash_table_remove(action->meta, field);
}
// Handle timeout default, now that we know the interval
if (g_hash_table_lookup(action->meta, XML_ATTR_TIMEOUT)) {
free(default_timeout);
} else {
// Probe timeouts default to minimum-interval monitor's
if (safe_str_eq(action->task, RSC_STATUS) && (interval_ms == 0)) {
xmlNode *min_interval_mon = find_min_interval_mon(action->rsc, FALSE);
if (min_interval_mon) {
value = crm_element_value(min_interval_mon, XML_ATTR_TIMEOUT);
if (value) {
crm_trace("\t%s defaults to minimum-interval monitor's timeout '%s'",
action->uuid, value);
free(default_timeout);
default_timeout = strdup(value);
}
}
}
if (default_timeout) {
g_hash_table_insert(action->meta, strdup(XML_ATTR_TIMEOUT),
default_timeout);
}
}
- if (pcmk__str_none_of(action->task, RSC_START, RSC_PROMOTE, NULL)) {
+ if (!pcmk__str_any_of(action->task, RSC_START, RSC_PROMOTE, NULL)) {
action->needs = rsc_req_nothing;
value = "nothing (not start/promote)";
} else if (is_set(action->rsc->flags, pe_rsc_needs_fencing)) {
action->needs = rsc_req_stonith;
value = "fencing (resource)";
} else if (is_set(action->rsc->flags, pe_rsc_needs_quorum)) {
action->needs = rsc_req_quorum;
value = "quorum (resource)";
} else {
action->needs = rsc_req_nothing;
value = "nothing (resource)";
}
pe_rsc_trace(action->rsc, "\tAction %s requires: %s", action->uuid, value);
value = unpack_operation_on_fail(action);
if (value == NULL) {
} else if (safe_str_eq(value, "block")) {
action->on_fail = action_fail_block;
g_hash_table_insert(action->meta, strdup(XML_OP_ATTR_ON_FAIL), strdup("block"));
value = "block"; // The above could destroy the original string
} else if (safe_str_eq(value, "fence")) {
action->on_fail = action_fail_fence;
value = "node fencing";
if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) {
pcmk__config_err("Resetting '" XML_OP_ATTR_ON_FAIL "' for "
"operation '%s' to 'stop' because 'fence' is not "
"valid when fencing is disabled", action->uuid);
action->on_fail = action_fail_stop;
action->fail_role = RSC_ROLE_STOPPED;
value = "stop resource";
}
} else if (safe_str_eq(value, "standby")) {
action->on_fail = action_fail_standby;
value = "node standby";
} else if (pcmk__str_any_of(value, "ignore", "nothing", NULL)) {
action->on_fail = action_fail_ignore;
value = "ignore";
} else if (safe_str_eq(value, "migrate")) {
action->on_fail = action_fail_migrate;
value = "force migration";
} else if (safe_str_eq(value, "stop")) {
action->on_fail = action_fail_stop;
action->fail_role = RSC_ROLE_STOPPED;
value = "stop resource";
} else if (safe_str_eq(value, "restart")) {
action->on_fail = action_fail_recover;
value = "restart (and possibly migrate)";
} else if (safe_str_eq(value, "restart-container")) {
if (container) {
action->on_fail = action_fail_restart_container;
value = "restart container (and possibly migrate)";
} else {
value = NULL;
}
} else if (safe_str_eq(value, "demote")) {
action->on_fail = action_fail_demote;
value = "demote instance";
} else {
pe_err("Resource %s: Unknown failure type (%s)", action->rsc->id, value);
value = NULL;
}
/* defaults */
if (value == NULL && container) {
action->on_fail = action_fail_restart_container;
value = "restart container (and possibly migrate) (default)";
/* For remote nodes, ensure that any failure that results in dropping an
* active connection to the node results in fencing of the node.
*
* There are only two action failures that don't result in fencing.
* 1. probes - probe failures are expected.
* 2. start - a start failure indicates that an active connection does not already
* exist. The user can set op on-fail=fence if they really want to fence start
* failures. */
} else if (((value == NULL) || !is_set(action->rsc->flags, pe_rsc_managed)) &&
(pe__resource_is_remote_conn(action->rsc, data_set) &&
!(safe_str_eq(action->task, CRMD_ACTION_STATUS) && (interval_ms == 0)) &&
(safe_str_neq(action->task, CRMD_ACTION_START)))) {
if (!is_set(action->rsc->flags, pe_rsc_managed)) {
action->on_fail = action_fail_stop;
action->fail_role = RSC_ROLE_STOPPED;
value = "stop unmanaged remote node (enforcing default)";
} else {
if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
value = "fence remote node (default)";
} else {
value = "recover remote node connection (default)";
}
if (action->rsc->remote_reconnect_ms) {
action->fail_role = RSC_ROLE_STOPPED;
}
action->on_fail = action_fail_reset_remote;
}
} else if (value == NULL && safe_str_eq(action->task, CRMD_ACTION_STOP)) {
if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
action->on_fail = action_fail_fence;
value = "resource fence (default)";
} else {
action->on_fail = action_fail_block;
value = "resource block (default)";
}
} else if (value == NULL) {
action->on_fail = action_fail_recover;
value = "restart (and possibly migrate) (default)";
}
pe_rsc_trace(action->rsc, "\t%s failure handling: %s", action->task, value);
value = NULL;
if (xml_obj != NULL) {
value = g_hash_table_lookup(action->meta, "role_after_failure");
if (value) {
pe_warn_once(pe_wo_role_after,
"Support for role_after_failure is deprecated and will be removed in a future release");
}
}
if (value != NULL && action->fail_role == RSC_ROLE_UNKNOWN) {
action->fail_role = text2role(value);
}
/* defaults */
if (action->fail_role == RSC_ROLE_UNKNOWN) {
if (safe_str_eq(action->task, CRMD_ACTION_PROMOTE)) {
action->fail_role = RSC_ROLE_SLAVE;
} else {
action->fail_role = RSC_ROLE_STARTED;
}
}
pe_rsc_trace(action->rsc, "\t%s failure results in: %s", action->task,
role2text(action->fail_role));
value = g_hash_table_lookup(action->meta, XML_OP_ATTR_START_DELAY);
if (value) {
unpack_start_delay(value, action->meta);
} else {
long long start_delay = 0;
value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ORIGIN);
if (unpack_interval_origin(value, xml_obj, interval_ms, data_set->now,
&start_delay)) {
g_hash_table_replace(action->meta, strdup(XML_OP_ATTR_START_DELAY),
crm_strdup_printf("%lld", start_delay));
}
}
value = g_hash_table_lookup(action->meta, XML_ATTR_TIMEOUT);
timeout = unpack_timeout(value);
g_hash_table_replace(action->meta, strdup(XML_ATTR_TIMEOUT), crm_itoa(timeout));
#if ENABLE_VERSIONED_ATTRS
unpack_versioned_meta(rsc_details->versioned_meta, xml_obj, interval_ms,
data_set->now);
#endif
}
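/* Illustrative sketch (hypothetical CIB fragment, not part of this change):
 * the merging above applies three sources in increasing precedence:
 *   1. cluster-wide <op_defaults> meta-attributes
 *   2. the operation's own <meta_attributes>
 *   3. XML properties set directly on the <op> element
 * For example:
 *   <op_defaults>
 *     <meta_attributes id="op-defaults">
 *       <nvpair id="od-timeout" name="timeout" value="20s"/>
 *     </meta_attributes>
 *   </op_defaults>
 *   <op id="db-monitor-10s" name="monitor" interval="10s" timeout="30s"/>
 * would leave the monitor action with meta[XML_LRM_ATTR_INTERVAL] = "10000"
 * and meta[XML_ATTR_TIMEOUT] = "30000", since the <op> property overrides
 * the default and both values are normalized to milliseconds.
 */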
static xmlNode *
find_rsc_op_entry_helper(pe_resource_t * rsc, const char *key, gboolean include_disabled)
{
guint interval_ms = 0;
gboolean do_retry = TRUE;
char *local_key = NULL;
const char *name = NULL;
const char *value = NULL;
const char *interval_spec = NULL;
char *match_key = NULL;
xmlNode *op = NULL;
xmlNode *operation = NULL;
retry:
for (operation = __xml_first_child_element(rsc->ops_xml); operation != NULL;
operation = __xml_next_element(operation)) {
if (crm_str_eq((const char *)operation->name, "op", TRUE)) {
name = crm_element_value(operation, "name");
interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL);
value = crm_element_value(operation, "enabled");
if (!include_disabled && value && crm_is_true(value) == FALSE) {
continue;
}
interval_ms = crm_parse_interval_spec(interval_spec);
match_key = pcmk__op_key(rsc->id, name, interval_ms);
if (safe_str_eq(key, match_key)) {
op = operation;
}
free(match_key);
if (rsc->clone_name) {
match_key = pcmk__op_key(rsc->clone_name, name, interval_ms);
if (safe_str_eq(key, match_key)) {
op = operation;
}
free(match_key);
}
if (op != NULL) {
free(local_key);
return op;
}
}
}
free(local_key);
if (do_retry == FALSE) {
return NULL;
}
do_retry = FALSE;
if (strstr(key, CRMD_ACTION_MIGRATE) || strstr(key, CRMD_ACTION_MIGRATED)) {
local_key = pcmk__op_key(rsc->id, "migrate", 0);
key = local_key;
goto retry;
} else if (strstr(key, "_notify_")) {
local_key = pcmk__op_key(rsc->id, "notify", 0);
key = local_key;
goto retry;
}
return NULL;
}
xmlNode *
find_rsc_op_entry(pe_resource_t * rsc, const char *key)
{
return find_rsc_op_entry_helper(rsc, key, FALSE);
}
void
print_node(const char *pre_text, pe_node_t * node, gboolean details)
{
if (node == NULL) {
crm_trace("%s%s: <NULL>", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": ");
return;
}
CRM_ASSERT(node->details);
crm_trace("%s%s%sNode %s: (weight=%d, fixed=%s)",
pre_text == NULL ? "" : pre_text,
pre_text == NULL ? "" : ": ",
node->details->online ? "" : "Unavailable/Unclean ",
node->details->uname, node->weight, node->fixed ? "True" : "False");
if (details) {
int log_level = LOG_TRACE;
char *pe_mutable = strdup("\t\t");
GListPtr gIter = node->details->running_rsc;
crm_trace("\t\t===Node Attributes");
g_hash_table_foreach(node->details->attrs, print_str_str, pe_mutable);
free(pe_mutable);
crm_trace("\t\t=== Resources");
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *rsc = (pe_resource_t *) gIter->data;
rsc->fns->print(rsc, "\t\t", pe_print_log|pe_print_pending,
&log_level);
}
}
}
/*
* Used by the HashTable for-loop
*/
void
print_str_str(gpointer key, gpointer value, gpointer user_data)
{
crm_trace("%s%s %s ==> %s",
user_data == NULL ? "" : (char *)user_data,
user_data == NULL ? "" : ": ", (char *)key, (char *)value);
}
void
pe_free_action(pe_action_t * action)
{
if (action == NULL) {
return;
}
g_list_free_full(action->actions_before, free); /* pe_action_wrapper_t* */
g_list_free_full(action->actions_after, free); /* pe_action_wrapper_t* */
if (action->extra) {
g_hash_table_destroy(action->extra);
}
if (action->meta) {
g_hash_table_destroy(action->meta);
}
#if ENABLE_VERSIONED_ATTRS
if (action->rsc) {
pe_free_rsc_action_details(action);
}
#endif
free(action->cancel_task);
free(action->reason);
free(action->task);
free(action->uuid);
free(action->node);
free(action);
}
GListPtr
find_recurring_actions(GListPtr input, pe_node_t * not_on_node)
{
const char *value = NULL;
GListPtr result = NULL;
GListPtr gIter = input;
CRM_CHECK(input != NULL, return NULL);
for (; gIter != NULL; gIter = gIter->next) {
pe_action_t *action = (pe_action_t *) gIter->data;
value = g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL_MS);
if (value == NULL) {
/* skip */
} else if (safe_str_eq(value, "0")) {
/* skip */
} else if (safe_str_eq(CRMD_ACTION_CANCEL, action->task)) {
/* skip */
} else if (not_on_node == NULL) {
crm_trace("(null) Found: %s", action->uuid);
result = g_list_prepend(result, action);
} else if (action->node == NULL) {
/* skip */
} else if (action->node->details != not_on_node->details) {
crm_trace("Found: %s", action->uuid);
result = g_list_prepend(result, action);
}
}
return result;
}
enum action_tasks
get_complex_task(pe_resource_t * rsc, const char *name, gboolean allow_non_atomic)
{
enum action_tasks task = text2task(name);
if (rsc == NULL) {
return task;
} else if (allow_non_atomic == FALSE || rsc->variant == pe_native) {
switch (task) {
case stopped_rsc:
case started_rsc:
case action_demoted:
case action_promoted:
crm_trace("Folding %s back into its atomic counterpart for %s", name, rsc->id);
return task - 1;
default:
break;
}
}
return task;
}
pe_action_t *
find_first_action(GListPtr input, const char *uuid, const char *task, pe_node_t * on_node)
{
GListPtr gIter = NULL;
CRM_CHECK(uuid || task, return NULL);
for (gIter = input; gIter != NULL; gIter = gIter->next) {
pe_action_t *action = (pe_action_t *) gIter->data;
if (uuid != NULL && safe_str_neq(uuid, action->uuid)) {
continue;
} else if (task != NULL && safe_str_neq(task, action->task)) {
continue;
} else if (on_node == NULL) {
return action;
} else if (action->node == NULL) {
continue;
} else if (on_node->details == action->node->details) {
return action;
}
}
return NULL;
}
GListPtr
find_actions(GListPtr input, const char *key, const pe_node_t *on_node)
{
GListPtr gIter = input;
GListPtr result = NULL;
CRM_CHECK(key != NULL, return NULL);
for (; gIter != NULL; gIter = gIter->next) {
pe_action_t *action = (pe_action_t *) gIter->data;
if (safe_str_neq(key, action->uuid)) {
crm_trace("%s does not match action %s", key, action->uuid);
continue;
} else if (on_node == NULL) {
crm_trace("Action %s matches (ignoring node)", key);
result = g_list_prepend(result, action);
} else if (action->node == NULL) {
crm_trace("Action %s matches (unallocated, assigning to %s)",
key, on_node->details->uname);
action->node = pe__copy_node(on_node);
result = g_list_prepend(result, action);
} else if (on_node->details == action->node->details) {
crm_trace("Action %s on %s matches", key, on_node->details->uname);
result = g_list_prepend(result, action);
} else {
crm_trace("Action %s on node %s does not match requested node %s",
key, action->node->details->uname,
on_node->details->uname);
}
}
return result;
}
GList *
find_actions_exact(GList *input, const char *key, const pe_node_t *on_node)
{
GList *result = NULL;
CRM_CHECK(key != NULL, return NULL);
if (on_node == NULL) {
crm_trace("Not searching for action %s because node not specified",
key);
return NULL;
}
for (GList *gIter = input; gIter != NULL; gIter = gIter->next) {
pe_action_t *action = (pe_action_t *) gIter->data;
if (action->node == NULL) {
crm_trace("Skipping comparison of %s vs action %s without node",
key, action->uuid);
} else if (safe_str_neq(key, action->uuid)) {
crm_trace("Desired action %s doesn't match %s", key, action->uuid);
} else if (safe_str_neq(on_node->details->id,
action->node->details->id)) {
crm_trace("Action %s desired node ID %s doesn't match %s",
key, on_node->details->id, action->node->details->id);
} else {
crm_trace("Action %s matches", key);
result = g_list_prepend(result, action);
}
}
return result;
}
/*!
* \brief Find all actions of given type for a resource
*
* \param[in] rsc Resource to search
* \param[in] node Find only actions scheduled on this node
* \param[in] task Action name to search for
* \param[in] require_node If TRUE, NULL node or action node will not match
*
* \return List of actions found (or NULL if none)
* \note If node is not NULL and require_node is FALSE, matching actions
* without a node will be assigned to node.
*/
GList *
pe__resource_actions(const pe_resource_t *rsc, const pe_node_t *node,
const char *task, bool require_node)
{
GList *result = NULL;
char *key = pcmk__op_key(rsc->id, task, 0);
if (require_node) {
result = find_actions_exact(rsc->actions, key, node);
} else {
result = find_actions(rsc->actions, key, node);
}
free(key);
return result;
}
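/* Minimal usage sketch (hypothetical caller, not part of this change):
 * collect every stop action scheduled for a resource on one node. The
 * returned list must be freed by the caller, but the actions it points at
 * are still owned by the resource.
 */
#if 0
static void
log_stop_count(const pe_resource_t *rsc, const pe_node_t *node)
{
    GList *stops = pe__resource_actions(rsc, node, CRMD_ACTION_STOP, false);

    crm_trace("%u stop action(s) found for %s on %s",
              g_list_length(stops), rsc->id, node->details->uname);
    g_list_free(stops); // free only the list, not the actions it contains
}
#endif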
static void
resource_node_score(pe_resource_t * rsc, pe_node_t * node, int score, const char *tag)
{
pe_node_t *match = NULL;
if ((rsc->exclusive_discover || (node->rsc_discover_mode == pe_discover_never))
&& safe_str_eq(tag, "symmetric_default")) {
/* This string comparison may be fragile, but exclusive resources and
* exclusive nodes should not have the symmetric_default constraint
* applied to them.
*/
return;
} else if (rsc->children) {
GListPtr gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
resource_node_score(child_rsc, node, score, tag);
}
}
pe_rsc_trace(rsc, "Setting %s for %s on %s: %d", tag, rsc->id, node->details->uname, score);
match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id);
if (match == NULL) {
match = pe__copy_node(node);
g_hash_table_insert(rsc->allowed_nodes, (gpointer) match->details->id, match);
}
match->weight = pe__add_scores(match->weight, score);
}
void
resource_location(pe_resource_t * rsc, pe_node_t * node, int score, const char *tag,
pe_working_set_t * data_set)
{
if (node != NULL) {
resource_node_score(rsc, node, score, tag);
} else if (data_set != NULL) {
GListPtr gIter = data_set->nodes;
for (; gIter != NULL; gIter = gIter->next) {
pe_node_t *node_iter = (pe_node_t *) gIter->data;
resource_node_score(rsc, node_iter, score, tag);
}
} else {
GHashTableIter iter;
pe_node_t *node_iter = NULL;
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node_iter)) {
resource_node_score(rsc, node_iter, score, tag);
}
}
if (node == NULL && score == -INFINITY) {
if (rsc->allocated_to) {
crm_info("Deallocating %s from %s", rsc->id, rsc->allocated_to->details->uname);
free(rsc->allocated_to);
rsc->allocated_to = NULL;
}
}
}
#define sort_return(an_int, why) do { \
free(a_uuid); \
free(b_uuid); \
crm_trace("%s (%d) %c %s (%d) : %s", \
a_xml_id, a_call_id, an_int>0?'>':an_int<0?'<':'=', \
b_xml_id, b_call_id, why); \
return an_int; \
} while(0)
gint
sort_op_by_callid(gconstpointer a, gconstpointer b)
{
int a_call_id = -1;
int b_call_id = -1;
char *a_uuid = NULL;
char *b_uuid = NULL;
const xmlNode *xml_a = a;
const xmlNode *xml_b = b;
const char *a_xml_id = crm_element_value(xml_a, XML_ATTR_ID);
const char *b_xml_id = crm_element_value(xml_b, XML_ATTR_ID);
if (safe_str_eq(a_xml_id, b_xml_id)) {
/* We have duplicate lrm_rsc_op entries in the status
* section which is unlikely to be a good thing
* - we can handle it easily enough, but we need to get
* to the bottom of why it's happening.
*/
pe_err("Duplicate lrm_rsc_op entries named %s", a_xml_id);
sort_return(0, "duplicate");
}
crm_element_value_int(xml_a, XML_LRM_ATTR_CALLID, &a_call_id);
crm_element_value_int(xml_b, XML_LRM_ATTR_CALLID, &b_call_id);
if (a_call_id == -1 && b_call_id == -1) {
/* both are pending ops so it doesn't matter since
* stops are never pending
*/
sort_return(0, "pending");
} else if (a_call_id >= 0 && a_call_id < b_call_id) {
sort_return(-1, "call id");
} else if (b_call_id >= 0 && a_call_id > b_call_id) {
sort_return(1, "call id");
} else if (b_call_id >= 0 && a_call_id == b_call_id) {
/*
* The op and last_failed_op are the same
* Order on last-rc-change
*/
time_t last_a = -1;
time_t last_b = -1;
crm_element_value_epoch(xml_a, XML_RSC_OP_LAST_CHANGE, &last_a);
crm_element_value_epoch(xml_b, XML_RSC_OP_LAST_CHANGE, &last_b);
crm_trace("rc-change: %lld vs %lld",
(long long) last_a, (long long) last_b);
if (last_a >= 0 && last_a < last_b) {
sort_return(-1, "rc-change");
} else if (last_b >= 0 && last_a > last_b) {
sort_return(1, "rc-change");
}
sort_return(0, "rc-change");
} else {
/* One of the inputs is a pending operation
* Attempt to use XML_ATTR_TRANSITION_MAGIC to determine its age relative to the other
*/
int a_id = -1;
int b_id = -1;
const char *a_magic = crm_element_value(xml_a, XML_ATTR_TRANSITION_MAGIC);
const char *b_magic = crm_element_value(xml_b, XML_ATTR_TRANSITION_MAGIC);
CRM_CHECK(a_magic != NULL && b_magic != NULL, sort_return(0, "No magic"));
if (!decode_transition_magic(a_magic, &a_uuid, &a_id, NULL, NULL, NULL,
NULL)) {
sort_return(0, "bad magic a");
}
if (!decode_transition_magic(b_magic, &b_uuid, &b_id, NULL, NULL, NULL,
NULL)) {
sort_return(0, "bad magic b");
}
/* try to determine the relative age of the operation...
* some pending operations (e.g. a start) may have been superseded
* by a subsequent stop
*
* [a|b]_id == -1 means it's a shutdown operation and _always_ comes last
*/
if (safe_str_neq(a_uuid, b_uuid) || a_id == b_id) {
/*
* some of the logic in here may be redundant...
*
* if the UUID from the TE doesn't match then one better
* be a pending operation.
* pending operations don't survive between elections and joins
* because we query the LRM directly
*/
if (b_call_id == -1) {
sort_return(-1, "transition + call");
} else if (a_call_id == -1) {
sort_return(1, "transition + call");
}
} else if ((a_id >= 0 && a_id < b_id) || b_id == -1) {
sort_return(-1, "transition");
} else if ((b_id >= 0 && a_id > b_id) || a_id == -1) {
sort_return(1, "transition");
}
}
/* we should never end up here */
CRM_CHECK(FALSE, sort_return(0, "default"));
}
time_t
get_effective_time(pe_working_set_t * data_set)
{
if(data_set) {
if (data_set->now == NULL) {
crm_trace("Recording a new 'now'");
data_set->now = crm_time_new(NULL);
}
return crm_time_get_seconds_since_epoch(data_set->now);
}
crm_trace("Defaulting to 'now'");
return time(NULL);
}
gboolean
get_target_role(pe_resource_t * rsc, enum rsc_role_e * role)
{
enum rsc_role_e local_role = RSC_ROLE_UNKNOWN;
const char *value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE);
CRM_CHECK(role != NULL, return FALSE);
if (value == NULL || safe_str_eq("started", value)
|| safe_str_eq("default", value)) {
return FALSE;
}
local_role = text2role(value);
if (local_role == RSC_ROLE_UNKNOWN) {
pcmk__config_err("Ignoring '" XML_RSC_ATTR_TARGET_ROLE "' for %s "
"because '%s' is not valid", rsc->id, value);
return FALSE;
} else if (local_role > RSC_ROLE_STARTED) {
if (is_set(uber_parent(rsc)->flags, pe_rsc_promotable)) {
if (local_role > RSC_ROLE_SLAVE) {
/* This is what we'd do anyway, just leave the default to avoid messing up the placement algorithm */
return FALSE;
}
} else {
pcmk__config_err("Ignoring '" XML_RSC_ATTR_TARGET_ROLE "' for %s "
"because '%s' only makes sense for promotable "
"clones", rsc->id, value);
return FALSE;
}
}
*role = local_role;
return TRUE;
}
gboolean
order_actions(pe_action_t * lh_action, pe_action_t * rh_action, enum pe_ordering order)
{
GListPtr gIter = NULL;
pe_action_wrapper_t *wrapper = NULL;
GListPtr list = NULL;
if (order == pe_order_none) {
return FALSE;
}
if (lh_action == NULL || rh_action == NULL) {
return FALSE;
}
crm_trace("Ordering Action %s before %s", lh_action->uuid, rh_action->uuid);
/* Ensure we never create a dependency on ourselves... it's happened */
CRM_ASSERT(lh_action != rh_action);
/* Filter dups, otherwise update_action_states() has too much work to do */
gIter = lh_action->actions_after;
for (; gIter != NULL; gIter = gIter->next) {
pe_action_wrapper_t *after = (pe_action_wrapper_t *) gIter->data;
if (after->action == rh_action && (after->type & order)) {
return FALSE;
}
}
wrapper = calloc(1, sizeof(pe_action_wrapper_t));
wrapper->action = rh_action;
wrapper->type = order;
list = lh_action->actions_after;
list = g_list_prepend(list, wrapper);
lh_action->actions_after = list;
wrapper = NULL;
/* order |= pe_order_implies_then; */
/* order ^= pe_order_implies_then; */
wrapper = calloc(1, sizeof(pe_action_wrapper_t));
wrapper->action = lh_action;
wrapper->type = order;
list = rh_action->actions_before;
list = g_list_prepend(list, wrapper);
rh_action->actions_before = list;
return TRUE;
}
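/* Minimal usage sketch (hypothetical caller, not part of this change):
 * order a resource's stop before its start, as for an ordinary restart.
 * order_actions() records the edge in both directions (actions_after on the
 * first action, actions_before on the second) and returns FALSE if an
 * identical ordering already exists.
 */
#if 0
static void
order_restart(pe_action_t *stop, pe_action_t *start)
{
    if (order_actions(stop, start, pe_order_optional)) {
        crm_trace("Ordered %s before %s", stop->uuid, start->uuid);
    }
}
#endif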
pe_action_t *
get_pseudo_op(const char *name, pe_working_set_t * data_set)
{
pe_action_t *op = NULL;
if(data_set->singletons) {
op = g_hash_table_lookup(data_set->singletons, name);
}
if (op == NULL) {
op = custom_action(NULL, strdup(name), name, NULL, TRUE, TRUE, data_set);
set_bit(op->flags, pe_action_pseudo);
set_bit(op->flags, pe_action_runnable);
}
return op;
}
void
destroy_ticket(gpointer data)
{
pe_ticket_t *ticket = data;
if (ticket->state) {
g_hash_table_destroy(ticket->state);
}
free(ticket->id);
free(ticket);
}
pe_ticket_t *
ticket_new(const char *ticket_id, pe_working_set_t * data_set)
{
pe_ticket_t *ticket = NULL;
if (pcmk__str_empty(ticket_id)) {
return NULL;
}
if (data_set->tickets == NULL) {
data_set->tickets =
g_hash_table_new_full(crm_str_hash, g_str_equal, free,
destroy_ticket);
}
ticket = g_hash_table_lookup(data_set->tickets, ticket_id);
if (ticket == NULL) {
ticket = calloc(1, sizeof(pe_ticket_t));
if (ticket == NULL) {
crm_err("Cannot allocate ticket '%s'", ticket_id);
return NULL;
}
crm_trace("Creaing ticket entry for %s", ticket_id);
ticket->id = strdup(ticket_id);
ticket->granted = FALSE;
ticket->last_granted = -1;
ticket->standby = FALSE;
ticket->state = crm_str_table_new();
g_hash_table_insert(data_set->tickets, strdup(ticket->id), ticket);
}
return ticket;
}
static void
filter_parameters(xmlNode * param_set, const char *param_string, bool need_present)
{
if (param_set && param_string) {
xmlAttrPtr xIter = param_set->properties;
while (xIter) {
const char *prop_name = (const char *)xIter->name;
char *name = crm_strdup_printf(" %s ", prop_name);
char *match = strstr(param_string, name);
free(name);
// Do now, because current entry might get removed below
xIter = xIter->next;
if (need_present && match == NULL) {
crm_trace("%s not found in %s", prop_name, param_string);
xml_remove_prop(param_set, prop_name);
} else if (need_present == FALSE && match) {
crm_trace("%s found in %s", prop_name, param_string);
xml_remove_prop(param_set, prop_name);
}
}
}
}
#if ENABLE_VERSIONED_ATTRS
static void
append_versioned_params(xmlNode *versioned_params, const char *ra_version, xmlNode *params)
{
GHashTable *hash = pe_unpack_versioned_parameters(versioned_params, ra_version);
char *key = NULL;
char *value = NULL;
GHashTableIter iter;
g_hash_table_iter_init(&iter, hash);
while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) {
crm_xml_add(params, key, value);
}
g_hash_table_destroy(hash);
}
#endif
/*!
* \internal
* \brief Calculate action digests and store in node's digest cache
*
* \param[in] rsc Resource that action was for
* \param[in] task Name of action performed
* \param[in] key Action's task key
* \param[in] node Node action was performed on
* \param[in] xml_op XML of operation in CIB status (if available)
* \param[in] calc_secure Whether to calculate secure digest
* \param[in] data_set Cluster working set
*
* \return Pointer to node's digest cache entry
*/
static op_digest_cache_t *
rsc_action_digest(pe_resource_t *rsc, const char *task, const char *key,
pe_node_t *node, xmlNode *xml_op, bool calc_secure,
pe_working_set_t *data_set)
{
op_digest_cache_t *data = NULL;
data = g_hash_table_lookup(node->details->digest_cache, key);
if (data == NULL) {
GHashTable *local_rsc_params = crm_str_table_new();
pe_action_t *action = custom_action(rsc, strdup(key), task, node, TRUE, FALSE, data_set);
#if ENABLE_VERSIONED_ATTRS
xmlNode *local_versioned_params = create_xml_node(NULL, XML_TAG_RSC_VER_ATTRS);
const char *ra_version = NULL;
#endif
const char *op_version;
const char *restart_list = NULL;
const char *secure_list = " passwd password ";
data = calloc(1, sizeof(op_digest_cache_t));
CRM_ASSERT(data != NULL);
get_rsc_attributes(local_rsc_params, rsc, node, data_set);
#if ENABLE_VERSIONED_ATTRS
pe_get_versioned_attributes(local_versioned_params, rsc, node, data_set);
#endif
data->params_all = create_xml_node(NULL, XML_TAG_PARAMS);
// REMOTE_CONTAINER_HACK: Allow remote nodes that start containers with pacemaker remote inside
if (pe__add_bundle_remote_name(rsc, data->params_all,
XML_RSC_ATTR_REMOTE_RA_ADDR)) {
crm_trace("Set address for bundle connection %s (on %s)",
rsc->id, node->details->uname);
}
g_hash_table_foreach(local_rsc_params, hash2field, data->params_all);
g_hash_table_foreach(action->extra, hash2field, data->params_all);
g_hash_table_foreach(rsc->parameters, hash2field, data->params_all);
g_hash_table_foreach(action->meta, hash2metafield, data->params_all);
if(xml_op) {
secure_list = crm_element_value(xml_op, XML_LRM_ATTR_OP_SECURE);
restart_list = crm_element_value(xml_op, XML_LRM_ATTR_OP_RESTART);
op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION);
#if ENABLE_VERSIONED_ATTRS
ra_version = crm_element_value(xml_op, XML_ATTR_RA_VERSION);
#endif
} else {
op_version = CRM_FEATURE_SET;
}
#if ENABLE_VERSIONED_ATTRS
append_versioned_params(local_versioned_params, ra_version, data->params_all);
append_versioned_params(rsc->versioned_parameters, ra_version, data->params_all);
{
pe_rsc_action_details_t *details = pe_rsc_action_details(action);
append_versioned_params(details->versioned_parameters, ra_version, data->params_all);
}
#endif
pcmk__filter_op_for_digest(data->params_all);
g_hash_table_destroy(local_rsc_params);
pe_free_action(action);
data->digest_all_calc = calculate_operation_digest(data->params_all, op_version);
if (calc_secure) {
data->params_secure = copy_xml(data->params_all);
if(secure_list) {
filter_parameters(data->params_secure, secure_list, FALSE);
}
data->digest_secure_calc = calculate_operation_digest(data->params_secure, op_version);
}
if(xml_op && crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST) != NULL) {
data->params_restart = copy_xml(data->params_all);
if (restart_list) {
filter_parameters(data->params_restart, restart_list, TRUE);
}
data->digest_restart_calc = calculate_operation_digest(data->params_restart, op_version);
}
g_hash_table_insert(node->details->digest_cache, strdup(key), data);
}
return data;
}
op_digest_cache_t *
rsc_action_digest_cmp(pe_resource_t * rsc, xmlNode * xml_op, pe_node_t * node,
pe_working_set_t * data_set)
{
op_digest_cache_t *data = NULL;
char *key = NULL;
guint interval_ms = 0;
const char *op_version;
const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
const char *digest_all;
const char *digest_restart;
CRM_ASSERT(node != NULL);
op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION);
digest_all = crm_element_value(xml_op, XML_LRM_ATTR_OP_DIGEST);
digest_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST);
crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
key = pcmk__op_key(rsc->id, task, interval_ms);
data = rsc_action_digest(rsc, task, key, node, xml_op,
is_set(data_set->flags, pe_flag_sanitized),
data_set);
data->rc = RSC_DIGEST_MATCH;
if (digest_restart && data->digest_restart_calc && strcmp(data->digest_restart_calc, digest_restart) != 0) {
pe_rsc_info(rsc, "Parameters to %s on %s changed: was %s vs. now %s (restart:%s) %s",
key, node->details->uname,
crm_str(digest_restart), data->digest_restart_calc,
op_version, crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC));
data->rc = RSC_DIGEST_RESTART;
} else if (digest_all == NULL) {
/* it is unknown what the previous op digest was */
data->rc = RSC_DIGEST_UNKNOWN;
} else if (strcmp(digest_all, data->digest_all_calc) != 0) {
pe_rsc_info(rsc, "Parameters to %s on %s changed: was %s vs. now %s (%s:%s) %s",
key, node->details->uname,
crm_str(digest_all), data->digest_all_calc,
(interval_ms > 0)? "reschedule" : "reload",
op_version, crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC));
data->rc = RSC_DIGEST_ALL;
}
free(key);
return data;
}
/*!
* \internal
* \brief Create an unfencing summary for use in special node attribute
*
* Create a string combining a fence device's resource ID, agent type, and
* parameter digest (whether for all parameters or just non-private parameters).
* This can be stored in a special node attribute, allowing us to detect changes
* in either the agent type or parameters, to know whether unfencing must be
* redone or can be safely skipped when the device's history is cleaned.
*
* \param[in] rsc_id Fence device resource ID
* \param[in] agent_type Fence device agent
* \param[in] param_digest Fence device parameter digest
*
* \return Newly allocated string with unfencing digest
* \note The caller is responsible for freeing the result.
*/
static inline char *
create_unfencing_summary(const char *rsc_id, const char *agent_type,
const char *param_digest)
{
return crm_strdup_printf("%s:%s:%s", rsc_id, agent_type, param_digest);
}
/*!
* \internal
* \brief Check whether a node can skip unfencing
*
* Check whether a fence device's current definition matches a node's
* stored summary of when it was last unfenced by the device.
*
* \param[in] rsc_id Fence device's resource ID
* \param[in] agent Fence device's agent type
* \param[in] digest_calc Fence device's current parameter digest
* \param[in] node_summary Value of node's special unfencing node attribute
* (a comma-separated list of unfencing summaries for
* all devices that have unfenced this node)
*
* \return TRUE if digest matches, FALSE otherwise
*/
static bool
unfencing_digest_matches(const char *rsc_id, const char *agent,
const char *digest_calc, const char *node_summary)
{
bool matches = FALSE;
if (rsc_id && agent && digest_calc && node_summary) {
char *search_secure = create_unfencing_summary(rsc_id, agent,
digest_calc);
/* The digest was calculated including the device ID and agent,
* so there is no risk of collision using strstr().
*/
matches = (strstr(node_summary, search_secure) != NULL);
crm_trace("Calculated unfencing digest '%s' %sfound in '%s'",
search_secure, matches? "" : "not ", node_summary);
free(search_secure);
}
return matches;
}
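/* Illustrative sketch (hypothetical values, not part of this change): the
 * node_summary attribute is a comma-separated list of
 * "<rsc-id>:<agent>:<digest>" entries as produced by
 * create_unfencing_summary(), e.g.
 *   "fence-ipmi:fence_ipmilan:0f9c...,fence-sbd:fence_sbd:7a31...,"
 * With that summary, unfencing_digest_matches("fence-ipmi", "fence_ipmilan",
 * "0f9c...", node_summary) returns TRUE, while a recalculated digest that
 * differs for the same device returns FALSE and unfencing must be redone.
 */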
/* Magic string to use as action name for digest cache entries used for
* unfencing checks. This is not a real action name (i.e. "on"), so
* check_action_definition() won't confuse these entries with real actions.
*/
#define STONITH_DIGEST_TASK "stonith-on"
/*!
* \internal
* \brief Calculate fence device digests and digest comparison result
*
* \param[in] rsc Fence device resource
* \param[in] agent Fence device's agent type
* \param[in] node Node with digest cache to use
* \param[in] data_set Cluster working set
*
* \return Node's digest cache entry
*/
static op_digest_cache_t *
fencing_action_digest_cmp(pe_resource_t *rsc, const char *agent,
pe_node_t *node, pe_working_set_t *data_set)
{
const char *node_summary = NULL;
// Calculate device's current parameter digests
char *key = pcmk__op_key(rsc->id, STONITH_DIGEST_TASK, 0);
op_digest_cache_t *data = rsc_action_digest(rsc, STONITH_DIGEST_TASK, key,
node, NULL, TRUE, data_set);
free(key);
// Check whether node has special unfencing summary node attribute
node_summary = pe_node_attribute_raw(node, CRM_ATTR_DIGESTS_ALL);
if (node_summary == NULL) {
data->rc = RSC_DIGEST_UNKNOWN;
return data;
}
// Check whether full parameter digest matches
if (unfencing_digest_matches(rsc->id, agent, data->digest_all_calc,
node_summary)) {
data->rc = RSC_DIGEST_MATCH;
return data;
}
// Check whether secure parameter digest matches
node_summary = pe_node_attribute_raw(node, CRM_ATTR_DIGESTS_SECURE);
if (unfencing_digest_matches(rsc->id, agent, data->digest_secure_calc,
node_summary)) {
data->rc = RSC_DIGEST_MATCH;
if (is_set(data_set->flags, pe_flag_stdout)) {
printf("Only 'private' parameters to %s for unfencing %s changed\n",
rsc->id, node->details->uname);
}
return data;
}
// Parameters don't match
data->rc = RSC_DIGEST_ALL;
if (is_set(data_set->flags, (pe_flag_sanitized|pe_flag_stdout))
&& data->digest_secure_calc) {
char *digest = create_unfencing_summary(rsc->id, agent,
data->digest_secure_calc);
printf("Parameters to %s for unfencing %s changed, try '%s'\n",
rsc->id, node->details->uname, digest);
free(digest);
}
return data;
}
const char *rsc_printable_id(pe_resource_t *rsc)
{
if (is_not_set(rsc->flags, pe_rsc_unique)) {
return ID(rsc->xml);
}
return rsc->id;
}
void
clear_bit_recursive(pe_resource_t * rsc, unsigned long long flag)
{
GListPtr gIter = rsc->children;
clear_bit(rsc->flags, flag);
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
clear_bit_recursive(child_rsc, flag);
}
}
void
set_bit_recursive(pe_resource_t * rsc, unsigned long long flag)
{
GListPtr gIter = rsc->children;
set_bit(rsc->flags, flag);
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
set_bit_recursive(child_rsc, flag);
}
}
static GListPtr
find_unfencing_devices(GListPtr candidates, GListPtr matches)
{
for (GListPtr gIter = candidates; gIter != NULL; gIter = gIter->next) {
pe_resource_t *candidate = gIter->data;
const char *provides = g_hash_table_lookup(candidate->meta, XML_RSC_ATTR_PROVIDES);
const char *requires = g_hash_table_lookup(candidate->meta, XML_RSC_ATTR_REQUIRES);
if(candidate->children) {
matches = find_unfencing_devices(candidate->children, matches);
} else if (is_not_set(candidate->flags, pe_rsc_fence_device)) {
continue;
} else if (crm_str_eq(provides, "unfencing", FALSE) || crm_str_eq(requires, "unfencing", FALSE)) {
matches = g_list_prepend(matches, candidate);
}
}
return matches;
}
static int
node_priority_fencing_delay(pe_node_t * node, pe_working_set_t * data_set)
{
int member_count = 0;
int online_count = 0;
int top_priority = 0;
int lowest_priority = 0;
GListPtr gIter = NULL;
// `priority-fencing-delay` is disabled
if (data_set->priority_fencing_delay <= 0) {
return 0;
}
/* No need to request a delay if the fencing target is not a normal cluster
* member, for example if it's a remote node or a guest node. */
if (node->details->type != node_member) {
return 0;
}
// No need to request a delay if the fencing target is in our partition
if (node->details->online) {
return 0;
}
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
pe_node_t *n = gIter->data;
if (n->details->type != node_member) {
continue;
}
member_count ++;
if (n->details->online) {
online_count++;
}
if (member_count == 1
|| n->details->priority > top_priority) {
top_priority = n->details->priority;
}
if (member_count == 1
|| n->details->priority < lowest_priority) {
lowest_priority = n->details->priority;
}
}
// No need to delay if we have more than half of the cluster members
if (online_count > member_count / 2) {
return 0;
}
/* All the nodes have equal priority.
* Any configured corresponding `pcmk_delay_base/max` will be applied. */
if (lowest_priority == top_priority) {
return 0;
}
if (node->details->priority < top_priority) {
return 0;
}
return data_set->priority_fencing_delay;
}
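/* Worked example (hypothetical two-node cluster, not part of this change):
 * priority-fencing-delay=15s, with node1 at priority 1 and node2 at
 * priority 2. After a split, each partition sees only one of two members
 * online, so the "more than half" shortcut above does not apply. The
 * partition that targets node2 (the top-priority node) gets the 15s delay
 * added to its fencing operation, while the partition targeting node1
 * returns 0 here and can fence immediately, so the higher-priority node is
 * favored to survive the fencing race.
 */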
pe_action_t *
pe_fence_op(pe_node_t * node, const char *op, bool optional, const char *reason,
bool priority_delay, pe_working_set_t * data_set)
{
char *op_key = NULL;
pe_action_t *stonith_op = NULL;
if(op == NULL) {
op = data_set->stonith_action;
}
op_key = crm_strdup_printf("%s-%s-%s", CRM_OP_FENCE, node->details->uname, op);
if(data_set->singletons) {
stonith_op = g_hash_table_lookup(data_set->singletons, op_key);
}
if(stonith_op == NULL) {
stonith_op = custom_action(NULL, op_key, CRM_OP_FENCE, node, TRUE, TRUE, data_set);
add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET, node->details->uname);
add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET_UUID, node->details->id);
add_hash_param(stonith_op->meta, "stonith_action", op);
if (pe__is_guest_or_remote_node(node)
&& is_set(data_set->flags, pe_flag_enable_unfencing)) {
/* Extra work to detect device changes on remotes
*
* We may do this for all nodes in the future, but for now
* the check_action_definition() based stuff works fine.
*/
long max = 1024;
long digests_all_offset = 0;
long digests_secure_offset = 0;
char *digests_all = calloc(max, sizeof(char));
char *digests_secure = calloc(max, sizeof(char));
GListPtr matches = find_unfencing_devices(data_set->resources, NULL);
for (GListPtr gIter = matches; gIter != NULL; gIter = gIter->next) {
pe_resource_t *match = gIter->data;
const char *agent = g_hash_table_lookup(match->meta,
XML_ATTR_TYPE);
op_digest_cache_t *data = NULL;
data = fencing_action_digest_cmp(match, agent, node, data_set);
if(data->rc == RSC_DIGEST_ALL) {
optional = FALSE;
crm_notice("Unfencing %s (remote): because the definition of %s changed", node->details->uname, match->id);
if (is_set(data_set->flags, pe_flag_stdout)) {
fprintf(stdout, " notice: Unfencing %s (remote): because the definition of %s changed\n", node->details->uname, match->id);
}
}
digests_all_offset += snprintf(
digests_all+digests_all_offset, max-digests_all_offset,
"%s:%s:%s,", match->id, agent, data->digest_all_calc);
digests_secure_offset += snprintf(
digests_secure+digests_secure_offset, max-digests_secure_offset,
"%s:%s:%s,", match->id, agent, data->digest_secure_calc);
}
g_hash_table_insert(stonith_op->meta,
strdup(XML_OP_ATTR_DIGESTS_ALL),
digests_all);
g_hash_table_insert(stonith_op->meta,
strdup(XML_OP_ATTR_DIGESTS_SECURE),
digests_secure);
}
} else {
free(op_key);
}
if (data_set->priority_fencing_delay > 0
/* It's a suitable case where `priority-fencing-delay` applies.
* At least add `priority-fencing-delay` field as an indicator. */
&& (priority_delay
/* Re-calculate priority delay for the suitable case when
* pe_fence_op() is called again by stage6() after node priority has
* been actually calculated with native_add_running() */
|| g_hash_table_lookup(stonith_op->meta,
XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY) != NULL)) {
/* Add `priority-fencing-delay` to the fencing op even if it's 0 for
* the targeting node. So that it takes precedence over any possible
* `pcmk_delay_base/max`.
*/
char *delay_s = crm_itoa(node_priority_fencing_delay(node, data_set));
g_hash_table_insert(stonith_op->meta,
strdup(XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY),
delay_s);
}
if(optional == FALSE && pe_can_fence(data_set, node)) {
pe_action_required(stonith_op, NULL, reason);
} else if(reason && stonith_op->reason == NULL) {
stonith_op->reason = strdup(reason);
}
return stonith_op;
}
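/* Illustrative sketch (hypothetical values, not part of this change): for a
 * remote node with two unfencing devices, the loop above would leave the
 * fencing op with meta-attributes such as
 *   XML_OP_ATTR_DIGESTS_ALL    = "fence-ipmi:fence_ipmilan:0f9c...,fence-sbd:fence_sbd:7a31...,"
 *   XML_OP_ATTR_DIGESTS_SECURE = "fence-ipmi:fence_ipmilan:55d2...,fence-sbd:fence_sbd:9be0...,"
 * which use the same "<rsc-id>:<agent>:<digest>" format that
 * fencing_action_digest_cmp() checks against the node's CRM_ATTR_DIGESTS_ALL
 * and CRM_ATTR_DIGESTS_SECURE attributes when deciding whether unfencing can
 * be skipped later.
 */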
void
trigger_unfencing(
pe_resource_t * rsc, pe_node_t *node, const char *reason, pe_action_t *dependency, pe_working_set_t * data_set)
{
if(is_not_set(data_set->flags, pe_flag_enable_unfencing)) {
/* No resources require it */
return;
} else if (rsc != NULL && is_not_set(rsc->flags, pe_rsc_fence_device)) {
/* Wasn't a stonith device */
return;
} else if(node
&& node->details->online
&& node->details->unclean == FALSE
&& node->details->shutdown == FALSE) {
pe_action_t *unfence = pe_fence_op(node, "on", FALSE, reason, FALSE, data_set);
if(dependency) {
order_actions(unfence, dependency, pe_order_optional);
}
} else if(rsc) {
GHashTableIter iter;
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
if(node->details->online && node->details->unclean == FALSE && node->details->shutdown == FALSE) {
trigger_unfencing(rsc, node, reason, dependency, data_set);
}
}
}
}
gboolean
add_tag_ref(GHashTable * tags, const char * tag_name, const char * obj_ref)
{
pe_tag_t *tag = NULL;
GListPtr gIter = NULL;
gboolean is_existing = FALSE;
CRM_CHECK(tags && tag_name && obj_ref, return FALSE);
tag = g_hash_table_lookup(tags, tag_name);
if (tag == NULL) {
tag = calloc(1, sizeof(pe_tag_t));
if (tag == NULL) {
return FALSE;
}
tag->id = strdup(tag_name);
tag->refs = NULL;
g_hash_table_insert(tags, strdup(tag_name), tag);
}
for (gIter = tag->refs; gIter != NULL; gIter = gIter->next) {
const char *existing_ref = (const char *) gIter->data;
if (crm_str_eq(existing_ref, obj_ref, TRUE)){
is_existing = TRUE;
break;
}
}
if (is_existing == FALSE) {
tag->refs = g_list_append(tag->refs, strdup(obj_ref));
crm_trace("Added: tag=%s ref=%s", tag->id, obj_ref);
}
return TRUE;
}
void pe_action_set_flag_reason(const char *function, long line,
pe_action_t *action, pe_action_t *reason, const char *text,
enum pe_action_flags flags, bool overwrite)
{
bool unset = FALSE;
bool update = FALSE;
const char *change = NULL;
if(is_set(flags, pe_action_runnable)) {
unset = TRUE;
change = "unrunnable";
} else if(is_set(flags, pe_action_optional)) {
unset = TRUE;
change = "required";
} else if(is_set(flags, pe_action_migrate_runnable)) {
unset = TRUE;
overwrite = TRUE;
change = "unrunnable";
} else if(is_set(flags, pe_action_dangle)) {
change = "dangling";
} else if(is_set(flags, pe_action_requires_any)) {
change = "required";
} else {
crm_err("Unknown flag change to %x by %s: 0x%s",
flags, action->uuid, (reason? reason->uuid : "0"));
}
if(unset) {
if(is_set(action->flags, flags)) {
action->flags = crm_clear_bit(function, line, action->uuid, action->flags, flags);
update = TRUE;
}
} else {
if(is_not_set(action->flags, flags)) {
action->flags = crm_set_bit(function, line, action->uuid, action->flags, flags);
update = TRUE;
}
}
if((change && update) || text) {
char *reason_text = NULL;
if(reason == NULL) {
pe_action_set_reason(action, text, overwrite);
} else if(reason->rsc == NULL) {
reason_text = crm_strdup_printf("%s %s%c %s", change, reason->task, text?':':0, text?text:"");
} else {
reason_text = crm_strdup_printf("%s %s %s%c %s", change, reason->rsc->id, reason->task, text?':':0, text?text:"NA");
}
if(reason_text && action->rsc != reason->rsc) {
pe_action_set_reason(action, reason_text, overwrite);
}
free(reason_text);
}
}
void pe_action_set_reason(pe_action_t *action, const char *reason, bool overwrite)
{
if (action->reason != NULL && overwrite) {
pe_rsc_trace(action->rsc, "Changing %s reason from '%s' to '%s'",
action->uuid, action->reason, crm_str(reason));
free(action->reason);
} else if (action->reason == NULL) {
pe_rsc_trace(action->rsc, "Set %s reason to '%s'",
action->uuid, crm_str(reason));
} else {
// crm_assert(action->reason != NULL && !overwrite);
return;
}
if (reason != NULL) {
action->reason = strdup(reason);
} else {
action->reason = NULL;
}
}
/*!
* \internal
* \brief Check whether shutdown has been requested for a node
*
* \param[in] node Node to check
*
* \return TRUE if node has shutdown attribute set and nonzero, FALSE otherwise
* \note This differs from simply using node->details->shutdown in that it can
* be used before that has been determined (and in fact to determine it),
* and it can also be used to distinguish requested shutdown from implicit
* shutdown of remote nodes by virtue of their connection stopping.
*/
bool
pe__shutdown_requested(pe_node_t *node)
{
const char *shutdown = pe_node_attribute_raw(node, XML_CIB_ATTR_SHUTDOWN);
return shutdown && strcmp(shutdown, "0");
}
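/* Illustrative sketch (not part of this change): the check above treats any
 * value other than "0" in the node's XML_CIB_ATTR_SHUTDOWN attribute as a
 * pending shutdown request (the attribute typically holds the epoch time of
 * the request), so:
 *   attribute value "1590000000"  -> TRUE
 *   attribute value "0" or unset  -> FALSE
 */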
/*!
* \internal
* \brief Update a data set's "recheck by" time
*
* \param[in] recheck Epoch time when recheck should happen
* \param[in,out] data_set Current working set
*/
void
pe__update_recheck_time(time_t recheck, pe_working_set_t *data_set)
{
if ((recheck > get_effective_time(data_set))
&& ((data_set->recheck_by == 0)
|| (data_set->recheck_by > recheck))) {
data_set->recheck_by = recheck;
}
}
/*!
* \internal
* \brief Wrapper for pe_unpack_nvpairs() using a cluster working set
*/
void
pe__unpack_dataset_nvpairs(xmlNode *xml_obj, const char *set_name,
pe_rule_eval_data_t *rule_data, GHashTable *hash,
const char *always_first, gboolean overwrite,
pe_working_set_t *data_set)
{
crm_time_t *next_change = crm_time_new_undefined();
pe_eval_nvpairs(data_set->input, xml_obj, set_name, rule_data, hash,
always_first, overwrite, next_change);
if (crm_time_is_defined(next_change)) {
time_t recheck = (time_t) crm_time_get_seconds_since_epoch(next_change);
pe__update_recheck_time(recheck, data_set);
}
crm_time_free(next_change);
}
bool
pe__resource_is_disabled(pe_resource_t *rsc)
{
const char *target_role = NULL;
CRM_CHECK(rsc != NULL, return false);
target_role = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE);
if (target_role) {
enum rsc_role_e target_role_e = text2role(target_role);
if ((target_role_e == RSC_ROLE_STOPPED)
|| ((target_role_e == RSC_ROLE_SLAVE)
&& is_set(uber_parent(rsc)->flags, pe_rsc_promotable))) {
return true;
}
}
return false;
}
/*!
* \internal
* \brief Create an action to clear a resource's history from CIB
*
* \param[in] rsc Resource to clear
* \param[in] node Node to clear history on
*
* \return New action to clear resource history
*/
pe_action_t *
pe__clear_resource_history(pe_resource_t *rsc, pe_node_t *node,
pe_working_set_t *data_set)
{
char *key = NULL;
CRM_ASSERT(rsc && node);
key = pcmk__op_key(rsc->id, CRM_OP_LRM_DELETE, 0);
return custom_action(rsc, key, CRM_OP_LRM_DELETE, node, FALSE, TRUE,
data_set);
}
bool
pe__rsc_running_on_any_node_in_list(pe_resource_t *rsc, GListPtr node_list)
{
for (GListPtr ele = rsc->running_on; ele; ele = ele->next) {
pe_node_t *node = (pe_node_t *) ele->data;
if (pcmk__str_in_list(node_list, node->details->uname)) {
return true;
}
}
return false;
}
bool
pcmk__rsc_filtered_by_node(pe_resource_t *rsc, GListPtr only_node)
{
return (rsc->fns->active(rsc, FALSE) && !pe__rsc_running_on_any_node_in_list(rsc, only_node));
}
GListPtr
pe__filter_rsc_list(GListPtr rscs, GListPtr filter)
{
GListPtr retval = NULL;
for (GListPtr gIter = rscs; gIter; gIter = gIter->next) {
pe_resource_t *rsc = (pe_resource_t *) gIter->data;
/* I think the second condition is safe here for all callers of this
* function. If not, it needs to move into pe__node_text.
*/
if (pcmk__str_in_list(filter, rsc_printable_id(rsc)) ||
(rsc->parent && pcmk__str_in_list(filter, rsc_printable_id(rsc->parent)))) {
retval = g_list_prepend(retval, rsc);
}
}
return retval;
}
