diff --git a/attrd/Makefile.am b/attrd/Makefile.am index f72cf19c65..911e7297e1 100644 --- a/attrd/Makefile.am +++ b/attrd/Makefile.am @@ -1,42 +1,44 @@ # # Copyright (C) 2004-2009 Andrew Beekhof # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # include $(top_srcdir)/Makefile.common halibdir = $(CRM_DAEMON_DIR) halib_PROGRAMS = attrd ## SOURCES noinst_HEADERS = internal.h attrd_common.h attrd_CFLAGS = $(CFLAGS_HARDENED_EXE) attrd_LDFLAGS = $(LDFLAGS_HARDENED_EXE) attrd_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la \ + $(top_builddir)/lib/pengine/libpe_rules.la \ $(top_builddir)/lib/common/libcrmcommon.la \ $(top_builddir)/lib/cib/libcib.la \ + $(top_builddir)/lib/lrmd/liblrmd.la \ $(CLUSTERLIBS) attrd_SOURCES = if BUILD_ATOMIC_ATTRD -attrd_SOURCES += main.c commands.c attrd_common.c +attrd_SOURCES += main.c commands.c attrd_common.c attrd_alerts.c else -attrd_SOURCES += legacy.c attrd_common.c +attrd_SOURCES += legacy.c attrd_common.c attrd_alerts.c endif clean-generic: rm -f *.log *.debug *.xml *~ diff --git a/attrd/attrd_alerts.c b/attrd/attrd_alerts.c new file mode 100644 index 0000000000..d187fab7b2 --- /dev/null +++ b/attrd/attrd_alerts.c @@ -0,0 +1,536 @@ +/* + * Copyright (C) 2015 Andrew Beekhof + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include "attrd_alerts.h" +#include +#include +#include + +GHashTable *alert_info_cache = NULL; + +lrmd_t * +attrd_lrmd_connect(int max_retry, void callback(lrmd_event_data_t * op)) +{ + int ret = -ENOTCONN; + int fails = 0; + + if (!the_lrmd) { + the_lrmd = lrmd_api_new(); + } + + while(fails < max_retry) { + the_lrmd->cmds->set_callback(the_lrmd, callback); + + ret = the_lrmd->cmds->connect(the_lrmd, T_ATTRD, NULL); + if (ret != pcmk_ok) { + fails++; + crm_trace("lrmd_connect RETRY!(%d)", fails); + } else { + crm_trace("lrmd_connect OK!"); + break; + } + } + + if (ret != pcmk_ok) { + if (the_lrmd->cmds->is_connected(the_lrmd)) { + lrmd_api_delete(the_lrmd); + } + the_lrmd = NULL; + } + return the_lrmd; +} + +static void +attrd_parse_alerts(xmlNode *notifications) +{ + xmlNode *alert; + crm_alert_entry_t entry; + guint max_timeout = 0; + + crm_free_alert_list(); + crm_alert_max_alert_timeout = CRM_ALERT_DEFAULT_TIMEOUT_MS; + + if (crm_alert_kind_default == NULL) { + crm_alert_kind_default = g_strsplit(CRM_ALERT_KIND_DEFAULT, ",", 0); + } + + if (notifications) { + crm_info("We have an alerts section in the cib"); + } else { + crm_info("No optional alerts section in cib"); + return; + } + + for (alert = first_named_child(notifications, XML_CIB_TAG_ALERT); + alert; alert = __xml_next(alert)) { + xmlNode *recipient; + int recipients = 0, envvars = 0; + GHashTable *config_hash = NULL; + + entry = (crm_alert_entry_t) { + .id = (char *) crm_element_value(alert, XML_ATTR_ID), + .path = (char *) crm_element_value(alert, XML_ALERT_ATTR_PATH), + .timeout = CRM_ALERT_DEFAULT_TIMEOUT_MS, + .tstamp_format = (char *) CRM_ALERT_DEFAULT_TSTAMP_FORMAT, + .select_kind_orig = NULL, + .select_kind = NULL, + .select_attribute_name_orig = NULL, + .select_attribute_name = NULL + }; + + crm_get_envvars_from_cib(alert, + &entry, + &envvars); + + config_hash = + get_meta_attrs_from_cib(alert, &entry, &max_timeout); + + crm_debug("Found alert: id=%s, path=%s, timeout=%d, " + "tstamp_format=%s, select_kind=%s, select_attribute_name=%s, %d additional environment variables", + entry.id, entry.path, entry.timeout, + entry.tstamp_format, entry.select_kind_orig, entry.select_attribute_name_orig, envvars); + + for (recipient = first_named_child(alert, + XML_CIB_TAG_ALERT_RECIPIENT); + recipient; recipient = __xml_next(recipient)) { + int envvars_added = 0; + + entry.recipient = (char *) crm_element_value(recipient, + XML_ALERT_ATTR_REC_VALUE); + recipients++; + + crm_get_envvars_from_cib(recipient, + &entry, + &envvars_added); + + { + crm_alert_entry_t recipient_entry = entry; + GHashTable *config_hash = + get_meta_attrs_from_cib(recipient, + &recipient_entry, + &max_timeout); + + crm_add_dup_alert_list_entry(&recipient_entry); + + crm_debug("Alert has recipient: id=%s, value=%s, " + "%d additional environment variables", + crm_element_value(recipient, XML_ATTR_ID), + recipient_entry.recipient, envvars_added); + + g_hash_table_destroy(config_hash); + } + + entry.envvars = + crm_drop_envvars(&entry, envvars_added); + } + + if (recipients == 0) { + crm_add_dup_alert_list_entry(&entry); + } + + crm_drop_envvars(&entry, -1); + g_hash_table_destroy(config_hash); + } + + if (max_timeout > 0) { + crm_alert_max_alert_timeout = max_timeout; + } +} + +static void +config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) +{ + crm_time_t *now = crm_time_new(NULL); + xmlNode *crmalerts = NULL; + + if (rc != pcmk_ok) { + crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc)); + goto bail; + } + + crmalerts = output; + if ((crmalerts) && + (crm_element_name(crmalerts)) && + (strcmp(crm_element_name(crmalerts), XML_CIB_TAG_ALERTS) != 0)) { + crmalerts = first_named_child(crmalerts, XML_CIB_TAG_ALERTS); + } + if (!crmalerts) { + crm_err("Local CIB query for " XML_CIB_TAG_ALERTS " section failed"); + goto bail; + } + + attrd_parse_alerts(crmalerts); + + bail: + crm_time_free(now); +} + +gboolean +attrd_read_options(gpointer user_data) +{ + int call_id; + + if (the_cib) { + call_id = the_cib->cmds->query(the_cib, + "//" XML_CIB_TAG_ALERTS, + NULL, cib_xpath | cib_scope_local); + + the_cib->cmds->register_callback_full(the_cib, call_id, 120, FALSE, + NULL, + "config_query_callback", + config_query_callback, free); + + crm_trace("Querying the CIB... call %d", call_id); + } else { + crm_err("Querying the CIB...CIB connection not active"); + } + return TRUE; +} + +void +attrd_cib_updated_cb(const char *event, xmlNode * msg) +{ + int rc = -1; + int format= 1; + xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); + xmlNode *change = NULL; + xmlXPathObject *xpathObj = NULL; + + CRM_CHECK(msg != NULL, return); + + crm_element_value_int(msg, F_CIB_RC, &rc); + if (rc < pcmk_ok) { + crm_trace("Filter rc=%d (%s)", rc, pcmk_strerror(rc)); + return; + } + + crm_element_value_int(patchset, "format", &format); + if (format == 1) { + if ((xpathObj = xpath_search( + msg, + "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_ALERTS + )) != NULL) { + freeXpathObject(xpathObj); + mainloop_set_trigger(attrd_config_read); + } + } else if (format == 2) { + for (change = __xml_first_child(patchset); change != NULL; change = __xml_next(change)) { + const char *xpath = crm_element_value(change, XML_DIFF_PATH); + + if (xpath == NULL) { + continue; + } + + /* modifying properties */ + if (!strstr(xpath, "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_ALERTS)) { + xmlNode *section = NULL; + const char *name = NULL; + + /* adding notifications section */ + if ((strcmp(xpath, "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION) != 0) || + ((section = __xml_first_child(change)) == NULL) || + ((name = crm_element_name(section)) == NULL) || + (strcmp(name, XML_CIB_TAG_ALERTS) != 0)) { + continue; + } + } + + mainloop_set_trigger(attrd_config_read); + break; + } + + } else { + crm_warn("Unknown patch format: %d", format); + } + +} + +GHashTable * +get_meta_attrs_from_cib(xmlNode *basenode, crm_alert_entry_t *entry, + guint *max_timeout) +{ + GHashTable *config_hash = + g_hash_table_new_full(crm_str_hash, g_str_equal, + g_hash_destroy_str, g_hash_destroy_str); + crm_time_t *now = crm_time_new(NULL); + const char *value = NULL; + + unpack_instance_attributes(basenode, basenode, XML_TAG_META_SETS, NULL, + config_hash, NULL, FALSE, now); + + value = g_hash_table_lookup(config_hash, XML_ALERT_ATTR_TIMEOUT); + if (value) { + entry->timeout = crm_get_msec(value); + if (entry->timeout <= 0) { + if (entry->timeout == 0) { + crm_trace("Setting timeout to default %dmsec", + CRM_ALERT_DEFAULT_TIMEOUT_MS); + } else { + crm_warn("Invalid timeout value setting to default %dmsec", + CRM_ALERT_DEFAULT_TIMEOUT_MS); + } + entry->timeout = CRM_ALERT_DEFAULT_TIMEOUT_MS; + } else { + crm_trace("Found timeout %dmsec", entry->timeout); + } + if (entry->timeout > *max_timeout) { + *max_timeout = entry->timeout; + } + } + value = g_hash_table_lookup(config_hash, XML_ALERT_ATTR_TSTAMP_FORMAT); + if (value) { + /* hard to do any checks here as merely anything can + * can be a valid time-format-string + */ + entry->tstamp_format = (char *) value; + crm_trace("Found timestamp format string '%s'", value); + } + + value = g_hash_table_lookup(config_hash, XML_ALERT_ATTR_SELECT_KIND); + if (value) { + entry->select_kind_orig = (char*) value; + entry->select_kind = g_strsplit((char*) value, ",", 0); + crm_trace("Found select_kind string '%s'", (char *) value); + } + + value = g_hash_table_lookup(config_hash, XML_ALERT_ATTR_SELECT_ATTRIBUTE_NAME); + if (value) { + entry->select_attribute_name_orig = (char*) value; + entry->select_attribute_name = g_strsplit((char*) value, ",", 0); + crm_trace("Found attribute_name string '%s'", (char *) value); + } + + crm_time_free(now); + return config_hash; /* keep hash as long as strings are needed */ +} + +void +attrd_alert_fini() +{ + if (alert_info_cache) { + g_hash_table_destroy(alert_info_cache); + alert_info_cache = NULL; + } + + if (crm_alert_kind_default) { + g_strfreev(crm_alert_kind_default); + crm_alert_kind_default = NULL; + } +} + +static int +exec_alerts(lrmd_t *lrmd, const char *kind, const char *attribute_name, lrmd_key_value_t * params, GListPtr alert_list, GHashTable *info_cache) +{ + int call_id = 0; + static int operations = 0; + GListPtr l; + crm_time_hr_t *now = crm_time_hr_new(NULL); + + params = lrmd_set_alert_key_to_lrmd_params(params, CRM_alert_kind, kind); + params = lrmd_set_alert_key_to_lrmd_params(params, CRM_alert_version, VERSION); + + for (l = g_list_first(alert_list); l; l = g_list_next(l)) { + lrmd_rsc_info_t *rsc = NULL; + crm_alert_entry_t *entry = (crm_alert_entry_t *)(l->data); + char *timestamp = crm_time_format_hr(entry->tstamp_format, now); + lrmd_key_value_t * copy_params = NULL; + lrmd_key_value_t *head, *p; + + if (crm_is_target_alert(entry->select_kind == NULL ? crm_alert_kind_default : entry->select_kind, kind) == FALSE) { + crm_trace("Cannot sending '%s' alert to '%s' via '%s'(select_kind=%s)", kind, entry->recipient, entry->path, + entry->select_kind == NULL ? CRM_ALERT_KIND_DEFAULT : entry->select_kind_orig); + free(timestamp); + continue; + } + + if (crm_is_target_alert(entry->select_attribute_name, attribute_name) == FALSE) { + crm_trace("Cannot sending '%s' alert to '%s' via '%s'(select_attribute_name=%s attribute_name=%s)", kind, entry->recipient, entry->path, + entry->select_attribute_name_orig, attribute_name); + free(timestamp); + continue; + } + + crm_info("Sending '%s' alert to '%s' via '%s'", kind, entry->recipient, entry->path); + + rsc = g_hash_table_lookup(alert_info_cache, entry->id); + if (rsc == NULL) { + rsc = lrmd->cmds->get_rsc_info(lrmd, entry->id, 0); + if (!rsc) { + lrmd->cmds->register_rsc(lrmd, entry->id, PCMK_ALERT_CLASS, "pacemaker", entry->path, lrmd_opt_drop_recurring); + rsc = lrmd->cmds->get_rsc_info(lrmd, entry->id, 0); + if (!rsc) { + crm_err("Could not add alert %s : %s", entry->id, entry->path); + return -1; + } + /* cache the result */ + g_hash_table_insert(alert_info_cache, entry->id, rsc); + } + } + + /* Because there is a parameter to turn into every transmission, Copy a parameter. */ + head = params; + while (head) { + p = head->next; + copy_params = lrmd_key_value_add(copy_params, head->key, head->value); + head = p; + } + + operations++; + + copy_params = lrmd_key_value_add(copy_params, CRM_ALERT_KEY_PATH, entry->path); + copy_params = lrmd_set_alert_key_to_lrmd_params(copy_params, CRM_alert_recipient, entry->recipient); + copy_params = lrmd_set_alert_key_to_lrmd_params(copy_params, CRM_alert_node_sequence, crm_itoa(operations)); + copy_params = lrmd_set_alert_key_to_lrmd_params(copy_params, CRM_alert_timestamp, timestamp); + + lrmd_set_alert_envvar_to_lrmd_params(copy_params, entry); + + call_id = lrmd->cmds->exec_alert(lrmd, strdup(entry->id), entry->timeout, lrmd_opt_notify_orig_only, copy_params); + if (call_id <= 0) { + crm_err("Operation %s on %s failed: %d", "start", rsc->id, call_id); + } else { + crm_info("Operation %s on %s compete: %d", "start", rsc->id, call_id); + } + + free(timestamp); + } + + if (now) { + free(now); + } + + return call_id; +} + +static void +free_alert_info(gpointer value) +{ + lrmd_rsc_info_t *rsc_info = value; + + lrmd_free_rsc_info(rsc_info); +} + +static void +attrd_alert_lrm_op_callback(lrmd_event_data_t * op) +{ + CRM_CHECK(op != NULL, return); + + if (op->type == lrmd_event_disconnect) { + crm_info("Lost connection to LRMD service!"); + if (the_lrmd->cmds->is_connected(the_lrmd)) { + the_lrmd->cmds->disconnect(the_lrmd); + lrmd_api_delete(the_lrmd); + } + the_lrmd = NULL; + return; + } else if (op->type != lrmd_event_exec_complete) { + return; + } + + if (op->params != NULL) { + void *value_tmp1, *value_tmp2; + + value_tmp1 = g_hash_table_lookup(op->params, CRM_ALERT_KEY_PATH); + if (value_tmp1 != NULL) { + value_tmp2 = g_hash_table_lookup(op->params, CRM_ALERT_NODE_SEQUENCE); + if(op->rc == 0) { + crm_info("Alert %s (%s) complete", value_tmp2, value_tmp1); + } else { + crm_warn("Alert %s (%s) failed: %d", value_tmp2, value_tmp1, op->rc); + } + } + } +} + +int +attrd_send_alerts(lrmd_t *lrmd, const char *node, uint32_t nodeid, const char *attribute_name, const char *attribute_value, GListPtr alert_list) +{ + int ret = pcmk_ok; + lrmd_key_value_t *params = NULL; + + if (lrmd == NULL) { + lrmd = attrd_lrmd_connect(10, attrd_alert_lrm_op_callback); + if (lrmd == NULL) { + crm_warn("LRMD connection not active"); + return ret; + } + } + + crm_trace("LRMD connection active"); + + if (alert_info_cache == NULL) { + alert_info_cache = g_hash_table_new_full(crm_str_hash, + g_str_equal, NULL, free_alert_info); + } + + params = lrmd_set_alert_key_to_lrmd_params(params, CRM_alert_node, node); + params = lrmd_set_alert_key_to_lrmd_params(params, CRM_alert_nodeid, crm_itoa(nodeid)); + params = lrmd_set_alert_key_to_lrmd_params(params, CRM_alert_attribute_name, attribute_name); + params = lrmd_set_alert_key_to_lrmd_params(params, CRM_alert_attribute_value, attribute_value == NULL ? "null" : attribute_value); + + ret = exec_alerts(lrmd, "attribute", attribute_name, params, alert_list, alert_info_cache); + crm_trace("ret : %d, node : %s, nodeid: %s, name: %s, value : %s", + ret, node, crm_itoa(nodeid), attribute_name, attribute_value); + + if (params) { + lrmd_key_value_freeall(params); + } + + return ret; +} + +#if HAVE_ATOMIC_ATTRD +void +set_alert_attribute_value(GHashTable *t, attribute_value_t *v) +{ + attribute_value_t *a_v = NULL; + a_v = calloc(1, sizeof(attribute_value_t)); + CRM_ASSERT(a_v != NULL); + + a_v->nodeid = v->nodeid; + a_v->nodename = strdup(v->nodename); + + if (v->current != NULL) { + a_v->current = strdup(v->current); + } + + g_hash_table_replace(t, a_v->nodename, a_v); +} + +void +send_alert_attributes_value(attribute_t *a, GHashTable *t) +{ + int call_id = 0; + attribute_value_t *at = NULL; + GHashTableIter vIter; + + g_hash_table_iter_init(&vIter, t); + + while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & at)) { + call_id = attrd_send_alerts(the_lrmd, at->nodename, at->nodeid, a->id, at->current, crm_alert_list); + crm_trace("call_id : %d, nodename : %s, nodeid: %d, name: %s, value : %s", + call_id, at->nodename, at->nodeid, a->id, at->current); + } +} +#endif diff --git a/attrd/attrd_alerts.h b/attrd/attrd_alerts.h new file mode 100644 index 0000000000..6949c1f1bf --- /dev/null +++ b/attrd/attrd_alerts.h @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2015 Andrew Beekhof + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef ATTRD_ALERT__H +# define ATTRD_ALERT__H + +# include +# include +# include + +extern cib_t *the_cib; +extern lrmd_t *the_lrmd; +extern crm_trigger_t *attrd_config_read; + +lrmd_t *attrd_lrmd_connect(int max_retry, void callback(lrmd_event_data_t * op)); +gboolean attrd_read_options(gpointer user_data); +void attrd_cib_updated_cb(const char *event, xmlNode * msg); +GHashTable *get_meta_attrs_from_cib(xmlNode *basenode, crm_alert_entry_t *entry, guint *max_timeout); +void attrd_enable_alerts(const char *script, const char *target); +void attrd_alert_fini(void); +int attrd_send_alerts(lrmd_t *lrmd, const char *node, uint32_t nodeid, const char *attribute_name, const char *attribute_value, GListPtr alert_list); +#if HAVE_ATOMIC_ATTRD +void set_alert_attribute_value(GHashTable *t, attribute_value_t *v); +void send_alert_attributes_value(attribute_t *a, GHashTable *t); +#endif +#endif + diff --git a/attrd/commands.c b/attrd/commands.c index 98b4215b91..41b54bce6a 100644 --- a/attrd/commands.c +++ b/attrd/commands.c @@ -1,1183 +1,1166 @@ /* * Copyright (C) 2013 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include +#include "attrd_alerts.h" /* * Legacy attrd (all pre-1.1.11 Pacemaker versions, plus all versions when using * heartbeat, CMAN, or corosync-plugin stacks) is unversioned. * * With atomic attrd, each attrd will send ATTRD_PROTOCOL_VERSION with every * peer request and reply. Currently, there is no way to know the minimum * version supported by all peers, which limits its usefulness. * * Protocol Pacemaker Significant changes * -------- --------- ------------------- * 1 1.1.11 ATTRD_OP_UPDATE (F_ATTRD_ATTRIBUTE only), * ATTRD_OP_PEER_REMOVE, ATTRD_OP_REFRESH, ATTRD_OP_FLUSH, * ATTRD_OP_SYNC, ATTRD_OP_SYNC_RESPONSE * 1 1.1.13 ATTRD_OP_UPDATE (with F_ATTR_REGEX), ATTRD_OP_QUERY * 1 1.1.15 ATTRD_OP_UPDATE_BOTH, ATTRD_OP_UPDATE_DELAY * 2 1.1.17 ATTRD_OP_CLEAR_FAILCOUNT */ #define ATTRD_PROTOCOL_VERSION "2" int last_cib_op_done = 0; char *peer_writer = NULL; GHashTable *attributes = NULL; -typedef struct attribute_s { - char *uuid; /* TODO: Remove if at all possible */ - char *id; - char *set; - - GHashTable *values; - - int update; - int timeout_ms; - - /* TODO: refactor these three as a bitmask */ - bool changed; /* whether attribute value has changed since last write */ - bool unknown_peer_uuids; /* whether we know we're missing a peer uuid */ - gboolean is_private; /* whether to keep this attribute out of the CIB */ - - mainloop_timer_t *timer; - - char *user; - -} attribute_t; - -typedef struct attribute_value_s { - uint32_t nodeid; - gboolean is_remote; - char *nodename; - char *current; - char *requested; -} attribute_value_t; - - void write_attribute(attribute_t *a); void write_or_elect_attribute(attribute_t *a); void attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter); void attrd_peer_sync(crm_node_t *peer, xmlNode *xml); void attrd_peer_remove(const char *host, gboolean uncache, const char *source); static gboolean send_attrd_message(crm_node_t * node, xmlNode * data) { crm_xml_add(data, F_TYPE, T_ATTRD); crm_xml_add(data, F_ATTRD_IGNORE_LOCALLY, "atomic-version"); /* Tell older versions to ignore our messages */ crm_xml_add(data, F_ATTRD_VERSION, ATTRD_PROTOCOL_VERSION); crm_xml_add_int(data, F_ATTRD_WRITER, election_state(writer)); return send_cluster_message(node, crm_msg_attrd, data, TRUE); } static gboolean attribute_timer_cb(gpointer data) { attribute_t *a = data; crm_trace("Dampen interval expired for %s in state %d", a->id, election_state(writer)); write_or_elect_attribute(a); return FALSE; } static void free_attribute_value(gpointer data) { attribute_value_t *v = data; free(v->nodename); free(v->current); free(v->requested); free(v); } void free_attribute(gpointer data) { attribute_t *a = data; if(a) { free(a->id); free(a->set); free(a->uuid); free(a->user); mainloop_timer_del(a->timer); g_hash_table_destroy(a->values); free(a); } } static xmlNode * build_attribute_xml( xmlNode *parent, const char *name, const char *set, const char *uuid, unsigned int timeout_ms, const char *user, gboolean is_private, const char *peer, uint32_t peerid, const char *value) { xmlNode *xml = create_xml_node(parent, __FUNCTION__); crm_xml_add(xml, F_ATTRD_ATTRIBUTE, name); crm_xml_add(xml, F_ATTRD_SET, set); crm_xml_add(xml, F_ATTRD_KEY, uuid); crm_xml_add(xml, F_ATTRD_USER, user); crm_xml_add(xml, F_ATTRD_HOST, peer); crm_xml_add_int(xml, F_ATTRD_HOST_ID, peerid); crm_xml_add(xml, F_ATTRD_VALUE, value); crm_xml_add_int(xml, F_ATTRD_DAMPEN, timeout_ms/1000); crm_xml_add_int(xml, F_ATTRD_IS_PRIVATE, is_private); return xml; } static attribute_t * create_attribute(xmlNode *xml) { int dampen = 0; const char *value = crm_element_value(xml, F_ATTRD_DAMPEN); attribute_t *a = calloc(1, sizeof(attribute_t)); a->id = crm_element_value_copy(xml, F_ATTRD_ATTRIBUTE); a->set = crm_element_value_copy(xml, F_ATTRD_SET); a->uuid = crm_element_value_copy(xml, F_ATTRD_KEY); a->values = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, NULL, free_attribute_value); crm_element_value_int(xml, F_ATTRD_IS_PRIVATE, &a->is_private); #if ENABLE_ACL crm_trace("Performing all %s operations as user '%s'", a->id, a->user); a->user = crm_element_value_copy(xml, F_ATTRD_USER); #endif if(value) { dampen = crm_get_msec(value); crm_trace("Created attribute %s with delay %dms (%s)", a->id, dampen, value); } else { crm_trace("Created attribute %s with no delay", a->id); } if(dampen > 0) { a->timeout_ms = dampen; a->timer = mainloop_timer_add(a->id, a->timeout_ms, FALSE, attribute_timer_cb, a); } else if (dampen < 0) { crm_warn("Ignoring invalid delay %s for attribute %s", value, a->id); } g_hash_table_replace(attributes, a->id, a); return a; } /*! * \internal * \brief Respond to a client peer-remove request (i.e. propagate to all peers) * * \param[in] client_name Name of client that made request (for log messages) * \param[in] xml Root of request XML * * \return void */ void attrd_client_peer_remove(const char *client_name, xmlNode *xml) { const char *host = crm_element_value(xml, F_ATTRD_HOST); if (host) { crm_info("Client %s is requesting all values for %s be removed", client_name, host); send_attrd_message(NULL, xml); /* ends up at attrd_peer_message() */ } else { crm_info("Ignoring request by client %s to remove all peer values without specifying peer", client_name); } } /*! * \internal * \brief Respond to a client update request * * \param[in] xml Root of request XML * * \return void */ void attrd_client_update(xmlNode *xml) { attribute_t *a = NULL; char *host = crm_element_value_copy(xml, F_ATTRD_HOST); const char *attr = crm_element_value(xml, F_ATTRD_ATTRIBUTE); const char *value = crm_element_value(xml, F_ATTRD_VALUE); const char *regex = crm_element_value(xml, F_ATTRD_REGEX); /* If a regex was specified, broadcast a message for each match */ if ((attr == NULL) && regex) { GHashTableIter aIter; regex_t *r_patt = calloc(1, sizeof(regex_t)); crm_debug("Setting %s to %s", regex, value); if (regcomp(r_patt, regex, REG_EXTENDED)) { crm_err("Bad regex '%s' for update", regex); } else { g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, (gpointer *) & attr, NULL)) { int status = regexec(r_patt, attr, 0, NULL, 0); if (status == 0) { crm_trace("Matched %s with %s", attr, regex); crm_xml_add(xml, F_ATTRD_ATTRIBUTE, attr); send_attrd_message(NULL, xml); } } } free(host); regfree(r_patt); free(r_patt); return; } else if (attr == NULL) { crm_err("Update request did not specify attribute or regular expression"); free(host); return; } if (host == NULL) { crm_trace("Inferring host"); host = strdup(attrd_cluster->uname); crm_xml_add(xml, F_ATTRD_HOST, host); crm_xml_add_int(xml, F_ATTRD_HOST_ID, attrd_cluster->nodeid); } a = g_hash_table_lookup(attributes, attr); /* If value was specified using ++ or += notation, expand to real value */ if (value) { if (attrd_value_needs_expansion(value)) { int int_value; attribute_value_t *v = NULL; if (a) { v = g_hash_table_lookup(a->values, host); } int_value = attrd_expand_value(value, (v? v->current : NULL)); crm_info("Expanded %s=%s to %d", attr, value, int_value); crm_xml_add_int(xml, F_ATTRD_VALUE, int_value); /* Replacing the value frees the previous memory, so re-query it */ value = crm_element_value(xml, F_ATTRD_VALUE); } } if ((peer_writer == NULL) && (election_state(writer) != election_in_progress)) { crm_info("Starting an election to determine the writer"); election_vote(writer); } crm_debug("Broadcasting %s[%s] = %s%s", attr, host, value, ((election_state(writer) == election_won)? " (writer)" : "")); free(host); send_attrd_message(NULL, xml); /* ends up at attrd_peer_message() */ } /*! * \internal * \brief Respond to client clear-failure request * * \param[in] xml Request XML */ void attrd_client_clear_failure(xmlNode *xml) { #if 0 /* @TODO This would be most efficient, but there is currently no way to * verify that all peers support the op. If that ever changes, we could * enable this code. */ if (all_peers_support_clear_failure) { /* Propagate to all peers (including ourselves). * This ends up at attrd_peer_message(). */ send_attrd_message(NULL, xml); return; } #endif const char *rsc = crm_element_value(xml, F_ATTRD_RESOURCE); const char *op = crm_element_value(xml, F_ATTRD_OPERATION); const char *interval_s = crm_element_value(xml, F_ATTRD_INTERVAL); /* Map this to an update */ crm_xml_add(xml, F_ATTRD_TASK, ATTRD_OP_UPDATE); /* Add regular expression matching desired attributes */ if (rsc) { char *pattern; if (op == NULL) { pattern = crm_strdup_printf(ATTRD_RE_CLEAR_ONE, rsc); } else { int interval = crm_get_interval(interval_s); pattern = crm_strdup_printf(ATTRD_RE_CLEAR_OP, rsc, op, interval); } crm_xml_add(xml, F_ATTRD_REGEX, pattern); free(pattern); } else { crm_xml_add(xml, F_ATTRD_REGEX, ATTRD_RE_CLEAR_ALL); } /* Make sure attribute and value are not set, so we delete via regex */ if (crm_element_value(xml, F_ATTRD_ATTRIBUTE)) { crm_xml_replace(xml, F_ATTRD_ATTRIBUTE, NULL); } if (crm_element_value(xml, F_ATTRD_VALUE)) { crm_xml_replace(xml, F_ATTRD_VALUE, NULL); } attrd_client_update(xml); } /*! * \internal * \brief Respond to a client refresh request (i.e. write out all attributes) * * \return void */ void attrd_client_refresh(void) { GHashTableIter iter; attribute_t *a = NULL; /* 'refresh' forces a write of the current value of all attributes * Cancel any existing timers, we're writing it NOW */ g_hash_table_iter_init(&iter, attributes); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & a)) { mainloop_timer_stop(a->timer); } crm_info("Updating all attributes"); write_attributes(TRUE, FALSE); } /*! * \internal * \brief Build the XML reply to a client query * * param[in] attr Name of requested attribute * param[in] host Name of requested host (or NULL for all hosts) * * \return New XML reply * \note Caller is responsible for freeing the resulting XML */ static xmlNode *build_query_reply(const char *attr, const char *host) { xmlNode *reply = create_xml_node(NULL, __FUNCTION__); attribute_t *a; if (reply == NULL) { return NULL; } crm_xml_add(reply, F_TYPE, T_ATTRD); crm_xml_add(reply, F_ATTRD_VERSION, ATTRD_PROTOCOL_VERSION); /* If desired attribute exists, add its value(s) to the reply */ a = g_hash_table_lookup(attributes, attr); if (a) { attribute_value_t *v; xmlNode *host_value; crm_xml_add(reply, F_ATTRD_ATTRIBUTE, attr); /* Allow caller to use "localhost" to refer to local node */ if (safe_str_eq(host, "localhost")) { host = attrd_cluster->uname; crm_trace("Mapped localhost to %s", host); } /* If a specific node was requested, add its value */ if (host) { v = g_hash_table_lookup(a->values, host); host_value = create_xml_node(reply, XML_CIB_TAG_NODE); if (host_value == NULL) { free_xml(reply); return NULL; } crm_xml_add(host_value, F_ATTRD_HOST, host); crm_xml_add(host_value, F_ATTRD_VALUE, (v? v->current : NULL)); /* Otherwise, add all nodes' values */ } else { GHashTableIter iter; g_hash_table_iter_init(&iter, a->values); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) { host_value = create_xml_node(reply, XML_CIB_TAG_NODE); if (host_value == NULL) { free_xml(reply); return NULL; } crm_xml_add(host_value, F_ATTRD_HOST, v->nodename); crm_xml_add(host_value, F_ATTRD_VALUE, v->current); } } } return reply; } /*! * \internal * \brief Respond to a client query * * \param[in] client Who queried us * \param[in] query Root of query XML * * \return void */ void attrd_client_query(crm_client_t *client, uint32_t id, uint32_t flags, xmlNode *query) { const char *attr; const char *origin = crm_element_value(query, F_ORIG); ssize_t rc; xmlNode *reply; if (origin == NULL) { origin = "unknown client"; } crm_debug("Query arrived from %s", origin); /* Request must specify attribute name to query */ attr = crm_element_value(query, F_ATTRD_ATTRIBUTE); if (attr == NULL) { crm_warn("Ignoring malformed query from %s (no attribute name given)", origin); return; } /* Build the XML reply */ reply = build_query_reply(attr, crm_element_value(query, F_ATTRD_HOST)); if (reply == NULL) { crm_err("Could not respond to query from %s: could not create XML reply", origin); return; } crm_log_xml_trace(reply, "Reply"); /* Send the reply to the client */ client->request_id = 0; if ((rc = crm_ipcs_send(client, id, reply, flags)) < 0) { crm_err("Could not respond to query from %s: %s (%d)", origin, pcmk_strerror(-rc), -rc); } free_xml(reply); } /*! * \internal * \brief Clear failure-related attributes * * \param[in] peer Peer that sent clear request * \param[in] xml Request XML */ static void attrd_peer_clear_failure(crm_node_t *peer, xmlNode *xml) { const char *rsc = crm_element_value(xml, F_ATTRD_RESOURCE); const char *host = crm_element_value(xml, F_ATTRD_HOST); const char *op = crm_element_value(xml, F_ATTRD_OPERATION); const char *interval_s = crm_element_value(xml, F_ATTRD_INTERVAL); int interval = crm_get_interval(interval_s); char *attr = NULL; GHashTableIter iter; regex_t regex; if (attrd_failure_regex(®ex, rsc, op, interval) != pcmk_ok) { crm_info("Ignoring invalid request to clear failures for %s", (rsc? rsc : "all resources")); return; } crm_xml_add(xml, F_ATTRD_TASK, ATTRD_OP_UPDATE); /* Make sure value is not set, so we delete */ if (crm_element_value(xml, F_ATTRD_VALUE)) { crm_xml_replace(xml, F_ATTRD_VALUE, NULL); } g_hash_table_iter_init(&iter, attributes); while (g_hash_table_iter_next(&iter, (gpointer *) &attr, NULL)) { if (regexec(®ex, attr, 0, NULL, 0) == 0) { crm_trace("Matched %s when clearing %s", attr, (rsc? rsc : "all resources")); crm_xml_add(xml, F_ATTRD_ATTRIBUTE, attr); attrd_peer_update(peer, xml, host, FALSE); } } regfree(®ex); } void attrd_peer_message(crm_node_t *peer, xmlNode *xml) { int peer_state = 0; const char *v = crm_element_value(xml, F_ATTRD_VERSION); const char *op = crm_element_value(xml, F_ATTRD_TASK); const char *election_op = crm_element_value(xml, F_CRM_TASK); const char *host = crm_element_value(xml, F_ATTRD_HOST); if(election_op) { enum election_result rc = 0; crm_xml_add(xml, F_CRM_HOST_FROM, peer->uname); rc = election_count_vote(writer, xml, TRUE); switch(rc) { case election_start: free(peer_writer); peer_writer = NULL; election_vote(writer); break; case election_lost: free(peer_writer); peer_writer = strdup(peer->uname); break; default: election_check(writer); break; } return; } else if(v == NULL) { /* From the non-atomic version */ if (safe_str_eq(op, ATTRD_OP_UPDATE)) { const char *name = crm_element_value(xml, F_ATTRD_ATTRIBUTE); crm_trace("Compatibility update of %s from %s", name, peer->uname); attrd_peer_update(peer, xml, host, FALSE); } else if (safe_str_eq(op, ATTRD_OP_FLUSH)) { const char *name = crm_element_value(xml, F_ATTRD_ATTRIBUTE); attribute_t *a = g_hash_table_lookup(attributes, name); if(a) { crm_trace("Compatibility write-out of %s for %s from %s", a->id, op, peer->uname); write_or_elect_attribute(a); } } else if (safe_str_eq(op, ATTRD_OP_REFRESH)) { GHashTableIter aIter; attribute_t *a = NULL; g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { crm_trace("Compatibility write-out of %s for %s from %s", a->id, op, peer->uname); write_or_elect_attribute(a); } } } crm_element_value_int(xml, F_ATTRD_WRITER, &peer_state); if(election_state(writer) == election_won && peer_state == election_won && safe_str_neq(peer->uname, attrd_cluster->uname)) { crm_notice("Detected another attribute writer: %s", peer->uname); election_vote(writer); } else if(peer_state == election_won) { if(peer_writer == NULL) { peer_writer = strdup(peer->uname); crm_notice("Recorded attribute writer: %s", peer->uname); } else if(safe_str_neq(peer->uname, peer_writer)) { crm_notice("Recorded new attribute writer: %s (was %s)", peer->uname, peer_writer); free(peer_writer); peer_writer = strdup(peer->uname); } } if (safe_str_eq(op, ATTRD_OP_UPDATE) || safe_str_eq(op, ATTRD_OP_UPDATE_BOTH) || safe_str_eq(op, ATTRD_OP_UPDATE_DELAY)) { attrd_peer_update(peer, xml, host, FALSE); } else if (safe_str_eq(op, ATTRD_OP_SYNC)) { attrd_peer_sync(peer, xml); } else if (safe_str_eq(op, ATTRD_OP_PEER_REMOVE)) { attrd_peer_remove(host, TRUE, peer->uname); } else if (safe_str_eq(op, ATTRD_OP_CLEAR_FAILURE)) { /* It is not currently possible to receive this as a peer command, * but will be, if we one day enable propagating this operation. */ attrd_peer_clear_failure(peer, xml); } else if (safe_str_eq(op, ATTRD_OP_SYNC_RESPONSE) && safe_str_neq(peer->uname, attrd_cluster->uname)) { xmlNode *child = NULL; crm_info("Processing %s from %s", op, peer->uname); for (child = __xml_first_child(xml); child != NULL; child = __xml_next(child)) { host = crm_element_value(child, F_ATTRD_HOST); attrd_peer_update(peer, child, host, TRUE); } } } void attrd_peer_sync(crm_node_t *peer, xmlNode *xml) { GHashTableIter aIter; GHashTableIter vIter; attribute_t *a = NULL; attribute_value_t *v = NULL; xmlNode *sync = create_xml_node(NULL, __FUNCTION__); crm_xml_add(sync, F_ATTRD_TASK, ATTRD_OP_SYNC_RESPONSE); g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { g_hash_table_iter_init(&vIter, a->values); while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) { crm_debug("Syncing %s[%s] = %s to %s", a->id, v->nodename, v->current, peer?peer->uname:"everyone"); build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms, a->user, a->is_private, v->nodename, v->nodeid, v->current); } } crm_debug("Syncing values to %s", peer?peer->uname:"everyone"); send_attrd_message(peer, sync); free_xml(sync); } /*! * \internal * \brief Remove all attributes and optionally peer cache entries for a node * * \param[in] host Name of node to purge * \param[in] uncache If TRUE, remove node from peer caches * \param[in] source Who requested removal (only used for logging) */ void attrd_peer_remove(const char *host, gboolean uncache, const char *source) { attribute_t *a = NULL; GHashTableIter aIter; CRM_CHECK(host != NULL, return); crm_notice("Removing all %s attributes for %s", host, source); g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { if(g_hash_table_remove(a->values, host)) { crm_debug("Removed %s[%s] for %s", a->id, host, source); } } if (uncache) { crm_remote_peer_cache_remove(host); reap_crm_member(0, host); } } /*! * \internal * \brief Return host's hash table entry (creating one if needed) * * \param[in] values Hash table of values * \param[in] host Name of peer to look up * \param[in] xml XML describing the attribute * * \return Pointer to new or existing hash table entry */ static attribute_value_t * attrd_lookup_or_create_value(GHashTable *values, const char *host, xmlNode *xml) { attribute_value_t *v = g_hash_table_lookup(values, host); int is_remote = 0; crm_element_value_int(xml, F_ATTRD_IS_REMOTE, &is_remote); if (is_remote) { /* If we previously assumed this node was an unseen cluster node, * remove its entry from the cluster peer cache. */ crm_node_t *dup = crm_find_peer(0, host); if (dup && (dup->uuid == NULL)) { reap_crm_member(0, host); } /* Ensure this host is in the remote peer cache */ crm_remote_peer_cache_add(host); } if (v == NULL) { v = calloc(1, sizeof(attribute_value_t)); CRM_ASSERT(v != NULL); v->nodename = strdup(host); CRM_ASSERT(v->nodename != NULL); v->is_remote = is_remote; g_hash_table_replace(values, v->nodename, v); } return(v); } void attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter) { bool changed = FALSE; attribute_t *a; attribute_value_t *v = NULL; int dampen = 0; const char *op = crm_element_value(xml, F_ATTRD_TASK); const char *attr = crm_element_value(xml, F_ATTRD_ATTRIBUTE); const char *value = crm_element_value(xml, F_ATTRD_VALUE); const char *dvalue = crm_element_value(xml, F_ATTRD_DAMPEN); if (attr == NULL) { crm_warn("Peer update did not specify attribute"); return; } a = g_hash_table_lookup(attributes, attr); if(a == NULL) { if (op == NULL /* The xml children from an ATTRD_OP_SYNC_RESPONSE have no F_ATTRD_TASK */ || safe_str_eq(op, ATTRD_OP_UPDATE) || safe_str_eq(op, ATTRD_OP_UPDATE_BOTH)) { a = create_attribute(xml); } else { crm_warn("Update error (attribute %s not found)", attr); return; } } if (op == NULL /* The xml children from an ATTRD_OP_SYNC_RESPONSE have no F_ATTRD_TASK */ || safe_str_eq(op, ATTRD_OP_UPDATE_BOTH) || safe_str_eq(op, ATTRD_OP_UPDATE_DELAY)) { if (dvalue) { dampen = crm_get_msec(dvalue); if (dampen >= 0) { if (a->timeout_ms != dampen) { mainloop_timer_stop(a->timer); mainloop_timer_del(a->timer); a->timeout_ms = dampen; if (dampen > 0) { a->timer = mainloop_timer_add(a->id, a->timeout_ms, FALSE, attribute_timer_cb, a); crm_info("Update attribute %s with delay %dms (%s)", a->id, dampen, dvalue); } else { a->timer = NULL; crm_info("Update attribute %s with not delay", a->id); } //if dampen is changed, attrd writes in a current value immediately. write_or_elect_attribute(a); if (safe_str_eq(op, ATTRD_OP_UPDATE_DELAY)) { return; } } else { if (safe_str_eq(op, ATTRD_OP_UPDATE_DELAY)) { crm_trace("Unchanged attribute %s with delay %dms (%s).(ATTRD_OP_UPDATE_DELAY)", a->id, dampen, dvalue); return; } } } else { crm_warn("Update error (A positive number is necessary for delay parameter. attribute %s : %dms (%s))", a->id, dampen, dvalue); return; } } else { crm_warn("Update error (delay parameter is necessary for the update of the attribute %s)", a->id); return; } } if(host == NULL) { GHashTableIter vIter; g_hash_table_iter_init(&vIter, a->values); crm_debug("Setting %s for all hosts to %s", attr, value); xml_remove_prop(xml, F_ATTRD_HOST_ID); while (g_hash_table_iter_next(&vIter, (gpointer *) & host, NULL)) { attrd_peer_update(peer, xml, host, filter); } return; } v = attrd_lookup_or_create_value(a->values, host, xml); if(filter && safe_str_neq(v->current, value) && safe_str_eq(host, attrd_cluster->uname)) { xmlNode *sync = create_xml_node(NULL, __FUNCTION__); crm_notice("%s[%s]: local value '%s' takes priority over '%s' from %s", a->id, host, v->current, value, peer->uname); crm_xml_add(sync, F_ATTRD_TASK, ATTRD_OP_SYNC_RESPONSE); v = g_hash_table_lookup(a->values, host); build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms, a->user, a->is_private, v->nodename, v->nodeid, v->current); crm_xml_add_int(sync, F_ATTRD_WRITER, election_state(writer)); send_attrd_message(peer, sync); free_xml(sync); } else if(safe_str_neq(v->current, value)) { crm_info("Setting %s[%s]: %s -> %s from %s", attr, host, v->current, value, peer->uname); free(v->current); if(value) { v->current = strdup(value); } else { v->current = NULL; } changed = TRUE; } else { crm_trace("Unchanged %s[%s] from %s is %s", attr, host, peer->uname, value); } a->changed |= changed; if(changed) { if(a->timer) { crm_trace("Delayed write out (%dms) for %s", a->timeout_ms, a->id); mainloop_timer_start(a->timer); } else { write_or_elect_attribute(a); } } /* If this is a cluster node whose node ID we are learning, remember it */ if ((v->nodeid == 0) && (v->is_remote == FALSE) && (crm_element_value_int(xml, F_ATTRD_HOST_ID, (int*)&v->nodeid) == 0)) { crm_node_t *known_peer = crm_get_peer(v->nodeid, host); crm_trace("We know %s's node id now: %s", known_peer->uname, known_peer->uuid); if (election_state(writer) == election_won) { write_attributes(FALSE, TRUE); return; } } } void write_or_elect_attribute(attribute_t *a) { enum election_result rc = election_state(writer); if(rc == election_won) { write_attribute(a); } else if(rc == election_in_progress) { crm_trace("Election in progress to determine who will write out %s", a->id); } else if(peer_writer == NULL) { crm_info("Starting an election to determine who will write out %s", a->id); election_vote(writer); } else { crm_trace("%s will write out %s, we are in state %d", peer_writer, a->id, rc); } } gboolean attrd_election_cb(gpointer user_data) { crm_trace("Election complete"); free(peer_writer); peer_writer = strdup(attrd_cluster->uname); /* Update the peers after an election */ attrd_peer_sync(NULL, NULL); /* Update the CIB after an election */ write_attributes(TRUE, FALSE); return FALSE; } void attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *data) { if ((kind == crm_status_nstate) || (kind == crm_status_rstate)) { if (safe_str_eq(peer->state, CRM_NODE_MEMBER)) { /* If we're the writer, send new peers a list of all attributes * (unless it's a remote node, which doesn't run its own attrd) */ if ((election_state(writer) == election_won) && !is_set(peer->flags, crm_remote_node)) { attrd_peer_sync(peer, NULL); } } else { /* Remove all attribute values associated with lost nodes */ attrd_peer_remove(peer->uname, FALSE, "peer loss"); if (peer_writer && safe_str_eq(peer->uname, peer_writer)) { free(peer_writer); peer_writer = NULL; crm_notice("Lost attribute writer %s", peer->uname); } } } } static void attrd_cib_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { int level = LOG_ERR; GHashTableIter iter; const char *peer = NULL; attribute_value_t *v = NULL; char *name = user_data; attribute_t *a = g_hash_table_lookup(attributes, name); if(a == NULL) { crm_info("Attribute %s no longer exists", name); goto done; } a->update = 0; if (rc == pcmk_ok && call_id < 0) { rc = call_id; } switch (rc) { case pcmk_ok: level = LOG_INFO; last_cib_op_done = call_id; break; case -pcmk_err_diff_failed: /* When an attr changes while the CIB is syncing */ case -ETIME: /* When an attr changes while there is a DC election */ case -ENXIO: /* When an attr changes while the CIB is syncing a * newer config from a node that just came up */ level = LOG_WARNING; break; } do_crm_log(level, "Update %d for %s: %s (%d)", call_id, name, pcmk_strerror(rc), rc); g_hash_table_iter_init(&iter, a->values); while (g_hash_table_iter_next(&iter, (gpointer *) & peer, (gpointer *) & v)) { do_crm_log(level, "Update %d for %s[%s]=%s: %s (%d)", call_id, a->id, peer, v->requested, pcmk_strerror(rc), rc); free(v->requested); v->requested = NULL; if (rc != pcmk_ok) { a->changed = TRUE; /* Attempt write out again */ } } done: if(a && a->changed && election_state(writer) == election_won) { write_attribute(a); } } void write_attributes(bool all, bool peer_discovered) { GHashTableIter iter; attribute_t *a = NULL; g_hash_table_iter_init(&iter, attributes); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & a)) { if (peer_discovered && a->unknown_peer_uuids) { /* a new peer uuid has been discovered, try writing this attribute again. */ a->changed = TRUE; } if(all || a->changed) { write_attribute(a); } else { crm_debug("Skipping unchanged attribute %s", a->id); } } } static void build_update_element(xmlNode *parent, attribute_t *a, const char *nodeid, const char *value) { const char *set = NULL; xmlNode *xml_obj = NULL; xml_obj = create_xml_node(parent, XML_CIB_TAG_STATE); crm_xml_add(xml_obj, XML_ATTR_ID, nodeid); xml_obj = create_xml_node(xml_obj, XML_TAG_TRANSIENT_NODEATTRS); crm_xml_add(xml_obj, XML_ATTR_ID, nodeid); xml_obj = create_xml_node(xml_obj, XML_TAG_ATTR_SETS); if (a->set) { crm_xml_set_id(xml_obj, "%s", a->set); } else { crm_xml_set_id(xml_obj, "%s-%s", XML_CIB_TAG_STATUS, nodeid); } set = ID(xml_obj); xml_obj = create_xml_node(xml_obj, XML_CIB_TAG_NVPAIR); if (a->uuid) { crm_xml_set_id(xml_obj, "%s", a->uuid); } else { crm_xml_set_id(xml_obj, "%s-%s", set, a->id); } crm_xml_add(xml_obj, XML_NVPAIR_ATTR_NAME, a->id); if(value) { crm_xml_add(xml_obj, XML_NVPAIR_ATTR_VALUE, value); } else { crm_xml_add(xml_obj, XML_NVPAIR_ATTR_VALUE, ""); crm_xml_add(xml_obj, "__delete__", XML_NVPAIR_ATTR_VALUE); } } void write_attribute(attribute_t *a) { int private_updates = 0, cib_updates = 0; xmlNode *xml_top = NULL; attribute_value_t *v = NULL; GHashTableIter iter; enum cib_call_options flags = cib_quorum_override; + GHashTable *alert_attribute_value = NULL; if (a == NULL) { return; } /* If this attribute will be written to the CIB ... */ if (!a->is_private) { /* Defer the write if now's not a good time */ if (the_cib == NULL) { crm_info("Write out of '%s' delayed: cib not connected", a->id); return; } else if (a->update && (a->update < last_cib_op_done)) { crm_info("Write out of '%s' continuing: update %d considered lost", a->id, a->update); } else if (a->update) { crm_info("Write out of '%s' delayed: update %d in progress", a->id, a->update); return; } else if (mainloop_timer_running(a->timer)) { crm_info("Write out of '%s' delayed: timer is running", a->id); return; } /* Initialize the status update XML */ xml_top = create_xml_node(NULL, XML_CIB_TAG_STATUS); } /* Attribute will be written shortly, so clear changed flag */ a->changed = FALSE; /* We will check all peers' uuids shortly, so initialize this to false */ a->unknown_peer_uuids = FALSE; + /* Make the table for the attribute trap */ + alert_attribute_value = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, NULL, free_attribute_value);; + /* Iterate over each peer value of this attribute */ g_hash_table_iter_init(&iter, a->values); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & v)) { crm_node_t *peer = crm_get_peer_full(v->nodeid, v->nodename, CRM_GET_PEER_ANY); /* If the value's peer info does not correspond to a peer, ignore it */ if (peer == NULL) { crm_notice("Update error (peer not found): %s[%s]=%s failed (host=%p)", v->nodename, a->id, v->current, peer); continue; } /* If we're just learning the peer's node id, remember it */ if (peer->id && (v->nodeid == 0)) { crm_trace("Updating value's nodeid"); v->nodeid = peer->id; } /* If this is a private attribute, no update needs to be sent */ if (a->is_private) { private_updates++; continue; } /* If the peer is found, but its uuid is unknown, defer write */ if (peer->uuid == NULL) { a->unknown_peer_uuids = TRUE; crm_notice("Update error (unknown peer uuid, retry will be attempted once uuid is discovered): %s[%s]=%s failed (host=%p)", v->nodename, a->id, v->current, peer); continue; } /* Add this value to status update XML */ crm_debug("Update: %s[%s]=%s (%s %u %u %s)", v->nodename, a->id, v->current, peer->uuid, peer->id, v->nodeid, peer->uname); build_update_element(xml_top, a, peer->uuid, v->current); cib_updates++; + /* Preservation of the attribute to transmit alert */ + set_alert_attribute_value(alert_attribute_value, v); + free(v->requested); v->requested = NULL; if (v->current) { v->requested = strdup(v->current); } else { /* Older attrd versions don't know about the cib_mixed_update * flag so make sure it goes to the local cib which does */ flags |= cib_mixed_update|cib_scope_local; } } if (private_updates) { crm_info("Processed %d private change%s for %s, id=%s, set=%s", private_updates, ((private_updates == 1)? "" : "s"), a->id, (a->uuid? a->uuid : ""), a->set); } if (cib_updates) { crm_log_xml_trace(xml_top, __FUNCTION__); a->update = cib_internal_op(the_cib, CIB_OP_MODIFY, NULL, XML_CIB_TAG_STATUS, xml_top, NULL, flags, a->user); crm_info("Sent update %d with %d changes for %s, id=%s, set=%s", a->update, cib_updates, a->id, (a->uuid? a->uuid : ""), a->set); the_cib->cmds->register_callback_full(the_cib, a->update, 120, FALSE, strdup(a->id), "attrd_cib_callback", attrd_cib_callback, free); + /* Transmit alert of the attribute */ + send_alert_attributes_value(a, alert_attribute_value); + } + + g_hash_table_destroy(alert_attribute_value); free_xml(xml_top); } diff --git a/attrd/internal.h b/attrd/internal.h index b8b53a142d..225dd9b460 100644 --- a/attrd/internal.h +++ b/attrd/internal.h @@ -1,41 +1,68 @@ /* * Copyright (C) 2013-2017 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include +typedef struct attribute_s { + char *uuid; /* TODO: Remove if at all possible */ + char *id; + char *set; + GHashTable *values; + int update; + int timeout_ms; + + /* TODO: refactor these three as a bitmask */ + bool changed; /* whether attribute value has changed since last write */ + bool unknown_peer_uuids; /* whether we know we're missing a peer uuid */ + gboolean is_private; /* whether to keep this attribute out of the CIB */ + + mainloop_timer_t *timer; + + char *user; + +} attribute_t; + +typedef struct attribute_value_s { + uint32_t nodeid; + gboolean is_remote; + char *nodename; + char *current; + char *requested; +} attribute_value_t; + cib_t *the_cib; crm_cluster_t *attrd_cluster; GHashTable *attributes; election_t *writer; int attrd_error; #define attrd_send_ack(client, id, flags) \ crm_ipcs_send_ack((client), (id), (flags), "ack", __FUNCTION__, __LINE__) void write_attributes(bool all, bool peer_discovered); void attrd_peer_message(crm_node_t *client, xmlNode *msg); void attrd_client_peer_remove(const char *client_name, xmlNode *xml); void attrd_client_clear_failure(xmlNode *xml); void attrd_client_update(xmlNode *xml); void attrd_client_refresh(void); void attrd_client_query(crm_client_t *client, uint32_t id, uint32_t flags, xmlNode *query); void free_attribute(gpointer data); gboolean attrd_election_cb(gpointer user_data); void attrd_peer_change_cb(enum crm_status_type type, crm_node_t *peer, const void *data); diff --git a/attrd/legacy.c b/attrd/legacy.c index 64fef6e020..7805a10b30 100644 --- a/attrd/legacy.c +++ b/attrd/legacy.c @@ -1,1218 +1,1248 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include +#include #include #include #include +#include "attrd_alerts.h" + #define OPTARGS "hV" #if SUPPORT_HEARTBEAT ll_cluster_t *attrd_cluster_conn; #endif char *attrd_uname = NULL; char *attrd_uuid = NULL; +uint32_t attrd_nodeid = 0; GHashTable *attr_hash = NULL; -cib_t *cib_conn = NULL; +cib_t *the_cib = NULL; +lrmd_t *the_lrmd = NULL; +crm_trigger_t *attrd_config_read = NULL; /* Convenience macro for registering a CIB callback. - * Check cib_conn != NULL before using. + * Check the_cib != NULL before using. */ #define register_cib_callback(call_id, data, fn, free_fn) \ - cib_conn->cmds->register_callback_full(cib_conn, call_id, 120, FALSE, \ + the_cib->cmds->register_callback_full(the_cib, call_id, 120, FALSE, \ data, #fn, fn, free_fn) typedef struct attr_hash_entry_s { char *uuid; char *id; char *set; char *section; char *value; char *stored_value; int timeout; char *dampen; guint timer_id; char *user; } attr_hash_entry_t; void attrd_local_callback(xmlNode * msg); gboolean attrd_timer_callback(void *user_data); gboolean attrd_trigger_update(attr_hash_entry_t * hash_entry); void attrd_perform_update(attr_hash_entry_t * hash_entry); static void update_local_attr(xmlNode *msg, attr_hash_entry_t *hash_entry); static void free_hash_entry(gpointer data) { attr_hash_entry_t *entry = data; if (entry == NULL) { return; } free(entry->id); free(entry->set); free(entry->dampen); free(entry->section); free(entry->uuid); free(entry->value); free(entry->stored_value); free(entry->user); free(entry); } /* Exit code means? */ static int32_t attrd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; crm_client_t *client = crm_client_get(c); xmlNode *msg = crm_ipcs_recv(client, data, size, &id, &flags); crm_ipcs_send_ack(client, id, flags, "ack", __FUNCTION__, __LINE__); if (msg == NULL) { crm_debug("No msg from %d (%p)", crm_ipcs_client_pid(c), c); return 0; } #if ENABLE_ACL CRM_ASSERT(client->user != NULL); crm_acl_get_set_user(msg, F_ATTRD_USER, client->user); #endif crm_trace("Processing msg from %d (%p)", crm_ipcs_client_pid(c), c); crm_log_xml_trace(msg, __FUNCTION__); attrd_local_callback(msg); free_xml(msg); return 0; } static void usage(const char *cmd, int exit_status) { FILE *stream; stream = exit_status ? stderr : stdout; fprintf(stream, "usage: %s [-srkh] [-c configure file]\n", cmd); /* fprintf(stream, "\t-d\tsets debug level\n"); */ /* fprintf(stream, "\t-s\tgets daemon status\n"); */ /* fprintf(stream, "\t-r\trestarts daemon\n"); */ /* fprintf(stream, "\t-k\tstops daemon\n"); */ /* fprintf(stream, "\t-h\thelp message\n"); */ fflush(stream); crm_exit(exit_status); } static void stop_attrd_timer(attr_hash_entry_t * hash_entry) { if (hash_entry != NULL && hash_entry->timer_id != 0) { crm_trace("Stopping %s timer", hash_entry->id); g_source_remove(hash_entry->timer_id); hash_entry->timer_id = 0; } } static void log_hash_entry(int level, attr_hash_entry_t * entry, const char *text) { do_crm_log(level, "%s: Set: %s, Name: %s, Value: %s, Timeout: %s", text, entry->section, entry->id, entry->value, entry->dampen); } static attr_hash_entry_t * find_hash_entry(xmlNode * msg) { const char *value = NULL; const char *attr = crm_element_value(msg, F_ATTRD_ATTRIBUTE); attr_hash_entry_t *hash_entry = NULL; if (attr == NULL) { crm_info("Ignoring message with no attribute name"); return NULL; } hash_entry = g_hash_table_lookup(attr_hash, attr); if (hash_entry == NULL) { /* create one and add it */ crm_info("Creating hash entry for %s", attr); hash_entry = calloc(1, sizeof(attr_hash_entry_t)); hash_entry->id = strdup(attr); g_hash_table_insert(attr_hash, hash_entry->id, hash_entry); hash_entry = g_hash_table_lookup(attr_hash, attr); CRM_CHECK(hash_entry != NULL, return NULL); } value = crm_element_value(msg, F_ATTRD_SET); if (value != NULL) { free(hash_entry->set); hash_entry->set = strdup(value); crm_debug("\t%s->set: %s", attr, value); } value = crm_element_value(msg, F_ATTRD_SECTION); if (value == NULL) { value = XML_CIB_TAG_STATUS; } free(hash_entry->section); hash_entry->section = strdup(value); crm_trace("\t%s->section: %s", attr, value); value = crm_element_value(msg, F_ATTRD_DAMPEN); if (value != NULL) { free(hash_entry->dampen); hash_entry->dampen = strdup(value); hash_entry->timeout = crm_get_msec(value); crm_trace("\t%s->timeout: %s", attr, value); } #if ENABLE_ACL free(hash_entry->user); hash_entry->user = NULL; value = crm_element_value(msg, F_ATTRD_USER); if (value != NULL) { hash_entry->user = strdup(value); crm_trace("\t%s->user: %s", attr, value); } #endif log_hash_entry(LOG_DEBUG_2, hash_entry, "Found (and updated) entry:"); return hash_entry; } /*! * \internal * \brief Clear failure-related attributes for local node * * \param[in] xml XML of ATTRD_OP_CLEAR_FAILURE request */ static void local_clear_failure(xmlNode *xml) { const char *rsc = crm_element_value(xml, F_ATTRD_RESOURCE); const char *what = rsc? rsc : "all resources"; const char *op = crm_element_value(xml, F_ATTRD_OPERATION); const char *interval_s = crm_element_value(xml, F_ATTRD_INTERVAL); int interval = crm_get_interval(interval_s); regex_t regex; GHashTableIter iter; attr_hash_entry_t *hash_entry = NULL; if (attrd_failure_regex(®ex, rsc, op, interval) != pcmk_ok) { crm_info("Ignoring invalid request to clear %s", (rsc? rsc : "all resources")); return; } crm_debug("Clearing %s locally", what); /* Make sure value is not set, so we delete */ if (crm_element_value(xml, F_ATTRD_VALUE)) { crm_xml_replace(xml, F_ATTRD_VALUE, NULL); } g_hash_table_iter_init(&iter, attr_hash); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &hash_entry)) { if (regexec(®ex, hash_entry->id, 0, NULL, 0) == 0) { crm_trace("Matched %s when clearing %s", hash_entry->id, what); update_local_attr(xml, hash_entry); } } } static void remote_clear_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { if (rc == 0) { crm_debug("Successfully cleared failures using %s", user_data); } else { crm_notice("Failed to clear failures: %s " CRM_XS " call=%d xpath=%s rc=%d", pcmk_strerror(rc), call_id, user_data, rc); } } /* xpath component to match an id attribute (format takes remote node name) */ #define XPATH_ID "[@" XML_ATTR_UUID "='%s']" /* Define the start of an xpath to match a remote node transient attribute * (argument must be either an empty string to match for all remote nodes, * or XPATH_ID to match for a single remote node) */ #define XPATH_REMOTE_ATTR(x) "/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS \ "/" XML_CIB_TAG_STATE "[@" XML_NODE_IS_REMOTE "='true']" x \ "/" XML_TAG_TRANSIENT_NODEATTRS "/" XML_TAG_ATTR_SETS "/" XML_CIB_TAG_NVPAIR /* xpath component to match an attribute name exactly */ #define XPATH_NAME_IS(x) "@" XML_NVPAIR_ATTR_NAME "='" x "'" /* xpath component to match an attribute name by prefix */ #define XPATH_NAME_START(x) "starts-with(@" XML_NVPAIR_ATTR_NAME ", '" x "')" /* xpath ending to clear all resources */ #define XPATH_CLEAR_ALL \ "[" XPATH_NAME_START(CRM_FAIL_COUNT_PREFIX "-") \ " or " XPATH_NAME_START(CRM_LAST_FAILURE_PREFIX "-") "]" /* xpath ending to clear all operations for one resource * (format takes resource name x 4) * * @COMPAT attributes set < 1.1.17: * also match older attributes that do not have the operation part */ #define XPATH_CLEAR_ONE \ "[" XPATH_NAME_IS(CRM_FAIL_COUNT_PREFIX "-%s") \ " or " XPATH_NAME_IS(CRM_LAST_FAILURE_PREFIX "-%s") \ " or " XPATH_NAME_START(CRM_FAIL_COUNT_PREFIX "-%s#") \ " or " XPATH_NAME_START(CRM_LAST_FAILURE_PREFIX "-%s#") "]" /* xpath ending to clear one operation for one resource * (format takes resource name x 2, resource name + operation + interval x 2) * * @COMPAT attributes set < 1.1.17: * also match older attributes that do not have the operation part */ #define XPATH_CLEAR_OP \ "[" XPATH_NAME_IS(CRM_FAIL_COUNT_PREFIX "-%s") \ " or " XPATH_NAME_IS(CRM_LAST_FAILURE_PREFIX "-%s") \ " or " XPATH_NAME_IS(CRM_FAIL_COUNT_PREFIX "-%s#%s_%d") \ " or " XPATH_NAME_IS(CRM_LAST_FAILURE_PREFIX "-%s#%s_%d") "]" /*! * \internal * \brief Clear failure-related attributes for Pacemaker Remote node(s) * * \param[in] xml XML of ATTRD_OP_CLEAR_FAILURE request */ static void remote_clear_failure(xmlNode *xml) { const char *rsc = crm_element_value(xml, F_ATTRD_RESOURCE); const char *host = crm_element_value(xml, F_ATTRD_HOST); const char *op = crm_element_value(xml, F_ATTRD_OPERATION); int rc = pcmk_ok; char *xpath; - if (cib_conn == NULL) { + if (the_cib == NULL) { crm_info("Ignoring request to clear %s on %s because not connected to CIB", (rsc? rsc : "all resources"), (host? host: "all remote nodes")); return; } /* Build an xpath to clear appropriate attributes */ if (rsc == NULL) { /* No resource specified, clear all resources */ if (host == NULL) { xpath = crm_strdup_printf(XPATH_REMOTE_ATTR("") XPATH_CLEAR_ALL); } else { xpath = crm_strdup_printf(XPATH_REMOTE_ATTR(XPATH_ID) XPATH_CLEAR_ALL, host); } } else if (op == NULL) { /* Resource but no operation specified, clear all operations */ if (host == NULL) { xpath = crm_strdup_printf(XPATH_REMOTE_ATTR("") XPATH_CLEAR_ONE, rsc, rsc, rsc, rsc); } else { xpath = crm_strdup_printf(XPATH_REMOTE_ATTR(XPATH_ID) XPATH_CLEAR_ONE, host, rsc, rsc, rsc, rsc); } } else { /* Resource and operation specified */ const char *interval_s = crm_element_value(xml, F_ATTRD_INTERVAL); int interval = crm_get_interval(interval_s); if (host == NULL) { xpath = crm_strdup_printf(XPATH_REMOTE_ATTR("") XPATH_CLEAR_OP, rsc, rsc, rsc, op, interval, rsc, op, interval); } else { xpath = crm_strdup_printf(XPATH_REMOTE_ATTR(XPATH_ID) XPATH_CLEAR_OP, host, rsc, rsc, rsc, op, interval, rsc, op, interval); } } crm_trace("Clearing attributes matching %s", xpath); - rc = cib_conn->cmds->delete(cib_conn, xpath, NULL, cib_xpath|cib_multiple); + rc = the_cib->cmds->delete(the_cib, xpath, NULL, cib_xpath|cib_multiple); register_cib_callback(rc, xpath, remote_clear_callback, free); } static void process_xml_request(xmlNode *xml) { attr_hash_entry_t *hash_entry = NULL; const char *from = crm_element_value(xml, F_ORIG); const char *op = crm_element_value(xml, F_ATTRD_TASK); const char *host = crm_element_value(xml, F_ATTRD_HOST); const char *ignore = crm_element_value(xml, F_ATTRD_IGNORE_LOCALLY); if (host && safe_str_eq(host, attrd_uname)) { crm_info("%s relayed from %s", (op? op : "Request"), from); attrd_local_callback(xml); } else if (safe_str_eq(op, ATTRD_OP_PEER_REMOVE)) { CRM_CHECK(host != NULL, return); crm_debug("Removing %s from peer caches for %s", host, from); crm_remote_peer_cache_remove(host); reap_crm_member(0, host); } else if (safe_str_eq(op, ATTRD_OP_CLEAR_FAILURE)) { local_clear_failure(xml); } else if ((ignore == NULL) || safe_str_neq(from, attrd_uname)) { crm_trace("%s message from %s", op, from); hash_entry = find_hash_entry(xml); stop_attrd_timer(hash_entry); attrd_perform_update(hash_entry); } } #if SUPPORT_HEARTBEAT static void attrd_ha_connection_destroy(gpointer user_data) { crm_trace("Invoked"); if (attrd_shutting_down()) { /* we signed out, so this is expected */ crm_info("Heartbeat disconnection complete"); return; } crm_crit("Lost connection to heartbeat service!"); if (attrd_mainloop_running()) { attrd_quit_mainloop(); return; } crm_exit(pcmk_ok); } static void attrd_ha_callback(HA_Message * msg, void *private_data) { xmlNode *xml = convert_ha_message(NULL, msg, __FUNCTION__); process_xml_request(xml); free_xml(xml); } #endif #if SUPPORT_COROSYNC static void attrd_cs_dispatch(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { uint32_t kind = 0; xmlNode *xml = NULL; const char *from = NULL; char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); if(data == NULL) { return; } if (kind == crm_class_cluster) { xml = string2xml(data); if (xml == NULL) { crm_err("Bad message received: '%.120s'", data); } } if (xml != NULL) { /* crm_xml_add_int(xml, F_SEQ, wrapper->id); */ crm_xml_add(xml, F_ORIG, from); process_xml_request(xml); free_xml(xml); } free(data); } static void attrd_cs_destroy(gpointer unused) { if (attrd_shutting_down()) { /* we signed out, so this is expected */ crm_info("Corosync disconnection complete"); return; } crm_crit("Lost connection to Corosync service!"); if (attrd_mainloop_running()) { attrd_quit_mainloop(); return; } crm_exit(EINVAL); } #endif static void attrd_cib_connection_destroy(gpointer user_data) { cib_t *conn = user_data; conn->cmds->signoff(conn); /* Ensure IPC is cleaned up */ if (attrd_shutting_down()) { crm_info("Connection to the CIB terminated..."); } else { /* eventually this will trigger a reconnect, not a shutdown */ crm_err("Connection to the CIB terminated..."); crm_exit(ENOTCONN); } return; } static void update_for_hash_entry(gpointer key, gpointer value, gpointer user_data) { attr_hash_entry_t *entry = value; if (entry->value != NULL || entry->stored_value != NULL) { attrd_timer_callback(value); } } static void local_update_for_hash_entry(gpointer key, gpointer value, gpointer user_data) { attr_hash_entry_t *entry = value; if (entry->timer_id == 0) { crm_trace("Performing local-only update after replace for %s", entry->id); attrd_perform_update(entry); /* } else { * just let the timer expire and attrd_timer_callback() will do the right thing */ } } static void do_cib_replaced(const char *event, xmlNode * msg) { crm_info("Updating all attributes after %s event", event); g_hash_table_foreach(attr_hash, local_update_for_hash_entry, NULL); } static gboolean cib_connect(void *user_data) { static int attempts = 1; static int max_retry = 20; gboolean was_err = FALSE; static cib_t *local_conn = NULL; if (local_conn == NULL) { local_conn = cib_new(); } if (was_err == FALSE) { int rc = -ENOTCONN; if (attempts < max_retry) { crm_debug("CIB signon attempt %d", attempts); rc = local_conn->cmds->signon(local_conn, T_ATTRD, cib_command); } if (rc != pcmk_ok && attempts > max_retry) { crm_err("Signon to CIB failed: %s", pcmk_strerror(rc)); was_err = TRUE; } else if (rc != pcmk_ok) { attempts++; return TRUE; } } crm_info("Connected to the CIB after %d signon attempts", attempts); if (was_err == FALSE) { int rc = local_conn->cmds->set_connection_dnotify(local_conn, attrd_cib_connection_destroy); if (rc != pcmk_ok) { crm_err("Could not set dnotify callback"); was_err = TRUE; } } if (was_err == FALSE) { if (pcmk_ok != local_conn->cmds->add_notify_callback(local_conn, T_CIB_REPLACE_NOTIFY, do_cib_replaced)) { crm_err("Could not set CIB notification callback"); was_err = TRUE; } + if (was_err == FALSE) { + if (pcmk_ok != local_conn->cmds->add_notify_callback(local_conn, T_CIB_DIFF_NOTIFY, attrd_cib_updated_cb)) { + crm_err("Could not set CIB notification callback (update)"); + was_err = TRUE; + } + + } + attrd_config_read = mainloop_add_trigger(G_PRIORITY_HIGH, attrd_read_options, NULL); + + /* Reading of cib(Alert section) after the start */ + mainloop_set_trigger(attrd_config_read); } if (was_err) { crm_err("Aborting startup"); crm_exit(DAEMON_RESPAWN_STOP); } - cib_conn = local_conn; + the_cib = local_conn; crm_info("Sending full refresh now that we're connected to the cib"); g_hash_table_foreach(attr_hash, local_update_for_hash_entry, NULL); return FALSE; } int main(int argc, char **argv) { int flag = 0; int argerr = 0; crm_cluster_t cluster; gboolean was_err = FALSE; qb_ipcs_connection_t *c = NULL; qb_ipcs_service_t *ipcs = NULL; crm_log_init(T_ATTRD, LOG_NOTICE, TRUE, FALSE, argc, argv, FALSE); mainloop_add_signal(SIGTERM, attrd_shutdown); while ((flag = getopt(argc, argv, OPTARGS)) != EOF) { switch (flag) { case 'V': crm_bump_log_level(argc, argv); break; case 'h': /* Help message */ usage(T_ATTRD, EX_OK); break; default: ++argerr; break; } } if (optind > argc) { ++argerr; } if (argerr) { usage(T_ATTRD, EX_USAGE); } attr_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_hash_entry); crm_info("Starting up"); if (was_err == FALSE) { #if SUPPORT_COROSYNC if (is_openais_cluster()) { cluster.destroy = attrd_cs_destroy; cluster.cpg.cpg_deliver_fn = attrd_cs_dispatch; cluster.cpg.cpg_confchg_fn = pcmk_cpg_membership; } #endif #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { cluster.hb_conn = NULL; cluster.hb_dispatch = attrd_ha_callback; cluster.destroy = attrd_ha_connection_destroy; } #endif if (FALSE == crm_cluster_connect(&cluster)) { crm_err("HA Signon failed"); was_err = TRUE; } attrd_uname = cluster.uname; attrd_uuid = cluster.uuid; + attrd_nodeid = cluster.nodeid; #if SUPPORT_HEARTBEAT attrd_cluster_conn = cluster.hb_conn; #endif } crm_info("Cluster connection active"); if (was_err == FALSE) { attrd_init_ipc(&ipcs, attrd_ipc_dispatch); } crm_info("Accepting attribute updates"); attrd_init_mainloop(); if (0 == g_timeout_add_full(G_PRIORITY_LOW + 1, 5000, cib_connect, NULL, NULL)) { crm_info("Adding timer failed"); was_err = TRUE; } if (was_err) { crm_err("Aborting startup"); return 100; } crm_notice("Starting mainloop..."); attrd_run_mainloop(); crm_notice("Exiting..."); #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { attrd_cluster_conn->llc_ops->signoff(attrd_cluster_conn, TRUE); attrd_cluster_conn->llc_ops->delete(attrd_cluster_conn); } #endif c = qb_ipcs_connection_first_get(ipcs); while (c != NULL) { qb_ipcs_connection_t *last = c; c = qb_ipcs_connection_next_get(ipcs, last); /* There really shouldn't be anyone connected at this point */ crm_notice("Disconnecting client %p, pid=%d...", last, crm_ipcs_client_pid(last)); qb_ipcs_disconnect(last); qb_ipcs_connection_unref(last); } qb_ipcs_destroy(ipcs); - if (cib_conn) { - cib_conn->cmds->signoff(cib_conn); - cib_delete(cib_conn); + if (the_lrmd) { + the_lrmd->cmds->disconnect(the_lrmd); + lrmd_api_delete(the_lrmd); + the_lrmd = NULL; + } + + if (the_cib) { + the_cib->cmds->signoff(the_cib); + cib_delete(the_cib); } g_hash_table_destroy(attr_hash); free(attrd_uuid); return crm_exit(pcmk_ok); } struct attrd_callback_s { char *attr; char *value; }; /*! * \internal * \brief Free an attrd callback structure */ static void free_attrd_callback(void *user_data) { struct attrd_callback_s *data = user_data; free(data->attr); free(data->value); free(data); } static void attrd_cib_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { attr_hash_entry_t *hash_entry = NULL; struct attrd_callback_s *data = user_data; if (data->value == NULL && rc == -ENXIO) { rc = pcmk_ok; } else if (call_id < 0) { crm_warn("Update %s=%s failed: %s", data->attr, data->value, pcmk_strerror(call_id)); return; } switch (rc) { case pcmk_ok: crm_debug("Update %d for %s=%s passed", call_id, data->attr, data->value); hash_entry = g_hash_table_lookup(attr_hash, data->attr); if (hash_entry) { free(hash_entry->stored_value); hash_entry->stored_value = NULL; if (data->value != NULL) { hash_entry->stored_value = strdup(data->value); } } break; case -pcmk_err_diff_failed: /* When an attr changes while the CIB is syncing */ case -ETIME: /* When an attr changes while there is a DC election */ case -ENXIO: /* When an attr changes while the CIB is syncing a * newer config from a node that just came up */ crm_warn("Update %d for %s=%s failed: %s", call_id, data->attr, data->value, pcmk_strerror(rc)); break; default: crm_err("Update %d for %s=%s failed: %s", call_id, data->attr, data->value, pcmk_strerror(rc)); } } void attrd_perform_update(attr_hash_entry_t * hash_entry) { int rc = pcmk_ok; struct attrd_callback_s *data = NULL; const char *user_name = NULL; if (hash_entry == NULL) { return; - } else if (cib_conn == NULL) { + } else if (the_cib == NULL) { crm_info("Delaying operation %s=%s: cib not connected", hash_entry->id, crm_str(hash_entry->value)); return; } #if ENABLE_ACL if (hash_entry->user) { user_name = hash_entry->user; crm_trace("Performing request from user '%s'", hash_entry->user); } #endif if (hash_entry->value == NULL) { /* delete the attr */ - rc = delete_attr_delegate(cib_conn, cib_none, hash_entry->section, attrd_uuid, NULL, + rc = delete_attr_delegate(the_cib, cib_none, hash_entry->section, attrd_uuid, NULL, hash_entry->set, hash_entry->uuid, hash_entry->id, NULL, FALSE, user_name); if (rc >= 0 && hash_entry->stored_value) { crm_notice("Sent delete %d: node=%s, attr=%s, id=%s, set=%s, section=%s", rc, attrd_uuid, hash_entry->id, hash_entry->uuid ? hash_entry->uuid : "", hash_entry->set, hash_entry->section); } else if (rc < 0 && rc != -ENXIO) { crm_notice ("Delete operation failed: node=%s, attr=%s, id=%s, set=%s, section=%s: %s (%d)", attrd_uuid, hash_entry->id, hash_entry->uuid ? hash_entry->uuid : "", hash_entry->set, hash_entry->section, pcmk_strerror(rc), rc); } else { crm_trace("Sent delete %d: node=%s, attr=%s, id=%s, set=%s, section=%s", rc, attrd_uuid, hash_entry->id, hash_entry->uuid ? hash_entry->uuid : "", hash_entry->set, hash_entry->section); } + attrd_send_alerts(the_lrmd, attrd_uname, attrd_nodeid, hash_entry->id, hash_entry->value, crm_alert_list); } else { /* send update */ - rc = update_attr_delegate(cib_conn, cib_none, hash_entry->section, + rc = update_attr_delegate(the_cib, cib_none, hash_entry->section, attrd_uuid, NULL, hash_entry->set, hash_entry->uuid, hash_entry->id, hash_entry->value, FALSE, user_name, NULL); if (rc < 0) { crm_notice("Sent update %s=%s failed: %s", hash_entry->id, hash_entry->value, pcmk_strerror(rc)); } if (safe_str_neq(hash_entry->value, hash_entry->stored_value) || rc < 0) { crm_notice("Sent update %d: %s=%s", rc, hash_entry->id, hash_entry->value); } else { crm_trace("Sent update %d: %s=%s", rc, hash_entry->id, hash_entry->value); } + attrd_send_alerts(the_lrmd, attrd_uname, attrd_nodeid, hash_entry->id, hash_entry->value, crm_alert_list); } data = calloc(1, sizeof(struct attrd_callback_s)); data->attr = strdup(hash_entry->id); if (hash_entry->value != NULL) { data->value = strdup(hash_entry->value); } register_cib_callback(rc, data, attrd_cib_callback, free_attrd_callback); return; } /*! * \internal * \brief Expand attribute values that use "++" or "+=" * * \param[in] value Attribute value to expand * \param[in] old_value Previous value of attribute * * \return Newly allocated string with expanded value, or NULL if not expanded */ static char * expand_attr_value(const char *value, const char *old_value) { char *expanded = NULL; if (attrd_value_needs_expansion(value)) { expanded = crm_itoa(attrd_expand_value(value, old_value)); } return expanded; } /*! * \internal * \brief Update a single node attribute for this node * * \param[in] msg XML message with update * \param[in,out] hash_entry Node attribute structure */ static void update_local_attr(xmlNode *msg, attr_hash_entry_t *hash_entry) { const char *value = crm_element_value(msg, F_ATTRD_VALUE); char *expanded = NULL; if (hash_entry->uuid == NULL) { const char *key = crm_element_value(msg, F_ATTRD_KEY); if (key) { hash_entry->uuid = strdup(key); } } crm_debug("Request to update %s (%s) to %s from %s (stored: %s)", hash_entry->id, (hash_entry->uuid? hash_entry->uuid : "no uuid"), value, hash_entry->value, hash_entry->stored_value); if (safe_str_eq(value, hash_entry->value) && safe_str_eq(value, hash_entry->stored_value)) { crm_trace("Ignoring non-change"); return; } else if (value) { expanded = expand_attr_value(value, hash_entry->value); if (expanded) { crm_info("Expanded %s=%s to %s", hash_entry->id, value, expanded); value = expanded; } } if (safe_str_eq(value, hash_entry->value) && hash_entry->timer_id) { /* We're already waiting to set this value */ free(expanded); return; } free(hash_entry->value); hash_entry->value = NULL; if (value != NULL) { hash_entry->value = (expanded? expanded : strdup(value)); crm_debug("New value of %s is %s", hash_entry->id, value); } stop_attrd_timer(hash_entry); if (hash_entry->timeout > 0) { hash_entry->timer_id = g_timeout_add(hash_entry->timeout, attrd_timer_callback, hash_entry); } else { attrd_trigger_update(hash_entry); } } /*! * \internal * \brief Log the result of a CIB operation for a remote attribute * * \param[in] msg ignored * \param[in] id CIB operation ID * \param[in] rc CIB operation result * \param[in] output ignored * \param[in] data User-friendly string describing operation */ static void remote_attr_callback(xmlNode *msg, int id, int rc, xmlNode *output, void *data) { if (rc == pcmk_ok) { crm_debug("%s succeeded " CRM_XS " call=%d", data, id); } else { crm_notice("%s failed: %s " CRM_XS " call=%d rc=%d", data, pcmk_strerror(rc), id, rc); } } /*! * \internal * \brief Update a Pacemaker Remote node attribute via CIB only * * \param[in] host Pacemaker Remote node name * \param[in] name Attribute name * \param[in] value New attribute value * \param[in] section CIB section to update (defaults to status if NULL) * \param[in] user_name User to perform operation as * * \note Legacy attrd does not track remote node attributes, so such requests * are only sent to the CIB. This means that dampening is ignored, and * updates for the same attribute submitted to different nodes cannot be * reliably ordered. This is not ideal, but allows remote nodes to * be supported, and should be acceptable in practice. */ static void update_remote_attr(const char *host, const char *name, const char *value, const char *section, const char *user_name) { int rc = pcmk_ok; char *desc; if (value == NULL) { desc = crm_strdup_printf("Delete of %s in %s for %s", name, section, host); } else { desc = crm_strdup_printf("Update of %s=%s in %s for %s", name, value, section, host); } if (name == NULL) { rc = -EINVAL; - } else if (cib_conn == NULL) { + } else if (the_cib == NULL) { rc = -ENOTCONN; } if (rc != pcmk_ok) { remote_attr_callback(NULL, rc, rc, NULL, desc); free(desc); return; } if (value == NULL) { - rc = delete_attr_delegate(cib_conn, cib_none, section, + rc = delete_attr_delegate(the_cib, cib_none, section, host, NULL, NULL, NULL, name, NULL, FALSE, user_name); } else { - rc = update_attr_delegate(cib_conn, cib_none, section, + rc = update_attr_delegate(the_cib, cib_none, section, host, NULL, NULL, NULL, name, value, FALSE, user_name, "remote"); } + + attrd_send_alerts(the_lrmd, host, 0, name, value == NULL? "null":value, crm_alert_list); + crm_trace("%s submitted as CIB call %d", desc, rc); register_cib_callback(rc, desc, remote_attr_callback, free); } /*! * \internal * \brief Handle a client request to clear failures * * \param[in] msg XML of request * * \note Handling is according to the host specified in the request: * NULL: Relay to all cluster nodes (which do local_clear_failure()) * and also handle all remote nodes here, using remote_clear_failure(); * Our uname: Handle here, using local_clear_failure(); * Known peer: Relay to that peer, which (via process_xml_message() then * attrd_local_callback()) comes back here as previous case; * Unknown peer: Handle here as remote node, using remote_clear_failure() */ static void attrd_client_clear_failure(xmlNode *msg) { const char *host = crm_element_value(msg, F_ATTRD_HOST); if (host == NULL) { /* Clear failure on all cluster nodes */ crm_notice("Broadcasting request to clear failure on all hosts"); send_cluster_message(NULL, crm_msg_attrd, msg, FALSE); /* Clear failure on all remote nodes */ remote_clear_failure(msg); } else if (safe_str_eq(host, attrd_uname)) { local_clear_failure(msg); } else { int is_remote = FALSE; crm_node_t *peer = crm_find_peer(0, host); crm_element_value_int(msg, F_ATTRD_IS_REMOTE, &is_remote); if (is_remote || (peer == NULL)) { /* If request is not for a known cluster node, assume remote */ remote_clear_failure(msg); } else { /* Relay request to proper node */ crm_notice("Relaying request to clear failure to %s", host); send_cluster_message(peer, crm_msg_attrd, msg, FALSE); } } } void attrd_local_callback(xmlNode * msg) { attr_hash_entry_t *hash_entry = NULL; const char *from = crm_element_value(msg, F_ORIG); const char *op = crm_element_value(msg, F_ATTRD_TASK); const char *attr = crm_element_value(msg, F_ATTRD_ATTRIBUTE); const char *pattern = crm_element_value(msg, F_ATTRD_REGEX); const char *value = crm_element_value(msg, F_ATTRD_VALUE); const char *host = crm_element_value(msg, F_ATTRD_HOST); int is_remote = FALSE; crm_element_value_int(msg, F_ATTRD_IS_REMOTE, &is_remote); if (safe_str_eq(op, ATTRD_OP_REFRESH)) { crm_notice("Sending full refresh (origin=%s)", from); g_hash_table_foreach(attr_hash, update_for_hash_entry, NULL); return; } else if (safe_str_eq(op, ATTRD_OP_PEER_REMOVE)) { if (host) { crm_notice("Broadcasting removal of peer %s", host); send_cluster_message(NULL, crm_msg_attrd, msg, FALSE); } return; } else if (safe_str_eq(op, ATTRD_OP_CLEAR_FAILURE)) { attrd_client_clear_failure(msg); return; } else if (op && safe_str_neq(op, ATTRD_OP_UPDATE)) { crm_notice("Ignoring unsupported %s request from %s", op, from); return; } /* Handle requests for Pacemaker Remote nodes specially */ if (host && is_remote) { const char *section = crm_element_value(msg, F_ATTRD_SECTION); const char *user_name = crm_element_value(msg, F_ATTRD_USER); if (section == NULL) { section = XML_CIB_TAG_STATUS; } if ((attr == NULL) && (pattern != NULL)) { /* Attribute(s) specified by regular expression */ /* @TODO query, iterate and update_remote_attr() for matches? */ crm_notice("Update of %s for %s failed: regular expressions " "are not supported with Pacemaker Remote nodes", pattern, host); } else { /* Single attribute specified by exact name */ update_remote_attr(host, attr, value, section, user_name); } return; } /* Redirect requests for another cluster node to that node */ if (host != NULL && safe_str_neq(host, attrd_uname)) { send_cluster_message(crm_get_peer(0, host), crm_msg_attrd, msg, FALSE); return; } if (attr != NULL) { /* Single attribute specified by exact name */ crm_debug("%s message from %s: %s=%s", op, from, attr, crm_str(value)); hash_entry = find_hash_entry(msg); if (hash_entry != NULL) { update_local_attr(msg, hash_entry); } } else if (pattern != NULL) { /* Attribute(s) specified by regular expression */ regex_t regex; GHashTableIter iter; if (regcomp(®ex, pattern, REG_EXTENDED|REG_NOSUB)) { crm_err("Update from %s failed: invalid pattern %s", from, pattern); return; } crm_debug("%s message from %s: %s=%s", op, from, pattern, crm_str(value)); g_hash_table_iter_init(&iter, attr_hash); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &hash_entry)) { int rc = regexec(®ex, hash_entry->id, 0, NULL, 0); if (rc == 0) { crm_trace("Attribute %s matches %s", hash_entry->id, pattern); update_local_attr(msg, hash_entry); } } } else { crm_info("Ignoring message with no attribute name or expression"); } } gboolean attrd_timer_callback(void *user_data) { stop_attrd_timer(user_data); attrd_trigger_update(user_data); return TRUE; /* Always return true, removed cleanly by stop_attrd_timer() */ } gboolean attrd_trigger_update(attr_hash_entry_t * hash_entry) { xmlNode *msg = NULL; /* send HA message to everyone */ crm_notice("Sending flush op to all hosts for: %s (%s)", hash_entry->id, crm_str(hash_entry->value)); log_hash_entry(LOG_DEBUG_2, hash_entry, "Sending flush op to all hosts for:"); msg = create_xml_node(NULL, __FUNCTION__); crm_xml_add(msg, F_TYPE, T_ATTRD); crm_xml_add(msg, F_ORIG, attrd_uname); crm_xml_add(msg, F_ATTRD_TASK, "flush"); crm_xml_add(msg, F_ATTRD_ATTRIBUTE, hash_entry->id); crm_xml_add(msg, F_ATTRD_SET, hash_entry->set); crm_xml_add(msg, F_ATTRD_SECTION, hash_entry->section); crm_xml_add(msg, F_ATTRD_DAMPEN, hash_entry->dampen); crm_xml_add(msg, F_ATTRD_VALUE, hash_entry->value); #if ENABLE_ACL if (hash_entry->user) { crm_xml_add(msg, F_ATTRD_USER, hash_entry->user); } #endif if (hash_entry->timeout <= 0) { crm_xml_add(msg, F_ATTRD_IGNORE_LOCALLY, hash_entry->value); attrd_perform_update(hash_entry); } send_cluster_message(NULL, crm_msg_attrd, msg, FALSE); free_xml(msg); return TRUE; } diff --git a/attrd/main.c b/attrd/main.c index 57617ff517..d61826d67a 100644 --- a/attrd/main.c +++ b/attrd/main.c @@ -1,338 +1,364 @@ /* * Copyright (C) 2013 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include +#include +#include #include #include #include #include +#include #include #include #include +#include "attrd_alerts.h" cib_t *the_cib = NULL; +lrmd_t *the_lrmd = NULL; crm_cluster_t *attrd_cluster = NULL; election_t *writer = NULL; int attrd_error = pcmk_ok; +crm_trigger_t *attrd_config_read = NULL; static void attrd_cpg_dispatch(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { uint32_t kind = 0; xmlNode *xml = NULL; const char *from = NULL; char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); if(data == NULL) { return; } if (kind == crm_class_cluster) { xml = string2xml(data); } if (xml == NULL) { crm_err("Bad message of class %d received from %s[%u]: '%.120s'", kind, from, nodeid, data); } else { crm_node_t *peer = crm_get_peer(nodeid, from); attrd_peer_message(peer, xml); } free_xml(xml); free(data); } static void attrd_cpg_destroy(gpointer unused) { if (attrd_shutting_down()) { crm_info("Corosync disconnection complete"); } else { crm_crit("Lost connection to Corosync service!"); attrd_error = ECONNRESET; attrd_shutdown(0); } } static void attrd_cib_replaced_cb(const char *event, xmlNode * msg) { crm_notice("Updating all attributes after %s event", event); if(election_state(writer) == election_won) { write_attributes(TRUE, FALSE); } } static void attrd_cib_destroy_cb(gpointer user_data) { cib_t *conn = user_data; conn->cmds->signoff(conn); /* Ensure IPC is cleaned up */ if (attrd_shutting_down()) { crm_info("Connection disconnection complete"); } else { /* eventually this should trigger a reconnect, not a shutdown */ crm_err("Lost connection to CIB service!"); attrd_error = ECONNRESET; attrd_shutdown(0); } return; } static cib_t * attrd_cib_connect(int max_retry) { int rc = -ENOTCONN; static int attempts = 0; cib_t *connection = cib_new(); if(connection == NULL) { return NULL; } do { if(attempts > 0) { sleep(attempts); } attempts++; crm_debug("CIB signon attempt %d", attempts); rc = connection->cmds->signon(connection, T_ATTRD, cib_command); } while(rc != pcmk_ok && attempts < max_retry); if (rc != pcmk_ok) { crm_err("Signon to CIB failed: %s (%d)", pcmk_strerror(rc), rc); goto cleanup; } crm_info("Connected to the CIB after %d attempts", attempts); rc = connection->cmds->set_connection_dnotify(connection, attrd_cib_destroy_cb); if (rc != pcmk_ok) { crm_err("Could not set disconnection callback"); goto cleanup; } rc = connection->cmds->add_notify_callback(connection, T_CIB_REPLACE_NOTIFY, attrd_cib_replaced_cb); if(rc != pcmk_ok) { crm_err("Could not set CIB notification callback"); goto cleanup; } + rc = connection->cmds->add_notify_callback(connection, T_CIB_DIFF_NOTIFY, attrd_cib_updated_cb); + if (rc != pcmk_ok) { + crm_err("Could not set CIB notification callback (update)"); + goto cleanup; + } + return connection; cleanup: connection->cmds->signoff(connection); cib_delete(connection); return NULL; } static int32_t attrd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; crm_client_t *client = crm_client_get(c); xmlNode *xml = crm_ipcs_recv(client, data, size, &id, &flags); const char *op; if (xml == NULL) { crm_debug("No msg from %d (%p)", crm_ipcs_client_pid(c), c); return 0; } #if ENABLE_ACL CRM_ASSERT(client->user != NULL); crm_acl_get_set_user(xml, F_ATTRD_USER, client->user); #endif crm_trace("Processing msg from %d (%p)", crm_ipcs_client_pid(c), c); crm_log_xml_trace(xml, __FUNCTION__); op = crm_element_value(xml, F_ATTRD_TASK); if (client->name == NULL) { const char *value = crm_element_value(xml, F_ORIG); client->name = crm_strdup_printf("%s.%d", value?value:"unknown", client->pid); } if (safe_str_eq(op, ATTRD_OP_PEER_REMOVE)) { attrd_send_ack(client, id, flags); attrd_client_peer_remove(client->name, xml); } else if (safe_str_eq(op, ATTRD_OP_CLEAR_FAILURE)) { attrd_send_ack(client, id, flags); attrd_client_clear_failure(xml); } else if (safe_str_eq(op, ATTRD_OP_UPDATE)) { attrd_send_ack(client, id, flags); attrd_client_update(xml); } else if (safe_str_eq(op, ATTRD_OP_UPDATE_BOTH)) { attrd_send_ack(client, id, flags); attrd_client_update(xml); } else if (safe_str_eq(op, ATTRD_OP_UPDATE_DELAY)) { attrd_send_ack(client, id, flags); attrd_client_update(xml); } else if (safe_str_eq(op, ATTRD_OP_REFRESH)) { attrd_send_ack(client, id, flags); attrd_client_refresh(); } else if (safe_str_eq(op, ATTRD_OP_QUERY)) { /* queries will get reply, so no ack is necessary */ attrd_client_query(client, id, flags, xml); } else { crm_info("Ignoring request from client %s with unknown operation %s", client->name, op); } free_xml(xml); return 0; } /* *INDENT-OFF* */ static struct crm_option long_options[] = { /* Top-level Options */ {"help", 0, 0, '?', "\tThis text"}, {"verbose", 0, 0, 'V', "\tIncrease debug output"}, {0, 0, 0, 0} }; /* *INDENT-ON* */ int main(int argc, char **argv) { int rc = pcmk_ok; int flag = 0; int index = 0; int argerr = 0; qb_ipcs_service_t *ipcs = NULL; attrd_init_mainloop(); crm_log_preinit(NULL, argc, argv); crm_set_options(NULL, "[options]", long_options, "Daemon for aggregating and atomically storing node attribute updates into the CIB"); mainloop_add_signal(SIGTERM, attrd_shutdown); while (1) { flag = crm_get_option(argc, argv, &index); if (flag == -1) break; switch (flag) { case 'V': crm_bump_log_level(argc, argv); break; case 'h': /* Help message */ crm_help(flag, EX_OK); break; default: ++argerr; break; } } if (optind > argc) { ++argerr; } if (argerr) { crm_help('?', EX_USAGE); } crm_log_init(T_ATTRD, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); crm_info("Starting up"); attributes = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_attribute); attrd_cluster = malloc(sizeof(crm_cluster_t)); attrd_cluster->destroy = attrd_cpg_destroy; attrd_cluster->cpg.cpg_deliver_fn = attrd_cpg_dispatch; attrd_cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership; crm_set_status_callback(&attrd_peer_change_cb); if (crm_cluster_connect(attrd_cluster) == FALSE) { crm_err("Cluster connection failed"); rc = DAEMON_RESPAWN_STOP; goto done; } crm_info("Cluster connection active"); writer = election_init(T_ATTRD, attrd_cluster->uname, 120000, attrd_election_cb); attrd_init_ipc(&ipcs, attrd_ipc_dispatch); crm_info("Accepting attribute updates"); the_cib = attrd_cib_connect(10); if (the_cib == NULL) { rc = DAEMON_RESPAWN_STOP; goto done; } crm_info("CIB connection active"); + + attrd_config_read = mainloop_add_trigger(G_PRIORITY_HIGH, attrd_read_options, NULL); + + /* Reading of cib(Alert section) after the start */ + mainloop_set_trigger(attrd_config_read); + attrd_run_mainloop(); done: crm_info("Shutting down attribute manager"); election_fini(writer); if (ipcs) { crm_client_disconnect_all(ipcs); qb_ipcs_destroy(ipcs); g_hash_table_destroy(attributes); } + attrd_alert_fini(); + + if (the_lrmd) { + the_lrmd->cmds->disconnect(the_lrmd); + lrmd_api_delete(the_lrmd); + the_lrmd = NULL; + } + if (the_cib) { the_cib->cmds->signoff(the_cib); cib_delete(the_cib); } if(attrd_error) { return crm_exit(attrd_error); } return crm_exit(rc); } diff --git a/crmd/crmd_alerts.c b/crmd/crmd_alerts.c index 9e306c5c99..6dda4f66bf 100644 --- a/crmd/crmd_alerts.c +++ b/crmd/crmd_alerts.c @@ -1,378 +1,408 @@ /* * Copyright (C) 2015 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include "crmd_alerts.h" #include "crmd_messages.h" #include #include static char *notify_script = NULL; static char *notify_target = NULL; static int alerts_inflight = 0; static gboolean draining_alerts = FALSE; /* * synchronize local data with cib */ static GHashTable * get_meta_attrs_from_cib(xmlNode *basenode, crm_alert_entry_t *entry, guint *max_timeout) { GHashTable *config_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); crm_time_t *now = crm_time_new(NULL); const char *value = NULL; unpack_instance_attributes(basenode, basenode, XML_TAG_META_SETS, NULL, config_hash, NULL, FALSE, now); value = g_hash_table_lookup(config_hash, XML_ALERT_ATTR_TIMEOUT); if (value) { entry->timeout = crm_get_msec(value); if (entry->timeout <= 0) { if (entry->timeout == 0) { crm_trace("Setting timeout to default %dmsec", CRM_ALERT_DEFAULT_TIMEOUT_MS); } else { crm_warn("Invalid timeout value setting to default %dmsec", CRM_ALERT_DEFAULT_TIMEOUT_MS); } entry->timeout = CRM_ALERT_DEFAULT_TIMEOUT_MS; } else { crm_trace("Found timeout %dmsec", entry->timeout); } if (entry->timeout > *max_timeout) { *max_timeout = entry->timeout; } } value = g_hash_table_lookup(config_hash, XML_ALERT_ATTR_TSTAMP_FORMAT); if (value) { /* hard to do any checks here as merely anything can * can be a valid time-format-string */ entry->tstamp_format = (char *) value; crm_trace("Found timestamp format string '%s'", value); } + value = g_hash_table_lookup(config_hash, XML_ALERT_ATTR_SELECT_KIND); + if (value) { + entry->select_kind_orig = (char *) value; + entry->select_kind = g_strsplit((char *) value, ",", 0); + crm_trace("Found select_kind string '%s'", (char *) value); + } + crm_time_free(now); return config_hash; /* keep hash as long as strings are needed */ } void parse_alerts(xmlNode *alerts) { xmlNode *alert; crm_alert_entry_t entry; guint max_timeout = 0; crm_free_alert_list(); crm_alert_max_alert_timeout = CRM_ALERT_DEFAULT_TIMEOUT_MS; + if (crm_alert_kind_default == NULL) { + crm_alert_kind_default = g_strsplit(CRM_ALERT_KIND_DEFAULT, ",", 0); + } if (alerts) { crm_info("We have an alerts section in the cib"); if (notify_script) { crm_warn("Cib contains configuration for Legacy Notifications " "which is overruled by alerts section"); } } else { crm_info("No optional alerts section in cib"); if (notify_script) { entry = (crm_alert_entry_t) { .id = (char *) "legacy_notification", .path = notify_script, .timeout = CRM_ALERT_DEFAULT_TIMEOUT_MS, - .recipient = notify_target + .recipient = notify_target, + .select_kind_orig = NULL, + .select_kind = NULL, + .select_attribute_name_orig = NULL, + .select_attribute_name = NULL }; crm_add_dup_alert_list_entry(&entry); crm_info("Legacy Notifications enabled"); } return; } for (alert = first_named_child(alerts, XML_CIB_TAG_ALERT); alert; alert = __xml_next(alert)) { xmlNode *recipient; int recipients = 0, envvars = 0; GHashTable *config_hash = NULL; entry = (crm_alert_entry_t) { .id = (char *) crm_element_value(alert, XML_ATTR_ID), .path = (char *) crm_element_value(alert, XML_ALERT_ATTR_PATH), .timeout = CRM_ALERT_DEFAULT_TIMEOUT_MS, - .tstamp_format = (char *) CRM_ALERT_DEFAULT_TSTAMP_FORMAT + .tstamp_format = (char *) CRM_ALERT_DEFAULT_TSTAMP_FORMAT, + .select_kind_orig = NULL, + .select_kind = NULL, + .select_attribute_name_orig = NULL, + .select_attribute_name = NULL }; crm_get_envvars_from_cib(alert, &entry, &envvars); config_hash = get_meta_attrs_from_cib(alert, &entry, &max_timeout); crm_debug("Found alert: id=%s, path=%s, timeout=%d, " - "tstamp_format=%s, %d additional environment variables", + "tstamp_format=%s, select_kind=%s, %d additional environment variables", entry.id, entry.path, entry.timeout, - entry.tstamp_format, envvars); + entry.tstamp_format, entry.select_kind_orig, envvars); for (recipient = first_named_child(alert, XML_CIB_TAG_ALERT_RECIPIENT); recipient; recipient = __xml_next(recipient)) { int envvars_added = 0; entry.recipient = (char *) crm_element_value(recipient, XML_ALERT_ATTR_REC_VALUE); recipients++; crm_get_envvars_from_cib(recipient, &entry, &envvars_added); { crm_alert_entry_t recipient_entry = entry; GHashTable *config_hash = get_meta_attrs_from_cib(recipient, &recipient_entry, &max_timeout); crm_add_dup_alert_list_entry(&recipient_entry); crm_debug("Alert has recipient: id=%s, value=%s, " "%d additional environment variables", crm_element_value(recipient, XML_ATTR_ID), recipient_entry.recipient, envvars_added); g_hash_table_destroy(config_hash); } crm_drop_envvars(&entry, envvars_added); } if (recipients == 0) { crm_add_dup_alert_list_entry(&entry); } crm_drop_envvars(&entry, -1); g_hash_table_destroy(config_hash); } if (max_timeout > 0) { crm_alert_max_alert_timeout = max_timeout; } } /* * end of synchronization of local data with cib */ void crmd_enable_alerts(const char *script, const char *target) { free(notify_script); notify_script = ((script) && (strcmp(script,"/dev/null")))?strdup(script):NULL; free(notify_target); notify_target = (target != NULL)?strdup(target):NULL; } static void crmd_alert_complete(svc_action_t *op) { alerts_inflight--; if(op->rc == 0) { crm_info("Alert %d (%s) complete", op->sequence, op->agent); } else { crm_warn("Alert %d (%s) failed: %d", op->sequence, op->agent, op->rc); } } static void send_alerts(const char *kind) { svc_action_t *alert = NULL; static int operations = 0; GListPtr l; crm_time_hr_t *now = crm_time_hr_new(NULL); crm_set_alert_key(CRM_alert_kind, kind); crm_set_alert_key(CRM_alert_version, VERSION); for (l = g_list_first(crm_alert_list); l; l = g_list_next(l)) { crm_alert_entry_t *entry = (crm_alert_entry_t *)(l->data); char *timestamp = crm_time_format_hr(entry->tstamp_format, now); + if (crm_is_target_alert(entry->select_kind == NULL ? crm_alert_kind_default : entry->select_kind, kind) == FALSE) { + crm_trace("Cannot sending '%s' alert to '%s' via '%s'(select_kind=%s)", kind, entry->recipient, entry->path, + entry->select_kind == NULL ? CRM_ALERT_KIND_DEFAULT : entry->select_kind_orig); + free(timestamp); + continue; + } + operations++; if (!draining_alerts) { crm_debug("Sending '%s' alert to '%s' via '%s'", kind, entry->recipient, entry->path); crm_set_alert_key(CRM_alert_recipient, entry->recipient); crm_set_alert_key_int(CRM_alert_node_sequence, operations); crm_set_alert_key(CRM_alert_timestamp, timestamp); alert = services_action_create_generic(entry->path, NULL); alert->timeout = entry->timeout; alert->standard = strdup("event"); alert->id = strdup(entry->id); alert->agent = strdup(entry->path); alert->sequence = operations; crm_set_envvar_list(entry); alerts_inflight++; if(services_action_async(alert, &crmd_alert_complete) == FALSE) { services_action_free(alert); alerts_inflight--; } crm_unset_envvar_list(entry); } else { crm_warn("Ignoring '%s' alert to '%s' via '%s' received " "while shutting down", kind, entry->recipient, entry->path); } free(timestamp); } crm_unset_alert_keys(); if (now) { free(now); } } void crmd_alert_node_event(crm_node_t *node) { if(!crm_alert_list) { return; } crm_set_alert_key(CRM_alert_node, node->uname); crm_set_alert_key_int(CRM_alert_nodeid, node->id); crm_set_alert_key(CRM_alert_desc, node->state); send_alerts("node"); } void crmd_alert_fencing_op(stonith_event_t * e) { char *desc = NULL; if (!crm_alert_list) { return; } desc = crm_strdup_printf( "Operation %s of %s by %s for %s@%s: %s (ref=%s)", e->action, e->target, e->executioner ? e->executioner : "", e->client_origin, e->origin, pcmk_strerror(e->result), e->id); crm_set_alert_key(CRM_alert_node, e->target); crm_set_alert_key(CRM_alert_task, e->operation); crm_set_alert_key(CRM_alert_desc, desc); crm_set_alert_key_int(CRM_alert_rc, e->result); send_alerts("fencing"); free(desc); } void crmd_alert_resource_op(const char *node, lrmd_event_data_t * op) { int target_rc = 0; if(!crm_alert_list) { return; } target_rc = rsc_op_expected_rc(op); if(op->interval == 0 && target_rc == op->rc && safe_str_eq(op->op_type, RSC_STATUS)) { /* Leave it up to the script if they want to alert for * 'failed' probes, only swallow ones for which the result was * unexpected. * * Even if we find a resource running, it was probably because * someone erased the status section. */ return; } crm_set_alert_key(CRM_alert_node, node); crm_set_alert_key(CRM_alert_rsc, op->rsc_id); crm_set_alert_key(CRM_alert_task, op->op_type); crm_set_alert_key_int(CRM_alert_interval, op->interval); crm_set_alert_key_int(CRM_alert_target_rc, target_rc); crm_set_alert_key_int(CRM_alert_status, op->op_status); crm_set_alert_key_int(CRM_alert_rc, op->rc); if(op->op_status == PCMK_LRM_OP_DONE) { crm_set_alert_key(CRM_alert_desc, services_ocf_exitcode_str(op->rc)); } else { crm_set_alert_key(CRM_alert_desc, services_lrm_status_str(op->op_status)); } send_alerts("resource"); } static gboolean alert_drain_timeout_callback(gpointer user_data) { gboolean *timeout_popped = (gboolean *) user_data; *timeout_popped = TRUE; return FALSE; } void crmd_drain_alerts(GMainContext *ctx) { guint timer; gboolean timeout_popped = FALSE; draining_alerts = TRUE; timer = g_timeout_add(crm_alert_max_alert_timeout + 5000, alert_drain_timeout_callback, (gpointer) &timeout_popped); while(alerts_inflight && !timeout_popped) { crm_trace("Draining mainloop while still %d alerts are in flight (timeout=%dms)", alerts_inflight, crm_alert_max_alert_timeout + 5000); g_main_context_iteration(ctx, TRUE); } if (!timeout_popped && (timer > 0)) { g_source_remove(timer); } + + if (crm_alert_kind_default) { + g_strfreev(crm_alert_kind_default); + crm_alert_kind_default = NULL; + } } diff --git a/extra/PCMK-MIB.txt b/extra/PCMK-MIB.txt index 01788115f1..4fba72cd06 100644 --- a/extra/PCMK-MIB.txt +++ b/extra/PCMK-MIB.txt @@ -1,120 +1,141 @@ PACEMAKER-MIB DEFINITIONS ::= BEGIN -- -- MIB objects for the pacemaker cluster manager implementation -- IMPORTS MODULE-IDENTITY, OBJECT-TYPE, Integer32, NOTIFICATION-TYPE, enterprises FROM SNMPv2-SMI SnmpAdminString FROM SNMP-FRAMEWORK-MIB netSnmp FROM NET-SNMP-MIB RowStatus, StorageType FROM SNMPv2-TC InetAddressType, InetAddress FROM INET-ADDRESS-MIB ; pacemaker MODULE-IDENTITY - LAST-UPDATED "201601052100Z" + LAST-UPDATED "201703242100Z" ORGANIZATION "www.clusterlabs.org" CONTACT-INFO "name: Andrew Beekhof email: users@clusterlabs.org" DESCRIPTION "MIB objects for the Pacemaker cluster manager implementation" + REVISION "201703242100Z" + DESCRIPTION "Add pacemakerNotificationAttributeName and pacemakerNotificationAttributeValue" + REVISION "201601052100Z" DESCRIPTION "Add pacemakerTrap and pacemakerNotificationTrap" REVISION "200910062115Z" DESCRIPTION "Corrections after feedback from beekhof" REVISION "200910051115Z" DESCRIPTION "First draft" ::= { enterprises 32723 } -- -- top level structure -- pacemakerNotification OBJECT IDENTIFIER ::= { pacemaker 1 } pacemakerTrap OBJECT IDENTIFIER ::= { pacemaker 2 } -- -- pacemaker Notifications -- pacemakerNotificationNode OBJECT-TYPE SYNTAX OCTET STRING (SIZE(1..64)) MAX-ACCESS accessible-for-notify STATUS current DESCRIPTION "The node on which the status change happened." ::= { pacemakerNotification 1 } pacemakerNotificationResource OBJECT-TYPE SYNTAX OCTET STRING (SIZE(1..256)) MAX-ACCESS accessible-for-notify STATUS current DESCRIPTION "The name of the resource that changed the status." ::= { pacemakerNotification 2 } pacemakerNotificationOperation OBJECT-TYPE SYNTAX OCTET STRING (SIZE(1..64)) MAX-ACCESS accessible-for-notify STATUS current DESCRIPTION "The operation that caused the status change." ::= { pacemakerNotification 3 } pacemakerNotificationDescription OBJECT-TYPE SYNTAX OCTET STRING (SIZE(1..256)) MAX-ACCESS accessible-for-notify STATUS current DESCRIPTION "The textual output relevant error code of the operation (if any) that caused the status change." ::= { pacemakerNotification 4 } pacemakerNotificationStatus OBJECT-TYPE SYNTAX Integer32 MAX-ACCESS accessible-for-notify STATUS current DESCRIPTION "The numerical representation of the status of the operation." ::= { pacemakerNotification 5 } pacemakerNotificationReturnCode OBJECT-TYPE SYNTAX Integer32 MAX-ACCESS accessible-for-notify STATUS current DESCRIPTION "The return code of the operation." ::= { pacemakerNotification 6 } pacemakerNotificationTargetReturnCode OBJECT-TYPE SYNTAX Integer32 MAX-ACCESS accessible-for-notify STATUS current DESCRIPTION "The expected return code of the operation." ::= { pacemakerNotification 7 } +pacemakerNotificationAttributeName OBJECT-TYPE + SYNTAX OCTET STRING (SIZE(1..256)) + MAX-ACCESS accessible-for-notify + STATUS current + DESCRIPTION + "The name of the attribute." +::= { pacemakerNotification 8 } + +pacemakerNotificationAttributeValue OBJECT-TYPE + SYNTAX OCTET STRING (SIZE(1..256)) + MAX-ACCESS accessible-for-notify + STATUS current + DESCRIPTION + "The value of the attribute." +::= { pacemakerNotification 9 } + -- -- pacemaker Traps -- pacemakerNotificationTrap NOTIFICATION-TYPE OBJECTS { pacemakerNotificationNode, pacemakerNotificationResource, pacemakerNotificationOperation, pacemakerNotificationDescription, pacemakerNotificationStatus, pacemakerNotificationReturnCode, - pacemakerNotificationTargetReturnCode + pacemakerNotificationTargetReturnCode, + pacemakerNotificationAttributeName, + pacemakerNotificationAttributeValue } STATUS current DESCRIPTION "Pacemaker notification trap" ::= { pacemakerTrap 1 } END diff --git a/extra/alerts/alert_file.sh.sample b/extra/alerts/alert_file.sh.sample index e1e0bb9a5b..7f10096e95 100644 --- a/extra/alerts/alert_file.sh.sample +++ b/extra/alerts/alert_file.sh.sample @@ -1,113 +1,118 @@ #!/bin/sh # # Copyright (C) 2015 Andrew Beekhof # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This software is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # ############################################################################## # Sample configuration (cib fragment in xml notation) # ================================ # # # # # # # # # # # # # # No one will probably ever see this echo, unless they run the script manually. # An alternative would be to log to the system log, or similar. (We can't send # this to the configured recipient, because that variable won't be defined in # this case either.) if [ -z $CRM_alert_version ]; then echo "$0 must be run by Pacemaker version 1.1.15 or later" exit 0 fi # Alert agents must always handle the case where no recipients are defined, # even if it's a no-op (a recipient might be added to the configuration later). if [ -z "${CRM_alert_recipient}" ]; then echo "$0 requires a recipient configured with a full filename path" exit 0 fi debug_exec_order_default="false" # Pacemaker passes instance attributes to alert agents as environment variables. # It is completely up to the agent what instance attributes to support. # Here, we define an instance attribute "debug_exec_order". : ${debug_exec_order=${debug_exec_order_default}} if [ "${debug_exec_order}" = "true" ]; then tstamp=`printf "%04d. " "$CRM_alert_node_sequence"` if [ ! -z "$CRM_alert_timestamp" ]; then tstamp="${tstamp} $CRM_alert_timestamp (`date "+%H:%M:%S.%06N"`): " fi else if [ ! -z "$CRM_alert_timestamp" ]; then tstamp="$CRM_alert_timestamp: " fi fi case $CRM_alert_kind in node) echo "${tstamp}Node '${CRM_alert_node}' is now '${CRM_alert_desc}'" >> "${CRM_alert_recipient}" ;; fencing) # Other keys: # # CRM_alert_node # CRM_alert_task # CRM_alert_rc # echo "${tstamp}Fencing ${CRM_alert_desc}" >> "${CRM_alert_recipient}" ;; resource) # Other keys: # # CRM_alert_target_rc # CRM_alert_status # CRM_alert_rc # if [ ${CRM_alert_interval} = "0" ]; then CRM_alert_interval="" else CRM_alert_interval=" (${CRM_alert_interval})" fi if [ ${CRM_alert_target_rc} = "0" ]; then CRM_alert_target_rc="" else CRM_alert_target_rc=" (target: ${CRM_alert_target_rc})" fi case ${CRM_alert_desc} in Cancelled) ;; *) echo "${tstamp}Resource operation '${CRM_alert_task}${CRM_alert_interval}' for '${CRM_alert_rsc}' on '${CRM_alert_node}': ${CRM_alert_desc}${CRM_alert_target_rc}" >> "${CRM_alert_recipient}" ;; esac ;; + attribute) + # + echo "${tstamp}Attribute '${CRM_alert_attribute_name}' on node '${CRM_alert_node}' was updated to '${CRM_alert_attribute_value}'" >> "${CRM_alert_recipient}" + ;; + *) echo "${tstamp}Unhandled $CRM_alert_kind alert" >> "${CRM_alert_recipient}" env | grep CRM_alert >> "${CRM_alert_recipient}" ;; esac diff --git a/extra/alerts/alert_smtp.sh.sample b/extra/alerts/alert_smtp.sh.sample index 7412bc373e..ea350d7063 100644 --- a/extra/alerts/alert_smtp.sh.sample +++ b/extra/alerts/alert_smtp.sh.sample @@ -1,111 +1,115 @@ #!/bin/sh # # Copyright (C) 2016 Klaus Wenninger # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This software is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # ############################################################################## # # Sample configuration (cib fragment in xml notation) # ================================ # # # # # # # # # # # # email_client_default="sendmail" email_sender_default="hacluster" email_recipient_default="root" : ${email_client=${email_client_default}} : ${email_sender=${email_sender_default}} email_recipient="${CRM_alert_recipient-${email_recipient_default}}" node_name=`uname -n` cluster_name=`crm_attribute --query -n cluster-name -q` email_body=`env | grep CRM_alert_` if [ ! -z "${email_sender##*@*}" ]; then email_sender="${email_sender}@${node_name}" fi if [ ! -z "${email_recipient##*@*}" ]; then email_recipient="${email_recipient}@${node_name}" fi if [ -z ${CRM_alert_version} ]; then email_subject="Pacemaker version 1.1.15 or later is required for alerts" else case ${CRM_alert_kind} in node) email_subject="${CRM_alert_timestamp} ${cluster_name}: Node '${CRM_alert_node}' is now '${CRM_alert_desc}'" ;; fencing) email_subject="${CRM_alert_timestamp} ${cluster_name}: Fencing ${CRM_alert_desc}" ;; resource) if [ ${CRM_alert_interval} = "0" ]; then CRM_alert_interval="" else CRM_alert_interval=" (${CRM_alert_interval})" fi if [ ${CRM_alert_target_rc} = "0" ]; then CRM_alert_target_rc="" else CRM_alert_target_rc=" (target: ${CRM_alert_target_rc})" fi case ${CRM_alert_desc} in Cancelled) ;; *) email_subject="${CRM_alert_timestamp} ${cluster_name}: Resource operation '${CRM_alert_task}${CRM_alert_interval}' for '${CRM_alert_rsc}' on '${CRM_alert_node}': ${CRM_alert_desc}${CRM_alert_target_rc}" ;; esac ;; + attribute) + # + email_subject="${CRM_alert_timestamp} ${cluster_name}: The '${CRM_alert_attribute_name}' attribute of the '${CRM_alert_node}' node was updated in '${CRM_alert_attribute_value}'" + ;; *) email_subject="${CRM_alert_timestamp} ${cluster_name}: Unhandled $CRM_alert_kind alert" ;; esac fi if [ ! -z "${email_subject}" ]; then case $email_client in # This sample script supports only sendmail for sending the email. # Support for additional senders can easily be added by adding # new cases here. sendmail) sendmail -t -r "${email_sender}" <<__EOF__ From: ${email_sender} To: ${email_recipient} Return-Path: ${email_sender} Subject: ${email_subject} ${email_body} __EOF__ ;; *) ;; esac fi diff --git a/extra/alerts/alert_snmp.sh.sample b/extra/alerts/alert_snmp.sh.sample index 48a2fe309b..8b8ac094f9 100644 --- a/extra/alerts/alert_snmp.sh.sample +++ b/extra/alerts/alert_snmp.sh.sample @@ -1,169 +1,179 @@ #!/bin/sh # # Description: Manages a SNMP trap, provided by NTT OSSC as an # script under Heartbeat/LinuxHA control # # Copyright (c) 2016 NIPPON TELEGRAPH AND TELEPHONE CORPORATION # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # ############################################################################## # This sample script assumes that only users who already have # hacluster-equivalent access to the cluster nodes can edit the CIB. Otherwise, # a malicious user could run commands as hacluster by inserting shell code into # the trap_options or timestamp-format parameters. # # Sample configuration (cib fragment in xml notation) # ================================ # # # # # # # # # # # # # # ================================ if [ -z "$CRM_alert_version" ]; then echo "$0 must be run by Pacemaker version 1.1.15 or later" exit 0 fi if [ -z "$CRM_alert_recipient" ]; then echo "$0 requires a recipient configured with the SNMP server IP address" exit 0 fi # trap_binary_default="/usr/bin/snmptrap" trap_version_default="2c" trap_options_default="" trap_community_default="public" trap_node_states_default="all" trap_fencing_tasks_default="all" trap_resource_tasks_default="all" trap_monitor_success_default="false" trap_add_hires_timestamp_oid_default="true" trap_snmp_persistent_dir_default="/var/lib/pacemaker/snmp" : ${trap_binary=${trap_binary_default}} : ${trap_version=${trap_version_default}} : ${trap_options=${trap_options_default}} : ${trap_community=${trap_community_default}} : ${trap_node_states=${trap_node_states_default}} : ${trap_fencing_tasks=${trap_fencing_tasks_default}} : ${trap_resource_tasks=${trap_resource_tasks_default}} : ${trap_monitor_success=${trap_monitor_success_default}} : ${trap_add_hires_timestamp_oid=${trap_add_hires_timestamp_oid_default}} : ${trap_snmp_persistent_dir=${trap_snmp_persistent_dir_default}} if [ "${trap_add_hires_timestamp_oid}" = "true" ]; then hires_timestamp="HOST-RESOURCES-MIB::hrSystemDate s ${CRM_alert_timestamp}" fi is_in_list() { item_list=`echo "$1" | tr ',' ' '` if [ "${item_list}" = "all" ]; then return 0 else for act in $item_list do act=`echo "$act" | tr A-Z a-z` [ "$act" != "$2" ] && continue return 0 done fi return 1 } if [ -z ${SNMP_PERSISTENT_DIR} ]; then export SNMP_PERSISTENT_DIR="${trap_snmp_persistent_dir}" # mkdir for snmp trap tools. if [ ! -d ${SNMP_PERSISTENT_DIR} ]; then mkdir -p ${SNMP_PERSISTENT_DIR} fi fi rc=0 case "$CRM_alert_kind" in node) is_in_list "${trap_node_states}" "${CRM_alert_desc}" [ $? -ne 0 ] && exit 0 output=`"${trap_binary}" -v "${trap_version}" ${trap_options} \ -c "${trap_community}" "${CRM_alert_recipient}" "" \ PACEMAKER-MIB::pacemakerNotificationTrap \ PACEMAKER-MIB::pacemakerNotificationNode s "${CRM_alert_node}" \ PACEMAKER-MIB::pacemakerNotificationDescription s "${CRM_alert_desc}" \ ${hires_timestamp} 2>&1` rc=$? ;; fencing) is_in_list "${trap_fencing_tasks}" "${CRM_alert_task}" [ $? -ne 0 ] && exit 0 output=`"${trap_binary}" -v "${trap_version}" ${trap_options} \ -c "${trap_community}" "${CRM_alert_recipient}" "" \ PACEMAKER-MIB::pacemakerNotificationTrap \ PACEMAKER-MIB::pacemakerNotificationNode s "${CRM_alert_node}" \ PACEMAKER-MIB::pacemakerNotificationOperation s "${CRM_alert_task}" \ PACEMAKER-MIB::pacemakerNotificationDescription s "${CRM_alert_desc}" \ PACEMAKER-MIB::pacemakerNotificationReturnCode i ${CRM_alert_rc} \ ${hires_timestamp} 2>&1` rc=$? ;; resource) is_in_list "${trap_resource_tasks}" "${CRM_alert_task}" [ $? -ne 0 ] && exit 0 case "${CRM_alert_desc}" in Cancelled) ;; *) if [ "${trap_monitor_success}" = "false" ] \ && [ "${CRM_alert_rc}" = "${CRM_alert_target_rc}" ] \ && [ "${CRM_alert_task}" = "monitor" ]; then exit fi output=`"${trap_binary}" -v "${trap_version}" ${trap_options} \ -c "${trap_community}" "${CRM_alert_recipient}" "" \ PACEMAKER-MIB::pacemakerNotificationTrap \ PACEMAKER-MIB::pacemakerNotificationNode s "${CRM_alert_node}" \ PACEMAKER-MIB::pacemakerNotificationResource s "${CRM_alert_rsc}" \ PACEMAKER-MIB::pacemakerNotificationOperation s "${CRM_alert_task}" \ PACEMAKER-MIB::pacemakerNotificationDescription s "${CRM_alert_desc}" \ PACEMAKER-MIB::pacemakerNotificationStatus i ${CRM_alert_status} \ PACEMAKER-MIB::pacemakerNotificationReturnCode i ${CRM_alert_rc} \ PACEMAKER-MIB::pacemakerNotificationTargetReturnCode i ${CRM_alert_target_rc} \ ${hires_timestamp} 2>&1` rc=$? ;; esac ;; + attribute) + output=`"${trap_binary}" -v "${trap_version}" ${trap_options} \ + -c "${trap_community}" "${CRM_alert_recipient}" "" \ + PACEMAKER-MIB::pacemakerNotificationTrap \ + PACEMAKER-MIB::pacemakerNotificationNode s "${CRM_alert_node}" \ + PACEMAKER-MIB::pacemakerNotificationAttributeName s "${CRM_alert_attribute_name}" \ + PACEMAKER-MIB::pacemakerNotificationAttributeValue s "${CRM_alert_attribute_value}" \ + ${hires_timestamp} 2>&1` + rc=$? + ;; *) ;; esac if [ $rc -ne 0 ]; then echo "${trap_binary} returned error : rc=${rc} ${output}" fi diff --git a/include/crm/common/alerts_internal.h b/include/crm/common/alerts_internal.h index 98a5f8c9ef..0791ac8e58 100644 --- a/include/crm/common/alerts_internal.h +++ b/include/crm/common/alerts_internal.h @@ -1,66 +1,90 @@ /* * Copyright (C) 2015 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef ALERT_INTERNAL_H #define ALERT_INTERNAL_H /* Default-Timeout to use before killing a alerts script (in milliseconds) */ # define CRM_ALERT_DEFAULT_TIMEOUT_MS (30000) /* Default-Format-String used to pass timestamps to the alerts scripts */ # define CRM_ALERT_DEFAULT_TSTAMP_FORMAT "%H:%M:%S.%06N" +typedef struct { + char *name; + char *value; +} crm_alert_envvar_t; + typedef struct { char *id; char *path; int timeout; char *tstamp_format; char *recipient; + char *select_kind_orig; + char **select_kind; + char *select_attribute_name_orig; + char **select_attribute_name; GListPtr envvars; } crm_alert_entry_t; enum crm_alert_keys_e { CRM_alert_recipient = 0, CRM_alert_node, CRM_alert_nodeid, CRM_alert_rsc, CRM_alert_task, CRM_alert_interval, CRM_alert_desc, CRM_alert_status, CRM_alert_target_rc, CRM_alert_rc, CRM_alert_kind, CRM_alert_version, CRM_alert_node_sequence, - CRM_alert_timestamp + CRM_alert_timestamp, + CRM_alert_attribute_name, + CRM_alert_attribute_value, + CRM_alert_select_kind, + CRM_alert_select_attribute_name }; +#define CRM_ALERT_INTERNAL_KEY_MAX 16 +#define CRM_ALERT_KEY_PATH "CRM_alert_path" +#define CRM_ALERT_NODE_SEQUENCE "CRM_alert_node_sequence" +#define CRM_ALERT_KIND_DEFAULT "node,fencing,resource" + +#if (HAVE_ATOMIC_ATTRD == 0) +extern char *attrd_uname; +#endif extern GListPtr crm_alert_list; extern guint crm_alert_max_alert_timeout; -extern const char *crm_alert_keys[14][3]; +extern const char *crm_alert_keys[CRM_ALERT_INTERNAL_KEY_MAX][3]; +extern char **crm_alert_kind_default; void crm_free_alert_list(void); GListPtr crm_drop_envvars(crm_alert_entry_t *entry, int count); void crm_add_dup_alert_list_entry(crm_alert_entry_t *entry); GListPtr crm_get_envvars_from_cib(xmlNode *basenode, crm_alert_entry_t *entry, int *count); void crm_set_alert_key(enum crm_alert_keys_e name, const char *value); void crm_set_alert_key_int(enum crm_alert_keys_e name, int value); void crm_unset_alert_keys(void); void crm_set_envvar_list(crm_alert_entry_t *entry); void crm_unset_envvar_list(crm_alert_entry_t *entry); +gboolean crm_is_target_alert(char **list, const char *value); + #endif diff --git a/include/crm/lrmd.h b/include/crm/lrmd.h index d9cb3f9724..01014685ad 100644 --- a/include/crm/lrmd.h +++ b/include/crm/lrmd.h @@ -1,475 +1,498 @@ /* * Copyright (c) 2012 David Vossel * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ /** * \file * \brief Local Resource Manager * \ingroup lrmd */ #include #include #ifndef LRMD__H # define LRMD__H typedef struct lrmd_s lrmd_t; typedef struct lrmd_key_value_s { char *key; char *value; struct lrmd_key_value_s *next; } lrmd_key_value_t; #define LRMD_PROTOCOL_VERSION "1.1" /* *INDENT-OFF* */ #define DEFAULT_REMOTE_KEY_LOCATION "/etc/pacemaker/authkey" #define ALT_REMOTE_KEY_LOCATION "/etc/corosync/authkey" #define DEFAULT_REMOTE_PORT 3121 #define DEFAULT_REMOTE_USERNAME "lrmd" #define F_LRMD_OPERATION "lrmd_op" #define F_LRMD_CLIENTNAME "lrmd_clientname" #define F_LRMD_IS_IPC_PROVIDER "lrmd_is_ipc_provider" #define F_LRMD_CLIENTID "lrmd_clientid" #define F_LRMD_PROTOCOL_VERSION "lrmd_protocol_version" #define F_LRMD_REMOTE_MSG_TYPE "lrmd_remote_msg_type" #define F_LRMD_REMOTE_MSG_ID "lrmd_remote_msg_id" #define F_LRMD_CALLBACK_TOKEN "lrmd_async_id" #define F_LRMD_CALLID "lrmd_callid" #define F_LRMD_CANCEL_CALLID "lrmd_cancel_callid" #define F_LRMD_CALLOPTS "lrmd_callopt" #define F_LRMD_CALLDATA "lrmd_calldata" #define F_LRMD_RC "lrmd_rc" #define F_LRMD_EXEC_RC "lrmd_exec_rc" #define F_LRMD_OP_STATUS "lrmd_exec_op_status" #define F_LRMD_TIMEOUT "lrmd_timeout" #define F_LRMD_WATCHDOG "lrmd_watchdog" #define F_LRMD_CLASS "lrmd_class" #define F_LRMD_PROVIDER "lrmd_provider" #define F_LRMD_TYPE "lrmd_type" #define F_LRMD_ORIGIN "lrmd_origin" #define F_LRMD_RSC_RUN_TIME "lrmd_run_time" #define F_LRMD_RSC_RCCHANGE_TIME "lrmd_rcchange_time" #define F_LRMD_RSC_EXEC_TIME "lrmd_exec_time" #define F_LRMD_RSC_QUEUE_TIME "lrmd_queue_time" #define F_LRMD_RSC_ID "lrmd_rsc_id" #define F_LRMD_RSC_ACTION "lrmd_rsc_action" #define F_LRMD_RSC_USERDATA_STR "lrmd_rsc_userdata_str" #define F_LRMD_RSC_OUTPUT "lrmd_rsc_output" #define F_LRMD_RSC_EXIT_REASON "lrmd_rsc_exit_reason" #define F_LRMD_RSC_START_DELAY "lrmd_rsc_start_delay" #define F_LRMD_RSC_INTERVAL "lrmd_rsc_interval" #define F_LRMD_RSC_METADATA "lrmd_rsc_metadata_res" #define F_LRMD_RSC_DELETED "lrmd_rsc_deleted" #define F_LRMD_RSC "lrmd_rsc" +#define F_LRMD_ALERT_ID "lrmd_alert_id" +#define F_LRMD_ALERT "lrmd_alert" + #define LRMD_OP_RSC_CHK_REG "lrmd_rsc_check_register" #define LRMD_OP_RSC_REG "lrmd_rsc_register" #define LRMD_OP_RSC_EXEC "lrmd_rsc_exec" #define LRMD_OP_RSC_CANCEL "lrmd_rsc_cancel" #define LRMD_OP_RSC_UNREG "lrmd_rsc_unregister" #define LRMD_OP_RSC_INFO "lrmd_rsc_info" #define LRMD_OP_RSC_METADATA "lrmd_rsc_metadata" #define LRMD_OP_POKE "lrmd_rsc_poke" #define LRMD_OP_NEW_CLIENT "lrmd_rsc_new_client" #define LRMD_OP_CHECK "lrmd_check" +#define LRMD_OP_ALERT_EXEC "lrmd_alert_exec" #define LRMD_IPC_OP_NEW "new" #define LRMD_IPC_OP_DESTROY "destroy" #define LRMD_IPC_OP_EVENT "event" #define LRMD_IPC_OP_REQUEST "request" #define LRMD_IPC_OP_RESPONSE "response" #define LRMD_IPC_OP_SHUTDOWN_REQ "shutdown_req" #define LRMD_IPC_OP_SHUTDOWN_ACK "shutdown_ack" #define LRMD_IPC_OP_SHUTDOWN_NACK "shutdown_nack" #define F_LRMD_IPC_OP "lrmd_ipc_op" #define F_LRMD_IPC_IPC_SERVER "lrmd_ipc_server" #define F_LRMD_IPC_SESSION "lrmd_ipc_session" #define F_LRMD_IPC_CLIENT "lrmd_ipc_client" #define F_LRMD_IPC_PROXY_NODE "lrmd_ipc_proxy_node" #define F_LRMD_IPC_USER "lrmd_ipc_user" #define F_LRMD_IPC_MSG "lrmd_ipc_msg" #define F_LRMD_IPC_MSG_ID "lrmd_ipc_msg_id" #define F_LRMD_IPC_MSG_FLAGS "lrmd_ipc_msg_flags" #define T_LRMD "lrmd" #define T_LRMD_REPLY "lrmd_reply" #define T_LRMD_NOTIFY "lrmd_notify" #define T_LRMD_IPC_PROXY "lrmd_ipc_proxy" /* *INDENT-ON* */ /*! * \brief Create a new local lrmd connection */ lrmd_t *lrmd_api_new(void); /*! * \brief Create a new remote lrmd connection using tls backend * * \param nodename name of remote node identified with this connection * \param server name of server to connect to * \param port port number to connect to * * \note nodename and server may be the same value. */ lrmd_t *lrmd_remote_api_new(const char *nodename, const char *server, int port); /*! * \brief Use after lrmd_poll returns 1 to read and dispatch a message * * \param[in,out] lrmd lrmd connection object * * \return TRUE if connection is still up, FALSE if disconnected */ bool lrmd_dispatch(lrmd_t * lrmd); /*! * \brief Poll for a specified timeout period to determine if a message * is ready for dispatch. * \retval 1 msg is ready * \retval 0 timeout occurred * \retval negative error code */ int lrmd_poll(lrmd_t * lrmd, int timeout); /*! * \brief Destroy lrmd object */ void lrmd_api_delete(lrmd_t * lrmd); lrmd_key_value_t *lrmd_key_value_add(lrmd_key_value_t * kvp, const char *key, const char *value); /* *INDENT-OFF* */ /* Reserved for future use */ enum lrmd_call_options { lrmd_opt_none = 0x00000000, /* lrmd_opt_sync_call = 0x00000001, //Not implemented, patches welcome. */ /*! Only notify the client originating a exec() the results */ lrmd_opt_notify_orig_only = 0x00000002, /*! Drop recurring operations initiated by a client when client disconnects. * This call_option is only valid when registering a resource. When used * remotely with the pacemaker_remote daemon, this option means that recurring * operations will be dropped once all the remote connections disconnect. */ lrmd_opt_drop_recurring = 0x00000003, /*! Only send out notifications for recurring operations whenthe result changes */ lrmd_opt_notify_changes_only = 0x00000004, }; enum lrmd_callback_event { lrmd_event_register, lrmd_event_unregister, lrmd_event_exec_complete, lrmd_event_disconnect, lrmd_event_connect, lrmd_event_poke, lrmd_event_new_client, }; /* *INDENT-ON* */ typedef struct lrmd_event_data_s { /*! Type of event, register, unregister, call_completed... */ enum lrmd_callback_event type; /*! The resource this event occurred on. */ const char *rsc_id; /*! The action performed, start, stop, monitor... */ const char *op_type; /*! The userdata string given do exec() api function */ const char *user_data; /*! The client api call id associated with this event */ int call_id; /*! The operation's timeout period in ms. */ int timeout; /*! The operation's recurring interval in ms. */ int interval; /*! The operation's start delay value in ms. */ int start_delay; /*! This operation that just completed is on a deleted rsc. */ int rsc_deleted; /*! The executed ra return code mapped to OCF */ enum ocf_exitcode rc; /*! The lrmd status returned for exec_complete events */ int op_status; /*! stdout from resource agent operation */ const char *output; /*! Timestamp of when op ran */ unsigned int t_run; /*! Timestamp of last rc change */ unsigned int t_rcchange; /*! Time in length op took to execute */ unsigned int exec_time; /*! Time in length spent in queue */ unsigned int queue_time; /*! int connection result. Used for connection and poke events */ int connection_rc; /* This is a GHashTable containing the * parameters given to the operation */ void *params; /*! client node name associated with this connection * (used to match actions to the proper client when there are multiple) */ const char *remote_nodename; /*! exit failure reason string from resource agent operation */ const char *exit_reason; } lrmd_event_data_t; lrmd_event_data_t *lrmd_copy_event(lrmd_event_data_t * event); void lrmd_free_event(lrmd_event_data_t * event); typedef struct lrmd_rsc_info_s { char *id; char *type; char *class; char *provider; } lrmd_rsc_info_t; lrmd_rsc_info_t *lrmd_copy_rsc_info(lrmd_rsc_info_t * rsc_info); void lrmd_free_rsc_info(lrmd_rsc_info_t * rsc_info); typedef void (*lrmd_event_callback) (lrmd_event_data_t * event); typedef struct lrmd_list_s { const char *val; struct lrmd_list_s *next; } lrmd_list_t; void lrmd_list_freeall(lrmd_list_t * head); void lrmd_key_value_freeall(lrmd_key_value_t * head); typedef struct lrmd_api_operations_s { /*! * \brief Connect from the lrmd. * * \retval 0, success * \retval negative error code on failure */ int (*connect) (lrmd_t * lrmd, const char *client_name, int *fd); /*! * \brief Establish an connection to lrmd, don't block while connecting. * \note this function requires the use of mainloop. * * \note The is returned using the event callback. * \note When this function returns 0, the callback will be invoked * to report the final result of the connect. * \retval 0, connect in progress, wait for event callback * \retval -1, failure. */ int (*connect_async) (lrmd_t * lrmd, const char *client_name, int timeout /*ms */ ); /*! * \brief Is connected to lrmd daemon? * * \retval 0, false * \retval 1, true */ int (*is_connected) (lrmd_t * lrmd); /*! * \brief Poke lrmd connection to verify it is still capable of serving requests * \note The response comes in the form of a poke event to the callback. * * \retval 0, wait for response in callback * \retval -1, connection failure, callback may not be invoked */ int (*poke_connection) (lrmd_t * lrmd); /*! * \brief Disconnect from the lrmd. * * \retval 0, success * \retval negative error code on failure */ int (*disconnect) (lrmd_t * lrmd); /*! * \brief Register a resource with the lrmd. * * \note Synchronous, guaranteed to occur in daemon before function returns. * * \retval 0, success * \retval negative error code on failure */ int (*register_rsc) (lrmd_t * lrmd, const char *rsc_id, const char *class, const char *provider, const char *agent, enum lrmd_call_options options); /*! * \brief Retrieve registration info for a rsc * * \retval info on success * \retval NULL on failure */ lrmd_rsc_info_t *(*get_rsc_info) (lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options); /*! * \brief Unregister a resource from the lrmd. * * \note All pending and recurring operations will be cancelled * automatically. * * \note Synchronous, guaranteed to occur in daemon before function returns. * * \retval 0, success * \retval -1, success, but operations are currently executing on the rsc which will * return once they are completed. * \retval negative error code on failure * */ int (*unregister_rsc) (lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options); /*! * \brief Sets the callback to receive lrmd events on. */ void (*set_callback) (lrmd_t * lrmd, lrmd_event_callback callback); /*! * \brief Issue a command on a resource * * \note Asynchronous, command is queued in daemon on function return, but * execution of command is not synced. * * \note Operations on individual resources are guaranteed to occur * in the order the client api calls them in. * * \note Operations between different resources are not guaranteed * to occur in any specific order in relation to one another * regardless of what order the client api is called in. * \retval call_id to track async event result on success * \retval negative error code on failure */ int (*exec) (lrmd_t * lrmd, const char *rsc_id, const char *action, const char *userdata, /* userdata string given back in event notification */ int interval, /* ms */ int timeout, /* ms */ int start_delay, /* ms */ enum lrmd_call_options options, lrmd_key_value_t * params); /* ownership of params is given up to api here */ /*! * \brief Cancel a recurring command. * * \note Synchronous, guaranteed to occur in daemon before function returns. * * \note The cancel is completed async from this call. * We can be guaranteed the cancel has completed once * the callback receives an exec_complete event with * the lrmd_op_status signifying that the operation is * cancelled. * \note For each resource, cancel operations and exec operations * are processed in the order they are received. * It is safe to assume that for a single resource, a cancel * will occur in the lrmd before an exec if the client's cancel * api call occurs before the exec api call. * * It is not however safe to assume any operation on one resource will * occur before an operation on another resource regardless of * the order the client api is called in. * * \retval 0, cancel command sent. * \retval negative error code on failure */ int (*cancel) (lrmd_t * lrmd, const char *rsc_id, const char *action, int interval); /*! * \brief Get the metadata documentation for a resource. * * \note Value is returned in output. Output must be freed when set * * \retval lrmd_ok success * \retval negative error code on failure */ int (*get_metadata) (lrmd_t * lrmd, const char *class, const char *provider, const char *agent, char **output, enum lrmd_call_options options); /*! * \brief Retrieve a list of installed resource agents. * * \note if class is not provided, all known agents will be returned * \note list must be freed using lrmd_list_freeall() * * \retval num items in list on success * \retval negative error code on failure */ int (*list_agents) (lrmd_t * lrmd, lrmd_list_t ** agents, const char *class, const char *provider); /*! * \brief Retrieve a list of resource agent providers * * \note When the agent is provided, only the agent's provider will be returned * \note When no agent is supplied, all providers will be returned. * \note List must be freed using lrmd_list_freeall() * * \retval num items in list on success * \retval negative error code on failure */ int (*list_ocf_providers) (lrmd_t * lrmd, const char *agent, lrmd_list_t ** providers); /*! * \brief Retrieve a list of standards supported by this machine/installation * * \note List must be freed using lrmd_list_freeall() * * \retval num items in list on success * \retval negative error code on failure */ int (*list_standards) (lrmd_t * lrmd, lrmd_list_t ** standards); + /*! + * \brief Issue a command on a alert + * + * \note Asynchronous, command is queued in daemon on function return, but + * execution of command is not synced. + * + * \note Operations on individual alerts are guaranteed to occur + * in the order the client api calls them in. + * + * \note Operations between different alerts are not guaranteed + * to occur in any specific order in relation to one another + * regardless of what order the client api is called in. + * \retval call_id to track async event result on success + * \retval negative error code on failure + */ + int (*exec_alert) (lrmd_t * lrmd, const char *alert_id, + int timeout, /* ms */ + enum lrmd_call_options options, lrmd_key_value_t * params); /* ownership of params is given up to api here */ + } lrmd_api_operations_t; struct lrmd_s { lrmd_api_operations_t *cmds; void *private; }; static inline const char * lrmd_event_type2str(enum lrmd_callback_event type) { switch (type) { case lrmd_event_register: return "register"; case lrmd_event_unregister: return "unregister"; case lrmd_event_exec_complete: return "exec_complete"; case lrmd_event_disconnect: return "disconnect"; case lrmd_event_connect: return "connect"; case lrmd_event_poke: return "poke"; case lrmd_event_new_client: return "new_client"; } return "unknown"; } #endif diff --git a/include/crm/lrmd_alerts_internal.h b/include/crm/lrmd_alerts_internal.h new file mode 100644 index 0000000000..5df88ed048 --- /dev/null +++ b/include/crm/lrmd_alerts_internal.h @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2015 Andrew Beekhof + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef LRMD_ALERT_INTERNAL_H +#define LRMD_ALERT_INTERNAL_H +lrmd_key_value_t * lrmd_set_alert_key_to_lrmd_params(lrmd_key_value_t *head, enum crm_alert_keys_e name, const char *value); +void lrmd_set_alert_envvar_to_lrmd_params(lrmd_key_value_t *head, crm_alert_entry_t *entry); +#endif diff --git a/include/crm/msg_xml.h b/include/crm/msg_xml.h index e31b173581..9f871ab803 100644 --- a/include/crm/msg_xml.h +++ b/include/crm/msg_xml.h @@ -1,442 +1,444 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef XML_TAGS__H # define XML_TAGS__H # ifndef F_ORIG # define F_ORIG "src" # endif # ifndef F_SEQ # define F_SEQ "seq" # endif # ifndef F_SUBTYPE # define F_SUBTYPE "subt" # endif # ifndef F_TYPE # define F_TYPE "t" # endif # ifndef F_CLIENTNAME # define F_CLIENTNAME "cn" # endif # ifndef F_XML_TAGNAME # define F_XML_TAGNAME "__name__" # endif # ifndef T_CRM # define T_CRM "crmd" # endif # ifndef T_ATTRD # define T_ATTRD "attrd" # endif # define CIB_OPTIONS_FIRST "cib-bootstrap-options" # define F_CRM_DATA "crm_xml" # define F_CRM_TASK "crm_task" # define F_CRM_HOST_TO "crm_host_to" # define F_CRM_MSG_TYPE F_SUBTYPE # define F_CRM_SYS_TO "crm_sys_to" # define F_CRM_SYS_FROM "crm_sys_from" # define F_CRM_HOST_FROM F_ORIG # define F_CRM_REFERENCE XML_ATTR_REFERENCE # define F_CRM_VERSION XML_ATTR_VERSION # define F_CRM_ORIGIN "origin" # define F_CRM_USER "crm_user" # define F_CRM_JOIN_ID "join_id" # define F_CRM_DC_LEAVING "dc-leaving" # define F_CRM_ELECTION_ID "election-id" # define F_CRM_ELECTION_AGE_S "election-age-sec" # define F_CRM_ELECTION_AGE_US "election-age-nano-sec" # define F_CRM_ELECTION_OWNER "election-owner" # define F_CRM_TGRAPH "crm-tgraph" # define F_CRM_TGRAPH_INPUT "crm-tgraph-in" # define F_CRM_THROTTLE_MODE "crm-limit-mode" # define F_CRM_THROTTLE_MAX "crm-limit-max" /*---- Common tags/attrs */ # define XML_DIFF_MARKER "__crm_diff_marker__" # define XML_ATTR_TAGNAME F_XML_TAGNAME # define XML_TAG_CIB "cib" # define XML_TAG_FAILED "failed" # define XML_ATTR_CRM_VERSION "crm_feature_set" # define XML_ATTR_DIGEST "digest" # define XML_ATTR_VALIDATION "validate-with" # define XML_ATTR_RA_VERSION "ra-version" # define XML_ATTR_QUORUM_PANIC "no-quorum-panic" # define XML_ATTR_HAVE_QUORUM "have-quorum" # define XML_ATTR_HAVE_WATCHDOG "have-watchdog" # define XML_ATTR_EXPECTED_VOTES "expected-quorum-votes" # define XML_ATTR_GENERATION "epoch" # define XML_ATTR_GENERATION_ADMIN "admin_epoch" # define XML_ATTR_NUMUPDATES "num_updates" # define XML_ATTR_TIMEOUT "timeout" # define XML_ATTR_ORIGIN "crm-debug-origin" # define XML_ATTR_TSTAMP "crm-timestamp" # define XML_CIB_ATTR_WRITTEN "cib-last-written" # define XML_ATTR_VERSION "version" # define XML_ATTR_DESC "description" # define XML_ATTR_ID "id" # define XML_ATTR_IDREF "id-ref" # define XML_ATTR_ID_LONG "long-id" # define XML_ATTR_TYPE "type" # define XML_ATTR_FILTER_TYPE "type-filter" # define XML_ATTR_FILTER_ID "id-filter" # define XML_ATTR_FILTER_PRIORITY "priority-filter" # define XML_ATTR_VERBOSE "verbose" # define XML_ATTR_OP "op" # define XML_ATTR_DC "is_dc" # define XML_ATTR_DC_UUID "dc-uuid" # define XML_ATTR_UPDATE_ORIG "update-origin" # define XML_ATTR_UPDATE_CLIENT "update-client" # define XML_ATTR_UPDATE_USER "update-user" # define XML_BOOLEAN_TRUE "true" # define XML_BOOLEAN_FALSE "false" # define XML_BOOLEAN_YES XML_BOOLEAN_TRUE # define XML_BOOLEAN_NO XML_BOOLEAN_FALSE # define XML_TAG_OPTIONS "options" /*---- top level tags/attrs */ # define XML_MSG_TAG "crm_message" # define XML_MSG_TAG_DATA "msg_data" # define XML_ATTR_REQUEST "request" # define XML_ATTR_RESPONSE "response" # define XML_ATTR_UNAME "uname" # define XML_ATTR_UUID "id" # define XML_ATTR_REFERENCE "reference" # define XML_FAIL_TAG_RESOURCE "failed_resource" # define XML_FAILRES_ATTR_RESID "resource_id" # define XML_FAILRES_ATTR_REASON "reason" # define XML_FAILRES_ATTR_RESSTATUS "resource_status" # define XML_CRM_TAG_PING "ping_response" # define XML_PING_ATTR_STATUS "result" # define XML_PING_ATTR_SYSFROM "crm_subsystem" # define XML_TAG_FRAGMENT "cib_fragment" # define XML_ATTR_RESULT "result" # define XML_ATTR_SECTION "section" # define XML_FAIL_TAG_CIB "failed_update" # define XML_FAILCIB_ATTR_ID "id" # define XML_FAILCIB_ATTR_OBJTYPE "object_type" # define XML_FAILCIB_ATTR_OP "operation" # define XML_FAILCIB_ATTR_REASON "reason" /*---- CIB specific tags/attrs */ # define XML_CIB_TAG_SECTION_ALL "all" # define XML_CIB_TAG_CONFIGURATION "configuration" # define XML_CIB_TAG_STATUS "status" # define XML_CIB_TAG_RESOURCES "resources" # define XML_CIB_TAG_NODES "nodes" # define XML_CIB_TAG_DOMAINS "domains" # define XML_CIB_TAG_CONSTRAINTS "constraints" # define XML_CIB_TAG_CRMCONFIG "crm_config" # define XML_CIB_TAG_OPCONFIG "op_defaults" # define XML_CIB_TAG_RSCCONFIG "rsc_defaults" # define XML_CIB_TAG_ACLS "acls" # define XML_CIB_TAG_ALERTS "alerts" # define XML_CIB_TAG_ALERT "alert" # define XML_CIB_TAG_ALERT_RECIPIENT "recipient" # define XML_CIB_TAG_STATE "node_state" # define XML_CIB_TAG_NODE "node" # define XML_CIB_TAG_DOMAIN "domain" # define XML_CIB_TAG_CONSTRAINT "constraint" # define XML_CIB_TAG_NVPAIR "nvpair" # define XML_CIB_TAG_PROPSET "cluster_property_set" # define XML_TAG_ATTR_SETS "instance_attributes" # define XML_TAG_META_SETS "meta_attributes" # define XML_TAG_ATTRS "attributes" # define XML_TAG_RSC_VER_ATTRS "rsc_versioned_attrs" # define XML_TAG_OP_VER_ATTRS "op_versioned_attrs" # define XML_TAG_OP_VER_META "op_versioned_meta" # define XML_TAG_PARAMS "parameters" # define XML_TAG_PARAM "param" # define XML_TAG_UTILIZATION "utilization" # define XML_TAG_RESOURCE_REF "resource_ref" # define XML_CIB_TAG_RESOURCE "primitive" # define XML_CIB_TAG_GROUP "group" # define XML_CIB_TAG_INCARNATION "clone" # define XML_CIB_TAG_MASTER "master" # define XML_CIB_TAG_CONTAINER "bundle" # define XML_CIB_TAG_RSC_TEMPLATE "template" # define XML_RSC_ATTR_ISOLATION_INSTANCE "isolation-instance" # define XML_RSC_ATTR_ISOLATION_WRAPPER "isolation-wrapper" # define XML_RSC_ATTR_ISOLATION_HOST "isolation-host" # define XML_RSC_ATTR_ISOLATION "isolation" # define XML_RSC_ATTR_RESTART "restart-type" # define XML_RSC_ATTR_ORDERED "ordered" # define XML_RSC_ATTR_INTERLEAVE "interleave" # define XML_RSC_ATTR_INCARNATION "clone" # define XML_RSC_ATTR_INCARNATION_MAX "clone-max" # define XML_RSC_ATTR_INCARNATION_MIN "clone-min" # define XML_RSC_ATTR_INCARNATION_NODEMAX "clone-node-max" # define XML_RSC_ATTR_MASTER_MAX "master-max" # define XML_RSC_ATTR_MASTER_NODEMAX "master-node-max" # define XML_RSC_ATTR_STATE "clone-state" # define XML_RSC_ATTR_MANAGED "is-managed" # define XML_RSC_ATTR_TARGET_ROLE "target-role" # define XML_RSC_ATTR_UNIQUE "globally-unique" # define XML_RSC_ATTR_NOTIFY "notify" # define XML_RSC_ATTR_STICKINESS "resource-stickiness" # define XML_RSC_ATTR_FAIL_STICKINESS "migration-threshold" # define XML_RSC_ATTR_FAIL_TIMEOUT "failure-timeout" # define XML_RSC_ATTR_MULTIPLE "multiple-active" # define XML_RSC_ATTR_PRIORITY "priority" # define XML_RSC_ATTR_REQUIRES "requires" # define XML_RSC_ATTR_PROVIDES "provides" # define XML_RSC_ATTR_CONTAINER "container" # define XML_RSC_ATTR_INTERNAL_RSC "internal_rsc" # define XML_RSC_ATTR_MAINTENANCE "maintenance" # define XML_RSC_ATTR_REMOTE_NODE "remote-node" # define XML_RSC_ATTR_CLEAR_OP "clear_failure_op" # define XML_RSC_ATTR_CLEAR_INTERVAL "clear_failure_interval" # define XML_REMOTE_ATTR_RECONNECT_INTERVAL "reconnect_interval" # define XML_OP_ATTR_ON_FAIL "on-fail" # define XML_OP_ATTR_START_DELAY "start-delay" # define XML_OP_ATTR_ALLOW_MIGRATE "allow-migrate" # define XML_OP_ATTR_DEPENDENT "dependent-on" # define XML_OP_ATTR_ORIGIN "interval-origin" # define XML_OP_ATTR_PENDING "record-pending" # define XML_CIB_TAG_LRM "lrm" # define XML_LRM_TAG_RESOURCES "lrm_resources" # define XML_LRM_TAG_RESOURCE "lrm_resource" # define XML_LRM_TAG_AGENTS "lrm_agents" # define XML_LRM_TAG_AGENT "lrm_agent" # define XML_LRM_TAG_RSC_OP "lrm_rsc_op" # define XML_AGENT_ATTR_CLASS "class" # define XML_AGENT_ATTR_PROVIDER "provider" # define XML_LRM_TAG_ATTRIBUTES "attributes" # define XML_CIB_ATTR_REPLACE "replace" # define XML_CIB_ATTR_SOURCE "source" # define XML_CIB_ATTR_HEALTH "health" # define XML_CIB_ATTR_WEIGHT "weight" # define XML_CIB_ATTR_PRIORITY "priority" # define XML_CIB_ATTR_CLEAR "clear_on" # define XML_CIB_ATTR_SOURCE "source" # define XML_NODE_JOIN_STATE "join" # define XML_NODE_EXPECTED "expected" # define XML_NODE_IN_CLUSTER "in_ccm" # define XML_NODE_IS_PEER "crmd" # define XML_NODE_IS_REMOTE "remote_node" # define XML_NODE_IS_FENCED "node_fenced" # define XML_NODE_IS_MAINTENANCE "node_in_maintenance" # define XML_CIB_ATTR_SHUTDOWN "shutdown" # define XML_CIB_ATTR_STONITH "stonith" # define XML_CIB_ATTR_STANDBY "standby" /* LRM is a bit of a misnomer here; the crmd and pengine use these to track * actions, which usually but not always are LRM operations */ # define XML_LRM_ATTR_INTERVAL "interval" # define XML_LRM_ATTR_TASK "operation" # define XML_LRM_ATTR_TASK_KEY "operation_key" # define XML_LRM_ATTR_TARGET "on_node" # define XML_LRM_ATTR_TARGET_UUID "on_node_uuid" /*! Actions to be executed on Pacemaker Remote nodes are routed through * crmd on the cluster node hosting the remote connection. That cluster node * is considered the router node for the action. */ # define XML_LRM_ATTR_ROUTER_NODE "router_node" # define XML_LRM_ATTR_RSCID "rsc-id" # define XML_LRM_ATTR_OPSTATUS "op-status" # define XML_LRM_ATTR_RC "rc-code" # define XML_LRM_ATTR_CALLID "call-id" # define XML_LRM_ATTR_OP_DIGEST "op-digest" # define XML_LRM_ATTR_OP_RESTART "op-force-restart" # define XML_LRM_ATTR_OP_SECURE "op-secure-params" # define XML_LRM_ATTR_RESTART_DIGEST "op-restart-digest" # define XML_LRM_ATTR_SECURE_DIGEST "op-secure-digest" # define XML_LRM_ATTR_EXIT_REASON "exit-reason" # define XML_RSC_OP_LAST_CHANGE "last-rc-change" # define XML_RSC_OP_LAST_RUN "last-run" # define XML_RSC_OP_T_EXEC "exec-time" # define XML_RSC_OP_T_QUEUE "queue-time" # define XML_LRM_ATTR_MIGRATE_SOURCE "migrate_source" # define XML_LRM_ATTR_MIGRATE_TARGET "migrate_target" # define XML_TAG_GRAPH "transition_graph" # define XML_GRAPH_TAG_RSC_OP "rsc_op" # define XML_GRAPH_TAG_PSEUDO_EVENT "pseudo_event" # define XML_GRAPH_TAG_CRM_EVENT "crm_event" # define XML_GRAPH_TAG_DOWNED "downed" # define XML_GRAPH_TAG_MAINTENANCE "maintenance" # define XML_TAG_RULE "rule" # define XML_RULE_ATTR_SCORE "score" # define XML_RULE_ATTR_SCORE_ATTRIBUTE "score-attribute" /* following has no use (hardly ever meaningful); kept for compatibility */ # define XML_RULE_ATTR_SCORE_MANGLED "score-attribute-mangled" # define XML_RULE_ATTR_ROLE "role" # define XML_RULE_ATTR_RESULT "result" # define XML_RULE_ATTR_BOOLEAN_OP "boolean-op" # define XML_TAG_EXPRESSION "expression" # define XML_EXPR_ATTR_ATTRIBUTE "attribute" # define XML_EXPR_ATTR_OPERATION "operation" # define XML_EXPR_ATTR_VALUE "value" # define XML_EXPR_ATTR_TYPE "type" # define XML_EXPR_ATTR_VALUE_SOURCE "value-source" # define XML_CONS_TAG_RSC_DEPEND "rsc_colocation" # define XML_CONS_TAG_RSC_ORDER "rsc_order" # define XML_CONS_TAG_RSC_LOCATION "rsc_location" # define XML_CONS_TAG_RSC_TICKET "rsc_ticket" # define XML_CONS_TAG_RSC_SET "resource_set" # define XML_CONS_ATTR_SYMMETRICAL "symmetrical" # define XML_LOCATION_ATTR_DISCOVERY "resource-discovery" # define XML_COLOC_ATTR_SOURCE "rsc" # define XML_COLOC_ATTR_SOURCE_ROLE "rsc-role" # define XML_COLOC_ATTR_TARGET "with-rsc" # define XML_COLOC_ATTR_TARGET_ROLE "with-rsc-role" # define XML_COLOC_ATTR_NODE_ATTR "node-attribute" # define XML_COLOC_ATTR_SOURCE_INSTANCE "rsc-instance" # define XML_COLOC_ATTR_TARGET_INSTANCE "with-rsc-instance" # define XML_LOC_ATTR_SOURCE "rsc" # define XML_LOC_ATTR_SOURCE_PATTERN "rsc-pattern" # define XML_ORDER_ATTR_FIRST "first" # define XML_ORDER_ATTR_THEN "then" # define XML_ORDER_ATTR_FIRST_ACTION "first-action" # define XML_ORDER_ATTR_THEN_ACTION "then-action" # define XML_ORDER_ATTR_FIRST_INSTANCE "first-instance" # define XML_ORDER_ATTR_THEN_INSTANCE "then-instance" # define XML_ORDER_ATTR_KIND "kind" # define XML_TICKET_ATTR_TICKET "ticket" # define XML_TICKET_ATTR_LOSS_POLICY "loss-policy" # define XML_NVPAIR_ATTR_NAME "name" # define XML_NVPAIR_ATTR_VALUE "value" # define XML_NODE_ATTR_STATE "state" # define XML_NODE_ATTR_RSC_DISCOVERY "resource-discovery-enabled" # define XML_CONFIG_ATTR_DC_DEADTIME "dc-deadtime" # define XML_CONFIG_ATTR_ELECTION_FAIL "election-timeout" # define XML_CONFIG_ATTR_FORCE_QUIT "shutdown-escalation" # define XML_CONFIG_ATTR_RECHECK "cluster-recheck-interval" # define XML_ALERT_ATTR_PATH "path" # define XML_ALERT_ATTR_TIMEOUT "timeout" # define XML_ALERT_ATTR_TSTAMP_FORMAT "timestamp-format" # define XML_ALERT_ATTR_REC_VALUE "value" +# define XML_ALERT_ATTR_SELECT_KIND "kind" +# define XML_ALERT_ATTR_SELECT_ATTRIBUTE_NAME "attribute_name" # define XML_CIB_TAG_GENERATION_TUPPLE "generation_tuple" # define XML_ATTR_TRANSITION_MAGIC "transition-magic" # define XML_ATTR_TRANSITION_KEY "transition-key" # define XML_ATTR_TE_NOWAIT "op_no_wait" # define XML_ATTR_TE_TARGET_RC "op_target_rc" # define XML_ATTR_LRM_PROBE "lrm-is-probe" # define XML_TAG_TRANSIENT_NODEATTRS "transient_attributes" # define XML_TAG_DIFF_ADDED "diff-added" # define XML_TAG_DIFF_REMOVED "diff-removed" # define XML_ACL_TAG_USER "acl_target" # define XML_ACL_TAG_USERv1 "acl_user" # define XML_ACL_TAG_GROUP "acl_group" # define XML_ACL_TAG_ROLE "acl_role" # define XML_ACL_TAG_PERMISSION "acl_permission" # define XML_ACL_TAG_ROLE_REF "role" # define XML_ACL_TAG_ROLE_REFv1 "role_ref" # define XML_ACL_ATTR_KIND "kind" # define XML_ACL_TAG_READ "read" # define XML_ACL_TAG_WRITE "write" # define XML_ACL_TAG_DENY "deny" # define XML_ACL_ATTR_REF "reference" # define XML_ACL_ATTR_REFv1 "ref" # define XML_ACL_ATTR_TAG "object-type" # define XML_ACL_ATTR_TAGv1 "tag" # define XML_ACL_ATTR_XPATH "xpath" # define XML_ACL_ATTR_ATTRIBUTE "attribute" # define XML_CIB_TAG_TICKETS "tickets" # define XML_CIB_TAG_TICKET_STATE "ticket_state" # define XML_CIB_TAG_TAGS "tags" # define XML_CIB_TAG_TAG "tag" # define XML_CIB_TAG_OBJ_REF "obj_ref" # define XML_TAG_FENCING_TOPOLOGY "fencing-topology" # define XML_TAG_FENCING_LEVEL "fencing-level" # define XML_ATTR_STONITH_INDEX "index" # define XML_ATTR_STONITH_TARGET "target" # define XML_ATTR_STONITH_TARGET_VALUE "target-value" # define XML_ATTR_STONITH_TARGET_PATTERN "target-pattern" # define XML_ATTR_STONITH_TARGET_ATTRIBUTE "target-attribute" # define XML_ATTR_STONITH_DEVICES "devices" # define XML_TAG_DIFF "diff" # define XML_DIFF_VERSION "version" # define XML_DIFF_VSOURCE "source" # define XML_DIFF_VTARGET "target" # define XML_DIFF_CHANGE "change" # define XML_DIFF_LIST "change-list" # define XML_DIFF_ATTR "change-attr" # define XML_DIFF_RESULT "change-result" # define XML_DIFF_OP "operation" # define XML_DIFF_PATH "path" # define XML_DIFF_POSITION "position" /* Defined for backward API compatibility but no longer used by Pacemaker */ # define XML_ATTR_TE_ALLOWFAIL "op_allow_fail" # include # define ID(x) crm_element_value(x, XML_ATTR_ID) # define INSTANCE(x) crm_element_value(x, XML_CIB_ATTR_INSTANCE) # define TSTAMP(x) crm_element_value(x, XML_ATTR_TSTAMP) # define TYPE(x) crm_element_name(x) # define NAME(x) crm_element_value(x, XML_NVPAIR_ATTR_NAME) # define VALUE(x) crm_element_value(x, XML_NVPAIR_ATTR_VALUE) #endif diff --git a/include/crm/services.h b/include/crm/services.h index e1fae9b98a..3ac3d8a879 100644 --- a/include/crm/services.h +++ b/include/crm/services.h @@ -1,417 +1,423 @@ /* * Copyright (C) 2010 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /** * \file * \brief Services API * \ingroup core */ #ifndef __PCMK_SERVICES__ # define __PCMK_SERVICES__ # ifdef __cplusplus extern "C" { # endif # include # include # include # include # ifndef OCF_ROOT_DIR # define OCF_ROOT_DIR "/usr/lib/ocf" # endif # ifndef LSB_ROOT_DIR # define LSB_ROOT_DIR "/etc/init.d" # endif /* TODO: Autodetect these two ?*/ # ifndef SYSTEMCTL # define SYSTEMCTL "/bin/systemctl" # endif /* Deprecated and unused by Pacemaker, kept for API backward compatibility */ # ifndef SERVICE_SCRIPT # define SERVICE_SCRIPT "/sbin/service" # endif /* Known resource classes */ #define PCMK_RESOURCE_CLASS_OCF "ocf" #define PCMK_RESOURCE_CLASS_SERVICE "service" #define PCMK_RESOURCE_CLASS_LSB "lsb" #define PCMK_RESOURCE_CLASS_SYSTEMD "systemd" #define PCMK_RESOURCE_CLASS_UPSTART "upstart" #define PCMK_RESOURCE_CLASS_HB "heartbeat" #define PCMK_RESOURCE_CLASS_NAGIOS "nagios" #define PCMK_RESOURCE_CLASS_STONITH "stonith" +#define PCMK_ALERT_CLASS "alert" /* This is the string passed in the OCF_EXIT_REASON_PREFIX * environment variable. The stderr output that occurs * after this prefix is encountered is considered the exit * reason for a completed operationt */ #define PCMK_OCF_REASON_PREFIX "ocf-exit-reason:" /* *INDENT-OFF* */ enum lsb_exitcode { PCMK_LSB_OK = 0, PCMK_LSB_UNKNOWN_ERROR = 1, PCMK_LSB_INVALID_PARAM = 2, PCMK_LSB_UNIMPLEMENT_FEATURE = 3, PCMK_LSB_INSUFFICIENT_PRIV = 4, PCMK_LSB_NOT_INSTALLED = 5, PCMK_LSB_NOT_CONFIGURED = 6, PCMK_LSB_NOT_RUNNING = 7, }; /* The return codes for the status operation are not the same for other * operatios - go figure */ enum lsb_status_exitcode { PCMK_LSB_STATUS_OK = 0, PCMK_LSB_STATUS_VAR_PID = 1, PCMK_LSB_STATUS_VAR_LOCK = 2, PCMK_LSB_STATUS_NOT_RUNNING = 3, PCMK_LSB_STATUS_UNKNOWN = 4, /* custom codes should be in the 150-199 range reserved for application use */ PCMK_LSB_STATUS_NOT_INSTALLED = 150, PCMK_LSB_STATUS_INSUFFICIENT_PRIV = 151, }; /* Uniform exit codes * Everything is mapped to its OCF equivalent so that Pacemaker only deals with one set of codes */ enum ocf_exitcode { PCMK_OCF_OK = 0, PCMK_OCF_UNKNOWN_ERROR = 1, PCMK_OCF_INVALID_PARAM = 2, PCMK_OCF_UNIMPLEMENT_FEATURE = 3, PCMK_OCF_INSUFFICIENT_PRIV = 4, PCMK_OCF_NOT_INSTALLED = 5, PCMK_OCF_NOT_CONFIGURED = 6, PCMK_OCF_NOT_RUNNING = 7, /* End of overlap with LSB */ PCMK_OCF_RUNNING_MASTER = 8, PCMK_OCF_FAILED_MASTER = 9, /* 150-199 reserved for application use */ PCMK_OCF_CONNECTION_DIED = 189, /* Operation failure implied by disconnection of the LRM API to a local or remote node */ PCMK_OCF_DEGRADED = 190, /* Active resource that is no longer 100% functional */ PCMK_OCF_DEGRADED_MASTER = 191, /* Promoted resource that is no longer 100% functional */ PCMK_OCF_EXEC_ERROR = 192, /* Generic problem invoking the agent */ PCMK_OCF_UNKNOWN = 193, /* State of the service is unknown - used for recording in-flight operations */ PCMK_OCF_SIGNAL = 194, PCMK_OCF_NOT_SUPPORTED = 195, PCMK_OCF_PENDING = 196, PCMK_OCF_CANCELLED = 197, PCMK_OCF_TIMEOUT = 198, PCMK_OCF_OTHER_ERROR = 199, /* Keep the same codes as PCMK_LSB */ }; enum op_status { PCMK_LRM_OP_PENDING = -1, PCMK_LRM_OP_DONE, PCMK_LRM_OP_CANCELLED, PCMK_LRM_OP_TIMEOUT, PCMK_LRM_OP_NOTSUPPORTED, PCMK_LRM_OP_ERROR, PCMK_LRM_OP_ERROR_HARD, PCMK_LRM_OP_ERROR_FATAL, PCMK_LRM_OP_NOT_INSTALLED, }; enum nagios_exitcode { NAGIOS_STATE_OK = 0, NAGIOS_STATE_WARNING = 1, NAGIOS_STATE_CRITICAL = 2, NAGIOS_STATE_UNKNOWN = 3, NAGIOS_STATE_DEPENDENT = 4, NAGIOS_INSUFFICIENT_PRIV = 100, NAGIOS_NOT_INSTALLED = 101, }; enum svc_action_flags { /* On timeout, only kill pid, do not kill entire pid group */ SVC_ACTION_LEAVE_GROUP = 0x01, }; /* *INDENT-ON* */ typedef struct svc_action_private_s svc_action_private_t; typedef struct svc_action_s { char *id; char *rsc; char *action; int interval; char *standard; char *provider; char *agent; int timeout; GHashTable *params; int rc; int pid; int cancel; int status; int sequence; int expected_rc; int synchronous; enum svc_action_flags flags; char *stderr_data; char *stdout_data; /** * Data stored by the creator of the action. * * This may be used to hold data that is needed later on by a callback, * for example. */ void *cb_data; svc_action_private_t *opaque; + /* + * Store a parameter of Alert. + */ + GHashTable *alert_params; + } svc_action_t; /** * \brief Get a list of files or directories in a given path * * \param[in] root full path to a directory to read * \param[in] files return list of files if TRUE or directories if FALSE * \param[in] executable if TRUE and files is TRUE, only return executable files * * \return a list of what was found. The list items are char *. * \note It is the caller's responsibility to free the result with g_list_free_full(list, free). */ GList *get_directory_list(const char *root, gboolean files, gboolean executable); /** * Get a list of services * * \return a list of services. The list items are gchar *. This list _must_ * be destroyed using g_list_free_full(list, free). */ GList *services_list(void); /** * \brief Get a list of providers * * \param[in] standard list providers of this standard (e.g. ocf, lsb, etc.) * * \return a list of providers as char * list items (or NULL if standard does not support providers) * \note The caller is responsible for freeing the result using g_list_free_full(list, free). */ GList *resources_list_providers(const char *standard); /** * \brief Get a list of resource agents * * \param[in] standard list agents using this standard (e.g. ocf, lsb, etc.) (or NULL for all) * \param[in] provider list agents from this provider (or NULL for all) * * \return a list of resource agents. The list items are char *. * \note The caller is responsible for freeing the result using g_list_free_full(list, free). */ GList *resources_list_agents(const char *standard, const char *provider); /** * Get list of available standards * * \return a list of resource standards. The list items are char *. This list _must_ * be destroyed using g_list_free_full(list, free). */ GList *resources_list_standards(void); svc_action_t *services_action_create(const char *name, const char *action, int interval /* ms */ , int timeout /* ms */ ); /** * \brief Create a new resource action * * \param[in] name name of resource * \param[in] standard resource agent standard (ocf, lsb, etc.) * \param[in] provider resource agent provider * \param[in] agent resource agent name * \param[in] action action (start, stop, monitor, etc.) * \param[in] interval how often to repeat this action, in milliseconds (if 0, execute only once) * \param[in] timeout consider action failed if it does not complete in this many milliseconds * \param[in] params action parameters * * \return newly allocated action instance * * \post After the call, 'params' is owned, and later free'd by the svc_action_t result * \note The caller is responsible for freeing the return value using * services_action_free(). */ svc_action_t *resources_action_create(const char *name, const char *standard, const char *provider, const char *agent, const char *action, int interval /* ms */ , int timeout /* ms */ , GHashTable * params, enum svc_action_flags flags); /** * Kick a recurring action so it is scheduled immediately for re-execution */ gboolean services_action_kick(const char *name, const char *action, int interval /* ms */); /** * Find the first class that can provide service::${agent} * * \param[in] agent which agent to search for * \return NULL, or the first class that provides the named agent */ const char *resources_find_service_class(const char *agent); /** * Utilize services API to execute an arbitrary command. * * This API has useful infrastructure in place to be able to run a command * in the background and get notified via a callback when the command finishes. * * \param[in] exec command to execute * \param[in] args arguments to the command, NULL terminated * * \return a svc_action_t object, used to pass to the execute function * (services_action_sync() or services_action_async()) and is * provided to the callback. */ svc_action_t *services_action_create_generic(const char *exec, const char *args[]); void services_action_cleanup(svc_action_t * op); void services_action_free(svc_action_t * op); gboolean services_action_sync(svc_action_t * op); /** * Run an action asynchronously. * * \param[in] op services action data * \param[in] action_callback callback for when the action completes * * \retval TRUE succesfully started execution * \retval FALSE failed to start execution, no callback will be received */ gboolean services_action_async(svc_action_t * op, void (*action_callback) (svc_action_t *)); gboolean services_action_cancel(const char *name, const char *action, int interval); static inline const char *services_lrm_status_str(enum op_status status) { switch (status) { case PCMK_LRM_OP_PENDING: return "pending"; case PCMK_LRM_OP_DONE:return "complete"; case PCMK_LRM_OP_CANCELLED:return "Cancelled"; case PCMK_LRM_OP_TIMEOUT:return "Timed Out"; case PCMK_LRM_OP_NOTSUPPORTED:return "NOT SUPPORTED"; case PCMK_LRM_OP_ERROR:return "Error"; case PCMK_LRM_OP_NOT_INSTALLED:return "Not installed"; default:return "UNKNOWN!"; } } static inline const char *services_ocf_exitcode_str(enum ocf_exitcode code) { switch (code) { case PCMK_OCF_OK: return "ok"; case PCMK_OCF_UNKNOWN_ERROR: return "unknown error"; case PCMK_OCF_INVALID_PARAM: return "invalid parameter"; case PCMK_OCF_UNIMPLEMENT_FEATURE: return "unimplemented feature"; case PCMK_OCF_INSUFFICIENT_PRIV: return "insufficient privileges"; case PCMK_OCF_NOT_INSTALLED: return "not installed"; case PCMK_OCF_NOT_CONFIGURED: return "not configured"; case PCMK_OCF_NOT_RUNNING: return "not running"; case PCMK_OCF_RUNNING_MASTER: return "master"; case PCMK_OCF_FAILED_MASTER: return "master (failed)"; case PCMK_OCF_SIGNAL: return "OCF_SIGNAL"; case PCMK_OCF_NOT_SUPPORTED: return "OCF_NOT_SUPPORTED"; case PCMK_OCF_PENDING: return "OCF_PENDING"; case PCMK_OCF_CANCELLED: return "OCF_CANCELLED"; case PCMK_OCF_TIMEOUT: return "OCF_TIMEOUT"; case PCMK_OCF_OTHER_ERROR: return "OCF_OTHER_ERROR"; case PCMK_OCF_DEGRADED: return "OCF_DEGRADED"; case PCMK_OCF_DEGRADED_MASTER: return "OCF_DEGRADED_MASTER"; default: return "unknown"; } } /** * \brief Get OCF equivalent of LSB exit code * * \param[in] action LSB action that produced exit code * \param[in] lsb_exitcode Exit code of LSB action * * \return PCMK_OCF_* constant that corresponds to LSB exit code */ static inline enum ocf_exitcode services_get_ocf_exitcode(const char *action, int lsb_exitcode) { /* For non-status actions, LSB and OCF share error code meaning <= 7 */ if (action && strcmp(action, "status") && strcmp(action, "monitor")) { if ((lsb_exitcode < 0) || (lsb_exitcode > PCMK_LSB_NOT_RUNNING)) { return PCMK_OCF_UNKNOWN_ERROR; } return (enum ocf_exitcode)lsb_exitcode; } /* status has different return codes */ switch (lsb_exitcode) { case PCMK_LSB_STATUS_OK: return PCMK_OCF_OK; case PCMK_LSB_STATUS_NOT_INSTALLED: return PCMK_OCF_NOT_INSTALLED; case PCMK_LSB_STATUS_INSUFFICIENT_PRIV: return PCMK_OCF_INSUFFICIENT_PRIV; case PCMK_LSB_STATUS_VAR_PID: case PCMK_LSB_STATUS_VAR_LOCK: case PCMK_LSB_STATUS_NOT_RUNNING: return PCMK_OCF_NOT_RUNNING; } return PCMK_OCF_UNKNOWN_ERROR; } # ifdef __cplusplus } # endif #endif /* __PCMK_SERVICES__ */ diff --git a/lib/common/alerts.c b/lib/common/alerts.c index 046ea9471e..8d473e26a3 100644 --- a/lib/common/alerts.c +++ b/lib/common/alerts.c @@ -1,263 +1,298 @@ /* * Copyright (C) 2015 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include +#include #include #include -typedef struct { - char *name; - char *value; -} envvar_t; - GListPtr crm_alert_list = NULL; guint crm_alert_max_alert_timeout = CRM_ALERT_DEFAULT_TIMEOUT_MS; +char **crm_alert_kind_default = NULL; /* * to allow script compatibility we can have more than one * set of environment variables */ -const char *crm_alert_keys[14][3] = +const char *crm_alert_keys[CRM_ALERT_INTERNAL_KEY_MAX][3] = { [CRM_alert_recipient] = {"CRM_notify_recipient", "CRM_alert_recipient", NULL}, [CRM_alert_node] = {"CRM_notify_node", "CRM_alert_node", NULL}, [CRM_alert_nodeid] = {"CRM_notify_nodeid", "CRM_alert_nodeid", NULL}, [CRM_alert_rsc] = {"CRM_notify_rsc", "CRM_alert_rsc", NULL}, [CRM_alert_task] = {"CRM_notify_task", "CRM_alert_task", NULL}, [CRM_alert_interval] = {"CRM_notify_interval", "CRM_alert_interval", NULL}, [CRM_alert_desc] = {"CRM_notify_desc", "CRM_alert_desc", NULL}, [CRM_alert_status] = {"CRM_notify_status", "CRM_alert_status", NULL}, [CRM_alert_target_rc] = {"CRM_notify_target_rc", "CRM_alert_target_rc", NULL}, [CRM_alert_rc] = {"CRM_notify_rc", "CRM_alert_rc", NULL}, [CRM_alert_kind] = {"CRM_notify_kind", "CRM_alert_kind", NULL}, [CRM_alert_version] = {"CRM_notify_version", "CRM_alert_version", NULL}, - [CRM_alert_node_sequence] = {"CRM_notify_node_sequence", "CRM_alert_node_sequence", NULL}, - [CRM_alert_timestamp] = {"CRM_notify_timestamp", "CRM_alert_timestamp", NULL} + [CRM_alert_node_sequence] = {"CRM_notify_node_sequence", CRM_ALERT_NODE_SEQUENCE, NULL}, + [CRM_alert_timestamp] = {"CRM_notify_timestamp", "CRM_alert_timestamp", NULL}, + [CRM_alert_attribute_name] = {"CRM_notify_attribute_name", "CRM_alert_attribute_name", NULL}, + [CRM_alert_attribute_value] = {"CRM_notify_attribute_value", "CRM_alert_attribute_value", NULL} }; static void -free_envvar_entry(envvar_t *entry) +free_envvar_entry(crm_alert_envvar_t *entry) { free(entry->name); free(entry->value); free(entry); } static void crm_free_alert_list_entry(crm_alert_entry_t *entry) { free(entry->id); free(entry->path); free(entry->tstamp_format); free(entry->recipient); + + free(entry->select_kind_orig); + if (entry->select_kind) { + g_strfreev(entry->select_kind); + } + + free(entry->select_attribute_name_orig); + if(entry->select_attribute_name) { + g_strfreev(entry->select_attribute_name); + } + if (entry->envvars) { g_list_free_full(entry->envvars, (GDestroyNotify) free_envvar_entry); } free(entry); } void crm_free_alert_list() { if (crm_alert_list) { g_list_free_full(crm_alert_list, (GDestroyNotify) crm_free_alert_list_entry); crm_alert_list = NULL; } } static gpointer -copy_envvar_entry(envvar_t * src, +copy_envvar_entry(crm_alert_envvar_t * src, gpointer data) { - envvar_t *dst = calloc(1, sizeof(envvar_t)); + crm_alert_envvar_t *dst = calloc(1, sizeof(crm_alert_envvar_t)); CRM_ASSERT(dst); dst->name = strdup(src->name); dst->value = src->value?strdup(src->value):NULL; return (gpointer) dst; } static GListPtr add_dup_envvar(crm_alert_entry_t *entrys, - envvar_t *entry) + crm_alert_envvar_t *entry) { entrys->envvars = g_list_prepend(entrys->envvars, copy_envvar_entry(entry, NULL)); return entrys->envvars; } GListPtr crm_drop_envvars(crm_alert_entry_t *entry, int count) { int i; for (i = 0; (entry->envvars) && ((count < 0) || (i < count)); i++) { - free_envvar_entry((envvar_t *) g_list_first(entry->envvars)->data); + free_envvar_entry((crm_alert_envvar_t *) g_list_first(entry->envvars)->data); entry->envvars = g_list_delete_link(entry->envvars, g_list_first(entry->envvars)); } return entry->envvars; } static GListPtr copy_envvar_list_remove_dupes(crm_alert_entry_t *entry) { GListPtr dst = NULL, ls, ld; /* we are adding to the front so variable dupes coming via * recipient-section have got precedence over those in the * global section - we don't expect that many variables here * that it pays off to go for a hash-table to make dupe elimination * more efficient - maybe later when we might decide to do more * with the variables than cycling through them */ for (ls = g_list_first(entry->envvars); ls; ls = g_list_next(ls)) { for (ld = g_list_first(dst); ld; ld = g_list_next(ld)) { - if (!strcmp(((envvar_t *)(ls->data))->name, - ((envvar_t *)(ld->data))->name)) { + if (!strcmp(((crm_alert_envvar_t *)(ls->data))->name, + ((crm_alert_envvar_t *)(ld->data))->name)) { break; } } if (!ld) { dst = g_list_prepend(dst, - copy_envvar_entry((envvar_t *)(ls->data), NULL)); + copy_envvar_entry((crm_alert_envvar_t *)(ls->data), NULL)); } } return dst; } void crm_add_dup_alert_list_entry(crm_alert_entry_t *entry) { crm_alert_entry_t *new_entry = (crm_alert_entry_t *) calloc(1, sizeof(crm_alert_entry_t)); CRM_ASSERT(new_entry); *new_entry = (crm_alert_entry_t) { .id = strdup(entry->id), .path = strdup(entry->path), .timeout = entry->timeout, .tstamp_format = entry->tstamp_format?strdup(entry->tstamp_format):NULL, .recipient = entry->recipient?strdup(entry->recipient):NULL, + .select_kind_orig = entry->select_kind_orig?g_strdup(entry->select_kind_orig):NULL, + .select_kind = entry->select_kind?g_strdupv(entry->select_kind):NULL, + .select_attribute_name_orig = entry->select_attribute_name_orig?g_strdup(entry->select_attribute_name_orig):NULL, + .select_attribute_name = entry->select_attribute_name?g_strdupv(entry->select_attribute_name):NULL, .envvars = entry->envvars? copy_envvar_list_remove_dupes(entry) :NULL }; crm_alert_list = g_list_prepend(crm_alert_list, new_entry); } GListPtr crm_get_envvars_from_cib(xmlNode *basenode, crm_alert_entry_t *entry, int *count) { xmlNode *envvar; xmlNode *pair; if ((!basenode) || (!(envvar = first_named_child(basenode, XML_TAG_ATTR_SETS)))) { return entry->envvars; } for (pair = first_named_child(envvar, XML_CIB_TAG_NVPAIR); pair; pair = __xml_next(pair)) { - envvar_t envvar_entry = (envvar_t) { + crm_alert_envvar_t envvar_entry = (crm_alert_envvar_t) { .name = (char *) crm_element_value(pair, XML_NVPAIR_ATTR_NAME), .value = (char *) crm_element_value(pair, XML_NVPAIR_ATTR_VALUE) }; crm_trace("Found environment variable %s = '%s'", envvar_entry.name, envvar_entry.value?envvar_entry.value:""); (*count)++; add_dup_envvar(entry, &envvar_entry); } return entry->envvars; } void crm_set_alert_key(enum crm_alert_keys_e name, const char *value) { const char **key; for (key = crm_alert_keys[name]; *key; key++) { crm_trace("Setting alert key %s = '%s'", *key, value); if (value) { setenv(*key, value, 1); } else { unsetenv(*key); } } } void crm_set_alert_key_int(enum crm_alert_keys_e name, int value) { char *s = crm_itoa(value); crm_set_alert_key(name, s); free(s); } void crm_unset_alert_keys() { const char **key; enum crm_alert_keys_e name; for(name = 0; name < DIMOF(crm_alert_keys); name++) { for(key = crm_alert_keys[name]; *key; key++) { crm_trace("Unsetting alert key %s", *key); unsetenv(*key); } } } void crm_set_envvar_list(crm_alert_entry_t *entry) { GListPtr l; for (l = g_list_first(entry->envvars); l; l = g_list_next(l)) { - envvar_t *env = (envvar_t *)(l->data); + crm_alert_envvar_t *env = (crm_alert_envvar_t *)(l->data); crm_trace("Setting environment variable %s = '%s'", env->name, env->value?env->value:""); if (env->value) { setenv(env->name, env->value, 1); } else { unsetenv(env->name); } } } void crm_unset_envvar_list(crm_alert_entry_t *entry) { GListPtr l; for (l = g_list_first(entry->envvars); l; l = g_list_next(l)) { - envvar_t *env = (envvar_t *)(l->data); + crm_alert_envvar_t *env = (crm_alert_envvar_t *)(l->data); crm_trace("Unsetting environment variable %s", env->name); unsetenv(env->name); } } + +gboolean +crm_is_target_alert(char **list, const char *value) +{ + int target_list_num = 0; + gboolean rc = FALSE; + + if (list == NULL) return TRUE; + + target_list_num = g_strv_length(list); + + for( int cnt = 0; cnt < target_list_num; cnt++ ) { + if (strcmp(list[cnt], value) == 0) { + rc = TRUE; + break; + } + } + + return rc; +} + diff --git a/lib/lrmd/Makefile.am b/lib/lrmd/Makefile.am index 19800c6973..2cfb1bb088 100644 --- a/lib/lrmd/Makefile.am +++ b/lib/lrmd/Makefile.am @@ -1,29 +1,29 @@ # Copyright (c) 2012 David Vossel # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # include $(top_srcdir)/Makefile.common lib_LTLIBRARIES = liblrmd.la liblrmd_la_LDFLAGS = -version-info 5:0:4 liblrmd_la_CFLAGS = $(CFLAGS_HARDENED_LIB) liblrmd_la_LDFLAGS += $(LDFLAGS_HARDENED_LIB) liblrmd_la_LIBADD = $(top_builddir)/lib/common/libcrmcommon.la \ $(top_builddir)/lib/services/libcrmservice.la \ $(top_builddir)/lib/fencing/libstonithd.la -liblrmd_la_SOURCES = lrmd_client.c proxy_common.c +liblrmd_la_SOURCES = lrmd_client.c proxy_common.c lrmd_alerts.c diff --git a/lib/lrmd/lrmd_alerts.c b/lib/lrmd/lrmd_alerts.c new file mode 100644 index 0000000000..398a7788d8 --- /dev/null +++ b/lib/lrmd/lrmd_alerts.c @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2015 David Vossel + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +lrmd_key_value_t * +lrmd_set_alert_key_to_lrmd_params(lrmd_key_value_t *head, enum crm_alert_keys_e name, const char *value) +{ + const char **key; + + for (key = crm_alert_keys[name]; *key; key++) { + crm_trace("Setting alert key %s = '%s'", *key, value); + head = lrmd_key_value_add(head, *key, value); + } + return head; +} + +void +lrmd_set_alert_envvar_to_lrmd_params(lrmd_key_value_t *head, crm_alert_entry_t *entry) +{ + GListPtr l; + + for (l = g_list_first(entry->envvars); l; l = g_list_next(l)) { + crm_alert_envvar_t *ev = (crm_alert_envvar_t *)(l->data); + + crm_trace("Setting environment variable %s = '%s'", ev->name, + ev->value?ev->value:""); + lrmd_key_value_add(head, ev->name, ev->value); + } + +} + diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c index cc37c0e4b7..a95884bda5 100644 --- a/lib/lrmd/lrmd_client.c +++ b/lib/lrmd/lrmd_client.c @@ -1,2232 +1,2258 @@ /* * Copyright (c) 2012 David Vossel * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_GNUTLS_GNUTLS_H # undef KEYFILE # include #endif #include #include #include #include #include #define MAX_TLS_RECV_WAIT 10000 CRM_TRACE_INIT_DATA(lrmd); static int lrmd_api_disconnect(lrmd_t * lrmd); static int lrmd_api_is_connected(lrmd_t * lrmd); /* IPC proxy functions */ int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg); static void lrmd_internal_proxy_dispatch(lrmd_t *lrmd, xmlNode *msg); void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)); #ifdef HAVE_GNUTLS_GNUTLS_H # define LRMD_CLIENT_HANDSHAKE_TIMEOUT 5000 /* 5 seconds */ gnutls_psk_client_credentials_t psk_cred_s; int lrmd_tls_set_key(gnutls_datum_t * key); static void lrmd_tls_disconnect(lrmd_t * lrmd); static int global_remote_msg_id = 0; int lrmd_tls_send_msg(crm_remote_t * session, xmlNode * msg, uint32_t id, const char *msg_type); static void lrmd_tls_connection_destroy(gpointer userdata); #endif typedef struct lrmd_private_s { enum client_type type; char *token; mainloop_io_t *source; /* IPC parameters */ crm_ipc_t *ipc; crm_remote_t *remote; /* Extra TLS parameters */ char *remote_nodename; #ifdef HAVE_GNUTLS_GNUTLS_H char *server; int port; gnutls_psk_client_credentials_t psk_cred_c; /* while the async connection is occuring, this is the id * of the connection timeout timer. */ int async_timer; int sock; /* since tls requires a round trip across the network for a * request/reply, there are times where we just want to be able * to send a request from the client and not wait around (or even care * about) what the reply is. */ int expected_late_replies; GList *pending_notify; crm_trigger_t *process_notify; #endif lrmd_event_callback callback; /* Internal IPC proxy msg passing for remote guests */ void (*proxy_callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg); void *proxy_callback_userdata; char *peer_version; } lrmd_private_t; static lrmd_list_t * lrmd_list_add(lrmd_list_t * head, const char *value) { lrmd_list_t *p, *end; p = calloc(1, sizeof(lrmd_list_t)); p->val = strdup(value); end = head; while (end && end->next) { end = end->next; } if (end) { end->next = p; } else { head = p; } return head; } void lrmd_list_freeall(lrmd_list_t * head) { lrmd_list_t *p; while (head) { char *val = (char *)head->val; p = head->next; free(val); free(head); head = p; } } lrmd_key_value_t * lrmd_key_value_add(lrmd_key_value_t * head, const char *key, const char *value) { lrmd_key_value_t *p, *end; p = calloc(1, sizeof(lrmd_key_value_t)); p->key = strdup(key); p->value = strdup(value); end = head; while (end && end->next) { end = end->next; } if (end) { end->next = p; } else { head = p; } return head; } void lrmd_key_value_freeall(lrmd_key_value_t * head) { lrmd_key_value_t *p; while (head) { p = head->next; free(head->key); free(head->value); free(head); head = p; } } static void dup_attr(gpointer key, gpointer value, gpointer user_data) { g_hash_table_replace(user_data, strdup(key), strdup(value)); } lrmd_event_data_t * lrmd_copy_event(lrmd_event_data_t * event) { lrmd_event_data_t *copy = NULL; copy = calloc(1, sizeof(lrmd_event_data_t)); /* This will get all the int values. * we just have to be careful not to leave any * dangling pointers to strings. */ memcpy(copy, event, sizeof(lrmd_event_data_t)); copy->rsc_id = event->rsc_id ? strdup(event->rsc_id) : NULL; copy->op_type = event->op_type ? strdup(event->op_type) : NULL; copy->user_data = event->user_data ? strdup(event->user_data) : NULL; copy->output = event->output ? strdup(event->output) : NULL; copy->exit_reason = event->exit_reason ? strdup(event->exit_reason) : NULL; copy->remote_nodename = event->remote_nodename ? strdup(event->remote_nodename) : NULL; if (event->params) { copy->params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if (copy->params != NULL) { g_hash_table_foreach(event->params, dup_attr, copy->params); } } return copy; } void lrmd_free_event(lrmd_event_data_t * event) { if (!event) { return; } /* free gives me grief if i try to cast */ free((char *)event->rsc_id); free((char *)event->op_type); free((char *)event->user_data); free((char *)event->output); free((char *)event->exit_reason); free((char *)event->remote_nodename); if (event->params) { g_hash_table_destroy(event->params); } free(event); } static int lrmd_dispatch_internal(lrmd_t * lrmd, xmlNode * msg) { const char *type; const char *proxy_session = crm_element_value(msg, F_LRMD_IPC_SESSION); lrmd_private_t *native = lrmd->private; lrmd_event_data_t event = { 0, }; if (proxy_session != NULL) { /* this is proxy business */ lrmd_internal_proxy_dispatch(lrmd, msg); return 1; } else if (!native->callback) { /* no callback set */ crm_trace("notify event received but client has not set callback"); return 1; } event.remote_nodename = native->remote_nodename; type = crm_element_value(msg, F_LRMD_OPERATION); crm_element_value_int(msg, F_LRMD_CALLID, &event.call_id); event.rsc_id = crm_element_value(msg, F_LRMD_RSC_ID); if (crm_str_eq(type, LRMD_OP_RSC_REG, TRUE)) { event.type = lrmd_event_register; } else if (crm_str_eq(type, LRMD_OP_RSC_UNREG, TRUE)) { event.type = lrmd_event_unregister; } else if (crm_str_eq(type, LRMD_OP_RSC_EXEC, TRUE)) { crm_element_value_int(msg, F_LRMD_TIMEOUT, &event.timeout); crm_element_value_int(msg, F_LRMD_RSC_INTERVAL, &event.interval); crm_element_value_int(msg, F_LRMD_RSC_START_DELAY, &event.start_delay); crm_element_value_int(msg, F_LRMD_EXEC_RC, (int *)&event.rc); crm_element_value_int(msg, F_LRMD_OP_STATUS, &event.op_status); crm_element_value_int(msg, F_LRMD_RSC_DELETED, &event.rsc_deleted); crm_element_value_int(msg, F_LRMD_RSC_RUN_TIME, (int *)&event.t_run); crm_element_value_int(msg, F_LRMD_RSC_RCCHANGE_TIME, (int *)&event.t_rcchange); crm_element_value_int(msg, F_LRMD_RSC_EXEC_TIME, (int *)&event.exec_time); crm_element_value_int(msg, F_LRMD_RSC_QUEUE_TIME, (int *)&event.queue_time); event.op_type = crm_element_value(msg, F_LRMD_RSC_ACTION); event.user_data = crm_element_value(msg, F_LRMD_RSC_USERDATA_STR); event.output = crm_element_value(msg, F_LRMD_RSC_OUTPUT); event.exit_reason = crm_element_value(msg, F_LRMD_RSC_EXIT_REASON); event.type = lrmd_event_exec_complete; event.params = xml2list(msg); } else if (crm_str_eq(type, LRMD_OP_NEW_CLIENT, TRUE)) { event.type = lrmd_event_new_client; } else if (crm_str_eq(type, LRMD_OP_POKE, TRUE)) { event.type = lrmd_event_poke; } else { return 1; } crm_trace("op %s notify event received", type); native->callback(&event); if (event.params) { g_hash_table_destroy(event.params); } return 1; } static int lrmd_ipc_dispatch(const char *buffer, ssize_t length, gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->private; xmlNode *msg; int rc; if (!native->callback) { /* no callback set */ return 1; } msg = string2xml(buffer); rc = lrmd_dispatch_internal(lrmd, msg); free_xml(msg); return rc; } #ifdef HAVE_GNUTLS_GNUTLS_H static void lrmd_free_xml(gpointer userdata) { free_xml((xmlNode *) userdata); } static int lrmd_tls_connected(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->private; if (native->remote->tls_session) { return TRUE; } return FALSE; } static int lrmd_tls_dispatch(gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->private; xmlNode *xml = NULL; int rc = 0; int disconnected = 0; if (lrmd_tls_connected(lrmd) == FALSE) { crm_trace("tls dispatch triggered after disconnect"); return 0; } crm_trace("tls_dispatch triggered"); /* First check if there are any pending notifies to process that came * while we were waiting for replies earlier. */ if (native->pending_notify) { GList *iter = NULL; crm_trace("Processing pending notifies"); for (iter = native->pending_notify; iter; iter = iter->next) { lrmd_dispatch_internal(lrmd, iter->data); } g_list_free_full(native->pending_notify, lrmd_free_xml); native->pending_notify = NULL; } /* Next read the current buffer and see if there are any messages to handle. */ rc = crm_remote_ready(native->remote, 0); if (rc == 0) { /* nothing to read, see if any full messages are already in buffer. */ xml = crm_remote_parse_buffer(native->remote); } else if (rc < 0) { disconnected = 1; } else { crm_remote_recv(native->remote, -1, &disconnected); xml = crm_remote_parse_buffer(native->remote); } while (xml) { const char *msg_type = crm_element_value(xml, F_LRMD_REMOTE_MSG_TYPE); if (safe_str_eq(msg_type, "notify")) { lrmd_dispatch_internal(lrmd, xml); } else if (safe_str_eq(msg_type, "reply")) { if (native->expected_late_replies > 0) { native->expected_late_replies--; } else { int reply_id = 0; crm_element_value_int(xml, F_LRMD_CALLID, &reply_id); /* if this happens, we want to know about it */ crm_err("Got outdated reply %d", reply_id); } } free_xml(xml); xml = crm_remote_parse_buffer(native->remote); } if (disconnected) { crm_info("Server disconnected while reading remote server msg."); lrmd_tls_disconnect(lrmd); return 0; } return 1; } #endif /* Not used with mainloop */ int lrmd_poll(lrmd_t * lrmd, int timeout) { lrmd_private_t *native = lrmd->private; switch (native->type) { case CRM_CLIENT_IPC: return crm_ipc_ready(native->ipc); #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: if (native->pending_notify) { return 1; } return crm_remote_ready(native->remote, 0); #endif default: crm_err("Unsupported connection type: %d", native->type); } return 0; } /* Not used with mainloop */ bool lrmd_dispatch(lrmd_t * lrmd) { lrmd_private_t *private = NULL; CRM_ASSERT(lrmd != NULL); private = lrmd->private; switch (private->type) { case CRM_CLIENT_IPC: while (crm_ipc_ready(private->ipc)) { if (crm_ipc_read(private->ipc) > 0) { const char *msg = crm_ipc_buffer(private->ipc); lrmd_ipc_dispatch(msg, strlen(msg), lrmd); } } break; #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: lrmd_tls_dispatch(lrmd); break; #endif default: crm_err("Unsupported connection type: %d", private->type); } if (lrmd_api_is_connected(lrmd) == FALSE) { crm_err("Connection closed"); return FALSE; } return TRUE; } static xmlNode * lrmd_create_op(const char *token, const char *op, xmlNode * data, enum lrmd_call_options options) { xmlNode *op_msg = create_xml_node(NULL, "lrmd_command"); CRM_CHECK(op_msg != NULL, return NULL); CRM_CHECK(token != NULL, return NULL); crm_xml_add(op_msg, F_XML_TAGNAME, "lrmd_command"); crm_xml_add(op_msg, F_TYPE, T_LRMD); crm_xml_add(op_msg, F_LRMD_CALLBACK_TOKEN, token); crm_xml_add(op_msg, F_LRMD_OPERATION, op); crm_trace("Sending call options: %.8lx, %d", (long)options, options); crm_xml_add_int(op_msg, F_LRMD_CALLOPTS, options); if (data != NULL) { add_message_xml(op_msg, F_LRMD_CALLDATA, data); } return op_msg; } static void lrmd_ipc_connection_destroy(gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->private; crm_info("IPC connection destroyed"); /* Prevent these from being cleaned up in lrmd_api_disconnect() */ native->ipc = NULL; native->source = NULL; if (native->callback) { lrmd_event_data_t event = { 0, }; event.type = lrmd_event_disconnect; event.remote_nodename = native->remote_nodename; native->callback(&event); } } #ifdef HAVE_GNUTLS_GNUTLS_H static void lrmd_tls_connection_destroy(gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->private; crm_info("TLS connection destroyed"); if (native->remote->tls_session) { gnutls_bye(*native->remote->tls_session, GNUTLS_SHUT_RDWR); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); } if (native->psk_cred_c) { gnutls_psk_free_client_credentials(native->psk_cred_c); } if (native->sock) { close(native->sock); } if (native->process_notify) { mainloop_destroy_trigger(native->process_notify); native->process_notify = NULL; } if (native->pending_notify) { g_list_free_full(native->pending_notify, lrmd_free_xml); native->pending_notify = NULL; } free(native->remote->buffer); native->remote->buffer = NULL; native->source = 0; native->sock = 0; native->psk_cred_c = NULL; native->remote->tls_session = NULL; native->sock = 0; if (native->callback) { lrmd_event_data_t event = { 0, }; event.remote_nodename = native->remote_nodename; event.type = lrmd_event_disconnect; native->callback(&event); } return; } int lrmd_tls_send_msg(crm_remote_t * session, xmlNode * msg, uint32_t id, const char *msg_type) { int rc = -1; crm_xml_add_int(msg, F_LRMD_REMOTE_MSG_ID, id); crm_xml_add(msg, F_LRMD_REMOTE_MSG_TYPE, msg_type); rc = crm_remote_send(session, msg); if (rc < 0) { crm_err("Failed to send remote lrmd tls msg, rc = %d", rc); return rc; } return rc; } static xmlNode * lrmd_tls_recv_reply(lrmd_t * lrmd, int total_timeout, int expected_reply_id, int *disconnected) { lrmd_private_t *native = lrmd->private; xmlNode *xml = NULL; time_t start = time(NULL); const char *msg_type = NULL; int reply_id = 0; int remaining_timeout = 0; /* A timeout of 0 here makes no sense. We have to wait a period of time * for the response to come back. If -1 or 0, default to 10 seconds. */ if (total_timeout <= 0 || total_timeout > MAX_TLS_RECV_WAIT) { total_timeout = MAX_TLS_RECV_WAIT; } while (!xml) { xml = crm_remote_parse_buffer(native->remote); if (!xml) { /* read some more off the tls buffer if we still have time left. */ if (remaining_timeout) { remaining_timeout = total_timeout - ((time(NULL) - start) * 1000); } else { remaining_timeout = total_timeout; } if (remaining_timeout <= 0) { crm_err("Never received the expected reply during the timeout period, disconnecting."); *disconnected = TRUE; return NULL; } crm_remote_recv(native->remote, remaining_timeout, disconnected); xml = crm_remote_parse_buffer(native->remote); if (!xml) { crm_err("Unable to receive expected reply, disconnecting."); *disconnected = TRUE; return NULL; } else if (*disconnected) { return NULL; } } CRM_ASSERT(xml != NULL); crm_element_value_int(xml, F_LRMD_REMOTE_MSG_ID, &reply_id); msg_type = crm_element_value(xml, F_LRMD_REMOTE_MSG_TYPE); if (!msg_type) { crm_err("Empty msg type received while waiting for reply"); free_xml(xml); xml = NULL; } else if (safe_str_eq(msg_type, "notify")) { /* got a notify while waiting for reply, trigger the notify to be processed later */ crm_info("queueing notify"); native->pending_notify = g_list_append(native->pending_notify, xml); if (native->process_notify) { crm_info("notify trigger set."); mainloop_set_trigger(native->process_notify); } xml = NULL; } else if (safe_str_neq(msg_type, "reply")) { /* msg isn't a reply, make some noise */ crm_err("Expected a reply, got %s", msg_type); free_xml(xml); xml = NULL; } else if (reply_id != expected_reply_id) { if (native->expected_late_replies > 0) { native->expected_late_replies--; } else { crm_err("Got outdated reply, expected id %d got id %d", expected_reply_id, reply_id); } free_xml(xml); xml = NULL; } } if (native->remote->buffer && native->process_notify) { mainloop_set_trigger(native->process_notify); } return xml; } static int lrmd_tls_send(lrmd_t * lrmd, xmlNode * msg) { int rc = 0; lrmd_private_t *native = lrmd->private; global_remote_msg_id++; if (global_remote_msg_id <= 0) { global_remote_msg_id = 1; } rc = lrmd_tls_send_msg(native->remote, msg, global_remote_msg_id, "request"); if (rc <= 0) { crm_err("Remote lrmd send failed, disconnecting"); lrmd_tls_disconnect(lrmd); return -ENOTCONN; } return pcmk_ok; } static int lrmd_tls_send_recv(lrmd_t * lrmd, xmlNode * msg, int timeout, xmlNode ** reply) { int rc = 0; int disconnected = 0; xmlNode *xml = NULL; if (lrmd_tls_connected(lrmd) == FALSE) { return -1; } rc = lrmd_tls_send(lrmd, msg); if (rc < 0) { return rc; } xml = lrmd_tls_recv_reply(lrmd, timeout, global_remote_msg_id, &disconnected); if (disconnected) { crm_err("Remote lrmd server disconnected while waiting for reply with id %d. ", global_remote_msg_id); lrmd_tls_disconnect(lrmd); rc = -ENOTCONN; } else if (!xml) { crm_err("Remote lrmd never received reply for request id %d. timeout: %dms ", global_remote_msg_id, timeout); rc = -ECOMM; } if (reply) { *reply = xml; } else { free_xml(xml); } return rc; } #endif static int lrmd_send_xml(lrmd_t * lrmd, xmlNode * msg, int timeout, xmlNode ** reply) { int rc = -1; lrmd_private_t *native = lrmd->private; switch (native->type) { case CRM_CLIENT_IPC: rc = crm_ipc_send(native->ipc, msg, crm_ipc_client_response, timeout, reply); break; #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: rc = lrmd_tls_send_recv(lrmd, msg, timeout, reply); break; #endif default: crm_err("Unsupported connection type: %d", native->type); } return rc; } static int lrmd_send_xml_no_reply(lrmd_t * lrmd, xmlNode * msg) { int rc = -1; lrmd_private_t *native = lrmd->private; switch (native->type) { case CRM_CLIENT_IPC: rc = crm_ipc_send(native->ipc, msg, crm_ipc_flags_none, 0, NULL); break; #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: rc = lrmd_tls_send(lrmd, msg); if (rc == pcmk_ok) { /* we don't want to wait around for the reply, but * since the request/reply protocol needs to behave the same * as libqb, a reply will eventually come later anyway. */ native->expected_late_replies++; } break; #endif default: crm_err("Unsupported connection type: %d", native->type); } return rc; } static int lrmd_api_is_connected(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->private; switch (native->type) { case CRM_CLIENT_IPC: return crm_ipc_connected(native->ipc); break; #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: return lrmd_tls_connected(lrmd); break; #endif default: crm_err("Unsupported connection type: %d", native->type); } return 0; } static int lrmd_send_command(lrmd_t * lrmd, const char *op, xmlNode * data, xmlNode ** output_data, int timeout, /* ms. defaults to 1000 if set to 0 */ enum lrmd_call_options options, gboolean expect_reply) { /* TODO we need to reduce usage of this boolean */ int rc = pcmk_ok; int reply_id = -1; lrmd_private_t *native = lrmd->private; xmlNode *op_msg = NULL; xmlNode *op_reply = NULL; if (!lrmd_api_is_connected(lrmd)) { return -ENOTCONN; } if (op == NULL) { crm_err("No operation specified"); return -EINVAL; } CRM_CHECK(native->token != NULL,; ); crm_trace("sending %s op to lrmd", op); op_msg = lrmd_create_op(native->token, op, data, options); if (op_msg == NULL) { return -EINVAL; } crm_xml_add_int(op_msg, F_LRMD_TIMEOUT, timeout); if (expect_reply) { rc = lrmd_send_xml(lrmd, op_msg, timeout, &op_reply); } else { rc = lrmd_send_xml_no_reply(lrmd, op_msg); goto done; } if (rc < 0) { crm_perror(LOG_ERR, "Couldn't perform %s operation (timeout=%d): %d", op, timeout, rc); rc = -ECOMM; goto done; } else if(op_reply == NULL) { rc = -ENOMSG; goto done; } rc = pcmk_ok; crm_element_value_int(op_reply, F_LRMD_CALLID, &reply_id); crm_trace("%s op reply received", op); if (crm_element_value_int(op_reply, F_LRMD_RC, &rc) != 0) { rc = -ENOMSG; goto done; } crm_log_xml_trace(op_reply, "Reply"); if (output_data) { *output_data = op_reply; op_reply = NULL; /* Prevent subsequent free */ } done: if (lrmd_api_is_connected(lrmd) == FALSE) { crm_err("LRMD disconnected"); } free_xml(op_msg); free_xml(op_reply); return rc; } static int lrmd_api_poke_connection(lrmd_t * lrmd) { int rc; lrmd_private_t *native = lrmd->private; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); rc = lrmd_send_command(lrmd, LRMD_OP_POKE, data, NULL, 0, 0, native->type == CRM_CLIENT_IPC ? TRUE : FALSE); free_xml(data); return rc < 0 ? rc : pcmk_ok; } int remote_proxy_check(lrmd_t * lrmd, GHashTable *hash) { int rc; const char *value; lrmd_private_t *native = lrmd->private; xmlNode *data = create_xml_node(NULL, F_LRMD_OPERATION); crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); value = g_hash_table_lookup(hash, "stonith-watchdog-timeout"); crm_xml_add(data, F_LRMD_WATCHDOG, value); rc = lrmd_send_command(lrmd, LRMD_OP_CHECK, data, NULL, 0, 0, native->type == CRM_CLIENT_IPC ? TRUE : FALSE); free_xml(data); return rc < 0 ? rc : pcmk_ok; } static int lrmd_handshake(lrmd_t * lrmd, const char *name) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->private; xmlNode *reply = NULL; xmlNode *hello = create_xml_node(NULL, "lrmd_command"); crm_xml_add(hello, F_TYPE, T_LRMD); crm_xml_add(hello, F_LRMD_OPERATION, CRM_OP_REGISTER); crm_xml_add(hello, F_LRMD_CLIENTNAME, name); crm_xml_add(hello, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION); /* advertise that we are a proxy provider */ if (native->proxy_callback) { crm_xml_add(hello, F_LRMD_IS_IPC_PROVIDER, "true"); } rc = lrmd_send_xml(lrmd, hello, -1, &reply); if (rc < 0) { crm_perror(LOG_DEBUG, "Couldn't complete registration with the lrmd API: %d", rc); rc = -ECOMM; } else if (reply == NULL) { crm_err("Did not receive registration reply"); rc = -EPROTO; } else { const char *version = crm_element_value(reply, F_LRMD_PROTOCOL_VERSION); const char *msg_type = crm_element_value(reply, F_LRMD_OPERATION); const char *tmp_ticket = crm_element_value(reply, F_LRMD_CLIENTID); crm_element_value_int(reply, F_LRMD_RC, &rc); if (rc == -EPROTO) { crm_err("LRMD protocol mismatch client version %s, server version %s", LRMD_PROTOCOL_VERSION, version); crm_log_xml_err(reply, "Protocol Error"); } else if (safe_str_neq(msg_type, CRM_OP_REGISTER)) { crm_err("Invalid registration message: %s", msg_type); crm_log_xml_err(reply, "Bad reply"); rc = -EPROTO; } else if (tmp_ticket == NULL) { crm_err("No registration token provided"); crm_log_xml_err(reply, "Bad reply"); rc = -EPROTO; } else { crm_trace("Obtained registration token: %s", tmp_ticket); native->token = strdup(tmp_ticket); native->peer_version = strdup(version?version:"1.0"); /* Included since 1.1 */ rc = pcmk_ok; } } free_xml(reply); free_xml(hello); if (rc != pcmk_ok) { lrmd_api_disconnect(lrmd); } return rc; } static int lrmd_ipc_connect(lrmd_t * lrmd, int *fd) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->private; static struct ipc_client_callbacks lrmd_callbacks = { .dispatch = lrmd_ipc_dispatch, .destroy = lrmd_ipc_connection_destroy }; crm_info("Connecting to lrmd"); if (fd) { /* No mainloop */ native->ipc = crm_ipc_new(CRM_SYSTEM_LRMD, 0); if (native->ipc && crm_ipc_connect(native->ipc)) { *fd = crm_ipc_get_fd(native->ipc); } else if (native->ipc) { crm_perror(LOG_ERR, "Connection to local resource manager failed"); rc = -ENOTCONN; } } else { native->source = mainloop_add_ipc_client(CRM_SYSTEM_LRMD, G_PRIORITY_HIGH, 0, lrmd, &lrmd_callbacks); native->ipc = mainloop_get_ipc_client(native->source); } if (native->ipc == NULL) { crm_debug("Could not connect to the LRMD API"); rc = -ENOTCONN; } return rc; } #ifdef HAVE_GNUTLS_GNUTLS_H static int set_key(gnutls_datum_t * key, const char *location) { FILE *stream; int read_len = 256; int cur_len = 0; int buf_len = read_len; static char *key_cache = NULL; static size_t key_cache_len = 0; static time_t key_cache_updated; if (location == NULL) { return -1; } if (key_cache) { time_t now = time(NULL); if ((now - key_cache_updated) < 60) { key->data = gnutls_malloc(key_cache_len + 1); key->size = key_cache_len; memcpy(key->data, key_cache, key_cache_len); crm_debug("using cached LRMD key"); return 0; } else { key_cache_len = 0; key_cache_updated = 0; free(key_cache); key_cache = NULL; crm_debug("clearing lrmd key cache"); } } stream = fopen(location, "r"); if (!stream) { return -1; } key->data = gnutls_malloc(read_len); while (!feof(stream)) { int next; if (cur_len == buf_len) { buf_len = cur_len + read_len; key->data = gnutls_realloc(key->data, buf_len); } next = fgetc(stream); if (next == EOF && feof(stream)) { break; } key->data[cur_len] = next; cur_len++; } fclose(stream); key->size = cur_len; if (!cur_len) { gnutls_free(key->data); key->data = 0; return -1; } if (!key_cache) { key_cache = calloc(1, key->size + 1); memcpy(key_cache, key->data, key->size); key_cache_len = key->size; key_cache_updated = time(NULL); } return 0; } int lrmd_tls_set_key(gnutls_datum_t * key) { int rc = 0; const char *specific_location = getenv("PCMK_authkey_location"); if (set_key(key, specific_location) == 0) { crm_debug("Using custom authkey location %s", specific_location); return 0; } else if (specific_location) { crm_err("No valid lrmd remote key found at %s, trying default location", specific_location); } if (set_key(key, DEFAULT_REMOTE_KEY_LOCATION) != 0) { rc = set_key(key, ALT_REMOTE_KEY_LOCATION); } if (rc) { crm_err("No valid lrmd remote key found at %s", DEFAULT_REMOTE_KEY_LOCATION); return -1; } return rc; } static void lrmd_gnutls_global_init(void) { static int gnutls_init = 0; if (!gnutls_init) { crm_gnutls_global_init(); } gnutls_init = 1; } #endif static void report_async_connection_result(lrmd_t * lrmd, int rc) { lrmd_private_t *native = lrmd->private; if (native->callback) { lrmd_event_data_t event = { 0, }; event.type = lrmd_event_connect; event.remote_nodename = native->remote_nodename; event.connection_rc = rc; native->callback(&event); } } #ifdef HAVE_GNUTLS_GNUTLS_H static void lrmd_tcp_connect_cb(void *userdata, int sock) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->private; char name[256] = { 0, }; static struct mainloop_fd_callbacks lrmd_tls_callbacks = { .dispatch = lrmd_tls_dispatch, .destroy = lrmd_tls_connection_destroy, }; int rc = sock; gnutls_datum_t psk_key = { NULL, 0 }; native->async_timer = 0; if (rc < 0) { lrmd_tls_connection_destroy(lrmd); crm_info("remote lrmd connect to %s at port %d failed", native->server, native->port); report_async_connection_result(lrmd, rc); return; } /* TODO continue with tls stuff now that tcp connect passed. make this async as well soon * to avoid all blocking code in the client. */ native->sock = sock; if (lrmd_tls_set_key(&psk_key) != 0) { lrmd_tls_connection_destroy(lrmd); return; } gnutls_psk_allocate_client_credentials(&native->psk_cred_c); gnutls_psk_set_client_credentials(native->psk_cred_c, DEFAULT_REMOTE_USERNAME, &psk_key, GNUTLS_PSK_KEY_RAW); gnutls_free(psk_key.data); native->remote->tls_session = create_psk_tls_session(sock, GNUTLS_CLIENT, native->psk_cred_c); if (crm_initiate_client_tls_handshake(native->remote, LRMD_CLIENT_HANDSHAKE_TIMEOUT) != 0) { crm_warn("Client tls handshake failed for server %s:%d. Disconnecting", native->server, native->port); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); native->remote->tls_session = NULL; lrmd_tls_connection_destroy(lrmd); report_async_connection_result(lrmd, -1); return; } crm_info("Remote lrmd client TLS connection established with server %s:%d", native->server, native->port); snprintf(name, 128, "remote-lrmd-%s:%d", native->server, native->port); native->process_notify = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_tls_dispatch, lrmd); native->source = mainloop_add_fd(name, G_PRIORITY_HIGH, native->sock, lrmd, &lrmd_tls_callbacks); rc = lrmd_handshake(lrmd, name); report_async_connection_result(lrmd, rc); return; } static int lrmd_tls_connect_async(lrmd_t * lrmd, int timeout /*ms */ ) { int rc = -1; int sock = 0; int timer_id = 0; lrmd_private_t *native = lrmd->private; lrmd_gnutls_global_init(); sock = crm_remote_tcp_connect_async(native->server, native->port, timeout, &timer_id, lrmd, lrmd_tcp_connect_cb); if (sock != -1) { native->sock = sock; rc = 0; native->async_timer = timer_id; } return rc; } static int lrmd_tls_connect(lrmd_t * lrmd, int *fd) { static struct mainloop_fd_callbacks lrmd_tls_callbacks = { .dispatch = lrmd_tls_dispatch, .destroy = lrmd_tls_connection_destroy, }; lrmd_private_t *native = lrmd->private; int sock; gnutls_datum_t psk_key = { NULL, 0 }; lrmd_gnutls_global_init(); sock = crm_remote_tcp_connect(native->server, native->port); if (sock < 0) { crm_warn("Could not establish remote lrmd connection to %s", native->server); lrmd_tls_connection_destroy(lrmd); return -ENOTCONN; } native->sock = sock; if (lrmd_tls_set_key(&psk_key) != 0) { lrmd_tls_connection_destroy(lrmd); return -1; } gnutls_psk_allocate_client_credentials(&native->psk_cred_c); gnutls_psk_set_client_credentials(native->psk_cred_c, DEFAULT_REMOTE_USERNAME, &psk_key, GNUTLS_PSK_KEY_RAW); gnutls_free(psk_key.data); native->remote->tls_session = create_psk_tls_session(sock, GNUTLS_CLIENT, native->psk_cred_c); if (crm_initiate_client_tls_handshake(native->remote, LRMD_CLIENT_HANDSHAKE_TIMEOUT) != 0) { crm_err("Session creation for %s:%d failed", native->server, native->port); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); native->remote->tls_session = NULL; lrmd_tls_connection_destroy(lrmd); return -1; } crm_info("Remote lrmd client TLS connection established with server %s:%d", native->server, native->port); if (fd) { *fd = sock; } else { char name[256] = { 0, }; snprintf(name, 128, "remote-lrmd-%s:%d", native->server, native->port); native->process_notify = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_tls_dispatch, lrmd); native->source = mainloop_add_fd(name, G_PRIORITY_HIGH, native->sock, lrmd, &lrmd_tls_callbacks); } return pcmk_ok; } #endif static int lrmd_api_connect(lrmd_t * lrmd, const char *name, int *fd) { int rc = -ENOTCONN; lrmd_private_t *native = lrmd->private; switch (native->type) { case CRM_CLIENT_IPC: rc = lrmd_ipc_connect(lrmd, fd); break; #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: rc = lrmd_tls_connect(lrmd, fd); break; #endif default: crm_err("Unsupported connection type: %d", native->type); } if (rc == pcmk_ok) { rc = lrmd_handshake(lrmd, name); } return rc; } static int lrmd_api_connect_async(lrmd_t * lrmd, const char *name, int timeout) { int rc = 0; lrmd_private_t *native = lrmd->private; if (!native->callback) { crm_err("Async connect not possible, no lrmd client callback set."); return -1; } switch (native->type) { case CRM_CLIENT_IPC: /* fake async connection with ipc. it should be fast * enough that we gain very little from async */ rc = lrmd_api_connect(lrmd, name, NULL); if (!rc) { report_async_connection_result(lrmd, rc); } break; #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: rc = lrmd_tls_connect_async(lrmd, timeout); if (rc) { /* connection failed, report rc now */ report_async_connection_result(lrmd, rc); } break; #endif default: crm_err("Unsupported connection type: %d", native->type); } return rc; } static void lrmd_ipc_disconnect(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->private; if (native->source != NULL) { /* Attached to mainloop */ mainloop_del_ipc_client(native->source); native->source = NULL; native->ipc = NULL; } else if (native->ipc) { /* Not attached to mainloop */ crm_ipc_t *ipc = native->ipc; native->ipc = NULL; crm_ipc_close(ipc); crm_ipc_destroy(ipc); } } #ifdef HAVE_GNUTLS_GNUTLS_H static void lrmd_tls_disconnect(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->private; if (native->remote->tls_session) { gnutls_bye(*native->remote->tls_session, GNUTLS_SHUT_RDWR); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); native->remote->tls_session = 0; } if (native->async_timer) { g_source_remove(native->async_timer); native->async_timer = 0; } if (native->source != NULL) { /* Attached to mainloop */ mainloop_del_ipc_client(native->source); native->source = NULL; } else if (native->sock) { close(native->sock); native->sock = 0; } if (native->pending_notify) { g_list_free_full(native->pending_notify, lrmd_free_xml); native->pending_notify = NULL; } } #endif static int lrmd_api_disconnect(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->private; crm_info("Disconnecting from %d lrmd service", native->type); switch (native->type) { case CRM_CLIENT_IPC: lrmd_ipc_disconnect(lrmd); break; #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: lrmd_tls_disconnect(lrmd); break; #endif default: crm_err("Unsupported connection type: %d", native->type); } free(native->token); native->token = NULL; free(native->peer_version); native->peer_version = NULL; return 0; } static int lrmd_api_register_rsc(lrmd_t * lrmd, const char *rsc_id, const char *class, const char *provider, const char *type, enum lrmd_call_options options) { int rc = pcmk_ok; xmlNode *data = NULL; if (!class || !type || !rsc_id) { return -EINVAL; } if (safe_str_eq(class, PCMK_RESOURCE_CLASS_OCF) && !provider) { return -EINVAL; } data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); crm_xml_add(data, F_LRMD_CLASS, class); crm_xml_add(data, F_LRMD_PROVIDER, provider); crm_xml_add(data, F_LRMD_TYPE, type); rc = lrmd_send_command(lrmd, LRMD_OP_RSC_REG, data, NULL, 0, options, TRUE); free_xml(data); return rc; } static int lrmd_api_unregister_rsc(lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options) { int rc = pcmk_ok; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); rc = lrmd_send_command(lrmd, LRMD_OP_RSC_UNREG, data, NULL, 0, options, TRUE); free_xml(data); return rc; } lrmd_rsc_info_t * lrmd_copy_rsc_info(lrmd_rsc_info_t * rsc_info) { lrmd_rsc_info_t *copy = NULL; copy = calloc(1, sizeof(lrmd_rsc_info_t)); copy->id = strdup(rsc_info->id); copy->type = strdup(rsc_info->type); copy->class = strdup(rsc_info->class); if (rsc_info->provider) { copy->provider = strdup(rsc_info->provider); } return copy; } void lrmd_free_rsc_info(lrmd_rsc_info_t * rsc_info) { if (!rsc_info) { return; } free(rsc_info->id); free(rsc_info->type); free(rsc_info->class); free(rsc_info->provider); free(rsc_info); } static lrmd_rsc_info_t * lrmd_api_get_rsc_info(lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options) { lrmd_rsc_info_t *rsc_info = NULL; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); xmlNode *output = NULL; const char *class = NULL; const char *provider = NULL; const char *type = NULL; crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); lrmd_send_command(lrmd, LRMD_OP_RSC_INFO, data, &output, 0, options, TRUE); free_xml(data); if (!output) { return NULL; } class = crm_element_value(output, F_LRMD_CLASS); provider = crm_element_value(output, F_LRMD_PROVIDER); type = crm_element_value(output, F_LRMD_TYPE); if (!class || !type) { free_xml(output); return NULL; } else if (safe_str_eq(class, PCMK_RESOURCE_CLASS_OCF) && !provider) { free_xml(output); return NULL; } rsc_info = calloc(1, sizeof(lrmd_rsc_info_t)); rsc_info->id = strdup(rsc_id); rsc_info->class = strdup(class); if (provider) { rsc_info->provider = strdup(provider); } rsc_info->type = strdup(type); free_xml(output); return rsc_info; } static void lrmd_api_set_callback(lrmd_t * lrmd, lrmd_event_callback callback) { lrmd_private_t *native = lrmd->private; native->callback = callback; } void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)) { lrmd_private_t *native = lrmd->private; native->proxy_callback = callback; native->proxy_callback_userdata = userdata; } void lrmd_internal_proxy_dispatch(lrmd_t *lrmd, xmlNode *msg) { lrmd_private_t *native = lrmd->private; if (native->proxy_callback) { crm_log_xml_trace(msg, "PROXY_INBOUND"); native->proxy_callback(lrmd, native->proxy_callback_userdata, msg); } } int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg) { if (lrmd == NULL) { return -ENOTCONN; } crm_xml_add(msg, F_LRMD_OPERATION, CRM_OP_IPC_FWD); crm_log_xml_trace(msg, "PROXY_OUTBOUND"); return lrmd_send_xml_no_reply(lrmd, msg); } static int stonith_get_metadata(const char *provider, const char *type, char **output) { int rc = pcmk_ok; stonith_t *stonith_api = stonith_api_new(); if(stonith_api) { stonith_api->cmds->metadata(stonith_api, st_opt_sync_call, type, provider, output, 0); stonith_api->cmds->free(stonith_api); } if (*output == NULL) { rc = -EIO; } return rc; } #define lsb_metadata_template \ "\n" \ "\n" \ "\n" \ " 1.0\n" \ " \n" \ " %s\n" \ " \n" \ " %s\n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " %s\n" \ " %s\n" \ " %s\n" \ " %s\n" \ " %s\n" \ " %s\n" \ " %s\n" \ " \n" \ "\n" #define LSB_INITSCRIPT_INFOBEGIN_TAG "### BEGIN INIT INFO" #define LSB_INITSCRIPT_INFOEND_TAG "### END INIT INFO" #define PROVIDES "# Provides:" #define REQ_START "# Required-Start:" #define REQ_STOP "# Required-Stop:" #define SHLD_START "# Should-Start:" #define SHLD_STOP "# Should-Stop:" #define DFLT_START "# Default-Start:" #define DFLT_STOP "# Default-Stop:" #define SHORT_DSCR "# Short-Description:" #define DESCRIPTION "# Description:" #define lsb_meta_helper_free_value(m) \ do { \ if ((m) != NULL) { \ xmlFree(m); \ (m) = NULL; \ } \ } while(0) /*! * \internal * \brief Grab an LSB header value * * \param[in] line Line read from LSB init script * \param[in/out] value If not set, will be set to XML-safe copy of value * \param[in] prefix Set value if line starts with this pattern * * \return TRUE if value was set, FALSE otherwise */ static inline gboolean lsb_meta_helper_get_value(const char *line, char **value, const char *prefix) { if (!*value && !strncasecmp(line, prefix, strlen(prefix))) { *value = (char *)xmlEncodeEntitiesReentrant(NULL, BAD_CAST line+strlen(prefix)); return TRUE; } return FALSE; } static int lsb_get_metadata(const char *type, char **output) { char ra_pathname[PATH_MAX] = { 0, }; FILE *fp; char buffer[1024]; char *provides = NULL; char *req_start = NULL; char *req_stop = NULL; char *shld_start = NULL; char *shld_stop = NULL; char *dflt_start = NULL; char *dflt_stop = NULL; char *s_dscrpt = NULL; char *xml_l_dscrpt = NULL; int offset = 0; int max = 2048; char description[max]; if(type[0] == '/') { snprintf(ra_pathname, sizeof(ra_pathname), "%s", type); } else { snprintf(ra_pathname, sizeof(ra_pathname), "%s/%s", LSB_ROOT_DIR, type); } crm_trace("Looking into %s", ra_pathname); if (!(fp = fopen(ra_pathname, "r"))) { return -errno; } /* Enter into the lsb-compliant comment block */ while (fgets(buffer, sizeof(buffer), fp)) { /* Now suppose each of the following eight arguments contain only one line */ if (lsb_meta_helper_get_value(buffer, &provides, PROVIDES)) { continue; } if (lsb_meta_helper_get_value(buffer, &req_start, REQ_START)) { continue; } if (lsb_meta_helper_get_value(buffer, &req_stop, REQ_STOP)) { continue; } if (lsb_meta_helper_get_value(buffer, &shld_start, SHLD_START)) { continue; } if (lsb_meta_helper_get_value(buffer, &shld_stop, SHLD_STOP)) { continue; } if (lsb_meta_helper_get_value(buffer, &dflt_start, DFLT_START)) { continue; } if (lsb_meta_helper_get_value(buffer, &dflt_stop, DFLT_STOP)) { continue; } if (lsb_meta_helper_get_value(buffer, &s_dscrpt, SHORT_DSCR)) { continue; } /* Long description may cross multiple lines */ if (offset == 0 && (0 == strncasecmp(buffer, DESCRIPTION, strlen(DESCRIPTION)))) { /* Between # and keyword, more than one space, or a tab * character, indicates the continuation line. * * Extracted from LSB init script standard */ while (fgets(buffer, sizeof(buffer), fp)) { if (!strncmp(buffer, "# ", 3) || !strncmp(buffer, "#\t", 2)) { buffer[0] = ' '; offset += snprintf(description+offset, max-offset, "%s", buffer); } else { fputs(buffer, fp); break; /* Long description ends */ } } continue; } if (xml_l_dscrpt == NULL && offset > 0) { xml_l_dscrpt = (char *)xmlEncodeEntitiesReentrant(NULL, BAD_CAST(description)); } if (!strncasecmp(buffer, LSB_INITSCRIPT_INFOEND_TAG, strlen(LSB_INITSCRIPT_INFOEND_TAG))) { /* Get to the out border of LSB comment block */ break; } if (buffer[0] != '#') { break; /* Out of comment block in the beginning */ } } fclose(fp); *output = crm_strdup_printf(lsb_metadata_template, type, (xml_l_dscrpt == NULL) ? type : xml_l_dscrpt, (s_dscrpt == NULL) ? type : s_dscrpt, (provides == NULL) ? "" : provides, (req_start == NULL) ? "" : req_start, (req_stop == NULL) ? "" : req_stop, (shld_start == NULL) ? "" : shld_start, (shld_stop == NULL) ? "" : shld_stop, (dflt_start == NULL) ? "" : dflt_start, (dflt_stop == NULL) ? "" : dflt_stop); lsb_meta_helper_free_value(xml_l_dscrpt); lsb_meta_helper_free_value(s_dscrpt); lsb_meta_helper_free_value(provides); lsb_meta_helper_free_value(req_start); lsb_meta_helper_free_value(req_stop); lsb_meta_helper_free_value(shld_start); lsb_meta_helper_free_value(shld_stop); lsb_meta_helper_free_value(dflt_start); lsb_meta_helper_free_value(dflt_stop); crm_trace("Created fake metadata: %llu", (unsigned long long) strlen(*output)); return pcmk_ok; } #if SUPPORT_NAGIOS static int nagios_get_metadata(const char *type, char **output) { int rc = pcmk_ok; FILE *file_strm = NULL; int start = 0, length = 0, read_len = 0; char *metadata_file = NULL; int len = 36; len += strlen(NAGIOS_METADATA_DIR); len += strlen(type); metadata_file = calloc(1, len); CRM_CHECK(metadata_file != NULL, return -ENOMEM); sprintf(metadata_file, "%s/%s.xml", NAGIOS_METADATA_DIR, type); file_strm = fopen(metadata_file, "r"); if (file_strm == NULL) { crm_err("Metadata file %s does not exist", metadata_file); free(metadata_file); return -EIO; } /* see how big the file is */ start = ftell(file_strm); fseek(file_strm, 0L, SEEK_END); length = ftell(file_strm); fseek(file_strm, 0L, start); CRM_ASSERT(length >= 0); CRM_ASSERT(start == ftell(file_strm)); if (length <= 0) { crm_info("%s was not valid", metadata_file); free(*output); *output = NULL; rc = -EIO; } else { crm_trace("Reading %d bytes from file", length); *output = calloc(1, (length + 1)); read_len = fread(*output, 1, length, file_strm); if (read_len != length) { crm_err("Calculated and read bytes differ: %d vs. %d", length, read_len); free(*output); *output = NULL; rc = -EIO; } } fclose(file_strm); free(metadata_file); return rc; } #endif #if SUPPORT_HEARTBEAT /* strictly speaking, support for class=heartbeat style scripts * does not require "heartbeat support" to be enabled. * But since those scripts are part of the "heartbeat" package usually, * and are very unlikely to be present in any other deployment, * I leave it inside this ifdef. * * Yes, I know, these are legacy and should die, * or at least be rewritten to be a proper OCF style agent. * But they exist, and custom scripts following these rules do, too. * * Taken from the old "glue" lrmd, see * http://hg.linux-ha.org/glue/file/0a7add1d9996/lib/plugins/lrm/raexechb.c#l49 * http://hg.linux-ha.org/glue/file/0a7add1d9996/lib/plugins/lrm/raexechb.c#l393 */ static const char hb_metadata_template[] = "\n" "\n" "\n" "1.0\n" "\n" "%s" "\n" "%s\n" "\n" "\n" "\n" "This argument will be passed as the first argument to the " "heartbeat resource agent (assuming it supports one)\n" "\n" "argv[1]\n" "\n" "\n" "\n" "\n" "This argument will be passed as the second argument to the " "heartbeat resource agent (assuming it supports one)\n" "\n" "argv[2]\n" "\n" "\n" "\n" "\n" "This argument will be passed as the third argument to the " "heartbeat resource agent (assuming it supports one)\n" "\n" "argv[3]\n" "\n" "\n" "\n" "\n" "This argument will be passed as the fourth argument to the " "heartbeat resource agent (assuming it supports one)\n" "\n" "argv[4]\n" "\n" "\n" "\n" "\n" "This argument will be passed as the fifth argument to the " "heartbeat resource agent (assuming it supports one)\n" "\n" "argv[5]\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n"; static int heartbeat_get_metadata(const char *type, char **output) { *output = crm_strdup_printf(hb_metadata_template, type, type, type); crm_trace("Created fake metadata: %llu", (unsigned long long) strlen(*output)); return pcmk_ok; } #endif static int generic_get_metadata(const char *standard, const char *provider, const char *type, char **output) { svc_action_t *action; action = resources_action_create(type, standard, provider, type, "meta-data", 0, 30000, NULL, 0); if (action == NULL) { crm_err("Unable to retrieve meta-data for %s:%s:%s", standard, provider, type); services_action_free(action); return -EINVAL; } if (!(services_action_sync(action))) { crm_err("Failed to retrieve meta-data for %s:%s:%s", standard, provider, type); services_action_free(action); return -EIO; } if (!action->stdout_data) { crm_err("Failed to receive meta-data for %s:%s:%s", standard, provider, type); services_action_free(action); return -EIO; } *output = strdup(action->stdout_data); services_action_free(action); return pcmk_ok; } static int lrmd_api_get_metadata(lrmd_t * lrmd, const char *class, const char *provider, const char *type, char **output, enum lrmd_call_options options) { if (!class || !type) { return -EINVAL; } if (safe_str_eq(class, PCMK_RESOURCE_CLASS_SERVICE)) { class = resources_find_service_class(type); } if (safe_str_eq(class, PCMK_RESOURCE_CLASS_STONITH)) { return stonith_get_metadata(provider, type, output); } else if (safe_str_eq(class, PCMK_RESOURCE_CLASS_LSB)) { return lsb_get_metadata(type, output); #if SUPPORT_NAGIOS } else if (safe_str_eq(class, PCMK_RESOURCE_CLASS_NAGIOS)) { return nagios_get_metadata(type, output); #endif #if SUPPORT_HEARTBEAT } else if (safe_str_eq(class, PCMK_RESOURCE_CLASS_HB)) { return heartbeat_get_metadata(type, output); #endif } return generic_get_metadata(class, provider, type, output); } static int lrmd_api_exec(lrmd_t * lrmd, const char *rsc_id, const char *action, const char *userdata, int interval, /* ms */ int timeout, /* ms */ int start_delay, /* ms */ enum lrmd_call_options options, lrmd_key_value_t * params) { int rc = pcmk_ok; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); xmlNode *args = create_xml_node(data, XML_TAG_ATTRS); lrmd_key_value_t *tmp = NULL; crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); crm_xml_add(data, F_LRMD_RSC_ACTION, action); crm_xml_add(data, F_LRMD_RSC_USERDATA_STR, userdata); crm_xml_add_int(data, F_LRMD_RSC_INTERVAL, interval); crm_xml_add_int(data, F_LRMD_TIMEOUT, timeout); crm_xml_add_int(data, F_LRMD_RSC_START_DELAY, start_delay); for (tmp = params; tmp; tmp = tmp->next) { hash2smartfield((gpointer) tmp->key, (gpointer) tmp->value, args); } rc = lrmd_send_command(lrmd, LRMD_OP_RSC_EXEC, data, NULL, timeout, options, TRUE); free_xml(data); lrmd_key_value_freeall(params); return rc; } +static int +lrmd_api_exec_alert(lrmd_t * lrmd, const char *alert_id, + int timeout, /* ms */ + enum lrmd_call_options options, lrmd_key_value_t * params) +{ + int rc = pcmk_ok; + xmlNode *data = create_xml_node(NULL, F_LRMD_ALERT); + xmlNode *args = create_xml_node(data, XML_TAG_ATTRS); + lrmd_key_value_t *tmp = NULL; + + crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); + crm_xml_add(data, F_LRMD_ALERT_ID, alert_id); + crm_xml_add_int(data, F_LRMD_TIMEOUT, timeout); + + for (tmp = params; tmp; tmp = tmp->next) { + hash2smartfield((gpointer) tmp->key, (gpointer) tmp->value, args); + } + + rc = lrmd_send_command(lrmd, LRMD_OP_ALERT_EXEC, data, NULL, timeout, options, TRUE); + free_xml(data); + + lrmd_key_value_freeall(params); + return rc; +} + static int lrmd_api_cancel(lrmd_t * lrmd, const char *rsc_id, const char *action, int interval) { int rc = pcmk_ok; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add(data, F_LRMD_RSC_ACTION, action); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); crm_xml_add_int(data, F_LRMD_RSC_INTERVAL, interval); rc = lrmd_send_command(lrmd, LRMD_OP_RSC_CANCEL, data, NULL, 0, 0, TRUE); free_xml(data); return rc; } static int list_stonith_agents(lrmd_list_t ** resources) { int rc = 0; stonith_t *stonith_api = stonith_api_new(); stonith_key_value_t *stonith_resources = NULL; stonith_key_value_t *dIter = NULL; if(stonith_api) { stonith_api->cmds->list_agents(stonith_api, st_opt_sync_call, NULL, &stonith_resources, 0); stonith_api->cmds->free(stonith_api); } for (dIter = stonith_resources; dIter; dIter = dIter->next) { rc++; if (resources) { *resources = lrmd_list_add(*resources, dIter->value); } } stonith_key_value_freeall(stonith_resources, 1, 0); return rc; } static int lrmd_api_list_agents(lrmd_t * lrmd, lrmd_list_t ** resources, const char *class, const char *provider) { int rc = 0; if (safe_str_eq(class, PCMK_RESOURCE_CLASS_STONITH)) { rc += list_stonith_agents(resources); } else { GListPtr gIter = NULL; GList *agents = resources_list_agents(class, provider); for (gIter = agents; gIter != NULL; gIter = gIter->next) { *resources = lrmd_list_add(*resources, (const char *)gIter->data); rc++; } g_list_free_full(agents, free); if (!class) { rc += list_stonith_agents(resources); } } if (rc == 0) { crm_notice("No agents found for class %s", class); rc = -EPROTONOSUPPORT; } return rc; } static int does_provider_have_agent(const char *agent, const char *provider, const char *class) { int found = 0; GList *agents = NULL; GListPtr gIter2 = NULL; agents = resources_list_agents(class, provider); for (gIter2 = agents; gIter2 != NULL; gIter2 = gIter2->next) { if (safe_str_eq(agent, gIter2->data)) { found = 1; } } g_list_free_full(agents, free); return found; } static int lrmd_api_list_ocf_providers(lrmd_t * lrmd, const char *agent, lrmd_list_t ** providers) { int rc = pcmk_ok; char *provider = NULL; GList *ocf_providers = NULL; GListPtr gIter = NULL; ocf_providers = resources_list_providers(PCMK_RESOURCE_CLASS_OCF); for (gIter = ocf_providers; gIter != NULL; gIter = gIter->next) { provider = gIter->data; if (!agent || does_provider_have_agent(agent, provider, PCMK_RESOURCE_CLASS_OCF)) { *providers = lrmd_list_add(*providers, (const char *)gIter->data); rc++; } } g_list_free_full(ocf_providers, free); return rc; } static int lrmd_api_list_standards(lrmd_t * lrmd, lrmd_list_t ** supported) { int rc = 0; GList *standards = NULL; GListPtr gIter = NULL; standards = resources_list_standards(); for (gIter = standards; gIter != NULL; gIter = gIter->next) { *supported = lrmd_list_add(*supported, (const char *)gIter->data); rc++; } if (list_stonith_agents(NULL) > 0) { *supported = lrmd_list_add(*supported, PCMK_RESOURCE_CLASS_STONITH); rc++; } g_list_free_full(standards, free); return rc; } lrmd_t * lrmd_api_new(void) { lrmd_t *new_lrmd = NULL; lrmd_private_t *pvt = NULL; new_lrmd = calloc(1, sizeof(lrmd_t)); pvt = calloc(1, sizeof(lrmd_private_t)); pvt->remote = calloc(1, sizeof(crm_remote_t)); new_lrmd->cmds = calloc(1, sizeof(lrmd_api_operations_t)); pvt->type = CRM_CLIENT_IPC; new_lrmd->private = pvt; new_lrmd->cmds->connect = lrmd_api_connect; new_lrmd->cmds->connect_async = lrmd_api_connect_async; new_lrmd->cmds->is_connected = lrmd_api_is_connected; new_lrmd->cmds->poke_connection = lrmd_api_poke_connection; new_lrmd->cmds->disconnect = lrmd_api_disconnect; new_lrmd->cmds->register_rsc = lrmd_api_register_rsc; new_lrmd->cmds->unregister_rsc = lrmd_api_unregister_rsc; new_lrmd->cmds->get_rsc_info = lrmd_api_get_rsc_info; new_lrmd->cmds->set_callback = lrmd_api_set_callback; new_lrmd->cmds->get_metadata = lrmd_api_get_metadata; new_lrmd->cmds->exec = lrmd_api_exec; new_lrmd->cmds->cancel = lrmd_api_cancel; new_lrmd->cmds->list_agents = lrmd_api_list_agents; new_lrmd->cmds->list_ocf_providers = lrmd_api_list_ocf_providers; new_lrmd->cmds->list_standards = lrmd_api_list_standards; + new_lrmd->cmds->exec_alert = lrmd_api_exec_alert; return new_lrmd; } lrmd_t * lrmd_remote_api_new(const char *nodename, const char *server, int port) { #ifdef HAVE_GNUTLS_GNUTLS_H lrmd_t *new_lrmd = lrmd_api_new(); lrmd_private_t *native = new_lrmd->private; if (!nodename && !server) { lrmd_api_delete(new_lrmd); return NULL; } native->type = CRM_CLIENT_TLS; native->remote_nodename = nodename ? strdup(nodename) : strdup(server); native->server = server ? strdup(server) : strdup(nodename); native->port = port; if (native->port == 0) { const char *remote_port_str = getenv("PCMK_remote_port"); native->port = remote_port_str ? atoi(remote_port_str) : DEFAULT_REMOTE_PORT; } return new_lrmd; #else crm_err("GNUTLS is not enabled for this build, remote LRMD client can not be created"); return NULL; #endif } void lrmd_api_delete(lrmd_t * lrmd) { if (!lrmd) { return; } lrmd->cmds->disconnect(lrmd); /* no-op if already disconnected */ free(lrmd->cmds); if (lrmd->private) { lrmd_private_t *native = lrmd->private; #ifdef HAVE_GNUTLS_GNUTLS_H free(native->server); #endif free(native->remote_nodename); free(native->remote); free(native->token); free(native->peer_version); } free(lrmd->private); free(lrmd); } diff --git a/lib/services/services.c b/lib/services/services.c index 2765e437bd..1a031e044d 100644 --- a/lib/services/services.c +++ b/lib/services/services.c @@ -1,946 +1,951 @@ /* * Copyright (C) 2010-2016 Andrew Beekhof * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include #include #include "services_private.h" #if SUPPORT_UPSTART # include #endif #if SUPPORT_SYSTEMD # include #endif /* TODO: Develop a rollover strategy */ static int operations = 0; static GHashTable *recurring_actions = NULL; /* ops waiting to run async because of conflicting active * pending ops */ static GList *blocked_ops = NULL; /* ops currently active (in-flight) */ static GList *inflight_ops = NULL; static void handle_blocked_ops(void); svc_action_t * services_action_create(const char *name, const char *action, int interval, int timeout) { return resources_action_create(name, PCMK_RESOURCE_CLASS_LSB, NULL, name, action, interval, timeout, NULL, 0); } const char * resources_find_service_class(const char *agent) { /* Priority is: * - lsb * - systemd * - upstart */ int rc = 0; struct stat st; char *path = NULL; #ifdef LSB_ROOT_DIR rc = asprintf(&path, "%s/%s", LSB_ROOT_DIR, agent); if (rc > 0 && stat(path, &st) == 0) { free(path); return PCMK_RESOURCE_CLASS_LSB; } free(path); #endif #if SUPPORT_SYSTEMD if (systemd_unit_exists(agent)) { return PCMK_RESOURCE_CLASS_SYSTEMD; } #endif #if SUPPORT_UPSTART if (upstart_job_exists(agent)) { return PCMK_RESOURCE_CLASS_UPSTART; } #endif return NULL; } static inline void init_recurring_actions(void) { if (recurring_actions == NULL) { recurring_actions = g_hash_table_new_full(g_str_hash, g_str_equal, NULL, NULL); } } /*! * \internal * \brief Check whether op is in-flight systemd or upstart op * * \param[in] op Operation to check * * \return TRUE if op is in-flight systemd or upstart op */ static inline gboolean inflight_systemd_or_upstart(svc_action_t *op) { return (safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_SYSTEMD) || safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_UPSTART)) && (g_list_find(inflight_ops, op) != NULL); } /*! * \internal * \brief Expand "service" alias to an actual resource class * * \param[in] rsc Resource name (for logging only) * \param[in] standard Resource class as configured * \param[in] agent Agent name to look for * * \return Newly allocated string with actual resource class * * \note The caller is responsible for calling free() on the result. */ static char * expand_resource_class(const char *rsc, const char *standard, const char *agent) { char *expanded_class = NULL; if (strcasecmp(standard, PCMK_RESOURCE_CLASS_SERVICE) == 0) { const char *found_class = resources_find_service_class(agent); if (found_class) { crm_debug("Found %s agent %s for %s", found_class, agent, rsc); expanded_class = strdup(found_class); } else { crm_info("Assuming resource class lsb for agent %s for %s", agent, rsc); expanded_class = strdup(PCMK_RESOURCE_CLASS_LSB); } } else { expanded_class = strdup(standard); } CRM_ASSERT(expanded_class); return expanded_class; } svc_action_t * resources_action_create(const char *name, const char *standard, const char *provider, const char *agent, const char *action, int interval, int timeout, GHashTable * params, enum svc_action_flags flags) { svc_action_t *op = NULL; /* * Do some up front sanity checks before we go off and * build the svc_action_t instance. */ if (crm_strlen_zero(name)) { crm_err("Cannot create operation without resource name"); goto return_error; } if (crm_strlen_zero(standard)) { crm_err("Cannot create operation for %s without resource class", name); goto return_error; } if (!strcasecmp(standard, PCMK_RESOURCE_CLASS_OCF) && crm_strlen_zero(provider)) { crm_err("Cannot create OCF operation for %s without provider", name); goto return_error; } if (crm_strlen_zero(agent)) { crm_err("Cannot create operation for %s without agent name", name); goto return_error; } if (crm_strlen_zero(action)) { crm_err("Cannot create operation for %s without operation name", name); goto return_error; } /* * Sanity checks passed, proceed! */ op = calloc(1, sizeof(svc_action_t)); op->opaque = calloc(1, sizeof(svc_action_private_t)); op->rsc = strdup(name); op->interval = interval; op->timeout = timeout; op->standard = expand_resource_class(name, standard, agent); op->agent = strdup(agent); op->sequence = ++operations; op->flags = flags; op->id = generate_op_key(name, action, interval); if (safe_str_eq(action, "monitor") && ( #if SUPPORT_HEARTBEAT safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_HB) || #endif safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_LSB))) { action = "status"; } op->action = strdup(action); if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_OCF) == 0) { op->provider = strdup(provider); op->params = params; params = NULL; if (asprintf(&op->opaque->exec, "%s/resource.d/%s/%s", OCF_ROOT_DIR, provider, agent) == -1) { crm_err("Internal error: cannot create agent path"); goto return_error; } op->opaque->args[0] = strdup(op->opaque->exec); op->opaque->args[1] = strdup(action); } else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_LSB) == 0) { if (op->agent[0] == '/') { /* if given an absolute path, use that instead * of tacking on the LSB_ROOT_DIR path to the front */ op->opaque->exec = strdup(op->agent); } else if (asprintf(&op->opaque->exec, "%s/%s", LSB_ROOT_DIR, op->agent) == -1) { crm_err("Internal error: cannot create agent path"); goto return_error; } op->opaque->args[0] = strdup(op->opaque->exec); op->opaque->args[1] = strdup(op->action); op->opaque->args[2] = NULL; #if SUPPORT_HEARTBEAT } else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_HB) == 0) { int index; int param_num; char buf_tmp[20]; void *value_tmp; if (op->agent[0] == '/') { /* if given an absolute path, use that instead * of tacking on the HB_RA_DIR path to the front */ op->opaque->exec = strdup(op->agent); } else if (asprintf(&op->opaque->exec, "%s/%s", HB_RA_DIR, op->agent) == -1) { crm_err("Internal error: cannot create agent path"); goto return_error; } op->opaque->args[0] = strdup(op->opaque->exec); /* The "heartbeat" agent class only has positional arguments, * which we keyed by their decimal position number. */ param_num = 1; for (index = 1; index <= MAX_ARGC - 3; index++ ) { snprintf(buf_tmp, sizeof(buf_tmp), "%d", index); value_tmp = g_hash_table_lookup(params, buf_tmp); if (value_tmp == NULL) { /* maybe: strdup("") ?? * But the old lrmd did simply continue as well. */ continue; } op->opaque->args[param_num++] = strdup(value_tmp); } /* Add operation code as the last argument, */ /* and the teminating NULL pointer */ op->opaque->args[param_num++] = strdup(op->action); op->opaque->args[param_num] = NULL; #endif #if SUPPORT_SYSTEMD } else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_SYSTEMD) == 0) { op->opaque->exec = strdup("systemd-dbus"); #endif #if SUPPORT_UPSTART } else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_UPSTART) == 0) { op->opaque->exec = strdup("upstart-dbus"); #endif #if SUPPORT_NAGIOS } else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_NAGIOS) == 0) { int index = 0; if (op->agent[0] == '/') { /* if given an absolute path, use that instead * of tacking on the NAGIOS_PLUGIN_DIR path to the front */ op->opaque->exec = strdup(op->agent); } else if (asprintf(&op->opaque->exec, "%s/%s", NAGIOS_PLUGIN_DIR, op->agent) == -1) { crm_err("Internal error: cannot create agent path"); goto return_error; } op->opaque->args[0] = strdup(op->opaque->exec); index = 1; if (safe_str_eq(op->action, "monitor") && op->interval == 0) { /* Invoke --version for a nagios probe */ op->opaque->args[index] = strdup("--version"); index++; } else if (params) { GHashTableIter iter; char *key = NULL; char *value = NULL; static int args_size = sizeof(op->opaque->args) / sizeof(char *); g_hash_table_iter_init(&iter, params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value) && index <= args_size - 3) { int len = 3; char *long_opt = NULL; if (safe_str_eq(key, XML_ATTR_CRM_VERSION) || strstr(key, CRM_META "_")) { continue; } len += strlen(key); long_opt = calloc(1, len); sprintf(long_opt, "--%s", key); long_opt[len - 1] = 0; op->opaque->args[index] = long_opt; op->opaque->args[index + 1] = strdup(value); index += 2; } } op->opaque->args[index] = NULL; #endif } else { crm_err("Unknown resource standard: %s", op->standard); services_action_free(op); op = NULL; } if(params) { g_hash_table_destroy(params); } return op; return_error: if(params) { g_hash_table_destroy(params); } services_action_free(op); return NULL; } svc_action_t * services_action_create_generic(const char *exec, const char *args[]) { svc_action_t *op; unsigned int cur_arg; op = calloc(1, sizeof(*op)); op->opaque = calloc(1, sizeof(svc_action_private_t)); op->opaque->exec = strdup(exec); op->opaque->args[0] = strdup(exec); for (cur_arg = 1; args && args[cur_arg - 1]; cur_arg++) { op->opaque->args[cur_arg] = strdup(args[cur_arg - 1]); if (cur_arg == DIMOF(op->opaque->args) - 1) { crm_err("svc_action_t args list not long enough for '%s' execution request.", exec); break; } } return op; } #if SUPPORT_DBUS /*! * \internal * \brief Update operation's pending DBus call, unreferencing old one if needed * * \param[in,out] op Operation to modify * \param[in] pending Pending call to set */ void services_set_op_pending(svc_action_t *op, DBusPendingCall *pending) { if (op->opaque->pending && (op->opaque->pending != pending)) { if (pending) { crm_info("Lost pending %s DBus call (%p)", op->id, op->opaque->pending); } else { crm_trace("Done with pending %s DBus call (%p)", op->id, op->opaque->pending); } dbus_pending_call_unref(op->opaque->pending); } op->opaque->pending = pending; if (pending) { crm_trace("Updated pending %s DBus call (%p)", op->id, pending); } else { crm_trace("Cleared pending %s DBus call", op->id); } } #endif void services_action_cleanup(svc_action_t * op) { if(op->opaque == NULL) { return; } #if SUPPORT_DBUS if(op->opaque->timerid != 0) { crm_trace("Removing timer for call %s to %s", op->action, op->rsc); g_source_remove(op->opaque->timerid); op->opaque->timerid = 0; } if(op->opaque->pending) { crm_trace("Cleaning up pending dbus call %p %s for %s", op->opaque->pending, op->action, op->rsc); if(dbus_pending_call_get_completed(op->opaque->pending)) { crm_warn("Pending dbus call %s for %s did not complete", op->action, op->rsc); } dbus_pending_call_cancel(op->opaque->pending); dbus_pending_call_unref(op->opaque->pending); op->opaque->pending = NULL; } #endif if (op->opaque->stderr_gsource) { mainloop_del_fd(op->opaque->stderr_gsource); op->opaque->stderr_gsource = NULL; } if (op->opaque->stdout_gsource) { mainloop_del_fd(op->opaque->stdout_gsource); op->opaque->stdout_gsource = NULL; } } void services_action_free(svc_action_t * op) { unsigned int i; if (op == NULL) { return; } /* The operation should be removed from all tracking lists by this point. * If it's not, we have a bug somewhere, so bail. That may lead to a * memory leak, but it's better than a use-after-free segmentation fault. */ CRM_CHECK(g_list_find(inflight_ops, op) == NULL, return); CRM_CHECK(g_list_find(blocked_ops, op) == NULL, return); CRM_CHECK((recurring_actions == NULL) || (g_hash_table_lookup(recurring_actions, op->id) == NULL), return); services_action_cleanup(op); if (op->opaque->repeat_timer) { g_source_remove(op->opaque->repeat_timer); op->opaque->repeat_timer = 0; } free(op->id); free(op->opaque->exec); for (i = 0; i < DIMOF(op->opaque->args); i++) { free(op->opaque->args[i]); } free(op->opaque); free(op->rsc); free(op->action); free(op->standard); free(op->agent); free(op->provider); free(op->stdout_data); free(op->stderr_data); if (op->params) { g_hash_table_destroy(op->params); op->params = NULL; } + if (op->alert_params) { + g_hash_table_destroy(op->alert_params); + op->alert_params = NULL; + } + free(op); } gboolean cancel_recurring_action(svc_action_t * op) { crm_info("Cancelling %s operation %s", op->standard, op->id); if (recurring_actions) { g_hash_table_remove(recurring_actions, op->id); } if (op->opaque->repeat_timer) { g_source_remove(op->opaque->repeat_timer); op->opaque->repeat_timer = 0; } return TRUE; } /*! * \brief Cancel a recurring action * * \param[in] name Name of resource that operation is for * \param[in] action Name of operation to cancel * \param[in] interval Interval of operation to cancel * * \return TRUE if action was successfully cancelled, FALSE otherwise */ gboolean services_action_cancel(const char *name, const char *action, int interval) { gboolean cancelled = FALSE; char *id = generate_op_key(name, action, interval); svc_action_t *op = NULL; /* We can only cancel a recurring action */ init_recurring_actions(); op = g_hash_table_lookup(recurring_actions, id); if (op == NULL) { goto done; } /* Tell operation_finalize() not to reschedule the operation */ op->cancel = TRUE; /* Stop tracking it as a recurring operation, and stop its timer */ cancel_recurring_action(op); /* If the op has a PID, it's an in-flight child process, so kill it. * * Whether the kill succeeds or fails, the main loop will send the op to * operation_finished() (and thus operation_finalize()) when the process * goes away. */ if (op->pid != 0) { crm_info("Terminating in-flight op %s (pid %d) early because it was cancelled", id, op->pid); cancelled = mainloop_child_kill(op->pid); if (cancelled == FALSE) { crm_err("Termination of %s (pid %d) failed", id, op->pid); } goto done; } /* In-flight systemd and upstart ops don't have a pid. The relevant handlers * will call operation_finalize() when the operation completes. * @TODO: Can we request early termination, maybe using * dbus_pending_call_cancel()? */ if (inflight_systemd_or_upstart(op)) { crm_info("Will cancel %s op %s when in-flight instance completes", op->standard, op->id); cancelled = FALSE; goto done; } /* Otherwise, operation is not in-flight, just report as cancelled */ op->status = PCMK_LRM_OP_CANCELLED; if (op->opaque->callback) { op->opaque->callback(op); } blocked_ops = g_list_remove(blocked_ops, op); services_action_free(op); cancelled = TRUE; done: free(id); return cancelled; } gboolean services_action_kick(const char *name, const char *action, int interval /* ms */) { svc_action_t * op = NULL; char *id = generate_op_key(name, action, interval); init_recurring_actions(); op = g_hash_table_lookup(recurring_actions, id); free(id); if (op == NULL) { return FALSE; } if (op->pid || inflight_systemd_or_upstart(op)) { return TRUE; } else { if (op->opaque->repeat_timer) { g_source_remove(op->opaque->repeat_timer); op->opaque->repeat_timer = 0; } recurring_action_timer(op); return TRUE; } } /*! * \internal * \brief Add a new recurring operation, checking for duplicates * * \param[in] op Operation to add * * \return TRUE if duplicate found (and reschedule), FALSE otherwise */ static gboolean handle_duplicate_recurring(svc_action_t * op) { svc_action_t * dup = NULL; /* check for duplicates */ dup = g_hash_table_lookup(recurring_actions, op->id); if (dup && (dup != op)) { /* update user data */ if (op->opaque->callback) { dup->opaque->callback = op->opaque->callback; dup->cb_data = op->cb_data; op->cb_data = NULL; } /* immediately execute the next interval */ if (dup->pid != 0) { if (op->opaque->repeat_timer) { g_source_remove(op->opaque->repeat_timer); op->opaque->repeat_timer = 0; } recurring_action_timer(dup); } /* free the duplicate */ services_action_free(op); return TRUE; } return FALSE; } inline static gboolean action_exec_helper(svc_action_t * op) { /* Whether a/synchronous must be decided (op->synchronous) beforehand. */ if (op->standard && (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_UPSTART) == 0)) { #if SUPPORT_UPSTART return upstart_job_exec(op); #endif } else if (op->standard && strcasecmp(op->standard, PCMK_RESOURCE_CLASS_SYSTEMD) == 0) { #if SUPPORT_SYSTEMD return systemd_unit_exec(op); #endif } else { return services_os_action_execute(op); } /* The 'op' has probably been freed if the execution functions return TRUE for the asynchronous 'op'. */ /* Avoid using the 'op' in here. */ return FALSE; } void services_add_inflight_op(svc_action_t * op) { if (op == NULL) { return; } CRM_ASSERT(op->synchronous == FALSE); /* keep track of ops that are in-flight to avoid collisions in the same namespace */ if (op->rsc) { inflight_ops = g_list_append(inflight_ops, op); } } /*! * \internal * \brief Stop tracking an operation that completed * * \param[in] op Operation to stop tracking */ void services_untrack_op(svc_action_t *op) { /* Op is no longer in-flight or blocked */ inflight_ops = g_list_remove(inflight_ops, op); blocked_ops = g_list_remove(blocked_ops, op); /* Op is no longer blocking other ops, so check if any need to run */ handle_blocked_ops(); } gboolean services_action_async(svc_action_t * op, void (*action_callback) (svc_action_t *)) { op->synchronous = false; if (action_callback) { op->opaque->callback = action_callback; } if (op->interval > 0) { init_recurring_actions(); if (handle_duplicate_recurring(op) == TRUE) { /* entry rescheduled, dup freed */ /* exit early */ return TRUE; } g_hash_table_replace(recurring_actions, op->id, op); } if (op->rsc && is_op_blocked(op->rsc)) { blocked_ops = g_list_append(blocked_ops, op); return TRUE; } return action_exec_helper(op); } static gboolean processing_blocked_ops = FALSE; gboolean is_op_blocked(const char *rsc) { GList *gIter = NULL; svc_action_t *op = NULL; for (gIter = inflight_ops; gIter != NULL; gIter = gIter->next) { op = gIter->data; if (safe_str_eq(op->rsc, rsc)) { return TRUE; } } return FALSE; } static void handle_blocked_ops(void) { GList *executed_ops = NULL; GList *gIter = NULL; svc_action_t *op = NULL; gboolean res = FALSE; if (processing_blocked_ops) { /* avoid nested calling of this function */ return; } processing_blocked_ops = TRUE; /* n^2 operation here, but blocked ops are incredibly rare. this list * will be empty 99% of the time. */ for (gIter = blocked_ops; gIter != NULL; gIter = gIter->next) { op = gIter->data; if (is_op_blocked(op->rsc)) { continue; } executed_ops = g_list_append(executed_ops, op); res = action_exec_helper(op); if (res == FALSE) { op->status = PCMK_LRM_OP_ERROR; /* this can cause this function to be called recursively * which is why we have processing_blocked_ops static variable */ operation_finalize(op); } } for (gIter = executed_ops; gIter != NULL; gIter = gIter->next) { op = gIter->data; blocked_ops = g_list_remove(blocked_ops, op); } g_list_free(executed_ops); processing_blocked_ops = FALSE; } gboolean services_action_sync(svc_action_t * op) { gboolean rc = TRUE; if (op == NULL) { crm_trace("No operation to execute"); return FALSE; } op->synchronous = true; rc = action_exec_helper(op); crm_trace(" > %s_%s_%d: %s = %d", op->rsc, op->action, op->interval, op->opaque->exec, op->rc); if (op->stdout_data) { crm_trace(" > stdout: %s", op->stdout_data); } if (op->stderr_data) { crm_trace(" > stderr: %s", op->stderr_data); } return rc; } GList * get_directory_list(const char *root, gboolean files, gboolean executable) { return services_os_get_directory_list(root, files, executable); } GList * services_list(void) { return resources_list_agents(PCMK_RESOURCE_CLASS_LSB, NULL); } #if SUPPORT_HEARTBEAT static GList * resources_os_list_hb_agents(void) { return services_os_get_directory_list(HB_RA_DIR, TRUE, TRUE); } #endif GList * resources_list_standards(void) { GList *standards = NULL; GList *agents = NULL; standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_OCF)); standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_LSB)); standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_SERVICE)); #if SUPPORT_SYSTEMD agents = systemd_unit_listall(); if (agents) { standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_SYSTEMD)); g_list_free_full(agents, free); } #endif #if SUPPORT_UPSTART agents = upstart_job_listall(); if (agents) { standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_UPSTART)); g_list_free_full(agents, free); } #endif #if SUPPORT_NAGIOS agents = resources_os_list_nagios_agents(); if (agents) { standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_NAGIOS)); g_list_free_full(agents, free); } #endif #if SUPPORT_HEARTBEAT standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_HB)); #endif return standards; } GList * resources_list_providers(const char *standard) { if (strcasecmp(standard, PCMK_RESOURCE_CLASS_OCF) == 0) { return resources_os_list_ocf_providers(); } return NULL; } GList * resources_list_agents(const char *standard, const char *provider) { if ((standard == NULL) || (strcasecmp(standard, PCMK_RESOURCE_CLASS_SERVICE) == 0)) { GList *tmp1; GList *tmp2; GList *result = resources_os_list_lsb_agents(); if (standard == NULL) { tmp1 = result; tmp2 = resources_os_list_ocf_agents(NULL); if (tmp2) { result = g_list_concat(tmp1, tmp2); } } #if SUPPORT_SYSTEMD tmp1 = result; tmp2 = systemd_unit_listall(); if (tmp2) { result = g_list_concat(tmp1, tmp2); } #endif #if SUPPORT_UPSTART tmp1 = result; tmp2 = upstart_job_listall(); if (tmp2) { result = g_list_concat(tmp1, tmp2); } #endif return result; } else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_OCF) == 0) { return resources_os_list_ocf_agents(provider); } else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_LSB) == 0) { return resources_os_list_lsb_agents(); #if SUPPORT_HEARTBEAT } else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_HB) == 0) { return resources_os_list_hb_agents(); #endif #if SUPPORT_SYSTEMD } else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_SYSTEMD) == 0) { return systemd_unit_listall(); #endif #if SUPPORT_UPSTART } else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_UPSTART) == 0) { return upstart_job_listall(); #endif #if SUPPORT_NAGIOS } else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_NAGIOS) == 0) { return resources_os_list_nagios_agents(); #endif } return NULL; } diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c index c2ac43d333..6be8b0c596 100644 --- a/lib/services/services_linux.c +++ b/lib/services/services_linux.c @@ -1,906 +1,923 @@ /* * Copyright (C) 2010-2016 Andrew Beekhof * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_SYS_SIGNALFD_H #include #endif #include "crm/crm.h" #include "crm/common/mainloop.h" #include "crm/services.h" #include "services_private.h" #if SUPPORT_CIBSECRETS # include "crm/common/cib_secrets.h" #endif static inline void set_fd_opts(int fd, int opts) { int flag; if ((flag = fcntl(fd, F_GETFL)) >= 0) { if (fcntl(fd, F_SETFL, flag | opts) < 0) { crm_err("fcntl() write failed"); } } else { crm_err("fcntl() read failed"); } } static gboolean svc_read_output(int fd, svc_action_t * op, bool is_stderr) { char *data = NULL; int rc = 0, len = 0; char buf[500]; static const size_t buf_read_len = sizeof(buf) - 1; if (fd < 0) { crm_trace("No fd for %s", op->id); return FALSE; } if (is_stderr && op->stderr_data) { len = strlen(op->stderr_data); data = op->stderr_data; crm_trace("Reading %s stderr into offset %d", op->id, len); } else if (is_stderr == FALSE && op->stdout_data) { len = strlen(op->stdout_data); data = op->stdout_data; crm_trace("Reading %s stdout into offset %d", op->id, len); } else { crm_trace("Reading %s %s into offset %d", op->id, is_stderr?"stderr":"stdout", len); } do { rc = read(fd, buf, buf_read_len); if (rc > 0) { crm_trace("Got %d chars: %.80s", rc, buf); buf[rc] = 0; data = realloc_safe(data, len + rc + 1); len += sprintf(data + len, "%s", buf); } else if (errno != EINTR) { /* error or EOF * Cleanup happens in pipe_done() */ rc = FALSE; break; } } while (rc == buf_read_len || rc < 0); if (is_stderr) { op->stderr_data = data; } else { op->stdout_data = data; } return rc; } static int dispatch_stdout(gpointer userdata) { svc_action_t *op = (svc_action_t *) userdata; return svc_read_output(op->opaque->stdout_fd, op, FALSE); } static int dispatch_stderr(gpointer userdata) { svc_action_t *op = (svc_action_t *) userdata; return svc_read_output(op->opaque->stderr_fd, op, TRUE); } static void pipe_out_done(gpointer user_data) { svc_action_t *op = (svc_action_t *) user_data; crm_trace("%p", op); op->opaque->stdout_gsource = NULL; if (op->opaque->stdout_fd > STDOUT_FILENO) { close(op->opaque->stdout_fd); } op->opaque->stdout_fd = -1; } static void pipe_err_done(gpointer user_data) { svc_action_t *op = (svc_action_t *) user_data; op->opaque->stderr_gsource = NULL; if (op->opaque->stderr_fd > STDERR_FILENO) { close(op->opaque->stderr_fd); } op->opaque->stderr_fd = -1; } static struct mainloop_fd_callbacks stdout_callbacks = { .dispatch = dispatch_stdout, .destroy = pipe_out_done, }; static struct mainloop_fd_callbacks stderr_callbacks = { .dispatch = dispatch_stderr, .destroy = pipe_err_done, }; static void set_ocf_env(const char *key, const char *value, gpointer user_data) { if (setenv(key, value, 1) != 0) { crm_perror(LOG_ERR, "setenv failed for key:%s and value:%s", key, value); } } static void set_ocf_env_with_prefix(gpointer key, gpointer value, gpointer user_data) { char buffer[500]; snprintf(buffer, sizeof(buffer), "OCF_RESKEY_%s", (char *)key); set_ocf_env(buffer, value, user_data); } +static void +set_alert_env(gpointer key, gpointer value, gpointer user_data) +{ + set_ocf_env((char*)key, value, user_data); +} + +static void +add_alert_env_vars(svc_action_t * op) +{ + if (op->alert_params) { + g_hash_table_foreach(op->alert_params, set_alert_env, NULL); + } +} + static void add_OCF_env_vars(svc_action_t * op) { if ((op->standard == NULL) || (strcasecmp(PCMK_RESOURCE_CLASS_OCF, op->standard) != 0)) { return; } if (op->params) { g_hash_table_foreach(op->params, set_ocf_env_with_prefix, NULL); } set_ocf_env("OCF_RA_VERSION_MAJOR", "1", NULL); set_ocf_env("OCF_RA_VERSION_MINOR", "0", NULL); set_ocf_env("OCF_ROOT", OCF_ROOT_DIR, NULL); set_ocf_env("OCF_EXIT_REASON_PREFIX", PCMK_OCF_REASON_PREFIX, NULL); if (op->rsc) { set_ocf_env("OCF_RESOURCE_INSTANCE", op->rsc, NULL); } if (op->agent != NULL) { set_ocf_env("OCF_RESOURCE_TYPE", op->agent, NULL); } /* Notes: this is not added to specification yet. Sept 10,2004 */ if (op->provider != NULL) { set_ocf_env("OCF_RESOURCE_PROVIDER", op->provider, NULL); } } gboolean recurring_action_timer(gpointer data) { svc_action_t *op = data; crm_debug("Scheduling another invocation of %s", op->id); /* Clean out the old result */ free(op->stdout_data); op->stdout_data = NULL; free(op->stderr_data); op->stderr_data = NULL; op->opaque->repeat_timer = 0; services_action_async(op, NULL); return FALSE; } /* Returns FALSE if 'op' should be free'd by the caller */ gboolean operation_finalize(svc_action_t * op) { int recurring = 0; if (op->interval) { if (op->cancel) { op->status = PCMK_LRM_OP_CANCELLED; cancel_recurring_action(op); } else { recurring = 1; op->opaque->repeat_timer = g_timeout_add(op->interval, recurring_action_timer, (void *)op); } } if (op->opaque->callback) { op->opaque->callback(op); } op->pid = 0; services_untrack_op(op); if (!recurring && op->synchronous == FALSE) { /* * If this is a recurring action, do not free explicitly. * It will get freed whenever the action gets cancelled. */ services_action_free(op); return TRUE; } services_action_cleanup(op); return FALSE; } static void operation_finished(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) { svc_action_t *op = mainloop_child_userdata(p); char *prefix = crm_strdup_printf("%s:%d", op->id, op->pid); mainloop_clear_child_userdata(p); op->status = PCMK_LRM_OP_DONE; CRM_ASSERT(op->pid == pid); crm_trace("%s %p %p", prefix, op->opaque->stderr_gsource, op->opaque->stdout_gsource); if (op->opaque->stderr_gsource) { /* Make sure we have read everything from the buffer. * Depending on the priority mainloop gives the fd, operation_finished * could occur before all the reads are done. Force the read now.*/ crm_trace("%s dispatching stderr", prefix); dispatch_stderr(op); crm_trace("%s: %p", op->id, op->stderr_data); mainloop_del_fd(op->opaque->stderr_gsource); op->opaque->stderr_gsource = NULL; } if (op->opaque->stdout_gsource) { /* Make sure we have read everything from the buffer. * Depending on the priority mainloop gives the fd, operation_finished * could occur before all the reads are done. Force the read now.*/ crm_trace("%s dispatching stdout", prefix); dispatch_stdout(op); crm_trace("%s: %p", op->id, op->stdout_data); mainloop_del_fd(op->opaque->stdout_gsource); op->opaque->stdout_gsource = NULL; } if (signo) { if (mainloop_child_timeout(p)) { crm_warn("%s - timed out after %dms", prefix, op->timeout); op->status = PCMK_LRM_OP_TIMEOUT; op->rc = PCMK_OCF_TIMEOUT; } else { do_crm_log_unlikely((op->cancel) ? LOG_INFO : LOG_WARNING, "%s - terminated with signal %d", prefix, signo); op->status = PCMK_LRM_OP_ERROR; op->rc = PCMK_OCF_SIGNAL; } } else { op->rc = exitcode; crm_debug("%s - exited with rc=%d", prefix, exitcode); } free(prefix); prefix = crm_strdup_printf("%s:%d:stderr", op->id, op->pid); crm_log_output(LOG_NOTICE, prefix, op->stderr_data); free(prefix); prefix = crm_strdup_printf("%s:%d:stdout", op->id, op->pid); crm_log_output(LOG_DEBUG, prefix, op->stdout_data); free(prefix); operation_finalize(op); } /*! * \internal * \brief Set operation rc and status per errno from stat(), fork() or execvp() * * \param[in,out] op Operation to set rc and status for * \param[in] error Value of errno after system call * * \return void */ static void services_handle_exec_error(svc_action_t * op, int error) { int rc_not_installed, rc_insufficient_priv, rc_exec_error; /* Mimic the return codes for each standard as that's what we'll convert back from in get_uniform_rc() */ if (safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_LSB) && safe_str_eq(op->action, "status")) { rc_not_installed = PCMK_LSB_STATUS_NOT_INSTALLED; rc_insufficient_priv = PCMK_LSB_STATUS_INSUFFICIENT_PRIV; rc_exec_error = PCMK_LSB_STATUS_UNKNOWN; #if SUPPORT_NAGIOS } else if (safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_NAGIOS)) { rc_not_installed = NAGIOS_NOT_INSTALLED; rc_insufficient_priv = NAGIOS_INSUFFICIENT_PRIV; rc_exec_error = PCMK_OCF_EXEC_ERROR; #endif } else { rc_not_installed = PCMK_OCF_NOT_INSTALLED; rc_insufficient_priv = PCMK_OCF_INSUFFICIENT_PRIV; rc_exec_error = PCMK_OCF_EXEC_ERROR; } switch (error) { /* see execve(2), stat(2) and fork(2) */ case ENOENT: /* No such file or directory */ case EISDIR: /* Is a directory */ case ENOTDIR: /* Path component is not a directory */ case EINVAL: /* Invalid executable format */ case ENOEXEC: /* Invalid executable format */ op->rc = rc_not_installed; op->status = PCMK_LRM_OP_NOT_INSTALLED; break; case EACCES: /* permission denied (various errors) */ case EPERM: /* permission denied (various errors) */ op->rc = rc_insufficient_priv; op->status = PCMK_LRM_OP_ERROR; break; default: op->rc = rc_exec_error; op->status = PCMK_LRM_OP_ERROR; } } static void action_launch_child(svc_action_t *op) { int lpc; /* SIGPIPE is ignored (which is different from signal blocking) by the gnutls library. * Depending on the libqb version in use, libqb may set SIGPIPE to be ignored as well. * We do not want this to be inherited by the child process. By resetting this the signal * to the default behavior, we avoid some potential odd problems that occur during OCF * scripts when SIGPIPE is ignored by the environment. */ signal(SIGPIPE, SIG_DFL); #if defined(HAVE_SCHED_SETSCHEDULER) if (sched_getscheduler(0) != SCHED_OTHER) { struct sched_param sp; memset(&sp, 0, sizeof(sp)); sp.sched_priority = 0; if (sched_setscheduler(0, SCHED_OTHER, &sp) == -1) { crm_perror(LOG_ERR, "Could not reset scheduling policy to SCHED_OTHER for %s", op->id); } } #endif if (setpriority(PRIO_PROCESS, 0, 0) == -1) { crm_perror(LOG_ERR, "Could not reset process priority to 0 for %s", op->id); } /* Man: The call setpgrp() is equivalent to setpgid(0,0) * _and_ compiles on BSD variants too * need to investigate if it works the same too. */ setpgid(0, 0); /* close all descriptors except stdin/out/err and channels to logd */ for (lpc = getdtablesize() - 1; lpc > STDERR_FILENO; lpc--) { close(lpc); } #if SUPPORT_CIBSECRETS if (replace_secret_params(op->rsc, op->params) < 0) { /* replacing secrets failed! */ if (safe_str_eq(op->action,"stop")) { /* don't fail on stop! */ crm_info("proceeding with the stop operation for %s", op->rsc); } else { crm_err("failed to get secrets for %s, " "considering resource not configured", op->rsc); _exit(PCMK_OCF_NOT_CONFIGURED); } } #endif /* Setup environment correctly */ add_OCF_env_vars(op); + /* Setup environment correctly for Alert */ + add_alert_env_vars(op); + /* execute the RA */ execvp(op->opaque->exec, op->opaque->args); /* Most cases should have been already handled by stat() */ services_handle_exec_error(op, errno); _exit(op->rc); } #ifndef HAVE_SYS_SIGNALFD_H static int sigchld_pipe[2] = { -1, -1 }; static void sigchld_handler() { if ((sigchld_pipe[1] >= 0) && (write(sigchld_pipe[1], "", 1) == -1)) { crm_perror(LOG_TRACE, "Could not poke SIGCHLD self-pipe"); } } #endif static void action_synced_wait(svc_action_t * op, sigset_t *mask) { int status = 0; int timeout = op->timeout; int sfd = -1; time_t start = -1; struct pollfd fds[3]; int wait_rc = 0; #ifdef HAVE_SYS_SIGNALFD_H sfd = signalfd(-1, mask, SFD_NONBLOCK); if (sfd < 0) { crm_perror(LOG_ERR, "signalfd() failed"); } #else sfd = sigchld_pipe[0]; #endif fds[0].fd = op->opaque->stdout_fd; fds[0].events = POLLIN; fds[0].revents = 0; fds[1].fd = op->opaque->stderr_fd; fds[1].events = POLLIN; fds[1].revents = 0; fds[2].fd = sfd; fds[2].events = POLLIN; fds[2].revents = 0; crm_trace("Waiting for %d", op->pid); start = time(NULL); do { int poll_rc = poll(fds, 3, timeout); if (poll_rc > 0) { if (fds[0].revents & POLLIN) { svc_read_output(op->opaque->stdout_fd, op, FALSE); } if (fds[1].revents & POLLIN) { svc_read_output(op->opaque->stderr_fd, op, TRUE); } if (fds[2].revents & POLLIN) { #ifdef HAVE_SYS_SIGNALFD_H struct signalfd_siginfo fdsi; ssize_t s; s = read(sfd, &fdsi, sizeof(struct signalfd_siginfo)); if (s != sizeof(struct signalfd_siginfo)) { crm_perror(LOG_ERR, "Read from signal fd %d failed", sfd); } else if (fdsi.ssi_signo == SIGCHLD) { #else if (1) { /* Clear out the sigchld pipe. */ char ch; while (read(sfd, &ch, 1) == 1) /*omit*/; #endif wait_rc = waitpid(op->pid, &status, WNOHANG); if (wait_rc > 0) { break; } else if (wait_rc < 0){ if (errno == ECHILD) { /* Here, don't dare to kill and bail out... */ break; } else { /* ...otherwise pretend process still runs. */ wait_rc = 0; } crm_perror(LOG_ERR, "waitpid() for %d failed", op->pid); } } } } else if (poll_rc == 0) { timeout = 0; break; } else if (poll_rc < 0) { if (errno != EINTR) { crm_perror(LOG_ERR, "poll() failed"); break; } } timeout = op->timeout - (time(NULL) - start) * 1000; } while ((op->timeout < 0 || timeout > 0)); crm_trace("Child done: %d", op->pid); if (wait_rc <= 0) { op->rc = PCMK_OCF_UNKNOWN_ERROR; if (op->timeout > 0 && timeout <= 0) { op->status = PCMK_LRM_OP_TIMEOUT; crm_warn("%s:%d - timed out after %dms", op->id, op->pid, op->timeout); } else { op->status = PCMK_LRM_OP_ERROR; } /* If only child hasn't been successfully waited for, yet. This is to limit killing wrong target a bit more. */ if (wait_rc == 0 && waitpid(op->pid, &status, WNOHANG) == 0) { if (kill(op->pid, SIGKILL)) { crm_err("kill(%d, KILL) failed: %d", op->pid, errno); } /* Safe to skip WNOHANG here as we sent non-ignorable signal. */ while (waitpid(op->pid, &status, 0) == (pid_t) -1 && errno == EINTR) /*omit*/; } } else if (WIFEXITED(status)) { op->status = PCMK_LRM_OP_DONE; op->rc = WEXITSTATUS(status); crm_info("Managed %s process %d exited with rc=%d", op->id, op->pid, op->rc); } else if (WIFSIGNALED(status)) { int signo = WTERMSIG(status); op->status = PCMK_LRM_OP_ERROR; crm_err("Managed %s process %d exited with signal=%d", op->id, op->pid, signo); } #ifdef WCOREDUMP if (WCOREDUMP(status)) { crm_err("Managed %s process %d dumped core", op->id, op->pid); } #endif svc_read_output(op->opaque->stdout_fd, op, FALSE); svc_read_output(op->opaque->stderr_fd, op, TRUE); close(op->opaque->stdout_fd); close(op->opaque->stderr_fd); #ifdef HAVE_SYS_SIGNALFD_H close(sfd); #endif } /* For an asynchronous 'op', returns FALSE if 'op' should be free'd by the caller */ /* For a synchronous 'op', returns FALSE if 'op' fails */ gboolean services_os_action_execute(svc_action_t * op) { int stdout_fd[2]; int stderr_fd[2]; struct stat st; sigset_t *pmask; #ifdef HAVE_SYS_SIGNALFD_H sigset_t mask; sigset_t old_mask; #define sigchld_cleanup() do { \ if (sigismember(&old_mask, SIGCHLD) == 0) { \ if (sigprocmask(SIG_UNBLOCK, &mask, NULL) < 0) { \ crm_perror(LOG_ERR, "sigprocmask() failed to unblock sigchld"); \ } \ } \ } while (0) #else struct sigaction sa; struct sigaction old_sa; #define sigchld_cleanup() do { \ if (sigaction(SIGCHLD, &old_sa, NULL) < 0) { \ crm_perror(LOG_ERR, "sigaction() failed to remove sigchld handler"); \ } \ close(sigchld_pipe[0]); \ close(sigchld_pipe[1]); \ sigchld_pipe[0] = sigchld_pipe[1] = -1; \ } while(0) #endif /* Fail fast */ if(stat(op->opaque->exec, &st) != 0) { int rc = errno; crm_warn("Cannot execute '%s': %s (%d)", op->opaque->exec, pcmk_strerror(rc), rc); services_handle_exec_error(op, rc); if (!op->synchronous) { return operation_finalize(op); } return FALSE; } if (pipe(stdout_fd) < 0) { int rc = errno; crm_err("pipe(stdout_fd) failed. '%s': %s (%d)", op->opaque->exec, pcmk_strerror(rc), rc); services_handle_exec_error(op, rc); if (!op->synchronous) { return operation_finalize(op); } return FALSE; } if (pipe(stderr_fd) < 0) { int rc = errno; close(stdout_fd[0]); close(stdout_fd[1]); crm_err("pipe(stderr_fd) failed. '%s': %s (%d)", op->opaque->exec, pcmk_strerror(rc), rc); services_handle_exec_error(op, rc); if (!op->synchronous) { return operation_finalize(op); } return FALSE; } if (op->synchronous) { #ifdef HAVE_SYS_SIGNALFD_H sigemptyset(&mask); sigaddset(&mask, SIGCHLD); sigemptyset(&old_mask); if (sigprocmask(SIG_BLOCK, &mask, &old_mask) < 0) { crm_perror(LOG_ERR, "sigprocmask() failed to block sigchld"); } pmask = &mask; #else if(pipe(sigchld_pipe) == -1) { crm_perror(LOG_ERR, "pipe() failed"); } set_fd_opts(sigchld_pipe[0], O_NONBLOCK); set_fd_opts(sigchld_pipe[1], O_NONBLOCK); sa.sa_handler = sigchld_handler; sa.sa_flags = 0; sigemptyset(&sa.sa_mask); if (sigaction(SIGCHLD, &sa, &old_sa) < 0) { crm_perror(LOG_ERR, "sigaction() failed to set sigchld handler"); } pmask = NULL; #endif } op->pid = fork(); switch (op->pid) { case -1: { int rc = errno; close(stdout_fd[0]); close(stdout_fd[1]); close(stderr_fd[0]); close(stderr_fd[1]); crm_err("Could not execute '%s': %s (%d)", op->opaque->exec, pcmk_strerror(rc), rc); services_handle_exec_error(op, rc); if (!op->synchronous) { return operation_finalize(op); } sigchld_cleanup(); return FALSE; } case 0: /* Child */ close(stdout_fd[0]); close(stderr_fd[0]); if (STDOUT_FILENO != stdout_fd[1]) { if (dup2(stdout_fd[1], STDOUT_FILENO) != STDOUT_FILENO) { crm_err("dup2() failed (stdout)"); } close(stdout_fd[1]); } if (STDERR_FILENO != stderr_fd[1]) { if (dup2(stderr_fd[1], STDERR_FILENO) != STDERR_FILENO) { crm_err("dup2() failed (stderr)"); } close(stderr_fd[1]); } if (op->synchronous) { sigchld_cleanup(); } action_launch_child(op); CRM_ASSERT(0); /* action_launch_child is effectively noreturn */ } /* Only the parent reaches here */ close(stdout_fd[1]); close(stderr_fd[1]); op->opaque->stdout_fd = stdout_fd[0]; set_fd_opts(op->opaque->stdout_fd, O_NONBLOCK); op->opaque->stderr_fd = stderr_fd[0]; set_fd_opts(op->opaque->stderr_fd, O_NONBLOCK); if (op->synchronous) { action_synced_wait(op, pmask); sigchld_cleanup(); } else { crm_trace("Async waiting for %d - %s", op->pid, op->opaque->exec); mainloop_child_add_with_flags(op->pid, op->timeout, op->id, op, (op->flags & SVC_ACTION_LEAVE_GROUP) ? mainloop_leave_pid_group : 0, operation_finished); op->opaque->stdout_gsource = mainloop_add_fd(op->id, G_PRIORITY_LOW, op->opaque->stdout_fd, op, &stdout_callbacks); op->opaque->stderr_gsource = mainloop_add_fd(op->id, G_PRIORITY_LOW, op->opaque->stderr_fd, op, &stderr_callbacks); services_add_inflight_op(op); } return TRUE; } GList * services_os_get_directory_list(const char *root, gboolean files, gboolean executable) { GList *list = NULL; struct dirent **namelist; int entries = 0, lpc = 0; char buffer[PATH_MAX]; entries = scandir(root, &namelist, NULL, alphasort); if (entries <= 0) { return list; } for (lpc = 0; lpc < entries; lpc++) { struct stat sb; if ('.' == namelist[lpc]->d_name[0]) { free(namelist[lpc]); continue; } snprintf(buffer, sizeof(buffer), "%s/%s", root, namelist[lpc]->d_name); if (stat(buffer, &sb)) { continue; } if (S_ISDIR(sb.st_mode)) { if (files) { free(namelist[lpc]); continue; } } else if (S_ISREG(sb.st_mode)) { if (files == FALSE) { free(namelist[lpc]); continue; } else if (executable && (sb.st_mode & S_IXUSR) == 0 && (sb.st_mode & S_IXGRP) == 0 && (sb.st_mode & S_IXOTH) == 0) { free(namelist[lpc]); continue; } } list = g_list_append(list, strdup(namelist[lpc]->d_name)); free(namelist[lpc]); } free(namelist); return list; } GList * resources_os_list_lsb_agents(void) { return get_directory_list(LSB_ROOT_DIR, TRUE, TRUE); } GList * resources_os_list_ocf_providers(void) { return get_directory_list(OCF_ROOT_DIR "/resource.d", FALSE, TRUE); } GList * resources_os_list_ocf_agents(const char *provider) { GList *gIter = NULL; GList *result = NULL; GList *providers = NULL; if (provider) { char buffer[500]; snprintf(buffer, sizeof(buffer), "%s/resource.d/%s", OCF_ROOT_DIR, provider); return get_directory_list(buffer, TRUE, TRUE); } providers = resources_os_list_ocf_providers(); for (gIter = providers; gIter != NULL; gIter = gIter->next) { GList *tmp1 = result; GList *tmp2 = resources_os_list_ocf_agents(gIter->data); if (tmp2) { result = g_list_concat(tmp1, tmp2); } } g_list_free_full(providers, free); return result; } #if SUPPORT_NAGIOS GList * resources_os_list_nagios_agents(void) { GList *plugin_list = NULL; GList *result = NULL; GList *gIter = NULL; plugin_list = get_directory_list(NAGIOS_PLUGIN_DIR, TRUE, TRUE); /* Make sure both the plugin and its metadata exist */ for (gIter = plugin_list; gIter != NULL; gIter = gIter->next) { const char *plugin = gIter->data; char *metadata = crm_strdup_printf(NAGIOS_METADATA_DIR "/%s.xml", plugin); struct stat st; if (stat(metadata, &st) == 0) { result = g_list_append(result, strdup(plugin)); } free(metadata); } g_list_free_full(plugin_list, free); return result; } #endif diff --git a/lrmd/lrmd.c b/lrmd/lrmd.c index 245149b8f7..afdfd01b02 100644 --- a/lrmd/lrmd.c +++ b/lrmd/lrmd.c @@ -1,1717 +1,1793 @@ /* * Copyright (c) 2012 David Vossel * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include #include #include #include #include #include #include #include +#include #include #include #ifdef HAVE_SYS_TIMEB_H # include #endif #define EXIT_REASON_MAX_LEN 128 GHashTable *rsc_list = NULL; typedef struct lrmd_cmd_s { int timeout; int interval; int start_delay; int timeout_orig; int call_id; int exec_rc; int lrmd_op_status; int call_opts; /* Timer ids, must be removed on cmd destruction. */ int delay_id; int stonith_recurring_id; int rsc_deleted; int service_flags; char *client_id; char *origin; char *rsc_id; char *action; char *real_action; char *exit_reason; char *output; char *userdata_str; /* when set, this cmd should go through a container wrapper */ const char *isolation_wrapper; #ifdef HAVE_SYS_TIMEB_H /* recurring and systemd operations may involve more than one lrmd command * per operation, so they need info about original and most recent */ struct timeb t_first_run; /* Timestamp of when op first ran */ struct timeb t_run; /* Timestamp of when op most recently ran */ struct timeb t_first_queue; /* Timestamp of when op first was queued */ struct timeb t_queue; /* Timestamp of when op most recently was queued */ struct timeb t_rcchange; /* Timestamp of last rc change */ #endif int first_notify_sent; int last_notify_rc; int last_notify_op_status; int last_pid; GHashTable *params; } lrmd_cmd_t; static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc); static gboolean lrmd_rsc_dispatch(gpointer user_data); static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id); static void log_finished(lrmd_cmd_t * cmd, int exec_time, int queue_time) { char pid_str[32] = { 0, }; int log_level = LOG_INFO; if (cmd->last_pid) { snprintf(pid_str, 32, "%d", cmd->last_pid); } if (safe_str_eq(cmd->action, "monitor")) { log_level = LOG_DEBUG; } #ifdef HAVE_SYS_TIMEB_H do_crm_log(log_level, "finished - rsc:%s action:%s call_id:%d %s%s exit-code:%d exec-time:%dms queue-time:%dms", cmd->rsc_id, cmd->action, cmd->call_id, cmd->last_pid ? "pid:" : "", pid_str, cmd->exec_rc, exec_time, queue_time); #else do_crm_log(log_level, "finished - rsc:%s action:%s call_id:%d %s%s exit-code:%d", cmd->rsc_id, cmd->action, cmd->call_id, cmd->last_pid ? "pid:" : "", pid_str, cmd->exec_rc); #endif } static void log_execute(lrmd_cmd_t * cmd) { int log_level = LOG_INFO; if (safe_str_eq(cmd->action, "monitor")) { log_level = LOG_DEBUG; } do_crm_log(log_level, "executing - rsc:%s action:%s call_id:%d", cmd->rsc_id, cmd->action, cmd->call_id); } static const char * normalize_action_name(lrmd_rsc_t * rsc, const char *action) { if (safe_str_eq(action, "monitor") && (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_LSB) || safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_SERVICE) || safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_SYSTEMD))) { return "status"; } return action; } static lrmd_rsc_t * build_rsc_from_xml(xmlNode * msg) { xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR); lrmd_rsc_t *rsc = NULL; rsc = calloc(1, sizeof(lrmd_rsc_t)); crm_element_value_int(msg, F_LRMD_CALLOPTS, &rsc->call_opts); rsc->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID); rsc->class = crm_element_value_copy(rsc_xml, F_LRMD_CLASS); rsc->provider = crm_element_value_copy(rsc_xml, F_LRMD_PROVIDER); rsc->type = crm_element_value_copy(rsc_xml, F_LRMD_TYPE); rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_rsc_dispatch, rsc); return rsc; } static void dup_attr(gpointer key, gpointer value, gpointer user_data) { g_hash_table_replace(user_data, strdup(key), strdup(value)); } static lrmd_cmd_t * create_lrmd_cmd(xmlNode * msg, crm_client_t * client, lrmd_rsc_t *rsc) { int call_options = 0; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR); lrmd_cmd_t *cmd = NULL; cmd = calloc(1, sizeof(lrmd_cmd_t)); crm_element_value_int(msg, F_LRMD_CALLOPTS, &call_options); cmd->call_opts = call_options; cmd->client_id = strdup(client->id); crm_element_value_int(msg, F_LRMD_CALLID, &cmd->call_id); crm_element_value_int(rsc_xml, F_LRMD_RSC_INTERVAL, &cmd->interval); crm_element_value_int(rsc_xml, F_LRMD_TIMEOUT, &cmd->timeout); crm_element_value_int(rsc_xml, F_LRMD_RSC_START_DELAY, &cmd->start_delay); cmd->timeout_orig = cmd->timeout; cmd->origin = crm_element_value_copy(rsc_xml, F_LRMD_ORIGIN); cmd->action = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ACTION); cmd->userdata_str = crm_element_value_copy(rsc_xml, F_LRMD_RSC_USERDATA_STR); cmd->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID); cmd->params = xml2list(rsc_xml); cmd->isolation_wrapper = g_hash_table_lookup(cmd->params, "CRM_meta_isolation_wrapper"); if (cmd->isolation_wrapper) { if (g_hash_table_lookup(cmd->params, "CRM_meta_isolation_instance") == NULL) { g_hash_table_insert(cmd->params, strdup("CRM_meta_isolation_instance"), strdup(rsc->rsc_id)); } if (rsc->provider) { g_hash_table_insert(cmd->params, strdup("CRM_meta_provider"), strdup(rsc->provider)); } g_hash_table_insert(cmd->params, strdup("CRM_meta_class"), strdup(rsc->class)); g_hash_table_insert(cmd->params, strdup("CRM_meta_type"), strdup(rsc->type)); } if (safe_str_eq(g_hash_table_lookup(cmd->params, "CRM_meta_on_fail"), "block")) { crm_debug("Setting flag to leave pid group on timeout and only kill action pid for %s_%s_%d", cmd->rsc_id, cmd->action, cmd->interval); cmd->service_flags |= SVC_ACTION_LEAVE_GROUP; } return cmd; } +static lrmd_cmd_t * +create_alert_cmd(xmlNode * msg, crm_client_t * client, lrmd_rsc_t *rsc) +{ + int call_options = 0; + xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_ALERT, msg, LOG_ERR); + lrmd_cmd_t *cmd = NULL; + + cmd = calloc(1, sizeof(lrmd_cmd_t)); + + crm_element_value_int(msg, F_LRMD_CALLOPTS, &call_options); + cmd->call_opts = call_options; + cmd->client_id = strdup(client->id); + + crm_element_value_int(msg, F_LRMD_CALLID, &cmd->call_id); + crm_element_value_int(rsc_xml, F_LRMD_TIMEOUT, &cmd->timeout); + cmd->timeout_orig = cmd->timeout; + + cmd->origin = crm_element_value_copy(rsc_xml, F_LRMD_ORIGIN); + cmd->action = strdup("start"); + cmd->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_ALERT_ID); + + cmd->params = xml2list(rsc_xml); + + return cmd; +} + static void free_lrmd_cmd(lrmd_cmd_t * cmd) { if (cmd->stonith_recurring_id) { g_source_remove(cmd->stonith_recurring_id); } if (cmd->delay_id) { g_source_remove(cmd->delay_id); } if (cmd->params) { g_hash_table_destroy(cmd->params); } free(cmd->origin); free(cmd->action); free(cmd->real_action); free(cmd->userdata_str); free(cmd->rsc_id); free(cmd->output); free(cmd->exit_reason); free(cmd->client_id); free(cmd); } static gboolean stonith_recurring_op_helper(gpointer data) { lrmd_cmd_t *cmd = data; lrmd_rsc_t *rsc; cmd->stonith_recurring_id = 0; if (!cmd->rsc_id) { return FALSE; } rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id); CRM_ASSERT(rsc != NULL); /* take it out of recurring_ops list, and put it in the pending ops * to be executed */ rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd); rsc->pending_ops = g_list_append(rsc->pending_ops, cmd); #ifdef HAVE_SYS_TIMEB_H ftime(&cmd->t_queue); if (cmd->t_first_queue.time == 0) { cmd->t_first_queue = cmd->t_queue; } #endif mainloop_set_trigger(rsc->work); return FALSE; } static gboolean start_delay_helper(gpointer data) { lrmd_cmd_t *cmd = data; lrmd_rsc_t *rsc = NULL; cmd->delay_id = 0; rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL; if (rsc) { mainloop_set_trigger(rsc->work); } return FALSE; } static gboolean merge_recurring_duplicate(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { GListPtr gIter = NULL; lrmd_cmd_t * dup = NULL; gboolean dup_pending = FALSE; if (cmd->interval == 0) { return 0; } for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) { dup = gIter->data; if (safe_str_eq(cmd->action, dup->action) && cmd->interval == dup->interval) { dup_pending = TRUE; goto merge_dup; } } /* if dup is in recurring_ops list, that means it has already executed * and is in the interval loop. we can't just remove it in this case. */ for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) { dup = gIter->data; if (safe_str_eq(cmd->action, dup->action) && cmd->interval == dup->interval) { goto merge_dup; } } return FALSE; merge_dup: /* This should not occur, if it does we need to investigate in the crmd * how something like this is possible */ crm_warn("Duplicate recurring op entry detected (%s_%s_%d), merging with previous op entry", rsc->rsc_id, normalize_action_name(rsc, dup->action), dup->interval); /* merge */ dup->first_notify_sent = 0; free(dup->userdata_str); dup->userdata_str = cmd->userdata_str; cmd->userdata_str = NULL; dup->call_id = cmd->call_id; if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) { /* if we are waiting for the next interval, kick it off now */ if (dup_pending == TRUE) { g_source_remove(cmd->stonith_recurring_id); cmd->stonith_recurring_id = 0; stonith_recurring_op_helper(cmd); } } else if (dup_pending == FALSE) { /* if we've already handed this to the service lib, kick off an early execution */ services_action_kick(rsc->rsc_id, normalize_action_name(rsc, dup->action), dup->interval); } free_lrmd_cmd(cmd); return TRUE; } static void schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { gboolean dup_processed = FALSE; CRM_CHECK(cmd != NULL, return); CRM_CHECK(rsc != NULL, return); crm_trace("Scheduling %s on %s", cmd->action, rsc->rsc_id); dup_processed = merge_recurring_duplicate(rsc, cmd); if (dup_processed) { /* duplicate recurring cmd found, cmds merged */ return; } /* crmd expects lrmd to automatically cancel recurring ops before rsc stops. */ if (rsc && safe_str_eq(cmd->action, "stop")) { cancel_all_recurring(rsc, NULL); } rsc->pending_ops = g_list_append(rsc->pending_ops, cmd); #ifdef HAVE_SYS_TIMEB_H ftime(&cmd->t_queue); if (cmd->t_first_queue.time == 0) { cmd->t_first_queue = cmd->t_queue; } #endif mainloop_set_trigger(rsc->work); if (cmd->start_delay) { cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd); } } static void send_reply(crm_client_t * client, int rc, uint32_t id, int call_id) { int send_rc = 0; xmlNode *reply = NULL; reply = create_xml_node(NULL, T_LRMD_REPLY); crm_xml_add(reply, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add_int(reply, F_LRMD_RC, rc); crm_xml_add_int(reply, F_LRMD_CALLID, call_id); send_rc = lrmd_server_send_reply(client, id, reply); free_xml(reply); if (send_rc < 0) { crm_warn("LRMD reply to %s failed: %d", client->name, send_rc); } } static void send_client_notify(gpointer key, gpointer value, gpointer user_data) { xmlNode *update_msg = user_data; crm_client_t *client = value; if (client == NULL) { crm_err("Asked to send event to NULL client"); return; } else if (client->name == NULL) { crm_trace("Asked to send event to client with no name"); return; } if (lrmd_server_send_notify(client, update_msg) <= 0) { crm_warn("Notification of client %s/%s failed", client->name, client->id); } } #ifdef HAVE_SYS_TIMEB_H /*! * \internal * \brief Return difference between two times in milliseconds * * \param[in] now More recent time (or NULL to use current time) * \param[in] old Earlier time * * \return milliseconds difference (or 0 if old is NULL or has time zero) */ static int time_diff_ms(struct timeb *now, struct timeb *old) { struct timeb local_now = { 0, }; if (now == NULL) { ftime(&local_now); now = &local_now; } if ((old == NULL) || (old->time == 0)) { return 0; } return difftime(now->time, old->time) * 1000 + now->millitm - old->millitm; } /*! * \internal * \brief Reset a command's operation times to their original values. * * Reset a command's run and queued timestamps to the timestamps of the original * command, so we report the entire time since then and not just the time since * the most recent command (for recurring and systemd operations). * * /param[in] cmd LRMD command object to reset * * /note It's not obvious what the queued time should be for a systemd * start/stop operation, which might go like this: * initial command queued 5ms, runs 3s * monitor command queued 10ms, runs 10s * monitor command queued 10ms, runs 10s * Is the queued time for that operation 5ms, 10ms or 25ms? The current * implementation will report 5ms. If it's 25ms, then we need to * subtract 20ms from the total exec time so as not to count it twice. * We can implement that later if it matters to anyone ... */ static void cmd_original_times(lrmd_cmd_t * cmd) { cmd->t_run = cmd->t_first_run; cmd->t_queue = cmd->t_first_queue; } #endif static void send_cmd_complete_notify(lrmd_cmd_t * cmd) { int exec_time = 0; int queue_time = 0; xmlNode *notify = NULL; #ifdef HAVE_SYS_TIMEB_H exec_time = time_diff_ms(NULL, &cmd->t_run); queue_time = time_diff_ms(&cmd->t_run, &cmd->t_queue); #endif log_finished(cmd, exec_time, queue_time); /* if the first notify result for a cmd has already been sent earlier, and the * the option to only send notifies on result changes is set. Check to see * if the last result is the same as the new one. If so, suppress this update */ if (cmd->first_notify_sent && (cmd->call_opts & lrmd_opt_notify_changes_only)) { if (cmd->last_notify_rc == cmd->exec_rc && cmd->last_notify_op_status == cmd->lrmd_op_status) { /* only send changes */ return; } } cmd->first_notify_sent = 1; cmd->last_notify_rc = cmd->exec_rc; cmd->last_notify_op_status = cmd->lrmd_op_status; notify = create_xml_node(NULL, T_LRMD_NOTIFY); crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add_int(notify, F_LRMD_TIMEOUT, cmd->timeout); crm_xml_add_int(notify, F_LRMD_RSC_INTERVAL, cmd->interval); crm_xml_add_int(notify, F_LRMD_RSC_START_DELAY, cmd->start_delay); crm_xml_add_int(notify, F_LRMD_EXEC_RC, cmd->exec_rc); crm_xml_add_int(notify, F_LRMD_OP_STATUS, cmd->lrmd_op_status); crm_xml_add_int(notify, F_LRMD_CALLID, cmd->call_id); crm_xml_add_int(notify, F_LRMD_RSC_DELETED, cmd->rsc_deleted); #ifdef HAVE_SYS_TIMEB_H crm_xml_add_int(notify, F_LRMD_RSC_RUN_TIME, cmd->t_run.time); crm_xml_add_int(notify, F_LRMD_RSC_RCCHANGE_TIME, cmd->t_rcchange.time); crm_xml_add_int(notify, F_LRMD_RSC_EXEC_TIME, exec_time); crm_xml_add_int(notify, F_LRMD_RSC_QUEUE_TIME, queue_time); #endif crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_RSC_EXEC); crm_xml_add(notify, F_LRMD_RSC_ID, cmd->rsc_id); if(cmd->real_action) { crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->real_action); } else { crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->action); } crm_xml_add(notify, F_LRMD_RSC_USERDATA_STR, cmd->userdata_str); crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->output); crm_xml_add(notify, F_LRMD_RSC_EXIT_REASON, cmd->exit_reason); if (cmd->params) { char *key = NULL; char *value = NULL; GHashTableIter iter; xmlNode *args = create_xml_node(notify, XML_TAG_ATTRS); g_hash_table_iter_init(&iter, cmd->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { hash2smartfield((gpointer) key, (gpointer) value, args); } } if (cmd->client_id && (cmd->call_opts & lrmd_opt_notify_orig_only)) { crm_client_t *client = crm_client_get_by_id(cmd->client_id); if (client) { send_client_notify(client->id, client, notify); } } else if (client_connections != NULL) { g_hash_table_foreach(client_connections, send_client_notify, notify); } free_xml(notify); } static void send_generic_notify(int rc, xmlNode * request) { if (client_connections != NULL) { int call_id = 0; xmlNode *notify = NULL; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); const char *op = crm_element_value(request, F_LRMD_OPERATION); crm_element_value_int(request, F_LRMD_CALLID, &call_id); notify = create_xml_node(NULL, T_LRMD_NOTIFY); crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add_int(notify, F_LRMD_RC, rc); crm_xml_add_int(notify, F_LRMD_CALLID, call_id); crm_xml_add(notify, F_LRMD_OPERATION, op); crm_xml_add(notify, F_LRMD_RSC_ID, rsc_id); g_hash_table_foreach(client_connections, send_client_notify, notify); free_xml(notify); } } static void cmd_reset(lrmd_cmd_t * cmd) { cmd->lrmd_op_status = 0; cmd->last_pid = 0; memset(&cmd->t_run, 0, sizeof(cmd->t_run)); memset(&cmd->t_queue, 0, sizeof(cmd->t_queue)); free(cmd->exit_reason); cmd->exit_reason = NULL; free(cmd->output); cmd->output = NULL; } static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc) { crm_trace("Resource operation rsc:%s action:%s completed (%p %p)", cmd->rsc_id, cmd->action, rsc ? rsc->active : NULL, cmd); if (rsc && (rsc->active == cmd)) { rsc->active = NULL; mainloop_set_trigger(rsc->work); } if (!rsc) { cmd->rsc_deleted = 1; } /* reset original timeout so client notification has correct information */ cmd->timeout = cmd->timeout_orig; send_cmd_complete_notify(cmd); if (cmd->interval && (cmd->lrmd_op_status == PCMK_LRM_OP_CANCELLED)) { if (rsc) { rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd); rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd); } free_lrmd_cmd(cmd); } else if (cmd->interval == 0) { if (rsc) { rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd); } free_lrmd_cmd(cmd); } else { /* Clear all the values pertaining just to the last iteration of a recurring op. */ cmd_reset(cmd); } } #if SUPPORT_HEARTBEAT static int pattern_matched(const char *pat, const char *str) { if (g_pattern_match_simple(pat, str)) { crm_debug("RA output matched stopped pattern [%s]", pat); return TRUE; } return FALSE; } static int hb2uniform_rc(const char *action, int rc, const char *stdout_data) { const char *stop_pattern[] = { "*stopped*", "*not*running*" }; const char *running_pattern[] = { "*running*", "*OK*" }; char *lower_std_output = NULL; int result; if (rc < 0) { return PCMK_OCF_UNKNOWN_ERROR; } /* Treat class heartbeat the same as class lsb. */ if (!safe_str_eq(action, "status") && !safe_str_eq(action, "monitor")) { return services_get_ocf_exitcode(action, rc); } /* for status though, exit code is ignored, * and the stdout is scanned for specific strings */ if (stdout_data == NULL) { crm_warn("No status output from the (hb) resource agent, assuming stopped"); return PCMK_OCF_NOT_RUNNING; } lower_std_output = g_ascii_strdown(stdout_data, -1); if (pattern_matched(stop_pattern[0], lower_std_output) || pattern_matched(stop_pattern[1], lower_std_output)) { result = PCMK_OCF_NOT_RUNNING; } else if (pattern_matched(running_pattern[0], lower_std_output) || pattern_matched(running_pattern[1], stdout_data)) { /* "OK" is matched case sensitive */ result = PCMK_OCF_OK; } else { /* It didn't say it was running - must be stopped */ crm_debug("RA output did not match any pattern, assuming stopped"); result = PCMK_OCF_NOT_RUNNING; } free(lower_std_output); return result; } #endif static int ocf2uniform_rc(int rc) { if (rc < 0 || rc > PCMK_OCF_FAILED_MASTER) { return PCMK_OCF_UNKNOWN_ERROR; } return rc; } static int stonith2uniform_rc(const char *action, int rc) { if (rc == -ENODEV) { if (safe_str_eq(action, "stop")) { rc = PCMK_OCF_OK; } else if (safe_str_eq(action, "start")) { rc = PCMK_OCF_NOT_INSTALLED; } else { rc = PCMK_OCF_NOT_RUNNING; } } else if (rc != 0) { rc = PCMK_OCF_UNKNOWN_ERROR; } return rc; } #if SUPPORT_NAGIOS static int nagios2uniform_rc(const char *action, int rc) { if (rc < 0) { return PCMK_OCF_UNKNOWN_ERROR; } switch (rc) { case NAGIOS_STATE_OK: return PCMK_OCF_OK; case NAGIOS_INSUFFICIENT_PRIV: return PCMK_OCF_INSUFFICIENT_PRIV; case NAGIOS_NOT_INSTALLED: return PCMK_OCF_NOT_INSTALLED; case NAGIOS_STATE_WARNING: case NAGIOS_STATE_CRITICAL: case NAGIOS_STATE_UNKNOWN: case NAGIOS_STATE_DEPENDENT: default: return PCMK_OCF_UNKNOWN_ERROR; } return PCMK_OCF_UNKNOWN_ERROR; } #endif static int get_uniform_rc(const char *standard, const char *action, int rc) { if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_OCF)) { return ocf2uniform_rc(rc); } else if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_STONITH)) { return stonith2uniform_rc(action, rc); } else if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_SYSTEMD)) { return rc; } else if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_UPSTART)) { return rc; #if SUPPORT_NAGIOS } else if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_NAGIOS)) { return nagios2uniform_rc(action, rc); #endif } else { return services_get_ocf_exitcode(action, rc); } } static int action_get_uniform_rc(svc_action_t * action) { lrmd_cmd_t *cmd = action->cb_data; #if SUPPORT_HEARTBEAT if (safe_str_eq(action->standard, PCMK_RESOURCE_CLASS_HB)) { return hb2uniform_rc(cmd->action, action->rc, action->stdout_data); } #endif return get_uniform_rc(action->standard, cmd->action, action->rc); } void notify_of_new_client(crm_client_t *new_client) { crm_client_t *client = NULL; GHashTableIter iter; xmlNode *notify = NULL; char *key = NULL; notify = create_xml_node(NULL, T_LRMD_NOTIFY); crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_NEW_CLIENT); g_hash_table_iter_init(&iter, client_connections); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & client)) { if (safe_str_eq(client->id, new_client->id)) { continue; } send_client_notify((gpointer) key, (gpointer) client, (gpointer) notify); } free_xml(notify); } static char * parse_exit_reason(const char *output) { const char *cur = NULL; const char *last = NULL; char *reason = NULL; static int cookie_len = 0; char *eol = NULL; if (output == NULL) { return NULL; } if (!cookie_len) { cookie_len = strlen(PCMK_OCF_REASON_PREFIX); } cur = strstr(output, PCMK_OCF_REASON_PREFIX); for (; cur != NULL; cur = strstr(cur, PCMK_OCF_REASON_PREFIX)) { /* skip over the cookie delimiter string */ cur += cookie_len; last = cur; } if (last == NULL) { return NULL; } /* make our own copy */ reason = calloc(1, (EXIT_REASON_MAX_LEN+1)); CRM_ASSERT(reason); /* limit reason string size */ strncpy(reason, last, EXIT_REASON_MAX_LEN); /* truncate everything after a new line */ eol = strchr(reason, '\n'); if (eol != NULL) { *eol = '\0'; } return reason; } void client_disconnect_cleanup(const char *client_id) { GHashTableIter iter; lrmd_rsc_t *rsc = NULL; char *key = NULL; g_hash_table_iter_init(&iter, rsc_list); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) { if (rsc->call_opts & lrmd_opt_drop_recurring) { /* This client is disconnecting, drop any recurring operations * it may have initiated on the resource */ cancel_all_recurring(rsc, client_id); } } } static void action_complete(svc_action_t * action) { lrmd_rsc_t *rsc; lrmd_cmd_t *cmd = action->cb_data; const char *rclass = NULL; bool goagain = false; if (!cmd) { crm_err("LRMD action (%s) completed does not match any known operations.", action->id); return; } #ifdef HAVE_SYS_TIMEB_H if (cmd->exec_rc != action->rc) { ftime(&cmd->t_rcchange); } #endif cmd->last_pid = action->pid; cmd->exec_rc = action_get_uniform_rc(action); cmd->lrmd_op_status = action->status; rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL; if (rsc && safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_SERVICE)) { rclass = resources_find_service_class(rsc->class); } else if(rsc) { rclass = rsc->class; } if (safe_str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD)) { if(cmd->exec_rc == PCMK_OCF_OK && safe_str_eq(cmd->action, "start")) { /* systemd I curse thee! * * systemd returns from start actions after the start _begins_ * not after it completes. * * So we have to jump through a few hoops so that we don't * report 'complete' to the rest of pacemaker until, you know, * it's actually done. */ goagain = true; cmd->real_action = cmd->action; cmd->action = strdup("monitor"); } else if(cmd->exec_rc == PCMK_OCF_OK && safe_str_eq(cmd->action, "stop")) { goagain = true; cmd->real_action = cmd->action; cmd->action = strdup("monitor"); } else if(cmd->real_action) { /* Ok, so this is the follow up monitor action to check if start actually completed */ if(cmd->lrmd_op_status == PCMK_LRM_OP_DONE && cmd->exec_rc == PCMK_OCF_PENDING) { goagain = true; } else if(cmd->exec_rc == PCMK_OCF_OK && safe_str_eq(cmd->real_action, "stop")) { goagain = true; } else { #ifdef HAVE_SYS_TIMEB_H int time_sum = time_diff_ms(NULL, &cmd->t_first_run); int timeout_left = cmd->timeout_orig - time_sum; crm_debug("%s %s is now complete (elapsed=%dms, remaining=%dms): %s (%d)", cmd->rsc_id, cmd->real_action, time_sum, timeout_left, services_ocf_exitcode_str(cmd->exec_rc), cmd->exec_rc); cmd_original_times(cmd); #endif if(cmd->lrmd_op_status == PCMK_LRM_OP_DONE && cmd->exec_rc == PCMK_OCF_NOT_RUNNING && safe_str_eq(cmd->real_action, "stop")) { cmd->exec_rc = PCMK_OCF_OK; } } } } #if SUPPORT_NAGIOS if (rsc && safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS)) { if (safe_str_eq(cmd->action, "monitor") && cmd->interval == 0 && cmd->exec_rc == PCMK_OCF_OK) { /* Successfully executed --version for the nagios plugin */ cmd->exec_rc = PCMK_OCF_NOT_RUNNING; } else if (safe_str_eq(cmd->action, "start") && cmd->exec_rc != PCMK_OCF_OK) { goagain = true; } } #endif /* Wrapping this section in ifdef implies that systemd resources are not * fully supported on platforms without sys/timeb.h. Since timeb is * obsolete, we should eventually prefer a clock_gettime() implementation * (wrapped in its own ifdef) with timeb as a fallback. */ #ifdef HAVE_SYS_TIMEB_H if(goagain) { int time_sum = time_diff_ms(NULL, &cmd->t_first_run); int timeout_left = cmd->timeout_orig - time_sum; int delay = cmd->timeout_orig / 10; if(delay >= timeout_left && timeout_left > 20) { delay = timeout_left/2; } delay = QB_MIN(2000, delay); if (delay < timeout_left) { cmd->start_delay = delay; cmd->timeout = timeout_left; if(cmd->exec_rc == PCMK_OCF_OK) { crm_debug("%s %s may still be in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", cmd->rsc_id, cmd->real_action, time_sum, timeout_left, delay); } else if(cmd->exec_rc == PCMK_OCF_PENDING) { crm_info("%s %s is still in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", cmd->rsc_id, cmd->action, time_sum, timeout_left, delay); } else { crm_notice("%s %s failed '%s' (%d): re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", cmd->rsc_id, cmd->action, services_ocf_exitcode_str(cmd->exec_rc), cmd->exec_rc, time_sum, timeout_left, delay); } cmd_reset(cmd); if(rsc) { rsc->active = NULL; } schedule_lrmd_cmd(rsc, cmd); /* Don't finalize cmd, we're not done with it yet */ return; } else { crm_notice("Giving up on %s %s (rc=%d): timeout (elapsed=%dms, remaining=%dms)", cmd->rsc_id, cmd->real_action?cmd->real_action:cmd->action, cmd->exec_rc, time_sum, timeout_left); cmd->lrmd_op_status = PCMK_LRM_OP_TIMEOUT; cmd->exec_rc = PCMK_OCF_TIMEOUT; cmd_original_times(cmd); } } #endif if (action->stderr_data) { cmd->output = strdup(action->stderr_data); cmd->exit_reason = parse_exit_reason(action->stderr_data); } else if (action->stdout_data) { cmd->output = strdup(action->stdout_data); } cmd_finalize(cmd, rsc); } static void stonith_action_complete(lrmd_cmd_t * cmd, int rc) { int recurring = cmd->interval; lrmd_rsc_t *rsc = NULL; cmd->exec_rc = get_uniform_rc(PCMK_RESOURCE_CLASS_STONITH, cmd->action, rc); rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id); if (cmd->lrmd_op_status == PCMK_LRM_OP_CANCELLED) { recurring = 0; /* do nothing */ } else if (rc == -ENODEV && safe_str_eq(cmd->action, "monitor")) { /* Not registered == inactive */ cmd->lrmd_op_status = PCMK_LRM_OP_DONE; cmd->exec_rc = PCMK_OCF_NOT_RUNNING; } else if (rc) { /* Attempt to map return codes to op status if possible */ switch (rc) { case -EPROTONOSUPPORT: cmd->lrmd_op_status = PCMK_LRM_OP_NOTSUPPORTED; break; case -ETIME: cmd->lrmd_op_status = PCMK_LRM_OP_TIMEOUT; break; default: /* TODO: This looks wrong. Status should be _DONE and exec_rc set to an error */ cmd->lrmd_op_status = PCMK_LRM_OP_ERROR; } } else { /* command successful */ cmd->lrmd_op_status = PCMK_LRM_OP_DONE; if (safe_str_eq(cmd->action, "start") && rsc) { rsc->stonith_started = 1; } } if (recurring && rsc) { if (cmd->stonith_recurring_id) { g_source_remove(cmd->stonith_recurring_id); } cmd->stonith_recurring_id = g_timeout_add(cmd->interval, stonith_recurring_op_helper, cmd); } cmd_finalize(cmd, rsc); } static void lrmd_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data) { stonith_action_complete(data->userdata, data->rc); } void stonith_connection_failed(void) { GHashTableIter iter; GList *cmd_list = NULL; GList *cmd_iter = NULL; lrmd_rsc_t *rsc = NULL; char *key = NULL; g_hash_table_iter_init(&iter, rsc_list); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) { if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) { if (rsc->active) { cmd_list = g_list_append(cmd_list, rsc->active); } if (rsc->recurring_ops) { cmd_list = g_list_concat(cmd_list, rsc->recurring_ops); } if (rsc->pending_ops) { cmd_list = g_list_concat(cmd_list, rsc->pending_ops); } rsc->pending_ops = rsc->recurring_ops = NULL; } } if (!cmd_list) { return; } crm_err("STONITH connection failed, finalizing %d pending operations.", g_list_length(cmd_list)); for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) { stonith_action_complete(cmd_iter->data, -ENOTCONN); } g_list_free(cmd_list); } static int lrmd_rsc_execute_stonith(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { int rc = 0; int do_monitor = 0; stonith_t *stonith_api = get_stonith_connection(); if (!stonith_api) { cmd->exec_rc = get_uniform_rc(PCMK_RESOURCE_CLASS_STONITH, cmd->action, -ENOTCONN); cmd->lrmd_op_status = PCMK_LRM_OP_ERROR; cmd_finalize(cmd, rsc); return -EUNATCH; } if (safe_str_eq(cmd->action, "start")) { char *key = NULL; char *value = NULL; stonith_key_value_t *device_params = NULL; if (cmd->params) { GHashTableIter iter; g_hash_table_iter_init(&iter, cmd->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { device_params = stonith_key_value_add(device_params, key, value); } } /* Stonith automatically registers devices from the IPC when changes occur, * but to avoid a possible race condition between stonith receiving the IPC update * and the lrmd requesting that resource, the lrmd still registers the device as well. * Stonith knows how to handle duplicate device registrations correctly. */ rc = stonith_api->cmds->register_device(stonith_api, st_opt_sync_call, cmd->rsc_id, rsc->provider, rsc->type, device_params); stonith_key_value_freeall(device_params, 1, 1); if (rc == 0) { do_monitor = 1; } } else if (safe_str_eq(cmd->action, "stop")) { rc = stonith_api->cmds->remove_device(stonith_api, st_opt_sync_call, cmd->rsc_id); rsc->stonith_started = 0; } else if (safe_str_eq(cmd->action, "monitor")) { if (cmd->interval) { do_monitor = 1; } else { rc = rsc->stonith_started ? 0 : -ENODEV; } } if (!do_monitor) { goto cleanup_stonith_exec; } rc = stonith_api->cmds->monitor(stonith_api, 0, cmd->rsc_id, cmd->timeout / 1000); rc = stonith_api->cmds->register_callback(stonith_api, rc, 0, 0, cmd, "lrmd_stonith_callback", lrmd_stonith_callback); /* don't cleanup yet, we will find out the result of the monitor later */ if (rc > 0) { rsc->active = cmd; return rc; } else if (rc == 0) { rc = -1; } cleanup_stonith_exec: stonith_action_complete(cmd, rc); return rc; } static int lrmd_rsc_execute_service_lib(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { svc_action_t *action = NULL; GHashTable *params_copy = NULL; CRM_ASSERT(rsc); CRM_ASSERT(cmd); crm_trace("Creating action, resource:%s action:%s class:%s provider:%s agent:%s", rsc->rsc_id, cmd->action, rsc->class, rsc->provider, rsc->type); #if SUPPORT_NAGIOS /* Recurring operations are cancelled anyway for a stop operation */ if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS) && safe_str_eq(cmd->action, "stop")) { cmd->exec_rc = PCMK_OCF_OK; goto exec_done; } #endif if (cmd->params) { params_copy = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if (params_copy != NULL) { g_hash_table_foreach(cmd->params, dup_attr, params_copy); } } - if (cmd->isolation_wrapper) { + if (safe_str_eq(rsc->class, PCMK_ALERT_CLASS)) { + /* In the case of Alert, lrmd always set rsc->type from CRM_alert_path parameter. */ + void *value_lookup = g_hash_table_lookup(params_copy, CRM_ALERT_KEY_PATH); + if (value_lookup != NULL) { + action = services_action_create_generic((char*)value_lookup, NULL); + action->action = strdup(cmd->action); + action->timeout = cmd->timeout; + action->id = strdup(rsc->rsc_id); + action->alert_params = params_copy; + + value_lookup = g_hash_table_lookup(params_copy, CRM_ALERT_NODE_SEQUENCE); + if (value_lookup != NULL) { + action->sequence = crm_atoi(value_lookup, ""); + } + } + } else if (cmd->isolation_wrapper) { g_hash_table_remove(params_copy, "CRM_meta_isolation_wrapper"); action = resources_action_create(rsc->rsc_id, PCMK_RESOURCE_CLASS_OCF, LRMD_ISOLATION_PROVIDER, cmd->isolation_wrapper, cmd->action, /*action will be normalized in wrapper*/ cmd->interval, cmd->timeout, params_copy, cmd->service_flags); } else { action = resources_action_create(rsc->rsc_id, rsc->class, rsc->provider, rsc->type, normalize_action_name(rsc, cmd->action), cmd->interval, cmd->timeout, params_copy, cmd->service_flags); } if (!action) { crm_err("Failed to create action, action:%s on resource %s", cmd->action, rsc->rsc_id); cmd->lrmd_op_status = PCMK_LRM_OP_ERROR; goto exec_done; } action->cb_data = cmd; /* 'cmd' may not be valid after this point if * services_action_async() returned TRUE * * Upstart and systemd both synchronously determine monitor/status * results and call action_complete (which may free 'cmd') if necessary. */ if (services_action_async(action, action_complete)) { return TRUE; } cmd->exec_rc = action->rc; if(action->status != PCMK_LRM_OP_DONE) { cmd->lrmd_op_status = action->status; } else { cmd->lrmd_op_status = PCMK_LRM_OP_ERROR; } services_action_free(action); action = NULL; exec_done: cmd_finalize(cmd, rsc); return TRUE; } static gboolean lrmd_rsc_execute(lrmd_rsc_t * rsc) { lrmd_cmd_t *cmd = NULL; CRM_CHECK(rsc != NULL, return FALSE); if (rsc->active) { crm_trace("%s is still active", rsc->rsc_id); return TRUE; } if (rsc->pending_ops) { GList *first = rsc->pending_ops; cmd = first->data; if (cmd->delay_id) { crm_trace ("Command %s %s was asked to run too early, waiting for start_delay timeout of %dms", cmd->rsc_id, cmd->action, cmd->start_delay); return TRUE; } rsc->pending_ops = g_list_remove_link(rsc->pending_ops, first); g_list_free_1(first); #ifdef HAVE_SYS_TIMEB_H if (cmd->t_first_run.time == 0) { ftime(&cmd->t_first_run); } ftime(&cmd->t_run); #endif } if (!cmd) { crm_trace("Nothing further to do for %s", rsc->rsc_id); return TRUE; } rsc->active = cmd; /* only one op at a time for a rsc */ if (cmd->interval) { rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd); } log_execute(cmd); if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) { lrmd_rsc_execute_stonith(rsc, cmd); } else { lrmd_rsc_execute_service_lib(rsc, cmd); } return TRUE; } static gboolean lrmd_rsc_dispatch(gpointer user_data) { return lrmd_rsc_execute(user_data); } void free_rsc(gpointer data) { GListPtr gIter = NULL; lrmd_rsc_t *rsc = data; int is_stonith = safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH); gIter = rsc->pending_ops; while (gIter != NULL) { GListPtr next = gIter->next; lrmd_cmd_t *cmd = gIter->data; /* command was never executed */ cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED; cmd_finalize(cmd, NULL); gIter = next; } /* frees list, but not list elements. */ g_list_free(rsc->pending_ops); gIter = rsc->recurring_ops; while (gIter != NULL) { GListPtr next = gIter->next; lrmd_cmd_t *cmd = gIter->data; if (is_stonith) { cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED; /* If a stonith command is in-flight, just mark it as cancelled; * it is not safe to finalize/free the cmd until the stonith api * says it has either completed or timed out. */ if (rsc->active != cmd) { cmd_finalize(cmd, NULL); } } else { /* This command is already handed off to service library, * let service library cancel it and tell us via the callback * when it is cancelled. The rsc can be safely destroyed * even if we are waiting for the cancel result */ services_action_cancel(rsc->rsc_id, normalize_action_name(rsc, cmd->action), cmd->interval); } gIter = next; } /* frees list, but not list elements. */ g_list_free(rsc->recurring_ops); free(rsc->rsc_id); free(rsc->class); free(rsc->provider); free(rsc->type); mainloop_destroy_trigger(rsc->work); free(rsc); } static int process_lrmd_signon(crm_client_t * client, uint32_t id, xmlNode * request) { xmlNode *reply = create_xml_node(NULL, "reply"); const char *is_ipc_provider = crm_element_value(request, F_LRMD_IS_IPC_PROVIDER); const char *protocol_version = crm_element_value(request, F_LRMD_PROTOCOL_VERSION); if (compare_version(protocol_version, LRMD_PROTOCOL_VERSION) < 0) { crm_err("Cluster API version must be greater than or equal to %s, not %s", LRMD_PROTOCOL_VERSION, protocol_version); crm_xml_add_int(reply, F_LRMD_RC, -EPROTO); crm_xml_add(reply, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION); } crm_xml_add(reply, F_LRMD_OPERATION, CRM_OP_REGISTER); crm_xml_add(reply, F_LRMD_CLIENTID, client->id); crm_xml_add(reply, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION); lrmd_server_send_reply(client, id, reply); if (crm_is_true(is_ipc_provider)) { /* this is a remote connection from a cluster nodes crmd */ #ifdef SUPPORT_REMOTE ipc_proxy_add_provider(client); #endif } free_xml(reply); return pcmk_ok; } static int process_lrmd_rsc_register(crm_client_t * client, uint32_t id, xmlNode * request) { int rc = pcmk_ok; lrmd_rsc_t *rsc = build_rsc_from_xml(request); lrmd_rsc_t *dup = g_hash_table_lookup(rsc_list, rsc->rsc_id); if (dup && safe_str_eq(rsc->class, dup->class) && safe_str_eq(rsc->provider, dup->provider) && safe_str_eq(rsc->type, dup->type)) { crm_warn("Can't add, RSC '%s' already present in the rsc list (%d active resources)", rsc->rsc_id, g_hash_table_size(rsc_list)); free_rsc(rsc); return rc; } g_hash_table_replace(rsc_list, rsc->rsc_id, rsc); crm_info("Added '%s' to the rsc list (%d active resources)", rsc->rsc_id, g_hash_table_size(rsc_list)); return rc; } static void process_lrmd_get_rsc_info(crm_client_t * client, uint32_t id, xmlNode * request) { int rc = pcmk_ok; int send_rc = 0; int call_id = 0; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); xmlNode *reply = NULL; lrmd_rsc_t *rsc = NULL; crm_element_value_int(request, F_LRMD_CALLID, &call_id); if (!rsc_id) { rc = -ENODEV; goto get_rsc_done; } if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) { crm_info("Resource '%s' not found (%d active resources)", rsc_id, g_hash_table_size(rsc_list)); rc = -ENODEV; goto get_rsc_done; } get_rsc_done: reply = create_xml_node(NULL, T_LRMD_REPLY); crm_xml_add(reply, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add_int(reply, F_LRMD_RC, rc); crm_xml_add_int(reply, F_LRMD_CALLID, call_id); if (rsc) { crm_xml_add(reply, F_LRMD_RSC_ID, rsc->rsc_id); crm_xml_add(reply, F_LRMD_CLASS, rsc->class); crm_xml_add(reply, F_LRMD_PROVIDER, rsc->provider); crm_xml_add(reply, F_LRMD_TYPE, rsc->type); } send_rc = lrmd_server_send_reply(client, id, reply); if (send_rc < 0) { crm_warn("LRMD reply to %s failed: %d", client->name, send_rc); } free_xml(reply); } static int process_lrmd_rsc_unregister(crm_client_t * client, uint32_t id, xmlNode * request) { int rc = pcmk_ok; lrmd_rsc_t *rsc = NULL; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); if (!rsc_id) { return -ENODEV; } if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) { crm_info("Resource '%s' not found (%d active resources)", rsc_id, g_hash_table_size(rsc_list)); return pcmk_ok; } if (rsc->active) { /* let the caller know there are still active ops on this rsc to watch for */ crm_trace("Operation still in progress: %p", rsc->active); rc = -EINPROGRESS; } g_hash_table_remove(rsc_list, rsc_id); return rc; } static int process_lrmd_rsc_exec(crm_client_t * client, uint32_t id, xmlNode * request) { lrmd_rsc_t *rsc = NULL; lrmd_cmd_t *cmd = NULL; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); int call_id; if (!rsc_id) { return -EINVAL; } if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) { crm_info("Resource '%s' not found (%d active resources)", rsc_id, g_hash_table_size(rsc_list)); return -ENODEV; } cmd = create_lrmd_cmd(request, client, rsc); call_id = cmd->call_id; /* Don't reference cmd after handing it off to be scheduled. * The cmd could get merged and freed. */ schedule_lrmd_cmd(rsc, cmd); return call_id; } +static int +process_lrmd_alert_exec(crm_client_t * client, uint32_t id, xmlNode * request) +{ + lrmd_rsc_t *alert = NULL; + lrmd_cmd_t *cmd = NULL; + xmlNode *alert_xml = get_xpath_object("//" F_LRMD_ALERT, request, LOG_ERR); + const char *alert_id = crm_element_value(alert_xml, F_LRMD_ALERT_ID); + int call_id; + + if (!alert_id) { + return -EINVAL; + } + + alert = g_hash_table_lookup(rsc_list, alert_id); + if (alert == NULL) { + crm_info("Alert '%s' not found (%d active resources)", + alert_id, g_hash_table_size(rsc_list)); + return -ENODEV; + } + + call_id = pcmk_ok; + cmd = create_alert_cmd(request, client, alert); + call_id = cmd->call_id; + + /* Don't reference cmd after handing it off to be scheduled. + * The cmd could get merged and freed. */ + schedule_lrmd_cmd(alert, cmd); + + return call_id; +} + static int cancel_op(const char *rsc_id, const char *action, int interval) { GListPtr gIter = NULL; lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, rsc_id); /* How to cancel an action. * 1. Check pending ops list, if it hasn't been handed off * to the service library or stonith recurring list remove * it there and that will stop it. * 2. If it isn't in the pending ops list, then it's either a * recurring op in the stonith recurring list, or the service * library's recurring list. Stop it there * 3. If not found in any lists, then this operation has either * been executed already and is not a recurring operation, or * never existed. */ if (!rsc) { return -ENODEV; } for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) { lrmd_cmd_t *cmd = gIter->data; if (safe_str_eq(cmd->action, action) && cmd->interval == interval) { cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED; cmd_finalize(cmd, rsc); return pcmk_ok; } } if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) { /* The service library does not handle stonith operations. * We have to handle recurring stonith operations ourselves. */ for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) { lrmd_cmd_t *cmd = gIter->data; if (safe_str_eq(cmd->action, action) && cmd->interval == interval) { cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED; if (rsc->active != cmd) { cmd_finalize(cmd, rsc); } return pcmk_ok; } } } else if (services_action_cancel(rsc_id, normalize_action_name(rsc, action), interval) == TRUE) { /* The service library will tell the action_complete callback function * this action was cancelled, which will destroy the cmd and remove * it from the recurring_op list. Do not do that in this function * if the service library says it cancelled it. */ return pcmk_ok; } return -EOPNOTSUPP; } static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id) { GList *cmd_list = NULL; GList *cmd_iter = NULL; /* Notice a copy of each list is created when concat is called. * This prevents odd behavior from occurring when the cmd_list * is iterated through later on. It is possible the cancel_op * function may end up modifying the recurring_ops and pending_ops * lists. If we did not copy those lists, our cmd_list iteration * could get messed up.*/ if (rsc->recurring_ops) { cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->recurring_ops)); } if (rsc->pending_ops) { cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->pending_ops)); } if (!cmd_list) { return; } for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) { lrmd_cmd_t *cmd = cmd_iter->data; if (cmd->interval == 0) { continue; } if (client_id && safe_str_neq(cmd->client_id, client_id)) { continue; } cancel_op(rsc->rsc_id, cmd->action, cmd->interval); } /* frees only the copied list data, not the cmds */ g_list_free(cmd_list); } static int process_lrmd_rsc_cancel(crm_client_t * client, uint32_t id, xmlNode * request) { xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); const char *action = crm_element_value(rsc_xml, F_LRMD_RSC_ACTION); int interval = 0; crm_element_value_int(rsc_xml, F_LRMD_RSC_INTERVAL, &interval); if (!rsc_id || !action) { return -EINVAL; } return cancel_op(rsc_id, action, interval); } void process_lrmd_message(crm_client_t * client, uint32_t id, xmlNode * request) { int rc = pcmk_ok; int call_id = 0; const char *op = crm_element_value(request, F_LRMD_OPERATION); int do_reply = 0; int do_notify = 0; crm_trace("Processing %s operation from %s", op, client->id); crm_element_value_int(request, F_LRMD_CALLID, &call_id); if (crm_str_eq(op, CRM_OP_IPC_FWD, TRUE)) { #ifdef SUPPORT_REMOTE ipc_proxy_forward_client(client, request); #endif do_reply = 1; } else if (crm_str_eq(op, CRM_OP_REGISTER, TRUE)) { rc = process_lrmd_signon(client, id, request); } else if (crm_str_eq(op, LRMD_OP_RSC_REG, TRUE)) { rc = process_lrmd_rsc_register(client, id, request); do_notify = 1; do_reply = 1; } else if (crm_str_eq(op, LRMD_OP_RSC_INFO, TRUE)) { process_lrmd_get_rsc_info(client, id, request); } else if (crm_str_eq(op, LRMD_OP_RSC_UNREG, TRUE)) { rc = process_lrmd_rsc_unregister(client, id, request); /* don't notify anyone about failed un-registers */ if (rc == pcmk_ok || rc == -EINPROGRESS) { do_notify = 1; } do_reply = 1; } else if (crm_str_eq(op, LRMD_OP_RSC_EXEC, TRUE)) { rc = process_lrmd_rsc_exec(client, id, request); do_reply = 1; } else if (crm_str_eq(op, LRMD_OP_RSC_CANCEL, TRUE)) { rc = process_lrmd_rsc_cancel(client, id, request); do_reply = 1; } else if (crm_str_eq(op, LRMD_OP_POKE, TRUE)) { do_notify = 1; do_reply = 1; } else if (crm_str_eq(op, LRMD_OP_CHECK, TRUE)) { xmlNode *data = get_message_xml(request, F_LRMD_CALLDATA); const char *timeout = crm_element_value(data, F_LRMD_WATCHDOG); CRM_LOG_ASSERT(data != NULL); check_sbd_timeout(timeout); + } else if (crm_str_eq(op, LRMD_OP_ALERT_EXEC, TRUE)) { + rc = process_lrmd_alert_exec(client, id, request); + do_reply = 1; } else { rc = -EOPNOTSUPP; do_reply = 1; crm_err("Unknown %s from %s", op, client->name); crm_log_xml_warn(request, "UnknownOp"); } crm_debug("Processed %s operation from %s: rc=%d, reply=%d, notify=%d", op, client->id, rc, do_reply, do_notify); if (do_reply) { send_reply(client, rc, id, call_id); } if (do_notify) { send_generic_notify(rc, request); } }