diff --git a/attrd/attrd_alerts.c b/attrd/attrd_alerts.c index 58562e8c14..c95360b0c1 100644 --- a/attrd/attrd_alerts.c +++ b/attrd/attrd_alerts.c @@ -1,380 +1,385 @@ /* * Copyright (C) 2015 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include "attrd_alerts.h" #include #include #include #include -GHashTable *alert_info_cache = NULL; +static GListPtr attrd_alert_list = NULL; +static GHashTable *alert_info_cache = NULL; lrmd_t * attrd_lrmd_connect(int max_retry, void callback(lrmd_event_data_t * op)) { int ret = -ENOTCONN; int fails = 0; if (!the_lrmd) { the_lrmd = lrmd_api_new(); } while(fails < max_retry) { the_lrmd->cmds->set_callback(the_lrmd, callback); ret = the_lrmd->cmds->connect(the_lrmd, T_ATTRD, NULL); if (ret != pcmk_ok) { fails++; crm_trace("lrmd_connect RETRY!(%d)", fails); } else { crm_trace("lrmd_connect OK!"); break; } } if (ret != pcmk_ok) { if (the_lrmd->cmds->is_connected(the_lrmd)) { lrmd_api_delete(the_lrmd); } the_lrmd = NULL; } return the_lrmd; } static void config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { crm_time_t *now = crm_time_new(NULL); xmlNode *crmalerts = NULL; if (rc != pcmk_ok) { crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc)); goto bail; } crmalerts = output; if ((crmalerts) && (crm_element_name(crmalerts)) && (strcmp(crm_element_name(crmalerts), XML_CIB_TAG_ALERTS) != 0)) { crmalerts = first_named_child(crmalerts, XML_CIB_TAG_ALERTS); } if (!crmalerts) { crm_err("Local CIB query for " XML_CIB_TAG_ALERTS " section failed"); goto bail; } - pe_unpack_alerts(crmalerts); + pe_free_alert_list(attrd_alert_list); + attrd_alert_list = pe_unpack_alerts(crmalerts); bail: crm_time_free(now); } gboolean attrd_read_options(gpointer user_data) { int call_id; if (the_cib) { call_id = the_cib->cmds->query(the_cib, "//" XML_CIB_TAG_ALERTS, NULL, cib_xpath | cib_scope_local); the_cib->cmds->register_callback_full(the_cib, call_id, 120, FALSE, NULL, "config_query_callback", config_query_callback, free); crm_trace("Querying the CIB... call %d", call_id); } else { crm_err("Querying the CIB...CIB connection not active"); } return TRUE; } void attrd_cib_updated_cb(const char *event, xmlNode * msg) { int rc = -1; int format= 1; xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); xmlNode *change = NULL; xmlXPathObject *xpathObj = NULL; CRM_CHECK(msg != NULL, return); crm_element_value_int(msg, F_CIB_RC, &rc); if (rc < pcmk_ok) { crm_trace("Filter rc=%d (%s)", rc, pcmk_strerror(rc)); return; } crm_element_value_int(patchset, "format", &format); if (format == 1) { if ((xpathObj = xpath_search( msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_ALERTS )) != NULL) { freeXpathObject(xpathObj); mainloop_set_trigger(attrd_config_read); } } else if (format == 2) { for (change = __xml_first_child(patchset); change != NULL; change = __xml_next(change)) { const char *xpath = crm_element_value(change, XML_DIFF_PATH); if (xpath == NULL) { continue; } /* modifying properties */ if (!strstr(xpath, "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_ALERTS)) { xmlNode *section = NULL; const char *name = NULL; /* adding notifications section */ if ((strcmp(xpath, "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION) != 0) || ((section = __xml_first_child(change)) == NULL) || ((name = crm_element_name(section)) == NULL) || (strcmp(name, XML_CIB_TAG_ALERTS) != 0)) { continue; } } mainloop_set_trigger(attrd_config_read); break; } } else { crm_warn("Unknown patch format: %d", format); } } void attrd_alert_fini() { if (alert_info_cache) { g_hash_table_destroy(alert_info_cache); alert_info_cache = NULL; } if (crm_alert_kind_default) { g_strfreev(crm_alert_kind_default); crm_alert_kind_default = NULL; } } static int -exec_alerts(lrmd_t *lrmd, const char *kind, const char *attribute_name, lrmd_key_value_t * params, GListPtr alert_list, GHashTable *info_cache) +exec_alerts(lrmd_t *lrmd, const char *kind, const char *attribute_name, + lrmd_key_value_t *params) { int call_id = 0; static int operations = 0; GListPtr l; crm_time_hr_t *now = crm_time_hr_new(NULL); params = lrmd_set_alert_key_to_lrmd_params(params, CRM_alert_kind, kind); params = lrmd_set_alert_key_to_lrmd_params(params, CRM_alert_version, VERSION); - for (l = g_list_first(alert_list); l; l = g_list_next(l)) { + for (l = g_list_first(attrd_alert_list); l; l = g_list_next(l)) { lrmd_rsc_info_t *rsc = NULL; crm_alert_entry_t *entry = (crm_alert_entry_t *)(l->data); char *timestamp = crm_time_format_hr(entry->tstamp_format, now); lrmd_key_value_t * copy_params = NULL; lrmd_key_value_t *head, *p; if (crm_is_target_alert(entry->select_kind == NULL ? crm_alert_kind_default : entry->select_kind, kind) == FALSE) { crm_trace("Cannot sending '%s' alert to '%s' via '%s'(select_kind=%s)", kind, entry->recipient, entry->path, entry->select_kind == NULL ? CRM_ALERT_KIND_DEFAULT : entry->select_kind_orig); free(timestamp); continue; } if (crm_is_target_alert(entry->select_attribute_name, attribute_name) == FALSE) { crm_trace("Cannot sending '%s' alert to '%s' via '%s'(select_attribute_name=%s attribute_name=%s)", kind, entry->recipient, entry->path, entry->select_attribute_name_orig, attribute_name); free(timestamp); continue; } crm_info("Sending '%s' alert to '%s' via '%s'", kind, entry->recipient, entry->path); rsc = g_hash_table_lookup(alert_info_cache, entry->id); if (rsc == NULL) { rsc = lrmd->cmds->get_rsc_info(lrmd, entry->id, 0); if (!rsc) { lrmd->cmds->register_rsc(lrmd, entry->id, PCMK_ALERT_CLASS, "pacemaker", entry->path, lrmd_opt_drop_recurring); rsc = lrmd->cmds->get_rsc_info(lrmd, entry->id, 0); if (!rsc) { crm_err("Could not add alert %s : %s", entry->id, entry->path); return -1; } /* cache the result */ g_hash_table_insert(alert_info_cache, entry->id, rsc); } } /* Because there is a parameter to turn into every transmission, Copy a parameter. */ head = params; while (head) { p = head->next; copy_params = lrmd_key_value_add(copy_params, head->key, head->value); head = p; } operations++; copy_params = lrmd_key_value_add(copy_params, CRM_ALERT_KEY_PATH, entry->path); copy_params = lrmd_set_alert_key_to_lrmd_params(copy_params, CRM_alert_recipient, entry->recipient); copy_params = lrmd_set_alert_key_to_lrmd_params(copy_params, CRM_alert_node_sequence, crm_itoa(operations)); copy_params = lrmd_set_alert_key_to_lrmd_params(copy_params, CRM_alert_timestamp, timestamp); lrmd_set_alert_envvar_to_lrmd_params(copy_params, entry); call_id = lrmd->cmds->exec_alert(lrmd, strdup(entry->id), entry->timeout, lrmd_opt_notify_orig_only, copy_params); if (call_id <= 0) { crm_err("Operation %s on %s failed: %d", "start", rsc->id, call_id); } else { crm_info("Operation %s on %s compete: %d", "start", rsc->id, call_id); } free(timestamp); } if (now) { free(now); } return call_id; } static void free_alert_info(gpointer value) { lrmd_rsc_info_t *rsc_info = value; lrmd_free_rsc_info(rsc_info); } static void attrd_alert_lrm_op_callback(lrmd_event_data_t * op) { CRM_CHECK(op != NULL, return); if (op->type == lrmd_event_disconnect) { crm_info("Lost connection to LRMD service!"); if (the_lrmd->cmds->is_connected(the_lrmd)) { the_lrmd->cmds->disconnect(the_lrmd); lrmd_api_delete(the_lrmd); } the_lrmd = NULL; return; } else if (op->type != lrmd_event_exec_complete) { return; } if (op->params != NULL) { void *value_tmp1, *value_tmp2; value_tmp1 = g_hash_table_lookup(op->params, CRM_ALERT_KEY_PATH); if (value_tmp1 != NULL) { value_tmp2 = g_hash_table_lookup(op->params, CRM_ALERT_NODE_SEQUENCE); if(op->rc == 0) { crm_info("Alert %s (%s) complete", value_tmp2, value_tmp1); } else { crm_warn("Alert %s (%s) failed: %d", value_tmp2, value_tmp1, op->rc); } } } } -int -attrd_send_alerts(lrmd_t *lrmd, const char *node, uint32_t nodeid, const char *attribute_name, const char *attribute_value, GListPtr alert_list) +int +attrd_send_alerts(lrmd_t *lrmd, const char *node, uint32_t nodeid, + const char *attribute_name, const char *attribute_value) { int ret = pcmk_ok; lrmd_key_value_t *params = NULL; if (lrmd == NULL) { lrmd = attrd_lrmd_connect(10, attrd_alert_lrm_op_callback); if (lrmd == NULL) { crm_warn("LRMD connection not active"); return ret; } } crm_trace("LRMD connection active"); if (alert_info_cache == NULL) { alert_info_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_alert_info); } params = lrmd_set_alert_key_to_lrmd_params(params, CRM_alert_node, node); params = lrmd_set_alert_key_to_lrmd_params(params, CRM_alert_nodeid, crm_itoa(nodeid)); params = lrmd_set_alert_key_to_lrmd_params(params, CRM_alert_attribute_name, attribute_name); params = lrmd_set_alert_key_to_lrmd_params(params, CRM_alert_attribute_value, attribute_value == NULL ? "null" : attribute_value); - ret = exec_alerts(lrmd, "attribute", attribute_name, params, alert_list, alert_info_cache); + ret = exec_alerts(lrmd, "attribute", attribute_name, params); crm_trace("ret : %d, node : %s, nodeid: %s, name: %s, value : %s", ret, node, crm_itoa(nodeid), attribute_name, attribute_value); if (params) { lrmd_key_value_freeall(params); } return ret; } #if HAVE_ATOMIC_ATTRD void set_alert_attribute_value(GHashTable *t, attribute_value_t *v) { attribute_value_t *a_v = NULL; a_v = calloc(1, sizeof(attribute_value_t)); CRM_ASSERT(a_v != NULL); a_v->nodeid = v->nodeid; a_v->nodename = strdup(v->nodename); if (v->current != NULL) { a_v->current = strdup(v->current); } g_hash_table_replace(t, a_v->nodename, a_v); } void send_alert_attributes_value(attribute_t *a, GHashTable *t) { int call_id = 0; attribute_value_t *at = NULL; GHashTableIter vIter; g_hash_table_iter_init(&vIter, t); while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & at)) { - call_id = attrd_send_alerts(the_lrmd, at->nodename, at->nodeid, a->id, at->current, crm_alert_list); + call_id = attrd_send_alerts(the_lrmd, at->nodename, at->nodeid, a->id, + at->current); crm_trace("call_id : %d, nodename : %s, nodeid: %d, name: %s, value : %s", call_id, at->nodename, at->nodeid, a->id, at->current); } } #endif diff --git a/attrd/attrd_alerts.h b/attrd/attrd_alerts.h index ecd50e1e0f..2879b0e8f1 100644 --- a/attrd/attrd_alerts.h +++ b/attrd/attrd_alerts.h @@ -1,41 +1,42 @@ /* * Copyright (C) 2015 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef ATTRD_ALERT__H # define ATTRD_ALERT__H # include # include # include extern cib_t *the_cib; extern lrmd_t *the_lrmd; extern crm_trigger_t *attrd_config_read; lrmd_t *attrd_lrmd_connect(int max_retry, void callback(lrmd_event_data_t * op)); gboolean attrd_read_options(gpointer user_data); void attrd_cib_updated_cb(const char *event, xmlNode * msg); void attrd_enable_alerts(const char *script, const char *target); void attrd_alert_fini(void); -int attrd_send_alerts(lrmd_t *lrmd, const char *node, uint32_t nodeid, const char *attribute_name, const char *attribute_value, GListPtr alert_list); +int attrd_send_alerts(lrmd_t *lrmd, const char *node, uint32_t nodeid, + const char *attribute_name, const char *attribute_value); #if HAVE_ATOMIC_ATTRD void set_alert_attribute_value(GHashTable *t, attribute_value_t *v); void send_alert_attributes_value(attribute_t *a, GHashTable *t); #endif #endif diff --git a/attrd/legacy.c b/attrd/legacy.c index 447deecbdf..bb95bb7941 100644 --- a/attrd/legacy.c +++ b/attrd/legacy.c @@ -1,1250 +1,1252 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "attrd_alerts.h" #define OPTARGS "hV" #if SUPPORT_HEARTBEAT ll_cluster_t *attrd_cluster_conn; #endif char *attrd_uname = NULL; char *attrd_uuid = NULL; uint32_t attrd_nodeid = 0; GHashTable *attr_hash = NULL; cib_t *the_cib = NULL; lrmd_t *the_lrmd = NULL; crm_trigger_t *attrd_config_read = NULL; /* Convenience macro for registering a CIB callback. * Check the_cib != NULL before using. */ #define register_cib_callback(call_id, data, fn, free_fn) \ the_cib->cmds->register_callback_full(the_cib, call_id, 120, FALSE, \ data, #fn, fn, free_fn) typedef struct attr_hash_entry_s { char *uuid; char *id; char *set; char *section; char *value; char *stored_value; int timeout; char *dampen; guint timer_id; char *user; } attr_hash_entry_t; void attrd_local_callback(xmlNode * msg); gboolean attrd_timer_callback(void *user_data); gboolean attrd_trigger_update(attr_hash_entry_t * hash_entry); void attrd_perform_update(attr_hash_entry_t * hash_entry); static void update_local_attr(xmlNode *msg, attr_hash_entry_t *hash_entry); static void free_hash_entry(gpointer data) { attr_hash_entry_t *entry = data; if (entry == NULL) { return; } free(entry->id); free(entry->set); free(entry->dampen); free(entry->section); free(entry->uuid); free(entry->value); free(entry->stored_value); free(entry->user); free(entry); } /* Exit code means? */ static int32_t attrd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; crm_client_t *client = crm_client_get(c); xmlNode *msg = crm_ipcs_recv(client, data, size, &id, &flags); crm_ipcs_send_ack(client, id, flags, "ack", __FUNCTION__, __LINE__); if (msg == NULL) { crm_debug("No msg from %d (%p)", crm_ipcs_client_pid(c), c); return 0; } #if ENABLE_ACL CRM_ASSERT(client->user != NULL); crm_acl_get_set_user(msg, F_ATTRD_USER, client->user); #endif crm_trace("Processing msg from %d (%p)", crm_ipcs_client_pid(c), c); crm_log_xml_trace(msg, __FUNCTION__); attrd_local_callback(msg); free_xml(msg); return 0; } static void usage(const char *cmd, int exit_status) { FILE *stream; stream = exit_status ? stderr : stdout; fprintf(stream, "usage: %s [-srkh] [-c configure file]\n", cmd); /* fprintf(stream, "\t-d\tsets debug level\n"); */ /* fprintf(stream, "\t-s\tgets daemon status\n"); */ /* fprintf(stream, "\t-r\trestarts daemon\n"); */ /* fprintf(stream, "\t-k\tstops daemon\n"); */ /* fprintf(stream, "\t-h\thelp message\n"); */ fflush(stream); crm_exit(exit_status); } static void stop_attrd_timer(attr_hash_entry_t * hash_entry) { if (hash_entry != NULL && hash_entry->timer_id != 0) { crm_trace("Stopping %s timer", hash_entry->id); g_source_remove(hash_entry->timer_id); hash_entry->timer_id = 0; } } static void log_hash_entry(int level, attr_hash_entry_t * entry, const char *text) { do_crm_log(level, "%s: Set: %s, Name: %s, Value: %s, Timeout: %s", text, entry->section, entry->id, entry->value, entry->dampen); } static attr_hash_entry_t * find_hash_entry(xmlNode * msg) { const char *value = NULL; const char *attr = crm_element_value(msg, F_ATTRD_ATTRIBUTE); attr_hash_entry_t *hash_entry = NULL; if (attr == NULL) { crm_info("Ignoring message with no attribute name"); return NULL; } hash_entry = g_hash_table_lookup(attr_hash, attr); if (hash_entry == NULL) { /* create one and add it */ crm_info("Creating hash entry for %s", attr); hash_entry = calloc(1, sizeof(attr_hash_entry_t)); hash_entry->id = strdup(attr); g_hash_table_insert(attr_hash, hash_entry->id, hash_entry); hash_entry = g_hash_table_lookup(attr_hash, attr); CRM_CHECK(hash_entry != NULL, return NULL); } value = crm_element_value(msg, F_ATTRD_SET); if (value != NULL) { free(hash_entry->set); hash_entry->set = strdup(value); crm_debug("\t%s->set: %s", attr, value); } value = crm_element_value(msg, F_ATTRD_SECTION); if (value == NULL) { value = XML_CIB_TAG_STATUS; } free(hash_entry->section); hash_entry->section = strdup(value); crm_trace("\t%s->section: %s", attr, value); value = crm_element_value(msg, F_ATTRD_DAMPEN); if (value != NULL) { free(hash_entry->dampen); hash_entry->dampen = strdup(value); hash_entry->timeout = crm_get_msec(value); crm_trace("\t%s->timeout: %s", attr, value); } #if ENABLE_ACL free(hash_entry->user); hash_entry->user = NULL; value = crm_element_value(msg, F_ATTRD_USER); if (value != NULL) { hash_entry->user = strdup(value); crm_trace("\t%s->user: %s", attr, value); } #endif log_hash_entry(LOG_DEBUG_2, hash_entry, "Found (and updated) entry:"); return hash_entry; } /*! * \internal * \brief Clear failure-related attributes for local node * * \param[in] xml XML of ATTRD_OP_CLEAR_FAILURE request */ static void local_clear_failure(xmlNode *xml) { const char *rsc = crm_element_value(xml, F_ATTRD_RESOURCE); const char *what = rsc? rsc : "all resources"; const char *op = crm_element_value(xml, F_ATTRD_OPERATION); const char *interval_s = crm_element_value(xml, F_ATTRD_INTERVAL); int interval = crm_get_interval(interval_s); regex_t regex; GHashTableIter iter; attr_hash_entry_t *hash_entry = NULL; if (attrd_failure_regex(®ex, rsc, op, interval) != pcmk_ok) { crm_info("Ignoring invalid request to clear %s", (rsc? rsc : "all resources")); return; } crm_debug("Clearing %s locally", what); /* Make sure value is not set, so we delete */ if (crm_element_value(xml, F_ATTRD_VALUE)) { crm_xml_replace(xml, F_ATTRD_VALUE, NULL); } g_hash_table_iter_init(&iter, attr_hash); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &hash_entry)) { if (regexec(®ex, hash_entry->id, 0, NULL, 0) == 0) { crm_trace("Matched %s when clearing %s", hash_entry->id, what); update_local_attr(xml, hash_entry); } } regfree(®ex); } static void remote_clear_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { if (rc == 0) { crm_debug("Successfully cleared failures using %s", (char *) user_data); } else { crm_notice("Failed to clear failures: %s " CRM_XS " call=%d xpath=%s rc=%d", pcmk_strerror(rc), call_id, (char *) user_data, rc); } } /* xpath component to match an id attribute (format takes remote node name) */ #define XPATH_ID "[@" XML_ATTR_UUID "='%s']" /* Define the start of an xpath to match a remote node transient attribute * (argument must be either an empty string to match for all remote nodes, * or XPATH_ID to match for a single remote node) */ #define XPATH_REMOTE_ATTR(x) "/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS \ "/" XML_CIB_TAG_STATE "[@" XML_NODE_IS_REMOTE "='true']" x \ "/" XML_TAG_TRANSIENT_NODEATTRS "/" XML_TAG_ATTR_SETS "/" XML_CIB_TAG_NVPAIR /* xpath component to match an attribute name exactly */ #define XPATH_NAME_IS(x) "@" XML_NVPAIR_ATTR_NAME "='" x "'" /* xpath component to match an attribute name by prefix */ #define XPATH_NAME_START(x) "starts-with(@" XML_NVPAIR_ATTR_NAME ", '" x "')" /* xpath ending to clear all resources */ #define XPATH_CLEAR_ALL \ "[" XPATH_NAME_START(CRM_FAIL_COUNT_PREFIX "-") \ " or " XPATH_NAME_START(CRM_LAST_FAILURE_PREFIX "-") "]" /* xpath ending to clear all operations for one resource * (format takes resource name x 4) * * @COMPAT attributes set < 1.1.17: * also match older attributes that do not have the operation part */ #define XPATH_CLEAR_ONE \ "[" XPATH_NAME_IS(CRM_FAIL_COUNT_PREFIX "-%s") \ " or " XPATH_NAME_IS(CRM_LAST_FAILURE_PREFIX "-%s") \ " or " XPATH_NAME_START(CRM_FAIL_COUNT_PREFIX "-%s#") \ " or " XPATH_NAME_START(CRM_LAST_FAILURE_PREFIX "-%s#") "]" /* xpath ending to clear one operation for one resource * (format takes resource name x 2, resource name + operation + interval x 2) * * @COMPAT attributes set < 1.1.17: * also match older attributes that do not have the operation part */ #define XPATH_CLEAR_OP \ "[" XPATH_NAME_IS(CRM_FAIL_COUNT_PREFIX "-%s") \ " or " XPATH_NAME_IS(CRM_LAST_FAILURE_PREFIX "-%s") \ " or " XPATH_NAME_IS(CRM_FAIL_COUNT_PREFIX "-%s#%s_%d") \ " or " XPATH_NAME_IS(CRM_LAST_FAILURE_PREFIX "-%s#%s_%d") "]" /*! * \internal * \brief Clear failure-related attributes for Pacemaker Remote node(s) * * \param[in] xml XML of ATTRD_OP_CLEAR_FAILURE request */ static void remote_clear_failure(xmlNode *xml) { const char *rsc = crm_element_value(xml, F_ATTRD_RESOURCE); const char *host = crm_element_value(xml, F_ATTRD_HOST); const char *op = crm_element_value(xml, F_ATTRD_OPERATION); int rc = pcmk_ok; char *xpath; if (the_cib == NULL) { crm_info("Ignoring request to clear %s on %s because not connected to CIB", (rsc? rsc : "all resources"), (host? host: "all remote nodes")); return; } /* Build an xpath to clear appropriate attributes */ if (rsc == NULL) { /* No resource specified, clear all resources */ if (host == NULL) { xpath = crm_strdup_printf(XPATH_REMOTE_ATTR("") XPATH_CLEAR_ALL); } else { xpath = crm_strdup_printf(XPATH_REMOTE_ATTR(XPATH_ID) XPATH_CLEAR_ALL, host); } } else if (op == NULL) { /* Resource but no operation specified, clear all operations */ if (host == NULL) { xpath = crm_strdup_printf(XPATH_REMOTE_ATTR("") XPATH_CLEAR_ONE, rsc, rsc, rsc, rsc); } else { xpath = crm_strdup_printf(XPATH_REMOTE_ATTR(XPATH_ID) XPATH_CLEAR_ONE, host, rsc, rsc, rsc, rsc); } } else { /* Resource and operation specified */ const char *interval_s = crm_element_value(xml, F_ATTRD_INTERVAL); int interval = crm_get_interval(interval_s); if (host == NULL) { xpath = crm_strdup_printf(XPATH_REMOTE_ATTR("") XPATH_CLEAR_OP, rsc, rsc, rsc, op, interval, rsc, op, interval); } else { xpath = crm_strdup_printf(XPATH_REMOTE_ATTR(XPATH_ID) XPATH_CLEAR_OP, host, rsc, rsc, rsc, op, interval, rsc, op, interval); } } crm_trace("Clearing attributes matching %s", xpath); rc = the_cib->cmds->delete(the_cib, xpath, NULL, cib_xpath|cib_multiple); register_cib_callback(rc, xpath, remote_clear_callback, free); } static void process_xml_request(xmlNode *xml) { attr_hash_entry_t *hash_entry = NULL; const char *from = crm_element_value(xml, F_ORIG); const char *op = crm_element_value(xml, F_ATTRD_TASK); const char *host = crm_element_value(xml, F_ATTRD_HOST); const char *ignore = crm_element_value(xml, F_ATTRD_IGNORE_LOCALLY); if (host && safe_str_eq(host, attrd_uname)) { crm_info("%s relayed from %s", (op? op : "Request"), from); attrd_local_callback(xml); } else if (safe_str_eq(op, ATTRD_OP_PEER_REMOVE)) { CRM_CHECK(host != NULL, return); crm_debug("Removing %s from peer caches for %s", host, from); crm_remote_peer_cache_remove(host); reap_crm_member(0, host); } else if (safe_str_eq(op, ATTRD_OP_CLEAR_FAILURE)) { local_clear_failure(xml); } else if ((ignore == NULL) || safe_str_neq(from, attrd_uname)) { crm_trace("%s message from %s", op, from); hash_entry = find_hash_entry(xml); stop_attrd_timer(hash_entry); attrd_perform_update(hash_entry); } } #if SUPPORT_HEARTBEAT static void attrd_ha_connection_destroy(gpointer user_data) { crm_trace("Invoked"); if (attrd_shutting_down()) { /* we signed out, so this is expected */ crm_info("Heartbeat disconnection complete"); return; } crm_crit("Lost connection to heartbeat service!"); if (attrd_mainloop_running()) { attrd_quit_mainloop(); return; } crm_exit(pcmk_ok); } static void attrd_ha_callback(HA_Message * msg, void *private_data) { xmlNode *xml = convert_ha_message(NULL, msg, __FUNCTION__); process_xml_request(xml); free_xml(xml); } #endif #if SUPPORT_COROSYNC static void attrd_cs_dispatch(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { uint32_t kind = 0; xmlNode *xml = NULL; const char *from = NULL; char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); if(data == NULL) { return; } if (kind == crm_class_cluster) { xml = string2xml(data); if (xml == NULL) { crm_err("Bad message received: '%.120s'", data); } } if (xml != NULL) { /* crm_xml_add_int(xml, F_SEQ, wrapper->id); */ crm_xml_add(xml, F_ORIG, from); process_xml_request(xml); free_xml(xml); } free(data); } static void attrd_cs_destroy(gpointer unused) { if (attrd_shutting_down()) { /* we signed out, so this is expected */ crm_info("Corosync disconnection complete"); return; } crm_crit("Lost connection to Corosync service!"); if (attrd_mainloop_running()) { attrd_quit_mainloop(); return; } crm_exit(EINVAL); } #endif static void attrd_cib_connection_destroy(gpointer user_data) { cib_t *conn = user_data; conn->cmds->signoff(conn); /* Ensure IPC is cleaned up */ if (attrd_shutting_down()) { crm_info("Connection to the CIB terminated..."); } else { /* eventually this will trigger a reconnect, not a shutdown */ crm_err("Connection to the CIB terminated..."); crm_exit(ENOTCONN); } return; } static void update_for_hash_entry(gpointer key, gpointer value, gpointer user_data) { attr_hash_entry_t *entry = value; if (entry->value != NULL || entry->stored_value != NULL) { attrd_timer_callback(value); } } static void local_update_for_hash_entry(gpointer key, gpointer value, gpointer user_data) { attr_hash_entry_t *entry = value; if (entry->timer_id == 0) { crm_trace("Performing local-only update after replace for %s", entry->id); attrd_perform_update(entry); /* } else { * just let the timer expire and attrd_timer_callback() will do the right thing */ } } static void do_cib_replaced(const char *event, xmlNode * msg) { crm_info("Updating all attributes after %s event", event); g_hash_table_foreach(attr_hash, local_update_for_hash_entry, NULL); } static gboolean cib_connect(void *user_data) { static int attempts = 1; static int max_retry = 20; gboolean was_err = FALSE; static cib_t *local_conn = NULL; if (local_conn == NULL) { local_conn = cib_new(); } if (was_err == FALSE) { int rc = -ENOTCONN; if (attempts < max_retry) { crm_debug("CIB signon attempt %d", attempts); rc = local_conn->cmds->signon(local_conn, T_ATTRD, cib_command); } if (rc != pcmk_ok && attempts > max_retry) { crm_err("Signon to CIB failed: %s", pcmk_strerror(rc)); was_err = TRUE; } else if (rc != pcmk_ok) { attempts++; return TRUE; } } crm_info("Connected to the CIB after %d signon attempts", attempts); if (was_err == FALSE) { int rc = local_conn->cmds->set_connection_dnotify(local_conn, attrd_cib_connection_destroy); if (rc != pcmk_ok) { crm_err("Could not set dnotify callback"); was_err = TRUE; } } if (was_err == FALSE) { if (pcmk_ok != local_conn->cmds->add_notify_callback(local_conn, T_CIB_REPLACE_NOTIFY, do_cib_replaced)) { crm_err("Could not set CIB notification callback"); was_err = TRUE; } if (was_err == FALSE) { if (pcmk_ok != local_conn->cmds->add_notify_callback(local_conn, T_CIB_DIFF_NOTIFY, attrd_cib_updated_cb)) { crm_err("Could not set CIB notification callback (update)"); was_err = TRUE; } } attrd_config_read = mainloop_add_trigger(G_PRIORITY_HIGH, attrd_read_options, NULL); /* Reading of cib(Alert section) after the start */ mainloop_set_trigger(attrd_config_read); } if (was_err) { crm_err("Aborting startup"); crm_exit(DAEMON_RESPAWN_STOP); } the_cib = local_conn; crm_info("Sending full refresh now that we're connected to the cib"); g_hash_table_foreach(attr_hash, local_update_for_hash_entry, NULL); return FALSE; } int main(int argc, char **argv) { int flag = 0; int argerr = 0; crm_cluster_t cluster; gboolean was_err = FALSE; qb_ipcs_connection_t *c = NULL; qb_ipcs_service_t *ipcs = NULL; crm_log_init(T_ATTRD, LOG_NOTICE, TRUE, FALSE, argc, argv, FALSE); mainloop_add_signal(SIGTERM, attrd_shutdown); while ((flag = getopt(argc, argv, OPTARGS)) != EOF) { switch (flag) { case 'V': crm_bump_log_level(argc, argv); break; case 'h': /* Help message */ usage(T_ATTRD, EX_OK); break; default: ++argerr; break; } } if (optind > argc) { ++argerr; } if (argerr) { usage(T_ATTRD, EX_USAGE); } attr_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_hash_entry); crm_info("Starting up"); if (was_err == FALSE) { #if SUPPORT_COROSYNC if (is_openais_cluster()) { cluster.destroy = attrd_cs_destroy; cluster.cpg.cpg_deliver_fn = attrd_cs_dispatch; cluster.cpg.cpg_confchg_fn = pcmk_cpg_membership; } #endif #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { cluster.hb_conn = NULL; cluster.hb_dispatch = attrd_ha_callback; cluster.destroy = attrd_ha_connection_destroy; } #endif if (FALSE == crm_cluster_connect(&cluster)) { crm_err("HA Signon failed"); was_err = TRUE; } attrd_uname = cluster.uname; attrd_uuid = cluster.uuid; attrd_nodeid = cluster.nodeid; #if SUPPORT_HEARTBEAT attrd_cluster_conn = cluster.hb_conn; #endif } crm_info("Cluster connection active"); if (was_err == FALSE) { attrd_init_ipc(&ipcs, attrd_ipc_dispatch); } crm_info("Accepting attribute updates"); attrd_init_mainloop(); if (0 == g_timeout_add_full(G_PRIORITY_LOW + 1, 5000, cib_connect, NULL, NULL)) { crm_info("Adding timer failed"); was_err = TRUE; } if (was_err) { crm_err("Aborting startup"); return 100; } crm_notice("Starting mainloop..."); attrd_run_mainloop(); crm_notice("Exiting..."); #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { attrd_cluster_conn->llc_ops->signoff(attrd_cluster_conn, TRUE); attrd_cluster_conn->llc_ops->delete(attrd_cluster_conn); } #endif c = qb_ipcs_connection_first_get(ipcs); while (c != NULL) { qb_ipcs_connection_t *last = c; c = qb_ipcs_connection_next_get(ipcs, last); /* There really shouldn't be anyone connected at this point */ crm_notice("Disconnecting client %p, pid=%d...", last, crm_ipcs_client_pid(last)); qb_ipcs_disconnect(last); qb_ipcs_connection_unref(last); } qb_ipcs_destroy(ipcs); if (the_lrmd) { the_lrmd->cmds->disconnect(the_lrmd); lrmd_api_delete(the_lrmd); the_lrmd = NULL; } if (the_cib) { the_cib->cmds->signoff(the_cib); cib_delete(the_cib); } g_hash_table_destroy(attr_hash); free(attrd_uuid); return crm_exit(pcmk_ok); } struct attrd_callback_s { char *attr; char *value; }; /*! * \internal * \brief Free an attrd callback structure */ static void free_attrd_callback(void *user_data) { struct attrd_callback_s *data = user_data; free(data->attr); free(data->value); free(data); } static void attrd_cib_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { attr_hash_entry_t *hash_entry = NULL; struct attrd_callback_s *data = user_data; if (data->value == NULL && rc == -ENXIO) { rc = pcmk_ok; } else if (call_id < 0) { crm_warn("Update %s=%s failed: %s", data->attr, data->value, pcmk_strerror(call_id)); return; } switch (rc) { case pcmk_ok: crm_debug("Update %d for %s=%s passed", call_id, data->attr, data->value); hash_entry = g_hash_table_lookup(attr_hash, data->attr); if (hash_entry) { free(hash_entry->stored_value); hash_entry->stored_value = NULL; if (data->value != NULL) { hash_entry->stored_value = strdup(data->value); } } break; case -pcmk_err_diff_failed: /* When an attr changes while the CIB is syncing */ case -ETIME: /* When an attr changes while there is a DC election */ case -ENXIO: /* When an attr changes while the CIB is syncing a * newer config from a node that just came up */ crm_warn("Update %d for %s=%s failed: %s", call_id, data->attr, data->value, pcmk_strerror(rc)); break; default: crm_err("Update %d for %s=%s failed: %s", call_id, data->attr, data->value, pcmk_strerror(rc)); } } void attrd_perform_update(attr_hash_entry_t * hash_entry) { int rc = pcmk_ok; struct attrd_callback_s *data = NULL; const char *user_name = NULL; if (hash_entry == NULL) { return; } else if (the_cib == NULL) { crm_info("Delaying operation %s=%s: cib not connected", hash_entry->id, crm_str(hash_entry->value)); return; } #if ENABLE_ACL if (hash_entry->user) { user_name = hash_entry->user; crm_trace("Performing request from user '%s'", hash_entry->user); } #endif if (hash_entry->value == NULL) { /* delete the attr */ rc = delete_attr_delegate(the_cib, cib_none, hash_entry->section, attrd_uuid, NULL, hash_entry->set, hash_entry->uuid, hash_entry->id, NULL, FALSE, user_name); if (rc >= 0 && hash_entry->stored_value) { crm_notice("Sent delete %d: node=%s, attr=%s, id=%s, set=%s, section=%s", rc, attrd_uuid, hash_entry->id, hash_entry->uuid ? hash_entry->uuid : "", hash_entry->set, hash_entry->section); } else if (rc < 0 && rc != -ENXIO) { crm_notice ("Delete operation failed: node=%s, attr=%s, id=%s, set=%s, section=%s: %s (%d)", attrd_uuid, hash_entry->id, hash_entry->uuid ? hash_entry->uuid : "", hash_entry->set, hash_entry->section, pcmk_strerror(rc), rc); } else { crm_trace("Sent delete %d: node=%s, attr=%s, id=%s, set=%s, section=%s", rc, attrd_uuid, hash_entry->id, hash_entry->uuid ? hash_entry->uuid : "", hash_entry->set, hash_entry->section); } - attrd_send_alerts(the_lrmd, attrd_uname, attrd_nodeid, hash_entry->id, hash_entry->value, crm_alert_list); + attrd_send_alerts(the_lrmd, attrd_uname, attrd_nodeid, hash_entry->id, + hash_entry->value); } else { /* send update */ rc = update_attr_delegate(the_cib, cib_none, hash_entry->section, attrd_uuid, NULL, hash_entry->set, hash_entry->uuid, hash_entry->id, hash_entry->value, FALSE, user_name, NULL); if (rc < 0) { crm_notice("Sent update %s=%s failed: %s", hash_entry->id, hash_entry->value, pcmk_strerror(rc)); } if (safe_str_neq(hash_entry->value, hash_entry->stored_value) || rc < 0) { crm_notice("Sent update %d: %s=%s", rc, hash_entry->id, hash_entry->value); } else { crm_trace("Sent update %d: %s=%s", rc, hash_entry->id, hash_entry->value); } - attrd_send_alerts(the_lrmd, attrd_uname, attrd_nodeid, hash_entry->id, hash_entry->value, crm_alert_list); + attrd_send_alerts(the_lrmd, attrd_uname, attrd_nodeid, hash_entry->id, + hash_entry->value); } data = calloc(1, sizeof(struct attrd_callback_s)); data->attr = strdup(hash_entry->id); if (hash_entry->value != NULL) { data->value = strdup(hash_entry->value); } register_cib_callback(rc, data, attrd_cib_callback, free_attrd_callback); return; } /*! * \internal * \brief Expand attribute values that use "++" or "+=" * * \param[in] value Attribute value to expand * \param[in] old_value Previous value of attribute * * \return Newly allocated string with expanded value, or NULL if not expanded */ static char * expand_attr_value(const char *value, const char *old_value) { char *expanded = NULL; if (attrd_value_needs_expansion(value)) { expanded = crm_itoa(attrd_expand_value(value, old_value)); } return expanded; } /*! * \internal * \brief Update a single node attribute for this node * * \param[in] msg XML message with update * \param[in,out] hash_entry Node attribute structure */ static void update_local_attr(xmlNode *msg, attr_hash_entry_t *hash_entry) { const char *value = crm_element_value(msg, F_ATTRD_VALUE); char *expanded = NULL; if (hash_entry->uuid == NULL) { const char *key = crm_element_value(msg, F_ATTRD_KEY); if (key) { hash_entry->uuid = strdup(key); } } crm_debug("Request to update %s (%s) to %s from %s (stored: %s)", hash_entry->id, (hash_entry->uuid? hash_entry->uuid : "no uuid"), value, hash_entry->value, hash_entry->stored_value); if (safe_str_eq(value, hash_entry->value) && safe_str_eq(value, hash_entry->stored_value)) { crm_trace("Ignoring non-change"); return; } else if (value) { expanded = expand_attr_value(value, hash_entry->value); if (expanded) { crm_info("Expanded %s=%s to %s", hash_entry->id, value, expanded); value = expanded; } } if (safe_str_eq(value, hash_entry->value) && hash_entry->timer_id) { /* We're already waiting to set this value */ free(expanded); return; } free(hash_entry->value); hash_entry->value = NULL; if (value != NULL) { hash_entry->value = (expanded? expanded : strdup(value)); crm_debug("New value of %s is %s", hash_entry->id, value); } stop_attrd_timer(hash_entry); if (hash_entry->timeout > 0) { hash_entry->timer_id = g_timeout_add(hash_entry->timeout, attrd_timer_callback, hash_entry); } else { attrd_trigger_update(hash_entry); } } /*! * \internal * \brief Log the result of a CIB operation for a remote attribute * * \param[in] msg ignored * \param[in] id CIB operation ID * \param[in] rc CIB operation result * \param[in] output ignored * \param[in] data User-friendly string describing operation */ static void remote_attr_callback(xmlNode *msg, int id, int rc, xmlNode *output, void *data) { if (rc == pcmk_ok) { crm_debug("%s succeeded " CRM_XS " call=%d", (char *) data, id); } else { crm_notice("%s failed: %s " CRM_XS " call=%d rc=%d", (char *) data, pcmk_strerror(rc), id, rc); } } /*! * \internal * \brief Update a Pacemaker Remote node attribute via CIB only * * \param[in] host Pacemaker Remote node name * \param[in] name Attribute name * \param[in] value New attribute value * \param[in] section CIB section to update (defaults to status if NULL) * \param[in] user_name User to perform operation as * * \note Legacy attrd does not track remote node attributes, so such requests * are only sent to the CIB. This means that dampening is ignored, and * updates for the same attribute submitted to different nodes cannot be * reliably ordered. This is not ideal, but allows remote nodes to * be supported, and should be acceptable in practice. */ static void update_remote_attr(const char *host, const char *name, const char *value, const char *section, const char *user_name) { int rc = pcmk_ok; char *desc; if (value == NULL) { desc = crm_strdup_printf("Delete of %s in %s for %s", name, section, host); } else { desc = crm_strdup_printf("Update of %s=%s in %s for %s", name, value, section, host); } if (name == NULL) { rc = -EINVAL; } else if (the_cib == NULL) { rc = -ENOTCONN; } if (rc != pcmk_ok) { remote_attr_callback(NULL, rc, rc, NULL, desc); free(desc); return; } if (value == NULL) { rc = delete_attr_delegate(the_cib, cib_none, section, host, NULL, NULL, NULL, name, NULL, FALSE, user_name); } else { rc = update_attr_delegate(the_cib, cib_none, section, host, NULL, NULL, NULL, name, value, FALSE, user_name, "remote"); } - attrd_send_alerts(the_lrmd, host, 0, name, value == NULL? "null":value, crm_alert_list); + attrd_send_alerts(the_lrmd, host, 0, name, (value? value : "null")); crm_trace("%s submitted as CIB call %d", desc, rc); register_cib_callback(rc, desc, remote_attr_callback, free); } /*! * \internal * \brief Handle a client request to clear failures * * \param[in] msg XML of request * * \note Handling is according to the host specified in the request: * NULL: Relay to all cluster nodes (which do local_clear_failure()) * and also handle all remote nodes here, using remote_clear_failure(); * Our uname: Handle here, using local_clear_failure(); * Known peer: Relay to that peer, which (via process_xml_message() then * attrd_local_callback()) comes back here as previous case; * Unknown peer: Handle here as remote node, using remote_clear_failure() */ static void attrd_client_clear_failure(xmlNode *msg) { const char *host = crm_element_value(msg, F_ATTRD_HOST); if (host == NULL) { /* Clear failure on all cluster nodes */ crm_notice("Broadcasting request to clear failure on all hosts"); send_cluster_message(NULL, crm_msg_attrd, msg, FALSE); /* Clear failure on all remote nodes */ remote_clear_failure(msg); } else if (safe_str_eq(host, attrd_uname)) { local_clear_failure(msg); } else { int is_remote = FALSE; crm_node_t *peer = crm_find_peer(0, host); crm_element_value_int(msg, F_ATTRD_IS_REMOTE, &is_remote); if (is_remote || (peer == NULL)) { /* If request is not for a known cluster node, assume remote */ remote_clear_failure(msg); } else { /* Relay request to proper node */ crm_notice("Relaying request to clear failure to %s", host); send_cluster_message(peer, crm_msg_attrd, msg, FALSE); } } } void attrd_local_callback(xmlNode * msg) { attr_hash_entry_t *hash_entry = NULL; const char *from = crm_element_value(msg, F_ORIG); const char *op = crm_element_value(msg, F_ATTRD_TASK); const char *attr = crm_element_value(msg, F_ATTRD_ATTRIBUTE); const char *pattern = crm_element_value(msg, F_ATTRD_REGEX); const char *value = crm_element_value(msg, F_ATTRD_VALUE); const char *host = crm_element_value(msg, F_ATTRD_HOST); int is_remote = FALSE; crm_element_value_int(msg, F_ATTRD_IS_REMOTE, &is_remote); if (safe_str_eq(op, ATTRD_OP_REFRESH)) { crm_notice("Sending full refresh (origin=%s)", from); g_hash_table_foreach(attr_hash, update_for_hash_entry, NULL); return; } else if (safe_str_eq(op, ATTRD_OP_PEER_REMOVE)) { if (host) { crm_notice("Broadcasting removal of peer %s", host); send_cluster_message(NULL, crm_msg_attrd, msg, FALSE); } return; } else if (safe_str_eq(op, ATTRD_OP_CLEAR_FAILURE)) { attrd_client_clear_failure(msg); return; } else if (op && safe_str_neq(op, ATTRD_OP_UPDATE)) { crm_notice("Ignoring unsupported %s request from %s", op, from); return; } /* Handle requests for Pacemaker Remote nodes specially */ if (host && is_remote) { const char *section = crm_element_value(msg, F_ATTRD_SECTION); const char *user_name = crm_element_value(msg, F_ATTRD_USER); if (section == NULL) { section = XML_CIB_TAG_STATUS; } if ((attr == NULL) && (pattern != NULL)) { /* Attribute(s) specified by regular expression */ /* @TODO query, iterate and update_remote_attr() for matches? */ crm_notice("Update of %s for %s failed: regular expressions " "are not supported with Pacemaker Remote nodes", pattern, host); } else { /* Single attribute specified by exact name */ update_remote_attr(host, attr, value, section, user_name); } return; } /* Redirect requests for another cluster node to that node */ if (host != NULL && safe_str_neq(host, attrd_uname)) { send_cluster_message(crm_get_peer(0, host), crm_msg_attrd, msg, FALSE); return; } if (attr != NULL) { /* Single attribute specified by exact name */ crm_debug("%s message from %s: %s=%s", op, from, attr, crm_str(value)); hash_entry = find_hash_entry(msg); if (hash_entry != NULL) { update_local_attr(msg, hash_entry); } } else if (pattern != NULL) { /* Attribute(s) specified by regular expression */ regex_t regex; GHashTableIter iter; if (regcomp(®ex, pattern, REG_EXTENDED|REG_NOSUB)) { crm_err("Update from %s failed: invalid pattern %s", from, pattern); return; } crm_debug("%s message from %s: %s=%s", op, from, pattern, crm_str(value)); g_hash_table_iter_init(&iter, attr_hash); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &hash_entry)) { int rc = regexec(®ex, hash_entry->id, 0, NULL, 0); if (rc == 0) { crm_trace("Attribute %s matches %s", hash_entry->id, pattern); update_local_attr(msg, hash_entry); } } regfree(®ex); } else { crm_info("Ignoring message with no attribute name or expression"); } } gboolean attrd_timer_callback(void *user_data) { stop_attrd_timer(user_data); attrd_trigger_update(user_data); return TRUE; /* Always return true, removed cleanly by stop_attrd_timer() */ } gboolean attrd_trigger_update(attr_hash_entry_t * hash_entry) { xmlNode *msg = NULL; /* send HA message to everyone */ crm_notice("Sending flush op to all hosts for: %s (%s)", hash_entry->id, crm_str(hash_entry->value)); log_hash_entry(LOG_DEBUG_2, hash_entry, "Sending flush op to all hosts for:"); msg = create_xml_node(NULL, __FUNCTION__); crm_xml_add(msg, F_TYPE, T_ATTRD); crm_xml_add(msg, F_ORIG, attrd_uname); crm_xml_add(msg, F_ATTRD_TASK, "flush"); crm_xml_add(msg, F_ATTRD_ATTRIBUTE, hash_entry->id); crm_xml_add(msg, F_ATTRD_SET, hash_entry->set); crm_xml_add(msg, F_ATTRD_SECTION, hash_entry->section); crm_xml_add(msg, F_ATTRD_DAMPEN, hash_entry->dampen); crm_xml_add(msg, F_ATTRD_VALUE, hash_entry->value); #if ENABLE_ACL if (hash_entry->user) { crm_xml_add(msg, F_ATTRD_USER, hash_entry->user); } #endif if (hash_entry->timeout <= 0) { crm_xml_add(msg, F_ATTRD_IGNORE_LOCALLY, hash_entry->value); attrd_perform_update(hash_entry); } send_cluster_message(NULL, crm_msg_attrd, msg, FALSE); free_xml(msg); return TRUE; } diff --git a/crmd/control.c b/crmd/control.c index f8d78d4f5b..a3e11d95fc 100644 --- a/crmd/control.c +++ b/crmd/control.c @@ -1,1171 +1,1171 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include qb_ipcs_service_t *ipcs = NULL; extern gboolean crm_connect_corosync(crm_cluster_t * cluster); extern void crmd_ha_connection_destroy(gpointer user_data); void crm_shutdown(int nsig); gboolean crm_read_options(gpointer user_data); gboolean fsa_has_quorum = FALSE; crm_trigger_t *fsa_source = NULL; crm_trigger_t *config_read = NULL; bool no_quorum_suicide_escalation = FALSE; static gboolean election_timeout_popped(gpointer data) { /* Not everyone voted */ crm_info("Election failed: Declaring ourselves the winner"); register_fsa_input(C_TIMER_POPPED, I_ELECTION_DC, NULL); return FALSE; } /* A_HA_CONNECT */ void do_ha_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { gboolean registered = FALSE; static crm_cluster_t *cluster = NULL; if (cluster == NULL) { cluster = calloc(1, sizeof(crm_cluster_t)); } if (action & A_HA_DISCONNECT) { crm_cluster_disconnect(cluster); crm_info("Disconnected from the cluster"); set_bit(fsa_input_register, R_HA_DISCONNECTED); } if (action & A_HA_CONNECT) { crm_set_status_callback(&peer_update_callback); crm_set_autoreap(FALSE); if (is_openais_cluster()) { #if SUPPORT_COROSYNC registered = crm_connect_corosync(cluster); #endif } else if (is_heartbeat_cluster()) { #if SUPPORT_HEARTBEAT cluster->destroy = crmd_ha_connection_destroy; cluster->hb_dispatch = crmd_ha_msg_callback; registered = crm_cluster_connect(cluster); fsa_cluster_conn = cluster->hb_conn; crm_trace("Be informed of Node Status changes"); if (registered && fsa_cluster_conn->llc_ops->set_nstatus_callback(fsa_cluster_conn, crmd_ha_status_callback, fsa_cluster_conn) != HA_OK) { crm_err("Cannot set nstatus callback: %s", fsa_cluster_conn->llc_ops->errmsg(fsa_cluster_conn)); registered = FALSE; } crm_trace("Be informed of CRM Client Status changes"); if (registered && fsa_cluster_conn->llc_ops->set_cstatus_callback(fsa_cluster_conn, crmd_client_status_callback, fsa_cluster_conn) != HA_OK) { crm_err("Cannot set cstatus callback: %s", fsa_cluster_conn->llc_ops->errmsg(fsa_cluster_conn)); registered = FALSE; } if (registered) { crm_trace("Requesting an initial dump of CRMD client_status"); fsa_cluster_conn->llc_ops->client_status(fsa_cluster_conn, NULL, CRM_SYSTEM_CRMD, -1); } #endif } fsa_election = election_init(NULL, cluster->uname, 60000/*60s*/, election_timeout_popped); fsa_our_uname = cluster->uname; fsa_our_uuid = cluster->uuid; if(cluster->uuid == NULL) { crm_err("Could not obtain local uuid"); registered = FALSE; } if (registered == FALSE) { set_bit(fsa_input_register, R_HA_DISCONNECTED); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } populate_cib_nodes(node_update_none, __FUNCTION__); clear_bit(fsa_input_register, R_HA_DISCONNECTED); crm_info("Connected to the cluster"); } if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } } static bool need_spawn_pengine_from_crmd(void) { static int result = -1; if (result != -1) return result; if (!is_heartbeat_cluster()) { result = 0; return result; } /* NULL, or "strange" value: rather spawn from here. */ result = TRUE; crm_str_to_boolean(daemon_option("crmd_spawns_pengine"), &result); return result; } /* A_SHUTDOWN */ void do_shutdown(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { /* just in case */ set_bit(fsa_input_register, R_SHUTDOWN); if (need_spawn_pengine_from_crmd()) { if (is_set(fsa_input_register, pe_subsystem->flag_connected)) { crm_info("Terminating the %s", pe_subsystem->name); if (stop_subsystem(pe_subsystem, TRUE) == FALSE) { /* It's gone ... */ crm_err("Faking %s exit", pe_subsystem->name); clear_bit(fsa_input_register, pe_subsystem->flag_connected); } else { crm_info("Waiting for subsystems to exit"); crmd_fsa_stall(FALSE); } } crm_info("All subsystems stopped, continuing"); } if (stonith_api) { /* Prevent it from coming up again */ clear_bit(fsa_input_register, R_ST_REQUIRED); crm_info("Disconnecting STONITH..."); stonith_api->cmds->disconnect(stonith_api); } } /* A_SHUTDOWN_REQ */ void do_shutdown_req(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { xmlNode *msg = NULL; set_bit(fsa_input_register, R_SHUTDOWN); crm_info("Sending shutdown request to all peers (DC is %s)", (fsa_our_dc? fsa_our_dc : "not set")); msg = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); /* set_bit(fsa_input_register, R_STAYDOWN); */ if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } free_xml(msg); } extern crm_ipc_t *attrd_ipc; extern char *max_generation_from; extern xmlNode *max_generation_xml; extern GHashTable *resource_history; extern GHashTable *voted; extern char *te_client_id; extern regex_t *version_format_regex; void log_connected_client(gpointer key, gpointer value, gpointer user_data); void log_connected_client(gpointer key, gpointer value, gpointer user_data) { crm_client_t *client = value; crm_err("%s is still connected at exit", crm_client_name(client)); } int crmd_fast_exit(int rc) { if (is_set(fsa_input_register, R_STAYDOWN)) { crm_warn("Inhibiting respawn "CRM_XS" remapping exit code %d to %d", rc, DAEMON_RESPAWN_STOP); rc = DAEMON_RESPAWN_STOP; } if (rc == pcmk_ok && is_set(fsa_input_register, R_IN_RECOVERY)) { crm_err("Could not recover from internal error"); rc = pcmk_err_generic; } return crm_exit(rc); } int crmd_exit(int rc) { GListPtr gIter = NULL; GMainLoop *mloop = crmd_mainloop; static bool in_progress = FALSE; if(in_progress && rc == 0) { crm_debug("Exit is already in progress"); return rc; } else if(in_progress) { crm_notice("Error during shutdown process, terminating now with status %d: %s", rc, pcmk_strerror(rc)); crm_write_blackbox(SIGTRAP, NULL); crmd_fast_exit(rc); } in_progress = TRUE; crm_trace("Preparing to exit: %d", rc); /* Suppress secondary errors resulting from us disconnecting everything */ set_bit(fsa_input_register, R_HA_DISCONNECTED); /* Close all IPC servers and clients to ensure any and all shared memory files are cleaned up */ if(ipcs) { crm_trace("Closing IPC server"); mainloop_del_ipc_server(ipcs); ipcs = NULL; } if (attrd_ipc) { crm_trace("Closing attrd connection"); crm_ipc_close(attrd_ipc); crm_ipc_destroy(attrd_ipc); attrd_ipc = NULL; } if (pe_subsystem && pe_subsystem->client && pe_subsystem->client->ipcs) { crm_trace("Disconnecting Policy Engine"); qb_ipcs_disconnect(pe_subsystem->client->ipcs); } if(stonith_api) { crm_trace("Disconnecting fencing API"); clear_bit(fsa_input_register, R_ST_REQUIRED); stonith_api->cmds->free(stonith_api); stonith_api = NULL; } if (rc == pcmk_ok && crmd_mainloop == NULL) { crm_debug("No mainloop detected"); rc = EPROTO; } /* On an error, just get out. * * Otherwise, make the effort to have mainloop exit gracefully so * that it (mostly) cleans up after itself and valgrind has less * to report on - allowing real errors stand out */ if(rc != pcmk_ok) { crm_notice("Forcing immediate exit with status %d: %s", rc, pcmk_strerror(rc)); crm_write_blackbox(SIGTRAP, NULL); return crmd_fast_exit(rc); } /* Clean up as much memory as possible for valgrind */ for (gIter = fsa_message_queue; gIter != NULL; gIter = gIter->next) { fsa_data_t *fsa_data = gIter->data; crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]", fsa_input2string(fsa_data->fsa_input), fsa_state2string(fsa_state), fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); delete_fsa_input(fsa_data); } clear_bit(fsa_input_register, R_MEMBERSHIP); g_list_free(fsa_message_queue); fsa_message_queue = NULL; free(pe_subsystem); pe_subsystem = NULL; free(te_subsystem); te_subsystem = NULL; free(cib_subsystem); cib_subsystem = NULL; if (version_format_regex) { regfree(version_format_regex); free(version_format_regex); } election_fini(fsa_election); fsa_election = NULL; /* Tear down the CIB connection, but don't free it yet -- it could be used * when we drain the mainloop later. */ cib_free_callbacks(fsa_cib_conn); fsa_cib_conn->cmds->signoff(fsa_cib_conn); verify_stopped(fsa_state, LOG_WARNING); clear_bit(fsa_input_register, R_LRM_CONNECTED); lrm_state_destroy_all(); /* This basically will not work, since mainloop has a reference to it */ mainloop_destroy_trigger(fsa_source); fsa_source = NULL; mainloop_destroy_trigger(config_read); config_read = NULL; mainloop_destroy_trigger(stonith_reconnect); stonith_reconnect = NULL; mainloop_destroy_trigger(transition_trigger); transition_trigger = NULL; crm_client_cleanup(); crm_peer_destroy(); crm_timer_stop(transition_timer); crm_timer_stop(integration_timer); crm_timer_stop(finalization_timer); crm_timer_stop(election_trigger); election_timeout_stop(fsa_election); crm_timer_stop(shutdown_escalation_timer); crm_timer_stop(wait_timer); crm_timer_stop(recheck_timer); free(transition_timer); transition_timer = NULL; free(integration_timer); integration_timer = NULL; free(finalization_timer); finalization_timer = NULL; free(election_trigger); election_trigger = NULL; free(shutdown_escalation_timer); shutdown_escalation_timer = NULL; free(wait_timer); wait_timer = NULL; free(recheck_timer); recheck_timer = NULL; free(fsa_our_dc_version); fsa_our_dc_version = NULL; free(fsa_our_uname); fsa_our_uname = NULL; free(fsa_our_uuid); fsa_our_uuid = NULL; free(fsa_our_dc); fsa_our_dc = NULL; free(fsa_cluster_name); fsa_cluster_name = NULL; free(te_uuid); te_uuid = NULL; free(te_client_id); te_client_id = NULL; free(fsa_pe_ref); fsa_pe_ref = NULL; free(failed_stop_offset); failed_stop_offset = NULL; free(failed_start_offset); failed_start_offset = NULL; free(max_generation_from); max_generation_from = NULL; free_xml(max_generation_xml); max_generation_xml = NULL; mainloop_destroy_signal(SIGPIPE); mainloop_destroy_signal(SIGUSR1); mainloop_destroy_signal(SIGTERM); mainloop_destroy_signal(SIGTRAP); /* leave SIGCHLD engaged as we might still want to drain some service-actions */ if (mloop) { GMainContext *ctx = g_main_loop_get_context(crmd_mainloop); /* Don't re-enter this block */ crmd_mainloop = NULL; crmd_drain_alerts(ctx); /* no signals on final draining anymore */ mainloop_destroy_signal(SIGCHLD); crm_trace("Draining mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx)); { int lpc = 0; while((g_main_context_pending(ctx) && lpc < 10)) { lpc++; crm_trace("Iteration %d", lpc); g_main_context_dispatch(ctx); } } crm_trace("Closing mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx)); g_main_loop_quit(mloop); #if SUPPORT_HEARTBEAT /* Do this only after g_main_loop_quit(). * * This interface was broken (incomplete) since it was introduced. * ->delete() does cleanup and free most of it, but it does not * actually remove and destroy the corresponding GSource, so the next * prepare/check iteratioin would find a corrupt (because partially * freed) GSource, and segfault. * * Apparently one was supposed to store the GSource as returned by * G_main_add_ll_cluster(), and g_source_destroy() that "by hand". * * But no-one ever did this, not even in the old hb code when this was * introduced. * * Note that fsa_cluster_conn was set as an "alias" to cluster->hb_conn * in do_ha_control() right after crm_cluster_connect(), and only * happens to still point at that object, because do_ha_control() does * not reset it to NULL after crm_cluster_disconnect() above does * reset cluster->hb_conn to NULL. * Not sure if that's something to cleanup, too. * * I'll try to fix this up in heartbeat proper, so ->delete * will actually remove, and destroy, and unref, and free this thing. * Doing so after g_main_loop_quit() is valid with both old, * and eventually fixed heartbeat. * * If we introduce the "by hand" destroy/remove/unref, * this may break again once heartbeat is fixed :-( * * -- Lars Ellenberg */ if (fsa_cluster_conn) { crm_trace("Deleting heartbeat api object"); fsa_cluster_conn->llc_ops->delete(fsa_cluster_conn); fsa_cluster_conn = NULL; } #endif /* Won't do anything yet, since we're inside it now */ g_main_loop_unref(mloop); crm_trace("Done %d", rc); } else { mainloop_destroy_signal(SIGCHLD); } cib_delete(fsa_cib_conn); fsa_cib_conn = NULL; throttle_fini(); /* Graceful */ return rc; } /* A_EXIT_0, A_EXIT_1 */ void do_exit(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { int exit_code = pcmk_ok; int log_level = LOG_INFO; const char *exit_type = "gracefully"; if (action & A_EXIT_1) { /* exit_code = pcmk_err_generic; */ log_level = LOG_ERR; exit_type = "forcefully"; exit_code = pcmk_err_generic; } verify_stopped(cur_state, LOG_ERR); do_crm_log(log_level, "Performing %s - %s exiting the CRMd", fsa_action2string(action), exit_type); crm_info("[%s] stopped (%d)", crm_system_name, exit_code); crmd_exit(exit_code); } static void sigpipe_ignore(int nsig) { return; } /* A_STARTUP */ void do_startup(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { int was_error = 0; crm_debug("Registering Signal Handlers"); mainloop_add_signal(SIGTERM, crm_shutdown); mainloop_add_signal(SIGPIPE, sigpipe_ignore); fsa_source = mainloop_add_trigger(G_PRIORITY_HIGH, crm_fsa_trigger, NULL); config_read = mainloop_add_trigger(G_PRIORITY_HIGH, crm_read_options, NULL); transition_trigger = mainloop_add_trigger(G_PRIORITY_LOW, te_graph_trigger, NULL); crm_debug("Creating CIB and LRM objects"); fsa_cib_conn = cib_new(); lrm_state_init_local(); /* set up the timers */ transition_timer = calloc(1, sizeof(fsa_timer_t)); integration_timer = calloc(1, sizeof(fsa_timer_t)); finalization_timer = calloc(1, sizeof(fsa_timer_t)); election_trigger = calloc(1, sizeof(fsa_timer_t)); shutdown_escalation_timer = calloc(1, sizeof(fsa_timer_t)); wait_timer = calloc(1, sizeof(fsa_timer_t)); recheck_timer = calloc(1, sizeof(fsa_timer_t)); if (election_trigger != NULL) { election_trigger->source_id = 0; election_trigger->period_ms = -1; election_trigger->fsa_input = I_DC_TIMEOUT; election_trigger->callback = crm_timer_popped; election_trigger->repeat = FALSE; } else { was_error = TRUE; } if (transition_timer != NULL) { transition_timer->source_id = 0; transition_timer->period_ms = -1; transition_timer->fsa_input = I_PE_CALC; transition_timer->callback = crm_timer_popped; transition_timer->repeat = FALSE; } else { was_error = TRUE; } if (integration_timer != NULL) { integration_timer->source_id = 0; integration_timer->period_ms = -1; integration_timer->fsa_input = I_INTEGRATED; integration_timer->callback = crm_timer_popped; integration_timer->repeat = FALSE; } else { was_error = TRUE; } if (finalization_timer != NULL) { finalization_timer->source_id = 0; finalization_timer->period_ms = -1; finalization_timer->fsa_input = I_FINALIZED; finalization_timer->callback = crm_timer_popped; finalization_timer->repeat = FALSE; /* for possible enabling... a bug in the join protocol left * a slave in S_PENDING while we think it's in S_NOT_DC * * raising I_FINALIZED put us into a transition loop which is * never resolved. * in this loop we continually send probes which the node * NACK's because it's in S_PENDING * * if we have nodes where heartbeat is active but the * CRM is not... then this will be handled in the * integration phase */ finalization_timer->fsa_input = I_ELECTION; } else { was_error = TRUE; } if (shutdown_escalation_timer != NULL) { shutdown_escalation_timer->source_id = 0; shutdown_escalation_timer->period_ms = -1; shutdown_escalation_timer->fsa_input = I_STOP; shutdown_escalation_timer->callback = crm_timer_popped; shutdown_escalation_timer->repeat = FALSE; } else { was_error = TRUE; } if (wait_timer != NULL) { wait_timer->source_id = 0; wait_timer->period_ms = 2000; wait_timer->fsa_input = I_NULL; wait_timer->callback = crm_timer_popped; wait_timer->repeat = FALSE; } else { was_error = TRUE; } if (recheck_timer != NULL) { recheck_timer->source_id = 0; recheck_timer->period_ms = -1; recheck_timer->fsa_input = I_PE_CALC; recheck_timer->callback = crm_timer_popped; recheck_timer->repeat = FALSE; } else { was_error = TRUE; } /* set up the sub systems */ cib_subsystem = calloc(1, sizeof(struct crm_subsystem_s)); te_subsystem = calloc(1, sizeof(struct crm_subsystem_s)); pe_subsystem = calloc(1, sizeof(struct crm_subsystem_s)); if (cib_subsystem != NULL) { cib_subsystem->pid = -1; cib_subsystem->name = CRM_SYSTEM_CIB; cib_subsystem->flag_connected = R_CIB_CONNECTED; cib_subsystem->flag_required = R_CIB_REQUIRED; } else { was_error = TRUE; } if (te_subsystem != NULL) { te_subsystem->pid = -1; te_subsystem->name = CRM_SYSTEM_TENGINE; te_subsystem->flag_connected = R_TE_CONNECTED; te_subsystem->flag_required = R_TE_REQUIRED; } else { was_error = TRUE; } if (pe_subsystem != NULL) { pe_subsystem->pid = -1; pe_subsystem->path = CRM_DAEMON_DIR; pe_subsystem->name = CRM_SYSTEM_PENGINE; pe_subsystem->command = CRM_DAEMON_DIR "/" CRM_SYSTEM_PENGINE; pe_subsystem->args = NULL; pe_subsystem->flag_connected = R_PE_CONNECTED; pe_subsystem->flag_required = R_PE_REQUIRED; } else { was_error = TRUE; } if (was_error == FALSE && need_spawn_pengine_from_crmd()) { if (start_subsystem(pe_subsystem) == FALSE) { was_error = TRUE; } } if (was_error) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } static int32_t crmd_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { crm_trace("Connection %p", c); if (crm_client_new(c, uid, gid) == NULL) { return -EIO; } return 0; } static void crmd_ipc_created(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); } static int32_t crmd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; crm_client_t *client = crm_client_get(c); xmlNode *msg = crm_ipcs_recv(client, data, size, &id, &flags); crm_trace("Invoked: %s", crm_client_name(client)); crm_ipcs_send_ack(client, id, flags, "ack", __FUNCTION__, __LINE__); if (msg == NULL) { return 0; } #if ENABLE_ACL CRM_ASSERT(client->user != NULL); crm_acl_get_set_user(msg, F_CRM_USER, client->user); #endif crm_trace("Processing msg from %s", crm_client_name(client)); crm_log_xml_trace(msg, "CRMd[inbound]"); crm_xml_add(msg, F_CRM_SYS_FROM, client->id); if (crmd_authorize_message(msg, client, NULL)) { route_message(C_IPC_MESSAGE, msg); } trigger_fsa(fsa_source); free_xml(msg); return 0; } static int32_t crmd_ipc_closed(qb_ipcs_connection_t * c) { crm_client_t *client = crm_client_get(c); struct crm_subsystem_s *the_subsystem = NULL; if (client == NULL) { return 0; } crm_trace("Connection %p", c); if (client->userdata == NULL) { crm_trace("Client hadn't registered with us yet"); } else if (strcasecmp(CRM_SYSTEM_PENGINE, client->userdata) == 0) { the_subsystem = pe_subsystem; } else if (strcasecmp(CRM_SYSTEM_TENGINE, client->userdata) == 0) { the_subsystem = te_subsystem; } else if (strcasecmp(CRM_SYSTEM_CIB, client->userdata) == 0) { the_subsystem = cib_subsystem; } if (the_subsystem != NULL) { the_subsystem->source = NULL; the_subsystem->client = NULL; crm_info("Received HUP from %s:[%d]", the_subsystem->name, the_subsystem->pid); } else { /* else that was a transient client */ crm_trace("Received HUP from transient client"); } crm_trace("Disconnecting client %s (%p)", crm_client_name(client), client); free(client->userdata); crm_client_destroy(client); trigger_fsa(fsa_source); return 0; } static void crmd_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); crmd_ipc_closed(c); } /* A_STOP */ void do_stop(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_trace("Closing IPC server"); mainloop_del_ipc_server(ipcs); ipcs = NULL; register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } /* A_STARTED */ void do_started(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { static struct qb_ipcs_service_handlers crmd_callbacks = { .connection_accept = crmd_ipc_accept, .connection_created = crmd_ipc_created, .msg_process = crmd_ipc_dispatch, .connection_closed = crmd_ipc_closed, .connection_destroyed = crmd_ipc_destroy }; if (cur_state != S_STARTING) { crm_err("Start cancelled... %s", fsa_state2string(cur_state)); return; } else if (is_set(fsa_input_register, R_MEMBERSHIP) == FALSE) { crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP); crmd_fsa_stall(TRUE); return; } else if (is_set(fsa_input_register, R_LRM_CONNECTED) == FALSE) { crm_info("Delaying start, LRM not connected (%.16llx)", R_LRM_CONNECTED); crmd_fsa_stall(TRUE); return; } else if (is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) { crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED); crmd_fsa_stall(TRUE); return; } else if (is_set(fsa_input_register, R_READ_CONFIG) == FALSE) { crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG); crmd_fsa_stall(TRUE); return; } else if (is_set(fsa_input_register, R_PEER_DATA) == FALSE) { /* try reading from HA */ crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA); #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { HA_Message *msg = NULL; crm_trace("Looking for a HA message"); msg = fsa_cluster_conn->llc_ops->readmsg(fsa_cluster_conn, 0); if (msg != NULL) { crm_trace("There was a HA message"); ha_msg_del(msg); } } #endif crmd_fsa_stall(TRUE); return; } crm_debug("Init server comms"); ipcs = crmd_ipc_server_init(&crmd_callbacks); if (ipcs == NULL) { crm_err("Failed to create IPC server: shutting down and inhibiting respawn"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } if (stonith_reconnect == NULL) { int dummy; stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW, te_connect_stonith, &dummy); } set_bit(fsa_input_register, R_ST_REQUIRED); mainloop_set_trigger(stonith_reconnect); crm_notice("The local CRM is operational"); clear_bit(fsa_input_register, R_STARTING); register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL); } /* A_RECOVER */ void do_recover(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { set_bit(fsa_input_register, R_IN_RECOVERY); crm_warn("Fast-tracking shutdown in response to errors"); register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } /* *INDENT-OFF* */ pe_cluster_option crmd_opts[] = { /* name, old-name, validate, values, default, short description, long description */ { "dc-version", NULL, "string", NULL, "none", NULL, "Version of Pacemaker on the cluster's DC.", "Includes the hash which identifies the exact changeset it was built from. Used for diagnostic purposes." }, { "cluster-infrastructure", NULL, "string", NULL, "heartbeat", NULL, "The messaging stack on which Pacemaker is currently running.", "Used for informational and diagnostic purposes." }, { XML_CONFIG_ATTR_DC_DEADTIME, "dc_deadtime", "time", NULL, "20s", &check_time, "How long to wait for a response from other nodes during startup.", "The \"correct\" value will depend on the speed/load of your network and the type of switches used." }, { XML_CONFIG_ATTR_RECHECK, "cluster_recheck_interval", "time", "Zero disables polling. Positive values are an interval in seconds (unless other SI units are specified. eg. 5min)", "15min", &check_timer, "Polling interval for time based changes to options, resource parameters and constraints.", "The Cluster is primarily event driven, however the configuration can have elements that change based on time." " To ensure these changes take effect, we can optionally poll the cluster's status for changes." }, #ifdef RHEL7_COMPAT /* These options were superseded by the alerts feature and now are just an * alternate interface to it. It was never released upstream, but was * released in RHEL 7, so we allow it to be enabled at compile-time by * defining RHEL7_COMPAT. */ { "notification-agent", NULL, "string", NULL, "/dev/null", &check_script, "Deprecated", "Use alert path in alerts section instead" }, { "notification-recipient", NULL, "string", NULL, "", NULL, "Deprecated", "Use recipient value in alerts section instead" }, #endif { "load-threshold", NULL, "percentage", NULL, "80%", &check_utilization, "The maximum amount of system resources that should be used by nodes in the cluster", "The cluster will slow down its recovery process when the amount of system resources used" " (currently CPU) approaches this limit", }, { "node-action-limit", NULL, "integer", NULL, "0", &check_number, "The maximum number of jobs that can be scheduled per node. Defaults to 2x cores"}, { XML_CONFIG_ATTR_ELECTION_FAIL, "election_timeout", "time", NULL, "2min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." }, { XML_CONFIG_ATTR_FORCE_QUIT, "shutdown_escalation", "time", NULL, "20min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." }, { "crmd-integration-timeout", NULL, "time", NULL, "3min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." }, { "crmd-finalization-timeout", NULL, "time", NULL, "30min", &check_timer, "*** Advanced Use Only ***.", "If you need to adjust this value, it probably indicates the presence of a bug." }, { "crmd-transition-delay", NULL, "time", NULL, "0s", &check_timer, "*** Advanced Use Only ***\n" "Enabling this option will slow down cluster recovery under all conditions", "Delay cluster recovery for the configured interval to allow for additional/related events to occur.\n" "Useful if your configuration is sensitive to the order in which ping updates arrive." }, { "stonith-watchdog-timeout", NULL, "time", NULL, NULL, &check_sbd_timeout, "How long to wait before we can assume nodes are safely down", NULL }, { "stonith-max-attempts",NULL,"integer",NULL,"10",&check_positive_number, "How many times stonith can fail before it will no longer be attempted on a target" }, { "no-quorum-policy", "no_quorum_policy", "enum", "stop, freeze, ignore, suicide", "stop", &check_quorum, NULL, NULL }, #if SUPPORT_PLUGIN { XML_ATTR_EXPECTED_VOTES, NULL, "integer", NULL, "2", &check_number, "The number of nodes expected to be in the cluster", "Used to calculate quorum in openais based clusters." }, #endif }; /* *INDENT-ON* */ void crmd_metadata(void) { config_metadata("CRM Daemon", "1.0", "CRM Daemon Options", "This is a fake resource that details the options that can be configured for the CRM Daemon.", crmd_opts, DIMOF(crmd_opts)); } static void verify_crmd_options(GHashTable * options) { verify_all_options(options, crmd_opts, DIMOF(crmd_opts)); } static const char * crmd_pref(GHashTable * options, const char *name) { return get_cluster_pref(options, crmd_opts, DIMOF(crmd_opts), name); } static void config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { const char *value = NULL; GHashTable *config_hash = NULL; crm_time_t *now = crm_time_new(NULL); xmlNode *crmconfig = NULL; xmlNode *alerts = NULL; if (rc != pcmk_ok) { fsa_data_t *msg_data = NULL; crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); if (rc == -EACCES || rc == -pcmk_err_schema_validation) { crm_err("The cluster is mis-configured - shutting down and staying down"); set_bit(fsa_input_register, R_STAYDOWN); } goto bail; } crmconfig = output; if ((crmconfig) && (crm_element_name(crmconfig)) && (strcmp(crm_element_name(crmconfig), XML_CIB_TAG_CRMCONFIG) != 0)) { crmconfig = first_named_child(crmconfig, XML_CIB_TAG_CRMCONFIG); } if (!crmconfig) { fsa_data_t *msg_data = NULL; crm_err("Local CIB query for " XML_CIB_TAG_CRMCONFIG " section failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); goto bail; } crm_debug("Call %d : Parsing CIB options", call_id); config_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); unpack_instance_attributes(crmconfig, crmconfig, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, FALSE, now); verify_crmd_options(config_hash); #ifdef RHEL7_COMPAT { const char *script = crmd_pref(config_hash, "notification-agent"); const char *recip = crmd_pref(config_hash, "notification-recipient"); pe_enable_legacy_alerts(script, recip); } #endif value = crmd_pref(config_hash, XML_CONFIG_ATTR_DC_DEADTIME); election_trigger->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, "node-action-limit"); /* Also checks migration-limit */ throttle_update_job_max(value); value = crmd_pref(config_hash, "load-threshold"); if(value) { throttle_set_load_target(strtof(value, NULL) / 100.0); } value = crmd_pref(config_hash, "no-quorum-policy"); if (safe_str_eq(value, "suicide") && pcmk_locate_sbd()) { no_quorum_suicide_escalation = TRUE; } value = crmd_pref(config_hash,"stonith-max-attempts"); update_stonith_max_attempts(value); value = crmd_pref(config_hash, XML_CONFIG_ATTR_FORCE_QUIT); shutdown_escalation_timer->period_ms = crm_get_msec(value); /* How long to declare an election over - even if not everyone voted */ crm_debug("Shutdown escalation occurs after: %dms", shutdown_escalation_timer->period_ms); value = crmd_pref(config_hash, XML_CONFIG_ATTR_ELECTION_FAIL); election_timeout_set_period(fsa_election, crm_get_msec(value)); value = crmd_pref(config_hash, XML_CONFIG_ATTR_RECHECK); recheck_timer->period_ms = crm_get_msec(value); crm_debug("Checking for expired actions every %dms", recheck_timer->period_ms); value = crmd_pref(config_hash, "crmd-transition-delay"); transition_timer->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, "crmd-integration-timeout"); integration_timer->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, "crmd-finalization-timeout"); finalization_timer->period_ms = crm_get_msec(value); #if SUPPORT_COROSYNC if (is_classic_ais_cluster()) { value = crmd_pref(config_hash, XML_ATTR_EXPECTED_VOTES); crm_debug("Sending expected-votes=%s to corosync", value); send_cluster_text(crm_class_quorum, value, TRUE, NULL, crm_msg_ais); } #endif free(fsa_cluster_name); fsa_cluster_name = NULL; value = g_hash_table_lookup(config_hash, "cluster-name"); if (value) { fsa_cluster_name = strdup(value); } alerts = first_named_child(output, XML_CIB_TAG_ALERTS); - pe_unpack_alerts(alerts); + crmd_unpack_alerts(alerts); set_bit(fsa_input_register, R_READ_CONFIG); crm_trace("Triggering FSA: %s", __FUNCTION__); mainloop_set_trigger(fsa_source); g_hash_table_destroy(config_hash); bail: crm_time_free(now); } gboolean crm_read_options(gpointer user_data) { int call_id = fsa_cib_conn->cmds->query(fsa_cib_conn, "//" XML_CIB_TAG_CRMCONFIG " | //" XML_CIB_TAG_ALERTS, NULL, cib_xpath | cib_scope_local); fsa_register_cib_callback(call_id, FALSE, NULL, config_query_callback); crm_trace("Querying the CIB... call %d", call_id); return TRUE; } /* A_READCONFIG */ void do_read_config(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { throttle_init(); mainloop_set_trigger(config_read); } void crm_shutdown(int nsig) { if (crmd_mainloop != NULL && g_main_is_running(crmd_mainloop)) { if (is_set(fsa_input_register, R_SHUTDOWN)) { crm_err("Escalating the shutdown"); register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL); } else { set_bit(fsa_input_register, R_SHUTDOWN); register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL); if (shutdown_escalation_timer->period_ms < 1) { const char *value = crmd_pref(NULL, XML_CONFIG_ATTR_FORCE_QUIT); int msec = crm_get_msec(value); crm_debug("Using default shutdown escalation: %dms", msec); shutdown_escalation_timer->period_ms = msec; } /* can't rely on this... */ crm_notice("Shutting down cluster resource manager " CRM_XS " limit=%dms", shutdown_escalation_timer->period_ms); crm_timer_start(shutdown_escalation_timer); } } else { crm_info("exit from shutdown"); crmd_exit(pcmk_ok); } } diff --git a/crmd/crmd_alerts.c b/crmd/crmd_alerts.c index 306d0ea4ab..b185179398 100644 --- a/crmd/crmd_alerts.c +++ b/crmd/crmd_alerts.c @@ -1,219 +1,226 @@ /* * Copyright (C) 2015 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include "crmd_alerts.h" #include "crmd_messages.h" #include #include #include +static GListPtr crmd_alert_list = NULL; static int alerts_inflight = 0; static gboolean draining_alerts = FALSE; +void crmd_unpack_alerts(xmlNode *alerts) +{ + pe_free_alert_list(crmd_alert_list); + crmd_alert_list = pe_unpack_alerts(alerts); +} + static void crmd_alert_complete(svc_action_t *op) { alerts_inflight--; if(op->rc == 0) { crm_info("Alert %d (%s) complete", op->sequence, op->agent); } else { crm_warn("Alert %d (%s) failed: %d", op->sequence, op->agent, op->rc); } } static void send_alerts(const char *kind) { svc_action_t *alert = NULL; static int operations = 0; GListPtr l; crm_time_hr_t *now = crm_time_hr_new(NULL); crm_set_alert_key(CRM_alert_kind, kind); crm_set_alert_key(CRM_alert_version, VERSION); - for (l = g_list_first(crm_alert_list); l; l = g_list_next(l)) { + for (l = g_list_first(crmd_alert_list); l; l = g_list_next(l)) { crm_alert_entry_t *entry = (crm_alert_entry_t *)(l->data); char *timestamp = crm_time_format_hr(entry->tstamp_format, now); if (crm_is_target_alert(entry->select_kind == NULL ? crm_alert_kind_default : entry->select_kind, kind) == FALSE) { crm_trace("Cannot sending '%s' alert to '%s' via '%s'(select_kind=%s)", kind, entry->recipient, entry->path, entry->select_kind == NULL ? CRM_ALERT_KIND_DEFAULT : entry->select_kind_orig); free(timestamp); continue; } operations++; if (!draining_alerts) { crm_debug("Sending '%s' alert to '%s' via '%s'", kind, entry->recipient, entry->path); crm_set_alert_key(CRM_alert_recipient, entry->recipient); crm_set_alert_key_int(CRM_alert_node_sequence, operations); crm_set_alert_key(CRM_alert_timestamp, timestamp); alert = services_action_create_generic(entry->path, NULL); alert->timeout = entry->timeout; alert->standard = strdup("event"); alert->id = strdup(entry->id); alert->agent = strdup(entry->path); alert->sequence = operations; crm_set_envvar_list(entry); alerts_inflight++; if(services_action_async(alert, &crmd_alert_complete) == FALSE) { services_action_free(alert); alerts_inflight--; } crm_unset_envvar_list(entry); } else { crm_warn("Ignoring '%s' alert to '%s' via '%s' received " "while shutting down", kind, entry->recipient, entry->path); } free(timestamp); } crm_unset_alert_keys(); if (now) { free(now); } } void crmd_alert_node_event(crm_node_t *node) { - if(!crm_alert_list) { + if (crmd_alert_list == NULL) { return; } crm_set_alert_key(CRM_alert_node, node->uname); crm_set_alert_key_int(CRM_alert_nodeid, node->id); crm_set_alert_key(CRM_alert_desc, node->state); send_alerts("node"); } void crmd_alert_fencing_op(stonith_event_t * e) { char *desc = NULL; - if (!crm_alert_list) { + if (crmd_alert_list == NULL) { return; } desc = crm_strdup_printf( "Operation %s of %s by %s for %s@%s: %s (ref=%s)", e->action, e->target, e->executioner ? e->executioner : "", e->client_origin, e->origin, pcmk_strerror(e->result), e->id); crm_set_alert_key(CRM_alert_node, e->target); crm_set_alert_key(CRM_alert_task, e->operation); crm_set_alert_key(CRM_alert_desc, desc); crm_set_alert_key_int(CRM_alert_rc, e->result); send_alerts("fencing"); free(desc); } void crmd_alert_resource_op(const char *node, lrmd_event_data_t * op) { int target_rc = 0; - if(!crm_alert_list) { + if (crmd_alert_list == NULL) { return; } target_rc = rsc_op_expected_rc(op); if(op->interval == 0 && target_rc == op->rc && safe_str_eq(op->op_type, RSC_STATUS)) { /* Leave it up to the script if they want to alert for * 'failed' probes, only swallow ones for which the result was * unexpected. * * Even if we find a resource running, it was probably because * someone erased the status section. */ return; } crm_set_alert_key(CRM_alert_node, node); crm_set_alert_key(CRM_alert_rsc, op->rsc_id); crm_set_alert_key(CRM_alert_task, op->op_type); crm_set_alert_key_int(CRM_alert_interval, op->interval); crm_set_alert_key_int(CRM_alert_target_rc, target_rc); crm_set_alert_key_int(CRM_alert_status, op->op_status); crm_set_alert_key_int(CRM_alert_rc, op->rc); if(op->op_status == PCMK_LRM_OP_DONE) { crm_set_alert_key(CRM_alert_desc, services_ocf_exitcode_str(op->rc)); } else { crm_set_alert_key(CRM_alert_desc, services_lrm_status_str(op->op_status)); } send_alerts("resource"); } static gboolean alert_drain_timeout_callback(gpointer user_data) { gboolean *timeout_popped = (gboolean *) user_data; *timeout_popped = TRUE; return FALSE; } void crmd_drain_alerts(GMainContext *ctx) { guint timer; gboolean timeout_popped = FALSE; draining_alerts = TRUE; timer = g_timeout_add(crm_alert_max_alert_timeout + 5000, alert_drain_timeout_callback, (gpointer) &timeout_popped); while(alerts_inflight && !timeout_popped) { crm_trace("Draining mainloop while still %d alerts are in flight (timeout=%dms)", alerts_inflight, crm_alert_max_alert_timeout + 5000); g_main_context_iteration(ctx, TRUE); } if (!timeout_popped && (timer > 0)) { g_source_remove(timer); } if (crm_alert_kind_default) { g_strfreev(crm_alert_kind_default); crm_alert_kind_default = NULL; } } diff --git a/crmd/crmd_alerts.h b/crmd/crmd_alerts.h index d31976ee08..fce9956918 100644 --- a/crmd/crmd_alerts.h +++ b/crmd/crmd_alerts.h @@ -1,30 +1,31 @@ /* * Copyright (C) 2015 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef CRMD_ALERT__H # define CRMD_ALERT__H # include # include # include +void crmd_unpack_alerts(xmlNode *alerts); void crmd_alert_node_event(crm_node_t *node); void crmd_alert_fencing_op(stonith_event_t *e); void crmd_alert_resource_op(const char *node, lrmd_event_data_t *op); void crmd_drain_alerts(GMainContext *ctx); #endif diff --git a/include/crm/common/alerts_internal.h b/include/crm/common/alerts_internal.h index 0791ac8e58..fd96f2ed9a 100644 --- a/include/crm/common/alerts_internal.h +++ b/include/crm/common/alerts_internal.h @@ -1,90 +1,86 @@ /* * Copyright (C) 2015 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef ALERT_INTERNAL_H #define ALERT_INTERNAL_H /* Default-Timeout to use before killing a alerts script (in milliseconds) */ # define CRM_ALERT_DEFAULT_TIMEOUT_MS (30000) /* Default-Format-String used to pass timestamps to the alerts scripts */ # define CRM_ALERT_DEFAULT_TSTAMP_FORMAT "%H:%M:%S.%06N" typedef struct { char *name; char *value; } crm_alert_envvar_t; typedef struct { char *id; char *path; int timeout; char *tstamp_format; char *recipient; char *select_kind_orig; char **select_kind; char *select_attribute_name_orig; char **select_attribute_name; GListPtr envvars; } crm_alert_entry_t; enum crm_alert_keys_e { CRM_alert_recipient = 0, CRM_alert_node, CRM_alert_nodeid, CRM_alert_rsc, CRM_alert_task, CRM_alert_interval, CRM_alert_desc, CRM_alert_status, CRM_alert_target_rc, CRM_alert_rc, CRM_alert_kind, CRM_alert_version, CRM_alert_node_sequence, CRM_alert_timestamp, CRM_alert_attribute_name, CRM_alert_attribute_value, CRM_alert_select_kind, CRM_alert_select_attribute_name }; #define CRM_ALERT_INTERNAL_KEY_MAX 16 #define CRM_ALERT_KEY_PATH "CRM_alert_path" #define CRM_ALERT_NODE_SEQUENCE "CRM_alert_node_sequence" #define CRM_ALERT_KIND_DEFAULT "node,fencing,resource" -#if (HAVE_ATOMIC_ATTRD == 0) -extern char *attrd_uname; -#endif -extern GListPtr crm_alert_list; extern guint crm_alert_max_alert_timeout; extern const char *crm_alert_keys[CRM_ALERT_INTERNAL_KEY_MAX][3]; extern char **crm_alert_kind_default; -void crm_free_alert_list(void); -GListPtr crm_drop_envvars(crm_alert_entry_t *entry, int count); -void crm_add_dup_alert_list_entry(crm_alert_entry_t *entry); -GListPtr crm_get_envvars_from_cib(xmlNode *basenode, crm_alert_entry_t *entry, int *count); +crm_alert_entry_t *crm_dup_alert_entry(crm_alert_entry_t *entry); +crm_alert_envvar_t *crm_dup_alert_envvar(crm_alert_envvar_t *src); +void crm_free_alert_entry(crm_alert_entry_t *entry); +void crm_free_alert_envvar(crm_alert_envvar_t *entry); void crm_set_alert_key(enum crm_alert_keys_e name, const char *value); void crm_set_alert_key_int(enum crm_alert_keys_e name, int value); void crm_unset_alert_keys(void); void crm_set_envvar_list(crm_alert_entry_t *entry); void crm_unset_envvar_list(crm_alert_entry_t *entry); gboolean crm_is_target_alert(char **list, const char *value); #endif diff --git a/include/crm/pengine/rules_internal.h b/include/crm/pengine/rules_internal.h index 28533e38ef..860692b2e1 100644 --- a/include/crm/pengine/rules_internal.h +++ b/include/crm/pengine/rules_internal.h @@ -1,18 +1,20 @@ /* * Copyright (C) 2015-2017 Andrew Beekhof * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef RULES_INTERNAL_H #define RULES_INTERNAL_H +#include #include -void pe_unpack_alerts(xmlNode *alerts); +GListPtr pe_unpack_alerts(xmlNode *alerts); +void pe_free_alert_list(GListPtr alert_list); #ifdef RHEL7_COMPAT void pe_enable_legacy_alerts(const char *script, const char *target); #endif #endif diff --git a/lib/common/alerts.c b/lib/common/alerts.c index 8d473e26a3..12bb5ccf42 100644 --- a/lib/common/alerts.c +++ b/lib/common/alerts.c @@ -1,298 +1,244 @@ /* * Copyright (C) 2015 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include -GListPtr crm_alert_list = NULL; guint crm_alert_max_alert_timeout = CRM_ALERT_DEFAULT_TIMEOUT_MS; char **crm_alert_kind_default = NULL; /* * to allow script compatibility we can have more than one * set of environment variables */ const char *crm_alert_keys[CRM_ALERT_INTERNAL_KEY_MAX][3] = { [CRM_alert_recipient] = {"CRM_notify_recipient", "CRM_alert_recipient", NULL}, [CRM_alert_node] = {"CRM_notify_node", "CRM_alert_node", NULL}, [CRM_alert_nodeid] = {"CRM_notify_nodeid", "CRM_alert_nodeid", NULL}, [CRM_alert_rsc] = {"CRM_notify_rsc", "CRM_alert_rsc", NULL}, [CRM_alert_task] = {"CRM_notify_task", "CRM_alert_task", NULL}, [CRM_alert_interval] = {"CRM_notify_interval", "CRM_alert_interval", NULL}, [CRM_alert_desc] = {"CRM_notify_desc", "CRM_alert_desc", NULL}, [CRM_alert_status] = {"CRM_notify_status", "CRM_alert_status", NULL}, [CRM_alert_target_rc] = {"CRM_notify_target_rc", "CRM_alert_target_rc", NULL}, [CRM_alert_rc] = {"CRM_notify_rc", "CRM_alert_rc", NULL}, [CRM_alert_kind] = {"CRM_notify_kind", "CRM_alert_kind", NULL}, [CRM_alert_version] = {"CRM_notify_version", "CRM_alert_version", NULL}, [CRM_alert_node_sequence] = {"CRM_notify_node_sequence", CRM_ALERT_NODE_SEQUENCE, NULL}, [CRM_alert_timestamp] = {"CRM_notify_timestamp", "CRM_alert_timestamp", NULL}, [CRM_alert_attribute_name] = {"CRM_notify_attribute_name", "CRM_alert_attribute_name", NULL}, [CRM_alert_attribute_value] = {"CRM_notify_attribute_value", "CRM_alert_attribute_value", NULL} }; -static void -free_envvar_entry(crm_alert_envvar_t *entry) +void +crm_free_alert_envvar(crm_alert_envvar_t *entry) { free(entry->name); free(entry->value); free(entry); } -static void -crm_free_alert_list_entry(crm_alert_entry_t *entry) +void +crm_free_alert_entry(crm_alert_entry_t *entry) { free(entry->id); free(entry->path); free(entry->tstamp_format); free(entry->recipient); free(entry->select_kind_orig); if (entry->select_kind) { g_strfreev(entry->select_kind); } free(entry->select_attribute_name_orig); if(entry->select_attribute_name) { g_strfreev(entry->select_attribute_name); } if (entry->envvars) { g_list_free_full(entry->envvars, - (GDestroyNotify) free_envvar_entry); + (GDestroyNotify) crm_free_alert_envvar); } free(entry); } -void -crm_free_alert_list() -{ - if (crm_alert_list) { - g_list_free_full(crm_alert_list, (GDestroyNotify) crm_free_alert_list_entry); - crm_alert_list = NULL; - } -} - -static gpointer -copy_envvar_entry(crm_alert_envvar_t * src, - gpointer data) +crm_alert_envvar_t * +crm_dup_alert_envvar(crm_alert_envvar_t *src) { crm_alert_envvar_t *dst = calloc(1, sizeof(crm_alert_envvar_t)); CRM_ASSERT(dst); dst->name = strdup(src->name); dst->value = src->value?strdup(src->value):NULL; - return (gpointer) dst; -} - -static GListPtr -add_dup_envvar(crm_alert_entry_t *entrys, - crm_alert_envvar_t *entry) -{ - entrys->envvars = g_list_prepend(entrys->envvars, copy_envvar_entry(entry, NULL)); - return entrys->envvars; -} - -GListPtr -crm_drop_envvars(crm_alert_entry_t *entry, int count) -{ - int i; - - for (i = 0; - (entry->envvars) && ((count < 0) || (i < count)); - i++) { - free_envvar_entry((crm_alert_envvar_t *) g_list_first(entry->envvars)->data); - entry->envvars = g_list_delete_link(entry->envvars, - g_list_first(entry->envvars)); - } - return entry->envvars; + return dst; } static GListPtr copy_envvar_list_remove_dupes(crm_alert_entry_t *entry) { GListPtr dst = NULL, ls, ld; /* we are adding to the front so variable dupes coming via * recipient-section have got precedence over those in the * global section - we don't expect that many variables here * that it pays off to go for a hash-table to make dupe elimination * more efficient - maybe later when we might decide to do more * with the variables than cycling through them */ for (ls = g_list_first(entry->envvars); ls; ls = g_list_next(ls)) { for (ld = g_list_first(dst); ld; ld = g_list_next(ld)) { if (!strcmp(((crm_alert_envvar_t *)(ls->data))->name, ((crm_alert_envvar_t *)(ld->data))->name)) { break; } } if (!ld) { - dst = g_list_prepend(dst, - copy_envvar_entry((crm_alert_envvar_t *)(ls->data), NULL)); + dst = g_list_prepend(dst, crm_dup_alert_envvar((crm_alert_envvar_t *)(ls->data))); } } return dst; } -void -crm_add_dup_alert_list_entry(crm_alert_entry_t *entry) +/*! + * \internal + * \brief Duplicate an alert entry + * + * \param[in] entry Alert entry to duplicate + * + * \return Duplicate of alert entry + */ +crm_alert_entry_t * +crm_dup_alert_entry(crm_alert_entry_t *entry) { crm_alert_entry_t *new_entry = (crm_alert_entry_t *) calloc(1, sizeof(crm_alert_entry_t)); CRM_ASSERT(new_entry); *new_entry = (crm_alert_entry_t) { .id = strdup(entry->id), .path = strdup(entry->path), .timeout = entry->timeout, .tstamp_format = entry->tstamp_format?strdup(entry->tstamp_format):NULL, .recipient = entry->recipient?strdup(entry->recipient):NULL, .select_kind_orig = entry->select_kind_orig?g_strdup(entry->select_kind_orig):NULL, .select_kind = entry->select_kind?g_strdupv(entry->select_kind):NULL, .select_attribute_name_orig = entry->select_attribute_name_orig?g_strdup(entry->select_attribute_name_orig):NULL, .select_attribute_name = entry->select_attribute_name?g_strdupv(entry->select_attribute_name):NULL, .envvars = entry->envvars? copy_envvar_list_remove_dupes(entry) :NULL }; - crm_alert_list = g_list_prepend(crm_alert_list, new_entry); + return new_entry; } -GListPtr -crm_get_envvars_from_cib(xmlNode *basenode, crm_alert_entry_t *entry, int *count) -{ - xmlNode *envvar; - xmlNode *pair; - - if ((!basenode) || - (!(envvar = first_named_child(basenode, XML_TAG_ATTR_SETS)))) { - return entry->envvars; - } - - for (pair = first_named_child(envvar, XML_CIB_TAG_NVPAIR); - pair; pair = __xml_next(pair)) { - - crm_alert_envvar_t envvar_entry = (crm_alert_envvar_t) { - .name = (char *) crm_element_value(pair, XML_NVPAIR_ATTR_NAME), - .value = (char *) crm_element_value(pair, XML_NVPAIR_ATTR_VALUE) - }; - crm_trace("Found environment variable %s = '%s'", envvar_entry.name, - envvar_entry.value?envvar_entry.value:""); - (*count)++; - add_dup_envvar(entry, &envvar_entry); - } - - return entry->envvars; -} - void crm_set_alert_key(enum crm_alert_keys_e name, const char *value) { const char **key; for (key = crm_alert_keys[name]; *key; key++) { crm_trace("Setting alert key %s = '%s'", *key, value); if (value) { setenv(*key, value, 1); } else { unsetenv(*key); } } } void crm_set_alert_key_int(enum crm_alert_keys_e name, int value) { char *s = crm_itoa(value); crm_set_alert_key(name, s); free(s); } void crm_unset_alert_keys() { const char **key; enum crm_alert_keys_e name; for(name = 0; name < DIMOF(crm_alert_keys); name++) { for(key = crm_alert_keys[name]; *key; key++) { crm_trace("Unsetting alert key %s", *key); unsetenv(*key); } } } void crm_set_envvar_list(crm_alert_entry_t *entry) { GListPtr l; for (l = g_list_first(entry->envvars); l; l = g_list_next(l)) { crm_alert_envvar_t *env = (crm_alert_envvar_t *)(l->data); crm_trace("Setting environment variable %s = '%s'", env->name, env->value?env->value:""); if (env->value) { setenv(env->name, env->value, 1); } else { unsetenv(env->name); } } } void crm_unset_envvar_list(crm_alert_entry_t *entry) { GListPtr l; for (l = g_list_first(entry->envvars); l; l = g_list_next(l)) { crm_alert_envvar_t *env = (crm_alert_envvar_t *)(l->data); crm_trace("Unsetting environment variable %s", env->name); unsetenv(env->name); } } gboolean crm_is_target_alert(char **list, const char *value) { int target_list_num = 0; gboolean rc = FALSE; if (list == NULL) return TRUE; target_list_num = g_strv_length(list); for( int cnt = 0; cnt < target_list_num; cnt++ ) { if (strcmp(list[cnt], value) == 0) { rc = TRUE; break; } } return rc; } diff --git a/lib/pengine/rules_alerts.c b/lib/pengine/rules_alerts.c index 16c9dae082..edbf01e15f 100644 --- a/lib/pengine/rules_alerts.c +++ b/lib/pengine/rules_alerts.c @@ -1,200 +1,272 @@ /* * Copyright (C) 2015-2017 Andrew Beekhof * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #ifdef RHEL7_COMPAT /* @COMPAT An early implementation of alerts was backported to RHEL 7, * even though it was never in an upstream release. */ static char *notify_script = NULL; static char *notify_target = NULL; void pe_enable_legacy_alerts(const char *script, const char *target) { free(notify_script); notify_script = (script && strcmp(script, "/dev/null"))? strdup(script) : NULL; free(notify_target); notify_target = target? strdup(target): NULL; } #endif static GHashTable * get_meta_attrs_from_cib(xmlNode *basenode, crm_alert_entry_t *entry, guint *max_timeout) { GHashTable *config_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); crm_time_t *now = crm_time_new(NULL); const char *value = NULL; unpack_instance_attributes(basenode, basenode, XML_TAG_META_SETS, NULL, config_hash, NULL, FALSE, now); value = g_hash_table_lookup(config_hash, XML_ALERT_ATTR_TIMEOUT); if (value) { entry->timeout = crm_get_msec(value); if (entry->timeout <= 0) { if (entry->timeout == 0) { crm_trace("Setting timeout to default %dmsec", CRM_ALERT_DEFAULT_TIMEOUT_MS); } else { crm_warn("Invalid timeout value setting to default %dmsec", CRM_ALERT_DEFAULT_TIMEOUT_MS); } entry->timeout = CRM_ALERT_DEFAULT_TIMEOUT_MS; } else { crm_trace("Found timeout %dmsec", entry->timeout); } if (entry->timeout > *max_timeout) { *max_timeout = entry->timeout; } } value = g_hash_table_lookup(config_hash, XML_ALERT_ATTR_TSTAMP_FORMAT); if (value) { /* hard to do any checks here as merely anything can * can be a valid time-format-string */ entry->tstamp_format = (char *) value; crm_trace("Found timestamp format string '%s'", value); } value = g_hash_table_lookup(config_hash, XML_ALERT_ATTR_SELECT_KIND); if (value) { entry->select_kind_orig = (char *) value; entry->select_kind = g_strsplit((char *) value, ",", 0); crm_trace("Found select_kind string '%s'", (char *) value); } value = g_hash_table_lookup(config_hash, XML_ALERT_ATTR_SELECT_ATTRIBUTE_NAME); if (value) { entry->select_attribute_name_orig = (char*) value; entry->select_attribute_name = g_strsplit((char*) value, ",", 0); crm_trace("Found attribute_name string '%s'", (char *) value); } crm_time_free(now); return config_hash; /* keep hash as long as strings are needed */ } -void +static void +drop_envvars(crm_alert_entry_t *entry, int count) +{ + int i; + + for (i = 0; entry->envvars && ((count < 0) || (i < count)); i++) { + GListPtr first = g_list_first(entry->envvars); + + crm_free_alert_envvar((crm_alert_envvar_t *) first->data); + entry->envvars = g_list_delete_link(entry->envvars, first); + } +} + +static GListPtr +get_envvars_from_cib(xmlNode *basenode, crm_alert_entry_t *entry, int *count) +{ + xmlNode *child; + + if (basenode == NULL) { + return entry->envvars; + } + + child = first_named_child(basenode, XML_TAG_ATTR_SETS); + if (child == NULL) { + return entry->envvars; + } + + for (child = first_named_child(child, XML_CIB_TAG_NVPAIR); child != NULL; + child = __xml_next(child)) { + + crm_alert_envvar_t envvar_entry = (crm_alert_envvar_t) { + .name = (char *) crm_element_value(child, XML_NVPAIR_ATTR_NAME), + .value = (char *) crm_element_value(child, XML_NVPAIR_ATTR_VALUE) + }; + crm_trace("Found environment variable %s = '%s'", envvar_entry.name, + (envvar_entry.value? envvar_entry.value : "")); + (*count)++; + entry->envvars = g_list_prepend(entry->envvars, + crm_dup_alert_envvar(&envvar_entry)); + } + return entry->envvars; +} + +/*! + * \internal + * \brief Unpack a CIB alerts section + * + * \param[in] alerts XML of alerts section + * + * \return List of unpacked alert entries + * + * \note Unlike most unpack functions, this is not used by the pengine itself, + * but is supplied for use by daemons that need to send alerts. + */ +GListPtr pe_unpack_alerts(xmlNode *alerts) { xmlNode *alert; crm_alert_entry_t entry; guint max_timeout = 0; + GListPtr alert_list = NULL; - crm_free_alert_list(); crm_alert_max_alert_timeout = CRM_ALERT_DEFAULT_TIMEOUT_MS; if (crm_alert_kind_default == NULL) { crm_alert_kind_default = g_strsplit(CRM_ALERT_KIND_DEFAULT, ",", 0); } if (alerts) { #ifdef RHEL7_COMPAT if (notify_script) { crm_warn("Ignoring deprecated notification configuration because alerts available"); } #endif } else { #ifdef RHEL7_COMPAT if (notify_script) { entry = (crm_alert_entry_t) { .id = (char *) "legacy_notification", .path = notify_script, .timeout = CRM_ALERT_DEFAULT_TIMEOUT_MS, .recipient = notify_target, .select_kind_orig = NULL, .select_kind = NULL, .select_attribute_name_orig = NULL, .select_attribute_name = NULL }; - crm_add_dup_alert_list_entry(&entry); + alert_list = g_list_prepend(alert_list, + crm_dup_alert_entry(&entry)); crm_warn("Deprecated notification syntax in use (alerts syntax is preferable)"); } #endif - return; + return alert_list; } for (alert = first_named_child(alerts, XML_CIB_TAG_ALERT); alert; alert = __xml_next(alert)) { xmlNode *recipient; int recipients = 0, envvars = 0; GHashTable *config_hash = NULL; entry = (crm_alert_entry_t) { .id = (char *) crm_element_value(alert, XML_ATTR_ID), .path = (char *) crm_element_value(alert, XML_ALERT_ATTR_PATH), .timeout = CRM_ALERT_DEFAULT_TIMEOUT_MS, .tstamp_format = (char *) CRM_ALERT_DEFAULT_TSTAMP_FORMAT, .select_kind_orig = NULL, .select_kind = NULL, .select_attribute_name_orig = NULL, .select_attribute_name = NULL }; - crm_get_envvars_from_cib(alert, &entry, &envvars); + get_envvars_from_cib(alert, &entry, &envvars); config_hash = get_meta_attrs_from_cib(alert, &entry, &max_timeout); crm_debug("Found alert %s with path=%s timeout=%d tstamp_format=%s " "select_kind=%s select_attribute_name=%s " "%d additional environment variables", entry.id, entry.path, entry.timeout, entry.tstamp_format, entry.select_kind_orig, entry.select_attribute_name_orig, envvars); for (recipient = first_named_child(alert, XML_CIB_TAG_ALERT_RECIPIENT); recipient != NULL; recipient = __xml_next(recipient)) { int envvars_added = 0; entry.recipient = (char *) crm_element_value(recipient, XML_ALERT_ATTR_REC_VALUE); recipients++; - crm_get_envvars_from_cib(recipient, &entry, &envvars_added); + get_envvars_from_cib(recipient, &entry, &envvars_added); { crm_alert_entry_t recipient_entry = entry; GHashTable *config_hash = get_meta_attrs_from_cib(recipient, &recipient_entry, &max_timeout); - crm_add_dup_alert_list_entry(&recipient_entry); + alert_list = g_list_prepend(alert_list, + crm_dup_alert_entry(&recipient_entry)); crm_debug("Alert has recipient: id=%s, value=%s, " "%d additional environment variables", crm_element_value(recipient, XML_ATTR_ID), recipient_entry.recipient, envvars_added); g_hash_table_destroy(config_hash); } - crm_drop_envvars(&entry, envvars_added); + drop_envvars(&entry, envvars_added); } if (recipients == 0) { - crm_add_dup_alert_list_entry(&entry); + alert_list = g_list_prepend(alert_list, + crm_dup_alert_entry(&entry)); } - crm_drop_envvars(&entry, -1); + drop_envvars(&entry, -1); g_hash_table_destroy(config_hash); } if (max_timeout > 0) { crm_alert_max_alert_timeout = max_timeout; } + return alert_list; +} + +/*! + * \internal + * \brief Free an alert list generated by pe_unpack_alerts() + * + * \param[in] alert_list Alert list to free + */ +void +pe_free_alert_list(GListPtr alert_list) +{ + if (alert_list) { + g_list_free_full(alert_list, (GDestroyNotify) crm_free_alert_entry); + } }