diff --git a/attrd/attrd_alerts.c b/attrd/attrd_alerts.c index d187fab7b2..58562e8c14 100644 --- a/attrd/attrd_alerts.c +++ b/attrd/attrd_alerts.c @@ -1,536 +1,380 @@ /* * Copyright (C) 2015 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include -#include #include #include #include #include "attrd_alerts.h" #include -#include #include +#include +#include GHashTable *alert_info_cache = NULL; lrmd_t * attrd_lrmd_connect(int max_retry, void callback(lrmd_event_data_t * op)) { int ret = -ENOTCONN; int fails = 0; if (!the_lrmd) { the_lrmd = lrmd_api_new(); } while(fails < max_retry) { the_lrmd->cmds->set_callback(the_lrmd, callback); ret = the_lrmd->cmds->connect(the_lrmd, T_ATTRD, NULL); if (ret != pcmk_ok) { fails++; crm_trace("lrmd_connect RETRY!(%d)", fails); } else { crm_trace("lrmd_connect OK!"); break; } } if (ret != pcmk_ok) { if (the_lrmd->cmds->is_connected(the_lrmd)) { lrmd_api_delete(the_lrmd); } the_lrmd = NULL; } return the_lrmd; } -static void -attrd_parse_alerts(xmlNode *notifications) -{ - xmlNode *alert; - crm_alert_entry_t entry; - guint max_timeout = 0; - - crm_free_alert_list(); - crm_alert_max_alert_timeout = CRM_ALERT_DEFAULT_TIMEOUT_MS; - - if (crm_alert_kind_default == NULL) { - crm_alert_kind_default = g_strsplit(CRM_ALERT_KIND_DEFAULT, ",", 0); - } - - if (notifications) { - crm_info("We have an alerts section in the cib"); - } else { - crm_info("No optional alerts section in cib"); - return; - } - - for (alert = first_named_child(notifications, XML_CIB_TAG_ALERT); - alert; alert = __xml_next(alert)) { - xmlNode *recipient; - int recipients = 0, envvars = 0; - GHashTable *config_hash = NULL; - - entry = (crm_alert_entry_t) { - .id = (char *) crm_element_value(alert, XML_ATTR_ID), - .path = (char *) crm_element_value(alert, XML_ALERT_ATTR_PATH), - .timeout = CRM_ALERT_DEFAULT_TIMEOUT_MS, - .tstamp_format = (char *) CRM_ALERT_DEFAULT_TSTAMP_FORMAT, - .select_kind_orig = NULL, - .select_kind = NULL, - .select_attribute_name_orig = NULL, - .select_attribute_name = NULL - }; - - crm_get_envvars_from_cib(alert, - &entry, - &envvars); - - config_hash = - get_meta_attrs_from_cib(alert, &entry, &max_timeout); - - crm_debug("Found alert: id=%s, path=%s, timeout=%d, " - "tstamp_format=%s, select_kind=%s, select_attribute_name=%s, %d additional environment variables", - entry.id, entry.path, entry.timeout, - entry.tstamp_format, entry.select_kind_orig, entry.select_attribute_name_orig, envvars); - - for (recipient = first_named_child(alert, - XML_CIB_TAG_ALERT_RECIPIENT); - recipient; recipient = __xml_next(recipient)) { - int envvars_added = 0; - - entry.recipient = (char *) crm_element_value(recipient, - XML_ALERT_ATTR_REC_VALUE); - recipients++; - - crm_get_envvars_from_cib(recipient, - &entry, - &envvars_added); - - { - crm_alert_entry_t recipient_entry = entry; - GHashTable *config_hash = - get_meta_attrs_from_cib(recipient, - &recipient_entry, - &max_timeout); - - crm_add_dup_alert_list_entry(&recipient_entry); - - crm_debug("Alert has recipient: id=%s, value=%s, " - "%d additional environment variables", - crm_element_value(recipient, XML_ATTR_ID), - recipient_entry.recipient, envvars_added); - - g_hash_table_destroy(config_hash); - } - - entry.envvars = - crm_drop_envvars(&entry, envvars_added); - } - - if (recipients == 0) { - crm_add_dup_alert_list_entry(&entry); - } - - crm_drop_envvars(&entry, -1); - g_hash_table_destroy(config_hash); - } - - if (max_timeout > 0) { - crm_alert_max_alert_timeout = max_timeout; - } -} - static void config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { crm_time_t *now = crm_time_new(NULL); xmlNode *crmalerts = NULL; if (rc != pcmk_ok) { crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc)); goto bail; } crmalerts = output; if ((crmalerts) && (crm_element_name(crmalerts)) && (strcmp(crm_element_name(crmalerts), XML_CIB_TAG_ALERTS) != 0)) { crmalerts = first_named_child(crmalerts, XML_CIB_TAG_ALERTS); } if (!crmalerts) { crm_err("Local CIB query for " XML_CIB_TAG_ALERTS " section failed"); goto bail; } - attrd_parse_alerts(crmalerts); + pe_unpack_alerts(crmalerts); bail: crm_time_free(now); } gboolean attrd_read_options(gpointer user_data) { int call_id; if (the_cib) { call_id = the_cib->cmds->query(the_cib, "//" XML_CIB_TAG_ALERTS, NULL, cib_xpath | cib_scope_local); the_cib->cmds->register_callback_full(the_cib, call_id, 120, FALSE, NULL, "config_query_callback", config_query_callback, free); crm_trace("Querying the CIB... call %d", call_id); } else { crm_err("Querying the CIB...CIB connection not active"); } return TRUE; } void attrd_cib_updated_cb(const char *event, xmlNode * msg) { int rc = -1; int format= 1; xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); xmlNode *change = NULL; xmlXPathObject *xpathObj = NULL; CRM_CHECK(msg != NULL, return); crm_element_value_int(msg, F_CIB_RC, &rc); if (rc < pcmk_ok) { crm_trace("Filter rc=%d (%s)", rc, pcmk_strerror(rc)); return; } crm_element_value_int(patchset, "format", &format); if (format == 1) { if ((xpathObj = xpath_search( msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_ALERTS )) != NULL) { freeXpathObject(xpathObj); mainloop_set_trigger(attrd_config_read); } } else if (format == 2) { for (change = __xml_first_child(patchset); change != NULL; change = __xml_next(change)) { const char *xpath = crm_element_value(change, XML_DIFF_PATH); if (xpath == NULL) { continue; } /* modifying properties */ if (!strstr(xpath, "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_ALERTS)) { xmlNode *section = NULL; const char *name = NULL; /* adding notifications section */ if ((strcmp(xpath, "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION) != 0) || ((section = __xml_first_child(change)) == NULL) || ((name = crm_element_name(section)) == NULL) || (strcmp(name, XML_CIB_TAG_ALERTS) != 0)) { continue; } } mainloop_set_trigger(attrd_config_read); break; } } else { crm_warn("Unknown patch format: %d", format); } } -GHashTable * -get_meta_attrs_from_cib(xmlNode *basenode, crm_alert_entry_t *entry, - guint *max_timeout) -{ - GHashTable *config_hash = - g_hash_table_new_full(crm_str_hash, g_str_equal, - g_hash_destroy_str, g_hash_destroy_str); - crm_time_t *now = crm_time_new(NULL); - const char *value = NULL; - - unpack_instance_attributes(basenode, basenode, XML_TAG_META_SETS, NULL, - config_hash, NULL, FALSE, now); - - value = g_hash_table_lookup(config_hash, XML_ALERT_ATTR_TIMEOUT); - if (value) { - entry->timeout = crm_get_msec(value); - if (entry->timeout <= 0) { - if (entry->timeout == 0) { - crm_trace("Setting timeout to default %dmsec", - CRM_ALERT_DEFAULT_TIMEOUT_MS); - } else { - crm_warn("Invalid timeout value setting to default %dmsec", - CRM_ALERT_DEFAULT_TIMEOUT_MS); - } - entry->timeout = CRM_ALERT_DEFAULT_TIMEOUT_MS; - } else { - crm_trace("Found timeout %dmsec", entry->timeout); - } - if (entry->timeout > *max_timeout) { - *max_timeout = entry->timeout; - } - } - value = g_hash_table_lookup(config_hash, XML_ALERT_ATTR_TSTAMP_FORMAT); - if (value) { - /* hard to do any checks here as merely anything can - * can be a valid time-format-string - */ - entry->tstamp_format = (char *) value; - crm_trace("Found timestamp format string '%s'", value); - } - - value = g_hash_table_lookup(config_hash, XML_ALERT_ATTR_SELECT_KIND); - if (value) { - entry->select_kind_orig = (char*) value; - entry->select_kind = g_strsplit((char*) value, ",", 0); - crm_trace("Found select_kind string '%s'", (char *) value); - } - - value = g_hash_table_lookup(config_hash, XML_ALERT_ATTR_SELECT_ATTRIBUTE_NAME); - if (value) { - entry->select_attribute_name_orig = (char*) value; - entry->select_attribute_name = g_strsplit((char*) value, ",", 0); - crm_trace("Found attribute_name string '%s'", (char *) value); - } - - crm_time_free(now); - return config_hash; /* keep hash as long as strings are needed */ -} - void attrd_alert_fini() { if (alert_info_cache) { g_hash_table_destroy(alert_info_cache); alert_info_cache = NULL; } if (crm_alert_kind_default) { g_strfreev(crm_alert_kind_default); crm_alert_kind_default = NULL; } } static int exec_alerts(lrmd_t *lrmd, const char *kind, const char *attribute_name, lrmd_key_value_t * params, GListPtr alert_list, GHashTable *info_cache) { int call_id = 0; static int operations = 0; GListPtr l; crm_time_hr_t *now = crm_time_hr_new(NULL); params = lrmd_set_alert_key_to_lrmd_params(params, CRM_alert_kind, kind); params = lrmd_set_alert_key_to_lrmd_params(params, CRM_alert_version, VERSION); for (l = g_list_first(alert_list); l; l = g_list_next(l)) { lrmd_rsc_info_t *rsc = NULL; crm_alert_entry_t *entry = (crm_alert_entry_t *)(l->data); char *timestamp = crm_time_format_hr(entry->tstamp_format, now); lrmd_key_value_t * copy_params = NULL; lrmd_key_value_t *head, *p; if (crm_is_target_alert(entry->select_kind == NULL ? crm_alert_kind_default : entry->select_kind, kind) == FALSE) { crm_trace("Cannot sending '%s' alert to '%s' via '%s'(select_kind=%s)", kind, entry->recipient, entry->path, entry->select_kind == NULL ? CRM_ALERT_KIND_DEFAULT : entry->select_kind_orig); free(timestamp); continue; } if (crm_is_target_alert(entry->select_attribute_name, attribute_name) == FALSE) { crm_trace("Cannot sending '%s' alert to '%s' via '%s'(select_attribute_name=%s attribute_name=%s)", kind, entry->recipient, entry->path, entry->select_attribute_name_orig, attribute_name); free(timestamp); continue; } crm_info("Sending '%s' alert to '%s' via '%s'", kind, entry->recipient, entry->path); rsc = g_hash_table_lookup(alert_info_cache, entry->id); if (rsc == NULL) { rsc = lrmd->cmds->get_rsc_info(lrmd, entry->id, 0); if (!rsc) { lrmd->cmds->register_rsc(lrmd, entry->id, PCMK_ALERT_CLASS, "pacemaker", entry->path, lrmd_opt_drop_recurring); rsc = lrmd->cmds->get_rsc_info(lrmd, entry->id, 0); if (!rsc) { crm_err("Could not add alert %s : %s", entry->id, entry->path); return -1; } /* cache the result */ g_hash_table_insert(alert_info_cache, entry->id, rsc); } } /* Because there is a parameter to turn into every transmission, Copy a parameter. */ head = params; while (head) { p = head->next; copy_params = lrmd_key_value_add(copy_params, head->key, head->value); head = p; } operations++; copy_params = lrmd_key_value_add(copy_params, CRM_ALERT_KEY_PATH, entry->path); copy_params = lrmd_set_alert_key_to_lrmd_params(copy_params, CRM_alert_recipient, entry->recipient); copy_params = lrmd_set_alert_key_to_lrmd_params(copy_params, CRM_alert_node_sequence, crm_itoa(operations)); copy_params = lrmd_set_alert_key_to_lrmd_params(copy_params, CRM_alert_timestamp, timestamp); lrmd_set_alert_envvar_to_lrmd_params(copy_params, entry); call_id = lrmd->cmds->exec_alert(lrmd, strdup(entry->id), entry->timeout, lrmd_opt_notify_orig_only, copy_params); if (call_id <= 0) { crm_err("Operation %s on %s failed: %d", "start", rsc->id, call_id); } else { crm_info("Operation %s on %s compete: %d", "start", rsc->id, call_id); } free(timestamp); } if (now) { free(now); } return call_id; } static void free_alert_info(gpointer value) { lrmd_rsc_info_t *rsc_info = value; lrmd_free_rsc_info(rsc_info); } static void attrd_alert_lrm_op_callback(lrmd_event_data_t * op) { CRM_CHECK(op != NULL, return); if (op->type == lrmd_event_disconnect) { crm_info("Lost connection to LRMD service!"); if (the_lrmd->cmds->is_connected(the_lrmd)) { the_lrmd->cmds->disconnect(the_lrmd); lrmd_api_delete(the_lrmd); } the_lrmd = NULL; return; } else if (op->type != lrmd_event_exec_complete) { return; } if (op->params != NULL) { void *value_tmp1, *value_tmp2; value_tmp1 = g_hash_table_lookup(op->params, CRM_ALERT_KEY_PATH); if (value_tmp1 != NULL) { value_tmp2 = g_hash_table_lookup(op->params, CRM_ALERT_NODE_SEQUENCE); if(op->rc == 0) { crm_info("Alert %s (%s) complete", value_tmp2, value_tmp1); } else { crm_warn("Alert %s (%s) failed: %d", value_tmp2, value_tmp1, op->rc); } } } } int attrd_send_alerts(lrmd_t *lrmd, const char *node, uint32_t nodeid, const char *attribute_name, const char *attribute_value, GListPtr alert_list) { int ret = pcmk_ok; lrmd_key_value_t *params = NULL; if (lrmd == NULL) { lrmd = attrd_lrmd_connect(10, attrd_alert_lrm_op_callback); if (lrmd == NULL) { crm_warn("LRMD connection not active"); return ret; } } crm_trace("LRMD connection active"); if (alert_info_cache == NULL) { alert_info_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_alert_info); } params = lrmd_set_alert_key_to_lrmd_params(params, CRM_alert_node, node); params = lrmd_set_alert_key_to_lrmd_params(params, CRM_alert_nodeid, crm_itoa(nodeid)); params = lrmd_set_alert_key_to_lrmd_params(params, CRM_alert_attribute_name, attribute_name); params = lrmd_set_alert_key_to_lrmd_params(params, CRM_alert_attribute_value, attribute_value == NULL ? "null" : attribute_value); ret = exec_alerts(lrmd, "attribute", attribute_name, params, alert_list, alert_info_cache); crm_trace("ret : %d, node : %s, nodeid: %s, name: %s, value : %s", ret, node, crm_itoa(nodeid), attribute_name, attribute_value); if (params) { lrmd_key_value_freeall(params); } return ret; } #if HAVE_ATOMIC_ATTRD void set_alert_attribute_value(GHashTable *t, attribute_value_t *v) { attribute_value_t *a_v = NULL; a_v = calloc(1, sizeof(attribute_value_t)); CRM_ASSERT(a_v != NULL); a_v->nodeid = v->nodeid; a_v->nodename = strdup(v->nodename); if (v->current != NULL) { a_v->current = strdup(v->current); } g_hash_table_replace(t, a_v->nodename, a_v); } void send_alert_attributes_value(attribute_t *a, GHashTable *t) { int call_id = 0; attribute_value_t *at = NULL; GHashTableIter vIter; g_hash_table_iter_init(&vIter, t); while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & at)) { call_id = attrd_send_alerts(the_lrmd, at->nodename, at->nodeid, a->id, at->current, crm_alert_list); crm_trace("call_id : %d, nodename : %s, nodeid: %d, name: %s, value : %s", call_id, at->nodename, at->nodeid, a->id, at->current); } } #endif diff --git a/attrd/attrd_alerts.h b/attrd/attrd_alerts.h index 6949c1f1bf..ecd50e1e0f 100644 --- a/attrd/attrd_alerts.h +++ b/attrd/attrd_alerts.h @@ -1,42 +1,41 @@ /* * Copyright (C) 2015 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef ATTRD_ALERT__H # define ATTRD_ALERT__H # include # include # include extern cib_t *the_cib; extern lrmd_t *the_lrmd; extern crm_trigger_t *attrd_config_read; lrmd_t *attrd_lrmd_connect(int max_retry, void callback(lrmd_event_data_t * op)); gboolean attrd_read_options(gpointer user_data); void attrd_cib_updated_cb(const char *event, xmlNode * msg); -GHashTable *get_meta_attrs_from_cib(xmlNode *basenode, crm_alert_entry_t *entry, guint *max_timeout); void attrd_enable_alerts(const char *script, const char *target); void attrd_alert_fini(void); int attrd_send_alerts(lrmd_t *lrmd, const char *node, uint32_t nodeid, const char *attribute_name, const char *attribute_value, GListPtr alert_list); #if HAVE_ATOMIC_ATTRD void set_alert_attribute_value(GHashTable *t, attribute_value_t *v); void send_alert_attributes_value(attribute_t *a, GHashTable *t); #endif #endif diff --git a/crmd/control.c b/crmd/control.c index 6b4aa79302..f8d78d4f5b 100644 --- a/crmd/control.c +++ b/crmd/control.c @@ -1,1170 +1,1171 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include qb_ipcs_service_t *ipcs = NULL; extern gboolean crm_connect_corosync(crm_cluster_t * cluster); extern void crmd_ha_connection_destroy(gpointer user_data); void crm_shutdown(int nsig); gboolean crm_read_options(gpointer user_data); gboolean fsa_has_quorum = FALSE; crm_trigger_t *fsa_source = NULL; crm_trigger_t *config_read = NULL; bool no_quorum_suicide_escalation = FALSE; static gboolean election_timeout_popped(gpointer data) { /* Not everyone voted */ crm_info("Election failed: Declaring ourselves the winner"); register_fsa_input(C_TIMER_POPPED, I_ELECTION_DC, NULL); return FALSE; } /* A_HA_CONNECT */ void do_ha_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { gboolean registered = FALSE; static crm_cluster_t *cluster = NULL; if (cluster == NULL) { cluster = calloc(1, sizeof(crm_cluster_t)); } if (action & A_HA_DISCONNECT) { crm_cluster_disconnect(cluster); crm_info("Disconnected from the cluster"); set_bit(fsa_input_register, R_HA_DISCONNECTED); } if (action & A_HA_CONNECT) { crm_set_status_callback(&peer_update_callback); crm_set_autoreap(FALSE); if (is_openais_cluster()) { #if SUPPORT_COROSYNC registered = crm_connect_corosync(cluster); #endif } else if (is_heartbeat_cluster()) { #if SUPPORT_HEARTBEAT cluster->destroy = crmd_ha_connection_destroy; cluster->hb_dispatch = crmd_ha_msg_callback; registered = crm_cluster_connect(cluster); fsa_cluster_conn = cluster->hb_conn; crm_trace("Be informed of Node Status changes"); if (registered && fsa_cluster_conn->llc_ops->set_nstatus_callback(fsa_cluster_conn, crmd_ha_status_callback, fsa_cluster_conn) != HA_OK) { crm_err("Cannot set nstatus callback: %s", fsa_cluster_conn->llc_ops->errmsg(fsa_cluster_conn)); registered = FALSE; } crm_trace("Be informed of CRM Client Status changes"); if (registered && fsa_cluster_conn->llc_ops->set_cstatus_callback(fsa_cluster_conn, crmd_client_status_callback, fsa_cluster_conn) != HA_OK) { crm_err("Cannot set cstatus callback: %s", fsa_cluster_conn->llc_ops->errmsg(fsa_cluster_conn)); registered = FALSE; } if (registered) { crm_trace("Requesting an initial dump of CRMD client_status"); fsa_cluster_conn->llc_ops->client_status(fsa_cluster_conn, NULL, CRM_SYSTEM_CRMD, -1); } #endif } fsa_election = election_init(NULL, cluster->uname, 60000/*60s*/, election_timeout_popped); fsa_our_uname = cluster->uname; fsa_our_uuid = cluster->uuid; if(cluster->uuid == NULL) { crm_err("Could not obtain local uuid"); registered = FALSE; } if (registered == FALSE) { set_bit(fsa_input_register, R_HA_DISCONNECTED); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } populate_cib_nodes(node_update_none, __FUNCTION__); clear_bit(fsa_input_register, R_HA_DISCONNECTED); crm_info("Connected to the cluster"); } if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } } static bool need_spawn_pengine_from_crmd(void) { static int result = -1; if (result != -1) return result; if (!is_heartbeat_cluster()) { result = 0; return result; } /* NULL, or "strange" value: rather spawn from here. */ result = TRUE; crm_str_to_boolean(daemon_option("crmd_spawns_pengine"), &result); return result; } /* A_SHUTDOWN */ void do_shutdown(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { /* just in case */ set_bit(fsa_input_register, R_SHUTDOWN); if (need_spawn_pengine_from_crmd()) { if (is_set(fsa_input_register, pe_subsystem->flag_connected)) { crm_info("Terminating the %s", pe_subsystem->name); if (stop_subsystem(pe_subsystem, TRUE) == FALSE) { /* It's gone ... */ crm_err("Faking %s exit", pe_subsystem->name); clear_bit(fsa_input_register, pe_subsystem->flag_connected); } else { crm_info("Waiting for subsystems to exit"); crmd_fsa_stall(FALSE); } } crm_info("All subsystems stopped, continuing"); } if (stonith_api) { /* Prevent it from coming up again */ clear_bit(fsa_input_register, R_ST_REQUIRED); crm_info("Disconnecting STONITH..."); stonith_api->cmds->disconnect(stonith_api); } } /* A_SHUTDOWN_REQ */ void do_shutdown_req(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { xmlNode *msg = NULL; set_bit(fsa_input_register, R_SHUTDOWN); crm_info("Sending shutdown request to all peers (DC is %s)", (fsa_our_dc? fsa_our_dc : "not set")); msg = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); /* set_bit(fsa_input_register, R_STAYDOWN); */ if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } free_xml(msg); } extern crm_ipc_t *attrd_ipc; extern char *max_generation_from; extern xmlNode *max_generation_xml; extern GHashTable *resource_history; extern GHashTable *voted; extern char *te_client_id; extern regex_t *version_format_regex; void log_connected_client(gpointer key, gpointer value, gpointer user_data); void log_connected_client(gpointer key, gpointer value, gpointer user_data) { crm_client_t *client = value; crm_err("%s is still connected at exit", crm_client_name(client)); } int crmd_fast_exit(int rc) { if (is_set(fsa_input_register, R_STAYDOWN)) { crm_warn("Inhibiting respawn "CRM_XS" remapping exit code %d to %d", rc, DAEMON_RESPAWN_STOP); rc = DAEMON_RESPAWN_STOP; } if (rc == pcmk_ok && is_set(fsa_input_register, R_IN_RECOVERY)) { crm_err("Could not recover from internal error"); rc = pcmk_err_generic; } return crm_exit(rc); } int crmd_exit(int rc) { GListPtr gIter = NULL; GMainLoop *mloop = crmd_mainloop; static bool in_progress = FALSE; if(in_progress && rc == 0) { crm_debug("Exit is already in progress"); return rc; } else if(in_progress) { crm_notice("Error during shutdown process, terminating now with status %d: %s", rc, pcmk_strerror(rc)); crm_write_blackbox(SIGTRAP, NULL); crmd_fast_exit(rc); } in_progress = TRUE; crm_trace("Preparing to exit: %d", rc); /* Suppress secondary errors resulting from us disconnecting everything */ set_bit(fsa_input_register, R_HA_DISCONNECTED); /* Close all IPC servers and clients to ensure any and all shared memory files are cleaned up */ if(ipcs) { crm_trace("Closing IPC server"); mainloop_del_ipc_server(ipcs); ipcs = NULL; } if (attrd_ipc) { crm_trace("Closing attrd connection"); crm_ipc_close(attrd_ipc); crm_ipc_destroy(attrd_ipc); attrd_ipc = NULL; } if (pe_subsystem && pe_subsystem->client && pe_subsystem->client->ipcs) { crm_trace("Disconnecting Policy Engine"); qb_ipcs_disconnect(pe_subsystem->client->ipcs); } if(stonith_api) { crm_trace("Disconnecting fencing API"); clear_bit(fsa_input_register, R_ST_REQUIRED); stonith_api->cmds->free(stonith_api); stonith_api = NULL; } if (rc == pcmk_ok && crmd_mainloop == NULL) { crm_debug("No mainloop detected"); rc = EPROTO; } /* On an error, just get out. * * Otherwise, make the effort to have mainloop exit gracefully so * that it (mostly) cleans up after itself and valgrind has less * to report on - allowing real errors stand out */ if(rc != pcmk_ok) { crm_notice("Forcing immediate exit with status %d: %s", rc, pcmk_strerror(rc)); crm_write_blackbox(SIGTRAP, NULL); return crmd_fast_exit(rc); } /* Clean up as much memory as possible for valgrind */ for (gIter = fsa_message_queue; gIter != NULL; gIter = gIter->next) { fsa_data_t *fsa_data = gIter->data; crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]", fsa_input2string(fsa_data->fsa_input), fsa_state2string(fsa_state), fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); delete_fsa_input(fsa_data); } clear_bit(fsa_input_register, R_MEMBERSHIP); g_list_free(fsa_message_queue); fsa_message_queue = NULL; free(pe_subsystem); pe_subsystem = NULL; free(te_subsystem); te_subsystem = NULL; free(cib_subsystem); cib_subsystem = NULL; if (version_format_regex) { regfree(version_format_regex); free(version_format_regex); } election_fini(fsa_election); fsa_election = NULL; /* Tear down the CIB connection, but don't free it yet -- it could be used * when we drain the mainloop later. */ cib_free_callbacks(fsa_cib_conn); fsa_cib_conn->cmds->signoff(fsa_cib_conn); verify_stopped(fsa_state, LOG_WARNING); clear_bit(fsa_input_register, R_LRM_CONNECTED); lrm_state_destroy_all(); /* This basically will not work, since mainloop has a reference to it */ mainloop_destroy_trigger(fsa_source); fsa_source = NULL; mainloop_destroy_trigger(config_read); config_read = NULL; mainloop_destroy_trigger(stonith_reconnect); stonith_reconnect = NULL; mainloop_destroy_trigger(transition_trigger); transition_trigger = NULL; crm_client_cleanup(); crm_peer_destroy(); crm_timer_stop(transition_timer); crm_timer_stop(integration_timer); crm_timer_stop(finalization_timer); crm_timer_stop(election_trigger); election_timeout_stop(fsa_election); crm_timer_stop(shutdown_escalation_timer); crm_timer_stop(wait_timer); crm_timer_stop(recheck_timer); free(transition_timer); transition_timer = NULL; free(integration_timer); integration_timer = NULL; free(finalization_timer); finalization_timer = NULL; free(election_trigger); election_trigger = NULL; free(shutdown_escalation_timer); shutdown_escalation_timer = NULL; free(wait_timer); wait_timer = NULL; free(recheck_timer); recheck_timer = NULL; free(fsa_our_dc_version); fsa_our_dc_version = NULL; free(fsa_our_uname); fsa_our_uname = NULL; free(fsa_our_uuid); fsa_our_uuid = NULL; free(fsa_our_dc); fsa_our_dc = NULL; free(fsa_cluster_name); fsa_cluster_name = NULL; free(te_uuid); te_uuid = NULL; free(te_client_id); te_client_id = NULL; free(fsa_pe_ref); fsa_pe_ref = NULL; free(failed_stop_offset); failed_stop_offset = NULL; free(failed_start_offset); failed_start_offset = NULL; free(max_generation_from); max_generation_from = NULL; free_xml(max_generation_xml); max_generation_xml = NULL; mainloop_destroy_signal(SIGPIPE); mainloop_destroy_signal(SIGUSR1); mainloop_destroy_signal(SIGTERM); mainloop_destroy_signal(SIGTRAP); /* leave SIGCHLD engaged as we might still want to drain some service-actions */ if (mloop) { GMainContext *ctx = g_main_loop_get_context(crmd_mainloop); /* Don't re-enter this block */ crmd_mainloop = NULL; crmd_drain_alerts(ctx); /* no signals on final draining anymore */ mainloop_destroy_signal(SIGCHLD); crm_trace("Draining mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx)); { int lpc = 0; while((g_main_context_pending(ctx) && lpc < 10)) { lpc++; crm_trace("Iteration %d", lpc); g_main_context_dispatch(ctx); } } crm_trace("Closing mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx)); g_main_loop_quit(mloop); #if SUPPORT_HEARTBEAT /* Do this only after g_main_loop_quit(). * * This interface was broken (incomplete) since it was introduced. * ->delete() does cleanup and free most of it, but it does not * actually remove and destroy the corresponding GSource, so the next * prepare/check iteratioin would find a corrupt (because partially * freed) GSource, and segfault. * * Apparently one was supposed to store the GSource as returned by * G_main_add_ll_cluster(), and g_source_destroy() that "by hand". * * But no-one ever did this, not even in the old hb code when this was * introduced. * * Note that fsa_cluster_conn was set as an "alias" to cluster->hb_conn * in do_ha_control() right after crm_cluster_connect(), and only * happens to still point at that object, because do_ha_control() does * not reset it to NULL after crm_cluster_disconnect() above does * reset cluster->hb_conn to NULL. * Not sure if that's something to cleanup, too. * * I'll try to fix this up in heartbeat proper, so ->delete * will actually remove, and destroy, and unref, and free this thing. * Doing so after g_main_loop_quit() is valid with both old, * and eventually fixed heartbeat. * * If we introduce the "by hand" destroy/remove/unref, * this may break again once heartbeat is fixed :-( * * -- Lars Ellenberg */ if (fsa_cluster_conn) { crm_trace("Deleting heartbeat api object"); fsa_cluster_conn->llc_ops->delete(fsa_cluster_conn); fsa_cluster_conn = NULL; } #endif /* Won't do anything yet, since we're inside it now */ g_main_loop_unref(mloop); crm_trace("Done %d", rc); } else { mainloop_destroy_signal(SIGCHLD); } cib_delete(fsa_cib_conn); fsa_cib_conn = NULL; throttle_fini(); /* Graceful */ return rc; } /* A_EXIT_0, A_EXIT_1 */ void do_exit(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { int exit_code = pcmk_ok; int log_level = LOG_INFO; const char *exit_type = "gracefully"; if (action & A_EXIT_1) { /* exit_code = pcmk_err_generic; */ log_level = LOG_ERR; exit_type = "forcefully"; exit_code = pcmk_err_generic; } verify_stopped(cur_state, LOG_ERR); do_crm_log(log_level, "Performing %s - %s exiting the CRMd", fsa_action2string(action), exit_type); crm_info("[%s] stopped (%d)", crm_system_name, exit_code); crmd_exit(exit_code); } static void sigpipe_ignore(int nsig) { return; } /* A_STARTUP */ void do_startup(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { int was_error = 0; crm_debug("Registering Signal Handlers"); mainloop_add_signal(SIGTERM, crm_shutdown); mainloop_add_signal(SIGPIPE, sigpipe_ignore); fsa_source = mainloop_add_trigger(G_PRIORITY_HIGH, crm_fsa_trigger, NULL); config_read = mainloop_add_trigger(G_PRIORITY_HIGH, crm_read_options, NULL); transition_trigger = mainloop_add_trigger(G_PRIORITY_LOW, te_graph_trigger, NULL); crm_debug("Creating CIB and LRM objects"); fsa_cib_conn = cib_new(); lrm_state_init_local(); /* set up the timers */ transition_timer = calloc(1, sizeof(fsa_timer_t)); integration_timer = calloc(1, sizeof(fsa_timer_t)); finalization_timer = calloc(1, sizeof(fsa_timer_t)); election_trigger = calloc(1, sizeof(fsa_timer_t)); shutdown_escalation_timer = calloc(1, sizeof(fsa_timer_t)); wait_timer = calloc(1, sizeof(fsa_timer_t)); recheck_timer = calloc(1, sizeof(fsa_timer_t)); if (election_trigger != NULL) { election_trigger->source_id = 0; election_trigger->period_ms = -1; election_trigger->fsa_input = I_DC_TIMEOUT; election_trigger->callback = crm_timer_popped; election_trigger->repeat = FALSE; } else { was_error = TRUE; } if (transition_timer != NULL) { transition_timer->source_id = 0; transition_timer->period_ms = -1; transition_timer->fsa_input = I_PE_CALC; transition_timer->callback = crm_timer_popped; transition_timer->repeat = FALSE; } else { was_error = TRUE; } if (integration_timer != NULL) { integration_timer->source_id = 0; integration_timer->period_ms = -1; integration_timer->fsa_input = I_INTEGRATED; integration_timer->callback = crm_timer_popped; integration_timer->repeat = FALSE; } else { was_error = TRUE; } if (finalization_timer != NULL) { finalization_timer->source_id = 0; finalization_timer->period_ms = -1; finalization_timer->fsa_input = I_FINALIZED; finalization_timer->callback = crm_timer_popped; finalization_timer->repeat = FALSE; /* for possible enabling... a bug in the join protocol left * a slave in S_PENDING while we think it's in S_NOT_DC * * raising I_FINALIZED put us into a transition loop which is * never resolved. * in this loop we continually send probes which the node * NACK's because it's in S_PENDING * * if we have nodes where heartbeat is active but the * CRM is not... then this will be handled in the * integration phase */ finalization_timer->fsa_input = I_ELECTION; } else { was_error = TRUE; } if (shutdown_escalation_timer != NULL) { shutdown_escalation_timer->source_id = 0; shutdown_escalation_timer->period_ms = -1; shutdown_escalation_timer->fsa_input = I_STOP; shutdown_escalation_timer->callback = crm_timer_popped; shutdown_escalation_timer->repeat = FALSE; } else { was_error = TRUE; } if (wait_timer != NULL) { wait_timer->source_id = 0; wait_timer->period_ms = 2000; wait_timer->fsa_input = I_NULL; wait_timer->callback = crm_timer_popped; wait_timer->repeat = FALSE; } else { was_error = TRUE; } if (recheck_timer != NULL) { recheck_timer->source_id = 0; recheck_timer->period_ms = -1; recheck_timer->fsa_input = I_PE_CALC; recheck_timer->callback = crm_timer_popped; recheck_timer->repeat = FALSE; } else { was_error = TRUE; } /* set up the sub systems */ cib_subsystem = calloc(1, sizeof(struct crm_subsystem_s)); te_subsystem = calloc(1, sizeof(struct crm_subsystem_s)); pe_subsystem = calloc(1, sizeof(struct crm_subsystem_s)); if (cib_subsystem != NULL) { cib_subsystem->pid = -1; cib_subsystem->name = CRM_SYSTEM_CIB; cib_subsystem->flag_connected = R_CIB_CONNECTED; cib_subsystem->flag_required = R_CIB_REQUIRED; } else { was_error = TRUE; } if (te_subsystem != NULL) { te_subsystem->pid = -1; te_subsystem->name = CRM_SYSTEM_TENGINE; te_subsystem->flag_connected = R_TE_CONNECTED; te_subsystem->flag_required = R_TE_REQUIRED; } else { was_error = TRUE; } if (pe_subsystem != NULL) { pe_subsystem->pid = -1; pe_subsystem->path = CRM_DAEMON_DIR; pe_subsystem->name = CRM_SYSTEM_PENGINE; pe_subsystem->command = CRM_DAEMON_DIR "/" CRM_SYSTEM_PENGINE; pe_subsystem->args = NULL; pe_subsystem->flag_connected = R_PE_CONNECTED; pe_subsystem->flag_required = R_PE_REQUIRED; } else { was_error = TRUE; } if (was_error == FALSE && need_spawn_pengine_from_crmd()) { if (start_subsystem(pe_subsystem) == FALSE) { was_error = TRUE; } } if (was_error) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } static int32_t crmd_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { crm_trace("Connection %p", c); if (crm_client_new(c, uid, gid) == NULL) { return -EIO; } return 0; } static void crmd_ipc_created(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); } static int32_t crmd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; crm_client_t *client = crm_client_get(c); xmlNode *msg = crm_ipcs_recv(client, data, size, &id, &flags); crm_trace("Invoked: %s", crm_client_name(client)); crm_ipcs_send_ack(client, id, flags, "ack", __FUNCTION__, __LINE__); if (msg == NULL) { return 0; } #if ENABLE_ACL CRM_ASSERT(client->user != NULL); crm_acl_get_set_user(msg, F_CRM_USER, client->user); #endif crm_trace("Processing msg from %s", crm_client_name(client)); crm_log_xml_trace(msg, "CRMd[inbound]"); crm_xml_add(msg, F_CRM_SYS_FROM, client->id); if (crmd_authorize_message(msg, client, NULL)) { route_message(C_IPC_MESSAGE, msg); } trigger_fsa(fsa_source); free_xml(msg); return 0; } static int32_t crmd_ipc_closed(qb_ipcs_connection_t * c) { crm_client_t *client = crm_client_get(c); struct crm_subsystem_s *the_subsystem = NULL; if (client == NULL) { return 0; } crm_trace("Connection %p", c); if (client->userdata == NULL) { crm_trace("Client hadn't registered with us yet"); } else if (strcasecmp(CRM_SYSTEM_PENGINE, client->userdata) == 0) { the_subsystem = pe_subsystem; } else if (strcasecmp(CRM_SYSTEM_TENGINE, client->userdata) == 0) { the_subsystem = te_subsystem; } else if (strcasecmp(CRM_SYSTEM_CIB, client->userdata) == 0) { the_subsystem = cib_subsystem; } if (the_subsystem != NULL) { the_subsystem->source = NULL; the_subsystem->client = NULL; crm_info("Received HUP from %s:[%d]", the_subsystem->name, the_subsystem->pid); } else { /* else that was a transient client */ crm_trace("Received HUP from transient client"); } crm_trace("Disconnecting client %s (%p)", crm_client_name(client), client); free(client->userdata); crm_client_destroy(client); trigger_fsa(fsa_source); return 0; } static void crmd_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); crmd_ipc_closed(c); } /* A_STOP */ void do_stop(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_trace("Closing IPC server"); mainloop_del_ipc_server(ipcs); ipcs = NULL; register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } /* A_STARTED */ void do_started(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { static struct qb_ipcs_service_handlers crmd_callbacks = { .connection_accept = crmd_ipc_accept, .connection_created = crmd_ipc_created, .msg_process = crmd_ipc_dispatch, .connection_closed = crmd_ipc_closed, .connection_destroyed = crmd_ipc_destroy }; if (cur_state != S_STARTING) { crm_err("Start cancelled... %s", fsa_state2string(cur_state)); return; } else if (is_set(fsa_input_register, R_MEMBERSHIP) == FALSE) { crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP); crmd_fsa_stall(TRUE); return; } else if (is_set(fsa_input_register, R_LRM_CONNECTED) == FALSE) { crm_info("Delaying start, LRM not connected (%.16llx)", R_LRM_CONNECTED); crmd_fsa_stall(TRUE); return; } else if (is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) { crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED); crmd_fsa_stall(TRUE); return; } else if (is_set(fsa_input_register, R_READ_CONFIG) == FALSE) { crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG); crmd_fsa_stall(TRUE); return; } else if (is_set(fsa_input_register, R_PEER_DATA) == FALSE) { /* try reading from HA */ crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA); #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { HA_Message *msg = NULL; crm_trace("Looking for a HA message"); msg = fsa_cluster_conn->llc_ops->readmsg(fsa_cluster_conn, 0); if (msg != NULL) { crm_trace("There was a HA message"); ha_msg_del(msg); } } #endif crmd_fsa_stall(TRUE); return; } crm_debug("Init server comms"); ipcs = crmd_ipc_server_init(&crmd_callbacks); if (ipcs == NULL) { crm_err("Failed to create IPC server: shutting down and inhibiting respawn"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } if (stonith_reconnect == NULL) { int dummy; stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW, te_connect_stonith, &dummy); } set_bit(fsa_input_register, R_ST_REQUIRED); mainloop_set_trigger(stonith_reconnect); crm_notice("The local CRM is operational"); clear_bit(fsa_input_register, R_STARTING); register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL); } /* A_RECOVER */ void do_recover(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { set_bit(fsa_input_register, R_IN_RECOVERY); crm_warn("Fast-tracking shutdown in response to errors"); register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } /* *INDENT-OFF* */ pe_cluster_option crmd_opts[] = { /* name, old-name, validate, values, default, short description, long description */ { "dc-version", NULL, "string", NULL, "none", NULL, "Version of Pacemaker on the cluster's DC.", "Includes the hash which identifies the exact changeset it was built from. Used for diagnostic purposes." }, { "cluster-infrastructure", NULL, "string", NULL, "heartbeat", NULL, "The messaging stack on which Pacemaker is currently running.", "Used for informational and diagnostic purposes." }, { XML_CONFIG_ATTR_DC_DEADTIME, "dc_deadtime", "time", NULL, "20s", &check_time, "How long to wait for a response from other nodes during startup.", "The \"correct\" value will depend on the speed/load of your network and the type of switches used." }, { XML_CONFIG_ATTR_RECHECK, "cluster_recheck_interval", "time", "Zero disables polling. Positive values are an interval in seconds (unless other SI units are specified. eg. 5min)", "15min", &check_timer, "Polling interval for time based changes to options, resource parameters and constraints.", "The Cluster is primarily event driven, however the configuration can have elements that change based on time." " To ensure these changes take effect, we can optionally poll the cluster's status for changes." }, #ifdef RHEL7_COMPAT /* These options were superseded by the alerts feature and now are just an * alternate interface to it. It was never released upstream, but was * released in RHEL 7, so we allow it to be enabled at compile-time by * defining RHEL7_COMPAT. */ { "notification-agent", NULL, "string", NULL, "/dev/null", &check_script, "Deprecated", "Use alert path in alerts section instead" }, { "notification-recipient", NULL, "string", NULL, "", NULL, "Deprecated", "Use recipient value in alerts section instead" }, #endif { "load-threshold", NULL, "percentage", NULL, "80%", &check_utilization, "The maximum amount of system resources that should be used by nodes in the cluster", "The cluster will slow down its recovery process when the amount of system resources used" " (currently CPU) approaches this limit", }, { "node-action-limit", NULL, "integer", NULL, "0", &check_number, "The maximum number of jobs that can be scheduled per node. Defaults to 2x cores"}, { XML_CONFIG_ATTR_ELECTION_FAIL, "election_timeout", "time", NULL, "2min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." }, { XML_CONFIG_ATTR_FORCE_QUIT, "shutdown_escalation", "time", NULL, "20min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." }, { "crmd-integration-timeout", NULL, "time", NULL, "3min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." }, { "crmd-finalization-timeout", NULL, "time", NULL, "30min", &check_timer, "*** Advanced Use Only ***.", "If you need to adjust this value, it probably indicates the presence of a bug." }, { "crmd-transition-delay", NULL, "time", NULL, "0s", &check_timer, "*** Advanced Use Only ***\n" "Enabling this option will slow down cluster recovery under all conditions", "Delay cluster recovery for the configured interval to allow for additional/related events to occur.\n" "Useful if your configuration is sensitive to the order in which ping updates arrive." }, { "stonith-watchdog-timeout", NULL, "time", NULL, NULL, &check_sbd_timeout, "How long to wait before we can assume nodes are safely down", NULL }, { "stonith-max-attempts",NULL,"integer",NULL,"10",&check_positive_number, "How many times stonith can fail before it will no longer be attempted on a target" }, { "no-quorum-policy", "no_quorum_policy", "enum", "stop, freeze, ignore, suicide", "stop", &check_quorum, NULL, NULL }, #if SUPPORT_PLUGIN { XML_ATTR_EXPECTED_VOTES, NULL, "integer", NULL, "2", &check_number, "The number of nodes expected to be in the cluster", "Used to calculate quorum in openais based clusters." }, #endif }; /* *INDENT-ON* */ void crmd_metadata(void) { config_metadata("CRM Daemon", "1.0", "CRM Daemon Options", "This is a fake resource that details the options that can be configured for the CRM Daemon.", crmd_opts, DIMOF(crmd_opts)); } static void verify_crmd_options(GHashTable * options) { verify_all_options(options, crmd_opts, DIMOF(crmd_opts)); } static const char * crmd_pref(GHashTable * options, const char *name) { return get_cluster_pref(options, crmd_opts, DIMOF(crmd_opts), name); } static void config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { -#ifdef RHEL7_COMPAT - const char *script = NULL; -#endif const char *value = NULL; GHashTable *config_hash = NULL; crm_time_t *now = crm_time_new(NULL); xmlNode *crmconfig = NULL; xmlNode *alerts = NULL; if (rc != pcmk_ok) { fsa_data_t *msg_data = NULL; crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); if (rc == -EACCES || rc == -pcmk_err_schema_validation) { crm_err("The cluster is mis-configured - shutting down and staying down"); set_bit(fsa_input_register, R_STAYDOWN); } goto bail; } crmconfig = output; if ((crmconfig) && (crm_element_name(crmconfig)) && (strcmp(crm_element_name(crmconfig), XML_CIB_TAG_CRMCONFIG) != 0)) { crmconfig = first_named_child(crmconfig, XML_CIB_TAG_CRMCONFIG); } if (!crmconfig) { fsa_data_t *msg_data = NULL; crm_err("Local CIB query for " XML_CIB_TAG_CRMCONFIG " section failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); goto bail; } crm_debug("Call %d : Parsing CIB options", call_id); config_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); unpack_instance_attributes(crmconfig, crmconfig, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, FALSE, now); verify_crmd_options(config_hash); #ifdef RHEL7_COMPAT - script = crmd_pref(config_hash, "notification-agent"); - value = crmd_pref(config_hash, "notification-recipient"); - crmd_enable_alerts(script, value); + { + const char *script = crmd_pref(config_hash, "notification-agent"); + const char *recip = crmd_pref(config_hash, "notification-recipient"); + + pe_enable_legacy_alerts(script, recip); + } #endif value = crmd_pref(config_hash, XML_CONFIG_ATTR_DC_DEADTIME); election_trigger->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, "node-action-limit"); /* Also checks migration-limit */ throttle_update_job_max(value); value = crmd_pref(config_hash, "load-threshold"); if(value) { throttle_set_load_target(strtof(value, NULL) / 100.0); } value = crmd_pref(config_hash, "no-quorum-policy"); if (safe_str_eq(value, "suicide") && pcmk_locate_sbd()) { no_quorum_suicide_escalation = TRUE; } value = crmd_pref(config_hash,"stonith-max-attempts"); update_stonith_max_attempts(value); value = crmd_pref(config_hash, XML_CONFIG_ATTR_FORCE_QUIT); shutdown_escalation_timer->period_ms = crm_get_msec(value); /* How long to declare an election over - even if not everyone voted */ crm_debug("Shutdown escalation occurs after: %dms", shutdown_escalation_timer->period_ms); value = crmd_pref(config_hash, XML_CONFIG_ATTR_ELECTION_FAIL); election_timeout_set_period(fsa_election, crm_get_msec(value)); value = crmd_pref(config_hash, XML_CONFIG_ATTR_RECHECK); recheck_timer->period_ms = crm_get_msec(value); crm_debug("Checking for expired actions every %dms", recheck_timer->period_ms); value = crmd_pref(config_hash, "crmd-transition-delay"); transition_timer->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, "crmd-integration-timeout"); integration_timer->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, "crmd-finalization-timeout"); finalization_timer->period_ms = crm_get_msec(value); #if SUPPORT_COROSYNC if (is_classic_ais_cluster()) { value = crmd_pref(config_hash, XML_ATTR_EXPECTED_VOTES); crm_debug("Sending expected-votes=%s to corosync", value); send_cluster_text(crm_class_quorum, value, TRUE, NULL, crm_msg_ais); } #endif free(fsa_cluster_name); fsa_cluster_name = NULL; value = g_hash_table_lookup(config_hash, "cluster-name"); if (value) { fsa_cluster_name = strdup(value); } alerts = first_named_child(output, XML_CIB_TAG_ALERTS); - parse_alerts(alerts); + pe_unpack_alerts(alerts); set_bit(fsa_input_register, R_READ_CONFIG); crm_trace("Triggering FSA: %s", __FUNCTION__); mainloop_set_trigger(fsa_source); g_hash_table_destroy(config_hash); bail: crm_time_free(now); } gboolean crm_read_options(gpointer user_data) { int call_id = fsa_cib_conn->cmds->query(fsa_cib_conn, "//" XML_CIB_TAG_CRMCONFIG " | //" XML_CIB_TAG_ALERTS, NULL, cib_xpath | cib_scope_local); fsa_register_cib_callback(call_id, FALSE, NULL, config_query_callback); crm_trace("Querying the CIB... call %d", call_id); return TRUE; } /* A_READCONFIG */ void do_read_config(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { throttle_init(); mainloop_set_trigger(config_read); } void crm_shutdown(int nsig) { if (crmd_mainloop != NULL && g_main_is_running(crmd_mainloop)) { if (is_set(fsa_input_register, R_SHUTDOWN)) { crm_err("Escalating the shutdown"); register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL); } else { set_bit(fsa_input_register, R_SHUTDOWN); register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL); if (shutdown_escalation_timer->period_ms < 1) { const char *value = crmd_pref(NULL, XML_CONFIG_ATTR_FORCE_QUIT); int msec = crm_get_msec(value); crm_debug("Using default shutdown escalation: %dms", msec); shutdown_escalation_timer->period_ms = msec; } /* can't rely on this... */ crm_notice("Shutting down cluster resource manager " CRM_XS " limit=%dms", shutdown_escalation_timer->period_ms); crm_timer_start(shutdown_escalation_timer); } } else { crm_info("exit from shutdown"); crmd_exit(pcmk_ok); } } diff --git a/crmd/crmd_alerts.c b/crmd/crmd_alerts.c index 6dda4f66bf..306d0ea4ab 100644 --- a/crmd/crmd_alerts.c +++ b/crmd/crmd_alerts.c @@ -1,408 +1,219 @@ /* * Copyright (C) 2015 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include -#include #include "crmd_alerts.h" #include "crmd_messages.h" #include #include +#include -static char *notify_script = NULL; -static char *notify_target = NULL; static int alerts_inflight = 0; static gboolean draining_alerts = FALSE; -/* - * synchronize local data with cib - */ - -static GHashTable * -get_meta_attrs_from_cib(xmlNode *basenode, crm_alert_entry_t *entry, - guint *max_timeout) -{ - GHashTable *config_hash = - g_hash_table_new_full(crm_str_hash, g_str_equal, - g_hash_destroy_str, g_hash_destroy_str); - crm_time_t *now = crm_time_new(NULL); - const char *value = NULL; - - unpack_instance_attributes(basenode, basenode, XML_TAG_META_SETS, NULL, - config_hash, NULL, FALSE, now); - - value = g_hash_table_lookup(config_hash, XML_ALERT_ATTR_TIMEOUT); - if (value) { - entry->timeout = crm_get_msec(value); - if (entry->timeout <= 0) { - if (entry->timeout == 0) { - crm_trace("Setting timeout to default %dmsec", - CRM_ALERT_DEFAULT_TIMEOUT_MS); - } else { - crm_warn("Invalid timeout value setting to default %dmsec", - CRM_ALERT_DEFAULT_TIMEOUT_MS); - } - entry->timeout = CRM_ALERT_DEFAULT_TIMEOUT_MS; - } else { - crm_trace("Found timeout %dmsec", entry->timeout); - } - if (entry->timeout > *max_timeout) { - *max_timeout = entry->timeout; - } - } - value = g_hash_table_lookup(config_hash, XML_ALERT_ATTR_TSTAMP_FORMAT); - if (value) { - /* hard to do any checks here as merely anything can - * can be a valid time-format-string - */ - entry->tstamp_format = (char *) value; - crm_trace("Found timestamp format string '%s'", value); - } - - value = g_hash_table_lookup(config_hash, XML_ALERT_ATTR_SELECT_KIND); - if (value) { - entry->select_kind_orig = (char *) value; - entry->select_kind = g_strsplit((char *) value, ",", 0); - crm_trace("Found select_kind string '%s'", (char *) value); - } - - crm_time_free(now); - return config_hash; /* keep hash as long as strings are needed */ -} - -void -parse_alerts(xmlNode *alerts) -{ - xmlNode *alert; - crm_alert_entry_t entry; - guint max_timeout = 0; - - crm_free_alert_list(); - crm_alert_max_alert_timeout = CRM_ALERT_DEFAULT_TIMEOUT_MS; - if (crm_alert_kind_default == NULL) { - crm_alert_kind_default = g_strsplit(CRM_ALERT_KIND_DEFAULT, ",", 0); - } - - if (alerts) { - crm_info("We have an alerts section in the cib"); - - if (notify_script) { - crm_warn("Cib contains configuration for Legacy Notifications " - "which is overruled by alerts section"); - } - } else { - crm_info("No optional alerts section in cib"); - - if (notify_script) { - entry = (crm_alert_entry_t) { - .id = (char *) "legacy_notification", - .path = notify_script, - .timeout = CRM_ALERT_DEFAULT_TIMEOUT_MS, - .recipient = notify_target, - .select_kind_orig = NULL, - .select_kind = NULL, - .select_attribute_name_orig = NULL, - .select_attribute_name = NULL - }; - crm_add_dup_alert_list_entry(&entry); - crm_info("Legacy Notifications enabled"); - } - - return; - } - - for (alert = first_named_child(alerts, XML_CIB_TAG_ALERT); - alert; alert = __xml_next(alert)) { - xmlNode *recipient; - int recipients = 0, envvars = 0; - GHashTable *config_hash = NULL; - - entry = (crm_alert_entry_t) { - .id = (char *) crm_element_value(alert, XML_ATTR_ID), - .path = (char *) crm_element_value(alert, XML_ALERT_ATTR_PATH), - .timeout = CRM_ALERT_DEFAULT_TIMEOUT_MS, - .tstamp_format = (char *) CRM_ALERT_DEFAULT_TSTAMP_FORMAT, - .select_kind_orig = NULL, - .select_kind = NULL, - .select_attribute_name_orig = NULL, - .select_attribute_name = NULL - }; - - crm_get_envvars_from_cib(alert, - &entry, - &envvars); - - config_hash = - get_meta_attrs_from_cib(alert, &entry, &max_timeout); - - crm_debug("Found alert: id=%s, path=%s, timeout=%d, " - "tstamp_format=%s, select_kind=%s, %d additional environment variables", - entry.id, entry.path, entry.timeout, - entry.tstamp_format, entry.select_kind_orig, envvars); - - for (recipient = first_named_child(alert, - XML_CIB_TAG_ALERT_RECIPIENT); - recipient; recipient = __xml_next(recipient)) { - int envvars_added = 0; - - entry.recipient = (char *) crm_element_value(recipient, - XML_ALERT_ATTR_REC_VALUE); - recipients++; - - crm_get_envvars_from_cib(recipient, - &entry, - &envvars_added); - - { - crm_alert_entry_t recipient_entry = entry; - GHashTable *config_hash = - get_meta_attrs_from_cib(recipient, - &recipient_entry, - &max_timeout); - - crm_add_dup_alert_list_entry(&recipient_entry); - - crm_debug("Alert has recipient: id=%s, value=%s, " - "%d additional environment variables", - crm_element_value(recipient, XML_ATTR_ID), - recipient_entry.recipient, envvars_added); - - g_hash_table_destroy(config_hash); - } - - crm_drop_envvars(&entry, envvars_added); - } - - if (recipients == 0) { - crm_add_dup_alert_list_entry(&entry); - } - - crm_drop_envvars(&entry, -1); - g_hash_table_destroy(config_hash); - } - - if (max_timeout > 0) { - crm_alert_max_alert_timeout = max_timeout; - } -} - -/* - * end of synchronization of local data with cib - */ - -void -crmd_enable_alerts(const char *script, const char *target) -{ - free(notify_script); - notify_script = ((script) && - (strcmp(script,"/dev/null")))?strdup(script):NULL; - - free(notify_target); - notify_target = (target != NULL)?strdup(target):NULL; -} - static void crmd_alert_complete(svc_action_t *op) { alerts_inflight--; if(op->rc == 0) { crm_info("Alert %d (%s) complete", op->sequence, op->agent); } else { crm_warn("Alert %d (%s) failed: %d", op->sequence, op->agent, op->rc); } } static void send_alerts(const char *kind) { svc_action_t *alert = NULL; static int operations = 0; GListPtr l; crm_time_hr_t *now = crm_time_hr_new(NULL); crm_set_alert_key(CRM_alert_kind, kind); crm_set_alert_key(CRM_alert_version, VERSION); for (l = g_list_first(crm_alert_list); l; l = g_list_next(l)) { crm_alert_entry_t *entry = (crm_alert_entry_t *)(l->data); char *timestamp = crm_time_format_hr(entry->tstamp_format, now); if (crm_is_target_alert(entry->select_kind == NULL ? crm_alert_kind_default : entry->select_kind, kind) == FALSE) { crm_trace("Cannot sending '%s' alert to '%s' via '%s'(select_kind=%s)", kind, entry->recipient, entry->path, entry->select_kind == NULL ? CRM_ALERT_KIND_DEFAULT : entry->select_kind_orig); free(timestamp); continue; } operations++; if (!draining_alerts) { crm_debug("Sending '%s' alert to '%s' via '%s'", kind, entry->recipient, entry->path); crm_set_alert_key(CRM_alert_recipient, entry->recipient); crm_set_alert_key_int(CRM_alert_node_sequence, operations); crm_set_alert_key(CRM_alert_timestamp, timestamp); alert = services_action_create_generic(entry->path, NULL); alert->timeout = entry->timeout; alert->standard = strdup("event"); alert->id = strdup(entry->id); alert->agent = strdup(entry->path); alert->sequence = operations; crm_set_envvar_list(entry); alerts_inflight++; if(services_action_async(alert, &crmd_alert_complete) == FALSE) { services_action_free(alert); alerts_inflight--; } crm_unset_envvar_list(entry); } else { crm_warn("Ignoring '%s' alert to '%s' via '%s' received " "while shutting down", kind, entry->recipient, entry->path); } free(timestamp); } crm_unset_alert_keys(); if (now) { free(now); } } void crmd_alert_node_event(crm_node_t *node) { if(!crm_alert_list) { return; } crm_set_alert_key(CRM_alert_node, node->uname); crm_set_alert_key_int(CRM_alert_nodeid, node->id); crm_set_alert_key(CRM_alert_desc, node->state); send_alerts("node"); } void crmd_alert_fencing_op(stonith_event_t * e) { char *desc = NULL; if (!crm_alert_list) { return; } desc = crm_strdup_printf( "Operation %s of %s by %s for %s@%s: %s (ref=%s)", e->action, e->target, e->executioner ? e->executioner : "", e->client_origin, e->origin, pcmk_strerror(e->result), e->id); crm_set_alert_key(CRM_alert_node, e->target); crm_set_alert_key(CRM_alert_task, e->operation); crm_set_alert_key(CRM_alert_desc, desc); crm_set_alert_key_int(CRM_alert_rc, e->result); send_alerts("fencing"); free(desc); } void crmd_alert_resource_op(const char *node, lrmd_event_data_t * op) { int target_rc = 0; if(!crm_alert_list) { return; } target_rc = rsc_op_expected_rc(op); if(op->interval == 0 && target_rc == op->rc && safe_str_eq(op->op_type, RSC_STATUS)) { /* Leave it up to the script if they want to alert for * 'failed' probes, only swallow ones for which the result was * unexpected. * * Even if we find a resource running, it was probably because * someone erased the status section. */ return; } crm_set_alert_key(CRM_alert_node, node); crm_set_alert_key(CRM_alert_rsc, op->rsc_id); crm_set_alert_key(CRM_alert_task, op->op_type); crm_set_alert_key_int(CRM_alert_interval, op->interval); crm_set_alert_key_int(CRM_alert_target_rc, target_rc); crm_set_alert_key_int(CRM_alert_status, op->op_status); crm_set_alert_key_int(CRM_alert_rc, op->rc); if(op->op_status == PCMK_LRM_OP_DONE) { crm_set_alert_key(CRM_alert_desc, services_ocf_exitcode_str(op->rc)); } else { crm_set_alert_key(CRM_alert_desc, services_lrm_status_str(op->op_status)); } send_alerts("resource"); } static gboolean alert_drain_timeout_callback(gpointer user_data) { gboolean *timeout_popped = (gboolean *) user_data; *timeout_popped = TRUE; return FALSE; } void crmd_drain_alerts(GMainContext *ctx) { guint timer; gboolean timeout_popped = FALSE; draining_alerts = TRUE; timer = g_timeout_add(crm_alert_max_alert_timeout + 5000, alert_drain_timeout_callback, (gpointer) &timeout_popped); while(alerts_inflight && !timeout_popped) { crm_trace("Draining mainloop while still %d alerts are in flight (timeout=%dms)", alerts_inflight, crm_alert_max_alert_timeout + 5000); g_main_context_iteration(ctx, TRUE); } if (!timeout_popped && (timer > 0)) { g_source_remove(timer); } if (crm_alert_kind_default) { g_strfreev(crm_alert_kind_default); crm_alert_kind_default = NULL; } } diff --git a/crmd/crmd_alerts.h b/crmd/crmd_alerts.h index 40089839f1..d31976ee08 100644 --- a/crmd/crmd_alerts.h +++ b/crmd/crmd_alerts.h @@ -1,32 +1,30 @@ /* * Copyright (C) 2015 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef CRMD_ALERT__H # define CRMD_ALERT__H # include # include # include -void crmd_enable_alerts(const char *script, const char *target); void crmd_alert_node_event(crm_node_t *node); void crmd_alert_fencing_op(stonith_event_t *e); void crmd_alert_resource_op(const char *node, lrmd_event_data_t *op); void crmd_drain_alerts(GMainContext *ctx); -void parse_alerts(xmlNode *alerts); #endif diff --git a/include/crm/pengine/Makefile.am b/include/crm/pengine/Makefile.am index fa072f7e5c..f41f88782a 100644 --- a/include/crm/pengine/Makefile.am +++ b/include/crm/pengine/Makefile.am @@ -1,23 +1,23 @@ # # Copyright (C) 2004 Andrew Beekhof # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in headerdir=$(pkgincludedir)/crm/pengine -noinst_HEADERS = internal.h +noinst_HEADERS = internal.h rules_internal.h header_HEADERS = common.h complex.h remote.h rules.h status.h diff --git a/include/crm/pengine/rules_internal.h b/include/crm/pengine/rules_internal.h new file mode 100644 index 0000000000..28533e38ef --- /dev/null +++ b/include/crm/pengine/rules_internal.h @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2015-2017 Andrew Beekhof + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ +#ifndef RULES_INTERNAL_H +#define RULES_INTERNAL_H + +#include + +void pe_unpack_alerts(xmlNode *alerts); + +#ifdef RHEL7_COMPAT +void pe_enable_legacy_alerts(const char *script, const char *target); +#endif + +#endif diff --git a/lib/pengine/Makefile.am b/lib/pengine/Makefile.am index ff4d1e3bd5..ad2c7ae60f 100644 --- a/lib/pengine/Makefile.am +++ b/lib/pengine/Makefile.am @@ -1,44 +1,44 @@ # # Copyright (C) 2004 Andrew Beekhof # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # include $(top_srcdir)/Makefile.common ## libraries lib_LTLIBRARIES = libpe_rules.la libpe_status.la ## SOURCES noinst_HEADERS = unpack.h variant.h libpe_rules_la_LDFLAGS = -version-info 4:0:2 libpe_rules_la_CFLAGS = $(CFLAGS_HARDENED_LIB) libpe_rules_la_LDFLAGS += $(LDFLAGS_HARDENED_LIB) libpe_rules_la_LIBADD = $(top_builddir)/lib/common/libcrmcommon.la -libpe_rules_la_SOURCES = rules.c common.c +libpe_rules_la_SOURCES = rules.c rules_alerts.c common.c libpe_status_la_LDFLAGS = -version-info 13:0:3 libpe_status_la_CFLAGS = $(CFLAGS_HARDENED_LIB) libpe_status_la_LDFLAGS += $(LDFLAGS_HARDENED_LIB) libpe_status_la_LIBADD = @CURSESLIBS@ $(top_builddir)/lib/common/libcrmcommon.la libpe_status_la_SOURCES = status.c unpack.c utils.c complex.c native.c container.c \ group.c clone.c rules.c common.c failcounts.c remote.c clean-generic: rm -f *.log *.debug *~ diff --git a/lib/pengine/rules_alerts.c b/lib/pengine/rules_alerts.c new file mode 100644 index 0000000000..16c9dae082 --- /dev/null +++ b/lib/pengine/rules_alerts.c @@ -0,0 +1,200 @@ +/* + * Copyright (C) 2015-2017 Andrew Beekhof + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#include +#include +#include +#include +#include +#include + +#ifdef RHEL7_COMPAT +/* @COMPAT An early implementation of alerts was backported to RHEL 7, + * even though it was never in an upstream release. + */ +static char *notify_script = NULL; +static char *notify_target = NULL; + +void +pe_enable_legacy_alerts(const char *script, const char *target) +{ + free(notify_script); + notify_script = (script && strcmp(script, "/dev/null"))? + strdup(script) : NULL; + + free(notify_target); + notify_target = target? strdup(target): NULL; +} +#endif + +static GHashTable * +get_meta_attrs_from_cib(xmlNode *basenode, crm_alert_entry_t *entry, + guint *max_timeout) +{ + GHashTable *config_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, + g_hash_destroy_str, + g_hash_destroy_str); + crm_time_t *now = crm_time_new(NULL); + const char *value = NULL; + + unpack_instance_attributes(basenode, basenode, XML_TAG_META_SETS, NULL, + config_hash, NULL, FALSE, now); + + value = g_hash_table_lookup(config_hash, XML_ALERT_ATTR_TIMEOUT); + if (value) { + entry->timeout = crm_get_msec(value); + if (entry->timeout <= 0) { + if (entry->timeout == 0) { + crm_trace("Setting timeout to default %dmsec", + CRM_ALERT_DEFAULT_TIMEOUT_MS); + } else { + crm_warn("Invalid timeout value setting to default %dmsec", + CRM_ALERT_DEFAULT_TIMEOUT_MS); + } + entry->timeout = CRM_ALERT_DEFAULT_TIMEOUT_MS; + } else { + crm_trace("Found timeout %dmsec", entry->timeout); + } + if (entry->timeout > *max_timeout) { + *max_timeout = entry->timeout; + } + } + value = g_hash_table_lookup(config_hash, XML_ALERT_ATTR_TSTAMP_FORMAT); + if (value) { + /* hard to do any checks here as merely anything can + * can be a valid time-format-string + */ + entry->tstamp_format = (char *) value; + crm_trace("Found timestamp format string '%s'", value); + } + + value = g_hash_table_lookup(config_hash, XML_ALERT_ATTR_SELECT_KIND); + if (value) { + entry->select_kind_orig = (char *) value; + entry->select_kind = g_strsplit((char *) value, ",", 0); + crm_trace("Found select_kind string '%s'", (char *) value); + } + + value = g_hash_table_lookup(config_hash, + XML_ALERT_ATTR_SELECT_ATTRIBUTE_NAME); + if (value) { + entry->select_attribute_name_orig = (char*) value; + entry->select_attribute_name = g_strsplit((char*) value, ",", 0); + crm_trace("Found attribute_name string '%s'", (char *) value); + } + + crm_time_free(now); + return config_hash; /* keep hash as long as strings are needed */ +} + +void +pe_unpack_alerts(xmlNode *alerts) +{ + xmlNode *alert; + crm_alert_entry_t entry; + guint max_timeout = 0; + + crm_free_alert_list(); + crm_alert_max_alert_timeout = CRM_ALERT_DEFAULT_TIMEOUT_MS; + if (crm_alert_kind_default == NULL) { + crm_alert_kind_default = g_strsplit(CRM_ALERT_KIND_DEFAULT, ",", 0); + } + + if (alerts) { +#ifdef RHEL7_COMPAT + if (notify_script) { + crm_warn("Ignoring deprecated notification configuration because alerts available"); + } +#endif + } else { +#ifdef RHEL7_COMPAT + if (notify_script) { + entry = (crm_alert_entry_t) { + .id = (char *) "legacy_notification", + .path = notify_script, + .timeout = CRM_ALERT_DEFAULT_TIMEOUT_MS, + .recipient = notify_target, + .select_kind_orig = NULL, + .select_kind = NULL, + .select_attribute_name_orig = NULL, + .select_attribute_name = NULL + }; + crm_add_dup_alert_list_entry(&entry); + crm_warn("Deprecated notification syntax in use (alerts syntax is preferable)"); + } +#endif + return; + } + + for (alert = first_named_child(alerts, XML_CIB_TAG_ALERT); + alert; alert = __xml_next(alert)) { + + xmlNode *recipient; + int recipients = 0, envvars = 0; + GHashTable *config_hash = NULL; + + entry = (crm_alert_entry_t) { + .id = (char *) crm_element_value(alert, XML_ATTR_ID), + .path = (char *) crm_element_value(alert, XML_ALERT_ATTR_PATH), + .timeout = CRM_ALERT_DEFAULT_TIMEOUT_MS, + .tstamp_format = (char *) CRM_ALERT_DEFAULT_TSTAMP_FORMAT, + .select_kind_orig = NULL, + .select_kind = NULL, + .select_attribute_name_orig = NULL, + .select_attribute_name = NULL + }; + + crm_get_envvars_from_cib(alert, &entry, &envvars); + config_hash = get_meta_attrs_from_cib(alert, &entry, &max_timeout); + + crm_debug("Found alert %s with path=%s timeout=%d tstamp_format=%s " + "select_kind=%s select_attribute_name=%s " + "%d additional environment variables", + entry.id, entry.path, entry.timeout, entry.tstamp_format, + entry.select_kind_orig, entry.select_attribute_name_orig, + envvars); + + for (recipient = first_named_child(alert, XML_CIB_TAG_ALERT_RECIPIENT); + recipient != NULL; recipient = __xml_next(recipient)) { + + int envvars_added = 0; + + entry.recipient = (char *) crm_element_value(recipient, + XML_ALERT_ATTR_REC_VALUE); + recipients++; + + crm_get_envvars_from_cib(recipient, &entry, &envvars_added); + + { + crm_alert_entry_t recipient_entry = entry; + GHashTable *config_hash = get_meta_attrs_from_cib(recipient, + &recipient_entry, + &max_timeout); + + crm_add_dup_alert_list_entry(&recipient_entry); + crm_debug("Alert has recipient: id=%s, value=%s, " + "%d additional environment variables", + crm_element_value(recipient, XML_ATTR_ID), + recipient_entry.recipient, envvars_added); + g_hash_table_destroy(config_hash); + } + + crm_drop_envvars(&entry, envvars_added); + } + + if (recipients == 0) { + crm_add_dup_alert_list_entry(&entry); + } + + crm_drop_envvars(&entry, -1); + g_hash_table_destroy(config_hash); + } + + if (max_timeout > 0) { + crm_alert_max_alert_timeout = max_timeout; + } +}