diff --git a/daemons/attrd/attrd_alerts.c b/daemons/attrd/attrd_alerts.c index 611f270960..df7cee1ebe 100644 --- a/daemons/attrd/attrd_alerts.c +++ b/daemons/attrd/attrd_alerts.c @@ -1,144 +1,146 @@ /* - * Copyright 2015-2018 Andrew Beekhof + * Copyright 2015-2019 the Pacemaker project contributors + * + * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include -#include +#include #include "pacemaker-attrd.h" static GListPtr attrd_alert_list = NULL; static void attrd_lrmd_callback(lrmd_event_data_t * op) { CRM_CHECK(op != NULL, return); switch (op->type) { case lrmd_event_disconnect: crm_info("Lost connection to executor"); attrd_lrmd_disconnect(); break; default: break; } } static lrmd_t * attrd_lrmd_connect() { if (the_lrmd == NULL) { the_lrmd = lrmd_api_new(); the_lrmd->cmds->set_callback(the_lrmd, attrd_lrmd_callback); } if (!the_lrmd->cmds->is_connected(the_lrmd)) { const unsigned int max_attempts = 10; int ret = -ENOTCONN; for (int fails = 0; fails < max_attempts; ++fails) { ret = the_lrmd->cmds->connect(the_lrmd, T_ATTRD, NULL); if (ret == pcmk_ok) { break; } crm_debug("Could not connect to executor, %d tries remaining", (max_attempts - fails)); /* @TODO We don't want to block here with sleep, but we should wait * some time between connection attempts. We could possibly add a * timer with a callback, but then we'd likely need an alert queue. */ } if (ret != pcmk_ok) { attrd_lrmd_disconnect(); } } return the_lrmd; } void attrd_lrmd_disconnect() { if (the_lrmd) { lrmd_t *conn = the_lrmd; the_lrmd = NULL; /* in case we're called recursively */ lrmd_api_delete(conn); /* will disconnect if necessary */ } } static void config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { xmlNode *crmalerts = NULL; if (rc == -ENXIO) { crm_debug("Local CIB has no alerts section"); return; } else if (rc != pcmk_ok) { crm_notice("Could not query local CIB: %s", pcmk_strerror(rc)); return; } crmalerts = output; if (crmalerts && !crm_str_eq(crm_element_name(crmalerts), XML_CIB_TAG_ALERTS, TRUE)) { crmalerts = first_named_child(crmalerts, XML_CIB_TAG_ALERTS); } if (!crmalerts) { crm_notice("CIB query result has no " XML_CIB_TAG_ALERTS " section"); return; } pe_free_alert_list(attrd_alert_list); attrd_alert_list = pe_unpack_alerts(crmalerts); } #define XPATH_ALERTS \ "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_ALERTS gboolean attrd_read_options(gpointer user_data) { int call_id; CRM_CHECK(the_cib != NULL, return TRUE); call_id = the_cib->cmds->query(the_cib, XPATH_ALERTS, NULL, cib_xpath | cib_scope_local); the_cib->cmds->register_callback_full(the_cib, call_id, 120, FALSE, NULL, "config_query_callback", config_query_callback, free); crm_trace("Querying the CIB... call %d", call_id); return TRUE; } void attrd_cib_updated_cb(const char *event, xmlNode * msg) { if (!attrd_shutting_down() && crm_patchset_contains_alert(msg, FALSE)) { mainloop_set_trigger(attrd_config_read); } } int attrd_send_attribute_alert(const char *node, int nodeid, const char *attr, const char *value) { if (attrd_alert_list == NULL) { return pcmk_ok; } return lrmd_send_attribute_alert(attrd_lrmd_connect(), attrd_alert_list, node, nodeid, attr, value); } diff --git a/daemons/controld/controld_execd_state.c b/daemons/controld/controld_execd_state.c index 37c8320844..0e21d18971 100644 --- a/daemons/controld/controld_execd_state.c +++ b/daemons/controld/controld_execd_state.c @@ -1,829 +1,829 @@ /* * Copyright 2012-2019 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include -#include +#include #include #include GHashTable *lrm_state_table = NULL; extern GHashTable *proxy_table; int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg); void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)); static void free_rsc_info(gpointer value) { lrmd_rsc_info_t *rsc_info = value; lrmd_free_rsc_info(rsc_info); } static void free_deletion_op(gpointer value) { struct pending_deletion_op_s *op = value; free(op->rsc); delete_ha_msg_input(op->input); free(op); } static void free_recurring_op(gpointer value) { struct recurring_op_s *op = (struct recurring_op_s *)value; free(op->user_data); free(op->rsc_id); free(op->op_type); free(op->op_key); if (op->params) { g_hash_table_destroy(op->params); } free(op); } static gboolean fail_pending_op(gpointer key, gpointer value, gpointer user_data) { lrmd_event_data_t event = { 0, }; lrm_state_t *lrm_state = user_data; struct recurring_op_s *op = (struct recurring_op_s *)value; crm_trace("Pre-emptively failing " CRM_OP_FMT " on %s (call=%s, %s)", op->rsc_id, op->op_type, op->interval_ms, lrm_state->node_name, (char*)key, op->user_data); event.type = lrmd_event_exec_complete; event.rsc_id = op->rsc_id; event.op_type = op->op_type; event.user_data = op->user_data; event.timeout = 0; event.interval_ms = op->interval_ms; event.rc = PCMK_OCF_UNKNOWN_ERROR; event.op_status = PCMK_LRM_OP_NOT_CONNECTED; event.t_run = (unsigned int) op->start_time; event.t_rcchange = (unsigned int) op->start_time; event.call_id = op->call_id; event.remote_nodename = lrm_state->node_name; event.params = op->params; process_lrm_event(lrm_state, &event, op, NULL); return TRUE; } gboolean lrm_state_is_local(lrm_state_t *lrm_state) { if (lrm_state == NULL || fsa_our_uname == NULL) { return FALSE; } if (strcmp(lrm_state->node_name, fsa_our_uname) != 0) { return FALSE; } return TRUE; } lrm_state_t * lrm_state_create(const char *node_name) { lrm_state_t *state = NULL; if (!node_name) { crm_err("No node name given for lrm state object"); return NULL; } state = calloc(1, sizeof(lrm_state_t)); if (!state) { return NULL; } state->node_name = strdup(node_name); state->rsc_info_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_rsc_info); state->deletion_ops = g_hash_table_new_full(crm_str_hash, g_str_equal, free, free_deletion_op); state->pending_ops = g_hash_table_new_full(crm_str_hash, g_str_equal, free, free_recurring_op); state->resource_history = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, history_free); state->metadata_cache = metadata_cache_new(); g_hash_table_insert(lrm_state_table, (char *)state->node_name, state); return state; } void lrm_state_destroy(const char *node_name) { g_hash_table_remove(lrm_state_table, node_name); } static gboolean remote_proxy_remove_by_node(gpointer key, gpointer value, gpointer user_data) { remote_proxy_t *proxy = value; const char *node_name = user_data; if (safe_str_eq(node_name, proxy->node_name)) { return TRUE; } return FALSE; } static void internal_lrm_state_destroy(gpointer data) { lrm_state_t *lrm_state = data; if (!lrm_state) { return; } crm_trace("Destroying proxy table %s with %d members", lrm_state->node_name, g_hash_table_size(proxy_table)); g_hash_table_foreach_remove(proxy_table, remote_proxy_remove_by_node, (char *) lrm_state->node_name); remote_ra_cleanup(lrm_state); lrmd_api_delete(lrm_state->conn); if (lrm_state->rsc_info_cache) { crm_trace("Destroying rsc info cache with %d members", g_hash_table_size(lrm_state->rsc_info_cache)); g_hash_table_destroy(lrm_state->rsc_info_cache); } if (lrm_state->resource_history) { crm_trace("Destroying history op cache with %d members", g_hash_table_size(lrm_state->resource_history)); g_hash_table_destroy(lrm_state->resource_history); } if (lrm_state->deletion_ops) { crm_trace("Destroying deletion op cache with %d members", g_hash_table_size(lrm_state->deletion_ops)); g_hash_table_destroy(lrm_state->deletion_ops); } if (lrm_state->pending_ops) { crm_trace("Destroying pending op cache with %d members", g_hash_table_size(lrm_state->pending_ops)); g_hash_table_destroy(lrm_state->pending_ops); } metadata_cache_free(lrm_state->metadata_cache); free((char *)lrm_state->node_name); free(lrm_state); } void lrm_state_reset_tables(lrm_state_t * lrm_state, gboolean reset_metadata) { if (lrm_state->resource_history) { crm_trace("Re-setting history op cache with %d members", g_hash_table_size(lrm_state->resource_history)); g_hash_table_remove_all(lrm_state->resource_history); } if (lrm_state->deletion_ops) { crm_trace("Re-setting deletion op cache with %d members", g_hash_table_size(lrm_state->deletion_ops)); g_hash_table_remove_all(lrm_state->deletion_ops); } if (lrm_state->pending_ops) { crm_trace("Re-setting pending op cache with %d members", g_hash_table_size(lrm_state->pending_ops)); g_hash_table_remove_all(lrm_state->pending_ops); } if (lrm_state->rsc_info_cache) { crm_trace("Re-setting rsc info cache with %d members", g_hash_table_size(lrm_state->rsc_info_cache)); g_hash_table_remove_all(lrm_state->rsc_info_cache); } if (reset_metadata) { metadata_cache_reset(lrm_state->metadata_cache); } } gboolean lrm_state_init_local(void) { if (lrm_state_table) { return TRUE; } lrm_state_table = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, NULL, internal_lrm_state_destroy); if (!lrm_state_table) { return FALSE; } proxy_table = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, NULL, remote_proxy_free); if (!proxy_table) { g_hash_table_destroy(lrm_state_table); lrm_state_table = NULL; return FALSE; } return TRUE; } void lrm_state_destroy_all(void) { if (lrm_state_table) { crm_trace("Destroying state table with %d members", g_hash_table_size(lrm_state_table)); g_hash_table_destroy(lrm_state_table); lrm_state_table = NULL; } if(proxy_table) { crm_trace("Destroying proxy table with %d members", g_hash_table_size(proxy_table)); g_hash_table_destroy(proxy_table); proxy_table = NULL; } } lrm_state_t * lrm_state_find(const char *node_name) { if (!node_name) { return NULL; } return g_hash_table_lookup(lrm_state_table, node_name); } lrm_state_t * lrm_state_find_or_create(const char *node_name) { lrm_state_t *lrm_state; lrm_state = g_hash_table_lookup(lrm_state_table, node_name); if (!lrm_state) { lrm_state = lrm_state_create(node_name); } return lrm_state; } GList * lrm_state_get_list(void) { return g_hash_table_get_values(lrm_state_table); } static remote_proxy_t * find_connected_proxy_by_node(const char * node_name) { GHashTableIter gIter; remote_proxy_t *proxy = NULL; CRM_CHECK(proxy_table != NULL, return NULL); g_hash_table_iter_init(&gIter, proxy_table); while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) &proxy)) { if (proxy->source && safe_str_eq(node_name, proxy->node_name)) { return proxy; } } return NULL; } static void remote_proxy_disconnect_by_node(const char * node_name) { remote_proxy_t *proxy = NULL; CRM_CHECK(proxy_table != NULL, return); while ((proxy = find_connected_proxy_by_node(node_name)) != NULL) { /* mainloop_del_ipc_client() eventually calls remote_proxy_disconnected() * , which removes the entry from proxy_table. * Do not do this in a g_hash_table_iter_next() loop. */ if (proxy->source) { mainloop_del_ipc_client(proxy->source); } } return; } void lrm_state_disconnect_only(lrm_state_t * lrm_state) { int removed = 0; if (!lrm_state->conn) { return; } crm_trace("Disconnecting %s", lrm_state->node_name); remote_proxy_disconnect_by_node(lrm_state->node_name); ((lrmd_t *) lrm_state->conn)->cmds->disconnect(lrm_state->conn); if (is_not_set(fsa_input_register, R_SHUTDOWN)) { removed = g_hash_table_foreach_remove(lrm_state->pending_ops, fail_pending_op, lrm_state); crm_trace("Synthesized %d operation failures for %s", removed, lrm_state->node_name); } } void lrm_state_disconnect(lrm_state_t * lrm_state) { if (!lrm_state->conn) { return; } lrm_state_disconnect_only(lrm_state); lrmd_api_delete(lrm_state->conn); lrm_state->conn = NULL; } int lrm_state_is_connected(lrm_state_t * lrm_state) { if (!lrm_state->conn) { return FALSE; } return ((lrmd_t *) lrm_state->conn)->cmds->is_connected(lrm_state->conn); } int lrm_state_poke_connection(lrm_state_t * lrm_state) { if (!lrm_state->conn) { return -1; } return ((lrmd_t *) lrm_state->conn)->cmds->poke_connection(lrm_state->conn); } int lrm_state_ipc_connect(lrm_state_t * lrm_state) { int ret; if (!lrm_state->conn) { lrm_state->conn = lrmd_api_new(); ((lrmd_t *) lrm_state->conn)->cmds->set_callback(lrm_state->conn, lrm_op_callback); } ret = ((lrmd_t *) lrm_state->conn)->cmds->connect(lrm_state->conn, CRM_SYSTEM_CRMD, NULL); if (ret != pcmk_ok) { lrm_state->num_lrm_register_fails++; } else { lrm_state->num_lrm_register_fails = 0; } return ret; } static remote_proxy_t * crmd_remote_proxy_new(lrmd_t *lrmd, const char *node_name, const char *session_id, const char *channel) { static struct ipc_client_callbacks proxy_callbacks = { .dispatch = remote_proxy_dispatch, .destroy = remote_proxy_disconnected }; remote_proxy_t *proxy = remote_proxy_new(lrmd, &proxy_callbacks, node_name, session_id, channel); return proxy; } gboolean crmd_is_proxy_session(const char *session) { return g_hash_table_lookup(proxy_table, session) ? TRUE : FALSE; } void crmd_proxy_send(const char *session, xmlNode *msg) { remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session); lrm_state_t *lrm_state = NULL; if (!proxy) { return; } crm_log_xml_trace(msg, "to-proxy"); lrm_state = lrm_state_find(proxy->node_name); if (lrm_state) { crm_trace("Sending event to %.8s on %s", proxy->session_id, proxy->node_name); remote_proxy_relay_event(proxy, msg); } } static void crmd_proxy_dispatch(const char *session, xmlNode *msg) { crm_log_xml_trace(msg, "controller-proxy[inbound]"); crm_xml_add(msg, F_CRM_SYS_FROM, session); if (crmd_authorize_message(msg, NULL, session)) { route_message(C_IPC_MESSAGE, msg); } trigger_fsa(fsa_source); } static void remote_config_check(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { if (rc != pcmk_ok) { crm_err("Query resulted in an error: %s", pcmk_strerror(rc)); if (rc == -EACCES || rc == -pcmk_err_schema_validation) { crm_err("The cluster is mis-configured - shutting down and staying down"); } } else { lrmd_t * lrmd = (lrmd_t *)user_data; crm_time_t *now = crm_time_new(NULL); GHashTable *config_hash = crm_str_table_new(); crm_debug("Call %d : Parsing CIB options", call_id); pe_unpack_nvpairs(output, output, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, FALSE, now, NULL); /* Now send it to the remote peer */ remote_proxy_check(lrmd, config_hash); g_hash_table_destroy(config_hash); crm_time_free(now); } } static void crmd_remote_proxy_cb(lrmd_t *lrmd, void *userdata, xmlNode *msg) { lrm_state_t *lrm_state = userdata; const char *session = crm_element_value(msg, F_LRMD_IPC_SESSION); remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session); const char *op = crm_element_value(msg, F_LRMD_IPC_OP); if (safe_str_eq(op, LRMD_IPC_OP_NEW)) { const char *channel = crm_element_value(msg, F_LRMD_IPC_IPC_SERVER); proxy = crmd_remote_proxy_new(lrmd, lrm_state->node_name, session, channel); if (!remote_ra_controlling_guest(lrm_state)) { if (proxy != NULL) { /* Look up stonith-watchdog-timeout and send to the remote peer for validation */ int rc = fsa_cib_conn->cmds->query(fsa_cib_conn, XML_CIB_TAG_CRMCONFIG, NULL, cib_scope_local); fsa_cib_conn->cmds->register_callback_full(fsa_cib_conn, rc, 10, FALSE, lrmd, "remote_config_check", remote_config_check, NULL); } } else { crm_debug("Skipping remote_config_check for guest-nodes"); } } else if (safe_str_eq(op, LRMD_IPC_OP_SHUTDOWN_REQ)) { char *now_s = NULL; time_t now = time(NULL); crm_notice("%s requested shutdown of its remote connection", lrm_state->node_name); if (!remote_ra_is_in_maintenance(lrm_state)) { now_s = crm_itoa(now); update_attrd(lrm_state->node_name, XML_CIB_ATTR_SHUTDOWN, now_s, NULL, TRUE); free(now_s); remote_proxy_ack_shutdown(lrmd); crm_warn("Reconnection attempts to %s may result in failures that must be cleared", lrm_state->node_name); } else { remote_proxy_nack_shutdown(lrmd); crm_notice("Remote resource for %s is not managed so no ordered shutdown happening", lrm_state->node_name); } return; } else if (safe_str_eq(op, LRMD_IPC_OP_REQUEST) && proxy && proxy->is_local) { /* This is for the controller, which we are, so don't try * to send to ourselves over IPC -- do it directly. */ int flags = 0; xmlNode *request = get_message_xml(msg, F_LRMD_IPC_MSG); CRM_CHECK(request != NULL, return); #if ENABLE_ACL CRM_CHECK(lrm_state->node_name, return); crm_xml_add(request, XML_ACL_TAG_ROLE, "pacemaker-remote"); crm_acl_get_set_user(request, F_LRMD_IPC_USER, lrm_state->node_name); #endif /* Pacemaker Remote nodes don't know their own names (as known to the * cluster). When getting a node info request with no name or ID, add * the name, so we don't return info for ourselves instead of the * Pacemaker Remote node. */ if (safe_str_eq(crm_element_value(request, F_CRM_TASK), CRM_OP_NODE_INFO)) { int node_id; crm_element_value_int(request, XML_ATTR_ID, &node_id); if ((node_id <= 0) && (crm_element_value(request, XML_ATTR_UNAME) == NULL)) { crm_xml_add(request, XML_ATTR_UNAME, lrm_state->node_name); } } crmd_proxy_dispatch(session, request); crm_element_value_int(msg, F_LRMD_IPC_MSG_FLAGS, &flags); if (flags & crm_ipc_client_response) { int msg_id = 0; xmlNode *op_reply = create_xml_node(NULL, "ack"); crm_xml_add(op_reply, "function", __FUNCTION__); crm_xml_add_int(op_reply, "line", __LINE__); crm_element_value_int(msg, F_LRMD_IPC_MSG_ID, &msg_id); remote_proxy_relay_response(proxy, op_reply, msg_id); free_xml(op_reply); } } else { remote_proxy_cb(lrmd, lrm_state->node_name, msg); } } int lrm_state_remote_connect_async(lrm_state_t * lrm_state, const char *server, int port, int timeout_ms) { int ret; if (!lrm_state->conn) { lrm_state->conn = lrmd_remote_api_new(lrm_state->node_name, server, port); if (!lrm_state->conn) { return -1; } ((lrmd_t *) lrm_state->conn)->cmds->set_callback(lrm_state->conn, remote_lrm_op_callback); lrmd_internal_set_proxy_callback(lrm_state->conn, lrm_state, crmd_remote_proxy_cb); } crm_trace("initiating remote connection to %s at %d with timeout %d", server, port, timeout_ms); ret = ((lrmd_t *) lrm_state->conn)->cmds->connect_async(lrm_state->conn, lrm_state->node_name, timeout_ms); if (ret != pcmk_ok) { lrm_state->num_lrm_register_fails++; } else { lrm_state->num_lrm_register_fails = 0; } return ret; } int lrm_state_get_metadata(lrm_state_t * lrm_state, const char *class, const char *provider, const char *agent, char **output, enum lrmd_call_options options) { lrmd_key_value_t *params = NULL; if (!lrm_state->conn) { return -ENOTCONN; } /* Add the node name to the environment, as is done with normal resource * action calls. Meta-data calls shouldn't need it, but some agents are * written with an ocf_local_nodename call at the beginning regardless of * action. Without the environment variable, the agent would try to contact * the controller to get the node name -- but the controller would be * blocking on the synchronous meta-data call. * * At this point, we have to assume that agents are unlikely to make other * calls that require the controller, such as crm_node --quorum or * --cluster-id. * * @TODO Make meta-data calls asynchronous. (This will be part of a larger * project to make meta-data calls via the executor rather than directly.) */ params = lrmd_key_value_add(params, CRM_META "_" XML_LRM_ATTR_TARGET, lrm_state->node_name); return ((lrmd_t *) lrm_state->conn)->cmds->get_metadata_params(lrm_state->conn, class, provider, agent, output, options, params); } int lrm_state_cancel(lrm_state_t *lrm_state, const char *rsc_id, const char *action, guint interval_ms) { if (!lrm_state->conn) { return -ENOTCONN; } /* Figure out a way to make this async? * NOTICE: Currently it's synced and directly acknowledged in do_lrm_invoke(). */ if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) { return remote_ra_cancel(lrm_state, rsc_id, action, interval_ms); } return ((lrmd_t *) lrm_state->conn)->cmds->cancel(lrm_state->conn, rsc_id, action, interval_ms); } lrmd_rsc_info_t * lrm_state_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id, enum lrmd_call_options options) { lrmd_rsc_info_t *rsc = NULL; if (!lrm_state->conn) { return NULL; } if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) { return remote_ra_get_rsc_info(lrm_state, rsc_id); } rsc = g_hash_table_lookup(lrm_state->rsc_info_cache, rsc_id); if (rsc == NULL) { /* only contact the lrmd if we don't already have a cached rsc info */ rsc = ((lrmd_t *) lrm_state->conn)->cmds->get_rsc_info(lrm_state->conn, rsc_id, options); if (rsc == NULL) { return NULL; } /* cache the result */ g_hash_table_insert(lrm_state->rsc_info_cache, rsc->id, rsc); } return lrmd_copy_rsc_info(rsc); } int lrm_state_exec(lrm_state_t *lrm_state, const char *rsc_id, const char *action, const char *userdata, guint interval_ms, int timeout, /* ms */ int start_delay, /* ms */ lrmd_key_value_t * params) { if (!lrm_state->conn) { lrmd_key_value_freeall(params); return -ENOTCONN; } if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) { return remote_ra_exec(lrm_state, rsc_id, action, userdata, interval_ms, timeout, start_delay, params); } return ((lrmd_t *) lrm_state->conn)->cmds->exec(lrm_state->conn, rsc_id, action, userdata, interval_ms, timeout, start_delay, lrmd_opt_notify_changes_only, params); } int lrm_state_register_rsc(lrm_state_t * lrm_state, const char *rsc_id, const char *class, const char *provider, const char *agent, enum lrmd_call_options options) { lrmd_t *conn = (lrmd_t *) lrm_state->conn; if (conn == NULL) { return -ENOTCONN; } if (is_remote_lrmd_ra(agent, provider, NULL)) { return lrm_state_find_or_create(rsc_id)? pcmk_ok : -EINVAL; } /* @TODO Implement an asynchronous version of this (currently a blocking * call to the lrmd). */ return conn->cmds->register_rsc(lrm_state->conn, rsc_id, class, provider, agent, options); } int lrm_state_unregister_rsc(lrm_state_t * lrm_state, const char *rsc_id, enum lrmd_call_options options) { if (!lrm_state->conn) { return -ENOTCONN; } if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) { lrm_state_destroy(rsc_id); return pcmk_ok; } g_hash_table_remove(lrm_state->rsc_info_cache, rsc_id); /* @TODO Optimize this ... this function is a blocking round trip from * client to daemon. The controld_execd_state.c code path that uses this * function should always treat it as an async operation. The executor API * should make an async version available. */ return ((lrmd_t *) lrm_state->conn)->cmds->unregister_rsc(lrm_state->conn, rsc_id, options); } /* * Functions for sending alerts via local executor connection */ static GListPtr crmd_alert_list = NULL; void crmd_unpack_alerts(xmlNode *alerts) { pe_free_alert_list(crmd_alert_list); crmd_alert_list = pe_unpack_alerts(alerts); } void crmd_alert_node_event(crm_node_t *node) { lrm_state_t *lrm_state; if (crmd_alert_list == NULL) { return; } lrm_state = lrm_state_find(fsa_our_uname); if (lrm_state == NULL) { return; } lrmd_send_node_alert((lrmd_t *) lrm_state->conn, crmd_alert_list, node->uname, node->id, node->state); } void crmd_alert_fencing_op(stonith_event_t * e) { char *desc; lrm_state_t *lrm_state; if (crmd_alert_list == NULL) { return; } lrm_state = lrm_state_find(fsa_our_uname); if (lrm_state == NULL) { return; } desc = crm_strdup_printf("Operation %s of %s by %s for %s@%s: %s (ref=%s)", e->action, e->target, (e->executioner? e->executioner : ""), e->client_origin, e->origin, pcmk_strerror(e->result), e->id); lrmd_send_fencing_alert((lrmd_t *) lrm_state->conn, crmd_alert_list, e->target, e->operation, desc, e->result); free(desc); } void crmd_alert_resource_op(const char *node, lrmd_event_data_t * op) { lrm_state_t *lrm_state; if (crmd_alert_list == NULL) { return; } lrm_state = lrm_state_find(fsa_our_uname); if (lrm_state == NULL) { return; } lrmd_send_resource_alert((lrmd_t *) lrm_state->conn, crmd_alert_list, node, op); } diff --git a/daemons/execd/Makefile.am b/daemons/execd/Makefile.am index 048f7f3b44..210a7842f8 100644 --- a/daemons/execd/Makefile.am +++ b/daemons/execd/Makefile.am @@ -1,62 +1,62 @@ # # Copyright 2012-2019 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU Lesser General Public License # version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. # include $(top_srcdir)/Makefile.common halibdir = $(CRM_DAEMON_DIR) halib_PROGRAMS = pacemaker-execd cts-exec-helper initdir = $(INITDIR) init_SCRIPTS = pacemaker_remote sbin_PROGRAMS = pacemaker-remoted if BUILD_SYSTEMD systemdsystemunit_DATA = pacemaker_remote.service endif pacemaker_execd_CFLAGS = $(CFLAGS_HARDENED_EXE) pacemaker_execd_LDFLAGS = $(LDFLAGS_HARDENED_EXE) pacemaker_execd_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ $(top_builddir)/lib/services/libcrmservice.la \ $(top_builddir)/lib/fencing/libstonithd.la ${COMPAT_LIBS} pacemaker_execd_SOURCES = pacemaker-execd.c execd_commands.c \ execd_alerts.c pacemaker_remoted_CPPFLAGS = -DSUPPORT_REMOTE $(AM_CPPFLAGS) pacemaker_remoted_CFLAGS = $(CFLAGS_HARDENED_EXE) pacemaker_remoted_LDFLAGS = $(LDFLAGS_HARDENED_EXE) pacemaker_remoted_LDADD = $(pacemaker_execd_LDADD) \ $(top_builddir)/lib/lrmd/liblrmd.la pacemaker_remoted_SOURCES = $(pacemaker_execd_SOURCES) \ - remoted_tls.c remoted_proxy.c + remoted_tls.c remoted_pidone.c remoted_proxy.c cts_exec_helper_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ $(top_builddir)/lib/lrmd/liblrmd.la \ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/services/libcrmservice.la \ $(top_builddir)/lib/pengine/libpe_status.la cts_exec_helper_SOURCES = cts-exec-helper.c noinst_HEADERS = pacemaker-execd.h CLEANFILES = $(man8_MANS) if BUILD_LEGACY_LINKS install-exec-hook: cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f lrmd && $(LN_S) pacemaker-execd lrmd cd $(DESTDIR)$(sbindir) && rm -f pacemaker_remoted && $(LN_S) pacemaker-remoted pacemaker_remoted uninstall-hook: cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f lrmd cd $(DESTDIR)$(sbindir) && rm -f pacemaker_remoted endif diff --git a/daemons/execd/pacemaker-execd.c b/daemons/execd/pacemaker-execd.c index be4fd5e4cd..db32baa160 100644 --- a/daemons/execd/pacemaker-execd.c +++ b/daemons/execd/pacemaker-execd.c @@ -1,635 +1,508 @@ /* * Copyright 2012-2019 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include -#include #include - #include -#include #include #include #include #include #include #include #include +#include #include "pacemaker-execd.h" #if defined(HAVE_GNUTLS_GNUTLS_H) && defined(SUPPORT_REMOTE) # define ENABLE_PCMK_REMOTE - -// Hidden in liblrmd -extern int lrmd_tls_send_msg(crm_remote_t *session, xmlNode *msg, uint32_t id, - const char *msg_type); #endif static GMainLoop *mainloop = NULL; static qb_ipcs_service_t *ipcs = NULL; static stonith_t *stonith_api = NULL; int lrmd_call_id = 0; #ifdef ENABLE_PCMK_REMOTE /* whether shutdown request has been sent */ static sig_atomic_t shutting_down = FALSE; /* timer for waiting for acknowledgment of shutdown request */ static guint shutdown_ack_timer = 0; static gboolean lrmd_exit(gpointer data); #endif static void stonith_connection_destroy_cb(stonith_t * st, stonith_event_t * e) { stonith_api->state = stonith_disconnected; crm_err("Connection to fencer lost"); stonith_connection_failed(); } stonith_t * get_stonith_connection(void) { if (stonith_api && stonith_api->state == stonith_disconnected) { stonith_api_delete(stonith_api); stonith_api = NULL; } if (stonith_api == NULL) { int rc = pcmk_ok; stonith_api = stonith_api_new(); if (stonith_api == NULL) { crm_err("Could not connect to fencer: API memory allocation failed"); return NULL; } rc = stonith_api_connect_retry(stonith_api, crm_system_name, 10); if (rc != pcmk_ok) { crm_err("Could not connect to fencer in 10 attempts: %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); stonith_api_delete(stonith_api); stonith_api = NULL; } else { stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT, stonith_connection_destroy_cb); } } return stonith_api; } static int32_t lrmd_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { crm_trace("Connection %p", c); if (crm_client_new(c, uid, gid) == NULL) { return -EIO; } return 0; } static void lrmd_ipc_created(qb_ipcs_connection_t * c) { crm_client_t *new_client = crm_client_get(c); crm_trace("Connection %p", c); CRM_ASSERT(new_client != NULL); /* Now that the connection is offically established, alert * the other clients a new connection exists. */ notify_of_new_client(new_client); } static int32_t lrmd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; crm_client_t *client = crm_client_get(c); xmlNode *request = crm_ipcs_recv(client, data, size, &id, &flags); CRM_CHECK(client != NULL, crm_err("Invalid client"); return FALSE); CRM_CHECK(client->id != NULL, crm_err("Invalid client: %p", client); return FALSE); CRM_CHECK(flags & crm_ipc_client_response, crm_err("Invalid client request: %p", client); return FALSE); if (!request) { return 0; } if (!client->name) { const char *value = crm_element_value(request, F_LRMD_CLIENTNAME); if (value == NULL) { client->name = crm_itoa(crm_ipcs_client_pid(c)); } else { client->name = strdup(value); } } lrmd_call_id++; if (lrmd_call_id < 1) { lrmd_call_id = 1; } crm_xml_add(request, F_LRMD_CLIENTID, client->id); crm_xml_add(request, F_LRMD_CLIENTNAME, client->name); crm_xml_add_int(request, F_LRMD_CALLID, lrmd_call_id); process_lrmd_message(client, id, request); free_xml(request); return 0; } /*! * \internal * \brief Free a client connection, and exit if appropriate * * \param[in] client Client connection to free */ void lrmd_client_destroy(crm_client_t *client) { crm_client_destroy(client); #ifdef ENABLE_PCMK_REMOTE /* If we were waiting to shut down, we can now safely do so * if there are no more proxied IPC providers */ if (shutting_down && (ipc_proxy_get_provider() == NULL)) { lrmd_exit(NULL); } #endif } static int32_t lrmd_ipc_closed(qb_ipcs_connection_t * c) { crm_client_t *client = crm_client_get(c); if (client == NULL) { return 0; } crm_trace("Connection %p", c); client_disconnect_cleanup(client->id); #ifdef ENABLE_PCMK_REMOTE ipc_proxy_remove_provider(client); #endif lrmd_client_destroy(client); return 0; } static void lrmd_ipc_destroy(qb_ipcs_connection_t * c) { lrmd_ipc_closed(c); crm_trace("Connection %p", c); } static struct qb_ipcs_service_handlers lrmd_ipc_callbacks = { .connection_accept = lrmd_ipc_accept, .connection_created = lrmd_ipc_created, .msg_process = lrmd_ipc_dispatch, .connection_closed = lrmd_ipc_closed, .connection_destroyed = lrmd_ipc_destroy }; int lrmd_server_send_reply(crm_client_t * client, uint32_t id, xmlNode * reply) { crm_trace("Sending reply (%d) to client (%s)", id, client->id); switch (client->kind) { case CRM_CLIENT_IPC: return crm_ipcs_send(client, id, reply, FALSE); #ifdef ENABLE_PCMK_REMOTE case CRM_CLIENT_TLS: return lrmd_tls_send_msg(client->remote, reply, id, "reply"); #endif default: crm_err("Could not send reply: unknown client type %d", client->kind); } return -ENOTCONN; } int lrmd_server_send_notify(crm_client_t * client, xmlNode * msg) { crm_trace("Sending notification to client (%s)", client->id); switch (client->kind) { case CRM_CLIENT_IPC: if (client->ipcs == NULL) { crm_trace("Could not notify local client: disconnected"); return -ENOTCONN; } return crm_ipcs_send(client, 0, msg, crm_ipc_server_event); #ifdef ENABLE_PCMK_REMOTE case CRM_CLIENT_TLS: if (client->remote == NULL) { crm_trace("Could not notify remote client: disconnected"); return -ENOTCONN; } return lrmd_tls_send_msg(client->remote, msg, 0, "notify"); #endif default: crm_err("Could not notify client: unknown type %d", client->kind); } return -ENOTCONN; } /*! * \internal * \brief Clean up and exit immediately * * \param[in] data Ignored * * \return Doesn't return * \note This can be used as a timer callback. */ static gboolean lrmd_exit(gpointer data) { crm_info("Terminating with %d clients", crm_hash_table_size(client_connections)); if (stonith_api) { stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT); stonith_api->cmds->disconnect(stonith_api); stonith_api_delete(stonith_api); } if (ipcs) { mainloop_del_ipc_server(ipcs); } #ifdef ENABLE_PCMK_REMOTE lrmd_tls_server_destroy(); ipc_proxy_cleanup(); #endif crm_client_cleanup(); g_hash_table_destroy(rsc_list); if (mainloop) { lrmd_drain_alerts(mainloop); } crm_exit(CRM_EX_OK); return FALSE; } /*! * \internal * \brief Request cluster shutdown if appropriate, otherwise exit immediately * * \param[in] nsig Signal that caused invocation (ignored) */ static void lrmd_shutdown(int nsig) { #ifdef ENABLE_PCMK_REMOTE crm_client_t *ipc_proxy = ipc_proxy_get_provider(); /* If there are active proxied IPC providers, then we may be running * resources, so notify the cluster that we wish to shut down. */ if (ipc_proxy) { if (shutting_down) { crm_notice("Waiting for cluster to stop resources before exiting"); return; } crm_info("Sending shutdown request to cluster"); if (ipc_proxy_shutdown_req(ipc_proxy) < 0) { crm_crit("Shutdown request failed, exiting immediately"); } else { /* We requested a shutdown. Now, we need to wait for an * acknowledgement from the proxy host (which ensures the proxy host * supports shutdown requests), then wait for all proxy hosts to * disconnect (which ensures that all resources have been stopped). */ shutting_down = TRUE; /* Stop accepting new proxy connections */ lrmd_tls_server_destroy(); /* Older controller versions will never acknowledge our request, so * set a fairly short timeout to exit quickly in that case. If we * get the ack, we'll defuse this timer. */ shutdown_ack_timer = g_timeout_add_seconds(20, lrmd_exit, NULL); /* Currently, we let the OS kill us if the clients don't disconnect * in a reasonable time. We could instead set a long timer here * (shorter than what the OS is likely to use) and exit immediately * if it pops. */ return; } } #endif lrmd_exit(NULL); } /*! * \internal * \brief Defuse short exit timer if shutting down */ void handle_shutdown_ack() { #ifdef ENABLE_PCMK_REMOTE if (shutting_down) { crm_info("Received shutdown ack"); if (shutdown_ack_timer > 0) { g_source_remove(shutdown_ack_timer); shutdown_ack_timer = 0; } return; } #endif crm_debug("Ignoring unexpected shutdown ack"); } /*! * \internal * \brief Make short exit timer fire immediately */ void handle_shutdown_nack() { #ifdef ENABLE_PCMK_REMOTE if (shutting_down) { crm_info("Received shutdown nack"); if (shutdown_ack_timer > 0) { g_source_remove(shutdown_ack_timer); shutdown_ack_timer = g_timeout_add(0, lrmd_exit, NULL); } return; } #endif crm_debug("Ignoring unexpected shutdown nack"); } - -static pid_t main_pid = 0; -static void -sigdone(void) -{ - exit(CRM_EX_OK); -} - -static void -sigreap(void) -{ - pid_t pid = 0; - int status; - do { - /* - * Opinions seem to differ as to what to put here: - * -1, any child process - * 0, any child process whose process group ID is equal to that of the calling process - */ - pid = waitpid(-1, &status, WNOHANG); - if(pid == main_pid) { - /* Exit when pacemaker-remote exits and use the same return code */ - if (WIFEXITED(status)) { - exit(WEXITSTATUS(status)); - } - exit(CRM_EX_ERROR); - } - - } while (pid > 0); -} - -static struct { - int sig; - void (*handler)(void); -} sigmap[] = { - { SIGCHLD, sigreap }, - { SIGINT, sigdone }, -}; - -static void spawn_pidone(int argc, char **argv, char **envp) -{ - sigset_t set; - - if (getpid() != 1) { - return; - } - - /* Containers can be expected to have /var/log, but they may not have - * /var/log/pacemaker, so use a different default if no value has been - * explicitly configured in the container's environment. - */ - if (daemon_option("logfile") == NULL) { - set_daemon_option("logfile", "/var/log/pcmk-init.log"); - } - - sigfillset(&set); - sigprocmask(SIG_BLOCK, &set, 0); - - main_pid = fork(); - switch (main_pid) { - case 0: - sigprocmask(SIG_UNBLOCK, &set, NULL); - setsid(); - setpgid(0, 0); - - /* Child remains as pacemaker-remoted */ - return; - case -1: - perror("fork"); - } - - /* Parent becomes the reaper of zombie processes */ - /* Safe to initialize logging now if needed */ - -#ifdef HAVE___PROGNAME - /* Differentiate ourselves in the 'ps' output */ - { - char *p; - int i, maxlen; - char *LastArgv = NULL; - const char *name = "pcmk-init"; - - for(i = 0; i < argc; i++) { - if(!i || (LastArgv + 1 == argv[i])) - LastArgv = argv[i] + strlen(argv[i]); - } - - for(i = 0; envp[i] != NULL; i++) { - if((LastArgv + 1) == envp[i]) { - LastArgv = envp[i] + strlen(envp[i]); - } - } - - maxlen = (LastArgv - argv[0]) - 2; - - i = strlen(name); - /* We can overwrite individual argv[] arguments */ - snprintf(argv[0], maxlen, "%s", name); - - /* Now zero out everything else */ - p = &argv[0][i]; - while(p < LastArgv) - *p++ = '\0'; - argv[1] = NULL; - } -#endif /* HAVE___PROGNAME */ - - while (1) { - int sig; - size_t i; - - sigwait(&set, &sig); - for (i = 0; i < DIMOF(sigmap); i++) { - if (sigmap[i].sig == sig) { - sigmap[i].handler(); - break; - } - } - } -} - /* *INDENT-OFF* */ static struct crm_option long_options[] = { /* Top-level Options */ {"help", 0, 0, '?', "\tThis text"}, {"version", 0, 0, '$', "\tVersion information" }, {"verbose", 0, 0, 'V', "\tIncrease debug output"}, {"logfile", 1, 0, 'l', "\tSend logs to the additional named logfile"}, #ifdef ENABLE_PCMK_REMOTE {"port", 1, 0, 'p', "\tPort to listen on"}, #endif {0, 0, 0, 0} }; /* *INDENT-ON* */ #ifdef ENABLE_PCMK_REMOTE # define EXECD_TYPE "remote" #else # define EXECD_TYPE "local" #endif int main(int argc, char **argv, char **envp) { int flag = 0; int index = 0; int bump_log_num = 0; const char *option = NULL; - /* If necessary, create PID1 now before any FDs are opened */ - spawn_pidone(argc, argv, envp); - #ifndef ENABLE_PCMK_REMOTE crm_log_preinit("pacemaker-execd", argc, argv); crm_set_options(NULL, "[options]", long_options, "Resource agent executor daemon for cluster nodes"); #else + // If necessary, create PID 1 now before any file descriptors are opened + remoted_spawn_pidone(argc, argv, envp); + crm_log_preinit("pacemaker-remoted", argc, argv); crm_set_options(NULL, "[options]", long_options, "Resource agent executor daemon for Pacemaker Remote nodes"); #endif while (1) { flag = crm_get_option(argc, argv, &index); if (flag == -1) { break; } switch (flag) { case 'l': crm_add_logfile(optarg); break; case 'p': setenv("PCMK_remote_port", optarg, 1); break; case 'V': bump_log_num++; break; case '?': case '$': crm_help(flag, CRM_EX_OK); break; default: crm_help('?', CRM_EX_USAGE); break; } } crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); while (bump_log_num > 0) { crm_bump_log_level(argc, argv); bump_log_num--; } option = daemon_option("logfacility"); if (option && safe_str_neq(option, "none") && safe_str_neq(option, "/dev/null")) { setenv("HA_LOGFACILITY", option, 1); /* Used by the ocf_log/ha_log OCF macro */ } option = daemon_option("logfile"); if(option && safe_str_neq(option, "none")) { setenv("HA_LOGFILE", option, 1); /* Used by the ocf_log/ha_log OCF macro */ if (daemon_option_enabled(crm_system_name, "debug")) { setenv("HA_DEBUGLOG", option, 1); /* Used by the ocf_log/ha_debug OCF macro */ } } crm_notice("Starting Pacemaker " EXECD_TYPE " executor"); /* The presence of this variable allegedly controls whether child * processes like httpd will try and use Systemd's sd_notify * API */ unsetenv("NOTIFY_SOCKET"); /* Used by RAs - Leave owned by root */ crm_build_path(CRM_RSCTMP_DIR, 0755); rsc_list = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_rsc); ipcs = mainloop_add_ipc_server(CRM_SYSTEM_LRMD, QB_IPC_SHM, &lrmd_ipc_callbacks); if (ipcs == NULL) { crm_err("Failed to create IPC server: shutting down and inhibiting respawn"); crm_exit(CRM_EX_FATAL); } #ifdef ENABLE_PCMK_REMOTE if (lrmd_init_remote_tls_server() < 0) { crm_err("Failed to create TLS listener: shutting down and staying down"); crm_exit(CRM_EX_FATAL); } ipc_proxy_init(); #endif mainloop_add_signal(SIGTERM, lrmd_shutdown); mainloop = g_main_loop_new(NULL, FALSE); crm_notice("Pacemaker " EXECD_TYPE " executor successfully started and accepting connections"); g_main_loop_run(mainloop); /* should never get here */ lrmd_exit(NULL); return CRM_EX_OK; } diff --git a/daemons/execd/pacemaker-execd.h b/daemons/execd/pacemaker-execd.h index f6d747e9f4..4a52d91834 100644 --- a/daemons/execd/pacemaker-execd.h +++ b/daemons/execd/pacemaker-execd.h @@ -1,100 +1,101 @@ /* * Copyright 2012-2019 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PACEMAKER_EXECD__H # define PACEMAKER_EXECD__H # include # include # include # include # ifdef HAVE_GNUTLS_GNUTLS_H # undef KEYFILE # include # endif GHashTable *rsc_list; typedef struct lrmd_rsc_s { char *rsc_id; char *class; char *provider; char *type; int call_opts; /* NEVER dereference this pointer, * It simply exists as a switch to let us know * when the currently active operation has completed */ void *active; /* Operations in this list * have not been executed yet. */ GList *pending_ops; /* Operations in this list are recurring operations * that have been handed off from the pending ops list. */ GList *recurring_ops; int st_probe_rc; // What value should be returned for a probe if stonith crm_trigger_t *work; } lrmd_rsc_t; # ifdef HAVE_GNUTLS_GNUTLS_H // in remoted_tls.c int lrmd_init_remote_tls_server(void); void lrmd_tls_server_destroy(void); # endif int lrmd_server_send_reply(crm_client_t * client, uint32_t id, xmlNode * reply); int lrmd_server_send_notify(crm_client_t * client, xmlNode * msg); void notify_of_new_client(crm_client_t *new_client); void process_lrmd_message(crm_client_t * client, uint32_t id, xmlNode * request); void free_rsc(gpointer data); void handle_shutdown_ack(void); void handle_shutdown_nack(void); void lrmd_client_destroy(crm_client_t *client); void client_disconnect_cleanup(const char *client_id); /*! * \brief Don't worry about freeing this connection. It is * taken care of after mainloop exits by the main() function. */ stonith_t *get_stonith_connection(void); /*! * \brief This is a callback that tells the lrmd * the current stonith connection has gone away. This allows * us to timeout any pending stonith commands */ void stonith_connection_failed(void); #ifdef SUPPORT_REMOTE void ipc_proxy_init(void); void ipc_proxy_cleanup(void); void ipc_proxy_add_provider(crm_client_t *client); void ipc_proxy_remove_provider(crm_client_t *client); void ipc_proxy_forward_client(crm_client_t *client, xmlNode *xml); crm_client_t *ipc_proxy_get_provider(void); int ipc_proxy_shutdown_req(crm_client_t *ipc_proxy); +void remoted_spawn_pidone(int argc, char **argv, char **envp); #endif int process_lrmd_alert_exec(crm_client_t *client, uint32_t id, xmlNode *request); void lrmd_drain_alerts(GMainLoop *mloop); #endif // PACEMAKER_EXECD__H diff --git a/daemons/execd/remoted_pidone.c b/daemons/execd/remoted_pidone.c new file mode 100644 index 0000000000..9528e9ab1b --- /dev/null +++ b/daemons/execd/remoted_pidone.c @@ -0,0 +1,298 @@ +/* + * Copyright 2017-2019 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include "pacemaker-execd.h" + +static pid_t main_pid = 0; + +static void +sigdone(void) +{ + exit(CRM_EX_OK); +} + +static void +sigreap(void) +{ + pid_t pid = 0; + int status; + + do { + /* + * Opinions seem to differ as to what to put here: + * -1, any child process + * 0, any child process whose process group ID is equal to that of the calling process + */ + pid = waitpid(-1, &status, WNOHANG); + if (pid == main_pid) { + /* Exit when pacemaker-remote exits and use the same return code */ + if (WIFEXITED(status)) { + exit(WEXITSTATUS(status)); + } + exit(CRM_EX_ERROR); + } + } while (pid > 0); +} + +static struct { + int sig; + void (*handler)(void); +} sigmap[] = { + { SIGCHLD, sigreap }, + { SIGINT, sigdone }, +}; + +/*! + * \internal + * \brief Check a line of text for a valid environment variable name + * + * \param[in] line Text to check + * \param[out] first First character of valid name if found, NULL otherwise + * \param[out] last Last character of valid name if found, NULL otherwise + * + * \return TRUE if valid name found, FALSE otherwise + * \note It's reasonable to impose limitations on environment variable names + * beyond what C or setenv() does: We only allow names that contain only + * [a-zA-Z0-9_] characters and do not start with a digit. + */ +static bool +find_env_var_name(char *line, char **first, char **last) +{ + // Skip leading whitespace + *first = line; + while (isspace(**first)) { + ++*first; + } + + if (isalpha(**first) || (**first == '_')) { // Valid first character + *last = *first; + while (isalnum(*(*last + 1)) || (*(*last + 1) == '_')) { + ++*last; + } + return TRUE; + } + + *first = *last = NULL; + return FALSE; +} + +static void +load_env_vars(const char *filename) +{ + /* We haven't forked or initialized logging yet, so don't leave any file + * descriptors open, and don't log -- silently ignore errors. + */ + FILE *fp = fopen(filename, "r"); + + if (fp != NULL) { + char line[LINE_MAX] = { '\0', }; + + while (fgets(line, LINE_MAX, fp) != NULL) { + char *name = NULL; + char *end = NULL; + char *value = NULL; + char *quote = NULL; + + // Look for valid name immediately followed by equals sign + if (find_env_var_name(line, &name, &end) && (*++end == '=')) { + + // Null-terminate name, and advance beyond equals sign + *end++ = '\0'; + + // Check whether value is quoted + if ((*end == '\'') || (*end == '"')) { + quote = end++; + } + value = end; + + if (quote) { + /* Value is remaining characters up to next non-backslashed + * matching quote character. + */ + while (((*end != *quote) || (*(end - 1) == '\\')) + && (*end != '\0')) { + end++; + } + if (*end == *quote) { + // Null-terminate value, and advance beyond close quote + *end++ = '\0'; + } else { + // Matching closing quote wasn't found + value = NULL; + } + + } else { + /* Value is remaining characters up to next non-backslashed + * whitespace. + */ + while ((!isspace(*end) || (*(end - 1) == '\\')) + && (*end != '\0')) { + ++end; + } + + if (end == (line + LINE_MAX - 1)) { + // Line was too long + value = NULL; + } + // Do NOT null-terminate value (yet) + } + + /* We have a valid name and value, and end is now the character + * after the closing quote or the first whitespace after the + * unquoted value. Make sure the rest of the line is just + * whitespace or a comment. + */ + if (value) { + char *value_end = end; + + while (isspace(*end) && (*end != '\n')) { + ++end; + } + if ((*end == '\n') || (*end == '#')) { + if (quote == NULL) { + // Now we can null-terminate an unquoted value + *value_end = '\0'; + } + + // Don't overwrite (bundle options take precedence) + setenv(name, value, 0); + + } else { + value = NULL; + } + } + } + + if ((value == NULL) && (strchr(line, '\n') == NULL)) { + // Eat remainder of line beyond LINE_MAX + if (fscanf(fp, "%*[^\n]\n") == EOF) { + value = NULL; // Don't care, make compiler happy + } + } + } + fclose(fp); + } +} + +void +remoted_spawn_pidone(int argc, char **argv, char **envp) +{ + sigset_t set; + + /* This environment variable exists for two purposes: + * - For testing, setting it to "full" enables full PID 1 behavior even + * when PID is not 1 + * - Setting to "vars" enables just the loading of environment variables + * from /etc/pacemaker/pcmk-init.env, which could be useful for testing or + * containers with a custom PID 1 script that launches pacemaker-remoted. + */ + const char *pid1 = (getpid() == 1)? "full" : getenv("PCMK_remote_pid1"); + + if (pid1 == NULL) { + return; + } + + /* When a container is launched, it may be given specific environment + * variables, which for Pacemaker bundles are given in the bundle + * configuration. However, that does not allow for host-specific values. + * To allow for that, look for a special file containing a shell-like syntax + * of name/value pairs, and export those into the environment. + */ + load_env_vars("/etc/pacemaker/pcmk-init.env"); + + if (strcmp(pid1, "full")) { + return; + } + + /* Containers can be expected to have /var/log, but they may not have + * /var/log/pacemaker, so use a different default if no value has been + * explicitly configured in the container's environment. + */ + if (daemon_option("logfile") == NULL) { + set_daemon_option("logfile", "/var/log/pcmk-init.log"); + } + + sigfillset(&set); + sigprocmask(SIG_BLOCK, &set, 0); + + main_pid = fork(); + switch (main_pid) { + case 0: + sigprocmask(SIG_UNBLOCK, &set, NULL); + setsid(); + setpgid(0, 0); + + // Child remains as pacemaker-remoted + return; + case -1: + perror("fork"); + } + + /* Parent becomes the reaper of zombie processes */ + /* Safe to initialize logging now if needed */ + +# ifdef HAVE___PROGNAME + /* Differentiate ourselves in the 'ps' output */ + { + char *p; + int i, maxlen; + char *LastArgv = NULL; + const char *name = "pcmk-init"; + + for (i = 0; i < argc; i++) { + if (!i || (LastArgv + 1 == argv[i])) + LastArgv = argv[i] + strlen(argv[i]); + } + + for (i = 0; envp[i] != NULL; i++) { + if ((LastArgv + 1) == envp[i]) { + LastArgv = envp[i] + strlen(envp[i]); + } + } + + maxlen = (LastArgv - argv[0]) - 2; + + i = strlen(name); + + /* We can overwrite individual argv[] arguments */ + snprintf(argv[0], maxlen, "%s", name); + + /* Now zero out everything else */ + p = &argv[0][i]; + while (p < LastArgv) { + *p++ = '\0'; + } + argv[1] = NULL; + } +# endif // HAVE___PROGNAME + + while (1) { + int sig; + size_t i; + + sigwait(&set, &sig); + for (i = 0; i < DIMOF(sigmap); i++) { + if (sigmap[i].sig == sig) { + sigmap[i].handler(); + break; + } + } + } +} diff --git a/include/crm/Makefile.am b/include/crm/Makefile.am index 9d57dd9053..566e9feafc 100644 --- a/include/crm/Makefile.am +++ b/include/crm/Makefile.am @@ -1,19 +1,19 @@ # # Copyright 2004-2019 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # MAINTAINERCLEANFILES = Makefile.in headerdir=$(pkgincludedir)/crm header_HEADERS = attrd.h cib.h cluster.h compatibility.h crm.h \ lrmd.h msg_xml.h services.h stonith-ng.h -noinst_HEADERS = lrmd_alerts_internal.h +noinst_HEADERS = lrmd_internal.h SUBDIRS = common pengine cib fencing cluster diff --git a/include/crm/lrmd_alerts_internal.h b/include/crm/lrmd_alerts_internal.h deleted file mode 100644 index a381789270..0000000000 --- a/include/crm/lrmd_alerts_internal.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright 2015-2017 the Pacemaker project contributors - * - * The version control history for this file may have further details. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#ifndef LRMD_ALERT_INTERNAL_H -#define LRMD_ALERT_INTERNAL_H - -#include -#include - -int lrmd_send_attribute_alert(lrmd_t *lrmd, GList *alert_list, - const char *node, uint32_t nodeid, - const char *attr_name, const char *attr_value); -int lrmd_send_node_alert(lrmd_t *lrmd, GList *alert_list, - const char *node, uint32_t nodeid, const char *state); -int lrmd_send_fencing_alert(lrmd_t *lrmd, GList *alert_list, - const char *target, const char *task, - const char *desc, int op_rc); -int lrmd_send_resource_alert(lrmd_t *lrmd, GList *alert_list, - const char *node, lrmd_event_data_t *op); - -#endif diff --git a/include/crm/lrmd_internal.h b/include/crm/lrmd_internal.h new file mode 100644 index 0000000000..31ed5418f2 --- /dev/null +++ b/include/crm/lrmd_internal.h @@ -0,0 +1,33 @@ +/* + * Copyright 2015-2019 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#ifndef LRMD_INTERNAL__H +#define LRMD_INTERNAL__H + +#include // uint32_t +#include // GList +#include // xmlNode +#include // crm_remote_t +#include // lrmd_t, lrmd_event_data_t + +int lrmd_send_attribute_alert(lrmd_t *lrmd, GList *alert_list, + const char *node, uint32_t nodeid, + const char *attr_name, const char *attr_value); +int lrmd_send_node_alert(lrmd_t *lrmd, GList *alert_list, + const char *node, uint32_t nodeid, const char *state); +int lrmd_send_fencing_alert(lrmd_t *lrmd, GList *alert_list, + const char *target, const char *task, + const char *desc, int op_rc); +int lrmd_send_resource_alert(lrmd_t *lrmd, GList *alert_list, + const char *node, lrmd_event_data_t *op); + +int lrmd_tls_send_msg(crm_remote_t *session, xmlNode *msg, uint32_t id, + const char *msg_type); + +#endif diff --git a/lib/lrmd/lrmd_alerts.c b/lib/lrmd/lrmd_alerts.c index 34dac8276d..8238e166c3 100644 --- a/lib/lrmd/lrmd_alerts.c +++ b/lib/lrmd/lrmd_alerts.c @@ -1,386 +1,388 @@ /* - * Copyright 2015-2018 David Vossel + * Copyright 2015-2019 the Pacemaker project contributors + * + * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include -#include +#include #include #include #include static lrmd_key_value_t * alert_key2param(lrmd_key_value_t *head, enum crm_alert_keys_e name, const char *value) { const char **key; if (value == NULL) { value = ""; } for (key = crm_alert_keys[name]; *key; key++) { crm_trace("Setting alert key %s = '%s'", *key, value); head = lrmd_key_value_add(head, *key, value); } return head; } static lrmd_key_value_t * alert_key2param_int(lrmd_key_value_t *head, enum crm_alert_keys_e name, int value) { char *value_s = crm_itoa(value); head = alert_key2param(head, name, value_s); free(value_s); return head; } static lrmd_key_value_t * alert_key2param_ms(lrmd_key_value_t *head, enum crm_alert_keys_e name, guint value) { char *value_s = crm_strdup_printf("%u", value); head = alert_key2param(head, name, value_s); free(value_s); return head; } static void set_ev_kv(gpointer key, gpointer value, gpointer user_data) { lrmd_key_value_t **head = (lrmd_key_value_t **) user_data; if (value) { crm_trace("Setting environment variable %s='%s'", (char*)key, (char*)value); *head = lrmd_key_value_add(*head, key, value); } } static lrmd_key_value_t * alert_envvar2params(lrmd_key_value_t *head, crm_alert_entry_t *entry) { if (entry->envvars) { g_hash_table_foreach(entry->envvars, set_ev_kv, &head); } return head; } /* * We could use g_strv_contains() instead of this function, * but that has only been available since glib 2.43.2. */ static gboolean is_target_alert(char **list, const char *value) { int target_list_num = 0; gboolean rc = FALSE; CRM_CHECK(value != NULL, return FALSE); if (list == NULL) { return TRUE; } target_list_num = g_strv_length(list); for (int cnt = 0; cnt < target_list_num; cnt++) { if (strcmp(list[cnt], value) == 0) { rc = TRUE; break; } } return rc; } /*! * \internal * \brief Execute alert agents for an event * * \param[in] lrmd Executor connection to use * \param[in] alert_list Alerts to execute * \param[in] kind Type of event that is being alerted for * \param[in] attr_name If crm_alert_attribute, the attribute name * \param[in,out] params Environment variables to pass to agents * * \retval pcmk_ok on success * \retval -1 if some alerts failed * \retval -2 if all alerts failed */ static int exec_alert_list(lrmd_t *lrmd, GList *alert_list, enum crm_alert_flags kind, const char *attr_name, lrmd_key_value_t *params) { bool any_success = FALSE, any_failure = FALSE; const char *kind_s = crm_alert_flag2text(kind); crm_time_hr_t *now = NULL; struct timeval tv_now; char timestamp_epoch[20]; char timestamp_usec[7]; params = alert_key2param(params, CRM_alert_kind, kind_s); params = alert_key2param(params, CRM_alert_version, VERSION); for (GList *iter = g_list_first(alert_list); iter; iter = g_list_next(iter)) { crm_alert_entry_t *entry = (crm_alert_entry_t *)(iter->data); lrmd_key_value_t *copy_params = NULL; lrmd_key_value_t *head = NULL; int rc; if (is_not_set(entry->flags, kind)) { crm_trace("Filtering unwanted %s alert to %s via %s", kind_s, entry->recipient, entry->id); continue; } if ((kind == crm_alert_attribute) && !is_target_alert(entry->select_attribute_name, attr_name)) { crm_trace("Filtering unwanted attribute '%s' alert to %s via %s", attr_name, entry->recipient, entry->id); continue; } if (now == NULL) { if (gettimeofday(&tv_now, NULL) == 0) { now = crm_time_timeval_hr_convert(NULL, &tv_now); } } crm_info("Sending %s alert via %s to %s", kind_s, entry->id, entry->recipient); /* Make a copy of the parameters, because each alert will be unique */ for (head = params; head != NULL; head = head->next) { copy_params = lrmd_key_value_add(copy_params, head->key, head->value); } copy_params = alert_key2param(copy_params, CRM_alert_recipient, entry->recipient); if (now) { char *timestamp = crm_time_format_hr(entry->tstamp_format, now); if (timestamp) { copy_params = alert_key2param(copy_params, CRM_alert_timestamp, timestamp); free(timestamp); } snprintf(timestamp_epoch, sizeof(timestamp_epoch), "%lld", (long long) tv_now.tv_sec); copy_params = alert_key2param(copy_params, CRM_alert_timestamp_epoch, timestamp_epoch); snprintf(timestamp_usec, sizeof(timestamp_usec), "%06d", now->useconds); copy_params = alert_key2param(copy_params, CRM_alert_timestamp_usec, timestamp_usec); } copy_params = alert_envvar2params(copy_params, entry); rc = lrmd->cmds->exec_alert(lrmd, entry->id, entry->path, entry->timeout, copy_params); if (rc < 0) { crm_err("Could not execute alert %s: %s " CRM_XS " rc=%d", entry->id, pcmk_strerror(rc), rc); any_failure = TRUE; } else { any_success = TRUE; } } if (now) { free(now); } if (any_failure) { return (any_success? -1 : -2); } return pcmk_ok; } /*! * \internal * \brief Send an alert for a node attribute change * * \param[in] lrmd Executor connection to use * \param[in] alert_list List of alert agents to execute * \param[in] node Name of node with attribute change * \param[in] nodeid Node ID of node with attribute change * \param[in] attr_name Name of attribute that changed * \param[in] attr_value New value of attribute that changed * * \retval pcmk_ok on success * \retval -1 if some alert agents failed * \retval -2 if all alert agents failed */ int lrmd_send_attribute_alert(lrmd_t *lrmd, GList *alert_list, const char *node, uint32_t nodeid, const char *attr_name, const char *attr_value) { int rc = pcmk_ok; lrmd_key_value_t *params = NULL; if (lrmd == NULL) { return -2; } params = alert_key2param(params, CRM_alert_node, node); params = alert_key2param_int(params, CRM_alert_nodeid, nodeid); params = alert_key2param(params, CRM_alert_attribute_name, attr_name); params = alert_key2param(params, CRM_alert_attribute_value, attr_value); rc = exec_alert_list(lrmd, alert_list, crm_alert_attribute, attr_name, params); lrmd_key_value_freeall(params); return rc; } /*! * \internal * \brief Send an alert for a node membership event * * \param[in] lrmd Executor connection to use * \param[in] alert_list List of alert agents to execute * \param[in] node Name of node with change * \param[in] nodeid Node ID of node with change * \param[in] state New state of node with change * * \retval pcmk_ok on success * \retval -1 if some alert agents failed * \retval -2 if all alert agents failed */ int lrmd_send_node_alert(lrmd_t *lrmd, GList *alert_list, const char *node, uint32_t nodeid, const char *state) { int rc = pcmk_ok; lrmd_key_value_t *params = NULL; if (lrmd == NULL) { return -2; } params = alert_key2param(params, CRM_alert_node, node); params = alert_key2param(params, CRM_alert_desc, state); params = alert_key2param_int(params, CRM_alert_nodeid, nodeid); rc = exec_alert_list(lrmd, alert_list, crm_alert_node, NULL, params); lrmd_key_value_freeall(params); return rc; } /*! * \internal * \brief Send an alert for a fencing event * * \param[in] lrmd Executor connection to use * \param[in] alert_list List of alert agents to execute * \param[in] target Name of fence target node * \param[in] task Type of fencing event that occurred * \param[in] desc Readable description of event * \param[in] op_rc Result of fence action * * \retval pcmk_ok on success * \retval -1 if some alert agents failed * \retval -2 if all alert agents failed */ int lrmd_send_fencing_alert(lrmd_t *lrmd, GList *alert_list, const char *target, const char *task, const char *desc, int op_rc) { int rc = pcmk_ok; lrmd_key_value_t *params = NULL; if (lrmd == NULL) { return -2; } params = alert_key2param(params, CRM_alert_node, target); params = alert_key2param(params, CRM_alert_task, task); params = alert_key2param(params, CRM_alert_desc, desc); params = alert_key2param_int(params, CRM_alert_rc, op_rc); rc = exec_alert_list(lrmd, alert_list, crm_alert_fencing, NULL, params); lrmd_key_value_freeall(params); return rc; } /*! * \internal * \brief Send an alert for a resource operation * * \param[in] lrmd Executor connection to use * \param[in] alert_list List of alert agents to execute * \param[in] node Name of node that executed operation * \param[in] op Resource operation * * \retval pcmk_ok on success * \retval -1 if some alert agents failed * \retval -2 if all alert agents failed */ int lrmd_send_resource_alert(lrmd_t *lrmd, GList *alert_list, const char *node, lrmd_event_data_t *op) { int rc = pcmk_ok; int target_rc = pcmk_ok; lrmd_key_value_t *params = NULL; if (lrmd == NULL) { return -2; } target_rc = rsc_op_expected_rc(op); if ((op->interval_ms == 0) && (target_rc == op->rc) && safe_str_eq(op->op_type, RSC_STATUS)) { /* Don't send alerts for probes with the expected result. Leave it up to * the agent whether to alert for 'failed' probes. (Even if we find a * resource running, it was probably because someone did a clean-up of * the status section.) */ return pcmk_ok; } params = alert_key2param(params, CRM_alert_node, node); params = alert_key2param(params, CRM_alert_rsc, op->rsc_id); params = alert_key2param(params, CRM_alert_task, op->op_type); params = alert_key2param_ms(params, CRM_alert_interval, op->interval_ms); params = alert_key2param_int(params, CRM_alert_target_rc, target_rc); params = alert_key2param_int(params, CRM_alert_status, op->op_status); params = alert_key2param_int(params, CRM_alert_rc, op->rc); /* Reoccurring operations do not set exec_time, so on timeout, set it * to the operation timeout since that's closer to the actual value. */ if (op->op_status == PCMK_LRM_OP_TIMEOUT && op->exec_time == 0) { params = alert_key2param_int(params, CRM_alert_exec_time, op->timeout); } else { params = alert_key2param_int(params, CRM_alert_exec_time, op->exec_time); } if (op->op_status == PCMK_LRM_OP_DONE) { params = alert_key2param(params, CRM_alert_desc, services_ocf_exitcode_str(op->rc)); } else { params = alert_key2param(params, CRM_alert_desc, services_lrm_status_str(op->op_status)); } rc = exec_alert_list(lrmd, alert_list, crm_alert_resource, NULL, params); lrmd_key_value_freeall(params); return rc; }