diff --git a/crmd/remote_lrmd_ra.c b/crmd/remote_lrmd_ra.c index eb995eae8c..e68d784046 100644 --- a/crmd/remote_lrmd_ra.c +++ b/crmd/remote_lrmd_ra.c @@ -1,1142 +1,1142 @@ /* * Copyright (C) 2013 David Vossel * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #define REMOTE_LRMD_RA "remote" /* The max start timeout before cmd retry */ #define MAX_START_TIMEOUT_MS 10000 typedef struct remote_ra_cmd_s { /*! the local node the cmd is issued from */ char *owner; /*! the remote node the cmd is executed on */ char *rsc_id; /*! the action to execute */ char *action; /*! some string the client wants us to give it back */ char *userdata; /*! start delay in ms */ int start_delay; /*! timer id used for start delay. */ int delay_id; /*! timeout in ms for cmd */ int timeout; int remaining_timeout; /*! recurring interval in ms */ int interval; /*! interval timer id */ int interval_id; int reported_success; int monitor_timeout_id; int takeover_timeout_id; /*! action parameters */ lrmd_key_value_t *params; /*! executed rc */ int rc; int op_status; int call_id; time_t start_time; gboolean cancel; } remote_ra_cmd_t; enum remote_migration_status { expect_takeover = 1, takeover_complete, }; typedef struct remote_ra_data_s { crm_trigger_t *work; remote_ra_cmd_t *cur_cmd; GList *cmds; GList *recurring_cmds; enum remote_migration_status migrate_status; gboolean active; } remote_ra_data_t; static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms); static void handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd); static GList *fail_all_monitor_cmds(GList * list); static void free_cmd(gpointer user_data) { remote_ra_cmd_t *cmd = user_data; if (!cmd) { return; } if (cmd->delay_id) { g_source_remove(cmd->delay_id); } if (cmd->interval_id) { g_source_remove(cmd->interval_id); } if (cmd->monitor_timeout_id) { g_source_remove(cmd->monitor_timeout_id); } if (cmd->takeover_timeout_id) { g_source_remove(cmd->takeover_timeout_id); } free(cmd->owner); free(cmd->rsc_id); free(cmd->action); free(cmd->userdata); lrmd_key_value_freeall(cmd->params); free(cmd); } static int generate_callid(void) { static int remote_ra_callid = 0; remote_ra_callid++; if (remote_ra_callid <= 0) { remote_ra_callid = 1; } return remote_ra_callid; } static gboolean recurring_helper(gpointer data) { remote_ra_cmd_t *cmd = data; lrm_state_t *connection_rsc = NULL; cmd->interval_id = 0; connection_rsc = lrm_state_find(cmd->rsc_id); if (connection_rsc && connection_rsc->remote_ra_data) { remote_ra_data_t *ra_data = connection_rsc->remote_ra_data; ra_data->recurring_cmds = g_list_remove(ra_data->recurring_cmds, cmd); ra_data->cmds = g_list_append(ra_data->cmds, cmd); mainloop_set_trigger(ra_data->work); } return FALSE; } static gboolean start_delay_helper(gpointer data) { remote_ra_cmd_t *cmd = data; lrm_state_t *connection_rsc = NULL; cmd->delay_id = 0; connection_rsc = lrm_state_find(cmd->rsc_id); if (connection_rsc && connection_rsc->remote_ra_data) { remote_ra_data_t *ra_data = connection_rsc->remote_ra_data; mainloop_set_trigger(ra_data->work); } return FALSE; } /*! * \internal * \brief Handle cluster communication related to pacemaker_remote node joining * * \param[in] node_name Name of newly integrated pacemaker_remote node */ static void remote_node_up(const char *node_name) { int call_opt, call_id = 0; xmlNode *update, *state; crm_node_t *node; CRM_CHECK(node_name != NULL, return); crm_info("Announcing pacemaker_remote node %s", node_name); /* Clear node's operation history. The node's transient attributes should * and normally will be cleared when the node leaves, but since remote node * state has a number of corner cases, clear them here as well, to be sure. */ call_opt = crmd_cib_smart_opt(); erase_status_tag(node_name, XML_CIB_TAG_LRM, call_opt); erase_status_tag(node_name, XML_TAG_TRANSIENT_NODEATTRS, call_opt); /* Clear node's probed attribute */ update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE); /* Ensure node is in the remote peer cache with member status */ node = crm_remote_peer_get(node_name); CRM_CHECK(node != NULL, return); crm_update_peer_state(__FUNCTION__, node, CRM_NODE_MEMBER, 0); /* pacemaker_remote nodes don't participate in the membership layer, * so cluster nodes don't automatically get notified when they come and go. * We send a cluster message to the DC, and update the CIB node state entry, * so the DC will get it sooner (via message) or later (via CIB refresh), * and any other interested parties can query the CIB. */ send_remote_state_message(node_name, TRUE); update = create_xml_node(NULL, XML_CIB_TAG_STATUS); state = create_node_state_update(node, node_update_cluster, update, __FUNCTION__); /* Clear the XML_NODE_IS_FENCED flag in the node state. If the node ever * needs to be fenced, this flag will allow various actions to determine * whether the fencing has happened yet. */ crm_xml_add(state, XML_NODE_IS_FENCED, "0"); /* TODO: If the remote connection drops, and this (async) CIB update either * failed or has not yet completed, later actions could mistakenly think the * node has already been fenced (if the XML_NODE_IS_FENCED attribute was * previously set, because it won't have been cleared). This could prevent * actual fencing or allow recurring monitor failures to be cleared too * soon. Ideally, we wouldn't rely on the CIB for the fenced status. */ fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL); if (call_id < 0) { crm_perror(LOG_WARNING, "%s CIB node state setup", node_name); } free_xml(update); } enum down_opts { DOWN_KEEP_LRM, DOWN_ERASE_LRM }; /*! * \internal * \brief Handle cluster communication related to pacemaker_remote node leaving * * \param[in] node_name Name of lost node * \param[in] opts Whether to keep or erase LRM history */ static void remote_node_down(const char *node_name, const enum down_opts opts) { xmlNode *update; int call_id = 0; int call_opt = crmd_cib_smart_opt(); crm_node_t *node; /* Purge node from attrd's memory */ update_attrd_remote_node_removed(node_name, NULL); /* Purge node's transient attributes */ erase_status_tag(node_name, XML_TAG_TRANSIENT_NODEATTRS, call_opt); /* Normally, the LRM operation history should be kept until the node comes * back up. However, after a successful fence, we want to clear it, so we * don't think resources are still running on the node. */ if (opts == DOWN_ERASE_LRM) { erase_status_tag(node_name, XML_CIB_TAG_LRM, call_opt); } /* Ensure node is in the remote peer cache with lost state */ node = crm_remote_peer_get(node_name); CRM_CHECK(node != NULL, return); crm_update_peer_state(__FUNCTION__, node, CRM_NODE_LOST, 0); /* Notify DC */ send_remote_state_message(node_name, FALSE); /* Update CIB node state */ update = create_xml_node(NULL, XML_CIB_TAG_STATUS); create_node_state_update(node, node_update_cluster, update, __FUNCTION__); fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL); if (call_id < 0) { crm_perror(LOG_ERR, "%s CIB node state update", node_name); } free_xml(update); } /*! * \internal * \brief Handle effects of a remote RA command on node state * * \param[in] cmd Completed remote RA command */ static void check_remote_node_state(remote_ra_cmd_t *cmd) { /* Only successful actions can change node state */ if (cmd->rc != PCMK_OCF_OK) { return; } if (safe_str_eq(cmd->action, "start")) { remote_node_up(cmd->rsc_id); } else if (safe_str_eq(cmd->action, "migrate_from")) { /* After a successful migration, we don't need to do remote_node_up() * because the DC already knows the node is up, and we don't want to * clear LRM history etc. We do need to add the remote node to this * host's remote peer cache, because (unless it happens to be DC) * it hasn't been tracking the remote node, and other code relies on * the cache to distinguish remote nodes from unseen cluster nodes. */ crm_node_t *node = crm_remote_peer_get(cmd->rsc_id); CRM_CHECK(node != NULL, return); crm_update_peer_state(__FUNCTION__, node, CRM_NODE_MEMBER, 0); } else if (safe_str_eq(cmd->action, "stop")) { lrm_state_t *lrm_state = lrm_state_find(cmd->rsc_id); remote_ra_data_t *ra_data = lrm_state? lrm_state->remote_ra_data : NULL; if (ra_data) { if (ra_data->migrate_status != takeover_complete) { /* Stop means down if we didn't successfully migrate elsewhere */ remote_node_down(cmd->rsc_id, DOWN_KEEP_LRM); } else if (AM_I_DC == FALSE) { /* Only the connection host and DC track node state, * so if the connection migrated elsewhere and we aren't DC, * un-cache the node, so we don't have stale info */ crm_remote_peer_cache_remove(cmd->rsc_id); } } } /* We don't do anything for successful monitors, which is correct for * routine recurring monitors, and for monitors on nodes where the * connection isn't supposed to be (the cluster will stop the connection in * that case). However, if the initial probe finds the connection already * active on the node where we want it, we probably should do * remote_node_up(). Unfortunately, we can't distinguish that case here. * Given that connections have to be initiated by the cluster, the chance of * that should be close to zero. */ } static void report_remote_ra_result(remote_ra_cmd_t * cmd) { lrmd_event_data_t op = { 0, }; check_remote_node_state(cmd); op.type = lrmd_event_exec_complete; op.rsc_id = cmd->rsc_id; op.op_type = cmd->action; op.user_data = cmd->userdata; op.timeout = cmd->timeout; op.interval = cmd->interval; op.rc = cmd->rc; op.op_status = cmd->op_status; op.t_run = cmd->start_time; op.t_rcchange = cmd->start_time; if (cmd->reported_success && cmd->rc != PCMK_OCF_OK) { op.t_rcchange = time(NULL); /* This edge case will likely never ever occur, but if it does the * result is that a failure will not be processed correctly. This is only * remotely possible because we are able to detect a connection resource's tcp * connection has failed at any moment after start has completed. The actual * recurring operation is just a connectivity ping. * * basically, we are not guaranteed that the first successful monitor op and * a subsequent failed monitor op will not occur in the same timestamp. We have to * make it look like the operations occurred at separate times though. */ if (op.t_rcchange == op.t_run) { op.t_rcchange++; } } if (cmd->params) { lrmd_key_value_t *tmp; op.params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); for (tmp = cmd->params; tmp; tmp = tmp->next) { g_hash_table_insert(op.params, strdup(tmp->key), strdup(tmp->value)); } } op.call_id = cmd->call_id; op.remote_nodename = cmd->owner; lrm_op_callback(&op); if (op.params) { g_hash_table_destroy(op.params); } } static void update_remaining_timeout(remote_ra_cmd_t * cmd) { cmd->remaining_timeout = ((cmd->timeout / 1000) - (time(NULL) - cmd->start_time)) * 1000; } static gboolean retry_start_cmd_cb(gpointer data) { lrm_state_t *lrm_state = data; remote_ra_data_t *ra_data = lrm_state->remote_ra_data; remote_ra_cmd_t *cmd = NULL; int rc = -1; if (!ra_data || !ra_data->cur_cmd) { return FALSE; } cmd = ra_data->cur_cmd; if (safe_str_neq(cmd->action, "start") && safe_str_neq(cmd->action, "migrate_from")) { return FALSE; } update_remaining_timeout(cmd); if (cmd->remaining_timeout > 0) { rc = handle_remote_ra_start(lrm_state, cmd, cmd->remaining_timeout); } if (rc != 0) { cmd->rc = PCMK_OCF_UNKNOWN_ERROR; cmd->op_status = PCMK_LRM_OP_ERROR; report_remote_ra_result(cmd); if (ra_data->cmds) { mainloop_set_trigger(ra_data->work); } ra_data->cur_cmd = NULL; free_cmd(cmd); } else { /* wait for connection event */ } return FALSE; } static gboolean connection_takeover_timeout_cb(gpointer data) { lrm_state_t *lrm_state = NULL; remote_ra_cmd_t *cmd = data; crm_info("takeover event timed out for node %s", cmd->rsc_id); cmd->takeover_timeout_id = 0; lrm_state = lrm_state_find(cmd->rsc_id); handle_remote_ra_stop(lrm_state, cmd); free_cmd(cmd); return FALSE; } static gboolean monitor_timeout_cb(gpointer data) { lrm_state_t *lrm_state = NULL; remote_ra_cmd_t *cmd = data; lrm_state = lrm_state_find(cmd->rsc_id); crm_info("Poke async response timed out for node %s (%p)", cmd->rsc_id, lrm_state); cmd->monitor_timeout_id = 0; cmd->op_status = PCMK_LRM_OP_TIMEOUT; cmd->rc = PCMK_OCF_UNKNOWN_ERROR; if (lrm_state && lrm_state->remote_ra_data) { remote_ra_data_t *ra_data = lrm_state->remote_ra_data; if (ra_data->cur_cmd == cmd) { ra_data->cur_cmd = NULL; } if (ra_data->cmds) { mainloop_set_trigger(ra_data->work); } } report_remote_ra_result(cmd); free_cmd(cmd); if(lrm_state) { lrm_state_disconnect(lrm_state); } return FALSE; } void remote_lrm_op_callback(lrmd_event_data_t * op) { gboolean cmd_handled = FALSE; lrm_state_t *lrm_state = NULL; remote_ra_data_t *ra_data = NULL; remote_ra_cmd_t *cmd = NULL; crm_debug("remote connection event - event_type:%s node:%s action:%s rc:%s op_status:%s", lrmd_event_type2str(op->type), op->remote_nodename, op->op_type ? op->op_type : "none", services_ocf_exitcode_str(op->rc), services_lrm_status_str(op->op_status)); lrm_state = lrm_state_find(op->remote_nodename); if (!lrm_state || !lrm_state->remote_ra_data) { crm_debug("lrm_state info not found for remote lrmd connection event"); return; } ra_data = lrm_state->remote_ra_data; /* Another client has connected to the remote daemon, * determine if this is expected. */ if (op->type == lrmd_event_new_client) { /* great, we new this was coming */ if (ra_data->migrate_status == expect_takeover) { ra_data->migrate_status = takeover_complete; } else { crm_err("Unexpected pacemaker_remote client takeover for %s. Disconnecting", op->remote_nodename); /* In this case, lrmd_tls_connection_destroy() will be called under the control of mainloop. */ /* Do not free lrm_state->conn yet. */ /* It'll be freed in the following stop action. */ lrm_state_disconnect_only(lrm_state); } return; } /* filter all EXEC events up */ if (op->type == lrmd_event_exec_complete) { if (ra_data->migrate_status == takeover_complete) { crm_debug("ignoring event, this connection is taken over by another node"); } else { lrm_op_callback(op); } return; } if ((op->type == lrmd_event_disconnect) && (ra_data->cur_cmd == NULL) && (ra_data->active == TRUE)) { crm_err("Unexpected disconnect on remote-node %s", lrm_state->node_name); ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds); ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds); return; } if (!ra_data->cur_cmd) { crm_debug("no event to match"); return; } cmd = ra_data->cur_cmd; /* Start actions and migrate from actions complete after connection * comes back to us. */ if (op->type == lrmd_event_connect && (safe_str_eq(cmd->action, "start") || safe_str_eq(cmd->action, "migrate_from"))) { if (op->connection_rc < 0) { update_remaining_timeout(cmd); /* There isn't much of a reason to reschedule if the timeout is too small */ if (cmd->remaining_timeout > 3000) { crm_trace("rescheduling start, remaining timeout %d", cmd->remaining_timeout); g_timeout_add(1000, retry_start_cmd_cb, lrm_state); return; } else { crm_trace("can't reschedule start, remaining timeout too small %d", cmd->remaining_timeout); } cmd->op_status = PCMK_LRM_OP_TIMEOUT; cmd->rc = PCMK_OCF_UNKNOWN_ERROR; } else { lrm_state_reset_tables(lrm_state); cmd->rc = PCMK_OCF_OK; cmd->op_status = PCMK_LRM_OP_DONE; ra_data->active = TRUE; } crm_debug("remote lrmd connect event matched %s action. ", cmd->action); report_remote_ra_result(cmd); cmd_handled = TRUE; } else if (op->type == lrmd_event_poke && safe_str_eq(cmd->action, "monitor")) { if (cmd->monitor_timeout_id) { g_source_remove(cmd->monitor_timeout_id); cmd->monitor_timeout_id = 0; } /* Only report success the first time, after that only worry about failures. * For this function, if we get the poke pack, it is always a success. Pokes * only fail if the send fails, or the response times out. */ if (!cmd->reported_success) { cmd->rc = PCMK_OCF_OK; cmd->op_status = PCMK_LRM_OP_DONE; report_remote_ra_result(cmd); cmd->reported_success = 1; } crm_debug("remote lrmd poke event matched %s action. ", cmd->action); /* success, keep rescheduling if interval is present. */ if (cmd->interval && (cmd->cancel == FALSE)) { ra_data->recurring_cmds = g_list_append(ra_data->recurring_cmds, cmd); cmd->interval_id = g_timeout_add(cmd->interval, recurring_helper, cmd); cmd = NULL; /* prevent free */ } cmd_handled = TRUE; } else if (op->type == lrmd_event_disconnect && safe_str_eq(cmd->action, "monitor")) { if (ra_data->active == TRUE && (cmd->cancel == FALSE)) { cmd->rc = PCMK_OCF_UNKNOWN_ERROR; cmd->op_status = PCMK_LRM_OP_ERROR; report_remote_ra_result(cmd); crm_err("remote-node %s unexpectedly disconneced during monitor operation", lrm_state->node_name); } cmd_handled = TRUE; } else if (op->type == lrmd_event_new_client && safe_str_eq(cmd->action, "stop")) { handle_remote_ra_stop(lrm_state, cmd); cmd_handled = TRUE; } else { crm_debug("Event did not match %s action", ra_data->cur_cmd->action); } if (cmd_handled) { ra_data->cur_cmd = NULL; if (ra_data->cmds) { mainloop_set_trigger(ra_data->work); } free_cmd(cmd); } } static void handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd) { remote_ra_data_t *ra_data = NULL; CRM_ASSERT(lrm_state); ra_data = lrm_state->remote_ra_data; if (ra_data->migrate_status != takeover_complete) { /* delete pending ops when ever the remote connection is intentionally stopped */ g_hash_table_remove_all(lrm_state->pending_ops); } else { /* we no longer hold the history if this connection has been migrated */ lrm_state_reset_tables(lrm_state); } ra_data->active = FALSE; lrm_state_disconnect(lrm_state); cmd->rc = PCMK_OCF_OK; cmd->op_status = PCMK_LRM_OP_DONE; if (ra_data->cmds) { g_list_free_full(ra_data->cmds, free_cmd); } if (ra_data->recurring_cmds) { g_list_free_full(ra_data->recurring_cmds, free_cmd); } ra_data->cmds = NULL; ra_data->recurring_cmds = NULL; ra_data->cur_cmd = NULL; report_remote_ra_result(cmd); } static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms) { const char *server = NULL; lrmd_key_value_t *tmp = NULL; int port = 0; int timeout_used = timeout_ms > MAX_START_TIMEOUT_MS ? MAX_START_TIMEOUT_MS : timeout_ms; for (tmp = cmd->params; tmp; tmp = tmp->next) { if (safe_str_eq(tmp->key, "addr") || safe_str_eq(tmp->key, "server")) { server = tmp->value; } if (safe_str_eq(tmp->key, "port")) { port = atoi(tmp->value); } } return lrm_state_remote_connect_async(lrm_state, server, port, timeout_used); } static gboolean handle_remote_ra_exec(gpointer user_data) { int rc = 0; lrm_state_t *lrm_state = user_data; remote_ra_data_t *ra_data = lrm_state->remote_ra_data; remote_ra_cmd_t *cmd; GList *first = NULL; if (ra_data->cur_cmd) { /* still waiting on previous cmd */ return TRUE; } while (ra_data->cmds) { first = ra_data->cmds; cmd = first->data; if (cmd->delay_id) { /* still waiting for start delay timer to trip */ return TRUE; } ra_data->cmds = g_list_remove_link(ra_data->cmds, first); g_list_free_1(first); if (!strcmp(cmd->action, "start") || !strcmp(cmd->action, "migrate_from")) { ra_data->migrate_status = 0; rc = handle_remote_ra_start(lrm_state, cmd, cmd->timeout); if (rc == 0) { /* take care of this later when we get async connection result */ crm_debug("began remote lrmd connect, waiting for connect event."); ra_data->cur_cmd = cmd; return TRUE; } else { crm_debug("connect failed, not expecting to match any connection event later"); cmd->rc = PCMK_OCF_UNKNOWN_ERROR; cmd->op_status = PCMK_LRM_OP_ERROR; } report_remote_ra_result(cmd); } else if (!strcmp(cmd->action, "monitor")) { if (lrm_state_is_connected(lrm_state) == TRUE) { rc = lrm_state_poke_connection(lrm_state); if (rc < 0) { cmd->rc = PCMK_OCF_UNKNOWN_ERROR; cmd->op_status = PCMK_LRM_OP_ERROR; } } else { rc = -1; cmd->op_status = PCMK_LRM_OP_DONE; cmd->rc = PCMK_OCF_NOT_RUNNING; } if (rc == 0) { crm_debug("poked remote lrmd at node %s, waiting for async response.", cmd->rsc_id); ra_data->cur_cmd = cmd; cmd->monitor_timeout_id = g_timeout_add(cmd->timeout, monitor_timeout_cb, cmd); return TRUE; } report_remote_ra_result(cmd); } else if (!strcmp(cmd->action, "stop")) { if (ra_data->migrate_status == expect_takeover) { /* briefly wait on stop for the takeover event to occur. If the * takeover event does not occur during the wait period, that's fine. * It just means that the remote-node's lrm_status section is going to get * cleared which will require all the resources running in the remote-node * to be explicitly re-detected via probe actions. If the takeover does occur * successfully, then we can leave the status section intact. */ cmd->takeover_timeout_id = g_timeout_add((cmd->timeout/2), connection_takeover_timeout_cb, cmd); ra_data->cur_cmd = cmd; return TRUE; } handle_remote_ra_stop(lrm_state, cmd); } else if (!strcmp(cmd->action, "migrate_to")) { ra_data->migrate_status = expect_takeover; cmd->rc = PCMK_OCF_OK; cmd->op_status = PCMK_LRM_OP_DONE; report_remote_ra_result(cmd); } else if (!strcmp(cmd->action, "reload")) { /* reloads are a no-op right now, add logic here when they become important */ cmd->rc = PCMK_OCF_OK; cmd->op_status = PCMK_LRM_OP_DONE; report_remote_ra_result(cmd); } free_cmd(cmd); } return TRUE; } static void remote_ra_data_init(lrm_state_t * lrm_state) { remote_ra_data_t *ra_data = NULL; if (lrm_state->remote_ra_data) { return; } ra_data = calloc(1, sizeof(remote_ra_data_t)); ra_data->work = mainloop_add_trigger(G_PRIORITY_HIGH, handle_remote_ra_exec, lrm_state); lrm_state->remote_ra_data = ra_data; } void remote_ra_cleanup(lrm_state_t * lrm_state) { remote_ra_data_t *ra_data = lrm_state->remote_ra_data; if (!ra_data) { return; } if (ra_data->cmds) { g_list_free_full(ra_data->cmds, free_cmd); } if (ra_data->recurring_cmds) { g_list_free_full(ra_data->recurring_cmds, free_cmd); } mainloop_destroy_trigger(ra_data->work); free(ra_data); lrm_state->remote_ra_data = NULL; } gboolean is_remote_lrmd_ra(const char *agent, const char *provider, const char *id) { if (agent && provider && !strcmp(agent, REMOTE_LRMD_RA) && !strcmp(provider, "pacemaker")) { return TRUE; } if (id && lrm_state_find(id) && safe_str_neq(id, fsa_our_uname)) { return TRUE; } return FALSE; } lrmd_rsc_info_t * remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id) { lrmd_rsc_info_t *info = NULL; if ((lrm_state_find(rsc_id))) { info = calloc(1, sizeof(lrmd_rsc_info_t)); info->id = strdup(rsc_id); info->type = strdup(REMOTE_LRMD_RA); - info->class = strdup("ocf"); + info->class = strdup(PCMK_RESOURCE_CLASS_OCF); info->provider = strdup("pacemaker"); } return info; } static gboolean is_remote_ra_supported_action(const char *action) { if (!action) { return FALSE; } else if (strcmp(action, "start") && strcmp(action, "stop") && strcmp(action, "reload") && strcmp(action, "migrate_to") && strcmp(action, "migrate_from") && strcmp(action, "monitor")) { return FALSE; } return TRUE; } static GList * fail_all_monitor_cmds(GList * list) { GList *rm_list = NULL; remote_ra_cmd_t *cmd = NULL; GListPtr gIter = NULL; for (gIter = list; gIter != NULL; gIter = gIter->next) { cmd = gIter->data; if (cmd->interval > 0 && safe_str_eq(cmd->action, "monitor")) { rm_list = g_list_append(rm_list, cmd); } } for (gIter = rm_list; gIter != NULL; gIter = gIter->next) { cmd = gIter->data; cmd->rc = PCMK_OCF_UNKNOWN_ERROR; cmd->op_status = PCMK_LRM_OP_ERROR; crm_trace("Pre-emptively failing %s %s (interval=%d, %s)", cmd->action, cmd->rsc_id, cmd->interval, cmd->userdata); report_remote_ra_result(cmd); list = g_list_remove(list, cmd); free_cmd(cmd); } /* frees only the list data, not the cmds */ g_list_free(rm_list); return list; } static GList * remove_cmd(GList * list, const char *action, int interval) { remote_ra_cmd_t *cmd = NULL; GListPtr gIter = NULL; for (gIter = list; gIter != NULL; gIter = gIter->next) { cmd = gIter->data; if (cmd->interval == interval && safe_str_eq(cmd->action, action)) { break; } cmd = NULL; } if (cmd) { list = g_list_remove(list, cmd); free_cmd(cmd); } return list; } int remote_ra_cancel(lrm_state_t * lrm_state, const char *rsc_id, const char *action, int interval) { lrm_state_t *connection_rsc = NULL; remote_ra_data_t *ra_data = NULL; connection_rsc = lrm_state_find(rsc_id); if (!connection_rsc || !connection_rsc->remote_ra_data) { return -EINVAL; } ra_data = connection_rsc->remote_ra_data; ra_data->cmds = remove_cmd(ra_data->cmds, action, interval); ra_data->recurring_cmds = remove_cmd(ra_data->recurring_cmds, action, interval); if (ra_data->cur_cmd && (ra_data->cur_cmd->interval == interval) && (safe_str_eq(ra_data->cur_cmd->action, action))) { ra_data->cur_cmd->cancel = TRUE; } return 0; } static remote_ra_cmd_t * handle_dup_monitor(remote_ra_data_t *ra_data, int interval, const char *userdata) { GList *gIter = NULL; remote_ra_cmd_t *cmd = NULL; /* there are 3 places a potential duplicate monitor operation * could exist. * 1. recurring_cmds list. where the op is waiting for its next interval * 2. cmds list, where the op is queued to get executed immediately * 3. cur_cmd, which means the monitor op is in flight right now. */ if (interval == 0) { return NULL; } if (ra_data->cur_cmd && ra_data->cur_cmd->cancel == FALSE && ra_data->cur_cmd->interval == interval && safe_str_eq(ra_data->cur_cmd->action, "monitor")) { cmd = ra_data->cur_cmd; goto handle_dup; } for (gIter = ra_data->recurring_cmds; gIter != NULL; gIter = gIter->next) { cmd = gIter->data; if (cmd->interval == interval && safe_str_eq(cmd->action, "monitor")) { goto handle_dup; } } for (gIter = ra_data->cmds; gIter != NULL; gIter = gIter->next) { cmd = gIter->data; if (cmd->interval == interval && safe_str_eq(cmd->action, "monitor")) { goto handle_dup; } } return NULL; handle_dup: crm_trace("merging duplicate monitor cmd %s_monitor_%d", cmd->rsc_id, interval); /* update the userdata */ if (userdata) { free(cmd->userdata); cmd->userdata = strdup(userdata); } /* if we've already reported success, generate a new call id */ if (cmd->reported_success) { cmd->start_time = time(NULL); cmd->call_id = generate_callid(); cmd->reported_success = 0; } /* if we have an interval_id set, that means we are in the process of * waiting for this cmd's next interval. instead of waiting, cancel * the timer and execute the action immediately */ if (cmd->interval_id) { g_source_remove(cmd->interval_id); cmd->interval_id = 0; recurring_helper(cmd); } return cmd; } int remote_ra_exec(lrm_state_t * lrm_state, const char *rsc_id, const char *action, const char *userdata, int interval, /* ms */ int timeout, /* ms */ int start_delay, /* ms */ lrmd_key_value_t * params) { int rc = 0; lrm_state_t *connection_rsc = NULL; remote_ra_cmd_t *cmd = NULL; remote_ra_data_t *ra_data = NULL; if (is_remote_ra_supported_action(action) == FALSE) { rc = -EINVAL; goto exec_done; } connection_rsc = lrm_state_find(rsc_id); if (!connection_rsc) { rc = -EINVAL; goto exec_done; } remote_ra_data_init(connection_rsc); ra_data = connection_rsc->remote_ra_data; cmd = handle_dup_monitor(ra_data, interval, userdata); if (cmd) { return cmd->call_id; } cmd = calloc(1, sizeof(remote_ra_cmd_t)); cmd->owner = strdup(lrm_state->node_name); cmd->rsc_id = strdup(rsc_id); cmd->action = strdup(action); cmd->userdata = strdup(userdata); cmd->interval = interval; cmd->timeout = timeout; cmd->start_delay = start_delay; cmd->params = params; cmd->start_time = time(NULL); cmd->call_id = generate_callid(); if (cmd->start_delay) { cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd); } ra_data->cmds = g_list_append(ra_data->cmds, cmd); mainloop_set_trigger(ra_data->work); return cmd->call_id; exec_done: lrmd_key_value_freeall(params); return rc; } /*! * \internal * \brief Immediately fail all monitors of a remote node, if proxied here * * \param[in] node_name Name of pacemaker_remote node */ void remote_ra_fail(const char *node_name) { lrm_state_t *lrm_state = lrm_state_find(node_name); if (lrm_state && lrm_state_is_connected(lrm_state)) { remote_ra_data_t *ra_data = lrm_state->remote_ra_data; crm_info("Failing monitors on pacemaker_remote node %s", node_name); ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds); ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds); } } /* A guest node fencing implied by host fencing looks like: * * * * * * * */ #define XPATH_PSEUDO_FENCE "//" XML_GRAPH_TAG_PSEUDO_EVENT \ "[@" XML_LRM_ATTR_TASK "='stonith']/" XML_GRAPH_TAG_DOWNED \ "/" XML_CIB_TAG_NODE /*! * \internal * \brief Check a pseudo-action for Pacemaker Remote node side effects * * \param[in] xml XML of pseudo-action to check */ void remote_ra_process_pseudo(xmlNode *xml) { xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_FENCE); if (numXpathResults(search) == 1) { xmlNode *result = getXpathResult(search, 0); /* Normally, we handle the necessary side effects of a guest node stop * action when reporting the remote agent's result. However, if the stop * is implied due to fencing, it will be a fencing pseudo-event, and * there won't be a result to report. Handle that case here. * * This will result in a duplicate call to remote_node_down() if the * guest stop was real instead of implied, but that shouldn't hurt. * * There is still one corner case that isn't handled: if a guest node * isn't running any resources when its host is fenced, it will appear * to be cleanly stopped, so there will be no pseudo-fence, and our * peer cache state will be incorrect unless and until the guest is * recovered. */ if (result) { const char *remote = ID(result); if (remote) { remote_node_down(remote, DOWN_ERASE_LRM); } } } freeXpathObject(search); } diff --git a/fencing/main.c b/fencing/main.c index 95a1408192..e6eb087a68 100644 --- a/fencing/main.c +++ b/fencing/main.c @@ -1,1546 +1,1546 @@ /* * Copyright (C) 2009 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include char *stonith_our_uname = NULL; char *stonith_our_uuid = NULL; long stonith_watchdog_timeout_ms = 0; GMainLoop *mainloop = NULL; gboolean stand_alone = FALSE; gboolean no_cib_connect = FALSE; gboolean stonith_shutdown_flag = FALSE; qb_ipcs_service_t *ipcs = NULL; xmlNode *local_cib = NULL; GHashTable *known_peer_names = NULL; static cib_t *cib_api = NULL; static void *cib_library = NULL; static void stonith_shutdown(int nsig); static void stonith_cleanup(void); static int32_t st_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { if (stonith_shutdown_flag) { crm_info("Ignoring new client [%d] during shutdown", crm_ipcs_client_pid(c)); return -EPERM; } if (crm_client_new(c, uid, gid) == NULL) { return -EIO; } return 0; } static void st_ipc_created(qb_ipcs_connection_t * c) { crm_trace("Connection created for %p", c); } /* Exit code means? */ static int32_t st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; int call_options = 0; xmlNode *request = NULL; crm_client_t *c = crm_client_get(qbc); const char *op = NULL; if (c == NULL) { crm_info("Invalid client: %p", qbc); return 0; } request = crm_ipcs_recv(c, data, size, &id, &flags); if (request == NULL) { crm_ipcs_send_ack(c, id, flags, "nack", __FUNCTION__, __LINE__); return 0; } op = crm_element_value(request, F_CRM_TASK); if(safe_str_eq(op, CRM_OP_RM_NODE_CACHE)) { crm_xml_add(request, F_TYPE, T_STONITH_NG); crm_xml_add(request, F_STONITH_OPERATION, op); crm_xml_add(request, F_STONITH_CLIENTID, c->id); crm_xml_add(request, F_STONITH_CLIENTNAME, crm_client_name(c)); crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname); send_cluster_message(NULL, crm_msg_stonith_ng, request, FALSE); free_xml(request); return 0; } if (c->name == NULL) { const char *value = crm_element_value(request, F_STONITH_CLIENTNAME); if (value == NULL) { value = "unknown"; } c->name = crm_strdup_printf("%s.%u", value, c->pid); } crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); crm_trace("Flags %u/%u for command %u from %s", flags, call_options, id, crm_client_name(c)); if (is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(flags & crm_ipc_client_response); CRM_LOG_ASSERT(c->request_id == 0); /* This means the client has two synchronous events in-flight */ c->request_id = id; /* Reply only to the last one */ } crm_xml_add(request, F_STONITH_CLIENTID, c->id); crm_xml_add(request, F_STONITH_CLIENTNAME, crm_client_name(c)); crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname); crm_log_xml_trace(request, "Client[inbound]"); stonith_command(c, id, flags, request, NULL); free_xml(request); return 0; } /* Error code means? */ static int32_t st_ipc_closed(qb_ipcs_connection_t * c) { crm_client_t *client = crm_client_get(c); if (client == NULL) { return 0; } crm_trace("Connection %p closed", c); crm_client_destroy(client); /* 0 means: yes, go ahead and destroy the connection */ return 0; } static void st_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p destroyed", c); st_ipc_closed(c); } static void stonith_peer_callback(xmlNode * msg, void *private_data) { const char *remote_peer = crm_element_value(msg, F_ORIG); const char *op = crm_element_value(msg, F_STONITH_OPERATION); if (crm_str_eq(op, "poke", TRUE)) { return; } crm_log_xml_trace(msg, "Peer[inbound]"); stonith_command(NULL, 0, 0, msg, remote_peer); } #if SUPPORT_HEARTBEAT static void stonith_peer_hb_callback(HA_Message * msg, void *private_data) { xmlNode *xml = convert_ha_message(NULL, msg, __FUNCTION__); stonith_peer_callback(xml, private_data); free_xml(xml); } static void stonith_peer_hb_destroy(gpointer user_data) { if (stonith_shutdown_flag) { crm_info("Heartbeat disconnection complete... exiting"); } else { crm_err("Heartbeat connection lost! Exiting."); } stonith_shutdown(0); } #endif #if SUPPORT_COROSYNC static void stonith_peer_ais_callback(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { uint32_t kind = 0; xmlNode *xml = NULL; const char *from = NULL; char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); if(data == NULL) { return; } if (kind == crm_class_cluster) { xml = string2xml(data); if (xml == NULL) { crm_err("Invalid XML: '%.120s'", data); free(data); return; } crm_xml_add(xml, F_ORIG, from); /* crm_xml_add_int(xml, F_SEQ, wrapper->id); */ stonith_peer_callback(xml, NULL); } free_xml(xml); free(data); return; } static void stonith_peer_cs_destroy(gpointer user_data) { crm_err("Corosync connection terminated"); stonith_shutdown(0); } #endif void do_local_reply(xmlNode * notify_src, const char *client_id, gboolean sync_reply, gboolean from_peer) { /* send callback to originating child */ crm_client_t *client_obj = NULL; int local_rc = pcmk_ok; crm_trace("Sending response"); client_obj = crm_client_get_by_id(client_id); crm_trace("Sending callback to request originator"); if (client_obj == NULL) { local_rc = -1; crm_trace("No client to sent the response to. F_STONITH_CLIENTID not set."); } else { int rid = 0; if (sync_reply) { CRM_LOG_ASSERT(client_obj->request_id); rid = client_obj->request_id; client_obj->request_id = 0; crm_trace("Sending response %d to %s %s", rid, client_obj->name, from_peer ? "(originator of delegated request)" : ""); } else { crm_trace("Sending an event to %s %s", client_obj->name, from_peer ? "(originator of delegated request)" : ""); } local_rc = crm_ipcs_send(client_obj, rid, notify_src, sync_reply?crm_ipc_flags_none:crm_ipc_server_event); } if (local_rc < pcmk_ok && client_obj != NULL) { crm_warn("%sSync reply to %s failed: %s", sync_reply ? "" : "A-", client_obj ? client_obj->name : "", pcmk_strerror(local_rc)); } } long long get_stonith_flag(const char *name) { if (safe_str_eq(name, T_STONITH_NOTIFY_FENCE)) { return 0x01; } else if (safe_str_eq(name, STONITH_OP_DEVICE_ADD)) { return 0x04; } else if (safe_str_eq(name, STONITH_OP_DEVICE_DEL)) { return 0x10; } return 0; } static void stonith_notify_client(gpointer key, gpointer value, gpointer user_data) { xmlNode *update_msg = user_data; crm_client_t *client = value; const char *type = NULL; CRM_CHECK(client != NULL, return); CRM_CHECK(update_msg != NULL, return); type = crm_element_value(update_msg, F_SUBTYPE); CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return); if (client->ipcs == NULL) { crm_trace("Skipping client with NULL channel"); return; } if (client->options & get_stonith_flag(type)) { int rc = crm_ipcs_send(client, 0, update_msg, crm_ipc_server_event | crm_ipc_server_error); if (rc <= 0) { crm_warn("%s notification of client %s.%.6s failed: %s (%d)", type, crm_client_name(client), client->id, pcmk_strerror(rc), rc); } else { crm_trace("Sent %s notification to client %s.%.6s", type, crm_client_name(client), client->id); } } } void do_stonith_async_timeout_update(const char *client_id, const char *call_id, int timeout) { crm_client_t *client = NULL; xmlNode *notify_data = NULL; if (!timeout || !call_id || !client_id) { return; } client = crm_client_get_by_id(client_id); if (!client) { return; } notify_data = create_xml_node(NULL, T_STONITH_TIMEOUT_VALUE); crm_xml_add(notify_data, F_TYPE, T_STONITH_TIMEOUT_VALUE); crm_xml_add(notify_data, F_STONITH_CALLID, call_id); crm_xml_add_int(notify_data, F_STONITH_TIMEOUT, timeout); crm_trace("timeout update is %d for client %s and call id %s", timeout, client_id, call_id); if (client) { crm_ipcs_send(client, 0, notify_data, crm_ipc_server_event); } free_xml(notify_data); } void do_stonith_notify(int options, const char *type, int result, xmlNode * data) { /* TODO: Standardize the contents of data */ xmlNode *update_msg = create_xml_node(NULL, "notify"); CRM_CHECK(type != NULL,;); crm_xml_add(update_msg, F_TYPE, T_STONITH_NOTIFY); crm_xml_add(update_msg, F_SUBTYPE, type); crm_xml_add(update_msg, F_STONITH_OPERATION, type); crm_xml_add_int(update_msg, F_STONITH_RC, result); if (data != NULL) { add_message_xml(update_msg, F_STONITH_CALLDATA, data); } crm_trace("Notifying clients"); g_hash_table_foreach(client_connections, stonith_notify_client, update_msg); free_xml(update_msg); crm_trace("Notify complete"); } static void do_stonith_notify_config(int options, const char *op, int rc, const char *desc, int active) { xmlNode *notify_data = create_xml_node(NULL, op); CRM_CHECK(notify_data != NULL, return); crm_xml_add(notify_data, F_STONITH_DEVICE, desc); crm_xml_add_int(notify_data, F_STONITH_ACTIVE, active); do_stonith_notify(options, op, rc, notify_data); free_xml(notify_data); } void do_stonith_notify_device(int options, const char *op, int rc, const char *desc) { do_stonith_notify_config(options, op, rc, desc, g_hash_table_size(device_list)); } void do_stonith_notify_level(int options, const char *op, int rc, const char *desc) { do_stonith_notify_config(options, op, rc, desc, g_hash_table_size(topology)); } static void topology_remove_helper(const char *node, int level) { int rc; char *desc = NULL; xmlNode *data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL); crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__); crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level); crm_xml_add(data, XML_ATTR_STONITH_TARGET, node); rc = stonith_level_remove(data, &desc); do_stonith_notify_level(0, STONITH_OP_LEVEL_DEL, rc, desc); free_xml(data); free(desc); } static void remove_cib_device(xmlXPathObjectPtr xpathObj) { int max = numXpathResults(xpathObj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { const char *rsc_id = NULL; const char *standard = NULL; xmlNode *match = getXpathResult(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if(match != NULL) { standard = crm_element_value(match, XML_AGENT_ATTR_CLASS); } - if (safe_str_neq(standard, "stonith")) { + if (safe_str_neq(standard, PCMK_RESOURCE_CLASS_STONITH)) { continue; } rsc_id = crm_element_value(match, XML_ATTR_ID); stonith_device_remove(rsc_id, TRUE); } } static void handle_topology_change(xmlNode *match, bool remove) { int rc; char *desc = NULL; CRM_CHECK(match != NULL, return); crm_trace("Updating %s", ID(match)); if(remove) { int index = 0; char *key = stonith_level_key(match, -1); crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index); topology_remove_helper(key, index); free(key); } rc = stonith_level_register(match, &desc); do_stonith_notify_level(0, STONITH_OP_LEVEL_ADD, rc, desc); free(desc); } static void remove_fencing_topology(xmlXPathObjectPtr xpathObj) { int max = numXpathResults(xpathObj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if (match && crm_element_value(match, XML_DIFF_MARKER)) { /* Deletion */ int index = 0; char *target = stonith_level_key(match, -1); crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index); if (target == NULL) { crm_err("Invalid fencing target in element %s", ID(match)); } else if (index <= 0) { crm_err("Invalid level for %s in element %s", target, ID(match)); } else { topology_remove_helper(target, index); } /* } else { Deal with modifications during the 'addition' stage */ } } } static void register_fencing_topology(xmlXPathObjectPtr xpathObj) { int max = numXpathResults(xpathObj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); handle_topology_change(match, TRUE); } } /* Fencing */ static void fencing_topology_init() { xmlXPathObjectPtr xpathObj = NULL; const char *xpath = "//" XML_TAG_FENCING_LEVEL; crm_trace("Full topology refresh"); if(topology) { g_hash_table_destroy(topology); topology = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_topology_entry); } /* Grab everything */ xpathObj = xpath_search(local_cib, xpath); register_fencing_topology(xpathObj); freeXpathObject(xpathObj); } #define rsc_name(x) x->clone_name?x->clone_name:x->id /*! * \internal * \brief Check whether our uname is in a resource's allowed node list * * \param[in] rsc Resource to check * * \return Pointer to node object if found, NULL otherwise */ static node_t * our_node_allowed_for(resource_t *rsc) { GHashTableIter iter; node_t *node = NULL; if (rsc && stonith_our_uname) { g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (node && strcmp(node->details->uname, stonith_our_uname) == 0) { break; } node = NULL; } } return node; } /*! * \internal * \brief If a resource or any of its children are STONITH devices, update their * definitions given a cluster working set. * * \param[in] rsc Resource to check * \param[in] data_set Cluster working set with device information */ static void cib_device_update(resource_t *rsc, pe_working_set_t *data_set) { node_t *node = NULL; const char *value = NULL; const char *rclass = NULL; node_t *parent = NULL; gboolean remove = TRUE; /* If this is a complex resource, check children rather than this resource itself. * TODO: Mark each installed device and remove if untouched when this process finishes. */ if(rsc->children) { GListPtr gIter = NULL; for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { cib_device_update(gIter->data, data_set); if(rsc->variant == pe_clone || rsc->variant == pe_master) { crm_trace("Only processing one copy of the clone %s", rsc->id); break; } } return; } /* We only care about STONITH resources. */ rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); - if(safe_str_neq(rclass, "stonith")) { + if (safe_str_neq(rclass, PCMK_RESOURCE_CLASS_STONITH)) { return; } /* If this STONITH resource is disabled, just remove it. */ value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); if (safe_str_eq(value, RSC_STOPPED)) { crm_info("Device %s has been disabled", rsc->id); goto update_done; } /* Check whether our node is allowed for this resource (and its parent if in a group) */ node = our_node_allowed_for(rsc); if (rsc->parent && (rsc->parent->variant == pe_group)) { parent = our_node_allowed_for(rsc->parent); } if(node == NULL) { /* Our node is disallowed, so remove the device */ GHashTableIter iter; crm_info("Device %s has been disabled on %s: unknown", rsc->id, stonith_our_uname); g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { crm_trace("Available: %s = %d", node->details->uname, node->weight); } goto update_done; } else if(node->weight < 0 || (parent && parent->weight < 0)) { /* Our node (or its group) is disallowed by score, so remove the device */ char *score = score2char((node->weight < 0) ? node->weight : parent->weight); crm_info("Device %s has been disabled on %s: score=%s", rsc->id, stonith_our_uname, score); free(score); goto update_done; } else { /* Our node is allowed, so update the device information */ xmlNode *data; GHashTableIter gIter; stonith_key_value_t *params = NULL; const char *name = NULL; const char *agent = crm_element_value(rsc->xml, XML_EXPR_ATTR_TYPE); const char *provider = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); const char *rsc_provides = NULL; crm_debug("Device %s is allowed on %s: score=%d", rsc->id, stonith_our_uname, node->weight); get_rsc_attributes(rsc->parameters, rsc, node, data_set); get_meta_attributes(rsc->meta, rsc, node, data_set); rsc_provides = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_PROVIDES); g_hash_table_iter_init(&gIter, rsc->parameters); while (g_hash_table_iter_next(&gIter, (gpointer *) & name, (gpointer *) & value)) { if (!name || !value) { continue; } params = stonith_key_value_add(params, name, value); crm_trace(" %s=%s", name, value); } remove = FALSE; data = create_device_registration_xml(rsc_name(rsc), provider, agent, params, rsc_provides); stonith_device_register(data, NULL, TRUE); stonith_key_value_freeall(params, 1, 1); free_xml(data); } update_done: if(remove && g_hash_table_lookup(device_list, rsc_name(rsc))) { stonith_device_remove(rsc_name(rsc), TRUE); } } extern xmlNode *do_calculations(pe_working_set_t * data_set, xmlNode * xml_input, crm_time_t * now); extern node_t *create_node(const char *id, const char *uname, const char *type, const char *score, pe_working_set_t * data_set); /*! * \internal * \brief Update all STONITH device definitions based on current CIB */ static void cib_devices_update(void) { GListPtr gIter = NULL; pe_working_set_t data_set; crm_info("Updating devices to version %s.%s.%s", crm_element_value(local_cib, XML_ATTR_GENERATION_ADMIN), crm_element_value(local_cib, XML_ATTR_GENERATION), crm_element_value(local_cib, XML_ATTR_NUMUPDATES)); set_working_set_defaults(&data_set); data_set.input = local_cib; data_set.now = crm_time_new(NULL); data_set.flags |= pe_flag_quick_location; data_set.localhost = stonith_our_uname; cluster_status(&data_set); do_calculations(&data_set, NULL, NULL); for (gIter = data_set.resources; gIter != NULL; gIter = gIter->next) { cib_device_update(gIter->data, &data_set); } data_set.input = NULL; /* Wasn't a copy */ cleanup_alloc_calculations(&data_set); } static void update_cib_stonith_devices_v2(const char *event, xmlNode * msg) { xmlNode *change = NULL; char *reason = NULL; bool needs_update = FALSE; xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); for (change = __xml_first_child(patchset); change != NULL; change = __xml_next(change)) { const char *op = crm_element_value(change, XML_DIFF_OP); const char *xpath = crm_element_value(change, XML_DIFF_PATH); const char *shortpath = NULL; if(op == NULL || strcmp(op, "move") == 0) { continue; } else if(safe_str_eq(op, "delete") && strstr(xpath, XML_CIB_TAG_RESOURCE)) { const char *rsc_id = NULL; char *search = NULL; char *mutable = NULL; if (strstr(xpath, XML_TAG_ATTR_SETS)) { needs_update = TRUE; break; } mutable = strdup(xpath); rsc_id = strstr(mutable, "primitive[@id=\'"); if (rsc_id != NULL) { rsc_id += strlen("primitive[@id=\'"); search = strchr(rsc_id, '\''); } if (search != NULL) { *search = 0; stonith_device_remove(rsc_id, TRUE); } else { crm_warn("Ignoring malformed CIB update (resource deletion)"); } free(mutable); } else if(strstr(xpath, "/"XML_CIB_TAG_RESOURCES)) { shortpath = strrchr(xpath, '/'); CRM_ASSERT(shortpath); reason = crm_strdup_printf("%s %s", op, shortpath+1); needs_update = TRUE; break; } else if(strstr(xpath, XML_CONS_TAG_RSC_LOCATION)) { shortpath = strrchr(xpath, '/'); CRM_ASSERT(shortpath); reason = crm_strdup_printf("%s %s", op, shortpath+1); needs_update = TRUE; break; } } if(needs_update) { crm_info("Updating device list from the cib: %s", reason); cib_devices_update(); } free(reason); } static void update_cib_stonith_devices_v1(const char *event, xmlNode * msg) { const char *reason = "none"; gboolean needs_update = FALSE; xmlXPathObjectPtr xpath_obj = NULL; /* process new constraints */ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_CONS_TAG_RSC_LOCATION); if (numXpathResults(xpath_obj) > 0) { int max = numXpathResults(xpath_obj), lpc = 0; /* Safest and simplest to always recompute */ needs_update = TRUE; reason = "new location constraint"; for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpath_obj, lpc); crm_log_xml_trace(match, "new constraint"); } } freeXpathObject(xpath_obj); /* process deletions */ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_RESOURCE); if (numXpathResults(xpath_obj) > 0) { remove_cib_device(xpath_obj); } freeXpathObject(xpath_obj); /* process additions */ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_RESOURCE); if (numXpathResults(xpath_obj) > 0) { int max = numXpathResults(xpath_obj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { const char *rsc_id = NULL; const char *standard = NULL; xmlNode *match = getXpathResult(xpath_obj, lpc); rsc_id = crm_element_value(match, XML_ATTR_ID); standard = crm_element_value(match, XML_AGENT_ATTR_CLASS); - if (safe_str_neq(standard, "stonith")) { + if (safe_str_neq(standard, PCMK_RESOURCE_CLASS_STONITH)) { continue; } crm_trace("Fencing resource %s was added or modified", rsc_id); reason = "new resource"; needs_update = TRUE; } } freeXpathObject(xpath_obj); if(needs_update) { crm_info("Updating device list from the cib: %s", reason); cib_devices_update(); } } static void update_cib_stonith_devices(const char *event, xmlNode * msg) { int format = 1; xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); CRM_ASSERT(patchset); crm_element_value_int(patchset, "format", &format); switch(format) { case 1: update_cib_stonith_devices_v1(event, msg); break; case 2: update_cib_stonith_devices_v2(event, msg); break; default: crm_warn("Unknown patch format: %d", format); } } /* Needs to hold node name + attribute name + attribute value + 75 */ #define XPATH_MAX 512 /*! * \internal * \brief Check whether a node has a specific attribute name/value * * \param[in] node Name of node to check * \param[in] name Name of an attribute to look for * \param[in] value The value the named attribute needs to be set to in order to be considered a match * * \return TRUE if the locally cached CIB has the specified node attribute */ gboolean node_has_attr(const char *node, const char *name, const char *value) { char xpath[XPATH_MAX]; xmlNode *match; int n; CRM_CHECK(local_cib != NULL, return FALSE); /* Search for the node's attributes in the CIB. While the schema allows * multiple sets of instance attributes, and allows instance attributes to * use id-ref to reference values elsewhere, that is intended for resources, * so we ignore that here. */ n = snprintf(xpath, XPATH_MAX, "//" XML_CIB_TAG_NODES "/" XML_CIB_TAG_NODE "[@uname='%s']/" XML_TAG_ATTR_SETS "/" XML_CIB_TAG_NVPAIR "[@name='%s' and @value='%s']", node, name, value); match = get_xpath_object(xpath, local_cib, LOG_TRACE); CRM_CHECK(n < XPATH_MAX, return FALSE); return (match != NULL); } static void update_fencing_topology(const char *event, xmlNode * msg) { int format = 1; const char *xpath; xmlXPathObjectPtr xpathObj = NULL; xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); CRM_ASSERT(patchset); crm_element_value_int(patchset, "format", &format); if(format == 1) { /* Process deletions (only) */ xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_FENCING_LEVEL; xpathObj = xpath_search(msg, xpath); remove_fencing_topology(xpathObj); freeXpathObject(xpathObj); /* Process additions and changes */ xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_FENCING_LEVEL; xpathObj = xpath_search(msg, xpath); register_fencing_topology(xpathObj); freeXpathObject(xpathObj); } else if(format == 2) { xmlNode *change = NULL; int add[] = { 0, 0, 0 }; int del[] = { 0, 0, 0 }; xml_patch_versions(patchset, add, del); for (change = __xml_first_child(patchset); change != NULL; change = __xml_next(change)) { const char *op = crm_element_value(change, XML_DIFF_OP); const char *xpath = crm_element_value(change, XML_DIFF_PATH); if(op == NULL) { continue; } else if(strstr(xpath, "/" XML_TAG_FENCING_LEVEL) != NULL) { /* Change to a specific entry */ crm_trace("Handling %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); if(strcmp(op, "move") == 0) { continue; } else if(strcmp(op, "create") == 0) { handle_topology_change(change->children, FALSE); } else if(strcmp(op, "modify") == 0) { xmlNode *match = first_named_child(change, XML_DIFF_RESULT); if(match) { handle_topology_change(match->children, TRUE); } } else if(strcmp(op, "delete") == 0) { /* Nuclear option, all we have is the path and an id... not enough to remove a specific entry */ crm_info("Re-initializing fencing topology after %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); fencing_topology_init(); return; } } else if (strstr(xpath, "/" XML_TAG_FENCING_TOPOLOGY) != NULL) { /* Change to the topology in general */ crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); fencing_topology_init(); return; } else if (strstr(xpath, "/" XML_CIB_TAG_CONFIGURATION)) { /* Changes to the whole config section, possibly including the topology as a whild */ if(first_named_child(change, XML_TAG_FENCING_TOPOLOGY) == NULL) { crm_trace("Nothing for us in %s operation %d.%d.%d for %s.", op, add[0], add[1], add[2], xpath); } else if(strcmp(op, "delete") == 0 || strcmp(op, "create") == 0) { crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s.", op, add[0], add[1], add[2], xpath); fencing_topology_init(); return; } } else { crm_trace("Nothing for us in %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); } } } else { crm_warn("Unknown patch format: %d", format); } } static bool have_cib_devices = FALSE; static void update_cib_cache_cb(const char *event, xmlNode * msg) { int rc = pcmk_ok; xmlNode *stonith_enabled_xml = NULL; xmlNode *stonith_watchdog_xml = NULL; const char *stonith_enabled_s = NULL; static gboolean stonith_enabled_saved = TRUE; if(!have_cib_devices) { crm_trace("Skipping updates until we get a full dump"); return; } else if(msg == NULL) { crm_trace("Missing %s update", event); return; } /* Maintain a local copy of the CIB so that we have full access * to device definitions, location constraints, and node attributes */ if (local_cib != NULL) { int rc = pcmk_ok; xmlNode *patchset = NULL; crm_element_value_int(msg, F_CIB_RC, &rc); if (rc != pcmk_ok) { return; } patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); xml_log_patchset(LOG_TRACE, "Config update", patchset); rc = xml_apply_patchset(local_cib, patchset, TRUE); switch (rc) { case pcmk_ok: case -pcmk_err_old_data: break; case -pcmk_err_diff_resync: case -pcmk_err_diff_failed: crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc); free_xml(local_cib); local_cib = NULL; break; default: crm_warn("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc); free_xml(local_cib); local_cib = NULL; } } if (local_cib == NULL) { crm_trace("Re-requesting the full cib"); rc = cib_api->cmds->query(cib_api, NULL, &local_cib, cib_scope_local | cib_sync_call); if(rc != pcmk_ok) { crm_err("Couldn't retrieve the CIB: %s (%d)", pcmk_strerror(rc), rc); return; } CRM_ASSERT(local_cib != NULL); stonith_enabled_saved = FALSE; /* Trigger a full refresh below */ } stonith_enabled_xml = get_xpath_object("//nvpair[@name='stonith-enabled']", local_cib, LOG_TRACE); if (stonith_enabled_xml) { stonith_enabled_s = crm_element_value(stonith_enabled_xml, XML_NVPAIR_ATTR_VALUE); } if (stonith_enabled_s == NULL || crm_is_true(stonith_enabled_s)) { long timeout_ms = 0; const char *value = NULL; stonith_watchdog_xml = get_xpath_object("//nvpair[@name='stonith-watchdog-timeout']", local_cib, LOG_TRACE); if (stonith_watchdog_xml) { value = crm_element_value(stonith_watchdog_xml, XML_NVPAIR_ATTR_VALUE); } if(value) { timeout_ms = crm_get_msec(value); } if(timeout_ms != stonith_watchdog_timeout_ms) { crm_notice("New watchdog timeout %lds (was %lds)", timeout_ms/1000, stonith_watchdog_timeout_ms/1000); stonith_watchdog_timeout_ms = timeout_ms; } } else { stonith_watchdog_timeout_ms = 0; } if (stonith_enabled_s && crm_is_true(stonith_enabled_s) == FALSE) { crm_trace("Ignoring cib updates while stonith is disabled"); stonith_enabled_saved = FALSE; return; } else if (stonith_enabled_saved == FALSE) { crm_info("Updating stonith device and topology lists now that stonith is enabled"); stonith_enabled_saved = TRUE; fencing_topology_init(); cib_devices_update(); } else { update_fencing_topology(event, msg); update_cib_stonith_devices(event, msg); } } static void init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { crm_info("Updating device list from the cib: init"); have_cib_devices = TRUE; local_cib = copy_xml(output); fencing_topology_init(); cib_devices_update(); } static void stonith_shutdown(int nsig) { stonith_shutdown_flag = TRUE; crm_info("Terminating with %d clients", crm_hash_table_size(client_connections)); if (mainloop != NULL && g_main_is_running(mainloop)) { g_main_quit(mainloop); } else { stonith_cleanup(); crm_exit(pcmk_ok); } } static void cib_connection_destroy(gpointer user_data) { if (stonith_shutdown_flag) { crm_info("Connection to the CIB closed."); return; } else { crm_notice("Connection to the CIB terminated. Shutting down."); } if (cib_api) { cib_api->cmds->signoff(cib_api); } stonith_shutdown(0); } static void stonith_cleanup(void) { if (cib_api) { cib_api->cmds->signoff(cib_api); } if (ipcs) { qb_ipcs_destroy(ipcs); } g_hash_table_destroy(known_peer_names); known_peer_names = NULL; crm_peer_destroy(); crm_client_cleanup(); free(stonith_our_uname); free_xml(local_cib); } /* *INDENT-OFF* */ static struct crm_option long_options[] = { {"stand-alone", 0, 0, 's'}, {"stand-alone-w-cpg", 0, 0, 'c'}, {"logfile", 1, 0, 'l'}, {"verbose", 0, 0, 'V'}, {"version", 0, 0, '$'}, {"help", 0, 0, '?'}, {0, 0, 0, 0} }; /* *INDENT-ON* */ static void setup_cib(void) { int rc, retries = 0; static cib_t *(*cib_new_fn) (void) = NULL; if (cib_new_fn == NULL) { cib_new_fn = find_library_function(&cib_library, CIB_LIBRARY, "cib_new", TRUE); } if (cib_new_fn != NULL) { cib_api = (*cib_new_fn) (); } if (cib_api == NULL) { crm_err("No connection to the CIB"); return; } do { sleep(retries); rc = cib_api->cmds->signon(cib_api, CRM_SYSTEM_STONITHD, cib_command); } while (rc == -ENOTCONN && ++retries < 5); if (rc != pcmk_ok) { crm_err("Could not connect to the CIB service: %s (%d)", pcmk_strerror(rc), rc); } else if (pcmk_ok != cib_api->cmds->add_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb)) { crm_err("Could not set CIB notification callback"); } else { rc = cib_api->cmds->query(cib_api, NULL, NULL, cib_scope_local); cib_api->cmds->register_callback(cib_api, rc, 120, FALSE, NULL, "init_cib_cache_cb", init_cib_cache_cb); cib_api->cmds->set_connection_dnotify(cib_api, cib_connection_destroy); crm_info("Watching for stonith topology changes"); } } struct qb_ipcs_service_handlers ipc_callbacks = { .connection_accept = st_ipc_accept, .connection_created = st_ipc_created, .msg_process = st_ipc_dispatch, .connection_closed = st_ipc_closed, .connection_destroyed = st_ipc_destroy }; /*! * \internal * \brief Callback for peer status changes * * \param[in] type What changed * \param[in] node What peer had the change * \param[in] data Previous value of what changed */ static void st_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data) { if ((type != crm_status_processes) && !is_set(node->flags, crm_remote_node)) { xmlNode *query = NULL; if (node->id && node->uname) { g_hash_table_insert(known_peer_names, GUINT_TO_POINTER(node->id), strdup(node->uname)); } /* * This is a hack until we can send to a nodeid and/or we fix node name lookups * These messages are ignored in stonith_peer_callback() */ query = create_xml_node(NULL, "stonith_command"); crm_xml_add(query, F_XML_TAGNAME, "stonith_command"); crm_xml_add(query, F_TYPE, T_STONITH_NG); crm_xml_add(query, F_STONITH_OPERATION, "poke"); crm_debug("Broadcasting our uname because of node %u", node->id); send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE); free_xml(query); } } int main(int argc, char **argv) { int flag; int rc = 0; int lpc = 0; int argerr = 0; int option_index = 0; crm_cluster_t cluster; const char *actions[] = { "reboot", "off", "list", "monitor", "status" }; crm_log_preinit("stonith-ng", argc, argv); crm_set_options(NULL, "mode [options]", long_options, "Provides a summary of cluster's current state." "\n\nOutputs varying levels of detail in a number of different formats.\n"); while (1) { flag = crm_get_option(argc, argv, &option_index); if (flag == -1) { break; } switch (flag) { case 'V': crm_bump_log_level(argc, argv); break; case 'l': crm_add_logfile(optarg); break; case 's': stand_alone = TRUE; break; case 'c': stand_alone = FALSE; no_cib_connect = TRUE; break; case '$': case '?': crm_help(flag, EX_OK); break; default: ++argerr; break; } } if (argc - optind == 1 && safe_str_eq("metadata", argv[optind])) { printf("\n"); printf("\n"); printf(" 1.0\n"); printf (" This is a fake resource that details the instance attributes handled by stonithd.\n"); printf(" Options available for all stonith resources\n"); printf(" \n"); printf(" \n"); printf (" The priority of the stonith resource. Devices are tried in order of highest priority to lowest.\n"); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_HOSTARG); printf (" Advanced use only: An alternate parameter to supply instead of 'port'\n"); printf (" Some devices do not support the standard 'port' parameter or may provide additional ones.\n" "Use this to specify an alternate, device-specific, parameter that should indicate the machine to be fenced.\n" "A value of 'none' can be used to tell the cluster not to supply any additional parameters.\n" " \n"); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_HOSTMAP); printf (" A mapping of host names to ports numbers for devices that do not support host names.\n"); printf (" Eg. node1:1;node2:2,3 would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2\n"); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_HOSTLIST); printf (" A list of machines controlled by this device (Optional unless %s=static-list).\n", STONITH_ATTR_HOSTCHECK); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_HOSTCHECK); printf (" How to determine which machines are controlled by the device.\n"); printf (" Allowed values: dynamic-list (query the device), static-list (check the %s attribute), none (assume every device can fence every machine)\n", STONITH_ATTR_HOSTLIST); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_DELAY_MAX); printf (" Enable random delay for stonith actions and specify the maximum of random delay\n"); printf (" This prevents double fencing when using slow devices such as sbd.\n" "Use this to enable random delay for stonith actions and specify the maximum of random delay.\n"); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_ACTION_LIMIT); printf (" The maximum number of actions can be performed in parallel on this device\n"); printf (" Pengine property concurrent-fencing=true needs to be configured first.\n" "Then use this to specify the maximum number of actions can be performed in parallel on this device. -1 is unlimited.\n"); printf(" \n"); printf(" \n"); for (lpc = 0; lpc < DIMOF(actions); lpc++) { printf(" \n", actions[lpc]); printf (" Advanced use only: An alternate command to run instead of '%s'\n", actions[lpc]); printf (" Some devices do not support the standard commands or may provide additional ones.\n" "Use this to specify an alternate, device-specific, command that implements the '%s' action.\n", actions[lpc]); printf(" \n", actions[lpc]); printf(" \n"); printf(" \n", actions[lpc]); printf (" Advanced use only: Specify an alternate timeout to use for %s actions instead of stonith-timeout\n", actions[lpc]); printf (" Some devices need much more/less time to complete than normal.\n" "Use this to specify an alternate, device-specific, timeout for '%s' actions.\n", actions[lpc]); printf(" \n"); printf(" \n"); printf(" \n", actions[lpc]); printf (" Advanced use only: The maximum number of times to retry the '%s' command within the timeout period\n", actions[lpc]); printf(" Some devices do not support multiple connections." " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining." " Use this option to alter the number of times Pacemaker retries '%s' actions before giving up." "\n", actions[lpc]); printf(" \n"); printf(" \n"); } printf(" \n"); printf("\n"); return 0; } if (optind != argc) { ++argerr; } if (argerr) { crm_help('?', EX_USAGE); } crm_log_init("stonith-ng", LOG_INFO, TRUE, FALSE, argc, argv, FALSE); mainloop_add_signal(SIGTERM, stonith_shutdown); crm_peer_init(); known_peer_names = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, free); if (stand_alone == FALSE) { #if SUPPORT_HEARTBEAT cluster.hb_conn = NULL; cluster.hb_dispatch = stonith_peer_hb_callback; cluster.destroy = stonith_peer_hb_destroy; #endif if (is_openais_cluster()) { #if SUPPORT_COROSYNC cluster.destroy = stonith_peer_cs_destroy; cluster.cpg.cpg_deliver_fn = stonith_peer_ais_callback; cluster.cpg.cpg_confchg_fn = pcmk_cpg_membership; #endif } crm_set_status_callback(&st_peer_update_callback); if (crm_cluster_connect(&cluster) == FALSE) { crm_crit("Cannot sign in to the cluster... terminating"); crm_exit(DAEMON_RESPAWN_STOP); } stonith_our_uname = cluster.uname; stonith_our_uuid = cluster.uuid; #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { /* crm_cluster_connect() registered us for crm_system_name, which * usually is the only F_TYPE used by the respective sub system. * Stonith needs to register two additional F_TYPE callbacks, * because it can :-/ */ if (HA_OK != cluster.hb_conn->llc_ops->set_msg_callback(cluster.hb_conn, T_STONITH_NOTIFY, cluster.hb_dispatch, cluster.hb_conn)) { crm_crit("Cannot set msg callback %s: %s", T_STONITH_NOTIFY, cluster.hb_conn->llc_ops->errmsg(cluster.hb_conn)); crm_exit(DAEMON_RESPAWN_STOP); } if (HA_OK != cluster.hb_conn->llc_ops->set_msg_callback(cluster.hb_conn, T_STONITH_TIMEOUT_VALUE, cluster.hb_dispatch, cluster.hb_conn)) { crm_crit("Cannot set msg callback %s: %s", T_STONITH_TIMEOUT_VALUE, cluster.hb_conn->llc_ops->errmsg(cluster.hb_conn)); crm_exit(DAEMON_RESPAWN_STOP); } } #endif if (no_cib_connect == FALSE) { setup_cib(); } } else { stonith_our_uname = strdup("localhost"); } device_list = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_device); topology = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_topology_entry); if(stonith_watchdog_timeout_ms > 0) { xmlNode *xml; stonith_key_value_t *params = NULL; params = stonith_key_value_add(params, STONITH_ATTR_HOSTLIST, stonith_our_uname); xml = create_device_registration_xml("watchdog", "internal", STONITH_WATCHDOG_AGENT, params, NULL); stonith_device_register(xml, NULL, FALSE); stonith_key_value_freeall(params, 1, 1); free_xml(xml); } stonith_ipc_server_init(&ipcs, &ipc_callbacks); #if SUPPORT_STONITH_CONFIG if (((stand_alone == TRUE)) && !(standalone_cfg_read_file(STONITH_NG_CONF_FILE))) { standalone_cfg_commit(); } #endif /* Create the mainloop and run it... */ mainloop = g_main_new(FALSE); crm_info("Starting %s mainloop", crm_system_name); g_main_run(mainloop); stonith_cleanup(); #if SUPPORT_HEARTBEAT if (cluster.hb_conn) { cluster.hb_conn->llc_ops->delete(cluster.hb_conn); } #endif crm_info("Done"); return crm_exit(rc); } diff --git a/include/crm/services.h b/include/crm/services.h index 05c8ebfbf4..e1fae9b98a 100644 --- a/include/crm/services.h +++ b/include/crm/services.h @@ -1,407 +1,417 @@ /* * Copyright (C) 2010 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /** * \file * \brief Services API * \ingroup core */ #ifndef __PCMK_SERVICES__ # define __PCMK_SERVICES__ # ifdef __cplusplus extern "C" { # endif # include # include # include # include # ifndef OCF_ROOT_DIR # define OCF_ROOT_DIR "/usr/lib/ocf" # endif # ifndef LSB_ROOT_DIR # define LSB_ROOT_DIR "/etc/init.d" # endif /* TODO: Autodetect these two ?*/ # ifndef SYSTEMCTL # define SYSTEMCTL "/bin/systemctl" # endif +/* Deprecated and unused by Pacemaker, kept for API backward compatibility */ # ifndef SERVICE_SCRIPT # define SERVICE_SCRIPT "/sbin/service" # endif +/* Known resource classes */ +#define PCMK_RESOURCE_CLASS_OCF "ocf" +#define PCMK_RESOURCE_CLASS_SERVICE "service" +#define PCMK_RESOURCE_CLASS_LSB "lsb" +#define PCMK_RESOURCE_CLASS_SYSTEMD "systemd" +#define PCMK_RESOURCE_CLASS_UPSTART "upstart" +#define PCMK_RESOURCE_CLASS_HB "heartbeat" +#define PCMK_RESOURCE_CLASS_NAGIOS "nagios" +#define PCMK_RESOURCE_CLASS_STONITH "stonith" /* This is the string passed in the OCF_EXIT_REASON_PREFIX * environment variable. The stderr output that occurs * after this prefix is encountered is considered the exit * reason for a completed operationt */ #define PCMK_OCF_REASON_PREFIX "ocf-exit-reason:" /* *INDENT-OFF* */ enum lsb_exitcode { PCMK_LSB_OK = 0, PCMK_LSB_UNKNOWN_ERROR = 1, PCMK_LSB_INVALID_PARAM = 2, PCMK_LSB_UNIMPLEMENT_FEATURE = 3, PCMK_LSB_INSUFFICIENT_PRIV = 4, PCMK_LSB_NOT_INSTALLED = 5, PCMK_LSB_NOT_CONFIGURED = 6, PCMK_LSB_NOT_RUNNING = 7, }; /* The return codes for the status operation are not the same for other * operatios - go figure */ enum lsb_status_exitcode { PCMK_LSB_STATUS_OK = 0, PCMK_LSB_STATUS_VAR_PID = 1, PCMK_LSB_STATUS_VAR_LOCK = 2, PCMK_LSB_STATUS_NOT_RUNNING = 3, PCMK_LSB_STATUS_UNKNOWN = 4, /* custom codes should be in the 150-199 range reserved for application use */ PCMK_LSB_STATUS_NOT_INSTALLED = 150, PCMK_LSB_STATUS_INSUFFICIENT_PRIV = 151, }; /* Uniform exit codes * Everything is mapped to its OCF equivalent so that Pacemaker only deals with one set of codes */ enum ocf_exitcode { PCMK_OCF_OK = 0, PCMK_OCF_UNKNOWN_ERROR = 1, PCMK_OCF_INVALID_PARAM = 2, PCMK_OCF_UNIMPLEMENT_FEATURE = 3, PCMK_OCF_INSUFFICIENT_PRIV = 4, PCMK_OCF_NOT_INSTALLED = 5, PCMK_OCF_NOT_CONFIGURED = 6, PCMK_OCF_NOT_RUNNING = 7, /* End of overlap with LSB */ PCMK_OCF_RUNNING_MASTER = 8, PCMK_OCF_FAILED_MASTER = 9, /* 150-199 reserved for application use */ PCMK_OCF_CONNECTION_DIED = 189, /* Operation failure implied by disconnection of the LRM API to a local or remote node */ PCMK_OCF_DEGRADED = 190, /* Active resource that is no longer 100% functional */ PCMK_OCF_DEGRADED_MASTER = 191, /* Promoted resource that is no longer 100% functional */ PCMK_OCF_EXEC_ERROR = 192, /* Generic problem invoking the agent */ PCMK_OCF_UNKNOWN = 193, /* State of the service is unknown - used for recording in-flight operations */ PCMK_OCF_SIGNAL = 194, PCMK_OCF_NOT_SUPPORTED = 195, PCMK_OCF_PENDING = 196, PCMK_OCF_CANCELLED = 197, PCMK_OCF_TIMEOUT = 198, PCMK_OCF_OTHER_ERROR = 199, /* Keep the same codes as PCMK_LSB */ }; enum op_status { PCMK_LRM_OP_PENDING = -1, PCMK_LRM_OP_DONE, PCMK_LRM_OP_CANCELLED, PCMK_LRM_OP_TIMEOUT, PCMK_LRM_OP_NOTSUPPORTED, PCMK_LRM_OP_ERROR, PCMK_LRM_OP_ERROR_HARD, PCMK_LRM_OP_ERROR_FATAL, PCMK_LRM_OP_NOT_INSTALLED, }; enum nagios_exitcode { NAGIOS_STATE_OK = 0, NAGIOS_STATE_WARNING = 1, NAGIOS_STATE_CRITICAL = 2, NAGIOS_STATE_UNKNOWN = 3, NAGIOS_STATE_DEPENDENT = 4, NAGIOS_INSUFFICIENT_PRIV = 100, NAGIOS_NOT_INSTALLED = 101, }; enum svc_action_flags { /* On timeout, only kill pid, do not kill entire pid group */ SVC_ACTION_LEAVE_GROUP = 0x01, }; /* *INDENT-ON* */ typedef struct svc_action_private_s svc_action_private_t; typedef struct svc_action_s { char *id; char *rsc; char *action; int interval; char *standard; char *provider; char *agent; int timeout; GHashTable *params; int rc; int pid; int cancel; int status; int sequence; int expected_rc; int synchronous; enum svc_action_flags flags; char *stderr_data; char *stdout_data; /** * Data stored by the creator of the action. * * This may be used to hold data that is needed later on by a callback, * for example. */ void *cb_data; svc_action_private_t *opaque; } svc_action_t; /** * \brief Get a list of files or directories in a given path * * \param[in] root full path to a directory to read * \param[in] files return list of files if TRUE or directories if FALSE * \param[in] executable if TRUE and files is TRUE, only return executable files * * \return a list of what was found. The list items are char *. * \note It is the caller's responsibility to free the result with g_list_free_full(list, free). */ GList *get_directory_list(const char *root, gboolean files, gboolean executable); /** * Get a list of services * * \return a list of services. The list items are gchar *. This list _must_ * be destroyed using g_list_free_full(list, free). */ GList *services_list(void); /** * \brief Get a list of providers * * \param[in] standard list providers of this standard (e.g. ocf, lsb, etc.) * * \return a list of providers as char * list items (or NULL if standard does not support providers) * \note The caller is responsible for freeing the result using g_list_free_full(list, free). */ GList *resources_list_providers(const char *standard); /** * \brief Get a list of resource agents * * \param[in] standard list agents using this standard (e.g. ocf, lsb, etc.) (or NULL for all) * \param[in] provider list agents from this provider (or NULL for all) * * \return a list of resource agents. The list items are char *. * \note The caller is responsible for freeing the result using g_list_free_full(list, free). */ GList *resources_list_agents(const char *standard, const char *provider); /** * Get list of available standards * * \return a list of resource standards. The list items are char *. This list _must_ * be destroyed using g_list_free_full(list, free). */ GList *resources_list_standards(void); svc_action_t *services_action_create(const char *name, const char *action, int interval /* ms */ , int timeout /* ms */ ); /** * \brief Create a new resource action * * \param[in] name name of resource * \param[in] standard resource agent standard (ocf, lsb, etc.) * \param[in] provider resource agent provider * \param[in] agent resource agent name * \param[in] action action (start, stop, monitor, etc.) * \param[in] interval how often to repeat this action, in milliseconds (if 0, execute only once) * \param[in] timeout consider action failed if it does not complete in this many milliseconds * \param[in] params action parameters * * \return newly allocated action instance * * \post After the call, 'params' is owned, and later free'd by the svc_action_t result * \note The caller is responsible for freeing the return value using * services_action_free(). */ svc_action_t *resources_action_create(const char *name, const char *standard, const char *provider, const char *agent, const char *action, int interval /* ms */ , int timeout /* ms */ , GHashTable * params, enum svc_action_flags flags); /** * Kick a recurring action so it is scheduled immediately for re-execution */ gboolean services_action_kick(const char *name, const char *action, int interval /* ms */); /** * Find the first class that can provide service::${agent} * * \param[in] agent which agent to search for * \return NULL, or the first class that provides the named agent */ const char *resources_find_service_class(const char *agent); /** * Utilize services API to execute an arbitrary command. * * This API has useful infrastructure in place to be able to run a command * in the background and get notified via a callback when the command finishes. * * \param[in] exec command to execute * \param[in] args arguments to the command, NULL terminated * * \return a svc_action_t object, used to pass to the execute function * (services_action_sync() or services_action_async()) and is * provided to the callback. */ svc_action_t *services_action_create_generic(const char *exec, const char *args[]); void services_action_cleanup(svc_action_t * op); void services_action_free(svc_action_t * op); gboolean services_action_sync(svc_action_t * op); /** * Run an action asynchronously. * * \param[in] op services action data * \param[in] action_callback callback for when the action completes * * \retval TRUE succesfully started execution * \retval FALSE failed to start execution, no callback will be received */ gboolean services_action_async(svc_action_t * op, void (*action_callback) (svc_action_t *)); gboolean services_action_cancel(const char *name, const char *action, int interval); static inline const char *services_lrm_status_str(enum op_status status) { switch (status) { case PCMK_LRM_OP_PENDING: return "pending"; case PCMK_LRM_OP_DONE:return "complete"; case PCMK_LRM_OP_CANCELLED:return "Cancelled"; case PCMK_LRM_OP_TIMEOUT:return "Timed Out"; case PCMK_LRM_OP_NOTSUPPORTED:return "NOT SUPPORTED"; case PCMK_LRM_OP_ERROR:return "Error"; case PCMK_LRM_OP_NOT_INSTALLED:return "Not installed"; default:return "UNKNOWN!"; } } static inline const char *services_ocf_exitcode_str(enum ocf_exitcode code) { switch (code) { case PCMK_OCF_OK: return "ok"; case PCMK_OCF_UNKNOWN_ERROR: return "unknown error"; case PCMK_OCF_INVALID_PARAM: return "invalid parameter"; case PCMK_OCF_UNIMPLEMENT_FEATURE: return "unimplemented feature"; case PCMK_OCF_INSUFFICIENT_PRIV: return "insufficient privileges"; case PCMK_OCF_NOT_INSTALLED: return "not installed"; case PCMK_OCF_NOT_CONFIGURED: return "not configured"; case PCMK_OCF_NOT_RUNNING: return "not running"; case PCMK_OCF_RUNNING_MASTER: return "master"; case PCMK_OCF_FAILED_MASTER: return "master (failed)"; case PCMK_OCF_SIGNAL: return "OCF_SIGNAL"; case PCMK_OCF_NOT_SUPPORTED: return "OCF_NOT_SUPPORTED"; case PCMK_OCF_PENDING: return "OCF_PENDING"; case PCMK_OCF_CANCELLED: return "OCF_CANCELLED"; case PCMK_OCF_TIMEOUT: return "OCF_TIMEOUT"; case PCMK_OCF_OTHER_ERROR: return "OCF_OTHER_ERROR"; case PCMK_OCF_DEGRADED: return "OCF_DEGRADED"; case PCMK_OCF_DEGRADED_MASTER: return "OCF_DEGRADED_MASTER"; default: return "unknown"; } } /** * \brief Get OCF equivalent of LSB exit code * * \param[in] action LSB action that produced exit code * \param[in] lsb_exitcode Exit code of LSB action * * \return PCMK_OCF_* constant that corresponds to LSB exit code */ static inline enum ocf_exitcode services_get_ocf_exitcode(const char *action, int lsb_exitcode) { /* For non-status actions, LSB and OCF share error code meaning <= 7 */ if (action && strcmp(action, "status") && strcmp(action, "monitor")) { if ((lsb_exitcode < 0) || (lsb_exitcode > PCMK_LSB_NOT_RUNNING)) { return PCMK_OCF_UNKNOWN_ERROR; } return (enum ocf_exitcode)lsb_exitcode; } /* status has different return codes */ switch (lsb_exitcode) { case PCMK_LSB_STATUS_OK: return PCMK_OCF_OK; case PCMK_LSB_STATUS_NOT_INSTALLED: return PCMK_OCF_NOT_INSTALLED; case PCMK_LSB_STATUS_INSUFFICIENT_PRIV: return PCMK_OCF_INSUFFICIENT_PRIV; case PCMK_LSB_STATUS_VAR_PID: case PCMK_LSB_STATUS_VAR_LOCK: case PCMK_LSB_STATUS_NOT_RUNNING: return PCMK_OCF_NOT_RUNNING; } return PCMK_OCF_UNKNOWN_ERROR; } # ifdef __cplusplus } # endif #endif /* __PCMK_SERVICES__ */ diff --git a/lib/common/utils.c b/lib/common/utils.c index 3e3abd3964..dbf84c0c25 100644 --- a/lib/common/utils.c +++ b/lib/common/utils.c @@ -1,2128 +1,2131 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef MAXLINE # define MAXLINE 512 #endif #ifdef HAVE_GETOPT_H # include #endif #ifndef PW_BUFFER_LEN # define PW_BUFFER_LEN 500 #endif CRM_TRACE_INIT_DATA(common); gboolean crm_config_error = FALSE; gboolean crm_config_warning = FALSE; char *crm_system_name = NULL; int node_score_red = 0; int node_score_green = 0; int node_score_yellow = 0; int node_score_infinity = INFINITY; static struct crm_option *crm_long_options = NULL; static const char *crm_app_description = NULL; static char *crm_short_options = NULL; static const char *crm_app_usage = NULL; int crm_exit(int rc) { mainloop_cleanup(); #if HAVE_LIBXML2 crm_trace("cleaning up libxml"); crm_xml_cleanup(); #endif crm_trace("exit %d", rc); qb_log_fini(); free(crm_short_options); free(crm_system_name); exit(ABS(rc)); /* Always exit with a positive value so that it can be passed to crm_error * * Otherwise the system wraps it around and people * have to jump through hoops figuring out what the * error was */ return rc; /* Can never happen, but allows return crm_exit(rc) * where "return rc" was used previously - which * keeps compilers happy. */ } gboolean check_time(const char *value) { if (crm_get_msec(value) < 5000) { return FALSE; } return TRUE; } gboolean check_timer(const char *value) { if (crm_get_msec(value) < 0) { return FALSE; } return TRUE; } gboolean check_boolean(const char *value) { int tmp = FALSE; if (crm_str_to_boolean(value, &tmp) != 1) { return FALSE; } return TRUE; } gboolean check_number(const char *value) { errno = 0; if (value == NULL) { return FALSE; } else if (safe_str_eq(value, MINUS_INFINITY_S)) { } else if (safe_str_eq(value, INFINITY_S)) { } else { crm_int_helper(value, NULL); } if (errno != 0) { return FALSE; } return TRUE; } gboolean check_quorum(const char *value) { if (safe_str_eq(value, "stop")) { return TRUE; } else if (safe_str_eq(value, "freeze")) { return TRUE; } else if (safe_str_eq(value, "ignore")) { return TRUE; } else if (safe_str_eq(value, "suicide")) { return TRUE; } return FALSE; } gboolean check_script(const char *value) { struct stat st; if(safe_str_eq(value, "/dev/null")) { return TRUE; } if(stat(value, &st) != 0) { crm_err("Script %s does not exist", value); return FALSE; } if(S_ISREG(st.st_mode) == 0) { crm_err("Script %s is not a regular file", value); return FALSE; } if( (st.st_mode & (S_IXUSR | S_IXGRP )) == 0) { crm_err("Script %s is not executable", value); return FALSE; } return TRUE; } gboolean check_utilization(const char *value) { char *end = NULL; long number = strtol(value, &end, 10); if(end && end[0] != '%') { return FALSE; } else if(number < 0) { return FALSE; } return TRUE; } int char2score(const char *score) { int score_f = 0; if (score == NULL) { } else if (safe_str_eq(score, MINUS_INFINITY_S)) { score_f = -node_score_infinity; } else if (safe_str_eq(score, INFINITY_S)) { score_f = node_score_infinity; } else if (safe_str_eq(score, "+" INFINITY_S)) { score_f = node_score_infinity; } else if (safe_str_eq(score, "red")) { score_f = node_score_red; } else if (safe_str_eq(score, "yellow")) { score_f = node_score_yellow; } else if (safe_str_eq(score, "green")) { score_f = node_score_green; } else { score_f = crm_parse_int(score, NULL); if (score_f > 0 && score_f > node_score_infinity) { score_f = node_score_infinity; } else if (score_f < 0 && score_f < -node_score_infinity) { score_f = -node_score_infinity; } } return score_f; } char * score2char_stack(int score, char *buf, size_t len) { if (score >= node_score_infinity) { strncpy(buf, INFINITY_S, 9); } else if (score <= -node_score_infinity) { strncpy(buf, MINUS_INFINITY_S , 10); } else { return crm_itoa_stack(score, buf, len); } return buf; } char * score2char(int score) { if (score >= node_score_infinity) { return strdup(INFINITY_S); } else if (score <= -node_score_infinity) { return strdup("-" INFINITY_S); } return crm_itoa(score); } const char * cluster_option(GHashTable * options, gboolean(*validate) (const char *), const char *name, const char *old_name, const char *def_value) { const char *value = NULL; CRM_ASSERT(name != NULL); if (options != NULL) { value = g_hash_table_lookup(options, name); } if (value == NULL && old_name && options != NULL) { value = g_hash_table_lookup(options, old_name); if (value != NULL) { crm_config_warn("Using deprecated name '%s' for" " cluster option '%s'", old_name, name); g_hash_table_insert(options, strdup(name), strdup(value)); value = g_hash_table_lookup(options, old_name); } } if (value == NULL) { crm_trace("Using default value '%s' for cluster option '%s'", def_value, name); if (options == NULL) { return def_value; } else if(def_value == NULL) { return def_value; } g_hash_table_insert(options, strdup(name), strdup(def_value)); value = g_hash_table_lookup(options, name); } if (validate && validate(value) == FALSE) { crm_config_err("Value '%s' for cluster option '%s' is invalid." " Defaulting to %s", value, name, def_value); g_hash_table_replace(options, strdup(name), strdup(def_value)); value = g_hash_table_lookup(options, name); } return value; } const char * get_cluster_pref(GHashTable * options, pe_cluster_option * option_list, int len, const char *name) { int lpc = 0; const char *value = NULL; gboolean found = FALSE; for (lpc = 0; lpc < len; lpc++) { if (safe_str_eq(name, option_list[lpc].name)) { found = TRUE; value = cluster_option(options, option_list[lpc].is_valid, option_list[lpc].name, option_list[lpc].alt_name, option_list[lpc].default_value); } } CRM_CHECK(found, crm_err("No option named: %s", name)); return value; } void config_metadata(const char *name, const char *version, const char *desc_short, const char *desc_long, pe_cluster_option * option_list, int len) { int lpc = 0; fprintf(stdout, "" "\n" "\n" " %s\n" " %s\n" " %s\n" " \n", name, version, desc_long, desc_short); for (lpc = 0; lpc < len; lpc++) { if (option_list[lpc].description_long == NULL && option_list[lpc].description_short == NULL) { continue; } fprintf(stdout, " \n" " %s\n" " \n" " %s%s%s\n" " \n", option_list[lpc].name, option_list[lpc].description_short, option_list[lpc].type, option_list[lpc].default_value, option_list[lpc].description_long ? option_list[lpc]. description_long : option_list[lpc].description_short, option_list[lpc].values ? " Allowed values: " : "", option_list[lpc].values ? option_list[lpc].values : ""); } fprintf(stdout, " \n\n"); } void verify_all_options(GHashTable * options, pe_cluster_option * option_list, int len) { int lpc = 0; for (lpc = 0; lpc < len; lpc++) { cluster_option(options, option_list[lpc].is_valid, option_list[lpc].name, option_list[lpc].alt_name, option_list[lpc].default_value); } } char * generate_hash_key(const char *crm_msg_reference, const char *sys) { char *hash_key = crm_concat(sys ? sys : "none", crm_msg_reference, '_'); crm_trace("created hash key: (%s)", hash_key); return hash_key; } int crm_user_lookup(const char *name, uid_t * uid, gid_t * gid) { int rc = -1; char *buffer = NULL; struct passwd pwd; struct passwd *pwentry = NULL; buffer = calloc(1, PW_BUFFER_LEN); getpwnam_r(name, &pwd, buffer, PW_BUFFER_LEN, &pwentry); if (pwentry) { rc = 0; if (uid) { *uid = pwentry->pw_uid; } if (gid) { *gid = pwentry->pw_gid; } crm_trace("Cluster user %s has uid=%d gid=%d", name, pwentry->pw_uid, pwentry->pw_gid); } else { crm_err("Cluster user %s does not exist", name); } free(buffer); return rc; } static int crm_version_helper(const char *text, char **end_text) { int atoi_result = -1; CRM_ASSERT(end_text != NULL); errno = 0; if (text != NULL && text[0] != 0) { atoi_result = (int)strtol(text, end_text, 10); if (errno == EINVAL) { crm_err("Conversion of '%s' %c failed", text, text[0]); atoi_result = -1; } } return atoi_result; } /* * version1 < version2 : -1 * version1 = version2 : 0 * version1 > version2 : 1 */ int compare_version(const char *version1, const char *version2) { int rc = 0; int lpc = 0; char *ver1_copy = NULL, *ver2_copy = NULL; char *rest1 = NULL, *rest2 = NULL; if (version1 == NULL && version2 == NULL) { return 0; } else if (version1 == NULL) { return -1; } else if (version2 == NULL) { return 1; } ver1_copy = strdup(version1); ver2_copy = strdup(version2); rest1 = ver1_copy; rest2 = ver2_copy; while (1) { int digit1 = 0; int digit2 = 0; lpc++; if (rest1 == rest2) { break; } if (rest1 != NULL) { digit1 = crm_version_helper(rest1, &rest1); } if (rest2 != NULL) { digit2 = crm_version_helper(rest2, &rest2); } if (digit1 < digit2) { rc = -1; break; } else if (digit1 > digit2) { rc = 1; break; } if (rest1 != NULL && rest1[0] == '.') { rest1++; } if (rest1 != NULL && rest1[0] == 0) { rest1 = NULL; } if (rest2 != NULL && rest2[0] == '.') { rest2++; } if (rest2 != NULL && rest2[0] == 0) { rest2 = NULL; } } free(ver1_copy); free(ver2_copy); if (rc == 0) { crm_trace("%s == %s (%d)", version1, version2, lpc); } else if (rc < 0) { crm_trace("%s < %s (%d)", version1, version2, lpc); } else if (rc > 0) { crm_trace("%s > %s (%d)", version1, version2, lpc); } return rc; } gboolean do_stderr = FALSE; #ifndef NUMCHARS # define NUMCHARS "0123456789." #endif #ifndef WHITESPACE # define WHITESPACE " \t\n\r\f" #endif unsigned long long crm_get_interval(const char *input) { unsigned long long msec = 0; if (input == NULL) { return msec; } else if (input[0] != 'P') { long long tmp = crm_get_msec(input); if(tmp > 0) { msec = tmp; } } else { crm_time_t *interval = crm_time_parse_duration(input); msec = 1000 * crm_time_get_seconds(interval); crm_time_free(interval); } return msec; } long long crm_get_msec(const char *input) { const char *cp = input; const char *units; long long multiplier = 1000; long long divisor = 1; long long msec = -1; char *end_text = NULL; /* double dret; */ if (input == NULL) { return msec; } cp += strspn(cp, WHITESPACE); units = cp + strspn(cp, NUMCHARS); units += strspn(units, WHITESPACE); if (strchr(NUMCHARS, *cp) == NULL) { return msec; } if (strncasecmp(units, "ms", 2) == 0 || strncasecmp(units, "msec", 4) == 0) { multiplier = 1; divisor = 1; } else if (strncasecmp(units, "us", 2) == 0 || strncasecmp(units, "usec", 4) == 0) { multiplier = 1; divisor = 1000; } else if (strncasecmp(units, "s", 1) == 0 || strncasecmp(units, "sec", 3) == 0) { multiplier = 1000; divisor = 1; } else if (strncasecmp(units, "m", 1) == 0 || strncasecmp(units, "min", 3) == 0) { multiplier = 60 * 1000; divisor = 1; } else if (strncasecmp(units, "h", 1) == 0 || strncasecmp(units, "hr", 2) == 0) { multiplier = 60 * 60 * 1000; divisor = 1; } else if (*units != EOS && *units != '\n' && *units != '\r') { return msec; } msec = crm_int_helper(cp, &end_text); if (msec > LLONG_MAX/multiplier) { /* arithmetics overflow while multiplier/divisor mutually exclusive */ return LLONG_MAX; } msec *= multiplier; msec /= divisor; /* dret += 0.5; */ /* msec = (long long)dret; */ return msec; } +/*! + * \brief Generate an operation key + * + * \param[in] rsc_id ID of resource being operated on + * \param[in] op_type Operation name + * \param[in] interval Operation interval + * + * \return Newly allocated memory containing operation key as string + * + * \note It is the caller's responsibility to free() the result. + */ char * generate_op_key(const char *rsc_id, const char *op_type, int interval) { - int len = 35; - char *op_id = NULL; - - CRM_CHECK(rsc_id != NULL, return NULL); - CRM_CHECK(op_type != NULL, return NULL); - - len += strlen(op_type); - len += strlen(rsc_id); - op_id = malloc(len); - CRM_CHECK(op_id != NULL, return NULL); - sprintf(op_id, "%s_%s_%d", rsc_id, op_type, interval); - return op_id; + CRM_ASSERT(rsc_id != NULL); + CRM_ASSERT(op_type != NULL); + CRM_ASSERT(interval >= 0); + return crm_strdup_printf("%s_%s_%d", rsc_id, op_type, interval); } gboolean parse_op_key(const char *key, char **rsc_id, char **op_type, int *interval) { char *notify = NULL; char *mutable_key = NULL; char *mutable_key_ptr = NULL; int len = 0, offset = 0, ch = 0; CRM_CHECK(key != NULL, return FALSE); *interval = 0; len = strlen(key); offset = len - 1; crm_trace("Source: %s", key); while (offset > 0 && isdigit(key[offset])) { int digits = len - offset; ch = key[offset] - '0'; CRM_CHECK(ch < 10, return FALSE); CRM_CHECK(ch >= 0, return FALSE); while (digits > 1) { digits--; ch = ch * 10; } *interval += ch; offset--; } crm_trace(" Interval: %d", *interval); CRM_CHECK(key[offset] == '_', return FALSE); mutable_key = strdup(key); mutable_key[offset] = 0; offset--; while (offset > 0 && key[offset] != '_') { offset--; } CRM_CHECK(key[offset] == '_', free(mutable_key); return FALSE); mutable_key_ptr = mutable_key + offset + 1; crm_trace(" Action: %s", mutable_key_ptr); *op_type = strdup(mutable_key_ptr); mutable_key[offset] = 0; offset--; CRM_CHECK(mutable_key != mutable_key_ptr, free(mutable_key); return FALSE); notify = strstr(mutable_key, "_post_notify"); if (notify && safe_str_eq(notify, "_post_notify")) { notify[0] = 0; } notify = strstr(mutable_key, "_pre_notify"); if (notify && safe_str_eq(notify, "_pre_notify")) { notify[0] = 0; } crm_trace(" Resource: %s", mutable_key); *rsc_id = mutable_key; return TRUE; } char * generate_notify_key(const char *rsc_id, const char *notify_type, const char *op_type) { int len = 12; char *op_id = NULL; CRM_CHECK(rsc_id != NULL, return NULL); CRM_CHECK(op_type != NULL, return NULL); CRM_CHECK(notify_type != NULL, return NULL); len += strlen(op_type); len += strlen(rsc_id); len += strlen(notify_type); if(len > 0) { op_id = malloc(len); } if (op_id != NULL) { sprintf(op_id, "%s_%s_notify_%s_0", rsc_id, notify_type, op_type); } return op_id; } char * generate_transition_magic_v202(const char *transition_key, int op_status) { int len = 80; char *fail_state = NULL; CRM_CHECK(transition_key != NULL, return NULL); len += strlen(transition_key); fail_state = malloc(len); if (fail_state != NULL) { snprintf(fail_state, len, "%d:%s", op_status, transition_key); } return fail_state; } char * generate_transition_magic(const char *transition_key, int op_status, int op_rc) { int len = 80; char *fail_state = NULL; CRM_CHECK(transition_key != NULL, return NULL); len += strlen(transition_key); fail_state = malloc(len); if (fail_state != NULL) { snprintf(fail_state, len, "%d:%d;%s", op_status, op_rc, transition_key); } return fail_state; } gboolean decode_transition_magic(const char *magic, char **uuid, int *transition_id, int *action_id, int *op_status, int *op_rc, int *target_rc) { int res = 0; char *key = NULL; gboolean result = TRUE; CRM_CHECK(magic != NULL, return FALSE); CRM_CHECK(op_rc != NULL, return FALSE); CRM_CHECK(op_status != NULL, return FALSE); key = calloc(1, strlen(magic) + 1); res = sscanf(magic, "%d:%d;%s", op_status, op_rc, key); if (res != 3) { crm_warn("Only found %d items in: '%s'", res, magic); free(key); return FALSE; } CRM_CHECK(decode_transition_key(key, uuid, transition_id, action_id, target_rc), result = FALSE); free(key); return result; } char * generate_transition_key(int transition_id, int action_id, int target_rc, const char *node) { int len = 40; char *fail_state = NULL; CRM_CHECK(node != NULL, return NULL); len += strlen(node); fail_state = malloc(len); if (fail_state != NULL) { snprintf(fail_state, len, "%d:%d:%d:%-*s", action_id, transition_id, target_rc, 36, node); } return fail_state; } gboolean decode_transition_key(const char *key, char **uuid, int *transition_id, int *action_id, int *target_rc) { int res = 0; gboolean done = FALSE; CRM_CHECK(uuid != NULL, return FALSE); CRM_CHECK(target_rc != NULL, return FALSE); CRM_CHECK(action_id != NULL, return FALSE); CRM_CHECK(transition_id != NULL, return FALSE); *uuid = calloc(1, 37); res = sscanf(key, "%d:%d:%d:%36s", action_id, transition_id, target_rc, *uuid); switch (res) { case 4: /* Post Pacemaker 0.6 */ done = TRUE; break; case 3: case 2: /* this can be tricky - the UUID might start with an integer */ /* Until Pacemaker 0.6 */ done = TRUE; *target_rc = -1; res = sscanf(key, "%d:%d:%36s", action_id, transition_id, *uuid); if (res == 2) { *action_id = -1; res = sscanf(key, "%d:%36s", transition_id, *uuid); CRM_CHECK(res == 2, done = FALSE); } else if (res != 3) { CRM_CHECK(res == 3, done = FALSE); } break; case 1: /* Prior to Heartbeat 2.0.8 */ done = TRUE; *action_id = -1; *target_rc = -1; res = sscanf(key, "%d:%36s", transition_id, *uuid); CRM_CHECK(res == 2, done = FALSE); break; default: crm_crit("Unhandled sscanf result (%d) for %s", res, key); } if (strlen(*uuid) != 36) { crm_warn("Bad UUID (%s) in sscanf result (%d) for %s", *uuid, res, key); } if (done == FALSE) { crm_err("Cannot decode '%s' rc=%d", key, res); free(*uuid); *uuid = NULL; *target_rc = -1; *action_id = -1; *transition_id = -1; } return done; } void filter_action_parameters(xmlNode * param_set, const char *version) { char *key = NULL; char *timeout = NULL; char *interval = NULL; const char *attr_filter[] = { XML_ATTR_ID, XML_ATTR_CRM_VERSION, XML_LRM_ATTR_OP_DIGEST, XML_LRM_ATTR_TARGET, XML_LRM_ATTR_TARGET_UUID, }; gboolean do_delete = FALSE; int lpc = 0; static int meta_len = 0; if (meta_len == 0) { meta_len = strlen(CRM_META); } if (param_set == NULL) { return; } for (lpc = 0; lpc < DIMOF(attr_filter); lpc++) { xml_remove_prop(param_set, attr_filter[lpc]); } key = crm_meta_name(XML_LRM_ATTR_INTERVAL); interval = crm_element_value_copy(param_set, key); free(key); key = crm_meta_name(XML_ATTR_TIMEOUT); timeout = crm_element_value_copy(param_set, key); if (param_set) { xmlAttrPtr xIter = param_set->properties; while (xIter) { const char *prop_name = (const char *)xIter->name; xIter = xIter->next; do_delete = FALSE; if (strncasecmp(prop_name, CRM_META, meta_len) == 0) { do_delete = TRUE; } if (do_delete) { xml_remove_prop(param_set, prop_name); } } } if (crm_get_msec(interval) > 0 && compare_version(version, "1.0.8") > 0) { /* Re-instate the operation's timeout value */ if (timeout != NULL) { crm_xml_add(param_set, key, timeout); } } free(interval); free(timeout); free(key); } extern bool crm_is_daemon; /* coverity[+kill] */ void crm_abort(const char *file, const char *function, int line, const char *assert_condition, gboolean do_core, gboolean do_fork) { int rc = 0; int pid = 0; int status = 0; /* Implied by the parent's error logging below */ /* crm_write_blackbox(0); */ if(crm_is_daemon == FALSE) { /* This is a command line tool - do not fork */ /* crm_add_logfile(NULL); * Record it to a file? */ crm_enable_stderr(TRUE); /* Make sure stderr is enabled so we can tell the caller */ do_fork = FALSE; /* Just crash if needed */ } if (do_core == FALSE) { crm_err("%s: Triggered assert at %s:%d : %s", function, file, line, assert_condition); return; } else if (do_fork) { pid = fork(); } else { crm_err("%s: Triggered fatal assert at %s:%d : %s", function, file, line, assert_condition); } if (pid == -1) { crm_crit("%s: Cannot create core for non-fatal assert at %s:%d : %s", function, file, line, assert_condition); return; } else if(pid == 0) { /* Child process */ abort(); return; } /* Parent process */ crm_err("%s: Forked child %d to record non-fatal assert at %s:%d : %s", function, pid, file, line, assert_condition); crm_write_blackbox(SIGTRAP, NULL); do { rc = waitpid(pid, &status, 0); if(rc == pid) { return; /* Job done */ } } while(errno == EINTR); if (errno == ECHILD) { /* crm_mon does this */ crm_trace("Cannot wait on forked child %d - SIGCHLD is probably set to SIG_IGN", pid); return; } crm_perror(LOG_ERR, "Cannot wait on forked child %d", pid); } int crm_pid_active(long pid, const char *daemon) { static int have_proc_pid = 0; if(have_proc_pid == 0) { char proc_path[PATH_MAX], exe_path[PATH_MAX]; /* check to make sure pid hasn't been reused by another process */ snprintf(proc_path, sizeof(proc_path), "/proc/%lu/exe", (long unsigned int)getpid()); have_proc_pid = 1; if(readlink(proc_path, exe_path, PATH_MAX - 1) < 0) { have_proc_pid = -1; } } if (pid <= 0) { return -1; } else if (kill(pid, 0) < 0 && errno == ESRCH) { return 0; } else if(daemon == NULL || have_proc_pid == -1) { return 1; } else { int rc = 0; char proc_path[PATH_MAX], exe_path[PATH_MAX], myexe_path[PATH_MAX]; /* check to make sure pid hasn't been reused by another process */ snprintf(proc_path, sizeof(proc_path), "/proc/%lu/exe", pid); rc = readlink(proc_path, exe_path, PATH_MAX - 1); if (rc < 0 && errno == EACCES) { crm_perror(LOG_INFO, "Could not read from %s", proc_path); return 1; } else if (rc < 0) { crm_perror(LOG_ERR, "Could not read from %s", proc_path); return 0; } exe_path[rc] = 0; if(daemon[0] != '/') { rc = snprintf(myexe_path, sizeof(proc_path), CRM_DAEMON_DIR"/%s", daemon); myexe_path[rc] = 0; } else { rc = snprintf(myexe_path, sizeof(proc_path), "%s", daemon); myexe_path[rc] = 0; } if (strcmp(exe_path, myexe_path) == 0) { return 1; } } return 0; } #define LOCKSTRLEN 11 long crm_read_pidfile(const char *filename) { int fd; struct stat sbuf; long pid = -ENOENT; char buf[LOCKSTRLEN + 1]; if ((fd = open(filename, O_RDONLY)) < 0) { goto bail; } if (fstat(fd, &sbuf) >= 0 && sbuf.st_size < LOCKSTRLEN) { sleep(2); /* if someone was about to create one, * give'm a sec to do so */ } if (read(fd, buf, sizeof(buf)) < 1) { goto bail; } if (sscanf(buf, "%lu", &pid) > 0) { if (pid <= 0) { pid = -ESRCH; } else { crm_trace("Got pid %lu from %s\n", pid, filename); } } bail: if (fd >= 0) { close(fd); } return pid; } long crm_pidfile_inuse(const char *filename, long mypid, const char *daemon) { long pid = crm_read_pidfile(filename); if (pid < 2) { /* Invalid pid */ pid = -ENOENT; unlink(filename); } else if (mypid && pid == mypid) { /* In use by us */ pid = pcmk_ok; } else if (crm_pid_active(pid, daemon) == FALSE) { /* Contains a stale value */ unlink(filename); pid = -ENOENT; } else if (mypid && pid != mypid) { /* locked by existing process - give up */ pid = -EEXIST; } return pid; } static int crm_lock_pidfile(const char *filename, const char *name) { long mypid = 0; int fd = 0, rc = 0; char buf[LOCKSTRLEN + 1]; mypid = (unsigned long)getpid(); rc = crm_pidfile_inuse(filename, 0, name); if (rc == -ENOENT) { /* exists but the process is not active */ } else if (rc != pcmk_ok) { /* locked by existing process - give up */ return rc; } if ((fd = open(filename, O_CREAT | O_WRONLY | O_EXCL, 0644)) < 0) { /* Hmmh, why did we fail? Anyway, nothing we can do about it */ return -errno; } snprintf(buf, sizeof(buf), "%*lu\n", LOCKSTRLEN - 1, mypid); rc = write(fd, buf, LOCKSTRLEN); close(fd); if (rc != LOCKSTRLEN) { crm_perror(LOG_ERR, "Incomplete write to %s", filename); return -errno; } return crm_pidfile_inuse(filename, mypid, name); } void crm_make_daemon(const char *name, gboolean daemonize, const char *pidfile) { int rc; long pid; const char *devnull = "/dev/null"; if (daemonize == FALSE) { return; } /* Check before we even try... */ rc = crm_pidfile_inuse(pidfile, 1, name); if(rc < pcmk_ok && rc != -ENOENT) { pid = crm_read_pidfile(pidfile); crm_err("%s: already running [pid %ld in %s]", name, pid, pidfile); printf("%s: already running [pid %ld in %s]\n", name, pid, pidfile); crm_exit(rc); } pid = fork(); if (pid < 0) { fprintf(stderr, "%s: could not start daemon\n", name); crm_perror(LOG_ERR, "fork"); crm_exit(EINVAL); } else if (pid > 0) { crm_exit(pcmk_ok); } rc = crm_lock_pidfile(pidfile, name); if(rc < pcmk_ok) { crm_err("Could not lock '%s' for %s: %s (%d)", pidfile, name, pcmk_strerror(rc), rc); printf("Could not lock '%s' for %s: %s (%d)\n", pidfile, name, pcmk_strerror(rc), rc); crm_exit(rc); } umask(S_IWGRP | S_IWOTH | S_IROTH); close(STDIN_FILENO); (void)open(devnull, O_RDONLY); /* Stdin: fd 0 */ close(STDOUT_FILENO); (void)open(devnull, O_WRONLY); /* Stdout: fd 1 */ close(STDERR_FILENO); (void)open(devnull, O_WRONLY); /* Stderr: fd 2 */ } char * crm_meta_name(const char *field) { int lpc = 0; int max = 0; char *crm_name = NULL; CRM_CHECK(field != NULL, return NULL); crm_name = crm_concat(CRM_META, field, '_'); /* Massage the names so they can be used as shell variables */ max = strlen(crm_name); for (; lpc < max; lpc++) { switch (crm_name[lpc]) { case '-': crm_name[lpc] = '_'; break; } } return crm_name; } const char * crm_meta_value(GHashTable * hash, const char *field) { char *key = NULL; const char *value = NULL; key = crm_meta_name(field); if (key) { value = g_hash_table_lookup(hash, key); free(key); } return value; } static struct option * crm_create_long_opts(struct crm_option *long_options) { struct option *long_opts = NULL; #ifdef HAVE_GETOPT_H int index = 0, lpc = 0; /* * A previous, possibly poor, choice of '?' as the short form of --help * means that getopt_long() returns '?' for both --help and for "unknown option" * * This dummy entry allows us to differentiate between the two in crm_get_option() * and exit with the correct error code */ long_opts = realloc_safe(long_opts, (index + 1) * sizeof(struct option)); long_opts[index].name = "__dummmy__"; long_opts[index].has_arg = 0; long_opts[index].flag = 0; long_opts[index].val = '_'; index++; for (lpc = 0; long_options[lpc].name != NULL; lpc++) { if (long_options[lpc].name[0] == '-') { continue; } long_opts = realloc_safe(long_opts, (index + 1) * sizeof(struct option)); /*fprintf(stderr, "Creating %d %s = %c\n", index, * long_options[lpc].name, long_options[lpc].val); */ long_opts[index].name = long_options[lpc].name; long_opts[index].has_arg = long_options[lpc].has_arg; long_opts[index].flag = long_options[lpc].flag; long_opts[index].val = long_options[lpc].val; index++; } /* Now create the list terminator */ long_opts = realloc_safe(long_opts, (index + 1) * sizeof(struct option)); long_opts[index].name = NULL; long_opts[index].has_arg = 0; long_opts[index].flag = 0; long_opts[index].val = 0; #endif return long_opts; } void crm_set_options(const char *short_options, const char *app_usage, struct crm_option *long_options, const char *app_desc) { if (short_options) { crm_short_options = strdup(short_options); } else if (long_options) { int lpc = 0; int opt_string_len = 0; char *local_short_options = NULL; for (lpc = 0; long_options[lpc].name != NULL; lpc++) { if (long_options[lpc].val && long_options[lpc].val != '-' && long_options[lpc].val < UCHAR_MAX) { local_short_options = realloc_safe(local_short_options, opt_string_len + 4); local_short_options[opt_string_len++] = long_options[lpc].val; /* getopt(3) says: Two colons mean an option takes an optional arg; */ if (long_options[lpc].has_arg == optional_argument) { local_short_options[opt_string_len++] = ':'; } if (long_options[lpc].has_arg >= required_argument) { local_short_options[opt_string_len++] = ':'; } local_short_options[opt_string_len] = 0; } } crm_short_options = local_short_options; crm_trace("Generated short option string: '%s'", local_short_options); } if (long_options) { crm_long_options = long_options; } if (app_desc) { crm_app_description = app_desc; } if (app_usage) { crm_app_usage = app_usage; } } int crm_get_option(int argc, char **argv, int *index) { return crm_get_option_long(argc, argv, index, NULL); } int crm_get_option_long(int argc, char **argv, int *index, const char **longname) { #ifdef HAVE_GETOPT_H static struct option *long_opts = NULL; if (long_opts == NULL && crm_long_options) { long_opts = crm_create_long_opts(crm_long_options); } *index = 0; if (long_opts) { int flag = getopt_long(argc, argv, crm_short_options, long_opts, index); switch (flag) { case 0: if (long_opts[*index].val) { return long_opts[*index].val; } else if (longname) { *longname = long_opts[*index].name; } else { crm_notice("Unhandled option --%s", long_opts[*index].name); return flag; } case -1: /* End of option processing */ break; case ':': crm_trace("Missing argument"); crm_help('?', 1); break; case '?': crm_help('?', *index ? 0 : 1); break; } return flag; } #endif if (crm_short_options) { return getopt(argc, argv, crm_short_options); } return -1; } int crm_help(char cmd, int exit_code) { int i = 0; FILE *stream = (exit_code ? stderr : stdout); if (cmd == 'v' || cmd == '$') { fprintf(stream, "Pacemaker %s\n", PACEMAKER_VERSION); fprintf(stream, "Written by Andrew Beekhof\n"); goto out; } if (cmd == '!') { fprintf(stream, "Pacemaker %s (Build: %s): %s\n", PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURES); goto out; } fprintf(stream, "%s - %s\n", crm_system_name, crm_app_description); if (crm_app_usage) { fprintf(stream, "Usage: %s %s\n", crm_system_name, crm_app_usage); } if (crm_long_options) { fprintf(stream, "Options:\n"); for (i = 0; crm_long_options[i].name != NULL; i++) { if (crm_long_options[i].flags & pcmk_option_hidden) { } else if (crm_long_options[i].flags & pcmk_option_paragraph) { fprintf(stream, "%s\n\n", crm_long_options[i].desc); } else if (crm_long_options[i].flags & pcmk_option_example) { fprintf(stream, "\t#%s\n\n", crm_long_options[i].desc); } else if (crm_long_options[i].val == '-' && crm_long_options[i].desc) { fprintf(stream, "%s\n", crm_long_options[i].desc); } else { /* is val printable as char ? */ if (crm_long_options[i].val && crm_long_options[i].val <= UCHAR_MAX) { fprintf(stream, " -%c,", crm_long_options[i].val); } else { fputs(" ", stream); } fprintf(stream, " --%s%s\t%s\n", crm_long_options[i].name, crm_long_options[i].has_arg == optional_argument ? "[=value]" : crm_long_options[i].has_arg == required_argument ? "=value" : "", crm_long_options[i].desc ? crm_long_options[i].desc : ""); } } } else if (crm_short_options) { fprintf(stream, "Usage: %s - %s\n", crm_system_name, crm_app_description); for (i = 0; crm_short_options[i] != 0; i++) { int has_arg = no_argument /* 0 */; if (crm_short_options[i + 1] == ':') { if (crm_short_options[i + 2] == ':') has_arg = optional_argument /* 2 */; else has_arg = required_argument /* 1 */; } fprintf(stream, " -%c %s\n", crm_short_options[i], has_arg == optional_argument ? "[value]" : has_arg == required_argument ? "{value}" : ""); i += has_arg; } } fprintf(stream, "\nReport bugs to %s\n", PACKAGE_BUGREPORT); out: return crm_exit(exit_code); } void cib_ipc_servers_init(qb_ipcs_service_t **ipcs_ro, qb_ipcs_service_t **ipcs_rw, qb_ipcs_service_t **ipcs_shm, struct qb_ipcs_service_handlers *ro_cb, struct qb_ipcs_service_handlers *rw_cb) { *ipcs_ro = mainloop_add_ipc_server(cib_channel_ro, QB_IPC_NATIVE, ro_cb); *ipcs_rw = mainloop_add_ipc_server(cib_channel_rw, QB_IPC_NATIVE, rw_cb); *ipcs_shm = mainloop_add_ipc_server(cib_channel_shm, QB_IPC_SHM, rw_cb); if (*ipcs_ro == NULL || *ipcs_rw == NULL || *ipcs_shm == NULL) { crm_err("Failed to create cib servers: exiting and inhibiting respawn."); crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); crm_exit(DAEMON_RESPAWN_STOP); } } void cib_ipc_servers_destroy(qb_ipcs_service_t *ipcs_ro, qb_ipcs_service_t *ipcs_rw, qb_ipcs_service_t *ipcs_shm) { qb_ipcs_destroy(ipcs_ro); qb_ipcs_destroy(ipcs_rw); qb_ipcs_destroy(ipcs_shm); } qb_ipcs_service_t * crmd_ipc_server_init(struct qb_ipcs_service_handlers *cb) { return mainloop_add_ipc_server(CRM_SYSTEM_CRMD, QB_IPC_NATIVE, cb); } void attrd_ipc_server_init(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb) { *ipcs = mainloop_add_ipc_server(T_ATTRD, QB_IPC_NATIVE, cb); if (*ipcs == NULL) { crm_err("Failed to create attrd servers: exiting and inhibiting respawn."); crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); crm_exit(DAEMON_RESPAWN_STOP); } } void stonith_ipc_server_init(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb) { *ipcs = mainloop_add_ipc_server("stonith-ng", QB_IPC_NATIVE, cb); if (*ipcs == NULL) { crm_err("Failed to create stonith-ng servers: exiting and inhibiting respawn."); crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); crm_exit(DAEMON_RESPAWN_STOP); } } int attrd_update_delegate(crm_ipc_t * ipc, char command, const char *host, const char *name, const char *value, const char *section, const char *set, const char *dampen, const char *user_name, int options) { int rc = -ENOTCONN; int max = 5; const char *task = NULL; const char *name_as = NULL; const char *display_host = (host ? host : "localhost"); const char *display_command = NULL; /* for commands without name/value */ xmlNode *update = create_xml_node(NULL, __FUNCTION__); static gboolean connected = TRUE; static crm_ipc_t *local_ipc = NULL; static enum crm_ipc_flags flags = crm_ipc_flags_none; if (ipc == NULL && local_ipc == NULL) { local_ipc = crm_ipc_new(T_ATTRD, 0); flags |= crm_ipc_client_response; connected = FALSE; } if (ipc == NULL) { ipc = local_ipc; } /* remap common aliases */ if (safe_str_eq(section, "reboot")) { section = XML_CIB_TAG_STATUS; } else if (safe_str_eq(section, "forever")) { section = XML_CIB_TAG_NODES; } crm_xml_add(update, F_TYPE, T_ATTRD); crm_xml_add(update, F_ORIG, crm_system_name?crm_system_name:"unknown"); if (name == NULL && command == 'U') { command = 'R'; } switch (command) { case 'u': task = ATTRD_OP_UPDATE; name_as = F_ATTRD_REGEX; break; case 'D': case 'U': case 'v': task = ATTRD_OP_UPDATE; name_as = F_ATTRD_ATTRIBUTE; break; case 'R': task = ATTRD_OP_REFRESH; display_command = "refresh"; break; case 'B': task = ATTRD_OP_UPDATE_BOTH; name_as = F_ATTRD_ATTRIBUTE; break; case 'Y': task = ATTRD_OP_UPDATE_DELAY; name_as = F_ATTRD_ATTRIBUTE; break; case 'Q': task = ATTRD_OP_QUERY; name_as = F_ATTRD_ATTRIBUTE; break; case 'C': task = ATTRD_OP_PEER_REMOVE; display_command = "purge"; break; } if (name_as != NULL) { if (name == NULL) { rc = -EINVAL; goto done; } crm_xml_add(update, name_as, name); } crm_xml_add(update, F_ATTRD_TASK, task); crm_xml_add(update, F_ATTRD_VALUE, value); crm_xml_add(update, F_ATTRD_DAMPEN, dampen); crm_xml_add(update, F_ATTRD_SECTION, section); crm_xml_add(update, F_ATTRD_HOST, host); crm_xml_add(update, F_ATTRD_SET, set); crm_xml_add_int(update, F_ATTRD_IS_REMOTE, is_set(options, attrd_opt_remote)); crm_xml_add_int(update, F_ATTRD_IS_PRIVATE, is_set(options, attrd_opt_private)); #if ENABLE_ACL if (user_name) { crm_xml_add(update, F_ATTRD_USER, user_name); } #endif while (max > 0) { if (connected == FALSE) { crm_info("Connecting to cluster... %d retries remaining", max); connected = crm_ipc_connect(ipc); } if (connected) { rc = crm_ipc_send(ipc, update, flags, 0, NULL); } else { crm_perror(LOG_INFO, "Connection to cluster attribute manager failed"); } if (ipc != local_ipc) { break; } else if (rc > 0) { break; } else if (rc == -EAGAIN || rc == -EALREADY) { sleep(5 - max); max--; } else { crm_ipc_close(ipc); connected = FALSE; sleep(5 - max); max--; } } done: free_xml(update); if (rc > 0) { rc = pcmk_ok; } if (display_command) { crm_debug("Asked attrd to %s %s: %s (%d)", display_command, display_host, pcmk_strerror(rc), rc); } else { crm_debug("Asked attrd to update %s=%s for %s: %s (%d)", name, value, display_host, pcmk_strerror(rc), rc); } return rc; } #define FAKE_TE_ID "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" static void append_digest(lrmd_event_data_t * op, xmlNode * update, const char *version, const char *magic, int level) { /* this will enable us to later determine that the * resource's parameters have changed and we should force * a restart */ char *digest = NULL; xmlNode *args_xml = NULL; if (op->params == NULL) { return; } args_xml = create_xml_node(NULL, XML_TAG_PARAMS); g_hash_table_foreach(op->params, hash2field, args_xml); filter_action_parameters(args_xml, version); crm_summarize_versioned_params(args_xml, op->versioned_params); digest = calculate_operation_digest(args_xml, version); #if 0 if (level < get_crm_log_level() && op->interval == 0 && crm_str_eq(op->op_type, CRMD_ACTION_START, TRUE)) { char *digest_source = dump_xml_unformatted(args_xml); do_crm_log(level, "Calculated digest %s for %s (%s). Source: %s\n", digest, ID(update), magic, digest_source); free(digest_source); } #endif crm_xml_add(update, XML_LRM_ATTR_OP_DIGEST, digest); free_xml(args_xml); free(digest); } int rsc_op_expected_rc(lrmd_event_data_t * op) { int rc = 0; if (op && op->user_data) { int dummy = 0; char *uuid = NULL; decode_transition_key(op->user_data, &uuid, &dummy, &dummy, &rc); free(uuid); } return rc; } gboolean did_rsc_op_fail(lrmd_event_data_t * op, int target_rc) { switch (op->op_status) { case PCMK_LRM_OP_CANCELLED: case PCMK_LRM_OP_PENDING: return FALSE; break; case PCMK_LRM_OP_NOTSUPPORTED: case PCMK_LRM_OP_TIMEOUT: case PCMK_LRM_OP_ERROR: return TRUE; break; default: if (target_rc != op->rc) { return TRUE; } } return FALSE; } xmlNode * create_operation_update(xmlNode * parent, lrmd_event_data_t * op, const char * caller_version, int target_rc, const char * node, const char * origin, int level) { char *key = NULL; char *magic = NULL; char *op_id = NULL; char *op_id_additional = NULL; char *local_user_data = NULL; const char *exit_reason = NULL; xmlNode *xml_op = NULL; const char *task = NULL; gboolean dc_munges_migrate_ops = (compare_version(caller_version, "3.0.3") < 0); gboolean dc_needs_unique_ops = (compare_version(caller_version, "3.0.6") < 0); CRM_CHECK(op != NULL, return NULL); do_crm_log(level, "%s: Updating resource %s after %s op %s (interval=%d)", origin, op->rsc_id, op->op_type, services_lrm_status_str(op->op_status), op->interval); crm_trace("DC version: %s", caller_version); task = op->op_type; /* remap the task name under various scenarios * this makes life easier for the PE when trying determine the current state */ if (crm_str_eq(task, "reload", TRUE)) { if (op->op_status == PCMK_LRM_OP_DONE) { task = CRMD_ACTION_START; } else { task = CRMD_ACTION_STATUS; } } else if (dc_munges_migrate_ops && crm_str_eq(task, CRMD_ACTION_MIGRATE, TRUE)) { /* if the migrate_from fails it will have enough info to do the right thing */ if (op->op_status == PCMK_LRM_OP_DONE) { task = CRMD_ACTION_STOP; } else { task = CRMD_ACTION_STATUS; } } else if (dc_munges_migrate_ops && op->op_status == PCMK_LRM_OP_DONE && crm_str_eq(task, CRMD_ACTION_MIGRATED, TRUE)) { task = CRMD_ACTION_START; } key = generate_op_key(op->rsc_id, task, op->interval); if (dc_needs_unique_ops && op->interval > 0) { op_id = strdup(key); } else if (crm_str_eq(task, CRMD_ACTION_NOTIFY, TRUE)) { const char *n_type = crm_meta_value(op->params, "notify_type"); const char *n_task = crm_meta_value(op->params, "notify_operation"); CRM_LOG_ASSERT(n_type != NULL); CRM_LOG_ASSERT(n_task != NULL); op_id = generate_notify_key(op->rsc_id, n_type, n_task); /* these are not yet allowed to fail */ op->op_status = PCMK_LRM_OP_DONE; op->rc = 0; } else if (did_rsc_op_fail(op, target_rc)) { op_id = generate_op_key(op->rsc_id, "last_failure", 0); if (op->interval == 0) { /* Ensure 'last' gets updated too in case recording-pending="true" */ op_id_additional = generate_op_key(op->rsc_id, "last", 0); } exit_reason = op->exit_reason; } else if (op->interval > 0) { op_id = strdup(key); } else { op_id = generate_op_key(op->rsc_id, "last", 0); } again: xml_op = find_entity(parent, XML_LRM_TAG_RSC_OP, op_id); if (xml_op == NULL) { xml_op = create_xml_node(parent, XML_LRM_TAG_RSC_OP); } if (op->user_data == NULL) { crm_debug("Generating fake transition key for:" " %s_%s_%d %d from %s", op->rsc_id, op->op_type, op->interval, op->call_id, origin); local_user_data = generate_transition_key(-1, op->call_id, target_rc, FAKE_TE_ID); op->user_data = local_user_data; } if(magic == NULL) { magic = generate_transition_magic(op->user_data, op->op_status, op->rc); } crm_xml_add(xml_op, XML_ATTR_ID, op_id); crm_xml_add(xml_op, XML_LRM_ATTR_TASK_KEY, key); crm_xml_add(xml_op, XML_LRM_ATTR_TASK, task); crm_xml_add(xml_op, XML_ATTR_ORIGIN, origin); crm_xml_add(xml_op, XML_ATTR_CRM_VERSION, caller_version); crm_xml_add(xml_op, XML_ATTR_TRANSITION_KEY, op->user_data); crm_xml_add(xml_op, XML_ATTR_TRANSITION_MAGIC, magic); crm_xml_add(xml_op, XML_LRM_ATTR_EXIT_REASON, exit_reason); crm_xml_add(xml_op, XML_LRM_ATTR_TARGET, node); /* For context during triage */ crm_xml_add_int(xml_op, XML_LRM_ATTR_CALLID, op->call_id); crm_xml_add_int(xml_op, XML_LRM_ATTR_RC, op->rc); crm_xml_add_int(xml_op, XML_LRM_ATTR_OPSTATUS, op->op_status); crm_xml_add_int(xml_op, XML_LRM_ATTR_INTERVAL, op->interval); if (compare_version("2.1", caller_version) <= 0) { if (op->t_run || op->t_rcchange || op->exec_time || op->queue_time) { crm_trace("Timing data (%s_%s_%d): last=%u change=%u exec=%u queue=%u", op->rsc_id, op->op_type, op->interval, op->t_run, op->t_rcchange, op->exec_time, op->queue_time); if (op->interval == 0) { /* The values are the same for non-recurring ops */ crm_xml_add_int(xml_op, XML_RSC_OP_LAST_RUN, op->t_run); crm_xml_add_int(xml_op, XML_RSC_OP_LAST_CHANGE, op->t_run); } else if(op->t_rcchange) { /* last-run is not accurate for recurring ops */ crm_xml_add_int(xml_op, XML_RSC_OP_LAST_CHANGE, op->t_rcchange); } else { /* ...but is better than nothing otherwise */ crm_xml_add_int(xml_op, XML_RSC_OP_LAST_CHANGE, op->t_run); } crm_xml_add_int(xml_op, XML_RSC_OP_T_EXEC, op->exec_time); crm_xml_add_int(xml_op, XML_RSC_OP_T_QUEUE, op->queue_time); } } if (crm_str_eq(op->op_type, CRMD_ACTION_MIGRATE, TRUE) || crm_str_eq(op->op_type, CRMD_ACTION_MIGRATED, TRUE)) { /* * Record migrate_source and migrate_target always for migrate ops. */ const char *name = XML_LRM_ATTR_MIGRATE_SOURCE; crm_xml_add(xml_op, name, crm_meta_value(op->params, name)); name = XML_LRM_ATTR_MIGRATE_TARGET; crm_xml_add(xml_op, name, crm_meta_value(op->params, name)); } append_digest(op, xml_op, caller_version, magic, LOG_DEBUG); if (op_id_additional) { free(op_id); op_id = op_id_additional; op_id_additional = NULL; goto again; } if (local_user_data) { free(local_user_data); op->user_data = NULL; } free(magic); free(op_id); free(key); return xml_op; } bool pcmk_acl_required(const char *user) { #if ENABLE_ACL if(user == NULL || strlen(user) == 0) { crm_trace("no user set"); return FALSE; } else if (strcmp(user, CRM_DAEMON_USER) == 0) { return FALSE; } else if (strcmp(user, "root") == 0) { return FALSE; } crm_trace("acls required for %s", user); return TRUE; #else crm_trace("acls not supported"); return FALSE; #endif } #if ENABLE_ACL char * uid2username(uid_t uid) { struct passwd *pwent = getpwuid(uid); if (pwent == NULL) { crm_perror(LOG_ERR, "Cannot get password entry of uid: %d", uid); return NULL; } else { return strdup(pwent->pw_name); } } const char * crm_acl_get_set_user(xmlNode * request, const char *field, const char *peer_user) { /* field is only checked for backwards compatibility */ static const char *effective_user = NULL; const char *requested_user = NULL; const char *user = NULL; if(effective_user == NULL) { effective_user = uid2username(geteuid()); } requested_user = crm_element_value(request, XML_ACL_TAG_USER); if(requested_user == NULL) { requested_user = crm_element_value(request, field); } if (is_privileged(effective_user) == FALSE) { /* We're not running as a privileged user, set or overwrite any existing value for $XML_ACL_TAG_USER */ user = effective_user; } else if(peer_user == NULL && requested_user == NULL) { /* No user known or requested, use 'effective_user' and make sure one is set for the request */ user = effective_user; } else if(peer_user == NULL) { /* No user known, trusting 'requested_user' */ user = requested_user; } else if (is_privileged(peer_user) == FALSE) { /* The peer is not a privileged user, set or overwrite any existing value for $XML_ACL_TAG_USER */ user = peer_user; } else if (requested_user == NULL) { /* Even if we're privileged, make sure there is always a value set */ user = peer_user; } else { /* Legal delegation to 'requested_user' */ user = requested_user; } /* Yes, pointer comparision */ if(user != crm_element_value(request, XML_ACL_TAG_USER)) { crm_xml_add(request, XML_ACL_TAG_USER, user); } if(field != NULL && user != crm_element_value(request, field)) { crm_xml_add(request, field, user); } return requested_user; } void determine_request_user(const char *user, xmlNode * request, const char *field) { /* Get our internal validation out of the way first */ CRM_CHECK(user != NULL && request != NULL && field != NULL, return); /* If our peer is a privileged user, we might be doing something on behalf of someone else */ if (is_privileged(user) == FALSE) { /* We're not a privileged user, set or overwrite any existing value for $field */ crm_xml_replace(request, field, user); } else if (crm_element_value(request, field) == NULL) { /* Even if we're privileged, make sure there is always a value set */ crm_xml_replace(request, field, user); /* } else { Legal delegation */ } crm_trace("Processing msg as user '%s'", crm_element_value(request, field)); } #endif void * find_library_function(void **handle, const char *lib, const char *fn, gboolean fatal) { char *error; void *a_function; if (*handle == NULL) { *handle = dlopen(lib, RTLD_LAZY); } if (!(*handle)) { crm_err("%sCould not open %s: %s", fatal ? "Fatal: " : "", lib, dlerror()); if (fatal) { crm_exit(DAEMON_RESPAWN_STOP); } return NULL; } a_function = dlsym(*handle, fn); if (a_function == NULL) { error = dlerror(); crm_err("%sCould not find %s in %s: %s", fatal ? "Fatal: " : "", fn, lib, error); if (fatal) { crm_exit(DAEMON_RESPAWN_STOP); } } return a_function; } void * convert_const_pointer(const void *ptr) { /* Worst function ever */ return (void *)ptr; } #ifdef HAVE_UUID_UUID_H # include #endif char * crm_generate_uuid(void) { unsigned char uuid[16]; char *buffer = malloc(37); /* Including NUL byte */ uuid_generate(uuid); uuid_unparse(uuid, buffer); return buffer; } #include char * crm_md5sum(const char *buffer) { int lpc = 0, len = 0; char *digest = NULL; unsigned char raw_digest[MD5_DIGEST_SIZE]; if (buffer == NULL) { buffer = ""; } len = strlen(buffer); crm_trace("Beginning digest of %d bytes", len); digest = malloc(2 * MD5_DIGEST_SIZE + 1); if(digest) { md5_buffer(buffer, len, raw_digest); for (lpc = 0; lpc < MD5_DIGEST_SIZE; lpc++) { sprintf(digest + (2 * lpc), "%02x", raw_digest[lpc]); } digest[(2 * MD5_DIGEST_SIZE)] = 0; crm_trace("Digest %s.", digest); } else { crm_err("Could not create digest"); } return digest; } #ifdef HAVE_GNUTLS_GNUTLS_H void crm_gnutls_global_init(void) { signal(SIGPIPE, SIG_IGN); gnutls_global_init(); } #endif diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c index e00907830e..64b66b14d3 100644 --- a/lib/lrmd/lrmd_client.c +++ b/lib/lrmd/lrmd_client.c @@ -1,2248 +1,2249 @@ /* * Copyright (c) 2012 David Vossel * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_GNUTLS_GNUTLS_H # undef KEYFILE # include #endif #include #include #include #include #include #define MAX_TLS_RECV_WAIT 10000 CRM_TRACE_INIT_DATA(lrmd); static int lrmd_api_disconnect(lrmd_t * lrmd); static int lrmd_api_is_connected(lrmd_t * lrmd); /* IPC proxy functions */ int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg); static void lrmd_internal_proxy_dispatch(lrmd_t *lrmd, xmlNode *msg); void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)); #ifdef HAVE_GNUTLS_GNUTLS_H # define LRMD_CLIENT_HANDSHAKE_TIMEOUT 5000 /* 5 seconds */ gnutls_psk_client_credentials_t psk_cred_s; int lrmd_tls_set_key(gnutls_datum_t * key); static void lrmd_tls_disconnect(lrmd_t * lrmd); static int global_remote_msg_id = 0; int lrmd_tls_send_msg(crm_remote_t * session, xmlNode * msg, uint32_t id, const char *msg_type); static void lrmd_tls_connection_destroy(gpointer userdata); #endif typedef struct lrmd_private_s { enum client_type type; char *token; mainloop_io_t *source; /* IPC parameters */ crm_ipc_t *ipc; crm_remote_t *remote; /* Extra TLS parameters */ char *remote_nodename; #ifdef HAVE_GNUTLS_GNUTLS_H char *server; int port; gnutls_psk_client_credentials_t psk_cred_c; /* while the async connection is occuring, this is the id * of the connection timeout timer. */ int async_timer; int sock; /* since tls requires a round trip across the network for a * request/reply, there are times where we just want to be able * to send a request from the client and not wait around (or even care * about) what the reply is. */ int expected_late_replies; GList *pending_notify; crm_trigger_t *process_notify; #endif lrmd_event_callback callback; /* Internal IPC proxy msg passing for remote guests */ void (*proxy_callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg); void *proxy_callback_userdata; char *peer_version; } lrmd_private_t; static lrmd_list_t * lrmd_list_add(lrmd_list_t * head, const char *value) { lrmd_list_t *p, *end; p = calloc(1, sizeof(lrmd_list_t)); p->val = strdup(value); end = head; while (end && end->next) { end = end->next; } if (end) { end->next = p; } else { head = p; } return head; } void lrmd_list_freeall(lrmd_list_t * head) { lrmd_list_t *p; while (head) { char *val = (char *)head->val; p = head->next; free(val); free(head); head = p; } } lrmd_key_value_t * lrmd_key_value_add(lrmd_key_value_t * head, const char *key, const char *value) { lrmd_key_value_t *p, *end; p = calloc(1, sizeof(lrmd_key_value_t)); p->key = strdup(key); p->value = strdup(value); end = head; while (end && end->next) { end = end->next; } if (end) { end->next = p; } else { head = p; } return head; } void lrmd_key_value_freeall(lrmd_key_value_t * head) { lrmd_key_value_t *p; while (head) { p = head->next; free(head->key); free(head->value); free(head); head = p; } } static void dup_attr(gpointer key, gpointer value, gpointer user_data) { g_hash_table_replace(user_data, strdup(key), strdup(value)); } lrmd_event_data_t * lrmd_copy_event(lrmd_event_data_t * event) { lrmd_event_data_t *copy = NULL; copy = calloc(1, sizeof(lrmd_event_data_t)); /* This will get all the int values. * we just have to be careful not to leave any * dangling pointers to strings. */ memcpy(copy, event, sizeof(lrmd_event_data_t)); copy->rsc_id = event->rsc_id ? strdup(event->rsc_id) : NULL; copy->op_type = event->op_type ? strdup(event->op_type) : NULL; copy->user_data = event->user_data ? strdup(event->user_data) : NULL; copy->output = event->output ? strdup(event->output) : NULL; copy->exit_reason = event->exit_reason ? strdup(event->exit_reason) : NULL; copy->remote_nodename = event->remote_nodename ? strdup(event->remote_nodename) : NULL; if (event->params) { copy->params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if (copy->params != NULL) { g_hash_table_foreach(event->params, dup_attr, copy->params); } } if (event->versioned_params) { copy->versioned_params = copy_xml(event->versioned_params); } return copy; } void lrmd_free_event(lrmd_event_data_t * event) { if (!event) { return; } /* free gives me grief if i try to cast */ free((char *)event->rsc_id); free((char *)event->op_type); free((char *)event->user_data); free((char *)event->output); free((char *)event->exit_reason); free((char *)event->remote_nodename); if (event->params) { g_hash_table_destroy(event->params); } if (event->versioned_params) { free_xml(event->versioned_params); } free(event); } static int lrmd_dispatch_internal(lrmd_t * lrmd, xmlNode * msg) { const char *type; const char *proxy_session = crm_element_value(msg, F_LRMD_IPC_SESSION); lrmd_private_t *native = lrmd->private; lrmd_event_data_t event = { 0, }; if (proxy_session != NULL) { /* this is proxy business */ lrmd_internal_proxy_dispatch(lrmd, msg); return 1; } else if (!native->callback) { /* no callback set */ crm_trace("notify event received but client has not set callback"); return 1; } event.remote_nodename = native->remote_nodename; type = crm_element_value(msg, F_LRMD_OPERATION); crm_element_value_int(msg, F_LRMD_CALLID, &event.call_id); event.rsc_id = crm_element_value(msg, F_LRMD_RSC_ID); if (crm_str_eq(type, LRMD_OP_RSC_REG, TRUE)) { event.type = lrmd_event_register; } else if (crm_str_eq(type, LRMD_OP_RSC_UNREG, TRUE)) { event.type = lrmd_event_unregister; } else if (crm_str_eq(type, LRMD_OP_RSC_EXEC, TRUE)) { crm_element_value_int(msg, F_LRMD_TIMEOUT, &event.timeout); crm_element_value_int(msg, F_LRMD_RSC_INTERVAL, &event.interval); crm_element_value_int(msg, F_LRMD_RSC_START_DELAY, &event.start_delay); crm_element_value_int(msg, F_LRMD_EXEC_RC, (int *)&event.rc); crm_element_value_int(msg, F_LRMD_OP_STATUS, &event.op_status); crm_element_value_int(msg, F_LRMD_RSC_DELETED, &event.rsc_deleted); crm_element_value_int(msg, F_LRMD_RSC_RUN_TIME, (int *)&event.t_run); crm_element_value_int(msg, F_LRMD_RSC_RCCHANGE_TIME, (int *)&event.t_rcchange); crm_element_value_int(msg, F_LRMD_RSC_EXEC_TIME, (int *)&event.exec_time); crm_element_value_int(msg, F_LRMD_RSC_QUEUE_TIME, (int *)&event.queue_time); event.op_type = crm_element_value(msg, F_LRMD_RSC_ACTION); event.user_data = crm_element_value(msg, F_LRMD_RSC_USERDATA_STR); event.output = crm_element_value(msg, F_LRMD_RSC_OUTPUT); event.exit_reason = crm_element_value(msg, F_LRMD_RSC_EXIT_REASON); event.type = lrmd_event_exec_complete; event.params = xml2list(msg); event.versioned_params = first_named_child(msg, XML_TAG_VER_ATTRS); } else if (crm_str_eq(type, LRMD_OP_NEW_CLIENT, TRUE)) { event.type = lrmd_event_new_client; } else if (crm_str_eq(type, LRMD_OP_POKE, TRUE)) { event.type = lrmd_event_poke; } else { return 1; } crm_trace("op %s notify event received", type); native->callback(&event); if (event.params) { g_hash_table_destroy(event.params); } return 1; } static int lrmd_ipc_dispatch(const char *buffer, ssize_t length, gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->private; xmlNode *msg; int rc; if (!native->callback) { /* no callback set */ return 1; } msg = string2xml(buffer); rc = lrmd_dispatch_internal(lrmd, msg); free_xml(msg); return rc; } #ifdef HAVE_GNUTLS_GNUTLS_H static void lrmd_free_xml(gpointer userdata) { free_xml((xmlNode *) userdata); } static int lrmd_tls_connected(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->private; if (native->remote->tls_session) { return TRUE; } return FALSE; } static int lrmd_tls_dispatch(gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->private; xmlNode *xml = NULL; int rc = 0; int disconnected = 0; if (lrmd_tls_connected(lrmd) == FALSE) { crm_trace("tls dispatch triggered after disconnect"); return 0; } crm_trace("tls_dispatch triggered"); /* First check if there are any pending notifies to process that came * while we were waiting for replies earlier. */ if (native->pending_notify) { GList *iter = NULL; crm_trace("Processing pending notifies"); for (iter = native->pending_notify; iter; iter = iter->next) { lrmd_dispatch_internal(lrmd, iter->data); } g_list_free_full(native->pending_notify, lrmd_free_xml); native->pending_notify = NULL; } /* Next read the current buffer and see if there are any messages to handle. */ rc = crm_remote_ready(native->remote, 0); if (rc == 0) { /* nothing to read, see if any full messages are already in buffer. */ xml = crm_remote_parse_buffer(native->remote); } else if (rc < 0) { disconnected = 1; } else { crm_remote_recv(native->remote, -1, &disconnected); xml = crm_remote_parse_buffer(native->remote); } while (xml) { const char *msg_type = crm_element_value(xml, F_LRMD_REMOTE_MSG_TYPE); if (safe_str_eq(msg_type, "notify")) { lrmd_dispatch_internal(lrmd, xml); } else if (safe_str_eq(msg_type, "reply")) { if (native->expected_late_replies > 0) { native->expected_late_replies--; } else { int reply_id = 0; crm_element_value_int(xml, F_LRMD_CALLID, &reply_id); /* if this happens, we want to know about it */ crm_err("Got outdated reply %d", reply_id); } } free_xml(xml); xml = crm_remote_parse_buffer(native->remote); } if (disconnected) { crm_info("Server disconnected while reading remote server msg."); lrmd_tls_disconnect(lrmd); return 0; } return 1; } #endif /* Not used with mainloop */ int lrmd_poll(lrmd_t * lrmd, int timeout) { lrmd_private_t *native = lrmd->private; switch (native->type) { case CRM_CLIENT_IPC: return crm_ipc_ready(native->ipc); #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: if (native->pending_notify) { return 1; } return crm_remote_ready(native->remote, 0); #endif default: crm_err("Unsupported connection type: %d", native->type); } return 0; } /* Not used with mainloop */ bool lrmd_dispatch(lrmd_t * lrmd) { lrmd_private_t *private = NULL; CRM_ASSERT(lrmd != NULL); private = lrmd->private; switch (private->type) { case CRM_CLIENT_IPC: while (crm_ipc_ready(private->ipc)) { if (crm_ipc_read(private->ipc) > 0) { const char *msg = crm_ipc_buffer(private->ipc); lrmd_ipc_dispatch(msg, strlen(msg), lrmd); } } break; #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: lrmd_tls_dispatch(lrmd); break; #endif default: crm_err("Unsupported connection type: %d", private->type); } if (lrmd_api_is_connected(lrmd) == FALSE) { crm_err("Connection closed"); return FALSE; } return TRUE; } static xmlNode * lrmd_create_op(const char *token, const char *op, xmlNode * data, enum lrmd_call_options options) { xmlNode *op_msg = create_xml_node(NULL, "lrmd_command"); CRM_CHECK(op_msg != NULL, return NULL); CRM_CHECK(token != NULL, return NULL); crm_xml_add(op_msg, F_XML_TAGNAME, "lrmd_command"); crm_xml_add(op_msg, F_TYPE, T_LRMD); crm_xml_add(op_msg, F_LRMD_CALLBACK_TOKEN, token); crm_xml_add(op_msg, F_LRMD_OPERATION, op); crm_trace("Sending call options: %.8lx, %d", (long)options, options); crm_xml_add_int(op_msg, F_LRMD_CALLOPTS, options); if (data != NULL) { add_message_xml(op_msg, F_LRMD_CALLDATA, data); } return op_msg; } static void lrmd_ipc_connection_destroy(gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->private; crm_info("IPC connection destroyed"); /* Prevent these from being cleaned up in lrmd_api_disconnect() */ native->ipc = NULL; native->source = NULL; if (native->callback) { lrmd_event_data_t event = { 0, }; event.type = lrmd_event_disconnect; event.remote_nodename = native->remote_nodename; native->callback(&event); } } #ifdef HAVE_GNUTLS_GNUTLS_H static void lrmd_tls_connection_destroy(gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->private; crm_info("TLS connection destroyed"); if (native->remote->tls_session) { gnutls_bye(*native->remote->tls_session, GNUTLS_SHUT_RDWR); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); } if (native->psk_cred_c) { gnutls_psk_free_client_credentials(native->psk_cred_c); } if (native->sock) { close(native->sock); } if (native->process_notify) { mainloop_destroy_trigger(native->process_notify); native->process_notify = NULL; } if (native->pending_notify) { g_list_free_full(native->pending_notify, lrmd_free_xml); native->pending_notify = NULL; } free(native->remote->buffer); native->remote->buffer = NULL; native->source = 0; native->sock = 0; native->psk_cred_c = NULL; native->remote->tls_session = NULL; native->sock = 0; if (native->callback) { lrmd_event_data_t event = { 0, }; event.remote_nodename = native->remote_nodename; event.type = lrmd_event_disconnect; native->callback(&event); } return; } int lrmd_tls_send_msg(crm_remote_t * session, xmlNode * msg, uint32_t id, const char *msg_type) { int rc = -1; crm_xml_add_int(msg, F_LRMD_REMOTE_MSG_ID, id); crm_xml_add(msg, F_LRMD_REMOTE_MSG_TYPE, msg_type); rc = crm_remote_send(session, msg); if (rc < 0) { crm_err("Failed to send remote lrmd tls msg, rc = %d", rc); return rc; } return rc; } static xmlNode * lrmd_tls_recv_reply(lrmd_t * lrmd, int total_timeout, int expected_reply_id, int *disconnected) { lrmd_private_t *native = lrmd->private; xmlNode *xml = NULL; time_t start = time(NULL); const char *msg_type = NULL; int reply_id = 0; int remaining_timeout = 0; /* A timeout of 0 here makes no sense. We have to wait a period of time * for the response to come back. If -1 or 0, default to 10 seconds. */ if (total_timeout <= 0 || total_timeout > MAX_TLS_RECV_WAIT) { total_timeout = MAX_TLS_RECV_WAIT; } while (!xml) { xml = crm_remote_parse_buffer(native->remote); if (!xml) { /* read some more off the tls buffer if we still have time left. */ if (remaining_timeout) { remaining_timeout = total_timeout - ((time(NULL) - start) * 1000); } else { remaining_timeout = total_timeout; } if (remaining_timeout <= 0) { crm_err("Never received the expected reply during the timeout period, disconnecting."); *disconnected = TRUE; return NULL; } crm_remote_recv(native->remote, remaining_timeout, disconnected); xml = crm_remote_parse_buffer(native->remote); if (!xml) { crm_err("Unable to receive expected reply, disconnecting."); *disconnected = TRUE; return NULL; } else if (*disconnected) { return NULL; } } CRM_ASSERT(xml != NULL); crm_element_value_int(xml, F_LRMD_REMOTE_MSG_ID, &reply_id); msg_type = crm_element_value(xml, F_LRMD_REMOTE_MSG_TYPE); if (!msg_type) { crm_err("Empty msg type received while waiting for reply"); free_xml(xml); xml = NULL; } else if (safe_str_eq(msg_type, "notify")) { /* got a notify while waiting for reply, trigger the notify to be processed later */ crm_info("queueing notify"); native->pending_notify = g_list_append(native->pending_notify, xml); if (native->process_notify) { crm_info("notify trigger set."); mainloop_set_trigger(native->process_notify); } xml = NULL; } else if (safe_str_neq(msg_type, "reply")) { /* msg isn't a reply, make some noise */ crm_err("Expected a reply, got %s", msg_type); free_xml(xml); xml = NULL; } else if (reply_id != expected_reply_id) { if (native->expected_late_replies > 0) { native->expected_late_replies--; } else { crm_err("Got outdated reply, expected id %d got id %d", expected_reply_id, reply_id); } free_xml(xml); xml = NULL; } } if (native->remote->buffer && native->process_notify) { mainloop_set_trigger(native->process_notify); } return xml; } static int lrmd_tls_send(lrmd_t * lrmd, xmlNode * msg) { int rc = 0; lrmd_private_t *native = lrmd->private; global_remote_msg_id++; if (global_remote_msg_id <= 0) { global_remote_msg_id = 1; } rc = lrmd_tls_send_msg(native->remote, msg, global_remote_msg_id, "request"); if (rc <= 0) { crm_err("Remote lrmd send failed, disconnecting"); lrmd_tls_disconnect(lrmd); return -ENOTCONN; } return pcmk_ok; } static int lrmd_tls_send_recv(lrmd_t * lrmd, xmlNode * msg, int timeout, xmlNode ** reply) { int rc = 0; int disconnected = 0; xmlNode *xml = NULL; if (lrmd_tls_connected(lrmd) == FALSE) { return -1; } rc = lrmd_tls_send(lrmd, msg); if (rc < 0) { return rc; } xml = lrmd_tls_recv_reply(lrmd, timeout, global_remote_msg_id, &disconnected); if (disconnected) { crm_err("Remote lrmd server disconnected while waiting for reply with id %d. ", global_remote_msg_id); lrmd_tls_disconnect(lrmd); rc = -ENOTCONN; } else if (!xml) { crm_err("Remote lrmd never received reply for request id %d. timeout: %dms ", global_remote_msg_id, timeout); rc = -ECOMM; } if (reply) { *reply = xml; } else { free_xml(xml); } return rc; } #endif static int lrmd_send_xml(lrmd_t * lrmd, xmlNode * msg, int timeout, xmlNode ** reply) { int rc = -1; lrmd_private_t *native = lrmd->private; switch (native->type) { case CRM_CLIENT_IPC: rc = crm_ipc_send(native->ipc, msg, crm_ipc_client_response, timeout, reply); break; #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: rc = lrmd_tls_send_recv(lrmd, msg, timeout, reply); break; #endif default: crm_err("Unsupported connection type: %d", native->type); } return rc; } static int lrmd_send_xml_no_reply(lrmd_t * lrmd, xmlNode * msg) { int rc = -1; lrmd_private_t *native = lrmd->private; switch (native->type) { case CRM_CLIENT_IPC: rc = crm_ipc_send(native->ipc, msg, crm_ipc_flags_none, 0, NULL); break; #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: rc = lrmd_tls_send(lrmd, msg); if (rc == pcmk_ok) { /* we don't want to wait around for the reply, but * since the request/reply protocol needs to behave the same * as libqb, a reply will eventually come later anyway. */ native->expected_late_replies++; } break; #endif default: crm_err("Unsupported connection type: %d", native->type); } return rc; } static int lrmd_api_is_connected(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->private; switch (native->type) { case CRM_CLIENT_IPC: return crm_ipc_connected(native->ipc); break; #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: return lrmd_tls_connected(lrmd); break; #endif default: crm_err("Unsupported connection type: %d", native->type); } return 0; } static int lrmd_send_command(lrmd_t * lrmd, const char *op, xmlNode * data, xmlNode ** output_data, int timeout, /* ms. defaults to 1000 if set to 0 */ enum lrmd_call_options options, gboolean expect_reply) { /* TODO we need to reduce usage of this boolean */ int rc = pcmk_ok; int reply_id = -1; lrmd_private_t *native = lrmd->private; xmlNode *op_msg = NULL; xmlNode *op_reply = NULL; if (!lrmd_api_is_connected(lrmd)) { return -ENOTCONN; } if (op == NULL) { crm_err("No operation specified"); return -EINVAL; } CRM_CHECK(native->token != NULL,; ); crm_trace("sending %s op to lrmd", op); op_msg = lrmd_create_op(native->token, op, data, options); if (op_msg == NULL) { return -EINVAL; } crm_xml_add_int(op_msg, F_LRMD_TIMEOUT, timeout); if (expect_reply) { rc = lrmd_send_xml(lrmd, op_msg, timeout, &op_reply); } else { rc = lrmd_send_xml_no_reply(lrmd, op_msg); goto done; } if (rc < 0) { crm_perror(LOG_ERR, "Couldn't perform %s operation (timeout=%d): %d", op, timeout, rc); rc = -ECOMM; goto done; } else if(op_reply == NULL) { rc = -ENOMSG; goto done; } rc = pcmk_ok; crm_element_value_int(op_reply, F_LRMD_CALLID, &reply_id); crm_trace("%s op reply received", op); if (crm_element_value_int(op_reply, F_LRMD_RC, &rc) != 0) { rc = -ENOMSG; goto done; } crm_log_xml_trace(op_reply, "Reply"); if (output_data) { *output_data = op_reply; op_reply = NULL; /* Prevent subsequent free */ } done: if (lrmd_api_is_connected(lrmd) == FALSE) { crm_err("LRMD disconnected"); } free_xml(op_msg); free_xml(op_reply); return rc; } static int lrmd_api_poke_connection(lrmd_t * lrmd) { int rc; lrmd_private_t *native = lrmd->private; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); rc = lrmd_send_command(lrmd, LRMD_OP_POKE, data, NULL, 0, 0, native->type == CRM_CLIENT_IPC ? TRUE : FALSE); free_xml(data); return rc < 0 ? rc : pcmk_ok; } int remote_proxy_check(lrmd_t * lrmd, GHashTable *hash) { int rc; const char *value; lrmd_private_t *native = lrmd->private; xmlNode *data = create_xml_node(NULL, F_LRMD_OPERATION); crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); value = g_hash_table_lookup(hash, "stonith-watchdog-timeout"); crm_xml_add(data, F_LRMD_WATCHDOG, value); rc = lrmd_send_command(lrmd, LRMD_OP_CHECK, data, NULL, 0, 0, native->type == CRM_CLIENT_IPC ? TRUE : FALSE); free_xml(data); return rc < 0 ? rc : pcmk_ok; } static int lrmd_handshake(lrmd_t * lrmd, const char *name) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->private; xmlNode *reply = NULL; xmlNode *hello = create_xml_node(NULL, "lrmd_command"); crm_xml_add(hello, F_TYPE, T_LRMD); crm_xml_add(hello, F_LRMD_OPERATION, CRM_OP_REGISTER); crm_xml_add(hello, F_LRMD_CLIENTNAME, name); crm_xml_add(hello, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION); /* advertise that we are a proxy provider */ if (native->proxy_callback) { crm_xml_add(hello, F_LRMD_IS_IPC_PROVIDER, "true"); } rc = lrmd_send_xml(lrmd, hello, -1, &reply); if (rc < 0) { crm_perror(LOG_DEBUG, "Couldn't complete registration with the lrmd API: %d", rc); rc = -ECOMM; } else if (reply == NULL) { crm_err("Did not receive registration reply"); rc = -EPROTO; } else { const char *version = crm_element_value(reply, F_LRMD_PROTOCOL_VERSION); const char *msg_type = crm_element_value(reply, F_LRMD_OPERATION); const char *tmp_ticket = crm_element_value(reply, F_LRMD_CLIENTID); crm_element_value_int(reply, F_LRMD_RC, &rc); if (rc == -EPROTO) { crm_err("LRMD protocol mismatch client version %s, server version %s", LRMD_PROTOCOL_VERSION, version); crm_log_xml_err(reply, "Protocol Error"); } else if (safe_str_neq(msg_type, CRM_OP_REGISTER)) { crm_err("Invalid registration message: %s", msg_type); crm_log_xml_err(reply, "Bad reply"); rc = -EPROTO; } else if (tmp_ticket == NULL) { crm_err("No registration token provided"); crm_log_xml_err(reply, "Bad reply"); rc = -EPROTO; } else { crm_trace("Obtained registration token: %s", tmp_ticket); native->token = strdup(tmp_ticket); native->peer_version = strdup(version?version:"1.0"); /* Included since 1.1 */ rc = pcmk_ok; } } free_xml(reply); free_xml(hello); if (rc != pcmk_ok) { lrmd_api_disconnect(lrmd); } return rc; } static int lrmd_ipc_connect(lrmd_t * lrmd, int *fd) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->private; static struct ipc_client_callbacks lrmd_callbacks = { .dispatch = lrmd_ipc_dispatch, .destroy = lrmd_ipc_connection_destroy }; crm_info("Connecting to lrmd"); if (fd) { /* No mainloop */ native->ipc = crm_ipc_new(CRM_SYSTEM_LRMD, 0); if (native->ipc && crm_ipc_connect(native->ipc)) { *fd = crm_ipc_get_fd(native->ipc); } else if (native->ipc) { crm_perror(LOG_ERR, "Connection to local resource manager failed"); rc = -ENOTCONN; } } else { native->source = mainloop_add_ipc_client(CRM_SYSTEM_LRMD, G_PRIORITY_HIGH, 0, lrmd, &lrmd_callbacks); native->ipc = mainloop_get_ipc_client(native->source); } if (native->ipc == NULL) { crm_debug("Could not connect to the LRMD API"); rc = -ENOTCONN; } return rc; } #ifdef HAVE_GNUTLS_GNUTLS_H static int set_key(gnutls_datum_t * key, const char *location) { FILE *stream; int read_len = 256; int cur_len = 0; int buf_len = read_len; static char *key_cache = NULL; static size_t key_cache_len = 0; static time_t key_cache_updated; if (location == NULL) { return -1; } if (key_cache) { time_t now = time(NULL); if ((now - key_cache_updated) < 60) { key->data = gnutls_malloc(key_cache_len + 1); key->size = key_cache_len; memcpy(key->data, key_cache, key_cache_len); crm_debug("using cached LRMD key"); return 0; } else { key_cache_len = 0; key_cache_updated = 0; free(key_cache); key_cache = NULL; crm_debug("clearing lrmd key cache"); } } stream = fopen(location, "r"); if (!stream) { return -1; } key->data = gnutls_malloc(read_len); while (!feof(stream)) { int next; if (cur_len == buf_len) { buf_len = cur_len + read_len; key->data = gnutls_realloc(key->data, buf_len); } next = fgetc(stream); if (next == EOF && feof(stream)) { break; } key->data[cur_len] = next; cur_len++; } fclose(stream); key->size = cur_len; if (!cur_len) { gnutls_free(key->data); key->data = 0; return -1; } if (!key_cache) { key_cache = calloc(1, key->size + 1); memcpy(key_cache, key->data, key->size); key_cache_len = key->size; key_cache_updated = time(NULL); } return 0; } int lrmd_tls_set_key(gnutls_datum_t * key) { int rc = 0; const char *specific_location = getenv("PCMK_authkey_location"); if (set_key(key, specific_location) == 0) { crm_debug("Using custom authkey location %s", specific_location); return 0; } else if (specific_location) { crm_err("No valid lrmd remote key found at %s, trying default location", specific_location); } if (set_key(key, DEFAULT_REMOTE_KEY_LOCATION) != 0) { rc = set_key(key, ALT_REMOTE_KEY_LOCATION); } if (rc) { crm_err("No valid lrmd remote key found at %s", DEFAULT_REMOTE_KEY_LOCATION); return -1; } return rc; } static void lrmd_gnutls_global_init(void) { static int gnutls_init = 0; if (!gnutls_init) { crm_gnutls_global_init(); } gnutls_init = 1; } #endif static void report_async_connection_result(lrmd_t * lrmd, int rc) { lrmd_private_t *native = lrmd->private; if (native->callback) { lrmd_event_data_t event = { 0, }; event.type = lrmd_event_connect; event.remote_nodename = native->remote_nodename; event.connection_rc = rc; native->callback(&event); } } #ifdef HAVE_GNUTLS_GNUTLS_H static void lrmd_tcp_connect_cb(void *userdata, int sock) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->private; char name[256] = { 0, }; static struct mainloop_fd_callbacks lrmd_tls_callbacks = { .dispatch = lrmd_tls_dispatch, .destroy = lrmd_tls_connection_destroy, }; int rc = sock; gnutls_datum_t psk_key = { NULL, 0 }; native->async_timer = 0; if (rc < 0) { lrmd_tls_connection_destroy(lrmd); crm_info("remote lrmd connect to %s at port %d failed", native->server, native->port); report_async_connection_result(lrmd, rc); return; } /* TODO continue with tls stuff now that tcp connect passed. make this async as well soon * to avoid all blocking code in the client. */ native->sock = sock; if (lrmd_tls_set_key(&psk_key) != 0) { lrmd_tls_connection_destroy(lrmd); return; } gnutls_psk_allocate_client_credentials(&native->psk_cred_c); gnutls_psk_set_client_credentials(native->psk_cred_c, DEFAULT_REMOTE_USERNAME, &psk_key, GNUTLS_PSK_KEY_RAW); gnutls_free(psk_key.data); native->remote->tls_session = create_psk_tls_session(sock, GNUTLS_CLIENT, native->psk_cred_c); if (crm_initiate_client_tls_handshake(native->remote, LRMD_CLIENT_HANDSHAKE_TIMEOUT) != 0) { crm_warn("Client tls handshake failed for server %s:%d. Disconnecting", native->server, native->port); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); native->remote->tls_session = NULL; lrmd_tls_connection_destroy(lrmd); report_async_connection_result(lrmd, -1); return; } crm_info("Remote lrmd client TLS connection established with server %s:%d", native->server, native->port); snprintf(name, 128, "remote-lrmd-%s:%d", native->server, native->port); native->process_notify = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_tls_dispatch, lrmd); native->source = mainloop_add_fd(name, G_PRIORITY_HIGH, native->sock, lrmd, &lrmd_tls_callbacks); rc = lrmd_handshake(lrmd, name); report_async_connection_result(lrmd, rc); return; } static int lrmd_tls_connect_async(lrmd_t * lrmd, int timeout /*ms */ ) { int rc = -1; int sock = 0; int timer_id = 0; lrmd_private_t *native = lrmd->private; lrmd_gnutls_global_init(); sock = crm_remote_tcp_connect_async(native->server, native->port, timeout, &timer_id, lrmd, lrmd_tcp_connect_cb); if (sock != -1) { native->sock = sock; rc = 0; native->async_timer = timer_id; } return rc; } static int lrmd_tls_connect(lrmd_t * lrmd, int *fd) { static struct mainloop_fd_callbacks lrmd_tls_callbacks = { .dispatch = lrmd_tls_dispatch, .destroy = lrmd_tls_connection_destroy, }; lrmd_private_t *native = lrmd->private; int sock; gnutls_datum_t psk_key = { NULL, 0 }; lrmd_gnutls_global_init(); sock = crm_remote_tcp_connect(native->server, native->port); if (sock < 0) { crm_warn("Could not establish remote lrmd connection to %s", native->server); lrmd_tls_connection_destroy(lrmd); return -ENOTCONN; } native->sock = sock; if (lrmd_tls_set_key(&psk_key) != 0) { lrmd_tls_connection_destroy(lrmd); return -1; } gnutls_psk_allocate_client_credentials(&native->psk_cred_c); gnutls_psk_set_client_credentials(native->psk_cred_c, DEFAULT_REMOTE_USERNAME, &psk_key, GNUTLS_PSK_KEY_RAW); gnutls_free(psk_key.data); native->remote->tls_session = create_psk_tls_session(sock, GNUTLS_CLIENT, native->psk_cred_c); if (crm_initiate_client_tls_handshake(native->remote, LRMD_CLIENT_HANDSHAKE_TIMEOUT) != 0) { crm_err("Session creation for %s:%d failed", native->server, native->port); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); native->remote->tls_session = NULL; lrmd_tls_connection_destroy(lrmd); return -1; } crm_info("Remote lrmd client TLS connection established with server %s:%d", native->server, native->port); if (fd) { *fd = sock; } else { char name[256] = { 0, }; snprintf(name, 128, "remote-lrmd-%s:%d", native->server, native->port); native->process_notify = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_tls_dispatch, lrmd); native->source = mainloop_add_fd(name, G_PRIORITY_HIGH, native->sock, lrmd, &lrmd_tls_callbacks); } return pcmk_ok; } #endif static int lrmd_api_connect(lrmd_t * lrmd, const char *name, int *fd) { int rc = -ENOTCONN; lrmd_private_t *native = lrmd->private; switch (native->type) { case CRM_CLIENT_IPC: rc = lrmd_ipc_connect(lrmd, fd); break; #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: rc = lrmd_tls_connect(lrmd, fd); break; #endif default: crm_err("Unsupported connection type: %d", native->type); } if (rc == pcmk_ok) { rc = lrmd_handshake(lrmd, name); } return rc; } static int lrmd_api_connect_async(lrmd_t * lrmd, const char *name, int timeout) { int rc = 0; lrmd_private_t *native = lrmd->private; if (!native->callback) { crm_err("Async connect not possible, no lrmd client callback set."); return -1; } switch (native->type) { case CRM_CLIENT_IPC: /* fake async connection with ipc. it should be fast * enough that we gain very little from async */ rc = lrmd_api_connect(lrmd, name, NULL); if (!rc) { report_async_connection_result(lrmd, rc); } break; #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: rc = lrmd_tls_connect_async(lrmd, timeout); if (rc) { /* connection failed, report rc now */ report_async_connection_result(lrmd, rc); } break; #endif default: crm_err("Unsupported connection type: %d", native->type); } return rc; } static void lrmd_ipc_disconnect(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->private; if (native->source != NULL) { /* Attached to mainloop */ mainloop_del_ipc_client(native->source); native->source = NULL; native->ipc = NULL; } else if (native->ipc) { /* Not attached to mainloop */ crm_ipc_t *ipc = native->ipc; native->ipc = NULL; crm_ipc_close(ipc); crm_ipc_destroy(ipc); } } #ifdef HAVE_GNUTLS_GNUTLS_H static void lrmd_tls_disconnect(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->private; if (native->remote->tls_session) { gnutls_bye(*native->remote->tls_session, GNUTLS_SHUT_RDWR); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); native->remote->tls_session = 0; } if (native->async_timer) { g_source_remove(native->async_timer); native->async_timer = 0; } if (native->source != NULL) { /* Attached to mainloop */ mainloop_del_ipc_client(native->source); native->source = NULL; } else if (native->sock) { close(native->sock); native->sock = 0; } if (native->pending_notify) { g_list_free_full(native->pending_notify, lrmd_free_xml); native->pending_notify = NULL; } } #endif static int lrmd_api_disconnect(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->private; crm_info("Disconnecting from %d lrmd service", native->type); switch (native->type) { case CRM_CLIENT_IPC: lrmd_ipc_disconnect(lrmd); break; #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: lrmd_tls_disconnect(lrmd); break; #endif default: crm_err("Unsupported connection type: %d", native->type); } free(native->token); native->token = NULL; free(native->peer_version); native->peer_version = NULL; return 0; } static int lrmd_api_register_rsc(lrmd_t * lrmd, const char *rsc_id, const char *class, const char *provider, const char *type, enum lrmd_call_options options) { int rc = pcmk_ok; xmlNode *data = NULL; if (!class || !type || !rsc_id) { return -EINVAL; } - if (safe_str_eq(class, "ocf") && !provider) { + if (safe_str_eq(class, PCMK_RESOURCE_CLASS_OCF) && !provider) { return -EINVAL; } data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); crm_xml_add(data, F_LRMD_CLASS, class); crm_xml_add(data, F_LRMD_PROVIDER, provider); crm_xml_add(data, F_LRMD_TYPE, type); rc = lrmd_send_command(lrmd, LRMD_OP_RSC_REG, data, NULL, 0, options, TRUE); free_xml(data); return rc; } static int lrmd_api_unregister_rsc(lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options) { int rc = pcmk_ok; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); rc = lrmd_send_command(lrmd, LRMD_OP_RSC_UNREG, data, NULL, 0, options, TRUE); free_xml(data); return rc; } lrmd_rsc_info_t * lrmd_copy_rsc_info(lrmd_rsc_info_t * rsc_info) { lrmd_rsc_info_t *copy = NULL; copy = calloc(1, sizeof(lrmd_rsc_info_t)); copy->id = strdup(rsc_info->id); copy->type = strdup(rsc_info->type); copy->class = strdup(rsc_info->class); if (rsc_info->provider) { copy->provider = strdup(rsc_info->provider); } return copy; } void lrmd_free_rsc_info(lrmd_rsc_info_t * rsc_info) { if (!rsc_info) { return; } free(rsc_info->id); free(rsc_info->type); free(rsc_info->class); free(rsc_info->provider); free(rsc_info); } static lrmd_rsc_info_t * lrmd_api_get_rsc_info(lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options) { lrmd_rsc_info_t *rsc_info = NULL; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); xmlNode *output = NULL; const char *class = NULL; const char *provider = NULL; const char *type = NULL; crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); lrmd_send_command(lrmd, LRMD_OP_RSC_INFO, data, &output, 0, options, TRUE); free_xml(data); if (!output) { return NULL; } class = crm_element_value(output, F_LRMD_CLASS); provider = crm_element_value(output, F_LRMD_PROVIDER); type = crm_element_value(output, F_LRMD_TYPE); if (!class || !type) { free_xml(output); return NULL; - } else if (safe_str_eq(class, "ocf") && !provider) { + } else if (safe_str_eq(class, PCMK_RESOURCE_CLASS_OCF) && !provider) { free_xml(output); return NULL; } rsc_info = calloc(1, sizeof(lrmd_rsc_info_t)); rsc_info->id = strdup(rsc_id); rsc_info->class = strdup(class); if (provider) { rsc_info->provider = strdup(provider); } rsc_info->type = strdup(type); free_xml(output); return rsc_info; } static void lrmd_api_set_callback(lrmd_t * lrmd, lrmd_event_callback callback) { lrmd_private_t *native = lrmd->private; native->callback = callback; } void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)) { lrmd_private_t *native = lrmd->private; native->proxy_callback = callback; native->proxy_callback_userdata = userdata; } void lrmd_internal_proxy_dispatch(lrmd_t *lrmd, xmlNode *msg) { lrmd_private_t *native = lrmd->private; if (native->proxy_callback) { crm_log_xml_trace(msg, "PROXY_INBOUND"); native->proxy_callback(lrmd, native->proxy_callback_userdata, msg); } } int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg) { if (lrmd == NULL) { return -ENOTCONN; } crm_xml_add(msg, F_LRMD_OPERATION, CRM_OP_IPC_FWD); crm_log_xml_trace(msg, "PROXY_OUTBOUND"); return lrmd_send_xml_no_reply(lrmd, msg); } static int stonith_get_metadata(const char *provider, const char *type, char **output) { int rc = pcmk_ok; stonith_t *stonith_api = stonith_api_new(); if(stonith_api) { stonith_api->cmds->metadata(stonith_api, st_opt_sync_call, type, provider, output, 0); stonith_api->cmds->free(stonith_api); } if (*output == NULL) { rc = -EIO; } return rc; } #define lsb_metadata_template \ "\n" \ "\n" \ "\n" \ " 1.0\n" \ " \n" \ " %s\n" \ " \n" \ " %s\n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " %s\n" \ " %s\n" \ " %s\n" \ " %s\n" \ " %s\n" \ " %s\n" \ " %s\n" \ " \n" \ "\n" #define LSB_INITSCRIPT_INFOBEGIN_TAG "### BEGIN INIT INFO" #define LSB_INITSCRIPT_INFOEND_TAG "### END INIT INFO" #define PROVIDES "# Provides:" #define REQ_START "# Required-Start:" #define REQ_STOP "# Required-Stop:" #define SHLD_START "# Should-Start:" #define SHLD_STOP "# Should-Stop:" #define DFLT_START "# Default-Start:" #define DFLT_STOP "# Default-Stop:" #define SHORT_DSCR "# Short-Description:" #define DESCRIPTION "# Description:" #define lsb_meta_helper_free_value(m) \ do { \ if ((m) != NULL) { \ xmlFree(m); \ (m) = NULL; \ } \ } while(0) /*! * \internal * \brief Grab an LSB header value * * \param[in] line Line read from LSB init script * \param[in/out] value If not set, will be set to XML-safe copy of value * \param[in] prefix Set value if line starts with this pattern * * \return TRUE if value was set, FALSE otherwise */ static inline gboolean lsb_meta_helper_get_value(const char *line, char **value, const char *prefix) { if (!*value && !strncasecmp(line, prefix, strlen(prefix))) { *value = (char *)xmlEncodeEntitiesReentrant(NULL, BAD_CAST line+strlen(prefix)); return TRUE; } return FALSE; } static int lsb_get_metadata(const char *type, char **output) { char ra_pathname[PATH_MAX] = { 0, }; FILE *fp; char buffer[1024]; char *provides = NULL; char *req_start = NULL; char *req_stop = NULL; char *shld_start = NULL; char *shld_stop = NULL; char *dflt_start = NULL; char *dflt_stop = NULL; char *s_dscrpt = NULL; char *xml_l_dscrpt = NULL; int offset = 0; int max = 2048; char description[max]; if(type[0] == '/') { snprintf(ra_pathname, sizeof(ra_pathname), "%s", type); } else { snprintf(ra_pathname, sizeof(ra_pathname), "%s/%s", LSB_ROOT_DIR, type); } crm_trace("Looking into %s", ra_pathname); if (!(fp = fopen(ra_pathname, "r"))) { return -errno; } /* Enter into the lsb-compliant comment block */ while (fgets(buffer, sizeof(buffer), fp)) { /* Now suppose each of the following eight arguments contain only one line */ if (lsb_meta_helper_get_value(buffer, &provides, PROVIDES)) { continue; } if (lsb_meta_helper_get_value(buffer, &req_start, REQ_START)) { continue; } if (lsb_meta_helper_get_value(buffer, &req_stop, REQ_STOP)) { continue; } if (lsb_meta_helper_get_value(buffer, &shld_start, SHLD_START)) { continue; } if (lsb_meta_helper_get_value(buffer, &shld_stop, SHLD_STOP)) { continue; } if (lsb_meta_helper_get_value(buffer, &dflt_start, DFLT_START)) { continue; } if (lsb_meta_helper_get_value(buffer, &dflt_stop, DFLT_STOP)) { continue; } if (lsb_meta_helper_get_value(buffer, &s_dscrpt, SHORT_DSCR)) { continue; } /* Long description may cross multiple lines */ if (offset == 0 && (0 == strncasecmp(buffer, DESCRIPTION, strlen(DESCRIPTION)))) { /* Between # and keyword, more than one space, or a tab * character, indicates the continuation line. * * Extracted from LSB init script standard */ while (fgets(buffer, sizeof(buffer), fp)) { if (!strncmp(buffer, "# ", 3) || !strncmp(buffer, "#\t", 2)) { buffer[0] = ' '; offset += snprintf(description+offset, max-offset, "%s", buffer); } else { fputs(buffer, fp); break; /* Long description ends */ } } continue; } if (xml_l_dscrpt == NULL && offset > 0) { xml_l_dscrpt = (char *)xmlEncodeEntitiesReentrant(NULL, BAD_CAST(description)); } if (!strncasecmp(buffer, LSB_INITSCRIPT_INFOEND_TAG, strlen(LSB_INITSCRIPT_INFOEND_TAG))) { /* Get to the out border of LSB comment block */ break; } if (buffer[0] != '#') { break; /* Out of comment block in the beginning */ } } fclose(fp); *output = crm_strdup_printf(lsb_metadata_template, type, (xml_l_dscrpt == NULL) ? type : xml_l_dscrpt, (s_dscrpt == NULL) ? type : s_dscrpt, (provides == NULL) ? "" : provides, (req_start == NULL) ? "" : req_start, (req_stop == NULL) ? "" : req_stop, (shld_start == NULL) ? "" : shld_start, (shld_stop == NULL) ? "" : shld_stop, (dflt_start == NULL) ? "" : dflt_start, (dflt_stop == NULL) ? "" : dflt_stop); lsb_meta_helper_free_value(xml_l_dscrpt); lsb_meta_helper_free_value(s_dscrpt); lsb_meta_helper_free_value(provides); lsb_meta_helper_free_value(req_start); lsb_meta_helper_free_value(req_stop); lsb_meta_helper_free_value(shld_start); lsb_meta_helper_free_value(shld_stop); lsb_meta_helper_free_value(dflt_start); lsb_meta_helper_free_value(dflt_stop); crm_trace("Created fake metadata: %llu", (unsigned long long) strlen(*output)); return pcmk_ok; } #if SUPPORT_NAGIOS static int nagios_get_metadata(const char *type, char **output) { int rc = pcmk_ok; FILE *file_strm = NULL; int start = 0, length = 0, read_len = 0; char *metadata_file = NULL; int len = 36; len += strlen(NAGIOS_METADATA_DIR); len += strlen(type); metadata_file = calloc(1, len); CRM_CHECK(metadata_file != NULL, return -ENOMEM); sprintf(metadata_file, "%s/%s.xml", NAGIOS_METADATA_DIR, type); file_strm = fopen(metadata_file, "r"); if (file_strm == NULL) { crm_err("Metadata file %s does not exist", metadata_file); free(metadata_file); return -EIO; } /* see how big the file is */ start = ftell(file_strm); fseek(file_strm, 0L, SEEK_END); length = ftell(file_strm); fseek(file_strm, 0L, start); CRM_ASSERT(length >= 0); CRM_ASSERT(start == ftell(file_strm)); if (length <= 0) { crm_info("%s was not valid", metadata_file); free(*output); *output = NULL; rc = -EIO; } else { crm_trace("Reading %d bytes from file", length); *output = calloc(1, (length + 1)); read_len = fread(*output, 1, length, file_strm); if (read_len != length) { crm_err("Calculated and read bytes differ: %d vs. %d", length, read_len); free(*output); *output = NULL; rc = -EIO; } } fclose(file_strm); free(metadata_file); return rc; } #endif #if SUPPORT_HEARTBEAT /* strictly speaking, support for class=heartbeat style scripts * does not require "heartbeat support" to be enabled. * But since those scripts are part of the "heartbeat" package usually, * and are very unlikely to be present in any other deployment, * I leave it inside this ifdef. * * Yes, I know, these are legacy and should die, * or at least be rewritten to be a proper OCF style agent. * But they exist, and custom scripts following these rules do, too. * * Taken from the old "glue" lrmd, see * http://hg.linux-ha.org/glue/file/0a7add1d9996/lib/plugins/lrm/raexechb.c#l49 * http://hg.linux-ha.org/glue/file/0a7add1d9996/lib/plugins/lrm/raexechb.c#l393 */ static const char hb_metadata_template[] = "\n" "\n" "\n" "1.0\n" "\n" "%s" "\n" "%s\n" "\n" "\n" "\n" "This argument will be passed as the first argument to the " "heartbeat resource agent (assuming it supports one)\n" "\n" "argv[1]\n" "\n" "\n" "\n" "\n" "This argument will be passed as the second argument to the " "heartbeat resource agent (assuming it supports one)\n" "\n" "argv[2]\n" "\n" "\n" "\n" "\n" "This argument will be passed as the third argument to the " "heartbeat resource agent (assuming it supports one)\n" "\n" "argv[3]\n" "\n" "\n" "\n" "\n" "This argument will be passed as the fourth argument to the " "heartbeat resource agent (assuming it supports one)\n" "\n" "argv[4]\n" "\n" "\n" "\n" "\n" "This argument will be passed as the fifth argument to the " "heartbeat resource agent (assuming it supports one)\n" "\n" "argv[5]\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n" "\n"; static int heartbeat_get_metadata(const char *type, char **output) { *output = crm_strdup_printf(hb_metadata_template, type, type, type); crm_trace("Created fake metadata: %llu", (unsigned long long) strlen(*output)); return pcmk_ok; } #endif static int generic_get_metadata(const char *standard, const char *provider, const char *type, char **output) { svc_action_t *action; action = resources_action_create(type, standard, provider, type, "meta-data", 0, 30000, NULL, 0); if (action == NULL) { crm_err("Unable to retrieve meta-data for %s:%s:%s", standard, provider, type); services_action_free(action); return -EINVAL; } if (!(services_action_sync(action))) { crm_err("Failed to retrieve meta-data for %s:%s:%s", standard, provider, type); services_action_free(action); return -EIO; } if (!action->stdout_data) { crm_err("Failed to receive meta-data for %s:%s:%s", standard, provider, type); services_action_free(action); return -EIO; } *output = strdup(action->stdout_data); services_action_free(action); return pcmk_ok; } static int lrmd_api_get_metadata(lrmd_t * lrmd, const char *class, const char *provider, const char *type, char **output, enum lrmd_call_options options) { if (!class || !type) { return -EINVAL; } - if (safe_str_eq(class, "service")) { + if (safe_str_eq(class, PCMK_RESOURCE_CLASS_SERVICE)) { class = resources_find_service_class(type); } - if (safe_str_eq(class, "stonith")) { + if (safe_str_eq(class, PCMK_RESOURCE_CLASS_STONITH)) { return stonith_get_metadata(provider, type, output); - } else if (safe_str_eq(class, "lsb")) { + } else if (safe_str_eq(class, PCMK_RESOURCE_CLASS_LSB)) { return lsb_get_metadata(type, output); #if SUPPORT_NAGIOS - } else if (safe_str_eq(class, "nagios")) { + } else if (safe_str_eq(class, PCMK_RESOURCE_CLASS_NAGIOS)) { return nagios_get_metadata(type, output); #endif #if SUPPORT_HEARTBEAT - } else if (safe_str_eq(class, "heartbeat")) { + } else if (safe_str_eq(class, PCMK_RESOURCE_CLASS_HB)) { return heartbeat_get_metadata(type, output); #endif } return generic_get_metadata(class, provider, type, output); } static int lrmd_api_exec(lrmd_t * lrmd, const char *rsc_id, const char *action, const char *userdata, int interval, /* ms */ int timeout, /* ms */ int start_delay, /* ms */ enum lrmd_call_options options, lrmd_key_value_t * params) { int rc = pcmk_ok; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); xmlNode *args = create_xml_node(data, XML_TAG_ATTRS); lrmd_key_value_t *tmp = NULL; const char *versioned_args_key = "#" XML_TAG_VER_ATTRS; crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); crm_xml_add(data, F_LRMD_RSC_ACTION, action); crm_xml_add(data, F_LRMD_RSC_USERDATA_STR, userdata); crm_xml_add_int(data, F_LRMD_RSC_INTERVAL, interval); crm_xml_add_int(data, F_LRMD_TIMEOUT, timeout); crm_xml_add_int(data, F_LRMD_RSC_START_DELAY, start_delay); for (tmp = params; tmp; tmp = tmp->next) { if (safe_str_eq(tmp->key, versioned_args_key)) { xmlNode *versioned_args = string2xml(tmp->value); if (versioned_args) { add_node_nocopy(data, NULL, versioned_args); } } else { hash2smartfield((gpointer) tmp->key, (gpointer) tmp->value, args); } } rc = lrmd_send_command(lrmd, LRMD_OP_RSC_EXEC, data, NULL, timeout, options, TRUE); free_xml(data); lrmd_key_value_freeall(params); return rc; } static int lrmd_api_cancel(lrmd_t * lrmd, const char *rsc_id, const char *action, int interval) { int rc = pcmk_ok; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add(data, F_LRMD_RSC_ACTION, action); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); crm_xml_add_int(data, F_LRMD_RSC_INTERVAL, interval); rc = lrmd_send_command(lrmd, LRMD_OP_RSC_CANCEL, data, NULL, 0, 0, TRUE); free_xml(data); return rc; } static int list_stonith_agents(lrmd_list_t ** resources) { int rc = 0; stonith_t *stonith_api = stonith_api_new(); stonith_key_value_t *stonith_resources = NULL; stonith_key_value_t *dIter = NULL; if(stonith_api) { stonith_api->cmds->list_agents(stonith_api, st_opt_sync_call, NULL, &stonith_resources, 0); stonith_api->cmds->free(stonith_api); } for (dIter = stonith_resources; dIter; dIter = dIter->next) { rc++; if (resources) { *resources = lrmd_list_add(*resources, dIter->value); } } stonith_key_value_freeall(stonith_resources, 1, 0); return rc; } static int lrmd_api_list_agents(lrmd_t * lrmd, lrmd_list_t ** resources, const char *class, const char *provider) { int rc = 0; - if (safe_str_eq(class, "stonith")) { + if (safe_str_eq(class, PCMK_RESOURCE_CLASS_STONITH)) { rc += list_stonith_agents(resources); } else { GListPtr gIter = NULL; GList *agents = resources_list_agents(class, provider); for (gIter = agents; gIter != NULL; gIter = gIter->next) { *resources = lrmd_list_add(*resources, (const char *)gIter->data); rc++; } g_list_free_full(agents, free); if (!class) { rc += list_stonith_agents(resources); } } if (rc == 0) { crm_notice("No agents found for class %s", class); rc = -EPROTONOSUPPORT; } return rc; } static int does_provider_have_agent(const char *agent, const char *provider, const char *class) { int found = 0; GList *agents = NULL; GListPtr gIter2 = NULL; agents = resources_list_agents(class, provider); for (gIter2 = agents; gIter2 != NULL; gIter2 = gIter2->next) { if (safe_str_eq(agent, gIter2->data)) { found = 1; } } g_list_free_full(agents, free); return found; } static int lrmd_api_list_ocf_providers(lrmd_t * lrmd, const char *agent, lrmd_list_t ** providers) { int rc = pcmk_ok; char *provider = NULL; GList *ocf_providers = NULL; GListPtr gIter = NULL; - ocf_providers = resources_list_providers("ocf"); + ocf_providers = resources_list_providers(PCMK_RESOURCE_CLASS_OCF); for (gIter = ocf_providers; gIter != NULL; gIter = gIter->next) { provider = gIter->data; - if (!agent || does_provider_have_agent(agent, provider, "ocf")) { + if (!agent || does_provider_have_agent(agent, provider, + PCMK_RESOURCE_CLASS_OCF)) { *providers = lrmd_list_add(*providers, (const char *)gIter->data); rc++; } } g_list_free_full(ocf_providers, free); return rc; } static int lrmd_api_list_standards(lrmd_t * lrmd, lrmd_list_t ** supported) { int rc = 0; GList *standards = NULL; GListPtr gIter = NULL; standards = resources_list_standards(); for (gIter = standards; gIter != NULL; gIter = gIter->next) { *supported = lrmd_list_add(*supported, (const char *)gIter->data); rc++; } if (list_stonith_agents(NULL) > 0) { - *supported = lrmd_list_add(*supported, "stonith"); + *supported = lrmd_list_add(*supported, PCMK_RESOURCE_CLASS_STONITH); rc++; } g_list_free_full(standards, free); return rc; } lrmd_t * lrmd_api_new(void) { lrmd_t *new_lrmd = NULL; lrmd_private_t *pvt = NULL; new_lrmd = calloc(1, sizeof(lrmd_t)); pvt = calloc(1, sizeof(lrmd_private_t)); pvt->remote = calloc(1, sizeof(crm_remote_t)); new_lrmd->cmds = calloc(1, sizeof(lrmd_api_operations_t)); pvt->type = CRM_CLIENT_IPC; new_lrmd->private = pvt; new_lrmd->cmds->connect = lrmd_api_connect; new_lrmd->cmds->connect_async = lrmd_api_connect_async; new_lrmd->cmds->is_connected = lrmd_api_is_connected; new_lrmd->cmds->poke_connection = lrmd_api_poke_connection; new_lrmd->cmds->disconnect = lrmd_api_disconnect; new_lrmd->cmds->register_rsc = lrmd_api_register_rsc; new_lrmd->cmds->unregister_rsc = lrmd_api_unregister_rsc; new_lrmd->cmds->get_rsc_info = lrmd_api_get_rsc_info; new_lrmd->cmds->set_callback = lrmd_api_set_callback; new_lrmd->cmds->get_metadata = lrmd_api_get_metadata; new_lrmd->cmds->exec = lrmd_api_exec; new_lrmd->cmds->cancel = lrmd_api_cancel; new_lrmd->cmds->list_agents = lrmd_api_list_agents; new_lrmd->cmds->list_ocf_providers = lrmd_api_list_ocf_providers; new_lrmd->cmds->list_standards = lrmd_api_list_standards; return new_lrmd; } lrmd_t * lrmd_remote_api_new(const char *nodename, const char *server, int port) { #ifdef HAVE_GNUTLS_GNUTLS_H lrmd_t *new_lrmd = lrmd_api_new(); lrmd_private_t *native = new_lrmd->private; if (!nodename && !server) { lrmd_api_delete(new_lrmd); return NULL; } native->type = CRM_CLIENT_TLS; native->remote_nodename = nodename ? strdup(nodename) : strdup(server); native->server = server ? strdup(server) : strdup(nodename); native->port = port; if (native->port == 0) { const char *remote_port_str = getenv("PCMK_remote_port"); native->port = remote_port_str ? atoi(remote_port_str) : DEFAULT_REMOTE_PORT; } return new_lrmd; #else crm_err("GNUTLS is not enabled for this build, remote LRMD client can not be created"); return NULL; #endif } void lrmd_api_delete(lrmd_t * lrmd) { if (!lrmd) { return; } lrmd->cmds->disconnect(lrmd); /* no-op if already disconnected */ free(lrmd->cmds); if (lrmd->private) { lrmd_private_t *native = lrmd->private; #ifdef HAVE_GNUTLS_GNUTLS_H free(native->server); #endif free(native->remote_nodename); free(native->remote); free(native->token); free(native->peer_version); } free(lrmd->private); free(lrmd); } diff --git a/lib/pengine/complex.c b/lib/pengine/complex.c index 7fe622f59e..a83929dce2 100644 --- a/lib/pengine/complex.c +++ b/lib/pengine/complex.c @@ -1,888 +1,888 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include void populate_hash(xmlNode * nvpair_list, GHashTable * hash, const char **attrs, int attrs_length); resource_object_functions_t resource_class_functions[] = { { native_unpack, native_find_rsc, native_parameter, native_print, native_active, native_resource_state, native_location, native_free}, { group_unpack, native_find_rsc, native_parameter, group_print, group_active, group_resource_state, native_location, group_free}, { clone_unpack, native_find_rsc, native_parameter, clone_print, clone_active, clone_resource_state, native_location, clone_free}, { master_unpack, native_find_rsc, native_parameter, clone_print, clone_active, clone_resource_state, native_location, clone_free} }; enum pe_obj_types get_resource_type(const char *name) { if (safe_str_eq(name, XML_CIB_TAG_RESOURCE)) { return pe_native; } else if (safe_str_eq(name, XML_CIB_TAG_GROUP)) { return pe_group; } else if (safe_str_eq(name, XML_CIB_TAG_INCARNATION)) { return pe_clone; } else if (safe_str_eq(name, XML_CIB_TAG_MASTER)) { return pe_master; } return pe_unknown; } const char * get_resource_typename(enum pe_obj_types type) { switch (type) { case pe_native: return XML_CIB_TAG_RESOURCE; case pe_group: return XML_CIB_TAG_GROUP; case pe_clone: return XML_CIB_TAG_INCARNATION; case pe_master: return XML_CIB_TAG_MASTER; case pe_unknown: return "unknown"; } return ""; } static void dup_attr(gpointer key, gpointer value, gpointer user_data) { add_hash_param(user_data, key, value); } void get_meta_attributes(GHashTable * meta_hash, resource_t * rsc, node_t * node, pe_working_set_t * data_set) { GHashTable *node_hash = NULL; const char *version = crm_element_value(data_set->input, XML_ATTR_CRM_VERSION); if (node) { node_hash = node->details->attrs; } if (rsc->xml) { xmlAttrPtr xIter = NULL; for (xIter = rsc->xml->properties; xIter; xIter = xIter->next) { const char *prop_name = (const char *)xIter->name; const char *prop_value = crm_element_value(rsc->xml, prop_name); add_hash_param(meta_hash, prop_name, prop_value); } } unpack_instance_attributes(data_set->input, rsc->xml, XML_TAG_META_SETS, node_hash, meta_hash, NULL, FALSE, data_set->now); if(version == NULL || compare_version(version, "3.0.9") < 0) { /* populate from the regular attributes until the GUI can create * meta attributes */ unpack_instance_attributes(data_set->input, rsc->xml, XML_TAG_ATTR_SETS, node_hash, meta_hash, NULL, FALSE, data_set->now); } /* set anything else based on the parent */ if (rsc->parent != NULL) { g_hash_table_foreach(rsc->parent->meta, dup_attr, meta_hash); } /* and finally check the defaults */ unpack_instance_attributes(data_set->input, data_set->rsc_defaults, XML_TAG_META_SETS, node_hash, meta_hash, NULL, FALSE, data_set->now); } void get_rsc_attributes(GHashTable * meta_hash, resource_t * rsc, node_t * node, pe_working_set_t * data_set) { GHashTable *node_hash = NULL; if (node) { node_hash = node->details->attrs; } unpack_instance_attributes(data_set->input, rsc->xml, XML_TAG_ATTR_SETS, node_hash, meta_hash, NULL, FALSE, data_set->now); /* set anything else based on the parent */ if (rsc->parent != NULL) { get_rsc_attributes(meta_hash, rsc->parent, node, data_set); } else { /* and finally check the defaults */ unpack_instance_attributes(data_set->input, data_set->rsc_defaults, XML_TAG_ATTR_SETS, node_hash, meta_hash, NULL, FALSE, data_set->now); } } void pe_get_versioned_attributes(xmlNode * meta_hash, resource_t * rsc, node_t * node, pe_working_set_t * data_set) { GHashTable *node_hash = NULL; if (node) { node_hash = node->details->attrs; } pe_unpack_versioned_attributes(data_set->input, rsc->xml, XML_TAG_ATTR_SETS, node_hash, meta_hash, data_set->now); /* set anything else based on the parent */ if (rsc->parent != NULL) { pe_get_versioned_attributes(meta_hash, rsc->parent, node, data_set); } else { /* and finally check the defaults */ pe_unpack_versioned_attributes(data_set->input, data_set->rsc_defaults, XML_TAG_ATTR_SETS, node_hash, meta_hash, data_set->now); } } static char * template_op_key(xmlNode * op) { const char *name = crm_element_value(op, "name"); const char *role = crm_element_value(op, "role"); char *key = NULL; if (role == NULL || crm_str_eq(role, RSC_ROLE_STARTED_S, TRUE) || crm_str_eq(role, RSC_ROLE_SLAVE_S, TRUE)) { role = RSC_ROLE_UNKNOWN_S; } key = crm_concat(name, role, '-'); return key; } static gboolean unpack_template(xmlNode * xml_obj, xmlNode ** expanded_xml, pe_working_set_t * data_set) { xmlNode *cib_resources = NULL; xmlNode *template = NULL; xmlNode *new_xml = NULL; xmlNode *child_xml = NULL; xmlNode *rsc_ops = NULL; xmlNode *template_ops = NULL; const char *template_ref = NULL; const char *clone = NULL; const char *id = NULL; if (xml_obj == NULL) { pe_err("No resource object for template unpacking"); return FALSE; } template_ref = crm_element_value(xml_obj, XML_CIB_TAG_RSC_TEMPLATE); if (template_ref == NULL) { return TRUE; } id = ID(xml_obj); if (id == NULL) { pe_err("'%s' object must have a id", crm_element_name(xml_obj)); return FALSE; } if (crm_str_eq(template_ref, id, TRUE)) { pe_err("The resource object '%s' should not reference itself", id); return FALSE; } cib_resources = get_xpath_object("//"XML_CIB_TAG_RESOURCES, data_set->input, LOG_TRACE); if (cib_resources == NULL) { pe_err("No resources configured"); return FALSE; } template = find_entity(cib_resources, XML_CIB_TAG_RSC_TEMPLATE, template_ref); if (template == NULL) { pe_err("No template named '%s'", template_ref); return FALSE; } new_xml = copy_xml(template); xmlNodeSetName(new_xml, xml_obj->name); crm_xml_replace(new_xml, XML_ATTR_ID, id); clone = crm_element_value(xml_obj, XML_RSC_ATTR_INCARNATION); if(clone) { crm_xml_add(new_xml, XML_RSC_ATTR_INCARNATION, clone); } template_ops = find_xml_node(new_xml, "operations", FALSE); for (child_xml = __xml_first_child(xml_obj); child_xml != NULL; child_xml = __xml_next_element(child_xml)) { xmlNode *new_child = NULL; new_child = add_node_copy(new_xml, child_xml); if (crm_str_eq((const char *)new_child->name, "operations", TRUE)) { rsc_ops = new_child; } } if (template_ops && rsc_ops) { xmlNode *op = NULL; GHashTable *rsc_ops_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, NULL); for (op = __xml_first_child(rsc_ops); op != NULL; op = __xml_next_element(op)) { char *key = template_op_key(op); g_hash_table_insert(rsc_ops_hash, key, op); } for (op = __xml_first_child(template_ops); op != NULL; op = __xml_next_element(op)) { char *key = template_op_key(op); if (g_hash_table_lookup(rsc_ops_hash, key) == NULL) { add_node_copy(rsc_ops, op); } free(key); } if (rsc_ops_hash) { g_hash_table_destroy(rsc_ops_hash); } free_xml(template_ops); } /*free_xml(*expanded_xml); */ *expanded_xml = new_xml; /* Disable multi-level templates for now */ /*if(unpack_template(new_xml, expanded_xml, data_set) == FALSE) { free_xml(*expanded_xml); *expanded_xml = NULL; return FALSE; } */ return TRUE; } static gboolean add_template_rsc(xmlNode * xml_obj, pe_working_set_t * data_set) { const char *template_ref = NULL; const char *id = NULL; if (xml_obj == NULL) { pe_err("No resource object for processing resource list of template"); return FALSE; } template_ref = crm_element_value(xml_obj, XML_CIB_TAG_RSC_TEMPLATE); if (template_ref == NULL) { return TRUE; } id = ID(xml_obj); if (id == NULL) { pe_err("'%s' object must have a id", crm_element_name(xml_obj)); return FALSE; } if (crm_str_eq(template_ref, id, TRUE)) { pe_err("The resource object '%s' should not reference itself", id); return FALSE; } if (add_tag_ref(data_set->template_rsc_sets, template_ref, id) == FALSE) { return FALSE; } return TRUE; } static void handle_rsc_isolation(resource_t *rsc) { resource_t *top = uber_parent(rsc); resource_t *iso = rsc; const char *wrapper = NULL; const char *value; /* check for isolation wrapper mapping if the parent doesn't have one set * isolation mapping is enabled by default. For safety, we are allowing isolation * to be disabled by setting the meta attr, isolation=false. */ value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_ISOLATION); if (top->isolation_wrapper == NULL && (value == NULL || crm_is_true(value))) { if (g_hash_table_lookup(rsc->parameters, "pcmk_docker_image")) { wrapper = "docker-wrapper"; } /* add more isolation technologies here as we expand */ } else if (top->isolation_wrapper) { goto set_rsc_opts; } if (wrapper == NULL) { return; } /* if this is a cloned primitive/group, go head and set the isolation wrapper at * at the clone level. this is really the only sane thing to do in this situation. * This allows someone to clone an isolated resource without having to shuffle * around the isolation attributes to the clone parent */ if (top == rsc->parent && top->variant >= pe_clone) { iso = top; } iso->isolation_wrapper = wrapper; set_bit(top->flags, pe_rsc_unique); set_rsc_opts: clear_bit(rsc->flags, pe_rsc_allow_migrate); set_bit(rsc->flags, pe_rsc_unique); if (top->variant >= pe_clone) { add_hash_param(rsc->meta, XML_RSC_ATTR_UNIQUE, XML_BOOLEAN_TRUE); } } gboolean common_unpack(xmlNode * xml_obj, resource_t ** rsc, resource_t * parent, pe_working_set_t * data_set) { bool isdefault = FALSE; xmlNode *expanded_xml = NULL; xmlNode *ops = NULL; resource_t *top = NULL; const char *value = NULL; const char *rclass = NULL; /* Look for this after any templates have been expanded */ const char *id = crm_element_value(xml_obj, XML_ATTR_ID); int container_remote_node = 0; int baremetal_remote_node = 0; crm_log_xml_trace(xml_obj, "Processing resource input..."); if (id == NULL) { pe_err("Must specify id tag in "); return FALSE; } else if (rsc == NULL) { pe_err("Nowhere to unpack resource into"); return FALSE; } if (unpack_template(xml_obj, &expanded_xml, data_set) == FALSE) { return FALSE; } *rsc = calloc(1, sizeof(resource_t)); (*rsc)->cluster = data_set; if (expanded_xml) { crm_log_xml_trace(expanded_xml, "Expanded resource..."); (*rsc)->xml = expanded_xml; (*rsc)->orig_xml = xml_obj; } else { (*rsc)->xml = xml_obj; (*rsc)->orig_xml = NULL; } /* Do not use xml_obj from here on, use (*rsc)->xml in case templates are involved */ rclass = crm_element_value((*rsc)->xml, XML_AGENT_ATTR_CLASS); (*rsc)->parent = parent; ops = find_xml_node((*rsc)->xml, "operations", FALSE); (*rsc)->ops_xml = expand_idref(ops, data_set->input); (*rsc)->variant = get_resource_type(crm_element_name((*rsc)->xml)); if ((*rsc)->variant == pe_unknown) { pe_err("Unknown resource type: %s", crm_element_name((*rsc)->xml)); free(*rsc); return FALSE; } (*rsc)->parameters = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); (*rsc)->versioned_parameters = create_xml_node(NULL, XML_TAG_VER_ATTRS); (*rsc)->meta = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); (*rsc)->allowed_nodes = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, g_hash_destroy_str); (*rsc)->known_on = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, g_hash_destroy_str); value = crm_element_value((*rsc)->xml, XML_RSC_ATTR_INCARNATION); if (value) { (*rsc)->id = crm_concat(id, value, ':'); add_hash_param((*rsc)->meta, XML_RSC_ATTR_INCARNATION, value); } else { (*rsc)->id = strdup(id); } (*rsc)->fns = &resource_class_functions[(*rsc)->variant]; pe_rsc_trace((*rsc), "Unpacking resource..."); get_meta_attributes((*rsc)->meta, *rsc, NULL, data_set); get_rsc_attributes((*rsc)->parameters, *rsc, NULL, data_set); pe_get_versioned_attributes((*rsc)->versioned_parameters, *rsc, NULL, data_set); (*rsc)->flags = 0; set_bit((*rsc)->flags, pe_rsc_runnable); set_bit((*rsc)->flags, pe_rsc_provisional); if (is_set(data_set->flags, pe_flag_is_managed_default)) { set_bit((*rsc)->flags, pe_rsc_managed); } (*rsc)->rsc_cons = NULL; (*rsc)->rsc_tickets = NULL; (*rsc)->actions = NULL; (*rsc)->role = RSC_ROLE_STOPPED; (*rsc)->next_role = RSC_ROLE_UNKNOWN; (*rsc)->recovery_type = recovery_stop_start; (*rsc)->stickiness = data_set->default_resource_stickiness; (*rsc)->migration_threshold = INFINITY; (*rsc)->failure_timeout = 0; value = g_hash_table_lookup((*rsc)->meta, XML_CIB_ATTR_PRIORITY); (*rsc)->priority = crm_parse_int(value, "0"); (*rsc)->effective_priority = (*rsc)->priority; value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_NOTIFY); if (crm_is_true(value)) { set_bit((*rsc)->flags, pe_rsc_notify); } if (xml_contains_remote_node((*rsc)->xml)) { if (g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_CONTAINER)) { container_remote_node = 1; } else { baremetal_remote_node = 1; } } value = g_hash_table_lookup((*rsc)->meta, XML_OP_ATTR_ALLOW_MIGRATE); if (crm_is_true(value) && xml_has_children((*rsc)->versioned_parameters)) { pe_rsc_trace((*rsc), "Migration is disabled for resources with versioned parameters"); } else if (crm_is_true(value)) { set_bit((*rsc)->flags, pe_rsc_allow_migrate); } else if (value == NULL && baremetal_remote_node && !xml_has_children((*rsc)->versioned_parameters)) { /* by default, we want baremetal remote-nodes to be able * to float around the cluster without having to stop all the * resources within the remote-node before moving. Allowing * migration support enables this feature. If this ever causes * problems, migration support can be explicitly turned off with * allow-migrate=false. * We don't support migration for versioned resources, though. */ set_bit((*rsc)->flags, pe_rsc_allow_migrate); } value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_MANAGED); if (value != NULL && safe_str_neq("default", value)) { gboolean bool_value = TRUE; crm_str_to_boolean(value, &bool_value); if (bool_value == FALSE) { clear_bit((*rsc)->flags, pe_rsc_managed); } else { set_bit((*rsc)->flags, pe_rsc_managed); } } value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_MAINTENANCE); if (value != NULL && safe_str_neq("default", value)) { gboolean bool_value = FALSE; crm_str_to_boolean(value, &bool_value); if (bool_value == TRUE) { clear_bit((*rsc)->flags, pe_rsc_managed); set_bit((*rsc)->flags, pe_rsc_maintenance); } } else if (is_set(data_set->flags, pe_flag_maintenance_mode)) { clear_bit((*rsc)->flags, pe_rsc_managed); set_bit((*rsc)->flags, pe_rsc_maintenance); } pe_rsc_trace((*rsc), "Options for %s", (*rsc)->id); handle_rsc_isolation(*rsc); top = uber_parent(*rsc); value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_UNIQUE); if (crm_is_true(value) || top->variant < pe_clone) { set_bit((*rsc)->flags, pe_rsc_unique); } value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_RESTART); if (safe_str_eq(value, "restart")) { (*rsc)->restart_type = pe_restart_restart; pe_rsc_trace((*rsc), "\tDependency restart handling: restart"); } else { (*rsc)->restart_type = pe_restart_ignore; pe_rsc_trace((*rsc), "\tDependency restart handling: ignore"); } value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_MULTIPLE); if (safe_str_eq(value, "stop_only")) { (*rsc)->recovery_type = recovery_stop_only; pe_rsc_trace((*rsc), "\tMultiple running resource recovery: stop only"); } else if (safe_str_eq(value, "block")) { (*rsc)->recovery_type = recovery_block; pe_rsc_trace((*rsc), "\tMultiple running resource recovery: block"); } else { (*rsc)->recovery_type = recovery_stop_start; pe_rsc_trace((*rsc), "\tMultiple running resource recovery: stop/start"); } value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_STICKINESS); if (value != NULL && safe_str_neq("default", value)) { (*rsc)->stickiness = char2score(value); } value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_FAIL_STICKINESS); if (value != NULL && safe_str_neq("default", value)) { (*rsc)->migration_threshold = char2score(value); } else if (value == NULL) { /* Make a best-effort guess at a migration threshold for people with 0.6 configs * try with underscores and hyphens, from both the resource and global defaults section */ value = g_hash_table_lookup((*rsc)->meta, "resource-failure-stickiness"); if (value == NULL) { value = g_hash_table_lookup((*rsc)->meta, "resource_failure_stickiness"); } if (value == NULL) { value = g_hash_table_lookup(data_set->config_hash, "default-resource-failure-stickiness"); } if (value == NULL) { value = g_hash_table_lookup(data_set->config_hash, "default_resource_failure_stickiness"); } if (value) { int fail_sticky = char2score(value); if (fail_sticky == -INFINITY) { (*rsc)->migration_threshold = 1; pe_rsc_info((*rsc), "Set a migration threshold of %d for %s based on a failure-stickiness of %s", (*rsc)->migration_threshold, (*rsc)->id, value); } else if ((*rsc)->stickiness != 0 && fail_sticky != 0) { (*rsc)->migration_threshold = (*rsc)->stickiness / fail_sticky; if ((*rsc)->migration_threshold < 0) { /* Make sure it's positive */ (*rsc)->migration_threshold = 0 - (*rsc)->migration_threshold; } (*rsc)->migration_threshold += 1; pe_rsc_info((*rsc), "Calculated a migration threshold for %s of %d based on a stickiness of %d/%s", (*rsc)->id, (*rsc)->migration_threshold, (*rsc)->stickiness, value); } } } - if (safe_str_eq(rclass, "stonith")) { + if (safe_str_eq(rclass, PCMK_RESOURCE_CLASS_STONITH)) { set_bit(data_set->flags, pe_flag_have_stonith_resource); set_bit((*rsc)->flags, pe_rsc_fence_device); } value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_REQUIRES); handle_requires_pref: if (safe_str_eq(value, "nothing")) { } else if (safe_str_eq(value, "quorum")) { set_bit((*rsc)->flags, pe_rsc_needs_quorum); } else if (safe_str_eq(value, "unfencing")) { if (is_set((*rsc)->flags, pe_rsc_fence_device)) { crm_config_warn("%s is a fencing device but requires (un)fencing", (*rsc)->id); value = "quorum"; isdefault = TRUE; goto handle_requires_pref; } else if (is_not_set(data_set->flags, pe_flag_stonith_enabled)) { crm_config_warn("%s requires (un)fencing but fencing is disabled", (*rsc)->id); value = "quorum"; isdefault = TRUE; goto handle_requires_pref; } else { set_bit((*rsc)->flags, pe_rsc_needs_fencing); set_bit((*rsc)->flags, pe_rsc_needs_unfencing); } } else if (safe_str_eq(value, "fencing")) { set_bit((*rsc)->flags, pe_rsc_needs_fencing); if (is_not_set(data_set->flags, pe_flag_stonith_enabled)) { crm_config_warn("%s requires fencing but fencing is disabled", (*rsc)->id); } } else { if (value) { crm_config_err("Invalid value for %s->requires: %s%s", (*rsc)->id, value, is_set(data_set->flags, pe_flag_stonith_enabled) ? "" : " (stonith-enabled=false)"); } isdefault = TRUE; if(is_set((*rsc)->flags, pe_rsc_fence_device)) { value = "quorum"; } else if (is_set(data_set->flags, pe_flag_enable_unfencing)) { value = "unfencing"; } else if (is_set(data_set->flags, pe_flag_stonith_enabled)) { value = "fencing"; } else if (data_set->no_quorum_policy == no_quorum_ignore) { value = "nothing"; } else { value = "quorum"; } goto handle_requires_pref; } pe_rsc_trace((*rsc), "\tRequired to start: %s%s", value, isdefault?" (default)":""); value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_FAIL_TIMEOUT); if (value != NULL) { /* call crm_get_msec() and convert back to seconds */ (*rsc)->failure_timeout = (crm_get_msec(value) / 1000); } if (baremetal_remote_node) { value = g_hash_table_lookup((*rsc)->parameters, XML_REMOTE_ATTR_RECONNECT_INTERVAL); if (value) { /* reconnect delay works by setting failure_timeout and preventing the * connection from starting until the failure is cleared. */ (*rsc)->remote_reconnect_interval = (crm_get_msec(value) / 1000); /* we want to override any default failure_timeout in use when remote * reconnect_interval is in use. */ (*rsc)->failure_timeout = (*rsc)->remote_reconnect_interval; } } get_target_role(*rsc, &((*rsc)->next_role)); pe_rsc_trace((*rsc), "\tDesired next state: %s", (*rsc)->next_role != RSC_ROLE_UNKNOWN ? role2text((*rsc)->next_role) : "default"); if ((*rsc)->fns->unpack(*rsc, data_set) == FALSE) { return FALSE; } if (is_set(data_set->flags, pe_flag_symmetric_cluster)) { resource_location(*rsc, NULL, 0, "symmetric_default", data_set); } else if (container_remote_node) { /* remote resources tied to a container resource must always be allowed * to opt-in to the cluster. Whether the connection resource is actually * allowed to be placed on a node is dependent on the container resource */ resource_location(*rsc, NULL, 0, "remote_connection_default", data_set); } pe_rsc_trace((*rsc), "\tAction notification: %s", is_set((*rsc)->flags, pe_rsc_notify) ? "required" : "not required"); (*rsc)->utilization = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); unpack_instance_attributes(data_set->input, (*rsc)->xml, XML_TAG_UTILIZATION, NULL, (*rsc)->utilization, NULL, FALSE, data_set->now); /* data_set->resources = g_list_append(data_set->resources, (*rsc)); */ if (expanded_xml) { if (add_template_rsc(xml_obj, data_set) == FALSE) { return FALSE; } } return TRUE; } void common_update_score(resource_t * rsc, const char *id, int score) { node_t *node = NULL; node = pe_hash_table_lookup(rsc->allowed_nodes, id); if (node != NULL) { pe_rsc_trace(rsc, "Updating score for %s on %s: %d + %d", rsc->id, id, node->weight, score); node->weight = merge_weights(node->weight, score); } if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; common_update_score(child_rsc, id, score); } } } gboolean is_parent(resource_t *child, resource_t *rsc) { resource_t *parent = child; if (parent == NULL || rsc == NULL) { return FALSE; } while (parent->parent != NULL) { if (parent->parent == rsc) { return TRUE; } parent = parent->parent; } return FALSE; } resource_t * uber_parent(resource_t * rsc) { resource_t *parent = rsc; if (parent == NULL) { return NULL; } while (parent->parent != NULL) { parent = parent->parent; } return parent; } void common_free(resource_t * rsc) { if (rsc == NULL) { return; } pe_rsc_trace(rsc, "Freeing %s %d", rsc->id, rsc->variant); g_list_free(rsc->rsc_cons); g_list_free(rsc->rsc_cons_lhs); g_list_free(rsc->rsc_tickets); g_list_free(rsc->dangling_migrations); if (rsc->parameters != NULL) { g_hash_table_destroy(rsc->parameters); } if (rsc->versioned_parameters != NULL) { free_xml(rsc->versioned_parameters); } if (rsc->meta != NULL) { g_hash_table_destroy(rsc->meta); } if (rsc->utilization != NULL) { g_hash_table_destroy(rsc->utilization); } if (rsc->parent == NULL && is_set(rsc->flags, pe_rsc_orphan)) { free_xml(rsc->xml); rsc->xml = NULL; free_xml(rsc->orig_xml); rsc->orig_xml = NULL; /* if rsc->orig_xml, then rsc->xml is an expanded xml from a template */ } else if (rsc->orig_xml) { free_xml(rsc->xml); rsc->xml = NULL; } if (rsc->running_on) { g_list_free(rsc->running_on); rsc->running_on = NULL; } if (rsc->known_on) { g_hash_table_destroy(rsc->known_on); rsc->known_on = NULL; } if (rsc->actions) { g_list_free(rsc->actions); rsc->actions = NULL; } if (rsc->allowed_nodes) { g_hash_table_destroy(rsc->allowed_nodes); rsc->allowed_nodes = NULL; } g_list_free(rsc->fillers); g_list_free(rsc->rsc_location); pe_rsc_trace(rsc, "Resource freed"); free(rsc->id); free(rsc->clone_name); free(rsc->allocated_to); free(rsc->variant_opaque); free(rsc->pending_task); free(rsc); } diff --git a/lib/pengine/native.c b/lib/pengine/native.c index 11febf48cd..5f3cfc343d 100644 --- a/lib/pengine/native.c +++ b/lib/pengine/native.c @@ -1,936 +1,936 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #define VARIANT_NATIVE 1 #include "./variant.h" void native_add_running(resource_t * rsc, node_t * node, pe_working_set_t * data_set) { GListPtr gIter = rsc->running_on; CRM_CHECK(node != NULL, return); for (; gIter != NULL; gIter = gIter->next) { node_t *a_node = (node_t *) gIter->data; CRM_CHECK(a_node != NULL, return); if (safe_str_eq(a_node->details->id, node->details->id)) { return; } } pe_rsc_trace(rsc, "Adding %s to %s %s", rsc->id, node->details->uname, is_set(rsc->flags, pe_rsc_managed)?"":"(unmanaged)"); rsc->running_on = g_list_append(rsc->running_on, node); if (rsc->variant == pe_native) { node->details->running_rsc = g_list_append(node->details->running_rsc, rsc); } if (rsc->variant == pe_native && node->details->maintenance) { clear_bit(rsc->flags, pe_rsc_managed); } if (is_not_set(rsc->flags, pe_rsc_managed)) { resource_t *p = rsc->parent; pe_rsc_info(rsc, "resource %s isn't managed", rsc->id); resource_location(rsc, node, INFINITY, "not_managed_default", data_set); while(p && node->details->online) { /* add without the additional location constraint */ p->running_on = g_list_append(p->running_on, node); p = p->parent; } return; } if (rsc->variant == pe_native && g_list_length(rsc->running_on) > 1) { switch (rsc->recovery_type) { case recovery_stop_only: { GHashTableIter gIter; node_t *local_node = NULL; /* make sure it doesn't come up again */ g_hash_table_destroy(rsc->allowed_nodes); rsc->allowed_nodes = node_hash_from_list(data_set->nodes); g_hash_table_iter_init(&gIter, rsc->allowed_nodes); while (g_hash_table_iter_next(&gIter, NULL, (void **)&local_node)) { local_node->weight = -INFINITY; } } break; case recovery_stop_start: break; case recovery_block: clear_bit(rsc->flags, pe_rsc_managed); set_bit(rsc->flags, pe_rsc_block); /* If the group that the resource belongs to is configured with multiple-active=block, */ /* block the whole group. */ if (rsc->parent && rsc->parent->variant == pe_group && rsc->parent->recovery_type == recovery_block) { GListPtr gIter = rsc->parent->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; clear_bit(child->flags, pe_rsc_managed); set_bit(child->flags, pe_rsc_block); } } break; } crm_debug("%s is active on %d nodes including %s: %s", rsc->id, g_list_length(rsc->running_on), node->details->uname, recovery2text(rsc->recovery_type)); } else { pe_rsc_trace(rsc, "Resource %s is active on: %s", rsc->id, node->details->uname); } if (rsc->parent != NULL) { native_add_running(rsc->parent, node, data_set); } } extern void force_non_unique_clone(resource_t * rsc, const char *rid, pe_working_set_t * data_set); gboolean native_unpack(resource_t * rsc, pe_working_set_t * data_set) { resource_t *parent = uber_parent(rsc); native_variant_data_t *native_data = NULL; const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); pe_rsc_trace(rsc, "Processing resource %s...", rsc->id); native_data = calloc(1, sizeof(native_variant_data_t)); rsc->variant_opaque = native_data; if (is_set(rsc->flags, pe_rsc_unique) && rsc->parent) { - if (safe_str_eq(class, "lsb")) { + if (safe_str_eq(class, PCMK_RESOURCE_CLASS_LSB)) { resource_t *top = uber_parent(rsc); force_non_unique_clone(top, rsc->id, data_set); } } - if (safe_str_eq(class, "ocf") == FALSE) { + if (safe_str_eq(class, PCMK_RESOURCE_CLASS_OCF) == FALSE) { const char *stateful = g_hash_table_lookup(parent->meta, "stateful"); if (safe_str_eq(stateful, XML_BOOLEAN_TRUE)) { pe_err ("Resource %s is of type %s and therefore cannot be used as a master/slave resource", rsc->id, class); return FALSE; } } return TRUE; } resource_t * native_find_rsc(resource_t * rsc, const char *id, node_t * on_node, int flags) { gboolean match = FALSE; resource_t *result = NULL; GListPtr gIter = rsc->children; CRM_ASSERT(id != NULL); if (flags & pe_find_clone) { const char *rid = ID(rsc->xml); if (rsc->parent == NULL) { match = FALSE; } else if (safe_str_eq(rsc->id, id)) { match = TRUE; } else if (safe_str_eq(rid, id)) { match = TRUE; } } else { if (strcmp(rsc->id, id) == 0) { match = TRUE; } else if (is_set(flags, pe_find_renamed) && rsc->clone_name && strcmp(rsc->clone_name, id) == 0) { match = TRUE; } } if (match && on_node) { pe_rsc_trace(rsc, "Now checking %s is on %s", rsc->id, on_node->details->uname); if (is_set(flags, pe_find_current) && rsc->running_on) { GListPtr gIter = rsc->running_on; for (; gIter != NULL; gIter = gIter->next) { node_t *loc = (node_t *) gIter->data; if (loc->details == on_node->details) { return rsc; } } } else if (is_set(flags, pe_find_inactive) && rsc->running_on == NULL) { return rsc; } else if (is_not_set(flags, pe_find_current) && rsc->allocated_to && rsc->allocated_to->details == on_node->details) { return rsc; } } else if (match) { return rsc; } for (; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; result = rsc->fns->find_rsc(child, id, on_node, flags); if (result) { return result; } } return NULL; } char * native_parameter(resource_t * rsc, node_t * node, gboolean create, const char *name, pe_working_set_t * data_set) { char *value_copy = NULL; const char *value = NULL; GHashTable *hash = rsc->parameters; GHashTable *local_hash = NULL; CRM_CHECK(rsc != NULL, return NULL); CRM_CHECK(name != NULL && strlen(name) != 0, return NULL); pe_rsc_trace(rsc, "Looking up %s in %s", name, rsc->id); if (create || g_hash_table_size(rsc->parameters) == 0) { if (node != NULL) { pe_rsc_trace(rsc, "Creating hash with node %s", node->details->uname); } else { pe_rsc_trace(rsc, "Creating default hash"); } local_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); get_rsc_attributes(local_hash, rsc, node, data_set); hash = local_hash; } value = g_hash_table_lookup(hash, name); if (value == NULL) { /* try meta attributes instead */ value = g_hash_table_lookup(rsc->meta, name); } if (value != NULL) { value_copy = strdup(value); } if (local_hash != NULL) { g_hash_table_destroy(local_hash); } return value_copy; } gboolean native_active(resource_t * rsc, gboolean all) { GListPtr gIter = rsc->running_on; for (; gIter != NULL; gIter = gIter->next) { node_t *a_node = (node_t *) gIter->data; if (a_node->details->unclean) { crm_debug("Resource %s: node %s is unclean", rsc->id, a_node->details->uname); return TRUE; } else if (a_node->details->online == FALSE) { crm_debug("Resource %s: node %s is offline", rsc->id, a_node->details->uname); } else { crm_debug("Resource %s active on %s", rsc->id, a_node->details->uname); return TRUE; } } return FALSE; } struct print_data_s { long options; void *print_data; }; static void native_print_attr(gpointer key, gpointer value, gpointer user_data) { long options = ((struct print_data_s *)user_data)->options; void *print_data = ((struct print_data_s *)user_data)->print_data; status_print("Option: %s = %s\n", (char *)key, (char *)value); } static const char * native_pending_state(resource_t * rsc) { const char *pending_state = NULL; if (safe_str_eq(rsc->pending_task, CRMD_ACTION_START)) { pending_state = "Starting"; } else if (safe_str_eq(rsc->pending_task, CRMD_ACTION_STOP)) { pending_state = "Stopping"; } else if (safe_str_eq(rsc->pending_task, CRMD_ACTION_MIGRATE)) { pending_state = "Migrating"; } else if (safe_str_eq(rsc->pending_task, CRMD_ACTION_MIGRATED)) { /* Work might be done in here. */ pending_state = "Migrating"; } else if (safe_str_eq(rsc->pending_task, CRMD_ACTION_PROMOTE)) { pending_state = "Promoting"; } else if (safe_str_eq(rsc->pending_task, CRMD_ACTION_DEMOTE)) { pending_state = "Demoting"; } return pending_state; } static const char * native_pending_task(resource_t * rsc) { const char *pending_task = NULL; if (safe_str_eq(rsc->pending_task, CRMD_ACTION_NOTIFY)) { /* "Notifying" is not very useful to be shown. */ pending_task = NULL; } else if (safe_str_eq(rsc->pending_task, CRMD_ACTION_STATUS)) { pending_task = "Monitoring"; /* Pending probes are not printed, even if pending * operations are requested. If someone ever requests that * behavior, uncomment this and the corresponding part of * unpack.c:unpack_rsc_op(). */ /* } else if (safe_str_eq(rsc->pending_task, "probe")) { pending_task = "Checking"; */ } return pending_task; } static enum rsc_role_e native_displayable_role(resource_t *rsc) { enum rsc_role_e role = rsc->role; if ((role == RSC_ROLE_STARTED) && (uber_parent(rsc)->variant == pe_master)) { role = RSC_ROLE_SLAVE; } return role; } static const char * native_displayable_state(resource_t *rsc, long options) { const char *rsc_state = NULL; if (options & pe_print_pending) { rsc_state = native_pending_state(rsc); } if (rsc_state == NULL) { rsc_state = role2text(native_displayable_role(rsc)); } return rsc_state; } static void native_print_xml(resource_t * rsc, const char *pre_text, long options, void *print_data) { const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); const char *prov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); const char *rsc_state = native_displayable_state(rsc, options); const char *target_role = NULL; /* resource information. */ status_print("%sxml, XML_ATTR_TYPE)); status_print("role=\"%s\" ", rsc_state); if (rsc->meta) { target_role = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); } if (target_role) { status_print("target_role=\"%s\" ", target_role); } status_print("active=\"%s\" ", rsc->fns->active(rsc, TRUE) ? "true" : "false"); status_print("orphaned=\"%s\" ", is_set(rsc->flags, pe_rsc_orphan) ? "true" : "false"); status_print("blocked=\"%s\" ", is_set(rsc->flags, pe_rsc_block) ? "true" : "false"); status_print("managed=\"%s\" ", is_set(rsc->flags, pe_rsc_managed) ? "true" : "false"); status_print("failed=\"%s\" ", is_set(rsc->flags, pe_rsc_failed) ? "true" : "false"); status_print("failure_ignored=\"%s\" ", is_set(rsc->flags, pe_rsc_failure_ignored) ? "true" : "false"); status_print("nodes_running_on=\"%d\" ", g_list_length(rsc->running_on)); if (options & pe_print_pending) { const char *pending_task = native_pending_task(rsc); if (pending_task) { status_print("pending=\"%s\" ", pending_task); } } if (options & pe_print_dev) { status_print("provisional=\"%s\" ", is_set(rsc->flags, pe_rsc_provisional) ? "true" : "false"); status_print("runnable=\"%s\" ", is_set(rsc->flags, pe_rsc_runnable) ? "true" : "false"); status_print("priority=\"%f\" ", (double)rsc->priority); status_print("variant=\"%s\" ", crm_element_name(rsc->xml)); } /* print out the nodes this resource is running on */ if (options & pe_print_rsconly) { status_print("/>\n"); /* do nothing */ } else if (g_list_length(rsc->running_on) > 0) { GListPtr gIter = rsc->running_on; status_print(">\n"); for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; status_print("%s \n", pre_text, node->details->uname, node->details->id, node->details->online ? "false" : "true"); } status_print("%s\n", pre_text); } else { status_print("/>\n"); } } /* making this inline rather than a macro prevents a coverity "unreachable" * warning on the first usage */ static inline const char * comma_if(int i) { return i? ", " : ""; } void native_print(resource_t * rsc, const char *pre_text, long options, void *print_data) { node_t *node = NULL; const char *desc = NULL; const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); const char *kind = crm_element_value(rsc->xml, XML_ATTR_TYPE); const char *target_role = NULL; enum rsc_role_e role = native_displayable_role(rsc); int offset = 0; int flagOffset = 0; char buffer[LINE_MAX]; char flagBuffer[LINE_MAX]; CRM_ASSERT(rsc->variant == pe_native); CRM_ASSERT(kind != NULL); if (rsc->meta) { const char *is_internal = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INTERNAL_RSC); if (crm_is_true(is_internal)) { crm_trace("skipping print of internal resource %s", rsc->id); return; } target_role = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); } if (pre_text == NULL && (options & pe_print_printf)) { pre_text = " "; } if (options & pe_print_xml) { native_print_xml(rsc, pre_text, options, print_data); return; } if (rsc->running_on != NULL) { node = rsc->running_on->data; } if ((options & pe_print_rsconly) || g_list_length(rsc->running_on) > 1) { node = NULL; } if (options & pe_print_html) { if (is_not_set(rsc->flags, pe_rsc_managed)) { status_print(""); } else if (is_set(rsc->flags, pe_rsc_failed)) { status_print(""); } else if (rsc->variant == pe_native && g_list_length(rsc->running_on) == 0) { status_print(""); } else if (g_list_length(rsc->running_on) > 1) { status_print(""); } else if (is_set(rsc->flags, pe_rsc_failure_ignored)) { status_print(""); } else { status_print(""); } } if(pre_text) { offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", pre_text); } offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", rsc_printable_id(rsc)); offset += snprintf(buffer + offset, LINE_MAX - offset, "\t(%s", class); - if (safe_str_eq(class, "ocf")) { + if (safe_str_eq(class, PCMK_RESOURCE_CLASS_OCF)) { const char *prov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); offset += snprintf(buffer + offset, LINE_MAX - offset, "::%s", prov); } offset += snprintf(buffer + offset, LINE_MAX - offset, ":%s):\t", kind); if(is_set(rsc->flags, pe_rsc_orphan)) { offset += snprintf(buffer + offset, LINE_MAX - offset, " ORPHANED "); } if(role > RSC_ROLE_SLAVE && is_set(rsc->flags, pe_rsc_failed)) { offset += snprintf(buffer + offset, LINE_MAX - offset, "FAILED %s", role2text(role)); } else if(is_set(rsc->flags, pe_rsc_failed)) { offset += snprintf(buffer + offset, LINE_MAX - offset, "FAILED"); } else { const char *rsc_state = native_displayable_state(rsc, options); offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", rsc_state); } if(node) { offset += snprintf(buffer + offset, LINE_MAX - offset, " %s", node->details->uname); if (node->details->online == FALSE && node->details->unclean) { flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset, "%sUNCLEAN", comma_if(flagOffset)); } } if (options & pe_print_pending) { const char *pending_task = native_pending_task(rsc); if (pending_task) { flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset, "%s%s", comma_if(flagOffset), pending_task); } } if (target_role) { enum rsc_role_e target_role_e = text2role(target_role); /* Ignore target role Started, as it is the default anyways * (and would also allow a Master to be Master). * Show if target role limits our abilities. */ if (target_role_e == RSC_ROLE_STOPPED) { flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset, "%sdisabled", comma_if(flagOffset)); rsc->cluster->disabled_resources++; } else if (uber_parent(rsc)->variant == pe_master && target_role_e == RSC_ROLE_SLAVE) { flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset, "%starget-role:%s", comma_if(flagOffset), target_role); rsc->cluster->disabled_resources++; } } if (is_set(rsc->flags, pe_rsc_block)) { flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset, "%sblocked", comma_if(flagOffset)); rsc->cluster->blocked_resources++; } else if (is_not_set(rsc->flags, pe_rsc_managed)) { flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset, "%sunmanaged", comma_if(flagOffset)); } if(is_set(rsc->flags, pe_rsc_failure_ignored)) { flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset, "%sfailure ignored", comma_if(flagOffset)); } if ((options & pe_print_rsconly) || g_list_length(rsc->running_on) > 1) { desc = crm_element_value(rsc->xml, XML_ATTR_DESC); } CRM_LOG_ASSERT(offset > 0); if(flagOffset > 0) { status_print("%s (%s)%s%s", buffer, flagBuffer, desc?" ":"", desc?desc:""); } else { status_print("%s%s%s", buffer, desc?" ":"", desc?desc:""); } #if CURSES_ENABLED if ((options & pe_print_rsconly) || g_list_length(rsc->running_on) > 1) { /* Done */ } else if (options & pe_print_ncurses) { /* coverity[negative_returns] False positive */ move(-1, 0); } #endif if (options & pe_print_html) { status_print(" "); } if ((options & pe_print_rsconly)) { } else if (g_list_length(rsc->running_on) > 1) { GListPtr gIter = rsc->running_on; int counter = 0; if (options & pe_print_html) { status_print("
    \n"); } else if ((options & pe_print_printf) || (options & pe_print_ncurses)) { status_print("["); } for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; counter++; if (options & pe_print_html) { status_print("
  • \n%s", node->details->uname); } else if ((options & pe_print_printf) || (options & pe_print_ncurses)) { status_print(" %s", node->details->uname); } else if ((options & pe_print_log)) { status_print("\t%d : %s", counter, node->details->uname); } else { status_print("%s", node->details->uname); } if (options & pe_print_html) { status_print("
  • \n"); } } if (options & pe_print_html) { status_print("
\n"); } else if ((options & pe_print_printf) || (options & pe_print_ncurses)) { status_print(" ]"); } } if (options & pe_print_html) { status_print("
\n"); } else if (options & pe_print_suppres_nl) { /* nothing */ } else if ((options & pe_print_printf) || (options & pe_print_ncurses)) { status_print("\n"); } if (options & pe_print_details) { struct print_data_s pdata; pdata.options = options; pdata.print_data = print_data; g_hash_table_foreach(rsc->parameters, native_print_attr, &pdata); } if (options & pe_print_dev) { GHashTableIter iter; node_t *node = NULL; status_print("%s\t(%s%svariant=%s, priority=%f)", pre_text, is_set(rsc->flags, pe_rsc_provisional) ? "provisional, " : "", is_set(rsc->flags, pe_rsc_runnable) ? "" : "non-startable, ", crm_element_name(rsc->xml), (double)rsc->priority); status_print("%s\tAllowed Nodes", pre_text); g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { status_print("%s\t * %s %d", pre_text, node->details->uname, node->weight); } } if (options & pe_print_max_details) { GHashTableIter iter; node_t *node = NULL; status_print("%s\t=== Allowed Nodes\n", pre_text); g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { print_node("\t", node, FALSE); } } } void native_free(resource_t * rsc) { pe_rsc_trace(rsc, "Freeing resource action list (not the data)"); common_free(rsc); } enum rsc_role_e native_resource_state(const resource_t * rsc, gboolean current) { enum rsc_role_e role = rsc->next_role; if (current) { role = rsc->role; } pe_rsc_trace(rsc, "%s state: %s", rsc->id, role2text(role)); return role; } node_t * native_location(resource_t * rsc, GListPtr * list, gboolean current) { node_t *one = NULL; GListPtr result = NULL; if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; child->fns->location(child, &result, current); } } else if (current && rsc->running_on) { result = g_list_copy(rsc->running_on); } else if (current == FALSE && rsc->allocated_to) { result = g_list_append(NULL, rsc->allocated_to); } if (result && g_list_length(result) == 1) { one = g_list_nth_data(result, 0); } if (list) { GListPtr gIter = result; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; if (*list == NULL || pe_find_node_id(*list, node->details->id) == NULL) { *list = g_list_append(*list, node); } } } g_list_free(result); return one; } static void get_rscs_brief(GListPtr rsc_list, GHashTable * rsc_table, GHashTable * active_table) { GListPtr gIter = rsc_list; for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); const char *kind = crm_element_value(rsc->xml, XML_ATTR_TYPE); int offset = 0; char buffer[LINE_MAX]; int *rsc_counter = NULL; int *active_counter = NULL; if (rsc->variant != pe_native) { continue; } offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", class); - if (safe_str_eq(class, "ocf")) { + if (safe_str_eq(class, PCMK_RESOURCE_CLASS_OCF)) { const char *prov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); offset += snprintf(buffer + offset, LINE_MAX - offset, "::%s", prov); } offset += snprintf(buffer + offset, LINE_MAX - offset, ":%s", kind); CRM_LOG_ASSERT(offset > 0); if (rsc_table) { rsc_counter = g_hash_table_lookup(rsc_table, buffer); if (rsc_counter == NULL) { rsc_counter = calloc(1, sizeof(int)); *rsc_counter = 0; g_hash_table_insert(rsc_table, strdup(buffer), rsc_counter); } (*rsc_counter)++; } if (active_table) { GListPtr gIter2 = rsc->running_on; for (; gIter2 != NULL; gIter2 = gIter2->next) { node_t *node = (node_t *) gIter2->data; GHashTable *node_table = NULL; if (node->details->unclean == FALSE && node->details->online == FALSE) { continue; } node_table = g_hash_table_lookup(active_table, node->details->uname); if (node_table == NULL) { node_table = g_hash_table_new_full(crm_str_hash, g_str_equal, free, free); g_hash_table_insert(active_table, strdup(node->details->uname), node_table); } active_counter = g_hash_table_lookup(node_table, buffer); if (active_counter == NULL) { active_counter = calloc(1, sizeof(int)); *active_counter = 0; g_hash_table_insert(node_table, strdup(buffer), active_counter); } (*active_counter)++; } } } } static void destroy_node_table(gpointer data) { GHashTable *node_table = data; if (node_table) { g_hash_table_destroy(node_table); } } void print_rscs_brief(GListPtr rsc_list, const char *pre_text, long options, void *print_data, gboolean print_all) { GHashTable *rsc_table = g_hash_table_new_full(crm_str_hash, g_str_equal, free, free); GHashTable *active_table = g_hash_table_new_full(crm_str_hash, g_str_equal, free, destroy_node_table); GHashTableIter hash_iter; char *type = NULL; int *rsc_counter = NULL; get_rscs_brief(rsc_list, rsc_table, active_table); g_hash_table_iter_init(&hash_iter, rsc_table); while (g_hash_table_iter_next(&hash_iter, (gpointer *)&type, (gpointer *)&rsc_counter)) { GHashTableIter hash_iter2; char *node_name = NULL; GHashTable *node_table = NULL; int active_counter_all = 0; g_hash_table_iter_init(&hash_iter2, active_table); while (g_hash_table_iter_next(&hash_iter2, (gpointer *)&node_name, (gpointer *)&node_table)) { int *active_counter = g_hash_table_lookup(node_table, type); if (active_counter == NULL || *active_counter == 0) { continue; } else { active_counter_all += *active_counter; } if (options & pe_print_rsconly) { node_name = NULL; } if (options & pe_print_html) { status_print("
  • \n"); } if (print_all) { status_print("%s%d/%d\t(%s):\tActive %s\n", pre_text ? pre_text : "", active_counter ? *active_counter : 0, rsc_counter ? *rsc_counter : 0, type, active_counter && (*active_counter > 0) && node_name ? node_name : ""); } else { status_print("%s%d\t(%s):\tActive %s\n", pre_text ? pre_text : "", active_counter ? *active_counter : 0, type, active_counter && (*active_counter > 0) && node_name ? node_name : ""); } if (options & pe_print_html) { status_print("
  • \n"); } } if (print_all && active_counter_all == 0) { if (options & pe_print_html) { status_print("
  • \n"); } status_print("%s%d/%d\t(%s):\tActive\n", pre_text ? pre_text : "", active_counter_all, rsc_counter ? *rsc_counter : 0, type); if (options & pe_print_html) { status_print("
  • \n"); } } } if (rsc_table) { g_hash_table_destroy(rsc_table); rsc_table = NULL; } if (active_table) { g_hash_table_destroy(active_table); active_table = NULL; } } diff --git a/lib/pengine/remote.c b/lib/pengine/remote.c index e8d9bab627..c5a3eec1a7 100644 --- a/lib/pengine/remote.c +++ b/lib/pengine/remote.c @@ -1,133 +1,134 @@ /* * Copyright (C) 2013 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include gboolean is_rsc_baremetal_remote_node(resource_t *rsc, pe_working_set_t * data_set) { node_t *node; if (rsc == NULL) { return FALSE; } else if (rsc->is_remote_node == FALSE) { return FALSE; } node = pe_find_node(data_set->nodes, rsc->id); if (node == NULL) { return FALSE; } return is_baremetal_remote_node(node); } gboolean is_baremetal_remote_node(node_t *node) { if (is_remote_node(node) && (node->details->remote_rsc == FALSE || node->details->remote_rsc->container == FALSE)) { return TRUE; } return FALSE; } gboolean is_container_remote_node(node_t *node) { if (is_remote_node(node) && (node->details->remote_rsc && node->details->remote_rsc->container)) { return TRUE; } return FALSE; } gboolean is_remote_node(node_t *node) { if (node && node->details->type == node_remote) { return TRUE; } return FALSE; } resource_t * rsc_contains_remote_node(pe_working_set_t * data_set, resource_t *rsc) { if (is_set(data_set->flags, pe_flag_have_remote_nodes) == FALSE) { return NULL; } if (rsc->fillers) { GListPtr gIter = NULL; for (gIter = rsc->fillers; gIter != NULL; gIter = gIter->next) { resource_t *filler = (resource_t *) gIter->data; if (filler->is_remote_node) { return filler; } } } return NULL; } gboolean xml_contains_remote_node(xmlNode *xml) { const char *class = crm_element_value(xml, XML_AGENT_ATTR_CLASS); const char *provider = crm_element_value(xml, XML_AGENT_ATTR_PROVIDER); const char *agent = crm_element_value(xml, XML_ATTR_TYPE); - if (safe_str_eq(agent, "remote") && safe_str_eq(provider, "pacemaker") && safe_str_eq(class, "ocf")) { + if (safe_str_eq(agent, "remote") && safe_str_eq(provider, "pacemaker") + && safe_str_eq(class, PCMK_RESOURCE_CLASS_OCF)) { return TRUE; } return FALSE; } /*! * \internal * \brief Execute a supplied function for each guest node running on a host * * \param[in] data_set Working set for cluster * \param[in] host Host node to check * \param[in] helper Function to call for each guest node * \param[in/out] user_data Pointer to pass to helper function */ void pe_foreach_guest_node(const pe_working_set_t *data_set, const node_t *host, void (*helper)(const node_t*, void*), void *user_data) { GListPtr iter; CRM_CHECK(data_set && host && host->details && helper, return); if (!is_set(data_set->flags, pe_flag_have_remote_nodes)) { return; } for (iter = host->details->running_rsc; iter != NULL; iter = iter->next) { resource_t *rsc = (resource_t *) iter->data; if (rsc->is_remote_node && (rsc->container != NULL)) { node_t *guest_node = pe_find_node(data_set->nodes, rsc->id); if (guest_node) { (*helper)(guest_node, user_data); } } } } diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index e0a3452334..e6a8f583e6 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -1,3463 +1,3463 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include CRM_TRACE_INIT_DATA(pe_status); #define set_config_flag(data_set, option, flag) do { \ const char *tmp = pe_pref(data_set->config_hash, option); \ if(tmp) { \ if(crm_is_true(tmp)) { \ set_bit(data_set->flags, flag); \ } else { \ clear_bit(data_set->flags, flag); \ } \ } \ } while(0) gboolean unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, xmlNode ** last_failure, enum action_fail_response *failed, pe_working_set_t * data_set); static gboolean determine_remote_online_status(pe_working_set_t * data_set, node_t * this_node); static gboolean is_dangling_container_remote_node(node_t *node) { /* we are looking for a remote-node that was supposed to be mapped to a * container resource, but all traces of that container have disappeared * from both the config and the status section. */ if (is_remote_node(node) && node->details->remote_rsc && node->details->remote_rsc->container == NULL && is_set(node->details->remote_rsc->flags, pe_rsc_orphan_container_filler)) { return TRUE; } return FALSE; } void pe_fence_node(pe_working_set_t * data_set, node_t * node, const char *reason) { CRM_CHECK(node, return); /* A guest node is fenced by marking its container as failed */ if (is_container_remote_node(node)) { resource_t *rsc = node->details->remote_rsc->container; if (is_set(rsc->flags, pe_rsc_failed) == FALSE) { crm_warn("Guest node %s will be fenced (by recovering %s) %s", node->details->uname, rsc->id, reason); /* We don't mark the node as unclean, because that would prevent the * node from running resources. We want to allow it to run resources * in this transition if the recovery succeeds. */ node->details->remote_requires_reset = TRUE; set_bit(rsc->flags, pe_rsc_failed); } } else if (is_dangling_container_remote_node(node)) { crm_info("Cleaning up dangling connection resource for guest node %s %s" " (fencing is already done, guest resource no longer exists)", node->details->uname, reason); set_bit(node->details->remote_rsc->flags, pe_rsc_failed); } else if (is_baremetal_remote_node(node)) { if(pe_can_fence(data_set, node)) { crm_warn("Node %s will be fenced %s", node->details->uname, reason); } else { crm_warn("Node %s is unclean %s", node->details->uname, reason); } node->details->unclean = TRUE; node->details->remote_requires_reset = TRUE; } else if (node->details->unclean == FALSE) { if(pe_can_fence(data_set, node)) { crm_warn("Node %s will be fenced %s", node->details->uname, reason); } else { crm_warn("Node %s is unclean %s", node->details->uname, reason); } node->details->unclean = TRUE; } else { crm_trace("Huh? %s %s", node->details->uname, reason); } } gboolean unpack_config(xmlNode * config, pe_working_set_t * data_set) { const char *value = NULL; GHashTable *config_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); xmlXPathObjectPtr xpathObj = NULL; if(is_not_set(data_set->flags, pe_flag_enable_unfencing)) { xpathObj = xpath_search(data_set->input, "//nvpair[@name='provides' and @value='unfencing']"); if(xpathObj && numXpathResults(xpathObj) > 0) { set_bit(data_set->flags, pe_flag_enable_unfencing); } freeXpathObject(xpathObj); } if(is_not_set(data_set->flags, pe_flag_enable_unfencing)) { xpathObj = xpath_search(data_set->input, "//nvpair[@name='requires' and @value='unfencing']"); if(xpathObj && numXpathResults(xpathObj) > 0) { set_bit(data_set->flags, pe_flag_enable_unfencing); } freeXpathObject(xpathObj); } #ifdef REDHAT_COMPAT_6 if(is_not_set(data_set->flags, pe_flag_enable_unfencing)) { xpathObj = xpath_search(data_set->input, "//primitive[@type='fence_scsi']"); if(xpathObj && numXpathResults(xpathObj) > 0) { set_bit(data_set->flags, pe_flag_enable_unfencing); } freeXpathObject(xpathObj); } #endif data_set->config_hash = config_hash; unpack_instance_attributes(data_set->input, config, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, FALSE, data_set->now); verify_pe_options(data_set->config_hash); set_config_flag(data_set, "enable-startup-probes", pe_flag_startup_probes); if(is_not_set(data_set->flags, pe_flag_startup_probes)) { crm_info("Startup probes: disabled (dangerous)"); } value = pe_pref(data_set->config_hash, XML_ATTR_HAVE_WATCHDOG); if (value && crm_is_true(value)) { crm_notice("Watchdog will be used via SBD if fencing is required"); set_bit(data_set->flags, pe_flag_have_stonith_resource); } value = pe_pref(data_set->config_hash, "stonith-timeout"); data_set->stonith_timeout = crm_get_msec(value); crm_debug("STONITH timeout: %d", data_set->stonith_timeout); set_config_flag(data_set, "stonith-enabled", pe_flag_stonith_enabled); crm_debug("STONITH of failed nodes is %s", is_set(data_set->flags, pe_flag_stonith_enabled) ? "enabled" : "disabled"); data_set->stonith_action = pe_pref(data_set->config_hash, "stonith-action"); crm_trace("STONITH will %s nodes", data_set->stonith_action); set_config_flag(data_set, "concurrent-fencing", pe_flag_concurrent_fencing); crm_debug("Concurrent fencing is %s", is_set(data_set->flags, pe_flag_concurrent_fencing) ? "enabled" : "disabled"); set_config_flag(data_set, "stop-all-resources", pe_flag_stop_everything); crm_debug("Stop all active resources: %s", is_set(data_set->flags, pe_flag_stop_everything) ? "true" : "false"); set_config_flag(data_set, "symmetric-cluster", pe_flag_symmetric_cluster); if (is_set(data_set->flags, pe_flag_symmetric_cluster)) { crm_debug("Cluster is symmetric" " - resources can run anywhere by default"); } value = pe_pref(data_set->config_hash, "default-resource-stickiness"); data_set->default_resource_stickiness = char2score(value); crm_debug("Default stickiness: %d", data_set->default_resource_stickiness); value = pe_pref(data_set->config_hash, "no-quorum-policy"); if (safe_str_eq(value, "ignore")) { data_set->no_quorum_policy = no_quorum_ignore; } else if (safe_str_eq(value, "freeze")) { data_set->no_quorum_policy = no_quorum_freeze; } else if (safe_str_eq(value, "suicide")) { gboolean do_panic = FALSE; crm_element_value_int(data_set->input, XML_ATTR_QUORUM_PANIC, &do_panic); if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) { crm_config_err ("Setting no-quorum-policy=suicide makes no sense if stonith-enabled=false"); } if (do_panic && is_set(data_set->flags, pe_flag_stonith_enabled)) { data_set->no_quorum_policy = no_quorum_suicide; } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE && do_panic == FALSE) { crm_notice("Resetting no-quorum-policy to 'stop': The cluster has never had quorum"); data_set->no_quorum_policy = no_quorum_stop; } } else { data_set->no_quorum_policy = no_quorum_stop; } switch (data_set->no_quorum_policy) { case no_quorum_freeze: crm_debug("On loss of CCM Quorum: Freeze resources"); break; case no_quorum_stop: crm_debug("On loss of CCM Quorum: Stop ALL resources"); break; case no_quorum_suicide: crm_notice("On loss of CCM Quorum: Fence all remaining nodes"); break; case no_quorum_ignore: crm_notice("On loss of CCM Quorum: Ignore"); break; } set_config_flag(data_set, "stop-orphan-resources", pe_flag_stop_rsc_orphans); crm_trace("Orphan resources are %s", is_set(data_set->flags, pe_flag_stop_rsc_orphans) ? "stopped" : "ignored"); set_config_flag(data_set, "stop-orphan-actions", pe_flag_stop_action_orphans); crm_trace("Orphan resource actions are %s", is_set(data_set->flags, pe_flag_stop_action_orphans) ? "stopped" : "ignored"); set_config_flag(data_set, "remove-after-stop", pe_flag_remove_after_stop); crm_trace("Stopped resources are removed from the status section: %s", is_set(data_set->flags, pe_flag_remove_after_stop) ? "true" : "false"); set_config_flag(data_set, "maintenance-mode", pe_flag_maintenance_mode); crm_trace("Maintenance mode: %s", is_set(data_set->flags, pe_flag_maintenance_mode) ? "true" : "false"); if (is_set(data_set->flags, pe_flag_maintenance_mode)) { clear_bit(data_set->flags, pe_flag_is_managed_default); } else { set_config_flag(data_set, "is-managed-default", pe_flag_is_managed_default); } crm_trace("By default resources are %smanaged", is_set(data_set->flags, pe_flag_is_managed_default) ? "" : "not "); set_config_flag(data_set, "start-failure-is-fatal", pe_flag_start_failure_fatal); crm_trace("Start failures are %s", is_set(data_set->flags, pe_flag_start_failure_fatal) ? "always fatal" : "handled by failcount"); node_score_red = char2score(pe_pref(data_set->config_hash, "node-health-red")); node_score_green = char2score(pe_pref(data_set->config_hash, "node-health-green")); node_score_yellow = char2score(pe_pref(data_set->config_hash, "node-health-yellow")); crm_debug("Node scores: 'red' = %s, 'yellow' = %s, 'green' = %s", pe_pref(data_set->config_hash, "node-health-red"), pe_pref(data_set->config_hash, "node-health-yellow"), pe_pref(data_set->config_hash, "node-health-green")); data_set->placement_strategy = pe_pref(data_set->config_hash, "placement-strategy"); crm_trace("Placement strategy: %s", data_set->placement_strategy); return TRUE; } static void destroy_digest_cache(gpointer ptr) { op_digest_cache_t *data = ptr; free_xml(data->params_all); free_xml(data->params_secure); free_xml(data->params_restart); free(data->digest_all_calc); free(data->digest_restart_calc); free(data->digest_secure_calc); free(data); } static node_t * create_node(const char *id, const char *uname, const char *type, const char *score, pe_working_set_t * data_set) { node_t *new_node = NULL; if (pe_find_node(data_set->nodes, uname) != NULL) { crm_config_warn("Detected multiple node entries with uname=%s" " - this is rarely intended", uname); } new_node = calloc(1, sizeof(node_t)); if (new_node == NULL) { return NULL; } new_node->weight = char2score(score); new_node->fixed = FALSE; new_node->details = calloc(1, sizeof(struct node_shared_s)); if (new_node->details == NULL) { free(new_node); return NULL; } crm_trace("Creating node for entry %s/%s", uname, id); new_node->details->id = id; new_node->details->uname = uname; new_node->details->online = FALSE; new_node->details->shutdown = FALSE; new_node->details->rsc_discovery_enabled = TRUE; new_node->details->running_rsc = NULL; new_node->details->type = node_ping; if (safe_str_eq(type, "remote")) { new_node->details->type = node_remote; set_bit(data_set->flags, pe_flag_have_remote_nodes); } else if (type == NULL || safe_str_eq(type, "member") || safe_str_eq(type, NORMALNODE)) { new_node->details->type = node_member; } new_node->details->attrs = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if (is_remote_node(new_node)) { g_hash_table_insert(new_node->details->attrs, strdup("#kind"), strdup("remote")); } else { g_hash_table_insert(new_node->details->attrs, strdup("#kind"), strdup("cluster")); } new_node->details->utilization = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); new_node->details->digest_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_digest_cache); data_set->nodes = g_list_insert_sorted(data_set->nodes, new_node, sort_node_uname); return new_node; } static const char * expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, GHashTable **rsc_name_check) { xmlNode *xml_rsc = NULL; xmlNode *xml_tmp = NULL; xmlNode *attr_set = NULL; xmlNode *attr = NULL; const char *container_id = ID(xml_obj); const char *remote_name = NULL; const char *remote_server = NULL; const char *remote_port = NULL; const char *connect_timeout = "60s"; const char *remote_allow_migrate=NULL; char *tmp_id = NULL; for (attr_set = __xml_first_child(xml_obj); attr_set != NULL; attr_set = __xml_next_element(attr_set)) { if (safe_str_neq((const char *)attr_set->name, XML_TAG_META_SETS)) { continue; } for (attr = __xml_first_child(attr_set); attr != NULL; attr = __xml_next_element(attr)) { const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE); const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME); if (safe_str_eq(name, XML_RSC_ATTR_REMOTE_NODE)) { remote_name = value; } else if (safe_str_eq(name, "remote-addr")) { remote_server = value; } else if (safe_str_eq(name, "remote-port")) { remote_port = value; } else if (safe_str_eq(name, "remote-connect-timeout")) { connect_timeout = value; } else if (safe_str_eq(name, "remote-allow-migrate")) { remote_allow_migrate=value; } } } if (remote_name == NULL) { return NULL; } if (*rsc_name_check == NULL) { *rsc_name_check = g_hash_table_new(crm_str_hash, g_str_equal); for (xml_rsc = __xml_first_child(parent); xml_rsc != NULL; xml_rsc = __xml_next_element(xml_rsc)) { const char *id = ID(xml_rsc); /* avoiding heap allocation here because we know the duration of this hashtable allows us to */ g_hash_table_insert(*rsc_name_check, (char *) id, (char *) id); } } if (g_hash_table_lookup(*rsc_name_check, remote_name)) { crm_err("Naming conflict with remote-node=%s. remote-nodes can not have the same name as a resource.", remote_name); return NULL; } xml_rsc = create_xml_node(parent, XML_CIB_TAG_RESOURCE); crm_xml_add(xml_rsc, XML_ATTR_ID, remote_name); - crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, "ocf"); + crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, PCMK_RESOURCE_CLASS_OCF); crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, "pacemaker"); crm_xml_add(xml_rsc, XML_ATTR_TYPE, "remote"); xml_tmp = create_xml_node(xml_rsc, XML_TAG_META_SETS); tmp_id = crm_concat(remote_name, XML_TAG_META_SETS, '_'); crm_xml_add(xml_tmp, XML_ATTR_ID, tmp_id); free(tmp_id); attr = create_xml_node(xml_tmp, XML_CIB_TAG_NVPAIR); tmp_id = crm_concat(remote_name, "meta-attributes-container", '_'); crm_xml_add(attr, XML_ATTR_ID, tmp_id); crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, XML_RSC_ATTR_CONTAINER); crm_xml_add(attr, XML_NVPAIR_ATTR_VALUE, container_id); free(tmp_id); attr = create_xml_node(xml_tmp, XML_CIB_TAG_NVPAIR); tmp_id = crm_concat(remote_name, "meta-attributes-internal", '_'); crm_xml_add(attr, XML_ATTR_ID, tmp_id); crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, XML_RSC_ATTR_INTERNAL_RSC); crm_xml_add(attr, XML_NVPAIR_ATTR_VALUE, "true"); free(tmp_id); if (remote_allow_migrate) { attr = create_xml_node(xml_tmp, XML_CIB_TAG_NVPAIR); tmp_id = crm_concat(remote_name, "meta-attributes-container", '_'); crm_xml_add(attr, XML_ATTR_ID, tmp_id); crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, XML_OP_ATTR_ALLOW_MIGRATE); crm_xml_add(attr, XML_NVPAIR_ATTR_VALUE, remote_allow_migrate); free(tmp_id); } xml_tmp = create_xml_node(xml_rsc, "operations"); attr = create_xml_node(xml_tmp, XML_ATTR_OP); tmp_id = crm_concat(remote_name, "monitor-interval-30s", '_'); crm_xml_add(attr, XML_ATTR_ID, tmp_id); crm_xml_add(attr, XML_ATTR_TIMEOUT, "30s"); crm_xml_add(attr, XML_LRM_ATTR_INTERVAL, "30s"); crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, "monitor"); free(tmp_id); if (connect_timeout) { attr = create_xml_node(xml_tmp, XML_ATTR_OP); tmp_id = crm_concat(remote_name, "start-interval-0", '_'); crm_xml_add(attr, XML_ATTR_ID, tmp_id); crm_xml_add(attr, XML_ATTR_TIMEOUT, connect_timeout); crm_xml_add(attr, XML_LRM_ATTR_INTERVAL, "0"); crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, "start"); free(tmp_id); } if (remote_port || remote_server) { xml_tmp = create_xml_node(xml_rsc, XML_TAG_ATTR_SETS); tmp_id = crm_concat(remote_name, XML_TAG_ATTR_SETS, '_'); crm_xml_add(xml_tmp, XML_ATTR_ID, tmp_id); free(tmp_id); if (remote_server) { attr = create_xml_node(xml_tmp, XML_CIB_TAG_NVPAIR); tmp_id = crm_concat(remote_name, "instance-attributes-addr", '_'); crm_xml_add(attr, XML_ATTR_ID, tmp_id); crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, "addr"); crm_xml_add(attr, XML_NVPAIR_ATTR_VALUE, remote_server); free(tmp_id); } if (remote_port) { attr = create_xml_node(xml_tmp, XML_CIB_TAG_NVPAIR); tmp_id = crm_concat(remote_name, "instance-attributes-port", '_'); crm_xml_add(attr, XML_ATTR_ID, tmp_id); crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, "port"); crm_xml_add(attr, XML_NVPAIR_ATTR_VALUE, remote_port); free(tmp_id); } } return remote_name; } static void handle_startup_fencing(pe_working_set_t *data_set, node_t *new_node) { static const char *blind_faith = NULL; static gboolean unseen_are_unclean = TRUE; static gboolean need_warning = TRUE; if ((new_node->details->type == node_remote) && (new_node->details->remote_rsc == NULL)) { /* Ignore fencing for remote nodes that don't have a connection resource * associated with them. This happens when remote node entries get left * in the nodes section after the connection resource is removed. */ return; } blind_faith = pe_pref(data_set->config_hash, "startup-fencing"); if (crm_is_true(blind_faith) == FALSE) { unseen_are_unclean = FALSE; if (need_warning) { crm_warn("Blind faith: not fencing unseen nodes"); /* Warn once per run, not per node and transition */ need_warning = FALSE; } } if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE || unseen_are_unclean == FALSE) { /* blind faith... */ new_node->details->unclean = FALSE; } else { /* all nodes are unclean until we've seen their * status entry */ new_node->details->unclean = TRUE; } /* We need to be able to determine if a node's status section * exists or not separate from whether the node is unclean. */ new_node->details->unseen = TRUE; } gboolean unpack_nodes(xmlNode * xml_nodes, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; node_t *new_node = NULL; const char *id = NULL; const char *uname = NULL; const char *type = NULL; const char *score = NULL; for (xml_obj = __xml_first_child(xml_nodes); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) { if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_NODE, TRUE)) { new_node = NULL; id = crm_element_value(xml_obj, XML_ATTR_ID); uname = crm_element_value(xml_obj, XML_ATTR_UNAME); type = crm_element_value(xml_obj, XML_ATTR_TYPE); score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE); crm_trace("Processing node %s/%s", uname, id); if (id == NULL) { crm_config_err("Must specify id tag in "); continue; } new_node = create_node(id, uname, type, score, data_set); if (new_node == NULL) { return FALSE; } /* if(data_set->have_quorum == FALSE */ /* && data_set->no_quorum_policy == no_quorum_stop) { */ /* /\* start shutting resources down *\/ */ /* new_node->weight = -INFINITY; */ /* } */ handle_startup_fencing(data_set, new_node); add_node_attrs(xml_obj, new_node, FALSE, data_set); unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_UTILIZATION, NULL, new_node->details->utilization, NULL, FALSE, data_set->now); crm_trace("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME)); } } if (data_set->localhost && pe_find_node(data_set->nodes, data_set->localhost) == NULL) { crm_info("Creating a fake local node"); create_node(data_set->localhost, data_set->localhost, NULL, 0, data_set); } return TRUE; } static void setup_container(resource_t * rsc, pe_working_set_t * data_set) { const char *container_id = NULL; if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; setup_container(child_rsc, data_set); } return; } container_id = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_CONTAINER); if (container_id && safe_str_neq(container_id, rsc->id)) { resource_t *container = pe_find_resource(data_set->resources, container_id); if (container) { rsc->container = container; container->fillers = g_list_append(container->fillers, rsc); pe_rsc_trace(rsc, "Resource %s's container is %s", rsc->id, container_id); } else { pe_err("Resource %s: Unknown resource container (%s)", rsc->id, container_id); } } } gboolean unpack_remote_nodes(xmlNode * xml_resources, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; GHashTable *rsc_name_check = NULL; /* generate remote nodes from resource config before unpacking resources */ for (xml_obj = __xml_first_child(xml_resources); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) { const char *new_node_id = NULL; /* first check if this is a bare metal remote node. Bare metal remote nodes * are defined as a resource primitive only. */ if (xml_contains_remote_node(xml_obj)) { new_node_id = ID(xml_obj); /* The "pe_find_node" check is here to make sure we don't iterate over * an expanded node that has already been added to the node list. */ if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) { crm_trace("Found baremetal remote node %s in container resource %s", new_node_id, ID(xml_obj)); create_node(new_node_id, new_node_id, "remote", NULL, data_set); } continue; } /* Now check for guest remote nodes. * guest remote nodes are defined within a resource primitive. * Example1: a vm resource might be configured as a remote node. * Example2: a vm resource might be configured within a group to be a remote node. * Note: right now we only support guest remote nodes in as a standalone primitive * or a primitive within a group. No cloned primitives can be a guest remote node * right now */ if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_RESOURCE, TRUE)) { /* expands a metadata defined remote resource into the xml config * as an actual rsc primitive to be unpacked later. */ new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources, &rsc_name_check); if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) { crm_trace("Found guest remote node %s in container resource %s", new_node_id, ID(xml_obj)); create_node(new_node_id, new_node_id, "remote", NULL, data_set); } continue; } else if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_GROUP, TRUE)) { xmlNode *xml_obj2 = NULL; /* search through a group to see if any of the primitive contain a remote node. */ for (xml_obj2 = __xml_first_child(xml_obj); xml_obj2 != NULL; xml_obj2 = __xml_next_element(xml_obj2)) { new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources, &rsc_name_check); if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) { crm_trace("Found guest remote node %s in container resource %s which is in group %s", new_node_id, ID(xml_obj2), ID(xml_obj)); create_node(new_node_id, new_node_id, "remote", NULL, data_set); } } } } if (rsc_name_check) { g_hash_table_destroy(rsc_name_check); } return TRUE; } /* Call this after all the nodes and resources have been * unpacked, but before the status section is read. * * A remote node's online status is reflected by the state * of the remote node's connection resource. We need to link * the remote node to this connection resource so we can have * easy access to the connection resource during the PE calculations. */ static void link_rsc2remotenode(pe_working_set_t *data_set, resource_t *new_rsc) { node_t *remote_node = NULL; if (new_rsc->is_remote_node == FALSE) { return; } if (is_set(data_set->flags, pe_flag_quick_location)) { /* remote_nodes and remote_resources are not linked in quick location calculations */ return; } print_resource(LOG_DEBUG_3, "Linking remote-node connection resource, ", new_rsc, FALSE); remote_node = pe_find_node(data_set->nodes, new_rsc->id); CRM_CHECK(remote_node != NULL, return;); remote_node->details->remote_rsc = new_rsc; /* If this is a baremetal remote-node (no container resource * associated with it) then we need to handle startup fencing the same way * as cluster nodes. */ if (new_rsc->container == NULL) { handle_startup_fencing(data_set, remote_node); } else { /* At this point we know if the remote node is a container or baremetal * remote node, update the #kind attribute if a container is involved */ g_hash_table_replace(remote_node->details->attrs, strdup("#kind"), strdup("container")); } } static void destroy_tag(gpointer data) { tag_t *tag = data; if (tag) { free(tag->id); g_list_free_full(tag->refs, free); free(tag); } } gboolean unpack_resources(xmlNode * xml_resources, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; GListPtr gIter = NULL; data_set->template_rsc_sets = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_tag); for (xml_obj = __xml_first_child(xml_resources); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) { resource_t *new_rsc = NULL; if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_RSC_TEMPLATE, TRUE)) { const char *template_id = ID(xml_obj); if (template_id && g_hash_table_lookup_extended(data_set->template_rsc_sets, template_id, NULL, NULL) == FALSE) { /* Record the template's ID for the knowledge of its existence anyway. */ g_hash_table_insert(data_set->template_rsc_sets, strdup(template_id), NULL); } continue; } crm_trace("Beginning unpack... <%s id=%s... >", crm_element_name(xml_obj), ID(xml_obj)); if (common_unpack(xml_obj, &new_rsc, NULL, data_set)) { data_set->resources = g_list_append(data_set->resources, new_rsc); if (xml_contains_remote_node(xml_obj)) { new_rsc->is_remote_node = TRUE; } print_resource(LOG_DEBUG_3, "Added ", new_rsc, FALSE); } else { crm_config_err("Failed unpacking %s %s", crm_element_name(xml_obj), crm_element_value(xml_obj, XML_ATTR_ID)); if (new_rsc != NULL && new_rsc->fns != NULL) { new_rsc->fns->free(new_rsc); } } } for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; setup_container(rsc, data_set); link_rsc2remotenode(data_set, rsc); } data_set->resources = g_list_sort(data_set->resources, sort_rsc_priority); if (is_set(data_set->flags, pe_flag_quick_location)) { /* Ignore */ } else if (is_set(data_set->flags, pe_flag_stonith_enabled) && is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) { crm_config_err("Resource start-up disabled since no STONITH resources have been defined"); crm_config_err("Either configure some or disable STONITH with the stonith-enabled option"); crm_config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity"); } return TRUE; } gboolean unpack_tags(xmlNode * xml_tags, pe_working_set_t * data_set) { xmlNode *xml_tag = NULL; data_set->tags = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_tag); for (xml_tag = __xml_first_child(xml_tags); xml_tag != NULL; xml_tag = __xml_next_element(xml_tag)) { xmlNode *xml_obj_ref = NULL; const char *tag_id = ID(xml_tag); if (crm_str_eq((const char *)xml_tag->name, XML_CIB_TAG_TAG, TRUE) == FALSE) { continue; } if (tag_id == NULL) { crm_config_err("Failed unpacking %s: %s should be specified", crm_element_name(xml_tag), XML_ATTR_ID); continue; } for (xml_obj_ref = __xml_first_child(xml_tag); xml_obj_ref != NULL; xml_obj_ref = __xml_next_element(xml_obj_ref)) { const char *obj_ref = ID(xml_obj_ref); if (crm_str_eq((const char *)xml_obj_ref->name, XML_CIB_TAG_OBJ_REF, TRUE) == FALSE) { continue; } if (obj_ref == NULL) { crm_config_err("Failed unpacking %s for tag %s: %s should be specified", crm_element_name(xml_obj_ref), tag_id, XML_ATTR_ID); continue; } if (add_tag_ref(data_set->tags, tag_id, obj_ref) == FALSE) { return FALSE; } } } return TRUE; } /* The ticket state section: * "/cib/status/tickets/ticket_state" */ static gboolean unpack_ticket_state(xmlNode * xml_ticket, pe_working_set_t * data_set) { const char *ticket_id = NULL; const char *granted = NULL; const char *last_granted = NULL; const char *standby = NULL; xmlAttrPtr xIter = NULL; ticket_t *ticket = NULL; ticket_id = ID(xml_ticket); if (ticket_id == NULL || strlen(ticket_id) == 0) { return FALSE; } crm_trace("Processing ticket state for %s", ticket_id); ticket = g_hash_table_lookup(data_set->tickets, ticket_id); if (ticket == NULL) { ticket = ticket_new(ticket_id, data_set); if (ticket == NULL) { return FALSE; } } for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) { const char *prop_name = (const char *)xIter->name; const char *prop_value = crm_element_value(xml_ticket, prop_name); if (crm_str_eq(prop_name, XML_ATTR_ID, TRUE)) { continue; } g_hash_table_replace(ticket->state, strdup(prop_name), strdup(prop_value)); } granted = g_hash_table_lookup(ticket->state, "granted"); if (granted && crm_is_true(granted)) { ticket->granted = TRUE; crm_info("We have ticket '%s'", ticket->id); } else { ticket->granted = FALSE; crm_info("We do not have ticket '%s'", ticket->id); } last_granted = g_hash_table_lookup(ticket->state, "last-granted"); if (last_granted) { ticket->last_granted = crm_parse_int(last_granted, 0); } standby = g_hash_table_lookup(ticket->state, "standby"); if (standby && crm_is_true(standby)) { ticket->standby = TRUE; if (ticket->granted) { crm_info("Granted ticket '%s' is in standby-mode", ticket->id); } } else { ticket->standby = FALSE; } crm_trace("Done with ticket state for %s", ticket_id); return TRUE; } static gboolean unpack_tickets_state(xmlNode * xml_tickets, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; for (xml_obj = __xml_first_child(xml_tickets); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) { if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_TICKET_STATE, TRUE) == FALSE) { continue; } unpack_ticket_state(xml_obj, data_set); } return TRUE; } /* Compatibility with the deprecated ticket state section: * "/cib/status/tickets/instance_attributes" */ static void get_ticket_state_legacy(gpointer key, gpointer value, gpointer user_data) { const char *long_key = key; char *state_key = NULL; const char *granted_prefix = "granted-ticket-"; const char *last_granted_prefix = "last-granted-"; static int granted_prefix_strlen = 0; static int last_granted_prefix_strlen = 0; const char *ticket_id = NULL; const char *is_granted = NULL; const char *last_granted = NULL; const char *sep = NULL; ticket_t *ticket = NULL; pe_working_set_t *data_set = user_data; if (granted_prefix_strlen == 0) { granted_prefix_strlen = strlen(granted_prefix); } if (last_granted_prefix_strlen == 0) { last_granted_prefix_strlen = strlen(last_granted_prefix); } if (strstr(long_key, granted_prefix) == long_key) { ticket_id = long_key + granted_prefix_strlen; if (strlen(ticket_id)) { state_key = strdup("granted"); is_granted = value; } } else if (strstr(long_key, last_granted_prefix) == long_key) { ticket_id = long_key + last_granted_prefix_strlen; if (strlen(ticket_id)) { state_key = strdup("last-granted"); last_granted = value; } } else if ((sep = strrchr(long_key, '-'))) { ticket_id = sep + 1; state_key = strndup(long_key, strlen(long_key) - strlen(sep)); } if (ticket_id == NULL || strlen(ticket_id) == 0) { free(state_key); return; } if (state_key == NULL || strlen(state_key) == 0) { free(state_key); return; } ticket = g_hash_table_lookup(data_set->tickets, ticket_id); if (ticket == NULL) { ticket = ticket_new(ticket_id, data_set); if (ticket == NULL) { free(state_key); return; } } g_hash_table_replace(ticket->state, state_key, strdup(value)); if (is_granted) { if (crm_is_true(is_granted)) { ticket->granted = TRUE; crm_info("We have ticket '%s'", ticket->id); } else { ticket->granted = FALSE; crm_info("We do not have ticket '%s'", ticket->id); } } else if (last_granted) { ticket->last_granted = crm_parse_int(last_granted, 0); } } /* remove nodes that are down, stopping */ /* create +ve rsc_to_node constraints between resources and the nodes they are running on */ /* anything else? */ gboolean unpack_status(xmlNode * status, pe_working_set_t * data_set) { const char *id = NULL; const char *uname = NULL; xmlNode *state = NULL; xmlNode *lrm_rsc = NULL; node_t *this_node = NULL; crm_trace("Beginning unpack"); if (data_set->tickets == NULL) { data_set->tickets = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_ticket); } for (state = __xml_first_child(status); state != NULL; state = __xml_next_element(state)) { if (crm_str_eq((const char *)state->name, XML_CIB_TAG_TICKETS, TRUE)) { xmlNode *xml_tickets = state; GHashTable *state_hash = NULL; /* Compatibility with the deprecated ticket state section: * Unpack the attributes in the deprecated "/cib/status/tickets/instance_attributes" if it exists. */ state_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); unpack_instance_attributes(data_set->input, xml_tickets, XML_TAG_ATTR_SETS, NULL, state_hash, NULL, TRUE, data_set->now); g_hash_table_foreach(state_hash, get_ticket_state_legacy, data_set); if (state_hash) { g_hash_table_destroy(state_hash); } /* Unpack the new "/cib/status/tickets/ticket_state"s */ unpack_tickets_state(xml_tickets, data_set); } if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE)) { xmlNode *attrs = NULL; const char *resource_discovery_enabled = NULL; id = crm_element_value(state, XML_ATTR_ID); uname = crm_element_value(state, XML_ATTR_UNAME); this_node = pe_find_node_any(data_set->nodes, id, uname); if (uname == NULL) { /* error */ continue; } else if (this_node == NULL) { crm_config_warn("Node %s in status section no longer exists", uname); continue; } else if (is_remote_node(this_node)) { /* online state for remote nodes is determined by the * rsc state after all the unpacking is done. we do however * need to mark whether or not the node has been fenced as this plays * a role during unpacking cluster node resource state */ this_node->details->remote_was_fenced = crm_atoi(crm_element_value(state, XML_NODE_IS_FENCED), "0"); continue; } crm_trace("Processing node id=%s, uname=%s", id, uname); /* Mark the node as provisionally clean * - at least we have seen it in the current cluster's lifetime */ this_node->details->unclean = FALSE; this_node->details->unseen = FALSE; attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); add_node_attrs(attrs, this_node, TRUE, data_set); if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "standby"))) { crm_info("Node %s is in standby-mode", this_node->details->uname); this_node->details->standby = TRUE; } if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "maintenance"))) { crm_info("Node %s is in maintenance-mode", this_node->details->uname); this_node->details->maintenance = TRUE; } resource_discovery_enabled = g_hash_table_lookup(this_node->details->attrs, XML_NODE_ATTR_RSC_DISCOVERY); if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) { crm_warn("ignoring %s attribute on node %s, disabling resource discovery is not allowed on cluster nodes", XML_NODE_ATTR_RSC_DISCOVERY, this_node->details->uname); } crm_trace("determining node state"); determine_online_status(state, this_node, data_set); if (this_node->details->online && data_set->no_quorum_policy == no_quorum_suicide) { /* Everything else should flow from this automatically * At least until the PE becomes able to migrate off healthy resources */ pe_fence_node(data_set, this_node, "because the cluster does not have quorum"); } } } /* Now that we know all node states, we can safely handle migration ops */ for (state = __xml_first_child(status); state != NULL; state = __xml_next_element(state)) { if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) { continue; } id = crm_element_value(state, XML_ATTR_ID); uname = crm_element_value(state, XML_ATTR_UNAME); this_node = pe_find_node_any(data_set->nodes, id, uname); if (this_node == NULL) { crm_info("Node %s is unknown", id); continue; } else if (is_remote_node(this_node)) { /* online status of remote node can not be determined until all other * resource status is unpacked. */ continue; } else if (this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) { crm_trace("Processing lrm resource entries on healthy node: %s", this_node->details->uname); lrm_rsc = find_xml_node(state, XML_CIB_TAG_LRM, FALSE); lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE); unpack_lrm_resources(this_node, lrm_rsc, data_set); } } /* now that the rest of the cluster's status is determined * calculate remote-nodes */ unpack_remote_status(status, data_set); return TRUE; } gboolean unpack_remote_status(xmlNode * status, pe_working_set_t * data_set) { const char *id = NULL; const char *uname = NULL; const char *shutdown = NULL; GListPtr gIter = NULL; xmlNode *state = NULL; xmlNode *lrm_rsc = NULL; node_t *this_node = NULL; if (is_set(data_set->flags, pe_flag_have_remote_nodes) == FALSE) { crm_trace("no remote nodes to unpack"); return TRUE; } /* get online status */ for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { this_node = gIter->data; if ((this_node == NULL) || (is_remote_node(this_node) == FALSE)) { continue; } determine_remote_online_status(data_set, this_node); } /* process attributes */ for (state = __xml_first_child(status); state != NULL; state = __xml_next_element(state)) { const char *resource_discovery_enabled = NULL; xmlNode *attrs = NULL; if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) { continue; } id = crm_element_value(state, XML_ATTR_ID); uname = crm_element_value(state, XML_ATTR_UNAME); this_node = pe_find_node_any(data_set->nodes, id, uname); if ((this_node == NULL) || (is_remote_node(this_node) == FALSE)) { continue; } crm_trace("Processing remote node id=%s, uname=%s", id, uname); if (this_node->details->remote_requires_reset == FALSE) { this_node->details->unclean = FALSE; this_node->details->unseen = FALSE; } attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); add_node_attrs(attrs, this_node, TRUE, data_set); shutdown = g_hash_table_lookup(this_node->details->attrs, XML_CIB_ATTR_SHUTDOWN); if (shutdown != NULL && safe_str_neq("0", shutdown)) { resource_t *rsc = this_node->details->remote_rsc; crm_info("Node %s is shutting down", this_node->details->uname); this_node->details->shutdown = TRUE; rsc->next_role = RSC_ROLE_STOPPED; } if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "standby"))) { crm_info("Node %s is in standby-mode", this_node->details->uname); this_node->details->standby = TRUE; } if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "maintenance"))) { crm_info("Node %s is in maintenance-mode", this_node->details->uname); this_node->details->maintenance = TRUE; } resource_discovery_enabled = g_hash_table_lookup(this_node->details->attrs, XML_NODE_ATTR_RSC_DISCOVERY); if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) { if (is_baremetal_remote_node(this_node) && is_not_set(data_set->flags, pe_flag_stonith_enabled)) { crm_warn("ignoring %s attribute on baremetal remote node %s, disabling resource discovery requires stonith to be enabled.", XML_NODE_ATTR_RSC_DISCOVERY, this_node->details->uname); } else { /* if we're here, this is either a baremetal node and fencing is enabled, * or this is a container node which we don't care if fencing is enabled * or not on. container nodes are 'fenced' by recovering the container resource * regardless of whether fencing is enabled. */ crm_info("Node %s has resource discovery disabled", this_node->details->uname); this_node->details->rsc_discovery_enabled = FALSE; } } } /* process node rsc status */ for (state = __xml_first_child(status); state != NULL; state = __xml_next_element(state)) { if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) { continue; } id = crm_element_value(state, XML_ATTR_ID); uname = crm_element_value(state, XML_ATTR_UNAME); this_node = pe_find_node_any(data_set->nodes, id, uname); if ((this_node == NULL) || (is_remote_node(this_node) == FALSE)) { continue; } crm_trace("Processing lrm resource entries on healthy remote node: %s", this_node->details->uname); lrm_rsc = find_xml_node(state, XML_CIB_TAG_LRM, FALSE); lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE); unpack_lrm_resources(this_node, lrm_rsc, data_set); } return TRUE; } static gboolean determine_online_status_no_fencing(pe_working_set_t * data_set, xmlNode * node_state, node_t * this_node) { gboolean online = FALSE; const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE); const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER); const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER); const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED); if (!crm_is_true(in_cluster)) { crm_trace("Node is down: in_cluster=%s", crm_str(in_cluster)); } else if (safe_str_eq(is_peer, ONLINESTATUS)) { if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) { online = TRUE; } else { crm_debug("Node is not ready to run resources: %s", join); } } else if (this_node->details->expected_up == FALSE) { crm_trace("CRMd is down: in_cluster=%s", crm_str(in_cluster)); crm_trace("\tis_peer=%s, join=%s, expected=%s", crm_str(is_peer), crm_str(join), crm_str(exp_state)); } else { /* mark it unclean */ pe_fence_node(data_set, this_node, "because node is unexpectedly down"); crm_info("\tin_cluster=%s, is_peer=%s, join=%s, expected=%s", crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state)); } return online; } static gboolean determine_online_status_fencing(pe_working_set_t * data_set, xmlNode * node_state, node_t * this_node) { gboolean online = FALSE; gboolean do_terminate = FALSE; const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE); const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER); const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER); const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED); const char *terminate = g_hash_table_lookup(this_node->details->attrs, "terminate"); /* - XML_NODE_IN_CLUSTER ::= true|false - XML_NODE_IS_PEER ::= true|false|online|offline - XML_NODE_JOIN_STATE ::= member|down|pending|banned - XML_NODE_EXPECTED ::= member|down */ if (crm_is_true(terminate)) { do_terminate = TRUE; } else if (terminate != NULL && strlen(terminate) > 0) { /* could be a time() value */ char t = terminate[0]; if (t != '0' && isdigit(t)) { do_terminate = TRUE; } } crm_trace("%s: in_cluster=%s, is_peer=%s, join=%s, expected=%s, term=%d", this_node->details->uname, crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state), do_terminate); online = crm_is_true(in_cluster); if (safe_str_eq(is_peer, ONLINESTATUS)) { is_peer = XML_BOOLEAN_YES; } if (exp_state == NULL) { exp_state = CRMD_JOINSTATE_DOWN; } if (this_node->details->shutdown) { crm_debug("%s is shutting down", this_node->details->uname); /* Slightly different criteria since we can't shut down a dead peer */ online = crm_is_true(is_peer); } else if (in_cluster == NULL) { pe_fence_node(data_set, this_node, "because the peer has not been seen by the cluster"); } else if (safe_str_eq(join, CRMD_JOINSTATE_NACK)) { pe_fence_node(data_set, this_node, "because it failed the pacemaker membership criteria"); } else if (do_terminate == FALSE && safe_str_eq(exp_state, CRMD_JOINSTATE_DOWN)) { if (crm_is_true(in_cluster) || crm_is_true(is_peer)) { crm_info("- Node %s is not ready to run resources", this_node->details->uname); this_node->details->standby = TRUE; this_node->details->pending = TRUE; } else { crm_trace("%s is down or still coming up", this_node->details->uname); } } else if (do_terminate && safe_str_eq(join, CRMD_JOINSTATE_DOWN) && crm_is_true(in_cluster) == FALSE && crm_is_true(is_peer) == FALSE) { crm_info("Node %s was just shot", this_node->details->uname); online = FALSE; } else if (crm_is_true(in_cluster) == FALSE) { pe_fence_node(data_set, this_node, "because the node is no longer part of the cluster"); } else if (crm_is_true(is_peer) == FALSE) { pe_fence_node(data_set, this_node, "because our peer process is no longer available"); /* Everything is running at this point, now check join state */ } else if (do_terminate) { pe_fence_node(data_set, this_node, "because termination was requested"); } else if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) { crm_info("Node %s is active", this_node->details->uname); } else if (safe_str_eq(join, CRMD_JOINSTATE_PENDING) || safe_str_eq(join, CRMD_JOINSTATE_DOWN)) { crm_info("Node %s is not ready to run resources", this_node->details->uname); this_node->details->standby = TRUE; this_node->details->pending = TRUE; } else { pe_fence_node(data_set, this_node, "because the peer was in an unknown state"); crm_warn("%s: in-cluster=%s, is-peer=%s, join=%s, expected=%s, term=%d, shutdown=%d", this_node->details->uname, crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state), do_terminate, this_node->details->shutdown); } return online; } static gboolean determine_remote_online_status(pe_working_set_t * data_set, node_t * this_node) { resource_t *rsc = this_node->details->remote_rsc; resource_t *container = NULL; pe_node_t *host = NULL; /* If there is a node state entry for a (former) Pacemaker Remote node * but no resource creating that node, the node's connection resource will * be NULL. Consider it an offline remote node in that case. */ if (rsc == NULL) { this_node->details->online = FALSE; goto remote_online_done; } container = rsc->container; if (container && (g_list_length(rsc->running_on) == 1)) { host = rsc->running_on->data; } /* If the resource is currently started, mark it online. */ if (rsc->role == RSC_ROLE_STARTED) { crm_trace("%s node %s presumed ONLINE because connection resource is started", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->online = TRUE; } /* consider this node shutting down if transitioning start->stop */ if (rsc->role == RSC_ROLE_STARTED && rsc->next_role == RSC_ROLE_STOPPED) { crm_trace("%s node %s shutting down because connection resource is stopping", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->shutdown = TRUE; } /* Now check all the failure conditions. */ if(container && is_set(container->flags, pe_rsc_failed)) { crm_trace("Guest node %s UNCLEAN because guest resource failed", this_node->details->id); this_node->details->online = FALSE; this_node->details->remote_requires_reset = TRUE; } else if(is_set(rsc->flags, pe_rsc_failed)) { crm_trace("%s node %s OFFLINE because connection resource failed", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->online = FALSE; } else if (rsc->role == RSC_ROLE_STOPPED || (container && container->role == RSC_ROLE_STOPPED)) { crm_trace("%s node %s OFFLINE because its resource is stopped", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->online = FALSE; this_node->details->remote_requires_reset = FALSE; } else if (host && (host->details->online == FALSE) && host->details->unclean) { crm_trace("Guest node %s UNCLEAN because host is unclean", this_node->details->id); this_node->details->online = FALSE; this_node->details->remote_requires_reset = TRUE; } remote_online_done: crm_trace("Remote node %s online=%s", this_node->details->id, this_node->details->online ? "TRUE" : "FALSE"); return this_node->details->online; } gboolean determine_online_status(xmlNode * node_state, node_t * this_node, pe_working_set_t * data_set) { gboolean online = FALSE; const char *shutdown = NULL; const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED); if (this_node == NULL) { crm_config_err("No node to check"); return online; } this_node->details->shutdown = FALSE; this_node->details->expected_up = FALSE; shutdown = g_hash_table_lookup(this_node->details->attrs, XML_CIB_ATTR_SHUTDOWN); if (shutdown != NULL && safe_str_neq("0", shutdown)) { this_node->details->shutdown = TRUE; } else if (safe_str_eq(exp_state, CRMD_JOINSTATE_MEMBER)) { this_node->details->expected_up = TRUE; } if (this_node->details->type == node_ping) { this_node->details->unclean = FALSE; online = FALSE; /* As far as resource management is concerned, * the node is safely offline. * Anyone caught abusing this logic will be shot */ } else if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) { online = determine_online_status_no_fencing(data_set, node_state, this_node); } else { online = determine_online_status_fencing(data_set, node_state, this_node); } if (online) { this_node->details->online = TRUE; } else { /* remove node from contention */ this_node->fixed = TRUE; this_node->weight = -INFINITY; } if (online && this_node->details->shutdown) { /* don't run resources here */ this_node->fixed = TRUE; this_node->weight = -INFINITY; } if (this_node->details->type == node_ping) { crm_info("Node %s is not a pacemaker node", this_node->details->uname); } else if (this_node->details->unclean) { pe_proc_warn("Node %s is unclean", this_node->details->uname); } else if (this_node->details->online) { crm_info("Node %s is %s", this_node->details->uname, this_node->details->shutdown ? "shutting down" : this_node->details->pending ? "pending" : this_node->details->standby ? "standby" : this_node->details->maintenance ? "maintenance" : "online"); } else { crm_trace("Node %s is offline", this_node->details->uname); } return online; } char * clone_strip(const char *last_rsc_id) { int lpc = 0; char *zero = NULL; CRM_CHECK(last_rsc_id != NULL, return NULL); lpc = strlen(last_rsc_id); while (--lpc > 0) { switch (last_rsc_id[lpc]) { case 0: crm_err("Empty string: %s", last_rsc_id); return NULL; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': break; case ':': zero = calloc(1, lpc + 1); memcpy(zero, last_rsc_id, lpc); zero[lpc] = 0; return zero; default: goto done; } } done: zero = strdup(last_rsc_id); return zero; } char * clone_zero(const char *last_rsc_id) { int lpc = 0; char *zero = NULL; CRM_CHECK(last_rsc_id != NULL, return NULL); if (last_rsc_id != NULL) { lpc = strlen(last_rsc_id); } while (--lpc > 0) { switch (last_rsc_id[lpc]) { case 0: return NULL; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': break; case ':': zero = calloc(1, lpc + 3); memcpy(zero, last_rsc_id, lpc); zero[lpc] = ':'; zero[lpc + 1] = '0'; zero[lpc + 2] = 0; return zero; default: goto done; } } done: lpc = strlen(last_rsc_id); zero = calloc(1, lpc + 3); memcpy(zero, last_rsc_id, lpc); zero[lpc] = ':'; zero[lpc + 1] = '0'; zero[lpc + 2] = 0; crm_trace("%s -> %s", last_rsc_id, zero); return zero; } static resource_t * create_fake_resource(const char *rsc_id, xmlNode * rsc_entry, pe_working_set_t * data_set) { resource_t *rsc = NULL; xmlNode *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE); copy_in_properties(xml_rsc, rsc_entry); crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id); crm_log_xml_debug(xml_rsc, "Orphan resource"); if (!common_unpack(xml_rsc, &rsc, NULL, data_set)) { return NULL; } if (xml_contains_remote_node(xml_rsc)) { node_t *node; crm_debug("Detected orphaned remote node %s", rsc_id); rsc->is_remote_node = TRUE; node = pe_find_node(data_set->nodes, rsc_id); if (node == NULL) { node = create_node(rsc_id, rsc_id, "remote", NULL, data_set); } link_rsc2remotenode(data_set, rsc); if (node) { crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id); node->details->shutdown = TRUE; } } if (crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER)) { /* This orphaned rsc needs to be mapped to a container. */ crm_trace("Detected orphaned container filler %s", rsc_id); set_bit(rsc->flags, pe_rsc_orphan_container_filler); } set_bit(rsc->flags, pe_rsc_orphan); data_set->resources = g_list_append(data_set->resources, rsc); return rsc; } extern resource_t *create_child_clone(resource_t * rsc, int sub_id, pe_working_set_t * data_set); static resource_t * find_anonymous_clone(pe_working_set_t * data_set, node_t * node, resource_t * parent, const char *rsc_id) { GListPtr rIter = NULL; resource_t *rsc = NULL; gboolean skip_inactive = FALSE; CRM_ASSERT(parent != NULL); CRM_ASSERT(parent->variant == pe_clone || parent->variant == pe_master); CRM_ASSERT(is_not_set(parent->flags, pe_rsc_unique)); /* Find an instance active (or partially active for grouped clones) on the specified node */ pe_rsc_trace(parent, "Looking for %s on %s in %s", rsc_id, node->details->uname, parent->id); for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) { GListPtr nIter = NULL; GListPtr locations = NULL; resource_t *child = rIter->data; child->fns->location(child, &locations, TRUE); if (locations == NULL) { pe_rsc_trace(child, "Resource %s, skip inactive", child->id); continue; } for (nIter = locations; nIter && rsc == NULL; nIter = nIter->next) { node_t *childnode = nIter->data; if (childnode->details == node->details) { /* ->find_rsc() because we might be a cloned group */ rsc = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone); if(rsc) { pe_rsc_trace(rsc, "Resource %s, active", rsc->id); } } /* Keep this block, it means we'll do the right thing if * anyone toggles the unique flag to 'off' */ if (rsc && rsc->running_on) { crm_notice("/Anonymous/ clone %s is already running on %s", parent->id, node->details->uname); skip_inactive = TRUE; rsc = NULL; } } g_list_free(locations); } /* Find an inactive instance */ if (skip_inactive == FALSE) { pe_rsc_trace(parent, "Looking for %s anywhere", rsc_id); for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) { GListPtr locations = NULL; resource_t *child = rIter->data; if (is_set(child->flags, pe_rsc_block)) { pe_rsc_trace(child, "Skip: blocked in stopped state"); continue; } child->fns->location(child, &locations, TRUE); if (locations == NULL) { /* ->find_rsc() because we might be a cloned group */ rsc = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone); pe_rsc_trace(parent, "Resource %s, empty slot", rsc->id); } g_list_free(locations); } } if (rsc == NULL) { /* Create an extra orphan */ resource_t *top = create_child_clone(parent, -1, data_set); /* ->find_rsc() because we might be a cloned group */ rsc = top->fns->find_rsc(top, rsc_id, NULL, pe_find_clone); CRM_ASSERT(rsc != NULL); pe_rsc_debug(parent, "Created orphan %s for %s: %s on %s", top->id, parent->id, rsc_id, node->details->uname); } if (safe_str_neq(rsc_id, rsc->id)) { pe_rsc_debug(rsc, "Internally renamed %s on %s to %s%s", rsc_id, node->details->uname, rsc->id, is_set(rsc->flags, pe_rsc_orphan) ? " (ORPHAN)" : ""); } return rsc; } static resource_t * unpack_find_resource(pe_working_set_t * data_set, node_t * node, const char *rsc_id, xmlNode * rsc_entry) { resource_t *rsc = NULL; resource_t *parent = NULL; crm_trace("looking for %s", rsc_id); rsc = pe_find_resource(data_set->resources, rsc_id); /* no match */ if (rsc == NULL) { /* Even when clone-max=0, we still create a single :0 orphan to match against */ char *tmp = clone_zero(rsc_id); resource_t *clone0 = pe_find_resource(data_set->resources, tmp); if (clone0 && is_not_set(clone0->flags, pe_rsc_unique)) { rsc = clone0; } else { crm_trace("%s is not known as %s either", rsc_id, tmp); } parent = uber_parent(clone0); free(tmp); crm_trace("%s not found: %s", rsc_id, parent ? parent->id : "orphan"); } else if (rsc->variant > pe_native) { crm_trace("%s is no longer a primitive resource, the lrm_resource entry is obsolete", rsc_id); return NULL; } else { parent = uber_parent(rsc); } if (parent && parent->variant > pe_group) { if (is_not_set(parent->flags, pe_rsc_unique)) { char *base = clone_strip(rsc_id); rsc = find_anonymous_clone(data_set, node, parent, base); CRM_ASSERT(rsc != NULL); free(base); } if (rsc && safe_str_neq(rsc_id, rsc->id)) { free(rsc->clone_name); rsc->clone_name = strdup(rsc_id); } } return rsc; } static resource_t * process_orphan_resource(xmlNode * rsc_entry, node_t * node, pe_working_set_t * data_set) { resource_t *rsc = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); crm_debug("Detected orphan resource %s on %s", rsc_id, node->details->uname); rsc = create_fake_resource(rsc_id, rsc_entry, data_set); if (is_set(data_set->flags, pe_flag_stop_rsc_orphans) == FALSE) { clear_bit(rsc->flags, pe_rsc_managed); } else { print_resource(LOG_DEBUG_3, "Added orphan", rsc, FALSE); CRM_CHECK(rsc != NULL, return NULL); resource_location(rsc, NULL, -INFINITY, "__orphan_dont_run__", data_set); } return rsc; } static void process_rsc_state(resource_t * rsc, node_t * node, enum action_fail_response on_fail, xmlNode * migrate_op, pe_working_set_t * data_set) { node_t *tmpnode = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s", rsc->id, role2text(rsc->role), node->details->uname, fail2text(on_fail)); /* process current state */ if (rsc->role != RSC_ROLE_UNKNOWN) { resource_t *iter = rsc; while (iter) { if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) { node_t *n = node_copy(node); pe_rsc_trace(rsc, "%s (aka. %s) known on %s", rsc->id, rsc->clone_name, n->details->uname); g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n); } if (is_set(iter->flags, pe_rsc_unique)) { break; } iter = iter->parent; } } /* If a managed resource is believed to be running, but node is down ... */ if (rsc->role > RSC_ROLE_STOPPED && node->details->online == FALSE && node->details->maintenance == FALSE && is_set(rsc->flags, pe_rsc_managed)) { char *reason = NULL; gboolean should_fence = FALSE; /* If this is a guest node, fence it (regardless of whether fencing is * enabled, because guest node fencing is done by recovery of the * container resource rather than by stonithd). Mark the resource * we're processing as failed. When the guest comes back up, its * operation history in the CIB will be cleared, freeing the affected * resource to run again once we are sure we know its state. */ if (is_container_remote_node(node)) { set_bit(rsc->flags, pe_rsc_failed); should_fence = TRUE; } else if (is_set(data_set->flags, pe_flag_stonith_enabled)) { if (is_baremetal_remote_node(node) && node->details->remote_rsc && is_not_set(node->details->remote_rsc->flags, pe_rsc_failed)) { /* setting unseen = true means that fencing of the remote node will * only occur if the connection resource is not going to start somewhere. * This allows connection resources on a failed cluster-node to move to * another node without requiring the baremetal remote nodes to be fenced * as well. */ node->details->unseen = TRUE; reason = crm_strdup_printf("because %s is active there. Fencing will be revoked if remote-node connection can be re-established on another cluster-node.", rsc->id); } should_fence = TRUE; } if (should_fence) { if (reason == NULL) { reason = crm_strdup_printf("because %s is thought to be active there", rsc->id); } pe_fence_node(data_set, node, reason); } free(reason); } if (node->details->unclean) { /* No extra processing needed * Also allows resources to be started again after a node is shot */ on_fail = action_fail_ignore; } switch (on_fail) { case action_fail_ignore: /* nothing to do */ break; case action_fail_fence: /* treat it as if it is still running * but also mark the node as unclean */ pe_fence_node(data_set, node, "because of resource failure(s)"); break; case action_fail_standby: node->details->standby = TRUE; node->details->standby_onfail = TRUE; break; case action_fail_block: /* is_managed == FALSE will prevent any * actions being sent for the resource */ clear_bit(rsc->flags, pe_rsc_managed); set_bit(rsc->flags, pe_rsc_block); break; case action_fail_migrate: /* make sure it comes up somewhere else * or not at all */ resource_location(rsc, node, -INFINITY, "__action_migration_auto__", data_set); break; case action_fail_stop: rsc->next_role = RSC_ROLE_STOPPED; break; case action_fail_recover: if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { set_bit(rsc->flags, pe_rsc_failed); stop_action(rsc, node, FALSE); } break; case action_fail_restart_container: set_bit(rsc->flags, pe_rsc_failed); if (rsc->container) { stop_action(rsc->container, node, FALSE); } else if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { stop_action(rsc, node, FALSE); } break; case action_fail_reset_remote: set_bit(rsc->flags, pe_rsc_failed); if (is_set(data_set->flags, pe_flag_stonith_enabled)) { tmpnode = NULL; if (rsc->is_remote_node) { tmpnode = pe_find_node(data_set->nodes, rsc->id); } if (tmpnode && is_baremetal_remote_node(tmpnode) && tmpnode->details->remote_was_fenced == 0) { /* connection resource to baremetal resource failed in a way that * should result in fencing the remote-node. */ pe_fence_node(data_set, tmpnode, "because of connection failure(s)"); } } /* require the stop action regardless if fencing is occuring or not. */ if (rsc->role > RSC_ROLE_STOPPED) { stop_action(rsc, node, FALSE); } /* if reconnect delay is in use, prevent the connection from exiting the * "STOPPED" role until the failure is cleared by the delay timeout. */ if (rsc->remote_reconnect_interval) { rsc->next_role = RSC_ROLE_STOPPED; } break; } /* ensure a remote-node connection failure forces an unclean remote-node * to be fenced. By setting unseen = FALSE, the remote-node failure will * result in a fencing operation regardless if we're going to attempt to * reconnect to the remote-node in this transition or not. */ if (is_set(rsc->flags, pe_rsc_failed) && rsc->is_remote_node) { tmpnode = pe_find_node(data_set->nodes, rsc->id); if (tmpnode && tmpnode->details->unclean) { tmpnode->details->unseen = FALSE; } } if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { if (is_set(rsc->flags, pe_rsc_orphan)) { if (is_set(rsc->flags, pe_rsc_managed)) { crm_config_warn("Detected active orphan %s running on %s", rsc->id, node->details->uname); } else { crm_config_warn("Cluster configured not to stop active orphans." " %s must be stopped manually on %s", rsc->id, node->details->uname); } } native_add_running(rsc, node, data_set); if (on_fail != action_fail_ignore) { set_bit(rsc->flags, pe_rsc_failed); } } else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) { /* Only do this for older status sections that included instance numbers * Otherwise stopped instances will appear as orphans */ pe_rsc_trace(rsc, "Resetting clone_name %s for %s (stopped)", rsc->clone_name, rsc->id); free(rsc->clone_name); rsc->clone_name = NULL; } else { char *key = stop_key(rsc); GListPtr possible_matches = find_actions(rsc->actions, key, node); GListPtr gIter = possible_matches; for (; gIter != NULL; gIter = gIter->next) { action_t *stop = (action_t *) gIter->data; stop->flags |= pe_action_optional; } g_list_free(possible_matches); free(key); } } /* create active recurring operations as optional */ static void process_recurring(node_t * node, resource_t * rsc, int start_index, int stop_index, GListPtr sorted_op_list, pe_working_set_t * data_set) { int counter = -1; const char *task = NULL; const char *status = NULL; GListPtr gIter = sorted_op_list; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index); for (; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; int interval = 0; char *key = NULL; const char *id = ID(rsc_op); const char *interval_s = NULL; counter++; if (node->details->online == FALSE) { pe_rsc_trace(rsc, "Skipping %s/%s: node is offline", rsc->id, node->details->uname); break; /* Need to check if there's a monitor for role="Stopped" */ } else if (start_index < stop_index && counter <= stop_index) { pe_rsc_trace(rsc, "Skipping %s/%s: resource is not active", id, node->details->uname); continue; } else if (counter < start_index) { pe_rsc_trace(rsc, "Skipping %s/%s: old %d", id, node->details->uname, counter); continue; } interval_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL); interval = crm_parse_int(interval_s, "0"); if (interval == 0) { pe_rsc_trace(rsc, "Skipping %s/%s: non-recurring", id, node->details->uname); continue; } status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); if (safe_str_eq(status, "-1")) { pe_rsc_trace(rsc, "Skipping %s/%s: status", id, node->details->uname); continue; } task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); /* create the action */ key = generate_op_key(rsc->id, task, interval); pe_rsc_trace(rsc, "Creating %s/%s", key, node->details->uname); custom_action(rsc, key, task, node, TRUE, TRUE, data_set); } } void calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index) { int counter = -1; int implied_monitor_start = -1; int implied_master_start = -1; const char *task = NULL; const char *status = NULL; GListPtr gIter = sorted_op_list; *stop_index = -1; *start_index = -1; for (; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; counter++; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); if (safe_str_eq(task, CRMD_ACTION_STOP) && safe_str_eq(status, "0")) { *stop_index = counter; } else if (safe_str_eq(task, CRMD_ACTION_START) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) { *start_index = counter; } else if ((implied_monitor_start <= *stop_index) && safe_str_eq(task, CRMD_ACTION_STATUS)) { const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC); if (safe_str_eq(rc, "0") || safe_str_eq(rc, "8")) { implied_monitor_start = counter; } } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE) || safe_str_eq(task, CRMD_ACTION_DEMOTE)) { implied_master_start = counter; } } if (*start_index == -1) { if (implied_master_start != -1) { *start_index = implied_master_start; } else if (implied_monitor_start != -1) { *start_index = implied_monitor_start; } } } static resource_t * unpack_lrm_rsc_state(node_t * node, xmlNode * rsc_entry, pe_working_set_t * data_set) { GListPtr gIter = NULL; int stop_index = -1; int start_index = -1; enum rsc_role_e req_role = RSC_ROLE_UNKNOWN; const char *task = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); resource_t *rsc = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; xmlNode *migrate_op = NULL; xmlNode *rsc_op = NULL; xmlNode *last_failure = NULL; enum action_fail_response on_fail = FALSE; enum rsc_role_e saved_role = RSC_ROLE_UNKNOWN; crm_trace("[%s] Processing %s on %s", crm_element_name(rsc_entry), rsc_id, node->details->uname); /* extract operations */ op_list = NULL; sorted_op_list = NULL; for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next_element(rsc_op)) { if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) { op_list = g_list_prepend(op_list, rsc_op); } } if (op_list == NULL) { /* if there are no operations, there is nothing to do */ return NULL; } /* find the resource */ rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry); if (rsc == NULL) { rsc = process_orphan_resource(rsc_entry, node, data_set); } CRM_ASSERT(rsc != NULL); /* process operations */ saved_role = rsc->role; on_fail = action_fail_ignore; rsc->role = RSC_ROLE_UNKNOWN; sorted_op_list = g_list_sort(op_list, sort_op_by_callid); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) { migrate_op = rsc_op; } unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail, data_set); } /* create active recurring operations as optional */ calculate_active_ops(sorted_op_list, &start_index, &stop_index); process_recurring(node, rsc, start_index, stop_index, sorted_op_list, data_set); /* no need to free the contents */ g_list_free(sorted_op_list); process_rsc_state(rsc, node, on_fail, migrate_op, data_set); if (get_target_role(rsc, &req_role)) { if (rsc->next_role == RSC_ROLE_UNKNOWN || req_role < rsc->next_role) { pe_rsc_debug(rsc, "%s: Overwriting calculated next role %s" " with requested next role %s", rsc->id, role2text(rsc->next_role), role2text(req_role)); rsc->next_role = req_role; } else if (req_role > rsc->next_role) { pe_rsc_info(rsc, "%s: Not overwriting calculated next role %s" " with requested next role %s", rsc->id, role2text(rsc->next_role), role2text(req_role)); } } if (saved_role > rsc->role) { rsc->role = saved_role; } return rsc; } static void handle_orphaned_container_fillers(xmlNode * lrm_rsc_list, pe_working_set_t * data_set) { xmlNode *rsc_entry = NULL; for (rsc_entry = __xml_first_child(lrm_rsc_list); rsc_entry != NULL; rsc_entry = __xml_next_element(rsc_entry)) { resource_t *rsc; resource_t *container; const char *rsc_id; const char *container_id; if (safe_str_neq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE)) { continue; } container_id = crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER); rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); if (container_id == NULL || rsc_id == NULL) { continue; } container = pe_find_resource(data_set->resources, container_id); if (container == NULL) { continue; } rsc = pe_find_resource(data_set->resources, rsc_id); if (rsc == NULL || is_set(rsc->flags, pe_rsc_orphan_container_filler) == FALSE || rsc->container != NULL) { continue; } pe_rsc_trace(rsc, "Mapped orphaned rsc %s's container to %s", rsc->id, container_id); rsc->container = container; container->fillers = g_list_append(container->fillers, rsc); } } gboolean unpack_lrm_resources(node_t * node, xmlNode * lrm_rsc_list, pe_working_set_t * data_set) { xmlNode *rsc_entry = NULL; gboolean found_orphaned_container_filler = FALSE; GListPtr unexpected_containers = NULL; GListPtr gIter = NULL; resource_t *remote = NULL; CRM_CHECK(node != NULL, return FALSE); crm_trace("Unpacking resources on %s", node->details->uname); for (rsc_entry = __xml_first_child(lrm_rsc_list); rsc_entry != NULL; rsc_entry = __xml_next_element(rsc_entry)) { if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) { resource_t *rsc; rsc = unpack_lrm_rsc_state(node, rsc_entry, data_set); if (!rsc) { continue; } if (is_set(rsc->flags, pe_rsc_orphan_container_filler)) { found_orphaned_container_filler = TRUE; } if (is_set(rsc->flags, pe_rsc_unexpectedly_running)) { remote = rsc_contains_remote_node(data_set, rsc); if (remote) { unexpected_containers = g_list_append(unexpected_containers, remote); } } } } /* If a container resource is unexpectedly up... and the remote-node * connection resource for that container is not up, the entire container * must be recovered. */ for (gIter = unexpected_containers; gIter != NULL; gIter = gIter->next) { remote = (resource_t *) gIter->data; if (remote->role != RSC_ROLE_STARTED) { crm_warn("Recovering container resource %s. Resource is unexpectedly running and involves a remote-node.", remote->container->id); set_bit(remote->container->flags, pe_rsc_failed); } } /* now that all the resource state has been unpacked for this node * we have to go back and map any orphaned container fillers to their * container resource */ if (found_orphaned_container_filler) { handle_orphaned_container_fillers(lrm_rsc_list, data_set); } g_list_free(unexpected_containers); return TRUE; } static void set_active(resource_t * rsc) { resource_t *top = uber_parent(rsc); if (top && top->variant == pe_master) { rsc->role = RSC_ROLE_SLAVE; } else { rsc->role = RSC_ROLE_STARTED; } } static void set_node_score(gpointer key, gpointer value, gpointer user_data) { node_t *node = value; int *score = user_data; node->weight = *score; } #define STATUS_PATH_MAX 1024 static xmlNode * find_lrm_op(const char *resource, const char *op, const char *node, const char *source, pe_working_set_t * data_set) { int offset = 0; char xpath[STATUS_PATH_MAX]; offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//node_state[@uname='%s']", node); offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//" XML_LRM_TAG_RESOURCE "[@id='%s']", resource); /* Need to check against transition_magic too? */ if (source && safe_str_eq(op, CRMD_ACTION_MIGRATE)) { offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_target='%s']", op, source); } else if (source && safe_str_eq(op, CRMD_ACTION_MIGRATED)) { offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_source='%s']", op, source); } else { offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "/" XML_LRM_TAG_RSC_OP "[@operation='%s']", op); } CRM_LOG_ASSERT(offset > 0); return get_xpath_object(xpath, data_set->input, LOG_DEBUG); } static void unpack_rsc_migration(resource_t *rsc, node_t *node, xmlNode *xml_op, pe_working_set_t * data_set) { /* * The normal sequence is (now): migrate_to(Src) -> migrate_from(Tgt) -> stop(Src) * * So if a migrate_to is followed by a stop, then we don't need to care what * happened on the target node * * Without the stop, we need to look for a successful migrate_from. * This would also imply we're no longer running on the source * * Without the stop, and without a migrate_from op we make sure the resource * gets stopped on both source and target (assuming the target is up) * */ int stop_id = 0; int task_id = 0; xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP, node->details->id, NULL, data_set); if (stop_op) { crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id); } crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id); if (stop_op == NULL || stop_id < task_id) { int from_rc = 0, from_status = 0; const char *migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); node_t *target = pe_find_node(data_set->nodes, migrate_target); node_t *source = pe_find_node(data_set->nodes, migrate_source); xmlNode *migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, migrate_target, migrate_source, data_set); rsc->role = RSC_ROLE_STARTED; /* can be master? */ if (migrate_from) { crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc); crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, &from_status); pe_rsc_trace(rsc, "%s op on %s exited with status=%d, rc=%d", ID(migrate_from), migrate_target, from_status, from_rc); } if (migrate_from && from_rc == PCMK_OCF_OK && from_status == PCMK_LRM_OP_DONE) { pe_rsc_trace(rsc, "Detected dangling migration op: %s on %s", ID(xml_op), migrate_source); /* all good * just need to arrange for the stop action to get sent * but _without_ affecting the target somehow */ rsc->role = RSC_ROLE_STOPPED; rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node); } else if (migrate_from) { /* Failed */ if (target && target->details->online) { pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target, target->details->online); native_add_running(rsc, target, data_set); } } else { /* Pending or complete but erased */ if (target && target->details->online) { pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target, target->details->online); native_add_running(rsc, target, data_set); if (source && source->details->online) { /* If we make it here we have a partial migration. The migrate_to * has completed but the migrate_from on the target has not. Hold on * to the target and source on the resource. Later on if we detect that * the resource is still going to run on that target, we may continue * the migration */ rsc->partial_migration_target = target; rsc->partial_migration_source = source; } } else { /* Consider it failed here - forces a restart, prevents migration */ set_bit(rsc->flags, pe_rsc_failed); clear_bit(rsc->flags, pe_rsc_allow_migrate); } } } } static void unpack_rsc_migration_failure(resource_t *rsc, node_t *node, xmlNode *xml_op, pe_working_set_t * data_set) { const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); CRM_ASSERT(rsc); if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) { int stop_id = 0; int migrate_id = 0; const char *migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP, migrate_source, NULL, data_set); xmlNode *migrate_op = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATE, migrate_source, migrate_target, data_set); if (stop_op) { crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id); } if (migrate_op) { crm_element_value_int(migrate_op, XML_LRM_ATTR_CALLID, &migrate_id); } /* Get our state right */ rsc->role = RSC_ROLE_STARTED; /* can be master? */ if (stop_op == NULL || stop_id < migrate_id) { node_t *source = pe_find_node(data_set->nodes, migrate_source); if (source && source->details->online) { native_add_running(rsc, source, data_set); } } } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) { int stop_id = 0; int migrate_id = 0; const char *migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP, migrate_target, NULL, data_set); xmlNode *migrate_op = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, migrate_target, migrate_source, data_set); if (stop_op) { crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id); } if (migrate_op) { crm_element_value_int(migrate_op, XML_LRM_ATTR_CALLID, &migrate_id); } /* Get our state right */ rsc->role = RSC_ROLE_STARTED; /* can be master? */ if (stop_op == NULL || stop_id < migrate_id) { node_t *target = pe_find_node(data_set->nodes, migrate_target); pe_rsc_trace(rsc, "Stop: %p %d, Migrated: %p %d", stop_op, stop_id, migrate_op, migrate_id); if (target && target->details->online) { native_add_running(rsc, target, data_set); } } else if (migrate_op == NULL) { /* Make sure it gets cleaned up, the stop may pre-date the migrate_from */ rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node); } } } static void record_failed_op(xmlNode *op, node_t* node, pe_working_set_t * data_set) { xmlNode *xIter = NULL; const char *op_key = crm_element_value(op, XML_LRM_ATTR_TASK_KEY); if (node->details->online == FALSE) { return; } for (xIter = data_set->failed->children; xIter; xIter = xIter->next) { const char *key = crm_element_value(xIter, XML_LRM_ATTR_TASK_KEY); const char *uname = crm_element_value(xIter, XML_ATTR_UNAME); if(safe_str_eq(op_key, key) && safe_str_eq(uname, node->details->uname)) { crm_trace("Skipping duplicate entry %s on %s", op_key, node->details->uname); return; } } crm_trace("Adding entry %s on %s", op_key, node->details->uname); crm_xml_add(op, XML_ATTR_UNAME, node->details->uname); add_node_copy(data_set->failed, op); } static const char *get_op_key(xmlNode *xml_op) { const char *key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); if(key == NULL) { key = ID(xml_op); } return key; } static void unpack_rsc_op_failure(resource_t * rsc, node_t * node, int rc, xmlNode * xml_op, xmlNode ** last_failure, enum action_fail_response * on_fail, pe_working_set_t * data_set) { int interval = 0; bool is_probe = FALSE; action_t *action = NULL; const char *key = get_op_key(xml_op); const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION); CRM_ASSERT(rsc); *last_failure = xml_op; crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval); if(interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) { is_probe = TRUE; pe_rsc_trace(rsc, "is a probe: %s", key); } if (rc != PCMK_OCF_NOT_INSTALLED || is_set(data_set->flags, pe_flag_symmetric_cluster)) { crm_warn("Processing failed op %s for %s on %s: %s (%d)", task, rsc->id, node->details->uname, services_ocf_exitcode_str(rc), rc); record_failed_op(xml_op, node, data_set); } else { crm_trace("Processing failed op %s for %s on %s: %s (%d)", task, rsc->id, node->details->uname, services_ocf_exitcode_str(rc), rc); } action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set); if ((action->on_fail <= action_fail_fence && *on_fail < action->on_fail) || (action->on_fail == action_fail_reset_remote && *on_fail <= action_fail_recover) || (action->on_fail == action_fail_restart_container && *on_fail <= action_fail_recover) || (*on_fail == action_fail_restart_container && action->on_fail >= action_fail_migrate)) { pe_rsc_trace(rsc, "on-fail %s -> %s for %s (%s)", fail2text(*on_fail), fail2text(action->on_fail), action->uuid, key); *on_fail = action->on_fail; } if (safe_str_eq(task, CRMD_ACTION_STOP)) { resource_location(rsc, node, -INFINITY, "__stop_fail__", data_set); } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) { unpack_rsc_migration_failure(rsc, node, xml_op, data_set); } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) { /* * staying in role=master ends up putting the PE/TE into a loop * setting role=slave is not dangerous because no master will be * promoted until the failed resource has been fully stopped */ rsc->next_role = RSC_ROLE_STOPPED; if (action->on_fail == action_fail_block) { rsc->role = RSC_ROLE_MASTER; } else { crm_warn("Forcing %s to stop after a failed demote action", rsc->id); rsc->role = RSC_ROLE_SLAVE; } } else if (compare_version("2.0", op_version) > 0 && safe_str_eq(task, CRMD_ACTION_START)) { crm_warn("Compatibility handling for failed op %s on %s", key, node->details->uname); resource_location(rsc, node, -INFINITY, "__legacy_start__", data_set); } if(is_probe && rc == PCMK_OCF_NOT_INSTALLED) { /* leave stopped */ pe_rsc_trace(rsc, "Leaving %s stopped", rsc->id); rsc->role = RSC_ROLE_STOPPED; } else if (rsc->role < RSC_ROLE_STARTED) { pe_rsc_trace(rsc, "Setting %s active", rsc->id); set_active(rsc); } pe_rsc_trace(rsc, "Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s", rsc->id, role2text(rsc->role), node->details->unclean ? "true" : "false", fail2text(action->on_fail), role2text(action->fail_role)); if (action->fail_role != RSC_ROLE_STARTED && rsc->next_role < action->fail_role) { rsc->next_role = action->fail_role; } if (action->fail_role == RSC_ROLE_STOPPED) { int score = -INFINITY; resource_t *fail_rsc = rsc; if (fail_rsc->parent) { resource_t *parent = uber_parent(fail_rsc); if ((parent->variant == pe_clone || parent->variant == pe_master) && is_not_set(parent->flags, pe_rsc_unique)) { /* for clone and master resources, if a child fails on an operation * with on-fail = stop, all the resources fail. Do this by preventing * the parent from coming up again. */ fail_rsc = parent; } } crm_warn("Making sure %s doesn't come up again", fail_rsc->id); /* make sure it doesn't come up again */ g_hash_table_destroy(fail_rsc->allowed_nodes); fail_rsc->allowed_nodes = node_hash_from_list(data_set->nodes); g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score); } pe_free_action(action); } static int determine_op_status( resource_t *rsc, int rc, int target_rc, node_t * node, xmlNode * xml_op, enum action_fail_response * on_fail, pe_working_set_t * data_set) { int interval = 0; int result = PCMK_LRM_OP_DONE; const char *key = get_op_key(xml_op); const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); bool is_probe = FALSE; CRM_ASSERT(rsc); crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval); if (interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) { is_probe = TRUE; } if (target_rc >= 0 && target_rc != rc) { result = PCMK_LRM_OP_ERROR; pe_rsc_debug(rsc, "%s on %s returned '%s' (%d) instead of the expected value: '%s' (%d)", key, node->details->uname, services_ocf_exitcode_str(rc), rc, services_ocf_exitcode_str(target_rc), target_rc); } /* we could clean this up significantly except for old LRMs and CRMs that * didn't include target_rc and liked to remap status */ switch (rc) { case PCMK_OCF_OK: if (is_probe && target_rc == 7) { result = PCMK_LRM_OP_DONE; set_bit(rsc->flags, pe_rsc_unexpectedly_running); pe_rsc_info(rsc, "Operation %s found resource %s active on %s", task, rsc->id, node->details->uname); /* legacy code for pre-0.6.5 operations */ } else if (target_rc < 0 && interval > 0 && rsc->role == RSC_ROLE_MASTER) { /* catch status ops that return 0 instead of 8 while they * are supposed to be in master mode */ result = PCMK_LRM_OP_ERROR; } break; case PCMK_OCF_NOT_RUNNING: if (is_probe || target_rc == rc || is_not_set(rsc->flags, pe_rsc_managed)) { result = PCMK_LRM_OP_DONE; rsc->role = RSC_ROLE_STOPPED; /* clear any previous failure actions */ *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; } else if (safe_str_neq(task, CRMD_ACTION_STOP)) { result = PCMK_LRM_OP_ERROR; } break; case PCMK_OCF_RUNNING_MASTER: if (is_probe) { result = PCMK_LRM_OP_DONE; pe_rsc_info(rsc, "Operation %s found resource %s active in master mode on %s", task, rsc->id, node->details->uname); } else if (target_rc == rc) { /* nothing to do */ } else if (target_rc >= 0) { result = PCMK_LRM_OP_ERROR; /* legacy code for pre-0.6.5 operations */ } else if (safe_str_neq(task, CRMD_ACTION_STATUS) || rsc->role != RSC_ROLE_MASTER) { result = PCMK_LRM_OP_ERROR; if (rsc->role != RSC_ROLE_MASTER) { crm_err("%s reported %s in master mode on %s", key, rsc->id, node->details->uname); } } rsc->role = RSC_ROLE_MASTER; break; case PCMK_OCF_DEGRADED_MASTER: case PCMK_OCF_FAILED_MASTER: rsc->role = RSC_ROLE_MASTER; result = PCMK_LRM_OP_ERROR; break; case PCMK_OCF_NOT_CONFIGURED: result = PCMK_LRM_OP_ERROR_FATAL; break; case PCMK_OCF_NOT_INSTALLED: case PCMK_OCF_INVALID_PARAM: case PCMK_OCF_INSUFFICIENT_PRIV: case PCMK_OCF_UNIMPLEMENT_FEATURE: if (rc == PCMK_OCF_UNIMPLEMENT_FEATURE && interval > 0) { result = PCMK_LRM_OP_NOTSUPPORTED; break; } else if(pe_can_fence(data_set, node) == FALSE && safe_str_eq(task, CRMD_ACTION_STOP)) { /* If a stop fails and we can't fence, there's nothing else we can do */ pe_proc_err("No further recovery can be attempted for %s: %s action failed with '%s' (%d)", rsc->id, task, services_ocf_exitcode_str(rc), rc); clear_bit(rsc->flags, pe_rsc_managed); set_bit(rsc->flags, pe_rsc_block); } result = PCMK_LRM_OP_ERROR_HARD; break; default: if (result == PCMK_LRM_OP_DONE) { crm_info("Treating %s (rc=%d) on %s as an ERROR", key, rc, node->details->uname); result = PCMK_LRM_OP_ERROR; } } return result; } static bool check_operation_expiry(resource_t *rsc, node_t *node, int rc, xmlNode *xml_op, pe_working_set_t * data_set) { bool expired = FALSE; time_t last_failure = 0; int interval = 0; int failure_timeout = rsc->failure_timeout; const char *key = get_op_key(xml_op); const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *clear_reason = NULL; /* clearing recurring monitor operation failures automatically * needs to be carefully considered */ if (safe_str_eq(crm_element_value(xml_op, XML_LRM_ATTR_TASK), "monitor") && safe_str_neq(crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL), "0")) { /* TODO, in the future we should consider not clearing recurring monitor * op failures unless the last action for a resource was a "stop" action. * otherwise it is possible that clearing the monitor failure will result * in the resource being in an undeterministic state. * * For now we handle this potential undeterministic condition for remote * node connection resources by not clearing a recurring monitor op failure * until after the node has been fenced. */ if (is_set(data_set->flags, pe_flag_stonith_enabled) && (rsc->remote_reconnect_interval)) { node_t *remote_node = pe_find_node(data_set->nodes, rsc->id); if (remote_node && remote_node->details->remote_was_fenced == 0) { if (strstr(ID(xml_op), "last_failure")) { crm_info("Waiting to clear monitor failure for remote node %s until fencing has occurred", rsc->id); } /* disabling failure timeout for this operation because we believe * fencing of the remote node should occur first. */ failure_timeout = 0; } } } if (failure_timeout > 0) { int last_run = 0; if (crm_element_value_int(xml_op, XML_RSC_OP_LAST_CHANGE, &last_run) == 0) { time_t now = get_effective_time(data_set); if (now > (last_run + failure_timeout)) { expired = TRUE; } } } if (expired) { if (failure_timeout > 0) { int fc = get_failcount_full(node, rsc, &last_failure, FALSE, xml_op, data_set); if(fc) { if (get_failcount_full(node, rsc, &last_failure, TRUE, xml_op, data_set) == 0) { clear_reason = "it expired"; } else { expired = FALSE; } } else if (rsc->remote_reconnect_interval && strstr(ID(xml_op), "last_failure")) { /* always clear last failure when reconnect interval is set */ clear_reason = "reconnect interval is set"; } } } else if (strstr(ID(xml_op), "last_failure") && ((strcmp(task, "start") == 0) || (strcmp(task, "monitor") == 0))) { op_digest_cache_t *digest_data = NULL; digest_data = rsc_action_digest_cmp(rsc, xml_op, node, data_set); if (digest_data->rc == RSC_DIGEST_UNKNOWN) { crm_trace("rsc op %s/%s on node %s does not have a op digest to compare against", rsc->id, key, node->details->id); } else if (digest_data->rc != RSC_DIGEST_MATCH) { clear_reason = "resource parameters have changed"; } } if (clear_reason != NULL) { char *key = generate_op_key(rsc->id, CRM_OP_CLEAR_FAILCOUNT, 0); action_t *clear_op = custom_action(rsc, key, CRM_OP_CLEAR_FAILCOUNT, node, FALSE, TRUE, data_set); add_hash_param(clear_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); crm_notice("Clearing failure of %s on %s because %s " CRM_XS " %s", rsc->id, node->details->uname, clear_reason, clear_op->uuid); } crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval); if(expired && interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) { switch(rc) { case PCMK_OCF_OK: case PCMK_OCF_NOT_RUNNING: case PCMK_OCF_RUNNING_MASTER: case PCMK_OCF_DEGRADED: case PCMK_OCF_DEGRADED_MASTER: /* Don't expire probes that return these values */ expired = FALSE; break; } } return expired; } int get_target_rc(xmlNode *xml_op) { int dummy = 0; int target_rc = 0; char *dummy_string = NULL; const char *key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY); if (key == NULL) { return -1; } decode_transition_key(key, &dummy_string, &dummy, &dummy, &target_rc); free(dummy_string); return target_rc; } static enum action_fail_response get_action_on_fail(resource_t *rsc, const char *key, const char *task, pe_working_set_t * data_set) { int result = action_fail_recover; action_t *action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set); result = action->on_fail; pe_free_action(action); return result; } static void update_resource_state(resource_t * rsc, node_t * node, xmlNode * xml_op, const char * task, int rc, xmlNode * last_failure, enum action_fail_response * on_fail, pe_working_set_t * data_set) { gboolean clear_past_failure = FALSE; CRM_ASSERT(rsc); CRM_ASSERT(xml_op); if (rc == PCMK_OCF_NOT_RUNNING) { clear_past_failure = TRUE; } else if (rc == PCMK_OCF_NOT_INSTALLED) { rsc->role = RSC_ROLE_STOPPED; } else if (safe_str_eq(task, CRMD_ACTION_STATUS)) { if (last_failure) { const char *op_key = get_op_key(xml_op); const char *last_failure_key = get_op_key(last_failure); if (safe_str_eq(op_key, last_failure_key)) { clear_past_failure = TRUE; } } if (rsc->role < RSC_ROLE_STARTED) { set_active(rsc); } } else if (safe_str_eq(task, CRMD_ACTION_START)) { rsc->role = RSC_ROLE_STARTED; clear_past_failure = TRUE; } else if (safe_str_eq(task, CRMD_ACTION_STOP)) { rsc->role = RSC_ROLE_STOPPED; clear_past_failure = TRUE; } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; clear_past_failure = TRUE; } else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) { /* Demote from Master does not clear an error */ rsc->role = RSC_ROLE_SLAVE; } else if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) { rsc->role = RSC_ROLE_STARTED; clear_past_failure = TRUE; } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) { unpack_rsc_migration(rsc, node, xml_op, data_set); } else if (rsc->role < RSC_ROLE_STARTED) { pe_rsc_trace(rsc, "%s active on %s", rsc->id, node->details->uname); set_active(rsc); } /* clear any previous failure actions */ if (clear_past_failure) { switch (*on_fail) { case action_fail_stop: case action_fail_fence: case action_fail_migrate: case action_fail_standby: pe_rsc_trace(rsc, "%s.%s is not cleared by a completed stop", rsc->id, fail2text(*on_fail)); break; case action_fail_block: case action_fail_ignore: case action_fail_recover: case action_fail_restart_container: *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; break; case action_fail_reset_remote: if (rsc->remote_reconnect_interval == 0) { /* when reconnect delay is not in use, the connection is allowed * to start again after the remote node is fenced and completely * stopped. Otherwise, with reconnect delay we wait for the failure * to be cleared entirely before reconnected can be attempted. */ *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; } break; } } } gboolean unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, xmlNode ** last_failure, enum action_fail_response * on_fail, pe_working_set_t * data_set) { int task_id = 0; const char *key = NULL; const char *task = NULL; const char *task_key = NULL; int rc = 0; int status = PCMK_LRM_OP_PENDING-1; int target_rc = get_target_rc(xml_op); int interval = 0; gboolean expired = FALSE; resource_t *parent = rsc; enum action_fail_response failure_strategy = action_fail_recover; CRM_CHECK(rsc != NULL, return FALSE); CRM_CHECK(node != NULL, return FALSE); CRM_CHECK(xml_op != NULL, return FALSE); task_key = get_op_key(xml_op); task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY); crm_element_value_int(xml_op, XML_LRM_ATTR_RC, &rc); crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id); crm_element_value_int(xml_op, XML_LRM_ATTR_OPSTATUS, &status); crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval); CRM_CHECK(task != NULL, return FALSE); CRM_CHECK(status <= PCMK_LRM_OP_NOT_INSTALLED, return FALSE); CRM_CHECK(status >= PCMK_LRM_OP_PENDING, return FALSE); if (safe_str_eq(task, CRMD_ACTION_NOTIFY)) { /* safe to ignore these */ return TRUE; } if (is_not_set(rsc->flags, pe_rsc_unique)) { parent = uber_parent(rsc); } pe_rsc_trace(rsc, "Unpacking task %s/%s (call_id=%d, status=%d, rc=%d) on %s (role=%s)", task_key, task, task_id, status, rc, node->details->uname, role2text(rsc->role)); if (node->details->unclean) { pe_rsc_trace(rsc, "Node %s (where %s is running) is unclean." " Further action depends on the value of the stop's on-fail attribute", node->details->uname, rsc->id); } if (status == PCMK_LRM_OP_ERROR) { /* Older versions set this if rc != 0 but it's up to us to decide */ status = PCMK_LRM_OP_DONE; } if(status != PCMK_LRM_OP_NOT_INSTALLED) { expired = check_operation_expiry(rsc, node, rc, xml_op, data_set); } /* Degraded results are informational only, re-map them to their error-free equivalents */ if (rc == PCMK_OCF_DEGRADED && safe_str_eq(task, CRMD_ACTION_STATUS)) { rc = PCMK_OCF_OK; /* Add them to the failed list to highlight them for the user */ if ((node->details->shutdown == FALSE) || (node->details->online == TRUE)) { crm_trace("Remapping %d to %d", PCMK_OCF_DEGRADED, PCMK_OCF_OK); record_failed_op(xml_op, node, data_set); } } else if (rc == PCMK_OCF_DEGRADED_MASTER && safe_str_eq(task, CRMD_ACTION_STATUS)) { rc = PCMK_OCF_RUNNING_MASTER; /* Add them to the failed list to highlight them for the user */ if ((node->details->shutdown == FALSE) || (node->details->online == TRUE)) { crm_trace("Remapping %d to %d", PCMK_OCF_DEGRADED_MASTER, PCMK_OCF_RUNNING_MASTER); record_failed_op(xml_op, node, data_set); } } if (expired && target_rc != rc) { const char *magic = crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC); pe_rsc_debug(rsc, "Expired operation '%s' on %s returned '%s' (%d) instead of the expected value: '%s' (%d)", key, node->details->uname, services_ocf_exitcode_str(rc), rc, services_ocf_exitcode_str(target_rc), target_rc); if(interval == 0) { crm_notice("Ignoring expired calculated failure %s (rc=%d, magic=%s) on %s", task_key, rc, magic, node->details->uname); goto done; } else if(node->details->online && node->details->unclean == FALSE) { crm_notice("Re-initiated expired calculated failure %s (rc=%d, magic=%s) on %s", task_key, rc, magic, node->details->uname); /* This is SO horrible, but we don't have access to CancelXmlOp() yet */ crm_xml_add(xml_op, XML_LRM_ATTR_RESTART_DIGEST, "calculated-failure-timeout"); goto done; } } if(status == PCMK_LRM_OP_DONE || status == PCMK_LRM_OP_ERROR) { status = determine_op_status(rsc, rc, target_rc, node, xml_op, on_fail, data_set); } pe_rsc_trace(rsc, "Handling status: %d", status); switch (status) { case PCMK_LRM_OP_CANCELLED: /* do nothing?? */ pe_err("Don't know what to do for cancelled ops yet"); break; case PCMK_LRM_OP_PENDING: if (safe_str_eq(task, CRMD_ACTION_START)) { set_bit(rsc->flags, pe_rsc_start_pending); set_active(rsc); } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE) && node->details->unclean) { /* If a pending migrate_to action is out on a unclean node, * we have to force the stop action on the target. */ const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); node_t *target = pe_find_node(data_set->nodes, migrate_target); if (target) { stop_action(rsc, target, FALSE); } } if (rsc->pending_task == NULL) { if (safe_str_eq(task, CRMD_ACTION_STATUS) && interval == 0) { /* Pending probes are not printed, even if pending * operations are requested. If someone ever requests that * behavior, uncomment this and the corresponding part of * native.c:native_pending_task(). */ /*rsc->pending_task = strdup("probe");*/ } else { rsc->pending_task = strdup(task); } } break; case PCMK_LRM_OP_DONE: pe_rsc_trace(rsc, "%s/%s completed on %s", rsc->id, task, node->details->uname); update_resource_state(rsc, node, xml_op, task, rc, *last_failure, on_fail, data_set); break; case PCMK_LRM_OP_NOT_INSTALLED: failure_strategy = get_action_on_fail(rsc, task_key, task, data_set); if (failure_strategy == action_fail_ignore) { crm_warn("Cannot ignore failed %s (status=%d, rc=%d) on %s: " "Resource agent doesn't exist", task_key, status, rc, node->details->uname); /* Also for printing it as "FAILED" by marking it as pe_rsc_failed later */ *on_fail = action_fail_migrate; } resource_location(parent, node, -INFINITY, "hard-error", data_set); unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set); break; case PCMK_LRM_OP_ERROR: case PCMK_LRM_OP_ERROR_HARD: case PCMK_LRM_OP_ERROR_FATAL: case PCMK_LRM_OP_TIMEOUT: case PCMK_LRM_OP_NOTSUPPORTED: failure_strategy = get_action_on_fail(rsc, task_key, task, data_set); if ((failure_strategy == action_fail_ignore) || (failure_strategy == action_fail_restart_container && safe_str_eq(task, CRMD_ACTION_STOP))) { crm_warn("Pretending the failure of %s (rc=%d) on %s succeeded", task_key, rc, node->details->uname); update_resource_state(rsc, node, xml_op, task, target_rc, *last_failure, on_fail, data_set); crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname); set_bit(rsc->flags, pe_rsc_failure_ignored); record_failed_op(xml_op, node, data_set); if (failure_strategy == action_fail_restart_container && *on_fail <= action_fail_recover) { *on_fail = failure_strategy; } } else { unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set); if(status == PCMK_LRM_OP_ERROR_HARD) { do_crm_log(rc != PCMK_OCF_NOT_INSTALLED?LOG_ERR:LOG_NOTICE, "Preventing %s from re-starting on %s: operation %s failed '%s' (%d)", parent->id, node->details->uname, task, services_ocf_exitcode_str(rc), rc); resource_location(parent, node, -INFINITY, "hard-error", data_set); } else if(status == PCMK_LRM_OP_ERROR_FATAL) { crm_err("Preventing %s from re-starting anywhere: operation %s failed '%s' (%d)", parent->id, task, services_ocf_exitcode_str(rc), rc); resource_location(parent, NULL, -INFINITY, "fatal-error", data_set); } } break; } done: pe_rsc_trace(rsc, "Resource %s after %s: role=%s", rsc->id, task, role2text(rsc->role)); return TRUE; } gboolean add_node_attrs(xmlNode * xml_obj, node_t * node, gboolean overwrite, pe_working_set_t * data_set) { const char *cluster_name = NULL; g_hash_table_insert(node->details->attrs, strdup("#uname"), strdup(node->details->uname)); g_hash_table_insert(node->details->attrs, strdup("#" XML_ATTR_ID), strdup(node->details->id)); if (safe_str_eq(node->details->id, data_set->dc_uuid)) { data_set->dc_node = node; node->details->is_dc = TRUE; g_hash_table_insert(node->details->attrs, strdup("#" XML_ATTR_DC), strdup(XML_BOOLEAN_TRUE)); } else { g_hash_table_insert(node->details->attrs, strdup("#" XML_ATTR_DC), strdup(XML_BOOLEAN_FALSE)); } cluster_name = g_hash_table_lookup(data_set->config_hash, "cluster-name"); if (cluster_name) { g_hash_table_insert(node->details->attrs, strdup("#cluster-name"), strdup(cluster_name)); } unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_ATTR_SETS, NULL, node->details->attrs, NULL, overwrite, data_set->now); if (g_hash_table_lookup(node->details->attrs, "#site-name") == NULL) { const char *site_name = g_hash_table_lookup(node->details->attrs, "site-name"); if (site_name) { /* Prefix '#' to the key */ g_hash_table_insert(node->details->attrs, strdup("#site-name"), strdup(site_name)); } else if (cluster_name) { /* Default to cluster-name if unset */ g_hash_table_insert(node->details->attrs, strdup("#site-name"), strdup(cluster_name)); } } return TRUE; } static GListPtr extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter) { int counter = -1; int stop_index = -1; int start_index = -1; xmlNode *rsc_op = NULL; GListPtr gIter = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; /* extract operations */ op_list = NULL; sorted_op_list = NULL; for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next_element(rsc_op)) { if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) { crm_xml_add(rsc_op, "resource", rsc); crm_xml_add(rsc_op, XML_ATTR_UNAME, node); op_list = g_list_prepend(op_list, rsc_op); } } if (op_list == NULL) { /* if there are no operations, there is nothing to do */ return NULL; } sorted_op_list = g_list_sort(op_list, sort_op_by_callid); /* create active recurring operations as optional */ if (active_filter == FALSE) { return sorted_op_list; } op_list = NULL; calculate_active_ops(sorted_op_list, &start_index, &stop_index); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; counter++; if (start_index < stop_index) { crm_trace("Skipping %s: not active", ID(rsc_entry)); break; } else if (counter < start_index) { crm_trace("Skipping %s: old", ID(rsc_op)); continue; } op_list = g_list_append(op_list, rsc_op); } g_list_free(sorted_op_list); return op_list; } GListPtr find_operations(const char *rsc, const char *node, gboolean active_filter, pe_working_set_t * data_set) { GListPtr output = NULL; GListPtr intermediate = NULL; xmlNode *tmp = NULL; xmlNode *status = find_xml_node(data_set->input, XML_CIB_TAG_STATUS, TRUE); node_t *this_node = NULL; xmlNode *node_state = NULL; for (node_state = __xml_first_child(status); node_state != NULL; node_state = __xml_next_element(node_state)) { if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) { const char *uname = crm_element_value(node_state, XML_ATTR_UNAME); if (node != NULL && safe_str_neq(uname, node)) { continue; } this_node = pe_find_node(data_set->nodes, uname); if(this_node == NULL) { CRM_LOG_ASSERT(this_node != NULL); continue; } else if (is_remote_node(this_node)) { determine_remote_online_status(data_set, this_node); } else { determine_online_status(node_state, this_node, data_set); } if (this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) { /* offline nodes run no resources... * unless stonith is enabled in which case we need to * make sure rsc start events happen after the stonith */ xmlNode *lrm_rsc = NULL; tmp = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); tmp = find_xml_node(tmp, XML_LRM_TAG_RESOURCES, FALSE); for (lrm_rsc = __xml_first_child(tmp); lrm_rsc != NULL; lrm_rsc = __xml_next_element(lrm_rsc)) { if (crm_str_eq((const char *)lrm_rsc->name, XML_LRM_TAG_RESOURCE, TRUE)) { const char *rsc_id = crm_element_value(lrm_rsc, XML_ATTR_ID); if (rsc != NULL && safe_str_neq(rsc_id, rsc)) { continue; } intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter); output = g_list_concat(output, intermediate); } } } } } return output; } diff --git a/lib/services/services.c b/lib/services/services.c index 0b535e69d5..679f8e78bb 100644 --- a/lib/services/services.c +++ b/lib/services/services.c @@ -1,842 +1,941 @@ /* * Copyright (C) 2010-2016 Andrew Beekhof * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include #include #include "services_private.h" #if SUPPORT_UPSTART # include #endif #if SUPPORT_SYSTEMD # include #endif /* TODO: Develop a rollover strategy */ static int operations = 0; static GHashTable *recurring_actions = NULL; /* ops waiting to run async because of conflicting active * pending ops */ static GList *blocked_ops = NULL; /* ops currently active (in-flight) */ -GList *inflight_ops = NULL; +static GList *inflight_ops = NULL; + +static void handle_blocked_ops(void); svc_action_t * services_action_create(const char *name, const char *action, int interval, int timeout) { - return resources_action_create(name, "lsb", NULL, name, action, interval, timeout, NULL, 0); + return resources_action_create(name, PCMK_RESOURCE_CLASS_LSB, NULL, name, + action, interval, timeout, NULL, 0); } const char * resources_find_service_class(const char *agent) { /* Priority is: * - lsb * - systemd * - upstart */ int rc = 0; struct stat st; char *path = NULL; #ifdef LSB_ROOT_DIR rc = asprintf(&path, "%s/%s", LSB_ROOT_DIR, agent); if (rc > 0 && stat(path, &st) == 0) { free(path); - return "lsb"; + return PCMK_RESOURCE_CLASS_LSB; } free(path); #endif #if SUPPORT_SYSTEMD if (systemd_unit_exists(agent)) { - return "systemd"; + return PCMK_RESOURCE_CLASS_SYSTEMD; } #endif #if SUPPORT_UPSTART if (upstart_job_exists(agent)) { - return "upstart"; + return PCMK_RESOURCE_CLASS_UPSTART; } #endif return NULL; } +static inline void +init_recurring_actions(void) +{ + if (recurring_actions == NULL) { + recurring_actions = g_hash_table_new_full(g_str_hash, g_str_equal, NULL, + NULL); + } +} + +/*! + * \internal + * \brief Check whether op is in-flight systemd or upstart op + * + * \param[in] op Operation to check + * + * \return TRUE if op is in-flight systemd or upstart op + */ +static inline gboolean +inflight_systemd_or_upstart(svc_action_t *op) +{ + return (safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_SYSTEMD) + || safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_UPSTART)) + && (g_list_find(inflight_ops, op) != NULL); +} + +/*! + * \internal + * \brief Expand "service" alias to an actual resource class + * + * \param[in] rsc Resource name (for logging only) + * \param[in] standard Resource class as configured + * \param[in] agent Agent name to look for + * + * \return Newly allocated string with actual resource class + * + * \note The caller is responsible for calling free() on the result. + */ +static char * +expand_resource_class(const char *rsc, const char *standard, const char *agent) +{ + char *expanded_class = NULL; + + if (strcasecmp(standard, PCMK_RESOURCE_CLASS_SERVICE) == 0) { + const char *found_class = resources_find_service_class(agent); + + if (found_class) { + crm_debug("Found %s agent %s for %s", found_class, agent, rsc); + expanded_class = strdup(found_class); + } else { + crm_info("Assuming resource class lsb for agent %s for %s", + agent, rsc); + expanded_class = strdup(PCMK_RESOURCE_CLASS_LSB); + } + } else { + expanded_class = strdup(standard); + } + CRM_ASSERT(expanded_class); + return expanded_class; +} svc_action_t * resources_action_create(const char *name, const char *standard, const char *provider, const char *agent, const char *action, int interval, int timeout, GHashTable * params, enum svc_action_flags flags) { svc_action_t *op = NULL; /* * Do some up front sanity checks before we go off and * build the svc_action_t instance. */ if (crm_strlen_zero(name)) { - crm_err("A service or resource action must have a name."); + crm_err("Cannot create operation without resource name"); goto return_error; } if (crm_strlen_zero(standard)) { - crm_err("A service action must have a valid standard."); + crm_err("Cannot create operation for %s without resource class", name); goto return_error; } - if (!strcasecmp(standard, "ocf") && crm_strlen_zero(provider)) { - crm_err("An OCF resource action must have a provider."); + if (!strcasecmp(standard, PCMK_RESOURCE_CLASS_OCF) + && crm_strlen_zero(provider)) { + crm_err("Cannot create OCF operation for %s without provider", name); goto return_error; } if (crm_strlen_zero(agent)) { - crm_err("A service or resource action must have an agent."); + crm_err("Cannot create operation for %s without agent name", name); goto return_error; } if (crm_strlen_zero(action)) { - crm_err("A service or resource action must specify an action."); + crm_err("Cannot create operation for %s without operation name", name); goto return_error; } - if (safe_str_eq(action, "monitor") && ( -#if SUPPORT_HEARTBEAT - safe_str_eq(standard, "heartbeat") || -#endif - safe_str_eq(standard, "lsb") || safe_str_eq(standard, "service"))) { - action = "status"; - } - /* * Sanity checks passed, proceed! */ op = calloc(1, sizeof(svc_action_t)); op->opaque = calloc(1, sizeof(svc_action_private_t)); op->rsc = strdup(name); - op->action = strdup(action); op->interval = interval; op->timeout = timeout; - op->standard = strdup(standard); + op->standard = expand_resource_class(name, standard, agent); op->agent = strdup(agent); op->sequence = ++operations; op->flags = flags; + op->id = generate_op_key(name, action, interval); - if (asprintf(&op->id, "%s_%s_%d", name, action, interval) == -1) { - goto return_error; - } - - if (strcasecmp(op->standard, "service") == 0) { - const char *expanded = resources_find_service_class(op->agent); - - if(expanded) { - crm_debug("Found a %s agent for %s/%s", expanded, op->rsc, op->agent); - free(op->standard); - op->standard = strdup(expanded); - - } else { - crm_info("Cannot determine the standard for %s (%s)", op->rsc, op->agent); - free(op->standard); - op->standard = strdup("lsb"); - } - CRM_ASSERT(op->standard); + if (safe_str_eq(action, "monitor") && ( +#if SUPPORT_HEARTBEAT + safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_HB) || +#endif + safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_LSB))) { + action = "status"; } + op->action = strdup(action); - if (strcasecmp(op->standard, "ocf") == 0) { + if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_OCF) == 0) { op->provider = strdup(provider); op->params = params; params = NULL; if (asprintf(&op->opaque->exec, "%s/resource.d/%s/%s", OCF_ROOT_DIR, provider, agent) == -1) { crm_err("Internal error: cannot create agent path"); goto return_error; } op->opaque->args[0] = strdup(op->opaque->exec); op->opaque->args[1] = strdup(action); - } else if (strcasecmp(op->standard, "lsb") == 0) { + } else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_LSB) == 0) { if (op->agent[0] == '/') { /* if given an absolute path, use that instead * of tacking on the LSB_ROOT_DIR path to the front */ op->opaque->exec = strdup(op->agent); } else if (asprintf(&op->opaque->exec, "%s/%s", LSB_ROOT_DIR, op->agent) == -1) { crm_err("Internal error: cannot create agent path"); goto return_error; } op->opaque->args[0] = strdup(op->opaque->exec); op->opaque->args[1] = strdup(op->action); op->opaque->args[2] = NULL; #if SUPPORT_HEARTBEAT - } else if (strcasecmp(op->standard, "heartbeat") == 0) { + } else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_HB) == 0) { int index; int param_num; char buf_tmp[20]; void *value_tmp; if (op->agent[0] == '/') { /* if given an absolute path, use that instead * of tacking on the HB_RA_DIR path to the front */ op->opaque->exec = strdup(op->agent); } else if (asprintf(&op->opaque->exec, "%s/%s", HB_RA_DIR, op->agent) == -1) { crm_err("Internal error: cannot create agent path"); goto return_error; } op->opaque->args[0] = strdup(op->opaque->exec); /* The "heartbeat" agent class only has positional arguments, * which we keyed by their decimal position number. */ param_num = 1; - for (index = 1; index <= MAX_ARGC - 3; index++ ) { + for (index = 1; index <= MAX_ARGC - 3; index++ ) { snprintf(buf_tmp, sizeof(buf_tmp), "%d", index); value_tmp = g_hash_table_lookup(params, buf_tmp); if (value_tmp == NULL) { /* maybe: strdup("") ?? * But the old lrmd did simply continue as well. */ continue; } op->opaque->args[param_num++] = strdup(value_tmp); } - /* Add operation code as the last argument, */ - /* and the teminating NULL pointer */ + /* Add operation code as the last argument, */ + /* and the teminating NULL pointer */ op->opaque->args[param_num++] = strdup(op->action); op->opaque->args[param_num] = NULL; #endif #if SUPPORT_SYSTEMD - } else if (strcasecmp(op->standard, "systemd") == 0) { + } else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_SYSTEMD) == 0) { op->opaque->exec = strdup("systemd-dbus"); #endif #if SUPPORT_UPSTART - } else if (strcasecmp(op->standard, "upstart") == 0) { + } else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_UPSTART) == 0) { op->opaque->exec = strdup("upstart-dbus"); #endif - } else if (strcasecmp(op->standard, "service") == 0) { - op->opaque->exec = strdup(SERVICE_SCRIPT); - op->opaque->args[0] = strdup(SERVICE_SCRIPT); - op->opaque->args[1] = strdup(agent); - op->opaque->args[2] = strdup(action); - #if SUPPORT_NAGIOS - } else if (strcasecmp(op->standard, "nagios") == 0) { + } else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_NAGIOS) == 0) { int index = 0; if (op->agent[0] == '/') { /* if given an absolute path, use that instead * of tacking on the NAGIOS_PLUGIN_DIR path to the front */ op->opaque->exec = strdup(op->agent); } else if (asprintf(&op->opaque->exec, "%s/%s", NAGIOS_PLUGIN_DIR, op->agent) == -1) { crm_err("Internal error: cannot create agent path"); goto return_error; } op->opaque->args[0] = strdup(op->opaque->exec); index = 1; if (safe_str_eq(op->action, "monitor") && op->interval == 0) { /* Invoke --version for a nagios probe */ op->opaque->args[index] = strdup("--version"); index++; } else if (params) { GHashTableIter iter; char *key = NULL; char *value = NULL; static int args_size = sizeof(op->opaque->args) / sizeof(char *); g_hash_table_iter_init(&iter, params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value) && index <= args_size - 3) { int len = 3; char *long_opt = NULL; if (safe_str_eq(key, XML_ATTR_CRM_VERSION) || strstr(key, CRM_META "_")) { continue; } len += strlen(key); long_opt = calloc(1, len); sprintf(long_opt, "--%s", key); long_opt[len - 1] = 0; op->opaque->args[index] = long_opt; op->opaque->args[index + 1] = strdup(value); index += 2; } } op->opaque->args[index] = NULL; #endif - } else { crm_err("Unknown resource standard: %s", op->standard); services_action_free(op); op = NULL; } if(params) { g_hash_table_destroy(params); } return op; return_error: if(params) { g_hash_table_destroy(params); } services_action_free(op); return NULL; } svc_action_t * services_action_create_generic(const char *exec, const char *args[]) { svc_action_t *op; unsigned int cur_arg; op = calloc(1, sizeof(*op)); op->opaque = calloc(1, sizeof(svc_action_private_t)); op->opaque->exec = strdup(exec); op->opaque->args[0] = strdup(exec); for (cur_arg = 1; args && args[cur_arg - 1]; cur_arg++) { op->opaque->args[cur_arg] = strdup(args[cur_arg - 1]); if (cur_arg == DIMOF(op->opaque->args) - 1) { crm_err("svc_action_t args list not long enough for '%s' execution request.", exec); break; } } return op; } #if SUPPORT_DBUS /*! * \internal * \brief Update operation's pending DBus call, unreferencing old one if needed * * \param[in,out] op Operation to modify * \param[in] pending Pending call to set */ void services_set_op_pending(svc_action_t *op, DBusPendingCall *pending) { if (op->opaque->pending && (op->opaque->pending != pending)) { if (pending) { crm_info("Lost pending %s DBus call (%p)", op->id, op->opaque->pending); } else { crm_trace("Done with pending %s DBus call (%p)", op->id, op->opaque->pending); } dbus_pending_call_unref(op->opaque->pending); } op->opaque->pending = pending; if (pending) { crm_trace("Updated pending %s DBus call (%p)", op->id, pending); } else { crm_trace("Cleared pending %s DBus call", op->id); } } #endif void services_action_cleanup(svc_action_t * op) { if(op->opaque == NULL) { return; } #if SUPPORT_DBUS if(op->opaque->timerid != 0) { crm_trace("Removing timer for call %s to %s", op->action, op->rsc); g_source_remove(op->opaque->timerid); op->opaque->timerid = 0; } if(op->opaque->pending) { crm_trace("Cleaning up pending dbus call %p %s for %s", op->opaque->pending, op->action, op->rsc); if(dbus_pending_call_get_completed(op->opaque->pending)) { crm_warn("Pending dbus call %s for %s did not complete", op->action, op->rsc); } dbus_pending_call_cancel(op->opaque->pending); dbus_pending_call_unref(op->opaque->pending); op->opaque->pending = NULL; } #endif if (op->opaque->stderr_gsource) { mainloop_del_fd(op->opaque->stderr_gsource); op->opaque->stderr_gsource = NULL; } if (op->opaque->stdout_gsource) { mainloop_del_fd(op->opaque->stdout_gsource); op->opaque->stdout_gsource = NULL; } } void services_action_free(svc_action_t * op) { unsigned int i; if (op == NULL) { return; } + /* The operation should be removed from all tracking lists by this point. + * If it's not, we have a bug somewhere, so bail. That may lead to a + * memory leak, but it's better than a use-after-free segmentation fault. + */ + CRM_CHECK(g_list_find(inflight_ops, op) == NULL, return); + CRM_CHECK(g_list_find(blocked_ops, op) == NULL, return); + CRM_CHECK((recurring_actions == NULL) + || (g_hash_table_lookup(recurring_actions, op->id) == NULL), + return); + services_action_cleanup(op); if (op->opaque->repeat_timer) { g_source_remove(op->opaque->repeat_timer); op->opaque->repeat_timer = 0; } free(op->id); free(op->opaque->exec); for (i = 0; i < DIMOF(op->opaque->args); i++) { free(op->opaque->args[i]); } free(op->opaque); free(op->rsc); free(op->action); free(op->standard); free(op->agent); free(op->provider); free(op->stdout_data); free(op->stderr_data); if (op->params) { g_hash_table_destroy(op->params); op->params = NULL; } free(op); } gboolean cancel_recurring_action(svc_action_t * op) { crm_info("Cancelling %s operation %s", op->standard, op->id); if (recurring_actions) { g_hash_table_remove(recurring_actions, op->id); } if (op->opaque->repeat_timer) { g_source_remove(op->opaque->repeat_timer); op->opaque->repeat_timer = 0; } return TRUE; } +/*! + * \brief Cancel a recurring action + * + * \param[in] name Name of resource that operation is for + * \param[in] action Name of operation to cancel + * \param[in] interval Interval of operation to cancel + * + * \return TRUE if action was successfully cancelled, FALSE otherwise + */ gboolean services_action_cancel(const char *name, const char *action, int interval) { + gboolean cancelled = FALSE; + char *id = generate_op_key(name, action, interval); svc_action_t *op = NULL; - char id[512]; - snprintf(id, sizeof(id), "%s_%s_%d", name, action, interval); - - if (!(op = g_hash_table_lookup(recurring_actions, id))) { - return FALSE; + /* We can only cancel a recurring action */ + init_recurring_actions(); + op = g_hash_table_lookup(recurring_actions, id); + if (op == NULL) { + goto done; } - /* Always kill the recurring timer */ + /* Tell operation_finalize() not to reschedule the operation */ + op->cancel = TRUE; + + /* Stop tracking it as a recurring operation, and stop its timer */ cancel_recurring_action(op); - if (op->pid == 0) { - op->status = PCMK_LRM_OP_CANCELLED; - if (op->opaque->callback) { - op->opaque->callback(op); + /* If the op has a PID, it's an in-flight child process, so kill it. + * + * Whether the kill succeeds or fails, the main loop will send the op to + * operation_finished() (and thus operation_finalize()) when the process + * goes away. + */ + if (op->pid != 0) { + crm_info("Terminating in-flight op %s (pid %d) early because it was cancelled", + id, op->pid); + cancelled = mainloop_child_kill(op->pid); + if (cancelled == FALSE) { + crm_err("Termination of %s (pid %d) failed", id, op->pid); } + goto done; + } - blocked_ops = g_list_remove(blocked_ops, op); - services_action_free(op); + /* In-flight systemd and upstart ops don't have a pid. The relevant handlers + * will call operation_finalize() when the operation completes. + * @TODO: Can we request early termination, maybe using + * dbus_pending_call_cancel()? + */ + if (inflight_systemd_or_upstart(op)) { + crm_info("Will cancel %s op %s when in-flight instance completes", + op->standard, op->id); + cancelled = FALSE; + goto done; + } - } else { - crm_info("Cancelling in-flight op: performing early termination of %s (pid=%d)", id, op->pid); - op->cancel = 1; - if (mainloop_child_kill(op->pid) == FALSE) { - /* even though the early termination failed, - * the op will be marked as cancelled once it completes. */ - crm_err("Termination of %s (pid=%d) failed", id, op->pid); - return FALSE; - } + /* Otherwise, operation is not in-flight, just report as cancelled */ + op->status = PCMK_LRM_OP_CANCELLED; + if (op->opaque->callback) { + op->opaque->callback(op); } - return TRUE; + blocked_ops = g_list_remove(blocked_ops, op); + services_action_free(op); + cancelled = TRUE; + +done: + free(id); + return cancelled; } gboolean services_action_kick(const char *name, const char *action, int interval /* ms */) { svc_action_t * op = NULL; - char *id = NULL; - - if (asprintf(&id, "%s_%s_%d", name, action, interval) == -1) { - return FALSE; - } + char *id = generate_op_key(name, action, interval); + init_recurring_actions(); op = g_hash_table_lookup(recurring_actions, id); free(id); if (op == NULL) { return FALSE; } - if (op->pid) { + + if (op->pid || inflight_systemd_or_upstart(op)) { return TRUE; } else { if (op->opaque->repeat_timer) { g_source_remove(op->opaque->repeat_timer); op->opaque->repeat_timer = 0; } recurring_action_timer(op); return TRUE; } } /* add new recurring operation, check for duplicates. * - if duplicate found, return TRUE, immediately reschedule op. * - if no dup, return FALSE, inserve into recurring op list.*/ static gboolean handle_duplicate_recurring(svc_action_t * op, void (*action_callback) (svc_action_t *)) { svc_action_t * dup = NULL; - if (recurring_actions == NULL) { - recurring_actions = g_hash_table_new_full(g_str_hash, g_str_equal, NULL, NULL); - return FALSE; - } - /* check for duplicates */ dup = g_hash_table_lookup(recurring_actions, op->id); if (dup && (dup != op)) { /* update user data */ if (op->opaque->callback) { dup->opaque->callback = op->opaque->callback; dup->cb_data = op->cb_data; op->cb_data = NULL; } /* immediately execute the next interval */ if (dup->pid != 0) { if (op->opaque->repeat_timer) { g_source_remove(op->opaque->repeat_timer); op->opaque->repeat_timer = 0; } recurring_action_timer(dup); } /* free the dup. */ services_action_free(op); return TRUE; } return FALSE; } inline static gboolean action_exec_helper(svc_action_t * op) { /* Whether a/synchronous must be decided (op->synchronous) beforehand. */ - if (op->standard && strcasecmp(op->standard, "upstart") == 0) { + if (op->standard + && (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_UPSTART) == 0)) { #if SUPPORT_UPSTART return upstart_job_exec(op); #endif - } else if (op->standard && strcasecmp(op->standard, "systemd") == 0) { + } else if (op->standard && strcasecmp(op->standard, + PCMK_RESOURCE_CLASS_SYSTEMD) == 0) { #if SUPPORT_SYSTEMD return systemd_unit_exec(op); #endif } else { return services_os_action_execute(op); } /* The 'op' has probably been freed if the execution functions return TRUE for the asynchronous 'op'. */ /* Avoid using the 'op' in here. */ return FALSE; } void services_add_inflight_op(svc_action_t * op) { if (op == NULL) { return; } CRM_ASSERT(op->synchronous == FALSE); /* keep track of ops that are in-flight to avoid collisions in the same namespace */ if (op->rsc) { inflight_ops = g_list_append(inflight_ops, op); } } +/*! + * \internal + * \brief Stop tracking an operation that completed + * + * \param[in] op Operation to stop tracking + */ +void +services_untrack_op(svc_action_t *op) +{ + /* Op is no longer in-flight or blocked */ + inflight_ops = g_list_remove(inflight_ops, op); + blocked_ops = g_list_remove(blocked_ops, op); + + /* Op is no longer blocking other ops, so check if any need to run */ + handle_blocked_ops(); +} + gboolean services_action_async(svc_action_t * op, void (*action_callback) (svc_action_t *)) { op->synchronous = false; if (action_callback) { op->opaque->callback = action_callback; } if (op->interval > 0) { + init_recurring_actions(); if (handle_duplicate_recurring(op, action_callback) == TRUE) { /* entry rescheduled, dup freed */ /* exit early */ return TRUE; } g_hash_table_replace(recurring_actions, op->id, op); } if (op->rsc && is_op_blocked(op->rsc)) { blocked_ops = g_list_append(blocked_ops, op); return TRUE; } return action_exec_helper(op); } static gboolean processing_blocked_ops = FALSE; gboolean is_op_blocked(const char *rsc) { GList *gIter = NULL; svc_action_t *op = NULL; for (gIter = inflight_ops; gIter != NULL; gIter = gIter->next) { op = gIter->data; if (safe_str_eq(op->rsc, rsc)) { return TRUE; } } return FALSE; } -void +static void handle_blocked_ops(void) { GList *executed_ops = NULL; GList *gIter = NULL; svc_action_t *op = NULL; gboolean res = FALSE; if (processing_blocked_ops) { /* avoid nested calling of this function */ return; } processing_blocked_ops = TRUE; /* n^2 operation here, but blocked ops are incredibly rare. this list * will be empty 99% of the time. */ for (gIter = blocked_ops; gIter != NULL; gIter = gIter->next) { op = gIter->data; if (is_op_blocked(op->rsc)) { continue; } executed_ops = g_list_append(executed_ops, op); res = action_exec_helper(op); if (res == FALSE) { op->status = PCMK_LRM_OP_ERROR; /* this can cause this function to be called recursively * which is why we have processing_blocked_ops static variable */ operation_finalize(op); } } for (gIter = executed_ops; gIter != NULL; gIter = gIter->next) { op = gIter->data; blocked_ops = g_list_remove(blocked_ops, op); } g_list_free(executed_ops); processing_blocked_ops = FALSE; } gboolean services_action_sync(svc_action_t * op) { gboolean rc = TRUE; if (op == NULL) { crm_trace("No operation to execute"); return FALSE; } op->synchronous = true; rc = action_exec_helper(op); crm_trace(" > %s_%s_%d: %s = %d", op->rsc, op->action, op->interval, op->opaque->exec, op->rc); if (op->stdout_data) { crm_trace(" > stdout: %s", op->stdout_data); } if (op->stderr_data) { crm_trace(" > stderr: %s", op->stderr_data); } return rc; } GList * get_directory_list(const char *root, gboolean files, gboolean executable) { return services_os_get_directory_list(root, files, executable); } GList * services_list(void) { - return resources_list_agents("lsb", NULL); + return resources_list_agents(PCMK_RESOURCE_CLASS_LSB, NULL); } #if SUPPORT_HEARTBEAT static GList * resources_os_list_hb_agents(void) { return services_os_get_directory_list(HB_RA_DIR, TRUE, TRUE); } #endif GList * resources_list_standards(void) { GList *standards = NULL; GList *agents = NULL; - standards = g_list_append(standards, strdup("ocf")); - standards = g_list_append(standards, strdup("lsb")); - standards = g_list_append(standards, strdup("service")); + standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_OCF)); + standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_LSB)); + standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_SERVICE)); #if SUPPORT_SYSTEMD agents = systemd_unit_listall(); if (agents) { - standards = g_list_append(standards, strdup("systemd")); + standards = g_list_append(standards, + strdup(PCMK_RESOURCE_CLASS_SYSTEMD)); g_list_free_full(agents, free); } #endif #if SUPPORT_UPSTART agents = upstart_job_listall(); if (agents) { - standards = g_list_append(standards, strdup("upstart")); + standards = g_list_append(standards, + strdup(PCMK_RESOURCE_CLASS_UPSTART)); g_list_free_full(agents, free); } #endif #if SUPPORT_NAGIOS agents = resources_os_list_nagios_agents(); if (agents) { - standards = g_list_append(standards, strdup("nagios")); + standards = g_list_append(standards, + strdup(PCMK_RESOURCE_CLASS_NAGIOS)); g_list_free_full(agents, free); } #endif #if SUPPORT_HEARTBEAT - standards = g_list_append(standards, strdup("heartbeat")); + standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_HB)); #endif return standards; } GList * resources_list_providers(const char *standard) { - if (strcasecmp(standard, "ocf") == 0) { + if (strcasecmp(standard, PCMK_RESOURCE_CLASS_OCF) == 0) { return resources_os_list_ocf_providers(); } return NULL; } GList * resources_list_agents(const char *standard, const char *provider) { - if (standard == NULL || strcasecmp(standard, "service") == 0) { + if ((standard == NULL) + || (strcasecmp(standard, PCMK_RESOURCE_CLASS_SERVICE) == 0)) { + GList *tmp1; GList *tmp2; GList *result = resources_os_list_lsb_agents(); if (standard == NULL) { tmp1 = result; tmp2 = resources_os_list_ocf_agents(NULL); if (tmp2) { result = g_list_concat(tmp1, tmp2); } } #if SUPPORT_SYSTEMD tmp1 = result; tmp2 = systemd_unit_listall(); if (tmp2) { result = g_list_concat(tmp1, tmp2); } #endif #if SUPPORT_UPSTART tmp1 = result; tmp2 = upstart_job_listall(); if (tmp2) { result = g_list_concat(tmp1, tmp2); } #endif return result; - } else if (strcasecmp(standard, "ocf") == 0) { + } else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_OCF) == 0) { return resources_os_list_ocf_agents(provider); - } else if (strcasecmp(standard, "lsb") == 0) { + } else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_LSB) == 0) { return resources_os_list_lsb_agents(); #if SUPPORT_HEARTBEAT - } else if (strcasecmp(standard, "heartbeat") == 0) { + } else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_HB) == 0) { return resources_os_list_hb_agents(); #endif #if SUPPORT_SYSTEMD - } else if (strcasecmp(standard, "systemd") == 0) { + } else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_SYSTEMD) == 0) { return systemd_unit_listall(); #endif #if SUPPORT_UPSTART - } else if (strcasecmp(standard, "upstart") == 0) { + } else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_UPSTART) == 0) { return upstart_job_listall(); #endif #if SUPPORT_NAGIOS - } else if (strcasecmp(standard, "nagios") == 0) { + } else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_NAGIOS) == 0) { return resources_os_list_nagios_agents(); #endif } return NULL; } diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c index adfefaaa3c..c2ac43d333 100644 --- a/lib/services/services_linux.c +++ b/lib/services/services_linux.c @@ -1,908 +1,906 @@ /* * Copyright (C) 2010-2016 Andrew Beekhof * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_SYS_SIGNALFD_H #include #endif #include "crm/crm.h" #include "crm/common/mainloop.h" #include "crm/services.h" #include "services_private.h" #if SUPPORT_CIBSECRETS # include "crm/common/cib_secrets.h" #endif -/* ops currently active (in-flight) */ -extern GList *inflight_ops; - static inline void set_fd_opts(int fd, int opts) { int flag; if ((flag = fcntl(fd, F_GETFL)) >= 0) { if (fcntl(fd, F_SETFL, flag | opts) < 0) { crm_err("fcntl() write failed"); } } else { crm_err("fcntl() read failed"); } } static gboolean svc_read_output(int fd, svc_action_t * op, bool is_stderr) { char *data = NULL; int rc = 0, len = 0; char buf[500]; static const size_t buf_read_len = sizeof(buf) - 1; if (fd < 0) { crm_trace("No fd for %s", op->id); return FALSE; } if (is_stderr && op->stderr_data) { len = strlen(op->stderr_data); data = op->stderr_data; crm_trace("Reading %s stderr into offset %d", op->id, len); } else if (is_stderr == FALSE && op->stdout_data) { len = strlen(op->stdout_data); data = op->stdout_data; crm_trace("Reading %s stdout into offset %d", op->id, len); } else { crm_trace("Reading %s %s into offset %d", op->id, is_stderr?"stderr":"stdout", len); } do { rc = read(fd, buf, buf_read_len); if (rc > 0) { crm_trace("Got %d chars: %.80s", rc, buf); buf[rc] = 0; data = realloc_safe(data, len + rc + 1); len += sprintf(data + len, "%s", buf); } else if (errno != EINTR) { /* error or EOF * Cleanup happens in pipe_done() */ rc = FALSE; break; } } while (rc == buf_read_len || rc < 0); if (is_stderr) { op->stderr_data = data; } else { op->stdout_data = data; } return rc; } static int dispatch_stdout(gpointer userdata) { svc_action_t *op = (svc_action_t *) userdata; return svc_read_output(op->opaque->stdout_fd, op, FALSE); } static int dispatch_stderr(gpointer userdata) { svc_action_t *op = (svc_action_t *) userdata; return svc_read_output(op->opaque->stderr_fd, op, TRUE); } static void pipe_out_done(gpointer user_data) { svc_action_t *op = (svc_action_t *) user_data; crm_trace("%p", op); op->opaque->stdout_gsource = NULL; if (op->opaque->stdout_fd > STDOUT_FILENO) { close(op->opaque->stdout_fd); } op->opaque->stdout_fd = -1; } static void pipe_err_done(gpointer user_data) { svc_action_t *op = (svc_action_t *) user_data; op->opaque->stderr_gsource = NULL; if (op->opaque->stderr_fd > STDERR_FILENO) { close(op->opaque->stderr_fd); } op->opaque->stderr_fd = -1; } static struct mainloop_fd_callbacks stdout_callbacks = { .dispatch = dispatch_stdout, .destroy = pipe_out_done, }; static struct mainloop_fd_callbacks stderr_callbacks = { .dispatch = dispatch_stderr, .destroy = pipe_err_done, }; static void set_ocf_env(const char *key, const char *value, gpointer user_data) { if (setenv(key, value, 1) != 0) { crm_perror(LOG_ERR, "setenv failed for key:%s and value:%s", key, value); } } static void set_ocf_env_with_prefix(gpointer key, gpointer value, gpointer user_data) { char buffer[500]; snprintf(buffer, sizeof(buffer), "OCF_RESKEY_%s", (char *)key); set_ocf_env(buffer, value, user_data); } static void add_OCF_env_vars(svc_action_t * op) { - if (!op->standard || strcasecmp("ocf", op->standard) != 0) { + if ((op->standard == NULL) + || (strcasecmp(PCMK_RESOURCE_CLASS_OCF, op->standard) != 0)) { return; } if (op->params) { g_hash_table_foreach(op->params, set_ocf_env_with_prefix, NULL); } set_ocf_env("OCF_RA_VERSION_MAJOR", "1", NULL); set_ocf_env("OCF_RA_VERSION_MINOR", "0", NULL); set_ocf_env("OCF_ROOT", OCF_ROOT_DIR, NULL); set_ocf_env("OCF_EXIT_REASON_PREFIX", PCMK_OCF_REASON_PREFIX, NULL); if (op->rsc) { set_ocf_env("OCF_RESOURCE_INSTANCE", op->rsc, NULL); } if (op->agent != NULL) { set_ocf_env("OCF_RESOURCE_TYPE", op->agent, NULL); } /* Notes: this is not added to specification yet. Sept 10,2004 */ if (op->provider != NULL) { set_ocf_env("OCF_RESOURCE_PROVIDER", op->provider, NULL); } } gboolean recurring_action_timer(gpointer data) { svc_action_t *op = data; crm_debug("Scheduling another invocation of %s", op->id); /* Clean out the old result */ free(op->stdout_data); op->stdout_data = NULL; free(op->stderr_data); op->stderr_data = NULL; op->opaque->repeat_timer = 0; services_action_async(op, NULL); return FALSE; } /* Returns FALSE if 'op' should be free'd by the caller */ gboolean operation_finalize(svc_action_t * op) { int recurring = 0; if (op->interval) { if (op->cancel) { op->status = PCMK_LRM_OP_CANCELLED; cancel_recurring_action(op); } else { recurring = 1; op->opaque->repeat_timer = g_timeout_add(op->interval, recurring_action_timer, (void *)op); } } if (op->opaque->callback) { op->opaque->callback(op); } op->pid = 0; - inflight_ops = g_list_remove(inflight_ops, op); - - handle_blocked_ops(); + services_untrack_op(op); if (!recurring && op->synchronous == FALSE) { /* * If this is a recurring action, do not free explicitly. * It will get freed whenever the action gets cancelled. */ services_action_free(op); return TRUE; } services_action_cleanup(op); return FALSE; } static void operation_finished(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) { svc_action_t *op = mainloop_child_userdata(p); char *prefix = crm_strdup_printf("%s:%d", op->id, op->pid); mainloop_clear_child_userdata(p); op->status = PCMK_LRM_OP_DONE; CRM_ASSERT(op->pid == pid); crm_trace("%s %p %p", prefix, op->opaque->stderr_gsource, op->opaque->stdout_gsource); if (op->opaque->stderr_gsource) { /* Make sure we have read everything from the buffer. * Depending on the priority mainloop gives the fd, operation_finished * could occur before all the reads are done. Force the read now.*/ crm_trace("%s dispatching stderr", prefix); dispatch_stderr(op); crm_trace("%s: %p", op->id, op->stderr_data); mainloop_del_fd(op->opaque->stderr_gsource); op->opaque->stderr_gsource = NULL; } if (op->opaque->stdout_gsource) { /* Make sure we have read everything from the buffer. * Depending on the priority mainloop gives the fd, operation_finished * could occur before all the reads are done. Force the read now.*/ crm_trace("%s dispatching stdout", prefix); dispatch_stdout(op); crm_trace("%s: %p", op->id, op->stdout_data); mainloop_del_fd(op->opaque->stdout_gsource); op->opaque->stdout_gsource = NULL; } if (signo) { if (mainloop_child_timeout(p)) { crm_warn("%s - timed out after %dms", prefix, op->timeout); op->status = PCMK_LRM_OP_TIMEOUT; op->rc = PCMK_OCF_TIMEOUT; } else { do_crm_log_unlikely((op->cancel) ? LOG_INFO : LOG_WARNING, "%s - terminated with signal %d", prefix, signo); op->status = PCMK_LRM_OP_ERROR; op->rc = PCMK_OCF_SIGNAL; } } else { op->rc = exitcode; crm_debug("%s - exited with rc=%d", prefix, exitcode); } free(prefix); prefix = crm_strdup_printf("%s:%d:stderr", op->id, op->pid); crm_log_output(LOG_NOTICE, prefix, op->stderr_data); free(prefix); prefix = crm_strdup_printf("%s:%d:stdout", op->id, op->pid); crm_log_output(LOG_DEBUG, prefix, op->stdout_data); free(prefix); operation_finalize(op); } /*! * \internal * \brief Set operation rc and status per errno from stat(), fork() or execvp() * * \param[in,out] op Operation to set rc and status for * \param[in] error Value of errno after system call * * \return void */ static void services_handle_exec_error(svc_action_t * op, int error) { int rc_not_installed, rc_insufficient_priv, rc_exec_error; /* Mimic the return codes for each standard as that's what we'll convert back from in get_uniform_rc() */ - if (safe_str_eq(op->standard, "lsb") && safe_str_eq(op->action, "status")) { + if (safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_LSB) + && safe_str_eq(op->action, "status")) { + rc_not_installed = PCMK_LSB_STATUS_NOT_INSTALLED; rc_insufficient_priv = PCMK_LSB_STATUS_INSUFFICIENT_PRIV; rc_exec_error = PCMK_LSB_STATUS_UNKNOWN; #if SUPPORT_NAGIOS - } else if (safe_str_eq(op->standard, "nagios")) { + } else if (safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_NAGIOS)) { rc_not_installed = NAGIOS_NOT_INSTALLED; rc_insufficient_priv = NAGIOS_INSUFFICIENT_PRIV; rc_exec_error = PCMK_OCF_EXEC_ERROR; #endif } else { rc_not_installed = PCMK_OCF_NOT_INSTALLED; rc_insufficient_priv = PCMK_OCF_INSUFFICIENT_PRIV; rc_exec_error = PCMK_OCF_EXEC_ERROR; } switch (error) { /* see execve(2), stat(2) and fork(2) */ case ENOENT: /* No such file or directory */ case EISDIR: /* Is a directory */ case ENOTDIR: /* Path component is not a directory */ case EINVAL: /* Invalid executable format */ case ENOEXEC: /* Invalid executable format */ op->rc = rc_not_installed; op->status = PCMK_LRM_OP_NOT_INSTALLED; break; case EACCES: /* permission denied (various errors) */ case EPERM: /* permission denied (various errors) */ op->rc = rc_insufficient_priv; op->status = PCMK_LRM_OP_ERROR; break; default: op->rc = rc_exec_error; op->status = PCMK_LRM_OP_ERROR; } } static void action_launch_child(svc_action_t *op) { int lpc; /* SIGPIPE is ignored (which is different from signal blocking) by the gnutls library. * Depending on the libqb version in use, libqb may set SIGPIPE to be ignored as well. * We do not want this to be inherited by the child process. By resetting this the signal * to the default behavior, we avoid some potential odd problems that occur during OCF * scripts when SIGPIPE is ignored by the environment. */ signal(SIGPIPE, SIG_DFL); #if defined(HAVE_SCHED_SETSCHEDULER) if (sched_getscheduler(0) != SCHED_OTHER) { struct sched_param sp; memset(&sp, 0, sizeof(sp)); sp.sched_priority = 0; if (sched_setscheduler(0, SCHED_OTHER, &sp) == -1) { crm_perror(LOG_ERR, "Could not reset scheduling policy to SCHED_OTHER for %s", op->id); } } #endif if (setpriority(PRIO_PROCESS, 0, 0) == -1) { crm_perror(LOG_ERR, "Could not reset process priority to 0 for %s", op->id); } /* Man: The call setpgrp() is equivalent to setpgid(0,0) * _and_ compiles on BSD variants too * need to investigate if it works the same too. */ setpgid(0, 0); /* close all descriptors except stdin/out/err and channels to logd */ for (lpc = getdtablesize() - 1; lpc > STDERR_FILENO; lpc--) { close(lpc); } #if SUPPORT_CIBSECRETS if (replace_secret_params(op->rsc, op->params) < 0) { /* replacing secrets failed! */ if (safe_str_eq(op->action,"stop")) { /* don't fail on stop! */ crm_info("proceeding with the stop operation for %s", op->rsc); } else { crm_err("failed to get secrets for %s, " "considering resource not configured", op->rsc); _exit(PCMK_OCF_NOT_CONFIGURED); } } #endif /* Setup environment correctly */ add_OCF_env_vars(op); /* execute the RA */ execvp(op->opaque->exec, op->opaque->args); /* Most cases should have been already handled by stat() */ services_handle_exec_error(op, errno); _exit(op->rc); } #ifndef HAVE_SYS_SIGNALFD_H static int sigchld_pipe[2] = { -1, -1 }; static void sigchld_handler() { if ((sigchld_pipe[1] >= 0) && (write(sigchld_pipe[1], "", 1) == -1)) { crm_perror(LOG_TRACE, "Could not poke SIGCHLD self-pipe"); } } #endif static void action_synced_wait(svc_action_t * op, sigset_t *mask) { int status = 0; int timeout = op->timeout; int sfd = -1; time_t start = -1; struct pollfd fds[3]; int wait_rc = 0; #ifdef HAVE_SYS_SIGNALFD_H sfd = signalfd(-1, mask, SFD_NONBLOCK); if (sfd < 0) { crm_perror(LOG_ERR, "signalfd() failed"); } #else sfd = sigchld_pipe[0]; #endif fds[0].fd = op->opaque->stdout_fd; fds[0].events = POLLIN; fds[0].revents = 0; fds[1].fd = op->opaque->stderr_fd; fds[1].events = POLLIN; fds[1].revents = 0; fds[2].fd = sfd; fds[2].events = POLLIN; fds[2].revents = 0; crm_trace("Waiting for %d", op->pid); start = time(NULL); do { int poll_rc = poll(fds, 3, timeout); if (poll_rc > 0) { if (fds[0].revents & POLLIN) { svc_read_output(op->opaque->stdout_fd, op, FALSE); } if (fds[1].revents & POLLIN) { svc_read_output(op->opaque->stderr_fd, op, TRUE); } if (fds[2].revents & POLLIN) { #ifdef HAVE_SYS_SIGNALFD_H struct signalfd_siginfo fdsi; ssize_t s; s = read(sfd, &fdsi, sizeof(struct signalfd_siginfo)); if (s != sizeof(struct signalfd_siginfo)) { crm_perror(LOG_ERR, "Read from signal fd %d failed", sfd); } else if (fdsi.ssi_signo == SIGCHLD) { #else if (1) { /* Clear out the sigchld pipe. */ char ch; while (read(sfd, &ch, 1) == 1) /*omit*/; #endif wait_rc = waitpid(op->pid, &status, WNOHANG); if (wait_rc > 0) { break; } else if (wait_rc < 0){ if (errno == ECHILD) { /* Here, don't dare to kill and bail out... */ break; } else { /* ...otherwise pretend process still runs. */ wait_rc = 0; } crm_perror(LOG_ERR, "waitpid() for %d failed", op->pid); } } } } else if (poll_rc == 0) { timeout = 0; break; } else if (poll_rc < 0) { if (errno != EINTR) { crm_perror(LOG_ERR, "poll() failed"); break; } } timeout = op->timeout - (time(NULL) - start) * 1000; } while ((op->timeout < 0 || timeout > 0)); crm_trace("Child done: %d", op->pid); if (wait_rc <= 0) { op->rc = PCMK_OCF_UNKNOWN_ERROR; if (op->timeout > 0 && timeout <= 0) { op->status = PCMK_LRM_OP_TIMEOUT; crm_warn("%s:%d - timed out after %dms", op->id, op->pid, op->timeout); } else { op->status = PCMK_LRM_OP_ERROR; } /* If only child hasn't been successfully waited for, yet. This is to limit killing wrong target a bit more. */ if (wait_rc == 0 && waitpid(op->pid, &status, WNOHANG) == 0) { if (kill(op->pid, SIGKILL)) { crm_err("kill(%d, KILL) failed: %d", op->pid, errno); } /* Safe to skip WNOHANG here as we sent non-ignorable signal. */ while (waitpid(op->pid, &status, 0) == (pid_t) -1 && errno == EINTR) /*omit*/; } } else if (WIFEXITED(status)) { op->status = PCMK_LRM_OP_DONE; op->rc = WEXITSTATUS(status); crm_info("Managed %s process %d exited with rc=%d", op->id, op->pid, op->rc); } else if (WIFSIGNALED(status)) { int signo = WTERMSIG(status); op->status = PCMK_LRM_OP_ERROR; crm_err("Managed %s process %d exited with signal=%d", op->id, op->pid, signo); } #ifdef WCOREDUMP if (WCOREDUMP(status)) { crm_err("Managed %s process %d dumped core", op->id, op->pid); } #endif svc_read_output(op->opaque->stdout_fd, op, FALSE); svc_read_output(op->opaque->stderr_fd, op, TRUE); close(op->opaque->stdout_fd); close(op->opaque->stderr_fd); #ifdef HAVE_SYS_SIGNALFD_H close(sfd); #endif } /* For an asynchronous 'op', returns FALSE if 'op' should be free'd by the caller */ /* For a synchronous 'op', returns FALSE if 'op' fails */ gboolean services_os_action_execute(svc_action_t * op) { int stdout_fd[2]; int stderr_fd[2]; struct stat st; sigset_t *pmask; #ifdef HAVE_SYS_SIGNALFD_H sigset_t mask; sigset_t old_mask; #define sigchld_cleanup() do { \ if (sigismember(&old_mask, SIGCHLD) == 0) { \ if (sigprocmask(SIG_UNBLOCK, &mask, NULL) < 0) { \ crm_perror(LOG_ERR, "sigprocmask() failed to unblock sigchld"); \ } \ } \ } while (0) #else struct sigaction sa; struct sigaction old_sa; #define sigchld_cleanup() do { \ if (sigaction(SIGCHLD, &old_sa, NULL) < 0) { \ crm_perror(LOG_ERR, "sigaction() failed to remove sigchld handler"); \ } \ close(sigchld_pipe[0]); \ close(sigchld_pipe[1]); \ sigchld_pipe[0] = sigchld_pipe[1] = -1; \ } while(0) #endif /* Fail fast */ if(stat(op->opaque->exec, &st) != 0) { int rc = errno; crm_warn("Cannot execute '%s': %s (%d)", op->opaque->exec, pcmk_strerror(rc), rc); services_handle_exec_error(op, rc); if (!op->synchronous) { return operation_finalize(op); } return FALSE; } if (pipe(stdout_fd) < 0) { int rc = errno; crm_err("pipe(stdout_fd) failed. '%s': %s (%d)", op->opaque->exec, pcmk_strerror(rc), rc); services_handle_exec_error(op, rc); if (!op->synchronous) { return operation_finalize(op); } return FALSE; } if (pipe(stderr_fd) < 0) { int rc = errno; close(stdout_fd[0]); close(stdout_fd[1]); crm_err("pipe(stderr_fd) failed. '%s': %s (%d)", op->opaque->exec, pcmk_strerror(rc), rc); services_handle_exec_error(op, rc); if (!op->synchronous) { return operation_finalize(op); } return FALSE; } if (op->synchronous) { #ifdef HAVE_SYS_SIGNALFD_H sigemptyset(&mask); sigaddset(&mask, SIGCHLD); sigemptyset(&old_mask); if (sigprocmask(SIG_BLOCK, &mask, &old_mask) < 0) { crm_perror(LOG_ERR, "sigprocmask() failed to block sigchld"); } pmask = &mask; #else if(pipe(sigchld_pipe) == -1) { crm_perror(LOG_ERR, "pipe() failed"); } set_fd_opts(sigchld_pipe[0], O_NONBLOCK); set_fd_opts(sigchld_pipe[1], O_NONBLOCK); sa.sa_handler = sigchld_handler; sa.sa_flags = 0; sigemptyset(&sa.sa_mask); if (sigaction(SIGCHLD, &sa, &old_sa) < 0) { crm_perror(LOG_ERR, "sigaction() failed to set sigchld handler"); } pmask = NULL; #endif } op->pid = fork(); switch (op->pid) { case -1: { int rc = errno; close(stdout_fd[0]); close(stdout_fd[1]); close(stderr_fd[0]); close(stderr_fd[1]); crm_err("Could not execute '%s': %s (%d)", op->opaque->exec, pcmk_strerror(rc), rc); services_handle_exec_error(op, rc); if (!op->synchronous) { return operation_finalize(op); } sigchld_cleanup(); return FALSE; } case 0: /* Child */ close(stdout_fd[0]); close(stderr_fd[0]); if (STDOUT_FILENO != stdout_fd[1]) { if (dup2(stdout_fd[1], STDOUT_FILENO) != STDOUT_FILENO) { crm_err("dup2() failed (stdout)"); } close(stdout_fd[1]); } if (STDERR_FILENO != stderr_fd[1]) { if (dup2(stderr_fd[1], STDERR_FILENO) != STDERR_FILENO) { crm_err("dup2() failed (stderr)"); } close(stderr_fd[1]); } if (op->synchronous) { sigchld_cleanup(); } action_launch_child(op); CRM_ASSERT(0); /* action_launch_child is effectively noreturn */ } /* Only the parent reaches here */ close(stdout_fd[1]); close(stderr_fd[1]); op->opaque->stdout_fd = stdout_fd[0]; set_fd_opts(op->opaque->stdout_fd, O_NONBLOCK); op->opaque->stderr_fd = stderr_fd[0]; set_fd_opts(op->opaque->stderr_fd, O_NONBLOCK); if (op->synchronous) { action_synced_wait(op, pmask); sigchld_cleanup(); } else { crm_trace("Async waiting for %d - %s", op->pid, op->opaque->exec); mainloop_child_add_with_flags(op->pid, op->timeout, op->id, op, (op->flags & SVC_ACTION_LEAVE_GROUP) ? mainloop_leave_pid_group : 0, operation_finished); op->opaque->stdout_gsource = mainloop_add_fd(op->id, G_PRIORITY_LOW, op->opaque->stdout_fd, op, &stdout_callbacks); op->opaque->stderr_gsource = mainloop_add_fd(op->id, G_PRIORITY_LOW, op->opaque->stderr_fd, op, &stderr_callbacks); services_add_inflight_op(op); } return TRUE; } GList * services_os_get_directory_list(const char *root, gboolean files, gboolean executable) { GList *list = NULL; struct dirent **namelist; int entries = 0, lpc = 0; char buffer[PATH_MAX]; entries = scandir(root, &namelist, NULL, alphasort); if (entries <= 0) { return list; } for (lpc = 0; lpc < entries; lpc++) { struct stat sb; if ('.' == namelist[lpc]->d_name[0]) { free(namelist[lpc]); continue; } snprintf(buffer, sizeof(buffer), "%s/%s", root, namelist[lpc]->d_name); if (stat(buffer, &sb)) { continue; } if (S_ISDIR(sb.st_mode)) { if (files) { free(namelist[lpc]); continue; } } else if (S_ISREG(sb.st_mode)) { if (files == FALSE) { free(namelist[lpc]); continue; } else if (executable && (sb.st_mode & S_IXUSR) == 0 && (sb.st_mode & S_IXGRP) == 0 && (sb.st_mode & S_IXOTH) == 0) { free(namelist[lpc]); continue; } } list = g_list_append(list, strdup(namelist[lpc]->d_name)); free(namelist[lpc]); } free(namelist); return list; } GList * resources_os_list_lsb_agents(void) { return get_directory_list(LSB_ROOT_DIR, TRUE, TRUE); } GList * resources_os_list_ocf_providers(void) { return get_directory_list(OCF_ROOT_DIR "/resource.d", FALSE, TRUE); } GList * resources_os_list_ocf_agents(const char *provider) { GList *gIter = NULL; GList *result = NULL; GList *providers = NULL; if (provider) { char buffer[500]; snprintf(buffer, sizeof(buffer), "%s/resource.d/%s", OCF_ROOT_DIR, provider); return get_directory_list(buffer, TRUE, TRUE); } providers = resources_os_list_ocf_providers(); for (gIter = providers; gIter != NULL; gIter = gIter->next) { GList *tmp1 = result; GList *tmp2 = resources_os_list_ocf_agents(gIter->data); if (tmp2) { result = g_list_concat(tmp1, tmp2); } } g_list_free_full(providers, free); return result; } #if SUPPORT_NAGIOS GList * resources_os_list_nagios_agents(void) { GList *plugin_list = NULL; GList *result = NULL; GList *gIter = NULL; plugin_list = get_directory_list(NAGIOS_PLUGIN_DIR, TRUE, TRUE); /* Make sure both the plugin and its metadata exist */ for (gIter = plugin_list; gIter != NULL; gIter = gIter->next) { const char *plugin = gIter->data; char *metadata = crm_strdup_printf(NAGIOS_METADATA_DIR "/%s.xml", plugin); struct stat st; if (stat(metadata, &st) == 0) { result = g_list_append(result, strdup(plugin)); } free(metadata); } g_list_free_full(plugin_list, free); return result; } #endif diff --git a/lib/services/services_private.h b/lib/services/services_private.h index ec9e1c4b73..a949592b54 100644 --- a/lib/services/services_private.h +++ b/lib/services/services_private.h @@ -1,89 +1,89 @@ /* * Copyright (C) 2010 - 2011, Red Hat, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef SERVICES_PRIVATE__H # define SERVICES_PRIVATE__H # include # include "crm/services.h" #if SUPPORT_DBUS # include #endif #define MAX_ARGC 255 struct svc_action_private_s { char *exec; char *args[MAX_ARGC]; guint repeat_timer; void (*callback) (svc_action_t * op); int stderr_fd; mainloop_io_t *stderr_gsource; int stdout_fd; mainloop_io_t *stdout_gsource; #if SUPPORT_DBUS DBusPendingCall* pending; unsigned timerid; #endif }; G_GNUC_INTERNAL GList *services_os_get_directory_list(const char *root, gboolean files, gboolean executable); G_GNUC_INTERNAL gboolean services_os_action_execute(svc_action_t * op); G_GNUC_INTERNAL GList *resources_os_list_lsb_agents(void); G_GNUC_INTERNAL GList *resources_os_list_ocf_providers(void); G_GNUC_INTERNAL GList *resources_os_list_ocf_agents(const char *provider); G_GNUC_INTERNAL GList *resources_os_list_nagios_agents(void); G_GNUC_INTERNAL gboolean cancel_recurring_action(svc_action_t * op); G_GNUC_INTERNAL gboolean recurring_action_timer(gpointer data); G_GNUC_INTERNAL gboolean operation_finalize(svc_action_t * op); G_GNUC_INTERNAL void services_add_inflight_op(svc_action_t *op); G_GNUC_INTERNAL -void handle_blocked_ops(void); +void services_untrack_op(svc_action_t *op); G_GNUC_INTERNAL gboolean is_op_blocked(const char *rsc); #if SUPPORT_DBUS G_GNUC_INTERNAL void services_set_op_pending(svc_action_t *op, DBusPendingCall *pending); #endif #endif /* SERVICES_PRIVATE__H */ diff --git a/lrmd/lrmd.c b/lrmd/lrmd.c index 724edb7ba7..5669d34988 100644 --- a/lrmd/lrmd.c +++ b/lrmd/lrmd.c @@ -1,1826 +1,1830 @@ /* * Copyright (c) 2012 David Vossel * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_SYS_TIMEB_H # include #endif #define EXIT_REASON_MAX_LEN 128 GHashTable *rsc_list = NULL; regex_t *version_format_regex = NULL; GHashTable *ra_version_hash = NULL; typedef struct lrmd_cmd_s { int timeout; int interval; int start_delay; int timeout_orig; int call_id; int exec_rc; int lrmd_op_status; int call_opts; /* Timer ids, must be removed on cmd destruction. */ int delay_id; int stonith_recurring_id; int rsc_deleted; int service_flags; char *client_id; char *origin; char *rsc_id; char *action; char *real_action; char *exit_reason; char *output; char *userdata_str; /* when set, this cmd should go through a container wrapper */ const char *isolation_wrapper; #ifdef HAVE_SYS_TIMEB_H /* recurring and systemd operations may involve more than one lrmd command * per operation, so they need info about original and most recent */ struct timeb t_first_run; /* Timestamp of when op first ran */ struct timeb t_run; /* Timestamp of when op most recently ran */ struct timeb t_first_queue; /* Timestamp of when op first was queued */ struct timeb t_queue; /* Timestamp of when op most recently was queued */ struct timeb t_rcchange; /* Timestamp of last rc change */ #endif int first_notify_sent; int last_notify_rc; int last_notify_op_status; int last_pid; GHashTable *params; xmlNode *versioned_attrs; } lrmd_cmd_t; static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc); static gboolean lrmd_rsc_dispatch(gpointer user_data); static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id); static void log_finished(lrmd_cmd_t * cmd, int exec_time, int queue_time) { char pid_str[32] = { 0, }; int log_level = LOG_INFO; if (cmd->last_pid) { snprintf(pid_str, 32, "%d", cmd->last_pid); } if (safe_str_eq(cmd->action, "monitor")) { log_level = LOG_DEBUG; } #ifdef HAVE_SYS_TIMEB_H do_crm_log(log_level, "finished - rsc:%s action:%s call_id:%d %s%s exit-code:%d exec-time:%dms queue-time:%dms", cmd->rsc_id, cmd->action, cmd->call_id, cmd->last_pid ? "pid:" : "", pid_str, cmd->exec_rc, exec_time, queue_time); #else do_crm_log(log_level, "finished - rsc:%s action:%s call_id:%d %s%s exit-code:%d", cmd->rsc_id, cmd->action, cmd->call_id, cmd->last_pid ? "pid:" : "", pid_str, cmd->exec_rc); #endif } static void log_execute(lrmd_cmd_t * cmd) { int log_level = LOG_INFO; if (safe_str_eq(cmd->action, "monitor")) { log_level = LOG_DEBUG; } do_crm_log(log_level, "executing - rsc:%s action:%s call_id:%d", cmd->rsc_id, cmd->action, cmd->call_id); } static const char * normalize_action_name(lrmd_rsc_t * rsc, const char *action) { if (safe_str_eq(action, "monitor") && - (safe_str_eq(rsc->class, "lsb") || - safe_str_eq(rsc->class, "service") || safe_str_eq(rsc->class, "systemd"))) { + (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_LSB) || + safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_SERVICE) + || safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_SYSTEMD))) { return "status"; } return action; } static lrmd_rsc_t * build_rsc_from_xml(xmlNode * msg) { xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR); lrmd_rsc_t *rsc = NULL; rsc = calloc(1, sizeof(lrmd_rsc_t)); crm_element_value_int(msg, F_LRMD_CALLOPTS, &rsc->call_opts); rsc->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID); rsc->class = crm_element_value_copy(rsc_xml, F_LRMD_CLASS); rsc->provider = crm_element_value_copy(rsc_xml, F_LRMD_PROVIDER); rsc->type = crm_element_value_copy(rsc_xml, F_LRMD_TYPE); rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_rsc_dispatch, rsc); return rsc; } static void dup_attr(gpointer key, gpointer value, gpointer user_data) { g_hash_table_replace(user_data, strdup(key), strdup(value)); } static gboolean valid_version_format(const char *version) { if (version == NULL) { return FALSE; } if (version_format_regex == NULL) { const char *regex_string = "^[[:digit:]]+([.][[:digit:]]+)*$"; version_format_regex = calloc(1, sizeof(regex_t)); regcomp(version_format_regex, regex_string, REG_EXTENDED); } return regexec(version_format_regex, version, 0, NULL, 0) == 0; } static const char* get_ra_version(const char *class, const char *provider, const char *type, gboolean update_hash) { int len = 0; char *key = NULL; static const char *default_version = "0.1"; const char *version = NULL; CRM_CHECK(type != NULL, return default_version); CRM_CHECK(class != NULL, return default_version); if (!provider) { provider = "heartbeat"; } len = strlen(class) + strlen(provider) + strlen(type) + 3; key = calloc(len, sizeof(char)); if (!key) { return default_version; } if (!ra_version_hash) { ra_version_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); } snprintf(key, len, "%s:%s:%s", class, provider, type); version = g_hash_table_lookup(ra_version_hash, key); if (!version || update_hash) { char *metadata = NULL; lrmd_t *lrmd = lrmd_api_new(); lrmd->cmds->get_metadata(lrmd, class, provider, type, &metadata, 0); lrmd_api_delete(lrmd); if (metadata) { xmlNode *metadata_xml = string2xml(metadata); xmlNode *version_xml = get_xpath_object("//resource-agent/@version", metadata_xml, LOG_TRACE); if (version_xml) { version = crm_element_value(version_xml, XML_ATTR_VERSION); if (valid_version_format(version)) { crm_info("Resource agent version for %s:%s:%s is %s", class, provider, type, version); version = strdup(version); g_hash_table_replace(ra_version_hash, strdup(key), (gpointer) version); } else { crm_notice("Resource agent version for %s:%s:%s has unrecognized format: %s", class, provider, type, version); version = default_version; } } else { crm_trace("Resource agent version for %s:%s:%s does not specified", class, provider, type); version = default_version; } free_xml(metadata_xml); free(metadata); } else { version = default_version; } } free(key); return version; } static lrmd_cmd_t * create_lrmd_cmd(xmlNode * msg, crm_client_t * client, lrmd_rsc_t *rsc) { int call_options = 0; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR); lrmd_cmd_t *cmd = NULL; cmd = calloc(1, sizeof(lrmd_cmd_t)); crm_element_value_int(msg, F_LRMD_CALLOPTS, &call_options); cmd->call_opts = call_options; cmd->client_id = strdup(client->id); crm_element_value_int(msg, F_LRMD_CALLID, &cmd->call_id); crm_element_value_int(rsc_xml, F_LRMD_RSC_INTERVAL, &cmd->interval); crm_element_value_int(rsc_xml, F_LRMD_TIMEOUT, &cmd->timeout); crm_element_value_int(rsc_xml, F_LRMD_RSC_START_DELAY, &cmd->start_delay); cmd->timeout_orig = cmd->timeout; cmd->origin = crm_element_value_copy(rsc_xml, F_LRMD_ORIGIN); cmd->action = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ACTION); cmd->userdata_str = crm_element_value_copy(rsc_xml, F_LRMD_RSC_USERDATA_STR); cmd->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID); cmd->params = xml2list(rsc_xml); cmd->versioned_attrs = first_named_child(rsc_xml, XML_TAG_VER_ATTRS); if (cmd->versioned_attrs) { const char *ra_version = get_ra_version(rsc->class, rsc->provider, rsc->type, safe_str_eq(cmd->action, "start")); GHashTable *node_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); GHashTable *hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); cmd->versioned_attrs = copy_xml(cmd->versioned_attrs); g_hash_table_insert(node_hash, strdup("#ra-version"), strdup(ra_version)); unpack_instance_attributes(NULL, cmd->versioned_attrs, XML_TAG_ATTR_SETS, node_hash, hash, NULL, FALSE, NULL); g_hash_table_foreach(hash, dup_attr, cmd->params); g_hash_table_destroy(node_hash); g_hash_table_destroy(hash); } cmd->isolation_wrapper = g_hash_table_lookup(cmd->params, "CRM_meta_isolation_wrapper"); if (cmd->isolation_wrapper) { if (g_hash_table_lookup(cmd->params, "CRM_meta_isolation_instance") == NULL) { g_hash_table_insert(cmd->params, strdup("CRM_meta_isolation_instance"), strdup(rsc->rsc_id)); } if (rsc->provider) { g_hash_table_insert(cmd->params, strdup("CRM_meta_provider"), strdup(rsc->provider)); } g_hash_table_insert(cmd->params, strdup("CRM_meta_class"), strdup(rsc->class)); g_hash_table_insert(cmd->params, strdup("CRM_meta_type"), strdup(rsc->type)); } if (safe_str_eq(g_hash_table_lookup(cmd->params, "CRM_meta_on_fail"), "block")) { crm_debug("Setting flag to leave pid group on timeout and only kill action pid for %s_%s_%d", cmd->rsc_id, cmd->action, cmd->interval); cmd->service_flags |= SVC_ACTION_LEAVE_GROUP; } return cmd; } static void free_lrmd_cmd(lrmd_cmd_t * cmd) { if (cmd->stonith_recurring_id) { g_source_remove(cmd->stonith_recurring_id); } if (cmd->delay_id) { g_source_remove(cmd->delay_id); } if (cmd->params) { g_hash_table_destroy(cmd->params); } if (cmd->versioned_attrs) { free_xml(cmd->versioned_attrs); } free(cmd->origin); free(cmd->action); free(cmd->real_action); free(cmd->userdata_str); free(cmd->rsc_id); free(cmd->output); free(cmd->exit_reason); free(cmd->client_id); free(cmd); } static gboolean stonith_recurring_op_helper(gpointer data) { lrmd_cmd_t *cmd = data; lrmd_rsc_t *rsc; cmd->stonith_recurring_id = 0; if (!cmd->rsc_id) { return FALSE; } rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id); CRM_ASSERT(rsc != NULL); /* take it out of recurring_ops list, and put it in the pending ops * to be executed */ rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd); rsc->pending_ops = g_list_append(rsc->pending_ops, cmd); #ifdef HAVE_SYS_TIMEB_H ftime(&cmd->t_queue); if (cmd->t_first_queue.time == 0) { cmd->t_first_queue = cmd->t_queue; } #endif mainloop_set_trigger(rsc->work); return FALSE; } static gboolean start_delay_helper(gpointer data) { lrmd_cmd_t *cmd = data; lrmd_rsc_t *rsc = NULL; cmd->delay_id = 0; rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL; if (rsc) { mainloop_set_trigger(rsc->work); } return FALSE; } static gboolean merge_recurring_duplicate(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { GListPtr gIter = NULL; lrmd_cmd_t * dup = NULL; gboolean dup_pending = FALSE; if (cmd->interval == 0) { return 0; } for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) { dup = gIter->data; if (safe_str_eq(cmd->action, dup->action) && cmd->interval == dup->interval) { dup_pending = TRUE; goto merge_dup; } } /* if dup is in recurring_ops list, that means it has already executed * and is in the interval loop. we can't just remove it in this case. */ for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) { dup = gIter->data; if (safe_str_eq(cmd->action, dup->action) && cmd->interval == dup->interval) { goto merge_dup; } } return FALSE; merge_dup: /* This should not occur, if it does we need to investigate in the crmd * how something like this is possible */ crm_warn("Duplicate recurring op entry detected (%s_%s_%d), merging with previous op entry", rsc->rsc_id, normalize_action_name(rsc, dup->action), dup->interval); /* merge */ dup->first_notify_sent = 0; free(dup->userdata_str); dup->userdata_str = cmd->userdata_str; cmd->userdata_str = NULL; dup->call_id = cmd->call_id; - if (safe_str_eq(rsc->class, "stonith")) { + if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) { /* if we are waiting for the next interval, kick it off now */ if (dup_pending == TRUE) { g_source_remove(cmd->stonith_recurring_id); cmd->stonith_recurring_id = 0; stonith_recurring_op_helper(cmd); } } else if (dup_pending == FALSE) { /* if we've already handed this to the service lib, kick off an early execution */ services_action_kick(rsc->rsc_id, normalize_action_name(rsc, dup->action), dup->interval); } free_lrmd_cmd(cmd); return TRUE; } static void schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { gboolean dup_processed = FALSE; CRM_CHECK(cmd != NULL, return); CRM_CHECK(rsc != NULL, return); crm_trace("Scheduling %s on %s", cmd->action, rsc->rsc_id); dup_processed = merge_recurring_duplicate(rsc, cmd); if (dup_processed) { /* duplicate recurring cmd found, cmds merged */ return; } /* crmd expects lrmd to automatically cancel recurring ops before rsc stops. */ if (rsc && safe_str_eq(cmd->action, "stop")) { cancel_all_recurring(rsc, NULL); } rsc->pending_ops = g_list_append(rsc->pending_ops, cmd); #ifdef HAVE_SYS_TIMEB_H ftime(&cmd->t_queue); if (cmd->t_first_queue.time == 0) { cmd->t_first_queue = cmd->t_queue; } #endif mainloop_set_trigger(rsc->work); if (cmd->start_delay) { cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd); } } static void send_reply(crm_client_t * client, int rc, uint32_t id, int call_id) { int send_rc = 0; xmlNode *reply = NULL; reply = create_xml_node(NULL, T_LRMD_REPLY); crm_xml_add(reply, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add_int(reply, F_LRMD_RC, rc); crm_xml_add_int(reply, F_LRMD_CALLID, call_id); send_rc = lrmd_server_send_reply(client, id, reply); free_xml(reply); if (send_rc < 0) { crm_warn("LRMD reply to %s failed: %d", client->name, send_rc); } } static void send_client_notify(gpointer key, gpointer value, gpointer user_data) { xmlNode *update_msg = user_data; crm_client_t *client = value; if (client == NULL) { crm_err("Asked to send event to NULL client"); return; } else if (client->name == NULL) { crm_trace("Asked to send event to client with no name"); return; } if (lrmd_server_send_notify(client, update_msg) <= 0) { crm_warn("Notification of client %s/%s failed", client->name, client->id); } } #ifdef HAVE_SYS_TIMEB_H /*! * \internal * \brief Return difference between two times in milliseconds * * \param[in] now More recent time (or NULL to use current time) * \param[in] old Earlier time * * \return milliseconds difference (or 0 if old is NULL or has time zero) */ static int time_diff_ms(struct timeb *now, struct timeb *old) { struct timeb local_now = { 0, }; if (now == NULL) { ftime(&local_now); now = &local_now; } if ((old == NULL) || (old->time == 0)) { return 0; } return difftime(now->time, old->time) * 1000 + now->millitm - old->millitm; } /*! * \internal * \brief Reset a command's operation times to their original values. * * Reset a command's run and queued timestamps to the timestamps of the original * command, so we report the entire time since then and not just the time since * the most recent command (for recurring and systemd operations). * * /param[in] cmd LRMD command object to reset * * /note It's not obvious what the queued time should be for a systemd * start/stop operation, which might go like this: * initial command queued 5ms, runs 3s * monitor command queued 10ms, runs 10s * monitor command queued 10ms, runs 10s * Is the queued time for that operation 5ms, 10ms or 25ms? The current * implementation will report 5ms. If it's 25ms, then we need to * subtract 20ms from the total exec time so as not to count it twice. * We can implement that later if it matters to anyone ... */ static void cmd_original_times(lrmd_cmd_t * cmd) { cmd->t_run = cmd->t_first_run; cmd->t_queue = cmd->t_first_queue; } #endif static void send_cmd_complete_notify(lrmd_cmd_t * cmd) { int exec_time = 0; int queue_time = 0; xmlNode *notify = NULL; #ifdef HAVE_SYS_TIMEB_H exec_time = time_diff_ms(NULL, &cmd->t_run); queue_time = time_diff_ms(&cmd->t_run, &cmd->t_queue); #endif log_finished(cmd, exec_time, queue_time); /* if the first notify result for a cmd has already been sent earlier, and the * the option to only send notifies on result changes is set. Check to see * if the last result is the same as the new one. If so, suppress this update */ if (cmd->first_notify_sent && (cmd->call_opts & lrmd_opt_notify_changes_only)) { if (cmd->last_notify_rc == cmd->exec_rc && cmd->last_notify_op_status == cmd->lrmd_op_status) { /* only send changes */ return; } } cmd->first_notify_sent = 1; cmd->last_notify_rc = cmd->exec_rc; cmd->last_notify_op_status = cmd->lrmd_op_status; notify = create_xml_node(NULL, T_LRMD_NOTIFY); crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add_int(notify, F_LRMD_TIMEOUT, cmd->timeout); crm_xml_add_int(notify, F_LRMD_RSC_INTERVAL, cmd->interval); crm_xml_add_int(notify, F_LRMD_RSC_START_DELAY, cmd->start_delay); crm_xml_add_int(notify, F_LRMD_EXEC_RC, cmd->exec_rc); crm_xml_add_int(notify, F_LRMD_OP_STATUS, cmd->lrmd_op_status); crm_xml_add_int(notify, F_LRMD_CALLID, cmd->call_id); crm_xml_add_int(notify, F_LRMD_RSC_DELETED, cmd->rsc_deleted); #ifdef HAVE_SYS_TIMEB_H crm_xml_add_int(notify, F_LRMD_RSC_RUN_TIME, cmd->t_run.time); crm_xml_add_int(notify, F_LRMD_RSC_RCCHANGE_TIME, cmd->t_rcchange.time); crm_xml_add_int(notify, F_LRMD_RSC_EXEC_TIME, exec_time); crm_xml_add_int(notify, F_LRMD_RSC_QUEUE_TIME, queue_time); #endif crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_RSC_EXEC); crm_xml_add(notify, F_LRMD_RSC_ID, cmd->rsc_id); if(cmd->real_action) { crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->real_action); } else { crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->action); } crm_xml_add(notify, F_LRMD_RSC_USERDATA_STR, cmd->userdata_str); crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->output); crm_xml_add(notify, F_LRMD_RSC_EXIT_REASON, cmd->exit_reason); if (cmd->params) { char *key = NULL; char *value = NULL; GHashTableIter iter; xmlNode *args = create_xml_node(notify, XML_TAG_ATTRS); g_hash_table_iter_init(&iter, cmd->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { hash2smartfield((gpointer) key, (gpointer) value, args); } } if (cmd->versioned_attrs) { add_node_copy(notify, cmd->versioned_attrs); } if (cmd->client_id && (cmd->call_opts & lrmd_opt_notify_orig_only)) { crm_client_t *client = crm_client_get_by_id(cmd->client_id); if (client) { send_client_notify(client->id, client, notify); } } else if (client_connections != NULL) { g_hash_table_foreach(client_connections, send_client_notify, notify); } free_xml(notify); } static void send_generic_notify(int rc, xmlNode * request) { if (client_connections != NULL) { int call_id = 0; xmlNode *notify = NULL; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); const char *op = crm_element_value(request, F_LRMD_OPERATION); crm_element_value_int(request, F_LRMD_CALLID, &call_id); notify = create_xml_node(NULL, T_LRMD_NOTIFY); crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add_int(notify, F_LRMD_RC, rc); crm_xml_add_int(notify, F_LRMD_CALLID, call_id); crm_xml_add(notify, F_LRMD_OPERATION, op); crm_xml_add(notify, F_LRMD_RSC_ID, rsc_id); g_hash_table_foreach(client_connections, send_client_notify, notify); free_xml(notify); } } static void cmd_reset(lrmd_cmd_t * cmd) { cmd->lrmd_op_status = 0; cmd->last_pid = 0; memset(&cmd->t_run, 0, sizeof(cmd->t_run)); memset(&cmd->t_queue, 0, sizeof(cmd->t_queue)); free(cmd->exit_reason); cmd->exit_reason = NULL; free(cmd->output); cmd->output = NULL; } static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc) { crm_trace("Resource operation rsc:%s action:%s completed (%p %p)", cmd->rsc_id, cmd->action, rsc ? rsc->active : NULL, cmd); if (rsc && (rsc->active == cmd)) { rsc->active = NULL; mainloop_set_trigger(rsc->work); } if (!rsc) { cmd->rsc_deleted = 1; } /* reset original timeout so client notification has correct information */ cmd->timeout = cmd->timeout_orig; send_cmd_complete_notify(cmd); if (cmd->interval && (cmd->lrmd_op_status == PCMK_LRM_OP_CANCELLED)) { if (rsc) { rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd); rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd); } free_lrmd_cmd(cmd); } else if (cmd->interval == 0) { if (rsc) { rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd); } free_lrmd_cmd(cmd); } else { /* Clear all the values pertaining just to the last iteration of a recurring op. */ cmd_reset(cmd); } } #if SUPPORT_HEARTBEAT static int pattern_matched(const char *pat, const char *str) { if (g_pattern_match_simple(pat, str)) { crm_debug("RA output matched stopped pattern [%s]", pat); return TRUE; } return FALSE; } static int hb2uniform_rc(const char *action, int rc, const char *stdout_data) { const char *stop_pattern[] = { "*stopped*", "*not*running*" }; const char *running_pattern[] = { "*running*", "*OK*" }; char *lower_std_output = NULL; int result; if (rc < 0) { return PCMK_OCF_UNKNOWN_ERROR; } /* Treat class heartbeat the same as class lsb. */ if (!safe_str_eq(action, "status") && !safe_str_eq(action, "monitor")) { return services_get_ocf_exitcode(action, rc); } /* for status though, exit code is ignored, * and the stdout is scanned for specific strings */ if (stdout_data == NULL) { crm_warn("No status output from the (hb) resource agent, assuming stopped"); return PCMK_OCF_NOT_RUNNING; } lower_std_output = g_ascii_strdown(stdout_data, -1); if (pattern_matched(stop_pattern[0], lower_std_output) || pattern_matched(stop_pattern[1], lower_std_output)) { result = PCMK_OCF_NOT_RUNNING; } else if (pattern_matched(running_pattern[0], lower_std_output) || pattern_matched(running_pattern[1], stdout_data)) { /* "OK" is matched case sensitive */ result = PCMK_OCF_OK; } else { /* It didn't say it was running - must be stopped */ crm_debug("RA output did not match any pattern, assuming stopped"); result = PCMK_OCF_NOT_RUNNING; } free(lower_std_output); return result; } #endif static int ocf2uniform_rc(int rc) { if (rc < 0 || rc > PCMK_OCF_FAILED_MASTER) { return PCMK_OCF_UNKNOWN_ERROR; } return rc; } static int stonith2uniform_rc(const char *action, int rc) { if (rc == -ENODEV) { if (safe_str_eq(action, "stop")) { rc = PCMK_OCF_OK; } else if (safe_str_eq(action, "start")) { rc = PCMK_OCF_NOT_INSTALLED; } else { rc = PCMK_OCF_NOT_RUNNING; } } else if (rc != 0) { rc = PCMK_OCF_UNKNOWN_ERROR; } return rc; } #if SUPPORT_NAGIOS static int nagios2uniform_rc(const char *action, int rc) { if (rc < 0) { return PCMK_OCF_UNKNOWN_ERROR; } switch (rc) { case NAGIOS_STATE_OK: return PCMK_OCF_OK; case NAGIOS_INSUFFICIENT_PRIV: return PCMK_OCF_INSUFFICIENT_PRIV; case NAGIOS_NOT_INSTALLED: return PCMK_OCF_NOT_INSTALLED; case NAGIOS_STATE_WARNING: case NAGIOS_STATE_CRITICAL: case NAGIOS_STATE_UNKNOWN: case NAGIOS_STATE_DEPENDENT: default: return PCMK_OCF_UNKNOWN_ERROR; } return PCMK_OCF_UNKNOWN_ERROR; } #endif static int get_uniform_rc(const char *standard, const char *action, int rc) { - if (safe_str_eq(standard, "ocf")) { + if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_OCF)) { return ocf2uniform_rc(rc); - } else if (safe_str_eq(standard, "stonith")) { + } else if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_STONITH)) { return stonith2uniform_rc(action, rc); - } else if (safe_str_eq(standard, "systemd")) { + } else if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_SYSTEMD)) { return rc; - } else if (safe_str_eq(standard, "upstart")) { + } else if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_UPSTART)) { return rc; #if SUPPORT_NAGIOS - } else if (safe_str_eq(standard, "nagios")) { + } else if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_NAGIOS)) { return nagios2uniform_rc(action, rc); #endif } else { return services_get_ocf_exitcode(action, rc); } } static int action_get_uniform_rc(svc_action_t * action) { lrmd_cmd_t *cmd = action->cb_data; #if SUPPORT_HEARTBEAT - if (safe_str_eq(action->standard, "heartbeat")) { + if (safe_str_eq(action->standard, PCMK_RESOURCE_CLASS_HB)) { return hb2uniform_rc(cmd->action, action->rc, action->stdout_data); } #endif return get_uniform_rc(action->standard, cmd->action, action->rc); } void notify_of_new_client(crm_client_t *new_client) { crm_client_t *client = NULL; GHashTableIter iter; xmlNode *notify = NULL; char *key = NULL; notify = create_xml_node(NULL, T_LRMD_NOTIFY); crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_NEW_CLIENT); g_hash_table_iter_init(&iter, client_connections); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & client)) { if (safe_str_eq(client->id, new_client->id)) { continue; } send_client_notify((gpointer) key, (gpointer) client, (gpointer) notify); } free_xml(notify); } static char * parse_exit_reason(const char *output) { const char *cur = NULL; const char *last = NULL; char *reason = NULL; static int cookie_len = 0; char *eol = NULL; if (output == NULL) { return NULL; } if (!cookie_len) { cookie_len = strlen(PCMK_OCF_REASON_PREFIX); } cur = strstr(output, PCMK_OCF_REASON_PREFIX); for (; cur != NULL; cur = strstr(cur, PCMK_OCF_REASON_PREFIX)) { /* skip over the cookie delimiter string */ cur += cookie_len; last = cur; } if (last == NULL) { return NULL; } /* make our own copy */ reason = calloc(1, (EXIT_REASON_MAX_LEN+1)); CRM_ASSERT(reason); /* limit reason string size */ strncpy(reason, last, EXIT_REASON_MAX_LEN); /* truncate everything after a new line */ eol = strchr(reason, '\n'); if (eol != NULL) { *eol = '\0'; } return reason; } void client_disconnect_cleanup(const char *client_id) { GHashTableIter iter; lrmd_rsc_t *rsc = NULL; char *key = NULL; g_hash_table_iter_init(&iter, rsc_list); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) { if (rsc->call_opts & lrmd_opt_drop_recurring) { /* This client is disconnecting, drop any recurring operations * it may have initiated on the resource */ cancel_all_recurring(rsc, client_id); } } } static void action_complete(svc_action_t * action) { lrmd_rsc_t *rsc; lrmd_cmd_t *cmd = action->cb_data; const char *rclass = NULL; bool goagain = false; if (!cmd) { crm_err("LRMD action (%s) completed does not match any known operations.", action->id); return; } #ifdef HAVE_SYS_TIMEB_H if (cmd->exec_rc != action->rc) { ftime(&cmd->t_rcchange); } #endif cmd->last_pid = action->pid; cmd->exec_rc = action_get_uniform_rc(action); cmd->lrmd_op_status = action->status; rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL; - if(rsc && safe_str_eq(rsc->class, "service")) { + if (rsc && safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_SERVICE)) { rclass = resources_find_service_class(rsc->class); } else if(rsc) { rclass = rsc->class; } - if (safe_str_eq(rclass, "systemd")) { + if (safe_str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD)) { if(cmd->exec_rc == PCMK_OCF_OK && safe_str_eq(cmd->action, "start")) { /* systemd I curse thee! * * systemd returns from start actions after the start _begins_ * not after it completes. * * So we have to jump through a few hoops so that we don't * report 'complete' to the rest of pacemaker until, you know, * it's actually done. */ goagain = true; cmd->real_action = cmd->action; cmd->action = strdup("monitor"); } else if(cmd->exec_rc == PCMK_OCF_OK && safe_str_eq(cmd->action, "stop")) { goagain = true; cmd->real_action = cmd->action; cmd->action = strdup("monitor"); } else if(cmd->real_action) { /* Ok, so this is the follow up monitor action to check if start actually completed */ if(cmd->lrmd_op_status == PCMK_LRM_OP_DONE && cmd->exec_rc == PCMK_OCF_PENDING) { goagain = true; } else if(cmd->exec_rc == PCMK_OCF_OK && safe_str_eq(cmd->real_action, "stop")) { goagain = true; } else { #ifdef HAVE_SYS_TIMEB_H int time_sum = time_diff_ms(NULL, &cmd->t_first_run); int timeout_left = cmd->timeout_orig - time_sum; crm_debug("%s %s is now complete (elapsed=%dms, remaining=%dms): %s (%d)", cmd->rsc_id, cmd->real_action, time_sum, timeout_left, services_ocf_exitcode_str(cmd->exec_rc), cmd->exec_rc); cmd_original_times(cmd); #endif if(cmd->lrmd_op_status == PCMK_LRM_OP_DONE && cmd->exec_rc == PCMK_OCF_NOT_RUNNING && safe_str_eq(cmd->real_action, "stop")) { cmd->exec_rc = PCMK_OCF_OK; } } } } #if SUPPORT_NAGIOS - if (rsc && safe_str_eq(rsc->class, "nagios")) { + if (rsc && safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS)) { if (safe_str_eq(cmd->action, "monitor") && cmd->interval == 0 && cmd->exec_rc == PCMK_OCF_OK) { /* Successfully executed --version for the nagios plugin */ cmd->exec_rc = PCMK_OCF_NOT_RUNNING; } else if (safe_str_eq(cmd->action, "start") && cmd->exec_rc != PCMK_OCF_OK) { goagain = true; } } #endif /* Wrapping this section in ifdef implies that systemd resources are not * fully supported on platforms without sys/timeb.h. Since timeb is * obsolete, we should eventually prefer a clock_gettime() implementation * (wrapped in its own ifdef) with timeb as a fallback. */ #ifdef HAVE_SYS_TIMEB_H if(goagain) { int time_sum = time_diff_ms(NULL, &cmd->t_first_run); int timeout_left = cmd->timeout_orig - time_sum; int delay = cmd->timeout_orig / 10; if(delay >= timeout_left && timeout_left > 20) { delay = timeout_left/2; } delay = QB_MIN(2000, delay); if (delay < timeout_left) { cmd->start_delay = delay; cmd->timeout = timeout_left; if(cmd->exec_rc == PCMK_OCF_OK) { crm_debug("%s %s may still be in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", cmd->rsc_id, cmd->real_action, time_sum, timeout_left, delay); } else if(cmd->exec_rc == PCMK_OCF_PENDING) { crm_info("%s %s is still in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", cmd->rsc_id, cmd->action, time_sum, timeout_left, delay); } else { crm_notice("%s %s failed '%s' (%d): re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", cmd->rsc_id, cmd->action, services_ocf_exitcode_str(cmd->exec_rc), cmd->exec_rc, time_sum, timeout_left, delay); } cmd_reset(cmd); if(rsc) { rsc->active = NULL; } schedule_lrmd_cmd(rsc, cmd); /* Don't finalize cmd, we're not done with it yet */ return; } else { crm_notice("Giving up on %s %s (rc=%d): timeout (elapsed=%dms, remaining=%dms)", cmd->rsc_id, cmd->real_action?cmd->real_action:cmd->action, cmd->exec_rc, time_sum, timeout_left); cmd->lrmd_op_status = PCMK_LRM_OP_TIMEOUT; cmd->exec_rc = PCMK_OCF_TIMEOUT; cmd_original_times(cmd); } } #endif if (action->stderr_data) { cmd->output = strdup(action->stderr_data); cmd->exit_reason = parse_exit_reason(action->stderr_data); } else if (action->stdout_data) { cmd->output = strdup(action->stdout_data); } cmd_finalize(cmd, rsc); } static void stonith_action_complete(lrmd_cmd_t * cmd, int rc) { int recurring = cmd->interval; lrmd_rsc_t *rsc = NULL; - cmd->exec_rc = get_uniform_rc("stonith", cmd->action, rc); + cmd->exec_rc = get_uniform_rc(PCMK_RESOURCE_CLASS_STONITH, cmd->action, rc); rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id); if (cmd->lrmd_op_status == PCMK_LRM_OP_CANCELLED) { recurring = 0; /* do nothing */ } else if (rc == -ENODEV && safe_str_eq(cmd->action, "monitor")) { /* Not registered == inactive */ cmd->lrmd_op_status = PCMK_LRM_OP_DONE; cmd->exec_rc = PCMK_OCF_NOT_RUNNING; } else if (rc) { /* Attempt to map return codes to op status if possible */ switch (rc) { case -EPROTONOSUPPORT: cmd->lrmd_op_status = PCMK_LRM_OP_NOTSUPPORTED; break; case -ETIME: cmd->lrmd_op_status = PCMK_LRM_OP_TIMEOUT; break; default: /* TODO: This looks wrong. Status should be _DONE and exec_rc set to an error */ cmd->lrmd_op_status = PCMK_LRM_OP_ERROR; } } else { /* command successful */ cmd->lrmd_op_status = PCMK_LRM_OP_DONE; if (safe_str_eq(cmd->action, "start") && rsc) { rsc->stonith_started = 1; } } if (recurring && rsc) { if (cmd->stonith_recurring_id) { g_source_remove(cmd->stonith_recurring_id); } cmd->stonith_recurring_id = g_timeout_add(cmd->interval, stonith_recurring_op_helper, cmd); } cmd_finalize(cmd, rsc); } static void lrmd_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data) { stonith_action_complete(data->userdata, data->rc); } void stonith_connection_failed(void) { GHashTableIter iter; GList *cmd_list = NULL; GList *cmd_iter = NULL; lrmd_rsc_t *rsc = NULL; char *key = NULL; g_hash_table_iter_init(&iter, rsc_list); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) { - if (safe_str_eq(rsc->class, "stonith")) { + if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) { if (rsc->active) { cmd_list = g_list_append(cmd_list, rsc->active); } if (rsc->recurring_ops) { cmd_list = g_list_concat(cmd_list, rsc->recurring_ops); } if (rsc->pending_ops) { cmd_list = g_list_concat(cmd_list, rsc->pending_ops); } rsc->pending_ops = rsc->recurring_ops = NULL; } } if (!cmd_list) { return; } crm_err("STONITH connection failed, finalizing %d pending operations.", g_list_length(cmd_list)); for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) { stonith_action_complete(cmd_iter->data, -ENOTCONN); } g_list_free(cmd_list); } static int lrmd_rsc_execute_stonith(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { int rc = 0; int do_monitor = 0; stonith_t *stonith_api = get_stonith_connection(); if (!stonith_api) { - cmd->exec_rc = get_uniform_rc("stonith", cmd->action, -ENOTCONN); + cmd->exec_rc = get_uniform_rc(PCMK_RESOURCE_CLASS_STONITH, cmd->action, + -ENOTCONN); cmd->lrmd_op_status = PCMK_LRM_OP_ERROR; cmd_finalize(cmd, rsc); return -EUNATCH; } if (safe_str_eq(cmd->action, "start")) { char *key = NULL; char *value = NULL; stonith_key_value_t *device_params = NULL; if (cmd->params) { GHashTableIter iter; g_hash_table_iter_init(&iter, cmd->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { device_params = stonith_key_value_add(device_params, key, value); } } /* Stonith automatically registers devices from the IPC when changes occur, * but to avoid a possible race condition between stonith receiving the IPC update * and the lrmd requesting that resource, the lrmd still registers the device as well. * Stonith knows how to handle duplicate device registrations correctly. */ rc = stonith_api->cmds->register_device(stonith_api, st_opt_sync_call, cmd->rsc_id, rsc->provider, rsc->type, device_params); stonith_key_value_freeall(device_params, 1, 1); if (rc == 0) { do_monitor = 1; } } else if (safe_str_eq(cmd->action, "stop")) { rc = stonith_api->cmds->remove_device(stonith_api, st_opt_sync_call, cmd->rsc_id); rsc->stonith_started = 0; } else if (safe_str_eq(cmd->action, "monitor")) { if (cmd->interval) { do_monitor = 1; } else { rc = rsc->stonith_started ? 0 : -ENODEV; } } if (!do_monitor) { goto cleanup_stonith_exec; } rc = stonith_api->cmds->monitor(stonith_api, 0, cmd->rsc_id, cmd->timeout / 1000); rc = stonith_api->cmds->register_callback(stonith_api, rc, 0, 0, cmd, "lrmd_stonith_callback", lrmd_stonith_callback); /* don't cleanup yet, we will find out the result of the monitor later */ if (rc > 0) { rsc->active = cmd; return rc; } else if (rc == 0) { rc = -1; } cleanup_stonith_exec: stonith_action_complete(cmd, rc); return rc; } static int lrmd_rsc_execute_service_lib(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { svc_action_t *action = NULL; GHashTable *params_copy = NULL; CRM_ASSERT(rsc); CRM_ASSERT(cmd); crm_trace("Creating action, resource:%s action:%s class:%s provider:%s agent:%s", rsc->rsc_id, cmd->action, rsc->class, rsc->provider, rsc->type); #if SUPPORT_NAGIOS /* Recurring operations are cancelled anyway for a stop operation */ - if (safe_str_eq(rsc->class, "nagios") && safe_str_eq(cmd->action, "stop")) { + if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS) + && safe_str_eq(cmd->action, "stop")) { + cmd->exec_rc = PCMK_OCF_OK; goto exec_done; } #endif if (cmd->params) { params_copy = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if (params_copy != NULL) { g_hash_table_foreach(cmd->params, dup_attr, params_copy); } } if (cmd->isolation_wrapper) { g_hash_table_remove(params_copy, "CRM_meta_isolation_wrapper"); action = resources_action_create(rsc->rsc_id, - "ocf", + PCMK_RESOURCE_CLASS_OCF, LRMD_ISOLATION_PROVIDER, cmd->isolation_wrapper, cmd->action, /*action will be normalized in wrapper*/ cmd->interval, cmd->timeout, params_copy, cmd->service_flags); } else { action = resources_action_create(rsc->rsc_id, rsc->class, rsc->provider, rsc->type, normalize_action_name(rsc, cmd->action), cmd->interval, cmd->timeout, params_copy, cmd->service_flags); } if (!action) { crm_err("Failed to create action, action:%s on resource %s", cmd->action, rsc->rsc_id); cmd->lrmd_op_status = PCMK_LRM_OP_ERROR; goto exec_done; } action->cb_data = cmd; /* 'cmd' may not be valid after this point if * services_action_async() returned TRUE * * Upstart and systemd both synchronously determine monitor/status * results and call action_complete (which may free 'cmd') if necessary. */ if (services_action_async(action, action_complete)) { return TRUE; } cmd->exec_rc = action->rc; if(action->status != PCMK_LRM_OP_DONE) { cmd->lrmd_op_status = action->status; } else { cmd->lrmd_op_status = PCMK_LRM_OP_ERROR; } services_action_free(action); action = NULL; exec_done: cmd_finalize(cmd, rsc); return TRUE; } static gboolean lrmd_rsc_execute(lrmd_rsc_t * rsc) { lrmd_cmd_t *cmd = NULL; CRM_CHECK(rsc != NULL, return FALSE); if (rsc->active) { crm_trace("%s is still active", rsc->rsc_id); return TRUE; } if (rsc->pending_ops) { GList *first = rsc->pending_ops; cmd = first->data; if (cmd->delay_id) { crm_trace ("Command %s %s was asked to run too early, waiting for start_delay timeout of %dms", cmd->rsc_id, cmd->action, cmd->start_delay); return TRUE; } rsc->pending_ops = g_list_remove_link(rsc->pending_ops, first); g_list_free_1(first); #ifdef HAVE_SYS_TIMEB_H if (cmd->t_first_run.time == 0) { ftime(&cmd->t_first_run); } ftime(&cmd->t_run); #endif } if (!cmd) { crm_trace("Nothing further to do for %s", rsc->rsc_id); return TRUE; } rsc->active = cmd; /* only one op at a time for a rsc */ if (cmd->interval) { rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd); } log_execute(cmd); - if (safe_str_eq(rsc->class, "stonith")) { + if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) { lrmd_rsc_execute_stonith(rsc, cmd); } else { lrmd_rsc_execute_service_lib(rsc, cmd); } return TRUE; } static gboolean lrmd_rsc_dispatch(gpointer user_data) { return lrmd_rsc_execute(user_data); } void free_rsc(gpointer data) { GListPtr gIter = NULL; lrmd_rsc_t *rsc = data; - int is_stonith = safe_str_eq(rsc->class, "stonith"); + int is_stonith = safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH); gIter = rsc->pending_ops; while (gIter != NULL) { GListPtr next = gIter->next; lrmd_cmd_t *cmd = gIter->data; /* command was never executed */ cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED; cmd_finalize(cmd, NULL); gIter = next; } /* frees list, but not list elements. */ g_list_free(rsc->pending_ops); gIter = rsc->recurring_ops; while (gIter != NULL) { GListPtr next = gIter->next; lrmd_cmd_t *cmd = gIter->data; if (is_stonith) { cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED; /* if a stonith cmd is in-flight, mark just mark it as cancelled, * it is not safe to finalize/free the cmd until the stonith api * says it has either completed or timed out.*/ if (rsc->active != cmd) { cmd_finalize(cmd, NULL); } } else { /* This command is already handed off to service library, * let service library cancel it and tell us via the callback * when it is cancelled. The rsc can be safely destroyed * even if we are waiting for the cancel result */ services_action_cancel(rsc->rsc_id, normalize_action_name(rsc, cmd->action), cmd->interval); } gIter = next; } /* frees list, but not list elements. */ g_list_free(rsc->recurring_ops); free(rsc->rsc_id); free(rsc->class); free(rsc->provider); free(rsc->type); mainloop_destroy_trigger(rsc->work); free(rsc); } static int process_lrmd_signon(crm_client_t * client, uint32_t id, xmlNode * request) { xmlNode *reply = create_xml_node(NULL, "reply"); const char *is_ipc_provider = crm_element_value(request, F_LRMD_IS_IPC_PROVIDER); const char *protocol_version = crm_element_value(request, F_LRMD_PROTOCOL_VERSION); if (compare_version(protocol_version, LRMD_PROTOCOL_VERSION) < 0) { crm_err("Cluster API version must be greater than or equal to %s, not %s", LRMD_PROTOCOL_VERSION, protocol_version); crm_xml_add_int(reply, F_LRMD_RC, -EPROTO); crm_xml_add(reply, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION); } crm_xml_add(reply, F_LRMD_OPERATION, CRM_OP_REGISTER); crm_xml_add(reply, F_LRMD_CLIENTID, client->id); crm_xml_add(reply, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION); lrmd_server_send_reply(client, id, reply); if (crm_is_true(is_ipc_provider)) { /* this is a remote connection from a cluster nodes crmd */ #ifdef SUPPORT_REMOTE ipc_proxy_add_provider(client); #endif } free_xml(reply); return pcmk_ok; } static int process_lrmd_rsc_register(crm_client_t * client, uint32_t id, xmlNode * request) { int rc = pcmk_ok; lrmd_rsc_t *rsc = build_rsc_from_xml(request); lrmd_rsc_t *dup = g_hash_table_lookup(rsc_list, rsc->rsc_id); if (dup && safe_str_eq(rsc->class, dup->class) && safe_str_eq(rsc->provider, dup->provider) && safe_str_eq(rsc->type, dup->type)) { crm_warn("Can't add, RSC '%s' already present in the rsc list (%d active resources)", rsc->rsc_id, g_hash_table_size(rsc_list)); free_rsc(rsc); return rc; } g_hash_table_replace(rsc_list, rsc->rsc_id, rsc); crm_info("Added '%s' to the rsc list (%d active resources)", rsc->rsc_id, g_hash_table_size(rsc_list)); return rc; } static void process_lrmd_get_rsc_info(crm_client_t * client, uint32_t id, xmlNode * request) { int rc = pcmk_ok; int send_rc = 0; int call_id = 0; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); xmlNode *reply = NULL; lrmd_rsc_t *rsc = NULL; crm_element_value_int(request, F_LRMD_CALLID, &call_id); if (!rsc_id) { rc = -ENODEV; goto get_rsc_done; } if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) { crm_info("Resource '%s' not found (%d active resources)", rsc_id, g_hash_table_size(rsc_list)); rc = -ENODEV; goto get_rsc_done; } get_rsc_done: reply = create_xml_node(NULL, T_LRMD_REPLY); crm_xml_add(reply, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add_int(reply, F_LRMD_RC, rc); crm_xml_add_int(reply, F_LRMD_CALLID, call_id); if (rsc) { crm_xml_add(reply, F_LRMD_RSC_ID, rsc->rsc_id); crm_xml_add(reply, F_LRMD_CLASS, rsc->class); crm_xml_add(reply, F_LRMD_PROVIDER, rsc->provider); crm_xml_add(reply, F_LRMD_TYPE, rsc->type); } send_rc = lrmd_server_send_reply(client, id, reply); if (send_rc < 0) { crm_warn("LRMD reply to %s failed: %d", client->name, send_rc); } free_xml(reply); } static int process_lrmd_rsc_unregister(crm_client_t * client, uint32_t id, xmlNode * request) { int rc = pcmk_ok; lrmd_rsc_t *rsc = NULL; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); if (!rsc_id) { return -ENODEV; } if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) { crm_info("Resource '%s' not found (%d active resources)", rsc_id, g_hash_table_size(rsc_list)); return pcmk_ok; } if (rsc->active) { /* let the caller know there are still active ops on this rsc to watch for */ crm_trace("Operation still in progress: %p", rsc->active); rc = -EINPROGRESS; } g_hash_table_remove(rsc_list, rsc_id); return rc; } static int process_lrmd_rsc_exec(crm_client_t * client, uint32_t id, xmlNode * request) { lrmd_rsc_t *rsc = NULL; lrmd_cmd_t *cmd = NULL; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); int call_id; if (!rsc_id) { return -EINVAL; } if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) { crm_info("Resource '%s' not found (%d active resources)", rsc_id, g_hash_table_size(rsc_list)); return -ENODEV; } cmd = create_lrmd_cmd(request, client, rsc); call_id = cmd->call_id; /* Don't reference cmd after handing it off to be scheduled. * The cmd could get merged and freed. */ schedule_lrmd_cmd(rsc, cmd); return call_id; } static int cancel_op(const char *rsc_id, const char *action, int interval) { GListPtr gIter = NULL; lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, rsc_id); /* How to cancel an action. * 1. Check pending ops list, if it hasn't been handed off * to the service library or stonith recurring list remove * it there and that will stop it. * 2. If it isn't in the pending ops list, then it's either a * recurring op in the stonith recurring list, or the service * library's recurring list. Stop it there * 3. If not found in any lists, then this operation has either * been executed already and is not a recurring operation, or * never existed. */ if (!rsc) { return -ENODEV; } for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) { lrmd_cmd_t *cmd = gIter->data; if (safe_str_eq(cmd->action, action) && cmd->interval == interval) { cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED; cmd_finalize(cmd, rsc); return pcmk_ok; } } - if (safe_str_eq(rsc->class, "stonith")) { + if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) { /* The service library does not handle stonith operations. * We have to handle recurring stonith operations ourselves. */ for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) { lrmd_cmd_t *cmd = gIter->data; if (safe_str_eq(cmd->action, action) && cmd->interval == interval) { cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED; if (rsc->active != cmd) { cmd_finalize(cmd, rsc); } return pcmk_ok; } } } else if (services_action_cancel(rsc_id, normalize_action_name(rsc, action), interval) == TRUE) { /* The service library will tell the action_complete callback function * this action was cancelled, which will destroy the cmd and remove * it from the recurring_op list. Do not do that in this function * if the service library says it cancelled it. */ return pcmk_ok; } return -EOPNOTSUPP; } static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id) { GList *cmd_list = NULL; GList *cmd_iter = NULL; /* Notice a copy of each list is created when concat is called. * This prevents odd behavior from occurring when the cmd_list * is iterated through later on. It is possible the cancel_op * function may end up modifying the recurring_ops and pending_ops * lists. If we did not copy those lists, our cmd_list iteration * could get messed up.*/ if (rsc->recurring_ops) { cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->recurring_ops)); } if (rsc->pending_ops) { cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->pending_ops)); } if (!cmd_list) { return; } for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) { lrmd_cmd_t *cmd = cmd_iter->data; if (cmd->interval == 0) { continue; } if (client_id && safe_str_neq(cmd->client_id, client_id)) { continue; } cancel_op(rsc->rsc_id, cmd->action, cmd->interval); } /* frees only the copied list data, not the cmds */ g_list_free(cmd_list); } static int process_lrmd_rsc_cancel(crm_client_t * client, uint32_t id, xmlNode * request) { xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); const char *action = crm_element_value(rsc_xml, F_LRMD_RSC_ACTION); int interval = 0; crm_element_value_int(rsc_xml, F_LRMD_RSC_INTERVAL, &interval); if (!rsc_id || !action) { return -EINVAL; } return cancel_op(rsc_id, action, interval); } void process_lrmd_message(crm_client_t * client, uint32_t id, xmlNode * request) { int rc = pcmk_ok; int call_id = 0; const char *op = crm_element_value(request, F_LRMD_OPERATION); int do_reply = 0; int do_notify = 0; crm_trace("Processing %s operation from %s", op, client->id); crm_element_value_int(request, F_LRMD_CALLID, &call_id); if (crm_str_eq(op, CRM_OP_IPC_FWD, TRUE)) { #ifdef SUPPORT_REMOTE ipc_proxy_forward_client(client, request); #endif do_reply = 1; } else if (crm_str_eq(op, CRM_OP_REGISTER, TRUE)) { rc = process_lrmd_signon(client, id, request); } else if (crm_str_eq(op, LRMD_OP_RSC_REG, TRUE)) { rc = process_lrmd_rsc_register(client, id, request); do_notify = 1; do_reply = 1; } else if (crm_str_eq(op, LRMD_OP_RSC_INFO, TRUE)) { process_lrmd_get_rsc_info(client, id, request); } else if (crm_str_eq(op, LRMD_OP_RSC_UNREG, TRUE)) { rc = process_lrmd_rsc_unregister(client, id, request); /* don't notify anyone about failed un-registers */ if (rc == pcmk_ok || rc == -EINPROGRESS) { do_notify = 1; } do_reply = 1; } else if (crm_str_eq(op, LRMD_OP_RSC_EXEC, TRUE)) { rc = process_lrmd_rsc_exec(client, id, request); do_reply = 1; } else if (crm_str_eq(op, LRMD_OP_RSC_CANCEL, TRUE)) { rc = process_lrmd_rsc_cancel(client, id, request); do_reply = 1; } else if (crm_str_eq(op, LRMD_OP_POKE, TRUE)) { do_notify = 1; do_reply = 1; } else if (crm_str_eq(op, LRMD_OP_CHECK, TRUE)) { xmlNode *data = get_message_xml(request, F_LRMD_CALLDATA); const char *timeout = crm_element_value(data, F_LRMD_WATCHDOG); CRM_LOG_ASSERT(data != NULL); check_sbd_timeout(timeout); } else { rc = -EOPNOTSUPP; do_reply = 1; crm_err("Unknown %s from %s", op, client->name); crm_log_xml_warn(request, "UnknownOp"); } crm_debug("Processed %s operation from %s: rc=%d, reply=%d, notify=%d", op, client->id, rc, do_reply, do_notify); if (do_reply) { send_reply(client, rc, id, call_id); } if (do_notify) { send_generic_notify(rc, request); } } diff --git a/pengine/native.c b/pengine/native.c index 0c972ea5a5..52eba4f623 100644 --- a/pengine/native.c +++ b/pengine/native.c @@ -1,3272 +1,3272 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include /* #define DELETE_THEN_REFRESH 1 // The crmd will remove the resource from the CIB itself, making this redundant */ #define INFINITY_HACK (INFINITY * -100) #define VARIANT_NATIVE 1 #include gboolean update_action(action_t * then); void native_rsc_colocation_rh_must(resource_t * rsc_lh, gboolean update_lh, resource_t * rsc_rh, gboolean update_rh); void native_rsc_colocation_rh_mustnot(resource_t * rsc_lh, gboolean update_lh, resource_t * rsc_rh, gboolean update_rh); void Recurring(resource_t * rsc, action_t * start, node_t * node, pe_working_set_t * data_set); void RecurringOp(resource_t * rsc, action_t * start, node_t * node, xmlNode * operation, pe_working_set_t * data_set); void Recurring_Stopped(resource_t * rsc, action_t * start, node_t * node, pe_working_set_t * data_set); void RecurringOp_Stopped(resource_t * rsc, action_t * start, node_t * node, xmlNode * operation, pe_working_set_t * data_set); void ReloadRsc(resource_t * rsc, node_t *node, pe_working_set_t * data_set); gboolean DeleteRsc(resource_t * rsc, node_t * node, gboolean optional, pe_working_set_t * data_set); gboolean StopRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean StartRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean DemoteRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean PromoteRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean RoleError(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean NullOp(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set); /* *INDENT-OFF* */ enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = { /* Current State */ /* Next State: Unknown Stopped Started Slave Master */ /* Unknown */ { RSC_ROLE_UNKNOWN, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, }, /* Stopped */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_SLAVE, RSC_ROLE_SLAVE, }, /* Started */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }, /* Slave */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }, /* Master */ { RSC_ROLE_STOPPED, RSC_ROLE_SLAVE, RSC_ROLE_SLAVE, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }, }; gboolean (*rsc_action_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX])(resource_t*,node_t*,gboolean,pe_working_set_t*) = { /* Current State */ /* Next State: Unknown Stopped Started Slave Master */ /* Unknown */ { RoleError, StopRsc, RoleError, RoleError, RoleError, }, /* Stopped */ { RoleError, NullOp, StartRsc, StartRsc, RoleError, }, /* Started */ { RoleError, StopRsc, NullOp, NullOp, PromoteRsc, }, /* Slave */ { RoleError, StopRsc, StopRsc, NullOp, PromoteRsc, }, /* Master */ { RoleError, DemoteRsc, DemoteRsc, DemoteRsc, NullOp, }, }; /* *INDENT-ON* */ static action_t * get_first_named_action(resource_t * rsc, const char *action, gboolean only_valid, node_t * current); static gboolean native_choose_node(resource_t * rsc, node_t * prefer, pe_working_set_t * data_set) { /* 1. Sort by weight 2. color.chosen_node = the node (of those with the highest wieght) with the fewest resources 3. remove color.chosen_node from all other colors */ GListPtr nodes = NULL; node_t *chosen = NULL; int lpc = 0; int multiple = 0; int length = 0; gboolean result = FALSE; process_utilization(rsc, &prefer, data_set); length = g_hash_table_size(rsc->allowed_nodes); if (is_not_set(rsc->flags, pe_rsc_provisional)) { return rsc->allocated_to ? TRUE : FALSE; } if (prefer) { chosen = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id); if (chosen && chosen->weight >= 0 && can_run_resources(chosen)) { pe_rsc_trace(rsc, "Using preferred node %s for %s instead of choosing from %d candidates", chosen->details->uname, rsc->id, length); } else if (chosen && chosen->weight < 0) { pe_rsc_trace(rsc, "Preferred node %s for %s was unavailable", chosen->details->uname, rsc->id); chosen = NULL; } else if (chosen && can_run_resources(chosen)) { pe_rsc_trace(rsc, "Preferred node %s for %s was unsuitable", chosen->details->uname, rsc->id); chosen = NULL; } else { pe_rsc_trace(rsc, "Preferred node %s for %s was unknown", prefer->details->uname, rsc->id); } } if (chosen == NULL && rsc->allowed_nodes) { nodes = g_hash_table_get_values(rsc->allowed_nodes); nodes = g_list_sort_with_data(nodes, sort_node_weight, g_list_nth_data(rsc->running_on, 0)); chosen = g_list_nth_data(nodes, 0); pe_rsc_trace(rsc, "Chose node %s for %s from %d candidates", chosen ? chosen->details->uname : "", rsc->id, length); if (chosen && chosen->weight > 0 && can_run_resources(chosen)) { node_t *running = g_list_nth_data(rsc->running_on, 0); if (running && can_run_resources(running) == FALSE) { pe_rsc_trace(rsc, "Current node for %s (%s) can't run resources", rsc->id, running->details->uname); running = NULL; } for (lpc = 1; lpc < length && running; lpc++) { node_t *tmp = g_list_nth_data(nodes, lpc); if (tmp->weight == chosen->weight) { multiple++; if (tmp->details == running->details) { /* prefer the existing node if scores are equal */ chosen = tmp; } } } } } if (multiple > 1) { int log_level = LOG_INFO; static char score[33]; score2char_stack(chosen->weight, score, sizeof(score)); if (chosen->weight >= INFINITY) { log_level = LOG_WARNING; } do_crm_log(log_level, "%d nodes with equal score (%s) for" " running %s resources. Chose %s.", multiple, score, rsc->id, chosen->details->uname); } result = native_assign_node(rsc, nodes, chosen, FALSE); g_list_free(nodes); return result; } static int node_list_attr_score(GHashTable * list, const char *attr, const char *value) { GHashTableIter iter; node_t *node = NULL; int best_score = -INFINITY; const char *best_node = NULL; if (attr == NULL) { attr = "#" XML_ATTR_UNAME; } g_hash_table_iter_init(&iter, list); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { int weight = node->weight; if (can_run_resources(node) == FALSE) { weight = -INFINITY; } if (weight > best_score || best_node == NULL) { const char *tmp = g_hash_table_lookup(node->details->attrs, attr); if (safe_str_eq(value, tmp)) { best_score = weight; best_node = node->details->uname; } } } if (safe_str_neq(attr, "#" XML_ATTR_UNAME)) { crm_info("Best score for %s=%s was %s with %d", attr, value, best_node ? best_node : "", best_score); } return best_score; } static void node_hash_update(GHashTable * list1, GHashTable * list2, const char *attr, float factor, gboolean only_positive) { int score = 0; int new_score = 0; GHashTableIter iter; node_t *node = NULL; if (attr == NULL) { attr = "#" XML_ATTR_UNAME; } g_hash_table_iter_init(&iter, list1); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { CRM_LOG_ASSERT(node != NULL); if(node == NULL) { continue; }; score = node_list_attr_score(list2, attr, g_hash_table_lookup(node->details->attrs, attr)); new_score = merge_weights(factor * score, node->weight); if (factor < 0 && score < 0) { /* Negative preference for a node with a negative score * should not become a positive preference * * TODO - Decide if we want to filter only if weight == -INFINITY * */ crm_trace("%s: Filtering %d + %f*%d (factor * score)", node->details->uname, node->weight, factor, score); } else if (node->weight == INFINITY_HACK) { crm_trace("%s: Filtering %d + %f*%d (node < 0)", node->details->uname, node->weight, factor, score); } else if (only_positive && new_score < 0 && node->weight > 0) { node->weight = INFINITY_HACK; crm_trace("%s: Filtering %d + %f*%d (score > 0)", node->details->uname, node->weight, factor, score); } else if (only_positive && new_score < 0 && node->weight == 0) { crm_trace("%s: Filtering %d + %f*%d (score == 0)", node->details->uname, node->weight, factor, score); } else { crm_trace("%s: %d + %f*%d", node->details->uname, node->weight, factor, score); node->weight = new_score; } } } GHashTable * node_hash_dup(GHashTable * hash) { /* Hack! */ GListPtr list = g_hash_table_get_values(hash); GHashTable *result = node_hash_from_list(list); g_list_free(list); return result; } GHashTable * native_merge_weights(resource_t * rsc, const char *rhs, GHashTable * nodes, const char *attr, float factor, enum pe_weights flags) { return rsc_merge_weights(rsc, rhs, nodes, attr, factor, flags); } GHashTable * rsc_merge_weights(resource_t * rsc, const char *rhs, GHashTable * nodes, const char *attr, float factor, enum pe_weights flags) { GHashTable *work = NULL; int multiplier = 1; if (factor < 0) { multiplier = -1; } if (is_set(rsc->flags, pe_rsc_merging)) { pe_rsc_info(rsc, "%s: Breaking dependency loop at %s", rhs, rsc->id); return nodes; } set_bit(rsc->flags, pe_rsc_merging); if (is_set(flags, pe_weights_init)) { if (rsc->variant == pe_group && rsc->children) { GListPtr last = rsc->children; while (last->next != NULL) { last = last->next; } pe_rsc_trace(rsc, "Merging %s as a group %p %p", rsc->id, rsc->children, last); work = rsc_merge_weights(last->data, rhs, NULL, attr, factor, flags); } else { work = node_hash_dup(rsc->allowed_nodes); } clear_bit(flags, pe_weights_init); } else if (rsc->variant == pe_group && rsc->children) { GListPtr iter = rsc->children; pe_rsc_trace(rsc, "%s: Combining scores from %d children of %s", rhs, g_list_length(iter), rsc->id); work = node_hash_dup(nodes); for(iter = rsc->children; iter->next != NULL; iter = iter->next) { work = rsc_merge_weights(iter->data, rhs, work, attr, factor, flags); } } else { pe_rsc_trace(rsc, "%s: Combining scores from %s", rhs, rsc->id); work = node_hash_dup(nodes); node_hash_update(work, rsc->allowed_nodes, attr, factor, is_set(flags, pe_weights_positive)); } if (is_set(flags, pe_weights_rollback) && can_run_any(work) == FALSE) { pe_rsc_info(rsc, "%s: Rolling back scores from %s", rhs, rsc->id); g_hash_table_destroy(work); clear_bit(rsc->flags, pe_rsc_merging); return nodes; } if (can_run_any(work)) { GListPtr gIter = NULL; if (is_set(flags, pe_weights_forward)) { gIter = rsc->rsc_cons; crm_trace("Checking %d additional colocation constraints", g_list_length(gIter)); } else if(rsc->variant == pe_group && rsc->children) { GListPtr last = rsc->children; while (last->next != NULL) { last = last->next; } gIter = ((resource_t*)last->data)->rsc_cons_lhs; crm_trace("Checking %d additional optional group colocation constraints from %s", g_list_length(gIter), ((resource_t*)last->data)->id); } else { gIter = rsc->rsc_cons_lhs; crm_trace("Checking %d additional optional colocation constraints %s", g_list_length(gIter), rsc->id); } for (; gIter != NULL; gIter = gIter->next) { resource_t *other = NULL; rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; if (is_set(flags, pe_weights_forward)) { other = constraint->rsc_rh; } else { other = constraint->rsc_lh; } pe_rsc_trace(rsc, "Applying %s (%s)", constraint->id, other->id); work = rsc_merge_weights(other, rhs, work, constraint->node_attribute, multiplier * (float)constraint->score / INFINITY, flags|pe_weights_rollback); dump_node_scores(LOG_TRACE, NULL, rhs, work); } } if (is_set(flags, pe_weights_positive)) { node_t *node = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, work); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (node->weight == INFINITY_HACK) { node->weight = 1; } } } if (nodes) { g_hash_table_destroy(nodes); } clear_bit(rsc->flags, pe_rsc_merging); return work; } node_t * native_color(resource_t * rsc, node_t * prefer, pe_working_set_t * data_set) { GListPtr gIter = NULL; int alloc_details = scores_log_level + 1; if (rsc->parent && is_not_set(rsc->parent->flags, pe_rsc_allocating)) { /* never allocate children on their own */ pe_rsc_debug(rsc, "Escalating allocation of %s to its parent: %s", rsc->id, rsc->parent->id); rsc->parent->cmds->allocate(rsc->parent, prefer, data_set); } if (is_not_set(rsc->flags, pe_rsc_provisional)) { return rsc->allocated_to; } if (is_set(rsc->flags, pe_rsc_allocating)) { pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id); return NULL; } set_bit(rsc->flags, pe_rsc_allocating); print_resource(alloc_details, "Allocating: ", rsc, FALSE); dump_node_scores(alloc_details, rsc, "Pre-alloc", rsc->allowed_nodes); for (gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) { rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; GHashTable *archive = NULL; resource_t *rsc_rh = constraint->rsc_rh; pe_rsc_trace(rsc, "%s: Pre-Processing %s (%s, %d, %s)", rsc->id, constraint->id, rsc_rh->id, constraint->score, role2text(constraint->role_lh)); if (constraint->role_lh >= RSC_ROLE_MASTER || (constraint->score < 0 && constraint->score > -INFINITY)) { archive = node_hash_dup(rsc->allowed_nodes); } rsc_rh->cmds->allocate(rsc_rh, NULL, data_set); rsc->cmds->rsc_colocation_lh(rsc, rsc_rh, constraint); if (archive && can_run_any(rsc->allowed_nodes) == FALSE) { pe_rsc_info(rsc, "%s: Rolling back scores from %s", rsc->id, rsc_rh->id); g_hash_table_destroy(rsc->allowed_nodes); rsc->allowed_nodes = archive; archive = NULL; } if (archive) { g_hash_table_destroy(archive); } } dump_node_scores(alloc_details, rsc, "Post-coloc", rsc->allowed_nodes); for (gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) { rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; rsc->allowed_nodes = constraint->rsc_lh->cmds->merge_weights(constraint->rsc_lh, rsc->id, rsc->allowed_nodes, constraint->node_attribute, (float)constraint->score / INFINITY, pe_weights_rollback); } print_resource(LOG_DEBUG_2, "Allocating: ", rsc, FALSE); if (rsc->next_role == RSC_ROLE_STOPPED) { pe_rsc_trace(rsc, "Making sure %s doesn't get allocated", rsc->id); /* make sure it doesn't come up again */ resource_location(rsc, NULL, -INFINITY, XML_RSC_ATTR_TARGET_ROLE, data_set); } else if(rsc->next_role > rsc->role && is_set(data_set->flags, pe_flag_have_quorum) == FALSE && data_set->no_quorum_policy == no_quorum_freeze) { crm_notice("Resource %s cannot be elevated from %s to %s: no-quorum-policy=freeze", rsc->id, role2text(rsc->role), role2text(rsc->next_role)); rsc->next_role = rsc->role; } dump_node_scores(show_scores ? 0 : scores_log_level, rsc, __FUNCTION__, rsc->allowed_nodes); if (is_set(data_set->flags, pe_flag_stonith_enabled) && is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) { clear_bit(rsc->flags, pe_rsc_managed); } if (is_not_set(rsc->flags, pe_rsc_managed)) { const char *reason = NULL; node_t *assign_to = NULL; rsc->next_role = rsc->role; if (rsc->running_on == NULL) { reason = "inactive"; } else if (rsc->role == RSC_ROLE_MASTER) { assign_to = rsc->running_on->data; reason = "master"; } else if (is_set(rsc->flags, pe_rsc_failed)) { assign_to = rsc->running_on->data; reason = "failed"; } else { assign_to = rsc->running_on->data; reason = "active"; } pe_rsc_info(rsc, "Unmanaged resource %s allocated to %s: %s", rsc->id, assign_to ? assign_to->details->uname : "'nowhere'", reason); native_assign_node(rsc, NULL, assign_to, TRUE); } else if (is_set(data_set->flags, pe_flag_stop_everything)) { pe_rsc_debug(rsc, "Forcing %s to stop", rsc->id); native_assign_node(rsc, NULL, NULL, TRUE); } else if (is_set(rsc->flags, pe_rsc_provisional) && native_choose_node(rsc, prefer, data_set)) { pe_rsc_trace(rsc, "Allocated resource %s to %s", rsc->id, rsc->allocated_to->details->uname); } else if (rsc->allocated_to == NULL) { if (is_not_set(rsc->flags, pe_rsc_orphan)) { pe_rsc_info(rsc, "Resource %s cannot run anywhere", rsc->id); } else if (rsc->running_on != NULL) { pe_rsc_info(rsc, "Stopping orphan resource %s", rsc->id); } } else { pe_rsc_debug(rsc, "Pre-Allocated resource %s to %s", rsc->id, rsc->allocated_to->details->uname); } clear_bit(rsc->flags, pe_rsc_allocating); print_resource(LOG_DEBUG_3, "Allocated ", rsc, TRUE); if (rsc->is_remote_node) { node_t *remote_node = pe_find_node(data_set->nodes, rsc->id); CRM_ASSERT(remote_node != NULL); if (rsc->allocated_to && rsc->next_role != RSC_ROLE_STOPPED) { crm_trace("Setting remote node %s to ONLINE", remote_node->details->id); remote_node->details->online = TRUE; /* We shouldn't consider an unseen remote-node unclean if we are going * to try and connect to it. Otherwise we get an unnecessary fence */ if (remote_node->details->unseen == TRUE) { remote_node->details->unclean = FALSE; } } else { crm_trace("Setting remote node %s to SHUTDOWN. next role = %s, allocated=%s", remote_node->details->id, role2text(rsc->next_role), rsc->allocated_to ? "true" : "false"); remote_node->details->shutdown = TRUE; } } return rsc->allocated_to; } static gboolean is_op_dup(resource_t * rsc, const char *name, const char *interval) { gboolean dup = FALSE; const char *id = NULL; const char *value = NULL; xmlNode *operation = NULL; CRM_ASSERT(rsc); for (operation = __xml_first_child(rsc->ops_xml); operation != NULL; operation = __xml_next_element(operation)) { if (crm_str_eq((const char *)operation->name, "op", TRUE)) { value = crm_element_value(operation, "name"); if (safe_str_neq(value, name)) { continue; } value = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); if (value == NULL) { value = "0"; } if (safe_str_neq(value, interval)) { continue; } if (id == NULL) { id = ID(operation); } else { crm_config_err("Operation %s is a duplicate of %s", ID(operation), id); crm_config_err ("Do not use the same (name, interval) combination more than once per resource"); dup = TRUE; } } } return dup; } void RecurringOp(resource_t * rsc, action_t * start, node_t * node, xmlNode * operation, pe_working_set_t * data_set) { char *key = NULL; const char *name = NULL; const char *value = NULL; const char *interval = NULL; const char *node_uname = NULL; unsigned long long interval_ms = 0; action_t *mon = NULL; gboolean is_optional = TRUE; GListPtr possible_matches = NULL; /* Only process for the operations without role="Stopped" */ value = crm_element_value(operation, "role"); if (value && text2role(value) == RSC_ROLE_STOPPED) { return; } CRM_ASSERT(rsc); pe_rsc_trace(rsc, "Creating recurring action %s for %s in role %s on %s", ID(operation), rsc->id, role2text(rsc->next_role), node ? node->details->uname : "n/a"); if (node != NULL) { node_uname = node->details->uname; } interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); interval_ms = crm_get_interval(interval); if (interval_ms == 0) { return; } name = crm_element_value(operation, "name"); if (is_op_dup(rsc, name, interval)) { return; } if (safe_str_eq(name, RSC_STOP) || safe_str_eq(name, RSC_START) || safe_str_eq(name, RSC_DEMOTE) || safe_str_eq(name, RSC_PROMOTE) ) { crm_config_err("Invalid recurring action %s wth name: '%s'", ID(operation), name); return; } key = generate_op_key(rsc->id, name, interval_ms); if (find_rsc_op_entry(rsc, key) == NULL) { /* disabled */ free(key); return; } if (start != NULL) { pe_rsc_trace(rsc, "Marking %s %s due to %s", key, is_set(start->flags, pe_action_optional) ? "optional" : "mandatory", start->uuid); is_optional = (rsc->cmds->action_flags(start, NULL) & pe_action_optional); } else { pe_rsc_trace(rsc, "Marking %s optional", key); is_optional = TRUE; } /* start a monitor for an already active resource */ possible_matches = find_actions_exact(rsc->actions, key, node); if (possible_matches == NULL) { is_optional = FALSE; pe_rsc_trace(rsc, "Marking %s mandatory: not active", key); } else { GListPtr gIter = NULL; for (gIter = possible_matches; gIter != NULL; gIter = gIter->next) { action_t *op = (action_t *) gIter->data; if (is_set(op->flags, pe_action_reschedule)) { is_optional = FALSE; break; } } g_list_free(possible_matches); } if ((rsc->next_role == RSC_ROLE_MASTER && value == NULL) || (value != NULL && text2role(value) != rsc->next_role)) { int log_level = LOG_DEBUG_2; const char *result = "Ignoring"; if (is_optional) { char *local_key = strdup(key); log_level = LOG_INFO; result = "Cancelling"; /* it's running : cancel it */ mon = custom_action(rsc, local_key, RSC_CANCEL, node, FALSE, TRUE, data_set); free(mon->task); free(mon->cancel_task); mon->task = strdup(RSC_CANCEL); mon->cancel_task = strdup(name); add_hash_param(mon->meta, XML_LRM_ATTR_INTERVAL, interval); add_hash_param(mon->meta, XML_LRM_ATTR_TASK, name); local_key = NULL; switch (rsc->role) { case RSC_ROLE_SLAVE: case RSC_ROLE_STARTED: if (rsc->next_role == RSC_ROLE_MASTER) { local_key = promote_key(rsc); } else if (rsc->next_role == RSC_ROLE_STOPPED) { local_key = stop_key(rsc); } break; case RSC_ROLE_MASTER: local_key = demote_key(rsc); break; default: break; } if (local_key) { custom_action_order(rsc, NULL, mon, rsc, local_key, NULL, pe_order_runnable_left, data_set); } mon = NULL; } do_crm_log(log_level, "%s action %s (%s vs. %s)", result, key, value ? value : role2text(RSC_ROLE_SLAVE), role2text(rsc->next_role)); free(key); return; } mon = custom_action(rsc, key, name, node, is_optional, TRUE, data_set); key = mon->uuid; if (is_optional) { pe_rsc_trace(rsc, "%s\t %s (optional)", crm_str(node_uname), mon->uuid); } if (start == NULL || is_set(start->flags, pe_action_runnable) == FALSE) { pe_rsc_debug(rsc, "%s\t %s (cancelled : start un-runnable)", crm_str(node_uname), mon->uuid); update_action_flags(mon, pe_action_runnable | pe_action_clear, __FUNCTION__); } else if (node == NULL || node->details->online == FALSE || node->details->unclean) { pe_rsc_debug(rsc, "%s\t %s (cancelled : no node available)", crm_str(node_uname), mon->uuid); update_action_flags(mon, pe_action_runnable | pe_action_clear, __FUNCTION__); } else if (is_set(mon->flags, pe_action_optional) == FALSE) { pe_rsc_info(rsc, " Start recurring %s (%llus) for %s on %s", mon->task, interval_ms / 1000, rsc->id, crm_str(node_uname)); } if (rsc->next_role == RSC_ROLE_MASTER) { char *running_master = crm_itoa(PCMK_OCF_RUNNING_MASTER); add_hash_param(mon->meta, XML_ATTR_TE_TARGET_RC, running_master); free(running_master); } if (node == NULL || is_set(rsc->flags, pe_rsc_managed)) { custom_action_order(rsc, start_key(rsc), NULL, NULL, strdup(key), mon, pe_order_implies_then | pe_order_runnable_left, data_set); custom_action_order(rsc, reload_key(rsc), NULL, NULL, strdup(key), mon, pe_order_implies_then | pe_order_runnable_left, data_set); if (rsc->next_role == RSC_ROLE_MASTER) { custom_action_order(rsc, promote_key(rsc), NULL, rsc, NULL, mon, pe_order_optional | pe_order_runnable_left, data_set); } else if (rsc->role == RSC_ROLE_MASTER) { custom_action_order(rsc, demote_key(rsc), NULL, rsc, NULL, mon, pe_order_optional | pe_order_runnable_left, data_set); } } } void Recurring(resource_t * rsc, action_t * start, node_t * node, pe_working_set_t * data_set) { if (is_not_set(rsc->flags, pe_rsc_maintenance) && (node == NULL || node->details->maintenance == FALSE)) { xmlNode *operation = NULL; for (operation = __xml_first_child(rsc->ops_xml); operation != NULL; operation = __xml_next_element(operation)) { if (crm_str_eq((const char *)operation->name, "op", TRUE)) { RecurringOp(rsc, start, node, operation, data_set); } } } } void RecurringOp_Stopped(resource_t * rsc, action_t * start, node_t * node, xmlNode * operation, pe_working_set_t * data_set) { char *key = NULL; const char *name = NULL; const char *role = NULL; const char *interval = NULL; const char *node_uname = NULL; unsigned long long interval_ms = 0; GListPtr possible_matches = NULL; GListPtr gIter = NULL; /* TODO: Support of non-unique clone */ if (is_set(rsc->flags, pe_rsc_unique) == FALSE) { return; } /* Only process for the operations with role="Stopped" */ role = crm_element_value(operation, "role"); if (role == NULL || text2role(role) != RSC_ROLE_STOPPED) { return; } pe_rsc_trace(rsc, "Creating recurring actions %s for %s in role %s on nodes where it'll not be running", ID(operation), rsc->id, role2text(rsc->next_role)); if (node != NULL) { node_uname = node->details->uname; } interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); interval_ms = crm_get_interval(interval); if (interval_ms == 0) { return; } name = crm_element_value(operation, "name"); if (is_op_dup(rsc, name, interval)) { return; } if (safe_str_eq(name, RSC_STOP) || safe_str_eq(name, RSC_START) || safe_str_eq(name, RSC_DEMOTE) || safe_str_eq(name, RSC_PROMOTE) ) { crm_config_err("Invalid recurring action %s wth name: '%s'", ID(operation), name); return; } key = generate_op_key(rsc->id, name, interval_ms); if (find_rsc_op_entry(rsc, key) == NULL) { /* disabled */ free(key); return; } /* if the monitor exists on the node where the resource will be running, cancel it */ if (node != NULL) { possible_matches = find_actions_exact(rsc->actions, key, node); if (possible_matches) { action_t *cancel_op = NULL; char *local_key = strdup(key); g_list_free(possible_matches); cancel_op = custom_action(rsc, local_key, RSC_CANCEL, node, FALSE, TRUE, data_set); free(cancel_op->task); free(cancel_op->cancel_task); cancel_op->task = strdup(RSC_CANCEL); cancel_op->cancel_task = strdup(name); add_hash_param(cancel_op->meta, XML_LRM_ATTR_INTERVAL, interval); add_hash_param(cancel_op->meta, XML_LRM_ATTR_TASK, name); local_key = NULL; if (rsc->next_role == RSC_ROLE_STARTED || rsc->next_role == RSC_ROLE_SLAVE) { /* rsc->role == RSC_ROLE_STOPPED: cancel the monitor before start */ /* rsc->role == RSC_ROLE_STARTED: for a migration, cancel the monitor on the target node before start */ custom_action_order(rsc, NULL, cancel_op, rsc, start_key(rsc), NULL, pe_order_runnable_left, data_set); } pe_rsc_info(rsc, "Cancel action %s (%s vs. %s) on %s", key, role, role2text(rsc->next_role), crm_str(node_uname)); } } for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *stop_node = (node_t *) gIter->data; const char *stop_node_uname = stop_node->details->uname; gboolean is_optional = TRUE; gboolean probe_is_optional = TRUE; gboolean stop_is_optional = TRUE; action_t *stopped_mon = NULL; char *rc_inactive = NULL; GListPtr probe_complete_ops = NULL; GListPtr stop_ops = NULL; GListPtr local_gIter = NULL; char *stop_op_key = NULL; if (node_uname && safe_str_eq(stop_node_uname, node_uname)) { continue; } pe_rsc_trace(rsc, "Creating recurring action %s for %s on %s", ID(operation), rsc->id, crm_str(stop_node_uname)); /* start a monitor for an already stopped resource */ possible_matches = find_actions_exact(rsc->actions, key, stop_node); if (possible_matches == NULL) { pe_rsc_trace(rsc, "Marking %s mandatory on %s: not active", key, crm_str(stop_node_uname)); is_optional = FALSE; } else { pe_rsc_trace(rsc, "Marking %s optional on %s: already active", key, crm_str(stop_node_uname)); is_optional = TRUE; g_list_free(possible_matches); } stopped_mon = custom_action(rsc, strdup(key), name, stop_node, is_optional, TRUE, data_set); rc_inactive = crm_itoa(PCMK_OCF_NOT_RUNNING); add_hash_param(stopped_mon->meta, XML_ATTR_TE_TARGET_RC, rc_inactive); free(rc_inactive); if (is_set(rsc->flags, pe_rsc_managed)) { char *probe_key = generate_op_key(rsc->id, CRMD_ACTION_STATUS, 0); GListPtr probes = find_actions(rsc->actions, probe_key, stop_node); GListPtr pIter = NULL; for (pIter = probes; pIter != NULL; pIter = pIter->next) { action_t *probe = (action_t *) pIter->data; order_actions(probe, stopped_mon, pe_order_runnable_left); crm_trace("%s then %s on %s", probe->uuid, stopped_mon->uuid, stop_node->details->uname); } g_list_free(probes); free(probe_key); } if (probe_complete_ops) { g_list_free(probe_complete_ops); } stop_op_key = stop_key(rsc); stop_ops = find_actions_exact(rsc->actions, stop_op_key, stop_node); for (local_gIter = stop_ops; local_gIter != NULL; local_gIter = local_gIter->next) { action_t *stop = (action_t *) local_gIter->data; if (is_set(stop->flags, pe_action_optional) == FALSE) { stop_is_optional = FALSE; } if (is_set(stop->flags, pe_action_runnable) == FALSE) { crm_debug("%s\t %s (cancelled : stop un-runnable)", crm_str(stop_node_uname), stopped_mon->uuid); update_action_flags(stopped_mon, pe_action_runnable | pe_action_clear, __FUNCTION__); } if (is_set(rsc->flags, pe_rsc_managed)) { custom_action_order(rsc, strdup(stop_op_key), stop, NULL, strdup(key), stopped_mon, pe_order_implies_then | pe_order_runnable_left, data_set); } } if (stop_ops) { g_list_free(stop_ops); } free(stop_op_key); if (is_optional == FALSE && probe_is_optional && stop_is_optional && is_set(rsc->flags, pe_rsc_managed) == FALSE) { pe_rsc_trace(rsc, "Marking %s optional on %s due to unmanaged", key, crm_str(stop_node_uname)); update_action_flags(stopped_mon, pe_action_optional, __FUNCTION__); } if (is_set(stopped_mon->flags, pe_action_optional)) { pe_rsc_trace(rsc, "%s\t %s (optional)", crm_str(stop_node_uname), stopped_mon->uuid); } if (stop_node->details->online == FALSE || stop_node->details->unclean) { pe_rsc_debug(rsc, "%s\t %s (cancelled : no node available)", crm_str(stop_node_uname), stopped_mon->uuid); update_action_flags(stopped_mon, pe_action_runnable | pe_action_clear, __FUNCTION__); } if (is_set(stopped_mon->flags, pe_action_runnable) && is_set(stopped_mon->flags, pe_action_optional) == FALSE) { crm_notice(" Start recurring %s (%llus) for %s on %s", stopped_mon->task, interval_ms / 1000, rsc->id, crm_str(stop_node_uname)); } } free(key); } void Recurring_Stopped(resource_t * rsc, action_t * start, node_t * node, pe_working_set_t * data_set) { if (is_not_set(rsc->flags, pe_rsc_maintenance) && (node == NULL || node->details->maintenance == FALSE)) { xmlNode *operation = NULL; for (operation = __xml_first_child(rsc->ops_xml); operation != NULL; operation = __xml_next_element(operation)) { if (crm_str_eq((const char *)operation->name, "op", TRUE)) { RecurringOp_Stopped(rsc, start, node, operation, data_set); } } } } static void handle_migration_actions(resource_t * rsc, node_t *current, node_t *chosen, pe_working_set_t * data_set) { action_t *migrate_to = NULL; action_t *migrate_from = NULL; action_t *start = NULL; action_t *stop = NULL; gboolean partial = rsc->partial_migration_target ? TRUE : FALSE; pe_rsc_trace(rsc, "Processing migration actions %s moving from %s to %s . partial migration = %s", rsc->id, current->details->id, chosen->details->id, partial ? "TRUE" : "FALSE"); start = start_action(rsc, chosen, TRUE); stop = stop_action(rsc, current, TRUE); if (partial == FALSE) { migrate_to = custom_action(rsc, generate_op_key(rsc->id, RSC_MIGRATE, 0), RSC_MIGRATE, current, TRUE, TRUE, data_set); } migrate_from = custom_action(rsc, generate_op_key(rsc->id, RSC_MIGRATED, 0), RSC_MIGRATED, chosen, TRUE, TRUE, data_set); if ((migrate_to && migrate_from) || (migrate_from && partial)) { set_bit(start->flags, pe_action_migrate_runnable); set_bit(stop->flags, pe_action_migrate_runnable); update_action_flags(start, pe_action_pseudo, __FUNCTION__); /* easier than trying to delete it from the graph */ /* order probes before migrations */ if (partial) { set_bit(migrate_from->flags, pe_action_migrate_runnable); migrate_from->needs = start->needs; custom_action_order(rsc, generate_op_key(rsc->id, RSC_STATUS, 0), NULL, rsc, generate_op_key(rsc->id, RSC_MIGRATED, 0), NULL, pe_order_optional, data_set); } else { set_bit(migrate_from->flags, pe_action_migrate_runnable); set_bit(migrate_to->flags, pe_action_migrate_runnable); migrate_to->needs = start->needs; custom_action_order(rsc, generate_op_key(rsc->id, RSC_STATUS, 0), NULL, rsc, generate_op_key(rsc->id, RSC_MIGRATE, 0), NULL, pe_order_optional, data_set); custom_action_order(rsc, generate_op_key(rsc->id, RSC_MIGRATE, 0), NULL, rsc, generate_op_key(rsc->id, RSC_MIGRATED, 0), NULL, pe_order_optional | pe_order_implies_first_migratable, data_set); } custom_action_order(rsc, generate_op_key(rsc->id, RSC_MIGRATED, 0), NULL, rsc, generate_op_key(rsc->id, RSC_STOP, 0), NULL, pe_order_optional | pe_order_implies_first_migratable, data_set); custom_action_order(rsc, generate_op_key(rsc->id, RSC_MIGRATED, 0), NULL, rsc, generate_op_key(rsc->id, RSC_START, 0), NULL, pe_order_optional | pe_order_implies_first_migratable | pe_order_pseudo_left, data_set); } if (migrate_to) { add_hash_param(migrate_to->meta, XML_LRM_ATTR_MIGRATE_SOURCE, current->details->uname); add_hash_param(migrate_to->meta, XML_LRM_ATTR_MIGRATE_TARGET, chosen->details->uname); /* pcmk remote connections don't require pending to be recorded in cib. * We can optimize cib writes by only setting PENDING for non pcmk remote * connection resources */ if (rsc->is_remote_node == FALSE) { /* migrate_to takes place on the source node, but can * have an effect on the target node depending on how * the agent is written. Because of this, we have to maintain * a record that the migrate_to occurred incase the source node * loses membership while the migrate_to action is still in-flight. */ add_hash_param(migrate_to->meta, XML_OP_ATTR_PENDING, "true"); } } if (migrate_from) { add_hash_param(migrate_from->meta, XML_LRM_ATTR_MIGRATE_SOURCE, current->details->uname); add_hash_param(migrate_from->meta, XML_LRM_ATTR_MIGRATE_TARGET, chosen->details->uname); } } void native_create_actions(resource_t * rsc, pe_working_set_t * data_set) { action_t *start = NULL; node_t *chosen = NULL; node_t *current = NULL; gboolean need_stop = FALSE; gboolean is_moving = FALSE; gboolean allow_migrate = is_set(rsc->flags, pe_rsc_allow_migrate) ? TRUE : FALSE; GListPtr gIter = NULL; int num_active_nodes = 0; enum rsc_role_e role = RSC_ROLE_UNKNOWN; enum rsc_role_e next_role = RSC_ROLE_UNKNOWN; CRM_ASSERT(rsc); chosen = rsc->allocated_to; if (chosen != NULL && rsc->next_role == RSC_ROLE_UNKNOWN) { rsc->next_role = RSC_ROLE_STARTED; pe_rsc_trace(rsc, "Fixed next_role: unknown -> %s", role2text(rsc->next_role)); } else if (rsc->next_role == RSC_ROLE_UNKNOWN) { rsc->next_role = RSC_ROLE_STOPPED; pe_rsc_trace(rsc, "Fixed next_role: unknown -> %s", role2text(rsc->next_role)); } pe_rsc_trace(rsc, "Processing state transition for %s %p: %s->%s", rsc->id, rsc, role2text(rsc->role), role2text(rsc->next_role)); if (rsc->running_on) { current = rsc->running_on->data; } for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { node_t *n = (node_t *) gIter->data; if (rsc->partial_migration_source && (n->details == rsc->partial_migration_source->details)) { current = rsc->partial_migration_source; } num_active_nodes++; } for (gIter = rsc->dangling_migrations; gIter != NULL; gIter = gIter->next) { node_t *current = (node_t *) gIter->data; action_t *stop = stop_action(rsc, current, FALSE); set_bit(stop->flags, pe_action_dangle); pe_rsc_trace(rsc, "Forcing a cleanup of %s on %s", rsc->id, current->details->uname); if (is_set(data_set->flags, pe_flag_remove_after_stop)) { DeleteRsc(rsc, current, FALSE, data_set); } } if (num_active_nodes > 1) { if (num_active_nodes == 2 && chosen && rsc->partial_migration_target && rsc->partial_migration_source && (current->details == rsc->partial_migration_source->details) && (chosen->details == rsc->partial_migration_target->details)) { /* Here the chosen node is still the migration target from a partial * migration. Attempt to continue the migration instead of recovering * by stopping the resource everywhere and starting it on a single node. */ pe_rsc_trace(rsc, "Will attempt to continue with a partial migration to target %s from %s", rsc->partial_migration_target->details->id, rsc->partial_migration_source->details->id); } else { const char *type = crm_element_value(rsc->xml, XML_ATTR_TYPE); const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); if(rsc->partial_migration_target && rsc->partial_migration_source) { crm_notice("Resource %s can no longer migrate to %s. Stopping on %s too", rsc->id, rsc->partial_migration_target->details->uname, rsc->partial_migration_source->details->uname); } else { pe_proc_err("Resource %s (%s::%s) is active on %d nodes %s", rsc->id, class, type, num_active_nodes, recovery2text(rsc->recovery_type)); crm_warn("See %s for more information.", "http://clusterlabs.org/wiki/FAQ#Resource_is_Too_Active"); } if (rsc->recovery_type == recovery_stop_start) { need_stop = TRUE; } /* If by chance a partial migration is in process, * but the migration target is not chosen still, clear all * partial migration data. */ rsc->partial_migration_source = rsc->partial_migration_target = NULL; allow_migrate = FALSE; } } if (is_set(rsc->flags, pe_rsc_start_pending)) { start = start_action(rsc, chosen, TRUE); set_bit(start->flags, pe_action_print_always); } if (current && chosen && current->details != chosen->details) { pe_rsc_trace(rsc, "Moving %s", rsc->id); is_moving = TRUE; need_stop = TRUE; } else if (is_set(rsc->flags, pe_rsc_failed)) { pe_rsc_trace(rsc, "Recovering %s", rsc->id); need_stop = TRUE; } else if (is_set(rsc->flags, pe_rsc_block)) { pe_rsc_trace(rsc, "Block %s", rsc->id); need_stop = TRUE; } else if (rsc->role > RSC_ROLE_STARTED && current != NULL && chosen != NULL) { /* Recovery of a promoted resource */ start = start_action(rsc, chosen, TRUE); if (is_set(start->flags, pe_action_optional) == FALSE) { pe_rsc_trace(rsc, "Forced start %s", rsc->id); need_stop = TRUE; } } pe_rsc_trace(rsc, "Creating actions for %s: %s->%s", rsc->id, role2text(rsc->role), role2text(rsc->next_role)); /* Create any additional actions required when bringing resource down and * back up to same level. */ role = rsc->role; while (role != RSC_ROLE_STOPPED) { next_role = rsc_state_matrix[role][RSC_ROLE_STOPPED]; pe_rsc_trace(rsc, "Down: Executing: %s->%s (%s)%s", role2text(role), role2text(next_role), rsc->id, need_stop ? " required" : ""); if (rsc_action_matrix[role][next_role] (rsc, current, !need_stop, data_set) == FALSE) { break; } role = next_role; } while (rsc->role <= rsc->next_role && role != rsc->role && is_not_set(rsc->flags, pe_rsc_block)) { next_role = rsc_state_matrix[role][rsc->role]; pe_rsc_trace(rsc, "Up: Executing: %s->%s (%s)%s", role2text(role), role2text(next_role), rsc->id, need_stop ? " required" : ""); if (rsc_action_matrix[role][next_role] (rsc, chosen, !need_stop, data_set) == FALSE) { break; } role = next_role; } role = rsc->role; /* Required steps from this role to the next */ while (role != rsc->next_role) { next_role = rsc_state_matrix[role][rsc->next_role]; pe_rsc_trace(rsc, "Role: Executing: %s->%s = (%s)", role2text(role), role2text(next_role), rsc->id); if (rsc_action_matrix[role][next_role] (rsc, chosen, FALSE, data_set) == FALSE) { break; } role = next_role; } if(is_set(rsc->flags, pe_rsc_block)) { pe_rsc_trace(rsc, "No monitor additional ops for blocked resource"); } else if (rsc->next_role != RSC_ROLE_STOPPED || is_set(rsc->flags, pe_rsc_managed) == FALSE) { pe_rsc_trace(rsc, "Monitor ops for active resource"); start = start_action(rsc, chosen, TRUE); Recurring(rsc, start, chosen, data_set); Recurring_Stopped(rsc, start, chosen, data_set); } else { pe_rsc_trace(rsc, "Monitor ops for in-active resource"); Recurring_Stopped(rsc, NULL, NULL, data_set); } /* if we are stuck in a partial migration, where the target * of the partial migration no longer matches the chosen target. * A full stop/start is required */ if (rsc->partial_migration_target && (chosen == NULL || rsc->partial_migration_target->details != chosen->details)) { pe_rsc_trace(rsc, "Not allowing partial migration to continue. %s", rsc->id); allow_migrate = FALSE; } else if (is_moving == FALSE || is_not_set(rsc->flags, pe_rsc_managed) || is_set(rsc->flags, pe_rsc_failed) || is_set(rsc->flags, pe_rsc_start_pending) || (current->details->unclean == TRUE) || rsc->next_role < RSC_ROLE_STARTED) { allow_migrate = FALSE; } if (allow_migrate) { handle_migration_actions(rsc, current, chosen, data_set); } } static void rsc_avoids_remote_nodes(resource_t *rsc) { GHashTableIter iter; node_t *node = NULL; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (node->details->remote_rsc) { node->weight = -INFINITY; } } } void native_internal_constraints(resource_t * rsc, pe_working_set_t * data_set) { /* This function is on the critical path and worth optimizing as much as possible */ resource_t *top = uber_parent(rsc); int type = pe_order_optional | pe_order_implies_then | pe_order_restart; gboolean is_stonith = is_set(rsc->flags, pe_rsc_fence_device); custom_action_order(rsc, generate_op_key(rsc->id, RSC_STOP, 0), NULL, rsc, generate_op_key(rsc->id, RSC_START, 0), NULL, type, data_set); if (top->variant == pe_master || rsc->role > RSC_ROLE_SLAVE) { custom_action_order(rsc, generate_op_key(rsc->id, RSC_DEMOTE, 0), NULL, rsc, generate_op_key(rsc->id, RSC_STOP, 0), NULL, pe_order_implies_first_master, data_set); custom_action_order(rsc, generate_op_key(rsc->id, RSC_START, 0), NULL, rsc, generate_op_key(rsc->id, RSC_PROMOTE, 0), NULL, pe_order_runnable_left, data_set); } if (is_stonith == FALSE && is_set(data_set->flags, pe_flag_enable_unfencing) && is_set(rsc->flags, pe_rsc_needs_unfencing)) { /* Check if the node needs to be unfenced first */ node_t *node = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { action_t *unfence = pe_fence_op(node, "on", TRUE, data_set); crm_debug("Ordering any stops of %s before %s, and any starts after", rsc->id, unfence->uuid); /* * It would be more efficient to order clone resources once, * rather than order each instance, but ordering the instance * allows us to avoid unnecessary dependencies that might conflict * with user constraints. * * @TODO: This constraint can still produce a transition loop if the * resource has a stop scheduled on the node being unfenced, and * there is a user ordering constraint to start some other resource * (which will be ordered after the unfence) before stopping this * resource. An example is "start some slow-starting cloned service * before stopping an associated virtual IP that may be moving to * it": * stop this -> unfencing -> start that -> stop this */ custom_action_order(rsc, stop_key(rsc), NULL, NULL, strdup(unfence->uuid), unfence, pe_order_optional|pe_order_same_node, data_set); custom_action_order(NULL, strdup(unfence->uuid), unfence, rsc, start_key(rsc), NULL, pe_order_implies_then_on_node|pe_order_same_node, data_set); } } if (is_not_set(rsc->flags, pe_rsc_managed)) { pe_rsc_trace(rsc, "Skipping fencing constraints for unmanaged resource: %s", rsc->id); return; } { action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); custom_action_order(rsc, stop_key(rsc), NULL, NULL, strdup(all_stopped->task), all_stopped, pe_order_implies_then | pe_order_runnable_left, data_set); } if (g_hash_table_size(rsc->utilization) > 0 && safe_str_neq(data_set->placement_strategy, "default")) { GHashTableIter iter; node_t *next = NULL; GListPtr gIter = NULL; pe_rsc_trace(rsc, "Creating utilization constraints for %s - strategy: %s", rsc->id, data_set->placement_strategy); for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { node_t *current = (node_t *) gIter->data; char *load_stopped_task = crm_concat(LOAD_STOPPED, current->details->uname, '_'); action_t *load_stopped = get_pseudo_op(load_stopped_task, data_set); if (load_stopped->node == NULL) { load_stopped->node = node_copy(current); update_action_flags(load_stopped, pe_action_optional | pe_action_clear, __FUNCTION__); } custom_action_order(rsc, stop_key(rsc), NULL, NULL, load_stopped_task, load_stopped, pe_order_load, data_set); } g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&next)) { char *load_stopped_task = crm_concat(LOAD_STOPPED, next->details->uname, '_'); action_t *load_stopped = get_pseudo_op(load_stopped_task, data_set); if (load_stopped->node == NULL) { load_stopped->node = node_copy(next); update_action_flags(load_stopped, pe_action_optional | pe_action_clear, __FUNCTION__); } custom_action_order(NULL, strdup(load_stopped_task), load_stopped, rsc, start_key(rsc), NULL, pe_order_load, data_set); custom_action_order(NULL, strdup(load_stopped_task), load_stopped, rsc, generate_op_key(rsc->id, RSC_MIGRATE, 0), NULL, pe_order_load, data_set); free(load_stopped_task); } } if (rsc->container) { resource_t *remote_rsc = NULL; /* find out if the container is associated with remote node connection resource */ if (rsc->container->is_remote_node) { remote_rsc = rsc->container; } else if (rsc->is_remote_node == FALSE) { remote_rsc = rsc_contains_remote_node(data_set, rsc->container); } /* if the container is a remote-node, force the resource within the container * instead of colocating the resource with the container. */ if (remote_rsc) { GHashTableIter iter; node_t *node = NULL; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (node->details->remote_rsc != remote_rsc) { node->weight = -INFINITY; } } } else { crm_trace("Generating order and colocation rules for rsc %s with container %s", rsc->id, rsc->container->id); custom_action_order(rsc->container, generate_op_key(rsc->container->id, RSC_START, 0), NULL, rsc, generate_op_key(rsc->id, RSC_START, 0), NULL, pe_order_implies_then | pe_order_runnable_left, data_set); custom_action_order(rsc, generate_op_key(rsc->id, RSC_STOP, 0), NULL, rsc->container, generate_op_key(rsc->container->id, RSC_STOP, 0), NULL, pe_order_implies_first, data_set); rsc_colocation_new("resource-with-container", NULL, INFINITY, rsc, rsc->container, NULL, NULL, data_set); } } if (rsc->is_remote_node || is_stonith) { /* don't allow remote nodes to run stonith devices * or remote connection resources.*/ rsc_avoids_remote_nodes(rsc); } /* If this rsc is a remote connection resource associated * with a container ( which will most likely be a virtual guest ) * do not allow the container to live on any remote-nodes. * remote-nodes managing nested remote-nodes should not be allowed. */ if (rsc->is_remote_node && rsc->container) { rsc_avoids_remote_nodes(rsc->container); } } void native_rsc_colocation_lh(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint) { if (rsc_lh == NULL) { pe_err("rsc_lh was NULL for %s", constraint->id); return; } else if (constraint->rsc_rh == NULL) { pe_err("rsc_rh was NULL for %s", constraint->id); return; } pe_rsc_trace(rsc_lh, "Processing colocation constraint between %s and %s", rsc_lh->id, rsc_rh->id); rsc_rh->cmds->rsc_colocation_rh(rsc_lh, rsc_rh, constraint); } enum filter_colocation_res filter_colocation_constraint(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint, gboolean preview) { if (constraint->score == 0) { return influence_nothing; } /* rh side must be allocated before we can process constraint */ if (preview == FALSE && is_set(rsc_rh->flags, pe_rsc_provisional)) { return influence_nothing; } if ((constraint->role_lh >= RSC_ROLE_SLAVE) && rsc_lh->parent && rsc_lh->parent->variant == pe_master && is_not_set(rsc_lh->flags, pe_rsc_provisional)) { /* LH and RH resources have already been allocated, place the correct * priority oh LH rsc for the given multistate resource role */ return influence_rsc_priority; } if (preview == FALSE && is_not_set(rsc_lh->flags, pe_rsc_provisional)) { /* error check */ struct node_shared_s *details_lh; struct node_shared_s *details_rh; if ((constraint->score > -INFINITY) && (constraint->score < INFINITY)) { return influence_nothing; } details_rh = rsc_rh->allocated_to ? rsc_rh->allocated_to->details : NULL; details_lh = rsc_lh->allocated_to ? rsc_lh->allocated_to->details : NULL; if (constraint->score == INFINITY && details_lh != details_rh) { crm_err("%s and %s are both allocated" " but to different nodes: %s vs. %s", rsc_lh->id, rsc_rh->id, details_lh ? details_lh->uname : "n/a", details_rh ? details_rh->uname : "n/a"); } else if (constraint->score == -INFINITY && details_lh == details_rh) { crm_err("%s and %s are both allocated" " but to the SAME node: %s", rsc_lh->id, rsc_rh->id, details_rh ? details_rh->uname : "n/a"); } return influence_nothing; } if (constraint->score > 0 && constraint->role_lh != RSC_ROLE_UNKNOWN && constraint->role_lh != rsc_lh->next_role) { crm_trace("LH: Skipping constraint: \"%s\" state filter nextrole is %s", role2text(constraint->role_lh), role2text(rsc_lh->next_role)); return influence_nothing; } if (constraint->score > 0 && constraint->role_rh != RSC_ROLE_UNKNOWN && constraint->role_rh != rsc_rh->next_role) { crm_trace("RH: Skipping constraint: \"%s\" state filter", role2text(constraint->role_rh)); return FALSE; } if (constraint->score < 0 && constraint->role_lh != RSC_ROLE_UNKNOWN && constraint->role_lh == rsc_lh->next_role) { crm_trace("LH: Skipping -ve constraint: \"%s\" state filter", role2text(constraint->role_lh)); return influence_nothing; } if (constraint->score < 0 && constraint->role_rh != RSC_ROLE_UNKNOWN && constraint->role_rh == rsc_rh->next_role) { crm_trace("RH: Skipping -ve constraint: \"%s\" state filter", role2text(constraint->role_rh)); return influence_nothing; } return influence_rsc_location; } static void influence_priority(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint) { const char *rh_value = NULL; const char *lh_value = NULL; const char *attribute = "#id"; int score_multiplier = 1; if (constraint->node_attribute != NULL) { attribute = constraint->node_attribute; } if (!rsc_rh->allocated_to || !rsc_lh->allocated_to) { return; } lh_value = g_hash_table_lookup(rsc_lh->allocated_to->details->attrs, attribute); rh_value = g_hash_table_lookup(rsc_rh->allocated_to->details->attrs, attribute); if (!safe_str_eq(lh_value, rh_value)) { if(constraint->score == INFINITY && constraint->role_lh == RSC_ROLE_MASTER) { rsc_lh->priority = -INFINITY; } return; } if (constraint->role_rh && (constraint->role_rh != rsc_rh->next_role)) { return; } if (constraint->role_lh == RSC_ROLE_SLAVE) { score_multiplier = -1; } rsc_lh->priority = merge_weights(score_multiplier * constraint->score, rsc_lh->priority); } static void colocation_match(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint) { const char *tmp = NULL; const char *value = NULL; const char *attribute = "#id"; GHashTable *work = NULL; gboolean do_check = FALSE; GHashTableIter iter; node_t *node = NULL; if (constraint->node_attribute != NULL) { attribute = constraint->node_attribute; } if (rsc_rh->allocated_to) { value = g_hash_table_lookup(rsc_rh->allocated_to->details->attrs, attribute); do_check = TRUE; } else if (constraint->score < 0) { /* nothing to do: * anti-colocation with something thats not running */ return; } work = node_hash_dup(rsc_lh->allowed_nodes); g_hash_table_iter_init(&iter, work); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { tmp = g_hash_table_lookup(node->details->attrs, attribute); if (do_check && safe_str_eq(tmp, value)) { if (constraint->score < INFINITY) { pe_rsc_trace(rsc_lh, "%s: %s.%s += %d", constraint->id, rsc_lh->id, node->details->uname, constraint->score); node->weight = merge_weights(constraint->score, node->weight); } } else if (do_check == FALSE || constraint->score >= INFINITY) { pe_rsc_trace(rsc_lh, "%s: %s.%s -= %d (%s)", constraint->id, rsc_lh->id, node->details->uname, constraint->score, do_check ? "failed" : "unallocated"); node->weight = merge_weights(-constraint->score, node->weight); } } if (can_run_any(work) || constraint->score <= -INFINITY || constraint->score >= INFINITY) { g_hash_table_destroy(rsc_lh->allowed_nodes); rsc_lh->allowed_nodes = work; work = NULL; } else { static char score[33]; score2char_stack(constraint->score, score, sizeof(score)); pe_rsc_info(rsc_lh, "%s: Rolling back scores from %s (%d, %s)", rsc_lh->id, rsc_rh->id, do_check, score); } if (work) { g_hash_table_destroy(work); } } void native_rsc_colocation_rh(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint) { enum filter_colocation_res filter_results; CRM_ASSERT(rsc_lh); CRM_ASSERT(rsc_rh); filter_results = filter_colocation_constraint(rsc_lh, rsc_rh, constraint, FALSE); pe_rsc_trace(rsc_lh, "%sColocating %s with %s (%s, weight=%d, filter=%d)", constraint->score >= 0 ? "" : "Anti-", rsc_lh->id, rsc_rh->id, constraint->id, constraint->score, filter_results); switch (filter_results) { case influence_rsc_priority: influence_priority(rsc_lh, rsc_rh, constraint); break; case influence_rsc_location: pe_rsc_trace(rsc_lh, "%sColocating %s with %s (%s, weight=%d)", constraint->score >= 0 ? "" : "Anti-", rsc_lh->id, rsc_rh->id, constraint->id, constraint->score); colocation_match(rsc_lh, rsc_rh, constraint); break; case influence_nothing: default: return; } } static gboolean filter_rsc_ticket(resource_t * rsc_lh, rsc_ticket_t * rsc_ticket) { if (rsc_ticket->role_lh != RSC_ROLE_UNKNOWN && rsc_ticket->role_lh != rsc_lh->role) { pe_rsc_trace(rsc_lh, "LH: Skipping constraint: \"%s\" state filter", role2text(rsc_ticket->role_lh)); return FALSE; } return TRUE; } void rsc_ticket_constraint(resource_t * rsc_lh, rsc_ticket_t * rsc_ticket, pe_working_set_t * data_set) { if (rsc_ticket == NULL) { pe_err("rsc_ticket was NULL"); return; } if (rsc_lh == NULL) { pe_err("rsc_lh was NULL for %s", rsc_ticket->id); return; } if (rsc_ticket->ticket->granted && rsc_ticket->ticket->standby == FALSE) { return; } if (rsc_lh->children) { GListPtr gIter = rsc_lh->children; pe_rsc_trace(rsc_lh, "Processing ticket dependencies from %s", rsc_lh->id); for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; rsc_ticket_constraint(child_rsc, rsc_ticket, data_set); } return; } pe_rsc_trace(rsc_lh, "%s: Processing ticket dependency on %s (%s, %s)", rsc_lh->id, rsc_ticket->ticket->id, rsc_ticket->id, role2text(rsc_ticket->role_lh)); if (rsc_ticket->ticket->granted == FALSE && g_list_length(rsc_lh->running_on) > 0) { GListPtr gIter = NULL; switch (rsc_ticket->loss_policy) { case loss_ticket_stop: resource_location(rsc_lh, NULL, -INFINITY, "__loss_of_ticket__", data_set); break; case loss_ticket_demote: /*Promotion score will be set to -INFINITY in master_promotion_order() */ if (rsc_ticket->role_lh != RSC_ROLE_MASTER) { resource_location(rsc_lh, NULL, -INFINITY, "__loss_of_ticket__", data_set); } break; case loss_ticket_fence: if (filter_rsc_ticket(rsc_lh, rsc_ticket) == FALSE) { return; } resource_location(rsc_lh, NULL, -INFINITY, "__loss_of_ticket__", data_set); for (gIter = rsc_lh->running_on; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; pe_fence_node(data_set, node, "because deadman ticket was lost"); } break; case loss_ticket_freeze: if (filter_rsc_ticket(rsc_lh, rsc_ticket) == FALSE) { return; } if (g_list_length(rsc_lh->running_on) > 0) { clear_bit(rsc_lh->flags, pe_rsc_managed); set_bit(rsc_lh->flags, pe_rsc_block); } break; } } else if (rsc_ticket->ticket->granted == FALSE) { if (rsc_ticket->role_lh != RSC_ROLE_MASTER || rsc_ticket->loss_policy == loss_ticket_stop) { resource_location(rsc_lh, NULL, -INFINITY, "__no_ticket__", data_set); } } else if (rsc_ticket->ticket->standby) { if (rsc_ticket->role_lh != RSC_ROLE_MASTER || rsc_ticket->loss_policy == loss_ticket_stop) { resource_location(rsc_lh, NULL, -INFINITY, "__ticket_standby__", data_set); } } } enum pe_action_flags native_action_flags(action_t * action, node_t * node) { return action->flags; } enum pe_graph_flags native_update_actions(action_t * first, action_t * then, node_t * node, enum pe_action_flags flags, enum pe_action_flags filter, enum pe_ordering type) { /* flags == get_action_flags(first, then_node) called from update_action() */ enum pe_graph_flags changed = pe_graph_none; enum pe_action_flags then_flags = then->flags; enum pe_action_flags first_flags = first->flags; crm_trace( "Testing %s on %s (0x%.6x) with %s 0x%.6x", first->uuid, first->node ? first->node->details->uname : "[none]", first->flags, then->uuid, then->flags); if (type & pe_order_asymmetrical) { resource_t *then_rsc = then->rsc; enum rsc_role_e then_rsc_role = then_rsc ? then_rsc->fns->state(then_rsc, TRUE) : 0; if (!then_rsc) { /* ignore */ } else if ((then_rsc_role == RSC_ROLE_STOPPED) && safe_str_eq(then->task, RSC_STOP)) { /* ignore... if 'then' is supposed to be stopped after 'first', but * then is already stopped, there is nothing to be done when non-symmetrical. */ } else if ((then_rsc_role >= RSC_ROLE_STARTED) && safe_str_eq(then->task, RSC_START) && then->node && then_rsc->running_on && g_list_length(then_rsc->running_on) == 1 && then->node->details == ((node_t *) then_rsc->running_on->data)->details) { /* ignore... if 'then' is supposed to be started after 'first', but * then is already started, there is nothing to be done when non-symmetrical. */ } else if (!(first->flags & pe_action_runnable)) { /* prevent 'then' action from happening if 'first' is not runnable and * 'then' has not yet occurred. */ pe_clear_action_bit(then, pe_action_runnable); pe_clear_action_bit(then, pe_action_optional); pe_rsc_trace(then->rsc, "Unset optional and runnable on %s", then->uuid); } else { /* ignore... then is allowed to start/stop if it wants to. */ } } if (type & pe_order_implies_first) { if ((filter & pe_action_optional) && (flags & pe_action_optional) == 0) { pe_rsc_trace(first->rsc, "Unset optional on %s because of %s", first->uuid, then->uuid); pe_clear_action_bit(first, pe_action_optional); } if (is_set(flags, pe_action_migrate_runnable) && is_set(then->flags, pe_action_migrate_runnable) == FALSE && is_set(then->flags, pe_action_optional) == FALSE) { pe_rsc_trace(first->rsc, "Unset migrate runnable on %s because of %s", first->uuid, then->uuid); pe_clear_action_bit(first, pe_action_migrate_runnable); } } if (type & pe_order_implies_first_master) { if ((filter & pe_action_optional) && ((then->flags & pe_action_optional) == FALSE) && then->rsc && (then->rsc->role == RSC_ROLE_MASTER)) { pe_clear_action_bit(first, pe_action_optional); if (is_set(first->flags, pe_action_migrate_runnable) && is_set(then->flags, pe_action_migrate_runnable) == FALSE) { pe_rsc_trace(first->rsc, "Unset migrate runnable on %s because of %s", first->uuid, then->uuid); pe_clear_action_bit(first, pe_action_migrate_runnable); } pe_rsc_trace(then->rsc, "Unset optional on %s because of %s", first->uuid, then->uuid); } } if ((type & pe_order_implies_first_migratable) && is_set(filter, pe_action_optional)) { if (((then->flags & pe_action_migrate_runnable) == FALSE) || ((then->flags & pe_action_runnable) == FALSE)) { pe_rsc_trace(then->rsc, "Unset runnable on %s because %s is neither runnable or migratable", first->uuid, then->uuid); pe_clear_action_bit(first, pe_action_runnable); } if ((then->flags & pe_action_optional) == 0) { pe_rsc_trace(then->rsc, "Unset optional on %s because %s is not optional", first->uuid, then->uuid); pe_clear_action_bit(first, pe_action_optional); } } if ((type & pe_order_pseudo_left) && is_set(filter, pe_action_optional)) { if ((first->flags & pe_action_runnable) == FALSE) { pe_clear_action_bit(then, pe_action_migrate_runnable); pe_clear_action_bit(then, pe_action_pseudo); pe_rsc_trace(then->rsc, "Unset pseudo on %s because %s is not runnable", then->uuid, first->uuid); } } if (is_set(type, pe_order_runnable_left) && is_set(filter, pe_action_runnable) && is_set(then->flags, pe_action_runnable) && is_set(flags, pe_action_runnable) == FALSE) { pe_rsc_trace(then->rsc, "Unset runnable on %s because of %s", then->uuid, first->uuid); pe_clear_action_bit(then, pe_action_runnable); pe_clear_action_bit(then, pe_action_migrate_runnable); } if (is_set(type, pe_order_implies_then) && is_set(filter, pe_action_optional) && is_set(then->flags, pe_action_optional) && is_set(flags, pe_action_optional) == FALSE) { /* in this case, treat migrate_runnable as if first is optional */ if (is_set(first->flags, pe_action_migrate_runnable) == FALSE) { pe_rsc_trace(then->rsc, "Unset optional on %s because of %s", then->uuid, first->uuid); pe_clear_action_bit(then, pe_action_optional); } } if (is_set(type, pe_order_restart)) { const char *reason = NULL; CRM_ASSERT(first->rsc && first->rsc->variant == pe_native); CRM_ASSERT(then->rsc && then->rsc->variant == pe_native); if ((filter & pe_action_runnable) && (then->flags & pe_action_runnable) == 0 && (then->rsc->flags & pe_rsc_managed)) { reason = "shutdown"; } if ((filter & pe_action_optional) && (then->flags & pe_action_optional) == 0) { reason = "recover"; } if (reason && is_set(first->flags, pe_action_optional)) { if (is_set(first->flags, pe_action_runnable) || is_not_set(then->flags, pe_action_optional)) { pe_rsc_trace(first->rsc, "Handling %s: %s -> %s", reason, first->uuid, then->uuid); pe_clear_action_bit(first, pe_action_optional); } } if (reason && is_not_set(first->flags, pe_action_optional) && is_not_set(first->flags, pe_action_runnable)) { pe_rsc_trace(then->rsc, "Handling %s: %s -> %s", reason, first->uuid, then->uuid); pe_clear_action_bit(then, pe_action_runnable); } if (reason && is_not_set(first->flags, pe_action_optional) && is_set(first->flags, pe_action_migrate_runnable) && is_not_set(then->flags, pe_action_migrate_runnable)) { pe_clear_action_bit(first, pe_action_migrate_runnable); } } if (then_flags != then->flags) { changed |= pe_graph_updated_then; pe_rsc_trace(then->rsc, "Then: Flags for %s on %s are now 0x%.6x (was 0x%.6x) because of %s 0x%.6x", then->uuid, then->node ? then->node->details->uname : "[none]", then->flags, then_flags, first->uuid, first->flags); if(then->rsc && then->rsc->parent) { /* "X_stop then X_start" doesn't get handled for cloned groups unless we do this */ update_action(then); } } if (first_flags != first->flags) { changed |= pe_graph_updated_first; pe_rsc_trace(first->rsc, "First: Flags for %s on %s are now 0x%.6x (was 0x%.6x) because of %s 0x%.6x", first->uuid, first->node ? first->node->details->uname : "[none]", first->flags, first_flags, then->uuid, then->flags); } return changed; } void native_rsc_location(resource_t * rsc, rsc_to_node_t * constraint) { GListPtr gIter = NULL; GHashTableIter iter; node_t *node = NULL; if (constraint == NULL) { pe_err("Constraint is NULL"); return; } else if (rsc == NULL) { pe_err("LHS of rsc_to_node (%s) is NULL", constraint->id); return; } pe_rsc_trace(rsc, "Applying %s (%s) to %s", constraint->id, role2text(constraint->role_filter), rsc->id); /* take "lifetime" into account */ if (constraint->role_filter > RSC_ROLE_UNKNOWN && constraint->role_filter != rsc->next_role) { pe_rsc_debug(rsc, "Constraint (%s) is not active (role : %s vs. %s)", constraint->id, role2text(constraint->role_filter), role2text(rsc->next_role)); return; } else if (is_active(constraint) == FALSE) { pe_rsc_trace(rsc, "Constraint (%s) is not active", constraint->id); return; } if (constraint->node_list_rh == NULL) { pe_rsc_trace(rsc, "RHS of constraint %s is NULL", constraint->id); return; } for (gIter = constraint->node_list_rh; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; node_t *other_node = NULL; other_node = (node_t *) pe_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (other_node != NULL) { pe_rsc_trace(rsc, "%s + %s: %d + %d", node->details->uname, other_node->details->uname, node->weight, other_node->weight); other_node->weight = merge_weights(other_node->weight, node->weight); } else { other_node = node_copy(node); g_hash_table_insert(rsc->allowed_nodes, (gpointer) other_node->details->id, other_node); } if (other_node->rsc_discover_mode < constraint->discover_mode) { if (constraint->discover_mode == discover_exclusive) { rsc->exclusive_discover = TRUE; } /* exclusive > never > always... always is default */ other_node->rsc_discover_mode = constraint->discover_mode; } } g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { pe_rsc_trace(rsc, "%s + %s : %d", rsc->id, node->details->uname, node->weight); } } void native_expand(resource_t * rsc, pe_working_set_t * data_set) { GListPtr gIter = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "Processing actions from %s", rsc->id); for (gIter = rsc->actions; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; crm_trace("processing action %d for rsc=%s", action->id, rsc->id); graph_element_from_action(action, data_set); } for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; child_rsc->cmds->expand(child_rsc, data_set); } } #define log_change(fmt, args...) do { \ if(terminal) { \ printf(" * "fmt"\n", ##args); \ } else { \ crm_notice(fmt, ##args); \ } \ } while(0) #define STOP_SANITY_ASSERT(lineno) do { \ if(current && current->details->unclean) { \ /* It will be a pseudo op */ \ } else if(stop == NULL) { \ crm_err("%s:%d: No stop action exists for %s", __FUNCTION__, lineno, rsc->id); \ CRM_ASSERT(stop != NULL); \ } else if(is_set(stop->flags, pe_action_optional)) { \ crm_err("%s:%d: Action %s is still optional", __FUNCTION__, lineno, stop->uuid); \ CRM_ASSERT(is_not_set(stop->flags, pe_action_optional)); \ } \ } while(0) void LogActions(resource_t * rsc, pe_working_set_t * data_set, gboolean terminal) { node_t *next = NULL; node_t *current = NULL; action_t *stop = NULL; action_t *start = NULL; action_t *demote = NULL; action_t *promote = NULL; char *key = NULL; gboolean moving = FALSE; GListPtr possible_matches = NULL; if (rsc->children) { GListPtr gIter = NULL; for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; LogActions(child_rsc, data_set, terminal); } return; } next = rsc->allocated_to; if (rsc->running_on) { if (g_list_length(rsc->running_on) > 1 && rsc->partial_migration_source) { current = rsc->partial_migration_source; } else { current = rsc->running_on->data; } if (rsc->role == RSC_ROLE_STOPPED) { /* * This can occur when resources are being recovered * We fiddle with the current role in native_create_actions() */ rsc->role = RSC_ROLE_STARTED; } } if (current == NULL && is_set(rsc->flags, pe_rsc_orphan)) { /* Don't log stopped orphans */ return; } if (is_not_set(rsc->flags, pe_rsc_managed) || (current == NULL && next == NULL)) { pe_rsc_info(rsc, "Leave %s\t(%s%s)", rsc->id, role2text(rsc->role), is_not_set(rsc->flags, pe_rsc_managed) ? " unmanaged" : ""); return; } if (current != NULL && next != NULL && safe_str_neq(current->details->id, next->details->id)) { moving = TRUE; } key = start_key(rsc); possible_matches = find_actions(rsc->actions, key, next); free(key); if (possible_matches) { start = possible_matches->data; g_list_free(possible_matches); } key = stop_key(rsc); if(start == NULL || is_set(start->flags, pe_action_runnable) == FALSE) { possible_matches = find_actions(rsc->actions, key, NULL); } else { possible_matches = find_actions(rsc->actions, key, current); } free(key); if (possible_matches) { stop = possible_matches->data; g_list_free(possible_matches); } key = promote_key(rsc); possible_matches = find_actions(rsc->actions, key, next); free(key); if (possible_matches) { promote = possible_matches->data; g_list_free(possible_matches); } key = demote_key(rsc); possible_matches = find_actions(rsc->actions, key, next); free(key); if (possible_matches) { demote = possible_matches->data; g_list_free(possible_matches); } if (rsc->role == rsc->next_role) { action_t *migrate_to = NULL; key = generate_op_key(rsc->id, RSC_MIGRATED, 0); possible_matches = find_actions(rsc->actions, key, next); free(key); if (possible_matches) { migrate_to = possible_matches->data; } CRM_CHECK(next != NULL,); if (next == NULL) { } else if (migrate_to && is_set(migrate_to->flags, pe_action_runnable) && current) { log_change("Migrate %s\t(%s %s -> %s)", rsc->id, role2text(rsc->role), current->details->uname, next->details->uname); } else if (is_set(rsc->flags, pe_rsc_reload)) { log_change("Reload %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } else if (start == NULL || is_set(start->flags, pe_action_optional)) { pe_rsc_info(rsc, "Leave %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } else if (start && is_set(start->flags, pe_action_runnable) == FALSE) { log_change("Stop %s\t(%s %s%s)", rsc->id, role2text(rsc->role), current?current->details->uname:"N/A", stop && is_not_set(stop->flags, pe_action_runnable) ? " - blocked" : ""); STOP_SANITY_ASSERT(__LINE__); } else if (moving && current) { log_change("%s %s\t(%s %s -> %s)", is_set(rsc->flags, pe_rsc_failed) ? "Recover" : "Move ", rsc->id, role2text(rsc->role), current->details->uname, next->details->uname); } else if (is_set(rsc->flags, pe_rsc_failed)) { log_change("Recover %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); STOP_SANITY_ASSERT(__LINE__); } else { log_change("Restart %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); /* STOP_SANITY_ASSERT(__LINE__); False positive for migrate-fail-7 */ } g_list_free(possible_matches); return; } if (rsc->role > RSC_ROLE_SLAVE && rsc->role > rsc->next_role) { CRM_CHECK(current != NULL,); if (current != NULL) { gboolean allowed = FALSE; if (demote != NULL && (demote->flags & pe_action_runnable)) { allowed = TRUE; } log_change("Demote %s\t(%s -> %s %s%s)", rsc->id, role2text(rsc->role), role2text(rsc->next_role), current->details->uname, allowed ? "" : " - blocked"); if (stop != NULL && is_not_set(stop->flags, pe_action_optional) && rsc->next_role > RSC_ROLE_STOPPED && moving == FALSE) { if (is_set(rsc->flags, pe_rsc_failed)) { log_change("Recover %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); STOP_SANITY_ASSERT(__LINE__); } else if (is_set(rsc->flags, pe_rsc_reload)) { log_change("Reload %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } else { log_change("Restart %s\t(%s %s)", rsc->id, role2text(rsc->next_role), next->details->uname); STOP_SANITY_ASSERT(__LINE__); } } } } else if (rsc->next_role == RSC_ROLE_STOPPED) { GListPtr gIter = NULL; CRM_CHECK(current != NULL,); key = stop_key(rsc); for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; action_t *stop_op = NULL; gboolean allowed = FALSE; possible_matches = find_actions(rsc->actions, key, node); if (possible_matches) { stop_op = possible_matches->data; g_list_free(possible_matches); } if (stop_op && (stop_op->flags & pe_action_runnable)) { STOP_SANITY_ASSERT(__LINE__); allowed = TRUE; } log_change("Stop %s\t(%s%s)", rsc->id, node->details->uname, allowed ? "" : " - blocked"); } free(key); } if (moving) { log_change("Move %s\t(%s %s -> %s)", rsc->id, role2text(rsc->next_role), current->details->uname, next->details->uname); STOP_SANITY_ASSERT(__LINE__); } if (rsc->role == RSC_ROLE_STOPPED) { gboolean allowed = FALSE; if (start && (start->flags & pe_action_runnable)) { allowed = TRUE; } CRM_CHECK(next != NULL,); if (next != NULL) { log_change("Start %s\t(%s%s)", rsc->id, next->details->uname, allowed ? "" : " - blocked"); } if (allowed == FALSE) { return; } } if (rsc->next_role > RSC_ROLE_SLAVE && rsc->role < rsc->next_role) { gboolean allowed = FALSE; CRM_LOG_ASSERT(next); if (stop != NULL && is_not_set(stop->flags, pe_action_optional) && rsc->role > RSC_ROLE_STOPPED) { if (is_set(rsc->flags, pe_rsc_failed)) { log_change("Recover %s\t(%s %s)", rsc->id, role2text(rsc->role), next?next->details->uname:NULL); STOP_SANITY_ASSERT(__LINE__); } else if (is_set(rsc->flags, pe_rsc_reload)) { log_change("Reload %s\t(%s %s)", rsc->id, role2text(rsc->role), next?next->details->uname:NULL); STOP_SANITY_ASSERT(__LINE__); } else { log_change("Restart %s\t(%s %s)", rsc->id, role2text(rsc->role), next?next->details->uname:NULL); STOP_SANITY_ASSERT(__LINE__); } } if (promote && (promote->flags & pe_action_runnable)) { allowed = TRUE; } log_change("Promote %s\t(%s -> %s %s%s)", rsc->id, role2text(rsc->role), role2text(rsc->next_role), next?next->details->uname:NULL, allowed ? "" : " - blocked"); } } gboolean StopRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set) { GListPtr gIter = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "%s", rsc->id); for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { node_t *current = (node_t *) gIter->data; action_t *stop; if (rsc->partial_migration_target) { if (rsc->partial_migration_target->details == current->details) { pe_rsc_trace(rsc, "Filtered %s -> %s %s", current->details->uname, next->details->uname, rsc->id); continue; } else { pe_rsc_trace(rsc, "Forced on %s %s", current->details->uname, rsc->id); optional = FALSE; } } pe_rsc_trace(rsc, "%s on %s", rsc->id, current->details->uname); stop = stop_action(rsc, current, optional); if (is_not_set(rsc->flags, pe_rsc_managed)) { update_action_flags(stop, pe_action_runnable | pe_action_clear, __FUNCTION__); } if (is_set(data_set->flags, pe_flag_remove_after_stop)) { DeleteRsc(rsc, current, optional, data_set); } } return TRUE; } gboolean StartRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set) { action_t *start = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "%s on %s %d", rsc->id, next ? next->details->uname : "N/A", optional); start = start_action(rsc, next, TRUE); if (is_set(start->flags, pe_action_runnable) && optional == FALSE) { update_action_flags(start, pe_action_optional | pe_action_clear, __FUNCTION__); } return TRUE; } gboolean PromoteRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set) { char *key = NULL; GListPtr gIter = NULL; gboolean runnable = TRUE; GListPtr action_list = NULL; CRM_ASSERT(rsc); CRM_CHECK(next != NULL, return FALSE); pe_rsc_trace(rsc, "%s on %s", rsc->id, next->details->uname); key = start_key(rsc); action_list = find_actions_exact(rsc->actions, key, next); free(key); for (gIter = action_list; gIter != NULL; gIter = gIter->next) { action_t *start = (action_t *) gIter->data; if (is_set(start->flags, pe_action_runnable) == FALSE) { runnable = FALSE; } } g_list_free(action_list); if (runnable) { promote_action(rsc, next, optional); return TRUE; } pe_rsc_debug(rsc, "%s\tPromote %s (canceled)", next->details->uname, rsc->id); key = promote_key(rsc); action_list = find_actions_exact(rsc->actions, key, next); free(key); for (gIter = action_list; gIter != NULL; gIter = gIter->next) { action_t *promote = (action_t *) gIter->data; update_action_flags(promote, pe_action_runnable | pe_action_clear, __FUNCTION__); } g_list_free(action_list); return TRUE; } gboolean DemoteRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set) { GListPtr gIter = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "%s", rsc->id); /* CRM_CHECK(rsc->next_role == RSC_ROLE_SLAVE, return FALSE); */ for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { node_t *current = (node_t *) gIter->data; pe_rsc_trace(rsc, "%s on %s", rsc->id, next ? next->details->uname : "N/A"); demote_action(rsc, current, optional); } return TRUE; } gboolean RoleError(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set) { CRM_ASSERT(rsc); crm_err("%s on %s", rsc->id, next ? next->details->uname : "N/A"); CRM_CHECK(FALSE, return FALSE); return FALSE; } gboolean NullOp(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set) { CRM_ASSERT(rsc); pe_rsc_trace(rsc, "%s", rsc->id); return FALSE; } gboolean DeleteRsc(resource_t * rsc, node_t * node, gboolean optional, pe_working_set_t * data_set) { if (is_set(rsc->flags, pe_rsc_failed)) { pe_rsc_trace(rsc, "Resource %s not deleted from %s: failed", rsc->id, node->details->uname); return FALSE; } else if (node == NULL) { pe_rsc_trace(rsc, "Resource %s not deleted: NULL node", rsc->id); return FALSE; } else if (node->details->unclean || node->details->online == FALSE) { pe_rsc_trace(rsc, "Resource %s not deleted from %s: unrunnable", rsc->id, node->details->uname); return FALSE; } crm_notice("Removing %s from %s", rsc->id, node->details->uname); delete_action(rsc, node, optional); new_rsc_order(rsc, RSC_STOP, rsc, RSC_DELETE, optional ? pe_order_implies_then : pe_order_optional, data_set); new_rsc_order(rsc, RSC_DELETE, rsc, RSC_START, optional ? pe_order_implies_then : pe_order_optional, data_set); return TRUE; } #include <../lib/pengine/unpack.h> #define set_char(x) last_rsc_id[lpc] = x; complete = TRUE; static char * increment_clone(char *last_rsc_id) { int lpc = 0; int len = 0; char *tmp = NULL; gboolean complete = FALSE; CRM_CHECK(last_rsc_id != NULL, return NULL); if (last_rsc_id != NULL) { len = strlen(last_rsc_id); } lpc = len - 1; while (complete == FALSE && lpc > 0) { switch (last_rsc_id[lpc]) { case 0: lpc--; break; case '0': set_char('1'); break; case '1': set_char('2'); break; case '2': set_char('3'); break; case '3': set_char('4'); break; case '4': set_char('5'); break; case '5': set_char('6'); break; case '6': set_char('7'); break; case '7': set_char('8'); break; case '8': set_char('9'); break; case '9': last_rsc_id[lpc] = '0'; lpc--; break; case ':': tmp = last_rsc_id; last_rsc_id = calloc(1, len + 2); memcpy(last_rsc_id, tmp, len); last_rsc_id[++lpc] = '1'; last_rsc_id[len] = '0'; last_rsc_id[len + 1] = 0; complete = TRUE; free(tmp); break; default: crm_err("Unexpected char: %c (%d)", last_rsc_id[lpc], lpc); return NULL; break; } } return last_rsc_id; } static node_t * probe_grouped_clone(resource_t * rsc, node_t * node, pe_working_set_t * data_set) { node_t *running = NULL; resource_t *top = uber_parent(rsc); if (running == NULL && is_set(top->flags, pe_rsc_unique) == FALSE) { /* Annoyingly we also need to check any other clone instances * Clumsy, but it will work. * * An alternative would be to update known_on for every peer * during process_rsc_state() * * This code desperately needs optimization * ptest -x with 100 nodes, 100 clones and clone-max=10: * No probes O(25s) * Detection without clone loop O(3m) * Detection with clone loop O(8m) ptest[32211]: 2010/02/18_14:27:55 CRIT: stage5: Probing for unknown resources ptest[32211]: 2010/02/18_14:33:39 CRIT: stage5: Done ptest[32211]: 2010/02/18_14:35:05 CRIT: stage7: Updating action states ptest[32211]: 2010/02/18_14:35:05 CRIT: stage7: Done */ char *clone_id = clone_zero(rsc->id); resource_t *peer = pe_find_resource(top->children, clone_id); while (peer && running == NULL) { running = pe_hash_table_lookup(peer->known_on, node->details->id); if (running != NULL) { /* we already know the status of the resource on this node */ pe_rsc_trace(rsc, "Skipping active clone: %s", rsc->id); free(clone_id); return running; } clone_id = increment_clone(clone_id); peer = pe_find_resource(data_set->resources, clone_id); } free(clone_id); } return running; } gboolean native_create_probe(resource_t * rsc, node_t * node, action_t * complete, gboolean force, pe_working_set_t * data_set) { enum pe_ordering flags = pe_order_optional; char *key = NULL; action_t *probe = NULL; node_t *running = NULL; node_t *allowed = NULL; resource_t *top = uber_parent(rsc); static const char *rc_master = NULL; static const char *rc_inactive = NULL; if (rc_inactive == NULL) { rc_inactive = crm_itoa(PCMK_OCF_NOT_RUNNING); rc_master = crm_itoa(PCMK_OCF_RUNNING_MASTER); } CRM_CHECK(node != NULL, return FALSE); if (force == FALSE && is_not_set(data_set->flags, pe_flag_startup_probes)) { pe_rsc_trace(rsc, "Skipping active resource detection for %s", rsc->id); return FALSE; } else if (force == FALSE && is_container_remote_node(node)) { pe_rsc_trace(rsc, "Skipping active resource detection for %s on container %s", rsc->id, node->details->id); return FALSE; } if (is_remote_node(node)) { const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); - if (safe_str_eq(class, "stonith")) { + if (safe_str_eq(class, PCMK_RESOURCE_CLASS_STONITH)) { pe_rsc_trace(rsc, "Skipping probe for %s on node %s, remote-nodes do not run stonith agents.", rsc->id, node->details->id); return FALSE; } else if (rsc_contains_remote_node(data_set, rsc)) { pe_rsc_trace(rsc, "Skipping probe for %s on node %s, remote-nodes can not run resources that contain connection resources.", rsc->id, node->details->id); return FALSE; } else if (rsc->is_remote_node) { pe_rsc_trace(rsc, "Skipping probe for %s on node %s, remote-nodes can not run connection resources", rsc->id, node->details->id); return FALSE; } } if (rsc->children) { GListPtr gIter = NULL; gboolean any_created = FALSE; for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; any_created = child_rsc->cmds->create_probe(child_rsc, node, complete, force, data_set) || any_created; } return any_created; } else if (rsc->container) { pe_rsc_trace(rsc, "Skipping %s: it is within container %s", rsc->id, rsc->container->id); return FALSE; } if (is_set(rsc->flags, pe_rsc_orphan)) { pe_rsc_trace(rsc, "Skipping orphan: %s", rsc->id); return FALSE; } running = g_hash_table_lookup(rsc->known_on, node->details->id); if (running == NULL && is_set(rsc->flags, pe_rsc_unique) == FALSE) { /* Anonymous clones */ if (rsc->parent == top) { running = g_hash_table_lookup(rsc->parent->known_on, node->details->id); } else { /* Grouped anonymous clones need extra special handling */ running = probe_grouped_clone(rsc, node, data_set); } } if (force == FALSE && running != NULL) { /* we already know the status of the resource on this node */ pe_rsc_trace(rsc, "Skipping active: %s on %s", rsc->id, node->details->uname); return FALSE; } allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (rsc->exclusive_discover || top->exclusive_discover) { if (allowed == NULL) { /* exclusive discover is enabled and this node is not in the allowed list. */ return FALSE; } else if (allowed->rsc_discover_mode != discover_exclusive) { /* exclusive discover is enabled and this node is not marked * as a node this resource should be discovered on */ return FALSE; } } if (allowed && allowed->rsc_discover_mode == discover_never) { /* this resource is marked as not needing to be discovered on this node */ return FALSE; } key = generate_op_key(rsc->id, RSC_STATUS, 0); probe = custom_action(rsc, key, RSC_STATUS, node, FALSE, TRUE, data_set); update_action_flags(probe, pe_action_optional | pe_action_clear, __FUNCTION__); /* If enabled, require unfencing before probing any fence devices * but ensure it happens after any resources that require * unfencing have been probed. * * Doing it the other way (requiring unfencing after probing * resources that need it) would result in the node being * unfenced, and all its resources being stopped, whenever a new * resource is added. Which would be highly suboptimal. * * So essentially, at the point the fencing device(s) have been * probed, we know the state of all resources that require * unfencing and that unfencing occurred. */ if(is_set(rsc->flags, pe_rsc_fence_device) && is_set(data_set->flags, pe_flag_enable_unfencing)) { trigger_unfencing(NULL, node, "node discovery", probe, data_set); probe->priority = INFINITY; /* Ensure this runs if unfencing succeeds */ } else if(is_set(rsc->flags, pe_rsc_needs_unfencing)) { action_t *unfence = pe_fence_op(node, "on", TRUE, data_set); order_actions(probe, unfence, pe_order_optional); } /* * We need to know if it's running_on (not just known_on) this node * to correctly determine the target rc. */ running = pe_find_node_id(rsc->running_on, node->details->id); if (running == NULL) { add_hash_param(probe->meta, XML_ATTR_TE_TARGET_RC, rc_inactive); } else if (rsc->role == RSC_ROLE_MASTER) { add_hash_param(probe->meta, XML_ATTR_TE_TARGET_RC, rc_master); } crm_debug("Probing %s on %s (%s) %d %p", rsc->id, node->details->uname, role2text(rsc->role), is_set(probe->flags, pe_action_runnable), rsc->running_on); if(is_set(rsc->flags, pe_rsc_fence_device) && is_set(data_set->flags, pe_flag_enable_unfencing)) { top = rsc; } else if (top->variant < pe_clone) { top = rsc; } else { crm_trace("Probing %s on %s (%s) as %s", rsc->id, node->details->uname, role2text(rsc->role), top->id); } if(is_not_set(probe->flags, pe_action_runnable) && rsc->running_on == NULL) { /* Prevent the start from occuring if rsc isn't active, but * don't cause it to stop if it was active already */ flags |= pe_order_runnable_left; } custom_action_order(rsc, NULL, probe, top, generate_op_key(top->id, RSC_START, 0), NULL, flags, data_set); /* Before any reloads, if they exist */ custom_action_order(rsc, NULL, probe, top, reload_key(rsc), NULL, pe_order_optional, data_set); if (node->details->shutdown == FALSE) { custom_action_order(rsc, NULL, probe, rsc, generate_op_key(rsc->id, RSC_STOP, 0), NULL, pe_order_optional, data_set); } if(is_set(rsc->flags, pe_rsc_fence_device) && is_set(data_set->flags, pe_flag_enable_unfencing)) { /* Normally rsc.start depends on probe complete which depends * on rsc.probe. But this can't be the case in this scenario as * it would create graph loops. * * So instead we explicitly order 'rsc.probe then rsc.start' */ } else { order_actions(probe, complete, pe_order_implies_then); } return TRUE; } static void native_start_constraints(resource_t * rsc, action_t * stonith_op, pe_working_set_t * data_set) { node_t *target; GListPtr gIter = NULL; action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); action_t *stonith_done = get_pseudo_op(STONITH_DONE, data_set); CRM_CHECK(stonith_op && stonith_op->node, return); target = stonith_op->node; for (gIter = rsc->actions; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; if(action->needs == rsc_req_nothing) { /* Anything other than start or promote requires nothing */ } else if (action->needs == rsc_req_stonith) { order_actions(stonith_done, action, pe_order_optional); } else if (safe_str_eq(action->task, RSC_START) && NULL == pe_hash_table_lookup(rsc->known_on, target->details->id)) { /* if known == NULL, then we don't know if * the resource is active on the node * we're about to shoot * * in this case, regardless of action->needs, * the only safe option is to wait until * the node is shot before doing anything * to with the resource * * it's analogous to waiting for all the probes * for rscX to complete before starting rscX * * the most likely explanation is that the * DC died and took its status with it */ pe_rsc_debug(rsc, "Ordering %s after %s recovery", action->uuid, target->details->uname); order_actions(all_stopped, action, pe_order_optional | pe_order_runnable_left); } } } static void native_stop_constraints(resource_t * rsc, action_t * stonith_op, pe_working_set_t * data_set) { char *key = NULL; GListPtr gIter = NULL; GListPtr action_list = NULL; action_t *start = NULL; resource_t *top = uber_parent(rsc); node_t *target; CRM_CHECK(stonith_op && stonith_op->node, return); target = stonith_op->node; /* Check whether the resource has a pending start action */ start = find_first_action(rsc->actions, NULL, CRMD_ACTION_START, NULL); /* Get a list of stop actions potentially implied by the fencing */ key = stop_key(rsc); action_list = find_actions(rsc->actions, key, target); free(key); for (gIter = action_list; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; if (is_set(rsc->flags, pe_rsc_failed)) { crm_notice("Stop of failed resource %s is implicit after %s is fenced", rsc->id, target->details->uname); } else { crm_info("%s is implicit after %s is fenced", action->uuid, target->details->uname); } /* The stop would never complete and is now implied by the fencing, * so convert it into a pseudo-action. */ update_action_flags(action, pe_action_pseudo, __FUNCTION__); update_action_flags(action, pe_action_runnable, __FUNCTION__); update_action_flags(action, pe_action_implied_by_stonith, __FUNCTION__); if(start == NULL || start->needs > rsc_req_quorum) { enum pe_ordering flags = pe_order_optional; action_t *parent_stop = find_first_action(top->actions, NULL, RSC_STOP, NULL); if (target->details->remote_rsc) { /* User constraints must not order a resource in a guest node * relative to the guest node container resource. This flag * marks constraints as generated by the cluster and thus * immune to that check. */ flags |= pe_order_preserve; } order_actions(stonith_op, action, flags); order_actions(stonith_op, parent_stop, flags); } if (is_set(rsc->flags, pe_rsc_notify)) { /* Create a second notification that will be delivered * immediately after the node is fenced * * Basic problem: * - C is a clone active on the node to be shot and stopping on another * - R is a resource that depends on C * * + C.stop depends on R.stop * + C.stopped depends on STONITH * + C.notify depends on C.stopped * + C.healthy depends on C.notify * + R.stop depends on C.healthy * * The extra notification here changes * + C.healthy depends on C.notify * into: * + C.healthy depends on C.notify' * + C.notify' depends on STONITH' * thus breaking the loop */ create_secondary_notification(action, rsc, stonith_op, data_set); } /* From Bug #1601, successful fencing must be an input to a failed resources stop action. However given group(rA, rB) running on nodeX and B.stop has failed, A := stop healthy resource (rA.stop) B := stop failed resource (pseudo operation B.stop) C := stonith nodeX A requires B, B requires C, C requires A This loop would prevent the cluster from making progress. This block creates the "C requires A" dependency and therefore must (at least for now) be disabled. Instead, run the block above and treat all resources on nodeX as B would be (marked as a pseudo op depending on the STONITH). TODO: Break the "A requires B" dependency in update_action() and re-enable this block } else if(is_stonith == FALSE) { crm_info("Moving healthy resource %s" " off %s before fencing", rsc->id, node->details->uname); * stop healthy resources before the * stonith op * custom_action_order( rsc, stop_key(rsc), NULL, NULL,strdup(CRM_OP_FENCE),stonith_op, pe_order_optional, data_set); */ } g_list_free(action_list); /* Get a list of demote actions potentially implied by the fencing */ key = demote_key(rsc); action_list = find_actions(rsc->actions, key, target); free(key); for (gIter = action_list; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; if (action->node->details->online == FALSE || action->node->details->unclean == TRUE || is_set(rsc->flags, pe_rsc_failed)) { if (is_set(rsc->flags, pe_rsc_failed)) { pe_rsc_info(rsc, "Demote of failed resource %s is implicit after %s is fenced", rsc->id, target->details->uname); } else { pe_rsc_info(rsc, "%s is implicit after %s is fenced", action->uuid, target->details->uname); } /* The demote would never complete and is now implied by the * fencing, so convert it into a pseudo-action. */ update_action_flags(action, pe_action_pseudo, __FUNCTION__); update_action_flags(action, pe_action_runnable, __FUNCTION__); if (start == NULL || start->needs > rsc_req_quorum) { order_actions(stonith_op, action, pe_order_preserve|pe_order_optional); } } } g_list_free(action_list); } void rsc_stonith_ordering(resource_t * rsc, action_t * stonith_op, pe_working_set_t * data_set) { if (rsc->children) { GListPtr gIter = NULL; for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; rsc_stonith_ordering(child_rsc, stonith_op, data_set); } } else if (is_not_set(rsc->flags, pe_rsc_managed)) { pe_rsc_trace(rsc, "Skipping fencing constraints for unmanaged resource: %s", rsc->id); } else { native_start_constraints(rsc, stonith_op, data_set); native_stop_constraints(rsc, stonith_op, data_set); } } enum stack_activity { stack_stable = 0, stack_starting = 1, stack_stopping = 2, stack_middle = 4, }; static action_t * get_first_named_action(resource_t * rsc, const char *action, gboolean only_valid, node_t * current) { action_t *a = NULL; GListPtr action_list = NULL; char *key = generate_op_key(rsc->id, action, 0); action_list = find_actions(rsc->actions, key, current); if (action_list == NULL || action_list->data == NULL) { crm_trace("%s: no %s action", rsc->id, action); free(key); return NULL; } a = action_list->data; g_list_free(action_list); if (only_valid && is_set(a->flags, pe_action_pseudo)) { crm_trace("%s: pseudo", key); a = NULL; } else if (only_valid && is_not_set(a->flags, pe_action_runnable)) { crm_trace("%s: runnable", key); a = NULL; } free(key); return a; } void ReloadRsc(resource_t * rsc, node_t *node, pe_working_set_t * data_set) { GListPtr gIter = NULL; action_t *other = NULL; action_t *reload = NULL; if (rsc->children) { for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; ReloadRsc(child_rsc, node, data_set); } return; } else if (rsc->variant > pe_native) { /* Complex resource with no children */ return; } else if (is_not_set(rsc->flags, pe_rsc_managed)) { pe_rsc_trace(rsc, "%s: unmanaged", rsc->id); return; } else if (is_set(rsc->flags, pe_rsc_failed) || is_set(rsc->flags, pe_rsc_start_pending)) { pe_rsc_trace(rsc, "%s: general resource state: flags=0x%.16llx", rsc->id, rsc->flags); stop_action(rsc, node, FALSE); /* Force a full restart, overkill? */ return; } else if (node == NULL) { pe_rsc_trace(rsc, "%s: not active", rsc->id); return; } pe_rsc_trace(rsc, "Processing %s", rsc->id); set_bit(rsc->flags, pe_rsc_reload); reload = custom_action( rsc, reload_key(rsc), CRMD_ACTION_RELOAD, node, FALSE, TRUE, data_set); /* stop = stop_action(rsc, node, optional); */ other = get_first_named_action(rsc, RSC_STOP, TRUE, node); if (other != NULL) { order_actions(reload, other, pe_order_optional); } other = get_first_named_action(rsc, RSC_DEMOTE, TRUE, node); if (other != NULL) { order_actions(reload, other, pe_order_optional); } } void native_append_meta(resource_t * rsc, xmlNode * xml) { char *value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION); resource_t *iso_parent, *last_parent, *parent; if (value) { char *name = NULL; name = crm_meta_name(XML_RSC_ATTR_INCARNATION); crm_xml_add(xml, name, value); free(name); } value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_REMOTE_NODE); if (value) { char *name = NULL; name = crm_meta_name(XML_RSC_ATTR_REMOTE_NODE); crm_xml_add(xml, name, value); free(name); } for (parent = rsc; parent != NULL; parent = parent->parent) { if (parent->container) { crm_xml_add(xml, CRM_META"_"XML_RSC_ATTR_CONTAINER, parent->container->id); } } last_parent = iso_parent = rsc; while (iso_parent != NULL) { char *name = NULL; char *iso = NULL; if (iso_parent->isolation_wrapper == NULL) { last_parent = iso_parent; iso_parent = iso_parent->parent; continue; } /* name of wrapper script this resource is routed through. */ name = crm_meta_name(XML_RSC_ATTR_ISOLATION_WRAPPER); crm_xml_add(xml, name, iso_parent->isolation_wrapper); free(name); /* instance name for isolated environment */ name = crm_meta_name(XML_RSC_ATTR_ISOLATION_INSTANCE); if (iso_parent->variant >= pe_clone) { /* if isolation is set at the clone/master level, we have to * give this resource the unique isolation instance associated * with the clone child (last_parent)*/ /* Example: cloned group. group is container * clone myclone - iso_parent * group mygroup - last_parent (this is the iso environment) * rsc myrsc1 - rsc * rsc myrsc2 * The group is what is isolated in example1. We have to make * sure myrsc1 and myrsc2 launch in the same isolated environment. * * Example: cloned primitives. rsc primitive is container * clone myclone iso_parent * rsc myrsc1 - last_parent == rsc (this is the iso environment) * The individual cloned primitive instances are isolated */ value = g_hash_table_lookup(last_parent->meta, XML_RSC_ATTR_INCARNATION); CRM_ASSERT(value != NULL); iso = crm_concat(crm_element_value(last_parent->xml, XML_ATTR_ID), value, '_'); crm_xml_add(xml, name, iso); free(iso); } else { /* * Example: cloned group of containers * clone myclone * group mygroup * rsc myrsc1 - iso_parent (this is the iso environment) * rsc myrsc2 * * Example: group of containers * group mygroup * rsc myrsc1 - iso_parent (this is the iso environment) * rsc myrsc2 * * Example: group is container * group mygroup - iso_parent ( this is iso environment) * rsc myrsc1 * rsc myrsc2 * * Example: single primitive * rsc myrsc1 - iso_parent (this is the iso environment) */ value = g_hash_table_lookup(iso_parent->meta, XML_RSC_ATTR_INCARNATION); if (value) { crm_xml_add(xml, name, iso_parent->id); iso = crm_concat(crm_element_value(iso_parent->xml, XML_ATTR_ID), value, '_'); crm_xml_add(xml, name, iso); free(iso); } else { crm_xml_add(xml, name, iso_parent->id); } } free(name); break; } } diff --git a/tools/crm_resource_print.c b/tools/crm_resource_print.c index 5e84723302..65841e5a2d 100644 --- a/tools/crm_resource_print.c +++ b/tools/crm_resource_print.c @@ -1,397 +1,397 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include bool print_pending = TRUE; #define cons_string(x) x?x:"NA" void cli_resource_print_cts_constraints(pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; xmlNode *lifetime = NULL; xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set->input); for (xml_obj = __xml_first_child(cib_constraints); xml_obj != NULL; xml_obj = __xml_next(xml_obj)) { const char *id = crm_element_value(xml_obj, XML_ATTR_ID); if (id == NULL) { continue; } lifetime = first_named_child(xml_obj, "lifetime"); if (test_ruleset(lifetime, NULL, data_set->now) == FALSE) { continue; } if (safe_str_eq(XML_CONS_TAG_RSC_DEPEND, crm_element_name(xml_obj))) { printf("Constraint %s %s %s %s %s %s %s\n", crm_element_name(xml_obj), cons_string(crm_element_value(xml_obj, XML_ATTR_ID)), cons_string(crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE)), cons_string(crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET)), cons_string(crm_element_value(xml_obj, XML_RULE_ATTR_SCORE)), cons_string(crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE_ROLE)), cons_string(crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET_ROLE))); } else if (safe_str_eq(XML_CONS_TAG_RSC_LOCATION, crm_element_name(xml_obj))) { /* unpack_location(xml_obj, data_set); */ } } } void cli_resource_print_cts(resource_t * rsc) { GListPtr lpc = NULL; const char *host = NULL; bool needs_quorum = TRUE; const char *rtype = crm_element_value(rsc->xml, XML_ATTR_TYPE); const char *rprov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); const char *rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); - if (safe_str_eq(rclass, "stonith")) { + if (safe_str_eq(rclass, PCMK_RESOURCE_CLASS_STONITH)) { xmlNode *op = NULL; needs_quorum = FALSE; for (op = __xml_first_child(rsc->ops_xml); op != NULL; op = __xml_next(op)) { if (crm_str_eq((const char *)op->name, "op", TRUE)) { const char *name = crm_element_value(op, "name"); if (safe_str_neq(name, CRMD_ACTION_START)) { const char *value = crm_element_value(op, "requires"); if (safe_str_eq(value, "nothing")) { needs_quorum = FALSE; } break; } } } } if (rsc->running_on != NULL && g_list_length(rsc->running_on) == 1) { node_t *tmp = rsc->running_on->data; host = tmp->details->uname; } printf("Resource: %s %s %s %s %s %s %s %s %d %lld 0x%.16llx\n", crm_element_name(rsc->xml), rsc->id, rsc->clone_name ? rsc->clone_name : rsc->id, rsc->parent ? rsc->parent->id : "NA", rprov ? rprov : "NA", rclass, rtype, host ? host : "NA", needs_quorum, rsc->flags, rsc->flags); for (lpc = rsc->children; lpc != NULL; lpc = lpc->next) { resource_t *child = (resource_t *) lpc->data; cli_resource_print_cts(child); } } void cli_resource_print_raw(resource_t * rsc) { GListPtr lpc = NULL; GListPtr children = rsc->children; if (children == NULL) { printf("%s\n", rsc->id); } for (lpc = children; lpc != NULL; lpc = lpc->next) { resource_t *child = (resource_t *) lpc->data; cli_resource_print_raw(child); } } int cli_resource_print_list(pe_working_set_t * data_set, bool raw) { int found = 0; GListPtr lpc = NULL; int opts = pe_print_printf | pe_print_rsconly; if (print_pending) { opts |= pe_print_pending; } for (lpc = data_set->resources; lpc != NULL; lpc = lpc->next) { resource_t *rsc = (resource_t *) lpc->data; if (is_set(rsc->flags, pe_rsc_orphan) && rsc->fns->active(rsc, TRUE) == FALSE) { continue; } rsc->fns->print(rsc, NULL, opts, stdout); found++; } if (found == 0) { printf("NO resources configured\n"); return -ENXIO; } return 0; } int cli_resource_print_operations(const char *rsc_id, const char *host_uname, bool active, pe_working_set_t * data_set) { resource_t *rsc = NULL; int opts = pe_print_printf | pe_print_rsconly | pe_print_suppres_nl; GListPtr ops = find_operations(rsc_id, host_uname, active, data_set); GListPtr lpc = NULL; if (print_pending) { opts |= pe_print_pending; } for (lpc = ops; lpc != NULL; lpc = lpc->next) { xmlNode *xml_op = (xmlNode *) lpc->data; const char *op_rsc = crm_element_value(xml_op, "resource"); const char *last = crm_element_value(xml_op, XML_RSC_OP_LAST_CHANGE); const char *status_s = crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS); const char *op_key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); int status = crm_parse_int(status_s, "0"); rsc = pe_find_resource(data_set->resources, op_rsc); if(rsc) { rsc->fns->print(rsc, "", opts, stdout); } else { fprintf(stdout, "Unknown resource %s", op_rsc); } fprintf(stdout, ": %s (node=%s, call=%s, rc=%s", op_key ? op_key : ID(xml_op), crm_element_value(xml_op, XML_ATTR_UNAME), crm_element_value(xml_op, XML_LRM_ATTR_CALLID), crm_element_value(xml_op, XML_LRM_ATTR_RC)); if (last) { time_t run_at = crm_parse_int(last, "0"); fprintf(stdout, ", last-rc-change=%s, exec=%sms", crm_strip_trailing_newline(ctime(&run_at)), crm_element_value(xml_op, XML_RSC_OP_T_EXEC)); } fprintf(stdout, "): %s\n", services_lrm_status_str(status)); } return pcmk_ok; } void cli_resource_print_location(resource_t * rsc, const char *prefix) { GListPtr lpc = NULL; GListPtr list = rsc->rsc_location; int offset = 0; if (prefix) { offset = strlen(prefix) - 2; } for (lpc = list; lpc != NULL; lpc = lpc->next) { rsc_to_node_t *cons = (rsc_to_node_t *) lpc->data; GListPtr lpc2 = NULL; for (lpc2 = cons->node_list_rh; lpc2 != NULL; lpc2 = lpc2->next) { node_t *node = (node_t *) lpc2->data; char *score = score2char(node->weight); fprintf(stdout, "%s: Node %-*s (score=%s, id=%s)\n", prefix ? prefix : " ", 71 - offset, node->details->uname, score, cons->id); free(score); } } } void cli_resource_print_colocation(resource_t * rsc, bool dependents, bool recursive, int offset) { char *prefix = NULL; GListPtr lpc = NULL; GListPtr list = rsc->rsc_cons; prefix = calloc(1, (offset * 4) + 1); memset(prefix, ' ', offset * 4); if (dependents) { list = rsc->rsc_cons_lhs; } if (is_set(rsc->flags, pe_rsc_allocating)) { /* Break colocation loops */ printf("loop %s\n", rsc->id); free(prefix); return; } set_bit(rsc->flags, pe_rsc_allocating); for (lpc = list; lpc != NULL; lpc = lpc->next) { rsc_colocation_t *cons = (rsc_colocation_t *) lpc->data; char *score = NULL; resource_t *peer = cons->rsc_rh; if (dependents) { peer = cons->rsc_lh; } if (is_set(peer->flags, pe_rsc_allocating)) { if (dependents == FALSE) { fprintf(stdout, "%s%-*s (id=%s - loop)\n", prefix, 80 - (4 * offset), peer->id, cons->id); } continue; } if (dependents && recursive) { cli_resource_print_colocation(peer, dependents, recursive, offset + 1); } score = score2char(cons->score); if (cons->role_rh > RSC_ROLE_STARTED) { fprintf(stdout, "%s%-*s (score=%s, %s role=%s, id=%s)\n", prefix, 80 - (4 * offset), peer->id, score, dependents ? "needs" : "with", role2text(cons->role_rh), cons->id); } else { fprintf(stdout, "%s%-*s (score=%s, id=%s)\n", prefix, 80 - (4 * offset), peer->id, score, cons->id); } cli_resource_print_location(peer, prefix); free(score); if (!dependents && recursive) { cli_resource_print_colocation(peer, dependents, recursive, offset + 1); } } free(prefix); } int cli_resource_print(const char *rsc, pe_working_set_t * data_set, bool expanded) { char *rsc_xml = NULL; resource_t *the_rsc = find_rsc_or_clone(rsc, data_set); int opts = pe_print_printf; if (the_rsc == NULL) { return -ENXIO; } if (print_pending) { opts |= pe_print_pending; } the_rsc->fns->print(the_rsc, NULL, opts, stdout); if (expanded) { rsc_xml = dump_xml_formatted(the_rsc->xml); } else { if (the_rsc->orig_xml) { rsc_xml = dump_xml_formatted(the_rsc->orig_xml); } else { rsc_xml = dump_xml_formatted(the_rsc->xml); } } fprintf(stdout, "%sxml:\n%s\n", expanded ? "" : "raw ", rsc_xml); free(rsc_xml); return 0; } int cli_resource_print_attribute(const char *rsc, const char *attr, pe_working_set_t * data_set) { int rc = -ENXIO; node_t *current = NULL; GHashTable *params = NULL; resource_t *the_rsc = find_rsc_or_clone(rsc, data_set); const char *value = NULL; if (the_rsc == NULL) { return -ENXIO; } if (g_list_length(the_rsc->running_on) == 1) { current = the_rsc->running_on->data; } else if (g_list_length(the_rsc->running_on) > 1) { CMD_ERR("%s is active on more than one node," " returning the default value for %s", the_rsc->id, crm_str(value)); } params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if (safe_str_eq(attr_set_type, XML_TAG_ATTR_SETS)) { get_rsc_attributes(params, the_rsc, current, data_set); } else if (safe_str_eq(attr_set_type, XML_TAG_META_SETS)) { /* No need to redirect to the parent */ get_meta_attributes(params, the_rsc, current, data_set); } else { unpack_instance_attributes(data_set->input, the_rsc->xml, XML_TAG_UTILIZATION, NULL, params, NULL, FALSE, data_set->now); } crm_debug("Looking up %s in %s", attr, the_rsc->id); value = g_hash_table_lookup(params, attr); if (value != NULL) { fprintf(stdout, "%s\n", value); rc = 0; } else { CMD_ERR("Attribute '%s' not found for '%s'", attr, the_rsc->id); } g_hash_table_destroy(params); return rc; } int cli_resource_print_property(const char *rsc, const char *attr, pe_working_set_t * data_set) { const char *value = NULL; resource_t *the_rsc = pe_find_resource(data_set->resources, rsc); if (the_rsc == NULL) { return -ENXIO; } value = crm_element_value(the_rsc->xml, attr); if (value != NULL) { fprintf(stdout, "%s\n", value); return 0; } return -ENXIO; } diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c index 7a64bc947a..61aabe9862 100644 --- a/tools/crm_resource_runtime.c +++ b/tools/crm_resource_runtime.c @@ -1,1684 +1,1684 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include bool do_trace = FALSE; bool do_force = FALSE; int crmd_replies_needed = 1; /* The welcome message */ const char *attr_set_type = XML_TAG_ATTR_SETS; static int do_find_resource(const char *rsc, resource_t * the_rsc, pe_working_set_t * data_set) { int found = 0; GListPtr lpc = NULL; for (lpc = the_rsc->running_on; lpc != NULL; lpc = lpc->next) { node_t *node = (node_t *) lpc->data; crm_trace("resource %s is running on: %s", rsc, node->details->uname); if (BE_QUIET) { fprintf(stdout, "%s\n", node->details->uname); } else { const char *state = ""; if (the_rsc->variant < pe_clone && the_rsc->fns->state(the_rsc, TRUE) == RSC_ROLE_MASTER) { state = "Master"; } fprintf(stdout, "resource %s is running on: %s %s\n", rsc, node->details->uname, state); } found++; } if (BE_QUIET == FALSE && found == 0) { fprintf(stderr, "resource %s is NOT running\n", rsc); } return found; } int cli_resource_search(const char *rsc, pe_working_set_t * data_set) { int found = 0; resource_t *the_rsc = NULL; resource_t *parent = NULL; if (the_rsc == NULL) { the_rsc = pe_find_resource(data_set->resources, rsc); } if (the_rsc == NULL) { return -ENXIO; } if (the_rsc->variant >= pe_clone) { GListPtr gIter = the_rsc->children; for (; gIter != NULL; gIter = gIter->next) { found += do_find_resource(rsc, gIter->data, data_set); } /* The anonymous clone children's common ID is supplied */ } else if ((parent = uber_parent(the_rsc)) != NULL && parent->variant >= pe_clone && is_not_set(the_rsc->flags, pe_rsc_unique) && the_rsc->clone_name && safe_str_eq(rsc, the_rsc->clone_name) && safe_str_neq(rsc, the_rsc->id)) { GListPtr gIter = parent->children; for (; gIter != NULL; gIter = gIter->next) { found += do_find_resource(rsc, gIter->data, data_set); } } else { found += do_find_resource(rsc, the_rsc, data_set); } return found; } resource_t * find_rsc_or_clone(const char *rsc, pe_working_set_t * data_set) { resource_t *the_rsc = pe_find_resource(data_set->resources, rsc); if (the_rsc == NULL) { char *as_clone = crm_concat(rsc, "0", ':'); the_rsc = pe_find_resource(data_set->resources, as_clone); free(as_clone); } return the_rsc; } static int find_resource_attr(cib_t * the_cib, const char *attr, const char *rsc, const char *set_type, const char *set_name, const char *attr_id, const char *attr_name, char **value) { int offset = 0; static int xpath_max = 1024; int rc = pcmk_ok; xmlNode *xml_search = NULL; char *xpath_string = NULL; if(value) { *value = NULL; } if(the_cib == NULL) { return -ENOTCONN; } xpath_string = calloc(1, xpath_max); offset += snprintf(xpath_string + offset, xpath_max - offset, "%s", get_object_path("resources")); offset += snprintf(xpath_string + offset, xpath_max - offset, "//*[@id=\"%s\"]", rsc); if (set_type) { offset += snprintf(xpath_string + offset, xpath_max - offset, "/%s", set_type); if (set_name) { offset += snprintf(xpath_string + offset, xpath_max - offset, "[@id=\"%s\"]", set_name); } } offset += snprintf(xpath_string + offset, xpath_max - offset, "//nvpair["); if (attr_id) { offset += snprintf(xpath_string + offset, xpath_max - offset, "@id=\"%s\"", attr_id); } if (attr_name) { if (attr_id) { offset += snprintf(xpath_string + offset, xpath_max - offset, " and "); } offset += snprintf(xpath_string + offset, xpath_max - offset, "@name=\"%s\"", attr_name); } offset += snprintf(xpath_string + offset, xpath_max - offset, "]"); CRM_LOG_ASSERT(offset > 0); rc = the_cib->cmds->query(the_cib, xpath_string, &xml_search, cib_sync_call | cib_scope_local | cib_xpath); if (rc != pcmk_ok) { goto bail; } crm_log_xml_debug(xml_search, "Match"); if (xml_has_children(xml_search)) { xmlNode *child = NULL; rc = -EINVAL; printf("Multiple attributes match name=%s\n", attr_name); for (child = __xml_first_child(xml_search); child != NULL; child = __xml_next(child)) { printf(" Value: %s \t(id=%s)\n", crm_element_value(child, XML_NVPAIR_ATTR_VALUE), ID(child)); } } else if(value) { const char *tmp = crm_element_value(xml_search, attr); if (tmp) { *value = strdup(tmp); } } bail: free(xpath_string); free_xml(xml_search); return rc; } static resource_t * find_matching_attr_resource(resource_t * rsc, const char * rsc_id, const char * attr_set, const char * attr_id, const char * attr_name, cib_t * cib, const char * cmd) { int rc = pcmk_ok; char *lookup_id = NULL; char *local_attr_id = NULL; if(do_force == TRUE) { return rsc; } else if(rsc->parent) { switch(rsc->parent->variant) { case pe_group: if (BE_QUIET == FALSE) { printf("Performing %s of '%s' for '%s' will not apply to its peers in '%s'\n", cmd, attr_name, rsc_id, rsc->parent->id); } break; case pe_master: case pe_clone: rc = find_resource_attr(cib, XML_ATTR_ID, rsc_id, attr_set_type, attr_set, attr_id, attr_name, &local_attr_id); free(local_attr_id); if(rc != pcmk_ok) { rsc = rsc->parent; if (BE_QUIET == FALSE) { printf("Performing %s of '%s' on '%s', the parent of '%s'\n", cmd, attr_name, rsc->id, rsc_id); } } break; default: break; } } else if (rsc->parent && BE_QUIET == FALSE) { printf("Forcing %s of '%s' for '%s' instead of '%s'\n", cmd, attr_name, rsc_id, rsc->parent->id); } else if(rsc->parent == NULL && rsc->children) { resource_t *child = rsc->children->data; if(child->variant == pe_native) { lookup_id = clone_strip(child->id); /* Could be a cloned group! */ rc = find_resource_attr(cib, XML_ATTR_ID, lookup_id, attr_set_type, attr_set, attr_id, attr_name, &local_attr_id); if(rc == pcmk_ok) { rsc = child; if (BE_QUIET == FALSE) { printf("A value for '%s' already exists in child '%s', performing %s on that instead of '%s'\n", attr_name, lookup_id, cmd, rsc_id); } } free(local_attr_id); free(lookup_id); } } return rsc; } int cli_resource_update_attribute(const char *rsc_id, const char *attr_set, const char *attr_id, const char *attr_name, const char *attr_value, bool recursive, cib_t * cib, pe_working_set_t * data_set) { int rc = pcmk_ok; static bool need_init = TRUE; char *lookup_id = NULL; char *local_attr_id = NULL; char *local_attr_set = NULL; xmlNode *xml_top = NULL; xmlNode *xml_obj = NULL; bool use_attributes_tag = FALSE; resource_t *rsc = find_rsc_or_clone(rsc_id, data_set); if (rsc == NULL) { return -ENXIO; } if(attr_id == NULL && do_force == FALSE && pcmk_ok != find_resource_attr( cib, XML_ATTR_ID, uber_parent(rsc)->id, NULL, NULL, NULL, attr_name, NULL)) { printf("\n"); } if (safe_str_eq(attr_set_type, XML_TAG_ATTR_SETS)) { if (do_force == FALSE) { rc = find_resource_attr(cib, XML_ATTR_ID, uber_parent(rsc)->id, XML_TAG_META_SETS, attr_set, attr_id, attr_name, &local_attr_id); if (rc == pcmk_ok && BE_QUIET == FALSE) { printf("WARNING: There is already a meta attribute for '%s' called '%s' (id=%s)\n", uber_parent(rsc)->id, attr_name, local_attr_id); printf(" Delete '%s' first or use --force to override\n", local_attr_id); } free(local_attr_id); if (rc == pcmk_ok) { return -ENOTUNIQ; } } } else { rsc = find_matching_attr_resource(rsc, rsc_id, attr_set, attr_id, attr_name, cib, "update"); } lookup_id = clone_strip(rsc->id); /* Could be a cloned group! */ rc = find_resource_attr(cib, XML_ATTR_ID, lookup_id, attr_set_type, attr_set, attr_id, attr_name, &local_attr_id); if (rc == pcmk_ok) { crm_debug("Found a match for name=%s: id=%s", attr_name, local_attr_id); attr_id = local_attr_id; } else if (rc != -ENXIO) { free(lookup_id); free(local_attr_id); return rc; } else { const char *value = NULL; xmlNode *cib_top = NULL; const char *tag = crm_element_name(rsc->xml); cib->cmds->query(cib, "/cib", &cib_top, cib_sync_call | cib_scope_local | cib_xpath | cib_no_children); value = crm_element_value(cib_top, "ignore_dtd"); if (value != NULL) { use_attributes_tag = TRUE; } else { value = crm_element_value(cib_top, XML_ATTR_VALIDATION); if (crm_ends_with(value, "-0.6")) { use_attributes_tag = TRUE; } } free_xml(cib_top); if (attr_set == NULL) { local_attr_set = crm_concat(lookup_id, attr_set_type, '-'); attr_set = local_attr_set; } if (attr_id == NULL) { local_attr_id = crm_concat(attr_set, attr_name, '-'); attr_id = local_attr_id; } if (use_attributes_tag && safe_str_eq(tag, XML_CIB_TAG_MASTER)) { tag = "master_slave"; /* use the old name */ } xml_top = create_xml_node(NULL, tag); crm_xml_add(xml_top, XML_ATTR_ID, lookup_id); xml_obj = create_xml_node(xml_top, attr_set_type); crm_xml_add(xml_obj, XML_ATTR_ID, attr_set); if (use_attributes_tag) { xml_obj = create_xml_node(xml_obj, XML_TAG_ATTRS); } } xml_obj = create_xml_node(xml_obj, XML_CIB_TAG_NVPAIR); if (xml_top == NULL) { xml_top = xml_obj; } crm_xml_add(xml_obj, XML_ATTR_ID, attr_id); crm_xml_add(xml_obj, XML_NVPAIR_ATTR_NAME, attr_name); crm_xml_add(xml_obj, XML_NVPAIR_ATTR_VALUE, attr_value); crm_log_xml_debug(xml_top, "Update"); rc = cib->cmds->modify(cib, XML_CIB_TAG_RESOURCES, xml_top, cib_options); if (rc == pcmk_ok && BE_QUIET == FALSE) { printf("Set '%s' option: id=%s%s%s%s%s=%s\n", lookup_id, local_attr_id, attr_set ? " set=" : "", attr_set ? attr_set : "", attr_name ? " name=" : "", attr_name ? attr_name : "", attr_value); } free_xml(xml_top); free(lookup_id); free(local_attr_id); free(local_attr_set); if(recursive && safe_str_eq(attr_set_type, XML_TAG_META_SETS)) { GListPtr lpc = NULL; if(need_init) { xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set->input); need_init = FALSE; unpack_constraints(cib_constraints, data_set); for (lpc = data_set->resources; lpc != NULL; lpc = lpc->next) { resource_t *r = (resource_t *) lpc->data; clear_bit(r->flags, pe_rsc_allocating); } } crm_debug("Looking for dependencies %p", rsc->rsc_cons_lhs); set_bit(rsc->flags, pe_rsc_allocating); for (lpc = rsc->rsc_cons_lhs; lpc != NULL; lpc = lpc->next) { rsc_colocation_t *cons = (rsc_colocation_t *) lpc->data; resource_t *peer = cons->rsc_lh; crm_debug("Checking %s %d", cons->id, cons->score); if (cons->score > 0 && is_not_set(peer->flags, pe_rsc_allocating)) { /* Don't get into colocation loops */ crm_debug("Setting %s=%s for dependent resource %s", attr_name, attr_value, peer->id); cli_resource_update_attribute(peer->id, NULL, NULL, attr_name, attr_value, recursive, cib, data_set); } } } return rc; } int cli_resource_delete_attribute(const char *rsc_id, const char *attr_set, const char *attr_id, const char *attr_name, cib_t * cib, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; int rc = pcmk_ok; char *lookup_id = NULL; char *local_attr_id = NULL; resource_t *rsc = find_rsc_or_clone(rsc_id, data_set); if (rsc == NULL) { return -ENXIO; } if(attr_id == NULL && do_force == FALSE && find_resource_attr( cib, XML_ATTR_ID, uber_parent(rsc)->id, NULL, NULL, NULL, attr_name, NULL) != pcmk_ok) { printf("\n"); } if(safe_str_eq(attr_set_type, XML_TAG_META_SETS)) { rsc = find_matching_attr_resource(rsc, rsc_id, attr_set, attr_id, attr_name, cib, "delete"); } lookup_id = clone_strip(rsc->id); rc = find_resource_attr(cib, XML_ATTR_ID, lookup_id, attr_set_type, attr_set, attr_id, attr_name, &local_attr_id); if (rc == -ENXIO) { free(lookup_id); return pcmk_ok; } else if (rc != pcmk_ok) { free(lookup_id); return rc; } if (attr_id == NULL) { attr_id = local_attr_id; } xml_obj = create_xml_node(NULL, XML_CIB_TAG_NVPAIR); crm_xml_add(xml_obj, XML_ATTR_ID, attr_id); crm_xml_add(xml_obj, XML_NVPAIR_ATTR_NAME, attr_name); crm_log_xml_debug(xml_obj, "Delete"); CRM_ASSERT(cib); rc = cib->cmds->delete(cib, XML_CIB_TAG_RESOURCES, xml_obj, cib_options); if (rc == pcmk_ok && BE_QUIET == FALSE) { printf("Deleted '%s' option: id=%s%s%s%s%s\n", lookup_id, local_attr_id, attr_set ? " set=" : "", attr_set ? attr_set : "", attr_name ? " name=" : "", attr_name ? attr_name : ""); } free(lookup_id); free_xml(xml_obj); free(local_attr_id); return rc; } static int send_lrm_rsc_op(crm_ipc_t * crmd_channel, const char *op, const char *host_uname, const char *rsc_id, bool only_failed, pe_working_set_t * data_set) { char *our_pid = NULL; char *key = NULL; int rc = -ECOMM; xmlNode *cmd = NULL; xmlNode *xml_rsc = NULL; const char *value = NULL; const char *router_node = host_uname; xmlNode *params = NULL; xmlNode *msg_data = NULL; resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); if (rsc == NULL) { CMD_ERR("Resource %s not found", rsc_id); return -ENXIO; } else if (rsc->variant != pe_native) { CMD_ERR("We can only process primitive resources, not %s", rsc_id); return -EINVAL; } else if (host_uname == NULL) { CMD_ERR("Please supply a hostname with -H"); return -EINVAL; } else { node_t *node = pe_find_node(data_set->nodes, host_uname); if (node && is_remote_node(node)) { if (node->details->remote_rsc == NULL || node->details->remote_rsc->running_on == NULL) { CMD_ERR("No lrmd connection detected to remote node %s", host_uname); return -ENXIO; } node = node->details->remote_rsc->running_on->data; router_node = node->details->uname; } } key = generate_transition_key(0, getpid(), 0, "xxxxxxxx-xrsc-opxx-xcrm-resourcexxxx"); msg_data = create_xml_node(NULL, XML_GRAPH_TAG_RSC_OP); crm_xml_add(msg_data, XML_ATTR_TRANSITION_KEY, key); free(key); crm_xml_add(msg_data, XML_LRM_ATTR_TARGET, host_uname); if (safe_str_neq(router_node, host_uname)) { crm_xml_add(msg_data, XML_LRM_ATTR_ROUTER_NODE, router_node); } xml_rsc = create_xml_node(msg_data, XML_CIB_TAG_RESOURCE); if (rsc->clone_name) { crm_xml_add(xml_rsc, XML_ATTR_ID, rsc->clone_name); crm_xml_add(xml_rsc, XML_ATTR_ID_LONG, rsc->id); } else { crm_xml_add(xml_rsc, XML_ATTR_ID, rsc->id); } value = crm_copy_xml_element(rsc->xml, xml_rsc, XML_ATTR_TYPE); if (value == NULL) { CMD_ERR("%s has no type! Aborting...", rsc_id); return -ENXIO; } value = crm_copy_xml_element(rsc->xml, xml_rsc, XML_AGENT_ATTR_CLASS); if (value == NULL) { CMD_ERR("%s has no class! Aborting...", rsc_id); return -ENXIO; } crm_copy_xml_element(rsc->xml, xml_rsc, XML_AGENT_ATTR_PROVIDER); params = create_xml_node(msg_data, XML_TAG_ATTRS); crm_xml_add(params, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); key = crm_meta_name(XML_LRM_ATTR_INTERVAL); crm_xml_add(params, key, "60000"); /* 1 minute */ free(key); our_pid = calloc(1, 11); if (our_pid != NULL) { snprintf(our_pid, 10, "%d", getpid()); our_pid[10] = '\0'; } cmd = create_request(op, msg_data, router_node, CRM_SYSTEM_CRMD, crm_system_name, our_pid); /* crm_log_xml_warn(cmd, "send_lrm_rsc_op"); */ free_xml(msg_data); if (crm_ipc_send(crmd_channel, cmd, 0, 0, NULL) > 0) { rc = 0; } else { CMD_ERR("Could not send %s op to the crmd", op); rc = -ENOTCONN; } free_xml(cmd); return rc; } static int cli_delete_attr(cib_t * cib_conn, const char * host_uname, const char * attr_name, pe_working_set_t * data_set) { node_t *node = pe_find_node(data_set->nodes, host_uname); int attr_options = attrd_opt_none; if (node && is_remote_node(node)) { set_bit(attr_options, attrd_opt_remote); } return attrd_update_delegate(NULL, 'D', host_uname, attr_name, NULL, XML_CIB_TAG_STATUS, NULL, NULL, NULL, attr_options); } int cli_resource_delete(cib_t *cib_conn, crm_ipc_t * crmd_channel, const char *host_uname, resource_t * rsc, pe_working_set_t * data_set) { int rc = pcmk_ok; node_t *node = NULL; if (rsc == NULL) { return -ENXIO; } else if (rsc->children) { GListPtr lpc = NULL; for (lpc = rsc->children; lpc != NULL; lpc = lpc->next) { resource_t *child = (resource_t *) lpc->data; rc = cli_resource_delete(cib_conn, crmd_channel, host_uname, child, data_set); if(rc != pcmk_ok || (rsc->variant >= pe_clone && is_not_set(rsc->flags, pe_rsc_unique))) { return rc; } } return pcmk_ok; } else if (host_uname == NULL) { GListPtr lpc = NULL; for (lpc = data_set->nodes; lpc != NULL; lpc = lpc->next) { node = (node_t *) lpc->data; if (node->details->online) { cli_resource_delete(cib_conn, crmd_channel, node->details->uname, rsc, data_set); } } return pcmk_ok; } node = pe_find_node(data_set->nodes, host_uname); if (node && node->details->rsc_discovery_enabled) { printf("Cleaning up %s on %s", rsc->id, host_uname); rc = send_lrm_rsc_op(crmd_channel, CRM_OP_LRM_DELETE, host_uname, rsc->id, TRUE, data_set); } else { printf("Resource discovery disabled on %s. Unable to delete lrm state.\n", host_uname); rc = -EOPNOTSUPP; } if (rc == pcmk_ok) { char *attr_name = NULL; if(node && node->details->remote_rsc == NULL && node->details->rsc_discovery_enabled) { crmd_replies_needed++; } if(is_not_set(rsc->flags, pe_rsc_unique)) { char *id = clone_strip(rsc->id); attr_name = crm_failcount_name(id); free(id); } else if (rsc->clone_name) { attr_name = crm_failcount_name(rsc->clone_name); } else { attr_name = crm_failcount_name(rsc->id); } printf(", removing %s\n", attr_name); rc = cli_delete_attr(cib_conn, host_uname, attr_name, data_set); free(attr_name); } else if(rc != -EOPNOTSUPP) { printf(" - FAILED\n"); } return rc; } void cli_resource_check(cib_t * cib_conn, resource_t *rsc) { int need_nl = 0; char *role_s = NULL; char *managed = NULL; resource_t *parent = uber_parent(rsc); find_resource_attr(cib_conn, XML_NVPAIR_ATTR_VALUE, parent->id, NULL, NULL, NULL, XML_RSC_ATTR_MANAGED, &managed); find_resource_attr(cib_conn, XML_NVPAIR_ATTR_VALUE, parent->id, NULL, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, &role_s); if(role_s) { enum rsc_role_e role = text2role(role_s); if(role == RSC_ROLE_UNKNOWN) { // Treated as if unset } else if(role == RSC_ROLE_STOPPED) { printf("\n * The configuration specifies that '%s' should remain stopped\n", parent->id); need_nl++; } else if(parent->variant > pe_clone && role == RSC_ROLE_SLAVE) { printf("\n * The configuration specifies that '%s' should not be promoted\n", parent->id); need_nl++; } } if(managed && crm_is_true(managed) == FALSE) { printf("%s * The configuration prevents the cluster from stopping or starting '%s' (unmanaged)\n", need_nl == 0?"\n":"", parent->id); need_nl++; } if(need_nl) { printf("\n"); } } int cli_resource_fail(crm_ipc_t * crmd_channel, const char *host_uname, const char *rsc_id, pe_working_set_t * data_set) { crm_warn("Failing: %s", rsc_id); return send_lrm_rsc_op(crmd_channel, CRM_OP_LRM_FAIL, host_uname, rsc_id, FALSE, data_set); } static GHashTable * generate_resource_params(resource_t * rsc, pe_working_set_t * data_set) { GHashTable *params = NULL; GHashTable *meta = NULL; GHashTable *combined = NULL; GHashTableIter iter; if (!rsc) { crm_err("Resource does not exist in config"); return NULL; } params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); meta = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); combined = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); get_rsc_attributes(params, rsc, NULL /* TODO: Pass in local node */ , data_set); get_meta_attributes(meta, rsc, NULL /* TODO: Pass in local node */ , data_set); if (params) { char *key = NULL; char *value = NULL; g_hash_table_iter_init(&iter, params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { g_hash_table_insert(combined, strdup(key), strdup(value)); } g_hash_table_destroy(params); } if (meta) { char *key = NULL; char *value = NULL; g_hash_table_iter_init(&iter, meta); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { char *crm_name = crm_meta_name(key); g_hash_table_insert(combined, crm_name, strdup(value)); } g_hash_table_destroy(meta); } return combined; } static bool resource_is_running_on(resource_t *rsc, const char *host) { bool found = TRUE; GListPtr hIter = NULL; GListPtr hosts = NULL; if(rsc == NULL) { return FALSE; } rsc->fns->location(rsc, &hosts, TRUE); for (hIter = hosts; host != NULL && hIter != NULL; hIter = hIter->next) { pe_node_t *node = (pe_node_t *) hIter->data; if(strcmp(host, node->details->uname) == 0) { crm_trace("Resource %s is running on %s\n", rsc->id, host); goto done; } else if(strcmp(host, node->details->id) == 0) { crm_trace("Resource %s is running on %s\n", rsc->id, host); goto done; } } if(host != NULL) { crm_trace("Resource %s is not running on: %s\n", rsc->id, host); found = FALSE; } else if(host == NULL && hosts == NULL) { crm_trace("Resource %s is not running\n", rsc->id); found = FALSE; } done: g_list_free(hosts); return found; } /*! * \internal * \brief Create a list of all resources active on host from a given list * * \param[in] host Name of host to check whether resources are active * \param[in] rsc_list List of resources to check * * \return New list of resources from list that are active on host */ static GList * get_active_resources(const char *host, GList *rsc_list) { GList *rIter = NULL; GList *active = NULL; for (rIter = rsc_list; rIter != NULL; rIter = rIter->next) { resource_t *rsc = (resource_t *) rIter->data; /* Expand groups to their members, because if we're restarting a member * other than the first, we can't otherwise tell which resources are * stopping and starting. */ if (rsc->variant == pe_group) { active = g_list_concat(active, get_active_resources(host, rsc->children)); } else if (resource_is_running_on(rsc, host)) { active = g_list_append(active, strdup(rsc->id)); } } return active; } static GList *subtract_lists(GList *from, GList *items) { GList *item = NULL; GList *result = g_list_copy(from); for (item = items; item != NULL; item = item->next) { GList *candidate = NULL; for (candidate = from; candidate != NULL; candidate = candidate->next) { crm_info("Comparing %s with %s", candidate->data, item->data); if(strcmp(candidate->data, item->data) == 0) { result = g_list_remove(result, candidate->data); break; } } } return result; } static void dump_list(GList *items, const char *tag) { int lpc = 0; GList *item = NULL; for (item = items; item != NULL; item = item->next) { crm_trace("%s[%d]: %s", tag, lpc, (char*)item->data); lpc++; } } static void display_list(GList *items, const char *tag) { GList *item = NULL; for (item = items; item != NULL; item = item->next) { fprintf(stdout, "%s%s\n", tag, (const char *)item->data); } } /*! * \internal * \brief Upgrade XML to latest schema version and use it as working set input * * This also updates the working set timestamp to the current time. * * \param[in] data_set Working set instance to update * \param[in] xml XML to use as input * * \return pcmk_ok on success, -ENOKEY if unable to upgrade XML * \note On success, caller is responsible for freeing memory allocated for * data_set->now. * \todo This follows the example of other callers of cli_config_update() * and returns -ENOKEY ("Required key not available") if that fails, * but perhaps -pcmk_err_schema_validation would be better in that case. */ int update_working_set_xml(pe_working_set_t *data_set, xmlNode **xml) { if (cli_config_update(xml, NULL, FALSE) == FALSE) { return -ENOKEY; } data_set->input = *xml; data_set->now = crm_time_new(NULL); return pcmk_ok; } /*! * \internal * \brief Update a working set's XML input based on a CIB query * * \param[in] data_set Data set instance to initialize * \param[in] cib Connection to the CIB * * \return pcmk_ok on success, -errno on failure * \note On success, caller is responsible for freeing memory allocated for * data_set->input and data_set->now. */ static int update_working_set_from_cib(pe_working_set_t * data_set, cib_t *cib) { xmlNode *cib_xml_copy = NULL; int rc; rc = cib->cmds->query(cib, NULL, &cib_xml_copy, cib_scope_local | cib_sync_call); if (rc != pcmk_ok) { fprintf(stderr, "Could not obtain the current CIB: %s (%d)\n", pcmk_strerror(rc), rc); return rc; } rc = update_working_set_xml(data_set, &cib_xml_copy); if (rc != pcmk_ok) { fprintf(stderr, "Could not upgrade the current CIB XML\n"); free_xml(cib_xml_copy); return rc; } return pcmk_ok; } static int update_dataset(cib_t *cib, pe_working_set_t * data_set, bool simulate) { char *pid = NULL; char *shadow_file = NULL; cib_t *shadow_cib = NULL; int rc; cleanup_alloc_calculations(data_set); rc = update_working_set_from_cib(data_set, cib); if (rc != pcmk_ok) { return rc; } if(simulate) { pid = crm_itoa(getpid()); shadow_cib = cib_shadow_new(pid); shadow_file = get_shadow_file(pid); if (shadow_cib == NULL) { fprintf(stderr, "Could not create shadow cib: '%s'\n", pid); rc = -ENXIO; goto cleanup; } rc = write_xml_file(data_set->input, shadow_file, FALSE); if (rc < 0) { fprintf(stderr, "Could not populate shadow cib: %s (%d)\n", pcmk_strerror(rc), rc); goto cleanup; } rc = shadow_cib->cmds->signon(shadow_cib, crm_system_name, cib_command); if(rc != pcmk_ok) { fprintf(stderr, "Could not connect to shadow cib: %s (%d)\n", pcmk_strerror(rc), rc); goto cleanup; } do_calculations(data_set, data_set->input, NULL); run_simulation(data_set, shadow_cib, NULL, TRUE); rc = update_dataset(shadow_cib, data_set, FALSE); } else { cluster_status(data_set); } cleanup: /* Do not free data_set->input here, we need rsc->xml to be valid later on */ cib_delete(shadow_cib); free(pid); if(shadow_file) { unlink(shadow_file); free(shadow_file); } return rc; } static int max_delay_for_resource(pe_working_set_t * data_set, resource_t *rsc) { int delay = 0; int max_delay = 0; if(rsc && rsc->children) { GList *iter = NULL; for(iter = rsc->children; iter; iter = iter->next) { resource_t *child = (resource_t *)iter->data; delay = max_delay_for_resource(data_set, child); if(delay > max_delay) { double seconds = delay / 1000.0; crm_trace("Calculated new delay of %.1fs due to %s", seconds, child->id); max_delay = delay; } } } else if(rsc) { char *key = crm_strdup_printf("%s_%s_0", rsc->id, RSC_STOP); action_t *stop = custom_action(rsc, key, RSC_STOP, NULL, TRUE, FALSE, data_set); const char *value = g_hash_table_lookup(stop->meta, XML_ATTR_TIMEOUT); max_delay = crm_int_helper(value, NULL); pe_free_action(stop); } return max_delay; } static int max_delay_in(pe_working_set_t * data_set, GList *resources) { int max_delay = 0; GList *item = NULL; for (item = resources; item != NULL; item = item->next) { int delay = 0; resource_t *rsc = pe_find_resource(data_set->resources, (const char *)item->data); delay = max_delay_for_resource(data_set, rsc); if(delay > max_delay) { double seconds = delay / 1000.0; crm_trace("Calculated new delay of %.1fs due to %s", seconds, rsc->id); max_delay = delay; } } return 5 + (max_delay / 1000); } #define waiting_for_starts(d, r, h) ((g_list_length(d) > 0) || \ (resource_is_running_on((r), (h)) == FALSE)) /*! * \internal * \brief Restart a resource (on a particular host if requested). * * \param[in] rsc The resource to restart * \param[in] host The host to restart the resource on (or NULL for all) * \param[in] timeout_ms Consider failed if actions do not complete in this time * (specified in milliseconds, but a two-second * granularity is actually used; if 0, a timeout will be * calculated based on the resource timeout) * \param[in] cib Connection to the CIB for modifying/checking resource * * \return pcmk_ok on success, -errno on failure (exits on certain failures) */ int cli_resource_restart(resource_t * rsc, const char *host, int timeout_ms, cib_t * cib) { int rc = 0; int lpc = 0; int before = 0; int step_timeout_s = 0; int sleep_interval = 2; int timeout = timeout_ms / 1000; bool is_clone = FALSE; char *rsc_id = NULL; char *orig_target_role = NULL; GList *list_delta = NULL; GList *target_active = NULL; GList *current_active = NULL; GList *restart_target_active = NULL; pe_working_set_t data_set; if(resource_is_running_on(rsc, host) == FALSE) { const char *id = rsc->clone_name?rsc->clone_name:rsc->id; if(host) { printf("%s is not running on %s and so cannot be restarted\n", id, host); } else { printf("%s is not running anywhere and so cannot be restarted\n", id); } return -ENXIO; } /* We might set the target-role meta-attribute */ attr_set_type = XML_TAG_META_SETS; rsc_id = strdup(rsc->id); if(rsc->variant > pe_group) { is_clone = TRUE; } /* grab full cib determine originally active resources disable or ban poll cib and watch for affected resources to get stopped without --timeout, calculate the stop timeout for each step and wait for that if we hit --timeout or the service timeout, re-enable or un-ban, report failure and indicate which resources we couldn't take down if everything stopped, re-enable or un-ban poll cib and watch for affected resources to get started without --timeout, calculate the start timeout for each step and wait for that if we hit --timeout or the service timeout, report (different) failure and indicate which resources we couldn't bring back up report success Optimizations: - use constraints to determine ordered list of affected resources - Allow a --no-deps option (aka. --force-restart) */ set_working_set_defaults(&data_set); rc = update_dataset(cib, &data_set, FALSE); if(rc != pcmk_ok) { fprintf(stdout, "Could not get new resource list: %s (%d)\n", pcmk_strerror(rc), rc); free(rsc_id); return rc; } restart_target_active = get_active_resources(host, data_set.resources); current_active = get_active_resources(host, data_set.resources); dump_list(current_active, "Origin"); if(is_clone && host) { /* Stop the clone instance by banning it from the host */ BE_QUIET = TRUE; rc = cli_resource_ban(rsc_id, host, NULL, cib); } else { /* Stop the resource by setting target-role to Stopped. * Remember any existing target-role so we can restore it later * (though it only makes any difference if it's Slave). */ char *lookup_id = clone_strip(rsc->id); find_resource_attr(cib, XML_NVPAIR_ATTR_VALUE, lookup_id, NULL, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, &orig_target_role); free(lookup_id); rc = cli_resource_update_attribute(rsc_id, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, RSC_STOPPED, FALSE, cib, &data_set); } if(rc != pcmk_ok) { fprintf(stderr, "Could not set target-role for %s: %s (%d)\n", rsc_id, pcmk_strerror(rc), rc); if (current_active) { g_list_free_full(current_active, free); } if (restart_target_active) { g_list_free_full(restart_target_active, free); } free(rsc_id); return crm_exit(rc); } rc = update_dataset(cib, &data_set, TRUE); if(rc != pcmk_ok) { fprintf(stderr, "Could not determine which resources would be stopped\n"); goto failure; } target_active = get_active_resources(host, data_set.resources); dump_list(target_active, "Target"); list_delta = subtract_lists(current_active, target_active); fprintf(stdout, "Waiting for %d resources to stop:\n", g_list_length(list_delta)); display_list(list_delta, " * "); step_timeout_s = timeout / sleep_interval; while(g_list_length(list_delta) > 0) { before = g_list_length(list_delta); if(timeout_ms == 0) { step_timeout_s = max_delay_in(&data_set, list_delta) / sleep_interval; } /* We probably don't need the entire step timeout */ for(lpc = 0; lpc < step_timeout_s && g_list_length(list_delta) > 0; lpc++) { sleep(sleep_interval); if(timeout) { timeout -= sleep_interval; crm_trace("%ds remaining", timeout); } rc = update_dataset(cib, &data_set, FALSE); if(rc != pcmk_ok) { fprintf(stderr, "Could not determine which resources were stopped\n"); goto failure; } if (current_active) { g_list_free_full(current_active, free); } current_active = get_active_resources(host, data_set.resources); g_list_free(list_delta); list_delta = subtract_lists(current_active, target_active); dump_list(current_active, "Current"); dump_list(list_delta, "Delta"); } crm_trace("%d (was %d) resources remaining", g_list_length(list_delta), before); if(before == g_list_length(list_delta)) { /* aborted during stop phase, print the contents of list_delta */ fprintf(stderr, "Could not complete shutdown of %s, %d resources remaining\n", rsc_id, g_list_length(list_delta)); display_list(list_delta, " * "); rc = -ETIME; goto failure; } } if(is_clone && host) { rc = cli_resource_clear(rsc_id, host, NULL, cib); } else if (orig_target_role) { rc = cli_resource_update_attribute(rsc_id, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, orig_target_role, FALSE, cib, &data_set); free(orig_target_role); orig_target_role = NULL; } else { rc = cli_resource_delete_attribute(rsc_id, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, cib, &data_set); } if(rc != pcmk_ok) { fprintf(stderr, "Could not unset target-role for %s: %s (%d)\n", rsc_id, pcmk_strerror(rc), rc); free(rsc_id); return crm_exit(rc); } if (target_active) { g_list_free_full(target_active, free); } target_active = restart_target_active; if (list_delta) { g_list_free(list_delta); } list_delta = subtract_lists(target_active, current_active); fprintf(stdout, "Waiting for %d resources to start again:\n", g_list_length(list_delta)); display_list(list_delta, " * "); step_timeout_s = timeout / sleep_interval; while (waiting_for_starts(list_delta, rsc, host)) { before = g_list_length(list_delta); if(timeout_ms == 0) { step_timeout_s = max_delay_in(&data_set, list_delta) / sleep_interval; } /* We probably don't need the entire step timeout */ for (lpc = 0; (lpc < step_timeout_s) && waiting_for_starts(list_delta, rsc, host); lpc++) { sleep(sleep_interval); if(timeout) { timeout -= sleep_interval; crm_trace("%ds remaining", timeout); } rc = update_dataset(cib, &data_set, FALSE); if(rc != pcmk_ok) { fprintf(stderr, "Could not determine which resources were started\n"); goto failure; } if (current_active) { g_list_free_full(current_active, free); } /* It's OK if dependent resources moved to a different node, * so we check active resources on all nodes. */ current_active = get_active_resources(NULL, data_set.resources); g_list_free(list_delta); list_delta = subtract_lists(target_active, current_active); dump_list(current_active, "Current"); dump_list(list_delta, "Delta"); } if(before == g_list_length(list_delta)) { /* aborted during start phase, print the contents of list_delta */ fprintf(stdout, "Could not complete restart of %s, %d resources remaining\n", rsc_id, g_list_length(list_delta)); display_list(list_delta, " * "); rc = -ETIME; goto failure; } } rc = pcmk_ok; goto done; failure: if(is_clone && host) { cli_resource_clear(rsc_id, host, NULL, cib); } else if (orig_target_role) { cli_resource_update_attribute(rsc_id, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, orig_target_role, FALSE, cib, &data_set); free(orig_target_role); } else { cli_resource_delete_attribute(rsc_id, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, cib, &data_set); } done: if (list_delta) { g_list_free(list_delta); } if (current_active) { g_list_free_full(current_active, free); } if (target_active && (target_active != restart_target_active)) { g_list_free_full(target_active, free); } if (restart_target_active) { g_list_free_full(restart_target_active, free); } cleanup_alloc_calculations(&data_set); free(rsc_id); return rc; } #define action_is_pending(action) \ ((is_set((action)->flags, pe_action_optional) == FALSE) \ && (is_set((action)->flags, pe_action_runnable) == TRUE) \ && (is_set((action)->flags, pe_action_pseudo) == FALSE)) /*! * \internal * \brief Return TRUE if any actions in a list are pending * * \param[in] actions List of actions to check * * \return TRUE if any actions in the list are pending, FALSE otherwise */ static bool actions_are_pending(GListPtr actions) { GListPtr action; for (action = actions; action != NULL; action = action->next) { if (action_is_pending((action_t *) action->data)) { return TRUE; } } return FALSE; } /*! * \internal * \brief Print pending actions to stderr * * \param[in] actions List of actions to check * * \return void */ static void print_pending_actions(GListPtr actions) { GListPtr action; fprintf(stderr, "Pending actions:\n"); for (action = actions; action != NULL; action = action->next) { action_t *a = (action_t *) action->data; if (action_is_pending(a)) { fprintf(stderr, "\tAction %d: %s", a->id, a->uuid); if (a->node) { fprintf(stderr, "\ton %s", a->node->details->uname); } fprintf(stderr, "\n"); } } } /* For --wait, timeout (in seconds) to use if caller doesn't specify one */ #define WAIT_DEFAULT_TIMEOUT_S (60 * 60) /* For --wait, how long to sleep between cluster state checks */ #define WAIT_SLEEP_S (2) /*! * \internal * \brief Wait until all pending cluster actions are complete * * This waits until either the CIB's transition graph is idle or a timeout is * reached. * * \param[in] timeout_ms Consider failed if actions do not complete in this time * (specified in milliseconds, but one-second granularity * is actually used; if 0, a default will be used) * \param[in] cib Connection to the CIB * * \return pcmk_ok on success, -errno on failure */ int wait_till_stable(int timeout_ms, cib_t * cib) { pe_working_set_t data_set; int rc = -1; int timeout_s = timeout_ms? ((timeout_ms + 999) / 1000) : WAIT_DEFAULT_TIMEOUT_S; time_t expire_time = time(NULL) + timeout_s; time_t time_diff; set_working_set_defaults(&data_set); do { /* Abort if timeout is reached */ time_diff = expire_time - time(NULL); if (time_diff > 0) { crm_info("Waiting up to %d seconds for cluster actions to complete", time_diff); } else { print_pending_actions(data_set.actions); cleanup_alloc_calculations(&data_set); return -ETIME; } if (rc == pcmk_ok) { /* this avoids sleep on first loop iteration */ sleep(WAIT_SLEEP_S); } /* Get latest transition graph */ cleanup_alloc_calculations(&data_set); rc = update_working_set_from_cib(&data_set, cib); if (rc != pcmk_ok) { cleanup_alloc_calculations(&data_set); return rc; } do_calculations(&data_set, data_set.input, NULL); } while (actions_are_pending(data_set.actions)); return pcmk_ok; } int cli_resource_execute(const char *rsc_id, const char *rsc_action, GHashTable *override_hash, cib_t * cib, pe_working_set_t *data_set) { int rc = pcmk_ok; svc_action_t *op = NULL; const char *rtype = NULL; const char *rprov = NULL; const char *rclass = NULL; const char *action = NULL; GHashTable *params = NULL; resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); if (rsc == NULL) { CMD_ERR("Must supply a resource id with -r"); return -ENXIO; } if (safe_str_eq(rsc_action, "force-check")) { action = "monitor"; } else if (safe_str_eq(rsc_action, "force-stop")) { action = rsc_action+6; } else if (safe_str_eq(rsc_action, "force-start") || safe_str_eq(rsc_action, "force-demote") || safe_str_eq(rsc_action, "force-promote")) { action = rsc_action+6; if(rsc->variant >= pe_clone) { rc = cli_resource_search(rsc_id, data_set); if(rc > 0 && do_force == FALSE) { CMD_ERR("It is not safe to %s %s here: the cluster claims it is already active", action, rsc_id); CMD_ERR("Try setting target-role=stopped first or specifying --force"); crm_exit(EPERM); } } } if(rsc->variant == pe_clone || rsc->variant == pe_master) { /* Grab the first child resource in the hope it's not a group */ rsc = rsc->children->data; } if(rsc->variant == pe_group) { CMD_ERR("Sorry, --%s doesn't support group resources", rsc_action); crm_exit(EOPNOTSUPP); } rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); rprov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); rtype = crm_element_value(rsc->xml, XML_ATTR_TYPE); - if(safe_str_eq(rclass, "stonith")){ + if (safe_str_eq(rclass, PCMK_RESOURCE_CLASS_STONITH)) { CMD_ERR("Sorry, --%s doesn't support %s resources yet", rsc_action, rclass); crm_exit(EOPNOTSUPP); } params = generate_resource_params(rsc, data_set); op = resources_action_create(rsc->id, rclass, rprov, rtype, action, 0, -1, params, 0); if(do_trace) { setenv("OCF_TRACE_RA", "1", 1); } if(op && override_hash) { GHashTableIter iter; char *name = NULL; char *value = NULL; g_hash_table_iter_init(&iter, override_hash); while (g_hash_table_iter_next(&iter, (gpointer *) & name, (gpointer *) & value)) { printf("Overriding the cluster configuration for '%s' with '%s' = '%s'\n", rsc->id, name, value); g_hash_table_replace(op->params, strdup(name), strdup(value)); } } if(op == NULL) { /* Re-run but with stderr enabled so we can display a sane error message */ crm_enable_stderr(TRUE); resources_action_create(rsc->id, rclass, rprov, rtype, action, 0, -1, params, 0); return crm_exit(EINVAL); } else if (services_action_sync(op)) { int more, lpc, last; char *local_copy = NULL; if (op->status == PCMK_LRM_OP_DONE) { printf("Operation %s for %s (%s:%s:%s) returned %d\n", action, rsc->id, rclass, rprov ? rprov : "", rtype, op->rc); } else { printf("Operation %s for %s (%s:%s:%s) failed: %d\n", action, rsc->id, rclass, rprov ? rprov : "", rtype, op->status); } if (op->stdout_data) { local_copy = strdup(op->stdout_data); more = strlen(local_copy); last = 0; for (lpc = 0; lpc < more; lpc++) { if (local_copy[lpc] == '\n' || local_copy[lpc] == 0) { local_copy[lpc] = 0; printf(" > stdout: %s\n", local_copy + last); last = lpc + 1; } } free(local_copy); } if (op->stderr_data) { local_copy = strdup(op->stderr_data); more = strlen(local_copy); last = 0; for (lpc = 0; lpc < more; lpc++) { if (local_copy[lpc] == '\n' || local_copy[lpc] == 0) { local_copy[lpc] = 0; printf(" > stderr: %s\n", local_copy + last); last = lpc + 1; } } free(local_copy); } } rc = op->rc; services_action_free(op); return rc; } int cli_resource_move(const char *rsc_id, const char *host_name, cib_t * cib, pe_working_set_t *data_set) { int rc = -EINVAL; int count = 0; node_t *current = NULL; node_t *dest = pe_find_node(data_set->nodes, host_name); resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); bool cur_is_dest = FALSE; if (rsc == NULL) { CMD_ERR("Resource '%s' not moved: not found", rsc_id); return -ENXIO; } else if (scope_master && rsc->variant < pe_master) { resource_t *p = uber_parent(rsc); if(p->variant == pe_master) { CMD_ERR("Using parent '%s' for --move command instead of '%s'.", rsc->id, rsc_id); rsc_id = p->id; rsc = p; } else { CMD_ERR("Ignoring '--master' option: not valid for %s resources.", get_resource_typename(rsc->variant)); scope_master = FALSE; } } if(rsc->variant == pe_master) { GListPtr iter = NULL; for(iter = rsc->children; iter; iter = iter->next) { resource_t *child = (resource_t *)iter->data; enum rsc_role_e child_role = child->fns->state(child, TRUE); if(child_role == RSC_ROLE_MASTER) { rsc = child; count++; } } if(scope_master == FALSE && count == 0) { count = g_list_length(rsc->running_on); } } else if (rsc->variant > pe_group) { count = g_list_length(rsc->running_on); } else if (g_list_length(rsc->running_on) > 1) { CMD_ERR("Resource '%s' not moved: active on multiple nodes", rsc_id); return rc; } if(dest == NULL) { CMD_ERR("Error performing operation: node '%s' is unknown", host_name); return -ENXIO; } if(g_list_length(rsc->running_on) == 1) { current = rsc->running_on->data; } if(current == NULL) { /* Nothing to check */ } else if(scope_master && rsc->fns->state(rsc, TRUE) != RSC_ROLE_MASTER) { crm_trace("%s is already active on %s but not in correct state", rsc_id, dest->details->uname); } else if (safe_str_eq(current->details->uname, dest->details->uname)) { cur_is_dest = TRUE; if (do_force) { crm_info("%s is already %s on %s, reinforcing placement with location constraint.", rsc_id, scope_master?"promoted":"active", dest->details->uname); } else { CMD_ERR("Error performing operation: %s is already %s on %s", rsc_id, scope_master?"promoted":"active", dest->details->uname); return rc; } } /* Clear any previous constraints for 'dest' */ cli_resource_clear(rsc_id, dest->details->uname, data_set->nodes, cib); /* Record an explicit preference for 'dest' */ rc = cli_resource_prefer(rsc_id, dest->details->uname, cib); crm_trace("%s%s now prefers node %s%s", rsc->id, scope_master?" (master)":"", dest->details->uname, do_force?"(forced)":""); /* only ban the previous location if current location != destination location. * it is possible to use -M to enforce a location without regard of where the * resource is currently located */ if(do_force && (cur_is_dest == FALSE)) { /* Ban the original location if possible */ if(current) { (void)cli_resource_ban(rsc_id, current->details->uname, NULL, cib); } else if(count > 1) { CMD_ERR("Resource '%s' is currently %s in %d locations. One may now move one to %s", rsc_id, scope_master?"promoted":"active", count, dest->details->uname); CMD_ERR("You can prevent '%s' from being %s at a specific location with:" " --ban %s--host ", rsc_id, scope_master?"promoted":"active", scope_master?"--master ":""); } else { crm_trace("Not banning %s from its current location: not active", rsc_id); } } return rc; } diff --git a/tools/fake_transition.c b/tools/fake_transition.c index c45349f545..6448c143c0 100644 --- a/tools/fake_transition.c +++ b/tools/fake_transition.c @@ -1,852 +1,853 @@ /* * Copyright (C) 2009 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fake_transition.h" static bool fake_quiet = FALSE; static cib_t *fake_cib = NULL; static GListPtr fake_resource_list = NULL; static GListPtr fake_op_fail_list = NULL; gboolean bringing_nodes_online = FALSE; #define STATUS_PATH_MAX 512 #define quiet_log(fmt, args...) do { \ if(fake_quiet) { \ crm_trace(fmt, ##args); \ } else { \ printf(fmt , ##args); \ } \ } while(0) #define new_node_template "//"XML_CIB_TAG_NODE"[@uname='%s']" #define node_template "//"XML_CIB_TAG_STATE"[@uname='%s']" #define rsc_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']" #define op_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']/"XML_LRM_TAG_RSC_OP"[@id='%s']" /* #define op_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']/"XML_LRM_TAG_RSC_OP"[@id='%s' and @"XML_LRM_ATTR_CALLID"='%d']" */ static void inject_transient_attr(xmlNode * cib_node, const char *name, const char *value) { xmlNode *attrs = NULL; xmlNode *container = NULL; xmlNode *nvp = NULL; xmlChar *node_path; const char *node_uuid = ID(cib_node); char *nvp_id = crm_concat(name, node_uuid, '-'); node_path = xmlGetNodePath(cib_node); quiet_log("Injecting attribute %s=%s into %s '%s'", name, value, node_path, ID(cib_node)); free(node_path); attrs = first_named_child(cib_node, XML_TAG_TRANSIENT_NODEATTRS); if (attrs == NULL) { attrs = create_xml_node(cib_node, XML_TAG_TRANSIENT_NODEATTRS); crm_xml_add(attrs, XML_ATTR_ID, node_uuid); } container = first_named_child(attrs, XML_TAG_ATTR_SETS); if (container == NULL) { container = create_xml_node(attrs, XML_TAG_ATTR_SETS); crm_xml_add(container, XML_ATTR_ID, node_uuid); } nvp = create_xml_node(container, XML_CIB_TAG_NVPAIR); crm_xml_add(nvp, XML_ATTR_ID, nvp_id); crm_xml_add(nvp, XML_NVPAIR_ATTR_NAME, name); crm_xml_add(nvp, XML_NVPAIR_ATTR_VALUE, value); free(nvp_id); } static void update_failcounts(xmlNode * cib_node, const char *resource, int interval, int rc) { if (rc == 0) { return; } else if (rc == 7 && interval == 0) { return; } else { char *name = NULL; char *now = crm_itoa(time(NULL)); name = crm_failcount_name(resource); inject_transient_attr(cib_node, name, "value++"); free(name); name = crm_lastfailure_name(resource); inject_transient_attr(cib_node, name, now); free(name); free(now); } } static void create_node_entry(cib_t * cib_conn, const char *node) { int rc = pcmk_ok; int max = strlen(new_node_template) + strlen(node) + 1; char *xpath = NULL; xpath = calloc(1, max); snprintf(xpath, max, new_node_template, node); rc = cib_conn->cmds->query(cib_conn, xpath, NULL, cib_xpath | cib_sync_call | cib_scope_local); if (rc == -ENXIO) { xmlNode *cib_object = create_xml_node(NULL, XML_CIB_TAG_NODE); /* Using node uname as uuid ala corosync/openais */ crm_xml_add(cib_object, XML_ATTR_ID, node); crm_xml_add(cib_object, XML_ATTR_UNAME, node); cib_conn->cmds->create(cib_conn, XML_CIB_TAG_NODES, cib_object, cib_sync_call | cib_scope_local); /* Not bothering with subsequent query to see if it exists, we'll bomb out later in the call to query_node_uuid()... */ free_xml(cib_object); } free(xpath); } static lrmd_event_data_t * create_op(xmlNode * cib_resource, const char *task, int interval, int outcome) { lrmd_event_data_t *op = NULL; xmlNode *xop = NULL; op = calloc(1, sizeof(lrmd_event_data_t)); op->rsc_id = strdup(ID(cib_resource)); op->interval = interval; op->op_type = strdup(task); op->rc = outcome; op->op_status = 0; op->params = NULL; /* TODO: Fill me in */ op->t_run = time(NULL); op->t_rcchange = op->t_run; op->call_id = 0; for (xop = __xml_first_child(cib_resource); xop != NULL; xop = __xml_next(xop)) { int tmp = 0; crm_element_value_int(xop, XML_LRM_ATTR_CALLID, &tmp); if (tmp > op->call_id) { op->call_id = tmp; } } op->call_id++; return op; } static xmlNode * inject_op(xmlNode * cib_resource, lrmd_event_data_t * op, int target_rc) { return create_operation_update(cib_resource, op, CRM_FEATURE_SET, target_rc, NULL, crm_system_name, LOG_DEBUG_2); } static xmlNode * inject_node_state(cib_t * cib_conn, const char *node, const char *uuid) { int rc = pcmk_ok; int max = strlen(rsc_template) + strlen(node) + 1; char *xpath = NULL; xmlNode *cib_object = NULL; xpath = calloc(1, max); if (bringing_nodes_online) { create_node_entry(cib_conn, node); } snprintf(xpath, max, node_template, node); rc = cib_conn->cmds->query(cib_conn, xpath, &cib_object, cib_xpath | cib_sync_call | cib_scope_local); if (cib_object && ID(cib_object) == NULL) { crm_err("Detected multiple node_state entries for xpath=%s, bailing", xpath); crm_log_xml_warn(cib_object, "Duplicates"); crm_exit(ENOTUNIQ); } if (rc == -ENXIO) { char *found_uuid = NULL; if (uuid == NULL) { query_node_uuid(cib_conn, node, &found_uuid, NULL); } else { found_uuid = strdup(uuid); } cib_object = create_xml_node(NULL, XML_CIB_TAG_STATE); crm_xml_add(cib_object, XML_ATTR_UUID, found_uuid); crm_xml_add(cib_object, XML_ATTR_UNAME, node); cib_conn->cmds->create(cib_conn, XML_CIB_TAG_STATUS, cib_object, cib_sync_call | cib_scope_local); free_xml(cib_object); free(found_uuid); rc = cib_conn->cmds->query(cib_conn, xpath, &cib_object, cib_xpath | cib_sync_call | cib_scope_local); crm_trace("injecting node state for %s. rc is %d", node, rc); } free(xpath); CRM_ASSERT(rc == pcmk_ok); return cib_object; } static xmlNode * modify_node(cib_t * cib_conn, char *node, gboolean up) { xmlNode *cib_node = inject_node_state(cib_conn, node, NULL); if (up) { crm_xml_add(cib_node, XML_NODE_IN_CLUSTER, XML_BOOLEAN_YES); crm_xml_add(cib_node, XML_NODE_IS_PEER, ONLINESTATUS); crm_xml_add(cib_node, XML_NODE_JOIN_STATE, CRMD_JOINSTATE_MEMBER); crm_xml_add(cib_node, XML_NODE_EXPECTED, CRMD_JOINSTATE_MEMBER); } else { crm_xml_add(cib_node, XML_NODE_IN_CLUSTER, XML_BOOLEAN_NO); crm_xml_add(cib_node, XML_NODE_IS_PEER, OFFLINESTATUS); crm_xml_add(cib_node, XML_NODE_JOIN_STATE, CRMD_JOINSTATE_DOWN); crm_xml_add(cib_node, XML_NODE_EXPECTED, CRMD_JOINSTATE_DOWN); } crm_xml_add(cib_node, XML_ATTR_ORIGIN, crm_system_name); return cib_node; } static xmlNode * find_resource_xml(xmlNode * cib_node, const char *resource) { char *xpath = NULL; xmlNode *match = NULL; const char *node = crm_element_value(cib_node, XML_ATTR_UNAME); int max = strlen(rsc_template) + strlen(resource) + strlen(node) + 1; xpath = calloc(1, max); snprintf(xpath, max, rsc_template, node, resource); match = get_xpath_object(xpath, cib_node, LOG_DEBUG_2); free(xpath); return match; } static xmlNode * inject_resource(xmlNode * cib_node, const char *resource, const char *rclass, const char *rtype, const char *rprovider) { xmlNode *lrm = NULL; xmlNode *container = NULL; xmlNode *cib_resource = NULL; char *xpath = NULL; cib_resource = find_resource_xml(cib_node, resource); if (cib_resource != NULL) { return cib_resource; } /* One day, add query for class, provider, type */ if (rclass == NULL || rtype == NULL) { fprintf(stderr, "Resource %s not found in the status section of %s." " Please supply the class and type to continue\n", resource, ID(cib_node)); return NULL; - } else if (safe_str_neq(rclass, "ocf") - && safe_str_neq(rclass, "stonith") - && safe_str_neq(rclass, "heartbeat") - && safe_str_neq(rclass, "service") - && safe_str_neq(rclass, "upstart") - && safe_str_neq(rclass, "systemd") - && safe_str_neq(rclass, "lsb")) { + } else if (safe_str_neq(rclass, PCMK_RESOURCE_CLASS_OCF) + && safe_str_neq(rclass, PCMK_RESOURCE_CLASS_STONITH) + && safe_str_neq(rclass, PCMK_RESOURCE_CLASS_HB) + && safe_str_neq(rclass, PCMK_RESOURCE_CLASS_SERVICE) + && safe_str_neq(rclass, PCMK_RESOURCE_CLASS_UPSTART) + && safe_str_neq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD) + && safe_str_neq(rclass, PCMK_RESOURCE_CLASS_LSB)) { fprintf(stderr, "Invalid class for %s: %s\n", resource, rclass); return NULL; - } else if (safe_str_eq(rclass, "ocf") && rprovider == NULL) { + } else if (safe_str_eq(rclass, PCMK_RESOURCE_CLASS_OCF) + && rprovider == NULL) { fprintf(stderr, "Please specify the provider for resource %s\n", resource); return NULL; } xpath = (char *)xmlGetNodePath(cib_node); crm_info("Injecting new resource %s into %s '%s'", resource, xpath, ID(cib_node)); free(xpath); lrm = first_named_child(cib_node, XML_CIB_TAG_LRM); if (lrm == NULL) { const char *node_uuid = ID(cib_node); lrm = create_xml_node(cib_node, XML_CIB_TAG_LRM); crm_xml_add(lrm, XML_ATTR_ID, node_uuid); } container = first_named_child(lrm, XML_LRM_TAG_RESOURCES); if (container == NULL) { container = create_xml_node(lrm, XML_LRM_TAG_RESOURCES); } cib_resource = create_xml_node(container, XML_LRM_TAG_RESOURCE); crm_xml_add(cib_resource, XML_ATTR_ID, resource); crm_xml_add(cib_resource, XML_AGENT_ATTR_CLASS, rclass); crm_xml_add(cib_resource, XML_AGENT_ATTR_PROVIDER, rprovider); crm_xml_add(cib_resource, XML_ATTR_TYPE, rtype); return cib_resource; } static int find_ticket_state(cib_t * the_cib, const char *ticket_id, xmlNode ** ticket_state_xml) { int offset = 0; static int xpath_max = 1024; int rc = pcmk_ok; xmlNode *xml_search = NULL; char *xpath_string = NULL; CRM_ASSERT(ticket_state_xml != NULL); *ticket_state_xml = NULL; xpath_string = calloc(1, xpath_max); offset += snprintf(xpath_string + offset, xpath_max - offset, "%s", "/cib/status/tickets"); if (ticket_id) { offset += snprintf(xpath_string + offset, xpath_max - offset, "/%s[@id=\"%s\"]", XML_CIB_TAG_TICKET_STATE, ticket_id); } CRM_LOG_ASSERT(offset > 0); rc = the_cib->cmds->query(the_cib, xpath_string, &xml_search, cib_sync_call | cib_scope_local | cib_xpath); if (rc != pcmk_ok) { goto bail; } crm_log_xml_debug(xml_search, "Match"); if (xml_has_children(xml_search)) { if (ticket_id) { fprintf(stdout, "Multiple ticket_states match ticket_id=%s\n", ticket_id); } *ticket_state_xml = xml_search; } else { *ticket_state_xml = xml_search; } bail: free(xpath_string); return rc; } static int set_ticket_state_attr(const char *ticket_id, const char *attr_name, const char *attr_value, cib_t * cib, int cib_options) { int rc = pcmk_ok; xmlNode *xml_top = NULL; xmlNode *ticket_state_xml = NULL; rc = find_ticket_state(cib, ticket_id, &ticket_state_xml); if (rc == pcmk_ok) { crm_debug("Found a match state for ticket: id=%s", ticket_id); xml_top = ticket_state_xml; } else if (rc != -ENXIO) { return rc; } else { xmlNode *xml_obj = NULL; xml_top = create_xml_node(NULL, XML_CIB_TAG_STATUS); xml_obj = create_xml_node(xml_top, XML_CIB_TAG_TICKETS); ticket_state_xml = create_xml_node(xml_obj, XML_CIB_TAG_TICKET_STATE); crm_xml_add(ticket_state_xml, XML_ATTR_ID, ticket_id); } crm_xml_add(ticket_state_xml, attr_name, attr_value); crm_log_xml_debug(xml_top, "Update"); rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, xml_top, cib_options); free_xml(xml_top); return rc; } void modify_configuration(pe_working_set_t * data_set, cib_t *cib, const char *quorum, const char *watchdog, GListPtr node_up, GListPtr node_down, GListPtr node_fail, GListPtr op_inject, GListPtr ticket_grant, GListPtr ticket_revoke, GListPtr ticket_standby, GListPtr ticket_activate) { int rc = pcmk_ok; GListPtr gIter = NULL; xmlNode *cib_op = NULL; xmlNode *cib_node = NULL; xmlNode *cib_resource = NULL; lrmd_event_data_t *op = NULL; if (quorum) { xmlNode *top = create_xml_node(NULL, XML_TAG_CIB); quiet_log(" + Setting quorum: %s\n", quorum); /* crm_xml_add(top, XML_ATTR_DC_UUID, dc_uuid); */ crm_xml_add(top, XML_ATTR_HAVE_QUORUM, quorum); rc = cib->cmds->modify(cib, NULL, top, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); } if (watchdog) { quiet_log(" + Setting watchdog: %s\n", watchdog); rc = update_attr_delegate(cib, cib_sync_call | cib_scope_local, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, XML_ATTR_HAVE_WATCHDOG, watchdog, FALSE, NULL, NULL); CRM_ASSERT(rc == pcmk_ok); } for (gIter = node_up; gIter != NULL; gIter = gIter->next) { char *node = (char *)gIter->data; quiet_log(" + Bringing node %s online\n", node); cib_node = modify_node(cib, node, TRUE); CRM_ASSERT(cib_node != NULL); rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, cib_node, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); free_xml(cib_node); } for (gIter = node_down; gIter != NULL; gIter = gIter->next) { char xpath[STATUS_PATH_MAX]; char *node = (char *)gIter->data; quiet_log(" + Taking node %s offline\n", node); cib_node = modify_node(cib, node, FALSE); CRM_ASSERT(cib_node != NULL); rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, cib_node, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); free_xml(cib_node); snprintf(xpath, STATUS_PATH_MAX, "//node_state[@uname='%s']/%s", node, XML_CIB_TAG_LRM); cib->cmds->delete(cib, xpath, NULL, cib_xpath | cib_sync_call | cib_scope_local); snprintf(xpath, STATUS_PATH_MAX, "//node_state[@uname='%s']/%s", node, XML_TAG_TRANSIENT_NODEATTRS); cib->cmds->delete(cib, xpath, NULL, cib_xpath | cib_sync_call | cib_scope_local); } for (gIter = node_fail; gIter != NULL; gIter = gIter->next) { char *node = (char *)gIter->data; quiet_log(" + Failing node %s\n", node); cib_node = modify_node(cib, node, TRUE); crm_xml_add(cib_node, XML_NODE_IN_CLUSTER, XML_BOOLEAN_NO); CRM_ASSERT(cib_node != NULL); rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, cib_node, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); free_xml(cib_node); } for (gIter = ticket_grant; gIter != NULL; gIter = gIter->next) { char *ticket_id = (char *)gIter->data; quiet_log(" + Granting ticket %s\n", ticket_id); rc = set_ticket_state_attr(ticket_id, "granted", "true", cib, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); } for (gIter = ticket_revoke; gIter != NULL; gIter = gIter->next) { char *ticket_id = (char *)gIter->data; quiet_log(" + Revoking ticket %s\n", ticket_id); rc = set_ticket_state_attr(ticket_id, "granted", "false", cib, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); } for (gIter = ticket_standby; gIter != NULL; gIter = gIter->next) { char *ticket_id = (char *)gIter->data; quiet_log(" + Making ticket %s standby\n", ticket_id); rc = set_ticket_state_attr(ticket_id, "standby", "true", cib, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); } for (gIter = ticket_activate; gIter != NULL; gIter = gIter->next) { char *ticket_id = (char *)gIter->data; quiet_log(" + Activating ticket %s\n", ticket_id); rc = set_ticket_state_attr(ticket_id, "standby", "false", cib, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); } for (gIter = op_inject; gIter != NULL; gIter = gIter->next) { char *spec = (char *)gIter->data; int rc = 0; int outcome = 0; int interval = 0; char *key = NULL; char *node = NULL; char *task = NULL; char *resource = NULL; const char *rtype = NULL; const char *rclass = NULL; const char *rprovider = NULL; resource_t *rsc = NULL; quiet_log(" + Injecting %s into the configuration\n", spec); key = calloc(1, strlen(spec) + 1); node = calloc(1, strlen(spec) + 1); rc = sscanf(spec, "%[^@]@%[^=]=%d", key, node, &outcome); if (rc != 3) { fprintf(stderr, "Invalid operation spec: %s. Only found %d fields\n", spec, rc); free(key); free(node); continue; } parse_op_key(key, &resource, &task, &interval); rsc = pe_find_resource(data_set->resources, resource); if (rsc == NULL) { fprintf(stderr, " - Invalid resource name: %s\n", resource); } else { rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); rtype = crm_element_value(rsc->xml, XML_ATTR_TYPE); rprovider = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); cib_node = inject_node_state(cib, node, NULL); CRM_ASSERT(cib_node != NULL); update_failcounts(cib_node, resource, interval, outcome); cib_resource = inject_resource(cib_node, resource, rclass, rtype, rprovider); CRM_ASSERT(cib_resource != NULL); op = create_op(cib_resource, task, interval, outcome); CRM_ASSERT(op != NULL); cib_op = inject_op(cib_resource, op, 0); CRM_ASSERT(cib_op != NULL); lrmd_free_event(op); rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, cib_node, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); } free(task); free(node); free(key); } } static gboolean exec_pseudo_action(crm_graph_t * graph, crm_action_t * action) { const char *node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); action->confirmed = TRUE; quiet_log(" * Pseudo action: %s%s%s\n", task, node ? " on " : "", node ? node : ""); update_graph(graph, action); return TRUE; } static gboolean exec_rsc_action(crm_graph_t * graph, crm_action_t * action) { int rc = 0; GListPtr gIter = NULL; lrmd_event_data_t *op = NULL; int target_outcome = 0; gboolean uname_is_uuid = FALSE; const char *rtype = NULL; const char *rclass = NULL; const char *resource = NULL; const char *rprovider = NULL; const char *operation = crm_element_value(action->xml, "operation"); const char *target_rc_s = crm_meta_value(action->params, XML_ATTR_TE_TARGET_RC); xmlNode *cib_node = NULL; xmlNode *cib_resource = NULL; xmlNode *action_rsc = first_named_child(action->xml, XML_CIB_TAG_RESOURCE); char *node = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET); char *uuid = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET_UUID); const char *router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); if (safe_str_eq(operation, CRM_OP_PROBED) || safe_str_eq(operation, CRM_OP_REPROBE)) { crm_info("Skipping %s op for %s", operation, node); goto done; } if (action_rsc == NULL) { crm_log_xml_err(action->xml, "Bad"); free(node); free(uuid); return FALSE; } /* Look for the preferred name * If not found, try the expected 'local' name * If not found use the preferred name anyway */ resource = crm_element_value(action_rsc, XML_ATTR_ID); if (pe_find_resource(fake_resource_list, resource) == NULL) { const char *longname = crm_element_value(action_rsc, XML_ATTR_ID_LONG); if (pe_find_resource(fake_resource_list, longname)) { resource = longname; } } if (safe_str_eq(operation, "delete")) { quiet_log(" * Resource action: %-15s delete on %s\n", resource, node); goto done; } rclass = crm_element_value(action_rsc, XML_AGENT_ATTR_CLASS); rtype = crm_element_value(action_rsc, XML_ATTR_TYPE); rprovider = crm_element_value(action_rsc, XML_AGENT_ATTR_PROVIDER); if (target_rc_s != NULL) { target_outcome = crm_parse_int(target_rc_s, "0"); } CRM_ASSERT(fake_cib->cmds->query(fake_cib, NULL, NULL, cib_sync_call | cib_scope_local) == pcmk_ok); if (router_node) { uname_is_uuid = TRUE; } cib_node = inject_node_state(fake_cib, node, uname_is_uuid ? node : uuid); CRM_ASSERT(cib_node != NULL); cib_resource = inject_resource(cib_node, resource, rclass, rtype, rprovider); CRM_ASSERT(cib_resource != NULL); op = convert_graph_action(cib_resource, action, 0, target_outcome); if (op->interval) { quiet_log(" * Resource action: %-15s %s=%d on %s\n", resource, op->op_type, op->interval, node); } else { quiet_log(" * Resource action: %-15s %s on %s\n", resource, op->op_type, node); } for (gIter = fake_op_fail_list; gIter != NULL; gIter = gIter->next) { char *spec = (char *)gIter->data; char *key = NULL; key = calloc(1, 1 + strlen(spec)); snprintf(key, strlen(spec), "%s_%s_%d@%s=", resource, op->op_type, op->interval, node); if (strncasecmp(key, spec, strlen(key)) == 0) { sscanf(spec, "%*[^=]=%d", (int *)&op->rc); action->failed = TRUE; graph->abort_priority = INFINITY; printf("\tPretending action %d failed with rc=%d\n", action->id, op->rc); update_failcounts(cib_node, resource, op->interval, op->rc); free(key); break; } free(key); } inject_op(cib_resource, op, target_outcome); lrmd_free_event(op); rc = fake_cib->cmds->modify(fake_cib, XML_CIB_TAG_STATUS, cib_node, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); done: free(node); free(uuid); free_xml(cib_node); action->confirmed = TRUE; update_graph(graph, action); return TRUE; } static gboolean exec_crmd_action(crm_graph_t * graph, crm_action_t * action) { const char *node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); xmlNode *rsc = first_named_child(action->xml, XML_CIB_TAG_RESOURCE); action->confirmed = TRUE; if(rsc) { quiet_log(" * Cluster action: %s for %s on %s\n", task, ID(rsc), node); } else { quiet_log(" * Cluster action: %s on %s\n", task, node); } update_graph(graph, action); return TRUE; } static gboolean exec_stonith_action(crm_graph_t * graph, crm_action_t * action) { const char *op = crm_meta_value(action->params, "stonith_action"); char *target = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET); quiet_log(" * Fencing %s (%s)\n", target, op); if(safe_str_neq(op, "on")) { int rc = 0; char xpath[STATUS_PATH_MAX]; xmlNode *cib_node = modify_node(fake_cib, target, FALSE); crm_xml_add(cib_node, XML_ATTR_ORIGIN, __FUNCTION__); CRM_ASSERT(cib_node != NULL); rc = fake_cib->cmds->replace(fake_cib, XML_CIB_TAG_STATUS, cib_node, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); snprintf(xpath, STATUS_PATH_MAX, "//node_state[@uname='%s']/%s", target, XML_CIB_TAG_LRM); fake_cib->cmds->delete(fake_cib, xpath, NULL, cib_xpath | cib_sync_call | cib_scope_local); snprintf(xpath, STATUS_PATH_MAX, "//node_state[@uname='%s']/%s", target, XML_TAG_TRANSIENT_NODEATTRS); fake_cib->cmds->delete(fake_cib, xpath, NULL, cib_xpath | cib_sync_call | cib_scope_local); free_xml(cib_node); } action->confirmed = TRUE; update_graph(graph, action); free(target); return TRUE; } int run_simulation(pe_working_set_t * data_set, cib_t *cib, GListPtr op_fail_list, bool quiet) { crm_graph_t *transition = NULL; enum transition_status graph_rc = -1; crm_graph_functions_t exec_fns = { exec_pseudo_action, exec_rsc_action, exec_crmd_action, exec_stonith_action, }; fake_cib = cib; fake_quiet = quiet; fake_op_fail_list = op_fail_list; quiet_log("\nExecuting cluster transition:\n"); set_graph_functions(&exec_fns); transition = unpack_graph(data_set->graph, crm_system_name); print_graph(LOG_DEBUG, transition); fake_resource_list = data_set->resources; do { graph_rc = run_graph(transition); } while (graph_rc == transition_active); fake_resource_list = NULL; if (graph_rc != transition_complete) { fprintf(stdout, "Transition failed: %s\n", transition_status(graph_rc)); print_graph(LOG_ERR, transition); } destroy_graph(transition); if (graph_rc != transition_complete) { fprintf(stdout, "An invalid transition was produced\n"); } if (quiet == FALSE) { xmlNode *cib_object = NULL; int rc = fake_cib->cmds->query(fake_cib, NULL, &cib_object, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); cleanup_alloc_calculations(data_set); data_set->input = cib_object; } if (graph_rc != transition_complete) { return graph_rc; } return 0; }