diff --git a/crmd/lrm.c b/crmd/lrm.c index 5289c6fa73..daa50d602a 100644 --- a/crmd/lrm.c +++ b/crmd/lrm.c @@ -1,1994 +1,1999 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #define START_DELAY_THRESHOLD 5 * 60 * 1000 typedef struct resource_history_s { char *id; lrmd_rsc_info_t rsc; lrmd_event_data_t *last; lrmd_event_data_t *failed; GList *recurring_op_list; /* Resources must be stopped using the same * parameters they were started with. This hashtable * holds the parameters that should be used for the next stop * cmd on this resource. */ GHashTable *stop_params; } rsc_history_t; struct recurring_op_s { char *rsc_id; char *op_type; char *op_key; int call_id; int interval; gboolean remove; gboolean cancelled; }; struct pending_deletion_op_s { char *rsc; ha_msg_input_t *input; }; struct delete_event_s { int rc; const char *rsc; }; GHashTable *resource_history = NULL; GHashTable *pending_ops = NULL; GHashTable *deletion_ops = NULL; int num_lrm_register_fails = 0; int max_lrm_register_fails = 30; gboolean process_lrm_event(lrmd_event_data_t * op); gboolean is_rsc_active(const char *rsc_id); gboolean build_active_RAs(xmlNode * rsc_list); static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data); static int delete_rsc_status(const char *rsc_id, int call_options, const char *user_name); static lrmd_event_data_t *construct_op(xmlNode * rsc_op, const char *rsc_id, const char *operation); void do_lrm_rsc_op(lrmd_rsc_info_t * rsc, const char *operation, xmlNode * msg, xmlNode * request); void send_direct_ack(const char *to_host, const char *to_sys, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id); static void lrm_connection_destroy(void) { if (is_set(fsa_input_register, R_LRM_CONNECTED)) { crm_crit("LRM Connection failed"); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); clear_bit(fsa_input_register, R_LRM_CONNECTED); } else { crm_info("LRM Connection disconnected"); } } static void free_deletion_op(gpointer value) { struct pending_deletion_op_s *op = value; free(op->rsc); delete_ha_msg_input(op->input); free(op); } static void free_recurring_op(gpointer value) { struct recurring_op_s *op = (struct recurring_op_s *)value; free(op->rsc_id); free(op->op_type); free(op->op_key); free(op); } static char * make_stop_id(const char *rsc, int call_id) { char *op_id = NULL; op_id = calloc(1, strlen(rsc) + 34); if (op_id != NULL) { snprintf(op_id, strlen(rsc) + 34, "%s:%d", rsc, call_id); } return op_id; } static void copy_instance_keys(gpointer key, gpointer value, gpointer user_data) { if (strstr(key, CRM_META "_") == NULL) { g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value)); } } static void copy_meta_keys(gpointer key, gpointer value, gpointer user_data) { if (strstr(key, CRM_META "_") != NULL) { g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value)); } } static void history_cache_destroy(gpointer data) { rsc_history_t *entry = data; if (entry->stop_params) { g_hash_table_destroy(entry->stop_params); } free(entry->rsc.type); free(entry->rsc.class); free(entry->rsc.provider); lrmd_free_event(entry->failed); lrmd_free_event(entry->last); free(entry->id); free(entry); } static void update_history_cache(lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) { int target_rc = 0; rsc_history_t *entry = NULL; if (op->rsc_deleted) { crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type); delete_rsc_status(op->rsc_id, cib_quorum_override, NULL); return; } if (safe_str_eq(op->op_type, RSC_NOTIFY)) { return; } crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type); entry = g_hash_table_lookup(resource_history, op->rsc_id); if (entry == NULL && rsc) { entry = calloc(1, sizeof(rsc_history_t)); entry->id = strdup(op->rsc_id); g_hash_table_insert(resource_history, entry->id, entry); entry->rsc.id = entry->id; entry->rsc.type = strdup(rsc->type); entry->rsc.class = strdup(rsc->class); if (rsc->provider) { entry->rsc.provider = strdup(rsc->provider); } else { entry->rsc.provider = NULL; } } else if (entry == NULL) { crm_info("Resource %s no longer exists, not updating cache", op->rsc_id); return; } target_rc = rsc_op_expected_rc(op); if (op->op_status == PCMK_LRM_OP_CANCELLED) { if (op->interval > 0) { GList *gIter, *gIterNext; crm_trace("Removing cancelled recurring op: %s_%s_%d", op->rsc_id, op->op_type, op->interval); for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIterNext) { lrmd_event_data_t *existing = gIter->data; gIterNext = gIter->next; if (safe_str_eq(op->rsc_id, existing->rsc_id) && safe_str_eq(op->op_type, existing->op_type) && op->interval == existing->interval) { lrmd_free_event(existing); entry->recurring_op_list = g_list_delete_link(entry->recurring_op_list, gIter); } } return; } else { crm_trace("Skipping %s_%s_%d rc=%d, status=%d", op->rsc_id, op->op_type, op->interval, op->rc, op->op_status); } } else if (did_rsc_op_fail(op, target_rc)) { /* We must store failed monitors here * - otherwise the block below will cause them to be forgetten them when a stop happens */ if (entry->failed) { lrmd_free_event(entry->failed); } entry->failed = lrmd_copy_event(op); } else if (op->interval == 0) { if (entry->last) { lrmd_free_event(entry->last); } entry->last = lrmd_copy_event(op); if (op->params && (safe_str_eq(CRMD_ACTION_START, op->op_type) || safe_str_eq(CRMD_ACTION_STATUS, op->op_type))) { if (entry->stop_params) { g_hash_table_destroy(entry->stop_params); } entry->stop_params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params); } } if (op->interval > 0) { crm_trace("Adding recurring op: %s_%s_%d", op->rsc_id, op->op_type, op->interval); entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op)); } else if (entry->recurring_op_list && safe_str_eq(op->op_type, RSC_STATUS) == FALSE) { GList *gIter = entry->recurring_op_list; crm_trace("Dropping %d recurring ops because of: %s_%s_%d", g_list_length(gIter), op->rsc_id, op->op_type, op->interval); for (; gIter != NULL; gIter = gIter->next) { lrmd_free_event(gIter->data); } g_list_free(entry->recurring_op_list); entry->recurring_op_list = NULL; } } /* A_LRM_CONNECT */ void do_lrm_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { if (fsa_lrm_conn == NULL) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } if (action & A_LRM_DISCONNECT) { if (verify_stopped(cur_state, LOG_INFO) == FALSE) { if(action == A_LRM_DISCONNECT) { crmd_fsa_stall(NULL); return; } } if (is_set(fsa_input_register, R_LRM_CONNECTED)) { clear_bit(fsa_input_register, R_LRM_CONNECTED); fsa_lrm_conn->cmds->disconnect(fsa_lrm_conn); crm_info("Disconnected from the LRM"); } if(resource_history) { g_hash_table_destroy(resource_history); resource_history = NULL; } if(deletion_ops) { g_hash_table_destroy(deletion_ops); deletion_ops = NULL; } if(pending_ops) { g_hash_table_destroy(pending_ops); pending_ops = NULL; } } if (action & A_LRM_CONNECT) { int ret = pcmk_ok; deletion_ops = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, free_deletion_op); pending_ops = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, free_recurring_op); resource_history = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, history_cache_destroy); crm_debug("Connecting to the LRM"); ret = fsa_lrm_conn->cmds->connect(fsa_lrm_conn, CRM_SYSTEM_CRMD, NULL); if (ret != pcmk_ok) { if (++num_lrm_register_fails < max_lrm_register_fails) { crm_warn("Failed to sign on to the LRM %d" " (%d max) times", num_lrm_register_fails, max_lrm_register_fails); crm_timer_start(wait_timer); crmd_fsa_stall(NULL); return; } } if (ret == pcmk_ok) { crm_trace("LRM: set_lrm_callback..."); fsa_lrm_conn->cmds->set_callback(fsa_lrm_conn, lrm_op_callback); } if (ret != pcmk_ok) { crm_err("Failed to sign on to the LRM %d" " (max) times", num_lrm_register_fails); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } set_bit(fsa_input_register, R_LRM_CONNECTED); crm_debug("LRM connection established"); } if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } } static void ghash_print_pending(gpointer key, gpointer value, gpointer user_data) { const char *stop_id = key; int *log_level = user_data; struct recurring_op_s *pending = value; do_crm_log(*log_level, "Pending action: %s (%s)", stop_id, pending->op_key); } static void ghash_print_pending_for_rsc(gpointer key, gpointer value, gpointer user_data) { const char *stop_id = key; char *rsc = user_data; struct recurring_op_s *pending = value; if (safe_str_eq(rsc, pending->rsc_id)) { crm_notice("%sction %s (%s) incomplete at shutdown", pending->interval == 0 ? "A" : "Recurring a", stop_id, pending->op_key); } } static void ghash_count_pending(gpointer key, gpointer value, gpointer user_data) { int *counter = user_data; struct recurring_op_s *pending = value; if (pending->interval > 0) { /* Ignore recurring actions in the shutdown calculations */ return; } (*counter)++; } gboolean verify_stopped(enum crmd_fsa_state cur_state, int log_level) { int counter = 0; gboolean rc = TRUE; GHashTableIter gIter; rsc_history_t *entry = NULL; crm_debug("Checking for active resources before exit"); if (cur_state == S_TERMINATE) { log_level = LOG_ERR; } if (pending_ops) { if (is_set(fsa_input_register, R_LRM_CONNECTED)) { /* Only log/complain about non-recurring actions */ g_hash_table_foreach_remove(pending_ops, stop_recurring_actions, NULL); } g_hash_table_foreach(pending_ops, ghash_count_pending, &counter); } if (counter > 0) { rc = FALSE; do_crm_log(log_level, "%d pending LRM operations at shutdown%s", counter, cur_state == S_TERMINATE ? "" : "... waiting"); if (cur_state == S_TERMINATE || !is_set(fsa_input_register, R_SENT_RSC_STOP)) { g_hash_table_foreach(pending_ops, ghash_print_pending, &log_level); } goto bail; } if (resource_history == NULL) { goto bail; } g_hash_table_iter_init(&gIter, resource_history); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { if (is_rsc_active(entry->id) == FALSE) { continue; } crm_err("Resource %s was active at shutdown." " You may ignore this error if it is unmanaged.", entry->id); g_hash_table_foreach(pending_ops, ghash_print_pending_for_rsc, entry->id); } bail: set_bit(fsa_input_register, R_SENT_RSC_STOP); if (cur_state == S_TERMINATE) { rc = TRUE; } return rc; } static char * get_rsc_metadata(const char *type, const char *class, const char *provider) { char *metadata = NULL; CRM_CHECK(type != NULL, return NULL); CRM_CHECK(class != NULL, return NULL); if (provider == NULL) { provider = "heartbeat"; } crm_trace("Retreiving metadata for %s::%s:%s", type, class, provider); fsa_lrm_conn->cmds->get_metadata(fsa_lrm_conn, class, provider, type, &metadata, 0); if (metadata) { /* copy the metadata because the LRM likes using * g_alloc instead of cl_malloc */ char *m_copy = strdup(metadata); g_free(metadata); metadata = m_copy; } else { crm_warn("No metadata found for %s::%s:%s", type, class, provider); } return metadata; } typedef struct reload_data_s { char *key; char *metadata; time_t last_query; gboolean can_reload; GListPtr restart_list; } reload_data_t; static void g_hash_destroy_reload(gpointer data) { reload_data_t *reload = data; free(reload->key); free(reload->metadata); g_list_free_full(reload->restart_list, free); free(reload); } GHashTable *reload_hash = NULL; static GListPtr get_rsc_restart_list(lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) { int len = 0; char *key = NULL; char *copy = NULL; const char *value = NULL; const char *provider = NULL; xmlNode *param = NULL; xmlNode *params = NULL; xmlNode *actions = NULL; xmlNode *metadata = NULL; time_t now = time(NULL); reload_data_t *reload = NULL; if (reload_hash == NULL) { reload_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, g_hash_destroy_reload); } provider = rsc->provider; if (provider == NULL) { provider = "heartbeat"; } len = strlen(rsc->type) + strlen(rsc->class) + strlen(provider) + 4; /* coverity[returned_null] Ignore */ key = malloc( len); snprintf(key, len, "%s::%s:%s", rsc->type, rsc->class, provider); reload = g_hash_table_lookup(reload_hash, key); if (reload && ((now - 9) > reload->last_query) && safe_str_eq(op->op_type, RSC_START)) { reload = NULL; /* re-query */ } if (reload == NULL) { xmlNode *action = NULL; reload = calloc(1, sizeof(reload_data_t)); g_hash_table_replace(reload_hash, key, reload); reload->last_query = now; reload->key = key; key = NULL; reload->metadata = get_rsc_metadata(rsc->type, rsc->class, provider); metadata = string2xml(reload->metadata); if (metadata == NULL) { crm_err("Metadata for %s::%s:%s is not valid XML", rsc->provider, rsc->class, rsc->type); goto cleanup; } actions = find_xml_node(metadata, "actions", TRUE); for (action = __xml_first_child(actions); action != NULL; action = __xml_next(action)) { if (crm_str_eq((const char *)action->name, "action", TRUE)) { value = crm_element_value(action, "name"); if (safe_str_eq("reload", value)) { reload->can_reload = TRUE; break; } } } if (reload->can_reload == FALSE) { goto cleanup; } params = find_xml_node(metadata, "parameters", TRUE); for (param = __xml_first_child(params); param != NULL; param = __xml_next(param)) { if (crm_str_eq((const char *)param->name, "parameter", TRUE)) { value = crm_element_value(param, "unique"); if (crm_is_true(value)) { value = crm_element_value(param, "name"); if (value == NULL) { crm_err("%s: NULL param", key); continue; } crm_debug("Attr %s is not reloadable", value); copy = strdup(value); CRM_CHECK(copy != NULL, continue); reload->restart_list = g_list_append(reload->restart_list, copy); } } } } cleanup: free(key); free_xml(metadata); return reload->restart_list; } static void append_restart_list(lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, xmlNode * update, const char *version) { int len = 0; char *list = NULL; char *digest = NULL; const char *value = NULL; xmlNode *restart = NULL; GListPtr restart_list = NULL; GListPtr lpc = NULL; if (op->interval > 0) { /* monitors are not reloadable */ return; } else if (op->params == NULL) { crm_debug("%s has no parameters", ID(update)); return; } else if (rsc == NULL) { return; } else if (crm_str_eq(CRMD_ACTION_STOP, op->op_type, TRUE)) { /* Stopped resources don't need to be reloaded */ return; } else if (compare_version("1.0.8", version) > 0) { /* Caller version does not support reloads */ return; } restart_list = get_rsc_restart_list(rsc, op); if (restart_list == NULL) { /* Resource does not support reloads */ return; } restart = create_xml_node(NULL, XML_TAG_PARAMS); for (lpc = restart_list; lpc != NULL; lpc = lpc->next) { const char *param = (const char *)lpc->data; int start = len; CRM_CHECK(param != NULL, continue); value = g_hash_table_lookup(op->params, param); if (value != NULL) { crm_xml_add(restart, param, value); } len += strlen(param) + 2; list = realloc(list, len + 1); sprintf(list + start, " %s ", param); } digest = calculate_operation_digest(restart, version); crm_xml_add(update, XML_LRM_ATTR_OP_RESTART, list); crm_xml_add(update, XML_LRM_ATTR_RESTART_DIGEST, digest); crm_trace("%s: %s, %s", rsc->id, digest, list); crm_log_xml_trace(restart, "restart digest source"); free_xml(restart); free(digest); free(list); } static gboolean build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *src) { int target_rc = 0; xmlNode *xml_op = NULL; const char *caller_version = CRM_FEATURE_SET; if (op == NULL) { return FALSE; } else if (AM_I_DC) { } else if (fsa_our_dc_version != NULL) { caller_version = fsa_our_dc_version; } else if (op->params == NULL) { caller_version = fsa_our_dc_version; } else { /* there is a small risk in formerly mixed clusters that * it will be sub-optimal. * however with our upgrade policy, the update we send * should still be completely supported anyway */ caller_version = g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION); crm_debug("Falling back to operation originator version: %s", caller_version); } target_rc = rsc_op_expected_rc(op); xml_op = create_operation_update(parent, op, caller_version, target_rc, src, LOG_DEBUG); if (xml_op) { append_restart_list(rsc, op, xml_op, caller_version); } return TRUE; } gboolean is_rsc_active(const char *rsc_id) { rsc_history_t *entry = NULL; crm_trace("Processing lrmd_rsc_info_t entry %s", rsc_id); entry = g_hash_table_lookup(resource_history, rsc_id); if (entry == NULL || entry->last == NULL) { return FALSE; } if (entry->last->rc == PCMK_EXECRA_OK && safe_str_eq(entry->last->op_type, CRMD_ACTION_STOP)) { return FALSE; } else if (entry->last->rc == PCMK_EXECRA_OK && safe_str_eq(entry->last->op_type, CRMD_ACTION_MIGRATE)) { /* a stricter check is too complex... * leave that to the PE */ return FALSE; } else if (entry->last->rc == PCMK_EXECRA_NOT_RUNNING) { return FALSE; } else if (entry->last->interval == 0 && entry->last->rc == PCMK_EXECRA_NOT_CONFIGURED) { /* Badly configured resources can't be reliably stopped */ return FALSE; } return TRUE; } gboolean build_active_RAs(xmlNode * rsc_list) { GHashTableIter iter; rsc_history_t *entry = NULL; g_hash_table_iter_init(&iter, resource_history); while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) { GList *gIter = NULL; xmlNode *xml_rsc = create_xml_node(rsc_list, XML_LRM_TAG_RESOURCE); crm_xml_add(xml_rsc, XML_ATTR_ID, entry->id); crm_xml_add(xml_rsc, XML_ATTR_TYPE, entry->rsc.type); crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, entry->rsc.class); crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, entry->rsc.provider); build_operation_update(xml_rsc, &(entry->rsc), entry->last, __FUNCTION__); build_operation_update(xml_rsc, &(entry->rsc), entry->failed, __FUNCTION__); for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIter->next) { build_operation_update(xml_rsc, &(entry->rsc), gIter->data, __FUNCTION__); } } return FALSE; } xmlNode * do_lrm_query(gboolean is_replace) { xmlNode *xml_result = NULL; xmlNode *xml_state = NULL; xmlNode *xml_data = NULL; xmlNode *rsc_list = NULL; crm_node_t *peer = crm_get_peer(0, fsa_our_uname); xml_state = do_update_node_cib(peer, node_update_cluster|node_update_peer, NULL, __FUNCTION__); /* The next two lines shouldn't be necessary for newer DCs */ crm_xml_add(xml_state, XML_NODE_JOIN_STATE, CRMD_JOINSTATE_MEMBER); crm_xml_add(xml_state, XML_NODE_EXPECTED, CRMD_JOINSTATE_MEMBER); xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM); crm_xml_add(xml_data, XML_ATTR_ID, fsa_our_uuid); rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES); /* Build a list of active (not always running) resources */ build_active_RAs(rsc_list); xml_result = create_cib_fragment(xml_state, XML_CIB_TAG_STATUS); crm_log_xml_trace(xml_state, "Current state of the LRM"); free_xml(xml_state); return xml_result; } static void notify_deleted(ha_msg_input_t * input, const char *rsc_id, int rc) { lrmd_event_data_t *op = NULL; const char *from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM); const char *from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); crm_info("Notifying %s on %s that %s was%s deleted", from_sys, from_host, rsc_id, rc == pcmk_ok ? "" : " not"); op = construct_op(input->xml, rsc_id, CRMD_ACTION_DELETE); CRM_ASSERT(op != NULL); if (rc == pcmk_ok) { op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_EXECRA_OK; } else { op->op_status = PCMK_LRM_OP_ERROR; op->rc = PCMK_EXECRA_UNKNOWN_ERROR; } send_direct_ack(from_host, from_sys, NULL, op, rsc_id); lrmd_free_event(op); if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { /* this isn't expected - trigger a new transition */ time_t now = time(NULL); char *now_s = crm_itoa(now); crm_debug("Triggering a refresh after %s deleted %s from the LRM", from_sys, rsc_id); update_attr_delegate( fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, "last-lrm-refresh", now_s, FALSE, NULL); free(now_s); } } static gboolean lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data) { struct delete_event_s *event = user_data; struct pending_deletion_op_s *op = value; if (safe_str_eq(event->rsc, op->rsc)) { notify_deleted(op->input, event->rsc, event->rc); return TRUE; } return FALSE; } static gboolean lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data) { const char *rsc = user_data; struct recurring_op_s *pending = value; if (safe_str_eq(rsc, pending->rsc_id)) { crm_info("Removing op %s:%d for deleted resource %s", pending->op_key, pending->call_id, rsc); return TRUE; } return FALSE; } /* * Remove the rsc from the CIB * * Avoids refreshing the entire LRM section of this host */ #define rsc_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']" static int delete_rsc_status(const char *rsc_id, int call_options, const char *user_name) { char *rsc_xpath = NULL; int max = 0; int rc = pcmk_ok; CRM_CHECK(rsc_id != NULL, return -ENXIO); max = strlen(rsc_template) + strlen(rsc_id) + strlen(fsa_our_uname) + 1; rsc_xpath = calloc(1, max); snprintf(rsc_xpath, max, rsc_template, fsa_our_uname, rsc_id); rc = cib_internal_op(fsa_cib_conn, CIB_OP_DELETE, NULL, rsc_xpath, NULL, NULL, call_options | cib_xpath, user_name); free(rsc_xpath); return rc; } static void delete_rsc_entry(ha_msg_input_t * input, const char *rsc_id, GHashTableIter *rsc_gIter, int rc, const char *user_name) { struct delete_event_s event; CRM_CHECK(rsc_id != NULL, return); if (rc == pcmk_ok) { char *rsc_id_copy = strdup(rsc_id); if (rsc_gIter) g_hash_table_iter_remove(rsc_gIter); else g_hash_table_remove(resource_history, rsc_id_copy); crm_debug("sync: Sending delete op for %s", rsc_id_copy); delete_rsc_status(rsc_id_copy, cib_quorum_override, user_name); g_hash_table_foreach_remove(pending_ops, lrm_remove_deleted_op, rsc_id_copy); free(rsc_id_copy); } if (input) { notify_deleted(input, rsc_id, rc); } event.rc = rc; event.rsc = rsc_id; g_hash_table_foreach_remove(deletion_ops, lrm_remove_deleted_rsc, &event); } /* * Remove the op from the CIB * * Avoids refreshing the entire LRM section of this host */ #define op_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']/"XML_LRM_TAG_RSC_OP"[@id='%s']" #define op_call_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']/"XML_LRM_TAG_RSC_OP"[@id='%s' and @"XML_LRM_ATTR_CALLID"='%d']" static void delete_op_entry(lrmd_event_data_t * op, const char *rsc_id, const char *key, int call_id) { xmlNode *xml_top = NULL; if (op != NULL) { xml_top = create_xml_node(NULL, XML_LRM_TAG_RSC_OP); crm_xml_add_int(xml_top, XML_LRM_ATTR_CALLID, op->call_id); crm_xml_add(xml_top, XML_ATTR_TRANSITION_KEY, op->user_data); if(op->interval > 0) { char *op_id = generate_op_key(op->rsc_id, op->op_type, op->interval); /* Avoid deleting last_failure too (if it was a result of this recurring op failing) */ crm_xml_add(xml_top, XML_ATTR_ID, op_id); free(op_id); } crm_debug("async: Sending delete op for %s_%s_%d (call=%d)", op->rsc_id, op->op_type, op->interval, op->call_id); fsa_cib_conn->cmds->delete(fsa_cib_conn, XML_CIB_TAG_STATUS, xml_top, cib_quorum_override); } else if (rsc_id != NULL && key != NULL) { int max = 0; char *op_xpath = NULL; if (call_id > 0) { max = strlen(op_call_template) + strlen(rsc_id) + strlen(fsa_our_uname) + strlen(key) + 10; op_xpath = calloc(1, max); snprintf(op_xpath, max, op_call_template, fsa_our_uname, rsc_id, key, call_id); } else { max = strlen(op_template) + strlen(rsc_id) + strlen(fsa_our_uname) + strlen(key) + 1; op_xpath = calloc(1, max); snprintf(op_xpath, max, op_template, fsa_our_uname, rsc_id, key); } crm_debug("sync: Sending delete op for %s (call=%d)", rsc_id, call_id); fsa_cib_conn->cmds->delete(fsa_cib_conn, op_xpath, NULL, cib_quorum_override | cib_xpath); free(op_xpath); } else { crm_err("Not enough information to delete op entry: rsc=%p key=%p", rsc_id, key); return; } crm_log_xml_trace(xml_top, "op:cancel"); free_xml(xml_top); } void lrm_clear_last_failure(const char *rsc_id) { char *attr = NULL; GHashTableIter iter; rsc_history_t *entry = NULL; attr = generate_op_key(rsc_id, "last_failure", 0); delete_op_entry(NULL, rsc_id, attr, 0); free(attr); if (!resource_history) { return; } g_hash_table_iter_init(&iter, resource_history); while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) { if (safe_str_eq(rsc_id, entry->id)) { lrmd_free_event(entry->failed); entry->failed = NULL; } } } static gboolean cancel_op(const char *rsc_id, const char *key, int op, gboolean remove) { int rc = pcmk_ok; struct recurring_op_s *pending = NULL; CRM_CHECK(op != 0, return FALSE); CRM_CHECK(rsc_id != NULL, return FALSE); if (key == NULL) { key = make_stop_id(rsc_id, op); } pending = g_hash_table_lookup(pending_ops, key); if (pending) { if (remove && pending->remove == FALSE) { pending->remove = TRUE; crm_debug("Scheduling %s for removal", key); } if (pending->cancelled) { crm_debug("Operation %s already cancelled", key); return TRUE; } pending->cancelled = TRUE; } else { crm_info("No pending op found for %s", key); return TRUE; } crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key); rc = fsa_lrm_conn->cmds->cancel(fsa_lrm_conn, pending->rsc_id, pending->op_type, pending->interval); if (rc == pcmk_ok) { crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key); } else { crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, key); /* The caller needs to make sure the entry is * removed from the pending_ops list * * Usually by returning TRUE inside the worker function * supplied to g_hash_table_foreach_remove() * * Not removing the entry from pending_ops will block * the node from shutting down */ return FALSE; } return TRUE; } struct cancel_data { gboolean done; gboolean remove; const char *key; lrmd_rsc_info_t *rsc; }; static gboolean cancel_action_by_key(gpointer key, gpointer value, gpointer user_data) { struct cancel_data *data = user_data; struct recurring_op_s *op = (struct recurring_op_s *)value; if (safe_str_eq(op->op_key, data->key)) { data->done = TRUE; if (cancel_op(data->rsc->id, key, op->call_id, data->remove) == FALSE) { return TRUE; } } return FALSE; } static gboolean cancel_op_key(lrmd_rsc_info_t * rsc, const char *key, gboolean remove) { struct cancel_data data; CRM_CHECK(rsc != NULL, return FALSE); CRM_CHECK(key != NULL, return FALSE); data.key = key; data.rsc = rsc; data.done = FALSE; data.remove = remove; g_hash_table_foreach_remove(pending_ops, cancel_action_by_key, &data); return data.done; } static lrmd_rsc_info_t * get_lrm_resource(xmlNode * resource, xmlNode * op_msg, gboolean do_create) { lrmd_rsc_info_t *rsc = NULL; const char *id = ID(resource); const char *type = crm_element_value(resource, XML_ATTR_TYPE); const char *class = crm_element_value(resource, XML_AGENT_ATTR_CLASS); const char *provider = crm_element_value(resource, XML_AGENT_ATTR_PROVIDER); const char *long_id = crm_element_value(resource, XML_ATTR_ID_LONG); crm_trace("Retrieving %s from the LRM.", id); CRM_CHECK(id != NULL, return NULL); rsc = fsa_lrm_conn->cmds->get_rsc_info(fsa_lrm_conn, id, 0); if (!rsc && long_id) { rsc = fsa_lrm_conn->cmds->get_rsc_info(fsa_lrm_conn, long_id, 0); } if (!rsc && do_create) { CRM_CHECK(class != NULL, return NULL); CRM_CHECK(type != NULL, return NULL); crm_trace("Adding rsc %s before operation", id); fsa_lrm_conn->cmds->register_rsc(fsa_lrm_conn, id, class, provider, type, lrmd_opt_drop_recurring); rsc = fsa_lrm_conn->cmds->get_rsc_info(fsa_lrm_conn, id, 0); if (!rsc) { fsa_data_t *msg_data = NULL; crm_err("Could not add resource %s to LRM", id); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } } return rsc; } static void delete_resource(const char *id, lrmd_rsc_info_t * rsc, GHashTableIter *gIter, const char *sys, const char *host, const char *user, ha_msg_input_t * request) { int rc = pcmk_ok; crm_info("Removing resource %s for %s (%s) on %s", id, sys, user ? user : "internal", host); if (rsc) { rc = fsa_lrm_conn->cmds->unregister_rsc(fsa_lrm_conn, id, 0); } if (rc == pcmk_ok) { crm_trace("Resource '%s' deleted", id); } else if (rc == -EINPROGRESS) { crm_info("Deletion of resource '%s' pending", id); if (request) { struct pending_deletion_op_s *op = NULL; char *ref = crm_element_value_copy(request->msg, XML_ATTR_REFERENCE); op = calloc(1, sizeof(struct pending_deletion_op_s)); op->rsc = strdup(rsc->id); op->input = copy_ha_msg_input(request); g_hash_table_insert(deletion_ops, ref, op); } return; } else { crm_warn("Deletion of resource '%s' for %s (%s) on %s failed: %d", id, sys, user ? user : "internal", host, rc); } delete_rsc_entry(request, id, gIter, rc, user); } /* A_LRM_INVOKE */ void do_lrm_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { gboolean done = FALSE; gboolean create_rsc = TRUE; const char *crm_op = NULL; const char *from_sys = NULL; const char *from_host = NULL; const char *operation = NULL; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); const char *user_name = NULL; #if ENABLE_ACL user_name = crm_element_value(input->msg, F_CRM_USER); crm_trace("LRM command from user '%s'", user_name); #endif crm_op = crm_element_value(input->msg, F_CRM_TASK); from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM); if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); } crm_trace("LRM command from: %s", from_sys); if (safe_str_eq(crm_op, CRM_OP_LRM_DELETE)) { operation = CRMD_ACTION_DELETE; } else if (safe_str_eq(operation, CRM_OP_LRM_REFRESH)) { crm_op = CRM_OP_LRM_REFRESH; } else if (safe_str_eq(crm_op, CRM_OP_LRM_FAIL)) { rsc_history_t *entry = NULL; lrmd_event_data_t *op = NULL; lrmd_rsc_info_t *rsc = NULL; xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE); CRM_CHECK(xml_rsc != NULL, return); /* The lrmd can not fail a resource, it does not understand the * concept of success or failure in relation to a resource, it simply * executes operations and reports the results. We determine what a failure is. * Becaues of this, if we want to fail a resource we have to fake what we * understand a failure to look like. * * To do this we create a fake lrmd operation event for the resource * we want to fail. We then pass that event to the lrmd client callback * so it will be processed as if it actually came from the lrmd. */ op = construct_op(input->xml, ID(xml_rsc), "asyncmon"); free((char *) op->user_data); op->user_data = NULL; entry = g_hash_table_lookup(resource_history, op->rsc_id); /* Make sure the call id is greater than the last successful operation, * otherwise the failure will not result in a possible recovery of the resource * as it could appear the failure occurred before the successful start */ if (entry && entry->last) { op->call_id = entry->last->call_id + 1; if (op->call_id < 0) { op->call_id = 1; } } op->interval = 0; op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_EXECRA_UNKNOWN_ERROR; CRM_ASSERT(op != NULL); # if ENABLE_ACL if (user_name && is_privileged(user_name) == FALSE) { crm_err("%s does not have permission to fail %s", user_name, ID(xml_rsc)); send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc)); lrmd_free_event(op); return; } # endif rsc = get_lrm_resource(xml_rsc, input->xml, create_rsc); if (rsc) { crm_info("Failing resource %s...", rsc->id); process_lrm_event(op); op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_EXECRA_OK; lrmd_free_rsc_info(rsc); } else { crm_info("Cannot find/create resource in order to fail it..."); crm_log_xml_warn(input->msg, "bad input"); } send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc)); lrmd_free_event(op); return; } else if (input->xml != NULL) { operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK); } if (safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) { int rc = pcmk_ok; xmlNode *fragment = do_lrm_query(TRUE); fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc, user_name); crm_info("Forced a local LRM refresh: call=%d", rc); free_xml(fragment); } else if (safe_str_eq(crm_op, CRM_OP_LRM_QUERY)) { xmlNode *data = do_lrm_query(FALSE); xmlNode *reply = create_reply(input->msg, data); if (relay_message(reply, TRUE) == FALSE) { crm_err("Unable to route reply"); crm_log_xml_err(reply, "reply"); } free_xml(reply); free_xml(data); } else if (safe_str_eq(operation, CRM_OP_PROBED)) { update_attrd(NULL, CRM_OP_PROBED, XML_BOOLEAN_TRUE, user_name); } else if (safe_str_eq(crm_op, CRM_OP_REPROBE)) { GHashTableIter gIter; rsc_history_t *entry = NULL; crm_notice("Forcing the status of all resources to be redetected"); g_hash_table_iter_init(&gIter, resource_history); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { delete_resource(entry->id, &entry->rsc, &gIter, from_sys, from_host, user_name, NULL); } /* Now delete the copy in the CIB */ erase_status_tag(fsa_our_uname, XML_CIB_TAG_LRM, cib_scope_local); /* And finally, _delete_ the value in attrd * Setting it to FALSE results in the PE sending us back here again */ update_attrd(NULL, CRM_OP_PROBED, NULL, user_name); } else if (operation != NULL) { lrmd_rsc_info_t *rsc = NULL; xmlNode *params = NULL; xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE); CRM_CHECK(xml_rsc != NULL, return); /* only the first 16 chars are used by the LRM */ params = find_xml_node(input->xml, XML_TAG_ATTRS, TRUE); if (safe_str_eq(operation, CRMD_ACTION_DELETE)) { create_rsc = FALSE; } rsc = get_lrm_resource(xml_rsc, input->xml, create_rsc); if (rsc == NULL && create_rsc) { crm_err("Invalid resource definition"); crm_log_xml_warn(input->msg, "bad input"); } else if (rsc == NULL) { lrmd_event_data_t *op = NULL; crm_notice("Not creating resource for a %s event: %s", operation, ID(input->xml)); delete_rsc_entry(input, ID(xml_rsc), NULL, pcmk_ok, user_name); op = construct_op(input->xml, ID(xml_rsc), operation); op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_EXECRA_OK; CRM_ASSERT(op != NULL); send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc)); lrmd_free_event(op); } else if (safe_str_eq(operation, CRMD_ACTION_CANCEL)) { lrmd_event_data_t *op = NULL; char *op_key = NULL; char *meta_key = NULL; int call = 0; const char *call_id = NULL; const char *op_task = NULL; const char *op_interval = NULL; CRM_CHECK(params != NULL, crm_log_xml_warn(input->xml, "Bad command"); return); meta_key = crm_meta_name(XML_LRM_ATTR_INTERVAL); op_interval = crm_element_value(params, meta_key); free(meta_key); meta_key = crm_meta_name(XML_LRM_ATTR_TASK); op_task = crm_element_value(params, meta_key); free(meta_key); meta_key = crm_meta_name(XML_LRM_ATTR_CALLID); call_id = crm_element_value(params, meta_key); free(meta_key); CRM_CHECK(op_task != NULL, crm_log_xml_warn(input->xml, "Bad command"); return); CRM_CHECK(op_interval != NULL, crm_log_xml_warn(input->xml, "Bad command"); return); op = construct_op(input->xml, rsc->id, op_task); CRM_ASSERT(op != NULL); op_key = generate_op_key(rsc->id, op_task, crm_parse_int(op_interval, "0")); crm_debug("PE requested op %s (call=%s) be cancelled", op_key, call_id ? call_id : "NA"); call = crm_parse_int(call_id, "0"); if (call == 0) { /* the normal case when the PE cancels a recurring op */ done = cancel_op_key(rsc, op_key, TRUE); } else { /* the normal case when the PE cancels an orphan op */ done = cancel_op(rsc->id, NULL, call, TRUE); } if (done == FALSE) { crm_debug("Nothing known about operation %d for %s", call, op_key); delete_op_entry(NULL, rsc->id, op_key, call); /* needed?? surely not otherwise the cancel_op_(_key) wouldn't * have failed in the first place */ g_hash_table_remove(pending_ops, op_key); } op->rc = PCMK_EXECRA_OK; op->op_status = PCMK_LRM_OP_DONE; send_direct_ack(from_host, from_sys, rsc, op, rsc->id); free(op_key); lrmd_free_event(op); } else if (safe_str_eq(operation, CRMD_ACTION_DELETE)) { int cib_rc = pcmk_ok; CRM_ASSERT(rsc != NULL); cib_rc = delete_rsc_status(rsc->id, cib_dryrun | cib_sync_call, user_name); if (cib_rc != pcmk_ok) { lrmd_event_data_t *op = NULL; crm_err ("Attempt of deleting resource status '%s' from CIB for %s (user=%s) on %s failed: (rc=%d) %s", rsc->id, from_sys, user_name ? user_name : "unknown", from_host, cib_rc, pcmk_strerror(cib_rc)); op = construct_op(input->xml, rsc->id, operation); op->op_status = PCMK_LRM_OP_ERROR; if (cib_rc == -EACCES) { op->rc = PCMK_EXECRA_INSUFFICIENT_PRIV; } else { op->rc = PCMK_EXECRA_UNKNOWN_ERROR; } send_direct_ack(from_host, from_sys, NULL, op, rsc->id); lrmd_free_event(op); return; } delete_resource(rsc->id, rsc, NULL, from_sys, from_host, user_name, input); } else if (rsc != NULL) { do_lrm_rsc_op(rsc, operation, input->xml, input->msg); } lrmd_free_rsc_info(rsc); } else { crm_err("Operation was neither a lrm_query, nor a rsc op. %s", crm_str(crm_op)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } static lrmd_event_data_t * construct_op(xmlNode * rsc_op, const char *rsc_id, const char *operation) { lrmd_event_data_t *op = NULL; const char *op_delay = NULL; const char *op_timeout = NULL; const char *op_interval = NULL; GHashTable *params = NULL; const char *transition = NULL; CRM_LOG_ASSERT(rsc_id != NULL); op = calloc(1, sizeof(lrmd_event_data_t)); op->type = lrmd_event_exec_complete; op->op_type = strdup(operation); op->op_status = PCMK_LRM_OP_PENDING; op->rc = -1; op->rsc_id = strdup(rsc_id); op->interval = 0; op->timeout = 0; op->start_delay = 0; if (rsc_op == NULL) { CRM_LOG_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation)); op->user_data = NULL; /* the stop_all_resources() case * by definition there is no DC (or they'd be shutting * us down). * So we should put our version here. */ op->params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION), strdup(CRM_FEATURE_SET)); crm_trace("Constructed %s op for %s", operation, rsc_id); return op; } params = xml2list(rsc_op); g_hash_table_remove(params, CRM_META "_op_target_rc"); op_delay = crm_meta_value(params, XML_OP_ATTR_START_DELAY); op_timeout = crm_meta_value(params, XML_ATTR_TIMEOUT); op_interval = crm_meta_value(params, XML_LRM_ATTR_INTERVAL); op->interval = crm_parse_int(op_interval, "0"); op->timeout = crm_parse_int(op_timeout, "0"); op->start_delay = crm_parse_int(op_delay, "0"); if (safe_str_neq(operation, RSC_STOP)) { op->params = params; } else { rsc_history_t *entry = g_hash_table_lookup(resource_history, rsc_id); /* If we do not have stop parameters cached, use * whatever we are given */ if (!entry || !entry->stop_params) { op->params = params; } else { /* Copy the cached parameter list so that we stop the resource * with the old attributes, not the new ones */ op->params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); g_hash_table_foreach(params, copy_meta_keys, op->params); g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params); g_hash_table_destroy(params); params = NULL; } } /* sanity */ if (op->interval < 0) { op->interval = 0; } if (op->timeout <= 0) { op->timeout = op->interval; } if (op->start_delay < 0) { op->start_delay = 0; } transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY); CRM_CHECK(transition != NULL, return op); op->user_data = strdup(transition); if (op->interval != 0) { if (safe_str_eq(operation, CRMD_ACTION_START) || safe_str_eq(operation, CRMD_ACTION_STOP)) { crm_err("Start and Stop actions cannot have an interval: %d", op->interval); op->interval = 0; } } crm_trace("Constructed %s op for %s: interval=%d", operation, rsc_id, op->interval); return op; } void send_direct_ack(const char *to_host, const char *to_sys, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id) { xmlNode *reply = NULL; xmlNode *update, *iter; xmlNode *fragment; crm_node_t *peer = NULL; CRM_CHECK(op != NULL, return); if (op->rsc_id == NULL) { CRM_LOG_ASSERT(rsc_id != NULL); op->rsc_id = strdup(rsc_id); } if (to_sys == NULL) { to_sys = CRM_SYSTEM_TENGINE; } peer = crm_get_peer(0, fsa_our_uname); update = do_update_node_cib(peer, node_update_none, NULL, __FUNCTION__); iter = create_xml_node(update, XML_CIB_TAG_LRM); crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); build_operation_update(iter, rsc, op, __FUNCTION__); fragment = create_cib_fragment(update, XML_CIB_TAG_STATUS); reply = create_request(CRM_OP_INVOKE_LRM, fragment, to_host, to_sys, CRM_SYSTEM_LRMD, NULL); crm_log_xml_trace(update, "ACK Update"); crm_debug("ACK'ing resource op %s_%s_%d from %s: %s", op->rsc_id, op->op_type, op->interval, op->user_data, crm_element_value(reply, XML_ATTR_REFERENCE)); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(fragment); free_xml(update); free_xml(reply); } static gboolean stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data) { lrmd_rsc_info_t *rsc = user_data; struct recurring_op_s *op = (struct recurring_op_s *)value; if (op->interval != 0 && safe_str_eq(op->rsc_id, rsc->id)) { if (cancel_op(rsc->id, key, op->call_id, FALSE) == FALSE) { return TRUE; } } return FALSE; } static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; struct recurring_op_s *op = (struct recurring_op_s *)value; if (op->interval != 0) { remove = cancel_op(op->rsc_id, key, op->call_id, FALSE); } return remove; } void do_lrm_rsc_op(lrmd_rsc_info_t * rsc, const char *operation, xmlNode * msg, xmlNode * request) { int call_id = 0; char *op_id = NULL; lrmd_event_data_t *op = NULL; lrmd_key_value_t *params = NULL; fsa_data_t *msg_data = NULL; const char *transition = NULL; CRM_CHECK(rsc != NULL, return); if (msg != NULL) { transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY); if (transition == NULL) { crm_log_xml_err(msg, "Missing transition number"); } } op = construct_op(msg, rsc->id, operation); /* stop the monitor before stopping the resource */ if (crm_str_eq(operation, CRMD_ACTION_STOP, TRUE) || crm_str_eq(operation, CRMD_ACTION_DEMOTE, TRUE) || crm_str_eq(operation, CRMD_ACTION_PROMOTE, TRUE) || crm_str_eq(operation, CRMD_ACTION_MIGRATE, TRUE)) { g_hash_table_foreach_remove(pending_ops, stop_recurring_action_by_rsc, rsc); } /* now do the op */ crm_debug("Performing key=%s op=%s_%s_%d", transition, rsc->id, operation, op->interval); if (fsa_state != S_NOT_DC && fsa_state != S_POLICY_ENGINE && fsa_state != S_TRANSITION_ENGINE) { if (safe_str_neq(operation, "fail") && safe_str_neq(operation, CRMD_ACTION_STOP)) { crm_info("Discarding attempt to perform action %s on %s" " in state %s", operation, rsc->id, fsa_state2string(fsa_state)); op->rc = 99; op->op_status = PCMK_LRM_OP_ERROR; send_direct_ack(NULL, NULL, rsc, op, rsc->id); lrmd_free_event(op); free(op_id); return; } } op_id = generate_op_key(rsc->id, op->op_type, op->interval); if (op->interval > 0) { /* cancel it so we can then restart it without conflict */ cancel_op_key(rsc, op_id, FALSE); } if (op->params) { char *key = NULL; char *value = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, op->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { params = lrmd_key_value_add(params, key, value); } } call_id = fsa_lrm_conn->cmds->exec(fsa_lrm_conn, rsc->id, op->op_type, op->user_data, op->interval, op->timeout, op->start_delay, lrmd_opt_notify_changes_only, params); if (call_id <= 0) { crm_err("Operation %s on %s failed: %d", operation, rsc->id, call_id); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } else { /* record all operations so we can wait * for them to complete during shutdown */ char *call_id_s = make_stop_id(rsc->id, call_id); struct recurring_op_s *pending = NULL; pending = calloc(1, sizeof(struct recurring_op_s)); crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s); pending->call_id = call_id; pending->interval = op->interval; pending->op_type = strdup(operation); pending->op_key = strdup(op_id); pending->rsc_id = strdup(rsc->id); g_hash_table_replace(pending_ops, call_id_s, pending); if (op->interval > 0 && op->start_delay > START_DELAY_THRESHOLD) { char *uuid = NULL; int dummy = 0, target_rc = 0; crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id); decode_transition_key(op->user_data, &uuid, &dummy, &dummy, &target_rc); free(uuid); op->rc = target_rc; op->op_status = PCMK_LRM_OP_DONE; send_direct_ack(NULL, NULL, rsc, op, rsc->id); } } free(op_id); lrmd_free_event(op); return; } int last_resource_update = 0; static void cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { switch (rc) { case pcmk_ok: case -pcmk_err_diff_failed: case -pcmk_err_diff_resync: crm_trace("Resource update %d complete: rc=%d", call_id, rc); break; default: crm_warn("Resource update %d failed: (rc=%d) %s", call_id, rc, pcmk_strerror(rc)); } if(call_id == last_resource_update) { last_resource_update = 0; trigger_fsa(fsa_source); } } static int do_update_resource(lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) { /* */ int rc = pcmk_ok; xmlNode *update, *iter = NULL; int call_opt = cib_quorum_override; CRM_CHECK(op != NULL, return 0); if (fsa_state == S_ELECTION || fsa_state == S_PENDING) { crm_info("Sending update to local CIB in state: %s", fsa_state2string(fsa_state)); call_opt |= cib_scope_local; } iter = create_xml_node(iter, XML_CIB_TAG_STATUS); update = iter; iter = create_xml_node(iter, XML_CIB_TAG_STATE); set_uuid(iter, XML_ATTR_UUID, fsa_our_uname); crm_xml_add(iter, XML_ATTR_UNAME, fsa_our_uname); crm_xml_add(iter, XML_ATTR_ORIGIN, __FUNCTION__); iter = create_xml_node(iter, XML_CIB_TAG_LRM); crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); build_operation_update(iter, rsc, op, __FUNCTION__); if (rsc) { crm_xml_add(iter, XML_ATTR_TYPE, rsc->type); crm_xml_add(iter, XML_AGENT_ATTR_CLASS, rsc->class); crm_xml_add(iter, XML_AGENT_ATTR_PROVIDER, rsc->provider); CRM_CHECK(rsc->type != NULL, crm_err("Resource %s has no value for type", op->rsc_id)); CRM_CHECK(rsc->class != NULL, crm_err("Resource %s has no value for class", op->rsc_id)); } else { crm_warn("Resource %s no longer exists in the lrmd", op->rsc_id); goto cleanup; } /* make it an asyncronous call and be done with it * * Best case: * the resource state will be discovered during * the next signup or election. * * Bad case: * we are shutting down and there is no DC at the time, * but then why were we shutting down then anyway? * (probably because of an internal error) * * Worst case: * we get shot for having resources "running" when the really weren't * * the alternative however means blocking here for too long, which * isnt acceptable */ fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, rc, NULL); if(rc > 0) { last_resource_update = rc; } /* the return code is a call number, not an error code */ crm_trace("Sent resource state update message: %d for %s=%d on %s", rc, op->op_type, op->interval, op->rsc_id); fsa_cib_conn->cmds->register_callback(fsa_cib_conn, rc, 60, FALSE, NULL, "cib_rsc_callback", cib_rsc_callback); cleanup: free_xml(update); return rc; } void do_lrm_event(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data) { CRM_CHECK(FALSE, return); } gboolean process_lrm_event(lrmd_event_data_t * op) { char *op_id = NULL; char *op_key = NULL; int update_id = 0; int log_level = LOG_ERR; gboolean removed = FALSE; lrmd_rsc_info_t *rsc = NULL; struct recurring_op_s *pending = NULL; CRM_CHECK(op != NULL, return FALSE); if (op->type == lrmd_event_disconnect) { lrm_connection_destroy(); return TRUE; } else if (op->type != lrmd_event_exec_complete) { return TRUE; } CRM_CHECK(op->rsc_id != NULL, return FALSE); op_id = make_stop_id(op->rsc_id, op->call_id); pending = g_hash_table_lookup(pending_ops, op_id); op_key = generate_op_key(op->rsc_id, op->op_type, op->interval); rsc = fsa_lrm_conn->cmds->get_rsc_info(fsa_lrm_conn, op->rsc_id, 0); switch (op->op_status) { case PCMK_LRM_OP_ERROR: case PCMK_LRM_OP_PENDING: case PCMK_LRM_OP_NOTSUPPORTED: break; case PCMK_LRM_OP_CANCELLED: log_level = LOG_INFO; break; case PCMK_LRM_OP_DONE: log_level = LOG_NOTICE; break; case PCMK_LRM_OP_TIMEOUT: log_level = LOG_DEBUG_3; crm_err("LRM operation %s (%d) %s (timeout=%dms)", op_key, op->call_id, services_lrm_status_str(op->op_status), op->timeout); break; default: crm_err("Mapping unknown status (%d) to ERROR", op->op_status); op->op_status = PCMK_LRM_OP_ERROR; } if (op->op_status == PCMK_LRM_OP_ERROR && (op->rc == PCMK_EXECRA_RUNNING_MASTER || op->rc == PCMK_EXECRA_NOT_RUNNING)) { /* Leave it up to the TE/PE to decide if this is an error */ op->op_status = PCMK_LRM_OP_DONE; log_level = LOG_INFO; } if (op->op_status != PCMK_LRM_OP_CANCELLED) { if (safe_str_eq(op->op_type, RSC_NOTIFY)) { /* Keep notify ops out of the CIB */ send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); } else { update_id = do_update_resource(rsc, op); } } else if (op->interval == 0) { /* This will occur when "crm resource cleanup" is called while actions are in-flight */ crm_err("Op %s (call=%d): Cancelled", op_key, op->call_id); send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); } else if (pending == NULL) { /* Operations that are cancelled may safely be removed * from the pending op list before the lrmd completion event * is received. Only report non-cancelled ops here. */ if (op->op_status != PCMK_LRM_OP_CANCELLED) { crm_err("Op %s (call=%d): No 'pending' entry", op_key, op->call_id); } } else if (op->user_data == NULL) { crm_err("Op %s (call=%d): No user data", op_key, op->call_id); } else if (pending->remove) { delete_op_entry(op, op->rsc_id, op_key, op->call_id); } else { /* Before a stop is called, no need to direct ack */ crm_trace("Op %s (call=%d): no delete event required", op_key, op->call_id); } if ((op->interval == 0) && g_hash_table_remove(pending_ops, op_id)) { removed = TRUE; crm_trace("Op %s (call=%d, stop-id=%s): Confirmed", op_key, op->call_id, op_id); } if (op->op_status == PCMK_LRM_OP_DONE) { do_crm_log(log_level, "LRM operation %s (call=%d, rc=%d, cib-update=%d, confirmed=%s) %s", op_key, op->call_id, op->rc, update_id, removed ? "true" : "false", lrmd_event_rc2str(op->rc)); } else { do_crm_log(log_level, "LRM operation %s (call=%d, status=%d, cib-update=%d, confirmed=%s) %s", op_key, op->call_id, op->op_status, update_id, removed ? "true" : "false", services_lrm_status_str(op->op_status)); } - if (op->rc != 0 && op->output != NULL) { - crm_info("Result: %s", op->output); - } else if (op->output != NULL) { - crm_debug("Result: %s", op->output); + if (op->output) { + char *prefix = g_strdup_printf("%s_%s_%d:%d", op->rsc_id, op->op_type, op->interval, op->call_id); + + if (op->rc) { + crm_log_output(LOG_NOTICE, prefix, op->output); + } else { + crm_log_output(LOG_DEBUG, prefix, op->output); + } + g_free(prefix); } if (op->rsc_deleted) { crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key); delete_rsc_entry(NULL, op->rsc_id, NULL, pcmk_ok, NULL); } /* If a shutdown was escalated while operations were pending, * then the FSA will be stalled right now... allow it to continue */ mainloop_set_trigger(fsa_source); update_history_cache(rsc, op); lrmd_free_rsc_info(rsc); free(op_key); free(op_id); return TRUE; } diff --git a/include/crm/common/logging.h b/include/crm/common/logging.h index cf190064c0..b6f153f990 100644 --- a/include/crm/common/logging.h +++ b/include/crm/common/logging.h @@ -1,169 +1,171 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /** * \file * \brief Wrappers for and extensions to libqb logging * \ingroup core */ #ifndef CRM_LOGGING__H # define CRM_LOGGING__H # include # ifndef LOG_TRACE # define LOG_TRACE LOG_DEBUG+1 # endif # define LOG_DEBUG_2 LOG_TRACE # define LOG_DEBUG_3 LOG_TRACE # define LOG_DEBUG_4 LOG_TRACE # define LOG_DEBUG_5 LOG_TRACE # define LOG_DEBUG_6 LOG_TRACE extern unsigned int crm_log_level; extern gboolean crm_config_error; extern gboolean crm_config_warning; extern unsigned int crm_trace_nonlog; void crm_enable_blackbox(int nsig); void crm_enable_blackbox_tracing(int nsig); void crm_write_blackbox(int nsig, struct qb_log_callsite *callsite); void crm_update_callsites(void); void crm_log_deinit(void); gboolean crm_log_cli_init(const char *entity); gboolean crm_log_init(const char *entity, int level, gboolean daemon, gboolean to_stderr, int argc, char **argv, gboolean quiet); void crm_log_args(int argc, char **argv); +void crm_log_output_fn(const char *file, const char *function, int line, int level, const char *prefix, const char *output); +#define crm_log_output(level, prefix, output) crm_log_output_fn(__FILE__, __FUNCTION__, __LINE__, level, prefix, output) gboolean crm_add_logfile(const char *filename); void crm_bump_log_level(int argc, char **argv); void crm_enable_stderr(int enable); gboolean crm_is_callsite_active(struct qb_log_callsite *cs, int level, int tags); int log_data_element(int log_level, const char *file, const char *function, int line, const char *prefix, xmlNode * data, int depth, gboolean formatted); /* returns the old value */ unsigned int set_crm_log_level(unsigned int level); unsigned int get_crm_log_level(void); /* * Throughout the macros below, note the leading, pre-comma, space in the * various ' , ##args' occurences to aid portability across versions of 'gcc'. * http://gcc.gnu.org/onlinedocs/cpp/Variadic-Macros.html#Variadic-Macros */ # define CRM_TRACE_INIT_DATA(name) QB_LOG_INIT_DATA(name) # define do_crm_log(level, fmt, args...) qb_log_from_external_source( __func__, __FILE__, fmt, level, __LINE__, 0, ##args) /* level /MUST/ be a constant or compilation will fail */ # define do_crm_log_unlikely(level, fmt, args...) do { \ static struct qb_log_callsite *trace_cs = NULL; \ if(trace_cs == NULL) { \ trace_cs = qb_log_callsite_get(__func__, __FILE__, fmt, level, __LINE__, 0); \ } \ if (crm_is_callsite_active(trace_cs, level, 0)) { \ qb_log_from_external_source( \ __func__, __FILE__, fmt, level, __LINE__, 0, ##args); \ } \ } while(0) # define CRM_LOG_ASSERT(expr) do { \ if(__unlikely((expr) == FALSE)) { \ static struct qb_log_callsite *core_cs = NULL; \ if(core_cs == NULL) { \ core_cs = qb_log_callsite_get(__func__, __FILE__, "log-assert", LOG_TRACE, __LINE__, 0); \ } \ crm_abort(__FILE__, __PRETTY_FUNCTION__, __LINE__, #expr, \ core_cs?core_cs->targets:FALSE, TRUE); \ } \ } while(0) # define CRM_CHECK(expr, failure_action) do { \ if(__unlikely((expr) == FALSE)) { \ static struct qb_log_callsite *core_cs = NULL; \ if(core_cs == NULL) { \ core_cs = qb_log_callsite_get(__func__, __FILE__, "check-assert", LOG_TRACE, __LINE__, 0); \ } \ crm_abort(__FILE__, __PRETTY_FUNCTION__, __LINE__, #expr, \ core_cs?core_cs->targets:FALSE, TRUE); \ failure_action; \ } \ } while(0) # define do_crm_log_xml(level, text, xml) do { \ static struct qb_log_callsite *xml_cs = NULL; \ if(xml_cs == NULL) { \ xml_cs = qb_log_callsite_get(__func__, __FILE__, "xml-blog", level, __LINE__, 0); \ } \ if (crm_is_callsite_active(xml_cs, level, 0)) { \ log_data_element(level, __FILE__, __PRETTY_FUNCTION__, __LINE__, text, xml, 0, TRUE); \ } \ } while(0) # define do_crm_log_alias(level, file, function, line, fmt, args...) do { \ qb_log_from_external_source(function, file, fmt, level, line, 0, ##args); \ } while(0) # define do_crm_log_always(level, fmt, args...) qb_log(level, "%s: " fmt, __PRETTY_FUNCTION__ , ##args) # define crm_perror(level, fmt, args...) do { \ const char *err = strerror(errno); \ fprintf(stderr, fmt ": %s (%d)\n", ##args, err, errno); \ do_crm_log(level, fmt ": %s (%d)", ##args, err, errno); \ } while(0) # define crm_log_tag(level, tag, fmt, args...) do { \ static struct qb_log_callsite *trace_tag_cs = NULL; \ int converted_tag = g_quark_try_string(tag); \ if(trace_tag_cs == NULL) { \ trace_tag_cs = qb_log_callsite_get(__func__, __FILE__, fmt, level, __LINE__, converted_tag); \ } \ if (crm_is_callsite_active(trace_tag_cs, level, converted_tag)) { \ qb_log_from_external_source( __func__, __FILE__, fmt, level, __LINE__, converted_tag, ##args); \ } \ } while(0) # define crm_crit(fmt, args...) qb_logt(LOG_CRIT, 0, fmt , ##args) # define crm_err(fmt, args...) qb_logt(LOG_ERR, 0, fmt , ##args) # define crm_warn(fmt, args...) qb_logt(LOG_WARNING, 0, fmt , ##args) # define crm_notice(fmt, args...) qb_logt(LOG_NOTICE, 0, fmt , ##args) # define crm_info(fmt, args...) qb_logt(LOG_INFO, 0, fmt , ##args) # define crm_debug(fmt, args...) do_crm_log_unlikely(LOG_DEBUG, fmt , ##args) # define crm_trace(fmt, args...) do_crm_log_unlikely(LOG_TRACE, fmt , ##args) # define crm_log_xml_crit(xml, text) do_crm_log_xml(LOG_CRIT, text, xml) # define crm_log_xml_err(xml, text) do_crm_log_xml(LOG_ERR, text, xml) # define crm_log_xml_warn(xml, text) do_crm_log_xml(LOG_WARNING, text, xml) # define crm_log_xml_notice(xml, text) do_crm_log_xml(LOG_NOTICE, text, xml) # define crm_log_xml_info(xml, text) do_crm_log_xml(LOG_INFO, text, xml) # define crm_log_xml_debug(xml, text) do_crm_log_xml(LOG_DEBUG, text, xml) # define crm_log_xml_trace(xml, text) do_crm_log_xml(LOG_TRACE, text, xml) # define crm_str(x) (const char*)(x?x:"") #endif diff --git a/lib/common/logging.c b/lib/common/logging.c index 9906384c24..98b8bc778b 100644 --- a/lib/common/logging.c +++ b/lib/common/logging.c @@ -1,859 +1,879 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include unsigned int crm_log_level = LOG_INFO; static gboolean crm_tracing_enabled(void); unsigned int crm_trace_nonlog = 0; #ifdef HAVE_G_LOG_SET_DEFAULT_HANDLER GLogFunc glib_log_default; static void crm_glib_handler(const gchar * log_domain, GLogLevelFlags flags, const gchar * message, gpointer user_data) { int log_level = LOG_WARNING; GLogLevelFlags msg_level = (flags & G_LOG_LEVEL_MASK); switch (msg_level) { case G_LOG_LEVEL_CRITICAL: /* log and record how we got here */ crm_abort(__FILE__, __PRETTY_FUNCTION__, __LINE__, message, TRUE, TRUE); return; case G_LOG_LEVEL_ERROR: log_level = LOG_ERR; break; case G_LOG_LEVEL_MESSAGE: log_level = LOG_NOTICE; break; case G_LOG_LEVEL_INFO: log_level = LOG_INFO; break; case G_LOG_LEVEL_DEBUG: log_level = LOG_DEBUG; break; case G_LOG_LEVEL_WARNING: case G_LOG_FLAG_RECURSION: case G_LOG_FLAG_FATAL: case G_LOG_LEVEL_MASK: log_level = LOG_WARNING; break; } do_crm_log(log_level, "%s: %s", log_domain, message); } #endif #ifndef NAME_MAX # define NAME_MAX 256 #endif static void crm_trigger_blackbox(int nsig) { crm_write_blackbox(nsig, NULL); } const char * daemon_option(const char *option) { char env_name[NAME_MAX]; const char *value = NULL; snprintf(env_name, NAME_MAX, "PCMK_%s", option); value = getenv(env_name); if (value != NULL) { crm_trace("Found %s = %s", env_name, value); return value; } snprintf(env_name, NAME_MAX, "HA_%s", option); value = getenv(env_name); if (value != NULL) { crm_trace("Found %s = %s", env_name, value); return value; } crm_trace("Nothing found for %s", option); return NULL; } void set_daemon_option(const char *option, const char *value) { char env_name[NAME_MAX]; snprintf(env_name, NAME_MAX, "PCMK_%s", option); if(value) { crm_trace("Setting %s to %s", env_name, value); setenv(env_name, value, 1); } else { crm_trace("Unsetting %s", env_name); unsetenv(env_name); } snprintf(env_name, NAME_MAX, "HA_%s", option); if(value) { crm_trace("Setting %s to %s", env_name, value); setenv(env_name, value, 1); } else { crm_trace("Unsetting %s", env_name); unsetenv(env_name); } } gboolean daemon_option_enabled(const char *daemon, const char *option) { const char *value = daemon_option(option); if (value != NULL && crm_is_true(value)) { return TRUE; } else if (value != NULL && strstr(value, daemon)) { return TRUE; } return FALSE; } void crm_log_deinit(void) { #ifdef HAVE_G_LOG_SET_DEFAULT_HANDLER g_log_set_default_handler(glib_log_default, NULL); #endif } #define FMT_MAX 256 static void set_format_string(int method, const char *daemon) { int offset = 0; char fmt[FMT_MAX]; if (method > QB_LOG_STDERR) { /* When logging to a file */ struct utsname res; if (uname(&res) == 0) { offset += snprintf(fmt + offset, FMT_MAX - offset, "%%t [%d] %s %10s: ", getpid(), res.nodename, daemon); } else { offset += snprintf(fmt + offset, FMT_MAX - offset, "%%t [%d] %10s: ", getpid(), daemon); } } if (crm_tracing_enabled() && method >= QB_LOG_STDERR) { offset += snprintf(fmt + offset, FMT_MAX - offset, "(%%-12f:%%5l %%g) %%-7p: %%n: "); } else { offset += snprintf(fmt + offset, FMT_MAX - offset, "%%g %%-7p: %%n: "); } if (method == QB_LOG_SYSLOG) { offset += snprintf(fmt + offset, FMT_MAX - offset, "%%b"); } else { offset += snprintf(fmt + offset, FMT_MAX - offset, "\t%%b"); } qb_log_format_set(method, fmt); } gboolean crm_add_logfile(const char *filename) { struct stat parent; int fd = 0, rc = 0; FILE *logfile = NULL; char *parent_dir = NULL; static gboolean have_logfile = FALSE; if(filename == NULL && have_logfile == FALSE) { filename = "/var/log/pacemaker.log"; } if (filename == NULL) { return FALSE; /* Nothing to do */ } /* Check the parent directory and attempt to open */ parent_dir = dirname(strdup(filename)); rc = stat(parent_dir, &parent); if (rc != 0) { crm_err("Directory '%s' does not exist: logging to '%s' is disabled", parent_dir, filename); return FALSE; } else if (parent.st_uid == geteuid() && (parent.st_mode & (S_IRUSR | S_IWUSR))) { /* all good - user */ logfile = fopen(filename, "a"); } else if (parent.st_gid == getegid() && (parent.st_mode & S_IXGRP)) { /* all good - group */ logfile = fopen(filename, "a"); } else { crm_err("We (uid=%u, gid=%u) do not have permission to access '%s': logging to '%s' is disabled", geteuid(), getegid(), parent_dir, filename); return FALSE; } /* Check/Set permissions if we're root */ if(logfile && geteuid() == 0) { struct stat st; uid_t pcmk_uid = 0; gid_t pcmk_gid = 0; gboolean fix = FALSE; int logfd = fileno(logfile); rc = fstat(logfd, &st); if(rc < 0) { crm_perror(LOG_WARNING, "Cannot stat %s", filename); fclose(logfile); return FALSE; } crm_user_lookup(CRM_DAEMON_USER, &pcmk_uid, &pcmk_gid); if(st.st_gid != pcmk_gid) { /* Wrong group */ fix = TRUE; } else if((st.st_mode & S_IRWXG) != (S_IRGRP|S_IWGRP)) { /* Not read/writable by the correct group */ fix = TRUE; } if(fix) { rc = fchown(logfd, pcmk_uid, pcmk_gid); if(rc < 0) { crm_warn("Cannot change the ownership of %s to user %s and gid %d", filename, CRM_DAEMON_USER, pcmk_gid); } rc = fchmod(logfd, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); if(rc < 0) { crm_warn("Cannot change the mode of %s to rw-rw----", filename); } fprintf(logfile, "Set r/w permissions for uid=%d, gid=%d on %s\n", pcmk_uid, pcmk_gid, filename); if(fflush(logfile) < 0 || fsync(logfd) < 0) { crm_err("Couldn't write out logfile: %s", filename); } } } if(logfile) { fclose(logfile); } /* Now open with libqb */ fd = qb_log_file_open(filename); if(fd < 0) { crm_perror(LOG_WARNING, "Couldn't send additional logging to %s", filename); return FALSE; } crm_notice("Additional logging available in %s", filename); qb_log_ctl(fd, QB_LOG_CONF_ENABLED, QB_TRUE); /* Enable callsites */ crm_update_callsites(); have_logfile = TRUE; return TRUE; } static int blackbox_trigger = 0; static char *blackbox_file_prefix = NULL; static void blackbox_logger(int32_t t, struct qb_log_callsite *cs, time_t timestamp, const char *msg) { crm_write_blackbox(0, cs); } void crm_enable_blackbox(int nsig) { if(blackbox_file_prefix == NULL) { pid_t pid = getpid(); blackbox_file_prefix = malloc(NAME_MAX); snprintf(blackbox_file_prefix, NAME_MAX, "%s/%s-%d", CRM_BLACKBOX_DIR, crm_system_name, pid); } if (qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_STATE_GET, 0) != QB_LOG_STATE_ENABLED) { qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_SIZE, 5*1024*1024); /* Any size change drops existing entries */ qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_TRUE); /* Setting the size seems to disable it */ crm_notice("Initiated blackbox recorder: %s", blackbox_file_prefix); crm_signal(SIGSEGV, crm_trigger_blackbox); crm_update_callsites(); /* Original meanings from signal(7) * * Signal Value Action Comment * SIGTRAP 5 Core Trace/breakpoint trap * * Our usage is as similar as possible */ mainloop_add_signal(SIGTRAP, crm_trigger_blackbox); blackbox_trigger = qb_log_custom_open(blackbox_logger, NULL, NULL, NULL); qb_log_ctl(blackbox_trigger, QB_LOG_CONF_ENABLED, QB_TRUE); crm_info("Trigger: %d is %d %d", blackbox_trigger, qb_log_ctl(blackbox_trigger, QB_LOG_CONF_STATE_GET, 0), QB_LOG_STATE_ENABLED); crm_update_callsites(); } } void crm_write_blackbox(int nsig, struct qb_log_callsite *cs) { static int counter = 1; static time_t last = 0; char buffer[NAME_MAX]; time_t now = time(NULL); if(blackbox_file_prefix == NULL) { return; } switch(nsig) { case 0: case SIGTRAP: /* The graceful case - such as assertion failure or user request */ snprintf(buffer, NAME_MAX, "%s.%d", blackbox_file_prefix, counter++); if(nsig == 0 && (now - last) < 2) { /* Prevent over-dumping */ return; } else if(nsig == SIGTRAP) { crm_notice("Blackbox dump requested, please see %s for contents", buffer); } else if(cs) { syslog(LOG_NOTICE, "Problem detected at %s:%d (%s), please see %s for additional details", cs->function, cs->lineno, cs->filename, buffer); } else { crm_notice("Problem detected, please see %s for additional details", buffer); } last = now; qb_log_blackbox_write_to_file(buffer); /* Flush the existing contents * A size change would also work */ qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_FALSE); qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_TRUE); break; default: /* Do as little as possible, just try to get what we have out * We logged the filename when the blackbox was enabled */ crm_signal(nsig, SIG_DFL); qb_log_blackbox_write_to_file(blackbox_file_prefix); qb_log_fini(); raise(nsig); break; } } gboolean crm_log_cli_init(const char *entity) { return crm_log_init(entity, LOG_ERR, FALSE, FALSE, 0, NULL, TRUE); } static const char *crm_quark_to_string(uint32_t tag) { const char *text = g_quark_to_string(tag); if(text) { return text; } return ""; } static void crm_log_filter_source(int source, const char *trace_files, const char *trace_fns, const char *trace_fmts, const char *trace_tags, const char *trace_blackbox, struct qb_log_callsite *cs) { if (qb_log_ctl(source, QB_LOG_CONF_STATE_GET, 0) != QB_LOG_STATE_ENABLED) { return; } else if(cs->tags != crm_trace_nonlog && source == QB_LOG_BLACKBOX) { /* Blackbox gets everything if enabled */ qb_bit_set(cs->targets, source); } else if(source == blackbox_trigger && blackbox_trigger > 0) { /* Should this log message result in the blackbox being dumped */ if(cs->priority <= LOG_ERR) { qb_bit_set(cs->targets, source); } else if(trace_blackbox) { char *key = g_strdup_printf("%s:%d", cs->function, cs->lineno); if(strstr(trace_blackbox, key) != NULL) { qb_bit_set(cs->targets, source); } free(key); } } else if (source == QB_LOG_SYSLOG) { /* No tracing to syslog */ if(cs->priority <= LOG_NOTICE && cs->priority <= crm_log_level) { qb_bit_set(cs->targets, source); } /* Log file tracing options... */ } else if (cs->priority <= crm_log_level) { qb_bit_set(cs->targets, source); } else if(trace_files && strstr(trace_files, cs->filename) != NULL) { qb_bit_set(cs->targets, source); } else if(trace_fns && strstr(trace_fns, cs->function) != NULL) { qb_bit_set(cs->targets, source); } else if(trace_fmts && strstr(trace_fmts, cs->format) != NULL) { qb_bit_set(cs->targets, source); } else if(trace_tags && cs->tags != 0 && cs->tags != crm_trace_nonlog && g_quark_to_string(cs->tags) != NULL) { qb_bit_set(cs->targets, source); } } static void crm_log_filter(struct qb_log_callsite *cs) { int lpc = 0; static int need_init = 1; static const char *trace_fns = NULL; static const char *trace_tags = NULL; static const char *trace_fmts = NULL; static const char *trace_files = NULL; static const char *trace_blackbox = NULL; if(need_init) { need_init = 0; trace_fns = getenv("PCMK_trace_functions"); trace_fmts = getenv("PCMK_trace_formats"); trace_tags = getenv("PCMK_trace_tags"); trace_files = getenv("PCMK_trace_files"); trace_blackbox = getenv("PCMK_trace_blackbox"); if (trace_tags != NULL) { uint32_t tag; char token[500]; const char *offset = NULL; const char *next = trace_tags; do { offset = next; next = strchrnul(offset, ','); snprintf(token, 499, "%.*s", (int)(next - offset), offset); tag = g_quark_from_string(token); crm_info("Created GQuark %u from token '%s' in '%s'", tag, token, trace_tags); if (next[0] != 0) { next++; } } while (next != NULL && next[0] != 0); } } cs->targets = 0; /* Reset then find targets to enable */ for (lpc = QB_LOG_SYSLOG; lpc < QB_LOG_TARGET_MAX; lpc++) { crm_log_filter_source(lpc, trace_files, trace_fns, trace_fmts, trace_tags, trace_blackbox, cs); } } gboolean crm_is_callsite_active(struct qb_log_callsite *cs, int level, int tags) { gboolean refilter = FALSE; if (cs == NULL) { return FALSE; } if (cs->priority != level) { cs->priority = level; refilter = TRUE; } if (cs->tags != tags) { cs->tags = tags; refilter = TRUE; } if (refilter) { crm_log_filter(cs); } if (cs->targets == 0) { return FALSE; } return TRUE; } void crm_update_callsites(void) { static gboolean log = TRUE; if(log) { log = FALSE; crm_debug("Enabling callsites based on priority=%d, files=%s, functions=%s, formats=%s, tags=%s", crm_log_level, getenv("PCMK_trace_files"), getenv("PCMK_trace_functions"), getenv("PCMK_trace_formats"), getenv("PCMK_trace_tags")); } qb_log_filter_fn_set(crm_log_filter); } static gboolean crm_tracing_enabled(void) { if(crm_log_level >= LOG_TRACE) { return TRUE; } else if(getenv("PCMK_trace_files") || getenv("PCMK_trace_functions") || getenv("PCMK_trace_formats") || getenv("PCMK_trace_tags")) { return TRUE; } return FALSE; } gboolean crm_log_init(const char *entity, int level, gboolean daemon, gboolean to_stderr, int argc, char **argv, gboolean quiet) { int lpc = 0; const char *logfile = daemon_option("debugfile"); const char *facility = daemon_option("logfacility"); const char *f_copy = facility; if(crm_trace_nonlog == 0) { crm_trace_nonlog = g_quark_from_static_string("Pacemaker non-logging tracepoint"); } umask(S_IWGRP | S_IWOTH | S_IROTH); /* Redirect messages from glib functions to our handler */ #ifdef HAVE_G_LOG_SET_DEFAULT_HANDLER glib_log_default = g_log_set_default_handler(crm_glib_handler, NULL); #endif /* and for good measure... - this enum is a bit field (!) */ g_log_set_always_fatal((GLogLevelFlags) 0); /*value out of range */ if (facility == NULL) { facility = "daemon"; } else if(safe_str_eq(facility, "none")) { facility = "daemon"; quiet = TRUE; } if (entity) { crm_system_name = entity; } else if (argc > 0 && argv != NULL) { char *mutable = strdup(argv[0]); crm_system_name = basename(mutable); if (strstr(crm_system_name, "lt-") == crm_system_name) { crm_system_name += 3; } } else if (crm_system_name == NULL) { crm_system_name = "Unknown"; } setenv("PCMK_service", crm_system_name, 1); if (daemon_option_enabled(crm_system_name, "debug")) { /* Override the default setting */ level = LOG_DEBUG; } if (daemon_option_enabled(crm_system_name, "stderr")) { /* Override the default setting */ to_stderr = TRUE; } crm_log_level = level; qb_log_init(crm_system_name, qb_log_facility2int(facility), level); qb_log_tags_stringify_fn_set(crm_quark_to_string); /* Set default format strings */ for (lpc = QB_LOG_SYSLOG; lpc < QB_LOG_TARGET_MAX; lpc++) { set_format_string(lpc, crm_system_name); } crm_enable_stderr(to_stderr); if(logfile) { crm_add_logfile(logfile); } if (daemon_option_enabled(crm_system_name, "blackbox")) { crm_enable_blackbox(0); } crm_trace("Quiet: %d, facility %s", quiet, f_copy); daemon_option("debugfile"); daemon_option("logfacility"); if (quiet) { /* Nuke any syslog activity */ facility = NULL; qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_FALSE); } if(daemon) { set_daemon_option("logfacility", facility); } if(daemon && crm_tracing_enabled() && qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_STATE_GET, 0) != QB_LOG_STATE_ENABLED && qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_STATE_GET, 0) != QB_LOG_STATE_ENABLED) { /* Make sure tracing goes somewhere */ crm_add_logfile(NULL); } crm_update_callsites(); /* Ok, now we can start logging... */ if (quiet == FALSE && daemon == FALSE) { crm_log_args(argc, argv); } if (daemon) { const char *user = getenv("USER"); if (user != NULL && safe_str_neq(user, "root") && safe_str_neq(user, CRM_DAEMON_USER)) { crm_trace("Not switching to corefile directory for %s", user); daemon = FALSE; } } if (daemon) { int user = getuid(); const char *base = CRM_CORE_DIR; struct passwd *pwent = getpwuid(user); if (pwent == NULL) { crm_perror(LOG_ERR, "Cannot get name for uid: %d", user); } else if (safe_str_neq(pwent->pw_name, "root") && safe_str_neq(pwent->pw_name, CRM_DAEMON_USER)) { crm_trace("Don't change active directory for regular user: %s", pwent->pw_name); } else if (chdir(base) < 0) { crm_perror(LOG_INFO, "Cannot change active directory to %s", base); } else if (chdir(pwent->pw_name) < 0) { crm_perror(LOG_INFO, "Cannot change active directory to %s/%s", base, pwent->pw_name); } else { crm_info("Changed active directory to %s/%s", base, pwent->pw_name); #if 0 { char path[512]; snprintf(path, 512, "%s-%d", crm_system_name, getpid()); mkdir(path, 0750); chdir(path); crm_info("Changed active directory to %s/%s/%s", base, pwent->pw_name, path); } #endif } mainloop_add_signal(SIGUSR1, crm_enable_blackbox); } return TRUE; } /* returns the old value */ unsigned int set_crm_log_level(unsigned int level) { unsigned int old = crm_log_level; crm_log_level = level; crm_update_callsites(); crm_trace("New log level: %d", level); return old; } void crm_enable_stderr(int enable) { if (enable && qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_STATE_GET, 0) != QB_LOG_STATE_ENABLED) { qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_ENABLED, QB_TRUE); crm_update_callsites(); } else if (enable == FALSE) { qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_ENABLED, QB_FALSE); } } void crm_bump_log_level(int argc, char **argv) { static int args = TRUE; int level = crm_log_level; if(args && argc > 1) { crm_log_args(argc, argv); } if (qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_STATE_GET, 0) == QB_LOG_STATE_ENABLED) { set_crm_log_level(level + 1); } /* Enable after potentially logging the argstring, not before */ crm_enable_stderr(TRUE); } unsigned int get_crm_log_level(void) { return crm_log_level; } #define ARGS_FMT "Invoked: %s" void crm_log_args(int argc, char **argv) { int lpc = 0; int len = 0; int restore = FALSE; int existing_len = 0; int line = __LINE__; static int logged = 0; char *arg_string = NULL; struct qb_log_callsite *args_cs = qb_log_callsite_get(__func__, __FILE__, ARGS_FMT, LOG_NOTICE, line, 0); if (argc == 0 || argv == NULL || logged) { return; } logged = 1; qb_bit_set(args_cs->targets, QB_LOG_SYSLOG); /* Turn on syslog too */ restore = qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_STATE_GET, 0); qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_TRUE); for (; lpc < argc; lpc++) { if (argv[lpc] == NULL) { break; } len = 2 + strlen(argv[lpc]); /* +1 space, +1 EOS */ arg_string = realloc(arg_string, len + existing_len); existing_len += sprintf(arg_string + existing_len, "%s ", argv[lpc]); } qb_log_from_external_source(__func__, __FILE__, ARGS_FMT, LOG_NOTICE, line, 0, arg_string); qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, restore); free(arg_string); } const char * pcmk_strerror(int rc) { int error = rc; if(rc < 0) { error = 0 - rc; } if(error == 0) { return "OK"; } else if(error < PCMK_ERROR_OFFSET) { return strerror(error); } switch(error) { case pcmk_err_generic: return "Generic Pacemaker error"; case pcmk_err_no_quorum: return "Operation requires quorum"; case pcmk_err_dtd_validation: return "Update does not conform to the configured schema"; case pcmk_err_transform_failed: return "Schema transform failed"; case pcmk_err_old_data: return "Update was older than existing configuration"; case pcmk_err_diff_failed: return "Application of an update diff failed"; case pcmk_err_diff_resync: return "Application of an update diff failed, requesting a full refresh"; /* The following cases will only be hit on systems for which they are non-standard */ /* coverity[dead_error_condition] False positive on non-Linux */ case ENOTUNIQ: return "Name not unique on network"; /* coverity[dead_error_condition] False positive on non-Linux */ case ECOMM: return "Communication error on send"; /* coverity[dead_error_condition] False positive on non-Linux */ case ELIBACC: return "Can not access a needed shared library"; /* coverity[dead_error_condition] False positive on non-Linux */ case EREMOTEIO: return "Remote I/O error"; /* coverity[dead_error_condition] False positive on non-Linux */ case EUNATCH: return "Protocol driver not attached"; /* coverity[dead_error_condition] False positive on non-Linux */ case ENOKEY: return "Required key not available"; } crm_err("Unknown error code: %d", rc); return "Unknown error"; } + + +void crm_log_output_fn(const char *file, const char *function, int line, int level, const char *prefix, const char *output) +{ + const char *next = NULL; + const char *offset = NULL; + + if(output) { + next = output; + do { + offset = next; + next = strchrnul(offset, '\n'); + do_crm_log_alias(level, file, function, line, "%s [ %.*s ]", (int) (next - offset), offset); + if (next[0] != 0) { + next++; + } + + } while (next != NULL && next[0] != 0); + } +} diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c index af3172ad56..932584e7ff 100644 --- a/lib/services/services_linux.c +++ b/lib/services/services_linux.c @@ -1,556 +1,531 @@ /* * Copyright (C) 2010 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include "crm/crm.h" #include "crm/common/mainloop.h" #include "crm/services.h" #include "services_private.h" static inline void set_fd_opts(int fd, int opts) { int flag; if ((flag = fcntl(fd, F_GETFL)) >= 0) { if (fcntl(fd, F_SETFL, flag | opts) < 0) { crm_err( "fcntl() write failed"); } } else { crm_err( "fcntl() read failed"); } } static gboolean read_output(int fd, svc_action_t *op) { char *data = NULL; int rc = 0, len = 0; gboolean is_err = FALSE; char buf[500]; static const size_t buf_read_len = sizeof(buf) - 1; crm_trace("%p", op); if (fd < 0) { return FALSE; } if (fd == op->opaque->stderr_fd) { is_err = TRUE; if (op->stderr_data) { len = strlen(op->stderr_data); data = op->stderr_data; } } else if (op->stdout_data) { len = strlen(op->stdout_data); data = op->stdout_data; } do { rc = read(fd, buf, buf_read_len); if (rc > 0) { buf[rc] = 0; data = realloc(data, len + rc + 1); sprintf(data + len, "%s", buf); len += rc; } else if (errno != EINTR) { /* error or EOF * Cleanup happens in pipe_done() */ rc = FALSE; break; } } while (rc == buf_read_len || rc < 0); if (data != NULL && is_err) { op->stderr_data = data; } else if (data != NULL) { op->stdout_data = data; } return rc; } static int dispatch_stdout(gpointer userdata) { svc_action_t* op = (svc_action_t *) userdata; return read_output(op->opaque->stdout_fd, op); } static int dispatch_stderr(gpointer userdata) { svc_action_t* op = (svc_action_t *) userdata; return read_output(op->opaque->stderr_fd, op); } static void pipe_out_done(gpointer user_data) { svc_action_t* op = (svc_action_t *) user_data; crm_trace("%p", op); op->opaque->stdout_gsource = NULL; if (op->opaque->stdout_fd > STDOUT_FILENO) { close(op->opaque->stdout_fd); } op->opaque->stdout_fd = -1; } static void pipe_err_done(gpointer user_data) { svc_action_t* op = (svc_action_t *) user_data; op->opaque->stderr_gsource = NULL; if (op->opaque->stderr_fd > STDERR_FILENO) { close(op->opaque->stderr_fd); } op->opaque->stderr_fd = -1; } static struct mainloop_fd_callbacks stdout_callbacks = { .dispatch = dispatch_stdout, .destroy = pipe_out_done, }; static struct mainloop_fd_callbacks stderr_callbacks = { .dispatch = dispatch_stderr, .destroy = pipe_err_done, }; static void set_ocf_env(const char *key, const char *value, gpointer user_data) { if (setenv(key, value, 1) != 0) { crm_perror(LOG_ERR, "setenv failed for key:%s and value:%s", key, value); } } static void set_ocf_env_with_prefix(gpointer key, gpointer value, gpointer user_data) { char buffer[500]; snprintf(buffer, sizeof(buffer), "OCF_RESKEY_%s", (char *) key); set_ocf_env(buffer, value, user_data); } static void add_OCF_env_vars(svc_action_t *op) { if (!op->standard || strcasecmp("ocf", op->standard) != 0) { return; } if (op->params) { g_hash_table_foreach(op->params, set_ocf_env_with_prefix, NULL); } set_ocf_env("OCF_RA_VERSION_MAJOR", "1", NULL); set_ocf_env("OCF_RA_VERSION_MINOR", "0", NULL); set_ocf_env("OCF_ROOT", OCF_ROOT_DIR, NULL); if (op->rsc) { set_ocf_env("OCF_RESOURCE_INSTANCE", op->rsc, NULL); } if (op->agent != NULL) { set_ocf_env("OCF_RESOURCE_TYPE", op->agent, NULL); } /* Notes: this is not added to specification yet. Sept 10,2004 */ if (op->provider != NULL) { set_ocf_env("OCF_RESOURCE_PROVIDER", op->provider, NULL); } } gboolean recurring_action_timer(gpointer data) { svc_action_t *op = data; crm_debug("Scheduling another invokation of %s", op->id); /* Clean out the old result */ free(op->stdout_data); op->stdout_data = NULL; free(op->stderr_data); op->stderr_data = NULL; services_action_async(op, NULL); return FALSE; } void operation_finalize(svc_action_t *op) { int recurring = 0; if (op->interval) { if (op->cancel) { op->status = PCMK_LRM_OP_CANCELLED; cancel_recurring_action(op); } else { recurring = 1; op->opaque->repeat_timer = g_timeout_add(op->interval, recurring_action_timer, (void *) op); } } if (op->opaque->callback) { op->opaque->callback(op); } if (!recurring) { /* * If this is a recurring action, do not free explicitly. * It will get freed whenever the action gets cancelled. */ services_action_free(op); } } static void operation_finished(mainloop_child_t *p, int status, int signo, int exitcode) { - char *next = NULL; - char *offset = NULL; svc_action_t *op = mainloop_get_child_userdata(p); pid_t pid = mainloop_get_child_pid(p); + char *prefix = g_strdup_printf("%s:%d", op->id, op->pid); mainloop_clear_child_userdata(p); op->status = PCMK_LRM_OP_DONE; CRM_ASSERT(op->pid == pid); if (op->opaque->stderr_gsource) { /* Make sure we have read everything from the buffer. * Depending on the priority mainloop gives the fd, operation_finished * could occur before all the reads are done. Force the read now.*/ dispatch_stderr(op); } if (op->opaque->stdout_gsource) { /* Make sure we have read everything from the buffer. * Depending on the priority mainloop gives the fd, operation_finished * could occur before all the reads are done. Force the read now.*/ dispatch_stdout(op); } if (signo) { if (mainloop_get_child_timeout(p)) { - crm_warn("%s:%d - timed out after %dms", op->id, op->pid, + crm_warn("%s - timed out after %dms", prefix, op->timeout); op->status = PCMK_LRM_OP_TIMEOUT; op->rc = PCMK_OCF_TIMEOUT; } else { - crm_warn("%s:%d - terminated with signal %d", op->id, op->pid, + crm_warn("%s - terminated with signal %d", prefix, signo); op->status = PCMK_LRM_OP_ERROR; op->rc = PCMK_OCF_SIGNAL; } } else { op->rc = exitcode; - crm_debug("%s:%d - exited with rc=%d", op->id, op->pid, exitcode); - - if (op->stdout_data) { - next = op->stdout_data; - do { - offset = next; - next = strchrnul(offset, '\n'); - crm_debug("%s:%d [ %.*s ]", op->id, op->pid, - (int) (next - offset), offset); - if (next[0] != 0) { - next++; - } - - } while (next != NULL && next[0] != 0); - } - - if (op->stderr_data) { - next = op->stderr_data; - do { - offset = next; - next = strchrnul(offset, '\n'); - crm_notice("%s:%d [ %.*s ]", op->id, op->pid, - (int) (next - offset), offset); - if (next[0] != 0) { - next++; - } - - } while (next != NULL && next[0] != 0); - } + crm_debug("%s - exited with rc=%d", prefix, exitcode); } + crm_log_output(LOG_NOTICE, prefix, op->stderr_data); + crm_log_output(LOG_DEBUG, prefix, op->stdout_data); + op->pid = 0; + g_free(prefix); operation_finalize(op); } gboolean services_os_action_execute(svc_action_t* op, gboolean synchronous) { int rc, lpc; int stdout_fd[2]; int stderr_fd[2]; if (pipe(stdout_fd) < 0) { crm_err( "pipe() failed"); } if (pipe(stderr_fd) < 0) { crm_err( "pipe() failed"); } op->pid = fork(); switch (op->pid) { case -1: crm_err( "fork() failed"); close(stdout_fd[0]); close(stdout_fd[1]); close(stderr_fd[0]); close(stderr_fd[1]); return FALSE; case 0: /* Child */ /* Man: The call setpgrp() is equivalent to setpgid(0,0) * _and_ compiles on BSD variants too * need to investigate if it works the same too. */ setpgid(0, 0); close(stdout_fd[0]); close(stderr_fd[0]); if (STDOUT_FILENO != stdout_fd[1]) { if (dup2(stdout_fd[1], STDOUT_FILENO) != STDOUT_FILENO) { crm_err( "dup2() failed (stdout)"); } close(stdout_fd[1]); } if (STDERR_FILENO != stderr_fd[1]) { if (dup2(stderr_fd[1], STDERR_FILENO) != STDERR_FILENO) { crm_err( "dup2() failed (stderr)"); } close(stderr_fd[1]); } /* close all descriptors except stdin/out/err and channels to logd */ for (lpc = getdtablesize() - 1; lpc > STDERR_FILENO; lpc--) { close(lpc); } /* Setup environment correctly */ add_OCF_env_vars(op); /* execute the RA */ execvp(op->opaque->exec, op->opaque->args); switch (errno) { /* see execve(2) */ case ENOENT: /* No such file or directory */ case EISDIR: /* Is a directory */ rc = PCMK_OCF_NOT_INSTALLED; break; case EACCES: /* permission denied (various errors) */ rc = PCMK_OCF_INSUFFICIENT_PRIV; break; default: rc = PCMK_OCF_UNKNOWN_ERROR; break; } _exit(rc); } /* Only the parent reaches here */ close(stdout_fd[1]); close(stderr_fd[1]); op->opaque->stdout_fd = stdout_fd[0]; set_fd_opts(op->opaque->stdout_fd, O_NONBLOCK); op->opaque->stderr_fd = stderr_fd[0]; set_fd_opts(op->opaque->stderr_fd, O_NONBLOCK); if (synchronous) { int status = 0; int timeout = (1 + op->timeout) / 1000; crm_trace("Waiting for %d", op->pid); while ((op->timeout < 0 || timeout > 0) && waitpid(op->pid, &status, WNOHANG) <= 0) { sleep(1); read_output(op->opaque->stdout_fd, op); read_output(op->opaque->stderr_fd, op); timeout--; } crm_trace("Child done: %d", op->pid); if (timeout == 0) { int killrc = kill(op->pid, 9 /*SIGKILL*/); op->rc = PCMK_OCF_UNKNOWN_ERROR; op->status = PCMK_LRM_OP_TIMEOUT; crm_warn("%s:%d - timed out after %dms", op->id, op->pid, op->timeout); if (killrc && errno != ESRCH) { crm_err("kill(%d, KILL) failed: %d", op->pid, errno); } } else if (WIFEXITED(status)) { op->status = PCMK_LRM_OP_DONE; op->rc = WEXITSTATUS(status); crm_info("Managed %s process %d exited with rc=%d", op->id, op->pid, op->rc); } else if (WIFSIGNALED(status)) { int signo = WTERMSIG(status); op->status = PCMK_LRM_OP_ERROR; crm_err("Managed %s process %d exited with signal=%d", op->id, op->pid, signo); } #ifdef WCOREDUMP if (WCOREDUMP(status)) { crm_err("Managed %s process %d dumped core", op->id, op->pid); } #endif read_output(op->opaque->stdout_fd, op); read_output(op->opaque->stderr_fd, op); } else { crm_trace("Async waiting for %d - %s", op->pid, op->opaque->exec); mainloop_add_child(op->pid, op->timeout, op->id, op, operation_finished); op->opaque->stdout_gsource = mainloop_add_fd(op->id, G_PRIORITY_LOW, op->opaque->stdout_fd, op, &stdout_callbacks); op->opaque->stderr_gsource = mainloop_add_fd(op->id, G_PRIORITY_LOW, op->opaque->stderr_fd, op, &stderr_callbacks); } return TRUE; } GList * services_os_get_directory_list(const char *root, gboolean files) { GList *list = NULL; struct dirent **namelist; int entries = 0, lpc = 0; char buffer[PATH_MAX]; entries = scandir(root, &namelist, NULL, alphasort); if (entries <= 0) { return list; } for (lpc = 0; lpc < entries; lpc++) { struct stat sb; if ('.' == namelist[lpc]->d_name[0]) { free(namelist[lpc]); continue; } snprintf(buffer, sizeof(buffer), "%s/%s", root, namelist[lpc]->d_name); if (stat(buffer, &sb)) { continue; } if (S_ISDIR(sb.st_mode)) { if (files) { free(namelist[lpc]); continue; } } else if (S_ISREG(sb.st_mode)) { if (files == FALSE) { free(namelist[lpc]); continue; } else if ((sb.st_mode & S_IXUSR) == 0 && (sb.st_mode & S_IXGRP) == 0 && (sb.st_mode & S_IXOTH) == 0) { free(namelist[lpc]); continue; } } list = g_list_append(list, strdup(namelist[lpc]->d_name)); free(namelist[lpc]); } free(namelist); return list; } GList * resources_os_list_lsb_agents(void) { return get_directory_list(LSB_ROOT_DIR, TRUE); } GList * resources_os_list_ocf_providers(void) { return get_directory_list(OCF_ROOT_DIR "/resource.d", FALSE); } GList * resources_os_list_ocf_agents(const char *provider) { GList *gIter = NULL; GList *result = NULL; GList *providers = NULL; if (provider) { char buffer[500]; snprintf(buffer, sizeof(buffer), "%s/resource.d/%s", OCF_ROOT_DIR, provider); return get_directory_list(buffer, TRUE); } providers = resources_os_list_ocf_providers(); for (gIter = providers; gIter != NULL; gIter = gIter->next) { GList *tmp1 = result; GList *tmp2 = resources_os_list_ocf_agents(gIter->data); if(tmp2) { result = g_list_concat(tmp1, tmp2); } } g_list_free_full(providers, free); return result; }