diff --git a/cib/callbacks.c b/cib/callbacks.c index 528dfe9c46..369b691245 100644 --- a/cib/callbacks.c +++ b/cib/callbacks.c @@ -1,1445 +1,1445 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include -#include +#include #include #include #include #include #include #include "common.h" extern GMainLoop *mainloop; extern gboolean cib_shutdown_flag; extern gboolean stand_alone; extern const char *cib_root; static unsigned long cib_local_bcast_num = 0; typedef struct cib_local_notify_s { xmlNode *notify_src; char *client_id; gboolean from_peer; gboolean sync_reply; } cib_local_notify_t; qb_ipcs_service_t *ipcs_ro = NULL; qb_ipcs_service_t *ipcs_rw = NULL; qb_ipcs_service_t *ipcs_shm = NULL; #if SUPPORT_HEARTBEAT extern ll_cluster_t *hb_conn; #endif extern int cib_update_counter(xmlNode * xml_obj, const char *field, gboolean reset); extern void GHFunc_count_peers(gpointer key, gpointer value, gpointer user_data); gint cib_GCompareFunc(gconstpointer a, gconstpointer b); gboolean can_write(int flags); void send_cib_replace(const xmlNode * sync_request, const char *host); void cib_process_request(xmlNode * request, gboolean privileged, gboolean force_synchronous, gboolean from_peer, cib_client_t * cib_client); extern GHashTable *client_list; extern GHashTable *local_notify_queue; int next_client_id = 0; extern const char *cib_our_uname; extern unsigned long cib_num_ops, cib_num_local, cib_num_updates, cib_num_fail; extern unsigned long cib_bad_connects, cib_num_timeouts; extern int cib_status; int cib_process_command(xmlNode * request, xmlNode ** reply, xmlNode ** cib_diff, gboolean privileged); gboolean cib_common_callback(qb_ipcs_connection_t *c, void *data, size_t size, gboolean privileged); static int32_t cib_ipc_accept(qb_ipcs_connection_t *c, uid_t uid, gid_t gid) { cib_client_t *new_client = NULL; #if ENABLE_ACL struct group *crm_grp = NULL; #endif crm_trace("Connecting %p for uid=%d gid=%d pid=%d", c, uid, gid, crm_ipcs_client_pid(c)); if (cib_shutdown_flag) { crm_info("Ignoring new client [%d] during shutdown", crm_ipcs_client_pid(c)); return -EPERM; } new_client = calloc(1, sizeof(cib_client_t)); new_client->ipc = c; CRM_CHECK(new_client->id == NULL, free(new_client->id)); new_client->id = crm_generate_uuid(); #if ENABLE_ACL crm_grp = getgrnam(CRM_DAEMON_GROUP); if (crm_grp) { qb_ipcs_connection_auth_set(c, -1, crm_grp->gr_gid, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); } new_client->user = uid2username(uid); #endif /* make sure we can find ourselves later for sync calls * redirected to the master instance */ g_hash_table_insert(client_list, new_client->id, new_client); qb_ipcs_context_set(c, new_client); return 0; } static void cib_ipc_created(qb_ipcs_connection_t *c) { cib_client_t *cib_client = qb_ipcs_context_get(c); crm_trace("%p connected for client %s", c, cib_client->id); } static int32_t cib_ipc_dispatch_rw(qb_ipcs_connection_t *c, void *data, size_t size) { cib_client_t *cib_client = qb_ipcs_context_get(c); crm_trace("%p message from %s", c, cib_client->id); return cib_common_callback(c, data, size, TRUE); } static int32_t cib_ipc_dispatch_ro(qb_ipcs_connection_t *c, void *data, size_t size) { cib_client_t *cib_client = qb_ipcs_context_get(c); crm_trace("%p message from %s", c, cib_client->id); return cib_common_callback(c, data, size, FALSE); } /* Error code means? */ static int32_t cib_ipc_closed(qb_ipcs_connection_t *c) { cib_client_t *cib_client = qb_ipcs_context_get(c); crm_trace("Connection %p closed", c); CRM_ASSERT(cib_client != NULL); CRM_ASSERT(cib_client->id != NULL); if (!g_hash_table_remove(client_list, cib_client->id)) { crm_err("Client %s not found in the hashtable", cib_client->name); } return 0; } static void cib_ipc_destroy(qb_ipcs_connection_t *c) { cib_client_t *cib_client = qb_ipcs_context_get(c); CRM_ASSERT(cib_client != NULL); CRM_ASSERT(cib_client->id != NULL); /* In case we arrive here without a call to cib_ipc_close() */ g_hash_table_remove(client_list, cib_client->id); crm_trace("Destroying %s (%p)", cib_client->name, c); free(cib_client->name); free(cib_client->callback_id); free(cib_client->id); free(cib_client->user); free(cib_client); crm_trace("Freed the cib client"); if (cib_shutdown_flag) { cib_shutdown(0); } } struct qb_ipcs_service_handlers ipc_ro_callbacks = { .connection_accept = cib_ipc_accept, .connection_created = cib_ipc_created, .msg_process = cib_ipc_dispatch_ro, .connection_closed = cib_ipc_closed, .connection_destroyed = cib_ipc_destroy }; struct qb_ipcs_service_handlers ipc_rw_callbacks = { .connection_accept = cib_ipc_accept, .connection_created = cib_ipc_created, .msg_process = cib_ipc_dispatch_rw, .connection_closed = cib_ipc_closed, .connection_destroyed = cib_ipc_destroy }; void cib_common_callback_worker(xmlNode * op_request, cib_client_t * cib_client, gboolean privileged) { const char *op = crm_element_value(op_request, F_CIB_OPERATION); if (crm_str_eq(op, CRM_OP_REGISTER, TRUE)) { xmlNode *ack = create_xml_node(NULL, __FUNCTION__); crm_xml_add(ack, F_CIB_OPERATION, CRM_OP_REGISTER); crm_xml_add(ack, F_CIB_CLIENTID, cib_client->id); crm_ipcs_send(cib_client->ipc, ack, FALSE); free_xml(ack); return; } else if (crm_str_eq(op, T_CIB_NOTIFY, TRUE)) { /* Update the notify filters for this client */ int on_off = 0; int rc = pcmk_ok; const char *type = crm_element_value(op_request, F_CIB_NOTIFY_TYPE); crm_element_value_int(op_request, F_CIB_NOTIFY_ACTIVATE, &on_off); crm_debug("Setting %s callbacks for %s (%s): %s", type, cib_client->name, cib_client->id, on_off ? "on" : "off"); if (safe_str_eq(type, T_CIB_POST_NOTIFY)) { cib_client->post_notify = on_off; } else if (safe_str_eq(type, T_CIB_PRE_NOTIFY)) { cib_client->pre_notify = on_off; } else if (safe_str_eq(type, T_CIB_UPDATE_CONFIRM)) { cib_client->confirmations = on_off; } else if (safe_str_eq(type, T_CIB_DIFF_NOTIFY)) { cib_client->diffs = on_off; } else if (safe_str_eq(type, T_CIB_REPLACE_NOTIFY)) { cib_client->replace = on_off; } else { rc = -ENXIO; } /* Already ack'd */ return; } cib_client->num_calls++; cib_process_request(op_request, FALSE, privileged, FALSE, cib_client); } int32_t cib_common_callback(qb_ipcs_connection_t *c, void *data, size_t size, gboolean privileged) { int call_options = 0; const char *op = NULL; const char *call = NULL; xmlNode *op_request = crm_ipcs_recv(c, data, size); cib_client_t *cib_client = qb_ipcs_context_get(c); if(op_request) { op = crm_element_value(op_request, F_CIB_OPERATION); call = crm_element_value(op_request, F_CIB_CALLID); crm_element_value_int(op_request, F_CIB_CALLOPTS, &call_options); } crm_trace("Inbound: %.200s", data); if (op_request == NULL || cib_client == NULL) { xmlNode *ack = create_xml_node(NULL, "nack"); crm_trace("Sending nack to %p", cib_client); crm_xml_add(ack, F_CIB_CALLID, call); crm_xml_add(ack, F_CIB_OPERATION, op); crm_xml_add(ack, XML_ATTR_ORIGIN, __FUNCTION__); crm_ipcs_send(c, ack, FALSE); free_xml(ack); return 0; } else if((call_options & cib_sync_call) == 0) { xmlNode *ack = create_xml_node(NULL, "ack"); crm_trace("Sending a-sync ack to %p", cib_client); crm_xml_add(ack, F_CIB_CALLID, call); crm_xml_add(ack, F_CIB_OPERATION, op); crm_xml_add(ack, XML_ATTR_ORIGIN, __FUNCTION__); crm_ipcs_send(c, ack, FALSE); free_xml(ack); } if (cib_client->name == NULL) { const char *value = crm_element_value(op_request, F_CIB_CLIENTNAME); if (value == NULL) { cib_client->name = crm_itoa(crm_ipcs_client_pid(c)); } else { cib_client->name = strdup(value); } } if (cib_client->callback_id == NULL) { const char *value = crm_element_value(op_request, F_CIB_CALLBACK_TOKEN); if (value != NULL) { cib_client->callback_id = strdup(value); } else { cib_client->callback_id = strdup(cib_client->id); } } crm_xml_add(op_request, F_CIB_CLIENTID, cib_client->id); crm_xml_add(op_request, F_CIB_CLIENTNAME, cib_client->name); #if ENABLE_ACL determine_request_user(cib_client->user, op_request, F_CIB_USER); #endif crm_log_xml_trace(op_request, "Client[inbound]"); cib_common_callback_worker(op_request, cib_client, privileged); free_xml(op_request); return 0; } static void do_local_notify(xmlNode * notify_src, const char *client_id, gboolean sync_reply, gboolean from_peer) { /* send callback to originating child */ cib_client_t *client_obj = NULL; int local_rc = pcmk_ok; if (client_id != NULL) { client_obj = g_hash_table_lookup(client_list, client_id); } else { crm_trace("No client to sent the response to. F_CIB_CLIENTID not set."); } if (client_obj == NULL) { local_rc = -ECONNRESET; } else { crm_trace("Sending %ssync response to %s %s", sync_reply ? "" : "an a-", client_obj->name, from_peer ? "(originator of delegated request)" : ""); if (client_obj->ipc && crm_ipcs_send(client_obj->ipc, notify_src, !sync_reply) < 0) { local_rc = -ENOMSG; #ifdef HAVE_GNUTLS_GNUTLS_H } else if (client_obj->session) { crm_send_remote_msg(client_obj->session, notify_src, client_obj->encrypted); #endif } else if(client_obj->ipc == NULL) { crm_err("Unknown transport for %s", client_obj->name); } } if (local_rc != pcmk_ok && client_obj != NULL) { crm_warn("%sSync reply to %s failed: %s", sync_reply ? "" : "A-", client_obj ? client_obj->name : "", pcmk_strerror(local_rc)); } } static void local_notify_destroy_callback(gpointer data) { cib_local_notify_t *notify = data; free_xml(notify->notify_src); free(notify->client_id); free(notify); } static void check_local_notify(int bcast_id) { cib_local_notify_t *notify = NULL; if (!local_notify_queue) { return; } notify = g_hash_table_lookup(local_notify_queue, GINT_TO_POINTER(bcast_id)); if (notify) { do_local_notify(notify->notify_src, notify->client_id, notify->sync_reply, notify->from_peer); g_hash_table_remove(local_notify_queue, GINT_TO_POINTER(bcast_id)); } } static void queue_local_notify(xmlNode * notify_src, const char *client_id, gboolean sync_reply, gboolean from_peer) { cib_local_notify_t *notify = calloc(1, sizeof(cib_local_notify_t)); notify->notify_src = notify_src; notify->client_id = strdup(client_id); notify->sync_reply = sync_reply; notify->from_peer = from_peer; if (!local_notify_queue) { local_notify_queue = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, local_notify_destroy_callback); } g_hash_table_insert(local_notify_queue, GINT_TO_POINTER(cib_local_bcast_num), notify); } static void parse_local_options(cib_client_t * cib_client, int call_type, int call_options, const char *host, const char *op, gboolean * local_notify, gboolean * needs_reply, gboolean * process, gboolean * needs_forward) { if (cib_op_modifies(call_type) && !(call_options & cib_inhibit_bcast)) { /* we need to send an update anyway */ *needs_reply = TRUE; } else { *needs_reply = FALSE; } if (host == NULL && (call_options & cib_scope_local)) { crm_trace("Processing locally scoped %s op from %s", op, cib_client->name); *local_notify = TRUE; } else if (host == NULL && cib_is_master) { crm_trace("Processing master %s op locally from %s", op, cib_client->name); *local_notify = TRUE; } else if (safe_str_eq(host, cib_our_uname)) { crm_trace("Processing locally addressed %s op from %s", op, cib_client->name); *local_notify = TRUE; } else if (stand_alone) { *needs_forward = FALSE; *local_notify = TRUE; *process = TRUE; } else { crm_trace("%s op from %s needs to be forwarded to %s", op, cib_client->name, host ? host : "the master instance"); *needs_forward = TRUE; *process = FALSE; } } static gboolean parse_peer_options(int call_type, xmlNode * request, gboolean * local_notify, gboolean * needs_reply, gboolean * process, gboolean * needs_forward) { const char *op = NULL; const char *host = NULL; const char *delegated = NULL; const char *originator = crm_element_value(request, F_ORIG); const char *reply_to = crm_element_value(request, F_CIB_ISREPLY); const char *update = crm_element_value(request, F_CIB_GLOBAL_UPDATE); gboolean is_reply = safe_str_eq(reply_to, cib_our_uname); if (crm_is_true(update)) { *needs_reply = FALSE; if (is_reply) { *local_notify = TRUE; crm_trace("Processing global/peer update from %s" " that originated from us", originator); } else { crm_trace("Processing global/peer update from %s", originator); } return TRUE; } host = crm_element_value(request, F_CIB_HOST); if (host != NULL && safe_str_eq(host, cib_our_uname)) { crm_trace("Processing request sent to us from %s", originator); return TRUE; } else if (host == NULL && cib_is_master == TRUE) { crm_trace("Processing request sent to master instance from %s", originator); return TRUE; } op = crm_element_value(request, F_CIB_OPERATION); if(safe_str_eq(op, "cib_shutdown_req")) { /* Always process these */ *local_notify = FALSE; if(reply_to == NULL || is_reply) { *process = TRUE; } if(is_reply) { *needs_reply = FALSE; } return *process; } if (is_reply) { crm_trace("Forward reply sent from %s to local clients", originator); *process = FALSE; *needs_reply = FALSE; *local_notify = TRUE; return TRUE; } delegated = crm_element_value(request, F_CIB_DELEGATED); if (delegated != NULL) { crm_trace("Ignoring msg for master instance"); } else if (host != NULL) { /* this is for a specific instance and we're not it */ crm_trace("Ignoring msg for instance on %s", crm_str(host)); } else if (reply_to == NULL && cib_is_master == FALSE) { /* this is for the master instance and we're not it */ crm_trace("Ignoring reply to %s", crm_str(reply_to)); } else if (safe_str_eq(op, "cib_shutdown_req")) { if (reply_to != NULL) { crm_debug("Processing %s from %s", op, host); *needs_reply = FALSE; } else { crm_debug("Processing %s reply from %s", op, host); } return TRUE; } else { crm_err("Nothing for us to do?"); crm_log_xml_err(request, "Peer[inbound]"); } return FALSE; } static void forward_request(xmlNode * request, cib_client_t * cib_client, int call_options) { const char *op = crm_element_value(request, F_CIB_OPERATION); const char *host = crm_element_value(request, F_CIB_HOST); crm_xml_add(request, F_CIB_DELEGATED, cib_our_uname); if (host != NULL) { crm_trace("Forwarding %s op to %s", op, host); send_cluster_message(host, crm_msg_cib, request, FALSE); } else { crm_trace("Forwarding %s op to master instance", op); send_cluster_message(NULL, crm_msg_cib, request, FALSE); } /* Return the request to its original state */ xml_remove_prop(request, F_CIB_DELEGATED); if (call_options & cib_discard_reply) { crm_trace("Client not interested in reply"); } } static gboolean send_peer_reply(xmlNode * msg, xmlNode * result_diff, const char *originator, gboolean broadcast) { CRM_ASSERT(msg != NULL); if (broadcast) { /* this (successful) call modified the CIB _and_ the * change needs to be broadcast... * send via HA to other nodes */ int diff_add_updates = 0; int diff_add_epoch = 0; int diff_add_admin_epoch = 0; int diff_del_updates = 0; int diff_del_epoch = 0; int diff_del_admin_epoch = 0; char *digest = NULL; cib_diff_version_details(result_diff, &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); crm_trace("Sending update diff %d.%d.%d -> %d.%d.%d", diff_del_admin_epoch, diff_del_epoch, diff_del_updates, diff_add_admin_epoch, diff_add_epoch, diff_add_updates); crm_xml_add(msg, F_CIB_ISREPLY, originator); crm_xml_add(msg, F_CIB_GLOBAL_UPDATE, XML_BOOLEAN_TRUE); crm_xml_add(msg, F_CIB_OPERATION, CIB_OP_APPLY_DIFF); /* Its safe to always use the latest version since the election * ensures the software on this node is the oldest node in the cluster */ digest = calculate_xml_versioned_digest(the_cib, FALSE, TRUE, CRM_FEATURE_SET); crm_xml_add(result_diff, XML_ATTR_DIGEST, digest); crm_log_xml_trace(the_cib, digest); free(digest); add_message_xml(msg, F_CIB_UPDATE_DIFF, result_diff); crm_log_xml_trace(msg, "copy"); return send_cluster_message(NULL, crm_msg_cib, msg, TRUE); } else if (originator != NULL) { /* send reply via HA to originating node */ crm_trace("Sending request result to originator only"); crm_xml_add(msg, F_CIB_ISREPLY, originator); return send_cluster_message(originator, crm_msg_cib, msg, FALSE); } return FALSE; } void cib_process_request(xmlNode * request, gboolean force_synchronous, gboolean privileged, gboolean from_peer, cib_client_t * cib_client) { int call_type = 0; int call_options = 0; gboolean process = TRUE; gboolean is_update = TRUE; gboolean needs_reply = TRUE; gboolean local_notify = FALSE; gboolean needs_forward = FALSE; gboolean global_update = crm_is_true(crm_element_value(request, F_CIB_GLOBAL_UPDATE)); xmlNode *op_reply = NULL; xmlNode *result_diff = NULL; int rc = pcmk_ok; const char *op = crm_element_value(request, F_CIB_OPERATION); const char *originator = crm_element_value(request, F_ORIG); const char *host = crm_element_value(request, F_CIB_HOST); const char *client_id = crm_element_value(request, F_CIB_CLIENTID); crm_trace("%s Processing msg %s", cib_our_uname, crm_element_value(request, F_SEQ)); cib_num_ops++; if (cib_num_ops == 0) { cib_num_fail = 0; cib_num_local = 0; cib_num_updates = 0; crm_info("Stats wrapped around"); } if (host != NULL && strlen(host) == 0) { host = NULL; } crm_element_value_int(request, F_CIB_CALLOPTS, &call_options); if (force_synchronous) { call_options |= cib_sync_call; } crm_trace("Processing %s message (%s) for %s...", from_peer ? "peer" : "local", from_peer ? originator : cib_our_uname, host ? host : "master"); rc = cib_get_operation_id(op, &call_type); if (rc != pcmk_ok) { /* TODO: construct error reply? */ crm_err("Pre-processing of command failed: %s", pcmk_strerror(rc)); return; } is_update = cib_op_modifies(call_type); if (is_update) { cib_num_updates++; } if (from_peer == FALSE) { parse_local_options(cib_client, call_type, call_options, host, op, &local_notify, &needs_reply, &process, &needs_forward); } else if (parse_peer_options(call_type, request, &local_notify, &needs_reply, &process, &needs_forward) == FALSE) { return; } crm_trace("Finished determining processing actions"); if (call_options & cib_discard_reply) { needs_reply = is_update; local_notify = FALSE; } if (needs_forward) { forward_request(request, cib_client, call_options); return; } if (cib_status != pcmk_ok) { rc = cib_status; crm_err("Operation ignored, cluster configuration is invalid." " Please repair and restart: %s", pcmk_strerror(cib_status)); op_reply = cib_construct_reply(request, the_cib, cib_status); } else if (process) { int level = LOG_INFO; const char *section = crm_element_value(request, F_CIB_SECTION); cib_num_local++; rc = cib_process_command(request, &op_reply, &result_diff, privileged); if (global_update) { switch (rc) { case pcmk_ok: case -pcmk_err_old_data: case -pcmk_err_diff_resync: case -pcmk_err_diff_failed: level = LOG_DEBUG_2; break; default: level = LOG_ERR; } } else if (safe_str_eq(op, CIB_OP_QUERY)) { level = LOG_DEBUG_2; } else if (rc != pcmk_ok) { cib_num_fail++; level = LOG_WARNING; } else if (safe_str_eq(op, CIB_OP_SLAVE)) { level = LOG_DEBUG_2; } else if (safe_str_eq(section, XML_CIB_TAG_STATUS)) { level = LOG_DEBUG_2; } do_crm_log_unlikely(level, "Operation complete: op %s for section %s (origin=%s/%s/%s, version=%s.%s.%s): %s (rc=%d)", op, section ? section : "'all'", originator ? originator : "local", crm_element_value(request, F_CIB_CLIENTNAME), crm_element_value(request, F_CIB_CALLID), the_cib ? crm_element_value(the_cib, XML_ATTR_GENERATION_ADMIN) : "0", the_cib ? crm_element_value(the_cib, XML_ATTR_GENERATION) : "0", the_cib ? crm_element_value(the_cib, XML_ATTR_NUMUPDATES) : "0", pcmk_strerror(rc), rc); if (op_reply == NULL && (needs_reply || local_notify)) { crm_err("Unexpected NULL reply to message"); crm_log_xml_err(request, "null reply"); needs_reply = FALSE; local_notify = FALSE; } } crm_trace("processing response cases %.16x %.16x", call_options, cib_sync_call); /* from now on we are the server */ if (needs_reply == FALSE || stand_alone) { /* nothing more to do... * this was a non-originating slave update */ crm_trace("Completed slave update"); } else if (rc == pcmk_ok && result_diff != NULL && !(call_options & cib_inhibit_bcast)) { gboolean broadcast = FALSE; cib_local_bcast_num++; crm_xml_add_int(request, F_CIB_LOCAL_NOTIFY_ID, cib_local_bcast_num); broadcast = send_peer_reply(request, result_diff, originator, TRUE); if (broadcast && client_id && local_notify && op_reply) { /* If we have been asked to sync the reply, * and a bcast msg has gone out, we queue the local notify * until we know the bcast message has been received */ local_notify = FALSE; queue_local_notify(op_reply, client_id, (call_options & cib_sync_call), from_peer); op_reply = NULL; /* the reply is queued, so don't free here */ } } else if (call_options & cib_discard_reply) { crm_trace("Caller isn't interested in reply"); } else if (from_peer) { if (is_update == FALSE || result_diff == NULL) { crm_trace("Request not broadcast: R/O call"); } else if (call_options & cib_inhibit_bcast) { crm_trace("Request not broadcast: inhibited"); } else if (rc != pcmk_ok) { crm_trace("Request not broadcast: call failed: %s", pcmk_strerror(rc)); } else { crm_trace("Directing reply to %s", originator); } send_peer_reply(op_reply, result_diff, originator, FALSE); } if (local_notify && client_id) { if (process == FALSE) { do_local_notify(request, client_id, call_options & cib_sync_call, from_peer); } else { do_local_notify(op_reply, client_id, call_options & cib_sync_call, from_peer); } } free_xml(op_reply); free_xml(result_diff); return; } xmlNode * cib_construct_reply(xmlNode * request, xmlNode * output, int rc) { int lpc = 0; xmlNode *reply = NULL; const char *name = NULL; const char *value = NULL; const char *names[] = { F_CIB_OPERATION, F_CIB_CALLID, F_CIB_CLIENTID, F_CIB_CALLOPTS }; static int max = DIMOF(names); crm_trace("Creating a basic reply"); reply = create_xml_node(NULL, "cib-reply"); crm_xml_add(reply, F_TYPE, T_CIB); for (lpc = 0; lpc < max; lpc++) { name = names[lpc]; value = crm_element_value(request, name); crm_xml_add(reply, name, value); } crm_xml_add_int(reply, F_CIB_RC, rc); if (output != NULL) { crm_trace("Attaching reply output"); add_message_xml(reply, F_CIB_CALLDATA, output); } return reply; } int cib_process_command(xmlNode * request, xmlNode ** reply, xmlNode ** cib_diff, gboolean privileged) { xmlNode *input = NULL; xmlNode *output = NULL; xmlNode *result_cib = NULL; xmlNode *current_cib = NULL; #if ENABLE_ACL xmlNode *filtered_current_cib = NULL; #endif int call_type = 0; int call_options = 0; int log_level = LOG_DEBUG_4; const char *op = NULL; const char *section = NULL; int rc = pcmk_ok; int rc2 = pcmk_ok; gboolean send_r_notify = FALSE; gboolean global_update = FALSE; gboolean config_changed = FALSE; gboolean manage_counters = TRUE; CRM_ASSERT(cib_status == pcmk_ok); *reply = NULL; *cib_diff = NULL; current_cib = the_cib; /* Start processing the request... */ op = crm_element_value(request, F_CIB_OPERATION); crm_element_value_int(request, F_CIB_CALLOPTS, &call_options); rc = cib_get_operation_id(op, &call_type); if (rc == pcmk_ok && privileged == FALSE) { rc = cib_op_can_run(call_type, call_options, privileged, global_update); } rc2 = cib_op_prepare(call_type, request, &input, §ion); if (rc == pcmk_ok) { rc = rc2; } if (rc != pcmk_ok) { crm_trace("Call setup failed: %s", pcmk_strerror(rc)); goto done; } else if (cib_op_modifies(call_type) == FALSE) { #if ENABLE_ACL if (acl_enabled(config_hash) == FALSE || acl_filter_cib(request, current_cib, current_cib, &filtered_current_cib) == FALSE) { rc = cib_perform_op(op, call_options, cib_op_func(call_type), TRUE, section, request, input, FALSE, &config_changed, current_cib, &result_cib, NULL, &output); } else if (filtered_current_cib == NULL) { crm_debug("Pre-filtered the entire cib"); rc = -EACCES; } else { crm_debug("Pre-filtered the queried cib according to the ACLs"); rc = cib_perform_op(op, call_options, cib_op_func(call_type), TRUE, section, request, input, FALSE, &config_changed, filtered_current_cib, &result_cib, NULL, &output); } #else rc = cib_perform_op(op, call_options, cib_op_func(call_type), TRUE, section, request, input, FALSE, &config_changed, current_cib, &result_cib, NULL, &output); #endif CRM_CHECK(result_cib == NULL, free_xml(result_cib)); goto done; } /* Handle a valid write action */ global_update = crm_is_true(crm_element_value(request, F_CIB_GLOBAL_UPDATE)); if (global_update) { manage_counters = FALSE; call_options |= cib_force_diff; CRM_CHECK(call_type == 3 || call_type == 4, crm_err("Call type: %d", call_type); crm_log_xml_err(request, "bad op")); } #ifdef SUPPORT_PRENOTIFY if ((call_options & cib_inhibit_notify) == 0) { cib_pre_notify(call_options, op, the_cib, input); } #endif if (rc == pcmk_ok) { if (call_options & cib_inhibit_bcast) { /* skip */ crm_trace("Skipping update: inhibit broadcast"); manage_counters = FALSE; } rc = cib_perform_op(op, call_options, cib_op_func(call_type), FALSE, section, request, input, manage_counters, &config_changed, current_cib, &result_cib, cib_diff, &output); #if ENABLE_ACL if (acl_enabled(config_hash) == TRUE && acl_check_diff(request, current_cib, result_cib, *cib_diff) == FALSE) { rc = -EACCES; } #endif if (rc == pcmk_ok && config_changed) { time_t now; char *now_str = NULL; const char *validation = crm_element_value(result_cib, XML_ATTR_VALIDATION); if (validation) { int current_version = get_schema_version(validation); int support_version = get_schema_version("pacemaker-1.1"); /* Once the later schemas support the "update-*" attributes, change "==" to ">=" -- Changed */ if (current_version >= support_version) { const char *origin = crm_element_value(request, F_ORIG); crm_xml_replace(result_cib, XML_ATTR_UPDATE_ORIG, origin ? origin : cib_our_uname); crm_xml_replace(result_cib, XML_ATTR_UPDATE_CLIENT, crm_element_value(request, F_CIB_CLIENTNAME)); #if ENABLE_ACL crm_xml_replace(result_cib, XML_ATTR_UPDATE_USER, crm_element_value(request, F_CIB_USER)); #endif } } now = time(NULL); now_str = ctime(&now); now_str[24] = EOS; /* replace the newline */ crm_xml_replace(result_cib, XML_CIB_ATTR_WRITTEN, now_str); } if (manage_counters == FALSE) { config_changed = cib_config_changed(current_cib, result_cib, cib_diff); } /* Always write to disk for replace ops, * this negates the need to detect ordering changes */ if (config_changed == FALSE && crm_str_eq(CIB_OP_REPLACE, op, TRUE)) { config_changed = TRUE; } } if (rc == pcmk_ok && (call_options & cib_dryrun) == 0) { rc = activateCibXml(result_cib, config_changed, op); if (rc == pcmk_ok && cib_internal_config_changed(*cib_diff)) { cib_read_config(config_hash, result_cib); } if (crm_str_eq(CIB_OP_REPLACE, op, TRUE)) { if (section == NULL) { send_r_notify = TRUE; } else if (safe_str_eq(section, XML_TAG_CIB)) { send_r_notify = TRUE; } else if (safe_str_eq(section, XML_CIB_TAG_NODES)) { send_r_notify = TRUE; } else if (safe_str_eq(section, XML_CIB_TAG_STATUS)) { send_r_notify = TRUE; } } else if (crm_str_eq(CIB_OP_ERASE, op, TRUE)) { send_r_notify = TRUE; } } else if (rc == -pcmk_err_dtd_validation) { if (output != NULL) { crm_log_xml_info(output, "cib:output"); free_xml(output); } #if ENABLE_ACL { xmlNode *filtered_result_cib = NULL; if (acl_enabled(config_hash) == FALSE || acl_filter_cib(request, current_cib, result_cib, &filtered_result_cib) == FALSE) { output = result_cib; } else { crm_debug("Filtered the result cib for output according to the ACLs"); output = filtered_result_cib; if (result_cib != NULL) { free_xml(result_cib); } } } #else output = result_cib; #endif } else { free_xml(result_cib); } if ((call_options & cib_inhibit_notify) == 0) { const char *call_id = crm_element_value(request, F_CIB_CALLID); const char *client = crm_element_value(request, F_CIB_CLIENTNAME); #ifdef SUPPORT_POSTNOTIFY cib_post_notify(call_options, op, input, rc, the_cib); #endif cib_diff_notify(call_options, client, call_id, op, input, rc, *cib_diff); } if (send_r_notify) { const char *origin = crm_element_value(request, F_ORIG); cib_replace_notify(origin, the_cib, rc, *cib_diff); } if (rc != pcmk_ok) { log_level = LOG_DEBUG_4; if (rc == -pcmk_err_dtd_validation && global_update) { log_level = LOG_WARNING; crm_log_xml_info(input, "cib:global_update"); } } else if (config_changed) { log_level = LOG_DEBUG_3; if (cib_is_master) { log_level = LOG_NOTICE; } } else if (cib_is_master) { log_level = LOG_DEBUG_2; } log_xml_diff(log_level, *cib_diff, "cib:diff"); done: if ((call_options & cib_discard_reply) == 0) { *reply = cib_construct_reply(request, output, rc); crm_log_xml_trace(*reply, "cib:reply"); } #if ENABLE_ACL if (filtered_current_cib != NULL) { free_xml(filtered_current_cib); } #endif if (call_type >= 0) { cib_op_cleanup(call_type, call_options, &input, &output); } return rc; } gint cib_GCompareFunc(gconstpointer a, gconstpointer b) { const xmlNode *a_msg = a; const xmlNode *b_msg = b; int msg_a_id = 0; int msg_b_id = 0; const char *value = NULL; value = crm_element_value_const(a_msg, F_CIB_CALLID); msg_a_id = crm_parse_int(value, NULL); value = crm_element_value_const(b_msg, F_CIB_CALLID); msg_b_id = crm_parse_int(value, NULL); if (msg_a_id == msg_b_id) { return 0; } else if (msg_a_id < msg_b_id) { return -1; } return 1; } #if SUPPORT_HEARTBEAT void cib_ha_peer_callback(HA_Message * msg, void *private_data) { xmlNode *xml = convert_ha_message(NULL, msg, __FUNCTION__); cib_peer_callback(xml, private_data); free_xml(xml); } #endif void cib_peer_callback(xmlNode * msg, void *private_data) { const char *reason = NULL; const char *originator = crm_element_value(msg, F_ORIG); if (originator == NULL || crm_str_eq(originator, cib_our_uname, TRUE)) { /* message is from ourselves */ int bcast_id = 0; if (!(crm_element_value_int(msg, F_CIB_LOCAL_NOTIFY_ID, &bcast_id))) { check_local_notify(bcast_id); } return; } else if (crm_peer_cache == NULL) { reason = "membership not established"; goto bail; } if (crm_element_value(msg, F_CIB_CLIENTNAME) == NULL) { crm_xml_add(msg, F_CIB_CLIENTNAME, originator); } /* crm_log_xml_trace("Peer[inbound]", msg); */ cib_process_request(msg, FALSE, TRUE, TRUE, NULL); return; bail: if (reason) { const char *seq = crm_element_value(msg, F_SEQ); const char *op = crm_element_value(msg, F_CIB_OPERATION); crm_warn("Discarding %s message (%s) from %s: %s", op, seq, originator, reason); } } void cib_client_status_callback(const char *node, const char *client, const char *status, void *private) { crm_node_t *peer = NULL; if (safe_str_eq(client, CRM_SYSTEM_CIB)) { crm_info("Status update: Client %s/%s now has status [%s]", node, client, status); if (safe_str_eq(status, JOINSTATUS)) { status = ONLINESTATUS; } else if (safe_str_eq(status, LEAVESTATUS)) { status = OFFLINESTATUS; } peer = crm_get_peer(0, node); crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cib, status); } return; } #if SUPPORT_HEARTBEAT extern oc_ev_t *cib_ev_token; static void *ccm_library = NULL; int (*ccm_api_callback_done) (void *cookie) = NULL; int (*ccm_api_handle_event) (const oc_ev_t * token) = NULL; int cib_ccm_dispatch(gpointer user_data) { int rc = 0; oc_ev_t *ccm_token = (oc_ev_t *) user_data; crm_trace("received callback"); if (ccm_api_handle_event == NULL) { ccm_api_handle_event = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_handle_event", 1); } rc = (*ccm_api_handle_event) (ccm_token); if (0 == rc) { return 0; } crm_err("CCM connection appears to have failed: rc=%d.", rc); /* eventually it might be nice to recover and reconnect... but until then... */ crm_err("Exiting to recover from CCM connection failure"); exit(2); return -1; } int current_instance = 0; void cib_ccm_msg_callback(oc_ed_t event, void *cookie, size_t size, const void *data) { gboolean update_id = FALSE; const oc_ev_membership_t *membership = data; CRM_ASSERT(membership != NULL); crm_info("Processing CCM event=%s (id=%d)", ccm_event_name(event), membership->m_instance); if (current_instance > membership->m_instance) { crm_err("Membership instance ID went backwards! %d->%d", current_instance, membership->m_instance); CRM_ASSERT(current_instance <= membership->m_instance); } switch (event) { case OC_EV_MS_NEW_MEMBERSHIP: case OC_EV_MS_INVALID: update_id = TRUE; break; case OC_EV_MS_PRIMARY_RESTORED: update_id = TRUE; break; case OC_EV_MS_NOT_PRIMARY: crm_trace("Ignoring transitional CCM event: %s", ccm_event_name(event)); break; case OC_EV_MS_EVICTED: crm_err("Evicted from CCM: %s", ccm_event_name(event)); break; default: crm_err("Unknown CCM event: %d", event); } if (update_id) { unsigned int lpc = 0; CRM_CHECK(membership != NULL, return); current_instance = membership->m_instance; for (lpc = 0; lpc < membership->m_n_out; lpc++) { crm_update_ccm_node(membership, lpc + membership->m_out_idx, CRM_NODE_LOST, current_instance); } for (lpc = 0; lpc < membership->m_n_member; lpc++) { crm_update_ccm_node(membership, lpc + membership->m_memb_idx, CRM_NODE_ACTIVE, current_instance); } } if (ccm_api_callback_done == NULL) { ccm_api_callback_done = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_callback_done", 1); } (*ccm_api_callback_done) (cookie); return; } #endif gboolean can_write(int flags) { return TRUE; } static gboolean cib_force_exit(gpointer data) { crm_notice("Forcing exit!"); terminate_cib(__FUNCTION__, TRUE); return FALSE; } static void disconnect_remote_client(gpointer key, gpointer value, gpointer user_data) { cib_client_t *a_client = value; crm_err("Disconnecting %s... Not implemented", crm_str(a_client->name)); } void cib_shutdown(int nsig) { struct qb_ipcs_stats srv_stats; if (cib_shutdown_flag == FALSE) { int disconnects = 0; qb_ipcs_connection_t *c = NULL; cib_shutdown_flag = TRUE; for(c = qb_ipcs_connection_first_get(ipcs_rw); c != NULL; c = qb_ipcs_connection_next_get(ipcs_rw, c)) { crm_debug("Disconnecting r/w client %p...", c); qb_ipcs_disconnect(c); qb_ipcs_connection_unref(c); disconnects++; } for(c = qb_ipcs_connection_first_get(ipcs_ro); c != NULL; c = qb_ipcs_connection_next_get(ipcs_ro, c)) { crm_debug("Disconnecting r/o client %p...", c); qb_ipcs_disconnect(c); qb_ipcs_connection_unref(c); disconnects++; } for(c = qb_ipcs_connection_first_get(ipcs_shm); c != NULL; c = qb_ipcs_connection_next_get(ipcs_shm, c)) { crm_debug("Disconnecting non-blocking r/w client %p...", c); qb_ipcs_disconnect(c); qb_ipcs_connection_unref(c); disconnects++; } disconnects += g_hash_table_size(client_list); crm_debug("Disconnecting %d remote clients", g_hash_table_size(client_list)); g_hash_table_foreach(client_list, disconnect_remote_client, NULL); crm_info("Disconnected %d clients", disconnects); } qb_ipcs_stats_get(ipcs_rw, &srv_stats, QB_FALSE); if(g_hash_table_size(client_list) == 0) { crm_info("All clients disconnected (%d)", srv_stats.active_connections); initiate_exit(); } else { crm_info("Waiting on %d clients to disconnect (%d)", g_hash_table_size(client_list), srv_stats.active_connections); } } void initiate_exit(void) { int active = 0; xmlNode *leaving = NULL; active = crm_active_peers(); if (active < 2) { terminate_cib(__FUNCTION__, FALSE); return; } crm_info("Sending disconnect notification to %d peers...", active); leaving = create_xml_node(NULL, "exit-notification"); crm_xml_add(leaving, F_TYPE, "cib"); crm_xml_add(leaving, F_CIB_OPERATION, "cib_shutdown_req"); send_cluster_message(NULL, crm_msg_cib, leaving, TRUE); free_xml(leaving); g_timeout_add(crm_get_msec("5s"), cib_force_exit, NULL); } extern int remote_fd; extern int remote_tls_fd; extern void terminate_ais_connection(void); void terminate_cib(const char *caller, gboolean fast) { if (remote_fd > 0) { close(remote_fd); } if (remote_tls_fd > 0) { close(remote_tls_fd); } if(!fast) { if(is_heartbeat_cluster()) { #if SUPPORT_HEARTBEAT if (hb_conn != NULL) { crm_info("%s: Disconnecting heartbeat", caller); hb_conn->llc_ops->signoff(hb_conn, FALSE); } else { crm_err("%s: No heartbeat connection", caller); } #endif } else { #if SUPPORT_COROSYNC crm_info("%s: Disconnecting corosync", caller); terminate_ais_connection(); #endif } } uninitializeCib(); crm_info("%s: Exiting%s...", caller, fast?" fast":""); qb_log_fini(); if (fast) { exit(EX_USAGE); } else if(mainloop != NULL && g_main_is_running(mainloop)) { g_main_quit(mainloop); } else { exit(EX_OK); } } diff --git a/cib/main.c b/cib/main.c index 4ef4de9b65..104a72b0e9 100644 --- a/cib/main.c +++ b/cib/main.c @@ -1,598 +1,598 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include +#include #include #include #include #include #include #if HAVE_LIBXML2 # include #endif #ifdef HAVE_GETOPT_H # include #endif #if HAVE_BZLIB_H # include #endif extern int init_remote_listener(int port, gboolean encrypted); extern gboolean stand_alone; gboolean cib_shutdown_flag = FALSE; int cib_status = pcmk_ok; #if SUPPORT_HEARTBEAT oc_ev_t *cib_ev_token; ll_cluster_t *hb_conn = NULL; extern void oc_ev_special(const oc_ev_t *, oc_ev_class_t, int); gboolean cib_register_ha(ll_cluster_t * hb_cluster, const char *client_name); #endif extern void terminate_cib(const char *caller, gboolean fast); GMainLoop *mainloop = NULL; const char *cib_root = CRM_CONFIG_DIR; char *cib_our_uname = NULL; gboolean preserve_status = FALSE; gboolean cib_writes_enabled = TRUE; int remote_fd = 0; int remote_tls_fd = 0; void usage(const char *cmd, int exit_status); int cib_init(void); void cib_shutdown(int nsig); gboolean startCib(const char *filename); extern int write_cib_contents(gpointer p); GHashTable *client_list = NULL; GHashTable *config_hash = NULL; GHashTable *local_notify_queue = NULL; char *channel1 = NULL; char *channel2 = NULL; char *channel3 = NULL; char *channel4 = NULL; char *channel5 = NULL; #define OPTARGS "maswr:V?" void cib_cleanup(void); static void cib_enable_writes(int nsig) { crm_info("(Re)enabling disk writes"); cib_writes_enabled = TRUE; } static void log_cib_client(gpointer key, gpointer value, gpointer user_data) { cib_client_t *a_client = value; crm_info("Client %s", crm_str(a_client->name)); } int main(int argc, char **argv) { int flag; int rc = 0; int argerr = 0; #ifdef HAVE_GETOPT_H int option_index = 0; /* *INDENT-OFF* */ static struct option long_options[] = { {"per-action-cib", 0, 0, 'a'}, {"stand-alone", 0, 0, 's'}, {"disk-writes", 0, 0, 'w'}, {"cib-root", 1, 0, 'r'}, {"verbose", 0, 0, 'V'}, {"help", 0, 0, '?'}, {"metadata", 0, 0, 'm'}, {0, 0, 0, 0} }; /* *INDENT-ON* */ #endif struct passwd *pwentry = NULL; crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); mainloop_add_signal(SIGTERM, cib_shutdown); mainloop_add_signal(SIGPIPE, cib_enable_writes); cib_writer = mainloop_add_trigger(G_PRIORITY_LOW, write_cib_contents, NULL); crm_peer_init(); client_list = g_hash_table_new(crm_str_hash, g_str_equal); while (1) { #ifdef HAVE_GETOPT_H flag = getopt_long(argc, argv, OPTARGS, long_options, &option_index); #else flag = getopt(argc, argv, OPTARGS); #endif if (flag == -1) break; switch (flag) { case 'V': crm_bump_log_level(); break; case 's': stand_alone = TRUE; preserve_status = TRUE; cib_writes_enabled = FALSE; pwentry = getpwnam(CRM_DAEMON_USER); CRM_CHECK(pwentry != NULL, crm_perror(LOG_ERR, "Invalid uid (%s) specified", CRM_DAEMON_USER); return 100); rc = setgid(pwentry->pw_gid); if (rc < 0) { crm_perror(LOG_ERR, "Could not set group to %d", pwentry->pw_gid); return 100; } rc = setuid(pwentry->pw_uid); if (rc < 0) { crm_perror(LOG_ERR, "Could not set user to %d", pwentry->pw_uid); return 100; } break; case '?': /* Help message */ usage(crm_system_name, EX_OK); break; case 'w': cib_writes_enabled = TRUE; break; case 'r': cib_root = optarg; break; case 'm': cib_metadata(); return 0; default: ++argerr; break; } } if (argc - optind == 1 && safe_str_eq("metadata", argv[optind])) { cib_metadata(); return 0; } if (optind > argc) { ++argerr; } if (argerr) { usage(crm_system_name, EX_USAGE); } if (crm_is_writable(cib_root, NULL, CRM_DAEMON_USER, CRM_DAEMON_GROUP, FALSE) == FALSE) { crm_err("Bad permissions on %s. Terminating", cib_root); fprintf(stderr, "ERROR: Bad permissions on %s. See logs for details\n", cib_root); fflush(stderr); return 100; } /* read local config file */ rc = cib_init(); CRM_CHECK(g_hash_table_size(client_list) == 0, crm_warn("Not all clients gone at exit")); g_hash_table_foreach(client_list, log_cib_client, NULL); cib_cleanup(); #if SUPPORT_HEARTBEAT if (hb_conn) { hb_conn->llc_ops->delete(hb_conn); } #endif crm_info("Done"); return rc; } void cib_cleanup(void) { crm_peer_destroy(); if (local_notify_queue) { g_hash_table_destroy(local_notify_queue); } g_hash_table_destroy(config_hash); g_hash_table_destroy(client_list); free(cib_our_uname); #if HAVE_LIBXML2 crm_xml_cleanup(); #endif free(channel1); free(channel2); free(channel3); free(channel4); free(channel5); } unsigned long cib_num_ops = 0; const char *cib_stat_interval = "10min"; unsigned long cib_num_local = 0, cib_num_updates = 0, cib_num_fail = 0; unsigned long cib_bad_connects = 0, cib_num_timeouts = 0; #if SUPPORT_HEARTBEAT gboolean ccm_connect(void); static void ccm_connection_destroy(gpointer user_data) { crm_err("CCM connection failed... blocking while we reconnect"); CRM_ASSERT(ccm_connect()); return; } static void *ccm_library = NULL; gboolean ccm_connect(void) { gboolean did_fail = TRUE; int num_ccm_fails = 0; int max_ccm_fails = 30; int ret; int cib_ev_fd; int (*ccm_api_register) (oc_ev_t ** token) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_register", 1); int (*ccm_api_set_callback) (const oc_ev_t * token, oc_ev_class_t class, oc_ev_callback_t * fn, oc_ev_callback_t ** prev_fn) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_set_callback", 1); void (*ccm_api_special) (const oc_ev_t *, oc_ev_class_t, int) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_special", 1); int (*ccm_api_activate) (const oc_ev_t * token, int *fd) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_activate", 1); int (*ccm_api_unregister) (oc_ev_t * token) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_unregister", 1); static struct mainloop_fd_callbacks ccm_fd_callbacks = { .dispatch = cib_ccm_dispatch, .destroy = ccm_connection_destroy, }; while (did_fail) { did_fail = FALSE; crm_info("Registering with CCM..."); ret = (*ccm_api_register) (&cib_ev_token); if (ret != 0) { did_fail = TRUE; } if (did_fail == FALSE) { crm_trace("Setting up CCM callbacks"); ret = (*ccm_api_set_callback) (cib_ev_token, OC_EV_MEMB_CLASS, cib_ccm_msg_callback, NULL); if (ret != 0) { crm_warn("CCM callback not set"); did_fail = TRUE; } } if (did_fail == FALSE) { (*ccm_api_special) (cib_ev_token, OC_EV_MEMB_CLASS, 0); crm_trace("Activating CCM token"); ret = (*ccm_api_activate) (cib_ev_token, &cib_ev_fd); if (ret != 0) { crm_warn("CCM Activation failed"); did_fail = TRUE; } } if (did_fail) { num_ccm_fails++; (*ccm_api_unregister) (cib_ev_token); if (num_ccm_fails < max_ccm_fails) { crm_warn("CCM Connection failed %d times (%d max)", num_ccm_fails, max_ccm_fails); sleep(3); } else { crm_err("CCM Activation failed %d (max) times", num_ccm_fails); return FALSE; } } } crm_debug("CCM Activation passed... all set to go!"); mainloop_add_fd("heartbeat-ccm", cib_ev_fd, cib_ev_token, &ccm_fd_callbacks); return TRUE; } #endif #if SUPPORT_COROSYNC static gboolean cib_ais_dispatch(AIS_Message * wrapper, char *data, int sender) { xmlNode *xml = NULL; if (wrapper->header.id == crm_class_cluster) { xml = string2xml(data); if (xml == NULL) { goto bail; } crm_xml_add(xml, F_ORIG, wrapper->sender.uname); crm_xml_add_int(xml, F_SEQ, wrapper->id); cib_peer_callback(xml, NULL); } free_xml(xml); return TRUE; bail: crm_err("Invalid XML: '%.120s'", data); return TRUE; } static void cib_ais_destroy(gpointer user_data) { if (cib_shutdown_flag) { crm_info("Corosync disconnection complete"); } else { crm_err("Corosync connection lost! Exiting."); terminate_cib(__FUNCTION__, TRUE); } } #endif static void cib_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data) { #if 0 /* crm_active_peers(crm_proc_cib) appears to give the wrong answer * sometimes, this might help figure out why */ if(type == crm_status_nstate) { crm_info("status: %s is now %s (was %s)", node->uname, node->state, (const char *)data); if (safe_str_eq(CRMD_STATE_ACTIVE, node->state)) { return; } } else if(type == crm_status_processes) { uint32_t old = 0; if (data) { old = *(const uint32_t *)data; } if ((node->processes ^ old) & crm_proc_cib) { crm_info("status: cib process on %s is now %sactive", node->uname, is_set(node->processes, crm_proc_cib)?"":"in"); } else { return; } } else { return; } #endif if(cib_shutdown_flag && crm_active_peers() < 2 && g_hash_table_size(client_list) == 0) { crm_info("No more peers"); terminate_cib(__FUNCTION__, FALSE); } } #if SUPPORT_HEARTBEAT static void cib_ha_connection_destroy(gpointer user_data) { if (cib_shutdown_flag) { crm_info("Heartbeat disconnection complete... exiting"); terminate_cib(__FUNCTION__, FALSE); } else { crm_err("Heartbeat connection lost! Exiting."); terminate_cib(__FUNCTION__, TRUE); } } #endif int cib_init(void) { config_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if (startCib("cib.xml") == FALSE) { crm_crit("Cannot start CIB... terminating"); exit(1); } if (stand_alone == FALSE) { void *dispatch = NULL; void *destroy = NULL; if (is_openais_cluster()) { #if SUPPORT_COROSYNC destroy = cib_ais_destroy; dispatch = cib_ais_dispatch; #endif } else if(is_heartbeat_cluster()) { #if SUPPORT_HEARTBEAT dispatch = cib_ha_peer_callback; destroy = cib_ha_connection_destroy; #endif } if (crm_cluster_connect(&cib_our_uname, NULL, dispatch, destroy, #if SUPPORT_HEARTBEAT &hb_conn #else NULL #endif ) == FALSE) { crm_crit("Cannot sign in to the cluster... terminating"); exit(100); } if (is_openais_cluster()) { crm_set_status_callback(&cib_peer_update_callback); } #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { gboolean was_error = FALSE; if (was_error == FALSE) { if (HA_OK != hb_conn->llc_ops->set_cstatus_callback(hb_conn, cib_client_status_callback, hb_conn)) { crm_err("Cannot set cstatus callback: %s", hb_conn->llc_ops->errmsg(hb_conn)); was_error = TRUE; } } if (was_error == FALSE) { was_error = (ccm_connect() == FALSE); } if (was_error == FALSE) { /* Async get client status information in the cluster */ crm_info("Requesting the list of configured nodes"); hb_conn->llc_ops->client_status(hb_conn, NULL, CRM_SYSTEM_CIB, -1); } } #endif } else { cib_our_uname = strdup("localhost"); } ipcs_ro = mainloop_add_ipc_server(cib_channel_ro, QB_IPC_NATIVE, &ipc_ro_callbacks); ipcs_rw = mainloop_add_ipc_server(cib_channel_rw, QB_IPC_NATIVE, &ipc_rw_callbacks); ipcs_shm = mainloop_add_ipc_server(cib_channel_shm, QB_IPC_SHM, &ipc_rw_callbacks); if (stand_alone) { cib_is_master = TRUE; } if (ipcs_ro != NULL && ipcs_rw != NULL && ipcs_shm != NULL) { /* Create the mainloop and run it... */ mainloop = g_main_new(FALSE); crm_info("Starting %s mainloop", crm_system_name); g_main_run(mainloop); } else { crm_err("Couldnt start all IPC channels, exiting."); return -1; } qb_ipcs_destroy(ipcs_ro); qb_ipcs_destroy(ipcs_rw); qb_ipcs_destroy(ipcs_shm); return 0; } void usage(const char *cmd, int exit_status) { FILE *stream; stream = exit_status ? stderr : stdout; fprintf(stream, "usage: %s [-%s]\n", cmd, OPTARGS); fprintf(stream, "\t--%s (-%c)\t\tTurn on debug info." " Additional instances increase verbosity\n", "verbose", 'V'); fprintf(stream, "\t--%s (-%c)\t\tThis help message\n", "help", '?'); fprintf(stream, "\t--%s (-%c)\t\tShow configurable cib options\n", "metadata", 'm'); fprintf(stream, "\t--%s (-%c)\tAdvanced use only\n", "per-action-cib", 'a'); fprintf(stream, "\t--%s (-%c)\tAdvanced use only\n", "stand-alone", 's'); fprintf(stream, "\t--%s (-%c)\tAdvanced use only\n", "disk-writes", 'w'); fprintf(stream, "\t--%s (-%c)\t\tAdvanced use only\n", "cib-root", 'r'); fflush(stream); exit(exit_status); } gboolean startCib(const char *filename) { gboolean active = FALSE; xmlNode *cib = readCibXmlFile(cib_root, filename, !preserve_status); CRM_ASSERT(cib != NULL); if (activateCibXml(cib, TRUE, "start") == 0) { int port = 0; const char *port_s = NULL; active = TRUE; cib_read_config(config_hash, cib); port_s = crm_element_value(cib, "remote-tls-port"); if (port_s) { port = crm_parse_int(port_s, "0"); remote_tls_fd = init_remote_listener(port, TRUE); } port_s = crm_element_value(cib, "remote-clear-port"); if (port_s) { port = crm_parse_int(port_s, "0"); remote_fd = init_remote_listener(port, FALSE); } crm_info("CIB Initialization completed successfully"); } return active; } diff --git a/cib/messages.c b/cib/messages.c index d59a62dd1d..158629e8ff 100644 --- a/cib/messages.c +++ b/cib/messages.c @@ -1,459 +1,459 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include -#include +#include #include #include #include #define MAX_DIFF_RETRY 5 #ifdef CIBPIPE gboolean cib_is_master = TRUE; #else gboolean cib_is_master = FALSE; #endif xmlNode *the_cib = NULL; gboolean syncd_once = FALSE; extern const char *cib_our_uname; int revision_check(xmlNode * cib_update, xmlNode * cib_copy, int flags); int get_revision(xmlNode * xml_obj, int cur_revision); int updateList(xmlNode * local_cib, xmlNode * update_command, xmlNode * failed, int operation, const char *section); gboolean check_generation(xmlNode * newCib, xmlNode * oldCib); gboolean update_results(xmlNode * failed, xmlNode * target, const char *operation, int return_code); int cib_update_counter(xmlNode * xml_obj, const char *field, gboolean reset); int sync_our_cib(xmlNode * request, gboolean all); extern xmlNode *cib_msg_copy(const xmlNode * msg, gboolean with_data); extern gboolean cib_shutdown_flag; int cib_process_shutdown_req(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { #ifdef CIBPIPE return -EINVAL; #else int result = pcmk_ok; const char *host = crm_element_value(req, F_ORIG); *answer = NULL; if (crm_element_value(req, F_CIB_ISREPLY) == NULL) { crm_info("Shutdown REQ from %s", host); return pcmk_ok; } else if (cib_shutdown_flag) { crm_info("Shutdown ACK from %s", host); terminate_cib(__FUNCTION__, FALSE); return pcmk_ok; } else { crm_err("Shutdown ACK from %s - not shutting down", host); result = -EINVAL; } return result; #endif } int cib_process_default(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { int result = pcmk_ok; crm_trace("Processing \"%s\" event", op); *answer = NULL; if (op == NULL) { result = -EINVAL; crm_err("No operation specified"); } else if (strcasecmp(CRM_OP_NOOP, op) == 0) { ; } else { result = -EPROTONOSUPPORT; crm_err("Action [%s] is not supported by the CIB", op); } return result; } int cib_process_quit(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { int result = pcmk_ok; crm_trace("Processing \"%s\" event", op); crm_warn("The CRMd has asked us to exit... complying"); exit(0); return result; } int cib_process_readwrite(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { #ifdef CIBPIPE return -EINVAL; #else int result = pcmk_ok; crm_trace("Processing \"%s\" event", op); if (safe_str_eq(op, CIB_OP_ISMASTER)) { if (cib_is_master == TRUE) { result = pcmk_ok; } else { result = -EPERM; } return result; } if (safe_str_eq(op, CIB_OP_MASTER)) { if (cib_is_master == FALSE) { crm_info("We are now in R/W mode"); cib_is_master = TRUE; syncd_once = TRUE; } else { crm_debug("We are still in R/W mode"); } } else if (cib_is_master) { crm_info("We are now in R/O mode"); cib_is_master = FALSE; } return result; #endif } int cib_process_ping(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { #ifdef CIBPIPE return -EINVAL; #else int result = pcmk_ok; crm_trace("Processing \"%s\" event", op); *answer = create_xml_node(NULL, XML_CRM_TAG_PING); crm_xml_add(*answer, XML_PING_ATTR_STATUS, "ok"); crm_xml_add(*answer, XML_PING_ATTR_SYSFROM, CRM_SYSTEM_CIB); return result; #endif } int cib_process_sync(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { #ifdef CIBPIPE return -EINVAL; #else return sync_our_cib(req, TRUE); #endif } int cib_process_sync_one(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { #ifdef CIBPIPE return -EINVAL; #else return sync_our_cib(req, FALSE); #endif } int sync_in_progress = 0; int cib_server_process_diff(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { int rc = pcmk_ok; if (cib_is_master) { /* the master is never waiting for a resync */ sync_in_progress = 0; } if (sync_in_progress > MAX_DIFF_RETRY) { /* request another full-sync, * the last request may have been lost */ sync_in_progress = 0; } if (sync_in_progress) { int diff_add_updates = 0; int diff_add_epoch = 0; int diff_add_admin_epoch = 0; int diff_del_updates = 0; int diff_del_epoch = 0; int diff_del_admin_epoch = 0; cib_diff_version_details(input, &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); sync_in_progress++; crm_notice("Not applying diff %d.%d.%d -> %d.%d.%d (sync in progress)", diff_del_admin_epoch, diff_del_epoch, diff_del_updates, diff_add_admin_epoch, diff_add_epoch, diff_add_updates); return -pcmk_err_diff_resync; } rc = cib_process_diff(op, options, section, req, input, existing_cib, result_cib, answer); if (rc == -pcmk_err_diff_resync && cib_is_master == FALSE) { xmlNode *sync_me = create_xml_node(NULL, "sync-me"); free_xml(*result_cib); *result_cib = NULL; crm_info("Requesting re-sync from peer"); sync_in_progress++; crm_xml_add(sync_me, F_TYPE, "cib"); crm_xml_add(sync_me, F_CIB_OPERATION, CIB_OP_SYNC_ONE); crm_xml_add(sync_me, F_CIB_DELEGATED, cib_our_uname); if (send_cluster_message(NULL, crm_msg_cib, sync_me, FALSE) == FALSE) { rc = -ENOTCONN; } free_xml(sync_me); } else if (rc == -pcmk_err_diff_resync) { rc = -pcmk_err_diff_failed; if (options & cib_force_diff) { crm_warn("Not requesting full refresh in R/W mode"); } } return rc; } int cib_process_replace_svr(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { const char *tag = crm_element_name(input); int rc = cib_process_replace(op, options, section, req, input, existing_cib, result_cib, answer); if (rc == pcmk_ok && safe_str_eq(tag, XML_TAG_CIB)) { sync_in_progress = 0; } return rc; } static int delete_cib_object(xmlNode * parent, xmlNode * delete_spec) { const char *object_name = NULL; const char *object_id = NULL; xmlNode *equiv_node = NULL; int result = pcmk_ok; if (delete_spec != NULL) { object_name = crm_element_name(delete_spec); } object_id = crm_element_value(delete_spec, XML_ATTR_ID); crm_trace("Processing: <%s id=%s>", crm_str(object_name), crm_str(object_id)); if (delete_spec == NULL) { result = -EINVAL; } else if (parent == NULL) { result = -EINVAL; } else if (object_id == NULL) { /* placeholder object */ equiv_node = find_xml_node(parent, object_name, FALSE); } else { equiv_node = find_entity(parent, object_name, object_id); } if (result != pcmk_ok) { ; /* nothing */ } else if (equiv_node == NULL) { result = pcmk_ok; } else if (xml_has_children(delete_spec) == FALSE) { /* only leaves are deleted */ crm_debug("Removing leaf: <%s id=%s>", crm_str(object_name), crm_str(object_id)); free_xml_from_parent(parent, equiv_node); equiv_node = NULL; } else { xmlNode *child = NULL; for (child = __xml_first_child(delete_spec); child != NULL; child = __xml_next(child)) { int tmp_result = delete_cib_object(equiv_node, child); /* only the first error is likely to be interesting */ if (tmp_result != pcmk_ok && result == pcmk_ok) { result = tmp_result; } } } return result; } int cib_process_delete_absolute(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { xmlNode *failed = NULL; int result = pcmk_ok; xmlNode *update_section = NULL; crm_trace("Processing \"%s\" event for section=%s", op, crm_str(section)); if (safe_str_eq(XML_CIB_TAG_SECTION_ALL, section)) { section = NULL; } else if (safe_str_eq(XML_TAG_CIB, section)) { section = NULL; } else if (safe_str_eq(crm_element_name(input), XML_TAG_CIB)) { section = NULL; } CRM_CHECK(strcasecmp(CIB_OP_DELETE, op) == 0, return -EINVAL); if (input == NULL) { crm_err("Cannot perform modification with no data"); return -EINVAL; } failed = create_xml_node(NULL, XML_TAG_FAILED); update_section = get_object_root(section, *result_cib); result = delete_cib_object(update_section, input); update_results(failed, input, op, result); if (xml_has_children(failed)) { CRM_CHECK(result != pcmk_ok, result = -EINVAL); } if (result != pcmk_ok) { crm_log_xml_err(failed, "CIB Update failures"); *answer = failed; } else { free_xml(failed); } return result; } gboolean check_generation(xmlNode * newCib, xmlNode * oldCib) { if (cib_compare_generation(newCib, oldCib) >= 0) { return TRUE; } crm_warn("Generation from update is older than the existing one"); return FALSE; } #ifndef CIBPIPE int sync_our_cib(xmlNode * request, gboolean all) { int result = pcmk_ok; const char *host = crm_element_value(request, F_ORIG); const char *op = crm_element_value(request, F_CIB_OPERATION); xmlNode *replace_request = cib_msg_copy(request, FALSE); CRM_CHECK(the_cib != NULL,;); CRM_CHECK(replace_request != NULL,;); crm_debug("Syncing CIB to %s", all ? "all peers" : host); if (all == FALSE && host == NULL) { crm_log_xml_err(request, "bad sync"); } /* remove the "all == FALSE" condition * * sync_from was failing, the local client wasnt being notified * because it didnt know it was a reply * setting this does not prevent the other nodes from applying it * if all == TRUE */ if (host != NULL) { crm_xml_add(replace_request, F_CIB_ISREPLY, host); } crm_xml_add(replace_request, F_CIB_OPERATION, CIB_OP_REPLACE); crm_xml_add(replace_request, "original_" F_CIB_OPERATION, op); crm_xml_add(replace_request, F_CIB_GLOBAL_UPDATE, XML_BOOLEAN_TRUE); add_message_xml(replace_request, F_CIB_CALLDATA, the_cib); if (send_cluster_message(all ? NULL : host, crm_msg_cib, replace_request, FALSE) == FALSE) { result = -ENOTCONN; } free_xml(replace_request); return result; } #endif diff --git a/crmd/control.c b/crmd/control.c index b017d4fd37..ce8f54c23f 100644 --- a/crmd/control.c +++ b/crmd/control.c @@ -1,1013 +1,1012 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include -#include -#include "../lib/cluster/stack.h" +#include #include #include #include #include #include #include #include #include #include #include qb_ipcs_service_t *ipcs = NULL; extern gboolean crm_connect_corosync(void); extern void crmd_ha_connection_destroy(gpointer user_data); void crm_shutdown(int nsig); gboolean crm_read_options(gpointer user_data); gboolean fsa_has_quorum = FALSE; GHashTable *ipc_clients = NULL; crm_trigger_t *fsa_source = NULL; crm_trigger_t *config_read = NULL; /* A_HA_CONNECT */ void do_ha_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { gboolean registered = FALSE; if (action & A_HA_DISCONNECT) { if (is_openais_cluster()) { crm_peer_destroy(); #if SUPPORT_COROSYNC terminate_ais_connection(); #endif crm_info("Disconnected from OpenAIS"); #if SUPPORT_HEARTBEAT } else if (fsa_cluster_conn != NULL) { set_bit_inplace(fsa_input_register, R_HA_DISCONNECTED); fsa_cluster_conn->llc_ops->signoff(fsa_cluster_conn, FALSE); crm_info("Disconnected from Heartbeat"); #endif } } if (action & A_HA_CONNECT) { crm_set_status_callback(&peer_update_callback); if (is_openais_cluster()) { #if SUPPORT_COROSYNC registered = crm_connect_corosync(); #endif } else if (is_heartbeat_cluster()) { #if SUPPORT_HEARTBEAT registered = crm_cluster_connect(&fsa_our_uname, &fsa_our_uuid, crmd_ha_msg_callback, crmd_ha_connection_destroy, &fsa_cluster_conn); #endif } #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { crm_trace("Be informed of Node Status changes"); if (registered && fsa_cluster_conn->llc_ops->set_nstatus_callback(fsa_cluster_conn, crmd_ha_status_callback, fsa_cluster_conn) != HA_OK) { crm_err("Cannot set nstatus callback: %s", fsa_cluster_conn->llc_ops->errmsg(fsa_cluster_conn)); registered = FALSE; } crm_trace("Be informed of CRM Client Status changes"); if (registered && fsa_cluster_conn->llc_ops->set_cstatus_callback(fsa_cluster_conn, crmd_client_status_callback, fsa_cluster_conn) != HA_OK) { crm_err("Cannot set cstatus callback: %s", fsa_cluster_conn->llc_ops->errmsg(fsa_cluster_conn)); registered = FALSE; } if (registered) { crm_trace("Requesting an initial dump of CRMD client_status"); fsa_cluster_conn->llc_ops->client_status(fsa_cluster_conn, NULL, CRM_SYSTEM_CRMD, -1); } } #endif if (registered == FALSE) { set_bit_inplace(fsa_input_register, R_HA_DISCONNECTED); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } clear_bit_inplace(fsa_input_register, R_HA_DISCONNECTED); crm_info("Connected to the cluster"); } if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } } /* A_SHUTDOWN */ void do_shutdown(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { /* just in case */ set_bit_inplace(fsa_input_register, R_SHUTDOWN); if (is_heartbeat_cluster()) { if (is_set(fsa_input_register, pe_subsystem->flag_connected)) { crm_info("Terminating the %s", pe_subsystem->name); if (stop_subsystem(pe_subsystem, TRUE) == FALSE) { /* its gone... */ crm_err("Faking %s exit", pe_subsystem->name); clear_bit_inplace(fsa_input_register, pe_subsystem->flag_connected); } else { crm_info("Waiting for subsystems to exit"); crmd_fsa_stall(NULL); } } crm_info("All subsystems stopped, continuing"); } if (stonith_api) { /* Prevent it from comming up again */ clear_bit_inplace(fsa_input_register, R_ST_REQUIRED); crm_info("Disconnecting STONITH..."); stonith_api->cmds->disconnect(stonith_api); } } /* A_SHUTDOWN_REQ */ void do_shutdown_req(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { xmlNode *msg = NULL; crm_info("Sending shutdown request to %s", crm_str(fsa_our_dc)); msg = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); /* set_bit_inplace(fsa_input_register, R_STAYDOWN); */ if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } free_xml(msg); } extern crm_ipc_t *attrd_ipc; extern char *max_generation_from; extern xmlNode *max_generation_xml; extern GHashTable *resource_history; extern GHashTable *voted; extern GHashTable *reload_hash; void log_connected_client(gpointer key, gpointer value, gpointer user_data); void log_connected_client(gpointer key, gpointer value, gpointer user_data) { crmd_client_t *client = value; crm_err("%s is still connected at exit", client->table_key); } static void free_mem(fsa_data_t * msg_data) { GListPtr gIter = NULL; crm_ipc_close(attrd_ipc); crm_ipc_destroy(attrd_ipc); g_main_loop_quit(crmd_mainloop); g_main_loop_unref(crmd_mainloop); #if SUPPORT_HEARTBEAT if (fsa_cluster_conn) { fsa_cluster_conn->llc_ops->delete(fsa_cluster_conn); fsa_cluster_conn = NULL; } #endif for(gIter = fsa_message_queue; gIter != NULL; gIter = gIter->next) { fsa_data_t *fsa_data = gIter->data; crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]", fsa_input2string(fsa_data->fsa_input), fsa_state2string(fsa_state), fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); delete_fsa_input(fsa_data); } g_list_free(fsa_message_queue); delete_fsa_input(msg_data); if (ipc_clients) { crm_debug("Number of connected clients: %d", g_hash_table_size(ipc_clients)); /* g_hash_table_foreach(ipc_clients, log_connected_client, NULL); */ g_hash_table_destroy(ipc_clients); } empty_uuid_cache(); crm_peer_destroy(); clear_bit_inplace(fsa_input_register, R_MEMBERSHIP); if (te_subsystem->client && te_subsystem->client->ipc) { crm_debug("Full destroy: TE"); qb_ipcs_disconnect(te_subsystem->client->ipc); } free(te_subsystem); if (pe_subsystem->client && pe_subsystem->client->ipc) { crm_debug("Full destroy: PE"); qb_ipcs_disconnect(pe_subsystem->client->ipc); } free(pe_subsystem); free(cib_subsystem); if (integrated_nodes) { g_hash_table_destroy(integrated_nodes); } if (finalized_nodes) { g_hash_table_destroy(finalized_nodes); } if (confirmed_nodes) { g_hash_table_destroy(confirmed_nodes); } if (reload_hash) { g_hash_table_destroy(reload_hash); } if (resource_history) { g_hash_table_destroy(resource_history); } if (voted) { g_hash_table_destroy(voted); } cib_delete(fsa_cib_conn); fsa_cib_conn = NULL; if (fsa_lrm_conn) { lrmd_api_delete(fsa_lrm_conn); fsa_lrm_conn = NULL; } free(transition_timer); free(integration_timer); free(finalization_timer); free(election_trigger); free(election_timeout); free(shutdown_escalation_timer); free(wait_timer); free(recheck_timer); free(fsa_our_dc_version); free(fsa_our_uname); free(fsa_our_uuid); free(fsa_our_dc); free(max_generation_from); free_xml(max_generation_xml); crm_xml_cleanup(); } /* A_EXIT_0, A_EXIT_1 */ void do_exit(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { int exit_code = 0; int log_level = LOG_INFO; const char *exit_type = "gracefully"; if (action & A_EXIT_1) { exit_code = 1; log_level = LOG_ERR; exit_type = "forcefully"; } verify_stopped(cur_state, LOG_ERR); do_crm_log(log_level, "Performing %s - %s exiting the CRMd", fsa_action2string(action), exit_type); if (is_set(fsa_input_register, R_IN_RECOVERY)) { crm_err("Could not recover from internal error"); exit_code = 2; } if (is_set(fsa_input_register, R_STAYDOWN)) { crm_warn("Inhibiting respawn by Heartbeat"); exit_code = 100; } crm_info("[%s] stopped (%d)", crm_system_name, exit_code); free_mem(msg_data); exit(exit_code); } /* A_STARTUP */ void do_startup(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { int was_error = 0; int interval = 1; /* seconds between DC heartbeats */ crm_debug("Registering Signal Handlers"); mainloop_add_signal(SIGTERM, crm_shutdown); fsa_source = mainloop_add_trigger(G_PRIORITY_HIGH, crm_fsa_trigger, NULL); config_read = mainloop_add_trigger(G_PRIORITY_HIGH, crm_read_options, NULL); ipc_clients = g_hash_table_new(crm_str_hash, g_str_equal); crm_debug("Creating CIB and LRM objects"); fsa_cib_conn = cib_new(); fsa_lrm_conn = lrmd_api_new(); /* set up the timers */ transition_timer = calloc(1, sizeof(fsa_timer_t)); integration_timer = calloc(1, sizeof(fsa_timer_t)); finalization_timer = calloc(1, sizeof(fsa_timer_t)); election_trigger = calloc(1, sizeof(fsa_timer_t)); election_timeout = calloc(1, sizeof(fsa_timer_t)); shutdown_escalation_timer = calloc(1, sizeof(fsa_timer_t)); wait_timer = calloc(1, sizeof(fsa_timer_t)); recheck_timer = calloc(1, sizeof(fsa_timer_t)); interval = interval * 1000; if (election_trigger != NULL) { election_trigger->source_id = 0; election_trigger->period_ms = -1; election_trigger->fsa_input = I_DC_TIMEOUT; election_trigger->callback = crm_timer_popped; election_trigger->repeat = FALSE; } else { was_error = TRUE; } if (election_timeout != NULL) { election_timeout->source_id = 0; election_timeout->period_ms = -1; election_timeout->fsa_input = I_ELECTION_DC; election_timeout->callback = crm_timer_popped; election_timeout->repeat = FALSE; } else { was_error = TRUE; } if (transition_timer != NULL) { transition_timer->source_id = 0; transition_timer->period_ms = -1; transition_timer->fsa_input = I_PE_CALC; transition_timer->callback = crm_timer_popped; transition_timer->repeat = FALSE; } else { was_error = TRUE; } if (integration_timer != NULL) { integration_timer->source_id = 0; integration_timer->period_ms = -1; integration_timer->fsa_input = I_INTEGRATED; integration_timer->callback = crm_timer_popped; integration_timer->repeat = FALSE; } else { was_error = TRUE; } if (finalization_timer != NULL) { finalization_timer->source_id = 0; finalization_timer->period_ms = -1; finalization_timer->fsa_input = I_FINALIZED; finalization_timer->callback = crm_timer_popped; finalization_timer->repeat = FALSE; /* for possible enabling... a bug in the join protocol left * a slave in S_PENDING while we think its in S_NOT_DC * * raising I_FINALIZED put us into a transition loop which is * never resolved. * in this loop we continually send probes which the node * NACK's because its in S_PENDING * * if we have nodes where heartbeat is active but the * CRM is not... then this will be handled in the * integration phase */ finalization_timer->fsa_input = I_ELECTION; } else { was_error = TRUE; } if (shutdown_escalation_timer != NULL) { shutdown_escalation_timer->source_id = 0; shutdown_escalation_timer->period_ms = -1; shutdown_escalation_timer->fsa_input = I_STOP; shutdown_escalation_timer->callback = crm_timer_popped; shutdown_escalation_timer->repeat = FALSE; } else { was_error = TRUE; } if (wait_timer != NULL) { wait_timer->source_id = 0; wait_timer->period_ms = 2000; wait_timer->fsa_input = I_NULL; wait_timer->callback = crm_timer_popped; wait_timer->repeat = FALSE; } else { was_error = TRUE; } if (recheck_timer != NULL) { recheck_timer->source_id = 0; recheck_timer->period_ms = -1; recheck_timer->fsa_input = I_PE_CALC; recheck_timer->callback = crm_timer_popped; recheck_timer->repeat = FALSE; } else { was_error = TRUE; } /* set up the sub systems */ cib_subsystem = calloc(1, sizeof(struct crm_subsystem_s)); te_subsystem = calloc(1, sizeof(struct crm_subsystem_s)); pe_subsystem = calloc(1, sizeof(struct crm_subsystem_s)); if (cib_subsystem != NULL) { cib_subsystem->pid = -1; cib_subsystem->name = CRM_SYSTEM_CIB; cib_subsystem->flag_connected = R_CIB_CONNECTED; cib_subsystem->flag_required = R_CIB_REQUIRED; } else { was_error = TRUE; } if (te_subsystem != NULL) { te_subsystem->pid = -1; te_subsystem->name = CRM_SYSTEM_TENGINE; te_subsystem->flag_connected = R_TE_CONNECTED; te_subsystem->flag_required = R_TE_REQUIRED; } else { was_error = TRUE; } if (pe_subsystem != NULL) { pe_subsystem->pid = -1; pe_subsystem->path = CRM_DAEMON_DIR; pe_subsystem->name = CRM_SYSTEM_PENGINE; pe_subsystem->command = CRM_DAEMON_DIR "/" CRM_SYSTEM_PENGINE; pe_subsystem->args = NULL; pe_subsystem->flag_connected = R_PE_CONNECTED; pe_subsystem->flag_required = R_PE_REQUIRED; } else { was_error = TRUE; } if (was_error == FALSE && is_heartbeat_cluster()) { if(start_subsystem(pe_subsystem) == FALSE) { was_error = TRUE; } } if (was_error) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } welcomed_nodes = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); integrated_nodes = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); finalized_nodes = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); confirmed_nodes = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); } static int32_t crmd_ipc_accept(qb_ipcs_connection_t *c, uid_t uid, gid_t gid) { crmd_client_t *blank_client = NULL; #if ENABLE_ACL struct group *crm_grp = NULL; #endif crm_trace("Connecting %p for uid=%d gid=%d", c, uid, gid); blank_client = calloc(1, sizeof(crmd_client_t)); CRM_ASSERT(blank_client != NULL); crm_trace("Created client: %p", blank_client); blank_client->ipc = c; blank_client->sub_sys = NULL; blank_client->uuid = NULL; blank_client->table_key = NULL; #if ENABLE_ACL crm_grp = getgrnam(CRM_DAEMON_GROUP); if (crm_grp) { qb_ipcs_connection_auth_set(c, -1, crm_grp->gr_gid, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); } blank_client->user = uid2username(uid); #endif qb_ipcs_context_set(c, blank_client); return 0; } static void crmd_ipc_created(qb_ipcs_connection_t *c) { crm_trace("Client %p connected", c); } static int32_t crmd_ipc_dispatch(qb_ipcs_connection_t *c, void *data, size_t size) { crmd_client_t *client = qb_ipcs_context_get(c); xmlNode *msg = crm_ipcs_recv(c, data, size); xmlNode *ack = create_xml_node(NULL, "ack"); crm_trace("Invoked: %s", client->table_key); crm_ipcs_send(c, ack, FALSE); /* TODO: Do not unconditionally send this */ free_xml(ack); if (msg == NULL) { return 0; } #if ENABLE_ACL determine_request_user(client->user, msg, F_CRM_USER); #endif crm_trace("Processing msg from %s", client->table_key); crm_log_xml_trace(msg, "CRMd[inbound]"); if (crmd_authorize_message(msg, client)) { route_message(C_IPC_MESSAGE, msg); } trigger_fsa(fsa_source); free_xml(msg); return 0; } static int32_t crmd_ipc_closed(qb_ipcs_connection_t *c) { return 0; } static void crmd_ipc_destroy(qb_ipcs_connection_t *c) { crmd_client_t *client = qb_ipcs_context_get(c); if (client == NULL) { crm_trace("No client to delete"); return; } process_client_disconnect(client); crm_trace("Disconnecting client %s (%p)", client->table_key, client); free(client->table_key); free(client->sub_sys); free(client->uuid); free(client->user); free(client); trigger_fsa(fsa_source); } /* A_STOP */ void do_stop(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { if (is_heartbeat_cluster()) { stop_subsystem(pe_subsystem, FALSE); } mainloop_del_ipc_server(ipcs); register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } /* A_STARTED */ void do_started(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { static struct qb_ipcs_service_handlers crmd_callbacks = { .connection_accept = crmd_ipc_accept, .connection_created = crmd_ipc_created, .msg_process = crmd_ipc_dispatch, .connection_closed = crmd_ipc_closed, .connection_destroyed = crmd_ipc_destroy }; if (cur_state != S_STARTING) { crm_err("Start cancelled... %s", fsa_state2string(cur_state)); return; } else if (is_set(fsa_input_register, R_MEMBERSHIP) == FALSE) { crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP); crmd_fsa_stall(NULL); return; } else if (is_set(fsa_input_register, R_LRM_CONNECTED) == FALSE) { crm_info("Delaying start, LRM not connected (%.16llx)", R_LRM_CONNECTED); crmd_fsa_stall(NULL); return; } else if (is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) { crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED); crmd_fsa_stall(NULL); return; } else if (is_set(fsa_input_register, R_READ_CONFIG) == FALSE) { crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG); crmd_fsa_stall(NULL); return; } else if (is_set(fsa_input_register, R_PEER_DATA) == FALSE) { /* try reading from HA */ crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA); #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { HA_Message *msg = NULL; crm_trace("Looking for a HA message"); msg = fsa_cluster_conn->llc_ops->readmsg(fsa_cluster_conn, 0); if (msg != NULL) { crm_trace("There was a HA message"); ha_msg_del(msg); } } #endif crmd_fsa_stall(NULL); return; } crm_debug("Init server comms"); ipcs = mainloop_add_ipc_server(CRM_SYSTEM_CRMD, QB_IPC_NATIVE, &crmd_callbacks); if (ipcs == NULL) { crm_err("Couldn't start IPC server"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } if (stonith_reconnect == NULL) { int dummy; stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW, te_connect_stonith, &dummy); } set_bit_inplace(fsa_input_register, R_ST_REQUIRED); mainloop_set_trigger(stonith_reconnect); crm_notice("The local CRM is operational"); clear_bit_inplace(fsa_input_register, R_STARTING); register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL); } /* A_RECOVER */ void do_recover(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { set_bit_inplace(fsa_input_register, R_IN_RECOVERY); crm_err("Action %s (%.16llx) not supported", fsa_action2string(action), action); register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } /* *INDENT-OFF* */ pe_cluster_option crmd_opts[] = { /* name, old-name, validate, default, description */ { "dc-version", NULL, "string", NULL, "none", NULL, "Version of Pacemaker on the cluster's DC.", "Includes the hash which identifies the exact Mercurial changeset it was built from. Used for diagnostic purposes." }, { "cluster-infrastructure", NULL, "string", NULL, "heartbeat", NULL, "The messaging stack on which Pacemaker is currently running.", "Used for informational and diagnostic purposes." }, { XML_CONFIG_ATTR_DC_DEADTIME, "dc_deadtime", "time", NULL, "20s", &check_time, "How long to wait for a response from other nodes during startup.", "The \"correct\" value will depend on the speed/load of your network and the type of switches used." }, { XML_CONFIG_ATTR_RECHECK, "cluster_recheck_interval", "time", "Zero disables polling. Positive values are an interval in seconds (unless other SI units are specified. eg. 5min)", "15min", &check_timer, "Polling interval for time based changes to options, resource parameters and constraints.", "The Cluster is primarily event driven, however the configuration can have elements that change based on time." " To ensure these changes take effect, we can optionally poll the cluster's status for changes." }, { XML_CONFIG_ATTR_ELECTION_FAIL, "election_timeout", "time", NULL, "2min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." }, { XML_CONFIG_ATTR_FORCE_QUIT, "shutdown_escalation", "time", NULL, "20min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." }, { "crmd-integration-timeout", NULL, "time", NULL, "3min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." }, { "crmd-finalization-timeout", NULL, "time", NULL, "30min", &check_timer, "*** Advanced Use Only ***.", "If you need to adjust this value, it probably indicates the presence of a bug." }, { "crmd-transition-delay", NULL, "time", NULL, "0s", &check_timer, "*** Advanced Use Only ***\nEnabling this option will slow down cluster recovery under all conditions", "Delay cluster recovery for the configured interval to allow for additional/related events to occur.\nUseful if your configuration is sensitive to the order in which ping updates arrive." }, { XML_ATTR_EXPECTED_VOTES, NULL, "integer", NULL, "2", &check_number, "The number of nodes expected to be in the cluster", "Used to calculate quorum in openais based clusters." }, }; /* *INDENT-ON* */ void crmd_metadata(void) { config_metadata("CRM Daemon", "1.0", "CRM Daemon Options", "This is a fake resource that details the options that can be configured for the CRM Daemon.", crmd_opts, DIMOF(crmd_opts)); } static void verify_crmd_options(GHashTable * options) { verify_all_options(options, crmd_opts, DIMOF(crmd_opts)); } static const char * crmd_pref(GHashTable * options, const char *name) { return get_cluster_pref(options, crmd_opts, DIMOF(crmd_opts), name); } static void config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { const char *value = NULL; GHashTable *config_hash = NULL; ha_time_t *now = new_ha_date(TRUE); if (rc != pcmk_ok) { fsa_data_t *msg_data = NULL; crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); if (rc == -EACCES || rc == -pcmk_err_dtd_validation) { crm_err("The cluster is mis-configured - shutting down and staying down"); set_bit_inplace(fsa_input_register, R_STAYDOWN); } goto bail; } crm_debug("Call %d : Parsing CIB options", call_id); config_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); unpack_instance_attributes(output, output, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, FALSE, now); verify_crmd_options(config_hash); value = crmd_pref(config_hash, XML_CONFIG_ATTR_DC_DEADTIME); election_trigger->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, XML_CONFIG_ATTR_FORCE_QUIT); shutdown_escalation_timer->period_ms = crm_get_msec(value); crm_debug("Shutdown escalation occurs after: %dms", shutdown_escalation_timer->period_ms); value = crmd_pref(config_hash, XML_CONFIG_ATTR_ELECTION_FAIL); election_timeout->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, XML_CONFIG_ATTR_RECHECK); recheck_timer->period_ms = crm_get_msec(value); crm_debug("Checking for expired actions every %dms", recheck_timer->period_ms); value = crmd_pref(config_hash, "crmd-transition-delay"); transition_timer->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, "crmd-integration-timeout"); integration_timer->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, "crmd-finalization-timeout"); finalization_timer->period_ms = crm_get_msec(value); #if SUPPORT_COROSYNC if (is_classic_ais_cluster()) { value = crmd_pref(config_hash, XML_ATTR_EXPECTED_VOTES); crm_debug("Sending expected-votes=%s to corosync", value); send_ais_text(crm_class_quorum, value, TRUE, NULL, crm_msg_ais); } #endif set_bit_inplace(fsa_input_register, R_READ_CONFIG); crm_trace("Triggering FSA: %s", __FUNCTION__); mainloop_set_trigger(fsa_source); g_hash_table_destroy(config_hash); bail: free_ha_date(now); } gboolean crm_read_options(gpointer user_data) { int call_id = fsa_cib_conn->cmds->query(fsa_cib_conn, XML_CIB_TAG_CRMCONFIG, NULL, cib_scope_local); add_cib_op_callback(fsa_cib_conn, call_id, FALSE, NULL, config_query_callback); crm_trace("Querying the CIB... call %d", call_id); return TRUE; } /* A_READCONFIG */ void do_read_config(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { mainloop_set_trigger(config_read); } void crm_shutdown(int nsig) { if (crmd_mainloop != NULL && g_main_is_running(crmd_mainloop)) { if (is_set(fsa_input_register, R_SHUTDOWN)) { crm_err("Escalating the shutdown"); register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL); } else { set_bit_inplace(fsa_input_register, R_SHUTDOWN); register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL); if (shutdown_escalation_timer->period_ms < 1) { const char *value = crmd_pref(NULL, XML_CONFIG_ATTR_FORCE_QUIT); int msec = crm_get_msec(value); crm_debug("Using default shutdown escalation: %dms", msec); shutdown_escalation_timer->period_ms = msec; } /* cant rely on this... */ crm_notice("Requesting shutdown, upper limit is %dms", shutdown_escalation_timer->period_ms); crm_timer_start(shutdown_escalation_timer); } } else { crm_info("exit from shutdown"); exit(EX_OK); } } static void default_cib_update_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { if (rc != pcmk_ok) { fsa_data_t *msg_data = NULL; crm_err("CIB Update failed: %s", pcmk_strerror(rc)); crm_log_xml_warn(output, "update:failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } #if SUPPORT_HEARTBEAT static void populate_cib_nodes_ha(gboolean with_client_status) { int call_id = 0; const char *ha_node = NULL; xmlNode *cib_node_list = NULL; if (fsa_cluster_conn == NULL) { crm_debug("Not connected"); return; } /* Async get client status information in the cluster */ crm_info("Requesting the list of configured nodes"); fsa_cluster_conn->llc_ops->init_nodewalk(fsa_cluster_conn); cib_node_list = create_xml_node(NULL, XML_CIB_TAG_NODES); do { const char *ha_node_type = NULL; const char *ha_node_uuid = NULL; xmlNode *cib_new_node = NULL; ha_node = fsa_cluster_conn->llc_ops->nextnode(fsa_cluster_conn); if (ha_node == NULL) { continue; } ha_node_type = fsa_cluster_conn->llc_ops->node_type(fsa_cluster_conn, ha_node); if (safe_str_neq(NORMALNODE, ha_node_type)) { crm_debug("Node %s: skipping '%s'", ha_node, ha_node_type); continue; } ha_node_uuid = get_uuid(ha_node); if (ha_node_uuid == NULL) { crm_warn("Node %s: no uuid found", ha_node); continue; } crm_debug("Node: %s (uuid: %s)", ha_node, ha_node_uuid); cib_new_node = create_xml_node(cib_node_list, XML_CIB_TAG_NODE); crm_xml_add(cib_new_node, XML_ATTR_ID, ha_node_uuid); crm_xml_add(cib_new_node, XML_ATTR_UNAME, ha_node); crm_xml_add(cib_new_node, XML_ATTR_TYPE, ha_node_type); } while (ha_node != NULL); fsa_cluster_conn->llc_ops->end_nodewalk(fsa_cluster_conn); /* Now update the CIB with the list of nodes */ fsa_cib_update(XML_CIB_TAG_NODES, cib_node_list, cib_scope_local | cib_quorum_override, call_id, NULL); add_cib_op_callback(fsa_cib_conn, call_id, FALSE, NULL, default_cib_update_callback); free_xml(cib_node_list); crm_trace("Complete"); } #endif static void create_cib_node_definition(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; xmlNode *cib_nodes = user_data; xmlNode *cib_new_node = NULL; crm_trace("Creating node entry for %s/%s", node->uname, node->uuid); cib_new_node = create_xml_node(cib_nodes, XML_CIB_TAG_NODE); crm_xml_add(cib_new_node, XML_ATTR_ID, node->uuid); crm_xml_add(cib_new_node, XML_ATTR_UNAME, node->uname); crm_xml_add(cib_new_node, XML_ATTR_TYPE, NORMALNODE); } void populate_cib_nodes(gboolean with_client_status) { int call_id = 0; xmlNode *cib_node_list = NULL; #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { populate_cib_nodes_ha(with_client_status); return; } #endif cib_node_list = create_xml_node(NULL, XML_CIB_TAG_NODES); g_hash_table_foreach(crm_peer_cache, create_cib_node_definition, cib_node_list); fsa_cib_update(XML_CIB_TAG_NODES, cib_node_list, cib_scope_local | cib_quorum_override, call_id, NULL); add_cib_op_callback(fsa_cib_conn, call_id, FALSE, NULL, default_cib_update_callback); free_xml(cib_node_list); crm_trace("Complete"); } diff --git a/crmd/corosync.c b/crmd/corosync.c index e0f9f711fa..c2fff68359 100644 --- a/crmd/corosync.c +++ b/crmd/corosync.c @@ -1,182 +1,182 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include -#include +#include #include #include #include #include #include #include #include #include #include #include extern void post_cache_update(int seq); extern void crmd_ha_connection_destroy(gpointer user_data); /* A_HA_CONNECT */ #if SUPPORT_COROSYNC static gboolean crmd_ais_dispatch(AIS_Message * wrapper, char *data, int sender) { int seq = 0; xmlNode *xml = NULL; const char *seq_s = NULL; xml = string2xml(data); if (xml == NULL) { crm_err("Could not parse message content (%d): %.100s", wrapper->header.id, data); return TRUE; } switch (wrapper->header.id) { case crm_class_members: seq_s = crm_element_value(xml, "id"); seq = crm_int_helper(seq_s, NULL); set_bit_inplace(fsa_input_register, R_PEER_DATA); post_cache_update(seq); /* fall through */ case crm_class_quorum: crm_update_quorum(crm_have_quorum, FALSE); if (AM_I_DC) { const char *votes = crm_element_value(xml, "expected"); if (votes == NULL || check_number(votes) == FALSE) { crm_log_xml_err(xml, "Invalid quorum/membership update"); } else { int rc = update_attr(fsa_cib_conn, cib_quorum_override | cib_scope_local | cib_inhibit_notify, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, XML_ATTR_EXPECTED_VOTES, votes, FALSE); crm_info("Setting expected votes to %s", votes); if (pcmk_ok > rc) { crm_err("Quorum update failed: %s", pcmk_strerror(rc)); } } } break; case crm_class_cluster: crm_xml_add(xml, F_ORIG, wrapper->sender.uname); crm_xml_add_int(xml, F_SEQ, wrapper->id); crmd_ha_msg_filter(xml); break; case crm_class_rmpeer: /* Ignore */ break; case crm_class_notify: case crm_class_nodeid: crm_err("Unexpected message class (%d): %.100s", wrapper->header.id, data); break; default: crm_err("Invalid message class (%d): %.100s", wrapper->header.id, data); } free_xml(xml); return TRUE; } static gboolean crmd_cman_dispatch(unsigned long long seq, gboolean quorate) { crm_update_quorum(quorate, FALSE); post_cache_update(seq); return TRUE; } static void crmd_quorum_destroy(gpointer user_data) { if (is_set(fsa_input_register, R_HA_DISCONNECTED)) { crm_err("connection terminated"); exit(1); } else { crm_info("connection closed"); } } static void crmd_ais_destroy(gpointer user_data) { if (is_set(fsa_input_register, R_HA_DISCONNECTED)) { crm_err("connection terminated"); exit(1); } else { crm_info("connection closed"); } } #if SUPPORT_CMAN static void crmd_cman_destroy(gpointer user_data) { if (is_set(fsa_input_register, R_HA_DISCONNECTED)) { crm_err("connection terminated"); exit(1); } else { crm_info("connection closed"); } } #endif extern gboolean crm_connect_corosync(void); gboolean crm_connect_corosync(void) { gboolean rc = FALSE; if (is_openais_cluster()) { crm_set_status_callback(&peer_update_callback); rc = crm_cluster_connect(&fsa_our_uname, &fsa_our_uuid, crmd_ais_dispatch, crmd_ais_destroy, NULL); } if (rc && is_corosync_cluster()) { init_quorum_connection(crmd_cman_dispatch, crmd_quorum_destroy); } #if SUPPORT_CMAN if (rc && is_cman_cluster()) { init_cman_connection(crmd_cman_dispatch, crmd_cman_destroy); set_bit_inplace(fsa_input_register, R_MEMBERSHIP); } #endif return rc; } #endif diff --git a/crmd/crmd_messages.h b/crmd/crmd_messages.h index 23c58b0c8e..2fcb055a17 100644 --- a/crmd/crmd_messages.h +++ b/crmd/crmd_messages.h @@ -1,111 +1,111 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef XML_CRM_MESSAGES__H # define XML_CRM_MESSAGES__H # include # include # include -# include +# include # include typedef struct ha_msg_input_s { xmlNode *msg; xmlNode *xml; } ha_msg_input_t; extern ha_msg_input_t *new_ha_msg_input(xmlNode * orig); extern void delete_ha_msg_input(ha_msg_input_t * orig); extern void *fsa_typed_data_adv(fsa_data_t * fsa_data, enum fsa_data_type a_type, const char *caller); # define fsa_typed_data(x) fsa_typed_data_adv(msg_data, x, __FUNCTION__) extern void register_fsa_error_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input, fsa_data_t * cur_data, void *new_data, const char *raised_from); # define register_fsa_error(cause, input, new_data) register_fsa_error_adv(cause, input, msg_data, new_data, __FUNCTION__) extern int register_fsa_input_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input, void *data, long long with_actions, gboolean prepend, const char *raised_from); extern void fsa_dump_queue(int log_level); extern void route_message(enum crmd_fsa_cause cause, xmlNode * input); # define crmd_fsa_stall(cur_input) if(cur_input != NULL) { \ register_fsa_input_adv( \ ((fsa_data_t*)cur_input)->fsa_cause, I_WAIT_FOR_EVENT, \ ((fsa_data_t*)cur_input)->data, action, TRUE, __FUNCTION__); \ } else { \ register_fsa_input_adv( \ C_FSA_INTERNAL, I_WAIT_FOR_EVENT, \ NULL, action, TRUE, __FUNCTION__); \ } \ # define register_fsa_input(cause, input, data) register_fsa_input_adv(cause, input, data, A_NOTHING, FALSE, __FUNCTION__) # define register_fsa_action(action) { \ fsa_actions |= action; \ if(fsa_source) { \ mainloop_set_trigger(fsa_source); \ } \ crm_debug("%s added action %s to the FSA", \ __FUNCTION__, fsa_action2string(action)); \ } # define register_fsa_input_before(cause, input, data) register_fsa_input_adv(cause, input, data, A_NOTHING, TRUE, __FUNCTION__) # define register_fsa_input_later(cause, input, data) register_fsa_input_adv(cause, input, data, A_NOTHING, FALSE, __FUNCTION__) void delete_fsa_input(fsa_data_t * fsa_data); GListPtr put_message(fsa_data_t * new_message); fsa_data_t *get_message(void); gboolean is_message(void); gboolean have_wait_message(void); extern gboolean relay_message(xmlNode * relay_message, gboolean originated_locally); extern void process_message(xmlNode * msg, gboolean originated_locally, const char *src_node_name); extern gboolean crm_dc_process_message(xmlNode * whole_message, xmlNode * action, const char *host_from, const char *sys_from, const char *sys_to, const char *op, gboolean dc_mode); extern gboolean send_msg_via_ipc(xmlNode * msg, const char *sys); extern gboolean add_pending_outgoing_reply(const char *originating_node_name, const char *crm_msg_reference, const char *sys_to, const char *sys_from); extern gboolean crmd_authorize_message(xmlNode * client_msg, crmd_client_t * curr_client); extern gboolean send_request(xmlNode * msg, char **msg_reference); extern enum crmd_fsa_input handle_message(xmlNode * stored_msg); extern void lrm_op_callback(lrmd_event_data_t * op); extern ha_msg_input_t *copy_ha_msg_input(ha_msg_input_t * orig); #endif diff --git a/crmd/election.c b/crmd/election.c index 69e5140b5f..d2d58fae4e 100644 --- a/crmd/election.c +++ b/crmd/election.c @@ -1,577 +1,577 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include -#include +#include #include #include #include #include #include GHashTable *voted = NULL; uint highest_born_on = -1; static int current_election_id = 1; static int crm_uptime(struct timeval *output) { struct rusage info; int rc = getrusage(RUSAGE_SELF, &info); output->tv_sec = 0; output->tv_usec = 0; if (rc < 0) { crm_perror(LOG_ERR, "Could not calculate the current uptime"); return -1; } output->tv_sec = info.ru_utime.tv_sec; output->tv_usec = info.ru_utime.tv_usec; crm_debug("Current CPU usage is: %lds, %ldus", (long)info.ru_utime.tv_sec, (long)info.ru_utime.tv_usec); return 1; } static int crm_compare_age(struct timeval your_age) { struct timeval our_age; if (crm_uptime(&our_age) < 0) { return -1; } /* We want these times to be "significantly" different */ if (our_age.tv_sec > your_age.tv_sec) { crm_debug("Win: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec); return 1; } else if (our_age.tv_sec < your_age.tv_sec) { crm_debug("Loose: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec); return -1; } else if (our_age.tv_usec > your_age.tv_usec) { crm_debug("Win: %ld vs %ld (usec)", (long)our_age.tv_usec, (long)your_age.tv_usec); return 1; } else if (our_age.tv_usec < your_age.tv_usec) { crm_debug("Loose: %ld vs %ld (usec)", (long)our_age.tv_usec, (long)your_age.tv_usec); return -1; } return 0; } /* A_ELECTION_VOTE */ void do_election_vote(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { struct timeval age; xmlNode *vote = NULL; gboolean not_voting = FALSE; /* don't vote if we're in one of these states or wanting to shut down */ switch (cur_state) { case S_STARTING: case S_RECOVERY: case S_STOPPING: case S_TERMINATE: crm_warn("Not voting in election, we're in state %s", fsa_state2string(cur_state)); not_voting = TRUE; break; default: break; } if (not_voting == FALSE) { if (is_set(fsa_input_register, R_STARTING)) { not_voting = TRUE; } } if (not_voting) { if (AM_I_DC) { register_fsa_input(C_FSA_INTERNAL, I_RELEASE_DC, NULL); } else { register_fsa_input(C_FSA_INTERNAL, I_PENDING, NULL); } return; } vote = create_request(CRM_OP_VOTE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); current_election_id++; crm_xml_add(vote, F_CRM_ELECTION_OWNER, fsa_our_uuid); crm_xml_add_int(vote, F_CRM_ELECTION_ID, current_election_id); crm_uptime(&age); crm_xml_add_int(vote, F_CRM_ELECTION_AGE_S, age.tv_sec); crm_xml_add_int(vote, F_CRM_ELECTION_AGE_US, age.tv_usec); send_cluster_message(NULL, crm_msg_crmd, vote, TRUE); free_xml(vote); crm_debug("Started election %d", current_election_id); if (voted) { g_hash_table_destroy(voted); } voted = NULL; if (cur_state == S_ELECTION || cur_state == S_RELEASE_DC) { crm_timer_start(election_timeout); } else if (cur_state != S_INTEGRATION) { crm_err("Broken? Voting in state %s", fsa_state2string(cur_state)); } return; } char *dc_hb_msg = NULL; int beat_num = 0; gboolean do_dc_heartbeat(gpointer data) { return TRUE; } struct election_data_s { const char *winning_uname; unsigned int winning_bornon; }; static void log_member_uname(gpointer key, gpointer value, gpointer user_data) { const crm_node_t *node = value; if (crm_is_peer_active(node)) { crm_err("%s: %s proc=%.32x", (char *)user_data, (char *)key, node->processes); } } static void log_node(gpointer key, gpointer value, gpointer user_data) { crm_err("%s: %s", (char *)user_data, (char *)key); } void do_election_check(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { int voted_size = 0; int num_members = crm_active_peers(); if (voted) { voted_size = g_hash_table_size(voted); } /* in the case of #voted > #members, it is better to * wait for the timeout and give the cluster time to * stabilize */ if (fsa_state != S_ELECTION) { crm_debug("Ignore election check: we not in an election"); } else if (voted_size >= num_members) { /* we won and everyone has voted */ crm_timer_stop(election_timeout); register_fsa_input(C_FSA_INTERNAL, I_ELECTION_DC, NULL); if (voted_size > num_members) { char *data = NULL; data = strdup("member"); g_hash_table_foreach(crm_peer_cache, log_member_uname, data); free(data); data = strdup("voted"); g_hash_table_foreach(voted, log_node, data); free(data); } crm_debug("Destroying voted hash"); g_hash_table_destroy(voted); voted = NULL; } else { crm_debug("Still waiting on %d non-votes (%d total)", num_members - voted_size, num_members); } return; } #define win_dampen 1 /* in seconds */ #define loss_dampen 2 /* in seconds */ /* A_ELECTION_COUNT */ void do_election_count_vote(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { struct timeval your_age; int age; int election_id = -1; int log_level = LOG_INFO; gboolean use_born_on = FALSE; gboolean done = FALSE; gboolean we_loose = FALSE; const char *op = NULL; const char *vote_from = NULL; const char *your_version = NULL; const char *election_owner = NULL; const char *reason = "unknown"; crm_node_t *our_node = NULL, *your_node = NULL; ha_msg_input_t *vote = fsa_typed_data(fsa_dt_ha_msg); static time_t last_election_win = 0; static time_t last_election_loss = 0; /* if the membership copy is NULL we REALLY shouldnt be voting * the question is how we managed to get here. */ CRM_CHECK(msg_data != NULL, return); CRM_CHECK(crm_peer_cache != NULL, return); CRM_CHECK(vote != NULL, crm_err("Bogus data from %s", msg_data->origin); return); CRM_CHECK(vote->msg != NULL, crm_err("Bogus data from %s", msg_data->origin); return); your_age.tv_sec = 0; your_age.tv_usec = 0; op = crm_element_value(vote->msg, F_CRM_TASK); vote_from = crm_element_value(vote->msg, F_CRM_HOST_FROM); your_version = crm_element_value(vote->msg, F_CRM_VERSION); election_owner = crm_element_value(vote->msg, F_CRM_ELECTION_OWNER); crm_element_value_int(vote->msg, F_CRM_ELECTION_ID, &election_id); crm_element_value_int(vote->msg, F_CRM_ELECTION_AGE_S, (int *)&(your_age.tv_sec)); crm_element_value_int(vote->msg, F_CRM_ELECTION_AGE_US, (int *)&(your_age.tv_usec)); CRM_CHECK(vote_from != NULL, vote_from = fsa_our_uname); your_node = crm_get_peer(0, vote_from); our_node = crm_get_peer(0, fsa_our_uname); if (voted == NULL) { crm_debug("Created voted hash"); voted = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); } if (is_heartbeat_cluster()) { use_born_on = TRUE; } else if (is_classic_ais_cluster()) { use_born_on = TRUE; } age = crm_compare_age(your_age); if (cur_state == S_STARTING) { reason = "Still starting"; we_loose = TRUE; } else if (our_node == NULL || crm_is_peer_active(our_node) == FALSE) { reason = "We are not part of the cluster"; log_level = LOG_ERR; we_loose = TRUE; } else if (your_node == NULL || crm_is_peer_active(your_node) == FALSE) { /* Possibly we cached the message in the FSA queue at a point that it wasn't */ reason = "Peer is not part of our cluster"; log_level = LOG_WARNING; done = TRUE; } else if (election_id != current_election_id && crm_str_eq(fsa_our_uuid, election_owner, TRUE)) { log_level = LOG_DEBUG_2; reason = "Superceeded"; done = TRUE; } else if (crm_str_eq(op, CRM_OP_NOVOTE, TRUE)) { char *op_copy = strdup(op); char *uname_copy = strdup(vote_from); CRM_ASSERT(crm_str_eq(fsa_our_uuid, election_owner, TRUE)); /* update the list of nodes that have voted */ g_hash_table_replace(voted, uname_copy, op_copy); reason = "Recorded"; done = TRUE; } else if (crm_str_eq(vote_from, fsa_our_uname, TRUE)) { char *op_copy = strdup(op); char *uname_copy = strdup(vote_from); CRM_ASSERT(crm_str_eq(fsa_our_uuid, election_owner, TRUE)); /* update ourselves in the list of nodes that have voted */ g_hash_table_replace(voted, uname_copy, op_copy); reason = "Recorded"; done = TRUE; } else if (compare_version(your_version, CRM_FEATURE_SET) < 0) { reason = "Version"; we_loose = TRUE; } else if (compare_version(your_version, CRM_FEATURE_SET) > 0) { reason = "Version"; } else if (age < 0) { reason = "Uptime"; we_loose = TRUE; } else if (age > 0) { reason = "Uptime"; /* TODO: Check for y(our) born < 0 */ } else if (use_born_on && your_node->born < our_node->born) { reason = "Born"; we_loose = TRUE; } else if (use_born_on && your_node->born > our_node->born) { reason = "Born"; } else if (fsa_our_uname == NULL) { reason = "Unknown host name"; we_loose = TRUE; } else if (strcasecmp(fsa_our_uname, vote_from) > 0) { reason = "Host name"; we_loose = TRUE; } else { reason = "Host name"; CRM_ASSERT(strcmp(fsa_our_uname, vote_from) != 0); /* cant happen... * } else if(strcasecmp(fsa_our_uname, vote_from) == 0) { * * default... * } else { // strcasecmp(fsa_our_uname, vote_from) < 0 * we win */ } if (done) { do_crm_log(log_level + 1, "Election %d (current: %d, owner: %s): Processed %s from %s (%s)", election_id, current_election_id, election_owner, op, vote_from, reason); } else if (we_loose) { xmlNode *novote = create_request(CRM_OP_NOVOTE, NULL, vote_from, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); do_crm_log(log_level, "Election %d (owner: %s) lost: %s from %s (%s)", election_id, election_owner, op, vote_from, reason); update_dc(NULL); crm_timer_stop(election_timeout); if (fsa_input_register & R_THE_DC) { crm_trace("Give up the DC to %s", vote_from); register_fsa_input(C_FSA_INTERNAL, I_RELEASE_DC, NULL); } else if (cur_state != S_STARTING) { crm_trace("We werent the DC anyway"); register_fsa_input(C_FSA_INTERNAL, I_PENDING, NULL); } crm_xml_add(novote, F_CRM_ELECTION_OWNER, election_owner); crm_xml_add_int(novote, F_CRM_ELECTION_ID, election_id); send_cluster_message(vote_from, crm_msg_crmd, novote, TRUE); free_xml(novote); fsa_cib_conn->cmds->set_slave(fsa_cib_conn, cib_scope_local); last_election_loss = time(NULL); last_election_win = 0; } else { do_crm_log(log_level, "Election %d (owner: %s) pass: %s from %s (%s)", election_id, election_owner, op, vote_from, reason); if (last_election_loss) { time_t tm_now = time(NULL); if (tm_now - last_election_loss < (time_t) loss_dampen) { crm_info("Election %d ignore: We already lost an election less than %ds ago (%s)", election_id, loss_dampen, ctime(&last_election_loss)); update_dc(NULL); return; } last_election_loss = 0; } #if 0 /* Enabling this code can lead to multiple DCs during SimulStart. * Specifically when a node comes up after our last 'win' vote. * * Fixing and enabling this functionality might become important when * we start running realy big clusters, but for now leave it disabled. */ if (last_election_win) { time_t tm_now = time(NULL); if (tm_now - last_election_win < (time_t) win_dampen) { crm_info("Election %d ignore: We already won an election less than %ds ago", election_id, win_dampen); return; } } last_election_win = time(NULL); #endif register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); g_hash_table_destroy(voted); voted = NULL; } } /* A_ELECT_TIMER_START, A_ELECTION_TIMEOUT */ /* we won */ void do_election_timer_ctrl(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { } static void feature_update_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { if (rc != pcmk_ok) { fsa_data_t *msg_data = NULL; register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } /* A_DC_TAKEOVER */ void do_dc_takeover(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { int rc = pcmk_ok; xmlNode *cib = NULL; GListPtr gIter = NULL; static const char *cluster_type = NULL; if (cluster_type == NULL) { cluster_type = getenv("HA_cluster_type"); } if (cluster_type == NULL) { cluster_type = "Heartbeat"; } crm_info("Taking over DC status for this partition"); set_bit_inplace(fsa_input_register, R_THE_DC); for (gIter = stonith_cleanup_list; gIter != NULL; gIter = gIter->next) { char *target = gIter->data; const char *uuid = get_uuid(target); crm_notice("Marking %s, target of a previous stonith action, as clean", target); send_stonith_update(NULL, target, uuid); free(target); } g_list_free(stonith_cleanup_list); stonith_cleanup_list = NULL; #if SUPPORT_COROSYNC if (is_classic_ais_cluster()) { send_ais_text(crm_class_quorum, NULL, TRUE, NULL, crm_msg_ais); } #endif if (voted != NULL) { crm_trace("Destroying voted hash"); g_hash_table_destroy(voted); voted = NULL; } set_bit_inplace(fsa_input_register, R_JOIN_OK); set_bit_inplace(fsa_input_register, R_INVOKE_PE); fsa_cib_conn->cmds->set_master(fsa_cib_conn, cib_scope_local); cib = create_xml_node(NULL, XML_TAG_CIB); crm_xml_add(cib, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); fsa_cib_update(XML_TAG_CIB, cib, cib_quorum_override, rc, NULL); add_cib_op_callback(fsa_cib_conn, rc, FALSE, NULL, feature_update_callback); update_attr(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, "dc-version", VERSION "-" BUILD_VERSION, FALSE); update_attr(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, "cluster-infrastructure", cluster_type, FALSE); mainloop_set_trigger(config_read); free_xml(cib); } /* A_DC_RELEASE */ void do_dc_release(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { if (action & A_DC_RELEASE) { GListPtr gIter = NULL; crm_debug("Releasing the role of DC"); clear_bit_inplace(fsa_input_register, R_THE_DC); for (gIter = stonith_cleanup_list; gIter != NULL; gIter = gIter->next) { char *target = gIter->data; crm_debug("Purging %s from stonith cleanup list", target); free(target); } g_list_free(stonith_cleanup_list); stonith_cleanup_list = NULL; } else if (action & A_DC_RELEASED) { crm_info("DC role released"); #if 0 if (are there errors) { /* we cant stay up if not healthy */ /* or perhaps I_ERROR and go to S_RECOVER? */ result = I_SHUTDOWN; } #endif register_fsa_input(C_FSA_INTERNAL, I_RELEASE_SUCCESS, NULL); } else { crm_err("Unknown action %s", fsa_action2string(action)); } crm_trace("Am I still the DC? %s", AM_I_DC ? XML_BOOLEAN_YES : XML_BOOLEAN_NO); } diff --git a/crmd/membership.c b/crmd/membership.c index 8e73ab1132..9b47b57b99 100644 --- a/crmd/membership.c +++ b/crmd/membership.c @@ -1,254 +1,254 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* put these first so that uuid_t is defined without conflicts */ #include #include #include #include #include #include -#include +#include #include #include #include #include #include #include gboolean membership_flux_hack = FALSE; void post_cache_update(int instance); void ghash_update_cib_node(gpointer key, gpointer value, gpointer user_data); int last_peer_update = 0; extern GHashTable *voted; struct update_data_s { const char *state; const char *caller; xmlNode *updates; gboolean overwrite_join; }; extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source); static void check_dead_member(const char *uname, GHashTable * members) { CRM_CHECK(uname != NULL, return); if (members != NULL && g_hash_table_lookup(members, uname) != NULL) { crm_err("%s didnt really leave the membership!", uname); return; } erase_node_from_join(uname); if (voted != NULL) { g_hash_table_remove(voted, uname); } if (safe_str_eq(fsa_our_uname, uname)) { crm_err("We're not part of the cluster anymore"); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); } else if (AM_I_DC == FALSE && safe_str_eq(uname, fsa_our_dc)) { crm_warn("Our DC node (%s) left the cluster", uname); register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); } else if (fsa_state == S_INTEGRATION || fsa_state == S_FINALIZE_JOIN) { check_join_state(fsa_state, __FUNCTION__); } } static void reap_dead_nodes(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; if (crm_is_peer_active(node) == FALSE) { check_dead_member(node->uname, NULL); fail_incompletable_actions(transition_graph, node->uuid); } } gboolean ever_had_quorum = FALSE; void post_cache_update(int instance) { xmlNode *no_op = NULL; crm_peer_seq = instance; crm_debug("Updated cache after membership event %d.", instance); g_hash_table_foreach(crm_peer_cache, reap_dead_nodes, NULL); set_bit_inplace(fsa_input_register, R_MEMBERSHIP); if (AM_I_DC) { populate_cib_nodes(FALSE); do_update_cib_nodes(FALSE, __FUNCTION__); } /* * If we lost nodes, we should re-check the election status * Safe to call outside of an election */ register_fsa_action(A_ELECTION_CHECK); /* Membership changed, remind everyone we're here. * This will aid detection of duplicate DCs */ no_op = create_request(CRM_OP_NOOP, NULL, NULL, CRM_SYSTEM_CRMD, AM_I_DC ? CRM_SYSTEM_DC : CRM_SYSTEM_CRMD, NULL); send_cluster_message(NULL, crm_msg_crmd, no_op, FALSE); free_xml(no_op); } static void crmd_node_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { fsa_data_t *msg_data = NULL; last_peer_update = 0; if (rc == pcmk_ok) { crm_trace("Node update %d complete", call_id); } else { crm_err("Node update %d failed", call_id); crm_log_xml_debug(msg, "failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } void ghash_update_cib_node(gpointer key, gpointer value, gpointer user_data) { xmlNode *tmp1 = NULL; const char *join = NULL; crm_node_t *node = value; struct update_data_s *data = (struct update_data_s *)user_data; enum crm_proc_flag messaging = crm_proc_plugin | crm_proc_heartbeat | crm_proc_cpg; data->state = XML_BOOLEAN_NO; if (safe_str_eq(node->state, CRM_NODE_ACTIVE)) { data->state = XML_BOOLEAN_YES; } crm_debug("Updating %s: %s (overwrite=%s) hash_size=%d", node->uname, data->state, data->overwrite_join ? "true" : "false", g_hash_table_size(confirmed_nodes)); if (data->overwrite_join) { if ((node->processes & proc_flags) == FALSE) { join = CRMD_JOINSTATE_DOWN; } else { const char *peer_member = g_hash_table_lookup(confirmed_nodes, node->uname); if (peer_member != NULL) { join = CRMD_JOINSTATE_MEMBER; } else { join = CRMD_JOINSTATE_PENDING; } } } tmp1 = create_node_state(node->uname, (node->processes & messaging) ? ACTIVESTATUS : DEADSTATUS, data->state, (node->processes & proc_flags) ? ONLINESTATUS : OFFLINESTATUS, join, NULL, FALSE, data->caller); add_node_copy(data->updates, tmp1); free_xml(tmp1); } void do_update_cib_nodes(gboolean overwrite, const char *caller) { int call_id = 0; int call_options = cib_scope_local | cib_quorum_override; struct update_data_s update_data; xmlNode *fragment = NULL; if (crm_peer_cache == NULL) { /* We got a replace notification before being connected to * the CCM. * So there is no need to update the local CIB with our values * - since we have none. */ return; } else if (AM_I_DC == FALSE) { return; } fragment = create_xml_node(NULL, XML_CIB_TAG_STATUS); update_data.caller = caller; update_data.updates = fragment; update_data.overwrite_join = overwrite; g_hash_table_foreach(crm_peer_cache, ghash_update_cib_node, &update_data); fsa_cib_update(XML_CIB_TAG_STATUS, fragment, call_options, call_id, NULL); add_cib_op_callback(fsa_cib_conn, call_id, FALSE, NULL, crmd_node_update_complete); last_peer_update = call_id; free_xml(fragment); } static void cib_quorum_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { fsa_data_t *msg_data = NULL; if (rc == pcmk_ok) { crm_trace("Quorum update %d complete", call_id); } else { crm_err("Quorum update %d failed", call_id); crm_log_xml_debug(msg, "failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } void crm_update_quorum(gboolean quorum, gboolean force_update) { ever_had_quorum |= quorum; if (AM_I_DC && (force_update || fsa_has_quorum != quorum)) { int call_id = 0; xmlNode *update = NULL; int call_options = cib_scope_local | cib_quorum_override; update = create_xml_node(NULL, XML_TAG_CIB); crm_xml_add_int(update, XML_ATTR_HAVE_QUORUM, quorum); set_uuid(update, XML_ATTR_DC_UUID, fsa_our_uname); fsa_cib_update(XML_TAG_CIB, update, call_options, call_id, NULL); crm_debug("Updating quorum status to %s (call=%d)", quorum ? "true" : "false", call_id); add_cib_op_callback(fsa_cib_conn, call_id, FALSE, NULL, cib_quorum_update_complete); free_xml(update); } fsa_has_quorum = quorum; } diff --git a/crmd/messages.c b/crmd/messages.c index c74ce47040..7aa0f5059e 100644 --- a/crmd/messages.c +++ b/crmd/messages.c @@ -1,984 +1,984 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include -#include +#include #include #include #include #include GListPtr fsa_message_queue = NULL; extern void crm_shutdown(int nsig); void handle_response(xmlNode * stored_msg); enum crmd_fsa_input handle_request(xmlNode * stored_msg); enum crmd_fsa_input handle_shutdown_request(xmlNode * stored_msg); #ifdef MSG_LOG # define ROUTER_RESULT(x) crm_trace("Router result: %s", x); \ crm_log_xml_trace(msg, "router.log"); #else # define ROUTER_RESULT(x) crm_trace("Router result: %s", x) #endif /* debug only, can wrap all it likes */ int last_data_id = 0; void register_fsa_error_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input, fsa_data_t * cur_data, void *new_data, const char *raised_from) { /* save the current actions if any */ if (fsa_actions != A_NOTHING) { register_fsa_input_adv(cur_data ? cur_data->fsa_cause : C_FSA_INTERNAL, I_NULL, cur_data ? cur_data->data : NULL, fsa_actions, TRUE, __FUNCTION__); } /* reset the action list */ fsa_actions = A_NOTHING; /* register the error */ register_fsa_input_adv(cause, input, new_data, A_NOTHING, TRUE, raised_from); } int register_fsa_input_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input, void *data, long long with_actions, gboolean prepend, const char *raised_from) { unsigned old_len = g_list_length(fsa_message_queue); fsa_data_t *fsa_data = NULL; last_data_id++; CRM_CHECK(raised_from != NULL, raised_from = ""); crm_trace("%s %s FSA input %d (%s) (cause=%s) %s data", raised_from, prepend ? "prepended" : "appended", last_data_id, fsa_input2string(input), fsa_cause2string(cause), data ? "with" : "without"); if (input == I_WAIT_FOR_EVENT) { do_fsa_stall = TRUE; crm_debug("Stalling the FSA pending further input: cause=%s", fsa_cause2string(cause)); if (old_len > 0) { crm_warn("%s stalled the FSA with pending inputs", raised_from); fsa_dump_queue(LOG_DEBUG); } if (data == NULL) { set_bit_inplace(fsa_actions, with_actions); with_actions = A_NOTHING; return 0; } crm_err("%s stalled the FSA with data - this may be broken", raised_from); } if (input == I_NULL && with_actions == A_NOTHING /* && data == NULL */ ) { /* no point doing anything */ crm_err("Cannot add entry to queue: no input and no action"); return 0; } fsa_data = calloc(1, sizeof(fsa_data_t)); fsa_data->id = last_data_id; fsa_data->fsa_input = input; fsa_data->fsa_cause = cause; fsa_data->origin = raised_from; fsa_data->data = NULL; fsa_data->data_type = fsa_dt_none; fsa_data->actions = with_actions; if (with_actions != A_NOTHING) { crm_trace("Adding actions %.16llx to input", with_actions); } if (data != NULL) { switch (cause) { case C_FSA_INTERNAL: case C_CRMD_STATUS_CALLBACK: case C_IPC_MESSAGE: case C_HA_MESSAGE: crm_trace("Copying %s data from %s as a HA msg", fsa_cause2string(cause), raised_from); CRM_CHECK(((ha_msg_input_t *) data)->msg != NULL, crm_err("Bogus data from %s", raised_from)); fsa_data->data = copy_ha_msg_input(data); fsa_data->data_type = fsa_dt_ha_msg; break; case C_LRM_OP_CALLBACK: crm_trace("Copying %s data from %s as lrmd_event_data_t", fsa_cause2string(cause), raised_from); fsa_data->data = lrmd_copy_event((lrmd_event_data_t *) data); fsa_data->data_type = fsa_dt_lrm; break; case C_CCM_CALLBACK: case C_SUBSYSTEM_CONNECT: case C_LRM_MONITOR_CALLBACK: case C_TIMER_POPPED: case C_SHUTDOWN: case C_HEARTBEAT_FAILED: case C_HA_DISCONNECT: case C_ILLEGAL: case C_UNKNOWN: case C_STARTUP: crm_err("Copying %s data (from %s)" " not yet implemented", fsa_cause2string(cause), raised_from); exit(1); break; } crm_trace("%s data copied", fsa_cause2string(fsa_data->fsa_cause)); } /* make sure to free it properly later */ if (prepend) { crm_trace("Prepending input"); fsa_message_queue = g_list_prepend(fsa_message_queue, fsa_data); } else { fsa_message_queue = g_list_append(fsa_message_queue, fsa_data); } crm_trace("Queue len: %d", g_list_length(fsa_message_queue)); fsa_dump_queue(LOG_DEBUG_2); if (old_len == g_list_length(fsa_message_queue)) { crm_err("Couldnt add message to the queue"); } if (fsa_source) { crm_trace("Triggering FSA: %s", __FUNCTION__); mainloop_set_trigger(fsa_source); } return last_data_id; } void fsa_dump_queue(int log_level) { int offset = 0; GListPtr lpc = NULL; for (lpc = fsa_message_queue; lpc != NULL; lpc = lpc->next) { fsa_data_t *data = (fsa_data_t *) lpc->data; do_crm_log_unlikely(log_level, "queue[%d(%d)]: input %s raised by %s()\t(cause=%s)", offset++, data->id, fsa_input2string(data->fsa_input), data->origin, fsa_cause2string(data->fsa_cause)); } } ha_msg_input_t * copy_ha_msg_input(ha_msg_input_t * orig) { ha_msg_input_t *copy = NULL; xmlNodePtr data = NULL; if (orig != NULL) { crm_trace("Copy msg"); data = copy_xml(orig->msg); } else { crm_trace("No message to copy"); } copy = new_ha_msg_input(data); if (orig && orig->msg != NULL) { CRM_CHECK(copy->msg != NULL, crm_err("copy failed")); } return copy; } void delete_fsa_input(fsa_data_t * fsa_data) { lrmd_event_data_t *op = NULL; xmlNode *foo = NULL; if (fsa_data == NULL) { return; } crm_trace("About to free %s data", fsa_cause2string(fsa_data->fsa_cause)); if (fsa_data->data != NULL) { switch (fsa_data->data_type) { case fsa_dt_ha_msg: delete_ha_msg_input(fsa_data->data); break; case fsa_dt_xml: foo = fsa_data->data; free_xml(foo); break; case fsa_dt_lrm: op = (lrmd_event_data_t *) fsa_data->data; lrmd_free_event(op); break; case fsa_dt_none: if (fsa_data->data != NULL) { crm_err("Dont know how to free %s data from %s", fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); exit(1); } break; } crm_trace("%s data freed", fsa_cause2string(fsa_data->fsa_cause)); } free(fsa_data); } /* returns the next message */ fsa_data_t * get_message(void) { fsa_data_t *message = g_list_nth_data(fsa_message_queue, 0); fsa_message_queue = g_list_remove(fsa_message_queue, message); crm_trace("Processing input %d", message->id); return message; } /* returns the current head of the FIFO queue */ gboolean is_message(void) { return (g_list_length(fsa_message_queue) > 0); } void * fsa_typed_data_adv(fsa_data_t * fsa_data, enum fsa_data_type a_type, const char *caller) { void *ret_val = NULL; if (fsa_data == NULL) { crm_err("%s: No FSA data available", caller); } else if (fsa_data->data == NULL) { crm_err("%s: No message data available. Origin: %s", caller, fsa_data->origin); } else if (fsa_data->data_type != a_type) { crm_crit( "%s: Message data was the wrong type! %d vs. requested=%d." " Origin: %s", caller, fsa_data->data_type, a_type, fsa_data->origin); CRM_ASSERT(fsa_data->data_type == a_type); } else { ret_val = fsa_data->data; } return ret_val; } /* A_MSG_ROUTE */ void do_msg_route(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); route_message(msg_data->fsa_cause, input->msg); } void route_message(enum crmd_fsa_cause cause, xmlNode * input) { ha_msg_input_t fsa_input; enum crmd_fsa_input result = I_NULL; fsa_input.msg = input; CRM_CHECK(cause == C_IPC_MESSAGE || cause == C_HA_MESSAGE, return); /* try passing the buck first */ if (relay_message(input, cause == C_IPC_MESSAGE)) { return; } /* handle locally */ result = handle_message(input); /* done or process later? */ switch (result) { case I_NULL: case I_CIB_OP: case I_ROUTER: case I_NODE_JOIN: case I_JOIN_REQUEST: case I_JOIN_RESULT: break; default: /* Defering local processing of message */ register_fsa_input_later(cause, result, &fsa_input); return; } if (result != I_NULL) { /* add to the front of the queue */ register_fsa_input(cause, result, &fsa_input); } } gboolean relay_message(xmlNode * msg, gboolean originated_locally) { int dest = 1; int is_for_dc = 0; int is_for_dcib = 0; int is_for_te = 0; int is_for_crm = 0; int is_for_cib = 0; int is_local = 0; gboolean processing_complete = FALSE; const char *host_to = crm_element_value(msg, F_CRM_HOST_TO); const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO); const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM); const char *type = crm_element_value(msg, F_TYPE); const char *msg_error = NULL; crm_trace("Routing message %s", crm_element_value(msg, XML_ATTR_REFERENCE)); if (msg == NULL) { msg_error = "Cannot route empty message"; } else if (safe_str_eq(CRM_OP_HELLO, crm_element_value(msg, F_CRM_TASK))) { /* quietly ignore */ processing_complete = TRUE; } else if (safe_str_neq(type, T_CRM)) { msg_error = "Bad message type"; } else if (sys_to == NULL) { msg_error = "Bad message destination: no subsystem"; } if (msg_error != NULL) { processing_complete = TRUE; crm_err("%s", msg_error); crm_log_xml_warn(msg, "bad msg"); } if (processing_complete) { return TRUE; } processing_complete = TRUE; is_for_dc = (strcasecmp(CRM_SYSTEM_DC, sys_to) == 0); is_for_dcib = (strcasecmp(CRM_SYSTEM_DCIB, sys_to) == 0); is_for_te = (strcasecmp(CRM_SYSTEM_TENGINE, sys_to) == 0); is_for_cib = (strcasecmp(CRM_SYSTEM_CIB, sys_to) == 0); is_for_crm = (strcasecmp(CRM_SYSTEM_CRMD, sys_to) == 0); is_local = 0; if (host_to == NULL || strlen(host_to) == 0) { if (is_for_dc || is_for_te) { is_local = 0; } else if (is_for_crm && originated_locally) { is_local = 0; } else { is_local = 1; } } else if (safe_str_eq(fsa_our_uname, host_to)) { is_local = 1; } if (is_for_dc || is_for_dcib || is_for_te) { if (AM_I_DC && is_for_te) { ROUTER_RESULT("Message result: Local relay"); send_msg_via_ipc(msg, sys_to); } else if (AM_I_DC) { ROUTER_RESULT("Message result: DC/CRMd process"); processing_complete = FALSE; /* more to be done by caller */ } else if (originated_locally && safe_str_neq(sys_from, CRM_SYSTEM_PENGINE) && safe_str_neq(sys_from, CRM_SYSTEM_TENGINE)) { /* Neither the TE or PE should be sending messages * to DC's on other nodes * * By definition, if we are no longer the DC, then * the PE or TE's data should be discarded */ #if SUPPORT_COROSYNC if (is_openais_cluster()) { dest = text2msg_type(sys_to); } #endif ROUTER_RESULT("Message result: External relay to DC"); send_cluster_message(host_to, dest, msg, TRUE); } else { /* discard */ ROUTER_RESULT("Message result: Discard, not DC"); } } else if (is_local && (is_for_crm || is_for_cib)) { ROUTER_RESULT("Message result: CRMd process"); processing_complete = FALSE; /* more to be done by caller */ } else if (is_local) { ROUTER_RESULT("Message result: Local relay"); send_msg_via_ipc(msg, sys_to); } else { #if SUPPORT_COROSYNC if (is_openais_cluster()) { dest = text2msg_type(sys_to); } #endif ROUTER_RESULT("Message result: External relay"); send_cluster_message(host_to, dest, msg, TRUE); } return processing_complete; } static gboolean process_hello_message(xmlNode * hello, char **uuid, char **client_name, char **major_version, char **minor_version) { const char *local_uuid; const char *local_client_name; const char *local_major_version; const char *local_minor_version; *uuid = NULL; *client_name = NULL; *major_version = NULL; *minor_version = NULL; if (hello == NULL) { return FALSE; } local_uuid = crm_element_value(hello, "client_uuid"); local_client_name = crm_element_value(hello, "client_name"); local_major_version = crm_element_value(hello, "major_version"); local_minor_version = crm_element_value(hello, "minor_version"); if (local_uuid == NULL || strlen(local_uuid) == 0) { crm_err("Hello message was not valid (field %s not found)", "uuid"); return FALSE; } else if (local_client_name == NULL || strlen(local_client_name) == 0) { crm_err("Hello message was not valid (field %s not found)", "client name"); return FALSE; } else if (local_major_version == NULL || strlen(local_major_version) == 0) { crm_err("Hello message was not valid (field %s not found)", "major version"); return FALSE; } else if (local_minor_version == NULL || strlen(local_minor_version) == 0) { crm_err("Hello message was not valid (field %s not found)", "minor version"); return FALSE; } *uuid = strdup(local_uuid); *client_name = strdup(local_client_name); *major_version = strdup(local_major_version); *minor_version = strdup(local_minor_version); crm_trace("Hello message ok"); return TRUE; } gboolean crmd_authorize_message(xmlNode * client_msg, crmd_client_t * curr_client) { /* check the best case first */ const char *sys_from = crm_element_value(client_msg, F_CRM_SYS_FROM); char *uuid = NULL; char *client_name = NULL; char *major_version = NULL; char *minor_version = NULL; const char *filtered_from; gpointer table_key = NULL; gboolean auth_result = FALSE; gboolean can_reply = FALSE; /* no-one has registered with this id */ xmlNode *xml = NULL; const char *op = crm_element_value(client_msg, F_CRM_TASK); if (safe_str_neq(CRM_OP_HELLO, op)) { if (sys_from == NULL) { crm_warn("Message [%s] was had no value for %s... discarding", crm_element_value(client_msg, XML_ATTR_REFERENCE), F_CRM_SYS_FROM); return FALSE; } filtered_from = sys_from; /* The CIB can have two names on the DC */ if (strcasecmp(sys_from, CRM_SYSTEM_DCIB) == 0) filtered_from = CRM_SYSTEM_CIB; if (g_hash_table_lookup(ipc_clients, filtered_from) != NULL) { can_reply = TRUE; /* reply can be routed */ } crm_trace("Message reply can%s be routed from %s.", can_reply ? "" : " not", sys_from); if (can_reply == FALSE) { crm_warn("Message [%s] not authorized", crm_element_value(client_msg, XML_ATTR_REFERENCE)); } return can_reply; } crm_trace("received client join msg"); crm_log_xml_trace(client_msg, "join"); xml = get_message_xml(client_msg, F_CRM_DATA); auth_result = process_hello_message(xml, &uuid, &client_name, &major_version, &minor_version); if (auth_result == TRUE) { if (client_name == NULL || uuid == NULL) { crm_err("Bad client details (client_name=%s, uuid=%s)", crm_str(client_name), crm_str(uuid)); auth_result = FALSE; } } if (auth_result == TRUE) { /* check version */ int mav = atoi(major_version); int miv = atoi(minor_version); crm_trace("Checking client version number"); if (mav < 0 || miv < 0) { crm_err("Client version (%d:%d) is not acceptable", mav, miv); auth_result = FALSE; } } table_key = (gpointer) generate_hash_key(client_name, uuid); if (auth_result == TRUE) { xmlNode *xml = NULL; crm_trace("Accepted client %s", crm_str(table_key)); curr_client->table_key = table_key; curr_client->sub_sys = strdup(client_name); curr_client->uuid = strdup(uuid); g_hash_table_insert(ipc_clients, table_key, curr_client->ipc); xml = create_hello_message("n/a", CRM_SYSTEM_CRMD, "0", "1"); crm_ipcs_send(curr_client->ipc, xml, FALSE); free_xml(xml); crm_trace("Updated client list with %s", crm_str(table_key)); crm_trace("Triggering FSA: %s", __FUNCTION__); mainloop_set_trigger(fsa_source); } else { free(table_key); crm_warn("Rejected client logon request"); qb_ipcs_disconnect(curr_client->ipc); } free(uuid); free(minor_version); free(major_version); free(client_name); /* hello messages should never be processed further */ return FALSE; } enum crmd_fsa_input handle_message(xmlNode * msg) { const char *type = NULL; CRM_CHECK(msg != NULL, return I_NULL); type = crm_element_value(msg, F_CRM_MSG_TYPE); if (crm_str_eq(type, XML_ATTR_REQUEST, TRUE)) { return handle_request(msg); } else if (crm_str_eq(type, XML_ATTR_RESPONSE, TRUE)) { handle_response(msg); return I_NULL; } crm_err("Unknown message type: %s", type); return I_NULL; } static enum crmd_fsa_input handle_failcount_op(xmlNode * stored_msg) { const char *rsc = NULL; xmlNode *xml_rsc = get_xpath_object("//" XML_CIB_TAG_RESOURCE, stored_msg, LOG_ERR); if (xml_rsc) { rsc = ID(xml_rsc); } if (rsc) { char *attr = NULL; crm_info("Removing failcount for %s", rsc); attr = crm_concat("fail-count", rsc, '-'); update_attrd(NULL, attr, NULL, NULL); free(attr); attr = crm_concat("last-failure", rsc, '-'); update_attrd(NULL, attr, NULL, NULL); free(attr); lrm_clear_last_failure(rsc); } else { crm_log_xml_warn(stored_msg, "invalid failcount op"); } return I_NULL; } enum crmd_fsa_input handle_request(xmlNode * stored_msg) { xmlNode *msg = NULL; const char *op = crm_element_value(stored_msg, F_CRM_TASK); /* Optimize this for the DC - it has the most to do */ if (op == NULL) { crm_log_xml_err(stored_msg, "Bad message"); return I_NULL; } /*========== DC-Only Actions ==========*/ if (AM_I_DC) { if (strcmp(op, CRM_OP_JOIN_ANNOUNCE) == 0) { return I_NODE_JOIN; } else if (strcmp(op, CRM_OP_JOIN_REQUEST) == 0) { return I_JOIN_REQUEST; } else if (strcmp(op, CRM_OP_JOIN_CONFIRM) == 0) { return I_JOIN_RESULT; } else if (strcmp(op, CRM_OP_SHUTDOWN) == 0) { const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); gboolean dc_match = safe_str_eq(host_from, fsa_our_dc); if (is_set(fsa_input_register, R_SHUTDOWN)) { crm_info("Shutting ourselves down (DC)"); return I_STOP; } else if (dc_match) { crm_err("We didnt ask to be shut down, yet our" " TE is telling us too." " Better get out now!"); return I_TERMINATE; } else if (fsa_state != S_STOPPING) { crm_err("Another node is asking us to shutdown" " but we think we're ok."); return I_ELECTION; } } else if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) { /* a slave wants to shut down */ /* create cib fragment and add to message */ return handle_shutdown_request(stored_msg); } } /*========== common actions ==========*/ if (strcmp(op, CRM_OP_NOVOTE) == 0) { ha_msg_input_t fsa_input; fsa_input.msg = stored_msg; register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input, A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE, __FUNCTION__); } else if (strcmp(op, CRM_OP_CLEAR_FAILCOUNT) == 0) { return handle_failcount_op(stored_msg); } else if (strcmp(op, CRM_OP_VOTE) == 0) { /* count the vote and decide what to do after that */ ha_msg_input_t fsa_input; fsa_input.msg = stored_msg; register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input, A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE, __FUNCTION__); /* Sometimes we _must_ go into S_ELECTION */ if (fsa_state == S_HALT) { crm_debug("Forcing an election from S_HALT"); return I_ELECTION; #if 0 } else if (AM_I_DC) { /* This is the old way of doing things but what is gained? */ return I_ELECTION; #endif } } else if (strcmp(op, CRM_OP_JOIN_OFFER) == 0) { crm_debug("Raising I_JOIN_OFFER: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID)); return I_JOIN_OFFER; } else if (strcmp(op, CRM_OP_JOIN_ACKNAK) == 0) { crm_debug("Raising I_JOIN_RESULT: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID)); return I_JOIN_RESULT; } else if (strcmp(op, CRM_OP_LRM_DELETE) == 0 || strcmp(op, CRM_OP_LRM_FAIL) == 0 || strcmp(op, CRM_OP_LRM_REFRESH) == 0 || strcmp(op, CRM_OP_REPROBE) == 0) { crm_xml_add(stored_msg, F_CRM_SYS_TO, CRM_SYSTEM_LRMD); return I_ROUTER; } else if (strcmp(op, CRM_OP_NOOP) == 0) { return I_NULL; } else if (strcmp(op, CRM_OP_LOCAL_SHUTDOWN) == 0) { crm_shutdown(SIGTERM); /*return I_SHUTDOWN; */ return I_NULL; /*========== (NOT_DC)-Only Actions ==========*/ } else if (AM_I_DC == FALSE && strcmp(op, CRM_OP_SHUTDOWN) == 0) { const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); gboolean dc_match = safe_str_eq(host_from, fsa_our_dc); if (dc_match || fsa_our_dc == NULL) { if (is_set(fsa_input_register, R_SHUTDOWN) == FALSE) { crm_err("We didn't ask to be shut down, yet our" " DC is telling us too."); set_bit_inplace(fsa_input_register, R_STAYDOWN); return I_STOP; } crm_info("Shutting down"); return I_STOP; } else { crm_warn("Discarding %s op from %s", op, host_from); } } else if (strcmp(op, CRM_OP_PING) == 0) { /* eventually do some stuff to figure out * if we /are/ ok */ const char *sys_to = crm_element_value(stored_msg, F_CRM_SYS_TO); xmlNode *ping = create_xml_node(NULL, XML_CRM_TAG_PING); crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok"); crm_xml_add(ping, XML_PING_ATTR_SYSFROM, sys_to); crm_xml_add(ping, "crmd_state", fsa_state2string(fsa_state)); /* Ok, so technically not so interesting, but CTS needs to see this */ crm_notice("Current ping state: %s", fsa_state2string(fsa_state)); msg = create_reply(stored_msg, ping); relay_message(msg, TRUE); free_xml(ping); free_xml(msg); } else if (strcmp(op, CRM_OP_RM_NODE_CACHE) == 0) { xmlNode *options = get_xpath_object("//"XML_TAG_OPTIONS, stored_msg, LOG_ERR); int id = 0; if (options) { crm_element_value_int(options, XML_ATTR_ID, &id); } if (id) { reap_crm_member(id); } } else { crm_err("Unexpected request (%s) sent to %s", op, AM_I_DC ? "the DC" : "non-DC node"); crm_log_xml_err(stored_msg, "Unexpected"); } return I_NULL; } void handle_response(xmlNode * stored_msg) { const char *op = crm_element_value(stored_msg, F_CRM_TASK); if (op == NULL) { crm_log_xml_err(stored_msg, "Bad message"); } else if (AM_I_DC && strcmp(op, CRM_OP_PECALC) == 0) { /* Check if the PE answer been superceeded by a subsequent request? */ const char *msg_ref = crm_element_value(stored_msg, XML_ATTR_REFERENCE); if (msg_ref == NULL) { crm_err("%s - Ignoring calculation with no reference", op); } else if (safe_str_eq(msg_ref, fsa_pe_ref)) { ha_msg_input_t fsa_input; fsa_input.msg = stored_msg; register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input); crm_trace("Completed: %s...", fsa_pe_ref); } else { crm_info("%s calculation %s is obsolete", op, msg_ref); } } else if (strcmp(op, CRM_OP_VOTE) == 0 || strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0 || strcmp(op, CRM_OP_SHUTDOWN) == 0) { } else { const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); crm_err("Unexpected response (op=%s, src=%s) sent to the %s", op, host_from, AM_I_DC ? "DC" : "CRMd"); } } enum crmd_fsa_input handle_shutdown_request(xmlNode * stored_msg) { /* handle here to avoid potential version issues * where the shutdown message/proceedure may have * been changed in later versions. * * This way the DC is always in control of the shutdown */ char *now_s = NULL; time_t now = time(NULL); xmlNode *node_state = NULL; const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); if (host_from == NULL) { /* we're shutting down and the DC */ host_from = fsa_our_uname; } crm_info("Creating shutdown request for %s (state=%s)", host_from, fsa_state2string(fsa_state)); crm_log_xml_trace(stored_msg, "message"); node_state = create_node_state(host_from, NULL, NULL, NULL, NULL, CRMD_STATE_INACTIVE, FALSE, __FUNCTION__); fsa_cib_anon_update(XML_CIB_TAG_STATUS, node_state, cib_quorum_override); crm_log_xml_trace(node_state, "Shutdown update"); free_xml(node_state); now_s = crm_itoa(now); update_attrd(host_from, XML_CIB_ATTR_SHUTDOWN, now_s, NULL); free(now_s); /* will be picked up by the TE as long as its running */ return I_NULL; } /* msg is deleted by the time this returns */ extern gboolean process_te_message(xmlNode * msg, xmlNode * xml_data); gboolean send_msg_via_ipc(xmlNode * msg, const char *sys) { gboolean send_ok = TRUE; qb_ipcs_connection_t *client_channel; client_channel = (qb_ipcs_connection_t *) g_hash_table_lookup(ipc_clients, sys); if (crm_element_value(msg, F_CRM_HOST_FROM) == NULL) { crm_xml_add(msg, F_CRM_HOST_FROM, fsa_our_uname); } if (client_channel != NULL) { /* Transient clients such as crmadmin */ send_ok = crm_ipcs_send(client_channel, msg, TRUE); } else if (sys != NULL && strcmp(sys, CRM_SYSTEM_TENGINE) == 0) { xmlNode *data = get_message_xml(msg, F_CRM_DATA); process_te_message(msg, data); } else if (sys != NULL && strcmp(sys, CRM_SYSTEM_LRMD) == 0) { fsa_data_t fsa_data; ha_msg_input_t fsa_input; fsa_input.msg = msg; fsa_input.xml = get_message_xml(msg, F_CRM_DATA); fsa_data.id = 0; fsa_data.actions = 0; fsa_data.data = &fsa_input; fsa_data.fsa_input = I_MESSAGE; fsa_data.fsa_cause = C_IPC_MESSAGE; fsa_data.origin = __FUNCTION__; fsa_data.data_type = fsa_dt_ha_msg; #ifdef FSA_TRACE crm_trace("Invoking action A_LRM_INVOKE (%.16llx)", A_LRM_INVOKE); #endif do_lrm_invoke(A_LRM_INVOKE, C_IPC_MESSAGE, fsa_state, I_MESSAGE, &fsa_data); } else { crm_err("Unknown Sub-system (%s)... discarding message.", crm_str(sys)); send_ok = FALSE; } return send_ok; } ha_msg_input_t * new_ha_msg_input(xmlNode * orig) { ha_msg_input_t *input_copy = NULL; input_copy = calloc(1, sizeof(ha_msg_input_t)); input_copy->msg = orig; input_copy->xml = get_message_xml(input_copy->msg, F_CRM_DATA); return input_copy; } void delete_ha_msg_input(ha_msg_input_t * orig) { if (orig == NULL) { return; } free_xml(orig->msg); free(orig); } diff --git a/crmd/te_utils.c b/crmd/te_utils.c index 171d053f38..66b111b752 100644 --- a/crmd/te_utils.c +++ b/crmd/te_utils.c @@ -1,417 +1,416 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include crm_trigger_t *stonith_reconnect = NULL; GListPtr stonith_cleanup_list = NULL; static gboolean fail_incompletable_stonith(crm_graph_t * graph) { GListPtr lpc = NULL; const char *task = NULL; xmlNode *last_action = NULL; if (graph == NULL) { return FALSE; } for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) { GListPtr lpc2 = NULL; synapse_t *synapse = (synapse_t *) lpc->data; if (synapse->confirmed) { continue; } for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) { crm_action_t *action = (crm_action_t *) lpc2->data; if (action->type != action_type_crm || action->confirmed) { continue; } task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); if (task && safe_str_eq(task, CRM_OP_FENCE)) { action->failed = TRUE; last_action = action->xml; update_graph(graph, action); crm_notice("Failing action %d (%s): STONITHd terminated", action->id, ID(action->xml)); } } } if (last_action != NULL) { crm_warn("STONITHd failure resulted in un-runnable actions"); abort_transition(INFINITY, tg_restart, "Stonith failure", last_action); return TRUE; } return FALSE; } static void tengine_stonith_connection_destroy(stonith_t * st, const char *event, xmlNode * msg) { if (is_set(fsa_input_register, R_ST_REQUIRED)) { crm_crit("Fencing daemon connection failed"); mainloop_set_trigger(stonith_reconnect); } else { crm_info("Fencing daemon disconnected"); } /* cbchan will be garbage at this point, arrange for it to be reset */ stonith_api->state = stonith_disconnected; if (AM_I_DC) { fail_incompletable_stonith(transition_graph); trigger_graph(); } } /* */ #if SUPPORT_CMAN # include -# include "../lib/cluster/stack.h" #endif static void tengine_stonith_notify(stonith_t * st, const char *event, xmlNode * msg) { int rc = -99; const char *origin = NULL; const char *target = NULL; const char *executioner = NULL; xmlNode *action = get_xpath_object("//st-data", msg, LOG_ERR); if (action == NULL) { crm_log_xml_err(msg, "Notify data not found"); return; } crm_log_xml_debug(msg, "stonith_notify"); crm_element_value_int(msg, F_STONITH_RC, &rc); origin = crm_element_value(action, F_STONITH_ORIGIN); target = crm_element_value(action, F_STONITH_TARGET); executioner = crm_element_value(action, F_STONITH_DELEGATE); if (rc == pcmk_ok && crm_str_eq(target, fsa_our_uname, TRUE)) { crm_err("We were alegedly just fenced by %s for %s!", executioner, origin); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__); return; } crm_notice("Peer %s was%s terminated (%s) by %s for %s: %s (ref=%s)", target, rc == pcmk_ok?"":" not", crm_element_value(action, F_STONITH_OPERATION), executioner ? executioner : "", origin, pcmk_strerror(rc), crm_element_value(action, F_STONITH_REMOTE)); #if SUPPORT_CMAN if (rc == pcmk_ok && is_cman_cluster()) { int local_rc = 0; int confirm = 0; char *target_copy = strdup(target); /* In case fenced hasn't noticed yet */ local_rc = fenced_external(target_copy); if (local_rc != 0) { crm_err("Could not notify CMAN that '%s' is now fenced: %d", target, local_rc); } else { crm_notice("Notified CMAN that '%s' is now fenced", target); } /* In case fenced is already trying to shoot it */ confirm = open("/var/run/cluster/fenced_override", O_NONBLOCK|O_WRONLY); if (confirm) { int ignore = 0; int len = strlen(target_copy); errno = 0; local_rc = write(confirm, target_copy, len); ignore = write(confirm, "\n", 1); if(errno == EBADF) { crm_trace("CMAN not expecting %s to be fenced (yet)", target); } else if (local_rc < len) { crm_perror(LOG_ERR, "Confirmation of CMAN fencing event for '%s' failed: %d", target, local_rc); } else { fsync(confirm); crm_notice("Confirmed CMAN fencing event for '%s'", target); } close(confirm); } } #endif if (rc == pcmk_ok) { crm_trace("target=%s dc=%s", target, fsa_our_dc); if (fsa_our_dc == NULL || safe_str_eq(fsa_our_dc, target)) { crm_notice("Target %s our leader %s (recorded: %s)", fsa_our_dc?"was":"may have been", target, fsa_our_dc ? fsa_our_dc : ""); /* Given the CIB resyncing that occurs around elections, * have one node update the CIB now and, if the new DC is different, * have them do so too after the election */ if (safe_str_eq(executioner, fsa_our_uname)) { const char *uuid = get_uuid(target); send_stonith_update(NULL, target, uuid); } stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(target)); } } } gboolean te_connect_stonith(gpointer user_data) { int lpc = 0; int rc = pcmk_ok; if (stonith_api == NULL) { stonith_api = stonith_api_new(); } if (stonith_api->state != stonith_disconnected) { crm_trace("Still connected"); return TRUE; } for (lpc = 0; lpc < 30; lpc++) { crm_debug("Attempting connection to fencing daemon..."); sleep(1); rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL); if (rc == pcmk_ok) { break; } if (user_data != NULL) { crm_err("Sign-in failed: triggered a retry"); mainloop_set_trigger(stonith_reconnect); return TRUE; } crm_err("Sign-in failed: pausing and trying again in 2s..."); sleep(1); } CRM_CHECK(rc == pcmk_ok, return TRUE); /* If not, we failed 30 times... just get out */ stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT, tengine_stonith_connection_destroy); stonith_api->cmds->register_notification(stonith_api, STONITH_OP_FENCE, tengine_stonith_notify); crm_trace("Connected"); return TRUE; } gboolean stop_te_timer(crm_action_timer_t * timer) { const char *timer_desc = "action timer"; if (timer == NULL) { return FALSE; } if (timer->reason == timeout_abort) { timer_desc = "global timer"; crm_trace("Stopping %s", timer_desc); } if (timer->source_id != 0) { crm_trace("Stopping %s", timer_desc); g_source_remove(timer->source_id); timer->source_id = 0; } else { crm_trace("%s was already stopped", timer_desc); return FALSE; } return TRUE; } gboolean te_graph_trigger(gpointer user_data) { enum transition_status graph_rc = -1; if (transition_graph == NULL) { crm_debug("Nothing to do"); return TRUE; } crm_trace("Invoking graph %d in state %s", transition_graph->id, fsa_state2string(fsa_state)); switch (fsa_state) { case S_STARTING: case S_PENDING: case S_NOT_DC: case S_HALT: case S_ILLEGAL: case S_STOPPING: case S_TERMINATE: return TRUE; break; default: break; } if (transition_graph->complete == FALSE) { graph_rc = run_graph(transition_graph); print_graph(LOG_DEBUG_3, transition_graph); if (graph_rc == transition_active) { crm_trace("Transition not yet complete"); return TRUE; } else if (graph_rc == transition_pending) { crm_trace("Transition not yet complete - no actions fired"); return TRUE; } if (graph_rc != transition_complete) { crm_err("Transition failed: %s", transition_status(graph_rc)); print_graph(LOG_WARNING, transition_graph); } } crm_debug("Transition %d is now complete", transition_graph->id); transition_graph->complete = TRUE; notify_crmd(transition_graph); return TRUE; } void trigger_graph_processing(const char *fn, int line) { mainloop_set_trigger(transition_trigger); crm_trace("%s:%d - Triggered graph processing", fn, line); } void abort_transition_graph(int abort_priority, enum transition_action abort_action, const char *abort_text, xmlNode * reason, const char *fn, int line) { const char *magic = NULL; CRM_CHECK(transition_graph != NULL, return); if (reason) { int diff_add_updates = 0; int diff_add_epoch = 0; int diff_add_admin_epoch = 0; int diff_del_updates = 0; int diff_del_epoch = 0; int diff_del_admin_epoch = 0; xmlNode *diff = get_xpath_object("//" F_CIB_UPDATE_RESULT "//diff", reason, LOG_DEBUG_2); magic = crm_element_value(reason, XML_ATTR_TRANSITION_MAGIC); if (diff) { cib_diff_version_details(diff, &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); if (crm_str_eq(TYPE(reason), XML_CIB_TAG_NVPAIR, TRUE)) { crm_info("%s:%d - Triggered transition abort (complete=%d, tag=%s, id=%s, name=%s, value=%s, magic=%s, cib=%d.%d.%d) : %s", fn, line, transition_graph->complete, TYPE(reason), ID(reason), NAME(reason), VALUE(reason), magic ? magic : "NA", diff_add_admin_epoch, diff_add_epoch, diff_add_updates, abort_text); } else { crm_info("%s:%d - Triggered transition abort (complete=%d, tag=%s, id=%s, magic=%s, cib=%d.%d.%d) : %s", fn, line, transition_graph->complete, TYPE(reason), ID(reason), magic ? magic : "NA", diff_add_admin_epoch, diff_add_epoch, diff_add_updates, abort_text); } } else { crm_info("%s:%d - Triggered transition abort (complete=%d, tag=%s, id=%s, magic=%s) : %s", fn, line, transition_graph->complete, TYPE(reason), ID(reason), magic ? magic : "NA", abort_text); } } else { crm_info("%s:%d - Triggered transition abort (complete=%d) : %s", fn, line, transition_graph->complete, abort_text); } switch (fsa_state) { case S_STARTING: case S_PENDING: case S_NOT_DC: case S_HALT: case S_ILLEGAL: case S_STOPPING: case S_TERMINATE: crm_info("Abort suppressed: state=%s (complete=%d)", fsa_state2string(fsa_state), transition_graph->complete); return; default: break; } if (magic == NULL && reason != NULL) { crm_log_xml_debug(reason, "Cause"); } /* Make sure any queued calculations are discarded ASAP */ free(fsa_pe_ref); fsa_pe_ref = NULL; if (transition_graph->complete) { if (transition_timer->period_ms > 0) { crm_timer_stop(transition_timer); crm_timer_start(transition_timer); } else { register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL); } return; } update_abort_priority(transition_graph, abort_priority, abort_action, abort_text); mainloop_set_trigger(transition_trigger); } diff --git a/fencing/admin.c b/fencing/admin.c index a4cd776b37..c8671f3e78 100644 --- a/fencing/admin.c +++ b/fencing/admin.c @@ -1,384 +1,384 @@ /* * Copyright (C) 2009 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include -#include +#include #include #include #include #include /* *INDENT-OFF* */ static struct crm_option long_options[] = { {"help", 0, 0, '?', "\tThis text"}, {"version", 0, 0, '$', "\tVersion information" }, {"verbose", 0, 0, 'V', "\tIncrease debug output"}, {"quiet", 0, 0, 'q', "\tPrint only essential output"}, {"-spacer-", 0, 0, '-', "Commands:"}, {"list", 1, 0, 'l', "List devices that can terminate the specified host"}, {"list-registered", 0, 0, 'L', "List all registered devices"}, {"list-installed", 0, 0, 'I', "List all installed devices"}, {"-spacer-", 0, 0, '-', ""}, {"metadata", 0, 0, 'M', "Check the device's metadata"}, {"query", 1, 0, 'Q', "Check the device's status"}, {"fence", 1, 0, 'F', "Fence the named host"}, {"unfence", 1, 0, 'U', "Unfence the named host"}, {"reboot", 1, 0, 'B', "Reboot the named host"}, {"confirm", 1, 0, 'C', "Confirm the named host is now safely down"}, {"history", 1, 0, 'H', "Retrieve last fencing operation"}, {"-spacer-", 0, 0, '-', ""}, {"register", 1, 0, 'R', "Register the named stonith device. Requires: --agent, optional: --option"}, {"deregister", 1, 0, 'D', "De-register the named stonith device"}, {"register-level", 1, 0, 'r', "Register a stonith level for the named host. Requires: --index, one or more --device entries"}, {"deregister-level", 1, 0, 'd', "De-register a stonith level for the named host. Requires: --index"}, {"-spacer-", 0, 0, '-', ""}, {"-spacer-", 0, 0, '-', "Options and modifiers:"}, {"agent", 1, 0, 'a', "The agent (eg. fence_xvm) to instantiate when calling with --register"}, {"env-option", 1, 0, 'e'}, {"option", 1, 0, 'o'}, {"device", 1, 0, 'v', "A device to associate with a given host and stonith level"}, {"index", 1, 0, 'i', "The stonith level (1-9)"}, {"timeout", 1, 0, 't', "Operation timeout in seconds"}, {"list-all", 0, 0, 'L', "legacy alias for --list-registered"}, {0, 0, 0, 0} }; /* *INDENT-ON* */ int st_opts = st_opt_sync_call; int main(int argc, char ** argv) { int flag; int rc = 0; int quiet = 0; int verbose = 0; int argerr = 0; int timeout = 120; int option_index = 0; int fence_level = 0; char name[512]; char value[512]; const char *agent = NULL; const char *device = NULL; const char *target = NULL; char action = 0; stonith_t *st = NULL; stonith_key_value_t *params = NULL; stonith_key_value_t *devices = NULL; stonith_key_value_t *dIter = NULL; crm_log_init(NULL, LOG_NOTICE, FALSE, FALSE, argc, argv, FALSE); crm_set_options(NULL, "mode [options]", long_options, "Provides access to the stonith-ng API.\n" "\nAllows the administrator to add/remove/list devices, check device and host status and fence hosts\n"); while (1) { flag = crm_get_option(argc, argv, &option_index); if (flag == -1) break; switch(flag) { case 'V': verbose = 1; crm_bump_log_level(); break; case '$': case '?': crm_help(flag, EX_OK); break; case 'L': case 'I': action = flag; break; case 'q': quiet = 1; break; case 'Q': case 'R': case 'D': action = flag; device = optarg; break; case 'a': agent = optarg; break; case 'l': target = optarg; action = 'L'; break; case 'M': action = flag; break; case 't': timeout = crm_atoi(optarg, NULL); break; case 'B': case 'F': case 'U': case 'C': case 'H': case 'r': case 'd': target = optarg; action = flag; break; case 'i': fence_level = crm_atoi(optarg, NULL); break; case 'v': devices = stonith_key_value_add(devices, NULL, optarg); break; case 'o': crm_info("Scanning: -o %s", optarg); rc = sscanf(optarg, "%[^=]=%[^=]", name, value); if(rc != 2) { crm_err("Invalid option: -o %s", optarg); ++argerr; } else { crm_info("Got: '%s'='%s'", name, value); stonith_key_value_add(params, name, value); } break; case 'e': { char *key = crm_concat("OCF_RESKEY", optarg, '_'); const char *env = getenv(key); if(env == NULL) { crm_err("Invalid option: -e %s", optarg); ++argerr; } else { crm_info("Got: '%s'='%s'", optarg, env); stonith_key_value_add( params, optarg, env); } } break; default: ++argerr; break; } } if (optind > argc) { ++argerr; } if (argerr) { crm_help('?', EX_USAGE); } crm_debug("Create"); st = stonith_api_new(); if(action != 'M' && action != 'I') { rc = st->cmds->connect(st, crm_system_name, NULL); crm_debug("Connect: %d", rc); if(rc < 0) { goto done; } } switch(action) { case 'I': rc = st->cmds->list(st, st_opt_sync_call, NULL, &devices, timeout); for(dIter = devices; dIter; dIter = dIter->next ) { fprintf( stdout, " %s\n", dIter->value ); } if(rc == 0) { fprintf(stderr, "No devices found\n"); } else if(rc > 0) { fprintf(stderr, "%d devices found\n", rc); rc = 0; } stonith_key_value_freeall(devices, 1, 1); break; case 'L': rc = st->cmds->query(st, st_opts, target, &devices, timeout); for(dIter = devices; dIter; dIter = dIter->next ) { fprintf( stdout, " %s\n", dIter->value ); } if(rc == 0) { fprintf(stderr, "No devices found\n"); } else if(rc > 0) { fprintf(stderr, "%d devices found\n", rc); rc = 0; } stonith_key_value_freeall(devices, 1, 1); break; case 'Q': rc = st->cmds->call(st, st_opts, device, "monitor", NULL, timeout); if(rc < 0) { rc = st->cmds->call(st, st_opts, device, "list", NULL, timeout); } break; case 'R': rc = st->cmds->register_device(st, st_opts, device, "stonith-ng", agent, params); break; case 'D': rc = st->cmds->remove_device(st, st_opts, device); break; case 'r': rc = st->cmds->register_level(st, st_opts, target, fence_level, devices); break; case 'd': rc = st->cmds->remove_level(st, st_opts, target, fence_level); break; case 'M': if (agent == NULL) { printf("Please specify an agent to query using -a,--agent [value]\n"); return -1; } else { char *buffer = NULL; rc = st->cmds->metadata(st, st_opt_sync_call, agent, NULL, &buffer, timeout); if(rc == pcmk_ok) { printf("%s\n", buffer); } free(buffer); } break; case 'C': rc = st->cmds->confirm(st, st_opts, target); break; case 'B': rc = st->cmds->fence(st, st_opts, target, "reboot", timeout); break; case 'F': rc = st->cmds->fence(st, st_opts, target, "off", timeout); break; case 'U': rc = st->cmds->fence(st, st_opts, target, "on", timeout); break; case 'H': { stonith_history_t *history, *hp, *latest = NULL; rc = st->cmds->history(st, st_opts, target, &history, timeout); for(hp = history; hp; hp = hp->next) { char *action_s = NULL; time_t complete = hp->completed; if(hp->state == st_done) { latest = hp; } if(quiet || !verbose) { continue; } else if(hp->action == NULL) { action_s = strdup("unknown"); } else if(hp->action[0] != 'r') { action_s = crm_concat("turn", hp->action, ' '); } else { action_s = strdup(hp->action); } if(hp->state == st_failed) { printf("%s failed to %s node %s on behalf of %s at %s\n", hp->delegate?hp->delegate:"We", action_s, hp->target, hp->origin, ctime(&complete)); } else if(hp->state == st_done && hp->delegate) { printf("%s was able to %s node %s on behalf of %s at %s\n", hp->delegate, action_s, hp->target, hp->origin, ctime(&complete)); } else if(hp->state == st_done) { printf("We were able to %s node %s on behalf of %s at %s\n", action_s, hp->target, hp->origin, ctime(&complete)); } else { printf("%s wishes to %s node %s - %d %d\n", hp->origin, action_s, hp->target, hp->state, hp->completed); } free(action_s); } if(latest) { if(quiet) { printf("%d\n", latest->completed); } else { char *action_s = NULL; time_t complete = latest->completed; if(latest->action == NULL) { action_s = strdup("unknown"); } else if(latest->action[0] != 'r') { action_s = crm_concat("turn", latest->action, ' '); } else { action_s = strdup(latest->action); } printf("%s was able to %s node %s on behalf of %s at %s\n", latest->delegate?latest->delegate:"We", action_s, latest->target, latest->origin, ctime(&complete)); free(action_s); } } } break; } done: if(rc < 0) { printf("Command failed: %s\n", pcmk_strerror(rc)); } stonith_key_value_freeall(params, 1, 1); st->cmds->disconnect(st); crm_debug("Disconnect: %d", rc); crm_debug("Destroy"); stonith_api_delete(st); return rc; } diff --git a/fencing/commands.c b/fencing/commands.c index 6ebf65b2b2..a7826943c4 100644 --- a/fencing/commands.c +++ b/fencing/commands.c @@ -1,1256 +1,1256 @@ /* * Copyright (C) 2009 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include +#include #include #include #include #include #include GHashTable *device_list = NULL; GHashTable *topology = NULL; static int active_children = 0; static gboolean stonith_device_dispatch(gpointer user_data); static void st_child_done(GPid pid, gint status, gpointer user_data); static void free_async_command(async_command_t *cmd) { g_list_free(cmd->device_list); free(cmd->device); free(cmd->action); free(cmd->victim); free(cmd->remote); free(cmd->client); free(cmd->origin); free(cmd->op); free(cmd); } static async_command_t *create_async_command(xmlNode *msg) { async_command_t *cmd = NULL; xmlNode *op = get_xpath_object("//@"F_STONITH_ACTION, msg, LOG_ERR); const char *action = crm_element_value(op, F_STONITH_ACTION); CRM_CHECK(action != NULL, crm_log_xml_warn(msg, "NoAction"); return NULL); crm_log_xml_trace(msg, "Command"); cmd = calloc(1, sizeof(async_command_t)); crm_element_value_int(msg, F_STONITH_CALLID, &(cmd->id)); crm_element_value_int(msg, F_STONITH_CALLOPTS, &(cmd->options)); crm_element_value_int(msg, F_STONITH_TIMEOUT, &(cmd->timeout)); cmd->origin = crm_element_value_copy(msg, F_ORIG); cmd->remote = crm_element_value_copy(msg, F_STONITH_REMOTE); cmd->client = crm_element_value_copy(msg, F_STONITH_CLIENTID); cmd->op = crm_element_value_copy(msg, F_STONITH_OPERATION); cmd->action = strdup(action); cmd->victim = crm_element_value_copy(op, F_STONITH_TARGET); cmd->mode = crm_element_value_copy(op, F_STONITH_MODE); cmd->device = crm_element_value_copy(op, F_STONITH_DEVICE); cmd->done = st_child_done; CRM_CHECK(cmd->op != NULL, crm_log_xml_warn(msg, "NoOp"); free_async_command(cmd); return NULL); CRM_CHECK(cmd->client != NULL, crm_log_xml_warn(msg, "NoClient")); return cmd; } static int stonith_manual_ack(xmlNode *msg, remote_fencing_op_t *op) { async_command_t *cmd = create_async_command(msg); xmlNode *dev = get_xpath_object("//@"F_STONITH_TARGET, msg, LOG_ERR); if(cmd == NULL) { return -EINVAL; } cmd->device = strdup("manual_ack"); cmd->remote = strdup(op->id); crm_notice("Injecting manual confirmation that %s is safely off/down", crm_element_value(dev, F_STONITH_TARGET)); st_child_done(0, 0, cmd); return pcmk_ok; } static gboolean stonith_device_execute(stonith_device_t *device) { int rc = 0; int exec_rc = 0; async_command_t *cmd = NULL; CRM_CHECK(device != NULL, return FALSE); if(device->active_pid) { crm_trace("%s is still active with pid %u", device->id, device->active_pid); return TRUE; } if(device->pending_ops) { GList *first = device->pending_ops; device->pending_ops = g_list_remove_link(device->pending_ops, first); cmd = first->data; g_list_free_1(first); } if(cmd == NULL) { crm_trace("Nothing further to do for %s", device->id); return TRUE; } cmd->device = strdup(device->id); exec_rc = run_stonith_agent(device->agent, cmd->action, cmd->victim, device->params, device->aliases, &rc, NULL, cmd); if(exec_rc > 0) { crm_debug("Operation %s%s%s on %s now running with pid=%d, timeout=%dms", cmd->action, cmd->victim?" for node ":"", cmd->victim?cmd->victim:"", device->id, exec_rc, cmd->timeout); device->active_pid = exec_rc; } else { crm_warn("Operation %s%s%s on %s failed (%d/%d)", cmd->action, cmd->victim?" for node ":"", cmd->victim?cmd->victim:"", device->id, exec_rc, rc); st_child_done(0, rc<0?rc:exec_rc, cmd); } return TRUE; } static gboolean stonith_device_dispatch(gpointer user_data) { return stonith_device_execute(user_data); } static void schedule_stonith_command(async_command_t *cmd, stonith_device_t *device) { CRM_CHECK(cmd != NULL, return); CRM_CHECK(device != NULL, return); crm_debug("Scheduling %s on %s for %s (timeout=%dms)", cmd->action, device->id, cmd->remote?cmd->remote:cmd->client, cmd->timeout); device->pending_ops = g_list_append(device->pending_ops, cmd); mainloop_set_trigger(device->work); } void free_device(gpointer data) { GListPtr gIter = NULL; stonith_device_t *device = data; g_hash_table_destroy(device->params); g_hash_table_destroy(device->aliases); for(gIter = device->pending_ops; gIter != NULL; gIter = gIter->next) { async_command_t *cmd = gIter->data; crm_warn("Removal of device '%s' purged operation %s", device->id, cmd->action); st_child_done(0, -ENODEV, cmd); free_async_command(cmd); } g_list_free(device->pending_ops); g_list_free_full(device->targets, free); free(device->namespace); free(device->agent); free(device->id); free(device); } static GHashTable *build_port_aliases(const char *hostmap, GListPtr *targets) { char *name = NULL; int last = 0, lpc = 0, max = 0, added = 0; GHashTable *aliases = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if(hostmap == NULL) { return aliases; } max = strlen(hostmap); for(; lpc <= max; lpc++) { switch(hostmap[lpc]) { /* Assignment chars */ case '=': case ':': if(lpc > last) { free(name); name = calloc(1, 1 + lpc - last); memcpy(name, hostmap + last, lpc - last); } last = lpc + 1; break; /* Delimeter chars */ /* case ',': Potentially used to specify multiple ports */ case 0: case ';': case ' ': case '\t': if(name) { char *value = NULL; value = calloc(1, 1 + lpc - last); memcpy(value, hostmap + last, lpc - last); crm_debug("Adding alias '%s'='%s'", name, value); g_hash_table_replace(aliases, name, value); if(targets) { *targets = g_list_append(*targets, strdup(value)); } value=NULL; name=NULL; added++; } else if(lpc > last) { crm_debug("Parse error at offset %d near '%s'", lpc-last, hostmap+last); } last = lpc + 1; break; } if(hostmap[lpc] == 0) { break; } } if(added == 0) { crm_info("No host mappings detected in '%s'", hostmap); } free(name); return aliases; } static void parse_host_line(const char *line, GListPtr *output) { int lpc = 0; int max = 0; int last = 0; if(line) { max = strlen(line); } else { return; } /* Check for any complaints about additional parameters that the device doesn't understand */ if(strstr(line, "invalid") || strstr(line, "variable")) { crm_debug("Skipping: %s", line); return; } crm_trace("Processing: %s", line); /* Skip initial whitespace */ for(lpc = 0; lpc <= max && isspace(line[lpc]); lpc++) { last = lpc+1; } /* Now the actual content */ for(lpc = 0; lpc <= max; lpc++) { gboolean a_space = isspace(line[lpc]); if(a_space && lpc < max && isspace(line[lpc+1])) { /* fast-forward to the end of the spaces */ } else if(a_space || line[lpc] == ',' || line[lpc] == 0) { int rc = 1; char *entry = NULL; if(lpc != last) { entry = calloc(1, 1 + lpc - last); rc = sscanf(line+last, "%[a-zA-Z0-9_-.]", entry); } if(entry == NULL) { /* Skip */ } else if(rc != 1) { crm_warn("Could not parse (%d %d): %s", last, lpc, line+last); } else if(safe_str_neq(entry, "on") && safe_str_neq(entry, "off")) { crm_trace("Adding '%s'", entry); *output = g_list_append(*output, entry); entry = NULL; } free(entry); last = lpc + 1; } } } static GListPtr parse_host_list(const char *hosts) { int lpc = 0; int max = 0; int last = 0; GListPtr output = NULL; if(hosts == NULL) { return output; } max = strlen(hosts); for(lpc = 0; lpc <= max; lpc++) { if(hosts[lpc] == '\n' || hosts[lpc] == 0) { char *line = NULL; line = calloc(1, 2 + lpc - last); snprintf(line, 1 + lpc - last, "%s", hosts+last); parse_host_line(line, &output); free(line); last = lpc + 1; } } return output; } static stonith_device_t *build_device_from_xml(xmlNode *msg) { xmlNode *dev = get_xpath_object("//"F_STONITH_DEVICE, msg, LOG_ERR); stonith_device_t *device = NULL; device = calloc(1, sizeof(stonith_device_t)); device->id = crm_element_value_copy(dev, XML_ATTR_ID); device->agent = crm_element_value_copy(dev, "agent"); device->namespace = crm_element_value_copy(dev, "namespace"); device->params = xml2list(dev); device->work = mainloop_add_trigger(G_PRIORITY_HIGH, stonith_device_dispatch, device); /* TODO: Hook up priority */ return device; } int stonith_device_register(xmlNode *msg) { const char *value = NULL; stonith_device_t *device = build_device_from_xml(msg); value = g_hash_table_lookup(device->params, STONITH_ATTR_HOSTLIST); if(value) { device->targets = parse_host_list(value); } value = g_hash_table_lookup(device->params, STONITH_ATTR_HOSTMAP); device->aliases = build_port_aliases(value, &(device->targets)); g_hash_table_replace(device_list, device->id, device); crm_notice("Added '%s' to the device list (%d active devices)", device->id, g_hash_table_size(device_list)); return pcmk_ok; } static int stonith_device_remove(xmlNode *msg) { xmlNode *dev = get_xpath_object("//"F_STONITH_DEVICE, msg, LOG_ERR); const char *id = crm_element_value(dev, XML_ATTR_ID); if(g_hash_table_remove(device_list, id)) { crm_info("Removed '%s' from the device list (%d active devices)", id, g_hash_table_size(device_list)); } else { crm_info("Device '%s' not found (%d active devices)", id, g_hash_table_size(device_list)); } return pcmk_ok; } static int count_active_levels(stonith_topology_t *tp) { int lpc = 0; int count = 0; for(lpc = 0; lpc < ST_LEVEL_MAX; lpc++) { if(tp->levels[lpc] != NULL) { count++; } } return count; } void free_topology_entry(gpointer data) { stonith_topology_t *tp = data; int lpc = 0; for(lpc = 0; lpc < ST_LEVEL_MAX; lpc++) { if(tp->levels[lpc] != NULL) { g_list_free_full(tp->levels[lpc], free); } } free(tp->node); free(tp); } int stonith_level_register(xmlNode *msg) { int id = 0; int rc = pcmk_ok; xmlNode *child = NULL; xmlNode *level = get_xpath_object("//"F_STONITH_LEVEL, msg, LOG_ERR); const char *node = crm_element_value(level, F_STONITH_TARGET); stonith_topology_t *tp = g_hash_table_lookup(topology, node); crm_element_value_int(level, XML_ATTR_ID, &id); if(id <= 0 || id >= ST_LEVEL_MAX) { return -EINVAL; } if(tp == NULL) { tp = calloc(1, sizeof(stonith_topology_t)); tp->node = strdup(node); g_hash_table_replace(topology, tp->node, tp); crm_trace("Added %s to the topology (%d active entries)", node, g_hash_table_size(topology)); } if(tp->levels[id] != NULL) { crm_info("Adding to the existing %s[%d] topology entry (%d active entries)", node, id, count_active_levels(tp)); } for (child = __xml_first_child(level); child != NULL; child = __xml_next(child)) { const char *device = ID(child); crm_trace("Adding device '%s' for %s (%d)", device, node, id); tp->levels[id] = g_list_append(tp->levels[id], strdup(device)); } crm_info("Node %s has %d active fencing levels", node, count_active_levels(tp)); return rc; } int stonith_level_remove(xmlNode *msg) { int id = 0; xmlNode *level = get_xpath_object("//"F_STONITH_LEVEL, msg, LOG_ERR); const char *node = crm_element_value(level, F_STONITH_TARGET); stonith_topology_t *tp = g_hash_table_lookup(topology, node); if(tp == NULL) { crm_info("Node %s not found (%d active entries)", node, g_hash_table_size(topology)); return pcmk_ok; } crm_element_value_int(level, XML_ATTR_ID, &id); if(id < 0 || id >= ST_LEVEL_MAX) { return -EINVAL; } if(id == 0 && g_hash_table_remove(topology, node)) { crm_info("Removed all %s related entries from the topology (%d active entries)", node, g_hash_table_size(topology)); } else if(id > 0 && tp->levels[id] != NULL) { g_list_free_full(tp->levels[id], free); tp->levels[id] = NULL; crm_info("Removed entry '%d' from %s's topology (%d active entries remaining)", id, node, count_active_levels(tp)); } return pcmk_ok; } static gboolean string_in_list(GListPtr list, const char *item) { int lpc = 0; int max = g_list_length(list); for(lpc = 0; lpc < max; lpc ++) { const char *value = g_list_nth_data(list, lpc); if(safe_str_eq(item, value)) { return TRUE; } } return FALSE; } static int stonith_device_action(xmlNode *msg, char **output) { int rc = pcmk_ok; xmlNode *dev = get_xpath_object("//"F_STONITH_DEVICE, msg, LOG_ERR); const char *id = crm_element_value(dev, F_STONITH_DEVICE); async_command_t *cmd = NULL; stonith_device_t *device = NULL; if(id) { crm_trace("Looking for '%s'", id); device = g_hash_table_lookup(device_list, id); } if(device) { cmd = create_async_command(msg); if(cmd == NULL) { free_device(device); return -EPROTO; } schedule_stonith_command(cmd, device); rc = -EINPROGRESS; } else { crm_info("Device %s not found", id?id:""); rc = -ENODEV; } return rc; } static gboolean can_fence_host_with_device(stonith_device_t *dev, const char *host) { gboolean can = FALSE; const char *alias = host; const char *check_type = NULL; if(dev == NULL) { return FALSE; } else if(host == NULL) { return TRUE; } if(g_hash_table_lookup(dev->aliases, host)) { alias = g_hash_table_lookup(dev->aliases, host); } check_type = g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTCHECK); if(check_type == NULL) { if(g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTLIST)) { check_type = "static-list"; } else { check_type = "dynamic-list"; } } if(safe_str_eq(check_type, "none")) { can = TRUE; } else if(safe_str_eq(check_type, "static-list")) { /* Presence in the hostmap is sufficient * Only use if all hosts on which the device can be active can always fence all listed hosts */ if(string_in_list(dev->targets, host)) { can = TRUE; } } else if(safe_str_eq(check_type, "dynamic-list")) { time_t now = time(NULL); /* Host/alias must be in the list output to be eligable to be fenced * * Will cause problems if down'd nodes aren't listed or (for virtual nodes) * if the guest is still listed despite being moved to another machine */ if(dev->targets_age < 0) { crm_trace("Port list queries disabled for %s", dev->id); } else if(dev->targets == NULL || dev->targets_age + 60 < now) { char *output = NULL; int rc = pcmk_ok; int exec_rc = pcmk_ok; /* Check for the target's presence in the output of the 'list' command */ g_list_free_full(dev->targets, free); dev->targets = NULL; while(dev->active_pid != 0 && kill(dev->active_pid, 0) == 0) { /* This is a hack * The proper approach would be to do asynchronous replies */ crm_trace("Waiting for %u to exit for %s", dev->active_pid, dev->id); sleep(1); } exec_rc = run_stonith_agent(dev->agent, "list", NULL, dev->params, NULL, &rc, &output, NULL); if(rc != 0 && dev->active_pid == 0) { /* This device probably only supports a single * connection, which appears to already be in use, * likely involved in a montior or (less likely) * metadata operation. * * Avoid disabling port list queries in the hope that * the op would succeed next time */ crm_info("Couldn't query ports for %s. Call failed with rc=%d and active_pid=%d: %s", dev->agent, rc, dev->active_pid, output); } else if(exec_rc < 0 || rc != 0) { crm_notice("Disabling port list queries for %s (%d/%d): %s", dev->id, exec_rc, rc, output); dev->targets_age = -1; /* Fall back to status */ g_hash_table_replace(dev->params, strdup(STONITH_ATTR_HOSTCHECK), strdup("status")); } else { crm_info("Refreshing port list for %s", dev->id); dev->targets = parse_host_list(output); dev->targets_age = now; } free(output); } if(string_in_list(dev->targets, alias)) { can = TRUE; } } else if(safe_str_eq(check_type, "status")) { int rc = 0; int exec_rc = 0; /* Run the status operation for the device/target combination * Will cause problems if the device doesn't return 2 for down'd nodes or * (for virtual nodes) if the device doesn't return 1 for guests that * have been moved to another host */ exec_rc = run_stonith_agent( dev->agent, "status", host, dev->params, dev->aliases, &rc, NULL, NULL); if(exec_rc != 0) { crm_err("Could not invoke %s: rc=%d", dev->id, exec_rc); } else if(rc == 1 /* unkown */) { crm_trace("Host %s is not known by %s", host, dev->id); } else if(rc == 0 /* active */ || rc == 2 /* inactive */) { can = TRUE; } else { crm_notice("Unkown result when testing if %s can fence %s: rc=%d", dev->id, host, rc); } } else { crm_err("Unknown check type: %s", check_type); } if(safe_str_eq(host, alias)) { crm_info("%s can%s fence %s: %s", dev->id, can?"":" not", host, check_type); } else { crm_info("%s can%s fence %s (aka. '%s'): %s", dev->id, can?"":" not", host, alias, check_type); } return can; } struct device_search_s { const char *host; GListPtr capable; }; static void search_devices( gpointer key, gpointer value, gpointer user_data) { stonith_device_t *dev = value; struct device_search_s *search = user_data; if(can_fence_host_with_device(dev, search->host)) { search->capable = g_list_append(search->capable, value); } } static int stonith_query(xmlNode *msg, xmlNode **list) { struct device_search_s search; int available_devices = 0; xmlNode *dev = get_xpath_object("//@"F_STONITH_TARGET, msg, LOG_DEBUG_3); search.host = NULL; search.capable = NULL; if(dev) { const char *device = crm_element_value(dev, F_STONITH_DEVICE); search.host = crm_element_value(dev, F_STONITH_TARGET); if(device && safe_str_eq(device, "manual_ack")) { /* No query necessary */ if(list) { *list = NULL; } return pcmk_ok; } } crm_log_xml_debug(msg, "Query"); g_hash_table_foreach(device_list, search_devices, &search); available_devices = g_list_length(search.capable); if(search.host) { crm_debug("Found %d matching devices for '%s'", available_devices, search.host); } else { crm_debug("%d devices installed", available_devices); } /* Pack the results into data */ if(list) { GListPtr lpc = NULL; *list = create_xml_node(NULL, __FUNCTION__); crm_xml_add(*list, F_STONITH_TARGET, search.host); crm_xml_add_int(*list, "st-available-devices", available_devices); for(lpc = search.capable; lpc != NULL; lpc = lpc->next) { stonith_device_t *device = (stonith_device_t*)lpc->data; dev = create_xml_node(*list, F_STONITH_DEVICE); crm_xml_add(dev, XML_ATTR_ID, device->id); crm_xml_add(dev, "namespace", device->namespace); crm_xml_add(dev, "agent", device->agent); if(search.host == NULL) { xmlNode *attrs = create_xml_node(dev, XML_TAG_ATTRS); g_hash_table_foreach(device->params, hash2field, attrs); } } } g_list_free(search.capable); return available_devices; } static void log_operation(async_command_t *cmd, int rc, int pid, const char *next, const char *output) { if(rc == 0) { next = NULL; } if(cmd->victim != NULL) { do_crm_log(rc==0?LOG_NOTICE:LOG_ERR, "Operation '%s' [%d] (call %d from %s) for host '%s' with device '%s' returned: %d%s%s", cmd->action, pid, cmd->id, cmd->client, cmd->victim, cmd->device, rc, next?". Trying: ":"", next?next:""); } else { do_crm_log_unlikely(rc==0?LOG_DEBUG:LOG_NOTICE, "Operation '%s' [%d] for device '%s' returned: %d%s%s", cmd->action, pid, cmd->device, rc, next?". Trying: ":"", next?next:""); } if(output) { /* Logging the whole string confuses syslog when the string is xml */ char *local_copy = strdup(output); int lpc = 0, last = 0, more = strlen(local_copy); for(lpc = 0; lpc < more; lpc++) { if(local_copy[lpc] == '\n' || local_copy[lpc] == 0) { local_copy[lpc] = 0; do_crm_log(rc==0?LOG_INFO:LOG_WARNING, "%s: %s", cmd->device, local_copy+last); last = lpc+1; } } crm_debug("%s: %s (total %d bytes)", cmd->device, local_copy+last, more); free(local_copy); } } #define READ_MAX 500 static void st_child_done(GPid pid, gint status, gpointer user_data) { int rc = -pcmk_err_generic; int len = 0; int more = 0; gboolean bcast = TRUE; char *output = NULL; xmlNode *data = NULL; xmlNode *reply = NULL; stonith_device_t *device = NULL; async_command_t *cmd = user_data; CRM_CHECK(cmd != NULL, return); if(cmd->timer_sigterm > 0) { g_source_remove(cmd->timer_sigterm); } if(cmd->timer_sigkill > 0) { g_source_remove(cmd->timer_sigkill); } if(WIFSIGNALED(status)) { int signo = WTERMSIG(status); if(signo == SIGTERM || signo == SIGKILL) { rc = -ETIME; } else { rc = -ECONNABORTED; } crm_notice("Child process %d performing action '%s' with '%s' terminated with signal %d", pid, cmd->action, cmd->device, signo); } else if(WIFEXITED(status)) { rc = WEXITSTATUS(status); crm_debug("Child process %d performing action '%s' with '%s' exited with rc %d", pid, cmd->action, cmd->device, rc); } active_children--; /* The device is ready to do something else now */ device = g_hash_table_lookup(device_list, cmd->device); if(device) { device->active_pid = 0; mainloop_set_trigger(device->work); } do { char buffer[READ_MAX]; errno = 0; if(cmd->stdout > 0) { memset(&buffer, 0, READ_MAX); more = read(cmd->stdout, buffer, READ_MAX-1); crm_trace("Got %d more bytes: %s", more, buffer); } if(more > 0) { output = realloc(output, len + more + 1); sprintf(output+len, "%s", buffer); len += more; } } while (more == (READ_MAX-1) || (more < 0 && errno == EINTR)); if(cmd->stdout) { close(cmd->stdout); cmd->stdout = 0; } crm_trace("Operation on %s completed with rc=%d (%d remaining)", cmd->device, rc, g_list_length(cmd->device_next)); if(rc != 0 && cmd->device_next) { stonith_device_t *dev = cmd->device_next->data; log_operation(cmd, rc, pid, dev->id, output); cmd->device_next = cmd->device_next->next; schedule_stonith_command(cmd, dev); goto done; } if(rc > 0) { rc = -pcmk_err_generic; } reply = stonith_construct_async_reply(cmd, output, data, rc); if(safe_str_eq(cmd->action, "metadata")) { /* Too verbose to log */ bcast = FALSE; free(output); output = NULL; crm_trace("Directed reply: %s op", cmd->action); } else if(crm_str_eq(cmd->action, "monitor", TRUE)) { crm_trace("Directed reply: %s op", cmd->action); bcast = FALSE; } else if(safe_str_eq(cmd->mode, "slave")) { crm_trace("Directed reply: Complex op with %s", cmd->device); bcast = FALSE; } log_operation(cmd, rc, pid, NULL, output); crm_log_xml_trace(reply, "Reply"); if(bcast && !stand_alone) { /* Send reply as T_STONITH_NOTIFY so everyone does notifications * Potentially limit to unsucessful operations to the originator? */ crm_trace("Broadcast reply"); crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY); send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE); } else if(cmd->origin) { crm_trace("Directed reply to %s", cmd->origin); send_cluster_message(cmd->origin, crm_msg_stonith_ng, reply, FALSE); } else { crm_trace("Directed local %ssync reply to %s", (cmd->options & st_opt_sync_call)?"":"a-", cmd->client); do_local_reply(reply, cmd->client, cmd->options & st_opt_sync_call, FALSE); } free_async_command(cmd); done: free(output); free_xml(reply); free_xml(data); } static gint sort_device_priority(gconstpointer a, gconstpointer b) { const stonith_device_t *dev_a = a; const stonith_device_t *dev_b = a; if(dev_a->priority > dev_b->priority) { return -1; } else if(dev_a->priority < dev_b->priority) { return 1; } return 0; } static int stonith_fence(xmlNode *msg) { int options = 0; const char *device_id = NULL; stonith_device_t *device = NULL; async_command_t *cmd = create_async_command(msg); xmlNode *dev = get_xpath_object("//@"F_STONITH_TARGET, msg, LOG_ERR); if(cmd == NULL) { return -EPROTO; } device_id = crm_element_value(dev, F_STONITH_DEVICE); if(device_id) { device = g_hash_table_lookup(device_list, device_id); if(device == NULL) { crm_err("Requested device '%s' is not available", device_id); } } else { struct device_search_s search; search.capable = NULL; search.host = crm_element_value(dev, F_STONITH_TARGET); crm_element_value_int(msg, F_STONITH_CALLOPTS, &options); if(options & st_opt_cs_nodeid) { int nodeid = crm_atoi(search.host, NULL); crm_node_t *node = crm_get_peer(nodeid, NULL); if(node) { search.host = node->uname; } } g_hash_table_foreach(device_list, search_devices, &search); crm_info("Found %d matching devices for '%s'", g_list_length(search.capable), search.host); if(g_list_length(search.capable) > 0) { /* Order based on priority */ search.capable = g_list_sort(search.capable, sort_device_priority); device = search.capable->data; if(g_list_length(search.capable) > 1) { cmd->device_list = search.capable; cmd->device_next = cmd->device_list->next; } else { g_list_free(search.capable); } } } if(device) { cmd->device = device->id; schedule_stonith_command(cmd, device); return -EINPROGRESS; } free_async_command(cmd); return -EHOSTUNREACH; } xmlNode *stonith_construct_reply(xmlNode *request, char *output, xmlNode *data, int rc) { int lpc = 0; xmlNode *reply = NULL; const char *name = NULL; const char *value = NULL; const char *names[] = { F_STONITH_OPERATION, F_STONITH_CALLID, F_STONITH_CLIENTID, F_STONITH_REMOTE, F_STONITH_CALLOPTS }; crm_trace("Creating a basic reply"); reply = create_xml_node(NULL, T_STONITH_REPLY); crm_xml_add(reply, "st_origin", __FUNCTION__); crm_xml_add(reply, F_TYPE, T_STONITH_NG); crm_xml_add(reply, "st_output", output); crm_xml_add_int(reply, F_STONITH_RC, rc); CRM_CHECK(request != NULL, crm_warn("Can't create a sane reply"); return reply); for(lpc = 0; lpc < DIMOF(names); lpc++) { name = names[lpc]; value = crm_element_value(request, name); crm_xml_add(reply, name, value); } if(data != NULL) { crm_trace("Attaching reply output"); add_message_xml(reply, F_STONITH_CALLDATA, data); } return reply; } xmlNode *stonith_construct_async_reply(async_command_t *cmd, char *output, xmlNode *data, int rc) { xmlNode *reply = NULL; crm_trace("Creating a basic reply"); reply = create_xml_node(NULL, T_STONITH_REPLY); crm_xml_add(reply, "st_origin", __FUNCTION__); crm_xml_add(reply, F_TYPE, T_STONITH_NG); crm_xml_add(reply, F_STONITH_OPERATION, cmd->op); crm_xml_add(reply, F_STONITH_DEVICE, cmd->device); crm_xml_add(reply, F_STONITH_REMOTE, cmd->remote); crm_xml_add(reply, F_STONITH_CLIENTID, cmd->client); crm_xml_add(reply, F_STONITH_TARGET, cmd->victim); crm_xml_add(reply, F_STONITH_ACTION, cmd->op); crm_xml_add_int(reply, F_STONITH_CALLID, cmd->id); crm_xml_add_int(reply, F_STONITH_CALLOPTS, cmd->options); crm_xml_add_int(reply, F_STONITH_RC, rc); crm_xml_add(reply, "st_output", output); if(data != NULL) { crm_info("Attaching reply output"); add_message_xml(reply, F_STONITH_CALLDATA, data); } return reply; } void stonith_command(stonith_client_t *client, xmlNode *request, const char *remote) { int call_options = 0; int rc = -EOPNOTSUPP; gboolean is_reply = FALSE; gboolean always_reply = FALSE; xmlNode *reply = NULL; xmlNode *data = NULL; char *output = NULL; const char *op = crm_element_value(request, F_STONITH_OPERATION); const char *client_id = crm_element_value(request, F_STONITH_CLIENTID); crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); if(get_xpath_object("//"T_STONITH_REPLY, request, LOG_DEBUG_3)) { is_reply = TRUE; } crm_debug("Processing %s%s from %s (%16x)", op, is_reply?" reply":"", client?client->name:remote, call_options); if(crm_str_eq(op, CRM_OP_REGISTER, TRUE)) { xmlNode *reply = create_xml_node(NULL, "reply"); crm_xml_add(reply, F_STONITH_OPERATION, CRM_OP_REGISTER); crm_xml_add(reply, F_STONITH_CLIENTID, client->id); crm_ipcs_send(client->channel, reply, FALSE); free_xml(reply); return; } else if(crm_str_eq(op, STONITH_OP_EXEC, TRUE)) { rc = stonith_device_action(request, &output); } else if(is_reply && crm_str_eq(op, STONITH_OP_QUERY, TRUE)) { process_remote_stonith_query(request); return; } else if(crm_str_eq(op, STONITH_OP_QUERY, TRUE)) { create_remote_stonith_op(client_id, request, TRUE); /* Record it for the future notification */ rc = stonith_query(request, &data); always_reply = TRUE; if(!data) { return; } } else if(is_reply && crm_str_eq(op, T_STONITH_NOTIFY, TRUE)) { process_remote_stonith_exec(request); return; } else if(is_reply && crm_str_eq(op, STONITH_OP_FENCE, TRUE)) { /* Reply to a complex fencing op */ process_remote_stonith_exec(request); return; } else if(crm_str_eq(op, T_STONITH_NOTIFY, TRUE)) { const char *flag_name = NULL; flag_name = crm_element_value(request, F_STONITH_NOTIFY_ACTIVATE); if(flag_name) { crm_debug("Setting %s callbacks for %s (%s): ON", flag_name, client->name, client->id); client->flags |= get_stonith_flag(flag_name); } flag_name = crm_element_value(request, F_STONITH_NOTIFY_DEACTIVATE); if(flag_name) { crm_debug("Setting %s callbacks for %s (%s): off", flag_name, client->name, client->id); client->flags |= get_stonith_flag(flag_name); } crm_ipcs_send_ack(client->channel, "ack", __FUNCTION__, __LINE__); return; /* } else if(is_reply && crm_str_eq(op, STONITH_OP_FENCE, TRUE)) { */ /* process_remote_stonith_exec(request); */ /* return; */ } else if(is_reply == FALSE && crm_str_eq(op, STONITH_OP_RELAY, TRUE)) { if(initiate_remote_stonith_op(NULL, request, FALSE) != NULL) { rc = -EINPROGRESS; } } else if(is_reply == FALSE && crm_str_eq(op, STONITH_OP_FENCE, TRUE)) { if(remote || stand_alone) { rc = stonith_fence(request); } else if(call_options & st_opt_manual_ack) { remote_fencing_op_t *rop = initiate_remote_stonith_op(client, request, TRUE); rc = stonith_manual_ack(request, rop); } else { const char *alternate_host = NULL; xmlNode *dev = get_xpath_object("//@"F_STONITH_TARGET, request, LOG_TRACE); const char *target = crm_element_value_copy(dev, F_STONITH_TARGET); if((call_options & st_opt_sync_call) == 0) { crm_ipcs_send_ack(client->channel, "ack", __FUNCTION__, __LINE__); } if(g_hash_table_lookup(topology, target) && safe_str_eq(target, stonith_our_uname)) { GHashTableIter gIter; crm_node_t *entry = NULL; int membership = crm_proc_plugin | crm_proc_heartbeat | crm_proc_cpg; g_hash_table_iter_init(&gIter, crm_peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { crm_trace("Checking for %s.%d != %s", entry->uname, entry->id, target); if(entry->uname && (entry->processes & membership) && safe_str_neq(entry->uname, target)) { alternate_host = entry->uname; break; } } if(alternate_host == NULL) { crm_err("No alternate host available to handle complex self fencing request"); } } if(alternate_host) { crm_notice("Forwarding complex self fencing request to peer %s", alternate_host); crm_xml_add(request, F_STONITH_OPERATION, STONITH_OP_RELAY); crm_xml_add(request, F_STONITH_CLIENTID, client->id); send_cluster_message(alternate_host, crm_msg_stonith_ng, request, FALSE); rc = -EINPROGRESS; } else if(initiate_remote_stonith_op(client, request, FALSE) != NULL) { rc = -EINPROGRESS; } } } else if (crm_str_eq(op, STONITH_OP_FENCE_HISTORY, TRUE)) { rc = stonith_fence_history(request, &data); always_reply = TRUE; } else if(crm_str_eq(op, CRM_OP_REGISTER, TRUE)) { return; } else if(crm_str_eq(op, STONITH_OP_DEVICE_ADD, TRUE)) { rc = stonith_device_register(request); do_stonith_notify(call_options, op, rc, request, NULL); } else if(crm_str_eq(op, STONITH_OP_DEVICE_DEL, TRUE)) { rc = stonith_device_remove(request); do_stonith_notify(call_options, op, rc, request, NULL); } else if(crm_str_eq(op, STONITH_OP_LEVEL_ADD, TRUE)) { rc = stonith_level_register(request); do_stonith_notify(call_options, op, rc, request, NULL); } else if(crm_str_eq(op, STONITH_OP_LEVEL_DEL, TRUE)) { rc = stonith_level_remove(request); do_stonith_notify(call_options, op, rc, request, NULL); } else if(crm_str_eq(op, STONITH_OP_CONFIRM, TRUE)) { async_command_t *cmd = create_async_command(request); xmlNode *reply = stonith_construct_async_reply(cmd, NULL, NULL, 0); crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY); crm_notice("Broadcasting manual fencing confirmation for node %s", cmd->victim); send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE); free_async_command(cmd); free_xml(reply); } else { crm_err("Unknown %s%s from %s", op, is_reply?" reply":"", client?client->name:remote); crm_log_xml_warn(request, "UnknownOp"); } do_crm_log_unlikely(rc>0?LOG_DEBUG:LOG_INFO,"Processed %s%s from %s: %s (%d)", op, is_reply?" reply":"", client?client->name:remote, rc>0?"":pcmk_strerror(rc), rc); if(is_reply || rc == -EINPROGRESS) { /* Nothing (yet) */ } else if(remote) { reply = stonith_construct_reply(request, output, data, rc); send_cluster_message(remote, crm_msg_stonith_ng, reply, FALSE); free_xml(reply); } else if(rc <= pcmk_ok || always_reply) { reply = stonith_construct_reply(request, output, data, rc); do_local_reply(reply, client_id, call_options & st_opt_sync_call, remote!=NULL); free_xml(reply); } free(output); free_xml(data); } diff --git a/fencing/main.c b/fencing/main.c index b6975ce283..22e783f620 100644 --- a/fencing/main.c +++ b/fencing/main.c @@ -1,840 +1,840 @@ /* * Copyright (C) 2009 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include -#include +#include #include #include #include #include #include #include #include char *stonith_our_uname = NULL; GMainLoop *mainloop = NULL; GHashTable *client_list = NULL; gboolean stand_alone = FALSE; gboolean stonith_shutdown_flag = FALSE; qb_ipcs_service_t *ipcs = NULL; #if SUPPORT_HEARTBEAT ll_cluster_t *hb_conn = NULL; #endif static void stonith_shutdown(int nsig); static void stonith_cleanup(void); static int32_t st_ipc_accept(qb_ipcs_connection_t *c, uid_t uid, gid_t gid) { crm_trace("Connecting %p for uid=%d gid=%d", c, uid, gid); if(stonith_shutdown_flag) { crm_info("Ignoring new client [%d] during shutdown", crm_ipcs_client_pid(c)); return -EPERM; } return 0; } static void st_ipc_created(qb_ipcs_connection_t *c) { stonith_client_t *new_client = NULL; #if 0 struct qb_ipcs_stats srv_stats; qb_ipcs_stats_get(s1, &srv_stats, QB_FALSE); qb_log(LOG_INFO, "Connection created (active:%d, closed:%d)", srv_stats.active_connections, srv_stats.closed_connections); #endif new_client = calloc(1, sizeof(stonith_client_t)); new_client->channel = c; new_client->channel_name = strdup("ipc"); CRM_CHECK(new_client->id == NULL, free(new_client->id)); new_client->id = crm_generate_uuid(); crm_trace("Created channel %p for client %s", c, new_client->id); /* make sure we can find ourselves later for sync calls * redirected to the master instance */ g_hash_table_insert(client_list, new_client->id, new_client); qb_ipcs_context_set(c, new_client); CRM_ASSERT(qb_ipcs_context_get(c) != NULL); } /* Exit code means? */ static int32_t st_ipc_dispatch(qb_ipcs_connection_t *c, void *data, size_t size) { xmlNode *request = NULL; stonith_client_t *client = (stonith_client_t*)qb_ipcs_context_get(c); request = crm_ipcs_recv(c, data, size); if (request == NULL) { return 0; } CRM_CHECK(client != NULL, goto cleanup); if(client->name == NULL) { const char *value = crm_element_value(request, F_STONITH_CLIENTNAME); if(value == NULL) { client->name = crm_itoa(crm_ipcs_client_pid(c)); } else { client->name = strdup(value); } } CRM_CHECK(client->id != NULL, crm_err("Invalid client: %p/%s", client, client->name); goto cleanup); crm_xml_add(request, F_STONITH_CLIENTID, client->id); crm_xml_add(request, F_STONITH_CLIENTNAME, client->name); crm_log_xml_trace(request, "Client[inbound]"); stonith_command(client, request, NULL); cleanup: if(client == NULL || client->id == NULL) { crm_log_xml_notice(request, "Invalid client"); } free_xml(request); return 0; } /* Error code means? */ static int32_t st_ipc_closed(qb_ipcs_connection_t *c) { stonith_client_t *client = (stonith_client_t*)qb_ipcs_context_get(c); #if 0 qb_ipcs_stats_get(s1, &srv_stats, QB_FALSE); qb_ipcs_connection_stats_get(c, &stats, QB_FALSE); qb_log(LOG_INFO, "Connection to pid:%d destroyed (active:%d, closed:%d)", stats.client_pid, srv_stats.active_connections, srv_stats.closed_connections); qb_log(LOG_DEBUG, " Requests %"PRIu64"", stats.requests); qb_log(LOG_DEBUG, " Responses %"PRIu64"", stats.responses); qb_log(LOG_DEBUG, " Events %"PRIu64"", stats.events); qb_log(LOG_DEBUG, " Send retries %"PRIu64"", stats.send_retries); qb_log(LOG_DEBUG, " Recv retries %"PRIu64"", stats.recv_retries); qb_log(LOG_DEBUG, " FC state %d", stats.flow_control_state); qb_log(LOG_DEBUG, " FC count %"PRIu64"", stats.flow_control_count); #endif if (client == NULL) { crm_err("No client"); return 0; } crm_trace("Cleaning up after client disconnect: %p/%s/%s", client, crm_str(client->name), client->id); if(client->id != NULL) { g_hash_table_remove(client_list, client->id); } /* 0 means: yes, go ahead and destroy the connection */ return 0; } static void st_ipc_destroy(qb_ipcs_connection_t *c) { stonith_client_t *client = (stonith_client_t*)qb_ipcs_context_get(c); /* Make sure the connection is fully cleaned up */ st_ipc_closed(c); if(client == NULL) { crm_trace("Nothing to destroy"); return; } crm_trace("Destroying %s (%p)", client->name, client); free(client->name); free(client->id); free(client); crm_trace("Done"); return; } static void stonith_peer_callback(xmlNode * msg, void* private_data) { const char *remote = crm_element_value(msg, F_ORIG); crm_log_xml_trace(msg, "Peer[inbound]"); stonith_command(NULL, msg, remote); } #if SUPPORT_HEARTBEAT static void stonith_peer_hb_callback(HA_Message * msg, void* private_data) { xmlNode *xml = convert_ha_message(NULL, msg, __FUNCTION__); stonith_peer_callback(xml, private_data); free_xml(xml); } static void stonith_peer_hb_destroy(gpointer user_data) { if(stonith_shutdown_flag) { crm_info("Heartbeat disconnection complete... exiting"); } else { crm_err("Heartbeat connection lost! Exiting."); } stonith_shutdown(0); } #endif #if SUPPORT_COROSYNC static gboolean stonith_peer_ais_callback( AIS_Message *wrapper, char *data, int sender) { xmlNode *xml = NULL; if(wrapper->header.id == crm_class_cluster) { xml = string2xml(data); if(xml == NULL) { goto bail; } crm_xml_add(xml, F_ORIG, wrapper->sender.uname); crm_xml_add_int(xml, F_SEQ, wrapper->id); stonith_peer_callback(xml, NULL); } free_xml(xml); return TRUE; bail: crm_err("Invalid XML: '%.120s'", data); return TRUE; } static void stonith_peer_ais_destroy(gpointer user_data) { crm_err("AIS connection terminated"); stonith_shutdown(0); } #endif void do_local_reply(xmlNode *notify_src, const char *client_id, gboolean sync_reply, gboolean from_peer) { /* send callback to originating child */ stonith_client_t *client_obj = NULL; int local_rc = pcmk_ok; crm_trace("Sending response"); if(client_id != NULL) { client_obj = g_hash_table_lookup(client_list, client_id); } else { crm_trace("No client to sent the response to." " F_STONITH_CLIENTID not set."); } crm_trace("Sending callback to request originator"); if(client_obj == NULL) { local_rc = -1; } else { crm_trace("Sending %ssync response to %s %s", sync_reply?"":"an a-", client_obj->name, from_peer?"(originator of delegated request)":""); local_rc = crm_ipcs_send(client_obj->channel, notify_src, !sync_reply); } if(local_rc < pcmk_ok && client_obj != NULL) { crm_warn("%sSync reply to %s failed: %s", sync_reply?"":"A-", client_obj?client_obj->name:"", pcmk_strerror(local_rc)); } } long long get_stonith_flag(const char *name) { if(safe_str_eq(name, STONITH_OP_FENCE)) { return 0x01; } else if(safe_str_eq(name, STONITH_OP_DEVICE_ADD)) { return 0x04; } else if(safe_str_eq(name, STONITH_OP_DEVICE_DEL)) { return 0x10; } return 0; } static void stonith_notify_client(gpointer key, gpointer value, gpointer user_data) { xmlNode *update_msg = user_data; stonith_client_t *client = value; const char *type = NULL; CRM_CHECK(client != NULL, return); CRM_CHECK(update_msg != NULL, return); type = crm_element_value(update_msg, F_SUBTYPE); CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return); if(client->channel == NULL) { crm_trace("Skipping client with NULL channel"); return; } else if(client->name == NULL) { crm_trace("Skipping unnammed client / comamnd channel"); return; } if(client->flags & get_stonith_flag(type)) { crm_trace("Sending %s-notification to client %s/%s", type, client->name, client->id); if(crm_ipcs_send(client->channel, update_msg, ipcs_send_event|ipcs_send_error) <= 0) { crm_warn("%s-Notification of client %s/%s failed", type, client->name, client->id); } } } void do_stonith_notify( int options, const char *type, int result, xmlNode *data, const char *remote) { /* TODO: Standardize the contents of data */ xmlNode *update_msg = create_xml_node(NULL, "notify"); CRM_CHECK(type != NULL, ;); crm_xml_add(update_msg, F_TYPE, T_STONITH_NOTIFY); crm_xml_add(update_msg, F_SUBTYPE, type); crm_xml_add(update_msg, F_STONITH_OPERATION, type); crm_xml_add_int(update_msg, F_STONITH_RC, result); if(data != NULL) { add_message_xml(update_msg, F_STONITH_CALLDATA, data); } crm_trace("Notifying clients"); g_hash_table_foreach(client_list, stonith_notify_client, update_msg); free_xml(update_msg); crm_trace("Notify complete"); } static stonith_key_value_t *parse_device_list(const char *devices) { int lpc = 0; int max = 0; int last = 0; stonith_key_value_t *output = NULL; if(devices == NULL) { return output; } max = strlen(devices); for(lpc = 0; lpc <= max; lpc++) { if(devices[lpc] == ',' || devices[lpc] == 0) { char *line = NULL; line = calloc(1, 2 + lpc - last); snprintf(line, 1 + lpc - last, "%s", devices+last); output = stonith_key_value_add(output, NULL, line); free(line); last = lpc + 1; } } return output; } static void topology_remove_helper(const char *node, int level) { xmlNode *data = create_xml_node(NULL, F_STONITH_LEVEL); crm_xml_add(data, "origin", __FUNCTION__); crm_xml_add_int(data, XML_ATTR_ID, level); crm_xml_add(data, F_STONITH_TARGET, node); stonith_level_remove(data); free_xml(data); } static void topology_register_helper(const char *node, int level, stonith_key_value_t *device_list) { xmlNode *data = create_level_registration_xml(node, level, device_list); stonith_level_register(data); free_xml(data); } static void remove_fencing_topology(xmlXPathObjectPtr xpathObj) { int max = 0, lpc = 0; if(xpathObj && xpathObj->nodesetval) { max = xpathObj->nodesetval->nodeNr; } for(lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); CRM_CHECK(match != NULL, continue); if(crm_element_value(match, XML_DIFF_MARKER)) { /* Deletion */ int index = 0; const char *target = crm_element_value(match, XML_ATTR_STONITH_TARGET); crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index); if(target == NULL) { crm_err("Invalid fencing target in element %s", ID(match)); } else if(index <= 0) { crm_err("Invalid level for %s in element %s", target, ID(match)); } else { topology_remove_helper(target, index); } /* } else { Deal with modifications during the 'addition' stage */ } } } static void register_fencing_topology(xmlXPathObjectPtr xpathObj, gboolean force) { int max = 0, lpc = 0; if(xpathObj && xpathObj->nodesetval) { max = xpathObj->nodesetval->nodeNr; } for(lpc = 0; lpc < max; lpc++) { int index = 0; const char *target; const char *dev_list; stonith_key_value_t *devices = NULL; xmlNode *match = getXpathResult(xpathObj, lpc); CRM_CHECK(match != NULL, continue); crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index); target = crm_element_value(match, XML_ATTR_STONITH_TARGET); dev_list = crm_element_value(match, XML_ATTR_STONITH_DEVICES); devices = parse_device_list(dev_list); crm_trace("Updating %s[%d] (%s) to %s", target, index, ID(match), dev_list); if(target == NULL) { crm_err("Invalid fencing target in element %s", ID(match)); } else if(index <= 0) { crm_err("Invalid level for %s in element %s", target, ID(match)); } else if(force == FALSE && crm_element_value(match, XML_DIFF_MARKER)) { /* Addition */ topology_register_helper(target, index, devices); } else { /* Modification */ /* Remove then re-add */ topology_remove_helper(target, index); topology_register_helper(target, index, devices); } stonith_key_value_freeall(devices, 1, 1); } } /* Fencing */ static void fencing_topology_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { xmlXPathObjectPtr xpathObj = NULL; const char *xpath = "//" XML_TAG_FENCING_LEVEL; crm_trace("Pushing in stonith topology"); /* Grab everything */ xpathObj = xpath_search(msg, xpath); register_fencing_topology(xpathObj, TRUE); if(xpathObj) { xmlXPathFreeObject(xpathObj); } } static void update_fencing_topology(const char *event, xmlNode * msg) { const char *xpath; xmlXPathObjectPtr xpathObj = NULL; /* Process deletions (only) */ xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_FENCING_LEVEL; xpathObj = xpath_search(msg, xpath); remove_fencing_topology(xpathObj); if(xpathObj) { xmlXPathFreeObject(xpathObj); } /* Process additions and changes */ xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_FENCING_LEVEL; xpathObj = xpath_search(msg, xpath); register_fencing_topology(xpathObj, FALSE); if(xpathObj) { xmlXPathFreeObject(xpathObj); } } static void stonith_shutdown(int nsig) { stonith_shutdown_flag = TRUE; crm_info("Terminating with %d clients", g_hash_table_size(client_list)); if(mainloop != NULL && g_main_is_running(mainloop)) { g_main_quit(mainloop); } else { stonith_cleanup(); exit(EX_OK); } } cib_t *cib = NULL; static void stonith_cleanup(void) { if(cib) { cib->cmds->signoff(cib); } qb_ipcs_destroy(ipcs); crm_peer_destroy(); g_hash_table_destroy(client_list); free(stonith_our_uname); #if HAVE_LIBXML2 crm_xml_cleanup(); #endif } /* *INDENT-OFF* */ static struct crm_option long_options[] = { {"stand-alone", 0, 0, 's'}, {"verbose", 0, 0, 'V'}, {"version", 0, 0, '$'}, {"help", 0, 0, '?'}, {0, 0, 0, 0} }; /* *INDENT-ON* */ static void setup_cib(void) { static void *cib_library = NULL; static cib_t *(*cib_new_fn)(void) = NULL; static const char *(*cib_err_fn)(int) = NULL; int rc, retries = 0; if(cib_library == NULL) { cib_library = dlopen(CIB_LIBRARY, RTLD_LAZY); } if(cib_library && cib_new_fn == NULL) { cib_new_fn = dlsym(cib_library, "cib_new"); } if(cib_library && cib_err_fn == NULL) { cib_err_fn = dlsym(cib_library, "pcmk_strerror"); } if(cib_new_fn != NULL) { cib = (*cib_new_fn)(); } if(cib == NULL) { crm_err("No connection to the CIB"); return; } do { sleep(retries); rc = cib->cmds->signon(cib, CRM_SYSTEM_CRMD, cib_command); } while(rc == -ENOTCONN && ++retries < 5); if (rc != pcmk_ok) { crm_err("Could not connect to the CIB service: %s", (*cib_err_fn)(rc)); } else if (pcmk_ok != cib->cmds->add_notify_callback( cib, T_CIB_DIFF_NOTIFY, update_fencing_topology)) { crm_err("Could not set CIB notification callback"); } else { rc = cib->cmds->query(cib, NULL, NULL, cib_scope_local); add_cib_op_callback(cib, rc, FALSE, NULL, fencing_topology_callback); crm_notice("Watching for stonith topology changes"); } } struct qb_ipcs_service_handlers ipc_callbacks = { .connection_accept = st_ipc_accept, .connection_created = st_ipc_created, .msg_process = st_ipc_dispatch, .connection_closed = st_ipc_closed, .connection_destroyed = st_ipc_destroy }; int main(int argc, char ** argv) { int flag; int rc = 0; int lpc = 0; int argerr = 0; int option_index = 0; const char *actions[] = { "reboot", "poweroff", "list", "monitor", "status" }; crm_log_init("stonith-ng", LOG_INFO, TRUE, FALSE, argc, argv, FALSE); crm_set_options(NULL, "mode [options]", long_options, "Provides a summary of cluster's current state." "\n\nOutputs varying levels of detail in a number of different formats.\n"); while (1) { flag = crm_get_option(argc, argv, &option_index); if (flag == -1) break; switch(flag) { case 'V': crm_bump_log_level(); break; case 's': stand_alone = TRUE; break; case '$': case '?': crm_help(flag, EX_OK); break; default: ++argerr; break; } } if(argc - optind == 1 && safe_str_eq("metadata", argv[optind])) { printf("\n"); printf("\n"); printf(" 1.0\n"); printf(" This is a fake resource that details the instance attributes handled by stonithd.\n"); printf(" Options available for all stonith resources\n"); printf(" \n"); printf(" \n"); printf(" How long to wait for the STONITH action to complete.\n"); printf(" Overrides the stonith-timeout cluster property\n"); printf(" \n"); printf(" \n"); printf(" \n"); printf(" The priority of the stonith resource. The lower the number, the higher the priority.\n"); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_HOSTARG); printf(" Advanced use only: An alternate parameter to supply instead of 'port'\n"); printf(" Some devices do not support the standard 'port' parameter or may provide additional ones.\n" "Use this to specify an alternate, device-specific, parameter that should indicate the machine to be fenced.\n" "A value of 'none' can be used to tell the cluster not to supply any additional parameters.\n" " \n"); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_HOSTMAP); printf(" A mapping of host names to ports numbers for devices that do not support host names.\n"); printf(" Eg. node1:1;node2:2,3 would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2\n"); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_HOSTLIST); printf(" A list of machines controlled by this device (Optional unless %s=static-list).\n", STONITH_ATTR_HOSTCHECK); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_HOSTCHECK); printf(" How to determin which machines are controlled by the device.\n"); printf(" Allowed values: dynamic-list (query the device), static-list (check the %s attribute), none (assume every device can fence every machine)\n", STONITH_ATTR_HOSTLIST); printf(" \n"); printf(" \n"); for(lpc = 0; lpc < DIMOF(actions); lpc++) { printf(" \n", actions[lpc]); printf(" Advanced use only: An alternate command to run instead of '%s'\n", actions[lpc]); printf(" Some devices do not support the standard commands or may provide additional ones.\n" "Use this to specify an alternate, device-specific, command that implements the '%s' action.\n", actions[lpc]); printf(" \n", actions[lpc]); printf(" \n"); } printf(" \n"); printf("\n"); return 0; } if (optind != argc) { ++argerr; } if (argerr) { crm_help('?', EX_USAGE); } mainloop_add_signal(SIGTERM, stonith_shutdown); crm_peer_init(); client_list = g_hash_table_new(crm_str_hash, g_str_equal); if(stand_alone == FALSE) { void *dispatch = NULL; void *destroy = NULL; #if SUPPORT_HEARTBEAT dispatch = stonith_peer_hb_callback; destroy = stonith_peer_hb_destroy; #endif if(is_openais_cluster()) { #if SUPPORT_COROSYNC destroy = stonith_peer_ais_destroy; dispatch = stonith_peer_ais_callback; #endif } if(crm_cluster_connect(&stonith_our_uname, NULL, dispatch, destroy, #if SUPPORT_HEARTBEAT &hb_conn #else NULL #endif ) == FALSE){ crm_crit("Cannot sign in to the cluster... terminating"); exit(100); } setup_cib(); } else { stonith_our_uname = strdup("localhost"); } device_list = g_hash_table_new_full( crm_str_hash, g_str_equal, NULL, free_device); topology = g_hash_table_new_full( crm_str_hash, g_str_equal, NULL, free_topology_entry); ipcs = mainloop_add_ipc_server("stonith-ng", QB_IPC_NATIVE, &ipc_callbacks); #if SUPPORT_STONITH_CONFIG if (((stand_alone == TRUE)) && !(standalone_cfg_read_file(STONITH_NG_CONF_FILE))) { standalone_cfg_commit(); } #endif if(ipcs != NULL) { /* Create the mainloop and run it... */ mainloop = g_main_new(FALSE); crm_info("Starting %s mainloop", crm_system_name); g_main_run(mainloop); } else { crm_err("Couldnt start all communication channels, exiting."); } stonith_cleanup(); #if SUPPORT_HEARTBEAT if(hb_conn) { hb_conn->llc_ops->delete(hb_conn); } #endif crm_info("Done"); qb_log_fini(); return rc; } diff --git a/fencing/remote.c b/fencing/remote.c index f98d399b98..9400610ee2 100644 --- a/fencing/remote.c +++ b/fencing/remote.c @@ -1,676 +1,676 @@ /* * Copyright (C) 2009 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include +#include #include #include #include #include #include typedef struct st_query_result_s { char *host; int devices; GListPtr device_list; } st_query_result_t; GHashTable *remote_op_list = NULL; void call_remote_stonith(remote_fencing_op_t *op, st_query_result_t *peer); extern xmlNode *stonith_create_op( int call_id, const char *token, const char *op, xmlNode *data, int call_options); static void free_remote_query(gpointer data) { if(data) { st_query_result_t *query = data; crm_trace("Free'ing query result from %s", query->host); free(query->host); free(query); } } static void free_remote_op(gpointer data) { remote_fencing_op_t *op = data; crm_trace("Free'ing op %s for %s", op->id, op->target); crm_log_xml_debug(op->request, "Destroying"); free(op->id); free(op->action); free(op->target); free(op->client_id); free(op->originator); if(op->query_timer) { g_source_remove(op->query_timer); } if(op->op_timer) { g_source_remove(op->op_timer); } if(op->query_results) { g_list_free_full(op->query_results, free_remote_query); } if(op->request) { free_xml(op->request); op->request = NULL; } free(op); } static void remote_op_done(remote_fencing_op_t *op, xmlNode *data, int rc) { xmlNode *reply = NULL; xmlNode *local_data = NULL; xmlNode *notify_data = NULL; op->completed = time(NULL); if(op->query_timer) { g_source_remove(op->query_timer); op->query_timer = 0; } if(op->op_timer) { g_source_remove(op->op_timer); op->op_timer = 0; } if(data == NULL) { data = create_xml_node(NULL, "remote-op"); local_data = data; } else { op->delegate = crm_element_value_copy(data, F_ORIG); } crm_xml_add_int(data, "state", op->state); crm_xml_add(data, F_STONITH_TARGET, op->target); crm_xml_add(data, F_STONITH_OPERATION, op->action); if(op->request != NULL) { reply = stonith_construct_reply(op->request, NULL, data, rc); crm_xml_add(reply, F_STONITH_DELEGATE, op->delegate); do_crm_log(rc==pcmk_ok?LOG_NOTICE:LOG_ERR, "Operation %s of %s by %s for %s[%s]: %s", op->action, op->target, op->delegate?op->delegate:"", op->originator, op->client_id, pcmk_strerror(rc)); } else { crm_err("Already sent notifications for '%s of %s by %s' (op=%s, for=%s, state=%d): %s", op->action, op->target, op->delegate, op->id, op->client_id, op->state, pcmk_strerror(rc)); return; } if(reply) { do_local_reply(reply, op->client_id, op->call_options & st_opt_sync_call, FALSE); } /* Do notification with a clean data object */ notify_data = create_xml_node(NULL, "st-data"); crm_xml_add_int(notify_data, "state", op->state); crm_xml_add_int(notify_data, F_STONITH_RC, rc); crm_xml_add(notify_data, F_STONITH_TARGET, op->target); crm_xml_add(notify_data, F_STONITH_OPERATION, op->action); crm_xml_add(notify_data, F_STONITH_DELEGATE, op->delegate); crm_xml_add(notify_data, F_STONITH_REMOTE, op->id); crm_xml_add(notify_data, F_STONITH_ORIGIN, op->originator); do_stonith_notify(0, STONITH_OP_FENCE, rc, notify_data, NULL); free_xml(notify_data); free_xml(local_data); free_xml(reply); /* Free non-essential parts of the record * Keep the record around so we can query the history */ if(op->query_results) { g_list_free_full(op->query_results, free_remote_query); op->query_results = NULL; } if(op->request) { free_xml(op->request); op->request = NULL; } } static gboolean remote_op_timeout(gpointer userdata) { remote_fencing_op_t *op = userdata; op->query_timer = 0; if(op->state == st_done) { crm_debug("Action %s (%s) for %s already completed", op->action, op->id, op->target); return FALSE; } crm_debug("Action %s (%s) for %s timed out", op->action, op->id, op->target); remote_op_done(op, NULL, -ETIME); op->state = st_failed; return FALSE; } static gboolean remote_op_query_timeout(gpointer data) { remote_fencing_op_t *op = data; op->query_timer = 0; if(op->state == st_done) { crm_debug("Operation %s for %s already completed", op->id, op->target); } else if(op->state == st_exec) { crm_debug("Operation %s for %s already in progress", op->id, op->target); } else if(op->query_results) { crm_debug("Query %s for %s complete: %d", op->id, op->target, op->state); call_remote_stonith(op, NULL); } else { if(op->op_timer) { g_source_remove(op->op_timer); op->op_timer = 0; } remote_op_timeout(op); } return FALSE; } static int stonith_topology_next(remote_fencing_op_t *op) { stonith_topology_t *tp = NULL; if(op->target) { /* Queries don't have a target set */ tp = g_hash_table_lookup(topology, op->target); } if(tp == NULL) { return pcmk_ok; } set_bit(op->call_options, st_opt_topology); do { op->level++; } while(op->level < ST_LEVEL_MAX && tp->levels[op->level] == NULL); if(op->level < ST_LEVEL_MAX) { crm_trace("Attempting fencing level %d for %s (%d devices)", op->level, op->target, g_list_length(tp->levels[op->level])); op->devices = tp->levels[op->level]; return pcmk_ok; } crm_notice("All fencing options for %s failed", op->target); return -EINVAL; } void *create_remote_stonith_op(const char *client, xmlNode *request, gboolean peer) { remote_fencing_op_t *op = NULL; xmlNode *dev = get_xpath_object("//@"F_STONITH_TARGET, request, LOG_TRACE); if(remote_op_list == NULL) { remote_op_list = g_hash_table_new_full( crm_str_hash, g_str_equal, NULL, free_remote_op); } if(peer && dev) { const char *peer_id = crm_element_value(dev, F_STONITH_REMOTE); CRM_CHECK(peer_id != NULL, return NULL); op = g_hash_table_lookup(remote_op_list, peer_id); if(op) { crm_debug("%s already exists", peer_id); return op; } } op = calloc(1, sizeof(remote_fencing_op_t)); crm_element_value_int(request, F_STONITH_TIMEOUT, (int*)&(op->base_timeout)); if(peer && dev) { op->id = crm_element_value_copy(dev, F_STONITH_REMOTE); crm_trace("Recorded new stonith op: %s", op->id); } else { op->id = crm_generate_uuid(); crm_trace("Generated new stonith op: %s", op->id); } g_hash_table_replace(remote_op_list, op->id, op); CRM_LOG_ASSERT(g_hash_table_lookup(remote_op_list, op->id) != NULL); op->state = st_query; op->action = crm_element_value_copy(dev, F_STONITH_ACTION); op->originator = crm_element_value_copy(dev, F_STONITH_OWNER); if(op->originator == NULL) { /* Local request */ op->originator = strdup(stonith_our_uname); } if(client) { op->client_id = strdup(client); } op->target = crm_element_value_copy(dev, F_STONITH_TARGET); op->request = copy_xml(request); /* TODO: Figure out how to avoid this */ crm_element_value_int(request, F_STONITH_CALLOPTS, (int*)&(op->call_options)); if(op->call_options & st_opt_cs_nodeid) { int nodeid = crm_atoi(op->target, NULL); crm_node_t *node = crm_get_peer(nodeid, NULL); /* Ensure the conversion only happens once */ op->call_options &= ~st_opt_cs_nodeid; if(node) { free(op->target); op->target = strdup(node->uname); } } if(stonith_topology_next(op) != pcmk_ok) { op->state = st_failed; } return op; } remote_fencing_op_t *initiate_remote_stonith_op(stonith_client_t *client, xmlNode *request, gboolean manual_ack) { xmlNode *query = NULL; const char *client_id = NULL; remote_fencing_op_t *op = NULL; if(client) { client_id = client->id; } else { client_id = crm_element_value(request, F_STONITH_CLIENTID); } CRM_LOG_ASSERT(client_id != NULL); op = create_remote_stonith_op(client_id, request, FALSE); query = stonith_create_op(0, op->id, STONITH_OP_QUERY, NULL, 0); if(!manual_ack) { op->op_timer = g_timeout_add(1200*op->base_timeout, remote_op_timeout, op); op->query_timer = g_timeout_add(100*op->base_timeout, remote_op_query_timeout, op); } else { crm_xml_add(query, F_STONITH_DEVICE, "manual_ack"); } crm_xml_add(query, F_STONITH_REMOTE, op->id); crm_xml_add(query, F_STONITH_TARGET, op->target); crm_xml_add(query, F_STONITH_ACTION, op->action); crm_xml_add(query, F_STONITH_OWNER, op->originator); crm_xml_add(query, F_STONITH_CLIENTID, op->client_id); crm_xml_add_int(query, F_STONITH_TIMEOUT, op->base_timeout); crm_info("Initiating remote operation %s for %s: %s", op->action, op->target, op->id); CRM_CHECK(op->action, return NULL); send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE); free_xml(query); return op; } static gint sort_strings(gconstpointer a, gconstpointer b) { return strcmp(a, b); } static st_query_result_t *stonith_choose_peer(remote_fencing_op_t *op) { GListPtr iter = NULL; do { if(op->devices) { crm_trace("Checking for someone to fence %s with %s", op->target, (char*)op->devices->data); } else { crm_trace("Checking for someone to fence %s", op->target); } for(iter = op->query_results; iter != NULL; iter = iter->next) { st_query_result_t *peer = iter->data; if(is_set(op->call_options, st_opt_topology)) { /* Do they have the next device of the current fencing level? */ GListPtr match = NULL; if(op->devices) { match = g_list_find_custom(peer->device_list, op->devices->data, sort_strings); } if(match) { crm_trace("Removing %s from %s (%d remaining)", (char*)match->data, peer->host, g_list_length(peer->device_list)); peer->device_list = g_list_remove(peer->device_list, match->data); return peer; } } else if(peer && peer->devices > 0) { /* No topology: Use the current best peer */ crm_trace("Simple fencing"); return peer; } } /* Try the next fencing level if there is one */ } while(is_set(op->call_options, st_opt_topology) && stonith_topology_next(op) == pcmk_ok); if(op->devices) { crm_trace("Couldn't find anyone to fence %s with %s", op->target, (char*)op->devices->data); } else { crm_trace("Couldn't find anyone to fence %s", op->target); } return NULL; } void call_remote_stonith(remote_fencing_op_t *op, st_query_result_t *peer) { const char *device = NULL; int timeout = op->base_timeout; if(is_set(op->call_options, st_opt_topology)) { int num_devices = g_list_length(op->devices); /* Not accurate when there are multiple levels */ if(num_devices) { timeout /= num_devices; crm_trace("Dividing the timeout (%ds) equally between %d devices: %ds", op->base_timeout, num_devices, timeout); } /* Ignore any preference, they might not have the device we need */ peer = stonith_choose_peer(op); device = op->devices->data; } else if(peer == NULL) { peer = stonith_choose_peer(op); } if(peer) { xmlNode *query = stonith_create_op(0, op->id, STONITH_OP_FENCE, NULL, 0); crm_xml_add(query, F_STONITH_REMOTE, op->id); crm_xml_add(query, F_STONITH_TARGET, op->target); crm_xml_add(query, F_STONITH_ACTION, op->action); crm_xml_add(query, F_STONITH_OWNER, op->originator); crm_xml_add(query, F_STONITH_CLIENTID, op->client_id); crm_xml_add_int(query, F_STONITH_TIMEOUT, timeout); if(device) { crm_info("Requesting that %s perform op %s %s with %s", peer->host, op->action, op->target, device); crm_xml_add(query, F_STONITH_DEVICE, device); crm_xml_add(query, F_STONITH_MODE, "slave"); } else { crm_info("Requesting that %s perform op %s %s", peer->host, op->action, op->target); crm_xml_add(query, F_STONITH_MODE, "smart"); } op->state = st_exec; send_cluster_message(peer->host, crm_msg_stonith_ng, query, FALSE); free_xml(query); return; } else if(op->query_timer == 0) { /* We've exhausted all available peers */ crm_info("No remaining peers capable of terminating %s", op->target); remote_op_timeout(op); } else if(device) { crm_info("Waiting for additional peers capable of terminating %s with %s", op->target, device); } else { crm_info("Waiting for additional peers capable of terminating %s", op->target); } free_remote_query(peer); } static gint sort_peers(gconstpointer a, gconstpointer b) { const st_query_result_t *peer_a = a; const st_query_result_t *peer_b = a; if(peer_a->devices > peer_b->devices) { return -1; } else if(peer_a->devices > peer_b->devices) { return 1; } return 0; } int process_remote_stonith_query(xmlNode *msg) { int devices = 0; const char *id = NULL; const char *host = NULL; remote_fencing_op_t *op = NULL; st_query_result_t *result = NULL; xmlNode *dev = get_xpath_object("//@"F_STONITH_REMOTE, msg, LOG_ERR); xmlNode *child = NULL; CRM_CHECK(dev != NULL, return -EPROTO); id = crm_element_value(dev, F_STONITH_REMOTE); CRM_CHECK(id != NULL, return -EPROTO); dev = get_xpath_object("//@st-available-devices", msg, LOG_ERR); CRM_CHECK(dev != NULL, return -EPROTO); crm_element_value_int(dev, "st-available-devices", &devices); op = g_hash_table_lookup(remote_op_list, id); if(op == NULL) { crm_debug("Unknown or expired remote op: %s", id); return -EOPNOTSUPP; } op->replies++; host = crm_element_value(msg, F_ORIG); if(devices <= 0) { /* If we're doing 'known' then we might need to fire anyway */ crm_trace("Query result from %s (%d devices)", host, devices); return pcmk_ok; } else if(op->call_options & st_opt_allow_suicide) { crm_trace("Allowing %s to potentialy fence itself", op->target); } else if(safe_str_eq(host, op->target)) { crm_info("Ignoring reply from %s, hosts are not permitted to commit suicide", op->target); return pcmk_ok; } crm_debug("Query result from %s (%d devices)", host, devices); result = calloc(1, sizeof(st_query_result_t)); result->host = strdup(host); result->devices = devices; for (child = __xml_first_child(dev); child != NULL; child = __xml_next(child)) { const char *device = ID(child); if(device) { result->device_list = g_list_prepend(result->device_list, strdup(device)); } } CRM_CHECK(devices == g_list_length(result->device_list), crm_err("Mis-match: Query claimed to have %d devices but %d found", devices, g_list_length(result->device_list))); op->query_results = g_list_insert_sorted(op->query_results, result, sort_peers); if(op->state == st_query && is_set(op->call_options, st_opt_all_replies) == FALSE) { call_remote_stonith(op, result); } else if(op->state == st_done) { crm_info("Discarding query result from %s (%d devices): Operation is in state %d", result->host, result->devices, op->state); } return pcmk_ok; } int process_remote_stonith_exec(xmlNode *msg) { int rc = 0; const char *id = NULL; remote_fencing_op_t *op = NULL; xmlNode *dev = get_xpath_object("//@"F_STONITH_REMOTE, msg, LOG_ERR); CRM_CHECK(dev != NULL, return -EPROTO); id = crm_element_value(dev, F_STONITH_REMOTE); CRM_CHECK(id != NULL, return -EPROTO); dev = get_xpath_object("//@"F_STONITH_RC, msg, LOG_ERR); CRM_CHECK(dev != NULL, return -EPROTO); crm_element_value_int(dev, F_STONITH_RC, &rc); if(remote_op_list) { op = g_hash_table_lookup(remote_op_list, id); } if(op == NULL && rc == pcmk_ok) { /* Record successful fencing operations */ const char *client_id = crm_element_value(msg, F_STONITH_CLIENTID); op = create_remote_stonith_op(client_id, msg, TRUE); } if(op == NULL) { /* Could be for an event that began before we started */ /* TODO: Record the op for later querying */ crm_info("Unknown or expired remote op: %s", id); return -EOPNOTSUPP; } if(is_set(op->call_options, st_opt_topology)) { const char *device = crm_element_value(msg, F_STONITH_DEVICE); crm_notice("Call to %s for %s on behalf of %s: %s (%d)", device, op->target, op->originator, rc == pcmk_ok?"passed":"failed", rc); if(safe_str_eq(op->originator, stonith_our_uname)) { if(op->state == st_done) { remote_op_done(op, msg, rc); return rc; } else if(rc == pcmk_ok && op->devices) { /* Success, are there any more? */ op->devices = op->devices->next; } if(op->devices == NULL) { crm_trace("Broadcasting completion of complex fencing op for %s", op->target); send_cluster_message(NULL, crm_msg_stonith_ng, msg, FALSE); op->state = st_done; return rc; } } else { op->state = st_done; remote_op_done(op, msg, rc); } } else if(rc == pcmk_ok && op->devices == NULL) { crm_trace("All done for %s", op->target); op->state = st_done; remote_op_done(op, msg, rc); return rc; } /* Retry on failure or execute the rest of the topology */ crm_trace("Next for %s (rc was %d)", op->target, rc); call_remote_stonith(op, NULL); return rc; } int stonith_fence_history(xmlNode *msg, xmlNode **output) { int rc = 0; const char *target = NULL; xmlNode *dev = get_xpath_object("//@"F_STONITH_TARGET, msg, LOG_TRACE); if(dev) { int options = 0; target = crm_element_value(dev, F_STONITH_TARGET); crm_element_value_int(msg, F_STONITH_CALLOPTS, &options); if(target && (options & st_opt_cs_nodeid)) { int nodeid = crm_atoi(target, NULL); crm_node_t *node = crm_get_peer(nodeid, NULL); if(node) { target = node->uname; } } } *output = create_xml_node(NULL, F_STONITH_HISTORY_LIST); if (remote_op_list) { GHashTableIter iter; remote_fencing_op_t *op = NULL; g_hash_table_iter_init(&iter, remote_op_list); while(g_hash_table_iter_next(&iter, NULL, (void**)&op)) { xmlNode *entry = NULL; if (target && strcmp(op->target, target) != 0) { continue; } rc = 0; entry = create_xml_node(*output, STONITH_OP_EXEC); crm_xml_add(entry, F_STONITH_TARGET, op->target); crm_xml_add(entry, F_STONITH_ACTION, op->action); crm_xml_add(entry, F_STONITH_ORIGIN, op->originator); crm_xml_add(entry, F_STONITH_DELEGATE, op->delegate); crm_xml_add_int(entry, F_STONITH_DATE, op->completed); crm_xml_add_int(entry, F_STONITH_STATE, op->state); } } return rc; } diff --git a/fencing/test.c b/fencing/test.c index d842e0683b..e4132c7f26 100644 --- a/fencing/test.c +++ b/fencing/test.c @@ -1,193 +1,193 @@ /* * Copyright (C) 2009 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include -#include +#include #include #include #include /* *INDENT-OFF* */ static struct crm_option long_options[] = { {"verbose", 0, 0, 'V'}, {"version", 0, 0, '$'}, {"help", 0, 0, '?'}, {"passive", 0, 0, 'p'}, {0, 0, 0, 0} }; /* *INDENT-ON* */ int st_opts = st_opt_sync_call; static void st_callback(stonith_t *st, const char *event, xmlNode *msg) { crm_log_xml_notice(msg, event); } int main(int argc, char ** argv) { int argerr = 0; int flag; int option_index = 0; int rc = 0; struct pollfd pollfd; stonith_t *st = NULL; stonith_key_value_t *params = NULL; gboolean passive_mode = FALSE; crm_log_cli_init("stonith-test"); crm_set_options(NULL, "mode [options]", long_options, "Provides a summary of cluster's current state." "\n\nOutputs varying levels of detail in a number of different formats.\n"); params = stonith_key_value_add(params, "ipaddr", "localhost"); params = stonith_key_value_add(params, "pcmk-portmal", "some-host=pcmk-1 pcmk-3=3,4"); params = stonith_key_value_add(params, "login", "root"); params = stonith_key_value_add(params, "identity_file","/root/.ssh/id_dsa"); while (1) { flag = crm_get_option(argc, argv, &option_index); if (flag == -1) break; switch(flag) { case 'V': crm_bump_log_level(); break; case '$': case '?': crm_help(flag, EX_OK); break; case 'p': passive_mode = TRUE; break; default: ++argerr; break; } } if (optind > argc) { ++argerr; } if (argerr) { crm_help('?', EX_USAGE); } crm_debug("Create"); st = stonith_api_new(); rc = st->cmds->connect(st, crm_system_name, &pollfd.fd); crm_debug("Connect: %d", rc); rc = st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT, st_callback); if(passive_mode) { rc = st->cmds->register_notification(st, STONITH_OP_FENCE, st_callback); rc = st->cmds->register_notification(st, STONITH_OP_DEVICE_ADD, st_callback); rc = st->cmds->register_notification(st, STONITH_OP_DEVICE_DEL, st_callback); crm_info("Looking for notification"); pollfd.events = POLLIN; while(true) { rc = poll( &pollfd, 1, 600 * 1000 ); /* wait 10 minutes, -1 forever */ if (rc > 0 ) stonith_dispatch( st ); else break; } } else { rc = st->cmds->register_device(st, st_opts, "test-id", "stonith-ng", "fence_virsh", params); crm_debug("Register: %d", rc); rc = st->cmds->call(st, st_opts, "test-id", "list", NULL, 10); crm_debug("List: %d", rc); rc = st->cmds->call(st, st_opts, "test-id", "monitor", NULL, 10); crm_debug("Monitor: %d", rc); rc = st->cmds->call(st, st_opts, "test-id", "status", "pcmk-2", 10); crm_debug("Status pcmk-2: %d", rc); rc = st->cmds->call(st, st_opts, "test-id", "status", "pcmk-1", 10); crm_debug("Status pcmk-1: %d", rc); rc = st->cmds->fence(st, st_opts, "unknown-host", "off", 60); crm_debug("Fence unknown-host: %d", rc); rc = st->cmds->call(st, st_opts, "test-id", "status", "pcmk-1", 10); crm_debug("Status pcmk-1: %d", rc); rc = st->cmds->fence(st, st_opts, "pcmk-1", "off", 60); crm_debug("Fence pcmk-1: %d", rc); rc = st->cmds->call(st, st_opts, "test-id", "status", "pcmk-1", 10); crm_debug("Status pcmk-1: %d", rc); rc = st->cmds->fence(st, st_opts, "pcmk-1", "on", 10); crm_debug("Unfence pcmk-1: %d", rc); rc = st->cmds->call(st, st_opts, "test-id", "status", "pcmk-1", 10); crm_debug("Status pcmk-1: %d", rc); rc = st->cmds->fence(st, st_opts, "some-host", "off", 10); crm_debug("Fence alias: %d", rc); rc = st->cmds->call(st, st_opts, "test-id", "status", "some-host", 10); crm_debug("Status alias: %d", rc); rc = st->cmds->fence(st, st_opts, "pcmk-1", "on", 10); crm_debug("Unfence pcmk-1: %d", rc); rc = st->cmds->remove_device(st, st_opts, "test-id"); crm_debug("Remove test-id: %d", rc); } stonith_key_value_freeall(params, 1, 1); rc = st->cmds->disconnect(st); crm_debug("Disconnect: %d", rc); crm_debug("Destroy"); stonith_api_delete(st); return rc; } diff --git a/include/crm/ais.h b/include/crm/ais.h index 02e822c495..4d551f6b10 100644 --- a/include/crm/ais.h +++ b/include/crm/ais.h @@ -1,411 +1,30 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef CRM_AIS__H # define CRM_AIS__H # include # include # include # include # include -# define AIS_IPC_MESSAGE_SIZE 8192*128 - -# if SUPPORT_COROSYNC -# if CS_USES_LIBQB -# include -# include -typedef struct qb_ipc_request_header cs_ipc_header_request_t; -typedef struct qb_ipc_response_header cs_ipc_header_response_t; -# else -# include -# include -# include -static inline int -qb_to_cs_error(int a) -{ - return a; -} - -typedef coroipc_request_header_t cs_ipc_header_request_t; -typedef coroipc_response_header_t cs_ipc_header_response_t; -# endif -# else -typedef struct { - int size __attribute__ ((aligned(8))); - int id __attribute__ ((aligned(8))); -} __attribute__ ((aligned(8))) cs_ipc_header_request_t; - -typedef struct { - int size __attribute__ ((aligned(8))); - int id __attribute__ ((aligned(8))); - int error __attribute__ ((aligned(8))); -} __attribute__ ((aligned(8))) cs_ipc_header_response_t; - -# endif - -# define CRM_MESSAGE_IPC_ACK 0 - -# ifndef CRM_SERVICE -# define CRM_SERVICE PCMK_SERVICE_ID -# endif - -# define MAX_NAME 256 -# define AIS_IPC_NAME "ais-crm-ipc" - -/* *INDENT-OFF* */ -#define CRM_NODE_LOST "lost" -#define CRM_NODE_MEMBER "member" -#define CRM_NODE_ACTIVE CRM_NODE_MEMBER -#define CRM_NODE_INACTIVE CRM_NODE_LOST -#define CRM_NODE_EVICTED "evicted" - -typedef struct crm_ais_host_s AIS_Host; -typedef struct crm_ais_msg_s AIS_Message; - -enum crm_ais_msg_class { - crm_class_cluster = 0, - crm_class_members = 1, - crm_class_notify = 2, - crm_class_nodeid = 3, - crm_class_rmpeer = 4, - crm_class_quorum = 5, -}; - -/* order here matters - its used to index into the crm_children array */ -enum crm_ais_msg_types { - crm_msg_none = 0, - crm_msg_ais = 1, - crm_msg_lrmd = 2, - crm_msg_cib = 3, - crm_msg_crmd = 4, - crm_msg_attrd = 5, - crm_msg_stonithd = 6, - crm_msg_te = 7, - crm_msg_pe = 8, - crm_msg_stonith_ng = 9, -}; - -enum crm_proc_flag { - crm_proc_none = 0x00000001, - - /* 3 messaging types */ - crm_proc_heartbeat = 0x01000000, - crm_proc_plugin = 0x00000002, - crm_proc_cpg = 0x04000000, - - crm_proc_lrmd = 0x00000010, - crm_proc_cib = 0x00000100, - crm_proc_crmd = 0x00000200, - crm_proc_attrd = 0x00001000, - - crm_proc_pe = 0x00010000, - crm_proc_te = 0x00020000, - - crm_proc_stonithd = 0x00002000, - crm_proc_stonith_ng= 0x00100000, - - crm_proc_mgmtd = 0x00040000, -}; -/* *INDENT-ON* */ - -typedef struct crm_peer_node_s { - uint32_t id; - uint64_t born; - uint64_t last_seen; - - int32_t votes; - uint32_t processes; - - char *uname; - char *state; - char *uuid; - char *addr; - char *version; -} crm_node_t; - -struct crm_ais_host_s { - uint32_t id; - uint32_t pid; - gboolean local; - enum crm_ais_msg_types type; - uint32_t size; - char uname[MAX_NAME]; - -} __attribute__ ((packed)); - -struct crm_ais_msg_s { - cs_ipc_header_response_t header __attribute__ ((aligned(8))); - uint32_t id; - gboolean is_compressed; - - AIS_Host host; - AIS_Host sender; - - uint32_t size; - uint32_t compressed_size; - /* 584 bytes */ - char data[0]; - -} __attribute__ ((packed)); - -struct crm_ais_nodeid_resp_s { - cs_ipc_header_response_t header __attribute__ ((aligned(8))); - uint32_t id; - uint32_t counter; - char uname[MAX_NAME]; - char cname[MAX_NAME]; -} __attribute__ ((packed)); - -struct crm_ais_quorum_resp_s { - cs_ipc_header_response_t header __attribute__ ((aligned(8))); - uint64_t id; - uint32_t votes; - uint32_t expected_votes; - uint32_t quorate; -} __attribute__ ((packed)); - -static inline const char * -msg_type2text(enum crm_ais_msg_types type) -{ - const char *text = "unknown"; - - switch (type) { - case crm_msg_none: - text = "unknown"; - break; - case crm_msg_ais: - text = "ais"; - break; - case crm_msg_cib: - text = "cib"; - break; - case crm_msg_crmd: - text = "crmd"; - break; - case crm_msg_pe: - text = "pengine"; - break; - case crm_msg_te: - text = "tengine"; - break; - case crm_msg_lrmd: - text = "lrmd"; - break; - case crm_msg_attrd: - text = "attrd"; - break; - case crm_msg_stonithd: - text = "stonithd"; - break; - case crm_msg_stonith_ng: - text = "stonith-ng"; - break; - } - return text; -} - -static inline const char * -peer2text(enum crm_proc_flag proc) -{ - const char *text = "unknown"; - - switch (proc) { - case crm_proc_none: - text = "unknown"; - break; - case crm_proc_plugin: - text = "ais"; - break; - case crm_proc_heartbeat: - text = "heartbeat"; - break; - case crm_proc_cib: - text = "cib"; - break; - case crm_proc_crmd: - text = "crmd"; - break; - case crm_proc_pe: - text = "pengine"; - break; - case crm_proc_te: - text = "tengine"; - break; - case crm_proc_lrmd: - text = "lrmd"; - break; - case crm_proc_attrd: - text = "attrd"; - break; - case crm_proc_stonithd: - text = "stonithd"; - break; - case crm_proc_stonith_ng: - text = "stonith-ng"; - break; - case crm_proc_mgmtd: - text = "mgmtd"; - break; - case crm_proc_cpg: - text = "corosync-cpg"; - break; - } - return text; -} - -static inline enum crm_proc_flag -text2proc(const char *proc) -{ - /* We only care about these two so far */ - - if(proc && strcmp(proc, "cib") == 0) { - return crm_proc_cib; - } else if(proc && strcmp(proc, "crmd") == 0) { - return crm_proc_crmd; - } - - return crm_proc_none; -} - - -static inline const char * -ais_dest(const struct crm_ais_host_s *host) -{ - if (host->local) { - return "local"; - } else if (host->size > 0) { - return host->uname; - } else { - return ""; - } -} - -# define ais_data_len(msg) (msg->is_compressed?msg->compressed_size:msg->size) - -static inline AIS_Message * -ais_msg_copy(const AIS_Message * source) -{ - AIS_Message *target = malloc(sizeof(AIS_Message) + ais_data_len(source)); - - memcpy(target, source, sizeof(AIS_Message)); - memcpy(target->data, source->data, ais_data_len(target)); - - return target; -} - -static inline const char * -ais_error2text(int error) -{ - const char *text = "unknown"; - -# if SUPPORT_COROSYNC - switch (error) { - case CS_OK: - text = "None"; - break; - case CS_ERR_LIBRARY: - text = "Library error"; - break; - case CS_ERR_VERSION: - text = "Version error"; - break; - case CS_ERR_INIT: - text = "Initialization error"; - break; - case CS_ERR_TIMEOUT: - text = "Timeout"; - break; - case CS_ERR_TRY_AGAIN: - text = "Try again"; - break; - case CS_ERR_INVALID_PARAM: - text = "Invalid parameter"; - break; - case CS_ERR_NO_MEMORY: - text = "No memory"; - break; - case CS_ERR_BAD_HANDLE: - text = "Bad handle"; - break; - case CS_ERR_BUSY: - text = "Busy"; - break; - case CS_ERR_ACCESS: - text = "Access error"; - break; - case CS_ERR_NOT_EXIST: - text = "Doesn't exist"; - break; - case CS_ERR_NAME_TOO_LONG: - text = "Name too long"; - break; - case CS_ERR_EXIST: - text = "Exists"; - break; - case CS_ERR_NO_SPACE: - text = "No space"; - break; - case CS_ERR_INTERRUPT: - text = "Interrupt"; - break; - case CS_ERR_NAME_NOT_FOUND: - text = "Name not found"; - break; - case CS_ERR_NO_RESOURCES: - text = "No resources"; - break; - case CS_ERR_NOT_SUPPORTED: - text = "Not supported"; - break; - case CS_ERR_BAD_OPERATION: - text = "Bad operation"; - break; - case CS_ERR_FAILED_OPERATION: - text = "Failed operation"; - break; - case CS_ERR_MESSAGE_ERROR: - text = "Message error"; - break; - case CS_ERR_QUEUE_FULL: - text = "Queue full"; - break; - case CS_ERR_QUEUE_NOT_AVAILABLE: - text = "Queue not available"; - break; - case CS_ERR_BAD_FLAGS: - text = "Bad flags"; - break; - case CS_ERR_TOO_BIG: - text = "To big"; - break; - case CS_ERR_NO_SECTIONS: - text = "No sections"; - break; - } -# endif - return text; -} - -enum crm_ais_msg_types text2msg_type(const char *text); -char *get_ais_data(const AIS_Message * msg); -gboolean check_message_sanity(const AIS_Message * msg, const char *data); #endif diff --git a/include/crm/cluster.h b/include/crm/cluster.h index 268c67c986..824858b196 100644 --- a/include/crm/cluster.h +++ b/include/crm/cluster.h @@ -1,129 +1,210 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef CRM_COMMON_CLUSTER__H # define CRM_COMMON_CLUSTER__H # include # include -# include # if SUPPORT_HEARTBEAT # include # include # endif extern gboolean crm_have_quorum; extern GHashTable *crm_peer_cache; extern GHashTable *crm_peer_id_cache; extern unsigned long long crm_peer_seq; +# ifndef CRM_SERVICE +# define CRM_SERVICE PCMK_SERVICE_ID +# endif + +/* *INDENT-OFF* */ +#define CRM_NODE_LOST "lost" +#define CRM_NODE_MEMBER "member" +#define CRM_NODE_ACTIVE CRM_NODE_MEMBER +#define CRM_NODE_INACTIVE CRM_NODE_LOST +#define CRM_NODE_EVICTED "evicted" + +enum crm_proc_flag { + crm_proc_none = 0x00000001, + + /* 3 messaging types */ + crm_proc_heartbeat = 0x01000000, + crm_proc_plugin = 0x00000002, + crm_proc_cpg = 0x04000000, + + crm_proc_lrmd = 0x00000010, + crm_proc_cib = 0x00000100, + crm_proc_crmd = 0x00000200, + crm_proc_attrd = 0x00001000, + + crm_proc_pe = 0x00010000, + crm_proc_te = 0x00020000, + + crm_proc_stonithd = 0x00002000, + crm_proc_stonith_ng= 0x00100000, + + crm_proc_mgmtd = 0x00040000, +}; +/* *INDENT-ON* */ + +typedef struct crm_peer_node_s { + uint32_t id; + uint64_t born; + uint64_t last_seen; + + int32_t votes; + uint32_t processes; + + char *uname; + char *state; + char *uuid; + char *addr; + char *version; +} crm_node_t; + +static inline const char * +peer2text(enum crm_proc_flag proc) +{ + const char *text = "unknown"; + + switch (proc) { + case crm_proc_none: + text = "unknown"; + break; + case crm_proc_plugin: + text = "ais"; + break; + case crm_proc_heartbeat: + text = "heartbeat"; + break; + case crm_proc_cib: + text = "cib"; + break; + case crm_proc_crmd: + text = "crmd"; + break; + case crm_proc_pe: + text = "pengine"; + break; + case crm_proc_te: + text = "tengine"; + break; + case crm_proc_lrmd: + text = "lrmd"; + break; + case crm_proc_attrd: + text = "attrd"; + break; + case crm_proc_stonithd: + text = "stonithd"; + break; + case crm_proc_stonith_ng: + text = "stonith-ng"; + break; + case crm_proc_mgmtd: + text = "mgmtd"; + break; + case crm_proc_cpg: + text = "corosync-cpg"; + break; + } + return text; +} + void crm_peer_init(void); void crm_peer_destroy(void); char *get_corosync_uuid(uint32_t id, const char *uname); const char *get_node_uuid(uint32_t id, const char *uname); int get_corosync_id(int id, const char *uuid); gboolean crm_cluster_connect(char **our_uname, char **our_uuid, void *dispatch, void *destroy, # if SUPPORT_HEARTBEAT ll_cluster_t ** hb_conn # else void **unused # endif ); -gboolean init_cman_connection(gboolean(*dispatch) (unsigned long long, gboolean), - void (*destroy) (gpointer)); - -gboolean init_quorum_connection(gboolean(*dispatch) (unsigned long long, gboolean), - void (*destroy) (gpointer)); - +enum crm_ais_msg_types; gboolean send_cluster_message(const char *node, enum crm_ais_msg_types service, - xmlNode * data, gboolean ordered); + xmlNode * data, gboolean ordered); -void destroy_crm_node(gpointer data); +void destroy_crm_node(gpointer/* crm_node_t* */ data); crm_node_t *crm_get_peer(unsigned int id, const char *uname); -void crm_update_peer_proc(const char *source, crm_node_t *peer, uint32_t flag, const char *status); -crm_node_t *crm_update_peer(const char *source, unsigned int id, uint64_t born, uint64_t seen, int32_t votes, - uint32_t children, const char *uuid, const char *uname, - const char *addr, const char *state); - guint crm_active_peers(void); gboolean crm_is_peer_active(const crm_node_t * node); guint reap_crm_member(uint32_t id); int crm_terminate_member(int nodeid, const char *uname, void* unused); int crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection); gboolean crm_get_cluster_name(char **cname); # if SUPPORT_HEARTBEAT -xmlNode *convert_ha_message(xmlNode * parent, HA_Message *msg, const char *field); gboolean crm_is_heartbeat_peer_active(const crm_node_t * node); -gboolean ccm_have_quorum(oc_ed_t event); -const char *ccm_event_name(oc_ed_t event); -crm_node_t *crm_update_ccm_node(const oc_ev_membership_t * oc, int offset, const char *state, - uint64_t seq); # endif # if SUPPORT_COROSYNC extern int ais_fd_sync; gboolean crm_is_corosync_peer_active(const crm_node_t * node); gboolean send_ais_text(int class, const char *data, gboolean local, const char *node, enum crm_ais_msg_types dest); gboolean get_ais_nodeid(uint32_t * id, char **uname); # endif void empty_uuid_cache(void); const char *get_uuid(const char *uname); const char *get_uname(const char *uuid); void set_uuid(xmlNode * node, const char *attr, const char *uname); void unget_uuid(const char *uname); enum crm_status_type { crm_status_uname, crm_status_nstate, crm_status_processes, }; enum crm_ais_msg_types text2msg_type(const char *text); -void - crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *)); +void crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *)); /* *INDENT-OFF* */ enum cluster_type_e { pcmk_cluster_unknown = 0x0001, pcmk_cluster_invalid = 0x0002, pcmk_cluster_heartbeat = 0x0004, pcmk_cluster_classic_ais = 0x0010, pcmk_cluster_corosync = 0x0020, pcmk_cluster_cman = 0x0040, }; /* *INDENT-ON* */ enum cluster_type_e get_cluster_type(void); const char *name_for_cluster_type(enum cluster_type_e type); gboolean is_corosync_cluster(void); gboolean is_cman_cluster(void); gboolean is_openais_cluster(void); gboolean is_classic_ais_cluster(void); gboolean is_heartbeat_cluster(void); #endif diff --git a/include/crm/ais.h b/include/crm/cluster/internal.h similarity index 67% copy from include/crm/ais.h copy to include/crm/cluster/internal.h index 02e822c495..1e145276f4 100644 --- a/include/crm/ais.h +++ b/include/crm/cluster/internal.h @@ -1,411 +1,327 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ -#ifndef CRM_AIS__H -# define CRM_AIS__H +#ifndef CRM_CLUSTER_INTERNAL__H +# define CRM_CLUSTER_INTERNAL__H -# include -# include -# include - -# include -# include +# include +# define AIS_IPC_NAME "ais-crm-ipc" # define AIS_IPC_MESSAGE_SIZE 8192*128 - -# if SUPPORT_COROSYNC -# if CS_USES_LIBQB -# include -# include -typedef struct qb_ipc_request_header cs_ipc_header_request_t; -typedef struct qb_ipc_response_header cs_ipc_header_response_t; -# else -# include -# include -# include -static inline int -qb_to_cs_error(int a) -{ - return a; -} - -typedef coroipc_request_header_t cs_ipc_header_request_t; -typedef coroipc_response_header_t cs_ipc_header_response_t; -# endif -# else -typedef struct { - int size __attribute__ ((aligned(8))); - int id __attribute__ ((aligned(8))); -} __attribute__ ((aligned(8))) cs_ipc_header_request_t; - -typedef struct { - int size __attribute__ ((aligned(8))); - int id __attribute__ ((aligned(8))); - int error __attribute__ ((aligned(8))); -} __attribute__ ((aligned(8))) cs_ipc_header_response_t; - -# endif - # define CRM_MESSAGE_IPC_ACK 0 -# ifndef CRM_SERVICE -# define CRM_SERVICE PCMK_SERVICE_ID -# endif - -# define MAX_NAME 256 -# define AIS_IPC_NAME "ais-crm-ipc" - -/* *INDENT-OFF* */ -#define CRM_NODE_LOST "lost" -#define CRM_NODE_MEMBER "member" -#define CRM_NODE_ACTIVE CRM_NODE_MEMBER -#define CRM_NODE_INACTIVE CRM_NODE_LOST -#define CRM_NODE_EVICTED "evicted" - typedef struct crm_ais_host_s AIS_Host; typedef struct crm_ais_msg_s AIS_Message; enum crm_ais_msg_class { crm_class_cluster = 0, crm_class_members = 1, crm_class_notify = 2, crm_class_nodeid = 3, crm_class_rmpeer = 4, crm_class_quorum = 5, }; /* order here matters - its used to index into the crm_children array */ enum crm_ais_msg_types { crm_msg_none = 0, crm_msg_ais = 1, crm_msg_lrmd = 2, crm_msg_cib = 3, crm_msg_crmd = 4, crm_msg_attrd = 5, crm_msg_stonithd = 6, crm_msg_te = 7, crm_msg_pe = 8, crm_msg_stonith_ng = 9, }; -enum crm_proc_flag { - crm_proc_none = 0x00000001, - - /* 3 messaging types */ - crm_proc_heartbeat = 0x01000000, - crm_proc_plugin = 0x00000002, - crm_proc_cpg = 0x04000000, - - crm_proc_lrmd = 0x00000010, - crm_proc_cib = 0x00000100, - crm_proc_crmd = 0x00000200, - crm_proc_attrd = 0x00001000, - - crm_proc_pe = 0x00010000, - crm_proc_te = 0x00020000, - - crm_proc_stonithd = 0x00002000, - crm_proc_stonith_ng= 0x00100000, - - crm_proc_mgmtd = 0x00040000, -}; -/* *INDENT-ON* */ - -typedef struct crm_peer_node_s { - uint32_t id; - uint64_t born; - uint64_t last_seen; - - int32_t votes; - uint32_t processes; - - char *uname; - char *state; - char *uuid; - char *addr; - char *version; -} crm_node_t; - struct crm_ais_host_s { uint32_t id; uint32_t pid; gboolean local; enum crm_ais_msg_types type; uint32_t size; char uname[MAX_NAME]; } __attribute__ ((packed)); struct crm_ais_msg_s { cs_ipc_header_response_t header __attribute__ ((aligned(8))); uint32_t id; gboolean is_compressed; AIS_Host host; AIS_Host sender; uint32_t size; uint32_t compressed_size; /* 584 bytes */ char data[0]; } __attribute__ ((packed)); struct crm_ais_nodeid_resp_s { cs_ipc_header_response_t header __attribute__ ((aligned(8))); uint32_t id; uint32_t counter; char uname[MAX_NAME]; char cname[MAX_NAME]; } __attribute__ ((packed)); struct crm_ais_quorum_resp_s { cs_ipc_header_response_t header __attribute__ ((aligned(8))); uint64_t id; uint32_t votes; uint32_t expected_votes; uint32_t quorate; } __attribute__ ((packed)); -static inline const char * -msg_type2text(enum crm_ais_msg_types type) -{ - const char *text = "unknown"; - - switch (type) { - case crm_msg_none: - text = "unknown"; - break; - case crm_msg_ais: - text = "ais"; - break; - case crm_msg_cib: - text = "cib"; - break; - case crm_msg_crmd: - text = "crmd"; - break; - case crm_msg_pe: - text = "pengine"; - break; - case crm_msg_te: - text = "tengine"; - break; - case crm_msg_lrmd: - text = "lrmd"; - break; - case crm_msg_attrd: - text = "attrd"; - break; - case crm_msg_stonithd: - text = "stonithd"; - break; - case crm_msg_stonith_ng: - text = "stonith-ng"; - break; - } - return text; -} - -static inline const char * -peer2text(enum crm_proc_flag proc) -{ - const char *text = "unknown"; - - switch (proc) { - case crm_proc_none: - text = "unknown"; - break; - case crm_proc_plugin: - text = "ais"; - break; - case crm_proc_heartbeat: - text = "heartbeat"; - break; - case crm_proc_cib: - text = "cib"; - break; - case crm_proc_crmd: - text = "crmd"; - break; - case crm_proc_pe: - text = "pengine"; - break; - case crm_proc_te: - text = "tengine"; - break; - case crm_proc_lrmd: - text = "lrmd"; - break; - case crm_proc_attrd: - text = "attrd"; - break; - case crm_proc_stonithd: - text = "stonithd"; - break; - case crm_proc_stonith_ng: - text = "stonith-ng"; - break; - case crm_proc_mgmtd: - text = "mgmtd"; - break; - case crm_proc_cpg: - text = "corosync-cpg"; - break; - } - return text; -} - static inline enum crm_proc_flag text2proc(const char *proc) { /* We only care about these two so far */ if(proc && strcmp(proc, "cib") == 0) { return crm_proc_cib; } else if(proc && strcmp(proc, "crmd") == 0) { return crm_proc_crmd; } return crm_proc_none; } static inline const char * ais_dest(const struct crm_ais_host_s *host) { if (host->local) { return "local"; } else if (host->size > 0) { return host->uname; } else { return ""; } } # define ais_data_len(msg) (msg->is_compressed?msg->compressed_size:msg->size) static inline AIS_Message * ais_msg_copy(const AIS_Message * source) { AIS_Message *target = malloc(sizeof(AIS_Message) + ais_data_len(source)); memcpy(target, source, sizeof(AIS_Message)); memcpy(target->data, source->data, ais_data_len(target)); return target; } static inline const char * ais_error2text(int error) { const char *text = "unknown"; # if SUPPORT_COROSYNC switch (error) { case CS_OK: text = "None"; break; case CS_ERR_LIBRARY: text = "Library error"; break; case CS_ERR_VERSION: text = "Version error"; break; case CS_ERR_INIT: text = "Initialization error"; break; case CS_ERR_TIMEOUT: text = "Timeout"; break; case CS_ERR_TRY_AGAIN: text = "Try again"; break; case CS_ERR_INVALID_PARAM: text = "Invalid parameter"; break; case CS_ERR_NO_MEMORY: text = "No memory"; break; case CS_ERR_BAD_HANDLE: text = "Bad handle"; break; case CS_ERR_BUSY: text = "Busy"; break; case CS_ERR_ACCESS: text = "Access error"; break; case CS_ERR_NOT_EXIST: text = "Doesn't exist"; break; case CS_ERR_NAME_TOO_LONG: text = "Name too long"; break; case CS_ERR_EXIST: text = "Exists"; break; case CS_ERR_NO_SPACE: text = "No space"; break; case CS_ERR_INTERRUPT: text = "Interrupt"; break; case CS_ERR_NAME_NOT_FOUND: text = "Name not found"; break; case CS_ERR_NO_RESOURCES: text = "No resources"; break; case CS_ERR_NOT_SUPPORTED: text = "Not supported"; break; case CS_ERR_BAD_OPERATION: text = "Bad operation"; break; case CS_ERR_FAILED_OPERATION: text = "Failed operation"; break; case CS_ERR_MESSAGE_ERROR: text = "Message error"; break; case CS_ERR_QUEUE_FULL: text = "Queue full"; break; case CS_ERR_QUEUE_NOT_AVAILABLE: text = "Queue not available"; break; case CS_ERR_BAD_FLAGS: text = "Bad flags"; break; case CS_ERR_TOO_BIG: text = "To big"; break; case CS_ERR_NO_SECTIONS: text = "No sections"; break; } # endif return text; } +static inline const char * +msg_type2text(enum crm_ais_msg_types type) +{ + const char *text = "unknown"; + + switch (type) { + case crm_msg_none: + text = "unknown"; + break; + case crm_msg_ais: + text = "ais"; + break; + case crm_msg_cib: + text = "cib"; + break; + case crm_msg_crmd: + text = "crmd"; + break; + case crm_msg_pe: + text = "pengine"; + break; + case crm_msg_te: + text = "tengine"; + break; + case crm_msg_lrmd: + text = "lrmd"; + break; + case crm_msg_attrd: + text = "attrd"; + break; + case crm_msg_stonithd: + text = "stonithd"; + break; + case crm_msg_stonith_ng: + text = "stonith-ng"; + break; + } + return text; +} + enum crm_ais_msg_types text2msg_type(const char *text); char *get_ais_data(const AIS_Message * msg); gboolean check_message_sanity(const AIS_Message * msg, const char *data); + +# if SUPPORT_HEARTBEAT +extern ll_cluster_t *heartbeat_cluster; +gboolean send_ha_message(ll_cluster_t * hb_conn, xmlNode * msg, + const char *node, gboolean force_ordered); +gboolean ha_msg_dispatch(ll_cluster_t * cluster_conn, gpointer user_data); + +gboolean register_heartbeat_conn(ll_cluster_t * hb_cluster, char **uuid, char **uname, + void (*hb_message) (HA_Message * msg, void *private_data), + void (*hb_destroy) (gpointer user_data)); +xmlNode *convert_ha_message(xmlNode * parent, HA_Message *msg, const char *field); +gboolean ccm_have_quorum(oc_ed_t event); +const char *ccm_event_name(oc_ed_t event); +crm_node_t *crm_update_ccm_node(const oc_ev_membership_t * oc, int offset, const char *state, + uint64_t seq); +# endif + +# if SUPPORT_COROSYNC + +gboolean send_ais_message(xmlNode * msg, gboolean local, + const char *node, enum crm_ais_msg_types dest); + +enum cluster_type_e find_corosync_variant(void); + +void terminate_ais_connection(void); +gboolean init_ais_connection(gboolean(*dispatch) (AIS_Message *, char *, int), + void (*destroy) (gpointer), char **our_uuid, char **our_uname, + int *nodeid); +gboolean init_ais_connection_once(gboolean(*dispatch) (AIS_Message *, char *, int), + void (*destroy) (gpointer), char **our_uuid, + char **our_uname, int *nodeid); + +# endif + +enum crm_quorum_source { + crm_quorum_cman, + crm_quorum_corosync, + crm_quorum_pacemaker, +}; + +enum crm_quorum_source get_quorum_source(void); + +void crm_update_peer_proc(const char *source, crm_node_t *peer, uint32_t flag, const char *status); + +crm_node_t *crm_update_peer( + const char *source, unsigned int id, uint64_t born, uint64_t seen, + int32_t votes, uint32_t children, const char *uuid, const char *uname, + const char *addr, const char *state); + +gboolean init_cman_connection( + gboolean(*dispatch) (unsigned long long, gboolean), void (*destroy) (gpointer)); + +gboolean init_quorum_connection( + gboolean(*dispatch) (unsigned long long, gboolean), void (*destroy) (gpointer)); + #endif diff --git a/include/crm/crm.h b/include/crm/crm.h index b03b41c917..c28e21b481 100644 --- a/include/crm/crm.h +++ b/include/crm/crm.h @@ -1,189 +1,193 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef CRM__H # define CRM__H # include # include # include # include # undef MIN # undef MAX # include # include # define CRM_FEATURE_SET "3.0.6" # define MINIMUM_SCHEMA_VERSION "pacemaker-1.0" # define LATEST_SCHEMA_VERSION "pacemaker-"CRM_DTD_VERSION # define EOS '\0' # define DIMOF(a) ((int) (sizeof(a)/sizeof(a[0])) ) +# ifndef MAX_NAME +# define MAX_NAME 256 +# endif + # ifndef __GNUC__ # define __builtin_expect(expr, result) (expr) # endif /* Some handy macros used by the Linux kernel */ # define __likely(expr) __builtin_expect(expr, 1) # define __unlikely(expr) __builtin_expect(expr, 0) # define CRM_META "CRM_meta" extern const char *crm_system_name; /* *INDENT-OFF* */ /* Clean these up at some point, some probably should be runtime options */ # define SOCKET_LEN 1024 # define APPNAME_LEN 256 # define MAX_IPC_FAIL 5 # define MAX_IPC_DELAY 120 # define MSG_LOG 1 # define DOT_FSA_ACTIONS 1 # define DOT_ALL_FSA_INPUTS 1 /* #define FSA_TRACE 1 */ # define INFINITY_S "INFINITY" # define MINUS_INFINITY_S "-INFINITY" # define INFINITY 1000000 /* Sub-systems */ # define CRM_SYSTEM_DC "dc" # define CRM_SYSTEM_DCIB "dcib" /* The master CIB */ # define CRM_SYSTEM_CIB "cib" # define CRM_SYSTEM_CRMD "crmd" # define CRM_SYSTEM_LRMD "lrmd" # define CRM_SYSTEM_PENGINE "pengine" # define CRM_SYSTEM_TENGINE "tengine" # define CRM_SYSTEM_STONITHD "stonithd" # define CRM_SYSTEM_MCP "pacemakerd" /* Valid operations */ # define CRM_OP_NOOP "noop" # define CRM_OP_JOIN_ANNOUNCE "join_announce" # define CRM_OP_JOIN_OFFER "join_offer" # define CRM_OP_JOIN_REQUEST "join_request" # define CRM_OP_JOIN_ACKNAK "join_ack_nack" # define CRM_OP_JOIN_CONFIRM "join_confirm" # define CRM_OP_DIE "die_no_respawn" # define CRM_OP_RETRIVE_CIB "retrieve_cib" # define CRM_OP_PING "ping" # define CRM_OP_VOTE "vote" # define CRM_OP_NOVOTE "no-vote" # define CRM_OP_HELLO "hello" # define CRM_OP_HBEAT "dc_beat" # define CRM_OP_PECALC "pe_calc" # define CRM_OP_ABORT "abort" # define CRM_OP_QUIT "quit" # define CRM_OP_LOCAL_SHUTDOWN "start_shutdown" # define CRM_OP_SHUTDOWN_REQ "req_shutdown" # define CRM_OP_SHUTDOWN "do_shutdown" # define CRM_OP_FENCE "stonith" # define CRM_OP_EVENTCC "event_cc" # define CRM_OP_TEABORT "te_abort" # define CRM_OP_TEABORTED "te_abort_confirmed" /* we asked */ # define CRM_OP_TE_HALT "te_halt" # define CRM_OP_TECOMPLETE "te_complete" # define CRM_OP_TETIMEOUT "te_timeout" # define CRM_OP_TRANSITION "transition" # define CRM_OP_REGISTER "register" # define CRM_OP_DEBUG_UP "debug_inc" # define CRM_OP_DEBUG_DOWN "debug_dec" # define CRM_OP_INVOKE_LRM "lrm_invoke" # define CRM_OP_LRM_REFRESH "lrm_refresh" # define CRM_OP_LRM_QUERY "lrm_query" # define CRM_OP_LRM_DELETE "lrm_delete" # define CRM_OP_LRM_FAIL "lrm_fail" # define CRM_OP_PROBED "probe_complete" # define CRM_OP_REPROBE "probe_again" # define CRM_OP_CLEAR_FAILCOUNT "clear_failcount" # define CRM_OP_RELAXED_SET "one-or-more" # define CRM_OP_RM_NODE_CACHE "rm_node_cache" # define CRMD_STATE_ACTIVE "member" # define CRMD_STATE_INACTIVE "down" # define CRMD_JOINSTATE_DOWN CRMD_STATE_INACTIVE # define CRMD_JOINSTATE_PENDING "pending" # define CRMD_JOINSTATE_MEMBER CRMD_STATE_ACTIVE # define CRMD_JOINSTATE_NACK "banned" # define CRMD_ACTION_DELETE "delete" # define CRMD_ACTION_CANCEL "cancel" # define CRMD_ACTION_MIGRATE "migrate_to" # define CRMD_ACTION_MIGRATED "migrate_from" # define CRMD_ACTION_START "start" # define CRMD_ACTION_STARTED "running" # define CRMD_ACTION_STOP "stop" # define CRMD_ACTION_STOPPED "stopped" # define CRMD_ACTION_PROMOTE "promote" # define CRMD_ACTION_PROMOTED "promoted" # define CRMD_ACTION_DEMOTE "demote" # define CRMD_ACTION_DEMOTED "demoted" # define CRMD_ACTION_NOTIFY "notify" # define CRMD_ACTION_NOTIFIED "notified" # define CRMD_ACTION_STATUS "monitor" /* short names */ # define RSC_DELETE CRMD_ACTION_DELETE # define RSC_CANCEL CRMD_ACTION_CANCEL # define RSC_MIGRATE CRMD_ACTION_MIGRATE # define RSC_MIGRATED CRMD_ACTION_MIGRATED # define RSC_START CRMD_ACTION_START # define RSC_STARTED CRMD_ACTION_STARTED # define RSC_STOP CRMD_ACTION_STOP # define RSC_STOPPED CRMD_ACTION_STOPPED # define RSC_PROMOTE CRMD_ACTION_PROMOTE # define RSC_PROMOTED CRMD_ACTION_PROMOTED # define RSC_DEMOTE CRMD_ACTION_DEMOTE # define RSC_DEMOTED CRMD_ACTION_DEMOTED # define RSC_NOTIFY CRMD_ACTION_NOTIFY # define RSC_NOTIFIED CRMD_ACTION_NOTIFIED # define RSC_STATUS CRMD_ACTION_STATUS /* *INDENT-ON* */ typedef GList *GListPtr; # include # include # include # define crm_str_hash g_str_hash_traditional guint g_str_hash_traditional(gconstpointer v); #endif diff --git a/include/crm_internal.h b/include/crm_internal.h index 15b652650e..4cbf309ab8 100644 --- a/include/crm_internal.h +++ b/include/crm_internal.h @@ -1,162 +1,195 @@ /* crm_internal.h */ /* * Copyright (C) 2006 - 2008 * Andrew Beekhof * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef CRM_INTERNAL__H # define CRM_INTERNAL__H # include # include # include # include # include # include /* Dynamic loading of libraries */ void *find_library_function(void **handle, const char *lib, const char *fn, int fatal); void *convert_const_pointer(const void *ptr); /* For ACLs */ char *uid2username(uid_t uid); void determine_request_user(char *user, xmlNode * request, const char *field); # if ENABLE_ACL # include static inline gboolean is_privileged(const char *user) { if (user == NULL) { return FALSE; } else if (strcmp(user, CRM_DAEMON_USER) == 0) { return TRUE; } else if (strcmp(user, "root") == 0) { return TRUE; } return FALSE; } # endif /* CLI option processing*/ # ifdef HAVE_GETOPT_H # include # else # define no_argument 0 # define required_argument 1 # endif # define pcmk_option_default 0x00000 # define pcmk_option_hidden 0x00001 # define pcmk_option_paragraph 0x00002 # define pcmk_option_example 0x00004 struct crm_option { /* Fields from 'struct option' in getopt.h */ /* name of long option */ const char *name; /* * one of no_argument, required_argument, and optional_argument: * whether option takes an argument */ int has_arg; /* if not NULL, set *flag to val when option found */ int *flag; /* if flag not NULL, value to set *flag to; else return value */ int val; /* Custom fields */ const char *desc; long flags; }; void crm_set_options(const char *short_options, const char *usage, struct crm_option *long_options, const char *app_desc); int crm_get_option(int argc, char **argv, int *index); int crm_get_option_long(int argc, char **argv, int *index, const char **longname); void crm_help(char cmd, int exit_code); /* Cluster Option Processing */ typedef struct pe_cluster_option_s { const char *name; const char *alt_name; const char *type; const char *values; const char *default_value; gboolean(*is_valid) (const char *); const char *description_short; const char *description_long; } pe_cluster_option; const char *cluster_option(GHashTable * options, gboolean(*validate) (const char *), const char *name, const char *old_name, const char *def_value); const char *get_cluster_pref(GHashTable * options, pe_cluster_option * option_list, int len, const char *name); void config_metadata(const char *name, const char *version, const char *desc_short, const char *desc_long, pe_cluster_option * option_list, int len); void verify_all_options(GHashTable * options, pe_cluster_option * option_list, int len); gboolean check_time(const char *value); gboolean check_timer(const char *value); gboolean check_boolean(const char *value); gboolean check_number(const char *value); /* Shared PE/crmd functionality */ void filter_action_parameters(xmlNode * param_set, const char *version); void filter_reload_parameters(xmlNode * param_set, const char *restart_string); /* Resource operation updates */ xmlNode *create_operation_update(xmlNode * parent, lrmd_event_data_t *event, const char *caller_version, int target_rc, const char *origin, int level); /* char2score */ extern int node_score_red; extern int node_score_green; extern int node_score_yellow; extern int node_score_infinity; /* Assorted convenience functions */ static inline int crm_strlen_zero(const char *s) { return !s || *s == '\0'; } char *generate_series_filename(const char *directory, const char *series, int sequence, gboolean bzip); int get_last_sequence(const char *directory, const char *series); void write_last_sequence(const char *directory, const char *series, int sequence, int max); void crm_make_daemon(const char *name, gboolean daemonize, const char *pidfile); gboolean crm_is_writable(const char *dir, const char *file, const char *user, const char *group, gboolean need_both); long long crm_int_helper(const char *text, char **end_text); char *crm_concat(const char *prefix, const char *suffix, char join); char *generate_hash_key(const char *crm_msg_reference, const char *sys); xmlNode *crm_recv_remote_msg(void *session, gboolean encrypted); void crm_send_remote_msg(void *session, xmlNode * msg, gboolean encrypted); # define crm_config_err(fmt...) { crm_config_error = TRUE; crm_err(fmt); } # define crm_config_warn(fmt...) { crm_config_warning = TRUE; crm_warn(fmt); } +# if SUPPORT_COROSYNC +# if CS_USES_LIBQB +# include +# include +typedef struct qb_ipc_request_header cs_ipc_header_request_t; +typedef struct qb_ipc_response_header cs_ipc_header_response_t; +# else +# include +# include +# include +static inline int +qb_to_cs_error(int a) +{ + return a; +} + +typedef coroipc_request_header_t cs_ipc_header_request_t; +typedef coroipc_response_header_t cs_ipc_header_response_t; +# endif +# else +typedef struct { + int size __attribute__ ((aligned(8))); + int id __attribute__ ((aligned(8))); +} __attribute__ ((aligned(8))) cs_ipc_header_request_t; + +typedef struct { + int size __attribute__ ((aligned(8))); + int id __attribute__ ((aligned(8))); + int error __attribute__ ((aligned(8))); +} __attribute__ ((aligned(8))) cs_ipc_header_response_t; + +# endif + #endif /* CRM_INTERNAL__H */ diff --git a/lib/ais/plugin.c b/lib/ais/plugin.c index 6babf2532d..9ec2ac2715 100644 --- a/lib/ais/plugin.c +++ b/lib/ais/plugin.c @@ -1,1756 +1,1755 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USAA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include "plugin.h" #include "utils.h" #include #include #include #include #include #include #include #include #include struct corosync_api_v1 *pcmk_api = NULL; uint32_t plugin_has_votes = 0; uint32_t plugin_expected_votes = 2; int use_mgmtd = 0; int plugin_log_level = LOG_DEBUG; char *local_uname = NULL; int local_uname_len = 0; char *local_cname = NULL; int local_cname_len = 0; uint32_t local_nodeid = 0; char *ipc_channel_name = NULL; static uint64_t local_born_on = 0; uint64_t membership_seq = 0; pthread_t pcmk_wait_thread; gboolean use_mcp = FALSE; gboolean wait_active = TRUE; gboolean have_reliable_membership_id = FALSE; GHashTable *ipc_client_list = NULL; GHashTable *membership_list = NULL; GHashTable *membership_notify_list = NULL; #define MAX_RESPAWN 100 #define LOOPBACK_ID 16777343 #define crm_flag_none 0x00000000 #define crm_flag_members 0x00000001 struct crm_identify_msg_s { cs_ipc_header_request_t header __attribute__ ((aligned(8))); uint32_t id; uint32_t pid; int32_t votes; uint32_t processes; char uname[256]; char version[256]; uint64_t born_on; } __attribute__ ((packed)); /* *INDENT-OFF* */ static crm_child_t pcmk_children[] = { { 0, crm_proc_none, crm_flag_none, 0, 0, FALSE, "none", NULL, NULL, NULL, NULL }, { 0, crm_proc_plugin, crm_flag_none, 0, 0, FALSE, "ais", NULL, NULL, NULL, NULL }, { 0, crm_proc_lrmd, crm_flag_none, 3, 0, TRUE, "lrmd", NULL, CRM_DAEMON_DIR"/lrmd", NULL, NULL }, { 0, crm_proc_cib, crm_flag_members, 1, 0, TRUE, "cib", CRM_DAEMON_USER, CRM_DAEMON_DIR"/cib", NULL, NULL }, { 0, crm_proc_crmd, crm_flag_members, 6, 0, TRUE, "crmd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/crmd", NULL, NULL }, { 0, crm_proc_attrd, crm_flag_none, 4, 0, TRUE, "attrd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/attrd", NULL, NULL }, { 0, crm_proc_stonithd, crm_flag_none, 0, 0, TRUE, "stonithd", NULL, "/bin/false", NULL, NULL }, { 0, crm_proc_pe, crm_flag_none, 5, 0, TRUE, "pengine", CRM_DAEMON_USER, CRM_DAEMON_DIR"/pengine", NULL, NULL }, { 0, crm_proc_mgmtd, crm_flag_none, 7, 0, TRUE, "mgmtd", NULL, HB_DAEMON_DIR"/mgmtd", NULL, NULL }, { 0, crm_proc_stonith_ng, crm_flag_none, 2, 0, TRUE, "stonith-ng", NULL, CRM_DAEMON_DIR"/stonithd", NULL, NULL }, }; /* *INDENT-ON* */ void send_cluster_id(void); int send_cluster_msg_raw(const AIS_Message * ais_msg); char *pcmk_generate_membership_data(void); gboolean check_message_sanity(const AIS_Message * msg, const char *data); typedef const void ais_void_ptr; int pcmk_shutdown(void); void pcmk_peer_update(enum totem_configuration_type configuration_type, const unsigned int *member_list, size_t member_list_entries, const unsigned int *left_list, size_t left_list_entries, const unsigned int *joined_list, size_t joined_list_entries, const struct memb_ring_id *ring_id); int pcmk_startup(struct corosync_api_v1 *corosync_api); int pcmk_config_init(struct corosync_api_v1 *corosync_api); int pcmk_ipc_exit(void *conn); int pcmk_ipc_connect(void *conn); void pcmk_ipc(void *conn, ais_void_ptr * msg); void pcmk_exec_dump(void); void pcmk_cluster_swab(void *msg); void pcmk_cluster_callback(ais_void_ptr * message, unsigned int nodeid); void pcmk_nodeid(void *conn, ais_void_ptr * msg); void pcmk_nodes(void *conn, ais_void_ptr * msg); void pcmk_notify(void *conn, ais_void_ptr * msg); void pcmk_remove_member(void *conn, ais_void_ptr * msg); void pcmk_quorum(void *conn, ais_void_ptr * msg); void pcmk_cluster_id_swab(void *msg); void pcmk_cluster_id_callback(ais_void_ptr * message, unsigned int nodeid); void ais_remove_peer(char *node_id); static uint32_t get_process_list(void) { int lpc = 0; uint32_t procs = crm_proc_plugin; if (use_mcp) { return 0; } for (lpc = 0; lpc < SIZEOF(pcmk_children); lpc++) { if (pcmk_children[lpc].pid != 0) { procs |= pcmk_children[lpc].flag; } } return procs; } static struct corosync_lib_handler pcmk_lib_service[] = { { /* 0 */ .lib_handler_fn = pcmk_ipc, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED, }, { /* 1 */ .lib_handler_fn = pcmk_nodes, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED, }, { /* 2 */ .lib_handler_fn = pcmk_notify, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED, }, { /* 3 */ .lib_handler_fn = pcmk_nodeid, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED, }, { /* 4 */ .lib_handler_fn = pcmk_remove_member, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED, }, { /* 5 */ .lib_handler_fn = pcmk_quorum, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED, }, }; static struct corosync_exec_handler pcmk_exec_service[] = { { /* 0 */ .exec_handler_fn = pcmk_cluster_callback, .exec_endian_convert_fn = pcmk_cluster_swab}, { /* 1 */ .exec_handler_fn = pcmk_cluster_id_callback, .exec_endian_convert_fn = pcmk_cluster_id_swab} }; /* * Exports the interface for the service */ /* *INDENT-OFF* */ struct corosync_service_engine pcmk_service_handler = { .name = (char *)"Pacemaker Cluster Manager "PACKAGE_VERSION, .id = PCMK_SERVICE_ID, .private_data_size = 0, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED, .allow_inquorate = CS_LIB_ALLOW_INQUORATE, .lib_init_fn = pcmk_ipc_connect, .lib_exit_fn = pcmk_ipc_exit, .exec_init_fn = pcmk_startup, .exec_exit_fn = pcmk_shutdown, .config_init_fn = pcmk_config_init, .priority = 50, .lib_engine = pcmk_lib_service, .lib_engine_count = sizeof (pcmk_lib_service) / sizeof (struct corosync_lib_handler), .exec_engine = pcmk_exec_service, .exec_engine_count = sizeof (pcmk_exec_service) / sizeof (struct corosync_exec_handler), .confchg_fn = pcmk_peer_update, .exec_dump_fn = pcmk_exec_dump, /* void (*sync_init) (void); */ /* int (*sync_process) (void); */ /* void (*sync_activate) (void); */ /* void (*sync_abort) (void); */ }; /* * Dynamic Loader definition */ struct corosync_service_engine *pcmk_get_handler_ver0 (void); struct corosync_service_engine_iface_ver0 pcmk_service_handler_iface = { .corosync_get_service_engine_ver0 = pcmk_get_handler_ver0 }; static struct lcr_iface openais_pcmk_ver0[2] = { { .name = "pacemaker", .version = 0, .versions_replace = 0, .versions_replace_count = 0, .dependencies = 0, .dependency_count = 0, .constructor = NULL, .destructor = NULL, .interfaces = NULL }, { .name = "pacemaker", .version = 1, .versions_replace = 0, .versions_replace_count = 0, .dependencies = 0, .dependency_count = 0, .constructor = NULL, .destructor = NULL, .interfaces = NULL } }; static struct lcr_comp pcmk_comp_ver0 = { .iface_count = 2, .ifaces = openais_pcmk_ver0 }; /* *INDENT-ON* */ struct corosync_service_engine * pcmk_get_handler_ver0(void) { return (&pcmk_service_handler); } __attribute__ ((constructor)) static void register_this_component(void) { lcr_interfaces_set(&openais_pcmk_ver0[0], &pcmk_service_handler_iface); lcr_interfaces_set(&openais_pcmk_ver0[1], &pcmk_service_handler_iface); lcr_component_register(&pcmk_comp_ver0); } static int plugin_has_quorum(void) { if ((plugin_expected_votes >> 1) < plugin_has_votes) { return 1; } return 0; } static void update_expected_votes(int value) { if (value < plugin_has_votes) { /* Never drop below the number of connected nodes */ ais_info("Cannot update expected quorum votes %d -> %d:" " value cannot be less that the current number of votes", plugin_expected_votes, value); } else if (plugin_expected_votes != value) { ais_info("Expected quorum votes %d -> %d", plugin_expected_votes, value); plugin_expected_votes = value; } } /* Create our own local copy of the config so we can navigate it */ static void process_ais_conf(void) { char *value = NULL; gboolean any_log = FALSE; hdb_handle_t top_handle = 0; hdb_handle_t local_handle = 0; ais_info("Reading configure"); top_handle = config_find_init(pcmk_api, "logging"); local_handle = config_find_next(pcmk_api, "logging", top_handle); get_config_opt(pcmk_api, local_handle, "debug", &value, "on"); if (ais_get_boolean(value)) { plugin_log_level = LOG_DEBUG; pcmk_env.debug = "1"; } else { plugin_log_level = LOG_INFO; pcmk_env.debug = "0"; } get_config_opt(pcmk_api, local_handle, "to_logfile", &value, "off"); if (ais_get_boolean(value)) { get_config_opt(pcmk_api, local_handle, "logfile", &value, NULL); if (value == NULL) { ais_err("Logging to a file requested but no log file specified"); } else { uid_t pcmk_uid = geteuid(); uid_t pcmk_gid = getegid(); FILE *logfile = fopen(value, "a"); if (logfile) { int ignore = 0; int logfd = fileno(logfile); pcmk_env.logfile = value; /* Ensure the file has the correct permissions */ ignore = fchown(logfd, pcmk_uid, pcmk_gid); ignore = fchmod(logfd, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); fprintf(logfile, "Set r/w permissions for uid=%d, gid=%d on %s\n", pcmk_uid, pcmk_gid, value); fflush(logfile); fsync(logfd); fclose(logfile); any_log = TRUE; } else { ais_err("Couldn't create logfile: %s", value); } } } get_config_opt(pcmk_api, local_handle, "to_syslog", &value, "on"); if (any_log && ais_get_boolean(value) == FALSE) { ais_info("User configured file based logging and explicitly disabled syslog."); value = "none"; } else { if (ais_get_boolean(value) == FALSE) { ais_err ("Please enable some sort of logging, either 'to_file: on' or 'to_syslog: on'."); ais_err("If you use file logging, be sure to also define a value for 'logfile'"); } get_config_opt(pcmk_api, local_handle, "syslog_facility", &value, "daemon"); } pcmk_env.syslog = value; config_find_done(pcmk_api, local_handle); top_handle = config_find_init(pcmk_api, "quorum"); local_handle = config_find_next(pcmk_api, "quorum", top_handle); get_config_opt(pcmk_api, local_handle, "provider", &value, NULL); if (value && ais_str_eq("quorum_cman", value)) { pcmk_env.quorum = "cman"; } else { pcmk_env.quorum = "pcmk"; } top_handle = config_find_init(pcmk_api, "service"); local_handle = config_find_next(pcmk_api, "service", top_handle); while (local_handle) { value = NULL; pcmk_api->object_key_get(local_handle, "name", strlen("name"), (void **)&value, NULL); if (ais_str_eq("pacemaker", value)) { break; } local_handle = config_find_next(pcmk_api, "service", top_handle); } get_config_opt(pcmk_api, local_handle, "ver", &value, "0"); if (ais_str_eq(value, "1")) { ais_info("Enabling MCP mode: Use the Pacemaker init script to complete Pacemaker startup"); use_mcp = TRUE; } get_config_opt(pcmk_api, local_handle, "clustername", &local_cname, "pcmk"); local_cname_len = strlen(local_cname); get_config_opt(pcmk_api, local_handle, "use_logd", &value, "no"); pcmk_env.use_logd = value; get_config_opt(pcmk_api, local_handle, "use_mgmtd", &value, "no"); if (ais_get_boolean(value) == FALSE) { int lpc = 0; for (; lpc < SIZEOF(pcmk_children); lpc++) { if (crm_proc_mgmtd & pcmk_children[lpc].flag) { /* Disable mgmtd startup */ pcmk_children[lpc].start_seq = 0; break; } } } config_find_done(pcmk_api, local_handle); } int pcmk_config_init(struct corosync_api_v1 *unused) { return 0; } static void * pcmk_wait_dispatch(void *arg) { struct timespec waitsleep = { .tv_sec = 1, .tv_nsec = 0 }; while (wait_active) { int lpc = 0; for (; lpc < SIZEOF(pcmk_children); lpc++) { if (pcmk_children[lpc].pid > 0) { int status; pid_t pid = wait4(pcmk_children[lpc].pid, &status, WNOHANG, NULL); if (pid == 0) { continue; } else if (pid < 0) { ais_perror("Call to wait4(%s) failed", pcmk_children[lpc].name); continue; } /* cleanup */ pcmk_children[lpc].pid = 0; pcmk_children[lpc].conn = NULL; pcmk_children[lpc].async_conn = NULL; if (WIFSIGNALED(status)) { int sig = WTERMSIG(status); ais_err("Child process %s terminated with signal %d" " (pid=%d, core=%s)", pcmk_children[lpc].name, sig, pid, WCOREDUMP(status) ? "true" : "false"); } else if (WIFEXITED(status)) { int rc = WEXITSTATUS(status); do_ais_log(rc == 0 ? LOG_NOTICE : LOG_ERR, "Child process %s exited (pid=%d, rc=%d)", pcmk_children[lpc].name, pid, rc); if (rc == 100) { ais_notice("Child process %s no longer wishes" " to be respawned", pcmk_children[lpc].name); pcmk_children[lpc].respawn = FALSE; } } /* Broadcast the fact that one of our processes died * * Try to get some logging of the cause out first though * because we're probably about to get fenced * * Potentially do this only if respawn_count > N * to allow for local recovery */ send_cluster_id(); pcmk_children[lpc].respawn_count += 1; if (pcmk_children[lpc].respawn_count > MAX_RESPAWN) { ais_err("Child respawn count exceeded by %s", pcmk_children[lpc].name); pcmk_children[lpc].respawn = FALSE; } if (pcmk_children[lpc].respawn) { ais_notice("Respawning failed child process: %s", pcmk_children[lpc].name); spawn_child(&(pcmk_children[lpc])); } send_cluster_id(); } } sched_yield(); nanosleep(&waitsleep, 0); } return 0; } static uint32_t pcmk_update_nodeid(void) { int last = local_nodeid; local_nodeid = pcmk_api->totem_nodeid_get(); if (last != local_nodeid) { if (last == 0) { ais_info("Local node id: %u", local_nodeid); } else { char *last_s = NULL; ais_malloc0(last_s, 32); ais_warn("Detected local node id change: %u -> %u", last, local_nodeid); snprintf(last_s, 31, "%u", last); ais_remove_peer(last_s); ais_free(last_s); } update_member(local_nodeid, 0, 0, 1, 0, local_uname, CRM_NODE_MEMBER, NULL); } return local_nodeid; } static void build_path(const char *path_c, mode_t mode) { int offset = 1, len = 0; char *path = ais_strdup(path_c); AIS_CHECK(path != NULL, return); for (len = strlen(path); offset < len; offset++) { if (path[offset] == '/') { path[offset] = 0; if (mkdir(path, mode) < 0 && errno != EEXIST) { ais_perror("Could not create directory '%s'", path); break; } path[offset] = '/'; } } if (mkdir(path, mode) < 0 && errno != EEXIST) { ais_perror("Could not create directory '%s'", path); } ais_free(path); } int pcmk_startup(struct corosync_api_v1 *init_with) { int rc = 0; int lpc = 0; int start_seq = 1; struct utsname us; struct rlimit cores; static int max = SIZEOF(pcmk_children); uid_t pcmk_uid = 0; gid_t pcmk_gid = 0; uid_t root_uid = -1; uid_t cs_uid = geteuid(); pcmk_user_lookup("root", &root_uid, NULL); pcmk_api = init_with; pcmk_env.debug = "0"; pcmk_env.logfile = NULL; pcmk_env.use_logd = "false"; pcmk_env.syslog = "daemon"; if (cs_uid != root_uid) { ais_err("Corosync must be configured to start as 'root'," " otherwise Pacemaker cannot manage services." " Expected %d got %d", root_uid, cs_uid); return -1; } process_ais_conf(); membership_list = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, destroy_ais_node); membership_notify_list = g_hash_table_new(g_direct_hash, g_direct_equal); ipc_client_list = g_hash_table_new(g_direct_hash, g_direct_equal); ais_info("CRM: Initialized"); log_printf(LOG_INFO, "Logging: Initialized %s\n", __PRETTY_FUNCTION__); rc = getrlimit(RLIMIT_CORE, &cores); if (rc < 0) { ais_perror("Cannot determine current maximum core size."); } else { if (cores.rlim_max == 0 && geteuid() == 0) { cores.rlim_max = RLIM_INFINITY; } else { ais_info("Maximum core file size is: %lu", cores.rlim_max); } cores.rlim_cur = cores.rlim_max; rc = setrlimit(RLIMIT_CORE, &cores); if (rc < 0) { ais_perror("Core file generation will remain disabled." " Core files are an important diagnositic tool," " please consider enabling them by default."); } #if 0 /* system() is not thread-safe, can't call from here * Actually, its a pretty hacky way to try and achieve this anyway */ if (system("echo 1 > /proc/sys/kernel/core_uses_pid") != 0) { ais_perror("Could not enable /proc/sys/kernel/core_uses_pid"); } #endif } if (pcmk_user_lookup(CRM_DAEMON_USER, &pcmk_uid, &pcmk_gid) < 0) { ais_err("Cluster user %s does not exist, aborting Pacemaker startup", CRM_DAEMON_USER); return TRUE; } rc = mkdir(CRM_STATE_DIR, 0750); rc = chown(CRM_STATE_DIR, pcmk_uid, pcmk_gid); /* Used by stonithd */ build_path(HA_STATE_DIR "/heartbeat", 0755); /* Used by RAs - Leave owned by root */ build_path(CRM_RSCTMP_DIR, 0755); rc = uname(&us); AIS_ASSERT(rc == 0); local_uname = ais_strdup(us.nodename); local_uname_len = strlen(local_uname); ais_info("Service: %d", PCMK_SERVICE_ID); ais_info("Local hostname: %s", local_uname); pcmk_update_nodeid(); if (use_mcp == FALSE) { pthread_create(&pcmk_wait_thread, NULL, pcmk_wait_dispatch, NULL); for (start_seq = 1; start_seq < max; start_seq++) { /* dont start anything with start_seq < 1 */ for (lpc = 0; lpc < max; lpc++) { if (start_seq == pcmk_children[lpc].start_seq) { spawn_child(&(pcmk_children[lpc])); } } } } return 0; } /* static void ais_print_node(const char *prefix, struct totem_ip_address *host) { int len = 0; char *buffer = NULL; ais_malloc0(buffer, INET6_ADDRSTRLEN+1); inet_ntop(host->family, host->addr, buffer, INET6_ADDRSTRLEN); len = strlen(buffer); ais_info("%s: %.*s", prefix, len, buffer); ais_free(buffer); } */ #if 0 /* copied here for reference from exec/totempg.c */ char * totempg_ifaces_print(unsigned int nodeid) { static char iface_string[256 * INTERFACE_MAX]; char one_iface[64]; struct totem_ip_address interfaces[INTERFACE_MAX]; char **status; unsigned int iface_count; unsigned int i; int res; iface_string[0] = '\0'; res = totempg_ifaces_get(nodeid, interfaces, &status, &iface_count); if (res == -1) { return ("no interface found for nodeid"); } for (i = 0; i < iface_count; i++) { sprintf(one_iface, "r(%d) ip(%s), ", i, totemip_print(&interfaces[i])); strcat(iface_string, one_iface); } return (iface_string); } #endif static void ais_mark_unseen_peer_dead(gpointer key, gpointer value, gpointer user_data) { int *changed = user_data; crm_node_t *node = value; if (node->last_seen != membership_seq && ais_str_eq(CRM_NODE_LOST, node->state) == FALSE) { ais_info("Node %s was not seen in the previous transition", node->uname); *changed += update_member(node->id, 0, membership_seq, node->votes, node->processes, node->uname, CRM_NODE_LOST, NULL); } } void pcmk_peer_update(enum totem_configuration_type configuration_type, const unsigned int *member_list, size_t member_list_entries, const unsigned int *left_list, size_t left_list_entries, const unsigned int *joined_list, size_t joined_list_entries, const struct memb_ring_id *ring_id) { int lpc = 0; int changed = 0; int do_update = 0; AIS_ASSERT(ring_id != NULL); switch (configuration_type) { case TOTEM_CONFIGURATION_REGULAR: do_update = 1; break; case TOTEM_CONFIGURATION_TRANSITIONAL: break; } membership_seq = ring_id->seq; ais_notice("%s membership event on ring %lld: memb=%ld, new=%ld, lost=%ld", do_update ? "Stable" : "Transitional", ring_id->seq, (long)member_list_entries, (long)joined_list_entries, (long)left_list_entries); if (do_update == 0) { for (lpc = 0; lpc < joined_list_entries; lpc++) { const char *prefix = "new: "; uint32_t nodeid = joined_list[lpc]; ais_info("%s %s %u", prefix, member_uname(nodeid), nodeid); } for (lpc = 0; lpc < member_list_entries; lpc++) { const char *prefix = "memb:"; uint32_t nodeid = member_list[lpc]; ais_info("%s %s %u", prefix, member_uname(nodeid), nodeid); } for (lpc = 0; lpc < left_list_entries; lpc++) { const char *prefix = "lost:"; uint32_t nodeid = left_list[lpc]; ais_info("%s %s %u", prefix, member_uname(nodeid), nodeid); } return; } for (lpc = 0; lpc < joined_list_entries; lpc++) { const char *prefix = "NEW: "; uint32_t nodeid = joined_list[lpc]; crm_node_t *node = NULL; changed += update_member(nodeid, 0, membership_seq, -1, 0, NULL, CRM_NODE_MEMBER, NULL); ais_info("%s %s %u", prefix, member_uname(nodeid), nodeid); node = g_hash_table_lookup(membership_list, GUINT_TO_POINTER(nodeid)); if (node->addr == NULL) { const char *addr = totempg_ifaces_print(nodeid); node->addr = ais_strdup(addr); ais_debug("Node %u has address %s", nodeid, node->addr); } } for (lpc = 0; lpc < member_list_entries; lpc++) { const char *prefix = "MEMB:"; uint32_t nodeid = member_list[lpc]; changed += update_member(nodeid, 0, membership_seq, -1, 0, NULL, CRM_NODE_MEMBER, NULL); ais_info("%s %s %u", prefix, member_uname(nodeid), nodeid); } for (lpc = 0; lpc < left_list_entries; lpc++) { const char *prefix = "LOST:"; uint32_t nodeid = left_list[lpc]; changed += update_member(nodeid, 0, membership_seq, -1, 0, NULL, CRM_NODE_LOST, NULL); ais_info("%s %s %u", prefix, member_uname(nodeid), nodeid); } if (changed && joined_list_entries == 0 && left_list_entries == 0) { ais_err("Something strange happened: %d", changed); changed = 0; } ais_trace("Reaping unseen nodes..."); g_hash_table_foreach(membership_list, ais_mark_unseen_peer_dead, &changed); if (member_list_entries > 1) { /* Used to set born-on in send_cluster_id()) * We need to wait until we have at least one peer since first * membership id is based on the one before we stopped and isn't reliable */ have_reliable_membership_id = TRUE; } if (changed) { ais_debug("%d nodes changed", changed); pcmk_update_nodeid(); send_member_notification(); } send_cluster_id(); } int pcmk_ipc_exit(void *conn) { int lpc = 0; const char *client = NULL; void *async_conn = conn; for (; lpc < SIZEOF(pcmk_children); lpc++) { if (pcmk_children[lpc].conn == conn) { if (wait_active == FALSE) { /* Make sure the shutdown loop exits */ pcmk_children[lpc].pid = 0; } pcmk_children[lpc].conn = NULL; pcmk_children[lpc].async_conn = NULL; client = pcmk_children[lpc].name; break; } } g_hash_table_remove(membership_notify_list, async_conn); g_hash_table_remove(ipc_client_list, async_conn); if (client) { do_ais_log(LOG_INFO, "Client %s (conn=%p, async-conn=%p) left", client, conn, async_conn); } else { do_ais_log((LOG_DEBUG + 1), "Client %s (conn=%p, async-conn=%p) left", "unknown-transient", conn, async_conn); } return (0); } int pcmk_ipc_connect(void *conn) { /* OpenAIS hasn't finished setting up the connection at this point * Sending messages now messes up the protocol! */ return (0); } /* * Executive message handlers */ void pcmk_cluster_swab(void *msg) { AIS_Message *ais_msg = msg; ais_trace("Performing endian conversion..."); ais_msg->id = swab32(ais_msg->id); ais_msg->size = swab32(ais_msg->size); ais_msg->is_compressed = swab32(ais_msg->is_compressed); ais_msg->compressed_size = swab32(ais_msg->compressed_size); ais_msg->host.id = swab32(ais_msg->host.id); ais_msg->host.pid = swab32(ais_msg->host.pid); ais_msg->host.type = swab32(ais_msg->host.type); ais_msg->host.size = swab32(ais_msg->host.size); ais_msg->host.local = swab32(ais_msg->host.local); ais_msg->sender.id = swab32(ais_msg->sender.id); ais_msg->sender.pid = swab32(ais_msg->sender.pid); ais_msg->sender.type = swab32(ais_msg->sender.type); ais_msg->sender.size = swab32(ais_msg->sender.size); ais_msg->sender.local = swab32(ais_msg->sender.local); ais_msg->header.size = swab32(ais_msg->header.size); ais_msg->header.id = swab32(ais_msg->header.id); ais_msg->header.error = swab32(ais_msg->header.error); } void pcmk_cluster_callback(ais_void_ptr * message, unsigned int nodeid) { const AIS_Message *ais_msg = message; ais_trace("Message from node %u (%s)", nodeid, nodeid == local_nodeid ? "local" : "remote"); /* Shouldn't be required... update_member( ais_msg->sender.id, membership_seq, -1, 0, ais_msg->sender.uname, NULL); */ if (ais_msg->host.size == 0 || ais_str_eq(ais_msg->host.uname, local_uname)) { route_ais_message(ais_msg, FALSE); } else { ais_trace("Discarding Msg[%d] (dest=%s:%s, from=%s:%s)", ais_msg->id, ais_dest(&(ais_msg->host)), msg_type2text(ais_msg->host.type), ais_dest(&(ais_msg->sender)), msg_type2text(ais_msg->sender.type)); } } void pcmk_cluster_id_swab(void *msg) { struct crm_identify_msg_s *ais_msg = msg; ais_trace("Performing endian conversion..."); ais_msg->id = swab32(ais_msg->id); ais_msg->pid = swab32(ais_msg->pid); ais_msg->votes = swab32(ais_msg->votes); ais_msg->processes = swab32(ais_msg->processes); ais_msg->born_on = swab64(ais_msg->born_on); ais_msg->header.size = swab32(ais_msg->header.size); ais_msg->header.id = swab32(ais_msg->header.id); } void pcmk_cluster_id_callback(ais_void_ptr * message, unsigned int nodeid) { int changed = 0; const struct crm_identify_msg_s *msg = message; if (nodeid != msg->id) { ais_err("Invalid message: Node %u claimed to be node %d", nodeid, msg->id); return; } ais_debug("Node update: %s (%s)", msg->uname, msg->version); changed = update_member(nodeid, msg->born_on, membership_seq, msg->votes, msg->processes, msg->uname, NULL, msg->version); if (changed) { send_member_notification(); } } struct res_overlay { cs_ipc_header_response_t header __attribute((aligned(8))); char buf[4096]; }; struct res_overlay *res_overlay = NULL; static void send_ipc_ack(void *conn) { if (res_overlay == NULL) { ais_malloc0(res_overlay, sizeof(struct res_overlay)); } res_overlay->header.id = CRM_MESSAGE_IPC_ACK; res_overlay->header.size = sizeof(cs_ipc_header_response_t); res_overlay->header.error = CS_OK; pcmk_api->ipc_response_send(conn, res_overlay, res_overlay->header.size); } /* local callbacks */ void pcmk_ipc(void *conn, ais_void_ptr * msg) { AIS_Message *mutable; int type = 0, size = 0; gboolean transient = TRUE; const AIS_Message *ais_msg = (const AIS_Message *)msg; void *async_conn = conn; ais_trace("Message from client %p", conn); if (check_message_sanity(msg, ((const AIS_Message *)msg)->data) == FALSE) { /* The message is corrupted - ignore */ send_ipc_ack(conn); msg = NULL; return; } /* Make a copy of the message here and ACK it * The message is only valid until a response is sent * but the response must also be sent _before_ we send anything else */ mutable = ais_msg_copy(ais_msg); AIS_ASSERT(check_message_sanity(mutable, mutable->data)); size = mutable->header.size; /* ais_malloc0(ais_msg, size); */ /* memcpy(ais_msg, msg, size); */ type = mutable->sender.type; ais_trace ("type: %d local: %d conn: %p host type: %d ais: %d sender pid: %d child pid: %d size: %d", type, mutable->host.local, pcmk_children[type].conn, mutable->host.type, crm_msg_ais, mutable->sender.pid, pcmk_children[type].pid, ((int)SIZEOF(pcmk_children))); if (type > crm_msg_none && type < SIZEOF(pcmk_children)) { /* known child process */ transient = FALSE; } #if 0 /* If this check fails, the order of pcmk_children probably * doesn't match that of the crm_ais_msg_types enum */ AIS_CHECK(transient || mutable->sender.pid == pcmk_children[type].pid, ais_err("Sender: %d, child[%d]: %d", mutable->sender.pid, type, pcmk_children[type].pid); ais_free(mutable); return); #endif if (transient == FALSE && type > crm_msg_none && mutable->host.local && pcmk_children[type].conn == NULL && mutable->host.type == crm_msg_ais) { AIS_CHECK(mutable->sender.type != mutable->sender.pid, ais_err("Pid=%d, type=%d", mutable->sender.pid, mutable->sender.type)); ais_info("Recorded connection %p for %s/%d", conn, pcmk_children[type].name, pcmk_children[type].pid); pcmk_children[type].conn = conn; pcmk_children[type].async_conn = async_conn; /* Make sure they have the latest membership */ if (pcmk_children[type].flags & crm_flag_members) { char *update = pcmk_generate_membership_data(); g_hash_table_replace(membership_notify_list, async_conn, async_conn); ais_info("Sending membership update " U64T " to %s", membership_seq, pcmk_children[type].name); send_client_msg(async_conn, crm_class_members, crm_msg_none, update); } } else if (transient) { AIS_CHECK(mutable->sender.type == mutable->sender.pid, ais_err("Pid=%d, type=%d", mutable->sender.pid, mutable->sender.type)); g_hash_table_replace(ipc_client_list, async_conn, GUINT_TO_POINTER(mutable->sender.pid)); } mutable->sender.id = local_nodeid; mutable->sender.size = local_uname_len; memset(mutable->sender.uname, 0, MAX_NAME); memcpy(mutable->sender.uname, local_uname, mutable->sender.size); route_ais_message(mutable, TRUE); send_ipc_ack(conn); msg = NULL; ais_free(mutable); } int pcmk_shutdown(void) { int lpc = 0; static int phase = 0; static int max_wait = 0; static time_t next_log = 0; static int max = SIZEOF(pcmk_children); if (use_mcp) { if (pcmk_children[crm_msg_crmd].conn || pcmk_children[crm_msg_stonith_ng].conn) { time_t now = time(NULL); if (now > next_log) { next_log = now + 300; ais_notice ("Preventing Corosync shutdown. Please ensure Pacemaker is stopped first."); } return -1; } ais_notice("Unloading Pacemaker plugin"); return 0; } if (phase == 0) { ais_notice("Shuting down Pacemaker"); phase = max; } wait_active = FALSE; /* stop the wait loop */ for (; phase > 0; phase--) { /* dont stop anything with start_seq < 1 */ for (lpc = max - 1; lpc >= 0; lpc--) { if (phase != pcmk_children[lpc].start_seq) { continue; } if (pcmk_children[lpc].pid) { pid_t pid = 0; int status = 0; time_t now = time(NULL); if (pcmk_children[lpc].respawn) { max_wait = 5; /* 5 * 30s = 2.5 minutes... plenty once the crmd is gone */ next_log = now + 30; pcmk_children[lpc].respawn = FALSE; stop_child(&(pcmk_children[lpc]), SIGTERM); } pid = wait4(pcmk_children[lpc].pid, &status, WNOHANG, NULL); if (pid < 0) { ais_perror("Call to wait4(%s/%d) failed - treating it as stopped", pcmk_children[lpc].name, pcmk_children[lpc].pid); } else if (pid == 0) { if (now >= next_log) { max_wait--; next_log = now + 30; ais_notice("Still waiting for %s (pid=%d, seq=%d) to terminate...", pcmk_children[lpc].name, pcmk_children[lpc].pid, pcmk_children[lpc].start_seq); if (max_wait <= 0 && phase < pcmk_children[crm_msg_crmd].start_seq) { ais_err("Child %s taking too long to terminate, sending SIGKILL", pcmk_children[lpc].name); stop_child(&(pcmk_children[lpc]), SIGKILL); } } /* Return control to corosync */ return -1; } } /* cleanup */ ais_notice("%s confirmed stopped", pcmk_children[lpc].name); pcmk_children[lpc].async_conn = NULL; pcmk_children[lpc].conn = NULL; pcmk_children[lpc].pid = 0; } } send_cluster_id(); ais_notice("Shutdown complete"); /* TODO: Add back the logsys flush call once its written */ return 0; } struct member_loop_data { char *string; }; static void member_vote_count_fn(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; if (ais_str_eq(CRM_NODE_MEMBER, node->state)) { plugin_has_votes += node->votes; } } void member_loop_fn(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; struct member_loop_data *data = user_data; ais_trace("Dumping node %u", node->id); data->string = append_member(data->string, node); } char * pcmk_generate_membership_data(void) { int size = 0; struct member_loop_data data; size = 256; ais_malloc0(data.string, size); /* Ensure the list of active processes is up-to-date */ update_member(local_nodeid, 0, 0, -1, get_process_list(), local_uname, CRM_NODE_MEMBER, NULL); plugin_has_votes = 0; g_hash_table_foreach(membership_list, member_vote_count_fn, NULL); if (plugin_has_votes > plugin_expected_votes) { update_expected_votes(plugin_has_votes); } snprintf(data.string, size, "", membership_seq, plugin_has_quorum()? "true" : "false", plugin_expected_votes, plugin_has_votes); g_hash_table_foreach(membership_list, member_loop_fn, &data); size = strlen(data.string); data.string = realloc(data.string, size + 9); /* 9 = + nul */ sprintf(data.string + size, ""); return data.string; } void pcmk_nodes(void *conn, ais_void_ptr * msg) { char *data = pcmk_generate_membership_data(); void *async_conn = conn; /* send the ACK before we send any other messages * - but after we no longer need to access the message */ send_ipc_ack(conn); msg = NULL; if (async_conn) { send_client_msg(async_conn, crm_class_members, crm_msg_none, data); } ais_free(data); } void pcmk_remove_member(void *conn, ais_void_ptr * msg) { const AIS_Message *ais_msg = msg; char *data = get_ais_data(ais_msg); send_ipc_ack(conn); msg = NULL; if (data != NULL) { char *bcast = ais_concat("remove-peer", data, ':'); send_cluster_msg(crm_msg_ais, NULL, bcast); ais_info("Sent: %s", bcast); ais_free(bcast); } ais_free(data); } static void send_quorum_details(void *conn) { int size = 256; char *data = NULL; ais_malloc0(data, size); snprintf(data, size, "", membership_seq, plugin_has_quorum()? "true" : "false", plugin_expected_votes, plugin_has_votes); send_client_msg(conn, crm_class_quorum, crm_msg_none, data); ais_free(data); } void pcmk_quorum(void *conn, ais_void_ptr * msg) { char *dummy = NULL; const AIS_Message *ais_msg = msg; char *data = get_ais_data(ais_msg); send_ipc_ack(conn); msg = NULL; /* Make sure the current number of votes is accurate */ dummy = pcmk_generate_membership_data(); ais_free(dummy); /* Calls without data just want the current quorum details */ if (data != NULL && strlen(data) > 0) { int value = ais_get_int(data, NULL); update_expected_votes(value); } send_quorum_details(conn); ais_free(data); } void pcmk_notify(void *conn, ais_void_ptr * msg) { const AIS_Message *ais_msg = msg; char *data = get_ais_data(ais_msg); void *async_conn = conn; int enable = 0; int sender = ais_msg->sender.pid; send_ipc_ack(conn); msg = NULL; if (ais_str_eq("true", data)) { enable = 1; } ais_info("%s node notifications for child %d (%p)", enable ? "Enabling" : "Disabling", sender, async_conn); if (enable) { g_hash_table_replace(membership_notify_list, async_conn, async_conn); } else { g_hash_table_remove(membership_notify_list, async_conn); } ais_free(data); } void pcmk_nodeid(void *conn, ais_void_ptr * msg) { static int counter = 0; struct crm_ais_nodeid_resp_s resp; ais_trace("Sending local nodeid: %d to %p[%d]", local_nodeid, conn, counter); resp.header.id = crm_class_nodeid; resp.header.size = sizeof(struct crm_ais_nodeid_resp_s); resp.header.error = CS_OK; resp.id = local_nodeid; resp.counter = counter++; memset(resp.uname, 0, MAX_NAME); memcpy(resp.uname, local_uname, local_uname_len); memset(resp.cname, 0, MAX_NAME); memcpy(resp.cname, local_cname, local_cname_len); pcmk_api->ipc_response_send(conn, &resp, resp.header.size); } static gboolean ghash_send_update(gpointer key, gpointer value, gpointer data) { if (send_client_msg(value, crm_class_members, crm_msg_none, data) != 0) { /* remove it */ return TRUE; } return FALSE; } void send_member_notification(void) { char *update = pcmk_generate_membership_data(); ais_info("Sending membership update " U64T " to %d children", membership_seq, g_hash_table_size(membership_notify_list)); g_hash_table_foreach_remove(membership_notify_list, ghash_send_update, update); ais_free(update); } gboolean check_message_sanity(const AIS_Message * msg, const char *data) { gboolean sane = TRUE; gboolean repaired = FALSE; int dest = msg->host.type; int tmp_size = msg->header.size - sizeof(AIS_Message); if (sane && msg->header.size == 0) { ais_err("Message with no size"); sane = FALSE; } if (sane && msg->header.error != CS_OK) { ais_err("Message header contains an error: %d", msg->header.error); sane = FALSE; } AIS_CHECK(msg->header.size > sizeof(AIS_Message), ais_err("Message %d size too small: %d < %zu", msg->header.id, msg->header.size, sizeof(AIS_Message)); return FALSE); if (sane && ais_data_len(msg) != tmp_size) { ais_warn("Message payload size is incorrect: expected %d, got %d", ais_data_len(msg), tmp_size); sane = TRUE; } if (sane && ais_data_len(msg) == 0) { ais_err("Message with no payload"); sane = FALSE; } if (sane && data && msg->is_compressed == FALSE) { int str_size = strlen(data) + 1; if (ais_data_len(msg) != str_size) { int lpc = 0; ais_err("Message payload is corrupted: expected %d bytes, got %d", ais_data_len(msg), str_size); sane = FALSE; for (lpc = (str_size - 10); lpc < msg->size; lpc++) { if (lpc < 0) { lpc = 0; } ais_trace("bad_data[%d]: %d / '%c'", lpc, data[lpc], data[lpc]); } } } if (sane == FALSE) { AIS_CHECK(sane, ais_err ("Invalid message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size)); } else if (repaired) { ais_err ("Repaired message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } else { ais_trace ("Verified message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } return sane; } static int delivered_transient = 0; static void deliver_transient_msg(gpointer key, gpointer value, gpointer user_data) { int pid = GPOINTER_TO_INT(value); AIS_Message *mutable = user_data; if (pid == mutable->host.type) { int rc = send_client_ipc(key, mutable); delivered_transient++; ais_info("Sent message to %s.%d (rc=%d)", ais_dest(&(mutable->host)), pid, rc); if (rc != 0) { ais_warn("Sending message to %s.%d failed (rc=%d)", ais_dest(&(mutable->host)), pid, rc); log_ais_message(LOG_DEBUG, mutable); } } } gboolean route_ais_message(const AIS_Message * msg, gboolean local_origin) { int rc = 0; int dest = msg->host.type; const char *reason = "unknown"; AIS_Message *mutable = ais_msg_copy(msg); static int service_id = SERVICE_ID_MAKE(PCMK_SERVICE_ID, 0); ais_trace("Msg[%d] (dest=%s:%s, from=%s:%s.%d, remote=%s, size=%d)", mutable->id, ais_dest(&(mutable->host)), msg_type2text(dest), ais_dest(&(mutable->sender)), msg_type2text(mutable->sender.type), mutable->sender.pid, local_origin ? "false" : "true", ais_data_len((mutable))); if (local_origin == FALSE) { if (mutable->host.size == 0 || ais_str_eq(local_uname, mutable->host.uname)) { mutable->host.local = TRUE; } } if (check_message_sanity(mutable, mutable->data) == FALSE) { /* Dont send this message to anyone */ rc = 1; goto bail; } if (mutable->host.local) { void *conn = NULL; const char *lookup = NULL; if (dest == crm_msg_ais) { process_ais_message(mutable); goto bail; } else if (dest == crm_msg_lrmd) { /* lrmd messages are routed via the crm */ dest = crm_msg_crmd; } else if (dest == crm_msg_te) { /* te messages are routed via the crm */ dest = crm_msg_crmd; } else if (dest >= SIZEOF(pcmk_children)) { /* Transient client */ delivered_transient = 0; g_hash_table_foreach(ipc_client_list, deliver_transient_msg, mutable); if (delivered_transient) { ais_trace("Sent message to %d transient clients: %d", delivered_transient, dest); goto bail; } else { /* try the crmd */ ais_trace("Sending message to transient client %d via crmd", dest); dest = crm_msg_crmd; } } else if (dest == 0) { ais_err("Invalid destination: %d", dest); log_ais_message(LOG_ERR, mutable); log_printf(LOG_ERR, "%s", get_ais_data(mutable)); rc = 1; goto bail; } lookup = msg_type2text(dest); conn = pcmk_children[dest].async_conn; /* the cluster fails in weird and wonderfully obscure ways when this is not true */ AIS_ASSERT(ais_str_eq(lookup, pcmk_children[dest].name)); if (mutable->header.id == service_id) { mutable->header.id = 0; /* reset this back to zero for IPC messages */ } else if (mutable->header.id != 0) { ais_err("reset header id back to zero from %d", mutable->header.id); mutable->header.id = 0; /* reset this back to zero for IPC messages */ } reason = "ipc delivery failed"; rc = send_client_ipc(conn, mutable); } else if (local_origin) { /* forward to other hosts */ ais_trace("Forwarding to cluster"); reason = "cluster delivery failed"; rc = send_cluster_msg_raw(mutable); } if (rc != 0) { ais_warn("Sending message to %s.%s failed: %s (rc=%d)", ais_dest(&(mutable->host)), msg_type2text(dest), reason, rc); log_ais_message(LOG_DEBUG, mutable); } bail: ais_free(mutable); return rc == 0 ? TRUE : FALSE; } int send_cluster_msg_raw(const AIS_Message * ais_msg) { int rc = 0; struct iovec iovec; static uint32_t msg_id = 0; AIS_Message *mutable = ais_msg_copy(ais_msg); AIS_ASSERT(local_nodeid != 0); AIS_ASSERT(ais_msg->header.size == (sizeof(AIS_Message) + ais_data_len(ais_msg))); if (mutable->id == 0) { msg_id++; AIS_CHECK(msg_id != 0 /* detect wrap-around */ , msg_id++; ais_err("Message ID wrapped around")); mutable->id = msg_id; } mutable->header.error = CS_OK; mutable->header.id = SERVICE_ID_MAKE(PCMK_SERVICE_ID, 0); mutable->sender.id = local_nodeid; mutable->sender.size = local_uname_len; memset(mutable->sender.uname, 0, MAX_NAME); memcpy(mutable->sender.uname, local_uname, mutable->sender.size); iovec.iov_base = (char *)mutable; iovec.iov_len = mutable->header.size; ais_trace("Sending message (size=%u)", (unsigned int)iovec.iov_len); rc = pcmk_api->totem_mcast(&iovec, 1, TOTEMPG_SAFE); if (rc == 0 && mutable->is_compressed == FALSE) { ais_trace("Message sent: %.80s", mutable->data); } AIS_CHECK(rc == 0, ais_err("Message not sent (%d): %.120s", rc, mutable->data)); ais_free(mutable); return rc; } #define min(x,y) (x)<(y)?(x):(y) void send_cluster_id(void) { int rc = 0; int len = 0; time_t now = time(NULL); struct iovec iovec; struct crm_identify_msg_s *msg = NULL; static time_t started = 0; static uint64_t first_seq = 0; AIS_ASSERT(local_nodeid != 0); if (started == 0) { started = now; first_seq = membership_seq; } if (local_born_on == 0) { if (started + 15 < now) { ais_debug("Born-on set to: " U64T " (age)", first_seq); local_born_on = first_seq; } else if (have_reliable_membership_id) { ais_debug("Born-on set to: " U64T " (peer)", membership_seq); local_born_on = membership_seq; } else { ais_debug("Leaving born-on unset: " U64T, membership_seq); } } ais_malloc0(msg, sizeof(struct crm_identify_msg_s)); msg->header.size = sizeof(struct crm_identify_msg_s); msg->id = local_nodeid; /* msg->header.error = CS_OK; */ msg->header.id = SERVICE_ID_MAKE(PCMK_SERVICE_ID, 1); len = min(local_uname_len, MAX_NAME - 1); memset(msg->uname, 0, MAX_NAME); memcpy(msg->uname, local_uname, len); len = min(strlen(VERSION), MAX_NAME - 1); memset(msg->version, 0, MAX_NAME); memcpy(msg->version, VERSION, len); msg->votes = 1; msg->pid = getpid(); msg->processes = get_process_list(); msg->born_on = local_born_on; ais_debug("Local update: id=%u, born=" U64T ", seq=" U64T "", local_nodeid, local_born_on, membership_seq); update_member(local_nodeid, local_born_on, membership_seq, msg->votes, msg->processes, NULL, NULL, VERSION); iovec.iov_base = (char *)msg; iovec.iov_len = msg->header.size; rc = pcmk_api->totem_mcast(&iovec, 1, TOTEMPG_SAFE); AIS_CHECK(rc == 0, ais_err("Message not sent (%d)", rc)); ais_free(msg); } static gboolean ghash_send_removal(gpointer key, gpointer value, gpointer data) { send_quorum_details(value); if (send_client_msg(value, crm_class_rmpeer, crm_msg_none, data) != 0) { /* remove it */ return TRUE; } return FALSE; } void ais_remove_peer(char *node_id) { uint32_t id = ais_get_int(node_id, NULL); crm_node_t *node = g_hash_table_lookup(membership_list, GUINT_TO_POINTER(id)); if (node == NULL) { ais_info("Peer %u is unknown", id); } else if (ais_str_eq(CRM_NODE_MEMBER, node->state)) { ais_warn("Peer %u/%s is still active", id, node->uname); } else if (g_hash_table_remove(membership_list, GUINT_TO_POINTER(id))) { plugin_expected_votes--; ais_notice("Removed dead peer %u from the membership list", id); ais_info("Sending removal of %u to %d children", id, g_hash_table_size(membership_notify_list)); g_hash_table_foreach_remove(membership_notify_list, ghash_send_removal, node_id); } else { ais_warn("Peer %u/%s was not removed", id, node->uname); } } gboolean process_ais_message(const AIS_Message * msg) { int len = ais_data_len(msg); char *data = get_ais_data(msg); do_ais_log(LOG_DEBUG, "Msg[%d] (dest=%s:%s, from=%s:%s.%d, remote=%s, size=%d): %.90s", msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->sender.uname == local_uname ? "false" : "true", ais_data_len(msg), data); if (data && len > 12 && strncmp("remove-peer:", data, 12) == 0) { char *node = data + 12; ais_remove_peer(node); } ais_free(data); return TRUE; } static void member_dump_fn(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; ais_info(" node id:%u, uname=%s state=%s processes=%.16x born=" U64T " seen=" U64T " addr=%s version=%s", node->id, node->uname ? node->uname : "-unknown-", node->state, node->processes, node->born, node->last_seen, node->addr ? node->addr : "-unknown-", node->version ? node->version : "-unknown-"); } void pcmk_exec_dump(void) { /* Called after SIG_USR2 */ process_ais_conf(); ais_info("Local id: %u, uname: %s, born: " U64T, local_nodeid, local_uname, local_born_on); ais_info("Membership id: " U64T ", quorate: %s, expected: %u, actual: %u", membership_seq, plugin_has_quorum()? "true" : "false", plugin_expected_votes, plugin_has_votes); g_hash_table_foreach(membership_list, member_dump_fn, NULL); } diff --git a/lib/ais/utils.c b/lib/ais/utils.c index f635293e19..ab8f9565b3 100644 --- a/lib/ais/utils.c +++ b/lib/ais/utils.c @@ -1,752 +1,751 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include "./utils.h" #include "./plugin.h" struct pcmk_env_s pcmk_env; void log_ais_message(int level, const AIS_Message * msg) { char *data = get_ais_data(msg); qb_log_from_external_source(__func__, __FILE__, "Msg[%d] (dest=%s:%s, from=%s:%s.%d, remote=%s, size=%d): %.90s", level, __LINE__, 0, msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->sender.uname == local_uname ? "false" : "true", ais_data_len(msg), data); /* do_ais_log(level, */ /* "Msg[%d] (dest=%s:%s, from=%s:%s.%d, remote=%s, size=%d): %.90s", */ /* msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type), */ /* ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), */ /* msg->sender.pid, */ /* msg->sender.uname==local_uname?"false":"true", */ /* ais_data_len(msg), data); */ ais_free(data); } /* static gboolean ghash_find_by_uname(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; int id = GPOINTER_TO_INT(user_data); if (node->id == id) { return TRUE; } return FALSE; } */ static int ais_string_to_boolean(const char *s) { int rc = 0; if (s == NULL) { return rc; } if (strcasecmp(s, "true") == 0 || strcasecmp(s, "on") == 0 || strcasecmp(s, "yes") == 0 || strcasecmp(s, "y") == 0 || strcasecmp(s, "1") == 0) { rc = 1; } return rc; } static char *opts_default[] = { NULL, NULL }; static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL }; gboolean spawn_child(crm_child_t * child) { int lpc = 0; uid_t uid = 0; struct rlimit oflimits; gboolean use_valgrind = FALSE; gboolean use_callgrind = FALSE; const char *devnull = "/dev/null"; const char *env_valgrind = getenv("PCMK_valgrind_enabled"); const char *env_callgrind = getenv("PCMK_callgrind_enabled"); if (child->command == NULL) { ais_info("Nothing to do for child \"%s\"", child->name); return TRUE; } if (ais_string_to_boolean(env_callgrind)) { use_callgrind = TRUE; use_valgrind = TRUE; } else if (env_callgrind != NULL && strstr(env_callgrind, child->name)) { use_callgrind = TRUE; use_valgrind = TRUE; } else if (ais_string_to_boolean(env_valgrind)) { use_valgrind = TRUE; } else if (env_valgrind != NULL && strstr(env_valgrind, child->name)) { use_valgrind = TRUE; } if (use_valgrind && strlen(VALGRIND_BIN) == 0) { ais_warn("Cannot enable valgrind for %s:" " The location of the valgrind binary is unknown", child->name); use_valgrind = FALSE; } if (child->uid) { if (pcmk_user_lookup(child->uid, &uid, NULL) < 0) { ais_err("Invalid uid (%s) specified for %s", child->uid, child->name); return FALSE; } } child->pid = fork(); AIS_ASSERT(child->pid != -1); if (child->pid > 0) { /* parent */ ais_info("Forked child %d for process %s%s", child->pid, child->name, use_valgrind ? " (valgrind enabled: " VALGRIND_BIN ")" : ""); } else { /* Setup the two alternate arg arrarys */ opts_vgrind[0] = ais_strdup(VALGRIND_BIN); if (use_callgrind) { opts_vgrind[1] = ais_strdup("--tool=callgrind"); opts_vgrind[2] = ais_strdup("--callgrind-out-file=" CRM_STATE_DIR "/callgrind.out.%p"); opts_vgrind[3] = ais_strdup(child->command); opts_vgrind[4] = NULL; } else { opts_vgrind[1] = ais_strdup(child->command); opts_vgrind[2] = NULL; opts_vgrind[3] = NULL; opts_vgrind[4] = NULL; } opts_default[0] = ais_strdup(child->command);; #if 0 /* Dont set the group for now - it prevents connection to the cluster */ if (gid && setgid(gid) < 0) { ais_perror("Could not set group to %d", gid); } #endif if (uid) { struct passwd *pwent = getpwuid(uid); if(pwent == NULL) { ais_perror("Cannot get password entry of uid: %d", uid); } else if (initgroups(pwent->pw_name, pwent->pw_gid) < 0) { ais_perror("Cannot initalize groups for %s (uid=%d)", pwent->pw_name, uid); } } if (uid && setuid(uid) < 0) { ais_perror("Could not set user to %d (%s)", uid, child->uid); } /* Close all open file descriptors */ getrlimit(RLIMIT_NOFILE, &oflimits); for (; lpc < oflimits.rlim_cur; lpc++) { close(lpc); } (void)open(devnull, O_RDONLY); /* Stdin: fd 0 */ (void)open(devnull, O_WRONLY); /* Stdout: fd 1 */ (void)open(devnull, O_WRONLY); /* Stderr: fd 2 */ /* *INDENT-OFF* */ setenv("HA_COMPRESSION", "bz2", 1); setenv("HA_cluster_type", "openais", 1); setenv("HA_debug", pcmk_env.debug, 1); setenv("HA_logfacility", pcmk_env.syslog, 1); setenv("HA_LOGFACILITY", pcmk_env.syslog, 1); setenv("HA_use_logd", pcmk_env.use_logd, 1); setenv("HA_quorum_type", pcmk_env.quorum, 1); /* *INDENT-ON* */ if (pcmk_env.logfile) { setenv("HA_debugfile", pcmk_env.logfile, 1); } if (use_valgrind) { (void)execvp(VALGRIND_BIN, opts_vgrind); } else { (void)execvp(child->command, opts_default); } ais_perror("FATAL: Cannot exec %s", child->command); exit(100); } return TRUE; } gboolean stop_child(crm_child_t * child, int signal) { if (signal == 0) { signal = SIGTERM; } if (child->command == NULL) { ais_info("Nothing to do for child \"%s\"", child->name); return TRUE; } ais_debug("Stopping CRM child \"%s\"", child->name); if (child->pid <= 0) { ais_trace("Client %s not running", child->name); return TRUE; } errno = 0; if (kill(child->pid, signal) == 0) { ais_notice("Sent -%d to %s: [%d]", signal, child->name, child->pid); } else { ais_perror("Sent -%d to %s: [%d]", signal, child->name, child->pid); } return TRUE; } void destroy_ais_node(gpointer data) { crm_node_t *node = data; ais_info("Destroying entry for node %u", node->id); ais_free(node->addr); ais_free(node->uname); ais_free(node->state); ais_free(node); } int update_member(unsigned int id, uint64_t born, uint64_t seq, int32_t votes, uint32_t procs, const char *uname, const char *state, const char *version) { int changed = 0; crm_node_t *node = NULL; node = g_hash_table_lookup(membership_list, GUINT_TO_POINTER(id)); if (node == NULL) { ais_malloc0(node, sizeof(crm_node_t)); ais_info("Creating entry for node %u born on " U64T "", id, seq); node->id = id; node->addr = NULL; node->state = ais_strdup("unknown"); g_hash_table_insert(membership_list, GUINT_TO_POINTER(id), node); node = g_hash_table_lookup(membership_list, GUINT_TO_POINTER(id)); } AIS_ASSERT(node != NULL); if (seq != 0) { node->last_seen = seq; } if (born != 0 && node->born != born) { changed = TRUE; node->born = born; ais_info("%p Node %u (%s) born on: " U64T, node, id, uname, born); } if (version != NULL) { ais_free(node->version); node->version = ais_strdup(version); } if (uname != NULL) { if (node->uname == NULL || ais_str_eq(node->uname, uname) == FALSE) { ais_info("%p Node %u now known as %s (was: %s)", node, id, uname, node->uname); ais_free(node->uname); node->uname = ais_strdup(uname); changed = TRUE; } } if (procs != 0 && procs != node->processes) { ais_info("Node %s now has process list: %.32x (%u)", node->uname, procs, procs); node->processes = procs; changed = TRUE; } if (votes >= 0 && votes != node->votes) { ais_info("Node %s now has %d quorum votes (was %d)", node->uname, votes, node->votes); node->votes = votes; changed = TRUE; } if (state != NULL) { if (node->state == NULL || ais_str_eq(node->state, state) == FALSE) { ais_free(node->state); node->state = ais_strdup(state); ais_info("Node %u/%s is now: %s", id, node->uname ? node->uname : "unknown", state); changed = TRUE; } } return changed; } void delete_member(uint32_t id, const char *uname) { if (uname == NULL) { g_hash_table_remove(membership_list, GUINT_TO_POINTER(id)); return; } ais_err("Deleting by uname is not yet supported"); } const char * member_uname(uint32_t id) { crm_node_t *node = g_hash_table_lookup(membership_list, GUINT_TO_POINTER(id)); if (node == NULL) { return ".unknown."; } if (node->uname == NULL) { return ".pending."; } return node->uname; } char * append_member(char *data, crm_node_t * node) { int size = 1; /* nul */ int offset = 0; static int fixed_len = 4 + 8 + 7 + 6 + 6 + 7 + 11; if (data) { size = strlen(data); } offset = size; size += fixed_len; size += 32; /* node->id */ size += 100; /* node->seq, node->born */ size += strlen(node->state); if (node->uname) { size += (7 + strlen(node->uname)); } if (node->addr) { size += (6 + strlen(node->addr)); } if (node->version) { size += (9 + strlen(node->version)); } data = realloc(data, size); offset += snprintf(data + offset, size - offset, "id); if (node->uname) { offset += snprintf(data + offset, size - offset, "uname=\"%s\" ", node->uname); } offset += snprintf(data + offset, size - offset, "state=\"%s\" ", node->state); offset += snprintf(data + offset, size - offset, "born=\"" U64T "\" ", node->born); offset += snprintf(data + offset, size - offset, "seen=\"" U64T "\" ", node->last_seen); offset += snprintf(data + offset, size - offset, "votes=\"%d\" ", node->votes); offset += snprintf(data + offset, size - offset, "processes=\"%u\" ", node->processes); if (node->addr) { offset += snprintf(data + offset, size - offset, "addr=\"%s\" ", node->addr); } if (node->version) { offset += snprintf(data + offset, size - offset, "version=\"%s\" ", node->version); } offset += snprintf(data + offset, size - offset, "/>"); return data; } void swap_sender(AIS_Message * msg) { int tmp = 0; char tmp_s[256]; tmp = msg->host.type; msg->host.type = msg->sender.type; msg->sender.type = tmp; tmp = msg->host.type; msg->host.size = msg->sender.type; msg->sender.type = tmp; memcpy(tmp_s, msg->host.uname, 256); memcpy(msg->host.uname, msg->sender.uname, 256); memcpy(msg->sender.uname, tmp_s, 256); } char * get_ais_data(const AIS_Message * msg) { int rc = BZ_OK; char *uncompressed = NULL; unsigned int new_size = msg->size + 1; if (msg->is_compressed == FALSE) { uncompressed = strdup(msg->data); } else { ais_malloc0(uncompressed, new_size); rc = BZ2_bzBuffToBuffDecompress(uncompressed, &new_size, (char *)msg->data, msg->compressed_size, 1, 0); if (rc != BZ_OK) { ais_info("rc=%d, new=%u expected=%u", rc, new_size, msg->size); } AIS_ASSERT(rc == BZ_OK); AIS_ASSERT(new_size == msg->size); } return uncompressed; } int send_cluster_msg(enum crm_ais_msg_types type, const char *host, const char *data) { int rc = 0; int data_len = 0; AIS_Message *ais_msg = NULL; int total_size = sizeof(AIS_Message); AIS_ASSERT(local_nodeid != 0); if (data != NULL) { data_len = 1 + strlen(data); total_size += data_len; } ais_malloc0(ais_msg, total_size); ais_msg->header.size = total_size; ais_msg->header.error = CS_OK; ais_msg->header.id = 0; ais_msg->size = data_len; ais_msg->sender.type = crm_msg_ais; if (data != NULL) { memcpy(ais_msg->data, data, data_len); } ais_msg->host.type = type; ais_msg->host.id = 0; if (host) { ais_msg->host.size = strlen(host); memset(ais_msg->host.uname, 0, MAX_NAME); memcpy(ais_msg->host.uname, host, ais_msg->host.size); /* ais_msg->host.id = nodeid_lookup(host); */ } else { ais_msg->host.type = type; ais_msg->host.size = 0; memset(ais_msg->host.uname, 0, MAX_NAME); } rc = send_cluster_msg_raw(ais_msg); ais_free(ais_msg); return rc; } extern struct corosync_api_v1 *pcmk_api; int send_client_ipc(void *conn, const AIS_Message * ais_msg) { int rc = -1; if (conn == NULL) { rc = -2; } else if (!libais_connection_active(conn)) { ais_warn("Connection no longer active"); rc = -3; /* } else if ((queue->size - 1) == queue->used) { */ /* ais_err("Connection is throttled: %d", queue->size); */ } else { #if SUPPORT_COROSYNC rc = pcmk_api->ipc_dispatch_send(conn, ais_msg, ais_msg->header.size); #endif } return rc; } int send_client_msg(void *conn, enum crm_ais_msg_class class, enum crm_ais_msg_types type, const char *data) { int rc = 0; int data_len = 0; int total_size = sizeof(AIS_Message); AIS_Message *ais_msg = NULL; static int msg_id = 0; AIS_ASSERT(local_nodeid != 0); msg_id++; AIS_ASSERT(msg_id != 0 /* wrap-around */ ); if (data != NULL) { data_len = 1 + strlen(data); } total_size += data_len; ais_malloc0(ais_msg, total_size); ais_msg->id = msg_id; ais_msg->header.id = class; ais_msg->header.size = total_size; ais_msg->header.error = CS_OK; ais_msg->size = data_len; if (data != NULL) { memcpy(ais_msg->data, data, data_len); } ais_msg->host.size = 0; ais_msg->host.type = type; memset(ais_msg->host.uname, 0, MAX_NAME); ais_msg->host.id = 0; ais_msg->sender.type = crm_msg_ais; ais_msg->sender.size = local_uname_len; memset(ais_msg->sender.uname, 0, MAX_NAME); memcpy(ais_msg->sender.uname, local_uname, ais_msg->sender.size); ais_msg->sender.id = local_nodeid; rc = send_client_ipc(conn, ais_msg); if (rc != 0) { ais_warn("Sending message to %s failed: %d", msg_type2text(type), rc); log_ais_message(LOG_DEBUG, ais_msg); } ais_free(ais_msg); return rc; } char * ais_concat(const char *prefix, const char *suffix, char join) { int len = 0; char *new_str = NULL; AIS_ASSERT(prefix != NULL); AIS_ASSERT(suffix != NULL); len = strlen(prefix) + strlen(suffix) + 2; ais_malloc0(new_str, (len)); sprintf(new_str, "%s%c%s", prefix, join, suffix); new_str[len - 1] = 0; return new_str; } hdb_handle_t config_find_init(struct corosync_api_v1 * config, char *name) { hdb_handle_t local_handle = 0; #if SUPPORT_COROSYNC config->object_find_create(OBJECT_PARENT_HANDLE, name, strlen(name), &local_handle); ais_info("Local handle: %lld for %s", (long long)local_handle, name); #endif return local_handle; } hdb_handle_t config_find_next(struct corosync_api_v1 * config, char *name, hdb_handle_t top_handle) { int rc = 0; hdb_handle_t local_handle = 0; #if SUPPORT_COROSYNC rc = config->object_find_next(top_handle, &local_handle); #endif if (rc < 0) { ais_info("No additional configuration supplied for: %s", name); local_handle = 0; } else { ais_info("Processing additional %s options...", name); } return local_handle; } void config_find_done(struct corosync_api_v1 *config, hdb_handle_t local_handle) { #if SUPPORT_COROSYNC config->object_find_destroy(local_handle); #endif } int get_config_opt(struct corosync_api_v1 *config, hdb_handle_t object_service_handle, char *key, char **value, const char *fallback) { char *env_key = NULL; *value = NULL; if (object_service_handle > 0) { config->object_key_get(object_service_handle, key, strlen(key), (void **)value, NULL); } if (*value) { ais_info("Found '%s' for option: %s", *value, key); return 0; } env_key = ais_concat("HA", key, '_'); *value = getenv(env_key); ais_free(env_key); if (*value) { ais_info("Found '%s' in ENV for option: %s", *value, key); return 0; } if (fallback) { ais_info("Defaulting to '%s' for option: %s", fallback, key); *value = ais_strdup(fallback); } else { ais_info("No default for option: %s", key); } return -1; } int ais_get_boolean(const char *value) { if (value == NULL) { return 0; } else if (strcasecmp(value, "true") == 0 || strcasecmp(value, "on") == 0 || strcasecmp(value, "yes") == 0 || strcasecmp(value, "y") == 0 || strcasecmp(value, "1") == 0) { return 1; } return 0; } long long ais_get_int(const char *text, char **end_text) { long long result = -1; char *local_end_text = NULL; errno = 0; if (text != NULL) { #ifdef ANSI_ONLY if (end_text != NULL) { result = strtol(text, end_text, 10); } else { result = strtol(text, &local_end_text, 10); } #else if (end_text != NULL) { result = strtoll(text, end_text, 10); } else { result = strtoll(text, &local_end_text, 10); } #endif if (errno == EINVAL) { ais_err("Conversion of %s failed", text); result = -1; } else if (errno == ERANGE) { ais_err("Conversion of %s was clipped: %lld", text, result); } else if (errno != 0) { ais_perror("Conversion of %s failed:", text); } if (local_end_text != NULL && local_end_text[0] != '\0') { ais_err("Characters left over after parsing '%s': '%s'", text, local_end_text); } } return result; } #define PW_BUFFER_LEN 500 int pcmk_user_lookup(const char *name, uid_t * uid, gid_t * gid) { int rc = -1; char *buffer = NULL; struct passwd pwd; struct passwd *pwentry = NULL; ais_malloc0(buffer, PW_BUFFER_LEN); getpwnam_r(name, &pwd, buffer, PW_BUFFER_LEN, &pwentry); if (pwentry) { rc = 0; if (uid) { *uid = pwentry->pw_uid; } if (gid) { *gid = pwentry->pw_gid; } ais_debug("Cluster user %s has uid=%d gid=%d", name, pwentry->pw_uid, pwentry->pw_gid); } else { ais_err("Cluster user %s does not exist", name); } ais_free(buffer); return rc; } diff --git a/lib/cluster/cluster.c b/lib/cluster/cluster.c index 5684a0b125..0df4d97c72 100644 --- a/lib/cluster/cluster.c +++ b/lib/cluster/cluster.c @@ -1,518 +1,517 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include -#include -#include "stack.h" +#include CRM_TRACE_INIT_DATA(cluster); #if SUPPORT_HEARTBEAT void *hb_library = NULL; #endif static GHashTable *crm_uuid_cache = NULL; static GHashTable *crm_uname_cache = NULL; static char * get_heartbeat_uuid(uint32_t unused, const char *uname) { char *uuid_calc = NULL; #if SUPPORT_HEARTBEAT cl_uuid_t uuid_raw; const char *unknown = "00000000-0000-0000-0000-000000000000"; if (heartbeat_cluster == NULL) { crm_warn("No connection to heartbeat, using uuid=uname"); return NULL; } if (heartbeat_cluster->llc_ops->get_uuid_by_name(heartbeat_cluster, uname, &uuid_raw) == HA_FAIL) { crm_err("get_uuid_by_name() call failed for host %s", uname); free(uuid_calc); return NULL; } uuid_calc = calloc(1, 50); cl_uuid_unparse(&uuid_raw, uuid_calc); if (safe_str_eq(uuid_calc, unknown)) { crm_warn("Could not calculate UUID for %s", uname); free(uuid_calc); return NULL; } #endif return uuid_calc; } static gboolean uname_is_uuid(void) { static const char *uuid_pref = NULL; if (uuid_pref == NULL) { uuid_pref = getenv("PCMK_uname_is_uuid"); } if (uuid_pref == NULL) { /* true is legacy mode */ uuid_pref = "false"; } return crm_is_true(uuid_pref); } int get_corosync_id(int id, const char *uuid) { if (id == 0 && !uname_is_uuid() && is_corosync_cluster()) { id = crm_atoi(uuid, "0"); } return id; } char * get_corosync_uuid(uint32_t id, const char *uname) { if (!uname_is_uuid() && is_corosync_cluster()) { if (id <= 0) { /* Try the membership cache... */ crm_node_t *node = g_hash_table_lookup(crm_peer_cache, uname); if (node != NULL) { id = node->id; } } if (id > 0) { return crm_itoa(id); } else { crm_warn("Node %s is not yet known by corosync", uname); } } else if (uname != NULL) { return strdup(uname); } return NULL; } const char * get_node_uuid(uint32_t id, const char *uname) { char *uuid = NULL; enum cluster_type_e type = get_cluster_type(); if (crm_uuid_cache == NULL) { crm_uuid_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); } /* avoid blocking heartbeat calls where possible */ if (uname) { uuid = g_hash_table_lookup(crm_uuid_cache, uname); } if (uuid != NULL) { return uuid; } switch (type) { case pcmk_cluster_corosync: uuid = get_corosync_uuid(id, uname); break; case pcmk_cluster_cman: case pcmk_cluster_classic_ais: if (uname) { uuid = strdup(uname); } break; case pcmk_cluster_heartbeat: uuid = get_heartbeat_uuid(id, uname); break; case pcmk_cluster_unknown: case pcmk_cluster_invalid: crm_err("Unsupported cluster type"); break; } if (uuid == NULL) { return NULL; } if (uname) { g_hash_table_insert(crm_uuid_cache, strdup(uname), uuid); return g_hash_table_lookup(crm_uuid_cache, uname); } /* Memory leak! */ CRM_LOG_ASSERT(uuid != NULL); return uuid; } gboolean crm_cluster_connect(char **our_uname, char **our_uuid, void *dispatch, void *destroy, #if SUPPORT_HEARTBEAT ll_cluster_t ** hb_conn #else void **hb_conn #endif ) { enum cluster_type_e type = get_cluster_type(); crm_notice("Connecting to cluster infrastructure: %s", name_for_cluster_type(type)); if (hb_conn != NULL) { *hb_conn = NULL; } #if SUPPORT_COROSYNC if (is_openais_cluster()) { crm_peer_init(); return init_ais_connection(dispatch, destroy, our_uuid, our_uname, NULL); } #endif #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { int rv; CRM_ASSERT(hb_conn != NULL); /* coverity[var_deref_op] False positive */ if (*hb_conn == NULL) { /* No object passed in, create a new one. */ ll_cluster_t *(*new_cluster) (const char *llctype) = find_library_function(&hb_library, HEARTBEAT_LIBRARY, "ll_cluster_new", 1); *hb_conn = (*new_cluster) ("heartbeat"); /* dlclose(handle); */ } else { /* Object passed in. Disconnect first, then reconnect below. */ ll_cluster_t *conn = *hb_conn; conn->llc_ops->signoff(conn, FALSE); } /* make sure we are disconnected first with the old object, if any. */ if (heartbeat_cluster && heartbeat_cluster != *hb_conn) { heartbeat_cluster->llc_ops->signoff(heartbeat_cluster, FALSE); } CRM_ASSERT(*hb_conn != NULL); heartbeat_cluster = *hb_conn; rv = register_heartbeat_conn(heartbeat_cluster, our_uuid, our_uname, dispatch, destroy); if (rv) { /* we'll benefit from a bigger queue length on heartbeat side. * Otherwise, if peers send messages faster than we can consume * them right now, heartbeat messaging layer will kick us out once * it's (small) default queue fills up :( * If we fail to adjust the sendq length, that's not yet fatal, though. */ if (HA_OK != (*hb_conn)->llc_ops->set_sendq_len(*hb_conn, 1024)) { crm_warn("Cannot set sendq length: %s", (*hb_conn)->llc_ops->errmsg(*hb_conn)); } } return rv; } #endif crm_info("Unsupported cluster stack: %s", getenv("HA_cluster_type")); return FALSE; } gboolean send_cluster_message(const char *node, enum crm_ais_msg_types service, xmlNode * data, gboolean ordered) { #if SUPPORT_COROSYNC if (is_openais_cluster()) { return send_ais_message(data, FALSE, node, service); } #endif #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { return send_ha_message(heartbeat_cluster, data, node, ordered); } #endif return FALSE; } void empty_uuid_cache(void) { if (crm_uuid_cache != NULL) { g_hash_table_destroy(crm_uuid_cache); crm_uuid_cache = NULL; } } void unget_uuid(const char *uname) { if (crm_uuid_cache == NULL) { return; } g_hash_table_remove(crm_uuid_cache, uname); } const char * get_uuid(const char *uname) { return get_node_uuid(0, uname); } const char * get_uname(const char *uuid) { const char *uname = NULL; if (crm_uname_cache == NULL) { crm_uname_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); } CRM_CHECK(uuid != NULL, return NULL); /* avoid blocking calls where possible */ uname = g_hash_table_lookup(crm_uname_cache, uuid); if (uname != NULL) { crm_trace("%s = %s (cached)", uuid, uname); return uname; } #if SUPPORT_COROSYNC if (is_openais_cluster()) { if (!uname_is_uuid() && is_corosync_cluster()) { uint32_t id = crm_int_helper(uuid, NULL); crm_node_t *node = g_hash_table_lookup(crm_peer_id_cache, GUINT_TO_POINTER(id)); uname = node ? node->uname : NULL; } else { uname = uuid; } if (uname) { crm_trace("Storing %s = %s", uuid, uname); g_hash_table_insert(crm_uname_cache, strdup(uuid), strdup(uname)); } } #endif #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { if (heartbeat_cluster != NULL && uuid != NULL) { cl_uuid_t uuid_raw; char *hb_uname = NULL; char *uuid_copy = strdup(uuid); cl_uuid_parse(uuid_copy, &uuid_raw); hb_uname = malloc( MAX_NAME); if (heartbeat_cluster->llc_ops->get_name_by_uuid(heartbeat_cluster, &uuid_raw, hb_uname, MAX_NAME) == HA_FAIL) { crm_err("Could not calculate uname for %s", uuid); free(uuid_copy); free(hb_uname); } else { crm_trace("Storing %s = %s", uuid, uname); g_hash_table_insert(crm_uname_cache, uuid_copy, hb_uname); } } } #endif return g_hash_table_lookup(crm_uname_cache, uuid); } void set_uuid(xmlNode * node, const char *attr, const char *uname) { const char *uuid_calc = get_uuid(uname); crm_xml_add(node, attr, uuid_calc); return; } const char * name_for_cluster_type(enum cluster_type_e type) { switch (type) { case pcmk_cluster_classic_ais: return "classic openais (with plugin)"; case pcmk_cluster_cman: return "cman"; case pcmk_cluster_corosync: return "corosync"; case pcmk_cluster_heartbeat: return "heartbeat"; case pcmk_cluster_unknown: return "unknown"; case pcmk_cluster_invalid: return "invalid"; } crm_err("Invalid cluster type: %d", type); return "invalid"; } /* Do not expose these two */ int set_cluster_type(enum cluster_type_e type); static enum cluster_type_e cluster_type = pcmk_cluster_unknown; int set_cluster_type(enum cluster_type_e type) { if (cluster_type == pcmk_cluster_unknown) { crm_info("Cluster type set to: %s", name_for_cluster_type(type)); cluster_type = type; return 0; } else if (cluster_type == type) { return 0; } else if (pcmk_cluster_unknown == type) { cluster_type = type; return 0; } crm_err("Cluster type already set to %s, ignoring %s", name_for_cluster_type(cluster_type), name_for_cluster_type(type)); return -1; } enum cluster_type_e get_cluster_type(void) { if (cluster_type == pcmk_cluster_unknown) { const char *cluster = getenv("HA_cluster_type"); cluster_type = pcmk_cluster_invalid; if (cluster) { crm_info("Cluster type is: '%s'", cluster); } else { #if SUPPORT_COROSYNC cluster_type = find_corosync_variant(); if (cluster_type == pcmk_cluster_unknown) { cluster = "heartbeat"; crm_info("Assuming a 'heartbeat' based cluster"); } else { cluster = name_for_cluster_type(cluster_type); crm_info("Detected an active '%s' cluster", cluster); } #else cluster = "heartbeat"; #endif } if (safe_str_eq(cluster, "heartbeat")) { #if SUPPORT_HEARTBEAT cluster_type = pcmk_cluster_heartbeat; #else cluster_type = pcmk_cluster_invalid; #endif } else if (safe_str_eq(cluster, "openais") || safe_str_eq(cluster, "classic openais (with plugin)")) { #if SUPPORT_COROSYNC cluster_type = pcmk_cluster_classic_ais; #else cluster_type = pcmk_cluster_invalid; #endif } else if (safe_str_eq(cluster, "corosync")) { #if SUPPORT_COROSYNC cluster_type = pcmk_cluster_corosync; #else cluster_type = pcmk_cluster_invalid; #endif } else if (safe_str_eq(cluster, "cman")) { #if SUPPORT_CMAN cluster_type = pcmk_cluster_cman; #else cluster_type = pcmk_cluster_invalid; #endif } else { cluster_type = pcmk_cluster_invalid; } if (cluster_type == pcmk_cluster_invalid) { crm_notice ("This installation of Pacemaker does not support the '%s' cluster infrastructure. Terminating.", cluster); exit(100); } } return cluster_type; } gboolean is_cman_cluster(void) { return get_cluster_type() == pcmk_cluster_cman; } gboolean is_corosync_cluster(void) { return get_cluster_type() == pcmk_cluster_corosync; } gboolean is_classic_ais_cluster(void) { return get_cluster_type() == pcmk_cluster_classic_ais; } gboolean is_openais_cluster(void) { enum cluster_type_e type = get_cluster_type(); if (type == pcmk_cluster_classic_ais) { return TRUE; } else if (type == pcmk_cluster_corosync) { return TRUE; } else if (type == pcmk_cluster_cman) { return TRUE; } return FALSE; } gboolean is_heartbeat_cluster(void) { return get_cluster_type() == pcmk_cluster_heartbeat; } diff --git a/lib/cluster/corosync.c b/lib/cluster/corosync.c index bc7c877c1c..0a04f8dd04 100644 --- a/lib/cluster/corosync.c +++ b/lib/cluster/corosync.c @@ -1,850 +1,848 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include -#include #include -#include +#include #include #include -#include "stack.h" #include #include #include #include #include #include #include #include cpg_handle_t pcmk_cpg_handle = 0; struct cpg_name pcmk_cpg_group = { .length = 0, .value[0] = 0, }; quorum_handle_t pcmk_quorum_handle = 0; gboolean(*quorum_app_callback) (unsigned long long seq, gboolean quorate) = NULL; static char *pcmk_uname = NULL; static int pcmk_uname_len = 0; static uint32_t pcmk_nodeid = 0; #define cs_repeat(counter, max, code) do { \ code; \ if(rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { \ counter++; \ crm_debug("Retrying operation after %ds", counter); \ sleep(counter); \ } else { \ break; \ } \ } while(counter < max) enum crm_ais_msg_types text2msg_type(const char *text) { int type = crm_msg_none; CRM_CHECK(text != NULL, return type); if (safe_str_eq(text, "ais")) { type = crm_msg_ais; } else if (safe_str_eq(text, "crm_plugin")) { type = crm_msg_ais; } else if (safe_str_eq(text, CRM_SYSTEM_CIB)) { type = crm_msg_cib; } else if (safe_str_eq(text, CRM_SYSTEM_CRMD)) { type = crm_msg_crmd; } else if (safe_str_eq(text, CRM_SYSTEM_DC)) { type = crm_msg_crmd; } else if (safe_str_eq(text, CRM_SYSTEM_TENGINE)) { type = crm_msg_te; } else if (safe_str_eq(text, CRM_SYSTEM_PENGINE)) { type = crm_msg_pe; } else if (safe_str_eq(text, CRM_SYSTEM_LRMD)) { type = crm_msg_lrmd; } else if (safe_str_eq(text, CRM_SYSTEM_STONITHD)) { type = crm_msg_stonithd; } else if (safe_str_eq(text, "stonith-ng")) { type = crm_msg_stonith_ng; } else if (safe_str_eq(text, "attrd")) { type = crm_msg_attrd; } else { /* This will normally be a transient client rather than * a cluster daemon. Set the type to the pid of the client */ int scan_rc = sscanf(text, "%d", &type); if (scan_rc != 1) { /* Ensure its sane */ type = crm_msg_none; } } return type; } static char *ais_cluster_name = NULL; gboolean crm_get_cluster_name(char **cname) { CRM_CHECK(cname != NULL, return FALSE); if (ais_cluster_name) { *cname = strdup(ais_cluster_name); return TRUE; } return FALSE; } gboolean send_ais_text(int class, const char *data, gboolean local, const char *node, enum crm_ais_msg_types dest) { static int msg_id = 0; static int local_pid = 0; int retries = 0; int rc = CS_OK; int buf_len = sizeof(cs_ipc_header_response_t); char *buf = NULL; struct iovec iov; const char *transport = "pcmk"; AIS_Message *ais_msg = NULL; enum crm_ais_msg_types sender = text2msg_type(crm_system_name); /* There are only 6 handlers registered to crm_lib_service in plugin.c */ CRM_CHECK(class < 6, crm_err("Invalid message class: %d", class); return FALSE); if (data == NULL) { data = ""; } if (local_pid == 0) { local_pid = getpid(); } if (sender == crm_msg_none) { sender = local_pid; } ais_msg = calloc(1, sizeof(AIS_Message)); ais_msg->id = msg_id++; ais_msg->header.id = class; ais_msg->header.error = CS_OK; ais_msg->host.type = dest; ais_msg->host.local = local; if (node) { ais_msg->host.size = strlen(node); memset(ais_msg->host.uname, 0, MAX_NAME); memcpy(ais_msg->host.uname, node, ais_msg->host.size); ais_msg->host.id = 0; } else { ais_msg->host.size = 0; memset(ais_msg->host.uname, 0, MAX_NAME); ais_msg->host.id = 0; } ais_msg->sender.id = 0; ais_msg->sender.type = sender; ais_msg->sender.pid = local_pid; ais_msg->sender.size = pcmk_uname_len; memset(ais_msg->sender.uname, 0, MAX_NAME); memcpy(ais_msg->sender.uname, pcmk_uname, ais_msg->sender.size); ais_msg->size = 1 + strlen(data); if (ais_msg->size < CRM_BZ2_THRESHOLD) { failback: ais_msg = realloc(ais_msg, sizeof(AIS_Message) + ais_msg->size); memcpy(ais_msg->data, data, ais_msg->size); } else { char *compressed = NULL; char *uncompressed = strdup(data); unsigned int len = (ais_msg->size * 1.1) + 600; /* recomended size */ crm_trace("Compressing message payload"); compressed = malloc( len); rc = BZ2_bzBuffToBuffCompress(compressed, &len, uncompressed, ais_msg->size, CRM_BZ2_BLOCKS, 0, CRM_BZ2_WORK); free(uncompressed); if (rc != BZ_OK) { crm_err("Compression failed: %d", rc); free(compressed); goto failback; } ais_msg = realloc(ais_msg, sizeof(AIS_Message) + len + 1); memcpy(ais_msg->data, compressed, len); ais_msg->data[len] = 0; free(compressed); ais_msg->is_compressed = TRUE; ais_msg->compressed_size = len; crm_trace("Compression details: %d -> %d", ais_msg->size, ais_data_len(ais_msg)); } ais_msg->header.size = sizeof(AIS_Message) + ais_data_len(ais_msg); crm_trace("Sending%s message %d to %s.%s (data=%d, total=%d)", ais_msg->is_compressed ? " compressed" : "", ais_msg->id, ais_dest(&(ais_msg->host)), msg_type2text(dest), ais_data_len(ais_msg), ais_msg->header.size); iov.iov_base = ais_msg; iov.iov_len = ais_msg->header.size; buf = realloc(buf, buf_len); do { if (rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { retries++; crm_info("Peer overloaded or membership in flux:" " Re-sending message (Attempt %d of 20)", retries); sleep(retries); /* Proportional back off */ } errno = 0; transport = "cpg"; CRM_CHECK(dest != crm_msg_ais, rc = CS_ERR_MESSAGE_ERROR; goto bail); rc = cpg_mcast_joined(pcmk_cpg_handle, CPG_TYPE_AGREED, &iov, 1); if (rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { cpg_flow_control_state_t fc_state = CPG_FLOW_CONTROL_DISABLED; int rc2 = cpg_flow_control_state_get(pcmk_cpg_handle, &fc_state); if (rc2 == CS_OK && fc_state == CPG_FLOW_CONTROL_ENABLED) { crm_warn("Connection overloaded, cannot send messages"); goto bail; } else if (rc2 != CS_OK) { crm_warn("Could not determin the connection state: %s (%d)", ais_error2text(rc2), rc2); goto bail; } } } while ((rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) && retries < 20); bail: if (rc != CS_OK) { crm_perror(LOG_ERR, "Sending message %d via %s: FAILED (rc=%d): %s", ais_msg->id, transport, rc, ais_error2text(rc)); } else { crm_trace("Message %d: sent", ais_msg->id); } free(buf); free(ais_msg); return (rc == CS_OK); } gboolean send_ais_message(xmlNode * msg, gboolean local, const char *node, enum crm_ais_msg_types dest) { gboolean rc = TRUE; char *data = dump_xml_unformatted(msg); rc = send_ais_text(crm_class_cluster, data, local, node, dest); free(data); return rc; } void terminate_ais_connection(void) { crm_notice("Disconnecting from Corosync"); if(pcmk_cpg_handle) { crm_trace("Disconnecting CPG"); cpg_leave(pcmk_cpg_handle, &pcmk_cpg_group); cpg_finalize(pcmk_cpg_handle); pcmk_cpg_handle = 0; } else { crm_info("No CPG connection"); } if(pcmk_quorum_handle) { crm_trace("Disconnecting quorum"); quorum_finalize(pcmk_quorum_handle); pcmk_quorum_handle = 0; } else { crm_info("No Quorum connection"); } } int ais_membership_timer = 0; gboolean ais_membership_force = FALSE; static gboolean ais_dispatch_message(AIS_Message * msg, gboolean(*dispatch) (AIS_Message *, char *, int)) { char *data = NULL; char *uncompressed = NULL; xmlNode *xml = NULL; CRM_ASSERT(msg != NULL); crm_trace("Got new%s message (size=%d, %d, %d)", msg->is_compressed ? " compressed" : "", ais_data_len(msg), msg->size, msg->compressed_size); data = msg->data; if (msg->is_compressed && msg->size > 0) { int rc = BZ_OK; unsigned int new_size = msg->size + 1; if (check_message_sanity(msg, NULL) == FALSE) { goto badmsg; } crm_trace("Decompressing message data"); uncompressed = calloc(1, new_size); rc = BZ2_bzBuffToBuffDecompress(uncompressed, &new_size, data, msg->compressed_size, 1, 0); if (rc != BZ_OK) { crm_err("Decompression failed: %d", rc); goto badmsg; } CRM_ASSERT(rc == BZ_OK); CRM_ASSERT(new_size == msg->size); data = uncompressed; } else if (check_message_sanity(msg, data) == FALSE) { goto badmsg; } else if (safe_str_eq("identify", data)) { int pid = getpid(); char *pid_s = crm_itoa(pid); send_ais_text(crm_class_cluster, pid_s, TRUE, NULL, crm_msg_ais); free(pid_s); goto done; } if (msg->header.id != crm_class_members) { crm_update_peer(__FUNCTION__, msg->sender.id, 0, 0, 0, 0, msg->sender.uname, msg->sender.uname, NULL, NULL); } if (msg->header.id == crm_class_rmpeer) { uint32_t id = crm_int_helper(data, NULL); crm_info("Removing peer %s/%u", data, id); reap_crm_member(id); goto done; } crm_trace("Payload: %s", data); if (dispatch != NULL) { dispatch(msg, data, 0); } done: free(uncompressed); free_xml(xml); return TRUE; badmsg: crm_err("Invalid message (id=%d, dest=%s:%s, from=%s:%s.%d):" " min=%d, total=%d, size=%d, bz2_size=%d", msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, (int)sizeof(AIS_Message), msg->header.size, msg->size, msg->compressed_size); goto done; } gboolean(*pcmk_cpg_dispatch_fn) (AIS_Message *, char *, int) = NULL; static int pcmk_cpg_dispatch(gpointer user_data) { int rc = 0; pcmk_cpg_dispatch_fn = user_data; rc = cpg_dispatch(pcmk_cpg_handle, CS_DISPATCH_ALL); if (rc != CS_OK) { crm_err("Connection to the CPG API failed: %d", rc); return -1; } return 0; } static void pcmk_cpg_deliver(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { AIS_Message *ais_msg = (AIS_Message *) msg; if (ais_msg->sender.id > 0 && ais_msg->sender.id != nodeid) { crm_err("Nodeid mismatch from %d.%d: claimed nodeid=%u", nodeid, pid, ais_msg->sender.id); return; } else if (ais_msg->host.size != 0 && safe_str_neq(ais_msg->host.uname, pcmk_uname)) { /* Not for us */ return; } ais_msg->sender.id = nodeid; if (ais_msg->sender.size == 0) { crm_node_t *peer = crm_get_peer(nodeid, NULL); if (peer == NULL) { crm_err("Peer with nodeid=%u is unknown", nodeid); } else if (peer->uname == NULL) { crm_err("No uname for peer with nodeid=%u", nodeid); } else { crm_notice("Fixing uname for peer with nodeid=%u", nodeid); ais_msg->sender.size = strlen(peer->uname); memset(ais_msg->sender.uname, 0, MAX_NAME); memcpy(ais_msg->sender.uname, peer->uname, ais_msg->sender.size); } } ais_dispatch_message(ais_msg, pcmk_cpg_dispatch_fn); } static void pcmk_cpg_membership(cpg_handle_t handle, const struct cpg_name *groupName, const struct cpg_address *member_list, size_t member_list_entries, const struct cpg_address *left_list, size_t left_list_entries, const struct cpg_address *joined_list, size_t joined_list_entries) { int i; gboolean found = FALSE; static int counter = 0; for (i = 0; i < left_list_entries; i++) { crm_node_t *peer = crm_get_peer(left_list[i].nodeid, NULL); crm_info("Left[%d.%d] %s.%d ", counter, i, groupName->value, left_list[i].nodeid); crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, OFFLINESTATUS); } for (i = 0; i < joined_list_entries; i++) { crm_info("Joined[%d.%d] %s.%d ", counter, i, groupName->value, joined_list[i].nodeid); } for (i = 0; i < member_list_entries; i++) { crm_node_t *peer = crm_get_peer(member_list[i].nodeid, NULL); crm_info("Member[%d.%d] %s.%d ", counter, i, groupName->value, member_list[i].nodeid); crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ONLINESTATUS); if(pcmk_nodeid == member_list[i].nodeid) { found = TRUE; } } if(!found) { crm_err("We're not part of CPG group %s anymore!", groupName->value); /* Possibly re-call cpg_join() */ } counter++; } cpg_callbacks_t cpg_callbacks = { .cpg_deliver_fn = pcmk_cpg_deliver, .cpg_confchg_fn = pcmk_cpg_membership, }; static gboolean init_cpg_connection(gboolean(*dispatch) (AIS_Message *, char *, int), void (*destroy) (gpointer), uint32_t * nodeid) { int rc = -1; int fd = 0; int retries = 0; crm_node_t *peer = NULL; struct mainloop_fd_callbacks cpg_fd_callbacks = { .dispatch = pcmk_cpg_dispatch, .destroy = destroy, }; strcpy(pcmk_cpg_group.value, crm_system_name); pcmk_cpg_group.length = strlen(crm_system_name) + 1; cs_repeat(retries, 30, rc = cpg_initialize(&pcmk_cpg_handle, &cpg_callbacks)); if (rc != CS_OK) { crm_err("Could not connect to the Cluster Process Group API: %d\n", rc); goto bail; } retries = 0; cs_repeat(retries, 30, rc = cpg_local_get(pcmk_cpg_handle, (unsigned int *)nodeid)); if (rc != CS_OK) { crm_err("Could not get local node id from the CPG API"); goto bail; } retries = 0; cs_repeat(retries, 30, rc = cpg_join(pcmk_cpg_handle, &pcmk_cpg_group)); if (rc != CS_OK) { crm_err("Could not join the CPG group '%s': %d", crm_system_name, rc); goto bail; } rc = cpg_fd_get(pcmk_cpg_handle, &fd); if (rc != CS_OK) { crm_err("Could not obtain the CPG API connection: %d\n", rc); goto bail; } mainloop_add_fd("corosync-cpg", fd, dispatch, &cpg_fd_callbacks); bail: if (rc != CS_OK) { cpg_finalize(pcmk_cpg_handle); return FALSE; } peer = crm_get_peer(pcmk_nodeid, pcmk_uname); crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ONLINESTATUS); return TRUE; } static int pcmk_quorum_dispatch(gpointer user_data) { int rc = 0; rc = quorum_dispatch(pcmk_quorum_handle, CS_DISPATCH_ALL); if (rc < 0) { crm_err("Connection to the Quorum API failed: %d", rc); return -1; } return 0; } static void corosync_mark_unseen_peer_dead(gpointer key, gpointer value, gpointer user_data) { int *seq = user_data; crm_node_t *node = value; if (node->last_seen != *seq && crm_str_eq(CRM_NODE_LOST, node->state, TRUE) == FALSE) { crm_notice("Node %d/%s was not seen in the previous transition", node->id, node->uname); crm_update_peer(__FUNCTION__, node->id, 0, 0, 0, 0, NULL, NULL, NULL, CRM_NODE_LOST); } } static void corosync_mark_node_unseen(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; node->last_seen = 0; } static void pcmk_quorum_notification(quorum_handle_t handle, uint32_t quorate, uint64_t ring_id, uint32_t view_list_entries, uint32_t * view_list) { int i; static gboolean init_phase = TRUE; if (quorate != crm_have_quorum) { crm_notice("Membership " U64T ": quorum %s (%lu)", ring_id, quorate ? "acquired" : "lost", (long unsigned int)view_list_entries); crm_have_quorum = quorate; } else { crm_info("Membership " U64T ": quorum %s (%lu)", ring_id, quorate ? "retained" : "still lost", (long unsigned int)view_list_entries); } if(view_list_entries == 0 && init_phase) { crm_info("Corosync membership is still forming, ignoring"); return; } init_phase = FALSE; g_hash_table_foreach(crm_peer_cache, corosync_mark_node_unseen, NULL); for (i = 0; i < view_list_entries; i++) { char *uuid = get_corosync_uuid(view_list[i], NULL); crm_debug("Member[%d] %d ", i, view_list[i]); crm_update_peer(__FUNCTION__, view_list[i], 0, ring_id, 0, 0, uuid, NULL, NULL, CRM_NODE_MEMBER); } crm_trace("Reaping unseen nodes..."); g_hash_table_foreach(crm_peer_cache, corosync_mark_unseen_peer_dead, &ring_id); if (quorum_app_callback) { quorum_app_callback(ring_id, quorate); } } quorum_callbacks_t quorum_callbacks = { .quorum_notify_fn = pcmk_quorum_notification, }; gboolean init_quorum_connection(gboolean(*dispatch) (unsigned long long, gboolean), void (*destroy) (gpointer)) { int rc = -1; int fd = 0; int quorate = 0; uint32_t quorum_type = 0; struct mainloop_fd_callbacks quorum_fd_callbacks; quorum_fd_callbacks.dispatch = pcmk_quorum_dispatch; quorum_fd_callbacks.destroy = destroy; crm_debug("Configuring Pacemaker to obtain quorum from Corosync"); rc = quorum_initialize(&pcmk_quorum_handle, &quorum_callbacks, &quorum_type); if (rc != CS_OK) { crm_err("Could not connect to the Quorum API: %d\n", rc); goto bail; } else if (quorum_type != QUORUM_SET) { crm_err("Corosync quorum is not configured\n"); goto bail; } rc = quorum_getquorate(pcmk_quorum_handle, &quorate); if (rc != CS_OK) { crm_err("Could not obtain the current Quorum API state: %d\n", rc); goto bail; } crm_notice("Quorum %s", quorate ? "acquired" : "lost"); quorum_app_callback = dispatch; crm_have_quorum = quorate; rc = quorum_trackstart(pcmk_quorum_handle, CS_TRACK_CHANGES | CS_TRACK_CURRENT); if (rc != CS_OK) { crm_err("Could not setup Quorum API notifications: %d\n", rc); goto bail; } rc = quorum_fd_get(pcmk_quorum_handle, &fd); if (rc != CS_OK) { crm_err("Could not obtain the Quorum API connection: %d\n", rc); goto bail; } mainloop_add_fd("quorum", fd, dispatch, &quorum_fd_callbacks); bail: if (rc != CS_OK) { quorum_finalize(pcmk_quorum_handle); return FALSE; } return TRUE; } gboolean init_ais_connection(gboolean(*dispatch) (AIS_Message *, char *, int), void (*destroy) (gpointer), char **our_uuid, char **our_uname, int *nodeid) { int retries = 0; while (retries++ < 30) { int rc = init_ais_connection_once(dispatch, destroy, our_uuid, our_uname, nodeid); switch (rc) { case CS_OK: return TRUE; break; case CS_ERR_TRY_AGAIN: case CS_ERR_QUEUE_FULL: break; default: return FALSE; } } crm_err("Retry count exceeded: %d", retries); return FALSE; } gboolean init_ais_connection_once(gboolean(*dispatch) (AIS_Message *, char *, int), void (*destroy) (gpointer), char **our_uuid, char **our_uname, int *nodeid) { struct utsname res; enum cluster_type_e stack = get_cluster_type(); crm_peer_init(); /* Here we just initialize comms */ if(stack != pcmk_cluster_corosync) { crm_err("Invalid cluster type: %s (%d)", name_for_cluster_type(stack), stack); return FALSE; } if (init_cpg_connection(dispatch, destroy, &pcmk_nodeid) == FALSE) { return FALSE; } else if (uname(&res) < 0) { crm_perror(LOG_ERR, "Could not determin the current host"); exit(100); } else { pcmk_uname = strdup(res.nodename); } crm_info("Connection to '%s': established", name_for_cluster_type(stack)); CRM_ASSERT(pcmk_uname != NULL); pcmk_uname_len = strlen(pcmk_uname); if (pcmk_nodeid != 0) { /* Ensure the local node always exists */ crm_update_peer(__FUNCTION__, pcmk_nodeid, 0, 0, 0, 0, pcmk_uname, pcmk_uname, NULL, NULL); } if (our_uuid != NULL) { *our_uuid = get_corosync_uuid(pcmk_nodeid, pcmk_uname); } if (our_uname != NULL) { *our_uname = strdup(pcmk_uname); } if (nodeid != NULL) { *nodeid = pcmk_nodeid; } return TRUE; } gboolean check_message_sanity(const AIS_Message * msg, const char *data) { gboolean sane = TRUE; int dest = msg->host.type; int tmp_size = msg->header.size - sizeof(AIS_Message); if (sane && msg->header.size == 0) { crm_warn("Message with no size"); sane = FALSE; } if (sane && msg->header.error != CS_OK) { crm_warn("Message header contains an error: %d", msg->header.error); sane = FALSE; } if (sane && ais_data_len(msg) != tmp_size) { crm_warn("Message payload size is incorrect: expected %d, got %d", ais_data_len(msg), tmp_size); sane = TRUE; } if (sane && ais_data_len(msg) == 0) { crm_warn("Message with no payload"); sane = FALSE; } if (sane && data && msg->is_compressed == FALSE) { int str_size = strlen(data) + 1; if (ais_data_len(msg) != str_size) { int lpc = 0; crm_warn("Message payload is corrupted: expected %d bytes, got %d", ais_data_len(msg), str_size); sane = FALSE; for (lpc = (str_size - 10); lpc < msg->size; lpc++) { if (lpc < 0) { lpc = 0; } crm_debug("bad_data[%d]: %d / '%c'", lpc, data[lpc], data[lpc]); } } } if (sane == FALSE) { crm_err("Invalid message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } else { crm_trace ("Verfied message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } return sane; } enum cluster_type_e find_corosync_variant(void) { int rc = CS_OK; cmap_handle_t handle; /* There can be only one (possibility if confdb isn't around) */ rc = cmap_initialize(&handle); if (rc != CS_OK) { crm_info("Failed to initialize the cmap API. Error %d", rc); return pcmk_cluster_unknown; } cmap_finalize(handle); return pcmk_cluster_corosync; } gboolean crm_is_corosync_peer_active(const crm_node_t * node) { if (node == NULL) { crm_trace("NULL"); return FALSE; } else if(safe_str_neq(node->state, CRM_NODE_MEMBER)) { crm_trace("%s: state=%s", node->uname, node->state); return FALSE; } else if((node->processes & crm_proc_cpg) == 0) { crm_trace("%s: processes=%.16x", node->uname, node->processes); return FALSE; } return TRUE; } diff --git a/lib/cluster/heartbeat.c b/lib/cluster/heartbeat.c index 38879dec20..c39c083ac7 100644 --- a/lib/cluster/heartbeat.c +++ b/lib/cluster/heartbeat.c @@ -1,565 +1,564 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include -#include -#include "stack.h" +#include #if HAVE_BZLIB_H # include #endif #if SUPPORT_HEARTBEAT ll_cluster_t *heartbeat_cluster = NULL; static void convert_ha_field(xmlNode *parent, void *msg_v, int lpc) { int type = 0; const char *name = NULL; const char *value = NULL; xmlNode *xml = NULL; HA_Message *msg = msg_v; int rc = BZ_OK; size_t orig_len = 0; unsigned int used = 0; char *uncompressed = NULL; char *compressed = NULL; int size = orig_len * 10; CRM_CHECK(parent != NULL, return); CRM_CHECK(msg != NULL, return); name = msg->names[lpc]; type = cl_get_type(msg, name); switch(type) { case FT_STRUCT: convert_ha_message(parent, msg->values[lpc], name); break; case FT_COMPRESS: case FT_UNCOMPRESS: convert_ha_message(parent, cl_get_struct(msg, name), name); break; case FT_STRING: value = msg->values[lpc]; CRM_CHECK(value != NULL, return); crm_trace("Converting %s/%d/%s", name, type, value[0] == '<' ? "xml":"field"); if( value[0] != '<' ) { crm_xml_add(parent, name, value); break; } /* unpack xml string */ xml = string2xml(value); if(xml == NULL) { crm_err("Conversion of field '%s' failed", name); return; } add_node_nocopy(parent, NULL, xml); break; case FT_BINARY: value = cl_get_binary(msg, name, &orig_len); size = orig_len * 10 + 1; /* +1 because an exact 10x compression factor happens occasionally */ if(orig_len < 3 || value[0] != 'B' || value[1] != 'Z' || value[2] != 'h') { if(strstr(name, "uuid") == NULL) { crm_err("Skipping non-bzip binary field: %s", name); } return; } compressed = calloc(1, orig_len); memcpy(compressed, value, orig_len); crm_trace("Trying to decompress %d bytes", (int)orig_len); retry: uncompressed = realloc(uncompressed, size); memset(uncompressed, 0, size); used = size - 1; /* always leave room for a trailing '\0' * BZ2_bzBuffToBuffDecompress wont say anything if * the uncompressed data is exactly 'size' bytes */ rc = BZ2_bzBuffToBuffDecompress( uncompressed, &used, compressed, orig_len, 1, 0); if(rc == BZ_OUTBUFF_FULL) { size = size * 2; /* dont try to allocate more memory than we have */ if(size > 0) { goto retry; } } if(rc != BZ_OK) { crm_err("Decompression of %s (%d bytes) into %d failed: %d", name, (int)orig_len, size, rc); } else if(used >= size) { CRM_ASSERT(used < size); } else { CRM_LOG_ASSERT(uncompressed[used] == 0); uncompressed[used] = 0; xml = string2xml(uncompressed); } if(xml != NULL) { add_node_copy(parent, xml); free_xml(xml); } free(uncompressed); free(compressed); break; } } xmlNode * convert_ha_message(xmlNode *parent, HA_Message *msg, const char *field) { int lpc = 0; xmlNode *child = NULL; const char *tag = NULL; CRM_CHECK(msg != NULL, crm_err("Empty message for %s", field); return parent); tag = cl_get_string(msg, F_XML_TAGNAME); if(tag == NULL) { tag = field; } else if(parent && safe_str_neq(field, tag)) { /* For compatability with 0.6.x */ crm_debug("Creating intermediate parent %s between %s and %s", field, crm_element_name(parent), tag); parent = create_xml_node(parent, field); } if(parent == NULL) { parent = create_xml_node(NULL, tag); child = parent; } else { child = create_xml_node(parent, tag); } for (lpc = 0; lpc < msg->nfields; lpc++) { convert_ha_field(child, msg, lpc); } return parent; } static void add_ha_nocopy(HA_Message *parent, HA_Message *child, const char *field) { int next = parent->nfields; if (parent->nfields >= parent->nalloc && ha_msg_expand(parent) != HA_OK ) { crm_err("Parent expansion failed"); return; } parent->names[next] = strdup(field); parent->nlens[next] = strlen(field); parent->values[next] = child; parent->vlens[next] = sizeof(HA_Message); parent->types[next] = FT_UNCOMPRESS; parent->nfields++; } static HA_Message* convert_xml_message_struct(HA_Message *parent, xmlNode *src_node, const char *field) { xmlNode *child = NULL; xmlNode *__crm_xml_iter = src_node->children; xmlAttrPtr prop_iter = src_node->properties; const char *name = NULL; const char *value = NULL; HA_Message *result = ha_msg_new(3); ha_msg_add(result, F_XML_TAGNAME, (const char *)src_node->name); while(prop_iter != NULL) { name = (const char *)prop_iter->name; value = (const char *)xmlGetProp(src_node, prop_iter->name); prop_iter = prop_iter->next; ha_msg_add(result, name, value); } while(__crm_xml_iter != NULL) { child = __crm_xml_iter; __crm_xml_iter = __crm_xml_iter->next; convert_xml_message_struct(result, child, NULL); } if(parent == NULL) { return result; } if(field) { HA_Message *holder = ha_msg_new(3); CRM_ASSERT(holder != NULL); ha_msg_add(holder, F_XML_TAGNAME, field); add_ha_nocopy(holder, result, (const char*)src_node->name); ha_msg_addstruct_compress(parent, field, holder); ha_msg_del(holder); } else { add_ha_nocopy(parent, result, (const char*)src_node->name); } return result; } static void convert_xml_child(HA_Message *msg, xmlNode *xml) { int orig = 0; int rc = BZ_OK; unsigned int len = 0; char *buffer = NULL; char *compressed = NULL; const char *name = NULL; name = (const char *)xml->name; buffer = dump_xml_unformatted(xml); orig = strlen(buffer); if(orig < CRM_BZ2_THRESHOLD) { ha_msg_add(msg, name, buffer); goto done; } len = (orig * 1.1) + 600; /* recomended size */ compressed = malloc( len); rc = BZ2_bzBuffToBuffCompress(compressed, &len, buffer, orig, CRM_BZ2_BLOCKS, 0, CRM_BZ2_WORK); if(rc != BZ_OK) { crm_err("Compression failed: %d", rc); free(compressed); convert_xml_message_struct(msg, xml, name); goto done; } free(buffer); buffer = compressed; crm_trace("Compression details: %d -> %d", orig, len); ha_msg_addbin(msg, name, buffer, len); done: free(buffer); # if 0 { unsigned int used = orig; char *uncompressed = NULL; crm_debug("Trying to decompress %d bytes", len); uncompressed = calloc(1, orig); rc = BZ2_bzBuffToBuffDecompress( uncompressed, &used, compressed, len, 1, 0); CRM_CHECK(rc == BZ_OK, ;); CRM_CHECK(used == orig, ;); crm_debug("rc=%d, used=%d", rc, used); if(rc != BZ_OK) { exit(100); } crm_debug("Original %s, decompressed %s", buffer, uncompressed); free(uncompressed); } # endif } static HA_Message* convert_xml_message(xmlNode *xml) { xmlNode *child = NULL; xmlAttrPtr pIter = NULL; HA_Message *result = NULL; result = ha_msg_new(3); ha_msg_add(result, F_XML_TAGNAME, (const char *)xml->name); for(pIter = xml->properties; pIter != NULL; pIter = pIter->next) { const char *p_name = (const char *)pIter->name; if(pIter->children) { const char *p_value = (const char *)pIter->children->content; ha_msg_add(result, p_name, p_value); } } for(child = __xml_first_child(xml); child != NULL; child = __xml_next(child)) { convert_xml_child(result, child); } return result; } gboolean crm_is_heartbeat_peer_active(const crm_node_t * node) { enum crm_proc_flag proc = text2proc(crm_system_name); if (node == NULL) { crm_trace("NULL"); return FALSE; } else if(safe_str_neq(node->state, CRM_NODE_MEMBER)) { crm_trace("%s: state=%s", node->uname, node->state); return FALSE; } else if((node->processes & crm_proc_heartbeat) == 0) { crm_trace("%s: processes=%.16x", node->uname, node->processes); return FALSE; } else if(proc == crm_proc_none) { return TRUE; } else if((node->processes & proc) == 0) { crm_trace("%s: proc %.16x not in %.16x", node->uname, proc, node->processes); return FALSE; } return TRUE; } crm_node_t * crm_update_ccm_node(const oc_ev_membership_t * oc, int offset, const char *state, uint64_t seq) { crm_node_t *peer = NULL; const char *uuid = NULL; CRM_CHECK(oc->m_array[offset].node_uname != NULL, return NULL); uuid = get_uuid(oc->m_array[offset].node_uname); peer = crm_update_peer(__FUNCTION__, oc->m_array[offset].node_id, oc->m_array[offset].node_born_on, seq, -1, 0, uuid, oc->m_array[offset].node_uname, NULL, state); if (safe_str_eq(CRM_NODE_ACTIVE, state)) { /* Heartbeat doesn't send status notifications for nodes that were already part of the cluster */ crm_update_peer_proc(__FUNCTION__, peer, crm_proc_heartbeat, ONLINESTATUS); /* Nor does it send status notifications for processes that were already active */ crm_update_peer_proc(__FUNCTION__, peer, crm_proc_crmd, ONLINESTATUS); } return peer; } gboolean send_ha_message(ll_cluster_t * hb_conn, xmlNode * xml, const char *node, gboolean force_ordered) { gboolean all_is_good = TRUE; HA_Message *msg = convert_xml_message(xml); if (msg == NULL) { crm_err("cant send NULL message"); all_is_good = FALSE; } else if (hb_conn == NULL) { crm_err("No heartbeat connection specified"); all_is_good = FALSE; } else if (hb_conn->llc_ops->chan_is_connected(hb_conn) == FALSE) { crm_err("Not connected to Heartbeat"); all_is_good = FALSE; } else if (node != NULL) { if (hb_conn->llc_ops->send_ordered_nodemsg(hb_conn, msg, node) != HA_OK) { all_is_good = FALSE; crm_err("Send failed"); } } else if (force_ordered) { if (hb_conn->llc_ops->send_ordered_clustermsg(hb_conn, msg) != HA_OK) { all_is_good = FALSE; crm_err("Broadcast Send failed"); } } else { if (hb_conn->llc_ops->sendclustermsg(hb_conn, msg) != HA_OK) { all_is_good = FALSE; crm_err("Broadcast Send failed"); } } if (all_is_good == FALSE && hb_conn != NULL) { IPC_Channel *ipc = NULL; IPC_Queue *send_q = NULL; if (hb_conn->llc_ops->chan_is_connected(hb_conn) != HA_OK) { ipc = hb_conn->llc_ops->ipcchan(hb_conn); } if (ipc != NULL) { /* ipc->ops->resume_io(ipc); */ send_q = ipc->send_queue; } if (send_q != NULL) { CRM_CHECK(send_q->current_qlen < send_q->max_qlen,;); } } if (all_is_good) { crm_log_xml_trace(xml, "outbound"); } else { crm_log_xml_warn(xml, "outbound"); } if(msg != NULL) { ha_msg_del(msg); } return all_is_good; } gboolean ha_msg_dispatch(ll_cluster_t * cluster_conn, gpointer user_data) { IPC_Channel *channel = NULL; crm_trace("Invoked"); if (cluster_conn != NULL) { channel = cluster_conn->llc_ops->ipcchan(cluster_conn); } CRM_CHECK(cluster_conn != NULL, return FALSE); CRM_CHECK(channel != NULL, return FALSE); if (channel != NULL && IPC_ISRCONN(channel)) { if (cluster_conn->llc_ops->msgready(cluster_conn) == 0) { crm_trace("no message ready yet"); } /* invoke the callbacks but dont block */ cluster_conn->llc_ops->rcvmsg(cluster_conn, 0); } if (channel == NULL || channel->ch_status != IPC_CONNECT) { crm_info("Lost connection to heartbeat service."); return FALSE; } return TRUE; } gboolean register_heartbeat_conn(ll_cluster_t * hb_cluster, char **uuid, char **uname, void (*hb_message) (HA_Message * msg, void *private_data), void (*hb_destroy) (gpointer user_data)) { const char *const_uuid = NULL; const char *const_uname = NULL; crm_debug("Signing in with Heartbeat"); if (hb_cluster->llc_ops->signon(hb_cluster, crm_system_name) != HA_OK) { crm_err("Cannot sign on with heartbeat: %s", hb_cluster->llc_ops->errmsg(hb_cluster)); return FALSE; } if (HA_OK != hb_cluster->llc_ops->set_msg_callback(hb_cluster, crm_system_name, hb_message, hb_cluster)) { crm_err("Cannot set msg callback: %s", hb_cluster->llc_ops->errmsg(hb_cluster)); return FALSE; } { void *handle = NULL; GLLclusterSource *(*g_main_add_cluster) (int priority, ll_cluster_t * api, gboolean can_recurse, gboolean(*dispatch) (ll_cluster_t * source_data, gpointer user_data), gpointer userdata, GDestroyNotify notify) = find_library_function(&handle, HEARTBEAT_LIBRARY, "G_main_add_ll_cluster", 1); (*g_main_add_cluster) (G_PRIORITY_HIGH, hb_cluster, FALSE, ha_msg_dispatch, hb_cluster, hb_destroy); dlclose(handle); } const_uname = hb_cluster->llc_ops->get_mynodeid(hb_cluster); CRM_CHECK(const_uname != NULL, return FALSE); const_uuid = get_uuid(const_uname); CRM_CHECK(const_uuid != NULL, return FALSE); crm_info("Hostname: %s", const_uname); crm_info("UUID: %s", const_uuid); if (uname) { *uname = strdup(const_uname); } if (uuid) { *uuid = strdup(const_uuid); } return TRUE; } gboolean ccm_have_quorum(oc_ed_t event) { if (event == OC_EV_MS_NEW_MEMBERSHIP || event == OC_EV_MS_PRIMARY_RESTORED) { return TRUE; } return FALSE; } const char * ccm_event_name(oc_ed_t event) { if (event == OC_EV_MS_NEW_MEMBERSHIP) { return "NEW MEMBERSHIP"; } else if (event == OC_EV_MS_NOT_PRIMARY) { return "NOT PRIMARY"; } else if (event == OC_EV_MS_PRIMARY_RESTORED) { return "PRIMARY RESTORED"; } else if (event == OC_EV_MS_EVICTED) { return "EVICTED"; } else if (event == OC_EV_MS_INVALID) { return "INVALID"; } return "NO QUORUM MEMBERSHIP"; } #endif diff --git a/lib/cluster/legacy.c b/lib/cluster/legacy.c index fe9b3ecc31..987900382e 100644 --- a/lib/cluster/legacy.c +++ b/lib/cluster/legacy.c @@ -1,1391 +1,1390 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include -#include #include #include #include #include -#include "stack.h" + #if SUPPORT_COROSYNC # include # include # include cpg_handle_t pcmk_cpg_handle = 0; struct cpg_name pcmk_cpg_group = { .length = 0, .value[0] = 0, }; #endif #if HAVE_CMAP # include #endif #if SUPPORT_CMAN # include cman_handle_t pcmk_cman_handle = NULL; #endif static char *pcmk_uname = NULL; static int pcmk_uname_len = 0; static uint32_t pcmk_nodeid = 0; int ais_membership_timer = 0; gboolean ais_membership_force = FALSE; int ais_dispatch(gpointer user_data); #define cs_repeat(counter, max, code) do { \ code; \ if(rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { \ counter++; \ crm_debug("Retrying operation after %ds", counter); \ sleep(counter); \ } else { \ break; \ } \ } while(counter < max) enum crm_ais_msg_types text2msg_type(const char *text) { int type = crm_msg_none; CRM_CHECK(text != NULL, return type); if (safe_str_eq(text, "ais")) { type = crm_msg_ais; } else if (safe_str_eq(text, "crm_plugin")) { type = crm_msg_ais; } else if (safe_str_eq(text, CRM_SYSTEM_CIB)) { type = crm_msg_cib; } else if (safe_str_eq(text, CRM_SYSTEM_CRMD)) { type = crm_msg_crmd; } else if (safe_str_eq(text, CRM_SYSTEM_DC)) { type = crm_msg_crmd; } else if (safe_str_eq(text, CRM_SYSTEM_TENGINE)) { type = crm_msg_te; } else if (safe_str_eq(text, CRM_SYSTEM_PENGINE)) { type = crm_msg_pe; } else if (safe_str_eq(text, CRM_SYSTEM_LRMD)) { type = crm_msg_lrmd; } else if (safe_str_eq(text, CRM_SYSTEM_STONITHD)) { type = crm_msg_stonithd; } else if (safe_str_eq(text, "stonith-ng")) { type = crm_msg_stonith_ng; } else if (safe_str_eq(text, "attrd")) { type = crm_msg_attrd; } else { /* This will normally be a transient client rather than * a cluster daemon. Set the type to the pid of the client */ int scan_rc = sscanf(text, "%d", &type); if (scan_rc != 1) { /* Ensure its sane */ type = crm_msg_none; } } return type; } char * get_ais_data(const AIS_Message * msg) { int rc = BZ_OK; char *uncompressed = NULL; unsigned int new_size = msg->size + 1; if (msg->is_compressed == FALSE) { crm_trace("Returning uncompressed message data"); uncompressed = strdup(msg->data); } else { crm_trace("Decompressing message data"); uncompressed = calloc(1, new_size); rc = BZ2_bzBuffToBuffDecompress(uncompressed, &new_size, (char *)msg->data, msg->compressed_size, 1, 0); CRM_ASSERT(rc == BZ_OK); CRM_ASSERT(new_size == msg->size); } return uncompressed; } #if SUPPORT_COROSYNC int ais_fd_sync = -1; int ais_fd_async = -1; /* never send messages via this channel */ void *ais_ipc_ctx = NULL; hdb_handle_t ais_ipc_handle = 0; static char *ais_cluster_name = NULL; gboolean get_ais_nodeid(uint32_t * id, char **uname) { struct iovec iov; int retries = 0; int rc = CS_OK; cs_ipc_header_response_t header; struct crm_ais_nodeid_resp_s answer; header.error = CS_OK; header.id = crm_class_nodeid; header.size = sizeof(cs_ipc_header_response_t); CRM_CHECK(id != NULL, return FALSE); CRM_CHECK(uname != NULL, return FALSE); iov.iov_base = &header; iov.iov_len = header.size; retry: errno = 0; rc = coroipcc_msg_send_reply_receive(ais_ipc_handle, &iov, 1, &answer, sizeof(answer)); if (rc == CS_OK) { CRM_CHECK(answer.header.size == sizeof(struct crm_ais_nodeid_resp_s), crm_err("Odd message: id=%d, size=%d, error=%d", answer.header.id, answer.header.size, answer.header.error)); CRM_CHECK(answer.header.id == crm_class_nodeid, crm_err("Bad response id: %d", answer.header.id)); } if ((rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) && retries < 20) { retries++; crm_info("Peer overloaded: Re-sending message (Attempt %d of 20)", retries); sleep(retries); /* Proportional back off */ goto retry; } if (rc != CS_OK) { crm_err("Sending nodeid request: FAILED (rc=%d): %s", rc, ais_error2text(rc)); return FALSE; } else if (answer.header.error != CS_OK) { crm_err("Bad response from peer: (rc=%d): %s", rc, ais_error2text(rc)); return FALSE; } crm_info("Server details: id=%u uname=%s cname=%s", answer.id, answer.uname, answer.cname); *id = answer.id; *uname = strdup(answer.uname); ais_cluster_name = strdup(answer.cname); return TRUE; } gboolean crm_get_cluster_name(char **cname) { CRM_CHECK(cname != NULL, return FALSE); if (ais_cluster_name) { *cname = strdup(ais_cluster_name); return TRUE; } return FALSE; } gboolean send_ais_text(int class, const char *data, gboolean local, const char *node, enum crm_ais_msg_types dest) { static int msg_id = 0; static int local_pid = 0; enum cluster_type_e cluster_type = get_cluster_type(); int retries = 0; int rc = CS_OK; int buf_len = sizeof(cs_ipc_header_response_t); char *buf = NULL; struct iovec iov; const char *transport = "pcmk"; cs_ipc_header_response_t *header = NULL; AIS_Message *ais_msg = NULL; enum crm_ais_msg_types sender = text2msg_type(crm_system_name); /* There are only 6 handlers registered to crm_lib_service in plugin.c */ CRM_CHECK(class < 6, crm_err("Invalid message class: %d", class); return FALSE); if (data == NULL) { data = ""; } if (local_pid == 0) { local_pid = getpid(); } if (sender == crm_msg_none) { sender = local_pid; } ais_msg = calloc(1, sizeof(AIS_Message)); ais_msg->id = msg_id++; ais_msg->header.id = class; ais_msg->header.error = CS_OK; ais_msg->host.type = dest; ais_msg->host.local = local; if (node) { ais_msg->host.size = strlen(node); memset(ais_msg->host.uname, 0, MAX_NAME); memcpy(ais_msg->host.uname, node, ais_msg->host.size); ais_msg->host.id = 0; } else { ais_msg->host.size = 0; memset(ais_msg->host.uname, 0, MAX_NAME); ais_msg->host.id = 0; } ais_msg->sender.id = 0; ais_msg->sender.type = sender; ais_msg->sender.pid = local_pid; ais_msg->sender.size = pcmk_uname_len; memset(ais_msg->sender.uname, 0, MAX_NAME); memcpy(ais_msg->sender.uname, pcmk_uname, ais_msg->sender.size); ais_msg->size = 1 + strlen(data); if (ais_msg->size < CRM_BZ2_THRESHOLD) { failback: ais_msg = realloc(ais_msg, sizeof(AIS_Message) + ais_msg->size); memcpy(ais_msg->data, data, ais_msg->size); } else { char *compressed = NULL; char *uncompressed = strdup(data); unsigned int len = (ais_msg->size * 1.1) + 600; /* recomended size */ crm_trace("Compressing message payload"); compressed = malloc( len); rc = BZ2_bzBuffToBuffCompress(compressed, &len, uncompressed, ais_msg->size, CRM_BZ2_BLOCKS, 0, CRM_BZ2_WORK); free(uncompressed); if (rc != BZ_OK) { crm_err("Compression failed: %d", rc); free(compressed); goto failback; } ais_msg = realloc(ais_msg, sizeof(AIS_Message) + len + 1); memcpy(ais_msg->data, compressed, len); ais_msg->data[len] = 0; free(compressed); ais_msg->is_compressed = TRUE; ais_msg->compressed_size = len; crm_trace("Compression details: %d -> %d", ais_msg->size, ais_data_len(ais_msg)); } ais_msg->header.size = sizeof(AIS_Message) + ais_data_len(ais_msg); crm_trace("Sending%s message %d to %s.%s (data=%d, total=%d)", ais_msg->is_compressed ? " compressed" : "", ais_msg->id, ais_dest(&(ais_msg->host)), msg_type2text(dest), ais_data_len(ais_msg), ais_msg->header.size); iov.iov_base = ais_msg; iov.iov_len = ais_msg->header.size; buf = realloc(buf, buf_len); do { if (rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { retries++; crm_info("Peer overloaded or membership in flux:" " Re-sending message (Attempt %d of 20)", retries); sleep(retries); /* Proportional back off */ } errno = 0; switch (cluster_type) { case pcmk_cluster_corosync: CRM_ASSERT(FALSE/*Not supported here*/); break; case pcmk_cluster_classic_ais: rc = coroipcc_msg_send_reply_receive(ais_ipc_handle, &iov, 1, buf, buf_len); header = (cs_ipc_header_response_t *) buf; if (rc == CS_OK) { CRM_CHECK(header->size == sizeof(cs_ipc_header_response_t), crm_err("Odd message: id=%d, size=%d, class=%d, error=%d", header->id, header->size, class, header->error)); CRM_ASSERT(buf_len >= header->size); CRM_CHECK(header->id == CRM_MESSAGE_IPC_ACK, crm_err("Bad response id (%d) for request (%d)", header->id, ais_msg->header.id)); CRM_CHECK(header->error == CS_OK, rc = header->error); } break; case pcmk_cluster_cman: transport = "cpg"; CRM_CHECK(dest != crm_msg_ais, rc = CS_ERR_MESSAGE_ERROR; goto bail); rc = cpg_mcast_joined(pcmk_cpg_handle, CPG_TYPE_AGREED, &iov, 1); if (rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { cpg_flow_control_state_t fc_state = CPG_FLOW_CONTROL_DISABLED; int rc2 = cpg_flow_control_state_get(pcmk_cpg_handle, &fc_state); if (rc2 == CS_OK && fc_state == CPG_FLOW_CONTROL_ENABLED) { crm_warn("Connection overloaded, cannot send messages"); goto bail; } else if (rc2 != CS_OK) { crm_warn("Could not determin the connection state: %s (%d)", ais_error2text(rc2), rc2); goto bail; } } break; case pcmk_cluster_unknown: case pcmk_cluster_invalid: case pcmk_cluster_heartbeat: CRM_ASSERT(is_openais_cluster()); break; } } while ((rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) && retries < 20); bail: if (rc != CS_OK) { crm_perror(LOG_ERR, "Sending message %d via %s: FAILED (rc=%d): %s", ais_msg->id, transport, rc, ais_error2text(rc)); } else { crm_trace("Message %d: sent", ais_msg->id); } free(buf); free(ais_msg); return (rc == CS_OK); } gboolean send_ais_message(xmlNode * msg, gboolean local, const char *node, enum crm_ais_msg_types dest) { gboolean rc = TRUE; char *data = NULL; if (is_classic_ais_cluster()) { if (ais_fd_async < 0) { crm_err("Not connected to AIS: %d", ais_fd_async); return FALSE; } } data = dump_xml_unformatted(msg); rc = send_ais_text(crm_class_cluster, data, local, node, dest); free(data); return rc; } void terminate_ais_connection(void) { crm_notice("Disconnecting from Corosync"); if (is_classic_ais_cluster()) { if(ais_ipc_handle) { crm_trace("Disconnecting plugin"); coroipcc_service_disconnect(ais_ipc_handle); ais_ipc_handle = 0; } else { crm_info("No plugin connection"); } } else { if(pcmk_cpg_handle) { crm_trace("Disconnecting CPG"); cpg_leave(pcmk_cpg_handle, &pcmk_cpg_group); cpg_finalize(pcmk_cpg_handle); pcmk_cpg_handle = 0; } else { crm_info("No CPG connection"); } } # if SUPPORT_CMAN if (is_cman_cluster()) { if(pcmk_cman_handle) { crm_trace("Disconnecting cman"); cman_stop_notification(pcmk_cman_handle); cman_finish(pcmk_cman_handle); } else { crm_info("No cman connection"); } } # endif ais_fd_async = -1; ais_fd_sync = -1; } static crm_node_t * crm_update_ais_node(xmlNode * member, long long seq) { const char *id_s = crm_element_value(member, "id"); const char *addr = crm_element_value(member, "addr"); const char *uname = crm_element_value(member, "uname"); const char *state = crm_element_value(member, "state"); const char *born_s = crm_element_value(member, "born"); const char *seen_s = crm_element_value(member, "seen"); const char *votes_s = crm_element_value(member, "votes"); const char *procs_s = crm_element_value(member, "processes"); int votes = crm_int_helper(votes_s, NULL); unsigned int id = crm_int_helper(id_s, NULL); unsigned int procs = crm_int_helper(procs_s, NULL); /* TODO: These values will contain garbage if version < 0.7.1 */ uint64_t born = crm_int_helper(born_s, NULL); uint64_t seen = crm_int_helper(seen_s, NULL); return crm_update_peer(__FUNCTION__, id, born, seen, votes, procs, uname, uname, addr, state); } static gboolean ais_dispatch_message(AIS_Message * msg, gboolean(*dispatch) (AIS_Message *, char *, int)) { char *data = NULL; char *uncompressed = NULL; xmlNode *xml = NULL; CRM_ASSERT(msg != NULL); crm_trace("Got new%s message (size=%d, %d, %d)", msg->is_compressed ? " compressed" : "", ais_data_len(msg), msg->size, msg->compressed_size); data = msg->data; if (msg->is_compressed && msg->size > 0) { int rc = BZ_OK; unsigned int new_size = msg->size + 1; if (check_message_sanity(msg, NULL) == FALSE) { goto badmsg; } crm_trace("Decompressing message data"); uncompressed = calloc(1, new_size); rc = BZ2_bzBuffToBuffDecompress(uncompressed, &new_size, data, msg->compressed_size, 1, 0); if (rc != BZ_OK) { crm_err("Decompression failed: %d", rc); goto badmsg; } CRM_ASSERT(rc == BZ_OK); CRM_ASSERT(new_size == msg->size); data = uncompressed; } else if (check_message_sanity(msg, data) == FALSE) { goto badmsg; } else if (safe_str_eq("identify", data)) { int pid = getpid(); char *pid_s = crm_itoa(pid); send_ais_text(crm_class_cluster, pid_s, TRUE, NULL, crm_msg_ais); free(pid_s); goto done; } if (msg->header.id != crm_class_members) { crm_update_peer(__FUNCTION__, msg->sender.id, 0, 0, 0, 0, msg->sender.uname, msg->sender.uname, NULL, NULL); } if (msg->header.id == crm_class_rmpeer) { uint32_t id = crm_int_helper(data, NULL); crm_info("Removing peer %s/%u", data, id); reap_crm_member(id); goto done; } else if (is_classic_ais_cluster()) { if (msg->header.id == crm_class_members || msg->header.id == crm_class_quorum) { xmlNode *node = NULL; const char *value = NULL; gboolean quorate = FALSE; xml = string2xml(data); if (xml == NULL) { crm_err("Invalid membership update: %s", data); goto badmsg; } value = crm_element_value(xml, "quorate"); CRM_CHECK(value != NULL, crm_log_xml_err(xml, "No quorum value:"); goto badmsg); if (crm_is_true(value)) { quorate = TRUE; } value = crm_element_value(xml, "id"); CRM_CHECK(value != NULL, crm_log_xml_err(xml, "No membership id"); goto badmsg); crm_peer_seq = crm_int_helper(value, NULL); if (quorate != crm_have_quorum) { crm_notice("Membership %s: quorum %s", value, quorate ? "acquired" : "lost"); crm_have_quorum = quorate; } else { crm_info("Membership %s: quorum %s", value, quorate ? "retained" : "still lost"); } for (node = __xml_first_child(xml); node != NULL; node = __xml_next(node)) { crm_update_ais_node(node, crm_peer_seq); } } } crm_trace("Payload: %s", data); if (dispatch != NULL) { dispatch(msg, data, 0); } done: free(uncompressed); free_xml(xml); return TRUE; badmsg: crm_err("Invalid message (id=%d, dest=%s:%s, from=%s:%s.%d):" " min=%d, total=%d, size=%d, bz2_size=%d", msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, (int)sizeof(AIS_Message), msg->header.size, msg->size, msg->compressed_size); goto done; } int ais_dispatch(gpointer user_data) { int rc = CS_OK; gboolean good = TRUE; gboolean(*dispatch) (AIS_Message *, char *, int) = user_data; do { char *buffer = NULL; rc = coroipcc_dispatch_get(ais_ipc_handle, (void **)&buffer, 0); if (rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { return 0; } if (rc != CS_OK) { crm_perror(LOG_ERR, "Receiving message body failed: (%d) %s", rc, ais_error2text(rc)); return -1; } if (buffer == NULL) { /* NULL is a legal "no message afterall" value */ return 0; } good = ais_dispatch_message((AIS_Message *) buffer, dispatch); coroipcc_dispatch_put(ais_ipc_handle); } while (good && ais_ipc_handle); if(good) { return 0; } return -1; } static void ais_destroy(gpointer user_data) { crm_err("AIS connection terminated"); ais_fd_sync = -1; exit(1); } # if SUPPORT_CMAN static int pcmk_cman_dispatch(gpointer user_data) { int rc = cman_dispatch(pcmk_cman_handle, CMAN_DISPATCH_ALL); if (rc < 0) { crm_err("Connection to cman failed: %d", rc); return FALSE; } return TRUE; } # define MAX_NODES 256 static void cman_event_callback(cman_handle_t handle, void *privdata, int reason, int arg) { int rc = 0, lpc = 0, node_count = 0; cman_cluster_t cluster; static cman_node_t cman_nodes[MAX_NODES]; gboolean(*dispatch) (unsigned long long, gboolean) = privdata; switch (reason) { case CMAN_REASON_STATECHANGE: memset(&cluster, 0, sizeof(cluster)); rc = cman_get_cluster(pcmk_cman_handle, &cluster); if (rc < 0) { crm_err("Couldn't query cman cluster details: %d %d", rc, errno); return; } crm_peer_seq = cluster.ci_generation; if (arg != crm_have_quorum) { crm_notice("Membership %llu: quorum %s", crm_peer_seq, arg ? "acquired" : "lost"); crm_have_quorum = arg; } else { crm_info("Membership %llu: quorum %s", crm_peer_seq, arg ? "retained" : "still lost"); } rc = cman_get_nodes(pcmk_cman_handle, MAX_NODES, &node_count, cman_nodes); if (rc < 0) { crm_err("Couldn't query cman node list: %d %d", rc, errno); return; } for (lpc = 0; lpc < node_count; lpc++) { if (cman_nodes[lpc].cn_nodeid == 0) { /* Never allow node ID 0 to be considered a member #315711 */ cman_nodes[lpc].cn_member = 0; } crm_update_peer(__FUNCTION__, cman_nodes[lpc].cn_nodeid, cman_nodes[lpc].cn_incarnation, cman_nodes[lpc].cn_member ? crm_peer_seq : 0, 0, 0, cman_nodes[lpc].cn_name, cman_nodes[lpc].cn_name, NULL, cman_nodes[lpc].cn_member ? CRM_NODE_MEMBER : CRM_NODE_LOST); } if (dispatch) { dispatch(crm_peer_seq, crm_have_quorum); } break; case CMAN_REASON_TRY_SHUTDOWN: /* Always reply with a negative - pacemaker needs to be stopped first */ crm_info("CMAN wants to shut down: %s", arg ? "forced" : "optional"); cman_replyto_shutdown(pcmk_cman_handle, 0); break; case CMAN_REASON_CONFIG_UPDATE: /* Ignore */ break; } } # endif gboolean init_cman_connection(gboolean(*dispatch) (unsigned long long, gboolean), void (*destroy) (gpointer)) { # if SUPPORT_CMAN int rc = -1, fd = -1; cman_cluster_t cluster; struct mainloop_fd_callbacks cman_fd_callbacks = { .dispatch = pcmk_cman_dispatch, .destroy = destroy, }; crm_info("Configuring Pacemaker to obtain quorum from cman"); memset(&cluster, 0, sizeof(cluster)); pcmk_cman_handle = cman_init(dispatch); if (pcmk_cman_handle == NULL || cman_is_active(pcmk_cman_handle) == FALSE) { crm_err("Couldn't connect to cman"); goto cman_bail; } rc = cman_get_cluster(pcmk_cman_handle, &cluster); if (rc < 0) { crm_err("Couldn't query cman cluster details: %d %d", rc, errno); goto cman_bail; } ais_cluster_name = strdup(cluster.ci_name); rc = cman_start_notification(pcmk_cman_handle, cman_event_callback); if (rc < 0) { crm_err("Couldn't register for cman notifications: %d %d", rc, errno); goto cman_bail; } /* Get the current membership state */ cman_event_callback(pcmk_cman_handle, dispatch, CMAN_REASON_STATECHANGE, cman_is_quorate(pcmk_cman_handle)); fd = cman_get_fd(pcmk_cman_handle); mainloop_add_fd("cman", fd, dispatch, &cman_fd_callbacks); cman_bail: if (rc < 0) { cman_finish(pcmk_cman_handle); return FALSE; } # else crm_err("cman qorum is not supported in this build"); exit(100); # endif return TRUE; } # ifdef SUPPORT_COROSYNC gboolean(*pcmk_cpg_dispatch_fn) (AIS_Message *, char *, int) = NULL; static int pcmk_cpg_dispatch(gpointer user_data) { int rc = 0; pcmk_cpg_dispatch_fn = user_data; rc = cpg_dispatch(pcmk_cpg_handle, CS_DISPATCH_ALL); if (rc != CS_OK) { crm_err("Connection to the CPG API failed: %d", rc); return -1; } return 0; } static void pcmk_cpg_deliver(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { AIS_Message *ais_msg = (AIS_Message *) msg; if (ais_msg->sender.id > 0 && ais_msg->sender.id != nodeid) { crm_err("Nodeid mismatch from %d.%d: claimed nodeid=%u", nodeid, pid, ais_msg->sender.id); return; } else if (ais_msg->host.size != 0 && safe_str_neq(ais_msg->host.uname, pcmk_uname)) { /* Not for us */ return; } ais_msg->sender.id = nodeid; if (ais_msg->sender.size == 0) { crm_node_t *peer = crm_get_peer(nodeid, NULL); if (peer == NULL) { crm_err("Peer with nodeid=%u is unknown", nodeid); } else if (peer->uname == NULL) { crm_err("No uname for peer with nodeid=%u", nodeid); } else { crm_notice("Fixing uname for peer with nodeid=%u", nodeid); ais_msg->sender.size = strlen(peer->uname); memset(ais_msg->sender.uname, 0, MAX_NAME); memcpy(ais_msg->sender.uname, peer->uname, ais_msg->sender.size); } } ais_dispatch_message(ais_msg, pcmk_cpg_dispatch_fn); } static void pcmk_cpg_membership(cpg_handle_t handle, const struct cpg_name *groupName, const struct cpg_address *member_list, size_t member_list_entries, const struct cpg_address *left_list, size_t left_list_entries, const struct cpg_address *joined_list, size_t joined_list_entries) { int i; for (i = 0; i < member_list_entries; i++) { crm_node_t *peer = crm_get_peer(member_list[i].nodeid, NULL); crm_debug("Member[%d] %d ", i, member_list[i].nodeid); crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ONLINESTATUS); } for (i = 0; i < left_list_entries; i++) { crm_node_t *peer = crm_get_peer(left_list[i].nodeid, NULL); crm_debug("Left[%d] %d ", i, left_list[i].nodeid); crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, OFFLINESTATUS); } } cpg_callbacks_t cpg_callbacks = { .cpg_deliver_fn = pcmk_cpg_deliver, .cpg_confchg_fn = pcmk_cpg_membership, }; # endif static gboolean init_cpg_connection(gboolean(*dispatch) (AIS_Message *, char *, int), void (*destroy) (gpointer), uint32_t * nodeid) { # ifdef SUPPORT_COROSYNC int rc = -1; int fd = 0; int retries = 0; crm_node_t *peer = NULL; struct mainloop_fd_callbacks cpg_fd_callbacks = { .dispatch = pcmk_cpg_dispatch, .destroy = destroy, }; strcpy(pcmk_cpg_group.value, crm_system_name); pcmk_cpg_group.length = strlen(crm_system_name) + 1; cs_repeat(retries, 30, rc = cpg_initialize(&pcmk_cpg_handle, &cpg_callbacks)); if (rc != CS_OK) { crm_err("Could not connect to the Cluster Process Group API: %d\n", rc); goto bail; } retries = 0; cs_repeat(retries, 30, rc = cpg_local_get(pcmk_cpg_handle, (unsigned int *)nodeid)); if (rc != CS_OK) { crm_err("Could not get local node id from the CPG API"); goto bail; } retries = 0; cs_repeat(retries, 30, rc = cpg_join(pcmk_cpg_handle, &pcmk_cpg_group)); if (rc != CS_OK) { crm_err("Could not join the CPG group '%s': %d", crm_system_name, rc); goto bail; } rc = cpg_fd_get(pcmk_cpg_handle, &fd); if (rc != CS_OK) { crm_err("Could not obtain the CPG API connection: %d\n", rc); goto bail; } mainloop_add_fd("corosync-cpg", fd, dispatch, &cpg_fd_callbacks); bail: if (rc != CS_OK) { cpg_finalize(pcmk_cpg_handle); return FALSE; } peer = crm_get_peer(pcmk_nodeid, pcmk_uname); crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ONLINESTATUS); # else crm_err("The Corosync CPG API is not supported in this build"); exit(100); # endif return TRUE; } gboolean init_quorum_connection(gboolean(*dispatch) (unsigned long long, gboolean), void (*destroy) (gpointer)) { crm_err("The Corosync quorum API is not supported in this build"); exit(100); return TRUE; } static gboolean init_ais_connection_classic(gboolean(*dispatch) (AIS_Message *, char *, int), void (*destroy) (gpointer), char **our_uuid, char **our_uname, int *nodeid) { int rc; int pid = 0; char *pid_s = NULL; struct utsname name; struct mainloop_fd_callbacks ais_fd_callbacks = { .dispatch = ais_dispatch, .destroy = destroy, }; crm_info("Creating connection to our Corosync plugin"); rc = coroipcc_service_connect(COROSYNC_SOCKET_NAME, PCMK_SERVICE_ID, AIS_IPC_MESSAGE_SIZE, AIS_IPC_MESSAGE_SIZE, AIS_IPC_MESSAGE_SIZE, &ais_ipc_handle); if (ais_ipc_handle) { coroipcc_fd_get(ais_ipc_handle, &ais_fd_async); } else { crm_info("Connection to our AIS plugin (%d) failed: %s (%d)", PCMK_SERVICE_ID, strerror(errno), errno); return FALSE; } if (ais_fd_async <= 0 && rc == CS_OK) { crm_err("No context created, but connection reported 'ok'"); rc = CS_ERR_LIBRARY; } if (rc != CS_OK) { crm_info("Connection to our AIS plugin (%d) failed: %s (%d)", PCMK_SERVICE_ID, ais_error2text(rc), rc); } if (rc != CS_OK) { return FALSE; } if (destroy == NULL) { destroy = ais_destroy; } mainloop_add_fd("corosync-plugin", ais_fd_async, dispatch, &ais_fd_callbacks); crm_info("AIS connection established"); pid = getpid(); pid_s = crm_itoa(pid); send_ais_text(crm_class_cluster, pid_s, TRUE, NULL, crm_msg_ais); free(pid_s); if (uname(&name) < 0) { crm_perror(LOG_ERR, "Could not determin the current host"); exit(100); } get_ais_nodeid(&pcmk_nodeid, &pcmk_uname); if (safe_str_neq(name.nodename, pcmk_uname)) { crm_crit("Node name mismatch! OpenAIS supplied %s, our lookup returned %s", pcmk_uname, name.nodename); crm_notice ("Node name mismatches usually occur when assigned automatically by DHCP servers"); crm_notice("If this node was part of the cluster with a different name," " you will need to remove the old entry with crm_node --remove"); } return TRUE; } static int pcmk_mcp_dispatch(const char *buffer, ssize_t length, gpointer userdata) { xmlNode *msg = string2xml(buffer); if (msg && is_classic_ais_cluster()) { xmlNode *node = NULL; for (node = __xml_first_child(msg); node != NULL; node = __xml_next(node)) { int id = 0; int children = 0; const char *uname = crm_element_value(node, "uname"); crm_element_value_int(node, "id", &id); crm_element_value_int(node, "processes", &children); if (id == 0) { crm_log_xml_err(msg, "Bad Update"); } else { crm_update_peer(__FUNCTION__, id, 0, 0, 0, children, NULL, uname, NULL, NULL); } } } free_xml(msg); return 0; } static void pcmk_mcp_destroy(gpointer user_data) { void (*callback)(gpointer data) = user_data; if(callback) { callback(NULL); } } gboolean init_ais_connection(gboolean(*dispatch) (AIS_Message *, char *, int), void (*destroy) (gpointer), char **our_uuid, char **our_uname, int *nodeid) { int retries = 0; static struct ipc_client_callbacks mcp_callbacks = { .dispatch = pcmk_mcp_dispatch, .destroy = pcmk_mcp_destroy }; while (retries++ < 30) { int rc = init_ais_connection_once(dispatch, destroy, our_uuid, our_uname, nodeid); switch (rc) { case CS_OK: if (getenv("HA_mcp")) { xmlNode *poke = create_xml_node(NULL, "poke"); mainloop_io_t *ipc = mainloop_add_ipc_client(CRM_SYSTEM_MCP, 0, destroy, &mcp_callbacks); crm_ipc_send(mainloop_get_ipc_client(ipc), poke, NULL, 0); free_xml(poke); } return TRUE; break; case CS_ERR_TRY_AGAIN: case CS_ERR_QUEUE_FULL: break; default: return FALSE; } } crm_err("Retry count exceeded: %d", retries); return FALSE; } static char * get_local_node_name(void) { char *name = NULL; struct utsname res; if (is_cman_cluster()) { # if SUPPORT_CMAN cman_node_t us; cman_handle_t cman; cman = cman_init(NULL); if (cman != NULL && cman_is_active(cman)) { us.cn_name[0] = 0; cman_get_node(cman, CMAN_NODEID_US, &us); name = strdup(us.cn_name); crm_info("Using CMAN node name: %s", name); } else { crm_err("Couldn't determin node name from CMAN"); } cman_finish(cman); # endif } else if (uname(&res) < 0) { crm_perror(LOG_ERR, "Could not determin the current host"); exit(100); } else { name = strdup(res.nodename); } return name; } extern int set_cluster_type(enum cluster_type_e type); gboolean init_ais_connection_once(gboolean(*dispatch) (AIS_Message *, char *, int), void (*destroy) (gpointer), char **our_uuid, char **our_uname, int *nodeid) { enum cluster_type_e stack = get_cluster_type(); crm_peer_init(); /* Here we just initialize comms */ switch (stack) { case pcmk_cluster_classic_ais: if (init_ais_connection_classic(dispatch, destroy, our_uuid, &pcmk_uname, nodeid) == FALSE) { return FALSE; } break; case pcmk_cluster_cman: if (init_cpg_connection(dispatch, destroy, &pcmk_nodeid) == FALSE) { return FALSE; } pcmk_uname = get_local_node_name(); break; case pcmk_cluster_heartbeat: crm_info("Could not find an active corosync based cluster"); return FALSE; break; default: crm_err("Invalid cluster type: %s (%d)", name_for_cluster_type(stack), stack); return FALSE; break; } crm_info("Connection to '%s': established", name_for_cluster_type(stack)); CRM_ASSERT(pcmk_uname != NULL); pcmk_uname_len = strlen(pcmk_uname); if (pcmk_nodeid != 0) { /* Ensure the local node always exists */ crm_update_peer(__FUNCTION__, pcmk_nodeid, 0, 0, 0, 0, pcmk_uname, pcmk_uname, NULL, NULL); } if (our_uuid != NULL) { *our_uuid = get_corosync_uuid(pcmk_nodeid, pcmk_uname); } if (our_uname != NULL) { *our_uname = strdup(pcmk_uname); } if (nodeid != NULL) { *nodeid = pcmk_nodeid; } return TRUE; } gboolean check_message_sanity(const AIS_Message * msg, const char *data) { gboolean sane = TRUE; gboolean repaired = FALSE; int dest = msg->host.type; int tmp_size = msg->header.size - sizeof(AIS_Message); if (sane && msg->header.size == 0) { crm_warn("Message with no size"); sane = FALSE; } if (sane && msg->header.error != CS_OK) { crm_warn("Message header contains an error: %d", msg->header.error); sane = FALSE; } if (sane && ais_data_len(msg) != tmp_size) { crm_warn("Message payload size is incorrect: expected %d, got %d", ais_data_len(msg), tmp_size); sane = TRUE; } if (sane && ais_data_len(msg) == 0) { crm_warn("Message with no payload"); sane = FALSE; } if (sane && data && msg->is_compressed == FALSE) { int str_size = strlen(data) + 1; if (ais_data_len(msg) != str_size) { int lpc = 0; crm_warn("Message payload is corrupted: expected %d bytes, got %d", ais_data_len(msg), str_size); sane = FALSE; for (lpc = (str_size - 10); lpc < msg->size; lpc++) { if (lpc < 0) { lpc = 0; } crm_debug("bad_data[%d]: %d / '%c'", lpc, data[lpc], data[lpc]); } } } if (sane == FALSE) { crm_err("Invalid message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } else if (repaired) { crm_err ("Repaired message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } else { crm_trace ("Verfied message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } return sane; } #endif static int get_config_opt(confdb_handle_t config, hdb_handle_t object_handle, const char *key, char **value, const char *fallback) { size_t len = 0; char *env_key = NULL; const char *env_value = NULL; char buffer[256]; if (*value) { free(*value); *value = NULL; } if (object_handle > 0) { if (CS_OK == confdb_key_get(config, object_handle, key, strlen(key), &buffer, &len)) { *value = strdup(buffer); } } if (*value) { crm_info("Found '%s' for option: %s", *value, key); return 0; } env_key = crm_concat("HA", key, '_'); env_value = getenv(env_key); free(env_key); if (*value) { crm_info("Found '%s' in ENV for option: %s", *value, key); *value = strdup(env_value); return 0; } if (fallback) { crm_info("Defaulting to '%s' for option: %s", fallback, key); *value = strdup(fallback); } else { crm_info("No default for option: %s", key); } return -1; } static confdb_handle_t config_find_init(confdb_handle_t config) { cs_error_t rc = CS_OK; confdb_handle_t local_handle = OBJECT_PARENT_HANDLE; rc = confdb_object_find_start(config, local_handle); if (rc == CS_OK) { return local_handle; } else { crm_err("Couldn't create search context: %d", rc); } return 0; } static hdb_handle_t config_find_next(confdb_handle_t config, const char *name, confdb_handle_t top_handle) { cs_error_t rc = CS_OK; hdb_handle_t local_handle = 0; if (top_handle == 0) { crm_err("Couldn't search for %s: no valid context", name); return 0; } crm_trace("Searching for %s in " HDB_X_FORMAT, name, top_handle); rc = confdb_object_find(config, top_handle, name, strlen(name), &local_handle); if (rc != CS_OK) { crm_info("No additional configuration supplied for: %s", name); local_handle = 0; } else { crm_info("Processing additional %s options...", name); } return local_handle; } enum cluster_type_e find_corosync_variant(void) { confdb_handle_t config; enum cluster_type_e found = pcmk_cluster_unknown; int rc; char *value = NULL; confdb_handle_t top_handle = 0; hdb_handle_t local_handle = 0; static confdb_callbacks_t callbacks = { }; rc = confdb_initialize(&config, &callbacks); if (rc != CS_OK) { crm_debug("Could not initialize Cluster Configuration Database API instance error %d", rc); return found; } top_handle = config_find_init(config); local_handle = config_find_next(config, "service", top_handle); while (local_handle) { get_config_opt(config, local_handle, "name", &value, NULL); if (safe_str_eq("pacemaker", value)) { found = pcmk_cluster_classic_ais; get_config_opt(config, local_handle, "ver", &value, "0"); crm_trace("Found Pacemaker plugin version: %s", value); break; } local_handle = config_find_next(config, "service", top_handle); } if (found == pcmk_cluster_unknown) { top_handle = config_find_init(config); local_handle = config_find_next(config, "quorum", top_handle); get_config_opt(config, local_handle, "provider", &value, NULL); if (safe_str_eq("quorum_cman", value)) { crm_trace("Found CMAN quorum provider"); found = pcmk_cluster_cman; } } free(value); confdb_finalize(config); return found; } gboolean crm_is_corosync_peer_active(const crm_node_t * node) { enum crm_proc_flag proc = crm_proc_none; if (node == NULL) { crm_trace("NULL"); return FALSE; } else if(safe_str_neq(node->state, CRM_NODE_MEMBER)) { crm_trace("%s: state=%s", node->uname, node->state); return FALSE; } else if(is_cman_cluster() && (node->processes & crm_proc_cpg)) { /* If we can still talk to our peer process on that node, * then its also part of the corosync membership */ crm_trace("%s: processes=%.16x", node->uname, node->processes); return TRUE; } else if(is_classic_ais_cluster() && (node->processes & crm_proc_plugin) == 0) { crm_trace("%s: processes=%.16x", node->uname, node->processes); return FALSE; } proc = text2proc(crm_system_name); if(proc != crm_proc_none && (node->processes & proc) == 0) { crm_trace("%s: proc %.16x not in %.16x", node->uname, proc, node->processes); return FALSE; } return TRUE; } diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c index 88c98a6ed7..0fd05bfb29 100644 --- a/lib/cluster/membership.c +++ b/lib/cluster/membership.c @@ -1,437 +1,436 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include -#include -#include +#include #include #include GHashTable *crm_peer_id_cache = NULL; GHashTable *crm_peer_cache = NULL; unsigned long long crm_peer_seq = 0; gboolean crm_have_quorum = FALSE; gboolean crm_is_peer_active(const crm_node_t * node) { #if SUPPORT_COROSYNC if(is_openais_cluster()) { return crm_is_corosync_peer_active(node); } #endif #if SUPPORT_HEARTBEAT if(is_heartbeat_cluster()) { return crm_is_heartbeat_peer_active(node); } #endif crm_err("Unhandled cluster type: %s", name_for_cluster_type(get_cluster_type())); return FALSE; } static gboolean crm_reap_dead_member(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; crm_node_t *search = user_data; if (search != NULL && node->id != search->id) { return FALSE; } else if (crm_is_peer_active(value) == FALSE) { crm_notice("Removing %s/%u from the membership list", node->uname, node->id); return TRUE; } return FALSE; } guint reap_crm_member(uint32_t id) { int matches = 0; crm_node_t *node = g_hash_table_lookup(crm_peer_id_cache, GUINT_TO_POINTER(id)); if (node == NULL) { crm_info("Peer %u is unknown", id); } else if (crm_is_peer_active(node)) { crm_warn("Peer %u/%s is still active", id, node->uname); } else { if (g_hash_table_remove(crm_peer_id_cache, GUINT_TO_POINTER(id))) { crm_notice("Removed dead peer %u from the uuid cache", id); } else { crm_warn("Peer %u/%s was not removed", id, node->uname); } matches = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, node); crm_notice("Removed %d dead peers with id=%u from the membership list", matches, id); } return matches; } static void crm_count_peer(gpointer key, gpointer value, gpointer user_data) { guint *count = user_data; crm_node_t *node = value; if (crm_is_peer_active(node)) { *count = *count + 1; } } guint crm_active_peers(void) { guint count = 0; g_hash_table_foreach(crm_peer_cache, crm_count_peer, &count); return count; } void destroy_crm_node(gpointer data) { crm_node_t *node = data; crm_trace("Destroying entry for node %u", node->id); free(node->addr); free(node->uname); free(node->state); free(node->uuid); free(node); } void crm_peer_init(void) { static gboolean initialized = FALSE; if (initialized) { return; } initialized = TRUE; crm_peer_destroy(); if (crm_peer_cache == NULL) { crm_peer_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, destroy_crm_node); } if (crm_peer_id_cache == NULL) { crm_peer_id_cache = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL); } } void crm_peer_destroy(void) { if (crm_peer_cache != NULL) { g_hash_table_destroy(crm_peer_cache); crm_peer_cache = NULL; } if (crm_peer_id_cache != NULL) { g_hash_table_destroy(crm_peer_id_cache); crm_peer_id_cache = NULL; } } void (*crm_status_callback) (enum crm_status_type, crm_node_t *, const void *) = NULL; void crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *)) { crm_status_callback = dispatch; } static crm_node_t * crm_new_peer(unsigned int id, const char *uname) { crm_node_t *node = NULL; CRM_CHECK(uname != NULL || id > 0, return NULL); crm_peer_init(); crm_debug("Creating entry for node %s/%u", uname, id); node = calloc(1, sizeof(crm_node_t)); node->state = strdup("unknown"); if (id > 0) { node->id = id; crm_info("Node %s now has id: %u", crm_str(uname), id); g_hash_table_replace(crm_peer_id_cache, GUINT_TO_POINTER(node->id), node); } if (uname) { node->uname = strdup(uname); CRM_ASSERT(node->uname != NULL); crm_info("Node %u is now known as %s", id, node->uname); g_hash_table_replace(crm_peer_cache, node->uname, node); if (node->uuid == NULL) { const char *uuid = get_node_uuid(id, node->uname); if (node->uuid) { crm_info("Node %u has uuid %s", id, node->uuid); } else { node->uuid = strdup(uuid); } } if (crm_status_callback) { crm_status_callback(crm_status_uname, node, NULL); } } return node; } crm_node_t * crm_get_peer(unsigned int id, const char *uname) { crm_node_t *node = NULL; CRM_ASSERT(id > 0 || uname != NULL); crm_peer_init(); if (uname != NULL) { node = g_hash_table_lookup(crm_peer_cache, uname); } if (node == NULL && id > 0) { node = g_hash_table_lookup(crm_peer_id_cache, GUINT_TO_POINTER(id)); if (node && node->uname && uname) { crm_crit("Node %s and %s share the same cluster node id '%u'!", node->uname, uname, id); /* NOTE: Calling crm_new_peer() means the entry in * crm_peer_id_cache will point to the new entity */ /* TODO: Replace the old uname instead? */ node = crm_new_peer(id, uname); CRM_ASSERT(node->uname != NULL); } } if (node == NULL) { node = crm_new_peer(id, uname); if(uname) { const char *uuid = get_uuid(uname); crm_update_peer(__FUNCTION__, 0, 0, 0, -1, 0, uuid, uname, NULL, NULL); } } CRM_ASSERT(node); if (node && uname && node->uname == NULL) { node->uname = strdup(uname); crm_info("Node %u is now known as %s", id, uname); g_hash_table_insert(crm_peer_cache, node->uname, node); if (crm_status_callback) { crm_status_callback(crm_status_uname, node, NULL); } } if (node && node->uname && node->uuid == NULL) { const char *uuid = get_node_uuid(id, node->uname); if (node->uuid) { crm_info("Node %u has uuid %s", id, node->uuid); } else if (uuid) { node->uuid = strdup(uuid); } } if (node && id > 0 && id != node->id) { g_hash_table_remove(crm_peer_id_cache, GUINT_TO_POINTER(node->id)); g_hash_table_insert(crm_peer_id_cache, GUINT_TO_POINTER(id), node); node->id = id; crm_info("Node %s now has id: %u", crm_str(uname), id); } return node; } crm_node_t * crm_update_peer(const char *source, unsigned int id, uint64_t born, uint64_t seen, int32_t votes, uint32_t children, const char *uuid, const char *uname, const char *addr, const char *state) { gboolean addr_changed = FALSE; gboolean state_changed = FALSE; gboolean procs_changed = FALSE; gboolean votes_changed = FALSE; crm_node_t *node = NULL; id = get_corosync_id(id, uuid); CRM_CHECK(uname != NULL || id > 0, return NULL); CRM_ASSERT(crm_peer_cache != NULL); CRM_ASSERT(crm_peer_id_cache != NULL); node = crm_get_peer(id, uname); if (node == NULL) { crm_trace("No node found for %d/%s", id, uname); node = crm_new_peer(id, uname); CRM_LOG_ASSERT(node != NULL); if (node == NULL) { crm_err("Insufficient information to create node %d/%s", id, uname); return NULL; } /* do it now so we don't get '(new)' everywhere */ node->votes = votes; node->processes = children; if (addr) { node->addr = strdup(addr); } } if (votes > 0 && node->votes != votes) { votes_changed = TRUE; node->votes = votes; } if (node->uuid == NULL) { if (is_openais_cluster()) { /* Yes, overrule whatever was passed in */ node->uuid = get_corosync_uuid(id, uname); } else if (uuid != NULL) { node->uuid = strdup(uuid); } } if (children > 0 && children != node->processes) { uint32_t last = node->processes; node->processes = children; procs_changed = TRUE; if (crm_status_callback) { crm_status_callback(crm_status_processes, node, &last); } } if (born != 0) { node->born = born; } if (state != NULL && safe_str_neq(node->state, state)) { char *last = node->state; node->state = strdup(state); state_changed = TRUE; if (crm_status_callback) { crm_status_callback(crm_status_nstate, node, last); } free(last); } if (seen != 0 && safe_str_eq(node->state, CRM_NODE_MEMBER)) { node->last_seen = seen; } if (addr != NULL) { if (node->addr == NULL || crm_str_eq(node->addr, addr, FALSE) == FALSE) { addr_changed = TRUE; free(node->addr); node->addr = strdup(addr); } } if (state_changed || addr_changed || votes_changed) { do_crm_log_unlikely(state_changed?LOG_NOTICE:LOG_INFO, "%s: Node %s: id=%u state=%s%s addr=%s%s votes=%d%s born=" U64T " seen=" U64T " proc=%.32x%s", source, node->uname, node->id, node->state, state_changed ? " (new)" : "", node->addr, addr_changed ? " (new)" : "", node->votes, votes_changed ? " (new)" : "", node->born, node->last_seen, node->processes, procs_changed ? " (new)" : ""); } else if (procs_changed) { crm_debug("%s: Node %s: id=%u seen=" U64T " proc=%.32x (new)", source, node->uname, node->id, node->last_seen, node->processes); } return node; } void crm_update_peer_proc(const char *source, crm_node_t *node, uint32_t flag, const char *status) { uint32_t last = 0; gboolean changed = FALSE; CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL", source, peer2text(flag), status); return); last = node->processes; if (safe_str_eq(status, ONLINESTATUS)) { if ((node->processes & flag) == 0) { set_bit_inplace(node->processes, flag); changed = TRUE; } } else if (node->processes & flag) { clear_bit_inplace(node->processes, flag); changed = TRUE; } if (changed) { crm_info("%s: %s.%d.%s is now %s", source, node->uname, node->id, peer2text(flag), status); if (crm_status_callback) { crm_status_callback(crm_status_processes, node, &last); } } else { crm_trace("%s: %s.%d.%s is unchanged %s", source, node->uname, node->id, peer2text(flag), status); } } int crm_terminate_member(int nodeid, const char *uname, void * unused) { /* Always use the synchronous, non-mainloop version */ return stonith_api_kick(nodeid, uname, 120, TRUE); } int crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection) { return stonith_api_kick(nodeid, uname, 120, TRUE); } diff --git a/lib/cluster/stack.h b/lib/cluster/stack.h deleted file mode 100644 index fa604e92ac..0000000000 --- a/lib/cluster/stack.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (C) 2004 Andrew Beekhof - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#ifndef CRM_STACK__H -# define CRM_STACK__H - -# include - -# if SUPPORT_HEARTBEAT -extern ll_cluster_t *heartbeat_cluster; -extern gboolean send_ha_message(ll_cluster_t * hb_conn, xmlNode * msg, - const char *node, gboolean force_ordered); -extern gboolean ha_msg_dispatch(ll_cluster_t * cluster_conn, gpointer user_data); - -extern gboolean register_heartbeat_conn(ll_cluster_t * hb_cluster, char **uuid, char **uname, - void (*hb_message) (HA_Message * msg, void *private_data), - void (*hb_destroy) (gpointer user_data)); - -# endif - -# if SUPPORT_COROSYNC - -extern gboolean send_ais_message(xmlNode * msg, gboolean local, - const char *node, enum crm_ais_msg_types dest); - -extern enum cluster_type_e find_corosync_variant(void); - -extern void terminate_ais_connection(void); -extern gboolean init_ais_connection(gboolean(*dispatch) (AIS_Message *, char *, int), - void (*destroy) (gpointer), char **our_uuid, char **our_uname, - int *nodeid); -extern gboolean init_ais_connection_once(gboolean(*dispatch) (AIS_Message *, char *, int), - void (*destroy) (gpointer), char **our_uuid, - char **our_uname, int *nodeid); - -# endif - -enum crm_quorum_source { - crm_quorum_cman, - crm_quorum_corosync, - crm_quorum_pacemaker, -}; - -extern enum crm_quorum_source get_quorum_source(void); - -#endif diff --git a/lib/common/ipc.c b/lib/common/ipc.c index 3d48544842..98384a4094 100644 --- a/lib/common/ipc.c +++ b/lib/common/ipc.c @@ -1,518 +1,517 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include -#include static char * generateReference(const char *custom1, const char *custom2) { static uint ref_counter = 0; const char *local_cust1 = custom1; const char *local_cust2 = custom2; int reference_len = 4; char *since_epoch = NULL; reference_len += 20; /* too big */ reference_len += 40; /* too big */ if (local_cust1 == NULL) { local_cust1 = "_empty_"; } reference_len += strlen(local_cust1); if (local_cust2 == NULL) { local_cust2 = "_empty_"; } reference_len += strlen(local_cust2); since_epoch = calloc(1, reference_len); if (since_epoch != NULL) { sprintf(since_epoch, "%s-%s-%ld-%u", local_cust1, local_cust2, (unsigned long)time(NULL), ref_counter++); } return since_epoch; } xmlNode * create_request_adv(const char *task, xmlNode * msg_data, const char *host_to, const char *sys_to, const char *sys_from, const char *uuid_from, const char *origin) { char *true_from = NULL; xmlNode *request = NULL; char *reference = generateReference(task, sys_from); if (uuid_from != NULL) { true_from = generate_hash_key(sys_from, uuid_from); } else if (sys_from != NULL) { true_from = strdup(sys_from); } else { crm_err("No sys from specified"); } /* host_from will get set for us if necessary by CRMd when routed */ request = create_xml_node(NULL, __FUNCTION__); crm_xml_add(request, F_CRM_ORIGIN, origin); crm_xml_add(request, F_TYPE, T_CRM); crm_xml_add(request, F_CRM_VERSION, CRM_FEATURE_SET); crm_xml_add(request, F_CRM_MSG_TYPE, XML_ATTR_REQUEST); crm_xml_add(request, XML_ATTR_REFERENCE, reference); crm_xml_add(request, F_CRM_TASK, task); crm_xml_add(request, F_CRM_SYS_TO, sys_to); crm_xml_add(request, F_CRM_SYS_FROM, true_from); /* HOSTTO will be ignored if it is to the DC anyway. */ if (host_to != NULL && strlen(host_to) > 0) { crm_xml_add(request, F_CRM_HOST_TO, host_to); } if (msg_data != NULL) { add_message_xml(request, F_CRM_DATA, msg_data); } free(reference); free(true_from); return request; } /* * This method adds a copy of xml_response_data */ xmlNode * create_reply_adv(xmlNode * original_request, xmlNode * xml_response_data, const char *origin) { xmlNode *reply = NULL; const char *host_from = crm_element_value(original_request, F_CRM_HOST_FROM); const char *sys_from = crm_element_value(original_request, F_CRM_SYS_FROM); const char *sys_to = crm_element_value(original_request, F_CRM_SYS_TO); const char *type = crm_element_value(original_request, F_CRM_MSG_TYPE); const char *operation = crm_element_value(original_request, F_CRM_TASK); const char *crm_msg_reference = crm_element_value(original_request, XML_ATTR_REFERENCE); if (type == NULL) { crm_err("Cannot create new_message," " no message type in original message"); CRM_ASSERT(type != NULL); return NULL; #if 0 } else if (strcasecmp(XML_ATTR_REQUEST, type) != 0) { crm_err("Cannot create new_message," " original message was not a request"); return NULL; #endif } reply = create_xml_node(NULL, __FUNCTION__); crm_xml_add(reply, F_CRM_ORIGIN, origin); crm_xml_add(reply, F_TYPE, T_CRM); crm_xml_add(reply, F_CRM_VERSION, CRM_FEATURE_SET); crm_xml_add(reply, F_CRM_MSG_TYPE, XML_ATTR_RESPONSE); crm_xml_add(reply, XML_ATTR_REFERENCE, crm_msg_reference); crm_xml_add(reply, F_CRM_TASK, operation); /* since this is a reply, we reverse the from and to */ crm_xml_add(reply, F_CRM_SYS_TO, sys_from); crm_xml_add(reply, F_CRM_SYS_FROM, sys_to); /* HOSTTO will be ignored if it is to the DC anyway. */ if (host_from != NULL && strlen(host_from) > 0) { crm_xml_add(reply, F_CRM_HOST_TO, host_from); } if (xml_response_data != NULL) { add_message_xml(reply, F_CRM_DATA, xml_response_data); } return reply; } /* Libqb based IPC */ /* Server... */ int crm_ipcs_client_pid(qb_ipcs_connection_t *c) { struct qb_ipcs_connection_stats stats; stats.client_pid = 0; qb_ipcs_connection_stats_get(c, &stats, 0); return stats.client_pid; } xmlNode * crm_ipcs_recv(qb_ipcs_connection_t *c, void *data, size_t size) { char *text = ((char*)data) + sizeof(struct qb_ipc_request_header); crm_trace("Received %.200s", text); return string2xml(text); } ssize_t crm_ipcs_send(qb_ipcs_connection_t *c, xmlNode *message, enum ipcs_send_flags flags) { int rc; int lpc = 0; struct iovec iov[2]; static uint32_t id = 0; const char *type = "Response"; struct qb_ipc_response_header header; char *buffer = dump_xml_unformatted(message); iov[0].iov_len = sizeof(struct qb_ipc_response_header); iov[0].iov_base = &header; iov[1].iov_len = 1 + strlen(buffer); iov[1].iov_base = buffer; header.id = id++; /* We don't really use it, but doesn't hurt to set one */ header.error = 0; /* unused */ header.size = iov[0].iov_len + iov[1].iov_len; do { if(flags & ipcs_send_event) { rc = qb_ipcs_event_sendv(c, iov, 2); type = "Event"; } else { rc = qb_ipcs_response_sendv(c, iov, 2); } if(rc != -EAGAIN) { break; } else if(lpc > 3 && (flags & ipcs_send_error)) { break; } crm_debug("Attempting resend %d of %s %d (%d bytes) to %p[%d]: %.120s", ++lpc, type, header.id, header.size, c, crm_ipcs_client_pid(c), buffer); sleep(1); /* Only retry for important stuff, and even then only a limited amount for ipcs_send_error * Unless ipcs_send_info or ipcs_send_error is specified, we block by default */ } while((flags & ipcs_send_info) == 0); if(rc < header.size) { do_crm_log((flags & ipcs_send_error)?LOG_ERR:LOG_INFO, "%s %d failed, size=%d, to=%p[%d], rc=%d: %.120s", type, header.id, header.size, c, crm_ipcs_client_pid(c), rc, buffer); } else { crm_trace("%s %d sent, %d bytes to %p: %.120s", type, header.id, rc, c, buffer); } free(buffer); return rc; } void crm_ipcs_send_ack( qb_ipcs_connection_t *c, const char *tag, const char *function, int line) { xmlNode *ack = create_xml_node(NULL, tag); crm_xml_add(ack, "function", function); crm_xml_add_int(ack, "line", line); crm_ipcs_send(c, ack, FALSE); free_xml(ack); } /* Client... */ #define MIN_MSG_SIZE 12336 /* sizeof(struct qb_ipc_connection_response) */ #define MAX_MSG_SIZE 20*1024 struct crm_ipc_s { struct pollfd pfd; int buf_size; int msg_size; char *buffer; char *name; qb_ipcc_connection_t *ipc; }; static int pick_ipc_buffer(int max) { const char *env = getenv("PCMK_ipc_buffer"); if(env) { max = crm_parse_int(env, "0"); } if(max <= 0) { max = MAX_MSG_SIZE; } if(max < MIN_MSG_SIZE) { max = MIN_MSG_SIZE; } crm_trace("Using max message size of %d", max); return max; } crm_ipc_t * crm_ipc_new(const char *name, size_t max_size) { crm_ipc_t *client = NULL; client = calloc(1, sizeof(crm_ipc_t)); client->name = strdup(name); client->buf_size = pick_ipc_buffer(max_size); client->buffer = malloc(client->buf_size); client->pfd.fd = -1; client->pfd.events = POLLIN; client->pfd.revents = 0; return client; } bool crm_ipc_connect(crm_ipc_t *client) { client->ipc = qb_ipcc_connect(client->name, client->buf_size); if (client->ipc == NULL) { crm_perror(LOG_INFO, "Could not establish %s connection", client->name); return FALSE; } client->pfd.fd = crm_ipc_get_fd(client); if(client->pfd.fd < 0) { crm_perror(LOG_INFO, "Could not obtain file descriptor for %s connection", client->name); return FALSE; } qb_ipcc_context_set(client->ipc, client); return TRUE; } void crm_ipc_close(crm_ipc_t *client) { crm_trace("Disconnecting %s IPC connection %p (%p.%d)", client->name, client, client->ipc); if(client && client->ipc) { qb_ipcc_connection_t *ipc = client->ipc; client->ipc = NULL; qb_ipcc_disconnect(ipc); } } void crm_ipc_destroy(crm_ipc_t *client) { if(client) { if(client->ipc && qb_ipcc_is_connected(client->ipc)) { crm_notice("Destroying an active IPC connection to %s", client->name); /* The next line is basically unsafe * * If this connection was attached to mainloop and mainloop is active, * the 'disconnected' callback will end up back here and we'll end * up free'ing the memory twice - something that can still happen * even without this if we destroy a connection and it closes before * we call exit */ /* crm_ipc_close(client); */ } crm_trace("Destroying %s IPC connection %p", client->name, client); free(client->buffer); free(client->name); free(client); } } int crm_ipc_get_fd(crm_ipc_t *client) { int fd = 0; CRM_ASSERT(client != NULL); if(qb_ipcc_fd_get(client->ipc, &fd) < 0) { crm_perror(LOG_ERR, "Could not obtain file IPC descriptor for %s", client->name); } return fd; } bool crm_ipc_connected(crm_ipc_t *client) { bool rc = FALSE; if(client == NULL) { crm_trace("No client"); return FALSE; } else if(client->pfd.fd < 0) { crm_trace("Bad descriptor"); return FALSE; } rc = qb_ipcc_is_connected(client->ipc); if(rc == FALSE) { client->pfd.fd = -1; } return rc; } int crm_ipc_ready(crm_ipc_t *client) { CRM_ASSERT(client != NULL); if(crm_ipc_connected(client) == FALSE) { return -ENOTCONN; } client->pfd.revents = 0; return poll(&(client->pfd), 1, 0); } long crm_ipc_read(crm_ipc_t *client) { CRM_ASSERT(client != NULL); CRM_ASSERT(client->buffer != NULL); crm_trace("Message recieved on %s IPC connection", client->name); client->buffer[0] = 0; client->msg_size = qb_ipcc_event_recv(client->ipc, client->buffer, client->buf_size-1, -1); if(client->msg_size >= 0) { struct qb_ipc_response_header *header = (struct qb_ipc_response_header *)client->buffer; client->buffer[client->msg_size] = 0; crm_trace("Recieved response %d, size=%d, rc=%d, text: %.200s", header->id, header->size, client->msg_size, client->buffer+sizeof(struct qb_ipc_response_header)); } if(crm_ipc_connected(client) == FALSE || client->msg_size == -ENOTCONN) { crm_err("Connection to %s failed", client->name); } return client->msg_size; } const char * crm_ipc_buffer(crm_ipc_t *client) { CRM_ASSERT(client != NULL); return client->buffer + sizeof(struct qb_ipc_response_header); } const char *crm_ipc_name(crm_ipc_t *client) { CRM_ASSERT(client != NULL); return client->name; } int crm_ipc_send(crm_ipc_t *client, xmlNode *message, xmlNode **reply, int32_t ms_timeout) { long rc = 0; struct iovec iov[2]; static uint32_t id = 0; struct qb_ipc_request_header header; char *buffer = dump_xml_unformatted(message); iov[0].iov_len = sizeof(struct qb_ipc_request_header); iov[0].iov_base = &header; iov[1].iov_len = 1 + strlen(buffer); iov[1].iov_base = buffer; header.id = id++; /* We don't really use it, but doesn't hurt to set one */ header.size = iov[0].iov_len + iov[1].iov_len; if(ms_timeout == 0) { ms_timeout = 5000; } crm_trace("Waiting for reply to %u bytes: %.200s...", header.size, buffer); rc = qb_ipcc_sendv_recv(client->ipc, iov, 2, client->buffer, client->buf_size, ms_timeout); if(rc > 0) { struct qb_ipc_response_header *hdr = (struct qb_ipc_response_header *)client->buffer; crm_trace("Recieved response %d, size=%d, rc=%d, text: %.200s", hdr->id, hdr->size, rc, crm_ipc_buffer(client)); if(reply) { *reply = string2xml(crm_ipc_buffer(client)); } } else { crm_trace("Response not recieved: rc=%d, errno=%d", rc, errno); } if(crm_ipc_connected(client) == FALSE) { crm_notice("Connection to %s closed: %d", client->name, rc); } else if(rc <= 0) { crm_perror(LOG_ERR, "Request to %s failed: %ld", client->name, rc); crm_info("Request was %.120s", buffer); } free(buffer); return rc; } /* Utils */ xmlNode * create_hello_message(const char *uuid, const char *client_name, const char *major_version, const char *minor_version) { xmlNode *hello_node = NULL; xmlNode *hello = NULL; if (uuid == NULL || strlen(uuid) == 0 || client_name == NULL || strlen(client_name) == 0 || major_version == NULL || strlen(major_version) == 0 || minor_version == NULL || strlen(minor_version) == 0) { crm_err("Missing fields, Hello message will not be valid."); return NULL; } hello_node = create_xml_node(NULL, XML_TAG_OPTIONS); crm_xml_add(hello_node, "major_version", major_version); crm_xml_add(hello_node, "minor_version", minor_version); crm_xml_add(hello_node, "client_name", client_name); crm_xml_add(hello_node, "client_uuid", uuid); crm_trace("creating hello message"); hello = create_request(CRM_OP_HELLO, hello_node, NULL, NULL, client_name, uuid); free_xml(hello_node); return hello; } diff --git a/mcp/corosync.c b/mcp/corosync.c index c19e0fa251..57e7c5840a 100644 --- a/mcp/corosync.c +++ b/mcp/corosync.c @@ -1,649 +1,649 @@ /* * Copyright (C) 2010 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include /* for calls to stat() */ #include /* For basename() and dirname() */ #include #include /* For getpwname() */ #include #include #include #if HAVE_CONFDB # include #endif -#include +#include #include #if SUPPORT_CMAN # include #endif #if HAVE_CMAP # include #endif static struct cpg_name cpg_group = { .length = 0, .value[0] = 0, }; gboolean use_cman = FALSE; static cpg_handle_t cpg_handle; static corosync_cfg_handle_t cfg_handle; /* =::=::=::= CFG - Shutdown stuff =::=::=::= */ static void cfg_shutdown_callback(corosync_cfg_handle_t h, corosync_cfg_shutdown_flags_t flags) { crm_info("Corosync wants to shut down: %s", (flags == COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE) ? "immediate" : (flags == COROSYNC_CFG_SHUTDOWN_FLAG_REGARDLESS) ? "forced" : "optional"); /* Never allow corosync to shut down while we're running */ corosync_cfg_replyto_shutdown(h, COROSYNC_CFG_SHUTDOWN_FLAG_NO); } static corosync_cfg_callbacks_t cfg_callbacks = { .corosync_cfg_shutdown_callback = cfg_shutdown_callback, }; static int pcmk_cfg_dispatch(gpointer user_data) { corosync_cfg_handle_t *handle = (corosync_cfg_handle_t *) user_data; cs_error_t rc = corosync_cfg_dispatch(*handle, CS_DISPATCH_ALL); if (rc != CS_OK) { return -1; } return 0; } static void cfg_connection_destroy(gpointer user_data) { crm_err("Connection destroyed"); cfg_handle = 0; pcmk_shutdown(SIGTERM); } gboolean cluster_disconnect_cfg(void) { if (cfg_handle) { corosync_cfg_finalize(cfg_handle); cfg_handle = 0; } pcmk_shutdown(SIGTERM); return TRUE; } #define cs_repeat(counter, max, code) do { \ code; \ if(rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { \ counter++; \ crm_debug("Retrying operation after %ds", counter); \ sleep(counter); \ } else { \ break; \ } \ } while(counter < max) gboolean cluster_connect_cfg(uint32_t * nodeid) { cs_error_t rc; int fd = 0, retries = 0; static struct mainloop_fd_callbacks cfg_fd_callbacks = { .dispatch = pcmk_cfg_dispatch, .destroy = cfg_connection_destroy, }; cs_repeat(retries, 30, rc = corosync_cfg_initialize(&cfg_handle, &cfg_callbacks)); if (rc != CS_OK) { crm_err("corosync cfg init error %d", rc); return FALSE; } rc = corosync_cfg_fd_get(cfg_handle, &fd); if (rc != CS_OK) { crm_err("corosync cfg fd_get error %d", rc); goto bail; } retries = 0; cs_repeat(retries, 30, rc = corosync_cfg_local_get(cfg_handle, nodeid)); if (rc != CS_OK) { crm_err("corosync cfg local_get error %d", rc); goto bail; } crm_debug("Our nodeid: %d", *nodeid); mainloop_add_fd("corosync-cfg", fd, &cfg_handle, &cfg_fd_callbacks); return TRUE; bail: corosync_cfg_finalize(cfg_handle); return FALSE; } /* =::=::=::= CPG - Closed Process Group Messaging =::=::=::= */ static int pcmk_cpg_dispatch(gpointer user_data) { cpg_handle_t *handle = (cpg_handle_t *) user_data; cs_error_t rc = cpg_dispatch(*handle, CS_DISPATCH_ALL); if (rc != CS_OK) { return -1; } return 0; } static void cpg_connection_destroy(gpointer user_data) { crm_err("Connection destroyed"); cpg_handle = 0; } static void pcmk_cpg_deliver(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { if (nodeid != local_nodeid) { uint32_t procs = 0; xmlNode *xml = string2xml(msg); const char *uname = crm_element_value(xml, "uname"); crm_element_value_int(xml, "proclist", (int *)&procs); /* crm_debug("Got proclist %.32x from %s", procs, uname); */ if (update_node_processes(nodeid, uname, procs)) { update_process_clients(); } } } static void pcmk_cpg_membership(cpg_handle_t handle, const struct cpg_name *groupName, const struct cpg_address *member_list, size_t member_list_entries, const struct cpg_address *left_list, size_t left_list_entries, const struct cpg_address *joined_list, size_t joined_list_entries) { /* Don't care about CPG membership */ update_process_peers(); } cpg_callbacks_t cpg_callbacks = { .cpg_deliver_fn = pcmk_cpg_deliver, .cpg_confchg_fn = pcmk_cpg_membership, }; gboolean cluster_disconnect_cpg(void) { if (cpg_handle) { cpg_finalize(cpg_handle); cpg_handle = 0; } return TRUE; } gboolean cluster_connect_cpg(void) { cs_error_t rc; unsigned int nodeid; int fd; int retries = 0; static struct mainloop_fd_callbacks cpg_fd_callbacks = { .dispatch = pcmk_cpg_dispatch, .destroy = cpg_connection_destroy, }; strcpy(cpg_group.value, "pcmk"); cpg_group.length = strlen(cpg_group.value) + 1; retries = 0; cs_repeat(retries, 30, rc = cpg_initialize(&cpg_handle, &cpg_callbacks)); if (rc != CS_OK) { crm_err("corosync cpg init error %d", rc); return FALSE; } rc = cpg_fd_get(cpg_handle, &fd); if (rc != CS_OK) { crm_err("corosync cpg fd_get error %d", rc); goto bail; } retries = 0; cs_repeat(retries, 30, rc = cpg_local_get(cpg_handle, &nodeid)); if (rc != CS_OK) { crm_err("corosync cpg local_get error %d", rc); goto bail; } crm_debug("Our nodeid: %d", nodeid); retries = 0; cs_repeat(retries, 30, rc = cpg_join(cpg_handle, &cpg_group)); if (rc != CS_OK) { crm_err("Could not join the CPG group '%s': %d", crm_system_name, rc); goto bail; } mainloop_add_fd("corosync-cpg", fd, &cpg_handle, &cpg_fd_callbacks); return TRUE; bail: cpg_finalize(cpg_handle); return FALSE; } gboolean send_cpg_message(struct iovec * iov) { int rc = CS_OK; int retries = 0; errno = 0; do { rc = cpg_mcast_joined(cpg_handle, CPG_TYPE_AGREED, iov, 1); if (rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { cpg_flow_control_state_t fc_state = CPG_FLOW_CONTROL_DISABLED; int rc2 = cpg_flow_control_state_get(cpg_handle, &fc_state); if (rc2 == CS_OK && fc_state == CPG_FLOW_CONTROL_ENABLED) { crm_debug("Attempting to clear cpg dispatch queue"); rc2 = cpg_dispatch(cpg_handle, CS_DISPATCH_ALL); } if (rc2 != CS_OK) { crm_warn("Could not check/clear the cpg connection"); goto bail; } else { retries++; crm_debug("Retrying operation after %ds", retries); sleep(retries); } } else { break; } /* 5 retires is plenty, we'll resend once the membership reforms anyway */ } while (retries < 5); bail: if (rc != CS_OK) { crm_err("Sending message via cpg FAILED: (rc=%d) %s", rc, ais_error2text(rc)); } return (rc == CS_OK); } /* =::=::=::= Configuration =::=::=::= */ #if HAVE_CONFDB static int get_config_opt(confdb_handle_t config, hdb_handle_t object_handle, const char *key, char **value, const char *fallback) { size_t len = 0; char *env_key = NULL; const char *env_value = NULL; char buffer[256]; if (*value) { free(*value); *value = NULL; } if (object_handle > 0) { if (CS_OK == confdb_key_get(config, object_handle, key, strlen(key), &buffer, &len)) { *value = strdup(buffer); } } if (*value) { crm_info("Found '%s' for option: %s", *value, key); return 0; } env_key = crm_concat("HA", key, '_'); env_value = getenv(env_key); free(env_key); if (*value) { crm_info("Found '%s' in ENV for option: %s", *value, key); *value = strdup(env_value); return 0; } if (fallback) { crm_info("Defaulting to '%s' for option: %s", fallback, key); *value = strdup(fallback); } else { crm_info("No default for option: %s", key); } return -1; } static confdb_handle_t config_find_init(confdb_handle_t config) { cs_error_t rc = CS_OK; confdb_handle_t local_handle = OBJECT_PARENT_HANDLE; rc = confdb_object_find_start(config, local_handle); if (rc == CS_OK) { return local_handle; } else { crm_err("Couldn't create search context: %d", rc); } return 0; } static hdb_handle_t config_find_next(confdb_handle_t config, const char *name, confdb_handle_t top_handle) { cs_error_t rc = CS_OK; hdb_handle_t local_handle = 0; if (top_handle == 0) { crm_err("Couldn't search for %s: no valid context", name); return 0; } crm_trace("Searching for %s in " HDB_X_FORMAT, name, top_handle); rc = confdb_object_find(config, top_handle, name, strlen(name), &local_handle); if (rc != CS_OK) { crm_info("No additional configuration supplied for: %s", name); local_handle = 0; } else { crm_info("Processing additional %s options...", name); } return local_handle; } #else static int get_config_opt(cmap_handle_t object_handle, const char *key, char **value, const char *fallback) { int rc = 0, retries = 0; cs_repeat(retries, 5, rc = cmap_get_string(object_handle, key, value)); if(rc != CS_OK) { crm_trace("Search for %s failed %d, defaulting to %s", key, rc, fallback); if(fallback) { *value = strdup(fallback); } else { *value = NULL; } } crm_trace("%s: %s", key, *value); return rc; } #endif char * get_local_node_name(void) { char *name = NULL; struct utsname res; if (use_cman) { #if SUPPORT_CMAN cman_node_t us; cman_handle_t cman; cman = cman_init(NULL); if (cman != NULL && cman_is_active(cman)) { us.cn_name[0] = 0; cman_get_node(cman, CMAN_NODEID_US, &us); name = strdup(us.cn_name); crm_info("Using CMAN node name: %s", name); } else { crm_err("Couldn't determin node name from CMAN"); } cman_finish(cman); #endif } else if (uname(&res) < 0) { crm_perror(LOG_ERR, "Could not determin the current host"); exit(100); } else { name = strdup(res.nodename); } return name; } gboolean read_config(void) { int rc = CS_OK; int retries = 0; gboolean have_log = FALSE; char *logging_debug = NULL; char *logging_logfile = NULL; char *logging_to_logfile = NULL; char *logging_to_syslog = NULL; char *logging_syslog_facility = NULL; enum cluster_type_e stack = pcmk_cluster_unknown; #if HAVE_CONFDB char *value = NULL; confdb_handle_t config; confdb_handle_t top_handle = 0; hdb_handle_t local_handle; static confdb_callbacks_t callbacks = { }; do { rc = confdb_initialize(&config, &callbacks); if(rc != CS_OK) { retries++; printf("Connection setup failed: %d. Retrying in %ds\n", rc, retries); sleep(retries); } else { break; } } while(retries < 5); #elif HAVE_CMAP cmap_handle_t local_handle; /* There can be only one (possibility if confdb isn't around) */ do { rc = cmap_initialize(&local_handle); if(rc != CS_OK) { retries++; printf("API connection setup failed: %s. Retrying in %ds\n", cs_strerror(rc), retries); crm_info("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc), retries); sleep(retries); } else { break; } } while(retries < 5); #endif if (rc != CS_OK) { printf("Could not connect to Cluster Configuration Database API, error %d\n", rc); crm_warn("Could not connect to Cluster Configuration Database API, error %d", rc); return FALSE; } stack = get_cluster_type(); crm_info("Reading configure for stack: %s", name_for_cluster_type(stack)); /* =::=::= Should we be here =::=::= */ if (stack == pcmk_cluster_corosync) { setenv("HA_cluster_type", "corosync", 1); setenv("HA_quorum_type", "corosync", 1); #if HAVE_CONFDB } else if (stack == pcmk_cluster_cman) { setenv("HA_cluster_type", "cman", 1); setenv("HA_quorum_type", "cman", 1); enable_crmd_as_root(TRUE); use_cman = TRUE; } else if (stack == pcmk_cluster_classic_ais) { setenv("HA_cluster_type", "openais", 1); setenv("HA_quorum_type", "pcmk", 1); /* Look for a service block to indicate our plugin is loaded */ top_handle = config_find_init(config); local_handle = config_find_next(config, "service", top_handle); while (local_handle) { free(value); get_config_opt(config, local_handle, "name", &value, NULL); if (safe_str_eq("pacemaker", value)) { free(value); get_config_opt(config, local_handle, "ver", &value, "0"); if (safe_str_eq(value, "1")) { free(value); get_config_opt(config, local_handle, "use_logd", &value, "no"); setenv("HA_use_logd", value, 1); setenv("HA_LOGD", value, 1); free(value); get_config_opt(config, local_handle, "use_mgmtd", &value, "no"); enable_mgmtd(crm_is_true(value)); } else { crm_err("We can only start Pacemaker from init if using version 1" " of the Pacemaker plugin for Corosync. Terminating."); exit(100); } break; } local_handle = config_find_next(config, "service", top_handle); } free(value); #endif } else { crm_err("Unsupported stack type: %s", name_for_cluster_type(stack)); return FALSE; } #if HAVE_CONFDB top_handle = config_find_init(config); local_handle = config_find_next(config, "logging", top_handle); get_config_opt(config, local_handle, "debug", &logging_debug, "off"); get_config_opt(config, local_handle, "logfile", &logging_logfile, "/var/log/pacemaker"); get_config_opt(config, local_handle, "to_logfile", &logging_to_logfile, "off"); get_config_opt(config, local_handle, "to_syslog", &logging_to_syslog, "on"); get_config_opt(config, local_handle, "syslog_facility", &logging_syslog_facility, "daemon"); confdb_finalize(config); #elif HAVE_CMAP /* =::=::= Logging =::=::= */ get_config_opt(local_handle, "logging.debug", &logging_debug, "off"); get_config_opt(local_handle, "logging.logfile", &logging_logfile, "/var/log/pacemaker"); get_config_opt(local_handle, "logging.to_logfile", &logging_to_logfile, "off"); get_config_opt(local_handle, "logging.to_syslog", &logging_to_syslog, "on"); get_config_opt(local_handle, "logging.syslog_facility", &logging_syslog_facility, "daemon"); cmap_finalize(local_handle); #endif if (crm_is_true(logging_debug)) { setenv("HA_debug", "1", 1); if(get_crm_log_level() < LOG_DEBUG) { set_crm_log_level(LOG_DEBUG); } } else { setenv("HA_debug", "0", 1); } if (crm_is_true(logging_to_logfile)) { if(crm_add_logfile(logging_logfile)) { setenv("HA_debugfile", logging_logfile, 1); setenv("HA_DEBUGLOG", logging_logfile, 1); setenv("HA_LOGFILE", logging_logfile, 1); have_log = TRUE; } else { crm_err("Couldn't create logfile: %s", logging_logfile); } } if (have_log && crm_is_true(logging_to_syslog) == FALSE) { crm_info("User configured file based logging and explicitly disabled syslog."); free(logging_syslog_facility); logging_syslog_facility = NULL; } else { if (crm_is_true(logging_to_syslog) == FALSE) { crm_err ("Please enable some sort of logging, either 'to_logfile: on' or 'to_syslog: on'."); crm_err("If you use file logging, be sure to also define a value for 'logfile'"); } } if(logging_syslog_facility) { setenv("HA_logfacility", logging_syslog_facility, 1); setenv("HA_LOGFACILITY", logging_syslog_facility, 1); } else { unsetenv("HA_logfacility"); unsetenv("HA_LOGFACILITY"); } free(logging_debug); free(logging_logfile); free(logging_to_logfile); free(logging_to_syslog); free(logging_syslog_facility); return TRUE; } diff --git a/tools/attrd.c b/tools/attrd.c index 8359802b02..e1bc77e113 100644 --- a/tools/attrd.c +++ b/tools/attrd.c @@ -1,878 +1,878 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include +#include #include #include #include #define OPTARGS "hV" #if SUPPORT_HEARTBEAT ll_cluster_t *attrd_cluster_conn; #endif GMainLoop *mainloop = NULL; char *attrd_uname = NULL; char *attrd_uuid = NULL; gboolean need_shutdown = FALSE; GHashTable *attr_hash = NULL; cib_t *cib_conn = NULL; typedef struct attrd_client_s { char *user; } attrd_client_t; typedef struct attr_hash_entry_s { char *uuid; char *id; char *set; char *section; char *value; char *stored_value; int timeout; char *dampen; guint timer_id; char *user; } attr_hash_entry_t; void attrd_local_callback(xmlNode * msg); gboolean attrd_timer_callback(void *user_data); gboolean attrd_trigger_update(attr_hash_entry_t * hash_entry); void attrd_perform_update(attr_hash_entry_t * hash_entry); static void free_hash_entry(gpointer data) { attr_hash_entry_t *entry = data; if (entry == NULL) { return; } free(entry->id); free(entry->set); free(entry->dampen); free(entry->section); free(entry->uuid); free(entry->value); free(entry->stored_value); free(entry->user); free(entry); } static int32_t attrd_ipc_accept(qb_ipcs_connection_t *c, uid_t uid, gid_t gid) { attrd_client_t *new_client = NULL; #if ENABLE_ACL struct group *crm_grp = NULL; #endif crm_trace("Connecting %p for uid=%d gid=%d", c, uid, gid); if (need_shutdown) { crm_info("Ignoring connection request during shutdown"); return FALSE; } new_client = calloc(1, sizeof(attrd_client_t)); #if ENABLE_ACL crm_grp = getgrnam(CRM_DAEMON_GROUP); if (crm_grp) { qb_ipcs_connection_auth_set(c, -1, crm_grp->gr_gid, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); } new_client->user = uid2username(uid); #endif qb_ipcs_context_set(c, new_client); return 0; } static void attrd_ipc_created(qb_ipcs_connection_t *c) { crm_trace("Client %p connected", c); } /* Exit code means? */ static int32_t attrd_ipc_dispatch(qb_ipcs_connection_t *c, void *data, size_t size) { #if ENABLE_ACL attrd_client_t *client = qb_ipcs_context_get(c); #endif xmlNode *msg = crm_ipcs_recv(c, data, size); xmlNode *ack = create_xml_node(NULL, "ack"); crm_ipcs_send(c, ack, FALSE); free_xml(ack); if (msg == NULL) { return 0; } #if ENABLE_ACL determine_request_user(client->user, msg, F_ATTRD_USER); #endif crm_trace("Processing msg from %p", c); crm_log_xml_trace(msg, __PRETTY_FUNCTION__); attrd_local_callback(msg); free_xml(msg); return 0; } /* Error code means? */ static int32_t attrd_ipc_closed(qb_ipcs_connection_t *c) { return 0; } static void attrd_ipc_destroy(qb_ipcs_connection_t *c) { attrd_client_t *client = qb_ipcs_context_get(c); if (client == NULL) { return; } crm_trace("Destroying %p", c); free(client->user); free(client); crm_trace("Freed the cib client"); return; } struct qb_ipcs_service_handlers ipc_callbacks = { .connection_accept = attrd_ipc_accept, .connection_created = attrd_ipc_created, .msg_process = attrd_ipc_dispatch, .connection_closed = attrd_ipc_closed, .connection_destroyed = attrd_ipc_destroy }; static void attrd_shutdown(int nsig) { need_shutdown = TRUE; crm_info("Exiting"); if (mainloop != NULL && g_main_is_running(mainloop)) { g_main_quit(mainloop); } else { exit(0); } } static void usage(const char *cmd, int exit_status) { FILE *stream; stream = exit_status ? stderr : stdout; fprintf(stream, "usage: %s [-srkh] [-c configure file]\n", cmd); /* fprintf(stream, "\t-d\tsets debug level\n"); */ /* fprintf(stream, "\t-s\tgets daemon status\n"); */ /* fprintf(stream, "\t-r\trestarts daemon\n"); */ /* fprintf(stream, "\t-k\tstops daemon\n"); */ /* fprintf(stream, "\t-h\thelp message\n"); */ fflush(stream); exit(exit_status); } static void stop_attrd_timer(attr_hash_entry_t * hash_entry) { if (hash_entry != NULL && hash_entry->timer_id != 0) { crm_trace("Stopping %s timer", hash_entry->id); g_source_remove(hash_entry->timer_id); hash_entry->timer_id = 0; } } static void log_hash_entry(int level, attr_hash_entry_t * entry, const char *text) { do_crm_log(level, "%s", text); do_crm_log(level, "Set: %s", entry->section); do_crm_log(level, "Name: %s", entry->id); do_crm_log(level, "Value: %s", entry->value); do_crm_log(level, "Timeout: %s", entry->dampen); } static attr_hash_entry_t * find_hash_entry(xmlNode * msg) { const char *value = NULL; const char *attr = crm_element_value(msg, F_ATTRD_ATTRIBUTE); attr_hash_entry_t *hash_entry = NULL; if (attr == NULL) { crm_info("Ignoring message with no attribute name"); return NULL; } hash_entry = g_hash_table_lookup(attr_hash, attr); if (hash_entry == NULL) { /* create one and add it */ crm_info("Creating hash entry for %s", attr); hash_entry = calloc(1, sizeof(attr_hash_entry_t)); hash_entry->id = strdup(attr); g_hash_table_insert(attr_hash, hash_entry->id, hash_entry); hash_entry = g_hash_table_lookup(attr_hash, attr); CRM_CHECK(hash_entry != NULL, return NULL); } value = crm_element_value(msg, F_ATTRD_SET); if (value != NULL) { free(hash_entry->set); hash_entry->set = strdup(value); crm_debug("\t%s->set: %s", attr, value); } value = crm_element_value(msg, F_ATTRD_SECTION); if (value == NULL) { value = XML_CIB_TAG_STATUS; } free(hash_entry->section); hash_entry->section = strdup(value); crm_trace("\t%s->section: %s", attr, value); value = crm_element_value(msg, F_ATTRD_DAMPEN); if (value != NULL) { free(hash_entry->dampen); hash_entry->dampen = strdup(value); hash_entry->timeout = crm_get_msec(value); crm_trace("\t%s->timeout: %s", attr, value); } #if ENABLE_ACL free(hash_entry->user); value = crm_element_value(msg, F_ATTRD_USER); if (value != NULL) { hash_entry->user = strdup(value); crm_trace("\t%s->user: %s", attr, value); } #endif log_hash_entry(LOG_DEBUG_2, hash_entry, "Found (and updated) entry:"); return hash_entry; } #if SUPPORT_HEARTBEAT static void attrd_ha_connection_destroy(gpointer user_data) { crm_trace("Invoked"); if (need_shutdown) { /* we signed out, so this is expected */ crm_info("Heartbeat disconnection complete"); return; } crm_crit("Lost connection to heartbeat service!"); if (mainloop != NULL && g_main_is_running(mainloop)) { g_main_quit(mainloop); return; } exit(EX_OK); } static void attrd_ha_callback(HA_Message * msg, void *private_data) { attr_hash_entry_t *hash_entry = NULL; xmlNode *xml = convert_ha_message(NULL, msg, __FUNCTION__); const char *from = crm_element_value(xml, F_ORIG); const char *op = crm_element_value(xml, F_ATTRD_TASK); const char *host = crm_element_value(xml, F_ATTRD_HOST); const char *ignore = crm_element_value(xml, F_ATTRD_IGNORE_LOCALLY); if (host != NULL && safe_str_eq(host, attrd_uname)) { crm_info("Update relayed from %s", from); attrd_local_callback(xml); } else if (ignore == NULL || safe_str_neq(from, attrd_uname)) { crm_info("%s message from %s", op, from); hash_entry = find_hash_entry(xml); stop_attrd_timer(hash_entry); attrd_perform_update(hash_entry); } free_xml(xml); } #endif #if SUPPORT_COROSYNC static gboolean attrd_ais_dispatch(AIS_Message * wrapper, char *data, int sender) { xmlNode *xml = NULL; if (wrapper->header.id == crm_class_cluster) { xml = string2xml(data); if (xml == NULL) { crm_err("Bad message received: %d:'%.120s'", wrapper->id, data); } } if (xml != NULL) { attr_hash_entry_t *hash_entry = NULL; const char *op = crm_element_value(xml, F_ATTRD_TASK); const char *host = crm_element_value(xml, F_ATTRD_HOST); const char *ignore = crm_element_value(xml, F_ATTRD_IGNORE_LOCALLY); crm_xml_add_int(xml, F_SEQ, wrapper->id); crm_xml_add(xml, F_ORIG, wrapper->sender.uname); if (host != NULL && safe_str_eq(host, attrd_uname)) { crm_notice("Update relayed from %s", wrapper->sender.uname); attrd_local_callback(xml); } else if (ignore == NULL || safe_str_neq(wrapper->sender.uname, attrd_uname)) { crm_trace("%s message from %s", op, wrapper->sender.uname); hash_entry = find_hash_entry(xml); stop_attrd_timer(hash_entry); attrd_perform_update(hash_entry); } free_xml(xml); } return TRUE; } static void attrd_ais_destroy(gpointer unused) { if (need_shutdown) { /* we signed out, so this is expected */ crm_info("OpenAIS disconnection complete"); return; } crm_crit("Lost connection to OpenAIS service!"); if (mainloop != NULL && g_main_is_running(mainloop)) { g_main_quit(mainloop); return; } exit(EX_USAGE); } #endif static void attrd_cib_connection_destroy(gpointer user_data) { cib_t *conn = user_data; conn->cmds->signoff(conn); /* Ensure IPC is cleaned up */ if (need_shutdown) { crm_info("Connection to the CIB terminated..."); } else { /* eventually this will trigger a reconnect, not a shutdown */ crm_err("Connection to the CIB terminated..."); exit(1); } return; } static void update_for_hash_entry(gpointer key, gpointer value, gpointer user_data) { attr_hash_entry_t *entry = value; if (entry->value != NULL) { attrd_timer_callback(value); } } static void do_cib_replaced(const char *event, xmlNode * msg) { crm_info("Sending full refresh"); g_hash_table_foreach(attr_hash, update_for_hash_entry, NULL); } static gboolean cib_connect(void *user_data) { static int attempts = 1; static int max_retry = 20; gboolean was_err = FALSE; static cib_t *local_conn = NULL; if (local_conn == NULL) { local_conn = cib_new(); } if (was_err == FALSE) { int rc = -ENOTCONN; if (attempts < max_retry) { crm_debug("CIB signon attempt %d", attempts); rc = local_conn->cmds->signon(local_conn, T_ATTRD, cib_command); } if (rc != pcmk_ok && attempts > max_retry) { crm_err("Signon to CIB failed: %s", pcmk_strerror(rc)); was_err = TRUE; } else if (rc != pcmk_ok) { attempts++; return TRUE; } } crm_info("Connected to the CIB after %d signon attempts", attempts); if (was_err == FALSE) { int rc = local_conn->cmds->set_connection_dnotify(local_conn, attrd_cib_connection_destroy); if (rc != pcmk_ok) { crm_err("Could not set dnotify callback"); was_err = TRUE; } } if (was_err == FALSE) { if (pcmk_ok != local_conn->cmds->add_notify_callback(local_conn, T_CIB_REPLACE_NOTIFY, do_cib_replaced)) { crm_err("Could not set CIB notification callback"); was_err = TRUE; } } if (was_err) { crm_err("Aborting startup"); exit(100); } cib_conn = local_conn; crm_info("Sending full refresh"); g_hash_table_foreach(attr_hash, update_for_hash_entry, NULL); return FALSE; } int main(int argc, char **argv) { int flag = 0; int argerr = 0; gboolean was_err = FALSE; qb_ipcs_service_t *ipcs = NULL; crm_log_init(T_ATTRD, LOG_NOTICE, TRUE, FALSE, argc, argv, FALSE); mainloop_add_signal(SIGTERM, attrd_shutdown); while ((flag = getopt(argc, argv, OPTARGS)) != EOF) { switch (flag) { case 'V': crm_bump_log_level(); break; case 'h': /* Help message */ usage(T_ATTRD, EX_OK); break; default: ++argerr; break; } } if (optind > argc) { ++argerr; } if (argerr) { usage(T_ATTRD, EX_USAGE); } attr_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_hash_entry); crm_info("Starting up"); if (was_err == FALSE) { void *destroy = NULL; void *dispatch = NULL; void *data = NULL; #if SUPPORT_COROSYNC if (is_openais_cluster()) { destroy = attrd_ais_destroy; dispatch = attrd_ais_dispatch; } #endif #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { data = &attrd_cluster_conn; dispatch = attrd_ha_callback; destroy = attrd_ha_connection_destroy; } #endif if (FALSE == crm_cluster_connect(&attrd_uname, &attrd_uuid, dispatch, destroy, data)) { crm_err("HA Signon failed"); was_err = TRUE; } } crm_info("Cluster connection active"); if (was_err == FALSE) { ipcs = mainloop_add_ipc_server(T_ATTRD, QB_IPC_NATIVE, &ipc_callbacks); if (ipcs == NULL) { crm_err("Could not start IPC server"); was_err = TRUE; } } crm_info("Accepting attribute updates"); mainloop = g_main_new(FALSE); if (0 == g_timeout_add_full(G_PRIORITY_LOW + 1, 5000, cib_connect, NULL, NULL)) { crm_info("Adding timer failed"); was_err = TRUE; } if (was_err) { crm_err("Aborting startup"); return 100; } crm_notice("Starting mainloop..."); g_main_run(mainloop); crm_notice("Exiting..."); #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { attrd_cluster_conn->llc_ops->signoff(attrd_cluster_conn, TRUE); attrd_cluster_conn->llc_ops->delete(attrd_cluster_conn); } #endif qb_ipcs_destroy(ipcs); if (cib_conn) { cib_conn->cmds->signoff(cib_conn); cib_delete(cib_conn); } g_hash_table_destroy(attr_hash); free(attrd_uuid); empty_uuid_cache(); qb_log_fini(); return 0; } struct attrd_callback_s { char *attr; char *value; }; static void attrd_cib_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { attr_hash_entry_t *hash_entry = NULL; struct attrd_callback_s *data = user_data; if (data->value == NULL && rc == -ENXIO) { rc = pcmk_ok; } switch (rc) { case pcmk_ok: crm_debug("Update %d for %s=%s passed", call_id, data->attr, data->value); hash_entry = g_hash_table_lookup(attr_hash, data->attr); if (hash_entry) { free(hash_entry->stored_value); hash_entry->stored_value = NULL; if (data->value != NULL) { hash_entry->stored_value = strdup(data->value); } } break; case -pcmk_err_diff_failed: /* When an attr changes while the CIB is syncing */ case -ETIME: /* When an attr changes while there is a DC election */ case -ENXIO: /* When an attr changes while the CIB is syncing a * newer config from a node that just came up */ crm_warn("Update %d for %s=%s failed: %s", call_id, data->attr, data->value, pcmk_strerror(rc)); break; default: crm_err("Update %d for %s=%s failed: %s", call_id, data->attr, data->value, pcmk_strerror(rc)); } free(data->value); free(data->attr); free(data); } void attrd_perform_update(attr_hash_entry_t * hash_entry) { int rc = pcmk_ok; struct attrd_callback_s *data = NULL; const char *user_name = NULL; if (hash_entry == NULL) { return; } else if (cib_conn == NULL) { crm_info("Delaying operation %s=%s: cib not connected", hash_entry->id, crm_str(hash_entry->value)); return; } #if ENABLE_ACL if (hash_entry->user) { user_name = hash_entry->user; crm_trace("Performing request from user '%s'", hash_entry->user); } #endif if (hash_entry->value == NULL) { /* delete the attr */ rc = delete_attr_delegate(cib_conn, cib_none, hash_entry->section, attrd_uuid, NULL, hash_entry->set, hash_entry->uuid, hash_entry->id, NULL, FALSE, user_name); if (hash_entry->stored_value) { crm_notice("Sent delete %d: node=%s, attr=%s, id=%s, set=%s, section=%s", rc, attrd_uuid, hash_entry->id, hash_entry->uuid ? hash_entry->uuid : "", hash_entry->set, hash_entry->section); } else if (rc < 0 && rc != -ENXIO) { crm_notice ("Delete operation failed: node=%s, attr=%s, id=%s, set=%s, section=%s: %s (%d)", attrd_uuid, hash_entry->id, hash_entry->uuid ? hash_entry->uuid : "", hash_entry->set, hash_entry->section, pcmk_strerror(rc), rc); } else { crm_trace("Sent delete %d: node=%s, attr=%s, id=%s, set=%s, section=%s", rc, attrd_uuid, hash_entry->id, hash_entry->uuid ? hash_entry->uuid : "", hash_entry->set, hash_entry->section); } } else { /* send update */ rc = update_attr_delegate(cib_conn, cib_none, hash_entry->section, attrd_uuid, NULL, hash_entry->set, hash_entry->uuid, hash_entry->id, hash_entry->value, FALSE, user_name); if (safe_str_neq(hash_entry->value, hash_entry->stored_value) || rc < 0) { crm_notice("Sent update %d: %s=%s", rc, hash_entry->id, hash_entry->value); } else { crm_trace("Sent update %d: %s=%s", rc, hash_entry->id, hash_entry->value); } } data = calloc(1, sizeof(struct attrd_callback_s)); data->attr = strdup(hash_entry->id); if (hash_entry->value != NULL) { data->value = strdup(hash_entry->value); } add_cib_op_callback(cib_conn, rc, FALSE, data, attrd_cib_callback); return; } void attrd_local_callback(xmlNode * msg) { static int plus_plus_len = 5; attr_hash_entry_t *hash_entry = NULL; const char *from = crm_element_value(msg, F_ORIG); const char *op = crm_element_value(msg, F_ATTRD_TASK); const char *attr = crm_element_value(msg, F_ATTRD_ATTRIBUTE); const char *value = crm_element_value(msg, F_ATTRD_VALUE); const char *host = crm_element_value(msg, F_ATTRD_HOST); if (safe_str_eq(op, "refresh")) { crm_notice("Sending full refresh (origin=%s)", from); g_hash_table_foreach(attr_hash, update_for_hash_entry, NULL); return; } if (host != NULL && safe_str_neq(host, attrd_uname)) { send_cluster_message(host, crm_msg_attrd, msg, FALSE); return; } crm_debug("%s message from %s: %s=%s", op, from, attr, crm_str(value)); hash_entry = find_hash_entry(msg); if (hash_entry == NULL) { return; } if (hash_entry->uuid == NULL) { const char *key = crm_element_value(msg, F_ATTRD_KEY); if (key) { hash_entry->uuid = strdup(key); } } crm_debug("Supplied: %s, Current: %s, Stored: %s", value, hash_entry->value, hash_entry->stored_value); if (safe_str_eq(value, hash_entry->value) && safe_str_eq(value, hash_entry->stored_value)) { crm_trace("Ignoring non-change"); return; } else if (value) { int offset = 1; int int_value = 0; int value_len = strlen(value); if (value_len < (plus_plus_len + 2) || value[plus_plus_len] != '+' || (value[plus_plus_len + 1] != '+' && value[plus_plus_len + 1] != '=')) { goto set_unexpanded; } int_value = char2score(hash_entry->value); if (value[plus_plus_len + 1] != '+') { const char *offset_s = value + (plus_plus_len + 2); offset = char2score(offset_s); } int_value += offset; if (int_value > INFINITY) { int_value = INFINITY; } crm_info("Expanded %s=%s to %d", attr, value, int_value); crm_xml_add_int(msg, F_ATTRD_VALUE, int_value); value = crm_element_value(msg, F_ATTRD_VALUE); } set_unexpanded: if (safe_str_eq(value, hash_entry->value) && hash_entry->timer_id) { /* We're already waiting to set this value */ return; } free(hash_entry->value); hash_entry->value = NULL; if (value != NULL) { hash_entry->value = strdup(value); crm_debug("New value of %s is %s", attr, value); } stop_attrd_timer(hash_entry); if (hash_entry->timeout > 0) { hash_entry->timer_id = g_timeout_add(hash_entry->timeout, attrd_timer_callback, hash_entry); } else { attrd_trigger_update(hash_entry); } return; } gboolean attrd_timer_callback(void *user_data) { stop_attrd_timer(user_data); attrd_trigger_update(user_data); return TRUE; /* Always return true, removed cleanly by stop_attrd_timer() */ } gboolean attrd_trigger_update(attr_hash_entry_t * hash_entry) { xmlNode *msg = NULL; /* send HA message to everyone */ crm_notice("Sending flush op to all hosts for: %s (%s)", hash_entry->id, crm_str(hash_entry->value)); log_hash_entry(LOG_DEBUG_2, hash_entry, "Sending flush op to all hosts for:"); msg = create_xml_node(NULL, __FUNCTION__); crm_xml_add(msg, F_TYPE, T_ATTRD); crm_xml_add(msg, F_ORIG, attrd_uname); crm_xml_add(msg, F_ATTRD_TASK, "flush"); crm_xml_add(msg, F_ATTRD_ATTRIBUTE, hash_entry->id); crm_xml_add(msg, F_ATTRD_SET, hash_entry->set); crm_xml_add(msg, F_ATTRD_SECTION, hash_entry->section); crm_xml_add(msg, F_ATTRD_DAMPEN, hash_entry->dampen); crm_xml_add(msg, F_ATTRD_VALUE, hash_entry->value); #if ENABLE_ACL if (hash_entry->user) { crm_xml_add(msg, F_ATTRD_USER, hash_entry->user); } #endif if (hash_entry->timeout <= 0) { crm_xml_add(msg, F_ATTRD_IGNORE_LOCALLY, hash_entry->value); attrd_perform_update(hash_entry); } send_cluster_message(NULL, crm_msg_attrd, msg, FALSE); free_xml(msg); return TRUE; } diff --git a/tools/crm_node.c b/tools/crm_node.c index e71c102d68..0b0cc1e6f4 100644 --- a/tools/crm_node.c +++ b/tools/crm_node.c @@ -1,781 +1,778 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include /* for basename() */ #include -#include -#include +#include #include #include int command = 0; int ccm_fd = 0; gboolean do_quiet = FALSE; char *target_uuid = NULL; char *target_uname = NULL; const char *standby_value = NULL; const char *standby_scope = NULL; -#include <../lib/cluster/stack.h> - /* *INDENT-OFF* */ static struct crm_option long_options[] = { /* Top-level Options */ {"help", 0, 0, '?', "\tThis text"}, {"version", 0, 0, '$', "\tVersion information" }, {"verbose", 0, 0, 'V', "\tIncrease debug output"}, {"quiet", 0, 0, 'Q', "\tEssential output only"}, {"-spacer-", 1, 0, '-', "\nStack:"}, #if SUPPORT_CMAN {"cman", 0, 0, 'c', "\tOnly try connecting to a cman-based cluster"}, #endif #if SUPPORT_COROSYNC {"openais", 0, 0, 'A', "\tOnly try connecting to an OpenAIS-based cluster"}, #endif #ifdef SUPPORT_HEARTBEAT {"heartbeat", 0, 0, 'H', "Only try connecting to a Heartbeat-based cluster"}, #endif {"-spacer-", 1, 0, '-', "\nCommands:"}, {"epoch", 0, 0, 'e', "\tDisplay the epoch during which this node joined the cluster"}, {"quorum", 0, 0, 'q', "\tDisplay a 1 if our partition has quorum, 0 if not"}, {"list", 0, 0, 'l', "\tDisplay all known members (past and present) of this cluster (Not available for heartbeat clusters)"}, {"partition", 0, 0, 'p', "Display the members of this partition"}, {"cluster-id", 0, 0, 'i', "Display this node's cluster id"}, {"remove", 1, 0, 'R', "(Advanced, AIS-Only) Remove the (stopped) node with the specified nodeid from the cluster"}, {"-spacer-", 1, 0, '-', "\nAdditional Options:"}, {"force", 0, 0, 'f'}, {0, 0, 0, 0} }; /* *INDENT-ON* */ int local_id = 0; #if SUPPORT_HEARTBEAT # include # include # include # define UUID_LEN 16 oc_ev_t *ccm_token = NULL; static void *ccm_library = NULL; void oc_ev_special(const oc_ev_t *, oc_ev_class_t, int); static gboolean read_local_hb_uuid(void) { cl_uuid_t uuid; char *buffer = NULL; long start = 0, read_len = 0; FILE *input = fopen(UUID_FILE, "r"); if (input == NULL) { crm_info("Could not open UUID file %s\n", UUID_FILE); return FALSE; } /* see how big the file is */ start = ftell(input); fseek(input, 0L, SEEK_END); if (UUID_LEN != ftell(input)) { fprintf(stderr, "%s must contain exactly %d bytes\n", UUID_FILE, UUID_LEN); abort(); } fseek(input, 0L, start); if (start != ftell(input)) { fprintf(stderr, "fseek not behaving: %ld vs. %ld\n", start, ftell(input)); exit(2); } buffer = malloc(50); read_len = fread(uuid.uuid, 1, UUID_LEN, input); fclose(input); if (read_len != UUID_LEN) { fprintf(stderr, "Expected and read bytes differ: %d vs. %ld\n", UUID_LEN, read_len); exit(3); } else if (buffer != NULL) { cl_uuid_unparse(&uuid, buffer); fprintf(stdout, "%s\n", buffer); return TRUE; } else { fprintf(stderr, "No buffer to unparse\n"); exit(4); } free(buffer); return FALSE; } static void ccm_age_callback(oc_ed_t event, void *cookie, size_t size, const void *data) { int lpc; int node_list_size; const oc_ev_membership_t *oc = (const oc_ev_membership_t *)data; int (*ccm_api_callback_done) (void *cookie) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_callback_done", 1); node_list_size = oc->m_n_member; if (command == 'q') { crm_debug("Processing \"%s\" event.", event == OC_EV_MS_NEW_MEMBERSHIP ? "NEW MEMBERSHIP" : event == OC_EV_MS_NOT_PRIMARY ? "NOT PRIMARY" : event == OC_EV_MS_PRIMARY_RESTORED ? "PRIMARY RESTORED" : event == OC_EV_MS_EVICTED ? "EVICTED" : "NO QUORUM MEMBERSHIP"); if (ccm_have_quorum(event)) { fprintf(stdout, "1\n"); } else { fprintf(stdout, "0\n"); } } else if (command == 'e') { crm_debug("Searching %d members for our birth", oc->m_n_member); } for (lpc = 0; lpc < node_list_size; lpc++) { if (command == 'p') { fprintf(stdout, "%s ", oc->m_array[oc->m_memb_idx + lpc].node_uname); } else if (command == 'e') { int (*ccm_api_is_my_nodeid) (const oc_ev_t * token, const oc_node_t * node) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_is_my_nodeid", 1); if ((*ccm_api_is_my_nodeid) (ccm_token, &(oc->m_array[lpc]))) { crm_debug("MATCH: nodeid=%d, uname=%s, born=%d", oc->m_array[oc->m_memb_idx + lpc].node_id, oc->m_array[oc->m_memb_idx + lpc].node_uname, oc->m_array[oc->m_memb_idx + lpc].node_born_on); fprintf(stdout, "%d\n", oc->m_array[oc->m_memb_idx + lpc].node_born_on); } } } (*ccm_api_callback_done) (cookie); if (command == 'p') { fprintf(stdout, "\n"); } fflush(stdout); exit(0); } static gboolean ccm_age_connect(int *ccm_fd) { gboolean did_fail = FALSE; int ret = 0; int (*ccm_api_register) (oc_ev_t ** token) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_register", 1); int (*ccm_api_set_callback) (const oc_ev_t * token, oc_ev_class_t class, oc_ev_callback_t * fn, oc_ev_callback_t ** prev_fn) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_set_callback", 1); void (*ccm_api_special) (const oc_ev_t *, oc_ev_class_t, int) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_special", 1); int (*ccm_api_activate) (const oc_ev_t * token, int *fd) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_activate", 1); crm_debug("Registering with CCM"); ret = (*ccm_api_register) (&ccm_token); if (ret != 0) { crm_info("CCM registration failed: %d", ret); did_fail = TRUE; } if (did_fail == FALSE) { crm_debug("Setting up CCM callbacks"); ret = (*ccm_api_set_callback) (ccm_token, OC_EV_MEMB_CLASS, ccm_age_callback, NULL); if (ret != 0) { crm_warn("CCM callback not set: %d", ret); did_fail = TRUE; } } if (did_fail == FALSE) { (*ccm_api_special) (ccm_token, OC_EV_MEMB_CLASS, 0 /*don't care */ ); crm_debug("Activating CCM token"); ret = (*ccm_api_activate) (ccm_token, ccm_fd); if (ret != 0) { crm_warn("CCM Activation failed: %d", ret); did_fail = TRUE; } } return !did_fail; } static gboolean try_heartbeat(int command, enum cluster_type_e stack) { crm_debug("Attempting to process %c command", command); if (command == 'i') { if (read_local_hb_uuid()) { exit(0); } } else if (ccm_age_connect(&ccm_fd)) { int rc = 0; fd_set rset; int (*ccm_api_handle_event) (const oc_ev_t * token) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_handle_event", 1); while (1) { sleep(1); FD_ZERO(&rset); FD_SET(ccm_fd, &rset); errno = 0; rc = select(ccm_fd + 1, &rset, NULL, NULL, NULL); if (rc > 0 && (*ccm_api_handle_event) (ccm_token) != 0) { crm_err("oc_ev_handle_event failed"); return FALSE; } else if (rc < 0 && errno != EINTR) { crm_perror(LOG_ERR, "select failed: %d", rc); return FALSE; } } } return FALSE; } #endif #if SUPPORT_CMAN # include # define MAX_NODES 256 static gboolean try_cman(int command, enum cluster_type_e stack) { int rc = -1, lpc = 0, node_count = 0; cman_node_t node; cman_cluster_t cluster; cman_handle_t cman_handle = NULL; cman_node_t cman_nodes[MAX_NODES]; memset(&cluster, 0, sizeof(cluster)); cman_handle = cman_init(NULL); if (cman_handle == NULL || cman_is_active(cman_handle) == FALSE) { crm_info("Couldn't connect to cman"); return FALSE; } switch (command) { case 'R': fprintf(stderr, "Node removal not supported for cman based clusters\n"); exit(-EPROTONOSUPPORT); break; case 'e': /* Age makes no sense (yet?) in a cman cluster */ fprintf(stdout, "1\n"); break; case 'q': fprintf(stdout, "%d\n", cman_is_quorate(cman_handle)); break; case 'l': case 'p': rc = cman_get_nodes(cman_handle, MAX_NODES, &node_count, cman_nodes); if (rc != 0) { fprintf(stderr, "Couldn't query cman node list: %d %d", rc, errno); goto cman_bail; } for (lpc = 0; lpc < node_count; lpc++) { if (command == 'l') { printf("%s ", cman_nodes[lpc].cn_name); } else if (cman_nodes[lpc].cn_nodeid != 0 && cman_nodes[lpc].cn_member) { /* Never allow node ID 0 to be considered a member #315711 */ printf("%s ", cman_nodes[lpc].cn_name); } } printf("\n"); break; case 'i': rc = cman_get_node(cman_handle, CMAN_NODEID_US, &node); if (rc != 0) { fprintf(stderr, "Couldn't query cman node id: %d %d", rc, errno); goto cman_bail; } fprintf(stdout, "%u\n", node.cn_nodeid); break; default: fprintf(stderr, "Unknown option '%c'\n", command); crm_help('?', EX_USAGE); } cman_finish(cman_handle); exit(0); cman_bail: cman_finish(cman_handle); exit(EX_USAGE); } #endif #if HAVE_CONFDB static void ais_membership_destroy(gpointer user_data) { crm_err("AIS connection terminated"); ais_fd_sync = -1; exit(1); } static gint member_sort(gconstpointer a, gconstpointer b) { const crm_node_t *node_a = a; const crm_node_t *node_b = b; return strcmp(node_a->uname, node_b->uname); } static void crm_add_member(gpointer key, gpointer value, gpointer user_data) { GList **list = user_data; crm_node_t *node = value; if (node->uname != NULL) { *list = g_list_insert_sorted(*list, node, member_sort); } } static gboolean ais_membership_dispatch(AIS_Message * wrapper, char *data, int sender) { switch (wrapper->header.id) { case crm_class_members: case crm_class_notify: case crm_class_quorum: break; default: return TRUE; break; } if (command == 'q') { if (crm_have_quorum) { fprintf(stdout, "1\n"); } else { fprintf(stdout, "0\n"); } } else if (command == 'l') { GList *nodes = NULL; GListPtr lpc = NULL; g_hash_table_foreach(crm_peer_cache, crm_add_member, &nodes); for (lpc = nodes; lpc != NULL; lpc = lpc->next) { crm_node_t *node = (crm_node_t *) lpc->data; fprintf(stdout, "%u %s %s\n", node->id, node->uname, node->state); } fprintf(stdout, "\n"); } else if (command == 'p') { GList *nodes = NULL; GListPtr lpc = NULL; g_hash_table_foreach(crm_peer_cache, crm_add_member, &nodes); for (lpc = nodes; lpc != NULL; lpc = lpc->next) { crm_node_t *node = (crm_node_t *) lpc->data; if (node->uname && safe_str_eq(node->state, CRM_NODE_MEMBER)) { fprintf(stdout, "%s ", node->uname); } } fprintf(stdout, "\n"); } exit(0); return TRUE; } #endif #ifdef SUPPORT_CS_QUORUM # include # include static int node_mcp_dispatch(const char *buffer, ssize_t length, gpointer userdata) { xmlNode *msg = string2xml(buffer); if (msg) { xmlNode *node = NULL; crm_log_xml_trace(msg, "message"); for (node = __xml_first_child(msg); node != NULL; node = __xml_next(node)) { const char *uname = crm_element_value(node, "uname"); if (command == 'l') { int id = 0; crm_element_value_int(node, "id", &id); fprintf(stdout, "%u %s\n", id, uname); } else if (command == 'p') { fprintf(stdout, "%s ", uname); } } free_xml(msg); if (command == 'p') { fprintf(stdout, "\n"); } exit(0); } return 0; } static void node_mcp_destroy(gpointer user_data) { exit(1); } static int crmd_remove_node_cache(int id) { int rc = -1; char *admin_uuid = NULL; crm_ipc_t *conn = crm_ipc_new(CRM_SYSTEM_CRMD, 0); xmlNode *cmd = NULL; xmlNode *hello = NULL; xmlNode *msg_data = NULL; if (!conn) { goto rm_node_cleanup; } if (!crm_ipc_connect(conn)) { goto rm_node_cleanup; } admin_uuid = calloc(1, 11); snprintf(admin_uuid, 10, "%d", getpid()); admin_uuid[10] = '\0'; hello = create_hello_message(admin_uuid, "crm_node", "0", "1"); rc = crm_ipc_send(conn, hello, NULL, 0); if (rc < 0) { goto rm_node_cleanup; } msg_data = create_xml_node(NULL, XML_TAG_OPTIONS); crm_xml_add_int(msg_data, XML_ATTR_ID, id); cmd = create_request(CRM_OP_RM_NODE_CACHE, msg_data, NULL, CRM_SYSTEM_CRMD, "crm_node", admin_uuid); rc = crm_ipc_send(conn, cmd, NULL, 0); rm_node_cleanup: if (conn) { crm_ipc_close(conn); crm_ipc_destroy(conn); } free_xml(cmd); free_xml(hello); free(admin_uuid); return rc > 0 ? 0 : rc; } static gboolean try_corosync(int command, enum cluster_type_e stack) { int rc = 0; int quorate = 0; uint32_t quorum_type = 0; unsigned int nodeid = 0; cpg_handle_t c_handle = 0; quorum_handle_t q_handle = 0; mainloop_io_t *ipc = NULL; GMainLoop *amainloop = NULL; struct ipc_client_callbacks node_callbacks = { .dispatch = node_mcp_dispatch, .destroy = node_mcp_destroy }; switch (command) { case 'R': if (crmd_remove_node_cache(atoi(target_uname))) { crm_err("Failed to connect to crmd to remove node id %s", target_uname); } case 'e': /* Age makes no sense (yet) in an AIS cluster */ fprintf(stdout, "1\n"); exit(0); case 'q': /* Go direct to the Quorum API */ rc = quorum_initialize(&q_handle, NULL, &quorum_type); if (rc != CS_OK) { crm_err("Could not connect to the Quorum API: %d\n", rc); return FALSE; } rc = quorum_getquorate(q_handle, &quorate); if (rc != CS_OK) { crm_err("Could not obtain the current Quorum API state: %d\n", rc); return FALSE; } if (quorate) { fprintf(stdout, "1\n"); } else { fprintf(stdout, "0\n"); } quorum_finalize(q_handle); exit(0); case 'i': /* Go direct to the CPG API */ rc = cpg_initialize(&c_handle, NULL); if (rc != CS_OK) { crm_err("Could not connect to the Cluster Process Group API: %d\n", rc); return FALSE; } rc = cpg_local_get(c_handle, &nodeid); if (rc != CS_OK) { crm_err("Could not get local node id from the CPG API"); return FALSE; } fprintf(stdout, "%u\n", nodeid); cpg_finalize(c_handle); exit(0); case 'l': case 'p': /* Go to pacemakerd */ amainloop = g_main_new(FALSE); ipc = mainloop_add_ipc_client(CRM_SYSTEM_MCP, 0, NULL, &node_callbacks); if(ipc != NULL) { xmlNode *poke = create_xml_node(NULL, "poke"); crm_ipc_send(mainloop_get_ipc_client(ipc), poke, NULL, 0); free_xml(poke); g_main_run(amainloop); } break; } return FALSE; } #endif #if HAVE_CONFDB static gboolean try_openais(int command, enum cluster_type_e stack) { if (init_ais_connection_once (ais_membership_dispatch, ais_membership_destroy, NULL, NULL, &local_id)) { GMainLoop *amainloop = NULL; switch (command) { case 'R': send_ais_text(crm_class_rmpeer, target_uname, TRUE, NULL, crm_msg_ais); exit(0); case 'e': /* Age makes no sense (yet) in an AIS cluster */ fprintf(stdout, "1\n"); exit(0); case 'q': send_ais_text(crm_class_quorum, NULL, TRUE, NULL, crm_msg_ais); break; case 'l': case 'p': crm_info("Requesting the list of configured nodes"); send_ais_text(crm_class_members, __FUNCTION__, TRUE, NULL, crm_msg_ais); break; case 'i': printf("%u\n", local_id); exit(0); default: fprintf(stderr, "Unknown option '%c'\n", command); crm_help('?', EX_USAGE); } amainloop = g_main_new(FALSE); g_main_run(amainloop); } return FALSE; } #endif int set_cluster_type(enum cluster_type_e type); int main(int argc, char **argv) { int flag = 0; int argerr = 0; gboolean force_flag = FALSE; gboolean dangerous_cmd = FALSE; enum cluster_type_e try_stack = pcmk_cluster_unknown; int option_index = 0; crm_peer_init(); crm_log_cli_init("crm_node"); crm_set_options(NULL, "command [options]", long_options, "Tool for displaying low-level node information"); while (flag >= 0) { flag = crm_get_option(argc, argv, &option_index); switch (flag) { case -1: break; case 'V': crm_bump_log_level(); break; case '$': case '?': crm_help(flag, EX_OK); break; case 'Q': do_quiet = TRUE; break; case 'H': set_cluster_type(pcmk_cluster_heartbeat); break; case 'A': set_cluster_type(pcmk_cluster_classic_ais); break; case 'C': set_cluster_type(pcmk_cluster_corosync); break; case 'c': set_cluster_type(pcmk_cluster_cman); break; case 'f': force_flag = TRUE; break; case 'R': dangerous_cmd = TRUE; command = flag; target_uname = optarg; break; case 'p': case 'e': case 'q': case 'i': case 'l': command = flag; break; default: ++argerr; break; } } if (optind > argc) { ++argerr; } if (argerr) { crm_help('?', EX_USAGE); } if (dangerous_cmd && force_flag == FALSE) { fprintf(stderr, "The supplied command is considered dangerous." " To prevent accidental destruction of the cluster," " the --force flag is required in order to proceed.\n"); fflush(stderr); exit(EX_USAGE); } try_stack = get_cluster_type(); crm_debug("Attempting to process -%c command for cluster type: %s", command, name_for_cluster_type(try_stack)); #if SUPPORT_CMAN if (try_stack == pcmk_cluster_cman) { try_cman(command, try_stack); } #endif #ifdef SUPPORT_CS_QUORUM if (try_stack == pcmk_cluster_corosync) { try_corosync(command, try_stack); } #endif #if HAVE_CONFDB /* Only an option if we're using the plugins */ if (try_stack == pcmk_cluster_classic_ais) { try_openais(command, try_stack); } #endif #if SUPPORT_HEARTBEAT if (try_stack == pcmk_cluster_heartbeat) { try_heartbeat(command, try_stack); } #endif return (1); }