diff --git a/cib/callbacks.c b/cib/callbacks.c index 0a44c92ed2..c28bd9fadd 100644 --- a/cib/callbacks.c +++ b/cib/callbacks.c @@ -1,1419 +1,1421 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "common.h" extern GMainLoop *mainloop; extern gboolean cib_shutdown_flag; extern gboolean stand_alone; extern const char *cib_root; static unsigned long cib_local_bcast_num = 0; typedef struct cib_local_notify_s { xmlNode *notify_src; char *client_id; gboolean from_peer; gboolean sync_reply; } cib_local_notify_t; qb_ipcs_service_t *ipcs_ro = NULL; qb_ipcs_service_t *ipcs_rw = NULL; qb_ipcs_service_t *ipcs_shm = NULL; extern crm_cluster_t crm_cluster; extern int cib_update_counter(xmlNode * xml_obj, const char *field, gboolean reset); extern void GHFunc_count_peers(gpointer key, gpointer value, gpointer user_data); gint cib_GCompareFunc(gconstpointer a, gconstpointer b); gboolean can_write(int flags); void send_cib_replace(const xmlNode * sync_request, const char *host); void cib_process_request(xmlNode * request, gboolean privileged, gboolean force_synchronous, gboolean from_peer, crm_client_t * cib_client); extern GHashTable *local_notify_queue; int next_client_id = 0; extern const char *cib_our_uname; extern unsigned long cib_num_ops, cib_num_local, cib_num_updates, cib_num_fail; extern unsigned long cib_bad_connects, cib_num_timeouts; extern int cib_status; int cib_process_command(xmlNode * request, xmlNode ** reply, xmlNode ** cib_diff, gboolean privileged); gboolean cib_common_callback(qb_ipcs_connection_t *c, void *data, size_t size, gboolean privileged); static int32_t cib_ipc_accept(qb_ipcs_connection_t *c, uid_t uid, gid_t gid) { crm_trace("Connection %p", c); if (cib_shutdown_flag) { crm_info("Ignoring new client [%d] during shutdown", crm_ipcs_client_pid(c)); return -EPERM; } if(crm_client_new(c, uid, gid) == NULL) { return -EIO; } return 0; } static void cib_ipc_created(qb_ipcs_connection_t *c) { crm_trace("Connection %p", c); } static int32_t cib_ipc_dispatch_rw(qb_ipcs_connection_t *c, void *data, size_t size) { crm_client_t *client = crm_client_get(c); crm_trace("%p message from %s", c, client->id); return cib_common_callback(c, data, size, TRUE); } static int32_t cib_ipc_dispatch_ro(qb_ipcs_connection_t *c, void *data, size_t size) { crm_client_t *client = crm_client_get(c); crm_trace("%p message from %s", c, client->id); return cib_common_callback(c, data, size, FALSE); } /* Error code means? */ static int32_t cib_ipc_closed(qb_ipcs_connection_t *c) { crm_client_t *client = crm_client_get(c); crm_trace("Connection %p", c); crm_client_destroy(client); return 0; } static void cib_ipc_destroy(qb_ipcs_connection_t *c) { crm_trace("Connection %p", c); if (cib_shutdown_flag) { cib_shutdown(0); } } struct qb_ipcs_service_handlers ipc_ro_callbacks = { .connection_accept = cib_ipc_accept, .connection_created = cib_ipc_created, .msg_process = cib_ipc_dispatch_ro, .connection_closed = cib_ipc_closed, .connection_destroyed = cib_ipc_destroy }; struct qb_ipcs_service_handlers ipc_rw_callbacks = { .connection_accept = cib_ipc_accept, .connection_created = cib_ipc_created, .msg_process = cib_ipc_dispatch_rw, .connection_closed = cib_ipc_closed, .connection_destroyed = cib_ipc_destroy }; void cib_common_callback_worker(uint32_t id, uint32_t flags, xmlNode * op_request, crm_client_t * cib_client, gboolean privileged) { const char *op = crm_element_value(op_request, F_CIB_OPERATION); if (crm_str_eq(op, CRM_OP_REGISTER, TRUE)) { if(flags & crm_ipc_client_response) { xmlNode *ack = create_xml_node(NULL, __FUNCTION__); crm_xml_add(ack, F_CIB_OPERATION, CRM_OP_REGISTER); crm_xml_add(ack, F_CIB_CLIENTID, cib_client->id); crm_ipcs_send(cib_client, id, ack, FALSE); cib_client->request_id = 0; free_xml(ack); } return; } else if (crm_str_eq(op, T_CIB_NOTIFY, TRUE)) { /* Update the notify filters for this client */ int on_off = 0; long long bit = 0; const char *type = crm_element_value(op_request, F_CIB_NOTIFY_TYPE); crm_element_value_int(op_request, F_CIB_NOTIFY_ACTIVATE, &on_off); crm_debug("Setting %s callbacks for %s (%s): %s", type, cib_client->name, cib_client->id, on_off ? "on" : "off"); if (safe_str_eq(type, T_CIB_POST_NOTIFY)) { bit = cib_notify_post; } else if (safe_str_eq(type, T_CIB_PRE_NOTIFY)) { bit = cib_notify_pre; } else if (safe_str_eq(type, T_CIB_UPDATE_CONFIRM)) { bit = cib_notify_confirm; } else if (safe_str_eq(type, T_CIB_DIFF_NOTIFY)) { bit = cib_notify_diff; } else if (safe_str_eq(type, T_CIB_REPLACE_NOTIFY)) { bit = cib_notify_replace; } if(on_off) { set_bit(cib_client->options, bit); } else { clear_bit(cib_client->options, bit); } if(flags & crm_ipc_client_response) { /* TODO - include rc */ crm_ipcs_send_ack(cib_client, id, "ack", __FUNCTION__, __LINE__); cib_client->request_id = 0; } return; } cib_process_request(op_request, FALSE, privileged, FALSE, cib_client); } int32_t cib_common_callback(qb_ipcs_connection_t *c, void *data, size_t size, gboolean privileged) { uint32_t id = 0; uint32_t flags = 0; int call_options = 0; crm_client_t *cib_client = crm_client_get(c); xmlNode *op_request = crm_ipcs_recv(cib_client, data, size, &id, &flags); if(op_request) { crm_element_value_int(op_request, F_CIB_CALLOPTS, &call_options); } crm_trace("Inbound: %.200s", data); if (op_request == NULL || cib_client == NULL) { crm_ipcs_send_ack(cib_client, id, "nack", __FUNCTION__, __LINE__); return 0; } if(is_set(call_options, cib_sync_call)) { CRM_ASSERT(flags & crm_ipc_client_response); } if(flags & crm_ipc_client_response) { CRM_LOG_ASSERT(cib_client->request_id == 0); /* This means the client has two synchronous events in-flight */ cib_client->request_id = id; /* Reply only to the last one */ } if (cib_client->name == NULL) { const char *value = crm_element_value(op_request, F_CIB_CLIENTNAME); if (value == NULL) { cib_client->name = crm_itoa(cib_client->pid); } else { cib_client->name = strdup(value); } } crm_xml_add(op_request, F_CIB_CLIENTID, cib_client->id); crm_xml_add(op_request, F_CIB_CLIENTNAME, cib_client->name); #if ENABLE_ACL determine_request_user(cib_client->user, op_request, F_CIB_USER); #endif crm_log_xml_trace(op_request, "Client[inbound]"); cib_common_callback_worker(id, flags, op_request, cib_client, privileged); free_xml(op_request); return 0; } static void do_local_notify(xmlNode * notify_src, const char *client_id, gboolean sync_reply, gboolean from_peer) { /* send callback to originating child */ crm_client_t *client_obj = NULL; int local_rc = pcmk_ok; if (client_id != NULL) { client_obj = crm_client_get_by_id(client_id); } if (client_obj == NULL) { local_rc = -ECONNRESET; crm_trace("No client to sent the response to. F_CIB_CLIENTID not set."); } else { int rid = 0; if(sync_reply) { if (client_obj->ipcs) { CRM_LOG_ASSERT(client_obj->request_id); rid = client_obj->request_id; client_obj->request_id = 0; crm_trace("Sending response %d to %s %s", rid, client_obj->name, from_peer?"(originator of delegated request)":""); } else { crm_trace("Sending response to %s %s", client_obj->name, from_peer?"(originator of delegated request)":""); } } else { crm_trace("Sending an event to %s %s", client_obj->name, from_peer?"(originator of delegated request)":""); } switch(client_obj->kind) { case CRM_CLIENT_IPC: if (crm_ipcs_send(client_obj, rid, notify_src, !sync_reply) < 0) { local_rc = -ENOMSG; } break; +# ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: +# endif case CRM_CLIENT_TCP: crm_remote_send(client_obj->remote, notify_src); break; default: crm_err("Unknown transport %d for %s", client_obj->kind, client_obj->name); } } if (local_rc != pcmk_ok && client_obj != NULL) { crm_warn("%sSync reply to %s failed: %s", sync_reply ? "" : "A-", client_obj ? client_obj->name : "", pcmk_strerror(local_rc)); } } static void local_notify_destroy_callback(gpointer data) { cib_local_notify_t *notify = data; free_xml(notify->notify_src); free(notify->client_id); free(notify); } static void check_local_notify(int bcast_id) { cib_local_notify_t *notify = NULL; if (!local_notify_queue) { return; } notify = g_hash_table_lookup(local_notify_queue, GINT_TO_POINTER(bcast_id)); if (notify) { do_local_notify(notify->notify_src, notify->client_id, notify->sync_reply, notify->from_peer); g_hash_table_remove(local_notify_queue, GINT_TO_POINTER(bcast_id)); } } static void queue_local_notify(xmlNode * notify_src, const char *client_id, gboolean sync_reply, gboolean from_peer) { cib_local_notify_t *notify = calloc(1, sizeof(cib_local_notify_t)); notify->notify_src = notify_src; notify->client_id = strdup(client_id); notify->sync_reply = sync_reply; notify->from_peer = from_peer; if (!local_notify_queue) { local_notify_queue = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, local_notify_destroy_callback); } g_hash_table_insert(local_notify_queue, GINT_TO_POINTER(cib_local_bcast_num), notify); } static void parse_local_options(crm_client_t * cib_client, int call_type, int call_options, const char *host, const char *op, gboolean * local_notify, gboolean * needs_reply, gboolean * process, gboolean * needs_forward) { if (cib_op_modifies(call_type) && !(call_options & cib_inhibit_bcast)) { /* we need to send an update anyway */ *needs_reply = TRUE; } else { *needs_reply = FALSE; } if (host == NULL && (call_options & cib_scope_local)) { crm_trace("Processing locally scoped %s op from %s", op, cib_client->name); *local_notify = TRUE; } else if (host == NULL && cib_is_master) { crm_trace("Processing master %s op locally from %s", op, cib_client->name); *local_notify = TRUE; } else if (safe_str_eq(host, cib_our_uname)) { crm_trace("Processing locally addressed %s op from %s", op, cib_client->name); *local_notify = TRUE; } else if (stand_alone) { *needs_forward = FALSE; *local_notify = TRUE; *process = TRUE; } else { crm_trace("%s op from %s needs to be forwarded to %s", op, cib_client->name, host ? host : "the master instance"); *needs_forward = TRUE; *process = FALSE; } } static gboolean parse_peer_options(int call_type, xmlNode * request, gboolean * local_notify, gboolean * needs_reply, gboolean * process, gboolean * needs_forward) { const char *op = NULL; const char *host = NULL; const char *delegated = NULL; const char *originator = crm_element_value(request, F_ORIG); const char *reply_to = crm_element_value(request, F_CIB_ISREPLY); const char *update = crm_element_value(request, F_CIB_GLOBAL_UPDATE); gboolean is_reply = safe_str_eq(reply_to, cib_our_uname); if (crm_is_true(update)) { *needs_reply = FALSE; if (is_reply) { *local_notify = TRUE; crm_trace("Processing global/peer update from %s" " that originated from us", originator); } else { crm_trace("Processing global/peer update from %s", originator); } return TRUE; } host = crm_element_value(request, F_CIB_HOST); if (host != NULL && safe_str_eq(host, cib_our_uname)) { crm_trace("Processing request sent to us from %s", originator); return TRUE; } else if (host == NULL && cib_is_master == TRUE) { crm_trace("Processing request sent to master instance from %s", originator); return TRUE; } op = crm_element_value(request, F_CIB_OPERATION); if(safe_str_eq(op, "cib_shutdown_req")) { /* Always process these */ *local_notify = FALSE; if(reply_to == NULL || is_reply) { *process = TRUE; } if(is_reply) { *needs_reply = FALSE; } return *process; } if (is_reply) { crm_trace("Forward reply sent from %s to local clients", originator); *process = FALSE; *needs_reply = FALSE; *local_notify = TRUE; return TRUE; } delegated = crm_element_value(request, F_CIB_DELEGATED); if (delegated != NULL) { crm_trace("Ignoring msg for master instance"); } else if (host != NULL) { /* this is for a specific instance and we're not it */ crm_trace("Ignoring msg for instance on %s", crm_str(host)); } else if (reply_to == NULL && cib_is_master == FALSE) { /* this is for the master instance and we're not it */ crm_trace("Ignoring reply to %s", crm_str(reply_to)); } else if (safe_str_eq(op, "cib_shutdown_req")) { if (reply_to != NULL) { crm_debug("Processing %s from %s", op, host); *needs_reply = FALSE; } else { crm_debug("Processing %s reply from %s", op, host); } return TRUE; } else { crm_err("Nothing for us to do?"); crm_log_xml_err(request, "Peer[inbound]"); } return FALSE; } static void forward_request(xmlNode * request, crm_client_t * cib_client, int call_options) { const char *op = crm_element_value(request, F_CIB_OPERATION); const char *host = crm_element_value(request, F_CIB_HOST); crm_xml_add(request, F_CIB_DELEGATED, cib_our_uname); if (host != NULL) { crm_trace("Forwarding %s op to %s", op, host); send_cluster_message(crm_get_peer(0, host), crm_msg_cib, request, FALSE); } else { crm_trace("Forwarding %s op to master instance", op); send_cluster_message(NULL, crm_msg_cib, request, FALSE); } /* Return the request to its original state */ xml_remove_prop(request, F_CIB_DELEGATED); if (call_options & cib_discard_reply) { crm_trace("Client not interested in reply"); } } static gboolean send_peer_reply(xmlNode * msg, xmlNode * result_diff, const char *originator, gboolean broadcast) { CRM_ASSERT(msg != NULL); if (broadcast) { /* this (successful) call modified the CIB _and_ the * change needs to be broadcast... * send via HA to other nodes */ int diff_add_updates = 0; int diff_add_epoch = 0; int diff_add_admin_epoch = 0; int diff_del_updates = 0; int diff_del_epoch = 0; int diff_del_admin_epoch = 0; const char *digest = NULL; CRM_LOG_ASSERT(result_diff != NULL); digest = crm_element_value(result_diff, XML_ATTR_DIGEST); cib_diff_version_details(result_diff, &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); crm_trace("Sending update diff %d.%d.%d -> %d.%d.%d %s", diff_del_admin_epoch, diff_del_epoch, diff_del_updates, diff_add_admin_epoch, diff_add_epoch, diff_add_updates, digest); crm_xml_add(msg, F_CIB_ISREPLY, originator); crm_xml_add(msg, F_CIB_GLOBAL_UPDATE, XML_BOOLEAN_TRUE); crm_xml_add(msg, F_CIB_OPERATION, CIB_OP_APPLY_DIFF); CRM_ASSERT(digest != NULL); add_message_xml(msg, F_CIB_UPDATE_DIFF, result_diff); crm_log_xml_explicit(msg, "copy"); return send_cluster_message(NULL, crm_msg_cib, msg, TRUE); } else if (originator != NULL) { /* send reply via HA to originating node */ crm_trace("Sending request result to originator only"); crm_xml_add(msg, F_CIB_ISREPLY, originator); return send_cluster_message(crm_get_peer(0, originator), crm_msg_cib, msg, FALSE); } return FALSE; } void cib_process_request(xmlNode * request, gboolean force_synchronous, gboolean privileged, gboolean unused, crm_client_t * cib_client) { int call_type = 0; int call_options = 0; gboolean process = TRUE; gboolean is_update = TRUE; gboolean from_peer = TRUE; gboolean needs_reply = TRUE; gboolean local_notify = FALSE; gboolean needs_forward = FALSE; gboolean global_update = crm_is_true(crm_element_value(request, F_CIB_GLOBAL_UPDATE)); xmlNode *op_reply = NULL; xmlNode *result_diff = NULL; int rc = pcmk_ok; const char *op = crm_element_value(request, F_CIB_OPERATION); const char *originator = crm_element_value(request, F_ORIG); const char *host = crm_element_value(request, F_CIB_HOST); const char *target = NULL; const char *client_id = crm_element_value(request, F_CIB_CLIENTID); if(cib_client) { from_peer = FALSE; } cib_num_ops++; if (cib_num_ops == 0) { cib_num_fail = 0; cib_num_local = 0; cib_num_updates = 0; crm_info("Stats wrapped around"); } crm_element_value_int(request, F_CIB_CALLOPTS, &call_options); if (force_synchronous) { call_options |= cib_sync_call; } if (host != NULL && strlen(host) == 0) { host = NULL; } if(host) { target = host; } else if(call_options & cib_scope_local) { target = "local host"; } else { target = "master"; } if(from_peer) { crm_trace("Processing peer %s operation from %s on %s intended for %s", op, client_id, originator, target); } else { crm_xml_add(request, F_ORIG, cib_our_uname); crm_trace("Processing local %s operation from %s intended for %s", op, client_id, target); } rc = cib_get_operation_id(op, &call_type); if (rc != pcmk_ok) { /* TODO: construct error reply? */ crm_err("Pre-processing of command failed: %s", pcmk_strerror(rc)); return; } if (from_peer == FALSE) { parse_local_options(cib_client, call_type, call_options, host, op, &local_notify, &needs_reply, &process, &needs_forward); } else if (parse_peer_options(call_type, request, &local_notify, &needs_reply, &process, &needs_forward) == FALSE) { return; } is_update = cib_op_modifies(call_type); if (is_update) { cib_num_updates++; } if (call_options & cib_discard_reply) { needs_reply = is_update; local_notify = FALSE; } if (needs_forward) { const char *host = crm_element_value(request, F_CIB_HOST); const char *section = crm_element_value(request, F_CIB_SECTION); crm_info("Forwarding %s operation for section %s to %s (origin=%s/%s/%s)", op, section ? section : "'all'", host ? host : "master", originator ? originator : "local", crm_element_value(request, F_CIB_CLIENTNAME), crm_element_value(request, F_CIB_CALLID)); forward_request(request, cib_client, call_options); return; } if (cib_status != pcmk_ok) { rc = cib_status; crm_err("Operation ignored, cluster configuration is invalid." " Please repair and restart: %s", pcmk_strerror(cib_status)); op_reply = cib_construct_reply(request, the_cib, cib_status); } else if (process) { int now = time(NULL); int level = LOG_INFO; const char *section = crm_element_value(request, F_CIB_SECTION); cib_num_local++; rc = cib_process_command(request, &op_reply, &result_diff, privileged); if (global_update) { switch (rc) { case pcmk_ok: level = LOG_INFO; break; case -pcmk_err_old_data: case -pcmk_err_diff_resync: case -pcmk_err_diff_failed: level = LOG_TRACE; break; default: level = LOG_ERR; } } else if (rc != pcmk_ok && is_update) { cib_num_fail++; level = LOG_WARNING; /* } else if (safe_str_eq(op, CIB_OP_QUERY)) { level = LOG_TRACE; } else if (safe_str_eq(op, CIB_OP_SLAVE)) { level = LOG_TRACE; } else if (safe_str_eq(section, XML_CIB_TAG_STATUS)) { level = LOG_TRACE; */ } do_crm_log(level, "Completed %s operation for section %s: %s (rc=%d, origin=%s/%s/%s, version=%s.%s.%s)", op, section ? section : "'all'", pcmk_strerror(rc), rc, originator ? originator : "local", crm_element_value(request, F_CIB_CLIENTNAME), crm_element_value(request, F_CIB_CALLID), the_cib ? crm_element_value(the_cib, XML_ATTR_GENERATION_ADMIN) : "0", the_cib ? crm_element_value(the_cib, XML_ATTR_GENERATION) : "0", the_cib ? crm_element_value(the_cib, XML_ATTR_NUMUPDATES) : "0" ); if((now + 1) < time(NULL)) { crm_write_blackbox(0, NULL); } if (op_reply == NULL && (needs_reply || local_notify)) { crm_err("Unexpected NULL reply to message"); crm_log_xml_err(request, "null reply"); needs_reply = FALSE; local_notify = FALSE; } } /* from now on we are the server */ if (needs_reply == FALSE || stand_alone) { /* nothing more to do... * this was a non-originating slave update */ crm_trace("Completed slave update"); } else if (rc == pcmk_ok && result_diff != NULL && !(call_options & cib_inhibit_bcast)) { gboolean broadcast = FALSE; cib_local_bcast_num++; crm_xml_add_int(request, F_CIB_LOCAL_NOTIFY_ID, cib_local_bcast_num); broadcast = send_peer_reply(request, result_diff, originator, TRUE); if (broadcast && client_id && local_notify && op_reply) { /* If we have been asked to sync the reply, * and a bcast msg has gone out, we queue the local notify * until we know the bcast message has been received */ local_notify = FALSE; crm_trace("Queuing local %ssync notification for %s", (call_options & cib_sync_call)?"":"a-", client_id); queue_local_notify(op_reply, client_id, (call_options & cib_sync_call), from_peer); op_reply = NULL; /* the reply is queued, so don't free here */ } } else if (call_options & cib_discard_reply) { crm_trace("Caller isn't interested in reply"); } else if (from_peer) { if (is_update == FALSE || result_diff == NULL) { crm_trace("Request not broadcast: R/O call"); } else if (call_options & cib_inhibit_bcast) { crm_trace("Request not broadcast: inhibited"); } else if (rc != pcmk_ok) { crm_trace("Request not broadcast: call failed: %s", pcmk_strerror(rc)); } else { crm_trace("Directing reply to %s", originator); } send_peer_reply(op_reply, result_diff, originator, FALSE); } if (local_notify && client_id) { crm_trace("Performing local %ssync notification for %s", (call_options & cib_sync_call)?"":"a-", client_id); if (process == FALSE) { do_local_notify(request, client_id, call_options & cib_sync_call, from_peer); } else { do_local_notify(op_reply, client_id, call_options & cib_sync_call, from_peer); } } free_xml(op_reply); free_xml(result_diff); return; } xmlNode * cib_construct_reply(xmlNode * request, xmlNode * output, int rc) { int lpc = 0; xmlNode *reply = NULL; const char *name = NULL; const char *value = NULL; const char *names[] = { F_CIB_OPERATION, F_CIB_CALLID, F_CIB_CLIENTID, F_CIB_CALLOPTS }; static int max = DIMOF(names); crm_trace("Creating a basic reply"); reply = create_xml_node(NULL, "cib-reply"); crm_xml_add(reply, F_TYPE, T_CIB); for (lpc = 0; lpc < max; lpc++) { name = names[lpc]; value = crm_element_value(request, name); crm_xml_add(reply, name, value); } crm_xml_add_int(reply, F_CIB_RC, rc); if (output != NULL) { crm_trace("Attaching reply output"); add_message_xml(reply, F_CIB_CALLDATA, output); } return reply; } int cib_process_command(xmlNode * request, xmlNode ** reply, xmlNode ** cib_diff, gboolean privileged) { xmlNode *input = NULL; xmlNode *output = NULL; xmlNode *result_cib = NULL; xmlNode *current_cib = NULL; #if ENABLE_ACL xmlNode *filtered_current_cib = NULL; #endif int call_type = 0; int call_options = 0; int log_level = LOG_TRACE; const char *op = NULL; const char *section = NULL; int rc = pcmk_ok; int rc2 = pcmk_ok; gboolean send_r_notify = FALSE; gboolean global_update = FALSE; gboolean config_changed = FALSE; gboolean manage_counters = TRUE; CRM_ASSERT(cib_status == pcmk_ok); *reply = NULL; *cib_diff = NULL; current_cib = the_cib; /* Start processing the request... */ op = crm_element_value(request, F_CIB_OPERATION); crm_element_value_int(request, F_CIB_CALLOPTS, &call_options); rc = cib_get_operation_id(op, &call_type); if (rc == pcmk_ok && privileged == FALSE) { rc = cib_op_can_run(call_type, call_options, privileged, global_update); } rc2 = cib_op_prepare(call_type, request, &input, §ion); if (rc == pcmk_ok) { rc = rc2; } if (rc != pcmk_ok) { crm_trace("Call setup failed: %s", pcmk_strerror(rc)); goto done; } else if (cib_op_modifies(call_type) == FALSE) { #if ENABLE_ACL if (acl_enabled(config_hash) == FALSE || acl_filter_cib(request, current_cib, current_cib, &filtered_current_cib) == FALSE) { rc = cib_perform_op(op, call_options, cib_op_func(call_type), TRUE, section, request, input, FALSE, &config_changed, current_cib, &result_cib, NULL, &output); } else if (filtered_current_cib == NULL) { crm_debug("Pre-filtered the entire cib"); rc = -EACCES; } else { crm_debug("Pre-filtered the queried cib according to the ACLs"); rc = cib_perform_op(op, call_options, cib_op_func(call_type), TRUE, section, request, input, FALSE, &config_changed, filtered_current_cib, &result_cib, NULL, &output); } #else rc = cib_perform_op(op, call_options, cib_op_func(call_type), TRUE, section, request, input, FALSE, &config_changed, current_cib, &result_cib, NULL, &output); #endif CRM_CHECK(result_cib == NULL, free_xml(result_cib)); goto done; } /* Handle a valid write action */ global_update = crm_is_true(crm_element_value(request, F_CIB_GLOBAL_UPDATE)); if (global_update) { manage_counters = FALSE; call_options |= cib_force_diff; CRM_CHECK(call_type == 3 || call_type == 4, crm_err("Call type: %d", call_type); crm_log_xml_err(request, "bad op")); } #ifdef SUPPORT_PRENOTIFY if ((call_options & cib_inhibit_notify) == 0) { cib_pre_notify(call_options, op, the_cib, input); } #endif if (rc == pcmk_ok) { if (call_options & cib_inhibit_bcast) { /* skip */ crm_trace("Skipping update: inhibit broadcast"); manage_counters = FALSE; } /* result_cib must not be modified after cib_perform_op() returns */ rc = cib_perform_op(op, call_options, cib_op_func(call_type), FALSE, section, request, input, manage_counters, &config_changed, current_cib, &result_cib, cib_diff, &output); #if ENABLE_ACL if (acl_enabled(config_hash) == TRUE && acl_check_diff(request, current_cib, result_cib, *cib_diff) == FALSE) { rc = -EACCES; } #endif if (manage_counters == FALSE) { /* If the diff is NULL at this point, its because nothing changed */ config_changed = cib_config_changed(NULL, NULL, cib_diff); } /* Always write to disk for replace ops, * this also negates the need to detect ordering changes */ if (crm_str_eq(CIB_OP_REPLACE, op, TRUE)) { config_changed = TRUE; } } if (rc == pcmk_ok && (call_options & cib_dryrun) == 0) { rc = activateCibXml(result_cib, config_changed, op); if (rc == pcmk_ok && cib_internal_config_changed(*cib_diff)) { cib_read_config(config_hash, result_cib); } if (crm_str_eq(CIB_OP_REPLACE, op, TRUE)) { if (section == NULL) { send_r_notify = TRUE; } else if (safe_str_eq(section, XML_TAG_CIB)) { send_r_notify = TRUE; } else if (safe_str_eq(section, XML_CIB_TAG_NODES)) { send_r_notify = TRUE; } else if (safe_str_eq(section, XML_CIB_TAG_STATUS)) { send_r_notify = TRUE; } } else if (crm_str_eq(CIB_OP_ERASE, op, TRUE)) { send_r_notify = TRUE; } } else if (rc == -pcmk_err_dtd_validation) { if (output != NULL) { crm_log_xml_info(output, "cib:output"); free_xml(output); } #if ENABLE_ACL { xmlNode *filtered_result_cib = NULL; if (acl_enabled(config_hash) == FALSE || acl_filter_cib(request, current_cib, result_cib, &filtered_result_cib) == FALSE) { output = result_cib; } else { crm_debug("Filtered the result cib for output according to the ACLs"); output = filtered_result_cib; if (result_cib != NULL) { free_xml(result_cib); } } } #else output = result_cib; #endif } else { free_xml(result_cib); } if ((call_options & cib_inhibit_notify) == 0) { const char *call_id = crm_element_value(request, F_CIB_CALLID); const char *client = crm_element_value(request, F_CIB_CLIENTNAME); #ifdef SUPPORT_POSTNOTIFY cib_post_notify(call_options, op, input, rc, the_cib); #endif cib_diff_notify(call_options, client, call_id, op, input, rc, *cib_diff); } if (send_r_notify) { const char *origin = crm_element_value(request, F_ORIG); cib_replace_notify(origin, the_cib, rc, *cib_diff); } if (rc != pcmk_ok) { log_level = LOG_TRACE; if (rc == -pcmk_err_dtd_validation && global_update) { log_level = LOG_WARNING; crm_log_xml_info(input, "cib:global_update"); } } else if (config_changed) { log_level = LOG_TRACE; if (cib_is_master) { log_level = LOG_NOTICE; } } else if (cib_is_master) { log_level = LOG_TRACE; } log_cib_diff(log_level, *cib_diff, "cib:diff"); done: if ((call_options & cib_discard_reply) == 0) { *reply = cib_construct_reply(request, output, rc); crm_log_xml_explicit(*reply, "cib:reply"); } #if ENABLE_ACL if (filtered_current_cib != NULL) { free_xml(filtered_current_cib); } #endif if (call_type >= 0) { cib_op_cleanup(call_type, call_options, &input, &output); } return rc; } gint cib_GCompareFunc(gconstpointer a, gconstpointer b) { const xmlNode *a_msg = a; const xmlNode *b_msg = b; int msg_a_id = 0; int msg_b_id = 0; const char *value = NULL; value = crm_element_value_const(a_msg, F_CIB_CALLID); msg_a_id = crm_parse_int(value, NULL); value = crm_element_value_const(b_msg, F_CIB_CALLID); msg_b_id = crm_parse_int(value, NULL); if (msg_a_id == msg_b_id) { return 0; } else if (msg_a_id < msg_b_id) { return -1; } return 1; } #if SUPPORT_HEARTBEAT void cib_ha_peer_callback(HA_Message * msg, void *private_data) { xmlNode *xml = convert_ha_message(NULL, msg, __FUNCTION__); cib_peer_callback(xml, private_data); free_xml(xml); } #endif void cib_peer_callback(xmlNode * msg, void *private_data) { const char *reason = NULL; const char *originator = crm_element_value(msg, F_ORIG); if (originator == NULL || crm_str_eq(originator, cib_our_uname, TRUE)) { /* message is from ourselves */ int bcast_id = 0; if (!(crm_element_value_int(msg, F_CIB_LOCAL_NOTIFY_ID, &bcast_id))) { check_local_notify(bcast_id); } return; } else if (crm_peer_cache == NULL) { reason = "membership not established"; goto bail; } if (crm_element_value(msg, F_CIB_CLIENTNAME) == NULL) { crm_xml_add(msg, F_CIB_CLIENTNAME, originator); } /* crm_log_xml_trace("Peer[inbound]", msg); */ cib_process_request(msg, FALSE, TRUE, TRUE, NULL); return; bail: if (reason) { const char *seq = crm_element_value(msg, F_SEQ); const char *op = crm_element_value(msg, F_CIB_OPERATION); crm_warn("Discarding %s message (%s) from %s: %s", op, seq, originator, reason); } } #if SUPPORT_HEARTBEAT extern oc_ev_t *cib_ev_token; static void *ccm_library = NULL; int (*ccm_api_callback_done) (void *cookie) = NULL; int (*ccm_api_handle_event) (const oc_ev_t * token) = NULL; void cib_client_status_callback(const char *node, const char *client, const char *status, void *private) { crm_node_t *peer = NULL; if (safe_str_eq(client, CRM_SYSTEM_CIB)) { crm_info("Status update: Client %s/%s now has status [%s]", node, client, status); if (safe_str_eq(status, JOINSTATUS)) { status = ONLINESTATUS; } else if (safe_str_eq(status, LEAVESTATUS)) { status = OFFLINESTATUS; } peer = crm_get_peer(0, node); crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cib, status); } return; } int cib_ccm_dispatch(gpointer user_data) { int rc = 0; oc_ev_t *ccm_token = (oc_ev_t *) user_data; crm_trace("received callback"); if (ccm_api_handle_event == NULL) { ccm_api_handle_event = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_handle_event", 1); } rc = (*ccm_api_handle_event) (ccm_token); if (0 == rc) { return 0; } crm_err("CCM connection appears to have failed: rc=%d.", rc); /* eventually it might be nice to recover and reconnect... but until then... */ crm_err("Exiting to recover from CCM connection failure"); crm_exit(2); return -1; } int current_instance = 0; void cib_ccm_msg_callback(oc_ed_t event, void *cookie, size_t size, const void *data) { gboolean update_id = FALSE; const oc_ev_membership_t *membership = data; CRM_ASSERT(membership != NULL); crm_info("Processing CCM event=%s (id=%d)", ccm_event_name(event), membership->m_instance); if (current_instance > membership->m_instance) { crm_err("Membership instance ID went backwards! %d->%d", current_instance, membership->m_instance); CRM_ASSERT(current_instance <= membership->m_instance); } switch (event) { case OC_EV_MS_NEW_MEMBERSHIP: case OC_EV_MS_INVALID: update_id = TRUE; break; case OC_EV_MS_PRIMARY_RESTORED: update_id = TRUE; break; case OC_EV_MS_NOT_PRIMARY: crm_trace("Ignoring transitional CCM event: %s", ccm_event_name(event)); break; case OC_EV_MS_EVICTED: crm_err("Evicted from CCM: %s", ccm_event_name(event)); break; default: crm_err("Unknown CCM event: %d", event); } if (update_id) { unsigned int lpc = 0; CRM_CHECK(membership != NULL, return); current_instance = membership->m_instance; for (lpc = 0; lpc < membership->m_n_out; lpc++) { crm_update_ccm_node(membership, lpc + membership->m_out_idx, CRM_NODE_LOST, current_instance); } for (lpc = 0; lpc < membership->m_n_member; lpc++) { crm_update_ccm_node(membership, lpc + membership->m_memb_idx, CRM_NODE_ACTIVE, current_instance); } } if (ccm_api_callback_done == NULL) { ccm_api_callback_done = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_callback_done", 1); } (*ccm_api_callback_done) (cookie); return; } #endif gboolean can_write(int flags) { return TRUE; } static gboolean cib_force_exit(gpointer data) { crm_notice("Forcing exit!"); terminate_cib(__FUNCTION__, TRUE); return FALSE; } static void disconnect_remote_client(gpointer key, gpointer value, gpointer user_data) { crm_client_t *a_client = value; crm_err("Disconnecting %s... Not implemented", crm_str(a_client->name)); } void cib_shutdown(int nsig) { struct qb_ipcs_stats srv_stats; if (cib_shutdown_flag == FALSE) { int disconnects = 0; qb_ipcs_connection_t *c = NULL; cib_shutdown_flag = TRUE; c = qb_ipcs_connection_first_get(ipcs_rw); while(c != NULL) { qb_ipcs_connection_t *last = c; c = qb_ipcs_connection_next_get(ipcs_rw, last); crm_debug("Disconnecting r/w client %p...", last); qb_ipcs_disconnect(last); qb_ipcs_connection_unref(last); disconnects++; } c = qb_ipcs_connection_first_get(ipcs_ro); while(c != NULL) { qb_ipcs_connection_t *last = c; c = qb_ipcs_connection_next_get(ipcs_ro, last); crm_debug("Disconnecting r/o client %p...", last); qb_ipcs_disconnect(last); qb_ipcs_connection_unref(last); disconnects++; } c = qb_ipcs_connection_first_get(ipcs_shm); while(c != NULL) { qb_ipcs_connection_t *last = c; c = qb_ipcs_connection_next_get(ipcs_shm, last); crm_debug("Disconnecting non-blocking r/w client %p...", last); qb_ipcs_disconnect(last); qb_ipcs_connection_unref(last); disconnects++; } disconnects += crm_hash_table_size(client_connections); crm_debug("Disconnecting %d remote clients", crm_hash_table_size(client_connections)); g_hash_table_foreach(client_connections, disconnect_remote_client, NULL); crm_info("Disconnected %d clients", disconnects); } qb_ipcs_stats_get(ipcs_rw, &srv_stats, QB_FALSE); if(crm_hash_table_size(client_connections) == 0) { crm_info("All clients disconnected (%d)", srv_stats.active_connections); initiate_exit(); } else { crm_info("Waiting on %d clients to disconnect (%d)", crm_hash_table_size(client_connections), srv_stats.active_connections); } } void initiate_exit(void) { int active = 0; xmlNode *leaving = NULL; active = crm_active_peers(); if (active < 2) { terminate_cib(__FUNCTION__, FALSE); return; } crm_info("Sending disconnect notification to %d peers...", active); leaving = create_xml_node(NULL, "exit-notification"); crm_xml_add(leaving, F_TYPE, "cib"); crm_xml_add(leaving, F_CIB_OPERATION, "cib_shutdown_req"); send_cluster_message(NULL, crm_msg_cib, leaving, TRUE); free_xml(leaving); g_timeout_add(crm_get_msec("5s"), cib_force_exit, NULL); } extern int remote_fd; extern int remote_tls_fd; extern void terminate_cs_connection(void); void terminate_cib(const char *caller, gboolean fast) { if (remote_fd > 0) { close(remote_fd); remote_fd = 0; } if (remote_tls_fd > 0) { close(remote_tls_fd); remote_tls_fd = 0; } if(!fast) { crm_info("%s: Disconnecting from cluster infrastructure", caller); crm_cluster_disconnect(&crm_cluster); } uninitializeCib(); crm_info("%s: Exiting%s...", caller, fast?" fast":mainloop?" from mainloop":""); if(fast == FALSE && mainloop != NULL && g_main_is_running(mainloop)) { g_main_quit(mainloop); } else { qb_ipcs_destroy(ipcs_ro); qb_ipcs_destroy(ipcs_rw); qb_ipcs_destroy(ipcs_shm); if (fast) { crm_exit(EX_USAGE); } else { crm_exit(EX_OK); } } } diff --git a/cib/notify.c b/cib/notify.c index bcb06762eb..bc01086640 100644 --- a/cib/notify.c +++ b/cib/notify.c @@ -1,376 +1,378 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include int pending_updates = 0; struct cib_notification_s { xmlNode *msg; struct iovec *iov; }; void attach_cib_generation(xmlNode * msg, const char *field, xmlNode * a_cib); void do_cib_notify(int options, const char *op, xmlNode * update, int result, xmlNode * result_data, const char *msg_type); static void need_pre_notify(gpointer key, gpointer value, gpointer user_data) { crm_client_t *client = value; if (is_set(client->options, cib_notify_pre)) { gboolean *needed = user_data; *needed = TRUE; } } static void need_post_notify(gpointer key, gpointer value, gpointer user_data) { crm_client_t *client = value; if (is_set(client->options, cib_notify_post)) { gboolean *needed = user_data; *needed = TRUE; } } static gboolean cib_notify_send_one(gpointer key, gpointer value, gpointer user_data) { const char *type = NULL; gboolean do_send = FALSE; crm_client_t *client = value; struct cib_notification_s *update = user_data; CRM_CHECK(client != NULL, return TRUE); CRM_CHECK(update != NULL, return TRUE); if (client->ipcs == NULL && client->remote == NULL) { crm_warn("Skipping client with NULL channel"); return FALSE; } type = crm_element_value(update->msg, F_SUBTYPE); CRM_LOG_ASSERT(type != NULL); if (is_set(client->options, cib_notify_diff) && safe_str_eq(type, T_CIB_DIFF_NOTIFY)) { do_send = TRUE; } else if (is_set(client->options, cib_notify_replace) && safe_str_eq(type, T_CIB_REPLACE_NOTIFY)) { do_send = TRUE; } else if (is_set(client->options, cib_notify_confirm) && safe_str_eq(type, T_CIB_UPDATE_CONFIRM)) { do_send = TRUE; } else if (is_set(client->options, cib_notify_pre) && safe_str_eq(type, T_CIB_PRE_NOTIFY)) { do_send = TRUE; } else if (is_set(client->options, cib_notify_post) && safe_str_eq(type, T_CIB_POST_NOTIFY)) { do_send = TRUE; } if (do_send) { switch(client->kind) { case CRM_CLIENT_IPC: if(crm_ipcs_sendv(client, update->iov, crm_ipc_server_event) < 0) { crm_warn("Notification of client %s/%s failed", client->name, client->id); } break; +# ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: +# endif case CRM_CLIENT_TCP: crm_debug("Sent %s notification to client %s/%s", type, client->name, client->id); crm_remote_send(client->remote, update->msg); break; default: crm_err("Unknown transport %d for %s", client->kind, client->name); } } return FALSE; } static void cib_notify_send(xmlNode *xml) { struct iovec *iov; struct cib_notification_s update; ssize_t rc = crm_ipcs_prepare(0, xml, &iov); crm_trace("Notifying clients"); if(rc > 0) { update.msg = xml; update.iov = iov; g_hash_table_foreach_remove(client_connections, cib_notify_send_one, &update); } else { crm_notice("Notification failed: %s (%d)", pcmk_strerror(rc), rc); } if(iov) { free(iov[0].iov_base); free(iov[1].iov_base); free(iov); } crm_trace("Notify complete"); } void cib_pre_notify(int options, const char *op, xmlNode * existing, xmlNode * update) { xmlNode *update_msg = NULL; const char *type = NULL; const char *id = NULL; gboolean needed = FALSE; g_hash_table_foreach(client_connections, need_pre_notify, &needed); if (needed == FALSE) { return; } /* TODO: consider pre-notification for removal */ update_msg = create_xml_node(NULL, "pre-notify"); if (update != NULL) { id = crm_element_value(update, XML_ATTR_ID); } crm_xml_add(update_msg, F_TYPE, T_CIB_NOTIFY); crm_xml_add(update_msg, F_SUBTYPE, T_CIB_PRE_NOTIFY); crm_xml_add(update_msg, F_CIB_OPERATION, op); if (id != NULL) { crm_xml_add(update_msg, F_CIB_OBJID, id); } if (update != NULL) { crm_xml_add(update_msg, F_CIB_OBJTYPE, crm_element_name(update)); } else if (existing != NULL) { crm_xml_add(update_msg, F_CIB_OBJTYPE, crm_element_name(existing)); } type = crm_element_value(update_msg, F_CIB_OBJTYPE); attach_cib_generation(update_msg, "cib_generation", the_cib); if (existing != NULL) { add_message_xml(update_msg, F_CIB_EXISTING, existing); } if (update != NULL) { add_message_xml(update_msg, F_CIB_UPDATE, update); } cib_notify_send(update_msg); if (update == NULL) { crm_trace("Performing operation %s (on section=%s)", op, type); } else { crm_trace("Performing %s on <%s%s%s>", op, type, id ? " id=" : "", id ? id : ""); } free_xml(update_msg); } void cib_post_notify(int options, const char *op, xmlNode * update, int result, xmlNode * new_obj) { gboolean needed = FALSE; g_hash_table_foreach(client_connections, need_post_notify, &needed); if (needed == FALSE) { return; } do_cib_notify(options, op, update, result, new_obj, T_CIB_UPDATE_CONFIRM); } void cib_diff_notify(int options, const char *client, const char *call_id, const char *op, xmlNode * update, int result, xmlNode * diff) { int add_updates = 0; int add_epoch = 0; int add_admin_epoch = 0; int del_updates = 0; int del_epoch = 0; int del_admin_epoch = 0; int log_level = LOG_DEBUG_2; if (diff == NULL) { return; } if (result != pcmk_ok) { log_level = LOG_WARNING; } cib_diff_version_details(diff, &add_admin_epoch, &add_epoch, &add_updates, &del_admin_epoch, &del_epoch, &del_updates); if (add_updates != del_updates) { do_crm_log(log_level, "Update (client: %s%s%s): %d.%d.%d -> %d.%d.%d (%s)", client, call_id ? ", call:" : "", call_id ? call_id : "", del_admin_epoch, del_epoch, del_updates, add_admin_epoch, add_epoch, add_updates, pcmk_strerror(result)); } else if (diff != NULL) { do_crm_log(log_level, "Local-only Change (client:%s%s%s): %d.%d.%d (%s)", client, call_id ? ", call: " : "", call_id ? call_id : "", add_admin_epoch, add_epoch, add_updates, pcmk_strerror(result)); } do_cib_notify(options, op, update, result, diff, T_CIB_DIFF_NOTIFY); } void do_cib_notify(int options, const char *op, xmlNode * update, int result, xmlNode * result_data, const char *msg_type) { xmlNode *update_msg = NULL; const char *id = NULL; update_msg = create_xml_node(NULL, "notify"); if (result_data != NULL) { id = crm_element_value(result_data, XML_ATTR_ID); } crm_xml_add(update_msg, F_TYPE, T_CIB_NOTIFY); crm_xml_add(update_msg, F_SUBTYPE, msg_type); crm_xml_add(update_msg, F_CIB_OPERATION, op); crm_xml_add_int(update_msg, F_CIB_RC, result); if (id != NULL) { crm_xml_add(update_msg, F_CIB_OBJID, id); } if (update != NULL) { crm_trace("Setting type to update->name: %s", crm_element_name(update)); crm_xml_add(update_msg, F_CIB_OBJTYPE, crm_element_name(update)); } else if (result_data != NULL) { crm_trace("Setting type to new_obj->name: %s", crm_element_name(result_data)); crm_xml_add(update_msg, F_CIB_OBJTYPE, crm_element_name(result_data)); } else { crm_trace("Not Setting type"); } attach_cib_generation(update_msg, "cib_generation", the_cib); if (update != NULL) { add_message_xml(update_msg, F_CIB_UPDATE, update); } if (result_data != NULL) { add_message_xml(update_msg, F_CIB_UPDATE_RESULT, result_data); } cib_notify_send(update_msg); free_xml(update_msg); } void attach_cib_generation(xmlNode * msg, const char *field, xmlNode * a_cib) { xmlNode *generation = create_xml_node(NULL, XML_CIB_TAG_GENERATION_TUPPLE); if (a_cib != NULL) { copy_in_properties(generation, a_cib); } add_message_xml(msg, field, generation); free_xml(generation); } void cib_replace_notify(const char *origin, xmlNode * update, int result, xmlNode * diff) { xmlNode *replace_msg = NULL; int add_updates = 0; int add_epoch = 0; int add_admin_epoch = 0; int del_updates = 0; int del_epoch = 0; int del_admin_epoch = 0; if (diff == NULL) { return; } cib_diff_version_details(diff, &add_admin_epoch, &add_epoch, &add_updates, &del_admin_epoch, &del_epoch, &del_updates); if(del_updates < 0) { crm_log_xml_debug(diff, "Bad replace diff"); } if (add_updates != del_updates) { crm_info("Replaced: %d.%d.%d -> %d.%d.%d from %s", del_admin_epoch, del_epoch, del_updates, add_admin_epoch, add_epoch, add_updates, crm_str(origin)); } else if (diff != NULL) { crm_info("Local-only Replace: %d.%d.%d from %s", add_admin_epoch, add_epoch, add_updates, crm_str(origin)); } replace_msg = create_xml_node(NULL, "notify-replace"); crm_xml_add(replace_msg, F_TYPE, T_CIB_NOTIFY); crm_xml_add(replace_msg, F_SUBTYPE, T_CIB_REPLACE_NOTIFY); crm_xml_add(replace_msg, F_CIB_OPERATION, CIB_OP_REPLACE); crm_xml_add_int(replace_msg, F_CIB_RC, result); attach_cib_generation(replace_msg, "cib-replace-generation", update); crm_log_xml_trace(replace_msg, "CIB Replaced"); cib_notify_send(replace_msg); free_xml(replace_msg); } diff --git a/cib/remote.c b/cib/remote.c index f5e3744d8c..865fba4d49 100644 --- a/cib/remote.c +++ b/cib/remote.c @@ -1,691 +1,696 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "callbacks.h" /* #undef HAVE_PAM_PAM_APPL_H */ /* #undef HAVE_GNUTLS_GNUTLS_H */ #ifdef HAVE_GNUTLS_GNUTLS_H # undef KEYFILE # include #endif #include #include #if HAVE_SECURITY_PAM_APPL_H # include # define HAVE_PAM 1 #else # if HAVE_PAM_PAM_APPL_H # include # define HAVE_PAM 1 # endif #endif extern int remote_tls_fd; extern gboolean cib_shutdown_flag; int init_remote_listener(int port, gboolean encrypted); void cib_remote_connection_destroy(gpointer user_data); #ifdef HAVE_GNUTLS_GNUTLS_H # define DH_BITS 1024 gnutls_dh_params dh_params; gnutls_anon_server_credentials anon_cred_s; static void debug_log(int level, const char *str) { fputs(str, stderr); } #endif #define REMOTE_AUTH_TIMEOUT 10000 int num_clients; int authenticate_user(const char *user, const char *passwd); int cib_remote_listen(gpointer data); int cib_remote_msg(gpointer data); static void remote_connection_destroy(gpointer user_data) { return; } #define ERROR_SUFFIX " Shutting down remote listener" int init_remote_listener(int port, gboolean encrypted) { int rc; int *ssock = NULL; struct sockaddr_in saddr; int optval; static struct mainloop_fd_callbacks remote_listen_fd_callbacks = { .dispatch = cib_remote_listen, .destroy = remote_connection_destroy, }; if (port <= 0) { /* dont start it */ return 0; } if (encrypted) { #ifndef HAVE_GNUTLS_GNUTLS_H crm_warn("TLS support is not available"); return 0; #else crm_notice("Starting a tls listener on port %d.", port); gnutls_global_init(); /* gnutls_global_set_log_level (10); */ gnutls_global_set_log_function(debug_log); gnutls_dh_params_init(&dh_params); gnutls_dh_params_generate2(dh_params, DH_BITS); gnutls_anon_allocate_server_credentials(&anon_cred_s); gnutls_anon_set_server_dh_params(anon_cred_s, dh_params); #endif } else { crm_warn("Starting a plain_text listener on port %d.", port); } #ifndef HAVE_PAM crm_warn("PAM is _not_ enabled!"); #endif /* create server socket */ ssock = malloc(sizeof(int)); *ssock = socket(AF_INET, SOCK_STREAM, 0); if (*ssock == -1) { crm_perror(LOG_ERR, "Can not create server socket." ERROR_SUFFIX); free(ssock); return -1; } /* reuse address */ optval = 1; rc = setsockopt(*ssock, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval)); if(rc < 0) { crm_perror(LOG_INFO, "Couldn't allow the reuse of local addresses by our remote listener"); } /* bind server socket */ memset(&saddr, '\0', sizeof(saddr)); saddr.sin_family = AF_INET; saddr.sin_addr.s_addr = INADDR_ANY; saddr.sin_port = htons(port); if (bind(*ssock, (struct sockaddr *)&saddr, sizeof(saddr)) == -1) { crm_perror(LOG_ERR, "Can not bind server socket." ERROR_SUFFIX); close(*ssock); free(ssock); return -2; } if (listen(*ssock, 10) == -1) { crm_perror(LOG_ERR, "Can not start listen." ERROR_SUFFIX); close(*ssock); free(ssock); return -3; } mainloop_add_fd("cib-remote", G_PRIORITY_DEFAULT, *ssock, ssock, &remote_listen_fd_callbacks); return *ssock; } static int check_group_membership(const char *usr, const char *grp) { int index = 0; struct passwd *pwd = NULL; struct group *group = NULL; CRM_CHECK(usr != NULL, return FALSE); CRM_CHECK(grp != NULL, return FALSE); pwd = getpwnam(usr); if (pwd == NULL) { crm_err("No user named '%s' exists!", usr); return FALSE; } group = getgrgid(pwd->pw_gid); if (group != NULL && crm_str_eq(grp, group->gr_name, TRUE)) { return TRUE; } group = getgrnam(grp); if (group == NULL) { crm_err("No group named '%s' exists!", grp); return FALSE; } while (TRUE) { char *member = group->gr_mem[index++]; if (member == NULL) { break; } else if (crm_str_eq(usr, member, TRUE)) { return TRUE; } }; return FALSE; } static gboolean cib_remote_auth(xmlNode *login) { const char *user = NULL; const char *pass = NULL; const char *tmp = NULL; crm_log_xml_info(login, "Login: "); if (login == NULL) { return FALSE; } tmp = crm_element_name(login); if (safe_str_neq(tmp, "cib_command")) { crm_err("Wrong tag: %s", tmp); return FALSE; } tmp = crm_element_value(login, "op"); if (safe_str_neq(tmp, "authenticate")) { crm_err("Wrong operation: %s", tmp); return FALSE; } user = crm_element_value(login, "user"); pass = crm_element_value(login, "password"); if (!user || !pass) { crm_err("missing auth credentials"); return FALSE; } /* Non-root daemons can only validate the password of the * user they're running as */ if (check_group_membership(user, CRM_DAEMON_GROUP) == FALSE) { crm_err("User is not a member of the required group"); return FALSE; } else if (authenticate_user(user, pass) == FALSE) { crm_err("PAM auth failed"); return FALSE; } return TRUE; } static gboolean remote_auth_timeout_cb(gpointer data) { crm_client_t *client = data; client->remote->auth_timeout = 0; if (client->remote->authenticated == TRUE) { return FALSE; } mainloop_del_fd(client->remote->source); crm_err("Remote client authentication timed out"); return FALSE; } int cib_remote_listen(gpointer data) { int csock = 0; unsigned laddr; struct sockaddr_in addr; int ssock = *(int *)data; int flag; crm_client_t *new_client = NULL; static struct mainloop_fd_callbacks remote_client_fd_callbacks = { .dispatch = cib_remote_msg, .destroy = cib_remote_connection_destroy, }; /* accept the connection */ laddr = sizeof(addr); csock = accept(ssock, (struct sockaddr *)&addr, &laddr); crm_debug("New %s connection from %s", ssock == remote_tls_fd ? "secure" : "clear-text", inet_ntoa(addr.sin_addr)); if (csock == -1) { crm_err("accept socket failed"); return TRUE; } if ((flag = fcntl(csock, F_GETFL)) >= 0) { if (fcntl(csock, F_SETFL, flag | O_NONBLOCK) < 0) { crm_err( "fcntl() write failed"); close(csock); return TRUE; } } else { crm_err( "fcntl() read failed"); close(csock); return TRUE; } if (ssock == remote_tls_fd) { #ifdef HAVE_GNUTLS_GNUTLS_H /* create gnutls session for the server socket */ new_client->remote->tls_session = crm_create_anon_tls_session( csock, GNUTLS_SERVER, anon_cred_s); if (new_client->remote->tls_session == NULL) { crm_err("TLS session creation failed"); close(csock); return TRUE; } #endif } num_clients++; crm_client_init(); new_client = calloc(1, sizeof(crm_client_t)); new_client->remote = calloc(1, sizeof(crm_remote_t)); new_client->id = crm_generate_uuid(); g_hash_table_insert(client_connections, new_client->id/* Should work */, new_client); /* clients have a few seconds to perform handshake. */ new_client->remote->auth_timeout = g_timeout_add( REMOTE_AUTH_TIMEOUT, remote_auth_timeout_cb, new_client); if (ssock == remote_tls_fd) { #ifdef HAVE_GNUTLS_GNUTLS_H new_client->kind = CRM_CLIENT_TLS; #endif } else { new_client->kind = CRM_CLIENT_TCP; new_client->remote->tcp_socket = csock; } new_client->remote->source = mainloop_add_fd( "cib-remote-client", G_PRIORITY_DEFAULT, csock, new_client, &remote_client_fd_callbacks); return TRUE; } void cib_remote_connection_destroy(gpointer user_data) { crm_client_t *client = user_data; int csock = 0; if (client == NULL) { return; } crm_trace("Cleaning up after client disconnect: %s/%s", crm_str(client->name), client->id); num_clients--; crm_trace("Num unfree'd clients: %d", num_clients); - if (client->kind == CRM_CLIENT_TLS) { + switch (client->kind) { + case CRM_CLIENT_TCP: + csock = client->remote->tcp_socket; + break; #ifdef HAVE_GNUTLS_GNUTLS_H - if (client->remote->tls_session) { - void *sock_ptr = gnutls_transport_get_ptr(*client->remote->tls_session); - csock = GPOINTER_TO_INT(sock_ptr); - if (client->remote->tls_handshake_complete) { - gnutls_bye(*client->remote->tls_session, GNUTLS_SHUT_WR); + case CRM_CLIENT_TLS: + if (client->remote->tls_session) { + void *sock_ptr = gnutls_transport_get_ptr(*client->remote->tls_session); + csock = GPOINTER_TO_INT(sock_ptr); + if (client->remote->tls_handshake_complete) { + gnutls_bye(*client->remote->tls_session, GNUTLS_SHUT_WR); + } + gnutls_deinit(*client->remote->tls_session); + gnutls_free(client->remote->tls_session); + client->remote->tls_session = NULL; } - gnutls_deinit(*client->remote->tls_session); - gnutls_free(client->remote->tls_session); - } + break; #endif - } else { - csock = GPOINTER_TO_INT(client->remote->tls_session); + default: + crm_warn("Unexpected client type %d" , client->kind); } - client->remote->tls_session = NULL; if (csock > 0) { close(csock); } crm_client_destroy(client); crm_trace("Freed the cib client"); if (cib_shutdown_flag) { cib_shutdown(0); } return; } static void cib_handle_remote_msg(crm_client_t *client, xmlNode *command) { const char *value = NULL; value = crm_element_name(command); if (safe_str_neq(value, "cib_command")) { crm_log_xml_trace(command, "Bad command: "); return; } if (client->name == NULL) { value = crm_element_value(command, F_CLIENTNAME); if (value == NULL) { client->name = strdup(client->id); } else { client->name = strdup(value); } } if (client->userdata == NULL) { value = crm_element_value(command, F_CIB_CALLBACK_TOKEN); if (value != NULL) { client->userdata = strdup(value); crm_trace("Callback channel for %s is %s", client->id, client->userdata); } else { client->userdata = strdup(client->id); } } /* unset dangerous options */ xml_remove_prop(command, F_ORIG); xml_remove_prop(command, F_CIB_HOST); xml_remove_prop(command, F_CIB_GLOBAL_UPDATE); crm_xml_add(command, F_TYPE, T_CIB); crm_xml_add(command, F_CIB_CLIENTID, client->id); crm_xml_add(command, F_CIB_CLIENTNAME, client->name); #if ENABLE_ACL crm_xml_add(command, F_CIB_USER, client->user); #endif if (crm_element_value(command, F_CIB_CALLID) == NULL) { char *call_uuid = crm_generate_uuid(); /* fix the command */ crm_xml_add(command, F_CIB_CALLID, call_uuid); free(call_uuid); } if (crm_element_value(command, F_CIB_CALLOPTS) == NULL) { crm_xml_add_int(command, F_CIB_CALLOPTS, 0); } crm_log_xml_trace(command, "Remote command: "); cib_common_callback_worker(0, 0, command, client, TRUE); } int cib_remote_msg(gpointer data) { xmlNode *command = NULL; crm_client_t *client = data; int disconnected = 0; int timeout = client->remote->authenticated ? -1 : 1000; - crm_trace("%s callback", client->kind == CRM_CLIENT_TLS ? "secure" : "clear-text"); + crm_trace("%s callback", client->kind != CRM_CLIENT_TCP ? "secure" : "clear-text"); #ifdef HAVE_GNUTLS_GNUTLS_H if (client->kind == CRM_CLIENT_TLS && (client->remote->tls_handshake_complete == FALSE)) { int rc = 0; /* Muliple calls to handshake will be required, this callback * will be invoked once the client sends more handshake data. */ do { rc = gnutls_handshake(*client->remote->tls_session); if (rc < 0 && rc != GNUTLS_E_AGAIN) { crm_err("Remote cib tls handshake failed"); return -1; } } while (rc == GNUTLS_E_INTERRUPTED); if (rc == 0) { crm_debug("Remote cib tls handshake completed"); client->remote->tls_handshake_complete = TRUE; if (client->remote->auth_timeout) { g_source_remove(client->remote->auth_timeout); } /* after handshake, clients must send auth in a few seconds */ client->remote->auth_timeout = g_timeout_add(REMOTE_AUTH_TIMEOUT, remote_auth_timeout_cb, client); } return 0; } #endif crm_remote_recv(client->remote, timeout, &disconnected); /* must pass auth before we will process anything else */ if (client->remote->authenticated == FALSE) { xmlNode *reg; #if ENABLE_ACL const char *user = NULL; #endif command = crm_remote_parse_buffer(client->remote); if (cib_remote_auth(command) == FALSE) { free_xml(command); return -1; } crm_debug("remote connection authenticated successfully"); client->remote->authenticated = TRUE; g_source_remove(client->remote->auth_timeout); client->remote->auth_timeout = 0; client->name = crm_element_value_copy(command, "name"); #if ENABLE_ACL user = crm_element_value(command, "user"); if (user) { client->user = strdup(user); } #endif /* send ACK */ reg = create_xml_node(NULL, "cib_result"); crm_xml_add(reg, F_CIB_OPERATION, CRM_OP_REGISTER); crm_xml_add(reg, F_CIB_CLIENTID, client->id); crm_remote_send(client->remote, reg); free_xml(reg); free_xml(command); } command = crm_remote_parse_buffer(client->remote); while (command) { crm_trace("command received"); cib_handle_remote_msg(client, command); free_xml(command); command = crm_remote_parse_buffer(client->remote); } if (disconnected) { crm_trace("disconnected while receiving remote cib msg."); return -1; } return 0; } #ifdef HAVE_PAM /* * Useful Examples: * http://www.kernel.org/pub/linux/libs/pam/Linux-PAM-html * http://developer.apple.com/samplecode/CryptNoMore/index.html */ static int construct_pam_passwd(int num_msg, const struct pam_message **msg, struct pam_response **response, void *data) { int count = 0; struct pam_response *reply; char *string = (char *)data; CRM_CHECK(data, return PAM_CONV_ERR); CRM_CHECK(num_msg == 1, return PAM_CONV_ERR); /* We only want to handle one message */ reply = calloc(1, sizeof(struct pam_response)); CRM_ASSERT(reply != NULL); for (count = 0; count < num_msg; ++count) { switch (msg[count]->msg_style) { case PAM_TEXT_INFO: crm_info("PAM: %s\n", msg[count]->msg); break; case PAM_PROMPT_ECHO_OFF: case PAM_PROMPT_ECHO_ON: reply[count].resp_retcode = 0; reply[count].resp = string; /* We already made a copy */ case PAM_ERROR_MSG: /* In theory we'd want to print this, but then * we see the password prompt in the logs */ /* crm_err("PAM error: %s\n", msg[count]->msg); */ break; default: crm_err("Unhandled conversation type: %d", msg[count]->msg_style); goto bail; } } *response = reply; reply = NULL; return PAM_SUCCESS; bail: for (count = 0; count < num_msg; ++count) { if (reply[count].resp != NULL) { switch (msg[count]->msg_style) { case PAM_PROMPT_ECHO_ON: case PAM_PROMPT_ECHO_OFF: /* Erase the data - it contained a password */ while (*(reply[count].resp)) { *(reply[count].resp)++ = '\0'; } free(reply[count].resp); break; } reply[count].resp = NULL; } } free(reply); reply = NULL; return PAM_CONV_ERR; } #endif int authenticate_user(const char *user, const char *passwd) { #ifndef HAVE_PAM gboolean pass = TRUE; #else int rc = 0; gboolean pass = FALSE; const void *p_user = NULL; struct pam_conv p_conv; struct pam_handle *pam_h = NULL; static const char *pam_name = NULL; if (pam_name == NULL) { pam_name = getenv("CIB_pam_service"); } if (pam_name == NULL) { pam_name = "login"; } p_conv.conv = construct_pam_passwd; p_conv.appdata_ptr = strdup(passwd); rc = pam_start(pam_name, user, &p_conv, &pam_h); if (rc != PAM_SUCCESS) { crm_err("Could not initialize PAM: %s (%d)", pam_strerror(pam_h, rc), rc); goto bail; } rc = pam_authenticate(pam_h, 0); if (rc != PAM_SUCCESS) { crm_err("Authentication failed for %s: %s (%d)", user, pam_strerror(pam_h, rc), rc); goto bail; } /* Make sure we authenticated the user we wanted to authenticate. * Since we also run as non-root, it might be worth pre-checking * the user has the same EID as us, since that the only user we * can authenticate. */ rc = pam_get_item(pam_h, PAM_USER, &p_user); if (rc != PAM_SUCCESS) { crm_err("Internal PAM error: %s (%d)", pam_strerror(pam_h, rc), rc); goto bail; } else if (p_user == NULL) { crm_err("Unknown user authenticated."); goto bail; } else if (safe_str_neq(p_user, user)) { crm_err("User mismatch: %s vs. %s.", (const char *)p_user, (const char *)user); goto bail; } rc = pam_acct_mgmt(pam_h, 0); if (rc != PAM_SUCCESS) { crm_err("Access denied: %s (%d)", pam_strerror(pam_h, rc), rc); goto bail; } pass = TRUE; bail: rc = pam_end(pam_h, rc); #endif return pass; } diff --git a/fencing/remote.c b/fencing/remote.c index 4e1124976a..7912d50642 100644 --- a/fencing/remote.c +++ b/fencing/remote.c @@ -1,1226 +1,1250 @@ /* * Copyright (C) 2009 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TIMEOUT_MULTIPLY_FACTOR 1.2 typedef struct st_query_result_s { char *host; int devices; /* only try peers for non-topology based operations once */ gboolean tried; GListPtr device_list; GHashTable *custom_action_timeouts; /* Subset of devices that peer has verified connectivity on */ GHashTable *verified_devices; } st_query_result_t; GHashTable *remote_op_list = NULL; void call_remote_stonith(remote_fencing_op_t *op, st_query_result_t *peer); static void remote_op_done(remote_fencing_op_t *op, xmlNode *data, int rc, int dup); extern xmlNode *stonith_create_op( int call_id, const char *token, const char *op, xmlNode *data, int call_options); static void report_timeout_period(remote_fencing_op_t *op, int op_timeout); static int get_op_total_timeout(remote_fencing_op_t *op, st_query_result_t *chosen_peer, int default_timeout); static void free_remote_query(gpointer data) { if(data) { st_query_result_t *query = data; crm_trace("Free'ing query result from %s", query->host); free(query->host); g_hash_table_destroy(query->custom_action_timeouts); g_hash_table_destroy(query->verified_devices); free(query); } } static void clear_remote_op_timers(remote_fencing_op_t *op) { if(op->query_timer) { g_source_remove(op->query_timer); op->query_timer = 0; } if(op->op_timer_total) { g_source_remove(op->op_timer_total); op->op_timer_total = 0; } if(op->op_timer_one) { g_source_remove(op->op_timer_one); op->op_timer_one = 0; } } static void free_remote_op(gpointer data) { remote_fencing_op_t *op = data; crm_trace("Free'ing op %s for %s", op->id, op->target); crm_log_xml_debug(op->request, "Destroying"); clear_remote_op_timers(op); free(op->id); free(op->action); free(op->target); free(op->client_id); free(op->client_name); free(op->originator); if(op->query_results) { g_list_free_full(op->query_results, free_remote_query); } if(op->request) { free_xml(op->request); op->request = NULL; } free(op); } static xmlNode * create_op_done_notify(remote_fencing_op_t *op, int rc) { xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE); crm_xml_add_int(notify_data, "state", op->state); crm_xml_add_int(notify_data, F_STONITH_RC, rc); crm_xml_add(notify_data, F_STONITH_TARGET, op->target); crm_xml_add(notify_data, F_STONITH_ACTION, op->action); crm_xml_add(notify_data, F_STONITH_DELEGATE, op->delegate); crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, op->id); crm_xml_add(notify_data, F_STONITH_ORIGIN, op->originator); crm_xml_add(notify_data, F_STONITH_CLIENTID, op->client_id); crm_xml_add(notify_data, F_STONITH_CLIENTNAME,op->client_name); return notify_data; } static void bcast_result_to_peers(remote_fencing_op_t *op, int rc) { static int count = 0; xmlNode *bcast = create_xml_node(NULL, T_STONITH_REPLY); xmlNode *notify_data = create_op_done_notify(op, rc); count++; crm_trace("Broadcasting result to peers"); crm_xml_add(bcast, F_TYPE, T_STONITH_NOTIFY); crm_xml_add(bcast, F_SUBTYPE, "broadcast"); crm_xml_add(bcast, F_STONITH_OPERATION, T_STONITH_NOTIFY); crm_xml_add_int(bcast, "count", count); add_message_xml(bcast, F_STONITH_CALLDATA, notify_data); send_cluster_message(NULL, crm_msg_stonith_ng, bcast, FALSE); free_xml(notify_data); free_xml(bcast); return; } static void handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data, int rc) { xmlNode *notify_data = NULL; xmlNode *reply = NULL; if (op->notify_sent == TRUE) { /* nothing to do */ return; } /* Do notification with a clean data object */ notify_data = create_op_done_notify(op, rc); crm_xml_add_int(data, "state", op->state); crm_xml_add(data, F_STONITH_TARGET, op->target); crm_xml_add(data, F_STONITH_OPERATION, op->action); reply = stonith_construct_reply(op->request, NULL, data, rc); crm_xml_add(reply, F_STONITH_DELEGATE, op->delegate); /* Send fencing OP reply to local client that initiated fencing */ do_local_reply(reply, op->client_id, op->call_options & st_opt_sync_call, FALSE); /* bcast to all local clients that the fencing operation happend */ do_stonith_notify(0, T_STONITH_NOTIFY_FENCE, rc, notify_data); /* mark this op as having notify's already sent */ op->notify_sent = TRUE; free_xml(reply); free_xml(notify_data); } static void handle_duplicates(remote_fencing_op_t *op, xmlNode *data, int rc) { GListPtr iter = NULL; for(iter = op->duplicates; iter != NULL; iter = iter->next) { remote_fencing_op_t *other = iter->data; if(other->state == st_duplicate) { /* Ie. it hasn't timed out already */ other->state = op->state; crm_debug("Peforming duplicate notification for %s@%s.%.8s = %s", other->client_name, other->originator, other->id, pcmk_strerror(rc)); remote_op_done(other, data, rc, TRUE); } else { crm_err("Skipping duplicate notification for %s@%s - %d", other->client_name, other->originator, other->state); } } } /*! * \internal * \brief Finalize a remote operation. * * \description This function has two code paths. * * Path 1. This node is the owner of the operation and needs * to notify the cpg group via a broadcast as to the operation's * results. * * Path 2. The cpg broadcast is received. All nodes notify their local * stonith clients the operation results. * * So, The owner of the operation first notifies the cluster of the result, * and once that cpg notify is received back it notifies all the local clients. * * Nodes that are passive watchers of the operation will receive the * broadcast and only need to notify their local clients the operation finished. * * \param op, The fencing operation to finalize * \param data, The xml msg reply (if present) of the last delegated fencing * operation. * \param dup, Is this operation a duplicate, if so treat it a little differently * making sure the broadcast is not sent out. */ static void remote_op_done(remote_fencing_op_t *op, xmlNode *data, int rc, int dup) { int level = LOG_ERR; const char *subt = NULL; xmlNode *local_data = NULL; op->completed = time(NULL); clear_remote_op_timers(op); if(op->notify_sent == TRUE) { crm_err("Already sent notifications for '%s of %s by %s' (for=%s@%s.%.8s, state=%d): %s", op->action, op->target, op->delegate?op->delegate:"", op->client_name, op->originator, op->id, op->state, pcmk_strerror(rc)); goto remote_op_done_cleanup; } if(!op->delegate && data) { op->delegate = crm_element_value_copy(data, F_ORIG); } if(data == NULL) { data = create_xml_node(NULL, "remote-op"); local_data = data; } /* Tell everyone the operation is done, we will continue * with doing the local notifications once we receive * the broadcast back. */ subt = crm_element_value(data, F_SUBTYPE); if(dup == FALSE && safe_str_neq(subt, "broadcast")) { /* Defer notification until the bcast message arrives */ bcast_result_to_peers(op, rc); goto remote_op_done_cleanup; } if(rc == pcmk_ok || dup) { level = LOG_NOTICE; } else if(safe_str_neq(op->originator, stonith_our_uname)) { level = LOG_NOTICE; } do_crm_log(level, "Operation %s of %s by %s for %s@%s.%.8s: %s", op->action, op->target, op->delegate?op->delegate:"", op->client_name, op->originator, op->id, pcmk_strerror(rc)); handle_local_reply_and_notify(op, data, rc); if (dup == FALSE) { handle_duplicates(op, data, rc); } /* Free non-essential parts of the record * Keep the record around so we can query the history */ if(op->query_results) { g_list_free_full(op->query_results, free_remote_query); op->query_results = NULL; } if(op->request) { free_xml(op->request); op->request = NULL; } remote_op_done_cleanup: free_xml(local_data); } static gboolean remote_op_timeout_one(gpointer userdata) { remote_fencing_op_t *op = userdata; op->op_timer_one = 0; crm_notice("Remote %s operation on %s for %s.%8s timed out", op->action, op->target, op->client_name, op->id); call_remote_stonith(op, NULL); return FALSE; } static gboolean remote_op_timeout(gpointer userdata) { remote_fencing_op_t *op = userdata; op->op_timer_total = 0; if(op->state == st_done) { crm_debug("Action %s (%s) for %s (%s) already completed", op->action, op->id, op->target, op->client_name); return FALSE; } crm_debug("Action %s (%s) for %s (%s) timed out", op->action, op->id, op->target, op->client_name); op->state = st_failed; remote_op_done(op, NULL, -ETIME, FALSE); return FALSE; } static gboolean remote_op_query_timeout(gpointer data) { remote_fencing_op_t *op = data; op->query_timer = 0; if(op->state == st_done) { crm_debug("Operation %s for %s already completed", op->id, op->target); } else if(op->state == st_exec) { crm_debug("Operation %s for %s already in progress", op->id, op->target); } else if(op->query_results) { crm_debug("Query %s for %s complete: %d", op->id, op->target, op->state); call_remote_stonith(op, NULL); } else { crm_debug("Query %s for %s timed out: %d", op->id, op->target, op->state); if(op->op_timer_total) { g_source_remove(op->op_timer_total); op->op_timer_total = 0; } remote_op_timeout(op); } return FALSE; } static int stonith_topology_next(remote_fencing_op_t *op) { stonith_topology_t *tp = NULL; if(op->target) { /* Queries don't have a target set */ tp = g_hash_table_lookup(topology, op->target); } if(tp == NULL) { return pcmk_ok; } set_bit(op->call_options, st_opt_topology); do { op->level++; } while(op->level < ST_LEVEL_MAX && tp->levels[op->level] == NULL); if(op->level < ST_LEVEL_MAX) { crm_trace("Attempting fencing level %d for %s (%d devices) - %s@%s.%.8s", op->level, op->target, g_list_length(tp->levels[op->level]), op->client_name, op->originator, op->id); op->devices = tp->levels[op->level]; return pcmk_ok; } crm_notice("All fencing options to fence %s for %s@%s.%.8s failed", op->target, op->client_name, op->originator, op->id); return -EINVAL; } /*! * \brief Check to see if this operation is a duplicate of another in flight * operation. If so merge this operation into the inflight operation, and mark * it as a duplicate. */ static void merge_duplicates(remote_fencing_op_t *op) { GHashTableIter iter; remote_fencing_op_t *other = NULL; g_hash_table_iter_init(&iter, remote_op_list); while(g_hash_table_iter_next(&iter, NULL, (void**)&other)) { if(other->state > st_exec) { /* Must be in-progress */ continue; } else if (safe_str_neq(op->target, other->target)) { /* Must be for the same node */ continue; } else if(safe_str_neq(op->action, other->action)) { crm_trace("Must be for the same action: %s vs. ", op->action, other->action); continue; } else if(safe_str_eq(op->client_name, other->client_name)) { crm_trace("Must be for different clients: %s", op->client_name); continue; } else if(safe_str_eq(other->target, other->originator)) { crm_trace("Can't be a suicide operation: %s", other->target); continue; } /* There is another in-flight request to fence the same host * Piggyback on that instead. If it fails, so do we. */ other->duplicates = g_list_append(other->duplicates, op); if(other->total_timeout == 0) { crm_trace("Making a best-guess as to the timeout used"); other->total_timeout = op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, NULL, op->base_timeout); } crm_notice("Merging stonith action %s for node %s originating from client %s.%.8s with identical request from %s@%s.%.8s (%ds)", op->action, op->target, op->client_name, op->id, other->client_name, other->originator, other->id, other->total_timeout); report_timeout_period(op, other->total_timeout); op->state = st_duplicate; } } /*! * \internal * \brief Create a new remote stonith op * \param client, he local stonith client id that initaited the operation * \param request, The request from the client that started the operation * \param peer, Is this operation owned by another stonith peer? Operations * owned by other peers are stored on all the stonith nodes, but only the * owner executes the operation. All the nodes get the results to the operation * once the owner finishes executing it. */ void *create_remote_stonith_op(const char *client, xmlNode *request, gboolean peer) { remote_fencing_op_t *op = NULL; xmlNode *dev = get_xpath_object("//@"F_STONITH_TARGET, request, LOG_TRACE); if(remote_op_list == NULL) { remote_op_list = g_hash_table_new_full( crm_str_hash, g_str_equal, NULL, free_remote_op); } /* If this operation is owned by another node, check to make * sure we haven't already created this operation. */ if(peer && dev) { const char *op_id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID); CRM_CHECK(op_id != NULL, return NULL); op = g_hash_table_lookup(remote_op_list, op_id); if(op) { crm_debug("%s already exists", op_id); return op; } } op = calloc(1, sizeof(remote_fencing_op_t)); crm_element_value_int(request, F_STONITH_TIMEOUT, (int*)&(op->base_timeout)); if(peer && dev) { op->id = crm_element_value_copy(dev, F_STONITH_REMOTE_OP_ID); } else { op->id = crm_generate_uuid(); } g_hash_table_replace(remote_op_list, op->id, op); CRM_LOG_ASSERT(g_hash_table_lookup(remote_op_list, op->id) != NULL); op->state = st_query; op->action = crm_element_value_copy(dev, F_STONITH_ACTION); op->originator = crm_element_value_copy(dev, F_STONITH_ORIGIN); if(op->originator == NULL) { /* Local or relayed request */ op->originator = strdup(stonith_our_uname); } if(client) { op->client_id = strdup(client); } op->client_name = crm_element_value_copy(request, F_STONITH_CLIENTNAME); op->target = crm_element_value_copy(dev, F_STONITH_TARGET); op->request = copy_xml(request); /* TODO: Figure out how to avoid this */ crm_element_value_int(request, F_STONITH_CALLOPTS, (int*)&(op->call_options)); crm_trace("%s new stonith op: %s - %s of %s for %s", (peer && dev)?"Recorded":"Generated", op->id, op->action, op->target, op->client_name); if(op->call_options & st_opt_cs_nodeid) { int nodeid = crm_atoi(op->target, NULL); crm_node_t *node = crm_get_peer(nodeid, NULL); /* Ensure the conversion only happens once */ op->call_options &= ~st_opt_cs_nodeid; if(node && node->uname) { free(op->target); op->target = strdup(node->uname); } else { crm_warn("Could not expand nodeid '%s' into a host name (%p)", op->target, node); } } /* check to see if this is a duplicate operation of another in-flight operation */ merge_duplicates(op); return op; } remote_fencing_op_t *initiate_remote_stonith_op(crm_client_t *client, xmlNode *request, gboolean manual_ack) { xmlNode *query = NULL; const char *client_id = NULL; remote_fencing_op_t *op = NULL; if(client) { client_id = client->id; } else { client_id = crm_element_value(request, F_STONITH_CLIENTID); } CRM_LOG_ASSERT(client_id != NULL); op = create_remote_stonith_op(client_id, request, FALSE); op->owner = TRUE; CRM_CHECK(op->action, return NULL); if(stonith_topology_next(op) != pcmk_ok) { op->state = st_failed; } switch(op->state) { case st_failed: crm_warn("Initiation of remote operation %s for %s: failed (%s)", op->action, op->target, op->id); remote_op_done(op, NULL, -EINVAL, FALSE); return op; case st_duplicate: crm_info("Initiating remote operation %s for %s: %s (duplicate)", op->action, op->target, op->id); return op; default: crm_notice("Initiating remote operation %s for %s: %s (%d)", op->action, op->target, op->id, op->state); } query = stonith_create_op(0, op->id, STONITH_OP_QUERY, NULL, 0); if(!manual_ack) { int query_timeout = op->base_timeout * TIMEOUT_MULTIPLY_FACTOR; op->query_timer = g_timeout_add((1000 * query_timeout), remote_op_query_timeout, op); } else { crm_xml_add(query, F_STONITH_DEVICE, "manual_ack"); } crm_xml_add(query, F_STONITH_REMOTE_OP_ID, op->id); crm_xml_add(query, F_STONITH_TARGET, op->target); crm_xml_add(query, F_STONITH_ACTION, op->action); crm_xml_add(query, F_STONITH_ORIGIN, op->originator); crm_xml_add(query, F_STONITH_CLIENTID, op->client_id); crm_xml_add(query, F_STONITH_CLIENTNAME, op->client_name); crm_xml_add_int(query, F_STONITH_TIMEOUT, op->base_timeout); send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE); free_xml(query); return op; } static gint sort_strings(gconstpointer a, gconstpointer b) { return strcmp(a, b); } enum find_best_peer_options { /*! Skip checking the target peer for capable fencing devices */ FIND_PEER_SKIP_TARGET = 0x0001, /*! Only check the target peer for capable fencing devices */ FIND_PEER_TARGET_ONLY = 0x0002, /*! Skip peers and devices that are not verified */ FIND_PEER_VERIFIED_ONLY = 0x0004, }; static st_query_result_t * find_best_peer(const char *device, remote_fencing_op_t *op, enum find_best_peer_options options) { GListPtr iter = NULL; gboolean verified_devices_only = (options & FIND_PEER_VERIFIED_ONLY) ? TRUE : FALSE; if (!device && is_set(op->call_options, st_opt_topology)) { return NULL; } for(iter = op->query_results; iter != NULL; iter = iter->next) { st_query_result_t *peer = iter->data; if ((options & FIND_PEER_SKIP_TARGET) && safe_str_eq(peer->host, op->target)) { continue; } if ((options & FIND_PEER_TARGET_ONLY) && safe_str_neq(peer->host, op->target)) { continue; } if(is_set(op->call_options, st_opt_topology)) { /* Do they have the next device of the current fencing level? */ GListPtr match = NULL; if (verified_devices_only && !g_hash_table_lookup(peer->verified_devices, device)) { continue; } match = g_list_find_custom(peer->device_list, device, sort_strings); if(match) { crm_trace("Removing %s from %s (%d remaining)", (char*)match->data, peer->host, g_list_length(peer->device_list)); peer->device_list = g_list_remove(peer->device_list, match->data); return peer; } } else if(peer->devices > 0 && peer->tried == FALSE) { if (verified_devices_only && !g_hash_table_size(peer->verified_devices)) { continue; } /* No topology: Use the current best peer */ crm_trace("Simple fencing"); return peer; } } return NULL; } static st_query_result_t *stonith_choose_peer(remote_fencing_op_t *op) { st_query_result_t *peer = NULL; const char *device = NULL; do { if(op->devices) { device = op->devices->data; crm_trace("Checking for someone to fence %s with %s", op->target, (char*)op->devices->data); } else { crm_trace("Checking for someone to fence %s", op->target); } if ((peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET | FIND_PEER_VERIFIED_ONLY))) { return peer; } else if ((peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET))) { return peer; } else if ((peer = find_best_peer(device, op, FIND_PEER_TARGET_ONLY))) { return peer; } /* Try the next fencing level if there is one */ } while(is_set(op->call_options, st_opt_topology) && stonith_topology_next(op) == pcmk_ok); if(op->devices) { crm_debug("Couldn't find anyone to fence %s with %s", op->target, (char*)op->devices->data); } else { crm_debug("Couldn't find anyone to fence %s", op->target); } return NULL; } static int get_device_timeout(st_query_result_t *peer, const char *device, int default_timeout) { gpointer res; if (!peer || !device) { return default_timeout; } res = g_hash_table_lookup(peer->custom_action_timeouts, device); return res ? GPOINTER_TO_INT(res) : default_timeout; } +static int +get_peer_timeout(st_query_result_t *peer, int default_timeout) +{ + int total_timeout = 0; + + GListPtr cur = NULL; + for (cur = peer->device_list; cur; cur = cur->next) { + total_timeout += get_device_timeout(peer, cur->data, default_timeout); + } + + return total_timeout ? total_timeout : default_timeout; +} + static int get_op_total_timeout(remote_fencing_op_t *op, st_query_result_t *chosen_peer, int default_timeout) { stonith_topology_t *tp = g_hash_table_lookup(topology, op->target); int total_timeout = 0; if (is_set(op->call_options, st_opt_topology) && tp) { int i; GListPtr device_list = NULL; GListPtr iter = NULL; /* Yep, this looks scary, nested loops all over the place. * Here is what is going on. * Loop1: Iterate through fencing levels. * Loop2: If a fencing level has devices, loop through each device * Loop3: For each device in a fencing level, see what peer owns it * and what that peer has reported the timeout is for the device. */ for (i = 0; i < ST_LEVEL_MAX; i++) { if (!tp->levels[i]) { continue; } for (device_list = tp->levels[i]; device_list; device_list = device_list->next) { for(iter = op->query_results; iter != NULL; iter = iter->next) { st_query_result_t *peer = iter->data; if (g_list_find_custom(peer->device_list, device_list->data, sort_strings)) { total_timeout += get_device_timeout(peer, device_list->data, default_timeout); break; } } /* End Loop3: match device with peer that owns device, find device's timeout period */ } /* End Loop2: iterate through devices at a specific level */ } /*End Loop1: iterate through fencing levels */ } else if (chosen_peer) { - GListPtr cur = NULL; - for (cur = chosen_peer->device_list; cur; cur = cur->next) { - total_timeout += get_device_timeout(chosen_peer, cur->data, default_timeout); - } + total_timeout = get_peer_timeout(chosen_peer, default_timeout); } else { total_timeout = default_timeout; } return total_timeout ? total_timeout : default_timeout; } static void report_timeout_period(remote_fencing_op_t *op, int op_timeout) { GListPtr iter = NULL; xmlNode *update = NULL; const char *client_node = NULL; const char *client_id = NULL; const char *call_id = NULL; if (op->call_options & st_opt_sync_call) { /* There is no reason to report the timeout for a syncronous call. It * is impossible to use the reported timeout to do anything when the client * is blocking for the response. This update is only important for * async calls that require a callback to report the results in. */ return; } else if (!op->request) { return; } crm_trace("Reporting timeout for %s.%.8s", op->client_name, op->id); client_node = crm_element_value(op->request, F_STONITH_CLIENTNODE); call_id = crm_element_value(op->request, F_STONITH_CALLID); client_id = crm_element_value(op->request, F_STONITH_CLIENTID); if (!client_node || !call_id || !client_id) { return; } if (safe_str_eq(client_node, stonith_our_uname)) { /* The client is connected to this node, send the update direclty to them */ do_stonith_async_timeout_update(client_id, call_id, op_timeout); return; } /* The client is connected to another node, relay this update to them */ update = stonith_create_op(0, op->id, STONITH_OP_TIMEOUT_UPDATE, NULL, 0); crm_xml_add(update, F_STONITH_REMOTE_OP_ID, op->id); crm_xml_add(update, F_STONITH_CLIENTID, client_id); crm_xml_add(update, F_STONITH_CALLID, call_id); crm_xml_add_int(update, F_STONITH_TIMEOUT, op_timeout); send_cluster_message(crm_get_peer(0, client_node), crm_msg_stonith_ng, update, FALSE); free_xml(update); for(iter = op->duplicates; iter != NULL; iter = iter->next) { remote_fencing_op_t *dup = iter->data; crm_trace("Reporting timeout for duplicate %s.%.8s", dup->client_name, dup->id); report_timeout_period(iter->data, op_timeout); } } void call_remote_stonith(remote_fencing_op_t *op, st_query_result_t *peer) { const char *device = NULL; int timeout = op->base_timeout; crm_trace("State for %s.%.8s: %d", op->target, op->client_name, op->id, op->state); if(peer == NULL && !is_set(op->call_options, st_opt_topology)) { peer = stonith_choose_peer(op); } if(!op->op_timer_total) { - int t = get_op_total_timeout(op, peer, op->base_timeout); - op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * t; + int total_timeout = get_op_total_timeout(op, peer, op->base_timeout); + op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * total_timeout; op->op_timer_total = g_timeout_add(1000 * op->total_timeout, remote_op_timeout, op); report_timeout_period(op, op->total_timeout); crm_info("Total remote op timeout set to %d for fencing of node %s for %s.%.8s", - t, op->target, op->client_name, op->id); + total_timeout, op->target, op->client_name, op->id); } if(is_set(op->call_options, st_opt_topology) && op->devices) { /* Ignore any preference, they might not have the device we need */ /* When using topology, the stonith_choose_peer function pops off * the peer from the op's query results. Make sure to calculate * the op_timeout before calling this function when topology is in use */ peer = stonith_choose_peer(op); device = op->devices->data; timeout = get_device_timeout(peer, device, op->base_timeout); } if(peer) { - int t = TIMEOUT_MULTIPLY_FACTOR * get_device_timeout(peer, device, op->base_timeout); + int timeout_one = 0; xmlNode *query = stonith_create_op(0, op->id, STONITH_OP_FENCE, NULL, 0); crm_xml_add(query, F_STONITH_REMOTE_OP_ID, op->id); crm_xml_add(query, F_STONITH_TARGET, op->target); crm_xml_add(query, F_STONITH_ACTION, op->action); crm_xml_add(query, F_STONITH_ORIGIN, op->originator); crm_xml_add(query, F_STONITH_CLIENTID, op->client_id); crm_xml_add(query, F_STONITH_CLIENTNAME, op->client_name); crm_xml_add_int(query, F_STONITH_TIMEOUT, timeout); if(device) { + timeout_one = TIMEOUT_MULTIPLY_FACTOR * get_device_timeout(peer, device, op->base_timeout); crm_info("Requesting that %s perform op %s %s with %s for %s (%ds)", - peer->host, op->action, op->target, device, op->client_name, t); + peer->host, op->action, op->target, device, op->client_name, timeout_one); crm_xml_add(query, F_STONITH_DEVICE, device); crm_xml_add(query, F_STONITH_MODE, "slave"); } else { + timeout_one = TIMEOUT_MULTIPLY_FACTOR * get_peer_timeout(peer, op->base_timeout); crm_info("Requesting that %s perform op %s %s for %s (%ds)", - peer->host, op->action, op->target, op->client_name, t); + peer->host, op->action, op->target, op->client_name, timeout_one); crm_xml_add(query, F_STONITH_MODE, "smart"); } op->state = st_exec; if(op->op_timer_one) { g_source_remove(op->op_timer_one); } - op->op_timer_one = g_timeout_add((1000 * t), remote_op_timeout_one, op); + op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op); send_cluster_message(crm_get_peer(0, peer->host), crm_msg_stonith_ng, query, FALSE); peer->tried = TRUE; free_xml(query); return; } else if(op->owner == FALSE) { crm_err("The termination of %s for %s is not ours to control", op->target, op->client_name); } else if(op->query_timer == 0) { /* We've exhausted all available peers */ crm_info("No remaining peers capable of terminating %s for %s (%d)", op->target, op->client_name, op->state); CRM_LOG_ASSERT(op->state < st_done); remote_op_timeout(op); } else if(device) { crm_info("Waiting for additional peers capable of terminating %s with %s for %s.%.8s", op->target, device, op->client_name, op->id); } else { crm_info("Waiting for additional peers capable of terminating %s for %s%.8s", op->target, op->client_name, op->id); } } static gint sort_peers(gconstpointer a, gconstpointer b) { const st_query_result_t *peer_a = a; const st_query_result_t *peer_b = a; if(peer_a->devices > peer_b->devices) { return -1; } else if(peer_a->devices > peer_b->devices) { return 1; } return 0; } /*! * \internal * \brief Determine if all the devices in the topology are found or not */ static gboolean all_topology_devices_found(remote_fencing_op_t *op) { GListPtr device = NULL; GListPtr iter = NULL; GListPtr match = NULL; stonith_topology_t *tp = NULL; gboolean skip_target = FALSE; int i; tp = g_hash_table_lookup(topology, op->target); if (!tp) { return FALSE; } if (safe_str_eq(op->action, "off") || safe_str_eq(op->action, "reboot")) { /* Don't count the devices on the target node if we are killing * the target node. */ skip_target = TRUE; } for (i = 0; i < ST_LEVEL_MAX; i++) { for (device = tp->levels[i]; device; device = device->next) { match = FALSE; for(iter = op->query_results; iter != NULL; iter = iter->next) { st_query_result_t *peer = iter->data; if (skip_target && safe_str_eq(peer->host, op->target)) { continue; } match = g_list_find_custom(peer->device_list, device->data, sort_strings); } if (!match) { return FALSE; } } } return TRUE; } int process_remote_stonith_query(xmlNode *msg) { int devices = 0; + gboolean host_is_target = FALSE; const char *id = NULL; const char *host = NULL; remote_fencing_op_t *op = NULL; st_query_result_t *result = NULL; xmlNode *dev = get_xpath_object("//@"F_STONITH_REMOTE_OP_ID, msg, LOG_ERR); xmlNode *child = NULL; CRM_CHECK(dev != NULL, return -EPROTO); id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID); CRM_CHECK(id != NULL, return -EPROTO); dev = get_xpath_object("//@st-available-devices", msg, LOG_ERR); CRM_CHECK(dev != NULL, return -EPROTO); crm_element_value_int(dev, "st-available-devices", &devices); op = g_hash_table_lookup(remote_op_list, id); if(op == NULL) { crm_debug("Unknown or expired remote op: %s", id); return -EOPNOTSUPP; } op->replies++; host = crm_element_value(msg, F_ORIG); + host_is_target = safe_str_eq(host, op->target); if(devices <= 0) { /* If we're doing 'known' then we might need to fire anyway */ crm_trace("Query result from %s (%d devices)", host, devices); return pcmk_ok; - } else if(op->call_options & st_opt_allow_suicide) { - crm_trace("Allowing %s to potentialy fence itself", op->target); - - } else if(safe_str_eq(host, op->target)) { - crm_info("Ignoring reply from %s, hosts are not permitted to commit suicide", op->target); - return pcmk_ok; + } else if (host_is_target) { + if(op->call_options & st_opt_allow_suicide) { + crm_trace("Allowing %s to potentialy fence itself", op->target); + } else { + crm_info("Ignoring reply from %s, hosts are not permitted to commit suicide", op->target); + return pcmk_ok; + } } crm_debug("Query result from %s (%d devices)", host, devices); result = calloc(1, sizeof(st_query_result_t)); result->host = strdup(host); result->devices = devices; result->custom_action_timeouts = g_hash_table_new_full( crm_str_hash, g_str_equal, free, NULL); result->verified_devices = g_hash_table_new_full( crm_str_hash, g_str_equal, free, NULL); for (child = __xml_first_child(dev); child != NULL; child = __xml_next(child)) { const char *device = ID(child); int action_timeout = 0; int verified = 0; if(device) { result->device_list = g_list_prepend(result->device_list, strdup(device)); crm_element_value_int(child, F_STONITH_ACTION_TIMEOUT, &action_timeout); crm_element_value_int(child, F_STONITH_DEVICE_VERIFIED, &verified); if (action_timeout) { crm_trace("Peer %s with device %s returned action timeout %d", result->host, device, action_timeout); g_hash_table_insert(result->custom_action_timeouts, strdup(device), GINT_TO_POINTER(action_timeout)); } if (verified) { crm_trace("Peer %s has confirmed a verified device %s", result->host, device); g_hash_table_insert(result->verified_devices, strdup(device), GINT_TO_POINTER(verified)); } } } CRM_CHECK(devices == g_list_length(result->device_list), crm_err("Mis-match: Query claimed to have %d devices but %d found", devices, g_list_length(result->device_list))); op->query_results = g_list_insert_sorted(op->query_results, result, sort_peers); /* All the query results are in for the topology, start the fencing ops. */ if(is_set(op->call_options, st_opt_topology)) { /* If we start the fencing before all the topology results are in, * it is possible fencing levels will be skipped because of the missing * query results. */ if (op->state == st_query && all_topology_devices_found(op)) { call_remote_stonith(op, result); } - /* We have a result for a non-topology fencing op, start fencing */ } else if(op->state == st_query) { - call_remote_stonith(op, result); + /* We have a result for a non-topology fencing op that looks promising, + * go ahead and start fencing before query timeout */ + if (host_is_target == FALSE && g_hash_table_size(result->verified_devices)) { + /* we have a verified device living on a peer that is not the target */ + call_remote_stonith(op, result); + } else if (safe_str_eq(op->action, "on")) { + /* unfencing. */ + call_remote_stonith(op, result); + } else { + crm_trace("Waiting for more peer results before launching fencing operation"); + } } else if(op->state == st_done) { crm_info("Discarding query result from %s (%d devices): Operation is in state %d", result->host, result->devices, op->state); } return pcmk_ok; } int process_remote_stonith_exec(xmlNode *msg) { int rc = 0; const char *id = NULL; const char *device = NULL; remote_fencing_op_t *op = NULL; xmlNode *dev = get_xpath_object("//@"F_STONITH_REMOTE_OP_ID, msg, LOG_ERR); CRM_CHECK(dev != NULL, return -EPROTO); id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID); CRM_CHECK(id != NULL, return -EPROTO); dev = get_xpath_object("//@"F_STONITH_RC, msg, LOG_ERR); CRM_CHECK(dev != NULL, return -EPROTO); crm_element_value_int(dev, F_STONITH_RC, &rc); device = crm_element_value(dev, F_STONITH_DEVICE); if(remote_op_list) { op = g_hash_table_lookup(remote_op_list, id); } if(op == NULL && rc == pcmk_ok) { /* Record successful fencing operations */ const char *client_id = crm_element_value(msg, F_STONITH_CLIENTID); op = create_remote_stonith_op(client_id, msg, TRUE); } if(op == NULL) { /* Could be for an event that began before we started */ /* TODO: Record the op for later querying */ crm_info("Unknown or expired remote op: %s", id); return -EOPNOTSUPP; } if (op->devices && device && safe_str_neq(op->devices->data, device)) { crm_err("Received outdated reply for device %s to %s node %s. Operation already timed out at remote level.", device, op->action, op->target); return rc; } if(safe_str_eq(crm_element_value(msg, F_SUBTYPE), "broadcast")) { crm_debug("Marking call to %s for %s on behalf of %s@%s.%.8s: %s (%d)", op->action, op->target, op->client_name, op->id, op->originator, rc == pcmk_ok?"passed":"failed", rc); if(rc == pcmk_ok) { op->state = st_done; } else { op->state = st_failed; } remote_op_done(op, msg, rc, FALSE); return pcmk_ok; } else if(safe_str_neq(op->originator, stonith_our_uname)) { /* If this isn't a remote level broadcast, and we are not the * originator of the operation, we should not be receiving this msg. */ crm_err("%s received non-broadcast fencing result for operation it does not own (device %s targeting %s)", stonith_our_uname, device, op->target); return rc; } if(is_set(op->call_options, st_opt_topology)) { const char *device = crm_element_value(msg, F_STONITH_DEVICE); crm_notice("Call to %s for %s on behalf of %s@%s: %s (%d)", device, op->target, op->client_name, op->originator, rc == pcmk_ok?"passed":"failed", rc); /* We own the op, and it is complete. broadcast the result to all nodes * and notify our local clients. */ if(op->state == st_done) { remote_op_done(op, msg, rc, FALSE); return rc; } /* An operation completed succesfully but has not yet been marked as done. * Continue the topology if more devices exist at the current level, otherwise * mark as done. */ if(rc == pcmk_ok) { if (op->devices) { /* Success, are there any more? */ op->devices = op->devices->next; } /* if no more devices at this fencing level, we are done, * else we need to contine with executing the next device in the list */ if(op->devices == NULL) { crm_trace("Marking complex fencing op for %s as complete", op->target); op->state = st_done; remote_op_done(op, msg, rc, FALSE); return rc; } } else { /* This device failed, time to try another topology level. If no other * levels are available, mark this operation as failed and report results. */ if (stonith_topology_next(op) != pcmk_ok) { op->state = st_failed; remote_op_done(op, msg, rc, FALSE); return rc; } } } else if(rc == pcmk_ok && op->devices == NULL) { crm_trace("All done for %s", op->target); op->state = st_done; remote_op_done(op, msg, rc, FALSE); return rc; } /* Retry on failure or execute the rest of the topology */ crm_trace("Next for %s on behalf of %s@%s (rc was %d)", op->target, op->originator, op->client_name, rc); call_remote_stonith(op, NULL); return rc; } int stonith_fence_history(xmlNode *msg, xmlNode **output) { int rc = 0; const char *target = NULL; xmlNode *dev = get_xpath_object("//@"F_STONITH_TARGET, msg, LOG_TRACE); if(dev) { int options = 0; target = crm_element_value(dev, F_STONITH_TARGET); crm_element_value_int(msg, F_STONITH_CALLOPTS, &options); if(target && (options & st_opt_cs_nodeid)) { int nodeid = crm_atoi(target, NULL); crm_node_t *node = crm_get_peer(nodeid, NULL); if(node) { target = node->uname; } } } *output = create_xml_node(NULL, F_STONITH_HISTORY_LIST); if (remote_op_list) { GHashTableIter iter; remote_fencing_op_t *op = NULL; g_hash_table_iter_init(&iter, remote_op_list); while(g_hash_table_iter_next(&iter, NULL, (void**)&op)) { xmlNode *entry = NULL; if (target && strcmp(op->target, target) != 0) { continue; } rc = 0; entry = create_xml_node(*output, STONITH_OP_EXEC); crm_xml_add(entry, F_STONITH_TARGET, op->target); crm_xml_add(entry, F_STONITH_ACTION, op->action); crm_xml_add(entry, F_STONITH_ORIGIN, op->originator); crm_xml_add(entry, F_STONITH_DELEGATE, op->delegate); crm_xml_add_int(entry, F_STONITH_DATE, op->completed); crm_xml_add_int(entry, F_STONITH_STATE, op->state); } } return rc; } gboolean stonith_check_fence_tolerance(int tolerance, const char *target, const char *action) { GHashTableIter iter; time_t now = time(NULL); remote_fencing_op_t *rop = NULL; crm_trace("tolerance=%d, remote_op_list=%p", tolerance, remote_op_list); if (tolerance <= 0 || !remote_op_list || target == NULL || action == NULL) { return FALSE; } g_hash_table_iter_init(&iter, remote_op_list); while(g_hash_table_iter_next(&iter, NULL, (void**)&rop)) { if(strcmp(rop->target, target) != 0) { continue; } else if(rop->state != st_done) { continue; } else if(strcmp(rop->action, action) != 0) { continue; } else if((rop->completed + tolerance) < now) { continue; } crm_notice("Target %s was fenced (%s) less than %ds ago by %s on behalf of %s", target, action, tolerance, rop->delegate, rop->originator); return TRUE; } return FALSE; } diff --git a/lib/cib/cib_remote.c b/lib/cib/cib_remote.c index be6beecea3..a19da594b0 100644 --- a/lib/cib/cib_remote.c +++ b/lib/cib/cib_remote.c @@ -1,620 +1,619 @@ /* * Copyright (c) 2008 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_GNUTLS_GNUTLS_H # undef KEYFILE # include gnutls_anon_client_credentials anon_cred_c; #define DEFAULT_CLIENT_HANDSHAKE_TIMEOUT 5000 /* 5 seconds */ const int kx_prio[] = { GNUTLS_KX_ANON_DH, 0 }; static gboolean remote_gnutls_credentials_init = FALSE; #else typedef void gnutls_session; #endif #include #include #define DH_BITS 1024 typedef struct cib_remote_opaque_s { int flags; int socket; int port; char *server; char *user; char *passwd; gboolean encrypted; crm_remote_t command; crm_remote_t callback; } cib_remote_opaque_t; void cib_remote_connection_destroy(gpointer user_data); int cib_remote_callback_dispatch(gpointer user_data); int cib_remote_command_dispatch(gpointer user_data); int cib_remote_signon(cib_t * cib, const char *name, enum cib_conn_type type); int cib_remote_signoff(cib_t * cib); int cib_remote_free(cib_t * cib); int cib_remote_perform_op(cib_t * cib, const char *op, const char *host, const char *section, xmlNode * data, xmlNode ** output_data, int call_options, const char *name); static int cib_remote_inputfd(cib_t * cib) { cib_remote_opaque_t *private = cib->variant_opaque; return private->callback.tcp_socket; } static int cib_remote_set_connection_dnotify(cib_t * cib, void (*dnotify) (gpointer user_data)) { return -EPROTONOSUPPORT; } static int cib_remote_register_notification(cib_t * cib, const char *callback, int enabled) { xmlNode *notify_msg = create_xml_node(NULL, "cib_command"); cib_remote_opaque_t *private = cib->variant_opaque; crm_xml_add(notify_msg, F_CIB_OPERATION, T_CIB_NOTIFY); crm_xml_add(notify_msg, F_CIB_NOTIFY_TYPE, callback); crm_xml_add_int(notify_msg, F_CIB_NOTIFY_ACTIVATE, enabled); crm_remote_send(&private->callback, notify_msg); free_xml(notify_msg); return pcmk_ok; } cib_t * cib_remote_new(const char *server, const char *user, const char *passwd, int port, gboolean encrypted) { cib_remote_opaque_t *private = NULL; cib_t *cib = cib_new_variant(); private = calloc(1, sizeof(cib_remote_opaque_t)); cib->variant = cib_remote; cib->variant_opaque = private; if (server) { private->server = strdup(server); } if (user) { private->user = strdup(user); } if (passwd) { private->passwd = strdup(passwd); } private->port = port; private->encrypted = encrypted; /* assign variant specific ops */ cib->delegate_fn = cib_remote_perform_op; cib->cmds->signon = cib_remote_signon; cib->cmds->signoff = cib_remote_signoff; cib->cmds->free = cib_remote_free; cib->cmds->inputfd = cib_remote_inputfd; cib->cmds->register_notification = cib_remote_register_notification; cib->cmds->set_connection_dnotify = cib_remote_set_connection_dnotify; return cib; } static int cib_tls_close(cib_t * cib) { cib_remote_opaque_t *private = cib->variant_opaque; #ifdef HAVE_GNUTLS_GNUTLS_H if (private->encrypted) { if (private->command.tls_session) { gnutls_bye(*(private->command.tls_session), GNUTLS_SHUT_RDWR); gnutls_deinit(*(private->command.tls_session)); gnutls_free(private->command.tls_session); } if (private->callback.tls_session) { gnutls_bye(*(private->callback.tls_session), GNUTLS_SHUT_RDWR); gnutls_deinit(*(private->callback.tls_session)); gnutls_free(private->callback.tls_session); } private->command.tls_session = NULL; private->callback.tls_session = NULL; if (remote_gnutls_credentials_init) { gnutls_anon_free_client_credentials(anon_cred_c); gnutls_global_deinit(); remote_gnutls_credentials_init = FALSE; } } #endif if (private->command.tcp_socket) { shutdown(private->command.tcp_socket, SHUT_RDWR); /* no more receptions */ close(private->command.tcp_socket); } if (private->callback.tcp_socket) { shutdown(private->callback.tcp_socket, SHUT_RDWR); /* no more receptions */ close(private->callback.tcp_socket); } private->command.tcp_socket = 0; private->callback.tcp_socket = 0; free(private->command.buffer); free(private->callback.buffer); private->command.buffer = NULL; private->callback.buffer = NULL; return 0; } static int cib_tls_signon(cib_t * cib, crm_remote_t *connection, gboolean event_channel) { int sock; cib_remote_opaque_t *private = cib->variant_opaque; int rc = 0; int disconnected = 0; xmlNode *answer = NULL; xmlNode *login = NULL; static struct mainloop_fd_callbacks cib_fd_callbacks = { 0, }; cib_fd_callbacks.dispatch = event_channel ? cib_remote_callback_dispatch : cib_remote_command_dispatch; cib_fd_callbacks.destroy = cib_remote_connection_destroy; connection->tcp_socket = 0; +#ifdef HAVE_GNUTLS_GNUTLS_H connection->tls_session = NULL; - +#endif sock = crm_remote_tcp_connect(private->server, private->port); if (sock <= 0) { crm_perror(LOG_ERR, "remote tcp connection to %s:%d failed", private->server, private->port); } connection->tcp_socket = sock; if (private->encrypted) { /* initialize GnuTls lib */ #ifdef HAVE_GNUTLS_GNUTLS_H if (remote_gnutls_credentials_init == FALSE) { gnutls_global_init(); gnutls_anon_allocate_client_credentials(&anon_cred_c); remote_gnutls_credentials_init = TRUE; } /* bind the socket to GnuTls lib */ connection->tls_session = crm_create_anon_tls_session(sock, GNUTLS_CLIENT, anon_cred_c); if (crm_initiate_client_tls_handshake(connection, DEFAULT_CLIENT_HANDSHAKE_TIMEOUT) != 0) { crm_err("Session creation for %s:%d failed", private->server, private->port); gnutls_deinit(*connection->tls_session); gnutls_free(connection->tls_session); connection->tls_session = NULL; cib_tls_close(cib); return -1; } #else return -EPROTONOSUPPORT; #endif - } else { - connection->tls_session = GUINT_TO_POINTER(sock); } /* login to server */ login = create_xml_node(NULL, "cib_command"); crm_xml_add(login, "op", "authenticate"); crm_xml_add(login, "user", private->user); crm_xml_add(login, "password", private->passwd); crm_xml_add(login, "hidden", "password"); crm_remote_send(connection, login); free_xml(login); crm_remote_recv(connection, -1, &disconnected); if (disconnected) { rc = -ENOTCONN; } answer = crm_remote_parse_buffer(connection); crm_log_xml_trace(answer, "Reply"); if (answer == NULL) { rc = -EPROTO; } else { /* grab the token */ const char *msg_type = crm_element_value(answer, F_CIB_OPERATION); const char *tmp_ticket = crm_element_value(answer, F_CIB_CLIENTID); if (safe_str_neq(msg_type, CRM_OP_REGISTER)) { crm_err("Invalid registration message: %s", msg_type); rc = -EPROTO; } else if (tmp_ticket == NULL) { rc = -EPROTO; } else { connection->token = strdup(tmp_ticket); } } free_xml(answer); answer = NULL; if (rc != 0) { cib_tls_close(cib); return rc; } crm_trace("remote client connection established"); connection->source = mainloop_add_fd("cib-remote", G_PRIORITY_HIGH, connection->tcp_socket, cib, &cib_fd_callbacks); return rc; } void cib_remote_connection_destroy(gpointer user_data) { crm_err("Connection destroyed"); #ifdef HAVE_GNUTLS_GNUTLS_H cib_tls_close(user_data); #endif return; } int cib_remote_command_dispatch(gpointer user_data) { int disconnected = 0; cib_t *cib = user_data; cib_remote_opaque_t *private = cib->variant_opaque; crm_remote_recv(&private->command, -1, &disconnected); free(private->command.buffer); private->command.buffer = NULL; crm_err("received late reply for remote cib connection, discarding"); if (disconnected) { return -1; } return 0; } int cib_remote_callback_dispatch(gpointer user_data) { cib_t *cib = user_data; cib_remote_opaque_t *private = cib->variant_opaque; xmlNode *msg = NULL; int disconnected = 0; crm_info("Message on callback channel"); crm_remote_recv(&private->callback, -1, &disconnected); msg = crm_remote_parse_buffer(&private->callback); while (msg) { const char *type = crm_element_value(msg, F_TYPE); crm_trace("Activating %s callbacks...", type); if (safe_str_eq(type, T_CIB)) { cib_native_callback(cib, msg, 0, 0); } else if (safe_str_eq(type, T_CIB_NOTIFY)) { g_list_foreach(cib->notify_list, cib_native_notify, msg); } else { crm_err("Unknown message type: %s", type); } free_xml(msg); msg = crm_remote_parse_buffer(&private->callback); } if (disconnected) { return -1; } return 0; } int cib_remote_signon(cib_t * cib, const char *name, enum cib_conn_type type) { int rc = pcmk_ok; cib_remote_opaque_t *private = cib->variant_opaque; if (private->passwd == NULL) { struct termios settings; int rc; rc = tcgetattr(0, &settings); settings.c_lflag &= ~ECHO; rc = tcsetattr(0, TCSANOW, &settings); fprintf(stderr, "Password: "); private->passwd = calloc(1, 1024); rc = scanf("%s", private->passwd); fprintf(stdout, "\n"); /* fprintf(stderr, "entered: '%s'\n", buffer); */ if (rc < 1) { private->passwd = NULL; } settings.c_lflag |= ECHO; rc = tcsetattr(0, TCSANOW, &settings); } if (private->server == NULL || private->user == NULL) { rc = -EINVAL; } if (rc == pcmk_ok) { rc = cib_tls_signon(cib, &(private->command), FALSE); } if (rc == pcmk_ok) { rc = cib_tls_signon(cib, &(private->callback), TRUE); } if (rc == pcmk_ok) { xmlNode *hello = cib_create_op(0, private->callback.token, CRM_OP_REGISTER, NULL, NULL, NULL, 0, NULL); crm_xml_add(hello, F_CIB_CLIENTNAME, name); crm_remote_send(&private->command, hello); free_xml(hello); } if (rc == pcmk_ok) { crm_notice("%s: Opened connection to %s:%d\n", name, private->server, private->port); cib->state = cib_connected_command; cib->type = cib_command; } else { fprintf(stderr, "%s: Connection to %s:%d failed: %s\n", name, private->server, private->port, pcmk_strerror(rc)); } return rc; } int cib_remote_signoff(cib_t * cib) { int rc = pcmk_ok; /* cib_remote_opaque_t *private = cib->variant_opaque; */ crm_debug("Signing out of the CIB Service"); #ifdef HAVE_GNUTLS_GNUTLS_H cib_tls_close(cib); #endif cib->state = cib_disconnected; cib->type = cib_none; return rc; } int cib_remote_free(cib_t * cib) { int rc = pcmk_ok; crm_warn("Freeing CIB"); if (cib->state != cib_disconnected) { rc = cib_remote_signoff(cib); if (rc == pcmk_ok) { cib_remote_opaque_t *private = cib->variant_opaque; free(private->server); free(private->user); free(private->passwd); free(cib->cmds); free(private); free(cib); } } return rc; } int cib_remote_perform_op(cib_t * cib, const char *op, const char *host, const char *section, xmlNode * data, xmlNode ** output_data, int call_options, const char *name) { int rc = pcmk_ok; int disconnected = 0; int remaining_time = 0; time_t start_time; xmlNode *op_msg = NULL; xmlNode *op_reply = NULL; cib_remote_opaque_t *private = cib->variant_opaque; if (cib->state == cib_disconnected) { return -ENOTCONN; } if (output_data != NULL) { *output_data = NULL; } if (op == NULL) { crm_err("No operation specified"); return -EINVAL; } cib->call_id++; /* prevent call_id from being negative (or zero) and conflicting * with the cib_errors enum * use 2 because we use it as (cib->call_id - 1) below */ if (cib->call_id < 1) { cib->call_id = 1; } op_msg = cib_create_op(cib->call_id, private->callback.token, op, host, section, data, call_options, NULL); if (op_msg == NULL) { return -EPROTO; } crm_trace("Sending %s message to CIB service", op); if (!(call_options & cib_sync_call)) { crm_remote_send(&private->callback, op_msg); } else { crm_remote_send(&private->command, op_msg); } free_xml(op_msg); if ((call_options & cib_discard_reply)) { crm_trace("Discarding reply"); return pcmk_ok; } else if (!(call_options & cib_sync_call)) { return cib->call_id; } crm_trace("Waiting for a syncronous reply"); start_time = time(NULL); remaining_time = cib->call_timeout ? cib->call_timeout : 60; while (remaining_time > 0 && !disconnected) { int reply_id = -1; int msg_id = cib->call_id; crm_remote_recv(&private->command, remaining_time * 1000, &disconnected); op_reply = crm_remote_parse_buffer(&private->command); if (!op_reply) { break; } crm_element_value_int(op_reply, F_CIB_CALLID, &reply_id); if (reply_id == msg_id) { break; } else if (reply_id < msg_id) { crm_debug("Received old reply: %d (wanted %d)", reply_id, msg_id); crm_log_xml_trace(op_reply, "Old reply"); } else if ((reply_id - 10000) > msg_id) { /* wrap-around case */ crm_debug("Received old reply: %d (wanted %d)", reply_id, msg_id); crm_log_xml_trace(op_reply, "Old reply"); } else { crm_err("Received a __future__ reply:" " %d (wanted %d)", reply_id, msg_id); } free_xml(op_reply); op_reply = NULL; /* wasn't the right reply, try and read some more */ remaining_time = time(NULL) - start_time; } /* if(IPC_ISRCONN(native->command_channel) == FALSE) { */ /* crm_err("CIB disconnected: %d", */ /* native->command_channel->ch_status); */ /* cib->state = cib_disconnected; */ /* } */ if (disconnected) { crm_err("Disconnected while waiting for reply."); return -ENOTCONN; } else if (op_reply == NULL) { crm_err("No reply message - empty"); return -ENOMSG; } crm_trace("Syncronous reply received"); /* Start processing the reply... */ if (crm_element_value_int(op_reply, F_CIB_RC, &rc) != 0) { rc = -EPROTO; } if (rc == -pcmk_err_diff_resync) { /* This is an internal value that clients do not and should not care about */ rc = pcmk_ok; } if (rc == pcmk_ok || rc == -EPERM) { crm_log_xml_debug(op_reply, "passed"); } else { /* } else if(rc == -ETIME) { */ crm_err("Call failed: %s", pcmk_strerror(rc)); crm_log_xml_warn(op_reply, "failed"); } if (output_data == NULL) { /* do nothing more */ } else if (!(call_options & cib_discard_reply)) { xmlNode *tmp = get_message_xml(op_reply, F_CIB_CALLDATA); if (tmp == NULL) { crm_trace("No output in reply to \"%s\" command %d", op, cib->call_id - 1); } else { *output_data = copy_xml(tmp); } } free_xml(op_reply); return rc; } diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c index 5ad2d11d62..f2094aabc5 100644 --- a/lib/lrmd/lrmd_client.c +++ b/lib/lrmd/lrmd_client.c @@ -1,1846 +1,1838 @@ /* * Copyright (c) 2012 David Vossel * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_GNUTLS_GNUTLS_H # undef KEYFILE # include #endif #include #include #include #include CRM_TRACE_INIT_DATA(lrmd); static stonith_t *stonith_api = NULL; static int lrmd_api_disconnect(lrmd_t *lrmd); static int lrmd_api_is_connected(lrmd_t *lrmd); #ifdef HAVE_GNUTLS_GNUTLS_H #define LRMD_CLIENT_HANDSHAKE_TIMEOUT 5000 /* 5 seconds */ gnutls_psk_client_credentials_t psk_cred_s; int lrmd_tls_set_key(gnutls_datum_t *key, const char *location); static void lrmd_tls_disconnect(lrmd_t *lrmd); static int global_remote_msg_id = 0; int lrmd_tls_send_msg(crm_remote_t *session, xmlNode *msg, uint32_t id, const char *msg_type); static void lrmd_tls_connection_destroy(gpointer userdata); #endif typedef struct lrmd_private_s { enum client_type type; char *token; mainloop_io_t *source; /* IPC parameters */ crm_ipc_t *ipc; crm_remote_t *remote; /* Extra TLS parameters */ char *remote_nodename; #ifdef HAVE_GNUTLS_GNUTLS_H char *server; int port; gnutls_psk_client_credentials_t psk_cred_c; int sock; GList *pending_notify; crm_trigger_t *process_notify; #endif lrmd_event_callback callback; } lrmd_private_t; static lrmd_list_t * lrmd_list_add(lrmd_list_t * head, const char *value) { lrmd_list_t *p, *end; p = calloc(1, sizeof(lrmd_list_t)); p->val = strdup(value); end = head; while (end && end->next) { end = end->next; } if (end) { end->next = p; } else { head = p; } return head; } void lrmd_list_freeall(lrmd_list_t * head) { lrmd_list_t *p; while (head) { char *val = (char *)head->val; p = head->next; free(val); free(head); head = p; } } lrmd_key_value_t * lrmd_key_value_add(lrmd_key_value_t * head, const char *key, const char *value) { lrmd_key_value_t *p, *end; p = calloc(1, sizeof(lrmd_key_value_t)); p->key = strdup(key); p->value = strdup(value); end = head; while (end && end->next) { end = end->next; } if (end) { end->next = p; } else { head = p; } return head; } void lrmd_key_value_freeall(lrmd_key_value_t * head) { lrmd_key_value_t *p; while (head) { p = head->next; free(head->key); free(head->value); free(head); head = p; } } static void dup_attr(gpointer key, gpointer value, gpointer user_data) { g_hash_table_replace(user_data, strdup(key), strdup(value)); } lrmd_event_data_t * lrmd_copy_event(lrmd_event_data_t * event) { lrmd_event_data_t *copy = NULL; copy = calloc(1, sizeof(lrmd_event_data_t)); /* This will get all the int values. * we just have to be careful not to leave any * dangling pointers to strings. */ memcpy(copy, event, sizeof(lrmd_event_data_t)); copy->rsc_id = event->rsc_id ? strdup(event->rsc_id) : NULL; copy->op_type = event->op_type ? strdup(event->op_type) : NULL; copy->user_data = event->user_data ? strdup(event->user_data) : NULL; copy->output = event->output ? strdup(event->output) : NULL; copy->remote_nodename = event->remote_nodename ? strdup(event->remote_nodename) : NULL; if (event->params) { copy->params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if (copy->params != NULL) { g_hash_table_foreach(event->params, dup_attr, copy->params); } } return copy; } void lrmd_free_event(lrmd_event_data_t * event) { if (!event) { return; } /* free gives me grief if i try to cast */ free((char *)event->rsc_id); free((char *)event->op_type); free((char *)event->user_data); free((char *)event->output); free((char *)event->remote_nodename); if (event->params) { g_hash_table_destroy(event->params); } free(event); } static int lrmd_dispatch_internal(lrmd_t *lrmd, xmlNode *msg) { const char *type; lrmd_private_t *native = lrmd->private; lrmd_event_data_t event = { 0, }; if (!native->callback) { /* no callback set */ crm_trace("notify event received but client has not set callback"); return 1; } event.remote_nodename = native->remote_nodename; type = crm_element_value(msg, F_LRMD_OPERATION); crm_element_value_int(msg, F_LRMD_CALLID, &event.call_id); event.rsc_id = crm_element_value(msg, F_LRMD_RSC_ID); if (crm_str_eq(type, LRMD_OP_RSC_REG, TRUE)) { event.type = lrmd_event_register; } else if (crm_str_eq(type, LRMD_OP_RSC_UNREG, TRUE)) { event.type = lrmd_event_unregister; } else if (crm_str_eq(type, LRMD_OP_RSC_EXEC, TRUE)) { crm_element_value_int(msg, F_LRMD_TIMEOUT, &event.timeout); crm_element_value_int(msg, F_LRMD_RSC_INTERVAL, &event.interval); crm_element_value_int(msg, F_LRMD_RSC_START_DELAY, &event.start_delay); crm_element_value_int(msg, F_LRMD_EXEC_RC, (int *)&event.rc); crm_element_value_int(msg, F_LRMD_OP_STATUS, &event.op_status); crm_element_value_int(msg, F_LRMD_RSC_DELETED, &event.rsc_deleted); crm_element_value_int(msg, F_LRMD_RSC_RUN_TIME, (int *)&event.t_run); crm_element_value_int(msg, F_LRMD_RSC_RCCHANGE_TIME, (int *)&event.t_rcchange); crm_element_value_int(msg, F_LRMD_RSC_EXEC_TIME, (int *)&event.exec_time); crm_element_value_int(msg, F_LRMD_RSC_QUEUE_TIME, (int *)&event.queue_time); event.op_type = crm_element_value(msg, F_LRMD_RSC_ACTION); event.user_data = crm_element_value(msg, F_LRMD_RSC_USERDATA_STR); event.output = crm_element_value(msg, F_LRMD_RSC_OUTPUT); event.type = lrmd_event_exec_complete; event.params = xml2list(msg); } else if (crm_str_eq(type, LRMD_OP_POKE, TRUE)) { event.type = lrmd_event_poke; } else { return 1; } crm_trace("op %s notify event received", type); native->callback(&event); if (event.params) { g_hash_table_destroy(event.params); } return 1; } static int lrmd_ipc_dispatch(const char *buffer, ssize_t length, gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->private; xmlNode *msg; int rc; if (!native->callback) { /* no callback set */ return 1; } msg = string2xml(buffer); rc = lrmd_dispatch_internal(lrmd, msg); free_xml(msg); return rc; } #ifdef HAVE_GNUTLS_GNUTLS_H static void lrmd_free_xml(gpointer userdata) { free_xml((xmlNode *) userdata); } static int lrmd_tls_connected(lrmd_t *lrmd) { lrmd_private_t *native = lrmd->private; if (native->remote->tls_session) { return TRUE; } return FALSE; } static int lrmd_tls_dispatch(gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->private; xmlNode *xml = NULL; int rc = 0; int disconnected = 0; if (lrmd_tls_connected(lrmd) == FALSE) { crm_trace("tls dispatch triggered after disconnect"); return 0; } crm_trace("tls_dispatch triggered"); /* First check if there are any pending notifies to process that came * while we were waiting for replies earlier. */ if (native->pending_notify) { GList *iter = NULL; crm_trace("Processing pending notifies"); for (iter = native->pending_notify; iter; iter = iter->next) { lrmd_dispatch_internal(lrmd, iter->data); } g_list_free_full(native->pending_notify, lrmd_free_xml); native->pending_notify = NULL; } /* Next read the current buffer and see if there are any messages to handle. */ rc = crm_remote_ready(native->remote, 0); if (rc == 0) { /* nothing to read, see if any full messages are already in buffer. */ xml = crm_remote_parse_buffer(native->remote); } else if (rc < 0) { disconnected = 1; } else { crm_remote_recv(native->remote, -1, &disconnected); xml = crm_remote_parse_buffer(native->remote); } while (xml) { lrmd_dispatch_internal(lrmd, xml); free_xml(xml); xml = crm_remote_parse_buffer(native->remote); } if (disconnected) { crm_info("Server disconnected while reading remote server msg."); lrmd_tls_disconnect(lrmd); return 0; } return 1; } #endif /* Not used with mainloop */ int lrmd_poll(lrmd_t *lrmd, int timeout) { lrmd_private_t *native = lrmd->private; switch (native->type) { case CRM_CLIENT_IPC: return crm_ipc_ready(native->ipc); #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: if (native->pending_notify) { return 1; } else if (native->remote->buffer && strstr(native->remote->buffer, REMOTE_MSG_TERMINATOR)) { return 1; } return crm_remote_ready(native->remote, 0); #endif default: crm_err("Unsupported connection type: %d", native->type); } return 0; } /* Not used with mainloop */ bool lrmd_dispatch(lrmd_t * lrmd) { lrmd_private_t *private = NULL; CRM_ASSERT(lrmd != NULL); private = lrmd->private; switch (private->type) { case CRM_CLIENT_IPC: while (crm_ipc_ready(private->ipc)) { if (crm_ipc_read(private->ipc) > 0) { const char *msg = crm_ipc_buffer(private->ipc); lrmd_ipc_dispatch(msg, strlen(msg), lrmd); } } break; #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: lrmd_tls_dispatch(lrmd); break; #endif default: crm_err("Unsupported connection type: %d", private->type); } if (lrmd_api_is_connected(lrmd) == FALSE) { crm_err("Connection closed"); return FALSE; } return TRUE; } static xmlNode * lrmd_create_op(const char *token, const char *op, xmlNode * data, enum lrmd_call_options options) { xmlNode *op_msg = create_xml_node(NULL, "lrmd_command"); CRM_CHECK(op_msg != NULL, return NULL); CRM_CHECK(token != NULL, return NULL); crm_xml_add(op_msg, F_XML_TAGNAME, "lrmd_command"); crm_xml_add(op_msg, F_TYPE, T_LRMD); crm_xml_add(op_msg, F_LRMD_CALLBACK_TOKEN, token); crm_xml_add(op_msg, F_LRMD_OPERATION, op); crm_trace("Sending call options: %.8lx, %d", (long)options, options); crm_xml_add_int(op_msg, F_LRMD_CALLOPTS, options); if (data != NULL) { add_message_xml(op_msg, F_LRMD_CALLDATA, data); } return op_msg; } static void lrmd_ipc_connection_destroy(gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->private; crm_info("IPC connection destroyed"); /* Prevent these from being cleaned up in lrmd_api_disconnect() */ native->ipc = NULL; native->source = NULL; if (native->callback) { lrmd_event_data_t event = { 0, }; event.type = lrmd_event_disconnect; event.remote_nodename = native->remote_nodename; native->callback(&event); } } #ifdef HAVE_GNUTLS_GNUTLS_H static void lrmd_tls_connection_destroy(gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->private; crm_info("TLS connection destroyed"); if (native->remote->tls_session) { gnutls_bye(*native->remote->tls_session, GNUTLS_SHUT_RDWR); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); } if (native->psk_cred_c) { gnutls_psk_free_client_credentials(native->psk_cred_c); } if (native->sock) { close(native->sock); } if (native->process_notify) { mainloop_destroy_trigger(native->process_notify); native->process_notify = NULL; } if (native->pending_notify) { g_list_free_full(native->pending_notify, lrmd_free_xml); native->pending_notify = NULL; } free(native->remote->buffer); native->remote->buffer = NULL; native->source = 0; native->sock = 0; native->psk_cred_c = NULL; native->remote->tls_session = NULL; native->sock = 0; if (native->callback) { lrmd_event_data_t event = { 0, }; event.remote_nodename = native->remote_nodename; event.type = lrmd_event_disconnect; native->callback(&event); } return; } int lrmd_tls_send_msg(crm_remote_t *session, xmlNode *msg, uint32_t id, const char *msg_type) { int rc = -1; crm_xml_add_int(msg, F_LRMD_REMOTE_MSG_ID, id); crm_xml_add(msg, F_LRMD_REMOTE_MSG_TYPE, msg_type); rc = crm_remote_send(session, msg); if (rc < 0) { crm_err("Failed to send remote lrmd tls msg, rc = %d" , rc); return rc; } return rc; } static xmlNode * lrmd_tls_recv_reply(lrmd_t *lrmd, int total_timeout, int expected_reply_id, int *disconnected) { lrmd_private_t *native = lrmd->private; xmlNode *xml = NULL; time_t start = time(NULL); const char *msg_type = NULL; int reply_id = 0; int remaining_timeout = 0; /* A timeout of 0 here makes no sense. We have to wait a period of time * for the response to come back. If -1 or 0, default to 10 seconds. */ if (total_timeout <= 0) { total_timeout = 10000; } while (!xml) { xml = crm_remote_parse_buffer(native->remote); if (!xml) { /* read some more off the tls buffer if we still have time left. */ if (remaining_timeout) { remaining_timeout = remaining_timeout - ((time(NULL) - start) * 1000); } else { remaining_timeout = total_timeout; } if (remaining_timeout <= 0) { return NULL; } crm_remote_recv(native->remote, remaining_timeout, disconnected); xml = crm_remote_parse_buffer(native->remote); if (!xml || *disconnected) { return NULL; } } CRM_ASSERT(xml != NULL); crm_element_value_int(xml, F_LRMD_REMOTE_MSG_ID, &reply_id); msg_type = crm_element_value(xml, F_LRMD_REMOTE_MSG_TYPE); if (!msg_type) { crm_err("Empty msg type received while waiting for reply"); free_xml(xml); xml = NULL; } else if (safe_str_eq(msg_type, "notify")) { /* got a notify while waiting for reply, trigger the notify to be processed later */ crm_info("queueing notify"); native->pending_notify = g_list_append(native->pending_notify, xml); if (native->process_notify) { crm_info("notify trigger set."); mainloop_set_trigger(native->process_notify); } xml = NULL; } else if (safe_str_neq(msg_type, "reply")) { /* msg isn't a reply, make some noise */ crm_err("Expected a reply, got %s", msg_type); free_xml(xml); xml = NULL; } else if (reply_id != expected_reply_id) { crm_err("Got outdated reply, expected id %d got id %d", expected_reply_id, reply_id); free_xml(xml); xml = NULL; } } if (native->remote->buffer && native->process_notify) { mainloop_set_trigger(native->process_notify); } return xml; } static int lrmd_tls_send(lrmd_t *lrmd, xmlNode *msg) { int rc = 0; lrmd_private_t *native = lrmd->private; global_remote_msg_id++; if (global_remote_msg_id <= 0) { global_remote_msg_id = 1; } rc = lrmd_tls_send_msg(native->remote, msg, global_remote_msg_id, "request"); if (rc <= 0) { crm_err("Remote lrmd send failed, disconnecting"); lrmd_tls_disconnect(lrmd); return -ENOTCONN; } return pcmk_ok; } static int lrmd_tls_send_recv(lrmd_t *lrmd, xmlNode *msg, int timeout, xmlNode **reply) { int rc = 0; int disconnected = 0; xmlNode *xml = NULL; if (lrmd_tls_connected(lrmd) == FALSE) { return -1; } rc = lrmd_tls_send(lrmd, msg); if (rc < 0) { return rc; } xml = lrmd_tls_recv_reply(lrmd, timeout, global_remote_msg_id, &disconnected); if (disconnected) { crm_err("Remote lrmd server disconnected while waiting for reply with id %d. ", global_remote_msg_id); lrmd_tls_disconnect(lrmd); rc = -ENOTCONN; } else if (!xml) { crm_err("Remote lrmd never received reply for request id %d. timeout: %dms ", global_remote_msg_id, timeout); rc = -ECOMM; } if (reply) { *reply = xml; } else { free_xml(xml); } return rc; } #endif static int lrmd_send_xml(lrmd_t *lrmd, xmlNode *msg, int timeout, xmlNode **reply) { int rc = -1; lrmd_private_t *native = lrmd->private; switch (native->type) { case CRM_CLIENT_IPC: rc = crm_ipc_send(native->ipc, msg, crm_ipc_client_response, timeout, reply); break; #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: rc = lrmd_tls_send_recv(lrmd, msg, timeout, reply); break; #endif default: crm_err("Unsupported connection type: %d", native->type); } return rc; } static int lrmd_send_xml_no_reply(lrmd_t *lrmd, xmlNode *msg) { int rc = -1; lrmd_private_t *native = lrmd->private; switch (native->type) { case CRM_CLIENT_IPC: rc = crm_ipc_send(native->ipc, msg, crm_ipc_client_none, 0, NULL); break; - case CRM_CLIENT_TLS: #ifdef HAVE_GNUTLS_GNUTLS_H + case CRM_CLIENT_TLS: rc = lrmd_tls_send(lrmd, msg); -#endif break; +#endif default: crm_err("Unsupported connection type: %d", native->type); } return rc; } static int lrmd_api_is_connected(lrmd_t *lrmd) { lrmd_private_t *native = lrmd->private; switch (native->type) { case CRM_CLIENT_IPC: return crm_ipc_connected(native->ipc); break; #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: return lrmd_tls_connected(lrmd); break; #endif default: crm_err("Unsupported connection type: %d", native->type); } return 0; } static int lrmd_send_command(lrmd_t * lrmd, const char *op, xmlNode * data, xmlNode ** output_data, int timeout, /* ms. defaults to 1000 if set to 0 */ enum lrmd_call_options options, gboolean expect_reply) /* TODO we need to reduce usage of this boolean */ { int rc = pcmk_ok; int reply_id = -1; lrmd_private_t *native = lrmd->private; xmlNode *op_msg = NULL; xmlNode *op_reply = NULL; if (!lrmd_api_is_connected(lrmd)) { return -ENOTCONN; } if (op == NULL) { crm_err("No operation specified"); return -EINVAL; } CRM_CHECK(native->token != NULL,;); crm_trace("sending %s op to lrmd", op); op_msg = lrmd_create_op(native->token, op, data, options); if (op_msg == NULL) { return -EINVAL; } crm_xml_add_int(op_msg, F_LRMD_TIMEOUT, timeout); if (expect_reply) { rc = lrmd_send_xml(lrmd, op_msg, timeout, &op_reply); } else { rc = lrmd_send_xml_no_reply(lrmd, op_msg); goto done; } if (rc < 0) { crm_perror(LOG_ERR, "Couldn't perform %s operation (timeout=%d): %d", op, timeout, rc); rc = -ECOMM; goto done; } rc = pcmk_ok; crm_element_value_int(op_reply, F_LRMD_CALLID, &reply_id); crm_trace("%s op reply received", op); if (crm_element_value_int(op_reply, F_LRMD_RC, &rc) != 0) { rc = -ENOMSG; goto done; } crm_log_xml_trace(op_reply, "Reply"); if (output_data) { *output_data = op_reply; op_reply = NULL; /* Prevent subsequent free */ } done: if (lrmd_api_is_connected(lrmd) == FALSE) { crm_err("LRMD disconnected"); } free_xml(op_msg); free_xml(op_reply); return rc; } static int lrmd_api_poke_connection(lrmd_t *lrmd) { int rc; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); rc = lrmd_send_command(lrmd, LRMD_OP_POKE, data, NULL, 0, 0, FALSE); free_xml(data); return rc; } static int lrmd_handshake(lrmd_t *lrmd, const char *name) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->private; xmlNode *reply = NULL; xmlNode *hello = create_xml_node(NULL, "lrmd_command"); crm_xml_add(hello, F_TYPE, T_LRMD); crm_xml_add(hello, F_LRMD_OPERATION, CRM_OP_REGISTER); crm_xml_add(hello, F_LRMD_CLIENTNAME, name); rc = lrmd_send_xml(lrmd, hello, -1, &reply); if (rc < 0) { crm_perror(LOG_DEBUG, "Couldn't complete registration with the lrmd API: %d", rc); rc = -ECOMM; } else if (reply == NULL) { crm_err("Did not receive registration reply"); rc = -EPROTO; } else { const char *msg_type = crm_element_value(reply, F_LRMD_OPERATION); const char *tmp_ticket = crm_element_value(reply, F_LRMD_CLIENTID); if (safe_str_neq(msg_type, CRM_OP_REGISTER)) { crm_err("Invalid registration message: %s", msg_type); crm_log_xml_err(reply, "Bad reply"); rc = -EPROTO; } else if (tmp_ticket == NULL) { crm_err("No registration token provided"); crm_log_xml_err(reply, "Bad reply"); rc = -EPROTO; } else { crm_trace("Obtained registration token: %s", tmp_ticket); native->token = strdup(tmp_ticket); rc = pcmk_ok; } } free_xml(reply); free_xml(hello); if (rc != pcmk_ok) { lrmd_api_disconnect(lrmd); } return rc; } static int lrmd_ipc_connect(lrmd_t * lrmd, int *fd) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->private; static struct ipc_client_callbacks lrmd_callbacks = { .dispatch = lrmd_ipc_dispatch, .destroy = lrmd_ipc_connection_destroy }; crm_info("Connecting to lrmd"); if (fd) { /* No mainloop */ native->ipc = crm_ipc_new("lrmd", 0); if (native->ipc && crm_ipc_connect(native->ipc)) { *fd = crm_ipc_get_fd(native->ipc); } else if (native->ipc) { rc = -ENOTCONN; } } else { native->source = mainloop_add_ipc_client("lrmd", G_PRIORITY_HIGH, 0, lrmd, &lrmd_callbacks); native->ipc = mainloop_get_ipc_client(native->source); } if (native->ipc == NULL) { crm_debug("Could not connect to the LRMD API"); rc = -ENOTCONN; } return rc; } #ifdef HAVE_GNUTLS_GNUTLS_H int lrmd_tls_set_key(gnutls_datum_t *key, const char *location) { FILE *stream; int read_len = 256; int cur_len = 0; int buf_len = read_len; static char *key_cache = NULL; static size_t key_cache_len = 0; static time_t key_cache_updated; if (key_cache) { time_t now = time(NULL); if ((now - key_cache_updated) < 60) { key->data = gnutls_malloc(key_cache_len + 1); key->size = key_cache_len; memcpy(key->data, key_cache, key_cache_len); crm_debug("using cached LRMD key"); return 0; } else { key_cache_len = 0; key_cache_updated = 0; free(key_cache); key_cache = NULL; crm_debug("clearing lrmd key cache"); } } stream = fopen(location, "r"); if (!stream) { return -1; } key->data = gnutls_malloc(read_len); while (!feof(stream)) { char next; if (cur_len == buf_len) { buf_len = cur_len + read_len; key->data = gnutls_realloc(key->data, buf_len); } next = fgetc(stream); if (next == EOF && feof(stream)) { break; } key->data[cur_len] = next; cur_len++; } fclose(stream); key->size = cur_len; if (!cur_len) { gnutls_free(key->data); key->data = 0; return -1; } if (!key_cache) { key_cache = calloc(1, key->size+1); memcpy(key_cache, key->data, key->size); key_cache_len = key->size; key_cache_updated = time(NULL); } return 0; } static int lrmd_tls_key_cb(gnutls_session_t session, char **username, gnutls_datum_t *key) { int rc = 0; if (lrmd_tls_set_key(key, DEFAULT_REMOTE_KEY_LOCATION)) { rc = lrmd_tls_set_key(key, ALT_REMOTE_KEY_LOCATION); } if (rc) { crm_err("No lrmd remote key found"); return -1; } *username = gnutls_malloc(strlen(DEFAULT_REMOTE_USERNAME) + 1); strcpy(*username, DEFAULT_REMOTE_USERNAME); return rc; } -#endif static void lrmd_gnutls_global_init(void) { static int gnutls_init = 0; if (!gnutls_init) { gnutls_global_init(); } gnutls_init = 1; } +#endif static void report_async_connection_result(lrmd_t *lrmd, int rc) { lrmd_private_t *native = lrmd->private; if (native->callback) { lrmd_event_data_t event = { 0, }; event.type = lrmd_event_connect; event.remote_nodename = native->remote_nodename; event.connection_rc = rc; native->callback(&event); } } #ifdef HAVE_GNUTLS_GNUTLS_H static void lrmd_tcp_connect_cb(void *userdata, int sock) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->private; char name[256] = { 0, }; static struct mainloop_fd_callbacks lrmd_tls_callbacks = { .dispatch = lrmd_tls_dispatch, .destroy = lrmd_tls_connection_destroy, }; int rc = sock; if (rc < 0) { lrmd_tls_connection_destroy(lrmd); crm_info("remote lrmd connect to %s at port %d failed", native->server, native->port); report_async_connection_result(lrmd, rc); return; } /* TODO continue with tls stuff now that tcp connect passed. make this async as well soon * to avoid all blocking code in the client. */ native->sock = sock; gnutls_psk_allocate_client_credentials(&native->psk_cred_c); gnutls_psk_set_client_credentials_function(native->psk_cred_c, lrmd_tls_key_cb); native->remote->tls_session = create_psk_tls_session(sock, GNUTLS_CLIENT, native->psk_cred_c); if (crm_initiate_client_tls_handshake(native->remote, LRMD_CLIENT_HANDSHAKE_TIMEOUT) != 0) { crm_warn("Client tls handshake failed for server %s:%d. Disconnecting", native->server, native->port); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); native->remote->tls_session = NULL; lrmd_tls_connection_destroy(lrmd); report_async_connection_result(lrmd, -1); return; } crm_info("Remote lrmd client TLS connection established with server %s:%d", native->server, native->port); snprintf(name, 128, "remote-lrmd-%s:%d", native->server, native->port); native->process_notify = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_tls_dispatch, lrmd); native->source = mainloop_add_fd(name, G_PRIORITY_HIGH, native->sock, lrmd, &lrmd_tls_callbacks); rc = lrmd_handshake(lrmd, name); report_async_connection_result(lrmd, rc); return; } -#endif static int lrmd_tls_connect_async(lrmd_t *lrmd, int timeout /*ms*/) { -#ifdef HAVE_GNUTLS_GNUTLS_H int rc = 0; lrmd_private_t *native = lrmd->private; lrmd_gnutls_global_init(); rc = crm_remote_tcp_connect_async(native->server, native->port, timeout, lrmd, lrmd_tcp_connect_cb); return rc; -#else - crm_err("TLS not enabled for this build."); - return -ENOTCONN; -#endif } static int lrmd_tls_connect(lrmd_t *lrmd, int *fd) { -#ifdef HAVE_GNUTLS_GNUTLS_H static struct mainloop_fd_callbacks lrmd_tls_callbacks = { .dispatch = lrmd_tls_dispatch, .destroy = lrmd_tls_connection_destroy, }; lrmd_private_t *native = lrmd->private; int sock; lrmd_gnutls_global_init(); sock = crm_remote_tcp_connect(native->server, native->port); if (sock <= 0) { crm_warn("Could not establish remote lrmd connection to %s", native->server); lrmd_tls_connection_destroy(lrmd); return -ENOTCONN; } native->sock = sock; gnutls_psk_allocate_client_credentials(&native->psk_cred_c); gnutls_psk_set_client_credentials_function(native->psk_cred_c, lrmd_tls_key_cb); native->remote->tls_session = create_psk_tls_session(sock, GNUTLS_CLIENT, native->psk_cred_c); if (crm_initiate_client_tls_handshake(native->remote, LRMD_CLIENT_HANDSHAKE_TIMEOUT) != 0) { crm_err("Session creation for %s:%d failed", native->server, native->port); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); native->remote->tls_session = NULL; lrmd_tls_connection_destroy(lrmd); return -1; } crm_info("Remote lrmd client TLS connection established with server %s:%d", native->server, native->port); if (fd) { *fd = sock; } else { char name[256] = { 0, }; snprintf(name, 128, "remote-lrmd-%s:%d", native->server, native->port); native->process_notify = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_tls_dispatch, lrmd); native->source = mainloop_add_fd(name, G_PRIORITY_HIGH, native->sock, lrmd, &lrmd_tls_callbacks); } return pcmk_ok; -#else - crm_err("TLS not enabled for this build."); - return -ENOTCONN; -#endif } +#endif static int lrmd_api_connect(lrmd_t * lrmd, const char *name, int *fd) { int rc = -ENOTCONN; lrmd_private_t *native = lrmd->private; switch (native->type) { case CRM_CLIENT_IPC: rc = lrmd_ipc_connect(lrmd, fd); break; #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: rc = lrmd_tls_connect(lrmd, fd); break; #endif default: crm_err("Unsupported connection type: %d", native->type); } if (rc == pcmk_ok) { rc = lrmd_handshake(lrmd, name); } return rc; } static int lrmd_api_connect_async(lrmd_t * lrmd, const char *name, int timeout) { int rc = 0; lrmd_private_t *native = lrmd->private; if (!native->callback) { crm_err("Async connect not possible, no lrmd client callback set."); return -1; } switch (native->type) { case CRM_CLIENT_IPC: /* fake async connection with ipc. it should be fast * enough that we gain very little from async */ rc = lrmd_api_connect(lrmd, name, NULL); if (!rc) { report_async_connection_result(lrmd, rc); } break; +#ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: rc = lrmd_tls_connect_async(lrmd, timeout); if (rc) { /* connection failed, report rc now */ report_async_connection_result(lrmd, rc); } break; +#endif default: crm_err("Unsupported connection type: %d", native->type); } return rc; } static void lrmd_ipc_disconnect(lrmd_t *lrmd) { lrmd_private_t *native = lrmd->private; if (native->source != NULL) { /* Attached to mainloop */ mainloop_del_ipc_client(native->source); native->source = NULL; native->ipc = NULL; } else if(native->ipc) { /* Not attached to mainloop */ crm_ipc_t *ipc = native->ipc; native->ipc = NULL; crm_ipc_close(ipc); crm_ipc_destroy(ipc); } } #ifdef HAVE_GNUTLS_GNUTLS_H static void lrmd_tls_disconnect(lrmd_t *lrmd) { lrmd_private_t *native = lrmd->private; if (native->remote->tls_session) { gnutls_bye(*native->remote->tls_session, GNUTLS_SHUT_RDWR); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); native->remote->tls_session = 0; } if (native->source != NULL) { /* Attached to mainloop */ mainloop_del_ipc_client(native->source); native->source = NULL; } else if(native->sock) { close(native->sock); } if (native->pending_notify) { g_list_free_full(native->pending_notify, lrmd_free_xml); native->pending_notify = NULL; } } #endif static int lrmd_api_disconnect(lrmd_t *lrmd) { lrmd_private_t *native = lrmd->private; crm_info("Disconnecting from lrmd service"); switch (native->type) { case CRM_CLIENT_IPC: lrmd_ipc_disconnect(lrmd); break; #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: lrmd_tls_disconnect(lrmd); break; #endif default: crm_err("Unsupported connection type: %d", native->type); } free(native->token); native->token = NULL; return 0; } static int lrmd_api_register_rsc(lrmd_t * lrmd, const char *rsc_id, const char *class, const char *provider, const char *type, enum lrmd_call_options options) { int rc = pcmk_ok; xmlNode *data = NULL; if (!class || !type || !rsc_id) { return -EINVAL; } if (safe_str_eq(class, "ocf") && !provider) { return -EINVAL; } data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); crm_xml_add(data, F_LRMD_CLASS, class); crm_xml_add(data, F_LRMD_PROVIDER, provider); crm_xml_add(data, F_LRMD_TYPE, type); rc = lrmd_send_command(lrmd, LRMD_OP_RSC_REG, data, NULL, 0, options, TRUE); free_xml(data); return rc; } static int lrmd_api_unregister_rsc(lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options) { int rc = pcmk_ok; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); rc = lrmd_send_command(lrmd, LRMD_OP_RSC_UNREG, data, NULL, 0, options, TRUE); free_xml(data); return rc; } lrmd_rsc_info_t * lrmd_copy_rsc_info(lrmd_rsc_info_t * rsc_info) { lrmd_rsc_info_t *copy = NULL; copy = calloc(1, sizeof(lrmd_rsc_info_t)); copy->id = strdup(rsc_info->id); copy->type = strdup(rsc_info->type); copy->class = strdup(rsc_info->class); if (rsc_info->provider) { copy->provider = strdup(rsc_info->provider); } return copy; } void lrmd_free_rsc_info(lrmd_rsc_info_t * rsc_info) { if (!rsc_info) { return; } free(rsc_info->id); free(rsc_info->type); free(rsc_info->class); free(rsc_info->provider); free(rsc_info); } static lrmd_rsc_info_t * lrmd_api_get_rsc_info(lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options) { lrmd_rsc_info_t *rsc_info = NULL; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); xmlNode *output = NULL; const char *class = NULL; const char *provider = NULL; const char *type = NULL; crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); lrmd_send_command(lrmd, LRMD_OP_RSC_INFO, data, &output, 0, options, TRUE); free_xml(data); if (!output) { return NULL; } class = crm_element_value(output, F_LRMD_CLASS); provider = crm_element_value(output, F_LRMD_PROVIDER); type = crm_element_value(output, F_LRMD_TYPE); if (!class || !type) { free_xml(output); return NULL; } else if (safe_str_eq(class, "ocf") && !provider) { free_xml(output); return NULL; } rsc_info = calloc(1, sizeof(lrmd_rsc_info_t)); rsc_info->id = strdup(rsc_id); rsc_info->class = strdup(class); if (provider) { rsc_info->provider = strdup(provider); } rsc_info->type = strdup(type); free_xml(output); return rsc_info; } static void lrmd_api_set_callback(lrmd_t * lrmd, lrmd_event_callback callback) { lrmd_private_t *native = lrmd->private; native->callback = callback; } static int stonith_get_metadata(const char *provider, const char *type, char **output) { int rc = pcmk_ok; stonith_api->cmds->metadata(stonith_api, st_opt_sync_call, type, provider, output, 0); if (*output == NULL) { rc = -EIO; } return rc; } static int lsb_get_metadata(const char *type, char **output) { #define lsb_metadata_template \ "\n"\ "\n"\ "\n"\ " 1.0\n"\ " \n"\ " %s"\ " \n"\ " %s\n"\ " \n"\ " \n"\ " \n"\ " \n"\ " \n"\ " \n"\ " \n"\ " \n"\ " \n"\ " \n"\ " \n"\ " \n"\ " %s\n"\ " %s\n"\ " %s\n"\ " %s\n"\ " %s\n"\ " %s\n"\ " %s\n"\ " \n"\ "\n" #define LSB_INITSCRIPT_INFOBEGIN_TAG "### BEGIN INIT INFO" #define LSB_INITSCRIPT_INFOEND_TAG "### END INIT INFO" #define PROVIDES "# Provides:" #define REQ_START "# Required-Start:" #define REQ_STOP "# Required-Stop:" #define SHLD_START "# Should-Start:" #define SHLD_STOP "# Should-Stop:" #define DFLT_START "# Default-Start:" #define DFLT_STOP "# Default-Stop:" #define SHORT_DSCR "# Short-Description:" #define DESCRIPTION "# Description:" #define lsb_meta_helper_free_value(m) \ if ((m) != NULL) { \ xmlFree(m); \ (m) = NULL; \ } #define lsb_meta_helper_get_value(buffer, ptr, keyword) \ if (!ptr && !strncasecmp(buffer, keyword, strlen(keyword))) { \ (ptr) = (char *)xmlEncodeEntitiesReentrant(NULL, BAD_CAST buffer+strlen(keyword)); \ continue; \ } char ra_pathname[PATH_MAX] = { 0, }; FILE *fp; GString *meta_data = NULL; char buffer[1024]; char *provides = NULL; char *req_start = NULL; char *req_stop = NULL; char *shld_start = NULL; char *shld_stop = NULL; char *dflt_start = NULL; char *dflt_stop = NULL; char *s_dscrpt = NULL; char *xml_l_dscrpt = NULL; GString *l_dscrpt = NULL; snprintf(ra_pathname, sizeof(ra_pathname), "%s%s%s", type[0] == '/' ? "" : LSB_ROOT_DIR, type[0] == '/' ? "" : "/", type); if (!(fp = fopen(ra_pathname, "r"))) { return -EIO; } /* Enter into the lsb-compliant comment block */ while (fgets(buffer, sizeof(buffer), fp)) { /* Now suppose each of the following eight arguments contain only one line */ lsb_meta_helper_get_value(buffer, provides, PROVIDES) lsb_meta_helper_get_value(buffer, req_start, REQ_START) lsb_meta_helper_get_value(buffer, req_stop, REQ_STOP) lsb_meta_helper_get_value(buffer, shld_start, SHLD_START) lsb_meta_helper_get_value(buffer, shld_stop, SHLD_STOP) lsb_meta_helper_get_value(buffer, dflt_start, DFLT_START) lsb_meta_helper_get_value(buffer, dflt_stop, DFLT_STOP) lsb_meta_helper_get_value(buffer, s_dscrpt, SHORT_DSCR) /* Long description may cross multiple lines */ if ((l_dscrpt == NULL) && (0 == strncasecmp(buffer, DESCRIPTION, strlen(DESCRIPTION)))) { l_dscrpt = g_string_new(buffer + strlen(DESCRIPTION)); /* Between # and keyword, more than one space, or a tab character, * indicates the continuation line. Extracted from LSB init script standard */ while (fgets(buffer, sizeof(buffer), fp)) { if (!strncmp(buffer, "# ", 3) || !strncmp(buffer, "#\t", 2)) { buffer[0] = ' '; l_dscrpt = g_string_append(l_dscrpt, buffer); } else { fputs(buffer, fp); break; /* Long description ends */ } } continue; } if (l_dscrpt) { xml_l_dscrpt = (char *)xmlEncodeEntitiesReentrant(NULL, BAD_CAST(l_dscrpt->str)); } if (!strncasecmp(buffer, LSB_INITSCRIPT_INFOEND_TAG, strlen(LSB_INITSCRIPT_INFOEND_TAG))) { /* Get to the out border of LSB comment block */ break; } if (buffer[0] != '#') { break; /* Out of comment block in the beginning */ } } fclose(fp); meta_data = g_string_new(""); g_string_sprintf(meta_data, lsb_metadata_template, type, (xml_l_dscrpt == NULL) ? type : xml_l_dscrpt, (s_dscrpt == NULL) ? type : s_dscrpt, (provides == NULL) ? "" : provides, (req_start == NULL) ? "" : req_start, (req_stop == NULL) ? "" : req_stop, (shld_start == NULL) ? "" : shld_start, (shld_stop == NULL) ? "" : shld_stop, (dflt_start == NULL) ? "" : dflt_start, (dflt_stop == NULL) ? "" : dflt_stop); lsb_meta_helper_free_value(xml_l_dscrpt); lsb_meta_helper_free_value(s_dscrpt); lsb_meta_helper_free_value(provides); lsb_meta_helper_free_value(req_start); lsb_meta_helper_free_value(req_stop); lsb_meta_helper_free_value(shld_start); lsb_meta_helper_free_value(shld_stop); lsb_meta_helper_free_value(dflt_start); lsb_meta_helper_free_value(dflt_stop); if (l_dscrpt) { g_string_free(l_dscrpt, TRUE); } *output = strdup(meta_data->str); g_string_free(meta_data, TRUE); return pcmk_ok; } static int generic_get_metadata(const char *standard, const char *provider, const char *type, char **output) { svc_action_t *action = resources_action_create(type, standard, provider, type, "meta-data", 0, 5000, NULL); if (!(services_action_sync(action))) { crm_err("Failed to retrieve meta-data for %s:%s:%s", standard, provider, type); services_action_free(action); return -EIO; } if (!action->stdout_data) { crm_err("Failed to retrieve meta-data for %s:%s:%s", standard, provider, type); services_action_free(action); return -EIO; } *output = strdup(action->stdout_data); services_action_free(action); return pcmk_ok; } static int lrmd_api_get_metadata(lrmd_t * lrmd, const char *class, const char *provider, const char *type, char **output, enum lrmd_call_options options) { if (!class || !type) { return -EINVAL; } if (safe_str_eq(class, "stonith")) { return stonith_get_metadata(provider, type, output); } else if (safe_str_eq(class, "lsb")) { return lsb_get_metadata(type, output); } return generic_get_metadata(class, provider, type, output); } static int lrmd_api_exec(lrmd_t * lrmd, const char *rsc_id, const char *action, const char *userdata, int interval, /* ms */ int timeout, /* ms */ int start_delay, /* ms */ enum lrmd_call_options options, lrmd_key_value_t * params) { int rc = pcmk_ok; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); xmlNode *args = create_xml_node(data, XML_TAG_ATTRS); lrmd_key_value_t *tmp = NULL; crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); crm_xml_add(data, F_LRMD_RSC_ACTION, action); crm_xml_add(data, F_LRMD_RSC_USERDATA_STR, userdata); crm_xml_add_int(data, F_LRMD_RSC_INTERVAL, interval); crm_xml_add_int(data, F_LRMD_TIMEOUT, timeout); crm_xml_add_int(data, F_LRMD_RSC_START_DELAY, start_delay); for (tmp = params; tmp; tmp = tmp->next) { hash2field((gpointer) tmp->key, (gpointer) tmp->value, args); } rc = lrmd_send_command(lrmd, LRMD_OP_RSC_EXEC, data, NULL, timeout, options, TRUE); free_xml(data); lrmd_key_value_freeall(params); return rc; } static int lrmd_api_cancel(lrmd_t * lrmd, const char *rsc_id, const char *action, int interval) { int rc = pcmk_ok; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add(data, F_LRMD_RSC_ACTION, action); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); crm_xml_add_int(data, F_LRMD_RSC_INTERVAL, interval); rc = lrmd_send_command(lrmd, LRMD_OP_RSC_CANCEL, data, NULL, 0, 0, TRUE); free_xml(data); return rc; } static int list_stonith_agents(lrmd_list_t ** resources) { int rc = 0; stonith_key_value_t *stonith_resources = NULL; stonith_key_value_t *dIter = NULL; stonith_api->cmds->list_agents(stonith_api, st_opt_sync_call, NULL, &stonith_resources, 0); for (dIter = stonith_resources; dIter; dIter = dIter->next) { rc++; if(resources) { *resources = lrmd_list_add(*resources, dIter->value); } } stonith_key_value_freeall(stonith_resources, 1, 0); return rc; } static int lrmd_api_list_agents(lrmd_t * lrmd, lrmd_list_t ** resources, const char *class, const char *provider) { int rc = 0; if (safe_str_eq(class, "stonith")) { rc += list_stonith_agents(resources); } else { GListPtr gIter = NULL; GList *agents = resources_list_agents(class, provider); for (gIter = agents; gIter != NULL; gIter = gIter->next) { *resources = lrmd_list_add(*resources, (const char *)gIter->data); rc++; } g_list_free_full(agents, free); if (!class) { rc += list_stonith_agents(resources); } } if(rc == 0) { crm_notice("No agents found for class %s", class); rc = -EPROTONOSUPPORT; } return rc; } static int does_provider_have_agent(const char *agent, const char *provider, const char *class) { int found = 0; GList *agents = NULL; GListPtr gIter2 = NULL; agents = resources_list_agents(class, provider); for (gIter2 = agents; gIter2 != NULL; gIter2 = gIter2->next) { if (safe_str_eq(agent, gIter2->data)) { found = 1; } } g_list_free_full(agents, free); return found; } static int lrmd_api_list_ocf_providers(lrmd_t * lrmd, const char *agent, lrmd_list_t ** providers) { int rc = pcmk_ok; char *provider = NULL; GList *ocf_providers = NULL; GListPtr gIter = NULL; ocf_providers = resources_list_providers("ocf"); for (gIter = ocf_providers; gIter != NULL; gIter = gIter->next) { provider = gIter->data; if (!agent || does_provider_have_agent(agent, provider, "ocf")) { *providers = lrmd_list_add(*providers, (const char *)gIter->data); rc++; } } g_list_free_full(ocf_providers, free); return rc; } static int lrmd_api_list_standards(lrmd_t * lrmd, lrmd_list_t ** supported) { int rc = 0; GList *standards = NULL; GListPtr gIter = NULL; standards = resources_list_standards(); for (gIter = standards; gIter != NULL; gIter = gIter->next) { *supported = lrmd_list_add(*supported, (const char *)gIter->data); rc++; } if(list_stonith_agents(NULL) > 0) { *supported = lrmd_list_add(*supported, "stonith"); rc++; } g_list_free_full(standards, free); return rc; } lrmd_t * lrmd_api_new(void) { lrmd_t *new_lrmd = NULL; lrmd_private_t *pvt = NULL; new_lrmd = calloc(1, sizeof(lrmd_t)); pvt = calloc(1, sizeof(lrmd_private_t)); pvt->remote = calloc(1, sizeof(crm_remote_t)); new_lrmd->cmds = calloc(1, sizeof(lrmd_api_operations_t)); pvt->type = CRM_CLIENT_IPC; new_lrmd->private = pvt; new_lrmd->cmds->connect = lrmd_api_connect; new_lrmd->cmds->connect_async = lrmd_api_connect_async; new_lrmd->cmds->is_connected = lrmd_api_is_connected; new_lrmd->cmds->poke_connection = lrmd_api_poke_connection; new_lrmd->cmds->disconnect = lrmd_api_disconnect; new_lrmd->cmds->register_rsc = lrmd_api_register_rsc; new_lrmd->cmds->unregister_rsc = lrmd_api_unregister_rsc; new_lrmd->cmds->get_rsc_info = lrmd_api_get_rsc_info; new_lrmd->cmds->set_callback = lrmd_api_set_callback; new_lrmd->cmds->get_metadata = lrmd_api_get_metadata; new_lrmd->cmds->exec = lrmd_api_exec; new_lrmd->cmds->cancel = lrmd_api_cancel; new_lrmd->cmds->list_agents = lrmd_api_list_agents; new_lrmd->cmds->list_ocf_providers = lrmd_api_list_ocf_providers; new_lrmd->cmds->list_standards = lrmd_api_list_standards; if (!stonith_api) { stonith_api = stonith_api_new(); } return new_lrmd; } lrmd_t * lrmd_remote_api_new(const char *nodename, const char *server, int port) { #ifdef HAVE_GNUTLS_GNUTLS_H lrmd_t *new_lrmd = lrmd_api_new(); lrmd_private_t *native = new_lrmd->private; if (!nodename && !server) { return NULL; } native->type = CRM_CLIENT_TLS; native->remote_nodename = nodename ? strdup(nodename) : strdup(server); native->server = server ? strdup(server) : strdup(nodename); native->port = port ? port : DEFAULT_REMOTE_PORT; return new_lrmd; #else crm_err("GNUTLS is not enabled for this build, remote LRMD client can not be created"); return NULL; #endif } void lrmd_api_delete(lrmd_t * lrmd) { if (!lrmd) { return; } lrmd->cmds->disconnect(lrmd); /* no-op if already disconnected */ free(lrmd->cmds); if (lrmd->private) { lrmd_private_t *native = lrmd->private; #ifdef HAVE_GNUTLS_GNUTLS_H free(native->server); #endif free(native->remote_nodename); free(native->remote); } free(lrmd->private); free(lrmd); } diff --git a/lrmd/regression.py.in b/lrmd/regression.py.in index be323fd15c..7c33d9c3ba 100755 --- a/lrmd/regression.py.in +++ b/lrmd/regression.py.in @@ -1,926 +1,931 @@ #!/usr/bin/python # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. import os import sys import subprocess import shlex import time + def output_from_command(command, no_wait=0): test = subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE) if no_wait == 0: test.wait() else: return 0 return test.communicate()[0].split("\n") class Test: def __init__(self, name, description, verbose = 0, tls = 0): self.name = name self.description = description self.cmds = [] self.daemon_location = "@CRM_DAEMON_DIR@/lrmd" self.test_tool_location = "@CRM_DAEMON_DIR@/lrmd_test" self.verbose = verbose self.tls = tls self.result_txt = "" self.cmd_tool_output = "" self.result_exitcode = 0; self.lrmd_process = None self.stonith_process = None self.executed = 0 def __new_cmd(self, cmd, args, exitcode, stdout_match = "", no_wait = 0, stdout_negative_match = "", kill=None): if self.verbose and cmd == self.test_tool_location: args = args + " -V " if (cmd == self.test_tool_location) and self.tls: args = args + " -S " self.cmds.append( { "cmd" : cmd, "kill" : kill, "args" : args, "expected_exitcode" : exitcode, "stdout_match" : stdout_match, "stdout_negative_match" : stdout_negative_match, "no_wait" : no_wait, "cmd_output" : "", } ) def start_environment(self): ### make sure we are in full control here ### cmd = shlex.split("killall -q -9 stonithd lrmd lt-lrmd lrmd_test lt-lrmd_test") test = subprocess.Popen(cmd, stdout=subprocess.PIPE) test.wait() additional_args = "" self.stonith_process = subprocess.Popen(shlex.split("@CRM_DAEMON_DIR@/stonithd -s")) if self.tls: additional_args = additional_args + " -t " if self.verbose: additional_args = additional_args + " -VVV " self.lrmd_process = subprocess.Popen(shlex.split("%s %s -l /tmp/lrmd-regression.log" % (self.daemon_location, additional_args))) time.sleep(1) def clean_environment(self): if self.lrmd_process: self.lrmd_process.terminate() self.lrmd_process.wait() if self.verbose: print "Daemon output" f = open('/tmp/lrmd-regression.log', 'r') for line in f.readlines(): print line.strip() os.remove('/tmp/lrmd-regression.log') if self.stonith_process: self.stonith_process.terminate() self.stonith_process.wait() self.lrmd_process = None self.stonith_process = None def add_sys_cmd(self, cmd, args): self.__new_cmd(cmd, args, 0, "") def add_sys_cmd_no_wait(self, cmd, args): self.__new_cmd(cmd, args, 0, "", 1) def add_cmd_check_stdout(self, args, match, no_match = ""): self.__new_cmd(self.test_tool_location, args, 0, match, 0, no_match) def add_cmd(self, args): self.__new_cmd(self.test_tool_location, args, 0, "") def add_cmd_and_kill(self, killProc, args): self.__new_cmd(self.test_tool_location, args, 0, "", kill=killProc) def add_expected_fail_cmd(self, args): self.__new_cmd(self.test_tool_location, args, 255, "") def get_exitcode(self): return self.result_exitcode def print_result(self, filler): print "%s%s" % (filler, self.result_txt) def run_cmd(self, args): cmd = shlex.split(args['args']) cmd.insert(0, args['cmd']) if self.verbose: print "\n\nRunning: "+" ".join(cmd) test = subprocess.Popen(cmd, stdout=subprocess.PIPE) if args['kill']: if self.verbose: print "Also running: "+args['kill'] ### Typically the kill argument is used to detect some sort of ### failure. Without yeilding for a few seconds here the process ### launched earlier that is listening for the failure may not have time ### to connect to the lrmd. time.sleep(2) subprocess.Popen(shlex.split(args['kill'])) if args['no_wait'] == 0: test.wait() else: return 0 output = test.communicate()[0] if args['stdout_match'] != "" and output.count(args['stdout_match']) == 0: test.returncode = -2 print "STDOUT string '%s' was not found in cmd output" % (args['stdout_match']) if args['stdout_negative_match'] != "" and output.count(args['stdout_negative_match']) != 0: test.returncode = -2 print "STDOUT string '%s' was found in cmd output" % (args['stdout_negative_match']) args['cmd_output'] = output return test.returncode; def run(self): res = 0 i = 1 self.start_environment() if self.verbose: print "\n--- START TEST - %s" % self.name self.result_txt = "SUCCESS - '%s'" % (self.name) self.result_exitcode = 0 for cmd in self.cmds: res = self.run_cmd(cmd) if res != cmd['expected_exitcode']: print cmd['cmd_output'] print "Step %d FAILED - command returned %d, expected %d" % (i, res, cmd['expected_exitcode']) self.result_txt = "FAILURE - '%s' failed at step %d. Command: lrmd_test %s" % (self.name, i, cmd['args']) self.result_exitcode = -1 break else: if self.verbose: print cmd['cmd_output'].strip() print "Step %d SUCCESS" % (i) i = i + 1 self.clean_environment() print self.result_txt if self.verbose: print "--- END TEST - %s\n" % self.name self.executed = 1 return res class Tests: def __init__(self, verbose = 0, tls = 0): self.tests = [] self.verbose = verbose self.tls = tls; self.rsc_classes = output_from_command("crm_resource --list-standards") self.rsc_classes = self.rsc_classes[:-1] # Strip trailing empty line print "Testing "+repr(self.rsc_classes) + if not os.path.isfile("/etc/pacemaker/authkey"): + os.system("mkdir -p /etc/pacemaker") + os.system("dd if=/dev/urandom of=/etc/pacemaker/authkey bs=4096 count=1") + self.common_cmds = { "ocf_reg_line" : "-c register_rsc -r ocf_test_rsc -t 3000 -C ocf -P pacemaker -T Dummy", "ocf_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:ocf_test_rsc action:none rc:ok op_status:complete\"", "ocf_unreg_line" : "-c unregister_rsc -r \"ocf_test_rsc\" -t 3000", "ocf_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:ocf_test_rsc action:none rc:ok op_status:complete\"", "ocf_start_line" : "-c exec -r \"ocf_test_rsc\" -a \"start\" -t 3000 ", "ocf_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:start rc:ok op_status:complete\" ", "ocf_stop_line" : "-c exec -r \"ocf_test_rsc\" -a \"stop\" -t 3000 ", "ocf_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:stop rc:ok op_status:complete\" ", "ocf_monitor_line" : "-c exec -r \"ocf_test_rsc\" -a \"monitor\" -i \"2000\" -t 3000", "ocf_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:monitor rc:ok op_status:complete\" -t 3000", "ocf_cancel_line" : "-c cancel -r \"ocf_test_rsc\" -a \"monitor\" -i \"2000\" -t \"3000\" ", "ocf_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "systemd_reg_line" : "-c register_rsc -r systemd_test_rsc -t 3000 -C systemd -T lrmd_dummy_daemon", "systemd_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:systemd_test_rsc action:none rc:ok op_status:complete\"", "systemd_unreg_line" : "-c unregister_rsc -r \"systemd_test_rsc\" -t 3000", "systemd_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:systemd_test_rsc action:none rc:ok op_status:complete\"", "systemd_start_line" : "-c exec -r \"systemd_test_rsc\" -a \"start\" -t 3000 ", "systemd_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:start rc:ok op_status:complete\" ", "systemd_stop_line" : "-c exec -r \"systemd_test_rsc\" -a \"stop\" -t 3000 ", "systemd_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:stop rc:ok op_status:complete\" ", "systemd_monitor_line" : "-c exec -r \"systemd_test_rsc\" -a \"monitor\" -i \"2000\" -t 3000", "systemd_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:monitor rc:ok op_status:complete\" -t 3000", "systemd_cancel_line" : "-c cancel -r \"systemd_test_rsc\" -a \"monitor\" -i \"2000\" -t \"3000\" ", "systemd_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "upstart_reg_line" : "-c register_rsc -r upstart_test_rsc -t 3000 -C upstart -T lrmd_dummy_daemon", "upstart_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:upstart_test_rsc action:none rc:ok op_status:complete\"", "upstart_unreg_line" : "-c unregister_rsc -r \"upstart_test_rsc\" -t 3000", "upstart_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:upstart_test_rsc action:none rc:ok op_status:complete\"", "upstart_start_line" : "-c exec -r \"upstart_test_rsc\" -a \"start\" -t 3000 ", "upstart_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:start rc:ok op_status:complete\" ", "upstart_stop_line" : "-c exec -r \"upstart_test_rsc\" -a \"stop\" -t 3000 ", "upstart_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:stop rc:ok op_status:complete\" ", "upstart_monitor_line" : "-c exec -r \"upstart_test_rsc\" -a \"monitor\" -i \"2000\" -t 3000", "upstart_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:monitor rc:ok op_status:complete\" -t 3000", "upstart_cancel_line" : "-c cancel -r \"upstart_test_rsc\" -a \"monitor\" -i \"2000\" -t \"3000\" ", "upstart_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "service_reg_line" : "-c register_rsc -r service_test_rsc -t 3000 -C service -T lrmd_dummy_daemon", "service_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:service_test_rsc action:none rc:ok op_status:complete\"", "service_unreg_line" : "-c unregister_rsc -r \"service_test_rsc\" -t 3000", "service_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:service_test_rsc action:none rc:ok op_status:complete\"", "service_start_line" : "-c exec -r \"service_test_rsc\" -a \"start\" -t 3000 ", "service_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:start rc:ok op_status:complete\" ", "service_stop_line" : "-c exec -r \"service_test_rsc\" -a \"stop\" -t 3000 ", "service_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:stop rc:ok op_status:complete\" ", "service_monitor_line" : "-c exec -r \"service_test_rsc\" -a \"monitor\" -i \"2000\" -t 3000", "service_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:monitor rc:ok op_status:complete\" -t 3000", "service_cancel_line" : "-c cancel -r \"service_test_rsc\" -a \"monitor\" -i \"2000\" -t \"3000\" ", "service_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "lsb_reg_line" : "-c register_rsc -r lsb_test_rsc -t 3000 -C lsb -T lrmd_dummy_daemon", "lsb_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:lsb_test_rsc action:none rc:ok op_status:complete\" ", "lsb_unreg_line" : "-c unregister_rsc -r \"lsb_test_rsc\" -t 3000", "lsb_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:lsb_test_rsc action:none rc:ok op_status:complete\"", "lsb_start_line" : "-c exec -r \"lsb_test_rsc\" -a \"start\" -t 3000 ", "lsb_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:start rc:ok op_status:complete\" ", "lsb_stop_line" : "-c exec -r \"lsb_test_rsc\" -a \"stop\" -t 3000 ", "lsb_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:stop rc:ok op_status:complete\" ", "lsb_monitor_line" : "-c exec -r \"lsb_test_rsc\" -a status -i \"2000\" -t 3000", "lsb_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:status rc:ok op_status:complete\" -t 3000", "lsb_cancel_line" : "-c cancel -r \"lsb_test_rsc\" -a \"status\" -i \"2000\" -t \"3000\" ", "lsb_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:status rc:ok op_status:Cancelled\" ", "stonith_reg_line" : "-c register_rsc -r stonith_test_rsc -t 3000 -C stonith -P pacemaker -T fence_dummy_monitor", "stonith_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:stonith_test_rsc action:none rc:ok op_status:complete\" ", "stonith_unreg_line" : "-c unregister_rsc -r \"stonith_test_rsc\" -t 3000", "stonith_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:stonith_test_rsc action:none rc:ok op_status:complete\"", "stonith_start_line" : "-c exec -r \"stonith_test_rsc\" -a \"start\" -t 8000 ", "stonith_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:start rc:ok op_status:complete\" ", "stonith_stop_line" : "-c exec -r \"stonith_test_rsc\" -a \"stop\" -t 3000 ", "stonith_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:stop rc:ok op_status:complete\" ", "stonith_monitor_line" : "-c exec -r \"stonith_test_rsc\" -a \"monitor\" -i \"2000\" -t 3000", "stonith_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:ok op_status:complete\" -t 3000", "stonith_cancel_line" : "-c cancel -r \"stonith_test_rsc\" -a \"monitor\" -i \"2000\" -t \"3000\" ", "stonith_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:ok op_status:Cancelled\" ", } def new_test(self, name, description): test = Test(name, description, self.verbose, self.tls) self.tests.append(test) return test def setup_test_environment(self): self.cleanup_test_environment() ### Make fake systemd daemon and unit file ### dummy_daemon = "#!/bin/bash\nwhile true\ndo\nsleep 5\ndone" dummy_service_file = ("[Unit]\n" "Description=Dummy Resource\n" "[Service]\n" "Type=simple\n" "ExecStart=/usr/sbin/lrmd_dummy_daemon\n") dummy_upstart_job = (""" description "Dummy service for regression tests" exec dd if=/dev/random of=/dev/null """) dummy_fence_sleep_agent = ("""#!/usr/bin/python import sys import time def main(): for line in sys.stdin.readlines(): if line.count("monitor") > 0: time.sleep(30000) sys.exit(0) sys.exit(-1) if __name__ == "__main__": main() """) dummy_fence_agent = ("""#!/usr/bin/python import sys def main(): for line in sys.stdin.readlines(): if line.count("monitor") > 0: sys.exit(0) if line.count("metadata") > 0: print '' print ' dummy description.' print ' http://www.example.com' print ' ' print ' ' print ' ' print ' ' print ' Fencing Action' print ' ' print ' ' print ' ' print ' ' print ' Physical plug number or name of virtual machine' print ' ' print ' ' print ' ' print ' ' print ' ' print ' ' print ' ' print ' ' print '' sys.exit(0) sys.exit(-1) if __name__ == "__main__": main() """) os.system("cat <<-END >>/etc/init/lrmd_dummy_daemon.conf\n%s\nEND" % (dummy_upstart_job)) os.system("cat <<-END >>/usr/sbin/lrmd_dummy_daemon\n%s\nEND" % (dummy_daemon)) os.system("cat <<-END >>/lib/systemd/system/lrmd_dummy_daemon.service\n%s\nEND" % (dummy_service_file)) os.system("chmod u+x /usr/sbin/lrmd_dummy_daemon") os.system("cat <<-END >>/usr/sbin/fence_dummy_sleep\n%s\nEND" % (dummy_fence_sleep_agent)) os.system("chmod 711 /usr/sbin/fence_dummy_sleep") os.system("cat <<-END >>/usr/sbin/fence_dummy_monitor\n%s\nEND" % (dummy_fence_agent)) os.system("chmod 711 /usr/sbin/fence_dummy_monitor") os.system("cp /usr/share/pacemaker/tests/cts/LSBDummy /etc/init.d/lrmd_dummy_daemon") os.system("mkdir -p @CRM_CORE_DIR@/root") os.system("systemctl daemon-reload") def cleanup_test_environment(self): os.system("rm -f /lib/systemd/system/lrmd_dummy_daemon.service") os.system("rm -f /etc/init.d/lrmd_dummy_daemon") os.system("rm -f /usr/sbin/lrmd_dummy_daemon") os.system("rm -f /usr/sbin/fence_dummy_monitor") os.system("rm -f /usr/sbin/fence_dummy_sleep") os.system("systemctl daemon-reload") ### These are tests that should apply to all resource classes ### def build_generic_tests(self): common_cmds = self.common_cmds ### register/unregister tests ### for rsc in self.rsc_classes: test = self.new_test("generic_registration_%s" % (rsc), "Simple resource registration test for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### start/stop tests ### for rsc in self.rsc_classes: test = self.new_test("generic_start_stop_%s" % (rsc), "Simple start and stop test for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### monitor cancel test ### for rsc in self.rsc_classes: test = self.new_test("generic_monitor_cancel_%s" % (rsc), "Simple monitor cancel test for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_cancel_line" % (rsc)] + " " + common_cmds["%s_cancel_event" % (rsc)]) test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### stop implies cancel test ### for rsc in self.rsc_classes: test = self.new_test("generic_stop_implies_cancel_%s" % (rsc), "Verify stopping a resource implies cancel of recurring ops for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### These are complex tests that involve managing multiple resouces of different types ### def build_multi_rsc_tests(self): common_cmds = self.common_cmds # do not use service and systemd at the same time, it is the same resource. ### register start monitor stop unregister resources of each type at the same time. ### test = self.new_test("multi_rsc_start_stop_all", "Start, monitor, and stop resources of multiple types and classes") for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor is not being rescheduled #### for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_cancel_line" % (rsc)] + " " + common_cmds["%s_cancel_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### These are tests related to how the lrmd handles failures. ### def build_negative_tests(self): ### ocf start timeout test ### test = self.new_test("ocf_start_timeout", "Force start timeout to occur, verify start failure.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" -t 3000 " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" -k \"op_sleep\" -v \"3\" -t 3000 -w") test.add_cmd("-l " "\"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:unknown error op_status:Timed Out\" -t 3000") test.add_cmd("-c exec -r test_rsc -a stop -t 3000" "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") test.add_cmd("-c unregister_rsc -r test_rsc -t 3000 " "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### stonith start timeout test ### test = self.new_test("stonith_start_timeout", "Force start timeout to occur, verify start failure.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"stonith\" -P \"pacemaker\" -T \"fence_dummy_sleep\" -t 3000 " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" -t 3000 -w") test.add_cmd("-l " "\"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:unknown error op_status:Timed Out\" -t 4000") test.add_cmd("-c exec -r test_rsc -a stop -t 3000" "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") test.add_cmd("-c unregister_rsc -r test_rsc -t 3000 " "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### stonith component fail ### common_cmds = self.common_cmds test = self.new_test("stonith_component_fail", "Kill stonith component after lrmd connects") test.add_cmd(common_cmds["stonith_reg_line"] + " " + common_cmds["stonith_reg_event"]) test.add_cmd(common_cmds["stonith_start_line"] + " " + common_cmds["stonith_start_event"]) test.add_cmd("-c exec -r \"stonith_test_rsc\" -a \"monitor\" -i \"600000\" " "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:ok op_status:complete\" -t 3000") test.add_cmd_and_kill("killall -9 -q stonithd" ,"-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:unknown error op_status:error\" -t 15000") test.add_cmd(common_cmds["stonith_unreg_line"] + " " + common_cmds["stonith_unreg_event"]) ### monitor fail for ocf resources ### test = self.new_test("monitor_fail_ocf", "Force ocf monitor to fail, verify failure is reported.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" -t 3000 " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" -t 3000 " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" -t 3000 " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"monitor\" -i \"100\" -t 3000 " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" -t 3000") test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" -t 3000") test.add_cmd_and_kill("rm -f /var/run/Dummy-test_rsc.state", "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 6000") test.add_cmd("-c cancel -r \"test_rsc\" -a \"monitor\" -i \"100\" -t \"3000\" " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 3000") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" -t 3000") test.add_cmd("-c unregister_rsc -r \"test_rsc\" -t 3000 " "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### verify notify changes only for monitor operation. ### test = self.new_test("monitor_changes_only", "Verify when flag is set, only monitor changes are notified.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" -t 3000 " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" -t 3000 -o " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"monitor\" -i \"100\" -t 3000 -o " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" -t 3000") test.add_cmd_and_kill("rm -f /var/run/Dummy-test_rsc.state", "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 6000") test.add_cmd("-c cancel -r \"test_rsc\" -a \"monitor\" -i \"100\" -t \"3000\" " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 3000") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" -t 3000") test.add_cmd("-c unregister_rsc -r \"test_rsc\" -t 3000 " "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### monitor fail for systemd resource ### if "systemd" in self.rsc_classes: test = self.new_test("monitor_fail_systemd", "Force systemd monitor to fail, verify failure is reported..") test.add_cmd("-c register_rsc -r \"test_rsc\" -C systemd -T lrmd_dummy_daemon -t 3000 " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" -t 3000 " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" -t 3000 " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"monitor\" -i \"100\" -t 3000 " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" -t 3000") test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" -t 3000") test.add_cmd_and_kill("killall -9 -q lrmd_dummy_daemon", "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 8000") test.add_cmd("-c cancel -r \"test_rsc\" -a \"monitor\" -i \"100\" -t \"3000\" " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 3000") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" -t 3000") test.add_cmd("-c unregister_rsc -r \"test_rsc\" -t 3000 " "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### monitor fail for upstart resource ### if "upstart" in self.rsc_classes: test = self.new_test("monitor_fail_upstart", "Force upstart monitor to fail, verify failure is reported..") test.add_cmd("-c register_rsc -r \"test_rsc\" -C upstart -T lrmd_dummy_daemon -t 3000 " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" -t 3000 " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" -t 3000 " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"monitor\" -i \"100\" -t 3000 " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" -t 3000") test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" -t 3000") test.add_cmd_and_kill("killall -9 -q dd", "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 8000") test.add_cmd("-c cancel -r \"test_rsc\" -a \"monitor\" -i \"100\" -t \"3000\" " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 3000") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" -t 3000") test.add_cmd("-c unregister_rsc -r \"test_rsc\" -t 3000 " "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Cancel non-existent operation on a resource ### test = self.new_test("cancel_non_existent_op", "Attempt to cancel the wrong monitor operation, verify expected failure") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" -t 3000 " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" -t 3000 " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" -t 3000 " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"monitor\" -i \"100\" -t 3000 " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" -t 3000") test.add_expected_fail_cmd("-c cancel -r test_rsc -a \"monitor\" -i 1234 -t \"3000\" " ### interval is wrong, should fail "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-c cancel -r test_rsc -a stop -i 100 -t \"3000\" " ### action name is wrong, should fail "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" -t 3000 " "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Attempt to invoke non-existent rsc id ### test = self.new_test("invoke_non_existent_rsc", "Attempt to perform operations on a non-existent rsc id.") test.add_expected_fail_cmd("-c exec -r \"test_rsc\" -a \"start\" -t 3000 " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:unknown error op_status:complete\" ") test.add_expected_fail_cmd("-c exec -r test_rsc -a stop -t 3000" "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") test.add_expected_fail_cmd("-c exec -r test_rsc -a monitor -i 3000 -t 3000" "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_expected_fail_cmd("-c cancel -r test_rsc -a start -t 3000 " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Cancelled\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" -t 3000 " "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register and start a resource that doesn't exist, systemd ### if "systemd" in self.rsc_classes: test = self.new_test("start_uninstalled_systemd", "Register uninstalled systemd agent, try to start, verify expected failure") test.add_cmd("-c register_rsc -r \"test_rsc\" -C systemd -T this_is_fake1234 -t 3000 " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" -t 3000 " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:complete\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" -t 3000 " "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") if "upstart" in self.rsc_classes: test = self.new_test("start_uninstalled_upstart", "Register uninstalled upstart agent, try to start, verify expected failure") test.add_cmd("-c register_rsc -r \"test_rsc\" -C upstart -T this_is_fake1234 -t 3000 " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" -t 3000 " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:complete\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" -t 3000 " "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register and start a resource that doesn't exist, ocf ### test = self.new_test("start_uninstalled_ocf", "Register uninstalled ocf agent, try to start, verify expected failure.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C ocf -P pacemaker -T this_is_fake1234 -t 3000 " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" -t 3000 " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:complete\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" -t 3000 " "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register ocf with non-existent provider ### test = self.new_test("start_ocf_bad_provider", "Register ocf agent with a non-existent provider, verify expected failure.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C ocf -P pancakes -T Dummy -t 3000 " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" -t 3000 " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:complete\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" -t 3000 " "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register ocf with empty provider field ### test = self.new_test("start_ocf_no_provider", "Register ocf agent with a no provider, verify expected failure.") test.add_expected_fail_cmd("-c register_rsc -r \"test_rsc\" -C ocf -T Dummy -t 3000 " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_expected_fail_cmd("-c exec -r \"test_rsc\" -a \"start\" -t 3000 " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Error\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" -t 3000 " "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### These are tests that target specific cases ### def build_custom_tests(self): ### start delay then stop test ### test = self.new_test("start_delay", "Verify start delay works as expected.") test.add_cmd("-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" -t 3000") test.add_cmd("-c exec -r test_rsc -s 6000 -a start -w -t 6000") test.add_expected_fail_cmd("-l " "\"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 2000") test.add_cmd("-l " "\"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 6000") test.add_cmd("-c exec -r test_rsc -a stop -t 3000" "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") test.add_cmd("-c unregister_rsc -r test_rsc -t 3000 " "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### start delay, but cancel before it gets a chance to start. ### test = self.new_test("start_delay_cancel", "Using start_delay, start a rsc, but cancel the start op before execution.") test.add_cmd("-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" -t 3000") test.add_cmd("-c exec -r test_rsc -s 5000 -a start -w -t 4000") test.add_cmd("-c cancel -r test_rsc -a start -t 3000 " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Cancelled\" ") test.add_expected_fail_cmd("-l " "\"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 5000") test.add_cmd("-c unregister_rsc -r test_rsc -t 3000 " "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register a bunch of resources, verify we can get info on them ### test = self.new_test("verify_get_rsc_info", "Register multiple resources, verify retrieval of rsc info.") if "systemd" in self.rsc_classes: test.add_cmd("-c register_rsc -r rsc1 -C systemd -T lrmd_dummy_daemon -t 3000 ") test.add_cmd("-c get_rsc_info -r rsc1 ") test.add_cmd("-c unregister_rsc -r rsc1 -t 3000 ") test.add_expected_fail_cmd("-c get_rsc_info -r rsc1 ") if "upstart" in self.rsc_classes: test.add_cmd("-c register_rsc -r rsc1 -C upstart -T lrmd_dummy_daemon -t 3000 ") test.add_cmd("-c get_rsc_info -r rsc1 ") test.add_cmd("-c unregister_rsc -r rsc1 -t 3000 ") test.add_expected_fail_cmd("-c get_rsc_info -r rsc1 ") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker -t 3000 ") test.add_cmd("-c get_rsc_info -r rsc2 ") test.add_cmd("-c unregister_rsc -r rsc2 -t 3000 ") test.add_expected_fail_cmd("-c get_rsc_info -r rsc2 ") ### Register duplicate, verify only one entry exists and can still be removed. test = self.new_test("duplicate_registration", "Register resource multiple times, verify only one entry exists and can be removed.") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker -t 3000 ") test.add_cmd_check_stdout("-c get_rsc_info -r rsc2 ", "id:rsc2 class:ocf provider:pacemaker type:Dummy") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker -t 3000 ") test.add_cmd_check_stdout("-c get_rsc_info -r rsc2 ", "id:rsc2 class:ocf provider:pacemaker type:Dummy") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Stateful -P pacemaker -t 3000 ") test.add_cmd_check_stdout("-c get_rsc_info -r rsc2 ", "id:rsc2 class:ocf provider:pacemaker type:Stateful") test.add_cmd("-c unregister_rsc -r rsc2 -t 3000 ") test.add_expected_fail_cmd("-c get_rsc_info -r rsc2 ") ### verify the option to only send notification to the original client. ### test = self.new_test("notify_orig_client_only", "Verify option to only send notifications to the client originating the action.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" -t 3000 " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" -t 3000 " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"monitor\" -i \"100\" -t 3000 -n " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") # this will fail because the monitor notifications should only go to the original caller, which no longer exists. test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" -t 3000") test.add_cmd("-c cancel -r \"test_rsc\" -a \"monitor\" -i \"100\" -t \"3000\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" -t 3000 " "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### get metadata ### test = self.new_test("get_ocf_metadata", "Retrieve metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"ocf\" -P \"pacemaker\" -T \"Dummy\"" ,"resource-agent name=\"Dummy\"") test.add_cmd("-c metadata -C \"ocf\" -P \"pacemaker\" -T \"Stateful\"") test.add_expected_fail_cmd("-c metadata -P \"pacemaker\" -T \"Stateful\"") test.add_expected_fail_cmd("-c metadata -C \"ocf\" -P \"pacemaker\" -T \"fake_agent\"") ### get metadata ### test = self.new_test("get_lsb_metadata", "Retrieve metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"lsb\" -T \"lrmd_dummy_daemon\"" ,"resource-agent name=\"lrmd_dummy_daemon\"") ### get stonith metadata ### test = self.new_test("get_stonith_metadata", "Retrieve stonith metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"stonith\" -P \"pacemaker\" -T \"fence_dummy_monitor\"", "resource-agent name=\"fence_dummy_monitor\"") ### get metadata ### if "systemd" in self.rsc_classes: test = self.new_test("get_systemd_metadata", "Retrieve metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"systemd\" -T \"lrmd_dummy_daemon\"" ,"resource-agent name=\"lrmd_dummy_daemon\"") ### get metadata ### if "upstart" in self.rsc_classes: test = self.new_test("get_systemd_metadata", "Retrieve metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"systemd\" -T \"lrmd_dummy_daemon\"" ,"resource-agent name=\"lrmd_dummy_daemon\"") ### get ocf providers ### test = self.new_test("list_ocf_providers", "Retrieve list of available resource providers, verifies pacemaker is a provider.") test.add_cmd_check_stdout("-c list_ocf_providers ", "pacemaker") test.add_cmd_check_stdout("-c list_ocf_providers -T ping", "pacemaker") ### Verify agents only exist in their lists ### test = self.new_test("verify_agent_lists", "Verify the agent lists contain the right data.") test.add_cmd_check_stdout("-c list_agents ", "Stateful") ### ocf ### test.add_cmd_check_stdout("-c list_agents -C ocf", "Stateful") test.add_cmd_check_stdout("-c list_agents -C lsb", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents -C service", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents ", "lrmd_dummy_daemon") ### init.d ### test.add_cmd_check_stdout("-c list_agents -C lsb", "lrmd_dummy_daemon") test.add_cmd_check_stdout("-c list_agents -C service", "lrmd_dummy_daemon") test.add_cmd_check_stdout("-c list_agents -C ocf", "", "lrmd_dummy_daemon") ### should not exist test.add_cmd_check_stdout("-c list_agents -C ocf", "", "lrmd_dummy_daemon") ### should not exist test.add_cmd_check_stdout("-c list_agents -C lsb", "", "fence_dummy_monitor") ### should not exist test.add_cmd_check_stdout("-c list_agents -C service", "", "fence_dummy_monitor") ### should not exist test.add_cmd_check_stdout("-c list_agents -C ocf", "", "fence_dummy_monitor") ### should not exist if "systemd" in self.rsc_classes: test.add_cmd_check_stdout("-c list_agents ", "lrmd_dummy_daemon") ### systemd ### test.add_cmd_check_stdout("-c list_agents -C service", "lrmd_dummy_daemon") test.add_cmd_check_stdout("-c list_agents -C systemd", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents -C systemd", "lrmd_dummy_daemon") test.add_cmd_check_stdout("-c list_agents -C systemd", "", "fence_dummy_monitor") ### should not exist if "upstart" in self.rsc_classes: test.add_cmd_check_stdout("-c list_agents ", "lrmd_dummy_daemon") ### upstart ### test.add_cmd_check_stdout("-c list_agents -C service", "lrmd_dummy_daemon") test.add_cmd_check_stdout("-c list_agents -C upstart", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents -C upstart", "lrmd_dummy_daemon") test.add_cmd_check_stdout("-c list_agents -C upstart", "", "fence_dummy_monitor") ### should not exist if "stonith" in self.rsc_classes: test.add_cmd_check_stdout("-c list_agents -C stonith", "fence_dummy_monitor") ### stonith ### test.add_cmd_check_stdout("-c list_agents -C stonith", "", "lrmd_dummy_daemon") ### should not exist test.add_cmd_check_stdout("-c list_agents -C stonith", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents ", "fence_dummy_monitor") def print_list(self): print "\n==== %d TESTS FOUND ====" % (len(self.tests)) print "%35s - %s" % ("TEST NAME", "TEST DESCRIPTION") print "%35s - %s" % ("--------------------", "--------------------") for test in self.tests: print "%35s - %s" % (test.name, test.description) print "==== END OF LIST ====\n" def run_single(self, name): for test in self.tests: if test.name == name: test.run() break; def run_tests_matching(self, pattern): for test in self.tests: if test.name.count(pattern) != 0: test.run() def run_tests(self): for test in self.tests: test.run() def exit(self): for test in self.tests: if test.executed == 0: continue if test.get_exitcode() != 0: sys.exit(-1) sys.exit(0); def print_results(self): failures = 0; success = 0; print "\n\n======= FINAL RESULTS ==========" print "\n--- FAILURE RESULTS:" for test in self.tests: if test.executed == 0: continue if test.get_exitcode() != 0: failures = failures + 1 test.print_result(" ") else: success = success + 1 if failures == 0: print " None" print "\n--- TOTALS\n Pass:%d\n Fail:%d\n" % (success, failures) class TestOptions: def __init__(self): self.options = {} self.options['list-tests'] = 0 self.options['run-all'] = 1 self.options['run-only'] = "" self.options['run-only-pattern'] = "" self.options['verbose'] = 0 self.options['invalid-arg'] = "" self.options['show-usage'] = 0 self.options['tls-backend'] = 0 def build_options(self, argv): args = argv[1:] skip = 0 for i in range(0, len(args)): if skip: skip = 0 continue elif args[i] == "-h" or args[i] == "--help": self.options['show-usage'] = 1 elif args[i] == "-l" or args[i] == "--list-tests": self.options['list-tests'] = 1 elif args[i] == "-V" or args[i] == "--verbose": self.options['verbose'] = 1 elif args[i] == "-S" or args[i] == "--tls-backend": self.options['tls-backend'] = 1 elif args[i] == "-r" or args[i] == "--run-only": self.options['run-only'] = args[i+1] skip = 1 elif args[i] == "-p" or args[i] == "--run-only-pattern": self.options['run-only-pattern'] = args[i+1] skip = 1 def show_usage(self): print "usage: " + sys.argv[0] + " [options]" print "If no options are provided, all tests will run" print "Options:" print "\t [--help | -h] Show usage" print "\t [--list-tests | -l] Print out all registered tests." print "\t [--run-only | -r 'testname'] Run a specific test" print "\t [--verbose | -V] Verbose output" print "\t [--tls-backend | -S Use tls backend" print "\t [--run-only-pattern | -p 'string'] Run only tests containing the string value" print "\n\tExample: Run only the test 'start_top'" print "\t\t python ./regression.py --run-only start_stop" print "\n\tExample: Run only the tests with the string 'systemd' present in them" print "\t\t python ./regression.py --run-only-pattern systemd" def main(argv): o = TestOptions() o.build_options(argv) tests = Tests(o.options['verbose'], o.options['tls-backend']) tests.build_generic_tests() tests.build_multi_rsc_tests() tests.build_negative_tests() tests.build_custom_tests() tests.setup_test_environment() print "Starting ..." if o.options['list-tests']: tests.print_list() elif o.options['show-usage']: o.show_usage() elif o.options['run-only-pattern'] != "": tests.run_tests_matching(o.options['run-only-pattern']) tests.print_results() elif o.options['run-only'] != "": tests.run_single(o.options['run-only']) tests.print_results() else: tests.run_tests() tests.print_results() tests.cleanup_test_environment() tests.exit() if __name__=="__main__": main(sys.argv)