diff --git a/crmd/control.c b/crmd/control.c index 0b7c71398e..f79db1ebb5 100644 --- a/crmd/control.c +++ b/crmd/control.c @@ -1,869 +1,907 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include qb_ipcs_service_t *ipcs = NULL; extern gboolean crm_connect_corosync(crm_cluster_t * cluster); extern void crmd_ha_connection_destroy(gpointer user_data); void crm_shutdown(int nsig); gboolean crm_read_options(gpointer user_data); gboolean fsa_has_quorum = FALSE; crm_trigger_t *fsa_source = NULL; crm_trigger_t *config_read = NULL; /* A_HA_CONNECT */ void do_ha_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { gboolean registered = FALSE; static crm_cluster_t *cluster = NULL; if (cluster == NULL) { cluster = calloc(1, sizeof(crm_cluster_t)); } if (action & A_HA_DISCONNECT) { crm_cluster_disconnect(cluster); crm_info("Disconnected from the cluster"); set_bit(fsa_input_register, R_HA_DISCONNECTED); } if (action & A_HA_CONNECT) { crm_set_status_callback(&peer_update_callback); if (is_openais_cluster()) { #if SUPPORT_COROSYNC registered = crm_connect_corosync(cluster); #endif } else if (is_heartbeat_cluster()) { #if SUPPORT_HEARTBEAT cluster->destroy = crmd_ha_connection_destroy; cluster->hb_dispatch = crmd_ha_msg_callback; registered = crm_cluster_connect(cluster); fsa_cluster_conn = cluster->hb_conn; crm_trace("Be informed of Node Status changes"); if (registered && fsa_cluster_conn->llc_ops->set_nstatus_callback(fsa_cluster_conn, crmd_ha_status_callback, fsa_cluster_conn) != HA_OK) { crm_err("Cannot set nstatus callback: %s", fsa_cluster_conn->llc_ops->errmsg(fsa_cluster_conn)); registered = FALSE; } crm_trace("Be informed of CRM Client Status changes"); if (registered && fsa_cluster_conn->llc_ops->set_cstatus_callback(fsa_cluster_conn, crmd_client_status_callback, fsa_cluster_conn) != HA_OK) { crm_err("Cannot set cstatus callback: %s", fsa_cluster_conn->llc_ops->errmsg(fsa_cluster_conn)); registered = FALSE; } if (registered) { crm_trace("Requesting an initial dump of CRMD client_status"); fsa_cluster_conn->llc_ops->client_status(fsa_cluster_conn, NULL, CRM_SYSTEM_CRMD, -1); } #endif } fsa_our_uname = cluster->uname; fsa_our_uuid = cluster->uuid; if (registered == FALSE) { set_bit(fsa_input_register, R_HA_DISCONNECTED); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } populate_cib_nodes(node_update_none, __FUNCTION__); clear_bit(fsa_input_register, R_HA_DISCONNECTED); crm_info("Connected to the cluster"); } if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } } /* A_SHUTDOWN */ void do_shutdown(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { /* just in case */ set_bit(fsa_input_register, R_SHUTDOWN); if (is_heartbeat_cluster()) { if (is_set(fsa_input_register, pe_subsystem->flag_connected)) { crm_info("Terminating the %s", pe_subsystem->name); if (stop_subsystem(pe_subsystem, TRUE) == FALSE) { /* its gone... */ crm_err("Faking %s exit", pe_subsystem->name); clear_bit(fsa_input_register, pe_subsystem->flag_connected); } else { crm_info("Waiting for subsystems to exit"); crmd_fsa_stall(FALSE); } } crm_info("All subsystems stopped, continuing"); } if (stonith_api) { /* Prevent it from comming up again */ clear_bit(fsa_input_register, R_ST_REQUIRED); crm_info("Disconnecting STONITH..."); stonith_api->cmds->disconnect(stonith_api); } } /* A_SHUTDOWN_REQ */ void do_shutdown_req(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { xmlNode *msg = NULL; crm_info("Sending shutdown request to %s", crm_str(fsa_our_dc)); msg = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); /* set_bit(fsa_input_register, R_STAYDOWN); */ if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } free_xml(msg); } extern crm_ipc_t *attrd_ipc; extern char *max_generation_from; extern xmlNode *max_generation_xml; extern GHashTable *resource_history; extern GHashTable *voted; extern GHashTable *reload_hash; void log_connected_client(gpointer key, gpointer value, gpointer user_data); void log_connected_client(gpointer key, gpointer value, gpointer user_data) { crm_client_t *client = value; crm_err("%s is still connected at exit", crm_client_name(client)); } int crmd_exit(int rc) { GListPtr gIter = NULL; crm_trace("Preparing to exit"); + + if(ipcs) { + crm_trace("Closing IPC server"); + mainloop_del_ipc_server(ipcs); + } + if (attrd_ipc) { + crm_trace("Closing attrd connection"); crm_ipc_close(attrd_ipc); crm_ipc_destroy(attrd_ipc); - } - if (crmd_mainloop) { - g_main_loop_quit(crmd_mainloop); - g_main_loop_unref(crmd_mainloop); + attrd_ipc = NULL; } #if SUPPORT_HEARTBEAT if (fsa_cluster_conn) { + crm_trace("Disconnecting heartbeat"); fsa_cluster_conn->llc_ops->delete(fsa_cluster_conn); fsa_cluster_conn = NULL; } #endif for (gIter = fsa_message_queue; gIter != NULL; gIter = gIter->next) { fsa_data_t *fsa_data = gIter->data; crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]", fsa_input2string(fsa_data->fsa_input), fsa_state2string(fsa_state), fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); delete_fsa_input(fsa_data); } g_list_free(fsa_message_queue); fsa_message_queue = NULL; - crm_client_cleanup(); - empty_uuid_cache(); - crm_peer_destroy(); clear_bit(fsa_input_register, R_MEMBERSHIP); - if (te_subsystem->client && te_subsystem->client->ipcs) { - crm_debug("Full destroy: TE"); - qb_ipcs_disconnect(te_subsystem->client->ipcs); - } - free(te_subsystem); - if (pe_subsystem->client && pe_subsystem->client->ipcs) { - crm_debug("Full destroy: PE"); + crm_trace("Disconnecting Policy Engine"); qb_ipcs_disconnect(pe_subsystem->client->ipcs); } free(pe_subsystem); free(cib_subsystem); + free(te_subsystem); if (reload_hash) { + crm_trace("Destroying reload cache with %d members", g_hash_table_size(reload_hash)); g_hash_table_destroy(reload_hash); } if (voted) { + crm_trace("Destroying voted cache with %d members", g_hash_table_size(voted)); g_hash_table_destroy(voted); } cib_delete(fsa_cib_conn); fsa_cib_conn = NULL; verify_stopped(fsa_state, LOG_WARNING); clear_bit(fsa_input_register, R_LRM_CONNECTED); lrm_state_destroy_all(); + mainloop_destroy_trigger(fsa_source); + mainloop_destroy_trigger(config_read); + mainloop_destroy_trigger(stonith_reconnect); + + if(stonith_api) { + crm_trace("Disconnecting fencing API"); + stonith_api->cmds->free(stonith_api); + } + + crm_client_cleanup(); + empty_uuid_cache(); + crm_peer_destroy(); + free(transition_timer); free(integration_timer); free(finalization_timer); free(election_trigger); free(election_timeout); free(shutdown_escalation_timer); free(wait_timer); free(recheck_timer); free(fsa_our_dc_version); free(fsa_our_uname); free(fsa_our_uuid); free(fsa_our_dc); + free(te_uuid); + + free(fsa_pe_ref); free(max_generation_from); free_xml(max_generation_xml); + mainloop_destroy_signal(SIGUSR1); + mainloop_destroy_signal(SIGTERM); + mainloop_destroy_signal(SIGTRAP); + mainloop_destroy_signal(SIGCHLD); + + if (crmd_mainloop) { + int lpc = 0; + GMainContext *ctx = g_main_loop_get_context(crmd_mainloop); + crm_trace("Draining mainloop %d %d", g_main_loop_is_running(crmd_mainloop), g_main_context_pending(ctx)); + while(g_main_context_pending(ctx) && lpc < 10) { + lpc++; + crm_trace("Iteration %d", lpc); + g_main_context_dispatch(ctx); + } + + crm_trace("Closing mainloop %d %d", g_main_loop_is_running(crmd_mainloop), g_main_context_pending(ctx)); + g_main_loop_quit(crmd_mainloop); + g_main_loop_unref(crmd_mainloop); + crmd_mainloop = NULL; + } + + crm_trace("Done"); return crm_exit(rc); } /* A_EXIT_0, A_EXIT_1 */ void do_exit(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { int exit_code = 0; int log_level = LOG_INFO; const char *exit_type = "gracefully"; if (action & A_EXIT_1) { exit_code = 1; log_level = LOG_ERR; exit_type = "forcefully"; } verify_stopped(cur_state, LOG_ERR); do_crm_log(log_level, "Performing %s - %s exiting the CRMd", fsa_action2string(action), exit_type); if (is_set(fsa_input_register, R_IN_RECOVERY)) { crm_err("Could not recover from internal error"); exit_code = 2; } if (is_set(fsa_input_register, R_STAYDOWN)) { crm_warn("Inhibiting respawn by Heartbeat"); exit_code = 100; } crm_info("[%s] stopped (%d)", crm_system_name, exit_code); delete_fsa_input(msg_data); crmd_exit(exit_code); } /* A_STARTUP */ void do_startup(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { int was_error = 0; int interval = 1; /* seconds between DC heartbeats */ crm_debug("Registering Signal Handlers"); mainloop_add_signal(SIGTERM, crm_shutdown); fsa_source = mainloop_add_trigger(G_PRIORITY_HIGH, crm_fsa_trigger, NULL); config_read = mainloop_add_trigger(G_PRIORITY_HIGH, crm_read_options, NULL); crm_debug("Creating CIB and LRM objects"); fsa_cib_conn = cib_new(); lrm_state_init_local(); /* set up the timers */ transition_timer = calloc(1, sizeof(fsa_timer_t)); integration_timer = calloc(1, sizeof(fsa_timer_t)); finalization_timer = calloc(1, sizeof(fsa_timer_t)); election_trigger = calloc(1, sizeof(fsa_timer_t)); election_timeout = calloc(1, sizeof(fsa_timer_t)); shutdown_escalation_timer = calloc(1, sizeof(fsa_timer_t)); wait_timer = calloc(1, sizeof(fsa_timer_t)); recheck_timer = calloc(1, sizeof(fsa_timer_t)); interval = interval * 1000; if (election_trigger != NULL) { election_trigger->source_id = 0; election_trigger->period_ms = -1; election_trigger->fsa_input = I_DC_TIMEOUT; election_trigger->callback = crm_timer_popped; election_trigger->repeat = FALSE; } else { was_error = TRUE; } if (election_timeout != NULL) { election_timeout->source_id = 0; election_timeout->period_ms = -1; election_timeout->fsa_input = I_ELECTION_DC; election_timeout->callback = crm_timer_popped; election_timeout->repeat = FALSE; } else { was_error = TRUE; } if (transition_timer != NULL) { transition_timer->source_id = 0; transition_timer->period_ms = -1; transition_timer->fsa_input = I_PE_CALC; transition_timer->callback = crm_timer_popped; transition_timer->repeat = FALSE; } else { was_error = TRUE; } if (integration_timer != NULL) { integration_timer->source_id = 0; integration_timer->period_ms = -1; integration_timer->fsa_input = I_INTEGRATED; integration_timer->callback = crm_timer_popped; integration_timer->repeat = FALSE; } else { was_error = TRUE; } if (finalization_timer != NULL) { finalization_timer->source_id = 0; finalization_timer->period_ms = -1; finalization_timer->fsa_input = I_FINALIZED; finalization_timer->callback = crm_timer_popped; finalization_timer->repeat = FALSE; /* for possible enabling... a bug in the join protocol left * a slave in S_PENDING while we think its in S_NOT_DC * * raising I_FINALIZED put us into a transition loop which is * never resolved. * in this loop we continually send probes which the node * NACK's because its in S_PENDING * * if we have nodes where heartbeat is active but the * CRM is not... then this will be handled in the * integration phase */ finalization_timer->fsa_input = I_ELECTION; } else { was_error = TRUE; } if (shutdown_escalation_timer != NULL) { shutdown_escalation_timer->source_id = 0; shutdown_escalation_timer->period_ms = -1; shutdown_escalation_timer->fsa_input = I_STOP; shutdown_escalation_timer->callback = crm_timer_popped; shutdown_escalation_timer->repeat = FALSE; } else { was_error = TRUE; } if (wait_timer != NULL) { wait_timer->source_id = 0; wait_timer->period_ms = 2000; wait_timer->fsa_input = I_NULL; wait_timer->callback = crm_timer_popped; wait_timer->repeat = FALSE; } else { was_error = TRUE; } if (recheck_timer != NULL) { recheck_timer->source_id = 0; recheck_timer->period_ms = -1; recheck_timer->fsa_input = I_PE_CALC; recheck_timer->callback = crm_timer_popped; recheck_timer->repeat = FALSE; } else { was_error = TRUE; } /* set up the sub systems */ cib_subsystem = calloc(1, sizeof(struct crm_subsystem_s)); te_subsystem = calloc(1, sizeof(struct crm_subsystem_s)); pe_subsystem = calloc(1, sizeof(struct crm_subsystem_s)); if (cib_subsystem != NULL) { cib_subsystem->pid = -1; cib_subsystem->name = CRM_SYSTEM_CIB; cib_subsystem->flag_connected = R_CIB_CONNECTED; cib_subsystem->flag_required = R_CIB_REQUIRED; } else { was_error = TRUE; } if (te_subsystem != NULL) { te_subsystem->pid = -1; te_subsystem->name = CRM_SYSTEM_TENGINE; te_subsystem->flag_connected = R_TE_CONNECTED; te_subsystem->flag_required = R_TE_REQUIRED; } else { was_error = TRUE; } if (pe_subsystem != NULL) { pe_subsystem->pid = -1; pe_subsystem->path = CRM_DAEMON_DIR; pe_subsystem->name = CRM_SYSTEM_PENGINE; pe_subsystem->command = CRM_DAEMON_DIR "/" CRM_SYSTEM_PENGINE; pe_subsystem->args = NULL; pe_subsystem->flag_connected = R_PE_CONNECTED; pe_subsystem->flag_required = R_PE_REQUIRED; } else { was_error = TRUE; } if (was_error == FALSE && is_heartbeat_cluster()) { if (start_subsystem(pe_subsystem) == FALSE) { was_error = TRUE; } } if (was_error) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } static int32_t crmd_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { crm_trace("Connection %p", c); if (crm_client_new(c, uid, gid) == NULL) { return -EIO; } return 0; } static void crmd_ipc_created(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); } static int32_t crmd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; crm_client_t *client = crm_client_get(c); xmlNode *msg = crm_ipcs_recv(client, data, size, &id, &flags); crm_trace("Invoked: %s", crm_client_name(client)); if (flags & crm_ipc_client_response) { crm_ipcs_send_ack(client, id, "ack", __FUNCTION__, __LINE__); } if (msg == NULL) { return 0; } #if ENABLE_ACL determine_request_user(client->user, msg, F_CRM_USER); #endif crm_trace("Processing msg from %s", crm_client_name(client)); crm_log_xml_trace(msg, "CRMd[inbound]"); crm_xml_add(msg, F_CRM_SYS_FROM, client->id); if (crmd_authorize_message(msg, client, NULL)) { route_message(C_IPC_MESSAGE, msg); } trigger_fsa(fsa_source); free_xml(msg); return 0; } static int32_t crmd_ipc_closed(qb_ipcs_connection_t * c) { crm_client_t *client = crm_client_get(c); struct crm_subsystem_s *the_subsystem = NULL; crm_trace("Connection %p", c); if (client->userdata == NULL) { crm_trace("Client hadn't registered with us yet"); } else if (strcasecmp(CRM_SYSTEM_PENGINE, client->userdata) == 0) { the_subsystem = pe_subsystem; } else if (strcasecmp(CRM_SYSTEM_TENGINE, client->userdata) == 0) { the_subsystem = te_subsystem; } else if (strcasecmp(CRM_SYSTEM_CIB, client->userdata) == 0) { the_subsystem = cib_subsystem; } if (the_subsystem != NULL) { the_subsystem->source = NULL; the_subsystem->client = NULL; crm_info("Received HUP from %s:[%d]", the_subsystem->name, the_subsystem->pid); } else { /* else that was a transient client */ crm_trace("Received HUP from transient client"); } crm_trace("Disconnecting client %s (%p)", crm_client_name(client), client); free(client->userdata); crm_client_destroy(client); trigger_fsa(fsa_source); return 0; } static void crmd_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); } /* A_STOP */ void do_stop(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { if (is_heartbeat_cluster()) { stop_subsystem(pe_subsystem, FALSE); } - mainloop_del_ipc_server(ipcs); + crm_trace("Closing IPC server"); + mainloop_del_ipc_server(ipcs); ipcs = NULL; register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } /* A_STARTED */ void do_started(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { static struct qb_ipcs_service_handlers crmd_callbacks = { .connection_accept = crmd_ipc_accept, .connection_created = crmd_ipc_created, .msg_process = crmd_ipc_dispatch, .connection_closed = crmd_ipc_closed, .connection_destroyed = crmd_ipc_destroy }; if (cur_state != S_STARTING) { crm_err("Start cancelled... %s", fsa_state2string(cur_state)); return; } else if (is_set(fsa_input_register, R_MEMBERSHIP) == FALSE) { crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP); crmd_fsa_stall(TRUE); return; } else if (is_set(fsa_input_register, R_LRM_CONNECTED) == FALSE) { crm_info("Delaying start, LRM not connected (%.16llx)", R_LRM_CONNECTED); crmd_fsa_stall(TRUE); return; } else if (is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) { crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED); crmd_fsa_stall(TRUE); return; } else if (is_set(fsa_input_register, R_READ_CONFIG) == FALSE) { crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG); crmd_fsa_stall(TRUE); return; } else if (is_set(fsa_input_register, R_PEER_DATA) == FALSE) { /* try reading from HA */ crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA); #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { HA_Message *msg = NULL; crm_trace("Looking for a HA message"); msg = fsa_cluster_conn->llc_ops->readmsg(fsa_cluster_conn, 0); if (msg != NULL) { crm_trace("There was a HA message"); ha_msg_del(msg); } } #endif crmd_fsa_stall(TRUE); return; } crm_debug("Init server comms"); ipcs = crmd_ipc_server_init(&crmd_callbacks); if (ipcs == NULL) { crm_err("Failed to create IPC server: shutting down and inhibiting respawn"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } if (stonith_reconnect == NULL) { int dummy; stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW, te_connect_stonith, &dummy); } set_bit(fsa_input_register, R_ST_REQUIRED); mainloop_set_trigger(stonith_reconnect); crm_notice("The local CRM is operational"); clear_bit(fsa_input_register, R_STARTING); register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL); } /* A_RECOVER */ void do_recover(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { set_bit(fsa_input_register, R_IN_RECOVERY); crm_warn("Fast-tracking shutdown in response to errors"); register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } /* *INDENT-OFF* */ pe_cluster_option crmd_opts[] = { /* name, old-name, validate, default, description */ { "dc-version", NULL, "string", NULL, "none", NULL, "Version of Pacemaker on the cluster's DC.", "Includes the hash which identifies the exact Mercurial changeset it was built from. Used for diagnostic purposes." }, { "cluster-infrastructure", NULL, "string", NULL, "heartbeat", NULL, "The messaging stack on which Pacemaker is currently running.", "Used for informational and diagnostic purposes." }, { XML_CONFIG_ATTR_DC_DEADTIME, "dc_deadtime", "time", NULL, "20s", &check_time, "How long to wait for a response from other nodes during startup.", "The \"correct\" value will depend on the speed/load of your network and the type of switches used." }, { XML_CONFIG_ATTR_RECHECK, "cluster_recheck_interval", "time", "Zero disables polling. Positive values are an interval in seconds (unless other SI units are specified. eg. 5min)", "15min", &check_timer, "Polling interval for time based changes to options, resource parameters and constraints.", "The Cluster is primarily event driven, however the configuration can have elements that change based on time." " To ensure these changes take effect, we can optionally poll the cluster's status for changes." }, { XML_CONFIG_ATTR_ELECTION_FAIL, "election_timeout", "time", NULL, "2min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." }, { XML_CONFIG_ATTR_FORCE_QUIT, "shutdown_escalation", "time", NULL, "20min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." }, { "crmd-integration-timeout", NULL, "time", NULL, "3min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." }, { "crmd-finalization-timeout", NULL, "time", NULL, "30min", &check_timer, "*** Advanced Use Only ***.", "If you need to adjust this value, it probably indicates the presence of a bug." }, { "crmd-transition-delay", NULL, "time", NULL, "0s", &check_timer, "*** Advanced Use Only ***\nEnabling this option will slow down cluster recovery under all conditions", "Delay cluster recovery for the configured interval to allow for additional/related events to occur.\nUseful if your configuration is sensitive to the order in which ping updates arrive." }, { XML_ATTR_EXPECTED_VOTES, NULL, "integer", NULL, "2", &check_number, "The number of nodes expected to be in the cluster", "Used to calculate quorum in openais based clusters." }, }; /* *INDENT-ON* */ void crmd_metadata(void) { config_metadata("CRM Daemon", "1.0", "CRM Daemon Options", "This is a fake resource that details the options that can be configured for the CRM Daemon.", crmd_opts, DIMOF(crmd_opts)); } static void verify_crmd_options(GHashTable * options) { verify_all_options(options, crmd_opts, DIMOF(crmd_opts)); } static const char * crmd_pref(GHashTable * options, const char *name) { return get_cluster_pref(options, crmd_opts, DIMOF(crmd_opts), name); } static void config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { const char *value = NULL; GHashTable *config_hash = NULL; crm_time_t *now = crm_time_new(NULL); if (rc != pcmk_ok) { fsa_data_t *msg_data = NULL; crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); if (rc == -EACCES || rc == -pcmk_err_dtd_validation) { crm_err("The cluster is mis-configured - shutting down and staying down"); set_bit(fsa_input_register, R_STAYDOWN); } goto bail; } crm_debug("Call %d : Parsing CIB options", call_id); config_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); unpack_instance_attributes(output, output, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, FALSE, now); verify_crmd_options(config_hash); value = crmd_pref(config_hash, XML_CONFIG_ATTR_DC_DEADTIME); election_trigger->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, XML_CONFIG_ATTR_FORCE_QUIT); shutdown_escalation_timer->period_ms = crm_get_msec(value); crm_debug("Shutdown escalation occurs after: %dms", shutdown_escalation_timer->period_ms); value = crmd_pref(config_hash, XML_CONFIG_ATTR_ELECTION_FAIL); election_timeout->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, XML_CONFIG_ATTR_RECHECK); recheck_timer->period_ms = crm_get_msec(value); crm_debug("Checking for expired actions every %dms", recheck_timer->period_ms); value = crmd_pref(config_hash, "crmd-transition-delay"); transition_timer->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, "crmd-integration-timeout"); integration_timer->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, "crmd-finalization-timeout"); finalization_timer->period_ms = crm_get_msec(value); #if SUPPORT_COROSYNC if (is_classic_ais_cluster()) { value = crmd_pref(config_hash, XML_ATTR_EXPECTED_VOTES); crm_debug("Sending expected-votes=%s to corosync", value); send_ais_text(crm_class_quorum, value, TRUE, NULL, crm_msg_ais); } #endif set_bit(fsa_input_register, R_READ_CONFIG); crm_trace("Triggering FSA: %s", __FUNCTION__); mainloop_set_trigger(fsa_source); g_hash_table_destroy(config_hash); bail: crm_time_free(now); } gboolean crm_read_options(gpointer user_data) { int call_id = fsa_cib_conn->cmds->query(fsa_cib_conn, XML_CIB_TAG_CRMCONFIG, NULL, cib_scope_local); fsa_register_cib_callback(call_id, FALSE, NULL, config_query_callback); crm_trace("Querying the CIB... call %d", call_id); return TRUE; } /* A_READCONFIG */ void do_read_config(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { mainloop_set_trigger(config_read); } void crm_shutdown(int nsig) { if (crmd_mainloop != NULL && g_main_is_running(crmd_mainloop)) { if (is_set(fsa_input_register, R_SHUTDOWN)) { crm_err("Escalating the shutdown"); register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL); } else { set_bit(fsa_input_register, R_SHUTDOWN); register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL); if (shutdown_escalation_timer->period_ms < 1) { const char *value = crmd_pref(NULL, XML_CONFIG_ATTR_FORCE_QUIT); int msec = crm_get_msec(value); crm_debug("Using default shutdown escalation: %dms", msec); shutdown_escalation_timer->period_ms = msec; } /* cant rely on this... */ crm_notice("Requesting shutdown, upper limit is %dms", shutdown_escalation_timer->period_ms); crm_timer_start(shutdown_escalation_timer); } } else { crm_info("exit from shutdown"); crmd_exit(EX_OK); } } diff --git a/crmd/lrm_state.c b/crmd/lrm_state.c index 6da64b6986..43c1adb288 100644 --- a/crmd/lrm_state.c +++ b/crmd/lrm_state.c @@ -1,657 +1,666 @@ /* * Copyright (C) 2012 David Vossel * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include GHashTable *lrm_state_table = NULL; GHashTable *proxy_table = NULL; int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg); void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)); typedef struct remote_proxy_s { char *node_name; char *session_id; gboolean is_local; crm_ipc_t *ipc; mainloop_io_t *source; } remote_proxy_t; static void history_cache_destroy(gpointer data) { rsc_history_t *entry = data; if (entry->stop_params) { g_hash_table_destroy(entry->stop_params); } free(entry->rsc.type); free(entry->rsc.class); free(entry->rsc.provider); lrmd_free_event(entry->failed); lrmd_free_event(entry->last); free(entry->id); free(entry); } static void free_deletion_op(gpointer value) { struct pending_deletion_op_s *op = value; free(op->rsc); delete_ha_msg_input(op->input); free(op); } static void free_recurring_op(gpointer value) { struct recurring_op_s *op = (struct recurring_op_s *)value; free(op->rsc_id); free(op->op_type); free(op->op_key); free(op); } lrm_state_t * lrm_state_create(const char *node_name) { lrm_state_t *state = NULL; if (!node_name) { crm_err("No node name given for lrm state object"); return NULL; } state = calloc(1, sizeof(lrm_state_t)); if (!state) { return NULL; } state->node_name = strdup(node_name); state->deletion_ops = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, free_deletion_op); state->pending_ops = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, free_recurring_op); state->resource_history = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, history_cache_destroy); g_hash_table_insert(lrm_state_table, (char *)state->node_name, state); return state; } void lrm_state_destroy(const char *node_name) { g_hash_table_remove(lrm_state_table, node_name); } static gboolean remote_proxy_remove_by_node(gpointer key, gpointer value, gpointer user_data) { remote_proxy_t *proxy = value; const char *node_name = user_data; if (safe_str_eq(node_name, proxy->node_name)) { return TRUE; } return FALSE; } static void internal_lrm_state_destroy(gpointer data) { lrm_state_t *lrm_state = data; if (!lrm_state) { return; } + crm_trace("Destroying proxy table with %d members", g_hash_table_size(proxy_table)); g_hash_table_foreach_remove(proxy_table, remote_proxy_remove_by_node, (char *) lrm_state->node_name); remote_ra_cleanup(lrm_state); lrmd_api_delete(lrm_state->conn); if (lrm_state->resource_history) { + crm_trace("Destroying history op cache with %d members", g_hash_table_size(lrm_state->resource_history)); g_hash_table_destroy(lrm_state->resource_history); } if (lrm_state->deletion_ops) { + crm_trace("Destroying deletion op cache with %d members", g_hash_table_size(lrm_state->deletion_ops)); g_hash_table_destroy(lrm_state->deletion_ops); } if (lrm_state->pending_ops) { + crm_trace("Destroying pending op cache with %d members", g_hash_table_size(lrm_state->pending_ops)); g_hash_table_destroy(lrm_state->pending_ops); } free((char *)lrm_state->node_name); free(lrm_state); } void lrm_state_reset_tables(lrm_state_t * lrm_state) { if (lrm_state->resource_history) { g_hash_table_remove_all(lrm_state->resource_history); } if (lrm_state->deletion_ops) { g_hash_table_remove_all(lrm_state->deletion_ops); } if (lrm_state->pending_ops) { g_hash_table_remove_all(lrm_state->pending_ops); } } static void remote_proxy_free(gpointer data) { remote_proxy_t *proxy = data; crm_debug("Signing out of the IPC Service"); if (proxy->source != NULL) { mainloop_del_ipc_client(proxy->source); } free(proxy->node_name); free(proxy->session_id); } gboolean lrm_state_init_local(void) { if (lrm_state_table) { return TRUE; } lrm_state_table = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, internal_lrm_state_destroy); if (!lrm_state_table) { return FALSE; } proxy_table = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, remote_proxy_free); if (!proxy_table) { g_hash_table_destroy(lrm_state_table); return FALSE; } return TRUE; } void lrm_state_destroy_all(void) { if (lrm_state_table) { + crm_trace("Destroying state table with %d members", g_hash_table_size(lrm_state_table)); g_hash_table_destroy(lrm_state_table); } + if(proxy_table) { + crm_trace("Destroying proxy table with %d members", g_hash_table_size(proxy_table)); + g_hash_table_destroy(proxy_table); + } } lrm_state_t * lrm_state_find(const char *node_name) { if (!node_name) { return NULL; } return g_hash_table_lookup(lrm_state_table, node_name); } lrm_state_t * lrm_state_find_or_create(const char *node_name) { lrm_state_t *lrm_state; lrm_state = g_hash_table_lookup(lrm_state_table, node_name); if (!lrm_state) { lrm_state = lrm_state_create(node_name); } return lrm_state; } GList * lrm_state_get_list(void) { return g_hash_table_get_values(lrm_state_table); } void lrm_state_disconnect(lrm_state_t * lrm_state) { if (!lrm_state->conn) { return; } ((lrmd_t *) lrm_state->conn)->cmds->disconnect(lrm_state->conn); lrmd_api_delete(lrm_state->conn); lrm_state->conn = NULL; } int lrm_state_is_connected(lrm_state_t * lrm_state) { if (!lrm_state->conn) { return FALSE; } return ((lrmd_t *) lrm_state->conn)->cmds->is_connected(lrm_state->conn); } int lrm_state_poke_connection(lrm_state_t * lrm_state) { if (!lrm_state->conn) { return -1; } return ((lrmd_t *) lrm_state->conn)->cmds->poke_connection(lrm_state->conn); } int lrm_state_ipc_connect(lrm_state_t * lrm_state) { int ret; if (!lrm_state->conn) { lrm_state->conn = lrmd_api_new(); ((lrmd_t *) lrm_state->conn)->cmds->set_callback(lrm_state->conn, lrm_op_callback); } ret = ((lrmd_t *) lrm_state->conn)->cmds->connect(lrm_state->conn, CRM_SYSTEM_CRMD, NULL); if (ret != pcmk_ok) { lrm_state->num_lrm_register_fails++; } else { lrm_state->num_lrm_register_fails = 0; } return ret; } static void remote_proxy_notify_destroy(lrmd_t *lrmd, const char *session_id) { /* sending to the remote node that an ipc connection has been destroyed */ xmlNode *msg = create_xml_node(NULL, T_LRMD_IPC_PROXY); crm_xml_add(msg, F_LRMD_IPC_OP, "destroy"); crm_xml_add(msg, F_LRMD_IPC_SESSION, session_id); lrmd_internal_proxy_send(lrmd, msg); free_xml(msg); } static void remote_proxy_relay_event(lrmd_t *lrmd, const char *session_id, xmlNode *msg) { /* sending to the remote node an event msg. */ xmlNode *event = create_xml_node(NULL, T_LRMD_IPC_PROXY); crm_xml_add(event, F_LRMD_IPC_OP, "event"); crm_xml_add(event, F_LRMD_IPC_SESSION, session_id); add_message_xml(event, F_LRMD_IPC_MSG, msg); lrmd_internal_proxy_send(lrmd, event); free_xml(event); } static void remote_proxy_relay_response(lrmd_t *lrmd, const char *session_id, xmlNode *msg, int msg_id) { /* sending to the remote node a response msg. */ xmlNode *response = create_xml_node(NULL, T_LRMD_IPC_PROXY); crm_xml_add(response, F_LRMD_IPC_OP, "response"); crm_xml_add(response, F_LRMD_IPC_SESSION, session_id); crm_xml_add_int(response, F_LRMD_IPC_MSG_ID, msg_id); add_message_xml(response, F_LRMD_IPC_MSG, msg); lrmd_internal_proxy_send(lrmd, response); free_xml(response); } static int remote_proxy_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata) { xmlNode *xml = NULL; remote_proxy_t *proxy = userdata; lrm_state_t *lrm_state = lrm_state_find(proxy->node_name); if (lrm_state == NULL) { return 0; } xml = string2xml(buffer); if (xml == NULL) { crm_warn("Received a NULL msg from IPC service."); return 1; } remote_proxy_relay_event(lrm_state->conn, proxy->session_id, xml); free_xml(xml); return 1; } static void remote_proxy_disconnected(void *userdata) { remote_proxy_t *proxy = userdata; lrm_state_t *lrm_state = lrm_state_find(proxy->node_name); crm_trace("destroying %p", userdata); proxy->source = NULL; proxy->ipc = NULL; if (lrm_state && lrm_state->conn) { remote_proxy_notify_destroy(lrm_state->conn, proxy->session_id); } g_hash_table_remove(proxy_table, proxy->session_id); } static remote_proxy_t * remote_proxy_new(const char *node_name, const char *session_id, const char *channel) { static struct ipc_client_callbacks proxy_callbacks = { .dispatch = remote_proxy_dispatch_internal, .destroy = remote_proxy_disconnected }; remote_proxy_t *proxy = calloc(1, sizeof(remote_proxy_t)); proxy->node_name = strdup(node_name); proxy->session_id = strdup(session_id); if (safe_str_eq(channel, CRM_SYSTEM_CRMD)) { proxy->is_local = TRUE; } else { proxy->source = mainloop_add_ipc_client(channel, G_PRIORITY_LOW, 512 * 1024 /* 512k */ , proxy, &proxy_callbacks); proxy->ipc = mainloop_get_ipc_client(proxy->source); if (proxy->source == NULL) { remote_proxy_free(proxy); return NULL; } } g_hash_table_insert(proxy_table, proxy->session_id, proxy); return proxy; } gboolean crmd_is_proxy_session(const char *session) { return g_hash_table_lookup(proxy_table, session) ? TRUE : FALSE; } void crmd_proxy_send(const char *session, xmlNode *msg) { remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session); lrm_state_t *lrm_state = NULL; if (!proxy) { return; } lrm_state = lrm_state_find(proxy->node_name); if (lrm_state) { remote_proxy_relay_event(lrm_state->conn, session, msg); } } static void crmd_proxy_dispatch(const char *user, const char *session, xmlNode *msg) { #if ENABLE_ACL determine_request_user(user, msg, F_CRM_USER); #endif crm_log_xml_trace(msg, "CRMd-PROXY[inbound]"); crm_xml_add(msg, F_CRM_SYS_FROM, session); if (crmd_authorize_message(msg, NULL, session)) { route_message(C_IPC_MESSAGE, msg); } trigger_fsa(fsa_source); } static void remote_proxy_cb(lrmd_t *lrmd, void *userdata, xmlNode *msg) { lrm_state_t *lrm_state = userdata; xmlNode *op_reply = NULL; const char *op = crm_element_value(msg, F_LRMD_IPC_OP); const char *session = crm_element_value(msg, F_LRMD_IPC_SESSION); const char *user = crm_element_value(msg, F_LRMD_IPC_USER); int msg_id = 0; /* sessions are raw ipc connections to IPC, * all we do is proxy requests/responses exactly * like they are given to us at the ipc level. */ CRM_CHECK(op != NULL, return); CRM_CHECK(session != NULL, return); crm_element_value_int(msg, F_LRMD_IPC_MSG_ID, &msg_id); /* This is msg from remote ipc client going to real ipc server */ if (safe_str_eq(op, "new")) { const char *channel = crm_element_value(msg, F_LRMD_IPC_IPC_SERVER); CRM_CHECK(channel != NULL, return); if (remote_proxy_new(lrm_state->node_name, session, channel) == NULL) { remote_proxy_notify_destroy(lrmd, session); } crm_info("new remote proxy client established, session id %s", session); } else if (safe_str_eq(op, "destroy")) { g_hash_table_remove(proxy_table, session); } else if (safe_str_eq(op, "request")) { int flags = 0; xmlNode *request = get_message_xml(msg, F_LRMD_IPC_MSG); remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session); CRM_CHECK(request != NULL, return); if (proxy == NULL) { /* proxy connection no longer exists */ remote_proxy_notify_destroy(lrmd, session); return; } else if ((proxy->is_local == FALSE) && (crm_ipc_connected(proxy->ipc) == FALSE)) { g_hash_table_remove(proxy_table, session); return; } crm_element_value_int(msg, F_LRMD_IPC_MSG_FLAGS, &flags); if (proxy->is_local) { /* this is for the crmd, which we are, so don't try * and connect/send to ourselves over ipc. instead * do it directly. */ if (flags & crm_ipc_client_response) { op_reply = create_xml_node(NULL, "ack"); crm_xml_add(op_reply, "function", __FUNCTION__); crm_xml_add_int(op_reply, "line", __LINE__); } crmd_proxy_dispatch(user, session, request); } else { /* TODO make this async. */ crm_ipc_send(proxy->ipc, request, flags, 10000, &op_reply); } } if (op_reply) { remote_proxy_relay_response(lrmd, session, op_reply, msg_id); free_xml(op_reply); } } int lrm_state_remote_connect_async(lrm_state_t * lrm_state, const char *server, int port, int timeout_ms) { int ret; if (!lrm_state->conn) { lrm_state->conn = lrmd_remote_api_new(lrm_state->node_name, server, port); if (!lrm_state->conn) { return -1; } ((lrmd_t *) lrm_state->conn)->cmds->set_callback(lrm_state->conn, remote_lrm_op_callback); lrmd_internal_set_proxy_callback(lrm_state->conn, lrm_state, remote_proxy_cb); } crm_trace("initiating remote connection to %s at %d with timeout %d", server, port, timeout_ms); ret = ((lrmd_t *) lrm_state->conn)->cmds->connect_async(lrm_state->conn, lrm_state->node_name, timeout_ms); if (ret != pcmk_ok) { lrm_state->num_lrm_register_fails++; } else { lrm_state->num_lrm_register_fails = 0; } return ret; } int lrm_state_get_metadata(lrm_state_t * lrm_state, const char *class, const char *provider, const char *agent, char **output, enum lrmd_call_options options) { if (!lrm_state->conn) { return -ENOTCONN; } /* Optimize this... only retrieve metadata from local lrmd connection. Perhaps consider * caching result. */ return ((lrmd_t *) lrm_state->conn)->cmds->get_metadata(lrm_state->conn, class, provider, agent, output, options); } int lrm_state_cancel(lrm_state_t * lrm_state, const char *rsc_id, const char *action, int interval) { if (!lrm_state->conn) { return -ENOTCONN; } /* Optimize this, cancel requires a synced request/response to the server. * Figure out a way to make this async. */ if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) { return remote_ra_cancel(lrm_state, rsc_id, action, interval); } return ((lrmd_t *) lrm_state->conn)->cmds->cancel(lrm_state->conn, rsc_id, action, interval); } lrmd_rsc_info_t * lrm_state_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id, enum lrmd_call_options options) { if (!lrm_state->conn) { return NULL; } /* optimize this... this function is a synced round trip from client to daemon. * It should be possible to cache the resource info in the lrmd client to prevent this. */ if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) { return remote_ra_get_rsc_info(lrm_state, rsc_id); } return ((lrmd_t *) lrm_state->conn)->cmds->get_rsc_info(lrm_state->conn, rsc_id, options); } int lrm_state_exec(lrm_state_t * lrm_state, const char *rsc_id, const char *action, const char *userdata, int interval, /* ms */ int timeout, /* ms */ int start_delay, /* ms */ lrmd_key_value_t * params) { if (!lrm_state->conn) { lrmd_key_value_freeall(params); return -ENOTCONN; } if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) { return remote_ra_exec(lrm_state, rsc_id, action, userdata, interval, timeout, start_delay, params); } return ((lrmd_t *) lrm_state->conn)->cmds->exec(lrm_state->conn, rsc_id, action, userdata, interval, timeout, start_delay, lrmd_opt_notify_changes_only, params); } int lrm_state_register_rsc(lrm_state_t * lrm_state, const char *rsc_id, const char *class, const char *provider, const char *agent, enum lrmd_call_options options) { if (!lrm_state->conn) { return -ENOTCONN; } /* optimize this... this function is a synced round trip from client to daemon. * The crmd/lrm.c code path should be re-factored to allow the register of resources * to be performed async. The lrmd client api needs to make an async version * of register available. */ if (is_remote_lrmd_ra(agent, provider, NULL)) { return lrm_state_find_or_create(rsc_id) ? pcmk_ok : -1; } return ((lrmd_t *) lrm_state->conn)->cmds->register_rsc(lrm_state->conn, rsc_id, class, provider, agent, options); } int lrm_state_unregister_rsc(lrm_state_t * lrm_state, const char *rsc_id, enum lrmd_call_options options) { if (!lrm_state->conn) { return -ENOTCONN; } /* optimize this... this function is a synced round trip from client to daemon. * The crmd/lrm.c code path that uses this function should always treat it as an * async operation. The lrmd client api needs to make an async version unreg available. */ if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) { lrm_state_destroy(rsc_id); return pcmk_ok; } return ((lrmd_t *) lrm_state->conn)->cmds->unregister_rsc(lrm_state->conn, rsc_id, options); } diff --git a/lib/cluster/cluster.c b/lib/cluster/cluster.c index a52748d32b..9a048d1e2c 100644 --- a/lib/cluster/cluster.c +++ b/lib/cluster/cluster.c @@ -1,696 +1,700 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include CRM_TRACE_INIT_DATA(cluster); #if SUPPORT_HEARTBEAT void *hb_library = NULL; #endif static GHashTable *crm_uuid_cache = NULL; static GHashTable *crm_uname_cache = NULL; static char * get_heartbeat_uuid(uint32_t unused, const char *uname) { char *uuid_calc = NULL; #if SUPPORT_HEARTBEAT cl_uuid_t uuid_raw; const char *unknown = "00000000-0000-0000-0000-000000000000"; if (heartbeat_cluster == NULL) { crm_warn("No connection to heartbeat, using uuid=uname"); return NULL; } if (heartbeat_cluster->llc_ops->get_uuid_by_name(heartbeat_cluster, uname, &uuid_raw) == HA_FAIL) { crm_err("get_uuid_by_name() call failed for host %s", uname); free(uuid_calc); return NULL; } uuid_calc = calloc(1, 50); cl_uuid_unparse(&uuid_raw, uuid_calc); if (safe_str_eq(uuid_calc, unknown)) { crm_warn("Could not calculate UUID for %s", uname); free(uuid_calc); return NULL; } #endif return uuid_calc; } static gboolean uname_is_uuid(void) { static const char *uuid_pref = NULL; if (uuid_pref == NULL) { uuid_pref = getenv("PCMK_uname_is_uuid"); } if (uuid_pref == NULL) { /* true is legacy mode */ uuid_pref = "false"; } return crm_is_true(uuid_pref); } int get_corosync_id(int id, const char *uuid) { if (id == 0 && !uname_is_uuid() && is_corosync_cluster()) { id = crm_atoi(uuid, "0"); } return id; } char * get_corosync_uuid(uint32_t id, const char *uname) { if (!uname_is_uuid() && is_corosync_cluster()) { if (id <= 0) { /* Try the membership cache... */ crm_node_t *node = g_hash_table_lookup(crm_peer_cache, uname); if (node != NULL) { id = node->id; } } if (id > 0) { int len = 32; char *buffer = NULL; buffer = calloc(1, (len + 1)); if (buffer != NULL) { snprintf(buffer, len, "%u", id); } return buffer; } else { crm_warn("Node %s is not yet known by corosync", uname); } } else if (uname != NULL) { return strdup(uname); } return NULL; } void set_node_uuid(const char *uname, const char *uuid) { CRM_CHECK(uuid != NULL, return); CRM_CHECK(uname != NULL, return); if (crm_uuid_cache == NULL) { crm_uuid_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); } g_hash_table_insert(crm_uuid_cache, strdup(uname), strdup(uuid)); } const char * get_node_uuid(uint32_t id, const char *uname) { char *uuid = NULL; enum cluster_type_e type = get_cluster_type(); if (crm_uuid_cache == NULL) { crm_uuid_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); } /* avoid blocking heartbeat calls where possible */ if (uname) { uuid = g_hash_table_lookup(crm_uuid_cache, uname); } if (uuid != NULL) { return uuid; } switch (type) { case pcmk_cluster_corosync: uuid = get_corosync_uuid(id, uname); break; case pcmk_cluster_cman: case pcmk_cluster_classic_ais: if (uname) { uuid = strdup(uname); } break; case pcmk_cluster_heartbeat: uuid = get_heartbeat_uuid(id, uname); break; case pcmk_cluster_unknown: case pcmk_cluster_invalid: crm_err("Unsupported cluster type"); break; } if (uuid == NULL) { return NULL; } if (uname) { g_hash_table_insert(crm_uuid_cache, strdup(uname), uuid); return g_hash_table_lookup(crm_uuid_cache, uname); } /* Memory leak! */ CRM_LOG_ASSERT(uuid != NULL); return uuid; } gboolean crm_cluster_connect(crm_cluster_t * cluster) { enum cluster_type_e type = get_cluster_type(); crm_notice("Connecting to cluster infrastructure: %s", name_for_cluster_type(type)); #if SUPPORT_COROSYNC if (is_openais_cluster()) { crm_peer_init(); return init_cs_connection(cluster); } #endif #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { int rv; /* coverity[var_deref_op] False positive */ if (cluster->hb_conn == NULL) { /* No object passed in, create a new one. */ ll_cluster_t *(*new_cluster) (const char *llctype) = find_library_function(&hb_library, HEARTBEAT_LIBRARY, "ll_cluster_new", 1); cluster->hb_conn = (*new_cluster) ("heartbeat"); /* dlclose(handle); */ } else { /* Object passed in. Disconnect first, then reconnect below. */ cluster->hb_conn->llc_ops->signoff(cluster->hb_conn, FALSE); } /* make sure we are disconnected first with the old object, if any. */ if (heartbeat_cluster && heartbeat_cluster != cluster->hb_conn) { heartbeat_cluster->llc_ops->signoff(heartbeat_cluster, FALSE); } CRM_ASSERT(cluster->hb_conn != NULL); heartbeat_cluster = cluster->hb_conn; rv = register_heartbeat_conn(cluster); if (rv) { /* we'll benefit from a bigger queue length on heartbeat side. * Otherwise, if peers send messages faster than we can consume * them right now, heartbeat messaging layer will kick us out once * it's (small) default queue fills up :( * If we fail to adjust the sendq length, that's not yet fatal, though. */ if (HA_OK != heartbeat_cluster->llc_ops->set_sendq_len(heartbeat_cluster, 1024)) { crm_warn("Cannot set sendq length: %s", heartbeat_cluster->llc_ops->errmsg(heartbeat_cluster)); } } return rv; } #endif crm_info("Unsupported cluster stack: %s", getenv("HA_cluster_type")); return FALSE; } void crm_cluster_disconnect(crm_cluster_t * cluster) { enum cluster_type_e type = get_cluster_type(); const char *type_str = name_for_cluster_type(type); crm_info("Disconnecting from cluster infrastructure: %s", type_str); #if SUPPORT_COROSYNC if (is_openais_cluster()) { crm_peer_destroy(); terminate_cs_connection(); crm_info("Disconnected from %s", type_str); return; } #endif #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { if (cluster == NULL) { crm_info("No cluster connection"); return; } else if (cluster->hb_conn) { cluster->hb_conn->llc_ops->signoff(cluster->hb_conn, FALSE); cluster->hb_conn = NULL; crm_info("Disconnected from %s", type_str); return; } else { crm_info("No %s connection", type_str); return; } } #endif crm_info("Unsupported cluster stack: %s", getenv("HA_cluster_type")); } gboolean send_cluster_message(crm_node_t * node, enum crm_ais_msg_types service, xmlNode * data, gboolean ordered) { #if SUPPORT_COROSYNC if (is_openais_cluster()) { return send_ais_message(data, FALSE, node, service); } #endif #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { return send_ha_message(heartbeat_cluster, data, node ? node->uname : NULL, ordered); } #endif return FALSE; } void empty_uuid_cache(void) { if (crm_uuid_cache != NULL) { g_hash_table_destroy(crm_uuid_cache); crm_uuid_cache = NULL; } + if (crm_uname_cache != NULL) { + g_hash_table_destroy(crm_uname_cache); + crm_uname_cache = NULL; + } } void unget_uuid(const char *uname) { if (crm_uuid_cache == NULL) { return; } g_hash_table_remove(crm_uuid_cache, uname); } const char * get_uuid(const char *uname) { return get_node_uuid(0, uname); } char * get_local_node_name(void) { char *name = NULL; enum cluster_type_e stack = get_cluster_type(); switch (stack) { #if SUPPORT_CMAN case pcmk_cluster_cman: name = cman_node_name(0 /* AKA. CMAN_NODEID_US */ ); break; #endif #if SUPPORT_COROSYNC # if !SUPPORT_PLUGIN case pcmk_cluster_corosync: name = corosync_node_name(0, 0); break; # endif #endif case pcmk_cluster_heartbeat: case pcmk_cluster_classic_ais: break; default: crm_err("Unknown cluster type: %s (%d)", name_for_cluster_type(stack), stack); } if (name == NULL) { struct utsname res; int rc = uname(&res); if (rc == 0) { crm_notice("Defaulting to uname -n for the local %s node name", name_for_cluster_type(stack)); name = strdup(res.nodename); } } if (name == NULL) { crm_err("Could not obtain the local %s node name", name_for_cluster_type(stack)); crm_exit(100); } return name; } char * get_node_name(uint32_t nodeid) { char *name = NULL; enum cluster_type_e stack = get_cluster_type(); switch (stack) { case pcmk_cluster_heartbeat: break; #if SUPPORT_PLUGIN case pcmk_cluster_classic_ais: name = classic_node_name(nodeid); break; #else # if SUPPORT_COROSYNC case pcmk_cluster_corosync: name = corosync_node_name(0, nodeid); break; # endif #endif #if SUPPORT_CMAN case pcmk_cluster_cman: name = cman_node_name(nodeid); break; #endif default: crm_err("Unknown cluster type: %s (%d)", name_for_cluster_type(stack), stack); } if (name == NULL) { crm_notice("Could not obtain a node name for %s nodeid %u", name_for_cluster_type(stack), nodeid); } return name; } /* Only used by update_failcount() in te_utils.c */ const char * get_uname(const char *uuid) { char *uname = NULL; if (crm_uname_cache == NULL) { crm_uname_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); } CRM_CHECK(uuid != NULL, return NULL); /* avoid blocking calls where possible */ uname = g_hash_table_lookup(crm_uname_cache, uuid); if (uname != NULL) { crm_trace("%s = %s (cached)", uuid, uname); return uname; } #if SUPPORT_COROSYNC if (is_openais_cluster()) { if (!uname_is_uuid() && is_corosync_cluster()) { uint32_t id = crm_int_helper(uuid, NULL); crm_node_t *node = g_hash_table_lookup(crm_peer_id_cache, GUINT_TO_POINTER(id)); if (node && node->uname) { uname = strdup(node->uname); } } else { uname = strdup(uuid); } } #endif #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { if (heartbeat_cluster != NULL) { cl_uuid_t uuid_raw; char *uuid_copy = strdup(uuid); cl_uuid_parse(uuid_copy, &uuid_raw); uname = malloc(MAX_NAME); if (heartbeat_cluster->llc_ops->get_name_by_uuid(heartbeat_cluster, &uuid_raw, uname, MAX_NAME) == HA_FAIL) { crm_err("Could not calculate uname for %s", uuid); free(uuid_copy); free(uname); uname = NULL; } } } #endif if (uname) { crm_trace("Storing %s = %s", uuid, uname); g_hash_table_insert(crm_uname_cache, strdup(uuid), uname); } return uname; } void set_uuid(xmlNode * node, const char *attr, const char *uname) { const char *uuid_calc = get_uuid(uname); crm_xml_add(node, attr, uuid_calc); return; } const char * name_for_cluster_type(enum cluster_type_e type) { switch (type) { case pcmk_cluster_classic_ais: return "classic openais (with plugin)"; case pcmk_cluster_cman: return "cman"; case pcmk_cluster_corosync: return "corosync"; case pcmk_cluster_heartbeat: return "heartbeat"; case pcmk_cluster_unknown: return "unknown"; case pcmk_cluster_invalid: return "invalid"; } crm_err("Invalid cluster type: %d", type); return "invalid"; } /* Do not expose these two */ int set_cluster_type(enum cluster_type_e type); static enum cluster_type_e cluster_type = pcmk_cluster_unknown; int set_cluster_type(enum cluster_type_e type) { if (cluster_type == pcmk_cluster_unknown) { crm_info("Cluster type set to: %s", name_for_cluster_type(type)); cluster_type = type; return 0; } else if (cluster_type == type) { return 0; } else if (pcmk_cluster_unknown == type) { cluster_type = type; return 0; } crm_err("Cluster type already set to %s, ignoring %s", name_for_cluster_type(cluster_type), name_for_cluster_type(type)); return -1; } enum cluster_type_e get_cluster_type(void) { bool detected = FALSE; const char *cluster = NULL; /* Return the previous calculation, if any */ if (cluster_type != pcmk_cluster_unknown) { return cluster_type; } cluster = getenv("HA_cluster_type"); #if SUPPORT_HEARTBEAT /* If nothing is defined in the environment, try heartbeat (if supported) */ if(cluster == NULL) { ll_cluster_t *hb; ll_cluster_t *(*new_cluster) (const char *llctype) = find_library_function( &hb_library, HEARTBEAT_LIBRARY, "ll_cluster_new", 1); hb = (*new_cluster) ("heartbeat"); crm_debug("Testing with Heartbeat"); if (hb->llc_ops->signon(hb, crm_system_name) == HA_OK) { hb->llc_ops->signoff(hb, FALSE); cluster_type = pcmk_cluster_heartbeat; detected = TRUE; goto done; } } #endif #if SUPPORT_COROSYNC /* If nothing is defined in the environment, try corosync (if supported) */ if(cluster == NULL) { crm_debug("Testing with Corosync"); cluster_type = find_corosync_variant(); if (cluster_type != pcmk_cluster_unknown) { detected = TRUE; goto done; } } #endif /* Something was defined in the environment, test it against what we support */ crm_info("Verifying cluster type: '%s'", cluster?cluster:"-unspecified-"); if (cluster == NULL) { #if SUPPORT_HEARTBEAT } else if (safe_str_eq(cluster, "heartbeat")) { cluster_type = pcmk_cluster_heartbeat; #endif #if SUPPORT_COROSYNC } else if (safe_str_eq(cluster, "openais") || safe_str_eq(cluster, "classic openais (with plugin)")) { cluster_type = pcmk_cluster_classic_ais; } else if (safe_str_eq(cluster, "corosync")) { cluster_type = pcmk_cluster_corosync; #endif #if SUPPORT_CMAN } else if (safe_str_eq(cluster, "cman")) { cluster_type = pcmk_cluster_cman; #endif } else { cluster_type = pcmk_cluster_invalid; goto done; /* Keep the compiler happy when no stacks are supported */ } done: if (cluster_type == pcmk_cluster_unknown) { crm_notice("Could not determin the current cluster type"); } else if (cluster_type == pcmk_cluster_invalid) { crm_notice("This installation does not support the '%s' cluster infrastructure: terminating.", cluster); crm_exit(100); } else { crm_info("%s an active '%s' cluster", detected?"Detected":"Assuming", name_for_cluster_type(cluster_type)); } return cluster_type; } gboolean is_cman_cluster(void) { return get_cluster_type() == pcmk_cluster_cman; } gboolean is_corosync_cluster(void) { return get_cluster_type() == pcmk_cluster_corosync; } gboolean is_classic_ais_cluster(void) { return get_cluster_type() == pcmk_cluster_classic_ais; } gboolean is_openais_cluster(void) { enum cluster_type_e type = get_cluster_type(); if (type == pcmk_cluster_classic_ais) { return TRUE; } else if (type == pcmk_cluster_corosync) { return TRUE; } else if (type == pcmk_cluster_cman) { return TRUE; } return FALSE; } gboolean is_heartbeat_cluster(void) { return get_cluster_type() == pcmk_cluster_heartbeat; } gboolean node_name_is_valid(const char *key, const char *name) { int octet; if (name == NULL) { crm_trace("%s is empty", key); return FALSE; } else if (sscanf(name, "%d.%d.%d.%d", &octet, &octet, &octet, &octet) == 4) { crm_trace("%s contains an ipv4 address, ignoring: %s", key, name); return FALSE; } else if (strstr(name, ":") != NULL) { crm_trace("%s contains an ipv6 address, ignoring: %s", key, name); return FALSE; } crm_trace("%s is valid", key); return TRUE; } diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c index 585459bbd1..68be6bdfb3 100644 --- a/lib/cluster/membership.c +++ b/lib/cluster/membership.c @@ -1,481 +1,483 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include GHashTable *crm_peer_id_cache = NULL; GHashTable *crm_peer_cache = NULL; unsigned long long crm_peer_seq = 0; gboolean crm_have_quorum = FALSE; gboolean crm_is_peer_active(const crm_node_t * node) { #if SUPPORT_COROSYNC if (is_openais_cluster()) { return crm_is_corosync_peer_active(node); } #endif #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { return crm_is_heartbeat_peer_active(node); } #endif crm_err("Unhandled cluster type: %s", name_for_cluster_type(get_cluster_type())); return FALSE; } static gboolean crm_reap_dead_member(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; crm_node_t *search = user_data; if (search == NULL) { return FALSE; } else if (search->id && node->id != search->id) { return FALSE; } else if (search->id == 0 && safe_str_neq(node->uname, search->uname)) { return FALSE; } else if (crm_is_peer_active(value) == FALSE) { crm_notice("Removing %s/%u from the membership list", node->uname, node->id); return TRUE; } return FALSE; } guint reap_crm_member(uint32_t id, const char *name) { int matches = 0; crm_node_t *node = NULL; if (crm_peer_cache == NULL || crm_peer_id_cache == NULL) { crm_trace("Nothing to do, cache not initialized"); return 0; } if (name) { node = g_hash_table_lookup(crm_peer_cache, name); } if (node == NULL && id > 0) { node = g_hash_table_lookup(crm_peer_id_cache, GUINT_TO_POINTER(id)); } if (node == NULL) { crm_info("Peer %u/%s cannot be purged: does not exist", id, name); return 0; } if (crm_is_peer_active(node)) { crm_warn("Peer %u/%s cannot be purged: still active", id, name); } else { if (g_hash_table_remove(crm_peer_id_cache, GUINT_TO_POINTER(id))) { crm_notice("Purged dead peer %u/%s from the uuid cache", id, name); } else if (id) { crm_warn("Peer %u/%s was not found in the ID cache", id, name); } matches = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, node); crm_notice("Purged %d dead peers with id=%u from the membership cache", matches, id); } return matches; } static void crm_count_peer(gpointer key, gpointer value, gpointer user_data) { guint *count = user_data; crm_node_t *node = value; if (crm_is_peer_active(node)) { *count = *count + 1; } } guint crm_active_peers(void) { guint count = 0; if (crm_peer_cache) { g_hash_table_foreach(crm_peer_cache, crm_count_peer, &count); } return count; } void destroy_crm_node(gpointer data) { crm_node_t *node = data; crm_trace("Destroying entry for node %u", node->id); free(node->addr); free(node->uname); free(node->state); free(node->uuid); free(node->expected); free(node); } static gboolean crm_strcase_equal (gconstpointer v1, gconstpointer v2) { const gchar *string1 = v1; const gchar *string2 = v2; return strcasecmp (string1, string2) == 0; } void crm_peer_init(void) { static gboolean initialized = FALSE; if (initialized) { return; } initialized = TRUE; crm_peer_destroy(); if (crm_peer_cache == NULL) { crm_peer_cache = g_hash_table_new_full(crm_str_hash, crm_strcase_equal, NULL, destroy_crm_node); } if (crm_peer_id_cache == NULL) { crm_peer_id_cache = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL); } } void crm_peer_destroy(void) { if (crm_peer_cache != NULL) { + crm_trace("Destroying peer cache with %d members", g_hash_table_size(crm_peer_cache)); g_hash_table_destroy(crm_peer_cache); crm_peer_cache = NULL; } if (crm_peer_id_cache != NULL) { + crm_trace("Destroying peer ID cache with %d members", g_hash_table_size(crm_peer_id_cache)); g_hash_table_destroy(crm_peer_id_cache); crm_peer_id_cache = NULL; } } void (*crm_status_callback) (enum crm_status_type, crm_node_t *, const void *) = NULL; void crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *)) { crm_status_callback = dispatch; } /* coverity[-alloc] Memory is referenced in one or both hashtables */ crm_node_t * crm_get_peer(unsigned int id, const char *uname) { crm_node_t *node = NULL; CRM_ASSERT(id > 0 || uname != NULL); crm_peer_init(); if (node == NULL && uname != NULL) { node = g_hash_table_lookup(crm_peer_cache, uname); } if (node == NULL && id > 0) { node = g_hash_table_lookup(crm_peer_id_cache, GUINT_TO_POINTER(id)); if (node && node->uname && uname) { crm_crit("Node %s and %s share the same cluster node id '%u'!", node->uname, uname, id); /* NOTE: Calling crm_new_peer() means the entry in * crm_peer_id_cache will point to the new entity * * TO-DO: Replace the old uname instead? */ node = NULL; } } if (node == NULL) { crm_debug("Creating entry for node %s/%u", uname, id); node = calloc(1, sizeof(crm_node_t)); CRM_ASSERT(node); } if (id > 0 && node->id != id) { unsigned int previous_id = node->id; crm_node_t *old = g_hash_table_lookup(crm_peer_id_cache, GUINT_TO_POINTER(id)); node->id = id; crm_info("Node %s now has id: %u", crm_str(uname), id); if (old && old->state) { /* Only corosync based clusters use nodeid's * The functions that call crm_update_peer_state() only know nodeid so 'old' is authorative when merging * Same for crm_update_peer_proc() */ crm_update_peer_state(__FUNCTION__, node, old->state, 0); crm_update_peer_proc(__FUNCTION__, node, old->processes, NULL); } if (previous_id > 0) { crm_node_t *node_entry = g_hash_table_lookup(crm_peer_id_cache, GUINT_TO_POINTER(previous_id)); if (node_entry != NULL && node_entry == node) { g_hash_table_steal(crm_peer_id_cache, GUINT_TO_POINTER(previous_id)); } } g_hash_table_replace(crm_peer_id_cache, GUINT_TO_POINTER(node->id), node); } if (uname && node->uname == NULL) { node->uname = strdup(uname); if (node->id) { crm_info("Node %u is now known as %s", node->id, uname); } g_hash_table_replace(crm_peer_cache, node->uname, node); if (crm_status_callback) { crm_status_callback(crm_status_uname, node, NULL); } } if (node && node->uname && node->uuid == NULL) { const char *uuid = get_node_uuid(id, node->uname); if (uuid) { node->uuid = strdup(uuid); crm_info("Node %u has uuid %s", id, node->uuid); } else { crm_warn("Cannot obtain a UUID for node %d/%s", id, node->uname); } } return node; } crm_node_t * crm_update_peer(const char *source, unsigned int id, uint64_t born, uint64_t seen, int32_t votes, uint32_t children, const char *uuid, const char *uname, const char *addr, const char *state) { #if SUPPORT_PLUGIN gboolean addr_changed = FALSE; gboolean votes_changed = FALSE; #endif crm_node_t *node = NULL; id = get_corosync_id(id, uuid); node = crm_get_peer(id, uname); CRM_ASSERT(node != NULL); if (node->uuid == NULL) { if (is_openais_cluster()) { /* Yes, overrule whatever was passed in */ node->uuid = get_corosync_uuid(id, uname); } else if (uuid != NULL) { node->uuid = strdup(uuid); } } if (children > 0) { crm_update_peer_proc(source, node, children, state); } if (state != NULL) { crm_update_peer_state(source, node, state, seen); } #if SUPPORT_HEARTBEAT if (born != 0) { node->born = born; } #endif #if SUPPORT_PLUGIN /* These were only used by the plugin */ if (born != 0) { node->born = born; } if (votes > 0 && node->votes != votes) { votes_changed = TRUE; node->votes = votes; } if (addr != NULL) { if (node->addr == NULL || crm_str_eq(node->addr, addr, FALSE) == FALSE) { addr_changed = TRUE; free(node->addr); node->addr = strdup(addr); } } if (addr_changed || votes_changed) { crm_info("%s: Node %s: id=%u state=%s addr=%s%s votes=%d%s born=" U64T " seen=" U64T " proc=%.32x", source, node->uname, node->id, node->state, node->addr, addr_changed ? " (new)" : "", node->votes, votes_changed ? " (new)" : "", node->born, node->last_seen, node->processes); } #endif return node; } void crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const char *status) { uint32_t last = 0; gboolean changed = FALSE; CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL", source, peer2text(flag), status); return); last = node->processes; if (status == NULL) { node->processes = flag; if (node->processes != last) { changed = TRUE; } } else if (safe_str_eq(status, ONLINESTATUS)) { if ((node->processes & flag) == 0) { set_bit(node->processes, flag); changed = TRUE; } #if SUPPORT_PLUGIN } else if (safe_str_eq(status, CRM_NODE_MEMBER)) { if (flag > 0 && node->processes != flag) { node->processes = flag; changed = TRUE; } #endif } else if (node->processes & flag) { clear_bit(node->processes, flag); changed = TRUE; } if (changed) { if (status == NULL && flag <= crm_proc_none) { crm_info("%s: Node %s[%u] - all processes are now offline", source, node->uname, node->id); } else { crm_info("%s: Node %s[%u] - %s is now %s", source, node->uname, node->id, peer2text(flag), status); } if (crm_status_callback) { crm_status_callback(crm_status_processes, node, &last); } } else { crm_trace("%s: Node %s[%u] - %s is unchanged (%s)", source, node->uname, node->id, peer2text(flag), status); } } void crm_update_peer_expected(const char *source, crm_node_t * node, const char *expected) { char *last = NULL; gboolean changed = FALSE; CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected); return); last = node->expected; if (expected != NULL && safe_str_neq(node->expected, expected)) { node->expected = strdup(expected); changed = TRUE; } if (changed) { crm_info("%s: Node %s[%u] - expected state is now %s", source, node->uname, node->id, expected); free(last); } else { crm_trace("%s: Node %s[%u] - expected state is unchanged (%s)", source, node->uname, node->id, expected); } } void crm_update_peer_state(const char *source, crm_node_t * node, const char *state, int membership) { char *last = NULL; gboolean changed = FALSE; CRM_CHECK(node != NULL, crm_err("%s: Could not set 'state' to %s", source, state); return); last = node->state; if (state != NULL && safe_str_neq(node->state, state)) { node->state = strdup(state); changed = TRUE; } if (membership != 0 && safe_str_eq(node->state, CRM_NODE_MEMBER)) { node->last_seen = membership; } if (changed) { crm_notice("%s: Node %s[%u] - state is now %s (was %s)", source, node->uname, node->id, state, last); if (crm_status_callback) { crm_status_callback(crm_status_nstate, node, last); } free(last); } else { crm_trace("%s: Node %s[%u] - state is unchanged (%s)", source, node->uname, node->id, state); } } int crm_terminate_member(int nodeid, const char *uname, void *unused) { /* Always use the synchronous, non-mainloop version */ return stonith_api_kick(nodeid, uname, 120, TRUE); } int crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection) { return stonith_api_kick(nodeid, uname, 120, TRUE); } diff --git a/lib/common/mainloop.c b/lib/common/mainloop.c index 85c19570ba..d1ebbb9dc0 100644 --- a/lib/common/mainloop.c +++ b/lib/common/mainloop.c @@ -1,934 +1,932 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include struct mainloop_child_s { pid_t pid; char *desc; unsigned timerid; unsigned watchid; gboolean timeout; void *privatedata; /* Called when a process dies */ void (*callback) (mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode); }; struct trigger_s { GSource source; gboolean running; gboolean trigger; void *user_data; guint id; }; static gboolean crm_trigger_prepare(GSource * source, gint * timeout) { crm_trigger_t *trig = (crm_trigger_t *) source; /* cluster-glue's FD and IPC related sources make use of * g_source_add_poll() but do not set a timeout in their prepare * functions * * This means mainloop's poll() will block until an event for one * of these sources occurs - any /other/ type of source, such as * this one or g_idle_*, that doesn't use g_source_add_poll() is * S-O-L and wont be processed until there is something fd-based * happens. * * Luckily the timeout we can set here affects all sources and * puts an upper limit on how long poll() can take. * * So unconditionally set a small-ish timeout, not too small that * we're in constant motion, which will act as an upper bound on * how long the signal handling might be delayed for. */ *timeout = 500; /* Timeout in ms */ return trig->trigger; } static gboolean crm_trigger_check(GSource * source) { crm_trigger_t *trig = (crm_trigger_t *) source; return trig->trigger; } static gboolean crm_trigger_dispatch(GSource * source, GSourceFunc callback, gpointer userdata) { int rc = TRUE; crm_trigger_t *trig = (crm_trigger_t *) source; if (trig->running) { /* Wait until the existing job is complete before starting the next one */ return TRUE; } trig->trigger = FALSE; if (callback) { rc = callback(trig->user_data); if (rc < 0) { crm_trace("Trigger handler %p not yet complete", trig); trig->running = TRUE; rc = TRUE; } } return rc; } static GSourceFuncs crm_trigger_funcs = { crm_trigger_prepare, crm_trigger_check, crm_trigger_dispatch, NULL }; static crm_trigger_t * mainloop_setup_trigger(GSource * source, int priority, int (*dispatch) (gpointer user_data), gpointer userdata) { crm_trigger_t *trigger = NULL; trigger = (crm_trigger_t *) source; trigger->id = 0; trigger->trigger = FALSE; trigger->user_data = userdata; if (dispatch) { g_source_set_callback(source, dispatch, trigger, NULL); } g_source_set_priority(source, priority); g_source_set_can_recurse(source, FALSE); trigger->id = g_source_attach(source, NULL); return trigger; } void mainloop_trigger_complete(crm_trigger_t * trig) { crm_trace("Trigger handler %p complete", trig); trig->running = FALSE; } /* If dispatch returns: * -1: Job running but not complete * 0: Remove the trigger from mainloop * 1: Leave the trigger in mainloop */ crm_trigger_t * mainloop_add_trigger(int priority, int (*dispatch) (gpointer user_data), gpointer userdata) { GSource *source = NULL; CRM_ASSERT(sizeof(crm_trigger_t) > sizeof(GSource)); source = g_source_new(&crm_trigger_funcs, sizeof(crm_trigger_t)); CRM_ASSERT(source != NULL); return mainloop_setup_trigger(source, priority, dispatch, userdata); } void mainloop_set_trigger(crm_trigger_t * source) { source->trigger = TRUE; } gboolean mainloop_destroy_trigger(crm_trigger_t * source) { - source->trigger = FALSE; - if (source->id > 0) { - g_source_remove(source->id); - source->id = 0; - } + GSource *gs = (GSource *)source; + g_source_destroy(gs); return TRUE; } typedef struct signal_s { crm_trigger_t trigger; /* must be first */ void (*handler) (int sig); int signal; } crm_signal_t; static crm_signal_t *crm_signals[NSIG]; static gboolean crm_signal_dispatch(GSource * source, GSourceFunc callback, gpointer userdata) { crm_signal_t *sig = (crm_signal_t *) source; if(sig->signal != SIGCHLD) { crm_info("Invoking handler for signal %d: %s", sig->signal, strsignal(sig->signal)); } sig->trigger.trigger = FALSE; if (sig->handler) { sig->handler(sig->signal); } return TRUE; } static void mainloop_signal_handler(int sig) { if (sig > 0 && sig < NSIG && crm_signals[sig] != NULL) { mainloop_set_trigger((crm_trigger_t *) crm_signals[sig]); } } static GSourceFuncs crm_signal_funcs = { crm_trigger_prepare, crm_trigger_check, crm_signal_dispatch, NULL }; gboolean crm_signal(int sig, void (*dispatch) (int sig)) { sigset_t mask; struct sigaction sa; struct sigaction old; if (sigemptyset(&mask) < 0) { crm_perror(LOG_ERR, "Call to sigemptyset failed"); return FALSE; } memset(&sa, 0, sizeof(struct sigaction)); sa.sa_handler = dispatch; sa.sa_flags = SA_RESTART; sa.sa_mask = mask; if (sigaction(sig, &sa, &old) < 0) { crm_perror(LOG_ERR, "Could not install signal handler for signal %d", sig); return FALSE; } return TRUE; } gboolean mainloop_add_signal(int sig, void (*dispatch) (int sig)) { GSource *source = NULL; int priority = G_PRIORITY_HIGH - 1; if (sig == SIGTERM) { /* TERM is higher priority than other signals, * signals are higher priority than other ipc. * Yes, minus: smaller is "higher" */ priority--; } if (sig >= NSIG || sig < 0) { crm_err("Signal %d is out of range", sig); return FALSE; } else if (crm_signals[sig] != NULL && crm_signals[sig]->handler == dispatch) { crm_trace("Signal handler for %d is already installed", sig); return TRUE; } else if (crm_signals[sig] != NULL) { crm_err("Different signal handler for %d is already installed", sig); return FALSE; } CRM_ASSERT(sizeof(crm_signal_t) > sizeof(GSource)); source = g_source_new(&crm_signal_funcs, sizeof(crm_signal_t)); crm_signals[sig] = (crm_signal_t *) mainloop_setup_trigger(source, priority, NULL, NULL); CRM_ASSERT(crm_signals[sig] != NULL); crm_signals[sig]->handler = dispatch; crm_signals[sig]->signal = sig; if (crm_signal(sig, mainloop_signal_handler) == FALSE) { crm_signal_t *tmp = crm_signals[sig]; crm_signals[sig] = NULL; mainloop_destroy_trigger((crm_trigger_t *) tmp); return FALSE; } #if 0 /* If we want signals to interrupt mainloop's poll(), instead of waiting for * the timeout, then we should call siginterrupt() below * * For now, just enforce a low timeout */ if (siginterrupt(sig, 1) < 0) { crm_perror(LOG_INFO, "Could not enable system call interruptions for signal %d", sig); } #endif return TRUE; } gboolean mainloop_destroy_signal(int sig) { crm_signal_t *tmp = NULL; if (sig >= NSIG || sig < 0) { crm_err("Signal %d is out of range", sig); return FALSE; } else if (crm_signal(sig, NULL) == FALSE) { crm_perror(LOG_ERR, "Could not uninstall signal handler for signal %d", sig); return FALSE; } else if (crm_signals[sig] == NULL) { return TRUE; } + crm_trace("Destroying signal %d", sig); tmp = crm_signals[sig]; crm_signals[sig] = NULL; mainloop_destroy_trigger((crm_trigger_t *) tmp); return TRUE; } static qb_array_t *gio_map = NULL; /* * libqb... */ struct gio_to_qb_poll { int32_t is_used; GIOChannel *channel; guint source; int32_t events; void *data; qb_ipcs_dispatch_fn_t fn; enum qb_loop_priority p; }; static int gio_adapter_refcount(struct gio_to_qb_poll *adaptor) { /* This is evil * Looking at the giochannel header file, ref_count is the first member of channel * So cheat... */ if (adaptor && adaptor->channel) { int *ref = (void *)adaptor->channel; return *ref; } return 0; } static gboolean gio_read_socket(GIOChannel * gio, GIOCondition condition, gpointer data) { struct gio_to_qb_poll *adaptor = (struct gio_to_qb_poll *)data; gint fd = g_io_channel_unix_get_fd(gio); crm_trace("%p.%d %d (ref=%d)", data, fd, condition, gio_adapter_refcount(adaptor)); if (condition & G_IO_NVAL) { crm_trace("Marking failed adaptor %p unused", adaptor); adaptor->is_used = QB_FALSE; } return (adaptor->fn(fd, condition, adaptor->data) == 0); } static void gio_poll_destroy(gpointer data) { /* adaptor->source is valid but about to be destroyed (ref_count == 0) in gmain.c * adaptor->channel will still have ref_count > 0... should be == 1 */ struct gio_to_qb_poll *adaptor = (struct gio_to_qb_poll *)data; crm_trace("Destroying adaptor %p channel %p (ref=%d)", adaptor, adaptor->channel, gio_adapter_refcount(adaptor)); adaptor->is_used = QB_FALSE; adaptor->channel = NULL; adaptor->source = 0; } static int32_t gio_poll_dispatch_add(enum qb_loop_priority p, int32_t fd, int32_t evts, void *data, qb_ipcs_dispatch_fn_t fn) { struct gio_to_qb_poll *adaptor; GIOChannel *channel; int32_t res = 0; res = qb_array_index(gio_map, fd, (void **)&adaptor); if (res < 0) { crm_err("Array lookup failed for fd=%d: %d", fd, res); return res; } crm_trace("Adding fd=%d to mainloop as adapater %p", fd, adaptor); if (adaptor->is_used) { crm_err("Adapter for descriptor %d is still in-use", fd); return -EEXIST; } /* channel is created with ref_count = 1 */ channel = g_io_channel_unix_new(fd); if (!channel) { crm_err("No memory left to add fd=%d", fd); return -ENOMEM; } /* Because unlike the poll() API, glib doesn't tell us about HUPs by default */ evts |= (G_IO_HUP | G_IO_NVAL | G_IO_ERR); adaptor->channel = channel; adaptor->fn = fn; adaptor->events = evts; adaptor->data = data; adaptor->p = p; adaptor->is_used = QB_TRUE; adaptor->source = g_io_add_watch_full(channel, G_PRIORITY_DEFAULT, evts, gio_read_socket, adaptor, gio_poll_destroy); /* Now that mainloop now holds a reference to adaptor->channel, * thanks to g_io_add_watch_full(), drop ours from g_io_channel_unix_new(). * * This means that adaptor->channel will be free'd by: * g_main_context_dispatch() * -> g_source_destroy_internal() * -> g_source_callback_unref() * shortly after gio_poll_destroy() completes */ g_io_channel_unref(adaptor->channel); crm_trace("Added to mainloop with gsource id=%d, ref=%d", adaptor->source, gio_adapter_refcount(adaptor)); if (adaptor->source > 0) { return 0; } return -EINVAL; } static int32_t gio_poll_dispatch_mod(enum qb_loop_priority p, int32_t fd, int32_t evts, void *data, qb_ipcs_dispatch_fn_t fn) { return 0; } static int32_t gio_poll_dispatch_del(int32_t fd) { struct gio_to_qb_poll *adaptor; crm_trace("Looking for fd=%d", fd); if (qb_array_index(gio_map, fd, (void **)&adaptor) == 0) { crm_trace("Marking adaptor %p unused (ref=%d)", adaptor, gio_adapter_refcount(adaptor)); adaptor->is_used = QB_FALSE; } return 0; } struct qb_ipcs_poll_handlers gio_poll_funcs = { .job_add = NULL, .dispatch_add = gio_poll_dispatch_add, .dispatch_mod = gio_poll_dispatch_mod, .dispatch_del = gio_poll_dispatch_del, }; static enum qb_ipc_type pick_ipc_type(enum qb_ipc_type requested) { const char *env = getenv("PCMK_ipc_type"); if (env && strcmp("shared-mem", env) == 0) { return QB_IPC_SHM; } else if (env && strcmp("socket", env) == 0) { return QB_IPC_SOCKET; } else if (env && strcmp("posix", env) == 0) { return QB_IPC_POSIX_MQ; } else if (env && strcmp("sysv", env) == 0) { return QB_IPC_SYSV_MQ; } else if (requested == QB_IPC_NATIVE) { /* We prefer shared memory because the server never blocks on * send. If part of a message fits into the socket, libqb * needs to block until the remainder can be sent also. * Otherwise the client will wait forever for the remaining * bytes. */ return QB_IPC_SHM; } return requested; } qb_ipcs_service_t * mainloop_add_ipc_server(const char *name, enum qb_ipc_type type, struct qb_ipcs_service_handlers * callbacks) { int rc = 0; qb_ipcs_service_t *server = NULL; if (gio_map == NULL) { gio_map = qb_array_create_2(64, sizeof(struct gio_to_qb_poll), 1); } crm_client_init(); server = qb_ipcs_create(name, 0, pick_ipc_type(type), callbacks); qb_ipcs_poll_handlers_set(server, &gio_poll_funcs); rc = qb_ipcs_run(server); if (rc < 0) { crm_err("Could not start %s IPC server: %s (%d)", name, pcmk_strerror(rc), rc); return NULL; } return server; } void mainloop_del_ipc_server(qb_ipcs_service_t * server) { if (server) { qb_ipcs_destroy(server); } } struct mainloop_io_s { char *name; void *userdata; guint source; crm_ipc_t *ipc; GIOChannel *channel; int (*dispatch_fn_ipc) (const char *buffer, ssize_t length, gpointer userdata); int (*dispatch_fn_io) (gpointer userdata); void (*destroy_fn) (gpointer userdata); }; static int mainloop_gio_refcount(mainloop_io_t * client) { /* This is evil * Looking at the giochannel header file, ref_count is the first member of channel * So cheat... */ if (client && client->channel) { int *ref = (void *)client->channel; return *ref; } return 0; } static gboolean mainloop_gio_callback(GIOChannel * gio, GIOCondition condition, gpointer data) { gboolean keep = TRUE; mainloop_io_t *client = data; if (condition & G_IO_IN) { if (client->ipc) { long rc = 0; int max = 10; do { rc = crm_ipc_read(client->ipc); if (rc <= 0) { crm_trace("Message acquisition from %s[%p] failed: %s (%ld)", client->name, client, pcmk_strerror(rc), rc); } else if (client->dispatch_fn_ipc) { const char *buffer = crm_ipc_buffer(client->ipc); crm_trace("New message from %s[%p] = %d", client->name, client, rc, condition); if (client->dispatch_fn_ipc(buffer, rc, client->userdata) < 0) { crm_trace("Connection to %s no longer required", client->name); keep = FALSE; } } } while (keep && rc > 0 && --max > 0); } else { crm_trace("New message from %s[%p]", client->name, client); if (client->dispatch_fn_io) { if (client->dispatch_fn_io(client->userdata) < 0) { crm_trace("Connection to %s no longer required", client->name); keep = FALSE; } } } } if (client->ipc && crm_ipc_connected(client->ipc) == FALSE) { crm_err("Connection to %s[%p] closed (I/O condition=%d)", client->name, client, condition); keep = FALSE; } else if (condition & (G_IO_HUP | G_IO_NVAL | G_IO_ERR)) { crm_trace("The connection %s[%p] has been closed (I/O condition=%d, refcount=%d)", client->name, client, condition, mainloop_gio_refcount(client)); keep = FALSE; } else if ((condition & G_IO_IN) == 0) { /* #define GLIB_SYSDEF_POLLIN =1 #define GLIB_SYSDEF_POLLPRI =2 #define GLIB_SYSDEF_POLLOUT =4 #define GLIB_SYSDEF_POLLERR =8 #define GLIB_SYSDEF_POLLHUP =16 #define GLIB_SYSDEF_POLLNVAL =32 typedef enum { G_IO_IN GLIB_SYSDEF_POLLIN, G_IO_OUT GLIB_SYSDEF_POLLOUT, G_IO_PRI GLIB_SYSDEF_POLLPRI, G_IO_ERR GLIB_SYSDEF_POLLERR, G_IO_HUP GLIB_SYSDEF_POLLHUP, G_IO_NVAL GLIB_SYSDEF_POLLNVAL } GIOCondition; A bitwise combination representing a condition to watch for on an event source. G_IO_IN There is data to read. G_IO_OUT Data can be written (without blocking). G_IO_PRI There is urgent data to read. G_IO_ERR Error condition. G_IO_HUP Hung up (the connection has been broken, usually for pipes and sockets). G_IO_NVAL Invalid request. The file descriptor is not open. */ crm_err("Strange condition: %d", condition); } /* keep == FALSE results in mainloop_gio_destroy() being called * just before the source is removed from mainloop */ return keep; } static void mainloop_gio_destroy(gpointer c) { mainloop_io_t *client = c; /* client->source is valid but about to be destroyed (ref_count == 0) in gmain.c * client->channel will still have ref_count > 0... should be == 1 */ crm_trace("Destroying client %s[%p] %d", client->name, c, mainloop_gio_refcount(client)); if (client->ipc) { crm_ipc_close(client->ipc); } if (client->destroy_fn) { client->destroy_fn(client->userdata); } if (client->ipc) { crm_ipc_destroy(client->ipc); } crm_trace("Destroyed client %s[%p] %d", client->name, c, mainloop_gio_refcount(client)); free(client->name); memset(client, 0, sizeof(mainloop_io_t)); /* A bit of pointless paranoia */ free(client); } mainloop_io_t * mainloop_add_ipc_client(const char *name, int priority, size_t max_size, void *userdata, struct ipc_client_callbacks *callbacks) { mainloop_io_t *client = NULL; crm_ipc_t *conn = crm_ipc_new(name, max_size); if (conn && crm_ipc_connect(conn)) { int32_t fd = crm_ipc_get_fd(conn); client = mainloop_add_fd(name, priority, fd, userdata, NULL); client->ipc = conn; client->destroy_fn = callbacks->destroy; client->dispatch_fn_ipc = callbacks->dispatch; } if (conn && client == NULL) { crm_trace("Connection to %s failed", name); crm_ipc_close(conn); crm_ipc_destroy(conn); } return client; } void mainloop_del_ipc_client(mainloop_io_t * client) { mainloop_del_fd(client); } crm_ipc_t * mainloop_get_ipc_client(mainloop_io_t * client) { if (client) { return client->ipc; } return NULL; } mainloop_io_t * mainloop_add_fd(const char *name, int priority, int fd, void *userdata, struct mainloop_fd_callbacks * callbacks) { mainloop_io_t *client = NULL; if (fd > 0) { client = calloc(1, sizeof(mainloop_io_t)); client->name = strdup(name); client->userdata = userdata; if (callbacks) { client->destroy_fn = callbacks->destroy; client->dispatch_fn_io = callbacks->dispatch; } client->channel = g_io_channel_unix_new(fd); client->source = g_io_add_watch_full(client->channel, priority, (G_IO_IN | G_IO_HUP | G_IO_NVAL | G_IO_ERR), mainloop_gio_callback, client, mainloop_gio_destroy); /* Now that mainloop now holds a reference to adaptor->channel, * thanks to g_io_add_watch_full(), drop ours from g_io_channel_unix_new(). * * This means that adaptor->channel will be free'd by: * g_main_context_dispatch() or g_source_remove() * -> g_source_destroy_internal() * -> g_source_callback_unref() * shortly after mainloop_gio_destroy() completes */ g_io_channel_unref(client->channel); crm_trace("Added connection %d for %s[%p].%d %d", client->source, client->name, client, fd, mainloop_gio_refcount(client)); } return client; } void mainloop_del_fd(mainloop_io_t * client) { if (client != NULL) { crm_trace("Removing client %s[%p] %d", client->name, client, mainloop_gio_refcount(client)); if (client->source) { /* Results in mainloop_gio_destroy() being called just * before the source is removed from mainloop */ g_source_remove(client->source); } } } pid_t mainloop_child_pid(mainloop_child_t * child) { return child->pid; } const char * mainloop_child_name(mainloop_child_t * child) { return child->desc; } int mainloop_child_timeout(mainloop_child_t * child) { return child->timeout; } void * mainloop_child_userdata(mainloop_child_t * child) { return child->privatedata; } void mainloop_clear_child_userdata(mainloop_child_t * child) { child->privatedata = NULL; } static gboolean child_timeout_callback(gpointer p) { mainloop_child_t *child = p; child->timerid = 0; if (child->timeout) { crm_crit("%s process (PID %d) will not die!", child->desc, (int)child->pid); return FALSE; } child->timeout = TRUE; crm_warn("%s process (PID %d) timed out", child->desc, (int)child->pid); if (kill(child->pid, SIGKILL) < 0) { if (errno == ESRCH) { /* Nothing left to do */ return FALSE; } crm_perror(LOG_ERR, "kill(%d, KILL) failed", child->pid); } child->timerid = g_timeout_add(5000, child_timeout_callback, child); return FALSE; } static GListPtr child_list = NULL; static void child_death_dispatch(int signal) { GListPtr iter = child_list; while(iter) { int rc = 0; int core = 0; int signo = 0; int status = 0; int exitcode = 0; GListPtr saved = NULL; mainloop_child_t *child = iter->data; rc = waitpid(child->pid, &status, WNOHANG); if(rc == 0) { iter = iter->next; continue; } else if(rc != child->pid) { signo = signal; exitcode = 1; status = 1; crm_perror(LOG_ERR, "Call to waitpid(%d) failed", child->pid); } else { crm_trace("Managed process %d exited: %p", child->pid, child); if (WIFEXITED(status)) { exitcode = WEXITSTATUS(status); crm_trace("Managed process %d (%s) exited with rc=%d", child->pid, child->desc, exitcode); } else if (WIFSIGNALED(status)) { signo = WTERMSIG(status); crm_trace("Managed process %d (%s) exited with signal=%d", child->pid, child->desc, signo); } #ifdef WCOREDUMP if (WCOREDUMP(status)) { core = 1; crm_err("Managed process %d (%s) dumped core", child->pid, child->desc); } #endif } if (child->callback) { child->callback(child, child->pid, core, signo, exitcode); } crm_trace("Removing process entry %p for %d", child, child->pid); saved = iter; iter = iter->next; child_list = g_list_remove_link(child_list, saved); g_list_free(saved); if (child->timerid != 0) { crm_trace("Removing timer %d", child->timerid); g_source_remove(child->timerid); child->timerid = 0; } free(child->desc); free(child); } } /* Create/Log a new tracked process * To track a process group, use -pid */ void mainloop_child_add(pid_t pid, int timeout, const char *desc, void *privatedata, void (*callback) (mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)) { static bool need_init = TRUE; mainloop_child_t *child = g_new(mainloop_child_t, 1); child->pid = pid; child->timerid = 0; child->timeout = FALSE; child->privatedata = privatedata; child->callback = callback; if(desc) { child->desc = strdup(desc); } if (timeout) { child->timerid = g_timeout_add(timeout, child_timeout_callback, child); } child_list = g_list_append(child_list, child); if(need_init) { need_init = FALSE; /* Do NOT use g_child_watch_add() and friends, they rely on pthreads */ mainloop_add_signal(SIGCHLD, child_death_dispatch); /* In case they terminated before the signal handler was installed */ child_death_dispatch(SIGCHLD); } } diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c index 8879cfdaf4..820c8c14a0 100644 --- a/lib/fencing/st_client.c +++ b/lib/fencing/st_client.c @@ -1,2451 +1,2454 @@ /* * Copyright (c) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include #include #include #include #include #include #include #include #include #include #include #include #include /* Add it for compiling on OSX */ #include #include #include #include #include #ifdef HAVE_STONITH_STONITH_H # include # define LHA_STONITH_LIBRARY "libstonith.so.1" static void *lha_agents_lib = NULL; #endif #include CRM_TRACE_INIT_DATA(stonith); struct stonith_action_s { /*! user defined data */ char *agent; char *action; char *victim; char *args; int timeout; int async; void *userdata; void (*done_cb) (GPid pid, gint status, const char *output, gpointer user_data); /*! internal async track data */ int fd_stdout; int last_timeout_signo; /*! internal timing information */ time_t initial_start_time; int tries; int remaining_timeout; guint timer_sigterm; guint timer_sigkill; int max_retries; /* device output data */ GPid pid; int rc; char *output; }; typedef struct stonith_private_s { char *token; crm_ipc_t *ipc; mainloop_io_t *source; GHashTable *stonith_op_callback_table; GList *notify_list; void (*op_callback) (stonith_t * st, stonith_callback_data_t * data); } stonith_private_t; typedef struct stonith_notify_client_s { const char *event; const char *obj_id; /* implement one day */ const char *obj_type; /* implement one day */ void (*notify) (stonith_t * st, stonith_event_t * e); } stonith_notify_client_t; typedef struct stonith_callback_client_s { void (*callback) (stonith_t * st, stonith_callback_data_t * data); const char *id; void *user_data; gboolean only_success; gboolean allow_timeout_updates; struct timer_rec_s *timer; } stonith_callback_client_t; struct notify_blob_s { stonith_t *stonith; xmlNode *xml; }; struct timer_rec_s { int call_id; int timeout; guint ref; stonith_t *stonith; }; typedef int (*stonith_op_t) (const char *, int, const char *, xmlNode *, xmlNode *, xmlNode *, xmlNode **, xmlNode **); static const char META_TEMPLATE[] = "\n" "\n" "\n" " 1.0\n" " \n" "%s\n" " \n" " %s\n" "%s\n" " \n" " \n" " \n" " \n" " \n" " \n" " \n" " \n" " 2.0\n" " \n" "\n"; bool stonith_dispatch(stonith_t * st); int stonith_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata); void stonith_perform_callback(stonith_t * stonith, xmlNode * msg, int call_id, int rc); xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, int call_options); int stonith_send_command(stonith_t * stonith, const char *op, xmlNode * data, xmlNode ** output_data, int call_options, int timeout); static void stonith_connection_destroy(gpointer user_data); static void stonith_send_notification(gpointer data, gpointer user_data); static int internal_stonith_action_execute(stonith_action_t * action); static void stonith_connection_destroy(gpointer user_data) { stonith_t *stonith = user_data; stonith_private_t *native = NULL; struct notify_blob_s blob; crm_trace("Sending destroyed notification"); blob.stonith = stonith; blob.xml = create_xml_node(NULL, "notify"); native = stonith->private; native->ipc = NULL; native->source = NULL; stonith->state = stonith_disconnected; crm_xml_add(blob.xml, F_TYPE, T_STONITH_NOTIFY); crm_xml_add(blob.xml, F_SUBTYPE, T_STONITH_NOTIFY_DISCONNECT); g_list_foreach(native->notify_list, stonith_send_notification, &blob); free_xml(blob.xml); } xmlNode * create_device_registration_xml(const char *id, const char *namespace, const char *agent, stonith_key_value_t * params) { xmlNode *data = create_xml_node(NULL, F_STONITH_DEVICE); xmlNode *args = create_xml_node(data, XML_TAG_ATTRS); #if HAVE_STONITH_STONITH_H namespace = get_stonith_provider(agent, namespace); if (safe_str_eq(namespace, "heartbeat")) { hash2field((gpointer) "plugin", (gpointer) agent, args); agent = "fence_legacy"; } #endif crm_xml_add(data, XML_ATTR_ID, id); crm_xml_add(data, "origin", __FUNCTION__); crm_xml_add(data, "agent", agent); crm_xml_add(data, "namespace", namespace); for (; params; params = params->next) { hash2field((gpointer) params->key, (gpointer) params->value, args); } return data; } static int stonith_api_register_device(stonith_t * st, int call_options, const char *id, const char *namespace, const char *agent, stonith_key_value_t * params) { int rc = 0; xmlNode *data = NULL; data = create_device_registration_xml(id, namespace, agent, params); rc = stonith_send_command(st, STONITH_OP_DEVICE_ADD, data, NULL, call_options, 0); free_xml(data); return rc; } static int stonith_api_remove_device(stonith_t * st, int call_options, const char *name) { int rc = 0; xmlNode *data = NULL; data = create_xml_node(NULL, F_STONITH_DEVICE); crm_xml_add(data, "origin", __FUNCTION__); crm_xml_add(data, XML_ATTR_ID, name); rc = stonith_send_command(st, STONITH_OP_DEVICE_DEL, data, NULL, call_options, 0); free_xml(data); return rc; } static int stonith_api_remove_level(stonith_t * st, int options, const char *node, int level) { int rc = 0; xmlNode *data = NULL; data = create_xml_node(NULL, F_STONITH_LEVEL); crm_xml_add(data, "origin", __FUNCTION__); crm_xml_add(data, F_STONITH_TARGET, node); crm_xml_add_int(data, XML_ATTR_ID, level); rc = stonith_send_command(st, STONITH_OP_LEVEL_DEL, data, NULL, options, 0); free_xml(data); return rc; } xmlNode * create_level_registration_xml(const char *node, int level, stonith_key_value_t * device_list) { xmlNode *data = create_xml_node(NULL, F_STONITH_LEVEL); crm_xml_add_int(data, XML_ATTR_ID, level); crm_xml_add(data, F_STONITH_TARGET, node); crm_xml_add(data, "origin", __FUNCTION__); for (; device_list; device_list = device_list->next) { xmlNode *dev = create_xml_node(data, F_STONITH_DEVICE); crm_xml_add(dev, XML_ATTR_ID, device_list->value); } return data; } static int stonith_api_register_level(stonith_t * st, int options, const char *node, int level, stonith_key_value_t * device_list) { int rc = 0; xmlNode *data = create_level_registration_xml(node, level, device_list); rc = stonith_send_command(st, STONITH_OP_LEVEL_ADD, data, NULL, options, 0); free_xml(data); return rc; } static void append_arg(gpointer key, gpointer value, gpointer user_data) { int len = 3; /* =, \n, \0 */ int last = 0; char **args = user_data; CRM_CHECK(key != NULL, return); CRM_CHECK(value != NULL, return); if (strstr(key, "pcmk_")) { return; } else if (strstr(key, CRM_META)) { return; } else if (safe_str_eq(key, "crm_feature_set")) { return; } len += strlen(key); len += strlen(value); if (*args != NULL) { last = strlen(*args); } *args = realloc(*args, last + len); crm_trace("Appending: %s=%s", (char *)key, (char *)value); sprintf((*args) + last, "%s=%s\n", (char *)key, (char *)value); } static void append_const_arg(const char *key, const char *value, char **arg_list) { char *glib_sucks_key = strdup(key); char *glib_sucks_value = strdup(value); append_arg(glib_sucks_key, glib_sucks_value, arg_list); free(glib_sucks_value); free(glib_sucks_key); } static void append_host_specific_args(const char *victim, const char *map, GHashTable * params, char **arg_list) { char *name = NULL; int last = 0, lpc = 0, max = 0; if (map == NULL) { /* The best default there is for now... */ crm_debug("Using default arg map: port=uname"); append_const_arg("port", victim, arg_list); return; } max = strlen(map); crm_debug("Processing arg map: %s", map); for (; lpc < max + 1; lpc++) { if (isalpha(map[lpc])) { /* keep going */ } else if (map[lpc] == '=' || map[lpc] == ':') { free(name); name = calloc(1, 1 + lpc - last); memcpy(name, map + last, lpc - last); crm_debug("Got name: %s", name); last = lpc + 1; } else if (map[lpc] == 0 || map[lpc] == ',' || isspace(map[lpc])) { char *param = NULL; const char *value = NULL; param = calloc(1, 1 + lpc - last); memcpy(param, map + last, lpc - last); last = lpc + 1; crm_debug("Got key: %s", param); if (name == NULL) { crm_err("Misparsed '%s', found '%s' without a name", map, param); free(param); continue; } if (safe_str_eq(param, "uname")) { value = victim; } else { char *key = crm_meta_name(param); value = g_hash_table_lookup(params, key); free(key); } if (value) { crm_debug("Setting '%s'='%s' (%s) for %s", name, value, param, victim); append_const_arg(name, value, arg_list); } else { crm_err("No node attribute '%s' for '%s'", name, victim); } free(name); name = NULL; free(param); if (map[lpc] == 0) { break; } } else if (isspace(map[lpc])) { last = lpc; } } free(name); } static char * make_args(const char *action, const char *victim, uint32_t victim_nodeid, GHashTable * device_args, GHashTable * port_map) { char buffer[512]; char *arg_list = NULL; const char *value = NULL; CRM_CHECK(action != NULL, return NULL); if (device_args) { g_hash_table_foreach(device_args, append_arg, &arg_list); } buffer[511] = 0; snprintf(buffer, 511, "pcmk_%s_action", action); if (device_args) { value = g_hash_table_lookup(device_args, buffer); } if (value == NULL && device_args) { /* Legacy support for early 1.1 releases - Remove for 1.4 */ snprintf(buffer, 511, "pcmk_%s_cmd", action); value = g_hash_table_lookup(device_args, buffer); } if (value == NULL && device_args && safe_str_eq(action, "off")) { /* Legacy support for late 1.1 releases - Remove for 1.4 */ value = g_hash_table_lookup(device_args, "pcmk_poweroff_action"); } if (value) { crm_info("Substituting action '%s' for requested operation '%s'", value, action); action = value; } append_const_arg(STONITH_ATTR_ACTION_OP, action, &arg_list); if (victim && device_args) { const char *alias = victim; const char *param = g_hash_table_lookup(device_args, STONITH_ATTR_HOSTARG); if (port_map && g_hash_table_lookup(port_map, victim)) { alias = g_hash_table_lookup(port_map, victim); } /* Always supply the node's name too: * https://fedorahosted.org/cluster/wiki/FenceAgentAPI */ append_const_arg("nodename", victim, &arg_list); if (victim_nodeid) { char nodeid_str[33] = { 0, }; if (snprintf(nodeid_str, 33, "%u", (unsigned int)victim_nodeid)) { crm_info("For stonith action (%s) for victim %s, adding nodeid (%d) to parameters", action, victim, nodeid_str); append_const_arg("nodeid", nodeid_str, &arg_list); } } /* Check if we need to supply the victim in any other form */ if (param == NULL) { const char *map = g_hash_table_lookup(device_args, STONITH_ATTR_ARGMAP); if (map == NULL) { param = "port"; value = g_hash_table_lookup(device_args, param); } else { /* Legacy handling */ append_host_specific_args(alias, map, device_args, &arg_list); value = map; /* Nothing more to do */ } } else if (safe_str_eq(param, "none")) { value = param; /* Nothing more to do */ } else { value = g_hash_table_lookup(device_args, param); } /* Don't overwrite explictly set values for $param */ if (value == NULL || safe_str_eq(value, "dynamic")) { crm_debug("Performing %s action for node '%s' as '%s=%s'", action, victim, param, alias); append_const_arg(param, alias, &arg_list); } } return arg_list; } static gboolean st_child_term(gpointer data) { int rc = 0; stonith_action_t *track = data; crm_info("Child %d timed out, sending SIGTERM", track->pid); track->timer_sigterm = 0; track->last_timeout_signo = SIGTERM; rc = kill(track->pid, SIGTERM); if (rc < 0) { crm_perror(LOG_ERR, "Couldn't send SIGTERM to %d", track->pid); } return FALSE; } static gboolean st_child_kill(gpointer data) { int rc = 0; stonith_action_t *track = data; crm_info("Child %d timed out, sending SIGKILL", track->pid); track->timer_sigkill = 0; track->last_timeout_signo = SIGKILL; rc = kill(track->pid, SIGKILL); if (rc < 0) { crm_perror(LOG_ERR, "Couldn't send SIGKILL to %d", track->pid); } return FALSE; } static void stonith_action_clear_tracking_data(stonith_action_t * action) { if (action->timer_sigterm > 0) { g_source_remove(action->timer_sigterm); action->timer_sigterm = 0; } if (action->timer_sigkill > 0) { g_source_remove(action->timer_sigkill); action->timer_sigkill = 0; } if (action->fd_stdout) { close(action->fd_stdout); action->fd_stdout = 0; } free(action->output); action->output = NULL; action->rc = 0; action->pid = 0; action->last_timeout_signo = 0; } static void stonith_action_destroy(stonith_action_t * action) { stonith_action_clear_tracking_data(action); free(action->agent); free(action->args); free(action->action); free(action->victim); free(action); } #define FAILURE_MAX_RETRIES 2 stonith_action_t * stonith_action_create(const char *agent, const char *_action, const char *victim, uint32_t victim_nodeid, int timeout, GHashTable * device_args, GHashTable * port_map) { stonith_action_t *action; action = calloc(1, sizeof(stonith_action_t)); crm_info("Initiating action %s for agent %s (target=%s)", _action, agent, victim); action->args = make_args(_action, victim, victim_nodeid, device_args, port_map); action->agent = strdup(agent); action->action = strdup(_action); if (victim) { action->victim = strdup(victim); } action->timeout = action->remaining_timeout = timeout; action->max_retries = FAILURE_MAX_RETRIES; if (device_args) { char buffer[512]; const char *value = NULL; snprintf(buffer, 511, "pcmk_%s_retries", _action); value = g_hash_table_lookup(device_args, buffer); if (value) { action->max_retries = atoi(value); } } return action; } #define READ_MAX 500 static char * read_output(int fd) { char buffer[READ_MAX]; char *output = NULL; int len = 0; int more = 0; if (!fd) { return NULL; } do { errno = 0; memset(&buffer, 0, READ_MAX); more = read(fd, buffer, READ_MAX - 1); if (more > 0) { buffer[more] = 0; /* Make sure its nul-terminated for logging * 'more' is always less than our buffer size */ crm_trace("Got %d more bytes: %.200s...", more, buffer); output = realloc(output, len + more + 1); snprintf(output + len, more + 1, "%s", buffer); len += more; } } while (more == (READ_MAX - 1) || (more < 0 && errno == EINTR)); return output; } static gboolean update_remaining_timeout(stonith_action_t * action) { int diff = time(NULL) - action->initial_start_time; if (action->tries >= action->max_retries) { crm_info("Attempted to execute agent %s (%s) the maximum number of times (%d) allowed", action->agent, action->action, action->max_retries); action->remaining_timeout = 0; } else if ((action->rc != -ETIME) && diff < (action->timeout * 0.7)) { /* only set remaining timeout period if there is 30% * or greater of the original timeout period left */ action->remaining_timeout = action->timeout - diff; } else { action->remaining_timeout = 0; } return action->remaining_timeout ? TRUE : FALSE; } static void stonith_action_async_done(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) { stonith_action_t *action = mainloop_child_userdata(p); if (action->timer_sigterm > 0) { g_source_remove(action->timer_sigterm); } if (action->timer_sigkill > 0) { g_source_remove(action->timer_sigkill); } if (action->last_timeout_signo) { action->rc = -ETIME; crm_notice("Child process %d performing action '%s' timed out with signal %d", pid, action->action, action->last_timeout_signo); } else if (signo) { action->rc = -ECONNABORTED; crm_notice("Child process %d performing action '%s' timed out with signal %d", pid, action->action, signo); } else { action->rc = exitcode; crm_debug("Child process %d performing action '%s' exited with rc %d", pid, action->action, exitcode); } action->output = read_output(action->fd_stdout); if (action->rc != pcmk_ok && update_remaining_timeout(action)) { int rc = internal_stonith_action_execute(action); if (rc == pcmk_ok) { return; } } if (action->done_cb) { action->done_cb(pid, action->rc, action->output, action->userdata); } stonith_action_destroy(action); } static int internal_stonith_action_execute(stonith_action_t * action) { int pid, status, len, rc = -EPROTO; int ret; int total = 0; int p_read_fd, p_write_fd; /* parent read/write file descriptors */ int c_read_fd, c_write_fd; /* child read/write file descriptors */ int fd1[2]; int fd2[2]; int is_retry = 0; /* clear any previous tracking data */ stonith_action_clear_tracking_data(action); if (!action->tries) { action->initial_start_time = time(NULL); } action->tries++; if (action->tries > 1) { crm_info("Attempt %d to execute %s (%s). remaining timeout is %d", action->tries, action->agent, action->action, action->remaining_timeout); is_retry = 1; } c_read_fd = c_write_fd = p_read_fd = p_write_fd = -1; if (action->args == NULL || action->agent == NULL) goto fail; len = strlen(action->args); if (pipe(fd1)) goto fail; p_read_fd = fd1[0]; c_write_fd = fd1[1]; if (pipe(fd2)) goto fail; c_read_fd = fd2[0]; p_write_fd = fd2[1]; crm_debug("forking"); pid = fork(); if (pid < 0) { rc = -ECHILD; goto fail; } if (!pid) { /* child */ close(1); /* coverity[leaked_handle] False positive */ if (dup(c_write_fd) < 0) goto fail; close(2); /* coverity[leaked_handle] False positive */ if (dup(c_write_fd) < 0) goto fail; close(0); /* coverity[leaked_handle] False positive */ if (dup(c_read_fd) < 0) goto fail; /* keep c_write_fd open so parent can report all errors. */ close(c_read_fd); close(p_read_fd); close(p_write_fd); /* keep retries from executing out of control */ if (is_retry) { sleep(1); } execlp(action->agent, action->agent, NULL); exit(EXIT_FAILURE); } /* parent */ action->pid = pid; ret = fcntl(p_read_fd, F_SETFL, fcntl(p_read_fd, F_GETFL, 0) | O_NONBLOCK); if (ret < 0) { crm_perror(LOG_NOTICE, "Could not change the output of %s to be non-blocking", action->agent); } do { crm_debug("sending args"); ret = write(p_write_fd, action->args + total, len - total); if (ret > 0) { total += ret; } } while (errno == EINTR && total < len); if (total != len) { crm_perror(LOG_ERR, "Sent %d not %d bytes", total, len); if (ret >= 0) { rc = -ECOMM; } goto fail; } close(p_write_fd); p_write_fd = -1; /* async */ if (action->async) { action->fd_stdout = p_read_fd; mainloop_child_add(pid, 0/* Move the timeout here? */, action->action, action, stonith_action_async_done); crm_trace("Op: %s on %s, pid: %d, timeout: %ds", action->action, action->agent, pid, action->remaining_timeout); action->last_timeout_signo = 0; if (action->remaining_timeout) { action->timer_sigterm = g_timeout_add(1000 * action->remaining_timeout, st_child_term, action); action->timer_sigkill = g_timeout_add(1000 * (action->remaining_timeout + 5), st_child_kill, action); } else { crm_err("No timeout set for stonith operation %s with device %s", action->action, action->agent); } close(c_write_fd); close(c_read_fd); return 0; } else { /* sync */ int timeout = action->remaining_timeout + 1; pid_t p = 0; while (action->remaining_timeout < 0 || timeout > 0) { p = waitpid(pid, &status, WNOHANG); if (p > 0) { break; } sleep(1); timeout--; } if (timeout == 0) { int killrc = kill(pid, SIGKILL); if (killrc && errno != ESRCH) { crm_err("kill(%d, KILL) failed: %s (%d)", pid, pcmk_strerror(errno), errno); } /* * From sigprocmask(2): * It is not possible to block SIGKILL or SIGSTOP. Attempts to do so are silently ignored. * * This makes it safe to skip WNOHANG here */ p = waitpid(pid, &status, 0); } if (p <= 0) { crm_perror(LOG_ERR, "waitpid(%d)", pid); } else if (p != pid) { crm_err("Waited for %d, got %d", pid, p); } action->output = read_output(p_read_fd); action->rc = -ECONNABORTED; rc = action->rc; if (timeout == 0) { action->rc = -ETIME; } else if (WIFEXITED(status)) { crm_debug("result = %d", WEXITSTATUS(status)); action->rc = -WEXITSTATUS(status); rc = 0; } else if (WIFSIGNALED(status)) { crm_err("call %s for %s exited due to signal %d", action->action, action->agent, WTERMSIG(status)); } else { crm_err("call %s for %s exited abnormally. stopped=%d, continued=%d", action->action, action->agent, WIFSTOPPED(status), WIFCONTINUED(status)); } } fail: if (p_read_fd >= 0) { close(p_read_fd); } if (p_write_fd >= 0) { close(p_write_fd); } if (c_read_fd >= 0) { close(c_read_fd); } if (c_write_fd >= 0) { close(c_write_fd); } return rc; } GPid stonith_action_execute_async(stonith_action_t * action, void *userdata, void (*done) (GPid pid, int rc, const char *output, gpointer user_data)) { int rc = 0; if (!action) { return -1; } action->userdata = userdata; action->done_cb = done; action->async = 1; rc = internal_stonith_action_execute(action); return rc < 0 ? rc : action->pid; } int stonith_action_execute(stonith_action_t * action, int *agent_result, char **output) { int rc = 0; if (!action) { return -1; } do { rc = internal_stonith_action_execute(action); if (rc == pcmk_ok) { /* success! */ break; } /* keep retrying while we have time left */ } while (update_remaining_timeout(action)); if (rc) { /* error */ return rc; } if (agent_result) { *agent_result = action->rc; } if (output) { *output = action->output; action->output = NULL; /* handed it off, do not free */ } stonith_action_destroy(action); return rc; } static int stonith_api_device_list(stonith_t * stonith, int call_options, const char *namespace, stonith_key_value_t ** devices, int timeout) { int count = 0; if (devices == NULL) { crm_err("Parameter error: stonith_api_device_list"); return -EFAULT; } /* Include Heartbeat agents */ if (namespace == NULL || safe_str_eq("heartbeat", namespace)) { #if HAVE_STONITH_STONITH_H static gboolean need_init = TRUE; char **entry = NULL; char **type_list = NULL; static char **(*type_list_fn) (void) = NULL; static void (*type_free_fn) (char **) = NULL; if (need_init) { need_init = FALSE; type_list_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_types", FALSE); type_free_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_free_hostlist", FALSE); } if (type_list_fn) { type_list = (*type_list_fn) (); } for (entry = type_list; entry != NULL && *entry; ++entry) { crm_trace("Added: %s", *entry); *devices = stonith_key_value_add(*devices, NULL, *entry); count++; } if (type_list && type_free_fn) { (*type_free_fn) (type_list); } #else if (namespace != NULL) { return -EINVAL; /* Heartbeat agents not supported */ } #endif } /* Include Red Hat agents, basically: ls -1 @sbin_dir@/fence_* */ if (namespace == NULL || safe_str_eq("redhat", namespace)) { struct dirent **namelist; int file_num = scandir(RH_STONITH_DIR, &namelist, 0, alphasort); if (file_num > 0) { struct stat prop; char buffer[FILENAME_MAX + 1]; while (file_num--) { if ('.' == namelist[file_num]->d_name[0]) { free(namelist[file_num]); continue; } else if (0 != strncmp(RH_STONITH_PREFIX, namelist[file_num]->d_name, strlen(RH_STONITH_PREFIX))) { free(namelist[file_num]); continue; } snprintf(buffer, FILENAME_MAX, "%s/%s", RH_STONITH_DIR, namelist[file_num]->d_name); if (stat(buffer, &prop) == 0 && S_ISREG(prop.st_mode)) { *devices = stonith_key_value_add(*devices, NULL, namelist[file_num]->d_name); count++; } free(namelist[file_num]); } free(namelist); } } return count; } #if HAVE_STONITH_STONITH_H static inline char * strdup_null(const char *val) { if (val) { return strdup(val); } return NULL; } static void stonith_plugin(int priority, const char *fmt, ...) G_GNUC_PRINTF(2, 3); static void stonith_plugin(int priority, const char *fmt, ...) { va_list args; char *str; int err = errno; va_start(args, fmt); str = g_strdup_vprintf(fmt, args); va_end(args); do_crm_log_alias(priority, __FILE__, __func__, __LINE__, "%s", str); g_free(str); errno = err; } #endif static int stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *agent, const char *namespace, char **output, int timeout) { int rc = 0; char *buffer = NULL; const char *provider = get_stonith_provider(agent, namespace); crm_trace("looking up %s/%s metadata", agent, provider); /* By having this in a library, we can access it from stonith_admin * when neither lrmd or stonith-ng are running * Important for the crm shell's validations... */ if (safe_str_eq(provider, "redhat")) { stonith_action_t *action = stonith_action_create(agent, "metadata", NULL, 0, 5, NULL, NULL); int exec_rc = stonith_action_execute(action, &rc, &buffer); if (exec_rc < 0 || rc != 0 || buffer == NULL) { crm_debug("Query failed: %d %d: %s", exec_rc, rc, crm_str(buffer)); free(buffer); /* Just in case */ return -EINVAL; } else { xmlNode *xml = string2xml(buffer); xmlNode *actions = NULL; xmlXPathObject *xpathObj = NULL; xpathObj = xpath_search(xml, "//actions"); if (numXpathResults(xpathObj) > 0) { actions = getXpathResult(xpathObj, 0); } freeXpathObject(xpathObj); /* Now fudge the metadata so that the start/stop actions appear */ xpathObj = xpath_search(xml, "//action[@name='stop']"); if (numXpathResults(xpathObj) <= 0) { xmlNode *tmp = NULL; tmp = create_xml_node(actions, "action"); crm_xml_add(tmp, "name", "stop"); crm_xml_add(tmp, "timeout", "20s"); tmp = create_xml_node(actions, "action"); crm_xml_add(tmp, "name", "start"); crm_xml_add(tmp, "timeout", "20s"); } freeXpathObject(xpathObj); /* Now fudge the metadata so that the port isn't required in the configuration */ xpathObj = xpath_search(xml, "//parameter[@name='port']"); if (numXpathResults(xpathObj) > 0) { /* We'll fill this in */ xmlNode *tmp = getXpathResult(xpathObj, 0); crm_xml_add(tmp, "required", "0"); } freeXpathObject(xpathObj); free(buffer); buffer = dump_xml_formatted(xml); free_xml(xml); if (!buffer) { return -EINVAL; } } } else { #if !HAVE_STONITH_STONITH_H return -EINVAL; /* Heartbeat agents not supported */ #else int bufferlen = 0; static const char *no_parameter_info = ""; Stonith *stonith_obj = NULL; static gboolean need_init = TRUE; static Stonith *(*st_new_fn) (const char *) = NULL; static const char *(*st_info_fn) (Stonith *, int) = NULL; static void (*st_del_fn) (Stonith *) = NULL; static void (*st_log_fn) (Stonith *, PILLogFun) = NULL; if (need_init) { need_init = FALSE; st_new_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_new", FALSE); st_del_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_delete", FALSE); st_log_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_set_log", FALSE); st_info_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_get_info", FALSE); } if (lha_agents_lib && st_new_fn && st_del_fn && st_info_fn && st_log_fn) { char *xml_meta_longdesc = NULL; char *xml_meta_shortdesc = NULL; char *meta_param = NULL; char *meta_longdesc = NULL; char *meta_shortdesc = NULL; stonith_obj = (*st_new_fn) (agent); if (stonith_obj) { (*st_log_fn) (stonith_obj, (PILLogFun) & stonith_plugin); meta_longdesc = strdup_null((*st_info_fn) (stonith_obj, ST_DEVICEDESCR)); if (meta_longdesc == NULL) { crm_warn("no long description in %s's metadata.", agent); meta_longdesc = strdup(no_parameter_info); } meta_shortdesc = strdup_null((*st_info_fn) (stonith_obj, ST_DEVICEID)); if (meta_shortdesc == NULL) { crm_warn("no short description in %s's metadata.", agent); meta_shortdesc = strdup(no_parameter_info); } meta_param = strdup_null((*st_info_fn) (stonith_obj, ST_CONF_XML)); if (meta_param == NULL) { crm_warn("no list of parameters in %s's metadata.", agent); meta_param = strdup(no_parameter_info); } (*st_del_fn) (stonith_obj); } else { return -EINVAL; /* Heartbeat agents not supported */ } xml_meta_longdesc = (char *)xmlEncodeEntitiesReentrant(NULL, (const unsigned char *)meta_longdesc); xml_meta_shortdesc = (char *)xmlEncodeEntitiesReentrant(NULL, (const unsigned char *)meta_shortdesc); bufferlen = strlen(META_TEMPLATE) + strlen(agent) + strlen(xml_meta_longdesc) + strlen(xml_meta_shortdesc) + strlen(meta_param) + 1; buffer = calloc(1, bufferlen); snprintf(buffer, bufferlen - 1, META_TEMPLATE, agent, xml_meta_longdesc, xml_meta_shortdesc, meta_param); xmlFree(xml_meta_longdesc); xmlFree(xml_meta_shortdesc); free(meta_shortdesc); free(meta_longdesc); free(meta_param); } #endif } if (output) { *output = buffer; } else { free(buffer); } return rc; } static int stonith_api_query(stonith_t * stonith, int call_options, const char *target, stonith_key_value_t ** devices, int timeout) { int rc = 0, lpc = 0, max = 0; xmlNode *data = NULL; xmlNode *output = NULL; xmlXPathObjectPtr xpathObj = NULL; CRM_CHECK(devices != NULL, return -EINVAL); data = create_xml_node(NULL, F_STONITH_DEVICE); crm_xml_add(data, "origin", __FUNCTION__); crm_xml_add(data, F_STONITH_TARGET, target); crm_xml_add(data, F_STONITH_ACTION, "off"); rc = stonith_send_command(stonith, STONITH_OP_QUERY, data, &output, call_options, timeout); if (rc < 0) { return rc; } xpathObj = xpath_search(output, "//@agent"); if (xpathObj) { max = numXpathResults(xpathObj); for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); CRM_CHECK(match != NULL, continue); crm_info("%s[%d] = %s", "//@agent", lpc, xmlGetNodePath(match)); *devices = stonith_key_value_add(*devices, NULL, crm_element_value(match, XML_ATTR_ID)); } freeXpathObject(xpathObj); } free_xml(output); free_xml(data); return max; } static int stonith_api_call(stonith_t * stonith, int call_options, const char *id, const char *action, const char *victim, int timeout, xmlNode ** output) { int rc = 0; xmlNode *data = NULL; data = create_xml_node(NULL, F_STONITH_DEVICE); crm_xml_add(data, "origin", __FUNCTION__); crm_xml_add(data, F_STONITH_DEVICE, id); crm_xml_add(data, F_STONITH_ACTION, action); crm_xml_add(data, F_STONITH_TARGET, victim); rc = stonith_send_command(stonith, STONITH_OP_EXEC, data, output, call_options, timeout); free_xml(data); return rc; } static int stonith_api_list(stonith_t * stonith, int call_options, const char *id, char **list_info, int timeout) { int rc; xmlNode *output = NULL; rc = stonith_api_call(stonith, call_options, id, "list", NULL, timeout, &output); if (output && list_info) { const char *list_str; list_str = crm_element_value(output, "st_output"); if (list_str) { *list_info = strdup(list_str); } } if (output) { free_xml(output); } return rc; } static int stonith_api_monitor(stonith_t * stonith, int call_options, const char *id, int timeout) { return stonith_api_call(stonith, call_options, id, "monitor", NULL, timeout, NULL); } static int stonith_api_status(stonith_t * stonith, int call_options, const char *id, const char *port, int timeout) { return stonith_api_call(stonith, call_options, id, "status", port, timeout, NULL); } static int stonith_api_fence(stonith_t * stonith, int call_options, const char *node, const char *action, int timeout, int tolerance) { int rc = 0; xmlNode *data = NULL; data = create_xml_node(NULL, __FUNCTION__); crm_xml_add(data, F_STONITH_TARGET, node); crm_xml_add(data, F_STONITH_ACTION, action); crm_xml_add_int(data, F_STONITH_TIMEOUT, timeout); crm_xml_add_int(data, F_STONITH_TOLERANCE, tolerance); rc = stonith_send_command(stonith, STONITH_OP_FENCE, data, NULL, call_options, timeout); free_xml(data); return rc; } static int stonith_api_confirm(stonith_t * stonith, int call_options, const char *target) { return stonith_api_fence(stonith, call_options | st_opt_manual_ack, target, "off", 0, 0); } static int stonith_api_history(stonith_t * stonith, int call_options, const char *node, stonith_history_t ** history, int timeout) { int rc = 0; xmlNode *data = NULL; xmlNode *output = NULL; stonith_history_t *last = NULL; *history = NULL; if (node) { data = create_xml_node(NULL, __FUNCTION__); crm_xml_add(data, F_STONITH_TARGET, node); } rc = stonith_send_command(stonith, STONITH_OP_FENCE_HISTORY, data, &output, call_options | st_opt_sync_call, timeout); free_xml(data); if (rc == 0) { xmlNode *op = NULL; xmlNode *reply = get_xpath_object("//" F_STONITH_HISTORY_LIST, output, LOG_ERR); for (op = __xml_first_child(reply); op != NULL; op = __xml_next(op)) { stonith_history_t *kvp; kvp = calloc(1, sizeof(stonith_history_t)); kvp->target = crm_element_value_copy(op, F_STONITH_TARGET); kvp->action = crm_element_value_copy(op, F_STONITH_ACTION); kvp->origin = crm_element_value_copy(op, F_STONITH_ORIGIN); kvp->delegate = crm_element_value_copy(op, F_STONITH_DELEGATE); kvp->client = crm_element_value_copy(op, F_STONITH_CLIENTNAME); crm_element_value_int(op, F_STONITH_DATE, &kvp->completed); crm_element_value_int(op, F_STONITH_STATE, &kvp->state); if (last) { last->next = kvp; } else { *history = kvp; } last = kvp; } } return rc; } gboolean is_redhat_agent(const char *agent) { int rc = 0; struct stat prop; char buffer[FILENAME_MAX + 1]; snprintf(buffer, FILENAME_MAX, "%s/%s", RH_STONITH_DIR, agent); rc = stat(buffer, &prop); if (rc >= 0 && S_ISREG(prop.st_mode)) { return TRUE; } return FALSE; } const char * get_stonith_provider(const char *agent, const char *provider) { /* This function sucks */ if (is_redhat_agent(agent)) { return "redhat"; #if HAVE_STONITH_STONITH_H } else { Stonith *stonith_obj = NULL; static gboolean need_init = TRUE; static Stonith *(*st_new_fn) (const char *) = NULL; static void (*st_del_fn) (Stonith *) = NULL; if (need_init) { need_init = FALSE; st_new_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_new", FALSE); st_del_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_delete", FALSE); } if (lha_agents_lib && st_new_fn && st_del_fn) { stonith_obj = (*st_new_fn) (agent); if (stonith_obj) { (*st_del_fn) (stonith_obj); return "heartbeat"; } } #endif } crm_err("No such device: %s", agent); return NULL; } static gint stonithlib_GCompareFunc(gconstpointer a, gconstpointer b) { int rc = 0; const stonith_notify_client_t *a_client = a; const stonith_notify_client_t *b_client = b; CRM_CHECK(a_client->event != NULL && b_client->event != NULL, return 0); rc = strcmp(a_client->event, b_client->event); if (rc == 0) { if (a_client->notify == NULL || b_client->notify == NULL) { return 0; } else if (a_client->notify == b_client->notify) { return 0; } else if (((long)a_client->notify) < ((long)b_client->notify)) { crm_err("callbacks for %s are not equal: %p vs. %p", a_client->event, a_client->notify, b_client->notify); return -1; } crm_err("callbacks for %s are not equal: %p vs. %p", a_client->event, a_client->notify, b_client->notify); return 1; } return rc; } xmlNode * stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, int call_options) { xmlNode *op_msg = create_xml_node(NULL, "stonith_command"); CRM_CHECK(op_msg != NULL, return NULL); CRM_CHECK(token != NULL, return NULL); crm_xml_add(op_msg, F_XML_TAGNAME, "stonith_command"); crm_xml_add(op_msg, F_TYPE, T_STONITH_NG); crm_xml_add(op_msg, F_STONITH_CALLBACK_TOKEN, token); crm_xml_add(op_msg, F_STONITH_OPERATION, op); crm_xml_add_int(op_msg, F_STONITH_CALLID, call_id); crm_trace("Sending call options: %.8lx, %d", (long)call_options, call_options); crm_xml_add_int(op_msg, F_STONITH_CALLOPTS, call_options); if (data != NULL) { add_message_xml(op_msg, F_STONITH_CALLDATA, data); } return op_msg; } static void stonith_destroy_op_callback(gpointer data) { stonith_callback_client_t *blob = data; if (blob->timer && blob->timer->ref > 0) { g_source_remove(blob->timer->ref); } free(blob->timer); free(blob); } static int stonith_api_signoff(stonith_t * stonith) { stonith_private_t *native = stonith->private; crm_debug("Signing out of the STONITH Service"); if (native->source != NULL) { /* Attached to mainloop */ mainloop_del_ipc_client(native->source); native->source = NULL; native->ipc = NULL; } else if (native->ipc) { /* Not attached to mainloop */ crm_ipc_t *ipc = native->ipc; native->ipc = NULL; crm_ipc_close(ipc); crm_ipc_destroy(ipc); } + free(native->token); native->token = NULL; stonith->state = stonith_disconnected; return pcmk_ok; } static int stonith_api_signon(stonith_t * stonith, const char *name, int *stonith_fd) { int rc = pcmk_ok; stonith_private_t *native = stonith->private; static struct ipc_client_callbacks st_callbacks = { .dispatch = stonith_dispatch_internal, .destroy = stonith_connection_destroy }; crm_trace("Connecting command channel"); stonith->state = stonith_connected_command; if (stonith_fd) { /* No mainloop */ native->ipc = crm_ipc_new("stonith-ng", 0); if (native->ipc && crm_ipc_connect(native->ipc)) { *stonith_fd = crm_ipc_get_fd(native->ipc); } else if (native->ipc) { rc = -ENOTCONN; } } else { /* With mainloop */ native->source = mainloop_add_ipc_client("stonith-ng", G_PRIORITY_MEDIUM, 0, stonith, &st_callbacks); native->ipc = mainloop_get_ipc_client(native->source); } if (native->ipc == NULL) { crm_debug("Could not connect to the Stonith API"); rc = -ENOTCONN; } if (rc == pcmk_ok) { xmlNode *reply = NULL; xmlNode *hello = create_xml_node(NULL, "stonith_command"); crm_xml_add(hello, F_TYPE, T_STONITH_NG); crm_xml_add(hello, F_STONITH_OPERATION, CRM_OP_REGISTER); crm_xml_add(hello, F_STONITH_CLIENTNAME, name); rc = crm_ipc_send(native->ipc, hello, crm_ipc_client_response, -1, &reply); if (rc < 0) { crm_perror(LOG_DEBUG, "Couldn't complete registration with the fencing API: %d", rc); rc = -ECOMM; } else if (reply == NULL) { crm_err("Did not receive registration reply"); rc = -EPROTO; } else { const char *msg_type = crm_element_value(reply, F_STONITH_OPERATION); const char *tmp_ticket = crm_element_value(reply, F_STONITH_CLIENTID); if (safe_str_neq(msg_type, CRM_OP_REGISTER)) { crm_err("Invalid registration message: %s", msg_type); crm_log_xml_err(reply, "Bad reply"); rc = -EPROTO; } else if (tmp_ticket == NULL) { crm_err("No registration token provided"); crm_log_xml_err(reply, "Bad reply"); rc = -EPROTO; } else { crm_trace("Obtained registration token: %s", tmp_ticket); native->token = strdup(tmp_ticket); rc = pcmk_ok; } } free_xml(reply); free_xml(hello); } if (rc == pcmk_ok) { #if HAVE_MSGFROMIPC_TIMEOUT stonith->call_timeout = MAX_IPC_DELAY; #endif crm_debug("Connection to STONITH successful"); return pcmk_ok; } crm_debug("Connection to STONITH failed: %s", pcmk_strerror(rc)); stonith->cmds->disconnect(stonith); return rc; } static int stonith_set_notification(stonith_t * stonith, const char *callback, int enabled) { xmlNode *notify_msg = create_xml_node(NULL, __FUNCTION__); stonith_private_t *native = stonith->private; if (stonith->state != stonith_disconnected) { int rc; crm_xml_add(notify_msg, F_STONITH_OPERATION, T_STONITH_NOTIFY); if (enabled) { crm_xml_add(notify_msg, F_STONITH_NOTIFY_ACTIVATE, callback); } else { crm_xml_add(notify_msg, F_STONITH_NOTIFY_DEACTIVATE, callback); } rc = crm_ipc_send(native->ipc, notify_msg, crm_ipc_client_response, -1, NULL); if (rc < 0) { crm_perror(LOG_DEBUG, "Couldn't register for fencing notifications: %d", rc); rc = -ECOMM; } } free_xml(notify_msg); return pcmk_ok; } static int stonith_api_add_notification(stonith_t * stonith, const char *event, void (*callback) (stonith_t * stonith, stonith_event_t * e)) { GList *list_item = NULL; stonith_notify_client_t *new_client = NULL; stonith_private_t *private = NULL; private = stonith->private; crm_trace("Adding callback for %s events (%d)", event, g_list_length(private->notify_list)); new_client = calloc(1, sizeof(stonith_notify_client_t)); new_client->event = event; new_client->notify = callback; list_item = g_list_find_custom(private->notify_list, new_client, stonithlib_GCompareFunc); if (list_item != NULL) { crm_warn("Callback already present"); free(new_client); return -ENOTUNIQ; } else { private->notify_list = g_list_append(private->notify_list, new_client); stonith_set_notification(stonith, event, 1); crm_trace("Callback added (%d)", g_list_length(private->notify_list)); } return pcmk_ok; } static int stonith_api_del_notification(stonith_t * stonith, const char *event) { GList *list_item = NULL; stonith_notify_client_t *new_client = NULL; stonith_private_t *private = NULL; crm_debug("Removing callback for %s events", event); private = stonith->private; new_client = calloc(1, sizeof(stonith_notify_client_t)); new_client->event = event; new_client->notify = NULL; list_item = g_list_find_custom(private->notify_list, new_client, stonithlib_GCompareFunc); stonith_set_notification(stonith, event, 0); if (list_item != NULL) { stonith_notify_client_t *list_client = list_item->data; private->notify_list = g_list_remove(private->notify_list, list_client); free(list_client); crm_trace("Removed callback"); } else { crm_trace("Callback not present"); } free(new_client); return pcmk_ok; } static gboolean stonith_async_timeout_handler(gpointer data) { struct timer_rec_s *timer = data; crm_err("Async call %d timed out after %dms", timer->call_id, timer->timeout); stonith_perform_callback(timer->stonith, NULL, timer->call_id, -ETIME); /* Always return TRUE, never remove the handler * We do that in stonith_del_callback() */ return TRUE; } static void set_callback_timeout(stonith_callback_client_t * callback, stonith_t * stonith, int call_id, int timeout) { struct timer_rec_s *async_timer = callback->timer; if (timeout <= 0) { return; } if (!async_timer) { async_timer = calloc(1, sizeof(struct timer_rec_s)); callback->timer = async_timer; } async_timer->stonith = stonith; async_timer->call_id = call_id; /* Allow a fair bit of grace to allow the server to tell us of a timeout * This is only a fallback */ async_timer->timeout = (timeout + 60) * 1000; if (async_timer->ref) { g_source_remove(async_timer->ref); } async_timer->ref = g_timeout_add(async_timer->timeout, stonith_async_timeout_handler, async_timer); } static void update_callback_timeout(int call_id, int timeout, stonith_t * st) { stonith_callback_client_t *callback = NULL; stonith_private_t *private = st->private; callback = g_hash_table_lookup(private->stonith_op_callback_table, GINT_TO_POINTER(call_id)); if (!callback || !callback->allow_timeout_updates) { return; } set_callback_timeout(callback, st, call_id, timeout); } static void invoke_callback(stonith_t * st, int call_id, int rc, void *userdata, void (*callback) (stonith_t * st, stonith_callback_data_t * data)) { stonith_callback_data_t data = { 0, }; data.call_id = call_id; data.rc = rc; data.userdata = userdata; callback(st, &data); } static int stonith_api_add_callback(stonith_t * stonith, int call_id, int timeout, int options, void *user_data, const char *callback_name, void (*callback) (stonith_t * st, stonith_callback_data_t * data)) { stonith_callback_client_t *blob = NULL; stonith_private_t *private = NULL; CRM_CHECK(stonith != NULL, return -EINVAL); CRM_CHECK(stonith->private != NULL, return -EINVAL); private = stonith->private; if (call_id == 0) { private->op_callback = callback; } else if (call_id < 0) { if (!(options & st_opt_report_only_success)) { crm_trace("Call failed, calling %s: %s", callback_name, pcmk_strerror(call_id)); invoke_callback(stonith, call_id, call_id, user_data, callback); } else { crm_warn("STONITH call failed: %s", pcmk_strerror(call_id)); } return FALSE; } blob = calloc(1, sizeof(stonith_callback_client_t)); blob->id = callback_name; blob->only_success = (options & st_opt_report_only_success) ? TRUE : FALSE; blob->user_data = user_data; blob->callback = callback; blob->allow_timeout_updates = (options & st_opt_timeout_updates) ? TRUE : FALSE; if (timeout > 0) { set_callback_timeout(blob, stonith, call_id, timeout); } g_hash_table_insert(private->stonith_op_callback_table, GINT_TO_POINTER(call_id), blob); crm_trace("Added callback to %s for call %d", callback_name, call_id); return TRUE; } static int stonith_api_del_callback(stonith_t * stonith, int call_id, bool all_callbacks) { stonith_private_t *private = stonith->private; if (all_callbacks) { private->op_callback = NULL; g_hash_table_destroy(private->stonith_op_callback_table); private->stonith_op_callback_table = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, stonith_destroy_op_callback); } else if (call_id == 0) { private->op_callback = NULL; } else { g_hash_table_remove(private->stonith_op_callback_table, GINT_TO_POINTER(call_id)); } return pcmk_ok; } static void stonith_dump_pending_op(gpointer key, gpointer value, gpointer user_data) { int call = GPOINTER_TO_INT(key); stonith_callback_client_t *blob = value; crm_debug("Call %d (%s): pending", call, crm_str(blob->id)); } void stonith_dump_pending_callbacks(stonith_t * stonith) { stonith_private_t *private = stonith->private; if (private->stonith_op_callback_table == NULL) { return; } return g_hash_table_foreach(private->stonith_op_callback_table, stonith_dump_pending_op, NULL); } void stonith_perform_callback(stonith_t * stonith, xmlNode * msg, int call_id, int rc) { stonith_private_t *private = NULL; stonith_callback_client_t *blob = NULL; stonith_callback_client_t local_blob; CRM_CHECK(stonith != NULL, return); CRM_CHECK(stonith->private != NULL, return); private = stonith->private; local_blob.id = NULL; local_blob.callback = NULL; local_blob.user_data = NULL; local_blob.only_success = FALSE; if (msg != NULL) { crm_element_value_int(msg, F_STONITH_RC, &rc); crm_element_value_int(msg, F_STONITH_CALLID, &call_id); } CRM_CHECK(call_id > 0, crm_log_xml_err(msg, "Bad result")); blob = g_hash_table_lookup(private->stonith_op_callback_table, GINT_TO_POINTER(call_id)); if (blob != NULL) { local_blob = *blob; blob = NULL; stonith_api_del_callback(stonith, call_id, FALSE); } else { crm_trace("No callback found for call %d", call_id); local_blob.callback = NULL; } if (local_blob.callback != NULL && (rc == pcmk_ok || local_blob.only_success == FALSE)) { crm_trace("Invoking callback %s for call %d", crm_str(local_blob.id), call_id); invoke_callback(stonith, call_id, rc, local_blob.user_data, local_blob.callback); } else if (private->op_callback == NULL && rc != pcmk_ok) { crm_warn("STONITH command failed: %s", pcmk_strerror(rc)); crm_log_xml_debug(msg, "Failed STONITH Update"); } if (private->op_callback != NULL) { crm_trace("Invoking global callback for call %d", call_id); invoke_callback(stonith, call_id, rc, NULL, private->op_callback); } crm_trace("OP callback activated."); } /* */ static stonith_event_t * xml_to_event(xmlNode * msg) { stonith_event_t *event = calloc(1, sizeof(stonith_event_t)); const char *ntype = crm_element_value(msg, F_SUBTYPE); char *data_addr = g_strdup_printf("//%s", ntype); xmlNode *data = get_xpath_object(data_addr, msg, LOG_DEBUG); crm_log_xml_trace(msg, "stonith_notify"); crm_element_value_int(msg, F_STONITH_RC, &(event->result)); if (safe_str_eq(ntype, T_STONITH_NOTIFY_FENCE)) { event->operation = crm_element_value_copy(msg, F_STONITH_OPERATION); if (data) { event->origin = crm_element_value_copy(data, F_STONITH_ORIGIN); event->action = crm_element_value_copy(data, F_STONITH_ACTION); event->target = crm_element_value_copy(data, F_STONITH_TARGET); event->executioner = crm_element_value_copy(data, F_STONITH_DELEGATE); event->id = crm_element_value_copy(data, F_STONITH_REMOTE_OP_ID); event->client_origin = crm_element_value_copy(data, F_STONITH_CLIENTNAME); } else { crm_err("No data for %s event", ntype); crm_log_xml_notice(msg, "BadEvent"); } } g_free(data_addr); return event; } static void event_free(stonith_event_t * event) { free(event->id); free(event->type); free(event->message); free(event->operation); free(event->origin); free(event->action); free(event->target); free(event->executioner); free(event->device); free(event->client_origin); free(event); } static void stonith_send_notification(gpointer data, gpointer user_data) { struct notify_blob_s *blob = user_data; stonith_notify_client_t *entry = data; stonith_event_t *st_event = NULL; const char *event = NULL; if (blob->xml == NULL) { crm_warn("Skipping callback - NULL message"); return; } event = crm_element_value(blob->xml, F_SUBTYPE); if (entry == NULL) { crm_warn("Skipping callback - NULL callback client"); return; } else if (entry->notify == NULL) { crm_warn("Skipping callback - NULL callback"); return; } else if (safe_str_neq(entry->event, event)) { crm_trace("Skipping callback - event mismatch %p/%s vs. %s", entry, entry->event, event); return; } st_event = xml_to_event(blob->xml); crm_trace("Invoking callback for %p/%s event...", entry, event); entry->notify(blob->stonith, st_event); crm_trace("Callback invoked..."); event_free(st_event); } int stonith_send_command(stonith_t * stonith, const char *op, xmlNode * data, xmlNode ** output_data, int call_options, int timeout) { int rc = 0; int reply_id = -1; enum crm_ipc_flags ipc_flags = crm_ipc_client_none; xmlNode *op_msg = NULL; xmlNode *op_reply = NULL; stonith_private_t *native = stonith->private; if (stonith->state == stonith_disconnected) { return -ENOTCONN; } if (output_data != NULL) { *output_data = NULL; } if (op == NULL) { crm_err("No operation specified"); return -EINVAL; } if (call_options & st_opt_sync_call) { ipc_flags |= crm_ipc_client_response; } stonith->call_id++; /* prevent call_id from being negative (or zero) and conflicting * with the stonith_errors enum * use 2 because we use it as (stonith->call_id - 1) below */ if (stonith->call_id < 1) { stonith->call_id = 1; } CRM_CHECK(native->token != NULL,; ); op_msg = stonith_create_op(stonith->call_id, native->token, op, data, call_options); if (op_msg == NULL) { return -EINVAL; } crm_xml_add_int(op_msg, F_STONITH_TIMEOUT, timeout); crm_trace("Sending %s message to STONITH service, Timeout: %ds", op, timeout); rc = crm_ipc_send(native->ipc, op_msg, ipc_flags, 1000 * (timeout + 60), &op_reply); free_xml(op_msg); if (rc < 0) { crm_perror(LOG_ERR, "Couldn't perform %s operation (timeout=%ds): %d", op, timeout, rc); rc = -ECOMM; goto done; } crm_log_xml_trace(op_reply, "Reply"); if (!(call_options & st_opt_sync_call)) { crm_trace("Async call %d, returning", stonith->call_id); CRM_CHECK(stonith->call_id != 0, return -EPROTO); free_xml(op_reply); return stonith->call_id; } rc = pcmk_ok; crm_element_value_int(op_reply, F_STONITH_CALLID, &reply_id); if (reply_id == stonith->call_id) { crm_trace("Syncronous reply %d received", reply_id); if (crm_element_value_int(op_reply, F_STONITH_RC, &rc) != 0) { rc = -ENOMSG; } if ((call_options & st_opt_discard_reply) || output_data == NULL) { crm_trace("Discarding reply"); } else { *output_data = op_reply; op_reply = NULL; /* Prevent subsequent free */ } } else if (reply_id <= 0) { crm_err("Recieved bad reply: No id set"); crm_log_xml_err(op_reply, "Bad reply"); free_xml(op_reply); rc = -ENOMSG; } else { crm_err("Recieved bad reply: %d (wanted %d)", reply_id, stonith->call_id); crm_log_xml_err(op_reply, "Old reply"); free_xml(op_reply); rc = -ENOMSG; } done: if (crm_ipc_connected(native->ipc) == FALSE) { crm_err("STONITH disconnected"); stonith->state = stonith_disconnected; } free_xml(op_reply); return rc; } /* Not used with mainloop */ bool stonith_dispatch(stonith_t * st) { gboolean stay_connected = TRUE; stonith_private_t *private = NULL; CRM_ASSERT(st != NULL); private = st->private; while (crm_ipc_ready(private->ipc)) { if (crm_ipc_read(private->ipc) > 0) { const char *msg = crm_ipc_buffer(private->ipc); stonith_dispatch_internal(msg, strlen(msg), st); } if (crm_ipc_connected(private->ipc) == FALSE) { crm_err("Connection closed"); stay_connected = FALSE; } } return stay_connected; } int stonith_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata) { const char *type = NULL; struct notify_blob_s blob; stonith_t *st = userdata; stonith_private_t *private = NULL; CRM_ASSERT(st != NULL); private = st->private; blob.stonith = st; blob.xml = string2xml(buffer); if (blob.xml == NULL) { crm_warn("Received a NULL msg from STONITH service: %s.", buffer); return 0; } /* do callbacks */ type = crm_element_value(blob.xml, F_TYPE); crm_trace("Activating %s callbacks...", type); if (safe_str_eq(type, T_STONITH_NG)) { stonith_perform_callback(st, blob.xml, 0, 0); } else if (safe_str_eq(type, T_STONITH_NOTIFY)) { g_list_foreach(private->notify_list, stonith_send_notification, &blob); } else if (safe_str_eq(type, T_STONITH_TIMEOUT_VALUE)) { int call_id = 0; int timeout = 0; crm_element_value_int(blob.xml, F_STONITH_TIMEOUT, &timeout); crm_element_value_int(blob.xml, F_STONITH_CALLID, &call_id); update_callback_timeout(call_id, timeout, st); } else { crm_err("Unknown message type: %s", type); crm_log_xml_warn(blob.xml, "BadReply"); } free_xml(blob.xml); return 1; } static int stonith_api_free(stonith_t * stonith) { int rc = pcmk_ok; if (stonith->state != stonith_disconnected) { rc = stonith->cmds->disconnect(stonith); } if (stonith->state == stonith_disconnected) { stonith_private_t *private = stonith->private; g_hash_table_destroy(private->stonith_op_callback_table); - free(private->token); free(stonith->private); free(stonith->cmds); free(stonith); + + } else { + crm_err("Not free'ing active connection"); } return rc; } void stonith_api_delete(stonith_t * stonith) { stonith_private_t *private = stonith->private; GList *list = private->notify_list; while (list != NULL) { stonith_notify_client_t *client = g_list_nth_data(list, 0); list = g_list_remove(list, client); free(client); } stonith->cmds->free(stonith); stonith = NULL; } stonith_t * stonith_api_new(void) { stonith_t *new_stonith = NULL; stonith_private_t *private = NULL; new_stonith = calloc(1, sizeof(stonith_t)); private = calloc(1, sizeof(stonith_private_t)); new_stonith->private = private; private->stonith_op_callback_table = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, stonith_destroy_op_callback); private->notify_list = NULL; new_stonith->call_id = 1; new_stonith->state = stonith_disconnected; new_stonith->cmds = calloc(1, sizeof(stonith_api_operations_t)); /* *INDENT-OFF* */ new_stonith->cmds->free = stonith_api_free; new_stonith->cmds->connect = stonith_api_signon; new_stonith->cmds->disconnect = stonith_api_signoff; new_stonith->cmds->list = stonith_api_list; new_stonith->cmds->monitor = stonith_api_monitor; new_stonith->cmds->status = stonith_api_status; new_stonith->cmds->fence = stonith_api_fence; new_stonith->cmds->confirm = stonith_api_confirm; new_stonith->cmds->history = stonith_api_history; new_stonith->cmds->list_agents = stonith_api_device_list; new_stonith->cmds->metadata = stonith_api_device_metadata; new_stonith->cmds->query = stonith_api_query; new_stonith->cmds->remove_device = stonith_api_remove_device; new_stonith->cmds->register_device = stonith_api_register_device; new_stonith->cmds->remove_level = stonith_api_remove_level; new_stonith->cmds->register_level = stonith_api_register_level; new_stonith->cmds->remove_callback = stonith_api_del_callback; new_stonith->cmds->register_callback = stonith_api_add_callback; new_stonith->cmds->remove_notification = stonith_api_del_notification; new_stonith->cmds->register_notification = stonith_api_add_notification; /* *INDENT-ON* */ return new_stonith; } stonith_key_value_t * stonith_key_value_add(stonith_key_value_t * head, const char *key, const char *value) { stonith_key_value_t *p, *end; p = calloc(1, sizeof(stonith_key_value_t)); if (key) { p->key = strdup(key); } if (value) { p->value = strdup(value); } end = head; while (end && end->next) { end = end->next; } if (end) { end->next = p; } else { head = p; } return head; } void stonith_key_value_freeall(stonith_key_value_t * head, int keys, int values) { stonith_key_value_t *p; while (head) { p = head->next; if (keys) { free(head->key); } if (values) { free(head->value); } free(head); head = p; } } int stonith_api_kick(int nodeid, const char *uname, int timeout, bool off) { char *name = NULL; const char *action = "reboot"; int rc = -EPROTO; stonith_t *st = NULL; enum stonith_call_options opts = st_opt_sync_call | st_opt_allow_suicide; st = stonith_api_new(); if (st) { rc = st->cmds->connect(st, "stonith-api", NULL); } if (uname != NULL) { name = strdup(uname); } else if (nodeid > 0) { opts |= st_opt_cs_nodeid; name = crm_itoa(nodeid); } if (off) { action = "off"; } if (rc == pcmk_ok) { rc = st->cmds->fence(st, opts, name, action, timeout, 0); } if (st) { st->cmds->disconnect(st); stonith_api_delete(st); } free(name); return rc; } time_t stonith_api_time(int nodeid, const char *uname, bool in_progress) { int rc = 0; char *name = NULL; time_t when = 0; time_t progress = 0; stonith_t *st = NULL; stonith_history_t *history, *hp = NULL; enum stonith_call_options opts = st_opt_sync_call; st = stonith_api_new(); if (st) { rc = st->cmds->connect(st, "stonith-api", NULL); } if (uname != NULL) { name = strdup(uname); } else if (nodeid > 0) { opts |= st_opt_cs_nodeid; name = crm_itoa(nodeid); } if (st && rc == pcmk_ok) { st->cmds->history(st, st_opt_sync_call | st_opt_cs_nodeid, name, &history, 120); for (hp = history; hp; hp = hp->next) { if (in_progress) { if (hp->state != st_done && hp->state != st_failed) { progress = time(NULL); } } else if (hp->state == st_done) { when = hp->completed; } } } if (progress) { when = progress; } if (st) { st->cmds->disconnect(st); stonith_api_delete(st); } free(name); return when; } #if HAVE_STONITH_STONITH_H # include const char *i_hate_pils(int rc); const char * i_hate_pils(int rc) { return PIL_strerror(rc); } #endif diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c index c2ace1483b..d2f5d37aee 100644 --- a/lib/lrmd/lrmd_client.c +++ b/lib/lrmd/lrmd_client.c @@ -1,1991 +1,1997 @@ /* * Copyright (c) 2012 David Vossel * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_GNUTLS_GNUTLS_H # undef KEYFILE # include #endif #include #include #include #include CRM_TRACE_INIT_DATA(lrmd); static stonith_t *stonith_api = NULL; static int lrmd_api_disconnect(lrmd_t * lrmd); static int lrmd_api_is_connected(lrmd_t * lrmd); /* IPC proxy functions */ int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg); static void lrmd_internal_proxy_dispatch(lrmd_t *lrmd, xmlNode *msg); void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)); #ifdef HAVE_GNUTLS_GNUTLS_H # define LRMD_CLIENT_HANDSHAKE_TIMEOUT 5000 /* 5 seconds */ gnutls_psk_client_credentials_t psk_cred_s; int lrmd_tls_set_key(gnutls_datum_t * key, const char *location); static void lrmd_tls_disconnect(lrmd_t * lrmd); static int global_remote_msg_id = 0; int lrmd_tls_send_msg(crm_remote_t * session, xmlNode * msg, uint32_t id, const char *msg_type); static void lrmd_tls_connection_destroy(gpointer userdata); #endif typedef struct lrmd_private_s { enum client_type type; char *token; mainloop_io_t *source; /* IPC parameters */ crm_ipc_t *ipc; crm_remote_t *remote; /* Extra TLS parameters */ char *remote_nodename; #ifdef HAVE_GNUTLS_GNUTLS_H char *server; int port; gnutls_psk_client_credentials_t psk_cred_c; int sock; GList *pending_notify; crm_trigger_t *process_notify; #endif lrmd_event_callback callback; /* Internal IPC proxy msg passing for remote guests */ void (*proxy_callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg); void *proxy_callback_userdata; } lrmd_private_t; static lrmd_list_t * lrmd_list_add(lrmd_list_t * head, const char *value) { lrmd_list_t *p, *end; p = calloc(1, sizeof(lrmd_list_t)); p->val = strdup(value); end = head; while (end && end->next) { end = end->next; } if (end) { end->next = p; } else { head = p; } return head; } void lrmd_list_freeall(lrmd_list_t * head) { lrmd_list_t *p; while (head) { char *val = (char *)head->val; p = head->next; free(val); free(head); head = p; } } lrmd_key_value_t * lrmd_key_value_add(lrmd_key_value_t * head, const char *key, const char *value) { lrmd_key_value_t *p, *end; p = calloc(1, sizeof(lrmd_key_value_t)); p->key = strdup(key); p->value = strdup(value); end = head; while (end && end->next) { end = end->next; } if (end) { end->next = p; } else { head = p; } return head; } void lrmd_key_value_freeall(lrmd_key_value_t * head) { lrmd_key_value_t *p; while (head) { p = head->next; free(head->key); free(head->value); free(head); head = p; } } static void dup_attr(gpointer key, gpointer value, gpointer user_data) { g_hash_table_replace(user_data, strdup(key), strdup(value)); } lrmd_event_data_t * lrmd_copy_event(lrmd_event_data_t * event) { lrmd_event_data_t *copy = NULL; copy = calloc(1, sizeof(lrmd_event_data_t)); /* This will get all the int values. * we just have to be careful not to leave any * dangling pointers to strings. */ memcpy(copy, event, sizeof(lrmd_event_data_t)); copy->rsc_id = event->rsc_id ? strdup(event->rsc_id) : NULL; copy->op_type = event->op_type ? strdup(event->op_type) : NULL; copy->user_data = event->user_data ? strdup(event->user_data) : NULL; copy->output = event->output ? strdup(event->output) : NULL; copy->remote_nodename = event->remote_nodename ? strdup(event->remote_nodename) : NULL; if (event->params) { copy->params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if (copy->params != NULL) { g_hash_table_foreach(event->params, dup_attr, copy->params); } } return copy; } void lrmd_free_event(lrmd_event_data_t * event) { if (!event) { return; } /* free gives me grief if i try to cast */ free((char *)event->rsc_id); free((char *)event->op_type); free((char *)event->user_data); free((char *)event->output); free((char *)event->remote_nodename); if (event->params) { g_hash_table_destroy(event->params); } free(event); } static int lrmd_dispatch_internal(lrmd_t * lrmd, xmlNode * msg) { const char *type; const char *proxy_session = crm_element_value(msg, F_LRMD_IPC_SESSION); lrmd_private_t *native = lrmd->private; lrmd_event_data_t event = { 0, }; if (proxy_session != NULL) { /* this is proxy business */ lrmd_internal_proxy_dispatch(lrmd, msg); return 1; } if (!native->callback) { /* no callback set */ crm_trace("notify event received but client has not set callback"); return 1; } event.remote_nodename = native->remote_nodename; type = crm_element_value(msg, F_LRMD_OPERATION); crm_element_value_int(msg, F_LRMD_CALLID, &event.call_id); event.rsc_id = crm_element_value(msg, F_LRMD_RSC_ID); if (crm_str_eq(type, LRMD_OP_RSC_REG, TRUE)) { event.type = lrmd_event_register; } else if (crm_str_eq(type, LRMD_OP_RSC_UNREG, TRUE)) { event.type = lrmd_event_unregister; } else if (crm_str_eq(type, LRMD_OP_RSC_EXEC, TRUE)) { crm_element_value_int(msg, F_LRMD_TIMEOUT, &event.timeout); crm_element_value_int(msg, F_LRMD_RSC_INTERVAL, &event.interval); crm_element_value_int(msg, F_LRMD_RSC_START_DELAY, &event.start_delay); crm_element_value_int(msg, F_LRMD_EXEC_RC, (int *)&event.rc); crm_element_value_int(msg, F_LRMD_OP_STATUS, &event.op_status); crm_element_value_int(msg, F_LRMD_RSC_DELETED, &event.rsc_deleted); crm_element_value_int(msg, F_LRMD_RSC_RUN_TIME, (int *)&event.t_run); crm_element_value_int(msg, F_LRMD_RSC_RCCHANGE_TIME, (int *)&event.t_rcchange); crm_element_value_int(msg, F_LRMD_RSC_EXEC_TIME, (int *)&event.exec_time); crm_element_value_int(msg, F_LRMD_RSC_QUEUE_TIME, (int *)&event.queue_time); event.op_type = crm_element_value(msg, F_LRMD_RSC_ACTION); event.user_data = crm_element_value(msg, F_LRMD_RSC_USERDATA_STR); event.output = crm_element_value(msg, F_LRMD_RSC_OUTPUT); event.type = lrmd_event_exec_complete; event.params = xml2list(msg); } else if (crm_str_eq(type, LRMD_OP_POKE, TRUE)) { event.type = lrmd_event_poke; } else { return 1; } crm_trace("op %s notify event received", type); native->callback(&event); if (event.params) { g_hash_table_destroy(event.params); } return 1; } static int lrmd_ipc_dispatch(const char *buffer, ssize_t length, gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->private; xmlNode *msg; int rc; if (!native->callback) { /* no callback set */ return 1; } msg = string2xml(buffer); rc = lrmd_dispatch_internal(lrmd, msg); free_xml(msg); return rc; } #ifdef HAVE_GNUTLS_GNUTLS_H static void lrmd_free_xml(gpointer userdata) { free_xml((xmlNode *) userdata); } static int lrmd_tls_connected(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->private; if (native->remote->tls_session) { return TRUE; } return FALSE; } static int lrmd_tls_dispatch(gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->private; xmlNode *xml = NULL; int rc = 0; int disconnected = 0; if (lrmd_tls_connected(lrmd) == FALSE) { crm_trace("tls dispatch triggered after disconnect"); return 0; } crm_trace("tls_dispatch triggered"); /* First check if there are any pending notifies to process that came * while we were waiting for replies earlier. */ if (native->pending_notify) { GList *iter = NULL; crm_trace("Processing pending notifies"); for (iter = native->pending_notify; iter; iter = iter->next) { lrmd_dispatch_internal(lrmd, iter->data); } g_list_free_full(native->pending_notify, lrmd_free_xml); native->pending_notify = NULL; } /* Next read the current buffer and see if there are any messages to handle. */ rc = crm_remote_ready(native->remote, 0); if (rc == 0) { /* nothing to read, see if any full messages are already in buffer. */ xml = crm_remote_parse_buffer(native->remote); } else if (rc < 0) { disconnected = 1; } else { crm_remote_recv(native->remote, -1, &disconnected); xml = crm_remote_parse_buffer(native->remote); } while (xml) { lrmd_dispatch_internal(lrmd, xml); free_xml(xml); xml = crm_remote_parse_buffer(native->remote); } if (disconnected) { crm_info("Server disconnected while reading remote server msg."); lrmd_tls_disconnect(lrmd); return 0; } return 1; } #endif /* Not used with mainloop */ int lrmd_poll(lrmd_t * lrmd, int timeout) { lrmd_private_t *native = lrmd->private; switch (native->type) { case CRM_CLIENT_IPC: return crm_ipc_ready(native->ipc); #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: if (native->pending_notify) { return 1; } else if (native->remote->buffer && strstr(native->remote->buffer, REMOTE_MSG_TERMINATOR)) { return 1; } return crm_remote_ready(native->remote, 0); #endif default: crm_err("Unsupported connection type: %d", native->type); } return 0; } /* Not used with mainloop */ bool lrmd_dispatch(lrmd_t * lrmd) { lrmd_private_t *private = NULL; CRM_ASSERT(lrmd != NULL); private = lrmd->private; switch (private->type) { case CRM_CLIENT_IPC: while (crm_ipc_ready(private->ipc)) { if (crm_ipc_read(private->ipc) > 0) { const char *msg = crm_ipc_buffer(private->ipc); lrmd_ipc_dispatch(msg, strlen(msg), lrmd); } } break; #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: lrmd_tls_dispatch(lrmd); break; #endif default: crm_err("Unsupported connection type: %d", private->type); } if (lrmd_api_is_connected(lrmd) == FALSE) { crm_err("Connection closed"); return FALSE; } return TRUE; } static xmlNode * lrmd_create_op(const char *token, const char *op, xmlNode * data, enum lrmd_call_options options) { xmlNode *op_msg = create_xml_node(NULL, "lrmd_command"); CRM_CHECK(op_msg != NULL, return NULL); CRM_CHECK(token != NULL, return NULL); crm_xml_add(op_msg, F_XML_TAGNAME, "lrmd_command"); crm_xml_add(op_msg, F_TYPE, T_LRMD); crm_xml_add(op_msg, F_LRMD_CALLBACK_TOKEN, token); crm_xml_add(op_msg, F_LRMD_OPERATION, op); crm_trace("Sending call options: %.8lx, %d", (long)options, options); crm_xml_add_int(op_msg, F_LRMD_CALLOPTS, options); if (data != NULL) { add_message_xml(op_msg, F_LRMD_CALLDATA, data); } return op_msg; } static void lrmd_ipc_connection_destroy(gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->private; crm_info("IPC connection destroyed"); /* Prevent these from being cleaned up in lrmd_api_disconnect() */ native->ipc = NULL; native->source = NULL; if (native->callback) { lrmd_event_data_t event = { 0, }; event.type = lrmd_event_disconnect; event.remote_nodename = native->remote_nodename; native->callback(&event); } } #ifdef HAVE_GNUTLS_GNUTLS_H static void lrmd_tls_connection_destroy(gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->private; crm_info("TLS connection destroyed"); if (native->remote->tls_session) { gnutls_bye(*native->remote->tls_session, GNUTLS_SHUT_RDWR); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); } if (native->psk_cred_c) { gnutls_psk_free_client_credentials(native->psk_cred_c); } if (native->sock) { close(native->sock); } if (native->process_notify) { mainloop_destroy_trigger(native->process_notify); native->process_notify = NULL; } if (native->pending_notify) { g_list_free_full(native->pending_notify, lrmd_free_xml); native->pending_notify = NULL; } free(native->remote->buffer); native->remote->buffer = NULL; native->source = 0; native->sock = 0; native->psk_cred_c = NULL; native->remote->tls_session = NULL; native->sock = 0; if (native->callback) { lrmd_event_data_t event = { 0, }; event.remote_nodename = native->remote_nodename; event.type = lrmd_event_disconnect; native->callback(&event); } return; } int lrmd_tls_send_msg(crm_remote_t * session, xmlNode * msg, uint32_t id, const char *msg_type) { int rc = -1; crm_xml_add_int(msg, F_LRMD_REMOTE_MSG_ID, id); crm_xml_add(msg, F_LRMD_REMOTE_MSG_TYPE, msg_type); rc = crm_remote_send(session, msg); if (rc < 0) { crm_err("Failed to send remote lrmd tls msg, rc = %d", rc); return rc; } return rc; } static xmlNode * lrmd_tls_recv_reply(lrmd_t * lrmd, int total_timeout, int expected_reply_id, int *disconnected) { lrmd_private_t *native = lrmd->private; xmlNode *xml = NULL; time_t start = time(NULL); const char *msg_type = NULL; int reply_id = 0; int remaining_timeout = 0; /* A timeout of 0 here makes no sense. We have to wait a period of time * for the response to come back. If -1 or 0, default to 10 seconds. */ if (total_timeout <= 0) { total_timeout = 10000; } while (!xml) { xml = crm_remote_parse_buffer(native->remote); if (!xml) { /* read some more off the tls buffer if we still have time left. */ if (remaining_timeout) { remaining_timeout = remaining_timeout - ((time(NULL) - start) * 1000); } else { remaining_timeout = total_timeout; } if (remaining_timeout <= 0) { return NULL; } crm_remote_recv(native->remote, remaining_timeout, disconnected); xml = crm_remote_parse_buffer(native->remote); if (!xml || *disconnected) { return NULL; } } CRM_ASSERT(xml != NULL); crm_element_value_int(xml, F_LRMD_REMOTE_MSG_ID, &reply_id); msg_type = crm_element_value(xml, F_LRMD_REMOTE_MSG_TYPE); if (!msg_type) { crm_err("Empty msg type received while waiting for reply"); free_xml(xml); xml = NULL; } else if (safe_str_eq(msg_type, "notify")) { /* got a notify while waiting for reply, trigger the notify to be processed later */ crm_info("queueing notify"); native->pending_notify = g_list_append(native->pending_notify, xml); if (native->process_notify) { crm_info("notify trigger set."); mainloop_set_trigger(native->process_notify); } xml = NULL; } else if (safe_str_neq(msg_type, "reply")) { /* msg isn't a reply, make some noise */ crm_err("Expected a reply, got %s", msg_type); free_xml(xml); xml = NULL; } else if (reply_id != expected_reply_id) { crm_err("Got outdated reply, expected id %d got id %d", expected_reply_id, reply_id); free_xml(xml); xml = NULL; } } if (native->remote->buffer && native->process_notify) { mainloop_set_trigger(native->process_notify); } return xml; } static int lrmd_tls_send(lrmd_t * lrmd, xmlNode * msg) { int rc = 0; lrmd_private_t *native = lrmd->private; global_remote_msg_id++; if (global_remote_msg_id <= 0) { global_remote_msg_id = 1; } rc = lrmd_tls_send_msg(native->remote, msg, global_remote_msg_id, "request"); if (rc <= 0) { crm_err("Remote lrmd send failed, disconnecting"); lrmd_tls_disconnect(lrmd); return -ENOTCONN; } return pcmk_ok; } static int lrmd_tls_send_recv(lrmd_t * lrmd, xmlNode * msg, int timeout, xmlNode ** reply) { int rc = 0; int disconnected = 0; xmlNode *xml = NULL; if (lrmd_tls_connected(lrmd) == FALSE) { return -1; } rc = lrmd_tls_send(lrmd, msg); if (rc < 0) { return rc; } xml = lrmd_tls_recv_reply(lrmd, timeout, global_remote_msg_id, &disconnected); if (disconnected) { crm_err("Remote lrmd server disconnected while waiting for reply with id %d. ", global_remote_msg_id); lrmd_tls_disconnect(lrmd); rc = -ENOTCONN; } else if (!xml) { crm_err("Remote lrmd never received reply for request id %d. timeout: %dms ", global_remote_msg_id, timeout); rc = -ECOMM; } if (reply) { *reply = xml; } else { free_xml(xml); } return rc; } #endif static int lrmd_send_xml(lrmd_t * lrmd, xmlNode * msg, int timeout, xmlNode ** reply) { int rc = -1; lrmd_private_t *native = lrmd->private; switch (native->type) { case CRM_CLIENT_IPC: rc = crm_ipc_send(native->ipc, msg, crm_ipc_client_response, timeout, reply); break; #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: rc = lrmd_tls_send_recv(lrmd, msg, timeout, reply); break; #endif default: crm_err("Unsupported connection type: %d", native->type); } return rc; } static int lrmd_send_xml_no_reply(lrmd_t * lrmd, xmlNode * msg) { int rc = -1; lrmd_private_t *native = lrmd->private; switch (native->type) { case CRM_CLIENT_IPC: rc = crm_ipc_send(native->ipc, msg, crm_ipc_client_none, 0, NULL); break; #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: rc = lrmd_tls_send(lrmd, msg); break; #endif default: crm_err("Unsupported connection type: %d", native->type); } return rc; } static int lrmd_api_is_connected(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->private; switch (native->type) { case CRM_CLIENT_IPC: return crm_ipc_connected(native->ipc); break; #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: return lrmd_tls_connected(lrmd); break; #endif default: crm_err("Unsupported connection type: %d", native->type); } return 0; } static int lrmd_send_command(lrmd_t * lrmd, const char *op, xmlNode * data, xmlNode ** output_data, int timeout, /* ms. defaults to 1000 if set to 0 */ enum lrmd_call_options options, gboolean expect_reply) { /* TODO we need to reduce usage of this boolean */ int rc = pcmk_ok; int reply_id = -1; lrmd_private_t *native = lrmd->private; xmlNode *op_msg = NULL; xmlNode *op_reply = NULL; if (!lrmd_api_is_connected(lrmd)) { return -ENOTCONN; } if (op == NULL) { crm_err("No operation specified"); return -EINVAL; } CRM_CHECK(native->token != NULL,; ); crm_trace("sending %s op to lrmd", op); op_msg = lrmd_create_op(native->token, op, data, options); if (op_msg == NULL) { return -EINVAL; } crm_xml_add_int(op_msg, F_LRMD_TIMEOUT, timeout); if (expect_reply) { rc = lrmd_send_xml(lrmd, op_msg, timeout, &op_reply); } else { rc = lrmd_send_xml_no_reply(lrmd, op_msg); goto done; } if (rc < 0) { crm_perror(LOG_ERR, "Couldn't perform %s operation (timeout=%d): %d", op, timeout, rc); rc = -ECOMM; goto done; } rc = pcmk_ok; crm_element_value_int(op_reply, F_LRMD_CALLID, &reply_id); crm_trace("%s op reply received", op); if (crm_element_value_int(op_reply, F_LRMD_RC, &rc) != 0) { rc = -ENOMSG; goto done; } crm_log_xml_trace(op_reply, "Reply"); if (output_data) { *output_data = op_reply; op_reply = NULL; /* Prevent subsequent free */ } done: if (lrmd_api_is_connected(lrmd) == FALSE) { crm_err("LRMD disconnected"); } free_xml(op_msg); free_xml(op_reply); return rc; } static int lrmd_api_poke_connection(lrmd_t * lrmd) { int rc; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); rc = lrmd_send_command(lrmd, LRMD_OP_POKE, data, NULL, 0, 0, FALSE); free_xml(data); return rc; } static int lrmd_handshake(lrmd_t * lrmd, const char *name) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->private; xmlNode *reply = NULL; xmlNode *hello = create_xml_node(NULL, "lrmd_command"); crm_xml_add(hello, F_TYPE, T_LRMD); crm_xml_add(hello, F_LRMD_OPERATION, CRM_OP_REGISTER); crm_xml_add(hello, F_LRMD_CLIENTNAME, name); crm_xml_add(hello, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION); /* advertise that we are a proxy provider */ if (native->proxy_callback) { crm_xml_add(hello, F_LRMD_IS_IPC_PROVIDER, "true"); } rc = lrmd_send_xml(lrmd, hello, -1, &reply); if (rc < 0) { crm_perror(LOG_DEBUG, "Couldn't complete registration with the lrmd API: %d", rc); rc = -ECOMM; } else if (reply == NULL) { crm_err("Did not receive registration reply"); rc = -EPROTO; } else { const char *msg_type = crm_element_value(reply, F_LRMD_OPERATION); const char *tmp_ticket = crm_element_value(reply, F_LRMD_CLIENTID); crm_element_value_int(reply, F_LRMD_RC, &rc); if (rc == -EPROTO) { crm_err("LRMD protocol mismatch client version %s, server version %s", LRMD_PROTOCOL_VERSION, crm_element_value(reply, F_LRMD_PROTOCOL_VERSION)); crm_log_xml_err(reply, "Protocol Error"); } else if (safe_str_neq(msg_type, CRM_OP_REGISTER)) { crm_err("Invalid registration message: %s", msg_type); crm_log_xml_err(reply, "Bad reply"); rc = -EPROTO; } else if (tmp_ticket == NULL) { crm_err("No registration token provided"); crm_log_xml_err(reply, "Bad reply"); rc = -EPROTO; } else { crm_trace("Obtained registration token: %s", tmp_ticket); native->token = strdup(tmp_ticket); rc = pcmk_ok; } } free_xml(reply); free_xml(hello); if (rc != pcmk_ok) { lrmd_api_disconnect(lrmd); } return rc; } static int lrmd_ipc_connect(lrmd_t * lrmd, int *fd) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->private; static struct ipc_client_callbacks lrmd_callbacks = { .dispatch = lrmd_ipc_dispatch, .destroy = lrmd_ipc_connection_destroy }; crm_info("Connecting to lrmd"); if (fd) { /* No mainloop */ native->ipc = crm_ipc_new("lrmd", 0); if (native->ipc && crm_ipc_connect(native->ipc)) { *fd = crm_ipc_get_fd(native->ipc); } else if (native->ipc) { rc = -ENOTCONN; } } else { native->source = mainloop_add_ipc_client("lrmd", G_PRIORITY_HIGH, 0, lrmd, &lrmd_callbacks); native->ipc = mainloop_get_ipc_client(native->source); } if (native->ipc == NULL) { crm_debug("Could not connect to the LRMD API"); rc = -ENOTCONN; } return rc; } #ifdef HAVE_GNUTLS_GNUTLS_H int lrmd_tls_set_key(gnutls_datum_t * key, const char *location) { FILE *stream; int read_len = 256; int cur_len = 0; int buf_len = read_len; static char *key_cache = NULL; static size_t key_cache_len = 0; static time_t key_cache_updated; if (location == NULL) { return -1; } if (key_cache) { time_t now = time(NULL); if ((now - key_cache_updated) < 60) { key->data = gnutls_malloc(key_cache_len + 1); key->size = key_cache_len; memcpy(key->data, key_cache, key_cache_len); crm_debug("using cached LRMD key"); return 0; } else { key_cache_len = 0; key_cache_updated = 0; free(key_cache); key_cache = NULL; crm_debug("clearing lrmd key cache"); } } stream = fopen(location, "r"); if (!stream) { return -1; } key->data = gnutls_malloc(read_len); while (!feof(stream)) { int next; if (cur_len == buf_len) { buf_len = cur_len + read_len; key->data = gnutls_realloc(key->data, buf_len); } next = fgetc(stream); if (next == EOF && feof(stream)) { break; } key->data[cur_len] = next; cur_len++; } fclose(stream); key->size = cur_len; if (!cur_len) { gnutls_free(key->data); key->data = 0; return -1; } if (!key_cache) { key_cache = calloc(1, key->size + 1); memcpy(key_cache, key->data, key->size); key_cache_len = key->size; key_cache_updated = time(NULL); } return 0; } static int lrmd_tls_key_cb(gnutls_session_t session, char **username, gnutls_datum_t * key) { int rc = 0; const char *specific_location = getenv("PCMK_authkey_location"); if (lrmd_tls_set_key(key, specific_location) == 0) { crm_debug("Using custom authkey location %s", specific_location); return 0; } if (lrmd_tls_set_key(key, DEFAULT_REMOTE_KEY_LOCATION)) { rc = lrmd_tls_set_key(key, ALT_REMOTE_KEY_LOCATION); } if (rc) { crm_err("No lrmd remote key found"); return -1; } *username = gnutls_malloc(strlen(DEFAULT_REMOTE_USERNAME) + 1); strcpy(*username, DEFAULT_REMOTE_USERNAME); return rc; } static void lrmd_gnutls_global_init(void) { static int gnutls_init = 0; if (!gnutls_init) { gnutls_global_init(); } gnutls_init = 1; } #endif static void report_async_connection_result(lrmd_t * lrmd, int rc) { lrmd_private_t *native = lrmd->private; if (native->callback) { lrmd_event_data_t event = { 0, }; event.type = lrmd_event_connect; event.remote_nodename = native->remote_nodename; event.connection_rc = rc; native->callback(&event); } } #ifdef HAVE_GNUTLS_GNUTLS_H static void lrmd_tcp_connect_cb(void *userdata, int sock) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->private; char name[256] = { 0, }; static struct mainloop_fd_callbacks lrmd_tls_callbacks = { .dispatch = lrmd_tls_dispatch, .destroy = lrmd_tls_connection_destroy, }; int rc = sock; if (rc < 0) { lrmd_tls_connection_destroy(lrmd); crm_info("remote lrmd connect to %s at port %d failed", native->server, native->port); report_async_connection_result(lrmd, rc); return; } /* TODO continue with tls stuff now that tcp connect passed. make this async as well soon * to avoid all blocking code in the client. */ native->sock = sock; gnutls_psk_allocate_client_credentials(&native->psk_cred_c); gnutls_psk_set_client_credentials_function(native->psk_cred_c, lrmd_tls_key_cb); native->remote->tls_session = create_psk_tls_session(sock, GNUTLS_CLIENT, native->psk_cred_c); if (crm_initiate_client_tls_handshake(native->remote, LRMD_CLIENT_HANDSHAKE_TIMEOUT) != 0) { crm_warn("Client tls handshake failed for server %s:%d. Disconnecting", native->server, native->port); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); native->remote->tls_session = NULL; lrmd_tls_connection_destroy(lrmd); report_async_connection_result(lrmd, -1); return; } crm_info("Remote lrmd client TLS connection established with server %s:%d", native->server, native->port); snprintf(name, 128, "remote-lrmd-%s:%d", native->server, native->port); native->process_notify = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_tls_dispatch, lrmd); native->source = mainloop_add_fd(name, G_PRIORITY_HIGH, native->sock, lrmd, &lrmd_tls_callbacks); rc = lrmd_handshake(lrmd, name); report_async_connection_result(lrmd, rc); return; } static int lrmd_tls_connect_async(lrmd_t * lrmd, int timeout /*ms */ ) { int rc = 0; lrmd_private_t *native = lrmd->private; lrmd_gnutls_global_init(); rc = crm_remote_tcp_connect_async(native->server, native->port, timeout, lrmd, lrmd_tcp_connect_cb); return rc; } static int lrmd_tls_connect(lrmd_t * lrmd, int *fd) { static struct mainloop_fd_callbacks lrmd_tls_callbacks = { .dispatch = lrmd_tls_dispatch, .destroy = lrmd_tls_connection_destroy, }; lrmd_private_t *native = lrmd->private; int sock; lrmd_gnutls_global_init(); sock = crm_remote_tcp_connect(native->server, native->port); if (sock < 0) { crm_warn("Could not establish remote lrmd connection to %s", native->server); lrmd_tls_connection_destroy(lrmd); return -ENOTCONN; } native->sock = sock; gnutls_psk_allocate_client_credentials(&native->psk_cred_c); gnutls_psk_set_client_credentials_function(native->psk_cred_c, lrmd_tls_key_cb); native->remote->tls_session = create_psk_tls_session(sock, GNUTLS_CLIENT, native->psk_cred_c); if (crm_initiate_client_tls_handshake(native->remote, LRMD_CLIENT_HANDSHAKE_TIMEOUT) != 0) { crm_err("Session creation for %s:%d failed", native->server, native->port); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); native->remote->tls_session = NULL; lrmd_tls_connection_destroy(lrmd); return -1; } crm_info("Remote lrmd client TLS connection established with server %s:%d", native->server, native->port); if (fd) { *fd = sock; } else { char name[256] = { 0, }; snprintf(name, 128, "remote-lrmd-%s:%d", native->server, native->port); native->process_notify = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_tls_dispatch, lrmd); native->source = mainloop_add_fd(name, G_PRIORITY_HIGH, native->sock, lrmd, &lrmd_tls_callbacks); } return pcmk_ok; } #endif static int lrmd_api_connect(lrmd_t * lrmd, const char *name, int *fd) { int rc = -ENOTCONN; lrmd_private_t *native = lrmd->private; switch (native->type) { case CRM_CLIENT_IPC: rc = lrmd_ipc_connect(lrmd, fd); break; #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: rc = lrmd_tls_connect(lrmd, fd); break; #endif default: crm_err("Unsupported connection type: %d", native->type); } if (rc == pcmk_ok) { rc = lrmd_handshake(lrmd, name); } return rc; } static int lrmd_api_connect_async(lrmd_t * lrmd, const char *name, int timeout) { int rc = 0; lrmd_private_t *native = lrmd->private; if (!native->callback) { crm_err("Async connect not possible, no lrmd client callback set."); return -1; } switch (native->type) { case CRM_CLIENT_IPC: /* fake async connection with ipc. it should be fast * enough that we gain very little from async */ rc = lrmd_api_connect(lrmd, name, NULL); if (!rc) { report_async_connection_result(lrmd, rc); } break; #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: rc = lrmd_tls_connect_async(lrmd, timeout); if (rc) { /* connection failed, report rc now */ report_async_connection_result(lrmd, rc); } break; #endif default: crm_err("Unsupported connection type: %d", native->type); } return rc; } static void lrmd_ipc_disconnect(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->private; if (native->source != NULL) { /* Attached to mainloop */ mainloop_del_ipc_client(native->source); native->source = NULL; native->ipc = NULL; } else if (native->ipc) { /* Not attached to mainloop */ crm_ipc_t *ipc = native->ipc; native->ipc = NULL; crm_ipc_close(ipc); crm_ipc_destroy(ipc); } } #ifdef HAVE_GNUTLS_GNUTLS_H static void lrmd_tls_disconnect(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->private; if (native->remote->tls_session) { gnutls_bye(*native->remote->tls_session, GNUTLS_SHUT_RDWR); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); native->remote->tls_session = 0; } if (native->source != NULL) { /* Attached to mainloop */ mainloop_del_ipc_client(native->source); native->source = NULL; } else if (native->sock) { close(native->sock); } if (native->pending_notify) { g_list_free_full(native->pending_notify, lrmd_free_xml); native->pending_notify = NULL; } } #endif static int lrmd_api_disconnect(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->private; crm_info("Disconnecting from lrmd service"); switch (native->type) { case CRM_CLIENT_IPC: lrmd_ipc_disconnect(lrmd); break; #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: lrmd_tls_disconnect(lrmd); break; #endif default: crm_err("Unsupported connection type: %d", native->type); } free(native->token); native->token = NULL; return 0; } static int lrmd_api_register_rsc(lrmd_t * lrmd, const char *rsc_id, const char *class, const char *provider, const char *type, enum lrmd_call_options options) { int rc = pcmk_ok; xmlNode *data = NULL; if (!class || !type || !rsc_id) { return -EINVAL; } if (safe_str_eq(class, "ocf") && !provider) { return -EINVAL; } data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); crm_xml_add(data, F_LRMD_CLASS, class); crm_xml_add(data, F_LRMD_PROVIDER, provider); crm_xml_add(data, F_LRMD_TYPE, type); rc = lrmd_send_command(lrmd, LRMD_OP_RSC_REG, data, NULL, 0, options, TRUE); free_xml(data); return rc; } static int lrmd_api_unregister_rsc(lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options) { int rc = pcmk_ok; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); rc = lrmd_send_command(lrmd, LRMD_OP_RSC_UNREG, data, NULL, 0, options, TRUE); free_xml(data); return rc; } lrmd_rsc_info_t * lrmd_copy_rsc_info(lrmd_rsc_info_t * rsc_info) { lrmd_rsc_info_t *copy = NULL; copy = calloc(1, sizeof(lrmd_rsc_info_t)); copy->id = strdup(rsc_info->id); copy->type = strdup(rsc_info->type); copy->class = strdup(rsc_info->class); if (rsc_info->provider) { copy->provider = strdup(rsc_info->provider); } return copy; } void lrmd_free_rsc_info(lrmd_rsc_info_t * rsc_info) { if (!rsc_info) { return; } free(rsc_info->id); free(rsc_info->type); free(rsc_info->class); free(rsc_info->provider); free(rsc_info); } static lrmd_rsc_info_t * lrmd_api_get_rsc_info(lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options) { lrmd_rsc_info_t *rsc_info = NULL; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); xmlNode *output = NULL; const char *class = NULL; const char *provider = NULL; const char *type = NULL; crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); lrmd_send_command(lrmd, LRMD_OP_RSC_INFO, data, &output, 0, options, TRUE); free_xml(data); if (!output) { return NULL; } class = crm_element_value(output, F_LRMD_CLASS); provider = crm_element_value(output, F_LRMD_PROVIDER); type = crm_element_value(output, F_LRMD_TYPE); if (!class || !type) { free_xml(output); return NULL; } else if (safe_str_eq(class, "ocf") && !provider) { free_xml(output); return NULL; } rsc_info = calloc(1, sizeof(lrmd_rsc_info_t)); rsc_info->id = strdup(rsc_id); rsc_info->class = strdup(class); if (provider) { rsc_info->provider = strdup(provider); } rsc_info->type = strdup(type); free_xml(output); return rsc_info; } static void lrmd_api_set_callback(lrmd_t * lrmd, lrmd_event_callback callback) { lrmd_private_t *native = lrmd->private; native->callback = callback; } void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)) { lrmd_private_t *native = lrmd->private; native->proxy_callback = callback; native->proxy_callback_userdata = userdata; } void lrmd_internal_proxy_dispatch(lrmd_t *lrmd, xmlNode *msg) { lrmd_private_t *native = lrmd->private; if (native->proxy_callback) { crm_log_xml_trace(msg, "PROXY_INBOUND"); native->proxy_callback(lrmd, native->proxy_callback_userdata, msg); } } int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg) { if (lrmd == NULL) { return -ENOTCONN; } crm_xml_add(msg, F_LRMD_OPERATION, CRM_OP_IPC_FWD); crm_log_xml_trace(msg, "PROXY_OUTBOUND"); return lrmd_send_xml_no_reply(lrmd, msg); } static int stonith_get_metadata(const char *provider, const char *type, char **output) { int rc = pcmk_ok; stonith_api->cmds->metadata(stonith_api, st_opt_sync_call, type, provider, output, 0); if (*output == NULL) { rc = -EIO; } return rc; } #define lsb_metadata_template \ "\n" \ "\n" \ "\n" \ " 1.0\n" \ " \n" \ " %s\n" \ " \n" \ " %s\n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " %s\n" \ " %s\n" \ " %s\n" \ " %s\n" \ " %s\n" \ " %s\n" \ " %s\n" \ " \n" \ "\n" #define LSB_INITSCRIPT_INFOBEGIN_TAG "### BEGIN INIT INFO" #define LSB_INITSCRIPT_INFOEND_TAG "### END INIT INFO" #define PROVIDES "# Provides:" #define REQ_START "# Required-Start:" #define REQ_STOP "# Required-Stop:" #define SHLD_START "# Should-Start:" #define SHLD_STOP "# Should-Stop:" #define DFLT_START "# Default-Start:" #define DFLT_STOP "# Default-Stop:" #define SHORT_DSCR "# Short-Description:" #define DESCRIPTION "# Description:" #define lsb_meta_helper_free_value(m) \ if ((m) != NULL) { \ xmlFree(m); \ (m) = NULL; \ } #define lsb_meta_helper_get_value(buffer, ptr, keyword) \ if (!ptr && !strncasecmp(buffer, keyword, strlen(keyword))) { \ (ptr) = (char *)xmlEncodeEntitiesReentrant(NULL, BAD_CAST buffer+strlen(keyword)); \ continue; \ } static int lsb_get_metadata(const char *type, char **output) { char ra_pathname[PATH_MAX] = { 0, }; FILE *fp; GString *meta_data = NULL; char buffer[1024]; char *provides = NULL; char *req_start = NULL; char *req_stop = NULL; char *shld_start = NULL; char *shld_stop = NULL; char *dflt_start = NULL; char *dflt_stop = NULL; char *s_dscrpt = NULL; char *xml_l_dscrpt = NULL; GString *l_dscrpt = NULL; if(type[0] == '/') { snprintf(ra_pathname, sizeof(ra_pathname), "%s", type); } else { snprintf(ra_pathname, sizeof(ra_pathname), "%s/%s", LSB_ROOT_DIR, type); } crm_trace("Looking into %s", ra_pathname); if (!(fp = fopen(ra_pathname, "r"))) { return -errno; } /* Enter into the lsb-compliant comment block */ while (fgets(buffer, sizeof(buffer), fp)) { /* Now suppose each of the following eight arguments contain only one line */ lsb_meta_helper_get_value(buffer, provides, PROVIDES) lsb_meta_helper_get_value(buffer, req_start, REQ_START) lsb_meta_helper_get_value(buffer, req_stop, REQ_STOP) lsb_meta_helper_get_value(buffer, shld_start, SHLD_START) lsb_meta_helper_get_value(buffer, shld_stop, SHLD_STOP) lsb_meta_helper_get_value(buffer, dflt_start, DFLT_START) lsb_meta_helper_get_value(buffer, dflt_stop, DFLT_STOP) lsb_meta_helper_get_value(buffer, s_dscrpt, SHORT_DSCR) /* Long description may cross multiple lines */ if ((l_dscrpt == NULL) && (0 == strncasecmp(buffer, DESCRIPTION, strlen(DESCRIPTION)))) { l_dscrpt = g_string_new(buffer + strlen(DESCRIPTION)); /* Between # and keyword, more than one space, or a tab character, * indicates the continuation line. Extracted from LSB init script standard */ while (fgets(buffer, sizeof(buffer), fp)) { if (!strncmp(buffer, "# ", 3) || !strncmp(buffer, "#\t", 2)) { buffer[0] = ' '; l_dscrpt = g_string_append(l_dscrpt, buffer); } else { fputs(buffer, fp); break; /* Long description ends */ } } continue; } if (l_dscrpt) { xml_l_dscrpt = (char *)xmlEncodeEntitiesReentrant(NULL, BAD_CAST(l_dscrpt->str)); } if (!strncasecmp(buffer, LSB_INITSCRIPT_INFOEND_TAG, strlen(LSB_INITSCRIPT_INFOEND_TAG))) { /* Get to the out border of LSB comment block */ break; } if (buffer[0] != '#') { break; /* Out of comment block in the beginning */ } } fclose(fp); meta_data = g_string_new(""); g_string_sprintf(meta_data, lsb_metadata_template, type, (xml_l_dscrpt == NULL) ? type : xml_l_dscrpt, (s_dscrpt == NULL) ? type : s_dscrpt, (provides == NULL) ? "" : provides, (req_start == NULL) ? "" : req_start, (req_stop == NULL) ? "" : req_stop, (shld_start == NULL) ? "" : shld_start, (shld_stop == NULL) ? "" : shld_stop, (dflt_start == NULL) ? "" : dflt_start, (dflt_stop == NULL) ? "" : dflt_stop); lsb_meta_helper_free_value(xml_l_dscrpt); lsb_meta_helper_free_value(s_dscrpt); lsb_meta_helper_free_value(provides); lsb_meta_helper_free_value(req_start); lsb_meta_helper_free_value(req_stop); lsb_meta_helper_free_value(shld_start); lsb_meta_helper_free_value(shld_stop); lsb_meta_helper_free_value(dflt_start); lsb_meta_helper_free_value(dflt_stop); if (l_dscrpt) { g_string_free(l_dscrpt, TRUE); } *output = strdup(meta_data->str); g_string_free(meta_data, TRUE); crm_trace("Created fake metadata: %d", strlen(*output)); return pcmk_ok; } #if SUPPORT_NAGIOS static int nagios_get_metadata(const char *type, char **output) { int rc = pcmk_ok; FILE *file_strm = NULL; int start = 0, length = 0, read_len = 0; char *metadata_file = NULL; int len = 36; len += strlen(NAGIOS_METADATA_DIR); len += strlen(type); metadata_file = calloc(1, len); CRM_CHECK(metadata_file != NULL, return -ENOMEM); sprintf(metadata_file, "%s/%s.xml", NAGIOS_METADATA_DIR, type); file_strm = fopen(metadata_file, "r"); if (file_strm == NULL) { crm_err("Metadata file %s does not exist", metadata_file); free(metadata_file); return -EIO; } /* see how big the file is */ start = ftell(file_strm); fseek(file_strm, 0L, SEEK_END); length = ftell(file_strm); fseek(file_strm, 0L, start); CRM_ASSERT(length >= 0); CRM_ASSERT(start == ftell(file_strm)); if (length <= 0) { crm_info("%s was not valid", metadata_file); free(*output); *output = NULL; rc = -EIO; } else { crm_trace("Reading %d bytes from file", length); *output = calloc(1, (length + 1)); read_len = fread(*output, 1, length, file_strm); if (read_len != length) { crm_err("Calculated and read bytes differ: %d vs. %d", length, read_len); free(*output); *output = NULL; rc = -EIO; } } fclose(file_strm); free(metadata_file); return rc; } #endif static int generic_get_metadata(const char *standard, const char *provider, const char *type, char **output) { svc_action_t *action = resources_action_create(type, standard, provider, type, "meta-data", 0, 5000, NULL); if (!(services_action_sync(action))) { crm_err("Failed to retrieve meta-data for %s:%s:%s", standard, provider, type); services_action_free(action); return -EIO; } if (!action->stdout_data) { crm_err("Failed to retrieve meta-data for %s:%s:%s", standard, provider, type); services_action_free(action); return -EIO; } *output = strdup(action->stdout_data); services_action_free(action); return pcmk_ok; } static int lrmd_api_get_metadata(lrmd_t * lrmd, const char *class, const char *provider, const char *type, char **output, enum lrmd_call_options options) { if (!class || !type) { return -EINVAL; } if (safe_str_eq(class, "stonith")) { return stonith_get_metadata(provider, type, output); } else if (safe_str_eq(class, "lsb")) { return lsb_get_metadata(type, output); #if SUPPORT_NAGIOS } else if (safe_str_eq(class, "nagios")) { return nagios_get_metadata(type, output); #endif } return generic_get_metadata(class, provider, type, output); } static int lrmd_api_exec(lrmd_t * lrmd, const char *rsc_id, const char *action, const char *userdata, int interval, /* ms */ int timeout, /* ms */ int start_delay, /* ms */ enum lrmd_call_options options, lrmd_key_value_t * params) { int rc = pcmk_ok; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); xmlNode *args = create_xml_node(data, XML_TAG_ATTRS); lrmd_key_value_t *tmp = NULL; crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); crm_xml_add(data, F_LRMD_RSC_ACTION, action); crm_xml_add(data, F_LRMD_RSC_USERDATA_STR, userdata); crm_xml_add_int(data, F_LRMD_RSC_INTERVAL, interval); crm_xml_add_int(data, F_LRMD_TIMEOUT, timeout); crm_xml_add_int(data, F_LRMD_RSC_START_DELAY, start_delay); for (tmp = params; tmp; tmp = tmp->next) { hash2field((gpointer) tmp->key, (gpointer) tmp->value, args); } rc = lrmd_send_command(lrmd, LRMD_OP_RSC_EXEC, data, NULL, timeout, options, TRUE); free_xml(data); lrmd_key_value_freeall(params); return rc; } static int lrmd_api_cancel(lrmd_t * lrmd, const char *rsc_id, const char *action, int interval) { int rc = pcmk_ok; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add(data, F_LRMD_RSC_ACTION, action); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); crm_xml_add_int(data, F_LRMD_RSC_INTERVAL, interval); rc = lrmd_send_command(lrmd, LRMD_OP_RSC_CANCEL, data, NULL, 0, 0, TRUE); free_xml(data); return rc; } static int list_stonith_agents(lrmd_list_t ** resources) { int rc = 0; stonith_key_value_t *stonith_resources = NULL; stonith_key_value_t *dIter = NULL; stonith_api->cmds->list_agents(stonith_api, st_opt_sync_call, NULL, &stonith_resources, 0); for (dIter = stonith_resources; dIter; dIter = dIter->next) { rc++; if (resources) { *resources = lrmd_list_add(*resources, dIter->value); } } stonith_key_value_freeall(stonith_resources, 1, 0); return rc; } static int lrmd_api_list_agents(lrmd_t * lrmd, lrmd_list_t ** resources, const char *class, const char *provider) { int rc = 0; if (safe_str_eq(class, "stonith")) { rc += list_stonith_agents(resources); } else { GListPtr gIter = NULL; GList *agents = resources_list_agents(class, provider); for (gIter = agents; gIter != NULL; gIter = gIter->next) { *resources = lrmd_list_add(*resources, (const char *)gIter->data); rc++; } g_list_free_full(agents, free); if (!class) { rc += list_stonith_agents(resources); } } if (rc == 0) { crm_notice("No agents found for class %s", class); rc = -EPROTONOSUPPORT; } return rc; } static int does_provider_have_agent(const char *agent, const char *provider, const char *class) { int found = 0; GList *agents = NULL; GListPtr gIter2 = NULL; agents = resources_list_agents(class, provider); for (gIter2 = agents; gIter2 != NULL; gIter2 = gIter2->next) { if (safe_str_eq(agent, gIter2->data)) { found = 1; } } g_list_free_full(agents, free); return found; } static int lrmd_api_list_ocf_providers(lrmd_t * lrmd, const char *agent, lrmd_list_t ** providers) { int rc = pcmk_ok; char *provider = NULL; GList *ocf_providers = NULL; GListPtr gIter = NULL; ocf_providers = resources_list_providers("ocf"); for (gIter = ocf_providers; gIter != NULL; gIter = gIter->next) { provider = gIter->data; if (!agent || does_provider_have_agent(agent, provider, "ocf")) { *providers = lrmd_list_add(*providers, (const char *)gIter->data); rc++; } } g_list_free_full(ocf_providers, free); return rc; } static int lrmd_api_list_standards(lrmd_t * lrmd, lrmd_list_t ** supported) { int rc = 0; GList *standards = NULL; GListPtr gIter = NULL; standards = resources_list_standards(); for (gIter = standards; gIter != NULL; gIter = gIter->next) { *supported = lrmd_list_add(*supported, (const char *)gIter->data); rc++; } if (list_stonith_agents(NULL) > 0) { *supported = lrmd_list_add(*supported, "stonith"); rc++; } g_list_free_full(standards, free); return rc; } lrmd_t * lrmd_api_new(void) { lrmd_t *new_lrmd = NULL; lrmd_private_t *pvt = NULL; new_lrmd = calloc(1, sizeof(lrmd_t)); pvt = calloc(1, sizeof(lrmd_private_t)); pvt->remote = calloc(1, sizeof(crm_remote_t)); new_lrmd->cmds = calloc(1, sizeof(lrmd_api_operations_t)); pvt->type = CRM_CLIENT_IPC; new_lrmd->private = pvt; new_lrmd->cmds->connect = lrmd_api_connect; new_lrmd->cmds->connect_async = lrmd_api_connect_async; new_lrmd->cmds->is_connected = lrmd_api_is_connected; new_lrmd->cmds->poke_connection = lrmd_api_poke_connection; new_lrmd->cmds->disconnect = lrmd_api_disconnect; new_lrmd->cmds->register_rsc = lrmd_api_register_rsc; new_lrmd->cmds->unregister_rsc = lrmd_api_unregister_rsc; new_lrmd->cmds->get_rsc_info = lrmd_api_get_rsc_info; new_lrmd->cmds->set_callback = lrmd_api_set_callback; new_lrmd->cmds->get_metadata = lrmd_api_get_metadata; new_lrmd->cmds->exec = lrmd_api_exec; new_lrmd->cmds->cancel = lrmd_api_cancel; new_lrmd->cmds->list_agents = lrmd_api_list_agents; new_lrmd->cmds->list_ocf_providers = lrmd_api_list_ocf_providers; new_lrmd->cmds->list_standards = lrmd_api_list_standards; if (!stonith_api) { stonith_api = stonith_api_new(); } return new_lrmd; } lrmd_t * lrmd_remote_api_new(const char *nodename, const char *server, int port) { #ifdef HAVE_GNUTLS_GNUTLS_H lrmd_t *new_lrmd = lrmd_api_new(); lrmd_private_t *native = new_lrmd->private; if (!nodename && !server) { lrmd_api_delete(new_lrmd); return NULL; } native->type = CRM_CLIENT_TLS; native->remote_nodename = nodename ? strdup(nodename) : strdup(server); native->server = server ? strdup(server) : strdup(nodename); native->port = port; if (native->port == 0) { const char *remote_port_str = getenv("PCMK_remote_port"); native->port = remote_port_str ? atoi(remote_port_str) : DEFAULT_REMOTE_PORT; } return new_lrmd; #else crm_err("GNUTLS is not enabled for this build, remote LRMD client can not be created"); return NULL; #endif } void lrmd_api_delete(lrmd_t * lrmd) { if (!lrmd) { return; } lrmd->cmds->disconnect(lrmd); /* no-op if already disconnected */ free(lrmd->cmds); if (lrmd->private) { lrmd_private_t *native = lrmd->private; #ifdef HAVE_GNUTLS_GNUTLS_H free(native->server); #endif free(native->remote_nodename); free(native->remote); } + + if (stonith_api) { + stonith_api->cmds->free(stonith_api); + stonith_api = NULL; + } + free(lrmd->private); free(lrmd); } diff --git a/mcp/pacemaker.sysconfig b/mcp/pacemaker.sysconfig index 8555fc154e..7f12111009 100644 --- a/mcp/pacemaker.sysconfig +++ b/mcp/pacemaker.sysconfig @@ -1,92 +1,92 @@ # For non-systemd based systems, prefix export to each enabled line # Turn on special handling for CMAN clusters in the init script # Without this, fenced (and by inference, cman) cannot reliably be made to shut down # PCMK_STACK=cman #==#==# Variables that control logging # Enable debug logging globally or per-subsystem # Multiple subsystems may me listed separated by commas # eg. PCMK_debug=crmd,pengine # PCMK_debug=yes|no|crmd|pengine|cib|stonith-ng|attrd|pacemakerd # Send INFO (and higher) messages to the named log file # Additional messages may also appear here depending on any configured debug and trace settings # By default Pacemaker will inherit the logfile specified in corosync.conf # PCMK_debugfile=/var/log/pacemaker.log # Specify an alternate syslog target for NOTICE (and higher) messages # Use 'none' to disable - not recommended # The default value is 'daemon' # PCMK_logfacility=none|daemon|user|local0|local1|local2|local3|local4|local5|local6|local7 # Send all messages up-to-and-including the configured priority to syslog # A value of 'info' will be far too verbose for most installations and 'debug' is almost certain to send you blind # The default value is 'notice' # PCMK_logpriority=emerg|alert|crit|error|warning|notice|info|debug # Log all messages from a comma-separated list of functions # PCMK_trace_functions=function1,function2,function3 # Log all messages from a comma-separated list of files (no path) # Supports wildcards eg. PCMK_trace_files=prefix*.c # PCMK_trace_files=file.c,other.h # Log all messages matching comma-separated list of formats # PCMK_trace_formats="Sent delete %d" # Log all messages from a comma-separated list of tags # PCMK_trace_tags=tag1,tag2 # Dump the blackbox whenever the message at function and line is printed # eg. PCMK_trace_blackbox=te_graph_trigger:223,unpack_clone:81 # PCMK_trace_blackbox=fn:line,fn2:line2,... # Enable blackbox logging globally or per-subsystem # The blackbox contains a rolling buffer of all logs (including info+debug+trace) # and is written after a crash, assertion failure and/or when SIGTRAP is recieved # # The blackbox recorder can also be enabled for Pacemaker daemons at runtime by sending SIGUSR1 # # Multiple subsystems may me listed separated by commas # eg. PCMK_blackbox=crmd,pengine # PCMK_blackbox=yes|no|crmd|pengine|cib|stonith-ng|attrd|pacemakerd #==#==# Advanced use only # Enable this for compatibility with older corosync (prior to 2.0) # based clusters which used the nodes uname as its uuid also # PCMK_uname_is_uuid=no # Specify an alternate location for RNG schemas and XSL transforms # Mostly only useful for developer testing # PCMK_schema_directory=/some/path #==#==# Pacemaker Remote # Use a custom directory for finding the authkey. # PCMK_authkey_location=/etc/pacemaker/authkey # # Specify a custom port for Pacemaker Remote connections # PCMK_remote_port=3121 #==#==# IPC # Force use of a particular class of IPC connection # PCMK_ipc_type=shared-mem|socket|posix|sysv # Specify an IPC buffer size in bytes # Useful when connecting to really big clusters that exceed the default 20k buffer # PCMK_ipc_buffer=20480 #==#==# Profiling and memory leak testing # Variables for running child daemons under valgrind and/or checking for memory problems # G_SLICE=always-malloc # MALLOC_PERTURB_=221 # or 0 # MALLOC_CHECK_=3 # or 0,1,2 # PCMK_valgrind_enabled=yes # PCMK_valgrind_enabled=cib,crmd # PCMK_callgrind_enabled=yes # PCMK_callgrind_enabled=cib,crmd -# VALGRIND_OPTS="--leak-check=full --trace-children=no --num-callers=25 --log-file=/var/lib/pacemaker/valgrind-%p" +# VALGRIND_OPTS="--leak-check=full --trace-children=no --num-callers=25 --log-file=/var/lib/pacemaker/valgrind-%p --suppressions=/usr/share/pacemaker/tests/valgrind-pcmk.suppressions --gen-suppressions=all" diff --git a/valgrind-pcmk.suppressions b/valgrind-pcmk.suppressions index cd36b2271d..811c2ea0cc 100644 --- a/valgrind-pcmk.suppressions +++ b/valgrind-pcmk.suppressions @@ -1,142 +1,153 @@ # Valgrind suppressions for PE testing { Valgrind bug Memcheck:Addr8 fun:__strspn_sse42 fun:crm_get_msec } { Ignore crm_system_name Memcheck:Leak fun:malloc fun:crm_strdup_fn fun:crm_log_init_worker fun:crm_log_init fun:main } { libqb fixed upstream 1 Memcheck:Leak fun:realloc fun:_grow_bin_array fun:_qb_array_grow } { libqb fixed upstream 2 Memcheck:Leak fun:calloc fun:qb_log_dcs_get fun:_qb_log_callsite_get } { Cman - Points to uninitialized bytes Memcheck:Param socketcall.sendmsg(msg) fun:__sendmsg_nocancel - obj:/usr/lib64/libcman.so.3.0 - obj:/usr/lib64/libcman.so.3.0 + obj:*/libcman.so.3.0 + obj:*/libcman.so.3.0 } { Cman - Jump or move depends on uninitialized values Memcheck:Cond - obj:/usr/lib64/libcman.so.3.0 - obj:/usr/lib64/libcman.so.3.0 + obj:*/libcman.so.3.0 + obj:*/libcman.so.3.0 +} + +{ + quarks - hashtable + Memcheck:Leak + fun:calloc + fun:g_malloc0 + obj:*/libglib-* + fun:g_slice_alloc + fun:g_hash_table_new_full + fun:g_quark_from_static_string } { glib types Memcheck:Leak fun:malloc fun:realloc fun:g_realloc obj:*/libgobject-* fun:g_type_register_static } { glib types 2 Memcheck:Leak fun:realloc fun:g_realloc obj:*/libgobject-* fun:g_type_register_static fun:g_param_type_register_static } { glib types 3 Memcheck:Leak fun:calloc fun:g_malloc0 obj:*/libgobject-* fun:g_type_register_fundamental } { glib types 4 Memcheck:Leak fun:calloc fun:g_malloc0 obj:*/libgobject-* obj:*/libgobject-* fun:g_type_register_fundamental } { glib - the return Memcheck:Leak fun:calloc fun:g_malloc0 obj:*/libgobject-* obj:*/libgobject-* fun:_dl_init } { glib - seriously? Memcheck:Leak fun:calloc fun:g_malloc0 obj:*/libgobject-* obj:*/libgobject-* obj:*/libgobject-* fun:_dl_init } { glib - this isnt funny anymore Memcheck:Leak fun:malloc fun:realloc fun:g_realloc obj:*/libgobject-* fun:g_type_register_fundamental } { glib - why do you hate me? Memcheck:Leak fun:calloc fun:g_malloc0 obj:*/libgobject-* obj:*/libgobject-* fun:call_init.part.0 fun:_dl_init } { dear glib - you suck at memory management Memcheck:Leak fun:calloc fun:g_malloc0 obj:*/libgobject-* obj:*/libgobject-* obj:*/libgobject-* fun:call_init.part.0 fun:_dl_init } \ No newline at end of file