diff --git a/cib/main.c b/cib/main.c index 79b79d903c..08a0c19d9e 100644 --- a/cib/main.c +++ b/cib/main.c @@ -1,708 +1,707 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if HAVE_LIBXML2 # include #endif #ifdef HAVE_GETOPT_H # include #endif #if HAVE_BZLIB_H # include #endif extern int init_remote_listener(int port, gboolean encrypted); extern gboolean stand_alone; gboolean cib_shutdown_flag = FALSE; enum cib_errors cib_status = cib_ok; #if SUPPORT_HEARTBEAT oc_ev_t *cib_ev_token; ll_cluster_t *hb_conn = NULL; extern void oc_ev_special(const oc_ev_t *, oc_ev_class_t, int); gboolean cib_register_ha(ll_cluster_t * hb_cluster, const char *client_name); #endif extern void terminate_cib(const char *caller, gboolean fast); GMainLoop *mainloop = NULL; const char *cib_root = CRM_CONFIG_DIR; char *cib_our_uname = NULL; gboolean preserve_status = FALSE; gboolean cib_writes_enabled = TRUE; int remote_fd = 0; int remote_tls_fd = 0; void usage(const char *cmd, int exit_status); int cib_init(void); void cib_shutdown(int nsig); void cib_ha_connection_destroy(gpointer user_data); gboolean startCib(const char *filename); extern int write_cib_contents(gpointer p); GTRIGSource *cib_writer = NULL; GHashTable *client_list = NULL; GHashTable *config_hash = NULL; char *channel1 = NULL; char *channel2 = NULL; char *channel3 = NULL; char *channel4 = NULL; char *channel5 = NULL; #define OPTARGS "maswr:V?" void cib_cleanup(void); static void cib_enable_writes(int nsig) { crm_info("(Re)enabling disk writes"); cib_writes_enabled = TRUE; } static void cib_diskwrite_complete(gpointer userdata, int status, int signo, int exitcode) { if (exitcode != LSB_EXIT_OK || signo != 0 || status != 0) { crm_err("Disk write failed: status=%d, signo=%d, exitcode=%d", status, signo, exitcode); if (cib_writes_enabled) { crm_err("Disabling disk writes after write failure"); cib_writes_enabled = FALSE; } } else { crm_trace("Disk write passed"); } } static void log_cib_client(gpointer key, gpointer value, gpointer user_data) { cib_client_t *a_client = value; crm_info("Client %s/%s", crm_str(a_client->name), crm_str(a_client->channel_name)); } int main(int argc, char **argv) { int flag; int rc = 0; int argerr = 0; #ifdef HAVE_GETOPT_H int option_index = 0; /* *INDENT-OFF* */ static struct option long_options[] = { {"per-action-cib", 0, 0, 'a'}, {"stand-alone", 0, 0, 's'}, {"disk-writes", 0, 0, 'w'}, {"cib-root", 1, 0, 'r'}, {"verbose", 0, 0, 'V'}, {"help", 0, 0, '?'}, {"metadata", 0, 0, 'm'}, {0, 0, 0, 0} }; /* *INDENT-ON* */ #endif struct passwd *pwentry = NULL; crm_log_init("cib", LOG_INFO, TRUE, FALSE, 0, NULL); mainloop_add_signal(SIGTERM, cib_shutdown); mainloop_add_signal(SIGPIPE, cib_enable_writes); cib_writer = G_main_add_tempproc_trigger(G_PRIORITY_LOW, write_cib_contents, "write_cib_contents", NULL, NULL, NULL, cib_diskwrite_complete); /* EnableProcLogging(); */ set_sigchld_proctrack(G_PRIORITY_HIGH, DEFAULT_MAXDISPATCHTIME); crm_peer_init(); client_list = g_hash_table_new(crm_str_hash, g_str_equal); while (1) { #ifdef HAVE_GETOPT_H flag = getopt_long(argc, argv, OPTARGS, long_options, &option_index); #else flag = getopt(argc, argv, OPTARGS); #endif if (flag == -1) break; switch (flag) { case 'V': crm_bump_log_level(); break; case 's': stand_alone = TRUE; preserve_status = TRUE; cib_writes_enabled = FALSE; pwentry = getpwnam(CRM_DAEMON_USER); CRM_CHECK(pwentry != NULL, crm_perror(LOG_ERR, "Invalid uid (%s) specified", CRM_DAEMON_USER); return 100); rc = setgid(pwentry->pw_gid); if (rc < 0) { crm_perror(LOG_ERR, "Could not set group to %d", pwentry->pw_gid); return 100; } rc = setuid(pwentry->pw_uid); if (rc < 0) { crm_perror(LOG_ERR, "Could not set user to %d", pwentry->pw_uid); return 100; } break; case '?': /* Help message */ usage(crm_system_name, LSB_EXIT_OK); break; case 'w': cib_writes_enabled = TRUE; break; case 'r': cib_root = optarg; break; case 'm': cib_metadata(); return 0; default: ++argerr; break; } } if (argc - optind == 1 && safe_str_eq("metadata", argv[optind])) { cib_metadata(); return 0; } if (optind > argc) { ++argerr; } if (argerr) { usage(crm_system_name, LSB_EXIT_GENERIC); } if (crm_is_writable(cib_root, NULL, CRM_DAEMON_USER, CRM_DAEMON_GROUP, FALSE) == FALSE) { crm_err("Bad permissions on %s. Terminating", cib_root); fprintf(stderr, "ERROR: Bad permissions on %s. See logs for details\n", cib_root); fflush(stderr); return 100; } /* read local config file */ rc = cib_init(); CRM_CHECK(g_hash_table_size(client_list) == 0, crm_warn("Not all clients gone at exit")); g_hash_table_foreach(client_list, log_cib_client, NULL); cib_cleanup(); #if SUPPORT_HEARTBEAT if (hb_conn) { hb_conn->llc_ops->delete(hb_conn); } #endif crm_info("Done"); return rc; } void cib_cleanup(void) { crm_peer_destroy(); g_hash_table_destroy(config_hash); g_hash_table_destroy(client_list); crm_free(cib_our_uname); #if HAVE_LIBXML2 crm_xml_cleanup(); #endif crm_free(channel1); crm_free(channel2); crm_free(channel3); crm_free(channel4); crm_free(channel5); } unsigned long cib_num_ops = 0; const char *cib_stat_interval = "10min"; unsigned long cib_num_local = 0, cib_num_updates = 0, cib_num_fail = 0; unsigned long cib_bad_connects = 0, cib_num_timeouts = 0; longclock_t cib_call_time = 0; gboolean cib_stats(gpointer data); gboolean cib_stats(gpointer data) { int local_log_level = LOG_DEBUG; static unsigned long last_stat = 0; unsigned int cib_calls_ms = 0; static unsigned long cib_stat_interval_ms = 0; if (cib_stat_interval_ms == 0) { cib_stat_interval_ms = crm_get_msec(cib_stat_interval); } cib_calls_ms = longclockto_ms(cib_call_time); if ((cib_num_ops - last_stat) > 0) { unsigned long calls_diff = cib_num_ops - last_stat; double stat_1 = (1000 * cib_calls_ms) / calls_diff; local_log_level = LOG_INFO; do_crm_log(local_log_level, "Processed %lu operations" " (%.2fus average, %lu%% utilization) in the last %s", calls_diff, stat_1, (100 * cib_calls_ms) / cib_stat_interval_ms, cib_stat_interval); } crm_trace( "\tDetail: %lu operations (%ums total)" " (%lu local, %lu updates, %lu failures," " %lu timeouts, %lu bad connects)", cib_num_ops, cib_calls_ms, cib_num_local, cib_num_updates, cib_num_fail, cib_bad_connects, cib_num_timeouts); last_stat = cib_num_ops; cib_call_time = 0; return TRUE; } #if SUPPORT_HEARTBEAT gboolean ccm_connect(void); static void ccm_connection_destroy(gpointer user_data) { crm_err("CCM connection failed... blocking while we reconnect"); CRM_ASSERT(ccm_connect()); return; } static void *ccm_library = NULL; gboolean ccm_connect(void) { gboolean did_fail = TRUE; int num_ccm_fails = 0; int max_ccm_fails = 30; int ret; int cib_ev_fd; int (*ccm_api_register) (oc_ev_t ** token) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_register"); int (*ccm_api_set_callback) (const oc_ev_t * token, oc_ev_class_t class, oc_ev_callback_t * fn, oc_ev_callback_t ** prev_fn) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_set_callback"); void (*ccm_api_special) (const oc_ev_t *, oc_ev_class_t, int) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_special"); int (*ccm_api_activate) (const oc_ev_t * token, int *fd) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_activate"); int (*ccm_api_unregister) (oc_ev_t * token) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_unregister"); while (did_fail) { did_fail = FALSE; crm_info("Registering with CCM..."); ret = (*ccm_api_register) (&cib_ev_token); if (ret != 0) { did_fail = TRUE; } if (did_fail == FALSE) { crm_trace("Setting up CCM callbacks"); ret = (*ccm_api_set_callback) (cib_ev_token, OC_EV_MEMB_CLASS, cib_ccm_msg_callback, NULL); if (ret != 0) { crm_warn("CCM callback not set"); did_fail = TRUE; } } if (did_fail == FALSE) { (*ccm_api_special) (cib_ev_token, OC_EV_MEMB_CLASS, 0); crm_trace("Activating CCM token"); ret = (*ccm_api_activate) (cib_ev_token, &cib_ev_fd); if (ret != 0) { crm_warn("CCM Activation failed"); did_fail = TRUE; } } if (did_fail) { num_ccm_fails++; (*ccm_api_unregister) (cib_ev_token); if (num_ccm_fails < max_ccm_fails) { crm_warn("CCM Connection failed %d times (%d max)", num_ccm_fails, max_ccm_fails); sleep(3); } else { crm_err("CCM Activation failed %d (max) times", num_ccm_fails); return FALSE; } } } crm_debug("CCM Activation passed... all set to go!"); G_main_add_fd(G_PRIORITY_HIGH, cib_ev_fd, FALSE, cib_ccm_dispatch, cib_ev_token, ccm_connection_destroy); return TRUE; } #endif #if SUPPORT_COROSYNC static gboolean cib_ais_dispatch(AIS_Message * wrapper, char *data, int sender) { xmlNode *xml = NULL; if (wrapper->header.id == crm_class_cluster) { xml = string2xml(data); if (xml == NULL) { goto bail; } crm_xml_add(xml, F_ORIG, wrapper->sender.uname); crm_xml_add_int(xml, F_SEQ, wrapper->id); cib_peer_callback(xml, NULL); } free_xml(xml); return TRUE; bail: crm_err("Invalid XML: '%.120s'", data); return TRUE; } static void cib_ais_destroy(gpointer user_data) { - ais_fd_sync = -1; if (cib_shutdown_flag) { crm_info("Corosync disconnection complete"); } else { crm_err("Corosync connection lost! Exiting."); terminate_cib(__FUNCTION__, TRUE); } } #endif static void cib_ais_status_callback(enum crm_status_type type, crm_node_t * node, const void *data) { #if 0 /* crm_active_peers(crm_proc_cib) appears to give the wrong answer * sometimes, this might help figure out why */ if(type == crm_status_nstate) { crm_info("status: %s is now %s (was %s)", node->uname, node->state, (const char *)data); if (safe_str_eq(CRMD_STATE_ACTIVE, node->state)) { return; } } else if(type == crm_status_processes) { uint32_t old = 0; if (data) { old = *(const uint32_t *)data; } if ((node->processes ^ old) & crm_proc_cib) { crm_info("status: cib process on %s is now %sactive", node->uname, is_set(node->processes, crm_proc_cib)?"":"in"); } else { return; } } else { return; } #endif if(cib_shutdown_flag && crm_active_peers(crm_proc_cib) < 2 && g_hash_table_size(client_list) == 0) { crm_info("No more peers"); terminate_cib(__FUNCTION__, FALSE); } } int cib_init(void) { gboolean was_error = FALSE; config_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if (startCib("cib.xml") == FALSE) { crm_crit("Cannot start CIB... terminating"); exit(1); } if (stand_alone == FALSE) { void *dispatch = cib_ha_peer_callback; void *destroy = cib_ha_connection_destroy; if (is_openais_cluster()) { #if SUPPORT_COROSYNC destroy = cib_ais_destroy; dispatch = cib_ais_dispatch; #endif } if (crm_cluster_connect(&cib_our_uname, NULL, dispatch, destroy, #if SUPPORT_HEARTBEAT &hb_conn #else NULL #endif ) == FALSE) { crm_crit("Cannot sign in to the cluster... terminating"); exit(100); } if (is_openais_cluster()) { crm_set_status_callback(&cib_ais_status_callback); } #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { if (was_error == FALSE) { if (HA_OK != hb_conn->llc_ops->set_cstatus_callback(hb_conn, cib_client_status_callback, hb_conn)) { crm_err("Cannot set cstatus callback: %s", hb_conn->llc_ops->errmsg(hb_conn)); was_error = TRUE; } } if (was_error == FALSE) { was_error = (ccm_connect() == FALSE); } if (was_error == FALSE) { /* Async get client status information in the cluster */ crm_info("Requesting the list of configured nodes"); hb_conn->llc_ops->client_status(hb_conn, NULL, CRM_SYSTEM_CIB, -1); } } #endif } else { cib_our_uname = crm_strdup("localhost"); } channel1 = crm_strdup(cib_channel_callback); was_error = init_server_ipc_comms(channel1, cib_client_connect, default_ipc_connection_destroy); channel2 = crm_strdup(cib_channel_ro); was_error = was_error || init_server_ipc_comms(channel2, cib_client_connect, default_ipc_connection_destroy); channel3 = crm_strdup(cib_channel_rw); was_error = was_error || init_server_ipc_comms(channel3, cib_client_connect, default_ipc_connection_destroy); if (stand_alone) { if (was_error) { crm_err("Couldnt start"); return 1; } cib_is_master = TRUE; /* Create the mainloop and run it... */ mainloop = g_main_new(FALSE); crm_info("Starting %s mainloop", crm_system_name); g_main_run(mainloop); return 0; } if (was_error == FALSE) { /* Create the mainloop and run it... */ mainloop = g_main_new(FALSE); crm_info("Starting %s mainloop", crm_system_name); g_timeout_add(crm_get_msec(cib_stat_interval), cib_stats, NULL); g_main_run(mainloop); } else { crm_err("Couldnt start all communication channels, exiting."); } return 0; } void usage(const char *cmd, int exit_status) { FILE *stream; stream = exit_status ? stderr : stdout; fprintf(stream, "usage: %s [-%s]\n", cmd, OPTARGS); fprintf(stream, "\t--%s (-%c)\t\tTurn on debug info." " Additional instances increase verbosity\n", "verbose", 'V'); fprintf(stream, "\t--%s (-%c)\t\tThis help message\n", "help", '?'); fprintf(stream, "\t--%s (-%c)\t\tShow configurable cib options\n", "metadata", 'm'); fprintf(stream, "\t--%s (-%c)\tAdvanced use only\n", "per-action-cib", 'a'); fprintf(stream, "\t--%s (-%c)\tAdvanced use only\n", "stand-alone", 's'); fprintf(stream, "\t--%s (-%c)\tAdvanced use only\n", "disk-writes", 'w'); fprintf(stream, "\t--%s (-%c)\t\tAdvanced use only\n", "cib-root", 'r'); fflush(stream); exit(exit_status); } void cib_ha_connection_destroy(gpointer user_data) { if (cib_shutdown_flag) { crm_info("Heartbeat disconnection complete... exiting"); terminate_cib(__FUNCTION__, FALSE); } else { crm_err("Heartbeat connection lost! Exiting."); terminate_cib(__FUNCTION__, TRUE); } } static void disconnect_cib_client(gpointer key, gpointer value, gpointer user_data) { cib_client_t *a_client = value; crm_trace("Processing client %s/%s... send=%d, recv=%d", crm_str(a_client->name), crm_str(a_client->channel_name), (int)a_client->channel->send_queue->current_qlen, (int)a_client->channel->recv_queue->current_qlen); if (a_client->channel->ch_status == IPC_CONNECT) { a_client->channel->ops->resume_io(a_client->channel); if (a_client->channel->send_queue->current_qlen != 0 || a_client->channel->recv_queue->current_qlen != 0) { crm_info("Flushed messages to/from %s/%s... send=%d, recv=%d", crm_str(a_client->name), crm_str(a_client->channel_name), (int)a_client->channel->send_queue->current_qlen, (int)a_client->channel->recv_queue->current_qlen); } } if (a_client->channel->ch_status == IPC_CONNECT) { crm_warn("Disconnecting %s/%s...", crm_str(a_client->name), crm_str(a_client->channel_name)); a_client->channel->ops->disconnect(a_client->channel); } } extern gboolean cib_process_disconnect(IPC_Channel * channel, cib_client_t * cib_client); void cib_shutdown(int nsig) { if (cib_shutdown_flag == FALSE) { cib_shutdown_flag = TRUE; crm_debug("Disconnecting %d clients", g_hash_table_size(client_list)); g_hash_table_foreach(client_list, disconnect_cib_client, NULL); crm_info("Disconnected %d clients", g_hash_table_size(client_list)); cib_process_disconnect(NULL, NULL); } else { crm_info("Waiting for %d clients to disconnect...", g_hash_table_size(client_list)); } } gboolean startCib(const char *filename) { gboolean active = FALSE; xmlNode *cib = readCibXmlFile(cib_root, filename, !preserve_status); CRM_ASSERT(cib != NULL); if (activateCibXml(cib, TRUE, "start") == 0) { int port = 0; const char *port_s = NULL; active = TRUE; cib_read_config(config_hash, cib); port_s = crm_element_value(cib, "remote-tls-port"); if (port_s) { port = crm_parse_int(port_s, "0"); remote_tls_fd = init_remote_listener(port, TRUE); } port_s = crm_element_value(cib, "remote-clear-port"); if (port_s) { port = crm_parse_int(port_s, "0"); remote_fd = init_remote_listener(port, FALSE); } crm_info("CIB Initialization completed successfully"); } return active; } diff --git a/crmd/corosync.c b/crmd/corosync.c index 3d846b4cd2..c68ab7e6e3 100644 --- a/crmd/corosync.c +++ b/crmd/corosync.c @@ -1,179 +1,183 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern void post_cache_update(int seq); extern void crmd_ha_connection_destroy(gpointer user_data); /* A_HA_CONNECT */ #if SUPPORT_COROSYNC extern void crmd_ha_msg_filter(xmlNode * msg); static gboolean crmd_ais_dispatch(AIS_Message * wrapper, char *data, int sender) { int seq = 0; xmlNode *xml = NULL; const char *seq_s = NULL; xml = string2xml(data); if (xml == NULL) { crm_err("Could not parse message content (%d): %.100s", wrapper->header.id, data); return TRUE; } switch (wrapper->header.id) { case crm_class_members: seq_s = crm_element_value(xml, "id"); seq = crm_int_helper(seq_s, NULL); set_bit_inplace(fsa_input_register, R_PEER_DATA); post_cache_update(seq); /* fall through */ case crm_class_quorum: crm_update_quorum(crm_have_quorum, FALSE); if (AM_I_DC) { const char *votes = crm_element_value(xml, "expected"); if (votes == NULL || check_number(votes) == FALSE) { crm_log_xml_err(xml, "Invalid quorum/membership update"); } else { int rc = update_attr(fsa_cib_conn, cib_quorum_override | cib_scope_local | cib_inhibit_notify, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, XML_ATTR_EXPECTED_VOTES, votes, FALSE); crm_info("Setting expected votes to %s", votes); if (cib_ok > rc) { crm_err("Quorum update failed: %s", cib_error2string(rc)); } } } break; case crm_class_cluster: crm_xml_add(xml, F_ORIG, wrapper->sender.uname); crm_xml_add_int(xml, F_SEQ, wrapper->id); crmd_ha_msg_filter(xml); break; case crm_class_rmpeer: /* Ignore */ break; case crm_class_notify: case crm_class_nodeid: crm_err("Unexpected message class (%d): %.100s", wrapper->header.id, data); break; default: crm_err("Invalid message class (%d): %.100s", wrapper->header.id, data); } free_xml(xml); return TRUE; } static gboolean crmd_cman_dispatch(unsigned long long seq, gboolean quorate) { crm_update_quorum(quorate, FALSE); post_cache_update(seq); return TRUE; } static void -crmd_cman_destroy(gpointer user_data) +crmd_quorum_destroy(gpointer user_data) { if (is_set(fsa_input_register, R_HA_DISCONNECTED)) { crm_err("connection terminated"); exit(1); } else { crm_info("connection closed"); } } static void -crmd_quorum_destroy(gpointer user_data) +crmd_ais_destroy(gpointer user_data) { if (is_set(fsa_input_register, R_HA_DISCONNECTED)) { crm_err("connection terminated"); exit(1); } else { crm_info("connection closed"); } } +#if SUPPORT_CMAN static void -crmd_ais_destroy(gpointer user_data) +crmd_cman_destroy(gpointer user_data) { if (is_set(fsa_input_register, R_HA_DISCONNECTED)) { crm_err("connection terminated"); exit(1); } else { crm_info("connection closed"); } } +#endif extern gboolean crm_connect_corosync(void); gboolean crm_connect_corosync(void) { gboolean rc = FALSE; if (is_openais_cluster()) { crm_set_status_callback(&ais_status_callback); rc = crm_cluster_connect(&fsa_our_uname, &fsa_our_uuid, crmd_ais_dispatch, crmd_ais_destroy, NULL); } if (rc && is_corosync_cluster()) { init_quorum_connection(crmd_cman_dispatch, crmd_quorum_destroy); } +#if SUPPORT_CMAN if (rc && is_cman_cluster()) { init_cman_connection(crmd_cman_dispatch, crmd_cman_destroy); set_bit_inplace(fsa_input_register, R_MEMBERSHIP); } +#endif return rc; } #endif diff --git a/fencing/main.c b/fencing/main.c index 81cd65480e..317738d542 100644 --- a/fencing/main.c +++ b/fencing/main.c @@ -1,930 +1,929 @@ /* * Copyright (C) 2009 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include char *channel1 = NULL; char *channel2 = NULL; char *stonith_our_uname = NULL; GMainLoop *mainloop = NULL; GHashTable *client_list = NULL; gboolean stand_alone = FALSE; gboolean stonith_shutdown_flag = FALSE; #if SUPPORT_HEARTBEAT ll_cluster_t *hb_conn = NULL; #endif static gboolean stonith_client_disconnect( IPC_Channel *channel, stonith_client_t *stonith_client) { if (channel == NULL) { CRM_LOG_ASSERT(stonith_client == NULL); } else if (stonith_client == NULL) { crm_err("No client"); } else { CRM_LOG_ASSERT(channel->ch_status != IPC_CONNECT); crm_trace("Cleaning up after client disconnect: %s/%s/%s", crm_str(stonith_client->name), stonith_client->channel_name, stonith_client->id); if(stonith_client->id != NULL) { if(!g_hash_table_remove(client_list, stonith_client->id)) { crm_err("Client %s not found in the hashtable", stonith_client->name); } } } return FALSE; } static gboolean stonith_client_callback(IPC_Channel *channel, gpointer user_data) { int lpc = 0; const char *value = NULL; xmlNode *request = NULL; gboolean keep_channel = TRUE; stonith_client_t *stonith_client = user_data; CRM_CHECK(stonith_client != NULL, crm_err("Invalid client"); return FALSE); CRM_CHECK(stonith_client->id != NULL, crm_err("Invalid client: %p", stonith_client); return FALSE); if(IPC_ISRCONN(channel) && channel->ops->is_message_pending(channel)) { lpc++; request = xmlfromIPC(channel, MAX_IPC_DELAY); if (request == NULL) { goto bail; } if(stonith_client->name == NULL) { value = crm_element_value(request, F_STONITH_CLIENTNAME); if(value == NULL) { stonith_client->name = crm_itoa(channel->farside_pid); } else { stonith_client->name = crm_strdup(value); } } crm_xml_add(request, F_STONITH_CLIENTID, stonith_client->id); crm_xml_add(request, F_STONITH_CLIENTNAME, stonith_client->name); if(stonith_client->callback_id == NULL) { value = crm_element_value(request, F_STONITH_CALLBACK_TOKEN); if(value != NULL) { stonith_client->callback_id = crm_strdup(value); } else { stonith_client->callback_id = crm_strdup(stonith_client->id); } } crm_log_xml_trace(request, "Client[inbound]"); stonith_command(stonith_client, request, NULL); free_xml(request); } bail: if(channel->ch_status != IPC_CONNECT) { crm_trace("Client disconnected"); keep_channel = stonith_client_disconnect(channel, stonith_client); } return keep_channel; } static void stonith_client_destroy(gpointer user_data) { stonith_client_t *stonith_client = user_data; if(stonith_client == NULL) { crm_trace("Destroying %p", user_data); return; } if(stonith_client->source != NULL) { crm_trace("Deleting %s (%p) from mainloop", stonith_client->name, stonith_client->source); G_main_del_IPC_Channel(stonith_client->source); stonith_client->source = NULL; } crm_trace("Destroying %s (%p)", stonith_client->name, user_data); crm_free(stonith_client->name); crm_free(stonith_client->callback_id); crm_free(stonith_client->id); crm_free(stonith_client); crm_trace("Freed the cib client"); return; } static gboolean stonith_client_connect(IPC_Channel *channel, gpointer user_data) { cl_uuid_t client_id; xmlNode *reg_msg = NULL; stonith_client_t *new_client = NULL; char uuid_str[UU_UNPARSE_SIZEOF]; const char *channel_name = user_data; crm_trace("Connecting channel"); CRM_CHECK(channel_name != NULL, return FALSE); if (channel == NULL) { crm_err("Channel was NULL"); return FALSE; } else if (channel->ch_status != IPC_CONNECT) { crm_err("Channel was disconnected"); return FALSE; } else if(stonith_shutdown_flag) { crm_info("Ignoring new client [%d] during shutdown", channel->farside_pid); return FALSE; } crm_malloc0(new_client, sizeof(stonith_client_t)); new_client->channel = channel; new_client->channel_name = channel_name; crm_trace("Created channel %p for channel %s", new_client, new_client->channel_name); channel->ops->set_recv_qlen(channel, 1024); channel->ops->set_send_qlen(channel, 1024); new_client->source = G_main_add_IPC_Channel( G_PRIORITY_DEFAULT, channel, FALSE, stonith_client_callback, new_client, stonith_client_destroy); crm_trace("Channel %s connected for client %s", new_client->channel_name, new_client->id); cl_uuid_generate(&client_id); cl_uuid_unparse(&client_id, uuid_str); CRM_CHECK(new_client->id == NULL, crm_free(new_client->id)); new_client->id = crm_strdup(uuid_str); /* make sure we can find ourselves later for sync calls * redirected to the master instance */ g_hash_table_insert(client_list, new_client->id, new_client); reg_msg = create_xml_node(NULL, "callback"); crm_xml_add(reg_msg, F_STONITH_OPERATION, CRM_OP_REGISTER); crm_xml_add(reg_msg, F_STONITH_CLIENTID, new_client->id); send_ipc_message(channel, reg_msg); free_xml(reg_msg); return TRUE; } static void stonith_peer_callback(xmlNode * msg, void* private_data) { const char *remote = crm_element_value(msg, F_ORIG); crm_log_xml_trace(msg, "Peer[inbound]"); stonith_command(NULL, msg, remote); } static void stonith_peer_hb_callback(HA_Message * msg, void* private_data) { xmlNode *xml = convert_ha_message(NULL, msg, __FUNCTION__); stonith_peer_callback(xml, private_data); free_xml(xml); } #if SUPPORT_COROSYNC static gboolean stonith_peer_ais_callback( AIS_Message *wrapper, char *data, int sender) { xmlNode *xml = NULL; if(wrapper->header.id == crm_class_cluster) { xml = string2xml(data); if(xml == NULL) { goto bail; } crm_xml_add(xml, F_ORIG, wrapper->sender.uname); crm_xml_add_int(xml, F_SEQ, wrapper->id); stonith_peer_callback(xml, NULL); } free_xml(xml); return TRUE; bail: crm_err("Invalid XML: '%.120s'", data); return TRUE; } static void stonith_peer_ais_destroy(gpointer user_data) { crm_err("AIS connection terminated"); - ais_fd_sync = -1; exit(1); } #endif static void stonith_peer_hb_destroy(gpointer user_data) { if(stonith_shutdown_flag) { crm_info("Heartbeat disconnection complete... exiting"); } else { crm_err("Heartbeat connection lost! Exiting."); } crm_info("Exiting..."); if (mainloop != NULL && g_main_is_running(mainloop)) { g_main_quit(mainloop); } else { exit(LSB_EXIT_OK); } } static int send_via_callback_channel(xmlNode *msg, const char *token) { stonith_client_t *hash_client = NULL; enum stonith_errors rc = stonith_ok; crm_trace("Delivering msg %p to client %s", msg, token); if(token == NULL) { crm_err("No client id token, cant send message"); if(rc == stonith_ok) { rc = -1; } } else if(msg == NULL) { crm_err("No message to send"); rc = -1; } else { /* A client that left before we could reply is not really * _our_ error. Warn instead. */ hash_client = g_hash_table_lookup(client_list, token); if(hash_client == NULL) { crm_warn("Cannot find client for token %s", token); rc = -1; } else if (crm_str_eq(hash_client->channel_name, "remote", FALSE)) { /* just hope it's alive */ } else if(hash_client->channel == NULL) { crm_err("Cannot find channel for client %s", token); rc = -1; } } if(rc == stonith_ok) { crm_trace("Delivering reply to client %s (%s)", token, hash_client->channel_name); if(send_ipc_message(hash_client->channel, msg) == FALSE) { crm_warn("Delivery of reply to client %s/%s failed", hash_client->name, token); rc = -1; } } return rc; } void do_local_reply(xmlNode *notify_src, const char *client_id, gboolean sync_reply, gboolean from_peer) { /* send callback to originating child */ stonith_client_t *client_obj = NULL; enum stonith_errors local_rc = stonith_ok; crm_trace("Sending response"); if(client_id != NULL) { client_obj = g_hash_table_lookup(client_list, client_id); } else { crm_trace("No client to sent the response to." " F_STONITH_CLIENTID not set."); } crm_trace("Sending callback to request originator"); if(client_obj == NULL) { local_rc = -1; } else { const char *client_id = client_obj->callback_id; crm_trace("Sending %ssync response to %s %s", sync_reply?"":"an a-", client_obj->name, from_peer?"(originator of delegated request)":""); if(sync_reply) { client_id = client_obj->id; } local_rc = send_via_callback_channel(notify_src, client_id); } if(local_rc != stonith_ok && client_obj != NULL) { crm_warn("%sSync reply to %s failed: %s", sync_reply?"":"A-", client_obj?client_obj->name:"", stonith_error2string(local_rc)); } } long long get_stonith_flag(const char *name) { if(safe_str_eq(name, STONITH_OP_FENCE)) { return 0x01; } else if(safe_str_eq(name, STONITH_OP_DEVICE_ADD)) { return 0x04; } else if(safe_str_eq(name, STONITH_OP_DEVICE_DEL)) { return 0x10; } return 0; } static void stonith_notify_client(gpointer key, gpointer value, gpointer user_data) { IPC_Channel *ipc_client = NULL; xmlNode *update_msg = user_data; stonith_client_t *client = value; const char *type = NULL; CRM_CHECK(client != NULL, return); CRM_CHECK(update_msg != NULL, return); type = crm_element_value(update_msg, F_SUBTYPE); CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return); if(client == NULL) { crm_trace("Skipping NULL client"); return; } else if(client->channel == NULL) { crm_trace("Skipping client with NULL channel"); return; } else if(client->name == NULL) { crm_trace("Skipping unnammed client / comamnd channel"); return; } ipc_client = client->channel; if(client->flags & get_stonith_flag(type)) { crm_trace("Sending %s-notification to client %s/%s", type, client->name, client->id); if(ipc_client->send_queue->current_qlen >= ipc_client->send_queue->max_qlen) { /* We never want the STONITH to exit because our client is slow */ crm_debug("%s-notification of client %s/%s failed - queue saturated", type, client->name, client->id); } else if(send_ipc_message(ipc_client, update_msg) == FALSE) { crm_warn("%s-Notification of client %s/%s failed", type, client->name, client->id); } } } void do_stonith_notify( int options, const char *type, enum stonith_errors result, xmlNode *data, const char *remote) { /* TODO: Standardize the contents of data */ xmlNode *update_msg = create_xml_node(NULL, "notify"); CRM_CHECK(type != NULL, ;); crm_xml_add(update_msg, F_TYPE, T_STONITH_NOTIFY); crm_xml_add(update_msg, F_SUBTYPE, type); crm_xml_add(update_msg, F_STONITH_OPERATION, type); crm_xml_add_int(update_msg, F_STONITH_RC, result); if(data != NULL) { add_message_xml(update_msg, F_STONITH_CALLDATA, data); } crm_trace("Notifying clients"); g_hash_table_foreach(client_list, stonith_notify_client, update_msg); free_xml(update_msg); crm_trace("Notify complete"); } static stonith_key_value_t *parse_device_list(const char *devices) { int lpc = 0; int max = 0; int last = 0; stonith_key_value_t *output = NULL; if(devices == NULL) { return output; } max = strlen(devices); for(lpc = 0; lpc <= max; lpc++) { if(devices[lpc] == ',' || devices[lpc] == 0) { char *line = NULL; crm_malloc0(line, 2 + lpc - last); snprintf(line, 1 + lpc - last, "%s", devices+last); output = stonith_key_value_add(output, NULL, line); crm_free(line); last = lpc + 1; } } return output; } static void topology_remove_helper(const char *node, int level) { xmlNode *data = create_xml_node(NULL, F_STONITH_LEVEL); crm_xml_add(data, "origin", __FUNCTION__); crm_xml_add_int(data, XML_ATTR_ID, level); crm_xml_add(data, F_STONITH_TARGET, node); stonith_level_remove(data); free_xml(data); } static void topology_register_helper(const char *node, int level, stonith_key_value_t *device_list) { xmlNode *data = create_level_registration_xml(node, level, device_list); stonith_level_register(data); free_xml(data); } static void remove_fencing_topology(xmlXPathObjectPtr xpathObj) { int max = 0, lpc = 0; if(xpathObj && xpathObj->nodesetval) { max = xpathObj->nodesetval->nodeNr; } for(lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); CRM_CHECK(match != NULL, continue); if(crm_element_value(match, XML_DIFF_MARKER)) { /* Deletion */ int index = 0; const char *target = crm_element_value(match, XML_ATTR_STONITH_TARGET); crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index); if(target == NULL) { crm_err("Invalid fencing target in element %s", ID(match)); } else if(index <= 0) { crm_err("Invalid level for %s in element %s", target, ID(match)); } else { topology_remove_helper(target, index); } /* } else { Deal with modifications during the 'addition' stage */ } } } static void register_fencing_topology(xmlXPathObjectPtr xpathObj, gboolean force) { int max = 0, lpc = 0; if(xpathObj && xpathObj->nodesetval) { max = xpathObj->nodesetval->nodeNr; } for(lpc = 0; lpc < max; lpc++) { int index = 0; const char *target; const char *dev_list; stonith_key_value_t *devices = NULL; xmlNode *match = getXpathResult(xpathObj, lpc); CRM_CHECK(match != NULL, continue); crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index); target = crm_element_value(match, XML_ATTR_STONITH_TARGET); dev_list = crm_element_value(match, XML_ATTR_STONITH_DEVICES); devices = parse_device_list(dev_list); crm_trace("Updating %s[%d] (%s) to %s", target, index, ID(match), dev_list); if(target == NULL) { crm_err("Invalid fencing target in element %s", ID(match)); } else if(index <= 0) { crm_err("Invalid level for %s in element %s", target, ID(match)); } else if(force == FALSE && crm_element_value(match, XML_DIFF_MARKER)) { /* Addition */ topology_register_helper(target, index, devices); } else { /* Modification */ /* Remove then re-add */ topology_remove_helper(target, index); topology_register_helper(target, index, devices); } stonith_key_value_freeall(devices, 1, 1); } } /* Fencing */ static void fencing_topology_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { xmlXPathObjectPtr xpathObj = NULL; const char *xpath = "//" XML_TAG_FENCING_LEVEL; crm_trace("Pushing in stonith topology"); /* Grab everything */ xpathObj = xpath_search(msg, xpath); register_fencing_topology(xpathObj, TRUE); if(xpathObj) { xmlXPathFreeObject(xpathObj); } } static void update_fencing_topology(const char *event, xmlNode * msg) { const char *xpath; xmlXPathObjectPtr xpathObj = NULL; /* Process deletions (only) */ xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_FENCING_LEVEL; xpathObj = xpath_search(msg, xpath); remove_fencing_topology(xpathObj); if(xpathObj) { xmlXPathFreeObject(xpathObj); } /* Process additions and changes */ xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_FENCING_LEVEL; xpathObj = xpath_search(msg, xpath); register_fencing_topology(xpathObj, FALSE); if(xpathObj) { xmlXPathFreeObject(xpathObj); } } static void stonith_shutdown(int nsig) { stonith_shutdown_flag = TRUE; crm_info("Terminating with %d clients", g_hash_table_size(client_list)); stonith_client_disconnect(NULL, NULL); exit(0); } static void stonith_cleanup(void) { crm_peer_destroy(); g_hash_table_destroy(client_list); crm_free(stonith_our_uname); #if HAVE_LIBXML2 crm_xml_cleanup(); #endif crm_free(channel1); } /* *INDENT-OFF* */ static struct crm_option long_options[] = { {"stand-alone", 0, 0, 's'}, {"verbose", 0, 0, 'V'}, {"version", 0, 0, '$'}, {"help", 0, 0, '?'}, {0, 0, 0, 0} }; /* *INDENT-ON* */ static void setup_cib(void) { static void *cib_library = NULL; static cib_t *(*cib_new_fn)(void) = NULL; static const char *(*cib_err_fn)(enum cib_errors) = NULL; int rc, retries = 0; cib_t *cib = NULL; if(cib_library == NULL) { cib_library = dlopen(CIB_LIBRARY, RTLD_LAZY); } if(cib_library && cib_new_fn == NULL) { cib_new_fn = dlsym(cib_library, "cib_new"); } if(cib_library && cib_err_fn == NULL) { cib_err_fn = dlsym(cib_library, "cib_error2string"); } if(cib_new_fn != NULL) { cib = (*cib_new_fn)(); } if(cib == NULL) { crm_err("No connection to the CIB"); return; } do { sleep(retries); rc = cib->cmds->signon(cib, CRM_SYSTEM_CRMD, cib_command); } while(rc == cib_connection && ++retries < 5); if (rc != cib_ok) { crm_err("Could not connect to the CIB service: %s", (*cib_err_fn)(rc)); } else if (cib_ok != cib->cmds->add_notify_callback( cib, T_CIB_DIFF_NOTIFY, update_fencing_topology)) { crm_err("Could not set CIB notification callback"); } else { rc = cib->cmds->query(cib, NULL, NULL, cib_scope_local); add_cib_op_callback(cib, rc, FALSE, NULL, fencing_topology_callback); crm_notice("Watching for stonith topology changes"); } } int main(int argc, char ** argv) { int flag; int rc = 0; int lpc = 0; int argerr = 0; int option_index = 0; const char *actions[] = { "reboot", "poweroff", "list", "monitor", "status" }; crm_log_init("stonith-ng", LOG_INFO, TRUE, FALSE, argc, argv); crm_set_options(NULL, "mode [options]", long_options, "Provides a summary of cluster's current state." "\n\nOutputs varying levels of detail in a number of different formats.\n"); while (1) { flag = crm_get_option(argc, argv, &option_index); if (flag == -1) break; switch(flag) { case 'V': crm_bump_log_level(); break; case 's': stand_alone = TRUE; break; case '$': case '?': crm_help(flag, LSB_EXIT_OK); break; default: ++argerr; break; } } if(argc - optind == 1 && safe_str_eq("metadata", argv[optind])) { printf("\n"); printf("\n"); printf(" 1.0\n"); printf(" This is a fake resource that details the instance attributes handled by stonithd.\n"); printf(" Options available for all stonith resources\n"); printf(" \n"); printf(" \n"); printf(" How long to wait for the STONITH action to complete.\n"); printf(" Overrides the stonith-timeout cluster property\n"); printf(" \n"); printf(" \n"); printf(" \n"); printf(" The priority of the stonith resource. The lower the number, the higher the priority.\n"); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_HOSTARG); printf(" Advanced use only: An alternate parameter to supply instead of 'port'\n"); printf(" Some devices do not support the standard 'port' parameter or may provide additional ones.\n" "Use this to specify an alternate, device-specific, parameter that should indicate the machine to be fenced.\n" "A value of 'none' can be used to tell the cluster not to supply any additional parameters.\n" " \n"); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_HOSTMAP); printf(" A mapping of host names to ports numbers for devices that do not support host names.\n"); printf(" Eg. node1:1;node2:2,3 would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2\n"); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_HOSTLIST); printf(" A list of machines controlled by this device (Optional unless %s=static-list).\n", STONITH_ATTR_HOSTCHECK); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_HOSTCHECK); printf(" How to determin which machines are controlled by the device.\n"); printf(" Allowed values: dynamic-list (query the device), static-list (check the %s attribute), none (assume every device can fence every machine)\n", STONITH_ATTR_HOSTLIST); printf(" \n"); printf(" \n"); for(lpc = 0; lpc < DIMOF(actions); lpc++) { printf(" \n", actions[lpc]); printf(" Advanced use only: An alternate command to run instead of '%s'\n", actions[lpc]); printf(" Some devices do not support the standard commands or may provide additional ones.\n" "Use this to specify an alternate, device-specific, command that implements the '%s' action.\n", actions[lpc]); printf(" \n", actions[lpc]); printf(" \n"); } printf(" \n"); printf("\n"); return 0; } if (optind != argc) { ++argerr; } if (argerr) { crm_help('?', LSB_EXIT_GENERIC); } mainloop_add_signal(SIGTERM, stonith_shutdown); /* EnableProcLogging(); */ set_sigchld_proctrack(G_PRIORITY_HIGH,DEFAULT_MAXDISPATCHTIME); crm_peer_init(); client_list = g_hash_table_new(crm_str_hash, g_str_equal); if(stand_alone == FALSE) { void *dispatch = stonith_peer_hb_callback; void *destroy = stonith_peer_hb_destroy; if(is_openais_cluster()) { #if SUPPORT_COROSYNC destroy = stonith_peer_ais_destroy; dispatch = stonith_peer_ais_callback; #endif } if(crm_cluster_connect(&stonith_our_uname, NULL, dispatch, destroy, #if SUPPORT_HEARTBEAT &hb_conn #else NULL #endif ) == FALSE){ crm_crit("Cannot sign in to the cluster... terminating"); exit(100); } setup_cib(); } else { stonith_our_uname = crm_strdup("localhost"); } device_list = g_hash_table_new_full( crm_str_hash, g_str_equal, NULL, free_device); topology = g_hash_table_new_full( crm_str_hash, g_str_equal, NULL, free_topology_entry); channel1 = crm_strdup(stonith_channel); rc = init_server_ipc_comms( channel1, stonith_client_connect, default_ipc_connection_destroy); #if SUPPORT_STONITH_CONFIG if (((stand_alone == TRUE)) && !(standalone_cfg_read_file(STONITH_NG_CONF_FILE))) { standalone_cfg_commit(); } #endif channel2 = crm_strdup(stonith_channel_callback); rc = init_server_ipc_comms( channel2, stonith_client_connect, default_ipc_connection_destroy); if(rc == 0) { /* Create the mainloop and run it... */ mainloop = g_main_new(FALSE); crm_info("Starting %s mainloop", crm_system_name); g_main_run(mainloop); } else { crm_err("Couldnt start all communication channels, exiting."); } stonith_cleanup(); #if SUPPORT_HEARTBEAT if(hb_conn) { hb_conn->llc_ops->delete(hb_conn); } #endif crm_info("Done"); return rc; } diff --git a/include/crm/common/cluster.h b/include/crm/common/cluster.h index 11d8f632c4..a3c1d6c466 100644 --- a/include/crm/common/cluster.h +++ b/include/crm/common/cluster.h @@ -1,135 +1,133 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef CRM_COMMON_CLUSTER__H # define CRM_COMMON_CLUSTER__H # include # include # include # include # if SUPPORT_HEARTBEAT # include # include # endif extern gboolean crm_have_quorum; extern GHashTable *crm_peer_cache; extern GHashTable *crm_peer_id_cache; extern unsigned long long crm_peer_seq; extern void crm_peer_init(void); extern void crm_peer_destroy(void); extern char *get_corosync_uuid(uint32_t id, const char *uname); extern const char *get_node_uuid(uint32_t id, const char *uname); extern int get_corosync_id(int id, const char *uuid); extern gboolean crm_cluster_connect(char **our_uname, char **our_uuid, void *dispatch, void *destroy, # if SUPPORT_HEARTBEAT ll_cluster_t ** hb_conn # else void **unused # endif ); extern gboolean init_cman_connection(gboolean(*dispatch) (unsigned long long, gboolean), void (*destroy) (gpointer)); extern gboolean init_quorum_connection(gboolean(*dispatch) (unsigned long long, gboolean), void (*destroy) (gpointer)); extern gboolean send_cluster_message(const char *node, enum crm_ais_msg_types service, xmlNode * data, gboolean ordered); extern void destroy_crm_node(gpointer data); extern crm_node_t *crm_get_peer(unsigned int id, const char *uname); -extern crm_node_t *crm_update_ais_node(xmlNode * member, long long seq); -extern crm_node_t *crm_update_cman_node(xmlNode * member, long long seq); extern void crm_update_peer_proc(const char *uname, uint32_t flag, const char *status); extern crm_node_t *crm_update_peer(unsigned int id, uint64_t born, uint64_t seen, int32_t votes, uint32_t children, const char *uuid, const char *uname, const char *addr, const char *state); extern gboolean crm_is_member_active(const crm_node_t * node); extern gboolean crm_is_full_member(const crm_node_t * node); extern guint crm_active_members(void); extern guint reap_crm_member(uint32_t id); extern guint crm_active_members(void); extern guint crm_active_peers(uint32_t peer); extern gboolean crm_calculate_quorum(void); extern int crm_terminate_member(int nodeid, const char *uname, IPC_Channel * cluster); extern int crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection); extern gboolean crm_get_cluster_name(char **cname); # if SUPPORT_HEARTBEAT extern gboolean ccm_have_quorum(oc_ed_t event); extern const char *ccm_event_name(oc_ed_t event); extern crm_node_t *crm_update_ccm_node(const oc_ev_membership_t * oc, int offset, const char *state, uint64_t seq); # endif # if SUPPORT_COROSYNC extern int ais_fd_sync; extern GFDSource *ais_source; extern gboolean send_ais_text(int class, const char *data, gboolean local, const char *node, enum crm_ais_msg_types dest); extern gboolean get_ais_nodeid(uint32_t * id, char **uname); extern gboolean ais_dispatch(int sender, gpointer user_data); # endif extern void empty_uuid_cache(void); extern const char *get_uuid(const char *uname); extern const char *get_uname(const char *uuid); extern void set_uuid(xmlNode * node, const char *attr, const char *uname); extern void unget_uuid(const char *uname); enum crm_status_type { crm_status_uname, crm_status_nstate, crm_status_processes, }; enum crm_ais_msg_types text2msg_type(const char *text); extern void crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *)); /* *INDENT-OFF* */ enum cluster_type_e { pcmk_cluster_unknown = 0x0001, pcmk_cluster_invalid = 0x0002, pcmk_cluster_heartbeat = 0x0004, pcmk_cluster_classic_ais = 0x0010, pcmk_cluster_corosync = 0x0020, pcmk_cluster_cman = 0x0040, }; /* *INDENT-ON* */ extern enum cluster_type_e get_cluster_type(void); extern const char *name_for_cluster_type(enum cluster_type_e type); extern gboolean is_corosync_cluster(void); extern gboolean is_cman_cluster(void); extern gboolean is_openais_cluster(void); extern gboolean is_classic_ais_cluster(void); extern gboolean is_heartbeat_cluster(void); #endif diff --git a/lib/cluster/Makefile.am b/lib/cluster/Makefile.am index 2b11f987a5..e5dce425b1 100644 --- a/lib/cluster/Makefile.am +++ b/lib/cluster/Makefile.am @@ -1,49 +1,53 @@ # # Copyright (C) 2004 Andrew Beekhof # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ -I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl headerdir=$(pkgincludedir)/crm/cluster header_HEADERS = stack.h ## libraries lib_LTLIBRARIES = libcrmcluster.la libcrmcluster_la_SOURCES = cluster.c membership.c stack.h libcrmcluster_la_LDFLAGS = -version-info 1:0:0 $(CLUSTERLIBS) libcrmcluster_la_LIBADD = $(top_builddir)/lib/common/libcrmcommon.la $(top_builddir)/lib/fencing/libstonithd.la libcrmcluster_la_DEPENDENCIES = $(top_builddir)/lib/common/libcrmcommon.la $(top_builddir)/lib/fencing/libstonithd.la if BUILD_CS_SUPPORT +if BUILD_CS_PLUGIN +libcrmcluster_la_SOURCES += legacy.c +else libcrmcluster_la_SOURCES += corosync.c endif +endif if BUILD_HEARTBEAT_SUPPORT libcrmcluster_la_SOURCES += heartbeat.c #libcrmcluster_la_LIBADD += -ldl endif clean-generic: rm -f *.log *.debug *.xml *~ install-exec-local: uninstall-local: diff --git a/lib/cluster/corosync.c b/lib/cluster/corosync.c index 1975e589f8..87ffff0607 100644 --- a/lib/cluster/corosync.c +++ b/lib/cluster/corosync.c @@ -1,1560 +1,850 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include +#include +#include +#include + #include #include #include #include #include "stack.h" -#if SUPPORT_COROSYNC -# if CS_USES_LIBQB -# include -# include -# include -# include -# include -# if HAVE_CONFDB -# include -# endif -# else -# include -# include -# endif -# include + +#include +#include + +#include +#include +#include +#include +#include +#include + cpg_handle_t pcmk_cpg_handle = 0; struct cpg_name pcmk_cpg_group = { .length = 0, .value[0] = 0, }; -#endif - -#if HAVE_CMAP -# include -#endif - -#if SUPPORT_CMAN -# include -cman_handle_t pcmk_cman_handle = NULL; -#endif - -#ifdef SUPPORT_CS_QUORUM -# include -# include -# include - -# include quorum_handle_t pcmk_quorum_handle = 0; - -#endif +gboolean(*quorum_app_callback) (unsigned long long seq, gboolean quorate) = NULL; static char *pcmk_uname = NULL; static int pcmk_uname_len = 0; static uint32_t pcmk_nodeid = 0; #define cs_repeat(counter, max, code) do { \ code; \ if(rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { \ counter++; \ crm_debug("Retrying operation after %ds", counter); \ sleep(counter); \ } else { \ break; \ } \ } while(counter < max) enum crm_ais_msg_types text2msg_type(const char *text) { int type = crm_msg_none; CRM_CHECK(text != NULL, return type); if (safe_str_eq(text, "ais")) { type = crm_msg_ais; } else if (safe_str_eq(text, "crm_plugin")) { type = crm_msg_ais; } else if (safe_str_eq(text, CRM_SYSTEM_CIB)) { type = crm_msg_cib; } else if (safe_str_eq(text, CRM_SYSTEM_CRMD)) { type = crm_msg_crmd; } else if (safe_str_eq(text, CRM_SYSTEM_DC)) { type = crm_msg_crmd; } else if (safe_str_eq(text, CRM_SYSTEM_TENGINE)) { type = crm_msg_te; } else if (safe_str_eq(text, CRM_SYSTEM_PENGINE)) { type = crm_msg_pe; } else if (safe_str_eq(text, CRM_SYSTEM_LRMD)) { type = crm_msg_lrmd; } else if (safe_str_eq(text, CRM_SYSTEM_STONITHD)) { type = crm_msg_stonithd; } else if (safe_str_eq(text, "stonith-ng")) { type = crm_msg_stonith_ng; } else if (safe_str_eq(text, "attrd")) { type = crm_msg_attrd; } else { /* This will normally be a transient client rather than * a cluster daemon. Set the type to the pid of the client */ int scan_rc = sscanf(text, "%d", &type); if (scan_rc != 1) { /* Ensure its sane */ type = crm_msg_none; } } return type; } -char * -get_ais_data(const AIS_Message * msg) -{ - int rc = BZ_OK; - char *uncompressed = NULL; - unsigned int new_size = msg->size + 1; - - if (msg->is_compressed == FALSE) { - crm_trace("Returning uncompressed message data"); - uncompressed = strdup(msg->data); - - } else { - crm_trace("Decompressing message data"); - crm_malloc0(uncompressed, new_size); - - rc = BZ2_bzBuffToBuffDecompress(uncompressed, &new_size, (char *)msg->data, - msg->compressed_size, 1, 0); - - CRM_ASSERT(rc == BZ_OK); - CRM_ASSERT(new_size == msg->size); - } - - return uncompressed; -} - -#if SUPPORT_COROSYNC -int ais_fd_sync = -1; -int ais_fd_async = -1; /* never send messages via this channel */ -void *ais_ipc_ctx = NULL; - -# if CS_USES_LIBQB -qb_ipcc_connection_t *ais_ipc_handle = NULL; -# else -hdb_handle_t ais_ipc_handle = 0; -# endif -GFDSource *ais_source = NULL; -GFDSource *ais_source_sync = NULL; -GFDSource *cman_source = NULL; GFDSource *cpg_source = NULL; GFDSource *quorumd_source = NULL; static char *ais_cluster_name = NULL; -gboolean -get_ais_nodeid(uint32_t * id, char **uname) -{ - struct iovec iov; - int retries = 0; - int rc = CS_OK; - cs_ipc_header_response_t header; - struct crm_ais_nodeid_resp_s answer; - - header.error = CS_OK; - header.id = crm_class_nodeid; - header.size = sizeof(cs_ipc_header_response_t); - - CRM_CHECK(id != NULL, return FALSE); - CRM_CHECK(uname != NULL, return FALSE); - - iov.iov_base = &header; - iov.iov_len = header.size; - - retry: - errno = 0; -# if CS_USES_LIBQB - rc = qb_to_cs_error(qb_ipcc_sendv_recv(ais_ipc_handle, &iov, 1, &answer, sizeof(answer), -1)); -# else - rc = coroipcc_msg_send_reply_receive(ais_ipc_handle, &iov, 1, &answer, sizeof(answer)); -# endif - if (rc == CS_OK) { - CRM_CHECK(answer.header.size == sizeof(struct crm_ais_nodeid_resp_s), - crm_err("Odd message: id=%d, size=%d, error=%d", - answer.header.id, answer.header.size, answer.header.error)); - CRM_CHECK(answer.header.id == crm_class_nodeid, - crm_err("Bad response id: %d", answer.header.id)); - } - - if ((rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) && retries < 20) { - retries++; - crm_info("Peer overloaded: Re-sending message (Attempt %d of 20)", retries); - sleep(retries); /* Proportional back off */ - goto retry; - } - - if (rc != CS_OK) { - crm_err("Sending nodeid request: FAILED (rc=%d): %s", rc, ais_error2text(rc)); - return FALSE; - - } else if (answer.header.error != CS_OK) { - crm_err("Bad response from peer: (rc=%d): %s", rc, ais_error2text(rc)); - return FALSE; - } - - crm_info("Server details: id=%u uname=%s cname=%s", answer.id, answer.uname, answer.cname); - - *id = answer.id; - *uname = crm_strdup(answer.uname); - ais_cluster_name = crm_strdup(answer.cname); - - return TRUE; -} - gboolean crm_get_cluster_name(char **cname) { CRM_CHECK(cname != NULL, return FALSE); if (ais_cluster_name) { *cname = crm_strdup(ais_cluster_name); return TRUE; } return FALSE; } gboolean send_ais_text(int class, const char *data, gboolean local, const char *node, enum crm_ais_msg_types dest) { static int msg_id = 0; static int local_pid = 0; - enum cluster_type_e cluster_type = get_cluster_type(); int retries = 0; int rc = CS_OK; int buf_len = sizeof(cs_ipc_header_response_t); char *buf = NULL; struct iovec iov; const char *transport = "pcmk"; - cs_ipc_header_response_t *header = NULL; AIS_Message *ais_msg = NULL; enum crm_ais_msg_types sender = text2msg_type(crm_system_name); /* There are only 6 handlers registered to crm_lib_service in plugin.c */ CRM_CHECK(class < 6, crm_err("Invalid message class: %d", class); return FALSE); if (data == NULL) { data = ""; } if (local_pid == 0) { local_pid = getpid(); } if (sender == crm_msg_none) { sender = local_pid; } crm_malloc0(ais_msg, sizeof(AIS_Message)); ais_msg->id = msg_id++; ais_msg->header.id = class; ais_msg->header.error = CS_OK; ais_msg->host.type = dest; ais_msg->host.local = local; if (node) { ais_msg->host.size = strlen(node); memset(ais_msg->host.uname, 0, MAX_NAME); memcpy(ais_msg->host.uname, node, ais_msg->host.size); ais_msg->host.id = 0; } else { ais_msg->host.size = 0; memset(ais_msg->host.uname, 0, MAX_NAME); ais_msg->host.id = 0; } ais_msg->sender.id = 0; ais_msg->sender.type = sender; ais_msg->sender.pid = local_pid; ais_msg->sender.size = pcmk_uname_len; memset(ais_msg->sender.uname, 0, MAX_NAME); memcpy(ais_msg->sender.uname, pcmk_uname, ais_msg->sender.size); ais_msg->size = 1 + strlen(data); if (ais_msg->size < CRM_BZ2_THRESHOLD) { failback: crm_realloc(ais_msg, sizeof(AIS_Message) + ais_msg->size); memcpy(ais_msg->data, data, ais_msg->size); } else { char *compressed = NULL; char *uncompressed = crm_strdup(data); unsigned int len = (ais_msg->size * 1.1) + 600; /* recomended size */ crm_trace("Compressing message payload"); crm_malloc(compressed, len); rc = BZ2_bzBuffToBuffCompress(compressed, &len, uncompressed, ais_msg->size, CRM_BZ2_BLOCKS, 0, CRM_BZ2_WORK); crm_free(uncompressed); if (rc != BZ_OK) { crm_err("Compression failed: %d", rc); crm_free(compressed); goto failback; } crm_realloc(ais_msg, sizeof(AIS_Message) + len + 1); memcpy(ais_msg->data, compressed, len); ais_msg->data[len] = 0; crm_free(compressed); ais_msg->is_compressed = TRUE; ais_msg->compressed_size = len; crm_trace("Compression details: %d -> %d", ais_msg->size, ais_data_len(ais_msg)); } ais_msg->header.size = sizeof(AIS_Message) + ais_data_len(ais_msg); crm_trace("Sending%s message %d to %s.%s (data=%d, total=%d)", ais_msg->is_compressed ? " compressed" : "", ais_msg->id, ais_dest(&(ais_msg->host)), msg_type2text(dest), ais_data_len(ais_msg), ais_msg->header.size); iov.iov_base = ais_msg; iov.iov_len = ais_msg->header.size; crm_realloc(buf, buf_len); do { if (rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { retries++; crm_info("Peer overloaded or membership in flux:" " Re-sending message (Attempt %d of 20)", retries); sleep(retries); /* Proportional back off */ } errno = 0; - switch (cluster_type) { - case pcmk_cluster_classic_ais: -# if CS_USES_LIBQB - rc = qb_to_cs_error(qb_ipcc_sendv_recv(ais_ipc_handle, &iov, 1, buf, buf_len, -1)); -# else - rc = coroipcc_msg_send_reply_receive(ais_ipc_handle, &iov, 1, buf, buf_len); -# endif - header = (cs_ipc_header_response_t *) buf; - if (rc == CS_OK) { - CRM_CHECK(header->size == sizeof(cs_ipc_header_response_t), - crm_err("Odd message: id=%d, size=%d, class=%d, error=%d", - header->id, header->size, class, header->error)); - - CRM_ASSERT(buf_len >= header->size); - CRM_CHECK(header->id == CRM_MESSAGE_IPC_ACK, - crm_err("Bad response id (%d) for request (%d)", header->id, - ais_msg->header.id)); - CRM_CHECK(header->error == CS_OK, rc = header->error); - } - break; + transport = "cpg"; + CRM_CHECK(dest != crm_msg_ais, rc = CS_ERR_MESSAGE_ERROR; goto bail); + rc = cpg_mcast_joined(pcmk_cpg_handle, CPG_TYPE_AGREED, &iov, 1); + if (rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { + cpg_flow_control_state_t fc_state = CPG_FLOW_CONTROL_DISABLED; + int rc2 = cpg_flow_control_state_get(pcmk_cpg_handle, &fc_state); - case pcmk_cluster_corosync: - case pcmk_cluster_cman: - transport = "cpg"; - CRM_CHECK(dest != crm_msg_ais, rc = CS_ERR_MESSAGE_ERROR; goto bail); - rc = cpg_mcast_joined(pcmk_cpg_handle, CPG_TYPE_AGREED, &iov, 1); - if (rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { - cpg_flow_control_state_t fc_state = CPG_FLOW_CONTROL_DISABLED; - int rc2 = cpg_flow_control_state_get(pcmk_cpg_handle, &fc_state); - - if (rc2 == CS_OK && fc_state == CPG_FLOW_CONTROL_ENABLED) { - crm_warn("Connection overloaded, cannot send messages"); - goto bail; - - } else if (rc2 != CS_OK) { - crm_warn("Could not determin the connection state: %s (%d)", - ais_error2text(rc2), rc2); - goto bail; - } - } - break; + if (rc2 == CS_OK && fc_state == CPG_FLOW_CONTROL_ENABLED) { + crm_warn("Connection overloaded, cannot send messages"); + goto bail; - case pcmk_cluster_unknown: - case pcmk_cluster_invalid: - case pcmk_cluster_heartbeat: - CRM_ASSERT(is_openais_cluster()); - break; + } else if (rc2 != CS_OK) { + crm_warn("Could not determin the connection state: %s (%d)", + ais_error2text(rc2), rc2); + goto bail; + } } } while ((rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) && retries < 20); bail: if (rc != CS_OK) { crm_perror(LOG_ERR, "Sending message %d via %s: FAILED (rc=%d): %s", ais_msg->id, transport, rc, ais_error2text(rc)); } else { crm_trace("Message %d: sent", ais_msg->id); } crm_free(buf); crm_free(ais_msg); return (rc == CS_OK); } gboolean send_ais_message(xmlNode * msg, gboolean local, const char *node, enum crm_ais_msg_types dest) { gboolean rc = TRUE; - char *data = NULL; - - if (is_classic_ais_cluster()) { - if (ais_fd_async < 0 || ais_source == NULL) { - crm_err("Not connected to AIS: %d %p", ais_fd_async, ais_source); - return FALSE; - } - } - - data = dump_xml_unformatted(msg); + char *data = dump_xml_unformatted(msg); rc = send_ais_text(crm_class_cluster, data, local, node, dest); crm_free(data); return rc; } void terminate_ais_connection(void) { crm_notice("Disconnecting from Corosync"); -/* G_main_del_fd(ais_source); */ -/* G_main_del_fd(ais_source_sync); */ - - if (is_classic_ais_cluster()) { - if(ais_ipc_handle) { - crm_trace("Disconnecting plugin"); -# if CS_USES_LIBQB - qb_ipcc_disconnect(ais_ipc_handle); - ais_ipc_handle = NULL; -# else - coroipcc_service_disconnect(ais_ipc_handle); - ais_ipc_handle = 0; -# endif - } else { - crm_info("No plugin connection"); - } - + if(pcmk_cpg_handle) { + crm_trace("Disconnecting CPG"); + cpg_leave(pcmk_cpg_handle, &pcmk_cpg_group); + cpg_finalize(pcmk_cpg_handle); + pcmk_cpg_handle = 0; + } else { - if(pcmk_cpg_handle) { - crm_trace("Disconnecting CPG"); - cpg_leave(pcmk_cpg_handle, &pcmk_cpg_group); - cpg_finalize(pcmk_cpg_handle); - pcmk_cpg_handle = 0; - - } else { - crm_info("No CPG connection"); - } - } - -# ifdef SUPPORT_CS_QUORUM - if (is_corosync_cluster()) { - if(pcmk_quorum_handle) { - crm_trace("Disconnecting quorum"); - quorum_finalize(pcmk_quorum_handle); - pcmk_quorum_handle = 0; - - } else { - crm_info("No Quorum connection"); - } + crm_info("No CPG connection"); } -# endif -# if SUPPORT_CMAN - if (is_cman_cluster()) { - if(pcmk_cman_handle) { - crm_trace("Disconnecting cman"); - cman_stop_notification(pcmk_cman_handle); - cman_finish(pcmk_cman_handle); - } else { - crm_info("No cman connection"); - } + if(pcmk_quorum_handle) { + crm_trace("Disconnecting quorum"); + quorum_finalize(pcmk_quorum_handle); + pcmk_quorum_handle = 0; + + } else { + crm_info("No Quorum connection"); } -# endif - ais_fd_async = -1; - ais_fd_sync = -1; } int ais_membership_timer = 0; gboolean ais_membership_force = FALSE; static gboolean ais_dispatch_message(AIS_Message * msg, gboolean(*dispatch) (AIS_Message *, char *, int)) { char *data = NULL; char *uncompressed = NULL; xmlNode *xml = NULL; CRM_ASSERT(msg != NULL); crm_trace("Got new%s message (size=%d, %d, %d)", msg->is_compressed ? " compressed" : "", ais_data_len(msg), msg->size, msg->compressed_size); data = msg->data; if (msg->is_compressed && msg->size > 0) { int rc = BZ_OK; unsigned int new_size = msg->size + 1; if (check_message_sanity(msg, NULL) == FALSE) { goto badmsg; } crm_trace("Decompressing message data"); crm_malloc0(uncompressed, new_size); rc = BZ2_bzBuffToBuffDecompress(uncompressed, &new_size, data, msg->compressed_size, 1, 0); if (rc != BZ_OK) { crm_err("Decompression failed: %d", rc); goto badmsg; } CRM_ASSERT(rc == BZ_OK); CRM_ASSERT(new_size == msg->size); data = uncompressed; } else if (check_message_sanity(msg, data) == FALSE) { goto badmsg; } else if (safe_str_eq("identify", data)) { int pid = getpid(); char *pid_s = crm_itoa(pid); send_ais_text(crm_class_cluster, pid_s, TRUE, NULL, crm_msg_ais); crm_free(pid_s); goto done; } if (msg->header.id != crm_class_members) { crm_update_peer(msg->sender.id, 0, 0, 0, 0, msg->sender.uname, msg->sender.uname, NULL, NULL); } if (msg->header.id == crm_class_rmpeer) { uint32_t id = crm_int_helper(data, NULL); crm_info("Removing peer %s/%u", data, id); reap_crm_member(id); goto done; - - } else if (msg->header.id == crm_class_members || msg->header.id == crm_class_quorum) { - - xml = string2xml(data); - if (xml == NULL) { - crm_err("Invalid membership update: %s", data); - goto badmsg; - } - - if (is_classic_ais_cluster() == FALSE) { - xmlNode *node = NULL; - - for (node = __xml_first_child(xml); node != NULL; node = __xml_next(node)) { - crm_update_cman_node(node, crm_peer_seq); - } - - } else { - xmlNode *node = NULL; - const char *value = NULL; - gboolean quorate = FALSE; - - value = crm_element_value(xml, "quorate"); - CRM_CHECK(value != NULL, crm_log_xml_err(xml, "No quorum value:"); goto badmsg); - if (crm_is_true(value)) { - quorate = TRUE; - } - - value = crm_element_value(xml, "id"); - CRM_CHECK(value != NULL, crm_log_xml_err(xml, "No membership id"); goto badmsg); - crm_peer_seq = crm_int_helper(value, NULL); - - if (quorate != crm_have_quorum) { - crm_notice("Membership %s: quorum %s", value, quorate ? "acquired" : "lost"); - crm_have_quorum = quorate; - - } else { - crm_info("Membership %s: quorum %s", value, quorate ? "retained" : "still lost"); - } - - for (node = __xml_first_child(xml); node != NULL; node = __xml_next(node)) { - crm_update_ais_node(node, crm_peer_seq); - } - } } crm_trace("Payload: %s", data); if (dispatch != NULL) { dispatch(msg, data, 0); } done: crm_free(uncompressed); free_xml(xml); return TRUE; badmsg: crm_err("Invalid message (id=%d, dest=%s:%s, from=%s:%s.%d):" " min=%d, total=%d, size=%d, bz2_size=%d", msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, (int)sizeof(AIS_Message), msg->header.size, msg->size, msg->compressed_size); goto done; } -gboolean -ais_dispatch(int sender, gpointer user_data) -{ - int rc = CS_OK; - gboolean good = TRUE; - - gboolean(*dispatch) (AIS_Message *, char *, int) = user_data; - - do { -# if CS_USES_LIBQB - char buffer[AIS_IPC_MESSAGE_SIZE]; - - rc = qb_to_cs_error(qb_ipcc_event_recv(ais_ipc_handle, (void *)buffer, - AIS_IPC_MESSAGE_SIZE, 100)); -# else - char *buffer = NULL; - - rc = coroipcc_dispatch_get(ais_ipc_handle, (void **)&buffer, 0); -# endif - - if (rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { - return TRUE; - } - if (rc != CS_OK) { - crm_perror(LOG_ERR, "Receiving message body failed: (%d) %s", rc, ais_error2text(rc)); - goto bail; - } -# if !CS_USES_LIBQB - if (buffer == NULL) { - /* NULL is a legal "no message afterall" value */ - return TRUE; - } -# endif - good = ais_dispatch_message((AIS_Message *) buffer, dispatch); -# if !CS_USES_LIBQB - coroipcc_dispatch_put(ais_ipc_handle); -# endif - - } while (good && ais_ipc_handle); - - return good; - - bail: - if(ais_ipc_handle) { - crm_err("AIS connection failed: %p", (void*)ais_ipc_handle); - } - return FALSE; -} - -static void -ais_destroy(gpointer user_data) -{ - crm_err("AIS connection terminated"); - ais_fd_sync = -1; - exit(1); -} - static gboolean -pcmk_proc_dispatch(IPC_Channel * ch, gpointer user_data) +pcmk_mcp_dispatch(IPC_Channel * ch, gpointer user_data) { xmlNode *msg = NULL; gboolean stay_connected = TRUE; while (IPC_ISRCONN(ch)) { if (ch->ops->is_message_pending(ch) == 0) { break; } msg = xmlfromIPC(ch, MAX_IPC_DELAY); if (msg) { xmlNode *node = NULL; for (node = __xml_first_child(msg); node != NULL; node = __xml_next(node)) { int id = 0; int children = 0; const char *uname = crm_element_value(node, "uname"); crm_element_value_int(node, "id", &id); crm_element_value_int(node, "processes", &children); if (id == 0) { crm_log_xml_err(msg, "Bad Update"); } else { crm_update_peer(id, 0, 0, 0, children, NULL, uname, NULL, NULL); } } free_xml(msg); } if (ch->ch_status != IPC_CONNECT) { break; } } if (ch->ch_status != IPC_CONNECT) { stay_connected = FALSE; } return stay_connected; } -# if SUPPORT_CMAN - -static gboolean -pcmk_cman_dispatch(int sender, gpointer user_data) -{ - int rc = cman_dispatch(pcmk_cman_handle, CMAN_DISPATCH_ALL); - - if (rc < 0) { - crm_err("Connection to cman failed: %d", rc); - return FALSE; - } - return TRUE; -} - -# define MAX_NODES 256 - -static void -cman_event_callback(cman_handle_t handle, void *privdata, int reason, int arg) -{ - int rc = 0, lpc = 0, node_count = 0; - - cman_cluster_t cluster; - static cman_node_t cman_nodes[MAX_NODES]; - - gboolean(*dispatch) (unsigned long long, gboolean) = privdata; - - switch (reason) { - case CMAN_REASON_STATECHANGE: - - memset(&cluster, 0, sizeof(cluster)); - rc = cman_get_cluster(pcmk_cman_handle, &cluster); - if (rc < 0) { - crm_err("Couldn't query cman cluster details: %d %d", rc, errno); - return; - } - - crm_peer_seq = cluster.ci_generation; - if (arg != crm_have_quorum) { - crm_notice("Membership %llu: quorum %s", crm_peer_seq, arg ? "acquired" : "lost"); - crm_have_quorum = arg; - - } else { - crm_info("Membership %llu: quorum %s", crm_peer_seq, - arg ? "retained" : "still lost"); - } - - rc = cman_get_nodes(pcmk_cman_handle, MAX_NODES, &node_count, cman_nodes); - if (rc < 0) { - crm_err("Couldn't query cman node list: %d %d", rc, errno); - return; - } - - for (lpc = 0; lpc < node_count; lpc++) { - if (cman_nodes[lpc].cn_nodeid == 0) { - /* Never allow node ID 0 to be considered a member #315711 */ - cman_nodes[lpc].cn_member = 0; - } - crm_update_peer(cman_nodes[lpc].cn_nodeid, cman_nodes[lpc].cn_incarnation, - cman_nodes[lpc].cn_member ? crm_peer_seq : 0, 0, 0, - cman_nodes[lpc].cn_name, cman_nodes[lpc].cn_name, NULL, - cman_nodes[lpc].cn_member ? CRM_NODE_MEMBER : CRM_NODE_LOST); - } - - if (dispatch) { - dispatch(crm_peer_seq, crm_have_quorum); - } - break; - - case CMAN_REASON_TRY_SHUTDOWN: - /* Always reply with a negative - pacemaker needs to be stopped first */ - crm_info("CMAN wants to shut down: %s", arg ? "forced" : "optional"); - cman_replyto_shutdown(pcmk_cman_handle, 0); - break; - - case CMAN_REASON_CONFIG_UPDATE: - /* Ignore */ - break; - } -} -# endif - -gboolean -init_cman_connection(gboolean(*dispatch) (unsigned long long, gboolean), void (*destroy) (gpointer)) -{ -# if SUPPORT_CMAN - int rc = -1, fd = -1; - cman_cluster_t cluster; - - crm_info("Configuring Pacemaker to obtain quorum from cman"); - - memset(&cluster, 0, sizeof(cluster)); - - pcmk_cman_handle = cman_init(dispatch); - if (pcmk_cman_handle == NULL || cman_is_active(pcmk_cman_handle) == FALSE) { - crm_err("Couldn't connect to cman"); - goto cman_bail; - } - - rc = cman_get_cluster(pcmk_cman_handle, &cluster); - if (rc < 0) { - crm_err("Couldn't query cman cluster details: %d %d", rc, errno); - goto cman_bail; - } - ais_cluster_name = crm_strdup(cluster.ci_name); - - rc = cman_start_notification(pcmk_cman_handle, cman_event_callback); - if (rc < 0) { - crm_err("Couldn't register for cman notifications: %d %d", rc, errno); - goto cman_bail; - } - - /* Get the current membership state */ - cman_event_callback(pcmk_cman_handle, dispatch, CMAN_REASON_STATECHANGE, - cman_is_quorate(pcmk_cman_handle)); - - fd = cman_get_fd(pcmk_cman_handle); - crm_debug("Adding fd=%d to mainloop", fd); - cman_source = G_main_add_fd(G_PRIORITY_HIGH, fd, FALSE, pcmk_cman_dispatch, dispatch, destroy); - - cman_bail: - if (rc < 0) { - cman_finish(pcmk_cman_handle); - return FALSE; - } -# else - crm_err("cman qorum is not supported in this build"); - exit(100); -# endif - return TRUE; -} - -# ifdef SUPPORT_COROSYNC gboolean(*pcmk_cpg_dispatch_fn) (AIS_Message *, char *, int) = NULL; static gboolean pcmk_cpg_dispatch(int sender, gpointer user_data) { int rc = 0; pcmk_cpg_dispatch_fn = user_data; rc = cpg_dispatch(pcmk_cpg_handle, CS_DISPATCH_ALL); if (rc != CS_OK) { crm_err("Connection to the CPG API failed: %d", rc); return FALSE; } return TRUE; } static void pcmk_cpg_deliver(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { AIS_Message *ais_msg = (AIS_Message *) msg; if (ais_msg->sender.id > 0 && ais_msg->sender.id != nodeid) { crm_err("Nodeid mismatch from %d.%d: claimed nodeid=%u", nodeid, pid, ais_msg->sender.id); return; } else if (ais_msg->host.size != 0 && safe_str_neq(ais_msg->host.uname, pcmk_uname)) { /* Not for us */ return; } ais_msg->sender.id = nodeid; if (ais_msg->sender.size == 0) { crm_node_t *peer = crm_get_peer(nodeid, NULL); if (peer == NULL) { crm_err("Peer with nodeid=%u is unknown", nodeid); } else if (peer->uname == NULL) { crm_err("No uname for peer with nodeid=%u", nodeid); } else { crm_notice("Fixing uname for peer with nodeid=%u", nodeid); ais_msg->sender.size = strlen(peer->uname); memset(ais_msg->sender.uname, 0, MAX_NAME); memcpy(ais_msg->sender.uname, peer->uname, ais_msg->sender.size); } } ais_dispatch_message(ais_msg, pcmk_cpg_dispatch_fn); } static void pcmk_cpg_membership(cpg_handle_t handle, const struct cpg_name *groupName, const struct cpg_address *member_list, size_t member_list_entries, const struct cpg_address *left_list, size_t left_list_entries, const struct cpg_address *joined_list, size_t joined_list_entries) { int i; for (i = 0; i < member_list_entries; i++) { crm_debug("Member[%d] %d ", i, member_list[i].nodeid); } for (i = 0; i < left_list_entries; i++) { crm_debug("Left[%d] %d ", i, left_list[i].nodeid); } } cpg_callbacks_t cpg_callbacks = { .cpg_deliver_fn = pcmk_cpg_deliver, .cpg_confchg_fn = pcmk_cpg_membership, }; -# endif -# ifdef SUPPORT_CS_QUORUM +static gboolean +init_cpg_connection(gboolean(*dispatch) (AIS_Message *, char *, int), void (*destroy) (gpointer), + uint32_t * nodeid) +{ + int rc = -1; + int fd = 0; + int retries = 0; + + strcpy(pcmk_cpg_group.value, crm_system_name); + pcmk_cpg_group.length = strlen(crm_system_name) + 1; + + cs_repeat(retries, 30, rc = cpg_initialize(&pcmk_cpg_handle, &cpg_callbacks)); + if (rc != CS_OK) { + crm_err("Could not connect to the Cluster Process Group API: %d\n", rc); + goto bail; + } + + retries = 0; + cs_repeat(retries, 30, rc = cpg_local_get(pcmk_cpg_handle, (unsigned int *)nodeid)); + if (rc != CS_OK) { + crm_err("Could not get local node id from the CPG API"); + goto bail; + } + + retries = 0; + cs_repeat(retries, 30, rc = cpg_join(pcmk_cpg_handle, &pcmk_cpg_group)); + if (rc != CS_OK) { + crm_err("Could not join the CPG group '%s': %d", crm_system_name, rc); + goto bail; + } + + rc = cpg_fd_get(pcmk_cpg_handle, &fd); + if (rc != CS_OK) { + crm_err("Could not obtain the CPG API connection: %d\n", rc); + goto bail; + } + + crm_debug("Adding fd=%d to mainloop", fd); + cpg_source = G_main_add_fd(G_PRIORITY_HIGH, fd, FALSE, pcmk_cpg_dispatch, dispatch, destroy); + + bail: + if (rc != CS_OK) { + cpg_finalize(pcmk_cpg_handle); + return FALSE; + } + return TRUE; +} + static gboolean pcmk_quorum_dispatch(int sender, gpointer user_data) { int rc = 0; rc = quorum_dispatch(pcmk_quorum_handle, CS_DISPATCH_ALL); if (rc < 0) { crm_err("Connection to the Quorum API failed: %d", rc); return FALSE; } return TRUE; } -gboolean(*quorum_app_callback) (unsigned long long seq, gboolean quorate) = NULL; - static void corosync_mark_unseen_peer_dead(gpointer key, gpointer value, gpointer user_data) { int *seq = user_data; crm_node_t *node = value; if (node->last_seen != *seq && crm_str_eq(CRM_NODE_LOST, node->state, TRUE) == FALSE) { crm_notice("Node %d/%s was not seen in the previous transition", node->id, node->uname); crm_update_peer(node->id, 0, 0, 0, 0, NULL, NULL, NULL, CRM_NODE_LOST); } } static void corosync_mark_node_unseen(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; node->last_seen = 0; } static void pcmk_quorum_notification(quorum_handle_t handle, uint32_t quorate, uint64_t ring_id, uint32_t view_list_entries, uint32_t * view_list) { int i; if (quorate != crm_have_quorum) { crm_notice("Membership " U64T ": quorum %s (%lu)", ring_id, quorate ? "acquired" : "lost", (long unsigned int)view_list_entries); crm_have_quorum = quorate; } else { crm_info("Membership " U64T ": quorum %s (%lu)", ring_id, quorate ? "retained" : "still lost", (long unsigned int)view_list_entries); } g_hash_table_foreach(crm_peer_cache, corosync_mark_node_unseen, NULL); for (i = 0; i < view_list_entries; i++) { char *uuid = get_corosync_uuid(view_list[i], NULL); crm_debug("Member[%d] %d ", i, view_list[i]); crm_update_peer(view_list[i], 0, ring_id, 0, 0, uuid, NULL, NULL, CRM_NODE_MEMBER); } crm_trace("Reaping unseen nodes..."); g_hash_table_foreach(crm_peer_cache, corosync_mark_unseen_peer_dead, &ring_id); if (quorum_app_callback) { quorum_app_callback(ring_id, quorate); } } quorum_callbacks_t quorum_callbacks = { .quorum_notify_fn = pcmk_quorum_notification, }; -# endif - -static gboolean -init_cpg_connection(gboolean(*dispatch) (AIS_Message *, char *, int), void (*destroy) (gpointer), - uint32_t * nodeid) -{ -# ifdef SUPPORT_COROSYNC - int rc = -1; - int fd = 0; - int retries = 0; - - strcpy(pcmk_cpg_group.value, crm_system_name); - pcmk_cpg_group.length = strlen(crm_system_name) + 1; - - cs_repeat(retries, 30, rc = cpg_initialize(&pcmk_cpg_handle, &cpg_callbacks)); - if (rc != CS_OK) { - crm_err("Could not connect to the Cluster Process Group API: %d\n", rc); - goto bail; - } - - retries = 0; - cs_repeat(retries, 30, rc = cpg_local_get(pcmk_cpg_handle, (unsigned int *)nodeid)); - if (rc != CS_OK) { - crm_err("Could not get local node id from the CPG API"); - goto bail; - } - - retries = 0; - cs_repeat(retries, 30, rc = cpg_join(pcmk_cpg_handle, &pcmk_cpg_group)); - if (rc != CS_OK) { - crm_err("Could not join the CPG group '%s': %d", crm_system_name, rc); - goto bail; - } - - rc = cpg_fd_get(pcmk_cpg_handle, &fd); - if (rc != CS_OK) { - crm_err("Could not obtain the CPG API connection: %d\n", rc); - goto bail; - } - - crm_debug("Adding fd=%d to mainloop", fd); - cpg_source = G_main_add_fd(G_PRIORITY_HIGH, fd, FALSE, pcmk_cpg_dispatch, dispatch, destroy); - - bail: - if (rc != CS_OK) { - cpg_finalize(pcmk_cpg_handle); - return FALSE; - } -# else - crm_err("The Corosync CPG API is not supported in this build"); - exit(100); -# endif - return TRUE; -} - gboolean init_quorum_connection(gboolean(*dispatch) (unsigned long long, gboolean), void (*destroy) (gpointer)) { -# ifdef SUPPORT_CS_QUORUM int rc = -1; int fd = 0; int quorate = 0; uint32_t quorum_type = 0; crm_debug("Configuring Pacemaker to obtain quorum from Corosync"); rc = quorum_initialize(&pcmk_quorum_handle, &quorum_callbacks, &quorum_type); if (rc != CS_OK) { crm_err("Could not connect to the Quorum API: %d\n", rc); goto bail; } else if (quorum_type != QUORUM_SET) { crm_err("Corosync quorum is not configured\n"); goto bail; } rc = quorum_getquorate(pcmk_quorum_handle, &quorate); if (rc != CS_OK) { crm_err("Could not obtain the current Quorum API state: %d\n", rc); goto bail; } + crm_notice("Quorum %s", quorate ? "acquired" : "lost"); quorum_app_callback = dispatch; crm_have_quorum = quorate; rc = quorum_trackstart(pcmk_quorum_handle, CS_TRACK_CHANGES | CS_TRACK_CURRENT); if (rc != CS_OK) { crm_err("Could not setup Quorum API notifications: %d\n", rc); goto bail; } rc = quorum_fd_get(pcmk_quorum_handle, &fd); if (rc != CS_OK) { crm_err("Could not obtain the Quorum API connection: %d\n", rc); goto bail; } quorumd_source = G_main_add_fd(G_PRIORITY_HIGH, fd, FALSE, pcmk_quorum_dispatch, dispatch, destroy); bail: if (rc != CS_OK) { quorum_finalize(pcmk_quorum_handle); return FALSE; } -# else - crm_err("The Corosync quorum API is not supported in this build"); - exit(100); -# endif - return TRUE; -} - -static gboolean -init_ais_connection_classic(gboolean(*dispatch) (AIS_Message *, char *, int), - void (*destroy) (gpointer), char **our_uuid, char **our_uname, - int *nodeid) -{ - int rc; - int pid = 0; - char *pid_s = NULL; - struct utsname name; - - crm_info("Creating connection to our Corosync plugin"); -# if CS_USES_LIBQB - rc = CS_OK; - ais_ipc_handle = qb_ipcc_connect("pacemaker.engine", AIS_IPC_MESSAGE_SIZE); -# else - rc = coroipcc_service_connect(COROSYNC_SOCKET_NAME, PCMK_SERVICE_ID, - AIS_IPC_MESSAGE_SIZE, AIS_IPC_MESSAGE_SIZE, AIS_IPC_MESSAGE_SIZE, - &ais_ipc_handle); -# endif - if (ais_ipc_handle) { -# if CS_USES_LIBQB - qb_ipcc_fd_get(ais_ipc_handle, &ais_fd_async); -# else - coroipcc_fd_get(ais_ipc_handle, &ais_fd_async); -# endif - } else { - crm_info("Connection to our AIS plugin (%d) failed: %s (%d)", - PCMK_SERVICE_ID, strerror(errno), errno); - return FALSE; - } - if (ais_fd_async <= 0 && rc == CS_OK) { - crm_err("No context created, but connection reported 'ok'"); - rc = CS_ERR_LIBRARY; - } - if (rc != CS_OK) { - crm_info("Connection to our AIS plugin (%d) failed: %s (%d)", PCMK_SERVICE_ID, - ais_error2text(rc), rc); - } - - if (rc != CS_OK) { - return FALSE; - } - - if (destroy == NULL) { - destroy = ais_destroy; - } - - if (dispatch) { - crm_debug("Adding fd=%d to mainloop", ais_fd_async); - ais_source = - G_main_add_fd(G_PRIORITY_HIGH, ais_fd_async, FALSE, ais_dispatch, dispatch, destroy); - } - - crm_info("AIS connection established"); - - pid = getpid(); - pid_s = crm_itoa(pid); - send_ais_text(crm_class_cluster, pid_s, TRUE, NULL, crm_msg_ais); - crm_free(pid_s); - - if (uname(&name) < 0) { - crm_perror(LOG_ERR, "Could not determin the current host"); - exit(100); - } - - get_ais_nodeid(&pcmk_nodeid, &pcmk_uname); - if (safe_str_neq(name.nodename, pcmk_uname)) { - crm_crit("Node name mismatch! OpenAIS supplied %s, our lookup returned %s", - pcmk_uname, name.nodename); - crm_notice - ("Node name mismatches usually occur when assigned automatically by DHCP servers"); - crm_notice("If this node was part of the cluster with a different name," - " you will need to remove the old entry with crm_node --remove"); - } return TRUE; } gboolean init_ais_connection(gboolean(*dispatch) (AIS_Message *, char *, int), void (*destroy) (gpointer), char **our_uuid, char **our_uname, int *nodeid) { int retries = 0; while (retries++ < 30) { + IPC_Channel *ch = NULL; int rc = init_ais_connection_once(dispatch, destroy, our_uuid, our_uname, nodeid); switch (rc) { case CS_OK: - if (getenv("HA_mcp")) { - IPC_Channel *ch = init_client_ipc_comms_nodispatch("pcmk"); - - G_main_add_IPC_Channel(G_PRIORITY_HIGH, ch, FALSE, pcmk_proc_dispatch, NULL, - destroy); - } + ch = init_client_ipc_comms_nodispatch("pcmk"); + G_main_add_IPC_Channel(G_PRIORITY_HIGH, ch, FALSE, pcmk_mcp_dispatch, NULL, destroy); return TRUE; break; case CS_ERR_TRY_AGAIN: case CS_ERR_QUEUE_FULL: break; default: return FALSE; } } crm_err("Retry count exceeded: %d", retries); return FALSE; } -static char * -get_local_node_name(void) -{ - char *name = NULL; - struct utsname res; - - if (is_cman_cluster()) { -# if SUPPORT_CMAN - cman_node_t us; - cman_handle_t cman; - - cman = cman_init(NULL); - if (cman != NULL && cman_is_active(cman)) { - us.cn_name[0] = 0; - cman_get_node(cman, CMAN_NODEID_US, &us); - name = crm_strdup(us.cn_name); - crm_info("Using CMAN node name: %s", name); - - } else { - crm_err("Couldn't determin node name from CMAN"); - } - - cman_finish(cman); -# endif - - } else if (uname(&res) < 0) { - crm_perror(LOG_ERR, "Could not determin the current host"); - exit(100); - - } else { - name = crm_strdup(res.nodename); - } - return name; -} - -extern int set_cluster_type(enum cluster_type_e type); - gboolean init_ais_connection_once(gboolean(*dispatch) (AIS_Message *, char *, int), void (*destroy) (gpointer), char **our_uuid, char **our_uname, int *nodeid) { + struct utsname res; enum cluster_type_e stack = get_cluster_type(); crm_peer_init(); /* Here we just initialize comms */ - switch (stack) { - case pcmk_cluster_classic_ais: - if (init_ais_connection_classic(dispatch, destroy, our_uuid, &pcmk_uname, nodeid) == - FALSE) { - return FALSE; - } - break; - case pcmk_cluster_cman: - case pcmk_cluster_corosync: - if (init_cpg_connection(dispatch, destroy, &pcmk_nodeid) == FALSE) { - return FALSE; - } - pcmk_uname = get_local_node_name(); - break; - case pcmk_cluster_heartbeat: - crm_info("Could not find an active corosync based cluster"); - return FALSE; - break; - default: - crm_err("Invalid cluster type: %s (%d)", name_for_cluster_type(stack), stack); - return FALSE; - break; + if(stack != pcmk_cluster_corosync) { + crm_err("Invalid cluster type: %s (%d)", name_for_cluster_type(stack), stack); + return FALSE; + } + + if (init_cpg_connection(dispatch, destroy, &pcmk_nodeid) == FALSE) { + return FALSE; + } else if (uname(&res) < 0) { + crm_perror(LOG_ERR, "Could not determin the current host"); + exit(100); + + } else { + pcmk_uname = crm_strdup(res.nodename); } crm_info("Connection to '%s': established", name_for_cluster_type(stack)); CRM_ASSERT(pcmk_uname != NULL); pcmk_uname_len = strlen(pcmk_uname); if (pcmk_nodeid != 0) { /* Ensure the local node always exists */ crm_update_peer(pcmk_nodeid, 0, 0, 0, 0, pcmk_uname, pcmk_uname, NULL, NULL); } if (our_uuid != NULL) { *our_uuid = get_corosync_uuid(pcmk_nodeid, pcmk_uname); } if (our_uname != NULL) { *our_uname = crm_strdup(pcmk_uname); } if (nodeid != NULL) { *nodeid = pcmk_nodeid; } return TRUE; } gboolean check_message_sanity(const AIS_Message * msg, const char *data) { gboolean sane = TRUE; gboolean repaired = FALSE; int dest = msg->host.type; int tmp_size = msg->header.size - sizeof(AIS_Message); if (sane && msg->header.size == 0) { crm_warn("Message with no size"); sane = FALSE; } if (sane && msg->header.error != CS_OK) { crm_warn("Message header contains an error: %d", msg->header.error); sane = FALSE; } if (sane && ais_data_len(msg) != tmp_size) { crm_warn("Message payload size is incorrect: expected %d, got %d", ais_data_len(msg), tmp_size); sane = TRUE; } if (sane && ais_data_len(msg) == 0) { crm_warn("Message with no payload"); sane = FALSE; } if (sane && data && msg->is_compressed == FALSE) { int str_size = strlen(data) + 1; if (ais_data_len(msg) != str_size) { int lpc = 0; crm_warn("Message payload is corrupted: expected %d bytes, got %d", ais_data_len(msg), str_size); sane = FALSE; for (lpc = (str_size - 10); lpc < msg->size; lpc++) { if (lpc < 0) { lpc = 0; } crm_debug("bad_data[%d]: %d / '%c'", lpc, data[lpc], data[lpc]); } } } if (sane == FALSE) { crm_err("Invalid message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } else if (repaired) { crm_err ("Repaired message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } else { crm_trace ("Verfied message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } return sane; } -#endif - -#if HAVE_CONFDB -static int -get_config_opt(confdb_handle_t config, - hdb_handle_t object_handle, const char *key, char **value, const char *fallback) -{ - size_t len = 0; - char *env_key = NULL; - const char *env_value = NULL; - char buffer[256]; - - if (*value) { - crm_free(*value); - *value = NULL; - } - - if (object_handle > 0) { - if (CS_OK == confdb_key_get(config, object_handle, key, strlen(key), &buffer, &len)) { - *value = crm_strdup(buffer); - } - } - - if (*value) { - crm_info("Found '%s' for option: %s", *value, key); - return 0; - } - - env_key = crm_concat("HA", key, '_'); - env_value = getenv(env_key); - crm_free(env_key); - - if (*value) { - crm_info("Found '%s' in ENV for option: %s", *value, key); - *value = crm_strdup(env_value); - return 0; - } - - if (fallback) { - crm_info("Defaulting to '%s' for option: %s", fallback, key); - *value = crm_strdup(fallback); - - } else { - crm_info("No default for option: %s", key); - } - - return -1; -} - -static confdb_handle_t -config_find_init(confdb_handle_t config) -{ - cs_error_t rc = CS_OK; - confdb_handle_t local_handle = OBJECT_PARENT_HANDLE; - - rc = confdb_object_find_start(config, local_handle); - if (rc == CS_OK) { - return local_handle; - } else { - crm_err("Couldn't create search context: %d", rc); - } - return 0; -} - -static hdb_handle_t -config_find_next(confdb_handle_t config, const char *name, confdb_handle_t top_handle) -{ - cs_error_t rc = CS_OK; - hdb_handle_t local_handle = 0; - - if (top_handle == 0) { - crm_err("Couldn't search for %s: no valid context", name); - return 0; - } - - crm_trace("Searching for %s in " HDB_X_FORMAT, name, top_handle); - rc = confdb_object_find(config, top_handle, name, strlen(name), &local_handle); - if (rc != CS_OK) { - crm_info("No additional configuration supplied for: %s", name); - local_handle = 0; - } else { - crm_info("Processing additional %s options...", name); - } - return local_handle; -} - -enum cluster_type_e -find_corosync_variant(void) -{ - confdb_handle_t config; - enum cluster_type_e found = pcmk_cluster_unknown; - - int rc; - char *value = NULL; - confdb_handle_t top_handle = 0; - hdb_handle_t local_handle = 0; - static confdb_callbacks_t callbacks = { }; - - rc = confdb_initialize(&config, &callbacks); - if (rc != CS_OK) { - crm_debug("Could not initialize Cluster Configuration Database API instance error %d", rc); - return found; - } - - top_handle = config_find_init(config); - local_handle = config_find_next(config, "service", top_handle); - while (local_handle) { - crm_free(value); - get_config_opt(config, local_handle, "name", &value, NULL); - if (safe_str_eq("pacemaker", value)) { - found = pcmk_cluster_classic_ais; - - crm_free(value); - get_config_opt(config, local_handle, "ver", &value, "0"); - crm_trace("Found Pacemaker plugin version: %s", value); - break; - } - - local_handle = config_find_next(config, "service", top_handle); - } - crm_free(value); - - if (found == pcmk_cluster_unknown) { - top_handle = config_find_init(config); - local_handle = config_find_next(config, "quorum", top_handle); - get_config_opt(config, local_handle, "provider", &value, NULL); - - if (safe_str_eq("quorum_cman", value)) { - crm_trace("Found CMAN quorum provider"); - found = pcmk_cluster_cman; - } - } - crm_free(value); - - if (found == pcmk_cluster_unknown) { - crm_trace("Defaulting to a 'bare' corosync cluster"); - found = pcmk_cluster_corosync; - } - - confdb_finalize(config); - return found; -} -#else enum cluster_type_e find_corosync_variant(void) { int rc = CS_OK; cmap_handle_t handle; /* There can be only one (possibility if confdb isn't around) */ rc = cmap_initialize(&handle); if (rc != CS_OK) { crm_info("Failed to initialize the cmap API. Error %d", rc); return pcmk_cluster_unknown; } cmap_finalize(handle); return pcmk_cluster_corosync; } -#endif diff --git a/lib/cluster/corosync.c b/lib/cluster/legacy.c similarity index 86% copy from lib/cluster/corosync.c copy to lib/cluster/legacy.c index 1975e589f8..3baf44ccf7 100644 --- a/lib/cluster/corosync.c +++ b/lib/cluster/legacy.c @@ -1,1560 +1,1373 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include "stack.h" #if SUPPORT_COROSYNC -# if CS_USES_LIBQB -# include -# include -# include -# include -# include -# if HAVE_CONFDB -# include -# endif -# else # include # include -# endif # include cpg_handle_t pcmk_cpg_handle = 0; struct cpg_name pcmk_cpg_group = { .length = 0, .value[0] = 0, }; #endif #if HAVE_CMAP # include #endif #if SUPPORT_CMAN # include cman_handle_t pcmk_cman_handle = NULL; #endif -#ifdef SUPPORT_CS_QUORUM -# include -# include -# include - -# include - -quorum_handle_t pcmk_quorum_handle = 0; - -#endif - static char *pcmk_uname = NULL; static int pcmk_uname_len = 0; static uint32_t pcmk_nodeid = 0; +int ais_membership_timer = 0; +gboolean ais_membership_force = FALSE; #define cs_repeat(counter, max, code) do { \ code; \ if(rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { \ counter++; \ crm_debug("Retrying operation after %ds", counter); \ sleep(counter); \ } else { \ break; \ } \ } while(counter < max) enum crm_ais_msg_types text2msg_type(const char *text) { int type = crm_msg_none; CRM_CHECK(text != NULL, return type); if (safe_str_eq(text, "ais")) { type = crm_msg_ais; } else if (safe_str_eq(text, "crm_plugin")) { type = crm_msg_ais; } else if (safe_str_eq(text, CRM_SYSTEM_CIB)) { type = crm_msg_cib; } else if (safe_str_eq(text, CRM_SYSTEM_CRMD)) { type = crm_msg_crmd; } else if (safe_str_eq(text, CRM_SYSTEM_DC)) { type = crm_msg_crmd; } else if (safe_str_eq(text, CRM_SYSTEM_TENGINE)) { type = crm_msg_te; } else if (safe_str_eq(text, CRM_SYSTEM_PENGINE)) { type = crm_msg_pe; } else if (safe_str_eq(text, CRM_SYSTEM_LRMD)) { type = crm_msg_lrmd; } else if (safe_str_eq(text, CRM_SYSTEM_STONITHD)) { type = crm_msg_stonithd; } else if (safe_str_eq(text, "stonith-ng")) { type = crm_msg_stonith_ng; } else if (safe_str_eq(text, "attrd")) { type = crm_msg_attrd; } else { /* This will normally be a transient client rather than * a cluster daemon. Set the type to the pid of the client */ int scan_rc = sscanf(text, "%d", &type); if (scan_rc != 1) { /* Ensure its sane */ type = crm_msg_none; } } return type; } char * get_ais_data(const AIS_Message * msg) { int rc = BZ_OK; char *uncompressed = NULL; unsigned int new_size = msg->size + 1; if (msg->is_compressed == FALSE) { crm_trace("Returning uncompressed message data"); uncompressed = strdup(msg->data); } else { crm_trace("Decompressing message data"); crm_malloc0(uncompressed, new_size); rc = BZ2_bzBuffToBuffDecompress(uncompressed, &new_size, (char *)msg->data, msg->compressed_size, 1, 0); CRM_ASSERT(rc == BZ_OK); CRM_ASSERT(new_size == msg->size); } return uncompressed; } #if SUPPORT_COROSYNC int ais_fd_sync = -1; int ais_fd_async = -1; /* never send messages via this channel */ void *ais_ipc_ctx = NULL; -# if CS_USES_LIBQB -qb_ipcc_connection_t *ais_ipc_handle = NULL; -# else hdb_handle_t ais_ipc_handle = 0; -# endif GFDSource *ais_source = NULL; GFDSource *ais_source_sync = NULL; GFDSource *cman_source = NULL; GFDSource *cpg_source = NULL; -GFDSource *quorumd_source = NULL; static char *ais_cluster_name = NULL; gboolean get_ais_nodeid(uint32_t * id, char **uname) { struct iovec iov; int retries = 0; int rc = CS_OK; cs_ipc_header_response_t header; struct crm_ais_nodeid_resp_s answer; header.error = CS_OK; header.id = crm_class_nodeid; header.size = sizeof(cs_ipc_header_response_t); CRM_CHECK(id != NULL, return FALSE); CRM_CHECK(uname != NULL, return FALSE); iov.iov_base = &header; iov.iov_len = header.size; retry: errno = 0; -# if CS_USES_LIBQB - rc = qb_to_cs_error(qb_ipcc_sendv_recv(ais_ipc_handle, &iov, 1, &answer, sizeof(answer), -1)); -# else rc = coroipcc_msg_send_reply_receive(ais_ipc_handle, &iov, 1, &answer, sizeof(answer)); -# endif if (rc == CS_OK) { CRM_CHECK(answer.header.size == sizeof(struct crm_ais_nodeid_resp_s), crm_err("Odd message: id=%d, size=%d, error=%d", answer.header.id, answer.header.size, answer.header.error)); CRM_CHECK(answer.header.id == crm_class_nodeid, crm_err("Bad response id: %d", answer.header.id)); } if ((rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) && retries < 20) { retries++; crm_info("Peer overloaded: Re-sending message (Attempt %d of 20)", retries); sleep(retries); /* Proportional back off */ goto retry; } if (rc != CS_OK) { crm_err("Sending nodeid request: FAILED (rc=%d): %s", rc, ais_error2text(rc)); return FALSE; } else if (answer.header.error != CS_OK) { crm_err("Bad response from peer: (rc=%d): %s", rc, ais_error2text(rc)); return FALSE; } crm_info("Server details: id=%u uname=%s cname=%s", answer.id, answer.uname, answer.cname); *id = answer.id; *uname = crm_strdup(answer.uname); ais_cluster_name = crm_strdup(answer.cname); return TRUE; } gboolean crm_get_cluster_name(char **cname) { CRM_CHECK(cname != NULL, return FALSE); if (ais_cluster_name) { *cname = crm_strdup(ais_cluster_name); return TRUE; } return FALSE; } gboolean send_ais_text(int class, const char *data, gboolean local, const char *node, enum crm_ais_msg_types dest) { static int msg_id = 0; static int local_pid = 0; enum cluster_type_e cluster_type = get_cluster_type(); int retries = 0; int rc = CS_OK; int buf_len = sizeof(cs_ipc_header_response_t); char *buf = NULL; struct iovec iov; const char *transport = "pcmk"; cs_ipc_header_response_t *header = NULL; AIS_Message *ais_msg = NULL; enum crm_ais_msg_types sender = text2msg_type(crm_system_name); /* There are only 6 handlers registered to crm_lib_service in plugin.c */ CRM_CHECK(class < 6, crm_err("Invalid message class: %d", class); return FALSE); if (data == NULL) { data = ""; } if (local_pid == 0) { local_pid = getpid(); } if (sender == crm_msg_none) { sender = local_pid; } crm_malloc0(ais_msg, sizeof(AIS_Message)); ais_msg->id = msg_id++; ais_msg->header.id = class; ais_msg->header.error = CS_OK; ais_msg->host.type = dest; ais_msg->host.local = local; if (node) { ais_msg->host.size = strlen(node); memset(ais_msg->host.uname, 0, MAX_NAME); memcpy(ais_msg->host.uname, node, ais_msg->host.size); ais_msg->host.id = 0; } else { ais_msg->host.size = 0; memset(ais_msg->host.uname, 0, MAX_NAME); ais_msg->host.id = 0; } ais_msg->sender.id = 0; ais_msg->sender.type = sender; ais_msg->sender.pid = local_pid; ais_msg->sender.size = pcmk_uname_len; memset(ais_msg->sender.uname, 0, MAX_NAME); memcpy(ais_msg->sender.uname, pcmk_uname, ais_msg->sender.size); ais_msg->size = 1 + strlen(data); if (ais_msg->size < CRM_BZ2_THRESHOLD) { failback: crm_realloc(ais_msg, sizeof(AIS_Message) + ais_msg->size); memcpy(ais_msg->data, data, ais_msg->size); } else { char *compressed = NULL; char *uncompressed = crm_strdup(data); unsigned int len = (ais_msg->size * 1.1) + 600; /* recomended size */ crm_trace("Compressing message payload"); crm_malloc(compressed, len); rc = BZ2_bzBuffToBuffCompress(compressed, &len, uncompressed, ais_msg->size, CRM_BZ2_BLOCKS, 0, CRM_BZ2_WORK); crm_free(uncompressed); if (rc != BZ_OK) { crm_err("Compression failed: %d", rc); crm_free(compressed); goto failback; } crm_realloc(ais_msg, sizeof(AIS_Message) + len + 1); memcpy(ais_msg->data, compressed, len); ais_msg->data[len] = 0; crm_free(compressed); ais_msg->is_compressed = TRUE; ais_msg->compressed_size = len; crm_trace("Compression details: %d -> %d", ais_msg->size, ais_data_len(ais_msg)); } ais_msg->header.size = sizeof(AIS_Message) + ais_data_len(ais_msg); crm_trace("Sending%s message %d to %s.%s (data=%d, total=%d)", ais_msg->is_compressed ? " compressed" : "", ais_msg->id, ais_dest(&(ais_msg->host)), msg_type2text(dest), ais_data_len(ais_msg), ais_msg->header.size); iov.iov_base = ais_msg; iov.iov_len = ais_msg->header.size; crm_realloc(buf, buf_len); do { if (rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { retries++; crm_info("Peer overloaded or membership in flux:" " Re-sending message (Attempt %d of 20)", retries); sleep(retries); /* Proportional back off */ } errno = 0; switch (cluster_type) { case pcmk_cluster_classic_ais: -# if CS_USES_LIBQB - rc = qb_to_cs_error(qb_ipcc_sendv_recv(ais_ipc_handle, &iov, 1, buf, buf_len, -1)); -# else rc = coroipcc_msg_send_reply_receive(ais_ipc_handle, &iov, 1, buf, buf_len); -# endif header = (cs_ipc_header_response_t *) buf; if (rc == CS_OK) { CRM_CHECK(header->size == sizeof(cs_ipc_header_response_t), crm_err("Odd message: id=%d, size=%d, class=%d, error=%d", header->id, header->size, class, header->error)); CRM_ASSERT(buf_len >= header->size); CRM_CHECK(header->id == CRM_MESSAGE_IPC_ACK, crm_err("Bad response id (%d) for request (%d)", header->id, ais_msg->header.id)); CRM_CHECK(header->error == CS_OK, rc = header->error); } break; - case pcmk_cluster_corosync: case pcmk_cluster_cman: transport = "cpg"; CRM_CHECK(dest != crm_msg_ais, rc = CS_ERR_MESSAGE_ERROR; goto bail); rc = cpg_mcast_joined(pcmk_cpg_handle, CPG_TYPE_AGREED, &iov, 1); if (rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { cpg_flow_control_state_t fc_state = CPG_FLOW_CONTROL_DISABLED; int rc2 = cpg_flow_control_state_get(pcmk_cpg_handle, &fc_state); if (rc2 == CS_OK && fc_state == CPG_FLOW_CONTROL_ENABLED) { crm_warn("Connection overloaded, cannot send messages"); goto bail; } else if (rc2 != CS_OK) { crm_warn("Could not determin the connection state: %s (%d)", ais_error2text(rc2), rc2); goto bail; } } break; case pcmk_cluster_unknown: case pcmk_cluster_invalid: case pcmk_cluster_heartbeat: CRM_ASSERT(is_openais_cluster()); break; } } while ((rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) && retries < 20); bail: if (rc != CS_OK) { crm_perror(LOG_ERR, "Sending message %d via %s: FAILED (rc=%d): %s", ais_msg->id, transport, rc, ais_error2text(rc)); } else { crm_trace("Message %d: sent", ais_msg->id); } crm_free(buf); crm_free(ais_msg); return (rc == CS_OK); } gboolean send_ais_message(xmlNode * msg, gboolean local, const char *node, enum crm_ais_msg_types dest) { gboolean rc = TRUE; char *data = NULL; if (is_classic_ais_cluster()) { if (ais_fd_async < 0 || ais_source == NULL) { crm_err("Not connected to AIS: %d %p", ais_fd_async, ais_source); return FALSE; } } data = dump_xml_unformatted(msg); rc = send_ais_text(crm_class_cluster, data, local, node, dest); crm_free(data); return rc; } void terminate_ais_connection(void) { crm_notice("Disconnecting from Corosync"); /* G_main_del_fd(ais_source); */ /* G_main_del_fd(ais_source_sync); */ if (is_classic_ais_cluster()) { if(ais_ipc_handle) { crm_trace("Disconnecting plugin"); -# if CS_USES_LIBQB - qb_ipcc_disconnect(ais_ipc_handle); - ais_ipc_handle = NULL; -# else coroipcc_service_disconnect(ais_ipc_handle); ais_ipc_handle = 0; -# endif } else { crm_info("No plugin connection"); } } else { if(pcmk_cpg_handle) { crm_trace("Disconnecting CPG"); cpg_leave(pcmk_cpg_handle, &pcmk_cpg_group); cpg_finalize(pcmk_cpg_handle); pcmk_cpg_handle = 0; } else { crm_info("No CPG connection"); } } -# ifdef SUPPORT_CS_QUORUM - if (is_corosync_cluster()) { - if(pcmk_quorum_handle) { - crm_trace("Disconnecting quorum"); - quorum_finalize(pcmk_quorum_handle); - pcmk_quorum_handle = 0; - - } else { - crm_info("No Quorum connection"); - } - } -# endif # if SUPPORT_CMAN if (is_cman_cluster()) { if(pcmk_cman_handle) { crm_trace("Disconnecting cman"); cman_stop_notification(pcmk_cman_handle); cman_finish(pcmk_cman_handle); } else { crm_info("No cman connection"); } } # endif ais_fd_async = -1; ais_fd_sync = -1; } -int ais_membership_timer = 0; -gboolean ais_membership_force = FALSE; + +static crm_node_t * +crm_update_ais_node(xmlNode * member, long long seq) +{ + const char *id_s = crm_element_value(member, "id"); + const char *addr = crm_element_value(member, "addr"); + const char *uname = crm_element_value(member, "uname"); + const char *state = crm_element_value(member, "state"); + const char *born_s = crm_element_value(member, "born"); + const char *seen_s = crm_element_value(member, "seen"); + const char *votes_s = crm_element_value(member, "votes"); + const char *procs_s = crm_element_value(member, "processes"); + + int votes = crm_int_helper(votes_s, NULL); + unsigned int id = crm_int_helper(id_s, NULL); + unsigned int procs = crm_int_helper(procs_s, NULL); + + /* TODO: These values will contain garbage if version < 0.7.1 */ + uint64_t born = crm_int_helper(born_s, NULL); + uint64_t seen = crm_int_helper(seen_s, NULL); + + return crm_update_peer(id, born, seen, votes, procs, uname, uname, addr, state); +} + +static crm_node_t * +crm_update_cman_node(xmlNode * member, long long seq) +{ + const char *id_s = crm_element_value(member, "id"); + const char *uname = crm_element_value(member, "uname"); + const char *procs_s = crm_element_value(member, "processes"); + + unsigned int id = crm_int_helper(id_s, NULL); + unsigned int procs = crm_int_helper(procs_s, NULL); + + crm_info("Updating peer processes for %s", crm_str(uname)); + return crm_update_peer(id, 0, 0, 0, procs, uname, uname, NULL, NULL); +} + static gboolean ais_dispatch_message(AIS_Message * msg, gboolean(*dispatch) (AIS_Message *, char *, int)) { char *data = NULL; char *uncompressed = NULL; xmlNode *xml = NULL; CRM_ASSERT(msg != NULL); crm_trace("Got new%s message (size=%d, %d, %d)", msg->is_compressed ? " compressed" : "", ais_data_len(msg), msg->size, msg->compressed_size); data = msg->data; if (msg->is_compressed && msg->size > 0) { int rc = BZ_OK; unsigned int new_size = msg->size + 1; if (check_message_sanity(msg, NULL) == FALSE) { goto badmsg; } crm_trace("Decompressing message data"); crm_malloc0(uncompressed, new_size); rc = BZ2_bzBuffToBuffDecompress(uncompressed, &new_size, data, msg->compressed_size, 1, 0); if (rc != BZ_OK) { crm_err("Decompression failed: %d", rc); goto badmsg; } CRM_ASSERT(rc == BZ_OK); CRM_ASSERT(new_size == msg->size); data = uncompressed; } else if (check_message_sanity(msg, data) == FALSE) { goto badmsg; } else if (safe_str_eq("identify", data)) { int pid = getpid(); char *pid_s = crm_itoa(pid); send_ais_text(crm_class_cluster, pid_s, TRUE, NULL, crm_msg_ais); crm_free(pid_s); goto done; } if (msg->header.id != crm_class_members) { crm_update_peer(msg->sender.id, 0, 0, 0, 0, msg->sender.uname, msg->sender.uname, NULL, NULL); } if (msg->header.id == crm_class_rmpeer) { uint32_t id = crm_int_helper(data, NULL); crm_info("Removing peer %s/%u", data, id); reap_crm_member(id); goto done; } else if (msg->header.id == crm_class_members || msg->header.id == crm_class_quorum) { xml = string2xml(data); if (xml == NULL) { crm_err("Invalid membership update: %s", data); goto badmsg; } if (is_classic_ais_cluster() == FALSE) { xmlNode *node = NULL; for (node = __xml_first_child(xml); node != NULL; node = __xml_next(node)) { crm_update_cman_node(node, crm_peer_seq); } } else { xmlNode *node = NULL; const char *value = NULL; gboolean quorate = FALSE; value = crm_element_value(xml, "quorate"); CRM_CHECK(value != NULL, crm_log_xml_err(xml, "No quorum value:"); goto badmsg); if (crm_is_true(value)) { quorate = TRUE; } value = crm_element_value(xml, "id"); CRM_CHECK(value != NULL, crm_log_xml_err(xml, "No membership id"); goto badmsg); crm_peer_seq = crm_int_helper(value, NULL); if (quorate != crm_have_quorum) { crm_notice("Membership %s: quorum %s", value, quorate ? "acquired" : "lost"); crm_have_quorum = quorate; } else { crm_info("Membership %s: quorum %s", value, quorate ? "retained" : "still lost"); } for (node = __xml_first_child(xml); node != NULL; node = __xml_next(node)) { crm_update_ais_node(node, crm_peer_seq); } } } crm_trace("Payload: %s", data); if (dispatch != NULL) { dispatch(msg, data, 0); } done: crm_free(uncompressed); free_xml(xml); return TRUE; badmsg: crm_err("Invalid message (id=%d, dest=%s:%s, from=%s:%s.%d):" " min=%d, total=%d, size=%d, bz2_size=%d", msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, (int)sizeof(AIS_Message), msg->header.size, msg->size, msg->compressed_size); goto done; } gboolean ais_dispatch(int sender, gpointer user_data) { int rc = CS_OK; gboolean good = TRUE; gboolean(*dispatch) (AIS_Message *, char *, int) = user_data; do { -# if CS_USES_LIBQB - char buffer[AIS_IPC_MESSAGE_SIZE]; - - rc = qb_to_cs_error(qb_ipcc_event_recv(ais_ipc_handle, (void *)buffer, - AIS_IPC_MESSAGE_SIZE, 100)); -# else char *buffer = NULL; rc = coroipcc_dispatch_get(ais_ipc_handle, (void **)&buffer, 0); -# endif - if (rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { return TRUE; } if (rc != CS_OK) { crm_perror(LOG_ERR, "Receiving message body failed: (%d) %s", rc, ais_error2text(rc)); goto bail; } -# if !CS_USES_LIBQB if (buffer == NULL) { /* NULL is a legal "no message afterall" value */ return TRUE; } -# endif good = ais_dispatch_message((AIS_Message *) buffer, dispatch); -# if !CS_USES_LIBQB coroipcc_dispatch_put(ais_ipc_handle); -# endif } while (good && ais_ipc_handle); return good; bail: if(ais_ipc_handle) { crm_err("AIS connection failed: %p", (void*)ais_ipc_handle); } return FALSE; } static void ais_destroy(gpointer user_data) { crm_err("AIS connection terminated"); ais_fd_sync = -1; exit(1); } static gboolean pcmk_proc_dispatch(IPC_Channel * ch, gpointer user_data) { xmlNode *msg = NULL; gboolean stay_connected = TRUE; while (IPC_ISRCONN(ch)) { if (ch->ops->is_message_pending(ch) == 0) { break; } msg = xmlfromIPC(ch, MAX_IPC_DELAY); if (msg) { xmlNode *node = NULL; for (node = __xml_first_child(msg); node != NULL; node = __xml_next(node)) { int id = 0; int children = 0; const char *uname = crm_element_value(node, "uname"); crm_element_value_int(node, "id", &id); crm_element_value_int(node, "processes", &children); if (id == 0) { crm_log_xml_err(msg, "Bad Update"); } else { crm_update_peer(id, 0, 0, 0, children, NULL, uname, NULL, NULL); } } free_xml(msg); } if (ch->ch_status != IPC_CONNECT) { break; } } if (ch->ch_status != IPC_CONNECT) { stay_connected = FALSE; } return stay_connected; } # if SUPPORT_CMAN static gboolean pcmk_cman_dispatch(int sender, gpointer user_data) { int rc = cman_dispatch(pcmk_cman_handle, CMAN_DISPATCH_ALL); if (rc < 0) { crm_err("Connection to cman failed: %d", rc); return FALSE; } return TRUE; } # define MAX_NODES 256 static void cman_event_callback(cman_handle_t handle, void *privdata, int reason, int arg) { int rc = 0, lpc = 0, node_count = 0; cman_cluster_t cluster; static cman_node_t cman_nodes[MAX_NODES]; gboolean(*dispatch) (unsigned long long, gboolean) = privdata; switch (reason) { case CMAN_REASON_STATECHANGE: memset(&cluster, 0, sizeof(cluster)); rc = cman_get_cluster(pcmk_cman_handle, &cluster); if (rc < 0) { crm_err("Couldn't query cman cluster details: %d %d", rc, errno); return; } crm_peer_seq = cluster.ci_generation; if (arg != crm_have_quorum) { crm_notice("Membership %llu: quorum %s", crm_peer_seq, arg ? "acquired" : "lost"); crm_have_quorum = arg; } else { crm_info("Membership %llu: quorum %s", crm_peer_seq, arg ? "retained" : "still lost"); } rc = cman_get_nodes(pcmk_cman_handle, MAX_NODES, &node_count, cman_nodes); if (rc < 0) { crm_err("Couldn't query cman node list: %d %d", rc, errno); return; } for (lpc = 0; lpc < node_count; lpc++) { if (cman_nodes[lpc].cn_nodeid == 0) { /* Never allow node ID 0 to be considered a member #315711 */ cman_nodes[lpc].cn_member = 0; } crm_update_peer(cman_nodes[lpc].cn_nodeid, cman_nodes[lpc].cn_incarnation, cman_nodes[lpc].cn_member ? crm_peer_seq : 0, 0, 0, cman_nodes[lpc].cn_name, cman_nodes[lpc].cn_name, NULL, cman_nodes[lpc].cn_member ? CRM_NODE_MEMBER : CRM_NODE_LOST); } if (dispatch) { dispatch(crm_peer_seq, crm_have_quorum); } break; case CMAN_REASON_TRY_SHUTDOWN: /* Always reply with a negative - pacemaker needs to be stopped first */ crm_info("CMAN wants to shut down: %s", arg ? "forced" : "optional"); cman_replyto_shutdown(pcmk_cman_handle, 0); break; case CMAN_REASON_CONFIG_UPDATE: /* Ignore */ break; } } # endif gboolean init_cman_connection(gboolean(*dispatch) (unsigned long long, gboolean), void (*destroy) (gpointer)) { # if SUPPORT_CMAN int rc = -1, fd = -1; cman_cluster_t cluster; crm_info("Configuring Pacemaker to obtain quorum from cman"); memset(&cluster, 0, sizeof(cluster)); pcmk_cman_handle = cman_init(dispatch); if (pcmk_cman_handle == NULL || cman_is_active(pcmk_cman_handle) == FALSE) { crm_err("Couldn't connect to cman"); goto cman_bail; } rc = cman_get_cluster(pcmk_cman_handle, &cluster); if (rc < 0) { crm_err("Couldn't query cman cluster details: %d %d", rc, errno); goto cman_bail; } ais_cluster_name = crm_strdup(cluster.ci_name); rc = cman_start_notification(pcmk_cman_handle, cman_event_callback); if (rc < 0) { crm_err("Couldn't register for cman notifications: %d %d", rc, errno); goto cman_bail; } /* Get the current membership state */ cman_event_callback(pcmk_cman_handle, dispatch, CMAN_REASON_STATECHANGE, cman_is_quorate(pcmk_cman_handle)); fd = cman_get_fd(pcmk_cman_handle); crm_debug("Adding fd=%d to mainloop", fd); cman_source = G_main_add_fd(G_PRIORITY_HIGH, fd, FALSE, pcmk_cman_dispatch, dispatch, destroy); cman_bail: if (rc < 0) { cman_finish(pcmk_cman_handle); return FALSE; } # else crm_err("cman qorum is not supported in this build"); exit(100); # endif return TRUE; } # ifdef SUPPORT_COROSYNC gboolean(*pcmk_cpg_dispatch_fn) (AIS_Message *, char *, int) = NULL; static gboolean pcmk_cpg_dispatch(int sender, gpointer user_data) { int rc = 0; pcmk_cpg_dispatch_fn = user_data; rc = cpg_dispatch(pcmk_cpg_handle, CS_DISPATCH_ALL); if (rc != CS_OK) { crm_err("Connection to the CPG API failed: %d", rc); return FALSE; } return TRUE; } static void pcmk_cpg_deliver(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { AIS_Message *ais_msg = (AIS_Message *) msg; if (ais_msg->sender.id > 0 && ais_msg->sender.id != nodeid) { crm_err("Nodeid mismatch from %d.%d: claimed nodeid=%u", nodeid, pid, ais_msg->sender.id); return; } else if (ais_msg->host.size != 0 && safe_str_neq(ais_msg->host.uname, pcmk_uname)) { /* Not for us */ return; } ais_msg->sender.id = nodeid; if (ais_msg->sender.size == 0) { crm_node_t *peer = crm_get_peer(nodeid, NULL); if (peer == NULL) { crm_err("Peer with nodeid=%u is unknown", nodeid); } else if (peer->uname == NULL) { crm_err("No uname for peer with nodeid=%u", nodeid); } else { crm_notice("Fixing uname for peer with nodeid=%u", nodeid); ais_msg->sender.size = strlen(peer->uname); memset(ais_msg->sender.uname, 0, MAX_NAME); memcpy(ais_msg->sender.uname, peer->uname, ais_msg->sender.size); } } ais_dispatch_message(ais_msg, pcmk_cpg_dispatch_fn); } static void pcmk_cpg_membership(cpg_handle_t handle, const struct cpg_name *groupName, const struct cpg_address *member_list, size_t member_list_entries, const struct cpg_address *left_list, size_t left_list_entries, const struct cpg_address *joined_list, size_t joined_list_entries) { int i; for (i = 0; i < member_list_entries; i++) { crm_debug("Member[%d] %d ", i, member_list[i].nodeid); } for (i = 0; i < left_list_entries; i++) { crm_debug("Left[%d] %d ", i, left_list[i].nodeid); } } cpg_callbacks_t cpg_callbacks = { .cpg_deliver_fn = pcmk_cpg_deliver, .cpg_confchg_fn = pcmk_cpg_membership, }; # endif -# ifdef SUPPORT_CS_QUORUM -static gboolean -pcmk_quorum_dispatch(int sender, gpointer user_data) -{ - int rc = 0; - - rc = quorum_dispatch(pcmk_quorum_handle, CS_DISPATCH_ALL); - if (rc < 0) { - crm_err("Connection to the Quorum API failed: %d", rc); - return FALSE; - } - return TRUE; -} - -gboolean(*quorum_app_callback) (unsigned long long seq, gboolean quorate) = NULL; - -static void -corosync_mark_unseen_peer_dead(gpointer key, gpointer value, gpointer user_data) -{ - int *seq = user_data; - crm_node_t *node = value; - - if (node->last_seen != *seq && crm_str_eq(CRM_NODE_LOST, node->state, TRUE) == FALSE) { - crm_notice("Node %d/%s was not seen in the previous transition", node->id, node->uname); - crm_update_peer(node->id, 0, 0, 0, 0, NULL, NULL, NULL, CRM_NODE_LOST); - } -} - -static void -corosync_mark_node_unseen(gpointer key, gpointer value, gpointer user_data) -{ - crm_node_t *node = value; - - node->last_seen = 0; -} - -static void -pcmk_quorum_notification(quorum_handle_t handle, - uint32_t quorate, - uint64_t ring_id, uint32_t view_list_entries, uint32_t * view_list) -{ - int i; - - if (quorate != crm_have_quorum) { - crm_notice("Membership " U64T ": quorum %s (%lu)", ring_id, - quorate ? "acquired" : "lost", (long unsigned int)view_list_entries); - crm_have_quorum = quorate; - - } else { - crm_info("Membership " U64T ": quorum %s (%lu)", ring_id, - quorate ? "retained" : "still lost", (long unsigned int)view_list_entries); - } - - g_hash_table_foreach(crm_peer_cache, corosync_mark_node_unseen, NULL); - - for (i = 0; i < view_list_entries; i++) { - char *uuid = get_corosync_uuid(view_list[i], NULL); - - crm_debug("Member[%d] %d ", i, view_list[i]); - - crm_update_peer(view_list[i], 0, ring_id, 0, 0, uuid, NULL, NULL, CRM_NODE_MEMBER); - } - - crm_trace("Reaping unseen nodes..."); - g_hash_table_foreach(crm_peer_cache, corosync_mark_unseen_peer_dead, &ring_id); - - if (quorum_app_callback) { - quorum_app_callback(ring_id, quorate); - } -} - -quorum_callbacks_t quorum_callbacks = { - .quorum_notify_fn = pcmk_quorum_notification, -}; - -# endif - static gboolean init_cpg_connection(gboolean(*dispatch) (AIS_Message *, char *, int), void (*destroy) (gpointer), uint32_t * nodeid) { # ifdef SUPPORT_COROSYNC int rc = -1; int fd = 0; int retries = 0; strcpy(pcmk_cpg_group.value, crm_system_name); pcmk_cpg_group.length = strlen(crm_system_name) + 1; cs_repeat(retries, 30, rc = cpg_initialize(&pcmk_cpg_handle, &cpg_callbacks)); if (rc != CS_OK) { crm_err("Could not connect to the Cluster Process Group API: %d\n", rc); goto bail; } retries = 0; cs_repeat(retries, 30, rc = cpg_local_get(pcmk_cpg_handle, (unsigned int *)nodeid)); if (rc != CS_OK) { crm_err("Could not get local node id from the CPG API"); goto bail; } retries = 0; cs_repeat(retries, 30, rc = cpg_join(pcmk_cpg_handle, &pcmk_cpg_group)); if (rc != CS_OK) { crm_err("Could not join the CPG group '%s': %d", crm_system_name, rc); goto bail; } rc = cpg_fd_get(pcmk_cpg_handle, &fd); if (rc != CS_OK) { crm_err("Could not obtain the CPG API connection: %d\n", rc); goto bail; } crm_debug("Adding fd=%d to mainloop", fd); cpg_source = G_main_add_fd(G_PRIORITY_HIGH, fd, FALSE, pcmk_cpg_dispatch, dispatch, destroy); bail: if (rc != CS_OK) { cpg_finalize(pcmk_cpg_handle); return FALSE; } # else crm_err("The Corosync CPG API is not supported in this build"); exit(100); # endif return TRUE; } gboolean init_quorum_connection(gboolean(*dispatch) (unsigned long long, gboolean), void (*destroy) (gpointer)) { -# ifdef SUPPORT_CS_QUORUM - int rc = -1; - int fd = 0; - int quorate = 0; - uint32_t quorum_type = 0; - - crm_debug("Configuring Pacemaker to obtain quorum from Corosync"); - - rc = quorum_initialize(&pcmk_quorum_handle, &quorum_callbacks, &quorum_type); - if (rc != CS_OK) { - crm_err("Could not connect to the Quorum API: %d\n", rc); - goto bail; - - } else if (quorum_type != QUORUM_SET) { - crm_err("Corosync quorum is not configured\n"); - goto bail; - } - - rc = quorum_getquorate(pcmk_quorum_handle, &quorate); - if (rc != CS_OK) { - crm_err("Could not obtain the current Quorum API state: %d\n", rc); - goto bail; - } - crm_notice("Quorum %s", quorate ? "acquired" : "lost"); - quorum_app_callback = dispatch; - crm_have_quorum = quorate; - - rc = quorum_trackstart(pcmk_quorum_handle, CS_TRACK_CHANGES | CS_TRACK_CURRENT); - if (rc != CS_OK) { - crm_err("Could not setup Quorum API notifications: %d\n", rc); - goto bail; - } - - rc = quorum_fd_get(pcmk_quorum_handle, &fd); - if (rc != CS_OK) { - crm_err("Could not obtain the Quorum API connection: %d\n", rc); - goto bail; - } - - quorumd_source = - G_main_add_fd(G_PRIORITY_HIGH, fd, FALSE, pcmk_quorum_dispatch, dispatch, destroy); - - bail: - if (rc != CS_OK) { - quorum_finalize(pcmk_quorum_handle); - return FALSE; - } -# else crm_err("The Corosync quorum API is not supported in this build"); exit(100); -# endif return TRUE; } static gboolean init_ais_connection_classic(gboolean(*dispatch) (AIS_Message *, char *, int), void (*destroy) (gpointer), char **our_uuid, char **our_uname, int *nodeid) { int rc; int pid = 0; char *pid_s = NULL; struct utsname name; crm_info("Creating connection to our Corosync plugin"); -# if CS_USES_LIBQB - rc = CS_OK; - ais_ipc_handle = qb_ipcc_connect("pacemaker.engine", AIS_IPC_MESSAGE_SIZE); -# else rc = coroipcc_service_connect(COROSYNC_SOCKET_NAME, PCMK_SERVICE_ID, AIS_IPC_MESSAGE_SIZE, AIS_IPC_MESSAGE_SIZE, AIS_IPC_MESSAGE_SIZE, &ais_ipc_handle); -# endif if (ais_ipc_handle) { -# if CS_USES_LIBQB - qb_ipcc_fd_get(ais_ipc_handle, &ais_fd_async); -# else coroipcc_fd_get(ais_ipc_handle, &ais_fd_async); -# endif } else { crm_info("Connection to our AIS plugin (%d) failed: %s (%d)", PCMK_SERVICE_ID, strerror(errno), errno); return FALSE; } if (ais_fd_async <= 0 && rc == CS_OK) { crm_err("No context created, but connection reported 'ok'"); rc = CS_ERR_LIBRARY; } if (rc != CS_OK) { crm_info("Connection to our AIS plugin (%d) failed: %s (%d)", PCMK_SERVICE_ID, ais_error2text(rc), rc); } if (rc != CS_OK) { return FALSE; } if (destroy == NULL) { destroy = ais_destroy; } if (dispatch) { crm_debug("Adding fd=%d to mainloop", ais_fd_async); ais_source = G_main_add_fd(G_PRIORITY_HIGH, ais_fd_async, FALSE, ais_dispatch, dispatch, destroy); } crm_info("AIS connection established"); pid = getpid(); pid_s = crm_itoa(pid); send_ais_text(crm_class_cluster, pid_s, TRUE, NULL, crm_msg_ais); crm_free(pid_s); if (uname(&name) < 0) { crm_perror(LOG_ERR, "Could not determin the current host"); exit(100); } get_ais_nodeid(&pcmk_nodeid, &pcmk_uname); if (safe_str_neq(name.nodename, pcmk_uname)) { crm_crit("Node name mismatch! OpenAIS supplied %s, our lookup returned %s", pcmk_uname, name.nodename); crm_notice ("Node name mismatches usually occur when assigned automatically by DHCP servers"); crm_notice("If this node was part of the cluster with a different name," " you will need to remove the old entry with crm_node --remove"); } return TRUE; } gboolean init_ais_connection(gboolean(*dispatch) (AIS_Message *, char *, int), void (*destroy) (gpointer), char **our_uuid, char **our_uname, int *nodeid) { int retries = 0; while (retries++ < 30) { int rc = init_ais_connection_once(dispatch, destroy, our_uuid, our_uname, nodeid); switch (rc) { case CS_OK: if (getenv("HA_mcp")) { IPC_Channel *ch = init_client_ipc_comms_nodispatch("pcmk"); G_main_add_IPC_Channel(G_PRIORITY_HIGH, ch, FALSE, pcmk_proc_dispatch, NULL, destroy); } return TRUE; break; case CS_ERR_TRY_AGAIN: case CS_ERR_QUEUE_FULL: break; default: return FALSE; } } crm_err("Retry count exceeded: %d", retries); return FALSE; } static char * get_local_node_name(void) { char *name = NULL; struct utsname res; if (is_cman_cluster()) { # if SUPPORT_CMAN cman_node_t us; cman_handle_t cman; cman = cman_init(NULL); if (cman != NULL && cman_is_active(cman)) { us.cn_name[0] = 0; cman_get_node(cman, CMAN_NODEID_US, &us); name = crm_strdup(us.cn_name); crm_info("Using CMAN node name: %s", name); } else { crm_err("Couldn't determin node name from CMAN"); } cman_finish(cman); # endif } else if (uname(&res) < 0) { crm_perror(LOG_ERR, "Could not determin the current host"); exit(100); } else { name = crm_strdup(res.nodename); } return name; } extern int set_cluster_type(enum cluster_type_e type); gboolean init_ais_connection_once(gboolean(*dispatch) (AIS_Message *, char *, int), void (*destroy) (gpointer), char **our_uuid, char **our_uname, int *nodeid) { enum cluster_type_e stack = get_cluster_type(); crm_peer_init(); /* Here we just initialize comms */ switch (stack) { case pcmk_cluster_classic_ais: if (init_ais_connection_classic(dispatch, destroy, our_uuid, &pcmk_uname, nodeid) == FALSE) { return FALSE; } break; case pcmk_cluster_cman: - case pcmk_cluster_corosync: if (init_cpg_connection(dispatch, destroy, &pcmk_nodeid) == FALSE) { return FALSE; } pcmk_uname = get_local_node_name(); break; case pcmk_cluster_heartbeat: crm_info("Could not find an active corosync based cluster"); return FALSE; break; default: crm_err("Invalid cluster type: %s (%d)", name_for_cluster_type(stack), stack); return FALSE; break; } crm_info("Connection to '%s': established", name_for_cluster_type(stack)); CRM_ASSERT(pcmk_uname != NULL); pcmk_uname_len = strlen(pcmk_uname); if (pcmk_nodeid != 0) { /* Ensure the local node always exists */ crm_update_peer(pcmk_nodeid, 0, 0, 0, 0, pcmk_uname, pcmk_uname, NULL, NULL); } if (our_uuid != NULL) { *our_uuid = get_corosync_uuid(pcmk_nodeid, pcmk_uname); } if (our_uname != NULL) { *our_uname = crm_strdup(pcmk_uname); } if (nodeid != NULL) { *nodeid = pcmk_nodeid; } return TRUE; } gboolean check_message_sanity(const AIS_Message * msg, const char *data) { gboolean sane = TRUE; gboolean repaired = FALSE; int dest = msg->host.type; int tmp_size = msg->header.size - sizeof(AIS_Message); if (sane && msg->header.size == 0) { crm_warn("Message with no size"); sane = FALSE; } if (sane && msg->header.error != CS_OK) { crm_warn("Message header contains an error: %d", msg->header.error); sane = FALSE; } if (sane && ais_data_len(msg) != tmp_size) { crm_warn("Message payload size is incorrect: expected %d, got %d", ais_data_len(msg), tmp_size); sane = TRUE; } if (sane && ais_data_len(msg) == 0) { crm_warn("Message with no payload"); sane = FALSE; } if (sane && data && msg->is_compressed == FALSE) { int str_size = strlen(data) + 1; if (ais_data_len(msg) != str_size) { int lpc = 0; crm_warn("Message payload is corrupted: expected %d bytes, got %d", ais_data_len(msg), str_size); sane = FALSE; for (lpc = (str_size - 10); lpc < msg->size; lpc++) { if (lpc < 0) { lpc = 0; } crm_debug("bad_data[%d]: %d / '%c'", lpc, data[lpc], data[lpc]); } } } if (sane == FALSE) { crm_err("Invalid message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } else if (repaired) { crm_err ("Repaired message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } else { crm_trace ("Verfied message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } return sane; } #endif -#if HAVE_CONFDB static int get_config_opt(confdb_handle_t config, hdb_handle_t object_handle, const char *key, char **value, const char *fallback) { size_t len = 0; char *env_key = NULL; const char *env_value = NULL; char buffer[256]; if (*value) { crm_free(*value); *value = NULL; } if (object_handle > 0) { if (CS_OK == confdb_key_get(config, object_handle, key, strlen(key), &buffer, &len)) { *value = crm_strdup(buffer); } } if (*value) { crm_info("Found '%s' for option: %s", *value, key); return 0; } env_key = crm_concat("HA", key, '_'); env_value = getenv(env_key); crm_free(env_key); if (*value) { crm_info("Found '%s' in ENV for option: %s", *value, key); *value = crm_strdup(env_value); return 0; } if (fallback) { crm_info("Defaulting to '%s' for option: %s", fallback, key); *value = crm_strdup(fallback); } else { crm_info("No default for option: %s", key); } return -1; } static confdb_handle_t config_find_init(confdb_handle_t config) { cs_error_t rc = CS_OK; confdb_handle_t local_handle = OBJECT_PARENT_HANDLE; rc = confdb_object_find_start(config, local_handle); if (rc == CS_OK) { return local_handle; } else { crm_err("Couldn't create search context: %d", rc); } return 0; } static hdb_handle_t config_find_next(confdb_handle_t config, const char *name, confdb_handle_t top_handle) { cs_error_t rc = CS_OK; hdb_handle_t local_handle = 0; if (top_handle == 0) { crm_err("Couldn't search for %s: no valid context", name); return 0; } crm_trace("Searching for %s in " HDB_X_FORMAT, name, top_handle); rc = confdb_object_find(config, top_handle, name, strlen(name), &local_handle); if (rc != CS_OK) { crm_info("No additional configuration supplied for: %s", name); local_handle = 0; } else { crm_info("Processing additional %s options...", name); } return local_handle; } enum cluster_type_e find_corosync_variant(void) { confdb_handle_t config; enum cluster_type_e found = pcmk_cluster_unknown; int rc; char *value = NULL; confdb_handle_t top_handle = 0; hdb_handle_t local_handle = 0; static confdb_callbacks_t callbacks = { }; rc = confdb_initialize(&config, &callbacks); if (rc != CS_OK) { crm_debug("Could not initialize Cluster Configuration Database API instance error %d", rc); return found; } top_handle = config_find_init(config); local_handle = config_find_next(config, "service", top_handle); while (local_handle) { crm_free(value); get_config_opt(config, local_handle, "name", &value, NULL); if (safe_str_eq("pacemaker", value)) { found = pcmk_cluster_classic_ais; crm_free(value); get_config_opt(config, local_handle, "ver", &value, "0"); crm_trace("Found Pacemaker plugin version: %s", value); break; } local_handle = config_find_next(config, "service", top_handle); } crm_free(value); if (found == pcmk_cluster_unknown) { top_handle = config_find_init(config); local_handle = config_find_next(config, "quorum", top_handle); get_config_opt(config, local_handle, "provider", &value, NULL); if (safe_str_eq("quorum_cman", value)) { crm_trace("Found CMAN quorum provider"); found = pcmk_cluster_cman; } } crm_free(value); - if (found == pcmk_cluster_unknown) { - crm_trace("Defaulting to a 'bare' corosync cluster"); - found = pcmk_cluster_corosync; - } - confdb_finalize(config); return found; } -#else -enum cluster_type_e -find_corosync_variant(void) -{ - int rc = CS_OK; - cmap_handle_t handle; - - /* There can be only one (possibility if confdb isn't around) */ - rc = cmap_initialize(&handle); - if (rc != CS_OK) { - crm_info("Failed to initialize the cmap API. Error %d", rc); - return pcmk_cluster_unknown; - } - - cmap_finalize(handle); - return pcmk_cluster_corosync; -} -#endif diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c index b00318cfb1..25762a4f80 100644 --- a/lib/cluster/membership.c +++ b/lib/cluster/membership.c @@ -1,511 +1,474 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include #include GHashTable *crm_peer_id_cache = NULL; GHashTable *crm_peer_cache = NULL; unsigned long long crm_peer_seq = 0; gboolean crm_have_quorum = FALSE; gboolean crm_is_member_active(const crm_node_t * node) { if (node && safe_str_eq(node->state, CRM_NODE_MEMBER)) { return TRUE; } return FALSE; } gboolean crm_is_full_member(const crm_node_t * node) { if (crm_is_member_active(node) && (node->processes & crm_proc_crmd)) { return TRUE; } return FALSE; } static gboolean crm_reap_dead_member(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; crm_node_t *search = user_data; if (search != NULL && node->id != search->id) { return FALSE; } else if (crm_is_member_active(value) == FALSE) { crm_notice("Removing %s/%u from the membership list", node->uname, node->id); return TRUE; } return FALSE; } guint reap_crm_member(uint32_t id) { int matches = 0; crm_node_t *node = g_hash_table_lookup(crm_peer_id_cache, GUINT_TO_POINTER(id)); if (node == NULL) { crm_info("Peer %u is unknown", id); } else if (crm_is_member_active(node)) { crm_warn("Peer %u/%s is still active", id, node->uname); } else { if (g_hash_table_remove(crm_peer_id_cache, GUINT_TO_POINTER(id))) { crm_notice("Removed dead peer %u from the uuid cache", id); } else { crm_warn("Peer %u/%s was not removed", id, node->uname); } matches = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, node); crm_notice("Removed %d dead peers with id=%u from the membership list", matches, id); } return matches; } static void crm_count_member(gpointer key, gpointer value, gpointer user_data) { guint *count = user_data; if (crm_is_full_member(value)) { *count = *count + 1; } } guint crm_active_members(void) { guint count = 0; g_hash_table_foreach(crm_peer_cache, crm_count_member, &count); return count; } struct peer_count_s { uint32_t peer; guint count; }; static void crm_count_peer(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; struct peer_count_s *search = user_data; if (crm_is_member_active(node) && (node->processes & search->peer)) { search->count = search->count + 1; } } guint crm_active_peers(uint32_t peer) { struct peer_count_s search; search.count = 0; search.peer = peer; g_hash_table_foreach(crm_peer_cache, crm_count_peer, &search); return search.count; } void destroy_crm_node(gpointer data) { crm_node_t *node = data; crm_trace("Destroying entry for node %u", node->id); crm_free(node->addr); crm_free(node->uname); crm_free(node->state); crm_free(node->uuid); crm_free(node); } void crm_peer_init(void) { static gboolean initialized = FALSE; if (initialized) { return; } initialized = TRUE; crm_peer_destroy(); if (crm_peer_cache == NULL) { crm_peer_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, destroy_crm_node); } if (crm_peer_id_cache == NULL) { crm_peer_id_cache = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL); } } void crm_peer_destroy(void) { if (crm_peer_cache != NULL) { g_hash_table_destroy(crm_peer_cache); crm_peer_cache = NULL; } if (crm_peer_id_cache != NULL) { g_hash_table_destroy(crm_peer_id_cache); crm_peer_id_cache = NULL; } } void (*crm_status_callback) (enum crm_status_type, crm_node_t *, const void *) = NULL; void crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *)) { crm_status_callback = dispatch; } static crm_node_t * crm_new_peer(unsigned int id, const char *uname) { crm_node_t *node = NULL; CRM_CHECK(uname != NULL || id > 0, return NULL); crm_debug("Creating entry for node %s/%u", uname, id); crm_malloc0(node, sizeof(crm_node_t)); node->state = crm_strdup("unknown"); if (id > 0) { node->id = id; crm_info("Node %s now has id: %u", crm_str(uname), id); g_hash_table_replace(crm_peer_id_cache, GUINT_TO_POINTER(node->id), node); } if (uname) { node->uname = crm_strdup(uname); CRM_ASSERT(node->uname != NULL); crm_info("Node %u is now known as %s", id, node->uname); g_hash_table_replace(crm_peer_cache, node->uname, node); if (node->uuid == NULL) { const char *uuid = get_node_uuid(id, node->uname); if (node->uuid) { crm_info("Node %u has uuid %s", id, node->uuid); } else { node->uuid = crm_strdup(uuid); } } if (crm_status_callback) { crm_status_callback(crm_status_uname, node, NULL); } } return node; } crm_node_t * crm_get_peer(unsigned int id, const char *uname) { crm_node_t *node = NULL; if (uname != NULL) { node = g_hash_table_lookup(crm_peer_cache, uname); } if (node == NULL && id > 0) { node = g_hash_table_lookup(crm_peer_id_cache, GUINT_TO_POINTER(id)); if (node && node->uname && uname) { crm_crit("Node %s and %s share the same cluster node id '%u'!", node->uname, uname, id); /* NOTE: Calling crm_new_peer() means the entry in * crm_peer_id_cache will point to the new entity */ /* TODO: Replace the old uname instead? */ node = crm_new_peer(id, uname); CRM_ASSERT(node->uname != NULL); } } if (node && uname && node->uname == NULL) { node->uname = crm_strdup(uname); crm_info("Node %u is now known as %s", id, uname); g_hash_table_insert(crm_peer_cache, node->uname, node); if (crm_status_callback) { crm_status_callback(crm_status_uname, node, NULL); } } if (node && node->uuid == NULL) { const char *uuid = get_node_uuid(id, node->uname); if (node->uuid) { crm_info("Node %u has uuid %s", id, node->uuid); } else if (uuid) { node->uuid = crm_strdup(uuid); } } if (node && id > 0 && id != node->id) { g_hash_table_remove(crm_peer_id_cache, GUINT_TO_POINTER(node->id)); g_hash_table_insert(crm_peer_id_cache, GUINT_TO_POINTER(id), node); node->id = id; crm_info("Node %s now has id: %u", crm_str(uname), id); } return node; } crm_node_t * crm_update_peer(unsigned int id, uint64_t born, uint64_t seen, int32_t votes, uint32_t children, const char *uuid, const char *uname, const char *addr, const char *state) { gboolean addr_changed = FALSE; gboolean state_changed = FALSE; gboolean procs_changed = FALSE; gboolean votes_changed = FALSE; crm_node_t *node = NULL; id = get_corosync_id(id, uuid); CRM_CHECK(uname != NULL || id > 0, return NULL); CRM_ASSERT(crm_peer_cache != NULL); CRM_ASSERT(crm_peer_id_cache != NULL); node = crm_get_peer(id, uname); if (node == NULL) { crm_trace("No node found for %d/%s", id, uname); node = crm_new_peer(id, uname); CRM_LOG_ASSERT(node != NULL); if (node == NULL) { crm_err("Insufficient information to create node %d/%s", id, uname); return NULL; } /* do it now so we don't get '(new)' everywhere */ node->votes = votes; node->processes = children; if (addr) { node->addr = crm_strdup(addr); } } if (votes > 0 && node->votes != votes) { votes_changed = TRUE; node->votes = votes; } if (node->uuid == NULL) { if (is_openais_cluster()) { /* Yes, overrule whatever was passed in */ node->uuid = get_corosync_uuid(id, uname); } else if (uuid != NULL) { node->uuid = crm_strdup(uuid); } } if (children > 0 && children != node->processes) { uint32_t last = node->processes; node->processes = children; procs_changed = TRUE; if (crm_status_callback) { crm_status_callback(crm_status_processes, node, &last); } } if (born != 0) { node->born = born; } if (state != NULL && safe_str_neq(node->state, state)) { char *last = node->state; node->state = crm_strdup(state); state_changed = TRUE; if (crm_status_callback) { crm_status_callback(crm_status_nstate, node, last); } crm_free(last); } if (seen != 0 && crm_is_member_active(node)) { node->last_seen = seen; } if (addr != NULL) { if (node->addr == NULL || crm_str_eq(node->addr, addr, FALSE) == FALSE) { addr_changed = TRUE; crm_free(node->addr); node->addr = crm_strdup(addr); } } if (state_changed || addr_changed || votes_changed) { crm_info("Node %s: id=%u state=%s%s addr=%s%s votes=%d%s born=" U64T " seen=" U64T " proc=%.32x%s", node->uname, node->id, node->state, state_changed ? " (new)" : "", node->addr, addr_changed ? " (new)" : "", node->votes, votes_changed ? " (new)" : "", node->born, node->last_seen, node->processes, procs_changed ? " (new)" : ""); } else if (procs_changed) { crm_debug("Node %s: id=%u seen=" U64T " proc=%.32x (new)", node->uname, node->id, node->last_seen, node->processes); } return node; } -crm_node_t * -crm_update_ais_node(xmlNode * member, long long seq) -{ - const char *id_s = crm_element_value(member, "id"); - const char *addr = crm_element_value(member, "addr"); - const char *uname = crm_element_value(member, "uname"); - const char *state = crm_element_value(member, "state"); - const char *born_s = crm_element_value(member, "born"); - const char *seen_s = crm_element_value(member, "seen"); - const char *votes_s = crm_element_value(member, "votes"); - const char *procs_s = crm_element_value(member, "processes"); - - int votes = crm_int_helper(votes_s, NULL); - unsigned int id = crm_int_helper(id_s, NULL); - unsigned int procs = crm_int_helper(procs_s, NULL); - - /* TODO: These values will contain garbage if version < 0.7.1 */ - uint64_t born = crm_int_helper(born_s, NULL); - uint64_t seen = crm_int_helper(seen_s, NULL); - - return crm_update_peer(id, born, seen, votes, procs, uname, uname, addr, state); -} - -crm_node_t * -crm_update_cman_node(xmlNode * member, long long seq) -{ - const char *id_s = crm_element_value(member, "id"); - const char *uname = crm_element_value(member, "uname"); - const char *procs_s = crm_element_value(member, "processes"); - - unsigned int id = crm_int_helper(id_s, NULL); - unsigned int procs = crm_int_helper(procs_s, NULL); - - crm_info("Updating peer processes for %s", crm_str(uname)); - return crm_update_peer(id, 0, 0, 0, procs, uname, uname, NULL, NULL); -} - #if SUPPORT_HEARTBEAT crm_node_t * crm_update_ccm_node(const oc_ev_membership_t * oc, int offset, const char *state, uint64_t seq) { crm_node_t *node = NULL; const char *uuid = NULL; CRM_CHECK(oc->m_array[offset].node_uname != NULL, return NULL); uuid = get_uuid(oc->m_array[offset].node_uname); node = crm_update_peer(oc->m_array[offset].node_id, oc->m_array[offset].node_born_on, seq, -1, 0, uuid, oc->m_array[offset].node_uname, NULL, state); if (safe_str_eq(CRM_NODE_ACTIVE, state)) { /* Heartbeat doesn't send status notifications for nodes that were already part of the cluster */ crm_update_peer_proc(oc->m_array[offset].node_uname, crm_proc_ais, ONLINESTATUS); /* Nor does it send status notifications for processes that were already active */ crm_update_peer_proc(oc->m_array[offset].node_uname, crm_proc_crmd, ONLINESTATUS); } return node; } #endif void crm_update_peer_proc(const char *uname, uint32_t flag, const char *status) { uint32_t last = 0; crm_node_t *node = NULL; gboolean changed = FALSE; CRM_ASSERT(crm_peer_cache != NULL); CRM_CHECK(uname != NULL, return); node = g_hash_table_lookup(crm_peer_cache, uname); CRM_CHECK(node != NULL, crm_err("Could not set %s.%s to %s", uname, peer2text(flag), status); return); last = node->processes; if (safe_str_eq(status, ONLINESTATUS)) { if ((node->processes & flag) == 0) { set_bit_inplace(node->processes, flag); changed = TRUE; } } else if (node->processes & flag) { clear_bit_inplace(node->processes, flag); changed = TRUE; } if (changed) { crm_info("%s.%s is now %s", uname, peer2text(flag), status); if (crm_status_callback) { crm_status_callback(crm_status_processes, node, &last); } } } int crm_terminate_member(int nodeid, const char *uname, IPC_Channel * cluster) { /* Always use the synchronous, non-mainloop version */ return stonith_api_kick(nodeid, uname, 120, TRUE); } int crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection) { return stonith_api_kick(nodeid, uname, 120, TRUE); } diff --git a/tools/attrd.c b/tools/attrd.c index ece61c5a0a..d3c88f35e2 100644 --- a/tools/attrd.c +++ b/tools/attrd.c @@ -1,890 +1,889 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define OPTARGS "hV" #if SUPPORT_HEARTBEAT ll_cluster_t *attrd_cluster_conn; #endif GMainLoop *mainloop = NULL; char *attrd_uname = NULL; char *attrd_uuid = NULL; gboolean need_shutdown = FALSE; GHashTable *attr_hash = NULL; cib_t *cib_conn = NULL; typedef struct attr_hash_entry_s { char *uuid; char *id; char *set; char *section; char *value; char *stored_value; int timeout; char *dampen; guint timer_id; char *user; } attr_hash_entry_t; static void free_hash_entry(gpointer data) { attr_hash_entry_t *entry = data; if (entry == NULL) { return; } crm_free(entry->id); crm_free(entry->set); crm_free(entry->dampen); crm_free(entry->section); crm_free(entry->uuid); crm_free(entry->value); crm_free(entry->stored_value); crm_free(entry->user); crm_free(entry); } void attrd_local_callback(xmlNode * msg); gboolean attrd_timer_callback(void *user_data); gboolean attrd_trigger_update(attr_hash_entry_t * hash_entry); void attrd_perform_update(attr_hash_entry_t * hash_entry); static void attrd_shutdown(int nsig) { need_shutdown = TRUE; crm_info("Exiting"); if (mainloop != NULL && g_main_is_running(mainloop)) { g_main_quit(mainloop); } else { exit(0); } } static void usage(const char *cmd, int exit_status) { FILE *stream; stream = exit_status ? stderr : stdout; fprintf(stream, "usage: %s [-srkh] [-c configure file]\n", cmd); /* fprintf(stream, "\t-d\tsets debug level\n"); */ /* fprintf(stream, "\t-s\tgets daemon status\n"); */ /* fprintf(stream, "\t-r\trestarts daemon\n"); */ /* fprintf(stream, "\t-k\tstops daemon\n"); */ /* fprintf(stream, "\t-h\thelp message\n"); */ fflush(stream); exit(exit_status); } typedef struct attrd_client_s { char *id; char *name; char *user; IPC_Channel *channel; GCHSource *source; } attrd_client_t; static void stop_attrd_timer(attr_hash_entry_t * hash_entry) { if (hash_entry != NULL && hash_entry->timer_id != 0) { crm_trace("Stopping %s timer", hash_entry->id); g_source_remove(hash_entry->timer_id); hash_entry->timer_id = 0; } } static gboolean attrd_ipc_callback(IPC_Channel * client, gpointer user_data) { int lpc = 0; xmlNode *msg = NULL; attrd_client_t *curr_client = (attrd_client_t *) user_data; gboolean stay_connected = TRUE; crm_trace("Invoked: %s", curr_client->id); while (IPC_ISRCONN(client)) { if (client->ops->is_message_pending(client) == 0) { break; } msg = xmlfromIPC(client, MAX_IPC_DELAY); if (msg == NULL) { break; } lpc++; #if ENABLE_ACL determine_request_user(&curr_client->user, client, msg, F_ATTRD_USER); #endif crm_trace("Processing msg from %s", curr_client->id); crm_log_xml_trace(msg, __PRETTY_FUNCTION__); attrd_local_callback(msg); free_xml(msg); msg = NULL; if (client->ch_status != IPC_CONNECT) { break; } } crm_trace("Processed %d messages", lpc); if (client->ch_status != IPC_CONNECT) { stay_connected = FALSE; } return stay_connected; } static void attrd_connection_destroy(gpointer user_data) { attrd_client_t *client = user_data; /* cib_process_disconnect */ if (client == NULL) { return; } if (client->source != NULL) { crm_trace("Deleting %s (%p) from mainloop", client->name, client->source); G_main_del_IPC_Channel(client->source); client->source = NULL; } crm_trace("Destroying %s (%p)", client->name, client); crm_free(client->name); crm_free(client->id); crm_free(client->user); crm_free(client); crm_trace("Freed the cib client"); return; } static gboolean attrd_connect(IPC_Channel * channel, gpointer user_data) { attrd_client_t *new_client = NULL; crm_trace("Connecting channel"); if (channel == NULL) { crm_err("Channel was NULL"); return FALSE; } else if (channel->ch_status != IPC_CONNECT) { crm_err("Channel was disconnected"); return FALSE; } else if (need_shutdown) { crm_info("Ignoring connection request during shutdown"); return FALSE; } crm_malloc0(new_client, sizeof(attrd_client_t)); new_client->channel = channel; crm_trace("Created channel %p for channel %s", new_client, new_client->id); /* channel->ops->set_recv_qlen(channel, 100); */ /* channel->ops->set_send_qlen(channel, 400); */ new_client->source = G_main_add_IPC_Channel(G_PRIORITY_DEFAULT, channel, FALSE, attrd_ipc_callback, new_client, attrd_connection_destroy); crm_trace("Client %s connected", new_client->id); return TRUE; } static void log_hash_entry(int level, attr_hash_entry_t * entry, const char *text) { do_crm_log(level, "%s", text); do_crm_log(level, "Set: %s", entry->section); do_crm_log(level, "Name: %s", entry->id); do_crm_log(level, "Value: %s", entry->value); do_crm_log(level, "Timeout: %s", entry->dampen); } static attr_hash_entry_t * find_hash_entry(xmlNode * msg) { const char *value = NULL; const char *attr = crm_element_value(msg, F_ATTRD_ATTRIBUTE); attr_hash_entry_t *hash_entry = NULL; if (attr == NULL) { crm_info("Ignoring message with no attribute name"); return NULL; } hash_entry = g_hash_table_lookup(attr_hash, attr); if (hash_entry == NULL) { /* create one and add it */ crm_info("Creating hash entry for %s", attr); crm_malloc0(hash_entry, sizeof(attr_hash_entry_t)); hash_entry->id = crm_strdup(attr); g_hash_table_insert(attr_hash, hash_entry->id, hash_entry); hash_entry = g_hash_table_lookup(attr_hash, attr); CRM_CHECK(hash_entry != NULL, return NULL); } value = crm_element_value(msg, F_ATTRD_SET); if (value != NULL) { crm_free(hash_entry->set); hash_entry->set = crm_strdup(value); crm_debug("\t%s->set: %s", attr, value); } value = crm_element_value(msg, F_ATTRD_SECTION); if (value == NULL) { value = XML_CIB_TAG_STATUS; } crm_free(hash_entry->section); hash_entry->section = crm_strdup(value); crm_trace("\t%s->section: %s", attr, value); value = crm_element_value(msg, F_ATTRD_DAMPEN); if (value != NULL) { crm_free(hash_entry->dampen); hash_entry->dampen = crm_strdup(value); hash_entry->timeout = crm_get_msec(value); crm_trace("\t%s->timeout: %s", attr, value); } #if ENABLE_ACL crm_free(hash_entry->user); value = crm_element_value(msg, F_ATTRD_USER); if (value != NULL) { hash_entry->user = crm_strdup(value); crm_trace("\t%s->user: %s", attr, value); } #endif log_hash_entry(LOG_DEBUG_2, hash_entry, "Found (and updated) entry:"); return hash_entry; } #if SUPPORT_HEARTBEAT static void attrd_ha_connection_destroy(gpointer user_data) { crm_trace("Invoked"); if (need_shutdown) { /* we signed out, so this is expected */ crm_info("Heartbeat disconnection complete"); return; } crm_crit("Lost connection to heartbeat service!"); if (mainloop != NULL && g_main_is_running(mainloop)) { g_main_quit(mainloop); return; } exit(LSB_EXIT_OK); } static void attrd_ha_callback(HA_Message * msg, void *private_data) { attr_hash_entry_t *hash_entry = NULL; xmlNode *xml = convert_ha_message(NULL, msg, __FUNCTION__); const char *from = crm_element_value(xml, F_ORIG); const char *op = crm_element_value(xml, F_ATTRD_TASK); const char *host = crm_element_value(xml, F_ATTRD_HOST); const char *ignore = crm_element_value(xml, F_ATTRD_IGNORE_LOCALLY); if (host != NULL && safe_str_eq(host, attrd_uname)) { crm_info("Update relayed from %s", from); attrd_local_callback(xml); } else if (ignore == NULL || safe_str_neq(from, attrd_uname)) { crm_info("%s message from %s", op, from); hash_entry = find_hash_entry(xml); stop_attrd_timer(hash_entry); attrd_perform_update(hash_entry); } free_xml(xml); } #endif #if SUPPORT_COROSYNC static gboolean attrd_ais_dispatch(AIS_Message * wrapper, char *data, int sender) { xmlNode *xml = NULL; if (wrapper->header.id == crm_class_cluster) { xml = string2xml(data); if (xml == NULL) { crm_err("Bad message received: %d:'%.120s'", wrapper->id, data); } } if (xml != NULL) { attr_hash_entry_t *hash_entry = NULL; const char *op = crm_element_value(xml, F_ATTRD_TASK); const char *host = crm_element_value(xml, F_ATTRD_HOST); const char *ignore = crm_element_value(xml, F_ATTRD_IGNORE_LOCALLY); crm_xml_add_int(xml, F_SEQ, wrapper->id); crm_xml_add(xml, F_ORIG, wrapper->sender.uname); if (host != NULL && safe_str_eq(host, attrd_uname)) { crm_notice("Update relayed from %s", wrapper->sender.uname); attrd_local_callback(xml); } else if (ignore == NULL || safe_str_neq(wrapper->sender.uname, attrd_uname)) { crm_trace("%s message from %s", op, wrapper->sender.uname); hash_entry = find_hash_entry(xml); stop_attrd_timer(hash_entry); attrd_perform_update(hash_entry); } free_xml(xml); } return TRUE; } static void attrd_ais_destroy(gpointer unused) { - ais_fd_sync = -1; if (need_shutdown) { /* we signed out, so this is expected */ crm_info("OpenAIS disconnection complete"); return; } crm_crit("Lost connection to OpenAIS service!"); if (mainloop != NULL && g_main_is_running(mainloop)) { g_main_quit(mainloop); return; } exit(LSB_EXIT_GENERIC); } #endif static void attrd_cib_connection_destroy(gpointer user_data) { if (need_shutdown) { crm_info("Connection to the CIB terminated..."); } else { /* eventually this will trigger a reconnect, not a shutdown */ crm_err("Connection to the CIB terminated..."); exit(1); } return; } static void update_for_hash_entry(gpointer key, gpointer value, gpointer user_data) { attr_hash_entry_t *entry = value; if (entry->value != NULL) { attrd_timer_callback(value); } } static void do_cib_replaced(const char *event, xmlNode * msg) { crm_info("Sending full refresh"); g_hash_table_foreach(attr_hash, update_for_hash_entry, NULL); } static gboolean cib_connect(void *user_data) { static int attempts = 1; static int max_retry = 20; gboolean was_err = FALSE; static cib_t *local_conn = NULL; if (local_conn == NULL) { local_conn = cib_new(); } if (was_err == FALSE) { enum cib_errors rc = cib_not_connected; if (attempts < max_retry) { crm_debug("CIB signon attempt %d", attempts); rc = local_conn->cmds->signon(local_conn, T_ATTRD, cib_command); } if (rc != cib_ok && attempts > max_retry) { crm_err("Signon to CIB failed: %s", cib_error2string(rc)); was_err = TRUE; } else if (rc != cib_ok) { attempts++; return TRUE; } } crm_info("Connected to the CIB after %d signon attempts", attempts); if (was_err == FALSE) { enum cib_errors rc = local_conn->cmds->set_connection_dnotify(local_conn, attrd_cib_connection_destroy); if (rc != cib_ok) { crm_err("Could not set dnotify callback"); was_err = TRUE; } } if (was_err == FALSE) { if (cib_ok != local_conn->cmds->add_notify_callback(local_conn, T_CIB_REPLACE_NOTIFY, do_cib_replaced)) { crm_err("Could not set CIB notification callback"); was_err = TRUE; } } if (was_err) { crm_err("Aborting startup"); exit(100); } cib_conn = local_conn; crm_info("Sending full refresh"); g_hash_table_foreach(attr_hash, update_for_hash_entry, NULL); return FALSE; } int main(int argc, char **argv) { int flag = 0; int argerr = 0; gboolean was_err = FALSE; char *channel_name = crm_strdup(T_ATTRD); crm_log_init(T_ATTRD, LOG_NOTICE, TRUE, FALSE, argc, argv); mainloop_add_signal(SIGTERM, attrd_shutdown); while ((flag = getopt(argc, argv, OPTARGS)) != EOF) { switch (flag) { case 'V': crm_bump_log_level(); break; case 'h': /* Help message */ usage(T_ATTRD, LSB_EXIT_OK); break; default: ++argerr; break; } } if (optind > argc) { ++argerr; } if (argerr) { usage(T_ATTRD, LSB_EXIT_GENERIC); } attr_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_hash_entry); crm_info("Starting up"); if (was_err == FALSE) { void *destroy = NULL; void *dispatch = NULL; void *data = NULL; #if SUPPORT_COROSYNC if (is_openais_cluster()) { destroy = attrd_ais_destroy; dispatch = attrd_ais_dispatch; } #endif #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { data = &attrd_cluster_conn; dispatch = attrd_ha_callback; destroy = attrd_ha_connection_destroy; } #endif if (FALSE == crm_cluster_connect(&attrd_uname, &attrd_uuid, dispatch, destroy, data)) { crm_err("HA Signon failed"); was_err = TRUE; } } crm_info("Cluster connection active"); if (was_err == FALSE) { int rc = init_server_ipc_comms(channel_name, attrd_connect, default_ipc_connection_destroy); if (rc != 0) { crm_err("Could not start IPC server"); was_err = TRUE; } } crm_info("Accepting attribute updates"); mainloop = g_main_new(FALSE); if (0 == g_timeout_add_full(G_PRIORITY_LOW + 1, 5000, cib_connect, NULL, NULL)) { crm_info("Adding timer failed"); was_err = TRUE; } if (was_err) { crm_err("Aborting startup"); return 100; } crm_notice("Starting mainloop..."); g_main_run(mainloop); crm_notice("Exiting..."); #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { attrd_cluster_conn->llc_ops->signoff(attrd_cluster_conn, TRUE); attrd_cluster_conn->llc_ops->delete(attrd_cluster_conn); } #endif if (cib_conn) { cib_conn->cmds->signoff(cib_conn); cib_delete(cib_conn); } g_hash_table_destroy(attr_hash); crm_free(channel_name); crm_free(attrd_uuid); empty_uuid_cache(); return 0; } struct attrd_callback_s { char *attr; char *value; }; static void attrd_cib_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { attr_hash_entry_t *hash_entry = NULL; struct attrd_callback_s *data = user_data; if (data->value == NULL && rc == cib_NOTEXISTS) { rc = cib_ok; } switch (rc) { case cib_ok: crm_debug("Update %d for %s=%s passed", call_id, data->attr, data->value); hash_entry = g_hash_table_lookup(attr_hash, data->attr); if (hash_entry) { crm_free(hash_entry->stored_value); hash_entry->stored_value = NULL; if (data->value != NULL) { hash_entry->stored_value = crm_strdup(data->value); } } break; case cib_diff_failed: /* When an attr changes while the CIB is syncing */ case cib_remote_timeout: /* When an attr changes while there is a DC election */ case cib_NOTEXISTS: /* When an attr changes while the CIB is syncing a * newer config from a node that just came up */ crm_warn("Update %d for %s=%s failed: %s", call_id, data->attr, data->value, cib_error2string(rc)); break; default: crm_err("Update %d for %s=%s failed: %s", call_id, data->attr, data->value, cib_error2string(rc)); } crm_free(data->value); crm_free(data->attr); crm_free(data); } void attrd_perform_update(attr_hash_entry_t * hash_entry) { int rc = cib_ok; struct attrd_callback_s *data = NULL; const char *user_name = NULL; if (hash_entry == NULL) { return; } else if (cib_conn == NULL) { crm_info("Delaying operation %s=%s: cib not connected", hash_entry->id, crm_str(hash_entry->value)); return; } #if ENABLE_ACL if (hash_entry->user) { user_name = hash_entry->user; crm_trace("Performing request from user '%s'", hash_entry->user); } #endif if (hash_entry->value == NULL) { /* delete the attr */ rc = delete_attr_delegate(cib_conn, cib_none, hash_entry->section, attrd_uuid, NULL, hash_entry->set, hash_entry->uuid, hash_entry->id, NULL, FALSE, user_name); if (hash_entry->stored_value) { crm_notice("Sent delete %d: node=%s, attr=%s, id=%s, set=%s, section=%s", rc, attrd_uuid, hash_entry->id, hash_entry->uuid ? hash_entry->uuid : "", hash_entry->set, hash_entry->section); } else if (rc < 0 && rc != cib_NOTEXISTS) { crm_notice ("Delete operation failed: node=%s, attr=%s, id=%s, set=%s, section=%s: %s (%d)", attrd_uuid, hash_entry->id, hash_entry->uuid ? hash_entry->uuid : "", hash_entry->set, hash_entry->section, cib_error2string(rc), rc); } else { crm_trace("Sent delete %d: node=%s, attr=%s, id=%s, set=%s, section=%s", rc, attrd_uuid, hash_entry->id, hash_entry->uuid ? hash_entry->uuid : "", hash_entry->set, hash_entry->section); } } else { /* send update */ rc = update_attr_delegate(cib_conn, cib_none, hash_entry->section, attrd_uuid, NULL, hash_entry->set, hash_entry->uuid, hash_entry->id, hash_entry->value, FALSE, user_name); if (safe_str_neq(hash_entry->value, hash_entry->stored_value) || rc < 0) { crm_notice("Sent update %d: %s=%s", rc, hash_entry->id, hash_entry->value); } else { crm_trace("Sent update %d: %s=%s", rc, hash_entry->id, hash_entry->value); } } crm_malloc0(data, sizeof(struct attrd_callback_s)); data->attr = crm_strdup(hash_entry->id); if (hash_entry->value != NULL) { data->value = crm_strdup(hash_entry->value); } add_cib_op_callback(cib_conn, rc, FALSE, data, attrd_cib_callback); return; } void attrd_local_callback(xmlNode * msg) { static int plus_plus_len = 5; attr_hash_entry_t *hash_entry = NULL; const char *from = crm_element_value(msg, F_ORIG); const char *op = crm_element_value(msg, F_ATTRD_TASK); const char *attr = crm_element_value(msg, F_ATTRD_ATTRIBUTE); const char *value = crm_element_value(msg, F_ATTRD_VALUE); const char *host = crm_element_value(msg, F_ATTRD_HOST); if (safe_str_eq(op, "refresh")) { crm_notice("Sending full refresh (origin=%s)", from); g_hash_table_foreach(attr_hash, update_for_hash_entry, NULL); return; } if (host != NULL && safe_str_neq(host, attrd_uname)) { send_cluster_message(host, crm_msg_attrd, msg, FALSE); return; } crm_debug("%s message from %s: %s=%s", op, from, attr, crm_str(value)); hash_entry = find_hash_entry(msg); if (hash_entry == NULL) { return; } if (hash_entry->uuid == NULL) { const char *key = crm_element_value(msg, F_ATTRD_KEY); if (key) { hash_entry->uuid = crm_strdup(key); } } crm_debug("Supplied: %s, Current: %s, Stored: %s", value, hash_entry->value, hash_entry->stored_value); if (safe_str_eq(value, hash_entry->value) && safe_str_eq(value, hash_entry->stored_value)) { crm_trace("Ignoring non-change"); return; } else if (value) { int offset = 1; int int_value = 0; int value_len = strlen(value); if (value_len < (plus_plus_len + 2) || value[plus_plus_len] != '+' || (value[plus_plus_len + 1] != '+' && value[plus_plus_len + 1] != '=')) { goto set_unexpanded; } int_value = char2score(hash_entry->value); if (value[plus_plus_len + 1] != '+') { const char *offset_s = value + (plus_plus_len + 2); offset = char2score(offset_s); } int_value += offset; if (int_value > INFINITY) { int_value = INFINITY; } crm_info("Expanded %s=%s to %d", attr, value, int_value); crm_xml_add_int(msg, F_ATTRD_VALUE, int_value); value = crm_element_value(msg, F_ATTRD_VALUE); } set_unexpanded: if (safe_str_eq(value, hash_entry->value) && hash_entry->timer_id) { /* We're already waiting to set this value */ return; } crm_free(hash_entry->value); hash_entry->value = NULL; if (value != NULL) { hash_entry->value = crm_strdup(value); crm_debug("New value of %s is %s", attr, value); } stop_attrd_timer(hash_entry); if (hash_entry->timeout > 0) { hash_entry->timer_id = g_timeout_add(hash_entry->timeout, attrd_timer_callback, hash_entry); } else { attrd_trigger_update(hash_entry); } return; } gboolean attrd_timer_callback(void *user_data) { stop_attrd_timer(user_data); attrd_trigger_update(user_data); return TRUE; /* Always return true, removed cleanly by stop_attrd_timer() */ } gboolean attrd_trigger_update(attr_hash_entry_t * hash_entry) { xmlNode *msg = NULL; /* send HA message to everyone */ crm_notice("Sending flush op to all hosts for: %s (%s)", hash_entry->id, crm_str(hash_entry->value)); log_hash_entry(LOG_DEBUG_2, hash_entry, "Sending flush op to all hosts for:"); msg = create_xml_node(NULL, __FUNCTION__); crm_xml_add(msg, F_TYPE, T_ATTRD); crm_xml_add(msg, F_ORIG, attrd_uname); crm_xml_add(msg, F_ATTRD_TASK, "flush"); crm_xml_add(msg, F_ATTRD_ATTRIBUTE, hash_entry->id); crm_xml_add(msg, F_ATTRD_SET, hash_entry->set); crm_xml_add(msg, F_ATTRD_SECTION, hash_entry->section); crm_xml_add(msg, F_ATTRD_DAMPEN, hash_entry->dampen); crm_xml_add(msg, F_ATTRD_VALUE, hash_entry->value); #if ENABLE_ACL if (hash_entry->user) { crm_xml_add(msg, F_ATTRD_USER, hash_entry->user); } #endif if (hash_entry->timeout <= 0) { crm_xml_add(msg, F_ATTRD_IGNORE_LOCALLY, hash_entry->value); attrd_perform_update(hash_entry); } send_cluster_message(NULL, crm_msg_attrd, msg, FALSE); free_xml(msg); return TRUE; }