diff --git a/crmd/corosync.c b/crmd/corosync.c index 581df28208..0ee8b599be 100644 --- a/crmd/corosync.c +++ b/crmd/corosync.c @@ -1,224 +1,224 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern void post_cache_update(int seq); extern void crmd_ha_connection_destroy(gpointer user_data); /* A_HA_CONNECT */ #if SUPPORT_COROSYNC extern void crmd_ha_msg_filter(xmlNode * msg); static void crmd_proc_destroy(gpointer user_data) { if(is_set(fsa_input_register, R_HA_DISCONNECTED)) { crm_err("MCP connection terminated"); exit(1); } } static gboolean crmd_proc_dispatch(IPC_Channel *ch, gpointer user_data) { xmlNode *msg = NULL; gboolean stay_connected = TRUE; while(IPC_ISRCONN(ch)) { if(ch->ops->is_message_pending(ch) == 0) { break; } msg = xmlfromIPC(ch, MAX_IPC_DELAY); if(msg) { xml_child_iter(msg, node, int id = 0; int children = 0; const char *uname = crm_element_value(node, "uname"); crm_element_value_int(node, "processes", &children); crm_update_peer(id, 0, 0, 0, children, NULL, uname, NULL, NULL); ); set_bit_inplace(fsa_input_register, R_PEER_DATA); free_xml(msg); } if(ch->ch_status != IPC_CONNECT) { break; } } if (ch->ch_status != IPC_CONNECT) { stay_connected = FALSE; } return stay_connected; } static gboolean crmd_ais_dispatch(AIS_Message *wrapper, char *data, int sender) { int seq = 0; xmlNode *xml = NULL; const char *seq_s = NULL; xml = string2xml(data); if(xml == NULL) { crm_err("Could not parse message content (%d): %.100s", wrapper->header.id, data); return TRUE; } switch(wrapper->header.id) { case crm_class_members: seq_s = crm_element_value(xml, "id"); seq = crm_int_helper(seq_s, NULL); set_bit_inplace(fsa_input_register, R_PEER_DATA); post_cache_update(seq); /* fall through */ case crm_class_quorum: crm_update_quorum(crm_have_quorum, FALSE); if(AM_I_DC) { const char *votes = crm_element_value(xml, "expected"); if(votes == NULL || check_number(votes) == FALSE) { crm_log_xml_err(xml, "Invalid quorum/membership update"); } else { int rc = update_attr( fsa_cib_conn, cib_quorum_override|cib_scope_local|cib_inhibit_notify, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, XML_ATTR_EXPECTED_VOTES, votes, FALSE); crm_info("Setting expected votes to %s", votes); if(cib_ok > rc) { crm_err("Quorum update failed: %s", cib_error2string(rc)); } } } break; case crm_class_cluster: crm_xml_add(xml, F_ORIG, wrapper->sender.uname); crm_xml_add_int(xml, F_SEQ, wrapper->id); crmd_ha_msg_filter(xml); break; case crm_class_rmpeer: /* Ignore */ break; case crm_class_notify: case crm_class_nodeid: crm_err("Unexpected message class (%d): %.100s", wrapper->header.id, data); break; default: crm_err("Invalid message class (%d): %.100s", wrapper->header.id, data); } free_xml(xml); return TRUE; } static gboolean crmd_cman_dispatch(AIS_Message *wrapper, char *data, int sender) { crm_update_quorum(crm_have_quorum, FALSE); return TRUE; } static void crmd_cman_destroy(gpointer user_data) { if(is_set(fsa_input_register, R_HA_DISCONNECTED)) { crm_err("connection terminated"); exit(1); } else { crm_info("connection closed"); } } static void crmd_quorum_destroy(gpointer user_data) { if(is_set(fsa_input_register, R_HA_DISCONNECTED)) { crm_err("connection terminated"); exit(1); } else { crm_info("connection closed"); } } static void crmd_ais_destroy(gpointer user_data) { if(is_set(fsa_input_register, R_HA_DISCONNECTED)) { crm_err("connection terminated"); ais_fd_sync = -1; exit(1); } else { crm_info("connection closed"); } } extern gboolean crm_connect_corosync(void); gboolean crm_connect_corosync(void) { gboolean rc = FALSE; if(is_openais_cluster()) { crm_set_status_callback(&ais_status_callback); rc = crm_cluster_connect( &fsa_our_uname, &fsa_our_uuid, crmd_ais_dispatch, crmd_ais_destroy, NULL); - if(is_classic_ais_cluster() == FALSE) { + if(getenv("HA_mcp")) { IPC_Channel *ch = init_client_ipc_comms_nodispatch("pcmk"); G_main_add_IPC_Channel(G_PRIORITY_HIGH, ch, FALSE, crmd_proc_dispatch, NULL, crmd_proc_destroy); } } if(rc && is_corosync_cluster()) { init_quorum_connection(crmd_cman_dispatch, crmd_quorum_destroy); } if(rc && is_cman_cluster()) { init_cman_connection(crmd_cman_dispatch, crmd_cman_destroy); set_bit_inplace(fsa_input_register, R_CCM_DATA); } return rc; } #endif diff --git a/lib/ais/plugin.c b/lib/ais/plugin.c index db182c2a14..a58ebc4594 100644 --- a/lib/ais/plugin.c +++ b/lib/ais/plugin.c @@ -1,1641 +1,1672 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "plugin.h" #include "utils.h" #include #include #include #include #include #include #include #include #include struct corosync_api_v1 *pcmk_api = NULL; uint32_t plugin_has_votes = 0; uint32_t plugin_expected_votes = 2; int use_mgmtd = 0; int plugin_log_level = LOG_DEBUG; char *local_uname = NULL; int local_uname_len = 0; char *local_cname = NULL; int local_cname_len = 0; uint32_t local_nodeid = 0; char *ipc_channel_name = NULL; static uint64_t local_born_on = 0; uint64_t membership_seq = 0; pthread_t pcmk_wait_thread; +gboolean use_mcp = FALSE; gboolean wait_active = TRUE; gboolean have_reliable_membership_id = FALSE; GHashTable *ipc_client_list = NULL; GHashTable *membership_list = NULL; GHashTable *membership_notify_list = NULL; #define MAX_RESPAWN 100 #define LOOPBACK_ID 16777343 #define crm_flag_none 0x00000000 #define crm_flag_members 0x00000001 struct crm_identify_msg_s { coroipc_request_header_t header __attribute__((aligned(8))); uint32_t id; uint32_t pid; int32_t votes; uint32_t processes; char uname[256]; char version[256]; uint64_t born_on; } __attribute__((packed)); static crm_child_t pcmk_children[] = { { 0, crm_proc_none, crm_flag_none, 0, 0, FALSE, "none", NULL, NULL, NULL, NULL }, { 0, crm_proc_ais, crm_flag_none, 0, 0, FALSE, "ais", NULL, NULL, NULL, NULL }, { 0, crm_proc_lrmd, crm_flag_none, 3, 0, TRUE, "lrmd", NULL, HB_DAEMON_DIR"/lrmd", NULL, NULL }, { 0, crm_proc_cib, crm_flag_members, 2, 0, TRUE, "cib", CRM_DAEMON_USER, CRM_DAEMON_DIR"/cib", NULL, NULL }, { 0, crm_proc_crmd, crm_flag_members, 6, 0, TRUE, "crmd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/crmd", NULL, NULL }, { 0, crm_proc_attrd, crm_flag_none, 4, 0, TRUE, "attrd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/attrd", NULL, NULL }, { 0, crm_proc_stonithd, crm_flag_none, 0, 0, TRUE, "stonithd", NULL, "/bin/false", NULL, NULL }, { 0, crm_proc_pe, crm_flag_none, 5, 0, TRUE, "pengine", CRM_DAEMON_USER, CRM_DAEMON_DIR"/pengine", NULL, NULL }, { 0, crm_proc_mgmtd, crm_flag_none, 7, 0, TRUE, "mgmtd", NULL, HB_DAEMON_DIR"/mgmtd", NULL, NULL }, { 0, crm_proc_stonith_ng, crm_flag_none, 1, 0, TRUE, "stonith-ng", NULL, CRM_DAEMON_DIR"/stonithd", NULL, NULL }, }; void send_cluster_id(void); int send_cluster_msg_raw(const AIS_Message *ais_msg); char *pcmk_generate_membership_data(void); gboolean check_message_sanity(const AIS_Message *msg, const char *data); typedef const void ais_void_ptr; int pcmk_shutdown(void); void pcmk_peer_update(enum totem_configuration_type configuration_type, const unsigned int *member_list, size_t member_list_entries, const unsigned int *left_list, size_t left_list_entries, const unsigned int *joined_list, size_t joined_list_entries, const struct memb_ring_id *ring_id); int pcmk_startup (struct corosync_api_v1 *corosync_api); int pcmk_config_init(struct corosync_api_v1 *corosync_api); int pcmk_ipc_exit (void *conn); int pcmk_ipc_connect (void *conn); void pcmk_ipc(void *conn, ais_void_ptr *msg); void pcmk_exec_dump(void); void pcmk_cluster_swab(void *msg); void pcmk_cluster_callback(ais_void_ptr *message, unsigned int nodeid); void pcmk_nodeid(void *conn, ais_void_ptr *msg); void pcmk_nodes(void *conn, ais_void_ptr *msg); void pcmk_notify(void *conn, ais_void_ptr *msg); void pcmk_remove_member(void *conn, ais_void_ptr *msg); void pcmk_quorum(void *conn, ais_void_ptr *msg); void pcmk_cluster_id_swab(void *msg); void pcmk_cluster_id_callback(ais_void_ptr *message, unsigned int nodeid); void ais_remove_peer(char *node_id); static uint32_t get_process_list(void) { int lpc = 0; uint32_t procs = crm_proc_ais; for (lpc = 0; lpc < SIZEOF(pcmk_children); lpc++) { if(pcmk_children[lpc].pid != 0) { procs |= pcmk_children[lpc].flag; } } return procs; } static struct corosync_lib_handler pcmk_lib_service[] = { { /* 0 */ .lib_handler_fn = pcmk_ipc, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED, }, { /* 1 */ .lib_handler_fn = pcmk_nodes, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED, }, { /* 2 */ .lib_handler_fn = pcmk_notify, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED, }, { /* 3 */ .lib_handler_fn = pcmk_nodeid, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED, }, { /* 4 */ .lib_handler_fn = pcmk_remove_member, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED, }, { /* 5 */ .lib_handler_fn = pcmk_quorum, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED, }, }; static struct corosync_exec_handler pcmk_exec_service[] = { { /* 0 */ .exec_handler_fn = pcmk_cluster_callback, .exec_endian_convert_fn = pcmk_cluster_swab }, { /* 1 */ .exec_handler_fn = pcmk_cluster_id_callback, .exec_endian_convert_fn = pcmk_cluster_id_swab } }; /* * Exports the interface for the service */ struct corosync_service_engine pcmk_service_handler = { .name = (unsigned char *)"Pacemaker Cluster Manager "PACKAGE_VERSION, .id = PCMK_SERVICE_ID, .private_data_size = 0, .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED, .allow_inquorate = CS_LIB_ALLOW_INQUORATE, .lib_init_fn = pcmk_ipc_connect, .lib_exit_fn = pcmk_ipc_exit, .exec_init_fn = pcmk_startup, .exec_exit_fn = pcmk_shutdown, .config_init_fn = pcmk_config_init, .priority = 50, .lib_engine = pcmk_lib_service, .lib_engine_count = sizeof (pcmk_lib_service) / sizeof (struct corosync_lib_handler), .exec_engine = pcmk_exec_service, .exec_engine_count = sizeof (pcmk_exec_service) / sizeof (struct corosync_exec_handler), .confchg_fn = pcmk_peer_update, .exec_dump_fn = pcmk_exec_dump, /* void (*sync_init) (void); */ /* int (*sync_process) (void); */ /* void (*sync_activate) (void); */ /* void (*sync_abort) (void); */ }; /* * Dynamic Loader definition */ struct corosync_service_engine *pcmk_get_handler_ver0 (void); struct corosync_service_engine_iface_ver0 pcmk_service_handler_iface = { .corosync_get_service_engine_ver0 = pcmk_get_handler_ver0 }; -static struct lcr_iface openais_pcmk_ver0[1] = { +static struct lcr_iface openais_pcmk_ver0[2] = { { .name = "pacemaker", .version = 0, .versions_replace = 0, .versions_replace_count = 0, .dependencies = 0, .dependency_count = 0, .constructor = NULL, .destructor = NULL, .interfaces = NULL + }, + { + .name = "pacemaker", + .version = 1, + .versions_replace = 0, + .versions_replace_count = 0, + .dependencies = 0, + .dependency_count = 0, + .constructor = NULL, + .destructor = NULL, + .interfaces = NULL } }; static struct lcr_comp pcmk_comp_ver0 = { - .iface_count = 1, + .iface_count = 2, .ifaces = openais_pcmk_ver0 }; struct corosync_service_engine *pcmk_get_handler_ver0 (void) { return (&pcmk_service_handler); } __attribute__ ((constructor)) static void register_this_component (void) { lcr_interfaces_set (&openais_pcmk_ver0[0], &pcmk_service_handler_iface); + lcr_interfaces_set (&openais_pcmk_ver0[1], &pcmk_service_handler_iface); lcr_component_register (&pcmk_comp_ver0); } static int plugin_has_quorum(void) { if((plugin_expected_votes >> 1) < plugin_has_votes) { return 1; } return 0; } static void update_expected_votes(int value) { if(value < plugin_has_votes) { /* Never drop below the number of connected nodes */ ais_info("Cannot update expected quorum votes %d -> %d:" " value cannot be less that the current number of votes", plugin_expected_votes, value); } else if(plugin_expected_votes != value) { ais_info("Expected quorum votes %d -> %d", plugin_expected_votes, value); plugin_expected_votes = value; } } /* Create our own local copy of the config so we can navigate it */ static void process_ais_conf(void) { char *value = NULL; gboolean any_log = FALSE; hdb_handle_t top_handle = 0; hdb_handle_t local_handle = 0; ais_info("Reading configure"); top_handle = config_find_init(pcmk_api, "logging"); local_handle = config_find_next(pcmk_api, "logging", top_handle); get_config_opt(pcmk_api, local_handle, "debug", &value, "on"); if(ais_get_boolean(value)) { plugin_log_level = LOG_DEBUG; pcmk_env.debug = "1"; } else { plugin_log_level = LOG_INFO; pcmk_env.debug = "0"; } get_config_opt(pcmk_api, local_handle, "to_logfile", &value, "off"); if(ais_get_boolean(value)) { get_config_opt(pcmk_api, local_handle, "logfile", &value, NULL); if(value == NULL) { ais_err("Logging to a file requested but no log file specified"); } else { uid_t pcmk_uid = geteuid(); uid_t pcmk_gid = getegid(); FILE *logfile = fopen(value, "a"); if(logfile) { int logfd = fileno(logfile); pcmk_env.logfile = value; /* Ensure the file has the correct permissions */ fchown(logfd, pcmk_uid, pcmk_gid); fchmod(logfd, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP); fprintf(logfile, "Set r/w permissions for uid=%d, gid=%d on %s\n", pcmk_uid, pcmk_gid, value); fflush(logfile); fsync(logfd); fclose(logfile); any_log = TRUE; } else { ais_err("Couldn't create logfile: %s", value); } } } get_config_opt(pcmk_api, local_handle, "to_syslog", &value, "on"); if(any_log && ais_get_boolean(value) == FALSE) { ais_info("User configured file based logging and explicitly disabled syslog."); value = "none"; } else { if(ais_get_boolean(value) == FALSE) { ais_err("Please enable some sort of logging, either 'to_file: on' or 'to_syslog: on'."); ais_err("If you use file logging, be sure to also define a value for 'logfile'"); } get_config_opt(pcmk_api, local_handle, "syslog_facility", &value, "daemon"); } pcmk_env.syslog = value; config_find_done(pcmk_api, local_handle); top_handle = config_find_init(pcmk_api, "quorum"); local_handle = config_find_next(pcmk_api, "quorum", top_handle); get_config_opt(pcmk_api, local_handle, "provider", &value, NULL); if(value && ais_str_eq("quorum_cman", value)) { pcmk_env.quorum = "cman"; } top_handle = config_find_init(pcmk_api, "service"); local_handle = config_find_next(pcmk_api, "service", top_handle); while(local_handle) { value = NULL; pcmk_api->object_key_get(local_handle, "name", strlen("name"), (void**)&value, NULL); if(ais_str_eq("pacemaker", value)) { break; } local_handle = config_find_next(pcmk_api, "service", top_handle); } - if(pcmk_env.quorum == NULL) { - get_config_opt(pcmk_api, local_handle, "quorum_provider", &value, "pcmk"); - pcmk_env.quorum = value; + get_config_opt(pcmk_api, local_handle, "ver", &value, "0"); + if(ais_str_eq(value, "1")) { + ais_info("Enabling MCP mode: Use the Pacemaker init script to complete Pacemaker startup"); + use_mcp = TRUE; } get_config_opt(pcmk_api, local_handle, "clustername", &local_cname, "pcmk"); local_cname_len = strlen(local_cname); get_config_opt(pcmk_api, local_handle, "use_logd", &value, "no"); pcmk_env.use_logd = value; get_config_opt(pcmk_api, local_handle, "use_mgmtd", &value, "no"); if(ais_get_boolean(value) == FALSE) { int lpc = 0; for (; lpc < SIZEOF(pcmk_children); lpc++) { if(crm_proc_mgmtd & pcmk_children[lpc].flag) { /* Disable mgmtd startup */ pcmk_children[lpc].start_seq = 0; break; } } } config_find_done(pcmk_api, local_handle); } int pcmk_config_init(struct corosync_api_v1 *unused) { return 0; } static void *pcmk_wait_dispatch (void *arg) { struct timespec waitsleep = { .tv_sec = 1, .tv_nsec = 0 }; while(wait_active) { int lpc = 0; for (; lpc < SIZEOF(pcmk_children); lpc++) { if(pcmk_children[lpc].pid > 0) { int status; pid_t pid = wait4( pcmk_children[lpc].pid, &status, WNOHANG, NULL); if(pid == 0) { continue; } else if(pid < 0) { ais_perror("Call to wait4(%s) failed", pcmk_children[lpc].name); continue; } /* cleanup */ pcmk_children[lpc].pid = 0; pcmk_children[lpc].conn = NULL; pcmk_children[lpc].async_conn = NULL; if(WIFSIGNALED(status)) { int sig = WTERMSIG(status); ais_err("Child process %s terminated with signal %d" " (pid=%d, core=%s)", pcmk_children[lpc].name, sig, pid, WCOREDUMP(status)?"true":"false"); } else if (WIFEXITED(status)) { int rc = WEXITSTATUS(status); do_ais_log(rc==0?LOG_NOTICE:LOG_ERR, "Child process %s exited (pid=%d, rc=%d)", pcmk_children[lpc].name, pid, rc); if(rc == 100) { ais_notice("Child process %s no longer wishes" " to be respawned", pcmk_children[lpc].name); pcmk_children[lpc].respawn = FALSE; } } pcmk_children[lpc].respawn_count += 1; if(pcmk_children[lpc].respawn_count > MAX_RESPAWN) { ais_err("Child respawn count exceeded by %s", pcmk_children[lpc].name); pcmk_children[lpc].respawn = FALSE; } if(pcmk_children[lpc].respawn) { ais_notice("Respawning failed child process: %s", pcmk_children[lpc].name); spawn_child(&(pcmk_children[lpc])); } send_cluster_id(); } } sched_yield (); nanosleep (&waitsleep, 0); } return 0; } static uint32_t pcmk_update_nodeid(void) { int last = local_nodeid; local_nodeid = pcmk_api->totem_nodeid_get(); if(last != local_nodeid) { if(last == 0) { ais_info("Local node id: %u", local_nodeid); } else { char *last_s = NULL; ais_malloc0(last_s, 32); ais_warn("Detected local node id change: %u -> %u", last, local_nodeid); snprintf(last_s, 31, "%u", last); ais_remove_peer(last_s); ais_free(last_s); } update_member(local_nodeid, 0, 0, 1, 0, local_uname, CRM_NODE_MEMBER, NULL); } return local_nodeid; } static void build_path(const char *path_c, mode_t mode) { int offset = 1, len = 0; char *path = ais_strdup(path_c); AIS_CHECK(path != NULL, return); for(len = strlen(path); offset < len; offset++) { if(path[offset] == '/') { path[offset] = 0; if(mkdir(path, mode) < 0 && errno != EEXIST) { ais_perror("Could not create directory '%s'", path); break; } path[offset] = '/'; } } if(mkdir(path, mode) < 0 && errno != EEXIST) { ais_perror("Could not create directory '%s'", path); } ais_free(path); } int pcmk_startup(struct corosync_api_v1 *init_with) { int rc = 0; int lpc = 0; int start_seq = 1; struct utsname us; struct rlimit cores; static int max = SIZEOF(pcmk_children); uid_t pcmk_uid = 0; gid_t pcmk_gid = 0; uid_t root_uid = -1; uid_t cs_uid = geteuid(); pcmk_user_lookup("root", &root_uid, NULL); pcmk_api = init_with; pcmk_env.debug = "0"; pcmk_env.logfile = NULL; pcmk_env.use_logd = "false"; pcmk_env.syslog = "daemon"; if(cs_uid != root_uid) { ais_err("Corosync must be configured to start as 'root'," " otherwise Pacemaker cannot manage services." " Expected %d got %d", root_uid, cs_uid); return -1; } process_ais_conf(); membership_list = g_hash_table_new_full( g_direct_hash, g_direct_equal, NULL, destroy_ais_node); membership_notify_list = g_hash_table_new(g_direct_hash, g_direct_equal); ipc_client_list = g_hash_table_new(g_direct_hash, g_direct_equal); ais_info("CRM: Initialized"); log_printf(LOG_INFO, "Logging: Initialized %s\n", __PRETTY_FUNCTION__); rc = getrlimit(RLIMIT_CORE, &cores); if(rc < 0) { ais_perror("Cannot determine current maximum core size."); } if(cores.rlim_max <= 0) { cores.rlim_max = RLIM_INFINITY; rc = setrlimit(RLIMIT_CORE, &cores); if(rc < 0) { ais_perror("Core file generation will remain disabled." " Core files are an important diagnositic tool," " please consider enabling them by default."); } } else { ais_info("Maximum core file size is: %lu", cores.rlim_max); #if 0 /* system() is not thread-safe, can't call from here * Actually, its a pretty hacky way to try and achieve this anyway */ if(system("echo 1 > /proc/sys/kernel/core_uses_pid") != 0) { ais_perror("Could not enable /proc/sys/kernel/core_uses_pid"); } #endif } if(pcmk_user_lookup(CRM_DAEMON_USER, &pcmk_uid, &pcmk_gid) < 0) { ais_err("Cluster user %s does not exist, aborting Pacemaker startup", CRM_DAEMON_USER); return TRUE; } mkdir(CRM_STATE_DIR, 0750); chown(CRM_STATE_DIR, pcmk_uid, pcmk_gid); /* Used by stonithd */ build_path(HA_STATE_DIR"/heartbeat", 0755); /* Used by RAs - Leave owned by root */ build_path(CRM_RSCTMP_DIR, 0755); rc = uname(&us); AIS_ASSERT(rc == 0); local_uname = ais_strdup(us.nodename); local_uname_len = strlen(local_uname); ais_info("Service: %d", PCMK_SERVICE_ID); ais_info("Local hostname: %s", local_uname); pcmk_update_nodeid(); + if(use_mcp == FALSE) { pthread_create (&pcmk_wait_thread, NULL, pcmk_wait_dispatch, NULL); for (start_seq = 1; start_seq < max; start_seq++) { /* dont start anything with start_seq < 1 */ for (lpc = 0; lpc < max; lpc++) { if(start_seq == pcmk_children[lpc].start_seq) { spawn_child(&(pcmk_children[lpc])); } } } + } return 0; } /* static void ais_print_node(const char *prefix, struct totem_ip_address *host) { int len = 0; char *buffer = NULL; ais_malloc0(buffer, INET6_ADDRSTRLEN+1); inet_ntop(host->family, host->addr, buffer, INET6_ADDRSTRLEN); len = strlen(buffer); ais_info("%s: %.*s", prefix, len, buffer); ais_free(buffer); } */ #if 0 /* copied here for reference from exec/totempg.c */ char *totempg_ifaces_print (unsigned int nodeid) { static char iface_string[256 * INTERFACE_MAX]; char one_iface[64]; struct totem_ip_address interfaces[INTERFACE_MAX]; char **status; unsigned int iface_count; unsigned int i; int res; iface_string[0] = '\0'; res = totempg_ifaces_get (nodeid, interfaces, &status, &iface_count); if (res == -1) { return ("no interface found for nodeid"); } for (i = 0; i < iface_count; i++) { sprintf (one_iface, "r(%d) ip(%s), ", i, totemip_print (&interfaces[i])); strcat (iface_string, one_iface); } return (iface_string); } #endif static void ais_mark_unseen_peer_dead( gpointer key, gpointer value, gpointer user_data) { int *changed = user_data; crm_node_t *node = value; if(node->last_seen != membership_seq && ais_str_eq(CRM_NODE_LOST, node->state) == FALSE) { ais_info("Node %s was not seen in the previous transition", node->uname); *changed += update_member(node->id, 0, membership_seq, node->votes, node->processes, node->uname, CRM_NODE_LOST, NULL); } } void pcmk_peer_update ( enum totem_configuration_type configuration_type, const unsigned int *member_list, size_t member_list_entries, const unsigned int *left_list, size_t left_list_entries, const unsigned int *joined_list, size_t joined_list_entries, const struct memb_ring_id *ring_id ) { int lpc = 0; int changed = 0; int do_update = 0; AIS_ASSERT(ring_id != NULL); switch(configuration_type) { case TOTEM_CONFIGURATION_REGULAR: do_update = 1; break; case TOTEM_CONFIGURATION_TRANSITIONAL: break; } membership_seq = ring_id->seq; ais_notice("%s membership event on ring %lld: memb=%ld, new=%ld, lost=%ld", do_update?"Stable":"Transitional", ring_id->seq, (long)member_list_entries, (long)joined_list_entries, (long)left_list_entries); if(do_update == 0) { for(lpc = 0; lpc < joined_list_entries; lpc++) { const char *prefix = "new: "; uint32_t nodeid = joined_list[lpc]; ais_info("%s %s %u", prefix, member_uname(nodeid), nodeid); } for(lpc = 0; lpc < member_list_entries; lpc++) { const char *prefix = "memb:"; uint32_t nodeid = member_list[lpc]; ais_info("%s %s %u", prefix, member_uname(nodeid), nodeid); } for(lpc = 0; lpc < left_list_entries; lpc++) { const char *prefix = "lost:"; uint32_t nodeid = left_list[lpc]; ais_info("%s %s %u", prefix, member_uname(nodeid), nodeid); } return; } for(lpc = 0; lpc < joined_list_entries; lpc++) { const char *prefix = "NEW: "; uint32_t nodeid = joined_list[lpc]; crm_node_t *node = NULL; changed += update_member( nodeid, 0, membership_seq, -1, 0, NULL, CRM_NODE_MEMBER, NULL); ais_info("%s %s %u", prefix, member_uname(nodeid), nodeid); node = g_hash_table_lookup(membership_list, GUINT_TO_POINTER(nodeid)); if(node->addr == NULL) { const char *addr = totempg_ifaces_print(nodeid); node->addr = ais_strdup(addr); ais_debug("Node %u has address %s", nodeid, node->addr); } } for(lpc = 0; lpc < member_list_entries; lpc++) { const char *prefix = "MEMB:"; uint32_t nodeid = member_list[lpc]; changed += update_member( nodeid, 0, membership_seq, -1, 0, NULL, CRM_NODE_MEMBER, NULL); ais_info("%s %s %u", prefix, member_uname(nodeid), nodeid); } for(lpc = 0; lpc < left_list_entries; lpc++) { const char *prefix = "LOST:"; uint32_t nodeid = left_list[lpc]; changed += update_member( nodeid, 0, membership_seq, -1, 0, NULL, CRM_NODE_LOST, NULL); ais_info("%s %s %u", prefix, member_uname(nodeid), nodeid); } if(changed && joined_list_entries == 0 && left_list_entries == 0) { ais_err("Something strange happened: %d", changed); changed = 0; } ais_debug_2("Reaping unseen nodes..."); g_hash_table_foreach(membership_list, ais_mark_unseen_peer_dead, &changed); if(member_list_entries > 1) { /* Used to set born-on in send_cluster_id()) * We need to wait until we have at least one peer since first * membership id is based on the one before we stopped and isn't reliable */ have_reliable_membership_id = TRUE; } if(changed) { ais_debug("%d nodes changed", changed); pcmk_update_nodeid(); send_member_notification(); } send_cluster_id(); } int pcmk_ipc_exit (void *conn) { int lpc = 0; const char *client = NULL; void *async_conn = conn; for (; lpc < SIZEOF(pcmk_children); lpc++) { if(pcmk_children[lpc].conn == conn) { if(wait_active == FALSE) { /* Make sure the shutdown loop exits */ pcmk_children[lpc].pid = 0; } pcmk_children[lpc].conn = NULL; pcmk_children[lpc].async_conn = NULL; client = pcmk_children[lpc].name; break; } } g_hash_table_remove(membership_notify_list, async_conn); g_hash_table_remove(ipc_client_list, async_conn); do_ais_log(client?LOG_INFO:(LOG_DEBUG+1), "Client %s (conn=%p, async-conn=%p) left", client?client:"unknown-transient", conn, async_conn); return (0); } int pcmk_ipc_connect (void *conn) { /* OpenAIS hasn't finished setting up the connection at this point * Sending messages now messes up the protocol! */ return (0); } /* * Executive message handlers */ void pcmk_cluster_swab(void *msg) { AIS_Message *ais_msg = msg; ais_debug_3("Performing endian conversion..."); ais_msg->id = swab32 (ais_msg->id); ais_msg->size = swab32 (ais_msg->size); ais_msg->is_compressed = swab32 (ais_msg->is_compressed); ais_msg->compressed_size = swab32 (ais_msg->compressed_size); ais_msg->host.id = swab32 (ais_msg->host.id); ais_msg->host.pid = swab32 (ais_msg->host.pid); ais_msg->host.type = swab32 (ais_msg->host.type); ais_msg->host.size = swab32 (ais_msg->host.size); ais_msg->host.local = swab32 (ais_msg->host.local); ais_msg->sender.id = swab32 (ais_msg->sender.id); ais_msg->sender.pid = swab32 (ais_msg->sender.pid); ais_msg->sender.type = swab32 (ais_msg->sender.type); ais_msg->sender.size = swab32 (ais_msg->sender.size); ais_msg->sender.local = swab32 (ais_msg->sender.local); ais_msg->header.size = swab32 (ais_msg->header.size); ais_msg->header.id = swab32 (ais_msg->header.id); ais_msg->header.error = swab32 (ais_msg->header.error); } void pcmk_cluster_callback ( ais_void_ptr *message, unsigned int nodeid) { const AIS_Message *ais_msg = message; ais_debug_2("Message from node %u (%s)", nodeid, nodeid==local_nodeid?"local":"remote"); /* Shouldn't be required... update_member( ais_msg->sender.id, membership_seq, -1, 0, ais_msg->sender.uname, NULL); */ if(ais_msg->host.size == 0 || ais_str_eq(ais_msg->host.uname, local_uname)) { route_ais_message(ais_msg, FALSE); } else { ais_debug_3("Discarding Msg[%d] (dest=%s:%s, from=%s:%s)", ais_msg->id, ais_dest(&(ais_msg->host)), msg_type2text(ais_msg->host.type), ais_dest(&(ais_msg->sender)), msg_type2text(ais_msg->sender.type)); } } void pcmk_cluster_id_swab(void *msg) { struct crm_identify_msg_s *ais_msg = msg; ais_debug_3("Performing endian conversion..."); ais_msg->id = swab32 (ais_msg->id); ais_msg->pid = swab32 (ais_msg->pid); ais_msg->votes = swab32 (ais_msg->votes); ais_msg->processes = swab32 (ais_msg->processes); ais_msg->born_on = swab64 (ais_msg->born_on); ais_msg->header.size = swab32 (ais_msg->header.size); ais_msg->header.id = swab32 (ais_msg->header.id); } void pcmk_cluster_id_callback (ais_void_ptr *message, unsigned int nodeid) { int changed = 0; const struct crm_identify_msg_s *msg = message; if(nodeid != msg->id) { ais_err("Invalid message: Node %u claimed to be node %d", nodeid, msg->id); return; } ais_debug("Node update: %s (%s)", msg->uname, msg->version); changed = update_member( nodeid, msg->born_on, membership_seq, msg->votes, msg->processes, msg->uname, NULL, msg->version); if(changed) { send_member_notification(); } } struct res_overlay { coroipc_response_header_t header __attribute((aligned(8))); char buf[4096]; }; struct res_overlay *res_overlay = NULL; static void send_ipc_ack(void *conn) { if(res_overlay == NULL) { ais_malloc0(res_overlay, sizeof(struct res_overlay)); } res_overlay->header.id = CRM_MESSAGE_IPC_ACK; res_overlay->header.size = sizeof (coroipc_response_header_t); res_overlay->header.error = CS_OK; pcmk_api->ipc_response_send (conn, res_overlay, res_overlay->header.size); } /* local callbacks */ void pcmk_ipc(void *conn, ais_void_ptr *msg) { AIS_Message *mutable; int type = 0, size = 0; gboolean transient = TRUE; const AIS_Message *ais_msg = (const AIS_Message*)msg; void *async_conn = conn; ais_debug_2("Message from client %p", conn); if(check_message_sanity(msg, ((const AIS_Message*)msg)->data) == FALSE) { /* The message is corrupted - ignore */ send_ipc_ack(conn); msg = NULL; return; } /* Make a copy of the message here and ACK it * The message is only valid until a response is sent * but the response must also be sent _before_ we send anything else */ mutable = ais_msg_copy(ais_msg); AIS_ASSERT(check_message_sanity(mutable, mutable->data)); size = mutable->header.size; /* ais_malloc0(ais_msg, size); */ /* memcpy(ais_msg, msg, size); */ type = mutable->sender.type; ais_debug_3("type: %d local: %d conn: %p host type: %d ais: %d sender pid: %d child pid: %d size: %d", type, mutable->host.local, pcmk_children[type].conn, mutable->host.type, crm_msg_ais, mutable->sender.pid, pcmk_children[type].pid, ((int)SIZEOF(pcmk_children))); if(type > crm_msg_none && type < SIZEOF(pcmk_children)) { /* known child process */ transient = FALSE; } +#if 0 /* If this check fails, the order of pcmk_children probably * doesn't match that of the crm_ais_msg_types enum */ AIS_CHECK(transient || mutable->sender.pid == pcmk_children[type].pid, ais_err("Sender: %d, child[%d]: %d", mutable->sender.pid, type, pcmk_children[type].pid); ais_free(mutable); return); +#endif if(transient == FALSE && type > crm_msg_none && mutable->host.local && pcmk_children[type].conn == NULL && mutable->host.type == crm_msg_ais) { AIS_CHECK(mutable->sender.type != mutable->sender.pid, ais_err("Pid=%d, type=%d", mutable->sender.pid, mutable->sender.type)); ais_info("Recorded connection %p for %s/%d", conn, pcmk_children[type].name, pcmk_children[type].pid); pcmk_children[type].conn = conn; pcmk_children[type].async_conn = async_conn; /* Make sure they have the latest membership */ if(pcmk_children[type].flags & crm_flag_members) { char *update = pcmk_generate_membership_data(); g_hash_table_replace(membership_notify_list, async_conn, async_conn); ais_info("Sending membership update "U64T" to %s", membership_seq, pcmk_children[type].name); send_client_msg(async_conn, crm_class_members, crm_msg_none,update); } } else if(transient) { AIS_CHECK(mutable->sender.type == mutable->sender.pid, ais_err("Pid=%d, type=%d", mutable->sender.pid, mutable->sender.type)); g_hash_table_replace(ipc_client_list, async_conn, GUINT_TO_POINTER(mutable->sender.pid)); } mutable->sender.id = local_nodeid; mutable->sender.size = local_uname_len; memset(mutable->sender.uname, 0, MAX_NAME); memcpy(mutable->sender.uname, local_uname, mutable->sender.size); route_ais_message(mutable, TRUE); send_ipc_ack(conn); msg = NULL; ais_free(mutable); } int pcmk_shutdown (void) { int lpc = 0; static int phase = 0; static int max_wait = 0; static time_t next_log = 0; static int max = SIZEOF(pcmk_children); + + if(use_mcp) { + if(pcmk_children[crm_msg_crmd].conn || pcmk_children[crm_msg_stonith_ng].conn) { + time_t now = time(NULL); + if(now > next_log) { + next_log = now + 300; + ais_notice("Preventing Corosync shutdown. Please ensure Pacemaker is stopped first."); + } + return -1; + } + ais_notice("Unloading Pacemaker plugin"); + return 0; + } if(phase == 0) { ais_notice("Shuting down Pacemaker"); phase = max; } wait_active = FALSE; /* stop the wait loop */ for (; phase > 0; phase--) { /* dont stop anything with start_seq < 1 */ for (lpc = max - 1; lpc >= 0; lpc--) { if(phase != pcmk_children[lpc].start_seq) { continue; } if(pcmk_children[lpc].pid) { pid_t pid = 0; int status = 0; time_t now = time(NULL); if(pcmk_children[lpc].respawn) { max_wait = 5; /* 5 * 30s = 2.5 minutes... plenty once the crmd is gone */ next_log = now + 30; pcmk_children[lpc].respawn = FALSE; stop_child(&(pcmk_children[lpc]), SIGTERM); } pid = wait4(pcmk_children[lpc].pid, &status, WNOHANG, NULL); if(pid < 0) { ais_perror("Call to wait4(%s/%d) failed - treating it as stopped", pcmk_children[lpc].name, pcmk_children[lpc].pid); } else if(pid == 0) { if(now >= next_log) { max_wait--; next_log = now + 30; ais_notice("Still waiting for %s (pid=%d, seq=%d) to terminate...", pcmk_children[lpc].name, pcmk_children[lpc].pid, pcmk_children[lpc].start_seq); if(max_wait <= 0 && phase < pcmk_children[crm_msg_crmd].start_seq) { ais_err("Child %s taking too long to terminate, sending SIGKILL", pcmk_children[lpc].name); stop_child(&(pcmk_children[lpc]), SIGKILL); } } /* Return control to corosync */ return -1; } } /* cleanup */ ais_notice("%s confirmed stopped", pcmk_children[lpc].name); pcmk_children[lpc].async_conn = NULL; pcmk_children[lpc].conn = NULL; pcmk_children[lpc].pid = 0; } } send_cluster_id(); ais_notice("Shutdown complete"); /* TODO: Add back the logsys flush call once its written */ return 0; } struct member_loop_data { char *string; }; void member_vote_count_fn(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; if (ais_str_eq(CRM_NODE_MEMBER, node->state)) { plugin_has_votes += node->votes; } } void member_loop_fn(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; struct member_loop_data *data = user_data; ais_debug_2("Dumping node %u", node->id); data->string = append_member(data->string, node); } char *pcmk_generate_membership_data(void) { int size = 0; struct member_loop_data data; size = 256; ais_malloc0(data.string, size); /* Ensure the list of active processes is up-to-date */ update_member(local_nodeid, 0, 0, -1, get_process_list(), local_uname, CRM_NODE_MEMBER, NULL); plugin_has_votes = 0; g_hash_table_foreach(membership_list, member_vote_count_fn, NULL); if(plugin_has_votes > plugin_expected_votes) { update_expected_votes(plugin_has_votes); } snprintf(data.string, size, "", membership_seq, plugin_has_quorum()?"true":"false", plugin_expected_votes, plugin_has_votes); g_hash_table_foreach(membership_list, member_loop_fn, &data); size = strlen(data.string); data.string = realloc(data.string, size + 9) ;/* 9 = + nul */ sprintf(data.string + size, ""); return data.string; } void pcmk_nodes(void *conn, ais_void_ptr *msg) { char *data = pcmk_generate_membership_data(); void *async_conn = conn; /* send the ACK before we send any other messages * - but after we no longer need to access the message */ send_ipc_ack(conn); msg = NULL; if(async_conn) { send_client_msg(async_conn, crm_class_members, crm_msg_none, data); } ais_free(data); } void pcmk_remove_member(void *conn, ais_void_ptr *msg) { const AIS_Message *ais_msg = msg; char *data = get_ais_data(ais_msg); send_ipc_ack(conn); msg = NULL; if(data != NULL) { char *bcast = ais_concat("remove-peer", data, ':'); send_cluster_msg(crm_msg_ais, NULL, bcast); ais_info("Sent: %s", bcast); ais_free(bcast); } ais_free(data); } static void send_quorum_details(void *conn) { int size = 256; char *data = NULL; ais_malloc0(data, size); snprintf(data, size, "", membership_seq, plugin_has_quorum()?"true":"false", plugin_expected_votes, plugin_has_votes); send_client_msg(conn, crm_class_quorum, crm_msg_none, data); ais_free(data); } void pcmk_quorum(void *conn, ais_void_ptr *msg) { char *dummy = NULL; const AIS_Message *ais_msg = msg; char *data = get_ais_data(ais_msg); send_ipc_ack(conn); msg = NULL; /* Make sure the current number of votes is accurate */ dummy = pcmk_generate_membership_data(); ais_free(dummy); /* Calls without data just want the current quorum details */ if(data != NULL && strlen(data) > 0) { int value = ais_get_int(data, NULL); update_expected_votes(value); } send_quorum_details(conn); ais_free(data); } void pcmk_notify(void *conn, ais_void_ptr *msg) { const AIS_Message *ais_msg = msg; char *data = get_ais_data(ais_msg); void *async_conn = conn; int enable = 0; int sender = ais_msg->sender.pid; send_ipc_ack(conn); msg = NULL; if(ais_str_eq("true", data)) { enable = 1; } ais_info("%s node notifications for child %d (%p)", enable?"Enabling":"Disabling", sender, async_conn); if(enable) { g_hash_table_replace(membership_notify_list, async_conn, async_conn); } else { g_hash_table_remove(membership_notify_list, async_conn); } ais_free(data); } void pcmk_nodeid(void *conn, ais_void_ptr *msg) { static int counter = 0; struct crm_ais_nodeid_resp_s resp; ais_debug_2("Sending local nodeid: %d to %p[%d]", local_nodeid, conn, counter); resp.header.id = crm_class_nodeid; resp.header.size = sizeof (struct crm_ais_nodeid_resp_s); resp.header.error = CS_OK; resp.id = local_nodeid; resp.counter = counter++; memset(resp.uname, 0, MAX_NAME); memcpy(resp.uname, local_uname, local_uname_len); memset(resp.cname, 0, MAX_NAME); memcpy(resp.cname, local_cname, local_cname_len); pcmk_api->ipc_response_send (conn, &resp, resp.header.size); } static gboolean ghash_send_update(gpointer key, gpointer value, gpointer data) { if(send_client_msg(value, crm_class_members, crm_msg_none, data) != 0) { /* remove it */ return TRUE; } return FALSE; } void send_member_notification(void) { char *update = pcmk_generate_membership_data(); ais_info("Sending membership update "U64T" to %d children", membership_seq, g_hash_table_size(membership_notify_list)); g_hash_table_foreach_remove(membership_notify_list, ghash_send_update, update); ais_free(update); } gboolean check_message_sanity(const AIS_Message *msg, const char *data) { gboolean sane = TRUE; gboolean repaired = FALSE; int dest = msg->host.type; int tmp_size = msg->header.size - sizeof(AIS_Message); if(sane && msg->header.size == 0) { ais_err("Message with no size"); sane = FALSE; } if(sane && msg->header.error != CS_OK) { ais_err("Message header contains an error: %d", msg->header.error); sane = FALSE; } AIS_CHECK(msg->header.size > sizeof(AIS_Message), ais_err("Message %d size too small: %d < %zu", msg->header.id, msg->header.size, sizeof(AIS_Message)); return FALSE); if(sane && ais_data_len(msg) != tmp_size) { ais_warn("Message payload size is incorrect: expected %d, got %d", ais_data_len(msg), tmp_size); sane = TRUE; } if(sane && ais_data_len(msg) == 0) { ais_err("Message with no payload"); sane = FALSE; } if(sane && data && msg->is_compressed == FALSE) { int str_size = strlen(data) + 1; if(ais_data_len(msg) != str_size) { int lpc = 0; ais_err("Message payload is corrupted: expected %d bytes, got %d", ais_data_len(msg), str_size); sane = FALSE; for(lpc = (str_size - 10); lpc < msg->size; lpc++) { if(lpc < 0) { lpc = 0; } ais_debug_2("bad_data[%d]: %d / '%c'", lpc, data[lpc], data[lpc]); } } } if(sane == FALSE) { AIS_CHECK(sane, ais_err("Invalid message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size)); } else if(repaired) { ais_err("Repaired message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } else { ais_debug_3("Verified message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } return sane; } static int delivered_transient = 0; static void deliver_transient_msg(gpointer key, gpointer value, gpointer user_data) { int pid = GPOINTER_TO_INT(value); AIS_Message *mutable = user_data; if(pid == mutable->host.type) { int rc = send_client_ipc(key, mutable); delivered_transient++; ais_info("Sent message to %s.%d (rc=%d)", ais_dest(&(mutable->host)), pid, rc); if(rc != 0) { ais_warn("Sending message to %s.%d failed (rc=%d)", ais_dest(&(mutable->host)), pid, rc); log_ais_message(LOG_DEBUG, mutable); } } } gboolean route_ais_message(const AIS_Message *msg, gboolean local_origin) { int rc = 0; int dest = msg->host.type; const char *reason = "unknown"; AIS_Message *mutable = ais_msg_copy(msg); static int service_id = SERVICE_ID_MAKE(PCMK_SERVICE_ID, 0); ais_debug_3("Msg[%d] (dest=%s:%s, from=%s:%s.%d, remote=%s, size=%d)", mutable->id, ais_dest(&(mutable->host)), msg_type2text(dest), ais_dest(&(mutable->sender)), msg_type2text(mutable->sender.type), mutable->sender.pid, local_origin?"false":"true", ais_data_len((mutable))); if(local_origin == FALSE) { if(mutable->host.size == 0 || ais_str_eq(local_uname, mutable->host.uname)) { mutable->host.local = TRUE; } } if(check_message_sanity(mutable, mutable->data) == FALSE) { /* Dont send this message to anyone */ rc = 1; goto bail; } if(mutable->host.local) { void *conn = NULL; const char *lookup = NULL; if(dest == crm_msg_ais) { process_ais_message(mutable); goto bail; } else if(dest == crm_msg_lrmd) { /* lrmd messages are routed via the crm */ dest = crm_msg_crmd; } else if(dest == crm_msg_te) { /* te messages are routed via the crm */ dest = crm_msg_crmd; } else if(dest >= SIZEOF(pcmk_children)) { /* Transient client */ delivered_transient = 0; g_hash_table_foreach(ipc_client_list, deliver_transient_msg, mutable); if(delivered_transient) { ais_debug_2("Sent message to %d transient clients: %d", delivered_transient, dest); goto bail; } else { /* try the crmd */ ais_debug_2("Sending message to transient client %d via crmd", dest); dest = crm_msg_crmd; } } else if(dest == 0) { ais_err("Invalid destination: %d", dest); log_ais_message(LOG_ERR, mutable); log_printf(LOG_ERR, "%s", get_ais_data(mutable)); rc = 1; goto bail; } lookup = msg_type2text(dest); conn = pcmk_children[dest].async_conn; /* the cluster fails in weird and wonderfully obscure ways when this is not true */ AIS_ASSERT(ais_str_eq(lookup, pcmk_children[dest].name)); if(mutable->header.id == service_id) { mutable->header.id = 0; /* reset this back to zero for IPC messages */ } else if(mutable->header.id != 0) { ais_err("reset header id back to zero from %d", mutable->header.id); mutable->header.id = 0; /* reset this back to zero for IPC messages */ } reason = "ipc delivery failed"; rc = send_client_ipc(conn, mutable); } else if(local_origin) { /* forward to other hosts */ ais_debug_3("Forwarding to cluster"); reason = "cluster delivery failed"; rc = send_cluster_msg_raw(mutable); } if(rc != 0) { ais_warn("Sending message to %s.%s failed: %s (rc=%d)", ais_dest(&(mutable->host)), msg_type2text(dest), reason, rc); log_ais_message(LOG_DEBUG, mutable); } bail: ais_free(mutable); return rc==0?TRUE:FALSE; } int send_cluster_msg_raw(const AIS_Message *ais_msg) { int rc = 0; struct iovec iovec; static uint32_t msg_id = 0; AIS_Message *mutable = ais_msg_copy(ais_msg); AIS_ASSERT(local_nodeid != 0); AIS_ASSERT(ais_msg->header.size == (sizeof(AIS_Message) + ais_data_len(ais_msg))); if(mutable->id == 0) { msg_id++; AIS_CHECK(msg_id != 0 /* detect wrap-around */, msg_id++; ais_err("Message ID wrapped around")); mutable->id = msg_id; } mutable->header.error = CS_OK; mutable->header.id = SERVICE_ID_MAKE(PCMK_SERVICE_ID, 0); mutable->sender.id = local_nodeid; mutable->sender.size = local_uname_len; memset(mutable->sender.uname, 0, MAX_NAME); memcpy(mutable->sender.uname, local_uname, mutable->sender.size); iovec.iov_base = (char *)mutable; iovec.iov_len = mutable->header.size; ais_debug_3("Sending message (size=%u)", (unsigned int)iovec.iov_len); rc = pcmk_api->totem_mcast(&iovec, 1, TOTEMPG_SAFE); if(rc == 0 && mutable->is_compressed == FALSE) { ais_debug_2("Message sent: %.80s", mutable->data); } AIS_CHECK(rc == 0, ais_err("Message not sent (%d): %.120s", rc, mutable->data)); ais_free(mutable); return rc; } #define min(x,y) (x)<(y)?(x):(y) void send_cluster_id(void) { int rc = 0; int lpc = 0; int len = 0; time_t now = time(NULL); struct iovec iovec; struct crm_identify_msg_s *msg = NULL; static time_t started = 0; static uint64_t first_seq = 0; AIS_ASSERT(local_nodeid != 0); if(started == 0) { started = now; first_seq = membership_seq; } if(local_born_on == 0) { if(started + 15 < now) { ais_debug("Born-on set to: "U64T" (age)", first_seq); local_born_on = first_seq; } else if(have_reliable_membership_id) { ais_debug("Born-on set to: "U64T" (peer)", membership_seq); local_born_on = membership_seq; } else { ais_debug("Leaving born-on unset: "U64T, membership_seq); } } ais_malloc0(msg, sizeof(struct crm_identify_msg_s)); msg->header.size = sizeof(struct crm_identify_msg_s); msg->id = local_nodeid; /* msg->header.error = CS_OK; */ msg->header.id = SERVICE_ID_MAKE(PCMK_SERVICE_ID, 1); len = min(local_uname_len, MAX_NAME-1); memset(msg->uname, 0, MAX_NAME); memcpy(msg->uname, local_uname, len); len = min(strlen(VERSION), MAX_NAME-1); memset(msg->version, 0, MAX_NAME); memcpy(msg->version, VERSION, len); msg->votes = 1; msg->pid = getpid(); msg->processes = get_process_list(); msg->born_on = local_born_on; ais_debug("Local update: id=%u, born="U64T", seq="U64T"", local_nodeid, local_born_on, membership_seq); update_member( local_nodeid, local_born_on, membership_seq, msg->votes, msg->processes, NULL, NULL, VERSION); iovec.iov_base = (char *)msg; iovec.iov_len = msg->header.size; rc = pcmk_api->totem_mcast(&iovec, 1, TOTEMPG_SAFE); AIS_CHECK(rc == 0, ais_err("Message not sent (%d)", rc)); ais_free(msg); } static gboolean ghash_send_removal(gpointer key, gpointer value, gpointer data) { send_quorum_details(value); if(send_client_msg(value, crm_class_rmpeer, crm_msg_none, data) != 0) { /* remove it */ return TRUE; } return FALSE; } void ais_remove_peer(char *node_id) { uint32_t id = ais_get_int(node_id, NULL); crm_node_t *node = g_hash_table_lookup(membership_list, GUINT_TO_POINTER(id)); if(node == NULL) { ais_info("Peer %u is unknown", id); } else if(ais_str_eq(CRM_NODE_MEMBER, node->state)) { ais_warn("Peer %u/%s is still active", id, node->uname); } else if(g_hash_table_remove(membership_list, GUINT_TO_POINTER(id))) { plugin_expected_votes--; ais_notice("Removed dead peer %u from the membership list", id); ais_info("Sending removal of %u to %d children", id, g_hash_table_size(membership_notify_list)); g_hash_table_foreach_remove(membership_notify_list, ghash_send_removal, node_id); } else { ais_warn("Peer %u/%s was not removed", id, node->uname); } } gboolean process_ais_message(const AIS_Message *msg) { int len = ais_data_len(msg); char *data = get_ais_data(msg); do_ais_log(LOG_DEBUG, "Msg[%d] (dest=%s:%s, from=%s:%s.%d, remote=%s, size=%d): %.90s", msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->sender.uname==local_uname?"false":"true", ais_data_len(msg), data); if(data && len > 12 && strncmp("remove-peer:", data, 12) == 0) { char *node = data+12; ais_remove_peer(node); } ais_free(data); return TRUE; } static void member_dump_fn(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; ais_info(" node id:%u, uname=%s state=%s processes=%.16x born="U64T" seen="U64T" addr=%s version=%s", node->id, node->uname?node->uname:"-unknown-", node->state, node->processes, node->born, node->last_seen, node->addr?node->addr:"-unknown-", node->version?node->version:"-unknown-"); } void pcmk_exec_dump(void) { /* Called after SIG_USR2 */ process_ais_conf(); ais_info("Local id: %u, uname: %s, born: "U64T, local_nodeid, local_uname, local_born_on); ais_info("Membership id: "U64T", quorate: %s, expected: %u, actual: %u", membership_seq, plugin_has_quorum()?"true":"false", plugin_expected_votes, plugin_has_votes); g_hash_table_foreach(membership_list, member_dump_fn, NULL); } diff --git a/lib/common/ais.c b/lib/common/ais.c index e5d00f68cb..5d7c4acc36 100644 --- a/lib/common/ais.c +++ b/lib/common/ais.c @@ -1,1204 +1,1204 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include "stack.h" #ifdef SUPPORT_COROSYNC # include #endif #ifdef SUPPORT_CMAN # include cman_handle_t pcmk_cman_handle = NULL; #endif #ifdef SUPPORT_CS_QUORUM # include # include # include # include # include quorum_handle_t pcmk_quorum_handle = 0; cpg_handle_t pcmk_cpg_handle = 0; struct cpg_name pcmk_cpg_group = { .length = 0, .value[0] = 0, }; #endif static char *pcmk_uname = NULL; static int pcmk_uname_len = 0; static uint32_t pcmk_nodeid = 0; enum crm_ais_msg_types text2msg_type(const char *text) { int type = crm_msg_none; CRM_CHECK(text != NULL, return type); if(safe_str_eq(text, "ais")) { type = crm_msg_ais; } else if(safe_str_eq(text, "crm_plugin")) { type = crm_msg_ais; } else if(safe_str_eq(text, CRM_SYSTEM_CIB)) { type = crm_msg_cib; } else if(safe_str_eq(text, CRM_SYSTEM_CRMD)) { type = crm_msg_crmd; } else if(safe_str_eq(text, CRM_SYSTEM_DC)) { type = crm_msg_crmd; } else if(safe_str_eq(text, CRM_SYSTEM_TENGINE)) { type = crm_msg_te; } else if(safe_str_eq(text, CRM_SYSTEM_PENGINE)) { type = crm_msg_pe; } else if(safe_str_eq(text, CRM_SYSTEM_LRMD)) { type = crm_msg_lrmd; } else if(safe_str_eq(text, CRM_SYSTEM_STONITHD)) { type = crm_msg_stonithd; } else if(safe_str_eq(text, "stonith-ng")) { type = crm_msg_stonith_ng; } else if(safe_str_eq(text, "attrd")) { type = crm_msg_attrd; } else { /* This will normally be a transient client rather than * a cluster daemon. Set the type to the pid of the client */ int scan_rc = sscanf(text, "%d", &type); if(scan_rc != 1) { /* Ensure its sane */ type = crm_msg_none; } } return type; } char *get_ais_data(const AIS_Message *msg) { int rc = BZ_OK; char *uncompressed = NULL; unsigned int new_size = msg->size + 1; if(msg->is_compressed == FALSE) { crm_debug_2("Returning uncompressed message data"); uncompressed = strdup(msg->data); } else { crm_debug_2("Decompressing message data"); crm_malloc0(uncompressed, new_size); rc = BZ2_bzBuffToBuffDecompress( uncompressed, &new_size, (char*)msg->data, msg->compressed_size, 1, 0); CRM_ASSERT(rc == BZ_OK); CRM_ASSERT(new_size == msg->size); } return uncompressed; } #if SUPPORT_COROSYNC int ais_fd_sync = -1; int ais_fd_async = -1; /* never send messages via this channel */ void *ais_ipc_ctx = NULL; hdb_handle_t ais_ipc_handle = 0; GFDSource *ais_source = NULL; GFDSource *ais_source_sync = NULL; GFDSource *cman_source = NULL; GFDSource *cpg_source = NULL; GFDSource *quorumd_source = NULL; static char *ais_cluster_name = NULL; gboolean get_ais_nodeid(uint32_t *id, char **uname) { struct iovec iov; int retries = 0; int rc = CS_OK; coroipc_response_header_t header; struct crm_ais_nodeid_resp_s answer; header.error = CS_OK; header.id = crm_class_nodeid; header.size = sizeof(coroipc_response_header_t); CRM_CHECK(id != NULL, return FALSE); CRM_CHECK(uname != NULL, return FALSE); iov.iov_base = &header; iov.iov_len = header.size; retry: errno = 0; rc = coroipcc_msg_send_reply_receive( ais_ipc_handle, &iov, 1, &answer, sizeof (answer)); if(rc == CS_OK) { CRM_CHECK(answer.header.size == sizeof (struct crm_ais_nodeid_resp_s), crm_err("Odd message: id=%d, size=%d, error=%d", answer.header.id, answer.header.size, answer.header.error)); CRM_CHECK(answer.header.id == crm_class_nodeid, crm_err("Bad response id: %d", answer.header.id)); } if(rc == CS_ERR_TRY_AGAIN && retries < 20) { retries++; crm_info("Peer overloaded: Re-sending message (Attempt %d of 20)", retries); sleep(retries); /* Proportional back off */ goto retry; } if(rc != CS_OK) { crm_err("Sending nodeid request: FAILED (rc=%d): %s", rc, ais_error2text(rc)); return FALSE; } else if(answer.header.error != CS_OK) { crm_err("Bad response from peer: (rc=%d): %s", rc, ais_error2text(rc)); return FALSE; } crm_info("Server details: id=%u uname=%s cname=%s", answer.id, answer.uname, answer.cname); *id = answer.id; *uname = crm_strdup(answer.uname); ais_cluster_name = crm_strdup(answer.cname); return TRUE; } gboolean crm_get_cluster_name(char **cname) { CRM_CHECK(cname != NULL, return FALSE); if(ais_cluster_name) { *cname = crm_strdup(ais_cluster_name); return TRUE; } return FALSE; } gboolean send_ais_text(int class, const char *data, gboolean local, const char *node, enum crm_ais_msg_types dest) { static int msg_id = 0; static int local_pid = 0; enum cluster_type_e cluster_type = get_cluster_type(); int retries = 0; int rc = CS_OK; int buf_len = sizeof(coroipc_response_header_t); char *buf = NULL; struct iovec iov; const char *transport = "pcmk"; coroipc_response_header_t *header = NULL; AIS_Message *ais_msg = NULL; enum crm_ais_msg_types sender = text2msg_type(crm_system_name); /* There are only 6 handlers registered to crm_lib_service in plugin.c */ CRM_CHECK(class < 6, crm_err("Invalid message class: %d", class); return FALSE); if(data == NULL) { data = ""; } if(local_pid == 0) { local_pid = getpid(); } if(sender == crm_msg_none) { sender = local_pid; } crm_malloc0(ais_msg, sizeof(AIS_Message)); ais_msg->id = msg_id++; ais_msg->header.id = class; ais_msg->header.error = CS_OK; ais_msg->host.type = dest; ais_msg->host.local = local; if(node) { ais_msg->host.size = strlen(node); memset(ais_msg->host.uname, 0, MAX_NAME); memcpy(ais_msg->host.uname, node, ais_msg->host.size); ais_msg->host.id = 0; } else { ais_msg->host.size = 0; memset(ais_msg->host.uname, 0, MAX_NAME); ais_msg->host.id = 0; } ais_msg->sender.id = 0; ais_msg->sender.type = sender; ais_msg->sender.pid = local_pid; ais_msg->sender.size = pcmk_uname_len; memset(ais_msg->sender.uname, 0, MAX_NAME); memcpy(ais_msg->sender.uname, pcmk_uname, ais_msg->sender.size); ais_msg->size = 1 + strlen(data); if(ais_msg->size < CRM_BZ2_THRESHOLD) { failback: crm_realloc(ais_msg, sizeof(AIS_Message) + ais_msg->size); memcpy(ais_msg->data, data, ais_msg->size); } else { char *compressed = NULL; char *uncompressed = crm_strdup(data); unsigned int len = (ais_msg->size * 1.1) + 600; /* recomended size */ crm_debug_5("Compressing message payload"); crm_malloc(compressed, len); rc = BZ2_bzBuffToBuffCompress( compressed, &len, uncompressed, ais_msg->size, CRM_BZ2_BLOCKS, 0, CRM_BZ2_WORK); crm_free(uncompressed); if(rc != BZ_OK) { crm_err("Compression failed: %d", rc); crm_free(compressed); goto failback; } crm_realloc(ais_msg, sizeof(AIS_Message) + len + 1); memcpy(ais_msg->data, compressed, len); ais_msg->data[len] = 0; crm_free(compressed); ais_msg->is_compressed = TRUE; ais_msg->compressed_size = len; crm_debug_2("Compression details: %d -> %d", ais_msg->size, ais_data_len(ais_msg)); } ais_msg->header.size = sizeof(AIS_Message) + ais_data_len(ais_msg); crm_debug_3("Sending%s message %d to %s.%s (data=%d, total=%d)", ais_msg->is_compressed?" compressed":"", ais_msg->id, ais_dest(&(ais_msg->host)), msg_type2text(dest), ais_data_len(ais_msg), ais_msg->header.size); iov.iov_base = ais_msg; iov.iov_len = ais_msg->header.size; crm_realloc(buf, buf_len); do { if(rc == CS_ERR_TRY_AGAIN) { retries++; crm_info("Peer overloaded or membership in flux:" " Re-sending message (Attempt %d of 20)", retries); sleep(retries); /* Proportional back off */ } errno = 0; switch(cluster_type) { case pcmk_cluster_classic_ais: rc = coroipcc_msg_send_reply_receive(ais_ipc_handle, &iov, 1, buf, buf_len); header = (coroipc_response_header_t *)buf; if(rc == CS_OK) { CRM_CHECK_AND_STORE(header->size == sizeof (coroipc_response_header_t), crm_err("Odd message: id=%d, size=%d, class=%d, error=%d", header->id, header->size, class, header->error)); CRM_ASSERT(buf_len >= header->size); CRM_CHECK_AND_STORE(header->id == CRM_MESSAGE_IPC_ACK, crm_err("Bad response id (%d) for request (%d)", header->id, ais_msg->header.id)); CRM_CHECK(header->error == CS_OK, rc = header->error); } break; case pcmk_cluster_corosync: case pcmk_cluster_cman: transport = "cpg"; CRM_CHECK_AND_STORE(dest != crm_msg_ais, rc = CS_ERR_MESSAGE_ERROR; goto bail); rc = cpg_mcast_joined(pcmk_cpg_handle, CPG_TYPE_AGREED, &iov, 1); break; case pcmk_cluster_unknown: case pcmk_cluster_invalid: case pcmk_cluster_heartbeat: CRM_ASSERT(is_openais_cluster()); break; } } while (rc == CS_ERR_TRY_AGAIN && retries < 20); bail: if(rc != CS_OK) { crm_perror(LOG_ERR,"Sending message %d via %s: FAILED (rc=%d): %s", ais_msg->id, transport, rc, ais_error2text(rc)); } else { crm_debug_4("Message %d: sent", ais_msg->id); } crm_free(buf); crm_free(ais_msg); return (rc == CS_OK); } gboolean send_ais_message(xmlNode *msg, gboolean local, const char *node, enum crm_ais_msg_types dest) { gboolean rc = TRUE; char *data = NULL; if(is_classic_ais_cluster()) { if(ais_fd_async < 0 || ais_source == NULL) { crm_err("Not connected to AIS: %d %p", ais_fd_async, ais_source); return FALSE; } } data = dump_xml_unformatted(msg); rc = send_ais_text(0, data, local, node, dest); crm_free(data); return rc; } void terminate_ais_connection(void) { crm_notice("Disconnecting from AIS"); /* G_main_del_fd(ais_source); */ /* G_main_del_fd(ais_source_sync); */ #ifdef SUPPORT_CMAN if(is_cman_cluster()) { cman_stop_notification(pcmk_cman_handle); cman_finish(pcmk_cman_handle); } #endif if(is_corosync_cluster()) { quorum_finalize(pcmk_quorum_handle); } if(is_classic_ais_cluster() == FALSE) { coroipcc_service_disconnect(ais_ipc_handle); } else { cpg_leave(pcmk_cpg_handle, &pcmk_cpg_group); } } int ais_membership_timer = 0; gboolean ais_membership_force = FALSE; static gboolean ais_dispatch_message( AIS_Message *msg, gboolean (*dispatch)(AIS_Message*,char*,int)) { char *data = NULL; char *uncompressed = NULL; xmlNode *xml = NULL; CRM_ASSERT(msg != NULL); crm_debug_3("Got new%s message (size=%d, %d, %d)", msg->is_compressed?" compressed":"", ais_data_len(msg), msg->size, msg->compressed_size); data = msg->data; if(msg->is_compressed && msg->size > 0) { int rc = BZ_OK; unsigned int new_size = msg->size + 1; if(check_message_sanity(msg, NULL) == FALSE) { goto badmsg; } crm_debug_5("Decompressing message data"); crm_malloc0(uncompressed, new_size); rc = BZ2_bzBuffToBuffDecompress( uncompressed, &new_size, data, msg->compressed_size, 1, 0); if(rc != BZ_OK) { crm_err("Decompression failed: %d", rc); goto badmsg; } CRM_ASSERT(rc == BZ_OK); CRM_ASSERT(new_size == msg->size); data = uncompressed; } else if(check_message_sanity(msg, data) == FALSE) { goto badmsg; } else if(safe_str_eq("identify", data)) { int pid = getpid(); char *pid_s = crm_itoa(pid); send_ais_text(0, pid_s, TRUE, NULL, crm_msg_ais); crm_free(pid_s); goto done; } if(msg->header.id != crm_class_members) { crm_update_peer(msg->sender.id, 0,0,0,0, msg->sender.uname, msg->sender.uname, NULL, NULL); } if(msg->header.id == crm_class_rmpeer) { uint32_t id = crm_int_helper(data, NULL); crm_info("Removing peer %s/%u", data, id); reap_crm_member(id); goto done; } else if(msg->header.id == crm_class_members || msg->header.id == crm_class_quorum) { xml = string2xml(data); if(xml == NULL) { crm_err("Invalid membership update: %s", data); goto badmsg; } if(is_classic_ais_cluster() == FALSE) { xml_child_iter(xml, node, crm_update_cman_node(node, crm_peer_seq)); } else { const char *value = NULL; gboolean quorate = FALSE; value = crm_element_value(xml, "quorate"); CRM_CHECK(value != NULL, crm_log_xml_err(xml, "No quorum value:"); goto badmsg); if(crm_is_true(value)) { quorate = TRUE; } value = crm_element_value(xml, "id"); CRM_CHECK(value != NULL, crm_log_xml_err(xml, "No membership id"); goto badmsg); crm_peer_seq = crm_int_helper(value, NULL); if(quorate != crm_have_quorum) { crm_notice("Membership %s: quorum %s", value, quorate?"acquired":"lost"); crm_have_quorum = quorate; } else { crm_info("Membership %s: quorum %s", value, quorate?"retained":"still lost"); } xml_child_iter(xml, node, crm_update_ais_node(node, crm_peer_seq)); } } if(dispatch != NULL) { dispatch(msg, data, 0); } done: crm_free(uncompressed); free_xml(xml); return TRUE; badmsg: crm_err("Invalid message (id=%d, dest=%s:%s, from=%s:%s.%d):" " min=%d, total=%d, size=%d, bz2_size=%d", msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, (int)sizeof(AIS_Message), msg->header.size, msg->size, msg->compressed_size); goto done; } gboolean ais_dispatch(int sender, gpointer user_data) { int rc = CS_OK; char *buffer = NULL; gboolean good = TRUE; gboolean (*dispatch)(AIS_Message*,char*,int) = user_data; rc = coroipcc_dispatch_get (ais_ipc_handle, (void**)&buffer, 0); if (rc == 0 || buffer == NULL) { /* Zero is a legal "no message afterall" value */ return TRUE; } else if (rc != CS_OK) { crm_perror(LOG_ERR,"Receiving message body failed: (%d) %s", rc, ais_error2text(rc)); goto bail; } good = ais_dispatch_message((AIS_Message*)buffer, dispatch); coroipcc_dispatch_put (ais_ipc_handle); return good; bail: crm_err("AIS connection failed"); return FALSE; } static void ais_destroy(gpointer user_data) { crm_err("AIS connection terminated"); ais_fd_sync = -1; exit(1); } #ifdef SUPPORT_CMAN static gboolean pcmk_cman_dispatch(int sender, gpointer user_data) { int rc = cman_dispatch(pcmk_cman_handle, CMAN_DISPATCH_ONE); if(rc < 0) { crm_err("Connection to cman failed: %d", rc); return FALSE; } return TRUE; } static char *append_cman_member(char *data, cman_node_t *node, uint32_t seq) { int size = 1; /* nul */ int offset = 0; static int fixed_len = 4 + 8 + 7 + 6 + 6 + 7 + 11; if(data) { size = strlen(data); } offset = size; size += fixed_len; size += 32; /* node->id */ size += 100; /* node->seq, node->born */ size += strlen(CRM_NODE_MEMBER); if(node->cn_name) { size += (7 + strlen(node->cn_name)); } data = realloc(data, size); offset += snprintf(data + offset, size - offset, "cn_nodeid); if(node->cn_name) { offset += snprintf(data + offset, size - offset, "uname=\"%s\" ", node->cn_name); } offset += snprintf(data + offset, size - offset, "state=\"%s\" ", node->cn_member?CRM_NODE_MEMBER:CRM_NODE_LOST); offset += snprintf(data + offset, size - offset, "born=\"%u\" ", node->cn_incarnation); offset += snprintf(data + offset, size - offset, "seen=\"%d\" ", seq); offset += snprintf(data + offset, size - offset, "/>"); return data; } #define MAX_NODES 256 static void cman_event_callback(cman_handle_t handle, void *privdata, int reason, int arg) { char *payload = NULL; int rc = 0, lpc = 0, size = 256, node_count = 0; cman_cluster_t cluster; static cman_node_t cman_nodes[MAX_NODES]; gboolean (*dispatch)(AIS_Message*,char*,int) = privdata; switch (reason) { case CMAN_REASON_STATECHANGE: memset(&cluster, 0, sizeof(cluster)); rc = cman_get_cluster(pcmk_cman_handle, &cluster); if (rc < 0) { crm_err("Couldn't query cman cluster details: %d %d", rc, errno); return; } if(arg != crm_have_quorum) { crm_notice("Membership %d: quorum %s", cluster.ci_generation, arg?"acquired":"lost"); crm_have_quorum = arg; } else { crm_info("Membership %d: quorum %s", cluster.ci_generation, arg?"retained":"still lost"); } rc = cman_get_nodes(pcmk_cman_handle, MAX_NODES, &node_count, cman_nodes); if (rc < 0) { crm_err("Couldn't query cman node list: %d %d", rc, errno); return; } crm_malloc0(payload, size); snprintf(payload, size, "", cluster.ci_generation, arg?"true":"false"); for (lpc = 0; lpc < node_count; lpc++) { if (cman_nodes[lpc].cn_nodeid == 0) { /* Never allow node ID 0 to be considered a member #315711 */ cman_nodes[lpc].cn_member = 0; break; } crm_update_peer(cman_nodes[lpc].cn_nodeid, cman_nodes[lpc].cn_incarnation, cluster.ci_generation, 0, 0, cman_nodes[lpc].cn_name, cman_nodes[lpc].cn_name, NULL, cman_nodes[lpc].cn_member?CRM_NODE_MEMBER:CRM_NODE_LOST); if (dispatch && cman_nodes[lpc].cn_member) { payload = append_cman_member(payload, &(cman_nodes[lpc]), cluster.ci_generation); } } if(dispatch) { AIS_Message ais_msg; memset(&ais_msg, 0, sizeof(AIS_Message)); ais_msg.header.id = crm_class_members; ais_msg.header.error = CS_OK; ais_msg.header.size = sizeof(AIS_Message); ais_msg.host.type = crm_msg_none; ais_msg.sender.type = crm_msg_ais; size = strlen(payload); payload = realloc(payload, size + 9) ;/* 9 = + nul */ sprintf(payload + size, ""); /* ais_msg.data = payload; */ ais_msg.size = size + 9; /* ais_msg.header.size += ais_msg.size; */ dispatch(&ais_msg, payload, 0); } crm_free(payload); break; case CMAN_REASON_TRY_SHUTDOWN: /* Always reply with a negative - pacemaker needs to be stopped first */ crm_info("CMAN wants to shut down: %s", arg?"forced":"optional"); cman_replyto_shutdown(pcmk_cman_handle, 0); break; case CMAN_REASON_CONFIG_UPDATE: /* Ignore */ break; } } #endif gboolean init_cman_connection( gboolean (*dispatch)(AIS_Message*,char*,int), void (*destroy)(gpointer)) { #ifdef SUPPORT_CMAN int rc = -1, fd = -1; cman_cluster_t cluster; crm_info("Configuring Pacemaker to obtain quorum from cman"); memset(&cluster, 0, sizeof(cluster)); pcmk_cman_handle = cman_init(dispatch); if(pcmk_cman_handle == NULL || cman_is_active(pcmk_cman_handle) == FALSE) { crm_err("Couldn't connect to cman"); goto cman_bail; } rc = cman_get_cluster(pcmk_cman_handle, &cluster); if (rc < 0) { crm_err("Couldn't query cman cluster details: %d %d", rc, errno); goto cman_bail; } ais_cluster_name = crm_strdup(cluster.ci_name); rc = cman_start_notification(pcmk_cman_handle, cman_event_callback); if (rc < 0) { crm_err("Couldn't register for cman notifications: %d %d", rc, errno); goto cman_bail; } /* Get the current membership state */ cman_event_callback(pcmk_cman_handle, dispatch, CMAN_REASON_STATECHANGE, cman_is_quorate(pcmk_cman_handle)); fd = cman_get_fd(pcmk_cman_handle); crm_debug("Adding fd=%d to mainloop", fd); cman_source = G_main_add_fd( G_PRIORITY_HIGH, fd, FALSE, pcmk_cman_dispatch, dispatch, destroy); cman_bail: if (rc < 0) { cman_finish(pcmk_cman_handle); return FALSE; } #else crm_err("cman qorum is not supported in this build"); exit(100); #endif return TRUE; } #ifdef SUPPORT_CS_QUORUM gboolean (*pcmk_cpg_dispatch_fn)(AIS_Message*,char*,int) = NULL; static gboolean pcmk_cpg_dispatch(int sender, gpointer user_data) { int rc = 0; pcmk_cpg_dispatch_fn = user_data; rc = cpg_dispatch(pcmk_cpg_handle, CS_DISPATCH_ALL); if(rc != CS_OK) { crm_err("Connection to the CPG API failed: %d", rc); return FALSE; } return TRUE; } static void pcmk_cpg_deliver ( cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { AIS_Message *ais_msg = (AIS_Message*)msg; if(ais_msg->sender.id > 0 && ais_msg->sender.id != nodeid) { crm_err("Nodeid mismatch from %d.%d: claimed nodeid=%u", nodeid, pid, ais_msg->sender.id); return; } else if(ais_msg->host.size != 0 && safe_str_neq(ais_msg->host.uname, pcmk_uname)) { /* Not for us */ return; } ais_msg->sender.id = nodeid; if(ais_msg->sender.size == 0) { crm_node_t *peer = crm_get_peer(nodeid, NULL); if(peer == NULL) { crm_err("Peer with nodeid=%u is unknown", nodeid); } else if(peer->uname == NULL) { crm_err("No uname for peer with nodeid=%u", nodeid); } else { crm_notice("Fixing uname for peer with nodeid=%u", nodeid); ais_msg->sender.size = strlen(peer->uname); memset(ais_msg->sender.uname, 0, MAX_NAME); memcpy(ais_msg->sender.uname, peer->uname, ais_msg->sender.size); } } ais_dispatch_message(ais_msg, pcmk_cpg_dispatch_fn); } static void pcmk_cpg_membership( cpg_handle_t handle, const struct cpg_name *groupName, const struct cpg_address *member_list, size_t member_list_entries, const struct cpg_address *left_list, size_t left_list_entries, const struct cpg_address *joined_list, size_t joined_list_entries) { /* Don't care about CPG membership */ } static gboolean pcmk_quorum_dispatch(int sender, gpointer user_data) { int rc = 0; rc = quorum_dispatch(pcmk_quorum_handle, CS_DISPATCH_ALL); if(rc < 0) { crm_err("Connection to the Quorum API failed: %d", rc); return FALSE; } return TRUE; } static void pcmk_quorum_notification( quorum_handle_t handle, uint32_t quorate, uint64_t ring_id, uint32_t view_list_entries, uint32_t *view_list) { int i; if(quorate != crm_have_quorum) { crm_notice("Membership "U64T": quorum %s (%lu)", ring_id, quorate?"acquired":"lost", (long unsigned int)view_list_entries); crm_have_quorum = quorate; } else { crm_info("Membership "U64T": quorum %s (%lu)", ring_id, quorate?"retained":"still lost", (long unsigned int)view_list_entries); } for (i=0; ihost.type; int tmp_size = msg->header.size - sizeof(AIS_Message); if(sane && msg->header.size == 0) { crm_warn("Message with no size"); sane = FALSE; } if(sane && msg->header.error != CS_OK) { crm_warn("Message header contains an error: %d", msg->header.error); sane = FALSE; } if(sane && ais_data_len(msg) != tmp_size) { crm_warn("Message payload size is incorrect: expected %d, got %d", ais_data_len(msg), tmp_size); sane = TRUE; } if(sane && ais_data_len(msg) == 0) { crm_warn("Message with no payload"); sane = FALSE; } if(sane && data && msg->is_compressed == FALSE) { int str_size = strlen(data) + 1; if(ais_data_len(msg) != str_size) { int lpc = 0; crm_warn("Message payload is corrupted: expected %d bytes, got %d", ais_data_len(msg), str_size); sane = FALSE; for(lpc = (str_size - 10); lpc < msg->size; lpc++) { if(lpc < 0) { lpc = 0; } crm_debug("bad_data[%d]: %d / '%c'", lpc, data[lpc], data[lpc]); } } } if(sane == FALSE) { crm_err("Invalid message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } else if(repaired) { crm_err("Repaired message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } else { crm_debug_3("Verfied message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } return sane; } #endif diff --git a/mcp/corosync.c b/mcp/corosync.c index e2e702bb98..4e381ac907 100644 --- a/mcp/corosync.c +++ b/mcp/corosync.c @@ -1,613 +1,635 @@ /* * Copyright (C) 2010 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #ifdef SUPPORT_CMAN #include #endif static struct cpg_name cpg_group = { .length = 0, .value[0] = 0, }; gboolean use_cman = FALSE; static cpg_handle_t cpg_handle; static corosync_cfg_handle_t cfg_handle; static corosync_cfg_state_notification_t cfg_buffer; static inline const char *ais_error2text(int error) { const char *text = "unknown"; switch(error) { case CS_OK: text = "None"; break; case CS_ERR_LIBRARY: text = "Library error"; break; case CS_ERR_VERSION: text = "Version error"; break; case CS_ERR_INIT: text = "Initialization error"; break; case CS_ERR_TIMEOUT: text = "Timeout"; break; case CS_ERR_TRY_AGAIN: text = "Try again"; break; case CS_ERR_INVALID_PARAM: text = "Invalid parameter"; break; case CS_ERR_NO_MEMORY: text = "No memory"; break; case CS_ERR_BAD_HANDLE: text = "Bad handle"; break; case CS_ERR_BUSY: text = "Busy"; break; case CS_ERR_ACCESS: text = "Access error"; break; case CS_ERR_NOT_EXIST: text = "Doesn't exist"; break; case CS_ERR_NAME_TOO_LONG: text = "Name too long"; break; case CS_ERR_EXIST: text = "Exists"; break; case CS_ERR_NO_SPACE: text = "No space"; break; case CS_ERR_INTERRUPT: text = "Interrupt"; break; case CS_ERR_NAME_NOT_FOUND: text = "Name not found"; break; case CS_ERR_NO_RESOURCES: text = "No resources"; break; case CS_ERR_NOT_SUPPORTED: text = "Not supported"; break; case CS_ERR_BAD_OPERATION: text = "Bad operation"; break; case CS_ERR_FAILED_OPERATION: text = "Failed operation"; break; case CS_ERR_MESSAGE_ERROR: text = "Message error"; break; case CS_ERR_QUEUE_FULL: text = "Queue full"; break; case CS_ERR_QUEUE_NOT_AVAILABLE: text = "Queue not available"; break; case CS_ERR_BAD_FLAGS: text = "Bad flags"; break; case CS_ERR_TOO_BIG: text = "To big"; break; case CS_ERR_NO_SECTIONS: text = "No sections"; break; } return text; } /* =::=::=::= CFG - Shutdown stuff =::=::=::= */ static void cfg_shutdown_callback(corosync_cfg_handle_t h, corosync_cfg_shutdown_flags_t flags) { if (flags & COROSYNC_CFG_SHUTDOWN_FLAG_REQUEST) { /* Never allow corosync to shut down while we're running */ crm_info("Corosync wants to shut down: %s", (flags&COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE)?"immediate": (flags&COROSYNC_CFG_SHUTDOWN_FLAG_REGARDLESS)?"forced":"optional"); corosync_cfg_replyto_shutdown(h, COROSYNC_CFG_SHUTDOWN_FLAG_NO); } } static corosync_cfg_callbacks_t cfg_callbacks = { .corosync_cfg_shutdown_callback = cfg_shutdown_callback, .corosync_cfg_state_track_callback = NULL, }; static gboolean pcmk_cfg_dispatch(int sender, gpointer user_data) { corosync_cfg_handle_t *handle = (corosync_cfg_handle_t*)user_data; cs_error_t rc = corosync_cfg_dispatch(*handle, CS_DISPATCH_ALL); if (rc != CS_OK) { return FALSE; } return TRUE; } static void cfg_connection_destroy(gpointer user_data) { crm_err("Connection destroyed"); cfg_handle = 0; return; } gboolean cluster_disconnect_cfg(void) { if(cfg_handle) { corosync_cfg_finalize(cfg_handle); cfg_handle = 0; } return TRUE; } gboolean cluster_connect_cfg(uint32_t *nodeid) { cs_error_t rc; int fd; rc = corosync_cfg_initialize(&cfg_handle, &cfg_callbacks); if (rc != CS_OK) { crm_err("corosync cfg init error %d", rc); return FALSE; } rc = corosync_cfg_fd_get(cfg_handle, &fd); if (rc != CS_OK) { crm_err("corosync cfg fd_get error %d", rc); goto bail; } rc = corosync_cfg_local_get(cfg_handle, nodeid); if (rc != CS_OK) { crm_err("corosync cfg local_get error %d", rc); goto bail; } crm_debug("Our nodeid: %d", *nodeid); rc = corosync_cfg_state_track (cfg_handle, 0, &cfg_buffer); if (rc != CS_OK) { crm_err("corosync cfg stack_track error %d", rc); goto bail; } crm_debug("Adding fd=%d to mainloop", fd); G_main_add_fd( G_PRIORITY_HIGH, fd, FALSE, pcmk_cfg_dispatch, &cfg_handle, cfg_connection_destroy); return TRUE; bail: corosync_cfg_finalize(cfg_handle); return FALSE; } /* =::=::=::= CPG - Closed Process Group Messaging =::=::=::= */ static gboolean pcmk_cpg_dispatch(int sender, gpointer user_data) { cpg_handle_t *handle = (cpg_handle_t*)user_data; cs_error_t rc = cpg_dispatch(*handle, CS_DISPATCH_ALL); if (rc != CS_OK) { return FALSE; } return TRUE; } static void cpg_connection_destroy(gpointer user_data) { crm_err("Connection destroyed"); cpg_handle = 0; return; } static void pcmk_cpg_deliver ( cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { if(nodeid != local_nodeid) { uint32_t procs = 0; xmlNode *xml = string2xml(msg); const char *uname = crm_element_value(xml, "uname"); crm_element_value_int(xml, "proclist", (int*)&procs); /* crm_debug("Got proclist %.32x from %s", procs, uname); */ if(update_node_processes(nodeid, uname, procs)) { update_process_clients(); } } } static void pcmk_cpg_membership( cpg_handle_t handle, const struct cpg_name *groupName, const struct cpg_address *member_list, size_t member_list_entries, const struct cpg_address *left_list, size_t left_list_entries, const struct cpg_address *joined_list, size_t joined_list_entries) { /* Don't care about CPG membership */ } cpg_callbacks_t cpg_callbacks = { .cpg_deliver_fn = pcmk_cpg_deliver, .cpg_confchg_fn = pcmk_cpg_membership, }; gboolean cluster_disconnect_cpg(void) { if(cpg_handle) { cpg_finalize(cpg_handle); cpg_handle = 0; } return TRUE; } gboolean cluster_connect_cpg(void) { cs_error_t rc; unsigned int nodeid; int fd; int retries = 0; strcpy(cpg_group.value, "pcmk"); cpg_group.length = strlen(cpg_group.value)+1; rc = cpg_initialize(&cpg_handle, &cpg_callbacks); if (rc != CS_OK) { crm_err("corosync cpg init error %d", rc); return FALSE; } rc = cpg_fd_get(cpg_handle, &fd); if (rc != CS_OK) { crm_err("corosync cpg fd_get error %d", rc); goto bail; } rc = cpg_local_get(cpg_handle, &nodeid); if (rc != CS_OK) { crm_err("corosync cpg local_get error %d", rc); goto bail; } crm_debug("Our nodeid: %d", nodeid); do { rc = cpg_join(cpg_handle, &cpg_group); if(rc == CS_ERR_TRY_AGAIN) { retries++; sleep(retries); } } while(rc == CS_ERR_TRY_AGAIN && retries < 30); if (rc != CS_OK) { crm_err("Could not join the CPG group '%s': %d", crm_system_name, rc); goto bail; } crm_debug("Adding fd=%d to mainloop", fd); G_main_add_fd( G_PRIORITY_HIGH, fd, FALSE, pcmk_cpg_dispatch, &cpg_handle, cpg_connection_destroy); return TRUE; bail: cpg_finalize(cpg_handle); return FALSE; } gboolean send_cpg_message(struct iovec *iov) { int rc = CS_OK; int retries = 0; errno = 0; do { rc = cpg_mcast_joined(cpg_handle, CPG_TYPE_AGREED, iov, 1); if(rc == CS_ERR_TRY_AGAIN) { retries++; sleep(retries); } } while(rc == CS_ERR_TRY_AGAIN && retries < 30); if(rc != CS_OK) { crm_perror(LOG_ERR,"Sending message via cpg FAILED: (rc=%d) %s", rc, ais_error2text(rc)); } return (rc == CS_OK); } /* =::=::=::= Configuration =::=::=::= */ static int get_config_opt( confdb_handle_t config, hdb_handle_t object_handle, const char *key, char **value, const char *fallback) { size_t len = 0; char *env_key = NULL; const char *env_value = NULL; char buffer[256]; if(*value) { crm_free(*value); *value = NULL; } if(object_handle > 0) { if(CS_OK == confdb_key_get(config, object_handle, key, strlen(key), &buffer, &len)) { *value = crm_strdup(buffer); } } if (*value) { crm_info("Found '%s' for option: %s", *value, key); return 0; } env_key = crm_concat("HA", key, '_'); env_value = getenv(env_key); crm_free(env_key); if (*value) { crm_info("Found '%s' in ENV for option: %s", *value, key); *value = crm_strdup(env_value); return 0; } if(fallback) { crm_info("Defaulting to '%s' for option: %s", fallback, key); *value = crm_strdup(fallback); } else { crm_info("No default for option: %s", key); } return -1; } static confdb_handle_t config_find_init(confdb_handle_t config) { cs_error_t rc = CS_OK; confdb_handle_t local_handle = OBJECT_PARENT_HANDLE; rc = confdb_object_find_start(config, local_handle); if(rc == CS_OK) { return local_handle; } else { crm_err("Couldn't create search context: %d", rc); } return 0; } static hdb_handle_t config_find_next( confdb_handle_t config, const char *name, confdb_handle_t top_handle) { cs_error_t rc = CS_OK; hdb_handle_t local_handle = 0; if(top_handle == 0) { crm_err("Couldn't search for %s: no valid context", name); return 0; } - crm_info("Searching for %s in "HDB_X_FORMAT, name, top_handle); + crm_debug_2("Searching for %s in "HDB_X_FORMAT, name, top_handle); rc = confdb_object_find(config, top_handle, name, strlen(name), &local_handle); if(rc != CS_OK) { crm_info("No additional configuration supplied for: %s", name); local_handle = 0; } else { crm_info("Processing additional %s options...", name); } return local_handle; } char *get_local_node_name(void) { char *name = NULL; struct utsname res; if(use_cman) { #ifdef SUPPORT_CMAN cman_node_t us; cman_handle_t cman; cman = cman_init(NULL); if(cman != NULL && cman_is_active(cman)) { us.cn_name[0] = 0; cman_get_node(cman, CMAN_NODEID_US, &us); name = crm_strdup(us.cn_name); crm_info("Using CMAN node name: %s", name); } else { crm_err("Couldn't determin node name from CMAN"); } cman_finish(cman); #endif } else if(uname(&res) < 0) { crm_perror(LOG_ERR,"Could not determin the current host"); exit(100); } else { name = crm_strdup(res.nodename); } return name; } gboolean read_config(void) { confdb_handle_t config; int rc; char *value = NULL; gboolean have_log = FALSE; gboolean have_quorum = FALSE; confdb_handle_t top_handle = 0; hdb_handle_t local_handle = 0; static confdb_callbacks_t callbacks = {}; rc = confdb_initialize (&config, &callbacks); if (rc != CS_OK) { printf ("Could not initialize Cluster Configuration Database API instance error %d\n", rc); return FALSE; } crm_info("Reading configure"); /* =::=::= Defaults =::=::= */ + setenv("HA_mcp", "true", 1); setenv("HA_COMPRESSION", "bz2", 1); setenv("HA_cluster_type", "corosync",1); setenv("HA_debug", "0", 1); setenv("HA_logfacility", "daemon", 1); setenv("HA_LOGFACILITY", "daemon", 1); setenv("HA_use_logd", "off", 1); /* =::=::= Should we be here =::=::= */ top_handle = config_find_init(config); local_handle = config_find_next(config, "service", top_handle); - while(local_handle) { + while(local_handle && have_quorum == FALSE) { + crm_free(value); get_config_opt(config, local_handle, "name", &value, NULL); if(safe_str_eq("pacemaker", value)) { + have_quorum = TRUE; setenv("HA_cluster_type", "openais", 1); - crm_err("Pacemaker is already loaded as a plugin"); + crm_free(value); - exit(100); + get_config_opt(config, local_handle, "ver", &value, "0"); + if(safe_str_eq(value, "1")) { + crm_free(value); + get_config_opt(config, local_handle, "use_logd", &value, "no"); + setenv("HA_use_logd", value, 1); + + crm_free(value); + get_config_opt(config, local_handle, "use_mgmtd", &value, "no"); + enable_mgmtd(crm_is_true(value)); + + } else { + crm_err("We can only start Pacemaker from init if using version 1" + " of the Pacemaker plugin for Corosync. Terminating."); + exit(100); + } + } local_handle = config_find_next(config, "service", top_handle); } /* =::=::= Logging =::=::= */ + crm_free(value); top_handle = config_find_init(config); local_handle = config_find_next(config, "logging", top_handle); get_config_opt(config, local_handle, "debug", &value, "on"); if(crm_is_true(value) && crm_log_level < LOG_DEBUG) { crm_log_level = LOG_DEBUG; } if(crm_log_level >= LOG_DEBUG) { char *level = crm_itoa(crm_log_level - LOG_INFO); setenv("HA_debug", level, 1); crm_free(level); } get_config_opt(config, local_handle, "to_logfile", &value, "off"); if(crm_is_true(value)) { get_config_opt(config, local_handle, "logfile", &value, NULL); if(value == NULL) { crm_err("Logging to a file requested but no log file specified"); } else { uid_t pcmk_uid = geteuid(); uid_t pcmk_gid = getegid(); FILE *logfile = fopen(value, "a"); if(logfile) { int logfd = fileno(logfile); setenv("HA_debugfile", value, 1); /* Ensure the file has the correct permissions */ fchown(logfd, pcmk_uid, pcmk_gid); fchmod(logfd, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP); fprintf(logfile, "Set r/w permissions for uid=%d, gid=%d on %s\n", pcmk_uid, pcmk_gid, value); fflush(logfile); fsync(logfd); fclose(logfile); have_log = TRUE; } else { crm_err("Couldn't create logfile: %s", value); } } } get_config_opt(config, local_handle, "to_syslog", &value, "on"); if(have_log && crm_is_true(value) == FALSE) { crm_info("User configured file based logging and explicitly disabled syslog."); value = NULL; } else { if(crm_is_true(value) == FALSE) { crm_err("Please enable some sort of logging, either 'to_file: on' or 'to_syslog: on'."); crm_err("If you use file logging, be sure to also define a value for 'logfile'"); } get_config_opt(config, local_handle, "syslog_facility", &value, "daemon"); } setenv("HA_logfacility", value?value:"none", 1); setenv("HA_LOGFACILITY", value?value:"none", 1); /* =::=::= Quorum =::=::= */ - top_handle = config_find_init(config); - local_handle = config_find_next(config, "quorum", top_handle); - get_config_opt(config, local_handle, "provider", &value, NULL); - if(value) { - have_quorum = TRUE; - if(safe_str_eq("quorum_cman", value)) { + if(have_quorum == FALSE) { + top_handle = config_find_init(config); + local_handle = config_find_next(config, "quorum", top_handle); + get_config_opt(config, local_handle, "provider", &value, NULL); + + if(value) { + have_quorum = TRUE; + if(safe_str_eq("quorum_cman", value)) { #ifdef SUPPORT_CMAN - setenv("HA_cluster_type", "cman", 1); - use_cman = TRUE; + setenv("HA_cluster_type", "cman", 1); + use_cman = TRUE; #else - crm_err("Corosync configured for CMAN but this build of Pacemaker doesn't support it"); - exit(100); + crm_err("Corosync configured for CMAN but this build of Pacemaker doesn't support it"); + exit(100); #endif + } } } - + confdb_finalize (config); crm_free(value); return TRUE; } diff --git a/mcp/pacemaker.c b/mcp/pacemaker.c index 4d2cfc5392..6ff8fce8fa 100644 --- a/mcp/pacemaker.c +++ b/mcp/pacemaker.c @@ -1,730 +1,727 @@ /* * Copyright (C) 2010 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include GMainLoop *mainloop = NULL; GHashTable *client_list = NULL; GHashTable *peers = NULL; char ipc_name[] = "pcmk"; char *local_name = NULL; uint32_t local_nodeid = 0; crm_trigger_t *shutdown_trigger = NULL; const char *pid_file = "/var/run/pacemaker.pid"; enum crm_proc_flag { crm_proc_none = 0x00000001, crm_proc_ais = 0x00000002, crm_proc_lrmd = 0x00000010, crm_proc_cib = 0x00000100, crm_proc_crmd = 0x00000200, crm_proc_attrd = 0x00001000, crm_proc_stonithd = 0x00002000, crm_proc_pe = 0x00010000, crm_proc_te = 0x00020000, crm_proc_mgmtd = 0x00040000, crm_proc_stonith_ng = 0x00100000, }; -/* order here matters - its used to index into the crm_children array */ -enum crm_ais_msg_types { - crm_msg_none = 0, - crm_msg_ais = 1, - crm_msg_lrmd = 2, - crm_msg_cib = 3, - crm_msg_crmd = 4, - crm_msg_attrd = 5, - crm_msg_stonithd = 6, - crm_msg_te = 7, - crm_msg_pe = 8, - crm_msg_stonith_ng = 9, -}; - typedef struct pcmk_child_s { int pid; long flag; - long flags; int start_seq; int respawn_count; gboolean respawn; const char *name; const char *uid; const char *command; } pcmk_child_t; +/* Index into the array below */ +#define pcmk_child_crmd 4 +#define pcmk_child_mgmtd 8 static pcmk_child_t pcmk_children[] = { - { 0, crm_proc_none, crm_flag_none, 0, 0, FALSE, "none", NULL, NULL }, - { 0, crm_proc_ais, crm_flag_none, 0, 0, FALSE, "ais", NULL, NULL }, - { 0, crm_proc_lrmd, crm_flag_none, 3, 0, TRUE, "lrmd", NULL, HB_DAEMON_DIR"/lrmd" }, - { 0, crm_proc_cib, crm_flag_members, 2, 0, TRUE, "cib", CRM_DAEMON_USER, CRM_DAEMON_DIR"/cib" }, - { 0, crm_proc_crmd, crm_flag_members, 6, 0, TRUE, "crmd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/crmd" }, - { 0, crm_proc_attrd, crm_flag_none, 4, 0, TRUE, "attrd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/attrd" }, - { 0, crm_proc_stonithd, crm_flag_none, 0, 0, TRUE, "stonithd", NULL, "/bin/false" }, - { 0, crm_proc_pe, crm_flag_none, 5, 0, TRUE, "pengine", CRM_DAEMON_USER, CRM_DAEMON_DIR"/pengine" }, - { 0, crm_proc_mgmtd, crm_flag_none, 0, 0, TRUE, "mgmtd", NULL, HB_DAEMON_DIR"/mgmtd" }, - { 0, crm_proc_stonith_ng, crm_flag_none, 1, 0, TRUE, "stonith-ng", NULL, CRM_DAEMON_DIR"/stonithd" }, + { 0, crm_proc_none, 0, 0, FALSE, "none", NULL, NULL }, + { 0, crm_proc_ais, 0, 0, FALSE, "ais", NULL, NULL }, + { 0, crm_proc_lrmd, 3, 0, TRUE, "lrmd", NULL, HB_DAEMON_DIR"/lrmd" }, + { 0, crm_proc_cib, 2, 0, TRUE, "cib", CRM_DAEMON_USER, CRM_DAEMON_DIR"/cib" }, + { 0, crm_proc_crmd, 6, 0, TRUE, "crmd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/crmd" }, + { 0, crm_proc_attrd, 4, 0, TRUE, "attrd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/attrd" }, + { 0, crm_proc_stonithd, 0, 0, TRUE, "stonithd", NULL, NULL }, + { 0, crm_proc_pe, 5, 0, TRUE, "pengine", CRM_DAEMON_USER, CRM_DAEMON_DIR"/pengine" }, + { 0, crm_proc_mgmtd, 0, 0, TRUE, "mgmtd", NULL, HB_DAEMON_DIR"/mgmtd" }, + { 0, crm_proc_stonith_ng, 1, 0, TRUE, "stonith-ng", NULL, CRM_DAEMON_DIR"/stonithd" }, }; static gboolean start_child(pcmk_child_t *child); +void enable_mgmtd(gboolean enable) +{ + if(enable) { + pcmk_children[pcmk_child_mgmtd].start_seq = 7; + } else { + pcmk_children[pcmk_child_mgmtd].start_seq = 0; + } +} + static uint32_t get_process_list(void) { int lpc = 0; uint32_t procs = crm_proc_ais; for (lpc = 0; lpc < SIZEOF(pcmk_children); lpc++) { if(pcmk_children[lpc].pid != 0) { procs |= pcmk_children[lpc].flag; } } return procs; } static int pcmk_user_lookup(const char *name, uid_t *uid, gid_t *gid) { int rc = -1; char *buffer = NULL; struct passwd pwd; struct passwd *pwentry = NULL; crm_malloc0(buffer, PW_BUFFER_LEN); getpwnam_r(name, &pwd, buffer, PW_BUFFER_LEN, &pwentry); if(pwentry) { rc = 0; if(uid) { *uid = pwentry->pw_uid; } if(gid) { *gid = pwentry->pw_gid; } - crm_debug("Cluster user %s has uid=%d gid=%d", - name, pwentry->pw_uid, pwentry->pw_gid); + crm_debug_2("Cluster user %s has uid=%d gid=%d", + name, pwentry->pw_uid, pwentry->pw_gid); } else { crm_err("Cluster user %s does not exist", name); } crm_free(buffer); return rc; } static void pcmk_child_exit( ProcTrack* p, int status, int signo, int exitcode, int waslogged) { pcmk_child_t *child = p->privatedata; p->privatedata = NULL; crm_notice("Process %s [%d] exited (signal=%d, exitcode=%d)", child->name, child->pid, signo, exitcode); child->pid = 0; if(exitcode == 100) { crm_notice("Child process %s no longer wishes" " to be respawned", child->name); child->respawn = FALSE; } child->respawn_count += 1; if(child->respawn_count > MAX_RESPAWN) { crm_err("Child respawn count exceeded by %s", child->name); child->respawn = FALSE; } if(shutdown_trigger) { mainloop_set_trigger(shutdown_trigger); update_node_processes(local_nodeid, NULL, get_process_list()); } else if(child->respawn) { crm_notice("Respawning failed child process: %s", child->name); start_child(child); } } static void pcmkManagedChildRegistered(ProcTrack* p) { pcmk_child_t *child = p->privatedata; child->pid = p->pid; } static const char * pcmkManagedChildName(ProcTrack* p) { pcmk_child_t *child = p->privatedata; return child->name; } static ProcTrack_ops pcmk_managed_child_ops = { pcmk_child_exit, pcmkManagedChildRegistered, pcmkManagedChildName }; static gboolean stop_child(pcmk_child_t *child, int signal) { if(signal == 0) { signal = SIGTERM; } if(child->command == NULL) { crm_debug("Nothing to do for child \"%s\"", child->name); return TRUE; } crm_info("Stopping CRM child \"%s\"", child->name); if (child->pid <= 0) { crm_debug_2("Client %s not running", child->name); return TRUE; } errno = 0; if(kill(child->pid, signal) == 0) { crm_notice("Sent -%d to %s: [%d]", signal, child->name, child->pid); } else { crm_perror(LOG_ERR, "Sent -%d to %s: [%d]", signal, child->name, child->pid); } return TRUE; } static char *opts_default[] = { NULL, NULL }; static char *opts_vgrind[] = { NULL, NULL, NULL }; static gboolean start_child(pcmk_child_t *child) { int lpc = 0; uid_t uid = 0; struct rlimit oflimits; gboolean use_valgrind = FALSE; const char *devnull = "/dev/null"; const char *env_valgrind = getenv("HA_VALGRIND_ENABLED"); if(child->command == NULL) { crm_info("Nothing to do for child \"%s\"", child->name); return TRUE; } if(env_valgrind == NULL) { use_valgrind = FALSE; } else if(crm_is_true(env_valgrind)) { use_valgrind = TRUE; } else if(strstr(env_valgrind, child->name)) { use_valgrind = TRUE; } if(use_valgrind && strlen(VALGRIND_BIN) == 0) { crm_warn("Cannot enable valgrind for %s:" " The location of the valgrind binary is unknown", child->name); use_valgrind = FALSE; } child->pid = fork(); CRM_ASSERT(child->pid != -1); if(child->pid > 0) { /* parent */ NewTrackedProc(child->pid, 0, PT_LOGNORMAL, child, &pcmk_managed_child_ops); crm_info("Forked child %d for process %s%s", child->pid, child->name, use_valgrind?" (valgrind enabled: "VALGRIND_BIN")":""); update_node_processes(local_nodeid, NULL, get_process_list()); return TRUE; } else { /* Start a new session */ (void)setsid(); /* Setup the two alternate arg arrarys */ opts_vgrind[0] = crm_strdup(VALGRIND_BIN); opts_vgrind[1] = crm_strdup(child->command); opts_default[0] = opts_vgrind[1]; #if 0 /* Dont set the group for now - it prevents connection to the cluster */ if(gid && setgid(gid) < 0) { crm_perror("Could not set group to %d", gid); } #endif if(child->uid) { if(pcmk_user_lookup(child->uid, &uid, NULL) < 0) { crm_err("Invalid uid (%s) specified for %s", child->uid, child->name); return TRUE; } } if(uid && setuid(uid) < 0) { crm_perror(LOG_ERR, "Could not set user to %d (%s)", uid, child->uid); } /* Close all open file descriptors */ getrlimit(RLIMIT_NOFILE, &oflimits); for (lpc = 0; lpc < oflimits.rlim_cur; lpc++) { close(lpc); } (void)open(devnull, O_RDONLY); /* Stdin: fd 0 */ (void)open(devnull, O_WRONLY); /* Stdout: fd 1 */ (void)open(devnull, O_WRONLY); /* Stderr: fd 2 */ if(use_valgrind) { (void)execvp(VALGRIND_BIN, opts_vgrind); } else { (void)execvp(child->command, opts_default); } crm_perror(LOG_ERR, "FATAL: Cannot exec %s", child->command); exit(100); } return TRUE; /* never reached */ } static gboolean escalate_shutdown(gpointer data) { pcmk_child_t *child = data; if(child->pid) { crm_err("Child %s not terminating in a timely manner, forcing", child->name); stop_child(child, SIGKILL); } return FALSE; } static gboolean pcmk_shutdown_worker(gpointer user_data) { static int phase = 0; static time_t next_log = 0; static int max = SIZEOF(pcmk_children); int lpc = 0; if(phase == 0) { crm_notice("Shuting down Pacemaker"); phase = max; } for (; phase > 0; phase--) { /* dont stop anything with start_seq < 1 */ for (lpc = max - 1; lpc >= 0; lpc--) { pcmk_child_t *child = &(pcmk_children[lpc]); if(phase != child->start_seq) { continue; } if(child->pid) { time_t now = time(NULL); if(child->respawn) { next_log = now + 30; child->respawn = FALSE; stop_child(child, SIGTERM); - if(phase < pcmk_children[crm_msg_crmd].start_seq) { + if(phase < pcmk_children[pcmk_child_crmd].start_seq) { g_timeout_add(180000/* 3m */, escalate_shutdown, child); } } else if(now >= next_log) { next_log = now + 30; crm_notice("Still waiting for %s (pid=%d, seq=%d) to terminate...", child->name, child->pid, child->start_seq); } return TRUE; } /* cleanup */ crm_notice("%s confirmed stopped", child->name); child->pid = 0; } } /* send_cluster_id(); */ crm_notice("Shutdown complete"); g_main_loop_quit(mainloop); return TRUE; } static void pcmk_shutdown(int nsig) { shutdown_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, pcmk_shutdown_worker, NULL); mainloop_set_trigger(shutdown_trigger); } static void build_path(const char *path_c, mode_t mode) { int offset = 1, len = 0; char *path = crm_strdup(path_c); CRM_CHECK(path != NULL, return); for(len = strlen(path); offset < len; offset++) { if(path[offset] == '/') { path[offset] = 0; if(mkdir(path, mode) < 0 && errno != EEXIST) { crm_perror(LOG_ERR, "Could not create directory '%s'", path); break; } path[offset] = '/'; } } if(mkdir(path, mode) < 0 && errno != EEXIST) { crm_perror(LOG_ERR, "Could not create directory '%s'", path); } crm_free(path); } static void pcmk_server_destroy(gpointer user_data) { crm_info("Server destroyed"); return; } static void pcmk_client_destroy(gpointer user_data) { crm_debug("Client %p disconnected", user_data); g_hash_table_remove(client_list, user_data); return; } static gboolean pcmk_client_msg(IPC_Channel *client, gpointer user_data) { xmlNode *msg = NULL; gboolean stay_connected = TRUE; while(IPC_ISRCONN(client)) { if(client->ops->is_message_pending(client) == 0) { break; } msg = xmlfromIPC(client, MAX_IPC_DELAY); free_xml(msg); if(client->ch_status != IPC_CONNECT) { break; } } if (client->ch_status != IPC_CONNECT) { stay_connected = FALSE; } return stay_connected; } static gboolean pcmk_client_connect(IPC_Channel *ch, gpointer user_data) { if (ch == NULL) { crm_err("Channel was invalid"); } else if (ch->ch_status == IPC_DISCONNECT) { crm_err("Channel was disconnected"); } else { ch->ops->set_recv_qlen(ch, 1024); ch->ops->set_send_qlen(ch, 1024); g_hash_table_insert(client_list, ch, user_data); crm_debug("Channel %p connected: %d children", ch, g_hash_table_size(client_list)); update_process_clients(); G_main_add_IPC_Channel( G_PRIORITY_LOW, ch, FALSE, pcmk_client_msg, ch, pcmk_client_destroy); } return TRUE; } static gboolean ghash_send_proc_details(gpointer key, gpointer value, gpointer data) { if(send_ipc_message(key, data) == FALSE) { /* remove it */ return TRUE; } return FALSE; } static void peer_loop_fn(gpointer key, gpointer value, gpointer user_data) { pcmk_peer_t *node = value; xmlNode *update = user_data; xmlNode *xml = create_xml_node(update, "node"); crm_xml_add_int(xml, "id", node->id); crm_xml_add(xml, "uname", node->uname); crm_xml_add_int(xml, "processes", node->processes); } void update_process_clients(void) { xmlNode *update = create_xml_node(NULL, "nodes"); crm_debug_2("Sending process list to %d children", g_hash_table_size(client_list)); g_hash_table_foreach(peers, peer_loop_fn, update); g_hash_table_foreach_remove(client_list, ghash_send_proc_details, update); free_xml(update); } void update_process_peers(pcmk_peer_t *node) { char buffer[1024]; struct iovec iov; int rc = 0; memset(buffer, SIZEOF(buffer), 0); if(node->uname) { rc = snprintf(buffer, SIZEOF(buffer) - 1, "", node->uname, get_process_list()); } else { rc = snprintf(buffer, SIZEOF(buffer) - 1, "", get_process_list()); } iov.iov_base = buffer; iov.iov_len = rc + 1; send_cpg_message(&iov); } gboolean update_node_processes(uint32_t id, const char *uname, uint32_t procs) { gboolean changed = FALSE; pcmk_peer_t *node = g_hash_table_lookup(peers, GUINT_TO_POINTER(id)); if(node == NULL) { changed = TRUE; crm_malloc0(node, sizeof(pcmk_peer_t)); node->id = id; g_hash_table_insert(peers, GUINT_TO_POINTER(id), node); node = g_hash_table_lookup(peers, GUINT_TO_POINTER(id)); CRM_ASSERT(node != NULL); } if(uname != NULL) { if(node->uname == NULL || safe_str_eq(node->uname, uname) == FALSE) { crm_info("%p Node %u now known as %s (was: %s)", node, id, uname, node->uname); crm_free(node->uname); node->uname = crm_strdup(uname); changed = TRUE; } } if(procs != 0 && procs != node->processes) { crm_info("Node %s now has process list: %.32x (was %.32x)", node->uname, procs, node->processes); node->processes = procs; changed = TRUE; } if(changed && id == local_nodeid) { update_process_clients(); update_process_peers(node); } return changed; } static struct crm_option long_options[] = { /* Top-level Options */ {"help", 0, 0, '?', "\tThis text"}, {"version", 0, 0, '$', "\tVersion information" }, {"verbose", 0, 0, 'V', "\tIncrease debug output"}, {"-spacer-", 1, 0, '-', "\nAdditional Options:"}, {"foreground", 0, 0, 'f', "\tRun in the foreground instead of as a daemon"}, {"pid-file", 1, 0, 'p', "\t(Advanced) Daemon pid file location"}, {NULL, 0, 0, 0} }; int main(int argc, char **argv) { int rc; int flag; int argerr = 0; int option_index = 0; gboolean daemonize = TRUE; int start_seq = 1, lpc = 0; static int max = SIZEOF(pcmk_children); uid_t pcmk_uid = 0; gid_t pcmk_gid = 0; struct rlimit cores; crm_log_init(NULL, LOG_INFO, FALSE, FALSE, argc, argv, TRUE); crm_set_options("V?$fp:", "mode [options]", long_options, "Start/Stop Pacemaker\n"); #ifndef ON_DARWIN /* prevent zombies */ signal(SIGCLD, SIG_IGN); #endif while (1) { flag = crm_get_option(argc, argv, &option_index); if (flag == -1) break; switch(flag) { case 'V': cl_log_enable_stderr(TRUE); alter_debug(DEBUG_INC); break; case 'f': daemonize = FALSE; break; case 'p': pid_file = optarg; break; case '$': case '?': crm_help(flag, LSB_EXIT_OK); break; default: printf("Argument code 0%o (%c) is not (?yet?) supported\n", flag, flag); ++argerr; break; } } if (optind < argc) { printf("non-option ARGV-elements: "); while (optind < argc) printf("%s ", argv[optind++]); printf("\n"); } if (argerr) { crm_help('?', LSB_EXIT_GENERIC); } if(daemonize) { cl_log_enable_stderr(FALSE); crm_make_daemon(crm_system_name, TRUE, pid_file); } crm_info("Starting %s", crm_system_name); mainloop = g_main_new(FALSE); rc = getrlimit(RLIMIT_CORE, &cores); if(rc < 0) { crm_perror(LOG_ERR, "Cannot determine current maximum core size."); } if(cores.rlim_max <= 0) { cores.rlim_max = RLIM_INFINITY; rc = setrlimit(RLIMIT_CORE, &cores); if(rc < 0) { crm_perror(LOG_ERR, "Core file generation will remain disabled." " Core files are an important diagnositic tool," " please consider enabling them by default."); } } else { crm_info("Maximum core file size is: %lu", cores.rlim_max); #if 0 /* system() is not thread-safe, can't call from here * Actually, its a pretty hacky way to try and achieve this anyway */ if(system("echo 1 > /proc/sys/kernel/core_uses_pid") != 0) { crm_perror(LOG_ERR, "Could not enable /proc/sys/kernel/core_uses_pid"); } #endif } if(pcmk_user_lookup(CRM_DAEMON_USER, &pcmk_uid, &pcmk_gid) < 0) { crm_err("Cluster user %s does not exist, aborting Pacemaker startup", CRM_DAEMON_USER); return TRUE; } mkdir(CRM_STATE_DIR, 0750); chown(CRM_STATE_DIR, pcmk_uid, pcmk_gid); /* Used by stonithd */ build_path(HA_STATE_DIR"/heartbeat", 0755); /* Used by RAs - Leave owned by root */ build_path(CRM_RSCTMP_DIR, 0755); if(read_config() == FALSE) { return 1; } client_list = g_hash_table_new(g_direct_hash, g_direct_equal); peers = g_hash_table_new(g_direct_hash, g_direct_equal); if(init_server_ipc_comms(ipc_name, pcmk_client_connect, pcmk_server_destroy)) { crm_err("Couldn't start IPC server"); return 1; } if(cluster_connect_cfg(&local_nodeid) == FALSE) { return 1; } if(cluster_connect_cpg() == FALSE) { return 1; } local_name = get_local_node_name(); update_node_processes(local_nodeid, local_name, get_process_list()); mainloop_add_signal(SIGTERM, pcmk_shutdown); mainloop_add_signal(SIGINT, pcmk_shutdown); set_sigchld_proctrack(G_PRIORITY_HIGH, DEFAULT_MAXDISPATCHTIME); for (start_seq = 1; start_seq < max; start_seq++) { /* dont start anything with start_seq < 1 */ for (lpc = 0; lpc < max; lpc++) { if(start_seq == pcmk_children[lpc].start_seq) { start_child(&(pcmk_children[lpc])); } } } crm_info("Starting mainloop"); g_main_run(mainloop); g_main_destroy(mainloop); cluster_disconnect_cpg(); cluster_disconnect_cfg(); crm_info("Exiting %s", crm_system_name); return 0; } diff --git a/mcp/pacemaker.h b/mcp/pacemaker.h index a4aab397e7..e5a389715a 100644 --- a/mcp/pacemaker.h +++ b/mcp/pacemaker.h @@ -1,57 +1,58 @@ /* * Copyright (C) 2010 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #define SIZEOF(a) (sizeof(a) / sizeof(a[0])) #define crm_flag_none 0x00000000 #define crm_flag_members 0x00000001 #define MAX_RESPAWN 100 #define PW_BUFFER_LEN 500 extern uint32_t local_nodeid; typedef struct pcmk_peer_s { uint32_t id; uint32_t processes; char *uname; } pcmk_peer_t; extern gboolean read_config(void); extern gboolean cluster_connect_cfg(uint32_t *nodeid); extern gboolean cluster_disconnect_cfg(void); extern gboolean cluster_connect_cpg(void); extern gboolean cluster_disconnect_cpg(void); extern gboolean send_cpg_message(struct iovec *iov); extern void update_process_clients(void); extern void update_process_peers(pcmk_peer_t *node); extern gboolean update_node_processes(uint32_t node, const char *uname, uint32_t procs); extern char *get_local_node_name(void); +extern void enable_mgmtd(gboolean enable);