diff --git a/crm/cib/io.c b/crm/cib/io.c index 09d15b2869..99ff4847bf 100644 --- a/crm/cib/io.c +++ b/crm/cib/io.c @@ -1,751 +1,762 @@ /* $Id: io.c,v 1.81 2006/07/18 06:15:54 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include const char * local_resource_path[] = { XML_CIB_TAG_STATUS, }; const char * resource_path[] = { XML_CIB_TAG_RESOURCES, }; const char * node_path[] = { XML_CIB_TAG_NODES, }; const char * constraint_path[] = { XML_CIB_TAG_CONSTRAINTS, }; gboolean initialized = FALSE; crm_data_t *the_cib = NULL; crm_data_t *node_search = NULL; crm_data_t *resource_search = NULL; crm_data_t *constraint_search = NULL; crm_data_t *status_search = NULL; extern gboolean cib_writes_enabled; extern char *ccm_transition_id; extern gboolean cib_have_quorum; extern GHashTable *peer_hash; extern GHashTable *ccm_membership; extern GTRIGSource *cib_writer; extern enum cib_errors cib_status; int set_connected_peers(crm_data_t *xml_obj); void GHFunc_count_peers(gpointer key, gpointer value, gpointer user_data); int write_cib_contents(gpointer p); - +extern void cib_cleanup(void); static gboolean validate_cib_digest(crm_data_t *local_cib) { int s_res = -1; struct stat buf; char *digest = NULL; char *expected = NULL; gboolean passed = FALSE; FILE *expected_strm = NULL; int start = 0, length = 0, read_len = 0; s_res = stat(CIB_FILENAME ".sig", &buf); if (s_res != 0) { crm_warn("No on-disk digest present"); return TRUE; } if(local_cib != NULL) { digest = calculate_xml_digest(local_cib, FALSE); } expected_strm = fopen(CIB_FILENAME ".sig", "r"); start = ftell(expected_strm); fseek(expected_strm, 0L, SEEK_END); length = ftell(expected_strm); fseek(expected_strm, 0L, start); CRM_ASSERT(start == ftell(expected_strm)); crm_debug_3("Reading %d bytes from file", length); crm_malloc0(expected, (length+1)); read_len = fread(expected, 1, length, expected_strm); CRM_ASSERT(read_len == length); fclose(expected_strm); if(expected == NULL) { crm_err("On-disk digest is empty"); } else if(safe_str_eq(expected, digest)) { crm_debug("Digest comparision passed: %s", digest); passed = TRUE; } else { crm_err("Digest comparision failed: %s vs. %s", expected, digest); } crm_free(digest); crm_free(expected); return passed; } static int write_cib_digest(crm_data_t *local_cib, char *digest) { int rc = 0; FILE *digest_strm = fopen(CIB_FILENAME ".sig", "w"); char *local_digest = NULL; CRM_ASSERT(digest_strm != NULL); if(digest == NULL) { local_digest = calculate_xml_digest(local_cib, FALSE); CRM_ASSERT(digest != NULL); digest = local_digest; } rc = fprintf(digest_strm, "%s", digest); if(rc < 0) { cl_perror("Cannot write output to %s.sig", CIB_FILENAME); } fflush(digest_strm); fclose(digest_strm); crm_free(local_digest); return rc; } static gboolean validate_on_disk_cib(const char *filename, crm_data_t **on_disk_cib) { int s_res = -1; struct stat buf; FILE *cib_file = NULL; gboolean passed = TRUE; crm_data_t *root = NULL; if(filename != NULL) { s_res = stat(filename, &buf); } if (s_res == 0) { cib_file = fopen(filename, "r"); crm_debug_2("Reading cluster configuration from: %s", filename); root = file2xml(cib_file, FALSE); fclose(cib_file); if(validate_cib_digest(root) == FALSE) { passed = FALSE; } } if(on_disk_cib != NULL) { *on_disk_cib = root; } else { free_xml(root); } return passed; } /* * It is the callers responsibility to free the output of this function */ crm_data_t* readCibXmlFile(const char *dir, const char *file, gboolean discard_status) { struct stat buf; FILE *cib_file = NULL; gboolean dtd_ok = TRUE; char *filename = NULL; const char *name = NULL; const char *value = NULL; const char *ignore_dtd = NULL; crm_data_t *root = NULL; crm_data_t *status = NULL; if(!crm_is_writable(dir, file, HA_CCMUSER, HA_APIGROUP, FALSE)) { cib_status = cib_bad_permissions; return NULL; } filename = crm_concat(dir, file, '/'); if(stat(filename, &buf) != 0) { crm_warn("Cluster configuration not found: %s." " Creating an empty one.", filename); } else { crm_info("Reading cluster configuration from: %s", filename); cib_file = fopen(filename, "r"); root = file2xml(cib_file, FALSE); fclose(cib_file); if(root == NULL) { crm_err("%s exists but does NOT contain valid XML. ", filename); crm_err("Continuing with an empty configuration." " %s will NOT be overwritten.", filename); cib_writes_enabled = FALSE; } else if(validate_cib_digest(root) == FALSE) { crm_err("%s has been manually changed! If this was" " intended, remove the digest in %s.sig", filename, filename); cib_status = cib_bad_digest; } } if(root == NULL) { root = createEmptyCib(); } else { crm_xml_add(root, "generated", XML_BOOLEAN_FALSE); } status = find_xml_node(root, XML_CIB_TAG_STATUS, FALSE); if(discard_status && status != NULL) { /* strip out the status section if there is one */ free_xml_from_parent(root, status); status = NULL; } create_xml_node(root, XML_CIB_TAG_STATUS); /* Do this before DTD validation happens */ /* fill in some defaults */ name = XML_ATTR_GENERATION_ADMIN; value = crm_element_value(root, name); if(value == NULL) { crm_xml_add_int(root, name, 0); } name = XML_ATTR_GENERATION; value = crm_element_value(root, name); if(value == NULL) { crm_xml_add_int(root, name, 0); } name = XML_ATTR_NUMUPDATES; value = crm_element_value(root, name); if(value == NULL) { crm_xml_add_int(root, name, 0); } /* unset these and require the DC/CCM to update as needed */ update_counters(__FILE__, __PRETTY_FUNCTION__, root); xml_remove_prop(root, XML_ATTR_DC_UUID); if(discard_status) { crm_log_xml_info(root, "[on-disk]"); } ignore_dtd = crm_element_value(root, "ignore_dtd"); dtd_ok = validate_with_dtd(root, TRUE, HA_LIBDIR"/heartbeat/crm.dtd"); if(dtd_ok == FALSE) { if(ignore_dtd == NULL && crm_is_true(ignore_dtd) == FALSE) { cib_status = cib_dtd_validation; } } else if(ignore_dtd == NULL) { crm_notice("Enabling DTD validation on" " the existing (sane) configuration"); crm_xml_add(root, "ignore_dtd", XML_BOOLEAN_FALSE); } if(do_id_check(root, NULL, TRUE, FALSE)) { crm_err("%s does not contain a vaild configuration:" " ID check failed", filename); cib_status = cib_id_check; } if (verifyCibXml(root) == FALSE) { crm_err("%s does not contain a vaild configuration:" " structure test failed", filename); cib_status = cib_bad_config; } crm_free(filename); return root; } /* * The caller should never free the return value */ crm_data_t* get_the_CIB(void) { return the_cib; } gboolean uninitializeCib(void) { crm_data_t *tmp_cib = the_cib; if(tmp_cib == NULL) { crm_debug("The CIB has already been deallocated."); return FALSE; } initialized = FALSE; the_cib = NULL; node_search = NULL; resource_search = NULL; constraint_search = NULL; status_search = NULL; crm_debug("Deallocating the CIB."); free_xml(tmp_cib); crm_info("The CIB has been deallocated."); return TRUE; } /* * This method will not free the old CIB pointer or the new one. * We rely on the caller to have saved a pointer to the old CIB * and to free the old/bad one depending on what is appropriate. */ gboolean initializeCib(crm_data_t *new_cib) { gboolean is_valid = TRUE; crm_data_t *tmp_node = NULL; if(new_cib == NULL) { return FALSE; } xml_validate(new_cib); tmp_node = get_object_root(XML_CIB_TAG_NODES, new_cib); if (tmp_node == NULL) { is_valid = FALSE; } tmp_node = get_object_root(XML_CIB_TAG_RESOURCES, new_cib); if (tmp_node == NULL) { is_valid = FALSE; } tmp_node = get_object_root(XML_CIB_TAG_CONSTRAINTS, new_cib); if (tmp_node == NULL) { is_valid = FALSE; } tmp_node = get_object_root(XML_CIB_TAG_CRMCONFIG, new_cib); if (tmp_node == NULL) { is_valid = FALSE; } tmp_node = get_object_root(XML_CIB_TAG_STATUS, new_cib); if (is_valid && tmp_node == NULL) { create_xml_node(new_cib, XML_CIB_TAG_STATUS); } if(is_valid == FALSE) { crm_warn("CIB Verification failed"); return FALSE; } update_counters(__FILE__, __PRETTY_FUNCTION__, new_cib); the_cib = new_cib; initialized = TRUE; return TRUE; } static int archive_file(const char *oldname, const char *newname, const char *ext) { /* move 'oldname' to 'newname' by creating a hard link to it * and then removing the original hard link */ int rc = 0; int res = 0; struct stat tmp; int s_res = 0; char *backup_file = NULL; static const char *back_ext = "bak"; /* calculate the backup name if required */ if(newname != NULL) { backup_file = crm_strdup(newname); } else { int max_name_len = 1024; crm_malloc0(backup_file, max_name_len); if (ext == NULL) { ext = back_ext; } snprintf(backup_file, max_name_len - 1, "%s.%s", oldname, ext); } if(backup_file == NULL || strlen(backup_file) == 0) { crm_err("%s backup filename was %s", newname == NULL?"calculated":"supplied", backup_file == NULL?"null":"empty"); rc = -4; } s_res = stat(backup_file, &tmp); /* unlink the old backup */ if (rc == 0 && s_res >= 0) { res = unlink(backup_file); if (res < 0) { cl_perror("Could not unlink %s", backup_file); rc = -1; } } s_res = stat(oldname, &tmp); /* copy */ if (rc == 0 && s_res >= 0) { res = link(oldname, backup_file); if (res < 0) { cl_perror("Could not create backup %s from %s", backup_file, oldname); rc = -2; } } /* unlink the original */ if (rc == 0 && s_res >= 0) { res = unlink(oldname); if (res < 0) { cl_perror("Could not unlink %s", oldname); rc = -3; } } crm_free(backup_file); return rc; } /* * This method will free the old CIB pointer on success and the new one * on failure. */ int activateCibXml(crm_data_t *new_cib, const char *ignored) { int error_code = cib_ok; crm_data_t *saved_cib = the_cib; const char *ignore_dtd = NULL; crm_log_xml_debug_4(new_cib, "Attempting to activate CIB"); CRM_ASSERT(new_cib != saved_cib); if(saved_cib != NULL) { crm_validate_data(saved_cib); } ignore_dtd = crm_element_value(new_cib, "ignore_dtd"); if( #if CRM_DEPRECATED_SINCE_2_0_4 ignore_dtd != NULL && #endif crm_is_true(ignore_dtd) == FALSE && validate_with_dtd( new_cib, TRUE, HA_LIBDIR"/heartbeat/crm.dtd") == FALSE) { error_code = cib_dtd_validation; crm_err("Ignoring invalid CIB"); } if(error_code == cib_ok && initializeCib(new_cib) == FALSE) { error_code = cib_ACTIVATION; crm_err("Ignoring invalid or NULL CIB"); } if(error_code != cib_ok) { if(saved_cib != NULL) { crm_warn("Reverting to last known CIB"); if (initializeCib(saved_cib) == FALSE) { /* oh we are so dead */ crm_crit("Couldn't re-initialize the old CIB!"); cl_flush_logs(); exit(1); } } else { crm_crit("Could not write out new CIB and no saved" " version to revert to"); } } else if(per_action_cib && cib_writes_enabled && cib_status == cib_ok) { crm_err("Per-action CIB"); write_cib_contents(the_cib); } else if(cib_writes_enabled && cib_status == cib_ok) { crm_debug_2("Triggering CIB write"); G_main_set_trigger(cib_writer); } #if CIB_MEM_STATS /* this chews through a bunch of CPU */ if(the_cib == new_cib) { long new_bytes, new_allocs, new_frees; long old_bytes, old_allocs, old_frees; crm_xml_nbytes(new_cib, &new_bytes, &new_allocs, &new_frees); crm_xml_nbytes(saved_cib, &old_bytes, &old_allocs, &old_frees); if(new_bytes != old_bytes) { crm_info("CIB size is %ld bytes (was %ld)", new_bytes, old_bytes); crm_adjust_mem_stats(NULL, new_bytes - old_bytes, new_allocs - old_allocs, new_frees - old_frees); if(crm_running_stats != NULL) { crm_adjust_mem_stats( crm_running_stats, new_bytes - old_bytes, new_allocs - old_allocs, new_frees - old_frees); } } } #endif if(the_cib != saved_cib && the_cib != new_cib) { CRM_DEV_ASSERT(error_code != cib_ok); CRM_DEV_ASSERT(the_cib == NULL); } if(the_cib != new_cib) { free_xml(new_cib); CRM_DEV_ASSERT(error_code != cib_ok); } if(the_cib != saved_cib) { free_xml(saved_cib); } return error_code; } int write_cib_contents(gpointer p) { int rc = 0; char *digest = NULL; crm_data_t *cib_status_root = NULL; const char *digest_filename = CIB_FILENAME ".sig"; /* we can scribble on "the_cib" here and not affect the parent */ const char *epoch = crm_element_value(the_cib, XML_ATTR_GENERATION); const char *updates = crm_element_value(the_cib, XML_ATTR_NUMUPDATES); const char *admin_epoch = crm_element_value( the_cib, XML_ATTR_GENERATION_ADMIN); /* check the admin didnt modify it underneath us */ if(validate_on_disk_cib(CIB_FILENAME, NULL) == FALSE) { crm_err("%s was manually modified while Heartbeat was active!", CIB_FILENAME); - exit(LSB_EXIT_GENERIC); + rc = LSB_EXIT_GENERIC; + goto cleanup; } rc = archive_file(CIB_FILENAME, NULL, "last"); if(rc != 0) { crm_err("Could not make backup of the existing CIB: %d", rc); - exit(LSB_EXIT_GENERIC); + rc = LSB_EXIT_GENERIC; + goto cleanup; } rc = archive_file(digest_filename, NULL, "last"); if(rc != 0) { crm_warn("Could not make backup of the existing CIB digest: %d", rc); } /* Given that we discard the status section on startup * there is no point writing it out in the first place * since users just get confused by it * * Although, it does help me once in a while * * So delete the status section before we write it out */ if(p == NULL) { cib_status_root = find_xml_node( the_cib, XML_CIB_TAG_STATUS, TRUE); CRM_DEV_ASSERT(cib_status_root != NULL); if(cib_status_root != NULL) { free_xml_from_parent(the_cib, cib_status_root); } } rc = write_xml_file(the_cib, CIB_FILENAME, FALSE); if(rc <= 0) { crm_err("Changes couldn't be written to disk"); - exit(LSB_EXIT_GENERIC); + rc = LSB_EXIT_GENERIC; + goto cleanup; } digest = calculate_xml_digest(the_cib, FALSE); crm_info("Wrote version %s.%s.%s of the CIB to disk (digest: %s)", admin_epoch?admin_epoch:"0", epoch?epoch:"0", updates?updates:"0", digest); rc = write_cib_digest(the_cib, digest); crm_free(digest); if(rc <= 0) { crm_err("Digest couldn't be written to disk"); - exit(LSB_EXIT_GENERIC); + rc = LSB_EXIT_GENERIC; + goto cleanup; } #if 0 if(validate_on_disk_cib(CIB_FILENAME, NULL) == FALSE) { crm_err("wrote incorrect digest"); - exit(LSB_EXIT_GENERIC); + rc = LSB_EXIT_GENERIC; + goto cleanup; } #endif + + cleanup: if(p == NULL) { - exit(LSB_EXIT_OK); + /* fork-and-write mode */ + uninitializeCib(); + cib_cleanup(); + exit(rc); } - + + /* stand-alone mode */ return HA_OK; } gboolean set_transition(crm_data_t *xml_obj) { const char *current = NULL; if(xml_obj == NULL) { return FALSE; } current = crm_element_value(xml_obj, XML_ATTR_CCM_TRANSITION); if(safe_str_neq(current, ccm_transition_id)) { crm_debug("CCM transition: old=%s, new=%s", current, ccm_transition_id); crm_xml_add(xml_obj, XML_ATTR_CCM_TRANSITION,ccm_transition_id); return TRUE; } return FALSE; } gboolean set_connected_peers(crm_data_t *xml_obj) { int active = 0; int current = 0; char *peers_s = NULL; const char *current_s = NULL; if(xml_obj == NULL) { return FALSE; } current_s = crm_element_value(xml_obj, XML_ATTR_NUMPEERS); g_hash_table_foreach(peer_hash, GHFunc_count_peers, &active); current = crm_parse_int(current_s, "0"); if(current != active) { peers_s = crm_itoa(active); crm_xml_add(xml_obj, XML_ATTR_NUMPEERS, peers_s); crm_debug("We now have %s active peers", peers_s); crm_free(peers_s); return TRUE; } return FALSE; } gboolean update_quorum(crm_data_t *xml_obj) { const char *quorum_value = XML_BOOLEAN_FALSE; const char *current = NULL; if(xml_obj == NULL) { return FALSE; } current = crm_element_value(xml_obj, XML_ATTR_HAVE_QUORUM); if(cib_have_quorum) { quorum_value = XML_BOOLEAN_TRUE; } if(safe_str_neq(current, quorum_value)) { crm_debug("CCM quorum: old=%s, new=%s", current, quorum_value); crm_xml_add(xml_obj, XML_ATTR_HAVE_QUORUM, quorum_value); return TRUE; } return FALSE; } gboolean update_counters(const char *file, const char *fn, crm_data_t *xml_obj) { gboolean did_update = FALSE; did_update = did_update || update_quorum(xml_obj); did_update = did_update || set_transition(xml_obj); did_update = did_update || set_connected_peers(xml_obj); if(did_update) { do_crm_log(LOG_DEBUG, "Counters updated by %s", fn); } return did_update; } void GHFunc_count_peers(gpointer key, gpointer value, gpointer user_data) { int *active = user_data; if(safe_str_eq(value, ONLINESTATUS)) { (*active)++; } else if(safe_str_eq(value, JOINSTATUS)) { (*active)++; } } diff --git a/crm/cib/main.c b/crm/cib/main.c index 22d1a5e483..9d07fb83b2 100644 --- a/crm/cib/main.c +++ b/crm/cib/main.c @@ -1,559 +1,567 @@ /* $Id: main.c,v 1.56 2006/07/18 06:14:18 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* #include */ #include /* #include */ #include #include #include #include #include #include #include #include #include #include gboolean cib_shutdown_flag = FALSE; gboolean stand_alone = FALSE; gboolean per_action_cib = FALSE; enum cib_errors cib_status = cib_ok; extern char *ccm_transition_id; extern void oc_ev_special(const oc_ev_t *, oc_ev_class_t , int ); GMainLoop* mainloop = NULL; const char* crm_system_name = CRM_SYSTEM_CIB; char *cib_our_uname = NULL; oc_ev_t *cib_ev_token; gboolean cib_writes_enabled = TRUE; void usage(const char* cmd, int exit_status); -int init_start(void); +int cib_init(void); gboolean cib_register_ha(ll_cluster_t *hb_cluster, const char *client_name); gboolean cib_shutdown(int nsig, gpointer unused); void cib_ha_connection_destroy(gpointer user_data); gboolean startCib(const char *filename); extern gboolean cib_msg_timeout(gpointer data); extern int write_cib_contents(gpointer p); GHashTable *client_list = NULL; GHashTable *ccm_membership = NULL; GHashTable *peer_hash = NULL; ll_cluster_t *hb_conn = NULL; GTRIGSource *cib_writer = NULL; +char *channel1 = NULL; +char *channel2 = NULL; +char *channel3 = NULL; +char *channel4 = NULL; +char *channel5 = NULL; + #define OPTARGS "hVsf" #if HAVE_LIBXML2 # include #endif +void cib_cleanup(void); static void cib_diskwrite_complete(gpointer userdata, int status, int signo, int exitcode) { if(exitcode != LSB_EXIT_OK || signo != 0 || status != 0) { crm_err("Disk write failed: status=%d, signo=%d, exitcode=%d", status, signo, exitcode); if(cib_writes_enabled) { crm_err("Disabling disk writes after write failure"); cib_writes_enabled = FALSE; } } else { crm_debug_2("Disk write passed"); } } int main(int argc, char ** argv) { int flag; int rc = 0; int argerr = 0; crm_log_init(crm_system_name); G_main_add_SignalHandler( G_PRIORITY_HIGH, SIGTERM, cib_shutdown, NULL, NULL); cib_writer = G_main_add_tempproc_trigger( G_PRIORITY_LOW, write_cib_contents, "write_cib_contents", NULL, NULL, NULL, cib_diskwrite_complete); EnableProcLogging(); set_sigchld_proctrack(G_PRIORITY_HIGH); client_list = g_hash_table_new(g_str_hash, g_str_equal); ccm_membership = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, NULL); peer_hash = g_hash_table_new_full( g_str_hash, g_str_equal,g_hash_destroy_str, g_hash_destroy_str); while ((flag = getopt(argc, argv, OPTARGS)) != EOF) { switch(flag) { case 'V': alter_debug(DEBUG_INC); break; case 's': stand_alone = TRUE; cl_log_enable_stderr(1); break; case 'h': /* Help message */ usage(crm_system_name, LSB_EXIT_OK); break; case 'f': per_action_cib = TRUE; break; default: ++argerr; break; } } crm_info("Retrieval of a per-action CIB: %s", per_action_cib?"enabled":"disabled"); if (optind > argc) { ++argerr; } if (argerr) { usage(crm_system_name,LSB_EXIT_GENERIC); } /* read local config file */ - rc = init_start(); + rc = cib_init(); CRM_CHECK(g_hash_table_size(client_list) == 0, crm_err("Memory leak")); + cib_cleanup(); + + if(hb_conn) { + hb_conn->llc_ops->delete(hb_conn); + } + +#ifdef HA_MALLOC_TRACK + cl_malloc_dump_allocated(LOG_ERR, FALSE); +#endif + crm_info("Done"); + return rc; +} + +void +cib_cleanup(void) +{ g_hash_table_destroy(ccm_membership); g_hash_table_destroy(client_list); g_hash_table_destroy(peer_hash); crm_free(ccm_transition_id); crm_free(cib_our_uname); #if HAVE_LIBXML2 xmlCleanupParser(); -#endif - if(hb_conn) { - hb_conn->llc_ops->delete(hb_conn); - } - -#ifdef HA_MALLOC_TRACK - cl_malloc_dump_allocated(LOG_ERR, FALSE); #endif - return rc; + crm_free(channel1); + crm_free(channel2); + crm_free(channel3); + crm_free(channel4); + crm_free(channel5); } unsigned long cib_num_ops = 0; const char *cib_stat_interval = "10min"; unsigned long cib_num_local = 0, cib_num_updates = 0, cib_num_fail = 0; unsigned long cib_bad_connects = 0, cib_num_timeouts = 0; longclock_t cib_call_time = 0; gboolean cib_stats(gpointer data); gboolean cib_stats(gpointer data) { int local_log_level = LOG_DEBUG; static unsigned long last_stat = 0; unsigned int cib_calls_ms = 0; static unsigned long cib_stat_interval_ms = 0; if(cib_stat_interval_ms == 0) { cib_stat_interval_ms = crm_get_msec(cib_stat_interval); } cib_calls_ms = longclockto_ms(cib_call_time); if((cib_num_ops - last_stat) > 0) { unsigned long calls_diff = cib_num_ops - last_stat; double stat_1 = (1000*cib_calls_ms)/calls_diff; local_log_level = LOG_INFO; do_crm_log(local_log_level, "Processed %lu operations" " (%.2fus average, %lu%% utilization) in the last %s", calls_diff, stat_1, (100*cib_calls_ms)/cib_stat_interval_ms, cib_stat_interval); } do_crm_log(local_log_level+1, "\tDetail: %lu operations (%ums total)" " (%lu local, %lu updates, %lu failures," " %lu timeouts, %lu bad connects)", cib_num_ops, cib_calls_ms, cib_num_local, cib_num_updates, cib_num_fail, cib_bad_connects, cib_num_timeouts); #ifdef HA_MALLOC_TRACK cl_malloc_dump_allocated(LOG_DEBUG, TRUE); #endif last_stat = cib_num_ops; cib_call_time = 0; return TRUE; } int -init_start(void) +cib_init(void) { - char *channel1 = NULL; - char *channel2 = NULL; - char *channel3 = NULL; - char *channel4 = NULL; - char *channel5 = NULL; gboolean was_error = FALSE; if(stand_alone == FALSE) { hb_conn = ll_cluster_new("heartbeat"); if(cib_register_ha(hb_conn, CRM_SYSTEM_CIB) == FALSE) { crm_crit("Cannot sign in to heartbeat... terminating"); exit(1); } } if(startCib("cib.xml") == FALSE){ crm_crit("Cannot start CIB... terminating"); exit(1); } channel1 = crm_strdup(cib_channel_callback); was_error = init_server_ipc_comms( channel1, cib_client_connect_null, default_ipc_connection_destroy); channel2 = crm_strdup(cib_channel_ro); was_error = was_error || init_server_ipc_comms( channel2, cib_client_connect_rw_ro, default_ipc_connection_destroy); channel3 = crm_strdup(cib_channel_rw); was_error = was_error || init_server_ipc_comms( channel3, cib_client_connect_rw_ro, default_ipc_connection_destroy); channel4 = crm_strdup(cib_channel_rw_synchronous); was_error = was_error || init_server_ipc_comms( channel4, cib_client_connect_rw_synch, default_ipc_connection_destroy); channel5 = crm_strdup(cib_channel_ro_synchronous); was_error = was_error || init_server_ipc_comms( channel5, cib_client_connect_ro_synch, default_ipc_connection_destroy); if(stand_alone) { if(was_error) { crm_err("Couldnt start"); return 1; } cib_is_master = TRUE; /* Create the mainloop and run it... */ mainloop = g_main_new(FALSE); crm_info("Starting %s mainloop", crm_system_name); /* Gmain_timeout_add(crm_get_msec("10s"), cib_msg_timeout, NULL); */ /* Gmain_timeout_add( */ /* crm_get_msec(cib_stat_interval), cib_stats, NULL); */ g_main_run(mainloop); return_to_orig_privs(); - goto cleanup; + return 0; } if(was_error == FALSE) { crm_debug_3("Be informed of CRM Client Status changes"); if (HA_OK != hb_conn->llc_ops->set_cstatus_callback( hb_conn, cib_client_status_callback, hb_conn)) { crm_err("Cannot set cstatus callback: %s", hb_conn->llc_ops->errmsg(hb_conn)); was_error = TRUE; } else { crm_debug_3("Client Status callback set"); } } if(was_error == FALSE) { gboolean did_fail = TRUE; int num_ccm_fails = 0; int max_ccm_fails = 30; int ret; int cib_ev_fd; while(did_fail && was_error == FALSE) { did_fail = FALSE; crm_debug_3("Registering with CCM"); ret = oc_ev_register(&cib_ev_token); if (ret != 0) { crm_warn("CCM registration failed"); did_fail = TRUE; } if(did_fail == FALSE) { crm_debug_3("Setting up CCM callbacks"); ret = oc_ev_set_callback( cib_ev_token, OC_EV_MEMB_CLASS, cib_ccm_msg_callback, NULL); if (ret != 0) { crm_warn("CCM callback not set"); did_fail = TRUE; } } if(did_fail == FALSE) { oc_ev_special(cib_ev_token, OC_EV_MEMB_CLASS, 0); crm_debug_3("Activating CCM token"); ret = oc_ev_activate(cib_ev_token, &cib_ev_fd); if (ret != 0){ crm_warn("CCM Activation failed"); did_fail = TRUE; } } if(did_fail) { num_ccm_fails++; oc_ev_unregister(cib_ev_token); if(num_ccm_fails < max_ccm_fails){ crm_warn("CCM Connection failed" " %d times (%d max)", num_ccm_fails, max_ccm_fails); sleep(1); } else { crm_err("CCM Activation failed" " %d (max) times", num_ccm_fails); was_error = TRUE; } } } if(was_error == FALSE) { crm_debug_3("CCM Activation passed... all set to go!"); G_main_add_fd(G_PRIORITY_HIGH, cib_ev_fd, FALSE, cib_ccm_dispatch, cib_ev_token, default_ipc_connection_destroy); } } if(was_error == FALSE) { /* Async get client status information in the cluster */ crm_debug_3("Requesting an initial dump of CIB client_status"); hb_conn->llc_ops->client_status( hb_conn, NULL, CRM_SYSTEM_CIB, -1); /* Create the mainloop and run it... */ mainloop = g_main_new(FALSE); crm_info("Starting %s mainloop", crm_system_name); Gmain_timeout_add(crm_get_msec("10s"), cib_msg_timeout, NULL); Gmain_timeout_add( crm_get_msec(cib_stat_interval), cib_stats, NULL); g_main_run(mainloop); return_to_orig_privs(); } else { crm_err("Couldnt start all communication channels, exiting."); } - - cleanup: - crm_free(channel1); - crm_free(channel2); - crm_free(channel3); - crm_free(channel4); - crm_free(channel5); - + return 0; } void usage(const char* cmd, int exit_status) { FILE* stream; stream = exit_status ? stderr : stdout; fprintf(stream, "usage: %s [-srkh]" "[-c configure file]\n", cmd); /* fprintf(stream, "\t-d\tsets debug level\n"); */ /* fprintf(stream, "\t-s\tgets daemon status\n"); */ /* fprintf(stream, "\t-r\trestarts daemon\n"); */ /* fprintf(stream, "\t-k\tstops daemon\n"); */ /* fprintf(stream, "\t-h\thelp message\n"); */ fflush(stream); exit(exit_status); } gboolean cib_register_ha(ll_cluster_t *hb_cluster, const char *client_name) { const char *uname = NULL; crm_info("Signing in with Heartbeat"); if (hb_cluster->llc_ops->signon(hb_cluster, client_name)!= HA_OK) { crm_err("Cannot sign on with heartbeat: %s", hb_cluster->llc_ops->errmsg(hb_cluster)); return FALSE; } crm_debug_3("Be informed of CIB messages"); if (HA_OK != hb_cluster->llc_ops->set_msg_callback( hb_cluster, T_CIB, cib_peer_callback, hb_cluster)){ crm_err("Cannot set msg callback: %s", hb_cluster->llc_ops->errmsg(hb_cluster)); return FALSE; } crm_debug_3("Finding our node name"); if ((uname = hb_cluster->llc_ops->get_mynodeid(hb_cluster)) == NULL) { crm_err("get_mynodeid() failed"); return FALSE; } cib_our_uname = crm_strdup(uname); crm_info("FSA Hostname: %s", cib_our_uname); crm_debug_3("Adding channel to mainloop"); G_main_add_IPC_Channel( G_PRIORITY_DEFAULT, hb_cluster->llc_ops->ipcchan(hb_cluster), FALSE, cib_ha_dispatch, hb_cluster /* userdata */, cib_ha_connection_destroy); return TRUE; } void cib_ha_connection_destroy(gpointer user_data) { if(cib_shutdown_flag) { crm_info("Heartbeat disconnection complete... exiting"); } else { crm_err("Heartbeat connection lost! Exiting."); } uninitializeCib(); if (mainloop != NULL && g_main_is_running(mainloop)) { g_main_quit(mainloop); } else { exit(LSB_EXIT_OK); } } static void disconnect_cib_client(gpointer key, gpointer value, gpointer user_data) { cib_client_t *a_client = value; crm_debug_2("Processing client %s/%s... send=%d, recv=%d", a_client->name, a_client->channel_name, (int)a_client->channel->send_queue->current_qlen, (int)a_client->channel->recv_queue->current_qlen); if(a_client->channel->ch_status == IPC_CONNECT) { a_client->channel->ops->resume_io(a_client->channel); if(a_client->channel->send_queue->current_qlen != 0 || a_client->channel->recv_queue->current_qlen != 0) { crm_info("Flushed messages to/from %s/%s... send=%d, recv=%d", a_client->name, a_client->channel_name, (int)a_client->channel->send_queue->current_qlen, (int)a_client->channel->recv_queue->current_qlen); } } if(a_client->channel->ch_status == IPC_CONNECT) { crm_warn("Disconnecting %s/%s...", a_client->name, a_client->channel_name); a_client->channel->ops->disconnect(a_client->channel); } } extern gboolean cib_process_disconnect( IPC_Channel *channel, cib_client_t *cib_client); gboolean cib_shutdown(int nsig, gpointer unused) { if(cib_shutdown_flag == FALSE) { cib_shutdown_flag = TRUE; crm_debug("Disconnecting %d clients", g_hash_table_size(client_list)); g_hash_table_foreach(client_list, disconnect_cib_client, NULL); crm_info("Disconnected %d clients", g_hash_table_size(client_list)); cib_process_disconnect(NULL, NULL); } else { crm_info("Waiting for %d clients to disconnect...", g_hash_table_size(client_list)); } return TRUE; } gboolean startCib(const char *filename) { gboolean active = FALSE; crm_data_t *cib = readCibXmlFile(WORKING_DIR, filename, TRUE); CRM_ASSERT(cib != NULL); if(activateCibXml(cib, filename) == 0) { active = TRUE; crm_info("CIB Initialization completed successfully"); if(per_action_cib) { uninitializeCib(); } } return active; } diff --git a/crm/crmd/control.c b/crm/crmd/control.c index 9ab7370d2d..df2c62594c 100644 --- a/crm/crmd/control.c +++ b/crm/crmd/control.c @@ -1,895 +1,896 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include char *ipc_server = NULL; extern void crmd_ha_connection_destroy(gpointer user_data); extern gboolean verify_stopped(gboolean force, int log_level); gboolean crm_shutdown(int nsig, gpointer unused); gboolean register_with_ha(ll_cluster_t *hb_cluster, const char *client_name); void populate_cib_nodes(ll_cluster_t *hb_cluster, gboolean with_client_status); GHashTable *ipc_clients = NULL; GTRIGSource *fsa_source = NULL; /* A_HA_CONNECT */ enum crmd_fsa_input do_ha_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { gboolean registered = FALSE; if(action & A_HA_DISCONNECT) { if(fsa_cluster_conn != NULL) { set_bit_inplace(fsa_input_register, R_HA_DISCONNECTED); fsa_cluster_conn->llc_ops->signoff( fsa_cluster_conn, FALSE); - fsa_cluster_conn->llc_ops->delete(fsa_cluster_conn); - fsa_cluster_conn = NULL; } crm_info("Disconnected from Heartbeat"); } if(action & A_HA_CONNECT) { if(fsa_cluster_conn == NULL) { fsa_cluster_conn = ll_cluster_new("heartbeat"); } /* make sure we are disconnected first */ fsa_cluster_conn->llc_ops->signoff(fsa_cluster_conn, FALSE); registered = register_with_ha( fsa_cluster_conn, crm_system_name); if(registered == FALSE) { register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); return I_NULL; } clear_bit_inplace(fsa_input_register, R_HA_DISCONNECTED); crm_info("Connected to Heartbeat"); } if(action & ~(A_HA_CONNECT|A_HA_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } return I_NULL; } /* A_SHUTDOWN */ enum crmd_fsa_input do_shutdown(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int lpc = 0; gboolean continue_shutdown = TRUE; struct crm_subsystem_s *subsystems[] = { pe_subsystem, te_subsystem }; /* just in case */ set_bit_inplace(fsa_input_register, R_SHUTDOWN); for(lpc = 0; lpc < DIMOF(subsystems); lpc++) { struct crm_subsystem_s *a_subsystem = subsystems[lpc]; if(is_set(fsa_input_register, a_subsystem->flag_connected)) { crm_info("Terminating the %s", a_subsystem->name); if(stop_subsystem(a_subsystem, TRUE) == FALSE) { /* its gone... */ crm_err("Faking %s exit", a_subsystem->name); clear_bit_inplace(fsa_input_register, a_subsystem->flag_connected); } continue_shutdown = FALSE; } } if(continue_shutdown == FALSE) { crm_info("Waiting for subsystems to exit"); crmd_fsa_stall(NULL); } else { register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } return I_NULL; } /* A_SHUTDOWN_REQ */ enum crmd_fsa_input do_shutdown_req(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { HA_Message *msg = NULL; crm_info("Sending shutdown request to DC: %s", crm_str(fsa_our_dc)); msg = create_request( CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); /* set_bit_inplace(fsa_input_register, R_STAYDOWN); */ if(send_request(msg, NULL) == FALSE) { if(AM_I_DC) { crm_info("Processing shutdown locally"); } else { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } return I_NULL; } extern char *max_generation_from; extern crm_data_t *max_generation_xml; extern GHashTable *meta_hash; static void free_mem(fsa_data_t *msg_data) { int lpc = 0; + fsa_cluster_conn->llc_ops->delete(fsa_cluster_conn); + fsa_cluster_conn = NULL; + crm_debug("Stage %d", lpc++); while(is_message()) { fsa_data_t *fsa_data = get_message(); crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]", fsa_input2string(fsa_data->fsa_input), fsa_state2string(fsa_state), fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); delete_fsa_input(fsa_data); } delete_fsa_input(msg_data); crm_debug("Stage %d", lpc++); empty_uuid_cache(); crm_debug("Stage %d", lpc++); free_ccm_cache(fsa_membership_copy); crm_free(cib_subsystem); crm_debug("Stage %d", lpc++); crmd_ipc_connection_destroy(te_subsystem->client); crm_free(te_subsystem); crm_debug("Stage %d", lpc++); crmd_ipc_connection_destroy(pe_subsystem->client); crm_free(pe_subsystem); crm_debug("Stage %d", lpc++); if(integrated_nodes) { g_hash_table_destroy(integrated_nodes); } if(finalized_nodes) { g_hash_table_destroy(finalized_nodes); } if(confirmed_nodes) { g_hash_table_destroy(confirmed_nodes); } if(crmd_peer_state) { g_hash_table_destroy(crmd_peer_state); } if(meta_hash) { g_hash_table_destroy(meta_hash); } crm_debug("Stage %d", lpc++); if(ipc_clients) { g_hash_table_destroy(ipc_clients); } crm_debug("Stage %d", lpc++); cib_delete(fsa_cib_conn); fsa_cib_conn = NULL; if(fsa_lrm_conn) { fsa_lrm_conn->lrm_ops->delete(fsa_lrm_conn); } crm_debug("Stage %d", lpc++); crm_free(integration_timer); crm_free(finalization_timer); crm_free(election_trigger); crm_free(election_timeout); crm_free(shutdown_escalation_timer); crm_free(wait_timer); crm_free(recheck_timer); crm_debug("Stage %d", lpc++); crm_free(fsa_our_dc_version); crm_free(fsa_our_uuid); crm_free(fsa_our_dc); crm_free(ipc_server); crm_free(max_generation_from); free_xml(max_generation_xml); #ifdef HA_MALLOC_TRACK cl_malloc_dump_allocated(LOG_ERR, FALSE); #endif } /* A_EXIT_0, A_EXIT_1 */ enum crmd_fsa_input do_exit(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int exit_code = 0; int log_level = LOG_INFO; const char *exit_type = "gracefully"; if(action & A_EXIT_1) { exit_code = 1; log_level = LOG_ERR; exit_type = "forcefully"; verify_stopped(TRUE, LOG_ERR); } do_crm_log(log_level, "Performing %s - %s exiting the CRMd", fsa_action2string(action), exit_type); if(is_set(fsa_input_register, R_IN_RECOVERY)) { crm_err("Could not recover from internal error"); exit_code = 2; } if(is_set(fsa_input_register, R_STAYDOWN)) { crm_warn("Inhibiting respawn by Heartbeat"); exit_code = 100; } free_mem(msg_data); crm_info("[%s] stopped (%d)", crm_system_name, exit_code); cl_flush_logs(); exit(exit_code); return I_NULL; } /* A_STARTUP */ enum crmd_fsa_input do_startup(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int was_error = 0; int interval = 1; /* seconds between DC heartbeats */ crm_debug("Registering Signal Handlers"); G_main_add_SignalHandler( G_PRIORITY_HIGH, SIGTERM, crm_shutdown, NULL, NULL); fsa_source = G_main_add_TriggerHandler( G_PRIORITY_HIGH, crm_fsa_trigger, NULL, NULL); ipc_clients = g_hash_table_new(g_str_hash, g_str_equal); crm_debug("Creating CIB and LRM objects"); fsa_cib_conn = cib_new(); fsa_lrm_conn = ll_lrm_new(XML_CIB_TAG_LRM); crm_debug("Init server comms"); if(ipc_server == NULL) { ipc_server = crm_strdup(CRM_SYSTEM_CRMD); } was_error = init_server_ipc_comms(ipc_server, crmd_client_connect, default_ipc_connection_destroy); /* set up the timers */ crm_malloc0(integration_timer, sizeof(fsa_timer_t)); crm_malloc0(finalization_timer, sizeof(fsa_timer_t)); crm_malloc0(election_trigger, sizeof(fsa_timer_t)); crm_malloc0(election_timeout, sizeof(fsa_timer_t)); crm_malloc0(shutdown_escalation_timer, sizeof(fsa_timer_t)); crm_malloc0(wait_timer, sizeof(fsa_timer_t)); crm_malloc0(recheck_timer, sizeof(fsa_timer_t)); interval = interval * 1000; if(election_trigger != NULL) { election_trigger->source_id = 0; election_trigger->period_ms = -1; election_trigger->fsa_input = I_DC_TIMEOUT; election_trigger->callback = crm_timer_popped; election_trigger->repeat = FALSE; } else { was_error = TRUE; } if(election_timeout != NULL) { election_timeout->source_id = 0; election_timeout->period_ms = -1; election_timeout->fsa_input = I_ELECTION_DC; election_timeout->callback = crm_timer_popped; election_timeout->repeat = FALSE; } else { was_error = TRUE; } if(integration_timer != NULL) { integration_timer->source_id = 0; integration_timer->period_ms = -1; integration_timer->fsa_input = I_INTEGRATED; integration_timer->callback = crm_timer_popped; integration_timer->repeat = FALSE; } else { was_error = TRUE; } if(finalization_timer != NULL) { finalization_timer->source_id = 0; finalization_timer->period_ms = -1; finalization_timer->fsa_input = I_FINALIZED; finalization_timer->callback = crm_timer_popped; finalization_timer->repeat = FALSE; /* for possible enabling... a bug in the join protocol left * a slave in S_PENDING while we think its in S_NOT_DC * * raising I_FINALIZED put us into a transition loop which is * never resolved. * in this loop we continually send probes which the node * NACK's because its in S_PENDING * * if we have nodes where heartbeat is active but the * CRM is not... then this will be handled in the * integration phase */ finalization_timer->fsa_input = I_ELECTION; } else { was_error = TRUE; } if(shutdown_escalation_timer != NULL) { shutdown_escalation_timer->source_id = 0; shutdown_escalation_timer->period_ms = -1; shutdown_escalation_timer->fsa_input = I_STOP; shutdown_escalation_timer->callback = crm_timer_popped; shutdown_escalation_timer->repeat = FALSE; } else { was_error = TRUE; } if(wait_timer != NULL) { wait_timer->source_id = 0; wait_timer->period_ms = 500; wait_timer->fsa_input = I_NULL; wait_timer->callback = crm_timer_popped; wait_timer->repeat = FALSE; } else { was_error = TRUE; } if(recheck_timer != NULL) { recheck_timer->source_id = 0; recheck_timer->period_ms = -1; recheck_timer->fsa_input = I_PE_CALC; recheck_timer->callback = crm_timer_popped; recheck_timer->repeat = FALSE; } else { was_error = TRUE; } /* set up the sub systems */ crm_malloc0(cib_subsystem, sizeof(struct crm_subsystem_s)); crm_malloc0(te_subsystem, sizeof(struct crm_subsystem_s)); crm_malloc0(pe_subsystem, sizeof(struct crm_subsystem_s)); if(cib_subsystem != NULL) { cib_subsystem->pid = -1; cib_subsystem->path = BIN_DIR; cib_subsystem->name = CRM_SYSTEM_CIB; cib_subsystem->command = BIN_DIR"/"CRM_SYSTEM_CIB; cib_subsystem->args = "-VVc"; cib_subsystem->flag_connected = R_CIB_CONNECTED; cib_subsystem->flag_required = R_CIB_REQUIRED; } else { was_error = TRUE; } if(te_subsystem != NULL) { te_subsystem->pid = -1; te_subsystem->path = BIN_DIR; te_subsystem->name = CRM_SYSTEM_TENGINE; te_subsystem->command = BIN_DIR"/"CRM_SYSTEM_TENGINE; te_subsystem->args = NULL; te_subsystem->flag_connected = R_TE_CONNECTED; te_subsystem->flag_required = R_TE_REQUIRED; } else { was_error = TRUE; } if(pe_subsystem != NULL) { pe_subsystem->pid = -1; pe_subsystem->path = BIN_DIR; pe_subsystem->name = CRM_SYSTEM_PENGINE; pe_subsystem->command = BIN_DIR"/"CRM_SYSTEM_PENGINE; pe_subsystem->args = NULL; pe_subsystem->flag_connected = R_PE_CONNECTED; pe_subsystem->flag_required = R_PE_REQUIRED; } else { was_error = TRUE; } if(was_error) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } welcomed_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); integrated_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); finalized_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); confirmed_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); crmd_peer_state = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); set_sigchld_proctrack(G_PRIORITY_HIGH); return I_NULL; } /* A_STOP */ enum crmd_fsa_input do_stop(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { if(verify_stopped(FALSE, LOG_DEBUG) == FALSE) { crmd_fsa_stall(NULL); } return I_NULL; } /* A_STARTED */ enum crmd_fsa_input do_started(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { if(is_set(fsa_input_register, R_CCM_DATA) == FALSE) { crm_info("Delaying start, CCM (%.16llx) not connected", R_CCM_DATA); crmd_fsa_stall(NULL); return I_NULL; } else if(is_set(fsa_input_register, R_LRM_CONNECTED) == FALSE) { crm_info("Delaying start, LRM (%.16llx) not connected", R_LRM_CONNECTED); crmd_fsa_stall(NULL); return I_NULL; } else if(is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) { crm_info("Delaying start, CIB (%.16llx) not connected", R_CIB_CONNECTED); crmd_fsa_stall(NULL); return I_NULL; } else if(is_set(fsa_input_register, R_READ_CONFIG) == FALSE) { crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG); crmd_fsa_stall(NULL); return I_NULL; } else if(is_set(fsa_input_register, R_PEER_DATA) == FALSE) { HA_Message * msg = NULL; /* try reading from HA */ crm_info("Delaying start, Peer data (%.16llx) not recieved", R_PEER_DATA); crm_debug_3("Looking for a HA message"); msg = fsa_cluster_conn->llc_ops->readmsg(fsa_cluster_conn, 0); if(msg != NULL) { crm_debug_3("There was a HA message"); crm_msg_del(msg); } crm_timer_start(wait_timer); crmd_fsa_stall(NULL); return I_NULL; } crm_info("The local CRM is operational"); clear_bit_inplace(fsa_input_register, R_STARTING); register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL); return I_NULL; } /* A_RECOVER */ enum crmd_fsa_input do_recover(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { set_bit_inplace(fsa_input_register, R_IN_RECOVERY); crm_err("Action %s (%.16llx) not supported", fsa_action2string(action), action); register_fsa_input(C_FSA_INTERNAL, I_STOP, NULL); return I_NULL; } pe_cluster_option crmd_opts[] = { /* name, old-name, validate, default, description */ { XML_CONFIG_ATTR_DC_DEADTIME, NULL, "time", NULL, "10s", &check_time, "How long to wait for a response from other nodes during startup.", "The \"correct\" value will depend on the speed and load of your network." }, { XML_CONFIG_ATTR_RECHECK, NULL, "time", "Zero disables polling. Positive values are an interval in seconds (unless other SI units are specified. eg. 5min)", "0", &check_timer, "Polling interval for time based changes to options, resource parameters and constraints.", "The Cluster is primarily event driven, however the configuration can have elements that change based on time. To ensure these changes take effect, we can optionally poll the cluster's status for changes." }, { XML_CONFIG_ATTR_ELECTION_FAIL, NULL, "time", NULL, "2min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." }, { XML_CONFIG_ATTR_FORCE_QUIT, NULL, "time", NULL, "20min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." }, { "crmd-integration-timeout", NULL, "time", NULL, "3min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." }, { "crmd-finalization-timeout", NULL, "time", NULL, "10min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." }, }; void crmd_metadata(void) { config_metadata("CRM Daemon", "1.0", "CRM Daemon Options", "This is a fake resource that details the options that can be configured for the CRM Daemon.", crmd_opts, DIMOF(crmd_opts)); } static void verify_crmd_options(GHashTable *options) { verify_all_options(options, crmd_opts, DIMOF(crmd_opts)); } static const char * crmd_pref(GHashTable *options, const char *name) { return get_cluster_pref(options, crmd_opts, DIMOF(crmd_opts), name); } static void config_query_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { const char *value = NULL; GHashTable *config_hash = NULL; if(rc != cib_ok) { fsa_data_t *msg_data = NULL; crm_err("Local CIB query resulted in an error: %s", cib_error2string(rc)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); if(rc == cib_bad_permissions || rc == cib_bad_digest || rc == cib_bad_config) { crm_err("The cluster is mis-configured - shutting down and staying down"); set_bit_inplace(fsa_input_register, R_STAYDOWN); } return; } crm_debug("Call %d : Parsing CIB options", call_id); config_hash = g_hash_table_new_full( g_str_hash,g_str_equal, g_hash_destroy_str,g_hash_destroy_str); unpack_instance_attributes( output, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, NULL); value = g_hash_table_lookup(config_hash, XML_CONFIG_ATTR_DC_DEADTIME); if(value == NULL) { /* apparently we're not allowed to free the result of getenv */ char *param_val = getenv(ENV_PREFIX "" KEY_INITDEAD); value = crmd_pref(config_hash, XML_CONFIG_ATTR_DC_DEADTIME); if(param_val != NULL) { int from_env = crm_get_msec(param_val) / 2; int from_defaults = crm_get_msec(value); if(from_env > from_defaults) { g_hash_table_replace( config_hash, crm_strdup(XML_CONFIG_ATTR_DC_DEADTIME), crm_strdup(param_val)); } } } verify_crmd_options(config_hash); value = crmd_pref(config_hash, XML_CONFIG_ATTR_DC_DEADTIME); election_trigger->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, XML_CONFIG_ATTR_FORCE_QUIT); shutdown_escalation_timer->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, XML_CONFIG_ATTR_ELECTION_FAIL); election_timeout->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, XML_CONFIG_ATTR_RECHECK); recheck_timer->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, "crmd-integration-timeout"); integration_timer->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, "crmd-finalization-timeout"); finalization_timer->period_ms = crm_get_msec(value); set_bit_inplace(fsa_input_register, R_READ_CONFIG); crm_debug_3("Triggering FSA: %s", __FUNCTION__); G_main_set_trigger(fsa_source); g_hash_table_destroy(config_hash); } /* A_READCONFIG */ enum crmd_fsa_input do_read_config(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int call_id = fsa_cib_conn->cmds->query( fsa_cib_conn, XML_CIB_TAG_CRMCONFIG, NULL, cib_scope_local); add_cib_op_callback(call_id, FALSE, NULL, config_query_callback); crm_debug_2("Querying the CIB... call %d", call_id); return I_NULL; } gboolean crm_shutdown(int nsig, gpointer unused) { if (crmd_mainloop != NULL && g_main_is_running(crmd_mainloop)) { if(is_set(fsa_input_register, R_SHUTDOWN)) { crm_err("Escalating the shutdown"); register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL); } else { crm_info("Requesting shutdown"); set_bit_inplace(fsa_input_register, R_SHUTDOWN); register_fsa_input(C_SHUTDOWN,I_SHUTDOWN,NULL); if(shutdown_escalation_timer->period_ms < 1) { GHashTable *config_hash = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); const char *value = crmd_pref( config_hash, XML_CONFIG_ATTR_FORCE_QUIT); int msec = crm_atoi(value, NULL); crm_info("Using default shutdown escalation: %dms", msec); shutdown_escalation_timer->period_ms = msec; g_hash_table_destroy(config_hash); } /* cant rely on this... */ crm_timer_start(shutdown_escalation_timer); } } else { crm_info("exit from shutdown"); exit(LSB_EXIT_OK); } return TRUE; } static void default_cib_update_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { if(rc != cib_ok) { fsa_data_t *msg_data = NULL; crm_err("CIB Update failed: %s", cib_error2string(rc)); crm_log_xml_warn(output, "update:failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } void populate_cib_nodes(ll_cluster_t *hb_cluster, gboolean with_client_status) { int call_id = 0; const char *ha_node = NULL; crm_data_t *cib_node_list = NULL; /* Async get client status information in the cluster */ crm_debug_2("Invoked"); if(with_client_status) { crm_debug_3("Requesting an initial dump of CRMD client_status"); fsa_cluster_conn->llc_ops->client_status( fsa_cluster_conn, NULL, CRM_SYSTEM_CRMD, -1); } crm_info("Requesting the list of configured nodes"); fsa_cluster_conn->llc_ops->init_nodewalk(fsa_cluster_conn); cib_node_list = create_xml_node(NULL, XML_CIB_TAG_NODES); do { const char *ha_node_type = NULL; const char *ha_node_uuid = NULL; crm_data_t *cib_new_node = NULL; ha_node = fsa_cluster_conn->llc_ops->nextnode(fsa_cluster_conn); if(ha_node == NULL) { continue; } ha_node_type = fsa_cluster_conn->llc_ops->node_type( fsa_cluster_conn, ha_node); if(safe_str_neq(NORMALNODE, ha_node_type)) { crm_debug("Node %s: skipping '%s'", ha_node, ha_node_type); continue; } ha_node_uuid = get_uuid(fsa_cluster_conn, ha_node); if(ha_node_uuid == NULL) { crm_warn("Node %s: no uuid found", ha_node); continue; } crm_notice("Node: %s (uuid: %s)", ha_node, ha_node_uuid); cib_new_node = create_xml_node(cib_node_list, XML_CIB_TAG_NODE); crm_xml_add(cib_new_node, XML_ATTR_ID, ha_node_uuid); crm_xml_add(cib_new_node, XML_ATTR_UNAME, ha_node); crm_xml_add(cib_new_node, XML_ATTR_TYPE, ha_node_type); } while(ha_node != NULL); fsa_cluster_conn->llc_ops->end_nodewalk(fsa_cluster_conn); /* Now update the CIB with the list of nodes */ fsa_cib_update( XML_CIB_TAG_NODES, cib_node_list, cib_scope_local|cib_quorum_override|cib_inhibit_bcast, call_id); add_cib_op_callback(call_id, FALSE, NULL, default_cib_update_callback); free_xml(cib_node_list); crm_debug_2("Complete"); } gboolean register_with_ha(ll_cluster_t *hb_cluster, const char *client_name) { const char *const_uuid = NULL; crm_debug("Signing in with Heartbeat"); if (hb_cluster->llc_ops->signon(hb_cluster, client_name)!= HA_OK) { crm_err("Cannot sign on with heartbeat: %s", hb_cluster->llc_ops->errmsg(hb_cluster)); return FALSE; } crm_debug_3("Be informed of CRM messages"); if (HA_OK != hb_cluster->llc_ops->set_msg_callback( hb_cluster, T_CRM, crmd_ha_msg_callback, hb_cluster)){ crm_err("Cannot set msg callback: %s", hb_cluster->llc_ops->errmsg(hb_cluster)); return FALSE; } crm_debug_3("Be informed of Node Status changes"); if (HA_OK != hb_cluster->llc_ops->set_nstatus_callback( hb_cluster, crmd_ha_status_callback, hb_cluster)){ crm_err("Cannot set nstatus callback: %s", hb_cluster->llc_ops->errmsg(hb_cluster)); return FALSE; } crm_debug_3("Be informed of CRM Client Status changes"); if (HA_OK != hb_cluster->llc_ops->set_cstatus_callback( hb_cluster, crmd_client_status_callback, hb_cluster)) { crm_err("Cannot set cstatus callback: %s", hb_cluster->llc_ops->errmsg(hb_cluster)); return FALSE; } crm_debug_3("Adding channel to mainloop"); G_main_add_ll_cluster( G_PRIORITY_HIGH, hb_cluster, FALSE, crmd_ha_msg_dispatch, hb_cluster /* userdata */, crmd_ha_connection_destroy); crm_debug_3("Finding our node name"); if ((fsa_our_uname = hb_cluster->llc_ops->get_mynodeid(hb_cluster)) == NULL) { crm_err("get_mynodeid() failed"); return FALSE; } crm_info("Hostname: %s", fsa_our_uname); crm_debug_3("Finding our node uuid"); const_uuid = get_uuid(fsa_cluster_conn, fsa_our_uname); if(const_uuid == NULL) { crm_err("get_uuid_by_name() failed"); return FALSE; } /* copy it so that unget_uuid() doesn't trash the value on us */ fsa_our_uuid = crm_strdup(const_uuid); crm_info("UUID: %s", fsa_our_uuid); populate_cib_nodes(hb_cluster, TRUE); return TRUE; } diff --git a/crm/crmd/main.c b/crm/crmd/main.c index d84468e676..703b0e7748 100644 --- a/crm/crmd/main.c +++ b/crm/crmd/main.c @@ -1,211 +1,211 @@ /* $Id: main.c,v 1.21 2006/08/14 09:06:31 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include const char* crm_system_name = SYS_NAME; #define OPTARGS "hV" void usage(const char* cmd, int exit_status); -int init_start(void); +int crmd_init(void); void crmd_hamsg_callback(const HA_Message * msg, void* private_data); gboolean crmd_tickle_apphb(gpointer data); extern void init_dotfile(void); GMainLoop* crmd_mainloop = NULL; int main(int argc, char ** argv) { int flag; int argerr = 0; crm_log_init(crm_system_name); crm_info("CRM Hg Version: %s\n", HA_HG_VERSION); while ((flag = getopt(argc, argv, OPTARGS)) != EOF) { switch(flag) { case 'V': cl_log_enable_stderr(1); alter_debug(DEBUG_INC); break; case 'h': /* Help message */ usage(crm_system_name, LSB_EXIT_OK); break; default: ++argerr; break; } } if(argc - optind == 1 && safe_str_eq("metadata", argv[optind])) { crmd_metadata(); return 0; } else if(argc - optind == 1 && safe_str_eq("version", argv[optind])) { fprintf(stderr, "CRM Version: "); fprintf(stdout, "%s (%s)\n", VERSION, HA_HG_VERSION); return 0; } if (optind > argc) { ++argerr; } if (argerr) { usage(crm_system_name,LSB_EXIT_GENERIC); } /* read local config file */ crm_debug_3("Enabling coredumps"); if(cl_enable_coredumps(1) != 0) { crm_warn("Cannot enable coredumps"); } if(crm_is_writable(HA_VARLIBDIR"/heartbeat/pengine", NULL, HA_CCMUSER, HA_APIGROUP, FALSE) == FALSE) { fprintf(stderr,"ERROR: Bad permissions on " HA_VARLIBDIR"/heartbeat/pengine... See logs for details\n"); fflush(stderr); return 100; } - return init_start(); + return crmd_init(); } int -init_start(void) +crmd_init(void) { int exit_code = 0; enum crmd_fsa_state state; fsa_state = S_STARTING; fsa_input_register = 0; /* zero out the regester */ init_dotfile(); crm_info("Starting %s", crm_system_name); register_fsa_input(C_STARTUP, I_STARTUP, NULL); state = s_crmd_fsa(C_STARTUP); if (state == S_PENDING || state == S_STARTING) { /* Create the mainloop and run it... */ crmd_mainloop = g_main_new(FALSE); crm_info("Starting %s's mainloop", crm_system_name); #ifdef REALTIME_SUPPORT static int crm_realtime = 1; if (crm_realtime == 1){ cl_enable_realtime(); }else if (crm_realtime == 0){ cl_disable_realtime(); } cl_make_realtime(SCHED_RR, 5, 64, 64); #endif g_main_run(crmd_mainloop); return_to_orig_privs(); if(is_set(fsa_input_register, R_STAYDOWN)) { crm_info("Inhibiting respawn by Heartbeat"); exit_code = 100; } } else { crm_err("Startup of %s failed. Current state: %s", crm_system_name, fsa_state2string(state)); exit_code = 1; } crm_info("[%s] stopped (%d)", crm_system_name, exit_code); #ifdef HA_MALLOC_TRACK cl_malloc_dump_allocated(LOG_ERR, FALSE); #endif return exit_code; } void usage(const char* cmd, int exit_status) { FILE* stream; stream = exit_status ? stderr : stdout; fprintf(stream, "usage: %s [-V] [-h|version|metadata]\n", cmd); fprintf(stream, "\t-h\t: this help message\n"); fprintf(stream, "\t-V\t: increase verbosity\n"); fprintf(stream, "\tmetadata\t: show configurable crmd options\n"); fprintf(stream, "\tversion\t\t: show version information and quit\n"); fflush(stream); exit(exit_status); } gboolean crmd_tickle_apphb(gpointer data) { char app_instance[APPNAME_LEN]; int rc = 0; sprintf(app_instance, "%s_%ld", crm_system_name, (long)getpid()); rc = apphb_hb(); if (rc < 0) { cl_perror("%s apphb_hb failure", app_instance); exit(3); } return TRUE; } diff --git a/crm/crmd/subsystems.c b/crm/crmd/subsystems.c index 6b32af71bf..02cee2fc97 100644 --- a/crm/crmd/subsystems.c +++ b/crm/crmd/subsystems.c @@ -1,235 +1,238 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include /* for access */ #include #include #include #include /* for calls to open */ #include /* for calls to open */ #include /* for calls to open */ #include /* for getpwuid */ #include /* for initgroups */ #include /* for getrlimit */ #include /* for getrlimit */ #include #include #include #include #include #include #include #include static void crmdManagedChildRegistered(ProcTrack* p) { struct crm_subsystem_s *the_subsystem = p->privatedata; the_subsystem->pid = p->pid; } static void crmdManagedChildDied( ProcTrack* p, int status, int signo, int exitcode, int waslogged) { struct crm_subsystem_s *the_subsystem = p->privatedata; crm_info("Process %s:[%d] exited (signal=%d, exitcode=%d)", the_subsystem->name, the_subsystem->pid, signo, exitcode); the_subsystem->pid = -1; the_subsystem->ipc = NULL; clear_bit_inplace(fsa_input_register, the_subsystem->flag_connected); crm_debug_3("Triggering FSA: %s", __FUNCTION__); G_main_set_trigger(fsa_source); if(is_set(fsa_input_register, the_subsystem->flag_required)) { /* this wasnt supposed to happen */ crm_err("The %s subsystem terminated unexpectedly", the_subsystem->name); register_fsa_input_before(C_IPC_MESSAGE, I_ERROR, NULL); } p->privatedata = NULL; } static const char * crmdManagedChildName(ProcTrack* p) { struct crm_subsystem_s *the_subsystem = p->privatedata; return the_subsystem->name; } static ProcTrack_ops crmd_managed_child_ops = { crmdManagedChildDied, crmdManagedChildRegistered, crmdManagedChildName }; gboolean stop_subsystem(struct crm_subsystem_s *the_subsystem, gboolean force_quit) { int quit_signal = SIGTERM; crm_debug_2("Stopping sub-system \"%s\"", the_subsystem->name); clear_bit_inplace(fsa_input_register, the_subsystem->flag_required); if (the_subsystem->pid <= 0) { crm_debug_2("Client %s not running", the_subsystem->name); return FALSE; } if(is_set(fsa_input_register, the_subsystem->flag_connected) == FALSE) { /* running but not yet connected */ crm_debug("Stopping %s before it had connected", the_subsystem->name); } /* if(force_quit && the_subsystem->sent_kill == FALSE) { quit_signal = SIGKILL; } else if(force_quit) { crm_debug("Already sent -KILL to %s: [%d]", the_subsystem->name, the_subsystem->pid); } */ errno = 0; if(CL_KILL(the_subsystem->pid, quit_signal) == 0) { crm_info("Sent -TERM to %s: [%d]", the_subsystem->name, the_subsystem->pid); the_subsystem->sent_kill = TRUE; } else { cl_perror("Sent -TERM to %s: [%d]", the_subsystem->name, the_subsystem->pid); } return TRUE; } gboolean start_subsystem(struct crm_subsystem_s* the_subsystem) { pid_t pid; struct stat buf; int s_res; unsigned int j; struct rlimit oflimits; const char *devnull = "/dev/null"; - char *args = NULL; crm_info("Starting sub-system \"%s\"", the_subsystem->name); set_bit_inplace(fsa_input_register, the_subsystem->flag_required); if (the_subsystem->pid > 0) { crm_warn("Client %s already running as pid %d", the_subsystem->name, (int) the_subsystem->pid); /* starting a started X is not an error */ return TRUE; } /* * We want to ensure that the exec will succeed before * we bother forking. */ if (access(the_subsystem->path, F_OK|X_OK) != 0) { cl_perror("Cannot (access) exec %s", the_subsystem->path); return FALSE; } s_res = stat(the_subsystem->command, &buf); if(s_res != 0) { cl_perror("Cannot (stat) exec %s", the_subsystem->command); return FALSE; } /* We need to fork so we can make child procs not real time */ switch(pid=fork()) { case -1: crm_err("Cannot fork."); return FALSE; default: /* Parent */ NewTrackedProc(pid, 0, PT_LOGNORMAL, the_subsystem, &crmd_managed_child_ops); crm_debug_2("Client %s is has pid: %d", the_subsystem->name, pid); the_subsystem->pid = pid; return TRUE; case 0: /* Child */ /* create a new process group to avoid * being interupted by heartbeat */ setpgid(0, 0); break; } - crm_debug("Executing \"%s %s\" (pid %d)", - the_subsystem->command, the_subsystem->args, (int) getpid()); + crm_debug("Executing \"%s (%s)\" (pid %d)", + the_subsystem->command, the_subsystem->name, (int) getpid()); /* A precautionary measure */ getrlimit(RLIMIT_NOFILE, &oflimits); for (j=0; j < oflimits.rlim_cur; ++j) { close(j); } (void)open(devnull, O_RDONLY); /* Stdin: fd 0 */ (void)open(devnull, O_WRONLY); /* Stdout: fd 1 */ (void)open(devnull, O_WRONLY); /* Stderr: fd 2 */ - if(the_subsystem->args != NULL) { - args = crm_strdup(the_subsystem->args); - } { - char* const start_args[] = { - crm_strdup(the_subsystem->command), - args, - NULL +#if WITH_VALGRIND + char *opts[] = { crm_strdup(VALGRIND_BIN), + crm_strdup("--show-reachable=yes"), + crm_strdup("--leak-check=full"), + crm_strdup("--time-stamp=yes"), + crm_strdup("--gen-suppressions=all"), + crm_strdup(VALGRIND_LOG), + crm_strdup(the_subsystem->command), + NULL }; - - (void)execvp(the_subsystem->command, start_args); + (void)execvp(VALGRIND_BIN, opts); +#else + char *opts[] = { crm_strdup(the_subsystem->command), NULL }; + (void)execvp(the_subsystem->command, opts); +#endif } /* Should not happen */ - cl_perror("FATAL: Cannot exec %s %s", - the_subsystem->command, crm_str(the_subsystem->args)); + cl_perror("FATAL: Cannot exec %s", the_subsystem->command); exit(100); /* Suppress respawning */ return TRUE; /* never reached */ } diff --git a/crm/pengine/main.c b/crm/pengine/main.c index 8440096015..3a6eb97159 100644 --- a/crm/pengine/main.c +++ b/crm/pengine/main.c @@ -1,174 +1,174 @@ /* $Id: main.c,v 1.22 2006/08/14 09:06:31 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define SYS_NAME CRM_SYSTEM_PENGINE #define OPTARGS "hVc" GMainLoop* mainloop = NULL; const char* crm_system_name = SYS_NAME; void usage(const char* cmd, int exit_status); -int init_start(void); +int pe_init(void); gboolean pengine_shutdown(int nsig, gpointer unused); extern gboolean process_pe_message(crm_data_t * msg, IPC_Channel *sender); extern unsigned int pengine_input_loglevel; int main(int argc, char ** argv) { int flag; int argerr = 0; char *param_val = NULL; gboolean allow_cores = TRUE; const char *param_name = NULL; crm_log_init(crm_system_name); G_main_add_SignalHandler( G_PRIORITY_HIGH, SIGTERM, pengine_shutdown, NULL, NULL); while ((flag = getopt(argc, argv, OPTARGS)) != EOF) { switch(flag) { case 'V': alter_debug(DEBUG_INC); break; case 'h': /* Help message */ usage(crm_system_name, LSB_EXIT_OK); break; case 'c': allow_cores = TRUE; break; default: ++argerr; break; } } if(argc - optind == 1 && safe_str_eq("metadata", argv[optind])) { pe_metadata(); return 0; } if (optind > argc) { ++argerr; } if (argerr) { usage(crm_system_name,LSB_EXIT_GENERIC); } param_name = ENV_PREFIX "" KEY_LOG_PENGINE_INPUTS; param_val = getenv(param_name); crm_debug("%s = %s", param_name, param_val); pengine_input_loglevel = crm_log_level; if(param_val != NULL) { int do_log = 0; cl_str_to_boolean(param_val, &do_log); if(do_log == FALSE) { pengine_input_loglevel = crm_log_level + 1; } param_val = NULL; } /* read local config file */ crm_debug_4("do start"); - return init_start(); + return pe_init(); } int -init_start(void) +pe_init(void) { IPC_Channel *crm_ch = NULL; crm_debug_4("initialize comms"); init_client_ipc_comms( CRM_SYSTEM_CRMD, subsystem_msg_dispatch, (void*)process_pe_message, &crm_ch); if(crm_ch != NULL) { crm_debug_4("sending hello message"); send_hello_message( crm_ch, "1234", CRM_SYSTEM_PENGINE, "0", "1"); /* Create the mainloop and run it... */ crm_info("Starting %s", crm_system_name); mainloop = g_main_new(FALSE); g_main_run(mainloop); return_to_orig_privs(); crm_info("Exiting %s", crm_system_name); return 0; } crm_err("Could not connect to the CRMd"); return 1; } void usage(const char* cmd, int exit_status) { FILE* stream; stream = exit_status ? stderr : stdout; fprintf(stream, "usage: %s [-srkh]" "[-c configure file]\n", cmd); /* fprintf(stream, "\t-d\tsets debug level\n"); */ /* fprintf(stream, "\t-s\tgets daemon status\n"); */ /* fprintf(stream, "\t-r\trestarts daemon\n"); */ /* fprintf(stream, "\t-k\tstops daemon\n"); */ /* fprintf(stream, "\t-h\thelp message\n"); */ fflush(stream); exit(exit_status); } gboolean pengine_shutdown(int nsig, gpointer unused) { crm_info("Exiting PEngine (SIGTERM)"); exit(LSB_EXIT_OK); } diff --git a/crm/tengine/main.c b/crm/tengine/main.c index 669fa5689f..840515f409 100644 --- a/crm/tengine/main.c +++ b/crm/tengine/main.c @@ -1,261 +1,261 @@ /* $Id: main.c,v 1.32 2006/02/27 09:55:57 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define SYS_NAME CRM_SYSTEM_TENGINE #define OPTARGS "hVc" GMainLoop* mainloop = NULL; const char* crm_system_name = SYS_NAME; cib_t *te_cib_conn = NULL; extern GTRIGSource *transition_trigger; extern crm_action_timer_t *transition_timer; void usage(const char* cmd, int exit_status); -int init_start(void); +int te_init(void); gboolean tengine_shutdown(int nsig, gpointer unused); extern void te_update_confirm(const char *event, HA_Message *msg); extern void te_update_diff(const char *event, HA_Message *msg); extern crm_graph_functions_t te_graph_fns; int main(int argc, char ** argv) { int flag; int rc = 0; int argerr = 0; gboolean allow_cores = TRUE; crm_log_init(crm_system_name); G_main_add_SignalHandler( G_PRIORITY_HIGH, SIGTERM, tengine_shutdown, NULL, NULL); transition_trigger = G_main_add_TriggerHandler( G_PRIORITY_LOW, te_graph_trigger, NULL, NULL); crm_debug_3("Begining option processing"); while ((flag = getopt(argc, argv, OPTARGS)) != EOF) { switch(flag) { case 'V': alter_debug(DEBUG_INC); break; case 'h': /* Help message */ usage(crm_system_name, LSB_EXIT_OK); break; case 'c': allow_cores = TRUE; break; default: ++argerr; break; } } crm_debug_3("Option processing complete"); if (optind > argc) { ++argerr; } if (argerr) { usage(crm_system_name,LSB_EXIT_GENERIC); } /* read local config file */ crm_debug_3("Starting..."); - rc = init_start(); + rc = te_init(); return rc; } int -init_start(void) +te_init(void) { int init_ok = TRUE; init_client_ipc_comms( CRM_SYSTEM_CRMD, subsystem_msg_dispatch, (void*)process_te_message, &crm_ch); if(crm_ch != NULL) { send_hello_message(crm_ch, "1234", CRM_SYSTEM_TENGINE, "0", "1"); } else { init_ok = FALSE; crm_err("Could not connect to the CRMd"); } if(init_ok) { crm_debug_4("Creating CIB connection"); te_cib_conn = cib_new(); if(te_cib_conn == NULL) { init_ok = FALSE; } } if(init_ok) { crm_debug_4("Connecting to the CIB"); if(cib_ok != te_cib_conn->cmds->signon( te_cib_conn, crm_system_name, cib_command)) { crm_err("Could not connect to the CIB"); init_ok = FALSE; } } if(init_ok) { crm_debug_4("Setting CIB notification callback"); if(cib_ok != te_cib_conn->cmds->add_notify_callback( te_cib_conn, T_CIB_DIFF_NOTIFY, te_update_diff)) { crm_err("Could not set CIB notification callback"); init_ok = FALSE; } } if(init_ok && ST_OK != stonithd_signon(crm_system_name)) { crm_err("Could not sign up to stonithd"); /* init_ok = FALSE; */ } if(init_ok && ST_OK != stonithd_set_stonith_ops_callback( tengine_stonith_callback)) { crm_err("Could not set stonith callback"); stonithd_signoff(); /* init_ok = FALSE; */ } if(init_ok) { IPC_Channel *fence_ch = stonithd_input_IPC_channel(); if(fence_ch == NULL) { } else if(NULL == G_main_add_IPC_Channel( G_PRIORITY_LOW, fence_ch, FALSE, tengine_stonith_dispatch, NULL, tengine_stonith_connection_destroy)) { crm_err("Failed to add Fencing channel to our mainloop"); init_ok = FALSE; } } if(init_ok) { cl_uuid_t new_uuid; char uuid_str[UU_UNPARSE_SIZEOF]; cl_uuid_generate(&new_uuid); cl_uuid_unparse(&new_uuid, uuid_str); te_uuid = crm_strdup(uuid_str); crm_info("Registering TE UUID: %s", te_uuid); set_graph_functions(&te_graph_fns); /* create a blank one */ transition_graph = unpack_graph(NULL); transition_graph->complete = TRUE; transition_graph->abort_reason = "DC Takeover"; transition_graph->completion_action = tg_restart; crm_malloc0(transition_timer, sizeof(crm_action_timer_t)); transition_timer->source_id = 0; transition_timer->reason = timeout_abort; transition_timer->action = NULL; } if(init_ok) { /* Create the mainloop and run it... */ crm_info("Starting %s", crm_system_name); mainloop = g_main_new(FALSE); g_main_run(mainloop); return_to_orig_privs(); crm_info("Exiting %s", crm_system_name); } else { crm_warn("Initialization errors, %s not starting.", crm_system_name); } destroy_graph(transition_graph); crm_free(transition_timer); te_cib_conn->cmds->signoff(te_cib_conn); cib_delete(te_cib_conn); te_cib_conn = NULL; stonithd_signoff(); crm_free(te_uuid); if(init_ok) { return 0; } return 1; } void usage(const char* cmd, int exit_status) { FILE* stream; stream = exit_status ? stderr : stdout; fprintf(stream, "usage: %s [-srkh]" "[-c configure file]\n", cmd); /* fprintf(stream, "\t-d\tsets debug level\n"); */ /* fprintf(stream, "\t-s\tgets daemon status\n"); */ /* fprintf(stream, "\t-r\trestarts daemon\n"); */ /* fprintf(stream, "\t-k\tstops daemon\n"); */ /* fprintf(stream, "\t-h\thelp message\n"); */ fflush(stream); exit(exit_status); } gboolean shuttingdown; gboolean tengine_shutdown(int nsig, gpointer unused) { shuttingdown = TRUE; abort_transition(INFINITY, tg_shutdown, "Shutdown", NULL); return TRUE; }