diff --git a/crm/cib/io.c b/crm/cib/io.c index 1b20df0595..bac65c20f7 100644 --- a/crm/cib/io.c +++ b/crm/cib/io.c @@ -1,774 +1,774 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include const char * local_resource_path[] = { XML_CIB_TAG_STATUS, }; const char * resource_path[] = { XML_CIB_TAG_RESOURCES, }; const char * node_path[] = { XML_CIB_TAG_NODES, }; const char * constraint_path[] = { XML_CIB_TAG_CONSTRAINTS, }; gboolean initialized = FALSE; crm_data_t *the_cib = NULL; crm_data_t *node_search = NULL; crm_data_t *resource_search = NULL; crm_data_t *constraint_search = NULL; crm_data_t *status_search = NULL; extern gboolean cib_writes_enabled; extern char *ccm_transition_id; extern gboolean cib_have_quorum; extern GHashTable *peer_hash; extern GHashTable *ccm_membership; extern GTRIGSource *cib_writer; extern enum cib_errors cib_status; int set_connected_peers(crm_data_t *xml_obj); void GHFunc_count_peers(gpointer key, gpointer value, gpointer user_data); int write_cib_contents(gpointer p); extern void cib_cleanup(void); static gboolean validate_cib_digest(crm_data_t *local_cib) { int s_res = -1; struct stat buf; char *digest = NULL; char *expected = NULL; gboolean passed = FALSE; FILE *expected_strm = NULL; int start = 0, length = 0, read_len = 0; s_res = stat(CIB_FILENAME ".sig", &buf); if (s_res != 0) { crm_warn("No on-disk digest present"); return TRUE; } if(local_cib != NULL) { digest = calculate_xml_digest(local_cib, FALSE); } expected_strm = fopen(CIB_FILENAME ".sig", "r"); start = ftell(expected_strm); fseek(expected_strm, 0L, SEEK_END); length = ftell(expected_strm); fseek(expected_strm, 0L, start); CRM_ASSERT(start == ftell(expected_strm)); crm_debug_3("Reading %d bytes from file", length); crm_malloc0(expected, (length+1)); read_len = fread(expected, 1, length, expected_strm); CRM_ASSERT(read_len == length); fclose(expected_strm); if(expected == NULL) { crm_err("On-disk digest is empty"); } else if(safe_str_eq(expected, digest)) { crm_debug("Digest comparision passed: %s", digest); passed = TRUE; } else { crm_err("Digest comparision failed: %s vs. %s", expected, digest); } crm_free(digest); crm_free(expected); return passed; } static int write_cib_digest(crm_data_t *local_cib, char *digest) { int rc = 0; FILE *digest_strm = fopen(CIB_FILENAME ".sig", "w"); char *local_digest = NULL; CRM_ASSERT(digest_strm != NULL); if(digest == NULL) { local_digest = calculate_xml_digest(local_cib, FALSE); CRM_ASSERT(digest != NULL); digest = local_digest; } rc = fprintf(digest_strm, "%s", digest); if(rc < 0) { cl_perror("Cannot write output to %s.sig", CIB_FILENAME); } fflush(digest_strm); fclose(digest_strm); crm_free(local_digest); return rc; } static gboolean validate_on_disk_cib(const char *filename, crm_data_t **on_disk_cib) { int s_res = -1; struct stat buf; FILE *cib_file = NULL; gboolean passed = TRUE; crm_data_t *root = NULL; if(filename != NULL) { s_res = stat(filename, &buf); } if (s_res == 0) { cib_file = fopen(filename, "r"); crm_debug_2("Reading cluster configuration from: %s", filename); root = file2xml(cib_file, FALSE); fclose(cib_file); if(validate_cib_digest(root) == FALSE) { passed = FALSE; } } if(on_disk_cib != NULL) { *on_disk_cib = root; } else { free_xml(root); } return passed; } /* * It is the callers responsibility to free the output of this function */ crm_data_t* readCibXmlFile(const char *dir, const char *file, gboolean discard_status) { struct stat buf; FILE *cib_file = NULL; gboolean dtd_ok = TRUE; char *filename = NULL; const char *name = NULL; const char *value = NULL; const char *ignore_dtd = NULL; crm_data_t *root = NULL; crm_data_t *status = NULL; if(!crm_is_writable(dir, file, HA_CCMUSER, NULL, FALSE)) { cib_status = cib_bad_permissions; return NULL; } filename = crm_concat(dir, file, '/'); if(stat(filename, &buf) != 0) { crm_warn("Cluster configuration not found: %s." " Creating an empty one.", filename); } else { crm_info("Reading cluster configuration from: %s", filename); cib_file = fopen(filename, "r"); if(cib_file == NULL) { cl_perror("could not open: %s", filename); } else { root = file2xml(cib_file, FALSE); fclose(cib_file); } if(root == NULL) { crm_err("%s exists but does NOT contain valid XML. ", filename); crm_err("Continuing with an empty configuration." " %s will NOT be overwritten.", filename); cib_writes_enabled = FALSE; } else if(validate_cib_digest(root) == FALSE) { crm_err("%s has been manually changed! If this was" " intended, remove the digest in %s.sig", filename, filename); cib_status = cib_bad_digest; } } - -#ifdef WITH_VALGRIND - cib_writes_enabled = FALSE; - crm_err("*********************************************************"); - crm_err("*** Disabling disk writes to avoid confusing Valgrind ***"); - crm_err("*********************************************************"); -#endif + + if(getenv("HA_VALGRIND_ENABLED") != NULL) { + cib_writes_enabled = FALSE; + crm_err("*********************************************************"); + crm_err("*** Disabling disk writes to avoid confusing Valgrind ***"); + crm_err("*********************************************************"); + } if(root == NULL) { root = createEmptyCib(); } else { crm_xml_add(root, "generated", XML_BOOLEAN_FALSE); } status = find_xml_node(root, XML_CIB_TAG_STATUS, FALSE); if(discard_status && status != NULL) { /* strip out the status section if there is one */ free_xml_from_parent(root, status); status = NULL; } create_xml_node(root, XML_CIB_TAG_STATUS); /* Do this before DTD validation happens */ /* fill in some defaults */ name = XML_ATTR_GENERATION_ADMIN; value = crm_element_value(root, name); if(value == NULL) { crm_xml_add_int(root, name, 0); } name = XML_ATTR_GENERATION; value = crm_element_value(root, name); if(value == NULL) { crm_xml_add_int(root, name, 0); } name = XML_ATTR_NUMUPDATES; value = crm_element_value(root, name); if(value == NULL) { crm_xml_add_int(root, name, 0); } /* unset these and require the DC/CCM to update as needed */ update_counters(__FILE__, __PRETTY_FUNCTION__, root); xml_remove_prop(root, XML_ATTR_DC_UUID); if(discard_status) { crm_log_xml_info(root, "[on-disk]"); } ignore_dtd = crm_element_value(root, "ignore_dtd"); dtd_ok = validate_with_dtd(root, TRUE, HA_LIBDIR"/heartbeat/crm.dtd"); if(dtd_ok == FALSE) { if(ignore_dtd == NULL && crm_is_true(ignore_dtd) == FALSE) { cib_status = cib_dtd_validation; } } else if(ignore_dtd == NULL) { crm_notice("Enabling DTD validation on" " the existing (sane) configuration"); crm_xml_add(root, "ignore_dtd", XML_BOOLEAN_FALSE); } if(do_id_check(root, NULL, TRUE, FALSE)) { crm_err("%s does not contain a vaild configuration:" " ID check failed", filename); cib_status = cib_id_check; } if (verifyCibXml(root) == FALSE) { crm_err("%s does not contain a vaild configuration:" " structure test failed", filename); cib_status = cib_bad_config; } crm_free(filename); return root; } /* * The caller should never free the return value */ crm_data_t* get_the_CIB(void) { return the_cib; } gboolean uninitializeCib(void) { crm_data_t *tmp_cib = the_cib; if(tmp_cib == NULL) { crm_debug("The CIB has already been deallocated."); return FALSE; } initialized = FALSE; the_cib = NULL; node_search = NULL; resource_search = NULL; constraint_search = NULL; status_search = NULL; crm_debug("Deallocating the CIB."); free_xml(tmp_cib); crm_info("The CIB has been deallocated."); return TRUE; } /* * This method will not free the old CIB pointer or the new one. * We rely on the caller to have saved a pointer to the old CIB * and to free the old/bad one depending on what is appropriate. */ gboolean initializeCib(crm_data_t *new_cib) { gboolean is_valid = TRUE; crm_data_t *tmp_node = NULL; if(new_cib == NULL) { return FALSE; } xml_validate(new_cib); tmp_node = get_object_root(XML_CIB_TAG_NODES, new_cib); if (tmp_node == NULL) { is_valid = FALSE; } tmp_node = get_object_root(XML_CIB_TAG_RESOURCES, new_cib); if (tmp_node == NULL) { is_valid = FALSE; } tmp_node = get_object_root(XML_CIB_TAG_CONSTRAINTS, new_cib); if (tmp_node == NULL) { is_valid = FALSE; } tmp_node = get_object_root(XML_CIB_TAG_CRMCONFIG, new_cib); if (tmp_node == NULL) { is_valid = FALSE; } tmp_node = get_object_root(XML_CIB_TAG_STATUS, new_cib); if (is_valid && tmp_node == NULL) { create_xml_node(new_cib, XML_CIB_TAG_STATUS); } if(is_valid == FALSE) { crm_warn("CIB Verification failed"); return FALSE; } update_counters(__FILE__, __PRETTY_FUNCTION__, new_cib); the_cib = new_cib; initialized = TRUE; return TRUE; } static int archive_file(const char *oldname, const char *newname, const char *ext) { /* move 'oldname' to 'newname' by creating a hard link to it * and then removing the original hard link */ int rc = 0; int res = 0; struct stat tmp; int s_res = 0; char *backup_file = NULL; static const char *back_ext = "bak"; /* calculate the backup name if required */ if(newname != NULL) { backup_file = crm_strdup(newname); } else { int max_name_len = 1024; crm_malloc0(backup_file, max_name_len); if (ext == NULL) { ext = back_ext; } snprintf(backup_file, max_name_len - 1, "%s.%s", oldname, ext); } if(backup_file == NULL || strlen(backup_file) == 0) { crm_err("%s backup filename was %s", newname == NULL?"calculated":"supplied", backup_file == NULL?"null":"empty"); rc = -4; } s_res = stat(backup_file, &tmp); /* unlink the old backup */ if (rc == 0 && s_res >= 0) { res = unlink(backup_file); if (res < 0) { cl_perror("Could not unlink %s", backup_file); rc = -1; } } s_res = stat(oldname, &tmp); /* copy */ if (rc == 0 && s_res >= 0) { res = link(oldname, backup_file); if (res < 0) { cl_perror("Could not create backup %s from %s", backup_file, oldname); rc = -2; } } /* unlink the original */ if (rc == 0 && s_res >= 0) { res = unlink(oldname); if (res < 0) { cl_perror("Could not unlink %s", oldname); rc = -3; } } crm_free(backup_file); return rc; } /* * This method will free the old CIB pointer on success and the new one * on failure. */ int activateCibXml(crm_data_t *new_cib, const char *ignored) { int error_code = cib_ok; crm_data_t *saved_cib = the_cib; const char *ignore_dtd = NULL; crm_log_xml_debug_4(new_cib, "Attempting to activate CIB"); CRM_ASSERT(new_cib != saved_cib); if(saved_cib != NULL) { crm_validate_data(saved_cib); } ignore_dtd = crm_element_value(new_cib, "ignore_dtd"); if( #if CRM_DEPRECATED_SINCE_2_0_4 ignore_dtd != NULL && #endif crm_is_true(ignore_dtd) == FALSE && validate_with_dtd( new_cib, TRUE, HA_LIBDIR"/heartbeat/crm.dtd") == FALSE) { error_code = cib_dtd_validation; crm_err("Ignoring invalid CIB"); } if(error_code == cib_ok && initializeCib(new_cib) == FALSE) { error_code = cib_ACTIVATION; crm_err("Ignoring invalid or NULL CIB"); } if(error_code != cib_ok) { if(saved_cib != NULL) { crm_warn("Reverting to last known CIB"); if (initializeCib(saved_cib) == FALSE) { /* oh we are so dead */ crm_crit("Couldn't re-initialize the old CIB!"); cl_flush_logs(); exit(1); } } else { crm_crit("Could not write out new CIB and no saved" " version to revert to"); } } else if(per_action_cib && cib_writes_enabled && cib_status == cib_ok) { crm_err("Per-action CIB"); write_cib_contents(the_cib); } else if(cib_writes_enabled && cib_status == cib_ok) { crm_debug_2("Triggering CIB write"); G_main_set_trigger(cib_writer); } #if CIB_MEM_STATS /* this chews through a bunch of CPU */ if(the_cib == new_cib) { long new_bytes, new_allocs, new_frees; long old_bytes, old_allocs, old_frees; crm_xml_nbytes(new_cib, &new_bytes, &new_allocs, &new_frees); crm_xml_nbytes(saved_cib, &old_bytes, &old_allocs, &old_frees); if(new_bytes != old_bytes) { crm_info("CIB size is %ld bytes (was %ld)", new_bytes, old_bytes); crm_adjust_mem_stats(NULL, new_bytes - old_bytes, new_allocs - old_allocs, new_frees - old_frees); if(crm_running_stats != NULL) { crm_adjust_mem_stats( crm_running_stats, new_bytes - old_bytes, new_allocs - old_allocs, new_frees - old_frees); } } } #endif if(the_cib != saved_cib && the_cib != new_cib) { CRM_DEV_ASSERT(error_code != cib_ok); CRM_DEV_ASSERT(the_cib == NULL); } if(the_cib != new_cib) { free_xml(new_cib); CRM_DEV_ASSERT(error_code != cib_ok); } if(the_cib != saved_cib) { free_xml(saved_cib); } return error_code; } int write_cib_contents(gpointer p) { int rc = 0; int exit_rc = LSB_EXIT_OK; char *digest = NULL; crm_data_t *cib_status_root = NULL; const char *digest_filename = CIB_FILENAME ".sig"; /* we can scribble on "the_cib" here and not affect the parent */ const char *epoch = crm_element_value(the_cib, XML_ATTR_GENERATION); const char *updates = crm_element_value(the_cib, XML_ATTR_NUMUPDATES); const char *admin_epoch = crm_element_value( the_cib, XML_ATTR_GENERATION_ADMIN); /* check the admin didnt modify it underneath us */ if(validate_on_disk_cib(CIB_FILENAME, NULL) == FALSE) { crm_err("%s was manually modified while Heartbeat was active!", CIB_FILENAME); exit_rc = LSB_EXIT_GENERIC; goto cleanup; } rc = archive_file(CIB_FILENAME, NULL, "last"); if(rc != 0) { crm_err("Could not make backup of the existing CIB: %d", rc); exit_rc = LSB_EXIT_GENERIC; goto cleanup; } rc = archive_file(digest_filename, NULL, "last"); if(rc != 0) { crm_warn("Could not make backup of the existing CIB digest: %d", rc); } /* Given that we discard the status section on startup * there is no point writing it out in the first place * since users just get confused by it * * Although, it does help me once in a while * * So delete the status section before we write it out */ if(p == NULL) { cib_status_root = find_xml_node( the_cib, XML_CIB_TAG_STATUS, TRUE); CRM_DEV_ASSERT(cib_status_root != NULL); if(cib_status_root != NULL) { free_xml_from_parent(the_cib, cib_status_root); } } rc = write_xml_file(the_cib, CIB_FILENAME, FALSE); if(rc <= 0) { crm_err("Changes couldn't be written to disk"); exit_rc = LSB_EXIT_GENERIC; goto cleanup; } digest = calculate_xml_digest(the_cib, FALSE); crm_info("Wrote version %s.%s.%s of the CIB to disk (digest: %s)", admin_epoch?admin_epoch:"0", epoch?epoch:"0", updates?updates:"0", digest); rc = write_cib_digest(the_cib, digest); if(rc <= 0) { crm_err("Digest couldn't be written to disk"); exit_rc = LSB_EXIT_GENERIC; goto cleanup; } #if 0 if(validate_on_disk_cib(CIB_FILENAME, NULL) == FALSE) { crm_err("wrote incorrect digest"); exit_rc = LSB_EXIT_GENERIC; goto cleanup; } #endif cleanup: crm_free(digest); if(p == NULL) { /* fork-and-write mode */ uninitializeCib(); cib_cleanup(); exit(exit_rc); } /* stand-alone mode */ return exit_rc; } gboolean set_transition(crm_data_t *xml_obj) { const char *current = NULL; if(xml_obj == NULL) { return FALSE; } current = crm_element_value(xml_obj, XML_ATTR_CCM_TRANSITION); if(safe_str_neq(current, ccm_transition_id)) { crm_debug("CCM transition: old=%s, new=%s", current, ccm_transition_id); crm_xml_add(xml_obj, XML_ATTR_CCM_TRANSITION,ccm_transition_id); return TRUE; } return FALSE; } gboolean set_connected_peers(crm_data_t *xml_obj) { int active = 0; int current = 0; char *peers_s = NULL; const char *current_s = NULL; if(xml_obj == NULL) { return FALSE; } current_s = crm_element_value(xml_obj, XML_ATTR_NUMPEERS); g_hash_table_foreach(peer_hash, GHFunc_count_peers, &active); current = crm_parse_int(current_s, "0"); if(current != active) { peers_s = crm_itoa(active); crm_xml_add(xml_obj, XML_ATTR_NUMPEERS, peers_s); crm_debug("We now have %s active peers", peers_s); crm_free(peers_s); return TRUE; } return FALSE; } gboolean update_quorum(crm_data_t *xml_obj) { const char *quorum_value = XML_BOOLEAN_FALSE; const char *current = NULL; if(xml_obj == NULL) { return FALSE; } current = crm_element_value(xml_obj, XML_ATTR_HAVE_QUORUM); if(cib_have_quorum) { quorum_value = XML_BOOLEAN_TRUE; } if(safe_str_neq(current, quorum_value)) { crm_debug("CCM quorum: old=%s, new=%s", current, quorum_value); crm_xml_add(xml_obj, XML_ATTR_HAVE_QUORUM, quorum_value); return TRUE; } return FALSE; } gboolean update_counters(const char *file, const char *fn, crm_data_t *xml_obj) { gboolean did_update = FALSE; did_update = did_update || update_quorum(xml_obj); did_update = did_update || set_transition(xml_obj); did_update = did_update || set_connected_peers(xml_obj); if(did_update) { do_crm_log(LOG_DEBUG, "Counters updated by %s", fn); } return did_update; } void GHFunc_count_peers(gpointer key, gpointer value, gpointer user_data) { int *active = user_data; if(safe_str_eq(value, ONLINESTATUS)) { (*active)++; } else if(safe_str_eq(value, JOINSTATUS)) { (*active)++; } } diff --git a/crm/crmd/control.c b/crm/crmd/control.c index 2b2265cad9..8686afb756 100644 --- a/crm/crmd/control.c +++ b/crm/crmd/control.c @@ -1,911 +1,915 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include char *ipc_server = NULL; extern void crmd_ha_connection_destroy(gpointer user_data); extern gboolean verify_stopped(gboolean force, int log_level); gboolean crm_shutdown(int nsig, gpointer unused); gboolean register_with_ha(ll_cluster_t *hb_cluster, const char *client_name); void populate_cib_nodes(ll_cluster_t *hb_cluster, gboolean with_client_status); GHashTable *ipc_clients = NULL; GTRIGSource *fsa_source = NULL; /* A_HA_CONNECT */ enum crmd_fsa_input do_ha_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { gboolean registered = FALSE; if(action & A_HA_DISCONNECT) { if(fsa_cluster_conn != NULL) { set_bit_inplace(fsa_input_register, R_HA_DISCONNECTED); fsa_cluster_conn->llc_ops->signoff( fsa_cluster_conn, FALSE); } crm_info("Disconnected from Heartbeat"); } if(action & A_HA_CONNECT) { if(fsa_cluster_conn == NULL) { fsa_cluster_conn = ll_cluster_new("heartbeat"); } /* make sure we are disconnected first */ fsa_cluster_conn->llc_ops->signoff(fsa_cluster_conn, FALSE); registered = register_with_ha( fsa_cluster_conn, crm_system_name); if(registered == FALSE) { register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); return I_NULL; } clear_bit_inplace(fsa_input_register, R_HA_DISCONNECTED); crm_info("Connected to Heartbeat"); } if(action & ~(A_HA_CONNECT|A_HA_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } return I_NULL; } /* A_SHUTDOWN */ enum crmd_fsa_input do_shutdown(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int lpc = 0; gboolean continue_shutdown = TRUE; struct crm_subsystem_s *subsystems[] = { pe_subsystem, te_subsystem }; /* just in case */ set_bit_inplace(fsa_input_register, R_SHUTDOWN); for(lpc = 0; lpc < DIMOF(subsystems); lpc++) { struct crm_subsystem_s *a_subsystem = subsystems[lpc]; if(is_set(fsa_input_register, a_subsystem->flag_connected)) { crm_info("Terminating the %s", a_subsystem->name); if(stop_subsystem(a_subsystem, TRUE) == FALSE) { /* its gone... */ crm_err("Faking %s exit", a_subsystem->name); clear_bit_inplace(fsa_input_register, a_subsystem->flag_connected); } continue_shutdown = FALSE; } } if(continue_shutdown == FALSE) { crm_info("Waiting for subsystems to exit"); crmd_fsa_stall(NULL); } else { register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } return I_NULL; } /* A_SHUTDOWN_REQ */ enum crmd_fsa_input do_shutdown_req(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { HA_Message *msg = NULL; crm_info("Sending shutdown request to DC: %s", crm_str(fsa_our_dc)); msg = create_request( CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); /* set_bit_inplace(fsa_input_register, R_STAYDOWN); */ if(send_request(msg, NULL) == FALSE) { if(AM_I_DC) { crm_info("Processing shutdown locally"); } else { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } return I_NULL; } extern char *max_generation_from; extern crm_data_t *max_generation_xml; extern GHashTable *meta_hash; extern GHashTable *resources; +extern GHashTable *voted; void log_connected_client(gpointer key, gpointer value, gpointer user_data); void log_connected_client(gpointer key, gpointer value, gpointer user_data) { crmd_client_t *client = value; crm_err("%s is still connected at exit", client->table_key); } static void free_mem(fsa_data_t *msg_data) { fsa_cluster_conn->llc_ops->delete(fsa_cluster_conn); fsa_cluster_conn = NULL; slist_destroy(fsa_data_t, fsa_data, fsa_message_queue, crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]", fsa_input2string(fsa_data->fsa_input), fsa_state2string(fsa_state), fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); delete_fsa_input(fsa_data); ); delete_fsa_input(msg_data); if(ipc_clients) { crm_debug("Number of connected clients: %d", g_hash_table_size(ipc_clients)); /* g_hash_table_foreach(ipc_clients, log_connected_client, NULL); */ g_hash_table_destroy(ipc_clients); } empty_uuid_cache(); free_ccm_cache(fsa_membership_copy); if(te_subsystem->client && te_subsystem->client->client_source) { crm_debug("Full destroy: TE"); G_main_del_IPC_Channel(te_subsystem->client->client_source); } else { crm_debug("Partial destroy: TE"); crmd_ipc_connection_destroy(te_subsystem->client); } crm_free(te_subsystem); if(pe_subsystem->client && pe_subsystem->client->client_source) { crm_debug("Full destroy: PE"); G_main_del_IPC_Channel(pe_subsystem->client->client_source); } else { crm_debug("Partial destroy: PE"); crmd_ipc_connection_destroy(pe_subsystem->client); } crm_free(pe_subsystem); crm_free(cib_subsystem); if(integrated_nodes) { g_hash_table_destroy(integrated_nodes); } if(finalized_nodes) { g_hash_table_destroy(finalized_nodes); } if(confirmed_nodes) { g_hash_table_destroy(confirmed_nodes); } if(crmd_peer_state) { g_hash_table_destroy(crmd_peer_state); } if(meta_hash) { g_hash_table_destroy(meta_hash); } if(resources) { g_hash_table_destroy(resources); } + if(voted) { + g_hash_table_destroy(voted); + } cib_delete(fsa_cib_conn); fsa_cib_conn = NULL; if(fsa_lrm_conn) { fsa_lrm_conn->lrm_ops->delete(fsa_lrm_conn); } crm_free(integration_timer); crm_free(finalization_timer); crm_free(election_trigger); crm_free(election_timeout); crm_free(shutdown_escalation_timer); crm_free(wait_timer); crm_free(recheck_timer); crm_free(fsa_our_dc_version); crm_free(fsa_our_uuid); crm_free(fsa_our_dc); crm_free(ipc_server); crm_free(max_generation_from); free_xml(max_generation_xml); #ifdef HA_MALLOC_TRACK cl_malloc_dump_allocated(LOG_ERR, FALSE); #endif } /* A_EXIT_0, A_EXIT_1 */ enum crmd_fsa_input do_exit(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int exit_code = 0; int log_level = LOG_INFO; const char *exit_type = "gracefully"; if(action & A_EXIT_1) { exit_code = 1; log_level = LOG_ERR; exit_type = "forcefully"; verify_stopped(TRUE, LOG_ERR); } do_crm_log(log_level, "Performing %s - %s exiting the CRMd", fsa_action2string(action), exit_type); if(is_set(fsa_input_register, R_IN_RECOVERY)) { crm_err("Could not recover from internal error"); exit_code = 2; } if(is_set(fsa_input_register, R_STAYDOWN)) { crm_warn("Inhibiting respawn by Heartbeat"); exit_code = 100; } free_mem(msg_data); crm_info("[%s] stopped (%d)", crm_system_name, exit_code); cl_flush_logs(); exit(exit_code); return I_NULL; } /* A_STARTUP */ enum crmd_fsa_input do_startup(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int was_error = 0; int interval = 1; /* seconds between DC heartbeats */ crm_debug("Registering Signal Handlers"); G_main_add_SignalHandler( G_PRIORITY_HIGH, SIGTERM, crm_shutdown, NULL, NULL); fsa_source = G_main_add_TriggerHandler( G_PRIORITY_HIGH, crm_fsa_trigger, NULL, NULL); ipc_clients = g_hash_table_new(g_str_hash, g_str_equal); crm_debug("Creating CIB and LRM objects"); fsa_cib_conn = cib_new(); fsa_lrm_conn = ll_lrm_new(XML_CIB_TAG_LRM); crm_debug("Init server comms"); if(ipc_server == NULL) { ipc_server = crm_strdup(CRM_SYSTEM_CRMD); } was_error = init_server_ipc_comms(ipc_server, crmd_client_connect, default_ipc_connection_destroy); /* set up the timers */ crm_malloc0(integration_timer, sizeof(fsa_timer_t)); crm_malloc0(finalization_timer, sizeof(fsa_timer_t)); crm_malloc0(election_trigger, sizeof(fsa_timer_t)); crm_malloc0(election_timeout, sizeof(fsa_timer_t)); crm_malloc0(shutdown_escalation_timer, sizeof(fsa_timer_t)); crm_malloc0(wait_timer, sizeof(fsa_timer_t)); crm_malloc0(recheck_timer, sizeof(fsa_timer_t)); interval = interval * 1000; if(election_trigger != NULL) { election_trigger->source_id = 0; election_trigger->period_ms = -1; election_trigger->fsa_input = I_DC_TIMEOUT; election_trigger->callback = crm_timer_popped; election_trigger->repeat = FALSE; } else { was_error = TRUE; } if(election_timeout != NULL) { election_timeout->source_id = 0; election_timeout->period_ms = -1; election_timeout->fsa_input = I_ELECTION_DC; election_timeout->callback = crm_timer_popped; election_timeout->repeat = FALSE; } else { was_error = TRUE; } if(integration_timer != NULL) { integration_timer->source_id = 0; integration_timer->period_ms = -1; integration_timer->fsa_input = I_INTEGRATED; integration_timer->callback = crm_timer_popped; integration_timer->repeat = FALSE; } else { was_error = TRUE; } if(finalization_timer != NULL) { finalization_timer->source_id = 0; finalization_timer->period_ms = -1; finalization_timer->fsa_input = I_FINALIZED; finalization_timer->callback = crm_timer_popped; finalization_timer->repeat = FALSE; /* for possible enabling... a bug in the join protocol left * a slave in S_PENDING while we think its in S_NOT_DC * * raising I_FINALIZED put us into a transition loop which is * never resolved. * in this loop we continually send probes which the node * NACK's because its in S_PENDING * * if we have nodes where heartbeat is active but the * CRM is not... then this will be handled in the * integration phase */ finalization_timer->fsa_input = I_ELECTION; } else { was_error = TRUE; } if(shutdown_escalation_timer != NULL) { shutdown_escalation_timer->source_id = 0; shutdown_escalation_timer->period_ms = -1; shutdown_escalation_timer->fsa_input = I_STOP; shutdown_escalation_timer->callback = crm_timer_popped; shutdown_escalation_timer->repeat = FALSE; } else { was_error = TRUE; } if(wait_timer != NULL) { wait_timer->source_id = 0; wait_timer->period_ms = 500; wait_timer->fsa_input = I_NULL; wait_timer->callback = crm_timer_popped; wait_timer->repeat = FALSE; } else { was_error = TRUE; } if(recheck_timer != NULL) { recheck_timer->source_id = 0; recheck_timer->period_ms = -1; recheck_timer->fsa_input = I_PE_CALC; recheck_timer->callback = crm_timer_popped; recheck_timer->repeat = FALSE; } else { was_error = TRUE; } /* set up the sub systems */ crm_malloc0(cib_subsystem, sizeof(struct crm_subsystem_s)); crm_malloc0(te_subsystem, sizeof(struct crm_subsystem_s)); crm_malloc0(pe_subsystem, sizeof(struct crm_subsystem_s)); if(cib_subsystem != NULL) { cib_subsystem->pid = -1; cib_subsystem->path = BIN_DIR; cib_subsystem->name = CRM_SYSTEM_CIB; cib_subsystem->command = BIN_DIR"/"CRM_SYSTEM_CIB; cib_subsystem->args = "-VVc"; cib_subsystem->flag_connected = R_CIB_CONNECTED; cib_subsystem->flag_required = R_CIB_REQUIRED; } else { was_error = TRUE; } if(te_subsystem != NULL) { te_subsystem->pid = -1; te_subsystem->path = BIN_DIR; te_subsystem->name = CRM_SYSTEM_TENGINE; te_subsystem->command = BIN_DIR"/"CRM_SYSTEM_TENGINE; te_subsystem->args = NULL; te_subsystem->flag_connected = R_TE_CONNECTED; te_subsystem->flag_required = R_TE_REQUIRED; } else { was_error = TRUE; } if(pe_subsystem != NULL) { pe_subsystem->pid = -1; pe_subsystem->path = BIN_DIR; pe_subsystem->name = CRM_SYSTEM_PENGINE; pe_subsystem->command = BIN_DIR"/"CRM_SYSTEM_PENGINE; pe_subsystem->args = NULL; pe_subsystem->flag_connected = R_PE_CONNECTED; pe_subsystem->flag_required = R_PE_REQUIRED; } else { was_error = TRUE; } if(was_error) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } welcomed_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); integrated_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); finalized_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); confirmed_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); crmd_peer_state = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); set_sigchld_proctrack(G_PRIORITY_HIGH); return I_NULL; } /* A_STOP */ enum crmd_fsa_input do_stop(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { if(verify_stopped(FALSE, LOG_DEBUG) == FALSE) { crmd_fsa_stall(NULL); } return I_NULL; } /* A_STARTED */ enum crmd_fsa_input do_started(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { if(is_set(fsa_input_register, R_CCM_DATA) == FALSE) { crm_info("Delaying start, CCM (%.16llx) not connected", R_CCM_DATA); crmd_fsa_stall(NULL); return I_NULL; } else if(is_set(fsa_input_register, R_LRM_CONNECTED) == FALSE) { crm_info("Delaying start, LRM (%.16llx) not connected", R_LRM_CONNECTED); crmd_fsa_stall(NULL); return I_NULL; } else if(is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) { crm_info("Delaying start, CIB (%.16llx) not connected", R_CIB_CONNECTED); crmd_fsa_stall(NULL); return I_NULL; } else if(is_set(fsa_input_register, R_READ_CONFIG) == FALSE) { crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG); crmd_fsa_stall(NULL); return I_NULL; } else if(is_set(fsa_input_register, R_PEER_DATA) == FALSE) { HA_Message * msg = NULL; /* try reading from HA */ crm_info("Delaying start, Peer data (%.16llx) not recieved", R_PEER_DATA); crm_debug_3("Looking for a HA message"); msg = fsa_cluster_conn->llc_ops->readmsg(fsa_cluster_conn, 0); if(msg != NULL) { crm_debug_3("There was a HA message"); crm_msg_del(msg); } crm_timer_start(wait_timer); crmd_fsa_stall(NULL); return I_NULL; } crm_info("The local CRM is operational"); clear_bit_inplace(fsa_input_register, R_STARTING); register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL); return I_NULL; } /* A_RECOVER */ enum crmd_fsa_input do_recover(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { set_bit_inplace(fsa_input_register, R_IN_RECOVERY); crm_err("Action %s (%.16llx) not supported", fsa_action2string(action), action); register_fsa_input(C_FSA_INTERNAL, I_STOP, NULL); return I_NULL; } pe_cluster_option crmd_opts[] = { /* name, old-name, validate, default, description */ { XML_CONFIG_ATTR_DC_DEADTIME, NULL, "time", NULL, "10s", &check_time, "How long to wait for a response from other nodes during startup.", "The \"correct\" value will depend on the speed and load of your network." }, { XML_CONFIG_ATTR_RECHECK, NULL, "time", "Zero disables polling. Positive values are an interval in seconds (unless other SI units are specified. eg. 5min)", "0", &check_timer, "Polling interval for time based changes to options, resource parameters and constraints.", "The Cluster is primarily event driven, however the configuration can have elements that change based on time. To ensure these changes take effect, we can optionally poll the cluster's status for changes." }, { XML_CONFIG_ATTR_ELECTION_FAIL, NULL, "time", NULL, "2min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." }, { XML_CONFIG_ATTR_FORCE_QUIT, NULL, "time", NULL, "20min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." }, { "crmd-integration-timeout", NULL, "time", NULL, "3min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." }, { "crmd-finalization-timeout", NULL, "time", NULL, "10min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." }, }; void crmd_metadata(void) { config_metadata("CRM Daemon", "1.0", "CRM Daemon Options", "This is a fake resource that details the options that can be configured for the CRM Daemon.", crmd_opts, DIMOF(crmd_opts)); } static void verify_crmd_options(GHashTable *options) { verify_all_options(options, crmd_opts, DIMOF(crmd_opts)); } static const char * crmd_pref(GHashTable *options, const char *name) { return get_cluster_pref(options, crmd_opts, DIMOF(crmd_opts), name); } static void config_query_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { const char *value = NULL; GHashTable *config_hash = NULL; if(rc != cib_ok) { fsa_data_t *msg_data = NULL; crm_err("Local CIB query resulted in an error: %s", cib_error2string(rc)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); if(rc == cib_bad_permissions || rc == cib_bad_digest || rc == cib_bad_config) { crm_err("The cluster is mis-configured - shutting down and staying down"); set_bit_inplace(fsa_input_register, R_STAYDOWN); } return; } crm_debug("Call %d : Parsing CIB options", call_id); config_hash = g_hash_table_new_full( g_str_hash,g_str_equal, g_hash_destroy_str,g_hash_destroy_str); unpack_instance_attributes( output, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, NULL); value = g_hash_table_lookup(config_hash, XML_CONFIG_ATTR_DC_DEADTIME); if(value == NULL) { /* apparently we're not allowed to free the result of getenv */ char *param_val = getenv(ENV_PREFIX "" KEY_INITDEAD); value = crmd_pref(config_hash, XML_CONFIG_ATTR_DC_DEADTIME); if(param_val != NULL) { int from_env = crm_get_msec(param_val) / 2; int from_defaults = crm_get_msec(value); if(from_env > from_defaults) { g_hash_table_replace( config_hash, crm_strdup(XML_CONFIG_ATTR_DC_DEADTIME), crm_strdup(param_val)); } } } verify_crmd_options(config_hash); value = crmd_pref(config_hash, XML_CONFIG_ATTR_DC_DEADTIME); election_trigger->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, XML_CONFIG_ATTR_FORCE_QUIT); shutdown_escalation_timer->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, XML_CONFIG_ATTR_ELECTION_FAIL); election_timeout->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, XML_CONFIG_ATTR_RECHECK); recheck_timer->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, "crmd-integration-timeout"); integration_timer->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, "crmd-finalization-timeout"); finalization_timer->period_ms = crm_get_msec(value); set_bit_inplace(fsa_input_register, R_READ_CONFIG); crm_debug_3("Triggering FSA: %s", __FUNCTION__); G_main_set_trigger(fsa_source); g_hash_table_destroy(config_hash); } /* A_READCONFIG */ enum crmd_fsa_input do_read_config(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int call_id = fsa_cib_conn->cmds->query( fsa_cib_conn, XML_CIB_TAG_CRMCONFIG, NULL, cib_scope_local); add_cib_op_callback(call_id, FALSE, NULL, config_query_callback); crm_debug_2("Querying the CIB... call %d", call_id); return I_NULL; } gboolean crm_shutdown(int nsig, gpointer unused) { if (crmd_mainloop != NULL && g_main_is_running(crmd_mainloop)) { if(is_set(fsa_input_register, R_SHUTDOWN)) { crm_err("Escalating the shutdown"); register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL); } else { crm_info("Requesting shutdown"); set_bit_inplace(fsa_input_register, R_SHUTDOWN); register_fsa_input(C_SHUTDOWN,I_SHUTDOWN,NULL); if(shutdown_escalation_timer->period_ms < 1) { GHashTable *config_hash = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); const char *value = crmd_pref( config_hash, XML_CONFIG_ATTR_FORCE_QUIT); int msec = crm_atoi(value, NULL); crm_info("Using default shutdown escalation: %dms", msec); shutdown_escalation_timer->period_ms = msec; g_hash_table_destroy(config_hash); } /* cant rely on this... */ crm_timer_start(shutdown_escalation_timer); } } else { crm_info("exit from shutdown"); exit(LSB_EXIT_OK); } return TRUE; } static void default_cib_update_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { if(rc != cib_ok) { fsa_data_t *msg_data = NULL; crm_err("CIB Update failed: %s", cib_error2string(rc)); crm_log_xml_warn(output, "update:failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } void populate_cib_nodes(ll_cluster_t *hb_cluster, gboolean with_client_status) { int call_id = 0; const char *ha_node = NULL; crm_data_t *cib_node_list = NULL; /* Async get client status information in the cluster */ crm_debug_2("Invoked"); if(with_client_status) { crm_debug_3("Requesting an initial dump of CRMD client_status"); fsa_cluster_conn->llc_ops->client_status( fsa_cluster_conn, NULL, CRM_SYSTEM_CRMD, -1); } crm_info("Requesting the list of configured nodes"); fsa_cluster_conn->llc_ops->init_nodewalk(fsa_cluster_conn); cib_node_list = create_xml_node(NULL, XML_CIB_TAG_NODES); do { const char *ha_node_type = NULL; const char *ha_node_uuid = NULL; crm_data_t *cib_new_node = NULL; ha_node = fsa_cluster_conn->llc_ops->nextnode(fsa_cluster_conn); if(ha_node == NULL) { continue; } ha_node_type = fsa_cluster_conn->llc_ops->node_type( fsa_cluster_conn, ha_node); if(safe_str_neq(NORMALNODE, ha_node_type)) { crm_debug("Node %s: skipping '%s'", ha_node, ha_node_type); continue; } ha_node_uuid = get_uuid(fsa_cluster_conn, ha_node); if(ha_node_uuid == NULL) { crm_warn("Node %s: no uuid found", ha_node); continue; } crm_notice("Node: %s (uuid: %s)", ha_node, ha_node_uuid); cib_new_node = create_xml_node(cib_node_list, XML_CIB_TAG_NODE); crm_xml_add(cib_new_node, XML_ATTR_ID, ha_node_uuid); crm_xml_add(cib_new_node, XML_ATTR_UNAME, ha_node); crm_xml_add(cib_new_node, XML_ATTR_TYPE, ha_node_type); } while(ha_node != NULL); fsa_cluster_conn->llc_ops->end_nodewalk(fsa_cluster_conn); /* Now update the CIB with the list of nodes */ fsa_cib_update( XML_CIB_TAG_NODES, cib_node_list, cib_scope_local|cib_quorum_override|cib_inhibit_bcast, call_id); add_cib_op_callback(call_id, FALSE, NULL, default_cib_update_callback); free_xml(cib_node_list); crm_debug_2("Complete"); } gboolean register_with_ha(ll_cluster_t *hb_cluster, const char *client_name) { const char *const_uuid = NULL; crm_debug("Signing in with Heartbeat"); if (hb_cluster->llc_ops->signon(hb_cluster, client_name)!= HA_OK) { crm_err("Cannot sign on with heartbeat: %s", hb_cluster->llc_ops->errmsg(hb_cluster)); return FALSE; } crm_debug_3("Be informed of CRM messages"); if (HA_OK != hb_cluster->llc_ops->set_msg_callback( hb_cluster, T_CRM, crmd_ha_msg_callback, hb_cluster)){ crm_err("Cannot set msg callback: %s", hb_cluster->llc_ops->errmsg(hb_cluster)); return FALSE; } crm_debug_3("Be informed of Node Status changes"); if (HA_OK != hb_cluster->llc_ops->set_nstatus_callback( hb_cluster, crmd_ha_status_callback, hb_cluster)){ crm_err("Cannot set nstatus callback: %s", hb_cluster->llc_ops->errmsg(hb_cluster)); return FALSE; } crm_debug_3("Be informed of CRM Client Status changes"); if (HA_OK != hb_cluster->llc_ops->set_cstatus_callback( hb_cluster, crmd_client_status_callback, hb_cluster)) { crm_err("Cannot set cstatus callback: %s", hb_cluster->llc_ops->errmsg(hb_cluster)); return FALSE; } crm_debug_3("Adding channel to mainloop"); G_main_add_ll_cluster( G_PRIORITY_HIGH, hb_cluster, FALSE, crmd_ha_msg_dispatch, hb_cluster /* userdata */, crmd_ha_connection_destroy); crm_debug_3("Finding our node name"); if ((fsa_our_uname = hb_cluster->llc_ops->get_mynodeid(hb_cluster)) == NULL) { crm_err("get_mynodeid() failed"); return FALSE; } crm_info("Hostname: %s", fsa_our_uname); crm_debug_3("Finding our node uuid"); const_uuid = get_uuid(fsa_cluster_conn, fsa_our_uname); if(const_uuid == NULL) { crm_err("get_uuid_by_name() failed"); return FALSE; } /* copy it so that unget_uuid() doesn't trash the value on us */ fsa_our_uuid = crm_strdup(const_uuid); crm_info("UUID: %s", fsa_our_uuid); populate_cib_nodes(hb_cluster, TRUE); return TRUE; } diff --git a/crm/crmd/subsystems.c b/crm/crmd/subsystems.c index 8690448f07..965081550f 100644 --- a/crm/crmd/subsystems.c +++ b/crm/crmd/subsystems.c @@ -1,238 +1,236 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include /* for access */ #include #include #include #include /* for calls to open */ #include /* for calls to open */ #include /* for calls to open */ #include /* for getpwuid */ #include /* for initgroups */ #include /* for getrlimit */ #include /* for getrlimit */ #include #include #include #include #include #include #include static void crmdManagedChildRegistered(ProcTrack* p) { struct crm_subsystem_s *the_subsystem = p->privatedata; the_subsystem->pid = p->pid; } static void crmdManagedChildDied( ProcTrack* p, int status, int signo, int exitcode, int waslogged) { struct crm_subsystem_s *the_subsystem = p->privatedata; crm_info("Process %s:[%d] exited (signal=%d, exitcode=%d)", the_subsystem->name, the_subsystem->pid, signo, exitcode); the_subsystem->pid = -1; the_subsystem->ipc = NULL; clear_bit_inplace(fsa_input_register, the_subsystem->flag_connected); crm_debug_3("Triggering FSA: %s", __FUNCTION__); G_main_set_trigger(fsa_source); if(is_set(fsa_input_register, the_subsystem->flag_required)) { /* this wasnt supposed to happen */ crm_err("The %s subsystem terminated unexpectedly", the_subsystem->name); register_fsa_input_before(C_IPC_MESSAGE, I_ERROR, NULL); } p->privatedata = NULL; } static const char * crmdManagedChildName(ProcTrack* p) { struct crm_subsystem_s *the_subsystem = p->privatedata; return the_subsystem->name; } static ProcTrack_ops crmd_managed_child_ops = { crmdManagedChildDied, crmdManagedChildRegistered, crmdManagedChildName }; gboolean stop_subsystem(struct crm_subsystem_s *the_subsystem, gboolean force_quit) { int quit_signal = SIGTERM; crm_debug_2("Stopping sub-system \"%s\"", the_subsystem->name); clear_bit_inplace(fsa_input_register, the_subsystem->flag_required); if (the_subsystem->pid <= 0) { crm_debug_2("Client %s not running", the_subsystem->name); return FALSE; } if(is_set(fsa_input_register, the_subsystem->flag_connected) == FALSE) { /* running but not yet connected */ crm_debug("Stopping %s before it had connected", the_subsystem->name); } /* if(force_quit && the_subsystem->sent_kill == FALSE) { quit_signal = SIGKILL; } else if(force_quit) { crm_debug("Already sent -KILL to %s: [%d]", the_subsystem->name, the_subsystem->pid); } */ errno = 0; if(CL_KILL(the_subsystem->pid, quit_signal) == 0) { crm_info("Sent -TERM to %s: [%d]", the_subsystem->name, the_subsystem->pid); the_subsystem->sent_kill = TRUE; } else { cl_perror("Sent -TERM to %s: [%d]", the_subsystem->name, the_subsystem->pid); } return TRUE; } gboolean start_subsystem(struct crm_subsystem_s* the_subsystem) { pid_t pid; struct stat buf; int s_res; unsigned int j; struct rlimit oflimits; const char *devnull = "/dev/null"; crm_info("Starting sub-system \"%s\"", the_subsystem->name); set_bit_inplace(fsa_input_register, the_subsystem->flag_required); if (the_subsystem->pid > 0) { crm_warn("Client %s already running as pid %d", the_subsystem->name, (int) the_subsystem->pid); /* starting a started X is not an error */ return TRUE; } /* * We want to ensure that the exec will succeed before * we bother forking. */ if (access(the_subsystem->path, F_OK|X_OK) != 0) { cl_perror("Cannot (access) exec %s", the_subsystem->path); return FALSE; } s_res = stat(the_subsystem->command, &buf); if(s_res != 0) { cl_perror("Cannot (stat) exec %s", the_subsystem->command); return FALSE; } /* We need to fork so we can make child procs not real time */ switch(pid=fork()) { case -1: crm_err("Cannot fork."); return FALSE; default: /* Parent */ NewTrackedProc(pid, 0, PT_LOGNORMAL, the_subsystem, &crmd_managed_child_ops); crm_debug_2("Client %s is has pid: %d", the_subsystem->name, pid); the_subsystem->pid = pid; return TRUE; case 0: /* Child */ /* create a new process group to avoid * being interupted by heartbeat */ setpgid(0, 0); break; } crm_debug("Executing \"%s (%s)\" (pid %d)", the_subsystem->command, the_subsystem->name, (int) getpid()); /* A precautionary measure */ getrlimit(RLIMIT_NOFILE, &oflimits); for (j=0; j < oflimits.rlim_cur; ++j) { close(j); } (void)open(devnull, O_RDONLY); /* Stdin: fd 0 */ (void)open(devnull, O_WRONLY); /* Stdout: fd 1 */ (void)open(devnull, O_WRONLY); /* Stderr: fd 2 */ - { -#if WITH_VALGRIND + if(getenv("HA_VALGRIND_ENABLED") != NULL) { char *opts[] = { crm_strdup(VALGRIND_BIN), crm_strdup("--show-reachable=yes"), crm_strdup("--leak-check=full"), crm_strdup("--time-stamp=yes"), crm_strdup("--suppressions="VALGRIND_SUPP), /* crm_strdup("--gen-suppressions=all"), */ crm_strdup(VALGRIND_LOG), crm_strdup(the_subsystem->command), NULL }; (void)execvp(VALGRIND_BIN, opts); -#else + } else { char *opts[] = { crm_strdup(the_subsystem->command), NULL }; (void)execvp(the_subsystem->command, opts); -#endif } /* Should not happen */ cl_perror("FATAL: Cannot exec %s", the_subsystem->command); exit(100); /* Suppress respawning */ return TRUE; /* never reached */ } diff --git a/crm/pengine/native.c b/crm/pengine/native.c index 5d64316482..7b7d681c45 100644 --- a/crm/pengine/native.c +++ b/crm/pengine/native.c @@ -1,1606 +1,1615 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #define DELETE_THEN_REFRESH 1 #define VARIANT_NATIVE 1 #include resource_t *ultimate_parent(resource_t *rsc); void node_list_update(GListPtr list1, GListPtr list2, int factor); void native_rsc_colocation_rh_must(resource_t *rsc_lh, gboolean update_lh, resource_t *rsc_rh, gboolean update_rh); void native_rsc_colocation_rh_mustnot(resource_t *rsc_lh, gboolean update_lh, resource_t *rsc_rh, gboolean update_rh); void create_notifications(resource_t *rsc, pe_working_set_t *data_set); void Recurring(resource_t *rsc, action_t *start, node_t *node, pe_working_set_t *data_set); +void RecurringOp(resource_t *rsc, action_t *start, node_t *node, + crm_data_t *operation, pe_working_set_t *data_set); void pe_pre_notify( resource_t *rsc, node_t *node, action_t *op, notify_data_t *n_data, pe_working_set_t *data_set); void pe_post_notify( resource_t *rsc, node_t *node, action_t *op, notify_data_t *n_data, pe_working_set_t *data_set); gboolean DeleteRsc(resource_t *rsc, node_t *node, pe_working_set_t *data_set); void NoRoleChange(resource_t *rsc, node_t *current, node_t *next, pe_working_set_t *data_set); gboolean StopRsc(resource_t *rsc, node_t *next, pe_working_set_t *data_set); gboolean StartRsc(resource_t *rsc, node_t *next, pe_working_set_t *data_set); extern gboolean DemoteRsc(resource_t *rsc, node_t *next, pe_working_set_t *data_set); gboolean PromoteRsc(resource_t *rsc, node_t *next, pe_working_set_t *data_set); gboolean RoleError(resource_t *rsc, node_t *next, pe_working_set_t *data_set); gboolean NullOp(resource_t *rsc, node_t *next, pe_working_set_t *data_set); enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = { /* Current State */ /* Next State: Unknown Stopped Started Slave Master */ /* Unknown */ { RSC_ROLE_UNKNOWN, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, }, /* Stopped */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_SLAVE, RSC_ROLE_SLAVE, }, /* Started */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }, /* Slave */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_UNKNOWN, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }, /* Master */ { RSC_ROLE_STOPPED, RSC_ROLE_SLAVE, RSC_ROLE_UNKNOWN, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }, }; gboolean (*rsc_action_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX])(resource_t*,node_t*,pe_working_set_t*) = { /* Current State */ /* Next State: Unknown Stopped Started Slave Master */ /* Unknown */ { RoleError, StopRsc, RoleError, RoleError, RoleError, }, /* Stopped */ { RoleError, NullOp, StartRsc, StartRsc, RoleError, }, /* Started */ { RoleError, StopRsc, NullOp, NullOp, PromoteRsc, }, /* Slave */ { RoleError, StopRsc, RoleError, NullOp, PromoteRsc, }, /* Master */ { RoleError, RoleError, RoleError, DemoteRsc, NullOp, }, }; static gboolean native_choose_node(resource_t *rsc) { /* 1. Sort by weight 2. color.chosen_node = the node (of those with the highest wieght) with the fewest resources 3. remove color.chosen_node from all other colors */ GListPtr nodes = NULL; node_t *chosen = NULL; if(rsc->provisional == FALSE) { return rsc->allocated_to?TRUE:FALSE; } crm_debug_3("Choosing node for %s from %d candidates", rsc->id, g_list_length(rsc->allowed_nodes)); if(rsc->allowed_nodes) { rsc->allowed_nodes = g_list_sort( rsc->allowed_nodes, sort_node_weight); nodes = rsc->allowed_nodes; chosen = g_list_nth_data(nodes, 0); } return native_assign_node(rsc, nodes, chosen); } void native_set_cmds(resource_t *rsc) { } int native_num_allowed_nodes(resource_t *rsc) { int num_nodes = 0; if(rsc->next_role == RSC_ROLE_STOPPED) { return 0; } crm_debug_4("Default case"); slist_iter( this_node, node_t, rsc->allowed_nodes, lpc, crm_debug_3("Rsc %s Checking %s: %d", rsc->id, this_node->details->uname, this_node->weight); if(this_node->details->shutdown || this_node->details->online == FALSE) { this_node->weight = -INFINITY; } if(this_node->weight < 0) { continue; /* } else if(this_node->details->unclean) { */ /* continue; */ } num_nodes++; ); crm_debug_2("Resource %s can run on %d nodes", rsc->id, num_nodes); return num_nodes; } resource_t * ultimate_parent(resource_t *rsc) { resource_t *parent = rsc; while(parent->parent) { parent = parent->parent; } return parent; } node_t * native_color(resource_t *rsc, pe_working_set_t *data_set) { if(rsc->parent && rsc->parent->is_allocating == FALSE) { /* never allocate children on their own */ crm_debug("Escalating allocation of %s to its parent: %s", rsc->id, rsc->parent->id); rsc->parent->cmds->color(rsc->parent, data_set); } print_resource(LOG_DEBUG_2, "Allocating: ", rsc, FALSE); if(rsc->provisional == FALSE) { return rsc->allocated_to; } if(rsc->is_allocating) { crm_debug("Dependancy loop detected involving %s", rsc->id); return NULL; } rsc->is_allocating = TRUE; rsc->rsc_cons = g_list_sort(rsc->rsc_cons, sort_cons_strength); slist_iter( constraint, rsc_colocation_t, rsc->rsc_cons, lpc, crm_debug_3("%s: Pre-Processing %s", rsc->id, constraint->id); if(rsc->provisional && constraint->rsc_rh->provisional) { crm_info("Combine scores from %s and %s", rsc->id, constraint->rsc_rh->id); node_list_update(constraint->rsc_rh->allowed_nodes, rsc->allowed_nodes, constraint->score/INFINITY); } constraint->rsc_rh->cmds->color( constraint->rsc_rh, data_set); rsc->cmds->rsc_colocation_lh( rsc, constraint->rsc_rh, constraint); ); print_resource(LOG_DEBUG, "Allocating: ", rsc, FALSE); if(rsc->next_role == RSC_ROLE_STOPPED) { crm_debug_2("Making sure %s doesn't get allocated", rsc->id); /* make sure it doesnt come up again */ resource_location( rsc, NULL, -INFINITY, "target_role", data_set); } if(rsc->provisional && native_choose_node(rsc) ) { crm_debug_3("Allocated resource %s to %s", rsc->id, rsc->allocated_to->details->uname); } else if(rsc->allocated_to == NULL) { if(rsc->orphan == FALSE) { pe_warn("Resource %s cannot run anywhere", rsc->id); } else { crm_info("Stopping orphan resource %s", rsc->id); } } else { crm_debug("Pre-Allocated resource %s to %s", rsc->id, rsc->allocated_to->details->uname); } rsc->is_allocating = FALSE; print_resource(LOG_DEBUG_3, "Allocated ", rsc, TRUE); return rsc->allocated_to; } void -Recurring(resource_t *rsc, action_t *start, node_t *node, - pe_working_set_t *data_set) +RecurringOp(resource_t *rsc, action_t *start, node_t *node, + crm_data_t *operation, pe_working_set_t *data_set) { char *key = NULL; const char *name = NULL; const char *value = NULL; const char *interval = NULL; const char *node_uname = NULL; int interval_ms = 0; action_t *mon = NULL; gboolean is_optional = TRUE; GListPtr possible_matches = NULL; crm_debug_2("Creating recurring actions for %s", rsc->id); if(node != NULL) { node_uname = node->details->uname; } + + interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); + interval_ms = crm_get_msec(interval); - xml_child_iter_filter( - rsc->ops_xml, operation, "op", - + if(interval_ms <= 0) { + return; + } + + value = crm_element_value(operation, "disabled"); + if(crm_is_true(value)) { + return; + } + + name = crm_element_value(operation, "name"); + key = generate_op_key(rsc->id, name, interval_ms); + if(start != NULL) { + crm_debug_3("Marking %s %s due to %s", + key, start->optional?"optional":"manditory", + start->uuid); + is_optional = start->optional; + } else { + crm_debug_2("Marking %s optional", key); is_optional = TRUE; - name = crm_element_value(operation, "name"); - interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); - interval_ms = crm_get_msec(interval); - - if(interval_ms <= 0) { - continue; - } - - value = crm_element_value(operation, "disabled"); - if(crm_is_true(value)) { - continue; - } - - key = generate_op_key(rsc->id, name, interval_ms); - if(start != NULL) { - crm_debug_3("Marking %s %s due to %s", - key, start->optional?"optional":"manditory", - start->uuid); - is_optional = start->optional; - } else { - crm_debug_2("Marking %s optional", key); - is_optional = TRUE; + } + + /* start a monitor for an already active resource */ + possible_matches = find_actions_exact(rsc->actions, key, node); + if(possible_matches == NULL) { + is_optional = FALSE; + crm_debug_3("Marking %s manditory: not active", key); + } + + value = crm_element_value(operation, "role"); + if((rsc->next_role == RSC_ROLE_MASTER && value == NULL) + || (value != NULL && text2role(value) != rsc->next_role)) { + int log_level = LOG_DEBUG_2; + const char *result = "Ignoring"; + if(is_optional) { + char *local_key = crm_strdup(key); + log_level = LOG_INFO; + result = "Cancelling"; + /* its running : cancel it */ + + mon = custom_action( + rsc, local_key, CRMD_ACTION_CANCEL, node, + FALSE, TRUE, data_set); + + mon->task = CRMD_ACTION_CANCEL; + add_hash_param(mon->meta, XML_LRM_ATTR_INTERVAL, interval); + add_hash_param(mon->meta, XML_LRM_ATTR_TASK, name); + + custom_action_order( + rsc, NULL, mon, + rsc, promote_key(rsc), NULL, + pe_order_optional, data_set); + + mon = NULL; } - /* start a monitor for an already active resource */ - possible_matches = find_actions_exact(rsc->actions, key, node); - if(possible_matches == NULL) { - is_optional = FALSE; - crm_debug_3("Marking %s manditory: not active", key); - } + do_crm_log(log_level, "%s action %s (%s vs. %s)", + result , key, value?value:role2text(RSC_ROLE_SLAVE), + role2text(rsc->next_role)); - value = crm_element_value(operation, "role"); - if((rsc->next_role == RSC_ROLE_MASTER && value == NULL) - || (value != NULL && text2role(value) != rsc->next_role)) { - int log_level = LOG_DEBUG_2; - const char *foo = "Ignoring"; - if(is_optional) { - char *local_key = crm_strdup(key); - log_level = LOG_INFO; - foo = "Cancelling"; - /* its running : cancel it */ - - mon = custom_action( - rsc, local_key, CRMD_ACTION_CANCEL, node, - FALSE, TRUE, data_set); - - mon->task = CRMD_ACTION_CANCEL; - add_hash_param(mon->meta, XML_LRM_ATTR_INTERVAL, interval); - add_hash_param(mon->meta, XML_LRM_ATTR_TASK, name); - - custom_action_order( - rsc, NULL, mon, - rsc, promote_key(rsc), NULL, - pe_order_optional, data_set); - - mon = NULL; - } - - do_crm_log(log_level, "%s action %s (%s vs. %s)", - foo , key, value?value:role2text(RSC_ROLE_SLAVE), - role2text(rsc->next_role)); - crm_free(key); - key = NULL; - continue; - } + crm_free(key); + key = NULL; + return; + } - mon = custom_action(rsc, key, name, node, - is_optional, TRUE, data_set); - - if(is_optional) { - crm_debug("%s\t %s (optional)", - crm_str(node_uname), mon->uuid); - } + mon = custom_action(rsc, key, name, node, + is_optional, TRUE, data_set); + key = mon->uuid; + if(is_optional) { + crm_debug("%s\t %s (optional)", + crm_str(node_uname), mon->uuid); + } + + if(start == NULL || start->runnable == FALSE) { + crm_debug("%s\t %s (cancelled : start un-runnable)", + crm_str(node_uname), mon->uuid); + mon->runnable = FALSE; - if(start == NULL || start->runnable == FALSE) { - crm_debug("%s\t %s (cancelled : start un-runnable)", - crm_str(node_uname), mon->uuid); - mon->runnable = FALSE; - - } else if(node == NULL - || node->details->online == FALSE - || node->details->unclean) { - crm_debug("%s\t %s (cancelled : no node available)", - crm_str(node_uname), mon->uuid); - mon->runnable = FALSE; + } else if(node == NULL + || node->details->online == FALSE + || node->details->unclean) { + crm_debug("%s\t %s (cancelled : no node available)", + crm_str(node_uname), mon->uuid); + mon->runnable = FALSE; - } else if(mon->optional == FALSE) { - crm_notice("%s\t %s", crm_str(node_uname),mon->uuid); - } - - custom_action_order(rsc, start_key(rsc), NULL, - NULL, crm_strdup(key), mon, - pe_order_internal_restart, data_set); + } else if(mon->optional == FALSE) { + crm_notice("%s\t %s", crm_str(node_uname),mon->uuid); + } + + custom_action_order(rsc, start_key(rsc), NULL, + NULL, crm_strdup(key), mon, + pe_order_internal_restart, data_set); + + if(rsc->next_role == RSC_ROLE_MASTER) { + char *running_master = crm_itoa(EXECRA_RUNNING_MASTER); + add_hash_param(mon->meta, XML_ATTR_TE_TARGET_RC, running_master); + custom_action_order( + rsc, promote_key(rsc), NULL, + rsc, NULL, mon, + pe_order_optional, data_set); + crm_free(running_master); + } +} - if(rsc->next_role == RSC_ROLE_MASTER) { - char *running_master = crm_itoa(EXECRA_RUNNING_MASTER); - add_hash_param(mon->meta, XML_ATTR_TE_TARGET_RC, running_master); - custom_action_order( - rsc, promote_key(rsc), NULL, - rsc, NULL, mon, - pe_order_optional, data_set); - crm_free(running_master); - } +void +Recurring(resource_t *rsc, action_t *start, node_t *node, + pe_working_set_t *data_set) +{ + + xml_child_iter_filter( + rsc->ops_xml, operation, "op", + RecurringOp(rsc, start, node, operation, data_set); ); } void native_create_actions(resource_t *rsc, pe_working_set_t *data_set) { action_t *start = NULL; node_t *chosen = NULL; enum rsc_role_e role = RSC_ROLE_UNKNOWN; enum rsc_role_e next_role = RSC_ROLE_UNKNOWN; crm_debug_2("Creating actions for %s", rsc->id); chosen = rsc->allocated_to; if(chosen != NULL) { CRM_CHECK(rsc->next_role != RSC_ROLE_UNKNOWN, rsc->next_role = RSC_ROLE_STARTED); } unpack_instance_attributes( rsc->xml, XML_TAG_ATTR_SETS, chosen?chosen->details->attrs:NULL, rsc->parameters, NULL, data_set->now); crm_debug_2("%s: %s->%s", rsc->id, role2text(rsc->role), role2text(rsc->next_role)); if(g_list_length(rsc->running_on) > 1) { if(rsc->recovery_type == recovery_stop_start) { pe_proc_err("Attempting recovery of resource %s", rsc->id); StopRsc(rsc, NULL, data_set); rsc->role = RSC_ROLE_STOPPED; } } else if(rsc->running_on != NULL) { node_t *current = rsc->running_on->data; NoRoleChange(rsc, current, chosen, data_set); } else if(rsc->role == RSC_ROLE_STOPPED && rsc->next_role == RSC_ROLE_STOPPED) { char *key = start_key(rsc); GListPtr possible_matches = find_actions(rsc->actions, key, NULL); slist_iter( action, action_t, possible_matches, lpc, action->optional = TRUE; /* action->pseudo = TRUE; */ ); crm_debug_2("Stopping a stopped resource"); crm_free(key); return; } role = rsc->role; while(role != rsc->next_role) { next_role = rsc_state_matrix[role][rsc->next_role]; crm_debug_2("Executing: %s->%s (%s)", role2text(role), role2text(next_role), rsc->id); if(rsc_action_matrix[role][next_role]( rsc, chosen, data_set) == FALSE) { break; } role = next_role; } if(rsc->next_role != RSC_ROLE_STOPPED && rsc->is_managed) { start = start_action(rsc, chosen, TRUE); Recurring(rsc, start, chosen, data_set); } } void native_internal_constraints(resource_t *rsc, pe_working_set_t *data_set) { order_restart(rsc); custom_action_order(rsc, demote_key(rsc), NULL, rsc, stop_key(rsc), NULL, pe_order_implies_left, data_set); custom_action_order(rsc, start_key(rsc), NULL, rsc, promote_key(rsc), NULL, pe_order_optional, data_set); custom_action_order( rsc, stop_key(rsc), NULL, rsc, delete_key(rsc), NULL, pe_order_optional, data_set); custom_action_order( rsc, delete_key(rsc), NULL, rsc, start_key(rsc), NULL, pe_order_implies_left, data_set); if(rsc->notify) { char *key1 = NULL; char *key2 = NULL; key1 = generate_op_key(rsc->id, "confirmed-post_notify_start", 0); key2 = generate_op_key(rsc->id, "pre_notify_promote", 0); custom_action_order( rsc, key1, NULL, rsc, key2, NULL, pe_order_optional, data_set); key1 = generate_op_key(rsc->id, "confirmed-post_notify_demote", 0); key2 = generate_op_key(rsc->id, "pre_notify_stop", 0); custom_action_order( rsc, key1, NULL, rsc, key2, NULL, pe_order_optional, data_set); } } void native_rsc_colocation_lh( resource_t *rsc_lh, resource_t *rsc_rh, rsc_colocation_t *constraint) { if(rsc_lh == NULL) { pe_err("rsc_lh was NULL for %s", constraint->id); return; } else if(constraint->rsc_rh == NULL) { pe_err("rsc_rh was NULL for %s", constraint->id); return; } crm_debug_2("Processing colocation constraint between %s and %s", rsc_lh->id, rsc_rh->id); rsc_rh->cmds->rsc_colocation_rh(rsc_lh, rsc_rh, constraint); } static gboolean filter_colocation_constraint( resource_t *rsc_lh, resource_t *rsc_rh, rsc_colocation_t *constraint) { if(constraint->score == 0){ return FALSE; } if(constraint->role_lh != RSC_ROLE_UNKNOWN && constraint->role_lh != rsc_lh->next_role) { crm_debug_4("RH: Skipping constraint: \"%s\" state filter", role2text(constraint->role_rh)); return FALSE; } if(constraint->role_rh != RSC_ROLE_UNKNOWN && constraint->role_rh != rsc_rh->next_role) { crm_debug_4("RH: Skipping constraint: \"%s\" state filter", role2text(constraint->role_rh)); return FALSE; } return TRUE; } static void colocation_match( resource_t *rsc_lh, resource_t *rsc_rh, rsc_colocation_t *constraint) { const char *tmp = NULL; const char *value = NULL; gboolean do_check = FALSE; const char *attribute = "#id"; if(constraint->node_attribute != NULL) { attribute = constraint->node_attribute; } if(rsc_rh->allocated_to) { value = g_hash_table_lookup( rsc_rh->allocated_to->details->attrs, attribute); do_check = TRUE; } slist_iter( node, node_t, rsc_lh->allowed_nodes, lpc, tmp = g_hash_table_lookup(node->details->attrs, attribute); if(do_check && safe_str_eq(tmp, value)) { crm_debug_2("%s: %s.%s += %d", constraint->id, rsc_lh->id, node->details->uname, constraint->score); node->weight = merge_weights( constraint->score, node->weight); } else if(do_check == FALSE || constraint->score >= INFINITY) { crm_debug_2("%s: %s.%s = -INFINITY (%s)", constraint->id, rsc_lh->id, node->details->uname, do_check?"failed":"unallocated"); node->weight = -INFINITY; } ); } void native_rsc_colocation_rh( resource_t *rsc_lh, resource_t *rsc_rh, rsc_colocation_t *constraint) { crm_debug_2("%sColocating %s with %s (%s, weight=%d)", constraint->score >= 0?"":"Anti-", rsc_lh->id, rsc_rh->id, constraint->id, constraint->score); if(filter_colocation_constraint(rsc_lh, rsc_rh, constraint) == FALSE) { return; } if(rsc_rh->provisional) { return; } else if(rsc_lh->provisional == FALSE) { /* error check */ struct node_shared_s *details_lh; struct node_shared_s *details_rh; if((constraint->score > -INFINITY) && (constraint->score < INFINITY)) { return; } details_rh = rsc_rh->allocated_to?rsc_rh->allocated_to->details:NULL; details_lh = rsc_lh->allocated_to?rsc_lh->allocated_to->details:NULL; if(constraint->score == INFINITY && details_lh != details_rh) { crm_err("%s and %s are both allocated" " but to different nodes: %s vs. %s", rsc_lh->id, rsc_rh->id, details_lh?details_lh->uname:"n/a", details_rh?details_rh->uname:"n/a"); } else if(constraint->score == -INFINITY && details_lh == details_rh) { crm_err("%s and %s are both allocated" " but to the SAME node: %s", rsc_lh->id, rsc_rh->id, details_rh?details_rh->uname:"n/a"); } return; } else { colocation_match(rsc_lh, rsc_rh, constraint); } } void node_list_update(GListPtr list1, GListPtr list2, int factor) { node_t *other_node = NULL; slist_iter( node, node_t, list1, lpc, if(node == NULL) { continue; } other_node = (node_t*)pe_find_node_id( list2, node->details->id); if(other_node != NULL) { crm_debug_2("%s: %d + %d", node->details->uname, node->weight, other_node->weight); node->weight = merge_weights( factor*other_node->weight, node->weight); } ); } void native_rsc_order_lh(resource_t *lh_rsc, order_constraint_t *order) { GListPtr lh_actions = NULL; action_t *lh_action = order->lh_action; crm_debug_3("Processing LH of ordering constraint %d", order->id); if(lh_action != NULL) { lh_actions = g_list_append(NULL, lh_action); } else if(lh_action == NULL && lh_rsc != NULL) { lh_actions = find_actions( lh_rsc->actions, order->lh_action_task, NULL); if(lh_actions == NULL) { crm_debug_4("No LH-Side (%s/%s) found for constraint", lh_rsc->id, order->lh_action_task); if(lh_rsc->next_role == RSC_ROLE_STOPPED) { resource_t *rh_rsc = order->rh_rsc; if(order->rh_action && (order->type & pe_order_internal_restart)) { crm_debug_3("No LH(%s/%s) found for RH(%s)...", lh_rsc->id, order->lh_action_task, order->rh_action->uuid); order->rh_action->runnable = FALSE; return; } else if(rh_rsc != NULL) { crm_debug_3("No LH(%s/%s) found for RH(%s/%s)...", lh_rsc->id, order->lh_action_task, rh_rsc->id, order->rh_action_task); rh_rsc->cmds->rsc_order_rh(NULL, rh_rsc, order); return; } } return; } } else { pe_warn("No LH-Side (%s) specified for constraint", order->lh_action_task); if(order->rh_rsc != NULL) { crm_debug_4("RH-Side was: (%s/%s)", order->rh_rsc->id, order->rh_action_task); } else if(order->rh_action != NULL && order->rh_action->rsc != NULL) { crm_debug_4("RH-Side was: (%s/%s)", order->rh_action->rsc->id, order->rh_action_task); } else if(order->rh_action != NULL) { crm_debug_4("RH-Side was: %s", order->rh_action_task); } else { crm_debug_4("RH-Side was NULL"); } return; } slist_iter( lh_action_iter, action_t, lh_actions, lpc, resource_t *rh_rsc = order->rh_rsc; if(rh_rsc == NULL && order->rh_action) { rh_rsc = order->rh_action->rsc; } if(rh_rsc) { rh_rsc->cmds->rsc_order_rh( lh_action_iter, rh_rsc, order); } else if(order->rh_action) { order_actions(lh_action_iter, order->rh_action, order->type); } ); pe_free_shallow_adv(lh_actions, FALSE); } void native_rsc_order_rh( action_t *lh_action, resource_t *rsc, order_constraint_t *order) { GListPtr rh_actions = NULL; action_t *rh_action = NULL; CRM_CHECK(rsc != NULL, return); CRM_CHECK(order != NULL, return); rh_action = order->rh_action; crm_debug_3("Processing RH of ordering constraint %d", order->id); if(rh_action != NULL) { rh_actions = g_list_append(NULL, rh_action); } else if(rsc != NULL) { rh_actions = find_actions( rsc->actions, order->rh_action_task, NULL); } if(rh_actions == NULL && lh_action != NULL) { crm_debug_4("No RH-Side (%s/%s) found for constraint..." " ignoring", rsc->id,order->rh_action_task); if(lh_action) { crm_debug_4("LH-Side was: %s", lh_action->uuid); } return; } slist_iter( rh_action_iter, action_t, rh_actions, lpc, if(lh_action) { order_actions(lh_action, rh_action_iter, order->type); } else if(order->type & pe_order_internal_restart) { rh_action_iter->runnable = FALSE; } ); pe_free_shallow_adv(rh_actions, FALSE); } void native_rsc_location(resource_t *rsc, rsc_to_node_t *constraint) { GListPtr or_list; crm_debug_2("Applying %s (%s) to %s", constraint->id, role2text(constraint->role_filter), rsc->id); /* take "lifetime" into account */ if(constraint == NULL) { pe_err("Constraint is NULL"); return; } else if(rsc == NULL) { pe_err("LHS of rsc_to_node (%s) is NULL", constraint->id); return; } else if(constraint->role_filter > 0 && constraint->role_filter != rsc->next_role) { crm_debug("Constraint (%s) is not active (role : %s)", constraint->id, role2text(constraint->role_filter)); return; } else if(is_active(constraint) == FALSE) { crm_debug_2("Constraint (%s) is not active", constraint->id); return; } if(constraint->node_list_rh == NULL) { crm_debug_2("RHS of constraint %s is NULL", constraint->id); return; } or_list = node_list_or( rsc->allowed_nodes, constraint->node_list_rh, FALSE); pe_free_shallow(rsc->allowed_nodes); rsc->allowed_nodes = or_list; slist_iter(node, node_t, or_list, lpc, crm_debug_3("%s + %s : %d", rsc->id, node->details->uname, node->weight); ); } void native_expand(resource_t *rsc, pe_working_set_t *data_set) { slist_iter( action, action_t, rsc->actions, lpc, crm_debug_4("processing action %d for rsc=%s", action->id, rsc->id); graph_element_from_action(action, data_set); ); } void native_agent_constraints(resource_t *rsc) { } void create_notifications(resource_t *rsc, pe_working_set_t *data_set) { if(rsc->notify == FALSE) { return; } /* slist_iter( */ /* action, action_t, rsc->actions, lpc, */ /* ); */ } static void register_activity(resource_t *rsc, enum action_tasks task, node_t *node, notify_data_t *n_data) { notify_entry_t *entry = NULL; crm_malloc0(entry, sizeof(notify_entry_t)); entry->rsc = rsc; entry->node = node; switch(task) { case start_rsc: n_data->start = g_list_append(n_data->start, entry); break; case stop_rsc: n_data->stop = g_list_append(n_data->stop, entry); break; case action_promote: n_data->promote = g_list_append(n_data->promote, entry); break; case action_demote: n_data->demote = g_list_append(n_data->demote, entry); break; default: crm_err("Unsupported notify action: %s", task2text(task)); crm_free(entry); break; } } static void register_state(resource_t *rsc, node_t *on_node, notify_data_t *n_data) { notify_entry_t *entry = NULL; crm_malloc0(entry, sizeof(notify_entry_t)); entry->rsc = rsc; entry->node = on_node; crm_debug_2("%s state: %s", rsc->id, role2text(rsc->next_role)); switch(rsc->next_role) { case RSC_ROLE_STOPPED: /* n_data->inactive = g_list_append(n_data->inactive, entry); */ crm_free(entry); break; case RSC_ROLE_STARTED: n_data->active = g_list_append(n_data->active, entry); break; case RSC_ROLE_SLAVE: n_data->slave = g_list_append(n_data->slave, entry); break; case RSC_ROLE_MASTER: n_data->master = g_list_append(n_data->master, entry); break; default: crm_err("Unsupported notify role"); crm_free(entry); break; } } void native_create_notify_element(resource_t *rsc, action_t *op, notify_data_t *n_data, pe_working_set_t *data_set) { node_t *next_node = NULL; gboolean registered = FALSE; char *op_key = NULL; GListPtr possible_matches = NULL; enum action_tasks task = text2task(op->task); if(op->pre_notify == NULL || op->post_notify == NULL) { /* no notifications required */ crm_debug_4("No notificaitons required for %s", op->task); return; } next_node = rsc->allocated_to; op_key = generate_op_key(rsc->id, op->task, 0); possible_matches = find_actions(rsc->actions, op_key, NULL); crm_debug_2("Creating notificaitons for: %s (%s->%s)", op->uuid, role2text(rsc->role), role2text(rsc->next_role)); if(rsc->role == rsc->next_role) { register_state(rsc, next_node, n_data); } slist_iter( local_op, action_t, possible_matches, lpc, local_op->notify_keys = n_data->keys; if(local_op->optional == FALSE) { registered = TRUE; register_activity(rsc, task, local_op->node, n_data); } ); /* stop / demote */ if(rsc->role != RSC_ROLE_STOPPED) { if(task == stop_rsc || task == action_demote) { slist_iter( current_node, node_t, rsc->running_on, lpc, pe_pre_notify(rsc, current_node, op, n_data, data_set); if(task == action_demote || registered == FALSE) { pe_post_notify(rsc, current_node, op, n_data, data_set); } ); } } /* start / promote */ if(rsc->next_role != RSC_ROLE_STOPPED) { CRM_CHECK(next_node != NULL,;); if(next_node == NULL) { pe_proc_err("next role: %s", role2text(rsc->next_role)); } else if(task == start_rsc || task == action_promote) { if(task != start_rsc || registered == FALSE) { pe_pre_notify(rsc, next_node, op, n_data, data_set); } pe_post_notify(rsc, next_node, op, n_data, data_set); } } crm_free(op_key); pe_free_shallow_adv(possible_matches, FALSE); } static void dup_attr(gpointer key, gpointer value, gpointer user_data) { char *meta_key = crm_concat(CRM_META, key, '_'); g_hash_table_replace(user_data, meta_key, crm_strdup(value)); } static action_t * pe_notify(resource_t *rsc, node_t *node, action_t *op, action_t *confirm, notify_data_t *n_data, pe_working_set_t *data_set) { char *key = NULL; action_t *trigger = NULL; const char *value = NULL; const char *task = NULL; if(op == NULL || confirm == NULL) { crm_debug_2("Op=%p confirm=%p", op, confirm); return NULL; } CRM_CHECK(node != NULL, return NULL); if(node->details->online == FALSE) { crm_info("Skipping notification for %s", rsc->id); return NULL; } value = g_hash_table_lookup(op->meta, "notify_type"); task = g_hash_table_lookup(op->meta, "notify_operation"); crm_debug_2("Creating actions for %s: %s (%s-%s)", op->uuid, rsc->id, value, task); key = generate_notify_key(rsc->id, value, task); trigger = custom_action(rsc, key, op->task, node, op->optional, TRUE, data_set); g_hash_table_foreach(op->meta, dup_attr, trigger->extra); trigger->notify_keys = n_data->keys; /* pseudo_notify before notify */ crm_debug_3("Ordering %s before %s (%d->%d)", op->uuid, trigger->uuid, trigger->id, op->id); order_actions(op, trigger, pe_order_implies_left); value = g_hash_table_lookup(op->meta, "notify_confirm"); if(crm_is_true(value)) { /* notify before pseudo_notified */ crm_debug_3("Ordering %s before %s (%d->%d)", trigger->uuid, confirm->uuid, confirm->id, trigger->id); order_actions(trigger, confirm, pe_order_implies_left); } return trigger; } void pe_pre_notify(resource_t *rsc, node_t *node, action_t *op, notify_data_t *n_data, pe_working_set_t *data_set) { crm_debug_2("%s: %s", rsc->id, op->uuid); pe_notify(rsc, node, op->pre_notify, op->pre_notified, n_data, data_set); } void pe_post_notify(resource_t *rsc, node_t *node, action_t *op, notify_data_t *n_data, pe_working_set_t *data_set) { action_t *notify = NULL; CRM_CHECK(op != NULL, return); CRM_CHECK(rsc != NULL, return); crm_debug_2("%s: %s", rsc->id, op->uuid); notify = pe_notify(rsc, node, op->post_notify, op->post_notified, n_data, data_set); if(notify != NULL) { /* crm_err("Upgrading priority for %s to INFINITY", notify->uuid); */ notify->priority = INFINITY; } notify = op->post_notified; if(notify != NULL) { slist_iter( mon, action_t, rsc->actions, lpc, const char *interval = g_hash_table_lookup(mon->meta, "interval"); if(interval == NULL || safe_str_eq(interval, "0")) { crm_debug_3("Skipping %s: interval", mon->uuid); continue; } else if(safe_str_eq(mon->task, "cancel")) { crm_debug_3("Skipping %s: cancel", mon->uuid); continue; } order_actions(notify, mon, pe_order_optional); ); } } void NoRoleChange(resource_t *rsc, node_t *current, node_t *next, pe_working_set_t *data_set) { action_t *start = NULL; action_t *stop = NULL; GListPtr possible_matches = NULL; crm_debug("Executing: %s (role=%s)",rsc->id, role2text(rsc->next_role)); if(current == NULL || next == NULL) { return; } /* use StartRsc/StopRsc */ if(safe_str_neq(current->details->id, next->details->id)) { crm_notice("Move resource %s\t(%s -> %s)", rsc->id, current->details->uname, next->details->uname); stop = stop_action(rsc, current, FALSE); start = start_action(rsc, next, FALSE); possible_matches = find_recurring_actions(rsc->actions, next); slist_iter(match, action_t, possible_matches, lpc, if(match->optional == FALSE) { crm_err("Found bad recurring action: %s", match->uuid); match->optional = TRUE; } ); if(data_set->remove_after_stop) { DeleteRsc(rsc, current, data_set); } } else { if(rsc->failed) { crm_notice("Recover resource %s\t(%s)", rsc->id, next->details->uname); stop = stop_action(rsc, current, FALSE); start = start_action(rsc, next, FALSE); /* /\* make the restart required *\/ */ /* order_stop_start(rsc, rsc, pe_order_implies_left); */ } else if(rsc->start_pending) { start = start_action(rsc, next, TRUE); if(start->runnable) { /* wait for StartRsc() to be called */ rsc->role = RSC_ROLE_STOPPED; } else { /* wait for StopRsc() to be called */ rsc->next_role = RSC_ROLE_STOPPED; } } else { stop = stop_action(rsc, current, TRUE); start = start_action(rsc, next, TRUE); stop->optional = start->optional; if(start->runnable == FALSE) { rsc->next_role = RSC_ROLE_STOPPED; } else if(start->optional) { crm_notice("Leave resource %s\t(%s)", rsc->id, next->details->uname); } else { crm_notice("Restart resource %s\t(%s)", rsc->id, next->details->uname); } } } } gboolean StopRsc(resource_t *rsc, node_t *next, pe_working_set_t *data_set) { action_t *stop = NULL; crm_debug_2("Executing: %s", rsc->id); slist_iter( current, node_t, rsc->running_on, lpc, crm_notice(" %s\tStop %s", current->details->uname, rsc->id); stop = stop_action(rsc, current, FALSE); if(data_set->remove_after_stop) { DeleteRsc(rsc, current, data_set); } ); return TRUE; } gboolean StartRsc(resource_t *rsc, node_t *next, pe_working_set_t *data_set) { action_t *start = NULL; crm_debug_2("Executing: %s", rsc->id); start = start_action(rsc, next, TRUE); if(start->runnable) { crm_notice(" %s\tStart %s", next->details->uname, rsc->id); start->optional = FALSE; } return TRUE; } gboolean PromoteRsc(resource_t *rsc, node_t *next, pe_working_set_t *data_set) { char *key = NULL; gboolean runnable = TRUE; GListPtr action_list = NULL; crm_debug_2("Executing: %s", rsc->id); CRM_CHECK(rsc->next_role == RSC_ROLE_MASTER, return FALSE); key = start_key(rsc); action_list = find_actions_exact(rsc->actions, key, next); crm_free(key); slist_iter(start, action_t, action_list, lpc, if(start->runnable == FALSE) { runnable = FALSE; } ); if(runnable) { promote_action(rsc, next, FALSE); crm_notice("%s\tPromote %s", next->details->uname, rsc->id); return TRUE; } crm_debug("%s\tPromote %s (canceled)", next->details->uname, rsc->id); key = promote_key(rsc); action_list = find_actions_exact(rsc->actions, key, next); crm_free(key); slist_iter(promote, action_t, action_list, lpc, promote->runnable = FALSE; ); return TRUE; } gboolean DemoteRsc(resource_t *rsc, node_t *next, pe_working_set_t *data_set) { crm_debug_2("Executing: %s", rsc->id); /* CRM_CHECK(rsc->next_role == RSC_ROLE_SLAVE, return FALSE); */ slist_iter( current, node_t, rsc->running_on, lpc, crm_notice("%s\tDeomote %s", current->details->uname, rsc->id); demote_action(rsc, current, FALSE); ); return TRUE; } gboolean RoleError(resource_t *rsc, node_t *next, pe_working_set_t *data_set) { crm_debug("Executing: %s", rsc->id); CRM_CHECK(FALSE, return FALSE); return FALSE; } gboolean NullOp(resource_t *rsc, node_t *next, pe_working_set_t *data_set) { crm_debug("Executing: %s", rsc->id); return FALSE; } gboolean DeleteRsc(resource_t *rsc, node_t *node, pe_working_set_t *data_set) { action_t *delete = NULL; action_t *refresh = NULL; if(rsc->failed) { crm_debug_2("Resource %s not deleted from %s: failed", rsc->id, node->details->uname); return FALSE; } else if(node == NULL) { crm_debug_2("Resource %s not deleted: NULL node", rsc->id); return FALSE; } else if(node->details->unclean || node->details->online == FALSE) { crm_debug_2("Resource %s not deleted from %s: unrunnable", rsc->id, node->details->uname); return FALSE; } crm_notice("Removing %s from %s", rsc->id, node->details->uname); delete = delete_action(rsc, node); #if DELETE_THEN_REFRESH refresh = custom_action( NULL, crm_strdup(CRM_OP_LRM_REFRESH), CRM_OP_LRM_REFRESH, node, FALSE, TRUE, data_set); add_hash_param(refresh->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); order_actions(delete, refresh, pe_order_optional); #endif return TRUE; } gboolean native_create_probe(resource_t *rsc, node_t *node, action_t *complete, gboolean force, pe_working_set_t *data_set) { char *key = NULL; char *target_rc = NULL; action_t *probe = NULL; node_t *running = NULL; CRM_CHECK(node != NULL, return FALSE); if(rsc->orphan) { crm_debug_2("Skipping orphan: %s", rsc->id); return FALSE; } running = pe_find_node_id(rsc->known_on, node->details->id); if(force == FALSE && running != NULL) { /* we already know the status of the resource on this node */ crm_debug_3("Skipping active: %s", rsc->id); return FALSE; } key = generate_op_key(rsc->id, CRMD_ACTION_STATUS, 0); probe = custom_action(rsc, key, CRMD_ACTION_STATUS, node, FALSE, TRUE, data_set); probe->priority = INFINITY; running = pe_find_node_id(rsc->running_on, node->details->id); if(running == NULL) { target_rc = crm_itoa(EXECRA_NOT_RUNNING); add_hash_param(probe->meta, XML_ATTR_TE_TARGET_RC, target_rc); crm_free(target_rc); } crm_debug_2("%s: Created probe for %s", node->details->uname, rsc->id); custom_action_order(rsc, NULL, probe, rsc, NULL, complete, pe_order_implies_left, data_set); return TRUE; } static void native_start_constraints( resource_t *rsc, action_t *stonith_op, gboolean is_stonith, pe_working_set_t *data_set) { gboolean is_unprotected = FALSE; gboolean run_unprotected = TRUE; if(is_stonith) { char *key = start_key(rsc); crm_debug_2("Ordering %s action before stonith events", key); custom_action_order( rsc, key, NULL, NULL, crm_strdup(CRM_OP_FENCE), stonith_op, pe_order_optional, data_set); } else { slist_iter(action, action_t, rsc->actions, lpc2, if(action->needs != rsc_req_stonith) { crm_debug_3("%s doesnt need to wait for stonith events", action->uuid); continue; } crm_debug_2("Ordering %s after stonith events", action->uuid); if(stonith_op != NULL) { custom_action_order( NULL, crm_strdup(CRM_OP_FENCE), stonith_op, rsc, NULL, action, pe_order_implies_left, data_set); } else if(run_unprotected == FALSE) { /* mark the start unrunnable */ action->runnable = FALSE; } else { is_unprotected = TRUE; } ); } if(is_unprotected) { pe_err("SHARED RESOURCE %s IS NOT PROTECTED:" " Stonith disabled", rsc->id); } } static void native_stop_constraints( resource_t *rsc, action_t *stonith_op, gboolean is_stonith, pe_working_set_t *data_set) { char *key = NULL; GListPtr action_list = NULL; node_t *node = stonith_op->node; key = stop_key(rsc); action_list = find_actions(rsc->actions, key, node); crm_free(key); /* add the stonith OP as a stop pre-req and the mark the stop * as a pseudo op - since its now redundant */ slist_iter( action, action_t, action_list, lpc2, if(node->details->online == FALSE || rsc->failed) { resource_t *parent = NULL; crm_warn("Stop of failed resource %s is" " implicit after %s is fenced", action->uuid, node->details->uname); /* the stop would never complete and is * now implied by the stonith operation */ action->pseudo = TRUE; action->runnable = TRUE; if(is_stonith) { /* do nothing */ } else { custom_action_order( NULL, crm_strdup(CRM_OP_FENCE),stonith_op, rsc, start_key(rsc), NULL, pe_order_implies_left, data_set); } /* find the top-most resource */ parent = rsc->parent; while(parent != NULL && parent->parent != NULL) { parent = parent->parent; } if(parent) { crm_info("Re-creating actions for %s", parent->id); parent->cmds->create_actions(parent, data_set); /* make sure we dont mess anything up in create_actions */ CRM_CHECK(action->pseudo, action->pseudo = TRUE); CRM_CHECK(action->runnable, action->runnable = TRUE); } } else if(is_stonith == FALSE) { crm_info("Moving healthy resource %s" " off %s before fencing", rsc->id, node->details->uname); /* stop healthy resources before the * stonith op */ custom_action_order( rsc, stop_key(rsc), NULL, NULL,crm_strdup(CRM_OP_FENCE),stonith_op, pe_order_implies_left, data_set); } ); key = demote_key(rsc); action_list = find_actions(rsc->actions, key, node); crm_free(key); slist_iter( action, action_t, action_list, lpc2, if(node->details->online == FALSE || rsc->failed) { crm_info("Demote of failed resource %s is" " implict after %s is fenced", rsc->id, node->details->uname); /* the stop would never complete and is * now implied by the stonith operation */ action->pseudo = TRUE; action->runnable = TRUE; if(is_stonith == FALSE) { custom_action_order( NULL, crm_strdup(CRM_OP_FENCE), stonith_op, rsc, demote_key(rsc), NULL, pe_order_implies_left, data_set); } } ); } void native_stonith_ordering( resource_t *rsc, action_t *stonith_op, pe_working_set_t *data_set) { gboolean is_stonith = FALSE; const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); if(rsc->is_managed == FALSE) { crm_debug_3("Skipping fencing constraints for unmanaged resource: %s", rsc->id); return; } if(stonith_op != NULL && safe_str_eq(class, "stonith")) { is_stonith = TRUE; } /* Start constraints */ native_start_constraints(rsc, stonith_op, is_stonith, data_set); /* Stop constraints */ native_stop_constraints(rsc, stonith_op, is_stonith, data_set); } void native_migrate_reload(resource_t *rsc, pe_working_set_t *data_set) { char *key = NULL; GListPtr action_list = NULL; const char *value = NULL; action_t *stop = NULL; action_t *start = NULL; action_t *other = NULL; action_t *action = NULL; if(rsc->variant != pe_native) { return; } if(rsc->is_managed == FALSE || rsc->failed || rsc->start_pending || rsc->next_role != RSC_ROLE_STARTED || g_list_length(rsc->running_on) != 1) { crm_debug_3("%s: resource", rsc->id); return; } key = start_key(rsc); action_list = find_actions(rsc->actions, key, NULL); crm_free(key); if(action_list == NULL) { crm_debug_3("%s: no start action", rsc->id); return; } start = action_list->data; value = g_hash_table_lookup(rsc->meta, "allow_migrate"); if(crm_is_true(value)) { rsc->can_migrate = TRUE; } if(rsc->can_migrate == FALSE && start->allow_reload_conversion == FALSE) { crm_debug_3("%s: no need to continue", rsc->id); return; } key = stop_key(rsc); action_list = find_actions(rsc->actions, key, NULL); crm_free(key); if(action_list == NULL) { crm_debug_3("%s: no stop action", rsc->id); return; } stop = action_list->data; action = start; if(action->pseudo || action->optional || action->node == NULL || action->runnable == FALSE) { crm_debug_3("Skipping: %s", action->task); return; } action = stop; if(action->pseudo || action->optional || action->node == NULL || action->runnable == FALSE) { crm_debug_3("Skipping: %s", action->task); return; } slist_iter( other_w, action_wrapper_t, start->actions_before, lpc, other = other_w->action; if(other->optional == FALSE && other->rsc != NULL && other->rsc != start->rsc) { crm_debug_2("Skipping: start depends"); return; } ); slist_iter( other_w, action_wrapper_t, stop->actions_after, lpc, other = other_w->action; if(other->optional == FALSE && other->rsc != NULL && other->rsc != stop->rsc) { crm_debug_2("Skipping: stop depends"); return; } ); if(rsc->can_migrate && stop->node->details != start->node->details) { crm_info("Migrating %s from %s to %s", rsc->id, stop->node->details->uname, start->node->details->uname); crm_free(stop->uuid); stop->task = CRMD_ACTION_MIGRATE; stop->uuid = generate_op_key(rsc->id, stop->task, 0); add_hash_param(stop->meta, "migrate_source", stop->node->details->uname); add_hash_param(stop->meta, "migrate_target", start->node->details->uname); crm_free(start->uuid); start->task = CRMD_ACTION_MIGRATED; start->uuid = generate_op_key(rsc->id, start->task, 0); add_hash_param(start->meta, "migrate_source_uuid", stop->node->details->id); add_hash_param(start->meta, "migrate_source", stop->node->details->uname); add_hash_param(start->meta, "migrate_target", start->node->details->uname); } else if(start->allow_reload_conversion && stop->node->details == start->node->details) { crm_info("Rewriting restart of %s on %s as a reload", rsc->id, start->node->details->uname); crm_free(start->uuid); start->task = "reload"; start->uuid = generate_op_key(rsc->id, start->task, 0); stop->pseudo = TRUE; /* easier than trying to delete it from the graph */ } else { crm_debug_3("%s nothing to do", rsc->id); } } diff --git a/tools/attrd.c b/tools/attrd.c index eca01dbfd8..d7be87617e 100644 --- a/tools/attrd.c +++ b/tools/attrd.c @@ -1,627 +1,632 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* #include */ #include /* #include */ #include #include #include #include #include #include #include #include #define OPTARGS "hV" GMainLoop* mainloop = NULL; const char *attrd_uname = NULL; char *attrd_uuid = NULL; ll_cluster_t *attrd_cluster_conn; gboolean need_shutdown = FALSE; GHashTable *attr_hash = NULL; cib_t *cib_conn = NULL; typedef struct attr_hash_entry_s { char *id; char *set; char *section; char *value; char *last_value; int timeout; char *dampen; guint timer_id; } attr_hash_entry_t; static void free_hash_entry(gpointer data) { attr_hash_entry_t *entry = data; if (entry == NULL) { return; } crm_free(entry->id); crm_free(entry->set); crm_free(entry->dampen); crm_free(entry->section); if(entry->value != entry->last_value) { crm_free(entry->value); crm_free(entry->last_value); } else { crm_free(entry->value); } crm_free(entry); } void attrd_ha_callback(HA_Message * msg, void* private_data); void attrd_local_callback(HA_Message * msg); gboolean attrd_timer_callback(void *user_data); static gboolean attrd_shutdown(int nsig, gpointer unused) { need_shutdown = TRUE; crm_info("Exiting"); if (mainloop != NULL && g_main_is_running(mainloop)) { g_main_quit(mainloop); } else { exit(0); } return FALSE; } static void usage(const char* cmd, int exit_status) { FILE* stream; stream = exit_status ? stderr : stdout; fprintf(stream, "usage: %s [-srkh] [-c configure file]\n", cmd); /* fprintf(stream, "\t-d\tsets debug level\n"); */ /* fprintf(stream, "\t-s\tgets daemon status\n"); */ /* fprintf(stream, "\t-r\trestarts daemon\n"); */ /* fprintf(stream, "\t-k\tstops daemon\n"); */ /* fprintf(stream, "\t-h\thelp message\n"); */ fflush(stream); exit(exit_status); } typedef struct attrd_client_s { char *id; char *name; IPC_Channel *channel; GCHSource *source; } attrd_client_t; static void stop_attrd_timer(attr_hash_entry_t *hash_entry) { if(hash_entry != NULL && hash_entry->timer_id != 0) { crm_debug_2("Stopping %s timer", hash_entry->id); Gmain_timeout_remove(hash_entry->timer_id); hash_entry->timer_id = 0; } } static gboolean attrd_ipc_callback(IPC_Channel *client, gpointer user_data) { int lpc = 0; HA_Message *msg = NULL; attrd_client_t *curr_client = (attrd_client_t*)user_data; gboolean stay_connected = TRUE; crm_debug_2("Invoked: %s", curr_client->id); while(IPC_ISRCONN(client)) { if(client->ops->is_message_pending(client) == 0) { break; } msg = msgfromIPC_noauth(client); if (msg == NULL) { crm_debug("%s: no message this time", curr_client->id); continue; } lpc++; crm_debug_2("Processing msg from %s", curr_client->id); crm_log_message_adv(LOG_DEBUG_3, __PRETTY_FUNCTION__, msg); attrd_local_callback(msg); crm_msg_del(msg); msg = NULL; if(client->ch_status != IPC_CONNECT) { break; } } crm_debug_2("Processed %d messages", lpc); if (client->ch_status != IPC_CONNECT) { stay_connected = FALSE; } return stay_connected; } static void attrd_connection_destroy(gpointer user_data) { attrd_client_t *client = user_data; /* cib_process_disconnect */ if(client == NULL) { return; } if(client->source != NULL) { crm_debug_4("Deleting %s (%p) from mainloop", client->name, client->source); G_main_del_IPC_Channel(client->source); client->source = NULL; } crm_debug_3("Destroying %s (%p)", client->name, client); crm_free(client->name); crm_free(client->id); crm_free(client); crm_debug_4("Freed the cib client"); return; } static gboolean attrd_connect(IPC_Channel *channel, gpointer user_data) { attrd_client_t *new_client = NULL; crm_debug_3("Connecting channel"); if(channel == NULL) { crm_err("Channel was NULL"); return FALSE; } else if(channel->ch_status != IPC_CONNECT) { crm_err("Channel was disconnected"); return FALSE; } else if(need_shutdown) { crm_info("Ignoring connection request during shutdown"); return FALSE; } crm_malloc0(new_client, sizeof(attrd_client_t)); new_client->channel = channel; crm_debug_3("Created channel %p for channel %s", new_client, new_client->id); /* channel->ops->set_recv_qlen(channel, 100); */ /* channel->ops->set_send_qlen(channel, 400); */ new_client->source = G_main_add_IPC_Channel( G_PRIORITY_DEFAULT, channel, FALSE, attrd_ipc_callback, new_client, attrd_connection_destroy); crm_debug_3("Client %s connected", new_client->id); return TRUE; } static gboolean attrd_ha_dispatch(IPC_Channel *channel, gpointer user_data) { gboolean stay_connected = TRUE; crm_debug_2("Invoked"); while(attrd_cluster_conn != NULL && IPC_ISRCONN(channel)) { if(attrd_cluster_conn->llc_ops->msgready(attrd_cluster_conn) == 0) { crm_debug_2("no message ready yet"); break; } /* invoke the callbacks but dont block */ attrd_cluster_conn->llc_ops->rcvmsg(attrd_cluster_conn, 0); } if (attrd_cluster_conn == NULL || channel->ch_status != IPC_CONNECT) { if(need_shutdown == FALSE) { crm_crit("Lost connection to heartbeat service."); } else { crm_info("Lost connection to heartbeat service."); } stay_connected = FALSE; } return stay_connected; } static void attrd_ha_connection_destroy(gpointer user_data) { crm_debug_3("Invoked"); if(need_shutdown) { /* we signed out, so this is expected */ crm_info("Heartbeat disconnection complete"); return; } crm_crit("Lost connection to heartbeat service!"); + if (mainloop != NULL && g_main_is_running(mainloop)) { + g_main_quit(mainloop); + return; + } + exit(LSB_EXIT_OK); } static gboolean register_with_ha(void) { const char *const_attrd_uuid = NULL; if(attrd_cluster_conn == NULL) { attrd_cluster_conn = ll_cluster_new("heartbeat"); } if(attrd_cluster_conn == NULL) { crm_err("Cannot create heartbeat object"); return FALSE; } crm_debug("Signing in with Heartbeat"); if (attrd_cluster_conn->llc_ops->signon(attrd_cluster_conn, T_ATTRD)!= HA_OK) { crm_err("Cannot sign on with heartbeat: %s", attrd_cluster_conn->llc_ops->errmsg(attrd_cluster_conn)); return FALSE; } crm_debug_3("Be informed of CRM messages"); if (HA_OK != attrd_cluster_conn->llc_ops->set_msg_callback( attrd_cluster_conn, T_ATTRD, attrd_ha_callback, attrd_cluster_conn)) { crm_err("Cannot set msg callback: %s", attrd_cluster_conn->llc_ops->errmsg(attrd_cluster_conn)); return FALSE; } crm_debug_3("Adding channel to mainloop"); G_main_add_IPC_Channel( G_PRIORITY_HIGH, attrd_cluster_conn->llc_ops->ipcchan( attrd_cluster_conn), FALSE, attrd_ha_dispatch, attrd_cluster_conn /* userdata */, attrd_ha_connection_destroy); crm_debug_3("Finding our node name"); attrd_uname = attrd_cluster_conn->llc_ops->get_mynodeid( attrd_cluster_conn); if (attrd_uname == NULL) { crm_err("get_mynodeid() failed"); return FALSE; } crm_info("Hostname: %s", attrd_uname); crm_debug_3("Finding our node uuid"); const_attrd_uuid = get_uuid(attrd_cluster_conn, attrd_uname); if(const_attrd_uuid == NULL) { crm_err("get_uuid_by_name() failed"); return FALSE; } /* copy it so that unget_uuid() doesn't trash the value on us */ attrd_uuid = crm_strdup(const_attrd_uuid); crm_info("UUID: %s", attrd_uuid); return TRUE; } int main(int argc, char ** argv) { int flag; int argerr = 0; gboolean was_err = FALSE; char *channel_name = crm_strdup(attrd_channel); crm_log_init(T_ATTRD); G_main_add_SignalHandler( G_PRIORITY_HIGH, SIGTERM, attrd_shutdown, NULL, NULL); while ((flag = getopt(argc, argv, OPTARGS)) != EOF) { switch(flag) { case 'V': cl_log_enable_stderr(1); alter_debug(DEBUG_INC); break; case 'h': /* Help message */ usage(T_ATTRD, LSB_EXIT_OK); break; default: ++argerr; break; } } if (optind > argc) { ++argerr; } if (argerr) { usage(T_ATTRD, LSB_EXIT_GENERIC); } if(register_with_ha() == FALSE) { crm_err("HA Signon failed"); was_err = TRUE; } if(was_err == FALSE) { int lpc = 0; int max_retry = 20; enum cib_errors rc = cib_not_connected; cib_conn = cib_new(); for(lpc = 0; lpc < max_retry && rc != cib_ok; lpc++) { crm_debug("CIB signon attempt %d", lpc); rc = cib_conn->cmds->signon( cib_conn, T_ATTRD, cib_command); sleep(5); } if(rc != cib_ok) { crm_err("Signon to CIB failed: %s", cib_error2string(rc)); was_err = TRUE; } } if(was_err == FALSE) { int rc = init_server_ipc_comms( channel_name, attrd_connect, default_ipc_connection_destroy); if(rc != 0) { crm_err("Could not start IPC server"); was_err = TRUE; } } if(was_err) { crm_err("Aborting startup"); return 100; } attr_hash = g_hash_table_new_full( g_str_hash, g_str_equal, NULL, free_hash_entry); crm_info("Starting mainloop..."); mainloop = g_main_new(FALSE); g_main_run(mainloop); crm_info("Exiting..."); attrd_cluster_conn->llc_ops->signoff(attrd_cluster_conn, TRUE); attrd_cluster_conn->llc_ops->delete(attrd_cluster_conn); cib_conn->cmds->signoff(cib_conn); cib_delete(cib_conn); g_hash_table_destroy(attr_hash); crm_free(channel_name); crm_free(attrd_uuid); empty_uuid_cache(); #ifdef HA_MALLOC_TRACK cl_malloc_dump_allocated(LOG_ERR, FALSE); #endif return 0; } static void attrd_cib_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { char *attr = user_data; if(rc == cib_NOTEXISTS) { rc = cib_ok; } if(rc < cib_ok) { crm_err("Update %d for %s failed: %s", call_id, attr, cib_error2string(rc)); } else { crm_debug("Update %d for %s passed", call_id, attr); } crm_free(attr); } static attr_hash_entry_t * find_hash_entry(HA_Message * msg) { const char *value = NULL; const char *attr = ha_msg_value(msg, F_ATTRD_ATTRIBUTE); attr_hash_entry_t *hash_entry = g_hash_table_lookup(attr_hash, attr); if(hash_entry == NULL) { /* create one and add it */ crm_info("Creating hash entry for %s", attr); crm_malloc0(hash_entry, sizeof(attr_hash_entry_t)); hash_entry->id = crm_strdup(attr); g_hash_table_insert(attr_hash, hash_entry->id, hash_entry); hash_entry = g_hash_table_lookup(attr_hash, attr); CRM_CHECK(hash_entry != NULL, return NULL); } value = ha_msg_value(msg, F_ATTRD_SET); if(value != NULL) { crm_free(hash_entry->set); hash_entry->set = crm_strdup(value); crm_debug("\t%s->set: %s", attr, value); } value = ha_msg_value(msg, F_ATTRD_SECTION); if(value == NULL) { value = XML_CIB_TAG_STATUS; } crm_free(hash_entry->section); hash_entry->section = crm_strdup(value); crm_debug("\t%s->section: %s", attr, value); value = ha_msg_value(msg, F_ATTRD_DAMPEN); if(value != NULL) { crm_free(hash_entry->dampen); hash_entry->dampen = crm_strdup(value); hash_entry->timeout = crm_get_msec(value); crm_debug("\t%s->timeout: %s", attr, value); } return hash_entry; } void attrd_ha_callback(HA_Message * msg, void* private_data) { int rc = cib_ok; attr_hash_entry_t *hash_entry = NULL; const char *from = ha_msg_value(msg, F_ORIG); const char *op = ha_msg_value(msg, F_ATTRD_TASK); const char *attr = ha_msg_value(msg, F_ATTRD_ATTRIBUTE); crm_info("%s message from %s", op, from); hash_entry = find_hash_entry(msg); stop_attrd_timer(hash_entry); if(hash_entry->value == NULL) { /* delete the attr */ rc = delete_attr(cib_conn, cib_none, hash_entry->section, attrd_uuid, hash_entry->set, NULL, attr, NULL); crm_info("Sent delete %d: %s %s %s", rc, attr, hash_entry->set, hash_entry->section); } else { /* send update */ rc = update_attr(cib_conn, cib_none, hash_entry->section, attrd_uuid, hash_entry->set, NULL, hash_entry->id, hash_entry->value); crm_info("Sent update %d: %s=%s", rc, hash_entry->id,hash_entry->value); } add_cib_op_callback(rc, FALSE, crm_strdup(attr), attrd_cib_callback); return; } static void update_for_hash_entry(gpointer key, gpointer value, gpointer user_data) { attrd_timer_callback(value); } void attrd_local_callback(HA_Message * msg) { attr_hash_entry_t *hash_entry = NULL; const char *from = ha_msg_value(msg, F_ORIG); const char *op = ha_msg_value(msg, F_ATTRD_TASK); const char *attr = ha_msg_value(msg, F_ATTRD_ATTRIBUTE); const char *value = ha_msg_value(msg, F_ATTRD_VALUE); if(safe_str_eq(op, "refresh")) { crm_info("Sending full refresh"); g_hash_table_foreach(attr_hash, update_for_hash_entry, NULL); return; } crm_debug("%s message from %s: %s=%s", op, from, attr, value); hash_entry = find_hash_entry(msg); crm_free(hash_entry->last_value); hash_entry->last_value = hash_entry->value; value = ha_msg_value(msg, F_ATTRD_VALUE); if(value != NULL) { hash_entry->value = crm_strdup(value); } else { hash_entry->value = NULL; } if(safe_str_eq(hash_entry->value, hash_entry->last_value)) { crm_debug_2("Ignoring non-change"); return; } stop_attrd_timer(hash_entry); if(hash_entry->timeout > 0) { hash_entry->timer_id = Gmain_timeout_add( hash_entry->timeout, attrd_timer_callback, hash_entry); } else { attrd_timer_callback(hash_entry); } return; } gboolean attrd_timer_callback(void *user_data) { HA_Message *msg = NULL; attr_hash_entry_t *hash_entry = user_data; stop_attrd_timer(hash_entry); /* send HA message to everyone */ crm_info("Sending flush op to all hosts for: %s", hash_entry->id); msg = ha_msg_new(4); ha_msg_add(msg, F_TYPE, T_ATTRD); ha_msg_add(msg, F_ORIG, attrd_uname); ha_msg_add(msg, F_ATTRD_TASK, "flush"); ha_msg_add(msg, F_ATTRD_ATTRIBUTE, hash_entry->id); ha_msg_add(msg, F_ATTRD_SET, hash_entry->set); ha_msg_add(msg, F_ATTRD_SECTION, hash_entry->section); ha_msg_add(msg, F_ATTRD_DAMPEN, hash_entry->dampen); send_ha_message(attrd_cluster_conn, msg, NULL, FALSE); crm_msg_del(msg); return TRUE; }