diff --git a/crm/cib/cib.c b/crm/cib/cib.c index 474f4adc5f..d1e3cd8ec2 100644 --- a/crm/cib/cib.c +++ b/crm/cib/cib.c @@ -1,335 +1,335 @@ -/* $Id: cib.c,v 1.29 2004/04/12 15:34:50 andrew Exp $ */ +/* $Id: cib.c,v 1.30 2004/04/13 13:26:44 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include gboolean startCib(const char *filename) { xmlNodePtr cib = readCibXmlFile(filename); if (initializeCib(cib)) { cl_log(LOG_INFO, "CIB Initialization completed successfully"); } else { - free_xml(cib); +// free_xml(cib); cl_log(LOG_WARNING, "CIB Initialization failed, " "starting with an empty default."); activateCibXml(createEmptyCib(), filename); } return TRUE; } xmlNodePtr get_cib_copy() { return copy_xml_node_recursive(get_the_CIB()); } /* * The caller should never free the return value */ xmlNodePtr get_object_root(const char *object_type, xmlNodePtr the_root) { const char *node_stack[2]; xmlNodePtr tmp_node = NULL; FNIN(); node_stack[0] = XML_CIB_TAG_CONFIGURATION; node_stack[1] = object_type; if(object_type == NULL || strlen(object_type) == 0) { FNRET(the_root); /* get the whole cib */ } else if(strcmp(object_type, 
XML_CIB_TAG_STATUS) == 0) { node_stack[0] = XML_CIB_TAG_STATUS; node_stack[1] = NULL; /* these live in a different place */ } tmp_node = find_xml_node_nested(the_root, node_stack, 2); if (tmp_node == NULL) { cl_log(LOG_ERR, "[cib] Section cib[%s[%s]] not present", node_stack[0], node_stack[1]); } FNRET(tmp_node); } FILE *msg_cib_strm = NULL; xmlNodePtr process_cib_message(xmlNodePtr message, gboolean auto_reply) { enum cib_result result = CIBRES_OK; xmlNodePtr fragment = find_xml_node(message, XML_TAG_FRAGMENT); xmlNodePtr options = find_xml_node(message, XML_TAG_OPTIONS); const char *op = get_xml_attr (message, XML_TAG_OPTIONS, XML_ATTR_OP, TRUE); #ifdef MSG_LOG if(msg_cib_strm == NULL) { msg_cib_strm = fopen("/tmp/cib.log", "w"); } fprintf(msg_cib_strm, "[Input ]\t%s\n", dump_xml_node(message, FALSE)); fflush(msg_cib_strm); #endif xmlNodePtr data = cib_process_request(op, options, fragment, &result); CRM_DEBUG2("[cib] operation returned result %d", result); if(auto_reply) { xmlNodePtr reply = create_reply(message, data); free_xml(data); #ifdef MSG_LOG fprintf(msg_cib_strm, "[Reply ]\t%s\n", dump_xml_node(reply, FALSE)); fflush(msg_cib_strm); #endif set_xml_attr(reply, XML_TAG_OPTIONS, XML_ATTR_RESULT, "ok", TRUE); // put real result in here return reply; } #ifdef MSG_LOG fprintf(msg_cib_strm, "[Output]\t%s\n", dump_xml_node(data, FALSE)); fflush(msg_cib_strm); #endif return data; } xmlNodePtr process_cib_request(const char *op, const xmlNodePtr options, const xmlNodePtr fragment) { enum cib_result result = CIBRES_OK; return cib_process_request(op, options, fragment, &result); } xmlNodePtr create_cib_fragment(xmlNodePtr update, const char *section) { gboolean whole_cib = FALSE; xmlNodePtr fragment = create_xml_node(NULL, XML_TAG_FRAGMENT); xmlNodePtr cib = NULL; char *auto_section = pluralSection(update->name); if(update == NULL) { cl_log(LOG_ERR, "No update to create a fragment for"); cl_free(auto_section); return NULL; } else if(section == NULL) { section = 
auto_section; } else if(strcmp(auto_section, section) != 0) { cl_log(LOG_ERR, "Values for update (tag=%s) and section (%s)" " were not consistent", update->name, section); cl_free(auto_section); return NULL; } if(strcmp(section, "all")==0 && strcmp(update->name, XML_TAG_CIB)==0) { whole_cib = TRUE; } set_xml_property_copy(fragment, XML_ATTR_SECTION, section); if(whole_cib == FALSE) { cib = createEmptyCib(); xmlNodePtr object_root = get_object_root(section, cib); - xmlAddChild(object_root, update); + add_node_copy(object_root, update); } else { - cib = update; + cib = copy_xml_node_recursive(update); } xmlAddChild(fragment, cib); CRM_DEBUG("Fragment created"); xml_message_debug(fragment, "created fragment"); cl_free(auto_section); return fragment; } char * pluralSection(const char *a_section) { char *a_section_parent = NULL; if (a_section == NULL) { a_section_parent = cl_strdup("all"); } else if(strcmp(a_section, XML_TAG_CIB) == 0) { a_section_parent = cl_strdup("all"); } else if(strcmp(a_section, XML_CIB_TAG_NODE) == 0) { a_section_parent = cl_strdup(XML_CIB_TAG_NODES); } else if(strcmp(a_section, XML_CIB_TAG_STATE) == 0) { a_section_parent = cl_strdup(XML_CIB_TAG_STATUS); } else if(strcmp(a_section, XML_CIB_TAG_CONSTRAINT) == 0) { a_section_parent = cl_strdup(XML_CIB_TAG_CONSTRAINTS); } else if(strcmp(a_section, XML_CIB_TAG_RESOURCE) == 0) { a_section_parent = cl_strdup(XML_CIB_TAG_RESOURCES); } else { cl_log(LOG_ERR, "Unknown section %s", a_section); a_section_parent = cl_strdup("all"); } CRM_DEBUG2("Plural is %s", a_section_parent); return a_section_parent; } const char * cib_error2string(enum cib_result return_code) { const char *error_msg = NULL; switch(return_code) { case CIBRES_MISSING_ID: error_msg = "The id field is missing"; break; case CIBRES_MISSING_TYPE: error_msg = "The type field is missing"; break; case CIBRES_MISSING_FIELD: error_msg = "A required field is missing"; break; case CIBRES_OBJTYPE_MISMATCH: error_msg = "CIBRES_OBJTYPE_MISMATCH"; break; 
case CIBRES_FAILED_EXISTS: error_msg = "The object already exists"; break; case CIBRES_FAILED_NOTEXISTS: error_msg = "The object does not exist"; break; case CIBRES_CORRUPT: error_msg = "The CIB is corrupt"; break; case CIBRES_FAILED_NOOBJECT: error_msg = "The update was empty"; break; case CIBRES_FAILED_NOPARENT: error_msg = "The parent object does not exist"; break; case CIBRES_FAILED_NODECOPY: error_msg = "Failed while copying update"; break; case CIBRES_OTHER: error_msg = "CIBRES_OTHER"; break; case CIBRES_OK: error_msg = "ok"; break; case CIBRES_FAILED: error_msg = "Failed"; break; case CIBRES_FAILED_STALE: error_msg = "Discarded old update"; break; case CIBRES_FAILED_ACTIVATION: error_msg = "Activation Failed"; break; case CIBRES_FAILED_NOSECTION: error_msg = "Required section was missing"; break; case CIBRES_FAILED_NOTSUPPORTED: error_msg = "Supplied information is not supported"; break; } if(error_msg == NULL) { cl_log(LOG_ERR, "Unknown CIB Error %d", return_code); error_msg = ""; } return error_msg; } const char * cib_op2string(enum cib_op operation) { const char *operation_msg = NULL; switch(operation) { case 0: operation_msg = "none"; break; case 1: operation_msg = "add"; break; case 2: operation_msg = "modify"; break; case 3: operation_msg = "delete"; break; case CIB_OP_MAX: operation_msg = "invalid operation"; break; } if(operation_msg == NULL) { cl_log(LOG_ERR, "Unknown CIB operation %d", operation); operation_msg = ""; } return operation_msg; } diff --git a/crm/cib/cibio.c b/crm/cib/cibio.c index 17b591b38b..dd03b5f87e 100644 --- a/crm/cib/cibio.c +++ b/crm/cib/cibio.c @@ -1,429 +1,431 @@ -/* $Id: cibio.c,v 1.18 2004/03/29 15:37:55 andrew Exp $ */ +/* $Id: cibio.c,v 1.19 2004/04/13 13:26:44 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the 
License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include // for getNow() #include #include #include const char * local_resource_path[] = { XML_CIB_TAG_STATUS, }; const char * resource_path[] = { XML_CIB_TAG_RESOURCES, }; const char * node_path[] = { XML_CIB_TAG_NODES, }; const char * constraint_path[] = { XML_CIB_TAG_CONSTRAINTS, }; gboolean initialized = FALSE; xmlNodePtr the_cib = NULL; xmlNodePtr node_search = NULL; xmlNodePtr resource_search = NULL; xmlNodePtr constraint_search = NULL; xmlNodePtr status_search = NULL; /* * It is the callers responsibility to free both the new CIB (output) * and the new CIB (input) */ xmlNodePtr createEmptyCib(void) { xmlNodePtr cib_root = NULL, config = NULL, status = NULL; cib_root = create_xml_node(NULL, XML_TAG_CIB); config = create_xml_node(cib_root, XML_CIB_TAG_CONFIGURATION); status = create_xml_node(cib_root, XML_CIB_TAG_STATUS); set_node_tstamp(cib_root); set_node_tstamp(config); set_node_tstamp(status); set_xml_property_copy(cib_root, "version", "1"); set_xml_property_copy(cib_root, "generated", "true"); create_xml_node(config, XML_CIB_TAG_NODES); create_xml_node(config, XML_CIB_TAG_RESOURCES); create_xml_node(config, XML_CIB_TAG_CONSTRAINTS); if (verifyCibXml(cib_root)) { FNRET(cib_root); } cl_log(LOG_CRIT, "The generated CIB did not pass integrity testing!!" 
" All hope is lost."); FNRET(NULL); } gboolean verifyCibXml(xmlNodePtr cib) { gboolean is_valid = TRUE; xmlNodePtr tmp_node = NULL; FNIN(); if (cib == NULL) { - cl_log(LOG_INFO, "XML Buffer was empty."); + cl_log(LOG_ERR, "XML Buffer was empty."); FNRET(FALSE); } tmp_node = get_object_root(XML_CIB_TAG_NODES, cib); if (tmp_node == NULL) is_valid = FALSE; tmp_node = get_object_root(XML_CIB_TAG_RESOURCES, cib); if (tmp_node == NULL) is_valid = FALSE; tmp_node = get_object_root(XML_CIB_TAG_CONSTRAINTS, cib); if (tmp_node == NULL) is_valid = FALSE; tmp_node = get_object_root(XML_CIB_TAG_STATUS, cib); if (tmp_node == NULL) is_valid = FALSE; // more integrity tests FNRET(is_valid); } /* * It is the callers responsibility to free the output of this function */ xmlNodePtr readCibXml(char *buffer) { xmlNodePtr root = string2xml(buffer); if (verifyCibXml(root) == FALSE) { free_xml(root); FNRET(createEmptyCib()); } FNRET(root); } /* * It is the callers responsibility to free the output of this function */ xmlNodePtr readCibXmlFile(const char *filename) { int s_res = -1; struct stat buf; xmlNodePtr root = NULL; FNIN(); if(filename != NULL) { s_res = stat(filename, &buf); } if (s_res == 0) { FILE *cib_file = fopen(filename, "r"); root = file2xml(cib_file); set_xml_property_copy(root, "generated", "false"); fclose(cib_file); } else { cl_log(LOG_WARNING, "Stat of (%s) failed, file does not exist.", CIB_FILENAME); } if (verifyCibXml(root) == FALSE) { free_xml(root); // FNRET(createEmptyCib()); root = NULL; } FNRET(root); } /* * The caller should never free the return value */ xmlNodePtr get_the_CIB(void) { FNIN(); FNRET(the_cib); } gboolean uninitializeCib(void) { xmlNodePtr tmp_cib = the_cib; FNIN(); if(tmp_cib == NULL) { cl_log(LOG_ERR, "The CIB has already been deallocated."); FNRET(FALSE); } initialized = FALSE; the_cib = NULL; node_search = NULL; resource_search = NULL; constraint_search = NULL; status_search = NULL; cl_log(LOG_WARNING, "Deallocating the CIB."); 
free_xml(tmp_cib); cl_log(LOG_WARNING, "The CIB has been deallocated."); FNRET(TRUE); } /* * This method will not free the old CIB pointer or the new one. * We rely on the caller to have saved a pointer to the old CIB * and to free the old/bad one depending on what is appropriate. */ gboolean initializeCib(xmlNodePtr new_cib) { if (verifyCibXml(new_cib)) { initialized = FALSE; the_cib = new_cib; // update search paths /* not used yet... node_search = get_object_root(XML_CIB_TAG_NODES, new_cib); resource_search = get_object_root(XML_CIB_TAG_RESOURCES, new_cib); constraint_search = get_object_root(XML_CIB_TAG_CONSTRAINTS, new_cib); status_search = get_object_root(XML_CIB_TAG_STATUS, new_cib); */ initialized = TRUE; CRM_DEBUG("CIB initialized"); FNRET(TRUE); } - else - CRM_DEBUG("CIB Verification failed"); + else { + cl_log(LOG_ERR, "CIB Verification failed"); + } + FNRET(FALSE); } int moveFile(const char *oldname, const char *newname, gboolean backup, char *ext) { /* move 'oldname' to 'newname' by creating a hard link to it * and then removing the original hard link */ int res = 0; struct stat tmp; int s_res = stat(newname, &tmp); FNIN(); cl_log(LOG_INFO, "Stat of %s (code: %d).", newname, s_res); if (s_res >= 0) { if (backup == TRUE) { char backname[1024]; static const char *back_ext = "bak"; if (ext != NULL) back_ext = (char*)ext; snprintf(backname, sizeof(backname)-1, "%s.%s", newname, back_ext); moveFile(newname, backname, FALSE, NULL); } else { res = unlink(newname); if (res < 0) { perror("Could not remove the current backup of Cib"); FNRET(-1); } } } s_res = stat(oldname, &tmp); cl_log(LOG_INFO, "Stat of %s (code: %d).", oldname, s_res); if (s_res >= 0) { res = link(oldname, newname); if (res < 0) { perror("Could not create backup of current Cib"); FNRET(-2); } res = unlink(oldname); if (res < 0) { perror("Could not unlink the current Cib"); FNRET(-3); } } FNRET(0); } int activateCibBuffer(char *buffer, const char *filename) { int result = -1; xmlNodePtr 
local_cib = NULL; FNIN(); local_cib = readCibXml(buffer); result = activateCibXml(local_cib, filename); FNRET(result); } /* * This method will free the old CIB pointer on success and the new one * on failure. */ int activateCibXml(xmlNodePtr new_cib, const char *filename) { int error_code = 0; xmlNodePtr saved_cib = get_the_CIB(); const char *filename_bak = CIB_BACKUP; // calculate FNIN(); if (initializeCib(new_cib) == TRUE) { int res = moveFile(filename, filename_bak, FALSE, NULL); if (res < 0) { cl_log(LOG_INFO, "Could not make backup of the current Cib " "(code: %d)... aborting update.", res); error_code = -1; } else { cl_log(LOG_INFO, "Writing CIB out to %s", CIB_FILENAME); if (new_cib->doc == NULL) { cl_log(LOG_INFO, "Writing of a node tree with a NULL " "document will fail, creating a new " "back link."); xmlDocPtr foo = xmlNewDoc("1.0"); xmlDocSetRootElement(foo, new_cib); xmlSetTreeDoc(new_cib,foo); } /* save it. * set arg 3 to 0 to disable line breaks,1 to enable * res == num bytes saved */ res = xmlSaveFormatFile(filename, new_cib->doc, 1); /* for some reason, reading back after saving with * line-breaks doesnt go real well */ cl_log(LOG_INFO, "Saved %d bytes to the Cib as XML", res); if (res < 0) { // assume 0 is good if (moveFile(filename_bak, filename, FALSE, NULL) < -1) { cl_log(LOG_CRIT, "Could not restore the " "backup of the current Cib " "(code: %d)... panic!", res); error_code = -2; // should probably exit here } else if (initializeCib(saved_cib) == FALSE) { // oh we are so dead cl_log(LOG_CRIT, "Could not re-initialize " "with the old CIB. " "Everything is about to go " "pear shaped"); error_code = -3; } else { cl_log(LOG_CRIT, "Update of Cib failed " "(code: %d)... 
reverted to " "last known valid version", res); error_code = -4; } } } } else { cl_log(LOG_INFO, "Ignoring invalid or NULL Cib"); error_code = -5; } // Make sure memory is cleaned up appropriately - if (error_code < 0) { + if (error_code != 0) { // CRM_DEBUG2("Freeing new CIB %p", new_cib); free_xml(new_cib); } else { // CRM_DEBUG2("Freeing saved CIB %p", saved_cib); free_xml(saved_cib); } - + FNRET(error_code); } diff --git a/crm/cib/cibmessages.c b/crm/cib/cibmessages.c index 1fcbc1df0b..0257db685d 100644 --- a/crm/cib/cibmessages.c +++ b/crm/cib/cibmessages.c @@ -1,457 +1,459 @@ -/* $Id: cibmessages.c,v 1.29 2004/04/12 15:34:50 andrew Exp $ */ +/* $Id: cibmessages.c,v 1.30 2004/04/13 13:26:44 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include enum cib_result updateList(xmlNodePtr local_cib, xmlNodePtr update_command, xmlNodePtr failed, int operation, const char *section); xmlNodePtr createCibFragmentAnswer(const char *section, xmlNodePtr failed); gboolean replace_section(const char *section, xmlNodePtr tmpCib, xmlNodePtr command); gboolean check_generation(xmlNodePtr newCib, xmlNodePtr oldCib); gboolean update_results(xmlNodePtr failed, xmlNodePtr target, int operation, int return_code); xmlNodePtr cib_process_request(const char *op, const xmlNodePtr options, const xmlNodePtr fragment, enum cib_result *result) { const char *verbose = NULL; const char *section = NULL; const char *output_section = NULL; xmlNodePtr failed = NULL; xmlNodePtr cib_answer = NULL; gboolean update_the_cib = FALSE; int cib_update_op = CIB_OP_NONE; FNIN(); *result = CIBRES_OK; verbose = xmlGetProp(options, XML_ATTR_VERBOSE); section = xmlGetProp(options, XML_ATTR_FILTER_TYPE); failed = create_xml_node(NULL, XML_TAG_FAILED); cl_log(LOG_DEBUG, "[cib] Processing \"%s\" event", op); if(op == NULL) { *result = CIBRES_FAILED; cl_log(LOG_ERR, "No operation specified\n"); } else if(strcmp("noop", op) == 0) { ; } else if(strcmp("quit", op) == 0) { cl_log(LOG_WARNING, "The CRMd has asked us to exit... 
complying"); exit(0); } else if (strcmp(CRM_OPERATION_PING, op) == 0) { cib_answer = createPingAnswerFragment(CRM_SYSTEM_CIB, "ok"); } else if (strcmp(CRM_OPERATION_BUMP, op) == 0) { - xmlNodePtr tmpCib = copy_xml_node_recursive(get_the_CIB()); + xmlNodePtr tmpCib = get_cib_copy(); CRM_DEBUG3("Handling a %s for section=%s of the cib", CRM_OPERATION_BUMP, section); // modify the timestamp set_node_tstamp(tmpCib); char *new_value = NULL; char *old_value = xmlGetProp(get_the_CIB(), XML_ATTR_GENERATION); int int_value = -1; if(old_value != NULL) { new_value = (char*)cl_malloc(128*(sizeof(char))); int_value = atoi(old_value); sprintf(new_value, "%d", ++int_value); } else { new_value = cl_strdup("0"); } cl_log(LOG_DEBUG, "Generation %d(%s)->%s", int_value, old_value, new_value); set_xml_property_copy(tmpCib, XML_ATTR_GENERATION, new_value); cl_free(new_value); if(activateCibXml(tmpCib, CIB_FILENAME) >= 0) { verbose = "true"; } else { *result = CIBRES_FAILED; } } else if (strcmp("query", op) == 0) { CRM_DEBUG2("Handling a query for section=%s of the cib", section); /* force a pick-up of the relevant section before * returning */ verbose = "true"; } else if (strcmp(CRM_OPERATION_ERASE, op) == 0) { xmlNodePtr new_cib = createEmptyCib(); // Preserve generation counters etc copy_in_properties(new_cib, get_the_CIB()); if (activateCibXml(new_cib, CIB_FILENAME) < 0) { *result = CIBRES_FAILED; } } else if (strcmp(CRM_OPERATION_CREATE, op) == 0) { update_the_cib = TRUE; cib_update_op = CIB_OP_ADD; } else if (strcmp(CRM_OPERATION_UPDATE, op) == 0 || strcmp(CRM_OPERATION_WELCOME, op) == 0 || strcmp(CRM_OPERATION_SHUTDOWN_REQ, op) == 0) { update_the_cib = TRUE; cib_update_op = CIB_OP_MODIFY; } else if (strcmp(CRM_OPERATION_DELETE, op) == 0) { update_the_cib = TRUE; cib_update_op = CIB_OP_DELETE; } else if (strcmp(CRM_OPERATION_REPLACE, op) == 0) { CRM_DEBUG2("Replacing section=%s of the cib", section); xmlNodePtr tmpCib = NULL; section = xmlGetProp(fragment, XML_ATTR_SECTION); - + if 
(section == NULL || strlen(section) == 0 || strcmp("all", section) == 0) { - tmpCib = find_xml_node(fragment, XML_TAG_CIB); + tmpCib = copy_xml_node_recursive( + find_xml_node(fragment, XML_TAG_CIB)); } else { tmpCib = copy_xml_node_recursive(get_the_CIB()); replace_section(section, tmpCib, fragment); } /*if(check_generation(cib_updates, tmpCib) == FALSE) *result = "discarded old update"; else */ if (activateCibXml(tmpCib, CIB_FILENAME) < 0) *result = CIBRES_FAILED; } else { *result = CIBRES_FAILED_NOTSUPPORTED; cl_log(LOG_ERR, "Action [%s] is not supported by the CIB", op); } if (update_the_cib) { CRM_DEBUG("Backing up CIB"); xmlNodePtr tmpCib = copy_xml_node_recursive(get_the_CIB()); section = xmlGetProp(fragment, XML_ATTR_SECTION); CRM_DEBUG3("Updating section=%s of the cib (op=%s)", section, op); // should we be doing this? // do logging // make changes to a temp copy then activate if(section == NULL) { cl_log(LOG_ERR, "No section specified in %s", XML_ATTR_FILTER_TYPE); *result = CIBRES_FAILED_NOSECTION; } else if(strcmp("all", section) == 0 && cib_update_op == CIB_OP_DELETE) { // delete /* order is no longer important here */ updateList(tmpCib, fragment, failed, cib_update_op, XML_CIB_TAG_STATUS); updateList(tmpCib, fragment, failed, cib_update_op, XML_CIB_TAG_CONSTRAINTS); updateList(tmpCib, fragment, failed, cib_update_op, XML_CIB_TAG_RESOURCES); updateList(tmpCib, fragment, failed, cib_update_op, XML_CIB_TAG_NODES); } else if(strcmp("all", section) == 0) { /* order is no longer important here */ updateList(tmpCib, fragment, failed, cib_update_op, XML_CIB_TAG_NODES); updateList(tmpCib, fragment, failed, cib_update_op, XML_CIB_TAG_RESOURCES); updateList(tmpCib, fragment, failed, cib_update_op, XML_CIB_TAG_CONSTRAINTS); updateList(tmpCib, fragment, failed, cib_update_op, XML_CIB_TAG_STATUS); } else { *result = updateList(tmpCib, fragment, failed, cib_update_op, section); } CRM_DEBUG("Activating temporary CIB"); /* if(check_generation(cib_updates, tmpCib) == 
FALSE) */ /* status = "discarded old update"; */ /* else */ if (activateCibXml(tmpCib, CIB_FILENAME) < 0) { *result = CIBRES_FAILED_ACTIVATION; } else if (failed->children != NULL) { *result = CIBRES_FAILED; } CRM_DEBUG2("CIB update status: %d", *result); } output_section = section; if (failed->children != NULL || *result != CIBRES_OK) { cib_answer = createCibFragmentAnswer(NULL /*"all"*/, failed); } else if (verbose != NULL && strcmp("true", verbose) == 0) { cib_answer = createCibFragmentAnswer(output_section, failed); } free_xml(failed); + FNRET(cib_answer); } gboolean replace_section(const char *section, xmlNodePtr tmpCib, xmlNodePtr fragment) { xmlNodePtr parent = NULL, cib_updates = NULL, new_section = NULL, old_section = NULL; FNIN(); cib_updates = find_xml_node(fragment, XML_TAG_CIB); /* find the old and new versions of the section */ new_section = get_object_root(section, cib_updates); old_section = get_object_root(section, tmpCib); if(old_section == NULL) { cl_log(LOG_ERR, "The CIB is corrupt, cannot replace missing section %s", section); FNRET(FALSE); } else if(new_section == NULL) { cl_log(LOG_ERR, "The CIB is corrupt, cannot set section %s to nothing", section); FNRET(FALSE); } parent = old_section->parent; /* unlink and free the old one */ unlink_xml_node(old_section); free_xml(old_section); /* add the new copy */ add_node_copy(parent, new_section); FNRET(TRUE); } enum cib_result updateList(xmlNodePtr local_cib, xmlNodePtr update_fragment, xmlNodePtr failed, int operation, const char *section) { xmlNodePtr child = NULL; xmlNodePtr this_section = get_object_root(section, local_cib); xmlNodePtr cib_updates = find_xml_node(update_fragment, XML_TAG_CIB); xmlNodePtr xml_section = get_object_root(section, cib_updates); if (section == NULL || xml_section == NULL) { cl_log(LOG_ERR, "Section %s not found in message." 
" CIB update is corrupt, ignoring.", section); return CIBRES_FAILED_NOSECTION; } if(CIB_OP_NONE > operation > CIB_OP_MAX) { cl_log(LOG_ERR, "Invalid operation on section %s", section); return CIBRES_FAILED; } set_node_tstamp(this_section); child = xml_section->children; while(child != NULL) { if(operation == CIB_OP_DELETE) { update_results(failed, child, operation, delete_cib_object(this_section, child)); } else if(operation == CIB_OP_MODIFY) { update_results(failed, child, operation, update_cib_object(this_section, child, FALSE)); } else { update_results(failed, child, operation, add_cib_object(this_section, child)); } child = child->next; } if (failed->children != NULL) return CIBRES_FAILED; else return CIBRES_OK; } xmlNodePtr createCibFragmentAnswer(const char *section, xmlNodePtr failed) { xmlNodePtr fragment = create_xml_node(NULL, XML_TAG_FRAGMENT); FNIN(); set_xml_property_copy(fragment, XML_ATTR_SECTION, section); if (section == NULL || strlen(section) == 0 || strcmp("all", section) == 0) { add_node_copy(fragment, get_the_CIB()); } else { xmlNodePtr cib = create_xml_node(fragment, XML_TAG_CIB); add_node_copy(cib, get_object_root(section, get_the_CIB())); copy_in_properties(cib, get_the_CIB()); } if (failed != NULL && failed->children != NULL) { - xmlAddChild(fragment, copy_xml_node_recursive(failed)); + add_node_copy(fragment, failed); } FNRET(fragment); } gboolean check_generation(xmlNodePtr newCib, xmlNodePtr oldCib) { char *new_value = xmlGetProp(newCib, XML_ATTR_GENERATION); char *old_value = xmlGetProp(oldCib, XML_ATTR_GENERATION); int int_new_value = -1; int int_old_value = -1; if(old_value != NULL) int_old_value = atoi(old_value); if(new_value != NULL) int_new_value = atoi(new_value); if(int_new_value >= int_old_value) { return TRUE; } else { cl_log(LOG_ERR, "Generation from update (%d) is older than %d", int_new_value, int_old_value); } return FALSE; } gboolean update_results(xmlNodePtr failed, xmlNodePtr target, int operation, int return_code) { 
FNIN(); gboolean was_error = FALSE; if (return_code != CIBRES_OK) { const char *error_msg = cib_error2string(return_code); const char *operation_msg = cib_op2string(operation); xmlNodePtr xml_node = create_xml_node(failed, XML_FAIL_TAG_CIB); was_error = TRUE; set_xml_property_copy(xml_node, XML_FAILCIB_ATTR_ID, ID(target)); set_xml_property_copy(xml_node, XML_FAILCIB_ATTR_OBJTYPE, TYPE(target)); set_xml_property_copy(xml_node, XML_FAILCIB_ATTR_OP, operation_msg); set_xml_property_copy(xml_node, XML_FAILCIB_ATTR_REASON, error_msg); cl_log(LOG_DEBUG, "Action %s failed: %s (cde=%d)", operation_msg, error_msg, return_code); } FNRET(was_error); } diff --git a/crm/common/crmutils.c b/crm/common/crmutils.c index 6392c3fae8..3eb4174b02 100644 --- a/crm/common/crmutils.c +++ b/crm/common/crmutils.c @@ -1,286 +1,284 @@ -/* $Id: crmutils.c,v 1.10 2004/04/02 12:08:36 andrew Exp $ */ +/* $Id: crmutils.c,v 1.11 2004/04/13 13:26:44 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int wdt_interval_ms = 10000; gboolean tickle_apphb_template(gpointer data) { char app_instance[APPNAME_LEN]; int rc = 0; sprintf(app_instance, "%s_%ld", "our_system_name", (long)getpid()); rc = apphb_hb(); if (rc < 0) { cl_perror("%s apphb_hb failure", app_instance); exit(3); } return TRUE; } void register_pid(const char *pid_file, gboolean do_fork, void (*shutdown)(int nsig)) { int j; long pid; FILE * lockfd; if (do_fork) { pid = fork(); if (pid < 0) { cl_log(LOG_CRIT, "cannot start daemon"); exit(LSB_EXIT_GENERIC); }else if (pid > 0) { exit(LSB_EXIT_OK); } } lockfd = fopen(pid_file, "w"); if (lockfd == NULL) { cl_log(LOG_CRIT, "cannot create pid file: %s", pid_file); exit(LSB_EXIT_GENERIC); }else{ pid = getpid(); fprintf(lockfd, "%ld\n", pid); fclose(lockfd); } umask(022); for (j=0; j < 3; ++j) { close(j); (void)open("/dev/null", j == 0 ? O_RDONLY : O_RDONLY); } // CL_IGNORE_SIG(SIGINT); // CL_IGNORE_SIG(SIGHUP); CL_SIGNAL(SIGTERM, shutdown); } long get_running_pid(const char *pid_file, gboolean* anypidfile) { long pid; FILE * lockfd; lockfd = fopen(pid_file, "r"); if (anypidfile) { *anypidfile = (lockfd != NULL); } if (lockfd != NULL && fscanf(lockfd, "%ld", &pid) == 1 && pid > 0) { if (CL_PID_EXISTS((pid_t)pid)) { fclose(lockfd); return(pid); } } if (lockfd != NULL) { fclose(lockfd); } return(-1L); } int init_stop(const char *pid_file) { FNIN(); if (pid_file == NULL) { cl_log(LOG_ERR, "No pid file specified to kill process"); return LSB_EXIT_GENERIC; } long pid; int rc = LSB_EXIT_OK; pid = get_running_pid(pid_file, NULL); if (pid > 0) { if (CL_KILL((pid_t)pid, SIGTERM) < 0) { rc = (errno == EPERM ? 
LSB_EXIT_EPERM : LSB_EXIT_GENERIC); fprintf(stderr, "Cannot kill pid %ld\n", pid); }else{ cl_log(LOG_INFO, "Signal sent to pid=%ld," " waiting for process to exit", pid); while (CL_PID_EXISTS(pid)) { sleep(1); } } } FNRET(rc); } int init_status(const char *pid_file, const char *client_name) { gboolean anypidfile; long pid = get_running_pid(pid_file, &anypidfile); if (pid > 0) { fprintf(stderr, "%s is running [pid: %ld]\n" , client_name, pid); return LSB_STATUS_OK; } if (anypidfile) { fprintf(stderr, "%s is stopped [pidfile exists]\n" , client_name); return LSB_STATUS_VAR_PID; } fprintf(stderr, "%s is stopped.\n", client_name); return LSB_STATUS_STOPPED; } gboolean register_with_ha(ll_cluster_t *hb_cluster, const char *client_name, gboolean (*dispatch_method)(int fd, gpointer user_data), void (*message_callback)(const struct ha_msg* msg, void* private_data), GDestroyNotify cleanup_method) { - cl_log(LOG_DEBUG, "Register with HA"); - cl_log(LOG_INFO, "Signing in with Heartbeat"); if (hb_cluster->llc_ops->signon(hb_cluster, client_name)!= HA_OK) { cl_log(LOG_ERR, "Cannot sign on with heartbeat"); cl_log(LOG_ERR, "REASON: %s", hb_cluster->llc_ops->errmsg(hb_cluster)); return FALSE; } const char* ournode = NULL; - cl_log(LOG_INFO, "Finding our node name"); + cl_log(LOG_DEBUG, "Finding our node name"); if ((ournode = hb_cluster->llc_ops->get_mynodeid(hb_cluster)) == NULL) { cl_log(LOG_ERR, "get_mynodeid() failed"); return FALSE; } cl_log(LOG_INFO, "Hostname: %s", ournode); /* cl_log(LOG_INFO, "Be informed of link status changes"); */ /* if (hb_cluster->llc_ops->set_ifstatus_callback(hb_cluster, LinkStatus, NULL) */ /* !=HA_OK){ */ /* cl_log(LOG_ERR, "Cannot set if status callback"); */ /* cl_log(LOG_ERR, "REASON: %s", hb_cluster->llc_ops->errmsg(hb_cluster)); */ /* return FALSE; */ /* } */ - cl_log(LOG_INFO, "Be informed of CRM messages"); + cl_log(LOG_DEBUG, "Be informed of CRM messages"); if (hb_cluster->llc_ops->set_msg_callback(hb_cluster, "CRM", message_callback, 
hb_cluster) !=HA_OK){ cl_log(LOG_ERR, "Cannot set CRM message callback"); cl_log(LOG_ERR, "REASON: %s", hb_cluster->llc_ops->errmsg(hb_cluster)); return FALSE; } G_main_add_fd(G_PRIORITY_HIGH, hb_cluster->llc_ops->inputfd(hb_cluster), FALSE, dispatch_method, hb_cluster, // usrdata cleanup_method); /* it seems we need to poke the message receiving stuff in order for it to * start seeing messages. Its like it gets blocked or something. */ dispatch_method(0, hb_cluster); return TRUE; } void register_with_apphb(const char *client_name, gboolean(*tickle_fn)(gpointer data)) { // Register with apphb cl_log(LOG_INFO, "Signing in with AppHb"); char app_instance[APPNAME_LEN]; int hb_intvl_ms = wdt_interval_ms * 2; int rc = 0; sprintf(app_instance, "%s_%ld", client_name, (long)getpid()); cl_log(LOG_INFO, "Client %s registering with apphb", app_instance); rc = apphb_register(client_name, app_instance); if (rc < 0) { cl_perror("%s registration failure", app_instance); exit(1); } cl_log(LOG_DEBUG, "Client %s registered with apphb", app_instance); cl_log(LOG_INFO, "Client %s setting %d ms apphb heartbeat interval" , app_instance, hb_intvl_ms); rc = apphb_setinterval(hb_intvl_ms); if (rc < 0) { cl_perror("%s setinterval failure", app_instance); exit(2); } // regularly tell apphb that we are alive cl_log(LOG_INFO, "Setting up AppHb Heartbeat"); Gmain_timeout_add(wdt_interval_ms, tickle_fn, NULL); } char * crm_itoa(int an_int) { int len = 32; char *buffer = cl_malloc(sizeof(char)*(len+1)); snprintf(buffer, len, "%d", an_int); return buffer; } diff --git a/crm/common/xmlutils.c b/crm/common/xmlutils.c index 386a949a2c..1400582293 100644 --- a/crm/common/xmlutils.c +++ b/crm/common/xmlutils.c @@ -1,807 +1,814 @@ -/* $Id: xmlutils.c,v 1.24 2004/04/06 16:21:45 andrew Exp $ */ +/* $Id: xmlutils.c,v 1.25 2004/04/13 13:26:44 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General 
Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include void dump_array(int log_level, const char *message, const char **array, int depth); xmlNodePtr find_xml_node_nested(xmlNodePtr root, const char **search_path, int len) { int j; FNIN(); if (root == NULL) { FNRET(NULL); } if(search_path == NULL) { CRM_DEBUG("Will never find NULL :)"); FNRET(NULL); } #ifdef XML_TRACE dump_array(LOG_DEBUG, "Looking for.", search_path, len); #endif xmlNodePtr child = root->children, lastMatch = NULL; for (j=0; j < len; ++j) { gboolean is_found = FALSE; if (search_path[j] == NULL) { len = j; /* a NULL also means stop searching */ break; } while(child != NULL) { const char * child_name = (const char*)child->name; #ifdef XML_TRACE CRM_DEBUG3("comparing (%s) with (%s).", search_path[j], child->name); #endif if (strcmp(child_name, search_path[j]) == 0) { lastMatch = child; child = lastMatch->children; #ifdef XML_TRACE CRM_DEBUG3("found node (%s) @line (%ld).", search_path[j], xmlGetLineNo(child)); #endif is_found = TRUE; break; } child = child->next; } if (is_found == FALSE) { #ifdef XML_TRACE CRM_DEBUG2( "No more siblings left... 
%s cannot be found.", search_path[j]); #endif break; } } if (j == len && lastMatch != NULL && strcmp(lastMatch->name, search_path[j-1]) == 0) { #ifdef XML_TRACE CRM_DEBUG2("returning node (%s).", xmlGetNodePath(lastMatch)); #endif FNRET(lastMatch); } dump_array(LOG_WARNING, "Could not find the full path to the node you specified.", search_path, len); cl_log(LOG_WARNING,"Closest point was node (%s).", xmlGetNodePath(lastMatch)); FNRET(NULL); } const char * get_xml_attr(xmlNodePtr parent, const char *node_name, const char *attr_name, gboolean error) { if(node_name == NULL) { // get it from the current node return get_xml_attr_nested(parent, NULL, 0, attr_name, error); } return get_xml_attr_nested(parent, &node_name, 1, attr_name, error); } const char * get_xml_attr_nested(xmlNodePtr parent, const char **node_path, int length, const char *attr_name, gboolean error) { const char *attr_value = NULL; xmlNodePtr attr_parent = NULL; if(parent == NULL) { cl_log(LOG_ERR, "Can not find attribute %s in NULL parent", attr_name); return NULL; } if(attr_name == NULL || strlen(attr_name) == 0) { cl_log(LOG_ERR, "Can not find attribute with no name in %s", xmlGetNodePath(parent)); return NULL; } if(length == 0) { attr_parent = parent; } else { attr_parent = find_xml_node_nested(parent, node_path, length); if(attr_parent == NULL && error) { cl_log(LOG_ERR, "No node at the path you specified."); return NULL; } } attr_value = xmlGetProp(attr_parent, attr_name); if((attr_value == NULL || strlen(attr_value) == 0) && error) { cl_log(LOG_ERR, "No value present for %s at %s", attr_name, xmlGetNodePath(attr_parent)); return NULL; } return attr_value; } xmlNodePtr set_xml_attr(xmlNodePtr parent, const char *node_name, const char *attr_name, const char *attr_value, gboolean create) { if(node_name == NULL) { // set it on the current node return set_xml_attr_nested(parent, NULL, 0, attr_name, attr_value, create); } return set_xml_attr_nested(parent, &node_name, 1, attr_name, attr_value, 
create); } xmlNodePtr set_xml_attr_nested(xmlNodePtr parent, const char **node_path, int length, const char *attr_name, const char *attr_value, gboolean create) { xmlAttrPtr result = NULL; xmlNodePtr attr_parent = NULL; xmlNodePtr create_parent = NULL; if(parent == NULL && create == FALSE) { cl_log(LOG_ERR, "Can not set attribute in NULL parent"); return NULL; } if(attr_name == NULL || strlen(attr_name) == 0) { cl_log(LOG_ERR, "Can not set attribute to %s with no name", attr_value); return NULL; } if(length == 0 && parent != NULL) { attr_parent = parent; } else if(length == 0 || node_path == NULL || *node_path == NULL || strlen(*node_path) == 0) { cl_log(LOG_ERR, "Can not create parent to set attribute %s=%s on", attr_name, attr_value); return NULL; } else { attr_parent = find_xml_node_nested(parent, node_path, length); } if(create && attr_parent == NULL) { int j = 0; attr_parent = parent; for (j=0; j < length; ++j) { if (node_path[j] == NULL) { break; } xmlNodePtr tmp = find_xml_node(attr_parent, node_path[j]); if(tmp == NULL) { attr_parent = create_xml_node(attr_parent, node_path[j]); if(j==0) { create_parent = attr_parent; } } else { attr_parent = tmp; } } } else if(attr_parent == NULL) { cl_log(LOG_ERR, "Can not find parent to set attribute on"); return NULL; } result = set_xml_property_copy(attr_parent, attr_name, attr_value); if(result == NULL) { cl_log(LOG_WARNING, "Could not set %s=%s at %s", attr_name, attr_value, xmlGetNodePath(attr_parent)); } if(create_parent != NULL) { return create_parent; } return parent; } xmlNodePtr find_xml_node(xmlNodePtr root, const char * search_path) { if(root == NULL) return NULL; return find_xml_node_nested(root, &search_path, 1); } xmlNodePtr find_entity(xmlNodePtr parent, const char *node_name, const char *id, gboolean siblings) { return find_entity_nested(parent, node_name, NULL, NULL, id, siblings); } xmlNodePtr find_entity_nested(xmlNodePtr parent, const char *node_name, const char *elem_filter_name, const char 
*elem_filter_value, const char *id, gboolean siblings) { xmlNodePtr child; FNIN(); #ifdef XML_TRACE cl_log(LOG_DEBUG, "Looking for %s elem with id=%s.", node_name, id); #endif while(parent != NULL) { #ifdef XML_TRACE CRM_DEBUG2("examining (%s).", xmlGetNodePath(parent)); #endif child = parent->children; while(child != NULL) { #ifdef XML_TRACE CRM_DEBUG2("looking for (%s) [name].", node_name); #endif if (node_name != NULL && strcmp(child->name, node_name) != 0) { #ifdef XML_TRACE CRM_DEBUG3( "skipping entity (%s=%s) [node_name].", xmlGetNodePath(child), child->name); #endif break; } else if (elem_filter_name != NULL && elem_filter_value != NULL) { const char* child_value = (const char*) xmlGetProp(child, elem_filter_name); #ifdef XML_TRACE cl_log(LOG_DEBUG, "comparing (%s) with (%s) [attr_value].", child_value, elem_filter_value); #endif if (strcmp(child_value, elem_filter_value)) { #ifdef XML_TRACE CRM_DEBUG2("skipping entity (%s) [attr_value].", xmlGetNodePath(child)); #endif break; } } #ifdef XML_TRACE cl_log(LOG_DEBUG, "looking for entity (%s) in %s.", id, xmlGetNodePath(child)); #endif while(child != NULL) { #ifdef XML_TRACE cl_log(LOG_DEBUG, "looking for entity (%s) in %s.", id, xmlGetNodePath(child)); #endif xmlChar *child_id = xmlGetProp(child, "id"); if (child_id == NULL) { cl_log(LOG_CRIT, "Entity (%s) has id=NULL..." "Cib not valid!", xmlGetNodePath(child)); } else if (strcmp(id, child_id) == 0) { #ifdef XML_TRACE CRM_DEBUG2("found entity (%s).", id); #endif FNRET(child); } child = child->next; } } if (siblings == TRUE) { #ifdef XML_TRACE CRM_DEBUG("Nothing yet... 
checking siblings"); #endif parent = parent->next; } else parent = NULL; } cl_log(LOG_INFO, "Couldnt find anything appropriate for %s elem with id=%s.", node_name, id); FNRET(NULL); } void copy_in_properties(xmlNodePtr target, xmlNodePtr src) { if(src == NULL) { cl_log(LOG_WARNING, "No node to copy properties from"); } else if (src->properties == NULL) { cl_log(LOG_INFO, "No properties to copy"); } else if (target == NULL) { cl_log(LOG_WARNING, "No node to copy properties into"); } else { #ifndef USE_BUGGY_LIBXML xmlAttrPtr prop_iter = NULL; FNIN(); prop_iter = src->properties; while(prop_iter != NULL) { const char *local_prop_name = prop_iter->name; const char *local_prop_value = xmlGetProp(src, local_prop_name); set_xml_property_copy(target, local_prop_name, local_prop_value); prop_iter = prop_iter->next; } #else xmlCopyPropList(target, src->properties); #endif } FNOUT(); } char * dump_xml(xmlNodePtr msg) { FNIN(); FNRET(dump_xml_node(msg, FALSE)); } void xml_message_debug(xmlNodePtr msg, const char *text) { FNIN(); + if(msg == NULL) { + CRM_DEBUG3("%s: %s", + text==NULL?"":text,""); + + FNOUT(); + } + char *msg_buffer = dump_xml_node(msg, FALSE); CRM_DEBUG3("%s: %s", text==NULL?"":text, msg_buffer==NULL?"":msg_buffer); cl_free(msg_buffer); FNOUT(); } char * dump_xml_node(xmlNodePtr msg, gboolean whole_doc) { int lpc = 0; int msg_size = -1; FNIN(); xmlChar *xml_message = NULL; if (msg == NULL) FNRET(NULL); xmlInitParser(); if (whole_doc) { if (msg->doc == NULL) { xmlDocPtr foo = xmlNewDoc("1.0"); xmlDocSetRootElement(foo, msg); xmlSetTreeDoc(msg,foo); } xmlDocDumpMemory(msg->doc, &xml_message, &msg_size); } else { #ifdef XML_TRACE CRM_DEBUG2("mem used by xml: %d", xmlMemUsed()); #endif xmlMemoryDump (); xmlBufferPtr xml_buffer = xmlBufferCreate(); msg_size = xmlNodeDump(xml_buffer, msg->doc, msg, 0, 0); xml_message = (xmlChar*)cl_strdup(xmlBufferContent(xml_buffer)); xmlBufferFree(xml_buffer); if (!xml_message) { cl_log(LOG_ERR, "memory allocation failed in 
dump_xml_node()"); } } // HA wont send messages with newlines in them. for(; xml_message != NULL && lpc < msg_size; lpc++) if (xml_message[lpc] == '\n') xml_message[lpc] = ' '; FNRET((char*)xml_message); } xmlNodePtr add_node_copy(xmlNodePtr new_parent, xmlNodePtr xml_node) { xmlNodePtr node_copy = NULL; FNIN(); if(xml_node != NULL && new_parent != NULL) { node_copy = copy_xml_node_recursive(xml_node); xmlAddChild(new_parent, node_copy); } else if(xml_node == NULL) { cl_log(LOG_ERR, "Could not add copy of NULL node"); } else { cl_log(LOG_ERR, "Could not add copy of node to NULL parent"); } FNRET(node_copy); } xmlAttrPtr set_xml_property_copy(xmlNodePtr node, const xmlChar *name, const xmlChar *value) { const char *parent_name = NULL; const char *local_name = NULL; const char *local_value = NULL; xmlAttrPtr ret_value = NULL; FNIN(); if(node != NULL) { parent_name = node->name; } #ifdef XML_TRACE CRM_DEBUG4("[%s] Setting %s to %s", parent_name, name, value); #endif if (name == NULL || strlen(name) <= 0) { ret_value = NULL; } else if(node == NULL) { ret_value = NULL; } else if (value == NULL || strlen(value) <= 0) { ret_value = NULL; xmlUnsetProp(node, local_name); } else { local_value = cl_strdup(value); local_name = cl_strdup(name); ret_value = xmlSetProp(node, local_name, local_value); } FNRET(ret_value); } xmlNodePtr create_xml_node(xmlNodePtr parent, const char *name) { const char *local_name = NULL; const char *parent_name = NULL; xmlNodePtr ret_value = NULL; FNIN(); - if (name == NULL) + if (name == NULL || strlen(name) < 1) { ret_value = NULL; - else { + } else { local_name = cl_strdup(name); if(parent == NULL) ret_value = xmlNewNode(NULL, local_name); else { parent_name = parent->name; ret_value = xmlNewChild(parent, NULL, local_name, NULL); } } #ifdef XML_TRACE CRM_DEBUG3("Created node [%s [%s]]", parent_name, local_name); #endif FNRET(ret_value); } void unlink_xml_node(xmlNodePtr node) { xmlUnlinkNode(node); /* this helps us with frees and really should be 
being done by * the library call */ node->doc = NULL; } void free_xml(xmlNodePtr a_node) { FNIN(); if (a_node == NULL) ; // nothing to do else if (a_node->doc != NULL) xmlFreeDoc(a_node->doc); else { /* make sure the node is unlinked first */ xmlUnlinkNode(a_node); #if 0 /* set a new doc, wont delete without one? */ xmlDocPtr foo = xmlNewDoc("1.0"); xmlDocSetRootElement(foo, a_node); xmlSetTreeDoc(a_node,foo); xmlFreeDoc(foo); #else xmlFreeNode(a_node); #endif } FNOUT(); } void set_node_tstamp(xmlNodePtr a_node) { char *since_epoch = (char*)cl_malloc(128*(sizeof(char))); FNIN(); sprintf(since_epoch, "%ld", (unsigned long)time(NULL)); set_xml_property_copy(a_node, XML_ATTR_TSTAMP, since_epoch); cl_free(since_epoch); } xmlNodePtr copy_xml_node_recursive(xmlNodePtr src_node) { #if XML_TRACE const char *local_name = NULL; xmlNodePtr local_node = NULL, node_iter = NULL, local_child = NULL; xmlAttrPtr prop_iter = NULL; FNIN(); if(src_node != NULL && src_node->name != NULL) { local_node = create_xml_node(NULL, src_node->name); prop_iter = src_node->properties; while(prop_iter != NULL) { const char *local_prop_name = prop_iter->name; const char *local_prop_value = xmlGetProp(src_node, local_prop_name); set_xml_property_copy(local_node, local_prop_name, local_prop_value); prop_iter = prop_iter->next; } node_iter = src_node->children; while(node_iter != NULL) { local_child = copy_xml_node_recursive(node_iter); if(local_child != NULL) { xmlAddChild(local_node, local_child); CRM_DEBUG3("Copied node [%s [%s]", local_name, local_child->name); } node_iter = node_iter->next; } CRM_DEBUG2("Returning [%s]", local_node->name); FNRET(local_node); } CRM_DEBUG("Returning null"); FNRET(NULL); #else return xmlCopyNode(src_node, 1); #endif } xmlNodePtr string2xml(const char *input) { char ch = 0; int lpc = 0, input_len = strlen(input); gboolean more = TRUE; gboolean inTag = FALSE; xmlBufferPtr xml_buffer = xmlBufferCreate(); for(lpc = 0; (lpc < input_len) && more; lpc++) { ch = input[lpc]; 
switch(ch) { case EOF: case 0: ch = 0; more = FALSE; xmlBufferAdd(xml_buffer, &ch, 1); break; case '>': case '<': inTag = TRUE; if(ch == '>') inTag = FALSE; xmlBufferAdd(xml_buffer, &ch, 1); break; case '\n': case '\t': case ' ': ch = ' '; if(inTag) { xmlBufferAdd(xml_buffer, &ch, 1); } break; default: xmlBufferAdd(xml_buffer, &ch, 1); break; } } xmlNodePtr xml_object = NULL; const char *the_xml = xmlBufferContent(xml_buffer); xmlDocPtr doc = xmlParseMemory(the_xml, strlen(the_xml)); if (doc == NULL) { cl_log(LOG_ERR, "Malformed XML [xml=%s]", the_xml); return NULL; } xml_object = xmlDocGetRootElement(doc); xml_message_debug(xml_object, "Created fragment"); return xml_object; } xmlNodePtr file2xml(FILE *input) { char ch = 0; gboolean more = TRUE; gboolean inTag = FALSE; xmlBufferPtr xml_buffer = xmlBufferCreate(); if(input == NULL) { cl_log(LOG_ERR, "File pointer was NULL"); return NULL; } while (more) { ch = fgetc(input); // cl_log(LOG_DEBUG, "Got [%c]", ch); switch(ch) { case EOF: case 0: ch = 0; more = FALSE; xmlBufferAdd(xml_buffer, &ch, 1); break; case '>': case '<': inTag = TRUE; if(ch == '>') inTag = FALSE; xmlBufferAdd(xml_buffer, &ch, 1); break; case '\n': case '\t': case ' ': ch = ' '; if(inTag) { xmlBufferAdd(xml_buffer, &ch, 1); } break; default: xmlBufferAdd(xml_buffer, &ch, 1); break; } } xmlNodePtr xml_object = NULL; const char *the_xml = xmlBufferContent(xml_buffer); xmlDocPtr doc = xmlParseMemory(the_xml, strlen(the_xml)); if (doc == NULL) { cl_log(LOG_ERR, "Malformed XML [xml=%s]", the_xml); return NULL; } xml_object = xmlDocGetRootElement(doc); xml_message_debug(xml_object, "Created fragment"); return xml_object; } void dump_array(int log_level, const char *message, const char **array, int depth) { int j; if(message != NULL) { cl_log(log_level, "%s", message); } cl_log(log_level, "Contents of the array:"); if(array == NULL || array[0] == NULL || depth == 0) { cl_log(log_level, "\t"); } for (j=0; j < depth && array[j] != NULL; j++) { if (array[j] 
== NULL) break; cl_log(log_level, "\t--> (%s).", array[j]); } } diff --git a/crm/crmd/election.c b/crm/crmd/election.c index 667cbe9c8e..126145e483 100644 --- a/crm/crmd/election.c +++ b/crm/crmd/election.c @@ -1,572 +1,568 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include GHashTable *joined_nodes = NULL; /* A_ELECTION_VOTE */ enum crmd_fsa_input do_election_vote(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, void *data) { enum crmd_fsa_input election_result = I_NULL; FNIN(); /* dont vote if we're in one of these states or wanting to shut down */ switch(cur_state) { case S_RECOVERY: case S_RECOVERY_DC: case S_STOPPING: case S_RELEASE_DC: case S_TERMINATE: FNRET(I_NULL); // log warning break; default: if(is_set(fsa_input_register, R_SHUTDOWN)) { FNRET(I_NULL); // log warning } break; } send_request(NULL, NULL, CRM_OPERATION_VOTE, NULL, CRM_SYSTEM_CRMD); FNRET(election_result); } gboolean timer_popped(gpointer data) { fsa_timer_t *timer = (fsa_timer_t *)data; cl_log(LOG_INFO, "#!!#!!# Timer %s just popped!", fsa_input2string(timer->fsa_input)); stopTimer(timer); // dont make it go off again s_crmd_fsa(C_TIMER_POPPED, 
timer->fsa_input, NULL); return TRUE; } gboolean do_dc_heartbeat(gpointer data) { fsa_timer_t *timer = (fsa_timer_t *)data; // cl_log(LOG_DEBUG, "#!!#!!# Heartbeat timer just popped!"); gboolean was_sent = send_request(NULL, NULL, CRM_OPERATION_HBEAT, NULL, CRM_SYSTEM_CRMD); if(was_sent == FALSE) { // this is bad stopTimer(timer); // dont make it go off again s_crmd_fsa(C_HEARTBEAT_FAILED, I_SHUTDOWN, NULL); } return TRUE; } /* A_ELECTION_COUNT */ enum crmd_fsa_input do_election_count_vote(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, void *data) { gboolean we_loose = FALSE; xmlNodePtr vote = (xmlNodePtr)data; unsigned int my_born = -1, your_born = -1; int lpc = 0, my_index = -1, your_index = -1; enum crmd_fsa_input election_result = I_NULL; const char *vote_from = xmlGetProp(vote, XML_ATTR_HOSTFROM); const char *lowest_uname = NULL; int lowest_bornon = 0; FNIN(); if(vote_from == NULL || strcmp(vote_from, fsa_our_uname) == 0) { // dont count our own vote FNRET(election_result); } if(fsa_membership_copy->members_size < 1) { // if even we are not in the cluster then we should not vote FNRET(I_FAIL); } lowest_uname = fsa_membership_copy->members[0].node_uname; lowest_bornon = fsa_membership_copy->members[0].node_born_on; for(; lpc < fsa_membership_copy->members_size; lpc++) { const char *node_uname = fsa_membership_copy->members[lpc].node_uname; int this_born_on = fsa_membership_copy->members[lpc].node_born_on; if(node_uname == NULL) { continue; } if(strcmp(vote_from, node_uname) == 0) { your_born = this_born_on; your_index = lpc; } else if (strcmp(fsa_our_uname, node_uname) == 0) { my_born = this_born_on; my_index = lpc; } if(lowest_bornon > this_born_on) { lowest_uname = node_uname; lowest_bornon = this_born_on; } else if(lowest_bornon == this_born_on && strcmp(lowest_uname, node_uname) > 0) { lowest_uname = node_uname; lowest_bornon = this_born_on; } } #if 0 cl_log(LOG_DEBUG, "%s (bornon=%d), our 
bornon (%d)", vote_from, your_born, my_born); cl_log(LOG_DEBUG, "%s %s %s", fsa_our_uname, strcmp(fsa_our_uname, vote_from) < 0?"<":">=", vote_from); #endif cl_log(LOG_DEBUG, "Election winner should be %s (born_on=%d)", lowest_uname, lowest_bornon); if(lowest_uname != NULL && strcmp(lowest_uname, fsa_our_uname) == 0){ cl_log(LOG_DEBUG, "Election win: lowest born_on and uname"); election_result = I_ELECTION_DC; } else if(your_born < my_born) { cl_log(LOG_DEBUG, "Election fail: born_on"); we_loose = TRUE; } else if(your_born == my_born && strcmp(fsa_our_uname, vote_from) > 0) { cl_log(LOG_DEBUG, "Election fail: uname"); we_loose = TRUE; } else { CRM_DEBUG("We might win... we should vote (possibly again)"); election_result = I_DC_TIMEOUT; } if(we_loose) { if(fsa_input_register & R_THE_DC) { cl_log(LOG_DEBUG, "Give up the DC"); election_result = I_RELEASE_DC; } else { cl_log(LOG_DEBUG, "We werent the DC anyway"); election_result = I_NOT_DC; } } if(we_loose || election_result == I_ELECTION_DC) { // cancel timer, its been decided stopTimer(election_timeout); } FNRET(election_result); } /* A_ELECT_TIMER_START, A_ELECTION_TIMEOUT */ // we won enum crmd_fsa_input do_election_timer_ctrl(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, void *data) { FNIN(); if(action & A_ELECT_TIMER_START) { CRM_DEBUG("Starting the election timer..."); startTimer(election_timeout); } else if(action & A_ELECT_TIMER_STOP || action & A_ELECTION_TIMEOUT) { CRM_DEBUG("Stopping the election timer..."); stopTimer(election_timeout); } else { cl_log(LOG_ERR, "unexpected action %s", fsa_action2string(action)); } if(action & A_ELECTION_TIMEOUT) { CRM_DEBUG("The election timer went off, we win!"); FNRET(I_ELECTION_DC); } FNRET(I_NULL); } /* A_DC_TIMER_STOP, A_DC_TIMER_START */ enum crmd_fsa_input do_dc_timer_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, void *data) { 
gboolean timer_op_ok = TRUE; FNIN(); if(action & A_DC_TIMER_STOP) { timer_op_ok = stopTimer(election_trigger); } /* dont start a timer that wasnt already running */ if(action & A_DC_TIMER_START && timer_op_ok) { startTimer(election_trigger); } FNRET(I_NULL); } /* A_DC_TAKEOVER */ enum crmd_fsa_input do_dc_takeover(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, void *data) { FNIN(); CRM_DEBUG("################## Taking over the DC ##################"); set_bit_inplace(&fsa_input_register, R_THE_DC); CRM_DEBUG2("Am I the DC? %s", AM_I_DC?"yes":"no"); set_bit_inplace(&fsa_input_register, R_JOIN_OK); set_bit_inplace(&fsa_input_register, R_INVOKE_PE); clear_bit_inplace(&fsa_input_register, R_CIB_DONE); clear_bit_inplace(&fsa_input_register, R_HAVE_CIB); startTimer(dc_heartbeat); FNRET(I_NULL); } /* A_DC_RELEASE */ enum crmd_fsa_input do_dc_release(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, void *data) { enum crmd_fsa_input result = I_NULL; FNIN(); CRM_DEBUG("################## Releasing the DC ##################"); stopTimer(dc_heartbeat); if(action & A_DC_RELEASE) { clear_bit_inplace(&fsa_input_register, R_THE_DC); /* get a new CIB from the new DC */ clear_bit_inplace(&fsa_input_register, R_HAVE_CIB); } else if (action & A_DC_RELEASED) { if(cur_state == S_STOPPING) { result = I_SHUTDOWN; // necessary? result = I_RELEASE_SUCCESS; } #if 0 else if( are there errors ) { // we cant stay up if not healthy // or perhaps I_ERROR and go to S_RECOVER? result = I_SHUTDOWN; } #endif else result = I_RELEASE_SUCCESS; } else { cl_log(LOG_ERR, "Warning, do_dc_release invoked for action %s", fsa_action2string(action)); } CRM_DEBUG2("Am I still the DC? 
%s", AM_I_DC?"yes":"no"); FNRET(result); } /* A_JOIN_WELCOME, A_JOIN_WELCOME_ALL */ enum crmd_fsa_input do_send_welcome(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, void *data) { int lpc = 0, size = 0, num_sent = 0; oc_node_t *members; gboolean was_sent = TRUE; FNIN(); if(action & A_JOIN_WELCOME && data == NULL) { cl_log(LOG_ERR, "Attempt to send welcome message " "without a message to reply to!"); FNRET(I_NULL); } else if(action & A_JOIN_WELCOME) { xmlNodePtr welcome = (xmlNodePtr)data; const char *join_to = xmlGetProp(welcome, XML_ATTR_HOSTFROM); if(join_to != NULL) { send_request(NULL, NULL, CRM_OPERATION_WELCOME, join_to, CRM_SYSTEM_CRMD); } FNRET(I_NULL); } // welcome everyone... /* Give everyone a chance to join before invoking the PolicyEngine */ stopTimer(integration_timer); startTimer(integration_timer); members = fsa_membership_copy->members; size = fsa_membership_copy->members_size; if(joined_nodes != NULL) { g_hash_table_destroy(joined_nodes); joined_nodes = g_hash_table_new(&g_str_hash, &g_str_equal); } for(; members != NULL && lpc < size; lpc++) { const char *new_node = members[lpc].node_uname; if(strcmp(fsa_our_uname, new_node) == 0) { // dont send one to ourselves continue; } CRM_DEBUG3("Sending welcome message to %s (%d)", new_node, was_sent); num_sent++; was_sent = was_sent && send_request(NULL, NULL, CRM_OPERATION_WELCOME, new_node, CRM_SYSTEM_CRMD); CRM_DEBUG3("Sent welcome message to %s (%d)", new_node, was_sent); } if(was_sent == FALSE) FNRET(I_FAIL); /* No point hanging around in S_INTEGRATION if we're the only ones here! 
*/ if(num_sent == 0) { // that was the last outstanding join ack) cl_log(LOG_INFO,"That was the last outstanding join ack"); FNRET(I_SUCCESS); } else { cl_log(LOG_DEBUG, "Still waiting on %d outstanding join acks", num_sent); //dont waste time by invoking the pe yet; } FNRET(I_NULL); } /* A_JOIN_ACK */ enum crmd_fsa_input do_ack_welcome(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, void *data) { xmlNodePtr welcome = (xmlNodePtr)data; FNIN(); #if 0 if(we are sick) { log error ; FNRET(I_NULL); } #endif xmlNodePtr cib_copy = get_cib_copy(); xmlNodePtr tmp1 = get_object_root(XML_CIB_TAG_STATUS, cib_copy); - - xmlUnlinkNode(tmp1); /* so that it can be deleted as part - * of the fragment - */ - tmp1 = create_cib_fragment(tmp1, XML_CIB_TAG_STATUS); + xmlNodePtr tmp2 = create_cib_fragment(tmp1, XML_CIB_TAG_STATUS); - send_ha_reply(fsa_cluster_conn, welcome, tmp1); + send_ha_reply(fsa_cluster_conn, welcome, tmp2); - free_xml(tmp1); + free_xml(tmp2); free_xml(cib_copy); FNRET(I_NULL); } /* A_ANNOUNCE */ enum crmd_fsa_input do_announce(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, void *data) { FNIN(); /* Once we hear from the DC, we can stop the timer * * This timer was started either on startup or when a node * left the CCM list */ /* dont announce if we're in one of these states */ switch(cur_state) { case S_RECOVERY: case S_RECOVERY_DC: case S_RELEASE_DC: case S_TERMINATE: FNRET(I_NULL); // log warning break; default: break; } if(AM_I_OPERATIONAL) { send_request(NULL, NULL, CRM_OPERATION_ANNOUNCE, NULL, CRM_SYSTEM_DC); } else { /* Delay announce until we have finished local startup */ FNRET(I_NULL); } FNRET(I_NULL); } /* A_JOIN_PROCESS_ACK */ enum crmd_fsa_input do_process_welcome_ack(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, void *data) { int lpc = 0, size = 0; oc_node_t 
*members; gboolean is_a_member = FALSE; xmlNodePtr join_ack = (xmlNodePtr)data; const char *join_from = xmlGetProp(join_ack, XML_ATTR_HOSTFROM); FNIN(); FNIN(); members = fsa_membership_copy->members; size = fsa_membership_copy->members_size; for(; lpc < size; lpc++) { const char *new_node = members[lpc].node_uname; if(strcmp(join_from, new_node) == 0) { is_a_member = TRUE; } } xmlNodePtr cib_fragment = find_xml_node(join_ack, XML_TAG_FRAGMENT); if(is_a_member == FALSE) { cl_log(LOG_ERR, "Node %s is not known to us", join_from); /* make sure any information from this node is discarded, * it is invalid */ free_xml(cib_fragment); FNRET(I_FAIL); } // add them to our list of "active" nodes g_hash_table_insert(joined_nodes, strdup(join_from),strdup(join_from)); /* TODO: check the fragment is only for the status section const char *section = get_xml_attr(cib_fragment, NULL, XML_ATTR_FILTER_TYPE, TRUE); */ /* Make changes so that state=active for this node when the update * is processed by A_CIB_INVOKE */ xmlNodePtr tmp1 = find_xml_node(cib_fragment, XML_TAG_CIB); tmp1 = get_object_root(XML_CIB_TAG_STATUS, tmp1); tmp1 = find_entity(tmp1, XML_CIB_TAG_STATE, join_from, FALSE); set_xml_property_copy(tmp1, "state", "active"); if(g_hash_table_size(joined_nodes) == fsa_membership_copy->members_size) { // that was the last outstanding join ack) cl_log(LOG_INFO,"That was the last outstanding join ack"); FNRET(I_SUCCESS); } else { cl_log(LOG_DEBUG, "Still waiting on %d outstanding join acks", size); //dont waste time by invoking the pe yet; } FNRET(I_NULL); } diff --git a/crm/crmd/fsa.c b/crm/crmd/fsa.c index 5962ef5ba8..7c46f6038c 100644 --- a/crm/crmd/fsa.c +++ b/crm/crmd/fsa.c @@ -1,1060 +1,1062 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later 
version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include long long do_state_transition(long long actions, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_state next_state, enum crmd_fsa_input current_input, void *data); #ifdef DOT_FSA_ACTIONS # ifdef FSA_TRACE # define ELSEIF_FSA_ACTION(x,y) \ else if(is_set(actions,x)) { \ - fprintf(dot_strm, "\t// %s:\t%s\t(data? %s)", \ - fsa_input2string(cur_input), fsa_action2string(x), \ - data==NULL?"no":"yes"); \ - fflush(dot_strm); \ CRM_DEBUG3("Invoking action %s (%.16llx)", \ fsa_action2string(x), x); \ actions = clear_bit(actions, x); \ next_input = y(x, cause, cur_state, last_input, data); \ - fprintf(dot_strm, "\t(result=%s)\n", \ - fsa_input2string(next_input)); \ + if( (x & O_DC_TICKLE) == 0 && next_input != I_DC_HEARTBEAT ) \ + fprintf(dot_strm, \ + "\t// %s:\t%s\t(data? %s)\t(result=%s)\n", \ + fsa_input2string(cur_input), \ + fsa_action2string(x), \ + data==NULL?"no":"yes", \ + fsa_input2string(next_input)); \ fflush(dot_strm); \ CRM_DEBUG3("Result of action %s was %s", \ fsa_action2string(x), fsa_input2string(next_input)); \ } # else # define ELSEIF_FSA_ACTION(x,y) \ else if(is_set(actions,x)) { \ - fprintf(dot_strm, "\t// %s:\t%s\t(data? 
%s)", \ - fsa_input2string(cur_input), fsa_action2string(x), \ - data==NULL?"no":"yes"); \ - fflush(dot_strm); \ actions = clear_bit(actions, x); \ next_input = y(x, cause, cur_state, last_input, data); \ - fprintf(dot_strm, "\t(result=%s)\n", \ - fsa_input2string(next_input)); \ + if( (x & O_DC_TICKLE) == 0 && next_input != I_DC_HEARTBEAT ) \ + fprintf(dot_strm, \ + "\t// %s:\t%s\t(data? %s)\t(result=%s)\n", \ + fsa_input2string(cur_input), \ + fsa_action2string(x), \ + data==NULL?"no":"yes", \ + fsa_input2string(next_input)); \ fflush(dot_strm); \ } # endif #else # ifdef FSA_TRACE # define ELSEIF_FSA_ACTION(x,y) \ else if(is_set(actions,x)) { \ CRM_DEBUG3("Invoking action %s (%.16llx)", \ fsa_action2string(x), x); \ actions = clear_bit(actions, x); \ next_input = y(x, cause, cur_state, last_input, data); \ CRM_DEBUG3("Result of action %s was %s", \ fsa_action2string(x), fsa_input2string(next_input)); \ } # else # define ELSEIF_FSA_ACTION(x,y) \ else if(is_set(actions,x)) { \ actions = clear_bit(actions, x); \ next_input = y(x, cause, cur_state, last_input, data); \ } # endif #endif const char *dot_intro = "digraph \"g\" {\n" " size = \"30,30\"\n" " graph [\n" " fontsize = \"12\"\n" " fontname = \"Times-Roman\"\n" " fontcolor = \"black\"\n" " bb = \"0,0,398.922306,478.927856\"\n" " color = \"black\"\n" " ]\n" " node [\n" " fontsize = \"12\"\n" " fontname = \"Times-Roman\"\n" " fontcolor = \"black\"\n" " shape = \"ellipse\"\n" " color = \"black\"\n" " ]\n" " edge [\n" " fontsize = \"12\"\n" " fontname = \"Times-Roman\"\n" " fontcolor = \"black\"\n" " color = \"black\"\n" " ]\n" "// special nodes\n" " \"S_PENDING\" \n" " [\n" " color = \"blue\"\n" " fontcolor = \"blue\"\n" " ]\n" " \"S_TERMINATE\" \n" " [\n" " color = \"red\"\n" " fontcolor = \"red\"\n" " ]\n" "\n" "// DC only nodes\n" " \"S_RECOVERY_DC\" [ fontcolor = \"green\" ]\n" " \"S_INTEGRATION\" [ fontcolor = \"green\" ]\n" " \"S_POLICY_ENGINE\" [ fontcolor = \"green\" ]\n" " \"S_TRANSITION_ENGINE\" [ 
fontcolor = \"green\" ]\n" " \"S_RELEASE_DC\" [ fontcolor = \"green\" ]\n" " \"S_IDLE\" [ fontcolor = \"green\" ]\n"; static FILE *dot_strm = NULL; enum crmd_fsa_state fsa_state; oc_node_list_t *fsa_membership_copy; ll_cluster_t *fsa_cluster_conn; ll_lrm_t *fsa_lrm_conn; long long fsa_input_register; const char *fsa_our_uname; fsa_timer_t *election_trigger = NULL; /* */ fsa_timer_t *election_timeout = NULL; /* */ fsa_timer_t *shutdown_escalation_timmer = NULL; /* */ fsa_timer_t *integration_timer = NULL; fsa_timer_t *dc_heartbeat = NULL; long long toggle_bit(long long action_list, long long action) { // CRM_DEBUG2("Toggling bit %.16llx", action); action_list ^= action; // CRM_DEBUG2("Result %.16llx", action_list & action); return action_list; } long long clear_bit(long long action_list, long long action) { // CRM_DEBUG2("Clearing bit\t%.16llx", action); // ensure its set action_list |= action; // then toggle action_list = action_list ^ action; return action_list; } long long set_bit(long long action_list, long long action) { // CRM_DEBUG2("Adding bit\t%.16llx", action); action_list |= action; return action_list; } void toggle_bit_inplace(long long *action_list, long long action) { *action_list = toggle_bit(*action_list, action); } void clear_bit_inplace(long long *action_list, long long action) { *action_list = clear_bit(*action_list, action); } void set_bit_inplace(long long *action_list, long long action) { *action_list = set_bit(*action_list, action); } gboolean is_set(long long action_list, long long action) { // CRM_DEBUG2("Checking bit\t%.16llx", action); return ((action_list & action) == action); } gboolean startTimer(fsa_timer_t *timer) { if(((int)timer->source_id) < 0) { timer->source_id = Gmain_timeout_add(timer->period_ms, timer->callback, (void*)timer); /* CRM_DEBUG3("#!!#!!# Started %s timer (%d)", fsa_input2string(timer->fsa_input), timer->source_id); */ } else { cl_log(LOG_INFO, "#!!#!!# Timer %s already running (%d)", 
fsa_input2string(timer->fsa_input), timer->source_id); return FALSE; } return TRUE; } gboolean stopTimer(fsa_timer_t *timer) { if(((int)timer->source_id) > 0) { /* CRM_DEBUG3("#!!#!!# Stopping %s timer (%d)", fsa_input2string(timer->fsa_input), timer->source_id); */ g_source_remove(timer->source_id); timer->source_id = -2; } else { cl_log(LOG_INFO, "#!!#!!# Timer %s already stopped (%d)", fsa_input2string(timer->fsa_input), timer->source_id); return FALSE; } return TRUE; } enum crmd_fsa_state s_crmd_fsa(enum crmd_fsa_cause cause, enum crmd_fsa_input initial_input, void *data) { long long actions = A_NOTHING, new_actions = A_NOTHING; enum crmd_fsa_input last_input = initial_input; enum crmd_fsa_input cur_input; enum crmd_fsa_input next_input; enum crmd_fsa_state last_state, cur_state, next_state, starting_state; FNIN(); starting_state = fsa_state; cur_input = initial_input; next_input = initial_input; last_state = starting_state; cur_state = starting_state; next_state = starting_state; #ifdef FSA_TRACE CRM_DEBUG4("FSA invoked with Cause: %s\n\tState: %s, Input: %s", fsa_cause2string(cause), fsa_state2string(cur_state), fsa_input2string(cur_input)); #endif if(dot_strm == NULL) { dot_strm = fopen("/tmp/live.dot", "w"); fprintf(dot_strm, "%s", dot_intro); } /* * Process actions in order of priority but do only one * action at a time to avoid complicating the ordering. * * Actions may result in a new I_ event, these are added to * (not replace) existing actions before the next iteration. 
* */ while(next_input != I_NULL || actions != A_NOTHING) { if(next_input == I_WAIT_FOR_EVENT) { /* we may be waiting for an a-sync task to "happen" * and until it does, we cant do anything else */ cl_log(LOG_INFO, "Wait until something else happens"); break; } cur_input = next_input; #ifdef FSA_TRACE CRM_DEBUG3("FSA while loop:\tState: %s, Input: %s", fsa_state2string(cur_state), fsa_input2string(cur_input)); #endif new_actions = crmd_fsa_actions[cur_input][cur_state]; next_state = crmd_fsa_state[cur_input][cur_state]; last_state = cur_state; cur_state = next_state; fsa_state = next_state; if(new_actions != A_NOTHING) { -#ifndef FSA_TRACE +#ifdef FSA_TRACE CRM_DEBUG2("Adding actions %.16llx", new_actions); #endif actions |= new_actions; } switch(cur_input) { case I_NULL: break; #if 0 case I_SOME_EVENT: case I_SOME_OTHER_EVENT: cc_transitioner(cur_input, data); /* flow through... */ #endif default: last_input = cur_input; break; } /* * Hook for change of state. * Allows actions to be added or removed when entering a state */ if(last_state != cur_state){ actions = do_state_transition(actions, cause, last_state, cur_state, last_input, data); } /* this is always run, some inputs/states may make various * actions irrelevant/invalid */ actions = clear_flags(actions, cause, cur_state, cur_input); /* regular action processing in order of action priority * * Make sure all actions that connect to required systems * are performed first */ if(actions == A_NOTHING) { cl_log(LOG_INFO, "Nothing to do"); next_input = I_NULL; /* // check registers, see if anything is pending if(is_set(fsa_input_register, R_SHUTDOWN)) { CRM_DEBUG("(Re-)invoking shutdown"); next_input = I_SHUTDOWN; } else if(is_set(fsa_input_register, R_INVOKE_PE)) { CRM_DEBUG("Invoke the PE somehow"); } */ } /* logging */ ELSEIF_FSA_ACTION(A_ERROR, do_log) ELSEIF_FSA_ACTION(A_WARN, do_log) ELSEIF_FSA_ACTION(A_LOG, do_log) /* get out of here NOW! 
before anything worse happens */ ELSEIF_FSA_ACTION(A_EXIT_1, do_exit) ELSEIF_FSA_ACTION(A_STARTUP, do_startup) ELSEIF_FSA_ACTION(A_CIB_START, do_cib_control) ELSEIF_FSA_ACTION(A_HA_CONNECT, do_ha_control) ELSEIF_FSA_ACTION(A_LRM_CONNECT,do_lrm_control) ELSEIF_FSA_ACTION(A_CCM_CONNECT,do_ccm_control) ELSEIF_FSA_ACTION(A_ANNOUNCE, do_announce) /* sub-system start */ ELSEIF_FSA_ACTION(A_PE_START, do_pe_control) ELSEIF_FSA_ACTION(A_TE_START, do_te_control) /* sub-system restart */ ELSEIF_FSA_ACTION(O_CIB_RESTART,do_cib_control) ELSEIF_FSA_ACTION(O_PE_RESTART, do_pe_control) ELSEIF_FSA_ACTION(O_TE_RESTART, do_te_control) ELSEIF_FSA_ACTION(A_STARTED, do_started) /* DC Timer */ ELSEIF_FSA_ACTION(O_DC_TIMER_RESTART, do_dc_timer_control) ELSEIF_FSA_ACTION(A_DC_TIMER_STOP, do_dc_timer_control) ELSEIF_FSA_ACTION(A_DC_TIMER_START, do_dc_timer_control) /* * Highest priority actions */ ELSEIF_FSA_ACTION(A_SHUTDOWN_REQ, do_shutdown_req) ELSEIF_FSA_ACTION(A_MSG_ROUTE, do_msg_route) ELSEIF_FSA_ACTION(A_RECOVER, do_recover) ELSEIF_FSA_ACTION(A_ELECTION_VOTE, do_election_vote) ELSEIF_FSA_ACTION(A_ELECT_TIMER_START, do_election_timer_ctrl) ELSEIF_FSA_ACTION(A_ELECT_TIMER_STOP, do_election_timer_ctrl) ELSEIF_FSA_ACTION(A_ELECTION_COUNT, do_election_count_vote) ELSEIF_FSA_ACTION(A_ELECTION_TIMEOUT, do_election_timer_ctrl) /* * "Get this over with" actions */ ELSEIF_FSA_ACTION(A_MSG_STORE, do_msg_store) ELSEIF_FSA_ACTION(A_NODE_BLOCK, do_node_block) /* * High priority actions * Update the cache first */ ELSEIF_FSA_ACTION(A_CCM_UPDATE_CACHE, do_ccm_update_cache) ELSEIF_FSA_ACTION(A_CCM_EVENT, do_ccm_event) /* * Medium priority actions */ ELSEIF_FSA_ACTION(A_DC_TAKEOVER, do_dc_takeover) ELSEIF_FSA_ACTION(A_DC_RELEASE, do_dc_release) ELSEIF_FSA_ACTION(A_JOIN_WELCOME_ALL, do_send_welcome) ELSEIF_FSA_ACTION(A_JOIN_WELCOME, do_send_welcome) ELSEIF_FSA_ACTION(A_JOIN_ACK, do_ack_welcome) ELSEIF_FSA_ACTION(A_JOIN_PROCESS_ACK, do_process_welcome_ack) /* * Low(er) priority actions * Make sure the 
CIB is always updated before invoking the * PE, and the PE before the TE */ ELSEIF_FSA_ACTION(A_UPDATE_NODESTATUS, do_cib_invoke) ELSEIF_FSA_ACTION(A_CIB_INVOKE_LOCAL, do_cib_invoke) ELSEIF_FSA_ACTION(A_CIB_INVOKE, do_cib_invoke) ELSEIF_FSA_ACTION(A_CIB_BUMPGEN, do_cib_invoke) ELSEIF_FSA_ACTION(A_LRM_INVOKE, do_lrm_invoke) ELSEIF_FSA_ACTION(A_LRM_EVENT, do_lrm_event) ELSEIF_FSA_ACTION(A_PE_INVOKE, do_pe_invoke) ELSEIF_FSA_ACTION(A_TE_INVOKE, do_te_invoke) /* sub-system stop */ ELSEIF_FSA_ACTION(A_PE_STOP, do_pe_control) ELSEIF_FSA_ACTION(A_TE_STOP, do_te_control) ELSEIF_FSA_ACTION(A_DC_RELEASED, do_dc_release) ELSEIF_FSA_ACTION(A_HA_DISCONNECT, do_ha_control) ELSEIF_FSA_ACTION(A_CCM_DISCONNECT, do_ccm_control) ELSEIF_FSA_ACTION(A_LRM_DISCONNECT, do_lrm_control) ELSEIF_FSA_ACTION(A_CIB_STOP, do_cib_control) /* time to go now... */ /* Some of these can probably be consolidated */ ELSEIF_FSA_ACTION(A_SHUTDOWN, do_shutdown) ELSEIF_FSA_ACTION(A_STOP, do_stop) /* exit gracefully */ ELSEIF_FSA_ACTION(A_EXIT_0, do_exit) // ELSEIF_FSA_ACTION(A_, do_) else if(actions & A_MSG_PROCESS) { xmlNodePtr stored_msg = NULL; /* any more queued messages? */ if(is_message() == FALSE) { CRM_DEBUG("No more messages"); actions = clear_bit(actions, A_MSG_PROCESS); continue; } else { fsa_message_queue_t msg = get_message(); - if(msg == NULL || msg->message) { + if(msg == NULL || msg->message == NULL) { cl_log(LOG_ERR, "Invalid stored message"); continue; } data = msg->message; } #ifdef DOT_FSA_ACTIONS fprintf(dot_strm, "\t// %s:\t%s\t(data? 
%s)", fsa_input2string(cur_input), fsa_action2string(A_MSG_PROCESS), stored_msg==NULL?"no":"yes"); fflush(dot_strm); #endif #ifdef FSA_TRACE CRM_DEBUG3("Invoking action %s (%.16llx)", fsa_action2string(A_MSG_PROCESS), A_MSG_PROCESS); #endif stored_msg = (xmlNodePtr)data; #ifdef FSA_TRACE xml_message_debug(stored_msg,"FSA processing message"); #endif next_input = handle_message(stored_msg); #ifdef DOT_FSA_ACTIONS fprintf(dot_strm, "\t(result=%s)\n", fsa_input2string(next_input)); #endif CRM_DEBUG3("Result of action %s was %s", fsa_action2string(A_MSG_PROCESS), fsa_input2string(next_input)); /* Error checking and reporting */ } else if(cur_input != I_NULL && is_set(actions, A_NOTHING)) { cl_log(LOG_WARNING, "No action specified for input,state (%s,%s)", fsa_input2string(cur_input), fsa_state2string(cur_state)); next_input = I_NULL; } else if(cur_input == I_NULL && is_set(actions, A_NOTHING)) { #ifdef FSA_TRACE cl_log(LOG_INFO, "Nothing left to do"); #endif } else { cl_log(LOG_ERR, "Action %s (0x%llx) not supported ", fsa_action2string(actions), actions); next_input = I_ERROR; } } #ifdef FSA_TRACE CRM_DEBUG2("################# Exiting the FSA (%s) ##################", fsa_state2string(fsa_state)); #endif // cleanup inputs? 
FNRET(fsa_state); } /* A_NODE_BLOCK */ enum crmd_fsa_input do_node_block(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, void *data) { xmlNodePtr xml_message = (xmlNodePtr)data; const char *host_from = xmlGetProp(xml_message, XML_ATTR_HOSTFROM); FNIN(); (void)host_from; FNRET(I_NULL); } const char * fsa_input2string(int input) { const char *inputAsText = NULL; switch(input){ case I_NULL: inputAsText = "I_NULL"; break; case I_CCM_EVENT: inputAsText = "I_CCM_EVENT"; break; case I_CIB_OP: inputAsText = "I_CIB_OP"; break; case I_CIB_UPDATE: inputAsText = "I_CIB_UPDATE"; break; case I_DC_TIMEOUT: inputAsText = "I_DC_TIMEOUT"; break; case I_ELECTION: inputAsText = "I_ELECTION"; break; case I_RELEASE_DC: inputAsText = "I_RELEASE_DC"; break; case I_ELECTION_DC: inputAsText = "I_ELECTION_DC"; break; case I_ERROR: inputAsText = "I_ERROR"; break; case I_FAIL: inputAsText = "I_FAIL"; break; case I_INTEGRATION_TIMEOUT: inputAsText = "I_INTEGRATION_TIMEOUT"; break; case I_NODE_JOIN: inputAsText = "I_NODE_JOIN"; break; case I_NODE_LEFT: inputAsText = "I_NODE_LEFT"; break; case I_NODE_LEAVING: inputAsText = "I_NODE_LEAVING"; break; case I_NOT_DC: inputAsText = "I_NOT_DC"; break; case I_RECOVERED: inputAsText = "I_RECOVERED"; break; case I_RELEASE_FAIL: inputAsText = "I_RELEASE_FAIL"; break; case I_RELEASE_SUCCESS: inputAsText = "I_RELEASE_SUCCESS"; break; case I_RESTART: inputAsText = "I_RESTART"; break; case I_REQUEST: inputAsText = "I_REQUEST"; break; case I_ROUTER: inputAsText = "I_ROUTER"; break; case I_SHUTDOWN: inputAsText = "I_SHUTDOWN"; break; /* case I_SHUTDOWN_REQ: */ /* inputAsText = "I_SHUTDOWN_REQ"; */ /* break; */ case I_STARTUP: inputAsText = "I_STARTUP"; break; case I_SUCCESS: inputAsText = "I_SUCCESS"; break; case I_TERMINATE: inputAsText = "I_TERMINATE"; break; case I_WELCOME: inputAsText = "I_WELCOME"; break; case I_WELCOME_ACK: inputAsText = "I_WELCOME_ACK"; break; case I_DC_HEARTBEAT: inputAsText = 
"I_DC_HEARTBEAT"; break; case I_ILLEGAL: inputAsText = "I_ILLEGAL"; break; } if(inputAsText == NULL) { cl_log(LOG_ERR, "Input %d is unknown", input); inputAsText = ""; } return inputAsText; } const char * fsa_state2string(int state) { const char *stateAsText = NULL; switch(state){ case S_IDLE: stateAsText = "S_IDLE"; break; case S_ELECTION: stateAsText = "S_ELECTION"; break; case S_INTEGRATION: stateAsText = "S_INTEGRATION"; break; case S_NOT_DC: stateAsText = "S_NOT_DC"; break; case S_POLICY_ENGINE: stateAsText = "S_POLICY_ENGINE"; break; case S_RECOVERY: stateAsText = "S_RECOVERY"; break; case S_RECOVERY_DC: stateAsText = "S_RECOVERY_DC"; break; case S_RELEASE_DC: stateAsText = "S_RELEASE_DC"; break; case S_PENDING: stateAsText = "S_PENDING"; break; case S_STOPPING: stateAsText = "S_STOPPING"; break; case S_TERMINATE: stateAsText = "S_TERMINATE"; break; case S_TRANSITION_ENGINE: stateAsText = "S_TRANSITION_ENGINE"; break; case S_ILLEGAL: stateAsText = "S_ILLEGAL"; break; } if(stateAsText == NULL) { cl_log(LOG_ERR, "State %d is unknown", state); stateAsText = ""; } return stateAsText; } const char * fsa_cause2string(int cause) { const char *causeAsText = NULL; switch(cause){ case C_UNKNOWN: causeAsText = "C_UNKNOWN"; break; case C_STARTUP: causeAsText = "C_STARTUP"; break; case C_IPC_MESSAGE: causeAsText = "C_IPC_MESSAGE"; break; case C_HA_MESSAGE: causeAsText = "C_HA_MESSAGE"; break; case C_CCM_CALLBACK: causeAsText = "C_CCM_CALLBACK"; break; case C_TIMER_POPPED: causeAsText = "C_TIMER_POPPED"; break; case C_SHUTDOWN: causeAsText = "C_SHUTDOWN"; break; case C_HEARTBEAT_FAILED: causeAsText = "C_HEARTBEAT_FAILED"; break; case C_ILLEGAL: causeAsText = "C_ILLEGAL"; break; } if(causeAsText == NULL) { cl_log(LOG_ERR, "Cause %d is unknown", cause); causeAsText = ""; } return causeAsText; } const char * fsa_action2string(long long action) { const char *actionAsText = NULL; switch(action){ case A_NOTHING: actionAsText = "A_NOTHING"; break; case O_SHUTDOWN: actionAsText = 
"O_SHUTDOWN"; break; case O_RELEASE: actionAsText = "O_RELEASE"; break; case A_STARTUP: actionAsText = "A_STARTUP"; break; case A_STARTED: actionAsText = "A_STARTED"; break; case A_HA_CONNECT: actionAsText = "A_HA_CONNECT"; break; case A_HA_DISCONNECT: actionAsText = "A_HA_DISCONNECT"; break; case A_LRM_CONNECT: actionAsText = "A_LRM_CONNECT"; break; case A_LRM_DISCONNECT: actionAsText = "A_LRM_DISCONNECT"; break; case O_DC_TIMER_RESTART: actionAsText = "O_DC_TIMER_RESTART"; break; case A_DC_TIMER_STOP: actionAsText = "A_DC_TIMER_STOP"; break; case A_DC_TIMER_START: actionAsText = "A_DC_TIMER_START"; break; case A_ELECTION_COUNT: actionAsText = "A_ELECTION_COUNT"; break; case A_ELECTION_TIMEOUT: actionAsText = "A_ELECTION_TIMEOUT"; break; case A_ELECT_TIMER_START: actionAsText = "A_ELECT_TIMER_START"; break; case A_ELECT_TIMER_STOP: actionAsText = "A_ELECT_TIMER_STOP"; break; case A_ELECTION_VOTE: actionAsText = "A_ELECTION_VOTE"; break; case A_ANNOUNCE: actionAsText = "A_ANNOUNCE"; break; case A_JOIN_ACK: actionAsText = "A_JOIN_ACK"; break; case A_JOIN_WELCOME: actionAsText = "A_JOIN_WELCOME"; break; case A_JOIN_WELCOME_ALL: actionAsText = "A_JOIN_WELCOME_ALL"; break; case A_JOIN_PROCESS_ACK: actionAsText = "A_JOIN_PROCESS_ACK"; break; case A_MSG_PROCESS: actionAsText = "A_MSG_PROCESS"; break; case A_MSG_ROUTE: actionAsText = "A_MSG_ROUTE"; break; case A_MSG_STORE: actionAsText = "A_MSG_STORE"; break; case A_RECOVER: actionAsText = "A_RECOVER"; break; case A_DC_RELEASE: actionAsText = "A_DC_RELEASE"; break; case A_DC_RELEASED: actionAsText = "A_DC_RELEASED"; break; case A_DC_TAKEOVER: actionAsText = "A_DC_TAKEOVER"; break; case A_SHUTDOWN: actionAsText = "A_SHUTDOWN"; break; case A_SHUTDOWN_REQ: actionAsText = "A_SHUTDOWN_REQ"; break; case A_STOP: actionAsText = "A_STOP "; break; case A_EXIT_0: actionAsText = "A_EXIT_0"; break; case A_EXIT_1: actionAsText = "A_EXIT_1"; break; case A_CCM_CONNECT: actionAsText = "A_CCM_CONNECT"; break; case A_CCM_DISCONNECT: 
actionAsText = "A_CCM_DISCONNECT"; break; case A_CCM_EVENT: actionAsText = "A_CCM_EVENT"; break; case A_CCM_UPDATE_CACHE: actionAsText = "A_CCM_UPDATE_CACHE"; break; case A_CIB_BUMPGEN: actionAsText = "A_CIB_BUMPGEN"; break; case A_CIB_INVOKE: actionAsText = "A_CIB_INVOKE"; break; case O_CIB_RESTART: actionAsText = "O_CIB_RESTART"; break; case A_CIB_START: actionAsText = "A_CIB_START"; break; case A_CIB_STOP: actionAsText = "A_CIB_STOP"; break; case A_TE_INVOKE: actionAsText = "A_TE_INVOKE"; break; case O_TE_RESTART: actionAsText = "O_TE_RESTART"; break; case A_TE_START: actionAsText = "A_TE_START"; break; case A_TE_STOP: actionAsText = "A_TE_STOP"; break; case A_PE_INVOKE: actionAsText = "A_PE_INVOKE"; break; case O_PE_RESTART: actionAsText = "O_PE_RESTART"; break; case A_PE_START: actionAsText = "A_PE_START"; break; case A_PE_STOP: actionAsText = "A_PE_STOP"; break; case A_NODE_BLOCK: actionAsText = "A_NODE_BLOCK"; break; case A_UPDATE_NODESTATUS: actionAsText = "A_UPDATE_NODESTATUS"; break; case A_LOG: actionAsText = "A_LOG "; break; case A_ERROR: actionAsText = "A_ERROR "; break; case A_WARN: actionAsText = "A_WARN "; break; } if(actionAsText == NULL) { cl_log(LOG_ERR, "Action %.16llx is unknown", action); actionAsText = ""; } return actionAsText; } long long do_state_transition(long long actions, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_state next_state, enum crmd_fsa_input current_input, void *data) { long long tmp = A_NOTHING; if(current_input != I_NULL && (current_input != I_DC_HEARTBEAT || cur_state != S_NOT_DC)){ const char *state_from = fsa_state2string(cur_state); const char *state_to = fsa_state2string(next_state); const char *input = fsa_input2string(current_input); time_t now = time(NULL); fprintf(dot_strm, "\t\"%s\" -> \"%s\" [ label =\"%s\" ] // %s", state_from, state_to, input, asctime(localtime(&now))); fflush(dot_strm); } switch(next_state) { case S_PENDING: case S_NOT_DC: if(is_set(fsa_input_register, 
R_SHUTDOWN)){ tmp = set_bit(actions, A_SHUTDOWN_REQ); } tmp = clear_bit(actions, A_RECOVER); break; case S_RECOVERY_DC: case S_RECOVERY: tmp = set_bit(actions, A_RECOVER); break; default: tmp = clear_bit(actions, A_RECOVER); break; } if(tmp != actions) { cl_log(LOG_INFO, "Action b4 %.16llx ", actions); cl_log(LOG_INFO, "Action after %.16llx ", tmp); actions = tmp; } return actions; } long long clear_flags(long long actions, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input) { if(is_set(fsa_input_register, R_SHUTDOWN)){ clear_bit_inplace(&actions, A_DC_TIMER_START); } switch(cur_state) { case S_IDLE: break; case S_ELECTION: break; case S_INTEGRATION: break; case S_NOT_DC: break; case S_POLICY_ENGINE: break; case S_RECOVERY: break; case S_RECOVERY_DC: break; case S_RELEASE_DC: break; case S_PENDING: break; case S_STOPPING: break; case S_TERMINATE: break; case S_TRANSITION_ENGINE: break; case S_ILLEGAL: break; } return actions; } diff --git a/crm/crmd/fsa_defines.h b/crm/crmd/fsa_defines.h index 04cab215c3..a975f87d21 100644 --- a/crm/crmd/fsa_defines.h +++ b/crm/crmd/fsa_defines.h @@ -1,488 +1,488 @@ -/* $Id: fsa_defines.h,v 1.10 2004/04/12 15:34:50 andrew Exp $ */ +/* $Id: fsa_defines.h,v 1.11 2004/04/13 13:26:45 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef XML_FSA_DEFINES__H #define XML_FSA_DEFINES__H /*====================================== * States the DC/CRMd can be in *======================================*/ enum crmd_fsa_state { S_IDLE = 0, /* Nothing happening */ S_ELECTION, /* Take part in the election algorithm as * described below */ S_INTEGRATION, /* integrate the status of new nodes (which is * all of them if we have just been elected DC) * to form a complete and up-to-date picture of * the CIB */ S_NOT_DC, /* we are in crmd/slave mode */ S_POLICY_ENGINE,/* Determine the next stable state of the cluster */ S_RECOVERY, /* Something bad happened, check everything is ok * before continuing and attempt to recover if * required */ S_RECOVERY_DC, /* Something bad happened to the DC, check * everything is ok before continuing and attempt * to recover if required */ S_RELEASE_DC, /* we were the DC, but now we are not anymore, * possibly by our own request, and we should * release all unnecessary sub-systems, finish * any pending actions, do general cleanup and * unset anything that makes us think we are * special :) */ S_PENDING, /* we are just starting out */ S_STOPPING, /* We are in the final stages of shutting down */ S_TERMINATE, /* We are going to shutdown, this is the equiv of * "Sending TERM signal to all processes" in Linux * and in worst case scenarios could be considered * a self STONITH */ S_TRANSITION_ENGINE,/* Attempt to make the calculated next stable * state of the cluster a reality */ /* ----------- Last input found in table is above ---------- */ S_ILLEGAL, /* This is an illegal FSA state */ /* (must be last) */ }; #define MAXSTATE S_ILLEGAL /* A state diagram can be constructed from the dc_fsa.dot with the following command: dot -Tpng crmd_fsa.dot > crmd_fsa.png Description: Once we
start and do some basic sanity checks, we go into the S_NOT_DC state and await instructions from the DC or input from the CCM which indicates the election algorithm needs to run. If the election algorithm is triggered we enter the S_ELECTION state from where we can either go back to the S_NOT_DC state or progress to the S_INTEGRATION state (or S_RELEASE_DC if we used to be the DC but aren't anymore). The election algorithm has been adapted from http://www.cs.indiana.edu/cgi-bin/techreports/TRNNN.cgi?trnum=TR521 Loosely known as the Bully Algorithm, its major points are: - Election is initiated by any node (N) that notices that the coordinator is no longer responding - Concurrent multiple elections are possible - Algorithm + N sends ELECTION messages to all nodes that occur earlier in the CCM's membership list. + If no one responds, N wins and becomes coordinator + N sends out COORDINATOR messages to all other nodes in the partition + If one of the higher-ups answers, it takes over. N is done. Once the election is complete, if we are the DC, we enter the S_INTEGRATION state which is a DC-in-waiting style state. We are the DC, but we shouldn't do anything yet because we may not have an up-to-date picture of the cluster. There may of course be times when this fails, so we should go back to the S_RECOVERY stage and check everything is ok. We may also end up here if a new node came online, since each node is authoritative on itself and we would want to incorporate its information into the CIB. Once we have the latest CIB, we then enter the S_POLICY_ENGINE state where we invoke the Policy Engine. It is possible that between invoking the Policy Engine and receiving an answer, we receive more input. In this case we would discard the original result and invoke it again. Once we are satisfied with the output from the Policy Engine we enter S_TRANSITION_ENGINE and feed the Policy Engine's output to the Transition Engine which attempts to make the Policy Engine's calculation a reality.
If the transition completes successfully, we enter S_IDLE, otherwise we go back to S_POLICY_ENGINE with the current unstable state and try again. Of course we may be asked to shutdown at any time, however we must progress to S_NOT_DC before doing so. Once we have handed over DC duties to another node, we can then shut down like everyone else, that is by asking the DC for permission and waiting it to take all our resources away. The case where we are the DC and the only node in the cluster is a special case and handled as an escalation which takes us to S_SHUTDOWN. Similarly if any other point in the shutdown fails or stalls, this is escalated and we end up in S_TERMINATE. At any point, the CRMd/DC can relay messages for its sub-systems, but outbound messages (from sub-systems) should probably be blocked until S_INTEGRATION (for the DC case) or the join protocol has completed (for the CRMd case) */ /*====================================== * * Inputs/Events/Stimuli to be given to the finite state machine * * Some of these are true events, and others are synthesised based on * the "register" (see below) and the contents or source of messages. * * At this point, my plan is to have a loop of some sort that keeps * going until receiving I_NULL * *======================================*/ enum crmd_fsa_input { I_NULL, /* Nothing happened */ I_CCM_EVENT, I_CIB_OP, /* An update to the CIB occurred */ I_CIB_UPDATE, /* An update to the CIB occurred */ I_DC_TIMEOUT, /* We have lost communication with the DC */ I_ELECTION, /* Someone started an election */ I_RELEASE_DC, /* The election completed and we were not * elected, but we were the DC beforehand */ I_ELECTION_DC, /* The election completed and we were (re-)elected * DC */ I_ERROR, /* Something bad happened (more serious than * I_FAIL) and may not have been due to the action * being performed. For example, we may have lost * our connection to the CIB.
*/ I_FAIL, /* The action failed to complete successfully */ I_INTEGRATION_TIMEOUT, I_NODE_JOIN, /* A node has entered the CCM membership list*/ I_NODE_LEFT, /* A node shutdown (possibly unexpectedly) */ I_NODE_LEAVING, /* A node has asked to be shutdown */ I_NOT_DC, /* We are not and were not the DC before or after * the current operation or state */ I_RECOVERED, /* The recovery process completed successfully */ I_RELEASE_FAIL, /* We could not give up DC status for some reason */ I_RELEASE_SUCCESS, /* We are no longer the DC */ I_RESTART, /* The current set of actions needs to be * restarted */ I_REQUEST, /* Some non-resource, non-ccm action is required * of us, eg. ping */ I_ROUTER, /* Do our job as router and forward this to the * right place */ I_SHUTDOWN, /* We are asking to shutdown */ I_TERMINATE, /* We have been told to shutdown */ I_STARTUP, I_SUCCESS, /* The action completed successfully */ I_WELCOME, /* Welcome a newly joined node */ I_WELCOME_ACK, /* The newly joined node has acknowledged us as overlord */ I_WAIT_FOR_EVENT, /* we may be waiting for an async task to "happen" * and until it does, we cannot do anything else */ I_DC_HEARTBEAT, /* The DC is telling us that it is alive and well */ I_LRM_EVENT, /* ------------ Last input found in table is above ----------- */ I_ILLEGAL, /* This is an illegal value for an FSA input */ /* (must be last) */ }; #define MAXINPUT I_ILLEGAL #define I_MESSAGE I_ROUTER /*====================================== * * actions * * Some of the actions below will always occur together for now, but I can * forsee that this may not always be the case. So I've spilt them up so * that if they ever do need to be called independantly in the future, it * wont be a problem. * * For example, separating A_LRM_CONNECT from A_STARTUP might be useful * if we ever try to recover from a faulty or disconnected LRM.
* *======================================*/ /* Complete list of actions A_CCM_CONNECT A_CCM_EVENT A_CCM_UPDATE_CACHE A_CIB_INVOKE A_CIB_RESTART A_CIB_START A_CIB_STOP A_CIB_UPDATE A_DC_RELEASE A_DC_TAKEOVER A_DISCONNECT A_ELECTION_COUNT A_ELECTION_TIMEOUT A_ELECTION_VOTE A_ERROR A_EXIT_0 A_EXIT_1 A_HA_CONNECT A_JOIN_ACK A_JOIN_WELCOME A_JOIN_WELCOME_ALL A_LOG A_LRM_CONNECT A_MSG_PROCESS A_MSG_ROUTE A_MSG_STORE A_NODE_BLOCK A_NOTHING A_PE_INVOKE A_PE_RESTART A_PE_START A_PE_STOP A_RECOVER A_SHUTDOWN A_STARTED A_STARTUP A_STOP A_TERMINATE A_TE_INVOKE A_TE_RESTART A_TE_START A_TE_STOP A_DC_TIMER_STOP A_DC_TIMER_START A_WARN */ /* Do not do anything */ #define A_NOTHING 0x0000000000000000ULL /* -- Startup actions -- */ /* Hook to perform any actions (other than starting the CIB, * connecting to HA or the CCM) that might be needed as part * of the startup. */ #define A_STARTUP 0x0000000000000001ULL /* Hook to perform any actions that might be needed * after startup is successful. */ #define A_STARTED 0x0000000000000002ULL /* Connect to Heartbeat */ #define A_HA_CONNECT 0x0000000000000004ULL #define A_HA_DISCONNECT 0x0000000000000008ULL /* -- Election actions -- */ #define A_DC_TIMER_START 0x0000000000000010ULL #define A_DC_TIMER_STOP 0x0000000000000020ULL #define A_ELECT_TIMER_START 0x0000000000000040ULL #define A_ELECT_TIMER_STOP 0x0000000000000080ULL #define A_ELECTION_COUNT 0x0000000000000100ULL #define A_ELECTION_TIMEOUT 0x0000000000000200ULL #define A_ELECTION_VOTE 0x0000000000000400ULL /* -- Join protocol actions -- */ #define A_ANNOUNCE 0x0000000000000800ULL /* Acknowledge the DC as our overlord*/ #define A_JOIN_ACK 0x0000000000001000ULL /* Send a welcome message to new node(s) */ #define A_JOIN_WELCOME 0x0000000000002000ULL /* Send a welcome message to all nodes */ #define A_JOIN_WELCOME_ALL 0x0000000000004000ULL /* Process the remote node's ack of our join message */ #define A_JOIN_PROCESS_ACK 0x0000000000008000ULL /* -- Message processing -- */ /*
Process the queue of requests */ #define A_MSG_PROCESS 0x0000000000010000ULL /* Send the message to the correct recipient */ #define A_MSG_ROUTE 0x0000000000020000ULL /* Put the request into a queue for processing. We do this every * time so that the processing is consistent. The intent is to * allow the DC to keep doing important work while still not * losing requests. * Messages are not considered received until processed. */ #define A_MSG_STORE 0x0000000000040000ULL /* -- Recovery, DC start/stop -- */ /* Something bad happened, try to recover */ #define A_RECOVER 0x0000000001000000ULL /* Hook to perform any actions (apart from starting, the TE, PE * and gathering the latest CIB) that might be necessary before * giving up the responsibilities of being the DC. */ #define A_DC_RELEASE 0x0000000002000000ULL /* */ #define A_DC_RELEASED 0x0000000004000000ULL /* Hook to perform any actions (apart from starting, the TE, PE * and gathering the latest CIB) that might be necessary before * taking over the responsibilities of being the DC. */ #define A_DC_TAKEOVER 0x0000000008000000ULL /* -- Shutdown actions -- */ #define A_SHUTDOWN 0x0000000010000000ULL #define A_STOP 0x0000000020000000ULL #define A_EXIT_0 0x0000000040000000ULL #define A_EXIT_1 0x0000000080000000ULL #define A_SHUTDOWN_REQ 0x0000000100000000ULL /* -- CCM actions -- */ #define A_CCM_CONNECT 0x0000001000000000ULL #define A_CCM_DISCONNECT 0x0000002000000000ULL /* Process whatever it is the CCM is trying to tell us. * This will generate inputs such as I_NODE_JOIN, * I_NODE_LEAVE, I_SHUTDOWN, I_DC_RELEASE, I_DC_TAKEOVER */ #define A_CCM_EVENT 0x0000004000000000ULL #define A_CCM_UPDATE_CACHE 0x0000008000000000ULL /* -- CIB actions -- */ #define A_CIB_INVOKE 0x0000010000000000ULL #define A_CIB_START 0x0000020000000000ULL #define A_CIB_STOP 0x0000040000000000ULL #define A_CIB_INVOKE_LOCAL 0x0000080000000000ULL /* -- Transition Engine actions -- */ /* Attempt to reach the newly calculated cluster state.
This is * only called once per transition (except if it is asked to * stop the transition or start a new one). * Once given a cluster state to reach, the TE will determin * tasks that can be performed in parallel, execute them, wait * for replies and then determin the next set until the new * state is reached or no further tasks can be taken. */ #define A_TE_INVOKE 0x0000100000000000ULL #define A_TE_START 0x0000200000000000ULL #define A_TE_STOP 0x0000400000000000ULL /* -- Policy Engine actions -- */ /* Calculate the next state for the cluster. This is only * invoked once per needed calculation. */ #define A_PE_INVOKE 0x0001000000000000ULL #define A_PE_START 0x0002000000000000ULL #define A_PE_STOP 0x0004000000000000ULL /* -- Misc actions -- */ /* Add a system generated "block" so that resources are not moved * to or are actively moved away from the affected node. This * way we can return quickly even if busy with other things. */ #define A_NODE_BLOCK 0x0010000000000000ULL /* Update our information in the local CIB */ #define A_UPDATE_NODESTATUS 0x0020000000000000ULL #define A_CIB_BUMPGEN 0x0040000000000000ULL /* -- LRM Actions -- */ /* Connect to the Local Resource Manager */ #define A_LRM_CONNECT 0x0100000000000000ULL /* Disconnect from the Local Resource Manager */ #define A_LRM_DISCONNECT 0x0200000000000000ULL #define A_LRM_INVOKE 0x0400000000000000ULL #define A_LRM_EVENT 0x0800000000000000ULL /* -- Logging actions -- */ #define A_LOG 0x1000000000000000ULL #define A_ERROR 0x2000000000000000ULL #define A_WARN 0x4000000000000000ULL -#define O_SHUTDOWN A_DC_TIMER_STOP|A_CCM_DISCONNECT|A_LRM_DISCONNECT|A_HA_DISCONNECT|A_SHUTDOWN|A_STOP|A_EXIT_0|A_CIB_STOP -#define O_RELEASE A_DC_RELEASE|A_PE_STOP|A_TE_STOP|A_DC_RELEASED -#define O_DC_TIMER_RESTART A_DC_TIMER_STOP|A_DC_TIMER_START -#define O_PE_RESTART A_PE_START|A_PE_STOP -#define O_TE_RESTART A_TE_START|A_TE_STOP -#define O_CIB_RESTART A_CIB_START|A_CIB_STOP +#define O_SHUTDOWN
(A_DC_TIMER_STOP|A_CCM_DISCONNECT|A_LRM_DISCONNECT|A_HA_DISCONNECT|A_SHUTDOWN|A_STOP|A_EXIT_0|A_CIB_STOP) +#define O_RELEASE (A_DC_RELEASE|A_PE_STOP|A_TE_STOP|A_DC_RELEASED) +#define O_DC_TIMER_RESTART (A_DC_TIMER_STOP|A_DC_TIMER_START) +#define O_PE_RESTART (A_PE_START|A_PE_STOP) +#define O_TE_RESTART (A_TE_START|A_TE_STOP) +#define O_CIB_RESTART (A_CIB_START|A_CIB_STOP) #define O_DC_TICKLE O_DC_TIMER_RESTART /*====================================== * * "register" contents * * Things we may want to remember regardless of which state we are in. * * These also count as inputs for synthesizing I_* * *======================================*/ #define R_THE_DC 0x00000001 /* Are we the DC? */ #define R_STARTING 0x00000002 /* Are we starting up? */ #define R_SHUTDOWN 0x00000004 /* Are we trying to shut down? */ #define R_CIB_DONE 0x00000008 /* Have we calculated the CIB? */ #define R_JOIN_OK 0x00000010 /* Have we completed the join process */ #define R_HAVE_CIB 0x00000020 /* Do we have an up-to-date CIB */ #define R_HAVE_RES 0x00000040 /* Do we have any resources running locally */ #define R_INVOKE_PE 0x00000080 /* Does the PE need to be invoked at the next appropriate point? */ #define R_CIB_CONNECTED 0x00000100 /* Is the CIB connected? */ #define R_PE_CONNECTED 0x00000200 /* Is the Policy Engine connected? */ #define R_TE_CONNECTED 0x00000400 /* Is the Transition Engine connected? */ #define R_LRM_CONNECTED 0x00000800 /* Is the Local Resource Manager connected? */ #define R_REQ_PEND 0x00001000 /* Are there Requests waiting for processing? */ #define R_PE_PEND 0x00002000 /* Has the PE been invoked and we're awaiting a reply? */ #define R_TE_PEND 0x00004000 /* Has the TE been invoked and we're awaiting completion? */ #define R_RESP_PEND 0x00008000 /* Do we have clients waiting on a response?
if so perhaps we shouldnt stop yet */ enum crmd_fsa_cause { C_UNKNOWN = 0, C_STARTUP, C_IPC_MESSAGE, C_HA_MESSAGE, C_CCM_CALLBACK, C_LRM_OP_CALLBACK, C_LRM_MONITOR_CALLBACK, C_TIMER_POPPED, C_SHUTDOWN, C_HEARTBEAT_FAILED, C_ILLEGAL }; extern const char *fsa_input2string(int input); extern const char *fsa_state2string(int state); extern const char *fsa_cause2string(int cause); extern const char *fsa_action2string(long long action); #endif diff --git a/crm/crmd/messages.c b/crm/crmd/messages.c index 9b947d3583..e65b488bac 100644 --- a/crm/crmd/messages.c +++ b/crm/crmd/messages.c @@ -1,849 +1,812 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include FILE *msg_in_strm = NULL; FILE *router_strm = NULL; fsa_message_queue_t fsa_message_queue = NULL; gboolean relay_message(xmlNodePtr xml_relay_message, gboolean originated_locally); #ifdef MSG_LOG # define ROUTER_RESULT(x) char *msg_text = dump_xml(xml_relay_message);\ if(router_strm == NULL) { \ router_strm = fopen("/tmp/router.log", "w"); \ } \ fprintf(router_strm, "[%d RESULT (%s)]\t%s\t%s\n", \ AM_I_DC, \ xmlGetProp(xml_relay_message, XML_ATTR_REFERENCE),\ x, msg_text); \ fflush(router_strm); \ cl_free(msg_text); #else # define ROUTER_RESULT(x) CRM_DEBUG(x); #endif /* returns the current head of the FIFO queue */ fsa_message_queue_t put_message(xmlNodePtr new_message) { fsa_message_queue_t next_message = (fsa_message_queue_t) cl_malloc(sizeof(struct fsa_message_queue_s)); CRM_DEBUG("Adding msg to queue"); next_message->message = new_message; next_message->next = NULL; if(fsa_message_queue == NULL) { fsa_message_queue = next_message; } else { fsa_message_queue->next = next_message; } CRM_DEBUG("Added msg to queue"); return fsa_message_queue; } /* returns the next message */ fsa_message_queue_t get_message(void) { fsa_message_queue_t next_message = NULL; if(fsa_message_queue != NULL) { next_message = fsa_message_queue; fsa_message_queue = fsa_message_queue->next; next_message->next = NULL; } return next_message; } /* returns the current head of the FIFO queue */ gboolean is_message(void) { return (fsa_message_queue != NULL && fsa_message_queue->message != NULL); } /* A_MSG_STORE */ enum crmd_fsa_input do_msg_store(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, void *data) { // xmlNodePtr new_message 
= (xmlNodePtr)data; FNIN(); // put_message(new_message); FNRET(I_NULL); } /* A_MSG_ROUTE */ enum crmd_fsa_input do_msg_route(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, void *data) { enum crmd_fsa_input result = I_NULL; xmlNodePtr xml_message = (xmlNodePtr)data; gboolean routed = FALSE, defer = TRUE, do_process = TRUE; FNIN(); #if 0 // if(cause == C_IPC_MESSAGE) { if (crmd_authorize_message(root_xml_node, msg, curr_client) == FALSE) { CRM_DEBUG("Message not authorized"); do_process = FALSE; } // } #endif if(do_process) { /* try passing the buck first */ routed = relay_message(xml_message, cause==C_IPC_MESSAGE); if(routed == FALSE) { defer = TRUE; /* calculate defer */ result = handle_message(xml_message); switch(result) { case I_NULL: defer = FALSE; break; case I_DC_HEARTBEAT: defer = FALSE; break; /* what else should go here? */ default: CRM_DEBUG("Defering local processing of message"); put_message(xml_message); result = I_REQUEST; break; } } } FNRET(result); } void crmd_ha_input_callback(const struct ha_msg* msg, void* private_data) { const char *from = ha_msg_value(msg, F_ORIG); const char *to = NULL; FNIN(); #ifdef MSG_LOG - if(msg_in_strm == NULL) { msg_in_strm = fopen("/tmp/inbound.log", "w"); } - fprintf(msg_in_strm, "[%s (%s:%s)]\t%s\n", - from, - ha_msg_value(msg, F_SEQ), - ha_msg_value(msg, F_TYPE), - ha_msg_value(msg, "xml") - ); - fflush(msg_in_strm); - #endif if(from == NULL || strcmp(from, fsa_our_uname) == 0) { #ifdef MSG_LOG fprintf(msg_in_strm, "Discarded message [F_SEQ=%s] from ourselves.\n", ha_msg_value(msg, F_SEQ)); #endif FNOUT(); } + +#ifdef MSG_LOG + fprintf(msg_in_strm, "[%s (%s:%s)]\t%s\n", + from, + ha_msg_value(msg, F_SEQ), + ha_msg_value(msg, F_TYPE), + ha_msg_value(msg, "xml") + ); + fflush(msg_in_strm); +#endif xmlNodePtr root_xml_node = find_xml_in_hamessage(msg); to = xmlGetProp(root_xml_node, XML_ATTR_HOSTTO); - -/* - if(AM_I_DC == FALSE && (to == NULL || 
strlen(to) == 0)) { - CRM_DEBUG2("Discarding message [F_SEQ=%s] for the DC.", - ha_msg_value(msg, F_SEQ)); - FNOUT(); - } -*/ - + if(to != NULL && strlen(to) > 0 && strcmp(to, fsa_our_uname) != 0) { #ifdef MSG_LOG fprintf(msg_in_strm, "Discarding message [F_SEQ=%s] for someone else.", ha_msg_value(msg, F_SEQ)); #endif FNOUT(); } set_xml_property_copy(root_xml_node, XML_ATTR_HOSTFROM, from); s_crmd_fsa(C_HA_MESSAGE, I_ROUTER, root_xml_node); free_xml(root_xml_node); FNOUT(); } /* * Apparently returning TRUE means "stay connected, keep doing stuff". * Returning FALSE means "we're all done, close the connection" */ gboolean crmd_ipc_input_callback(IPC_Channel *client, gpointer user_data) { int lpc = 0; char *buffer = NULL; IPC_Message *msg = NULL; gboolean hack_return_good = TRUE; crmd_client_t *curr_client = (crmd_client_t*)user_data; FNIN(); CRM_DEBUG2("Processing IPC message from %s", curr_client->table_key); while(client->ops->is_message_pending(client)) { if (client->ch_status == IPC_DISCONNECT) { /* The message which was pending for us is that * the IPC status is now IPC_DISCONNECT */ break; } if (client->ops->recv(client, &msg) != IPC_OK) { perror("Receive failure:"); FNRET(!hack_return_good); } if (msg == NULL) { CRM_DEBUG("No message this time"); continue; } lpc++; buffer = (char*)msg->msg_body; CRM_DEBUG3("Processing xml from %s [text=%s]", curr_client->table_key, buffer); xmlNodePtr root_xml_node = find_xml_in_ipcmessage(msg, FALSE); if (root_xml_node != NULL) { if (crmd_authorize_message(root_xml_node, msg, curr_client)) { CRM_DEBUG("Message authorized,about to relay"); s_crmd_fsa(C_IPC_MESSAGE, I_ROUTER, root_xml_node); } else { CRM_DEBUG("Message not authorized"); } } else { cl_log(LOG_INFO, "IPC Message was not valid... 
discarding."); } free_xml(root_xml_node); msg->msg_done(msg); msg = NULL; buffer = NULL; root_xml_node = NULL; } CRM_DEBUG2("Processed %d messages", lpc); if (client->ch_status == IPC_DISCONNECT) { cl_log(LOG_INFO, "received HUP from %s", curr_client->table_key); if (curr_client != NULL) { struct crm_subsystem_s *the_subsystem = NULL; if (curr_client->sub_sys == NULL) CRM_DEBUG("Client had not registered with us yet"); else if (strcmp(CRM_SYSTEM_PENGINE, curr_client->sub_sys) == 0) the_subsystem = pe_subsystem; else if (strcmp(CRM_SYSTEM_TENGINE, curr_client->sub_sys) == 0) the_subsystem = te_subsystem; else if (strcmp(CRM_SYSTEM_CIB, curr_client->sub_sys) == 0) the_subsystem = cib_subsystem; if(the_subsystem != NULL) { cleanup_subsystem(the_subsystem); } // else that was a transient client if (curr_client->table_key != NULL) { /* * Key is destroyed below: curr_client->table_key * Value is cleaned up by G_main_del_IPC_Channel */ g_hash_table_remove(ipc_clients, curr_client->table_key); } if(curr_client->client_source != NULL) { gboolean det = G_main_del_IPC_Channel(curr_client->client_source); CRM_DEBUG2("crm_client was %s detached", det?"successfully":"not"); } cl_free(curr_client->table_key); cl_free(curr_client->sub_sys); cl_free(curr_client->uuid); cl_free(curr_client); } CRM_DEBUG("this client has now left the building."); FNRET(!hack_return_good); } FNRET(hack_return_good); } /* * This method adds a copy of xml_response_data */ gboolean send_request(xmlNodePtr msg_options, xmlNodePtr msg_data, const char *operation, const char *host_to, const char *sys_to) { gboolean was_sent = FALSE; xmlNodePtr request = NULL; FNIN(); msg_options = set_xml_attr(msg_options, XML_TAG_OPTIONS, XML_ATTR_OP, operation, TRUE); request = create_request(msg_options, msg_data, host_to, sys_to, AM_I_DC?CRM_SYSTEM_DC:CRM_SYSTEM_CRMD, NULL, NULL); // xml_message_debug(request, "Final request..."); was_sent = relay_message(request, TRUE); free_xml(request); FNRET(was_sent); } gboolean 
relay_message(xmlNodePtr xml_relay_message, gboolean originated_locally) { int is_for_dc = 0; int is_for_dcib = 0; int is_for_crm = 0; int is_for_cib = 0; int is_local = 0; gboolean dont_cc= TRUE; gboolean processing_complete = FALSE; const char *host_to = xmlGetProp(xml_relay_message, XML_ATTR_HOSTTO); const char *sys_to = xmlGetProp(xml_relay_message, XML_ATTR_SYSTO); const char *sys_cc = get_xml_attr(xml_relay_message, XML_TAG_OPTIONS, XML_ATTR_SYSCC, FALSE); FNIN(); if(xml_relay_message == NULL) { cl_log(LOG_ERR, "Cannot route empty message"); FNRET(TRUE); } if(strcmp("hello", xml_relay_message->name) == 0) { /* quietly ignore */ FNRET(TRUE); } if(strcmp(XML_MSG_TAG, xml_relay_message->name) != 0) { xml_message_debug(xml_relay_message, "Bad message type, should be crm_message"); cl_log(LOG_ERR, "Ignoring message of type %s", xml_relay_message->name); FNRET(TRUE); } if(sys_to == NULL) { xml_message_debug(xml_relay_message, "Message did not have any value for sys_to"); cl_log(LOG_ERR, "Message did not have any value for %s", XML_ATTR_SYSTO); FNRET(TRUE); } is_for_dc = (strcmp(CRM_SYSTEM_DC, sys_to) == 0); is_for_dcib = (strcmp(CRM_SYSTEM_DCIB, sys_to) == 0); is_for_cib = (strcmp(CRM_SYSTEM_CIB, sys_to) == 0); is_for_crm = (strcmp(CRM_SYSTEM_CRMD, sys_to) == 0); is_local = 0; if(host_to == NULL || strlen(host_to) == 0) { if(is_for_dc) is_local = 0; else if(is_for_crm && originated_locally) is_local = 0; else is_local = 1; } else if(strcmp(fsa_our_uname, host_to) == 0) { is_local=1; } #if 0 CRM_DEBUG2("is_local %d", is_local); CRM_DEBUG2("is_for_dcib %d", is_for_dcib); CRM_DEBUG2("is_for_dc %d", is_for_dc); CRM_DEBUG2("is_for_crm %d", is_for_crm); CRM_DEBUG2("AM_I_DC %d", AM_I_DC); CRM_DEBUG2("sys_to %s", sys_to); CRM_DEBUG2("host_to %s", host_to); #endif /* if(AM_I_DC && sys_cc != NULL && strcmp(sys_cc, CRM_SYSTEM_DC) == 0) { dont_cc = FALSE; } */ (void)sys_cc; if(is_for_dc || is_for_dcib) { if(AM_I_DC) { ROUTER_RESULT("Message result: DC/CRMd process"); 
processing_complete = FALSE; // more to be done by caller } else if(originated_locally) { ROUTER_RESULT("Message result: External relay to DC"); send_msg_via_ha(xml_relay_message, NULL); processing_complete = TRUE; } else { ROUTER_RESULT("Message result: Discard, not DC"); processing_complete = TRUE; // discard } } else if(is_local && (is_for_crm || is_for_cib)) { ROUTER_RESULT("Message result: CRMd process"); } else if(is_local) { if(dont_cc) { ROUTER_RESULT("Message result: Local relay"); } else { /* The DC should also get this message */ ROUTER_RESULT("Message result: Local relay with CC"); } send_msg_via_ipc(xml_relay_message, sys_to); processing_complete = TRUE & dont_cc; } else { if(dont_cc) { ROUTER_RESULT("Message result: External relay"); } else { /* The DC should also get this message */ ROUTER_RESULT("Message result: External relay with CC"); } send_msg_via_ha(xml_relay_message, host_to); processing_complete = TRUE & dont_cc; } FNRET(processing_complete); } void send_msg_via_ha(xmlNodePtr action, const char *dest_node) { FNIN(); if (action == NULL) FNOUT(); if (validate_crm_message(action, NULL, NULL, NULL) == NULL) { cl_log(LOG_ERR, "Relay message to (%s) via HA was invalid, ignoring", dest_node); FNOUT(); } // CRM_DEBUG2("Relaying message to (%s) via HA", dest_node); set_xml_property_copy(action, XML_ATTR_HOSTTO, dest_node); send_xmlha_message(fsa_cluster_conn, action); FNOUT(); } void send_msg_via_ipc(xmlNodePtr action, const char *sys) { FNIN(); // cl_log(LOG_DEBUG, "relaying msg to sub_sys=%s via IPC", sys); IPC_Channel *client_channel = (IPC_Channel*)g_hash_table_lookup (ipc_clients, sys); if (client_channel != NULL) { cl_log(LOG_DEBUG, "Sending message via channel %s.", sys); send_xmlipc_message(client_channel, action); } else if(sys != NULL && strcmp(sys, CRM_SYSTEM_CIB) == 0) { cl_log(LOG_ERR, "Sub-system (%s) has been incorporated into the CRMd.", sys); xml_message_debug(action, "Change the way we handle"); 
relay_message(process_cib_message(action, TRUE), TRUE); } else if(sys != NULL && strcmp(sys, CRM_SYSTEM_LRMD) == 0) { do_lrm_invoke(A_LRM_INVOKE, C_IPC_MESSAGE, fsa_state, I_MESSAGE, action); } else { cl_log(LOG_ERR, "Unknown Sub-system (%s)... discarding message.", sys); } FNOUT(); } gboolean crmd_authorize_message(xmlNodePtr root_xml_node, IPC_Message *client_msg, crmd_client_t *curr_client) { // check the best case first const char *sys_from = xmlGetProp(root_xml_node, XML_ATTR_SYSFROM); char *uuid = NULL; char *client_name = NULL; char *major_version = NULL; char *minor_version = NULL; gpointer table_key = NULL; FNIN(); if (sys_from != NULL) { gboolean can_reply = FALSE; // no-one has registered with this id const char *filtered_from = sys_from; /* The CIB can have two names on the DC */ if(strcmp(sys_from, CRM_SYSTEM_DCIB) == 0) filtered_from = CRM_SYSTEM_CIB; if (g_hash_table_lookup (ipc_clients, filtered_from) != NULL) can_reply = TRUE; // reply can be routed CRM_DEBUG3("Message reply can%s be routed from %s.", can_reply?"":" not", sys_from); FNRET(can_reply); } // otherwise, check if it was a hello message cl_log(LOG_INFO, "received client join msg: %s", (char*)client_msg->msg_body); gboolean result = process_hello_message(client_msg, &uuid, &client_name, &major_version, &minor_version); if (result == TRUE) { // check version int mav = atoi(major_version); int miv = atoi(minor_version); if (mav < 0 || miv < 0) { cl_log(LOG_ERR, "Client version (%d:%d) is not acceptable", mav, miv); result = FALSE; } cl_free(major_version); cl_free(minor_version); } if (result == TRUE) { /* if we already have one of those clients * only applies to te, pe etc. 
not admin clients */ struct crm_subsystem_s *the_subsystem = NULL; if (client_name == NULL) CRM_DEBUG("Client had not registered with us yet"); else if (strcmp(CRM_SYSTEM_PENGINE, client_name) == 0) the_subsystem = pe_subsystem; else if (strcmp(CRM_SYSTEM_TENGINE, client_name) == 0) the_subsystem = te_subsystem; else if (strcmp(CRM_SYSTEM_CIB, client_name) == 0) the_subsystem = cib_subsystem; if (the_subsystem != NULL) { // do we already have one? result = (fsa_input_register & the_subsystem->flag) == 0; set_bit_inplace(&fsa_input_register, the_subsystem->flag); if(result) { the_subsystem->ipc = curr_client->client_channel; } // else we didnt ask for the client to start } else if(client_name != NULL && uuid != NULL) { table_key = (gpointer) generate_hash_key(client_name, uuid); } else { result = FALSE; cl_log(LOG_ERR, "Bad client details (client_name=%s, uuid=%s)", client_name, uuid); } } if(result == TRUE && table_key == NULL) table_key = (gpointer)cl_strdup(client_name); if (result == TRUE) { cl_log(LOG_INFO, "Accepted client %s", (char*)table_key); curr_client->table_key = table_key; curr_client->sub_sys = cl_strdup(client_name); curr_client->uuid = cl_strdup(uuid); g_hash_table_insert (ipc_clients, table_key, curr_client->client_channel); send_hello_message(curr_client->client_channel, "n/a", CRM_SYSTEM_CRMD, "0", "1"); } else { cl_log(LOG_ERR, "Rejected client logon request"); curr_client->client_channel->ch_status = IPC_DISC_PENDING; } if(uuid != NULL) cl_free(uuid); if(client_name != NULL) cl_free(client_name); /* hello messages should never be processed further */ FNRET(FALSE); } enum crmd_fsa_input handle_message(xmlNodePtr stored_msg) { enum crmd_fsa_input next_input = I_NULL; const char *sys_to = get_xml_attr(stored_msg, NULL, XML_ATTR_SYSTO, TRUE); - const char *sys_from = get_xml_attr(stored_msg, NULL, - XML_ATTR_SYSFROM, TRUE); +// const char *sys_from = get_xml_attr(stored_msg, NULL, +// XML_ATTR_SYSFROM, TRUE); const char *type = 
get_xml_attr(stored_msg, NULL, XML_ATTR_MSGTYPE, TRUE); const char *op = get_xml_attr(stored_msg, XML_TAG_OPTIONS, XML_ATTR_OP, TRUE); // xml_message_debug(stored_msg, "Processing message"); if(type == NULL || op == NULL) { cl_log(LOG_ERR, "Ignoring message (type=%s), (op=%s)", type, op); xml_message_debug(stored_msg, "Bad message"); } else if(strcmp(type, XML_ATTR_REQUEST) == 0){ if(strcmp(op, CRM_OPERATION_VOTE) == 0) { next_input = I_ELECTION; } else if(strcmp(op, CRM_OPERATION_HBEAT) == 0) { next_input = I_DC_HEARTBEAT; } else if(strcmp(op, CRM_OPERATION_WELCOME) == 0) { next_input = I_WELCOME; } else if(strcmp(op, CRM_OPERATION_SHUTDOWN_REQ) == 0) { next_input = I_CIB_OP; } else if(strcmp(op, CRM_OPERATION_SHUTDOWN) == 0) { next_input = I_TERMINATE; } else if(strcmp(op, CRM_OPERATION_ANNOUNCE) == 0) { next_input = I_NODE_JOIN; } else if(strcmp(op, CRM_OPERATION_REPLACE) == 0 || strcmp(op, CRM_OPERATION_ERASE) == 0) { next_input = I_CIB_OP; fprintf(router_strm, "Message result: CIB Op\n"); } else if(AM_I_DC && (strcmp(op, CRM_OPERATION_CREATE) == 0 || strcmp(op, CRM_OPERATION_UPDATE) == 0 || strcmp(op, CRM_OPERATION_DELETE) == 0)) { /* updates should only be performed on the DC */ next_input = I_CIB_OP; } else if(strcmp(op, CRM_OPERATION_PING) == 0) { /* eventually do some stuff to figure out * if we /are/ ok */ xmlNodePtr ping = createPingAnswerFragment(sys_to, "ok"); xmlNodePtr wrapper = create_reply(stored_msg, ping); relay_message(wrapper, TRUE); free_xml(wrapper); } else { cl_log(LOG_ERR, "Unexpected request (op=%s) sent to the %s", op, AM_I_DC?"DC":"CRMd"); } } else if(strcmp(type, XML_ATTR_RESPONSE) == 0) { if(strcmp(op, CRM_OPERATION_WELCOME) == 0) { next_input = I_WELCOME_ACK; } else if(strcmp(op, CRM_OPERATION_VOTE) == 0 || strcmp(op, CRM_OPERATION_HBEAT) == 0 || strcmp(op, CRM_OPERATION_WELCOME) == 0 || strcmp(op, CRM_OPERATION_SHUTDOWN_REQ) == 0 || strcmp(op, CRM_OPERATION_SHUTDOWN) == 0 || strcmp(op, CRM_OPERATION_ANNOUNCE) == 0) { next_input = 
I_NULL; } else if(AM_I_DC && (strcmp(op, CRM_OPERATION_CREATE) == 0 || strcmp(op, CRM_OPERATION_UPDATE) == 0 || strcmp(op, CRM_OPERATION_DELETE) == 0 + || strcmp(op, CRM_OPERATION_REPLACE) == 0 || strcmp(op, CRM_OPERATION_ERASE) == 0)) { // perhaps we should do somethign with these replies fprintf(router_strm, "Message result: CIB Reply\n"); next_input = I_CIB_UPDATE; - } else if(AM_I_DC && strcmp(sys_from, CRM_SYSTEM_CIB) == 0) { - - /* this is a reply to our earlier command - * Send it to the relevant node(s) - */ - const char *uname = - get_xml_attr(stored_msg, XML_TAG_OPTIONS, - "forward_to", FALSE); - - xmlNodePtr data = find_xml_node(stored_msg, - XML_TAG_FRAGMENT); - - xmlNodePtr local_options = NULL; - - /* If this is part of join request processing, - * ask for the status section, otherwise just blast - * it down to them. - */ - if(uname != NULL) { - local_options = set_xml_attr(NULL, - XML_TAG_OPTIONS, - XML_ATTR_VERBOSE, - "true", - TRUE); - } - - send_request(local_options, data, CRM_OPERATION_REPLACE, - uname, CRM_SYSTEM_CRMD); - - free_xml(local_options); - } else { cl_log(LOG_ERR, "Unexpected response (op=%s) sent to the %s", op, AM_I_DC?"DC":"CRMd"); } } else { cl_log(LOG_ERR, "Unexpected message type %s", type); } /* CRM_DEBUG3("%s: Next input is %s", __FUNCTION__, */ /* fsa_input2string(next_input)); */ return next_input; } void lrm_op_callback (lrm_op_t* op) { CRM_DEBUG("In lrm_op_callback()"); s_crmd_fsa(C_LRM_OP_CALLBACK, I_LRM_EVENT, op); } void lrm_monitor_callback (lrm_mon_t* monitor) { CRM_DEBUG("In lrm_monitor_callback()"); s_crmd_fsa(C_LRM_MONITOR_CALLBACK, I_LRM_EVENT, monitor); } diff --git a/crm/crmd/subsystems.c b/crm/crmd/subsystems.c index d4fa9a7e19..9496846c6b 100644 --- a/crm/crmd/subsystems.c +++ b/crm/crmd/subsystems.c @@ -1,1066 +1,1084 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the 
Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include // for access #include #include #include // for calls to open #include // for calls to open #include // for calls to open #include // for getpwuid #include // for initgroups #include // for getrlimit #include // for getrlimit #include #include #include #include #include #include #include #include #include #include #include #define CLIENT_EXIT_WAIT 10 static gboolean stop_subsystem (struct crm_subsystem_s *centry); static gboolean start_subsystem(struct crm_subsystem_s *centry); static gboolean run_command (struct crm_subsystem_s *centry, const char *options, gboolean update_pid); xmlNodePtr do_lrm_query(void); GHashTable *xml2list(xmlNodePtr parent, const char **attr_path, int depth); gboolean lrm_dispatch(int fd, gpointer user_data); void send_cib_status_update(xmlNodePtr update); void send_cib_lrm_update(xmlNodePtr update, gboolean lrm_replace); void do_update_resource(lrm_rsc_t *rsc, int status, int rc, const char *op_type); struct crm_subsystem_s *cib_subsystem = NULL; struct crm_subsystem_s *te_subsystem = NULL; struct crm_subsystem_s *pe_subsystem = NULL; void cleanup_subsystem(struct crm_subsystem_s *the_subsystem) { int pid_status = -1; the_subsystem->ipc = NULL; clear_bit_inplace(&fsa_input_register, the_subsystem->flag); /* Forcing client to die */ kill(the_subsystem->pid, -9); // cleanup the ps entry waitpid(the_subsystem->pid, &pid_status, WNOHANG); 
the_subsystem->pid = -1; } /* A_CIB_STOP, A_CIB_START, A_CIB_RESTART, */ enum crmd_fsa_input do_cib_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, void *data) { enum crmd_fsa_input result = I_NULL; struct crm_subsystem_s *this_subsys = cib_subsystem; long long stop_actions = A_CIB_STOP; long long start_actions = A_CIB_START; FNIN(); if(action & stop_actions) { // dont do anything, its embedded now } if(action & start_actions) { if(cur_state != S_STOPPING) { if(startCib(CIB_FILENAME) == FALSE) result = I_FAIL; } else { cl_log(LOG_INFO, "Ignoring request to start %s after shutdown", this_subsys->command); } } FNRET(result); } /* A_CIB_INVOKE, A_CIB_BUMPGEN, A_UPDATE_NODESTATUS */ enum crmd_fsa_input do_cib_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, void *data) { xmlNodePtr cib_msg = NULL; FNIN(); if(data != NULL) cib_msg = (xmlNodePtr)data; if(action & A_CIB_INVOKE) { const char *op = xmlGetProp(cib_msg, XML_ATTR_OP); if(safe_str_eq(op, CRM_OPERATION_SHUTDOWN_REQ)){ // create update section xmlNodePtr tmp1 = NULL; xmlNodePtr tmp2 = create_xml_node(NULL, XML_CIB_TAG_STATE); const char *req_from = xmlGetProp(cib_msg, XML_ATTR_HOSTFROM); set_xml_property_copy(tmp1, "id", req_from); set_xml_property_copy(tmp1, "exp_state", "shutdown"); // create fragment tmp1 = create_cib_fragment(tmp2, XML_CIB_TAG_STATUS); // add to cib_msg - xmlAddChild(cib_msg, tmp1); + add_node_copy(cib_msg, tmp1); + + free_xml(tmp2); + free_xml(tmp1); } set_xml_property_copy(cib_msg, XML_ATTR_SYSTO, "cib"); xmlNodePtr answer = process_cib_message(cib_msg, TRUE); if(relay_message(answer, TRUE) == FALSE) { cl_log(LOG_ERR, "Confused what to do with cib result"); xml_message_debug(answer, "Couldnt route: "); } // check the answer, see if we are interested in it also #if 0 if(interested in reply) { put_message(answer); FNRET(I_REQUEST); } #endif 
free_xml(answer); /* experimental */ } else if(action & A_CIB_INVOKE_LOCAL) { xmlNodePtr answer = process_cib_message(cib_msg, TRUE); put_message(answer); FNRET(I_REQUEST); } else if(action & A_CIB_BUMPGEN) { // check if the response was ok before next bit const char *section = get_xml_attr(cib_msg, XML_TAG_OPTIONS, XML_ATTR_FILTER_TYPE, FALSE); /* set the section so that we dont always send the * whole thing */ xmlNodePtr new_options = set_xml_attr(NULL, XML_TAG_OPTIONS, XML_ATTR_FILTER_TYPE, section, TRUE); xmlNodePtr answer = process_cib_request(CRM_OPERATION_BUMP, new_options, NULL); + free_xml(new_options); + + if(answer == NULL) { + cl_log(LOG_ERR, "Result of BUMP in %s was NULL", + __FUNCTION__); + FNRET(I_FAIL); + } + send_request(NULL, answer, CRM_OPERATION_REPLACE, NULL, CRM_SYSTEM_CRMD); - + free_xml(answer); - free_xml(new_options); } else if(action & A_UPDATE_NODESTATUS) { xmlNodePtr data = do_lrm_query(); send_cib_lrm_update(data, TRUE); free_xml(data); } else { cl_log(LOG_ERR, "Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } FNRET(I_NULL); } /* A_PE_START, A_PE_STOP, A_TE_RESTART */ enum crmd_fsa_input do_pe_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, void *data) { enum crmd_fsa_input result = I_NULL; struct crm_subsystem_s *this_subsys = pe_subsystem; long long stop_actions = A_PE_STOP; long long start_actions = A_PE_START; FNIN(); if(action & stop_actions) { if(stop_subsystem(this_subsys) == FALSE) result = I_FAIL; else if(this_subsys->pid > 0){ int lpc = CLIENT_EXIT_WAIT; int pid_status = -1; while(lpc-- > 0 && this_subsys->pid > 0 && CL_PID_EXISTS(this_subsys->pid)) { sleep(1); waitpid(this_subsys->pid, &pid_status, WNOHANG); } if(CL_PID_EXISTS(this_subsys->pid)) { cl_log(LOG_ERR, "Process %s is still active with pid=%d", this_subsys->command, this_subsys->pid); result = I_FAIL; } } cleanup_subsystem(this_subsys); } if(action & start_actions) { 
if(cur_state != S_STOPPING) { if(start_subsystem(this_subsys) == FALSE) { result = I_FAIL; cleanup_subsystem(this_subsys); } } else { cl_log(LOG_INFO, "Ignoring request to start %s while shutting down", this_subsys->command); } } FNRET(result); } /* A_PE_INVOKE */ enum crmd_fsa_input do_pe_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, void *data) { FNIN(); stopTimer(integration_timer); cl_log(LOG_ERR, "Action %s (%.16llx) not supported\n", fsa_action2string(action), action); FNRET(I_NULL); } /* A_TE_START, A_TE_STOP, A_TE_RESTART */ enum crmd_fsa_input do_te_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, void *data) { enum crmd_fsa_input result = I_NULL; struct crm_subsystem_s *this_subsys = te_subsystem; long long stop_actions = A_TE_STOP; long long start_actions = A_TE_START; FNIN(); /* if(action & stop_actions && cur_state != S_STOPPING */ /* && is_set(fsa_input_register, R_TE_PEND)) { */ /* result = I_WAIT_FOR_EVENT; */ /* FNRET(result); */ /* } */ if(action & stop_actions) { if(stop_subsystem(this_subsys) == FALSE) result = I_FAIL; else if(this_subsys->pid > 0){ int lpc = CLIENT_EXIT_WAIT; int pid_status = -1; while(lpc-- > 0 && this_subsys->pid > 0 && CL_PID_EXISTS(this_subsys->pid)) { sleep(1); waitpid(this_subsys->pid, &pid_status, WNOHANG); } if(CL_PID_EXISTS(this_subsys->pid)) { cl_log(LOG_ERR, "Process %s is still active with pid=%d", this_subsys->command, this_subsys->pid); result = I_FAIL; } } cleanup_subsystem(this_subsys); } if(action & start_actions) { if(cur_state != S_STOPPING) { if(start_subsystem(this_subsys) == FALSE) { result = I_FAIL; cleanup_subsystem(this_subsys); } } else { cl_log(LOG_INFO, "Ignoring request to start %s while shutting down", this_subsys->command); } } FNRET(result); } /* A_TE_INVOKE */ enum crmd_fsa_input do_te_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state 
cur_state, enum crmd_fsa_input current_input, void *data) { FNIN(); cl_log(LOG_ERR, "Action %s (%.16llx) not supported\n", fsa_action2string(action), action); FNRET(I_NULL); } gboolean crmd_client_connect(IPC_Channel *client_channel, gpointer user_data) { FNIN(); CRM_DEBUG("A client tried to connect... and there was much rejoicing."); if (client_channel == NULL) { cl_log(LOG_ERR, "Channel was NULL"); } else if (client_channel->ch_status == IPC_DISCONNECT) { cl_log(LOG_ERR, "Channel was disconnected"); } else { crmd_client_t *blank_client = (crmd_client_t *)cl_malloc(sizeof(crmd_client_t)); if (blank_client == NULL) { cl_log(LOG_ERR, "Could not allocate memory for a blank crmd_client_t"); FNRET(FALSE); } client_channel->ops->set_recv_qlen(client_channel, 100); client_channel->ops->set_send_qlen(client_channel, 100); blank_client->client_channel = client_channel; blank_client->sub_sys = NULL; blank_client->uuid = NULL; blank_client->table_key = NULL; CRM_DEBUG("Adding IPC Channel to main thread."); blank_client->client_source = G_main_add_IPC_Channel(G_PRIORITY_LOW, client_channel, FALSE, crmd_ipc_input_callback, blank_client, default_ipc_input_destroy); } FNRET(TRUE); } static gboolean stop_subsystem(struct crm_subsystem_s* centry) { cl_log(LOG_INFO, "Stopping sub-system \"%s\"", centry->name); if (centry->pid <= 0) { cl_log(LOG_ERR, "OOPS! client %s not running yet", centry->command); } else { cl_log(LOG_INFO, "Sending quit message to %s.", centry->name); send_request(NULL, NULL, "quit", NULL, centry->name); } return TRUE; } static gboolean start_subsystem(struct crm_subsystem_s* centry) { cl_log(LOG_INFO, "Starting sub-system \"%s\"", centry->command); if (centry->pid != 0) { cl_log(LOG_ERR, "OOPS! 
client %s already running as pid %d" , centry->command, (int) centry->pid); } return run_command(centry, "-r", TRUE); } static gboolean run_command(struct crm_subsystem_s *centry, const char *options, gboolean update_pid) { pid_t pid; /* * We need to ensure that the exec will succeed before * we bother forking. We don't want to respawn something that * won't exec in the first place. */ if (access(centry->path, F_OK|X_OK) != 0) { cl_perror("Cannot (access) exec %s", centry->path); return FALSE; } struct stat buf; int s_res = stat(centry->command, &buf); if(s_res != 0) { cl_perror("Cannot (stat) exec %s", centry->command); return FALSE; } /* We need to fork so we can make child procs not real time */ switch(pid=fork()) { case -1: cl_log(LOG_ERR , "start_a_child_client: Cannot fork."); return FALSE; default: /* Parent */ #if 0 NewTrackedProc(pid, 1, PT_LOGVERBOSE , centry, &ManagedChildTrackOps); #else if(update_pid) centry->pid = pid; #endif return TRUE; case 0: /* Child */ break; } /* Child process: start the managed child */ cl_make_normaltime(); setpgid(0,0); /* Limit peak resource usage, maximize success chances */ if (centry->shortrcount > 0) { alarm(0); sleep(1); } char *cmd_with_options = NULL; int size = strlen(options); size += strlen(centry->command); size += 2; // ' ' + \0 cmd_with_options = cl_malloc((1+size)*sizeof(char)); sprintf(cmd_with_options, "%s %s", centry->command, options); cmd_with_options[size] = 0; cl_log(LOG_INFO, "Executing \"%s\" (pid %d)", cmd_with_options, (int) getpid()); if(CL_SIGINTERRUPT(SIGALRM, 0) < 0) { cl_perror("Cannot set interrupt for child process %s", cmd_with_options); }else{ const char * devnull = "/dev/null"; unsigned int j; struct rlimit oflimits; CL_SIGNAL(SIGCHLD, SIG_DFL); alarm(0); CL_IGNORE_SIG(SIGALRM); /* A precautionary measure */ getrlimit(RLIMIT_NOFILE, &oflimits); for (j=0; j < oflimits.rlim_cur; ++j) { close(j); } (void)devnull; (void)open(devnull, O_RDONLY); /* Stdin: fd 0 */ (void)open(devnull, O_WRONLY); 
/* Stdout: fd 1 */ (void)open(devnull, O_WRONLY); /* Stderr: fd 2 */ (void)execl("/bin/sh", "sh", "-c", cmd_with_options, (const char *)NULL); /* Should not happen */ cl_perror("Cannot exec %s", cmd_with_options); } /* Suppress respawning */ exit(100); // never reached return TRUE; } /* A_LRM_CONNECT */ enum crmd_fsa_input do_lrm_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, void *data) { enum crmd_fsa_input failed = I_NULL;//I_FAIL; int ret = HA_OK; FNIN(); if(action & A_LRM_DISCONNECT) { fsa_lrm_conn->lrm_ops->signoff(fsa_lrm_conn); } if(action & A_LRM_CONNECT) { CRM_DEBUG("LRM: connect..."); fsa_lrm_conn = ll_lrm_new("lrm"); if(NULL == fsa_lrm_conn) { return failed; } CRM_DEBUG("LRM: sigon..."); ret = fsa_lrm_conn->lrm_ops->signon(fsa_lrm_conn, "crmd"); if(ret != HA_OK) { cl_log(LOG_ERR, "Failed to sign on to the LRM"); return failed; } CRM_DEBUG("LRM: set_lrm_callback..."); ret = fsa_lrm_conn->lrm_ops->set_lrm_callback(fsa_lrm_conn, lrm_op_callback, lrm_monitor_callback); if(ret != HA_OK) { cl_log(LOG_ERR, "Failed to set LRM callbacks"); return failed; } /* TODO: create a destroy handler that causes * some recovery to happen */ G_main_add_fd(G_PRIORITY_LOW, fsa_lrm_conn->lrm_ops->inputfd(fsa_lrm_conn), FALSE, lrm_dispatch, fsa_lrm_conn, default_ipc_input_destroy); } if(action & ~(A_LRM_CONNECT|A_LRM_DISCONNECT)) { cl_log(LOG_ERR, "Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } FNRET(I_NULL); } gboolean lrm_dispatch(int fd, gpointer user_data) { ll_lrm_t *lrm = (ll_lrm_t*)user_data; lrm->lrm_ops->rcvmsg(lrm, FALSE); return TRUE; } xmlNodePtr do_lrm_query(void) { GList* lrm_list = NULL; xmlNodePtr data = create_xml_node(NULL, "lrm"); xmlNodePtr agent_list = create_xml_node(data, "lrm_agents"); lrm_list = fsa_lrm_conn->lrm_ops->get_ra_supported(fsa_lrm_conn); if (NULL != lrm_list) { GList* element = g_list_first(lrm_list); while (NULL != element) { char *rsc_type = 
(char*)element->data; xmlNodePtr agent = create_xml_node(agent_list, "lrm_agent"); set_xml_property_copy(agent, "class", rsc_type); /* we dont have these yet */ set_xml_property_copy(agent, "type", NULL); set_xml_property_copy(agent, "version", NULL); element = g_list_next(element); } } g_list_free(lrm_list); lrm_list = fsa_lrm_conn->lrm_ops->get_all_rscs(fsa_lrm_conn); xmlNodePtr rsc_list = create_xml_node(data, "lrm_resources"); GList* element = NULL; if (NULL != lrm_list) { element = g_list_first(lrm_list); } while (NULL != element) { lrm_rsc_t *the_rsc = (lrm_rsc_t*)element->data; /* const char* ra_type; */ /* GHashTable* params; */ xmlNodePtr xml_rsc = create_xml_node(rsc_list, "rsc_state"); set_xml_property_copy(xml_rsc, "id", the_rsc->id); set_xml_property_copy(xml_rsc, "rsc_id", the_rsc->name); set_xml_property_copy(xml_rsc, "node_id",fsa_our_uname); state_flag_t cur_state = 0; CRM_DEBUG("get_cur_state..."); GList* op_list = the_rsc->ops->get_cur_state(the_rsc, &cur_state); CRM_DEBUG2("\tcurrent state:%s\n", cur_state==LRM_RSC_IDLE?"Idel":"Busy"); const char *this_op = NULL; GList* node = g_list_first(op_list); while(NULL != node){ lrm_op_t* op = (lrm_op_t*)node->data; this_op = op->op_type; if(this_op == NULL || strcmp(this_op, "status") != 0){ const char *status_text = ""; switch(op->status) { case LRM_OP_DONE: status_text = "done"; break; case LRM_OP_CANCELLED: status_text = "cancelled"; break; case LRM_OP_TIMEOUT: status_text = "timeout"; break; case LRM_OP_NOTSUPPORTED: status_text = "not suported"; break; case LRM_OP_ERROR: status_text = "error"; break; } set_xml_property_copy(xml_rsc, "op_result", status_text); set_xml_property_copy(xml_rsc, "rsc_op", this_op); // we only want the last one break; } node = g_list_next(node); } element = g_list_next(element); } if (NULL != lrm_list) { g_list_free(lrm_list); } return data; } /* A_LRM_INVOKE */ enum crmd_fsa_input do_lrm_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, 
enum crmd_fsa_input current_input, void *data) { FNIN(); cl_log(LOG_ERR, "Action %s (%.16llx) not supported\n", fsa_action2string(action), action); xmlNodePtr msg = (xmlNodePtr)data; const char *rsc_path[] = { "msg_data", "rsc_op", "resource", "instance_attributes", "parameters" }; const char *operation = get_xml_attr_nested(msg, rsc_path, DIMOF(rsc_path) -3, "operation", TRUE); rsc_id_t rid; const char *id_from_cib = get_xml_attr_nested(msg, rsc_path, DIMOF(rsc_path) -2, "id", TRUE); // only the first 16 chars are used by the LRM strncpy(rid, id_from_cib, 16); const char *crm_op = get_xml_attr(msg, XML_TAG_OPTIONS, "operation", TRUE); lrm_rsc_t *rsc = fsa_lrm_conn->lrm_ops->get_rsc( fsa_lrm_conn, rid); if(crm_op != NULL && strcmp(crm_op, "lrm_query") == 0) { xmlNodePtr data, tmp1, tmp2, reply; tmp1 = create_xml_node(NULL, XML_CIB_TAG_STATE); set_xml_property_copy(tmp1, XML_ATTR_ID, fsa_our_uname); data = create_cib_fragment(tmp1, NULL); tmp2 = do_lrm_query(); add_node_copy(tmp1, tmp2); reply = create_reply(msg, data); relay_message(reply, TRUE); + free_xml(data); free_xml(reply); free_xml(tmp2); free_xml(tmp1); } else if(operation != NULL && strcmp(operation, "monitor") == 0) { if(rsc == NULL) { cl_log(LOG_ERR, "Could not find resource to monitor"); FNRET(I_FAIL); } lrm_mon_t* mon = g_new(lrm_mon_t, 1); mon->op_type = "status"; mon->params = NULL; mon->timeout = 0; mon->user_data = rsc; mon->mode = LRM_MONITOR_SET; mon->interval = 2; mon->target = 1; rsc->ops->set_monitor(rsc,mon); mon = g_new(lrm_mon_t, 1); } else if(operation != NULL) { if(rsc == NULL) { // add it to the list CRM_DEBUG("add_rsc..."); fsa_lrm_conn->lrm_ops->add_rsc( fsa_lrm_conn, rid, get_xml_attr_nested(msg, rsc_path, DIMOF(rsc_path) -2, "class", TRUE), get_xml_attr_nested(msg, rsc_path, DIMOF(rsc_path) -2, "type", TRUE), NULL); rsc = fsa_lrm_conn->lrm_ops->get_rsc( fsa_lrm_conn, rid); } if(rsc == NULL) { cl_log(LOG_ERR, "Could not add resource to LRM"); FNRET(I_FAIL); } // now do the op 
CRM_DEBUG2("performing op %s...", operation); lrm_op_t* op = g_new(lrm_op_t, 1); op->op_type = operation; op->params = xml2list(msg, rsc_path, DIMOF(rsc_path)); op->timeout = 0; op->user_data = rsc; rsc->ops->perform_op(rsc, op); } /* while (TRUE) { */ /* lrm->lrm_ops->rcvmsg(lrm,TRUE); */ /* } */ FNRET(I_NULL); } GHashTable * xml2list(xmlNodePtr parent, const char**attr_path, int depth) { xmlNodePtr node_iter = NULL; GHashTable *nvpair_hash = g_hash_table_new(&g_str_hash, &g_str_equal); xmlNodePtr nvpair_list = find_xml_node_nested(parent, attr_path, depth); if(nvpair_list != NULL){ node_iter = nvpair_list->children; while(node_iter != NULL) { const char *key = xmlGetProp(node_iter, "name"); const char *value = xmlGetProp(node_iter, "value"); CRM_DEBUG3("Added %s=%s", key, value); g_hash_table_insert (nvpair_hash, cl_strdup(key), cl_strdup(value)); node_iter = node_iter->next; } } return nvpair_hash; } void do_update_resource(lrm_rsc_t *rsc, int status, int rc, const char *op_type) { /* */ xmlNodePtr update, iter; update = create_xml_node(NULL, "node_state"); set_xml_property_copy(update, XML_ATTR_ID, fsa_our_uname); iter = create_xml_node(update, "lrm"); iter = create_xml_node(iter, "lrm_resources"); iter = create_xml_node(iter, "lrm_resource"); set_xml_property_copy(iter, XML_ATTR_ID, rsc->id); set_xml_property_copy(iter, "last_op", op_type); char *tmp = crm_itoa(status); set_xml_property_copy(iter, "op_status", tmp); cl_free(tmp); tmp = crm_itoa(rc); set_xml_property_copy(iter, "op_code", tmp); cl_free(tmp); send_cib_lrm_update(update, FALSE); free_xml(update); } enum crmd_fsa_input do_lrm_event(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, void *data) { FNIN(); if(cause == C_LRM_MONITOR_CALLBACK) { lrm_mon_t* monitor = (lrm_mon_t*)data; lrm_rsc_t* rsc = monitor->rsc; switch(monitor->status) { case LRM_OP_DONE: CRM_DEBUG("An LRM monitor operation passed"); FNRET(I_NULL); break; case LRM_OP_CANCELLED: 
case LRM_OP_TIMEOUT: case LRM_OP_NOTSUPPORTED: case LRM_OP_ERROR: cl_log(LOG_ERR, "An LRM monitor operation failed" " or was aborted"); do_update_resource(rsc, monitor->status, monitor->rc, monitor->op_type); break; } } else if(cause == C_LRM_OP_CALLBACK) { lrm_op_t* op = (lrm_op_t*)data; lrm_rsc_t* rsc = op->rsc; switch(op->status) { case LRM_OP_CANCELLED: case LRM_OP_TIMEOUT: case LRM_OP_NOTSUPPORTED: case LRM_OP_ERROR: cl_log(LOG_ERR, "An LRM operation failed" " or was aborted"); // keep going case LRM_OP_DONE: do_update_resource(rsc, op->status, op->rc, op->op_type); break; } } else { FNRET(I_FAIL); } FNRET(I_NULL); } /* * If lrm_replace is TRUE, then we keep the update local * (Ie. we dont send it to the DC) because that will be handled by * another part of the JoinProtocol at the correct point. */ void send_cib_lrm_update(xmlNodePtr update, gboolean lrm_replace) { xmlNodePtr fragment, tmp1, tmp2; tmp1 = create_xml_node(NULL, XML_CIB_TAG_STATE); set_xml_property_copy(tmp1, XML_ATTR_ID, fsa_our_uname); fragment = create_cib_fragment(tmp1, NULL); if(lrm_replace) { tmp2 = create_xml_node(tmp1, "lrm"); process_cib_request(CRM_OPERATION_DELETE, NULL, fragment); free_xml(tmp2); add_node_copy(tmp1, update); process_cib_request(CRM_OPERATION_UPDATE, NULL, fragment); } else { add_node_copy(tmp1, update); send_cib_status_update(tmp1); } - free_xml(fragment); // takes tmp1 with it + free_xml(fragment); + free_xml(tmp1); } void send_cib_status_update(xmlNodePtr update) { xmlNodePtr answer; xmlNodePtr fragment = create_cib_fragment(update, NULL); xmlNodePtr options = create_xml_node(NULL, XML_TAG_OPTIONS); set_xml_property_copy(options, XML_ATTR_VERBOSE, "true"); // set verbose answer = process_cib_request(CRM_OPERATION_UPDATE, options, fragment); // distribute the answer if(AM_I_DC) { xmlNodePtr new_options = set_xml_attr(NULL, XML_TAG_OPTIONS, XML_ATTR_FILTER_TYPE, XML_CIB_TAG_STATUS, TRUE); + free_xml(answer); answer = process_cib_request(CRM_OPERATION_BUMP, 
new_options, NULL); - send_request(NULL, answer, CRM_OPERATION_REPLACE, - NULL, CRM_SYSTEM_CRMD); - free_xml(new_options); + if(answer == NULL) { + cl_log(LOG_ERR, "Result of BUMP in %s was NULL", + __FUNCTION__); + } else { + send_request(NULL, answer, CRM_OPERATION_REPLACE, + NULL, CRM_SYSTEM_CRMD); + } } else { send_request(NULL, answer, CRM_OPERATION_UPDATE, NULL, CRM_SYSTEM_DCIB); } + free_xml(fragment); free_xml(answer); }