diff --git a/Makefile.am b/Makefile.am index 2bc2bab6c7..2b4c598bc3 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,140 +1,140 @@ # # Pacemaker code # # Copyright (C) 2004 Andrew Beekhof # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # EXTRA_DIST = bootstrap ConfigureMe README.in libltdl.tar RPM = @RPM@ RPMFLAGS = -ba TARFILE = pacemaker.tar.gz AM_TAR = tar LAST_RELEASE = Pacemaker-0.6.3 STABLE_SERIES = stable-0.6 AUTOMAKE_OPTIONS = foreign ##ACLOCAL = aclocal -I $(auxdir) PRETTY_ARGS = --braces-on-if-line --braces-on-struct-decl-line --cuddle-do-while --cuddle-else --leave-preprocessor-space --blank-lines-after-declarations --blank-lines-after-procedures -sc --case-indentation4 --no-space-after-function-call-names --no-blank-lines-after-commas --procnames-start-lines --leave-optional-blank-lines --indent-level4 --line-length100 --break-before-boolean-operator --ignore-newlines --no-space-after-if --no-space-after-for --no-space-after-while --paren-indentation4 MAINTAINERCLEANFILES = Makefile.in aclocal.m4 configure DRF/config-h.in \ DRF/stamp-h.in libtool.m4 ltdl.m4 libltdl.tar -SUBDIRS = debian build replace include lib cib pengine crmd crm tools doc cts xml +SUBDIRS = debian build replace include lib cib pengine crmd crm fencing tools doc cts xml tgz: rm -f $(TARFILE) hg archive -t tgz $(TARFILE) echo Rebuilt $(TARFILE) on `date` changes: printf "$(PACKAGE) ($(VERSION)-1) stable; urgency=medium\n" printf " * Update source tarball to revision: `hg id`\n" printf " * Statistics:\n" printf " Changesets: `hg log -M --template "{desc|firstline|strip}\n" -r $(LAST_RELEASE):tip | wc -l`\n" printf " Diff: " hg diff -r $(LAST_RELEASE):tip | diffstat | tail -n 1 printf "\n * Testing Notes:\n" printf "\n + Test hardware:\n" printf "\n + All testing was performed with STONITH enabled\n" printf "\n + Pending bugs encountered during testing:\n" printf "\n * Changes since $(LAST_RELEASE)\n" hg log -M --template " + {desc|firstline|strip}\n" -r $(LAST_RELEASE):tip | grep -v Low: | sort -uf printf "\n -- Andrew Beekhof `date +"%a, %d %b %Y %T %z"`\n" features: printf "$(PACKAGE) ($(VERSION)-1) unstable; urgency=medium\n" printf " * Update source tarball to revision: `hg id`\n" printf " * Statistics:\n" printf " Changesets: `hg out -M --template "{desc|firstline|strip}\n" ../$(STABLE_SERIES) | wc -l`\n" printf " Diff: " hg out -M -p ../$(STABLE_SERIES) | diffstat | tail -n 1 printf "\n * Changes added since $(STABLE_SERIES)\n" hg out -M --template " + {desc|firstline|strip}\n" ../$(STABLE_SERIES) | grep -v Low: | sort -uf printf "\n -- Andrew Beekhof `date +"%a, %d %b %Y %T %z"`\n" obs: tgz make changes > .changes scp .changes $(TARFILE) vmhost.beekhof.net:Development/obs/server:ha-clustering/pacemaker/ dev: tgz make features > .changes scp .changes $(TARFILE) vmhost.beekhof.net:Development/obs/server:ha-clustering:UNSTABLE/pacemaker/ home: tgz make changes > .changes scp .changes $(TARFILE) vmhost.beekhof.net:Development/obs/home:beekhof/pacemaker-test/ global: clean-generic gtags -q global-html: global htags -sanhIT global-www: global-html rsync -avzxlSD --progress HTML/ root@clusterlabs.org:/var/lib/global/pacemaker pretty: for file in `find . -name "*.c" | tr '\n' ' '`; do \ gnuindent $(PRETTY_ARGS) $$file; \ done rpmtgz: tgz echo "Installing $(TARFILE) into /usr/src/packages/SOURCES for rpm" -test -d /usr/src/packages/SOURCES && cp $(TARFILE) /usr/src/packages/SOURCES/ -test -d /usr/src/redhat/SOURCES && cp $(TARFILE) /usr/src/redhat/SOURCES/ rpm: rpmtgz $(RPM) $(RPMFLAGS) $(top_srcdir)/pacemaker.spec * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_GETOPT_H # include #endif void usage(const char *cmd, int exit_status); gboolean BE_QUIET = FALSE; gboolean DO_WRITE = TRUE; gboolean DO_DELETE = FALSE; char *dest_node = NULL; char *set_name = NULL; char *attr_id = NULL; char *attr_name = NULL; const char *type = NULL; const char *rsc_id = NULL; const char *dest_uname = NULL; const char *attr_value = NULL; #define OPTARGS "V?GDQN:U:u:s:n:v:l:t:i:!r:" int main(int argc, char **argv) { gboolean is_done = FALSE; cib_t * the_cib = NULL; enum cib_errors rc = cib_ok; int cib_opts = cib_sync_call; int argerr = 0; int flag; #ifdef HAVE_GETOPT_H int option_index = 0; static struct option long_options[] = { /* Top-level Options */ {"verbose", 0, 0, 'V'}, {"help", 0, 0, '?'}, {"quiet", 0, 0, 'Q'}, {"get-value", 0, 0, 'G'}, {"delete-attr", 0, 0, 'D'}, {"node", 1, 0, 'N'}, {"node-uname", 1, 0, 'U'}, /* legacy */ {"node-uuid", 1, 0, 'u'}, /* legacy */ {"set-name", 1, 0, 's'}, {"attr-id", 1, 0, 'i'}, {"attr-name", 1, 0, 'n'}, {"attr-value", 1, 0, 'v'}, {"resource-id", 1, 0, 'r'}, {"lifetime", 1, 0, 'l'}, {"type", 1, 0, 't'}, {"inhibit-policy-engine", 0, 0, '!'}, {0, 0, 0, 0} }; #endif crm_log_init(basename(argv[0]), LOG_ERR, FALSE, FALSE, argc, argv); if(argc < 2) { usage(crm_system_name, LSB_EXIT_EINVAL); } while (1) { #ifdef HAVE_GETOPT_H flag = getopt_long(argc, argv, OPTARGS, long_options, &option_index); #else flag = getopt(argc, argv, OPTARGS); #endif if (flag == -1) break; switch(flag) { case 'V': cl_log_enable_stderr(TRUE); alter_debug(DEBUG_INC); break; case '?': usage(crm_system_name, LSB_EXIT_OK); break; case 'G': DO_WRITE = FALSE; break; case 'Q': BE_QUIET = TRUE; break; case 'D': DO_DELETE = TRUE; + DO_WRITE = FALSE; break; case 'U': case 'N': crm_debug_2("Option %c => %s", flag, optarg); dest_uname = optarg; break; case 'u': crm_debug_2("Option %c => %s", flag, optarg); dest_node = crm_strdup(optarg); break; case 's': crm_debug_2("Option %c => %s", flag, optarg); set_name = crm_strdup(optarg); break; case 'l': crm_debug_2("Option %c => %s", flag, optarg); type = optarg; break; case 't': crm_debug_2("Option %c => %s", flag, optarg); type = optarg; break; case 'n': crm_debug_2("Option %c => %s", flag, optarg); attr_name = crm_strdup(optarg); break; case 'i': crm_debug_2("Option %c => %s", flag, optarg); attr_id = crm_strdup(optarg); break; case 'v': crm_debug_2("Option %c => %s", flag, optarg); attr_value = optarg; break; case 'r': crm_debug_2("Option %c => %s", flag, optarg); rsc_id = optarg; break; case '!': crm_warn("Inhibiting notifications for this update"); cib_opts |= cib_inhibit_notify; break; default: printf("Argument code 0%o (%c) is not (?yet?) supported\n", flag, flag); ++argerr; break; } } if (optind < argc) { printf("non-option ARGV-elements: "); while (optind < argc) printf("%s ", argv[optind++]); printf("\n"); } if (optind > argc) { ++argerr; } if (argerr) { usage(crm_system_name, LSB_EXIT_GENERIC); } the_cib = cib_new(); rc = the_cib->cmds->signon(the_cib, crm_system_name, cib_command); if(rc != cib_ok) { fprintf(stderr, "Error signing on to the CIB service: %s\n", cib_error2string(rc)); return rc; } if(safe_str_eq(crm_system_name, "crm_attribute") && type == NULL && dest_uname == NULL) { /* we're updating cluster options - dont populate dest_node */ type = XML_CIB_TAG_CRMCONFIG; } else if(dest_uname == NULL) { struct utsname name; if(uname(&name) < 0) { cl_perror("uname(2) call failed"); return 1; } dest_uname = name.nodename; crm_info("Detected uname: %s", dest_uname); } if(dest_node == NULL && dest_uname != NULL) { rc = query_node_uuid(the_cib, dest_uname, &dest_node); if(rc != cib_ok) { fprintf(stderr,"Could not map uname=%s to a UUID: %s\n", dest_uname, cib_error2string(rc)); return rc; } else { crm_info("Mapped %s to %s", dest_uname, crm_str(dest_node)); } } if(safe_str_eq(crm_system_name, "crm_master")) { int len = 0; if(safe_str_eq(type, "reboot")) { type = XML_CIB_TAG_STATUS; } else { type = XML_CIB_TAG_NODES; } rsc_id = getenv("OCF_RESOURCE_INSTANCE"); if(rsc_id == NULL && dest_node == NULL) { fprintf(stderr, "This program should only ever be " "invoked from inside an OCF resource agent.\n"); fprintf(stderr, "DO NOT INVOKE MANUALLY FROM THE COMMAND LINE.\n"); return 1; } else if(dest_node == NULL) { fprintf(stderr, "Could not determine node UUID.\n"); return 1; } else if(rsc_id == NULL) { fprintf(stderr, "Could not determine resource name.\n"); return 1; } len = 8 + strlen(rsc_id); crm_malloc0(attr_name, len); snprintf(attr_name, len, "master-%s", rsc_id); len = 3 + strlen(type) + strlen(attr_name) + strlen(dest_node); crm_malloc0(attr_id, len); snprintf(attr_id, len, "%s-%s-%s", type, attr_name, dest_node); len = 8 + strlen(dest_node); crm_malloc0(set_name, len); snprintf(set_name, len, "master-%s", dest_node); } else if(safe_str_eq(crm_system_name, "crm_failcount")) { type = XML_CIB_TAG_STATUS; if(rsc_id == NULL) { fprintf(stderr,"Please specify a resource with -r\n"); return 1; } if(dest_node == NULL) { fprintf(stderr,"Please specify a node with -U or -u\n"); return 1; } - if(char2score(attr_value) <= 0){ + if(DO_WRITE && char2score(attr_value) <= 0) { if(safe_str_neq(attr_value, "0")) { fprintf(stderr,"%s is an inappropriate value for a failcount.\n", attr_value); return 1; } } set_name = NULL; attr_name = crm_concat("fail-count", rsc_id, '-'); } else if(safe_str_eq(crm_system_name, "crm_standby")) { if(dest_node == NULL) { fprintf(stderr,"Please specify a node with -U or -u\n"); return 1; } else if(DO_DELETE) { rc = delete_standby( the_cib, dest_node, type, attr_value); } else if(DO_WRITE) { if(attr_value == NULL) { fprintf(stderr, "Please specify 'true' or 'false' with -v\n"); return 1; } rc = set_standby(the_cib, dest_node, type, attr_value); } else { char *read_value = NULL; char *scope = NULL; if(type) { scope = crm_strdup(type); } rc = query_standby( the_cib, dest_node, &scope, &read_value); if(BE_QUIET == FALSE) { if(attr_id) { fprintf(stdout, "id=%s ", attr_id); } if(attr_name) { fprintf(stdout, "name=%s ", attr_name); } fprintf(stdout, "scope=%s value=%s\n", scope, read_value?read_value:"(null)"); } else if(read_value != NULL) { fprintf(stdout, "%s\n", read_value); } else if(rc == cib_NOTEXISTS) { fprintf(stdout, "off\n"); rc = cib_ok; } crm_free(scope); } is_done = TRUE; } else if (type == NULL) { type = XML_CIB_TAG_NODES; return 1; } if(safe_str_eq(type, XML_CIB_TAG_CRMCONFIG)) { dest_node = NULL; } if(is_done) { } else if(DO_DELETE) { rc = delete_attr(the_cib, cib_opts, type, dest_node, set_name, attr_id, attr_name, attr_value, TRUE); if(rc == cib_NOTEXISTS) { /* Nothing to delete... * which means its not there... * which is what the admin wanted */ rc = cib_ok; } else if(rc != cib_missing_data && safe_str_eq(crm_system_name, "crm_failcount")) { char *now_s = NULL; time_t now = time(NULL); now_s = crm_itoa(now); update_attr(the_cib, cib_sync_call, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, "last-lrm-refresh", now_s, TRUE); crm_free(now_s); } } else if(DO_WRITE) { CRM_DEV_ASSERT(type != NULL); CRM_DEV_ASSERT(attr_name != NULL); CRM_DEV_ASSERT(attr_value != NULL); rc = update_attr(the_cib, cib_opts, type, dest_node, set_name, attr_id, attr_name, attr_value, TRUE); } else { char *read_value = NULL; rc = read_attr(the_cib, type, dest_node, set_name, attr_id, attr_name, &read_value, TRUE); if(rc == cib_NOTEXISTS && safe_str_eq(crm_system_name, "crm_failcount")) { rc = cib_ok; read_value = crm_strdup("0"); } crm_info("Read %s=%s %s%s", attr_name, crm_str(read_value), set_name?"in ":"", set_name?set_name:""); if(rc == cib_missing_data) { rc = cib_ok; } else if(BE_QUIET == FALSE) { fprintf(stdout, "%s%s %s%s value=%s\n", attr_id?"id=":"", attr_id?attr_id:"", attr_name?"name=":"", attr_name?attr_name:"", read_value?read_value:"(null)"); } else if(read_value != NULL) { fprintf(stdout, "%s\n", read_value); } } the_cib->cmds->signoff(the_cib); if(rc == cib_missing_data) { printf("Please choose from one of the matches above and suppy the 'id' with --attr-id\n"); } else if(rc != cib_ok) { fprintf(stderr, "Error performing operation: %s\n", cib_error2string(rc)); } return rc; } void usage(const char *cmd, int exit_status) { FILE *stream; stream = exit_status ? stderr : stdout; if(safe_str_eq(cmd, "crm_master")) { fprintf(stream, "usage: %s [-?VQ] -(D|G|v) [-l]\n", cmd); } else if(safe_str_eq(cmd, "crm_standby")) { fprintf(stream, "usage: %s [-?V] -(u|U) -(D|G|v) [-l]\n", cmd); } else if(safe_str_eq(cmd, "crm_failcount")) { fprintf(stream, "usage: %s [-?V] -(u|U) -(D|G|v) -r\n", cmd); } else { fprintf(stream, "usage: %s [-?V] -(D|G|v) [options]\n", cmd); } fprintf(stream, "Options\n"); fprintf(stream, "\t--%s (-%c)\t: this help message\n", "help", '?'); fprintf(stream, "\t--%s (-%c)\t: " "turn on debug info. additional instances increase verbosity\n", "verbose", 'V'); fprintf(stream, "\t--%s (-%c)\t: Print only the value on stdout" " (use with -G)\n", "quiet", 'Q'); fprintf(stream, "\t--%s (-%c)\t: " "Retrieve rather than set the %s\n", "get-value", 'G', safe_str_eq(cmd, "crm_master")?"named attribute":"preference to be promoted"); fprintf(stream, "\t--%s (-%c)\t: " "Delete rather than set the attribute\n", "delete-attr", 'D'); fprintf(stream, "\t--%s (-%c) \t: " "Value to use (ignored with -G)\n", "attr-value", 'v'); fprintf(stream, "\t--%s (-%c) \t: " "The 'id' of the attribute. Advanced use only.\n", "attr-id", 'i'); if(safe_str_eq(cmd, "crm_master")) { fprintf(stream, "\t--%s (-%c) \t: " "How long the preference lasts (reboot|forever)\n", "lifetime", 'l'); exit(exit_status); } else if(safe_str_eq(cmd, "crm_standby")) { fprintf(stream, "\t--%s (-%c) \t: " "uname of the node to change\n", "node", 'N'); fprintf(stream, "\t--%s (-%c) \t: " "How long the preference lasts (reboot|forever)\n" "\t If a forever value exists, it is ALWAYS used by the CRM\n" "\t instead of any reboot value\n", "lifetime", 'l'); exit(exit_status); } fprintf(stream, "\t--%s (-%c) \t: " "uname of the node to change\n", "node-uname", 'h'); if(safe_str_eq(cmd, "crm_failcount")) { fprintf(stream, "\t--%s (-%c) \t: " "name of the resource to operate on\n", "resource-id", 'r'); } else { fprintf(stream, "\t--%s (-%c) \t: " "Set of attributes in which to read/write the attribute\n", "set-name", 's'); fprintf(stream, "\t--%s (-%c) \t: " "Attribute to set\n", "attr-name", 'n'); fprintf(stream, "\t--%s (-%c) \t: " "Which section of the CIB to set the attribute: (%s|%s|%s)\n", "type", 't', XML_CIB_TAG_NODES, XML_CIB_TAG_STATUS, XML_CIB_TAG_CRMCONFIG); fprintf(stream, "\t -t=%s options: -N -n [-s]\n", XML_CIB_TAG_NODES); fprintf(stream, "\t -t=%s options: -N -n [-s]\n", XML_CIB_TAG_STATUS); fprintf(stream, "\t -t=%s options: -N [-s]\n", XML_CIB_TAG_CRMCONFIG); } if(safe_str_neq(crm_system_name, "crm_standby")) { fprintf(stream, "\t--%s (-%c)\t: " "Make a change and prevent the TE/PE from seeing it straight away.\n" "\t You may think you want this option but you don't." " Advanced use only - you have been warned!\n", "inhibit-policy-engine", '!'); } fflush(stream); exit(exit_status); } diff --git a/crm/ais/plugin.c b/crm/ais/plugin.c index ebe6719164..e2444083fc 100644 --- a/crm/ais/plugin.c +++ b/crm/ais/plugin.c @@ -1,1174 +1,1182 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "plugin.h" #include "utils.h" #include #define OPENAIS_EXTERNAL_SERVICE insane_ais_header_hack_in__totem_h #include #include #include #include #include #include #include #include #include #include #include #include int plugin_log_level = LOG_DEBUG; char *local_uname = NULL; int local_uname_len = 0; unsigned int local_nodeid = 0; char *ipc_channel_name = NULL; unsigned long long membership_seq = 0; pthread_t crm_wait_thread; gboolean wait_active = TRUE; GHashTable *membership_list = NULL; #define MAX_RESPAWN 100 #define crm_flag_none 0x00000000 #define crm_flag_members 0x00000001 struct crm_identify_msg_s { mar_req_header_t header __attribute__((aligned(8))); uint32_t id; uint32_t pid; int32_t votes; uint32_t processes; char uname[256]; char version[256]; } __attribute__((packed)); static crm_child_t crm_children[] = { { 0, crm_proc_none, crm_flag_none, 0, FALSE, "none", 0, NULL, NULL }, { 0, crm_proc_ais, crm_flag_none, 0, FALSE, "ais", 0, NULL, NULL }, { 0, crm_proc_lrmd, crm_flag_none, 0, TRUE, "lrmd", 0, HA_LIBHBDIR"/lrmd", NULL }, { 0, crm_proc_cib, crm_flag_members, 0, TRUE, "cib", HA_CCMUID, HA_LIBHBDIR"/cib", NULL }, { 0, crm_proc_crmd, crm_flag_members, 0, TRUE, "crmd", HA_CCMUID, HA_LIBHBDIR"/crmd", NULL }, { 0, crm_proc_attrd,crm_flag_none, 0, TRUE, "attrd", HA_CCMUID, HA_LIBHBDIR"/attrd", NULL }, + { 0, crm_proc_stonithd,crm_flag_none, 0, TRUE, "stonithd", 0, HA_LIBHBDIR"/stonithd", NULL }, }; void send_cluster_id(void); int send_cluster_msg_raw(AIS_Message *ais_msg); char *ais_generate_membership_data(void); extern totempg_groups_handle openais_group_handle; void global_confchg_fn ( enum totem_configuration_type configuration_type, unsigned int *member_list, int member_list_entries, unsigned int *left_list, int left_list_entries, unsigned int *joined_list, int joined_list_entries, struct memb_ring_id *ring_id); int crm_exec_exit_fn (struct objdb_iface_ver0 *objdb); int crm_exec_init_fn (struct objdb_iface_ver0 *objdb); int crm_config_init_fn(struct objdb_iface_ver0 *objdb); int ais_ipc_client_connect_callback (void *conn); int ais_ipc_client_exit_callback (void *conn); void ais_cluster_message_swab(void *msg); void ais_cluster_message_callback(void *message, unsigned int nodeid); void ais_ipc_message_callback(void *conn, void *msg); void ais_quorum_query(void *conn, void *msg); void ais_node_list_query(void *conn, void *msg); void ais_manage_notification(void *conn, void *msg); void ais_cluster_id_swab(void *msg); void ais_cluster_id_callback(void *message, unsigned int nodeid); static struct openais_lib_handler crm_lib_service[] = { { /* 0 */ .lib_handler_fn = ais_ipc_message_callback, .response_size = sizeof (mar_res_header_t), .response_id = CRM_MESSAGE_IPC_ACK, .flow_control = OPENAIS_FLOW_CONTROL_NOT_REQUIRED }, { /* 1 */ .lib_handler_fn = ais_node_list_query, .response_size = sizeof (mar_res_header_t), .response_id = CRM_MESSAGE_IPC_ACK, .flow_control = OPENAIS_FLOW_CONTROL_NOT_REQUIRED }, { /* 2 */ .lib_handler_fn = ais_manage_notification, .response_size = sizeof (mar_res_header_t), .response_id = CRM_MESSAGE_IPC_ACK, .flow_control = OPENAIS_FLOW_CONTROL_NOT_REQUIRED }, }; static struct openais_exec_handler crm_exec_service[] = { { /* 0 */ .exec_handler_fn = ais_cluster_message_callback, .exec_endian_convert_fn = ais_cluster_message_swab }, { /* 1 */ .exec_handler_fn = ais_cluster_id_callback, .exec_endian_convert_fn = ais_cluster_id_swab } }; static void crm_exec_dump_fn(void) { ENTER(""); ais_err("Called after SIG_USR2"); LEAVE(""); } /* * Exports the interface for the service */ struct openais_service_handler crm_service_handler = { .name = "LHA Cluster Manager", .id = CRM_SERVICE, .private_data_size = 0, .flow_control = OPENAIS_FLOW_CONTROL_NOT_REQUIRED, .lib_init_fn = ais_ipc_client_connect_callback, .lib_exit_fn = ais_ipc_client_exit_callback, .lib_service = crm_lib_service, .lib_service_count = sizeof (crm_lib_service) / sizeof (struct openais_lib_handler), .exec_init_fn = crm_exec_init_fn, .exec_exit_fn = crm_exec_exit_fn, .exec_service = crm_exec_service, .exec_service_count = sizeof (crm_exec_service) / sizeof (struct openais_exec_handler), .config_init_fn = crm_config_init_fn, .confchg_fn = global_confchg_fn, .exec_dump_fn = crm_exec_dump_fn, /* void (*sync_init) (void); */ /* int (*sync_process) (void); */ /* void (*sync_activate) (void); */ /* void (*sync_abort) (void); */ }; /* * Dynamic Loader definition */ struct openais_service_handler *crm_get_handler_ver0 (void); static struct openais_service_handler_iface_ver0 crm_service_handler_iface = { .openais_get_service_handler_ver0 = crm_get_handler_ver0 }; static struct lcr_iface openais_crm_ver0[1] = { { .name = "lha_crm", .version = 0, .versions_replace = 0, .versions_replace_count = 0, .dependencies = 0, .dependency_count = 0, .constructor = NULL, .destructor = NULL, .interfaces = NULL } }; static struct lcr_comp crm_comp_ver0 = { .iface_count = 1, .ifaces = openais_crm_ver0 }; struct openais_service_handler *crm_get_handler_ver0 (void) { return (&crm_service_handler); } __attribute__ ((constructor)) static void register_this_component (void) { lcr_interfaces_set (&openais_crm_ver0[0], &crm_service_handler_iface); lcr_component_register (&crm_comp_ver0); } static void crm_plugin_init(struct objdb_iface_ver0 *objdb) { int rc = 0; struct utsname us; char *value = NULL; unsigned int object_service_handle = 0; membership_list = g_hash_table_new_full( g_direct_hash, g_direct_equal, NULL, destroy_ais_node); setenv("HA_COMPRESSION", "bz2", 1); setenv("HA_cluster_type", "openais", 1); #if 0 objdb->object_find_reset (OBJECT_PARENT_HANDLE); if (objdb->object_find ( OBJECT_PARENT_HANDLE, "pacemaker", strlen ("pacemaker"), &object_service_handle) != 0) { object_service_handle = 0; ais_info("No configuration supplied for pacemaker"); } #endif objdb_get_string( objdb, object_service_handle, "logfacility", &value, "daemon"); setenv("HA_logfacility", value, 1); objdb_get_string(objdb, object_service_handle, "initdead", &value, "20"); setenv("HA_initdead", value, 1); objdb_get_string(objdb, object_service_handle, "debug", &value, "1"); setenv("HA_debug", value, 1); rc = atoi(value); plugin_log_level = LOG_INFO+rc; if(system("echo 1 > /proc/sys/kernel/core_uses_pid") != 0) { ais_perror("Could not enable /proc/sys/kernel/core_uses_pid"); } ais_info("CRM: Initialized"); log_printf(LOG_INFO, "Logging: Initialized %s\n", __PRETTY_FUNCTION__); rc = uname(&us); AIS_ASSERT(rc == 0); local_uname = ais_strdup(us.nodename); local_uname_len = strlen(local_uname); ais_info("Local hostname: %s", local_uname); local_nodeid = totempg_my_nodeid_get(); update_member(local_nodeid, 0, 1, 0, local_uname, CRM_NODE_LOST); } /* IMPL */ int crm_config_init_fn(struct objdb_iface_ver0 *objdb) { ENTER(""); LEAVE(""); return 0; } static void *crm_wait_dispatch (void *arg) { struct timespec waitsleep = { .tv_sec = 0, .tv_nsec = 100000 /* 100 msec */ }; while(wait_active) { int lpc = 0; for (; lpc < SIZEOF(crm_children); lpc++) { if(crm_children[lpc].pid > 0) { int status; pid_t pid = wait4( crm_children[lpc].pid, &status, WNOHANG, NULL); if(pid == 0) { continue; } else if(pid < 0) { ais_perror("crm_wait_dispatch: Call to wait4(%s) failed", crm_children[lpc].name); continue; } /* cleanup */ crm_children[lpc].pid = 0; crm_children[lpc].conn = NULL; crm_children[lpc].async_conn = NULL; if(WIFSIGNALED(status)) { int sig = WTERMSIG(status); ais_warn("Child process %s terminated with signal %d" " (pid=%d, core=%s)", crm_children[lpc].name, sig, pid, WCOREDUMP(status)?"true":"false"); } else if (WIFEXITED(status)) { int rc = WEXITSTATUS(status); ais_notice("Child process %s exited (pid=%d, rc=%d)", crm_children[lpc].name, pid, rc); if(rc == 100) { ais_notice("Child process %s no longer wishes" " to be respawned", crm_children[lpc].name); crm_children[lpc].respawn = FALSE; } } crm_children[lpc].respawn_count += 1; if(crm_children[lpc].respawn_count > MAX_RESPAWN) { ais_notice("Child respawn count exceeded by %s", crm_children[lpc].name); crm_children[lpc].respawn = FALSE; } if(crm_children[lpc].respawn) { ais_info("Respawning failed child process: %s", crm_children[lpc].name); spawn_child(&(crm_children[lpc])); } else { send_cluster_id(); } } } sched_yield (); nanosleep (&waitsleep, 0); } return 0; } +#include + int crm_exec_init_fn (struct objdb_iface_ver0 *objdb) { int lpc = 0; static gboolean need_init = TRUE; ENTER(""); if(need_init) { need_init = FALSE; crm_plugin_init(objdb); pthread_create (&crm_wait_thread, NULL, crm_wait_dispatch, NULL); + + mkdir(HA_VARRUNDIR, 750); + mkdir(HA_VARRUNDIR"/crm", 750); + chown(HA_VARRUNDIR"/crm", HA_CCMUID, HA_APIGID); + chown(HA_VARRUNDIR, HA_CCMUID, HA_APIGID); for (; lpc < SIZEOF(crm_children); lpc++) { spawn_child(&(crm_children[lpc])); } } ais_info("CRM: Initialized"); LEAVE(""); return 0; } /* static void ais_print_node(const char *prefix, struct totem_ip_address *host) { int len = 0; char *buffer = NULL; ais_malloc0(buffer, INET6_ADDRSTRLEN+1); inet_ntop(host->family, host->addr, buffer, INET6_ADDRSTRLEN); len = strlen(buffer); ais_info("%s: %.*s", prefix, len, buffer); ais_free(buffer); } */ #if 0 /* copied here for reference from exec/totempg.c */ char *totempg_ifaces_print (unsigned int nodeid) { static char iface_string[256 * INTERFACE_MAX]; char one_iface[64]; struct totem_ip_address interfaces[INTERFACE_MAX]; char **status; unsigned int iface_count; unsigned int i; int res; iface_string[0] = '\0'; res = totempg_ifaces_get (nodeid, interfaces, &status, &iface_count); if (res == -1) { return ("no interface found for nodeid"); } for (i = 0; i < iface_count; i++) { sprintf (one_iface, "r(%d) ip(%s) ", i, totemip_print (&interfaces[i])); strcat (iface_string, one_iface); } return (iface_string); } #endif static void ais_mark_unseen_peer_dead( gpointer key, gpointer value, gpointer user_data) { int *changed = user_data; crm_node_t *node = value; if(node->last_seen != membership_seq && ais_str_eq(CRM_NODE_LOST, node->state) == FALSE) { ais_info("Node %s was not seen in the previous transition", node->uname); *changed += update_member(node->id, membership_seq, node->votes, node->processes, node->uname, CRM_NODE_LOST); ais_info("Node %s marked dead", node->uname); } } void global_confchg_fn ( enum totem_configuration_type configuration_type, unsigned int *member_list, int member_list_entries, unsigned int *left_list, int left_list_entries, unsigned int *joined_list, int joined_list_entries, struct memb_ring_id *ring_id) { int lpc = 0; int changed = 0; int do_update = 0; ENTER(""); AIS_ASSERT(ring_id != NULL); switch(configuration_type) { case TOTEM_CONFIGURATION_REGULAR: do_update = 1; break; case TOTEM_CONFIGURATION_TRANSITIONAL: break; } membership_seq = ring_id->seq; ais_notice("%s membership event on ring %lld: memb=%d, new=%d, lost=%d", do_update?"Stable":"Transitional", ring_id->seq, member_list_entries, joined_list_entries, left_list_entries); if(do_update == 0) { for(lpc = 0; lpc < joined_list_entries; lpc++) { const char *prefix = "new: "; uint32_t nodeid = joined_list[lpc]; ais_info("%s %s %u", prefix, member_uname(nodeid), nodeid); } for(lpc = 0; lpc < member_list_entries; lpc++) { const char *prefix = "memb:"; uint32_t nodeid = member_list[lpc]; ais_info("%s %s %u", prefix, member_uname(nodeid), nodeid); } for(lpc = 0; lpc < left_list_entries; lpc++) { const char *prefix = "lost:"; uint32_t nodeid = left_list[lpc]; ais_info("%s %s %u", prefix, member_uname(nodeid), nodeid); } return; } for(lpc = 0; lpc < joined_list_entries; lpc++) { const char *prefix = "NEW: "; uint32_t nodeid = joined_list[lpc]; crm_node_t *node = NULL; changed += update_member( nodeid, membership_seq, -1, 0, NULL, CRM_NODE_MEMBER); ais_info("%s %s %u", prefix, member_uname(nodeid), nodeid); node = g_hash_table_lookup(membership_list, GUINT_TO_POINTER(nodeid)); if(node->addr == NULL) { const char *addr = totempg_ifaces_print(nodeid); node->addr = ais_strdup(addr); ais_debug("Node %u has address %s", nodeid, node->addr); } } for(lpc = 0; lpc < member_list_entries; lpc++) { const char *prefix = "MEMB:"; uint32_t nodeid = member_list[lpc]; changed += update_member( nodeid, membership_seq, -1, 0, NULL, CRM_NODE_MEMBER); ais_info("%s %s %u", prefix, member_uname(nodeid), nodeid); } for(lpc = 0; lpc < left_list_entries; lpc++) { const char *prefix = "LOST:"; uint32_t nodeid = left_list[lpc]; changed += update_member( nodeid, membership_seq, -1, 0, NULL, CRM_NODE_LOST); ais_info("%s %s %u", prefix, member_uname(nodeid), nodeid); } if(do_update) { ais_debug_2("Reaping unseen nodes..."); g_hash_table_foreach( membership_list, ais_mark_unseen_peer_dead, &changed); } if(changed) { ais_debug("%d nodes changed", changed); send_member_notification(); } send_cluster_id(); LEAVE(""); } int ais_ipc_client_exit_callback (void *conn) { int lpc = 0; const char *client = NULL; ENTER("Client=%p", conn); for (; lpc < SIZEOF(crm_children); lpc++) { if(crm_children[lpc].conn == conn) { crm_children[lpc].conn = NULL; crm_children[lpc].async_conn = NULL; client = crm_children[lpc].name; break; } } ais_info("Client %p/%s left", conn, client?client:"unknown-transient"); LEAVE(""); return (0); } int ais_ipc_client_connect_callback (void *conn) { void *async_conn = openais_conn_partner_get(conn); ENTER("Client=%p", conn); ais_debug("Client %p/%p joined", conn, async_conn); if(async_conn) { send_client_msg(async_conn, crm_class_cluster, crm_msg_none, "identify"); } else { ais_err("No async connection"); } LEAVE(""); return (0); } /* * Executive message handlers */ void ais_cluster_message_swab(void *msg) { AIS_Message *ais_msg = msg; ENTER(""); ais_debug_3("Performing endian conversion..."); ais_msg->id = swab32 (ais_msg->id); ais_msg->size = swab32 (ais_msg->size); ais_msg->is_compressed = swab32 (ais_msg->is_compressed); ais_msg->compressed_size = swab32 (ais_msg->compressed_size); ais_msg->host.id = swab32 (ais_msg->host.id); ais_msg->host.pid = swab32 (ais_msg->host.pid); ais_msg->host.type = swab32 (ais_msg->host.type); ais_msg->host.size = swab32 (ais_msg->host.size); ais_msg->host.local = swab32 (ais_msg->host.local); ais_msg->sender.id = swab32 (ais_msg->sender.id); ais_msg->sender.pid = swab32 (ais_msg->sender.pid); ais_msg->sender.type = swab32 (ais_msg->sender.type); ais_msg->sender.size = swab32 (ais_msg->sender.size); ais_msg->sender.local = swab32 (ais_msg->sender.local); LEAVE(""); } void ais_cluster_message_callback ( void *message, unsigned int nodeid) { AIS_Message *ais_msg = message; ENTER("Node=%u (%s)", nodeid, nodeid==local_nodeid?"local":"remote"); ais_debug_2("Message from node %u (%s)", nodeid, nodeid==local_nodeid?"local":"remote"); /* Shouldn't be required... update_member( ais_msg->sender.id, membership_seq, -1, 0, ais_msg->sender.uname, NULL); */ if(ais_msg->host.size == 0 || ais_str_eq(ais_msg->host.uname, local_uname)) { route_ais_message(ais_msg, FALSE); } else { ais_debug_3("Discarding Msg[%d] (dest=%s:%s, from=%s:%s)", ais_msg->id, ais_dest(&(ais_msg->host)), msg_type2text(ais_msg->host.type), ais_dest(&(ais_msg->sender)), msg_type2text(ais_msg->sender.type)); } LEAVE(""); } void ais_cluster_id_swab(void *msg) { struct crm_identify_msg_s *ais_msg = msg; ENTER(""); ais_debug_3("Performing endian conversion..."); ais_msg->id = swab32 (ais_msg->id); ais_msg->pid = swab32 (ais_msg->pid); ais_msg->votes = swab32 (ais_msg->votes); ais_msg->processes = swab32 (ais_msg->processes); LEAVE(""); } void ais_cluster_id_callback (void *message, unsigned int nodeid) { int changed = 0; struct crm_identify_msg_s *msg = message; if(nodeid != msg->id) { ais_err("Invalid message: Node %u claimed to be node %d", nodeid, msg->id); return; } ais_debug("Node update: %s (%s)", msg->uname, msg->version); changed = update_member( nodeid, membership_seq, msg->votes, msg->processes, msg->uname, NULL); if(changed) { send_member_notification(); } } struct res_overlay { mar_res_header_t header __attribute((aligned(8))); char buf[4096]; }; struct res_overlay *res_overlay = NULL; static void send_ipc_ack(void *conn, int class) { if(res_overlay == NULL) { ais_malloc0(res_overlay, sizeof(struct res_overlay)); } res_overlay->header.size = crm_lib_service[class].response_size; res_overlay->header.id = crm_lib_service[class].response_id; res_overlay->header.error = 0; openais_conn_send_response (conn, res_overlay, res_overlay->header.size); } /* local callbacks */ void ais_ipc_message_callback(void *conn, void *msg) { AIS_Message *ais_msg = msg; int type = ais_msg->sender.type; void *async_conn = openais_conn_partner_get(conn); ENTER("Client=%p", conn); ais_debug_2("Message from client %p", conn); if(type > 0 && ais_msg->host.local && crm_children[type].conn == NULL && ais_msg->host.type == crm_msg_ais && ais_msg->sender.pid == crm_children[type].pid && type < SIZEOF(crm_children)) { ais_info("Recorded connection %p for %s/%d", conn, crm_children[type].name, crm_children[type].pid); crm_children[type].conn = conn; crm_children[type].async_conn = async_conn; /* Make sure they have the latest membership */ if(crm_children[type].flags & crm_flag_members) { char *update = ais_generate_membership_data(); ais_info("Sending membership update %llu to %s", membership_seq, crm_children[type].name); send_client_msg(async_conn, crm_class_members, crm_msg_none,update); } } ais_msg->sender.id = local_nodeid; ais_msg->sender.size = local_uname_len; memset(ais_msg->sender.uname, 0, MAX_NAME); memcpy(ais_msg->sender.uname, local_uname, ais_msg->sender.size); route_ais_message(msg, TRUE); send_ipc_ack(conn, 0); LEAVE(""); } int crm_exec_exit_fn (struct objdb_iface_ver0 *objdb) { int lpc = 0; struct timespec waitsleep = { .tv_sec = 1, .tv_nsec = 0 }; ENTER(""); ais_notice("Begining shutdown"); in_shutdown = TRUE; wait_active = FALSE; /* stop the wait loop */ for (lpc = SIZEOF(crm_children) - 1; lpc > 0; lpc--) { crm_children[lpc].respawn = FALSE; stop_child(&(crm_children[lpc]), SIGTERM); while(crm_children[lpc].command && crm_children[lpc].pid) { int status; pid_t pid = 0; pid = wait4( crm_children[lpc].pid, &status, WNOHANG, NULL); if(pid == 0) { sched_yield (); nanosleep (&waitsleep, 0); continue; } else if(pid < 0) { ais_perror("crm_wait_dispatch: Call to wait4(%s) failed", crm_children[lpc].name); } ais_notice("%s (pid=%d) confirmed dead", crm_children[lpc].name, crm_children[lpc].pid); /* cleanup */ crm_children[lpc].pid = 0; crm_children[lpc].conn = NULL; crm_children[lpc].async_conn = NULL; break; } } send_cluster_id(); ais_notice("Shutdown complete"); LEAVE(""); logsys_flush (); return 0; } struct member_loop_data { char *string; }; void member_loop_fn(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; struct member_loop_data *data = user_data; ais_debug_2("Dumping node %u", node->id); data->string = append_member(data->string, node); } char *ais_generate_membership_data(void) { int size = 0; struct member_loop_data data; size = 14 + 32; /* + int */ ais_malloc0(data.string, size); sprintf(data.string, "", membership_seq); g_hash_table_foreach(membership_list, member_loop_fn, &data); size = strlen(data.string); data.string = realloc(data.string, size + 9) ;/* 9 = + nul */ sprintf(data.string + size, ""); return data.string; } void ais_node_list_query(void *conn, void *msg) { char *data = ais_generate_membership_data(); void *async_conn = openais_conn_partner_get(conn); ais_debug_4("members: %s", data); if(async_conn) { send_client_msg(async_conn, crm_class_members, crm_msg_none, data); } ais_free(data); send_ipc_ack(conn, 1); } void ais_manage_notification(void *conn, void *msg) { int lpc = 0; int enable = 0; AIS_Message *ais_msg = msg; char *data = get_ais_data(ais_msg); if(ais_str_eq("true", data)) { enable = 1; } for (; lpc < SIZEOF(crm_children); lpc++) { if(crm_children[lpc].conn == conn) { ais_info("%s node notifications for %s", enable?"Enabling":"Disabling", crm_children[lpc].name); if(enable) { crm_children[lpc].flags |= crm_flag_members; } else { crm_children[lpc].flags |= crm_flag_members; crm_children[lpc].flags ^= crm_flag_members; } break; } } send_ipc_ack(conn, 2); } void send_member_notification(void) { int lpc = 0; char *update = ais_generate_membership_data(); for (; lpc < SIZEOF(crm_children); lpc++) { if(crm_children[lpc].flags & crm_flag_members) { if(crm_children[lpc].async_conn == NULL) { continue; } ais_info("Sending membership update %llu to %s", membership_seq, crm_children[lpc].name); send_client_msg(crm_children[lpc].async_conn, crm_class_members, crm_msg_none, update); } } ais_free(update); } static gboolean check_message_sanity(AIS_Message *msg, char *data) { gboolean sane = TRUE; gboolean repaired = FALSE; int dest = msg->host.type; int tmp_size = msg->header.size - sizeof(AIS_Message); if(sane && msg->header.size == 0) { ais_warn("Message with no size"); sane = FALSE; } if(sane && msg->header.error != 0) { ais_warn("Message header contains an error: %d", msg->header.error); sane = FALSE; } if(sane && ais_data_len(msg) != tmp_size) { int cur_size = ais_data_len(msg); repaired = TRUE; if(msg->is_compressed) { msg->compressed_size = tmp_size; } else { msg->size = tmp_size; } ais_warn("Repaired message payload size %d -> %d", cur_size, tmp_size); } if(sane && ais_data_len(msg) == 0) { ais_warn("Message with no payload"); sane = FALSE; } if(sane && data && msg->is_compressed == FALSE) { int str_size = strlen(data) + 1; if(ais_data_len(msg) != str_size) { int lpc = 0; ais_warn("Message payload is corrupted: expected %d bytes, got %d", ais_data_len(msg), str_size); sane = FALSE; for(lpc = (str_size - 10); lpc < msg->size; lpc++) { if(lpc < 0) { lpc = 0; } ais_debug_2("bad_data[%d]: %d / '%c'", lpc, data[lpc], data[lpc]); } } } if(sane == FALSE) { ais_err("Invalid message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } else if(repaired) { ais_err("Repaired message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } else { ais_debug_3("Verified message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } return sane; } gboolean route_ais_message(AIS_Message *msg, gboolean local_origin) { int rc = 0; int level = LOG_WARNING; int dest = msg->host.type; ais_debug_3("Msg[%d] (dest=%s:%s, from=%s:%s.%d, remote=%s, size=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, local_origin?"false":"true", ais_data_len(msg)); if(local_origin == FALSE) { if(msg->host.size == 0 || ais_str_eq(local_uname, msg->host.uname)) { msg->host.local = TRUE; } } if(check_message_sanity(msg, msg->data) == FALSE) { /* Dont send this message to anyone */ return FALSE; } if(msg->host.local) { void *conn = NULL; const char *lookup = NULL; if(dest == crm_msg_ais) { process_ais_message(msg); return TRUE; } else if(dest == crm_msg_lrmd) { /* lrmd messages are routed via the crm */ dest = crm_msg_crmd; } else if(dest == crm_msg_te) { /* te messages are routed via the crm - for now */ dest = crm_msg_crmd; } if(in_shutdown) { level = LOG_INFO; } AIS_CHECK(dest > 0 && dest < SIZEOF(crm_children), ais_err("Invalid destination: %d", dest); log_ais_message(LOG_ERR, msg); return FALSE; ); rc = 1; lookup = msg_type2text(dest); conn = crm_children[dest].async_conn; /* the cluster fails in weird and wonderfully obscure ways when this is not true */ AIS_ASSERT(ais_str_eq(lookup, crm_children[dest].name)); if (conn == NULL) { do_ais_log(level, "No connection to %s", crm_children[dest].name); } else if (!libais_connection_active(conn)) { do_ais_log(level, "Connection to %s is no longer active", crm_children[dest].name); crm_children[dest].async_conn = NULL; /* } else if ((queue->size - 1) == queue->used) { */ /* ais_err("Connection is throttled: %d", queue->size); */ } else { level = LOG_ERR; ais_debug_3("Delivering locally to %s (size=%d)", crm_children[dest].name, msg->header.size); rc = openais_conn_send_response(conn, msg, msg->header.size); } } else if(local_origin) { /* forward to other hosts */ ais_debug_3("Forwarding to cluster"); rc = send_cluster_msg_raw(msg); } else { ais_debug_3("Ignoring..."); } if(rc != 0) { do_ais_log(level, "Sending message to %s.%s failed (rc=%d)", ais_dest(&(msg->host)), msg_type2text(dest), rc); log_ais_message(level, msg); return FALSE; } return TRUE; } int send_cluster_msg_raw(AIS_Message *ais_msg) { int rc = 0; struct iovec iovec; static uint32_t msg_id = 0; AIS_Message *bz2_msg = NULL; ENTER(""); AIS_ASSERT(local_nodeid != 0); if(ais_msg->header.size != (sizeof(AIS_Message) + ais_data_len(ais_msg))) { ais_err("Repairing size mismatch: %u + %d = %d", (unsigned int)sizeof(AIS_Message), ais_data_len(ais_msg), ais_msg->header.size); ais_msg->header.size = sizeof(AIS_Message) + ais_data_len(ais_msg); } if(ais_msg->id == 0) { msg_id++; AIS_CHECK(msg_id != 0 /* detect wrap-around */, msg_id++; ais_err("Message ID wrapped around")); ais_msg->id = msg_id; } ais_msg->header.id = SERVICE_ID_MAKE(CRM_SERVICE, 0); ais_msg->sender.id = local_nodeid; ais_msg->sender.size = local_uname_len; memset(ais_msg->sender.uname, 0, MAX_NAME); memcpy(ais_msg->sender.uname, local_uname, ais_msg->sender.size); iovec.iov_base = (char *)ais_msg; iovec.iov_len = ais_msg->header.size; #if 0 if(ais_msg->is_compressed == FALSE && ais_msg->size > 1024) { char *compressed = NULL; unsigned int len = (ais_msg->size * 1.1) + 600; /* recomended size */ ais_debug_2("Creating compressed message"); ais_malloc0(compressed, len); rc = BZ2_bzBuffToBuffCompress( compressed, &len, ais_msg->data, ais_msg->size, 3, 0, 30); if(rc != BZ_OK) { ais_err("Compression failed: %d", rc); ais_free(compressed); goto send; } ais_malloc0(bz2_msg, sizeof(AIS_Message) + len + 1); memcpy(bz2_msg, ais_msg, sizeof(AIS_Message)); memcpy(bz2_msg->data, compressed, len); ais_free(compressed); bz2_msg->is_compressed = TRUE; bz2_msg->compressed_size = len; bz2_msg->header.size = sizeof(AIS_Message) + ais_data_len(bz2_msg); ais_debug("Compression details: %d -> %d", bz2_msg->size, ais_data_len(bz2_msg)); iovec.iov_base = (char *)bz2_msg; iovec.iov_len = bz2_msg->header.size; } send: #endif ais_debug_3("Sending message (size=%u)", (unsigned int)iovec.iov_len); rc = totempg_groups_mcast_joined ( openais_group_handle, &iovec, 1, TOTEMPG_SAFE); if(rc == 0 && ais_msg->is_compressed == FALSE) { ais_debug_2("Message sent: %.80s", ais_msg->data); } AIS_CHECK(rc == 0, ais_err("Message not sent (%d)", rc)); ais_free(bz2_msg); LEAVE(""); return rc; } #define min(x,y) (x)<(y)?(x):(y) void send_cluster_id(void) { int rc = 0; int lpc = 0; int len = 0; struct iovec iovec; struct crm_identify_msg_s *msg = NULL; ENTER(""); AIS_ASSERT(local_nodeid != 0); ais_malloc0(msg, sizeof(struct crm_identify_msg_s)); msg->header.size = sizeof(struct crm_identify_msg_s); msg->id = local_nodeid; msg->header.id = SERVICE_ID_MAKE(CRM_SERVICE, 1); len = min(local_uname_len, MAX_NAME-1); memset(msg->uname, 0, MAX_NAME); memcpy(msg->uname, local_uname, len); len = min(strlen(VERSION), MAX_NAME-1); memset(msg->version, 0, MAX_NAME); memcpy(msg->version, VERSION, len); msg->votes = 1; msg->pid = getpid(); msg->processes = crm_proc_ais; for (lpc = 0; lpc < SIZEOF(crm_children); lpc++) { if(crm_children[lpc].pid != 0) { msg->processes |= crm_children[lpc].flag; } } ais_debug("Local update: %u", local_nodeid); update_member( local_nodeid, membership_seq, msg->votes, msg->processes, NULL, NULL); iovec.iov_base = (char *)msg; iovec.iov_len = msg->header.size; rc = totempg_groups_mcast_joined ( openais_group_handle, &iovec, 1, TOTEMPG_SAFE); AIS_CHECK(rc == 0, ais_err("Message not sent (%d)", rc)); ais_free(msg); LEAVE(""); } diff --git a/crmd/te_actions.c b/crmd/te_actions.c index c951764449..d1a4ab079c 100644 --- a/crmd/te_actions.c +++ b/crmd/te_actions.c @@ -1,533 +1,526 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include char *te_uuid = NULL; void send_rsc_command(crm_action_t *action); extern crm_action_timer_t *transition_timer; static void te_start_action_timer(crm_action_t *action) { crm_malloc0(action->timer, sizeof(crm_action_timer_t)); action->timer->timeout = action->timeout; action->timer->reason = timeout_action_warn; action->timer->action = action; action->timer->source_id = Gmain_timeout_add( action->timer->timeout, action_timer_callback, (void*)action->timer); CRM_ASSERT(action->timer->source_id != 0); } static gboolean te_pseudo_action(crm_graph_t *graph, crm_action_t *pseudo) { crm_info("Pseudo action %d fired and confirmed", pseudo->id); pseudo->confirmed = TRUE; update_graph(graph, pseudo); trigger_graph(); return TRUE; } -#if SUPPORT_HEARTBEAT void send_stonith_update(stonith_ops_t * op) { enum cib_errors rc = cib_ok; const char *target = op->node_name; const char *uuid = op->node_uuid; /* zero out the node-status & remove all LRM status info */ xmlNode *node_state = create_xml_node(NULL, XML_CIB_TAG_STATE); CRM_CHECK(op->node_name != NULL, return); CRM_CHECK(op->node_uuid != NULL, return); crm_xml_add(node_state, XML_ATTR_UUID, uuid); crm_xml_add(node_state, XML_ATTR_UNAME, target); crm_xml_add(node_state, XML_CIB_ATTR_HASTATE, DEADSTATUS); crm_xml_add(node_state, XML_CIB_ATTR_INCCM, XML_BOOLEAN_NO); crm_xml_add(node_state, XML_CIB_ATTR_CRMDSTATE, OFFLINESTATUS); crm_xml_add(node_state, XML_CIB_ATTR_JOINSTATE, CRMD_JOINSTATE_DOWN); crm_xml_add(node_state, XML_CIB_ATTR_EXPSTATE, CRMD_JOINSTATE_DOWN); crm_xml_add(node_state, XML_ATTR_ORIGIN, __FUNCTION__); rc = fsa_cib_conn->cmds->update( fsa_cib_conn, XML_CIB_TAG_STATUS, node_state, cib_quorum_override|cib_scope_local); if(rc < cib_ok) { crm_err("CIB update failed: %s", cib_error2string(rc)); abort_transition( INFINITY, tg_shutdown, "CIB update failed", node_state); } else { /* delay processing the trigger until the update completes */ add_cib_op_callback(fsa_cib_conn, rc, FALSE, crm_strdup(target), cib_fencing_updated); } free_xml(node_state); return; } -#endif static gboolean te_fence_node(crm_graph_t *graph, crm_action_t *action) { -#if SUPPORT_HEARTBEAT - if(is_heartbeat_cluster()) { const char *id = NULL; const char *uuid = NULL; const char *target = NULL; const char *type = NULL; stonith_ops_t * st_op = NULL; id = ID(action->xml); target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); type = g_hash_table_lookup(action->params, crm_meta_name("stonith_action")); CRM_CHECK(id != NULL, crm_log_xml_warn(action->xml, "BadAction"); return FALSE); CRM_CHECK(uuid != NULL, crm_log_xml_warn(action->xml, "BadAction"); return FALSE); CRM_CHECK(type != NULL, crm_log_xml_warn(action->xml, "BadAction"); return FALSE); CRM_CHECK(target != NULL, crm_log_xml_warn(action->xml, "BadAction"); return FALSE); te_log_action(LOG_INFO, "Executing %s fencing operation (%s) on %s (timeout=%d)", type, id, target, transition_graph->transition_timeout / 2); /* Passing NULL means block until we can connect... */ te_connect_stonith(NULL); crm_malloc0(st_op, sizeof(stonith_ops_t)); if(safe_str_eq(type, "poweroff")) { st_op->optype = POWEROFF; } else { st_op->optype = RESET; } st_op->timeout = transition_graph->transition_timeout / 2; st_op->node_name = crm_strdup(target); st_op->node_uuid = crm_strdup(uuid); st_op->private_data = generate_transition_key( transition_graph->id, action->id, 0, te_uuid); CRM_ASSERT(stonithd_input_IPC_channel() != NULL); if (ST_OK != stonithd_node_fence( st_op )) { crm_err("Cannot fence %s: stonithd_node_fence() call failed ", target); } return TRUE; - } -#endif - return FALSE; } static int get_target_rc(crm_action_t *action) { const char *target_rc_s = g_hash_table_lookup( action->params, crm_meta_name(XML_ATTR_TE_TARGET_RC)); if(target_rc_s != NULL) { return crm_parse_int(target_rc_s, "0"); } return 0; } static gboolean te_crm_command(crm_graph_t *graph, crm_action_t *action) { char *value = NULL; char *counter = NULL; xmlNode *cmd = NULL; const char *id = NULL; const char *task = NULL; const char *on_node = NULL; gboolean ret = TRUE; id = ID(action->xml); task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); CRM_CHECK(on_node != NULL && strlen(on_node) != 0, te_log_action(LOG_ERR, "Corrupted command (id=%s) %s: no node", crm_str(id), crm_str(task)); return FALSE); te_log_action(LOG_INFO, "Executing crm-event (%s): %s on %s", crm_str(id), crm_str(task), on_node); if(safe_str_eq(on_node, fsa_our_uname) && safe_str_eq(task, CRM_OP_SHUTDOWN)) { /* defer until everything else completes */ te_log_action(LOG_INFO, "crm-event (%s) is a local shutdown", crm_str(id)); graph->completion_action = tg_shutdown; graph->abort_reason = "local shutdown"; action->confirmed = TRUE; update_graph(graph, action); trigger_graph(); return TRUE; } cmd = create_request(task, NULL, on_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL); counter = generate_transition_key( transition_graph->id, action->id, get_target_rc(action), te_uuid); crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter); ret = send_cluster_message(on_node, crm_proc_crmd, cmd, TRUE); crm_free(counter); free_xml(cmd); value = g_hash_table_lookup(action->params, crm_meta_name(XML_ATTR_TE_NOWAIT)); if(ret == FALSE) { crm_err("Action %d failed: send", action->id); return FALSE; } else if(crm_is_true(value)) { crm_info("Skipping wait for %d", action->id); action->confirmed = TRUE; update_graph(graph, action); trigger_graph(); } else if(ret && action->timeout > 0) { crm_debug("Setting timer for action %d",action->id); action->timer->reason = timeout_action_warn; te_start_action_timer(action); } return TRUE; } static gboolean te_rsc_command(crm_graph_t *graph, crm_action_t *action) { /* never overwrite stop actions in the CIB with * anything other than completed results * * Writing pending stops makes it look like the * resource is running again */ const char *task = NULL; const char *on_node = NULL; action->executed = FALSE; on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); CRM_CHECK(on_node != NULL && strlen(on_node) != 0, te_log_action(LOG_ERR, "Corrupted command(id=%s) %s: no node", ID(action->xml), crm_str(task)); return FALSE); send_rsc_command(action); return TRUE; } gboolean cib_action_update(crm_action_t *action, int status) { char *op_id = NULL; char *code = NULL; char *digest = NULL; xmlNode *tmp = NULL; xmlNode *params = NULL; xmlNode *state = NULL; xmlNode *rsc = NULL; xmlNode *xml_op = NULL; xmlNode *action_rsc = NULL; enum cib_errors rc = cib_ok; const char *name = NULL; const char *value = NULL; const char *rsc_id = NULL; const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); const char *task_uuid = crm_element_value( action->xml, XML_LRM_ATTR_TASK_KEY); const char *target_uuid = crm_element_value( action->xml, XML_LRM_ATTR_TARGET_UUID); int call_options = cib_quorum_override|cib_scope_local; crm_warn("%s %d: %s on %s timed out", crm_element_name(action->xml), action->id, task_uuid, target); action_rsc = find_xml_node(action->xml, XML_CIB_TAG_RESOURCE, TRUE); if(action_rsc == NULL) { return FALSE; } rsc_id = ID(action_rsc); CRM_CHECK(rsc_id != NULL, crm_log_xml_err(action->xml, "Bad:action"); return FALSE); code = crm_itoa(status); /* update the CIB */ state = create_xml_node(NULL, XML_CIB_TAG_STATE); crm_xml_add(state, XML_ATTR_UUID, target_uuid); crm_xml_add(state, XML_ATTR_UNAME, target); rsc = create_xml_node(state, XML_CIB_TAG_LRM); crm_xml_add(rsc, XML_ATTR_ID, target_uuid); rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCES); rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCE); crm_xml_add(rsc, XML_ATTR_ID, rsc_id); name = XML_ATTR_TYPE; value = crm_element_value(action_rsc, name); crm_xml_add(rsc, name, value); name = XML_AGENT_ATTR_CLASS; value = crm_element_value(action_rsc, name); crm_xml_add(rsc, name, value); name = XML_AGENT_ATTR_PROVIDER; value = crm_element_value(action_rsc, name); crm_xml_add(rsc, name, value); xml_op = create_xml_node(rsc, XML_LRM_TAG_RSC_OP); crm_xml_add(xml_op, XML_ATTR_ID, task); op_id = generate_op_key(rsc_id, task, action->interval); crm_xml_add(xml_op, XML_ATTR_ID, op_id); crm_free(op_id); crm_xml_add(xml_op, XML_LRM_ATTR_TASK, task); crm_xml_add(xml_op, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); crm_xml_add(xml_op, XML_LRM_ATTR_OPSTATUS, code); crm_xml_add(xml_op, XML_LRM_ATTR_CALLID, "-1"); crm_xml_add_int(xml_op, XML_LRM_ATTR_INTERVAL, action->interval); crm_xml_add(xml_op, XML_LRM_ATTR_RC, code); crm_xml_add(xml_op, XML_ATTR_ORIGIN, __FUNCTION__); crm_free(code); code = generate_transition_key( transition_graph->id, action->id, get_target_rc(action), te_uuid); crm_xml_add(xml_op, XML_ATTR_TRANSITION_KEY, code); crm_free(code); code = generate_transition_magic( crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY), status, status); crm_xml_add(xml_op, XML_ATTR_TRANSITION_MAGIC, code); crm_free(code); tmp = find_xml_node(action->xml, "attributes", TRUE); params = create_xml_node(NULL, XML_TAG_PARAMS); copy_in_properties(params, tmp); filter_action_parameters(params, CRM_FEATURE_SET); digest = calculate_xml_digest(params, TRUE, FALSE); /* info for now as this area has been problematic to debug */ crm_debug("Calculated digest %s for %s (%s)\n", digest, ID(xml_op), crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC)); crm_log_xml(LOG_DEBUG, "digest:source", params); crm_xml_add(xml_op, XML_LRM_ATTR_OP_DIGEST, digest); crm_free(digest); free_xml(params); crm_debug_3("Updating CIB with \"%s\" (%s): %s %s on %s", status<0?"new action":XML_ATTR_TIMEOUT, crm_element_name(action->xml), crm_str(task), rsc_id, target); rc = fsa_cib_conn->cmds->update( fsa_cib_conn, XML_CIB_TAG_STATUS, state, call_options); crm_debug("Updating CIB with %s action %d: %s on %s (call_id=%d)", op_status2text(status), action->id, task_uuid, target, rc); add_cib_op_callback(fsa_cib_conn, rc, FALSE, NULL, cib_action_updated); free_xml(state); action->sent_update = TRUE; if(rc < cib_ok) { return FALSE; } return TRUE; } void send_rsc_command(crm_action_t *action) { xmlNode *cmd = NULL; xmlNode *rsc_op = NULL; char *counter = NULL; const char *task = NULL; const char *value = NULL; const char *on_node = NULL; const char *task_uuid = NULL; CRM_ASSERT(action != NULL); CRM_ASSERT(action->xml != NULL); rsc_op = action->xml; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); on_node = crm_element_value(rsc_op, XML_LRM_ATTR_TARGET); counter = generate_transition_key( transition_graph->id, action->id, get_target_rc(action), te_uuid); crm_xml_add(rsc_op, XML_ATTR_TRANSITION_KEY, counter); crm_info("Initiating action %d: %s %s on %s", action->id, task, task_uuid, on_node); crm_free(counter); if(rsc_op != NULL) { crm_log_xml_debug_2(rsc_op, "Performing"); } cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, on_node, CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL); send_cluster_message(on_node, crm_proc_lrmd, cmd, TRUE); free_xml(cmd); action->executed = TRUE; value = g_hash_table_lookup(action->params, crm_meta_name(XML_ATTR_TE_NOWAIT)); if(crm_is_true(value)) { crm_debug("Skipping wait for %d", action->id); action->confirmed = TRUE; update_graph(transition_graph, action); trigger_graph(); } else if(action->timeout > 0) { int action_timeout = (2 * action->timeout) + transition_graph->network_delay; crm_debug_3("Setting timer for action %s", task_uuid); if(transition_graph->transition_timeout < action_timeout) { crm_debug("Action %d:" " Increasing transition %d timeout to %d (2*%d + %d)", action->id, transition_graph->id, action_timeout, action->timeout, transition_graph->network_delay); transition_graph->transition_timeout = action_timeout; } te_start_action_timer(action); } } crm_graph_functions_t te_graph_fns = { te_pseudo_action, te_rsc_command, te_crm_command, te_fence_node }; void notify_crmd(crm_graph_t *graph) { int log_level = LOG_DEBUG; const char *type = "unknown"; enum crmd_fsa_input event = I_NULL; crm_debug("Processing transition completion in state %s", fsa_state2string(fsa_state)); stop_te_timer(transition_timer); CRM_CHECK(graph->complete, graph->complete = TRUE); switch(graph->completion_action) { case tg_stop: type = "stop"; /* fall through */ case tg_done: type = "done"; log_level = LOG_INFO; if(fsa_state == S_TRANSITION_ENGINE) { event = I_TE_SUCCESS; } break; case tg_restart: type = "restart"; if(fsa_state == S_TRANSITION_ENGINE) { event = I_PE_CALC; } else if(fsa_state == S_POLICY_ENGINE) { register_fsa_action(A_PE_INVOKE); } break; case tg_shutdown: type = "shutdown"; if(is_set(fsa_input_register, R_SHUTDOWN)) { event = I_STOP; } else { event = I_TERMINATE; } } te_log_action(log_level, "Transition %d status: %s - %s", graph->id, type, crm_str(graph->abort_reason)); graph->abort_reason = NULL; graph->completion_action = tg_done; clear_bit_inplace(fsa_input_register, R_IN_TRANSITION); if(event != I_NULL) { register_fsa_input(C_FSA_INTERNAL, event, NULL); } } diff --git a/crmd/te_callbacks.c b/crmd/te_callbacks.c index 7fc13a0848..df9dfe5c7f 100644 --- a/crmd/te_callbacks.c +++ b/crmd/te_callbacks.c @@ -1,479 +1,477 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include void te_update_confirm(const char *event, xmlNode *msg); xmlNode *need_abort(xmlNode *update); extern char *te_uuid; gboolean shuttingdown = FALSE; crm_graph_t *transition_graph; GTRIGSource *transition_trigger = NULL; crm_action_timer_t *transition_timer = NULL; void te_update_diff(const char *event, xmlNode *msg) { int rc = -1; const char *op = NULL; const char *set_name = NULL; xmlNode *diff = NULL; xmlNode *aborted = NULL; int diff_add_updates = 0; int diff_add_epoch = 0; int diff_add_admin_epoch = 0; int diff_del_updates = 0; int diff_del_epoch = 0; int diff_del_admin_epoch = 0; CRM_CHECK(msg != NULL, return); crm_element_value_int(msg, F_CIB_RC, &rc); if(transition_graph == NULL) { crm_debug_3("No graph"); return; } else if(rc < cib_ok) { crm_debug_3("Filter rc=%d (%s)", rc, cib_error2string(rc)); return; } else if(transition_graph->complete == TRUE && fsa_state != S_IDLE && fsa_state != S_TRANSITION_ENGINE && fsa_state != S_POLICY_ENGINE) { crm_debug_2("Filter state=%s, complete=%d", fsa_state2string(fsa_state), transition_graph->complete); return; } op = crm_element_value(msg, F_CIB_OPERATION); diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); cib_diff_version_details( diff, &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); crm_debug("Processing diff (%s): %d.%d.%d -> %d.%d.%d (%s)", op, diff_del_admin_epoch,diff_del_epoch,diff_del_updates, diff_add_admin_epoch,diff_add_epoch,diff_add_updates, fsa_state2string(fsa_state)); log_cib_diff(LOG_DEBUG_2, diff, op); set_name = "diff-added"; if(diff != NULL) { xmlNode *section = NULL; xmlNode *change_set = find_xml_node(diff, set_name, FALSE); change_set = find_xml_node(change_set, XML_TAG_CIB, FALSE); if(change_set != NULL) { crm_debug_2("Checking status changes"); section=get_object_root(XML_CIB_TAG_STATUS,change_set); } if(section != NULL) { extract_event(section); } crm_debug_2("Checking change set: %s", set_name); aborted = need_abort(change_set); } set_name = "diff-removed"; if(diff != NULL && aborted == NULL) { xmlNode *attrs = NULL; xmlNode *status = NULL; xmlNode *change_set = find_xml_node(diff, set_name, FALSE); change_set = find_xml_node(change_set, XML_TAG_CIB, FALSE); crm_debug_2("Checking change set: %s", set_name); aborted = need_abort(change_set); if(aborted == NULL && change_set != NULL) { status = get_object_root(XML_CIB_TAG_STATUS, change_set); xml_child_iter_filter( status, node_state, XML_CIB_TAG_STATE, attrs = find_xml_node( node_state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); if(attrs != NULL) { crm_info("Aborting on "XML_TAG_TRANSIENT_NODEATTRS" deletions"); abort_transition(INFINITY, tg_restart, XML_TAG_TRANSIENT_NODEATTRS, attrs); } ); } } if(aborted != NULL) { abort_transition( INFINITY, tg_restart, "Non-status change", NULL); } return; } gboolean process_te_message(xmlNode *msg, xmlNode *xml_data) { xmlNode *xml_obj = NULL; const char *from = crm_element_value(msg, F_ORIG); const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO); const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM); const char *ref = crm_element_value(msg, XML_ATTR_REFERENCE); const char *op = crm_element_value(msg, F_CRM_TASK); const char *type = crm_element_value(msg, F_CRM_MSG_TYPE); crm_debug_2("Processing %s (%s) message", op, ref); crm_log_xml(LOG_DEBUG_3, "ipc", msg); if(op == NULL){ /* error */ } else if(sys_to == NULL || strcasecmp(sys_to, CRM_SYSTEM_TENGINE) != 0) { crm_debug_2("Bad sys-to %s", crm_str(sys_to)); return FALSE; } else if(safe_str_eq(op, CRM_OP_INVOKE_LRM) && safe_str_eq(sys_from, CRM_SYSTEM_LRMD) /* && safe_str_eq(type, XML_ATTR_RESPONSE) */ ){ xml_obj = xml_data; CRM_CHECK(xml_obj != NULL, crm_log_xml(LOG_ERR, "Invalid (N)ACK", msg); return FALSE); CRM_CHECK(xml_obj != NULL, crm_log_xml(LOG_ERR, "Invalid (N)ACK", msg); return FALSE); xml_obj = get_object_root(XML_CIB_TAG_STATUS, xml_obj); CRM_CHECK(xml_obj != NULL, crm_log_xml(LOG_ERR, "Invalid (N)ACK", msg); return FALSE); crm_log_xml(LOG_DEBUG_2, "Processing (N)ACK", msg); crm_info("Processing (N)ACK %s from %s", crm_element_value(msg, XML_ATTR_REFERENCE), from); extract_event(xml_obj); } else { crm_err("Unknown command: %s::%s from %s", type, op, sys_from); } crm_debug_3("finished processing message"); return TRUE; } -#if SUPPORT_HEARTBEAT void tengine_stonith_callback(stonith_ops_t * op) { const char *allow_fail = NULL; int target_rc = -1; int stonith_id = -1; int transition_id = -1; char *uuid = NULL; crm_action_t *stonith_action = NULL; if(op == NULL) { crm_err("Called with a NULL op!"); return; } crm_info("call=%d, optype=%d, node_name=%s, result=%d, node_list=%s, action=%s", op->call_id, op->optype, op->node_name, op->op_result, (char *)op->node_list, op->private_data); /* this will mark the event complete if a match is found */ CRM_CHECK(op->private_data != NULL, return); /* filter out old STONITH actions */ CRM_CHECK(decode_transition_key( op->private_data, &uuid, &transition_id, &stonith_id, &target_rc), crm_err("Invalid event detected"); goto bail; ); if(transition_graph->complete || stonith_id < 0 || safe_str_neq(uuid, te_uuid) || transition_graph->id != transition_id) { crm_info("Ignoring STONITH action initiated outside" " of the current transition"); } stonith_action = get_action(stonith_id, TRUE); if(stonith_action == NULL) { crm_err("Stonith action not matched"); goto bail; } switch(op->op_result) { case STONITH_SUCCEEDED: send_stonith_update(op); break; case STONITH_CANNOT: case STONITH_TIMEOUT: case STONITH_GENERIC: stonith_action->failed = TRUE; allow_fail = g_hash_table_lookup( stonith_action->params, crm_meta_name(XML_ATTR_TE_ALLOWFAIL)); if(FALSE == crm_is_true(allow_fail)) { crm_err("Stonith of %s failed (%d)..." " aborting transition.", op->node_name, op->op_result); abort_transition(INFINITY, tg_restart, "Stonith failed", NULL); } break; default: crm_err("Unsupported action result: %d", op->op_result); abort_transition(INFINITY, tg_restart, "Unsupport Stonith result", NULL); } update_graph(transition_graph, stonith_action); trigger_graph(); bail: crm_free(uuid); return; } void tengine_stonith_connection_destroy(gpointer user_data) { crm_err("Fencing daemon has left us"); stonith_src = NULL; if(stonith_src == NULL) { G_main_set_trigger(stonith_reconnect); } /* cbchan will be garbage at this point, arrange for it to be reset */ set_stonithd_input_IPC_channel_NULL(); return; } gboolean tengine_stonith_dispatch(IPC_Channel *sender, void *user_data) { int lpc = 0; while(stonithd_op_result_ready()) { if (sender->ch_status == IPC_DISCONNECT) { /* The message which was pending for us is that * the IPC status is now IPC_DISCONNECT */ break; } if(ST_FAIL == stonithd_receive_ops_result(FALSE)) { crm_err("stonithd_receive_ops_result() failed"); } else { lpc++; } } crm_debug_2("Processed %d messages", lpc); if (sender->ch_status == IPC_DISCONNECT) { return FALSE; } return TRUE; } -#endif void cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { if(rc < cib_ok) { crm_err("CIB update failed: %s", cib_error2string(rc)); crm_log_xml_warn(msg, "Failed update"); } else { erase_status_tag(user_data, XML_CIB_TAG_LRM); } crm_free(user_data); } void cib_action_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { if(rc < cib_ok) { crm_err("Update %d FAILED: %s", call_id, cib_error2string(rc)); } } void cib_failcount_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { if(rc < cib_ok) { crm_err("Update %d FAILED: %s", call_id, cib_error2string(rc)); } } gboolean action_timer_callback(gpointer data) { crm_action_timer_t *timer = NULL; if(data == NULL) { crm_err("Timer popped with no data"); return FALSE; } timer = (crm_action_timer_t*)data; stop_te_timer(timer); crm_warn("Timer popped (abort_level=%d, complete=%s)", transition_graph->abort_priority, transition_graph->complete?"true":"false"); CRM_CHECK(timer->action != NULL, return FALSE); if(transition_graph->complete) { crm_warn("Ignoring timeout while not in transition"); } else if(timer->reason == timeout_action_warn) { print_action( LOG_WARNING,"Action missed its timeout: ", timer->action); } else { /* fail the action */ cib_action_update(timer->action, LRM_OP_TIMEOUT); } return FALSE; } static int unconfirmed_actions(gboolean send_updates) { int unconfirmed = 0; const char *key = NULL; const char *task = NULL; const char *node = NULL; crm_debug_2("Unconfirmed actions..."); slist_iter( synapse, synapse_t, transition_graph->synapses, lpc, /* lookup event */ slist_iter( action, crm_action_t, synapse->actions, lpc2, if(action->executed == FALSE) { continue; } else if(action->confirmed) { continue; } unconfirmed++; task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); key = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); crm_info("Action %s %d unconfirmed from %s", key, action->id, node); if(action->type != action_type_rsc) { continue; } else if(send_updates == FALSE) { continue; } else if(safe_str_eq(task, "cancel")) { /* we dont need to update the CIB with these */ continue; } else if(safe_str_eq(task, "stop")) { /* *never* update the CIB with these */ continue; } cib_action_update(action, LRM_OP_PENDING); ); ); if(unconfirmed > 0) { crm_warn("Waiting on %d unconfirmed actions", unconfirmed); } return unconfirmed; } gboolean global_timer_callback(gpointer data) { crm_action_timer_t *timer = NULL; if(data == NULL) { crm_err("Timer popped with no data"); return FALSE; } timer = (crm_action_timer_t*)data; stop_te_timer(timer); crm_warn("Timer popped (abort_level=%d, complete=%s)", transition_graph->abort_priority, transition_graph->complete?"true":"false"); CRM_CHECK(timer->action == NULL, return FALSE); if(transition_graph->complete) { crm_err("Ignoring timeout while not in transition"); } else if(timer->reason == timeout_abort) { int unconfirmed = unconfirmed_actions(FALSE); crm_warn("Transition abort timeout reached..." " marking transition complete."); transition_graph->complete = TRUE; abort_transition(INFINITY, tg_restart, "Global Timeout", NULL); if(unconfirmed != 0) { crm_warn("Writing %d unconfirmed actions to the CIB", unconfirmed); unconfirmed_actions(TRUE); } } return FALSE; } diff --git a/crmd/te_callbacks.h b/crmd/te_callbacks.h index ed9336f6fd..402aa1324b 100644 --- a/crmd/te_callbacks.h +++ b/crmd/te_callbacks.h @@ -1,43 +1,41 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef TE_CALLBACKS__H #define TE_CALLBACKS__H extern void cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data); extern void cib_action_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data); extern void cib_failcount_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data); extern gboolean global_timer_callback(gpointer data); extern gboolean action_timer_callback(gpointer data); extern gboolean te_graph_trigger(gpointer user_data); extern void tengine_stonith_connection_destroy(gpointer user_data); extern void te_update_diff(const char *event, xmlNode *msg); -#if SUPPORT_HEARTBEAT extern void tengine_stonith_callback(stonith_ops_t * op); extern gboolean tengine_stonith_dispatch(IPC_Channel *sender, void *user_data); -#endif #endif diff --git a/crmd/te_utils.c b/crmd/te_utils.c index e8d2d22df9..1a50c47ddd 100644 --- a/crmd/te_utils.c +++ b/crmd/te_utils.c @@ -1,230 +1,225 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include GCHSource *stonith_src = NULL; GTRIGSource *stonith_reconnect = NULL; gboolean te_connect_stonith(gpointer user_data) { -#if SUPPORT_HEARTBEAT - if(is_heartbeat_cluster()) { int lpc = 0; int rc = ST_OK; IPC_Channel *fence_ch = NULL; if(stonith_src != NULL) { crm_debug("Still connected"); return TRUE; } for(lpc = 0; lpc < 30; lpc++) { crm_info("Attempting connection to fencing daemon..."); sleep(1); rc = stonithd_signon("tengine"); if(rc == ST_OK) { break; } if(user_data != NULL) { crm_err("Sign-in failed: triggered a retry"); G_main_set_trigger(stonith_reconnect); return TRUE; } crm_err("Sign-in failed: pausing and trying again in 2s..."); sleep(1); } CRM_ASSERT(rc == ST_OK); /* If not, we failed 30 times... just get out */ CRM_ASSERT(stonithd_set_stonith_ops_callback( tengine_stonith_callback) == ST_OK); crm_debug_2("Grabbing IPC channel"); fence_ch = stonithd_input_IPC_channel(); CRM_ASSERT(fence_ch != NULL); crm_debug_2("Attaching to mainloop"); stonith_src = G_main_add_IPC_Channel( G_PRIORITY_LOW, fence_ch, FALSE, tengine_stonith_dispatch, NULL, tengine_stonith_connection_destroy); CRM_ASSERT(stonith_src != NULL); crm_info("Connected"); return TRUE; - } -#endif - return FALSE; } gboolean start_global_timer(crm_action_timer_t *timer, int timeout) { CRM_ASSERT(timer != NULL); CRM_CHECK(timer > 0, return FALSE); CRM_CHECK(timer->source_id == 0, return FALSE); if(timeout <= 0) { crm_err("Tried to start timer with period: %d", timeout); } else if(timer->source_id == 0) { crm_debug_2("Starting abort timer: %dms", timeout); timer->timeout = timeout; timer->source_id = Gmain_timeout_add( timeout, global_timer_callback, (void*)timer); CRM_ASSERT(timer->source_id != 0); return TRUE; } else { crm_err("Timer is already active with period: %d", timer->timeout); } return FALSE; } gboolean stop_te_timer(crm_action_timer_t *timer) { const char *timer_desc = "action timer"; if(timer == NULL) { return FALSE; } if(timer->reason == timeout_abort) { timer_desc = "global timer"; } if(timer->source_id != 0) { crm_debug_2("Stopping %s", timer_desc); Gmain_timeout_remove(timer->source_id); timer->source_id = 0; } else { return FALSE; } return TRUE; } gboolean te_graph_trigger(gpointer user_data) { int timeout = 0; enum transition_status graph_rc = -1; crm_debug("Invoking the TE graph in state %s", fsa_state2string(fsa_state)); switch(fsa_state) { case S_STARTING: case S_PENDING: case S_NOT_DC: case S_HALT: case S_ILLEGAL: case S_STOPPING: case S_TERMINATE: return TRUE; break; default: break; } if(transition_graph->complete == FALSE) { graph_rc = run_graph(transition_graph); timeout = transition_graph->transition_timeout; print_graph(LOG_DEBUG_3, transition_graph); if(graph_rc == transition_active) { crm_debug_3("Transition not yet complete"); stop_te_timer(transition_timer); start_global_timer(transition_timer, timeout); return TRUE; } else if(graph_rc == transition_pending) { crm_debug_3("Transition not yet complete - no actions fired"); return TRUE; } if(graph_rc != transition_complete) { crm_err("Transition failed: %s", transition_status(graph_rc)); print_graph(LOG_WARNING, transition_graph); } } transition_graph->complete = TRUE; notify_crmd(transition_graph); return TRUE; } void trigger_graph_processing(const char *fn, int line) { G_main_set_trigger(transition_trigger); crm_debug_2("%s:%d - Triggered graph processing", fn, line); } void abort_transition_graph( int abort_priority, enum transition_action abort_action, const char *abort_text, xmlNode *reason, const char *fn, int line) { int log_level = LOG_DEBUG; /* if(abort_priority >= INFINITY) { log_level = LOG_INFO; } */ do_crm_log(log_level, "%s:%d - Triggered graph processing (complete=%d) : %s", fn, line, transition_graph->complete, abort_text); if(transition_graph && transition_graph->complete) { register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL); return; } update_abort_priority( transition_graph, abort_priority, abort_action, abort_text); if(reason != NULL) { const char *magic = crm_element_value( reason, XML_ATTR_TRANSITION_MAGIC); if(magic) { do_crm_log(log_level, "Caused by update to %s: %s", ID(reason), magic); } else { crm_log_xml(log_level, "Cause", reason); } } G_main_set_trigger(transition_trigger); } diff --git a/crmd/tengine.h b/crmd/tengine.h index 16aa2a0adc..2fe95f764e 100644 --- a/crmd/tengine.h +++ b/crmd/tengine.h @@ -1,73 +1,71 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef TENGINE__H #define TENGINE__H #include #include -#if SUPPORT_HEARTBEAT -# include +#include extern void send_stonith_update(stonith_ops_t * op); -#endif /* tengine */ extern crm_action_t *match_down_event( int rc, const char *target, const char *filter); extern gboolean cib_action_update(crm_action_t *action, int status); /* utils */ extern crm_action_t *get_action(int id, gboolean confirmed); extern gboolean start_global_timer(crm_action_timer_t *timer, int timeout); extern gboolean stop_te_timer(crm_action_timer_t *timer); extern const char *get_rsc_state(const char *task, op_status_t status); /* unpack */ extern gboolean extract_event(xmlNode *msg); extern gboolean process_te_message(xmlNode * msg, xmlNode *xml_data); extern crm_graph_t *transition_graph; extern GTRIGSource *transition_trigger; extern char *te_uuid; extern void notify_crmd(crm_graph_t *graph); #include extern void trigger_graph_processing(const char *fn, int line); extern void abort_transition_graph( int abort_priority, enum transition_action abort_action, const char *abort_text, xmlNode *reason, const char *fn, int line); #define trigger_graph() trigger_graph_processing(__FUNCTION__, __LINE__) #define abort_transition(pri, action, text, reason) \ abort_transition_graph(pri, action, text, reason,__FUNCTION__,__LINE__); extern gboolean te_connect_stonith(gpointer user_data); extern GCHSource *stonith_src; extern GTRIGSource *transition_trigger; extern GTRIGSource *stonith_reconnect; extern crm_action_timer_t *transition_timer; extern char *failed_stop_offset; extern char *failed_start_offset; #endif diff --git a/lib/Makefile.am b/fencing/Makefile.am similarity index 65% copy from lib/Makefile.am copy to fencing/Makefile.am index 386f183668..db0a495be7 100644 --- a/lib/Makefile.am +++ b/fencing/Makefile.am @@ -1,34 +1,20 @@ -# -# heartbeat library directory: Linux-HA code -# -# Copyright (C) 2001 Alan Robertson +# Author: Sun Jiang Dong +# Copyright (c) 2004 International Business Machines # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # -MAINTAINERCLEANFILES = Makefile.in - - -# -# PILS is supposed to be an independent library -# Stonith should rely only on PILS -# -# The rest can use clplumbing, and the plugins should be able -# to use any of the libraries (nothing can be linked against them) -# -# - -## Subdirectories... -SUBDIRS = crm +MAINTAINERCLEANFILES = Makefile.in +SUBDIRS = stonithd test diff --git a/include/Makefile.am b/include/Makefile.am index 4464602d19..1d18390e18 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -1,64 +1,64 @@ # # linux-ha: Linux-HA heartbeat code # # Copyright (C) 2001 Michael Moerz # This instance created by Horms # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in ha_version.h config.h.in -EXTRA_DIST = ha_version.h +EXTRA_DIST = ha_version.h crm_internal.h headerdir=$(includedir)/@PKG_NAME@ noinst_HEADERS = portability.h config.h ha_version.h header_HEADERS = crm_config.h -SUBDIRS = crm +SUBDIRS = crm fencing ## The backtick commands are not executed here, ## but rather as macro-expansions at use within the rules. HG_LIVE_VERSION=`$(HG) -R "$(top_srcdir)" id` ARCHIVE_VERSION="$(top_srcdir)/.hg_archival.txt" HG_TAR_VERSION=`$(EGREP) node: "$(ARCHIVE_VERSION)"` ha_version.h: $(ARCHIVE_VERSION) if [ -r ha_version.h -a ! -w ha_version.h ]; then \ hgv=""; \ echo "Saved Version"; \ elif [ -f $(ARCHIVE_VERSION) ]; then \ hgv="$(HG_TAR_VERSION)"; \ echo "Hg Archived Version: $${hgv}"; \ elif [ -x $(HG) -a -d $(top_srcdir)/.hg ]; then \ hgv="$(HG_LIVE_VERSION)"; \ echo "Hg Live Version: $${hgv}"; \ elif [ -r ha_version.h ]; then \ hgv=""; \ echo "Hg Saved Live Version"; \ cat ha_version.h; \ else \ hgv="Unknown"; \ echo "Unknown Hg Version"; \ fi ; \ if [ X"$${hgv}" != "X" ]; then \ echo "/* $${hgv} */" > ha_version.h; \ echo "#define HA_HG_VERSION \"$${hgv}\"" >> ha_version.h; \ fi .PHONY: $(ARCHIVE_VERSION) diff --git a/include/crm/ais_common.h b/include/crm/ais_common.h index 46333bba89..1e3e973745 100644 --- a/include/crm/ais_common.h +++ b/include/crm/ais_common.h @@ -1,298 +1,302 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef CRM_AIS_COMMON__H #define CRM_AIS_COMMON__H #include #include #if SUPPORT_AIS # include # include # include #else typedef struct { int size __attribute__((aligned(8))); int id __attribute__((aligned(8))); } mar_req_header_t __attribute__((aligned(8))); typedef struct { int size; __attribute__((aligned(8))) int id __attribute__((aligned(8))); int error __attribute__((aligned(8))); } mar_res_header_t __attribute__((aligned(8))); #endif #define MAX_NAME 256 #define AIS_IPC_NAME "ais-crm-ipc" #define CRM_NODE_LOST "lost" #define CRM_NODE_MEMBER "member" #define CRM_NODE_ACTIVE CRM_NODE_MEMBER #define CRM_NODE_INACTIVE CRM_NODE_LOST #define CRM_NODE_EVICTED "evicted" typedef struct crm_ais_host_s AIS_Host; typedef struct crm_ais_msg_s AIS_Message; enum crm_ais_msg_class { crm_class_cluster = 0, crm_class_members = 1, crm_class_notify = 2, }; /* order here matters - its used to index into the crm_children array */ enum crm_ais_msg_types { crm_msg_none = 0, crm_msg_ais = 1, crm_msg_lrmd = 2, crm_msg_cib = 3, crm_msg_crmd = 4, - crm_msg_te = 5, - crm_msg_pe = 6, - crm_msg_attrd = 7, + crm_msg_attrd = 5, + crm_msg_stonithd = 6, + crm_msg_te = 7, + crm_msg_pe = 8, }; enum crm_proc_flag { crm_proc_none = 0x00000001, crm_proc_ais = 0x00000002, crm_proc_lrmd = 0x00000010, - crm_proc_stonith = 0x00000020, crm_proc_cib = 0x00000100, crm_proc_crmd = 0x00000200, - crm_proc_pe = 0x00001000, - crm_proc_te = 0x00002000, - crm_proc_attrd = 0x00010000, + crm_proc_attrd = 0x00001000, + crm_proc_stonithd = 0x00002000, + crm_proc_pe = 0x00010000, + crm_proc_te = 0x00020000, }; typedef struct crm_peer_node_s { unsigned int id; unsigned long long born; unsigned long long last_seen; int32_t votes; uint32_t processes; char *uname; char *state; char *uuid; char *addr; char *version; } crm_node_t; struct crm_ais_host_s { uint32_t id; uint32_t pid; gboolean local; enum crm_ais_msg_types type; uint32_t size; char uname[256]; } __attribute__((packed)); struct crm_ais_msg_s { mar_res_header_t header __attribute__((aligned(8))); uint32_t id; gboolean is_compressed; AIS_Host host; AIS_Host sender; uint32_t size; uint32_t compressed_size; /* 584 bytes */ char data[0]; } __attribute__((packed)); #if SUPPORT_AIS static inline const char *ais_error2text(SaAisErrorT error) { const char *text = "unknown"; switch(error) { case SA_AIS_OK: text = "None"; break; case SA_AIS_ERR_LIBRARY: text = "Library error"; break; case SA_AIS_ERR_VERSION: text = "Version error"; break; case SA_AIS_ERR_INIT: text = "Initialization error"; break; case SA_AIS_ERR_TIMEOUT: text = "Timeout"; break; case SA_AIS_ERR_TRY_AGAIN: text = "Try again"; break; case SA_AIS_ERR_INVALID_PARAM: text = "Invalid parameter"; break; case SA_AIS_ERR_NO_MEMORY: text = "No memory"; break; case SA_AIS_ERR_BAD_HANDLE: text = "Bad handle"; break; case SA_AIS_ERR_BUSY: text = "Busy"; break; case SA_AIS_ERR_ACCESS: text = "Access error"; break; case SA_AIS_ERR_NOT_EXIST: text = "Doesn't exist"; break; case SA_AIS_ERR_NAME_TOO_LONG: text = "Name too long"; break; case SA_AIS_ERR_EXIST: text = "Exists"; break; case SA_AIS_ERR_NO_SPACE: text = "No space"; break; case SA_AIS_ERR_INTERRUPT: text = "Interrupt"; break; case SA_AIS_ERR_NAME_NOT_FOUND: text = "Name not found"; break; case SA_AIS_ERR_NO_RESOURCES: text = "No resources"; break; case SA_AIS_ERR_NOT_SUPPORTED: text = "Not supported"; break; case SA_AIS_ERR_BAD_OPERATION: text = "Bad operation"; break; case SA_AIS_ERR_FAILED_OPERATION: text = "Failed operation"; break; case SA_AIS_ERR_MESSAGE_ERROR: text = "Message error"; break; case SA_AIS_ERR_QUEUE_FULL: text = "Queue full"; break; case SA_AIS_ERR_QUEUE_NOT_AVAILABLE: text = "Queue not available"; break; case SA_AIS_ERR_BAD_FLAGS: text = "Bad flags"; break; case SA_AIS_ERR_TOO_BIG: text = "To big"; break; case SA_AIS_ERR_NO_SECTIONS: text = "No sections"; break; } return text; } #endif static inline const char *msg_type2text(enum crm_ais_msg_types type) { const char *text = "unknown"; switch(type) { case crm_msg_none: text = "unknown"; break; case crm_msg_ais: text = "ais"; break; case crm_msg_cib: text = "cib"; break; case crm_msg_crmd: text = "crmd"; break; case crm_msg_pe: text = "pengine"; break; case crm_msg_te: text = "tengine"; break; case crm_msg_lrmd: text = "lrmd"; break; case crm_msg_attrd: text = "attrd"; break; + case crm_msg_stonithd: + text = "stonithd"; + break; } return text; } static inline const char *peer2text(enum crm_proc_flag proc) { const char *text = "unknown"; switch(proc) { case crm_proc_none: text = "unknown"; break; case crm_proc_ais: text = "ais"; break; case crm_proc_cib: text = "cib"; break; case crm_proc_crmd: text = "crmd"; break; case crm_proc_pe: text = "pengine"; break; case crm_proc_te: text = "tengine"; break; case crm_proc_lrmd: text = "lrmd"; break; case crm_proc_attrd: text = "attrd"; break; - case crm_proc_stonith: - text = "stonith"; + case crm_proc_stonithd: + text = "stonithd"; break; } return text; } static inline const char *ais_dest(struct crm_ais_host_s *host) { if(host->local) { return "local"; } else if(host->size > 0) { return host->uname; } else { return ""; } } #define ais_data_len(msg) (msg->is_compressed?msg->compressed_size:msg->size) #endif diff --git a/include/crm/crm.h b/include/crm/crm.h index 466200939c..7b085ef668 100644 --- a/include/crm/crm.h +++ b/include/crm/crm.h @@ -1,349 +1,350 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef CRM__H #define CRM__H #include #include #include #undef MIN #undef MAX #include #include #include #ifdef MCHECK #include #endif #include #define CRM_FEATURE_SET "2.1" #define MINIMUM_SCHEMA_VERSION "pacemaker-0.7" #define LATEST_SCHEMA_VERSION "pacemaker-"DTD_VERSION #define EOS '\0' #define DIMOF(a) ((int) (sizeof(a)/sizeof(a[0])) ) #define HAURL(url) HA_URLBASE url #ifndef CRM_DEV_BUILD # define CRM_DEV_BUILD 0 #endif #define CRM_DEPRECATED_SINCE_2_0_1 0 #define CRM_DEPRECATED_SINCE_2_0_2 0 #define CRM_DEPRECATED_SINCE_2_0_3 0 #define CRM_DEPRECATED_SINCE_2_0_4 0 #define CRM_DEPRECATED_SINCE_2_0_5 0 #define CRM_DEPRECATED_SINCE_2_0_6 1 #define CRM_DEPRECATED_SINCE_2_0_7 1 #define CRM_DEPRECATED_SINCE_2_0_8 1 #define CRM_DEPRECATED_SINCE_2_1_0 1 #define CRM_META "CRM_meta" #define crm_meta_name(field) CRM_META"_"field #define ipc_call_diff_max_ms 5000 #define action_diff_warn_ms 5000 #define action_diff_max_ms 20000 #define fsa_diff_warn_ms 10000 #define fsa_diff_max_ms 30000 #include #define CRM_ASSERT(expr) if((expr) == FALSE) { \ crm_abort(__FILE__, __PRETTY_FUNCTION__, __LINE__, #expr, TRUE, FALSE); \ } extern gboolean crm_assert_failed; #define CRM_DEV_ASSERT(expr) \ crm_assert_failed = FALSE; \ if((expr) == FALSE) { \ crm_assert_failed = TRUE; \ crm_abort(__FILE__,__PRETTY_FUNCTION__,__LINE__, #expr, FALSE, TRUE); \ } #define CRM_CHECK(expr, failure_action) if((expr) == FALSE) { \ crm_abort(__FILE__,__PRETTY_FUNCTION__,__LINE__, #expr, FALSE, TRUE); \ failure_action; \ } #define CRM_CHECK_AND_STORE(expr, failure_action) if((expr) == FALSE) { \ crm_abort(__FILE__,__PRETTY_FUNCTION__,__LINE__, #expr, TRUE, TRUE); \ failure_action; \ } extern const char *crm_system_name; /* Clean these up at some point, some probably should be runtime options */ #define WORKING_DIR HA_VARLIBDIR"/heartbeat/crm" #define CRM_SOCK_DIR HA_VARRUNDIR"/heartbeat/crm" #define BIN_DIR HA_LIBDIR"/heartbeat" #define SOCKET_LEN 1024 #define APPNAME_LEN 256 #define MAX_IPC_FAIL 5 #define CIB_FILENAME WORKING_DIR"/cib.xml" #define CIB_BACKUP WORKING_DIR"/cib_backup.xml" #define MSG_LOG 1 #define DOT_FSA_ACTIONS 1 #define DOT_ALL_FSA_INPUTS 1 /* #define FSA_TRACE 1 */ #define INFINITY_S "INFINITY" #define MINUS_INFINITY_S "-INFINITY" #define INFINITY 1000000 /* Sub-systems */ #define CRM_SYSTEM_DC "dc" #define CRM_SYSTEM_DCIB "dcib" /* The master CIB */ #define CRM_SYSTEM_CIB "cib" #define CRM_SYSTEM_CRMD "crmd" #define CRM_SYSTEM_LRMD "lrmd" #define CRM_SYSTEM_PENGINE "pengine" #define CRM_SYSTEM_TENGINE "tengine" +#define CRM_SYSTEM_STONITHD "stonithd" /* Valid operations */ #define CRM_OP_NOOP "noop" #define CRM_OP_JOIN_ANNOUNCE "join_announce" #define CRM_OP_JOIN_OFFER "join_offer" #define CRM_OP_JOIN_REQUEST "join_request" #define CRM_OP_JOIN_ACKNAK "join_ack_nack" #define CRM_OP_JOIN_CONFIRM "join_confirm" #define CRM_OP_DIE "die_no_respawn" #define CRM_OP_RETRIVE_CIB "retrieve_cib" #define CRM_OP_PING "ping" #define CRM_OP_VOTE "vote" #define CRM_OP_NOVOTE "no-vote" #define CRM_OP_HELLO "hello" #define CRM_OP_HBEAT "dc_beat" #define CRM_OP_PECALC "pe_calc" #define CRM_OP_ABORT "abort" #define CRM_OP_QUIT "quit" #define CRM_OP_LOCAL_SHUTDOWN "start_shutdown" #define CRM_OP_SHUTDOWN_REQ "req_shutdown" #define CRM_OP_SHUTDOWN "do_shutdown" #define CRM_OP_FENCE "stonith" #define CRM_OP_EVENTCC "event_cc" #define CRM_OP_TEABORT "te_abort" #define CRM_OP_TEABORTED "te_abort_confirmed" /* we asked */ #define CRM_OP_TE_HALT "te_halt" #define CRM_OP_TECOMPLETE "te_complete" #define CRM_OP_TETIMEOUT "te_timeout" #define CRM_OP_TRANSITION "transition" #define CRM_OP_REGISTER "register" #define CRM_OP_DEBUG_UP "debug_inc" #define CRM_OP_DEBUG_DOWN "debug_dec" #define CRM_OP_INVOKE_LRM "lrm_invoke" #define CRM_OP_LRM_REFRESH "lrm_refresh" #define CRM_OP_LRM_QUERY "lrm_query" #define CRM_OP_LRM_DELETE "lrm_delete" #define CRM_OP_LRM_FAIL "lrm_fail" #define CRM_OP_PROBED "probe_complete" #define CRM_OP_REPROBE "probe_again" #define CRMD_STATE_ACTIVE "member" #define CRMD_STATE_INACTIVE "down" #define CRMD_JOINSTATE_DOWN CRMD_STATE_INACTIVE #define CRMD_JOINSTATE_PENDING "pending" #define CRMD_JOINSTATE_MEMBER CRMD_STATE_ACTIVE #define CRMD_ACTION_DELETE "delete" #define CRMD_ACTION_CANCEL "cancel" #define CRMD_ACTION_MIGRATE "migrate_to" #define CRMD_ACTION_MIGRATED "migrate_from" #define CRMD_ACTION_START "start" #define CRMD_ACTION_STARTED "running" #define CRMD_ACTION_STOP "stop" #define CRMD_ACTION_STOPPED "stopped" #define CRMD_ACTION_PROMOTE "promote" #define CRMD_ACTION_PROMOTED "promoted" #define CRMD_ACTION_DEMOTE "demote" #define CRMD_ACTION_DEMOTED "demoted" #define CRMD_ACTION_NOTIFY "notify" #define CRMD_ACTION_NOTIFIED "notified" #define CRMD_ACTION_STATUS "monitor" /* short names */ #define RSC_DELETE CRMD_ACTION_DELETE #define RSC_CANCEL CRMD_ACTION_CANCEL #define RSC_MIGRATE CRMD_ACTION_MIGRATE #define RSC_MIGRATED CRMD_ACTION_MIGRATED #define RSC_START CRMD_ACTION_START #define RSC_STARTED CRMD_ACTION_STARTED #define RSC_STOP CRMD_ACTION_STOP #define RSC_STOPPED CRMD_ACTION_STOPPED #define RSC_PROMOTE CRMD_ACTION_PROMOTE #define RSC_PROMOTED CRMD_ACTION_PROMOTED #define RSC_DEMOTE CRMD_ACTION_DEMOTE #define RSC_DEMOTED CRMD_ACTION_DEMOTED #define RSC_NOTIFY CRMD_ACTION_NOTIFY #define RSC_NOTIFIED CRMD_ACTION_NOTIFIED #define RSC_STATUS CRMD_ACTION_STATUS typedef GList* GListPtr; #define slist_destroy(child_type, child, parent, a) \ { \ GListPtr __crm_iter_head = parent; \ child_type *child = NULL; \ while(__crm_iter_head != NULL) { \ child = (child_type *) __crm_iter_head->data; \ __crm_iter_head = __crm_iter_head->next; \ { a; } \ } \ g_list_free(parent); \ } #define slist_iter(child, child_type, parent, counter, a) \ { \ GListPtr __crm_iter_head = parent; \ child_type *child = NULL; \ int counter = 0; \ for(; __crm_iter_head != NULL; counter++) { \ child = (child_type *) __crm_iter_head->data; \ __crm_iter_head = __crm_iter_head->next; \ { a; } \ } \ } #define LOG_DEBUG_2 LOG_DEBUG+1 #define LOG_DEBUG_3 LOG_DEBUG+2 #define LOG_DEBUG_4 LOG_DEBUG+3 #define LOG_DEBUG_5 LOG_DEBUG+4 #define LOG_DEBUG_6 LOG_DEBUG+5 #define LOG_MSG LOG_DEBUG_3 /* * Throughout the macros below, note the leading, pre-comma, space in the * various ' , ##args' occurences to aid portability across versions of 'gcc'. * http://gcc.gnu.org/onlinedocs/cpp/Variadic-Macros.html#Variadic-Macros */ #define do_crm_log(level, fmt, args...) do { \ if(crm_log_level < (level)) { \ continue; \ } else if((level) > LOG_DEBUG) { \ cl_log(LOG_DEBUG, "debug%d: %s: " fmt, \ level-LOG_INFO, __PRETTY_FUNCTION__ , ##args); \ } else { \ cl_log(level, "%s: " fmt, \ __PRETTY_FUNCTION__ , ##args); \ } \ } while(0) #define crm_crit(fmt, args...) do_crm_log(LOG_CRIT, fmt , ##args) #define crm_err(fmt, args...) do_crm_log(LOG_ERR, fmt , ##args) #define crm_warn(fmt, args...) do_crm_log(LOG_WARNING, fmt , ##args) #define crm_notice(fmt, args...) do_crm_log(LOG_NOTICE, fmt , ##args) #define crm_info(fmt, args...) do_crm_log(LOG_INFO, fmt , ##args) #define crm_debug(fmt, args...) do_crm_log(LOG_DEBUG, fmt , ##args) #define crm_debug_2(fmt, args...) do_crm_log(LOG_DEBUG_2, fmt , ##args) #define crm_debug_3(fmt, args...) do_crm_log(LOG_DEBUG_3, fmt , ##args) #define crm_debug_4(fmt, args...) do_crm_log(LOG_DEBUG_4, fmt , ##args) #define crm_debug_5(fmt, args...) do_crm_log(LOG_DEBUG_5, fmt , ##args) #define crm_debug_6(fmt, args...) do_crm_log(LOG_DEBUG_6, fmt , ##args) extern void crm_log_message_adv( int level, const char *alt_debugfile, const HA_Message *msg); #define crm_log_xml(level, text, xml) if(crm_log_level >= (level)) { \ print_xml_formatted(level, __PRETTY_FUNCTION__, xml, text); \ } #define crm_log_xml_crit(xml, text) crm_log_xml(LOG_CRIT, text, xml) #define crm_log_xml_err(xml, text) crm_log_xml(LOG_ERR, text, xml) #define crm_log_xml_warn(xml, text) crm_log_xml(LOG_WARNING, text, xml) #define crm_log_xml_notice(xml, text) crm_log_xml(LOG_NOTICE, text, xml) #define crm_log_xml_info(xml, text) crm_log_xml(LOG_INFO, text, xml) #define crm_log_xml_debug(xml, text) crm_log_xml(LOG_DEBUG, text, xml) #define crm_log_xml_debug_2(xml, text) crm_log_xml(LOG_DEBUG_2, text, xml) #define crm_log_xml_debug_3(xml, text) crm_log_xml(LOG_DEBUG_3, text, xml) #define crm_log_xml_debug_4(xml, text) crm_log_xml(LOG_DEBUG_4, text, xml) #define crm_log_xml_debug_5(xml, text) crm_log_xml(LOG_DEBUG_5, text, xml) #define crm_str(x) (const char*)(x?x:"") #if CRM_DEV_BUILD # define crm_malloc0(malloc_obj, length) do { \ if(malloc_obj) { \ crm_err("Potential memory leak:" \ " %s at %s:%d not NULL before alloc.", \ #malloc_obj, __FILE__, __LINE__); \ } \ malloc_obj = cl_malloc(length); \ if(malloc_obj == NULL) { \ crm_err("Failed allocation of %lu bytes", (unsigned long)length); \ CRM_ASSERT(malloc_obj != NULL); \ } \ memset(malloc_obj, 0, length); \ } while(0) /* it's not a memory leak to already have an object to realloc, that's * the usual case, however if it does have a value, it must have been * allocated by the same allocator! */ # define crm_realloc(realloc_obj, length) do { \ if (realloc_obj != NULL) { \ CRM_ASSERT(cl_is_allocated(realloc_obj) == 1); \ } \ realloc_obj = cl_realloc(realloc_obj, length); \ CRM_ASSERT(realloc_obj != NULL); \ } while(0) # define crm_free(free_obj) if(free_obj) { \ CRM_ASSERT(cl_is_allocated(free_obj) == 1); \ cl_free(free_obj); \ free_obj=NULL; \ } #else # define crm_malloc0(malloc_obj, length) do { \ malloc_obj = cl_malloc(length); \ if(malloc_obj == NULL) { \ crm_err("Failed allocation of %lu bytes", (unsigned long)length); \ CRM_ASSERT(malloc_obj != NULL); \ } \ memset(malloc_obj, 0, length); \ } while(0) # define crm_realloc(realloc_obj, length) do { \ realloc_obj = cl_realloc(realloc_obj, length); \ CRM_ASSERT(realloc_obj != NULL); \ } while(0) # define crm_free(free_obj) if(free_obj) { cl_free(free_obj); free_obj=NULL; } #endif #define crm_msg_del(msg) if(msg != NULL) { ha_msg_del(msg); msg = NULL; } #define crm_strdup(str) crm_strdup_fn(str, __FILE__, __PRETTY_FUNCTION__, __LINE__) #endif diff --git a/lib/Makefile.am b/lib/Makefile.am index 386f183668..8f1f9368aa 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -1,34 +1,34 @@ # # heartbeat library directory: Linux-HA code # # Copyright (C) 2001 Alan Robertson # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in # # PILS is supposed to be an independent library # Stonith should rely only on PILS # # The rest can use clplumbing, and the plugins should be able # to use any of the libraries (nothing can be linked against them) # # ## Subdirectories... -SUBDIRS = crm +SUBDIRS = crm fencing plugins diff --git a/lib/crm/common/ais.c b/lib/crm/common/ais.c index 7fcf1a2298..2d06c3c092 100644 --- a/lib/crm/common/ais.c +++ b/lib/crm/common/ais.c @@ -1,490 +1,492 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include "stack.h" #include enum crm_ais_msg_types text2msg_type(const char *text) { int type = crm_msg_none; CRM_CHECK(text != NULL, return type); if(safe_str_eq(text, "ais")) { type = crm_msg_ais; } else if(safe_str_eq(text, "crm_plugin")) { type = crm_msg_ais; } else if(safe_str_eq(text, CRM_SYSTEM_CIB)) { type = crm_msg_cib; } else if(safe_str_eq(text, CRM_SYSTEM_CRMD)) { type = crm_msg_crmd; } else if(safe_str_eq(text, CRM_SYSTEM_DC)) { type = crm_msg_crmd; } else if(safe_str_eq(text, CRM_SYSTEM_TENGINE)) { type = crm_msg_te; } else if(safe_str_eq(text, CRM_SYSTEM_PENGINE)) { type = crm_msg_pe; } else if(safe_str_eq(text, CRM_SYSTEM_LRMD)) { type = crm_msg_lrmd; + } else if(safe_str_eq(text, CRM_SYSTEM_STONITHD)) { + type = crm_msg_stonithd; } else { crm_debug_2("Unknown message type: %s", text); } return type; } char *get_ais_data(AIS_Message *msg) { int rc = BZ_OK; char *uncompressed = NULL; unsigned int new_size = msg->size; if(msg->is_compressed == FALSE) { crm_debug_2("Returning uncompressed message data"); uncompressed = strdup(msg->data); } else { crm_debug_2("Decompressing message data"); crm_malloc0(uncompressed, new_size); rc = BZ2_bzBuffToBuffDecompress( uncompressed, &new_size, msg->data, msg->compressed_size, 1, 0); CRM_ASSERT(rc = BZ_OK); CRM_ASSERT(new_size == msg->size); } return uncompressed; } #if SUPPORT_AIS int ais_fd_sync = -1; static int ais_fd_async = -1; /* never send messages via this channel */ GFDSource *ais_source = NULL; GFDSource *ais_source_sync = NULL; struct res_overlay { mar_res_header_t header __attribute((aligned(8))); /* char buf[4096]; */ }; gboolean send_ais_text(int class, const char *data, gboolean local, const char *node, enum crm_ais_msg_types dest) { int retries = 0; static int msg_id = 0; static int local_pid = 0; int rc = SA_AIS_OK; mar_res_header_t header; AIS_Message *ais_msg = NULL; enum crm_ais_msg_types sender = text2msg_type(crm_system_name); if(local_pid == 0) { local_pid = getpid(); } CRM_CHECK(data != NULL, return FALSE); crm_malloc0(ais_msg, sizeof(AIS_Message)); ais_msg->id = msg_id++; ais_msg->header.id = class; ais_msg->host.type = dest; ais_msg->host.local = local; if(node) { ais_msg->host.size = strlen(node); memset(ais_msg->host.uname, 0, MAX_NAME); memcpy(ais_msg->host.uname, node, ais_msg->host.size); ais_msg->host.id = 0; } else { ais_msg->host.size = 0; memset(ais_msg->host.uname, 0, MAX_NAME); ais_msg->host.id = 0; } ais_msg->sender.type = sender; ais_msg->sender.pid = local_pid; ais_msg->sender.size = 0; memset(ais_msg->sender.uname, 0, MAX_NAME); ais_msg->sender.id = 0; ais_msg->size = 1 + strlen(data); if(ais_msg->size < 5120) { failback: crm_realloc(ais_msg, sizeof(AIS_Message) + ais_msg->size); memcpy(ais_msg->data, data, ais_msg->size); } else { char *compressed = NULL; char *uncompressed = crm_strdup(data); unsigned int len = (ais_msg->size * 1.1) + 600; /* recomended size */ crm_debug_5("Compressing message payload"); crm_malloc0(compressed, len); rc = BZ2_bzBuffToBuffCompress( compressed, &len, uncompressed, ais_msg->size, 3, 0, 30); crm_free(uncompressed); if(rc != BZ_OK) { crm_err("Compression failed: %d", rc); crm_free(compressed); goto failback; } crm_realloc(ais_msg, sizeof(AIS_Message) + len + 1); memcpy(ais_msg->data, compressed, len); crm_free(compressed); ais_msg->is_compressed = TRUE; ais_msg->compressed_size = len; crm_debug("Compression details: %d -> %d", ais_msg->size, ais_data_len(ais_msg)); } ais_msg->header.size = sizeof(AIS_Message) + ais_data_len(ais_msg); crm_debug("Sending%s message %d to %s.%s (data=%d, total=%d)", ais_msg->is_compressed?" compressed":"", ais_msg->id, ais_dest(&(ais_msg->host)), msg_type2text(dest), ais_data_len(ais_msg), ais_msg->header.size); retry: errno = 0; rc = saSendReceiveReply(ais_fd_sync, ais_msg, ais_msg->header.size, &header, sizeof (mar_res_header_t)); if(rc == SA_AIS_OK) { CRM_CHECK(header.error == 0, rc = header.error); } if(rc == SA_AIS_ERR_TRY_AGAIN && retries < 20) { retries++; crm_info("Peer overloaded: Re-sending message (Attempt %d of 20)", retries); mssleep(retries * 100); /* Proportional back off */ goto retry; } if(rc != SA_AIS_OK) { cl_perror("Sending message %d: FAILED (rc=%d): %s", ais_msg->id, rc, ais_error2text(rc)); ais_fd_async = -1; } else { crm_debug_4("Message %d: sent", ais_msg->id); } crm_free(ais_msg); return (rc == SA_AIS_OK); } gboolean send_ais_message(xmlNode *msg, gboolean local, const char *node, enum crm_ais_msg_types dest) { gboolean rc = TRUE; char *data = NULL; if(ais_fd_async < 0 || ais_source == NULL) { crm_err("Not connected to AIS"); return FALSE; } data = dump_xml_unformatted(msg); rc = send_ais_text(0, data, local, node, dest); crm_free(data); return rc; } void terminate_ais_connection(void) { close(ais_fd_sync); close(ais_fd_async); crm_notice("Disconnected from AIS"); /* G_main_del_fd(ais_source); */ /* G_main_del_fd(ais_source_sync); */ } static gboolean ais_dispatch(int sender, gpointer user_data) { char *data = NULL; char *uncompressed = NULL; AIS_Message *msg = NULL; SaAisErrorT rc = SA_AIS_OK; mar_res_header_t *header = NULL; static int header_len = sizeof(mar_res_header_t); gboolean (*dispatch)(AIS_Message*,char*,int) = user_data; crm_malloc0(header, header_len); errno = 0; rc = saRecvRetry(sender, header, header_len); if (rc != SA_AIS_OK) { cl_perror("Receiving message header failed: (%d) %s", rc, ais_error2text(rc)); goto bail; } else if(header->size == header_len) { crm_err("Empty message: error=%d", header->error); goto done; } else if(header->size == 0 || header->size < header_len) { crm_err("Mangled header: size=%d, header=%d, error=%d", header->size, header_len, header->error); goto done; } else if(header->error != 0) { crm_err("Header contined error: %d", header->error); } crm_debug_2("Looking for %d (%d - %d) more bytes", header->size - header_len, header->size, header_len); crm_realloc(header, header->size); /* Use a char* so we can store the remainder into an offset */ data = (char*)header; errno = 0; rc = saRecvRetry(sender, data+header_len, header->size - header_len); msg = (AIS_Message*)data; if (rc != SA_AIS_OK) { cl_perror("Receiving message body failed: (%d) %s", rc, ais_error2text(rc)); goto bail; } crm_debug_3("Got new%s message (size=%d, %d, %d)", msg->is_compressed?" compressed":"", ais_data_len(msg), msg->size, msg->compressed_size); data = msg->data; if(msg->is_compressed) { int rc = BZ_OK; unsigned int new_size = msg->size; if(check_message_sanity(msg, NULL) == FALSE) { goto badmsg; } crm_debug_5("Decompressing message data"); crm_malloc0(uncompressed, new_size); rc = BZ2_bzBuffToBuffDecompress( uncompressed, &new_size, data, msg->compressed_size, 1, 0); if(rc != BZ_OK) { crm_err("Decompression failed: %d", rc); crm_free(uncompressed); goto badmsg; } CRM_ASSERT(rc == BZ_OK); CRM_ASSERT(new_size == msg->size); data = uncompressed; } else if(check_message_sanity(msg, data) == FALSE) { goto badmsg; } else if(safe_str_eq("identify", data)) { int pid = getpid(); char *pid_s = crm_itoa(pid); send_ais_text(0, pid_s, TRUE, NULL, crm_msg_ais); crm_free(pid_s); goto done; } if(msg->header.id == crm_class_members) { xmlNode *xml = string2xml(data); if(xml != NULL) { const char *seq_s = crm_element_value(xml, "id"); unsigned long seq = crm_int_helper(seq_s, NULL); crm_info("Processing membership %ld/%s", seq, seq_s); /* crm_log_xml_debug(xml, __PRETTY_FUNCTION__); */ xml_child_iter(xml, node, crm_update_ais_node(node, seq)); crm_calculate_quorum(); } else { crm_warn("Invalid peer update: %s", data); } free_xml(xml); } if(dispatch != NULL) { dispatch(msg, data, sender); } done: crm_free(uncompressed); crm_free(msg); return TRUE; badmsg: crm_err("Invalid message (id=%d, dest=%s:%s, from=%s:%s.%d):" " min=%d, total=%d, size=%d, bz2_size=%d", msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, (int)sizeof(AIS_Message), msg->header.size, msg->size, msg->compressed_size); goto done; bail: crm_err("AIS connection failed"); return FALSE; } static void ais_destroy(gpointer user_data) { crm_err("AIS connection terminated"); ais_fd_sync = -1; exit(1); } gboolean init_ais_connection( gboolean (*dispatch)(AIS_Message*,char*,int), void (*destroy)(gpointer), char **our_uuid, char **our_uname) { int rc = SA_AIS_OK; struct utsname name; if(our_uname != NULL) { if(uname(&name) < 0) { cl_perror("uname(2) call failed"); exit(100); } *our_uname = crm_strdup(name.nodename); crm_notice("Local node name: %s", *our_uname); } if(our_uuid != NULL) { *our_uuid = crm_strdup(name.nodename); } /* 16 := CRM_SERVICE */ crm_info("Creating connection to our AIS plugin"); rc = saServiceConnect (&ais_fd_async, &ais_fd_sync, 16); if (rc != SA_AIS_OK) { crm_info("Connection to our AIS plugin failed"); return FALSE; } if(destroy == NULL) { crm_debug("Using the default destroy handler"); destroy = ais_destroy; } crm_info("AIS connection established"); #if 0 ais_source_sync = G_main_add_fd( G_PRIORITY_HIGH, ais_fd_sync, FALSE, ais_dispatch, dispatch, destroy); #endif ais_source = G_main_add_fd( G_PRIORITY_HIGH, ais_fd_async, FALSE, ais_dispatch, dispatch, destroy); return TRUE; } gboolean check_message_sanity(AIS_Message *msg, char *data) { gboolean sane = TRUE; gboolean repaired = FALSE; int dest = msg->host.type; int tmp_size = msg->header.size - sizeof(AIS_Message); if(sane && msg->header.size == 0) { crm_warn("Message with no size"); sane = FALSE; } if(sane && msg->header.error != 0) { crm_warn("Message header contains an error: %d", msg->header.error); sane = FALSE; } if(sane && ais_data_len(msg) != tmp_size) { int cur_size = ais_data_len(msg); repaired = TRUE; if(msg->is_compressed) { msg->compressed_size = tmp_size; } else { msg->size = tmp_size; } crm_warn("Repaired message payload size %d -> %d", cur_size, tmp_size); } if(sane && ais_data_len(msg) == 0) { crm_warn("Message with no payload"); sane = FALSE; } if(sane && data && msg->is_compressed == FALSE) { int str_size = strlen(data) + 1; if(ais_data_len(msg) != str_size) { int lpc = 0; crm_warn("Message payload is corrupted: expected %d bytes, got %d", ais_data_len(msg), str_size); sane = FALSE; for(lpc = (str_size - 10); lpc < msg->size; lpc++) { if(lpc < 0) { lpc = 0; } crm_debug("bad_data[%d]: %d / '%c'", lpc, data[lpc], data[lpc]); } } } if(sane == FALSE) { crm_err("Invalid message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } else if(repaired) { crm_err("Repaired message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } else { crm_debug_3("Verfied message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } return sane; } #endif diff --git a/lib/crm/pengine/native.c b/lib/crm/pengine/native.c index 28460aa788..d04807d9dd 100644 --- a/lib/crm/pengine/native.c +++ b/lib/crm/pengine/native.c @@ -1,422 +1,422 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #define DELETE_THEN_REFRESH 1 #define VARIANT_NATIVE 1 #include "./variant.h" void native_add_running(resource_t *rsc, node_t *node, pe_working_set_t *data_set) { CRM_CHECK(node != NULL, return); slist_iter( a_node, node_t, rsc->running_on, lpc, CRM_CHECK(a_node != NULL, return); if(safe_str_eq(a_node->details->id, node->details->id)) { return; } ); crm_debug_3("Adding %s to %s", rsc->id, node->details->uname); rsc->running_on = g_list_append(rsc->running_on, node); if(rsc->variant == pe_native) { node->details->running_rsc = g_list_append( node->details->running_rsc, rsc); } if(is_not_set(rsc->flags, pe_rsc_managed)) { crm_info("resource %s isnt managed", rsc->id); resource_location(rsc, node, INFINITY, "not_managed_default", data_set); return; } if(rsc->variant == pe_native && g_list_length(rsc->running_on) > 1) { const char *type = crm_element_value(rsc->xml, XML_ATTR_TYPE); const char *class = crm_element_value( rsc->xml, XML_AGENT_ATTR_CLASS); /* these are errors because hardly any gets it right * at the moment and this way the might notice */ pe_proc_err("Resource %s::%s:%s appears to be active on %d nodes.", class, type, rsc->id, g_list_length(rsc->running_on)); cl_log(LOG_ERR, "See %s for more information.", HAURL("v2/faq/resource_too_active")); if(rsc->recovery_type == recovery_stop_only) { crm_debug("Making sure %s doesn't come up again", rsc->id); /* make sure it doesnt come up again */ pe_free_shallow_adv(rsc->allowed_nodes, TRUE); rsc->allowed_nodes = node_list_dup( data_set->nodes, FALSE, FALSE); slist_iter( node, node_t, rsc->allowed_nodes, lpc, node->weight = -INFINITY; ); } else if(rsc->recovery_type == recovery_block) { clear_bit(rsc->flags, pe_rsc_managed); } } else { crm_debug_3("Resource %s is active on: %s", rsc->id, node->details->uname); } if(rsc->parent != NULL) { native_add_running(rsc->parent, node, data_set); } } gboolean native_unpack(resource_t *rsc, pe_working_set_t *data_set) { native_variant_data_t *native_data = NULL; crm_debug_3("Processing resource %s...", rsc->id); crm_malloc0(native_data, sizeof(native_variant_data_t)); rsc->allowed_nodes = NULL; rsc->running_on = NULL; rsc->variant_opaque = native_data; return TRUE; } resource_t * native_find_child(resource_t *rsc, const char *id) { if(rsc->children) { return pe_find_resource(rsc->children, id); } return NULL; } GListPtr native_children(resource_t *rsc) { return rsc->children; } static void hash_copy_field(gpointer key, gpointer value, gpointer user_data) { const char *name = key; const char *s_value = value; GHashTable *hash_copy = user_data; g_hash_table_insert(hash_copy, crm_strdup(name), crm_strdup(s_value)); } char * native_parameter( resource_t *rsc, node_t *node, gboolean create, const char *name, pe_working_set_t *data_set) { char *value_copy = NULL; const char *value = NULL; GHashTable *hash = rsc->parameters; GHashTable *local_hash = NULL; CRM_CHECK(rsc != NULL, return NULL); CRM_CHECK(name != NULL && strlen(name) != 0, return NULL); crm_debug_2("Looking up %s in %s", name, rsc->id); if(create) { if(node != NULL) { crm_debug_2("Creating hash with node %s", node->details->uname); } else { crm_debug_2("Creating default hash"); } local_hash = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); g_hash_table_foreach( rsc->parameters, hash_copy_field, local_hash); unpack_instance_attributes( rsc->xml, XML_TAG_ATTR_SETS, node?node->details->attrs:NULL, local_hash, NULL, FALSE, data_set->now); hash = local_hash; } value = g_hash_table_lookup(hash, name); if(value == NULL) { /* try meta attributes instead */ value = g_hash_table_lookup(rsc->meta, name); } if(value != NULL) { value_copy = crm_strdup(value); } if(local_hash != NULL) { g_hash_table_destroy(local_hash); } return value_copy; } gboolean native_active(resource_t *rsc, gboolean all) { slist_iter( a_node, node_t, rsc->running_on, lpc, if(a_node->details->online == FALSE) { crm_debug("Resource %s: node %s is offline", rsc->id, a_node->details->uname); } else if(a_node->details->unclean) { crm_debug("Resource %s: node %s is unclean", rsc->id, a_node->details->uname); } else { crm_debug("Resource %s active on %s", rsc->id, a_node->details->uname); return TRUE; } ); return FALSE; } struct print_data_s { long options; void *print_data; }; static void native_print_attr(gpointer key, gpointer value, gpointer user_data) { long options = ((struct print_data_s*)user_data)->options; void *print_data = ((struct print_data_s*)user_data)->print_data; status_print("Option: %s = %s\n", (char*)key, (char*)value); } void native_print( resource_t *rsc, const char *pre_text, long options, void *print_data) { node_t *node = NULL; const char *prov = NULL; const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); if(safe_str_eq(class, "ocf")) { prov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); } if(rsc->running_on != NULL) { node = rsc->running_on->data; } if(options & pe_print_html) { if(is_not_set(rsc->flags, pe_rsc_managed)) { status_print(""); } else if(is_set(rsc->flags, pe_rsc_failed)) { status_print(""); } else if(rsc->variant == pe_native && g_list_length(rsc->running_on) == 0) { status_print(""); } else if(g_list_length(rsc->running_on) > 1) { status_print(""); } else { status_print(""); } } if((options & pe_print_rsconly) || g_list_length(rsc->running_on) > 1) { const char *desc = NULL; desc = crm_element_value(rsc->xml, XML_ATTR_DESC); status_print("%s%s\t(%s%s%s:%s%s)%s%s", pre_text?pre_text:"", rsc->id, class, prov?"::":"", prov?prov:"", crm_element_value(rsc->xml, XML_ATTR_TYPE), is_set(rsc->flags, pe_rsc_orphan)?" ORPHANED":"", desc?": ":"", desc?desc:""); } else { status_print("%s%s\t(%s%s%s:%s%s):\t%s %s%s%s", pre_text?pre_text:"", rsc->id, class, prov?"::":"", prov?prov:"", crm_element_value(rsc->xml, XML_ATTR_TYPE), is_set(rsc->flags, pe_rsc_orphan)?" ORPHANED":"", (rsc->variant!=pe_native)?"":role2text(rsc->role), (rsc->variant!=pe_native)?"":node!=NULL?node->details->uname:"", is_set(rsc->flags, pe_rsc_managed)?"":" (unmanaged)", is_set(rsc->flags, pe_rsc_failed)?" FAILED":""); #if CURSES_ENABLED if(options & pe_print_ncurses) { move(-1, 0); } #endif } if(options & pe_print_html) { status_print(" "); } if((options & pe_print_rsconly)) { } else if(g_list_length(rsc->running_on) > 1) { if(options & pe_print_html) { status_print("
    \n"); } else if((options & pe_print_printf) || (options & pe_print_ncurses)) { status_print("["); } slist_iter(node, node_t, rsc->running_on, lpc, if(options & pe_print_html) { status_print("
  • \n%s", node->details->uname); } else if((options & pe_print_printf) || (options & pe_print_ncurses)) { status_print("\t%s", node->details->uname); } else if((options & pe_print_log)) { status_print("\t%d : %s", lpc, node->details->uname); } else { status_print("%s", node->details->uname); } if(options & pe_print_html) { status_print("
  • \n"); } ); if(options & pe_print_html) { status_print("
\n"); } else if((options & pe_print_printf) || (options & pe_print_ncurses)) { status_print(" ]"); } } if(options & pe_print_html) { status_print("
\n"); } else if(options & pe_print_suppres_nl) { /* nothing */ } else if((options & pe_print_printf) || (options & pe_print_ncurses)) { status_print("\n"); } if(options & pe_print_details) { struct print_data_s pdata; pdata.options = options; pdata.print_data = print_data; g_hash_table_foreach(rsc->parameters, native_print_attr, &pdata); } if(options & pe_print_dev) { status_print("%s\t(%s%svariant=%s, priority=%f)", pre_text, is_set(rsc->flags, pe_rsc_provisional)?"provisional, ":"", is_set(rsc->flags, pe_rsc_runnable)?"":"non-startable, ", crm_element_name(rsc->xml), (double)rsc->priority); status_print("%s\tAllowed Nodes", pre_text); slist_iter(node, node_t, rsc->allowed_nodes, lpc, status_print("%s\t * %s %d", pre_text, node->details->uname, node->weight); ); } if(options & pe_print_max_details) { status_print("%s\t=== Allowed Nodes\n", pre_text); slist_iter( node, node_t, rsc->allowed_nodes, lpc, print_node("\t", node, FALSE); ); } } void native_free(resource_t *rsc) { crm_debug_4("Freeing resource action list (not the data)"); common_free(rsc); } enum rsc_role_e native_resource_state(const resource_t *rsc, gboolean current) { enum rsc_role_e role = rsc->next_role; if(current) { role = rsc->role; } - crm_debug_2("%s state: %s", rsc->id, role2text(role)); + crm_debug_4("%s state: %s", rsc->id, role2text(role)); return role; } node_t *native_location(resource_t *rsc, GListPtr *list, gboolean current) { node_t *one = NULL; GListPtr result = NULL; if(rsc->children) { slist_iter(child, resource_t, rsc->children, lpc, child->fns->location(child, &result, current); ); } else if(current && rsc->running_on) { result = g_list_copy(rsc->running_on); } else if(current == FALSE && rsc->allocated_to) { result = g_list_append(NULL, rsc->allocated_to); } if(result && g_list_length(result) == 1) { one = g_list_nth_data(result, 0); } if(list) { slist_iter(node, node_t, result, lpc, if(*list == NULL || pe_find_node_id(*list, node->details->id) == NULL) { *list = g_list_append(*list, node); } ); } g_list_free(result); return one; } diff --git a/lib/crm/pengine/unpack.c b/lib/crm/pengine/unpack.c index b63352d35f..9a6fc5781c 100644 --- a/lib/crm/pengine/unpack.c +++ b/lib/crm/pengine/unpack.c @@ -1,1455 +1,1462 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include /* for ONLINESTATUS */ #include #include #include #include gboolean unpack_config(xmlNode *config, pe_working_set_t *data_set) { const char *name = NULL; const char *value = NULL; GHashTable *config_hash = g_hash_table_new_full( g_str_hash,g_str_equal, g_hash_destroy_str,g_hash_destroy_str); data_set->config_hash = config_hash; unpack_instance_attributes( config, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, FALSE, data_set->now); #if CRM_DEPRECATED_SINCE_2_0_1 xml_child_iter_filter( config, a_child, XML_CIB_TAG_NVPAIR, name = crm_element_value(a_child, XML_NVPAIR_ATTR_NAME); value = crm_element_value(a_child, XML_NVPAIR_ATTR_VALUE); if(g_hash_table_lookup(config_hash, name) == NULL) { g_hash_table_insert( config_hash,crm_strdup(name),crm_strdup(value)); } crm_config_err("Creating directly" "beneath has been depreciated since" " 2.0.1", ID(a_child), name); ); #else xml_child_iter_filter( config, a_child, XML_CIB_TAG_NVPAIR, name = crm_element_value(a_child, XML_NVPAIR_ATTR_NAME); crm_config_err("Creating directly" "beneath has been depreciated since" " 2.0.1 and is now disabled", ID(a_child), name); ); #endif verify_pe_options(data_set->config_hash); value = pe_pref(data_set->config_hash, "default-action-timeout"); data_set->transition_idle_timeout = crm_strdup(value); crm_debug("Default action timeout: %s", data_set->transition_idle_timeout); value = pe_pref(data_set->config_hash, "default-resource-stickiness"); data_set->default_resource_stickiness = char2score(value); crm_debug("Default stickiness: %d", data_set->default_resource_stickiness); value = pe_pref(data_set->config_hash, "stop-all-resources"); data_set->stop_everything = crm_is_true(value); crm_debug("Stop all active resources: %s", data_set->stop_everything?"true":"false"); value = pe_pref(data_set->config_hash, "default-failure-timeout"); data_set->default_failure_timeout = (crm_get_msec(value) / 1000); crm_debug("Default failure timeout: %d", data_set->default_failure_timeout); value = pe_pref(data_set->config_hash, "default-migration-threshold"); data_set->default_migration_threshold = char2score(value); crm_debug("Default migration threshold: %d", data_set->default_migration_threshold); value = pe_pref(data_set->config_hash, "stonith-enabled"); cl_str_to_boolean(value, &data_set->stonith_enabled); crm_debug("STONITH of failed nodes is %s", data_set->stonith_enabled?"enabled":"disabled"); data_set->stonith_action = pe_pref(data_set->config_hash, "stonith-action"); crm_debug_2("STONITH will %s nodes", data_set->stonith_action); value = pe_pref(data_set->config_hash, "symmetric-cluster"); cl_str_to_boolean(value, &data_set->symmetric_cluster); if(data_set->symmetric_cluster) { crm_debug("Cluster is symmetric" " - resources can run anywhere by default"); } value = pe_pref(data_set->config_hash, "no-quorum-policy"); if(safe_str_eq(value, "ignore")) { data_set->no_quorum_policy = no_quorum_ignore; } else if(safe_str_eq(value, "freeze")) { data_set->no_quorum_policy = no_quorum_freeze; } else { data_set->no_quorum_policy = no_quorum_stop; } switch (data_set->no_quorum_policy) { case no_quorum_freeze: crm_debug("On loss of CCM Quorum: Freeze resources"); break; case no_quorum_stop: crm_debug("On loss of CCM Quorum: Stop ALL resources"); break; case no_quorum_ignore: crm_notice("On loss of CCM Quorum: Ignore"); break; } value = pe_pref(data_set->config_hash, "stop-orphan-resources"); cl_str_to_boolean(value, &data_set->stop_rsc_orphans); crm_debug_2("Orphan resources are %s", data_set->stop_rsc_orphans?"stopped":"ignored"); value = pe_pref(data_set->config_hash, "stop-orphan-actions"); cl_str_to_boolean(value, &data_set->stop_action_orphans); crm_debug_2("Orphan resource actions are %s", data_set->stop_action_orphans?"stopped":"ignored"); value = pe_pref(data_set->config_hash, "remove-after-stop"); cl_str_to_boolean(value, &data_set->remove_after_stop); crm_debug_2("Stopped resources are removed from the status section: %s", data_set->remove_after_stop?"true":"false"); value = pe_pref(data_set->config_hash, "is-managed-default"); cl_str_to_boolean(value, &data_set->is_managed_default); crm_debug_2("By default resources are %smanaged", data_set->is_managed_default?"":"not "); value = pe_pref(data_set->config_hash, "start-failure-is-fatal"); cl_str_to_boolean(value, &data_set->start_failure_fatal); crm_debug_2("Start failures are %s", data_set->start_failure_fatal?"always fatal":"handled by failcount"); return TRUE; } gboolean unpack_nodes(xmlNode * xml_nodes, pe_working_set_t *data_set) { node_t *new_node = NULL; const char *id = NULL; const char *uname = NULL; const char *type = NULL; gboolean unseen_are_unclean = TRUE; const char *blind_faith = pe_pref( data_set->config_hash, "startup-fencing"); if(crm_is_true(blind_faith) == FALSE) { unseen_are_unclean = FALSE; crm_warn("Blind faith: not fencing unseen nodes"); } xml_child_iter_filter( xml_nodes, xml_obj, XML_CIB_TAG_NODE, new_node = NULL; id = crm_element_value(xml_obj, XML_ATTR_ID); uname = crm_element_value(xml_obj, XML_ATTR_UNAME); type = crm_element_value(xml_obj, XML_ATTR_TYPE); crm_debug_3("Processing node %s/%s", uname, id); if(id == NULL) { crm_config_err("Must specify id tag in "); continue; } if(type == NULL) { crm_config_err("Must specify type tag in "); continue; } if(pe_find_node(data_set->nodes, uname) != NULL) { crm_config_warn("Detected multiple node entries with uname=%s" " - this is rarely intended", uname); } crm_malloc0(new_node, sizeof(node_t)); if(new_node == NULL) { return FALSE; } new_node->weight = 0; new_node->fixed = FALSE; crm_malloc0(new_node->details, sizeof(struct node_shared_s)); if(new_node->details == NULL) { crm_free(new_node); return FALSE; } crm_debug_3("Creaing node for entry %s/%s", uname, id); new_node->details->id = id; new_node->details->uname = uname; new_node->details->type = node_ping; new_node->details->online = FALSE; new_node->details->shutdown = FALSE; new_node->details->running_rsc = NULL; new_node->details->attrs = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); /* if(data_set->have_quorum == FALSE */ /* && data_set->no_quorum_policy == no_quorum_stop) { */ /* /\* start shutting resources down *\/ */ /* new_node->weight = -INFINITY; */ /* } */ if(data_set->stonith_enabled == FALSE || unseen_are_unclean == FALSE) { /* blind faith... */ new_node->details->unclean = FALSE; } else { /* all nodes are unclean until we've seen their * status entry */ new_node->details->unclean = TRUE; } if(type == NULL || safe_str_eq(type, "member") || safe_str_eq(type, NORMALNODE)) { new_node->details->type = node_member; } add_node_attrs(xml_obj, new_node, FALSE, data_set); data_set->nodes = g_list_append(data_set->nodes, new_node); crm_debug_3("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME)); ); return TRUE; } gboolean unpack_resources(xmlNode * xml_resources, pe_working_set_t *data_set) { xml_child_iter( xml_resources, xml_obj, resource_t *new_rsc = NULL; crm_debug_3("Begining unpack... %s", xml_obj?crm_element_name(xml_obj):""); if(common_unpack(xml_obj, &new_rsc, NULL, data_set)) { data_set->resources = g_list_append( data_set->resources, new_rsc); print_resource(LOG_DEBUG_3, "Added", new_rsc, FALSE); } else { crm_config_err("Failed unpacking %s %s", crm_element_name(xml_obj), crm_element_value(xml_obj, XML_ATTR_ID)); if(new_rsc != NULL && new_rsc->fns != NULL) { new_rsc->fns->free(new_rsc); } } ); data_set->resources = g_list_sort( data_set->resources, sort_rsc_priority); return TRUE; } /* remove nodes that are down, stopping */ /* create +ve rsc_to_node constraints between resources and the nodes they are running on */ /* anything else? */ gboolean unpack_status(xmlNode * status, pe_working_set_t *data_set) { const char *id = NULL; const char *uname = NULL; xmlNode * lrm_rsc = NULL; xmlNode * attrs = NULL; node_t *this_node = NULL; crm_debug_3("Begining unpack"); xml_child_iter_filter( status, node_state, XML_CIB_TAG_STATE, id = crm_element_value(node_state, XML_ATTR_ID); uname = crm_element_value(node_state, XML_ATTR_UNAME); attrs = find_xml_node( node_state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); lrm_rsc = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE); crm_debug_3("Processing node %s", uname); this_node = pe_find_node_id(data_set->nodes, id); if(uname == NULL) { /* error */ continue; } else if(this_node == NULL) { crm_config_warn("Node %s in status section no longer exists", uname); continue; } /* Mark the node as provisionally clean * - at least we have seen it in the current cluster's lifetime */ this_node->details->unclean = FALSE; crm_debug_3("Adding runtime node attrs"); add_node_attrs(attrs, this_node, TRUE, data_set); if(crm_is_true(g_hash_table_lookup(this_node->details->attrs, "standby"))) { crm_info("Node %s is in standby-mode", this_node->details->uname); this_node->details->standby = TRUE; } crm_debug_3("determining node state"); determine_online_status(node_state, this_node, data_set); if(this_node->details->online || data_set->stonith_enabled) { /* offline nodes run no resources... * unless stonith is enabled in which case we need to * make sure rsc start events happen after the stonith */ crm_debug_3("Processing lrm resource entries"); unpack_lrm_resources(this_node, lrm_rsc, data_set); } ); return TRUE; } static gboolean determine_online_status_no_fencing(xmlNode * node_state, node_t *this_node) { gboolean online = FALSE; const char *join_state = crm_element_value(node_state, XML_CIB_ATTR_JOINSTATE); const char *crm_state = crm_element_value(node_state, XML_CIB_ATTR_CRMDSTATE); const char *ccm_state = crm_element_value(node_state, XML_CIB_ATTR_INCCM); const char *ha_state = crm_element_value(node_state, XML_CIB_ATTR_HASTATE); const char *exp_state = crm_element_value(node_state, XML_CIB_ATTR_EXPSTATE); if(ha_state == NULL) { ha_state = DEADSTATUS; } if(!crm_is_true(ccm_state) || safe_str_eq(ha_state, DEADSTATUS)){ crm_debug_2("Node is down: ha_state=%s, ccm_state=%s", crm_str(ha_state), crm_str(ccm_state)); } else if(!crm_is_true(ccm_state) || safe_str_eq(ha_state, DEADSTATUS)) { } else if(safe_str_eq(crm_state, ONLINESTATUS)) { if(safe_str_eq(join_state, CRMD_JOINSTATE_MEMBER)) { online = TRUE; } else { crm_debug("Node is not ready to run resources: %s", join_state); } } else if(this_node->details->expected_up == FALSE) { crm_debug_2("CRMd is down: ha_state=%s, ccm_state=%s", crm_str(ha_state), crm_str(ccm_state)); crm_debug_2("\tcrm_state=%s, join_state=%s, expected=%s", crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } else { /* mark it unclean */ this_node->details->unclean = TRUE; crm_warn("Node %s is partially & un-expectedly down", this_node->details->uname); crm_info("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } return online; } static gboolean determine_online_status_fencing(xmlNode * node_state, node_t *this_node) { gboolean online = FALSE; const char *join_state = crm_element_value(node_state, XML_CIB_ATTR_JOINSTATE); const char *crm_state = crm_element_value(node_state, XML_CIB_ATTR_CRMDSTATE); const char *ccm_state = crm_element_value(node_state, XML_CIB_ATTR_INCCM); const char *ha_state = crm_element_value(node_state, XML_CIB_ATTR_HASTATE); const char *exp_state = crm_element_value(node_state, XML_CIB_ATTR_EXPSTATE); if(ha_state == NULL) { ha_state = DEADSTATUS; } if(crm_is_true(ccm_state) && safe_str_eq(ha_state, ACTIVESTATUS) && safe_str_eq(crm_state, ONLINESTATUS)) { online = TRUE; if(safe_str_neq(join_state, CRMD_JOINSTATE_MEMBER)) { crm_info("Node %s is not ready to run resources", this_node->details->uname); this_node->details->standby = TRUE; } } else if(crm_is_true(ccm_state) == FALSE && safe_str_eq(ha_state, DEADSTATUS) && safe_str_eq(crm_state, OFFLINESTATUS) && this_node->details->expected_up == FALSE) { crm_debug("Node %s is down: join_state=%s, expected=%s", this_node->details->uname, crm_str(join_state), crm_str(exp_state)); } else if(this_node->details->expected_up) { /* mark it unclean */ this_node->details->unclean = TRUE; crm_warn("Node %s (%s) is un-expectedly down", this_node->details->uname, this_node->details->id); crm_info("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } else { crm_info("Node %s is comming up", this_node->details->uname); crm_debug("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } return online; } gboolean determine_online_status( xmlNode * node_state, node_t *this_node, pe_working_set_t *data_set) { gboolean online = FALSE; const char *shutdown = crm_element_value(node_state, XML_CIB_ATTR_SHUTDOWN); const char *exp_state = crm_element_value(node_state, XML_CIB_ATTR_EXPSTATE); if(this_node == NULL) { crm_config_err("No node to check"); return online; } shutdown = crm_element_value(node_state, XML_CIB_ATTR_SHUTDOWN); this_node->details->expected_up = FALSE; if(safe_str_eq(exp_state, CRMD_JOINSTATE_MEMBER)) { this_node->details->expected_up = TRUE; } this_node->details->shutdown = FALSE; if(shutdown != NULL && safe_str_neq("0", shutdown)) { this_node->details->shutdown = TRUE; this_node->details->expected_up = FALSE; } if(data_set->stonith_enabled == FALSE) { online = determine_online_status_no_fencing( node_state, this_node); } else { online = determine_online_status_fencing( node_state, this_node); } if(online) { this_node->details->online = TRUE; } else { /* remove node from contention */ this_node->fixed = TRUE; this_node->weight = -INFINITY; } if(online && this_node->details->shutdown) { /* dont run resources here */ this_node->fixed = TRUE; this_node->weight = -INFINITY; } if(this_node->details->unclean) { pe_proc_warn("Node %s is unclean", this_node->details->uname); } else if(this_node->details->online) { crm_info("Node %s is %s", this_node->details->uname, this_node->details->shutdown?"shutting down": this_node->details->standby?"standby":"online"); } else { crm_debug_2("Node %s is offline", this_node->details->uname); } return online; } #define set_char(x) last_rsc_id[lpc] = x; complete = TRUE; static char * increment_clone(char *last_rsc_id) { int lpc = 0; int len = 0; char *tmp = NULL; gboolean complete = FALSE; CRM_CHECK(last_rsc_id != NULL, return NULL); if(last_rsc_id != NULL) { len = strlen(last_rsc_id); } lpc = len-1; while(complete == FALSE && lpc > 0) { switch (last_rsc_id[lpc]) { case 0: lpc--; break; case '0': set_char('1'); break; case '1': set_char('2'); break; case '2': set_char('3'); break; case '3': set_char('4'); break; case '4': set_char('5'); break; case '5': set_char('6'); break; case '6': set_char('7'); break; case '7': set_char('8'); break; case '8': set_char('9'); break; case '9': last_rsc_id[lpc] = '0'; lpc--; break; case ':': tmp = last_rsc_id; crm_malloc0(last_rsc_id, len + 2); memcpy(last_rsc_id, tmp, len); last_rsc_id[++lpc] = '1'; last_rsc_id[len] = '0'; last_rsc_id[len+1] = 0; complete = TRUE; crm_free(tmp); break; default: crm_err("Unexpected char: %c (%d)", last_rsc_id[lpc], lpc); break; } } return last_rsc_id; } static resource_t * create_fake_resource(const char *rsc_id, xmlNode *rsc_entry, pe_working_set_t *data_set) { resource_t *rsc = NULL; xmlNode *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE); copy_in_properties(xml_rsc, rsc_entry); crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id); crm_log_xml_info(xml_rsc, "Orphan resource"); common_unpack(xml_rsc, &rsc, NULL, data_set); set_bit(rsc->flags, pe_rsc_orphan); data_set->resources = g_list_append(data_set->resources, rsc); return rsc; } static resource_t * unpack_find_resource( pe_working_set_t *data_set, node_t *node, const char *rsc_id, xmlNode *rsc_entry) { resource_t *rsc = NULL; gboolean is_duped_clone = FALSE; char *alt_rsc_id = crm_strdup(rsc_id); while(rsc == NULL) { crm_debug_3("looking for: %s", alt_rsc_id); rsc = pe_find_resource(data_set->resources, alt_rsc_id); /* no match */ if(rsc == NULL) { crm_debug_2("%s not found: %d", alt_rsc_id, is_duped_clone); if(is_duped_clone) { /* create one */ rsc = create_fake_resource(alt_rsc_id, rsc_entry, data_set); crm_info("Making sure orphan %s/%s is stopped on %s", rsc_id, rsc->id, node->details->uname); resource_location(rsc, NULL, -INFINITY, "__orphan_clone_dont_run__", data_set); } break; /* not running anywhere else */ } else if(rsc->running_on == NULL) { crm_debug_3("not active yet"); break; /* always unique */ } else if(is_set(rsc->flags, pe_rsc_unique)) { crm_debug_3("unique"); break; /* running somewhere already but we dont care * find another clone instead */ } else { crm_debug_3("find another one"); rsc = NULL; is_duped_clone = TRUE; alt_rsc_id = increment_clone(alt_rsc_id); } } crm_free(alt_rsc_id); if(rsc != NULL) { crm_free(rsc->clone_name); rsc->clone_name = NULL; if(is_duped_clone) { crm_info("Internally renamed %s on %s to %s", rsc_id, node->details->uname, rsc->id); rsc->clone_name = crm_strdup(rsc_id); } } return rsc; } static resource_t * process_orphan_resource(xmlNode *rsc_entry, node_t *node, pe_working_set_t *data_set) { resource_t *rsc = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); crm_log_xml_info(rsc_entry, "Orphan resource"); crm_config_warn("Nothing known about resource %s running on %s", rsc_id, node->details->uname); rsc = create_fake_resource(rsc_id, rsc_entry, data_set); if(data_set->stop_rsc_orphans == FALSE) { clear_bit(rsc->flags, pe_rsc_managed); } else { crm_info("Making sure orphan %s is stopped", rsc_id); print_resource(LOG_DEBUG_3, "Added orphan", rsc, FALSE); CRM_CHECK(rsc != NULL, return NULL); resource_location(rsc, NULL, -INFINITY, "__orphan_dont_run__", data_set); } return rsc; } static void process_rsc_state(resource_t *rsc, node_t *node, enum action_fail_response on_fail, xmlNode *migrate_op, pe_working_set_t *data_set) { if(on_fail == action_migrate_failure) { node_t *from = NULL; const char *uuid = NULL; uuid = crm_element_value(migrate_op, CRMD_ACTION_MIGRATED); from = pe_find_node_id(data_set->nodes, uuid); process_rsc_state(rsc, from, action_fail_recover,NULL,data_set); on_fail = action_fail_recover; } crm_debug_2("Resource %s is %s on %s", rsc->id, role2text(rsc->role), node->details->uname); /* process current state */ if(rsc->role != RSC_ROLE_UNKNOWN) { rsc->known_on = g_list_append(rsc->known_on, node); } if(rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { if(on_fail != action_fail_ignore) { set_bit(rsc->flags, pe_rsc_failed); crm_debug_2("Force stop"); } native_add_running(rsc, node, data_set); if(is_set(rsc->flags, pe_rsc_managed) && rsc->stickiness != 0) { + node_t *match = pe_find_node_id(rsc->allowed_nodes, node->details->id); + + if(match != NULL || data_set->symmetric_cluster) { resource_location(rsc, node, rsc->stickiness, "stickiness", data_set); crm_debug_2("Resource %s: preferring current location" " (node=%s, weight=%d)", rsc->id, node->details->uname, rsc->stickiness); + } else { + crm_debug("Ignoring stickiness for %s: the cluster is asymmetric and node %s is no longer explicitly allowed", + rsc->id, node->details->uname); + } } if(on_fail == action_fail_ignore) { /* nothing to do */ } else if(node->details->unclean) { stop_action(rsc, node, FALSE); } else if(on_fail == action_fail_fence) { /* treat it as if it is still running * but also mark the node as unclean */ node->details->unclean = TRUE; stop_action(rsc, node, FALSE); } else if(on_fail == action_fail_block) { /* is_managed == FALSE will prevent any * actions being sent for the resource */ clear_bit(rsc->flags, pe_rsc_managed); } else if(on_fail == action_fail_migrate) { stop_action(rsc, node, FALSE); /* make sure it comes up somewhere else * or not at all */ resource_location(rsc, node, -INFINITY, "__action_migration_auto__",data_set); } else { stop_action(rsc, node, FALSE); } } else if(rsc->clone_name) { crm_debug_2("Resetting clone_name %s for %s (stopped)", rsc->clone_name, rsc->id); crm_free(rsc->clone_name); rsc->clone_name = NULL; } else { char *key = stop_key(rsc); GListPtr possible_matches = find_actions(rsc->actions, key, node); slist_iter(stop, action_t, possible_matches, lpc, stop->optional = TRUE; ); crm_free(key); } } /* create active recurring operations as optional */ static void process_recurring(node_t *node, resource_t *rsc, int start_index, int stop_index, GListPtr sorted_op_list, pe_working_set_t *data_set) { const char *task = NULL; const char *status = NULL; crm_debug_3("%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index); slist_iter(rsc_op, xmlNode, sorted_op_list, lpc, int interval = 0; char *key = NULL; const char *id = ID(rsc_op); const char *interval_s = NULL; if(node->details->online == FALSE) { crm_debug_4("Skipping %s/%s: node is offline", rsc->id, node->details->uname); break; } else if(start_index < stop_index) { crm_debug_4("Skipping %s/%s: not active", rsc->id, node->details->uname); break; } else if(lpc <= start_index) { crm_debug_4("Skipping %s/%s: old", id, node->details->uname); continue; } interval_s = crm_element_value(rsc_op,XML_LRM_ATTR_INTERVAL); interval = crm_parse_int(interval_s, "0"); if(interval == 0) { crm_debug_4("Skipping %s/%s: non-recurring", id, node->details->uname); continue; } status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); if(safe_str_eq(status, "-1")) { crm_debug_4("Skipping %s/%s: status", id, node->details->uname); continue; } task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); /* create the action */ key = generate_op_key(rsc->id, task, interval); crm_debug_3("Creating %s/%s", key, node->details->uname); custom_action(rsc, key, task, node, TRUE, TRUE, data_set); ); } void calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index) { const char *task = NULL; const char *status = NULL; *stop_index = -1; *start_index = -1; slist_iter( rsc_op, xmlNode, sorted_op_list, lpc, task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); if(safe_str_eq(task, CRMD_ACTION_STOP) && safe_str_eq(status, "0")) { *stop_index = lpc; } else if(safe_str_eq(task, CRMD_ACTION_START)) { *start_index = lpc; } else if(*start_index <= *stop_index && safe_str_eq(task, CRMD_ACTION_STATUS)) { const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC); if(safe_str_eq(rc, "0") || safe_str_eq(rc, "8")) { *start_index = lpc; } } ); } static void unpack_lrm_rsc_state( node_t *node, xmlNode * rsc_entry, pe_working_set_t *data_set) { int stop_index = -1; int start_index = -1; int max_call_id = -1; const char *task = NULL; const char *value = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); resource_t *rsc = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; xmlNode *migrate_op = NULL; enum action_fail_response on_fail = FALSE; enum rsc_role_e saved_role = RSC_ROLE_UNKNOWN; crm_debug_3("[%s] Processing %s on %s", crm_element_name(rsc_entry), rsc_id, node->details->uname); /* extract operations */ op_list = NULL; sorted_op_list = NULL; xml_child_iter_filter( rsc_entry, rsc_op, XML_LRM_TAG_RSC_OP, op_list = g_list_append(op_list, rsc_op); ); if(op_list == NULL) { /* if there are no operations, there is nothing to do */ return; } /* find the resource */ rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry); if(rsc == NULL) { rsc = process_orphan_resource(rsc_entry, node, data_set); } CRM_ASSERT(rsc != NULL); /* process operations */ max_call_id = -1; saved_role = rsc->role; on_fail = action_fail_ignore; rsc->role = RSC_ROLE_UNKNOWN; sorted_op_list = g_list_sort(op_list, sort_op_by_callid); slist_iter( rsc_op, xmlNode, sorted_op_list, lpc, task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); if(safe_str_eq(task, CRMD_ACTION_MIGRATED)) { migrate_op = rsc_op; } unpack_rsc_op(rsc, node, rsc_op, &max_call_id, &on_fail, data_set); ); /* create active recurring operations as optional */ calculate_active_ops(sorted_op_list, &start_index, &stop_index); process_recurring(node, rsc, start_index, stop_index, sorted_op_list, data_set); /* no need to free the contents */ g_list_free(sorted_op_list); process_rsc_state(rsc, node, on_fail, migrate_op, data_set); value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); if(value != NULL && safe_str_neq("default", value)) { enum rsc_role_e req_role = text2role(value); if(req_role != RSC_ROLE_UNKNOWN && req_role != rsc->next_role){ if(rsc->next_role != RSC_ROLE_UNKNOWN) { crm_debug("%s: Overwriting calculated next role %s" " with requested next role %s", rsc->id, role2text(rsc->next_role), role2text(req_role)); } rsc->next_role = req_role; } } if(saved_role > rsc->role) { rsc->role = saved_role; } } gboolean unpack_lrm_resources(node_t *node, xmlNode * lrm_rsc_list, pe_working_set_t *data_set) { CRM_CHECK(node != NULL, return FALSE); crm_debug_3("Unpacking resources on %s", node->details->uname); xml_child_iter_filter( lrm_rsc_list, rsc_entry, XML_LRM_TAG_RESOURCE, unpack_lrm_rsc_state(node, rsc_entry, data_set); ); return TRUE; } gboolean unpack_rsc_op(resource_t *rsc, node_t *node, xmlNode *xml_op, int *max_call_id, enum action_fail_response *on_fail, pe_working_set_t *data_set) { const char *id = NULL; const char *task = NULL; const char *magic = NULL; const char *task_id = NULL; const char *actual_rc = NULL; /* const char *target_rc = NULL; */ const char *task_status = NULL; const char *interval_s = NULL; const char *op_digest = NULL; const char *op_version = NULL; int interval = 0; int task_id_i = -1; int task_status_i = -2; int actual_rc_i = 0; action_t *action = NULL; node_t *effective_node = NULL; gboolean expired = FALSE; gboolean is_probe = FALSE; gboolean is_stop_action = FALSE; CRM_CHECK(rsc != NULL, return FALSE); CRM_CHECK(node != NULL, return FALSE); CRM_CHECK(xml_op != NULL, return FALSE); id = ID(xml_op); task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); task_id = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); task_status = crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS); op_digest = crm_element_value(xml_op, XML_LRM_ATTR_OP_DIGEST); op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION); magic = crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC); CRM_CHECK(id != NULL, return FALSE); CRM_CHECK(task != NULL, return FALSE); CRM_CHECK(task_status != NULL, return FALSE); task_status_i = crm_parse_int(task_status, NULL); CRM_CHECK(task_status_i <= LRM_OP_ERROR, return FALSE); CRM_CHECK(task_status_i >= LRM_OP_PENDING, return FALSE); if(safe_str_eq(task, CRMD_ACTION_NOTIFY)) { /* safe to ignore these */ return TRUE; } if(rsc->failure_timeout > 0) { int last_run = 0; if(crm_element_value_int(xml_op, "last-run", &last_run) == 0) { /* int last_change = crm_element_value_int(xml_op, "last_rc_change"); */ time_t now = get_timet_now(data_set); if(now > (last_run + rsc->failure_timeout)) { expired = TRUE; } } } crm_debug_2("Unpacking task %s/%s (call_id=%s, status=%s) on %s (role=%s)", id, task, task_id, task_status, node->details->uname, role2text(rsc->role)); interval_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); interval = crm_parse_int(interval_s, "0"); if(interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) { is_probe = TRUE; } if(safe_str_eq(task, CRMD_ACTION_STOP)) { is_stop_action = TRUE; } if(task_status_i != LRM_OP_PENDING) { task_id_i = crm_parse_int(task_id, "-1"); CRM_CHECK(task_id != NULL, return FALSE); CRM_CHECK(task_id_i >= 0, return FALSE); CRM_CHECK(task_id_i > *max_call_id, return FALSE); } if(*max_call_id < task_id_i) { *max_call_id = task_id_i; } if(node->details->unclean) { crm_debug_2("Node %s (where %s is running) is unclean." " Further action depends on the value of the stop's on-fail attribue", node->details->uname, rsc->id); } actual_rc = crm_element_value(xml_op, XML_LRM_ATTR_RC); CRM_CHECK(actual_rc != NULL, return FALSE); actual_rc_i = crm_parse_int(actual_rc, NULL); if(task_status_i == LRM_OP_NOTSUPPORTED) { actual_rc_i = EXECRA_UNIMPLEMENT_FEATURE; } if(expired && actual_rc_i != EXECRA_NOT_RUNNING && actual_rc_i != EXECRA_RUNNING_MASTER && actual_rc_i != EXECRA_OK) { crm_notice("Ignoring expired failure %s (rc=%d, magic=%s) on %s", id, actual_rc_i, magic, node->details->uname); goto done; } switch(actual_rc_i) { case EXECRA_NOT_RUNNING: if(is_probe) { /* treat these like stops */ is_stop_action = TRUE; } if(is_stop_action) { task_status_i = LRM_OP_DONE; } else { task_status_i = LRM_OP_ERROR; } break; case EXECRA_RUNNING_MASTER: if(is_probe || (rsc->role == RSC_ROLE_MASTER && safe_str_eq(task, CRMD_ACTION_STATUS))) { task_status_i = LRM_OP_DONE; } else { task_status_i = LRM_OP_ERROR; if(rsc->role != RSC_ROLE_MASTER) { /* this wil happen normally if the PE is invoked after * a resource is demoted and re-promoted but before the * 'master' monitor has been re-initiated * * The monitor will occur before the promote and appear * to be an error (the error status is be cleared by a * successful demote) */ crm_warn("%s reported %s in master mode on %s", id, rsc->id, node->details->uname); } } rsc->role = RSC_ROLE_MASTER; break; case EXECRA_FAILED_MASTER: rsc->role = RSC_ROLE_MASTER; task_status_i = LRM_OP_ERROR; break; case EXECRA_UNIMPLEMENT_FEATURE: if(interval > 0) { task_status_i = LRM_OP_ERROR; break; } /* else: fall through */ case EXECRA_INSUFFICIENT_PRIV: case EXECRA_NOT_INSTALLED: effective_node = node; /* fall through */ case EXECRA_NOT_CONFIGURED: case EXECRA_INVALID_PARAM: crm_err("Hard error - %s failed with rc=%d: Preventing %s from re-starting %s %s", id, actual_rc_i, rsc->id, effective_node?"on":"anywhere", effective_node?effective_node->details->uname:"in the cluster"); resource_location(rsc, effective_node, -INFINITY, "hard-error", data_set); if(is_probe) { /* treat these like stops */ is_stop_action = TRUE; task_status_i = LRM_OP_DONE; actual_rc_i = EXECRA_NOT_RUNNING; } else { task_status_i = LRM_OP_ERROR; } break; case EXECRA_OK: if(interval > 0 && rsc->role == RSC_ROLE_MASTER) { /* catch status ops that return 0 instead of 8 while they * are supposed to be in master mode */ task_status_i = LRM_OP_ERROR; } break; default: if(task_status_i == LRM_OP_DONE) { crm_info("Remapping %s (rc=%d) on %s to an ERROR", id, actual_rc_i, node->details->uname); task_status_i = LRM_OP_ERROR; } } if(task_status_i == LRM_OP_ERROR || task_status_i == LRM_OP_TIMEOUT || task_status_i == LRM_OP_NOTSUPPORTED) { action = custom_action(rsc, crm_strdup(id), task, NULL, TRUE, FALSE, data_set); if(expired) { crm_notice("Ignoring expired failure (calculated) %s (rc=%d, magic=%s) on %s", id, actual_rc_i, magic, node->details->uname); goto done; } else if(action->on_fail == action_fail_ignore) { crm_warn("Remapping %s (rc=%d) on %s to DONE: ignore", id, actual_rc_i, node->details->uname); task_status_i = LRM_OP_DONE; } } switch(task_status_i) { case LRM_OP_PENDING: if(safe_str_eq(task, CRMD_ACTION_START)) { set_bit(rsc->flags, pe_rsc_start_pending); rsc->role = RSC_ROLE_STARTED; } else if(safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } break; case LRM_OP_DONE: crm_debug_3("%s/%s completed on %s", rsc->id, task, node->details->uname); if(is_stop_action) { rsc->role = RSC_ROLE_STOPPED; /* clear any previous failure actions */ *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; } else if(safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } else if(safe_str_eq(task, CRMD_ACTION_DEMOTE)) { rsc->role = RSC_ROLE_SLAVE; } else if(rsc->role < RSC_ROLE_STARTED) { crm_debug_3("%s active on %s", rsc->id, node->details->uname); rsc->role = RSC_ROLE_STARTED; } break; case LRM_OP_ERROR: case LRM_OP_TIMEOUT: case LRM_OP_NOTSUPPORTED: crm_warn("Processing failed op %s on %s: %s", id, node->details->uname, op_status2text(task_status_i)); crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname); add_node_copy(data_set->failed, xml_op); if(*on_fail < action->on_fail) { *on_fail = action->on_fail; } if(is_stop_action) { resource_location( rsc, node, -INFINITY, "__stop_fail__", data_set); } else if((data_set->start_failure_fatal || compare_version("2.0", op_version) > 0) && safe_str_eq(task, CRMD_ACTION_START)) { crm_warn("Compatability handling for failed op %s on %s", id, node->details->uname); resource_location( rsc, node, -INFINITY, "__legacy_start__", data_set); } if(safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } else if(safe_str_eq(task, CRMD_ACTION_DEMOTE)) { /* * staying in role=master ends up putting the PE/TE into a loop * setting role=slave is not dangerous because no master will be * promoted until the failed resource has been fully stopped */ crm_warn("Forcing %s to stop after a failed demote action", rsc->id); rsc->next_role = RSC_ROLE_STOPPED; rsc->role = RSC_ROLE_SLAVE; } else if(rsc->role < RSC_ROLE_STARTED) { rsc->role = RSC_ROLE_STARTED; } crm_debug_2("Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s", rsc->id, role2text(rsc->role), node->details->unclean?"true":"false", fail2text(action->on_fail), role2text(action->fail_role)); if(action->fail_role != RSC_ROLE_STARTED && rsc->next_role < action->fail_role) { rsc->next_role = action->fail_role; } if(action->fail_role == RSC_ROLE_STOPPED) { crm_err("Making sure %s doesn't come up again", rsc->id); /* make sure it doesnt come up again */ pe_free_shallow_adv(rsc->allowed_nodes, TRUE); rsc->allowed_nodes = node_list_dup( data_set->nodes, FALSE, FALSE); slist_iter( node, node_t, rsc->allowed_nodes, lpc, node->weight = -INFINITY; ); } pe_free_action(action); action = NULL; break; case LRM_OP_CANCELLED: /* do nothing?? */ pe_err("Dont know what to do for cancelled ops yet"); break; } done: crm_debug_3("Resource %s after %s: role=%s", rsc->id, task, role2text(rsc->role)); pe_free_action(action); return TRUE; } gboolean add_node_attrs(xmlNode *xml_obj, node_t *node, gboolean overwrite, pe_working_set_t *data_set) { g_hash_table_insert(node->details->attrs, crm_strdup("#"XML_ATTR_UNAME), crm_strdup(node->details->uname)); g_hash_table_insert(node->details->attrs, crm_strdup("#"XML_ATTR_ID), crm_strdup(node->details->id)); if(safe_str_eq(node->details->id, data_set->dc_uuid)) { data_set->dc_node = node; node->details->is_dc = TRUE; g_hash_table_insert(node->details->attrs, crm_strdup("#"XML_ATTR_DC), crm_strdup(XML_BOOLEAN_TRUE)); } else { g_hash_table_insert(node->details->attrs, crm_strdup("#"XML_ATTR_DC), crm_strdup(XML_BOOLEAN_FALSE)); } unpack_instance_attributes( xml_obj, XML_TAG_ATTR_SETS, NULL, node->details->attrs, NULL, overwrite, data_set->now); return TRUE; } static GListPtr extract_operations(const char *node, const char *rsc, xmlNode *rsc_entry, gboolean active_filter) { int stop_index = -1; int start_index = -1; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; /* extract operations */ op_list = NULL; sorted_op_list = NULL; xml_child_iter_filter( rsc_entry, rsc_op, XML_LRM_TAG_RSC_OP, crm_xml_add(rsc_op, "resource", rsc); crm_xml_add(rsc_op, XML_ATTR_UNAME, node); op_list = g_list_append(op_list, rsc_op); ); if(op_list == NULL) { /* if there are no operations, there is nothing to do */ return NULL; } sorted_op_list = g_list_sort(op_list, sort_op_by_callid); /* create active recurring operations as optional */ if(active_filter == FALSE) { return sorted_op_list; } op_list = NULL; calculate_active_ops(sorted_op_list, &start_index, &stop_index); slist_iter(rsc_op, xmlNode, sorted_op_list, lpc, if(start_index < stop_index) { crm_debug_4("Skipping %s: not active", ID(rsc_entry)); break; } else if(lpc < start_index) { crm_debug_4("Skipping %s: old", ID(rsc_op)); continue; } op_list = g_list_append(op_list, rsc_op); ); g_list_free(sorted_op_list); return op_list; } GListPtr find_operations( const char *rsc, const char *node, gboolean active_filter, pe_working_set_t *data_set) { GListPtr output = NULL; GListPtr intermediate = NULL; xmlNode *tmp = NULL; xmlNode *status = find_xml_node(data_set->input, XML_CIB_TAG_STATUS, TRUE); const char *uname = NULL; node_t *this_node = NULL; xml_child_iter_filter( status, node_state, XML_CIB_TAG_STATE, uname = crm_element_value(node_state, XML_ATTR_UNAME); if(node != NULL && safe_str_neq(uname, node)) { continue; } this_node = pe_find_node(data_set->nodes, uname); CRM_CHECK(this_node != NULL, continue); determine_online_status(node_state, this_node, data_set); if(this_node->details->online || data_set->stonith_enabled) { /* offline nodes run no resources... * unless stonith is enabled in which case we need to * make sure rsc start events happen after the stonith */ tmp = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); tmp = find_xml_node(tmp, XML_LRM_TAG_RESOURCES, FALSE); xml_child_iter_filter( tmp, lrm_rsc, XML_LRM_TAG_RESOURCE, const char *rsc_id = crm_element_value(lrm_rsc, XML_ATTR_ID); if(rsc != NULL && safe_str_neq(rsc_id, rsc)) { continue; } intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter); output = g_list_concat(output, intermediate); ); } ); return output; } diff --git a/lib/crm/pengine/utils.c b/lib/crm/pengine/utils.c index 5896b7aa8f..7aa4d8c8fc 100644 --- a/lib/crm/pengine/utils.c +++ b/lib/crm/pengine/utils.c @@ -1,1302 +1,1302 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include void print_str_str(gpointer key, gpointer value, gpointer user_data); gboolean ghash_free_str_str(gpointer key, gpointer value, gpointer user_data); void unpack_operation( action_t *action, xmlNode *xml_obj, pe_working_set_t* data_set); void pe_free_shallow(GListPtr alist) { pe_free_shallow_adv(alist, TRUE); } void pe_free_shallow_adv(GListPtr alist, gboolean with_data) { GListPtr item; GListPtr item_next = alist; if(with_data == FALSE && alist != NULL) { g_list_free(alist); return; } while(item_next != NULL) { item = item_next; item_next = item_next->next; if(with_data) { /* crm_debug_5("freeing %p", item->data); */ crm_free(item->data); } item->data = NULL; item->next = NULL; g_list_free_1(item); } } node_t * node_copy(node_t *this_node) { node_t *new_node = NULL; CRM_CHECK(this_node != NULL, return NULL); crm_malloc0(new_node, sizeof(node_t)); CRM_ASSERT(new_node != NULL); crm_debug_5("Copying %p (%s) to %p", this_node, this_node->details->uname, new_node); new_node->weight = this_node->weight; new_node->fixed = this_node->fixed; new_node->details = this_node->details; return new_node; } /* are the contents of list1 and list2 equal * nodes with weight < 0 are ignored if filter == TRUE * * slow but linear * */ gboolean node_list_eq(GListPtr list1, GListPtr list2, gboolean filter) { node_t *other_node; GListPtr lhs = list1; GListPtr rhs = list2; slist_iter( node, node_t, lhs, lpc, if(node == NULL || (filter && node->weight < 0)) { continue; } other_node = (node_t*) pe_find_node_id(rhs, node->details->id); if(other_node == NULL || other_node->weight < 0) { return FALSE; } ); lhs = list2; rhs = list1; slist_iter( node, node_t, lhs, lpc, if(node == NULL || (filter && node->weight < 0)) { continue; } other_node = (node_t*) pe_find_node_id(rhs, node->details->id); if(other_node == NULL || other_node->weight < 0) { return FALSE; } ); return TRUE; } /* any node in list1 or list2 and not in the other gets a score of -INFINITY */ GListPtr node_list_exclude(GListPtr list1, GListPtr list2) { node_t *other_node = NULL; GListPtr result = NULL; result = node_list_dup(list1, FALSE, FALSE); slist_iter( node, node_t, result, lpc, other_node = pe_find_node_id(list2, node->details->id); if(other_node == NULL) { node->weight = -INFINITY; } else { node->weight = merge_weights(node->weight, other_node->weight); } ); slist_iter( node, node_t, list2, lpc, other_node = pe_find_node_id(result, node->details->id); if(other_node == NULL) { node_t *new_node = node_copy(node); new_node->weight = -INFINITY; result = g_list_append(result, new_node); } ); return result; } /* the intersection of list1 and list2 */ GListPtr node_list_and(GListPtr list1, GListPtr list2, gboolean filter) { GListPtr result = NULL; unsigned lpc = 0; for(lpc = 0; lpc < g_list_length(list1); lpc++) { node_t *node = (node_t*)g_list_nth_data(list1, lpc); node_t *other_node = pe_find_node_id(list2, node->details->id); node_t *new_node = NULL; if(other_node != NULL) { new_node = node_copy(node); } if(new_node != NULL) { crm_debug_4("%s: %d + %d", node->details->uname, other_node->weight, new_node->weight); new_node->weight = merge_weights( new_node->weight, other_node->weight); crm_debug_3("New node weight for %s: %d", new_node->details->uname, new_node->weight); if(filter && new_node->weight < 0) { crm_free(new_node); new_node = NULL; } } if(new_node != NULL) { result = g_list_append(result, new_node); } } return result; } /* list1 - list2 */ GListPtr node_list_minus(GListPtr list1, GListPtr list2, gboolean filter) { GListPtr result = NULL; slist_iter( node, node_t, list1, lpc, node_t *other_node = pe_find_node_id(list2, node->details->id); node_t *new_node = NULL; if(node == NULL || other_node != NULL || (filter && node->weight < 0)) { continue; } new_node = node_copy(node); result = g_list_append(result, new_node); ); crm_debug_3("Minus result len: %d", g_list_length(result)); return result; } /* list1 + list2 - (intersection of list1 and list2) */ GListPtr node_list_xor(GListPtr list1, GListPtr list2, gboolean filter) { GListPtr result = NULL; slist_iter( node, node_t, list1, lpc, node_t *new_node = NULL; node_t *other_node = (node_t*) pe_find_node_id(list2, node->details->id); if(node == NULL || other_node != NULL || (filter && node->weight < 0)) { continue; } new_node = node_copy(node); result = g_list_append(result, new_node); ); slist_iter( node, node_t, list2, lpc, node_t *new_node = NULL; node_t *other_node = (node_t*) pe_find_node_id(list1, node->details->id); if(node == NULL || other_node != NULL || (filter && node->weight < 0)) { continue; } new_node = node_copy(node); result = g_list_append(result, new_node); ); crm_debug_3("Xor result len: %d", g_list_length(result)); return result; } GListPtr node_list_or(GListPtr list1, GListPtr list2, gboolean filter) { node_t *other_node = NULL; GListPtr result = NULL; gboolean needs_filter = FALSE; result = node_list_dup(list1, FALSE, filter); slist_iter( node, node_t, list2, lpc, if(node == NULL) { continue; } other_node = (node_t*)pe_find_node_id( result, node->details->id); if(other_node != NULL) { crm_debug_4("%s + %s: %d + %d", node->details->uname, other_node->details->uname, node->weight, other_node->weight); other_node->weight = merge_weights( other_node->weight, node->weight); if(filter && node->weight < 0) { needs_filter = TRUE; } } else if(filter == FALSE || node->weight >= 0) { node_t *new_node = node_copy(node); result = g_list_append(result, new_node); } ); /* not the neatest way, but the most expedient for now */ if(filter && needs_filter) { GListPtr old_result = result; result = node_list_dup(old_result, FALSE, filter); pe_free_shallow_adv(old_result, TRUE); } return result; } GListPtr node_list_dup(GListPtr list1, gboolean reset, gboolean filter) { GListPtr result = NULL; slist_iter( this_node, node_t, list1, lpc, node_t *new_node = NULL; if(filter && this_node->weight < 0) { continue; } new_node = node_copy(this_node); if(reset) { new_node->weight = 0; } if(new_node != NULL) { result = g_list_append(result, new_node); } ); return result; } void dump_node_scores(int level, resource_t *rsc, const char *comment, GListPtr nodes) { GListPtr list = nodes; if(rsc) { list = rsc->allowed_nodes; } slist_iter( node, node_t, list, lpc, if(rsc) { do_crm_log(level, "%s: %s allocation score on %s: %d", comment, rsc->id, node->details->uname, node->weight); } else { do_crm_log(level, "%s: %s = %d", comment, node->details->uname, node->weight); } ); if(rsc && rsc->children) { slist_iter( child, resource_t, rsc->children, lpc, dump_node_scores(level, child, comment, nodes); ); } } gint sort_rsc_index(gconstpointer a, gconstpointer b) { const resource_t *resource1 = (const resource_t*)a; const resource_t *resource2 = (const resource_t*)b; if(a == NULL && b == NULL) { return 0; } if(a == NULL) { return 1; } if(b == NULL) { return -1; } if(resource1->sort_index > resource2->sort_index) { return -1; } if(resource1->sort_index < resource2->sort_index) { return 1; } return 0; } gint sort_rsc_priority(gconstpointer a, gconstpointer b) { const resource_t *resource1 = (const resource_t*)a; const resource_t *resource2 = (const resource_t*)b; if(a == NULL && b == NULL) { return 0; } if(a == NULL) { return 1; } if(b == NULL) { return -1; } if(resource1->priority > resource2->priority) { return -1; } if(resource1->priority < resource2->priority) { return 1; } return 0; } action_t * custom_action(resource_t *rsc, char *key, const char *task, node_t *on_node, gboolean optional, gboolean save_action, pe_working_set_t *data_set) { action_t *action = NULL; GListPtr possible_matches = NULL; CRM_CHECK(key != NULL, return NULL); CRM_CHECK(task != NULL, return NULL); if(save_action && rsc != NULL) { possible_matches = find_actions(rsc->actions, key, on_node); } if(possible_matches != NULL) { crm_free(key); if(g_list_length(possible_matches) > 1) { pe_warn("Action %s for %s on %s exists %d times", task, rsc?rsc->id:"", on_node?on_node->details->uname:"", g_list_length(possible_matches)); } action = g_list_nth_data(possible_matches, 0); crm_debug_4("Found existing action (%d) %s for %s on %s", action->id, task, rsc?rsc->id:"", on_node?on_node->details->uname:""); g_list_free(possible_matches); } if(action == NULL) { if(save_action) { - crm_debug_2("Creating%s action %d: %s for %s on %s", + crm_debug_4("Creating%s action %d: %s for %s on %s", optional?"":" manditory", data_set->action_id, key, rsc?rsc->id:"", on_node?on_node->details->uname:""); } crm_malloc0(action, sizeof(action_t)); if(save_action) { action->id = data_set->action_id++; } else { action->id = 0; } action->rsc = rsc; CRM_ASSERT(task != NULL); action->task = crm_strdup(task); action->node = on_node; action->uuid = key; action->actions_before = NULL; action->actions_after = NULL; action->failure_is_fatal = TRUE; action->pseudo = FALSE; action->dumped = FALSE; action->runnable = TRUE; action->processed = FALSE; action->optional = optional; action->seen_count = 0; action->extra = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); action->meta = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if(save_action) { data_set->actions = g_list_append( data_set->actions, action); } if(rsc != NULL) { action->op_entry = find_rsc_op_entry(rsc, key); unpack_operation( action, action->op_entry, data_set); if(save_action) { rsc->actions = g_list_append( rsc->actions, action); } } if(save_action) { crm_debug_4("Action %d created", action->id); } } if(optional == FALSE && action->optional) { crm_debug_2("Action %d (%s) marked manditory", action->id, action->uuid); action->optional = FALSE; } if(rsc != NULL) { enum action_tasks a_task = text2task(action->task); int warn_level = LOG_DEBUG_3; if(save_action) { warn_level = LOG_WARNING; } if(action->node != NULL && action->op_entry != NULL) { unpack_instance_attributes( action->op_entry, XML_TAG_ATTR_SETS, action->node->details->attrs, action->extra, NULL, FALSE, data_set->now); } if(action->pseudo) { /* leave untouched */ } else if(action->node == NULL) { action->runnable = FALSE; } else if(is_not_set(rsc->flags, pe_rsc_managed)) { do_crm_log(warn_level, "Action %s (unmanaged)", action->uuid); action->optional = TRUE; /* action->runnable = FALSE; */ } else if(action->node->details->online == FALSE) { action->runnable = FALSE; do_crm_log(warn_level, "Action %s on %s is unrunnable (offline)", action->uuid, action->node->details->uname); if(is_set(action->rsc->flags, pe_rsc_managed) && save_action && a_task == stop_rsc) { do_crm_log(warn_level, "Marking node %s unclean", action->node->details->uname); action->node->details->unclean = TRUE; } } else if(action->needs == rsc_req_nothing) { crm_debug_3("Action %s doesnt require anything", action->uuid); action->runnable = TRUE; #if 0 /* * No point checking this * - if we dont have quorum we cant stonith anyway */ } else if(action->needs == rsc_req_stonith) { crm_debug_3("Action %s requires only stonith", action->uuid); action->runnable = TRUE; #endif } else if(data_set->have_quorum == FALSE && data_set->no_quorum_policy == no_quorum_stop) { action->runnable = FALSE; crm_debug("%s\t%s (cancelled : quorum)", action->node->details->uname, action->uuid); } else if(data_set->have_quorum == FALSE && data_set->no_quorum_policy == no_quorum_freeze) { crm_debug_3("Check resource is already active"); if(rsc->fns->active(rsc, TRUE) == FALSE) { action->runnable = FALSE; crm_debug("%s\t%s (cancelled : quorum freeze)", action->node->details->uname, action->uuid); } } else { crm_debug_3("Action %s is runnable", action->uuid); action->runnable = TRUE; } if(save_action) { switch(a_task) { case stop_rsc: set_bit(rsc->flags, pe_rsc_stopping); break; case start_rsc: clear_bit(rsc->flags, pe_rsc_starting); if(action->runnable) { set_bit(rsc->flags, pe_rsc_starting); } break; default: break; } } } return action; } void unpack_operation( action_t *action, xmlNode *xml_obj, pe_working_set_t* data_set) { int value_i = 0; int start_delay = 0; char *value_ms = NULL; const char *class = NULL; const char *value = NULL; const char *field = NULL; CRM_CHECK(action->rsc != NULL, return); class = g_hash_table_lookup(action->rsc->meta, "class"); if(xml_obj != NULL) { value = crm_element_value(xml_obj, "prereq"); } if(value == NULL && safe_str_neq(action->task, CRMD_ACTION_START)) { /* todo: integrate stop as an option? */ action->needs = rsc_req_nothing; value = "nothing (default)"; } else if(safe_str_eq(value, "nothing")) { action->needs = rsc_req_nothing; } else if(safe_str_eq(value, "quorum")) { action->needs = rsc_req_quorum; } else if(safe_str_eq(value, "fencing")) { action->needs = rsc_req_stonith; } else if(data_set->no_quorum_policy == no_quorum_ignore || safe_str_eq(class, "stonith")) { action->needs = rsc_req_nothing; value = "nothing (default)"; } else if(data_set->no_quorum_policy == no_quorum_freeze && data_set->stonith_enabled) { action->needs = rsc_req_stonith; value = "fencing (default)"; } else { action->needs = rsc_req_quorum; value = "quorum (default)"; } if(safe_str_eq(class, "stonith")) { if(action->needs == rsc_req_stonith) { crm_config_err("Stonith resources (eg. %s) cannot require" " fencing to start", action->rsc->id); } action->needs = rsc_req_nothing; value = "nothing (fencing override)"; } crm_debug_3("\tAction %s requires: %s", action->task, value); value = NULL; if(xml_obj != NULL) { value = crm_element_value(xml_obj, XML_OP_ATTR_ON_FAIL); } if(value == NULL) { } else if(safe_str_eq(value, "block")) { action->on_fail = action_fail_block; } else if(safe_str_eq(value, "fence")) { action->on_fail = action_fail_fence; value = "node fencing"; if(data_set->stonith_enabled == FALSE) { crm_config_err("Specifying on_fail=fence and" " stonith-enabled=false makes no sense"); action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop resource"; } } else if(safe_str_eq(value, "ignore") || safe_str_eq(value, "nothing")) { action->on_fail = action_fail_ignore; value = "ignore"; } else if(safe_str_eq(value, "migrate")) { action->on_fail = action_fail_migrate; value = "force migration"; } else if(safe_str_eq(value, "stop")) { action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop resource"; } else if(safe_str_eq(value, "restart")) { action->on_fail = action_fail_recover; value = "restart (and possibly migrate)"; } else { pe_err("Resource %s: Unknown failure type (%s)", action->rsc->id, value); value = NULL; } /* defaults */ if(value == NULL && safe_str_eq(action->task, CRMD_ACTION_STOP)) { if(data_set->stonith_enabled) { action->on_fail = action_fail_fence; value = "resource fence (default)"; } else { action->on_fail = action_fail_block; value = "resource block (default)"; } } else if(value == NULL && safe_str_eq(action->task, CRMD_ACTION_MIGRATED)) { action->on_fail = action_migrate_failure; value = "atomic migration recovery (default)"; } else if(value == NULL) { action->on_fail = action_fail_recover; value = "restart (and possibly migrate) (default)"; } crm_debug_3("\t%s failure handling: %s", action->task, value); value = NULL; if(xml_obj != NULL) { value = crm_element_value(xml_obj, "role_after_failure"); } if(value != NULL && action->fail_role == RSC_ROLE_UNKNOWN) { action->fail_role = text2role(value); } /* defaults */ if(action->fail_role == RSC_ROLE_UNKNOWN) { if(safe_str_eq(action->task, CRMD_ACTION_PROMOTE)) { action->fail_role = RSC_ROLE_SLAVE; } else { action->fail_role = RSC_ROLE_STARTED; } } crm_debug_3("\t%s failure results in: %s", action->task, role2text(action->fail_role)); if(xml_obj != NULL) { xml_prop_iter(xml_obj, p_name, p_value, if(p_value != NULL) { g_hash_table_insert(action->meta, crm_strdup(p_name), crm_strdup(p_value)); } ); g_hash_table_remove(action->meta, "id"); unpack_instance_attributes(xml_obj, XML_TAG_META_SETS, NULL, action->meta, NULL, FALSE, data_set->now); unpack_instance_attributes(xml_obj, XML_TAG_ATTR_SETS, NULL, action->meta, NULL, FALSE, data_set->now); } field = XML_LRM_ATTR_INTERVAL; value = g_hash_table_lookup(action->meta, field); if(value != NULL) { value_i = crm_get_msec(value); CRM_CHECK(value_i >= 0, value_i = 0); value_ms = crm_itoa(value_i); if(value_i > 0) { g_hash_table_replace(action->meta, crm_strdup(field), value_ms); } else { g_hash_table_remove(action->meta, field); } } field = XML_OP_ATTR_START_DELAY; value = g_hash_table_lookup(action->meta, field); if(value != NULL) { value_i = crm_get_msec(value); if(value_i < 0) { value_i = 0; } start_delay = value_i; value_ms = crm_itoa(value_i); g_hash_table_replace(action->meta, crm_strdup(field), value_ms); } field = XML_ATTR_TIMEOUT; value = g_hash_table_lookup(action->meta, field); if(value == NULL) { value = pe_pref( data_set->config_hash, "default-action-timeout"); } value_i = crm_get_msec(value); if(value_i < 0) { value_i = 0; } value_i += start_delay; value_ms = crm_itoa(value_i); g_hash_table_replace(action->meta, crm_strdup(field), value_ms); } xmlNode * find_rsc_op_entry(resource_t *rsc, const char *key) { int number = 0; const char *name = NULL; const char *value = NULL; const char *interval = NULL; char *match_key = NULL; xmlNode *op = NULL; xml_child_iter_filter( rsc->ops_xml, operation, "op", name = crm_element_value(operation, "name"); interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); value = crm_element_value(operation, "disabled"); if(crm_is_true(value)) { crm_debug_2("%s disabled", ID(operation)); continue; } number = crm_get_msec(interval); if(number < 0) { continue; } match_key = generate_op_key(rsc->id, name, number); if(safe_str_eq(key, match_key)) { op = operation; } crm_free(match_key); if(op != NULL) { return op; } ); crm_debug_3("No match for %s", key); return op; } void print_node(const char *pre_text, node_t *node, gboolean details) { if(node == NULL) { crm_debug_4("%s%s: ", pre_text==NULL?"":pre_text, pre_text==NULL?"":": "); return; } crm_debug_4("%s%s%sNode %s: (weight=%d, fixed=%s)", pre_text==NULL?"":pre_text, pre_text==NULL?"":": ", node->details==NULL?"error ":node->details->online?"":"Unavailable/Unclean ", node->details->uname, node->weight, node->fixed?"True":"False"); if(details && node != NULL && node->details != NULL) { char *pe_mutable = crm_strdup("\t\t"); crm_debug_4("\t\t===Node Attributes"); g_hash_table_foreach(node->details->attrs, print_str_str, pe_mutable); crm_free(pe_mutable); crm_debug_4("\t\t=== Resources"); slist_iter( rsc, resource_t, node->details->running_rsc, lpc, print_resource(LOG_DEBUG_4, "\t\t", rsc, FALSE); ); } } /* * Used by the HashTable for-loop */ void print_str_str(gpointer key, gpointer value, gpointer user_data) { crm_debug_4("%s%s %s ==> %s", user_data==NULL?"":(char*)user_data, user_data==NULL?"":": ", (char*)key, (char*)value); } void print_resource( int log_level, const char *pre_text, resource_t *rsc, gboolean details) { long options = pe_print_log; if(rsc == NULL) { do_crm_log(log_level-1, "%s%s: ", pre_text==NULL?"":pre_text, pre_text==NULL?"":": "); return; } if(details) { options |= pe_print_details; } rsc->fns->print(rsc, pre_text, options, &log_level); } void pe_free_action(action_t *action) { if(action == NULL) { return; } pe_free_shallow(action->actions_before);/* action_warpper_t* */ pe_free_shallow(action->actions_after); /* action_warpper_t* */ g_hash_table_destroy(action->extra); g_hash_table_destroy(action->meta); crm_free(action->task); crm_free(action->uuid); crm_free(action); } GListPtr find_recurring_actions(GListPtr input, node_t *not_on_node) { const char *value = NULL; GListPtr result = NULL; CRM_CHECK(input != NULL, return NULL); slist_iter( action, action_t, input, lpc, value = g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL); if(value == NULL) { /* skip */ } else if(safe_str_eq(value, "0")) { /* skip */ } else if(safe_str_eq(CRMD_ACTION_CANCEL, action->task)) { /* skip */ } else if(not_on_node == NULL) { crm_debug_5("(null) Found: %s", action->uuid); result = g_list_append(result, action); } else if(action->node == NULL) { /* skip */ } else if(action->node->details != not_on_node->details) { crm_debug_5("Found: %s", action->uuid); result = g_list_append(result, action); } ); return result; } GListPtr find_actions(GListPtr input, const char *key, node_t *on_node) { GListPtr result = NULL; CRM_CHECK(key != NULL, return NULL); slist_iter( action, action_t, input, lpc, crm_debug_5("Matching %s against %s", key, action->uuid); if(safe_str_neq(key, action->uuid)) { continue; } else if(on_node == NULL) { result = g_list_append(result, action); } else if(action->node == NULL) { /* skip */ crm_debug_2("While looking for %s action on %s, " "found an unallocated one. Assigning" " it to the requested node...", key, on_node->details->uname); action->node = on_node; result = g_list_append(result, action); } else if(safe_str_eq(on_node->details->id, action->node->details->id)) { result = g_list_append(result, action); } ); return result; } GListPtr find_actions_exact(GListPtr input, const char *key, node_t *on_node) { GListPtr result = NULL; CRM_CHECK(key != NULL, return NULL); slist_iter( action, action_t, input, lpc, crm_debug_5("Matching %s against %s", key, action->uuid); if(safe_str_neq(key, action->uuid)) { crm_debug_3("Key mismatch: %s vs. %s", key, action->uuid); continue; } else if(on_node == NULL || action->node == NULL) { crm_debug_3("on_node=%p, action->node=%p", on_node, action->node); continue; } else if(safe_str_eq(on_node->details->id, action->node->details->id)) { result = g_list_append(result, action); } crm_debug_2("Node mismatch: %s vs. %s", on_node->details->id, action->node->details->id); ); return result; } void set_id(xmlNode * xml_obj, const char *prefix, int child) { int id_len = 0; gboolean use_prefix = TRUE; gboolean use_child = TRUE; char *new_id = NULL; const char *id = crm_element_value(xml_obj, XML_ATTR_ID); id_len = 1 + strlen(id); if(child > 999) { pe_err("Are you insane?!?" " The CRM does not support > 1000 children per resource"); return; } else if(child < 0) { use_child = FALSE; } else { id_len += 4; /* child */ } if(prefix == NULL || safe_str_eq(id, prefix)) { use_prefix = FALSE; } else { id_len += (1 + strlen(prefix)); } crm_malloc0(new_id, id_len); if(use_child) { snprintf(new_id, id_len, "%s%s%s:%d", use_prefix?prefix:"", use_prefix?":":"", id, child); } else { snprintf(new_id, id_len, "%s%s%s", use_prefix?prefix:"", use_prefix?":":"", id); } crm_xml_add(xml_obj, XML_ATTR_ID, new_id); crm_free(new_id); } static void resource_node_score(resource_t *rsc, node_t *node, int score, const char *tag) { node_t *match = NULL; if(rsc->children) { slist_iter( child_rsc, resource_t, rsc->children, lpc, resource_node_score(child_rsc, node, score, tag); ); } crm_debug_2("Setting %s for %s on %s: %d", tag, rsc->id, node->details->uname, score); match = pe_find_node_id(rsc->allowed_nodes, node->details->id); if(match == NULL) { match = node_copy(node); match->weight = 0; rsc->allowed_nodes = g_list_append(rsc->allowed_nodes, match); } match->weight = merge_weights(match->weight, score); } void resource_location(resource_t *rsc, node_t *node, int score, const char *tag, pe_working_set_t *data_set) { if(node != NULL) { resource_node_score(rsc, node, score, tag); } else if(data_set != NULL) { slist_iter( node, node_t, data_set->nodes, lpc, resource_node_score(rsc, node, score, tag); ); } else { slist_iter( node, node_t, rsc->allowed_nodes, lpc, resource_node_score(rsc, node, score, tag); ); } if(node == NULL && score == -INFINITY) { if(rsc->allocated_to) { crm_info("Deallocating %s from %s", rsc->id, rsc->allocated_to->details->uname); crm_free(rsc->allocated_to); rsc->allocated_to = NULL; } } } #define sort_return(an_int) crm_free(a_uuid); crm_free(b_uuid); return an_int gint sort_op_by_callid(gconstpointer a, gconstpointer b) { char *a_uuid = NULL; char *b_uuid = NULL; const xmlNode *xml_a = a; const xmlNode *xml_b = b; const char *a_xml_id = crm_element_value_const(xml_a, XML_ATTR_ID); const char *b_xml_id = crm_element_value_const(xml_b, XML_ATTR_ID); const char *a_task_id = crm_element_value_const(xml_a, XML_LRM_ATTR_CALLID); const char *b_task_id = crm_element_value_const(xml_b, XML_LRM_ATTR_CALLID); const char *a_key = crm_element_value_const(xml_a, XML_ATTR_TRANSITION_MAGIC); const char *b_key = crm_element_value_const(xml_b, XML_ATTR_TRANSITION_MAGIC); int dummy = -1; int a_id = -1; int b_id = -1; int a_rc = -1; int b_rc = -1; int a_status = -1; int b_status = -1; int a_call_id = -1; int b_call_id = -1; if(safe_str_eq(a_xml_id, b_xml_id)) { /* We have duplicate lrm_rsc_op entries in the status * section which is unliklely to be a good thing * - we can handle it easily enough, but we need to get * to the bottom of why its happening. */ pe_err("Duplicate lrm_rsc_op entries named %s", a_xml_id); sort_return(0); } CRM_CHECK(a_task_id != NULL && b_task_id != NULL, crm_err("a: %s, b: %s", crm_str(a_xml_id), crm_str(b_xml_id)); sort_return(0)); a_call_id = crm_parse_int(a_task_id, NULL); b_call_id = crm_parse_int(b_task_id, NULL); if(a_call_id == -1 && b_call_id == -1) { /* both are pending ops so it doesnt matter since * stops are never pending */ sort_return(0); } else if(a_call_id >= 0 && a_call_id < b_call_id) { crm_debug_4("%s (%d) < %s (%d) : call id", a_xml_id, a_call_id, b_xml_id, b_call_id); sort_return(-1); } else if(b_call_id >= 0 && a_call_id > b_call_id) { crm_debug_4("%s (%d) > %s (%d) : call id", a_xml_id, a_call_id, b_xml_id, b_call_id); sort_return(1); } crm_debug_5("%s (%d) == %s (%d) : continuing", a_xml_id, a_call_id, b_xml_id, b_call_id); /* now process pending ops */ CRM_CHECK(a_key != NULL && b_key != NULL, sort_return(0)); CRM_CHECK(decode_transition_magic( a_key, &a_uuid, &a_id, &dummy, &a_status, &a_rc, &dummy), sort_return(0)); CRM_CHECK(decode_transition_magic( b_key, &b_uuid, &b_id, &dummy, &b_status, &b_rc, &dummy), sort_return(0)); /* try and determin the relative age of the operation... * some pending operations (ie. a start) may have been supuerceeded * by a subsequent stop * * [a|b]_id == -1 means its a shutdown operation and _always_ comes last */ if(safe_str_neq(a_uuid, b_uuid) || a_id == b_id) { /* * some of the logic in here may be redundant... * * if the UUID from the TE doesnt match then one better * be a pending operation. * pending operations dont survive between elections and joins * because we query the LRM directly */ CRM_CHECK(a_call_id == -1 || b_call_id == -1, crm_err("a: %s=%d, b: %s=%d", crm_str(a_xml_id), a_call_id, crm_str(b_xml_id), b_call_id); sort_return(0)); CRM_CHECK(a_call_id >= 0 || b_call_id >= 0, sort_return(0)); if(b_call_id == -1) { crm_debug_2("%s (%d) < %s (%d) : transition + call id", a_xml_id, a_call_id, b_xml_id, b_call_id); sort_return(-1); } if(a_call_id == -1) { crm_debug_2("%s (%d) > %s (%d) : transition + call id", a_xml_id, a_call_id, b_xml_id, b_call_id); sort_return(1); } } else if((a_id >= 0 && a_id < b_id) || b_id == -1) { crm_debug_3("%s (%d) < %s (%d) : transition", a_xml_id, a_id, b_xml_id, b_id); sort_return(-1); } else if((b_id >= 0 && a_id > b_id) || a_id == -1) { crm_debug_3("%s (%d) > %s (%d) : transition", a_xml_id, a_id, b_xml_id, b_id); sort_return(1); } /* we should never end up here */ crm_err("%s (%d:%d:%s) ?? %s (%d:%d:%s) : default", a_xml_id, a_call_id, a_id, a_uuid, b_xml_id, b_call_id, b_id, b_uuid); CRM_CHECK(FALSE, sort_return(0)); } time_t get_timet_now(pe_working_set_t *data_set) { time_t now = 0; if(data_set && data_set->now) { now = data_set->now->tm_now; } if(now == 0) { /* eventually we should convert data_set->now into time_tm * for now, its only triggered by PE regression tests */ now = time(NULL); crm_crit("Defaulting to 'now'"); if(data_set && data_set->now) { data_set->now->tm_now = now; } } return now; } int get_failcount(node_t *node, resource_t *rsc, int *last_failure, pe_working_set_t *data_set) { int last = 0; int fail_count = 0; resource_t *failed = rsc; char *fail_attr = crm_concat("fail-count", rsc->id, '-'); const char *value = g_hash_table_lookup(node->details->attrs, fail_attr); if(is_not_set(rsc->flags, pe_rsc_unique)) { failed = uber_parent(rsc); } if(value != NULL) { fail_count = char2score(value); crm_info("%s has failed %d times on %s", rsc->id, fail_count, node->details->uname); } crm_free(fail_attr); fail_attr = crm_concat("last-failure", rsc->id, '-'); value = g_hash_table_lookup(node->details->attrs, fail_attr); if(value != NULL && rsc->failure_timeout) { last = crm_parse_int(value, NULL); if(last_failure) { *last_failure = last; } if(last > 0) { time_t now = get_timet_now(data_set); if(now > (last + rsc->failure_timeout)) { crm_notice("Failcount for %s on %s has expired (limit was %ds)", failed->id, node->details->uname, rsc->failure_timeout); fail_count = 0; } } } crm_free(fail_attr); return fail_count; } diff --git a/lib/Makefile.am b/lib/fencing/Makefile.am similarity index 50% copy from lib/Makefile.am copy to lib/fencing/Makefile.am index 386f183668..80242d2e69 100644 --- a/lib/Makefile.am +++ b/lib/fencing/Makefile.am @@ -1,34 +1,36 @@ -# -# heartbeat library directory: Linux-HA code -# -# Copyright (C) 2001 Alan Robertson +# File: Makefile.am +# Author: Sun Jiang Dong +# Copyright (c) 2004 International Business Machines # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # -MAINTAINERCLEANFILES = Makefile.in +MAINTAINERCLEANFILES = Makefile.in +INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ + -I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl \ + -I$(top_builddir)/linux-ha -I$(top_srcdir)/linux-ha \ + -I$(top_builddir) -I$(top_srcdir) -# -# PILS is supposed to be an independent library -# Stonith should rely only on PILS -# -# The rest can use clplumbing, and the plugins should be able -# to use any of the libraries (nothing can be linked against them) -# -# +halibdir = $(libdir)/@HB_PKG@ +havarlibdir = $(localstatedir)/lib/@HB_PKG@ + +COMMONLIBS = -lplumb -lhbclient \ + $(GLIBLIB) -## Subdirectories... -SUBDIRS = crm +lib_LTLIBRARIES = libstonithd.la +libstonithd_la_SOURCES = stonithd_lib.c stonithd_msg.c +libstonithd_la_LDFLAGS = $(COMMONLIBS) +AM_CFLAGS = $(INCLUDES) diff --git a/lib/Makefile.am b/lib/plugins/Makefile.am similarity index 98% copy from lib/Makefile.am copy to lib/plugins/Makefile.am index 386f183668..ae427607d9 100644 --- a/lib/Makefile.am +++ b/lib/plugins/Makefile.am @@ -1,34 +1,34 @@ # # heartbeat library directory: Linux-HA code # # Copyright (C) 2001 Alan Robertson # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in # # PILS is supposed to be an independent library # Stonith should rely only on PILS # # The rest can use clplumbing, and the plugins should be able # to use any of the libraries (nothing can be linked against them) # # ## Subdirectories... -SUBDIRS = crm +SUBDIRS = lrm diff --git a/lib/plugins/lrm/Makefile.am b/lib/plugins/lrm/Makefile.am new file mode 100644 index 0000000000..f25f0cc218 --- /dev/null +++ b/lib/plugins/lrm/Makefile.am @@ -0,0 +1,44 @@ +# +# Author: Sun Jiang Dong +# Copyright (c) 2004 International Business Machines +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# +MAINTAINERCLEANFILES = Makefile.in +LRM_DIR = lrm +INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ + -I$(top_builddir)/linux-ha -I$(top_srcdir)/linux-ha \ + -I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl + +halibdir = $(libdir)/@HB_PKG@ +havarlibdir = $(localstatedir)/lib/@HB_PKG@ +COMMONLIBS = -lplumb \ + $(GLIBLIB) + +lrmdir = $(HA_VARLIBDIR)/$(HB_PKG)/$(LRM_DIR) +plugindir = $(halibdir)/plugins/RAExec +apigid = @HA_APIGID@ + +plugin_LTLIBRARIES = stonith.la + +stonith_la_SOURCES = raexecstonith.c +stonith_la_LDFLAGS = -L$(top_builddir)/lib/pils -lpils -export-dynamic -module -avoid-version \ + -L$(top_builddir)/lib/fencing -lstonithd \ + -L$(top_builddir)/lib/stonith -lstonith -llrm + +install-exec-local: + $(mkinstalldirs) $(DESTDIR)$(lrmdir) + -chgrp $(apigid) $(DESTDIR)/$(lrmdir) + chmod 770 $(DESTDIR)/$(lrmdir) diff --git a/lib/plugins/lrm/raexecstonith.c b/lib/plugins/lrm/raexecstonith.c new file mode 100644 index 0000000000..b1de9bfc9e --- /dev/null +++ b/lib/plugins/lrm/raexecstonith.c @@ -0,0 +1,390 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * File: raexecocf.c + * Author: Sun Jiang Dong + * Copyright (c) 2004 International Business Machines + * + * This code implements the Resource Agent Plugin Module for LSB style. + * It's a part of Local Resource Manager. Currently it's used by lrmd only. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* Add it for compiling on OSX */ +#include + +#include +#include +#include + +# define PIL_PLUGINTYPE RA_EXEC_TYPE +# define PIL_PLUGINTYPE_S "RAExec" +# define PIL_PLUGINLICENSE LICENSE_PUBDOM +# define PIL_PLUGINLICENSEURL URL_PUBDOM + +# define PIL_PLUGIN stonith +# define PIL_PLUGIN_S "stonith" + +static PIL_rc close_stonithRA(PILInterface*, void* ud_interface); + +/* static const char * RA_PATH = STONITH_RA_DIR; */ +/* Temporarily use it */ +static const char * RA_PATH = HA_LIBHBDIR "/stonith/plugins/stonith/"; + +/* The begin of exported function list */ +static int execra(const char * rsc_id, + const char * rsc_type, + const char * provider, + const char * op_type, + const int timeout, + GHashTable * params); +static uniform_ret_execra_t map_ra_retvalue(int ret_execra + , const char * op_type, const char * std_output); +static int get_resource_list(GList ** rsc_info); +static char* get_resource_meta(const char* rsc_type, const char* provider); +static int get_provider_list(const char* op_type, GList ** providers); + +/* The end of exported function list */ + +/* The begin of internal used function & data list */ +static int get_providers(const char* class_path, const char* op_type, + GList ** providers); +static void stonithRA_ops_callback(stonithRA_ops_t * op, void * private_data); +static int exit_value; +/* The end of internal function & data list */ + +/* Rource agent execution plugin operations */ +static struct RAExecOps raops = +{ execra, + map_ra_retvalue, + get_resource_list, + get_provider_list, + get_resource_meta +}; + +static const char META_TEMPLATE[] = +"\n" +"\n" +"\n" +"1.0\n" +"\n" +"%s\n" +"\n" +"%s\n" +"%s\n" +"\n" +"\n" +"\n" +"\n" +"\n" +"\n" +"\n" +"\n" +"2.0\n" +"\n" +"\n"; + +static const char * no_parameter_info = ""; + +#define CHECKMETANULL(ret, which) \ + if (ret == NULL) { \ + cl_log(LOG_WARNING, "stonithRA plugin: cannot get %s " \ + "segment of %s's metadata.", which, rsc_type); \ + ret = no_parameter_info; \ + } +#define xmlize(p) \ + ( p ? (char *)xmlEncodeEntitiesReentrant(NULL, \ + (const unsigned char *)p) \ + : NULL ) +#define zapxml(p) do { \ + if( p ) { \ + xmlFree(p); \ + } \ +} while(0) + +PIL_PLUGIN_BOILERPLATE2("1.0", Debug); + +static const PILPluginImports* PluginImports; +static PILPlugin* OurPlugin; +static PILInterface* OurInterface; +static void* OurImports; +static void* interfprivate; + +/* + * Our plugin initialization and registration function + * It gets called when the plugin gets loaded. + */ +PIL_rc +PIL_PLUGIN_INIT(PILPlugin * us, const PILPluginImports* imports); + +PIL_rc +PIL_PLUGIN_INIT(PILPlugin * us, const PILPluginImports* imports) +{ + /* Force the compiler to do a little type checking */ + (void)(PILPluginInitFun)PIL_PLUGIN_INIT; + + PluginImports = imports; + OurPlugin = us; + + /* Register ourself as a plugin */ + imports->register_plugin(us, &OurPIExports); + + /* Register our interfaces */ + return imports->register_interface(us, PIL_PLUGINTYPE_S, PIL_PLUGIN_S, + &raops, close_stonithRA, &OurInterface, &OurImports, + interfprivate); +} + +static PIL_rc +close_stonithRA(PILInterface* pif, void* ud_interface) +{ + return PIL_OK; +} + +/* + * Most of the oprations will be sent to sotnithd directly, such as 'start', + * 'stop', 'monitor'. And others like 'meta-data' will be handled by itself + * locally. + * Some of important parameters' name: + * config_file + * config_string + */ +static int +execra(const char * rsc_id, const char * rsc_type, const char * provider, + const char * op_type,const int timeout, GHashTable * params) +{ + stonithRA_ops_t * op; + int call_id = -1; + char buffer_tmp[32]; + + /* Handling "meta-data" operation in a special way. + * Now handle "meta-data" operation locally. + * Should be changed in the future? + */ + if ( 0 == STRNCMP_CONST(op_type, "meta-data")) { + char * tmp; + tmp = get_resource_meta(rsc_type, provider); + printf("%s", tmp); + g_free(tmp); + exit(0); + } + + g_snprintf(buffer_tmp, sizeof(buffer_tmp), "%s_%d" + , "STONITH_RA_EXEC", getpid()); + if (ST_OK != stonithd_signon(buffer_tmp)) { + cl_log(LOG_ERR, "%s:%d: Cannot sign on the stonithd." + , __FUNCTION__, __LINE__); + exit(EXECRA_UNKNOWN_ERROR); + } + + stonithd_set_stonithRA_ops_callback(stonithRA_ops_callback, &call_id); + + /* Temporarily donnot use it, but how to deal with the global OCF + * variables. This is a important thing to think about and do. + */ + /* send the RA operation to stonithd to simulate a RA's actions */ + if ( 0==STRNCMP_CONST(op_type, "start") + || 0==STRNCMP_CONST(op_type, "stop") ) { + cl_log(LOG_INFO + , "Try to %s STONITH resource : Device=%s" + , op_type, rsc_id, rsc_type); + } + + op = g_new(stonithRA_ops_t, 1); + op->ra_name = g_strdup(rsc_type); + op->op_type = g_strdup(op_type); + op->params = params; + op->rsc_id = g_strdup(rsc_id); + if (ST_OK != stonithd_virtual_stonithRA_ops(op, &call_id)) { + cl_log(LOG_ERR, "sending stonithRA op to stonithd failed."); + /* Need to improve the granularity for error return code */ + stonithd_signoff(); + exit(EXECRA_EXEC_UNKNOWN_ERROR); + } + + /* May be redundant */ + /* + while (stonithd_op_result_ready() != TRUE) { + ; + } + */ + /* cl_log(LOG_DEBUG, "Will call stonithd_receive_ops_result."); */ + if (ST_OK != stonithd_receive_ops_result(TRUE)) { + cl_log(LOG_ERR, "stonithd_receive_ops_result failed."); + /* Need to improve the granularity for error return code */ + stonithd_signoff(); + exit(EXECRA_EXEC_UNKNOWN_ERROR); + } + + /* exit_value will be setted by the callback function */ + g_free(op->ra_name); + g_free(op->op_type); + g_free(op->rsc_id); + g_free(op); + + stonithd_signoff(); + /* cl_log(LOG_DEBUG, "stonithRA orignal exit code=%d", exit_value); */ + exit(map_ra_retvalue(exit_value, op_type, NULL)); +} + +static void +stonithRA_ops_callback(stonithRA_ops_t * op, void * private_data) +{ + /* cl_log(LOG_DEBUG, "setting exit code=%d", exit_value); */ + exit_value = op->op_result; +} + +static uniform_ret_execra_t +map_ra_retvalue(int ret_execra, const char * op_type, const char * std_output) +{ + /* Because the UNIFORM_RET_EXECRA is compatible with OCF standard, no + * actual mapping except validating, which ensure the return code + * will be in the range 0 to 7. Too strict? + */ + if (ret_execra < 0 || + ret_execra > EXECRA_STATUS_UNKNOWN) { + cl_log(LOG_WARNING, "mapped the invalid return code %d." + , ret_execra); + ret_execra = EXECRA_UNKNOWN_ERROR; + } + return ret_execra; +} + +static int +get_resource_list(GList ** rsc_info) +{ + int rc; + int needprivs = !cl_have_full_privs(); + + if ( rsc_info == NULL ) { + cl_log(LOG_ERR, "Parameter error: get_resource_list"); + return -2; + } + + if ( *rsc_info != NULL ) { + cl_log(LOG_ERR, "Parameter error: get_resource_list."\ + "will cause memory leak."); + *rsc_info = NULL; + } + + if (needprivs) { + return_to_orig_privs(); + } + if (ST_OK != stonithd_signon("STONITH_RA")) { + cl_log(LOG_ERR, "%s:%d: Can not signon to the stonithd." + , __FUNCTION__, __LINE__); + rc = -1; + } else { + rc = stonithd_list_stonith_types(rsc_info); + stonithd_signoff(); + } + + if (needprivs) { + return_to_dropped_privs(); + } + return rc; +} + +static int +get_provider_list(const char* op_type, GList ** providers) +{ + int ret; + ret = get_providers(RA_PATH, op_type, providers); + if (0>ret) { + cl_log(LOG_ERR, "scandir failed in stonith RA plugin"); + } + return ret; +} + +static char * +get_resource_meta(const char* rsc_type, const char* provider) +{ + char * buffer; + int bufferlen = 0; + const char * meta_param = NULL; + const char * meta_longdesc = NULL; + const char * meta_shortdesc = NULL; + char *xml_meta_longdesc = NULL; + char *xml_meta_shortdesc = NULL; + Stonith * stonith_obj = NULL; + + if ( provider != NULL ) { + cl_log(LOG_DEBUG, "stonithRA plugin: provider attribute " + "is not needed and will be ignored."); + } + + stonith_obj = stonith_new(rsc_type); + meta_longdesc = stonith_get_info(stonith_obj, ST_DEVICEDESCR); + CHECKMETANULL(meta_longdesc, "longdesc") + xml_meta_longdesc = xmlize(meta_longdesc); + meta_shortdesc = stonith_get_info(stonith_obj, ST_DEVICENAME); + CHECKMETANULL(meta_shortdesc, "shortdesc") + xml_meta_shortdesc = xmlize(meta_shortdesc); + meta_param = stonith_get_info(stonith_obj, ST_CONF_XML); + CHECKMETANULL(meta_param, "parameters") + + + bufferlen = STRLEN_CONST(META_TEMPLATE) + strlen(rsc_type) + + strlen(xml_meta_longdesc) + strlen(xml_meta_shortdesc) + + strlen(meta_param) + 1; + buffer = g_new(char, bufferlen); + buffer[bufferlen-1] = '\0'; + snprintf(buffer, bufferlen-1, META_TEMPLATE, rsc_type + , xml_meta_longdesc, xml_meta_shortdesc, meta_param); + stonith_delete(stonith_obj); + zapxml(xml_meta_longdesc); + zapxml(xml_meta_shortdesc); + + return buffer; +} + +/* + * Currently should return *providers = NULL, but remain the old code for + * possible unsing in the future + */ +static int +get_providers(const char* class_path, const char* op_type, GList ** providers) +{ + if ( providers == NULL ) { + cl_log(LOG_ERR, "%s:%d: Parameter error: providers==NULL" + , __FUNCTION__, __LINE__); + return -2; + } + + if ( *providers != NULL ) { + cl_log(LOG_ERR, "%s:%d: Parameter error: *providers==NULL." + "This will cause memory leak." + , __FUNCTION__, __LINE__); + } + + /* Now temporarily make it fixed */ + *providers = g_list_append(*providers, g_strdup("heartbeat")); + + return g_list_length(*providers); +} diff --git a/pacemaker.spec b/pacemaker.spec index f34ac7d4bc..f8c384f8f9 100644 --- a/pacemaker.spec +++ b/pacemaker.spec @@ -1,292 +1,295 @@ # # spec file for package Pacemaker (Version 0.7.0) # # Copyright (c) 2006 SUSE LINUX Products GmbH, Nuernberg, Germany. # This file and all modifications and additions to the pristine # package are under the same license as the package itself. # # Please submit bugfixes or comments via http://bugs.opensuse.org/ # # norootforbuild %define with_extra_warnings 0 %define with_debugging 0 %define without_fatal_warnings 1 %define with_ais_support 1 %define with_heartbeat_support 1 %define with_snmp_support 1 %define pkg_group Productivity/Clustering/HA %if 0%{?fedora_version} %define pkg_group System Environment/Daemons %endif %define gname haclient %define uname hacluster Name: pacemaker Summary: The Pacemaker scalable High-Availability cluster resource manager Version: 0.6.2 Release: 1 License: GPL2/LGPL2 URL: http://www.clusterlabs.org Group: %{pkg_group} Source: pacemaker.tar.gz BuildRoot: %{_tmppath}/%{name}-%{version}-build Autoreqprov: on %if %with_ais_support BuildRequires: openais-devel %endif %if %with_heartbeat_support BuildRequires: heartbeat heartbeat-devel > 2.1.2 %endif %if %{with_ais_support} %if %{with_heartbeat_support} Conflicts: pacemaker-ais Conflicts: pacemaker-heartbeat %else Conflicts: pacemaker Conflicts: pacemaker-heartbeat %endif %else Conflicts: pacemaker Conflicts: pacemaker-ais %endif BuildRequires: heartbeat-common heartbeat-common-devel e2fsprogs-devel glib2-devel gnutls-devel libxml2-devel pam-devel python-devel swig %if 0%{?suse_version} %if 0%{?suse_version} > 1000 %if %with_ais_support Supplements: openais %endif %if %with_heartbeat_support Supplements: heartbeat %endif %endif %if 0%{?suse_version} == 930 BuildRequires: rpm-devel %endif %if 0%{?suse_version} == 1000 BuildRequires: lzo lzo-devel %endif %if 0%{?suse_version} < 1020 BuildRequires: tcpd-devel %endif %if 0%{?sles_version} == 9 BuildRequires: pkgconfig %endif %endif %if 0%{?fedora_version} || 0%{?centos_version} || 0%{?rhel_version} BuildRequires: which %endif %if 0%{?fedora_version} == 8 BuildRequires: openssl-devel %endif %if 0%{?mandriva_version} BuildRequires: libbzip2-devel %endif %description Pacemaker is an advanced, scalable High-Availability cluster resource manager for Linux-HA (Heartbeat) and/or OpenAIS. It supports "n-node" clusters with significant capabilities for managing resources and dependencies. It will run scripts at initialization, when machines go up or down, when related resources fail and can be configured to periodically check resource health. %if 0%{?suse_version} %debug_package %endif %package devel Summary: Pacemaker development package Group: %{pkg_group} Requires: %{name} = %{version}-%{release} %description devel Header files and shared libraries needed for developing programs based on the Pacemaker High-Availability cluster resource manager. %prep ########################################################### %setup -n pacemaker ########################################################### %build # TODO: revisit -all CFLAGS="${CFLAGS} ${RPM_OPT_FLAGS}" # Feature-dependent CFLAGS: %if %with_extra_warnings # CFLAGS="${CFLAGS} -Wshadow -Wfloat-equal -Waggregate-return -Wnested-externs -Wunreachable-code -Wendif-labels -Winline" CFLAGS="${CFLAGS} -Wfloat-equal -Wendif-labels -Winline" %endif %if %with_debugging CFLAGS="${CFLAGS} -O0" %endif # Distribution specific settings: %if 0%{?suse_version} > 1001 CFLAGS="${CFLAGS} -fstack-protector-all" %endif %if 0%{?suse_version} < 1001 export PKG_CONFIG_PATH="$PKG_CONFIG_PATH:/opt/gnome/%{_lib}/pkgconfig:/opt/gnome/share/pkgconfig" %endif %if 0%{?suse_version} > 1020 CFLAGS="$CFLAGS -fgnu89-inline" %endif %if 0%{?fedora_version} > 6 CFLAGS="$CFLAGS -fgnu89-inline" %endif export CFLAGS ./ConfigureMe configure --prefix=%{_prefix} --sysconfdir=%{_sysconfdir} \ --localstatedir=%{_var} --infodir=%{_infodir} \ --mandir=%{_mandir} --libdir=%{_libdir} \ --libexecdir=%{_libexecdir} \ --with-group-name=%{gname} --with-ccmuser-name=%{uname} \ --with-hapkgversion=%{version} \ --enable-glib-malloc \ %if %with_snmp_support == 1 --enable-snmp-subagent \ %else --disable-snmp-subagent \ %endif --with-ais-prefix=%{_prefix} \ %if %with_ais_support == 0 --without-ais-support \ %endif %if %with_heartbeat_support == 0 --without-heartbeat-support \ %endif %if %without_fatal_warnings --enable-fatal-warnings=no \ %endif --enable-pretty export MAKE="make %{?jobs:-j%jobs}" make %{?jobs:-j%jobs} ########################################################### %install ########################################################### #make DESTDIR=$RPM_BUILD_ROOT install-strip rm -rf $RPM_BUILD_ROOT mkdir -p $RPM_BUILD_ROOT make DESTDIR=$RPM_BUILD_ROOT install #%if %with_ais_support # mkdir -p $RPM_BUILD_ROOT/%{_libexecdir}/lcrso # cp $RPM_BUILD_ROOT/%{_libdir}/service_crm.so $RPM_BUILD_ROOT/%{_libexecdir}/lcrso/pacemaker.lcrso #%endif # Cleanup [ -d $RPM_BUILD_ROOT/usr/man ] && rm -rf $RPM_BUILD_ROOT/usr/man [ -d $RPM_BUILD_ROOT/usr/share/libtool ] && rm -rf $RPM_BUILD_ROOT/usr/share/libtool find $RPM_BUILD_ROOT -name '*.a' -type f -print0 | xargs -0 rm -f find $RPM_BUILD_ROOT -name '*.la' -type f -print0 | xargs -0 rm -f ########################################################### %clean ########################################################### if [ -n "${RPM_BUILD_ROOT}" -a "${RPM_BUILD_ROOT}" != "/" ] then rm -rf $RPM_BUILD_ROOT fi rm -rf $RPM_BUILD_DIR/pacemaker ########################################################### %pre %preun # Use the following if more commands need to be executed # %post # /sbin/ldconfig # [...] # http://en.opensuse.org/SUSE_Package_Conventions/RPM_Macros %post -p /sbin/ldconfig %postun -p /sbin/ldconfig %files ########################################################### %defattr(-,root,root) %dir %{_libdir}/heartbeat %{_prefix}/share/pacemaker %{_prefix}/share/heartbeat %{_libdir}/heartbeat/* +%{_libdir}/heartbeat/plugins/RAExec/* %dir %{_var}/lib/heartbeat %{_libdir}/libcib.so.* %{_libdir}/libcrmcommon.so.* %{_libdir}/libcrmcluster.so.* #%{_libdir}/heartbeat/crm_primitive.py %{_libdir}/libpe_status.so.* %{_libdir}/libpe_rules.so.* %{_libdir}/libpengine.so.* %{_libdir}/libtransitioner.so.* +%{_libdir}/libstonithd.so.* %{_sbindir}/cibadmin %{_sbindir}/crm_attribute %{_sbindir}/crm_diff %{_sbindir}/crm_failcount %{_sbindir}/crm_master %{_sbindir}/crm_mon %{_sbindir}/crm_sh %{_sbindir}/crm_resource %{_sbindir}/crm_standby %{_sbindir}/crm_uuid %{_sbindir}/crm_verify %{_sbindir}/crmadmin %{_sbindir}/iso8601 %{_sbindir}/ccm_tool %{_sbindir}/attrd_updater %{_sbindir}/ptest %doc %{_mandir}/man8/cibadmin.8* %doc %{_mandir}/man8/crm_resource.8* %dir %attr (750, %{uname}, %{gname}) %{_var}/lib/heartbeat/crm %dir %attr (750, %{uname}, %{gname}) %{_var}/lib/heartbeat/pengine %dir %attr (750, %{uname}, %{gname}) %{_var}/run/heartbeat/crm %if %with_ais_support %{_libexecdir}/lcrso/pacemaker.lcrso %endif %if %with_snmp_support == 1 /usr/share/snmp/mibs/LINUX-HA-MIB.mib %endif %files devel %defattr(-,root,root) #%doc %{_datadir}/doc/%{name}-%{version} %{_includedir}/pacemaker +%{_includedir}/heartbeat/fencing %{_libdir}/*.so %changelog pacemaker diff --git a/pengine/allocate.c b/pengine/allocate.c index 0a583e6ebc..7cea121114 100644 --- a/pengine/allocate.c +++ b/pengine/allocate.c @@ -1,944 +1,944 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include void set_alloc_actions(pe_working_set_t *data_set); void migrate_reload_madness(pe_working_set_t *data_set); resource_alloc_functions_t resource_class_alloc_functions[] = { { native_merge_weights, native_color, native_create_actions, native_create_probe, native_internal_constraints, native_rsc_colocation_lh, native_rsc_colocation_rh, native_rsc_order_lh, native_rsc_order_rh, native_rsc_location, native_expand, complex_migrate_reload, complex_stonith_ordering, complex_create_notify_element, }, { group_merge_weights, group_color, group_create_actions, native_create_probe, group_internal_constraints, group_rsc_colocation_lh, group_rsc_colocation_rh, group_rsc_order_lh, group_rsc_order_rh, group_rsc_location, group_expand, complex_migrate_reload, complex_stonith_ordering, complex_create_notify_element, }, { native_merge_weights, clone_color, clone_create_actions, clone_create_probe, clone_internal_constraints, clone_rsc_colocation_lh, clone_rsc_colocation_rh, clone_rsc_order_lh, clone_rsc_order_rh, clone_rsc_location, clone_expand, complex_migrate_reload, complex_stonith_ordering, complex_create_notify_element, }, { native_merge_weights, master_color, master_create_actions, clone_create_probe, master_internal_constraints, clone_rsc_colocation_lh, master_rsc_colocation_rh, clone_rsc_order_lh, clone_rsc_order_rh, clone_rsc_location, clone_expand, complex_migrate_reload, complex_stonith_ordering, complex_create_notify_element, } }; static gboolean check_rsc_parameters(resource_t *rsc, node_t *node, xmlNode *rsc_entry, pe_working_set_t *data_set) { int attr_lpc = 0; gboolean force_restart = FALSE; gboolean delete_resource = FALSE; const char *value = NULL; const char *old_value = NULL; const char *attr_list[] = { XML_ATTR_TYPE, XML_AGENT_ATTR_CLASS, XML_AGENT_ATTR_PROVIDER }; for(; attr_lpc < DIMOF(attr_list); attr_lpc++) { value = crm_element_value(rsc->xml, attr_list[attr_lpc]); old_value = crm_element_value(rsc_entry, attr_list[attr_lpc]); if(value == old_value /* ie. NULL */ || crm_str_eq(value, old_value, TRUE)) { continue; } force_restart = TRUE; crm_notice("Forcing restart of %s on %s, %s changed: %s -> %s", rsc->id, node->details->uname, attr_list[attr_lpc], crm_str(old_value), crm_str(value)); } if(force_restart) { /* make sure the restart happens */ stop_action(rsc, node, FALSE); set_bit(rsc->flags, pe_rsc_start_pending); delete_resource = TRUE; } return delete_resource; } static gboolean check_action_definition(resource_t *rsc, node_t *active_node, xmlNode *xml_op, pe_working_set_t *data_set) { char *key = NULL; int interval = 0; const char *interval_s = NULL; gboolean did_change = FALSE; gboolean start_op = FALSE; xmlNode *params_all = NULL; xmlNode *params_restart = NULL; GHashTable *local_rsc_params = NULL; char *digest_all_calc = NULL; const char *digest_all = NULL; const char *restart_list = NULL; const char *digest_restart = NULL; char *digest_restart_calc = NULL; action_t *action = NULL; const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION); CRM_CHECK(active_node != NULL, return FALSE); interval_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); interval = crm_parse_int(interval_s, "0"); /* we need to reconstruct the key because of the way we used to construct resource IDs */ key = generate_op_key(rsc->id, task, interval); if(interval > 0) { xmlNode *op_match = NULL; crm_debug_2("Checking parameters for %s", key); op_match = find_rsc_op_entry(rsc, key); if(op_match == NULL && data_set->stop_action_orphans) { /* create a cancel action */ action_t *cancel = NULL; char *cancel_key = crm_strdup(key); const char *call_id = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); crm_info("Orphan action will be stopped: %s on %s", key, active_node->details->uname); /* cancel_key = generate_op_key( */ /* rsc->id, RSC_CANCEL, interval); */ cancel = custom_action( rsc, cancel_key, RSC_CANCEL, active_node, FALSE, TRUE, data_set); crm_free(cancel->task); cancel->task = crm_strdup(RSC_CANCEL); add_hash_param(cancel->meta, XML_LRM_ATTR_TASK, task); add_hash_param(cancel->meta, XML_LRM_ATTR_CALLID, call_id); add_hash_param(cancel->meta, XML_LRM_ATTR_INTERVAL, interval_s); custom_action_order( rsc, stop_key(rsc), NULL, rsc, NULL, cancel, pe_order_optional, data_set); crm_free(key); key = NULL; return TRUE; } else if(op_match == NULL) { crm_debug("Orphan action detected: %s on %s", key, active_node->details->uname); crm_free(key); key = NULL; return TRUE; } } action = custom_action(rsc, key, task, active_node, TRUE, FALSE, data_set); local_rsc_params = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); unpack_instance_attributes( rsc->xml, XML_TAG_ATTR_SETS, active_node->details->attrs, local_rsc_params, NULL, FALSE, data_set->now); params_all = create_xml_node(NULL, XML_TAG_PARAMS); g_hash_table_foreach(action->extra, hash2field, params_all); g_hash_table_foreach(rsc->parameters, hash2field, params_all); g_hash_table_foreach(action->meta, hash2metafield, params_all); g_hash_table_foreach(local_rsc_params, hash2field, params_all); filter_action_parameters(params_all, op_version); digest_all_calc = calculate_xml_digest(params_all, TRUE, FALSE); digest_all = crm_element_value(xml_op, XML_LRM_ATTR_OP_DIGEST); digest_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST); restart_list = crm_element_value(xml_op, XML_LRM_ATTR_OP_RESTART); if(crm_str_eq(task, RSC_START, TRUE)) { start_op = TRUE; } if(start_op && digest_restart) { params_restart = copy_xml(params_all); if(restart_list) { filter_reload_parameters(params_restart, restart_list); } digest_restart_calc = calculate_xml_digest(params_restart, TRUE, FALSE); if(safe_str_neq(digest_restart_calc, digest_restart)) { did_change = TRUE; crm_log_xml_info(params_restart, "params:restart"); crm_warn("Parameters to %s on %s changed: recorded %s vs. %s (restart:%s) %s", key, active_node->details->uname, crm_str(digest_restart), digest_restart_calc, op_version, crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC)); key = generate_op_key(rsc->id, task, interval); custom_action(rsc, key, task, NULL, FALSE, TRUE, data_set); goto cleanup; } } if(safe_str_neq(digest_all_calc, digest_all)) { action_t *op = NULL; did_change = TRUE; crm_log_xml_info(params_all, "params:all"); crm_warn("Parameters to %s on %s changed: recorded %s vs. %s (all:%s) %s", key, active_node->details->uname, crm_str(digest_all), digest_all_calc, op_version, crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC)); key = generate_op_key(rsc->id, task, interval); op = custom_action(rsc, key, task, NULL, FALSE, TRUE, data_set); if(start_op && digest_restart) { op->allow_reload_conversion = TRUE; } else if(interval > 0) { custom_action_order(rsc, start_key(rsc), NULL, NULL, crm_strdup(op->task), op, pe_order_runnable_left, data_set); } } cleanup: free_xml(params_all); free_xml(params_restart); crm_free(digest_all_calc); crm_free(digest_restart_calc); g_hash_table_destroy(local_rsc_params); pe_free_action(action); return did_change; } extern gboolean DeleteRsc(resource_t *rsc, node_t *node, gboolean optional, pe_working_set_t *data_set); static void check_actions_for(xmlNode *rsc_entry, node_t *node, pe_working_set_t *data_set) { const char *id = NULL; const char *task = NULL; int interval = 0; const char *interval_s = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; const char *rsc_id = ID(rsc_entry); gboolean is_probe = FALSE; int start_index = 0, stop_index = 0; resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); CRM_CHECK(rsc != NULL, return); CRM_CHECK(node != NULL, return); CRM_CHECK(rsc_id != NULL, return); if(is_set(rsc->flags, pe_rsc_orphan)) { crm_debug_2("Skipping param check for %s: orphan", rsc->id); return; } else if(pe_find_node_id(rsc->running_on, node->details->id) == NULL) { crm_debug_2("Skipping param check for %s: no longer active on %s", rsc->id, node->details->uname); return; } crm_debug_3("Processing %s on %s", rsc->id, node->details->uname); if(check_rsc_parameters(rsc, node, rsc_entry, data_set)) { DeleteRsc(rsc, node, FALSE, data_set); } xml_child_iter_filter( rsc_entry, rsc_op, XML_LRM_TAG_RSC_OP, op_list = g_list_append(op_list, rsc_op); ); sorted_op_list = g_list_sort(op_list, sort_op_by_callid); calculate_active_ops(sorted_op_list, &start_index, &stop_index); slist_iter( rsc_op, xmlNode, sorted_op_list, lpc, if(start_index < stop_index) { /* stopped */ continue; } else if(lpc < start_index) { /* action occurred prior to a start */ continue; } id = ID(rsc_op); is_probe = FALSE; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); interval_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL); interval = crm_parse_int(interval_s, "0"); if(interval == 0 && safe_str_eq(task, RSC_STATUS)) { is_probe = TRUE; } if(is_probe || safe_str_eq(task, RSC_START) || interval > 0) { check_action_definition(rsc, node, rsc_op, data_set); } ); g_list_free(sorted_op_list); } static void check_actions(pe_working_set_t *data_set) { const char *id = NULL; node_t *node = NULL; xmlNode *lrm_rscs = NULL; xmlNode *status = get_object_root(XML_CIB_TAG_STATUS, data_set->input); xml_child_iter_filter( status, node_state, XML_CIB_TAG_STATE, id = crm_element_value(node_state, XML_ATTR_ID); lrm_rscs = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); lrm_rscs = find_xml_node(lrm_rscs, XML_LRM_TAG_RESOURCES, FALSE); node = pe_find_node_id(data_set->nodes, id); if(node == NULL) { continue; } else if(can_run_resources(node) == FALSE) { crm_debug_2("Skipping param check for %s: cant run resources", node->details->uname); continue; } crm_debug_2("Processing node %s", node->details->uname); if(node->details->online || data_set->stonith_enabled) { xml_child_iter_filter( lrm_rscs, rsc_entry, XML_LRM_TAG_RESOURCE, if(xml_has_children(rsc_entry)) { check_actions_for(rsc_entry, node, data_set); } ); } ); } static gboolean apply_placement_constraints(pe_working_set_t *data_set) { crm_debug_3("Applying constraints..."); slist_iter( cons, rsc_to_node_t, data_set->placement_constraints, lpc, cons->rsc_lh->cmds->rsc_location(cons->rsc_lh, cons); ); return TRUE; } static void common_apply_stickiness(resource_t *rsc, node_t *node, pe_working_set_t *data_set) { int fail_count = 0; const char *value = NULL; resource_t *failed = rsc; GHashTable *meta_hash = NULL; if(rsc->children) { slist_iter( child_rsc, resource_t, rsc->children, lpc, common_apply_stickiness(child_rsc, node, data_set); ); return; } meta_hash = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); get_meta_attributes(meta_hash, rsc, node, data_set); /* update resource preferences that relate to the current node */ value = g_hash_table_lookup(meta_hash, XML_RSC_ATTR_STICKINESS); if(value != NULL && safe_str_neq("default", value)) { rsc->stickiness = char2score(value); } else { rsc->stickiness = data_set->default_resource_stickiness; } value = g_hash_table_lookup(meta_hash, XML_RSC_ATTR_FAIL_STICKINESS); if(value != NULL && safe_str_neq("default", value)) { rsc->migration_threshold = char2score(value); } else { rsc->migration_threshold = data_set->default_migration_threshold; } if(is_not_set(rsc->flags, pe_rsc_unique)) { failed = uber_parent(rsc); } fail_count = get_failcount(node, rsc, NULL, data_set); if(fail_count > 0 && rsc->migration_threshold != 0) { if(rsc->migration_threshold <= fail_count) { resource_location(failed, node, -INFINITY, "__fail_limit__", data_set); crm_warn("Forcing %s away from %s after %d failures (max=%d)", failed->id, node->details->uname, fail_count, rsc->migration_threshold); } else { crm_notice("%s can fail %d more times on %s before being forced off", failed->id, rsc->migration_threshold - fail_count, node->details->uname); } } g_hash_table_destroy(meta_hash); } static void complex_set_cmds(resource_t *rsc) { rsc->cmds = &resource_class_alloc_functions[rsc->variant]; slist_iter( child_rsc, resource_t, rsc->children, lpc, complex_set_cmds(child_rsc); ); } void set_alloc_actions(pe_working_set_t *data_set) { slist_iter( rsc, resource_t, data_set->resources, lpc, complex_set_cmds(rsc); ); } gboolean stage0(pe_working_set_t *data_set) { xmlNode * cib_constraints = get_object_root( XML_CIB_TAG_CONSTRAINTS, data_set->input); if(data_set->input == NULL) { return FALSE; } cluster_status(data_set); set_alloc_actions(data_set); slist_iter(node, node_t, data_set->nodes, lpc, slist_iter( rsc, resource_t, data_set->resources, lpc2, common_apply_stickiness(rsc, node, data_set); ); ); unpack_constraints(cib_constraints, data_set); return TRUE; } /* * Check nodes for resources started outside of the LRM */ gboolean probe_resources(pe_working_set_t *data_set) { action_t *probe_complete = NULL; action_t *probe_node_complete = NULL; slist_iter( node, node_t, data_set->nodes, lpc, gboolean force_probe = FALSE; const char *probed = g_hash_table_lookup( node->details->attrs, CRM_OP_PROBED); if(node->details->online == FALSE) { continue; } else if(node->details->unclean) { continue; } else if(probe_complete == NULL) { probe_complete = get_pseudo_op(CRM_OP_PROBED, data_set); } if(probed != NULL && crm_is_true(probed) == FALSE) { force_probe = TRUE; } probe_node_complete = custom_action( NULL, crm_strdup(CRM_OP_PROBED), CRM_OP_PROBED, node, FALSE, TRUE, data_set); probe_node_complete->optional = crm_is_true(probed); probe_node_complete->priority = INFINITY; add_hash_param(probe_node_complete->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); order_actions(probe_node_complete, probe_complete, pe_order_optional); slist_iter( rsc, resource_t, data_set->resources, lpc2, if(rsc->cmds->create_probe( rsc, node, probe_node_complete, force_probe, data_set)) { probe_complete->optional = FALSE; probe_node_complete->optional = FALSE; custom_action_order( NULL, NULL, probe_complete, rsc, start_key(rsc), NULL, pe_order_optional, data_set); } ); ); return TRUE; } /* * Count how many valid nodes we have (so we know the maximum number of * colors we can resolve). * * Apply node constraints (ie. filter the "allowed_nodes" part of resources */ gboolean stage2(pe_working_set_t *data_set) { crm_debug_3("Applying placement constraints"); slist_iter( node, node_t, data_set->nodes, lpc, if(node == NULL) { /* error */ } else if(node->weight >= 0.0 /* global weight */ && node->details->online && node->details->type == node_member) { data_set->max_valid_nodes++; } ); apply_placement_constraints(data_set); return TRUE; } /* * Create internal resource constraints before allocation */ gboolean stage3(pe_working_set_t *data_set) { slist_iter( rsc, resource_t, data_set->resources, lpc, rsc->cmds->internal_constraints(rsc, data_set); ); return TRUE; } /* * Check for orphaned or redefined actions */ gboolean stage4(pe_working_set_t *data_set) { check_actions(data_set); return TRUE; } gboolean stage5(pe_working_set_t *data_set) { /* Take (next) highest resource, assign it and create its actions */ slist_iter( rsc, resource_t, data_set->resources, lpc, rsc->cmds->color(rsc, data_set); ); probe_resources(data_set); slist_iter( rsc, resource_t, data_set->resources, lpc, rsc->cmds->create_actions(rsc, data_set); ); return TRUE; } static gboolean is_managed(const resource_t *rsc) { if(is_set(rsc->flags, pe_rsc_managed)) { return TRUE; } slist_iter( child_rsc, resource_t, rsc->children, lpc, if(is_managed(child_rsc)) { return TRUE; } ); return FALSE; } static gboolean any_managed_resouces(pe_working_set_t *data_set) { slist_iter( rsc, resource_t, data_set->resources, lpc, if(is_managed(rsc)) { return TRUE; } ); return FALSE; } /* * Create dependancies for stonith and shutdown operations */ gboolean stage6(pe_working_set_t *data_set) { action_t *dc_down = NULL; action_t *stonith_op = NULL; action_t *last_stonith = NULL; gboolean integrity_lost = FALSE; action_t *ready = get_pseudo_op(STONITH_UP, data_set); action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); gboolean need_stonith = FALSE; crm_debug_3("Processing fencing and shutdown cases"); if(data_set->stonith_enabled && (data_set->have_quorum || data_set->no_quorum_policy == no_quorum_ignore)) { need_stonith = TRUE; } if(need_stonith && any_managed_resouces(data_set) == FALSE) { crm_crit("Delaying fencing operations until there are resources to manage"); need_stonith = FALSE; } slist_iter( node, node_t, data_set->nodes, lpc, stonith_op = NULL; if(node->details->unclean && need_stonith) { pe_warn("Scheduling Node %s for STONITH", node->details->uname); stonith_op = custom_action( NULL, crm_strdup(CRM_OP_FENCE), CRM_OP_FENCE, node, FALSE, TRUE, data_set); add_hash_param( stonith_op->meta, XML_LRM_ATTR_TARGET, node->details->uname); add_hash_param( stonith_op->meta, XML_LRM_ATTR_TARGET_UUID, node->details->id); add_hash_param( stonith_op->meta, "stonith_action", data_set->stonith_action); stonith_constraints(node, stonith_op, data_set); order_actions(ready, stonith_op, pe_order_implies_left); order_actions(stonith_op, all_stopped, pe_order_implies_right); if(node->details->is_dc) { dc_down = stonith_op; } else { if(last_stonith) { order_actions(last_stonith, stonith_op, pe_order_implies_left); } last_stonith = stonith_op; } } else if(node->details->online && node->details->shutdown) { action_t *down_op = NULL; crm_info("Scheduling Node %s for shutdown", node->details->uname); down_op = custom_action( NULL, crm_strdup(CRM_OP_SHUTDOWN), CRM_OP_SHUTDOWN, node, FALSE, TRUE, data_set); shutdown_constraints(node, down_op, data_set); if(node->details->is_dc) { dc_down = down_op; } } if(node->details->unclean && stonith_op == NULL) { integrity_lost = TRUE; pe_warn("Node %s is unclean!", node->details->uname); } ); if(integrity_lost) { if(data_set->have_quorum == FALSE) { crm_notice("Cannot fence unclean nodes until quorum is" " attained (or no_quorum_policy is set to ignore)"); } else if(data_set->stonith_enabled == FALSE) { pe_warn("YOUR RESOURCES ARE NOW LIKELY COMPROMISED"); pe_err("ENABLE STONITH TO KEEP YOUR RESOURCES SAFE"); } } if(dc_down != NULL) { GListPtr shutdown_matches = find_actions( data_set->actions, CRM_OP_SHUTDOWN, NULL); crm_debug_2("Ordering shutdowns before %s on %s (DC)", dc_down->task, dc_down->node->details->uname); add_hash_param(dc_down->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); slist_iter( node_stop, action_t, shutdown_matches, lpc, if(node_stop->node->details->is_dc) { continue; } crm_debug("Ordering shutdown on %s before %s on %s", node_stop->node->details->uname, dc_down->task, dc_down->node->details->uname); order_actions(node_stop, dc_down, pe_order_implies_left); ); if(last_stonith && dc_down != last_stonith) { order_actions(last_stonith, dc_down, pe_order_implies_left); } g_list_free(shutdown_matches); } return TRUE; } /* * Determin the sets of independant actions and the correct order for the * actions in each set. * * Mark dependencies of un-runnable actions un-runnable * */ gboolean stage7(pe_working_set_t *data_set) { crm_debug_4("Applying ordering constraints"); slist_iter( order, order_constraint_t, data_set->ordering_constraints, lpc, resource_t *rsc = order->lh_rsc; - crm_debug_2("Applying ordering constraint: %d", order->id); + crm_debug_3("Applying ordering constraint: %d", order->id); if(rsc != NULL) { crm_debug_4("rsc_action-to-*"); rsc->cmds->rsc_order_lh(rsc, order, data_set); continue; } rsc = order->rh_rsc; if(rsc != NULL) { crm_debug_4("action-to-rsc_action"); rsc->cmds->rsc_order_rh(order->lh_action, rsc, order); } else { crm_debug_4("action-to-action"); order_actions( order->lh_action, order->rh_action, order->type); } ); update_action_states(data_set->actions); slist_iter( rsc, resource_t, data_set->resources, lpc, rsc->cmds->migrate_reload(rsc, data_set); ); return TRUE; } int transition_id = -1; /* * Create a dependency graph to send to the transitioner (via the CRMd) */ gboolean stage8(pe_working_set_t *data_set) { const char *value = NULL; transition_id++; crm_debug_2("Creating transition graph %d.", transition_id); data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH); value = pe_pref(data_set->config_hash, "cluster-delay"); crm_xml_add(data_set->graph, "cluster-delay", value); crm_xml_add(data_set->graph, "failed-stop-offset", "INFINITY"); value = pe_pref(data_set->config_hash, "start-failure-is-fatal"); if(crm_is_true(value)) { crm_xml_add(data_set->graph, "failed-start-offset", "INFINITY"); } else { crm_xml_add(data_set->graph, "failed-start-offset", "1"); } value = pe_pref(data_set->config_hash, "batch-limit"); crm_xml_add(data_set->graph, "batch-limit", value); crm_xml_add_int(data_set->graph, "transition_id", transition_id); /* errors... slist_iter(action, action_t, action_list, lpc, if(action->optional == FALSE && action->runnable == FALSE) { print_action("Ignoring", action, TRUE); } ); */ slist_iter( rsc, resource_t, data_set->resources, lpc, crm_debug_4("processing actions for rsc=%s", rsc->id); rsc->cmds->expand(rsc, data_set); ); crm_log_xml_debug_3( data_set->graph, "created resource-driven action list"); /* catch any non-resource specific actions */ crm_debug_4("processing non-resource actions"); slist_iter( action, action_t, data_set->actions, lpc, graph_element_from_action(action, data_set); ); crm_log_xml_debug_3(data_set->graph, "created generic action list"); crm_debug_2("Created transition graph %d.", transition_id); return TRUE; } void cleanup_alloc_calculations(pe_working_set_t *data_set) { if(data_set == NULL) { return; } crm_debug_3("deleting order cons: %p", data_set->ordering_constraints); pe_free_ordering(data_set->ordering_constraints); data_set->ordering_constraints = NULL; crm_debug_3("deleting node cons: %p", data_set->placement_constraints); pe_free_rsc_to_node(data_set->placement_constraints); data_set->placement_constraints = NULL; crm_debug_3("deleting inter-resource cons: %p", data_set->colocation_constraints); pe_free_shallow(data_set->colocation_constraints); data_set->colocation_constraints = NULL; cleanup_calculations(data_set); } diff --git a/pengine/clone.c b/pengine/clone.c index 29ef72c3ed..0122d885cb 100644 --- a/pengine/clone.c +++ b/pengine/clone.c @@ -1,1475 +1,1475 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #define VARIANT_CLONE 1 #include gint sort_clone_instance(gconstpointer a, gconstpointer b); void clone_create_notifications( resource_t *rsc, action_t *action, action_t *action_complete, pe_working_set_t *data_set); void child_stopping_constraints( clone_variant_data_t *clone_data, resource_t *self, resource_t *child, resource_t *last, pe_working_set_t *data_set); void child_starting_constraints( clone_variant_data_t *clone_data, resource_t *self, resource_t *child, resource_t *last, pe_working_set_t *data_set); static node_t * parent_node_instance(const resource_t *rsc, node_t *node) { node_t *ret = NULL; if(node != NULL) { ret = pe_find_node_id( rsc->parent->allowed_nodes, node->details->id); } return ret; } static gboolean did_fail(const resource_t *rsc) { if(is_set(rsc->flags, pe_rsc_failed)) { return TRUE; } slist_iter( child_rsc, resource_t, rsc->children, lpc, if(did_fail(child_rsc)) { return TRUE; } ); return FALSE; } gint sort_clone_instance(gconstpointer a, gconstpointer b) { int level = LOG_DEBUG_3; node_t *node1 = NULL; node_t *node2 = NULL; gboolean can1 = TRUE; gboolean can2 = TRUE; const resource_t *resource1 = (const resource_t*)a; const resource_t *resource2 = (const resource_t*)b; CRM_ASSERT(resource1 != NULL); CRM_ASSERT(resource2 != NULL); /* allocation order: * - active instances * - instances running on nodes with the least copies * - active instances on nodes that cant support them or are to be fenced * - failed instances * - inactive instances */ do_crm_log(level+1, "%s ? %s", resource1->id, resource2->id); if(resource1->running_on && resource2->running_on) { if(g_list_length(resource1->running_on) < g_list_length(resource2->running_on)) { do_crm_log(level, "%s < %s: running_on", resource1->id, resource2->id); return -1; } else if(g_list_length(resource1->running_on) > g_list_length(resource2->running_on)) { do_crm_log(level, "%s > %s: running_on", resource1->id, resource2->id); return 1; } } if(resource1->running_on) { node1 = resource1->running_on->data; } if(resource2->running_on) { node2 = resource2->running_on->data; } if(node1) { node_t *match = pe_find_node_id(resource1->allowed_nodes, node1->details->id); - if(match->weight < 0) { + if(match == NULL || match->weight < 0) { do_crm_log(level, "%s: current location is unavailable", resource1->id); node1 = NULL; can1 = FALSE; } } if(node2) { node_t *match = pe_find_node_id(resource2->allowed_nodes, node2->details->id); - if(match->weight < 0) { + if(match == NULL || match->weight < 0) { do_crm_log(level, "%s: current location is unavailable", resource2->id); node2 = NULL; can2 = FALSE; } } if(can1 != can2) { if(can1) { do_crm_log(level, "%s < %s: availability of current location", resource1->id, resource2->id); return -1; } do_crm_log(level, "%s > %s: availability of current location", resource1->id, resource2->id); return 1; } if(resource1->priority < resource2->priority) { do_crm_log(level, "%s < %s: priority", resource1->id, resource2->id); return 1; } else if(resource1->priority > resource2->priority) { do_crm_log(level, "%s > %s: priority", resource1->id, resource2->id); return -1; } if(node1 == NULL && node2 == NULL) { do_crm_log(level, "%s == %s: not active", resource1->id, resource2->id); return 0; } if(node1 != node2) { if(node1 == NULL) { do_crm_log(level, "%s > %s: active", resource1->id, resource2->id); return 1; } else if(node2 == NULL) { do_crm_log(level, "%s < %s: active", resource1->id, resource2->id); return -1; } } can1 = can_run_resources(node1); can2 = can_run_resources(node2); if(can1 != can2) { if(can1) { do_crm_log(level, "%s < %s: can", resource1->id, resource2->id); return -1; } do_crm_log(level, "%s > %s: can", resource1->id, resource2->id); return 1; } node1 = parent_node_instance(resource1, node1); node2 = parent_node_instance(resource2, node2); if(node1 != NULL && node2 == NULL) { do_crm_log(level, "%s < %s: not allowed", resource1->id, resource2->id); return -1; } else if(node1 == NULL && node2 != NULL) { do_crm_log(level, "%s > %s: not allowed", resource1->id, resource2->id); return 1; } if(node1 == NULL) { do_crm_log(level, "%s == %s: not allowed", resource1->id, resource2->id); return 0; } if(node1->count < node2->count) { do_crm_log(level, "%s < %s: count", resource1->id, resource2->id); return -1; } else if(node1->count > node2->count) { do_crm_log(level, "%s > %s: count", resource1->id, resource2->id); return 1; } if(node1->weight < node2->weight) { do_crm_log(level, "%s < %s: node score", resource1->id, resource2->id); return 1; } else if(node1->weight > node2->weight) { do_crm_log(level, "%s > %s: node score", resource1->id, resource2->id); return -1; } can1 = did_fail(resource1); can2 = did_fail(resource2); if(can1 != can2) { if(can1) { do_crm_log(level, "%s > %s: failed", resource1->id, resource2->id); return 1; } do_crm_log(level, "%s < %s: failed", resource1->id, resource2->id); return -1; } do_crm_log(level, "%s == %s: default %d", resource1->id, resource2->id, node2->weight); return 0; } static node_t * can_run_instance(resource_t *rsc, node_t *node) { node_t *local_node = NULL; clone_variant_data_t *clone_data = NULL; if(can_run_resources(node) == FALSE) { goto bail; } local_node = parent_node_instance(rsc, node); get_clone_variant_data(clone_data, rsc->parent); if(local_node == NULL) { crm_warn("%s cannot run on %s: node not allowed", rsc->id, node->details->uname); goto bail; } else if(local_node->count < clone_data->clone_node_max) { return local_node; } else { crm_debug_2("%s cannot run on %s: node full", rsc->id, node->details->uname); } bail: if(node) { common_update_score(rsc, node->details->id, -INFINITY); } return NULL; } static node_t * color_instance(resource_t *rsc, pe_working_set_t *data_set) { node_t *chosen = NULL; node_t *local_node = NULL; crm_debug_2("Processing %s", rsc->id); if(is_not_set(rsc->flags, pe_rsc_provisional)) { return rsc->fns->location(rsc, NULL, FALSE); } else if(is_set(rsc->flags, pe_rsc_allocating)) { crm_debug("Dependancy loop detected involving %s", rsc->id); return NULL; } if(rsc->allowed_nodes) { slist_iter(try_node, node_t, rsc->allowed_nodes, lpc, can_run_instance(rsc, try_node); ); } chosen = rsc->cmds->color(rsc, data_set); if(chosen) { local_node = pe_find_node_id( rsc->parent->allowed_nodes, chosen->details->id); if(local_node) { local_node->count++; } else if(is_set(rsc->flags, pe_rsc_managed)) { /* what to do? we can't enforce per-node limits in this case */ crm_config_err("%s not found in %s (list=%d)", chosen->details->id, rsc->parent->id, g_list_length(rsc->parent->allowed_nodes)); } } return chosen; } static void append_parent_colocation(resource_t *rsc, resource_t *child, gboolean all) { slist_iter(cons, rsc_colocation_t, rsc->rsc_cons, lpc, if(all || cons->score < 0) { child->rsc_cons = g_list_append(child->rsc_cons, cons); } ); slist_iter(cons, rsc_colocation_t, rsc->rsc_cons_lhs, lpc, if(all || cons->score < 0) { child->rsc_cons_lhs = g_list_append(child->rsc_cons_lhs, cons); } ); } node_t * clone_color(resource_t *rsc, pe_working_set_t *data_set) { int allocated = 0; int available_nodes = 0; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); if(is_not_set(rsc->flags, pe_rsc_provisional)) { return NULL; } else if(is_set(rsc->flags, pe_rsc_allocating)) { crm_debug("Dependancy loop detected involving %s", rsc->id); return NULL; } set_bit(rsc->flags, pe_rsc_allocating); crm_debug_2("Processing %s", rsc->id); /* this information is used by sort_clone_instance() when deciding in which * order to allocate clone instances */ slist_iter( constraint, rsc_colocation_t, rsc->rsc_cons_lhs, lpc, rsc->allowed_nodes = constraint->rsc_lh->cmds->merge_weights( constraint->rsc_lh, rsc->id, rsc->allowed_nodes, constraint->score/INFINITY, TRUE); ); dump_node_scores(scores_log_level, rsc, __FUNCTION__, rsc->allowed_nodes); /* count now tracks the number of clones currently allocated */ slist_iter(node, node_t, rsc->allowed_nodes, lpc, node->count = 0; ); slist_iter(child, resource_t, rsc->children, lpc, if(g_list_length(child->running_on) > 0) { node_t *child_node = child->running_on->data; node_t *local_node = parent_node_instance( child, child->running_on->data); if(local_node) { local_node->count++; } else { crm_err("%s is running on %s which isn't allowed", child->id, child_node->details->uname); } } ); rsc->children = g_list_sort(rsc->children, sort_clone_instance); /* count now tracks the number of clones we have allocated */ slist_iter(node, node_t, rsc->allowed_nodes, lpc, node->count = 0; ); rsc->allowed_nodes = g_list_sort( rsc->allowed_nodes, sort_node_weight); slist_iter(node, node_t, rsc->allowed_nodes, lpc, if(can_run_resources(node) == FALSE) { available_nodes++; } ); slist_iter(child, resource_t, rsc->children, lpc, if(allocated >= clone_data->clone_max) { crm_debug("Child %s not allocated - limit reached", child->id); resource_location(child, NULL, -INFINITY, "clone_color:limit_reached", data_set); } else if (clone_data->clone_max < available_nodes) { /* Only include positive colocation preferences of dependant resources * if not every node will get a copy of the clone */ append_parent_colocation(rsc, child, TRUE); } else { append_parent_colocation(rsc, child, FALSE); } if(color_instance(child, data_set)) { allocated++; } ); crm_debug("Allocated %d %s instances of a possible %d", allocated, rsc->id, clone_data->clone_max); clear_bit(rsc->flags, pe_rsc_provisional); clear_bit(rsc->flags, pe_rsc_allocating); return NULL; } static void clone_update_pseudo_status( resource_t *rsc, gboolean *stopping, gboolean *starting) { if(rsc->children) { slist_iter(child, resource_t, rsc->children, lpc, clone_update_pseudo_status(child, stopping, starting) ); return; } CRM_ASSERT(stopping != NULL); CRM_ASSERT(starting != NULL); slist_iter( action, action_t, rsc->actions, lpc, if(*starting && *stopping) { return; } else if(action->optional) { crm_debug_3("Skipping optional: %s", action->uuid); continue; } else if(action->pseudo == FALSE && action->runnable == FALSE){ crm_debug_3("Skipping unrunnable: %s", action->uuid); continue; } else if(safe_str_eq(RSC_STOP, action->task)) { crm_debug_2("Stopping due to: %s", action->uuid); *stopping = TRUE; } else if(safe_str_eq(RSC_START, action->task)) { if(action->runnable == FALSE) { crm_debug_3("Skipping pseudo-op: %s run=%d, pseudo=%d", action->uuid, action->runnable, action->pseudo); } else { crm_debug_2("Starting due to: %s", action->uuid); crm_debug_3("%s run=%d, pseudo=%d", action->uuid, action->runnable, action->pseudo); *starting = TRUE; } } ); } void clone_create_actions(resource_t *rsc, pe_working_set_t *data_set) { gboolean child_starting = FALSE; gboolean child_stopping = FALSE; action_t *stop = NULL; action_t *start = NULL; action_t *action_complete = NULL; resource_t *last_start_rsc = NULL; resource_t *last_stop_rsc = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); crm_debug_2("Creating actions for %s", rsc->id); slist_iter( child_rsc, resource_t, rsc->children, lpc, child_rsc->cmds->create_actions(child_rsc, data_set); clone_update_pseudo_status( child_rsc, &child_stopping, &child_starting); if(is_set(child_rsc->flags, pe_rsc_starting)) { last_start_rsc = child_rsc; } if(is_set(child_rsc->flags, pe_rsc_stopping)) { last_stop_rsc = child_rsc; } ); /* start */ start = start_action(rsc, NULL, !child_starting); action_complete = custom_action( rsc, started_key(rsc), RSC_STARTED, NULL, !child_starting, TRUE, data_set); start->pseudo = TRUE; start->runnable = TRUE; action_complete->pseudo = TRUE; action_complete->runnable = TRUE; action_complete->priority = INFINITY; /* crm_err("Upgrading priority for %s to INFINITY", action_complete->uuid); */ child_starting_constraints(clone_data, rsc, NULL, last_start_rsc, data_set); clone_create_notifications( rsc, start, action_complete, data_set); /* stop */ stop = stop_action(rsc, NULL, !child_stopping); action_complete = custom_action( rsc, stopped_key(rsc), RSC_STOPPED, NULL, !child_stopping, TRUE, data_set); stop->pseudo = TRUE; stop->runnable = TRUE; action_complete->pseudo = TRUE; action_complete->runnable = TRUE; action_complete->priority = INFINITY; /* crm_err("Upgrading priority for %s to INFINITY", action_complete->uuid); */ child_stopping_constraints(clone_data, rsc, NULL, last_stop_rsc, data_set); clone_create_notifications(rsc, stop, action_complete, data_set); rsc->actions = rsc->actions; if(stop->post_notified != NULL && start->pre_notify != NULL) { order_actions(stop->post_notified, start->pre_notify, pe_order_optional); } } void clone_create_notifications( resource_t *rsc, action_t *action, action_t *action_complete, pe_working_set_t *data_set) { /* * pre_notify -> pre_notify_complete -> pseudo_action * -> (real actions) -> pseudo_action_complete * -> post_notify -> post_notify_complete * * if the pre_noitfy requires confirmation, * then a list of confirmations will be added as triggers * to pseudo_action in clone_expand() */ action_t *notify = NULL; action_t *notify_complete = NULL; enum action_tasks task; char *notify_key = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); if(is_not_set(rsc->flags, pe_rsc_notify)) { return; } task = text2task(action->task); /* create pre_notify */ notify_key = generate_notify_key( rsc->id, "pre", action->task); notify = custom_action(rsc, notify_key, RSC_NOTIFY, NULL, action->optional, TRUE, data_set); add_hash_param(notify->meta, "notify_type", "pre"); add_hash_param(notify->meta, "notify_operation", action->task); if(clone_data->notify_confirm) { add_hash_param(notify->meta, "notify_confirm", "yes"); } else { add_hash_param(notify->meta, "notify_confirm", "no"); } /* create pre_notify_complete */ notify_key = generate_notify_key( rsc->id, "confirmed-pre", action->task); notify_complete = custom_action(rsc, notify_key, RSC_NOTIFIED, NULL, action->optional, TRUE, data_set); add_hash_param(notify_complete->meta, "notify_type", "pre"); add_hash_param(notify_complete->meta, "notify_operation", action->task); if(clone_data->notify_confirm) { add_hash_param(notify->meta, "notify_confirm", "yes"); } else { add_hash_param(notify->meta, "notify_confirm", "no"); } notify->pseudo = TRUE; notify->runnable = TRUE; notify_complete->pseudo = TRUE; notify_complete->runnable = TRUE; /* pre_notify before pre_notify_complete */ order_actions(notify, notify_complete, pe_order_optional); /* pre_notify_complete before action */ order_actions(notify_complete, action, pe_order_optional); action->pre_notify = notify; action->pre_notified = notify_complete; /* create post_notify */ notify_key = generate_notify_key (rsc->id, "post", action->task); notify = custom_action(rsc, notify_key, RSC_NOTIFY, NULL, action_complete->optional, TRUE, data_set); add_hash_param(notify->meta, "notify_type", "post"); add_hash_param(notify->meta, "notify_operation", action->task); if(clone_data->notify_confirm) { add_hash_param(notify->meta, "notify_confirm", "yes"); } else { add_hash_param(notify->meta, "notify_confirm", "no"); } /* action_complete before post_notify */ order_actions(action_complete, notify, pe_order_optional); /* create post_notify_complete */ notify_key = generate_notify_key( rsc->id, "confirmed-post", action->task); notify_complete = custom_action(rsc, notify_key, RSC_NOTIFIED, NULL, action->optional, TRUE, data_set); add_hash_param(notify_complete->meta, "notify_type", "pre"); add_hash_param(notify_complete->meta, "notify_operation", action->task); if(clone_data->notify_confirm) { add_hash_param(notify->meta, "notify_confirm", "yes"); } else { add_hash_param(notify->meta, "notify_confirm", "no"); } notify->pseudo = TRUE; notify->runnable = TRUE; notify->priority = INFINITY; notify->runnable = action_complete->runnable; notify_complete->pseudo = TRUE; notify_complete->runnable = TRUE; notify_complete->priority = INFINITY; notify_complete->runnable = action_complete->runnable; /* post_notify before post_notify_complete */ order_actions(notify, notify_complete, pe_order_optional); action->post_notify = notify; action->post_notified = notify_complete; if(safe_str_eq(action->task, RSC_STOP)) { /* post_notify_complete before start */ custom_action_order( rsc, NULL, notify_complete, rsc, start_key(rsc), NULL, pe_order_optional, data_set); } else if(safe_str_eq(action->task, RSC_START)) { /* post_notify_complete before promote */ custom_action_order( rsc, NULL, notify_complete, rsc, promote_key(rsc), NULL, pe_order_optional, data_set); } else if(safe_str_eq(action->task, RSC_DEMOTE)) { /* post_notify_complete before promote */ custom_action_order( rsc, NULL, notify_complete, rsc, stop_key(rsc), NULL, pe_order_optional, data_set); } } void child_starting_constraints( clone_variant_data_t *clone_data, resource_t *rsc, resource_t *child, resource_t *last, pe_working_set_t *data_set) { if(child == NULL && last == NULL) { crm_debug("%s has no active children", rsc->id); return; } if(child != NULL) { order_start_start( rsc, child, pe_order_runnable_left|pe_order_implies_left_printed); new_rsc_order(child, RSC_START, rsc, RSC_STARTED, pe_order_implies_right_printed, data_set); } if(clone_data->ordered) { if(child == NULL) { /* last child start before global started */ new_rsc_order(last, RSC_START, rsc, RSC_STARTED, pe_order_runnable_left, data_set); } else if(last == NULL) { /* global start before first child start */ order_start_start( rsc, child, pe_order_implies_left); } else { /* child/child relative start */ order_start_start(last, child, pe_order_implies_left); } } } void child_stopping_constraints( clone_variant_data_t *clone_data, resource_t *rsc, resource_t *child, resource_t *last, pe_working_set_t *data_set) { if(child == NULL && last == NULL) { crm_debug("%s has no active children", rsc->id); return; } if(child != NULL) { order_stop_stop(rsc, child, pe_order_shutdown|pe_order_implies_left_printed); new_rsc_order(child, RSC_STOP, rsc, RSC_STOPPED, pe_order_implies_right_printed, data_set); } if(clone_data->ordered) { if(last == NULL) { /* first child stop before global stopped */ new_rsc_order(child, RSC_STOP, rsc, RSC_STOPPED, pe_order_runnable_left, data_set); } else if(child == NULL) { /* global stop before last child stop */ order_stop_stop( rsc, last, pe_order_implies_left); } else { /* child/child relative stop */ order_stop_stop(child, last, pe_order_implies_left); } } } void clone_internal_constraints(resource_t *rsc, pe_working_set_t *data_set) { resource_t *last_rsc = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); native_internal_constraints(rsc, data_set); /* global stop before stopped */ new_rsc_order(rsc, RSC_STOP, rsc, RSC_STOPPED, pe_order_runnable_left, data_set); /* global start before started */ new_rsc_order(rsc, RSC_START, rsc, RSC_STARTED, pe_order_runnable_left, data_set); /* global stopped before start */ new_rsc_order(rsc, RSC_STOPPED, rsc, RSC_START, pe_order_optional, data_set); slist_iter( child_rsc, resource_t, rsc->children, lpc, child_rsc->cmds->internal_constraints(child_rsc, data_set); child_starting_constraints( clone_data, rsc, child_rsc, last_rsc, data_set); child_stopping_constraints( clone_data, rsc, child_rsc, last_rsc, data_set); last_rsc = child_rsc; ); } resource_t* find_compatible_child( resource_t *local_child, resource_t *rsc, enum rsc_role_e filter, gboolean current) { node_t *local_node = NULL; node_t *node = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); local_node = local_child->fns->location(local_child, NULL, current); if(local_node == NULL) { crm_debug("Can't colocate unrunnable child %s with %s", local_child->id, rsc->id); return NULL; } slist_iter( child_rsc, resource_t, rsc->children, lpc, enum rsc_role_e next_role = child_rsc->fns->state(child_rsc, current); node = child_rsc->fns->location(child_rsc, NULL, current); if(filter != RSC_ROLE_UNKNOWN && next_role != filter) { crm_debug_2("Filtered %s", child_rsc->id); continue; } if(node && local_node && node->details == local_node->details) { crm_info("Colocating %s with %s on %s", local_child->id, child_rsc->id, node->details->uname); return child_rsc; } ); crm_debug("Can't colocate child %s with %s", local_child->id, rsc->id); return NULL; } void clone_rsc_colocation_lh( resource_t *rsc_lh, resource_t *rsc_rh, rsc_colocation_t *constraint) { gboolean do_interleave = FALSE; resource_t *rsc = constraint->rsc_lh; clone_variant_data_t *clone_data = NULL; clone_variant_data_t *clone_data_rh = NULL; if(rsc == NULL) { pe_err("rsc_lh was NULL for %s", constraint->id); return; } else if(constraint->rsc_rh == NULL) { pe_err("rsc_rh was NULL for %s", constraint->id); return; } else { crm_debug_4("Processing constraints from %s", rsc->id); } get_clone_variant_data(clone_data, rsc); if(constraint->rsc_rh->variant == pe_clone || constraint->rsc_rh->variant == pe_master) { get_clone_variant_data( clone_data_rh, constraint->rsc_rh); if(clone_data->clone_node_max != clone_data_rh->clone_node_max) { crm_config_err("Cannot interleave "XML_CIB_TAG_INCARNATION " %s and %s because" " they do not support the same number of" " resources per node", constraint->rsc_lh->id, constraint->rsc_rh->id); /* only the LHS side needs to be labeled as interleave */ } else if(clone_data->interleave) { do_interleave = TRUE; } else if(constraint->score >= INFINITY) { GListPtr lhs = NULL, rhs = NULL; lhs = rsc_lh->allowed_nodes; slist_iter( child_rsc, resource_t, rsc_rh->children, lpc, node_t *chosen = child_rsc->fns->location(child_rsc, NULL, FALSE); if(chosen != NULL) { rhs = g_list_append(rhs, chosen); } ); rsc_lh->allowed_nodes = node_list_exclude(lhs, rhs); pe_free_shallow_adv(rhs, FALSE); pe_free_shallow(lhs); return; } } else if(constraint->score >= INFINITY) { crm_config_err("Manditory co-location of clones (%s) with other" " non-clone (%s) resources is not supported", rsc_lh->id, rsc_rh->id); return; } if(do_interleave) { resource_t *rh_child = NULL; slist_iter(lh_child, resource_t, rsc->children, lpc, CRM_ASSERT(lh_child != NULL); rh_child = find_compatible_child( lh_child, rsc_rh, RSC_ROLE_UNKNOWN, FALSE); if(rh_child == NULL) { crm_debug_2("No match found for %s", lh_child->id); continue; } crm_debug("Interleaving %s with %s", lh_child->id, rh_child->id); lh_child->cmds->rsc_colocation_lh( lh_child, rh_child, constraint); ); return; } slist_iter( child_rsc, resource_t, rsc->children, lpc, child_rsc->cmds->rsc_colocation_lh(child_rsc, constraint->rsc_rh, constraint); ); } void clone_rsc_colocation_rh( resource_t *rsc_lh, resource_t *rsc_rh, rsc_colocation_t *constraint) { clone_variant_data_t *clone_data = NULL; CRM_CHECK(rsc_lh != NULL, return); CRM_CHECK(rsc_lh->variant == pe_native, return); get_clone_variant_data(clone_data, rsc_rh); crm_debug_3("Processing constraint %s: %d", constraint->id, constraint->score); if(rsc_rh == NULL) { pe_err("rsc_rh was NULL for %s", constraint->id); return; } else if(is_set(rsc_rh->flags, pe_rsc_provisional)) { crm_debug_3("%s is still provisional", rsc_rh->id); return; } else if(constraint->score >= INFINITY) { GListPtr lhs = NULL, rhs = NULL; lhs = rsc_lh->allowed_nodes; slist_iter( child_rsc, resource_t, rsc_rh->children, lpc, node_t *chosen = child_rsc->fns->location(child_rsc, NULL, FALSE); if(chosen != NULL) { rhs = g_list_append(rhs, chosen); } ); rsc_lh->allowed_nodes = node_list_exclude(lhs, rhs); pe_free_shallow_adv(rhs, FALSE); pe_free_shallow(lhs); return; } slist_iter( child_rsc, resource_t, rsc_rh->children, lpc, child_rsc->cmds->rsc_colocation_rh(rsc_lh, child_rsc, constraint); ); } void clone_rsc_order_lh(resource_t *rsc, order_constraint_t *order, pe_working_set_t *data_set) { resource_t *r1 = NULL; resource_t *r2 = NULL; gboolean do_interleave = FALSE; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); - crm_debug_2("%s->%s", order->lh_action_task, order->rh_action_task); + crm_debug_4("%s->%s", order->lh_action_task, order->rh_action_task); r1 = uber_parent(rsc); r2 = uber_parent(order->rh_rsc); if(r1 == r2) { native_rsc_order_lh(rsc, order, data_set); return; } if(order->rh_rsc->variant == pe_clone || order->rh_rsc->variant == pe_master) { clone_variant_data_t *clone_data_rh = NULL; get_clone_variant_data(clone_data_rh, order->rh_rsc); if(clone_data->clone_node_max != clone_data_rh->clone_node_max) { crm_config_err("Cannot interleave "XML_CIB_TAG_INCARNATION " %s and %s because they do not support the same" " number of resources per node", rsc->id, order->rh_rsc->id); /* only the LHS side needs to be labeled as interleave */ } else if(clone_data->interleave) { do_interleave = TRUE; } } if(do_interleave && order->rh_rsc) { resource_t *lh_child = NULL; resource_t *rh_saved = order->rh_rsc; gboolean current = FALSE; if(strstr(order->lh_action_task, "_stop_0") || strstr(order->lh_action_task, "_demote_0")) { current = TRUE; } slist_iter( rh_child, resource_t, rh_saved->children, lpc, CRM_ASSERT(rh_child != NULL); lh_child = find_compatible_child(rh_child, rsc, RSC_ROLE_UNKNOWN, current); if(lh_child == NULL) { crm_debug_2("No match found for %s", rh_child->id); continue; } crm_debug("Interleaving %s with %s", lh_child->id, rh_child->id); order->rh_rsc = rh_child; lh_child->cmds->rsc_order_lh(lh_child, order, data_set); order->rh_rsc = rh_saved; ); } else { #if 0 if(order->type != pe_order_optional) { crm_debug("Upgraded ordering constraint %d - 0x%.6x", order->id, order->type); native_rsc_order_lh(rsc, order, data_set); } #endif if(order->type & pe_order_implies_left) { if(rsc->variant == order->rh_rsc->variant) { crm_debug_2("Clone-to-clone ordering: %s -> %s 0x%.6x", order->lh_action_task, order->rh_action_task, order->type); /* stop instances on the same nodes as stopping RHS instances */ slist_iter( child_rsc, resource_t, rsc->children, lpc, native_rsc_order_lh(child_rsc, order, data_set); ); } else { /* stop everything */ crm_debug_2("Clone-to-* ordering: %s -> %s 0x%.6x", order->lh_action_task, order->rh_action_task, order->type); slist_iter( child_rsc, resource_t, rsc->children, lpc, native_rsc_order_lh(child_rsc, order, data_set); ); } } convert_non_atomic_task(rsc, order, FALSE); native_rsc_order_lh(rsc, order, data_set); } if(is_set(rsc->flags, pe_rsc_notify)) { order->type = pe_order_optional; convert_non_atomic_task(rsc, order, TRUE); native_rsc_order_lh(rsc, order, data_set); } } void clone_rsc_order_rh( action_t *lh_action, resource_t *rsc, order_constraint_t *order) { clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); - crm_debug_2("%s->%s", lh_action->uuid, order->rh_action_task); + crm_debug_4("%s->%s", lh_action->uuid, order->rh_action_task); if(safe_str_eq(CRM_OP_PROBED, lh_action->uuid)) { slist_iter( child_rsc, resource_t, rsc->children, lpc, child_rsc->cmds->rsc_order_rh(lh_action, child_rsc, order); ); if(rsc->fns->state(rsc, TRUE) < RSC_ROLE_STARTED && rsc->fns->state(rsc, FALSE) > RSC_ROLE_STOPPED) { order->type |= pe_order_implies_right; } } native_rsc_order_rh(lh_action, rsc, order); } void clone_rsc_location(resource_t *rsc, rsc_to_node_t *constraint) { clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); crm_debug_3("Processing location constraint %s for %s", constraint->id, rsc->id); native_rsc_location(rsc, constraint); slist_iter( child_rsc, resource_t, rsc->children, lpc, child_rsc->cmds->rsc_location(child_rsc, constraint); ); } static gint sort_notify_entries(gconstpointer a, gconstpointer b) { int tmp; const notify_entry_t *entry_a = a; const notify_entry_t *entry_b = b; if(entry_a == NULL && entry_b == NULL) { return 0; } if(entry_a == NULL) { return 1; } if(entry_b == NULL) { return -1; } if(entry_a->rsc == NULL && entry_b->rsc == NULL) { return 0; } if(entry_a->rsc == NULL) { return 1; } if(entry_b->rsc == NULL) { return -1; } tmp = strcmp(entry_a->rsc->id, entry_b->rsc->id); if(tmp != 0) { return tmp; } if(entry_a->node == NULL && entry_b->node == NULL) { return 0; } if(entry_a->node == NULL) { return 1; } if(entry_b->node == NULL) { return -1; } return strcmp(entry_a->node->details->id, entry_b->node->details->id); } static void expand_list(GListPtr list, int clones, char **rsc_list, char **node_list, char **uuid_list) { const char *uname = NULL; const char *rsc_id = NULL; const char *last_rsc_id = NULL; CRM_CHECK(list != NULL, return); if(rsc_list) { CRM_CHECK(*rsc_list == NULL, *rsc_list = NULL); } if(node_list) { CRM_CHECK(*node_list == NULL, *node_list = NULL); } slist_iter(entry, notify_entry_t, list, lpc, CRM_CHECK(entry != NULL, continue); CRM_CHECK(entry->rsc != NULL, continue); CRM_CHECK(entry->node != NULL, continue); rsc_id = entry->rsc->id; uname = entry->node->details->uname; CRM_ASSERT(uname != NULL); CRM_ASSERT(rsc_id != NULL); /* filter dups */ if(safe_str_eq(rsc_id, last_rsc_id)) { continue; } last_rsc_id = rsc_id; if(rsc_list != NULL) { int existing_len = 0; int len = 2 + strlen(rsc_id); /* +1 space, +1 EOS */ if(rsc_list && *rsc_list) { existing_len = strlen(*rsc_list); } crm_debug_5("Adding %s (%dc) at offset %d", rsc_id, len-2, existing_len); crm_realloc(*rsc_list, len + existing_len); sprintf(*rsc_list + existing_len, "%s ", rsc_id); } if(node_list != NULL) { int existing_len = 0; int len = 2 + strlen(uname); if(node_list && *node_list) { existing_len = strlen(*node_list); } crm_debug_5("Adding %s (%dc) at offset %d", uname, len-2, existing_len); crm_realloc(*node_list, len + existing_len); sprintf(*node_list + existing_len, "%s ", uname); } ); } static void mark_notifications_required(resource_t *rsc, enum action_tasks task, gboolean top) { char *key = NULL; char *key_complete = NULL; const char *task_s = task2text(task); if(top) { key = generate_op_key(rsc->id, task_s, 0); key_complete = generate_op_key(rsc->id, task2text(task+1), 0); } slist_iter(action, action_t, rsc->actions, lpc, if(action->optional == FALSE) { continue; } if(safe_str_eq(action->uuid, key) || safe_str_eq(action->uuid, key_complete)) { crm_debug_3("Marking top-level action %s as required", action->uuid); action->optional = FALSE; } if(strstr(action->uuid, task_s)) { if(safe_str_eq(RSC_NOTIFIED, action->task) || safe_str_eq(RSC_NOTIFY, action->task)) { crm_debug_3("Marking %s as required", action->uuid); action->optional = FALSE; } } ); slist_iter( child, resource_t, rsc->children, lpc, mark_notifications_required(child, task, FALSE); ); } void clone_expand(resource_t *rsc, pe_working_set_t *data_set) { char *rsc_list = NULL; char *node_list = NULL; char *uuid_list = NULL; notify_data_t *n_data = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); crm_malloc0(n_data, sizeof(notify_data_t)); n_data->keys = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); crm_debug_2("Processing actions from %s", rsc->id); if(is_set(rsc->flags, pe_rsc_notify)) { slist_iter( child_rsc, resource_t, rsc->children, lpc, slist_iter( op, action_t, rsc->actions, lpc2, child_rsc->cmds->create_notify_element( child_rsc, op, n_data, data_set); ); ); } /* expand the notify data */ if(is_set(rsc->flags, pe_rsc_notify) && n_data->stop) { n_data->stop = g_list_sort( n_data->stop, sort_notify_entries); rsc_list = NULL; node_list = NULL; expand_list(n_data->stop, clone_data->clone_max, &rsc_list, &node_list, &uuid_list); g_hash_table_insert( n_data->keys, crm_strdup("notify_stop_resource"), rsc_list); g_hash_table_insert( n_data->keys, crm_strdup("notify_stop_uname"), node_list); if(rsc_list != NULL) { mark_notifications_required(rsc, stop_rsc, TRUE); } } if(is_set(rsc->flags, pe_rsc_notify) && n_data->start) { n_data->start = g_list_sort( n_data->start, sort_notify_entries); rsc_list = NULL; node_list = NULL; expand_list(n_data->start, clone_data->clone_max, &rsc_list, &node_list, &uuid_list); g_hash_table_insert( n_data->keys, crm_strdup("notify_start_resource"), rsc_list); g_hash_table_insert( n_data->keys, crm_strdup("notify_start_uname"), node_list); mark_notifications_required(rsc, start_rsc, TRUE); } if(is_set(rsc->flags, pe_rsc_notify) && n_data->demote) { n_data->demote = g_list_sort( n_data->demote, sort_notify_entries); rsc_list = NULL; node_list = NULL; expand_list(n_data->demote, clone_data->clone_max, &rsc_list, &node_list, &uuid_list); g_hash_table_insert( n_data->keys, crm_strdup("notify_demote_resource"), rsc_list); g_hash_table_insert( n_data->keys, crm_strdup("notify_demote_uname"), node_list); mark_notifications_required(rsc, action_demote, TRUE); } if(is_set(rsc->flags, pe_rsc_notify) && n_data->promote) { n_data->promote = g_list_sort( n_data->promote, sort_notify_entries); rsc_list = NULL; node_list = NULL; uuid_list = NULL; expand_list(n_data->promote, clone_data->clone_max, &rsc_list, &node_list, &uuid_list); g_hash_table_insert( n_data->keys, crm_strdup("notify_promote_resource"), rsc_list); g_hash_table_insert( n_data->keys, crm_strdup("notify_promote_uname"), node_list); mark_notifications_required(rsc, action_promote, TRUE); } if(is_set(rsc->flags, pe_rsc_notify) && n_data->active) { n_data->active = g_list_sort( n_data->active, sort_notify_entries); rsc_list = NULL; node_list = NULL; uuid_list = NULL; expand_list(n_data->active, clone_data->clone_max, &rsc_list, &node_list, &uuid_list); g_hash_table_insert( n_data->keys, crm_strdup("notify_active_resource"), rsc_list); g_hash_table_insert( n_data->keys, crm_strdup("notify_active_uname"), node_list); } if(is_set(rsc->flags, pe_rsc_notify) && n_data->slave) { n_data->slave = g_list_sort( n_data->slave, sort_notify_entries); rsc_list = NULL; node_list = NULL; uuid_list = NULL; expand_list(n_data->slave, clone_data->clone_max, &rsc_list, &node_list, &uuid_list); g_hash_table_insert( n_data->keys, crm_strdup("notify_slave_resource"), rsc_list); g_hash_table_insert( n_data->keys, crm_strdup("notify_slave_uname"), node_list); } if(is_set(rsc->flags, pe_rsc_notify) && n_data->master) { n_data->master = g_list_sort( n_data->master, sort_notify_entries); rsc_list = NULL; node_list = NULL; uuid_list = NULL; expand_list(n_data->master, clone_data->clone_max, &rsc_list, &node_list, &uuid_list); g_hash_table_insert( n_data->keys, crm_strdup("notify_master_resource"), rsc_list); g_hash_table_insert( n_data->keys, crm_strdup("notify_master_uname"), node_list); } if(is_set(rsc->flags, pe_rsc_notify) && n_data->inactive) { n_data->inactive = g_list_sort( n_data->inactive, sort_notify_entries); rsc_list = NULL; node_list = NULL; uuid_list = NULL; expand_list(n_data->inactive, clone_data->clone_max, &rsc_list, &node_list, &uuid_list); g_hash_table_insert( n_data->keys, crm_strdup("notify_inactive_resource"), rsc_list); g_hash_table_insert( n_data->keys, crm_strdup("notify_inactive_uname"), node_list); } /* yes, we DO need this second loop */ slist_iter( child_rsc, resource_t, rsc->children, lpc, child_rsc->cmds->expand(child_rsc, data_set); ); /* slist_iter( */ /* action, action_t, rsc->actions, lpc2, */ /* if(safe_str_eq(action->task, RSC_NOTIFY)) { */ /* action->meta_xml = notify_xml; */ /* } */ /* ); */ native_expand(rsc, data_set); /* destroy the notify_data */ pe_free_shallow(n_data->stop); pe_free_shallow(n_data->start); pe_free_shallow(n_data->demote); pe_free_shallow(n_data->promote); pe_free_shallow(n_data->master); pe_free_shallow(n_data->slave); pe_free_shallow(n_data->active); pe_free_shallow(n_data->inactive); g_hash_table_destroy(n_data->keys); crm_free(n_data); } static gint sort_rsc_id(gconstpointer a, gconstpointer b) { const resource_t *resource1 = (const resource_t*)a; const resource_t *resource2 = (const resource_t*)b; CRM_ASSERT(resource1 != NULL); CRM_ASSERT(resource2 != NULL); return strcmp(resource1->id, resource2->id); } static resource_t *find_instance_on(resource_t *rsc, node_t *node) { slist_iter(child, resource_t, rsc->children, lpc, slist_iter(known ,node_t, child->known_on, lpc2, if(node->details == known->details) { return child; } ); ); return NULL; } gboolean clone_create_probe(resource_t *rsc, node_t *node, action_t *complete, gboolean force, pe_working_set_t *data_set) { gboolean any_created = FALSE; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); rsc->children = g_list_sort(rsc->children, sort_rsc_id); if(is_not_set(rsc->flags, pe_rsc_unique) && clone_data->clone_node_max == 1) { /* only look for one copy */ resource_t *child = NULL; /* Try whoever we probed last time */ child = find_instance_on(rsc, node); if(child) { return child->cmds->create_probe( child, node, complete, force, data_set); } /* Try whoever we plan on starting there */ slist_iter( child_rsc, resource_t, rsc->children, lpc, node_t *local_node = child_rsc->fns->location(child_rsc, NULL, FALSE); if(local_node == NULL) { continue; } if(local_node->details == node->details) { return child_rsc->cmds->create_probe( child_rsc, node, complete, force, data_set); } ); /* Fall back to the first clone instance */ child = rsc->children->data; return child->cmds->create_probe(child, node, complete, force, data_set); } slist_iter( child_rsc, resource_t, rsc->children, lpc, if(child_rsc->cmds->create_probe( child_rsc, node, complete, force, data_set)) { any_created = TRUE; } if(any_created && is_not_set(rsc->flags, pe_rsc_unique) && clone_data->clone_node_max == 1) { /* only look for one copy (clone :0) */ break; } ); return any_created; } diff --git a/pengine/group.c b/pengine/group.c index 05f0b01605..0a29f1d9b3 100644 --- a/pengine/group.c +++ b/pengine/group.c @@ -1,509 +1,509 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #define VARIANT_GROUP 1 #include node_t * group_color(resource_t *rsc, pe_working_set_t *data_set) { node_t *node = NULL; node_t *group_node = NULL; group_variant_data_t *group_data = NULL; get_group_variant_data(group_data, rsc); if(is_not_set(rsc->flags, pe_rsc_provisional)) { return rsc->allocated_to; } crm_debug_2("Processing %s", rsc->id); if(is_set(rsc->flags, pe_rsc_allocating)) { crm_debug("Dependancy loop detected involving %s", rsc->id); return NULL; } if(group_data->first_child == NULL) { /* nothign to allocate */ clear_bit(rsc->flags, pe_rsc_provisional); return NULL; } set_bit(rsc->flags, pe_rsc_allocating); rsc->role = group_data->first_child->role; group_data->first_child->rsc_cons = g_list_concat( group_data->first_child->rsc_cons, rsc->rsc_cons); rsc->rsc_cons = NULL; dump_node_scores(scores_log_level, rsc, __PRETTY_FUNCTION__, rsc->allowed_nodes); slist_iter( child_rsc, resource_t, rsc->children, lpc, node = child_rsc->cmds->color(child_rsc, data_set); if(group_node == NULL) { group_node = node; } ); rsc->next_role = group_data->first_child->next_role; clear_bit(rsc->flags, pe_rsc_allocating); clear_bit(rsc->flags, pe_rsc_provisional); if(group_data->colocated) { return group_node; } return NULL; } void group_update_pseudo_status(resource_t *parent, resource_t *child); void group_create_actions(resource_t *rsc, pe_working_set_t *data_set) { action_t *op = NULL; const char *value = NULL; group_variant_data_t *group_data = NULL; get_group_variant_data(group_data, rsc); crm_debug_2("Creating actions for %s", rsc->id); slist_iter( child_rsc, resource_t, rsc->children, lpc, child_rsc->cmds->create_actions(child_rsc, data_set); group_update_pseudo_status(rsc, child_rsc); ); op = start_action(rsc, NULL, TRUE/* !group_data->child_starting */); op->pseudo = TRUE; op->runnable = TRUE; op = custom_action(rsc, started_key(rsc), RSC_STARTED, NULL, TRUE/* !group_data->child_starting */, TRUE, data_set); op->pseudo = TRUE; op->runnable = TRUE; op = stop_action(rsc, NULL, TRUE/* !group_data->child_stopping */); op->pseudo = TRUE; op->runnable = TRUE; op = custom_action(rsc, stopped_key(rsc), RSC_STOPPED, NULL, TRUE/* !group_data->child_stopping */, TRUE, data_set); op->pseudo = TRUE; op->runnable = TRUE; value = g_hash_table_lookup(rsc->parameters, crm_meta_name("stateful")); if(crm_is_true(value)) { op = custom_action(rsc, demote_key(rsc), RSC_DEMOTE, NULL, TRUE, TRUE, data_set); op->pseudo = TRUE; op->runnable = TRUE; op = custom_action(rsc, demoted_key(rsc), RSC_DEMOTED, NULL, TRUE, TRUE, data_set); op->pseudo = TRUE; op->runnable = TRUE; op = custom_action(rsc, promote_key(rsc), RSC_PROMOTE, NULL, TRUE, TRUE, data_set); op->pseudo = TRUE; op->runnable = TRUE; op = custom_action(rsc, promoted_key(rsc), RSC_PROMOTED, NULL, TRUE, TRUE, data_set); op->pseudo = TRUE; op->runnable = TRUE; } rsc->actions = rsc->actions; /* rsc->actions = NULL; */ } void group_update_pseudo_status(resource_t *parent, resource_t *child) { group_variant_data_t *group_data = NULL; get_group_variant_data(group_data, parent); if(group_data->ordered == FALSE) { /* If this group is not ordered, then leave the meta-actions as optional */ return; } if(group_data->child_stopping && group_data->child_starting) { return; } slist_iter( action, action_t, child->actions, lpc, if(action->optional) { continue; } if(safe_str_eq(RSC_STOP, action->task) && action->runnable) { group_data->child_stopping = TRUE; crm_debug_3("Based on %s the group is stopping", action->uuid); } else if(safe_str_eq(RSC_START, action->task) && action->runnable) { group_data->child_starting = TRUE; crm_debug_3("Based on %s the group is starting", action->uuid); } ); } void group_internal_constraints(resource_t *rsc, pe_working_set_t *data_set) { const char *value = NULL; gboolean stateful = FALSE; resource_t *last_rsc = NULL; group_variant_data_t *group_data = NULL; get_group_variant_data(group_data, rsc); native_internal_constraints(rsc, data_set); value = g_hash_table_lookup(rsc->parameters, crm_meta_name("stateful")); stateful = crm_is_true(value); new_rsc_order(rsc, RSC_STOPPED, rsc, RSC_START, pe_order_optional, data_set); new_rsc_order(rsc, RSC_STOP, rsc, RSC_STOPPED, pe_order_runnable_left|pe_order_implies_right|pe_order_implies_left, data_set); new_rsc_order(rsc, RSC_START, rsc, RSC_STARTED, pe_order_runnable_left, data_set); slist_iter( child_rsc, resource_t, rsc->children, lpc, int stop = pe_order_shutdown|pe_order_implies_right; int stopped = pe_order_implies_right_printed; int start = pe_order_implies_right|pe_order_runnable_left; int started = pe_order_runnable_left|pe_order_implies_right|pe_order_implies_right_printed; child_rsc->cmds->internal_constraints(child_rsc, data_set); if(last_rsc == NULL) { stop |= pe_order_implies_left; stopped = pe_order_implies_right; } else if(group_data->colocated) { rsc_colocation_new( "group:internal_colocation", NULL, INFINITY, child_rsc, last_rsc, NULL, NULL, data_set); } if(stateful) { new_rsc_order(rsc, RSC_DEMOTE, child_rsc, RSC_DEMOTE, stop|pe_order_implies_left_printed, data_set); new_rsc_order(child_rsc, RSC_DEMOTE, rsc, RSC_DEMOTED, stopped, data_set); new_rsc_order(child_rsc, RSC_PROMOTE, rsc, RSC_PROMOTED, started, data_set); new_rsc_order(rsc, RSC_PROMOTE, child_rsc, RSC_PROMOTE, pe_order_implies_left_printed, data_set); } order_start_start(rsc, child_rsc, pe_order_implies_left_printed); order_stop_stop(rsc, child_rsc, stop|pe_order_implies_left_printed); new_rsc_order(child_rsc, RSC_STOP, rsc, RSC_STOPPED, stopped, data_set); new_rsc_order(child_rsc, RSC_START, rsc, RSC_STARTED, started, data_set); if(group_data->ordered == FALSE) { order_start_start(rsc, child_rsc, start|pe_order_implies_left_printed); if(stateful) { new_rsc_order(rsc, RSC_PROMOTE, child_rsc, RSC_PROMOTE, start|pe_order_implies_left_printed, data_set); } } else if(last_rsc != NULL) { child_rsc->restart_type = pe_restart_restart; order_start_start(last_rsc, child_rsc, start); order_stop_stop(child_rsc, last_rsc, pe_order_implies_left); if(stateful) { new_rsc_order(last_rsc, RSC_PROMOTE, child_rsc, RSC_PROMOTE, start, data_set); new_rsc_order(child_rsc, RSC_DEMOTE, last_rsc, RSC_DEMOTE, pe_order_implies_left, data_set); } } else { /* If anyone in the group is starting, then * pe_order_implies_right will cause _everyone_ in the group * to be sent a start action * But this is safe since starting something that is already * started is required to be "safe" */ int flags = pe_order_implies_left|pe_order_implies_right|pe_order_runnable_right|pe_order_runnable_left; order_start_start(rsc, child_rsc, flags); if(stateful) { new_rsc_order(rsc, RSC_PROMOTE, child_rsc, RSC_PROMOTE, flags, data_set); } } last_rsc = child_rsc; ); if(group_data->ordered && last_rsc != NULL) { int stop_stop_flags = pe_order_implies_right; int stop_stopped_flags = pe_order_implies_left; order_stop_stop(rsc, last_rsc, stop_stop_flags); new_rsc_order(last_rsc, RSC_STOP, rsc, RSC_STOPPED, stop_stopped_flags, data_set); if(stateful) { new_rsc_order(rsc, RSC_DEMOTE, last_rsc, RSC_DEMOTE, stop_stop_flags, data_set); new_rsc_order(last_rsc, RSC_DEMOTE, rsc, RSC_DEMOTED, stop_stopped_flags, data_set); } } } void group_rsc_colocation_lh( resource_t *rsc_lh, resource_t *rsc_rh, rsc_colocation_t *constraint) { group_variant_data_t *group_data = NULL; if(rsc_lh == NULL) { pe_err("rsc_lh was NULL for %s", constraint->id); return; } else if(rsc_rh == NULL) { pe_err("rsc_rh was NULL for %s", constraint->id); return; } crm_debug_4("Processing constraints from %s", rsc_lh->id); get_group_variant_data(group_data, rsc_lh); if(group_data->colocated) { group_data->first_child->cmds->rsc_colocation_lh( group_data->first_child, rsc_rh, constraint); return; } else if(constraint->score >= INFINITY) { crm_config_err("%s: Cannot perform manditory colocation" " between non-colocated group and %s", rsc_lh->id, rsc_rh->id); return; } slist_iter( child_rsc, resource_t, rsc_lh->children, lpc, child_rsc->cmds->rsc_colocation_lh( child_rsc, rsc_rh, constraint); ); } void group_rsc_colocation_rh( resource_t *rsc_lh, resource_t *rsc_rh, rsc_colocation_t *constraint) { group_variant_data_t *group_data = NULL; get_group_variant_data(group_data, rsc_rh); CRM_CHECK(rsc_lh->variant == pe_native, return); crm_debug_3("Processing RH of constraint %s", constraint->id); print_resource(LOG_DEBUG_3, "LHS", rsc_lh, TRUE); if(is_set(rsc_rh->flags, pe_rsc_provisional)) { return; } else if(group_data->colocated && group_data->first_child) { group_data->first_child->cmds->rsc_colocation_rh( rsc_lh, group_data->first_child, constraint); return; } else if(constraint->score >= INFINITY) { crm_config_err("%s: Cannot perform manditory colocation with" " non-colocated group: %s", rsc_lh->id, rsc_rh->id); return; } slist_iter( child_rsc, resource_t, rsc_rh->children, lpc, child_rsc->cmds->rsc_colocation_rh( rsc_lh, child_rsc, constraint); ); } void group_rsc_order_lh(resource_t *rsc, order_constraint_t *order, pe_working_set_t *data_set) { group_variant_data_t *group_data = NULL; get_group_variant_data(group_data, rsc); - crm_debug_2("%s->%s", order->lh_action_task, order->rh_action_task); + crm_debug_4("%s->%s", order->lh_action_task, order->rh_action_task); if(order->rh_rsc != NULL && (rsc == order->rh_rsc || rsc == order->rh_rsc->parent)) { native_rsc_order_lh(rsc, order, data_set); return; } #if 0 if(order->type != pe_order_optional) { native_rsc_order_lh(rsc, order, data_set); } if(order->type & pe_order_implies_left) { native_rsc_order_lh(group_data->first_child, order, data_set); } #endif convert_non_atomic_task(rsc, order, TRUE); native_rsc_order_lh(rsc, order, data_set); } void group_rsc_order_rh( action_t *lh_action, resource_t *rsc, order_constraint_t *order) { group_variant_data_t *group_data = NULL; get_group_variant_data(group_data, rsc); - crm_debug_2("%s->%s", lh_action->uuid, order->rh_action_task); + crm_debug_4("%s->%s", lh_action->uuid, order->rh_action_task); if(rsc == NULL) { return; } if(safe_str_eq(CRM_OP_PROBED, lh_action->uuid)) { slist_iter( child_rsc, resource_t, rsc->children, lpc, child_rsc->cmds->rsc_order_rh(lh_action, child_rsc, order); ); if(rsc->fns->state(rsc, TRUE) < RSC_ROLE_STARTED && rsc->fns->state(rsc, FALSE) > RSC_ROLE_STOPPED) { order->type |= pe_order_implies_right; } } else if(lh_action->rsc != NULL && lh_action->rsc != rsc && lh_action->rsc != rsc->parent && lh_action->rsc->parent != rsc) { char *tmp = NULL; char *task_s = NULL; int interval = 0; enum action_tasks task = 0; parse_op_key(order->lh_action_task, &tmp, &task_s, &interval); task = text2task(task_s); crm_free(task_s); crm_free(tmp); switch(task) { case no_action: case monitor_rsc: case action_notify: case action_notified: case shutdown_crm: case stonith_node: break; case stop_rsc: case stopped_rsc: case action_demote: case action_demoted: order->type |= pe_order_complex_left; break; case start_rsc: case started_rsc: case action_promote: case action_promoted: order->type |= pe_order_complex_right; break; } } native_rsc_order_rh(lh_action, rsc, order); } void group_rsc_location(resource_t *rsc, rsc_to_node_t *constraint) { GListPtr saved = constraint->node_list_rh; GListPtr zero = node_list_dup(constraint->node_list_rh, TRUE, FALSE); gboolean reset_scores = TRUE; group_variant_data_t *group_data = NULL; get_group_variant_data(group_data, rsc); crm_debug("Processing rsc_location %s for %s", constraint->id, rsc->id); slist_iter( child_rsc, resource_t, rsc->children, lpc, child_rsc->cmds->rsc_location(child_rsc, constraint); if(group_data->colocated && reset_scores) { reset_scores = FALSE; constraint->node_list_rh = zero; } ); constraint->node_list_rh = saved; pe_free_shallow_adv(zero, TRUE); } void group_expand(resource_t *rsc, pe_working_set_t *data_set) { group_variant_data_t *group_data = NULL; get_group_variant_data(group_data, rsc); crm_debug_3("Processing actions from %s", rsc->id); CRM_CHECK(rsc != NULL, return); native_expand(rsc, data_set); slist_iter( child_rsc, resource_t, rsc->children, lpc, child_rsc->cmds->expand(child_rsc, data_set); ); } GListPtr group_merge_weights( resource_t *rsc, const char *rhs, GListPtr nodes, int factor, gboolean allow_rollback) { group_variant_data_t *group_data = NULL; get_group_variant_data(group_data, rsc); if(is_set(rsc->flags, pe_rsc_merging)) { crm_debug("Breaking dependancy loop with %s at %s", rsc->id, rhs); return nodes; } else if(is_not_set(rsc->flags, pe_rsc_provisional) || can_run_any(nodes) == FALSE) { return nodes; } set_bit(rsc->flags, pe_rsc_merging); nodes = group_data->first_child->cmds->merge_weights( group_data->first_child, rhs, nodes, factor, allow_rollback); slist_iter( constraint, rsc_colocation_t, rsc->rsc_cons_lhs, lpc, nodes = native_merge_weights( constraint->rsc_lh, rsc->id, nodes, constraint->score/INFINITY, allow_rollback); ); clear_bit(rsc->flags, pe_rsc_merging); return nodes; } diff --git a/pengine/native.c b/pengine/native.c index aea5d775cc..dd4af40c68 100644 --- a/pengine/native.c +++ b/pengine/native.c @@ -1,2037 +1,2037 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #define DELETE_THEN_REFRESH 1 /* The crmd will remove the resource from the CIB itself, making this redundant */ #define VARIANT_NATIVE 1 #include gboolean at_stack_bottom(resource_t *rsc); void node_list_update(GListPtr list1, GListPtr list2, int factor); void native_rsc_colocation_rh_must(resource_t *rsc_lh, gboolean update_lh, resource_t *rsc_rh, gboolean update_rh); void native_rsc_colocation_rh_mustnot(resource_t *rsc_lh, gboolean update_lh, resource_t *rsc_rh, gboolean update_rh); void create_notifications(resource_t *rsc, pe_working_set_t *data_set); void Recurring(resource_t *rsc, action_t *start, node_t *node, pe_working_set_t *data_set); void RecurringOp(resource_t *rsc, action_t *start, node_t *node, xmlNode *operation, pe_working_set_t *data_set); void pe_pre_notify( resource_t *rsc, node_t *node, action_t *op, notify_data_t *n_data, pe_working_set_t *data_set); void pe_post_notify( resource_t *rsc, node_t *node, action_t *op, notify_data_t *n_data, pe_working_set_t *data_set); void NoRoleChange (resource_t *rsc, node_t *current, node_t *next, pe_working_set_t *data_set); gboolean DeleteRsc (resource_t *rsc, node_t *node, gboolean optional, pe_working_set_t *data_set); gboolean StopRsc (resource_t *rsc, node_t *next, gboolean optional, pe_working_set_t *data_set); gboolean StartRsc (resource_t *rsc, node_t *next, gboolean optional, pe_working_set_t *data_set); gboolean DemoteRsc (resource_t *rsc, node_t *next, gboolean optional, pe_working_set_t *data_set); gboolean PromoteRsc(resource_t *rsc, node_t *next, gboolean optional, pe_working_set_t *data_set); gboolean RoleError (resource_t *rsc, node_t *next, gboolean optional, pe_working_set_t *data_set); gboolean NullOp (resource_t *rsc, node_t *next, gboolean optional, pe_working_set_t *data_set); enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = { /* Current State */ /* Next State: Unknown Stopped Started Slave Master */ /* Unknown */ { RSC_ROLE_UNKNOWN, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, }, /* Stopped */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_SLAVE, RSC_ROLE_SLAVE, }, /* Started */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }, /* Slave */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_UNKNOWN, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }, /* Master */ { RSC_ROLE_STOPPED, RSC_ROLE_SLAVE, RSC_ROLE_UNKNOWN, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }, }; gboolean (*rsc_action_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX])(resource_t*,node_t*,gboolean,pe_working_set_t*) = { /* Current State */ /* Next State: Unknown Stopped Started Slave Master */ /* Unknown */ { RoleError, StopRsc, RoleError, RoleError, RoleError, }, /* Stopped */ { RoleError, NullOp, StartRsc, StartRsc, RoleError, }, /* Started */ { RoleError, StopRsc, NullOp, NullOp, PromoteRsc, }, /* Slave */ { RoleError, StopRsc, RoleError, NullOp, PromoteRsc, }, /* Master */ { RoleError, RoleError, RoleError, DemoteRsc, NullOp, }, }; static gboolean native_choose_node(resource_t *rsc) { /* 1. Sort by weight 2. color.chosen_node = the node (of those with the highest wieght) with the fewest resources 3. remove color.chosen_node from all other colors */ GListPtr nodes = NULL; node_t *chosen = NULL; if(is_not_set(rsc->flags, pe_rsc_provisional)) { return rsc->allocated_to?TRUE:FALSE; } crm_debug_3("Choosing node for %s from %d candidates", rsc->id, g_list_length(rsc->allowed_nodes)); if(rsc->allowed_nodes) { rsc->allowed_nodes = g_list_sort( rsc->allowed_nodes, sort_node_weight); nodes = rsc->allowed_nodes; chosen = g_list_nth_data(nodes, 0); } return native_assign_node(rsc, nodes, chosen); } GListPtr native_merge_weights( resource_t *rsc, const char *rhs, GListPtr nodes, int factor, gboolean allow_rollback) { GListPtr archive = NULL; if(is_set(rsc->flags, pe_rsc_merging)) { - crm_debug("%s: Breaking dependancy loop", rhs); + crm_info("%s: Breaking dependancy loop", rhs); return nodes; - } else if(is_not_set(rsc->flags, pe_rsc_provisional) - || can_run_any(nodes) == FALSE) { + } else if(is_not_set(rsc->flags, pe_rsc_provisional)) { + crm_debug_4("%s: not provisional", rsc->id); return nodes; } set_bit(rsc->flags, pe_rsc_merging); crm_debug_2("%s: Combining scores from %s", rhs, rsc->id); if(allow_rollback) { archive = node_list_dup(nodes, FALSE, FALSE); } node_list_update(nodes, rsc->allowed_nodes, factor); if(archive && can_run_any(nodes) == FALSE) { crm_debug("%s: Rolling back scores from %s", rhs, rsc->id); pe_free_shallow_adv(nodes, TRUE); nodes = archive; goto bail; } pe_free_shallow_adv(archive, TRUE); slist_iter( constraint, rsc_colocation_t, rsc->rsc_cons_lhs, lpc, nodes = constraint->rsc_lh->cmds->merge_weights( constraint->rsc_lh, rhs, nodes, constraint->score/INFINITY, allow_rollback); ); bail: clear_bit(rsc->flags, pe_rsc_merging); return nodes; } node_t * native_color(resource_t *rsc, pe_working_set_t *data_set) { - int alloc_details = scores_log_level+1; + int alloc_details = scores_log_level; if(rsc->parent && is_not_set(rsc->parent->flags, pe_rsc_allocating)) { /* never allocate children on their own */ crm_debug("Escalating allocation of %s to its parent: %s", rsc->id, rsc->parent->id); rsc->parent->cmds->color(rsc->parent, data_set); } if(is_not_set(rsc->flags, pe_rsc_provisional)) { return rsc->allocated_to; } if(is_set(rsc->flags, pe_rsc_allocating)) { crm_debug("Dependancy loop detected involving %s", rsc->id); return NULL; } set_bit(rsc->flags, pe_rsc_allocating); - print_resource(alloc_details, "Allocating: ", rsc, FALSE); - dump_node_scores(alloc_details, rsc, "Pre-allloc", rsc->allowed_nodes); + print_resource(alloc_details+1, "Allocating: ", rsc, FALSE); + dump_node_scores(alloc_details+1, rsc, "Pre-allloc", rsc->allowed_nodes); slist_iter( constraint, rsc_colocation_t, rsc->rsc_cons, lpc, resource_t *rsc_rh = constraint->rsc_rh; crm_debug_2("%s: Pre-Processing %s (%s)", rsc->id, constraint->id, rsc_rh->id); rsc_rh->cmds->color(rsc_rh, data_set); rsc->cmds->rsc_colocation_lh(rsc, rsc_rh, constraint); ); - dump_node_scores(alloc_details, rsc, "Post-coloc", rsc->allowed_nodes); + dump_node_scores(alloc_details+1, rsc, "Post-coloc", rsc->allowed_nodes); slist_iter( constraint, rsc_colocation_t, rsc->rsc_cons_lhs, lpc, rsc->allowed_nodes = constraint->rsc_lh->cmds->merge_weights( constraint->rsc_lh, rsc->id, rsc->allowed_nodes, constraint->score/INFINITY, TRUE); ); - dump_node_scores(alloc_details-1, rsc, __PRETTY_FUNCTION__, rsc->allowed_nodes); + dump_node_scores(alloc_details, rsc, __PRETTY_FUNCTION__, rsc->allowed_nodes); print_resource(LOG_DEBUG_2, "Allocating: ", rsc, FALSE); if(rsc->next_role == RSC_ROLE_STOPPED) { crm_debug_2("Making sure %s doesn't get allocated", rsc->id); /* make sure it doesnt come up again */ resource_location( rsc, NULL, -INFINITY, XML_RSC_ATTR_TARGET_ROLE, data_set); } if(is_set(rsc->flags, pe_rsc_provisional) && native_choose_node(rsc) ) { crm_debug_3("Allocated resource %s to %s", rsc->id, rsc->allocated_to->details->uname); } else if(rsc->allocated_to == NULL) { if(is_not_set(rsc->flags, pe_rsc_orphan)) { pe_warn("Resource %s cannot run anywhere", rsc->id); } else if(rsc->running_on != NULL) { crm_info("Stopping orphan resource %s", rsc->id); } } else { crm_debug("Pre-Allocated resource %s to %s", rsc->id, rsc->allocated_to->details->uname); } clear_bit(rsc->flags, pe_rsc_allocating); print_resource(LOG_DEBUG_3, "Allocated ", rsc, TRUE); return rsc->allocated_to; } static gboolean is_op_dup( resource_t *rsc, const char *name, const char *interval) { gboolean dup = FALSE; const char *id = NULL; const char *value = NULL; xml_child_iter_filter( rsc->ops_xml, operation, "op", value = crm_element_value(operation, "name"); if(safe_str_neq(value, name)) { continue; } value = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); if(value == NULL) { value = "0"; } if(safe_str_neq(value, interval)) { continue; } if(id == NULL) { id = ID(operation); } else { crm_config_err("Operation %s is a duplicate of %s", ID(operation), id); crm_config_err("Do not use the same (name, interval) combination more than once per resource"); dup = TRUE; } ); return dup; } void RecurringOp(resource_t *rsc, action_t *start, node_t *node, xmlNode *operation, pe_working_set_t *data_set) { char *key = NULL; const char *name = NULL; const char *value = NULL; const char *interval = NULL; const char *node_uname = NULL; int interval_ms = 0; action_t *mon = NULL; gboolean is_optional = TRUE; GListPtr possible_matches = NULL; crm_debug_2("Creating recurring action %s for %s", ID(operation), rsc->id); if(node != NULL) { node_uname = node->details->uname; } interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); interval_ms = crm_get_msec(interval); if(interval_ms == 0) { return; } else if(interval_ms < 0) { crm_config_warn("%s contains an invalid interval: %s", ID(operation), interval); return; } value = crm_element_value(operation, "disabled"); if(crm_is_true(value)) { return; } name = crm_element_value(operation, "name"); if(is_op_dup(rsc, name, interval)) { return; } key = generate_op_key(rsc->id, name, interval_ms); if(start != NULL) { crm_debug_3("Marking %s %s due to %s", key, start->optional?"optional":"manditory", start->uuid); is_optional = start->optional; } else { crm_debug_2("Marking %s optional", key); is_optional = TRUE; } /* start a monitor for an already active resource */ possible_matches = find_actions_exact(rsc->actions, key, node); if(possible_matches == NULL) { is_optional = FALSE; crm_debug_3("Marking %s manditory: not active", key); } else { g_list_free(possible_matches); } value = crm_element_value(operation, "role"); if((rsc->next_role == RSC_ROLE_MASTER && value == NULL) || (value != NULL && text2role(value) != rsc->next_role)) { int log_level = LOG_DEBUG_2; const char *result = "Ignoring"; if(is_optional) { char *local_key = crm_strdup(key); log_level = LOG_INFO; result = "Cancelling"; /* its running : cancel it */ mon = custom_action( rsc, local_key, RSC_CANCEL, node, FALSE, TRUE, data_set); crm_free(mon->task); mon->task = crm_strdup(RSC_CANCEL); add_hash_param(mon->meta, XML_LRM_ATTR_INTERVAL, interval); add_hash_param(mon->meta, XML_LRM_ATTR_TASK, name); local_key = NULL; switch(rsc->role) { case RSC_ROLE_SLAVE: case RSC_ROLE_STARTED: if(rsc->next_role == RSC_ROLE_MASTER) { local_key = promote_key(rsc); } else if(rsc->next_role == RSC_ROLE_STOPPED) { local_key = stop_key(rsc); } break; case RSC_ROLE_MASTER: local_key = demote_key(rsc); break; default: break; } if(local_key) { custom_action_order(rsc, NULL, mon, rsc, local_key, NULL, pe_order_runnable_left, data_set); } mon = NULL; } do_crm_log(log_level, "%s action %s (%s vs. %s)", result , key, value?value:role2text(RSC_ROLE_SLAVE), role2text(rsc->next_role)); crm_free(key); key = NULL; return; } mon = custom_action(rsc, key, name, node, is_optional, TRUE, data_set); key = mon->uuid; if(is_optional) { crm_debug_2("%s\t %s (optional)", crm_str(node_uname), mon->uuid); } if(start == NULL || start->runnable == FALSE) { crm_debug("%s\t %s (cancelled : start un-runnable)", crm_str(node_uname), mon->uuid); mon->runnable = FALSE; } else if(node == NULL || node->details->online == FALSE || node->details->unclean) { crm_debug("%s\t %s (cancelled : no node available)", crm_str(node_uname), mon->uuid); mon->runnable = FALSE; } else if(mon->optional == FALSE) { crm_notice(" Start recurring %s (%ds) for %s on %s", mon->task, interval_ms/1000, rsc->id, crm_str(node_uname)); } custom_action_order(rsc, start_key(rsc), NULL, NULL, crm_strdup(key), mon, pe_order_implies_right|pe_order_runnable_left, data_set); if(rsc->next_role == RSC_ROLE_MASTER) { char *running_master = crm_itoa(EXECRA_RUNNING_MASTER); add_hash_param(mon->meta, XML_ATTR_TE_TARGET_RC, running_master); custom_action_order( rsc, promote_key(rsc), NULL, rsc, NULL, mon, pe_order_optional|pe_order_runnable_left, data_set); crm_free(running_master); } else if(rsc->role == RSC_ROLE_MASTER) { custom_action_order( rsc, demote_key(rsc), NULL, rsc, NULL, mon, pe_order_optional|pe_order_runnable_left, data_set); } } void Recurring(resource_t *rsc, action_t *start, node_t *node, pe_working_set_t *data_set) { xml_child_iter_filter( rsc->ops_xml, operation, "op", RecurringOp(rsc, start, node, operation, data_set); ); } void native_create_actions(resource_t *rsc, pe_working_set_t *data_set) { action_t *start = NULL; node_t *chosen = NULL; enum rsc_role_e role = RSC_ROLE_UNKNOWN; enum rsc_role_e next_role = RSC_ROLE_UNKNOWN; crm_debug_2("Creating actions for %s", rsc->id); chosen = rsc->allocated_to; if(chosen != NULL) { CRM_CHECK(rsc->next_role != RSC_ROLE_UNKNOWN, rsc->next_role = RSC_ROLE_STARTED); } unpack_instance_attributes( rsc->xml, XML_TAG_ATTR_SETS, chosen?chosen->details->attrs:NULL, rsc->parameters, NULL, FALSE, data_set->now); crm_debug_2("%s: %s->%s", rsc->id, role2text(rsc->role), role2text(rsc->next_role)); if(g_list_length(rsc->running_on) > 1) { if(rsc->recovery_type == recovery_stop_start) { pe_proc_err("Attempting recovery of resource %s", rsc->id); StopRsc(rsc, NULL, FALSE, data_set); rsc->role = RSC_ROLE_STOPPED; } } else if(rsc->running_on != NULL) { node_t *current = rsc->running_on->data; NoRoleChange(rsc, current, chosen, data_set); } else if(rsc->role == RSC_ROLE_STOPPED && rsc->next_role == RSC_ROLE_STOPPED) { char *key = start_key(rsc); GListPtr possible_matches = find_actions(rsc->actions, key, NULL); slist_iter( action, action_t, possible_matches, lpc, action->optional = TRUE; /* action->pseudo = TRUE; */ ); g_list_free(possible_matches); crm_debug_2("Stopping a stopped resource"); crm_free(key); return; } else if(rsc->role != RSC_ROLE_STOPPED) { /* A cheap trick to account for the fact that Master/Slave groups may not be * completely running when we set their role to Slave */ crm_debug_2("Resetting %s.role = %s (was %s)", rsc->id, role2text(RSC_ROLE_STOPPED), role2text(rsc->role)); rsc->role = RSC_ROLE_STOPPED; } role = rsc->role; while(role != rsc->next_role) { next_role = rsc_state_matrix[role][rsc->next_role]; crm_debug_2("Executing: %s->%s (%s)", role2text(role), role2text(next_role), rsc->id); if(rsc_action_matrix[role][next_role]( rsc, chosen, FALSE, data_set) == FALSE) { break; } role = next_role; } if(rsc->next_role != RSC_ROLE_STOPPED && is_set(rsc->flags, pe_rsc_managed)) { start = start_action(rsc, chosen, TRUE); Recurring(rsc, start, chosen, data_set); } } void native_internal_constraints(resource_t *rsc, pe_working_set_t *data_set) { int type = pe_order_optional; const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); if(rsc->variant == pe_native) { type |= pe_order_implies_right; } if(rsc->parent == NULL) { type |= pe_order_restart; } new_rsc_order(rsc, RSC_STOP, rsc, RSC_START, type, data_set); new_rsc_order(rsc, RSC_DEMOTE, rsc, RSC_STOP, pe_order_demote_stop, data_set); new_rsc_order(rsc, RSC_START, rsc, RSC_PROMOTE, pe_order_runnable_left, data_set); new_rsc_order(rsc, RSC_DELETE, rsc, RSC_START, pe_order_optional, data_set); if(is_set(rsc->flags, pe_rsc_notify)) { new_rsc_order(rsc, "confirmed-post_notify_start", rsc, "pre_notify_promote", pe_order_optional, data_set); new_rsc_order(rsc, "confirmed-post_notify_demote", rsc, "pre_notify_stop", pe_order_optional, data_set); } if(is_not_set(rsc->flags, pe_rsc_managed)) { crm_debug_3("Skipping fencing constraints for unmanaged resource: %s", rsc->id); return; } if(rsc->variant == pe_native && safe_str_neq(class, "stonith")) { custom_action_order( rsc, stop_key(rsc), NULL, NULL, crm_strdup(all_stopped->task), all_stopped, pe_order_implies_right|pe_order_runnable_left, data_set); } } void native_rsc_colocation_lh( resource_t *rsc_lh, resource_t *rsc_rh, rsc_colocation_t *constraint) { if(rsc_lh == NULL) { pe_err("rsc_lh was NULL for %s", constraint->id); return; } else if(constraint->rsc_rh == NULL) { pe_err("rsc_rh was NULL for %s", constraint->id); return; } crm_debug_2("Processing colocation constraint between %s and %s", rsc_lh->id, rsc_rh->id); rsc_rh->cmds->rsc_colocation_rh(rsc_lh, rsc_rh, constraint); } static gboolean filter_colocation_constraint( resource_t *rsc_lh, resource_t *rsc_rh, rsc_colocation_t *constraint) { int level = LOG_DEBUG_4; if(constraint->score == 0){ return FALSE; } if(constraint->score > 0 && constraint->role_lh != RSC_ROLE_UNKNOWN && constraint->role_lh != rsc_lh->next_role) { do_crm_log(level, "LH: Skipping constraint: \"%s\" state filter", role2text(constraint->role_rh)); return FALSE; } if(constraint->score > 0 && constraint->role_rh != RSC_ROLE_UNKNOWN && constraint->role_rh != rsc_rh->next_role) { do_crm_log(level, "RH: Skipping constraint: \"%s\" state filter", role2text(constraint->role_rh)); return FALSE; } if(constraint->score < 0 && constraint->role_lh != RSC_ROLE_UNKNOWN && constraint->role_lh == rsc_lh->next_role) { do_crm_log(level, "LH: Skipping -ve constraint: \"%s\" state filter", role2text(constraint->role_rh)); return FALSE; } if(constraint->score < 0 && constraint->role_rh != RSC_ROLE_UNKNOWN && constraint->role_rh == rsc_rh->next_role) { do_crm_log(level, "RH: Skipping -ve constraint: \"%s\" state filter", role2text(constraint->role_rh)); return FALSE; } return TRUE; } static void colocation_match( resource_t *rsc_lh, resource_t *rsc_rh, rsc_colocation_t *constraint) { const char *tmp = NULL; const char *value = NULL; gboolean do_check = FALSE; const char *attribute = "#id"; if(constraint->node_attribute != NULL) { attribute = constraint->node_attribute; } if(rsc_rh->allocated_to) { value = g_hash_table_lookup( rsc_rh->allocated_to->details->attrs, attribute); do_check = TRUE; } else if(constraint->score < 0) { /* nothing to do: * anti-colocation with something thats not running */ return; } slist_iter( node, node_t, rsc_lh->allowed_nodes, lpc, tmp = g_hash_table_lookup(node->details->attrs, attribute); if(do_check && safe_str_eq(tmp, value)) { if(constraint->score < INFINITY) { crm_debug_2("%s: %s.%s += %d", constraint->id, rsc_lh->id, node->details->uname, constraint->score); node->weight = merge_weights( constraint->score, node->weight); } } else if(do_check == FALSE || constraint->score >= INFINITY) { crm_debug_2("%s: %s.%s = -INFINITY (%s)", constraint->id, rsc_lh->id, node->details->uname, do_check?"failed":"unallocated"); node->weight = -INFINITY; } ); } void native_rsc_colocation_rh( resource_t *rsc_lh, resource_t *rsc_rh, rsc_colocation_t *constraint) { crm_debug_2("%sColocating %s with %s (%s, weight=%d)", constraint->score >= 0?"":"Anti-", rsc_lh->id, rsc_rh->id, constraint->id, constraint->score); if(filter_colocation_constraint(rsc_lh, rsc_rh, constraint) == FALSE) { return; } if(is_set(rsc_rh->flags, pe_rsc_provisional)) { return; } else if(is_not_set(rsc_lh->flags, pe_rsc_provisional)) { /* error check */ struct node_shared_s *details_lh; struct node_shared_s *details_rh; if((constraint->score > -INFINITY) && (constraint->score < INFINITY)) { return; } details_rh = rsc_rh->allocated_to?rsc_rh->allocated_to->details:NULL; details_lh = rsc_lh->allocated_to?rsc_lh->allocated_to->details:NULL; if(constraint->score == INFINITY && details_lh != details_rh) { crm_err("%s and %s are both allocated" " but to different nodes: %s vs. %s", rsc_lh->id, rsc_rh->id, details_lh?details_lh->uname:"n/a", details_rh?details_rh->uname:"n/a"); } else if(constraint->score == -INFINITY && details_lh == details_rh) { crm_err("%s and %s are both allocated" " but to the SAME node: %s", rsc_lh->id, rsc_rh->id, details_rh?details_rh->uname:"n/a"); } return; } else { colocation_match(rsc_lh, rsc_rh, constraint); } } void node_list_update(GListPtr list1, GListPtr list2, int factor) { node_t *other_node = NULL; slist_iter( node, node_t, list1, lpc, if(node == NULL) { continue; } other_node = (node_t*)pe_find_node_id(list2, node->details->id); if(other_node != NULL) { crm_debug_2("%s: %d + %d", node->details->uname, node->weight, other_node->weight); node->weight = merge_weights( factor*other_node->weight, node->weight); } ); } static GListPtr find_actions_by_task(GListPtr actions, resource_t *rsc, const char *original_key) { GListPtr list = NULL; list = find_actions(actions, original_key, NULL); if(list == NULL) { /* we're potentially searching a child of the original resource */ char *key = NULL; char *tmp = NULL; char *task = NULL; int interval = 0; CRM_CHECK(parse_op_key(original_key, &tmp, &task, &interval), crm_err("search key: %s", original_key); return NULL); key = generate_op_key(rsc->id, task, interval); list = find_actions(actions, key, NULL); crm_free(key); crm_free(tmp); crm_free(task); } return list; } void native_rsc_order_lh(resource_t *lh_rsc, order_constraint_t *order, pe_working_set_t *data_set) { GListPtr lh_actions = NULL; action_t *lh_action = order->lh_action; resource_t *rh_rsc = order->rh_rsc; - crm_debug_2("Processing LH of ordering constraint %d", order->id); + crm_debug_3("Processing LH of ordering constraint %d", order->id); CRM_ASSERT(lh_rsc != NULL); if(lh_action != NULL) { lh_actions = g_list_append(NULL, lh_action); } else if(lh_action == NULL) { lh_actions = find_actions_by_task( lh_rsc->actions, lh_rsc, order->lh_action_task); } if(lh_actions == NULL && lh_rsc != rh_rsc) { char *key = NULL; char *rsc_id = NULL; char *op_type = NULL; int interval = 0; crm_debug_2("No LH-Side (%s/%s) found for constraint %d with %s - creating", lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task); parse_op_key( order->lh_action_task, &rsc_id, &op_type, &interval); key = generate_op_key(lh_rsc->id, op_type, interval); lh_action = custom_action(lh_rsc, key, op_type, NULL, TRUE, TRUE, data_set); if(lh_rsc->fns->state(lh_rsc, TRUE) == RSC_ROLE_STOPPED && safe_str_eq(op_type, RSC_STOP)) { lh_action->pseudo = TRUE; lh_action->runnable = TRUE; } lh_actions = g_list_append(NULL, lh_action); crm_free(op_type); crm_free(rsc_id); } slist_iter( lh_action_iter, action_t, lh_actions, lpc, if(rh_rsc == NULL && order->rh_action) { rh_rsc = order->rh_action->rsc; } if(rh_rsc) { rh_rsc->cmds->rsc_order_rh( lh_action_iter, rh_rsc, order); } else if(order->rh_action) { order_actions( lh_action_iter, order->rh_action, order->type); } ); pe_free_shallow_adv(lh_actions, FALSE); } void native_rsc_order_rh( action_t *lh_action, resource_t *rsc, order_constraint_t *order) { GListPtr rh_actions = NULL; action_t *rh_action = NULL; CRM_CHECK(rsc != NULL, return); CRM_CHECK(order != NULL, return); rh_action = order->rh_action; crm_debug_3("Processing RH of ordering constraint %d", order->id); if(rh_action != NULL) { rh_actions = g_list_append(NULL, rh_action); } else if(rsc != NULL) { rh_actions = find_actions_by_task( rsc->actions, rsc, order->rh_action_task); } if(rh_actions == NULL) { crm_debug_4("No RH-Side (%s/%s) found for constraint..." " ignoring", rsc->id,order->rh_action_task); if(lh_action) { crm_debug_4("LH-Side was: %s", lh_action->uuid); } return; } slist_iter( rh_action_iter, action_t, rh_actions, lpc, if(lh_action) { order_actions(lh_action, rh_action_iter, order->type); } else if(order->type & pe_order_implies_right) { rh_action_iter->runnable = FALSE; crm_warn("Unrunnable %s 0x%.6x", rh_action_iter->uuid, order->type); } else { crm_warn("neither %s 0x%.6x", rh_action_iter->uuid, order->type); } ); pe_free_shallow_adv(rh_actions, FALSE); } void native_rsc_location(resource_t *rsc, rsc_to_node_t *constraint) { GListPtr or_list; crm_debug_2("Applying %s (%s) to %s", constraint->id, role2text(constraint->role_filter), rsc->id); /* take "lifetime" into account */ if(constraint == NULL) { pe_err("Constraint is NULL"); return; } else if(rsc == NULL) { pe_err("LHS of rsc_to_node (%s) is NULL", constraint->id); return; } else if(constraint->role_filter > 0 && constraint->role_filter != rsc->next_role) { crm_debug("Constraint (%s) is not active (role : %s)", - constraint->id, role2text(constraint->role_filter)); + constraint->id, role2text(constraint->role_filter)); return; } else if(is_active(constraint) == FALSE) { crm_debug_2("Constraint (%s) is not active", constraint->id); return; } if(constraint->node_list_rh == NULL) { crm_debug_2("RHS of constraint %s is NULL", constraint->id); return; } or_list = node_list_or( rsc->allowed_nodes, constraint->node_list_rh, FALSE); pe_free_shallow(rsc->allowed_nodes); rsc->allowed_nodes = or_list; slist_iter(node, node_t, or_list, lpc, crm_debug_3("%s + %s : %d", rsc->id, node->details->uname, node->weight); ); } void native_expand(resource_t *rsc, pe_working_set_t *data_set) { crm_debug_3("Processing actions from %s", rsc->id); slist_iter( action, action_t, rsc->actions, lpc, crm_debug_4("processing action %d for rsc=%s", action->id, rsc->id); graph_element_from_action(action, data_set); ); slist_iter( child_rsc, resource_t, rsc->children, lpc, child_rsc->cmds->expand(child_rsc, data_set); ); } void create_notifications(resource_t *rsc, pe_working_set_t *data_set) { } static void register_activity(resource_t *rsc, enum action_tasks task, node_t *node, notify_data_t *n_data) { notify_entry_t *entry = NULL; if(node == NULL) { pe_proc_warn("%s has no node for required action %s", rsc->id, task2text(task)); return; } crm_malloc0(entry, sizeof(notify_entry_t)); entry->rsc = rsc; entry->node = node; switch(task) { case start_rsc: n_data->start = g_list_append(n_data->start, entry); break; case stop_rsc: n_data->stop = g_list_append(n_data->stop, entry); break; case action_promote: n_data->promote = g_list_append(n_data->promote, entry); break; case action_demote: n_data->demote = g_list_append(n_data->demote, entry); break; default: crm_err("Unsupported notify action: %s", task2text(task)); crm_free(entry); break; } } static void register_state(resource_t *rsc, node_t *on_node, notify_data_t *n_data) { notify_entry_t *entry = NULL; crm_malloc0(entry, sizeof(notify_entry_t)); entry->rsc = rsc; entry->node = on_node; crm_debug_2("%s state: %s", rsc->id, role2text(rsc->next_role)); switch(rsc->next_role) { case RSC_ROLE_STOPPED: /* n_data->inactive = g_list_append(n_data->inactive, entry); */ crm_free(entry); break; case RSC_ROLE_STARTED: n_data->active = g_list_append(n_data->active, entry); break; case RSC_ROLE_SLAVE: n_data->slave = g_list_append(n_data->slave, entry); break; case RSC_ROLE_MASTER: n_data->master = g_list_append(n_data->master, entry); break; default: crm_err("Unsupported notify role"); crm_free(entry); break; } } void complex_create_notify_element(resource_t *rsc, action_t *op, notify_data_t *n_data, pe_working_set_t *data_set) { node_t *next_node = NULL; gboolean registered = FALSE; char *op_key = NULL; GListPtr possible_matches = NULL; enum action_tasks task = text2task(op->task); if(rsc->children) { slist_iter( child_rsc, resource_t, rsc->children, lpc, child_rsc->cmds->create_notify_element( child_rsc, op, n_data, data_set); ); return; } if(op->pre_notify == NULL || op->post_notify == NULL) { /* no notifications required */ crm_debug_4("No notificaitons required for %s", op->task); return; } next_node = rsc->allocated_to; op_key = generate_op_key(rsc->id, op->task, 0); possible_matches = find_actions(rsc->actions, op_key, NULL); crm_debug_2("Creating notificaitons for: %s (%s->%s)", op->uuid, role2text(rsc->role), role2text(rsc->next_role)); if(rsc->role == rsc->next_role) { register_state(rsc, next_node, n_data); } slist_iter( local_op, action_t, possible_matches, lpc, local_op->notify_keys = n_data->keys; if(local_op->optional == FALSE) { registered = TRUE; register_activity(rsc, task, local_op->node, n_data); } ); /* stop / demote */ if(rsc->role != RSC_ROLE_STOPPED) { if(task == stop_rsc || task == action_demote) { slist_iter( current_node, node_t, rsc->running_on, lpc, pe_pre_notify(rsc, current_node, op, n_data, data_set); if(task == action_demote || registered == FALSE) { pe_post_notify(rsc, current_node, op, n_data, data_set); } ); } } /* start / promote */ if(rsc->next_role != RSC_ROLE_STOPPED) { CRM_CHECK(next_node != NULL,;); if(next_node == NULL) { pe_proc_err("next role: %s", role2text(rsc->next_role)); } else if(task == start_rsc || task == action_promote) { if(task != start_rsc || registered == FALSE) { pe_pre_notify(rsc, next_node, op, n_data, data_set); } pe_post_notify(rsc, next_node, op, n_data, data_set); } } crm_free(op_key); g_list_free(possible_matches); } static void dup_attr(gpointer key, gpointer value, gpointer user_data) { char *meta_key = crm_concat(CRM_META, key, '_'); g_hash_table_replace(user_data, meta_key, crm_strdup(value)); } static action_t * pe_notify(resource_t *rsc, node_t *node, action_t *op, action_t *confirm, notify_data_t *n_data, pe_working_set_t *data_set) { char *key = NULL; action_t *trigger = NULL; const char *value = NULL; const char *task = NULL; if(op == NULL || confirm == NULL) { crm_debug_2("Op=%p confirm=%p", op, confirm); return NULL; } CRM_CHECK(node != NULL, return NULL); if(node->details->online == FALSE) { crm_debug_2("Skipping notification for %s: node offline", rsc->id); return NULL; } else if(op->runnable == FALSE) { crm_debug_2("Skipping notification for %s: not runnable", op->uuid); return NULL; } value = g_hash_table_lookup(op->meta, "notify_type"); task = g_hash_table_lookup(op->meta, "notify_operation"); crm_debug_2("Creating notify actions for %s: %s (%s-%s)", op->uuid, rsc->id, value, task); key = generate_notify_key(rsc->id, value, task); trigger = custom_action(rsc, key, op->task, node, op->optional, TRUE, data_set); g_hash_table_foreach(op->meta, dup_attr, trigger->extra); trigger->notify_keys = n_data->keys; /* pseudo_notify before notify */ crm_debug_3("Ordering %s before %s (%d->%d)", op->uuid, trigger->uuid, trigger->id, op->id); order_actions(op, trigger, pe_order_implies_left); value = g_hash_table_lookup(op->meta, "notify_confirm"); if(crm_is_true(value)) { /* notify before pseudo_notified */ crm_debug_3("Ordering %s before %s (%d->%d)", trigger->uuid, confirm->uuid, confirm->id, trigger->id); order_actions(trigger, confirm, pe_order_implies_left); } return trigger; } void pe_pre_notify(resource_t *rsc, node_t *node, action_t *op, notify_data_t *n_data, pe_working_set_t *data_set) { crm_debug_2("%s: %s", rsc->id, op->uuid); pe_notify(rsc, node, op->pre_notify, op->pre_notified, n_data, data_set); } void pe_post_notify(resource_t *rsc, node_t *node, action_t *op, notify_data_t *n_data, pe_working_set_t *data_set) { action_t *notify = NULL; CRM_CHECK(op != NULL, return); CRM_CHECK(rsc != NULL, return); crm_debug_2("%s: %s", rsc->id, op->uuid); notify = pe_notify(rsc, node, op->post_notify, op->post_notified, n_data, data_set); if(notify != NULL) { /* crm_err("Upgrading priority for %s to INFINITY", notify->uuid); */ notify->priority = INFINITY; } notify = op->post_notified; if(notify != NULL) { slist_iter( mon, action_t, rsc->actions, lpc, const char *interval = g_hash_table_lookup(mon->meta, "interval"); if(interval == NULL || safe_str_eq(interval, "0")) { crm_debug_3("Skipping %s: interval", mon->uuid); continue; } else if(safe_str_eq(mon->task, "cancel")) { crm_debug_3("Skipping %s: cancel", mon->uuid); continue; } order_actions(notify, mon, pe_order_optional); ); } } void NoRoleChange(resource_t *rsc, node_t *current, node_t *next, pe_working_set_t *data_set) { action_t *stop = NULL; action_t *start = NULL; GListPtr possible_matches = NULL; crm_debug_2("Executing: %s (role=%s)", rsc->id, role2text(rsc->next_role)); if(current == NULL && next == NULL) { crm_notice("Leave resource %s\t(%s)", rsc->id, role2text(rsc->role)); return; } else if(next == NULL) { crm_notice("Stop resource %s\t(%s %s)", rsc->id, role2text(rsc->role), current->details->uname); return; } else if(current == NULL && rsc->next_role > RSC_ROLE_SLAVE) { crm_notice("Promote %s\t(%s -> %s %s)", rsc->id, role2text(rsc->role), role2text(rsc->next_role), next->details->uname); return; } else if(current == NULL) { crm_notice("Start %s\t(%s -> %s %s)", rsc->id, role2text(rsc->role), role2text(rsc->next_role), next->details->uname); return; } if(rsc->role == rsc->next_role) { start = start_action(rsc, next, TRUE); if(start->optional) { crm_notice("Leave resource %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } else if(safe_str_eq(current->details->id, next->details->id)) { if(is_set(rsc->flags, pe_rsc_failed)) { crm_notice("Recover resource %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } else { crm_notice("Restart resource %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } } else { crm_notice("Move resource %s\t(%s %s -> %s)", rsc->id, role2text(rsc->role), current->details->uname, next->details->uname); } } else if(rsc->role < rsc->next_role) { crm_notice("Promote %s\t(%s -> %s %s)", rsc->id, role2text(rsc->role), role2text(rsc->next_role), next->details->uname); } else if(rsc->role > rsc->next_role) { crm_notice("Demote %s\t(%s -> %s %s)", rsc->id, role2text(rsc->role), role2text(rsc->next_role), next->details->uname); } if(is_set(rsc->flags, pe_rsc_failed) || safe_str_neq(current->details->id, next->details->id)) { if(rsc->next_role > RSC_ROLE_STARTED) { gboolean optional = TRUE; if(rsc->role == RSC_ROLE_MASTER) { optional = FALSE; } DemoteRsc(rsc, current, optional, data_set); } if(rsc->role == RSC_ROLE_MASTER) { DemoteRsc(rsc, current, FALSE, data_set); } StopRsc(rsc, current, FALSE, data_set); StartRsc(rsc, next, FALSE, data_set); if(rsc->next_role == RSC_ROLE_MASTER) { PromoteRsc(rsc, next, FALSE, data_set); } possible_matches = find_recurring_actions(rsc->actions, next); slist_iter(match, action_t, possible_matches, lpc, if(match->optional == FALSE) { crm_debug("Fixing recurring action: %s", match->uuid); match->optional = TRUE; } ); g_list_free(possible_matches); } else if(is_set(rsc->flags, pe_rsc_start_pending)) { start = start_action(rsc, next, TRUE); if(start->runnable) { /* wait for StartRsc() to be called */ rsc->role = RSC_ROLE_STOPPED; } else { /* wait for StopRsc() to be called */ rsc->next_role = RSC_ROLE_STOPPED; } } else { stop = stop_action(rsc, current, TRUE); start = start_action(rsc, next, TRUE); stop->optional = start->optional; if(rsc->next_role > RSC_ROLE_STARTED) { DemoteRsc(rsc, current, start->optional, data_set); } StopRsc(rsc, current, start->optional, data_set); StartRsc(rsc, current, start->optional, data_set); if(rsc->next_role == RSC_ROLE_MASTER) { PromoteRsc(rsc, next, start->optional, data_set); } if(start->runnable == FALSE) { rsc->next_role = RSC_ROLE_STOPPED; } } } gboolean StopRsc(resource_t *rsc, node_t *next, gboolean optional, pe_working_set_t *data_set) { action_t *stop = NULL; const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); crm_debug_2("Executing: %s", rsc->id); if(rsc->next_role == RSC_ROLE_STOPPED && rsc->variant == pe_native && safe_str_eq(class, "stonith")) { action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); custom_action_order( NULL, crm_strdup(all_stopped->task), all_stopped, rsc, stop_key(rsc), NULL, pe_order_implies_left|pe_order_stonith_stop, data_set); } slist_iter( current, node_t, rsc->running_on, lpc, stop = stop_action(rsc, current, optional); if(stop->runnable && stop->optional == FALSE) { crm_notice(" %s\tStop %s",current->details->uname,rsc->id); } if(data_set->remove_after_stop) { DeleteRsc(rsc, current, optional, data_set); } ); return TRUE; } gboolean StartRsc(resource_t *rsc, node_t *next, gboolean optional, pe_working_set_t *data_set) { action_t *start = NULL; crm_debug_2("Executing: %s", rsc->id); start = start_action(rsc, next, TRUE); if(start->runnable && optional == FALSE) { crm_notice(" %s\tStart %s", next->details->uname, rsc->id); start->optional = FALSE; } return TRUE; } gboolean PromoteRsc(resource_t *rsc, node_t *next, gboolean optional, pe_working_set_t *data_set) { char *key = NULL; gboolean runnable = TRUE; GListPtr action_list = NULL; crm_debug_2("Executing: %s", rsc->id); CRM_CHECK(rsc->next_role == RSC_ROLE_MASTER, crm_err("Next role: %s", role2text(rsc->next_role)); return FALSE); CRM_CHECK(next != NULL, return FALSE); key = start_key(rsc); action_list = find_actions_exact(rsc->actions, key, next); crm_free(key); slist_iter(start, action_t, action_list, lpc, if(start->runnable == FALSE) { runnable = FALSE; } ); g_list_free(action_list); if(runnable) { promote_action(rsc, next, optional); if(optional == FALSE) { crm_debug("%s\tPromote %s", next->details->uname, rsc->id); } return TRUE; } crm_debug("%s\tPromote %s (canceled)", next->details->uname, rsc->id); key = promote_key(rsc); action_list = find_actions_exact(rsc->actions, key, next); crm_free(key); slist_iter(promote, action_t, action_list, lpc, promote->runnable = FALSE; ); g_list_free(action_list); return TRUE; } gboolean DemoteRsc(resource_t *rsc, node_t *next, gboolean optional, pe_working_set_t *data_set) { crm_debug_2("Executing: %s", rsc->id); /* CRM_CHECK(rsc->next_role == RSC_ROLE_SLAVE, return FALSE); */ slist_iter( current, node_t, rsc->running_on, lpc, do_crm_log(optional?LOG_DEBUG:LOG_NOTICE, "%s\tDemote %s", current->details->uname, rsc->id); demote_action(rsc, current, optional); ); return TRUE; } gboolean RoleError(resource_t *rsc, node_t *next, gboolean optional, pe_working_set_t *data_set) { crm_debug("Executing: %s", rsc->id); CRM_CHECK(FALSE, return FALSE); return FALSE; } gboolean NullOp(resource_t *rsc, node_t *next, gboolean optional, pe_working_set_t *data_set) { crm_debug_2("Executing: %s", rsc->id); return FALSE; } gboolean DeleteRsc(resource_t *rsc, node_t *node, gboolean optional, pe_working_set_t *data_set) { action_t *delete = NULL; #if DELETE_THEN_REFRESH action_t *refresh = NULL; #endif if(is_set(rsc->flags, pe_rsc_failed)) { crm_debug_2("Resource %s not deleted from %s: failed", rsc->id, node->details->uname); return FALSE; } else if(node == NULL) { crm_debug_2("Resource %s not deleted: NULL node", rsc->id); return FALSE; } else if(node->details->unclean || node->details->online == FALSE) { crm_debug_2("Resource %s not deleted from %s: unrunnable", rsc->id, node->details->uname); return FALSE; } crm_notice("Removing %s from %s", rsc->id, node->details->uname); delete = delete_action(rsc, node, optional); new_rsc_order(rsc, RSC_STOP, rsc, RSC_DELETE, optional?pe_order_implies_right:pe_order_implies_left, data_set); #if DELETE_THEN_REFRESH refresh = custom_action( NULL, crm_strdup(CRM_OP_LRM_REFRESH), CRM_OP_LRM_REFRESH, node, FALSE, TRUE, data_set); add_hash_param(refresh->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); order_actions(delete, refresh, pe_order_optional); #endif return TRUE; } gboolean native_create_probe(resource_t *rsc, node_t *node, action_t *complete, gboolean force, pe_working_set_t *data_set) { char *key = NULL; char *target_rc = NULL; action_t *probe = NULL; node_t *running = NULL; CRM_CHECK(node != NULL, return FALSE); if(rsc->children) { gboolean any_created = FALSE; slist_iter( child_rsc, resource_t, rsc->children, lpc, any_created = child_rsc->cmds->create_probe( child_rsc, node, complete, force, data_set) || any_created; ); return any_created; } if(is_set(rsc->flags, pe_rsc_orphan)) { crm_debug_2("Skipping orphan: %s", rsc->id); return FALSE; } running = pe_find_node_id(rsc->known_on, node->details->id); if(force == FALSE && running != NULL) { /* we already know the status of the resource on this node */ crm_debug_3("Skipping active: %s", rsc->id); return FALSE; } key = generate_op_key(rsc->id, RSC_STATUS, 0); probe = custom_action(rsc, key, RSC_STATUS, node, FALSE, TRUE, data_set); probe->optional = FALSE; running = pe_find_node_id(rsc->running_on, node->details->id); if(running == NULL) { target_rc = crm_itoa(EXECRA_NOT_RUNNING); } else if(rsc->role == RSC_ROLE_MASTER) { target_rc = crm_itoa(EXECRA_RUNNING_MASTER); } if(target_rc != NULL) { add_hash_param(probe->meta, XML_ATTR_TE_TARGET_RC, target_rc); crm_free(target_rc); } crm_debug_2("Probing %s on %s (%s)", rsc->id, node->details->uname, role2text(rsc->role)); order_actions(probe, complete, pe_order_implies_right); return TRUE; } static void native_start_constraints( resource_t *rsc, action_t *stonith_op, gboolean is_stonith, pe_working_set_t *data_set) { node_t *target = stonith_op?stonith_op->node:NULL; if(is_stonith) { char *key = start_key(rsc); action_t *ready = get_pseudo_op(STONITH_UP, data_set); crm_debug_2("Ordering %s action before stonith events", key); custom_action_order( rsc, key, NULL, NULL, crm_strdup(ready->task), ready, pe_order_implies_right, data_set); } else { action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); slist_iter(action, action_t, rsc->actions, lpc2, if(action->needs == rsc_req_stonith) { order_actions(all_stopped, action, pe_order_implies_left); } else if(target != NULL && safe_str_eq(action->task, RSC_START) && NULL == pe_find_node_id( rsc->known_on, target->details->id)) { /* if known == NULL, then we dont know if * the resource is active on the node * we're about to shoot * * in this case, regardless of action->needs, * the only safe option is to wait until * the node is shot before doing anything * to with the resource * * its analogous to waiting for all the probes * for rscX to complete before starting rscX * * the most likely explaination is that the * DC died and took its status with it */ crm_info("Ordering %s after %s recovery", action->uuid, target->details->uname); order_actions(all_stopped, action, pe_order_implies_left|pe_order_runnable_left); } ); } } static void native_stop_constraints( resource_t *rsc, action_t *stonith_op, gboolean is_stonith, pe_working_set_t *data_set) { char *key = NULL; GListPtr action_list = NULL; node_t *node = stonith_op->node; key = stop_key(rsc); action_list = find_actions(rsc->actions, key, node); crm_free(key); /* add the stonith OP as a stop pre-req and the mark the stop * as a pseudo op - since its now redundant */ slist_iter( action, action_t, action_list, lpc2, resource_t *parent = NULL; if(node->details->online && node->details->unclean == FALSE && is_set(rsc->flags, pe_rsc_failed)) { continue; } if(is_set(rsc->flags, pe_rsc_failed)) { crm_warn("Stop of failed resource %s is" " implicit after %s is fenced", rsc->id, node->details->uname); } else { crm_info("%s is implicit after %s is fenced", action->uuid, node->details->uname); } /* the stop would never complete and is * now implied by the stonith operation */ action->pseudo = TRUE; action->runnable = TRUE; action->implied_by_stonith = TRUE; if(is_stonith == FALSE) { order_actions(stonith_op, action, pe_order_optional); } /* find the top-most resource */ parent = rsc->parent; while(parent != NULL && parent->parent != NULL) { parent = parent->parent; } if(parent) { crm_debug_2("Re-creating actions for %s", parent->id); parent->cmds->create_actions(parent, data_set); /* make sure we dont mess anything up in create_actions */ CRM_CHECK(action->pseudo, action->pseudo = TRUE); CRM_CHECK(action->runnable, action->runnable = TRUE); } /* From Bug #1601, successful fencing must be an input to a failed resources stop action. However given group(A, B) running on nodeX and B.stop has failed, A := stop healthy resource (A.stop) B := stop failed resource (pseudo operation B.stop) C := stonith nodeX A requires B, B requires C, C requires A This loop would prevent the cluster from making progress. This block creates the "C requires A" dependancy and therefore must (at least for now) be disabled. Instead, run the block above and treat all resources on nodeX as B would be (marked as a pseudo op depending on the STONITH). } else if(is_stonith == FALSE) { crm_info("Moving healthy resource %s" " off %s before fencing", rsc->id, node->details->uname); * stop healthy resources before the * stonith op * custom_action_order( rsc, stop_key(rsc), NULL, NULL,crm_strdup(CRM_OP_FENCE),stonith_op, pe_order_optional, data_set); */ ); g_list_free(action_list); key = demote_key(rsc); action_list = find_actions(rsc->actions, key, node); crm_free(key); slist_iter( action, action_t, action_list, lpc2, if(node->details->online == FALSE || is_set(rsc->flags, pe_rsc_failed)) { crm_info("Demote of failed resource %s is" " implict after %s is fenced", rsc->id, node->details->uname); /* the stop would never complete and is * now implied by the stonith operation */ action->pseudo = TRUE; action->runnable = TRUE; if(is_stonith == FALSE) { order_actions(stonith_op, action, pe_order_optional); } } ); g_list_free(action_list); } void complex_stonith_ordering( resource_t *rsc, action_t *stonith_op, pe_working_set_t *data_set) { gboolean is_stonith = FALSE; const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); if(rsc->children) { slist_iter( child_rsc, resource_t, rsc->children, lpc, child_rsc->cmds->stonith_ordering( child_rsc, stonith_op, data_set); ); return; } if(is_not_set(rsc->flags, pe_rsc_managed)) { crm_debug_3("Skipping fencing constraints for unmanaged resource: %s", rsc->id); return; } if(stonith_op != NULL && safe_str_eq(class, "stonith")) { is_stonith = TRUE; } /* Start constraints */ native_start_constraints(rsc, stonith_op, is_stonith, data_set); /* Stop constraints */ native_stop_constraints(rsc, stonith_op, is_stonith, data_set); } #define ALLOW_WEAK_MIGRATION 0 static gboolean check_stack_element(resource_t *rsc, resource_t *other_rsc, const char *type) { char *key = NULL; int level = LOG_DEBUG; GListPtr action_list = NULL; if(other_rsc == NULL || other_rsc == rsc) { return TRUE; } do_crm_log(level+1, "%s: processing %s (%s)", rsc->id, other_rsc->id, type); if(other_rsc->variant == pe_native) { do_crm_log(level, "%s: depends on %s (mid-stack) %s", rsc->id, other_rsc->id, type); return FALSE; } else if(other_rsc->variant == pe_group) { if(at_stack_bottom(other_rsc) == FALSE) { do_crm_log(level, "%s: depends on group %s (mid-stack) %s", rsc->id, other_rsc->id, type); return FALSE; } return TRUE; } /* is the clone also moving moved around? * * if so, then we can't yet be completely sure the * resource can safely migrate since the node we're * moving too may not have the clone instance started * yet * * in theory we can figure out if the clone instance we * will run on is already there, but there that would * involve too much knowledge of internal clone code. * maybe later... */ do_crm_log(level+1,"%s: start depends on clone %s", rsc->id, other_rsc->id); key = stop_key(other_rsc); action_list = find_actions(other_rsc->actions, key, NULL); crm_free(key); slist_iter( other_stop, action_t, action_list,lpc, if(other_stop && other_stop->optional == FALSE) { do_crm_log(level, "%s: start depends on %s", rsc->id, other_stop->uuid); g_list_free(action_list); return FALSE; } ); g_list_free(action_list); return TRUE; } gboolean at_stack_bottom(resource_t *rsc) { char *key = NULL; action_t *start = NULL; action_t *other = NULL; GListPtr action_list = NULL; key = start_key(rsc); action_list = find_actions(rsc->actions, key, NULL); crm_free(key); crm_debug_3("%s: processing", rsc->id); CRM_CHECK(action_list != NULL, return FALSE); start = action_list->data; g_list_free(action_list); slist_iter( constraint, rsc_colocation_t, rsc->rsc_cons, lpc, resource_t *target = constraint->rsc_rh; crm_debug_4("%s == %s (%d)", rsc->id, target->id, constraint->score); if(constraint->score > 0 && check_stack_element(rsc, target, "coloc") == FALSE) { return FALSE; } ); slist_iter( other_w, action_wrapper_t, start->actions_before, lpc, other = other_w->action; #if ALLOW_WEAK_MIGRATION if((other_w->type & pe_order_implies_right) == 0) { crm_debug_3("%s: depends on %s (optional ordering)", rsc->id, other->uuid); continue; } #endif if(other->optional == FALSE && check_stack_element(rsc, other->rsc, "order") == FALSE) { return FALSE; } ); return TRUE; } void complex_migrate_reload(resource_t *rsc, pe_working_set_t *data_set) { char *key = NULL; int level = LOG_DEBUG; GListPtr action_list = NULL; action_t *stop = NULL; action_t *start = NULL; action_t *other = NULL; action_t *action = NULL; const char *value = NULL; if(rsc->children) { slist_iter( child_rsc, resource_t, rsc->children, lpc, child_rsc->cmds->migrate_reload(child_rsc, data_set); ); other = NULL; return; } do_crm_log(level+1, "Processing %s", rsc->id); CRM_CHECK(rsc->variant == pe_native, return); if(is_not_set(rsc->flags, pe_rsc_managed) || is_set(rsc->flags, pe_rsc_failed) || is_set(rsc->flags, pe_rsc_start_pending) || rsc->next_role != RSC_ROLE_STARTED || g_list_length(rsc->running_on) != 1) { do_crm_log(level+1, "%s: general resource state", rsc->id); return; } key = start_key(rsc); action_list = find_actions(rsc->actions, key, NULL); crm_free(key); if(action_list == NULL) { do_crm_log(level, "%s: no start action", rsc->id); return; } start = action_list->data; g_list_free(action_list); value = g_hash_table_lookup(rsc->meta, XML_OP_ATTR_ALLOW_MIGRATE); if(crm_is_true(value)) { set_bit(rsc->flags, pe_rsc_can_migrate); } if(is_not_set(rsc->flags, pe_rsc_can_migrate) && start->allow_reload_conversion == FALSE) { do_crm_log(level+1, "%s: no need to continue", rsc->id); return; } key = stop_key(rsc); action_list = find_actions(rsc->actions, key, NULL); crm_free(key); if(action_list == NULL) { do_crm_log(level, "%s: no stop action", rsc->id); return; } stop = action_list->data; g_list_free(action_list); action = start; if(action->pseudo || action->optional || action->node == NULL || action->runnable == FALSE) { do_crm_log(level, "%s: %s", rsc->id, action->task); return; } action = stop; if(action->pseudo || action->optional || action->node == NULL || action->runnable == FALSE) { do_crm_log(level, "%s: %s", rsc->id, action->task); return; } if(is_set(rsc->flags, pe_rsc_can_migrate)) { if(start->node == NULL || stop->node == NULL || stop->node->details == start->node->details) { clear_bit(rsc->flags, pe_rsc_can_migrate); } else if(at_stack_bottom(rsc) == FALSE) { crm_notice("Cannot migrate %s from %s to %s" " - %s is not at the bottom of the resource stack", rsc->id, stop->node->details->uname, start->node->details->uname, rsc->id); clear_bit(rsc->flags, pe_rsc_can_migrate); } } if(is_set(rsc->flags, pe_rsc_can_migrate)) { crm_notice("Migrating %s from %s to %s", rsc->id, stop->node->details->uname, start->node->details->uname); crm_free(stop->uuid); crm_free(stop->task); stop->task = crm_strdup(RSC_MIGRATE); stop->uuid = generate_op_key(rsc->id, stop->task, 0); add_hash_param(stop->meta, "migrate_source", stop->node->details->uname); add_hash_param(stop->meta, "migrate_target", start->node->details->uname); /* Hook up to the all_stopped and shutdown actions */ slist_iter( other_w, action_wrapper_t, stop->actions_after, lpc, other = other_w->action; if(other->optional == FALSE && other->rsc == NULL) { order_actions(start, other, other_w->type); } ); slist_iter( other_w, action_wrapper_t, start->actions_before, lpc, other = other_w->action; if(other->optional == FALSE #if ALLOW_WEAK_MIGRATION && (other_w->type & pe_order_implies_right) #endif && other->rsc != NULL && other->rsc != rsc->parent && other->rsc != rsc) { do_crm_log(level, "Ordering %s before %s", other->uuid, stop->uuid); order_actions(other, stop, other_w->type); } ); crm_free(start->uuid); crm_free(start->task); start->task = crm_strdup(RSC_MIGRATED); start->uuid = generate_op_key(rsc->id, start->task, 0); add_hash_param(start->meta, "migrate_source_uuid", stop->node->details->id); add_hash_param(start->meta, "migrate_source", stop->node->details->uname); add_hash_param(start->meta, "migrate_target", start->node->details->uname); } else if(start->allow_reload_conversion && stop->node->details == start->node->details) { crm_info("Rewriting restart of %s on %s as a reload", rsc->id, start->node->details->uname); crm_free(start->uuid); crm_free(start->task); start->task = crm_strdup("reload"); start->uuid = generate_op_key(rsc->id, start->task, 0); stop->pseudo = TRUE; /* easier than trying to delete it from the graph */ } else { do_crm_log(level+1, "%s nothing to do", rsc->id); } } diff --git a/pengine/regression.sh b/pengine/regression.sh index afa3503a45..10734c26fa 100755 --- a/pengine/regression.sh +++ b/pengine/regression.sh @@ -1,288 +1,289 @@ #!/bin/bash # Copyright (C) 2004 Andrew Beekhof # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. # # This software is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # if [ -x /usr/bin/valgrind ]; then export G_SLICE=always-malloc VALGRIND_CMD="valgrind -q --show-reachable=yes --leak-check=full --trace-children=no --time-stamp=yes --num-callers=20 --suppressions=./ptest.supp" fi . regression.core.sh create_mode="true" echo Generating test outputs for these tests... # do_test echo Done. echo "" echo Performing the following tests... create_mode="false" echo "" do_test simple1 "Offline " do_test simple2 "Start " do_test simple3 "Start 2 " do_test simple4 "Start Failed" do_test simple6 "Stop Start " do_test simple7 "Shutdown " #do_test simple8 "Stonith " #do_test simple9 "Lower version" #do_test simple10 "Higher version" do_test simple11 "Priority (ne)" do_test simple12 "Priority (eq)" do_test simple8 "Stickiness" echo "" do_test params-0 "Params: No change" do_test params-1 "Params: Changed" do_test params-2 "Params: Resource definition" do_test params-4 "Params: Reload" do_test novell-251689 "Resource definition change + target_role=stopped" echo "" do_test orphan-0 "Orphan ignore" do_test orphan-1 "Orphan stop" echo "" do_test target-0 "Target Role : baseline" do_test target-1 "Target Role : test" echo "" do_test date-1 "Dates" -d "2005-020" do_test date-2 "Date Spec - Pass" -d "2005-020T12:30" do_test date-3 "Date Spec - Fail" -d "2005-020T11:30" do_test probe-0 "Probe (anon clone)" do_test probe-1 "Pending Probe" do_test standby "Standby" do_test comments "Comments" echo "" do_test rsc_dep1 "Must not " do_test rsc_dep3 "Must " do_test rsc_dep5 "Must not 3 " do_test rsc_dep7 "Must 3 " do_test rsc_dep10 "Must (but cant)" do_test rsc_dep2 "Must (running) " do_test rsc_dep8 "Must (running : alt) " do_test rsc_dep4 "Must (running + move)" +do_test asymmetric "Asymmetric - require explicit location constraints" echo "" do_test order1 "Order start 1 " do_test order2 "Order start 2 " do_test order3 "Order stop " do_test order4 "Order (multiple) " do_test order5 "Order (move) " do_test order6 "Order (move w/ restart) " do_test order7 "Order (manditory) " do_test order-optional "Order (score=0) " do_test order-required "Order (score=INFINITY) " echo "" do_test coloc-loop "Colocation - loop" do_test coloc-many-one "Colocation - many-to-one" do_test coloc-list "Colocation - many-to-one with list" do_test coloc-group "Colocation - groups" do_test coloc-slave-anti "Anti-colocation with slave shouldn't prevent master colocation" #echo "" #do_test agent1 "version: lt (empty)" #do_test agent2 "version: eq " #do_test agent3 "version: gt " echo "" do_test attrs1 "string: eq (and) " do_test attrs2 "string: lt / gt (and)" do_test attrs3 "string: ne (or) " do_test attrs4 "string: exists " do_test attrs5 "string: not_exists " do_test attrs6 "is_dc: true " do_test attrs7 "is_dc: false " do_test attrs8 "score_attribute " echo "" do_test mon-rsc-1 "Schedule Monitor - start" do_test mon-rsc-2 "Schedule Monitor - move " do_test mon-rsc-3 "Schedule Monitor - pending start " do_test mon-rsc-4 "Schedule Monitor - move/pending start" echo "" do_test rec-rsc-0 "Resource Recover - no start " do_test rec-rsc-1 "Resource Recover - start " do_test rec-rsc-2 "Resource Recover - monitor " do_test rec-rsc-3 "Resource Recover - stop - ignore" do_test rec-rsc-4 "Resource Recover - stop - block " do_test rec-rsc-5 "Resource Recover - stop - fence " do_test rec-rsc-6 "Resource Recover - multiple - restart" do_test rec-rsc-7 "Resource Recover - multiple - stop " do_test rec-rsc-8 "Resource Recover - multiple - block " do_test rec-rsc-9 "Resource Recover - group/group" echo "" do_test quorum-1 "No quorum - ignore" do_test quorum-2 "No quorum - freeze" do_test quorum-3 "No quorum - stop " do_test quorum-4 "No quorum - start anyway" do_test quorum-5 "No quorum - start anyway (group)" do_test quorum-6 "No quorum - start anyway (clone)" echo "" do_test rec-node-1 "Node Recover - Startup - no fence" do_test rec-node-2 "Node Recover - Startup - fence " do_test rec-node-3 "Node Recover - HA down - no fence" do_test rec-node-4 "Node Recover - HA down - fence " do_test rec-node-5 "Node Recover - CRM down - no fence" do_test rec-node-6 "Node Recover - CRM down - fence " do_test rec-node-7 "Node Recover - no quorum - ignore " do_test rec-node-8 "Node Recover - no quorum - freeze " do_test rec-node-9 "Node Recover - no quorum - stop " do_test rec-node-10 "Node Recover - no quorum - stop w/fence" do_test rec-node-11 "Node Recover - CRM down w/ group - fence " do_test rec-node-12 "Node Recover - nothing active - fence " do_test rec-node-13 "Node Recover - failed resource + shutdown - fence " do_test rec-node-15 "Node Recover - unknown lrm section" do_test rec-node-14 "Serialize all stonith's" echo "" do_test multi1 "Multiple Active (stop/start)" echo "" do_test migrate-1 "Migrate (migrate)" do_test migrate-2 "Migrate (stable)" do_test migrate-3 "Migrate (failed migrate_to)" do_test migrate-4 "Migrate (failed migrate_from)" do_test novell-252693 "Migration in a stopping stack" do_test novell-252693-2 "Migration in a starting stack" do_test novell-252693-3 "Non-Migration in a starting and stopping stack" do_test bug-1820 "Migration in a group" do_test bug-1820-1 "Non-migration in a group" #echo "" #do_test complex1 "Complex " echo "" do_test group1 "Group " do_test group2 "Group + Native " do_test group3 "Group + Group " do_test group4 "Group + Native (nothing)" do_test group5 "Group + Native (move) " do_test group6 "Group + Group (move) " do_test group7 "Group colocation" do_test group13 "Group colocation (cant run)" do_test group8 "Group anti-colocation" do_test group9 "Group recovery" do_test group10 "Group partial recovery" do_test group11 "Group target_role" do_test group14 "Group stop (graph terminated)" do_test group15 "-ve group colocation" do_test bug-1573 "Partial stop of a group with two children" do_test bug-1718 "Mandatory group ordering - Stop group_FUN" echo "" do_test clone-anon-probe-1 "Probe the correct (anonymous) clone instance for each node" do_test clone-anon-probe-2 "Avoid needless re-probing of anonymous clones" do_test inc0 "Incarnation start" do_test inc1 "Incarnation start order" do_test inc2 "Incarnation silent restart, stop, move" do_test inc3 "Inter-incarnation ordering, silent restart, stop, move" do_test inc4 "Inter-incarnation ordering, silent restart, stop, move (ordered)" do_test inc5 "Inter-incarnation ordering, silent restart, stop, move (restart 1)" do_test inc6 "Inter-incarnation ordering, silent restart, stop, move (restart 2)" do_test inc7 "Clone colocation" do_test inc8 "Clone anti-colocation" do_test inc9 "Non-unique clone" do_test inc10 "Non-unique clone (stop)" do_test inc11 "Primitive colocation with clones" do_test inc12 "Clone shutdown" do_test cloned-group "Make sure only the correct number of cloned groups are started" do_test clone-no-shuffle "Dont prioritize allocation of instances that must be moved" echo "" do_test master-0 "Stopped -> Slave" do_test master-1 "Stopped -> Promote" do_test master-2 "Stopped -> Promote : notify" do_test master-3 "Stopped -> Promote : master location" do_test master-4 "Started -> Promote : master location" do_test master-5 "Promoted -> Promoted" do_test master-6 "Promoted -> Promoted (2)" do_test master-7 "Promoted -> Fenced" do_test master-8 "Promoted -> Fenced -> Moved" do_test master-9 "Stopped + Promotable + No quorum" do_test master-10 "Stopped -> Promotable : notify with monitor" do_test master-11 "Stopped -> Promote : colocation" do_test novell-239082 "Demote/Promote ordering" do_test novell-239087 "Stable master placement" do_test master-12 "Promotion based solely on rsc_location constraints" do_test master-13 "Include preferences of colocated resources when placing master" do_test master-demote "Ordering when actions depends on demoting a slave resource" do_test master-ordering "Prevent resources from starting that need a master" do_test bug-1765 "Master-Master Colocation (dont stop the slaves)" do_test master-group "Promotion of cloned groups" do_test bug-lf-1852 "Don't shuffle master/slave instances unnecessarily" do_test master-failed-demote "Dont retry failed demote actions" do_test master-failed-demote-2 "Dont retry failed demote actions (notify=false)" echo "" do_test managed-0 "Managed (reference)" do_test managed-1 "Not managed - down " do_test managed-2 "Not managed - up " echo "" do_test interleave-0 "Interleave (reference)" do_test interleave-1 "coloc - not interleaved" do_test interleave-2 "coloc - interleaved " do_test interleave-3 "coloc - interleaved (2)" do_test interleave-pseudo-stop "Interleaved clone during stonith" do_test interleave-stop "Interleaved clone during stop" do_test interleave-restart "Interleaved clone during dependancy restart" echo "" do_test notify-0 "Notify reference" do_test notify-1 "Notify simple" do_test notify-2 "Notify simple, confirm" do_test notify-3 "Notify move, confirm" do_test novell-239079 "Notification priority" #do_test notify-2 "Notify - 764" echo "" do_test 594 "OSDL #594" do_test 662 "OSDL #662" do_test 696 "OSDL #696" do_test 726 "OSDL #726" do_test 735 "OSDL #735" do_test 764 "OSDL #764" do_test 797 "OSDL #797" do_test 829 "OSDL #829" do_test 994 "OSDL #994" do_test 994-2 "OSDL #994 - with a dependant resource" do_test 1360 "OSDL #1360 - Clone stickiness" do_test 1484 "OSDL #1484 - on_fail=stop" do_test 1494 "OSDL #1494 - Clone stability" do_test unrunnable-1 "Unrunnable" do_test stonith-0 "Stonith loop - 1" do_test stonith-1 "Stonith loop - 2" do_test stonith-2 "Stonith loop - 3" do_test bug-1572-1 "Recovery of groups depending on master/slave" do_test bug-1572-2 "Recovery of groups depending on master/slave when the master is never re-promoted" do_test bug-1685 "Depends-on-master ordering" do_test bug-1822 "Dont promote partially active groups" do_test bug-pm-11 "New resource added to a m/s group" do_test bug-pm-12 "Recover only the failed portion of a cloned group" do_test bug-n-387749 "Don't shuffle clone instances" do_test bug-n-385265 "Don't ignore the failure stickiness of group children - resource_idvscommon should stay stopped" echo "" test_results diff --git a/pengine/testcases/asymmetric.dot b/pengine/testcases/asymmetric.dot new file mode 100644 index 0000000000..8abf57a851 --- /dev/null +++ b/pengine/testcases/asymmetric.dot @@ -0,0 +1,13 @@ +digraph "g" { +"drbd_target_poolA_monitor_0 puma1" -> "probe_complete puma1" [ style = bold] +"drbd_target_poolA_monitor_0 puma1" [ style=bold color="green" fontcolor="black" ] +"drbd_target_poolA_monitor_0 puma3" -> "probe_complete puma3" [ style = bold] +"drbd_target_poolA_monitor_0 puma3" [ style=bold color="green" fontcolor="black" ] +"ebe3fb6e-7778-426e-be58-190ab1ff3dd3:0_monitor_20000 puma3" [ style=bold color="green" fontcolor="black" ] +"ebe3fb6e-7778-426e-be58-190ab1ff3dd3:1_monitor_19000 puma1" [ style=bold color="green" fontcolor="black" ] +"probe_complete puma1" -> "probe_complete" [ style = bold] +"probe_complete puma1" [ style=bold color="green" fontcolor="black" ] +"probe_complete puma3" -> "probe_complete" [ style = bold] +"probe_complete puma3" [ style=bold color="green" fontcolor="black" ] +"probe_complete" [ style=bold color="green" fontcolor="orange" ] +} diff --git a/pengine/testcases/asymmetric.exp b/pengine/testcases/asymmetric.exp new file mode 100644 index 0000000000..876447ac8e --- /dev/null +++ b/pengine/testcases/asymmetric.exp @@ -0,0 +1,78 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pengine/testcases/asymmetric.xml b/pengine/testcases/asymmetric.xml new file mode 100644 index 0000000000..caa12f9470 --- /dev/null +++ b/pengine/testcases/asymmetric.xml @@ -0,0 +1,174 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pengine/utils.c b/pengine/utils.c index c2ad4bb42f..b2b75b14b3 100644 --- a/pengine/utils.c +++ b/pengine/utils.c @@ -1,545 +1,545 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include void print_rsc_to_node(const char *pre_text, rsc_to_node_t *cons, gboolean details) { if(cons == NULL) { crm_debug_4("%s%s: ", pre_text==NULL?"":pre_text, pre_text==NULL?"":": "); return; } crm_debug_4("%s%s%s Constraint %s (%p) - %d nodes:", pre_text==NULL?"":pre_text, pre_text==NULL?"":": ", "rsc_to_node", cons->id, cons, g_list_length(cons->node_list_rh)); if(details == FALSE) { crm_debug_4("\t%s (node placement rule)", safe_val3(NULL, cons, rsc_lh, id)); slist_iter( node, node_t, cons->node_list_rh, lpc, print_node("\t\t-->", node, FALSE) ); } } void print_rsc_colocation(const char *pre_text, rsc_colocation_t *cons, gboolean details) { if(cons == NULL) { crm_debug_4("%s%s: ", pre_text==NULL?"":pre_text, pre_text==NULL?"":": "); return; } crm_debug_4("%s%s%s Constraint %s (%p):", pre_text==NULL?"":pre_text, pre_text==NULL?"":": ", XML_CONS_TAG_RSC_DEPEND, cons->id, cons); if(details == FALSE) { crm_debug_4("\t%s --> %s, %d", safe_val3(NULL, cons, rsc_lh, id), safe_val3(NULL, cons, rsc_rh, id), cons->score); } } void pe_free_ordering(GListPtr constraints) { GListPtr iterator = constraints; while(iterator != NULL) { order_constraint_t *order = iterator->data; iterator = iterator->next; crm_free(order->lh_action_task); crm_free(order->rh_action_task); crm_free(order); } if(constraints != NULL) { g_list_free(constraints); } } void pe_free_rsc_to_node(GListPtr constraints) { GListPtr iterator = constraints; while(iterator != NULL) { rsc_to_node_t *cons = iterator->data; iterator = iterator->next; pe_free_shallow(cons->node_list_rh); crm_free(cons); } if(constraints != NULL) { g_list_free(constraints); } } rsc_to_node_t * rsc2node_new(const char *id, resource_t *rsc, int node_weight, node_t *foo_node, pe_working_set_t *data_set) { rsc_to_node_t *new_con = NULL; if(rsc == NULL || id == NULL) { pe_err("Invalid constraint %s for rsc=%p", crm_str(id), rsc); return NULL; } else if(foo_node == NULL) { CRM_CHECK(node_weight == 0, return NULL); } crm_malloc0(new_con, sizeof(rsc_to_node_t)); if(new_con != NULL) { new_con->id = id; new_con->rsc_lh = rsc; new_con->node_list_rh = NULL; new_con->role_filter = RSC_ROLE_UNKNOWN; if(foo_node != NULL) { node_t *copy = node_copy(foo_node); copy->weight = node_weight; new_con->node_list_rh = g_list_append(NULL, copy); } data_set->placement_constraints = g_list_append( data_set->placement_constraints, new_con); rsc->rsc_location = g_list_append(rsc->rsc_location, new_con); } return new_con; } const char * ordering_type2text(enum pe_ordering type) { const char *result = ""; if(type & pe_order_implies_left) { /* was: mandatory */ result = "right_implies_left"; } else if(type & pe_order_implies_right) { /* was: recover */ result = "left_implies_right"; } else if(type & pe_order_optional) { /* pure ordering, nothing implied */ result = "optional"; } else if(type & pe_order_runnable_left) { result = "runnable"; } else { crm_err("Unknown ordering type: %.3x", type); } return result; } gboolean can_run_resources(const node_t *node) { if(node == NULL) { return FALSE; } if(node->details->online == FALSE || node->details->shutdown || node->details->unclean || node->details->standby) { crm_debug_2("%s: online=%d, unclean=%d, standby=%d", node->details->uname, node->details->online, node->details->unclean, node->details->standby); return FALSE; } return TRUE; } /* return -1 if 'a' is more preferred * return 1 if 'b' is more preferred */ gint sort_node_weight(gconstpointer a, gconstpointer b) { int level = LOG_DEBUG_3; const node_t *node1 = (const node_t*)a; const node_t *node2 = (const node_t*)b; int node1_weight = 0; int node2_weight = 0; if(a == NULL) { return 1; } if(b == NULL) { return -1; } node1_weight = node1->weight; node2_weight = node2->weight; if(can_run_resources(node1) == FALSE) { node1_weight = -INFINITY; } if(can_run_resources(node2) == FALSE) { node2_weight = -INFINITY; } if(node1_weight > node2_weight) { do_crm_log(level, "%s (%d) > %s (%d) : weight", node1->details->uname, node1_weight, node2->details->uname, node2_weight); return -1; } if(node1_weight < node2_weight) { do_crm_log(level, "%s (%d) < %s (%d) : weight", node1->details->uname, node1_weight, node2->details->uname, node2_weight); return 1; } do_crm_log(level, "%s (%d) == %s (%d) : weight", node1->details->uname, node1_weight, node2->details->uname, node2_weight); /* now try to balance resources across the cluster */ if(node1->details->num_resources < node2->details->num_resources) { do_crm_log(level, "%s (%d) < %s (%d) : resources", node1->details->uname, node1->details->num_resources, node2->details->uname, node2->details->num_resources); return -1; } else if(node1->details->num_resources > node2->details->num_resources) { do_crm_log(level, "%s (%d) > %s (%d) : resources", node1->details->uname, node1->details->num_resources, node2->details->uname, node2->details->num_resources); return 1; } do_crm_log(level, "%s = %s", node1->details->uname, node2->details->uname); return 0; } gboolean native_assign_node(resource_t *rsc, GListPtr nodes, node_t *chosen) { int multiple = 0; CRM_ASSERT(rsc->variant == pe_native); clear_bit(rsc->flags, pe_rsc_provisional); slist_iter(candidate, node_t, nodes, lpc, if(chosen->weight > 0 && candidate->details->unclean == FALSE && candidate->weight == chosen->weight) { multiple++; } ); if(chosen == NULL) { crm_debug("Could not allocate a node for %s", rsc->id); rsc->next_role = RSC_ROLE_STOPPED; return FALSE; } else if(can_run_resources(chosen) == FALSE || chosen->weight < 0) { crm_debug("All nodes for resource %s are unavailable" ", unclean or shutting down", rsc->id); rsc->next_role = RSC_ROLE_STOPPED; return FALSE; } if(rsc->next_role == RSC_ROLE_UNKNOWN) { rsc->next_role = RSC_ROLE_STARTED; } if(multiple > 1) { int log_level = LOG_INFO; char *score = score2char(chosen->weight); if(chosen->weight >= INFINITY) { log_level = LOG_WARNING; } do_crm_log(log_level, "%d nodes with equal score (%s) for" " running %s resources (chose %s):", multiple, score, rsc->id, chosen->details->uname); crm_free(score); } /* todo: update the old node for each resource to reflect its * new resource count */ if(rsc->allocated_to) { node_t *old = rsc->allocated_to; old->details->allocated_rsc = g_list_remove( old->details->allocated_rsc, rsc); old->details->num_resources--; old->count--; } crm_debug("Assigning %s to %s", chosen->details->uname, rsc->id); crm_free(rsc->allocated_to); rsc->allocated_to = node_copy(chosen); chosen->details->allocated_rsc = g_list_append(chosen->details->allocated_rsc, rsc); chosen->details->num_resources++; chosen->count++; return TRUE; } void convert_non_atomic_task(resource_t *rsc, order_constraint_t *order, gboolean with_notify) { int interval = 0; char *rid = NULL; char *raw_task = NULL; int task = no_action; char *old_uuid = order->lh_action_task; crm_debug_3("Processing %s", old_uuid); if(order->lh_action_task == NULL || strstr(order->lh_action_task, "notify") != NULL) { /* no conversion */ return; } CRM_ASSERT(parse_op_key(old_uuid, &rid, &raw_task, &interval)); task = text2task(raw_task); switch(task) { case stop_rsc: case start_rsc: case action_notify: case action_promote: case action_demote: break; case stopped_rsc: case started_rsc: case action_notified: case action_promoted: case action_demoted: task--; break; case monitor_rsc: case shutdown_crm: case stonith_node: task = no_action; break; default: crm_err("Unknown action: %s", raw_task); task = no_action; break; } if(task != no_action) { if(with_notify && is_set(rsc->flags, pe_rsc_notify)) { order->lh_action_task = generate_notify_key( rsc->id, "confirmed-post", task2text(task)); } else { order->lh_action_task = generate_op_key( rsc->id, task2text(task+1), 0); } crm_debug_2("Converted %s -> %s", old_uuid, order->lh_action_task); crm_free(old_uuid); } crm_free(raw_task); crm_free(rid); } void order_actions( action_t *lh_action, action_t *rh_action, enum pe_ordering order) { action_wrapper_t *wrapper = NULL; GListPtr list = NULL; - crm_debug_2("Ordering Action %s before %s", + crm_debug_3("Ordering Action %s before %s", lh_action->uuid, rh_action->uuid); log_action(LOG_DEBUG_4, "LH (order_actions)", lh_action, FALSE); log_action(LOG_DEBUG_4, "RH (order_actions)", rh_action, FALSE); crm_malloc0(wrapper, sizeof(action_wrapper_t)); wrapper->action = rh_action; wrapper->type = order; list = lh_action->actions_after; list = g_list_append(list, wrapper); lh_action->actions_after = list; wrapper = NULL; /* order |= pe_order_implies_right; */ /* order ^= pe_order_implies_right; */ crm_malloc0(wrapper, sizeof(action_wrapper_t)); wrapper->action = lh_action; wrapper->type = order; list = rh_action->actions_before; list = g_list_append(list, wrapper); rh_action->actions_before = list; } void log_action(unsigned int log_level, const char *pre_text, action_t *action, gboolean details) { const char *node_uname = NULL; const char *node_uuid = NULL; if(action == NULL) { do_crm_log(log_level, "%s%s: ", pre_text==NULL?"":pre_text, pre_text==NULL?"":": "); return; } if(action->pseudo) { node_uname = NULL; node_uuid = NULL; } else if(action->node != NULL) { node_uname = action->node->details->uname; node_uuid = action->node->details->id; } else { node_uname = ""; node_uuid = NULL; } switch(text2task(action->task)) { case stonith_node: case shutdown_crm: do_crm_log(log_level, "%s%s%sAction %d: %s%s%s%s%s%s", pre_text==NULL?"":pre_text, pre_text==NULL?"":": ", action->pseudo?"Pseduo ":action->optional?"Optional ":action->runnable?action->processed?"":"(Provisional) ":"!!Non-Startable!! ", action->id, action->uuid, node_uname?"\ton ":"", node_uname?node_uname:"", node_uuid?"\t\t(":"", node_uuid?node_uuid:"", node_uuid?")":""); break; default: do_crm_log(log_level, "%s%s%sAction %d: %s %s%s%s%s%s%s", pre_text==NULL?"":pre_text, pre_text==NULL?"":": ", action->optional?"Optional ":action->pseudo?"Pseduo ":action->runnable?action->processed?"":"(Provisional) ":"!!Non-Startable!! ", action->id, action->uuid, safe_val3("", action, rsc, id), node_uname?"\ton ":"", node_uname?node_uname:"", node_uuid?"\t\t(":"", node_uuid?node_uuid:"", node_uuid?")":""); break; } if(details) { do_crm_log(log_level+1, "\t\t====== Preceeding Actions"); slist_iter( other, action_wrapper_t, action->actions_before, lpc, log_action(log_level+1, "\t\t", other->action, FALSE); ); do_crm_log(log_level+1, "\t\t====== Subsequent Actions"); slist_iter( other, action_wrapper_t, action->actions_after, lpc, log_action(log_level+1, "\t\t", other->action, FALSE); ); do_crm_log(log_level+1, "\t\t====== End"); } else { do_crm_log(log_level, "\t\t(seen=%d, before=%d, after=%d)", action->seen_count, g_list_length(action->actions_before), g_list_length(action->actions_after)); } } action_t *get_pseudo_op(const char *name, pe_working_set_t *data_set) { action_t *op = NULL; const char *op_s = name; GListPtr possible_matches = NULL; possible_matches = find_actions(data_set->actions, name, NULL); if(possible_matches != NULL) { if(g_list_length(possible_matches) > 1) { pe_warn("Action %s exists %d times", name, g_list_length(possible_matches)); } op = g_list_nth_data(possible_matches, 0); g_list_free(possible_matches); } else { op = custom_action(NULL, crm_strdup(op_s), op_s, NULL, TRUE, TRUE, data_set); op->pseudo = TRUE; op->runnable = TRUE; } return op; } gboolean can_run_any(GListPtr nodes) { if(nodes == NULL) { return FALSE; } slist_iter( node, node_t, nodes, lpc, if(can_run_resources(node) && node->weight >= 0) { return TRUE; } ); return FALSE; }