diff --git a/Makefile.am b/Makefile.am index 525caee845..ed789763fb 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,111 +1,111 @@ # # linux-ha: Linux-HA code # # Copyright (C) 2002 Alan Robertson # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # EXTRA_DIST = bootstrap ConfigureMe README.in libltdl.tar RPM = @RPM@ RPMFLAGS = -ba TARFILE = pacemaker.tar.gz AM_TAR = tar LAST_RELEASE = Pacemaker-0.6.4 AUTOMAKE_OPTIONS = foreign ##ACLOCAL = aclocal -I $(auxdir) MAINTAINERCLEANFILES = Makefile.in aclocal.m4 configure DRF/config-h.in \ DRF/stamp-h.in libtool.m4 ltdl.m4 libltdl.tar -SUBDIRS = debian build replace include lib cib crmd pengine transitioner crm tools doc cts +SUBDIRS = debian build config replace include lib cib crmd pengine transitioner crm fencing tools doc cts tgz: rm -f $(TARFILE) hg archive -t tgz $(TARFILE) echo Rebuilt $(TARFILE) on `date` changes: printf "$(PACKAGE) ($(VERSION)-1) stable; urgency=medium\n" printf " * Update source tarball to revision: `hg id`\n" printf " * Statistics:\n" printf " Changesets: `hg log -M --template "{desc|firstline|strip}\n" -r $(LAST_RELEASE):tip | wc -l`\n" printf " Diff: " hg diff -r $(LAST_RELEASE):tip | diffstat | tail -n 1 printf "\n * Testing Notes:\n" printf "\n + Test hardware:\n" printf "\n + All testing was performed with STONITH enabled\n" printf "\n + Pending bugs encountered during testing:\n" printf "\n * Changes since $(LAST_RELEASE)\n" hg log -M --template " + {desc|firstline|strip}\n" -r $(LAST_RELEASE):tip | grep -v Low: | sort -uf printf "\n -- Andrew Beekhof `date +"%a, %d %b %Y %T %z"`\n" obs: tgz make changes > .changes scp .changes $(TARFILE) vmhost.beekhof.net:Development/obs/server:ha-clustering/pacemaker/ home: tgz make changes > .changes scp .changes $(TARFILE) vmhost.beekhof.net:Development/obs/home:beekhof/pacemaker-test/ global: gtags htags -sanhIT global-www: global rsync -avzxlSD --progress HTML/ root@clusterlabs.org:/var/lib/global/pacemaker rpmtgz: tgz echo "Installing $(TARFILE) into /usr/src/packages/SOURCES for rpm" -test -d /usr/src/packages/SOURCES && cp $(TARFILE) /usr/src/packages/SOURCES/ -test -d /usr/src/redhat/SOURCES && cp $(TARFILE) /usr/src/redhat/SOURCES/ rpm: rpmtgz $(RPM) $(RPMFLAGS) $(top_srcdir)/pacemaker.spec * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "plugin.h" #include "utils.h" #include #define OPENAIS_EXTERNAL_SERVICE insane_ais_header_hack_in__totem_h #include #include #include #include -#include - +#ifndef AIS_WHITETANK +# include +#endif #include #include #include #include #include #include #include int plugin_log_level = LOG_DEBUG; char *local_uname = NULL; int local_uname_len = 0; unsigned int local_nodeid = 0; char *ipc_channel_name = NULL; unsigned long long membership_seq = 0; pthread_t crm_wait_thread; gboolean wait_active = TRUE; GHashTable *membership_list = NULL; #define MAX_RESPAWN 100 #define crm_flag_none 0x00000000 #define crm_flag_members 0x00000001 struct crm_identify_msg_s { mar_req_header_t header __attribute__((aligned(8))); uint32_t id; uint32_t pid; int32_t votes; uint32_t processes; char uname[256]; char version[256]; } __attribute__((packed)); static crm_child_t crm_children[] = { - { 0, crm_proc_none, crm_flag_none, 0, FALSE, "none", 0, NULL, NULL }, - { 0, crm_proc_ais, crm_flag_none, 0, FALSE, "ais", 0, NULL, NULL }, - { 0, crm_proc_lrmd, crm_flag_none, 0, TRUE, "lrmd", 0, HA_LIBHBDIR"/lrmd", NULL }, - { 0, crm_proc_cib, crm_flag_members, 0, TRUE, "cib", HA_CCMUID, HA_LIBHBDIR"/cib", NULL }, - { 0, crm_proc_crmd, crm_flag_members, 0, TRUE, "crmd", HA_CCMUID, HA_LIBHBDIR"/crmd", NULL }, - { 0, crm_proc_attrd,crm_flag_none, 0, TRUE, "attrd", HA_CCMUID, HA_LIBHBDIR"/attrd", NULL }, + { 0, crm_proc_none, crm_flag_none, 0, 0, FALSE, "none", 0, NULL, NULL }, + { 0, crm_proc_ais, crm_flag_none, 0, 0, FALSE, "ais", 0, NULL, NULL }, + { 0, crm_proc_lrmd, crm_flag_none, 2, 0, TRUE, "lrmd", 0, HA_LIBHBDIR"/lrmd", NULL }, + { 0, crm_proc_cib, crm_flag_members, 1, 0, TRUE, "cib", HA_CCMUID, HA_LIBHBDIR"/cib", NULL }, + { 0, crm_proc_crmd, crm_flag_members, 5, 0, TRUE, "crmd", HA_CCMUID, HA_LIBHBDIR"/crmd", NULL }, + { 0, crm_proc_attrd, crm_flag_none, 4, 0, TRUE, "attrd", HA_CCMUID, HA_LIBHBDIR"/attrd", NULL }, + { 0, crm_proc_stonithd, crm_flag_none, 3, 0, TRUE, "stonithd", 0, HA_LIBHBDIR"/stonithd", NULL }, }; void send_cluster_id(void); int send_cluster_msg_raw(AIS_Message *ais_msg); char *ais_generate_membership_data(void); extern totempg_groups_handle openais_group_handle; void global_confchg_fn ( enum totem_configuration_type configuration_type, unsigned int *member_list, int member_list_entries, unsigned int *left_list, int left_list_entries, unsigned int *joined_list, int joined_list_entries, struct memb_ring_id *ring_id); int crm_exec_exit_fn (struct objdb_iface_ver0 *objdb); int crm_exec_init_fn (struct objdb_iface_ver0 *objdb); int crm_config_init_fn(struct objdb_iface_ver0 *objdb); int ais_ipc_client_connect_callback (void *conn); int ais_ipc_client_exit_callback (void *conn); void ais_cluster_message_swab(void *msg); void ais_cluster_message_callback(void *message, unsigned int nodeid); void ais_ipc_message_callback(void *conn, void *msg); void ais_quorum_query(void *conn, void *msg); void ais_node_list_query(void *conn, void *msg); void ais_manage_notification(void *conn, void *msg); void ais_cluster_id_swab(void *msg); void ais_cluster_id_callback(void *message, unsigned int nodeid); static struct openais_lib_handler crm_lib_service[] = { { /* 0 */ .lib_handler_fn = ais_ipc_message_callback, .response_size = sizeof (mar_res_header_t), .response_id = CRM_MESSAGE_IPC_ACK, .flow_control = OPENAIS_FLOW_CONTROL_NOT_REQUIRED }, { /* 1 */ .lib_handler_fn = ais_node_list_query, .response_size = sizeof (mar_res_header_t), .response_id = CRM_MESSAGE_IPC_ACK, .flow_control = OPENAIS_FLOW_CONTROL_NOT_REQUIRED }, { /* 2 */ .lib_handler_fn = ais_manage_notification, .response_size = sizeof (mar_res_header_t), .response_id = CRM_MESSAGE_IPC_ACK, .flow_control = OPENAIS_FLOW_CONTROL_NOT_REQUIRED }, }; static struct openais_exec_handler crm_exec_service[] = { { /* 0 */ .exec_handler_fn = ais_cluster_message_callback, .exec_endian_convert_fn = ais_cluster_message_swab }, { /* 1 */ .exec_handler_fn = ais_cluster_id_callback, .exec_endian_convert_fn = ais_cluster_id_swab } }; static void crm_exec_dump_fn(void) { ENTER(""); ais_err("Called after SIG_USR2"); LEAVE(""); } /* * Exports the interface for the service */ struct openais_service_handler crm_service_handler = { .name = "LHA Cluster Manager", .id = CRM_SERVICE, .private_data_size = 0, .flow_control = OPENAIS_FLOW_CONTROL_NOT_REQUIRED, .lib_init_fn = ais_ipc_client_connect_callback, .lib_exit_fn = ais_ipc_client_exit_callback, .lib_service = crm_lib_service, .lib_service_count = sizeof (crm_lib_service) / sizeof (struct openais_lib_handler), .exec_init_fn = crm_exec_init_fn, .exec_exit_fn = crm_exec_exit_fn, .exec_service = crm_exec_service, .exec_service_count = sizeof (crm_exec_service) / sizeof (struct openais_exec_handler), .config_init_fn = crm_config_init_fn, .confchg_fn = global_confchg_fn, .exec_dump_fn = crm_exec_dump_fn, /* void (*sync_init) (void); */ /* int (*sync_process) (void); */ /* void (*sync_activate) (void); */ /* void (*sync_abort) (void); */ }; /* * Dynamic Loader definition */ struct openais_service_handler *crm_get_handler_ver0 (void); static struct openais_service_handler_iface_ver0 crm_service_handler_iface = { .openais_get_service_handler_ver0 = crm_get_handler_ver0 }; static struct lcr_iface openais_crm_ver0[1] = { { .name = "lha_crm", .version = 0, .versions_replace = 0, .versions_replace_count = 0, .dependencies = 0, .dependency_count = 0, .constructor = NULL, .destructor = NULL, .interfaces = NULL } }; static struct lcr_comp crm_comp_ver0 = { .iface_count = 1, .ifaces = openais_crm_ver0 }; struct openais_service_handler *crm_get_handler_ver0 (void) { return (&crm_service_handler); } __attribute__ ((constructor)) static void register_this_component (void) { lcr_interfaces_set (&openais_crm_ver0[0], &crm_service_handler_iface); lcr_component_register (&crm_comp_ver0); } /* IMPL */ int crm_config_init_fn(struct objdb_iface_ver0 *objdb) { int rc = 0; struct utsname us; char *value = NULL; - unsigned int object_service_handle; + unsigned int object_service_handle = 0; +#ifdef AIS_WHITETANK + log_init ("crm"); +#endif + membership_list = g_hash_table_new_full( g_direct_hash, g_direct_equal, NULL, destroy_ais_node); setenv("HA_COMPRESSION", "bz2", 1); setenv("HA_cluster_type", "openais", 1); +#if 0 objdb->object_find_reset (OBJECT_PARENT_HANDLE); if (objdb->object_find ( OBJECT_PARENT_HANDLE, "pacemaker", strlen ("pacemaker"), &object_service_handle) != 0) { object_service_handle = 0; ais_info("No configuration suplpied for pacemaker"); } +#endif objdb_get_string( objdb, object_service_handle, "logfacility", &value, "daemon"); setenv("HA_logfacility", value, 1); objdb_get_string(objdb, object_service_handle, "initdead", &value, "20"); setenv("HA_initdead", value, 1); objdb_get_string(objdb, object_service_handle, "debug", &value, "1"); setenv("HA_debug", value, 1); rc = atoi(value); plugin_log_level = LOG_INFO+rc; if(system("echo 1 > /proc/sys/kernel/core_uses_pid") != 0) { ais_perror("Could not enable /proc/sys/kernel/core_uses_pid"); } ais_info("CRM: Initialized"); log_printf(LOG_INFO, "Logging: Initialized %s\n", __PRETTY_FUNCTION__); rc = uname(&us); AIS_ASSERT(rc == 0); local_uname = ais_strdup(us.nodename); local_uname_len = strlen(local_uname); ais_info("Local hostname: %s", local_uname); local_nodeid = totempg_my_nodeid_get(); update_member(local_nodeid, 0, 1, 0, local_uname, CRM_NODE_LOST); LEAVE(""); return 0; } static void *crm_wait_dispatch (void *arg) { struct timespec waitsleep = { .tv_sec = 0, .tv_nsec = 100000 /* 100 msec */ }; while(wait_active) { int lpc = 0; for (; lpc < SIZEOF(crm_children); lpc++) { if(crm_children[lpc].pid > 0) { int status; pid_t pid = wait4( crm_children[lpc].pid, &status, WNOHANG, NULL); if(pid == 0) { continue; } else if(pid < 0) { ais_perror("crm_wait_dispatch: Call to wait4(%s) failed", crm_children[lpc].name); continue; } /* cleanup */ crm_children[lpc].pid = 0; crm_children[lpc].conn = NULL; crm_children[lpc].async_conn = NULL; if(WIFSIGNALED(status)) { int sig = WTERMSIG(status); ais_warn("Child process %s terminated with signal %d" " (pid=%d, core=%s)", crm_children[lpc].name, sig, pid, WCOREDUMP(status)?"true":"false"); } else if (WIFEXITED(status)) { int rc = WEXITSTATUS(status); ais_notice("Child process %s exited (pid=%d, rc=%d)", crm_children[lpc].name, pid, rc); if(rc == 100) { ais_notice("Child process %s no longer wishes" " to be respawned", crm_children[lpc].name); crm_children[lpc].respawn = FALSE; } } crm_children[lpc].respawn_count += 1; if(crm_children[lpc].respawn_count > MAX_RESPAWN) { ais_notice("Child respawn count exceeded by %s", crm_children[lpc].name); crm_children[lpc].respawn = FALSE; } if(crm_children[lpc].respawn) { ais_info("Respawning failed child process: %s", crm_children[lpc].name); spawn_child(&(crm_children[lpc])); } else { send_cluster_id(); } } } sched_yield (); nanosleep (&waitsleep, 0); } return 0; } #include int crm_exec_init_fn (struct objdb_iface_ver0 *objdb) { int lpc = 0; + int start_seq = 1; + static int max = SIZEOF(crm_children); ENTER(""); mkdir(HA_VARRUNDIR, 750); mkdir(HA_VARRUNDIR"/crm", 750); chown(HA_VARRUNDIR"/crm", HA_CCMUID, HA_APIGID); chown(HA_VARRUNDIR, HA_CCMUID, HA_APIGID); pthread_create (&crm_wait_thread, NULL, crm_wait_dispatch, NULL); - for (; lpc < SIZEOF(crm_children); lpc++) { - spawn_child(&(crm_children[lpc])); + for (start_seq = 1; start_seq < max; start_seq++) { + /* dont start anything with start_seq < 1 */ + for (lpc = 0; lpc < max; lpc++) { + if(start_seq == crm_children[lpc].start_seq) { + spawn_child(&(crm_children[lpc])); + } + } } ais_info("CRM: Initialized"); LEAVE(""); return 0; } /* static void ais_print_node(const char *prefix, struct totem_ip_address *host) { int len = 0; char *buffer = NULL; ais_malloc0(buffer, INET6_ADDRSTRLEN+1); inet_ntop(host->family, host->addr, buffer, INET6_ADDRSTRLEN); len = strlen(buffer); ais_info("%s: %.*s", prefix, len, buffer); ais_free(buffer); } */ #if 0 /* copied here for reference from exec/totempg.c */ char *totempg_ifaces_print (unsigned int nodeid) { static char iface_string[256 * INTERFACE_MAX]; char one_iface[64]; struct totem_ip_address interfaces[INTERFACE_MAX]; char **status; unsigned int iface_count; unsigned int i; int res; iface_string[0] = '\0'; res = totempg_ifaces_get (nodeid, interfaces, &status, &iface_count); if (res == -1) { return ("no interface found for nodeid"); } for (i = 0; i < iface_count; i++) { sprintf (one_iface, "r(%d) ip(%s) ", i, totemip_print (&interfaces[i])); strcat (iface_string, one_iface); } return (iface_string); } #endif static void ais_mark_unseen_peer_dead( gpointer key, gpointer value, gpointer user_data) { int *changed = user_data; crm_node_t *node = value; if(node->last_seen != membership_seq && ais_str_eq(CRM_NODE_LOST, node->state) == FALSE) { ais_info("Node %s was not seen in the previous transition", node->uname); *changed += update_member(node->id, membership_seq, node->votes, node->processes, node->uname, CRM_NODE_LOST); ais_info("Node %s marked dead", node->uname); } } void global_confchg_fn ( enum totem_configuration_type configuration_type, unsigned int *member_list, int member_list_entries, unsigned int *left_list, int left_list_entries, unsigned int *joined_list, int joined_list_entries, struct memb_ring_id *ring_id) { int lpc = 0; int changed = 0; int do_update = 0; ENTER(""); AIS_ASSERT(ring_id != NULL); switch(configuration_type) { case TOTEM_CONFIGURATION_REGULAR: do_update = 1; break; case TOTEM_CONFIGURATION_TRANSITIONAL: break; } membership_seq = ring_id->seq; ais_notice("%s membership event on ring %lld: memb=%d, new=%d, lost=%d", do_update?"Stable":"Transitional", ring_id->seq, member_list_entries, joined_list_entries, left_list_entries); if(do_update == 0) { for(lpc = 0; lpc < joined_list_entries; lpc++) { const char *prefix = "new: "; uint32_t nodeid = joined_list[lpc]; ais_info("%s %s %u", prefix, member_uname(nodeid), nodeid); } for(lpc = 0; lpc < member_list_entries; lpc++) { const char *prefix = "memb:"; uint32_t nodeid = member_list[lpc]; ais_info("%s %s %u", prefix, member_uname(nodeid), nodeid); } for(lpc = 0; lpc < left_list_entries; lpc++) { const char *prefix = "lost:"; uint32_t nodeid = left_list[lpc]; ais_info("%s %s %u", prefix, member_uname(nodeid), nodeid); } return; } for(lpc = 0; lpc < joined_list_entries; lpc++) { const char *prefix = "NEW: "; uint32_t nodeid = joined_list[lpc]; crm_node_t *node = NULL; changed += update_member( nodeid, membership_seq, -1, 0, NULL, CRM_NODE_MEMBER); ais_info("%s %s %u", prefix, member_uname(nodeid), nodeid); node = g_hash_table_lookup(membership_list, GUINT_TO_POINTER(nodeid)); if(node->addr == NULL) { const char *addr = totempg_ifaces_print(nodeid); node->addr = ais_strdup(addr); ais_debug("Node %u has address %s", nodeid, node->addr); } } for(lpc = 0; lpc < member_list_entries; lpc++) { const char *prefix = "MEMB:"; uint32_t nodeid = member_list[lpc]; changed += update_member( nodeid, membership_seq, -1, 0, NULL, CRM_NODE_MEMBER); ais_info("%s %s %u", prefix, member_uname(nodeid), nodeid); } for(lpc = 0; lpc < left_list_entries; lpc++) { const char *prefix = "LOST:"; uint32_t nodeid = left_list[lpc]; changed += update_member( nodeid, membership_seq, -1, 0, NULL, CRM_NODE_LOST); ais_info("%s %s %u", prefix, member_uname(nodeid), nodeid); } if(do_update) { ais_debug_2("Reaping unseen nodes..."); g_hash_table_foreach( membership_list, ais_mark_unseen_peer_dead, &changed); } if(changed) { ais_debug("%d nodes changed", changed); send_member_notification(); } send_cluster_id(); LEAVE(""); } int ais_ipc_client_exit_callback (void *conn) { int lpc = 0; const char *client = NULL; ENTER("Client=%p", conn); for (; lpc < SIZEOF(crm_children); lpc++) { if(crm_children[lpc].conn == conn) { crm_children[lpc].conn = NULL; crm_children[lpc].async_conn = NULL; client = crm_children[lpc].name; break; } } ais_info("Client %p/%s left", conn, client?client:"unknown-transient"); LEAVE(""); return (0); } int ais_ipc_client_connect_callback (void *conn) { ENTER("Client=%p", conn); /* OpenAIS hasn't finished setting up the connection at this point * Sending messages now messes up the protocol! */ LEAVE(""); return (0); } /* * Executive message handlers */ void ais_cluster_message_swab(void *msg) { AIS_Message *ais_msg = msg; ENTER(""); ais_debug_3("Performing endian conversion..."); ais_msg->id = swab32 (ais_msg->id); ais_msg->is_compressed = swab32 (ais_msg->is_compressed); ais_msg->compressed_size = swab32 (ais_msg->compressed_size); ais_msg->size = swab32 (ais_msg->size); ais_msg->host.id = swab32 (ais_msg->host.id); ais_msg->host.pid = swab32 (ais_msg->host.pid); ais_msg->host.type = swab32 (ais_msg->host.type); ais_msg->host.size = swab32 (ais_msg->host.size); ais_msg->host.local = swab32 (ais_msg->host.local); ais_msg->sender.id = swab32 (ais_msg->sender.id); ais_msg->sender.pid = swab32 (ais_msg->sender.pid); ais_msg->sender.type = swab32 (ais_msg->sender.type); ais_msg->sender.size = swab32 (ais_msg->sender.size); ais_msg->sender.local = swab32 (ais_msg->sender.local); LEAVE(""); } void ais_cluster_message_callback ( void *message, unsigned int nodeid) { AIS_Message *ais_msg = message; ENTER("Node=%u (%s)", nodeid, nodeid==local_nodeid?"local":"remote"); ais_debug_2("Message from node %u (%s)", nodeid, nodeid==local_nodeid?"local":"remote"); /* Shouldn't be required... update_member( ais_msg->sender.id, membership_seq, -1, 0, ais_msg->sender.uname, NULL); */ if(ais_msg->host.size == 0 || ais_str_eq(ais_msg->host.uname, local_uname)) { route_ais_message(ais_msg, FALSE); } else { ais_debug_3("Discarding Msg[%d] (dest=%s:%s, from=%s:%s)", ais_msg->id, ais_dest(&(ais_msg->host)), msg_type2text(ais_msg->host.type), ais_dest(&(ais_msg->sender)), msg_type2text(ais_msg->sender.type)); } LEAVE(""); } void ais_cluster_id_swab(void *msg) { struct crm_identify_msg_s *ais_msg = msg; ENTER(""); ais_debug_3("Performing endian conversion..."); ais_msg->id = swab32 (ais_msg->id); ais_msg->pid = swab32 (ais_msg->pid); ais_msg->votes = swab32 (ais_msg->votes); ais_msg->processes = swab32 (ais_msg->processes); LEAVE(""); } void ais_cluster_id_callback (void *message, unsigned int nodeid) { int changed = 0; struct crm_identify_msg_s *msg = message; if(nodeid != msg->id) { ais_err("Invalid message: Node %u claimed to be node %d", nodeid, msg->id); return; } ais_debug("Node update: %s (%s)", msg->uname, msg->version); changed = update_member( nodeid, membership_seq, msg->votes, msg->processes, msg->uname, NULL); if(changed) { send_member_notification(); } } struct res_overlay { mar_res_header_t header __attribute((aligned(8))); char buf[4096]; }; struct res_overlay *res_overlay = NULL; static void send_ipc_ack(void *conn, int class) { if(res_overlay == NULL) { ais_malloc0(res_overlay, sizeof(struct res_overlay)); } - res_overlay->header.error = SA_AIS_OK; - res_overlay->header.id = crm_lib_service[class].response_id; res_overlay->header.size = crm_lib_service[class].response_size; + res_overlay->header.id = crm_lib_service[class].response_id; + res_overlay->header.error = SA_AIS_OK; +#ifdef AIS_WHITETANK + openais_response_send (conn, res_overlay, res_overlay->header.size); +#else openais_conn_send_response (conn, res_overlay, res_overlay->header.size); +#endif } /* local callbacks */ void ais_ipc_message_callback(void *conn, void *msg) { + gboolean transient = TRUE; AIS_Message *ais_msg = msg; int type = ais_msg->sender.type; void *async_conn = openais_conn_partner_get(conn); ENTER("Client=%p", conn); - ais_debug_2("Message from client %p/%p sender=%d msg=%d, spid=%d, cpid=%d", - conn, async_conn, type, ais_msg->host.type, - ais_msg->sender.pid, crm_children[type].pid); - + ais_debug_2("Message from client %p", conn); send_ipc_ack(conn, 0); + + ais_debug_3("type: %d local: %d conn: %p host type: %d ais: %d sender pid: %d child pid: %d size: %d", + type, ais_msg->host.local, crm_children[type].conn, ais_msg->host.type, crm_msg_ais, + ais_msg->sender.pid, crm_children[type].pid, ((int)SIZEOF(crm_children))); + + if(type > crm_msg_none && type < SIZEOF(crm_children)) { + /* known child process */ + transient = FALSE; + } - if(type > 0 + /* If this check fails, the order of crm_children probably + * doesn't match that of the crm_ais_msg_types enum + */ + AIS_CHECK(transient || ais_msg->sender.pid == crm_children[type].pid, + ais_err("Sender: %d, child[%d]: %d", ais_msg->sender.pid, type, crm_children[type].pid); + return); + + if(transient == FALSE + && type > crm_msg_none && ais_msg->host.local && crm_children[type].conn == NULL - && ais_msg->host.type == crm_msg_ais - && ais_msg->sender.pid == crm_children[type].pid - && type < SIZEOF(crm_children)) { + && ais_msg->host.type == crm_msg_ais) { + ais_info("Recorded connection %p for %s/%d", conn, crm_children[type].name, crm_children[type].pid); crm_children[type].conn = conn; crm_children[type].async_conn = async_conn; /* Make sure they have the latest membership */ if(crm_children[type].flags & crm_flag_members) { char *update = ais_generate_membership_data(); ais_info("Sending membership update %llu to %s", membership_seq, crm_children[type].name); send_client_msg(async_conn, crm_class_members, crm_msg_none,update); } } ais_msg->sender.id = local_nodeid; ais_msg->sender.size = local_uname_len; memset(ais_msg->sender.uname, 0, MAX_NAME); memcpy(ais_msg->sender.uname, local_uname, ais_msg->sender.size); route_ais_message(msg, TRUE); LEAVE(""); } int crm_exec_exit_fn (struct objdb_iface_ver0 *objdb) { int lpc = 0; + int start_seq = 1; + static int max = SIZEOF(crm_children); + struct timespec waitsleep = { .tv_sec = 1, .tv_nsec = 0 }; ENTER(""); ais_notice("Begining shutdown"); in_shutdown = TRUE; wait_active = FALSE; /* stop the wait loop */ - - for (lpc = SIZEOF(crm_children) - 1; lpc > 0; lpc--) { - crm_children[lpc].respawn = FALSE; - stop_child(&(crm_children[lpc]), SIGTERM); - while(crm_children[lpc].command && crm_children[lpc].pid) { - int status; - pid_t pid = 0; - - pid = wait4( - crm_children[lpc].pid, &status, WNOHANG, NULL); - - if(pid == 0) { - sched_yield (); - nanosleep (&waitsleep, 0); + + for (start_seq = max; start_seq > 0; start_seq--) { + /* dont stop anything with start_seq < 1 */ + + for (lpc = max - 1; lpc >= 0; lpc--) { + if(start_seq != crm_children[lpc].start_seq) { continue; + } - } else if(pid < 0) { - ais_perror("crm_wait_dispatch: Call to wait4(%s) failed", - crm_children[lpc].name); + crm_children[lpc].respawn = FALSE; + stop_child(&(crm_children[lpc]), SIGTERM); + while(crm_children[lpc].command && crm_children[lpc].pid) { + int status; + pid_t pid = 0; + + pid = wait4( + crm_children[lpc].pid, &status, WNOHANG, NULL); + + if(pid == 0) { + sched_yield (); + nanosleep (&waitsleep, 0); + continue; + + } else if(pid < 0) { + ais_perror("crm_wait_dispatch: Call to wait4(%s) failed", + crm_children[lpc].name); + } + + ais_notice("%s (pid=%d) confirmed dead", + crm_children[lpc].name, crm_children[lpc].pid); + + /* cleanup */ + crm_children[lpc].pid = 0; + crm_children[lpc].conn = NULL; + crm_children[lpc].async_conn = NULL; + break; } - - ais_notice("%s (pid=%d) confirmed dead", - crm_children[lpc].name, crm_children[lpc].pid); - - /* cleanup */ - crm_children[lpc].pid = 0; - crm_children[lpc].conn = NULL; - crm_children[lpc].async_conn = NULL; - break; } } - + send_cluster_id(); ais_notice("Shutdown complete"); LEAVE(""); +#ifndef AIS_WHITETANK logsys_flush (); +#endif return 0; } struct member_loop_data { char *string; }; void member_loop_fn(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; struct member_loop_data *data = user_data; ais_debug_2("Dumping node %u", node->id); data->string = append_member(data->string, node); } char *ais_generate_membership_data(void) { int size = 0; struct member_loop_data data; size = 14 + 32; /* + int */ ais_malloc0(data.string, size); sprintf(data.string, "", membership_seq); g_hash_table_foreach(membership_list, member_loop_fn, &data); size = strlen(data.string); data.string = realloc(data.string, size + 9) ;/* 9 = + nul */ sprintf(data.string + size, ""); return data.string; } void ais_node_list_query(void *conn, void *msg) { char *data = ais_generate_membership_data(); void *async_conn = openais_conn_partner_get(conn); - ais_debug_4("members: %s", data); + + /* send the ACK before we send any other messages */ + send_ipc_ack(conn, 1); + if(async_conn) { send_client_msg(async_conn, crm_class_members, crm_msg_none, data); } ais_free(data); - send_ipc_ack(conn, 1); } void ais_manage_notification(void *conn, void *msg) { int lpc = 0; int enable = 0; AIS_Message *ais_msg = msg; char *data = get_ais_data(ais_msg); if(ais_str_eq("true", data)) { enable = 1; } for (; lpc < SIZEOF(crm_children); lpc++) { if(crm_children[lpc].conn == conn) { ais_info("%s node notifications for %s", enable?"Enabling":"Disabling", crm_children[lpc].name); if(enable) { crm_children[lpc].flags |= crm_flag_members; } else { crm_children[lpc].flags |= crm_flag_members; crm_children[lpc].flags ^= crm_flag_members; } break; } } send_ipc_ack(conn, 2); } void send_member_notification(void) { int lpc = 0; char *update = ais_generate_membership_data(); for (; lpc < SIZEOF(crm_children); lpc++) { if(crm_children[lpc].flags & crm_flag_members) { if(crm_children[lpc].async_conn == NULL) { continue; } ais_info("Sending membership update %llu to %s", membership_seq, crm_children[lpc].name); send_client_msg(crm_children[lpc].async_conn, crm_class_members, crm_msg_none, update); } } ais_free(update); } static gboolean check_message_sanity(AIS_Message *msg, char *data) { gboolean sane = TRUE; gboolean repaired = FALSE; int dest = msg->host.type; int tmp_size = msg->header.size - sizeof(AIS_Message); if(sane && msg->header.size == 0) { ais_warn("Message with no size"); sane = FALSE; } if(sane && msg->header.error != 0) { ais_warn("Message header contains an error: %d", msg->header.error); sane = FALSE; } if(sane && ais_data_len(msg) != tmp_size) { int cur_size = ais_data_len(msg); repaired = TRUE; if(msg->is_compressed) { msg->compressed_size = tmp_size; } else { msg->size = tmp_size; } ais_warn("Repaired message payload size %d -> %d", cur_size, tmp_size); } if(sane && ais_data_len(msg) == 0) { ais_warn("Message with no payload"); sane = FALSE; } if(sane && data && msg->is_compressed == FALSE) { int str_size = strlen(data) + 1; if(ais_data_len(msg) != str_size) { int lpc = 0; ais_warn("Message payload is corrupted: expected %d bytes, got %d", ais_data_len(msg), str_size); sane = FALSE; for(lpc = (str_size - 10); lpc < msg->size; lpc++) { if(lpc < 0) { lpc = 0; } ais_debug_2("bad_data[%d]: %d / '%c'", lpc, data[lpc], data[lpc]); } } } if(sane == FALSE) { ais_err("Invalid message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } else if(repaired) { ais_err("Repaired message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } else { ais_debug_3("Verified message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } return sane; } gboolean route_ais_message(AIS_Message *msg, gboolean local_origin) { int rc = 0; int level = LOG_WARNING; int dest = msg->host.type; ais_debug_3("Msg[%d] (dest=%s:%s, from=%s:%s.%d, remote=%s, size=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, local_origin?"false":"true", ais_data_len(msg)); if(local_origin == FALSE) { if(msg->host.size == 0 || ais_str_eq(local_uname, msg->host.uname)) { msg->host.local = TRUE; } } if(check_message_sanity(msg, msg->data) == FALSE) { /* Dont send this message to anyone */ return FALSE; } if(msg->host.local) { void *conn = NULL; const char *lookup = NULL; if(dest == crm_msg_ais) { process_ais_message(msg); return TRUE; } else if(dest == crm_msg_lrmd) { /* lrmd messages are routed via the crm */ dest = crm_msg_crmd; } else if(dest == crm_msg_te) { /* te messages are routed via the crm - for now */ dest = crm_msg_crmd; } if(in_shutdown) { level = LOG_INFO; } AIS_CHECK(dest > 0 && dest < SIZEOF(crm_children), ais_err("Invalid destination: %d", dest); log_ais_message(LOG_ERR, msg); return FALSE; ); rc = 1; lookup = msg_type2text(dest); conn = crm_children[dest].async_conn; /* the cluster fails in weird and wonderfully obscure ways when this is not true */ AIS_ASSERT(ais_str_eq(lookup, crm_children[dest].name)); if (conn == NULL) { do_ais_log(level, "No connection to %s", crm_children[dest].name); } else if (!libais_connection_active(conn)) { do_ais_log(level, "Connection to %s is no longer active", crm_children[dest].name); crm_children[dest].async_conn = NULL; /* } else if ((queue->size - 1) == queue->used) { */ /* ais_err("Connection is throttled: %d", queue->size); */ } else { level = LOG_ERR; ais_debug_3("Delivering locally to %s (size=%d)", crm_children[dest].name, msg->header.size); +#ifdef AIS_WHITETANK + rc = openais_dispatch_send(conn, msg, msg->header.size); +#else rc = openais_conn_send_response(conn, msg, msg->header.size); +#endif } } else if(local_origin) { /* forward to other hosts */ ais_debug_3("Forwarding to cluster"); rc = send_cluster_msg_raw(msg); } else { ais_debug_3("Ignoring..."); } if(rc != 0) { do_ais_log(level, "Sending message to %s.%s failed (rc=%d)", ais_dest(&(msg->host)), msg_type2text(dest), rc); log_ais_message(level, msg); return FALSE; } return TRUE; } int send_cluster_msg_raw(AIS_Message *ais_msg) { int rc = 0; struct iovec iovec; static uint32_t msg_id = 0; AIS_Message *bz2_msg = NULL; ENTER(""); AIS_ASSERT(local_nodeid != 0); if(ais_msg->header.size != (sizeof(AIS_Message) + ais_data_len(ais_msg))) { ais_err("Repairing size mismatch: %u + %d = %d", (unsigned int)sizeof(AIS_Message), ais_data_len(ais_msg), ais_msg->header.size); ais_msg->header.size = sizeof(AIS_Message) + ais_data_len(ais_msg); } if(ais_msg->id == 0) { msg_id++; AIS_CHECK(msg_id != 0 /* detect wrap-around */, msg_id++; ais_err("Message ID wrapped around")); ais_msg->id = msg_id; } ais_msg->header.id = SERVICE_ID_MAKE(CRM_SERVICE, 0); ais_msg->sender.id = local_nodeid; ais_msg->sender.size = local_uname_len; memset(ais_msg->sender.uname, 0, MAX_NAME); memcpy(ais_msg->sender.uname, local_uname, ais_msg->sender.size); iovec.iov_base = (char *)ais_msg; iovec.iov_len = ais_msg->header.size; #if 0 if(ais_msg->is_compressed == FALSE && ais_msg->size > 1024) { char *compressed = NULL; unsigned int len = (ais_msg->size * 1.1) + 600; /* recomended size */ ais_debug_2("Creating compressed message"); ais_malloc0(compressed, len); rc = BZ2_bzBuffToBuffCompress( compressed, &len, ais_msg->data, ais_msg->size, 3, 0, 30); if(rc != BZ_OK) { ais_err("Compression failed: %d", rc); ais_free(compressed); goto send; } ais_malloc0(bz2_msg, sizeof(AIS_Message) + len + 1); memcpy(bz2_msg, ais_msg, sizeof(AIS_Message)); memcpy(bz2_msg->data, compressed, len); ais_free(compressed); bz2_msg->is_compressed = TRUE; bz2_msg->compressed_size = len; bz2_msg->header.size = sizeof(AIS_Message) + ais_data_len(bz2_msg); ais_debug("Compression details: %d -> %d", bz2_msg->size, ais_data_len(bz2_msg)); iovec.iov_base = (char *)bz2_msg; iovec.iov_len = bz2_msg->header.size; } send: #endif ais_debug_3("Sending message (size=%u)", (unsigned int)iovec.iov_len); rc = totempg_groups_mcast_joined ( openais_group_handle, &iovec, 1, TOTEMPG_SAFE); if(rc == 0 && ais_msg->is_compressed == FALSE) { ais_debug_2("Message sent: %.80s", ais_msg->data); } AIS_CHECK(rc == 0, ais_err("Message not sent (%d)", rc)); ais_free(bz2_msg); LEAVE(""); return rc; } #define min(x,y) (x)<(y)?(x):(y) void send_cluster_id(void) { int rc = 0; int lpc = 0; int len = 0; struct iovec iovec; struct crm_identify_msg_s *msg = NULL; ENTER(""); AIS_ASSERT(local_nodeid != 0); ais_malloc0(msg, sizeof(struct crm_identify_msg_s)); msg->header.size = sizeof(struct crm_identify_msg_s); msg->id = local_nodeid; msg->header.id = SERVICE_ID_MAKE(CRM_SERVICE, 1); len = min(local_uname_len, MAX_NAME-1); memset(msg->uname, 0, MAX_NAME); memcpy(msg->uname, local_uname, len); len = min(strlen(VERSION), MAX_NAME-1); memset(msg->version, 0, MAX_NAME); memcpy(msg->version, VERSION, len); msg->votes = 1; msg->pid = getpid(); msg->processes = crm_proc_ais; for (lpc = 0; lpc < SIZEOF(crm_children); lpc++) { if(crm_children[lpc].pid != 0) { msg->processes |= crm_children[lpc].flag; } } ais_debug("Local update: %u", local_nodeid); update_member( local_nodeid, membership_seq, msg->votes, msg->processes, NULL, NULL); iovec.iov_base = (char *)msg; iovec.iov_len = msg->header.size; rc = totempg_groups_mcast_joined ( openais_group_handle, &iovec, 1, TOTEMPG_SAFE); AIS_CHECK(rc == 0, ais_err("Message not sent (%d)", rc)); ais_free(msg); LEAVE(""); } diff --git a/crm/ais/utils.c b/crm/ais/utils.c index dd6bfaf8b0..cc5cdb4c34 100644 --- a/crm/ais/utils.c +++ b/crm/ais/utils.c @@ -1,481 +1,484 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "./utils.h" int in_shutdown = FALSE; extern int send_cluster_msg_raw(AIS_Message *ais_msg); void log_ais_message(int level, AIS_Message *msg) { char *data = get_ais_data(msg); do_ais_log(level, "Msg[%d] (dest=%s:%s, from=%s:%s.%d, remote=%s, size=%d): %.90s", msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->sender.uname==local_uname?"false":"true", ais_data_len(msg), data); ais_free(data); } gboolean process_ais_message(AIS_Message *msg) { char *data = get_ais_data(msg); do_ais_log(LOG_NOTICE, "Msg[%d] (dest=%s:%s, from=%s:%s.%d, remote=%s, size=%d): %.90s", msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->sender.uname==local_uname?"false":"true", ais_data_len(msg), data); ais_free(data); return TRUE; } static int ais_string_to_boolean(const char * s) { int rc = 0; if(s == NULL) { return rc; } if(strcasecmp(s, "true") == 0 || strcasecmp(s, "on") == 0 || strcasecmp(s, "yes") == 0 || strcasecmp(s, "y") == 0 || strcasecmp(s, "1") == 0) { rc = 1; } return rc; } gboolean spawn_child(crm_child_t *child) { int rc = 0; int lpc = 0; struct rlimit oflimits; const char *devnull = "/dev/null"; const char *use_valgrind = getenv("HA_VALGRIND_ENABLED"); if(child->command == NULL) { ais_info("Nothing to do for child \"%s\"", child->name); return TRUE; } child->pid = fork(); AIS_ASSERT(child->pid != -1); if(child->pid > 0) { /* parent */ ais_info("Forked child %d for process %s", child->pid, child->name); return TRUE; } /* Child */ ais_debug("Executing \"%s (%s)\" (pid %d)", child->command, child->name, (int) getpid()); if(child->uid > 0) { rc = setuid(child->uid); if(rc < 0) { ais_perror("Could not set user to %d", child->uid); } } /* A precautionary measure */ getrlimit(RLIMIT_NOFILE, &oflimits); for (; lpc < oflimits.rlim_cur; lpc++) { close(lpc); } (void)open(devnull, O_RDONLY); /* Stdin: fd 0 */ (void)open(devnull, O_WRONLY); /* Stdout: fd 1 */ (void)open(devnull, O_WRONLY); /* Stderr: fd 2 */ if(ais_string_to_boolean(use_valgrind)) { char *opts[] = { ais_strdup(VALGRIND_BIN), ais_strdup(child->command), NULL }; (void)execvp(VALGRIND_BIN, opts); } else { char *opts[] = { ais_strdup(child->command), NULL }; (void)execvp(child->command, opts); } ais_perror("FATAL: Cannot exec %s", child->command); exit(100); return TRUE; /* never reached */ } gboolean stop_child(crm_child_t *child, int signal) { if(signal == 0) { signal = SIGTERM; } if(child->command == NULL) { ais_info("Nothing to do for child \"%s\"", child->name); return TRUE; } ais_debug("Stopping CRM child \"%s\"", child->name); if (child->pid <= 0) { ais_debug_2("Client %s not running", child->name); return TRUE; } errno = 0; if(kill(child->pid, signal) == 0) { ais_notice("Sent -%d to %s: [%d]", signal, child->name, child->pid); } else { ais_perror("Sent -%d to %s: [%d]", signal, child->name, child->pid); } return TRUE; } void destroy_ais_node(gpointer data) { crm_node_t *node = data; ais_info("Destroying entry for node %u", node->id); ais_free(node->addr); ais_free(node->uname); ais_free(node->state); ais_free(node); } int update_member(unsigned int id, unsigned long long seq, int32_t votes, uint32_t procs, const char *uname, const char *state) { int changed = 0; crm_node_t *node = NULL; node = g_hash_table_lookup(membership_list, GUINT_TO_POINTER(id)); if(node == NULL) { ais_malloc0(node, sizeof(crm_node_t)); ais_info("Creating entry for node %u born on %llu", id, seq); node->id = id; node->addr = NULL; node->state = ais_strdup("unknown"); g_hash_table_insert(membership_list, GUINT_TO_POINTER(id), node); node = g_hash_table_lookup(membership_list, GUINT_TO_POINTER(id)); } if(seq != 0) { node->last_seen = seq; } if(uname != NULL) { if(node->uname == NULL || ais_str_eq(node->uname, uname) == FALSE) { ais_info("%p Node %u now known as %s (was: %s)", node, id, uname, node->uname); ais_free(node->uname); node->uname = ais_strdup(uname); changed = TRUE; } } if(procs != 0 && procs != node->processes) { ais_info("Node %s now has process list: %.32x (%u)", node->uname, procs, procs); node->processes = procs; changed = TRUE; } if(votes >= 0 && votes != node->votes) { ais_info("Node %s now has %d quorum votes (was %d)", node->uname, votes, node->votes); node->votes = votes; changed = TRUE; } if(state != NULL) { if(node->state == NULL || ais_str_eq(node->state, state) == FALSE) { ais_free(node->state); node->state = ais_strdup(state); ais_info("Node %u/%s is now: %s", id, node->uname?node->uname:"unknown", state); changed = TRUE; } } AIS_ASSERT(node != NULL); return changed; } void delete_member(uint32_t id, const char *uname) { if(uname == NULL) { g_hash_table_remove(membership_list, GUINT_TO_POINTER(id)); return; } ais_err("Deleting by uname is not yet supported"); } const char *member_uname(uint32_t id) { crm_node_t *node = g_hash_table_lookup( membership_list, GUINT_TO_POINTER(id)); if(node == NULL) { return ".unknown."; } if(node->uname == NULL) { return ".pending."; } return node->uname; } #define MEMBER_FORMAT "" char *append_member(char *data, crm_node_t *node) { int size = 1; /* nul */ int offset = 0; if(node->uname == NULL) { return data; } if(data) { size = strlen(data); } offset = size; size += strlen(MEMBER_FORMAT); size += 32; /* node->id */ size += strlen(node->uname); size += strlen(node->state); data = realloc(data, size); if(node->addr) { size += strlen(node->addr); } sprintf(data+offset, MEMBER_FORMAT, node->id, node->uname, node->state, membership_seq, node->votes, node->processes, node->addr?node->addr:""); return data; } void swap_sender(AIS_Message *msg) { int tmp = 0; char tmp_s[256]; tmp = msg->host.type; msg->host.type = msg->sender.type; msg->sender.type = tmp; tmp = msg->host.type; msg->host.size = msg->sender.type; msg->sender.type = tmp; memcpy(tmp_s, msg->host.uname, 256); memcpy(msg->host.uname, msg->sender.uname, 256); memcpy(msg->sender.uname, tmp_s, 256); } char *get_ais_data(AIS_Message *msg) { int rc = BZ_OK; char *uncompressed = NULL; unsigned int new_size = msg->size; if(msg->is_compressed == FALSE) { uncompressed = strdup(msg->data); } else { ais_malloc0(uncompressed, new_size); rc = BZ2_bzBuffToBuffDecompress( uncompressed, &new_size, msg->data, msg->compressed_size, 1, 0); if(rc != BZ_OK) { ais_info("rc=%d, new=%u expected=%u", rc, new_size, msg->size); } AIS_ASSERT(rc == BZ_OK); AIS_ASSERT(new_size == msg->size); } return uncompressed; } int send_cluster_msg( enum crm_ais_msg_types type, const char *host, const char *data) { int rc = 0; int data_len = 0; AIS_Message *ais_msg = NULL; int total_size = sizeof(AIS_Message); ENTER(""); AIS_ASSERT(local_nodeid != 0); if(data != NULL) { data_len = 1 + strlen(data); total_size += data_len; } ais_malloc0(ais_msg, total_size); ais_msg->header.size = total_size; ais_msg->header.id = 0; ais_msg->size = data_len; memcpy(ais_msg->data, data, data_len); ais_msg->sender.type = crm_msg_ais; ais_msg->host.type = type; ais_msg->host.id = 0; if(host) { ais_msg->host.size = strlen(host); memset(ais_msg->host.uname, 0, MAX_NAME); memcpy(ais_msg->host.uname, host, ais_msg->host.size); /* ais_msg->host.id = nodeid_lookup(host); */ } else { ais_msg->host.type = type; ais_msg->host.size = 0; memset(ais_msg->host.uname, 0, MAX_NAME); } rc = send_cluster_msg_raw(ais_msg); ais_free(ais_msg); LEAVE(""); return rc; } int send_client_msg( void *conn, enum crm_ais_msg_class class, enum crm_ais_msg_types type, const char *data) { int rc = 0; int data_len = 0; int total_size = sizeof(AIS_Message); AIS_Message *ais_msg = NULL; static int msg_id = 0; ENTER(""); AIS_ASSERT(local_nodeid != 0); msg_id++; AIS_ASSERT(msg_id != 0 /* wrap-around */); if(data != NULL) { data_len = 1 + strlen(data); } total_size += data_len; ais_malloc0(ais_msg, total_size); ais_msg->id = msg_id; ais_msg->header.size = total_size; ais_msg->header.id = class; ais_msg->size = data_len; memcpy(ais_msg->data, data, data_len); ais_msg->host.type = type; ais_msg->host.size = 0; memset(ais_msg->host.uname, 0, MAX_NAME); ais_msg->host.id = 0; ais_msg->sender.type = crm_msg_ais; ais_msg->sender.size = local_uname_len; memset(ais_msg->sender.uname, 0, MAX_NAME); memcpy(ais_msg->sender.uname, local_uname, ais_msg->sender.size); ais_msg->sender.id = local_nodeid; rc = 1; if (conn == NULL) { ais_err("No connection"); } else if (!libais_connection_active(conn)) { ais_warn("Connection no longer active"); /* } else if ((queue->size - 1) == queue->used) { */ /* ais_err("Connection is throttled: %d", queue->size); */ } else { +#ifndef AIS_WHITETANK rc = openais_conn_send_response (conn, ais_msg, total_size); +#else + rc = openais_dispatch_send (conn, ais_msg, total_size); +#endif AIS_CHECK(rc == 0, ais_err("Message not sent (%d): %s", rc, data?data:"")); } - ais_debug_5("Sent %d:%s", class, data); ais_free(ais_msg); LEAVE(""); return rc; } int objdb_get_string( struct objdb_iface_ver0 *objdb, unsigned int object_service_handle, char *key, char **value, const char *fallback) { *value = NULL; if(object_service_handle > 0) { objdb->object_key_get( object_service_handle, key, strlen(key), (void**)value, NULL); } if (*value) { ais_info("Found '%s' for option %s", *value, key); return 0; } ais_info("Defaulting to '%s' for option %s", fallback, key); *value = ais_strdup(fallback); return -1; } int objdb_get_int( struct objdb_iface_ver0 *objdb, unsigned int object_service_handle, char *key, unsigned int *int_value, const char *fallback) { char *value = NULL; objdb_get_string(objdb, object_service_handle, key, &value, fallback); if (value) { *int_value = atoi(value); return 0; } return -1; } diff --git a/crm/ais/utils.h b/crm/ais/utils.h index bf34e44ce6..540fbd5bf6 100644 --- a/crm/ais/utils.h +++ b/crm/ais/utils.h @@ -1,180 +1,192 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef AIS_CRM_UTILS__H #define AIS_CRM_UTILS__H +#include +#include +#ifdef AIS_WHITETANK +# include +# define openais_conn_partner_get(conn) conn +extern int openais_response_send (void *conn, void *msg, int mlen); +extern int openais_dispatch_send (void *conn, void *msg, int mlen); +static int libais_connection_active (void *conn) { return 1; } + +#else +# include +LOGSYS_DECLARE_SUBSYS("crm", LOG_LEVEL_DEBUG); + /* from openais/exec/ipc.h */ extern int openais_conn_send_response (void *conn, void *msg, int mlen); extern int libais_connection_active (void *conn); +#endif -#include -#include -LOGSYS_DECLARE_SUBSYS("crm", LOG_LEVEL_DEBUG); /* #include "plugin.h" */ #define SIZEOF(a) (sizeof(a) / sizeof(a[0])) #define CRM_MESSAGE_IPC_ACK 1 #define CRM_SERVICE 16 typedef struct crm_child_s { int pid; long flag; long flags; + int start_seq; int respawn_count; gboolean respawn; const char *name; int uid; const char *command; void *conn; void *async_conn; } crm_child_t; extern void destroy_ais_node(gpointer data); extern void delete_member(uint32_t id, const char *uname); extern int update_member(unsigned int id, unsigned long long seq, int32_t votes, uint32_t procs, const char *uname, const char *state); extern const char *member_uname(uint32_t id); extern char *append_member(char *data, crm_node_t *node); extern void member_loop_fn(gpointer key, gpointer value, gpointer user_data); extern gboolean stop_child(crm_child_t *child, int signal); extern gboolean spawn_child(crm_child_t *child); extern void swap_sender(AIS_Message *msg); extern char *get_ais_data(AIS_Message *msg); extern gboolean route_ais_message(AIS_Message *msg, gboolean local); extern gboolean process_ais_message(AIS_Message *msg); extern int send_cluster_msg( enum crm_ais_msg_types type, const char *host, const char *data); extern int send_client_msg(void *conn, enum crm_ais_msg_class class, enum crm_ais_msg_types type, const char *data); extern void send_member_notification(void); extern void log_ais_message(int level, AIS_Message *msg); extern int objdb_get_int( struct objdb_iface_ver0 *objdb, unsigned int object_service_handle, char *key, unsigned int *int_value, const char *fallback); extern int objdb_get_string( struct objdb_iface_ver0 *objdb, unsigned int object_service_handle, char *key, char **value, const char *fallback); extern GHashTable *membership_list; extern pthread_t crm_wait_thread; extern int plugin_log_level; extern char *local_uname; extern int local_uname_len; extern unsigned int local_nodeid; extern unsigned long long membership_seq; extern int in_shutdown; static inline const char *level2char(int level) { switch(level) { case LOG_CRIT: return "CRIT"; case LOG_ERR: return "ERROR"; case LOG_WARNING: return "WARN"; case LOG_NOTICE: return "notice"; case LOG_INFO: return "info"; } return "debug"; } #define do_ais_log(level, fmt, args...) do { \ if(plugin_log_level < (level)) { \ continue; \ } else if((level) > LOG_DEBUG) { \ log_printf(LOG_DEBUG, "debug%d: %s: " fmt, \ level-LOG_INFO, __PRETTY_FUNCTION__ , ##args); \ } else { \ log_printf(level, "%s: %s: " fmt, level2char(level), \ __PRETTY_FUNCTION__ , ##args); \ } \ } while(0) #define ais_perror(fmt, args...) log_printf( \ LOG_ERR, "%s: " fmt ": (%d) %s", \ __PRETTY_FUNCTION__ , ##args, errno, strerror(errno)) #define ais_crit(fmt, args...) do_ais_log(LOG_CRIT, fmt , ##args) #define ais_err(fmt, args...) do_ais_log(LOG_ERR, fmt , ##args) #define ais_warn(fmt, args...) do_ais_log(LOG_WARNING, fmt , ##args) #define ais_notice(fmt, args...) do_ais_log(LOG_NOTICE, fmt , ##args) #define ais_info(fmt, args...) do_ais_log(LOG_INFO, fmt , ##args) #define ais_debug(fmt, args...) do_ais_log(LOG_DEBUG, fmt , ##args) #define ais_debug_2(fmt, args...) do_ais_log(LOG_DEBUG+1, fmt , ##args) #define ais_debug_3(fmt, args...) do_ais_log(LOG_DEBUG+2, fmt , ##args) #define ais_debug_4(fmt, args...) do_ais_log(LOG_DEBUG+3, fmt , ##args) #define ais_debug_5(fmt, args...) do_ais_log(LOG_DEBUG+4, fmt , ##args) #define ais_debug_6(fmt, args...) do_ais_log(LOG_DEBUG+5, fmt , ##args) #define ais_malloc0(malloc_obj, length) do { \ malloc_obj = malloc(length); \ if(malloc_obj == NULL) { \ abort(); \ } \ memset(malloc_obj, 0, length); \ } while(0) #define ais_free(obj) do { \ if(obj) { \ free(obj); \ obj = NULL; \ } \ } while(0) #define AIS_ASSERT(expr) if((expr) == FALSE) { \ ais_crit("Assertion failure line %d: %s", __LINE__, #expr); \ abort(); \ } #define AIS_CHECK(expr, failure_action) if((expr) == FALSE) { \ ais_err("Non fatal assertion failure line %d: %s", __LINE__, #expr); \ failure_action; \ } static inline char *ais_strdup(const char *src) { char *dup = NULL; if(src == NULL) { return NULL; } ais_malloc0(dup, strlen(src) + 1); return strcpy(dup, src); } static inline gboolean ais_str_eq(const char *a, const char *b) { if(a == NULL || b == NULL) { return FALSE; } else if(a == b) { return TRUE; } else if(strcasecmp(a, b) == 0) { return TRUE; } return FALSE; } #endif diff --git a/cts/CTSlab.py.in b/cts/CTSlab.py.in index 9757d933e5..a8b263e0bf 100755 --- a/cts/CTSlab.py.in +++ b/cts/CTSlab.py.in @@ -1,841 +1,848 @@ #!@PYTHON@ '''CTS: Cluster Testing System: Lab environment module ''' __copyright__=''' Copyright (C) 2001,2005 Alan Robertson Licensed under the GNU GPL. ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. from UserDict import UserDict import sys, time, types, string, syslog, random, os, string, signal, traceback from CTS import ClusterManager from CM_hb import HeartbeatCM from CTStests import BSC_AddResource from socket import gethostbyname_ex tests = None cm = None old_handler = None DefaultFacility = "daemon" def sig_handler(signum, frame) : if cm != None: cm.log("Interrupted by signal %d"%signum) if signum == 10 and tests != None : tests.summarize() if signum == 15 : sys.exit(1) class ResetMechanism: def reset(self, node): raise ValueError("Abstract class member (reset)") class Stonith(ResetMechanism): def __init__(self, sttype="external/ssh", pName=None, pValue=None , path="@sbindir@/stonith"): self.pathname=path self.configName=pName self.configValue=pValue self.stonithtype=sttype def reset(self, node): if self.configValue == None : config=node else: config=self.configValue cmdstring = "%s -t '%s' -p '%s' '%s' 2>/dev/null" % (self.pathname , self.stonithtype, config, node) return (os.system(cmdstring) == 0) class Stonithd(ResetMechanism): def __init__(self, nodes, sttype = 'external/ssh'): self.sttype = sttype self.nodes = nodes self.query_cmd_pat = '@libdir@/heartbeat/stonithdtest/apitest 0 %s 20000 0' self.reset_cmd_pat = '@libdir@/heartbeat/stonithdtest/apitest 1 %s 20000 0' self.poweron_cmd_pat = '@libdir@/heartbeat/stonithdtest/apitest 2 %s 20000 0' self.poweroff_cmd_pat= '@libdir@/heartbeat/stonithdtest/apitest 3 %s 20000 0' self.lrmd_add_pat = '@libdir@/heartbeat/lrmadmin -A %s stonith ' + sttype + ' NULL hostlist=%s' self.lrmd_start_pat = '@libdir@/heartbeat/lrmadmin -E %s start 0 0 EVERYTIME' self.lrmd_stop_pat = '@libdir@/heartbeat/lrmadmin -E %s stop 0 0 EVERYTIME' self.lrmd_del_pat = '@libdir@/heartbeat/lrmadmin -D %s' self.rsc_id = 'my_stonithd_id' self.command = "@SSH@ -l root -n -x" self.command_noblock = "@SSH@ -f -l root -n -x" self.stonithd_started_nodes = [] self.fail_reason = '' def _remote_exec(self, node, cmnd): return (os.system("%s %s %s > /dev/null" % (self.command, node, cmnd)) == 0) def _remote_readlines(self, node, cmnd): f = os.popen("%s %s %s" % (self.command, node, cmnd)) return f.readlines() def _stonithd_started(self, node): return node in self.stonithd_started_nodes def _start_stonithd(self, node, hosts): hostlist = string.join(hosts, ',') lrmd_add_cmd = self.lrmd_add_pat % (self.rsc_id, hostlist) ret = self._remote_exec(node, lrmd_add_cmd) if not ret:return ret lrmd_start_cmd = self.lrmd_start_pat % self.rsc_id ret = self._remote_exec(node, lrmd_start_cmd) if not ret:return ret self.stonithd_started_nodes.append(node) return 1 def _stop_stonithd(self, node): lrmd_stop_cmd = self.lrmd_stop_pat % self.rsc_id ret = self._remote_exec(node, lrmd_stop_cmd) if not ret:return ret lrmd_del_cmd = self.lrmd_del_pat % self.rsc_id ret = self._remote_exec(node, lrmd_del_cmd) if not ret:return ret self.stonithd_started_nodes.remove(node) return 1 def _do_stonith(self, init_node, target_node, action): stonithd_started = self._stonithd_started(init_node) if not stonithd_started: ret = self._start_stonithd(init_node, [target_node]) if not ret: self.fail_reason = "failed to start stonithd on node %s" % init_node return ret command = "" if action == "RESET": command = self.reset_cmd_pat % target_node elif action == "POWEROFF": command = self.poweroff_cmd_pat % target_node elif action == "POWERON": command = self.poweron_cmd_pat % target_node else: self.fail_reason = "unknown opration type %s" % action return 0 lines = self._remote_readlines(init_node, command) result = "".join(lines) if not stonithd_started: self._stop_stonithd(init_node) index = result.find("result=0") if index == -1: self.fail_reason = "unexpected stonithd status: %s" % result return 0 return 1 # Should we randomly choose a node as init_node here if init_node not specified? def reset(self, init_node, target_node): return self._do_stonith(init_node, target_node, "RESET") def poweron(self, init_node, target_node): return self._do_stonith(init_node, target_node, "POWERON") def poweroff(self, init_node, target_node): return self._do_stonith(init_node, target_node, "POWEROFF") class Logger: TimeFormat = "%b %d %H:%M:%S\t" def __call__(self, lines): raise ValueError("Abstract class member (__call__)") def write(self, line): return self(line.rstrip()) def writelines(self, lines): for s in lines: self.write(s) return 1 def flush(self): return 1 def isatty(self): return None class SysLog(Logger): # http://docs.python.org/lib/module-syslog.html defaultsource="CTS" map = { "kernel": syslog.LOG_KERN, "user": syslog.LOG_USER, "mail": syslog.LOG_MAIL, "daemon": syslog.LOG_DAEMON, "auth": syslog.LOG_AUTH, "lpr": syslog.LOG_LPR, "news": syslog.LOG_NEWS, "uucp": syslog.LOG_UUCP, "cron": syslog.LOG_CRON, "local0": syslog.LOG_LOCAL0, "local1": syslog.LOG_LOCAL1, "local2": syslog.LOG_LOCAL2, "local3": syslog.LOG_LOCAL3, "local4": syslog.LOG_LOCAL4, "local5": syslog.LOG_LOCAL5, "local6": syslog.LOG_LOCAL6, "local7": syslog.LOG_LOCAL7, } def __init__(self, labinfo): if labinfo.has_key("syslogsource"): self.source=labinfo["syslogsource"] else: self.source=SysLog.defaultsource if labinfo.has_key("SyslogFacility"): self.facility=labinfo["SyslogFacility"] else: self.facility=DefaultFacility if SysLog.map.has_key(self.facility): self.facility=SysLog.map[self.facility] syslog.openlog(self.source, 0, self.facility) def setfacility(self, facility): self.facility = facility if SysLog.map.has_key(self.facility): self.facility=SysLog.map[self.facility] syslog.closelog() syslog.openlog(self.source, 0, self.facility) def __call__(self, lines): if isinstance(lines, types.StringType): syslog.syslog(lines) else: for line in lines: syslog.syslog(line) def name(self): return "Syslog" class StdErrLog(Logger): def __init__(self, labinfo): pass def __call__(self, lines): t = time.strftime(Logger.TimeFormat, time.localtime(time.time())) if isinstance(lines, types.StringType): sys.__stderr__.writelines([t, lines, "\n"]) else: for line in lines: sys.__stderr__.writelines([t, line, "\n"]) sys.__stderr__.flush() def name(self): return "StdErrLog" class FileLog(Logger): def __init__(self, labinfo, filename=None): if filename == None: filename=labinfo["LogFileName"] self.logfile=filename import os self.hostname = os.uname()[1]+" " self.source = "CTS: " def __call__(self, lines): fd = open(self.logfile, "a") t = time.strftime(Logger.TimeFormat, time.localtime(time.time())) if isinstance(lines, types.StringType): fd.writelines([t, self.hostname, self.source, lines, "\n"]) else: for line in lines: fd.writelines([t, self.hostname, self.source, line, "\n"]) fd.close() def name(self): return "FileLog" class CtsLab(UserDict): '''This class defines the Lab Environment for the Cluster Test System. It defines those things which are expected to change from test environment to test environment for the same cluster manager. It is where you define the set of nodes that are in your test lab what kind of reset mechanism you use, etc. This class is derived from a UserDict because we hold many different parameters of different kinds, and this provides provide a uniform and extensible interface useful for any kind of communication between the user/administrator/tester and CTS. At this point in time, it is the intent of this class to model static configuration and/or environmental data about the environment which doesn't change as the tests proceed. Well-known names (keys) are an important concept in this class. The HasMinimalKeys member function knows the minimal set of well-known names for the class. The following names are standard (well-known) at this time: nodes An array of the nodes in the cluster reset A ResetMechanism object logger An array of objects that log strings... CMclass The type of ClusterManager we are running (This is a class object, not a class instance) RandSeed Random seed. It is a triple of bytes. (optional) HAdir Base directory for HA installation The CTS code ignores names it doesn't know about/need. The individual tests have access to this information, and it is perfectly acceptable to provide hints, tweaks, fine-tuning directions or other information to the tests through this mechanism. ''' def __init__(self, nodes): self.data = {} self["nodes"] = nodes self.MinimalKeys=["nodes", "reset", "logger", "CMclass", "HAdir"] def HasMinimalKeys(self): 'Return TRUE if our object has the minimal set of keys/values in it' result = 1 for key in self.MinimalKeys: if not self.has_key(key): result = None return result def SupplyDefaults(self): if not self.has_key("logger"): self["logger"] = (SysLog(self), StdErrLog(self)) if not self.has_key("reset"): self["reset"] = Stonith() if not self.has_key("CMclass"): self["CMclass"] = HeartbeatCM if not self.has_key("LogFileName"): self["LogFileName"] = "@HA_VARLOGDIR@/ha-log" if not self.has_key("logrestartcmd"): self["logrestartcmd"] = "@INITDIR@/syslog restart" if not self.has_key("logfacility"): LogFacility = DefaultFacility # # Now set up our random number generator... # self.RandomGen = random.Random() # Get a random seed for the random number generator. if self.has_key("RandSeed"): randseed = self["RandSeed"] self.log("Random seed is: " + str(randseed)) self.RandomGen.seed(str(randseed)) else: randseed = int(time.time()) self.log("Random seed is: " + str(randseed)) self.RandomGen.seed(str(randseed)) def log(self, args): "Log using each of the supplied logging methods" for logfcn in self._logfunctions: logfcn(string.strip(args)) def debug(self, args): "Log using each of the supplied logging methods" for logfcn in self._logfunctions: if logfcn.name() != "StdErrLog": logfcn("debug: %s" % string.strip(args)) def __setitem__(self, key, value): '''Since this function gets called whenever we modify the dictionary (object), we can (and do) validate those keys that we know how to validate. For the most part, we know how to validate the "MinimalKeys" elements. ''' # # List of nodes in the system # if key == "nodes": self.Nodes = {} for node in value: # I don't think I need the IP address, etc. but this validates # the node name against /etc/hosts and/or DNS, so it's a # GoodThing(tm). try: self.Nodes[node] = gethostbyname_ex(node) except: print node+" not found in DNS... aborting" raise # # Reset Mechanism # elif key == "reset": if not issubclass(value.__class__, ResetMechanism): raise ValueError("'reset' Value must be a subclass" " of ResetMechanism") # # List of Logging Mechanism(s) # elif key == "logger": if len(value) < 1: raise ValueError("Must have at least one logging mechanism") for logger in value: if not callable(logger): raise ValueError("'logger' elements must be callable") self._logfunctions = value # # Cluster Manager Class # elif key == "CMclass": if not issubclass(value, ClusterManager): raise ValueError("'CMclass' must be a subclass of" " ClusterManager") # # Initial Random seed... # #elif key == "RandSeed": # if len(value) != 3: # raise ValueError("'Randseed' must be a 3-element list/tuple") # for elem in value: # if not isinstance(elem, types.IntType): # raise ValueError("'Randseed' list must all be ints") self.data[key] = value def IsValidNode(self, node): 'Return TRUE if the given node is valid' return self.Nodes.has_key(node) def __CheckNode(self, node): "Raise a ValueError if the given node isn't valid" if not self.IsValidNode(node): raise ValueError("Invalid node [%s] in CheckNode" % node) def RandomNode(self): '''Choose a random node from the cluster''' return self.RandomGen.choice(self["nodes"]) def ResetNode(self, node): "Reset a node, (normally) using a hardware mechanism" self.__CheckNode(node) return self["reset"].reset(node) def ResetNode2(self, init_node, target_node, reasons): self.__CheckNode(target_node) stonithd = Stonithd(self["nodes"]) ret = stonithd.reset(init_node, target_node) if not ret: reasons.append(stonithd.fail_reason) return ret def usage(arg): print "Illegal argument " + arg print "usage: " + sys.argv[0] \ + " --directory config-directory" \ + " -D config-directory" \ + " --logfile system-logfile-name" \ + " --trunc (truncate logfile before starting)" \ + " -L system-logfile-name" \ + " --limit-nodes maxnumnodes" \ + " --xmit-loss lost-rate(0.0-1.0)" \ + " --recv-loss lost-rate(0.0-1.0)" \ + " --suppressmonitoring" \ + " --syslog-facility syslog-facility" \ + " --facility syslog-facility" \ + " --choose testcase-name" \ + " --test-ip-base ip" \ + " --oprofile \"whitespace separated list of nodes to oprofile\"" \ + " (-2 |"\ + " -v2 |"\ + " --crm |"\ + " --classic)"\ + " (--populate-resources | -r)" \ + " --resource-can-stop" \ + " --stonith (1 | 0 | yes | no)" \ + " --stonith-type type" \ + " --stonith-args name=value" \ + " --standby (1 | 0 | yes | no)" \ + " --fencing (1 | 0 | yes | no)" \ + " --suppress_cib_writes (1 | 0 | yes | no)" \ + " -lstests" \ + " --seed" \ + " [number-of-iterations]" sys.exit(1) # # A little test code... # if __name__ == '__main__': from CTSaudits import AuditList from CTStests import TestList,RandomTests from CTS import Scenario, InitClusterManager, PingFest, PacketLoss, BasicSanityCheck import CM_hb HAdir = "@sysconfdir@/ha.d" LogFile = "@HA_VARLOGDIR@/ha-log-"+DefaultFacility DoStonith = 1 DoStandby = 1 DoFencing = 1 NumIter = 500 SuppressMonitoring = None Version = 1 CIBfilename = None CIBResource = 0 ClobberCIB = 0 LimitNodes = 0 TestCase = None LogFacility = None TruncateLog = 0 ResCanStop = 0 XmitLoss = "0.0" RecvLoss = "0.0" IPBase = "127.0.0.10" SuppressCib = 1 DoBSC = 0 ListTests = 0 HaveSeed = 0 oprofile = None warn_inactive = 0 StonithType = "ssh" StonithParams = None StonithParams = "hostlist=dynamic".split('=') + node_list = '' # # The values of the rest of the parameters are now properly derived from # the configuration files. # # Stonith is configurable because it's slow, I have a few machines which # don't reboot very reliably, and it can mild damage to your machine if # you're using a real power switch. # # Standby is configurable because the test is very heartbeat specific # and I haven't written the code to set it properly yet. Patches are # being accepted... # Set the signal handler signal.signal(15, sig_handler) signal.signal(10, sig_handler) # Process arguments... skipthis=None args=sys.argv[1:] for i in range(0, len(args)): if skipthis: skipthis=None continue elif args[i] == "-D" or args[i] == "--directory": skipthis=1 HAdir = args[i+1] elif args[i] == "-l" or args[i] == "--limit-nodes": skipthis=1 LimitNodes = int(args[i+1]) elif args[i] == "-r" or args[i] == "--populate-resources": CIBResource = 1 elif args[i] == "-L" or args[i] == "--logfile": skipthis=1 LogFile = args[i+1] elif args[i] == "--test-ip-base": skipthis=1 IPBase = args[i+1] elif args[i] == "--oprofile": skipthis=1 oprofile = args[i+1].split(' ') elif args[i] == "--trunc": TruncateLog=1 elif args[i] == "-v2": Version=2 elif args[i] == "-lstests": ListTests=1 elif args[i] == "--stonith": skipthis=1 if args[i+1] == "1" or args[i+1] == "yes": DoStonith=1 elif args[i+1] == "0" or args[i+1] == "no": DoStonith=0 else: usage(args[i+1]) elif args[i] == "--stonith-type": StonithType = args[i+1] skipthis=1 elif args[i] == "--stonith-args": StonithParams = args[i+1].split('=') skipthis=1 elif args[i] == "--suppress-cib-writes": skipthis=1 if args[i+1] == "1" or args[i+1] == "yes": SuppressCib=1 elif args[i+1] == "0" or args[i+1] == "no": SuppressCib=0 else: usage(args[i+1]) elif args[i] == "--bsc": DoBSC=1 elif args[i] == "--standby": skipthis=1 if args[i+1] == "1" or args[i+1] == "yes": DoStandby=1 elif args[i+1] == "0" or args[i+1] == "no": DoStandby=0 else: usage(args[i+1]) elif args[i] == "--fencing": skipthis=1 if args[i+1] == "1" or args[i+1] == "yes": DoFencing=1 elif args[i+1] == "0" or args[i+1] == "no": DoFencing=0 else: usage(args[i+1]) elif args[i] == "--suppressmonitoring": SuppressMonitoring = 1 elif args[i] == "--resource-can-stop": ResCanStop = 1 elif args[i] == "-3" or args[i] == "--ais": Version = 3 elif args[i] == "-2" or args[i] == "--crm": Version = 2 elif args[i] == "-1" or args[i] == "--classic": Version = 1 elif args[i] == "--clobber-cib" or args[i] == "-c": ClobberCIB = 1 elif args[i] == "--cib-filename": skipthis=1 CIBfilename = args[i+1] elif args[i] == "--xmit-loss": try: float(args[i+1]) except ValueError: print ("--xmit-loss parameter should be float") usage(args[i+1]) skipthis=1 XmitLoss = args[i+1] elif args[i] == "--recv-loss": try: float(args[i+1]) except ValueError: print ("--recv-loss parameter should be float") usage(args[i+1]) skipthis=1 RecvLoss = args[i+1] elif args[i] == "--choose": skipthis=1 TestCase = args[i+1] + elif args[i] == "--nodes": + skipthis=1 + node_list = args[i+1].split(' ') elif args[i] == "--syslog-facility" or args[i] == "--facility": skipthis=1 LogFacility = args[i+1] elif args[i] == "--seed": skipthis=1 Seed=args[i+1] HaveSeed = 1 elif args[i] == "--warn-inactive": warn_inactive = 1 else: NumIter=int(args[i]) if not oprofile: oprofile = [] # # This reading of HBconfig here is ugly, and I suppose ought to # be done by the Cluster manager. This would probably mean moving the # list of cluster nodes into the ClusterManager class. A good thought # for our Copious Spare Time in the future... # config = CM_hb.HBConfig(HAdir) - node_list = config.Parameters["node"] + if not node_list: + node_list = config.Parameters["node"] + else: + config.Parameters["node"] = node_list if DoBSC: NumIter = 2 Version = 2 while len(node_list) > 1: node_list.pop(len(node_list)-1) if LogFacility == None: if config.Parameters.has_key("logfacility"): LogFacility = config.Parameters["logfacility"][0] else: LogFacility = DefaultFacility if LimitNodes > 0: if len(node_list) > LimitNodes: print("Limiting the number of nodes configured=%d (max=%d)" %(len(node_list), LimitNodes)) while len(node_list) > LimitNodes: node_list.pop(len(node_list)-1) if StonithParams[0] == "hostlist": StonithParams[1] = string.join(node_list, " ") # alt_list = [] # for node in node_list: # alt_list.append(string.lower(node)) # node_list = alt_list Environment = CtsLab(node_list) Environment["HAdir"] = HAdir Environment["ClobberCIB"] = ClobberCIB Environment["CIBfilename"] = CIBfilename Environment["CIBResource"] = CIBResource Environment["LogFileName"] = LogFile Environment["DoStonith"] = DoStonith Environment["SyslogFacility"] = LogFacility Environment["DoStandby"] = DoStandby Environment["DoFencing"] = DoFencing Environment["ResCanStop"] = ResCanStop Environment["SuppressMonitoring"] = SuppressMonitoring Environment["XmitLoss"] = XmitLoss Environment["RecvLoss"] = RecvLoss Environment["IPBase"] = IPBase Environment["SuppressCib"] = SuppressCib Environment["DoBSC"] = 0 Environment["use_logd"] = 0 Environment["logfacility"] = LogFacility Environment["oprofile"] = oprofile Environment["warn-inactive"] = warn_inactive if config.Parameters.has_key("use_logd"): Environment["use_logd"] = 1 if Version == 2: from CM_LinuxHAv2 import LinuxHAv2 Environment['CMclass']=LinuxHAv2 if Version == 3: from CM_ais import crm_ais Environment['CMclass'] = crm_ais Environment["DoStonith"] = 0 Environment["DoFencing"] = 0 Environment["use_logd"] = 0 if HaveSeed: Environment["RandSeed"] = Seed Environment["reset"] = Stonith(sttype=StonithType, pName=StonithParams[0], pValue=StonithParams[1]) if DoBSC: Environment["DoBSC"] = 1 Environment["ClobberCIB"] = 1 Environment["CIBResource"] = 0 Environment["logger"] = (FileLog(Environment), StdErrLog(Environment)) scenario = Scenario([ BasicSanityCheck(Environment) ]) else: scenario = Scenario( [ InitClusterManager(Environment), PacketLoss(Environment)]) Environment.SupplyDefaults() # Your basic start up the world type of test scenario... #scenario = Scenario( #[ InitClusterManager(Environment) #, PingFest(Environment)]) # Create the Cluster Manager object cm = Environment['CMclass'](Environment) if TruncateLog: cm.log("Truncating %s" % LogFile) lf = open(LogFile, "w"); if lf != None: lf.truncate(0) lf.close() cm.log(">>>>>>>>>>>>>>>> BEGINNING " + repr(NumIter) + " TESTS ") cm.log("Version: %d" % Version) cm.log("HA configuration directory: " + Environment["HAdir"]) cm.log("System log files: " + Environment["LogFileName"]) cm.log("Enable Stonith: %d" % Environment["DoStonith"]) cm.log("Enable Fencing: %d" % Environment["DoFencing"]) cm.log("Enable Standby: %d" % Environment["DoStandby"]) cm.log("Enable Resources: %d" % Environment["CIBResource"]) if Environment.has_key("SuppressMonitoring") \ and Environment["SuppressMonitoring"]: cm.log("Resource Monitoring is disabled") cm.ns.WaitForAllNodesToComeUp(config.Parameters["node"]) cm.log("Cluster nodes: ") for node in config.Parameters["node"]: (rc, lines) = cm.rsh.remote_py(node, "os", "system", "@sbindir@/crm_uuid") if not lines: cm.log(" * %s: __undefined_uuid__" % node) else: out=lines[0] out = out[:-1] cm.log(" * %s: %s" % (node, out)) Audits = AuditList(cm) Tests = [] if Environment["DoBSC"]: test = BSC_AddResource(cm) Tests.append(test) elif TestCase != None: for test in TestList(cm): if test.name == TestCase: Tests.append(test) if Tests == []: usage("--choose: No applicable/valid tests chosen") else: Tests = TestList(cm) if ListTests == 1 : cm.log("Total %d tests"%len(Tests)) for test in Tests : cm.log(str(test.name)); sys.exit(0) tests = RandomTests(scenario, cm, Tests, Audits) Environment.RandomTests = tests try : overall, detailed = tests.run(NumIter) except : cm.Env.log("Exception by %s" % sys.exc_info()[0]) for logmethod in Environment["logger"]: traceback.print_exc(50, logmethod) tests.summarize() if tests.Stats["failure"] > 0: sys.exit(tests.Stats["failure"]) elif tests.Stats["success"] != NumIter: cm.Env.log("No failure count but success != requested iterations") sys.exit(1) diff --git a/doc/Makefile.am b/doc/Makefile.am index f704fd5635..2b01a9c831 100644 --- a/doc/Makefile.am +++ b/doc/Makefile.am @@ -1,53 +1,53 @@ # # doc: Linux-HA heartbeat code # # Copyright (C) 2001 Michael Moerz # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in docdir = $(datadir)/doc/pacakges/@HB_PKG@-@VERSION@ htmlfiles = HardwareGuide.html \ GettingStarted.html \ Requirements.html \ faqntips.html \ heartbeat_api.html \ rsync.html txtfiles = $(htmlfiles:.html=.txt) CLEANFILES = ChangeLog $(txtfiles) -SPECSRC = $(top_builddir)/heartbeat.spec +SPECSRC = $(top_builddir)/pacemaker.spec OTHER_DOCS = AUTHORS COPYING COPYING.LGPL ChangeLog \ README authkeys ha.cf startstop haresources \ DirectoryMap.txt apphbd.cf logd.cf #doc_DATA = $(OTHER_DOCS) $(txtfiles) $(htmlfiles) man_MANS = cibadmin.8 crm_resource.8 EXTRA_DIST = $(man_MANS) ChangeLog: $(SPECSRC) rm -fr ChangeLog sed -e '1,/^%changelog/d' -e '/^%/,$$d' < $(SPECSRC) > $@ %.txt: %.html if [ "X$(HTML2TXT)" = "X" ]; then echo "Lynx or w3m or user-defined HTML2TXT required to convert $< to $@" >$@ ; else $(HTML2TXT) -dump $< >$@ ; fi diff --git a/include/crm/common/Makefile.am b/fencing/Makefile.am similarity index 81% copy from include/crm/common/Makefile.am copy to fencing/Makefile.am index c07bc616d5..db0a495be7 100644 --- a/include/crm/common/Makefile.am +++ b/fencing/Makefile.am @@ -1,23 +1,20 @@ -# -# Copyright (C) 2004 Andrew Beekhof +# Author: Sun Jiang Dong +# Copyright (c) 2004 International Business Machines # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # - MAINTAINERCLEANFILES = Makefile.in -headerdir=$(includedir)/@PKG_NAME@/crm/common - -header_HEADERS = xml.h ipc.h msg.h ctrl.h util.h iso8601.h +SUBDIRS = stonithd test diff --git a/include/Makefile.am b/include/Makefile.am index 4464602d19..1d18390e18 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -1,64 +1,64 @@ # # linux-ha: Linux-HA heartbeat code # # Copyright (C) 2001 Michael Moerz # This instance created by Horms # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in ha_version.h config.h.in -EXTRA_DIST = ha_version.h +EXTRA_DIST = ha_version.h crm_internal.h headerdir=$(includedir)/@PKG_NAME@ noinst_HEADERS = portability.h config.h ha_version.h header_HEADERS = crm_config.h -SUBDIRS = crm +SUBDIRS = crm fencing ## The backtick commands are not executed here, ## but rather as macro-expansions at use within the rules. HG_LIVE_VERSION=`$(HG) -R "$(top_srcdir)" id` ARCHIVE_VERSION="$(top_srcdir)/.hg_archival.txt" HG_TAR_VERSION=`$(EGREP) node: "$(ARCHIVE_VERSION)"` ha_version.h: $(ARCHIVE_VERSION) if [ -r ha_version.h -a ! -w ha_version.h ]; then \ hgv=""; \ echo "Saved Version"; \ elif [ -f $(ARCHIVE_VERSION) ]; then \ hgv="$(HG_TAR_VERSION)"; \ echo "Hg Archived Version: $${hgv}"; \ elif [ -x $(HG) -a -d $(top_srcdir)/.hg ]; then \ hgv="$(HG_LIVE_VERSION)"; \ echo "Hg Live Version: $${hgv}"; \ elif [ -r ha_version.h ]; then \ hgv=""; \ echo "Hg Saved Live Version"; \ cat ha_version.h; \ else \ hgv="Unknown"; \ echo "Unknown Hg Version"; \ fi ; \ if [ X"$${hgv}" != "X" ]; then \ echo "/* $${hgv} */" > ha_version.h; \ echo "#define HA_HG_VERSION \"$${hgv}\"" >> ha_version.h; \ fi .PHONY: $(ARCHIVE_VERSION) diff --git a/include/crm/Makefile.am b/include/crm/Makefile.am index b8b35f59df..0951f0bee2 100644 --- a/include/crm/Makefile.am +++ b/include/crm/Makefile.am @@ -1,24 +1,24 @@ # # Copyright (C) 2004 Andrew Beekhof # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in headerdir=$(includedir)/@PKG_NAME@/crm -header_HEADERS = crm.h cib.h msg_xml.h transition.h ais.h +header_HEADERS = crm.h cib.h msg_xml.h transition.h ais.h ais_common.h SUBDIRS = common pengine diff --git a/include/crm/ais.h b/include/crm/ais.h index d14bd5206d..60be30ea76 100644 --- a/include/crm/ais.h +++ b/include/crm/ais.h @@ -1,39 +1,34 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef CRM_AIS__H #define CRM_AIS__H #include #include #include -#if SUPPORT_AIS -#include -#include -#include -#endif #include extern enum crm_ais_msg_types text2msg_type(const char *text); extern enum crm_ais_msg_types crm_system_type; extern char *get_ais_data(AIS_Message *msg); extern gboolean check_message_sanity(AIS_Message *msg, char *data); #endif diff --git a/include/crm/ais_common.h b/include/crm/ais_common.h index fd32df7248..483fbcaa62 100644 --- a/include/crm/ais_common.h +++ b/include/crm/ais_common.h @@ -1,300 +1,339 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef CRM_AIS_COMMON__H #define CRM_AIS_COMMON__H #include #include #include #if SUPPORT_AIS -# include -# include -# include +# ifdef AIS_WHITETANK +/* cheap hacks for building against the stable series of openais */ + +# include + +enum service_types { + EVS_SERVICE = 0, + CLM_SERVICE = 1, + AMF_SERVICE = 2, + CKPT_SERVICE = 3, + EVT_SERVICE = 4, + LCK_SERVICE = 5, + MSG_SERVICE = 6, + CFG_SERVICE = 7, + CPG_SERVICE = 8 +}; + +typedef struct { + int size; __attribute__((aligned(8))) + int id __attribute__((aligned(8))); + SaAisErrorT error __attribute__((aligned(8))); +} mar_res_header_t __attribute__((aligned(8))); + +typedef struct { + int size __attribute__((aligned(8))); + int id __attribute__((aligned(8))); +} mar_req_header_t __attribute__((aligned(8))); + +extern SaAisErrorT saSendReceiveReply ( + int s, void *requestMessage, int requestLen, void *responseMessage, int responseLen); +extern SaAisErrorT saRecvRetry (int s, void *msg, size_t len); +extern SaAisErrorT saServiceConnect (int *responseOut, int *callbackOut, enum service_types service); + +# else +# include +# include +# include +# endif #else typedef struct { int size __attribute__((aligned(8))); int id __attribute__((aligned(8))); } mar_req_header_t __attribute__((aligned(8))); typedef struct { int size; __attribute__((aligned(8))) int id __attribute__((aligned(8))); int error __attribute__((aligned(8))); } mar_res_header_t __attribute__((aligned(8))); #endif #define MAX_NAME 256 #define AIS_IPC_NAME "ais-crm-ipc" #define CRM_NODE_LOST "lost" #define CRM_NODE_MEMBER "member" #define CRM_NODE_ACTIVE CRM_NODE_MEMBER #define CRM_NODE_INACTIVE CRM_NODE_LOST #define CRM_NODE_EVICTED "evicted" typedef struct crm_ais_host_s AIS_Host; typedef struct crm_ais_msg_s AIS_Message; enum crm_ais_msg_class { crm_class_cluster = 0, crm_class_members = 1, crm_class_notify = 2, }; /* order here matters - its used to index into the crm_children array */ enum crm_ais_msg_types { - crm_msg_none = 0, - crm_msg_ais = 1, - crm_msg_lrmd = 2, - crm_msg_cib = 3, - crm_msg_crmd = 4, - crm_msg_te = 5, - crm_msg_pe = 6, - crm_msg_attrd = 7, + crm_msg_none = 0, + crm_msg_ais = 1, + crm_msg_lrmd = 2, + crm_msg_cib = 3, + crm_msg_crmd = 4, + crm_msg_attrd = 5, + crm_msg_stonithd = 6, + crm_msg_te = 7, + crm_msg_pe = 8, }; enum crm_proc_flag { crm_proc_none = 0x00000001, crm_proc_ais = 0x00000002, crm_proc_lrmd = 0x00000010, - crm_proc_stonith = 0x00000020, crm_proc_cib = 0x00000100, crm_proc_crmd = 0x00000200, - crm_proc_pe = 0x00001000, - crm_proc_te = 0x00002000, - crm_proc_attrd = 0x00010000, + crm_proc_attrd = 0x00001000, + crm_proc_stonithd = 0x00002000, + crm_proc_pe = 0x00010000, + crm_proc_te = 0x00020000, }; typedef struct crm_peer_node_s { unsigned int id; unsigned long long born; unsigned long long last_seen; int32_t votes; uint32_t processes; char *uname; char *state; char *uuid; char *addr; char *version; } crm_node_t; struct crm_ais_host_s { uint32_t id; uint32_t pid; gboolean local; enum crm_ais_msg_types type; uint32_t size; char uname[256]; } __attribute__((packed)); struct crm_ais_msg_s { mar_res_header_t header __attribute__((aligned(8))); uint32_t id; gboolean is_compressed; AIS_Host host; AIS_Host sender; uint32_t size; uint32_t compressed_size; /* 584 bytes */ char data[0]; } __attribute__((packed)); #if SUPPORT_AIS static inline const char *ais_error2text(SaAisErrorT error) { const char *text = "unknown"; switch(error) { case SA_AIS_OK: text = "None"; break; case SA_AIS_ERR_LIBRARY: text = "Library error"; break; case SA_AIS_ERR_VERSION: text = "Version error"; break; case SA_AIS_ERR_INIT: text = "Initialization error"; break; case SA_AIS_ERR_TIMEOUT: text = "Timeout"; break; case SA_AIS_ERR_TRY_AGAIN: text = "Try again"; break; case SA_AIS_ERR_INVALID_PARAM: text = "Invalid parameter"; break; case SA_AIS_ERR_NO_MEMORY: text = "No memory"; break; case SA_AIS_ERR_BAD_HANDLE: text = "Bad handle"; break; case SA_AIS_ERR_BUSY: text = "Busy"; break; case SA_AIS_ERR_ACCESS: text = "Access error"; break; case SA_AIS_ERR_NOT_EXIST: text = "Doesn't exist"; break; case SA_AIS_ERR_NAME_TOO_LONG: text = "Name too long"; break; case SA_AIS_ERR_EXIST: text = "Exists"; break; case SA_AIS_ERR_NO_SPACE: text = "No space"; break; case SA_AIS_ERR_INTERRUPT: text = "Interrupt"; break; case SA_AIS_ERR_NAME_NOT_FOUND: text = "Name not found"; break; case SA_AIS_ERR_NO_RESOURCES: text = "No resources"; break; case SA_AIS_ERR_NOT_SUPPORTED: text = "Not supported"; break; case SA_AIS_ERR_BAD_OPERATION: text = "Bad operation"; break; case SA_AIS_ERR_FAILED_OPERATION: text = "Failed operation"; break; case SA_AIS_ERR_MESSAGE_ERROR: text = "Message error"; break; case SA_AIS_ERR_QUEUE_FULL: text = "Queue full"; break; case SA_AIS_ERR_QUEUE_NOT_AVAILABLE: text = "Queue not available"; break; case SA_AIS_ERR_BAD_FLAGS: text = "Bad flags"; break; case SA_AIS_ERR_TOO_BIG: text = "To big"; break; case SA_AIS_ERR_NO_SECTIONS: text = "No sections"; break; } return text; } #endif static inline const char *msg_type2text(enum crm_ais_msg_types type) { const char *text = "unknown"; switch(type) { case crm_msg_none: text = "unknown"; break; case crm_msg_ais: text = "ais"; break; case crm_msg_cib: text = "cib"; break; case crm_msg_crmd: text = "crmd"; break; case crm_msg_pe: text = "pengine"; break; case crm_msg_te: text = "tengine"; break; case crm_msg_lrmd: text = "lrmd"; break; case crm_msg_attrd: text = "attrd"; break; + case crm_msg_stonithd: + text = "stonithd"; + break; } return text; } static inline const char *peer2text(enum crm_proc_flag proc) { const char *text = "unknown"; switch(proc) { case crm_proc_none: text = "unknown"; break; case crm_proc_ais: text = "ais"; break; case crm_proc_cib: text = "cib"; break; case crm_proc_crmd: text = "crmd"; break; case crm_proc_pe: text = "pengine"; break; case crm_proc_te: text = "tengine"; break; case crm_proc_lrmd: text = "lrmd"; break; case crm_proc_attrd: text = "attrd"; break; - case crm_proc_stonith: - text = "stonith"; + case crm_proc_stonithd: + text = "stonithd"; break; } return text; } static inline const char *ais_dest(struct crm_ais_host_s *host) { if(host->local) { return "local"; } else if(host->size > 0) { return host->uname; } else { return ""; } } #define ais_data_len(msg) (msg->is_compressed?msg->compressed_size:msg->size) #endif diff --git a/include/crm/common/Makefile.am b/include/crm/common/Makefile.am index c07bc616d5..34ba227eab 100644 --- a/include/crm/common/Makefile.am +++ b/include/crm/common/Makefile.am @@ -1,23 +1,23 @@ # # Copyright (C) 2004 Andrew Beekhof # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in headerdir=$(includedir)/@PKG_NAME@/crm/common -header_HEADERS = xml.h ipc.h msg.h ctrl.h util.h iso8601.h +header_HEADERS = xml.h ipc.h msg.h ctrl.h cluster.h util.h iso8601.h diff --git a/include/crm/crm.h b/include/crm/crm.h index 78150cdb33..4c8bb165e6 100644 --- a/include/crm/crm.h +++ b/include/crm/crm.h @@ -1,323 +1,324 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef CRM__H #define CRM__H #include #include #include #undef MIN #undef MAX #include #include #include #ifdef MCHECK #include #endif #define EOS '\0' #define DIMOF(a) ((int) (sizeof(a)/sizeof(a[0])) ) #define HAURL(url) HA_URLBASE url #ifndef CRM_DEV_BUILD # define CRM_DEV_BUILD 0 #endif #define CRM_DEPRECATED_SINCE_2_0_1 0 #define CRM_DEPRECATED_SINCE_2_0_2 0 #define CRM_DEPRECATED_SINCE_2_0_3 0 #define CRM_DEPRECATED_SINCE_2_0_4 0 #define CRM_DEPRECATED_SINCE_2_0_5 0 #define CRM_DEPRECATED_SINCE_2_0_6 1 #define CRM_DEPRECATED_SINCE_2_0_7 1 #define CRM_DEPRECATED_SINCE_2_0_8 1 #define CRM_DEPRECATED_SINCE_2_1_0 1 #define CRM_META "CRM_meta" #define crm_meta_name(field) CRM_META"_"field #define ipc_call_diff_max_ms 5000 #define action_diff_warn_ms 5000 #define action_diff_max_ms 20000 #define fsa_diff_warn_ms 10000 #define fsa_diff_max_ms 30000 #include #define CRM_ASSERT(expr) if((expr) == FALSE) { \ crm_abort(__FILE__, __PRETTY_FUNCTION__, __LINE__, #expr, TRUE, FALSE); \ } extern gboolean crm_assert_failed; #define CRM_DEV_ASSERT(expr) \ crm_assert_failed = FALSE; \ if((expr) == FALSE) { \ crm_assert_failed = TRUE; \ crm_abort(__FILE__,__PRETTY_FUNCTION__,__LINE__, #expr, FALSE, TRUE); \ } #define CRM_CHECK(expr, failure_action) if((expr) == FALSE) { \ crm_abort(__FILE__,__PRETTY_FUNCTION__,__LINE__, #expr, FALSE, TRUE); \ failure_action; \ } #define CRM_CHECK_AND_STORE(expr, failure_action) if((expr) == FALSE) { \ crm_abort(__FILE__,__PRETTY_FUNCTION__,__LINE__, #expr, TRUE, TRUE); \ failure_action; \ } extern const char *crm_system_name; /* Clean these up at some point, some probably should be runtime options */ #define WORKING_DIR HA_VARLIBDIR"/heartbeat/crm" #define CRM_SOCK_DIR HA_VARRUNDIR"/heartbeat/crm" #define BIN_DIR HA_LIBDIR"/heartbeat" #define SOCKET_LEN 1024 #define APPNAME_LEN 256 #define MAX_IPC_FAIL 5 #define CIB_FILENAME WORKING_DIR"/cib.xml" #define CIB_BACKUP WORKING_DIR"/cib_backup.xml" #define CRM_FEATURE_SET "2.1" #define MSG_LOG 1 #define DOT_FSA_ACTIONS 1 #define DOT_ALL_FSA_INPUTS 1 /* #define FSA_TRACE 1 */ #define INFINITY_S "INFINITY" #define MINUS_INFINITY_S "-INFINITY" #define INFINITY 1000000 /* Sub-systems */ #define CRM_SYSTEM_DC "dc" #define CRM_SYSTEM_DCIB "dcib" /* The master CIB */ #define CRM_SYSTEM_CIB "cib" #define CRM_SYSTEM_CRMD "crmd" #define CRM_SYSTEM_LRMD "lrmd" #define CRM_SYSTEM_PENGINE "pengine" #define CRM_SYSTEM_TENGINE "tengine" +#define CRM_SYSTEM_STONITHD "stonithd" /* Valid operations */ #define CRM_OP_NOOP "noop" #define CRM_OP_JOIN_ANNOUNCE "join_announce" #define CRM_OP_JOIN_OFFER "join_offer" #define CRM_OP_JOIN_REQUEST "join_request" #define CRM_OP_JOIN_ACKNAK "join_ack_nack" #define CRM_OP_JOIN_CONFIRM "join_confirm" #define CRM_OP_DIE "die_no_respawn" #define CRM_OP_RETRIVE_CIB "retrieve_cib" #define CRM_OP_PING "ping" #define CRM_OP_VOTE "vote" #define CRM_OP_NOVOTE "no-vote" #define CRM_OP_HELLO "hello" #define CRM_OP_HBEAT "dc_beat" #define CRM_OP_PECALC "pe_calc" #define CRM_OP_ABORT "abort" #define CRM_OP_QUIT "quit" #define CRM_OP_LOCAL_SHUTDOWN "start_shutdown" #define CRM_OP_SHUTDOWN_REQ "req_shutdown" #define CRM_OP_SHUTDOWN "do_shutdown" #define CRM_OP_FENCE "stonith" #define CRM_OP_EVENTCC "event_cc" #define CRM_OP_TEABORT "te_abort" #define CRM_OP_TEABORTED "te_abort_confirmed" /* we asked */ #define CRM_OP_TE_HALT "te_halt" #define CRM_OP_TECOMPLETE "te_complete" #define CRM_OP_TETIMEOUT "te_timeout" #define CRM_OP_TRANSITION "transition" #define CRM_OP_REGISTER "register" #define CRM_OP_DEBUG_UP "debug_inc" #define CRM_OP_DEBUG_DOWN "debug_dec" #define CRM_OP_INVOKE_LRM "lrm_invoke" #define CRM_OP_LRM_REFRESH "lrm_refresh" #define CRM_OP_LRM_QUERY "lrm_query" #define CRM_OP_LRM_DELETE "lrm_delete" #define CRM_OP_LRM_FAIL "lrm_fail" #define CRM_OP_PROBED "probe_complete" #define CRM_OP_REPROBE "probe_again" #define CRMD_STATE_ACTIVE "member" #define CRMD_STATE_INACTIVE "down" #define CRMD_JOINSTATE_DOWN CRMD_STATE_INACTIVE #define CRMD_JOINSTATE_PENDING "pending" #define CRMD_JOINSTATE_MEMBER CRMD_STATE_ACTIVE #define CRMD_ACTION_DELETE "delete" #define CRMD_ACTION_CANCEL "cancel" #define CRMD_ACTION_MIGRATE "migrate_to" #define CRMD_ACTION_MIGRATED "migrate_from" #define CRMD_ACTION_START "start" #define CRMD_ACTION_STARTED "running" #define CRMD_ACTION_STOP "stop" #define CRMD_ACTION_STOPPED "stopped" #define CRMD_ACTION_PROMOTE "promote" #define CRMD_ACTION_PROMOTED "promoted" #define CRMD_ACTION_DEMOTE "demote" #define CRMD_ACTION_DEMOTED "demoted" #define CRMD_ACTION_NOTIFY "notify" #define CRMD_ACTION_NOTIFIED "notified" #define CRMD_ACTION_STATUS "monitor" typedef GList* GListPtr; #define slist_destroy(child_type, child, parent, a) \ { \ GListPtr __crm_iter_head = parent; \ child_type *child = NULL; \ while(__crm_iter_head != NULL) { \ child = (child_type *) __crm_iter_head->data; \ __crm_iter_head = __crm_iter_head->next; \ { a; } \ } \ g_list_free(parent); \ } #define slist_iter(child, child_type, parent, counter, a) \ { \ GListPtr __crm_iter_head = parent; \ child_type *child = NULL; \ int counter = 0; \ for(; __crm_iter_head != NULL; counter++) { \ child = (child_type *) __crm_iter_head->data; \ __crm_iter_head = __crm_iter_head->next; \ { a; } \ } \ } #define LOG_DEBUG_2 LOG_DEBUG+1 #define LOG_DEBUG_3 LOG_DEBUG+2 #define LOG_DEBUG_4 LOG_DEBUG+3 #define LOG_DEBUG_5 LOG_DEBUG+4 #define LOG_DEBUG_6 LOG_DEBUG+5 #define LOG_MSG LOG_DEBUG_3 /* * Throughout the macros below, note the leading, pre-comma, space in the * various ' , ##args' occurences to aid portability across versions of 'gcc'. * http://gcc.gnu.org/onlinedocs/cpp/Variadic-Macros.html#Variadic-Macros */ #define do_crm_log(level, fmt, args...) do { \ if(crm_log_level < (level)) { \ continue; \ } else if((level) > LOG_DEBUG) { \ cl_log(LOG_DEBUG, "debug%d: %s: " fmt, \ level-LOG_INFO, __PRETTY_FUNCTION__ , ##args); \ } else { \ cl_log(level, "%s: " fmt, \ __PRETTY_FUNCTION__ , ##args); \ } \ } while(0) #define crm_crit(fmt, args...) do_crm_log(LOG_CRIT, fmt , ##args) #define crm_err(fmt, args...) do_crm_log(LOG_ERR, fmt , ##args) #define crm_warn(fmt, args...) do_crm_log(LOG_WARNING, fmt , ##args) #define crm_notice(fmt, args...) do_crm_log(LOG_NOTICE, fmt , ##args) #define crm_info(fmt, args...) do_crm_log(LOG_INFO, fmt , ##args) #define crm_debug(fmt, args...) do_crm_log(LOG_DEBUG, fmt , ##args) #define crm_debug_2(fmt, args...) do_crm_log(LOG_DEBUG_2, fmt , ##args) #define crm_debug_3(fmt, args...) do_crm_log(LOG_DEBUG_3, fmt , ##args) #define crm_debug_4(fmt, args...) do_crm_log(LOG_DEBUG_4, fmt , ##args) #define crm_debug_5(fmt, args...) do_crm_log(LOG_DEBUG_5, fmt , ##args) #define crm_debug_6(fmt, args...) do_crm_log(LOG_DEBUG_6, fmt , ##args) extern void crm_log_message_adv( int level, const char *alt_debugfile, const HA_Message *msg); #define crm_log_message(level, msg) if(crm_log_level >= (level)) { \ crm_log_message_adv(level, NULL, msg); \ } #define crm_log_xml(level, text, xml) if(crm_log_level >= (level)) { \ print_xml_formatted(level, __PRETTY_FUNCTION__, xml, text); \ } #define crm_log_xml_crit(xml, text) crm_log_xml(LOG_CRIT, text, xml) #define crm_log_xml_err(xml, text) crm_log_xml(LOG_ERR, text, xml) #define crm_log_xml_warn(xml, text) crm_log_xml(LOG_WARNING, text, xml) #define crm_log_xml_notice(xml, text) crm_log_xml(LOG_NOTICE, text, xml) #define crm_log_xml_info(xml, text) crm_log_xml(LOG_INFO, text, xml) #define crm_log_xml_debug(xml, text) crm_log_xml(LOG_DEBUG, text, xml) #define crm_log_xml_debug_2(xml, text) crm_log_xml(LOG_DEBUG_2, text, xml) #define crm_log_xml_debug_3(xml, text) crm_log_xml(LOG_DEBUG_3, text, xml) #define crm_log_xml_debug_4(xml, text) crm_log_xml(LOG_DEBUG_4, text, xml) #define crm_log_xml_debug_5(xml, text) crm_log_xml(LOG_DEBUG_5, text, xml) #define crm_str(x) (const char*)(x?x:"") #if CRM_DEV_BUILD # define crm_malloc0(malloc_obj, length) do { \ if(malloc_obj) { \ crm_err("Potential memory leak:" \ " %s at %s:%d not NULL before alloc.", \ #malloc_obj, __FILE__, __LINE__); \ } \ malloc_obj = cl_malloc(length); \ if(malloc_obj == NULL) { \ crm_err("Failed allocation of %lu bytes", (unsigned long)length); \ CRM_ASSERT(malloc_obj != NULL); \ } \ memset(malloc_obj, 0, length); \ } while(0) /* it's not a memory leak to already have an object to realloc, that's * the usual case, however if it does have a value, it must have been * allocated by the same allocator! */ # define crm_realloc(realloc_obj, length) do { \ if (realloc_obj != NULL) { \ CRM_ASSERT(cl_is_allocated(realloc_obj) == 1); \ } \ realloc_obj = cl_realloc(realloc_obj, length); \ CRM_ASSERT(realloc_obj != NULL); \ } while(0) # define crm_free(free_obj) if(free_obj) { \ CRM_ASSERT(cl_is_allocated(free_obj) == 1); \ cl_free(free_obj); \ free_obj=NULL; \ } #else # define crm_malloc0(malloc_obj, length) do { \ malloc_obj = cl_malloc(length); \ if(malloc_obj == NULL) { \ crm_err("Failed allocation of %lu bytes", (unsigned long)length); \ CRM_ASSERT(malloc_obj != NULL); \ } \ memset(malloc_obj, 0, length); \ } while(0) # define crm_realloc(realloc_obj, length) do { \ realloc_obj = cl_realloc(realloc_obj, length); \ CRM_ASSERT(realloc_obj != NULL); \ } while(0) # define crm_free(free_obj) if(free_obj) { cl_free(free_obj); free_obj=NULL; } #endif #define crm_msg_del(msg) if(msg != NULL) { ha_msg_del(msg); msg = NULL; } #define crm_strdup(str) crm_strdup_fn(str, __FILE__, __PRETTY_FUNCTION__, __LINE__) #endif diff --git a/lib/Makefile.am b/lib/Makefile.am index 386f183668..8f1f9368aa 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -1,34 +1,34 @@ # # heartbeat library directory: Linux-HA code # # Copyright (C) 2001 Alan Robertson # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in # # PILS is supposed to be an independent library # Stonith should rely only on PILS # # The rest can use clplumbing, and the plugins should be able # to use any of the libraries (nothing can be linked against them) # # ## Subdirectories... -SUBDIRS = crm +SUBDIRS = crm fencing plugins diff --git a/lib/crm/common/Makefile.am b/lib/crm/common/Makefile.am index 7cf81af65d..63ae459709 100644 --- a/lib/crm/common/Makefile.am +++ b/lib/crm/common/Makefile.am @@ -1,51 +1,51 @@ # # Copyright (C) 2004 Andrew Beekhof # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ -I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl \ -I$(AISPREFIX)/include/openais ## libraries lib_LTLIBRARIES = libcrmcommon.la libcrmcluster.la -libcrmcluster_la_SOURCES = cluster.c membership.c +libcrmcluster_la_SOURCES = cluster.c membership.c stack.h if BUILD_AIS_SUPPORT libcrmcluster_la_SOURCES += ais.c endif if BUILD_HEARTBEAT_SUPPORT libcrmcluster_la_SOURCES += heartbeat.c endif libcrmcluster_la_LDFLAGS = -version-info 1:0:0 libcrmcommon_la_SOURCES = ipc.c utils.c xml.c ctrl.c iso8601.c \ iso8601_fields.c libcrmcommon_la_LDFLAGS = -version-info 2:0:0 clean-generic: rm -f *.log *.debug *.xml *~ install-exec-local: uninstall-local: diff --git a/lib/crm/common/ais.c b/lib/crm/common/ais.c index 440d0d7651..403a716b79 100644 --- a/lib/crm/common/ais.c +++ b/lib/crm/common/ais.c @@ -1,499 +1,503 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include "stack.h" #include enum crm_ais_msg_types text2msg_type(const char *text) { int type = crm_msg_none; CRM_CHECK(text != NULL, return type); if(safe_str_eq(text, "ais")) { type = crm_msg_ais; } else if(safe_str_eq(text, "crm_plugin")) { type = crm_msg_ais; } else if(safe_str_eq(text, CRM_SYSTEM_CIB)) { type = crm_msg_cib; } else if(safe_str_eq(text, CRM_SYSTEM_CRMD)) { type = crm_msg_crmd; } else if(safe_str_eq(text, CRM_SYSTEM_DC)) { type = crm_msg_crmd; } else if(safe_str_eq(text, CRM_SYSTEM_TENGINE)) { type = crm_msg_te; } else if(safe_str_eq(text, CRM_SYSTEM_PENGINE)) { type = crm_msg_pe; } else if(safe_str_eq(text, CRM_SYSTEM_LRMD)) { type = crm_msg_lrmd; + } else if(safe_str_eq(text, CRM_SYSTEM_STONITHD)) { + type = crm_msg_stonithd; + } else if(safe_str_eq(text, "attrd")) { + type = crm_msg_attrd; } else { crm_debug_2("Unknown message type: %s", text); } return type; } char *get_ais_data(AIS_Message *msg) { int rc = BZ_OK; char *uncompressed = NULL; unsigned int new_size = msg->size; if(msg->is_compressed == FALSE) { crm_debug_2("Returning uncompressed message data"); uncompressed = strdup(msg->data); } else { crm_debug_2("Decompressing message data"); crm_malloc0(uncompressed, new_size); rc = BZ2_bzBuffToBuffDecompress( uncompressed, &new_size, msg->data, msg->compressed_size, 1, 0); CRM_ASSERT(rc = BZ_OK); CRM_ASSERT(new_size == msg->size); } return uncompressed; } #if SUPPORT_AIS int ais_fd_sync = -1; static int ais_fd_async = -1; /* never send messages via this channel */ GFDSource *ais_source = NULL; GFDSource *ais_source_sync = NULL; gboolean send_ais_text(int class, const char *data, gboolean local, const char *node, enum crm_ais_msg_types dest) { int retries = 0; static int msg_id = 0; static int local_pid = 0; int rc = SA_AIS_OK; mar_res_header_t header; AIS_Message *ais_msg = NULL; enum crm_ais_msg_types sender = text2msg_type(crm_system_name); if(local_pid == 0) { local_pid = getpid(); } CRM_CHECK(data != NULL, return FALSE); crm_malloc0(ais_msg, sizeof(AIS_Message)); ais_msg->id = msg_id++; ais_msg->header.id = class; ais_msg->host.type = dest; ais_msg->host.local = local; if(node) { ais_msg->host.size = strlen(node); memset(ais_msg->host.uname, 0, MAX_NAME); memcpy(ais_msg->host.uname, node, ais_msg->host.size); ais_msg->host.id = 0; } else { ais_msg->host.size = 0; memset(ais_msg->host.uname, 0, MAX_NAME); ais_msg->host.id = 0; } ais_msg->sender.type = sender; ais_msg->sender.pid = local_pid; ais_msg->sender.size = 0; memset(ais_msg->sender.uname, 0, MAX_NAME); ais_msg->sender.id = 0; ais_msg->size = 1 + strlen(data); if(ais_msg->size < 5120) { failback: crm_realloc(ais_msg, sizeof(AIS_Message) + ais_msg->size); memcpy(ais_msg->data, data, ais_msg->size); } else { char *compressed = NULL; char *uncompressed = crm_strdup(data); unsigned int len = (ais_msg->size * 1.1) + 600; /* recomended size */ crm_debug_5("Compressing message payload"); crm_malloc0(compressed, len); rc = BZ2_bzBuffToBuffCompress( compressed, &len, uncompressed, ais_msg->size, 3, 0, 30); crm_free(uncompressed); if(rc != BZ_OK) { crm_err("Compression failed: %d", rc); crm_free(compressed); goto failback; } crm_realloc(ais_msg, sizeof(AIS_Message) + len + 1); memcpy(ais_msg->data, compressed, len); crm_free(compressed); ais_msg->is_compressed = TRUE; ais_msg->compressed_size = len; crm_debug("Compression details: %d -> %d", ais_msg->size, ais_data_len(ais_msg)); } ais_msg->header.size = sizeof(AIS_Message) + ais_data_len(ais_msg); crm_debug("Sending%s message %d to %s.%s (data=%d, total=%d)", ais_msg->is_compressed?" compressed":"", ais_msg->id, ais_dest(&(ais_msg->host)), msg_type2text(dest), ais_data_len(ais_msg), ais_msg->header.size); retry: errno = 0; rc = saSendReceiveReply(ais_fd_sync, ais_msg, ais_msg->header.size, &header, sizeof (mar_res_header_t)); if(rc == SA_AIS_OK) { CRM_CHECK(header.size == sizeof (mar_res_header_t), crm_err("Odd message: id=%d, size=%d, error=%d", header.id, header.size, header.error)); CRM_CHECK(header.error == SA_AIS_OK, rc = header.error); } if(rc == SA_AIS_ERR_TRY_AGAIN && retries < 20) { retries++; crm_info("Peer overloaded: Re-sending message (Attempt %d of 20)", retries); mssleep(retries * 100); /* Proportional back off */ goto retry; } if(rc != SA_AIS_OK) { cl_perror("Sending message %d: FAILED (rc=%d): %s", ais_msg->id, rc, ais_error2text(rc)); ais_fd_async = -1; } else { crm_debug_4("Message %d: sent", ais_msg->id); } crm_free(ais_msg); return (rc == SA_AIS_OK); } gboolean send_ais_message(crm_data_t *msg, gboolean local, const char *node, enum crm_ais_msg_types dest) { gboolean rc = TRUE; char *data = NULL; if(ais_fd_async < 0 || ais_source == NULL) { crm_err("Not connected to AIS"); return FALSE; } if(cl_get_string(msg, F_XML_TAGNAME) == NULL) { ha_msg_add(msg, F_XML_TAGNAME, "ais_msg"); } data = dump_xml_unformatted(msg); rc = send_ais_text(0, data, local, node, dest); crm_free(data); return rc; } void terminate_ais_connection(void) { close(ais_fd_sync); close(ais_fd_async); crm_notice("Disconnected from AIS"); /* G_main_del_fd(ais_source); */ /* G_main_del_fd(ais_source_sync); */ } static gboolean ais_dispatch(int sender, gpointer user_data) { char *data = NULL; char *uncompressed = NULL; AIS_Message *msg = NULL; SaAisErrorT rc = SA_AIS_OK; mar_res_header_t *header = NULL; static int header_len = sizeof(mar_res_header_t); gboolean (*dispatch)(AIS_Message*,char*,int) = user_data; crm_malloc0(header, header_len); errno = 0; rc = saRecvRetry(sender, header, header_len); if (rc != SA_AIS_OK) { cl_perror("Receiving message header failed: (%d) %s", rc, ais_error2text(rc)); goto bail; } else if(header->size == header_len) { crm_err("Empty message: id=%d, size=%d, error=%d, header_len=%d", header->id, header->size, header->error, header_len); goto done; } else if(header->size == 0 || header->size < header_len) { crm_err("Mangled header: size=%d, header=%d, error=%d", header->size, header_len, header->error); goto done; } else if(header->error != 0) { crm_err("Header contined error: %d", header->error); } crm_debug_2("Looking for %d (%d - %d) more bytes", header->size - header_len, header->size, header_len); crm_realloc(header, header->size); /* Use a char* so we can store the remainder into an offset */ data = (char*)header; errno = 0; rc = saRecvRetry(sender, data+header_len, header->size - header_len); msg = (AIS_Message*)data; if (rc != SA_AIS_OK) { cl_perror("Receiving message body failed: (%d) %s", rc, ais_error2text(rc)); goto bail; } crm_debug_3("Got new%s message (size=%d, %d, %d)", msg->is_compressed?" compressed":"", ais_data_len(msg), msg->size, msg->compressed_size); data = msg->data; if(msg->is_compressed && msg->size > 0) { int rc = BZ_OK; unsigned int new_size = msg->size; if(check_message_sanity(msg, NULL) == FALSE) { goto badmsg; } crm_debug_5("Decompressing message data"); crm_malloc0(uncompressed, new_size); rc = BZ2_bzBuffToBuffDecompress( uncompressed, &new_size, data, msg->compressed_size, 1, 0); if(rc != BZ_OK) { crm_err("Decompression failed: %d", rc); crm_free(uncompressed); goto badmsg; } CRM_ASSERT(rc == BZ_OK); CRM_ASSERT(new_size == msg->size); data = uncompressed; } else if(check_message_sanity(msg, data) == FALSE) { goto badmsg; } else if(safe_str_eq("identify", data)) { int pid = getpid(); char *pid_s = crm_itoa(pid); send_ais_text(0, pid_s, TRUE, NULL, crm_msg_ais); crm_free(pid_s); goto done; } if(msg->header.id == crm_class_members) { crm_data_t *xml = string2xml(data); if(xml != NULL) { const char *seq_s = crm_element_value(xml, "id"); unsigned long seq = crm_int_helper(seq_s, NULL); crm_info("Processing membership %ld/%s", seq, seq_s); /* crm_log_xml_debug(xml, __PRETTY_FUNCTION__); */ xml_child_iter(xml, node, crm_update_ais_node(node, seq)); crm_calculate_quorum(); } else { crm_warn("Invalid peer update: %s", data); } free_xml(xml); } if(dispatch != NULL) { dispatch(msg, data, sender); } done: crm_free(uncompressed); crm_free(msg); return TRUE; badmsg: crm_err("Invalid message (id=%d, dest=%s:%s, from=%s:%s.%d):" " min=%d, total=%d, size=%d, bz2_size=%d", msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, (int)sizeof(AIS_Message), msg->header.size, msg->size, msg->compressed_size); goto done; bail: crm_err("AIS connection failed"); return FALSE; } static void ais_destroy(gpointer user_data) { crm_err("AIS connection terminated"); ais_fd_sync = -1; exit(1); } gboolean init_ais_connection( gboolean (*dispatch)(AIS_Message*,char*,int), void (*destroy)(gpointer), char **our_uuid, char **our_uname) { int rc = SA_AIS_OK; struct utsname name; if(our_uname != NULL) { if(uname(&name) < 0) { cl_perror("uname(2) call failed"); exit(100); } *our_uname = crm_strdup(name.nodename); crm_notice("Local node name: %s", *our_uname); } if(our_uuid != NULL) { *our_uuid = crm_strdup(name.nodename); } /* 16 := CRM_SERVICE */ crm_info("Creating connection to our AIS plugin"); rc = saServiceConnect (&ais_fd_sync, &ais_fd_async, 16); if (rc != SA_AIS_OK) { crm_info("Connection to our AIS plugin failed: %s (%d)", ais_error2text(rc), rc); return FALSE; } if(destroy == NULL) { crm_debug("Using the default destroy handler"); destroy = ais_destroy; } crm_info("AIS connection established"); #if 0 ais_source_sync = G_main_add_fd( G_PRIORITY_HIGH, ais_fd_sync, FALSE, ais_dispatch, dispatch, destroy); #endif { int pid = getpid(); char *pid_s = crm_itoa(pid); send_ais_text(0, pid_s, TRUE, NULL, crm_msg_ais); crm_free(pid_s); } ais_source = G_main_add_fd( G_PRIORITY_HIGH, ais_fd_async, FALSE, ais_dispatch, dispatch, destroy); return TRUE; } gboolean check_message_sanity(AIS_Message *msg, char *data) { gboolean sane = TRUE; gboolean repaired = FALSE; int dest = msg->host.type; int tmp_size = msg->header.size - sizeof(AIS_Message); if(sane && msg->header.size == 0) { crm_warn("Message with no size"); sane = FALSE; } if(sane && msg->header.error != 0) { crm_warn("Message header contains an error: %d", msg->header.error); sane = FALSE; } if(sane && ais_data_len(msg) != tmp_size) { int cur_size = ais_data_len(msg); repaired = TRUE; if(msg->is_compressed) { msg->compressed_size = tmp_size; } else { msg->size = tmp_size; } crm_warn("Repaired message payload size %d -> %d", cur_size, tmp_size); } if(sane && ais_data_len(msg) == 0) { crm_warn("Message with no payload"); sane = FALSE; } if(sane && data && msg->is_compressed == FALSE) { int str_size = strlen(data) + 1; if(ais_data_len(msg) != str_size) { int lpc = 0; crm_warn("Message payload is corrupted: expected %d bytes, got %d", ais_data_len(msg), str_size); sane = FALSE; for(lpc = (str_size - 10); lpc < msg->size; lpc++) { if(lpc < 0) { lpc = 0; } crm_debug("bad_data[%d]: %d / '%c'", lpc, data[lpc], data[lpc]); } } } if(sane == FALSE) { crm_err("Invalid message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } else if(repaired) { crm_err("Repaired message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } else { crm_debug_3("Verfied message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } return sane; } #endif diff --git a/lib/crm/pengine/unpack.c b/lib/crm/pengine/unpack.c index f3465982d5..171b27cf12 100644 --- a/lib/crm/pengine/unpack.c +++ b/lib/crm/pengine/unpack.c @@ -1,1382 +1,1380 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include /* for ONLINESTATUS */ #include #include #include #include gboolean unpack_config(crm_data_t *config, pe_working_set_t *data_set) { const char *name = NULL; const char *value = NULL; GHashTable *config_hash = g_hash_table_new_full( g_str_hash,g_str_equal, g_hash_destroy_str,g_hash_destroy_str); data_set->config_hash = config_hash; unpack_instance_attributes( config, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, data_set->now); #if CRM_DEPRECATED_SINCE_2_0_1 xml_child_iter_filter( config, a_child, XML_CIB_TAG_NVPAIR, name = crm_element_value(a_child, XML_NVPAIR_ATTR_NAME); value = crm_element_value(a_child, XML_NVPAIR_ATTR_VALUE); if(g_hash_table_lookup(config_hash, name) == NULL) { g_hash_table_insert( config_hash,crm_strdup(name),crm_strdup(value)); } crm_config_err("Creating directly" "beneath has been depreciated since" " 2.0.1", ID(a_child), name); ); #else xml_child_iter_filter( config, a_child, XML_CIB_TAG_NVPAIR, name = crm_element_value(a_child, XML_NVPAIR_ATTR_NAME); crm_config_err("Creating directly" "beneath has been depreciated since" " 2.0.1 and is now disabled", ID(a_child), name); ); #endif verify_pe_options(data_set->config_hash); value = pe_pref(data_set->config_hash, "default-action-timeout"); data_set->transition_idle_timeout = crm_strdup(value); crm_debug("Default action timeout: %s", data_set->transition_idle_timeout); value = pe_pref(data_set->config_hash, "default-resource-stickiness"); data_set->default_resource_stickiness = char2score(value); crm_debug("Default stickiness: %d", data_set->default_resource_stickiness); value = pe_pref(data_set->config_hash, "default-resource-failure-stickiness"); data_set->default_resource_fail_stickiness = char2score(value); crm_debug("Default failure stickiness: %d", data_set->default_resource_fail_stickiness); value = pe_pref(data_set->config_hash, "stonith-enabled"); cl_str_to_boolean(value, &data_set->stonith_enabled); crm_debug("STONITH of failed nodes is %s", data_set->stonith_enabled?"enabled":"disabled"); data_set->stonith_action = pe_pref(data_set->config_hash, "stonith-action"); crm_debug_2("STONITH will %s nodes", data_set->stonith_action); value = pe_pref(data_set->config_hash, "symmetric-cluster"); cl_str_to_boolean(value, &data_set->symmetric_cluster); if(data_set->symmetric_cluster) { crm_debug("Cluster is symmetric" " - resources can run anywhere by default"); } value = pe_pref(data_set->config_hash, "no-quorum-policy"); if(safe_str_eq(value, "ignore")) { data_set->no_quorum_policy = no_quorum_ignore; } else if(safe_str_eq(value, "freeze")) { data_set->no_quorum_policy = no_quorum_freeze; } else { data_set->no_quorum_policy = no_quorum_stop; } switch (data_set->no_quorum_policy) { case no_quorum_freeze: crm_debug("On loss of CCM Quorum: Freeze resources"); break; case no_quorum_stop: crm_debug("On loss of CCM Quorum: Stop ALL resources"); break; case no_quorum_ignore: crm_notice("On loss of CCM Quorum: Ignore"); break; } value = pe_pref(data_set->config_hash, "stop-orphan-resources"); cl_str_to_boolean(value, &data_set->stop_rsc_orphans); crm_debug_2("Orphan resources are %s", data_set->stop_rsc_orphans?"stopped":"ignored"); value = pe_pref(data_set->config_hash, "stop-orphan-actions"); cl_str_to_boolean(value, &data_set->stop_action_orphans); crm_debug_2("Orphan resource actions are %s", data_set->stop_action_orphans?"stopped":"ignored"); value = pe_pref(data_set->config_hash, "remove-after-stop"); cl_str_to_boolean(value, &data_set->remove_after_stop); crm_debug_2("Stopped resources are removed from the status section: %s", data_set->remove_after_stop?"true":"false"); value = pe_pref(data_set->config_hash, "is-managed-default"); cl_str_to_boolean(value, &data_set->is_managed_default); crm_debug_2("By default resources are %smanaged", data_set->is_managed_default?"":"not "); value = pe_pref(data_set->config_hash, "start-failure-is-fatal"); cl_str_to_boolean(value, &data_set->start_failure_fatal); crm_debug_2("Start failures are %s", data_set->start_failure_fatal?"always fatal":"handled by failcount"); return TRUE; } gboolean unpack_nodes(crm_data_t * xml_nodes, pe_working_set_t *data_set) { node_t *new_node = NULL; const char *id = NULL; const char *uname = NULL; const char *type = NULL; gboolean unseen_are_unclean = TRUE; const char *blind_faith = pe_pref( data_set->config_hash, "startup-fencing"); if(crm_is_true(blind_faith) == FALSE) { unseen_are_unclean = FALSE; crm_warn("Blind faith: not fencing unseen nodes"); } xml_child_iter_filter( xml_nodes, xml_obj, XML_CIB_TAG_NODE, new_node = NULL; id = crm_element_value(xml_obj, XML_ATTR_ID); uname = crm_element_value(xml_obj, XML_ATTR_UNAME); type = crm_element_value(xml_obj, XML_ATTR_TYPE); crm_debug_3("Processing node %s/%s", uname, id); if(id == NULL) { crm_config_err("Must specify id tag in "); continue; } if(type == NULL) { crm_config_err("Must specify type tag in "); continue; } if(pe_find_node(data_set->nodes, uname) != NULL) { crm_config_warn("Detected multiple node entries with uname=%s" " - this is rarely intended", uname); } crm_malloc0(new_node, sizeof(node_t)); if(new_node == NULL) { return FALSE; } new_node->weight = 0; new_node->fixed = FALSE; crm_malloc0(new_node->details, sizeof(struct node_shared_s)); if(new_node->details == NULL) { crm_free(new_node); return FALSE; } crm_debug_3("Creaing node for entry %s/%s", uname, id); new_node->details->id = id; new_node->details->uname = uname; new_node->details->type = node_ping; new_node->details->online = FALSE; new_node->details->shutdown = FALSE; new_node->details->running_rsc = NULL; new_node->details->attrs = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); /* if(data_set->have_quorum == FALSE */ /* && data_set->no_quorum_policy == no_quorum_stop) { */ /* /\* start shutting resources down *\/ */ /* new_node->weight = -INFINITY; */ /* } */ if(data_set->stonith_enabled == FALSE || unseen_are_unclean == FALSE) { /* blind faith... */ new_node->details->unclean = FALSE; } else { /* all nodes are unclean until we've seen their * status entry */ new_node->details->unclean = TRUE; } if(type == NULL || safe_str_eq(type, "member") || safe_str_eq(type, NORMALNODE)) { new_node->details->type = node_member; } add_node_attrs(xml_obj, new_node, data_set); if(crm_is_true(g_hash_table_lookup( new_node->details->attrs, "standby"))) { crm_info("Node %s is in standby-mode", new_node->details->uname); new_node->weight = -INFINITY; new_node->details->standby = TRUE; } data_set->nodes = g_list_append(data_set->nodes, new_node); crm_debug_3("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME)); ); return TRUE; } gboolean unpack_resources(crm_data_t * xml_resources, pe_working_set_t *data_set) { xml_child_iter( xml_resources, xml_obj, resource_t *new_rsc = NULL; crm_debug_3("Begining unpack... %s", xml_obj?crm_element_name(xml_obj):""); if(common_unpack(xml_obj, &new_rsc, NULL, data_set)) { data_set->resources = g_list_append( data_set->resources, new_rsc); print_resource(LOG_DEBUG_3, "Added", new_rsc, FALSE); } else { crm_config_err("Failed unpacking %s %s", crm_element_name(xml_obj), crm_element_value(xml_obj, XML_ATTR_ID)); if(new_rsc != NULL && new_rsc->fns != NULL) { new_rsc->fns->free(new_rsc); } } ); data_set->resources = g_list_sort( data_set->resources, sort_rsc_priority); return TRUE; } /* remove nodes that are down, stopping */ /* create +ve rsc_to_node constraints between resources and the nodes they are running on */ /* anything else? */ gboolean unpack_status(crm_data_t * status, pe_working_set_t *data_set) { const char *id = NULL; const char *uname = NULL; crm_data_t * lrm_rsc = NULL; crm_data_t * attrs = NULL; node_t *this_node = NULL; crm_debug_3("Begining unpack"); xml_child_iter_filter( status, node_state, XML_CIB_TAG_STATE, id = crm_element_value(node_state, XML_ATTR_ID); uname = crm_element_value(node_state, XML_ATTR_UNAME); attrs = find_xml_node( node_state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); lrm_rsc = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE); crm_debug_3("Processing node %s", uname); this_node = pe_find_node_id(data_set->nodes, id); if(uname == NULL) { /* error */ continue; } else if(this_node == NULL) { crm_config_warn("Node %s in status section no longer exists", uname); continue; } /* Mark the node as provisionally clean * - at least we have seen it in the current cluster's lifetime */ this_node->details->unclean = FALSE; crm_debug_3("Adding runtime node attrs"); add_node_attrs(attrs, this_node, data_set); crm_debug_3("determining node state"); determine_online_status(node_state, this_node, data_set); if(this_node->details->online || data_set->stonith_enabled) { /* offline nodes run no resources... * unless stonith is enabled in which case we need to * make sure rsc start events happen after the stonith */ crm_debug_3("Processing lrm resource entries"); unpack_lrm_resources(this_node, lrm_rsc, data_set); } ); return TRUE; } static gboolean determine_online_status_no_fencing(crm_data_t * node_state, node_t *this_node) { gboolean online = FALSE; const char *join_state = crm_element_value(node_state, XML_CIB_ATTR_JOINSTATE); const char *crm_state = crm_element_value(node_state, XML_CIB_ATTR_CRMDSTATE); const char *ccm_state = crm_element_value(node_state, XML_CIB_ATTR_INCCM); const char *ha_state = crm_element_value(node_state, XML_CIB_ATTR_HASTATE); const char *exp_state = crm_element_value(node_state, XML_CIB_ATTR_EXPSTATE); if(ha_state == NULL) { ha_state = DEADSTATUS; } if(!crm_is_true(ccm_state) || safe_str_eq(ha_state, DEADSTATUS)){ crm_debug_2("Node is down: ha_state=%s, ccm_state=%s", crm_str(ha_state), crm_str(ccm_state)); } else if(!crm_is_true(ccm_state) || safe_str_eq(ha_state, DEADSTATUS)) { } else if(safe_str_eq(crm_state, ONLINESTATUS)) { if(safe_str_eq(join_state, CRMD_JOINSTATE_MEMBER)) { online = TRUE; } else { crm_debug("Node is not ready to run resources: %s", join_state); } } else if(this_node->details->expected_up == FALSE) { crm_debug_2("CRMd is down: ha_state=%s, ccm_state=%s", crm_str(ha_state), crm_str(ccm_state)); crm_debug_2("\tcrm_state=%s, join_state=%s, expected=%s", crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } else { /* mark it unclean */ this_node->details->unclean = TRUE; crm_warn("Node %s is partially & un-expectedly down", this_node->details->uname); crm_info("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } return online; } static gboolean determine_online_status_fencing(crm_data_t * node_state, node_t *this_node) { gboolean online = FALSE; const char *join_state = crm_element_value(node_state, XML_CIB_ATTR_JOINSTATE); const char *crm_state = crm_element_value(node_state, XML_CIB_ATTR_CRMDSTATE); const char *ccm_state = crm_element_value(node_state, XML_CIB_ATTR_INCCM); const char *ha_state = crm_element_value(node_state, XML_CIB_ATTR_HASTATE); const char *exp_state = crm_element_value(node_state, XML_CIB_ATTR_EXPSTATE); if(ha_state == NULL) { ha_state = DEADSTATUS; } if(crm_is_true(ccm_state) && safe_str_eq(ha_state, ACTIVESTATUS) && safe_str_eq(crm_state, ONLINESTATUS)) { online = TRUE; if(safe_str_neq(join_state, CRMD_JOINSTATE_MEMBER)) { crm_info("Node %s is not ready to run resources", this_node->details->uname); this_node->details->standby = TRUE; } } else if(crm_is_true(ccm_state) == FALSE && safe_str_eq(ha_state, DEADSTATUS) && safe_str_eq(crm_state, OFFLINESTATUS) && this_node->details->expected_up == FALSE) { crm_debug("Node %s is down: join_state=%s, expected=%s", this_node->details->uname, crm_str(join_state), crm_str(exp_state)); } else if(this_node->details->expected_up) { /* mark it unclean */ this_node->details->unclean = TRUE; crm_warn("Node %s (%s) is un-expectedly down", this_node->details->uname, this_node->details->id); crm_info("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } else { crm_info("Node %s is comming up", this_node->details->uname); crm_debug("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } return online; } gboolean determine_online_status( crm_data_t * node_state, node_t *this_node, pe_working_set_t *data_set) { int shutdown = 0; gboolean online = FALSE; const char *exp_state = crm_element_value(node_state, XML_CIB_ATTR_EXPSTATE); if(this_node == NULL) { crm_config_err("No node to check"); return online; } ha_msg_value_int(node_state, XML_CIB_ATTR_SHUTDOWN, &shutdown); this_node->details->expected_up = FALSE; if(safe_str_eq(exp_state, CRMD_JOINSTATE_MEMBER)) { this_node->details->expected_up = TRUE; } this_node->details->shutdown = FALSE; if(shutdown != 0) { this_node->details->shutdown = TRUE; this_node->details->expected_up = FALSE; } if(data_set->stonith_enabled == FALSE) { online = determine_online_status_no_fencing( node_state, this_node); } else { online = determine_online_status_fencing( node_state, this_node); } if(online) { this_node->details->online = TRUE; } else { /* remove node from contention */ this_node->fixed = TRUE; this_node->weight = -INFINITY; } if(online && this_node->details->shutdown) { /* dont run resources here */ this_node->fixed = TRUE; this_node->weight = -INFINITY; } if(this_node->details->unclean) { pe_proc_warn("Node %s is unclean", this_node->details->uname); } else if(this_node->details->online) { crm_info("Node %s is %s", this_node->details->uname, this_node->details->shutdown?"shutting down": this_node->details->standby?"standby":"online"); } else { crm_debug_2("Node %s is offline", this_node->details->uname); } return online; } #define set_char(x) last_rsc_id[lpc] = x; complete = TRUE; static char * increment_clone(char *last_rsc_id) { int lpc = 0; int len = 0; char *tmp = NULL; gboolean complete = FALSE; CRM_CHECK(last_rsc_id != NULL, return NULL); if(last_rsc_id != NULL) { len = strlen(last_rsc_id); } lpc = len-1; while(complete == FALSE && lpc > 0) { switch (last_rsc_id[lpc]) { case 0: lpc--; break; case '0': set_char('1'); break; case '1': set_char('2'); break; case '2': set_char('3'); break; case '3': set_char('4'); break; case '4': set_char('5'); break; case '5': set_char('6'); break; case '6': set_char('7'); break; case '7': set_char('8'); break; case '8': set_char('9'); break; case '9': last_rsc_id[lpc] = '0'; lpc--; break; case ':': tmp = last_rsc_id; crm_malloc0(last_rsc_id, len + 2); memcpy(last_rsc_id, tmp, len); last_rsc_id[++lpc] = '1'; last_rsc_id[len] = '0'; last_rsc_id[len+1] = 0; complete = TRUE; crm_free(tmp); break; default: crm_err("Unexpected char: %c (%d)", last_rsc_id[lpc], lpc); break; } } return last_rsc_id; } static resource_t * create_fake_resource(const char *rsc_id, crm_data_t *rsc_entry, pe_working_set_t *data_set) { resource_t *rsc = NULL; crm_data_t *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE); copy_in_properties(xml_rsc, rsc_entry); crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id); crm_log_xml_info(xml_rsc, "Orphan resource"); common_unpack(xml_rsc, &rsc, NULL, data_set); set_bit(rsc->flags, pe_rsc_orphan); data_set->resources = g_list_append(data_set->resources, rsc); return rsc; } static resource_t * unpack_find_resource( pe_working_set_t *data_set, node_t *node, const char *rsc_id, crm_data_t *rsc_entry) { resource_t *rsc = NULL; gboolean is_duped_clone = FALSE; char *alt_rsc_id = crm_strdup(rsc_id); while(rsc == NULL) { crm_debug_3("looking for: %s", alt_rsc_id); rsc = pe_find_resource(data_set->resources, alt_rsc_id); /* no match */ if(rsc == NULL) { crm_debug_2("%s not found: %d", alt_rsc_id, is_duped_clone); if(is_duped_clone) { /* create one */ rsc = create_fake_resource(alt_rsc_id, rsc_entry, data_set); crm_info("Making sure orphan %s/%s is stopped on %s", rsc_id, rsc->id, node->details->uname); resource_location(rsc, NULL, -INFINITY, "__orphan_clone_dont_run__", data_set); } break; /* not running anywhere else */ } else if(rsc->running_on == NULL) { crm_debug_3("not active yet"); break; /* always unique */ } else if(is_set(rsc->flags, pe_rsc_unique)) { crm_debug_3("unique"); break; /* running somewhere already but we dont care * find another clone instead */ } else { crm_debug_3("find another one"); rsc = NULL; is_duped_clone = TRUE; alt_rsc_id = increment_clone(alt_rsc_id); } } crm_free(alt_rsc_id); if(rsc != NULL) { crm_free(rsc->clone_name); rsc->clone_name = NULL; if(is_duped_clone) { crm_info("Internally renamed %s on %s to %s", rsc_id, node->details->uname, rsc->id); rsc->clone_name = crm_strdup(rsc_id); } } return rsc; } static resource_t * process_orphan_resource(crm_data_t *rsc_entry, node_t *node, pe_working_set_t *data_set) { resource_t *rsc = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); crm_log_xml_info(rsc_entry, "Orphan resource"); crm_config_warn("Nothing known about resource %s running on %s", rsc_id, node->details->uname); rsc = create_fake_resource(rsc_id, rsc_entry, data_set); if(data_set->stop_rsc_orphans == FALSE) { clear_bit(rsc->flags, pe_rsc_managed); } else { crm_info("Making sure orphan %s is stopped", rsc_id); print_resource(LOG_DEBUG_3, "Added orphan", rsc, FALSE); CRM_CHECK(rsc != NULL, return NULL); resource_location(rsc, NULL, -INFINITY, "__orphan_dont_run__", data_set); } return rsc; } static void process_rsc_state(resource_t *rsc, node_t *node, enum action_fail_response on_fail, crm_data_t *migrate_op, pe_working_set_t *data_set) { if(on_fail == action_migrate_failure) { node_t *from = NULL; const char *uuid = NULL; uuid = crm_element_value(migrate_op, CRMD_ACTION_MIGRATED); from = pe_find_node_id(data_set->nodes, uuid); process_rsc_state(rsc, from, action_fail_recover,NULL,data_set); on_fail = action_fail_recover; } crm_debug_2("Resource %s is %s on %s", rsc->id, role2text(rsc->role), node->details->uname); /* process current state */ if(rsc->role != RSC_ROLE_UNKNOWN) { rsc->known_on = g_list_append(rsc->known_on, node); } if(rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { if(on_fail != action_fail_ignore) { set_bit(rsc->flags, pe_rsc_failed); crm_debug_2("Force stop"); } native_add_running(rsc, node, data_set); if(is_set(rsc->flags, pe_rsc_managed) && rsc->stickiness != 0) { node_t *match = pe_find_node_id(rsc->allowed_nodes, node->details->id); if(match != NULL || data_set->symmetric_cluster) { resource_t *sticky_rsc = rsc; if(rsc->parent && rsc->parent->variant == pe_group) { sticky_rsc = rsc->parent; } resource_location(sticky_rsc, node, rsc->stickiness, "stickiness", data_set); crm_debug_2("Resource %s: preferring current location" " (node=%s, weight=%d)", sticky_rsc->id, node->details->uname, rsc->stickiness); } else { crm_debug("Ignoring stickiness for %s: the cluster is asymmetric and node %s is no longer explicitly allowed", rsc->id, node->details->uname); } } if(on_fail == action_fail_ignore) { /* nothing to do */ } else if(node->details->unclean) { stop_action(rsc, node, FALSE); } else if(on_fail == action_fail_fence) { /* treat it as if it is still running * but also mark the node as unclean */ node->details->unclean = TRUE; stop_action(rsc, node, FALSE); } else if(on_fail == action_fail_block) { /* is_managed == FALSE will prevent any * actions being sent for the resource */ clear_bit(rsc->flags, pe_rsc_managed); } else if(on_fail == action_fail_migrate) { stop_action(rsc, node, FALSE); /* make sure it comes up somewhere else * or not at all */ resource_location(rsc, node, -INFINITY, "__action_migration_auto__",data_set); } else { stop_action(rsc, node, FALSE); } } else if(rsc->clone_name) { crm_debug_2("Resetting clone_name %s for %s (stopped)", rsc->clone_name, rsc->id); crm_free(rsc->clone_name); rsc->clone_name = NULL; } else { char *key = stop_key(rsc); GListPtr possible_matches = find_actions(rsc->actions, key, node); slist_iter(stop, action_t, possible_matches, lpc, stop->optional = TRUE; ); crm_free(key); } } /* create active recurring operations as optional */ static void process_recurring(node_t *node, resource_t *rsc, int start_index, int stop_index, GListPtr sorted_op_list, pe_working_set_t *data_set) { const char *task = NULL; const char *status = NULL; crm_debug_3("%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index); slist_iter(rsc_op, crm_data_t, sorted_op_list, lpc, int interval = 0; char *key = NULL; const char *id = ID(rsc_op); const char *interval_s = NULL; if(node->details->online == FALSE) { crm_debug_4("Skipping %s/%s: node is offline", rsc->id, node->details->uname); break; } else if(start_index < stop_index) { crm_debug_4("Skipping %s/%s: not active", rsc->id, node->details->uname); break; } else if(lpc <= start_index) { crm_debug_4("Skipping %s/%s: old", id, node->details->uname); continue; } interval_s = crm_element_value(rsc_op,XML_LRM_ATTR_INTERVAL); interval = crm_parse_int(interval_s, "0"); if(interval == 0) { crm_debug_4("Skipping %s/%s: non-recurring", id, node->details->uname); continue; } status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); if(safe_str_eq(status, "-1")) { crm_debug_4("Skipping %s/%s: status", id, node->details->uname); continue; } task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); /* create the action */ key = generate_op_key(rsc->id, task, interval); crm_debug_3("Creating %s/%s", key, node->details->uname); custom_action(rsc, key, task, node, TRUE, TRUE, data_set); ); } void calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index) { const char *task = NULL; const char *status = NULL; *stop_index = -1; *start_index = -1; slist_iter( rsc_op, crm_data_t, sorted_op_list, lpc, task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); if(safe_str_eq(task, CRMD_ACTION_STOP) && safe_str_eq(status, "0")) { *stop_index = lpc; } else if(safe_str_eq(task, CRMD_ACTION_START)) { *start_index = lpc; } else if(*start_index <= *stop_index && safe_str_eq(task, CRMD_ACTION_STATUS)) { const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC); if(safe_str_eq(rc, "0") || safe_str_eq(rc, "8")) { *start_index = lpc; } } ); } static void unpack_lrm_rsc_state( node_t *node, crm_data_t * rsc_entry, pe_working_set_t *data_set) { int stop_index = -1; int start_index = -1; int max_call_id = -1; const char *task = NULL; const char *value = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); resource_t *rsc = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; crm_data_t *migrate_op = NULL; enum action_fail_response on_fail = FALSE; enum rsc_role_e saved_role = RSC_ROLE_UNKNOWN; crm_debug_3("[%s] Processing %s on %s", crm_element_name(rsc_entry), rsc_id, node->details->uname); /* extract operations */ op_list = NULL; sorted_op_list = NULL; xml_child_iter_filter( rsc_entry, rsc_op, XML_LRM_TAG_RSC_OP, op_list = g_list_append(op_list, rsc_op); ); if(op_list == NULL) { /* if there are no operations, there is nothing to do */ return; } /* find the resource */ rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry); if(rsc == NULL) { rsc = process_orphan_resource(rsc_entry, node, data_set); } CRM_ASSERT(rsc != NULL); /* process operations */ max_call_id = -1; saved_role = rsc->role; on_fail = action_fail_ignore; rsc->role = RSC_ROLE_UNKNOWN; sorted_op_list = g_list_sort(op_list, sort_op_by_callid); slist_iter( rsc_op, crm_data_t, sorted_op_list, lpc, task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); if(safe_str_eq(task, CRMD_ACTION_MIGRATED)) { migrate_op = rsc_op; } unpack_rsc_op(rsc, node, rsc_op, &max_call_id, &on_fail, data_set); ); /* create active recurring operations as optional */ calculate_active_ops(sorted_op_list, &start_index, &stop_index); process_recurring(node, rsc, start_index, stop_index, sorted_op_list, data_set); /* no need to free the contents */ g_list_free(sorted_op_list); process_rsc_state(rsc, node, on_fail, migrate_op, data_set); value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); if(value != NULL && safe_str_neq("default", value)) { enum rsc_role_e req_role = text2role(value); if(req_role != RSC_ROLE_UNKNOWN && req_role != rsc->next_role){ if(rsc->next_role != RSC_ROLE_UNKNOWN) { crm_debug("%s: Overwriting calculated next role %s" " with requested next role %s", rsc->id, role2text(rsc->next_role), role2text(req_role)); } rsc->next_role = req_role; } } if(saved_role > rsc->role) { rsc->role = saved_role; } } gboolean unpack_lrm_resources(node_t *node, crm_data_t * lrm_rsc_list, pe_working_set_t *data_set) { CRM_CHECK(node != NULL, return FALSE); crm_debug_3("Unpacking resources on %s", node->details->uname); slist_iter( rsc, resource_t, data_set->resources, lpc, common_apply_stickiness(rsc, node, data_set); ); xml_child_iter_filter( lrm_rsc_list, rsc_entry, XML_LRM_TAG_RESOURCE, unpack_lrm_rsc_state(node, rsc_entry, data_set); ); return TRUE; } gboolean unpack_rsc_op(resource_t *rsc, node_t *node, crm_data_t *xml_op, int *max_call_id, enum action_fail_response *on_fail, pe_working_set_t *data_set) { const char *id = NULL; const char *key = NULL; const char *task = NULL; const char *task_id = NULL; const char *actual_rc = NULL; /* const char *target_rc = NULL; */ const char *task_status = NULL; const char *interval_s = NULL; const char *op_digest = NULL; const char *op_version = NULL; int interval = 0; int task_id_i = -1; int task_status_i = -2; int actual_rc_i = 0; int target_rc = -1; action_t *action = NULL; node_t *effective_node = NULL; gboolean is_probe = FALSE; CRM_CHECK(rsc != NULL, return FALSE); CRM_CHECK(node != NULL, return FALSE); CRM_CHECK(xml_op != NULL, return FALSE); id = ID(xml_op); task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); task_id = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); task_status = crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS); op_digest = crm_element_value(xml_op, XML_LRM_ATTR_OP_DIGEST); op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION); key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY); CRM_CHECK(id != NULL, return FALSE); CRM_CHECK(task != NULL, return FALSE); CRM_CHECK(task_status != NULL, return FALSE); task_status_i = crm_parse_int(task_status, NULL); CRM_CHECK(task_status_i <= LRM_OP_ERROR, return FALSE); CRM_CHECK(task_status_i >= LRM_OP_PENDING, return FALSE); if(safe_str_eq(task, CRMD_ACTION_NOTIFY)) { /* safe to ignore these */ return TRUE; } crm_debug_2("Unpacking task %s/%s (call_id=%s, status=%s) on %s (role=%s)", id, task, task_id, task_status, node->details->uname, role2text(rsc->role)); interval_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); interval = crm_parse_int(interval_s, "0"); if(interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) { is_probe = TRUE; } if(task_status_i != LRM_OP_PENDING) { task_id_i = crm_parse_int(task_id, "-1"); CRM_CHECK(task_id != NULL, return FALSE); CRM_CHECK(task_id_i >= 0, return FALSE); CRM_CHECK(task_id_i > *max_call_id, return FALSE); } if(*max_call_id < task_id_i) { *max_call_id = task_id_i; } if(node->details->unclean) { crm_debug_2("Node %s (where %s is running) is unclean." " Further action depends on the value of %s", node->details->uname, rsc->id, XML_RSC_ATTR_STOPFAIL); } actual_rc = crm_element_value(xml_op, XML_LRM_ATTR_RC); CRM_CHECK(actual_rc != NULL, return FALSE); actual_rc_i = crm_parse_int(actual_rc, NULL); if(key) { int dummy = 0; char *dummy_string = NULL; decode_transition_key(key, &dummy_string, &dummy, &dummy, &target_rc); crm_free(dummy_string); } if(task_status_i == LRM_OP_DONE && target_rc >= 0) { if(target_rc == actual_rc_i) { task_status_i = LRM_OP_DONE; } else { task_status_i = LRM_OP_ERROR; crm_info("Remapping %s (rc=%d) on %s to an ERROR (expected %d)", id, actual_rc_i, node->details->uname, target_rc); } } else if(task_status_i == LRM_OP_ERROR) { /* let us decide that */ crm_debug("Remapping %s (rc=%d, status=%d) on %s to DONE", id, actual_rc_i, task_status_i, node->details->uname); task_status_i = LRM_OP_DONE; } if(task_status_i == LRM_OP_NOTSUPPORTED) { actual_rc_i = EXECRA_UNIMPLEMENT_FEATURE; } /* we could clean this up significantly except for old LRMs and CRMs that * didnt include target_rc and liked to remap status */ switch(actual_rc_i) { case EXECRA_NOT_RUNNING: if(is_probe || target_rc == actual_rc_i) { task_status_i = LRM_OP_DONE; rsc->role = RSC_ROLE_STOPPED; /* clear any previous failure actions */ *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; } else if(safe_str_neq(task, CRMD_ACTION_STOP)) { task_status_i = LRM_OP_ERROR; } break; case EXECRA_RUNNING_MASTER: if(is_probe) { task_status_i = LRM_OP_DONE; crm_warn("%s found active %s in master mode on %s", id, rsc->id, node->details->uname); } else if(target_rc == actual_rc_i) { /* nothing to do */ } else if(target_rc >= 0) { task_status_i = LRM_OP_ERROR; /* legacy code for pre-0.6.5 operations */ } else if(safe_str_neq(task, CRMD_ACTION_STATUS) || rsc->role != RSC_ROLE_MASTER) { task_status_i = LRM_OP_ERROR; if(rsc->role != RSC_ROLE_MASTER) { crm_err("%s reported %s in master mode on %s", id, rsc->id, node->details->uname); } } rsc->role = RSC_ROLE_MASTER; break; case EXECRA_FAILED_MASTER: rsc->role = RSC_ROLE_MASTER; task_status_i = LRM_OP_ERROR; break; case EXECRA_UNIMPLEMENT_FEATURE: if(interval > 0) { task_status_i = LRM_OP_NOTSUPPORTED; break; } /* else: fall through */ case EXECRA_INSUFFICIENT_PRIV: case EXECRA_NOT_INSTALLED: effective_node = node; /* fall through */ case EXECRA_NOT_CONFIGURED: case EXECRA_INVALID_PARAM: crm_err("Hard error: %s failed with rc=%d.", id, actual_rc_i); if(effective_node) { crm_err(" Preventing %s from re-starting on %s", rsc->id, effective_node->details->uname); } else { crm_err(" Preventing %s from re-starting anywhere in the cluster", rsc->id); } resource_location(rsc, effective_node, -INFINITY, "hard-error", data_set); if(is_probe) { /* treat these like stops */ task = CRMD_ACTION_STOP; - - } else { - task_status_i = LRM_OP_ERROR; + task_status_i = LRM_OP_DONE; } break; case EXECRA_OK: if(is_probe && target_rc == 7) { task_status_i = LRM_OP_DONE; crm_warn("%s found active %s on %s", id, rsc->id, node->details->uname); /* legacy code for pre-0.6.5 operations */ } else if(target_rc < 0 && interval > 0 && rsc->role == RSC_ROLE_MASTER) { /* catch status ops that return 0 instead of 8 while they * are supposed to be in master mode */ task_status_i = LRM_OP_ERROR; } break; default: if(task_status_i == LRM_OP_DONE) { crm_info("Remapping %s (rc=%d) on %s to an ERROR", id, actual_rc_i, node->details->uname); task_status_i = LRM_OP_ERROR; } } if(task_status_i == LRM_OP_ERROR || task_status_i == LRM_OP_TIMEOUT || task_status_i == LRM_OP_NOTSUPPORTED) { action = custom_action(rsc, crm_strdup(id), task, NULL, TRUE, FALSE, data_set); if(action->on_fail == action_fail_ignore) { task_status_i = LRM_OP_DONE; } } switch(task_status_i) { case LRM_OP_PENDING: if(safe_str_eq(task, CRMD_ACTION_START)) { set_bit(rsc->flags, pe_rsc_start_pending); rsc->role = RSC_ROLE_STARTED; } else if(safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } break; case LRM_OP_DONE: crm_debug_3("%s/%s completed on %s", rsc->id, task, node->details->uname); if(actual_rc_i == EXECRA_NOT_RUNNING) { /* nothing to do */ } else if(safe_str_eq(task, CRMD_ACTION_STOP)) { rsc->role = RSC_ROLE_STOPPED; /* clear any previous failure actions */ *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; } else if(safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } else if(safe_str_eq(task, CRMD_ACTION_DEMOTE)) { rsc->role = RSC_ROLE_SLAVE; } else if(rsc->role < RSC_ROLE_STARTED) { crm_debug_3("%s active on %s", rsc->id, node->details->uname); rsc->role = RSC_ROLE_STARTED; } break; case LRM_OP_ERROR: case LRM_OP_TIMEOUT: case LRM_OP_NOTSUPPORTED: crm_warn("Processing failed op %s on %s: %s", id, node->details->uname, op_status2text(task_status_i)); crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname); add_node_copy(data_set->failed, xml_op); if(*on_fail < action->on_fail) { *on_fail = action->on_fail; } if(safe_str_eq(task, CRMD_ACTION_STOP)) { resource_location( rsc, node, -INFINITY, "__stop_fail__", data_set); } else if(safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } else if(safe_str_eq(task, CRMD_ACTION_DEMOTE)) { /* * staying in role=master ends up putting the PE/TE into a loop * setting role=slave is not dangerous because no master will be * promoted until the failed resource has been fully stopped */ crm_warn("Forcing %s to stop after a failed demote action", rsc->id); rsc->next_role = RSC_ROLE_STOPPED; rsc->role = RSC_ROLE_SLAVE; } else if((data_set->start_failure_fatal || compare_version("2.0", op_version) > 0) && safe_str_eq(task, CRMD_ACTION_START)) { crm_warn("Compatability handling for failed op %s on %s", id, node->details->uname); resource_location( rsc, node, -INFINITY, "__legacy_start__", data_set); } if(rsc->role < RSC_ROLE_STARTED) { rsc->role = RSC_ROLE_STARTED; } crm_debug_2("Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s", rsc->id, role2text(rsc->role), node->details->unclean?"true":"false", fail2text(action->on_fail), role2text(action->fail_role)); if(action->fail_role != RSC_ROLE_STARTED && rsc->next_role < action->fail_role) { rsc->next_role = action->fail_role; } if(action->fail_role == RSC_ROLE_STOPPED) { crm_err("Making sure %s doesn't come up again", rsc->id); /* make sure it doesnt come up again */ pe_free_shallow_adv(rsc->allowed_nodes, TRUE); rsc->allowed_nodes = node_list_dup( data_set->nodes, FALSE, FALSE); slist_iter( node, node_t, rsc->allowed_nodes, lpc, node->weight = -INFINITY; ); } pe_free_action(action); action = NULL; break; case LRM_OP_CANCELLED: /* do nothing?? */ pe_err("Dont know what to do for cancelled ops yet"); break; } crm_debug_3("Resource %s after %s: role=%s", rsc->id, task, role2text(rsc->role)); pe_free_action(action); return TRUE; } gboolean add_node_attrs(crm_data_t *xml_obj, node_t *node, pe_working_set_t *data_set) { g_hash_table_insert(node->details->attrs, crm_strdup("#"XML_ATTR_UNAME), crm_strdup(node->details->uname)); g_hash_table_insert(node->details->attrs, crm_strdup("#"XML_ATTR_ID), crm_strdup(node->details->id)); if(safe_str_eq(node->details->id, data_set->dc_uuid)) { data_set->dc_node = node; node->details->is_dc = TRUE; g_hash_table_insert(node->details->attrs, crm_strdup("#"XML_ATTR_DC), crm_strdup(XML_BOOLEAN_TRUE)); } else { g_hash_table_insert(node->details->attrs, crm_strdup("#"XML_ATTR_DC), crm_strdup(XML_BOOLEAN_FALSE)); } unpack_instance_attributes( xml_obj, XML_TAG_ATTR_SETS, NULL, node->details->attrs, NULL, data_set->now); return TRUE; } diff --git a/lib/crm/pengine/utils.c b/lib/crm/pengine/utils.c index 06d6be77d9..66771d1d2c 100644 --- a/lib/crm/pengine/utils.c +++ b/lib/crm/pengine/utils.c @@ -1,1249 +1,1250 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include void print_str_str(gpointer key, gpointer value, gpointer user_data); gboolean ghash_free_str_str(gpointer key, gpointer value, gpointer user_data); void unpack_operation( action_t *action, crm_data_t *xml_obj, pe_working_set_t* data_set); void pe_free_shallow(GListPtr alist) { pe_free_shallow_adv(alist, TRUE); } void pe_free_shallow_adv(GListPtr alist, gboolean with_data) { GListPtr item; GListPtr item_next = alist; if(with_data == FALSE && alist != NULL) { g_list_free(alist); return; } while(item_next != NULL) { item = item_next; item_next = item_next->next; if(with_data) { /* crm_debug_5("freeing %p", item->data); */ crm_free(item->data); } item->data = NULL; item->next = NULL; g_list_free_1(item); } } node_t * node_copy(node_t *this_node) { node_t *new_node = NULL; CRM_CHECK(this_node != NULL, return NULL); crm_malloc0(new_node, sizeof(node_t)); CRM_ASSERT(new_node != NULL); crm_debug_5("Copying %p (%s) to %p", this_node, this_node->details->uname, new_node); new_node->weight = this_node->weight; new_node->fixed = this_node->fixed; new_node->details = this_node->details; return new_node; } /* are the contents of list1 and list2 equal * nodes with weight < 0 are ignored if filter == TRUE * * slow but linear * */ gboolean node_list_eq(GListPtr list1, GListPtr list2, gboolean filter) { node_t *other_node; GListPtr lhs = list1; GListPtr rhs = list2; slist_iter( node, node_t, lhs, lpc, if(node == NULL || (filter && node->weight < 0)) { continue; } other_node = (node_t*) pe_find_node_id(rhs, node->details->id); if(other_node == NULL || other_node->weight < 0) { return FALSE; } ); lhs = list2; rhs = list1; slist_iter( node, node_t, lhs, lpc, if(node == NULL || (filter && node->weight < 0)) { continue; } other_node = (node_t*) pe_find_node_id(rhs, node->details->id); if(other_node == NULL || other_node->weight < 0) { return FALSE; } ); return TRUE; } /* any node in list1 or list2 and not in the other gets a score of -INFINITY */ GListPtr node_list_exclude(GListPtr list1, GListPtr list2) { node_t *other_node = NULL; GListPtr result = NULL; result = node_list_dup(list1, FALSE, FALSE); slist_iter( node, node_t, result, lpc, other_node = pe_find_node_id(list2, node->details->id); if(other_node == NULL) { node->weight = -INFINITY; } else { node->weight = merge_weights(node->weight, other_node->weight); } ); slist_iter( node, node_t, list2, lpc, other_node = pe_find_node_id(result, node->details->id); if(other_node == NULL) { node_t *new_node = node_copy(node); new_node->weight = -INFINITY; result = g_list_append(result, new_node); } ); return result; } /* the intersection of list1 and list2 */ GListPtr node_list_and(GListPtr list1, GListPtr list2, gboolean filter) { GListPtr result = NULL; unsigned lpc = 0; for(lpc = 0; lpc < g_list_length(list1); lpc++) { node_t *node = (node_t*)g_list_nth_data(list1, lpc); node_t *other_node = pe_find_node_id(list2, node->details->id); node_t *new_node = NULL; if(other_node != NULL) { new_node = node_copy(node); } if(new_node != NULL) { crm_debug_4("%s: %d + %d", node->details->uname, other_node->weight, new_node->weight); new_node->weight = merge_weights( new_node->weight, other_node->weight); crm_debug_3("New node weight for %s: %d", new_node->details->uname, new_node->weight); if(filter && new_node->weight < 0) { crm_free(new_node); new_node = NULL; } } if(new_node != NULL) { result = g_list_append(result, new_node); } } return result; } /* list1 - list2 */ GListPtr node_list_minus(GListPtr list1, GListPtr list2, gboolean filter) { GListPtr result = NULL; slist_iter( node, node_t, list1, lpc, node_t *other_node = pe_find_node_id(list2, node->details->id); node_t *new_node = NULL; if(node == NULL || other_node != NULL || (filter && node->weight < 0)) { continue; } new_node = node_copy(node); result = g_list_append(result, new_node); ); crm_debug_3("Minus result len: %d", g_list_length(result)); return result; } /* list1 + list2 - (intersection of list1 and list2) */ GListPtr node_list_xor(GListPtr list1, GListPtr list2, gboolean filter) { GListPtr result = NULL; slist_iter( node, node_t, list1, lpc, node_t *new_node = NULL; node_t *other_node = (node_t*) pe_find_node_id(list2, node->details->id); if(node == NULL || other_node != NULL || (filter && node->weight < 0)) { continue; } new_node = node_copy(node); result = g_list_append(result, new_node); ); slist_iter( node, node_t, list2, lpc, node_t *new_node = NULL; node_t *other_node = (node_t*) pe_find_node_id(list1, node->details->id); if(node == NULL || other_node != NULL || (filter && node->weight < 0)) { continue; } new_node = node_copy(node); result = g_list_append(result, new_node); ); crm_debug_3("Xor result len: %d", g_list_length(result)); return result; } GListPtr node_list_or(GListPtr list1, GListPtr list2, gboolean filter) { node_t *other_node = NULL; GListPtr result = NULL; gboolean needs_filter = FALSE; result = node_list_dup(list1, FALSE, filter); slist_iter( node, node_t, list2, lpc, if(node == NULL) { continue; } other_node = (node_t*)pe_find_node_id( result, node->details->id); if(other_node != NULL) { crm_debug_4("%s + %s: %d + %d", node->details->uname, other_node->details->uname, node->weight, other_node->weight); other_node->weight = merge_weights( other_node->weight, node->weight); if(filter && node->weight < 0) { needs_filter = TRUE; } } else if(filter == FALSE || node->weight >= 0) { node_t *new_node = node_copy(node); result = g_list_append(result, new_node); } ); /* not the neatest way, but the most expedient for now */ if(filter && needs_filter) { GListPtr old_result = result; result = node_list_dup(old_result, FALSE, filter); pe_free_shallow_adv(old_result, TRUE); } return result; } GListPtr node_list_dup(GListPtr list1, gboolean reset, gboolean filter) { GListPtr result = NULL; slist_iter( this_node, node_t, list1, lpc, node_t *new_node = NULL; if(filter && this_node->weight < 0) { continue; } new_node = node_copy(this_node); if(reset) { new_node->weight = 0; } if(new_node != NULL) { result = g_list_append(result, new_node); } ); return result; } void dump_node_scores(int level, resource_t *rsc, const char *comment, GListPtr nodes) { GListPtr list = nodes; if(rsc) { list = rsc->allowed_nodes; } slist_iter( node, node_t, list, lpc, if(rsc) { do_crm_log(level, "%s: %s allocation score on %s: %d", comment, rsc->id, node->details->uname, node->weight); } else { do_crm_log(level, "%s: %s = %d", comment, node->details->uname, node->weight); } ); if(rsc && rsc->children) { slist_iter( child, resource_t, rsc->children, lpc, dump_node_scores(level, child, comment, nodes); ); } } gint sort_rsc_index(gconstpointer a, gconstpointer b) { const resource_t *resource1 = (const resource_t*)a; const resource_t *resource2 = (const resource_t*)b; if(a == NULL && b == NULL) { return 0; } if(a == NULL) { return 1; } if(b == NULL) { return -1; } if(resource1->sort_index > resource2->sort_index) { return -1; } if(resource1->sort_index < resource2->sort_index) { return 1; } return 0; } gint sort_rsc_priority(gconstpointer a, gconstpointer b) { const resource_t *resource1 = (const resource_t*)a; const resource_t *resource2 = (const resource_t*)b; if(a == NULL && b == NULL) { return 0; } if(a == NULL) { return 1; } if(b == NULL) { return -1; } if(resource1->priority > resource2->priority) { return -1; } if(resource1->priority < resource2->priority) { return 1; } return 0; } action_t * custom_action(resource_t *rsc, char *key, const char *task, node_t *on_node, gboolean optional, gboolean save_action, pe_working_set_t *data_set) { action_t *action = NULL; GListPtr possible_matches = NULL; CRM_CHECK(key != NULL, return NULL); CRM_CHECK(task != NULL, return NULL); if(save_action && rsc != NULL) { possible_matches = find_actions(rsc->actions, key, on_node); } if(possible_matches != NULL) { crm_free(key); if(g_list_length(possible_matches) > 1) { pe_warn("Action %s for %s on %s exists %d times", task, rsc?rsc->id:"", on_node?on_node->details->uname:"", g_list_length(possible_matches)); } action = g_list_nth_data(possible_matches, 0); crm_debug_4("Found existing action (%d) %s for %s on %s", action->id, task, rsc?rsc->id:"", on_node?on_node->details->uname:""); g_list_free(possible_matches); } if(action == NULL) { if(save_action) { crm_debug_4("Creating%s action %d: %s for %s on %s", optional?"":" manditory", data_set->action_id, key, rsc?rsc->id:"", on_node?on_node->details->uname:""); } crm_malloc0(action, sizeof(action_t)); if(save_action) { action->id = data_set->action_id++; } else { action->id = 0; } action->rsc = rsc; CRM_ASSERT(task != NULL); action->task = crm_strdup(task); action->node = on_node; action->uuid = key; action->actions_before = NULL; action->actions_after = NULL; action->failure_is_fatal = TRUE; action->pseudo = FALSE; action->dumped = FALSE; action->runnable = TRUE; action->processed = FALSE; action->optional = optional; action->seen_count = 0; action->extra = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); action->meta = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if(save_action) { data_set->actions = g_list_append( data_set->actions, action); } if(rsc != NULL) { action->op_entry = find_rsc_op_entry(rsc, key); unpack_operation( action, action->op_entry, data_set); if(save_action) { rsc->actions = g_list_append( rsc->actions, action); } } if(save_action) { crm_debug_4("Action %d created", action->id); } } if(optional == FALSE && action->optional) { crm_debug_2("Action %d (%s) marked manditory", action->id, action->uuid); action->optional = FALSE; } if(rsc != NULL) { enum action_tasks a_task = text2task(action->task); int warn_level = LOG_DEBUG_3; if(save_action) { warn_level = LOG_WARNING; } if(action->node != NULL && action->op_entry != NULL) { unpack_instance_attributes( action->op_entry, XML_TAG_ATTR_SETS, action->node->details->attrs, action->extra, NULL, data_set->now); } if(action->pseudo) { /* leave untouched */ } else if(action->node == NULL) { action->runnable = FALSE; - } else if(is_not_set(rsc->flags, pe_rsc_managed)) { + } else if(g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL) == NULL + && is_not_set(rsc->flags, pe_rsc_managed)) { do_crm_log(warn_level, "Action %s (unmanaged)", action->uuid); action->optional = TRUE; /* action->runnable = FALSE; */ } else if(action->node->details->online == FALSE) { action->runnable = FALSE; do_crm_log(warn_level, "Action %s on %s is unrunnable (offline)", action->uuid, action->node->details->uname); if(is_set(action->rsc->flags, pe_rsc_managed) && save_action && a_task == stop_rsc) { do_crm_log(warn_level, "Marking node %s unclean", action->node->details->uname); action->node->details->unclean = TRUE; } } else if(action->needs == rsc_req_nothing) { crm_debug_3("Action %s doesnt require anything", action->uuid); action->runnable = TRUE; #if 0 /* * No point checking this * - if we dont have quorum we cant stonith anyway */ } else if(action->needs == rsc_req_stonith) { crm_debug_3("Action %s requires only stonith", action->uuid); action->runnable = TRUE; #endif } else if(data_set->have_quorum == FALSE && data_set->no_quorum_policy == no_quorum_stop) { action->runnable = FALSE; crm_debug("%s\t%s (cancelled : quorum)", action->node->details->uname, action->uuid); } else if(data_set->have_quorum == FALSE && data_set->no_quorum_policy == no_quorum_freeze) { crm_debug_3("Check resource is already active"); if(rsc->fns->active(rsc, TRUE) == FALSE) { action->runnable = FALSE; crm_debug("%s\t%s (cancelled : quorum freeze)", action->node->details->uname, action->uuid); } } else { crm_debug_3("Action %s is runnable", action->uuid); action->runnable = TRUE; } if(save_action) { switch(a_task) { case stop_rsc: set_bit(rsc->flags, pe_rsc_stopping); break; case start_rsc: clear_bit(rsc->flags, pe_rsc_starting); if(action->runnable) { set_bit(rsc->flags, pe_rsc_starting); } break; default: break; } } } return action; } void unpack_operation( action_t *action, crm_data_t *xml_obj, pe_working_set_t* data_set) { int value_i = 0; int start_delay = 0; char *value_ms = NULL; const char *class = NULL; const char *value = NULL; const char *field = NULL; CRM_CHECK(action->rsc != NULL, return); class = g_hash_table_lookup(action->rsc->meta, "class"); if(xml_obj != NULL) { value = crm_element_value(xml_obj, "prereq"); } if(value == NULL && safe_str_eq(action->task, CRMD_ACTION_START)) { value = g_hash_table_lookup(action->rsc->meta, "start_prereq"); } if(value == NULL && safe_str_neq(action->task, CRMD_ACTION_START)) { /* todo: integrate stop as an option? */ action->needs = rsc_req_nothing; value = "nothing (default)"; } else if(safe_str_eq(value, "nothing")) { action->needs = rsc_req_nothing; } else if(safe_str_eq(value, "quorum")) { action->needs = rsc_req_quorum; } else if(safe_str_eq(value, "fencing")) { action->needs = rsc_req_stonith; } else if(data_set->no_quorum_policy == no_quorum_ignore || safe_str_eq(class, "stonith")) { action->needs = rsc_req_nothing; value = "nothing (default)"; } else if(data_set->no_quorum_policy == no_quorum_freeze && data_set->stonith_enabled) { action->needs = rsc_req_stonith; value = "fencing (default)"; } else { action->needs = rsc_req_quorum; value = "quorum (default)"; } if(safe_str_eq(class, "stonith")) { if(action->needs == rsc_req_stonith) { crm_config_err("Stonith resources (eg. %s) cannot require" " fencing to start", action->rsc->id); } action->needs = rsc_req_nothing; value = "nothing (fencing override)"; } crm_debug_3("\tAction %s requires: %s", action->task, value); value = NULL; if(xml_obj != NULL) { value = crm_element_value(xml_obj, "on_fail"); } if(value == NULL && safe_str_eq(action->task, CRMD_ACTION_STOP)) { value = g_hash_table_lookup( action->rsc->meta, "on_stopfail"); if(value != NULL) { #if CRM_DEPRECATED_SINCE_2_0_2 crm_config_err("The \"on_stopfail\" attribute used in" " %s has been deprecated since 2.0.2", action->rsc->id); #else crm_config_err("The \"on_stopfail\" attribute used in" " %s has been deprecated since 2.0.2" " and is now disabled", action->rsc->id); value = NULL; #endif crm_config_err("Please use specify the \"on_fail\"" " attribute on the \"stop\" operation" " instead"); } } if(value == NULL) { } else if(safe_str_eq(value, "block")) { action->on_fail = action_fail_block; } else if(safe_str_eq(value, "fence")) { action->on_fail = action_fail_fence; value = "node fencing"; if(data_set->stonith_enabled == FALSE) { crm_config_err("Specifying on_fail=fence and" " stonith-enabled=false makes no sense"); action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop resource"; } } else if(safe_str_eq(value, "ignore") || safe_str_eq(value, "nothing")) { action->on_fail = action_fail_ignore; value = "ignore"; } else if(safe_str_eq(value, "migrate")) { action->on_fail = action_fail_migrate; value = "force migration"; } else if(safe_str_eq(value, "stop")) { action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop resource"; } else if(safe_str_eq(value, "restart")) { action->on_fail = action_fail_recover; value = "restart (and possibly migrate)"; } else { pe_err("Resource %s: Unknown failure type (%s)", action->rsc->id, value); value = NULL; } /* defaults */ if(value == NULL && safe_str_eq(action->task, CRMD_ACTION_STOP)) { if(data_set->stonith_enabled) { action->on_fail = action_fail_fence; value = "resource fence (default)"; } else { action->on_fail = action_fail_block; value = "resource block (default)"; } } else if(value == NULL && safe_str_eq(action->task, CRMD_ACTION_MIGRATED)) { action->on_fail = action_migrate_failure; value = "atomic migration recovery (default)"; } else if(value == NULL) { action->on_fail = action_fail_recover; value = "restart (and possibly migrate) (default)"; } crm_debug_3("\t%s failure handling: %s", action->task, value); value = NULL; if(xml_obj != NULL) { value = crm_element_value(xml_obj, "role_after_failure"); } if(value != NULL && action->fail_role == RSC_ROLE_UNKNOWN) { action->fail_role = text2role(value); } /* defaults */ if(action->fail_role == RSC_ROLE_UNKNOWN) { if(safe_str_eq(action->task, CRMD_ACTION_PROMOTE)) { action->fail_role = RSC_ROLE_SLAVE; } else { action->fail_role = RSC_ROLE_STARTED; } } crm_debug_3("\t%s failure results in: %s", action->task, role2text(action->fail_role)); if(xml_obj != NULL) { xml_prop_iter(xml_obj, p_name, p_value, if(p_value != NULL) { g_hash_table_insert(action->meta, crm_strdup(p_name), crm_strdup(p_value)); } ); unpack_instance_attributes(xml_obj, XML_TAG_META_SETS, NULL, action->meta, NULL, data_set->now); unpack_instance_attributes(xml_obj, XML_TAG_ATTR_SETS, NULL, action->meta, NULL, data_set->now); } field = XML_LRM_ATTR_INTERVAL; value = g_hash_table_lookup(action->meta, field); if(value != NULL) { value_i = crm_get_msec(value); CRM_CHECK(value_i >= 0, value_i = 0); value_ms = crm_itoa(value_i); g_hash_table_replace(action->meta, crm_strdup(field), value_ms); } field = "start_delay"; value = g_hash_table_lookup(action->meta, field); if(value != NULL) { value_i = crm_get_msec(value); if(value_i < 0) { value_i = 0; } start_delay = value_i; value_ms = crm_itoa(value_i); g_hash_table_replace(action->meta, crm_strdup(field), value_ms); } field = "timeout"; value = g_hash_table_lookup(action->meta, field); if(value == NULL) { value = pe_pref( data_set->config_hash, "default-action-timeout"); } value_i = crm_get_msec(value); if(value_i < 0) { value_i = 0; } value_i += start_delay; value_ms = crm_itoa(value_i); g_hash_table_replace(action->meta, crm_strdup(field), value_ms); } crm_data_t * find_rsc_op_entry(resource_t *rsc, const char *key) { int number = 0; const char *name = NULL; const char *value = NULL; const char *interval = NULL; char *match_key = NULL; crm_data_t *op = NULL; xml_child_iter_filter( rsc->ops_xml, operation, "op", name = crm_element_value(operation, "name"); interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); value = crm_element_value(operation, "disabled"); if(crm_is_true(value)) { crm_debug_2("%s disabled", ID(operation)); continue; } number = crm_get_msec(interval); if(number < 0) { continue; } match_key = generate_op_key(rsc->id, name, number); if(safe_str_eq(key, match_key)) { op = operation; } crm_free(match_key); if(op != NULL) { return op; } ); crm_debug_3("No match for %s", key); return op; } void print_node(const char *pre_text, node_t *node, gboolean details) { if(node == NULL) { crm_debug_4("%s%s: ", pre_text==NULL?"":pre_text, pre_text==NULL?"":": "); return; } crm_debug_4("%s%s%sNode %s: (weight=%d, fixed=%s)", pre_text==NULL?"":pre_text, pre_text==NULL?"":": ", node->details==NULL?"error ":node->details->online?"":"Unavailable/Unclean ", node->details->uname, node->weight, node->fixed?"True":"False"); if(details && node != NULL && node->details != NULL) { char *pe_mutable = crm_strdup("\t\t"); crm_debug_4("\t\t===Node Attributes"); g_hash_table_foreach(node->details->attrs, print_str_str, pe_mutable); crm_free(pe_mutable); crm_debug_4("\t\t=== Resources"); slist_iter( rsc, resource_t, node->details->running_rsc, lpc, print_resource(LOG_DEBUG_4, "\t\t", rsc, FALSE); ); } } /* * Used by the HashTable for-loop */ void print_str_str(gpointer key, gpointer value, gpointer user_data) { crm_debug_4("%s%s %s ==> %s", user_data==NULL?"":(char*)user_data, user_data==NULL?"":": ", (char*)key, (char*)value); } void print_resource( int log_level, const char *pre_text, resource_t *rsc, gboolean details) { long options = pe_print_log; if(rsc == NULL) { do_crm_log(log_level-1, "%s%s: ", pre_text==NULL?"":pre_text, pre_text==NULL?"":": "); return; } if(details) { options |= pe_print_details; } rsc->fns->print(rsc, pre_text, options, &log_level); } void pe_free_action(action_t *action) { if(action == NULL) { return; } pe_free_shallow(action->actions_before);/* action_warpper_t* */ pe_free_shallow(action->actions_after); /* action_warpper_t* */ g_hash_table_destroy(action->extra); g_hash_table_destroy(action->meta); crm_free(action->task); crm_free(action->uuid); crm_free(action); } GListPtr find_recurring_actions(GListPtr input, node_t *not_on_node) { const char *value = NULL; GListPtr result = NULL; CRM_CHECK(input != NULL, return NULL); slist_iter( action, action_t, input, lpc, value = g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL); if(value == NULL) { /* skip */ } else if(safe_str_eq(value, "0")) { /* skip */ } else if(safe_str_eq(CRMD_ACTION_CANCEL, action->task)) { /* skip */ } else if(not_on_node == NULL) { crm_debug_5("(null) Found: %s", action->uuid); result = g_list_append(result, action); } else if(action->node == NULL) { /* skip */ } else if(action->node->details != not_on_node->details) { crm_debug_5("Found: %s", action->uuid); result = g_list_append(result, action); } ); return result; } GListPtr find_actions(GListPtr input, const char *key, node_t *on_node) { GListPtr result = NULL; CRM_CHECK(key != NULL, return NULL); slist_iter( action, action_t, input, lpc, crm_debug_5("Matching %s against %s", key, action->uuid); if(safe_str_neq(key, action->uuid)) { continue; } else if(on_node == NULL) { result = g_list_append(result, action); } else if(action->node == NULL) { /* skip */ crm_debug_2("While looking for %s action on %s, " "found an unallocated one. Assigning" " it to the requested node...", key, on_node->details->uname); action->node = on_node; result = g_list_append(result, action); } else if(safe_str_eq(on_node->details->id, action->node->details->id)) { result = g_list_append(result, action); } ); return result; } GListPtr find_actions_exact(GListPtr input, const char *key, node_t *on_node) { GListPtr result = NULL; CRM_CHECK(key != NULL, return NULL); slist_iter( action, action_t, input, lpc, crm_debug_5("Matching %s against %s", key, action->uuid); if(safe_str_neq(key, action->uuid)) { crm_debug_3("Key mismatch: %s vs. %s", key, action->uuid); continue; } else if(on_node == NULL || action->node == NULL) { crm_debug_3("on_node=%p, action->node=%p", on_node, action->node); continue; } else if(safe_str_eq(on_node->details->id, action->node->details->id)) { result = g_list_append(result, action); } crm_debug_2("Node mismatch: %s vs. %s", on_node->details->id, action->node->details->id); ); return result; } void set_id(crm_data_t * xml_obj, const char *prefix, int child) { int id_len = 0; gboolean use_prefix = TRUE; gboolean use_child = TRUE; char *new_id = NULL; const char *id = crm_element_value(xml_obj, XML_ATTR_ID); id_len = 1 + strlen(id); if(child > 999) { pe_err("Are you insane?!?" " The CRM does not support > 1000 children per resource"); return; } else if(child < 0) { use_child = FALSE; } else { id_len += 4; /* child */ } if(prefix == NULL || safe_str_eq(id, prefix)) { use_prefix = FALSE; } else { id_len += (1 + strlen(prefix)); } crm_malloc0(new_id, id_len); if(use_child) { snprintf(new_id, id_len, "%s%s%s:%d", use_prefix?prefix:"", use_prefix?":":"", id, child); } else { snprintf(new_id, id_len, "%s%s%s", use_prefix?prefix:"", use_prefix?":":"", id); } crm_xml_add(xml_obj, XML_ATTR_ID, new_id); crm_free(new_id); } static void resource_node_score(resource_t *rsc, node_t *node, int score, const char *tag) { node_t *match = NULL; if(rsc->children) { slist_iter( child_rsc, resource_t, rsc->children, lpc, resource_node_score(child_rsc, node, score, tag); ); } crm_debug_2("Setting %s for %s on %s: %d", tag, rsc->id, node->details->uname, score); match = pe_find_node_id(rsc->allowed_nodes, node->details->id); if(match == NULL) { match = node_copy(node); match->weight = 0; rsc->allowed_nodes = g_list_append(rsc->allowed_nodes, match); } match->weight = merge_weights(match->weight, score); } void resource_location(resource_t *rsc, node_t *node, int score, const char *tag, pe_working_set_t *data_set) { if(node != NULL) { resource_node_score(rsc, node, score, tag); } else if(data_set != NULL) { slist_iter( node, node_t, data_set->nodes, lpc, resource_node_score(rsc, node, score, tag); ); } else { slist_iter( node, node_t, rsc->allowed_nodes, lpc, resource_node_score(rsc, node, score, tag); ); } if(node == NULL && score == -INFINITY) { if(rsc->allocated_to) { crm_info("Deallocating %s from %s", rsc->id, rsc->allocated_to->details->uname); crm_free(rsc->allocated_to); rsc->allocated_to = NULL; } } } #define sort_return(an_int) crm_free(a_uuid); crm_free(b_uuid); return an_int gint sort_op_by_callid(gconstpointer a, gconstpointer b) { char *a_uuid = NULL; char *b_uuid = NULL; const char *a_task_id = cl_get_string(a, XML_LRM_ATTR_CALLID); const char *b_task_id = cl_get_string(b, XML_LRM_ATTR_CALLID); const char *a_key = cl_get_string(a, XML_ATTR_TRANSITION_MAGIC); const char *b_key = cl_get_string(b, XML_ATTR_TRANSITION_MAGIC); const char *a_xml_id = ID(a); const char *b_xml_id = ID(b); int dummy = -1; int a_id = -1; int b_id = -1; int a_rc = -1; int b_rc = -1; int a_status = -1; int b_status = -1; int a_call_id = -1; int b_call_id = -1; if(safe_str_eq(a_xml_id, b_xml_id)) { /* We have duplicate lrm_rsc_op entries in the status * section which is unliklely to be a good thing * - we can handle it easily enough, but we need to get * to the bottom of why its happening. */ pe_err("Duplicate lrm_rsc_op entries named %s", a_xml_id); sort_return(0); } CRM_CHECK(a_task_id != NULL && b_task_id != NULL, sort_return(0)); a_call_id = crm_parse_int(a_task_id, NULL); b_call_id = crm_parse_int(b_task_id, NULL); if(a_call_id == -1 && b_call_id == -1) { /* both are pending ops so it doesnt matter since * stops are never pending */ sort_return(0); } else if(a_call_id >= 0 && a_call_id < b_call_id) { crm_debug_4("%s (%d) < %s (%d) : call id", ID(a), a_call_id, ID(b), b_call_id); sort_return(-1); } else if(b_call_id >= 0 && a_call_id > b_call_id) { crm_debug_4("%s (%d) > %s (%d) : call id", ID(a), a_call_id, ID(b), b_call_id); sort_return(1); } crm_debug_5("%s (%d) == %s (%d) : continuing", ID(a), a_call_id, ID(b), b_call_id); /* now process pending ops */ CRM_CHECK(a_key != NULL && b_key != NULL, sort_return(0)); CRM_CHECK(decode_transition_magic( a_key, &a_uuid, &a_id, &dummy, &a_status, &a_rc, &dummy), sort_return(0)); CRM_CHECK(decode_transition_magic( b_key, &b_uuid, &b_id, &dummy, &b_status, &b_rc, &dummy), sort_return(0)); /* try and determin the relative age of the operation... * some pending operations (ie. a start) may have been supuerceeded * by a subsequent stop * * [a|b]_id == -1 means its a shutdown operation and _always_ comes last */ if(safe_str_neq(a_uuid, b_uuid) || a_id == b_id) { /* * some of the logic in here may be redundant... * * if the UUID from the TE doesnt match then one better * be a pending operation. * pending operations dont survive between elections and joins * because we query the LRM directly */ CRM_CHECK(a_call_id == -1 || b_call_id == -1, sort_return(0)); CRM_CHECK(a_call_id >= 0 || b_call_id >= 0, sort_return(0)); if(b_call_id == -1) { crm_debug_2("%s (%d) < %s (%d) : transition + call id", ID(a), a_call_id, ID(b), b_call_id); sort_return(-1); } if(a_call_id == -1) { crm_debug_2("%s (%d) > %s (%d) : transition + call id", ID(a), a_call_id, ID(b), b_call_id); sort_return(1); } } else if((a_id >= 0 && a_id < b_id) || b_id == -1) { crm_debug_3("%s (%d) < %s (%d) : transition", ID(a), a_id, ID(b), b_id); sort_return(-1); } else if((b_id >= 0 && a_id > b_id) || a_id == -1) { crm_debug_3("%s (%d) > %s (%d) : transition", ID(a), a_id, ID(b), b_id); sort_return(1); } /* we should never end up here */ crm_err("%s (%d:%d:%s) ?? %s (%d:%d:%s) : default", ID(a), a_call_id, a_id, a_uuid, ID(b), b_call_id, b_id, b_uuid); CRM_CHECK(FALSE, sort_return(0)); } diff --git a/include/crm/common/Makefile.am b/lib/fencing/Makefile.am similarity index 52% copy from include/crm/common/Makefile.am copy to lib/fencing/Makefile.am index c07bc616d5..80242d2e69 100644 --- a/include/crm/common/Makefile.am +++ b/lib/fencing/Makefile.am @@ -1,23 +1,36 @@ -# -# Copyright (C) 2004 Andrew Beekhof +# File: Makefile.am +# Author: Sun Jiang Dong +# Copyright (c) 2004 International Business Machines # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # - MAINTAINERCLEANFILES = Makefile.in -headerdir=$(includedir)/@PKG_NAME@/crm/common +INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ + -I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl \ + -I$(top_builddir)/linux-ha -I$(top_srcdir)/linux-ha \ + -I$(top_builddir) -I$(top_srcdir) + +halibdir = $(libdir)/@HB_PKG@ +havarlibdir = $(localstatedir)/lib/@HB_PKG@ + +COMMONLIBS = -lplumb -lhbclient \ + $(GLIBLIB) + +lib_LTLIBRARIES = libstonithd.la +libstonithd_la_SOURCES = stonithd_lib.c stonithd_msg.c +libstonithd_la_LDFLAGS = $(COMMONLIBS) -header_HEADERS = xml.h ipc.h msg.h ctrl.h util.h iso8601.h +AM_CFLAGS = $(INCLUDES) diff --git a/lib/Makefile.am b/lib/plugins/Makefile.am similarity index 98% copy from lib/Makefile.am copy to lib/plugins/Makefile.am index 386f183668..ae427607d9 100644 --- a/lib/Makefile.am +++ b/lib/plugins/Makefile.am @@ -1,34 +1,34 @@ # # heartbeat library directory: Linux-HA code # # Copyright (C) 2001 Alan Robertson # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in # # PILS is supposed to be an independent library # Stonith should rely only on PILS # # The rest can use clplumbing, and the plugins should be able # to use any of the libraries (nothing can be linked against them) # # ## Subdirectories... -SUBDIRS = crm +SUBDIRS = lrm diff --git a/lib/plugins/lrm/Makefile.am b/lib/plugins/lrm/Makefile.am new file mode 100644 index 0000000000..cfc8ae9ed3 --- /dev/null +++ b/lib/plugins/lrm/Makefile.am @@ -0,0 +1,42 @@ +# +# Author: Sun Jiang Dong +# Copyright (c) 2004 International Business Machines +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# +MAINTAINERCLEANFILES = Makefile.in +LRM_DIR = lrm +INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ + -I$(top_builddir)/linux-ha -I$(top_srcdir)/linux-ha \ + -I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl + +halibdir = $(libdir)/@HB_PKG@ +havarlibdir = $(localstatedir)/lib/@HB_PKG@ +COMMONLIBS = -lplumb \ + $(GLIBLIB) + +lrmdir = $(HA_VARLIBDIR)/$(HB_PKG)/$(LRM_DIR) +plugindir = $(halibdir)/plugins/RAExec + +plugin_LTLIBRARIES = stonith.la + +stonith_la_SOURCES = raexecstonith.c +stonith_la_LDFLAGS = -lpils -export-dynamic -module -avoid-version \ + -L$(top_builddir)/lib/fencing -lstonithd -lstonith -llrm + +install-exec-local: + $(mkinstalldirs) $(DESTDIR)$(lrmdir) + -chgrp $(HA_APIGROUP) $(DESTDIR)/$(lrmdir) + chmod 770 $(DESTDIR)/$(lrmdir) diff --git a/lib/plugins/lrm/raexecstonith.c b/lib/plugins/lrm/raexecstonith.c new file mode 100644 index 0000000000..ef4f88dfeb --- /dev/null +++ b/lib/plugins/lrm/raexecstonith.c @@ -0,0 +1,390 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * File: raexecocf.c + * Author: Sun Jiang Dong + * Copyright (c) 2004 International Business Machines + * + * This code implements the Resource Agent Plugin Module for LSB style. + * It's a part of Local Resource Manager. Currently it's used by lrmd only. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* Add it for compiling on OSX */ +#include + +#include +#include +#include + +# define PIL_PLUGINTYPE RA_EXEC_TYPE +# define PIL_PLUGINTYPE_S "RAExec" +# define PIL_PLUGINLICENSE LICENSE_PUBDOM +# define PIL_PLUGINLICENSEURL URL_PUBDOM + +# define PIL_PLUGIN stonith +# define PIL_PLUGIN_S "stonith" + +static PIL_rc close_stonithRA(PILInterface*, void* ud_interface); + +/* static const char * RA_PATH = STONITH_RA_DIR; */ +/* Temporarily use it */ +static const char * RA_PATH = HA_LIBHBDIR "/stonith/plugins/stonith/"; + +/* The begin of exported function list */ +static int execra(const char * rsc_id, + const char * rsc_type, + const char * provider, + const char * op_type, + const int timeout, + GHashTable * params); +static uniform_ret_execra_t map_ra_retvalue(int ret_execra + , const char * op_type, const char * std_output); +static int get_resource_list(GList ** rsc_info); +static char* get_resource_meta(const char* rsc_type, const char* provider); +static int get_provider_list(const char* op_type, GList ** providers); + +/* The end of exported function list */ + +/* The begin of internal used function & data list */ +static int get_providers(const char* class_path, const char* op_type, + GList ** providers); +static void stonithRA_ops_callback(stonithRA_ops_t * op, void * private_data); +static int exit_value; +/* The end of internal function & data list */ + +/* Rource agent execution plugin operations */ +static struct RAExecOps raops = +{ execra, + map_ra_retvalue, + get_resource_list, + get_provider_list, + get_resource_meta +}; + +static const char META_TEMPLATE[] = +"\n" +"\n" +"\n" +"1.0\n" +"\n" +"%s\n" +"\n" +"%s\n" +"%s\n" +"\n" +"\n" +"\n" +"\n" +"\n" +"\n" +"\n" +"\n" +"2.0\n" +"\n" +"\n"; + +static const char * no_parameter_info = ""; + +#define CHECKMETANULL(ret, which) \ + if (ret == NULL) { \ + cl_log(LOG_WARNING, "stonithRA plugin: cannot get %s " \ + "segment of %s's metadata.", which, rsc_type); \ + ret = no_parameter_info; \ + } +#define xmlize(p) \ + ( p ? (char *)xmlEncodeEntitiesReentrant(NULL, \ + (const unsigned char *)p) \ + : NULL ) +#define zapxml(p) do { \ + if( p ) { \ + xmlFree(p); \ + } \ +} while(0) + +PIL_PLUGIN_BOILERPLATE2("1.0", Debug); + +static const PILPluginImports* PluginImports; +static PILPlugin* OurPlugin; +static PILInterface* OurInterface; +static void* OurImports; +static void* interfprivate; + +/* + * Our plugin initialization and registration function + * It gets called when the plugin gets loaded. + */ +PIL_rc +PIL_PLUGIN_INIT(PILPlugin * us, const PILPluginImports* imports); + +PIL_rc +PIL_PLUGIN_INIT(PILPlugin * us, const PILPluginImports* imports) +{ + /* Force the compiler to do a little type checking */ + (void)(PILPluginInitFun)PIL_PLUGIN_INIT; + + PluginImports = imports; + OurPlugin = us; + + /* Register ourself as a plugin */ + imports->register_plugin(us, &OurPIExports); + + /* Register our interfaces */ + return imports->register_interface(us, PIL_PLUGINTYPE_S, PIL_PLUGIN_S, + &raops, close_stonithRA, &OurInterface, &OurImports, + interfprivate); +} + +static PIL_rc +close_stonithRA(PILInterface* pif, void* ud_interface) +{ + return PIL_OK; +} + +/* + * Most of the oprations will be sent to sotnithd directly, such as 'start', + * 'stop', 'monitor'. And others like 'meta-data' will be handled by itself + * locally. + * Some of important parameters' name: + * config_file + * config_string + */ +static int +execra(const char * rsc_id, const char * rsc_type, const char * provider, + const char * op_type,const int timeout, GHashTable * params) +{ + stonithRA_ops_t * op; + int call_id = -1; + char buffer_tmp[32]; + + /* Handling "meta-data" operation in a special way. + * Now handle "meta-data" operation locally. + * Should be changed in the future? + */ + if ( 0 == STRNCMP_CONST(op_type, "meta-data")) { + char * tmp; + tmp = get_resource_meta(rsc_type, provider); + printf("%s", tmp); + g_free(tmp); + exit(0); + } + + g_snprintf(buffer_tmp, sizeof(buffer_tmp), "%s_%d" + , "STONITH_RA_EXEC", getpid()); + if (ST_OK != stonithd_signon(buffer_tmp)) { + cl_log(LOG_ERR, "%s:%d: Cannot sign on the stonithd." + , __FUNCTION__, __LINE__); + exit(EXECRA_UNKNOWN_ERROR); + } + + stonithd_set_stonithRA_ops_callback(stonithRA_ops_callback, &call_id); + + /* Temporarily donnot use it, but how to deal with the global OCF + * variables. This is a important thing to think about and do. + */ + /* send the RA operation to stonithd to simulate a RA's actions */ + if ( 0==STRNCMP_CONST(op_type, "start") + || 0==STRNCMP_CONST(op_type, "stop") ) { + cl_log(LOG_INFO + , "Try to %s STONITH resource : Device=%s" + , op_type, rsc_id, rsc_type); + } + + op = g_new(stonithRA_ops_t, 1); + op->ra_name = g_strdup(rsc_type); + op->op_type = g_strdup(op_type); + op->params = params; + op->rsc_id = g_strdup(rsc_id); + if (ST_OK != stonithd_virtual_stonithRA_ops(op, &call_id)) { + cl_log(LOG_ERR, "sending stonithRA op to stonithd failed."); + /* Need to improve the granularity for error return code */ + stonithd_signoff(); + exit(EXECRA_EXEC_UNKNOWN_ERROR); + } + + /* May be redundant */ + /* + while (stonithd_op_result_ready() != TRUE) { + ; + } + */ + /* cl_log(LOG_DEBUG, "Will call stonithd_receive_ops_result."); */ + if (ST_OK != stonithd_receive_ops_result(TRUE)) { + cl_log(LOG_ERR, "stonithd_receive_ops_result failed."); + /* Need to improve the granularity for error return code */ + stonithd_signoff(); + exit(EXECRA_EXEC_UNKNOWN_ERROR); + } + + /* exit_value will be setted by the callback function */ + g_free(op->ra_name); + g_free(op->op_type); + g_free(op->rsc_id); + g_free(op); + + stonithd_signoff(); + /* cl_log(LOG_DEBUG, "stonithRA orignal exit code=%d", exit_value); */ + exit(map_ra_retvalue(exit_value, op_type, NULL)); +} + +static void +stonithRA_ops_callback(stonithRA_ops_t * op, void * private_data) +{ + /* cl_log(LOG_DEBUG, "setting exit code=%d", exit_value); */ + exit_value = op->op_result; +} + +static uniform_ret_execra_t +map_ra_retvalue(int ret_execra, const char * op_type, const char * std_output) +{ + /* Because the UNIFORM_RET_EXECRA is compatible with OCF standard, no + * actual mapping except validating, which ensure the return code + * will be in the range 0 to 7. Too strict? + */ + if (ret_execra < 0 || + ret_execra > EXECRA_STATUS_UNKNOWN) { + cl_log(LOG_WARNING, "mapped the invalid return code %d." + , ret_execra); + ret_execra = EXECRA_UNKNOWN_ERROR; + } + return ret_execra; +} + +static int +get_resource_list(GList ** rsc_info) +{ + int rc; + int needprivs = !cl_have_full_privs(); + + if ( rsc_info == NULL ) { + cl_log(LOG_ERR, "Parameter error: get_resource_list"); + return -2; + } + + if ( *rsc_info != NULL ) { + cl_log(LOG_ERR, "Parameter error: get_resource_list."\ + "will cause memory leak."); + *rsc_info = NULL; + } + + if (needprivs) { + return_to_orig_privs(); + } + if (ST_OK != stonithd_signon("STONITH_RA")) { + cl_log(LOG_ERR, "%s:%d: Can not signon to the stonithd." + , __FUNCTION__, __LINE__); + rc = -1; + } else { + rc = stonithd_list_stonith_types(rsc_info); + stonithd_signoff(); + } + + if (needprivs) { + return_to_dropped_privs(); + } + return rc; +} + +static int +get_provider_list(const char* op_type, GList ** providers) +{ + int ret; + ret = get_providers(RA_PATH, op_type, providers); + if (0>ret) { + cl_log(LOG_ERR, "scandir failed in stonith RA plugin"); + } + return ret; +} + +static char * +get_resource_meta(const char* rsc_type, const char* provider) +{ + char * buffer; + int bufferlen = 0; + const char * meta_param = NULL; + const char * meta_longdesc = NULL; + const char * meta_shortdesc = NULL; + char *xml_meta_longdesc = NULL; + char *xml_meta_shortdesc = NULL; + Stonith * stonith_obj = NULL; + + if ( provider != NULL ) { + cl_log(LOG_DEBUG, "stonithRA plugin: provider attribute " + "is not needed and will be ignored."); + } + + stonith_obj = stonith_new(rsc_type); + meta_longdesc = stonith_get_info(stonith_obj, ST_DEVICEDESCR); + CHECKMETANULL(meta_longdesc, "longdesc") + xml_meta_longdesc = xmlize(meta_longdesc); + meta_shortdesc = stonith_get_info(stonith_obj, ST_DEVICENAME); + CHECKMETANULL(meta_shortdesc, "shortdesc") + xml_meta_shortdesc = xmlize(meta_shortdesc); + meta_param = stonith_get_info(stonith_obj, ST_CONF_XML); + CHECKMETANULL(meta_param, "parameters") + + + bufferlen = STRLEN_CONST(META_TEMPLATE) + strlen(rsc_type) + + strlen(xml_meta_longdesc) + strlen(xml_meta_shortdesc) + + strlen(meta_param) + 1; + buffer = g_new(char, bufferlen); + buffer[bufferlen-1] = '\0'; + snprintf(buffer, bufferlen-1, META_TEMPLATE, rsc_type + , xml_meta_longdesc, xml_meta_shortdesc, meta_param); + stonith_delete(stonith_obj); + zapxml(xml_meta_longdesc); + zapxml(xml_meta_shortdesc); + + return buffer; +} + +/* + * Currently should return *providers = NULL, but remain the old code for + * possible unsing in the future + */ +static int +get_providers(const char* class_path, const char* op_type, GList ** providers) +{ + if ( providers == NULL ) { + cl_log(LOG_ERR, "%s:%d: Parameter error: providers==NULL" + , __FUNCTION__, __LINE__); + return -2; + } + + if ( *providers != NULL ) { + cl_log(LOG_ERR, "%s:%d: Parameter error: *providers==NULL." + "This will cause memory leak." + , __FUNCTION__, __LINE__); + } + + /* Now temporarily make it fixed */ + *providers = g_list_append(*providers, g_strdup("heartbeat")); + + return g_list_length(*providers); +} diff --git a/pacemaker.spec b/pacemaker.spec index f34ac7d4bc..b433a35633 100644 --- a/pacemaker.spec +++ b/pacemaker.spec @@ -1,292 +1,294 @@ # # spec file for package Pacemaker (Version 0.7.0) # # Copyright (c) 2006 SUSE LINUX Products GmbH, Nuernberg, Germany. # This file and all modifications and additions to the pristine # package are under the same license as the package itself. # # Please submit bugfixes or comments via http://bugs.opensuse.org/ # # norootforbuild %define with_extra_warnings 0 %define with_debugging 0 %define without_fatal_warnings 1 %define with_ais_support 1 %define with_heartbeat_support 1 %define with_snmp_support 1 %define pkg_group Productivity/Clustering/HA %if 0%{?fedora_version} %define pkg_group System Environment/Daemons %endif %define gname haclient %define uname hacluster Name: pacemaker Summary: The Pacemaker scalable High-Availability cluster resource manager Version: 0.6.2 Release: 1 License: GPL2/LGPL2 URL: http://www.clusterlabs.org Group: %{pkg_group} Source: pacemaker.tar.gz BuildRoot: %{_tmppath}/%{name}-%{version}-build Autoreqprov: on %if %with_ais_support BuildRequires: openais-devel %endif %if %with_heartbeat_support BuildRequires: heartbeat heartbeat-devel > 2.1.2 %endif %if %{with_ais_support} %if %{with_heartbeat_support} Conflicts: pacemaker-ais Conflicts: pacemaker-heartbeat %else Conflicts: pacemaker Conflicts: pacemaker-heartbeat %endif %else Conflicts: pacemaker Conflicts: pacemaker-ais %endif BuildRequires: heartbeat-common heartbeat-common-devel e2fsprogs-devel glib2-devel gnutls-devel libxml2-devel pam-devel python-devel swig %if 0%{?suse_version} %if 0%{?suse_version} > 1000 %if %with_ais_support Supplements: openais %endif %if %with_heartbeat_support Supplements: heartbeat %endif %endif %if 0%{?suse_version} == 930 BuildRequires: rpm-devel %endif %if 0%{?suse_version} == 1000 BuildRequires: lzo lzo-devel %endif %if 0%{?suse_version} < 1020 BuildRequires: tcpd-devel %endif %if 0%{?sles_version} == 9 BuildRequires: pkgconfig %endif %endif %if 0%{?fedora_version} || 0%{?centos_version} || 0%{?rhel_version} BuildRequires: which %endif %if 0%{?fedora_version} == 8 BuildRequires: openssl-devel %endif %if 0%{?mandriva_version} BuildRequires: libbzip2-devel %endif %description Pacemaker is an advanced, scalable High-Availability cluster resource manager for Linux-HA (Heartbeat) and/or OpenAIS. It supports "n-node" clusters with significant capabilities for managing resources and dependencies. It will run scripts at initialization, when machines go up or down, when related resources fail and can be configured to periodically check resource health. %if 0%{?suse_version} %debug_package %endif %package devel Summary: Pacemaker development package Group: %{pkg_group} Requires: %{name} = %{version}-%{release} %description devel Header files and shared libraries needed for developing programs based on the Pacemaker High-Availability cluster resource manager. %prep ########################################################### %setup -n pacemaker ########################################################### %build # TODO: revisit -all CFLAGS="${CFLAGS} ${RPM_OPT_FLAGS}" # Feature-dependent CFLAGS: %if %with_extra_warnings # CFLAGS="${CFLAGS} -Wshadow -Wfloat-equal -Waggregate-return -Wnested-externs -Wunreachable-code -Wendif-labels -Winline" CFLAGS="${CFLAGS} -Wfloat-equal -Wendif-labels -Winline" %endif %if %with_debugging CFLAGS="${CFLAGS} -O0" %endif # Distribution specific settings: %if 0%{?suse_version} > 1001 CFLAGS="${CFLAGS} -fstack-protector-all" %endif %if 0%{?suse_version} < 1001 export PKG_CONFIG_PATH="$PKG_CONFIG_PATH:/opt/gnome/%{_lib}/pkgconfig:/opt/gnome/share/pkgconfig" %endif %if 0%{?suse_version} > 1020 CFLAGS="$CFLAGS -fgnu89-inline" %endif %if 0%{?fedora_version} > 6 CFLAGS="$CFLAGS -fgnu89-inline" %endif export CFLAGS ./ConfigureMe configure --prefix=%{_prefix} --sysconfdir=%{_sysconfdir} \ --localstatedir=%{_var} --infodir=%{_infodir} \ --mandir=%{_mandir} --libdir=%{_libdir} \ --libexecdir=%{_libexecdir} \ --with-group-name=%{gname} --with-ccmuser-name=%{uname} \ --with-hapkgversion=%{version} \ --enable-glib-malloc \ %if %with_snmp_support == 1 --enable-snmp-subagent \ %else --disable-snmp-subagent \ %endif --with-ais-prefix=%{_prefix} \ %if %with_ais_support == 0 --without-ais-support \ %endif %if %with_heartbeat_support == 0 --without-heartbeat-support \ %endif %if %without_fatal_warnings --enable-fatal-warnings=no \ %endif --enable-pretty export MAKE="make %{?jobs:-j%jobs}" make %{?jobs:-j%jobs} ########################################################### %install ########################################################### #make DESTDIR=$RPM_BUILD_ROOT install-strip rm -rf $RPM_BUILD_ROOT mkdir -p $RPM_BUILD_ROOT make DESTDIR=$RPM_BUILD_ROOT install #%if %with_ais_support # mkdir -p $RPM_BUILD_ROOT/%{_libexecdir}/lcrso # cp $RPM_BUILD_ROOT/%{_libdir}/service_crm.so $RPM_BUILD_ROOT/%{_libexecdir}/lcrso/pacemaker.lcrso #%endif # Cleanup [ -d $RPM_BUILD_ROOT/usr/man ] && rm -rf $RPM_BUILD_ROOT/usr/man [ -d $RPM_BUILD_ROOT/usr/share/libtool ] && rm -rf $RPM_BUILD_ROOT/usr/share/libtool find $RPM_BUILD_ROOT -name '*.a' -type f -print0 | xargs -0 rm -f find $RPM_BUILD_ROOT -name '*.la' -type f -print0 | xargs -0 rm -f ########################################################### %clean ########################################################### if [ -n "${RPM_BUILD_ROOT}" -a "${RPM_BUILD_ROOT}" != "/" ] then rm -rf $RPM_BUILD_ROOT fi rm -rf $RPM_BUILD_DIR/pacemaker ########################################################### %pre %preun # Use the following if more commands need to be executed # %post # /sbin/ldconfig # [...] # http://en.opensuse.org/SUSE_Package_Conventions/RPM_Macros %post -p /sbin/ldconfig %postun -p /sbin/ldconfig %files ########################################################### %defattr(-,root,root) %dir %{_libdir}/heartbeat %{_prefix}/share/pacemaker %{_prefix}/share/heartbeat %{_libdir}/heartbeat/* %dir %{_var}/lib/heartbeat %{_libdir}/libcib.so.* %{_libdir}/libcrmcommon.so.* %{_libdir}/libcrmcluster.so.* #%{_libdir}/heartbeat/crm_primitive.py %{_libdir}/libpe_status.so.* %{_libdir}/libpe_rules.so.* %{_libdir}/libpengine.so.* %{_libdir}/libtransitioner.so.* +%{_libdir}/libstonithd.so.* %{_sbindir}/cibadmin %{_sbindir}/crm_attribute %{_sbindir}/crm_diff %{_sbindir}/crm_failcount %{_sbindir}/crm_master %{_sbindir}/crm_mon %{_sbindir}/crm_sh %{_sbindir}/crm_resource %{_sbindir}/crm_standby %{_sbindir}/crm_uuid %{_sbindir}/crm_verify %{_sbindir}/crmadmin %{_sbindir}/iso8601 %{_sbindir}/ccm_tool %{_sbindir}/attrd_updater %{_sbindir}/ptest %doc %{_mandir}/man8/cibadmin.8* %doc %{_mandir}/man8/crm_resource.8* %dir %attr (750, %{uname}, %{gname}) %{_var}/lib/heartbeat/crm %dir %attr (750, %{uname}, %{gname}) %{_var}/lib/heartbeat/pengine %dir %attr (750, %{uname}, %{gname}) %{_var}/run/heartbeat/crm %if %with_ais_support %{_libexecdir}/lcrso/pacemaker.lcrso %endif %if %with_snmp_support == 1 /usr/share/snmp/mibs/LINUX-HA-MIB.mib %endif %files devel %defattr(-,root,root) #%doc %{_datadir}/doc/%{name}-%{version} %{_includedir}/pacemaker +%{_includedir}/heartbeat/fencing %{_libdir}/*.so %changelog pacemaker diff --git a/pengine/graph.c b/pengine/graph.c index 644aef1fae..030b4f1555 100644 --- a/pengine/graph.c +++ b/pengine/graph.c @@ -1,729 +1,723 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include gboolean update_action(action_t *action); gboolean update_action_states(GListPtr actions) { crm_debug_2("Updating %d actions", g_list_length(actions)); slist_iter( action, action_t, actions, lpc, update_action(action); ); return TRUE; } gboolean update_action(action_t *action) { int local_type = 0; int default_log_level = LOG_DEBUG_3; int log_level = default_log_level; gboolean changed = FALSE; do_crm_log(log_level, "Processing action %s: %s %s %s", action->uuid, action->optional?"optional":"required", action->runnable?"runnable":"unrunnable", action->pseudo?"pseudo":action->task); slist_iter( other, action_wrapper_t, action->actions_before, lpc, gboolean other_changed = FALSE; node_t *node = other->action->node; resource_t *other_rsc = other->action->rsc; enum rsc_role_e other_role = RSC_ROLE_UNKNOWN; if(other_rsc) { other_role = other_rsc->fns->state(other_rsc, TRUE); } if(other->type & pe_order_test) { log_level = LOG_NOTICE; do_crm_log(log_level, "Processing action %s: %s %s %s", action->uuid, action->optional?"optional":"required", action->runnable?"runnable":"unrunnable", action->pseudo?"pseudo":action->task); } else { log_level = default_log_level; } do_crm_log(log_level, " Checking action %s: %s %s %s (flags=0x%.6x)", other->action->uuid, other->action->optional?"optional":"required", other->action->runnable?"runnable":"unrunnable", other->action->pseudo?"pseudo":other->action->task, other->type); local_type = other->type; if((local_type & pe_order_demote_stop) && other->action->pseudo == FALSE && other_role > RSC_ROLE_SLAVE && node != NULL && node->details->online) { local_type |= pe_order_implies_left; do_crm_log(log_level,"Upgrading demote->stop constraint to implies_left"); } if((local_type & pe_order_demote) && other->action->pseudo == FALSE && other_role > RSC_ROLE_SLAVE && node != NULL && node->details->online) { local_type |= pe_order_runnable_left; do_crm_log(log_level,"Upgrading restart constraint to runnable_left"); } if((local_type & pe_order_complex_right) && action->optional && other->action->optional == FALSE) { local_type |= pe_order_implies_right; do_crm_log(log_level,"Upgrading complex constraint to implies_right"); } if((local_type & pe_order_complex_left) && action->optional == FALSE && other->action->optional) { local_type |= pe_order_implies_left; do_crm_log(log_level,"Upgrading complex constraint to implies_left"); } if((local_type & pe_order_shutdown) && action->optional && other->action->optional == FALSE && is_set(other_rsc->flags, pe_rsc_shutdown)) { action->optional = FALSE; changed = TRUE; do_crm_log(log_level-1, " * Marking action %s manditory because of %s (complex)", action->uuid, other->action->uuid); } if((local_type & pe_order_restart) && other_role > RSC_ROLE_STOPPED) { if(other_rsc->variant == pe_native) { local_type |= pe_order_implies_left; do_crm_log(log_level,"Upgrading restart constraint to implies_left"); } if(other->action->optional && other->action->runnable && action->runnable == FALSE) { do_crm_log(log_level-1, " * Marking action %s manditory because %s is unrunnable", other->action->uuid, action->uuid); other->action->optional = FALSE; set_bit(other_rsc->flags, pe_rsc_shutdown); other_changed = TRUE; } } if((local_type & pe_order_runnable_left) && other->action->runnable == FALSE) { if(other->action->implied_by_stonith) { do_crm_log(log_level, "Ignoring un-runnable - implied_by_stonith"); } else if(action->runnable == FALSE) { do_crm_log(log_level+1, "Already un-runnable"); } else { action->runnable = FALSE; do_crm_log(log_level-1, " * Marking action %s un-runnable because of %s", action->uuid, other->action->uuid); changed = TRUE; } } if((local_type & pe_order_runnable_right) && action->runnable == FALSE) { if(action->pseudo) { do_crm_log(log_level, "Ignoring un-runnable - pseudo"); } else if(other->action->runnable == FALSE) { do_crm_log(log_level+1, "Already un-runnable"); } else { other->action->runnable = FALSE; do_crm_log(log_level-1, " * Marking action %s un-runnable because of %s", other->action->uuid, action->uuid); other_changed = TRUE; } } if(local_type & pe_order_implies_left) { if(other->action->optional == FALSE) { /* nothing to do */ do_crm_log(log_level+1, " Ignoring implies left - redundant"); } else if(safe_str_eq(other->action->task, CRMD_ACTION_STOP) && other_role == RSC_ROLE_STOPPED) { do_crm_log(log_level-1, " Ignoring implies left - %s already stopped", other_rsc->id); } else if((local_type & pe_order_demote) && other_rsc->role < RSC_ROLE_MASTER) { do_crm_log(log_level-1, " Ignoring implies left - %s already demoted", other_rsc->id); } else if(action->optional == FALSE) { other->action->optional = FALSE; do_crm_log(log_level-1, " * (implies left) Marking action %s mandatory because of %s", other->action->uuid, action->uuid); other_changed = TRUE; } else { do_crm_log(log_level, " Ignoring implies left"); } } if(local_type & pe_order_implies_left_printed) { if(other->action->optional == TRUE && other->action->print_always == FALSE) { if(action->optional == FALSE || (other->action->pseudo && action->print_always)) { other_changed = TRUE; other->action->print_always = TRUE; do_crm_log(log_level-1, " * (implies left) Ensuring action %s is included because of %s", other->action->uuid, action->uuid); } } } if(local_type & pe_order_implies_right) { if(action->optional == FALSE) { /* nothing to do */ do_crm_log(log_level+1, " Ignoring implies right - redundant"); } else if(other->action->optional == FALSE) { action->optional = FALSE; do_crm_log(log_level-1, " * (implies right) Marking action %s mandatory because of %s", action->uuid, other->action->uuid); changed = TRUE; } else { do_crm_log(log_level, " Ignoring implies right"); } } if(local_type & pe_order_implies_right_printed) { if(action->optional == TRUE && action->print_always == FALSE) { if(other->action->optional == FALSE || (action->pseudo && other->action->print_always)) { changed = TRUE; action->print_always = TRUE; do_crm_log(log_level-1, " * (implies right) Ensuring action %s is included because of %s", action->uuid, other->action->uuid); } } } if(other_changed) { do_crm_log(log_level, "%s changed, processing after list", other->action->uuid); update_action(other->action); slist_iter( before_other, action_wrapper_t, other->action->actions_after, lpc2, do_crm_log(log_level, "%s changed, processing %s", other->action->uuid, before_other->action->uuid); update_action(before_other->action); ); slist_iter( before_other, action_wrapper_t, other->action->actions_before, lpc2, do_crm_log(log_level, "%s changed, processing %s", other->action->uuid, before_other->action->uuid); update_action(before_other->action); ); } ); if(changed) { update_action(action); do_crm_log(log_level, "%s changed, processing after list", action->uuid); slist_iter( other, action_wrapper_t, action->actions_after, lpc, do_crm_log(log_level, "%s changed, processing %s", action->uuid, other->action->uuid); update_action(other->action); ); do_crm_log(log_level, "%s changed, processing before list", action->uuid); slist_iter( other, action_wrapper_t, action->actions_before, lpc, do_crm_log(log_level, "%s changed, processing %s", action->uuid, other->action->uuid); update_action(other->action); ); } return FALSE; } gboolean shutdown_constraints( node_t *node, action_t *shutdown_op, pe_working_set_t *data_set) { /* add the stop to the before lists so it counts as a pre-req * for the shutdown */ slist_iter( rsc, resource_t, node->details->running_rsc, lpc, if(is_not_set(rsc->flags, pe_rsc_managed)) { continue; } custom_action_order( rsc, stop_key(rsc), NULL, NULL, crm_strdup(CRM_OP_SHUTDOWN), shutdown_op, pe_order_implies_left, data_set); ); return TRUE; } gboolean stonith_constraints( node_t *node, action_t *stonith_op, pe_working_set_t *data_set) { CRM_CHECK(stonith_op != NULL, return FALSE); /* * Make sure the stonith OP occurs before we start any shared resources */ if(stonith_op != NULL) { slist_iter( rsc, resource_t, data_set->resources, lpc, rsc->cmds->stonith_ordering(rsc, stonith_op, data_set); ); } /* add the stonith OP as a stop pre-req and the mark the stop * as a pseudo op - since its now redundant */ return TRUE; } static void dup_attr(gpointer key, gpointer value, gpointer user_data) { g_hash_table_replace(user_data, crm_strdup(key), crm_strdup(value)); } crm_data_t * action2xml(action_t *action, gboolean as_input) { gboolean needs_node_info = TRUE; crm_data_t * action_xml = NULL; crm_data_t * args_xml = NULL; char *action_id_s = NULL; if(action == NULL) { return NULL; } crm_debug_4("Dumping action %d as XML", action->id); if(safe_str_eq(action->task, CRM_OP_FENCE)) { action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); /* needs_node_info = FALSE; */ } else if(safe_str_eq(action->task, CRM_OP_SHUTDOWN)) { action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); } else if(safe_str_eq(action->task, CRM_OP_LRM_REFRESH)) { action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); /* } else if(safe_str_eq(action->task, CRMD_ACTION_PROBED)) { */ /* action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); */ } else if(action->pseudo) { action_xml = create_xml_node(NULL, XML_GRAPH_TAG_PSEUDO_EVENT); needs_node_info = FALSE; } else { action_xml = create_xml_node(NULL, XML_GRAPH_TAG_RSC_OP); } action_id_s = crm_itoa(action->id); crm_xml_add(action_xml, XML_ATTR_ID, action_id_s); crm_free(action_id_s); crm_xml_add(action_xml, XML_LRM_ATTR_TASK, action->task); if(action->rsc != NULL && action->rsc->clone_name != NULL) { char *clone_key = NULL; const char *interval_s = g_hash_table_lookup(action->meta, "interval"); int interval = crm_parse_int(interval_s, "0"); if(safe_str_eq(action->task, CRMD_ACTION_NOTIFY)) { const char *n_type = g_hash_table_lookup( action->extra, crm_meta_name("notify_type")); const char *n_task = g_hash_table_lookup( action->extra, crm_meta_name("notify_operation")); CRM_CHECK(n_type != NULL, ;); CRM_CHECK(n_task != NULL, ;); clone_key = generate_notify_key(action->rsc->clone_name, n_type, n_task); } else { clone_key = generate_op_key(action->rsc->clone_name, action->task, interval); } crm_xml_add(action_xml, XML_LRM_ATTR_TASK_KEY, clone_key); crm_xml_add(action_xml, "internal_"XML_LRM_ATTR_TASK_KEY, action->uuid); crm_free(clone_key); } else { crm_xml_add(action_xml, XML_LRM_ATTR_TASK_KEY, action->uuid); } if(needs_node_info && action->node != NULL) { crm_xml_add(action_xml, XML_LRM_ATTR_TARGET, action->node->details->uname); crm_xml_add(action_xml, XML_LRM_ATTR_TARGET_UUID, action->node->details->id); } if(action->failure_is_fatal == FALSE) { add_hash_param(action->meta, XML_ATTR_TE_ALLOWFAIL, XML_BOOLEAN_TRUE); } if(as_input) { return action_xml; } if(action->notify_keys != NULL) { g_hash_table_foreach( action->notify_keys, dup_attr, action->meta); } if(action->rsc != NULL && action->pseudo == FALSE) { int lpc = 0; crm_data_t *rsc_xml = create_xml_node( action_xml, crm_element_name(action->rsc->xml)); const char *attr_list[] = { XML_AGENT_ATTR_CLASS, XML_AGENT_ATTR_PROVIDER, XML_ATTR_TYPE }; if(action->rsc->clone_name != NULL) { crm_debug("Using clone name %s for %s", action->rsc->clone_name, action->rsc->id); crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->clone_name); crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id); } else { crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->id); crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->long_name); } for(lpc = 0; lpc < DIMOF(attr_list); lpc++) { crm_xml_add(rsc_xml, attr_list[lpc], g_hash_table_lookup(action->rsc->meta, attr_list[lpc])); } } args_xml = create_xml_node(action_xml, XML_TAG_ATTRS); crm_xml_add(args_xml, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); g_hash_table_foreach(action->extra, hash2field, args_xml); if(action->rsc != NULL && safe_str_neq(action->task, CRMD_ACTION_STOP)) { g_hash_table_foreach(action->rsc->parameters, hash2field, args_xml); } g_hash_table_foreach(action->meta, hash2metafield, args_xml); if(action->rsc != NULL) { int lpc = 0; const char *key = NULL; const char *value = NULL; const char *meta_list[] = { XML_RSC_ATTR_UNIQUE, XML_RSC_ATTR_INCARNATION, XML_RSC_ATTR_INCARNATION_MAX, XML_RSC_ATTR_INCARNATION_NODEMAX, XML_RSC_ATTR_MASTER_MAX, XML_RSC_ATTR_MASTER_NODEMAX, }; for(lpc = 0; lpc < DIMOF(meta_list); lpc++) { key = meta_list[lpc]; value = g_hash_table_lookup(action->rsc->meta, key); if(value != NULL) { char *crm_name = crm_concat(CRM_META, key, '_'); crm_xml_add(args_xml, crm_name, value); crm_free(crm_name); } } } crm_log_xml_debug_4(action_xml, "dumped action"); return action_xml; } static gboolean should_dump_action(action_t *action) { int log_filter = LOG_DEBUG_5; CRM_CHECK(action != NULL, return FALSE); if(action->dumped) { do_crm_log(log_filter, "action %d (%s) was already dumped", action->id, action->uuid); return FALSE; } else if(action->runnable == FALSE) { do_crm_log(log_filter, "action %d (%s) was not runnable", action->id, action->uuid); return FALSE; } else if(action->optional && action->print_always == FALSE) { do_crm_log(log_filter, "action %d (%s) was optional", action->id, action->uuid); return FALSE; } else if(action->rsc != NULL && is_not_set(action->rsc->flags, pe_rsc_managed)) { const char * interval = NULL; interval = g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL); - /* make sure probes go through */ - if(safe_str_neq(action->task, CRMD_ACTION_STATUS)) { - pe_warn("action %d (%s) was for an unmanaged resource (%s)", - action->id, action->uuid, action->rsc->id); - return FALSE; - } - - if(interval != NULL && safe_str_neq(interval, "0")) { + /* make sure probes and recurring monitors go through */ + if(safe_str_neq(action->task, CRMD_ACTION_STATUS) && interval == NULL) { pe_warn("action %d (%s) was for an unmanaged resource (%s)", action->id, action->uuid, action->rsc->id); return FALSE; } } if(action->pseudo || safe_str_eq(action->task, CRM_OP_FENCE) || safe_str_eq(action->task, CRM_OP_SHUTDOWN)) { /* skip the next checks */ return TRUE; } if(action->node == NULL) { pe_err("action %d (%s) was not allocated", action->id, action->uuid); log_action(LOG_DEBUG, "Unallocated action", action, FALSE); return FALSE; } else if(action->node->details->online == FALSE) { pe_err("action %d was (%s) scheduled for offline node", action->id, action->uuid); log_action(LOG_DEBUG, "Action for offline node", action, FALSE); return FALSE; #if 0 /* but this would also affect resources that can be safely * migrated before a fencing op */ } else if(action->node->details->unclean == FALSE) { pe_err("action %d was (%s) scheduled for unclean node", action->id, action->uuid); log_action(LOG_DEBUG, "Action for unclean node", action, FALSE); return FALSE; #endif } return TRUE; } /* lowest to highest */ static gint sort_action_id(gconstpointer a, gconstpointer b) { const action_wrapper_t *action_wrapper2 = (const action_wrapper_t*)a; const action_wrapper_t *action_wrapper1 = (const action_wrapper_t*)b; if(a == NULL) { return 1; } if(b == NULL) { return -1; } if(action_wrapper1->action->id > action_wrapper2->action->id) { return -1; } if(action_wrapper1->action->id < action_wrapper2->action->id) { return 1; } return 0; } static gboolean should_dump_input(int last_action, action_t *action, action_wrapper_t *wrapper) { int type = wrapper->type; int log_dump = LOG_DEBUG_3; int log_filter = LOG_DEBUG_3; type &= ~pe_order_implies_left_printed; type &= ~pe_order_implies_right_printed; type &= ~pe_order_optional; wrapper->state = pe_link_not_dumped; if(last_action == wrapper->action->id) { do_crm_log(log_filter, "Input (%d) %s duplicated for %s", wrapper->action->id, wrapper->action->uuid, action->uuid); wrapper->state = pe_link_dup; return FALSE; } else if(wrapper->type == pe_order_none) { do_crm_log(log_filter, "Input (%d) %s suppressed for %s", wrapper->action->id, wrapper->action->uuid, action->uuid); return FALSE; } else if(wrapper->action->runnable == FALSE && type == pe_order_none) { do_crm_log(log_filter, "Input (%d) %s optional (ordering) for %s", wrapper->action->id, wrapper->action->uuid, action->uuid); return FALSE; } else if(action->pseudo && (wrapper->type & pe_order_stonith_stop)) { do_crm_log(log_filter, "Input (%d) %s suppressed for %s", wrapper->action->id, wrapper->action->uuid, action->uuid); return FALSE; } else if(wrapper->action->dumped || should_dump_action(wrapper->action)) { do_crm_log(log_dump, "Input (%d) %s should be dumped for %s", wrapper->action->id, wrapper->action->uuid, action->uuid); goto dump; #if 0 } if(wrapper->action->runnable && wrapper->action->pseudo && wrapper->action->rsc->variant != pe_native) { do_crm_log(LOG_CRIT, "Input (%d) %s should be dumped for %s", wrapper->action->id, wrapper->action->uuid, action->uuid); goto dump; #endif } else if(wrapper->action->optional == TRUE && wrapper->action->print_always == FALSE) { do_crm_log(log_filter, "Input (%d) %s optional for %s", wrapper->action->id, wrapper->action->uuid, action->uuid); do_crm_log(log_filter, "Input (%d) %s n=%p p=%d r=%d o=%d a=%d f=0x%.6x", wrapper->action->id, wrapper->action->uuid, wrapper->action->node, wrapper->action->pseudo, wrapper->action->runnable, wrapper->action->optional, wrapper->action->print_always, wrapper->type); return FALSE; } dump: do_crm_log(log_dump, "Input (%d) %s n=%p p=%d r=%d o=%d a=%d f=0x%.6x dumped for %s", wrapper->action->id, wrapper->action->uuid, wrapper->action->node, wrapper->action->pseudo, wrapper->action->runnable, wrapper->action->optional, wrapper->action->print_always, wrapper->type, action->uuid); return TRUE; } void graph_element_from_action(action_t *action, pe_working_set_t *data_set) { int last_action = -1; int synapse_priority = 0; crm_data_t * syn = NULL; crm_data_t * set = NULL; crm_data_t * in = NULL; crm_data_t * input = NULL; crm_data_t * xml_action = NULL; if(should_dump_action(action) == FALSE) { return; } action->dumped = TRUE; syn = create_xml_node(data_set->graph, "synapse"); set = create_xml_node(syn, "action_set"); in = create_xml_node(syn, "inputs"); crm_xml_add_int(syn, XML_ATTR_ID, data_set->num_synapse); data_set->num_synapse++; if(action->rsc != NULL) { synapse_priority = action->rsc->priority; } if(action->priority > synapse_priority) { synapse_priority = action->priority; } if(synapse_priority > 0) { crm_xml_add_int(syn, XML_CIB_ATTR_PRIORITY, synapse_priority); } xml_action = action2xml(action, FALSE); add_node_nocopy(set, crm_element_name(xml_action), xml_action); action->actions_before = g_list_sort( action->actions_before, sort_action_id); slist_iter(wrapper,action_wrapper_t,action->actions_before,lpc, if(should_dump_input(last_action, action, wrapper) == FALSE) { continue; } wrapper->state = pe_link_dumped; CRM_CHECK(last_action < wrapper->action->id, ;); last_action = wrapper->action->id; input = create_xml_node(in, "trigger"); xml_action = action2xml(wrapper->action, TRUE); add_node_nocopy(input, crm_element_name(xml_action), xml_action); ); } diff --git a/pengine/native.c b/pengine/native.c index ef9f56d627..c52bb0b196 100644 --- a/pengine/native.c +++ b/pengine/native.c @@ -1,2085 +1,2087 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #define DELETE_THEN_REFRESH 1 /* The crmd will remove the resource from the CIB itself, making this redundant */ #define VARIANT_NATIVE 1 #include gboolean at_stack_bottom(resource_t *rsc); void node_list_update(GListPtr list1, GListPtr list2, int factor); void native_rsc_colocation_rh_must(resource_t *rsc_lh, gboolean update_lh, resource_t *rsc_rh, gboolean update_rh); void native_rsc_colocation_rh_mustnot(resource_t *rsc_lh, gboolean update_lh, resource_t *rsc_rh, gboolean update_rh); void create_notifications(resource_t *rsc, pe_working_set_t *data_set); void Recurring(resource_t *rsc, action_t *start, node_t *node, pe_working_set_t *data_set); void RecurringOp(resource_t *rsc, action_t *start, node_t *node, crm_data_t *operation, pe_working_set_t *data_set); void pe_pre_notify( resource_t *rsc, node_t *node, action_t *op, notify_data_t *n_data, pe_working_set_t *data_set); void pe_post_notify( resource_t *rsc, node_t *node, action_t *op, notify_data_t *n_data, pe_working_set_t *data_set); void NoRoleChange (resource_t *rsc, node_t *current, node_t *next, pe_working_set_t *data_set); gboolean DeleteRsc (resource_t *rsc, node_t *node, gboolean optional, pe_working_set_t *data_set); gboolean StopRsc (resource_t *rsc, node_t *next, gboolean optional, pe_working_set_t *data_set); gboolean StartRsc (resource_t *rsc, node_t *next, gboolean optional, pe_working_set_t *data_set); gboolean DemoteRsc (resource_t *rsc, node_t *next, gboolean optional, pe_working_set_t *data_set); gboolean PromoteRsc(resource_t *rsc, node_t *next, gboolean optional, pe_working_set_t *data_set); gboolean RoleError (resource_t *rsc, node_t *next, gboolean optional, pe_working_set_t *data_set); gboolean NullOp (resource_t *rsc, node_t *next, gboolean optional, pe_working_set_t *data_set); enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = { /* Current State */ /* Next State: Unknown Stopped Started Slave Master */ /* Unknown */ { RSC_ROLE_UNKNOWN, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, }, /* Stopped */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_SLAVE, RSC_ROLE_SLAVE, }, /* Started */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }, /* Slave */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_UNKNOWN, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }, /* Master */ { RSC_ROLE_STOPPED, RSC_ROLE_SLAVE, RSC_ROLE_UNKNOWN, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }, }; gboolean (*rsc_action_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX])(resource_t*,node_t*,gboolean,pe_working_set_t*) = { /* Current State */ /* Next State: Unknown Stopped Started Slave Master */ /* Unknown */ { RoleError, StopRsc, RoleError, RoleError, RoleError, }, /* Stopped */ { RoleError, NullOp, StartRsc, StartRsc, RoleError, }, /* Started */ { RoleError, StopRsc, NullOp, NullOp, PromoteRsc, }, /* Slave */ { RoleError, StopRsc, RoleError, NullOp, PromoteRsc, }, /* Master */ { RoleError, RoleError, RoleError, DemoteRsc, NullOp, }, }; static gboolean native_choose_node(resource_t *rsc) { /* 1. Sort by weight 2. color.chosen_node = the node (of those with the highest wieght) with the fewest resources 3. remove color.chosen_node from all other colors */ GListPtr nodes = NULL; node_t *chosen = NULL; if(is_not_set(rsc->flags, pe_rsc_provisional)) { return rsc->allocated_to?TRUE:FALSE; } crm_debug_3("Choosing node for %s from %d candidates", rsc->id, g_list_length(rsc->allowed_nodes)); if(rsc->allowed_nodes) { rsc->allowed_nodes = g_list_sort( rsc->allowed_nodes, sort_node_weight); nodes = rsc->allowed_nodes; chosen = g_list_nth_data(nodes, 0); } return native_assign_node(rsc, nodes, chosen); } GListPtr native_merge_weights( resource_t *rsc, const char *rhs, GListPtr nodes, int factor, gboolean allow_rollback) { GListPtr archive = NULL; if(is_set(rsc->flags, pe_rsc_merging)) { crm_info("%s: Breaking dependancy loop", rhs); return nodes; } else if(is_not_set(rsc->flags, pe_rsc_provisional)) { crm_debug_4("%s: not provisional", rsc->id); return nodes; } set_bit(rsc->flags, pe_rsc_merging); crm_debug_2("%s: Combining scores from %s", rhs, rsc->id); if(allow_rollback) { archive = node_list_dup(nodes, FALSE, FALSE); } #if 0 node_list_update(nodes, rsc->allowed_nodes, factor); #else /* turn this off once we switch to migration-threshold */ { GListPtr tmp = node_list_dup(rsc->allowed_nodes, FALSE, FALSE); slist_iter( node, node_t, tmp, lpc, if(node->weight < 0 && node->weight > -INFINITY) { /* Once a dependant's score goes below zero, force the node score to -INFINITY * * This prevents the colocation sets from being partially active in scenarios * where it could be fully active elsewhere * * If we don't do this, then the next resource's stickiness might bring * the combined score above 0 again - which confuses the PE into thinking * the whole colocation set can run there but is pointless since the later children * are not be able to run if the ones before them can't */ node->weight = -INFINITY; } ); node_list_update(nodes, tmp, factor); pe_free_shallow_adv(tmp, TRUE); } #endif if(archive && can_run_any(nodes) == FALSE) { crm_debug("%s: Rolling back scores from %s", rhs, rsc->id); pe_free_shallow_adv(nodes, TRUE); nodes = archive; goto bail; } pe_free_shallow_adv(archive, TRUE); slist_iter( constraint, rsc_colocation_t, rsc->rsc_cons_lhs, lpc, nodes = constraint->rsc_lh->cmds->merge_weights( constraint->rsc_lh, rhs, nodes, constraint->score/INFINITY, allow_rollback); ); bail: clear_bit(rsc->flags, pe_rsc_merging); return nodes; } node_t * native_color(resource_t *rsc, pe_working_set_t *data_set) { int alloc_details = scores_log_level; if(rsc->parent && is_not_set(rsc->parent->flags, pe_rsc_allocating)) { /* never allocate children on their own */ crm_debug("Escalating allocation of %s to its parent: %s", rsc->id, rsc->parent->id); rsc->parent->cmds->color(rsc->parent, data_set); } if(is_not_set(rsc->flags, pe_rsc_provisional)) { return rsc->allocated_to; } if(is_set(rsc->flags, pe_rsc_allocating)) { crm_debug("Dependancy loop detected involving %s", rsc->id); return NULL; } set_bit(rsc->flags, pe_rsc_allocating); print_resource(alloc_details+1, "Allocating: ", rsc, FALSE); dump_node_scores(alloc_details+1, rsc, "Pre-allloc", rsc->allowed_nodes); slist_iter( constraint, rsc_colocation_t, rsc->rsc_cons, lpc, resource_t *rsc_rh = constraint->rsc_rh; crm_debug_2("%s: Pre-Processing %s (%s)", rsc->id, constraint->id, rsc_rh->id); rsc_rh->cmds->color(rsc_rh, data_set); rsc->cmds->rsc_colocation_lh(rsc, rsc_rh, constraint); ); dump_node_scores(alloc_details+1, rsc, "Post-coloc", rsc->allowed_nodes); slist_iter( constraint, rsc_colocation_t, rsc->rsc_cons_lhs, lpc, rsc->allowed_nodes = constraint->rsc_lh->cmds->merge_weights( constraint->rsc_lh, rsc->id, rsc->allowed_nodes, constraint->score/INFINITY, TRUE); ); print_resource(LOG_DEBUG_2, "Allocating: ", rsc, FALSE); if(rsc->next_role == RSC_ROLE_STOPPED) { crm_debug_2("Making sure %s doesn't get allocated", rsc->id); /* make sure it doesnt come up again */ resource_location( rsc, NULL, -INFINITY, "target_role", data_set); } dump_node_scores(alloc_details, rsc, __PRETTY_FUNCTION__, rsc->allowed_nodes); if(is_set(rsc->flags, pe_rsc_provisional) && native_choose_node(rsc) ) { crm_debug_3("Allocated resource %s to %s", rsc->id, rsc->allocated_to->details->uname); } else if(rsc->allocated_to == NULL) { if(is_not_set(rsc->flags, pe_rsc_orphan)) { pe_warn("Resource %s cannot run anywhere", rsc->id); } else if(rsc->running_on != NULL) { crm_info("Stopping orphan resource %s", rsc->id); } } else { crm_debug("Pre-Allocated resource %s to %s", rsc->id, rsc->allocated_to->details->uname); } clear_bit(rsc->flags, pe_rsc_allocating); print_resource(LOG_DEBUG_3, "Allocated ", rsc, TRUE); return rsc->allocated_to; } static gboolean is_op_dup( resource_t *rsc, const char *name, const char *interval) { gboolean dup = FALSE; const char *id = NULL; const char *value = NULL; xml_child_iter_filter( rsc->ops_xml, operation, "op", value = crm_element_value(operation, "name"); if(safe_str_neq(value, name)) { continue; } value = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); if(value == NULL) { value = "0"; } if(safe_str_neq(value, interval)) { continue; } if(id == NULL) { id = ID(operation); } else { crm_config_err("Operation %s is a duplicate of %s", ID(operation), id); crm_config_err("Do not use the same (name, interval) combination more than once per resource"); dup = TRUE; } ); return dup; } void RecurringOp(resource_t *rsc, action_t *start, node_t *node, crm_data_t *operation, pe_working_set_t *data_set) { char *key = NULL; const char *name = NULL; const char *value = NULL; const char *interval = NULL; const char *node_uname = NULL; int interval_ms = 0; action_t *mon = NULL; gboolean is_optional = TRUE; GListPtr possible_matches = NULL; crm_debug_2("Creating recurring action %s for %s", ID(operation), rsc->id); if(node != NULL) { node_uname = node->details->uname; } interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); interval_ms = crm_get_msec(interval); if(interval_ms == 0) { return; } else if(interval_ms < 0) { crm_config_warn("%s contains an invalid interval: %s", ID(operation), interval); return; } value = crm_element_value(operation, "disabled"); if(crm_is_true(value)) { return; } name = crm_element_value(operation, "name"); if(is_op_dup(rsc, name, interval)) { return; } key = generate_op_key(rsc->id, name, interval_ms); if(start != NULL) { crm_debug_3("Marking %s %s due to %s", key, start->optional?"optional":"manditory", start->uuid); is_optional = start->optional; } else { crm_debug_2("Marking %s optional", key); is_optional = TRUE; } /* start a monitor for an already active resource */ possible_matches = find_actions_exact(rsc->actions, key, node); if(possible_matches == NULL) { is_optional = FALSE; crm_debug_3("Marking %s manditory: not active", key); } else { g_list_free(possible_matches); } value = crm_element_value(operation, "role"); if((rsc->next_role == RSC_ROLE_MASTER && value == NULL) || (value != NULL && text2role(value) != rsc->next_role)) { int log_level = LOG_DEBUG_2; const char *result = "Ignoring"; if(is_optional) { char *local_key = crm_strdup(key); log_level = LOG_INFO; result = "Cancelling"; /* its running : cancel it */ mon = custom_action( rsc, local_key, CRMD_ACTION_CANCEL, node, FALSE, TRUE, data_set); crm_free(mon->task); mon->task = crm_strdup(CRMD_ACTION_CANCEL); add_hash_param(mon->meta, XML_LRM_ATTR_INTERVAL, interval); add_hash_param(mon->meta, XML_LRM_ATTR_TASK, name); local_key = NULL; switch(rsc->role) { case RSC_ROLE_SLAVE: case RSC_ROLE_STARTED: if(rsc->next_role == RSC_ROLE_MASTER) { local_key = promote_key(rsc); } else if(rsc->next_role == RSC_ROLE_STOPPED) { local_key = stop_key(rsc); } break; case RSC_ROLE_MASTER: local_key = demote_key(rsc); break; default: break; } if(local_key) { custom_action_order(rsc, NULL, mon, rsc, local_key, NULL, pe_order_runnable_left, data_set); } mon = NULL; } do_crm_log(log_level, "%s action %s (%s vs. %s)", result , key, value?value:role2text(RSC_ROLE_SLAVE), role2text(rsc->next_role)); crm_free(key); key = NULL; return; } mon = custom_action(rsc, key, name, node, is_optional, TRUE, data_set); key = mon->uuid; if(is_optional) { crm_debug_2("%s\t %s (optional)", crm_str(node_uname), mon->uuid); } if(start == NULL || start->runnable == FALSE) { crm_debug("%s\t %s (cancelled : start un-runnable)", crm_str(node_uname), mon->uuid); mon->runnable = FALSE; } else if(node == NULL || node->details->online == FALSE || node->details->unclean) { crm_debug("%s\t %s (cancelled : no node available)", crm_str(node_uname), mon->uuid); mon->runnable = FALSE; } else if(mon->optional == FALSE) { crm_notice(" Start recurring %s (%ds) for %s on %s", mon->task, interval_ms/1000, rsc->id, crm_str(node_uname)); } custom_action_order(rsc, start_key(rsc), NULL, NULL, crm_strdup(key), mon, pe_order_implies_right|pe_order_runnable_left, data_set); if(rsc->next_role == RSC_ROLE_MASTER) { char *running_master = crm_itoa(EXECRA_RUNNING_MASTER); add_hash_param(mon->meta, XML_ATTR_TE_TARGET_RC, running_master); custom_action_order( rsc, promote_key(rsc), NULL, rsc, NULL, mon, pe_order_optional|pe_order_runnable_left, data_set); crm_free(running_master); } else if(rsc->role == RSC_ROLE_MASTER) { custom_action_order( rsc, demote_key(rsc), NULL, rsc, NULL, mon, pe_order_optional|pe_order_runnable_left, data_set); } } void Recurring(resource_t *rsc, action_t *start, node_t *node, pe_working_set_t *data_set) { xml_child_iter_filter( rsc->ops_xml, operation, "op", RecurringOp(rsc, start, node, operation, data_set); ); } void native_create_actions(resource_t *rsc, pe_working_set_t *data_set) { action_t *start = NULL; node_t *chosen = NULL; enum rsc_role_e role = RSC_ROLE_UNKNOWN; enum rsc_role_e next_role = RSC_ROLE_UNKNOWN; crm_debug_2("Creating actions for %s", rsc->id); chosen = rsc->allocated_to; if(chosen != NULL) { CRM_CHECK(rsc->next_role != RSC_ROLE_UNKNOWN, rsc->next_role = RSC_ROLE_STARTED); } unpack_instance_attributes( rsc->xml, XML_TAG_ATTR_SETS, chosen?chosen->details->attrs:NULL, rsc->parameters, NULL, data_set->now); crm_debug_2("%s: %s->%s", rsc->id, role2text(rsc->role), role2text(rsc->next_role)); if(g_list_length(rsc->running_on) > 1) { if(rsc->recovery_type == recovery_stop_start) { pe_proc_err("Attempting recovery of resource %s", rsc->id); if(rsc->role == RSC_ROLE_MASTER) { DemoteRsc(rsc, NULL, FALSE, data_set); } StopRsc(rsc, NULL, FALSE, data_set); rsc->role = RSC_ROLE_STOPPED; } } else if(rsc->running_on != NULL) { node_t *current = rsc->running_on->data; NoRoleChange(rsc, current, chosen, data_set); } else if(rsc->role == RSC_ROLE_STOPPED && rsc->next_role == RSC_ROLE_STOPPED) { char *key = start_key(rsc); GListPtr possible_matches = find_actions(rsc->actions, key, NULL); slist_iter( action, action_t, possible_matches, lpc, action->optional = TRUE; /* action->pseudo = TRUE; */ ); g_list_free(possible_matches); crm_debug_2("Stopping a stopped resource"); crm_free(key); - return; + goto do_recurring; + } else if(rsc->role != RSC_ROLE_STOPPED) { /* A cheap trick to account for the fact that Master/Slave groups may not be * completely running when we set their role to Slave */ crm_debug_2("Resetting %s.role = %s (was %s)", rsc->id, role2text(RSC_ROLE_STOPPED), role2text(rsc->role)); rsc->role = RSC_ROLE_STOPPED; } role = rsc->role; while(role != rsc->next_role) { next_role = rsc_state_matrix[role][rsc->next_role]; crm_debug_2("Executing: %s->%s (%s)", role2text(role), role2text(next_role), rsc->id); if(rsc_action_matrix[role][next_role]( rsc, chosen, FALSE, data_set) == FALSE) { break; } role = next_role; } - if(rsc->next_role != RSC_ROLE_STOPPED && is_set(rsc->flags, pe_rsc_managed)) { + do_recurring: + if(rsc->next_role != RSC_ROLE_STOPPED || is_set(rsc->flags, pe_rsc_managed) == FALSE) { start = start_action(rsc, chosen, TRUE); Recurring(rsc, start, chosen, data_set); } } void native_internal_constraints(resource_t *rsc, pe_working_set_t *data_set) { int type = pe_order_optional; const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); if(rsc->variant == pe_native) { type |= pe_order_implies_right; } if(rsc->parent == NULL || rsc->parent->variant == pe_group) { type |= pe_order_restart; } custom_action_order(rsc, stop_key(rsc), NULL, rsc, start_key(rsc), NULL, type, data_set); custom_action_order(rsc, demote_key(rsc), NULL, rsc, stop_key(rsc), NULL, pe_order_demote_stop, data_set); custom_action_order(rsc, start_key(rsc), NULL, rsc, promote_key(rsc), NULL, pe_order_runnable_left, data_set); custom_action_order( rsc, delete_key(rsc), NULL, rsc, start_key(rsc), NULL, pe_order_optional, data_set); if(is_set(rsc->flags, pe_rsc_notify)) { char *key1 = NULL; char *key2 = NULL; key1 = generate_op_key(rsc->id, "confirmed-post_notify_start", 0); key2 = generate_op_key(rsc->id, "pre_notify_promote", 0); custom_action_order( rsc, key1, NULL, rsc, key2, NULL, pe_order_optional, data_set); key1 = generate_op_key(rsc->id, "confirmed-post_notify_demote", 0); key2 = generate_op_key(rsc->id, "pre_notify_stop", 0); custom_action_order( rsc, key1, NULL, rsc, key2, NULL, pe_order_optional, data_set); } if(is_not_set(rsc->flags, pe_rsc_managed)) { crm_debug_3("Skipping fencing constraints for unmanaged resource: %s", rsc->id); return; } if(rsc->variant == pe_native && safe_str_neq(class, "stonith")) { custom_action_order( rsc, stop_key(rsc), NULL, NULL, crm_strdup(all_stopped->task), all_stopped, pe_order_implies_right|pe_order_runnable_left, data_set); } } void native_rsc_colocation_lh( resource_t *rsc_lh, resource_t *rsc_rh, rsc_colocation_t *constraint) { if(rsc_lh == NULL) { pe_err("rsc_lh was NULL for %s", constraint->id); return; } else if(constraint->rsc_rh == NULL) { pe_err("rsc_rh was NULL for %s", constraint->id); return; } crm_debug_2("Processing colocation constraint between %s and %s", rsc_lh->id, rsc_rh->id); rsc_rh->cmds->rsc_colocation_rh(rsc_lh, rsc_rh, constraint); } static gboolean filter_colocation_constraint( resource_t *rsc_lh, resource_t *rsc_rh, rsc_colocation_t *constraint) { int level = LOG_DEBUG_4; if(constraint->score == 0){ return FALSE; } if(constraint->score > 0 && constraint->role_lh != RSC_ROLE_UNKNOWN && constraint->role_lh != rsc_lh->next_role) { do_crm_log(level, "LH: Skipping constraint: \"%s\" state filter", role2text(constraint->role_rh)); return FALSE; } if(constraint->score > 0 && constraint->role_rh != RSC_ROLE_UNKNOWN && constraint->role_rh != rsc_rh->next_role) { do_crm_log(level, "RH: Skipping constraint: \"%s\" state filter", role2text(constraint->role_rh)); return FALSE; } if(constraint->score < 0 && constraint->role_lh != RSC_ROLE_UNKNOWN && constraint->role_lh == rsc_lh->next_role) { do_crm_log(level, "LH: Skipping -ve constraint: \"%s\" state filter", role2text(constraint->role_rh)); return FALSE; } if(constraint->score < 0 && constraint->role_rh != RSC_ROLE_UNKNOWN && constraint->role_rh == rsc_rh->next_role) { do_crm_log(level, "RH: Skipping -ve constraint: \"%s\" state filter", role2text(constraint->role_rh)); return FALSE; } return TRUE; } static void colocation_match( resource_t *rsc_lh, resource_t *rsc_rh, rsc_colocation_t *constraint) { const char *tmp = NULL; const char *value = NULL; gboolean do_check = FALSE; const char *attribute = "#id"; if(constraint->node_attribute != NULL) { attribute = constraint->node_attribute; } if(rsc_rh->allocated_to) { value = g_hash_table_lookup( rsc_rh->allocated_to->details->attrs, attribute); do_check = TRUE; } else if(constraint->score < 0) { /* nothing to do: * anti-colocation with something thats not running */ return; } slist_iter( node, node_t, rsc_lh->allowed_nodes, lpc, tmp = g_hash_table_lookup(node->details->attrs, attribute); if(do_check && safe_str_eq(tmp, value)) { if(constraint->score < INFINITY) { crm_debug_2("%s: %s.%s += %d", constraint->id, rsc_lh->id, node->details->uname, constraint->score); node->weight = merge_weights( constraint->score, node->weight); } } else if(do_check == FALSE || constraint->score >= INFINITY) { crm_debug_2("%s: %s.%s = -INFINITY (%s)", constraint->id, rsc_lh->id, node->details->uname, do_check?"failed":"unallocated"); node->weight = -INFINITY; } ); } void native_rsc_colocation_rh( resource_t *rsc_lh, resource_t *rsc_rh, rsc_colocation_t *constraint) { crm_debug_2("%sColocating %s with %s (%s, weight=%d)", constraint->score >= 0?"":"Anti-", rsc_lh->id, rsc_rh->id, constraint->id, constraint->score); if(filter_colocation_constraint(rsc_lh, rsc_rh, constraint) == FALSE) { return; } if(is_set(rsc_rh->flags, pe_rsc_provisional)) { return; } else if(is_not_set(rsc_lh->flags, pe_rsc_provisional)) { /* error check */ struct node_shared_s *details_lh; struct node_shared_s *details_rh; if((constraint->score > -INFINITY) && (constraint->score < INFINITY)) { return; } details_rh = rsc_rh->allocated_to?rsc_rh->allocated_to->details:NULL; details_lh = rsc_lh->allocated_to?rsc_lh->allocated_to->details:NULL; if(constraint->score == INFINITY && details_lh != details_rh) { crm_err("%s and %s are both allocated" " but to different nodes: %s vs. %s", rsc_lh->id, rsc_rh->id, details_lh?details_lh->uname:"n/a", details_rh?details_rh->uname:"n/a"); } else if(constraint->score == -INFINITY && details_lh == details_rh) { crm_err("%s and %s are both allocated" " but to the SAME node: %s", rsc_lh->id, rsc_rh->id, details_rh?details_rh->uname:"n/a"); } return; } else { colocation_match(rsc_lh, rsc_rh, constraint); } } void node_list_update(GListPtr list1, GListPtr list2, int factor) { node_t *other_node = NULL; slist_iter( node, node_t, list1, lpc, if(node == NULL) { continue; } other_node = (node_t*)pe_find_node_id(list2, node->details->id); if(other_node != NULL) { crm_debug_2("%s: %d + %d", node->details->uname, node->weight, other_node->weight); node->weight = merge_weights( factor*other_node->weight, node->weight); } ); } static GListPtr find_actions_by_task(GListPtr actions, resource_t *rsc, const char *original_key) { GListPtr list = NULL; list = find_actions(actions, original_key, NULL); if(list == NULL) { /* we're potentially searching a child of the original resource */ char *key = NULL; char *tmp = NULL; char *task = NULL; int interval = 0; CRM_CHECK(parse_op_key(original_key, &tmp, &task, &interval), crm_err("search key: %s", original_key); return NULL); key = generate_op_key(rsc->id, task, interval); list = find_actions(actions, key, NULL); crm_free(key); crm_free(tmp); crm_free(task); } return list; } void native_rsc_order_lh(resource_t *lh_rsc, order_constraint_t *order, pe_working_set_t *data_set) { GListPtr lh_actions = NULL; action_t *lh_action = order->lh_action; resource_t *rh_rsc = order->rh_rsc; crm_debug_3("Processing LH of ordering constraint %d", order->id); CRM_ASSERT(lh_rsc != NULL); if(lh_action != NULL) { lh_actions = g_list_append(NULL, lh_action); } else if(lh_action == NULL) { lh_actions = find_actions_by_task( lh_rsc->actions, lh_rsc, order->lh_action_task); } if(lh_actions == NULL && lh_rsc != rh_rsc) { char *key = NULL; char *rsc_id = NULL; char *op_type = NULL; int interval = 0; crm_debug_2("No LH-Side (%s/%s) found for constraint %d with %s - creating", lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task); parse_op_key( order->lh_action_task, &rsc_id, &op_type, &interval); key = generate_op_key(lh_rsc->id, op_type, interval); lh_action = custom_action(lh_rsc, key, op_type, NULL, TRUE, TRUE, data_set); if(lh_rsc->fns->state(lh_rsc, TRUE) == RSC_ROLE_STOPPED && safe_str_eq(op_type, CRMD_ACTION_STOP)) { lh_action->pseudo = TRUE; lh_action->runnable = TRUE; } lh_actions = g_list_append(NULL, lh_action); crm_free(op_type); crm_free(rsc_id); } slist_iter( lh_action_iter, action_t, lh_actions, lpc, if(rh_rsc == NULL && order->rh_action) { rh_rsc = order->rh_action->rsc; } if(rh_rsc) { rh_rsc->cmds->rsc_order_rh( lh_action_iter, rh_rsc, order); } else if(order->rh_action) { order_actions( lh_action_iter, order->rh_action, order->type); } ); pe_free_shallow_adv(lh_actions, FALSE); } void native_rsc_order_rh( action_t *lh_action, resource_t *rsc, order_constraint_t *order) { GListPtr rh_actions = NULL; action_t *rh_action = NULL; CRM_CHECK(rsc != NULL, return); CRM_CHECK(order != NULL, return); rh_action = order->rh_action; crm_debug_3("Processing RH of ordering constraint %d", order->id); if(rh_action != NULL) { rh_actions = g_list_append(NULL, rh_action); } else if(rsc != NULL) { rh_actions = find_actions_by_task( rsc->actions, rsc, order->rh_action_task); } if(rh_actions == NULL) { crm_debug_4("No RH-Side (%s/%s) found for constraint..." " ignoring", rsc->id,order->rh_action_task); if(lh_action) { crm_debug_4("LH-Side was: %s", lh_action->uuid); } return; } slist_iter( rh_action_iter, action_t, rh_actions, lpc, if(lh_action) { order_actions(lh_action, rh_action_iter, order->type); } else if(order->type & pe_order_implies_right) { rh_action_iter->runnable = FALSE; crm_warn("Unrunnable %s 0x%.6x", rh_action_iter->uuid, order->type); } else { crm_warn("neither %s 0x%.6x", rh_action_iter->uuid, order->type); } ); pe_free_shallow_adv(rh_actions, FALSE); } void native_rsc_location(resource_t *rsc, rsc_to_node_t *constraint) { GListPtr or_list; crm_debug_2("Applying %s (%s) to %s", constraint->id, role2text(constraint->role_filter), rsc->id); /* take "lifetime" into account */ if(constraint == NULL) { pe_err("Constraint is NULL"); return; } else if(rsc == NULL) { pe_err("LHS of rsc_to_node (%s) is NULL", constraint->id); return; } else if(constraint->role_filter > 0 && constraint->role_filter != rsc->next_role) { crm_debug("Constraint (%s) is not active (role : %s)", constraint->id, role2text(constraint->role_filter)); return; } else if(is_active(constraint) == FALSE) { crm_debug_2("Constraint (%s) is not active", constraint->id); return; } if(constraint->node_list_rh == NULL) { crm_debug_2("RHS of constraint %s is NULL", constraint->id); return; } or_list = node_list_or( rsc->allowed_nodes, constraint->node_list_rh, FALSE); pe_free_shallow(rsc->allowed_nodes); rsc->allowed_nodes = or_list; slist_iter(node, node_t, or_list, lpc, crm_debug_3("%s + %s : %d", rsc->id, node->details->uname, node->weight); ); } void native_expand(resource_t *rsc, pe_working_set_t *data_set) { crm_debug_3("Processing actions from %s", rsc->id); slist_iter( action, action_t, rsc->actions, lpc, crm_debug_4("processing action %d for rsc=%s", action->id, rsc->id); graph_element_from_action(action, data_set); ); slist_iter( child_rsc, resource_t, rsc->children, lpc, child_rsc->cmds->expand(child_rsc, data_set); ); } void create_notifications(resource_t *rsc, pe_working_set_t *data_set) { } static void register_activity(resource_t *rsc, enum action_tasks task, node_t *node, notify_data_t *n_data) { notify_entry_t *entry = NULL; if(node == NULL) { pe_proc_warn("%s has no node for required action %s", rsc->id, task2text(task)); return; } crm_malloc0(entry, sizeof(notify_entry_t)); entry->rsc = rsc; entry->node = node; switch(task) { case start_rsc: n_data->start = g_list_append(n_data->start, entry); break; case stop_rsc: n_data->stop = g_list_append(n_data->stop, entry); break; case action_promote: n_data->promote = g_list_append(n_data->promote, entry); break; case action_demote: n_data->demote = g_list_append(n_data->demote, entry); break; default: crm_err("Unsupported notify action: %s", task2text(task)); crm_free(entry); break; } } static void register_state(resource_t *rsc, node_t *on_node, notify_data_t *n_data) { notify_entry_t *entry = NULL; crm_malloc0(entry, sizeof(notify_entry_t)); entry->rsc = rsc; entry->node = on_node; crm_debug_2("%s state: %s", rsc->id, role2text(rsc->next_role)); switch(rsc->next_role) { case RSC_ROLE_STOPPED: /* n_data->inactive = g_list_append(n_data->inactive, entry); */ crm_free(entry); break; case RSC_ROLE_STARTED: n_data->active = g_list_append(n_data->active, entry); break; case RSC_ROLE_SLAVE: n_data->slave = g_list_append(n_data->slave, entry); break; case RSC_ROLE_MASTER: n_data->master = g_list_append(n_data->master, entry); break; default: crm_err("Unsupported notify role"); crm_free(entry); break; } } void complex_create_notify_element(resource_t *rsc, action_t *op, notify_data_t *n_data, pe_working_set_t *data_set) { node_t *next_node = NULL; gboolean registered = FALSE; char *op_key = NULL; GListPtr possible_matches = NULL; enum action_tasks task = text2task(op->task); if(rsc->children) { slist_iter( child_rsc, resource_t, rsc->children, lpc, child_rsc->cmds->create_notify_element( child_rsc, op, n_data, data_set); ); return; } if(op->pre_notify == NULL || op->post_notify == NULL) { /* no notifications required */ crm_debug_4("No notificaitons required for %s", op->task); return; } next_node = rsc->allocated_to; op_key = generate_op_key(rsc->id, op->task, 0); possible_matches = find_actions(rsc->actions, op_key, NULL); crm_debug_2("Creating notificaitons for: %s (%s->%s)", op->uuid, role2text(rsc->role), role2text(rsc->next_role)); if(rsc->role == rsc->next_role) { register_state(rsc, next_node, n_data); } slist_iter( local_op, action_t, possible_matches, lpc, local_op->notify_keys = n_data->keys; if(local_op->optional == FALSE) { registered = TRUE; register_activity(rsc, task, local_op->node, n_data); } ); /* stop / demote */ if(rsc->role != RSC_ROLE_STOPPED) { if(task == stop_rsc || task == action_demote) { slist_iter( current_node, node_t, rsc->running_on, lpc, pe_pre_notify(rsc, current_node, op, n_data, data_set); if(task == action_demote || registered == FALSE) { pe_post_notify(rsc, current_node, op, n_data, data_set); } ); } } /* start / promote */ if(rsc->next_role != RSC_ROLE_STOPPED) { CRM_CHECK(next_node != NULL,;); if(next_node == NULL) { pe_proc_err("next role: %s", role2text(rsc->next_role)); } else if(task == start_rsc || task == action_promote) { if(task != start_rsc || registered == FALSE) { pe_pre_notify(rsc, next_node, op, n_data, data_set); } pe_post_notify(rsc, next_node, op, n_data, data_set); } } crm_free(op_key); g_list_free(possible_matches); } static void dup_attr(gpointer key, gpointer value, gpointer user_data) { char *meta_key = crm_concat(CRM_META, key, '_'); g_hash_table_replace(user_data, meta_key, crm_strdup(value)); } static action_t * pe_notify(resource_t *rsc, node_t *node, action_t *op, action_t *confirm, notify_data_t *n_data, pe_working_set_t *data_set) { char *key = NULL; action_t *trigger = NULL; const char *value = NULL; const char *task = NULL; if(op == NULL || confirm == NULL) { crm_debug_2("Op=%p confirm=%p", op, confirm); return NULL; } CRM_CHECK(node != NULL, return NULL); if(node->details->online == FALSE) { crm_debug_2("Skipping notification for %s: node offline", rsc->id); return NULL; } else if(op->runnable == FALSE) { crm_debug_2("Skipping notification for %s: not runnable", op->uuid); return NULL; } value = g_hash_table_lookup(op->meta, "notify_type"); task = g_hash_table_lookup(op->meta, "notify_operation"); crm_debug_2("Creating notify actions for %s: %s (%s-%s)", op->uuid, rsc->id, value, task); key = generate_notify_key(rsc->id, value, task); trigger = custom_action(rsc, key, op->task, node, op->optional, TRUE, data_set); g_hash_table_foreach(op->meta, dup_attr, trigger->extra); trigger->notify_keys = n_data->keys; /* pseudo_notify before notify */ crm_debug_3("Ordering %s before %s (%d->%d)", op->uuid, trigger->uuid, trigger->id, op->id); order_actions(op, trigger, pe_order_implies_left); value = g_hash_table_lookup(op->meta, "notify_confirm"); if(crm_is_true(value)) { /* notify before pseudo_notified */ crm_debug_3("Ordering %s before %s (%d->%d)", trigger->uuid, confirm->uuid, confirm->id, trigger->id); order_actions(trigger, confirm, pe_order_implies_left); } return trigger; } void pe_pre_notify(resource_t *rsc, node_t *node, action_t *op, notify_data_t *n_data, pe_working_set_t *data_set) { crm_debug_2("%s: %s", rsc->id, op->uuid); pe_notify(rsc, node, op->pre_notify, op->pre_notified, n_data, data_set); } void pe_post_notify(resource_t *rsc, node_t *node, action_t *op, notify_data_t *n_data, pe_working_set_t *data_set) { action_t *notify = NULL; CRM_CHECK(op != NULL, return); CRM_CHECK(rsc != NULL, return); crm_debug_2("%s: %s", rsc->id, op->uuid); notify = pe_notify(rsc, node, op->post_notify, op->post_notified, n_data, data_set); if(notify != NULL) { /* crm_err("Upgrading priority for %s to INFINITY", notify->uuid); */ notify->priority = INFINITY; } notify = op->post_notified; if(notify != NULL) { slist_iter( mon, action_t, rsc->actions, lpc, const char *interval = g_hash_table_lookup(mon->meta, "interval"); if(interval == NULL || safe_str_eq(interval, "0")) { crm_debug_3("Skipping %s: interval", mon->uuid); continue; } else if(safe_str_eq(mon->task, "cancel")) { crm_debug_3("Skipping %s: cancel", mon->uuid); continue; } order_actions(notify, mon, pe_order_optional); ); } } void NoRoleChange(resource_t *rsc, node_t *current, node_t *next, pe_working_set_t *data_set) { action_t *stop = NULL; action_t *start = NULL; GListPtr possible_matches = NULL; crm_debug_2("Executing: %s (role=%s)", rsc->id, role2text(rsc->next_role)); if(current == NULL && next == NULL) { crm_notice("Leave resource %s\t(%s)", rsc->id, role2text(rsc->role)); return; } else if(next == NULL) { crm_notice("Stop resource %s\t(%s %s)", rsc->id, role2text(rsc->role), current->details->uname); return; } else if(current == NULL && rsc->next_role > RSC_ROLE_SLAVE) { crm_notice("Promote %s\t(%s -> %s %s)", rsc->id, role2text(rsc->role), role2text(rsc->next_role), next->details->uname); return; } else if(current == NULL) { crm_notice("Start %s\t(%s -> %s %s)", rsc->id, role2text(rsc->role), role2text(rsc->next_role), next->details->uname); return; } if(rsc->role == rsc->next_role) { start = start_action(rsc, next, TRUE); if(start->optional) { crm_notice("Leave resource %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } else if(safe_str_eq(current->details->id, next->details->id)) { if(is_set(rsc->flags, pe_rsc_failed)) { crm_notice("Recover resource %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } else { crm_notice("Restart resource %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } } else { crm_notice("Move resource %s\t(%s %s -> %s)", rsc->id, role2text(rsc->role), current->details->uname, next->details->uname); } } else if(rsc->role < rsc->next_role) { crm_notice("Promote %s\t(%s -> %s %s)", rsc->id, role2text(rsc->role), role2text(rsc->next_role), next->details->uname); } else if(rsc->role > rsc->next_role) { crm_notice("Demote %s\t(%s -> %s %s)", rsc->id, role2text(rsc->role), role2text(rsc->next_role), next->details->uname); } if(is_set(rsc->flags, pe_rsc_failed) || safe_str_neq(current->details->id, next->details->id)) { if(rsc->next_role > RSC_ROLE_STARTED) { gboolean optional = TRUE; if(rsc->role == RSC_ROLE_MASTER) { optional = FALSE; } DemoteRsc(rsc, current, optional, data_set); } if(rsc->role == RSC_ROLE_MASTER) { DemoteRsc(rsc, current, FALSE, data_set); } StopRsc(rsc, current, FALSE, data_set); StartRsc(rsc, next, FALSE, data_set); if(rsc->next_role == RSC_ROLE_MASTER) { PromoteRsc(rsc, next, FALSE, data_set); } possible_matches = find_recurring_actions(rsc->actions, next); slist_iter(match, action_t, possible_matches, lpc, if(match->optional == FALSE) { crm_debug("Fixing recurring action: %s", match->uuid); match->optional = TRUE; } ); g_list_free(possible_matches); } else if(is_set(rsc->flags, pe_rsc_start_pending)) { start = start_action(rsc, next, TRUE); if(start->runnable) { /* wait for StartRsc() to be called */ rsc->role = RSC_ROLE_STOPPED; } else { /* wait for StopRsc() to be called */ rsc->next_role = RSC_ROLE_STOPPED; } } else { stop = stop_action(rsc, current, TRUE); start = start_action(rsc, next, TRUE); stop->optional = start->optional; if(rsc->next_role > RSC_ROLE_STARTED) { DemoteRsc(rsc, current, start->optional, data_set); } StopRsc(rsc, current, start->optional, data_set); StartRsc(rsc, current, start->optional, data_set); if(rsc->next_role == RSC_ROLE_MASTER) { PromoteRsc(rsc, next, start->optional, data_set); } if(start->runnable == FALSE) { rsc->next_role = RSC_ROLE_STOPPED; } } } gboolean StopRsc(resource_t *rsc, node_t *next, gboolean optional, pe_working_set_t *data_set) { action_t *stop = NULL; const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); crm_debug_2("Executing: %s", rsc->id); if(rsc->next_role == RSC_ROLE_STOPPED && rsc->variant == pe_native && safe_str_eq(class, "stonith")) { action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); custom_action_order( NULL, crm_strdup(all_stopped->task), all_stopped, rsc, stop_key(rsc), NULL, pe_order_implies_left|pe_order_stonith_stop, data_set); } slist_iter( current, node_t, rsc->running_on, lpc, stop = stop_action(rsc, current, optional); if(stop->runnable && stop->optional == FALSE) { crm_notice(" %s\tStop %s",current->details->uname,rsc->id); } if(data_set->remove_after_stop) { DeleteRsc(rsc, current, optional, data_set); } ); return TRUE; } gboolean StartRsc(resource_t *rsc, node_t *next, gboolean optional, pe_working_set_t *data_set) { action_t *start = NULL; crm_debug_2("Executing: %s", rsc->id); start = start_action(rsc, next, TRUE); if(start->runnable && optional == FALSE) { crm_notice(" %s\tStart %s", next->details->uname, rsc->id); start->optional = FALSE; } return TRUE; } gboolean PromoteRsc(resource_t *rsc, node_t *next, gboolean optional, pe_working_set_t *data_set) { char *key = NULL; gboolean runnable = TRUE; GListPtr action_list = NULL; crm_debug_2("Executing: %s", rsc->id); CRM_CHECK(rsc->next_role == RSC_ROLE_MASTER, crm_err("Next role: %s", role2text(rsc->next_role)); return FALSE); CRM_CHECK(next != NULL, return FALSE); key = start_key(rsc); action_list = find_actions_exact(rsc->actions, key, next); crm_free(key); slist_iter(start, action_t, action_list, lpc, if(start->runnable == FALSE) { runnable = FALSE; } ); g_list_free(action_list); if(runnable) { promote_action(rsc, next, optional); if(optional == FALSE) { crm_debug("%s\tPromote %s", next->details->uname, rsc->id); } return TRUE; } crm_debug("%s\tPromote %s (canceled)", next->details->uname, rsc->id); key = promote_key(rsc); action_list = find_actions_exact(rsc->actions, key, next); crm_free(key); slist_iter(promote, action_t, action_list, lpc, promote->runnable = FALSE; ); g_list_free(action_list); return TRUE; } gboolean DemoteRsc(resource_t *rsc, node_t *next, gboolean optional, pe_working_set_t *data_set) { crm_debug_2("Executing: %s", rsc->id); /* CRM_CHECK(rsc->next_role == RSC_ROLE_SLAVE, return FALSE); */ slist_iter( current, node_t, rsc->running_on, lpc, do_crm_log(optional?LOG_DEBUG:LOG_NOTICE, "%s\tDemote %s", current->details->uname, rsc->id); demote_action(rsc, current, optional); ); return TRUE; } gboolean RoleError(resource_t *rsc, node_t *next, gboolean optional, pe_working_set_t *data_set) { crm_debug("Executing: %s", rsc->id); CRM_CHECK(FALSE, return FALSE); return FALSE; } gboolean NullOp(resource_t *rsc, node_t *next, gboolean optional, pe_working_set_t *data_set) { crm_debug_2("Executing: %s", rsc->id); return FALSE; } gboolean DeleteRsc(resource_t *rsc, node_t *node, gboolean optional, pe_working_set_t *data_set) { action_t *delete = NULL; #if DELETE_THEN_REFRESH action_t *refresh = NULL; #endif if(is_set(rsc->flags, pe_rsc_failed)) { crm_debug_2("Resource %s not deleted from %s: failed", rsc->id, node->details->uname); return FALSE; } else if(node == NULL) { crm_debug_2("Resource %s not deleted: NULL node", rsc->id); return FALSE; } else if(node->details->unclean || node->details->online == FALSE) { crm_debug_2("Resource %s not deleted from %s: unrunnable", rsc->id, node->details->uname); return FALSE; } crm_notice("Removing %s from %s", rsc->id, node->details->uname); delete = delete_action(rsc, node, optional); custom_action_order( rsc, stop_key(rsc), NULL, rsc, delete_key(rsc), NULL, optional?pe_order_implies_right:pe_order_implies_left, data_set); #if DELETE_THEN_REFRESH refresh = custom_action( NULL, crm_strdup(CRM_OP_LRM_REFRESH), CRM_OP_LRM_REFRESH, node, FALSE, TRUE, data_set); add_hash_param(refresh->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); order_actions(delete, refresh, pe_order_optional); #endif return TRUE; } gboolean native_create_probe(resource_t *rsc, node_t *node, action_t *complete, gboolean force, pe_working_set_t *data_set) { char *key = NULL; char *target_rc = NULL; action_t *probe = NULL; node_t *running = NULL; CRM_CHECK(node != NULL, return FALSE); if(rsc->children) { gboolean any_created = FALSE; slist_iter( child_rsc, resource_t, rsc->children, lpc, any_created = child_rsc->cmds->create_probe( child_rsc, node, complete, force, data_set) || any_created; ); return any_created; } if(is_set(rsc->flags, pe_rsc_orphan)) { crm_debug_2("Skipping orphan: %s", rsc->id); return FALSE; } running = pe_find_node_id(rsc->known_on, node->details->id); if(force == FALSE && running != NULL) { /* we already know the status of the resource on this node */ crm_debug_3("Skipping active: %s", rsc->id); return FALSE; } key = generate_op_key(rsc->id, CRMD_ACTION_STATUS, 0); probe = custom_action(rsc, key, CRMD_ACTION_STATUS, node, FALSE, TRUE, data_set); probe->optional = FALSE; running = pe_find_node_id(rsc->running_on, node->details->id); if(running == NULL) { target_rc = crm_itoa(EXECRA_NOT_RUNNING); } else if(rsc->role == RSC_ROLE_MASTER) { target_rc = crm_itoa(EXECRA_RUNNING_MASTER); } if(target_rc != NULL) { add_hash_param(probe->meta, XML_ATTR_TE_TARGET_RC, target_rc); crm_free(target_rc); } crm_debug_2("Probing %s on %s (%s)", rsc->id, node->details->uname, role2text(rsc->role)); custom_action_order(rsc, NULL, probe, NULL, NULL, complete, pe_order_implies_right, data_set); return TRUE; } static void native_start_constraints( resource_t *rsc, action_t *stonith_op, gboolean is_stonith, pe_working_set_t *data_set) { node_t *target = stonith_op?stonith_op->node:NULL; if(is_stonith) { char *key = start_key(rsc); action_t *ready = get_pseudo_op(STONITH_UP, data_set); crm_debug_2("Ordering %s action before stonith events", key); custom_action_order( rsc, key, NULL, NULL, crm_strdup(ready->task), ready, pe_order_implies_right, data_set); } else { action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); slist_iter(action, action_t, rsc->actions, lpc2, if(action->needs == rsc_req_stonith) { order_actions(all_stopped, action, pe_order_implies_left); } else if(target != NULL && safe_str_eq(action->task, CRMD_ACTION_START) && NULL == pe_find_node_id( rsc->known_on, target->details->id)) { /* if known == NULL, then we dont know if * the resource is active on the node * we're about to shoot * * in this case, regardless of action->needs, * the only safe option is to wait until * the node is shot before doing anything * to with the resource * * its analogous to waiting for all the probes * for rscX to complete before starting rscX * * the most likely explaination is that the * DC died and took its status with it */ crm_info("Ordering %s after %s recovery", action->uuid, target->details->uname); order_actions(all_stopped, action, pe_order_implies_left|pe_order_runnable_left); } ); } } static void native_stop_constraints( resource_t *rsc, action_t *stonith_op, gboolean is_stonith, pe_working_set_t *data_set) { char *key = NULL; GListPtr action_list = NULL; node_t *node = stonith_op->node; key = stop_key(rsc); action_list = find_actions(rsc->actions, key, node); crm_free(key); /* add the stonith OP as a stop pre-req and the mark the stop * as a pseudo op - since its now redundant */ slist_iter( action, action_t, action_list, lpc2, resource_t *parent = NULL; if(node->details->online && node->details->unclean == FALSE && is_set(rsc->flags, pe_rsc_failed)) { continue; } if(is_set(rsc->flags, pe_rsc_failed)) { crm_warn("Stop of failed resource %s is" " implicit after %s is fenced", rsc->id, node->details->uname); } else { crm_info("%s is implicit after %s is fenced", action->uuid, node->details->uname); } /* the stop would never complete and is * now implied by the stonith operation */ action->pseudo = TRUE; action->runnable = TRUE; action->implied_by_stonith = TRUE; if(is_stonith == FALSE) { order_actions(stonith_op, action, pe_order_optional); } /* find the top-most resource */ parent = rsc->parent; while(parent != NULL && parent->parent != NULL) { parent = parent->parent; } if(parent) { crm_debug_2("Re-creating actions for %s", parent->id); parent->cmds->create_actions(parent, data_set); /* make sure we dont mess anything up in create_actions */ CRM_CHECK(action->pseudo, action->pseudo = TRUE); CRM_CHECK(action->runnable, action->runnable = TRUE); } /* From Bug #1601, successful fencing must be an input to a failed resources stop action. However given group(A, B) running on nodeX and B.stop has failed, A := stop healthy resource (A.stop) B := stop failed resource (pseudo operation B.stop) C := stonith nodeX A requires B, B requires C, C requires A This loop would prevent the cluster from making progress. This block creates the "C requires A" dependancy and therefore must (at least for now) be disabled. Instead, run the block above and treat all resources on nodeX as B would be (marked as a pseudo op depending on the STONITH). } else if(is_stonith == FALSE) { crm_info("Moving healthy resource %s" " off %s before fencing", rsc->id, node->details->uname); * stop healthy resources before the * stonith op * custom_action_order( rsc, stop_key(rsc), NULL, NULL,crm_strdup(CRM_OP_FENCE),stonith_op, pe_order_optional, data_set); */ ); g_list_free(action_list); key = demote_key(rsc); action_list = find_actions(rsc->actions, key, node); crm_free(key); slist_iter( action, action_t, action_list, lpc2, if(node->details->online == FALSE || is_set(rsc->flags, pe_rsc_failed)) { crm_info("Demote of failed resource %s is" " implict after %s is fenced", rsc->id, node->details->uname); /* the stop would never complete and is * now implied by the stonith operation */ action->pseudo = TRUE; action->runnable = TRUE; if(is_stonith == FALSE) { order_actions(stonith_op, action, pe_order_optional); } } ); g_list_free(action_list); } void complex_stonith_ordering( resource_t *rsc, action_t *stonith_op, pe_working_set_t *data_set) { gboolean is_stonith = FALSE; const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); if(rsc->children) { slist_iter( child_rsc, resource_t, rsc->children, lpc, child_rsc->cmds->stonith_ordering( child_rsc, stonith_op, data_set); ); return; } if(is_not_set(rsc->flags, pe_rsc_managed)) { crm_debug_3("Skipping fencing constraints for unmanaged resource: %s", rsc->id); return; } if(stonith_op != NULL && safe_str_eq(class, "stonith")) { is_stonith = TRUE; } /* Start constraints */ native_start_constraints(rsc, stonith_op, is_stonith, data_set); /* Stop constraints */ native_stop_constraints(rsc, stonith_op, is_stonith, data_set); } #define ALLOW_WEAK_MIGRATION 0 static gboolean check_stack_element(resource_t *rsc, resource_t *other_rsc, const char *type) { char *key = NULL; int level = LOG_DEBUG; GListPtr action_list = NULL; if(other_rsc == NULL || other_rsc == rsc) { return TRUE; } do_crm_log(level+1, "%s: processing %s (%s)", rsc->id, other_rsc->id, type); if(other_rsc->variant == pe_native) { do_crm_log(level, "%s: depends on %s (mid-stack) %s", rsc->id, other_rsc->id, type); return FALSE; } else if(other_rsc->variant == pe_group) { if(at_stack_bottom(other_rsc) == FALSE) { do_crm_log(level, "%s: depends on group %s (mid-stack) %s", rsc->id, other_rsc->id, type); return FALSE; } return TRUE; } /* is the clone also moving moved around? * * if so, then we can't yet be completely sure the * resource can safely migrate since the node we're * moving too may not have the clone instance started * yet * * in theory we can figure out if the clone instance we * will run on is already there, but there that would * involve too much knowledge of internal clone code. * maybe later... */ do_crm_log(level+1,"%s: start depends on clone %s", rsc->id, other_rsc->id); key = stop_key(other_rsc); action_list = find_actions(other_rsc->actions, key, NULL); crm_free(key); slist_iter( other_stop, action_t, action_list,lpc, if(other_stop && other_stop->optional == FALSE) { do_crm_log(level, "%s: start depends on %s", rsc->id, other_stop->uuid); g_list_free(action_list); return FALSE; } ); g_list_free(action_list); return TRUE; } gboolean at_stack_bottom(resource_t *rsc) { char *key = NULL; action_t *start = NULL; action_t *other = NULL; GListPtr action_list = NULL; key = start_key(rsc); action_list = find_actions(rsc->actions, key, NULL); crm_free(key); crm_debug_3("%s: processing", rsc->id); CRM_CHECK(action_list != NULL, return FALSE); start = action_list->data; g_list_free(action_list); slist_iter( constraint, rsc_colocation_t, rsc->rsc_cons, lpc, resource_t *target = constraint->rsc_rh; crm_debug_4("%s == %s (%d)", rsc->id, target->id, constraint->score); if(constraint->score > 0 && check_stack_element(rsc, target, "coloc") == FALSE) { return FALSE; } ); slist_iter( other_w, action_wrapper_t, start->actions_before, lpc, other = other_w->action; #if ALLOW_WEAK_MIGRATION if((other_w->type & pe_order_implies_right) == 0) { crm_debug_3("%s: depends on %s (optional ordering)", rsc->id, other->uuid); continue; } #endif if(other->optional == FALSE && check_stack_element(rsc, other->rsc, "order") == FALSE) { return FALSE; } ); return TRUE; } void complex_migrate_reload(resource_t *rsc, pe_working_set_t *data_set) { char *key = NULL; int level = LOG_DEBUG; GListPtr action_list = NULL; action_t *stop = NULL; action_t *start = NULL; action_t *other = NULL; action_t *action = NULL; const char *value = NULL; if(rsc->children) { slist_iter( child_rsc, resource_t, rsc->children, lpc, child_rsc->cmds->migrate_reload(child_rsc, data_set); ); other = NULL; return; } do_crm_log(level+1, "Processing %s", rsc->id); CRM_CHECK(rsc->variant == pe_native, return); if(is_not_set(rsc->flags, pe_rsc_managed) || is_set(rsc->flags, pe_rsc_failed) || is_set(rsc->flags, pe_rsc_start_pending) || rsc->next_role != RSC_ROLE_STARTED || g_list_length(rsc->running_on) != 1) { do_crm_log(level+1, "%s: general resource state", rsc->id); return; } key = start_key(rsc); action_list = find_actions(rsc->actions, key, NULL); crm_free(key); if(action_list == NULL) { do_crm_log(level, "%s: no start action", rsc->id); return; } start = action_list->data; g_list_free(action_list); value = g_hash_table_lookup(rsc->meta, "allow_migrate"); if(crm_is_true(value)) { set_bit(rsc->flags, pe_rsc_can_migrate); } if(is_not_set(rsc->flags, pe_rsc_can_migrate) && start->allow_reload_conversion == FALSE) { do_crm_log(level+1, "%s: no need to continue", rsc->id); return; } key = stop_key(rsc); action_list = find_actions(rsc->actions, key, NULL); crm_free(key); if(action_list == NULL) { do_crm_log(level, "%s: no stop action", rsc->id); return; } stop = action_list->data; g_list_free(action_list); action = start; if(action->pseudo || action->optional || action->node == NULL || action->runnable == FALSE) { do_crm_log(level, "%s: %s", rsc->id, action->task); return; } action = stop; if(action->pseudo || action->optional || action->node == NULL || action->runnable == FALSE) { do_crm_log(level, "%s: %s", rsc->id, action->task); return; } if(is_set(rsc->flags, pe_rsc_can_migrate)) { if(start->node == NULL || stop->node == NULL || stop->node->details == start->node->details) { clear_bit(rsc->flags, pe_rsc_can_migrate); } else if(at_stack_bottom(rsc) == FALSE) { crm_notice("Cannot migrate %s from %s to %s" " - %s is not at the bottom of the resource stack", rsc->id, stop->node->details->uname, start->node->details->uname, rsc->id); clear_bit(rsc->flags, pe_rsc_can_migrate); } } if(is_set(rsc->flags, pe_rsc_can_migrate)) { crm_notice("Migrating %s from %s to %s", rsc->id, stop->node->details->uname, start->node->details->uname); crm_free(stop->uuid); crm_free(stop->task); stop->task = crm_strdup(CRMD_ACTION_MIGRATE); stop->uuid = generate_op_key(rsc->id, stop->task, 0); add_hash_param(stop->meta, "migrate_source", stop->node->details->uname); add_hash_param(stop->meta, "migrate_target", start->node->details->uname); /* Hook up to the all_stopped and shutdown actions */ slist_iter( other_w, action_wrapper_t, stop->actions_after, lpc, other = other_w->action; if(other->optional == FALSE && other->rsc == NULL) { order_actions(start, other, other_w->type); } ); slist_iter( other_w, action_wrapper_t, start->actions_before, lpc, other = other_w->action; if(other->optional == FALSE #if ALLOW_WEAK_MIGRATION && (other_w->type & pe_order_implies_right) #endif && other->rsc != NULL && other->rsc != rsc->parent && other->rsc != rsc) { do_crm_log(level, "Ordering %s before %s", other->uuid, stop->uuid); order_actions(other, stop, other_w->type); } ); crm_free(start->uuid); crm_free(start->task); start->task = crm_strdup(CRMD_ACTION_MIGRATED); start->uuid = generate_op_key(rsc->id, start->task, 0); add_hash_param(start->meta, "migrate_source_uuid", stop->node->details->id); add_hash_param(start->meta, "migrate_source", stop->node->details->uname); add_hash_param(start->meta, "migrate_target", start->node->details->uname); } else if(start->allow_reload_conversion && stop->node->details == start->node->details) { crm_info("Rewriting restart of %s on %s as a reload", rsc->id, start->node->details->uname); crm_free(start->uuid); crm_free(start->task); start->task = crm_strdup("reload"); start->uuid = generate_op_key(rsc->id, start->task, 0); stop->pseudo = TRUE; /* easier than trying to delete it from the graph */ } else { do_crm_log(level+1, "%s nothing to do", rsc->id); } } diff --git a/pengine/testcases/interleave-0.exp b/pengine/testcases/interleave-0.exp index 7904be0f18..a5f13e2f07 100644 --- a/pengine/testcases/interleave-0.exp +++ b/pengine/testcases/interleave-0.exp @@ -1,2140 +1,2140 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + diff --git a/pengine/testcases/interleave-1.exp b/pengine/testcases/interleave-1.exp index 7904be0f18..a5f13e2f07 100644 --- a/pengine/testcases/interleave-1.exp +++ b/pengine/testcases/interleave-1.exp @@ -1,2140 +1,2140 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + diff --git a/pengine/testcases/interleave-2.exp b/pengine/testcases/interleave-2.exp index 7904be0f18..a5f13e2f07 100644 --- a/pengine/testcases/interleave-2.exp +++ b/pengine/testcases/interleave-2.exp @@ -1,2140 +1,2140 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + diff --git a/pengine/testcases/interleave-3.exp b/pengine/testcases/interleave-3.exp index 7904be0f18..a5f13e2f07 100644 --- a/pengine/testcases/interleave-3.exp +++ b/pengine/testcases/interleave-3.exp @@ -1,2140 +1,2140 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + diff --git a/pengine/testcases/order7.exp b/pengine/testcases/order7.exp index c496008c44..1f029ca220 100644 --- a/pengine/testcases/order7.exp +++ b/pengine/testcases/order7.exp @@ -1,111 +1,111 @@ - + diff --git a/tools/attrd.c b/tools/attrd.c index 5c61db72ab..a81b0557b7 100644 --- a/tools/attrd.c +++ b/tools/attrd.c @@ -1,638 +1,638 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define OPTARGS "hV" #if SUPPORT_HEARTBEAT ll_cluster_t *attrd_cluster_conn; #endif GMainLoop* mainloop = NULL; char *attrd_uname = NULL; char *attrd_uuid = NULL; gboolean need_shutdown = FALSE; GHashTable *attr_hash = NULL; cib_t *cib_conn = NULL; typedef struct attr_hash_entry_s { char *id; char *set; char *section; char *value; char *last_value; int timeout; char *dampen; guint timer_id; } attr_hash_entry_t; static void free_hash_entry(gpointer data) { attr_hash_entry_t *entry = data; if (entry == NULL) { return; } crm_free(entry->id); crm_free(entry->set); crm_free(entry->dampen); crm_free(entry->section); if(entry->value != entry->last_value) { crm_free(entry->value); crm_free(entry->last_value); } else { crm_free(entry->value); } crm_free(entry); } void attrd_ha_callback(HA_Message * msg, void* private_data); void attrd_local_callback(HA_Message * msg); gboolean attrd_timer_callback(void *user_data); gboolean attrd_trigger_update(attr_hash_entry_t *hash_entry); void attrd_perform_update(attr_hash_entry_t *hash_entry); static gboolean attrd_shutdown(int nsig, gpointer unused) { need_shutdown = TRUE; crm_info("Exiting"); if (mainloop != NULL && g_main_is_running(mainloop)) { g_main_quit(mainloop); } else { exit(0); } return FALSE; } static void usage(const char* cmd, int exit_status) { FILE* stream; stream = exit_status ? stderr : stdout; fprintf(stream, "usage: %s [-srkh] [-c configure file]\n", cmd); /* fprintf(stream, "\t-d\tsets debug level\n"); */ /* fprintf(stream, "\t-s\tgets daemon status\n"); */ /* fprintf(stream, "\t-r\trestarts daemon\n"); */ /* fprintf(stream, "\t-k\tstops daemon\n"); */ /* fprintf(stream, "\t-h\thelp message\n"); */ fflush(stream); exit(exit_status); } typedef struct attrd_client_s { char *id; char *name; IPC_Channel *channel; GCHSource *source; } attrd_client_t; static void stop_attrd_timer(attr_hash_entry_t *hash_entry) { if(hash_entry != NULL && hash_entry->timer_id != 0) { crm_debug_2("Stopping %s timer", hash_entry->id); Gmain_timeout_remove(hash_entry->timer_id); hash_entry->timer_id = 0; } } static gboolean attrd_ipc_callback(IPC_Channel *client, gpointer user_data) { int lpc = 0; HA_Message *msg = NULL; attrd_client_t *curr_client = (attrd_client_t*)user_data; gboolean stay_connected = TRUE; crm_debug_2("Invoked: %s", curr_client->id); while(IPC_ISRCONN(client)) { if(client->ops->is_message_pending(client) == 0) { break; } msg = msgfromIPC_noauth(client); if (msg == NULL) { crm_debug("%s: no message this time", curr_client->id); continue; } lpc++; crm_debug_2("Processing msg from %s", curr_client->id); crm_log_message_adv(LOG_DEBUG_3, __PRETTY_FUNCTION__, msg); attrd_local_callback(msg); crm_msg_del(msg); msg = NULL; if(client->ch_status != IPC_CONNECT) { break; } } crm_debug_2("Processed %d messages", lpc); if (client->ch_status != IPC_CONNECT) { stay_connected = FALSE; } return stay_connected; } static void attrd_connection_destroy(gpointer user_data) { attrd_client_t *client = user_data; /* cib_process_disconnect */ if(client == NULL) { return; } if(client->source != NULL) { crm_debug_4("Deleting %s (%p) from mainloop", client->name, client->source); G_main_del_IPC_Channel(client->source); client->source = NULL; } crm_debug_3("Destroying %s (%p)", client->name, client); crm_free(client->name); crm_free(client->id); crm_free(client); crm_debug_4("Freed the cib client"); return; } static gboolean attrd_connect(IPC_Channel *channel, gpointer user_data) { attrd_client_t *new_client = NULL; crm_debug_3("Connecting channel"); if(channel == NULL) { crm_err("Channel was NULL"); return FALSE; } else if(channel->ch_status != IPC_CONNECT) { crm_err("Channel was disconnected"); return FALSE; } else if(need_shutdown) { crm_info("Ignoring connection request during shutdown"); return FALSE; } crm_malloc0(new_client, sizeof(attrd_client_t)); new_client->channel = channel; crm_debug_3("Created channel %p for channel %s", new_client, new_client->id); /* channel->ops->set_recv_qlen(channel, 100); */ /* channel->ops->set_send_qlen(channel, 400); */ new_client->source = G_main_add_IPC_Channel( G_PRIORITY_DEFAULT, channel, FALSE, attrd_ipc_callback, new_client, attrd_connection_destroy); crm_debug_3("Client %s connected", new_client->id); return TRUE; } static void attrd_ha_connection_destroy(gpointer user_data) { crm_debug_3("Invoked"); if(need_shutdown) { /* we signed out, so this is expected */ crm_info("Heartbeat disconnection complete"); return; } crm_crit("Lost connection to heartbeat service!"); if (mainloop != NULL && g_main_is_running(mainloop)) { g_main_quit(mainloop); return; } exit(LSB_EXIT_OK); } static gboolean register_with_ha(void) { void *dispatch = attrd_ha_callback; void *destroy = attrd_ha_connection_destroy; if(is_openais_cluster()) { #if SUPPORT_AIS destroy = NULL; dispatch = NULL; #endif } return crm_cluster_connect(&attrd_uname, &attrd_uuid, dispatch, destroy, #if SUPPORT_HEARTBEAT &attrd_cluster_conn #else NULL #endif ); } static void attrd_cib_connection_destroy(gpointer user_data) { if(need_shutdown) { crm_info("Connection to the CIB terminated..."); } else { /* eventually this will trigger a reconnect, not a shutdown */ crm_err("Connection to the CIB terminated..."); exit(1); } return; } int main(int argc, char ** argv) { int flag; int argerr = 0; gboolean was_err = FALSE; char *channel_name = crm_strdup(attrd_channel); crm_log_init(T_ATTRD, LOG_INFO, TRUE, FALSE, 0, NULL); G_main_add_SignalHandler( G_PRIORITY_HIGH, SIGTERM, attrd_shutdown, NULL, NULL); while ((flag = getopt(argc, argv, OPTARGS)) != EOF) { switch(flag) { case 'V': cl_log_enable_stderr(1); alter_debug(DEBUG_INC); break; case 'h': /* Help message */ usage(T_ATTRD, LSB_EXIT_OK); break; default: ++argerr; break; } } if (optind > argc) { ++argerr; } if (argerr) { usage(T_ATTRD, LSB_EXIT_GENERIC); } crm_info("Starting up...."); if(register_with_ha() == FALSE) { crm_err("HA Signon failed"); was_err = TRUE; } if(was_err == FALSE) { int lpc = 0; int max_retry = 20; enum cib_errors rc = cib_not_connected; cib_conn = cib_new(); for(lpc = 0; lpc < max_retry && rc != cib_ok; lpc++) { crm_debug("CIB signon attempt %d", lpc); rc = cib_conn->cmds->signon( cib_conn, T_ATTRD, cib_command); sleep(5); } if(rc != cib_ok) { crm_err("Signon to CIB failed: %s", cib_error2string(rc)); was_err = TRUE; } } if(was_err == FALSE) { enum cib_errors rc = cib_conn->cmds->set_connection_dnotify( cib_conn, attrd_cib_connection_destroy); if(rc != cib_ok) { crm_err("Could not set dnotify callback"); was_err = TRUE; } } if(was_err == FALSE) { int rc = init_server_ipc_comms( channel_name, attrd_connect, default_ipc_connection_destroy); if(rc != 0) { crm_err("Could not start IPC server"); was_err = TRUE; } } if(was_err) { crm_err("Aborting startup"); return 100; } attr_hash = g_hash_table_new_full( g_str_hash, g_str_equal, NULL, free_hash_entry); crm_info("Starting mainloop..."); mainloop = g_main_new(FALSE); g_main_run(mainloop); crm_info("Exiting..."); #if SUPPORT_HEARTBEAT if(is_heartbeat_cluster()) { attrd_cluster_conn->llc_ops->signoff(attrd_cluster_conn, TRUE); attrd_cluster_conn->llc_ops->delete(attrd_cluster_conn); } #endif cib_conn->cmds->signoff(cib_conn); cib_delete(cib_conn); g_hash_table_destroy(attr_hash); crm_free(channel_name); crm_free(attrd_uuid); empty_uuid_cache(); return 0; } static void attrd_cib_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { char *attr = user_data; if(rc == cib_NOTEXISTS) { rc = cib_ok; } if(rc < cib_ok) { crm_err("Update %d for %s failed: %s", call_id, attr, cib_error2string(rc)); } else { crm_debug("Update %d for %s passed", call_id, attr); } crm_free(attr); } static void log_hash_entry(int level, attr_hash_entry_t *entry, const char *text) { do_crm_log(level, "%s", text); do_crm_log(level, "Set: %s", entry->section); do_crm_log(level, "Name: %s", entry->id); do_crm_log(level, "Value: %s", entry->value); do_crm_log(level, "Timeout: %s", entry->dampen); } static attr_hash_entry_t * find_hash_entry(HA_Message * msg) { const char *value = NULL; const char *attr = ha_msg_value(msg, F_ATTRD_ATTRIBUTE); attr_hash_entry_t *hash_entry = NULL; if(attr == NULL) { crm_info("Ignoring message with no attribute name"); return NULL; } hash_entry = g_hash_table_lookup(attr_hash, attr); if(hash_entry == NULL) { /* create one and add it */ crm_info("Creating hash entry for %s", attr); crm_malloc0(hash_entry, sizeof(attr_hash_entry_t)); hash_entry->id = crm_strdup(attr); g_hash_table_insert(attr_hash, hash_entry->id, hash_entry); hash_entry = g_hash_table_lookup(attr_hash, attr); CRM_CHECK(hash_entry != NULL, return NULL); } value = ha_msg_value(msg, F_ATTRD_SET); if(value != NULL) { crm_free(hash_entry->set); hash_entry->set = crm_strdup(value); crm_debug("\t%s->set: %s", attr, value); } value = ha_msg_value(msg, F_ATTRD_SECTION); if(value == NULL) { value = XML_CIB_TAG_STATUS; } crm_free(hash_entry->section); hash_entry->section = crm_strdup(value); crm_debug("\t%s->section: %s", attr, value); value = ha_msg_value(msg, F_ATTRD_DAMPEN); if(value != NULL) { crm_free(hash_entry->dampen); hash_entry->dampen = crm_strdup(value); hash_entry->timeout = crm_get_msec(value); crm_debug("\t%s->timeout: %s", attr, value); } log_hash_entry(LOG_DEBUG_2, hash_entry, "Found (and updated) entry:"); return hash_entry; } void attrd_ha_callback(HA_Message * msg, void* private_data) { attr_hash_entry_t *hash_entry = NULL; const char *from = ha_msg_value(msg, F_ORIG); const char *op = ha_msg_value(msg, F_ATTRD_TASK); const char *ignore = ha_msg_value(msg, F_ATTRD_IGNORE_LOCALLY); if(ignore == NULL || safe_str_neq(from, attrd_uname)) { crm_info("%s message from %s", op, from); hash_entry = find_hash_entry(msg); stop_attrd_timer(hash_entry); attrd_perform_update(hash_entry); } } void attrd_perform_update(attr_hash_entry_t *hash_entry) { int rc = cib_ok; if(hash_entry == NULL) { return; } else if(hash_entry->value == NULL) { /* delete the attr */ rc = delete_attr(cib_conn, cib_none, hash_entry->section, attrd_uuid, hash_entry->set, NULL, hash_entry->id, NULL, FALSE); crm_info("Sent delete %d: %s %s %s", rc, hash_entry->id, hash_entry->set, hash_entry->section); } else { /* send update */ rc = update_attr(cib_conn, cib_none, hash_entry->section, attrd_uuid, hash_entry->set, NULL, hash_entry->id, hash_entry->value, FALSE); crm_info("Sent update %d: %s=%s", rc, hash_entry->id, hash_entry->value); } add_cib_op_callback(rc, FALSE, crm_strdup(hash_entry->id), attrd_cib_callback); return; } static void update_for_hash_entry(gpointer key, gpointer value, gpointer user_data) { attrd_timer_callback(value); } void attrd_local_callback(HA_Message * msg) { attr_hash_entry_t *hash_entry = NULL; const char *from = ha_msg_value(msg, F_ORIG); const char *op = ha_msg_value(msg, F_ATTRD_TASK); const char *attr = ha_msg_value(msg, F_ATTRD_ATTRIBUTE); const char *value = ha_msg_value(msg, F_ATTRD_VALUE); if(safe_str_eq(op, "refresh")) { crm_info("Sending full refresh"); g_hash_table_foreach(attr_hash, update_for_hash_entry, NULL); return; } crm_debug("%s message from %s: %s=%s", op, from, attr, crm_str(value)); hash_entry = find_hash_entry(msg); if(hash_entry == NULL) { return; } crm_free(hash_entry->last_value); hash_entry->last_value = hash_entry->value; if(value != NULL) { hash_entry->value = crm_strdup(value); } else { hash_entry->value = NULL; } if(safe_str_eq(hash_entry->value, hash_entry->last_value)) { crm_debug_2("Ignoring non-change"); return; } stop_attrd_timer(hash_entry); if(hash_entry->timeout > 0) { hash_entry->timer_id = Gmain_timeout_add( hash_entry->timeout, attrd_timer_callback, hash_entry); } else { attrd_trigger_update(hash_entry); } return; } gboolean attrd_timer_callback(void *user_data) { stop_attrd_timer(user_data); attrd_trigger_update(user_data); return TRUE; } gboolean attrd_trigger_update(attr_hash_entry_t *hash_entry) { HA_Message *msg = NULL; /* send HA message to everyone */ crm_info("Sending flush op to all hosts for: %s", hash_entry->id); log_hash_entry(LOG_DEBUG_2, hash_entry, "Sending flush op to all hosts for:"); msg = ha_msg_new(8); ha_msg_add(msg, F_TYPE, T_ATTRD); ha_msg_add(msg, F_ORIG, attrd_uname); ha_msg_add(msg, F_ATTRD_TASK, "flush"); ha_msg_add(msg, F_ATTRD_ATTRIBUTE, hash_entry->id); ha_msg_add(msg, F_ATTRD_SET, hash_entry->set); ha_msg_add(msg, F_ATTRD_SECTION, hash_entry->section); ha_msg_add(msg, F_ATTRD_DAMPEN, hash_entry->dampen); ha_msg_add(msg, F_ATTRD_VALUE, hash_entry->value); if(hash_entry->timeout <= 0) { ha_msg_add(msg, F_ATTRD_IGNORE_LOCALLY, hash_entry->value); attrd_perform_update(hash_entry); } - send_cluster_message(NULL, crm_proc_attrd, msg, FALSE); + send_cluster_message(NULL, crm_msg_attrd, msg, FALSE); crm_msg_del(msg); return TRUE; } diff --git a/transitioner/Makefile.am b/transitioner/Makefile.am index 318010e1f3..0a9e564f0d 100644 --- a/transitioner/Makefile.am +++ b/transitioner/Makefile.am @@ -1,51 +1,52 @@ # # Copyright (C) 2004 Andrew Beekhof # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ -I$(top_builddir)/libltdl -I$(top_srcdir)/libltdl halibdir = $(libdir)/@HB_PKG@ COMMONLIBS = \ $(top_builddir)/lib/crm/common/libcrmcommon.la \ $(top_builddir)/lib/crm/cib/libcib.la \ + $(top_builddir)/lib/fencing/libstonithd.la \ $(CLUSTERLIBS) \ $(GLIBLIB) \ $(LIBRT) ## binary progs halib_PROGRAMS = tengine ttest ## SOURCES noinst_HEADERS = tengine.h te_callbacks.h tengine_SOURCES = actions.c events.c utils.c callbacks.c main.c tengine_LDADD = $(COMMONLIBS) $(CLUSTERLIBS) \ $(top_builddir)/lib/crm/transition/libtransitioner.la ttest_SOURCES = actions.c events.c utils.c callbacks.c ttest.c ttest_LDADD = $(COMMONLIBS) $(CLUSTERLIBS) \ $(top_builddir)/lib/crm/transition/libtransitioner.la clean-generic: rm -f *.log *.debug *~ install-exec-local: uninstall-local: diff --git a/transitioner/actions.c b/transitioner/actions.c index 906c7eb749..16036a2d1e 100644 --- a/transitioner/actions.c +++ b/transitioner/actions.c @@ -1,532 +1,525 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include char *te_uuid = NULL; IPC_Channel *crm_ch = NULL; void send_rsc_command(crm_action_t *action); extern crm_action_timer_t *transition_timer; static void te_start_action_timer(crm_action_t *action) { crm_malloc0(action->timer, sizeof(crm_action_timer_t)); action->timer->timeout = action->timeout; action->timer->reason = timeout_action_warn; action->timer->action = action; action->timer->source_id = Gmain_timeout_add( action->timer->timeout, action_timer_callback, (void*)action->timer); CRM_ASSERT(action->timer->source_id != 0); } static gboolean te_pseudo_action(crm_graph_t *graph, crm_action_t *pseudo) { crm_info("Pseudo action %d fired and confirmed", pseudo->id); pseudo->confirmed = TRUE; update_graph(graph, pseudo); trigger_graph(); return TRUE; } -#if SUPPORT_HEARTBEAT void send_stonith_update(stonith_ops_t * op) { enum cib_errors rc = cib_ok; const char *target = op->node_name; const char *uuid = op->node_uuid; /* zero out the node-status & remove all LRM status info */ crm_data_t *node_state = create_xml_node(NULL, XML_CIB_TAG_STATE); CRM_CHECK(op->node_name != NULL, return); CRM_CHECK(op->node_uuid != NULL, return); crm_xml_add(node_state, XML_ATTR_UUID, uuid); crm_xml_add(node_state, XML_ATTR_UNAME, target); crm_xml_add(node_state, XML_CIB_ATTR_HASTATE, DEADSTATUS); crm_xml_add(node_state, XML_CIB_ATTR_INCCM, XML_BOOLEAN_NO); crm_xml_add(node_state, XML_CIB_ATTR_CRMDSTATE, OFFLINESTATUS); crm_xml_add(node_state, XML_CIB_ATTR_JOINSTATE, CRMD_JOINSTATE_DOWN); crm_xml_add(node_state, XML_CIB_ATTR_EXPSTATE, CRMD_JOINSTATE_DOWN); crm_xml_add(node_state, XML_CIB_ATTR_REPLACE, XML_CIB_TAG_LRM); crm_xml_add(node_state, XML_ATTR_ORIGIN, __FUNCTION__); rc = te_cib_conn->cmds->update( te_cib_conn, XML_CIB_TAG_STATUS, node_state, NULL, cib_quorum_override|cib_scope_local); if(rc < cib_ok) { crm_err("CIB update failed: %s", cib_error2string(rc)); abort_transition( INFINITY, tg_shutdown, "CIB update failed", node_state); } else { /* delay processing the trigger until the update completes */ add_cib_op_callback(rc, FALSE, NULL, cib_fencing_updated); } free_xml(node_state); return; } -#endif static gboolean te_fence_node(crm_graph_t *graph, crm_action_t *action) { -#if SUPPORT_HEARTBEAT - if(is_heartbeat_cluster()) { const char *id = NULL; const char *uuid = NULL; const char *target = NULL; const char *type = NULL; stonith_ops_t * st_op = NULL; id = ID(action->xml); target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); type = g_hash_table_lookup(action->params, crm_meta_name("stonith_action")); CRM_CHECK(id != NULL, crm_log_xml_warn(action->xml, "BadAction"); return FALSE); CRM_CHECK(uuid != NULL, crm_log_xml_warn(action->xml, "BadAction"); return FALSE); CRM_CHECK(type != NULL, crm_log_xml_warn(action->xml, "BadAction"); return FALSE); CRM_CHECK(target != NULL, crm_log_xml_warn(action->xml, "BadAction"); return FALSE); te_log_action(LOG_INFO, "Executing %s fencing operation (%s) on %s (timeout=%d)", type, id, target, transition_graph->transition_timeout / 2); /* Passing NULL means block until we can connect... */ te_connect_stonith(NULL); crm_malloc0(st_op, sizeof(stonith_ops_t)); if(safe_str_eq(type, "poweroff")) { st_op->optype = POWEROFF; } else { st_op->optype = RESET; } st_op->timeout = transition_graph->transition_timeout / 2; st_op->node_name = crm_strdup(target); st_op->node_uuid = crm_strdup(uuid); st_op->private_data = generate_transition_key( transition_graph->id, action->id, 0, te_uuid); CRM_ASSERT(stonithd_input_IPC_channel() != NULL); if (ST_OK != stonithd_node_fence( st_op )) { crm_err("Cannot fence %s: stonithd_node_fence() call failed ", target); } return TRUE; - } -#endif - return FALSE; } static int get_target_rc(crm_action_t *action) { const char *target_rc_s = g_hash_table_lookup( action->params, crm_meta_name(XML_ATTR_TE_TARGET_RC)); if(target_rc_s != NULL) { return crm_parse_int(target_rc_s, "0"); } return 0; } static gboolean te_crm_command(crm_graph_t *graph, crm_action_t *action) { char *value = NULL; char *counter = NULL; HA_Message *cmd = NULL; const char *id = NULL; const char *task = NULL; const char *on_node = NULL; gboolean ret = TRUE; id = ID(action->xml); task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); CRM_CHECK(on_node != NULL && strlen(on_node) != 0, te_log_action(LOG_ERR, "Corrupted command (id=%s) %s: no node", crm_str(id), crm_str(task)); return FALSE); te_log_action(LOG_INFO, "Executing crm-event (%s): %s on %s", crm_str(id), crm_str(task), on_node); cmd = create_request(task, NULL, on_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL); counter = generate_transition_key( transition_graph->id, action->id, get_target_rc(action), te_uuid); crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter); ret = send_ipc_message(crm_ch, cmd); crm_free(counter); crm_msg_del(cmd); value = g_hash_table_lookup(action->params, crm_meta_name(XML_ATTR_TE_NOWAIT)); if(ret == FALSE) { crm_err("Action %d failed: send", action->id); return FALSE; } else if(crm_is_true(value)) { crm_info("Skipping wait for %d", action->id); action->confirmed = TRUE; update_graph(graph, action); trigger_graph(); } else if(ret && action->timeout > 0) { crm_debug("Setting timer for action %d",action->id); action->timer->reason = timeout_action_warn; te_start_action_timer(action); } return TRUE; } static gboolean te_rsc_command(crm_graph_t *graph, crm_action_t *action) { /* never overwrite stop actions in the CIB with * anything other than completed results * * Writing pending stops makes it look like the * resource is running again */ const char *task = NULL; const char *on_node = NULL; action->executed = FALSE; on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); CRM_CHECK(on_node != NULL && strlen(on_node) != 0, te_log_action(LOG_ERR, "Corrupted command(id=%s) %s: no node", ID(action->xml), crm_str(task)); return FALSE); send_rsc_command(action); return TRUE; } gboolean cib_action_update(crm_action_t *action, int status) { char *op_id = NULL; char *code = NULL; char *digest = NULL; crm_data_t *tmp = NULL; crm_data_t *params = NULL; crm_data_t *state = NULL; crm_data_t *rsc = NULL; crm_data_t *xml_op = NULL; crm_data_t *action_rsc = NULL; enum cib_errors rc = cib_ok; const char *name = NULL; const char *value = NULL; const char *rsc_id = NULL; const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); const char *task_uuid = crm_element_value( action->xml, XML_LRM_ATTR_TASK_KEY); const char *target_uuid = crm_element_value( action->xml, XML_LRM_ATTR_TARGET_UUID); int call_options = cib_quorum_override|cib_scope_local; crm_warn("%s %d: %s on %s timed out", crm_element_name(action->xml), action->id, task_uuid, target); action_rsc = find_xml_node(action->xml, XML_CIB_TAG_RESOURCE, TRUE); if(action_rsc == NULL) { return FALSE; } rsc_id = ID(action_rsc); CRM_CHECK(rsc_id != NULL, crm_log_xml_err(action->xml, "Bad:action"); return FALSE); code = crm_itoa(status); /* update the CIB */ state = create_xml_node(NULL, XML_CIB_TAG_STATE); crm_xml_add(state, XML_ATTR_UUID, target_uuid); crm_xml_add(state, XML_ATTR_UNAME, target); rsc = create_xml_node(state, XML_CIB_TAG_LRM); crm_xml_add(rsc, XML_ATTR_ID, target_uuid); rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCES); rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCE); crm_xml_add(rsc, XML_ATTR_ID, rsc_id); name = XML_ATTR_TYPE; value = crm_element_value(action_rsc, name); crm_xml_add(rsc, name, value); name = XML_AGENT_ATTR_CLASS; value = crm_element_value(action_rsc, name); crm_xml_add(rsc, name, value); name = XML_AGENT_ATTR_PROVIDER; value = crm_element_value(action_rsc, name); crm_xml_add(rsc, name, value); xml_op = create_xml_node(rsc, XML_LRM_TAG_RSC_OP); crm_xml_add(xml_op, XML_ATTR_ID, task); op_id = generate_op_key(rsc_id, task, action->interval); crm_xml_add(xml_op, XML_ATTR_ID, op_id); crm_free(op_id); crm_xml_add(xml_op, XML_LRM_ATTR_TASK, task); crm_xml_add(xml_op, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); crm_xml_add(xml_op, XML_LRM_ATTR_OPSTATUS, code); crm_xml_add(xml_op, XML_LRM_ATTR_CALLID, "-1"); crm_xml_add_int(xml_op, XML_LRM_ATTR_INTERVAL, action->interval); crm_xml_add(xml_op, XML_LRM_ATTR_RC, code); crm_xml_add(xml_op, XML_ATTR_ORIGIN, __FUNCTION__); crm_free(code); code = generate_transition_key( transition_graph->id, action->id, get_target_rc(action), te_uuid); crm_xml_add(xml_op, XML_ATTR_TRANSITION_KEY, code); crm_free(code); code = generate_transition_magic( crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY), status, status); crm_xml_add(xml_op, XML_ATTR_TRANSITION_MAGIC, code); crm_free(code); tmp = find_xml_node(action->xml, "attributes", TRUE); params = create_xml_node(NULL, XML_TAG_PARAMS); copy_in_properties(params, tmp); filter_action_parameters(params, CRM_FEATURE_SET); digest = calculate_xml_digest(params, TRUE, FALSE); /* info for now as this area has been problematic to debug */ crm_debug("Calculated digest %s for %s (%s)\n", digest, ID(xml_op), crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC)); crm_log_xml(LOG_DEBUG, "digest:source", params); crm_xml_add(xml_op, XML_LRM_ATTR_OP_DIGEST, digest); crm_free(digest); free_xml(params); crm_debug_3("Updating CIB with \"%s\" (%s): %s %s on %s", status<0?"new action":XML_ATTR_TIMEOUT, crm_element_name(action->xml), crm_str(task), rsc_id, target); rc = te_cib_conn->cmds->update( te_cib_conn, XML_CIB_TAG_STATUS, state, NULL, call_options); crm_debug("Updating CIB with %s action %d: %s on %s (call_id=%d)", op_status2text(status), action->id, task_uuid, target, rc); add_cib_op_callback(rc, FALSE, NULL, cib_action_updated); free_xml(state); action->sent_update = TRUE; if(rc < cib_ok) { return FALSE; } return TRUE; } void send_rsc_command(crm_action_t *action) { HA_Message *cmd = NULL; crm_data_t *rsc_op = NULL; char *counter = NULL; const char *task = NULL; const char *value = NULL; const char *on_node = NULL; const char *task_uuid = NULL; CRM_ASSERT(action != NULL); CRM_ASSERT(action->xml != NULL); rsc_op = action->xml; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); on_node = crm_element_value(rsc_op, XML_LRM_ATTR_TARGET); counter = generate_transition_key( transition_graph->id, action->id, get_target_rc(action), te_uuid); crm_xml_add(rsc_op, XML_ATTR_TRANSITION_KEY, counter); crm_info("Initiating action %d: %s %s on %s", action->id, task, task_uuid, on_node); crm_free(counter); if(rsc_op != NULL) { crm_log_xml_debug_2(rsc_op, "Performing"); } cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, on_node, CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL); #if 1 send_ipc_message(crm_ch, cmd); #else /* test the TE timer/recovery code */ if((action->id % 11) == 0) { crm_err("Faking lost action %d: %s", action->id, task_uuid); } else { send_ipc_message(crm_ch, cmd); } #endif crm_msg_del(cmd); action->executed = TRUE; value = g_hash_table_lookup(action->params, crm_meta_name(XML_ATTR_TE_NOWAIT)); if(crm_is_true(value)) { crm_debug("Skipping wait for %d", action->id); action->confirmed = TRUE; update_graph(transition_graph, action); trigger_graph(); } else if(action->timeout > 0) { int action_timeout = (2 * action->timeout) + transition_graph->network_delay; crm_debug_3("Setting timer for action %s", task_uuid); if(transition_graph->transition_timeout < action_timeout) { crm_debug("Action %d:" " Increasing transition %d timeout to %d (2*%d + %d)", action->id, transition_graph->id, action_timeout, action->timeout, transition_graph->network_delay); transition_graph->transition_timeout = action_timeout; } te_start_action_timer(action); } } crm_graph_functions_t te_graph_fns = { te_pseudo_action, te_rsc_command, te_crm_command, te_fence_node }; extern GMainLoop* mainloop; void notify_crmd(crm_graph_t *graph) { HA_Message *cmd = NULL; int log_level = LOG_DEBUG; const char *op = CRM_OP_TEABORT; int pending_callbacks = num_cib_op_callbacks(); stop_te_timer(transition_timer); if(pending_callbacks != 0) { crm_warn("Delaying completion until all CIB updates complete"); return; } CRM_CHECK(graph->complete, graph->complete = TRUE); switch(graph->completion_action) { case tg_stop: op = CRM_OP_TECOMPLETE; log_level = LOG_INFO; break; case tg_abort: case tg_restart: op = CRM_OP_TEABORT; break; case tg_shutdown: crm_info("Exiting after transition"); if (mainloop != NULL && g_main_is_running(mainloop)) { g_main_quit(mainloop); return; } exit(LSB_EXIT_OK); } te_log_action(log_level, "Transition %d status: %s - %s", graph->id, op, crm_str(graph->abort_reason)); print_graph(LOG_DEBUG_3, graph); cmd = create_request( op, NULL, NULL, CRM_SYSTEM_DC, CRM_SYSTEM_TENGINE, NULL); if(graph->abort_reason != NULL) { ha_msg_add(cmd, "message", graph->abort_reason); } send_ipc_message(crm_ch, cmd); crm_msg_del(cmd); graph->abort_reason = NULL; graph->completion_action = tg_restart; } diff --git a/transitioner/callbacks.c b/transitioner/callbacks.c index c3eec65638..8bbeeeb76f 100644 --- a/transitioner/callbacks.c +++ b/transitioner/callbacks.c @@ -1,610 +1,608 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include void te_update_confirm(const char *event, HA_Message *msg); void te_update_diff(const char *event, HA_Message *msg); crm_data_t *need_abort(crm_data_t *update); void cib_fencing_updated(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data); extern char *te_uuid; gboolean shuttingdown = FALSE; crm_graph_t *transition_graph; GTRIGSource *transition_trigger = NULL; crm_action_timer_t *transition_timer = NULL; static gboolean start_global_timer(crm_action_timer_t *timer, int timeout) { CRM_ASSERT(timer != NULL); CRM_CHECK(timer > 0, return FALSE); CRM_CHECK(timer->source_id == 0, return FALSE); if(timeout <= 0) { crm_err("Tried to start timer with period: %d", timeout); } else if(timer->source_id == 0) { crm_debug_2("Starting abort timer: %dms", timeout); timer->timeout = timeout; timer->source_id = Gmain_timeout_add( timeout, global_timer_callback, (void*)timer); CRM_ASSERT(timer->source_id != 0); return TRUE; } else { crm_err("Timer is already active with period: %d", timer->timeout); } return FALSE; } void te_update_diff(const char *event, HA_Message *msg) { int rc = -1; const char *op = NULL; crm_data_t *diff = NULL; crm_data_t *aborted = NULL; const char *set_name = NULL; int diff_add_updates = 0; int diff_add_epoch = 0; int diff_add_admin_epoch = 0; int diff_del_updates = 0; int diff_del_epoch = 0; int diff_del_admin_epoch = 0; if(msg == NULL) { crm_err("NULL update"); return; } ha_msg_value_int(msg, F_CIB_RC, &rc); op = cl_get_string(msg, F_CIB_OPERATION); if(rc < cib_ok) { crm_debug_2("Ignoring failed %s operation: %s", op, cib_error2string(rc)); return; } diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); cib_diff_version_details( diff, &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); crm_debug("Processing diff (%s): %d.%d.%d -> %d.%d.%d", op, diff_del_admin_epoch,diff_del_epoch,diff_del_updates, diff_add_admin_epoch,diff_add_epoch,diff_add_updates); log_cib_diff(LOG_DEBUG_2, diff, op); set_name = "diff-added"; if(diff != NULL) { crm_data_t *section = NULL; crm_data_t *change_set = find_xml_node(diff, set_name, FALSE); change_set = find_xml_node(change_set, XML_TAG_CIB, FALSE); if(change_set != NULL) { crm_debug_2("Checking status changes"); section=get_object_root(XML_CIB_TAG_STATUS,change_set); } if(section != NULL) { extract_event(section); } crm_debug_2("Checking change set: %s", set_name); aborted = need_abort(change_set); } set_name = "diff-removed"; if(diff != NULL && aborted == NULL) { crm_data_t *attrs = NULL; crm_data_t *status = NULL; crm_data_t *change_set = find_xml_node(diff, set_name, FALSE); change_set = find_xml_node(change_set, XML_TAG_CIB, FALSE); crm_debug_2("Checking change set: %s", set_name); aborted = need_abort(change_set); if(aborted == NULL && change_set != NULL) { status = get_object_root(XML_CIB_TAG_STATUS, change_set); xml_child_iter_filter( status, node_state, XML_CIB_TAG_STATE, attrs = find_xml_node( node_state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); if(attrs != NULL) { crm_info("Aborting on "XML_TAG_TRANSIENT_NODEATTRS" deletions"); abort_transition(INFINITY, tg_restart, XML_TAG_TRANSIENT_NODEATTRS, attrs); } ); } } if(aborted != NULL) { abort_transition( INFINITY, tg_restart, "Non-status change", NULL); } free_xml(diff); return; } gboolean process_te_message(HA_Message *msg, crm_data_t *xml_data, IPC_Channel *sender) { crm_data_t *xml_obj = NULL; const char *from = cl_get_string(msg, F_ORIG); const char *sys_to = cl_get_string(msg, F_CRM_SYS_TO); const char *sys_from = cl_get_string(msg, F_CRM_SYS_FROM); const char *ref = cl_get_string(msg, XML_ATTR_REFERENCE); const char *op = cl_get_string(msg, F_CRM_TASK); const char *type = cl_get_string(msg, F_CRM_MSG_TYPE); crm_debug_2("Processing %s (%s) message", op, ref); crm_log_message(LOG_DEBUG_3, msg); if(op == NULL){ /* error */ } else if(strcasecmp(op, CRM_OP_HELLO) == 0) { /* ignore */ } else if(sys_to == NULL || strcasecmp(sys_to, CRM_SYSTEM_TENGINE) != 0) { crm_debug_2("Bad sys-to %s", crm_str(sys_to)); return FALSE; } else if(safe_str_eq(op, CRM_OP_INVOKE_LRM) && safe_str_eq(sys_from, CRM_SYSTEM_LRMD) /* && safe_str_eq(type, XML_ATTR_RESPONSE) */ ){ #if CRM_DEPRECATED_SINCE_2_0_4 if(safe_str_eq(crm_element_name(xml_data), XML_TAG_CIB)) { xml_obj = xml_data; } else { xml_obj = find_xml_node(xml_data, XML_TAG_CIB, TRUE); } #else xml_obj = xml_data; CRM_CHECK(xml_obj != NULL, crm_log_message_adv(LOG_ERR, "Invalid (N)ACK", msg); return FALSE); #endif CRM_CHECK(xml_obj != NULL, crm_log_message_adv(LOG_ERR, "Invalid (N)ACK", msg); return FALSE); xml_obj = get_object_root(XML_CIB_TAG_STATUS, xml_obj); CRM_CHECK(xml_obj != NULL, crm_log_message_adv(LOG_ERR, "Invalid (N)ACK", msg); return FALSE); crm_log_message_adv(LOG_DEBUG_2, "Processing (N)ACK", msg); crm_info("Processing (N)ACK %s from %s", cl_get_string(msg, XML_ATTR_REFERENCE), from); extract_event(xml_obj); } else if(safe_str_eq(type, XML_ATTR_RESPONSE)) { crm_err("Message was a response not a request. Discarding"); return TRUE; } else if(strcasecmp(op, CRM_OP_TRANSITION) == 0) { const char *graph_file = cl_get_string(msg, F_CRM_TGRAPH); const char *graph_input = cl_get_string(msg, F_CRM_TGRAPH_INPUT); CRM_CHECK(graph_file != NULL || xml_data != NULL, crm_err("No graph provided"); crm_log_message(LOG_WARNING, msg); return TRUE); if(transition_graph->complete == FALSE) { crm_info("Another transition is already active"); abort_transition( INFINITY, tg_restart, "Transition Active", NULL); } else { const char *value = NULL; crm_data_t *graph_data = xml_data; crm_info("Processing graph derived from %s", graph_input); if(graph_file != NULL) { FILE *graph_fd = fopen(graph_file, "r"); CRM_CHECK(graph_fd != NULL, cl_perror("Could not open graph file %s", graph_file); return TRUE); graph_data = file2xml(graph_fd, FALSE); unlink(graph_file); fclose(graph_fd); } destroy_graph(transition_graph); transition_graph = unpack_graph(graph_data); start_global_timer(transition_timer, transition_graph->transition_timeout); value = crm_element_value(graph_data, "failed-stop-offset"); if(value) { failed_stop_offset = crm_strdup(value); } value = crm_element_value(graph_data, "failed-start-offset"); if(value) { failed_start_offset = crm_strdup(value); } trigger_graph(); print_graph(LOG_DEBUG_2, transition_graph); if(graph_data != xml_data) { free_xml(graph_data); } } } else if(strcasecmp(op, CRM_OP_TE_HALT) == 0) { abort_transition(INFINITY, tg_stop, "Peer Halt", NULL); } else if(strcasecmp(op, CRM_OP_TEABORT) == 0) { abort_transition(INFINITY, tg_restart, "Peer Cancelled", NULL); } else { crm_err("Unknown command: %s::%s from %s", type, op, sys_from); } crm_debug_3("finished processing message"); return TRUE; } -#if SUPPORT_HEARTBEAT void tengine_stonith_callback(stonith_ops_t * op) { const char *allow_fail = NULL; int target_rc = -1; int stonith_id = -1; int transition_id = -1; char *uuid = NULL; crm_action_t *stonith_action = NULL; if(op == NULL) { crm_err("Called with a NULL op!"); return; } crm_info("call=%d, optype=%d, node_name=%s, result=%d, node_list=%s, action=%s", op->call_id, op->optype, op->node_name, op->op_result, (char *)op->node_list, op->private_data); /* this will mark the event complete if a match is found */ CRM_CHECK(op->private_data != NULL, return); /* filter out old STONITH actions */ CRM_CHECK(decode_transition_key( op->private_data, &uuid, &transition_id, &stonith_id, &target_rc), crm_err("Invalid event detected"); goto bail; ); if(transition_graph->complete || stonith_id < 0 || safe_str_neq(uuid, te_uuid) || transition_graph->id != transition_id) { crm_info("Ignoring STONITH action initiated outside" " of the current transition"); } stonith_action = get_action(stonith_id, TRUE); if(stonith_action == NULL) { crm_err("Stonith action not matched"); goto bail; } switch(op->op_result) { case STONITH_SUCCEEDED: send_stonith_update(op); break; case STONITH_CANNOT: case STONITH_TIMEOUT: case STONITH_GENERIC: stonith_action->failed = TRUE; allow_fail = g_hash_table_lookup( stonith_action->params, crm_meta_name(XML_ATTR_TE_ALLOWFAIL)); if(FALSE == crm_is_true(allow_fail)) { crm_err("Stonith of %s failed (%d)..." " aborting transition.", op->node_name, op->op_result); abort_transition(INFINITY, tg_restart, "Stonith failed", NULL); } break; default: crm_err("Unsupported action result: %d", op->op_result); abort_transition(INFINITY, tg_restart, "Unsupport Stonith result", NULL); } update_graph(transition_graph, stonith_action); trigger_graph(); bail: crm_free(uuid); return; } void tengine_stonith_connection_destroy(gpointer user_data) { crm_err("Fencing daemon has left us"); stonith_src = NULL; if(stonith_src == NULL) { G_main_set_trigger(stonith_reconnect); } /* cbchan will be garbage at this point, arrange for it to be reset */ set_stonithd_input_IPC_channel_NULL(); return; } gboolean tengine_stonith_dispatch(IPC_Channel *sender, void *user_data) { int lpc = 0; while(stonithd_op_result_ready()) { if (sender->ch_status == IPC_DISCONNECT) { /* The message which was pending for us is that * the IPC status is now IPC_DISCONNECT */ break; } if(ST_FAIL == stonithd_receive_ops_result(FALSE)) { crm_err("stonithd_receive_ops_result() failed"); } else { lpc++; } } crm_debug_2("Processed %d messages", lpc); if (sender->ch_status == IPC_DISCONNECT) { return FALSE; } return TRUE; } -#endif void cib_fencing_updated(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { trigger_graph(); if(rc < cib_ok) { crm_err("CIB update failed: %s", cib_error2string(rc)); crm_log_xml_warn(msg, "[Failed Update]"); } } void cib_action_updated(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { trigger_graph(); if(rc < cib_ok) { crm_err("Update %d FAILED: %s", call_id, cib_error2string(rc)); } } void cib_failcount_updated(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { trigger_graph(); if(rc < cib_ok) { crm_err("Update %d FAILED: %s", call_id, cib_error2string(rc)); } } gboolean action_timer_callback(gpointer data) { crm_action_timer_t *timer = NULL; if(data == NULL) { crm_err("Timer popped with no data"); return FALSE; } timer = (crm_action_timer_t*)data; stop_te_timer(timer); crm_warn("Timer popped (abort_level=%d, complete=%s)", transition_graph->abort_priority, transition_graph->complete?"true":"false"); CRM_CHECK(timer->action != NULL, return FALSE); if(transition_graph->complete) { crm_warn("Ignoring timeout while not in transition"); } else if(timer->reason == timeout_action_warn) { print_action( LOG_WARNING,"Action missed its timeout", timer->action); } else { /* fail the action */ cib_action_update(timer->action, LRM_OP_TIMEOUT); } return FALSE; } static int unconfirmed_actions(gboolean send_updates) { int unconfirmed = 0; const char *key = NULL; const char *task = NULL; const char *node = NULL; crm_debug_2("Unconfirmed actions..."); slist_iter( synapse, synapse_t, transition_graph->synapses, lpc, /* lookup event */ slist_iter( action, crm_action_t, synapse->actions, lpc2, if(action->executed == FALSE) { continue; } else if(action->confirmed) { continue; } unconfirmed++; task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); key = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); crm_info("Action %s %d unconfirmed from %s", key, action->id, node); if(action->type != action_type_rsc) { continue; } else if(send_updates == FALSE) { continue; } else if(safe_str_eq(task, "cancel")) { /* we dont need to update the CIB with these */ continue; } else if(safe_str_eq(task, "stop")) { /* *never* update the CIB with these */ continue; } cib_action_update(action, LRM_OP_PENDING); ); ); if(unconfirmed > 0) { crm_warn("Waiting on %d unconfirmed actions", unconfirmed); } return unconfirmed; } gboolean global_timer_callback(gpointer data) { crm_action_timer_t *timer = NULL; if(data == NULL) { crm_err("Timer popped with no data"); return FALSE; } timer = (crm_action_timer_t*)data; stop_te_timer(timer); crm_warn("Timer popped (abort_level=%d, complete=%s)", transition_graph->abort_priority, transition_graph->complete?"true":"false"); CRM_CHECK(timer->action == NULL, return FALSE); if(transition_graph->complete) { crm_err("Ignoring timeout while not in transition"); } else if(timer->reason == timeout_abort) { int unconfirmed = unconfirmed_actions(FALSE); crm_warn("Transition abort timeout reached..." " marking transition complete."); transition_graph->complete = TRUE; abort_transition(INFINITY, tg_restart, "Global Timeout", NULL); if(unconfirmed != 0) { crm_warn("Writing %d unconfirmed actions to the CIB", unconfirmed); unconfirmed_actions(TRUE); } } return FALSE; } gboolean te_graph_trigger(gpointer user_data) { int timeout = 0; enum transition_status graph_rc = -1; if(transition_graph->complete == FALSE) { graph_rc = run_graph(transition_graph); timeout = transition_graph->transition_timeout; print_graph(LOG_DEBUG_3, transition_graph); if(graph_rc == transition_active) { crm_debug_3("Transition not yet complete"); stop_te_timer(transition_timer); start_global_timer(transition_timer, timeout); return TRUE; } else if(graph_rc == transition_pending) { crm_debug_3("Transition not yet complete - no actions fired"); return TRUE; } if(graph_rc != transition_complete) { crm_err("Transition failed: %s", transition_status(graph_rc)); print_graph(LOG_WARNING, transition_graph); } } transition_graph->complete = TRUE; notify_crmd(transition_graph); return TRUE; } diff --git a/transitioner/main.c b/transitioner/main.c index 7a902fe9cf..a5a4cecd83 100644 --- a/transitioner/main.c +++ b/transitioner/main.c @@ -1,238 +1,234 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define OPTARGS "hVc" GMainLoop* mainloop = NULL; cib_t *te_cib_conn = NULL; void usage(const char* cmd, int exit_status); int te_init(void); gboolean tengine_shutdown(int nsig, gpointer unused); extern void te_update_confirm(const char *event, HA_Message *msg); extern void te_update_diff(const char *event, HA_Message *msg); extern crm_graph_functions_t te_graph_fns; int main(int argc, char ** argv) { int flag; int rc = 0; int dummy = 0; int argerr = 0; gboolean allow_cores = TRUE; crm_log_init(CRM_SYSTEM_TENGINE, LOG_INFO, TRUE, FALSE, 0, NULL); G_main_add_SignalHandler( G_PRIORITY_HIGH, SIGTERM, tengine_shutdown, NULL, NULL); transition_trigger = G_main_add_TriggerHandler( G_PRIORITY_LOW, te_graph_trigger, NULL, NULL); stonith_reconnect = G_main_add_TriggerHandler( G_PRIORITY_LOW, te_connect_stonith, &dummy, NULL); crm_debug_3("Begining option processing"); while ((flag = getopt(argc, argv, OPTARGS)) != EOF) { switch(flag) { case 'V': alter_debug(DEBUG_INC); break; case 'h': /* Help message */ usage(crm_system_name, LSB_EXIT_OK); break; case 'c': allow_cores = TRUE; break; default: ++argerr; break; } } crm_debug_3("Option processing complete"); if (optind > argc) { ++argerr; } if (argerr) { usage(crm_system_name,LSB_EXIT_GENERIC); } /* read local config file */ crm_debug_3("Starting..."); rc = te_init(); return rc; } int te_init(void) { int init_ok = TRUE; init_client_ipc_comms( CRM_SYSTEM_CRMD, subsystem_msg_dispatch, (void*)process_te_message, &crm_ch); if(crm_ch != NULL) { send_hello_message(crm_ch, "1234", CRM_SYSTEM_TENGINE, "0", "1"); } else { init_ok = FALSE; crm_err("Could not connect to the CRMd"); } if(init_ok) { crm_debug_4("Creating CIB connection"); te_cib_conn = cib_new(); if(te_cib_conn == NULL) { init_ok = FALSE; } } if(init_ok) { crm_debug_4("Connecting to the CIB"); if(cib_ok != te_cib_conn->cmds->signon( te_cib_conn, crm_system_name, cib_command)) { crm_err("Could not connect to the CIB"); init_ok = FALSE; } } if(init_ok) { crm_debug_4("Setting CIB notification callback"); if(cib_ok != te_cib_conn->cmds->add_notify_callback( te_cib_conn, T_CIB_DIFF_NOTIFY, te_update_diff)) { crm_err("Could not set CIB notification callback"); init_ok = FALSE; } } - if(is_heartbeat_cluster() && init_ok) { + if(init_ok) { G_main_set_trigger(stonith_reconnect); } if(init_ok) { cl_uuid_t new_uuid; char uuid_str[UU_UNPARSE_SIZEOF]; cl_uuid_generate(&new_uuid); cl_uuid_unparse(&new_uuid, uuid_str); te_uuid = crm_strdup(uuid_str); crm_info("Registering TE UUID: %s", te_uuid); set_graph_functions(&te_graph_fns); /* create a blank one */ transition_graph = unpack_graph(NULL); transition_graph->complete = TRUE; transition_graph->abort_reason = "DC Takeover"; transition_graph->completion_action = tg_restart; crm_malloc0(transition_timer, sizeof(crm_action_timer_t)); transition_timer->source_id = 0; transition_timer->reason = timeout_abort; transition_timer->action = NULL; } if(init_ok) { /* Create the mainloop and run it... */ crm_info("Starting %s", crm_system_name); mainloop = g_main_new(FALSE); g_main_run(mainloop); return_to_orig_privs(); crm_info("Exiting %s", crm_system_name); } else { crm_warn("Initialization errors, %s not starting.", crm_system_name); } destroy_graph(transition_graph); crm_free(transition_timer); te_cib_conn->cmds->signoff(te_cib_conn); cib_delete(te_cib_conn); te_cib_conn = NULL; -#if SUPPORT_HEARTBEAT - if(is_heartbeat_cluster()) { - stonithd_signoff(); - } -#endif + stonithd_signoff(); crm_free(te_uuid); if(init_ok) { return 0; } return 1; } void usage(const char* cmd, int exit_status) { FILE* stream; stream = exit_status ? stderr : stdout; fprintf(stream, "usage: %s [-srkh]" "[-c configure file]\n", cmd); /* fprintf(stream, "\t-d\tsets debug level\n"); */ /* fprintf(stream, "\t-s\tgets daemon status\n"); */ /* fprintf(stream, "\t-r\trestarts daemon\n"); */ /* fprintf(stream, "\t-k\tstops daemon\n"); */ /* fprintf(stream, "\t-h\thelp message\n"); */ fflush(stream); exit(exit_status); } gboolean shuttingdown; gboolean tengine_shutdown(int nsig, gpointer unused) { shuttingdown = TRUE; abort_transition(INFINITY, tg_shutdown, "Shutdown", NULL); return TRUE; } diff --git a/transitioner/te_callbacks.h b/transitioner/te_callbacks.h index 157450dda8..8fac7a853c 100644 --- a/transitioner/te_callbacks.h +++ b/transitioner/te_callbacks.h @@ -1,42 +1,40 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef TE_CALLBACKS__H #define TE_CALLBACKS__H extern void cib_fencing_updated(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data); extern void cib_action_updated(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data); extern void cib_failcount_updated(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data); extern gboolean global_timer_callback(gpointer data); extern gboolean action_timer_callback(gpointer data); extern gboolean te_graph_trigger(gpointer user_data); extern void tengine_stonith_connection_destroy(gpointer user_data); -#if SUPPORT_HEARTBEAT extern void tengine_stonith_callback(stonith_ops_t * op); extern gboolean tengine_stonith_dispatch(IPC_Channel *sender, void *user_data); -#endif #endif diff --git a/transitioner/tengine.h b/transitioner/tengine.h index 303a2fd301..36cb394f53 100644 --- a/transitioner/tengine.h +++ b/transitioner/tengine.h @@ -1,77 +1,75 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef TENGINE__H #define TENGINE__H #include #include -#if SUPPORT_HEARTBEAT -# include +#include extern void send_stonith_update(stonith_ops_t * op); -#endif extern IPC_Channel *crm_ch; extern GMainLoop* mainloop; /* tengine */ extern crm_action_t *match_down_event( int rc, const char *target, const char *filter); extern gboolean cib_action_update(crm_action_t *action, int status); /* utils */ extern crm_action_t *get_action(int id, gboolean confirmed); extern gboolean stop_te_timer(crm_action_timer_t *timer); extern const char *get_rsc_state(const char *task, op_status_t status); /* unpack */ extern gboolean extract_event(crm_data_t *msg); extern gboolean process_te_message( HA_Message * msg, crm_data_t *xml_data, IPC_Channel *sender); extern crm_graph_t *transition_graph; extern GTRIGSource *transition_trigger; extern char *te_uuid; extern cib_t *te_cib_conn; extern void notify_crmd(crm_graph_t *graph); #include extern void trigger_graph_processing(const char *fn, int line); extern void abort_transition_graph( int abort_priority, enum transition_action abort_action, const char *abort_text, crm_data_t *reason, const char *fn, int line); #define trigger_graph() trigger_graph_processing(__FUNCTION__, __LINE__) #define abort_transition(pri, action, text, reason) \ abort_transition_graph(pri, action, text, reason,__FUNCTION__,__LINE__); extern gboolean te_connect_stonith(gpointer user_data); extern GCHSource *stonith_src; extern GTRIGSource *transition_trigger; extern GTRIGSource *stonith_reconnect; extern crm_action_timer_t *transition_timer; extern char *failed_stop_offset; extern char *failed_start_offset; #endif diff --git a/transitioner/utils.c b/transitioner/utils.c index f51ba2f75e..796e40ba17 100644 --- a/transitioner/utils.c +++ b/transitioner/utils.c @@ -1,150 +1,145 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include extern cib_t *te_cib_conn; GCHSource *stonith_src = NULL; GTRIGSource *stonith_reconnect = NULL; gboolean te_connect_stonith(gpointer user_data) { -#if SUPPORT_HEARTBEAT - if(is_heartbeat_cluster()) { int lpc = 0; int rc = ST_OK; IPC_Channel *fence_ch = NULL; if(stonith_src != NULL) { crm_debug("Still connected"); return TRUE; } for(lpc = 0; lpc < 30; lpc++) { crm_info("Attempting connection to fencing daemon..."); sleep(1); rc = stonithd_signon("tengine"); if(rc == ST_OK) { break; } if(user_data != NULL) { crm_err("Sign-in failed: triggered a retry"); G_main_set_trigger(stonith_reconnect); return TRUE; } crm_err("Sign-in failed: pausing and trying again in 2s..."); sleep(1); } CRM_ASSERT(rc == ST_OK); /* If not, we failed 30 times... just get out */ CRM_ASSERT(stonithd_set_stonith_ops_callback( tengine_stonith_callback) == ST_OK); crm_debug_2("Grabbing IPC channel"); fence_ch = stonithd_input_IPC_channel(); CRM_ASSERT(fence_ch != NULL); crm_debug_2("Attaching to mainloop"); stonith_src = G_main_add_IPC_Channel( G_PRIORITY_LOW, fence_ch, FALSE, tengine_stonith_dispatch, NULL, tengine_stonith_connection_destroy); CRM_ASSERT(stonith_src != NULL); crm_info("Connected"); return TRUE; - } -#endif - return FALSE; } gboolean stop_te_timer(crm_action_timer_t *timer) { const char *timer_desc = "action timer"; if(timer == NULL) { return FALSE; } if(timer->reason == timeout_abort) { timer_desc = "global timer"; } if(timer->source_id != 0) { crm_debug_2("Stopping %s", timer_desc); Gmain_timeout_remove(timer->source_id); timer->source_id = 0; } else { return FALSE; } return TRUE; } void trigger_graph_processing(const char *fn, int line) { G_main_set_trigger(transition_trigger); crm_debug_2("%s:%d - Triggered graph processing", fn, line); } void abort_transition_graph( int abort_priority, enum transition_action abort_action, const char *abort_text, crm_data_t *reason, const char *fn, int line) { int log_level = LOG_DEBUG; /* if(abort_priority >= INFINITY) { log_level = LOG_INFO; } */ update_abort_priority( transition_graph, abort_priority, abort_action, abort_text); do_crm_log(log_level, "%s:%d - Triggered graph processing : %s", fn, line, abort_text); if(reason != NULL) { const char *magic = crm_element_value( reason, XML_ATTR_TRANSITION_MAGIC); if(magic) { do_crm_log(log_level, "Caused by update to %s: %s", ID(reason), magic); } else { crm_log_xml(log_level, "Cause", reason); } } G_main_set_trigger(transition_trigger); }