diff --git a/cib/main.c b/cib/main.c index c87ae8b175..3261415fda 100644 --- a/cib/main.c +++ b/cib/main.c @@ -1,584 +1,585 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "common.h" #if HAVE_LIBXML2 # include #endif #ifdef HAVE_GETOPT_H # include #endif #if HAVE_BZLIB_H # include #endif extern int init_remote_listener(int port, gboolean encrypted); gboolean cib_shutdown_flag = FALSE; int cib_status = pcmk_ok; crm_cluster_t crm_cluster; #if SUPPORT_HEARTBEAT oc_ev_t *cib_ev_token; ll_cluster_t *hb_conn = NULL; extern void oc_ev_special(const oc_ev_t *, oc_ev_class_t, int); gboolean cib_register_ha(ll_cluster_t * hb_cluster, const char *client_name); #else void *hb_conn = NULL; #endif extern void terminate_cib(const char *caller, gboolean fast); GMainLoop *mainloop = NULL; const char *cib_root = NULL; char *cib_our_uname = NULL; gboolean preserve_status = FALSE; gboolean cib_writes_enabled = TRUE; int remote_fd = 0; int remote_tls_fd = 0; int cib_init(void); void cib_shutdown(int nsig); gboolean startCib(const char *filename); extern int write_cib_contents(gpointer p); GHashTable *config_hash = NULL; GHashTable *local_notify_queue = NULL; char *channel1 = NULL; char *channel2 = NULL; char *channel3 = NULL; char *channel4 = NULL; char *channel5 = NULL; #define OPTARGS "maswr:V?" void cib_cleanup(void); static void cib_enable_writes(int nsig) { crm_info("(Re)enabling disk writes"); cib_writes_enabled = TRUE; } static void log_cib_client(gpointer key, gpointer value, gpointer user_data) { crm_info("Client %s", crm_client_name(value)); } /* *INDENT-OFF* */ static struct crm_option long_options[] = { /* Top-level Options */ {"help", 0, 0, '?', "\tThis text"}, {"verbose", 0, 0, 'V', "\tIncrease debug output"}, {"per-action-cib", 0, 0, 'a', "\tAdvanced use only"}, {"stand-alone", 0, 0, 's', "\tAdvanced use only"}, {"disk-writes", 0, 0, 'w', "\tAdvanced use only"}, {"cib-root", 1, 0, 'r', "\tAdvanced use only"}, {0, 0, 0, 0} }; /* *INDENT-ON* */ int main(int argc, char **argv) { int flag; int rc = 0; int index = 0; int argerr = 0; struct passwd *pwentry = NULL; crm_log_preinit(NULL, argc, argv); crm_set_options(NULL, "[options]", long_options, "Daemon for storing and replicating the cluster configuration"); crm_peer_init(); mainloop_add_signal(SIGTERM, cib_shutdown); mainloop_add_signal(SIGPIPE, cib_enable_writes); cib_writer = mainloop_add_trigger(G_PRIORITY_LOW, write_cib_contents, NULL); while (1) { flag = crm_get_option(argc, argv, &index); if (flag == -1) break; switch (flag) { case 'V': crm_bump_log_level(argc, argv); break; case 's': stand_alone = TRUE; preserve_status = TRUE; cib_writes_enabled = FALSE; pwentry = getpwnam(CRM_DAEMON_USER); CRM_CHECK(pwentry != NULL, crm_perror(LOG_ERR, "Invalid uid (%s) specified", CRM_DAEMON_USER); return 100); rc = setgid(pwentry->pw_gid); if (rc < 0) { crm_perror(LOG_ERR, "Could not set group to %d", pwentry->pw_gid); return 100; } rc = initgroups(CRM_DAEMON_GROUP, pwentry->pw_gid); if (rc < 0) { crm_perror(LOG_ERR, "Could not setup groups for user %d", pwentry->pw_uid); return 100; } rc = setuid(pwentry->pw_uid); if (rc < 0) { crm_perror(LOG_ERR, "Could not set user to %d", pwentry->pw_uid); return 100; } break; case '?': /* Help message */ crm_help(flag, EX_OK); break; case 'w': cib_writes_enabled = TRUE; break; case 'r': cib_root = optarg; break; case 'm': cib_metadata(); return 0; default: ++argerr; break; } } if (argc - optind == 1 && safe_str_eq("metadata", argv[optind])) { cib_metadata(); return 0; } if (optind > argc) { ++argerr; } if (argerr) { crm_help('?', EX_USAGE); } crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); if (cib_root == NULL) { char *path = crm_strdup_printf("%s/cib.xml", CRM_CONFIG_DIR); char *legacy = crm_strdup_printf("%s/cib.xml", CRM_LEGACY_CONFIG_DIR); if (g_file_test(path, G_FILE_TEST_EXISTS)) { cib_root = CRM_CONFIG_DIR; } else if (g_file_test(legacy, G_FILE_TEST_EXISTS)) { cib_root = CRM_LEGACY_CONFIG_DIR; crm_notice("Using legacy config location: %s", cib_root); } else { cib_root = CRM_CONFIG_DIR; crm_notice("Using new config location: %s", cib_root); } free(legacy); free(path); } else { crm_notice("Using custom config location: %s", cib_root); } if (crm_is_writable(cib_root, NULL, CRM_DAEMON_USER, CRM_DAEMON_GROUP, FALSE) == FALSE) { crm_err("Bad permissions on %s. Terminating", cib_root); fprintf(stderr, "ERROR: Bad permissions on %s. See logs for details\n", cib_root); fflush(stderr); return 100; } /* read local config file */ rc = cib_init(); CRM_CHECK(crm_hash_table_size(client_connections) == 0, crm_warn("Not all clients gone at exit")); g_hash_table_foreach(client_connections, log_cib_client, NULL); cib_cleanup(); #if SUPPORT_HEARTBEAT if (hb_conn) { hb_conn->llc_ops->delete(hb_conn); } #endif crm_info("Done"); return rc; } void cib_cleanup(void) { crm_peer_destroy(); if (local_notify_queue) { g_hash_table_destroy(local_notify_queue); } crm_client_cleanup(); g_hash_table_destroy(config_hash); free(cib_our_uname); free(channel1); free(channel2); free(channel3); free(channel4); free(channel5); } unsigned long cib_num_ops = 0; const char *cib_stat_interval = "10min"; unsigned long cib_num_local = 0, cib_num_updates = 0, cib_num_fail = 0; unsigned long cib_bad_connects = 0, cib_num_timeouts = 0; #if SUPPORT_HEARTBEAT gboolean ccm_connect(void); static void ccm_connection_destroy(gpointer user_data) { crm_err("CCM connection failed... blocking while we reconnect"); CRM_ASSERT(ccm_connect()); return; } static void *ccm_library = NULL; gboolean ccm_connect(void) { gboolean did_fail = TRUE; int num_ccm_fails = 0; int max_ccm_fails = 30; int ret; int cib_ev_fd; int (*ccm_api_register) (oc_ev_t ** token) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_register", 1); int (*ccm_api_set_callback) (const oc_ev_t * token, oc_ev_class_t class, oc_ev_callback_t * fn, oc_ev_callback_t ** prev_fn) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_set_callback", 1); void (*ccm_api_special) (const oc_ev_t *, oc_ev_class_t, int) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_special", 1); int (*ccm_api_activate) (const oc_ev_t * token, int *fd) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_activate", 1); int (*ccm_api_unregister) (oc_ev_t * token) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_unregister", 1); static struct mainloop_fd_callbacks ccm_fd_callbacks = { .dispatch = cib_ccm_dispatch, .destroy = ccm_connection_destroy, }; while (did_fail) { did_fail = FALSE; crm_info("Registering with CCM..."); ret = (*ccm_api_register) (&cib_ev_token); if (ret != 0) { did_fail = TRUE; } if (did_fail == FALSE) { crm_trace("Setting up CCM callbacks"); ret = (*ccm_api_set_callback) (cib_ev_token, OC_EV_MEMB_CLASS, cib_ccm_msg_callback, NULL); if (ret != 0) { crm_warn("CCM callback not set"); did_fail = TRUE; } } if (did_fail == FALSE) { (*ccm_api_special) (cib_ev_token, OC_EV_MEMB_CLASS, 0); crm_trace("Activating CCM token"); ret = (*ccm_api_activate) (cib_ev_token, &cib_ev_fd); if (ret != 0) { crm_warn("CCM Activation failed"); did_fail = TRUE; } } if (did_fail) { num_ccm_fails++; (*ccm_api_unregister) (cib_ev_token); if (num_ccm_fails < max_ccm_fails) { crm_warn("CCM Connection failed %d times (%d max)", num_ccm_fails, max_ccm_fails); sleep(3); } else { crm_err("CCM Activation failed %d (max) times", num_ccm_fails); return FALSE; } } } crm_debug("CCM Activation passed... all set to go!"); mainloop_add_fd("heartbeat-ccm", G_PRIORITY_MEDIUM, cib_ev_fd, cib_ev_token, &ccm_fd_callbacks); return TRUE; } #endif #if SUPPORT_COROSYNC static void cib_cs_dispatch(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { uint32_t kind = 0; xmlNode *xml = NULL; const char *from = NULL; char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); if(data == NULL) { return; } if (kind == crm_class_cluster) { xml = string2xml(data); if (xml == NULL) { crm_err("Invalid XML: '%.120s'", data); free(data); return; } crm_xml_add(xml, F_ORIG, from); /* crm_xml_add_int(xml, F_SEQ, wrapper->id); */ cib_peer_callback(xml, NULL); } free_xml(xml); free(data); } static void cib_cs_destroy(gpointer user_data) { if (cib_shutdown_flag) { crm_info("Corosync disconnection complete"); } else { crm_err("Corosync connection lost! Exiting."); terminate_cib(__FUNCTION__, TRUE); } } #endif static void cib_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data) { if (type == crm_status_processes) { crm_update_peer_state(__FUNCTION__, node, is_set(node->processes, crm_proc_cpg)?CRM_NODE_MEMBER:CRM_NODE_LOST, 0); } - if (type == crm_status_processes && legacy_mode && is_not_set(node->processes, crm_proc_cpg)) { + if ((type == crm_status_processes) && legacy_mode + && is_not_set(node->processes, crm_get_cluster_proc())) { uint32_t old = 0; if (data) { old = *(const uint32_t *)data; } if ((node->processes ^ old) & crm_proc_cpg) { crm_info("Attempting to disable legacy mode after %s left the cluster", node->uname); legacy_mode = FALSE; } } if (cib_shutdown_flag && crm_active_peers() < 2 && crm_hash_table_size(client_connections) == 0) { crm_info("No more peers"); terminate_cib(__FUNCTION__, FALSE); } if(type == crm_status_nstate && node->id && safe_str_eq(node->state, CRM_NODE_LOST)) { /* Avoid conflicts, keep the membership list to active members */ reap_crm_member(node->id, NULL); } } #if SUPPORT_HEARTBEAT static void cib_ha_connection_destroy(gpointer user_data) { if (cib_shutdown_flag) { crm_info("Heartbeat disconnection complete... exiting"); terminate_cib(__FUNCTION__, FALSE); } else { crm_err("Heartbeat connection lost! Exiting."); terminate_cib(__FUNCTION__, TRUE); } } #endif int cib_init(void) { if (is_openais_cluster()) { #if SUPPORT_COROSYNC crm_cluster.destroy = cib_cs_destroy; crm_cluster.cpg.cpg_deliver_fn = cib_cs_dispatch; crm_cluster.cpg.cpg_confchg_fn = pcmk_cpg_membership; #endif } else if (is_heartbeat_cluster()) { #if SUPPORT_HEARTBEAT crm_cluster.hb_dispatch = cib_ha_peer_callback; crm_cluster.destroy = cib_ha_connection_destroy; #endif } config_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if (startCib("cib.xml") == FALSE) { crm_crit("Cannot start CIB... terminating"); crm_exit(ENODATA); } if (stand_alone == FALSE) { if (crm_cluster_connect(&crm_cluster) == FALSE) { crm_crit("Cannot sign in to the cluster... terminating"); crm_exit(DAEMON_RESPAWN_STOP); } cib_our_uname = crm_cluster.uname; if (is_openais_cluster()) { crm_set_status_callback(&cib_peer_update_callback); } #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { gboolean was_error = FALSE; hb_conn = crm_cluster.hb_conn; if (was_error == FALSE) { if (HA_OK != hb_conn->llc_ops->set_cstatus_callback(hb_conn, cib_client_status_callback, hb_conn)) { crm_err("Cannot set cstatus callback: %s", hb_conn->llc_ops->errmsg(hb_conn)); was_error = TRUE; } } if (was_error == FALSE) { was_error = (ccm_connect() == FALSE); } if (was_error == FALSE) { /* Async get client status information in the cluster */ crm_info("Requesting the list of configured nodes"); hb_conn->llc_ops->client_status(hb_conn, NULL, CRM_SYSTEM_CIB, -1); } } #endif } else { cib_our_uname = strdup("localhost"); } cib_ipc_servers_init(&ipcs_ro, &ipcs_rw, &ipcs_shm, &ipc_ro_callbacks, &ipc_rw_callbacks); if (stand_alone) { cib_is_master = TRUE; } /* Create the mainloop and run it... */ mainloop = g_main_new(FALSE); crm_info("Starting %s mainloop", crm_system_name); g_main_run(mainloop); cib_ipc_servers_destroy(ipcs_ro, ipcs_rw, ipcs_shm); return crm_exit(pcmk_ok); } gboolean startCib(const char *filename) { gboolean active = FALSE; xmlNode *cib = readCibXmlFile(cib_root, filename, !preserve_status); CRM_ASSERT(cib != NULL); if (activateCibXml(cib, TRUE, "start") == 0) { int port = 0; const char *port_s = NULL; active = TRUE; cib_read_config(config_hash, cib); port_s = crm_element_value(cib, "remote-tls-port"); if (port_s) { port = crm_parse_int(port_s, "0"); remote_tls_fd = init_remote_listener(port, TRUE); } port_s = crm_element_value(cib, "remote-clear-port"); if (port_s) { port = crm_parse_int(port_s, "0"); remote_fd = init_remote_listener(port, FALSE); } crm_info("CIB Initialization completed successfully"); } return active; } diff --git a/crmd/membership.h b/crmd/membership.h index 13f2f2d012..7dfe39b423 100644 --- a/crmd/membership.h +++ b/crmd/membership.h @@ -1,7 +1,8 @@ +#include void reap_dead_ccm_nodes(gpointer key, gpointer value, gpointer user_data); void post_cache_update(int instance); extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source); -#define proc_flags (crm_proc_crmd | crm_proc_cpg) +#define proc_flags (crm_proc_crmd | crm_get_cluster_proc()) diff --git a/fencing/commands.c b/fencing/commands.c index 13e059db41..e181b8d4d9 100644 --- a/fencing/commands.c +++ b/fencing/commands.c @@ -1,2287 +1,2287 @@ /* * Copyright (C) 2009 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if SUPPORT_CIBSECRETS # include #endif #include GHashTable *device_list = NULL; GHashTable *topology = NULL; GList *cmd_list = NULL; static int active_children = 0; struct device_search_s { char *host; char *action; int per_device_timeout; int replies_needed; int replies_received; bool allow_suicide; void *user_data; void (*callback) (GList * devices, void *user_data); GListPtr capable; }; static gboolean stonith_device_dispatch(gpointer user_data); static void st_child_done(GPid pid, int rc, const char *output, gpointer user_data); static void stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer, const char *client_id); static void search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence); typedef struct async_command_s { int id; int pid; int fd_stdout; int options; int default_timeout; /* seconds */ int timeout; /* seconds */ int start_delay; /* milliseconds */ int delay_id; char *op; char *origin; char *client; char *client_name; char *remote_op_id; char *victim; uint32_t victim_nodeid; char *action; char *device; char *mode; GListPtr device_list; GListPtr device_next; void *internal_user_data; void (*done_cb) (GPid pid, int rc, const char *output, gpointer user_data); guint timer_sigterm; guint timer_sigkill; /*! If the operation timed out, this is the last signal * we sent to the process to get it to terminate */ int last_timeout_signo; } async_command_t; static xmlNode *stonith_construct_async_reply(async_command_t * cmd, const char *output, xmlNode * data, int rc); static gboolean is_action_required(const char *action, stonith_device_t *device) { if(device == NULL) { return FALSE; } else if (device->required_actions == NULL) { return FALSE; } else if (strstr(device->required_actions, action)) { return TRUE; } return FALSE; } static int get_action_delay_max(stonith_device_t * device, const char * action) { const char *value = NULL; int delay_max_ms = 0; if (safe_str_neq(action, "off") && safe_str_neq(action, "reboot")) { return 0; } value = g_hash_table_lookup(device->params, STONITH_ATTR_DELAY_MAX); if (value) { delay_max_ms = crm_get_msec(value); } return delay_max_ms; } static int get_action_timeout(stonith_device_t * device, const char *action, int default_timeout) { char buffer[512] = { 0, }; char *value = NULL; CRM_CHECK(action != NULL, return default_timeout); if (!device->params) { return default_timeout; } snprintf(buffer, sizeof(buffer) - 1, "pcmk_%s_timeout", action); value = g_hash_table_lookup(device->params, buffer); if (!value) { return default_timeout; } return atoi(value); } static void free_async_command(async_command_t * cmd) { if (!cmd) { return; } if (cmd->delay_id) { g_source_remove(cmd->delay_id); } cmd_list = g_list_remove(cmd_list, cmd); g_list_free_full(cmd->device_list, free); free(cmd->device); free(cmd->action); free(cmd->victim); free(cmd->remote_op_id); free(cmd->client); free(cmd->client_name); free(cmd->origin); free(cmd->mode); free(cmd->op); free(cmd); } static async_command_t * create_async_command(xmlNode * msg) { async_command_t *cmd = NULL; xmlNode *op = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_ERR); const char *action = crm_element_value(op, F_STONITH_ACTION); CRM_CHECK(action != NULL, crm_log_xml_warn(msg, "NoAction"); return NULL); crm_log_xml_trace(msg, "Command"); cmd = calloc(1, sizeof(async_command_t)); crm_element_value_int(msg, F_STONITH_CALLID, &(cmd->id)); crm_element_value_int(msg, F_STONITH_CALLOPTS, &(cmd->options)); crm_element_value_int(msg, F_STONITH_TIMEOUT, &(cmd->default_timeout)); cmd->timeout = cmd->default_timeout; cmd->origin = crm_element_value_copy(msg, F_ORIG); cmd->remote_op_id = crm_element_value_copy(msg, F_STONITH_REMOTE_OP_ID); cmd->client = crm_element_value_copy(msg, F_STONITH_CLIENTID); cmd->client_name = crm_element_value_copy(msg, F_STONITH_CLIENTNAME); cmd->op = crm_element_value_copy(msg, F_STONITH_OPERATION); cmd->action = strdup(action); cmd->victim = crm_element_value_copy(op, F_STONITH_TARGET); cmd->mode = crm_element_value_copy(op, F_STONITH_MODE); cmd->device = crm_element_value_copy(op, F_STONITH_DEVICE); CRM_CHECK(cmd->op != NULL, crm_log_xml_warn(msg, "NoOp"); free_async_command(cmd); return NULL); CRM_CHECK(cmd->client != NULL, crm_log_xml_warn(msg, "NoClient")); cmd->done_cb = st_child_done; cmd_list = g_list_append(cmd_list, cmd); return cmd; } static gboolean stonith_device_execute(stonith_device_t * device) { int exec_rc = 0; const char *action_str = NULL; async_command_t *cmd = NULL; stonith_action_t *action = NULL; CRM_CHECK(device != NULL, return FALSE); if (device->active_pid) { crm_trace("%s is still active with pid %u", device->id, device->active_pid); return TRUE; } if (device->pending_ops) { GList *first = device->pending_ops; cmd = first->data; if (cmd && cmd->delay_id) { crm_trace ("Operation %s%s%s on %s was asked to run too early, waiting for start_delay timeout of %dms", cmd->action, cmd->victim ? " for node " : "", cmd->victim ? cmd->victim : "", device->id, cmd->start_delay); return TRUE; } device->pending_ops = g_list_remove_link(device->pending_ops, first); g_list_free_1(first); } if (cmd == NULL) { crm_trace("Nothing further to do for %s", device->id); return TRUE; } if(safe_str_eq(device->agent, STONITH_WATCHDOG_AGENT)) { if(safe_str_eq(cmd->action, "reboot")) { pcmk_panic(__FUNCTION__); return TRUE; } else if(safe_str_eq(cmd->action, "off")) { pcmk_panic(__FUNCTION__); return TRUE; } else { crm_info("Faking success for %s watchdog operation", cmd->action); cmd->done_cb(0, 0, NULL, cmd); return TRUE; } } #if SUPPORT_CIBSECRETS if (replace_secret_params(device->id, device->params) < 0) { /* replacing secrets failed! */ if (safe_str_eq(cmd->action,"stop")) { /* don't fail on stop! */ crm_info("proceeding with the stop operation for %s", device->id); } else { crm_err("failed to get secrets for %s, " "considering resource not configured", device->id); exec_rc = PCMK_OCF_NOT_CONFIGURED; cmd->done_cb(0, exec_rc, NULL, cmd); return TRUE; } } #endif action_str = cmd->action; if (safe_str_eq(cmd->action, "reboot") && is_not_set(device->flags, st_device_supports_reboot)) { crm_warn("Agent '%s' does not advertise support for 'reboot', performing 'off' action instead", device->agent); action_str = "off"; } action = stonith_action_create(device->agent, action_str, cmd->victim, cmd->victim_nodeid, cmd->timeout, device->params, device->aliases); /* for async exec, exec_rc is pid if positive and error code if negative/zero */ exec_rc = stonith_action_execute_async(action, (void *)cmd, cmd->done_cb); if (exec_rc > 0) { crm_debug("Operation %s%s%s on %s now running with pid=%d, timeout=%ds", cmd->action, cmd->victim ? " for node " : "", cmd->victim ? cmd->victim : "", device->id, exec_rc, cmd->timeout); device->active_pid = exec_rc; } else { crm_warn("Operation %s%s%s on %s failed: %s (%d)", cmd->action, cmd->victim ? " for node " : "", cmd->victim ? cmd->victim : "", device->id, pcmk_strerror(exec_rc), exec_rc); cmd->done_cb(0, exec_rc, NULL, cmd); } return TRUE; } static gboolean stonith_device_dispatch(gpointer user_data) { return stonith_device_execute(user_data); } static gboolean start_delay_helper(gpointer data) { async_command_t *cmd = data; stonith_device_t *device = NULL; cmd->delay_id = 0; device = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL; if (device) { mainloop_set_trigger(device->work); } return FALSE; } static void schedule_stonith_command(async_command_t * cmd, stonith_device_t * device) { int delay_max = 0; CRM_CHECK(cmd != NULL, return); CRM_CHECK(device != NULL, return); if (cmd->device) { free(cmd->device); } if (device->include_nodeid && cmd->victim) { crm_node_t *node = crm_get_peer(0, cmd->victim); cmd->victim_nodeid = node->id; } cmd->device = strdup(device->id); cmd->timeout = get_action_timeout(device, cmd->action, cmd->default_timeout); if (cmd->remote_op_id) { crm_debug("Scheduling %s on %s for remote peer %s with op id (%s) (timeout=%ds)", cmd->action, device->id, cmd->origin, cmd->remote_op_id, cmd->timeout); } else { crm_debug("Scheduling %s on %s for %s (timeout=%ds)", cmd->action, device->id, cmd->client, cmd->timeout); } device->pending_ops = g_list_append(device->pending_ops, cmd); mainloop_set_trigger(device->work); delay_max = get_action_delay_max(device, cmd->action); if (delay_max > 0) { cmd->start_delay = rand() % delay_max; crm_notice("Delaying %s on %s for %lldms (timeout=%ds)", cmd->action, device->id, cmd->start_delay, cmd->timeout); cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd); } } void free_device(gpointer data) { GListPtr gIter = NULL; stonith_device_t *device = data; g_hash_table_destroy(device->params); g_hash_table_destroy(device->aliases); for (gIter = device->pending_ops; gIter != NULL; gIter = gIter->next) { async_command_t *cmd = gIter->data; crm_warn("Removal of device '%s' purged operation %s", device->id, cmd->action); cmd->done_cb(0, -ENODEV, NULL, cmd); free_async_command(cmd); } g_list_free(device->pending_ops); g_list_free_full(device->targets, free); mainloop_destroy_trigger(device->work); free_xml(device->agent_metadata); free(device->namespace); free(device->on_target_actions); free(device->required_actions); free(device->agent); free(device->id); free(device); } static GHashTable * build_port_aliases(const char *hostmap, GListPtr * targets) { char *name = NULL; int last = 0, lpc = 0, max = 0, added = 0; GHashTable *aliases = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, g_hash_destroy_str, g_hash_destroy_str); if (hostmap == NULL) { return aliases; } max = strlen(hostmap); for (; lpc <= max; lpc++) { switch (hostmap[lpc]) { /* Assignment chars */ case '=': case ':': if (lpc > last) { free(name); name = calloc(1, 1 + lpc - last); memcpy(name, hostmap + last, lpc - last); } last = lpc + 1; break; /* Delimeter chars */ /* case ',': Potentially used to specify multiple ports */ case 0: case ';': case ' ': case '\t': if (name) { char *value = NULL; value = calloc(1, 1 + lpc - last); memcpy(value, hostmap + last, lpc - last); crm_debug("Adding alias '%s'='%s'", name, value); g_hash_table_replace(aliases, name, value); if (targets) { *targets = g_list_append(*targets, strdup(value)); } value = NULL; name = NULL; added++; } else if (lpc > last) { crm_debug("Parse error at offset %d near '%s'", lpc - last, hostmap + last); } last = lpc + 1; break; } if (hostmap[lpc] == 0) { break; } } if (added == 0) { crm_info("No host mappings detected in '%s'", hostmap); } free(name); return aliases; } static void parse_host_line(const char *line, int max, GListPtr * output) { int lpc = 0; int last = 0; if (max <= 0) { return; } /* Check for any complaints about additional parameters that the device doesn't understand */ if (strstr(line, "invalid") || strstr(line, "variable")) { crm_debug("Skipping: %s", line); return; } crm_trace("Processing %d bytes: [%s]", max, line); /* Skip initial whitespace */ for (lpc = 0; lpc <= max && isspace(line[lpc]); lpc++) { last = lpc + 1; } /* Now the actual content */ for (lpc = 0; lpc <= max; lpc++) { gboolean a_space = isspace(line[lpc]); if (a_space && lpc < max && isspace(line[lpc + 1])) { /* fast-forward to the end of the spaces */ } else if (a_space || line[lpc] == ',' || line[lpc] == ';' || line[lpc] == 0) { int rc = 1; char *entry = NULL; if (lpc != last) { entry = calloc(1, 1 + lpc - last); rc = sscanf(line + last, "%[a-zA-Z0-9_-.]", entry); } if (entry == NULL) { /* Skip */ } else if (rc != 1) { crm_warn("Could not parse (%d %d): %s", last, lpc, line + last); } else if (safe_str_neq(entry, "on") && safe_str_neq(entry, "off")) { crm_trace("Adding '%s'", entry); *output = g_list_append(*output, entry); entry = NULL; } free(entry); last = lpc + 1; } } } static GListPtr parse_host_list(const char *hosts) { int lpc = 0; int max = 0; int last = 0; GListPtr output = NULL; if (hosts == NULL) { return output; } max = strlen(hosts); for (lpc = 0; lpc <= max; lpc++) { if (hosts[lpc] == '\n' || hosts[lpc] == 0) { char *line = NULL; int len = lpc - last; if(len > 1) { line = malloc(1 + len); } if(line) { snprintf(line, 1 + len, "%s", hosts + last); line[len] = 0; /* Because it might be '\n' */ parse_host_line(line, len, &output); free(line); } last = lpc + 1; } } crm_trace("Parsed %d entries from '%s'", g_list_length(output), hosts); return output; } GHashTable *metadata_cache = NULL; static xmlNode * get_agent_metadata(const char *agent) { xmlNode *xml = NULL; char *buffer = NULL; if(metadata_cache == NULL) { metadata_cache = g_hash_table_new_full( crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); } buffer = g_hash_table_lookup(metadata_cache, agent); if(safe_str_eq(agent, STONITH_WATCHDOG_AGENT)) { return NULL; } else if(buffer == NULL) { stonith_t *st = stonith_api_new(); int rc = st->cmds->metadata(st, st_opt_sync_call, agent, NULL, &buffer, 10); stonith_api_delete(st); if (rc || !buffer) { crm_err("Could not retrieve metadata for fencing agent %s", agent); return NULL; } g_hash_table_replace(metadata_cache, strdup(agent), buffer); } xml = string2xml(buffer); return xml; } static gboolean is_nodeid_required(xmlNode * xml) { xmlXPathObjectPtr xpath = NULL; if (stand_alone) { return FALSE; } if (!xml) { return FALSE; } xpath = xpath_search(xml, "//parameter[@name='nodeid']"); if (numXpathResults(xpath) <= 0) { freeXpathObject(xpath); return FALSE; } freeXpathObject(xpath); return TRUE; } static char * add_action(char *actions, const char *action) { static size_t len = 256; int offset = 0; if (actions == NULL) { actions = calloc(1, len); } else { offset = strlen(actions); } if (offset > 0) { offset += snprintf(actions+offset, len-offset, " "); } offset += snprintf(actions+offset, len-offset, "%s", action); return actions; } static void read_action_metadata(stonith_device_t *device) { xmlXPathObjectPtr xpath = NULL; int max = 0; int lpc = 0; if (device->agent_metadata == NULL) { return; } xpath = xpath_search(device->agent_metadata, "//action"); max = numXpathResults(xpath); if (max <= 0) { freeXpathObject(xpath); return; } for (lpc = 0; lpc < max; lpc++) { const char *on_target = NULL; const char *action = NULL; const char *automatic = NULL; const char *required = NULL; xmlNode *match = getXpathResult(xpath, lpc); CRM_LOG_ASSERT(match != NULL); if(match == NULL) { continue; }; on_target = crm_element_value(match, "on_target"); action = crm_element_value(match, "name"); automatic = crm_element_value(match, "automatic"); required = crm_element_value(match, "required"); if(safe_str_eq(action, "list")) { set_bit(device->flags, st_device_supports_list); } else if(safe_str_eq(action, "status")) { set_bit(device->flags, st_device_supports_status); } else if(safe_str_eq(action, "reboot")) { set_bit(device->flags, st_device_supports_reboot); } else if(safe_str_eq(action, "on") && (crm_is_true(automatic))) { /* this setting implies required=true for unfencing */ required = "true"; } if (action && crm_is_true(on_target)) { device->on_target_actions = add_action(device->on_target_actions, action); } if (action && crm_is_true(required)) { device->required_actions = add_action(device->required_actions, action); } } freeXpathObject(xpath); } static stonith_device_t * build_device_from_xml(xmlNode * msg) { const char *value = NULL; xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, msg, LOG_ERR); stonith_device_t *device = NULL; device = calloc(1, sizeof(stonith_device_t)); device->id = crm_element_value_copy(dev, XML_ATTR_ID); device->agent = crm_element_value_copy(dev, "agent"); device->namespace = crm_element_value_copy(dev, "namespace"); device->params = xml2list(dev); value = g_hash_table_lookup(device->params, STONITH_ATTR_HOSTLIST); if (value) { device->targets = parse_host_list(value); } value = g_hash_table_lookup(device->params, STONITH_ATTR_HOSTMAP); device->aliases = build_port_aliases(value, &(device->targets)); device->agent_metadata = get_agent_metadata(device->agent); read_action_metadata(device); value = g_hash_table_lookup(device->params, "nodeid"); if (!value) { device->include_nodeid = is_nodeid_required(device->agent_metadata); } value = crm_element_value(dev, "rsc_provides"); if (safe_str_eq(value, "unfencing")) { /* if this agent requires unfencing, 'on' is considered a required action */ device->required_actions = add_action(device->required_actions, "on"); } if (is_action_required("on", device)) { crm_info("The fencing device '%s' requires unfencing", device->id); } if (device->on_target_actions) { crm_info("The fencing device '%s' requires actions (%s) to be executed on the target node", device->id, device->on_target_actions); } device->work = mainloop_add_trigger(G_PRIORITY_HIGH, stonith_device_dispatch, device); /* TODO: Hook up priority */ return device; } static const char * target_list_type(stonith_device_t * dev) { const char *check_type = NULL; check_type = g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTCHECK); if (check_type == NULL) { if (g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTLIST)) { check_type = "static-list"; } else if (g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTMAP)) { check_type = "static-list"; } else if(is_set(dev->flags, st_device_supports_list)){ check_type = "dynamic-list"; } else if(is_set(dev->flags, st_device_supports_status)){ check_type = "status"; } else { check_type = "none"; } } return check_type; } void schedule_internal_command(const char *origin, stonith_device_t * device, const char *action, const char *victim, int timeout, void *internal_user_data, void (*done_cb) (GPid pid, int rc, const char *output, gpointer user_data)) { async_command_t *cmd = NULL; cmd = calloc(1, sizeof(async_command_t)); cmd->id = -1; cmd->default_timeout = timeout ? timeout : 60; cmd->timeout = cmd->default_timeout; cmd->action = strdup(action); cmd->victim = victim ? strdup(victim) : NULL; cmd->device = strdup(device->id); cmd->origin = strdup(origin); cmd->client = strdup(crm_system_name); cmd->client_name = strdup(crm_system_name); cmd->internal_user_data = internal_user_data; cmd->done_cb = done_cb; /* cmd, not internal_user_data, is passed to 'done_cb' as the userdata */ schedule_stonith_command(cmd, device); } gboolean string_in_list(GListPtr list, const char *item) { int lpc = 0; int max = g_list_length(list); for (lpc = 0; lpc < max; lpc++) { const char *value = g_list_nth_data(list, lpc); if (safe_str_eq(item, value)) { return TRUE; } else { crm_trace("%d: '%s' != '%s'", lpc, item, value); } } return FALSE; } static void status_search_cb(GPid pid, int rc, const char *output, gpointer user_data) { async_command_t *cmd = user_data; struct device_search_s *search = cmd->internal_user_data; stonith_device_t *dev = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL; gboolean can = FALSE; free_async_command(cmd); if (!dev) { search_devices_record_result(search, NULL, FALSE); return; } dev->active_pid = 0; mainloop_set_trigger(dev->work); if (rc == 1 /* unknown */ ) { crm_trace("Host %s is not known by %s", search->host, dev->id); } else if (rc == 0 /* active */ || rc == 2 /* inactive */ ) { crm_trace("Host %s is known by %s", search->host, dev->id); can = TRUE; } else { crm_notice("Unknown result when testing if %s can fence %s: rc=%d", dev->id, search->host, rc); } search_devices_record_result(search, dev->id, can); } static void dynamic_list_search_cb(GPid pid, int rc, const char *output, gpointer user_data) { async_command_t *cmd = user_data; struct device_search_s *search = cmd->internal_user_data; stonith_device_t *dev = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL; gboolean can_fence = FALSE; free_async_command(cmd); /* Host/alias must be in the list output to be eligable to be fenced * * Will cause problems if down'd nodes aren't listed or (for virtual nodes) * if the guest is still listed despite being moved to another machine */ if (!dev) { search_devices_record_result(search, NULL, FALSE); return; } dev->active_pid = 0; mainloop_set_trigger(dev->work); /* If we successfully got the targets earlier, don't disable. */ if (rc != 0 && !dev->targets) { crm_notice("Disabling port list queries for %s (%d): %s", dev->id, rc, output); /* Fall back to status */ g_hash_table_replace(dev->params, strdup(STONITH_ATTR_HOSTCHECK), strdup("status")); g_list_free_full(dev->targets, free); dev->targets = NULL; } else if (!rc) { crm_info("Refreshing port list for %s", dev->id); g_list_free_full(dev->targets, free); dev->targets = parse_host_list(output); dev->targets_age = time(NULL); } if (dev->targets) { const char *alias = g_hash_table_lookup(dev->aliases, search->host); if (!alias) { alias = search->host; } if (string_in_list(dev->targets, alias)) { can_fence = TRUE; } } search_devices_record_result(search, dev->id, can_fence); } /*! * \internal * \brief Checks to see if an identical device already exists in the device_list */ static stonith_device_t * device_has_duplicate(stonith_device_t * device) { char *key = NULL; char *value = NULL; GHashTableIter gIter; stonith_device_t *dup = g_hash_table_lookup(device_list, device->id); if (!dup) { crm_trace("No match for %s", device->id); return NULL; } else if (safe_str_neq(dup->agent, device->agent)) { crm_trace("Different agent: %s != %s", dup->agent, device->agent); return NULL; } /* Use calculate_operation_digest() here? */ g_hash_table_iter_init(&gIter, device->params); while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&value)) { if(strstr(key, "CRM_meta") == key) { continue; } else if(strcmp(key, "crm_feature_set") == 0) { continue; } else { char *other_value = g_hash_table_lookup(dup->params, key); if (!other_value || safe_str_neq(other_value, value)) { crm_trace("Different value for %s: %s != %s", key, other_value, value); return NULL; } } } crm_trace("Match"); return dup; } int stonith_device_register(xmlNode * msg, const char **desc, gboolean from_cib) { stonith_device_t *dup = NULL; stonith_device_t *device = build_device_from_xml(msg); dup = device_has_duplicate(device); if (dup) { crm_debug("Device '%s' already existed in device list (%d active devices)", device->id, g_hash_table_size(device_list)); free_device(device); device = dup; } else { stonith_device_t *old = g_hash_table_lookup(device_list, device->id); if (from_cib && old && old->api_registered) { /* If the cib is writing over an entry that is shared with a stonith client, * copy any pending ops that currently exist on the old entry to the new one. * Otherwise the pending ops will be reported as failures */ crm_info("Overwriting an existing entry for %s from the cib", device->id); device->pending_ops = old->pending_ops; device->api_registered = TRUE; old->pending_ops = NULL; if (device->pending_ops) { mainloop_set_trigger(device->work); } } g_hash_table_replace(device_list, device->id, device); crm_notice("Added '%s' to the device list (%d active devices)", device->id, g_hash_table_size(device_list)); } if (desc) { *desc = device->id; } if (from_cib) { device->cib_registered = TRUE; } else { device->api_registered = TRUE; } return pcmk_ok; } int stonith_device_remove(const char *id, gboolean from_cib) { stonith_device_t *device = g_hash_table_lookup(device_list, id); if (!device) { crm_info("Device '%s' not found (%d active devices)", id, g_hash_table_size(device_list)); return pcmk_ok; } if (from_cib) { device->cib_registered = FALSE; } else { device->verified = FALSE; device->api_registered = FALSE; } if (!device->cib_registered && !device->api_registered) { g_hash_table_remove(device_list, id); crm_info("Removed '%s' from the device list (%d active devices)", id, g_hash_table_size(device_list)); } return pcmk_ok; } static int count_active_levels(stonith_topology_t * tp) { int lpc = 0; int count = 0; for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) { if (tp->levels[lpc] != NULL) { count++; } } return count; } void free_topology_entry(gpointer data) { stonith_topology_t *tp = data; int lpc = 0; for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) { if (tp->levels[lpc] != NULL) { g_list_free_full(tp->levels[lpc], free); } } free(tp->node); free(tp); } int stonith_level_register(xmlNode * msg, char **desc) { int id = 0; int rc = pcmk_ok; xmlNode *child = NULL; xmlNode *level = get_xpath_object("//" F_STONITH_LEVEL, msg, LOG_ERR); const char *node = crm_element_value(level, F_STONITH_TARGET); stonith_topology_t *tp = g_hash_table_lookup(topology, node); CRM_LOG_ASSERT(node != NULL); crm_element_value_int(level, XML_ATTR_ID, &id); if (desc) { *desc = crm_strdup_printf("%s[%d]", node, id); } if (id <= 0 || id >= ST_LEVEL_MAX) { return -EINVAL; } if (tp == NULL) { tp = calloc(1, sizeof(stonith_topology_t)); tp->node = strdup(node); g_hash_table_replace(topology, tp->node, tp); crm_trace("Added %s to the topology (%d active entries)", node, g_hash_table_size(topology)); } if (tp->levels[id] != NULL) { crm_info("Adding to the existing %s[%d] topology entry (%d active entries)", node, id, count_active_levels(tp)); } for (child = __xml_first_child(level); child != NULL; child = __xml_next(child)) { const char *device = ID(child); crm_trace("Adding device '%s' for %s (%d)", device, node, id); tp->levels[id] = g_list_append(tp->levels[id], strdup(device)); } crm_info("Node %s has %d active fencing levels", node, count_active_levels(tp)); return rc; } int stonith_level_remove(xmlNode * msg, char **desc) { int id = 0; xmlNode *level = get_xpath_object("//" F_STONITH_LEVEL, msg, LOG_ERR); const char *node = crm_element_value(level, F_STONITH_TARGET); stonith_topology_t *tp = g_hash_table_lookup(topology, node); CRM_LOG_ASSERT(node != NULL); if (desc) { *desc = crm_strdup_printf("%s[%d]", node, id); } crm_element_value_int(level, XML_ATTR_ID, &id); if (tp == NULL) { crm_info("Node %s not found (%d active entries)", node, g_hash_table_size(topology)); return pcmk_ok; } else if (id < 0 || id >= ST_LEVEL_MAX) { return -EINVAL; } if (id == 0 && g_hash_table_remove(topology, node)) { crm_info("Removed all %s related entries from the topology (%d active entries)", node, g_hash_table_size(topology)); } else if (id > 0 && tp->levels[id] != NULL) { g_list_free_full(tp->levels[id], free); tp->levels[id] = NULL; crm_info("Removed entry '%d' from %s's topology (%d active entries remaining)", id, node, count_active_levels(tp)); } return pcmk_ok; } static int stonith_device_action(xmlNode * msg, char **output) { int rc = pcmk_ok; xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, msg, LOG_ERR); const char *id = crm_element_value(dev, F_STONITH_DEVICE); async_command_t *cmd = NULL; stonith_device_t *device = NULL; if (id) { crm_trace("Looking for '%s'", id); device = g_hash_table_lookup(device_list, id); } if (device && device->api_registered == FALSE) { rc = -ENODEV; } else if (device) { cmd = create_async_command(msg); if (cmd == NULL) { free_device(device); return -EPROTO; } schedule_stonith_command(cmd, device); rc = -EINPROGRESS; } else { crm_info("Device %s not found", id ? id : ""); rc = -ENODEV; } return rc; } static void search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence) { search->replies_received++; if (can_fence && device) { search->capable = g_list_append(search->capable, strdup(device)); } if (search->replies_needed == search->replies_received) { crm_debug("Finished Search. %d devices can perform action (%s) on node %s", g_list_length(search->capable), search->action ? search->action : "", search->host ? search->host : ""); search->callback(search->capable, search->user_data); free(search->host); free(search->action); free(search); } } static void can_fence_host_with_device(stonith_device_t * dev, struct device_search_s *search) { gboolean can = FALSE; const char *check_type = NULL; const char *host = search->host; const char *alias = NULL; CRM_LOG_ASSERT(dev != NULL); if (dev == NULL) { goto search_report_results; } else if (host == NULL) { can = TRUE; goto search_report_results; } if (dev->on_target_actions && search->action && strstr(dev->on_target_actions, search->action)) { /* this device can only execute this action on the target node */ if(safe_str_neq(host, stonith_our_uname)) { crm_trace("%s operation with %s can only be executed for localhost not %s", search->action, dev->id, host); goto search_report_results; } } else if(safe_str_eq(host, stonith_our_uname) && search->allow_suicide == FALSE) { crm_trace("%s operation does not support self-fencing", search->action); goto search_report_results; } alias = g_hash_table_lookup(dev->aliases, host); if (alias == NULL) { alias = host; } check_type = target_list_type(dev); if (safe_str_eq(check_type, "none")) { can = TRUE; } else if (safe_str_eq(check_type, "static-list")) { /* Presence in the hostmap is sufficient * Only use if all hosts on which the device can be active can always fence all listed hosts */ if (string_in_list(dev->targets, host)) { can = TRUE; } else if (g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTMAP) && g_hash_table_lookup(dev->aliases, host)) { can = TRUE; } } else if (safe_str_eq(check_type, "dynamic-list")) { time_t now = time(NULL); if (dev->targets == NULL || dev->targets_age + 60 < now) { crm_trace("Running %s command to see if %s can fence %s (%s)", check_type, dev?dev->id:"N/A", search->host, search->action); schedule_internal_command(__FUNCTION__, dev, "list", NULL, search->per_device_timeout, search, dynamic_list_search_cb); /* we'll respond to this search request async in the cb */ return; } if (string_in_list(dev->targets, alias)) { can = TRUE; } } else if (safe_str_eq(check_type, "status")) { crm_trace("Running %s command to see if %s can fence %s (%s)", check_type, dev?dev->id:"N/A", search->host, search->action); schedule_internal_command(__FUNCTION__, dev, "status", search->host, search->per_device_timeout, search, status_search_cb); /* we'll respond to this search request async in the cb */ return; } else { crm_err("Unknown check type: %s", check_type); } if (safe_str_eq(host, alias)) { crm_notice("%s can%s fence (%s) %s: %s", dev->id, can ? "" : " not", search->action, host, check_type); } else { crm_notice("%s can%s fence (%s) %s (aka. '%s'): %s", dev->id, can ? "" : " not", search->action, host, alias, check_type); } search_report_results: search_devices_record_result(search, dev ? dev->id : NULL, can); } static void search_devices(gpointer key, gpointer value, gpointer user_data) { stonith_device_t *dev = value; struct device_search_s *search = user_data; can_fence_host_with_device(dev, search); } #define DEFAULT_QUERY_TIMEOUT 20 static void get_capable_devices(const char *host, const char *action, int timeout, bool suicide, void *user_data, void (*callback) (GList * devices, void *user_data)) { struct device_search_s *search; int per_device_timeout = DEFAULT_QUERY_TIMEOUT; int devices_needing_async_query = 0; char *key = NULL; const char *check_type = NULL; GHashTableIter gIter; stonith_device_t *device = NULL; if (!g_hash_table_size(device_list)) { callback(NULL, user_data); return; } search = calloc(1, sizeof(struct device_search_s)); if (!search) { callback(NULL, user_data); return; } g_hash_table_iter_init(&gIter, device_list); while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&device)) { check_type = target_list_type(device); if (safe_str_eq(check_type, "status") || safe_str_eq(check_type, "dynamic-list")) { devices_needing_async_query++; } } /* If we have devices that require an async event in order to know what * nodes they can fence, we have to give the events a timeout. The total * query timeout is divided among those events. */ if (devices_needing_async_query) { per_device_timeout = timeout / devices_needing_async_query; if (!per_device_timeout) { crm_err("stonith-timeout duration %d is too low, raise the duration to %d seconds", timeout, DEFAULT_QUERY_TIMEOUT * devices_needing_async_query); per_device_timeout = DEFAULT_QUERY_TIMEOUT; } else if (per_device_timeout < DEFAULT_QUERY_TIMEOUT) { crm_notice ("stonith-timeout duration %d is low for the current configuration. Consider raising it to %d seconds", timeout, DEFAULT_QUERY_TIMEOUT * devices_needing_async_query); } } search->host = host ? strdup(host) : NULL; search->action = action ? strdup(action) : NULL; search->per_device_timeout = per_device_timeout; /* We are guaranteed this many replies. Even if a device gets * unregistered some how during the async search, we will get * the correct number of replies. */ search->replies_needed = g_hash_table_size(device_list); search->allow_suicide = suicide; search->callback = callback; search->user_data = user_data; /* kick off the search */ crm_debug("Searching through %d devices to see what is capable of action (%s) for target %s", search->replies_needed, search->action ? search->action : "", search->host ? search->host : ""); g_hash_table_foreach(device_list, search_devices, search); } struct st_query_data { xmlNode *reply; char *remote_peer; char *client_id; char *target; char *action; int call_options; }; static void stonith_query_capable_device_cb(GList * devices, void *user_data) { struct st_query_data *query = user_data; int available_devices = 0; xmlNode *dev = NULL; xmlNode *list = NULL; GListPtr lpc = NULL; /* Pack the results into data */ list = create_xml_node(NULL, __FUNCTION__); crm_xml_add(list, F_STONITH_TARGET, query->target); for (lpc = devices; lpc != NULL; lpc = lpc->next) { stonith_device_t *device = g_hash_table_lookup(device_list, lpc->data); int action_specific_timeout; int delay_max; if (!device) { /* It is possible the device got unregistered while * determining who can fence the target */ continue; } available_devices++; action_specific_timeout = get_action_timeout(device, query->action, 0); dev = create_xml_node(list, F_STONITH_DEVICE); crm_xml_add(dev, XML_ATTR_ID, device->id); crm_xml_add(dev, "namespace", device->namespace); crm_xml_add(dev, "agent", device->agent); crm_xml_add_int(dev, F_STONITH_DEVICE_VERIFIED, device->verified); if (is_action_required(query->action, device)) { crm_xml_add_int(dev, F_STONITH_DEVICE_REQUIRED, 1); } if (action_specific_timeout) { crm_xml_add_int(dev, F_STONITH_ACTION_TIMEOUT, action_specific_timeout); } delay_max = get_action_delay_max(device, query->action); if (delay_max > 0) { crm_xml_add_int(dev, F_STONITH_DELAY_MAX, delay_max / 1000); } if (query->target == NULL) { xmlNode *attrs = create_xml_node(dev, XML_TAG_ATTRS); g_hash_table_foreach(device->params, hash2field, attrs); } } crm_xml_add_int(list, "st-available-devices", available_devices); if (query->target) { crm_debug("Found %d matching devices for '%s'", available_devices, query->target); } else { crm_debug("%d devices installed", available_devices); } if (list != NULL) { crm_trace("Attaching query list output"); add_message_xml(query->reply, F_STONITH_CALLDATA, list); } stonith_send_reply(query->reply, query->call_options, query->remote_peer, query->client_id); free_xml(query->reply); free(query->remote_peer); free(query->client_id); free(query->target); free(query->action); free(query); free_xml(list); g_list_free_full(devices, free); } static void stonith_query(xmlNode * msg, const char *remote_peer, const char *client_id, int call_options) { struct st_query_data *query = NULL; const char *action = NULL; const char *target = NULL; int timeout = 0; xmlNode *dev = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_DEBUG_3); crm_element_value_int(msg, F_STONITH_TIMEOUT, &timeout); if (dev) { const char *device = crm_element_value(dev, F_STONITH_DEVICE); target = crm_element_value(dev, F_STONITH_TARGET); action = crm_element_value(dev, F_STONITH_ACTION); if (device && safe_str_eq(device, "manual_ack")) { /* No query or reply necessary */ return; } } crm_log_xml_debug(msg, "Query"); query = calloc(1, sizeof(struct st_query_data)); query->reply = stonith_construct_reply(msg, NULL, NULL, pcmk_ok); query->remote_peer = remote_peer ? strdup(remote_peer) : NULL; query->client_id = client_id ? strdup(client_id) : NULL; query->target = target ? strdup(target) : NULL; query->action = action ? strdup(action) : NULL; query->call_options = call_options; get_capable_devices(target, action, timeout, is_set(call_options, st_opt_allow_suicide), query, stonith_query_capable_device_cb); } #define ST_LOG_OUTPUT_MAX 512 static void log_operation(async_command_t * cmd, int rc, int pid, const char *next, const char *output) { if (rc == 0) { next = NULL; } if (cmd->victim != NULL) { do_crm_log(rc == 0 ? LOG_NOTICE : LOG_ERR, "Operation '%s' [%d] (call %d from %s) for host '%s' with device '%s' returned: %d (%s)%s%s", cmd->action, pid, cmd->id, cmd->client_name, cmd->victim, cmd->device, rc, pcmk_strerror(rc), next ? ". Trying: " : "", next ? next : ""); } else { do_crm_log_unlikely(rc == 0 ? LOG_DEBUG : LOG_NOTICE, "Operation '%s' [%d] for device '%s' returned: %d (%s)%s%s", cmd->action, pid, cmd->device, rc, pcmk_strerror(rc), next ? ". Trying: " : "", next ? next : ""); } if (output) { /* Logging the whole string confuses syslog when the string is xml */ char *prefix = crm_strdup_printf("%s:%d", cmd->device, pid); crm_log_output(rc == 0 ? LOG_DEBUG : LOG_WARNING, prefix, output); free(prefix); } } static void stonith_send_async_reply(async_command_t * cmd, const char *output, int rc, GPid pid) { xmlNode *reply = NULL; gboolean bcast = FALSE; reply = stonith_construct_async_reply(cmd, output, NULL, rc); if (safe_str_eq(cmd->action, "metadata")) { /* Too verbose to log */ crm_trace("Metadata query for %s", cmd->device); output = NULL; } else if (crm_str_eq(cmd->action, "monitor", TRUE) || crm_str_eq(cmd->action, "list", TRUE) || crm_str_eq(cmd->action, "status", TRUE)) { crm_trace("Never broadcast %s replies", cmd->action); } else if (!stand_alone && safe_str_eq(cmd->origin, cmd->victim) && safe_str_neq(cmd->action, "on")) { crm_trace("Broadcast %s reply for %s", cmd->action, cmd->victim); crm_xml_add(reply, F_SUBTYPE, "broadcast"); bcast = TRUE; } log_operation(cmd, rc, pid, NULL, output); crm_log_xml_trace(reply, "Reply"); if (bcast) { crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY); send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE); } else if (cmd->origin) { crm_trace("Directed reply to %s", cmd->origin); send_cluster_message(crm_get_peer(0, cmd->origin), crm_msg_stonith_ng, reply, FALSE); } else { crm_trace("Directed local %ssync reply to %s", (cmd->options & st_opt_sync_call) ? "" : "a-", cmd->client_name); do_local_reply(reply, cmd->client, cmd->options & st_opt_sync_call, FALSE); } if (stand_alone) { /* Do notification with a clean data object */ xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE); crm_xml_add_int(notify_data, F_STONITH_RC, rc); crm_xml_add(notify_data, F_STONITH_TARGET, cmd->victim); crm_xml_add(notify_data, F_STONITH_OPERATION, cmd->op); crm_xml_add(notify_data, F_STONITH_DELEGATE, "localhost"); crm_xml_add(notify_data, F_STONITH_DEVICE, cmd->device); crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id); crm_xml_add(notify_data, F_STONITH_ORIGIN, cmd->client); do_stonith_notify(0, T_STONITH_NOTIFY_FENCE, rc, notify_data); } free_xml(reply); } void unfence_cb(GPid pid, int rc, const char *output, gpointer user_data) { async_command_t * cmd = user_data; stonith_device_t *dev = g_hash_table_lookup(device_list, cmd->device); log_operation(cmd, rc, pid, NULL, output); if(dev) { dev->active_pid = 0; mainloop_set_trigger(dev->work); } else { crm_trace("Device %s does not exist", cmd->device); } if(rc != 0) { crm_exit(DAEMON_RESPAWN_STOP); } } static void cancel_stonith_command(async_command_t * cmd) { stonith_device_t *device; CRM_CHECK(cmd != NULL, return); if (!cmd->device) { return; } device = g_hash_table_lookup(device_list, cmd->device); if (device) { crm_trace("Cancel scheduled %s on %s", cmd->action, device->id); device->pending_ops = g_list_remove(device->pending_ops, cmd); } } #define READ_MAX 500 static void st_child_done(GPid pid, int rc, const char *output, gpointer user_data) { stonith_device_t *device = NULL; stonith_device_t *next_device = NULL; async_command_t *cmd = user_data; GListPtr gIter = NULL; GListPtr gIterNext = NULL; CRM_CHECK(cmd != NULL, return); active_children--; /* The device is ready to do something else now */ device = g_hash_table_lookup(device_list, cmd->device); if (device) { device->active_pid = 0; if (rc == pcmk_ok && (safe_str_eq(cmd->action, "list") || safe_str_eq(cmd->action, "monitor") || safe_str_eq(cmd->action, "status"))) { device->verified = TRUE; } mainloop_set_trigger(device->work); } crm_debug("Operation '%s' on '%s' completed with rc=%d (%d remaining)", cmd->action, cmd->device, rc, g_list_length(cmd->device_next)); if (rc == 0) { GListPtr iter; /* see if there are any required devices left to execute for this op */ for (iter = cmd->device_next; iter != NULL; iter = iter->next) { next_device = g_hash_table_lookup(device_list, iter->data); if (next_device != NULL && is_action_required(cmd->action, next_device)) { cmd->device_next = iter->next; break; } next_device = NULL; } } else if (rc != 0 && cmd->device_next && (is_action_required(cmd->action, device) == FALSE)) { /* if this device didn't work out, see if there are any others we can try. * if the failed device was 'required', we can't pick another device. */ next_device = g_hash_table_lookup(device_list, cmd->device_next->data); cmd->device_next = cmd->device_next->next; } /* this operation requires more fencing, hooray! */ if (next_device) { log_operation(cmd, rc, pid, cmd->device, output); schedule_stonith_command(cmd, next_device); /* Prevent cmd from being freed */ cmd = NULL; goto done; } if (rc > 0) { /* Try to provide _something_ useful */ if(output == NULL) { rc = -ENODATA; } else if(strstr(output, "imed out")) { rc = -ETIMEDOUT; } else if(strstr(output, "Unrecognised action")) { rc = -EOPNOTSUPP; } else { rc = -pcmk_err_generic; } } stonith_send_async_reply(cmd, output, rc, pid); if (rc != 0) { goto done; } /* Check to see if any operations are scheduled to do the exact * same thing that just completed. If so, rather than * performing the same fencing operation twice, return the result * of this operation for all pending commands it matches. */ for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) { async_command_t *cmd_other = gIter->data; gIterNext = gIter->next; if (cmd == cmd_other) { continue; } /* A pending scheduled command matches the command that just finished if. * 1. The client connections are different. * 2. The node victim is the same. * 3. The fencing action is the same. * 4. The device scheduled to execute the action is the same. */ if (safe_str_eq(cmd->client, cmd_other->client) || safe_str_neq(cmd->victim, cmd_other->victim) || safe_str_neq(cmd->action, cmd_other->action) || safe_str_neq(cmd->device, cmd_other->device)) { continue; } crm_notice ("Merging stonith action %s for node %s originating from client %s with identical stonith request from client %s", cmd_other->action, cmd_other->victim, cmd_other->client_name, cmd->client_name); cmd_list = g_list_remove_link(cmd_list, gIter); stonith_send_async_reply(cmd_other, output, rc, pid); cancel_stonith_command(cmd_other); free_async_command(cmd_other); g_list_free_1(gIter); } done: free_async_command(cmd); } static gint sort_device_priority(gconstpointer a, gconstpointer b) { const stonith_device_t *dev_a = a; const stonith_device_t *dev_b = b; if (dev_a->priority > dev_b->priority) { return -1; } else if (dev_a->priority < dev_b->priority) { return 1; } return 0; } static void stonith_fence_get_devices_cb(GList * devices, void *user_data) { async_command_t *cmd = user_data; stonith_device_t *device = NULL; crm_info("Found %d matching devices for '%s'", g_list_length(devices), cmd->victim); if (g_list_length(devices) > 0) { /* Order based on priority */ devices = g_list_sort(devices, sort_device_priority); device = g_hash_table_lookup(device_list, devices->data); if (device) { cmd->device_list = devices; cmd->device_next = devices->next; devices = NULL; /* list owned by cmd now */ } } /* we have a device, schedule it for fencing. */ if (device) { schedule_stonith_command(cmd, device); /* in progress */ return; } /* no device found! */ stonith_send_async_reply(cmd, NULL, -ENODEV, 0); free_async_command(cmd); g_list_free_full(devices, free); } static int stonith_fence(xmlNode * msg) { const char *device_id = NULL; stonith_device_t *device = NULL; async_command_t *cmd = create_async_command(msg); xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR); if (cmd == NULL) { return -EPROTO; } device_id = crm_element_value(dev, F_STONITH_DEVICE); if (device_id) { device = g_hash_table_lookup(device_list, device_id); if (device == NULL) { crm_err("Requested device '%s' is not available", device_id); return -ENODEV; } schedule_stonith_command(cmd, device); } else { const char *host = crm_element_value(dev, F_STONITH_TARGET); if (cmd->options & st_opt_cs_nodeid) { int nodeid = crm_atoi(host, NULL); crm_node_t *node = crm_get_peer(nodeid, NULL); if (node) { host = node->uname; } } /* If we get to here, then self-fencing is implicitly allowed */ get_capable_devices(host, cmd->action, cmd->default_timeout, TRUE, cmd, stonith_fence_get_devices_cb); } return -EINPROGRESS; } xmlNode * stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, int rc) { int lpc = 0; xmlNode *reply = NULL; const char *name = NULL; const char *value = NULL; const char *names[] = { F_STONITH_OPERATION, F_STONITH_CALLID, F_STONITH_CLIENTID, F_STONITH_CLIENTNAME, F_STONITH_REMOTE_OP_ID, F_STONITH_CALLOPTS }; crm_trace("Creating a basic reply"); reply = create_xml_node(NULL, T_STONITH_REPLY); crm_xml_add(reply, "st_origin", __FUNCTION__); crm_xml_add(reply, F_TYPE, T_STONITH_NG); crm_xml_add(reply, "st_output", output); crm_xml_add_int(reply, F_STONITH_RC, rc); CRM_CHECK(request != NULL, crm_warn("Can't create a sane reply"); return reply); for (lpc = 0; lpc < DIMOF(names); lpc++) { name = names[lpc]; value = crm_element_value(request, name); crm_xml_add(reply, name, value); } if (data != NULL) { crm_trace("Attaching reply output"); add_message_xml(reply, F_STONITH_CALLDATA, data); } return reply; } static xmlNode * stonith_construct_async_reply(async_command_t * cmd, const char *output, xmlNode * data, int rc) { xmlNode *reply = NULL; crm_trace("Creating a basic reply"); reply = create_xml_node(NULL, T_STONITH_REPLY); crm_xml_add(reply, "st_origin", __FUNCTION__); crm_xml_add(reply, F_TYPE, T_STONITH_NG); crm_xml_add(reply, F_STONITH_OPERATION, cmd->op); crm_xml_add(reply, F_STONITH_DEVICE, cmd->device); crm_xml_add(reply, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id); crm_xml_add(reply, F_STONITH_CLIENTID, cmd->client); crm_xml_add(reply, F_STONITH_CLIENTNAME, cmd->client_name); crm_xml_add(reply, F_STONITH_TARGET, cmd->victim); crm_xml_add(reply, F_STONITH_ACTION, cmd->op); crm_xml_add(reply, F_STONITH_ORIGIN, cmd->origin); crm_xml_add_int(reply, F_STONITH_CALLID, cmd->id); crm_xml_add_int(reply, F_STONITH_CALLOPTS, cmd->options); crm_xml_add_int(reply, F_STONITH_RC, rc); crm_xml_add(reply, "st_output", output); if (data != NULL) { crm_info("Attaching reply output"); add_message_xml(reply, F_STONITH_CALLDATA, data); } return reply; } bool fencing_peer_active(crm_node_t *peer) { if (peer == NULL) { return FALSE; } else if (peer->uname == NULL) { return FALSE; - } else if(peer->processes & (crm_proc_plugin | crm_proc_heartbeat | crm_proc_cpg)) { + } else if (is_set(peer->processes, crm_get_cluster_proc())) { return TRUE; } return FALSE; } /*! * \internal * \brief Determine if we need to use an alternate node to * fence the target. If so return that node's uname * * \retval NULL, no alternate host * \retval uname, uname of alternate host to use */ static const char * check_alternate_host(const char *target) { const char *alternate_host = NULL; if (find_topology_for_host(target) && safe_str_eq(target, stonith_our_uname)) { GHashTableIter gIter; crm_node_t *entry = NULL; g_hash_table_iter_init(&gIter, crm_peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { crm_trace("Checking for %s.%d != %s", entry->uname, entry->id, target); if (fencing_peer_active(entry) && safe_str_neq(entry->uname, target)) { alternate_host = entry->uname; break; } } if (alternate_host == NULL) { crm_err("No alternate host available to handle complex self fencing request"); g_hash_table_iter_init(&gIter, crm_peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { crm_notice("Peer[%d] %s", entry->id, entry->uname); } } } return alternate_host; } static void stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer, const char *client_id) { if (remote_peer) { send_cluster_message(crm_get_peer(0, remote_peer), crm_msg_stonith_ng, reply, FALSE); } else { do_local_reply(reply, client_id, is_set(call_options, st_opt_sync_call), remote_peer != NULL); } } static int handle_request(crm_client_t * client, uint32_t id, uint32_t flags, xmlNode * request, const char *remote_peer) { int call_options = 0; int rc = -EOPNOTSUPP; xmlNode *data = NULL; xmlNode *reply = NULL; char *output = NULL; const char *op = crm_element_value(request, F_STONITH_OPERATION); const char *client_id = crm_element_value(request, F_STONITH_CLIENTID); crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); if (is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(client == NULL || client->request_id == id); } if (crm_str_eq(op, CRM_OP_REGISTER, TRUE)) { xmlNode *reply = create_xml_node(NULL, "reply"); CRM_ASSERT(client); crm_xml_add(reply, F_STONITH_OPERATION, CRM_OP_REGISTER); crm_xml_add(reply, F_STONITH_CLIENTID, client->id); crm_ipcs_send(client, id, reply, flags); client->request_id = 0; free_xml(reply); return 0; } else if (crm_str_eq(op, STONITH_OP_EXEC, TRUE)) { rc = stonith_device_action(request, &output); } else if (crm_str_eq(op, STONITH_OP_TIMEOUT_UPDATE, TRUE)) { const char *call_id = crm_element_value(request, F_STONITH_CALLID); const char *client_id = crm_element_value(request, F_STONITH_CLIENTID); int op_timeout = 0; crm_element_value_int(request, F_STONITH_TIMEOUT, &op_timeout); do_stonith_async_timeout_update(client_id, call_id, op_timeout); return 0; } else if (crm_str_eq(op, STONITH_OP_QUERY, TRUE)) { if (remote_peer) { create_remote_stonith_op(client_id, request, TRUE); /* Record it for the future notification */ } stonith_query(request, remote_peer, client_id, call_options); return 0; } else if (crm_str_eq(op, T_STONITH_NOTIFY, TRUE)) { const char *flag_name = NULL; CRM_ASSERT(client); flag_name = crm_element_value(request, F_STONITH_NOTIFY_ACTIVATE); if (flag_name) { crm_debug("Setting %s callbacks for %s (%s): ON", flag_name, client->name, client->id); client->options |= get_stonith_flag(flag_name); } flag_name = crm_element_value(request, F_STONITH_NOTIFY_DEACTIVATE); if (flag_name) { crm_debug("Setting %s callbacks for %s (%s): off", flag_name, client->name, client->id); client->options |= get_stonith_flag(flag_name); } if (flags & crm_ipc_client_response) { crm_ipcs_send_ack(client, id, flags, "ack", __FUNCTION__, __LINE__); } return 0; } else if (crm_str_eq(op, STONITH_OP_RELAY, TRUE)) { xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE); crm_notice("Peer %s has received a forwarded fencing request from %s to fence (%s) peer %s", stonith_our_uname, client ? client->name : remote_peer, crm_element_value(dev, F_STONITH_ACTION), crm_element_value(dev, F_STONITH_TARGET)); if (initiate_remote_stonith_op(NULL, request, FALSE) != NULL) { rc = -EINPROGRESS; } } else if (crm_str_eq(op, STONITH_OP_FENCE, TRUE)) { if (remote_peer || stand_alone) { rc = stonith_fence(request); } else if (call_options & st_opt_manual_ack) { remote_fencing_op_t *rop = NULL; xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE); const char *target = crm_element_value(dev, F_STONITH_TARGET); crm_notice("Received manual confirmation that %s is fenced", target); rop = initiate_remote_stonith_op(client, request, TRUE); rc = stonith_manual_ack(request, rop); } else { const char *alternate_host = NULL; xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE); const char *target = crm_element_value(dev, F_STONITH_TARGET); const char *action = crm_element_value(dev, F_STONITH_ACTION); const char *device = crm_element_value(dev, F_STONITH_DEVICE); if (client) { int tolerance = 0; crm_notice("Client %s.%.8s wants to fence (%s) '%s' with device '%s'", client->name, client->id, action, target, device ? device : "(any)"); crm_element_value_int(dev, F_STONITH_TOLERANCE, &tolerance); if (stonith_check_fence_tolerance(tolerance, target, action)) { rc = 0; goto done; } } else { crm_notice("Peer %s wants to fence (%s) '%s' with device '%s'", remote_peer, action, target, device ? device : "(any)"); } alternate_host = check_alternate_host(target); if (alternate_host && client) { const char *client_id = NULL; crm_notice("Forwarding complex self fencing request to peer %s", alternate_host); if (client) { client_id = client->id; } else { client_id = crm_element_value(request, F_STONITH_CLIENTID); } /* Create a record of it, otherwise call_id will be 0 if we need to notify of failures */ create_remote_stonith_op(client_id, request, FALSE); crm_xml_add(request, F_STONITH_OPERATION, STONITH_OP_RELAY); crm_xml_add(request, F_STONITH_CLIENTID, client->id); send_cluster_message(crm_get_peer(0, alternate_host), crm_msg_stonith_ng, request, FALSE); rc = -EINPROGRESS; } else if (initiate_remote_stonith_op(client, request, FALSE) != NULL) { rc = -EINPROGRESS; } } } else if (crm_str_eq(op, STONITH_OP_FENCE_HISTORY, TRUE)) { rc = stonith_fence_history(request, &data); } else if (crm_str_eq(op, STONITH_OP_DEVICE_ADD, TRUE)) { const char *id = NULL; xmlNode *notify_data = create_xml_node(NULL, op); rc = stonith_device_register(request, &id, FALSE); crm_xml_add(notify_data, F_STONITH_DEVICE, id); crm_xml_add_int(notify_data, F_STONITH_ACTIVE, g_hash_table_size(device_list)); do_stonith_notify(call_options, op, rc, notify_data); free_xml(notify_data); } else if (crm_str_eq(op, STONITH_OP_DEVICE_DEL, TRUE)) { xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request, LOG_ERR); const char *id = crm_element_value(dev, XML_ATTR_ID); xmlNode *notify_data = create_xml_node(NULL, op); rc = stonith_device_remove(id, FALSE); crm_xml_add(notify_data, F_STONITH_DEVICE, id); crm_xml_add_int(notify_data, F_STONITH_ACTIVE, g_hash_table_size(device_list)); do_stonith_notify(call_options, op, rc, notify_data); free_xml(notify_data); } else if (crm_str_eq(op, STONITH_OP_LEVEL_ADD, TRUE)) { char *id = NULL; xmlNode *notify_data = create_xml_node(NULL, op); rc = stonith_level_register(request, &id); crm_xml_add(notify_data, F_STONITH_DEVICE, id); crm_xml_add_int(notify_data, F_STONITH_ACTIVE, g_hash_table_size(topology)); do_stonith_notify(call_options, op, rc, notify_data); free_xml(notify_data); free(id); } else if (crm_str_eq(op, STONITH_OP_LEVEL_DEL, TRUE)) { char *id = NULL; xmlNode *notify_data = create_xml_node(NULL, op); rc = stonith_level_remove(request, &id); crm_xml_add(notify_data, F_STONITH_DEVICE, id); crm_xml_add_int(notify_data, F_STONITH_ACTIVE, g_hash_table_size(topology)); do_stonith_notify(call_options, op, rc, notify_data); free_xml(notify_data); } else if (crm_str_eq(op, STONITH_OP_CONFIRM, TRUE)) { async_command_t *cmd = create_async_command(request); xmlNode *reply = stonith_construct_async_reply(cmd, NULL, NULL, 0); crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY); crm_notice("Broadcasting manual fencing confirmation for node %s", cmd->victim); send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE); free_async_command(cmd); free_xml(reply); } else if(safe_str_eq(op, CRM_OP_RM_NODE_CACHE)) { int id = 0; const char *name = NULL; crm_element_value_int(request, XML_ATTR_ID, &id); name = crm_element_value(request, XML_ATTR_UNAME); reap_crm_member(id, name); return pcmk_ok; } else { crm_err("Unknown %s from %s", op, client ? client->name : remote_peer); crm_log_xml_warn(request, "UnknownOp"); } done: /* Always reply unles the request is in process still. * If in progress, a reply will happen async after the request * processing is finished */ if (rc != -EINPROGRESS) { crm_trace("Reply handling: %p %u %u %d %d %s", client, client?client->request_id:0, id, is_set(call_options, st_opt_sync_call), call_options, crm_element_value(request, F_STONITH_CALLOPTS)); if (is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(client == NULL || client->request_id == id); } reply = stonith_construct_reply(request, output, data, rc); stonith_send_reply(reply, call_options, remote_peer, client_id); } free(output); free_xml(data); free_xml(reply); return rc; } static void handle_reply(crm_client_t * client, xmlNode * request, const char *remote_peer) { const char *op = crm_element_value(request, F_STONITH_OPERATION); if (crm_str_eq(op, STONITH_OP_QUERY, TRUE)) { process_remote_stonith_query(request); } else if (crm_str_eq(op, T_STONITH_NOTIFY, TRUE)) { process_remote_stonith_exec(request); } else if (crm_str_eq(op, STONITH_OP_FENCE, TRUE)) { /* Reply to a complex fencing op */ process_remote_stonith_exec(request); } else { crm_err("Unknown %s reply from %s", op, client ? client->name : remote_peer); crm_log_xml_warn(request, "UnknownOp"); } } void stonith_command(crm_client_t * client, uint32_t id, uint32_t flags, xmlNode * request, const char *remote_peer) { int call_options = 0; int rc = 0; gboolean is_reply = FALSE; char *op = crm_element_value_copy(request, F_STONITH_OPERATION); /* F_STONITH_OPERATION can be overwritten in remote_op_done() with crm_xml_add() * * by 0x4C2E934: crm_xml_add (xml.c:377) * by 0x40C5E9: remote_op_done (remote.c:178) * by 0x40F1D3: process_remote_stonith_exec (remote.c:1084) * by 0x40AD4F: stonith_command (commands.c:1891) * */ if (get_xpath_object("//" T_STONITH_REPLY, request, LOG_DEBUG_3)) { is_reply = TRUE; } crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); crm_debug("Processing %s%s %u from %s (%16x)", op, is_reply ? " reply" : "", id, client ? client->name : remote_peer, call_options); if (is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(client == NULL || client->request_id == id); } if (is_reply) { handle_reply(client, request, remote_peer); } else { rc = handle_request(client, id, flags, request, remote_peer); } crm_debug("Processed %s%s from %s: %s (%d)", op, is_reply ? " reply" : "", client ? client->name : remote_peer, rc > 0 ? "" : pcmk_strerror(rc), rc); free(op); } diff --git a/include/crm/cluster/internal.h b/include/crm/cluster/internal.h index 0834be2e15..3a8b2242f7 100644 --- a/include/crm/cluster/internal.h +++ b/include/crm/cluster/internal.h @@ -1,449 +1,474 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef CRM_CLUSTER_INTERNAL__H # define CRM_CLUSTER_INTERNAL__H # include # define AIS_IPC_NAME "ais-crm-ipc" # define AIS_IPC_MESSAGE_SIZE 8192*128 # define CRM_MESSAGE_IPC_ACK 0 # ifndef INTERFACE_MAX # define INTERFACE_MAX 2 /* from the private coroapi.h header */ # endif typedef struct crm_ais_host_s AIS_Host; typedef struct crm_ais_msg_s AIS_Message; struct crm_ais_host_s { uint32_t id; uint32_t pid; gboolean local; enum crm_ais_msg_types type; uint32_t size; char uname[MAX_NAME]; } __attribute__ ((packed)); struct crm_ais_msg_s { cs_ipc_header_response_t header __attribute__ ((aligned(8))); uint32_t id; gboolean is_compressed; AIS_Host host; AIS_Host sender; uint32_t size; uint32_t compressed_size; /* 584 bytes */ char data[0]; } __attribute__ ((packed)); struct crm_ais_nodeid_resp_s { cs_ipc_header_response_t header __attribute__ ((aligned(8))); uint32_t id; uint32_t counter; char uname[MAX_NAME]; char cname[MAX_NAME]; } __attribute__ ((packed)); struct crm_ais_quorum_resp_s { cs_ipc_header_response_t header __attribute__ ((aligned(8))); uint64_t id; uint32_t votes; uint32_t expected_votes; uint32_t quorate; } __attribute__ ((packed)); /* *INDENT-OFF* */ enum crm_proc_flag { crm_proc_none = 0x00000001, /* These values are sent over the network by the legacy plugin * Therefor changing any of these values is going to break compatability * * So don't */ /* 3 messaging types */ crm_proc_heartbeat = 0x01000000, crm_proc_plugin = 0x00000002, crm_proc_cpg = 0x04000000, crm_proc_lrmd = 0x00000010, crm_proc_cib = 0x00000100, crm_proc_crmd = 0x00000200, crm_proc_attrd = 0x00001000, crm_proc_stonithd = 0x00002000, crm_proc_stonith_ng= 0x00100000, crm_proc_pe = 0x00010000, crm_proc_te = 0x00020000, crm_proc_mgmtd = 0x00040000, }; /* *INDENT-ON* */ +/*! + * \internal + * \brief Return the process bit corresponding to the current cluster stack + * + * \return Process flag if detectable, otherwise 0 + */ +static inline uint32_t +crm_get_cluster_proc() +{ + switch (get_cluster_type()) { + case pcmk_cluster_corosync: + case pcmk_cluster_cman: + return crm_proc_cpg; + + case pcmk_cluster_heartbeat: + return crm_proc_heartbeat; + + case pcmk_cluster_classic_ais: + return crm_proc_plugin; + + default: + break; + } + return crm_proc_none; +} + static inline const char * peer2text(enum crm_proc_flag proc) { const char *text = "unknown"; - if (proc == (crm_proc_cpg | crm_proc_crmd) - || proc == (crm_proc_heartbeat | crm_proc_crmd)) { + if (proc == (crm_proc_crmd | crm_get_cluster_proc())) { return "peer"; } switch (proc) { case crm_proc_none: text = "none"; break; case crm_proc_plugin: text = "ais"; break; case crm_proc_heartbeat: text = "heartbeat"; break; case crm_proc_cib: text = "cib"; break; case crm_proc_crmd: text = "crmd"; break; case crm_proc_pe: text = "pengine"; break; case crm_proc_te: text = "tengine"; break; case crm_proc_lrmd: text = "lrmd"; break; case crm_proc_attrd: text = "attrd"; break; case crm_proc_stonithd: text = "stonithd"; break; case crm_proc_stonith_ng: text = "stonith-ng"; break; case crm_proc_mgmtd: text = "mgmtd"; break; case crm_proc_cpg: text = "corosync-cpg"; break; } return text; } static inline enum crm_proc_flag text2proc(const char *proc) { /* We only care about these two so far */ if (proc && strcmp(proc, "cib") == 0) { return crm_proc_cib; } else if (proc && strcmp(proc, "crmd") == 0) { return crm_proc_crmd; } return crm_proc_none; } static inline const char * ais_dest(const struct crm_ais_host_s *host) { if (host->local) { return "local"; } else if (host->size > 0) { return host->uname; } else { return ""; } } # define ais_data_len(msg) (msg->is_compressed?msg->compressed_size:msg->size) static inline AIS_Message * ais_msg_copy(const AIS_Message * source) { AIS_Message *target = malloc(sizeof(AIS_Message) + ais_data_len(source)); if(target) { memcpy(target, source, sizeof(AIS_Message)); memcpy(target->data, source->data, ais_data_len(target)); } return target; } /* typedef enum { CS_OK = 1, CS_ERR_LIBRARY = 2, CS_ERR_VERSION = 3, CS_ERR_INIT = 4, CS_ERR_TIMEOUT = 5, CS_ERR_TRY_AGAIN = 6, CS_ERR_INVALID_PARAM = 7, CS_ERR_NO_MEMORY = 8, CS_ERR_BAD_HANDLE = 9, CS_ERR_BUSY = 10, CS_ERR_ACCESS = 11, CS_ERR_NOT_EXIST = 12, CS_ERR_NAME_TOO_LONG = 13, CS_ERR_EXIST = 14, CS_ERR_NO_SPACE = 15, CS_ERR_INTERRUPT = 16, CS_ERR_NAME_NOT_FOUND = 17, CS_ERR_NO_RESOURCES = 18, CS_ERR_NOT_SUPPORTED = 19, CS_ERR_BAD_OPERATION = 20, CS_ERR_FAILED_OPERATION = 21, CS_ERR_MESSAGE_ERROR = 22, CS_ERR_QUEUE_FULL = 23, CS_ERR_QUEUE_NOT_AVAILABLE = 24, CS_ERR_BAD_FLAGS = 25, CS_ERR_TOO_BIG = 26, CS_ERR_NO_SECTIONS = 27, CS_ERR_CONTEXT_NOT_FOUND = 28, CS_ERR_TOO_MANY_GROUPS = 30, CS_ERR_SECURITY = 100 } cs_error_t; */ static inline const char * ais_error2text(int error) { const char *text = "unknown"; # if SUPPORT_COROSYNC switch (error) { case CS_OK: text = "OK"; break; case CS_ERR_LIBRARY: text = "Library error"; break; case CS_ERR_VERSION: text = "Version error"; break; case CS_ERR_INIT: text = "Initialization error"; break; case CS_ERR_TIMEOUT: text = "Timeout"; break; case CS_ERR_TRY_AGAIN: text = "Try again"; break; case CS_ERR_INVALID_PARAM: text = "Invalid parameter"; break; case CS_ERR_NO_MEMORY: text = "No memory"; break; case CS_ERR_BAD_HANDLE: text = "Bad handle"; break; case CS_ERR_BUSY: text = "Busy"; break; case CS_ERR_ACCESS: text = "Access error"; break; case CS_ERR_NOT_EXIST: text = "Doesn't exist"; break; case CS_ERR_NAME_TOO_LONG: text = "Name too long"; break; case CS_ERR_EXIST: text = "Exists"; break; case CS_ERR_NO_SPACE: text = "No space"; break; case CS_ERR_INTERRUPT: text = "Interrupt"; break; case CS_ERR_NAME_NOT_FOUND: text = "Name not found"; break; case CS_ERR_NO_RESOURCES: text = "No resources"; break; case CS_ERR_NOT_SUPPORTED: text = "Not supported"; break; case CS_ERR_BAD_OPERATION: text = "Bad operation"; break; case CS_ERR_FAILED_OPERATION: text = "Failed operation"; break; case CS_ERR_MESSAGE_ERROR: text = "Message error"; break; case CS_ERR_QUEUE_FULL: text = "Queue full"; break; case CS_ERR_QUEUE_NOT_AVAILABLE: text = "Queue not available"; break; case CS_ERR_BAD_FLAGS: text = "Bad flags"; break; case CS_ERR_TOO_BIG: text = "To big"; break; case CS_ERR_NO_SECTIONS: text = "No sections"; break; } # endif return text; } static inline const char * msg_type2text(enum crm_ais_msg_types type) { const char *text = "unknown"; switch (type) { case crm_msg_none: text = "unknown"; break; case crm_msg_ais: text = "ais"; break; case crm_msg_cib: text = "cib"; break; case crm_msg_crmd: text = "crmd"; break; case crm_msg_pe: text = "pengine"; break; case crm_msg_te: text = "tengine"; break; case crm_msg_lrmd: text = "lrmd"; break; case crm_msg_attrd: text = "attrd"; break; case crm_msg_stonithd: text = "stonithd"; break; case crm_msg_stonith_ng: text = "stonith-ng"; break; } return text; } enum crm_ais_msg_types text2msg_type(const char *text); char *get_ais_data(const AIS_Message * msg); gboolean check_message_sanity(const AIS_Message * msg, const char *data); # if SUPPORT_HEARTBEAT extern ll_cluster_t *heartbeat_cluster; gboolean send_ha_message(ll_cluster_t * hb_conn, xmlNode * msg, const char *node, gboolean force_ordered); gboolean ha_msg_dispatch(ll_cluster_t * cluster_conn, gpointer user_data); gboolean register_heartbeat_conn(crm_cluster_t * cluster); xmlNode *convert_ha_message(xmlNode * parent, HA_Message * msg, const char *field); gboolean ccm_have_quorum(oc_ed_t event); const char *ccm_event_name(oc_ed_t event); crm_node_t *crm_update_ccm_node(const oc_ev_membership_t * oc, int offset, const char *state, uint64_t seq); gboolean heartbeat_initialize_nodelist(void *cluster, gboolean force_member, xmlNode * xml_parent); # endif # if SUPPORT_COROSYNC gboolean send_cpg_iov(struct iovec * iov); # if SUPPORT_PLUGIN char *classic_node_name(uint32_t nodeid); void plugin_handle_membership(AIS_Message *msg); bool send_plugin_text(int class, struct iovec *iov); # else char *corosync_node_name(uint64_t /*cmap_handle_t */ cmap_handle, uint32_t nodeid); char *corosync_cluster_name(void); int corosync_cmap_has_config(const char *prefix); # endif gboolean corosync_initialize_nodelist(void *cluster, gboolean force_member, xmlNode * xml_parent); gboolean send_cluster_message_cs(xmlNode * msg, gboolean local, crm_node_t * node, enum crm_ais_msg_types dest); enum cluster_type_e find_corosync_variant(void); void terminate_cs_connection(crm_cluster_t * cluster); gboolean init_cs_connection(crm_cluster_t * cluster); gboolean init_cs_connection_once(crm_cluster_t * cluster); # endif # ifdef SUPPORT_CMAN char *cman_node_name(uint32_t nodeid); # endif enum crm_quorum_source { crm_quorum_cman, crm_quorum_corosync, crm_quorum_pacemaker, }; int get_corosync_id(int id, const char *uuid); char *get_corosync_uuid(crm_node_t *peer); enum crm_quorum_source get_quorum_source(void); void crm_update_peer_proc(const char *source, crm_node_t * peer, uint32_t flag, const char *status); crm_node_t *crm_update_peer(const char *source, unsigned int id, uint64_t born, uint64_t seen, int32_t votes, uint32_t children, const char *uuid, const char *uname, const char *addr, const char *state); void crm_update_peer_expected(const char *source, crm_node_t * node, const char *expected); void crm_update_peer_state(const char *source, crm_node_t * node, const char *state, int membership); gboolean init_cman_connection(gboolean(*dispatch) (unsigned long long, gboolean), void (*destroy) (gpointer)); gboolean cluster_connect_quorum(gboolean(*dispatch) (unsigned long long, gboolean), void (*destroy) (gpointer)); void set_node_uuid(const char *uname, const char *uuid); gboolean node_name_is_valid(const char *key, const char *name); crm_node_t * crm_find_peer_full(unsigned int id, const char *uname, int flags); crm_node_t * crm_find_peer(unsigned int id, const char *uname); #endif diff --git a/mcp/pacemaker.c b/mcp/pacemaker.c index 9921e132f4..f885e9ae58 100644 --- a/mcp/pacemaker.c +++ b/mcp/pacemaker.c @@ -1,1143 +1,1139 @@ /* * Copyright (C) 2010 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include gboolean fatal_error = FALSE; GMainLoop *mainloop = NULL; #define PCMK_PROCESS_CHECK_INTERVAL 5 const char *local_name = NULL; uint32_t local_nodeid = 0; crm_trigger_t *shutdown_trigger = NULL; const char *pid_file = "/var/run/pacemaker.pid"; typedef struct pcmk_child_s { int pid; long flag; int start_seq; int respawn_count; gboolean respawn; const char *name; const char *uid; const char *command; gboolean active_before_startup; } pcmk_child_t; /* Index into the array below */ #define pcmk_child_crmd 4 #define pcmk_child_mgmtd 8 /* *INDENT-OFF* */ static pcmk_child_t pcmk_children[] = { { 0, crm_proc_none, 0, 0, FALSE, "none", NULL, NULL }, { 0, crm_proc_plugin, 0, 0, FALSE, "ais", NULL, NULL }, { 0, crm_proc_lrmd, 3, 0, TRUE, "lrmd", NULL, CRM_DAEMON_DIR"/lrmd" }, { 0, crm_proc_cib, 1, 0, TRUE, "cib", CRM_DAEMON_USER, CRM_DAEMON_DIR"/cib" }, { 0, crm_proc_crmd, 6, 0, TRUE, "crmd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/crmd" }, { 0, crm_proc_attrd, 4, 0, TRUE, "attrd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/attrd" }, { 0, crm_proc_stonithd, 0, 0, TRUE, "stonithd", NULL, NULL }, { 0, crm_proc_pe, 5, 0, TRUE, "pengine", CRM_DAEMON_USER, CRM_DAEMON_DIR"/pengine" }, { 0, crm_proc_mgmtd, 0, 0, TRUE, "mgmtd", NULL, HB_DAEMON_DIR"/mgmtd" }, { 0, crm_proc_stonith_ng, 2, 0, TRUE, "stonith-ng", NULL, CRM_DAEMON_DIR"/stonithd" }, }; /* *INDENT-ON* */ static gboolean start_child(pcmk_child_t * child); static gboolean check_active_before_startup_processes(gpointer user_data); void update_process_clients(crm_client_t *client); void update_process_peers(void); void enable_crmd_as_root(gboolean enable) { if (enable) { pcmk_children[pcmk_child_crmd].uid = NULL; } else { pcmk_children[pcmk_child_crmd].uid = CRM_DAEMON_USER; } } void enable_mgmtd(gboolean enable) { if (enable) { pcmk_children[pcmk_child_mgmtd].start_seq = 7; } else { pcmk_children[pcmk_child_mgmtd].start_seq = 0; } } static uint32_t get_process_list(void) { int lpc = 0; - uint32_t procs = 0; - - if(is_classic_ais_cluster()) { - procs |= crm_proc_plugin; - } + uint32_t procs = crm_get_cluster_proc(); for (lpc = 0; lpc < SIZEOF(pcmk_children); lpc++) { if (pcmk_children[lpc].pid != 0) { procs |= pcmk_children[lpc].flag; } } return procs; } static void pcmk_process_exit(pcmk_child_t * child) { child->pid = 0; child->active_before_startup = FALSE; /* Broadcast the fact that one of our processes died ASAP * * Try to get some logging of the cause out first though * because we're probably about to get fenced * * Potentially do this only if respawn_count > N * to allow for local recovery */ update_node_processes(local_nodeid, NULL, get_process_list()); child->respawn_count += 1; if (child->respawn_count > MAX_RESPAWN) { crm_err("Child respawn count exceeded by %s", child->name); child->respawn = FALSE; } if (shutdown_trigger) { mainloop_set_trigger(shutdown_trigger); update_node_processes(local_nodeid, NULL, get_process_list()); } else if (child->respawn && crm_is_true(getenv("PCMK_fail_fast"))) { crm_err("Rebooting system because of %s", child->name); pcmk_panic(__FUNCTION__); } else if (child->respawn) { crm_notice("Respawning failed child process: %s", child->name); start_child(child); } } static void pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) { pcmk_child_t *child = mainloop_child_userdata(p); const char *name = mainloop_child_name(p); if (signo && signo == SIGKILL) { crm_warn("The %s process (%d) terminated with signal %d (core=%d)", name, pid, signo, core); } else if (signo) { crm_err("The %s process (%d) terminated with signal %d (core=%d)", name, pid, signo, core); } else { switch(exitcode) { case pcmk_ok: crm_info("The %s process (%d) exited: %s (%d)", name, pid, pcmk_strerror(exitcode), exitcode); break; case DAEMON_RESPAWN_STOP: crm_warn("The %s process (%d) can no longer be respawned, shutting the cluster down.", name, pid); child->respawn = FALSE; fatal_error = TRUE; pcmk_shutdown(SIGTERM); break; case pcmk_err_panic: do_crm_log_always(LOG_EMERG, "The %s process (%d) instructed the machine to reset", name, pid); child->respawn = FALSE; fatal_error = TRUE; pcmk_panic(__FUNCTION__); pcmk_shutdown(SIGTERM); break; default: crm_err("The %s process (%d) exited: %s (%d)", name, pid, pcmk_strerror(exitcode), exitcode); break; } } pcmk_process_exit(child); } static gboolean stop_child(pcmk_child_t * child, int signal) { if (signal == 0) { signal = SIGTERM; } if (child->command == NULL) { crm_debug("Nothing to do for child \"%s\"", child->name); return TRUE; } if (child->pid <= 0) { crm_trace("Client %s not running", child->name); return TRUE; } errno = 0; if (kill(child->pid, signal) == 0) { crm_notice("Stopping %s: Sent -%d to process %d", child->name, signal, child->pid); } else { crm_perror(LOG_ERR, "Stopping %s: Could not send -%d to process %d failed", child->name, signal, child->pid); } return TRUE; } static char *opts_default[] = { NULL, NULL }; static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL }; static gboolean start_child(pcmk_child_t * child) { int lpc = 0; uid_t uid = 0; gid_t gid = 0; struct rlimit oflimits; gboolean use_valgrind = FALSE; gboolean use_callgrind = FALSE; const char *devnull = "/dev/null"; const char *env_valgrind = getenv("PCMK_valgrind_enabled"); const char *env_callgrind = getenv("PCMK_callgrind_enabled"); enum cluster_type_e stack = get_cluster_type(); child->active_before_startup = FALSE; if (child->command == NULL) { crm_info("Nothing to do for child \"%s\"", child->name); return TRUE; } if (env_callgrind != NULL && crm_is_true(env_callgrind)) { use_callgrind = TRUE; use_valgrind = TRUE; } else if (env_callgrind != NULL && strstr(env_callgrind, child->name)) { use_callgrind = TRUE; use_valgrind = TRUE; } else if (env_valgrind != NULL && crm_is_true(env_valgrind)) { use_valgrind = TRUE; } else if (env_valgrind != NULL && strstr(env_valgrind, child->name)) { use_valgrind = TRUE; } if (use_valgrind && strlen(VALGRIND_BIN) == 0) { crm_warn("Cannot enable valgrind for %s:" " The location of the valgrind binary is unknown", child->name); use_valgrind = FALSE; } if (child->uid) { if (crm_user_lookup(child->uid, &uid, &gid) < 0) { crm_err("Invalid user (%s) for %s: not found", child->uid, child->name); return FALSE; } crm_info("Using uid=%u and group=%u for process %s", uid, gid, child->name); } child->pid = fork(); CRM_ASSERT(child->pid != -1); if (child->pid > 0) { /* parent */ mainloop_child_add(child->pid, 0, child->name, child, pcmk_child_exit); crm_info("Forked child %d for process %s%s", child->pid, child->name, use_valgrind ? " (valgrind enabled: " VALGRIND_BIN ")" : ""); update_node_processes(local_nodeid, NULL, get_process_list()); return TRUE; } else { /* Start a new session */ (void)setsid(); /* Setup the two alternate arg arrarys */ opts_vgrind[0] = strdup(VALGRIND_BIN); if (use_callgrind) { opts_vgrind[1] = strdup("--tool=callgrind"); opts_vgrind[2] = strdup("--callgrind-out-file=" CRM_STATE_DIR "/callgrind.out.%p"); opts_vgrind[3] = strdup(child->command); opts_vgrind[4] = NULL; } else { opts_vgrind[1] = strdup(child->command); opts_vgrind[2] = NULL; opts_vgrind[3] = NULL; opts_vgrind[4] = NULL; } opts_default[0] = strdup(child->command);; if(gid) { if(stack == pcmk_cluster_corosync) { /* Drop root privileges completely * * We can do this because we set uidgid.gid.${gid}=1 * via CMAP which allows these processes to connect to * corosync */ if (setgid(gid) < 0) { crm_perror(LOG_ERR, "Could not set group to %d", gid); } /* Keep the root group (so we can access corosync), but add the haclient group (so we can access ipc) */ } else if (initgroups(child->uid, gid) < 0) { crm_err("Cannot initialize groups for %s: %s (%d)", child->uid, pcmk_strerror(errno), errno); } } if (uid && setuid(uid) < 0) { crm_perror(LOG_ERR, "Could not set user to %d (%s)", uid, child->uid); } /* Close all open file descriptors */ getrlimit(RLIMIT_NOFILE, &oflimits); for (lpc = 0; lpc < oflimits.rlim_cur; lpc++) { close(lpc); } (void)open(devnull, O_RDONLY); /* Stdin: fd 0 */ (void)open(devnull, O_WRONLY); /* Stdout: fd 1 */ (void)open(devnull, O_WRONLY); /* Stderr: fd 2 */ if (use_valgrind) { (void)execvp(VALGRIND_BIN, opts_vgrind); } else { (void)execvp(child->command, opts_default); } crm_perror(LOG_ERR, "FATAL: Cannot exec %s", child->command); crm_exit(DAEMON_RESPAWN_STOP); } return TRUE; /* never reached */ } static gboolean escalate_shutdown(gpointer data) { pcmk_child_t *child = data; if (child->pid) { /* Use SIGSEGV instead of SIGKILL to create a core so we can see what it was up to */ crm_err("Child %s not terminating in a timely manner, forcing", child->name); stop_child(child, SIGSEGV); } return FALSE; } static gboolean pcmk_shutdown_worker(gpointer user_data) { static int phase = 0; static time_t next_log = 0; static int max = SIZEOF(pcmk_children); int lpc = 0; if (phase == 0) { crm_notice("Shuting down Pacemaker"); phase = max; /* Add a second, more frequent, check to speed up shutdown */ g_timeout_add_seconds(5, check_active_before_startup_processes, NULL); } for (; phase > 0; phase--) { /* dont stop anything with start_seq < 1 */ for (lpc = max - 1; lpc >= 0; lpc--) { pcmk_child_t *child = &(pcmk_children[lpc]); if (phase != child->start_seq) { continue; } if (child->pid) { time_t now = time(NULL); if (child->respawn) { next_log = now + 30; child->respawn = FALSE; stop_child(child, SIGTERM); if (phase < pcmk_children[pcmk_child_crmd].start_seq) { g_timeout_add(180000 /* 3m */ , escalate_shutdown, child); } } else if (now >= next_log) { next_log = now + 30; crm_notice("Still waiting for %s (pid=%d, seq=%d) to terminate...", child->name, child->pid, child->start_seq); } return TRUE; } /* cleanup */ crm_debug("%s confirmed stopped", child->name); child->pid = 0; } } /* send_cluster_id(); */ crm_notice("Shutdown complete"); { const char *delay = daemon_option("shutdown_delay"); if(delay) { sync(); sleep(crm_get_msec(delay) / 1000); } } g_main_loop_quit(mainloop); if (fatal_error) { crm_notice("Attempting to inhibit respawning after fatal error"); crm_exit(DAEMON_RESPAWN_STOP); } return TRUE; } static void pcmk_ignore(int nsig) { crm_info("Ignoring signal %s (%d)", strsignal(nsig), nsig); } static void pcmk_sigquit(int nsig) { pcmk_panic(__FUNCTION__); } void pcmk_shutdown(int nsig) { if (shutdown_trigger == NULL) { shutdown_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, pcmk_shutdown_worker, NULL); } mainloop_set_trigger(shutdown_trigger); } static int32_t pcmk_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { crm_trace("Connection %p", c); if (crm_client_new(c, uid, gid) == NULL) { return -EIO; } return 0; } static void pcmk_ipc_created(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); } /* Exit code means? */ static int32_t pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; const char *task = NULL; crm_client_t *c = crm_client_get(qbc); xmlNode *msg = crm_ipcs_recv(c, data, size, &id, &flags); crm_ipcs_send_ack(c, id, flags, "ack", __FUNCTION__, __LINE__); if (msg == NULL) { return 0; } task = crm_element_value(msg, F_CRM_TASK); if (crm_str_eq(task, CRM_OP_QUIT, TRUE)) { /* Time to quit */ crm_notice("Shutting down in responce to ticket %s (%s)", crm_element_value(msg, F_CRM_REFERENCE), crm_element_value(msg, F_CRM_ORIGIN)); pcmk_shutdown(15); } else if (crm_str_eq(task, CRM_OP_RM_NODE_CACHE, TRUE)) { /* Send to everyone */ struct iovec *iov; int id = 0; const char *name = NULL; crm_element_value_int(msg, XML_ATTR_ID, &id); name = crm_element_value(msg, XML_ATTR_UNAME); crm_notice("Instructing peers to remove references to node %s/%u", name, id); iov = calloc(1, sizeof(struct iovec)); iov->iov_base = dump_xml_unformatted(msg); iov->iov_len = 1 + strlen(iov->iov_base); send_cpg_iov(iov); } else { update_process_clients(c); } free_xml(msg); return 0; } /* Error code means? */ static int32_t pcmk_ipc_closed(qb_ipcs_connection_t * c) { crm_client_t *client = crm_client_get(c); if (client == NULL) { return 0; } crm_trace("Connection %p", c); crm_client_destroy(client); return 0; } static void pcmk_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); pcmk_ipc_closed(c); } struct qb_ipcs_service_handlers mcp_ipc_callbacks = { .connection_accept = pcmk_ipc_accept, .connection_created = pcmk_ipc_created, .msg_process = pcmk_ipc_dispatch, .connection_closed = pcmk_ipc_closed, .connection_destroyed = pcmk_ipc_destroy }; /*! * \internal * \brief Send an XML message with process list of all known peers to client(s) * * \param[in] client Send message to this client, or all clients if NULL */ void update_process_clients(crm_client_t *client) { GHashTableIter iter; crm_node_t *node = NULL; xmlNode *update = create_xml_node(NULL, "nodes"); g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) { xmlNode *xml = create_xml_node(update, "node"); crm_xml_add_int(xml, "id", node->id); crm_xml_add(xml, "uname", node->uname); crm_xml_add(xml, "state", node->state); crm_xml_add_int(xml, "processes", node->processes); } if(client) { crm_trace("Sending process list to client %s", client->id); crm_ipcs_send(client, 0, update, crm_ipc_server_event); } else { crm_trace("Sending process list to %d clients", crm_hash_table_size(client_connections)); g_hash_table_iter_init(&iter, client_connections); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & client)) { crm_ipcs_send(client, 0, update, crm_ipc_server_event); } } free_xml(update); } /*! * \internal * \brief Send a CPG message with local node's process list to all peers */ void update_process_peers(void) { /* Do nothing for corosync-2 based clusters */ char buffer[1024]; struct iovec *iov; int rc = 0; memset(buffer, 0, SIZEOF(buffer)); if (local_name) { rc = snprintf(buffer, SIZEOF(buffer) - 1, "", local_name, get_process_list()); } else { rc = snprintf(buffer, SIZEOF(buffer) - 1, "", get_process_list()); } crm_trace("Sending %s", buffer); iov = calloc(1, sizeof(struct iovec)); iov->iov_base = strdup(buffer); iov->iov_len = rc + 1; send_cpg_iov(iov); } /*! * \internal * \brief Update a node's process list, notifying clients and peers if needed * * \param[in] id Node ID of affected node * \param[in] uname Uname of affected node * \param[in] procs Affected node's process list mask * * \return TRUE if the process list changed, FALSE otherwise */ gboolean update_node_processes(uint32_t id, const char *uname, uint32_t procs) { gboolean changed = FALSE; crm_node_t *node = crm_get_peer(id, uname); if (procs != 0) { if (procs != node->processes) { crm_debug("Node %s now has process list: %.32x (was %.32x)", node->uname, procs, node->processes); node->processes = procs; changed = TRUE; /* If local node's processes have changed, notify clients/peers */ if (id == local_nodeid) { update_process_clients(NULL); update_process_peers(); } } else { crm_trace("Node %s still has process list: %.32x", node->uname, procs); } } return changed; } /* *INDENT-OFF* */ static struct crm_option long_options[] = { /* Top-level Options */ {"help", 0, 0, '?', "\tThis text"}, {"version", 0, 0, '$', "\tVersion information" }, {"verbose", 0, 0, 'V', "\tIncrease debug output"}, {"shutdown", 0, 0, 'S', "\tInstruct Pacemaker to shutdown on this machine"}, {"features", 0, 0, 'F', "\tDisplay the full version and list of features Pacemaker was built with"}, {"-spacer-", 1, 0, '-', "\nAdditional Options:"}, {"foreground", 0, 0, 'f', "\t(Ignored) Pacemaker always runs in the foreground"}, {"pid-file", 1, 0, 'p', "\t(Ignored) Daemon pid file location"}, {NULL, 0, 0, 0} }; /* *INDENT-ON* */ static void mcp_chown(const char *path, uid_t uid, gid_t gid) { int rc = chown(path, uid, gid); if (rc < 0) { crm_warn("Cannot change the ownership of %s to user %s and gid %d: %s", path, CRM_DAEMON_USER, gid, pcmk_strerror(errno)); } } static gboolean check_active_before_startup_processes(gpointer user_data) { int start_seq = 1, lpc = 0; static int max = SIZEOF(pcmk_children); gboolean keep_tracking = FALSE; for (start_seq = 1; start_seq < max; start_seq++) { for (lpc = 0; lpc < max; lpc++) { if (pcmk_children[lpc].active_before_startup == FALSE) { /* we are already tracking it as a child process. */ continue; } else if (start_seq != pcmk_children[lpc].start_seq) { continue; } else if (crm_pid_active(pcmk_children[lpc].pid) != 1) { crm_notice("Process %s terminated (pid=%d)", pcmk_children[lpc].name, pcmk_children[lpc].pid); pcmk_process_exit(&(pcmk_children[lpc])); continue; } /* at least one of the processes found at startup * is still going, so keep this recurring timer around */ keep_tracking = TRUE; } } return keep_tracking; } static bool find_and_track_existing_processes(void) { DIR *dp; struct dirent *entry; struct stat statbuf; int start_tracker = 0; dp = opendir("/proc"); if (!dp) { /* no proc directory to search through */ crm_notice("Can not read /proc directory to track existing components"); return FALSE; } while ((entry = readdir(dp)) != NULL) { char procpath[128]; char value[64]; char key[16]; FILE *file; int pid; int max = SIZEOF(pcmk_children); int i; strcpy(procpath, "/proc/"); /* strlen("/proc/") + strlen("/status") + 1 = 14 * 128 - 14 = 114 */ strncat(procpath, entry->d_name, 114); if (lstat(procpath, &statbuf)) { continue; } if (!S_ISDIR(statbuf.st_mode) || !isdigit(entry->d_name[0])) { continue; } strcat(procpath, "/status"); file = fopen(procpath, "r"); if (!file) { continue; } if (fscanf(file, "%15s%63s", key, value) != 2) { fclose(file); continue; } fclose(file); pid = atoi(entry->d_name); if (pid <= 0) { continue; } for (i = 0; i < max; i++) { const char *name = pcmk_children[i].name; if (pcmk_children[i].start_seq == 0) { continue; } if (pcmk_children[i].flag == crm_proc_stonith_ng) { name = "stonithd"; } if (safe_str_eq(name, value)) { if (crm_pid_active(pid) != 1) { continue; } crm_notice("Tracking existing %s process (pid=%d)", value, pid); pcmk_children[i].pid = pid; pcmk_children[i].active_before_startup = TRUE; start_tracker = 1; } } } if (start_tracker) { g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL, check_active_before_startup_processes, NULL); } closedir(dp); return start_tracker; } static void init_children_processes(void) { int start_seq = 1, lpc = 0; static int max = SIZEOF(pcmk_children); /* start any children that have not been detected */ for (start_seq = 1; start_seq < max; start_seq++) { /* dont start anything with start_seq < 1 */ for (lpc = 0; lpc < max; lpc++) { if (pcmk_children[lpc].pid) { /* we are already tracking it */ continue; } if (start_seq == pcmk_children[lpc].start_seq) { start_child(&(pcmk_children[lpc])); } } } /* From this point on, any daemons being started will be due to * respawning rather than node start. * * This may be useful for the daemons to know */ setenv("PCMK_respawned", "true", 1); } static void mcp_cpg_destroy(gpointer user_data) { crm_err("Connection destroyed"); crm_exit(ENOTCONN); } /*! * \internal * \brief Process a CPG message (process list or manual peer cache removal) * * \param[in] handle CPG connection (ignored) * \param[in] groupName CPG group name (ignored) * \param[in] nodeid ID of affected node * \param[in] pid Process ID (ignored) * \param[in] msg CPG XML message * \param[in] msg_len Length of msg in bytes (ignored) */ static void mcp_cpg_deliver(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { xmlNode *xml = string2xml(msg); const char *task = crm_element_value(xml, F_CRM_TASK); crm_trace("Received CPG message (%s): %.200s", (task? task : "process list"), msg); if (task == NULL) { if (nodeid == local_nodeid) { crm_info("Ignoring process list sent by peer for local node"); } else { uint32_t procs = 0; const char *uname = crm_element_value(xml, "uname"); crm_element_value_int(xml, "proclist", (int *)&procs); if (update_node_processes(nodeid, uname, procs)) { update_process_clients(NULL); } } } else if (crm_str_eq(task, CRM_OP_RM_NODE_CACHE, TRUE)) { int id = 0; const char *name = NULL; crm_element_value_int(xml, XML_ATTR_ID, &id); name = crm_element_value(xml, XML_ATTR_UNAME); reap_crm_member(id, name); } if (xml != NULL) { free_xml(xml); } } static void mcp_cpg_membership(cpg_handle_t handle, const struct cpg_name *groupName, const struct cpg_address *member_list, size_t member_list_entries, const struct cpg_address *left_list, size_t left_list_entries, const struct cpg_address *joined_list, size_t joined_list_entries) { /* Don't care about CPG membership, but we do want to broadcast our own presence */ update_process_peers(); } static gboolean mcp_quorum_callback(unsigned long long seq, gboolean quorate) { /* Nothing to do */ return TRUE; } static void mcp_quorum_destroy(gpointer user_data) { crm_info("connection closed"); } int main(int argc, char **argv) { int rc; int flag; int argerr = 0; int option_index = 0; gboolean shutdown = FALSE; uid_t pcmk_uid = 0; gid_t pcmk_gid = 0; struct rlimit cores; crm_ipc_t *old_instance = NULL; qb_ipcs_service_t *ipcs = NULL; const char *facility = daemon_option("logfacility"); static crm_cluster_t cluster; crm_log_preinit(NULL, argc, argv); crm_set_options(NULL, "mode [options]", long_options, "Start/Stop Pacemaker\n"); mainloop_add_signal(SIGHUP, pcmk_ignore); mainloop_add_signal(SIGQUIT, pcmk_sigquit); while (1) { flag = crm_get_option(argc, argv, &option_index); if (flag == -1) break; switch (flag) { case 'V': crm_bump_log_level(argc, argv); break; case 'f': /* Legacy */ break; case 'p': pid_file = optarg; break; case '$': case '?': crm_help(flag, EX_OK); break; case 'S': shutdown = TRUE; break; case 'F': printf("Pacemaker %s (Build: %s)\n Supporting v%s: %s\n", VERSION, BUILD_VERSION, CRM_FEATURE_SET, CRM_FEATURES); crm_exit(pcmk_ok); default: printf("Argument code 0%o (%c) is not (?yet?) supported\n", flag, flag); ++argerr; break; } } if (optind < argc) { printf("non-option ARGV-elements: "); while (optind < argc) printf("%s ", argv[optind++]); printf("\n"); } if (argerr) { crm_help('?', EX_USAGE); } setenv("LC_ALL", "C", 1); setenv("HA_LOGD", "no", 1); set_daemon_option("mcp", "true"); set_daemon_option("use_logd", "off"); crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); /* Restore the original facility so that mcp_read_config() does the right thing */ set_daemon_option("logfacility", facility); crm_debug("Checking for old instances of %s", CRM_SYSTEM_MCP); old_instance = crm_ipc_new(CRM_SYSTEM_MCP, 0); crm_ipc_connect(old_instance); if (shutdown) { crm_debug("Terminating previous instance"); while (crm_ipc_connected(old_instance)) { xmlNode *cmd = create_request(CRM_OP_QUIT, NULL, NULL, CRM_SYSTEM_MCP, CRM_SYSTEM_MCP, NULL); crm_debug("."); crm_ipc_send(old_instance, cmd, 0, 0, NULL); free_xml(cmd); sleep(2); } crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); crm_exit(pcmk_ok); } else if (crm_ipc_connected(old_instance)) { crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); crm_err("Pacemaker is already active, aborting startup"); crm_exit(DAEMON_RESPAWN_STOP); } crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); if (mcp_read_config() == FALSE) { crm_notice("Could not obtain corosync config data, exiting"); crm_exit(ENODATA); } crm_notice("Starting Pacemaker %s (Build: %s): %s", VERSION, BUILD_VERSION, CRM_FEATURES); mainloop = g_main_new(FALSE); sysrq_init(); rc = getrlimit(RLIMIT_CORE, &cores); if (rc < 0) { crm_perror(LOG_ERR, "Cannot determine current maximum core size."); } else { if (cores.rlim_max == 0 && geteuid() == 0) { cores.rlim_max = RLIM_INFINITY; } else { crm_info("Maximum core file size is: %lu", (unsigned long)cores.rlim_max); } cores.rlim_cur = cores.rlim_max; rc = setrlimit(RLIMIT_CORE, &cores); if (rc < 0) { crm_perror(LOG_ERR, "Core file generation will remain disabled." " Core files are an important diagnositic tool," " please consider enabling them by default."); } #if 0 /* system() is not thread-safe, can't call from here * Actually, its a pretty hacky way to try and achieve this anyway */ if (system("echo 1 > /proc/sys/kernel/core_uses_pid") != 0) { crm_perror(LOG_ERR, "Could not enable /proc/sys/kernel/core_uses_pid"); } #endif } rc = pcmk_ok; if (crm_user_lookup(CRM_DAEMON_USER, &pcmk_uid, &pcmk_gid) < 0) { crm_err("Cluster user %s does not exist, aborting Pacemaker startup", CRM_DAEMON_USER); crm_exit(ENOKEY); } mkdir(CRM_STATE_DIR, 0750); mcp_chown(CRM_STATE_DIR, pcmk_uid, pcmk_gid); /* Used to store core files in */ crm_build_path(CRM_CORE_DIR, 0775); mcp_chown(CRM_CORE_DIR, pcmk_uid, pcmk_gid); /* Used to store blackbox dumps in */ crm_build_path(CRM_BLACKBOX_DIR, 0755); mcp_chown(CRM_BLACKBOX_DIR, pcmk_uid, pcmk_gid); /* Used to store policy engine inputs in */ crm_build_path(PE_STATE_DIR, 0755); mcp_chown(PE_STATE_DIR, pcmk_uid, pcmk_gid); /* Used to store the cluster configuration */ crm_build_path(CRM_CONFIG_DIR, 0755); mcp_chown(CRM_CONFIG_DIR, pcmk_uid, pcmk_gid); /* Resource agent paths are constructed by the lrmd */ ipcs = mainloop_add_ipc_server(CRM_SYSTEM_MCP, QB_IPC_NATIVE, &mcp_ipc_callbacks); if (ipcs == NULL) { crm_err("Couldn't start IPC server"); crm_exit(EIO); } /* Allows us to block shutdown */ if (cluster_connect_cfg(&local_nodeid) == FALSE) { crm_err("Couldn't connect to Corosync's CFG service"); crm_exit(ENOPROTOOPT); } if(pcmk_locate_sbd() > 0) { setenv("PCMK_watchdog", "true", 1); } else { setenv("PCMK_watchdog", "false", 1); } find_and_track_existing_processes(); cluster.destroy = mcp_cpg_destroy; cluster.cpg.cpg_deliver_fn = mcp_cpg_deliver; cluster.cpg.cpg_confchg_fn = mcp_cpg_membership; if(cluster_connect_cpg(&cluster) == FALSE) { crm_err("Couldn't connect to Corosync's CPG service"); rc = -ENOPROTOOPT; } if (rc == pcmk_ok && is_corosync_cluster()) { /* Keep the membership list up-to-date for crm_node to query */ if(cluster_connect_quorum(mcp_quorum_callback, mcp_quorum_destroy) == FALSE) { rc = -ENOTCONN; } } if(rc == pcmk_ok) { local_name = get_local_node_name(); update_node_processes(local_nodeid, local_name, get_process_list()); mainloop_add_signal(SIGTERM, pcmk_shutdown); mainloop_add_signal(SIGINT, pcmk_shutdown); init_children_processes(); crm_info("Starting mainloop"); g_main_run(mainloop); } if (ipcs) { crm_trace("Closing IPC server"); mainloop_del_ipc_server(ipcs); ipcs = NULL; } g_main_destroy(mainloop); cluster_disconnect_cpg(&cluster); cluster_disconnect_cfg(); crm_info("Exiting %s", crm_system_name); return crm_exit(rc); }