diff --git a/src/sbd-cluster.c b/src/sbd-cluster.c index b3deeb5..372a16a 100644 --- a/src/sbd-cluster.c +++ b/src/sbd-cluster.c @@ -1,404 +1,405 @@ /* * Copyright (C) 2013 Lars Marowsky-Bree * * Based on crm_mon.c, which was: * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include "sbd.h" #include #include #include #include #include #include #include #include #include #include #include //undef SUPPORT_PLUGIN //define SUPPORT_PLUGIN 1 static bool remote_node = false; static pid_t remoted_pid = 0; static int reconnect_msec = 1000; static GMainLoop *mainloop = NULL; static guint notify_timer = 0; static crm_cluster_t cluster; static gboolean sbd_remote_check(gpointer user_data); static long unsigned int find_pacemaker_remote(void); #if SUPPORT_PLUGIN static void sbd_plugin_membership_dispatch(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { if(msg_len > 0) { set_servant_health(pcmk_health_online, LOG_INFO, "Connected to %s", name_for_cluster_type(get_cluster_type())); } else { set_servant_health(pcmk_health_unclean, LOG_WARNING, "Broken %s message", name_for_cluster_type(get_cluster_type())); } notify_parent(); return; } #endif #if SUPPORT_COROSYNC void sbd_cpg_membership_dispatch(cpg_handle_t handle, const struct cpg_name *groupName, const struct cpg_address *member_list, size_t member_list_entries, const struct cpg_address *left_list, size_t left_list_entries, const struct cpg_address *joined_list, size_t joined_list_entries) { if(member_list_entries > 0) { set_servant_health(pcmk_health_online, LOG_INFO, "Connected to %s", name_for_cluster_type(get_cluster_type())); } else { set_servant_health(pcmk_health_unclean, LOG_WARNING, "Empty %s membership", name_for_cluster_type(get_cluster_type())); } notify_parent(); } #endif static gboolean notify_timer_cb(gpointer data) { enum cluster_type_e stack = get_cluster_type(); switch (stack) { case pcmk_cluster_classic_ais: send_cluster_text(crm_class_quorum, NULL, TRUE, NULL, crm_msg_ais); break; case pcmk_cluster_corosync: case pcmk_cluster_cman: /* TODO - Make a CPG call and only call notify_parent() when we get a reply */ notify_parent(); break; case pcmk_cluster_unknown: if(remote_node) { sbd_remote_check(NULL); } default: break; } return TRUE; } static void sbd_membership_connect(void) { bool connected = false; cl_log(LOG_NOTICE, "Attempting cluster connection"); while(connected == false) { enum cluster_type_e stack = get_cluster_type(); cl_log(LOG_INFO, "Attempting connection to %s", name_for_cluster_type(stack)); if(crm_cluster_connect(&cluster)) { connected = true; } if(connected == false && get_cluster_type() == pcmk_cluster_unknown) { cl_log(LOG_INFO, "Attempting connection to Pacemaker Remote"); /* Go looking for the pacemaker remote process */ if(find_pacemaker_remote() > 0) { connected = true; } } if(connected == false) { cl_log(LOG_INFO, "Failed, retrying in %ds", reconnect_msec / 1000); sleep(reconnect_msec / 1000); } } set_servant_health(pcmk_health_transient, LOG_NOTICE, "Connected"); notify_parent(); notify_timer_cb(NULL); } static void sbd_membership_destroy(gpointer user_data) { cl_log(LOG_WARNING, "Lost connection to %s", name_for_cluster_type(get_cluster_type())); set_servant_health(pcmk_health_unclean, LOG_ERR, "Cluster connection terminated"); notify_parent(); /* Attempt to reconnect, the watchdog will take the node down if the problem isn't transient */ sbd_membership_connect(); } /* * \internal * \brief Get process ID and name associated with a /proc directory entry * * \param[in] entry Directory entry (must be result of readdir() on /proc) * \param[out] name If not NULL, a char[64] to hold the process name * \param[out] pid If not NULL, will be set to process ID of entry * * \return 0 on success, -1 if entry is not for a process or info not found * * \note This should be called only on Linux systems, as not all systems that * support /proc store process names and IDs in the same way. * Copied from the Pacemaker implementation. */ int sbd_procfs_process_info(struct dirent *entry, char *name, int *pid) { int fd, local_pid; FILE *file; struct stat statbuf; char key[16] = { 0 }, procpath[128] = { 0 }; /* We're only interested in entries whose name is a PID, * so skip anything non-numeric or that is too long. * * 114 = 128 - strlen("/proc/") - strlen("/status") - 1 */ local_pid = atoi(entry->d_name); if ((local_pid <= 0) || (strlen(entry->d_name) > 114)) { return -1; } if (pid) { *pid = local_pid; } /* Get this entry's file information */ strcpy(procpath, "/proc/"); strcat(procpath, entry->d_name); fd = open(procpath, O_RDONLY); if (fd < 0 ) { return -1; } if (fstat(fd, &statbuf) < 0) { close(fd); return -1; } close(fd); /* We're only interested in subdirectories */ if (!S_ISDIR(statbuf.st_mode)) { return -1; } /* Read the first entry ("Name:") from the process's status file. * We could handle the valgrind case if we parsed the cmdline file * instead, but that's more of a pain than it's worth. */ if (name != NULL) { strcat(procpath, "/status"); file = fopen(procpath, "r"); if (!file) { return -1; } if ((fscanf(file, "%15s%63s", key, name) != 2) || safe_str_neq(key, "Name:")) { fclose(file); return -1; } fclose(file); } return 0; } static gboolean sbd_remote_check(gpointer user_data) { static int have_proc_pid = 0; int running = 0; if(have_proc_pid == 0) { char proc_path[PATH_MAX], exe_path[PATH_MAX]; /* check to make sure pid hasn't been reused by another process */ snprintf(proc_path, sizeof(proc_path), "/proc/%lu/exe", (long unsigned int)getpid()); have_proc_pid = 1; if(readlink(proc_path, exe_path, PATH_MAX - 1) < 0) { have_proc_pid = -1; } } if (remoted_pid <= 0) { set_servant_health(pcmk_health_transient, LOG_WARNING, "No Pacemaker Remote connection"); goto notify; } else if (kill(remoted_pid, 0) < 0 && errno == ESRCH) { /* Not running */ } else if(have_proc_pid == -1) { running = 1; } else { int rc = 0; char proc_path[PATH_MAX], exe_path[PATH_MAX], expected_path[PATH_MAX]; /* check to make sure pid hasn't been reused by another process */ snprintf(proc_path, sizeof(proc_path), "/proc/%lu/exe", (long unsigned int)remoted_pid); rc = readlink(proc_path, exe_path, PATH_MAX - 1); if (rc < 0) { crm_perror(LOG_ERR, "Could not read from %s", proc_path); goto done; } exe_path[rc] = 0; rc = snprintf(expected_path, sizeof(proc_path), "%s/pacemaker_remoted", SBINDIR); expected_path[rc] = 0; if (strcmp(exe_path, expected_path) == 0) { running = 1; } } done: if(running) { set_servant_health(pcmk_health_online, LOG_INFO, "Connected to Pacemaker Remote %lu", (long unsigned int)remoted_pid); } else { set_servant_health(pcmk_health_unclean, LOG_WARNING, "Connection to Pacemaker Remote %lu lost", (long unsigned int)remoted_pid); } notify: notify_parent(); if(running == 0) { sbd_membership_connect(); } return true; } static long unsigned int find_pacemaker_remote(void) { DIR *dp; char entry_name[64]; struct dirent *entry; dp = opendir("/proc"); if (!dp) { /* no proc directory to search through */ cl_log(LOG_NOTICE, "Can not read /proc directory to track existing components"); return FALSE; } while ((entry = readdir(dp)) != NULL) { int pid; if (sbd_procfs_process_info(entry, entry_name, &pid) < 0) { continue; } if (strcmp(entry_name, "pacemaker_remoted") == 0) { cl_log(LOG_NOTICE, "Found Pacemaker Remote at PID %lu", pid); remoted_pid = pid; remote_node = true; break; } } closedir(dp); return remoted_pid; } static void clean_up(int rc) { return; } static void cluster_shutdown(int nsig) { clean_up(0); } int servant_cluster(const char *diskname, int mode, const void* argp) { enum cluster_type_e cluster_stack = get_cluster_type(); + crm_system_name = strdup("sbd:cluster"); cl_log(LOG_INFO, "Monitoring Cluster health"); set_proc_title("sbd: watcher: Cluster"); switch (cluster_stack) { #if SUPPORT_PLUGIN case pcmk_cluster_classic_ais: cluster.destroy = sbd_membership_destroy; cluster.cpg.cpg_deliver_fn = sbd_plugin_membership_dispatch; sbd_membership_connect(); break; #endif #if SUPPORT_COROSYNC case pcmk_cluster_corosync: case pcmk_cluster_cman: cluster.destroy = sbd_membership_destroy; cluster.cpg.cpg_confchg_fn = sbd_cpg_membership_dispatch; sbd_membership_connect(); break; #endif case pcmk_cluster_unknown: sbd_membership_connect(); break; default: cl_log(LOG_ERR, "Unsupported cluster type: %s", name_for_cluster_type(cluster_stack)); exit(1); break; } /* stonith_our_uname = cluster.uname; */ /* stonith_our_uuid = cluster.uuid; */ mainloop = g_main_new(FALSE); notify_timer = g_timeout_add(timeout_loop * 1000, notify_timer_cb, NULL); mainloop_add_signal(SIGTERM, cluster_shutdown); mainloop_add_signal(SIGINT, cluster_shutdown); g_main_run(mainloop); g_main_destroy(mainloop); clean_up(0); return 0; /* never reached */ } diff --git a/src/sbd-pacemaker.c b/src/sbd-pacemaker.c index 7b54266..2319722 100644 --- a/src/sbd-pacemaker.c +++ b/src/sbd-pacemaker.c @@ -1,425 +1,426 @@ /* * Copyright (C) 2013 Lars Marowsky-Bree * * Based on crm_mon.c, which was: * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* TODO list: * * - Trying to shutdown a node if no devices are up will fail, since SBD * currently uses a message via the disk to achieve this. * * - Shutting down cluster nodes while the majority of devices is down * will eventually take the cluster below the quorum threshold, at which * time the remaining cluster nodes will all immediately suicide. * */ #include "sbd.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern int disk_count; static void clean_up(int rc); static void crm_diff_update(const char *event, xmlNode * msg); static int cib_connect(gboolean full); static void compute_status(pe_working_set_t * data_set); static gboolean mon_refresh_state(gpointer user_data); static GMainLoop *mainloop = NULL; static guint timer_id_reconnect = 0; static guint timer_id_notify = 0; static int reconnect_msec = 1000; static int cib_connected = 0; static cib_t *cib = NULL; static xmlNode *current_cib = NULL; static long last_refresh = 0; static gboolean mon_timer_reconnect(gpointer data) { int rc = 0; if (timer_id_reconnect > 0) { g_source_remove(timer_id_reconnect); } rc = cib_connect(TRUE); if (rc != 0) { cl_log(LOG_WARNING, "CIB reconnect failed: %d", rc); timer_id_reconnect = g_timeout_add(reconnect_msec, mon_timer_reconnect, NULL); } else { cl_log(LOG_INFO, "CIB reconnect successful"); } return FALSE; } static void mon_cib_connection_destroy(gpointer user_data) { if (cib) { cib->cmds->signoff(cib); set_servant_health(pcmk_health_transient, LOG_WARNING, "Disconnected from CIB"); timer_id_reconnect = g_timeout_add(reconnect_msec, mon_timer_reconnect, NULL); } cib_connected = 0; return; } static gboolean mon_timer_notify(gpointer data) { static int counter = 0; int counter_max = timeout_watchdog / timeout_loop; if (timer_id_notify > 0) { g_source_remove(timer_id_notify); } if (cib_connected) { if (counter == counter_max) { free_xml(current_cib); current_cib = get_cib_copy(cib); mon_refresh_state(NULL); counter = 0; } else { cib->cmds->noop(cib, 0); notify_parent(); counter++; } } timer_id_notify = g_timeout_add(timeout_loop * 1000, mon_timer_notify, NULL); return FALSE; } /* * Mainloop signal handler. */ static void mon_shutdown(int nsig) { clean_up(0); } static int cib_connect(gboolean full) { int rc = 0; CRM_CHECK(cib != NULL, return -EINVAL); cib_connected = 0; crm_xml_init(); if (cib->state != cib_connected_query && cib->state != cib_connected_command) { rc = cib->cmds->signon(cib, crm_system_name, cib_query); if (rc != 0) { return rc; } current_cib = get_cib_copy(cib); mon_refresh_state(NULL); if (full) { if (rc == 0) { rc = cib->cmds->set_connection_dnotify(cib, mon_cib_connection_destroy); if (rc == -EPROTONOSUPPORT) { /* Notification setup failed, won't be able to reconnect after failure */ rc = 0; } } if (rc == 0) { cib->cmds->del_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update); rc = cib->cmds->add_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update); } if (rc != 0) { /* Notification setup failed, could not monitor CIB actions */ clean_up(-rc); } } } if (!rc) { cib_connected = 1; } return rc; } static void compute_status(pe_working_set_t * data_set) { static int updates = 0; static int ever_had_quorum = FALSE; node_t *node = pe_find_node(data_set->nodes, local_uname); updates++; if (data_set->dc_node == NULL) { set_servant_health(pcmk_health_transient, LOG_INFO, "We don't have a DC right now."); notify_parent(); return; } if (node == NULL) { set_servant_health(pcmk_health_unknown, LOG_WARNING, "Node state: %s is UNKNOWN", local_uname); } else if (node->details->online == FALSE) { set_servant_health(pcmk_health_unknown, LOG_WARNING, "Node state: OFFLINE"); } else if (node->details->unclean) { set_servant_health(pcmk_health_unclean, LOG_WARNING, "Node state: UNCLEAN"); } else if (node->details->pending) { set_servant_health(pcmk_health_pending, LOG_WARNING, "Node state: pending"); #if 0 } else if (node->details->shutdown) { set_servant_health(pcmk_health_shutdown, LOG_WARNING, "Node state: shutting down"); #endif } else if (data_set->flags & pe_flag_have_quorum) { set_servant_health(pcmk_health_online, LOG_INFO, "Node state: online"); ever_had_quorum = TRUE; } else if(disk_count > 0) { set_servant_health(pcmk_health_noquorum, LOG_WARNING, "Quorum lost"); } else if(ever_had_quorum == FALSE) { set_servant_health(pcmk_health_online, LOG_INFO, "We do not have quorum yet"); } else { /* We lost quorum, and there are no disks present * Setting healthy > 2 here will result in us self-fencing */ switch (data_set->no_quorum_policy) { case no_quorum_freeze: set_servant_health(pcmk_health_transient, LOG_INFO, "Quorum lost: Freeze resources"); break; case no_quorum_stop: set_servant_health(pcmk_health_transient, LOG_INFO, "Quorum lost: Stop ALL resources"); break; case no_quorum_ignore: set_servant_health(pcmk_health_transient, LOG_INFO, "Quorum lost: Ignore"); break; case no_quorum_suicide: set_servant_health(pcmk_health_unclean, LOG_INFO, "Quorum lost: Self-fence"); break; } } notify_parent(); return; } static crm_trigger_t *refresh_trigger = NULL; static gboolean mon_trigger_refresh(gpointer user_data) { mainloop_set_trigger(refresh_trigger); mon_refresh_state(NULL); return FALSE; } static void crm_diff_update(const char *event, xmlNode * msg) { int rc = -1; const char *op = NULL; long now = time(NULL); static int updates = 0; static mainloop_timer_t *refresh_timer = NULL; if(refresh_timer == NULL) { refresh_timer = mainloop_timer_add("refresh", 2000, FALSE, mon_trigger_refresh, NULL); refresh_trigger = mainloop_add_trigger(G_PRIORITY_LOW, mon_refresh_state, refresh_timer); } if (current_cib != NULL) { xmlNode *cib_last = current_cib; current_cib = NULL; rc = cib_apply_patch_event(msg, cib_last, ¤t_cib, LOG_DEBUG); free_xml(cib_last); switch(rc) { case -pcmk_err_diff_resync: case -pcmk_err_diff_failed: crm_warn("[%s] %s Patch aborted: %s (%d)", event, op, pcmk_strerror(rc), rc); break; case pcmk_ok: updates++; break; default: crm_notice("[%s] %s ABORTED: %s (%d)", event, op, pcmk_strerror(rc), rc); break; } } if (current_cib == NULL) { current_cib = get_cib_copy(cib); } /* Refresh * - immediately if the last update was more than 5s ago * - every 10 updates * - at most 2s after the last update */ if (updates > 10 || (now - last_refresh) > (reconnect_msec / 1000)) { mon_refresh_state(refresh_timer); updates = 0; } else { mainloop_set_trigger(refresh_trigger); mainloop_timer_start(refresh_timer); } } static gboolean mon_refresh_state(gpointer user_data) { xmlNode *cib_copy = NULL; pe_working_set_t data_set; if(current_cib == NULL) { return FALSE; } if(user_data) { mainloop_timer_t *timer = user_data; mainloop_timer_stop(timer); } cib_copy = copy_xml(current_cib); if (cli_config_update(&cib_copy, NULL, FALSE) == FALSE) { cl_log(LOG_WARNING, "cli_config_update() failed - forcing reconnect to CIB"); if (cib) { cib->cmds->signoff(cib); } } else { last_refresh = time(NULL); set_working_set_defaults(&data_set); data_set.input = cib_copy; data_set.flags |= pe_flag_have_stonith_resource; cluster_status(&data_set); compute_status(&data_set); cleanup_calculations(&data_set); } return FALSE; } static void clean_up(int rc) { if (cib != NULL) { cib->cmds->signoff(cib); cib_delete(cib); cib = NULL; } if (rc >= 0) { exit(rc); } return; } int servant_pcmk(const char *diskname, int mode, const void* argp) { int exit_code = 0; + crm_system_name = strdup("sbd:pcmk"); cl_log(LOG_INFO, "Monitoring Pacemaker health"); set_proc_title("sbd: watcher: Pacemaker"); setenv("PCMK_watchdog", "true", 1); if(debug == 0) { /* We don't want any noisy crm messages */ set_crm_log_level(LOG_CRIT); } if (current_cib == NULL) { cib = cib_new(); do { exit_code = cib_connect(TRUE); if (exit_code != 0) { sleep(reconnect_msec / 1000); } } while (exit_code == -ENOTCONN); if (exit_code != 0) { clean_up(-exit_code); } } mainloop = g_main_new(FALSE); mainloop_add_signal(SIGTERM, mon_shutdown); mainloop_add_signal(SIGINT, mon_shutdown); timer_id_notify = g_timeout_add(timeout_loop * 1000, mon_timer_notify, NULL); g_main_run(mainloop); g_main_destroy(mainloop); clean_up(0); return 0; /* never reached */ }