Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F4525251
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
47 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/mcp/pacemaker.c b/mcp/pacemaker.c
index c0af3ea4c8..ca5bb104d4 100644
--- a/mcp/pacemaker.c
+++ b/mcp/pacemaker.c
@@ -1,1125 +1,1126 @@
/*
* Copyright (C) 2010 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <pacemaker.h>
#include <pwd.h>
#include <grp.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/reboot.h>
#include <crm/msg_xml.h>
#include <crm/common/ipcs.h>
#include <crm/common/mainloop.h>
#include <crm/cluster/internal.h>
#include <crm/cluster.h>
#include <dirent.h>
#include <ctype.h>
gboolean fatal_error = FALSE;
GMainLoop *mainloop = NULL;
#define PCMK_PROCESS_CHECK_INTERVAL 5
const char *local_name = NULL;
uint32_t local_nodeid = 0;
crm_trigger_t *shutdown_trigger = NULL;
const char *pid_file = "/var/run/pacemaker.pid";
typedef struct pcmk_child_s {
int pid;
long flag;
int start_seq;
int respawn_count;
gboolean respawn;
const char *name;
const char *uid;
const char *command;
gboolean active_before_startup;
} pcmk_child_t;
/* Index into the array below */
#define pcmk_child_crmd 4
#define pcmk_child_mgmtd 8
/* *INDENT-OFF* */
static pcmk_child_t pcmk_children[] = {
{ 0, crm_proc_none, 0, 0, FALSE, "none", NULL, NULL },
{ 0, crm_proc_plugin, 0, 0, FALSE, "ais", NULL, NULL },
{ 0, crm_proc_lrmd, 3, 0, TRUE, "lrmd", NULL, CRM_DAEMON_DIR"/lrmd" },
{ 0, crm_proc_cib, 1, 0, TRUE, "cib", CRM_DAEMON_USER, CRM_DAEMON_DIR"/cib" },
{ 0, crm_proc_crmd, 6, 0, TRUE, "crmd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/crmd" },
{ 0, crm_proc_attrd, 4, 0, TRUE, "attrd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/attrd" },
{ 0, crm_proc_stonithd, 0, 0, TRUE, "stonithd", NULL, NULL },
{ 0, crm_proc_pe, 5, 0, TRUE, "pengine", CRM_DAEMON_USER, CRM_DAEMON_DIR"/pengine" },
{ 0, crm_proc_mgmtd, 0, 0, TRUE, "mgmtd", NULL, HB_DAEMON_DIR"/mgmtd" },
{ 0, crm_proc_stonith_ng, 2, 0, TRUE, "stonith-ng", NULL, CRM_DAEMON_DIR"/stonithd" },
};
/* *INDENT-ON* */
static gboolean start_child(pcmk_child_t * child);
static gboolean check_active_before_startup_processes(gpointer user_data);
void update_process_clients(crm_client_t *client);
void update_process_peers(void);
void
enable_crmd_as_root(gboolean enable)
{
if (enable) {
pcmk_children[pcmk_child_crmd].uid = NULL;
} else {
pcmk_children[pcmk_child_crmd].uid = CRM_DAEMON_USER;
}
}
void
enable_mgmtd(gboolean enable)
{
if (enable) {
pcmk_children[pcmk_child_mgmtd].start_seq = 7;
} else {
pcmk_children[pcmk_child_mgmtd].start_seq = 0;
}
}
static uint32_t
get_process_list(void)
{
int lpc = 0;
uint32_t procs = 0;
if(is_classic_ais_cluster()) {
procs |= crm_proc_plugin;
}
for (lpc = 0; lpc < SIZEOF(pcmk_children); lpc++) {
if (pcmk_children[lpc].pid != 0) {
procs |= pcmk_children[lpc].flag;
}
}
return procs;
}
static void
pcmk_process_exit(pcmk_child_t * child)
{
child->pid = 0;
child->active_before_startup = FALSE;
/* Broadcast the fact that one of our processes died ASAP
*
* Try to get some logging of the cause out first though
* because we're probably about to get fenced
*
* Potentially do this only if respawn_count > N
* to allow for local recovery
*/
update_node_processes(local_nodeid, NULL, get_process_list());
child->respawn_count += 1;
if (child->respawn_count > MAX_RESPAWN) {
crm_err("Child respawn count exceeded by %s", child->name);
child->respawn = FALSE;
}
if (shutdown_trigger) {
mainloop_set_trigger(shutdown_trigger);
update_node_processes(local_nodeid, NULL, get_process_list());
} else if (child->respawn) {
gboolean fail_fast = crm_is_true(getenv("PCMK_fail_fast"));
crm_notice("Respawning failed child process: %s", child->name);
#ifdef RB_HALT_SYSTEM
if (fail_fast) {
crm_err("Rebooting system", child->name);
sync();
reboot(RB_HALT_SYSTEM);
crm_exit(DAEMON_RESPAWN_STOP);
}
#endif
start_child(child);
}
}
static void
pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)
{
pcmk_child_t *child = mainloop_child_userdata(p);
const char *name = mainloop_child_name(p);
if (signo && signo == SIGKILL) {
crm_warn("The %s process (%d) terminated with signal %d (core=%d)", name, pid, signo, core);
} else if (signo) {
crm_err("The %s process (%d) terminated with signal %d (core=%d)", name, pid, signo, core);
} else {
switch(exitcode) {
case pcmk_ok:
crm_info("The %s process (%d) exited: %s (%d)", name, pid, pcmk_strerror(exitcode), exitcode);
break;
case DAEMON_RESPAWN_STOP:
crm_warn("The %s process (%d) can no longer be respawned, shutting the cluster down.", name, pid);
child->respawn = FALSE;
fatal_error = TRUE;
pcmk_shutdown(15);
break;
case pcmk_err_machine_off:
do_crm_log_always(LOG_EMERG, "The %s process (%d) instructed the machine to power off", name, pid);
child->respawn = FALSE;
do_off();
break;
case pcmk_err_machine_reset:
do_crm_log_always(LOG_EMERG, "The %s process (%d) instructed the machine to reset", name, pid);
child->respawn = FALSE;
do_reset();
break;
default:
crm_err("The %s process (%d) exited: %s (%d)", name, pid, pcmk_strerror(exitcode), exitcode);
break;
}
}
pcmk_process_exit(child);
}
static gboolean
stop_child(pcmk_child_t * child, int signal)
{
if (signal == 0) {
signal = SIGTERM;
}
if (child->command == NULL) {
crm_debug("Nothing to do for child \"%s\"", child->name);
return TRUE;
}
if (child->pid <= 0) {
crm_trace("Client %s not running", child->name);
return TRUE;
}
errno = 0;
if (kill(child->pid, signal) == 0) {
crm_notice("Stopping %s: Sent -%d to process %d", child->name, signal, child->pid);
} else {
crm_perror(LOG_ERR, "Stopping %s: Could not send -%d to process %d failed",
child->name, signal, child->pid);
}
return TRUE;
}
static char *opts_default[] = { NULL, NULL };
static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL };
static gboolean
start_child(pcmk_child_t * child)
{
int lpc = 0;
uid_t uid = 0;
gid_t gid = 0;
struct rlimit oflimits;
gboolean use_valgrind = FALSE;
gboolean use_callgrind = FALSE;
const char *devnull = "/dev/null";
const char *env_valgrind = getenv("PCMK_valgrind_enabled");
const char *env_callgrind = getenv("PCMK_callgrind_enabled");
enum cluster_type_e stack = get_cluster_type();
child->active_before_startup = FALSE;
if (child->command == NULL) {
crm_info("Nothing to do for child \"%s\"", child->name);
return TRUE;
}
if (env_callgrind != NULL && crm_is_true(env_callgrind)) {
use_callgrind = TRUE;
use_valgrind = TRUE;
} else if (env_callgrind != NULL && strstr(env_callgrind, child->name)) {
use_callgrind = TRUE;
use_valgrind = TRUE;
} else if (env_valgrind != NULL && crm_is_true(env_valgrind)) {
use_valgrind = TRUE;
} else if (env_valgrind != NULL && strstr(env_valgrind, child->name)) {
use_valgrind = TRUE;
}
if (use_valgrind && strlen(VALGRIND_BIN) == 0) {
crm_warn("Cannot enable valgrind for %s:"
" The location of the valgrind binary is unknown", child->name);
use_valgrind = FALSE;
}
if (child->uid) {
if (crm_user_lookup(child->uid, &uid, &gid) < 0) {
crm_err("Invalid user (%s) for %s: not found", child->uid, child->name);
return FALSE;
}
crm_info("Using uid=%u and group=%u for process %s", uid, gid, child->name);
}
child->pid = fork();
CRM_ASSERT(child->pid != -1);
if (child->pid > 0) {
/* parent */
mainloop_child_add(child->pid, 0, child->name, child, pcmk_child_exit);
crm_info("Forked child %d for process %s%s", child->pid, child->name,
use_valgrind ? " (valgrind enabled: " VALGRIND_BIN ")" : "");
update_node_processes(local_nodeid, NULL, get_process_list());
return TRUE;
} else {
/* Start a new session */
(void)setsid();
/* Setup the two alternate arg arrarys */
opts_vgrind[0] = strdup(VALGRIND_BIN);
if (use_callgrind) {
opts_vgrind[1] = strdup("--tool=callgrind");
opts_vgrind[2] = strdup("--callgrind-out-file=" CRM_STATE_DIR "/callgrind.out.%p");
opts_vgrind[3] = strdup(child->command);
opts_vgrind[4] = NULL;
} else {
opts_vgrind[1] = strdup(child->command);
opts_vgrind[2] = NULL;
opts_vgrind[3] = NULL;
opts_vgrind[4] = NULL;
}
opts_default[0] = strdup(child->command);;
if(gid) {
if(stack == pcmk_cluster_corosync) {
/* Drop root privileges completely
*
* We can do this because we set uidgid.gid.${gid}=1
* via CMAP which allows these processes to connect to
* corosync
*/
if (setgid(gid) < 0) {
crm_perror(LOG_ERR, "Could not set group to %d", gid);
}
/* Keep the root group (so we can access corosync), but add the haclient group (so we can access ipc) */
} else if (initgroups(child->uid, gid) < 0) {
crm_err("Cannot initalize groups for %s: %s (%d)", child->uid, pcmk_strerror(errno), errno);
}
}
if (uid && setuid(uid) < 0) {
crm_perror(LOG_ERR, "Could not set user to %d (%s)", uid, child->uid);
}
/* Close all open file descriptors */
getrlimit(RLIMIT_NOFILE, &oflimits);
for (lpc = 0; lpc < oflimits.rlim_cur; lpc++) {
close(lpc);
}
(void)open(devnull, O_RDONLY); /* Stdin: fd 0 */
(void)open(devnull, O_WRONLY); /* Stdout: fd 1 */
(void)open(devnull, O_WRONLY); /* Stderr: fd 2 */
if (use_valgrind) {
(void)execvp(VALGRIND_BIN, opts_vgrind);
} else {
(void)execvp(child->command, opts_default);
}
crm_perror(LOG_ERR, "FATAL: Cannot exec %s", child->command);
crm_exit(DAEMON_RESPAWN_STOP);
}
return TRUE; /* never reached */
}
static gboolean
escalate_shutdown(gpointer data)
{
pcmk_child_t *child = data;
if (child->pid) {
/* Use SIGSEGV instead of SIGKILL to create a core so we can see what it was up to */
crm_err("Child %s not terminating in a timely manner, forcing", child->name);
stop_child(child, SIGSEGV);
}
return FALSE;
}
static gboolean
pcmk_shutdown_worker(gpointer user_data)
{
static int phase = 0;
static time_t next_log = 0;
static int max = SIZEOF(pcmk_children);
int lpc = 0;
if (phase == 0) {
crm_notice("Shuting down Pacemaker");
phase = max;
/* Add a second, more frequent, check to speed up shutdown */
g_timeout_add_seconds(5, check_active_before_startup_processes, NULL);
}
for (; phase > 0; phase--) {
/* dont stop anything with start_seq < 1 */
for (lpc = max - 1; lpc >= 0; lpc--) {
pcmk_child_t *child = &(pcmk_children[lpc]);
if (phase != child->start_seq) {
continue;
}
if (child->pid) {
time_t now = time(NULL);
if (child->respawn) {
next_log = now + 30;
child->respawn = FALSE;
stop_child(child, SIGTERM);
if (phase < pcmk_children[pcmk_child_crmd].start_seq) {
g_timeout_add(180000 /* 3m */ , escalate_shutdown, child);
}
} else if (now >= next_log) {
next_log = now + 30;
crm_notice("Still waiting for %s (pid=%d, seq=%d) to terminate...",
child->name, child->pid, child->start_seq);
}
return TRUE;
}
/* cleanup */
crm_debug("%s confirmed stopped", child->name);
child->pid = 0;
}
}
crm_notice("Closing watchdog device");
- watchdog_close();
+ watchdog_close(true);
/* send_cluster_id(); */
crm_notice("Shutdown complete");
g_main_loop_quit(mainloop);
if (fatal_error) {
crm_notice("Attempting to inhibit respawning after fatal error");
crm_exit(DAEMON_RESPAWN_STOP);
}
return TRUE;
}
static void
pcmk_ignore(int nsig)
{
crm_info("Ignoring signal %s (%d)", strsignal(nsig), nsig);
}
void
pcmk_shutdown(int nsig)
{
if (shutdown_trigger == NULL) {
shutdown_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, pcmk_shutdown_worker, NULL);
}
mainloop_set_trigger(shutdown_trigger);
}
static int32_t
pcmk_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
crm_trace("Connection %p", c);
if (crm_client_new(c, uid, gid) == NULL) {
return -EIO;
}
return 0;
}
static void
pcmk_ipc_created(qb_ipcs_connection_t * c)
{
crm_trace("Connection %p", c);
}
/* Exit code means? */
static int32_t
pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
{
uint32_t id = 0;
uint32_t flags = 0;
const char *task = NULL;
crm_client_t *c = crm_client_get(qbc);
xmlNode *msg = crm_ipcs_recv(c, data, size, &id, &flags);
crm_ipcs_send_ack(c, id, flags, "ack", __FUNCTION__, __LINE__);
if (msg == NULL) {
return 0;
}
task = crm_element_value(msg, F_CRM_TASK);
if (crm_str_eq(task, CRM_OP_QUIT, TRUE)) {
/* Time to quit */
crm_notice("Shutting down in responce to ticket %s (%s)",
crm_element_value(msg, F_CRM_REFERENCE), crm_element_value(msg, F_CRM_ORIGIN));
pcmk_shutdown(15);
} else if (crm_str_eq(task, CRM_OP_RM_NODE_CACHE, TRUE)) {
/* Send to everyone */
struct iovec *iov;
int id = 0;
const char *name = NULL;
crm_element_value_int(msg, XML_ATTR_ID, &id);
name = crm_element_value(msg, XML_ATTR_UNAME);
crm_notice("Instructing peers to remove references to node %s/%u", name, id);
iov = calloc(1, sizeof(struct iovec));
iov->iov_base = dump_xml_unformatted(msg);
iov->iov_len = 1 + strlen(iov->iov_base);
send_cpg_iov(iov);
} else {
update_process_clients(c);
}
free_xml(msg);
return 0;
}
/* Error code means? */
static int32_t
pcmk_ipc_closed(qb_ipcs_connection_t * c)
{
crm_client_t *client = crm_client_get(c);
if (client == NULL) {
return 0;
}
crm_trace("Connection %p", c);
crm_client_destroy(client);
return 0;
}
static void
pcmk_ipc_destroy(qb_ipcs_connection_t * c)
{
crm_trace("Connection %p", c);
pcmk_ipc_closed(c);
}
struct qb_ipcs_service_handlers mcp_ipc_callbacks = {
.connection_accept = pcmk_ipc_accept,
.connection_created = pcmk_ipc_created,
.msg_process = pcmk_ipc_dispatch,
.connection_closed = pcmk_ipc_closed,
.connection_destroyed = pcmk_ipc_destroy
};
void
update_process_clients(crm_client_t *client)
{
GHashTableIter iter;
crm_node_t *node = NULL;
xmlNode *update = create_xml_node(NULL, "nodes");
crm_trace("Sending process list to %d children", crm_hash_table_size(client_connections));
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) {
xmlNode *xml = create_xml_node(update, "node");
crm_xml_add_int(xml, "id", node->id);
crm_xml_add(xml, "uname", node->uname);
crm_xml_add(xml, "state", node->state);
crm_xml_add_int(xml, "processes", node->processes);
}
if(client) {
crm_ipcs_send(client, 0, update, crm_ipc_server_event);
} else {
g_hash_table_iter_init(&iter, client_connections);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & client)) {
crm_ipcs_send(client, 0, update, crm_ipc_server_event);
}
}
free_xml(update);
}
void
update_process_peers(void)
{
/* Do nothing for corosync-2 based clusters */
char buffer[1024];
struct iovec *iov;
int rc = 0;
memset(buffer, 0, SIZEOF(buffer));
if (local_name) {
rc = snprintf(buffer, SIZEOF(buffer) - 1, "<node uname=\"%s\" proclist=\"%u\"/>",
local_name, get_process_list());
} else {
rc = snprintf(buffer, SIZEOF(buffer) - 1, "<node proclist=\"%u\"/>", get_process_list());
}
crm_trace("Sending %s", buffer);
iov = calloc(1, sizeof(struct iovec));
iov->iov_base = strdup(buffer);
iov->iov_len = rc + 1;
send_cpg_iov(iov);
}
gboolean
update_node_processes(uint32_t id, const char *uname, uint32_t procs)
{
gboolean changed = FALSE;
crm_node_t *node = crm_get_peer(id, uname);
if (procs != 0) {
if (procs != node->processes) {
crm_debug("Node %s now has process list: %.32x (was %.32x)",
node->uname, procs, node->processes);
node->processes = procs;
changed = TRUE;
} else {
crm_trace("Node %s still has process list: %.32x", node->uname, procs);
}
}
if (changed && id == local_nodeid) {
update_process_clients(NULL);
update_process_peers();
}
return changed;
}
/* *INDENT-OFF* */
static struct crm_option long_options[] = {
/* Top-level Options */
{"help", 0, 0, '?', "\tThis text"},
{"version", 0, 0, '$', "\tVersion information" },
{"verbose", 0, 0, 'V', "\tIncrease debug output"},
{"shutdown", 0, 0, 'S', "\tInstruct Pacemaker to shutdown on this machine"},
{"features", 0, 0, 'F', "\tDisplay the full version and list of features Pacemaker was built with"},
{"-spacer-", 1, 0, '-', "\nAdditional Options:"},
{"foreground", 0, 0, 'f', "\t(Ignored) Pacemaker always runs in the foreground"},
{"pid-file", 1, 0, 'p', "\t(Ignored) Daemon pid file location"},
{NULL, 0, 0, 0}
};
/* *INDENT-ON* */
static void
mcp_chown(const char *path, uid_t uid, gid_t gid)
{
int rc = chown(path, uid, gid);
if (rc < 0) {
crm_warn("Cannot change the ownership of %s to user %s and gid %d: %s",
path, CRM_DAEMON_USER, gid, pcmk_strerror(errno));
}
}
static gboolean
check_active_before_startup_processes(gpointer user_data)
{
int start_seq = 1, lpc = 0;
static int max = SIZEOF(pcmk_children);
gboolean keep_tracking = FALSE;
for (start_seq = 1; start_seq < max; start_seq++) {
for (lpc = 0; lpc < max; lpc++) {
if (pcmk_children[lpc].active_before_startup == FALSE) {
/* we are already tracking it as a child process. */
continue;
} else if (start_seq != pcmk_children[lpc].start_seq) {
continue;
} else if (crm_pid_active(pcmk_children[lpc].pid) != 1) {
crm_notice("Process %s terminated (pid=%d)",
pcmk_children[lpc].name, pcmk_children[lpc].pid);
pcmk_process_exit(&(pcmk_children[lpc]));
continue;
}
/* at least one of the processes found at startup
* is still going, so keep this recurring timer around */
keep_tracking = TRUE;
}
}
return keep_tracking;
}
static bool
find_and_track_existing_processes(void)
{
DIR *dp;
struct dirent *entry;
struct stat statbuf;
int start_tracker = 0;
dp = opendir("/proc");
if (!dp) {
/* no proc directory to search through */
crm_notice("Can not read /proc directory to track existing components");
return FALSE;
}
while ((entry = readdir(dp)) != NULL) {
char procpath[128];
char value[64];
char key[16];
FILE *file;
int pid;
int max = SIZEOF(pcmk_children);
int i;
strcpy(procpath, "/proc/");
/* strlen("/proc/") + strlen("/status") + 1 = 14
* 128 - 14 = 114 */
strncat(procpath, entry->d_name, 114);
if (lstat(procpath, &statbuf)) {
continue;
}
if (!S_ISDIR(statbuf.st_mode) || !isdigit(entry->d_name[0])) {
continue;
}
strcat(procpath, "/status");
file = fopen(procpath, "r");
if (!file) {
continue;
}
if (fscanf(file, "%15s%63s", key, value) != 2) {
fclose(file);
continue;
}
fclose(file);
pid = atoi(entry->d_name);
if (pid <= 0) {
continue;
}
for (i = 0; i < max; i++) {
const char *name = pcmk_children[i].name;
if (pcmk_children[i].start_seq == 0) {
continue;
}
if (pcmk_children[i].flag == crm_proc_stonith_ng) {
name = "stonithd";
}
if (safe_str_eq(name, value)) {
if (crm_pid_active(pid) != 1) {
continue;
}
crm_notice("Tracking existing %s process (pid=%d)", value, pid);
pcmk_children[i].pid = pid;
pcmk_children[i].active_before_startup = TRUE;
start_tracker = 1;
}
}
}
if (start_tracker) {
g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL, check_active_before_startup_processes,
NULL);
}
closedir(dp);
return start_tracker;
}
static void
init_children_processes(void)
{
int start_seq = 1, lpc = 0;
static int max = SIZEOF(pcmk_children);
/* start any children that have not been detected */
for (start_seq = 1; start_seq < max; start_seq++) {
/* dont start anything with start_seq < 1 */
for (lpc = 0; lpc < max; lpc++) {
if (pcmk_children[lpc].pid) {
/* we are already tracking it */
continue;
}
if (start_seq == pcmk_children[lpc].start_seq) {
start_child(&(pcmk_children[lpc]));
}
}
}
/* From this point on, any daemons being started will be due to
* respawning rather than node start.
*
* This may be useful for the daemons to know
*/
setenv("PCMK_respawned", "true", 1);
}
static void
mcp_cpg_destroy(gpointer user_data)
{
crm_err("Connection destroyed");
crm_exit(ENOTCONN);
}
static void
mcp_cpg_deliver(cpg_handle_t handle,
const struct cpg_name *groupName,
uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
xmlNode *xml = string2xml(msg);
const char *task = crm_element_value(xml, F_CRM_TASK);
crm_trace("Received %s %.200s", task, msg);
if (task == NULL && nodeid != local_nodeid) {
uint32_t procs = 0;
const char *uname = crm_element_value(xml, "uname");
crm_element_value_int(xml, "proclist", (int *)&procs);
/* crm_debug("Got proclist %.32x from %s", procs, uname); */
if (update_node_processes(nodeid, uname, procs)) {
update_process_clients(NULL);
}
} else if (crm_str_eq(task, CRM_OP_RM_NODE_CACHE, TRUE)) {
int id = 0;
const char *name = NULL;
crm_element_value_int(xml, XML_ATTR_ID, &id);
name = crm_element_value(xml, XML_ATTR_UNAME);
reap_crm_member(id, name);
}
}
static void
mcp_cpg_membership(cpg_handle_t handle,
const struct cpg_name *groupName,
const struct cpg_address *member_list, size_t member_list_entries,
const struct cpg_address *left_list, size_t left_list_entries,
const struct cpg_address *joined_list, size_t joined_list_entries)
{
/* Don't care about CPG membership, but we do want to broadcast our own presence */
update_process_peers();
}
static gboolean
mcp_quorum_callback(unsigned long long seq, gboolean quorate)
{
/* Nothing to do */
return TRUE;
}
static void
mcp_quorum_destroy(gpointer user_data)
{
crm_info("connection closed");
}
static gboolean
mcp_tickle(gpointer user_data)
{
watchdog_tickle();
return TRUE;
}
int
main(int argc, char **argv)
{
int rc;
int flag;
int argerr = 0;
int option_index = 0;
bool respawned = FALSE;
gboolean shutdown = FALSE;
uid_t pcmk_uid = 0;
gid_t pcmk_gid = 0;
struct rlimit cores;
crm_ipc_t *old_instance = NULL;
qb_ipcs_service_t *ipcs = NULL;
const char *facility = daemon_option("logfacility");
static crm_cluster_t cluster;
crm_log_preinit(NULL, argc, argv);
crm_set_options(NULL, "mode [options]", long_options, "Start/Stop Pacemaker\n");
mainloop_add_signal(SIGHUP, pcmk_ignore);
while (1) {
flag = crm_get_option(argc, argv, &option_index);
if (flag == -1)
break;
switch (flag) {
case 'V':
crm_bump_log_level(argc, argv);
break;
case 'f':
/* Legacy */
break;
case 'p':
pid_file = optarg;
break;
case '$':
case '?':
crm_help(flag, EX_OK);
break;
case 'S':
shutdown = TRUE;
break;
case 'F':
printf("Pacemaker %s (Build: %s)\n Supporting v%s: %s\n", VERSION, BUILD_VERSION,
CRM_FEATURE_SET, CRM_FEATURES);
crm_exit(pcmk_ok);
default:
printf("Argument code 0%o (%c) is not (?yet?) supported\n", flag, flag);
++argerr;
break;
}
}
if (optind < argc) {
printf("non-option ARGV-elements: ");
while (optind < argc)
printf("%s ", argv[optind++]);
printf("\n");
}
if (argerr) {
crm_help('?', EX_USAGE);
}
setenv("LC_ALL", "C", 1);
setenv("HA_LOGD", "no", 1);
set_daemon_option("mcp", "true");
set_daemon_option("use_logd", "off");
crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
/* Restore the original facility so that mcp_read_config() does the right thing */
set_daemon_option("logfacility", facility);
crm_debug("Checking for old instances of %s", CRM_SYSTEM_MCP);
old_instance = crm_ipc_new(CRM_SYSTEM_MCP, 0);
crm_ipc_connect(old_instance);
if (shutdown) {
crm_debug("Terminating previous instance");
while (crm_ipc_connected(old_instance)) {
xmlNode *cmd =
create_request(CRM_OP_QUIT, NULL, NULL, CRM_SYSTEM_MCP, CRM_SYSTEM_MCP, NULL);
crm_debug(".");
crm_ipc_send(old_instance, cmd, 0, 0, NULL);
free_xml(cmd);
sleep(2);
}
crm_ipc_close(old_instance);
crm_ipc_destroy(old_instance);
crm_exit(pcmk_ok);
} else if (crm_ipc_connected(old_instance)) {
crm_ipc_close(old_instance);
crm_ipc_destroy(old_instance);
crm_err("Pacemaker is already active, aborting startup");
crm_exit(DAEMON_RESPAWN_STOP);
}
crm_ipc_close(old_instance);
crm_ipc_destroy(old_instance);
if (mcp_read_config() == FALSE) {
crm_notice("Could not obtain corosync config data, exiting");
crm_exit(ENODATA);
}
crm_notice("Starting Pacemaker %s (Build: %s): %s", VERSION, BUILD_VERSION, CRM_FEATURES);
mainloop = g_main_new(FALSE);
rc = getrlimit(RLIMIT_CORE, &cores);
if (rc < 0) {
crm_perror(LOG_ERR, "Cannot determine current maximum core size.");
} else {
if (cores.rlim_max == 0 && geteuid() == 0) {
cores.rlim_max = RLIM_INFINITY;
} else {
crm_info("Maximum core file size is: %lu", (unsigned long)cores.rlim_max);
}
cores.rlim_cur = cores.rlim_max;
rc = setrlimit(RLIMIT_CORE, &cores);
if (rc < 0) {
crm_perror(LOG_ERR,
"Core file generation will remain disabled."
" Core files are an important diagnositic tool,"
" please consider enabling them by default.");
}
#if 0
/* system() is not thread-safe, can't call from here
* Actually, its a pretty hacky way to try and achieve this anyway
*/
if (system("echo 1 > /proc/sys/kernel/core_uses_pid") != 0) {
crm_perror(LOG_ERR, "Could not enable /proc/sys/kernel/core_uses_pid");
}
#endif
}
rc = pcmk_ok;
if (crm_user_lookup(CRM_DAEMON_USER, &pcmk_uid, &pcmk_gid) < 0) {
crm_err("Cluster user %s does not exist, aborting Pacemaker startup", CRM_DAEMON_USER);
crm_exit(ENOKEY);
}
mkdir(CRM_STATE_DIR, 0750);
mcp_chown(CRM_STATE_DIR, pcmk_uid, pcmk_gid);
/* Used to store core files in */
crm_build_path(CRM_CORE_DIR, 0775);
mcp_chown(CRM_CORE_DIR, pcmk_uid, pcmk_gid);
/* Used to store blackbox dumps in */
crm_build_path(CRM_BLACKBOX_DIR, 0755);
mcp_chown(CRM_BLACKBOX_DIR, pcmk_uid, pcmk_gid);
/* Used to store policy engine inputs in */
crm_build_path(PE_STATE_DIR, 0755);
mcp_chown(PE_STATE_DIR, pcmk_uid, pcmk_gid);
/* Used to store the cluster configuration */
crm_build_path(CRM_CONFIG_DIR, 0755);
mcp_chown(CRM_CONFIG_DIR, pcmk_uid, pcmk_gid);
/* Resource agent paths are constructed by the lrmd */
ipcs = mainloop_add_ipc_server(CRM_SYSTEM_MCP, QB_IPC_NATIVE, &mcp_ipc_callbacks);
if (ipcs == NULL) {
crm_err("Couldn't start IPC server");
crm_exit(EIO);
}
/* Allows us to block shutdown */
if (cluster_connect_cfg(&local_nodeid) == FALSE) {
crm_err("Couldn't connect to Corosync's CFG service");
crm_exit(ENOPROTOOPT);
}
respawned = find_and_track_existing_processes();
/* Must happen prior to joining corosync */
if(daemon_option("watchdog")) {
int interval = 6; /* Make this configurable */
if(respawned) {
/* We can't get at their exit code, so we have to let the watchdog expire */
crm_notice("Detected existing child daemons, delaying initialization for %ds", 2*interval);
+ qb_log_fini();
sleep(2*interval);
}
sysrq_init();
mcp_make_realtime(0, 256, 256); /* Allow this to be optional? */
rc = watchdog_init(interval, 2);
if(rc == pcmk_ok) {
g_timeout_add_seconds(interval/3, mcp_tickle, NULL);
} else {
crm_exit(DAEMON_RESPAWN_STOP);
}
}
cluster.destroy = mcp_cpg_destroy;
cluster.cpg.cpg_deliver_fn = mcp_cpg_deliver;
cluster.cpg.cpg_confchg_fn = mcp_cpg_membership;
if(cluster_connect_cpg(&cluster) == FALSE) {
crm_err("Couldn't connect to Corosync's CPG service");
rc = -ENOPROTOOPT;
}
if (rc == pcmk_ok && is_corosync_cluster()) {
/* Keep the membership list up-to-date for crm_node to query */
if(cluster_connect_quorum(mcp_quorum_callback, mcp_quorum_destroy) == FALSE) {
rc = -ENOTCONN;
}
}
if(rc == pcmk_ok) {
local_name = get_local_node_name();
update_node_processes(local_nodeid, local_name, get_process_list());
mainloop_add_signal(SIGTERM, pcmk_shutdown);
mainloop_add_signal(SIGINT, pcmk_shutdown);
init_children_processes();
crm_info("Starting mainloop");
g_main_run(mainloop);
}
- watchdog_close();
+ watchdog_close(true);
if (ipcs) {
crm_trace("Closing IPC server");
mainloop_del_ipc_server(ipcs);
ipcs = NULL;
}
g_main_destroy(mainloop);
cluster_disconnect_cpg(&cluster);
cluster_disconnect_cfg();
crm_info("Exiting %s", crm_system_name);
return crm_exit(rc);
}
diff --git a/mcp/pacemaker.h b/mcp/pacemaker.h
index f3c2c2516b..f06e0c705b 100644
--- a/mcp/pacemaker.h
+++ b/mcp/pacemaker.h
@@ -1,62 +1,62 @@
/*
* Copyright (C) 2010 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/resource.h>
#include <stdint.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
#define crm_flag_none 0x00000000
#define crm_flag_members 0x00000001
#define MAX_RESPAWN 100
extern uint32_t local_nodeid;
gboolean mcp_read_config(void);
gboolean cluster_connect_cfg(uint32_t * nodeid);
gboolean cluster_disconnect_cfg(void);
gboolean update_node_processes(uint32_t node, const char *uname, uint32_t procs);
void enable_mgmtd(gboolean enable);
void enable_crmd_as_root(gboolean enable);
void pcmk_shutdown(int nsig);
void mcp_make_realtime(int priority, int stackgrowK, int heapgrowK);
void sysrq_init(void);
int watchdog_init(int interval, int mode);
int watchdog_tickle(void);
-void watchdog_close(void);
+void watchdog_close(bool disarm);
void do_crashdump(void);
void do_reset(void);
void do_off(void);
diff --git a/mcp/watchdog.c b/mcp/watchdog.c
index b69af71727..6a0007cb6b 100644
--- a/mcp/watchdog.c
+++ b/mcp/watchdog.c
@@ -1,422 +1,444 @@
/*
* Copyright (C) 2013 Lars Marowsky-Bree <lmb@suse.com>
* 2014 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <crm_internal.h>
#include <pacemaker.h>
#include <sched.h>
#include <syscall.h>
#include <sys/ioctl.h>
#include <sys/reboot.h>
#include <linux/watchdog.h>
#ifdef _POSIX_MEMLOCK
# include <sys/mman.h>
#endif
#define HOG_CHAR 0xff
static int wd_fd = -1;
static int wd_debug = 2;
static int wd_interval_s = 0;
/* Begin kernel duplication */
/* This duplicates some code from linux/ioprio.h since these are not
* included even in linux-kernel-headers. Sucks.
*
* See also ioprio_set(2) and
* https://www.kernel.org/doc/Documentation/block/ioprio.txt
*/
extern int sys_ioprio_set(int, int, int);
int ioprio_set(int which, int who, int ioprio);
inline int ioprio_set(int which, int who, int ioprio)
{
return syscall(__NR_ioprio_set, which, who, ioprio);
}
enum {
IOPRIO_CLASS_NONE,
IOPRIO_CLASS_RT,
IOPRIO_CLASS_BE,
IOPRIO_CLASS_IDLE,
};
enum {
IOPRIO_WHO_PROCESS = 1,
IOPRIO_WHO_PGRP,
IOPRIO_WHO_USER,
};
#define IOPRIO_CLASS_SHIFT (13)
#define IOPRIO_PRIO_VALUE(class, data) (((class) << IOPRIO_CLASS_SHIFT) | data)
/* End kernel duplication */
static unsigned char
mcp_stack_hogger(unsigned char * inbuf, int kbytes)
{
unsigned char buf[1024];
if(kbytes <= 0) {
return HOG_CHAR;
}
if (inbuf == NULL) {
memset(buf, HOG_CHAR, sizeof(buf));
} else {
memcpy(buf, inbuf, sizeof(buf));
}
if (kbytes > 0) {
return mcp_stack_hogger(buf, kbytes-1);
} else {
return buf[sizeof(buf)-1];
}
}
static void
mcp_malloc_hogger(int kbytes)
{
int j;
void**chunks;
int chunksize = 1024;
if(kbytes <= 0) {
return;
}
/*
* We could call mallopt(M_MMAP_MAX, 0) to disable it completely,
* but we've already called mlockall()
*
* We could also call mallopt(M_TRIM_THRESHOLD, -1) to prevent malloc
* from giving memory back to the system, but we've already called
* mlockall(MCL_FUTURE), so there's no need.
*/
chunks = malloc(kbytes * sizeof(void *));
if (chunks == NULL) {
crm_warn("Could not preallocate chunk array");
return;
}
for (j=0; j < kbytes; ++j) {
chunks[j] = malloc(chunksize);
if (chunks[j] == NULL) {
crm_warn("Could not preallocate block %d", j);
} else {
memset(chunks[j], 0, chunksize);
}
}
for (j=0; j < kbytes; ++j) {
free(chunks[j]);
}
free(chunks);
}
static void mcp_memlock(int stackgrowK, int heapgrowK)
{
#ifdef _POSIX_MEMLOCK
/*
* We could call setrlimit(RLIMIT_MEMLOCK,...) with a large
* number, but the mcp runs as root and mlock(2) says:
*
* Since Linux 2.6.9, no limits are placed on the amount of memory
* that a privileged process may lock, and this limit instead
* governs the amount of memory that an unprivileged process may
* lock.
*/
if (mlockall(MCL_CURRENT|MCL_FUTURE) >= 0) {
crm_info("Locked ourselves in memory");
/* Now allocate some extra pages (MCL_FUTURE will ensure they stay around) */
mcp_malloc_hogger(heapgrowK);
mcp_stack_hogger(NULL, stackgrowK);
} else {
crm_perror(LOG_ERR, "Unable to lock ourselves into memory");
}
#else
crm_err("Unable to lock ourselves into memory");
#endif
}
void
mcp_make_realtime(int priority, int stackgrowK, int heapgrowK)
{
if(priority < 0) {
return;
}
#ifdef SCHED_RR
{
int pcurrent = 0;
int pmin = sched_get_priority_min(SCHED_RR);
int pmax = sched_get_priority_max(SCHED_RR);
if (priority == 0) {
priority = pmax;
} else if (priority < pmin) {
priority = pmin;
} else if (priority > pmax) {
priority = pmax;
}
pcurrent = sched_getscheduler(0);
if (pcurrent < 0) {
crm_perror(LOG_ERR, "Unable to get scheduler priority");
} else if(pcurrent < priority) {
struct sched_param sp;
memset(&sp, 0, sizeof(sp));
sp.sched_priority = priority;
if (sched_setscheduler(0, SCHED_RR, &sp) < 0) {
crm_perror(LOG_ERR, "Unable to set scheduler priority to %d", priority);
} else {
crm_info("Scheduler priority is now %d", priority);
}
}
/* Do we need to update the I/O priority since we'll be writing to the watchdog? */
if (ioprio_set(IOPRIO_WHO_PROCESS, getpid(),
IOPRIO_PRIO_VALUE(IOPRIO_CLASS_RT, 1)) != 0) {
crm_perror(LOG_ERR, "Could not update I/O priority");
}
}
#else
crm_err("System does not support updating the scheduler priority");
#endif
mcp_memlock(heapgrowK, stackgrowK);
}
static int
watchdog_init_interval(int timeout)
{
wd_interval_s = timeout;
if (wd_fd < 0) {
return 0;
}
if (wd_interval_s < 1) {
crm_info("NOT setting watchdog timeout on explicit user request!");
return 0;
}
if (ioctl(wd_fd, WDIOC_SETTIMEOUT, &wd_interval_s) < 0) {
int rc = errno;
crm_perror(LOG_ERR, "Failed to set watchdog timer to %u seconds", timeout);
crm_crit("Please validate your watchdog configuration!");
crm_crit("Choose a different watchdog driver or specify -T to skip this if you are completely sure.");
return -rc;
} else {
crm_notice("Set watchdog timeout to %u seconds", timeout);
}
return 0;
}
int
watchdog_tickle(void)
{
if (wd_fd >= 0) {
if (write(wd_fd, "", 1) != 1) {
int rc = errno;
crm_perror(LOG_ERR, "Could not write to %s", daemon_option("watchdog"));
return -rc;
}
}
return 0;
}
int
watchdog_init(int interval, int mode)
{
int rc = 0;
const char *device = daemon_option("watchdog");
wd_debug = mode;
if (wd_fd < 0 && device != NULL) {
wd_fd = open(device, O_WRONLY);
if (wd_fd >= 0) {
crm_notice("Using watchdog device: %s", device);
rc = watchdog_init_interval(interval);
if(rc == 0) {
rc = watchdog_tickle();
}
} else {
rc = errno;
crm_perror(LOG_ERR, "Cannot open watchdog device %s", device);
}
}
return rc;
}
void
-watchdog_close(void)
+watchdog_close(bool disarm)
{
- if (wd_fd >= 0) {
- if (write(wd_fd, "V", 1) != 1) {
- crm_perror(LOG_ERR, "Cannot write magic character to %s", daemon_option("watchdog"));
+ if (wd_fd < 0) {
+ return;
+ }
+
+ if (disarm) {
+ int r;
+ int flags = WDIOS_DISABLECARD;;
+
+ /* Explicitly disarm it */
+ r = ioctl(wd_fd, WDIOC_SETOPTIONS, &flags);
+ if (r < 0) {
+ crm_perror(LOG_WARNING, "Failed to disable hardware watchdog %s", daemon_option("watchdog"));
}
- if (close(wd_fd) < 0) {
- crm_perror(LOG_ERR, "Watchdog close(%d) failed", wd_fd);
+
+ /* To be sure, use magic close logic, too */
+ for (;;) {
+ if (write(wd_fd, "V", 1) > 0) {
+ break;
+ }
+ crm_perror(LOG_ERR, "Cannot disable watchdog device %s", daemon_option("watchdog"));
}
- wd_fd = -1;
}
+
+ if (close(wd_fd) < 0) {
+ crm_perror(LOG_ERR, "Watchdog close(%d) failed", wd_fd);
+ }
+
+ wd_fd = -1;
}
#define SYSRQ "/proc/sys/kernel/sysrq"
void
sysrq_init(void)
{
FILE* procf;
int c;
procf = fopen(SYSRQ, "r");
if (!procf) {
crm_perror(LOG_ERR, "Cannot open "SYSRQ" for read");
return;
}
if (fscanf(procf, "%d", &c) != 1) {
crm_perror(LOG_ERR, "Parsing "SYSRQ" failed");
c = 0;
}
fclose(procf);
if (c == 1)
return;
/* 8 for debugging dumps of processes, 128 for reboot/poweroff */
c |= 136;
procf = fopen(SYSRQ, "w");
if (!procf) {
crm_perror(LOG_ERR, "Cannot write to "SYSRQ);
return;
}
fprintf(procf, "%d", c);
fclose(procf);
return;
}
static void
sysrq_trigger(char t)
{
FILE *procf;
procf = fopen("/proc/sysrq-trigger", "a");
if (!procf) {
crm_perror(LOG_ERR, "Opening sysrq-trigger failed");
return;
}
crm_info("sysrq-trigger: %c\n", t);
fprintf(procf, "%c\n", t);
fclose(procf);
return;
}
static void
do_exit(char kind)
{
int rc = pcmk_ok;
const char *reason = NULL;
if (kind == 'c') {
crm_notice("Initiating kdump");
} else if (wd_debug == 1) {
crm_warn("Initiating kdump instead of panicing the node (DEBUG MODE)");
kind = 'c';
} else if (wd_debug == 2) {
crm_warn("Shutting down the cluster instead of panicing the node (DEBUG MODE)");
- watchdog_close();
+ watchdog_close(true);
pcmk_shutdown(15);
} else if (wd_debug == 3) {
/* Give the system some time to flush logs to disk before rebooting. */
crm_warn("Delaying node panic by 10s (DEBUG MODE)");
- watchdog_close();
+ watchdog_close(true);
sync();
sleep(10);
} else {
rc = DAEMON_RESPAWN_STOP;
}
switch(kind) {
case 'b':
reason = "reboot";
break;
case 'c':
reason = "crashdump";
- watchdog_close();
+ watchdog_close(true);
break;
case 'o':
reason = "off";
break;
default:
reason = "unknown";
break;
}
do_crm_log_always(LOG_EMERG, "Rebooting system: %s", reason);
sync();
- sysrq_trigger(kind);
-
if(kind != 'c') {
+ watchdog_close(false);
+ sysrq_trigger(kind);
rc = reboot(RB_AUTOBOOT);
do_crm_log_always(LOG_EMERG, "Reboot failed: %s (%d)", pcmk_strerror(rc), rc);
+
+ } else {
+ sysrq_trigger(kind);
}
sleep(wd_interval_s * 2);
pcmk_shutdown(15);
}
void
do_crashdump(void)
{
do_exit('c');
}
void
do_reset(void)
{
do_exit('b');
}
void
do_off(void)
{
do_exit('o');
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Thu, Jun 26, 6:12 PM (22 h, 45 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1959369
Default Alt Text
(47 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment