Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F4624287
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
48 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/mcp/corosync.c b/mcp/corosync.c
index 28a7ff7603..adc57cc54d 100644
--- a/mcp/corosync.c
+++ b/mcp/corosync.c
@@ -1,634 +1,635 @@
/*
* Copyright (C) 2010 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <pacemaker.h>
#include <sys/utsname.h>
#include <sys/stat.h> /* for calls to stat() */
#include <libgen.h> /* For basename() and dirname() */
#include <sys/types.h>
#include <pwd.h> /* For getpwname() */
#include <corosync/hdb.h>
#include <corosync/cfg.h>
#include <corosync/cpg.h>
#if HAVE_CONFDB
# include <corosync/confdb.h>
#endif
#include <crm/cluster/internal.h>
#include <crm/common/mainloop.h>
#if SUPPORT_CMAN
# include <libcman.h>
#endif
#if HAVE_CMAP
# include <corosync/cmap.h>
#endif
static struct cpg_name cpg_group = {
.length = 0,
.value[0] = 0,
};
enum cluster_type_e stack = pcmk_cluster_unknown;
static cpg_handle_t cpg_handle;
static corosync_cfg_handle_t cfg_handle;
/* =::=::=::= CFG - Shutdown stuff =::=::=::= */
static void
cfg_shutdown_callback(corosync_cfg_handle_t h, corosync_cfg_shutdown_flags_t flags)
{
crm_info("Corosync wants to shut down: %s",
(flags == COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE) ? "immediate" :
(flags == COROSYNC_CFG_SHUTDOWN_FLAG_REGARDLESS) ? "forced" : "optional");
/* Never allow corosync to shut down while we're running */
corosync_cfg_replyto_shutdown(h, COROSYNC_CFG_SHUTDOWN_FLAG_NO);
}
static corosync_cfg_callbacks_t cfg_callbacks = {
.corosync_cfg_shutdown_callback = cfg_shutdown_callback,
};
static int
pcmk_cfg_dispatch(gpointer user_data)
{
corosync_cfg_handle_t *handle = (corosync_cfg_handle_t *) user_data;
cs_error_t rc = corosync_cfg_dispatch(*handle, CS_DISPATCH_ALL);
if (rc != CS_OK) {
return -1;
}
return 0;
}
static void
cfg_connection_destroy(gpointer user_data)
{
crm_err("Connection destroyed");
cfg_handle = 0;
pcmk_shutdown(SIGTERM);
}
gboolean
cluster_disconnect_cfg(void)
{
if (cfg_handle) {
corosync_cfg_finalize(cfg_handle);
cfg_handle = 0;
}
pcmk_shutdown(SIGTERM);
return TRUE;
}
#define cs_repeat(counter, max, code) do { \
code; \
if(rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { \
counter++; \
crm_debug("Retrying operation after %ds", counter); \
sleep(counter); \
} else { \
break; \
} \
} while(counter < max)
gboolean
cluster_connect_cfg(uint32_t * nodeid)
{
cs_error_t rc;
int fd = 0, retries = 0;
static struct mainloop_fd_callbacks cfg_fd_callbacks = {
.dispatch = pcmk_cfg_dispatch,
.destroy = cfg_connection_destroy,
};
cs_repeat(retries, 30, rc = corosync_cfg_initialize(&cfg_handle, &cfg_callbacks));
if (rc != CS_OK) {
crm_err("corosync cfg init error %d", rc);
return FALSE;
}
rc = corosync_cfg_fd_get(cfg_handle, &fd);
if (rc != CS_OK) {
crm_err("corosync cfg fd_get error %d", rc);
goto bail;
}
retries = 0;
cs_repeat(retries, 30, rc = corosync_cfg_local_get(cfg_handle, nodeid));
if (rc != CS_OK) {
crm_err("corosync cfg local_get error %d", rc);
goto bail;
}
crm_debug("Our nodeid: %d", *nodeid);
mainloop_add_fd("corosync-cfg", G_PRIORITY_DEFAULT, fd, &cfg_handle, &cfg_fd_callbacks);
return TRUE;
bail:
corosync_cfg_finalize(cfg_handle);
return FALSE;
}
/* =::=::=::= CPG - Closed Process Group Messaging =::=::=::= */
static int
pcmk_cpg_dispatch(gpointer user_data)
{
cpg_handle_t *handle = (cpg_handle_t *) user_data;
cs_error_t rc = cpg_dispatch(*handle, CS_DISPATCH_ALL);
if (rc != CS_OK) {
return -1;
}
return 0;
}
static void
cpg_connection_destroy(gpointer user_data)
{
crm_err("Connection destroyed");
cpg_handle = 0;
crm_exit(1);
}
static void
pcmk_cpg_deliver(cpg_handle_t handle,
const struct cpg_name *groupName,
uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
if (nodeid != local_nodeid) {
uint32_t procs = 0;
xmlNode *xml = string2xml(msg);
const char *uname = crm_element_value(xml, "uname");
crm_element_value_int(xml, "proclist", (int *)&procs);
/* crm_debug("Got proclist %.32x from %s", procs, uname); */
if (update_node_processes(nodeid, uname, procs)) {
update_process_clients();
}
}
}
static void
pcmk_cpg_membership(cpg_handle_t handle,
const struct cpg_name *groupName,
const struct cpg_address *member_list, size_t member_list_entries,
const struct cpg_address *left_list, size_t left_list_entries,
const struct cpg_address *joined_list, size_t joined_list_entries)
{
/* Don't care about CPG membership */
update_process_peers();
}
cpg_callbacks_t cpg_callbacks = {
.cpg_deliver_fn = pcmk_cpg_deliver,
.cpg_confchg_fn = pcmk_cpg_membership,
};
gboolean
cluster_disconnect_cpg(void)
{
if (cpg_handle) {
cpg_finalize(cpg_handle);
cpg_handle = 0;
}
return TRUE;
}
gboolean
cluster_connect_cpg(void)
{
cs_error_t rc;
unsigned int nodeid;
int fd;
int retries = 0;
static struct mainloop_fd_callbacks cpg_fd_callbacks = {
.dispatch = pcmk_cpg_dispatch,
.destroy = cpg_connection_destroy,
};
strcpy(cpg_group.value, "pcmk");
cpg_group.length = strlen(cpg_group.value) + 1;
retries = 0;
cs_repeat(retries, 30, rc = cpg_initialize(&cpg_handle, &cpg_callbacks));
if (rc != CS_OK) {
crm_err("corosync cpg init error %d", rc);
return FALSE;
}
rc = cpg_fd_get(cpg_handle, &fd);
if (rc != CS_OK) {
crm_err("corosync cpg fd_get error %d", rc);
goto bail;
}
retries = 0;
cs_repeat(retries, 30, rc = cpg_local_get(cpg_handle, &nodeid));
if (rc != CS_OK) {
crm_err("corosync cpg local_get error %d", rc);
goto bail;
}
crm_debug("Our nodeid: %d", nodeid);
retries = 0;
cs_repeat(retries, 30, rc = cpg_join(cpg_handle, &cpg_group));
if (rc != CS_OK) {
crm_err("Could not join the CPG group '%s': %d", crm_system_name, rc);
goto bail;
}
mainloop_add_fd("corosync-cpg", G_PRIORITY_DEFAULT, fd, &cpg_handle, &cpg_fd_callbacks);
return TRUE;
bail:
cpg_finalize(cpg_handle);
return FALSE;
}
gboolean
send_cpg_message(struct iovec * iov)
{
int rc = CS_OK;
int retries = 0;
errno = 0;
do {
rc = cpg_mcast_joined(cpg_handle, CPG_TYPE_AGREED, iov, 1);
if (rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) {
cpg_flow_control_state_t fc_state = CPG_FLOW_CONTROL_DISABLED;
int rc2 = cpg_flow_control_state_get(cpg_handle, &fc_state);
if (rc2 == CS_OK && fc_state == CPG_FLOW_CONTROL_ENABLED) {
crm_debug("Attempting to clear cpg dispatch queue");
rc2 = cpg_dispatch(cpg_handle, CS_DISPATCH_ALL);
}
if (rc2 != CS_OK) {
crm_warn("Could not check/clear the cpg connection");
goto bail;
} else {
retries++;
crm_debug("Retrying operation after %ds", retries);
sleep(retries);
}
} else {
break;
}
/* 5 retires is plenty, we'll resend once the membership reforms anyway */
} while (retries < 5);
bail:
if (rc != CS_OK) {
crm_err("Sending message via cpg FAILED: (rc=%d) %s", rc, ais_error2text(rc));
}
return (rc == CS_OK);
}
/* =::=::=::= Configuration =::=::=::= */
#if HAVE_CONFDB
static int
get_config_opt(confdb_handle_t config,
hdb_handle_t object_handle, const char *key, char **value, const char *fallback)
{
size_t len = 0;
char *env_key = NULL;
const char *env_value = NULL;
char buffer[256];
if (*value) {
free(*value);
*value = NULL;
}
if (object_handle > 0) {
if (CS_OK == confdb_key_get(config, object_handle, key, strlen(key), &buffer, &len)) {
*value = strdup(buffer);
}
}
if (*value) {
crm_info("Found '%s' for option: %s", *value, key);
return 0;
}
env_key = crm_concat("HA", key, '_');
env_value = getenv(env_key);
free(env_key);
if (*value) {
crm_info("Found '%s' in ENV for option: %s", *value, key);
*value = strdup(env_value);
return 0;
}
if (fallback) {
crm_info("Defaulting to '%s' for option: %s", fallback, key);
*value = strdup(fallback);
} else {
crm_info("No default for option: %s", key);
}
return -1;
}
static confdb_handle_t
config_find_init(confdb_handle_t config)
{
cs_error_t rc = CS_OK;
confdb_handle_t local_handle = OBJECT_PARENT_HANDLE;
rc = confdb_object_find_start(config, local_handle);
if (rc == CS_OK) {
return local_handle;
} else {
crm_err("Couldn't create search context: %d", rc);
}
return 0;
}
static hdb_handle_t
config_find_next(confdb_handle_t config, const char *name, confdb_handle_t top_handle)
{
cs_error_t rc = CS_OK;
hdb_handle_t local_handle = 0;
if (top_handle == 0) {
crm_err("Couldn't search for %s: no valid context", name);
return 0;
}
crm_trace("Searching for %s in " HDB_X_FORMAT, name, top_handle);
rc = confdb_object_find(config, top_handle, name, strlen(name), &local_handle);
if (rc != CS_OK) {
crm_info("No additional configuration supplied for: %s", name);
local_handle = 0;
} else {
crm_info("Processing additional %s options...", name);
}
return local_handle;
}
#else
static int
get_config_opt(uint64_t unused, cmap_handle_t object_handle, const char *key, char **value,
const char *fallback)
{
int rc = 0, retries = 0;
cs_repeat(retries, 5, rc = cmap_get_string(object_handle, key, value));
if (rc != CS_OK) {
crm_trace("Search for %s failed %d, defaulting to %s", key, rc, fallback);
if (fallback) {
*value = strdup(fallback);
} else {
*value = NULL;
}
}
crm_trace("%s: %s", key, *value);
return rc;
}
#endif
#if HAVE_CONFDB
# define KEY_PREFIX ""
#elif HAVE_CMAP
# define KEY_PREFIX "logging."
#endif
gboolean
read_config(void)
{
int rc = CS_OK;
int retries = 0;
gboolean have_log = FALSE;
const char *const_value = NULL;
char *logging_debug = NULL;
char *logging_logfile = NULL;
char *logging_to_logfile = NULL;
char *logging_to_syslog = NULL;
char *logging_syslog_facility = NULL;
#if HAVE_CONFDB
char *value = NULL;
confdb_handle_t config;
confdb_handle_t top_handle = 0;
hdb_handle_t local_handle;
static confdb_callbacks_t callbacks = { };
do {
rc = confdb_initialize(&config, &callbacks);
if (rc != CS_OK) {
retries++;
printf("Connection setup failed: %d. Retrying in %ds\n", rc, retries);
sleep(retries);
} else {
break;
}
} while (retries < 5);
#elif HAVE_CMAP
cmap_handle_t local_handle;
uint64_t config = 0;
/* There can be only one (possibility if confdb isn't around) */
do {
rc = cmap_initialize(&local_handle);
if (rc != CS_OK) {
retries++;
printf("API connection setup failed: %s. Retrying in %ds\n", cs_strerror(rc), retries);
crm_info("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc), retries);
sleep(retries);
} else {
break;
}
} while (retries < 5);
#endif
if (rc != CS_OK) {
printf("Could not connect to Cluster Configuration Database API, error %d\n", rc);
crm_warn("Could not connect to Cluster Configuration Database API, error %d", rc);
return FALSE;
}
stack = get_cluster_type();
crm_info("Reading configure for stack: %s", name_for_cluster_type(stack));
/* =::=::= Should we be here =::=::= */
if (stack == pcmk_cluster_corosync) {
set_daemon_option("cluster_type", "corosync");
set_daemon_option("quorum_type", "corosync");
#if HAVE_CONFDB
} else if (stack == pcmk_cluster_cman) {
set_daemon_option("cluster_type", "cman");
set_daemon_option("quorum_type", "cman");
enable_crmd_as_root(TRUE);
} else if (stack == pcmk_cluster_classic_ais) {
set_daemon_option("cluster_type", "openais");
set_daemon_option("quorum_type", "pcmk");
/* Look for a service block to indicate our plugin is loaded */
top_handle = config_find_init(config);
local_handle = config_find_next(config, "service", top_handle);
while (local_handle) {
get_config_opt(config, local_handle, "name", &value, NULL);
if (safe_str_eq("pacemaker", value)) {
get_config_opt(config, local_handle, "ver", &value, "0");
if (safe_str_eq(value, "1")) {
get_config_opt(config, local_handle, "use_logd", &value, "no");
set_daemon_option("use_logd", value);
set_daemon_option("LOGD", value);
get_config_opt(config, local_handle, "use_mgmtd", &value, "no");
enable_mgmtd(crm_is_true(value));
} else {
crm_err("We can only start Pacemaker from init if using version 1"
" of the Pacemaker plugin for Corosync. Terminating.");
crm_exit(100);
}
break;
}
local_handle = config_find_next(config, "service", top_handle);
}
free(value);
#endif
} else {
crm_err("Unsupported stack type: %s", name_for_cluster_type(stack));
return FALSE;
}
#if HAVE_CONFDB
top_handle = config_find_init(config);
local_handle = config_find_next(config, "logging", top_handle);
#endif
/* =::=::= Logging =::=::= */
get_config_opt(config, local_handle, KEY_PREFIX "debug", &logging_debug, "off");
const_value = daemon_option("debugfile");
if (const_value) {
logging_to_logfile = strdup("on");
logging_logfile = strdup(const_value);
crm_trace("Using debugfile setting from the environment: %s", logging_logfile);
} else {
get_config_opt(config, local_handle, KEY_PREFIX "to_logfile", &logging_to_logfile, "off");
get_config_opt(config, local_handle, KEY_PREFIX "logfile", &logging_logfile,
"/var/log/pacemaker");
}
const_value = daemon_option("logfacility");
if (const_value) {
logging_syslog_facility = strdup(const_value);
crm_trace("Using logfacility setting from the environment: %s", logging_syslog_facility);
if (safe_str_eq(logging_syslog_facility, "none")) {
logging_to_syslog = strdup("off");
} else {
logging_to_syslog = strdup("on");
}
} else {
get_config_opt(config, local_handle, KEY_PREFIX "to_syslog", &logging_to_syslog, "on");
get_config_opt(config, local_handle, KEY_PREFIX "syslog_facility", &logging_syslog_facility,
"daemon");
}
#if HAVE_CONFDB
confdb_finalize(config);
#elif HAVE_CMAP
cmap_finalize(local_handle);
#endif
if (daemon_option("debug")) {
crm_trace("Using debug setting from the environment: %s", daemon_option("debug"));
if (get_crm_log_level() < LOG_DEBUG && daemon_option_enabled("pacemakerd", "debug")) {
set_crm_log_level(LOG_DEBUG);
}
} else if (crm_is_true(logging_debug)) {
set_daemon_option("debug", "1");
if (get_crm_log_level() < LOG_DEBUG) {
set_crm_log_level(LOG_DEBUG);
}
} else {
set_daemon_option("debug", "0");
}
if (crm_is_true(logging_to_logfile)) {
if (crm_add_logfile(logging_logfile)) {
/* What a cluster fsck, eventually we need to mandate /one/ */
set_daemon_option("debugfile", logging_logfile);
set_daemon_option("DEBUGLOG", logging_logfile);
have_log = TRUE;
} else {
crm_err("Couldn't create logfile: %s", logging_logfile);
}
}
if (have_log && crm_is_true(logging_to_syslog) == FALSE) {
qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_FALSE);
free(logging_syslog_facility);
logging_syslog_facility = strdup("none");
crm_info("User configured file based logging and explicitly disabled syslog.");
} else if (crm_is_true(logging_to_syslog) == FALSE) {
crm_err("Please enable some sort of logging, either 'to_logfile: on' or 'to_syslog: on'.");
crm_err("If you use file logging, be sure to also define a value for 'logfile'");
}
set_daemon_option("logfacility", logging_syslog_facility);
+ setenv("HA_LOGFACILITY", logging_syslog_facility, 1);
free(logging_debug);
free(logging_logfile);
free(logging_to_logfile);
free(logging_to_syslog);
free(logging_syslog_facility);
return TRUE;
}
diff --git a/mcp/pacemaker.c b/mcp/pacemaker.c
index ea978510de..30a357f8ca 100644
--- a/mcp/pacemaker.c
+++ b/mcp/pacemaker.c
@@ -1,1009 +1,1008 @@
/*
* Copyright (C) 2010 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <pacemaker.h>
#include <pwd.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <crm/msg_xml.h>
#include <crm/common/ipcs.h>
#include <crm/common/mainloop.h>
#include <crm/cluster.h>
#include <dirent.h>
#include <ctype.h>
gboolean fatal_error = FALSE;
GMainLoop *mainloop = NULL;
GHashTable *peers = NULL;
#define PCMK_PROCESS_CHECK_INTERVAL 5
char *local_name = NULL;
uint32_t local_nodeid = 0;
crm_trigger_t *shutdown_trigger = NULL;
const char *pid_file = "/var/run/pacemaker.pid";
/* *INDENT-OFF* */
enum crm_proc_flag {
crm_proc_none = 0x00000001,
crm_proc_plugin = 0x00000002,
crm_proc_lrmd = 0x00000010,
crm_proc_cib = 0x00000100,
crm_proc_crmd = 0x00000200,
crm_proc_attrd = 0x00001000,
crm_proc_stonithd = 0x00002000,
crm_proc_pe = 0x00010000,
crm_proc_te = 0x00020000,
crm_proc_mgmtd = 0x00040000,
crm_proc_stonith_ng = 0x00100000,
};
/* *INDENT-ON* */
typedef struct pcmk_child_s {
int pid;
long flag;
int start_seq;
int respawn_count;
gboolean respawn;
const char *name;
const char *uid;
const char *command;
gboolean active_before_startup;
} pcmk_child_t;
/* Index into the array below */
#define pcmk_child_crmd 4
#define pcmk_child_mgmtd 8
/* *INDENT-OFF* */
static pcmk_child_t pcmk_children[] = {
{ 0, crm_proc_none, 0, 0, FALSE, "none", NULL, NULL },
{ 0, crm_proc_plugin, 0, 0, FALSE, "ais", NULL, NULL },
{ 0, crm_proc_lrmd, 3, 0, TRUE, "lrmd", NULL, CRM_DAEMON_DIR"/lrmd" },
{ 0, crm_proc_cib, 1, 0, TRUE, "cib", CRM_DAEMON_USER, CRM_DAEMON_DIR"/cib" },
{ 0, crm_proc_crmd, 6, 0, TRUE, "crmd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/crmd" },
{ 0, crm_proc_attrd, 4, 0, TRUE, "attrd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/attrd" },
{ 0, crm_proc_stonithd, 0, 0, TRUE, "stonithd", NULL, NULL },
{ 0, crm_proc_pe, 5, 0, TRUE, "pengine", CRM_DAEMON_USER, CRM_DAEMON_DIR"/pengine" },
{ 0, crm_proc_mgmtd, 0, 0, TRUE, "mgmtd", NULL, HB_DAEMON_DIR"/mgmtd" },
{ 0, crm_proc_stonith_ng, 2, 0, TRUE, "stonith-ng", NULL, CRM_DAEMON_DIR"/stonithd" },
};
/* *INDENT-ON* */
static gboolean start_child(pcmk_child_t * child);
static gboolean check_active_before_startup_processes(gpointer user_data);
void
enable_crmd_as_root(gboolean enable)
{
if (enable) {
pcmk_children[pcmk_child_crmd].uid = NULL;
} else {
pcmk_children[pcmk_child_crmd].uid = CRM_DAEMON_USER;
}
}
void
enable_mgmtd(gboolean enable)
{
if (enable) {
pcmk_children[pcmk_child_mgmtd].start_seq = 7;
} else {
pcmk_children[pcmk_child_mgmtd].start_seq = 0;
}
}
static uint32_t
get_process_list(void)
{
int lpc = 0;
uint32_t procs = crm_proc_plugin;
for (lpc = 0; lpc < SIZEOF(pcmk_children); lpc++) {
if (pcmk_children[lpc].pid != 0) {
procs |= pcmk_children[lpc].flag;
}
}
return procs;
}
static void
pcmk_process_exit(pcmk_child_t * child)
{
child->pid = 0;
child->active_before_startup = FALSE;
/* Broadcast the fact that one of our processes died ASAP
*
* Try to get some logging of the cause out first though
* because we're probably about to get fenced
*
* Potentially do this only if respawn_count > N
* to allow for local recovery
*/
update_node_processes(local_nodeid, NULL, get_process_list());
child->respawn_count += 1;
if (child->respawn_count > MAX_RESPAWN) {
crm_err("Child respawn count exceeded by %s", child->name);
child->respawn = FALSE;
}
if (shutdown_trigger) {
mainloop_set_trigger(shutdown_trigger);
update_node_processes(local_nodeid, NULL, get_process_list());
} else if (child->respawn) {
crm_notice("Respawning failed child process: %s", child->name);
start_child(child);
}
}
static void
pcmk_child_exit(GPid pid, gint status, gpointer user_data)
{
int exitcode = 0;
pcmk_child_t *child = user_data;
if (WIFSIGNALED(status)) {
int signo = WTERMSIG(status);
int core = WCOREDUMP(status);
crm_notice("Child process %s terminated with signal %d (pid=%d, core=%d)",
child->name, signo, child->pid, core);
} else if (WIFEXITED(status)) {
exitcode = WEXITSTATUS(status);
do_crm_log(exitcode == 0 ? LOG_INFO : LOG_ERR,
"Child process %s exited (pid=%d, rc=%d)", child->name, child->pid, exitcode);
}
if (exitcode == 100) {
crm_warn("Pacemaker child process %s no longer wishes to be respawned. "
"Shutting ourselves down.", child->name);
child->respawn = FALSE;
fatal_error = TRUE;
pcmk_shutdown(15);
}
pcmk_process_exit(child);
}
static gboolean
stop_child(pcmk_child_t * child, int signal)
{
if (signal == 0) {
signal = SIGTERM;
}
if (child->command == NULL) {
crm_debug("Nothing to do for child \"%s\"", child->name);
return TRUE;
}
if (child->pid <= 0) {
crm_trace("Client %s not running", child->name);
return TRUE;
}
errno = 0;
if (kill(child->pid, signal) == 0) {
crm_notice("Stopping %s: Sent -%d to process %d", child->name, signal, child->pid);
} else {
crm_perror(LOG_ERR, "Stopping %s: Could not send -%d to process %d failed",
child->name, signal, child->pid);
}
return TRUE;
}
static char *opts_default[] = { NULL, NULL };
static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL };
static gboolean
start_child(pcmk_child_t * child)
{
int lpc = 0;
uid_t uid = 0;
struct rlimit oflimits;
gboolean use_valgrind = FALSE;
gboolean use_callgrind = FALSE;
const char *devnull = "/dev/null";
const char *env_valgrind = getenv("PCMK_valgrind_enabled");
const char *env_callgrind = getenv("PCMK_callgrind_enabled");
child->active_before_startup = FALSE;
if (child->command == NULL) {
crm_info("Nothing to do for child \"%s\"", child->name);
return TRUE;
}
if (env_callgrind != NULL && crm_is_true(env_callgrind)) {
use_callgrind = TRUE;
use_valgrind = TRUE;
} else if (env_callgrind != NULL && strstr(env_callgrind, child->name)) {
use_callgrind = TRUE;
use_valgrind = TRUE;
} else if (env_valgrind != NULL && crm_is_true(env_valgrind)) {
use_valgrind = TRUE;
} else if (env_valgrind != NULL && strstr(env_valgrind, child->name)) {
use_valgrind = TRUE;
}
if (use_valgrind && strlen(VALGRIND_BIN) == 0) {
crm_warn("Cannot enable valgrind for %s:"
" The location of the valgrind binary is unknown", child->name);
use_valgrind = FALSE;
}
child->pid = fork();
CRM_ASSERT(child->pid != -1);
if (child->pid > 0) {
/* parent */
g_child_watch_add(child->pid, pcmk_child_exit, child);
crm_info("Forked child %d for process %s%s", child->pid, child->name,
use_valgrind ? " (valgrind enabled: " VALGRIND_BIN ")" : "");
update_node_processes(local_nodeid, NULL, get_process_list());
return TRUE;
} else {
/* Start a new session */
(void)setsid();
/* Setup the two alternate arg arrarys */
opts_vgrind[0] = strdup(VALGRIND_BIN);
if (use_callgrind) {
opts_vgrind[1] = strdup("--tool=callgrind");
opts_vgrind[2] = strdup("--callgrind-out-file=" CRM_STATE_DIR "/callgrind.out.%p");
opts_vgrind[3] = strdup(child->command);
opts_vgrind[4] = NULL;
} else {
opts_vgrind[1] = strdup(child->command);
opts_vgrind[2] = NULL;
opts_vgrind[3] = NULL;
opts_vgrind[4] = NULL;
}
opts_default[0] = strdup(child->command);;
#if 0
/* Dont set the group for now - it prevents connection to the cluster */
if (gid && setgid(gid) < 0) {
crm_perror("Could not set group to %d", gid);
}
#endif
if (child->uid) {
if (crm_user_lookup(child->uid, &uid, NULL) < 0) {
crm_err("Invalid uid (%s) specified for %s", child->uid, child->name);
return TRUE;
}
}
if (uid && setuid(uid) < 0) {
crm_perror(LOG_ERR, "Could not set user to %d (%s)", uid, child->uid);
}
/* Close all open file descriptors */
getrlimit(RLIMIT_NOFILE, &oflimits);
for (lpc = 0; lpc < oflimits.rlim_cur; lpc++) {
close(lpc);
}
(void)open(devnull, O_RDONLY); /* Stdin: fd 0 */
(void)open(devnull, O_WRONLY); /* Stdout: fd 1 */
(void)open(devnull, O_WRONLY); /* Stderr: fd 2 */
if (use_valgrind) {
(void)execvp(VALGRIND_BIN, opts_vgrind);
} else {
(void)execvp(child->command, opts_default);
}
crm_perror(LOG_ERR, "FATAL: Cannot exec %s", child->command);
crm_exit(100);
}
return TRUE; /* never reached */
}
static gboolean
escalate_shutdown(gpointer data)
{
pcmk_child_t *child = data;
if (child->pid) {
/* Use SIGSEGV instead of SIGKILL to create a core so we can see what it was up to */
crm_err("Child %s not terminating in a timely manner, forcing", child->name);
stop_child(child, SIGSEGV);
}
return FALSE;
}
static gboolean
pcmk_shutdown_worker(gpointer user_data)
{
static int phase = 0;
static time_t next_log = 0;
static int max = SIZEOF(pcmk_children);
int lpc = 0;
if (phase == 0) {
crm_notice("Shuting down Pacemaker");
phase = max;
/* Add a second, more frequent, check to speed up shutdown */
g_timeout_add_seconds(5, check_active_before_startup_processes, NULL);
}
for (; phase > 0; phase--) {
/* dont stop anything with start_seq < 1 */
for (lpc = max - 1; lpc >= 0; lpc--) {
pcmk_child_t *child = &(pcmk_children[lpc]);
if (phase != child->start_seq) {
continue;
}
if (child->pid) {
time_t now = time(NULL);
if (child->respawn) {
next_log = now + 30;
child->respawn = FALSE;
stop_child(child, SIGTERM);
if (phase < pcmk_children[pcmk_child_crmd].start_seq) {
g_timeout_add(180000 /* 3m */ , escalate_shutdown, child);
}
} else if (now >= next_log) {
next_log = now + 30;
crm_notice("Still waiting for %s (pid=%d, seq=%d) to terminate...",
child->name, child->pid, child->start_seq);
}
return TRUE;
}
/* cleanup */
crm_debug("%s confirmed stopped", child->name);
child->pid = 0;
}
}
/* send_cluster_id(); */
crm_notice("Shutdown complete");
g_main_loop_quit(mainloop);
if (fatal_error) {
crm_notice("Attempting to inhibit respawning after fatal error");
crm_exit(100);
}
return TRUE;
}
void
pcmk_shutdown(int nsig)
{
if (shutdown_trigger == NULL) {
shutdown_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, pcmk_shutdown_worker, NULL);
}
mainloop_set_trigger(shutdown_trigger);
}
static void
build_path(const char *path_c, mode_t mode)
{
int offset = 1, len = 0;
char *path = strdup(path_c);
CRM_CHECK(path != NULL, return);
for (len = strlen(path); offset < len; offset++) {
if (path[offset] == '/') {
path[offset] = 0;
if (mkdir(path, mode) < 0 && errno != EEXIST) {
crm_perror(LOG_ERR, "Could not create directory '%s'", path);
break;
}
path[offset] = '/';
}
}
if (mkdir(path, mode) < 0 && errno != EEXIST) {
crm_perror(LOG_ERR, "Could not create directory '%s'", path);
}
free(path);
}
static int32_t
pcmk_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
crm_trace("Connection %p", c);
if (crm_client_new(c, uid, gid) == NULL) {
return -EIO;
}
return 0;
}
static void
pcmk_ipc_created(qb_ipcs_connection_t * c)
{
crm_trace("Connection %p", c);
}
/* Exit code means? */
static int32_t
pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
{
uint32_t id = 0;
uint32_t flags = 0;
const char *task = NULL;
crm_client_t *c = crm_client_get(qbc);
xmlNode *msg = crm_ipcs_recv(c, data, size, &id, &flags);
if (flags & crm_ipc_client_response) {
crm_ipcs_send_ack(c, id, "ack", __FUNCTION__, __LINE__);
}
if (msg == NULL) {
return 0;
}
task = crm_element_value(msg, F_CRM_TASK);
if (crm_str_eq(task, CRM_OP_QUIT, TRUE)) {
/* Time to quit */
crm_notice("Shutting down in responce to ticket %s (%s)",
crm_element_value(msg, F_CRM_REFERENCE), crm_element_value(msg, F_CRM_ORIGIN));
pcmk_shutdown(15);
} else {
/* Just send to everyone */
update_process_clients();
}
free_xml(msg);
return 0;
}
/* Error code means? */
static int32_t
pcmk_ipc_closed(qb_ipcs_connection_t * c)
{
crm_client_t *client = crm_client_get(c);
crm_trace("Connection %p", c);
crm_client_destroy(client);
return 0;
}
static void
pcmk_ipc_destroy(qb_ipcs_connection_t * c)
{
crm_trace("Connection %p", c);
}
struct qb_ipcs_service_handlers ipc_callbacks = {
.connection_accept = pcmk_ipc_accept,
.connection_created = pcmk_ipc_created,
.msg_process = pcmk_ipc_dispatch,
.connection_closed = pcmk_ipc_closed,
.connection_destroyed = pcmk_ipc_destroy
};
static void
ghash_send_proc_details(gpointer key, gpointer value, gpointer data)
{
crm_ipcs_send(value, 0, data, TRUE);
}
static void
peer_loop_fn(gpointer key, gpointer value, gpointer user_data)
{
pcmk_peer_t *node = value;
xmlNode *update = user_data;
xmlNode *xml = create_xml_node(update, "node");
crm_xml_add_int(xml, "id", node->id);
crm_xml_add(xml, "uname", node->uname);
crm_xml_add_int(xml, "processes", node->processes);
}
void
update_process_clients(void)
{
xmlNode *update = create_xml_node(NULL, "nodes");
crm_trace("Sending process list to %d children", crm_hash_table_size(client_connections));
g_hash_table_foreach(peers, peer_loop_fn, update);
g_hash_table_foreach(client_connections, ghash_send_proc_details, update);
free_xml(update);
}
void
update_process_peers(void)
{
char buffer[1024];
struct iovec iov;
int rc = 0;
memset(buffer, 0, SIZEOF(buffer));
if (local_name) {
rc = snprintf(buffer, SIZEOF(buffer) - 1, "<node uname=\"%s\" proclist=\"%u\"/>",
local_name, get_process_list());
} else {
rc = snprintf(buffer, SIZEOF(buffer) - 1, "<node proclist=\"%u\"/>", get_process_list());
}
iov.iov_base = buffer;
iov.iov_len = rc + 1;
crm_trace("Sending %s", buffer);
send_cpg_message(&iov);
}
gboolean
update_node_processes(uint32_t id, const char *uname, uint32_t procs)
{
gboolean changed = FALSE;
pcmk_peer_t *node = g_hash_table_lookup(peers, GUINT_TO_POINTER(id));
if (node == NULL) {
changed = TRUE;
node = calloc(1, sizeof(pcmk_peer_t));
node->id = id;
g_hash_table_insert(peers, GUINT_TO_POINTER(id), node);
node = g_hash_table_lookup(peers, GUINT_TO_POINTER(id));
CRM_ASSERT(node != NULL);
}
if (uname != NULL) {
if (node->uname == NULL || safe_str_eq(node->uname, uname) == FALSE) {
int lpc, len = strlen(uname);
crm_notice("%p Node %u now known as %s%s%s", node, id, uname,
node->uname ? node->uname : ", was: ", node->uname ? node->uname : "");
free(node->uname);
node->uname = strdup(uname);
changed = TRUE;
for (lpc = 0; lpc < len; lpc++) {
if (uname[lpc] >= 'A' && uname[lpc] <= 'Z') {
crm_warn
("Node names with capitals are discouraged, consider changing '%s' to something else",
uname);
break;
}
}
}
} else {
crm_trace("Empty uname for node %u", id);
}
if (procs != 0) {
if (procs != node->processes) {
crm_debug("Node %s now has process list: %.32x (was %.32x)",
node->uname, procs, node->processes);
node->processes = procs;
changed = TRUE;
} else {
crm_trace("Node %s still has process list: %.32x", node->uname, procs);
}
}
if (changed && id == local_nodeid) {
update_process_clients();
update_process_peers();
}
return changed;
}
/* *INDENT-OFF* */
static struct crm_option long_options[] = {
/* Top-level Options */
{"help", 0, 0, '?', "\tThis text"},
{"version", 0, 0, '$', "\tVersion information" },
{"verbose", 0, 0, 'V', "\tIncrease debug output"},
{"shutdown", 0, 0, 'S', "\tInstruct Pacemaker to shutdown on this machine"},
{"features", 0, 0, 'F', "\tDisplay the full version and list of features Pacemaker was built with"},
{"-spacer-", 1, 0, '-', "\nAdditional Options:"},
{"foreground", 0, 0, 'f', "\tRun in the foreground instead of as a daemon"},
{"pid-file", 1, 0, 'p', "\t(Advanced) Daemon pid file location"},
{NULL, 0, 0, 0}
};
/* *INDENT-ON* */
static void
mcp_chown(const char *path, uid_t uid, gid_t gid)
{
int rc = chown(path, uid, gid);
if (rc < 0) {
crm_warn("Cannot change the ownership of %s to user %s and gid %d: %s",
path, CRM_DAEMON_USER, gid, pcmk_strerror(errno));
}
}
static gboolean
check_active_before_startup_processes(gpointer user_data)
{
int start_seq = 1, lpc = 0;
static int max = SIZEOF(pcmk_children);
gboolean keep_tracking = FALSE;
for (start_seq = 1; start_seq < max; start_seq++) {
for (lpc = 0; lpc < max; lpc++) {
if (pcmk_children[lpc].active_before_startup == FALSE) {
/* we are already tracking it as a child process. */
continue;
} else if (start_seq != pcmk_children[lpc].start_seq) {
continue;
} else if (crm_pid_active(pcmk_children[lpc].pid) != 1) {
crm_notice("Process %s terminated (pid=%d)",
pcmk_children[lpc].name, pcmk_children[lpc].pid);
pcmk_process_exit(&(pcmk_children[lpc]));
continue;
}
/* at least one of the processes found at startup
* is still going, so keep this recurring timer around */
keep_tracking = TRUE;
}
}
return keep_tracking;
}
static void
find_and_track_existing_processes(void)
{
DIR *dp;
struct dirent *entry;
struct stat statbuf;
int start_tracker = 0;
dp = opendir("/proc");
if (!dp) {
/* no proc directory to search through */
crm_notice("Can not read /proc directory to track existing components");
return;
}
while ((entry = readdir(dp)) != NULL) {
char procpath[128];
char value[64];
char key[16];
FILE *file;
int pid;
int max = SIZEOF(pcmk_children);
int i;
strcpy(procpath, "/proc/");
/* strlen("/proc/") + strlen("/status") + 1 = 14
* 128 - 14 = 114 */
strncat(procpath, entry->d_name, 114);
if (lstat(procpath, &statbuf)) {
continue;
}
if (!S_ISDIR(statbuf.st_mode) || !isdigit(entry->d_name[0])) {
continue;
}
strcat(procpath, "/status");
file = fopen(procpath, "r");
if (!file) {
continue;
}
if (fscanf(file, "%15s%63s", key, value) != 2) {
fclose(file);
continue;
}
fclose(file);
pid = atoi(entry->d_name);
if (pid <= 0) {
continue;
}
for (i = 0; i < max; i++) {
const char *name = pcmk_children[i].name;
if (pcmk_children[i].start_seq == 0) {
continue;
}
if (pcmk_children[i].flag == crm_proc_stonith_ng) {
name = "stonithd";
}
if (safe_str_eq(name, value)) {
if (crm_pid_active(pid) != 1) {
continue;
}
crm_notice("Tracking existing %s process (pid=%d)", value, pid);
pcmk_children[i].pid = pid;
pcmk_children[i].active_before_startup = TRUE;
start_tracker = 1;
}
}
}
if (start_tracker) {
g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL, check_active_before_startup_processes,
NULL);
}
closedir(dp);
}
static void
init_children_processes(void)
{
int start_seq = 1, lpc = 0;
static int max = SIZEOF(pcmk_children);
/* start any children that have not been detected */
for (start_seq = 1; start_seq < max; start_seq++) {
/* dont start anything with start_seq < 1 */
for (lpc = 0; lpc < max; lpc++) {
if (pcmk_children[lpc].pid) {
/* we are already tracking it */
continue;
}
if (start_seq == pcmk_children[lpc].start_seq) {
start_child(&(pcmk_children[lpc]));
}
}
}
}
int
main(int argc, char **argv)
{
int rc;
int flag;
int argerr = 0;
int option_index = 0;
gboolean shutdown = FALSE;
uid_t pcmk_uid = 0;
gid_t pcmk_gid = 0;
struct rlimit cores;
crm_ipc_t *old_instance = NULL;
qb_ipcs_service_t *ipcs = NULL;
const char *facility = daemon_option("logfacility");
setenv("LC_ALL", "C", 1);
- setenv("HA_LOGFACILITY", facility, 1);
setenv("HA_LOGD", "no", 1);
set_daemon_option("mcp", "true");
set_daemon_option("use_logd", "off");
crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
crm_set_options(NULL, "mode [options]", long_options, "Start/Stop Pacemaker\n");
/* Restore the original facility so that read_config() does the right thing */
set_daemon_option("logfacility", facility);
while (1) {
flag = crm_get_option(argc, argv, &option_index);
if (flag == -1)
break;
switch (flag) {
case 'V':
crm_bump_log_level(argc, argv);
break;
case 'f':
/* Legacy */
break;
case 'p':
pid_file = optarg;
break;
case '$':
case '?':
crm_help(flag, EX_OK);
break;
case 'S':
shutdown = TRUE;
break;
case 'F':
printf("Pacemaker %s (Build: %s)\n Supporting: %s\n", VERSION, BUILD_VERSION,
CRM_FEATURES);
crm_exit(0);
default:
printf("Argument code 0%o (%c) is not (?yet?) supported\n", flag, flag);
++argerr;
break;
}
}
if (optind < argc) {
printf("non-option ARGV-elements: ");
while (optind < argc)
printf("%s ", argv[optind++]);
printf("\n");
}
if (argerr) {
crm_help('?', EX_USAGE);
}
crm_debug("Checking for old instances of %s", CRM_SYSTEM_MCP);
old_instance = crm_ipc_new(CRM_SYSTEM_MCP, 0);
crm_ipc_connect(old_instance);
if (shutdown) {
crm_debug("Terminating previous instance");
while (crm_ipc_connected(old_instance)) {
xmlNode *cmd =
create_request(CRM_OP_QUIT, NULL, NULL, CRM_SYSTEM_MCP, CRM_SYSTEM_MCP, NULL);
crm_debug(".");
crm_ipc_send(old_instance, cmd, 0, 0, NULL);
free_xml(cmd);
sleep(2);
}
crm_ipc_close(old_instance);
crm_ipc_destroy(old_instance);
crm_exit(0);
} else if (crm_ipc_connected(old_instance)) {
crm_ipc_close(old_instance);
crm_ipc_destroy(old_instance);
crm_err("Pacemaker is already active, aborting startup");
crm_exit(100);
}
crm_ipc_close(old_instance);
crm_ipc_destroy(old_instance);
if (read_config() == FALSE) {
crm_notice("Could not obtain corosync config data, exiting");
crm_exit(1);
}
crm_notice("Starting Pacemaker %s (Build: %s): %s", VERSION, BUILD_VERSION, CRM_FEATURES);
mainloop = g_main_new(FALSE);
rc = getrlimit(RLIMIT_CORE, &cores);
if (rc < 0) {
crm_perror(LOG_ERR, "Cannot determine current maximum core size.");
} else {
if (cores.rlim_max == 0 && geteuid() == 0) {
cores.rlim_max = RLIM_INFINITY;
} else {
crm_info("Maximum core file size is: %lu", (unsigned long)cores.rlim_max);
}
cores.rlim_cur = cores.rlim_max;
rc = setrlimit(RLIMIT_CORE, &cores);
if (rc < 0) {
crm_perror(LOG_ERR,
"Core file generation will remain disabled."
" Core files are an important diagnositic tool,"
" please consider enabling them by default.");
}
#if 0
/* system() is not thread-safe, can't call from here
* Actually, its a pretty hacky way to try and achieve this anyway
*/
if (system("echo 1 > /proc/sys/kernel/core_uses_pid") != 0) {
crm_perror(LOG_ERR, "Could not enable /proc/sys/kernel/core_uses_pid");
}
#endif
}
if (crm_user_lookup(CRM_DAEMON_USER, &pcmk_uid, &pcmk_gid) < 0) {
crm_err("Cluster user %s does not exist, aborting Pacemaker startup", CRM_DAEMON_USER);
crm_exit(1);
}
mkdir(CRM_STATE_DIR, 0750);
mcp_chown(CRM_STATE_DIR, pcmk_uid, pcmk_gid);
/* Used by stonithd */
build_path(HA_STATE_DIR "/heartbeat", 0755);
mcp_chown(HA_STATE_DIR "/heartbeat", pcmk_uid, pcmk_gid);
/* Used by RAs - Leave owned by root */
build_path(CRM_RSCTMP_DIR, 0755);
/* Used to store core files in */
build_path(CRM_CORE_DIR, 0755);
mcp_chown(CRM_CORE_DIR, pcmk_uid, pcmk_gid);
/* Used to store blackbox dumps in */
build_path(CRM_BLACKBOX_DIR, 0755);
mcp_chown(CRM_BLACKBOX_DIR, pcmk_uid, pcmk_gid);
/* Used to store policy engine inputs in */
build_path(PE_STATE_DIR, 0755);
mcp_chown(PE_STATE_DIR, pcmk_uid, pcmk_gid);
/* Used to store the cluster configuration */
build_path(CRM_CONFIG_DIR, 0755);
mcp_chown(CRM_CONFIG_DIR, pcmk_uid, pcmk_gid);
peers = g_hash_table_new(g_direct_hash, g_direct_equal);
ipcs = mainloop_add_ipc_server(CRM_SYSTEM_MCP, QB_IPC_NATIVE, &ipc_callbacks);
if (ipcs == NULL) {
crm_err("Couldn't start IPC server");
crm_exit(1);
}
if (cluster_connect_cfg(&local_nodeid) == FALSE) {
crm_err("Couldn't connect to Corosync's CFG service");
crm_exit(1);
}
if (cluster_connect_cpg() == FALSE) {
crm_err("Couldn't connect to Corosync's CPG service");
crm_exit(1);
}
local_name = get_local_node_name();
update_node_processes(local_nodeid, local_name, get_process_list());
mainloop_add_signal(SIGTERM, pcmk_shutdown);
mainloop_add_signal(SIGINT, pcmk_shutdown);
find_and_track_existing_processes();
init_children_processes();
crm_info("Starting mainloop");
g_main_run(mainloop);
if (ipcs) {
crm_trace("Closing IPC server");
mainloop_del_ipc_server(ipcs);
ipcs = NULL;
}
g_main_destroy(mainloop);
cluster_disconnect_cpg();
cluster_disconnect_cfg();
crm_info("Exiting %s", crm_system_name);
crm_exit(0);
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Tue, Jul 8, 6:11 PM (17 h, 27 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1980669
Default Alt Text
(48 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment