Page MenuHomeClusterLabs Projects

No OneTemporary

diff --git a/mcp/corosync.c b/mcp/corosync.c
index 28a7ff7603..adc57cc54d 100644
--- a/mcp/corosync.c
+++ b/mcp/corosync.c
@@ -1,634 +1,635 @@
/*
* Copyright (C) 2010 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <pacemaker.h>
#include <sys/utsname.h>
#include <sys/stat.h> /* for calls to stat() */
#include <libgen.h> /* For basename() and dirname() */
#include <sys/types.h>
#include <pwd.h> /* For getpwname() */
#include <corosync/hdb.h>
#include <corosync/cfg.h>
#include <corosync/cpg.h>
#if HAVE_CONFDB
# include <corosync/confdb.h>
#endif
#include <crm/cluster/internal.h>
#include <crm/common/mainloop.h>
#if SUPPORT_CMAN
# include <libcman.h>
#endif
#if HAVE_CMAP
# include <corosync/cmap.h>
#endif
/* Name of the CPG group; starts out empty and is filled in by cluster_connect_cpg(). */
static struct cpg_name cpg_group = {
.length = 0,
.value[0] = 0,
};
/* Detected cluster stack; read_config() assigns it from get_cluster_type(). */
enum cluster_type_e stack = pcmk_cluster_unknown;
/* Connection handles for the CPG and CFG services.  The disconnect and
 * connection-destroy routines in this file reset them to 0. */
static cpg_handle_t cpg_handle;
static corosync_cfg_handle_t cfg_handle;
/* =::=::=::= CFG - Shutdown stuff =::=::=::= */
/*
 * CFG shutdown callback.
 *
 * Logs which kind of shutdown corosync is asking for and then always
 * answers "no": corosync must not shut down while we are running.
 */
static void
cfg_shutdown_callback(corosync_cfg_handle_t h, corosync_cfg_shutdown_flags_t flags)
{
    const char *kind = "optional";

    if (flags == COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE) {
        kind = "immediate";
    } else if (flags == COROSYNC_CFG_SHUTDOWN_FLAG_REGARDLESS) {
        kind = "forced";
    }
    crm_info("Corosync wants to shut down: %s", kind);

    /* Refuse unconditionally */
    corosync_cfg_replyto_shutdown(h, COROSYNC_CFG_SHUTDOWN_FLAG_NO);
}
/* Callback table handed to corosync_cfg_initialize() in cluster_connect_cfg();
 * only the shutdown callback is populated. */
static corosync_cfg_callbacks_t cfg_callbacks = {
.corosync_cfg_shutdown_callback = cfg_shutdown_callback,
};
/*
 * Mainloop dispatch hook for the CFG file descriptor.
 *
 * user_data points at the CFG handle.  Returns 0 when corosync_cfg_dispatch()
 * reports CS_OK and -1 for any other result.
 */
static int
pcmk_cfg_dispatch(gpointer user_data)
{
    corosync_cfg_handle_t *cfg = user_data;

    if (corosync_cfg_dispatch(*cfg, CS_DISPATCH_ALL) == CS_OK) {
        return 0;
    }
    return -1;
}
/*
 * Mainloop destroy hook for the CFG connection: report the loss, forget the
 * handle and begin shutting Pacemaker down.
 */
static void
cfg_connection_destroy(gpointer user_data)
{
    crm_err("Connection destroyed");

    /* The handle is no longer usable; make sure nobody finalizes it later */
    cfg_handle = 0;

    pcmk_shutdown(SIGTERM);
}
/*
 * Close the CFG connection (if one is open) and start the Pacemaker
 * shutdown sequence.  Always returns TRUE.
 */
gboolean
cluster_disconnect_cfg(void)
{
    if (cfg_handle != 0) {
        corosync_cfg_finalize(cfg_handle);
        cfg_handle = 0;
    }

    pcmk_shutdown(SIGTERM);

    return TRUE;
}
/*
 * Run `code` and retry it while it reports a transient corosync error.
 *
 * The macro reads a variable named `rc` from the caller's scope, so `code`
 * is expected to assign its result to `rc`.  When rc is CS_ERR_TRY_AGAIN or
 * CS_ERR_QUEUE_FULL, `counter` is incremented, the caller sleeps for
 * `counter` seconds, and `code` runs again as long as counter < max.
 * Any other value of rc ends the loop immediately.
 */
#define cs_repeat(counter, max, code) do { \
code; \
if(rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { \
counter++; \
crm_debug("Retrying operation after %ds", counter); \
sleep(counter); \
} else { \
break; \
} \
} while(counter < max)
/*
 * Connect to the corosync CFG service and register it with the mainloop.
 *
 * nodeid: output; receives the local node id reported by
 *         corosync_cfg_local_get().
 *
 * Returns TRUE once the connection's file descriptor has been added to the
 * mainloop, FALSE if any step fails.  On failure the handle is finalized
 * (when it was successfully created) and reset to 0, so that a later
 * cluster_disconnect_cfg() does not finalize it a second time.
 */
gboolean
cluster_connect_cfg(uint32_t * nodeid)
{
    cs_error_t rc;
    int fd = 0, retries = 0;

    static struct mainloop_fd_callbacks cfg_fd_callbacks = {
        .dispatch = pcmk_cfg_dispatch,
        .destroy = cfg_connection_destroy,
    };

    cs_repeat(retries, 30, rc = corosync_cfg_initialize(&cfg_handle, &cfg_callbacks));
    if (rc != CS_OK) {
        crm_err("corosync cfg init error %d", rc);
        /* Nothing usable was created; do not leave a stale value behind */
        cfg_handle = 0;
        return FALSE;
    }

    rc = corosync_cfg_fd_get(cfg_handle, &fd);
    if (rc != CS_OK) {
        crm_err("corosync cfg fd_get error %d", rc);
        goto bail;
    }

    retries = 0;
    cs_repeat(retries, 30, rc = corosync_cfg_local_get(cfg_handle, nodeid));
    if (rc != CS_OK) {
        crm_err("corosync cfg local_get error %d", rc);
        goto bail;
    }

    /* nodeid is unsigned, so print it as such */
    crm_debug("Our nodeid: %u", *nodeid);
    mainloop_add_fd("corosync-cfg", G_PRIORITY_DEFAULT, fd, &cfg_handle, &cfg_fd_callbacks);

    return TRUE;

  bail:
    corosync_cfg_finalize(cfg_handle);
    cfg_handle = 0;
    return FALSE;
}
/* =::=::=::= CPG - Closed Process Group Messaging =::=::=::= */
/*
 * Mainloop dispatch hook for the CPG file descriptor.
 *
 * user_data points at the CPG handle.  Returns 0 when cpg_dispatch()
 * reports CS_OK and -1 for any other result.
 */
static int
pcmk_cpg_dispatch(gpointer user_data)
{
    cpg_handle_t *cpg = user_data;

    if (cpg_dispatch(*cpg, CS_DISPATCH_ALL) == CS_OK) {
        return 0;
    }
    return -1;
}
/*
 * Mainloop destroy hook for the CPG connection: report the loss, forget the
 * handle and exit with status 1.
 */
static void
cpg_connection_destroy(gpointer user_data)
{
    crm_err("Connection destroyed");

    /* The handle is gone; clear it before exiting */
    cpg_handle = 0;

    crm_exit(1);
}
/*
 * CPG delivery callback.
 *
 * Messages that originate from the local node are ignored.  For any other
 * node the payload is parsed as XML, its "uname" and "proclist" attributes
 * are read and handed to update_node_processes(); if that reports a change,
 * the connected clients are notified.
 *
 * The parsed document is released before returning (it was previously
 * leaked on every delivered message).
 */
static void
pcmk_cpg_deliver(cpg_handle_t handle,
                 const struct cpg_name *groupName,
                 uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
    if (nodeid != local_nodeid) {
        uint32_t procs = 0;
        xmlNode *xml = string2xml(msg);
        const char *uname = crm_element_value(xml, "uname");

        crm_element_value_int(xml, "proclist", (int *)&procs);
        /* crm_debug("Got proclist %.32x from %s", procs, uname); */
        if (update_node_processes(nodeid, uname, procs)) {
            update_process_clients();
        }

        /* uname points into the document, so free it only after its last use */
        if (xml != NULL) {
            free_xml(xml);
        }
    }
}
/*
 * CPG configuration-change callback.
 *
 * The member, left and joined lists are not inspected; any change simply
 * causes the local process list to be sent to the peers again.
 */
static void
pcmk_cpg_membership(cpg_handle_t handle,
                    const struct cpg_name *groupName,
                    const struct cpg_address *member_list, size_t member_list_entries,
                    const struct cpg_address *left_list, size_t left_list_entries,
                    const struct cpg_address *joined_list, size_t joined_list_entries)
{
    /* The membership details themselves are of no interest here */
    update_process_peers();
}
/* Callback table handed to cpg_initialize() in cluster_connect_cpg():
 * message delivery and configuration-change handlers. */
cpg_callbacks_t cpg_callbacks = {
.cpg_deliver_fn = pcmk_cpg_deliver,
.cpg_confchg_fn = pcmk_cpg_membership,
};
/*
 * Close the CPG connection if one is open.  Always returns TRUE.
 */
gboolean
cluster_disconnect_cpg(void)
{
    if (cpg_handle != 0) {
        cpg_finalize(cpg_handle);
        cpg_handle = 0;
    }

    return TRUE;
}
/*
 * Connect to the corosync CPG service, join the "pcmk" group and register
 * the connection with the mainloop.
 *
 * Returns TRUE once the connection's file descriptor has been added to the
 * mainloop, FALSE if any step fails.  On failure the handle is finalized
 * (when it was successfully created) and reset to 0, so that a later
 * cluster_disconnect_cpg() does not finalize it a second time.
 */
gboolean
cluster_connect_cpg(void)
{
    cs_error_t rc;
    unsigned int nodeid;
    int fd;
    int retries = 0;

    static struct mainloop_fd_callbacks cpg_fd_callbacks = {
        .dispatch = pcmk_cpg_dispatch,
        .destroy = cpg_connection_destroy,
    };

    /* The stored length includes the terminating NUL */
    strcpy(cpg_group.value, "pcmk");
    cpg_group.length = strlen(cpg_group.value) + 1;

    cs_repeat(retries, 30, rc = cpg_initialize(&cpg_handle, &cpg_callbacks));
    if (rc != CS_OK) {
        crm_err("corosync cpg init error %d", rc);
        /* Nothing usable was created; do not leave a stale value behind */
        cpg_handle = 0;
        return FALSE;
    }

    rc = cpg_fd_get(cpg_handle, &fd);
    if (rc != CS_OK) {
        crm_err("corosync cpg fd_get error %d", rc);
        goto bail;
    }

    retries = 0;
    cs_repeat(retries, 30, rc = cpg_local_get(cpg_handle, &nodeid));
    if (rc != CS_OK) {
        crm_err("corosync cpg local_get error %d", rc);
        goto bail;
    }

    /* nodeid is unsigned, so print it as such */
    crm_debug("Our nodeid: %u", nodeid);

    retries = 0;
    cs_repeat(retries, 30, rc = cpg_join(cpg_handle, &cpg_group));
    if (rc != CS_OK) {
        /* Report the group we actually tried to join */
        crm_err("Could not join the CPG group '%s': %d", cpg_group.value, rc);
        goto bail;
    }

    mainloop_add_fd("corosync-cpg", G_PRIORITY_DEFAULT, fd, &cpg_handle, &cpg_fd_callbacks);

    return TRUE;

  bail:
    cpg_finalize(cpg_handle);
    cpg_handle = 0;
    return FALSE;
}
/*
 * Multicast a single iovec to the joined CPG group.
 *
 * When corosync answers CS_ERR_TRY_AGAIN or CS_ERR_QUEUE_FULL the flow
 * control state is queried; if flow control is enabled the dispatch queue is
 * drained.  Provided that check succeeded, the send is retried after a
 * growing delay (1s, 2s, ...).  A failed check ends the attempts at once.
 *
 * Returns TRUE when the final cpg_mcast_joined() result is CS_OK, FALSE
 * otherwise (an error is logged in that case).
 */
gboolean
send_cpg_message(struct iovec * iov)
{
    int rc = CS_OK;
    int attempts = 0;

    errno = 0;

    /* 5 retries is plenty, we'll resend once the membership reforms anyway */
    while (1) {
        cpg_flow_control_state_t flow = CPG_FLOW_CONTROL_DISABLED;
        int check_rc;

        rc = cpg_mcast_joined(cpg_handle, CPG_TYPE_AGREED, iov, 1);
        if (rc != CS_ERR_TRY_AGAIN && rc != CS_ERR_QUEUE_FULL) {
            /* Either success or an error that retrying will not cure */
            break;
        }

        check_rc = cpg_flow_control_state_get(cpg_handle, &flow);
        if (check_rc == CS_OK && flow == CPG_FLOW_CONTROL_ENABLED) {
            crm_debug("Attempting to clear cpg dispatch queue");
            check_rc = cpg_dispatch(cpg_handle, CS_DISPATCH_ALL);
        }

        if (check_rc != CS_OK) {
            crm_warn("Could not check/clear the cpg connection");
            break;
        }

        attempts++;
        crm_debug("Retrying operation after %ds", attempts);
        sleep(attempts);

        if (attempts >= 5) {
            break;
        }
    }

    if (rc != CS_OK) {
        crm_err("Sending message via cpg FAILED: (rc=%d) %s", rc, ais_error2text(rc));
    }

    return (rc == CS_OK);
}
/* =::=::=::= Configuration =::=::=::= */
#if HAVE_CONFDB
/*
 * Look up a configuration option (confdb variant).
 *
 * config:        confdb connection
 * object_handle: object to search; values <= 0 skip the confdb lookup
 * key:           option name
 * value:         in/out; any previous string is freed, and on return it
 *                holds a newly allocated copy of the result (or NULL when
 *                nothing was found and no fallback was given).  The caller
 *                owns the returned string.
 * fallback:      default to use when the option is not set anywhere,
 *                may be NULL
 *
 * Lookup order: confdb, then the environment variable "HA_<key>", then
 * fallback.
 *
 * Returns 0 when the value came from confdb or the environment, -1 when the
 * fallback (or nothing) was used.
 */
static int
get_config_opt(confdb_handle_t config,
               hdb_handle_t object_handle, const char *key, char **value, const char *fallback)
{
    size_t len = 0;
    char *env_key = NULL;
    const char *env_value = NULL;
    char buffer[256];

    if (*value) {
        free(*value);
        *value = NULL;
    }

    if (object_handle > 0) {
        if (CS_OK == confdb_key_get(config, object_handle, key, strlen(key), buffer, &len)) {
            /* The value length is reported separately; make sure the buffer
             * is terminated before treating it as a C string */
            if (len >= sizeof(buffer)) {
                len = sizeof(buffer) - 1;
            }
            buffer[len] = '\0';
            *value = strdup(buffer);
        }
    }

    if (*value) {
        crm_info("Found '%s' for option: %s", *value, key);
        return 0;
    }

    env_key = crm_concat("HA", key, '_');
    env_value = getenv(env_key);
    free(env_key);

    /* Test the environment result itself: *value is always NULL here, so
     * checking it made this branch unreachable */
    if (env_value) {
        crm_info("Found '%s' in ENV for option: %s", env_value, key);
        *value = strdup(env_value);
        return 0;
    }

    if (fallback) {
        crm_info("Defaulting to '%s' for option: %s", fallback, key);
        *value = strdup(fallback);

    } else {
        crm_info("No default for option: %s", key);
    }

    return -1;
}
/*
 * Start a confdb object search rooted at OBJECT_PARENT_HANDLE.
 *
 * Returns the handle to pass to config_find_next(), or 0 (after logging an
 * error) when the search context could not be created.
 */
static confdb_handle_t
config_find_init(confdb_handle_t config)
{
    confdb_handle_t search_root = OBJECT_PARENT_HANDLE;
    cs_error_t rc = confdb_object_find_start(config, search_root);

    if (rc != CS_OK) {
        crm_err("Couldn't create search context: %d", rc);
        return 0;
    }

    return search_root;
}
/*
 * Fetch the next confdb object called `name` below top_handle.
 *
 * top_handle is the search context from config_find_init(); 0 is rejected
 * with an error.  Returns the handle of the object found, or 0 when there
 * is none (or the context was invalid).
 */
static hdb_handle_t
config_find_next(confdb_handle_t config, const char *name, confdb_handle_t top_handle)
{
    cs_error_t rc = CS_OK;
    hdb_handle_t found = 0;

    if (top_handle == 0) {
        crm_err("Couldn't search for %s: no valid context", name);
        return 0;
    }

    crm_trace("Searching for %s in " HDB_X_FORMAT, name, top_handle);
    rc = confdb_object_find(config, top_handle, name, strlen(name), &found);

    if (rc == CS_OK) {
        crm_info("Processing additional %s options...", name);
        return found;
    }

    crm_info("No additional configuration supplied for: %s", name);
    return 0;
}
#else
/*
 * Look up a configuration option (cmap variant).
 *
 * The first argument is unused; it only keeps the signature parallel to the
 * confdb variant.  The string is fetched with cmap_get_string(), retrying
 * transient errors through cs_repeat().  When the lookup fails, *value is
 * set to a copy of fallback, or to NULL when no fallback was supplied.
 *
 * Returns the result code of the cmap lookup.
 */
static int
get_config_opt(uint64_t unused, cmap_handle_t object_handle, const char *key, char **value,
               const char *fallback)
{
    int rc = 0;
    int attempts = 0;

    cs_repeat(attempts, 5, rc = cmap_get_string(object_handle, key, value));

    if (rc != CS_OK) {
        crm_trace("Search for %s failed %d, defaulting to %s", key, rc, fallback);
        *value = fallback ? strdup(fallback) : NULL;
    }

    crm_trace("%s: %s", key, *value);
    return rc;
}
#endif
/* Prefix prepended to the logging option names that read_config() passes to
 * get_config_opt(): empty for confdb, "logging." for cmap. */
#if HAVE_CONFDB
# define KEY_PREFIX ""
#elif HAVE_CMAP
# define KEY_PREFIX "logging."
#endif
/*
 * Read the cluster configuration and set up daemon options and logging.
 *
 * Connects to the configuration API (confdb or cmap, chosen at build time),
 * determines the cluster stack, records stack-specific daemon options, then
 * works out the debug / logfile / syslog settings, preferring values already
 * present as daemon options over those found in the cluster configuration.
 *
 * Returns TRUE on success, FALSE when the configuration API cannot be
 * reached or the stack type is not supported.
 */
gboolean
read_config(void)
{
int rc = CS_OK;
int retries = 0;
gboolean have_log = FALSE;
const char *const_value = NULL;
char *logging_debug = NULL;
char *logging_logfile = NULL;
char *logging_to_logfile = NULL;
char *logging_to_syslog = NULL;
char *logging_syslog_facility = NULL;
#if HAVE_CONFDB
char *value = NULL;
confdb_handle_t config;
confdb_handle_t top_handle = 0;
hdb_handle_t local_handle;
static confdb_callbacks_t callbacks = { };
/* Up to 5 connection attempts, sleeping 1s, 2s, ... between them */
do {
rc = confdb_initialize(&config, &callbacks);
if (rc != CS_OK) {
retries++;
printf("Connection setup failed: %d. Retrying in %ds\n", rc, retries);
sleep(retries);
} else {
break;
}
} while (retries < 5);
#elif HAVE_CMAP
cmap_handle_t local_handle;
uint64_t config = 0;
/* There can be only one (possibility if confdb isn't around) */
/* Up to 5 connection attempts, sleeping 1s, 2s, ... between them */
do {
rc = cmap_initialize(&local_handle);
if (rc != CS_OK) {
retries++;
printf("API connection setup failed: %s. Retrying in %ds\n", cs_strerror(rc), retries);
crm_info("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc), retries);
sleep(retries);
} else {
break;
}
} while (retries < 5);
#endif
if (rc != CS_OK) {
printf("Could not connect to Cluster Configuration Database API, error %d\n", rc);
crm_warn("Could not connect to Cluster Configuration Database API, error %d", rc);
return FALSE;
}
stack = get_cluster_type();
crm_info("Reading configure for stack: %s", name_for_cluster_type(stack));
/* =::=::= Should we be here =::=::= */
if (stack == pcmk_cluster_corosync) {
set_daemon_option("cluster_type", "corosync");
set_daemon_option("quorum_type", "corosync");
#if HAVE_CONFDB
} else if (stack == pcmk_cluster_cman) {
set_daemon_option("cluster_type", "cman");
set_daemon_option("quorum_type", "cman");
enable_crmd_as_root(TRUE);
} else if (stack == pcmk_cluster_classic_ais) {
set_daemon_option("cluster_type", "openais");
set_daemon_option("quorum_type", "pcmk");
/* Look for a service block to indicate our plugin is loaded */
top_handle = config_find_init(config);
local_handle = config_find_next(config, "service", top_handle);
/* Walk the "service" objects until the one named "pacemaker" is found */
while (local_handle) {
get_config_opt(config, local_handle, "name", &value, NULL);
if (safe_str_eq("pacemaker", value)) {
get_config_opt(config, local_handle, "ver", &value, "0");
if (safe_str_eq(value, "1")) {
get_config_opt(config, local_handle, "use_logd", &value, "no");
set_daemon_option("use_logd", value);
set_daemon_option("LOGD", value);
get_config_opt(config, local_handle, "use_mgmtd", &value, "no");
enable_mgmtd(crm_is_true(value));
} else {
crm_err("We can only start Pacemaker from init if using version 1"
" of the Pacemaker plugin for Corosync. Terminating.");
crm_exit(100);
}
break;
}
local_handle = config_find_next(config, "service", top_handle);
}
free(value);
#endif
} else {
crm_err("Unsupported stack type: %s", name_for_cluster_type(stack));
return FALSE;
}
#if HAVE_CONFDB
/* For confdb the logging options live in a "logging" object */
top_handle = config_find_init(config);
local_handle = config_find_next(config, "logging", top_handle);
#endif
/* =::=::= Logging =::=::= */
get_config_opt(config, local_handle, KEY_PREFIX "debug", &logging_debug, "off");
/* A "debugfile" daemon option overrides the configured logfile settings */
const_value = daemon_option("debugfile");
if (const_value) {
logging_to_logfile = strdup("on");
logging_logfile = strdup(const_value);
crm_trace("Using debugfile setting from the environment: %s", logging_logfile);
} else {
get_config_opt(config, local_handle, KEY_PREFIX "to_logfile", &logging_to_logfile, "off");
get_config_opt(config, local_handle, KEY_PREFIX "logfile", &logging_logfile,
"/var/log/pacemaker");
}
/* A "logfacility" daemon option overrides the configured syslog settings;
 * the special value "none" turns syslog off */
const_value = daemon_option("logfacility");
if (const_value) {
logging_syslog_facility = strdup(const_value);
crm_trace("Using logfacility setting from the environment: %s", logging_syslog_facility);
if (safe_str_eq(logging_syslog_facility, "none")) {
logging_to_syslog = strdup("off");
} else {
logging_to_syslog = strdup("on");
}
} else {
get_config_opt(config, local_handle, KEY_PREFIX "to_syslog", &logging_to_syslog, "on");
get_config_opt(config, local_handle, KEY_PREFIX "syslog_facility", &logging_syslog_facility,
"daemon");
}
/* All lookups are done; release the configuration connection */
#if HAVE_CONFDB
confdb_finalize(config);
#elif HAVE_CMAP
cmap_finalize(local_handle);
#endif
/* An existing "debug" daemon option wins over the configured debug value */
if (daemon_option("debug")) {
crm_trace("Using debug setting from the environment: %s", daemon_option("debug"));
if (get_crm_log_level() < LOG_DEBUG && daemon_option_enabled("pacemakerd", "debug")) {
set_crm_log_level(LOG_DEBUG);
}
} else if (crm_is_true(logging_debug)) {
set_daemon_option("debug", "1");
if (get_crm_log_level() < LOG_DEBUG) {
set_crm_log_level(LOG_DEBUG);
}
} else {
set_daemon_option("debug", "0");
}
if (crm_is_true(logging_to_logfile)) {
if (crm_add_logfile(logging_logfile)) {
/* What a cluster fsck, eventually we need to mandate /one/ */
set_daemon_option("debugfile", logging_logfile);
set_daemon_option("DEBUGLOG", logging_logfile);
have_log = TRUE;
} else {
crm_err("Couldn't create logfile: %s", logging_logfile);
}
}
/* Syslog is only switched off when file logging is actually working */
if (have_log && crm_is_true(logging_to_syslog) == FALSE) {
qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_FALSE);
free(logging_syslog_facility);
logging_syslog_facility = strdup("none");
crm_info("User configured file based logging and explicitly disabled syslog.");
} else if (crm_is_true(logging_to_syslog) == FALSE) {
crm_err("Please enable some sort of logging, either 'to_logfile: on' or 'to_syslog: on'.");
crm_err("If you use file logging, be sure to also define a value for 'logfile'");
}
set_daemon_option("logfacility", logging_syslog_facility);
/* NOTE(review): the leading '+' on the next line looks like an added-line
 * marker left over from the diff view this text was taken from - confirm
 * and drop it when extracting the real source. */
+ setenv("HA_LOGFACILITY", logging_syslog_facility, 1);
free(logging_debug);
free(logging_logfile);
free(logging_to_logfile);
free(logging_to_syslog);
free(logging_syslog_facility);
return TRUE;
}
diff --git a/mcp/pacemaker.c b/mcp/pacemaker.c
index ea978510de..30a357f8ca 100644
--- a/mcp/pacemaker.c
+++ b/mcp/pacemaker.c
@@ -1,1009 +1,1008 @@
/*
* Copyright (C) 2010 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <pacemaker.h>
#include <pwd.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <crm/msg_xml.h>
#include <crm/common/ipcs.h>
#include <crm/common/mainloop.h>
#include <crm/cluster.h>
#include <dirent.h>
#include <ctype.h>
/* Set when a child exits with code 100; makes pcmk_shutdown_worker() exit
 * with code 100 as well once shutdown has completed. */
gboolean fatal_error = FALSE;
/* Main loop that pcmk_shutdown_worker() quits when shutdown is complete. */
GMainLoop *mainloop = NULL;
/* Known nodes, keyed by node id (see update_node_processes()). */
GHashTable *peers = NULL;
/* Interval, in seconds, between checks of processes found running at startup. */
#define PCMK_PROCESS_CHECK_INTERVAL 5
/* Local node name and id, used when publishing the local process list. */
char *local_name = NULL;
uint32_t local_nodeid = 0;
/* Created by the first pcmk_shutdown() call; non-NULL means shutdown is in progress. */
crm_trigger_t *shutdown_trigger = NULL;
const char *pid_file = "/var/run/pacemaker.pid";
/* *INDENT-OFF* */
/* One bit per process type.  get_process_list() ORs together the flags of
 * all children that currently have a pid.  Note that crm_proc_none is 0x1,
 * not 0. */
enum crm_proc_flag {
crm_proc_none = 0x00000001,
crm_proc_plugin = 0x00000002,
crm_proc_lrmd = 0x00000010,
crm_proc_cib = 0x00000100,
crm_proc_crmd = 0x00000200,
crm_proc_attrd = 0x00001000,
crm_proc_stonithd = 0x00002000,
crm_proc_pe = 0x00010000,
crm_proc_te = 0x00020000,
crm_proc_mgmtd = 0x00040000,
crm_proc_stonith_ng = 0x00100000,
};
/* *INDENT-ON* */
/* Bookkeeping for one child daemon managed by this process. */
typedef struct pcmk_child_s {
/* Process id; 0 when not running */
int pid;
/* crm_proc_flag bit advertised for this child */
long flag;
/* Position in the start order (shutdown walks it in reverse); values below 1 are never started */
int start_seq;
/* Incremented by pcmk_process_exit() each time the child exits */
int respawn_count;
/* Whether the child should be restarted after it exits */
gboolean respawn;
const char *name;
/* User to run as; NULL skips the user lookup and setuid() in start_child() */
const char *uid;
/* Path of the executable; NULL means there is nothing to start or stop */
const char *command;
/* TRUE when the process was found already running rather than forked by us */
gboolean active_before_startup;
} pcmk_child_t;
/* Index into the array below */
/* These values must match the positions of the "crmd" and "mgmtd" entries
 * in pcmk_children[]. */
#define pcmk_child_crmd 4
#define pcmk_child_mgmtd 8
/* *INDENT-OFF* */
/* Table of child daemons.  Columns: pid, flag, start_seq, respawn_count,
 * respawn, name, uid, command.  The trailing active_before_startup field is
 * not listed and is therefore zero-initialized.  Entries with start_seq 0
 * are not started. */
static pcmk_child_t pcmk_children[] = {
{ 0, crm_proc_none, 0, 0, FALSE, "none", NULL, NULL },
{ 0, crm_proc_plugin, 0, 0, FALSE, "ais", NULL, NULL },
{ 0, crm_proc_lrmd, 3, 0, TRUE, "lrmd", NULL, CRM_DAEMON_DIR"/lrmd" },
{ 0, crm_proc_cib, 1, 0, TRUE, "cib", CRM_DAEMON_USER, CRM_DAEMON_DIR"/cib" },
{ 0, crm_proc_crmd, 6, 0, TRUE, "crmd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/crmd" },
{ 0, crm_proc_attrd, 4, 0, TRUE, "attrd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/attrd" },
{ 0, crm_proc_stonithd, 0, 0, TRUE, "stonithd", NULL, NULL },
{ 0, crm_proc_pe, 5, 0, TRUE, "pengine", CRM_DAEMON_USER, CRM_DAEMON_DIR"/pengine" },
{ 0, crm_proc_mgmtd, 0, 0, TRUE, "mgmtd", NULL, HB_DAEMON_DIR"/mgmtd" },
{ 0, crm_proc_stonith_ng, 2, 0, TRUE, "stonith-ng", NULL, CRM_DAEMON_DIR"/stonithd" },
};
/* *INDENT-ON* */
/* Forward declarations: both functions are used before they are defined. */
static gboolean start_child(pcmk_child_t * child);
static gboolean check_active_before_startup_processes(gpointer user_data);
/*
 * Choose the user that crmd is started as: no user switch at all when
 * `enable` is true, CRM_DAEMON_USER otherwise.
 */
void
enable_crmd_as_root(gboolean enable)
{
    pcmk_children[pcmk_child_crmd].uid = enable ? NULL : CRM_DAEMON_USER;
}
/*
 * Enable or disable mgmtd by giving it start sequence 7 (started) or
 * 0 (never started).
 */
void
enable_mgmtd(gboolean enable)
{
    pcmk_children[pcmk_child_mgmtd].start_seq = enable ? 7 : 0;
}
static uint32_t
get_process_list(void)
{
int lpc = 0;
uint32_t procs = crm_proc_plugin;
for (lpc = 0; lpc < SIZEOF(pcmk_children); lpc++) {
if (pcmk_children[lpc].pid != 0) {
procs |= pcmk_children[lpc].flag;
}
}
return procs;
}
/*
 * Record that a child has gone away and decide what happens next.
 *
 * The child's pid is cleared and the updated process list is published
 * straight away.  The respawn counter is bumped, and respawning is switched
 * off once it exceeds MAX_RESPAWN.  While a shutdown is in progress the
 * shutdown trigger is poked so the next child can be stopped; otherwise the
 * child is restarted if respawning is still allowed.
 */
static void
pcmk_process_exit(pcmk_child_t * child)
{
    child->active_before_startup = FALSE;
    child->pid = 0;

    /* Broadcast the fact that one of our processes died ASAP
     *
     * Try to get some logging of the cause out first though
     * because we're probably about to get fenced
     *
     * Potentially do this only if respawn_count > N
     * to allow for local recovery
     */
    update_node_processes(local_nodeid, NULL, get_process_list());

    child->respawn_count++;
    if (child->respawn_count > MAX_RESPAWN) {
        crm_err("Child respawn count exceeded by %s", child->name);
        child->respawn = FALSE;
    }

    if (shutdown_trigger) {
        /* Shutting down: let the worker move on */
        mainloop_set_trigger(shutdown_trigger);
        update_node_processes(local_nodeid, NULL, get_process_list());
        return;
    }

    if (child->respawn) {
        crm_notice("Respawning failed child process: %s", child->name);
        start_child(child);
    }
}
/*
 * Child-watch callback: a forked child has terminated.
 *
 * Logs how the child ended (signal or exit code).  Exit code 100 means the
 * child does not want to be respawned; in that case respawning is disabled,
 * the fatal-error flag is raised and shutdown is started.  In every case the
 * exit is then processed by pcmk_process_exit().
 */
static void
pcmk_child_exit(GPid pid, gint status, gpointer user_data)
{
    pcmk_child_t *proc = user_data;
    int exitcode = 0;

    if (WIFSIGNALED(status)) {
        int sig = WTERMSIG(status);
        int dumped = WCOREDUMP(status);

        crm_notice("Child process %s terminated with signal %d (pid=%d, core=%d)",
                   proc->name, sig, proc->pid, dumped);

    } else if (WIFEXITED(status)) {
        exitcode = WEXITSTATUS(status);
        do_crm_log(exitcode == 0 ? LOG_INFO : LOG_ERR,
                   "Child process %s exited (pid=%d, rc=%d)", proc->name, proc->pid, exitcode);
    }

    if (exitcode == 100) {
        crm_warn("Pacemaker child process %s no longer wishes to be respawned. "
                 "Shutting ourselves down.", proc->name);
        proc->respawn = FALSE;
        fatal_error = TRUE;
        pcmk_shutdown(15);
    }

    pcmk_process_exit(proc);
}
/*
 * Send a signal to a child process.
 *
 * signal: signal number; 0 is treated as SIGTERM.
 *
 * Children without a command, or without a positive pid, are left alone.
 * The outcome of kill() is only logged.  Always returns TRUE.
 */
static gboolean
stop_child(pcmk_child_t * child, int signal)
{
    int rc;

    if (signal == 0) {
        signal = SIGTERM;
    }

    if (child->command == NULL) {
        crm_debug("Nothing to do for child \"%s\"", child->name);
        return TRUE;

    } else if (child->pid <= 0) {
        crm_trace("Client %s not running", child->name);
        return TRUE;
    }

    errno = 0;
    rc = kill(child->pid, signal);

    if (rc != 0) {
        crm_perror(LOG_ERR, "Stopping %s: Could not send -%d to process %d failed",
                   child->name, signal, child->pid);
    } else {
        crm_notice("Stopping %s: Sent -%d to process %d", child->name, signal, child->pid);
    }

    return TRUE;
}
/* argv arrays filled in by start_child() in the forked child before exec:
 * opts_default holds just the command, opts_vgrind holds the valgrind binary,
 * its options and the command.  Both stay NULL-terminated. */
static char *opts_default[] = { NULL, NULL };
static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL };
/*
 * Fork and exec one child daemon.
 *
 * Children without a command are skipped.  Valgrind/callgrind wrapping is
 * enabled when PCMK_valgrind_enabled / PCMK_callgrind_enabled is either
 * true or contains the child's name, provided VALGRIND_BIN is non-empty.
 *
 * In the parent: the pid is recorded, a child watch is installed and the
 * updated process list is published; TRUE is returned.
 *
 * In the child: a new session is started, the user is switched when the
 * child has a uid configured, all file descriptors are closed and
 * stdin/stdout/stderr are reopened on /dev/null before exec.  The child
 * never returns to the caller: if the configured user cannot be resolved or
 * exec fails it exits with code 100, which the parent treats as "do not
 * respawn".  (Previously a failed user lookup returned TRUE from inside the
 * child, leaving a second copy of the parent running.)
 */
static gboolean
start_child(pcmk_child_t * child)
{
    int lpc = 0;
    uid_t uid = 0;
    struct rlimit oflimits;
    gboolean use_valgrind = FALSE;
    gboolean use_callgrind = FALSE;
    const char *devnull = "/dev/null";
    const char *env_valgrind = getenv("PCMK_valgrind_enabled");
    const char *env_callgrind = getenv("PCMK_callgrind_enabled");

    child->active_before_startup = FALSE;

    if (child->command == NULL) {
        crm_info("Nothing to do for child \"%s\"", child->name);
        return TRUE;
    }

    /* callgrind implies valgrind */
    if (env_callgrind != NULL && crm_is_true(env_callgrind)) {
        use_callgrind = TRUE;
        use_valgrind = TRUE;

    } else if (env_callgrind != NULL && strstr(env_callgrind, child->name)) {
        use_callgrind = TRUE;
        use_valgrind = TRUE;

    } else if (env_valgrind != NULL && crm_is_true(env_valgrind)) {
        use_valgrind = TRUE;

    } else if (env_valgrind != NULL && strstr(env_valgrind, child->name)) {
        use_valgrind = TRUE;
    }

    if (use_valgrind && strlen(VALGRIND_BIN) == 0) {
        crm_warn("Cannot enable valgrind for %s:"
                 " The location of the valgrind binary is unknown", child->name);
        use_valgrind = FALSE;
    }

    child->pid = fork();
    CRM_ASSERT(child->pid != -1);

    if (child->pid > 0) {
        /* parent */
        g_child_watch_add(child->pid, pcmk_child_exit, child);

        crm_info("Forked child %d for process %s%s", child->pid, child->name,
                 use_valgrind ? " (valgrind enabled: " VALGRIND_BIN ")" : "");
        update_node_processes(local_nodeid, NULL, get_process_list());
        return TRUE;

    } else {
        /* Start a new session */
        (void)setsid();

        /* Setup the two alternate arg arrays */
        opts_vgrind[0] = strdup(VALGRIND_BIN);
        if (use_callgrind) {
            opts_vgrind[1] = strdup("--tool=callgrind");
            opts_vgrind[2] = strdup("--callgrind-out-file=" CRM_STATE_DIR "/callgrind.out.%p");
            opts_vgrind[3] = strdup(child->command);
            opts_vgrind[4] = NULL;
        } else {
            opts_vgrind[1] = strdup(child->command);
            opts_vgrind[2] = NULL;
            opts_vgrind[3] = NULL;
            opts_vgrind[4] = NULL;
        }
        opts_default[0] = strdup(child->command);

#if 0
        /* Dont set the group for now - it prevents connection to the cluster */
        if (gid && setgid(gid) < 0) {
            crm_perror("Could not set group to %d", gid);
        }
#endif

        if (child->uid) {
            if (crm_user_lookup(child->uid, &uid, NULL) < 0) {
                crm_err("Invalid uid (%s) specified for %s", child->uid, child->name);
                /* This is the forked child: returning would let it carry on
                 * executing the parent's code.  Exit instead, using the same
                 * "do not respawn" code as an exec failure. */
                crm_exit(100);
            }
        }

        if (uid && setuid(uid) < 0) {
            crm_perror(LOG_ERR, "Could not set user to %d (%s)", uid, child->uid);
        }

        /* Close all open file descriptors */
        getrlimit(RLIMIT_NOFILE, &oflimits);
        for (lpc = 0; lpc < oflimits.rlim_cur; lpc++) {
            close(lpc);
        }

        (void)open(devnull, O_RDONLY);  /* Stdin:  fd 0 */
        (void)open(devnull, O_WRONLY);  /* Stdout: fd 1 */
        (void)open(devnull, O_WRONLY);  /* Stderr: fd 2 */

        if (use_valgrind) {
            (void)execvp(VALGRIND_BIN, opts_vgrind);
        } else {
            (void)execvp(child->command, opts_default);
        }

        /* Only reached when exec failed */
        crm_perror(LOG_ERR, "FATAL: Cannot exec %s", child->command);
        crm_exit(100);
    }
    return TRUE;                /* never reached */
}
/*
 * Timer callback used during shutdown: if the child passed as `data` still
 * has a pid, force it down.  Returns FALSE so the timer fires only once.
 */
static gboolean
escalate_shutdown(gpointer data)
{
    pcmk_child_t *proc = data;

    if (proc->pid != 0) {
        /* Use SIGSEGV instead of SIGKILL to create a core so we can see what it was up to */
        crm_err("Child %s not terminating in a timely manner, forcing", proc->name);
        stop_child(proc, SIGSEGV);
    }

    return FALSE;
}
/*
 * Trigger callback that stops the children one at a time, in reverse start
 * order.
 *
 * `phase` is static, so every invocation resumes where the previous one
 * stopped.  For the child whose start_seq equals the current phase and which
 * still has a pid, the first visit disables respawning and sends SIGTERM;
 * later visits only log (at most every 30 seconds) that we are still
 * waiting.  In both cases the function returns and relies on being triggered
 * again (pcmk_process_exit() sets the trigger while a shutdown is in
 * progress).  Once no phase has a running child, the main loop is quit.
 */
static gboolean
pcmk_shutdown_worker(gpointer user_data)
{
static int phase = 0;
static time_t next_log = 0;
static int max = SIZEOF(pcmk_children);
int lpc = 0;
/* First invocation: start from the highest possible sequence number */
if (phase == 0) {
crm_notice("Shuting down Pacemaker");
phase = max;
/* Add a second, more frequent, check to speed up shutdown */
g_timeout_add_seconds(5, check_active_before_startup_processes, NULL);
}
for (; phase > 0; phase--) {
/* dont stop anything with start_seq < 1 */
for (lpc = max - 1; lpc >= 0; lpc--) {
pcmk_child_t *child = &(pcmk_children[lpc]);
if (phase != child->start_seq) {
continue;
}
if (child->pid) {
time_t now = time(NULL);
/* respawn still set means the stop request has not been sent yet */
if (child->respawn) {
next_log = now + 30;
child->respawn = FALSE;
stop_child(child, SIGTERM);
/* Only children with a start_seq below crmd's get a forced-stop timer */
if (phase < pcmk_children[pcmk_child_crmd].start_seq) {
g_timeout_add(180000 /* 3m */ , escalate_shutdown, child);
}
} else if (now >= next_log) {
next_log = now + 30;
crm_notice("Still waiting for %s (pid=%d, seq=%d) to terminate...",
child->name, child->pid, child->start_seq);
}
/* Wait for this child to go away before moving to the next one */
return TRUE;
}
/* cleanup */
crm_debug("%s confirmed stopped", child->name);
child->pid = 0;
}
}
/* send_cluster_id(); */
crm_notice("Shutdown complete");
g_main_loop_quit(mainloop);
/* Exit code 100 after a fatal child error (see pcmk_child_exit()) */
if (fatal_error) {
crm_notice("Attempting to inhibit respawning after fatal error");
crm_exit(100);
}
return TRUE;
}
/*
 * Begin (or continue) shutting Pacemaker down.
 *
 * The shutdown trigger is created on first use and then fired, which runs
 * pcmk_shutdown_worker() from the main loop.  nsig is not used.
 */
void
pcmk_shutdown(int nsig)
{
    if (!shutdown_trigger) {
        shutdown_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, pcmk_shutdown_worker, NULL);
    }

    mainloop_set_trigger(shutdown_trigger);
}
/*
 * Create a directory together with any missing parent directories.
 *
 * path_c: directory to create; NULL is rejected
 * mode:   permissions passed to mkdir() for every directory created
 *
 * Components that already exist are not treated as errors.  Processing
 * stops at the first component that cannot be created, so the failure is
 * reported once instead of being retried and logged a second time.
 */
static void
build_path(const char *path_c, mode_t mode)
{
    int offset = 1, len = 0;
    char *path = NULL;

    CRM_CHECK(path_c != NULL, return);

    path = strdup(path_c);
    CRM_CHECK(path != NULL, return);

    for (len = strlen(path); offset < len; offset++) {
        if (path[offset] == '/') {
            /* Temporarily cut the string here to address the parent */
            path[offset] = 0;
            if (mkdir(path, mode) < 0 && errno != EEXIST) {
                crm_perror(LOG_ERR, "Could not create directory '%s'", path);
                goto done;
            }
            path[offset] = '/';
        }
    }

    if (mkdir(path, mode) < 0 && errno != EEXIST) {
        crm_perror(LOG_ERR, "Could not create directory '%s'", path);
    }

  done:
    free(path);
}
/*
 * IPC accept callback: register the new connection as a client.
 * Returns 0 on success and -EIO when the client record cannot be created.
 */
static int32_t
pcmk_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
    crm_trace("Connection %p", c);

    return (crm_client_new(c, uid, gid) != NULL) ? 0 : -EIO;
}
/*
 * IPC connection-created callback; only traces the connection.
 */
static void
pcmk_ipc_created(qb_ipcs_connection_t * c)
{
    crm_trace("Connection %p", c);
}
/* Exit code means? */
/*
 * IPC message callback.
 *
 * The request is acknowledged when the client asked for a response.  A
 * CRM_OP_QUIT task starts a shutdown; any other message causes the process
 * list to be sent to all clients.  Always returns 0.
 */
static int32_t
pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
{
    uint32_t id = 0;
    uint32_t flags = 0;
    const char *task = NULL;
    crm_client_t *client = crm_client_get(qbc);
    xmlNode *request = crm_ipcs_recv(client, data, size, &id, &flags);

    if (flags & crm_ipc_client_response) {
        crm_ipcs_send_ack(client, id, "ack", __FUNCTION__, __LINE__);
    }

    if (request == NULL) {
        return 0;
    }

    task = crm_element_value(request, F_CRM_TASK);
    if (!crm_str_eq(task, CRM_OP_QUIT, TRUE)) {
        /* Just send to everyone */
        update_process_clients();

    } else {
        /* Time to quit */
        crm_notice("Shutting down in responce to ticket %s (%s)",
                   crm_element_value(request, F_CRM_REFERENCE),
                   crm_element_value(request, F_CRM_ORIGIN));
        pcmk_shutdown(15);
    }

    free_xml(request);
    return 0;
}
/* Error code means? */
/*
 * IPC connection-closed callback: drop the client record that belongs to
 * the connection.  Always returns 0.
 */
static int32_t
pcmk_ipc_closed(qb_ipcs_connection_t * c)
{
    crm_client_t *gone = crm_client_get(c);

    crm_trace("Connection %p", c);
    crm_client_destroy(gone);

    return 0;
}
/*
 * IPC connection-destroyed callback; only traces the connection.
 */
static void
pcmk_ipc_destroy(qb_ipcs_connection_t * c)
{
    crm_trace("Connection %p", c);
}
/* Handler table wiring the pcmk_ipc_* functions above into the IPC server. */
struct qb_ipcs_service_handlers ipc_callbacks = {
.connection_accept = pcmk_ipc_accept,
.connection_created = pcmk_ipc_created,
.msg_process = pcmk_ipc_dispatch,
.connection_closed = pcmk_ipc_closed,
.connection_destroyed = pcmk_ipc_destroy
};
/*
 * Hash-table iterator: send the message in `data` to the client stored as
 * `value`.  The key is not used.
 */
static void
ghash_send_proc_details(gpointer key, gpointer value, gpointer data)
{
    crm_ipcs_send(value, 0, data, TRUE);
}
/*
 * Hash-table iterator: append a <node> child describing the peer in `value`
 * (id, uname and process mask) to the XML element passed as user_data.
 */
static void
peer_loop_fn(gpointer key, gpointer value, gpointer user_data)
{
    pcmk_peer_t *peer = value;
    xmlNode *parent = user_data;
    xmlNode *entry = create_xml_node(parent, "node");

    crm_xml_add_int(entry, "id", peer->id);
    crm_xml_add(entry, "uname", peer->uname);
    crm_xml_add_int(entry, "processes", peer->processes);
}
/*
 * Send the list of known peers and their process masks to every connected
 * IPC client.
 */
void
update_process_clients(void)
{
    xmlNode *nodes = create_xml_node(NULL, "nodes");

    crm_trace("Sending process list to %d children", crm_hash_table_size(client_connections));

    /* One <node> entry per peer, then the same document to each client */
    g_hash_table_foreach(peers, peer_loop_fn, nodes);
    g_hash_table_foreach(client_connections, ghash_send_proc_details, nodes);

    free_xml(nodes);
}
/*
 * Tell the other nodes which processes are running locally.
 *
 * Builds a <node> element carrying the local process mask (and the local
 * node name, when known) and multicasts it, including the terminating NUL,
 * over CPG.
 *
 * The result of snprintf() is validated before it is used as the message
 * length: on a formatting error or truncation nothing is sent, rather than
 * sending a length that does not match the buffer contents.
 */
void
update_process_peers(void)
{
    char buffer[1024];
    struct iovec iov;
    int rc = 0;

    memset(buffer, 0, sizeof(buffer));

    if (local_name) {
        rc = snprintf(buffer, sizeof(buffer) - 1, "<node uname=\"%s\" proclist=\"%u\"/>",
                      local_name, get_process_list());
    } else {
        rc = snprintf(buffer, sizeof(buffer) - 1, "<node proclist=\"%u\"/>", get_process_list());
    }

    /* snprintf() returns the length it wanted to write, which can exceed
     * the space that was available */
    if (rc < 0 || rc >= (int)(sizeof(buffer) - 1)) {
        crm_err("Could not format the process list message (rc=%d)", rc);
        return;
    }

    iov.iov_base = buffer;
    iov.iov_len = rc + 1;

    crm_trace("Sending %s", buffer);
    send_cpg_message(&iov);
}
/*
 * Record what is known about a node and report whether anything changed.
 *
 * id:    node id; an entry is created in `peers` if none exists yet
 * uname: node name, or NULL to leave the stored name untouched
 * procs: process mask; 0 leaves the stored mask untouched
 *
 * When the local node's entry changed, the clients and the peers are
 * notified from here.
 *
 * Returns TRUE if the entry was created or its name or process mask
 * changed, FALSE otherwise.
 */
gboolean
update_node_processes(uint32_t id, const char *uname, uint32_t procs)
{
    gboolean changed = FALSE;
    pcmk_peer_t *node = g_hash_table_lookup(peers, GUINT_TO_POINTER(id));

    if (node == NULL) {
        changed = TRUE;

        node = calloc(1, sizeof(pcmk_peer_t));
        /* Check the allocation before writing through it */
        CRM_ASSERT(node != NULL);

        node->id = id;
        g_hash_table_insert(peers, GUINT_TO_POINTER(id), node);
    }

    if (uname != NULL) {
        if (node->uname == NULL || safe_str_eq(node->uname, uname) == FALSE) {
            int lpc, len = strlen(uname);

            /* Only mention the previous name when there was one */
            crm_notice("%p Node %u now known as %s%s%s", node, id, uname,
                       node->uname ? ", was: " : "", node->uname ? node->uname : "");
            free(node->uname);
            node->uname = strdup(uname);
            changed = TRUE;

            for (lpc = 0; lpc < len; lpc++) {
                if (uname[lpc] >= 'A' && uname[lpc] <= 'Z') {
                    crm_warn
                        ("Node names with capitals are discouraged, consider changing '%s' to something else",
                         uname);
                    break;
                }
            }
        }

    } else {
        crm_trace("Empty uname for node %u", id);
    }

    if (procs != 0) {
        /* The name may still be unknown at this point */
        const char *known_as = node->uname ? node->uname : "<unknown>";

        if (procs != node->processes) {
            crm_debug("Node %s now has process list: %.32x (was %.32x)",
                      known_as, procs, node->processes);
            node->processes = procs;
            changed = TRUE;

        } else {
            crm_trace("Node %s still has process list: %.32x", known_as, procs);
        }
    }

    if (changed && id == local_nodeid) {
        update_process_clients();
        update_process_peers();
    }
    return changed;
}
/* *INDENT-OFF* */
/* Command-line options.  Each row holds the long name, two numeric fields,
 * the short option character and the help text; the list ends with a NULL
 * name.  NOTE(review): the second field presumably flags a required
 * argument, as in getopt_long() - confirm against struct crm_option. */
static struct crm_option long_options[] = {
/* Top-level Options */
{"help", 0, 0, '?', "\tThis text"},
{"version", 0, 0, '$', "\tVersion information" },
{"verbose", 0, 0, 'V', "\tIncrease debug output"},
{"shutdown", 0, 0, 'S', "\tInstruct Pacemaker to shutdown on this machine"},
{"features", 0, 0, 'F', "\tDisplay the full version and list of features Pacemaker was built with"},
{"-spacer-", 1, 0, '-', "\nAdditional Options:"},
{"foreground", 0, 0, 'f', "\tRun in the foreground instead of as a daemon"},
{"pid-file", 1, 0, 'p', "\t(Advanced) Daemon pid file location"},
{NULL, 0, 0, 0}
};
/* *INDENT-ON* */
/*
 * Change the owner and group of `path`, logging a warning when that fails.
 * The failure is not reported to the caller.
 */
static void
mcp_chown(const char *path, uid_t uid, gid_t gid)
{
    if (chown(path, uid, gid) >= 0) {
        return;
    }

    crm_warn("Cannot change the ownership of %s to user %s and gid %d: %s",
             path, CRM_DAEMON_USER, gid, pcmk_strerror(errno));
}
/*
 * Timer callback: poll the processes that were already running when we
 * started (and so are not our forked children).
 *
 * Entries are visited in start-sequence order.  Any such process that is no
 * longer active is handed to pcmk_process_exit().  Returns TRUE (keep the
 * timer) while at least one of them is still alive, FALSE otherwise.
 */
static gboolean
check_active_before_startup_processes(gpointer user_data)
{
    static int max = SIZEOF(pcmk_children);
    gboolean still_alive = FALSE;
    int seq;
    int idx;

    for (seq = 1; seq < max; seq++) {
        for (idx = 0; idx < max; idx++) {
            pcmk_child_t *entry = &(pcmk_children[idx]);

            /* Skip forked children (tracked through their child watch) and
             * entries that belong to a different start sequence */
            if (!entry->active_before_startup || seq != entry->start_seq) {
                continue;
            }

            if (crm_pid_active(entry->pid) != 1) {
                crm_notice("Process %s terminated (pid=%d)", entry->name, entry->pid);
                pcmk_process_exit(entry);
                continue;
            }

            /* at least one of the processes found at startup
             * is still going, so keep this recurring timer around */
            still_alive = TRUE;
        }
    }

    return still_alive;
}
/*
 * Look through /proc for processes that match an entry of pcmk_children[]
 * and are already running, and start tracking them.
 *
 * For every numeric directory /proc/<pid>, the first two
 * whitespace-separated tokens of /proc/<pid>/status are read; the second
 * token is compared against the name of each child that has a non-zero
 * start_seq (the child flagged crm_proc_stonith_ng is matched as
 * "stonithd").  On a match with a live pid, the child's pid is recorded
 * and active_before_startup is set.
 * NOTE(review): this relies on the status file starting with
 * "Name: <command>" -- Linux procfs layout, confirm for other platforms.
 *
 * If anything was found, a recurring timer running
 * check_active_before_startup_processes() every
 * PCMK_PROCESS_CHECK_INTERVAL seconds is installed.
 *
 * If /proc cannot be opened, a notice is logged and nothing is tracked.
 */
static void
find_and_track_existing_processes(void)
{
    DIR *dp;
    struct dirent *entry;
    struct stat statbuf;
    int start_tracker = 0;

    dp = opendir("/proc");
    if (!dp) {
        /* no proc directory to search through */
        crm_notice("Can not read /proc directory to track existing components");
        return;
    }

    while ((entry = readdir(dp)) != NULL) {
        char procpath[128];
        char value[64];
        char key[16];
        FILE *file;
        int pid;
        int len;
        int max = SIZEOF(pcmk_children);
        int i;

        /* Only entries starting with a digit can be pid directories.
         * The cast keeps isdigit() defined for bytes >= 0x80 when plain
         * char is signed. */
        if (!isdigit((unsigned char) entry->d_name[0])) {
            continue;
        }

        /* Build "/proc/<name>" and make sure "/status" still fits behind
         * it; over-long names are skipped instead of being truncated. */
        len = snprintf(procpath, sizeof(procpath), "/proc/%s", entry->d_name);
        if (len < 0 || (size_t) len + sizeof("/status") > sizeof(procpath)) {
            continue;
        }

        if (lstat(procpath, &statbuf) || !S_ISDIR(statbuf.st_mode)) {
            continue;
        }

        snprintf(procpath + len, sizeof(procpath) - (size_t) len, "/status");

        file = fopen(procpath, "r");
        if (!file) {
            continue;
        }
        /* field widths are one less than the buffer sizes above */
        if (fscanf(file, "%15s%63s", key, value) != 2) {
            fclose(file);
            continue;
        }
        fclose(file);

        pid = atoi(entry->d_name);
        if (pid <= 0) {
            continue;
        }

        for (i = 0; i < max; i++) {
            const char *name = pcmk_children[i].name;

            if (pcmk_children[i].start_seq == 0) {
                /* children with start_seq 0 are never looked for */
                continue;
            }
            if (pcmk_children[i].flag == crm_proc_stonith_ng) {
                name = "stonithd";
            }
            if (safe_str_eq(name, value)) {
                if (crm_pid_active(pid) != 1) {
                    continue;
                }
                crm_notice("Tracking existing %s process (pid=%d)", value, pid);
                pcmk_children[i].pid = pid;
                pcmk_children[i].active_before_startup = TRUE;
                start_tracker = 1;
            }
        }
    }

    if (start_tracker) {
        g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL, check_active_before_startup_processes,
                              NULL);
    }
    closedir(dp);
}
static void
init_children_processes(void)
{
int start_seq = 1, lpc = 0;
static int max = SIZEOF(pcmk_children);
/* start any children that have not been detected */
for (start_seq = 1; start_seq < max; start_seq++) {
/* dont start anything with start_seq < 1 */
for (lpc = 0; lpc < max; lpc++) {
if (pcmk_children[lpc].pid) {
/* we are already tracking it */
continue;
}
if (start_seq == pcmk_children[lpc].start_seq) {
start_child(&(pcmk_children[lpc]));
}
}
}
}
/*
 * Entry point of the Pacemaker master control process.
 *
 * In order, this function:
 *   1. sets up the environment and logging, and parses the command line
 *      (see long_options);
 *   2. connects to the IPC endpoint of a possibly running instance; with
 *      --shutdown it keeps sending CRM_OP_QUIT until that instance
 *      disconnects and then exits, otherwise it aborts with exit code 100
 *      if an instance is already active;
 *   3. reads the cluster configuration via read_config();
 *   4. raises the core file size limit;
 *   5. creates the state directories and hands most of them to the
 *      CRM_DAEMON_USER user;
 *   6. starts the IPC server and connects to Corosync's CFG and CPG
 *      services;
 *   7. adopts already-running children, starts the missing ones and runs
 *      the main loop;
 *   8. tears everything down once the main loop returns.
 *
 * Every exit path goes through crm_exit(): 0 on success (and after
 * --shutdown or --features), 100 if Pacemaker is already active, 1 for
 * any other startup failure.
 */
int
main(int argc, char **argv)
{
    int rc;
    int flag;
    int argerr = 0;

    int option_index = 0;
    gboolean shutdown = FALSE;

    uid_t pcmk_uid = 0;
    gid_t pcmk_gid = 0;
    struct rlimit cores;
    crm_ipc_t *old_instance = NULL;
    qb_ipcs_service_t *ipcs = NULL;
    const char *facility = daemon_option("logfacility");

    /* HA_LOGFACILITY is intentionally not exported from `facility` here:
     * the value is not checked for NULL, and setenv() must not be handed
     * a NULL value.  The original facility is restored through
     * set_daemon_option() once logging has been initialised. */
    setenv("LC_ALL", "C", 1);
    setenv("HA_LOGD", "no", 1);

    set_daemon_option("mcp", "true");
    set_daemon_option("use_logd", "off");

    crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
    crm_set_options(NULL, "mode [options]", long_options, "Start/Stop Pacemaker\n");

    /* Restore the original facility so that read_config() does the right thing */
    set_daemon_option("logfacility", facility);

    while (1) {
        flag = crm_get_option(argc, argv, &option_index);
        if (flag == -1) {
            break;
        }

        switch (flag) {
            case 'V':
                crm_bump_log_level(argc, argv);
                break;
            case 'f':
                /* Legacy */
                break;
            case 'p':
                pid_file = optarg;
                break;
            case '$':
            case '?':
                crm_help(flag, EX_OK);
                break;
            case 'S':
                shutdown = TRUE;
                break;
            case 'F':
                printf("Pacemaker %s (Build: %s)\n Supporting: %s\n", VERSION, BUILD_VERSION,
                       CRM_FEATURES);
                /* NOTE(review): no break here, so this relies on
                 * crm_exit() not returning -- confirm. */
                crm_exit(0);
            default:
                printf("Argument code 0%o (%c) is not (?yet?) supported\n", flag, flag);
                ++argerr;
                break;
        }
    }

    if (optind < argc) {
        printf("non-option ARGV-elements: ");
        while (optind < argc) {
            printf("%s ", argv[optind++]);
        }
        printf("\n");
    }
    if (argerr) {
        crm_help('?', EX_USAGE);
    }

    crm_debug("Checking for old instances of %s", CRM_SYSTEM_MCP);
    old_instance = crm_ipc_new(CRM_SYSTEM_MCP, 0);
    crm_ipc_connect(old_instance);

    if (shutdown) {
        crm_debug("Terminating previous instance");
        /* Keep asking the running instance to quit until it drops the
         * connection. */
        while (crm_ipc_connected(old_instance)) {
            xmlNode *cmd =
                create_request(CRM_OP_QUIT, NULL, NULL, CRM_SYSTEM_MCP, CRM_SYSTEM_MCP, NULL);

            crm_debug(".");
            crm_ipc_send(old_instance, cmd, 0, 0, NULL);
            free_xml(cmd);

            sleep(2);
        }
        crm_ipc_close(old_instance);
        crm_ipc_destroy(old_instance);
        crm_exit(0);

    } else if (crm_ipc_connected(old_instance)) {
        crm_ipc_close(old_instance);
        crm_ipc_destroy(old_instance);
        crm_err("Pacemaker is already active, aborting startup");
        crm_exit(100);
    }

    crm_ipc_close(old_instance);
    crm_ipc_destroy(old_instance);

    if (read_config() == FALSE) {
        crm_notice("Could not obtain corosync config data, exiting");
        crm_exit(1);
    }

    crm_notice("Starting Pacemaker %s (Build: %s): %s", VERSION, BUILD_VERSION, CRM_FEATURES);
    mainloop = g_main_new(FALSE);

    rc = getrlimit(RLIMIT_CORE, &cores);
    if (rc < 0) {
        crm_perror(LOG_ERR, "Cannot determine current maximum core size.");
    } else {
        /* When running as root with core files fully disabled, lift the
         * hard limit; in every case raise the soft limit to the hard one. */
        if (cores.rlim_max == 0 && geteuid() == 0) {
            cores.rlim_max = RLIM_INFINITY;
        } else {
            crm_info("Maximum core file size is: %lu", (unsigned long)cores.rlim_max);
        }
        cores.rlim_cur = cores.rlim_max;

        rc = setrlimit(RLIMIT_CORE, &cores);
        if (rc < 0) {
            crm_perror(LOG_ERR,
                       "Core file generation will remain disabled."
                       " Core files are an important diagnositic tool,"
                       " please consider enabling them by default.");
        }
#if 0
        /* system() is not thread-safe, can't call from here
         * Actually, its a pretty hacky way to try and achieve this anyway
         */
        if (system("echo 1 > /proc/sys/kernel/core_uses_pid") != 0) {
            crm_perror(LOG_ERR, "Could not enable /proc/sys/kernel/core_uses_pid");
        }
#endif
    }

    if (crm_user_lookup(CRM_DAEMON_USER, &pcmk_uid, &pcmk_gid) < 0) {
        crm_err("Cluster user %s does not exist, aborting Pacemaker startup", CRM_DAEMON_USER);
        crm_exit(1);
    }

    mkdir(CRM_STATE_DIR, 0750);
    mcp_chown(CRM_STATE_DIR, pcmk_uid, pcmk_gid);

    /* Used by stonithd */
    build_path(HA_STATE_DIR "/heartbeat", 0755);
    mcp_chown(HA_STATE_DIR "/heartbeat", pcmk_uid, pcmk_gid);

    /* Used by RAs - Leave owned by root */
    build_path(CRM_RSCTMP_DIR, 0755);

    /* Used to store core files in */
    build_path(CRM_CORE_DIR, 0755);
    mcp_chown(CRM_CORE_DIR, pcmk_uid, pcmk_gid);

    /* Used to store blackbox dumps in */
    build_path(CRM_BLACKBOX_DIR, 0755);
    mcp_chown(CRM_BLACKBOX_DIR, pcmk_uid, pcmk_gid);

    /* Used to store policy engine inputs in */
    build_path(PE_STATE_DIR, 0755);
    mcp_chown(PE_STATE_DIR, pcmk_uid, pcmk_gid);

    /* Used to store the cluster configuration */
    build_path(CRM_CONFIG_DIR, 0755);
    mcp_chown(CRM_CONFIG_DIR, pcmk_uid, pcmk_gid);

    peers = g_hash_table_new(g_direct_hash, g_direct_equal);

    ipcs = mainloop_add_ipc_server(CRM_SYSTEM_MCP, QB_IPC_NATIVE, &ipc_callbacks);
    if (ipcs == NULL) {
        crm_err("Couldn't start IPC server");
        crm_exit(1);
    }

    if (cluster_connect_cfg(&local_nodeid) == FALSE) {
        crm_err("Couldn't connect to Corosync's CFG service");
        crm_exit(1);
    }

    if (cluster_connect_cpg() == FALSE) {
        crm_err("Couldn't connect to Corosync's CPG service");
        crm_exit(1);
    }

    local_name = get_local_node_name();
    update_node_processes(local_nodeid, local_name, get_process_list());

    mainloop_add_signal(SIGTERM, pcmk_shutdown);
    mainloop_add_signal(SIGINT, pcmk_shutdown);

    /* Adopt children that are already running before starting the rest */
    find_and_track_existing_processes();
    init_children_processes();

    crm_info("Starting mainloop");

    g_main_run(mainloop);

    if (ipcs) {
        crm_trace("Closing IPC server");
        mainloop_del_ipc_server(ipcs);
        ipcs = NULL;
    }

    g_main_destroy(mainloop);

    cluster_disconnect_cpg();
    cluster_disconnect_cfg();

    crm_info("Exiting %s", crm_system_name);

    crm_exit(0);
}

File Metadata

Mime Type
text/x-diff
Expires
Tue, Jul 8, 6:11 PM (21 h, 45 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1980669
Default Alt Text
(48 KB)

Event Timeline