Page MenuHomeClusterLabs Projects

No OneTemporary

diff --git a/cib/io.c b/cib/io.c
index 1fd020f4d0..a2b4e862fa 100644
--- a/cib/io.c
+++ b/cib/io.c
@@ -1,762 +1,756 @@
-/*
+/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
- *
+ *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
- *
+ *
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
- *
+ *
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <dirent.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <crm/crm.h>
#include <cibio.h>
#include <crm/cib.h>
#include <crm/common/util.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/common/util.h>
#include <crm/cluster.h>
#define CIB_SERIES "cib"
extern const char *cib_root;
static int cib_wrap = 100;
#define CIB_WRITE_PARANOIA 0
const char *local_resource_path[] = {
XML_CIB_TAG_STATUS,
};
const char *resource_path[] = {
XML_CIB_TAG_RESOURCES,
};
const char *node_path[] = {
XML_CIB_TAG_NODES,
};
const char *constraint_path[] = {
XML_CIB_TAG_CONSTRAINTS,
};
crm_trigger_t *cib_writer = NULL;
gboolean initialized = FALSE;
xmlNode *node_search = NULL;
xmlNode *resource_search = NULL;
xmlNode *constraint_search = NULL;
xmlNode *status_search = NULL;
extern int cib_status;
int set_connected_peers(xmlNode * xml_obj);
void GHFunc_count_peers(gpointer key, gpointer value, gpointer user_data);
int write_cib_contents(gpointer p);
extern void cib_cleanup(void);
static gboolean
validate_cib_digest(xmlNode * local_cib, const char *sigfile)
{
char *digest = NULL;
char *expected = NULL;
gboolean passed = FALSE;
FILE *expected_strm = NULL;
int start = 0, length = 0, read_len = 0;
CRM_ASSERT(sigfile != NULL);
expected_strm = fopen(sigfile, "r");
if (expected_strm == NULL && errno == ENOENT) {
crm_warn("No on-disk digest present");
return TRUE;
} else if (expected_strm == NULL) {
crm_perror(LOG_ERR, "Could not open signature file %s for reading", sigfile);
goto bail;
}
if (local_cib != NULL) {
digest = calculate_on_disk_digest(local_cib);
}
start = ftell(expected_strm);
fseek(expected_strm, 0L, SEEK_END);
length = ftell(expected_strm);
fseek(expected_strm, 0L, start);
CRM_ASSERT(length >= 0);
CRM_ASSERT(start == ftell(expected_strm));
if (length > 0) {
crm_trace("Reading %d bytes from file", length);
expected = calloc(1, (length + 1));
read_len = fread(expected, 1, length, expected_strm); /* Coverity: False positive */
CRM_ASSERT(read_len == length);
}
fclose(expected_strm);
bail:
if (expected == NULL) {
crm_err("On-disk digest is empty");
} else if (safe_str_eq(expected, digest)) {
crm_trace("Digest comparision passed: %s", digest);
passed = TRUE;
} else {
crm_err("Digest comparision failed: expected %s (%s), calculated %s",
expected, sigfile, digest);
}
free(digest);
free(expected);
return passed;
}
static int
write_cib_digest(xmlNode * local_cib, const char *digest_file, int fd, char *digest)
{
int rc = 0;
char *local_digest = NULL;
FILE *digest_strm = fdopen(fd, "w");
if (digest_strm == NULL) {
crm_perror(LOG_ERR, "Cannot open signature file %s for writing", digest_file);
return -1;
}
if (digest == NULL) {
local_digest = calculate_on_disk_digest(local_cib);
CRM_ASSERT(digest != NULL);
digest = local_digest;
}
rc = fprintf(digest_strm, "%s", digest);
if (rc < 0) {
crm_perror(LOG_ERR, "Cannot write to signature file %s", digest_file);
}
CRM_ASSERT(digest_strm != NULL);
if (fflush(digest_strm) != 0) {
crm_perror(LOG_ERR, "Couldnt flush the contents of %s", digest_file);
rc = -1;
}
if (fsync(fileno(digest_strm)) < 0) {
crm_perror(LOG_ERR, "Couldnt sync the contents of %s", digest_file);
rc = -1;
}
fclose(digest_strm);
free(local_digest);
return rc;
}
static gboolean
validate_on_disk_cib(const char *filename, xmlNode ** on_disk_cib)
{
int s_res = -1;
struct stat buf;
gboolean passed = TRUE;
xmlNode *root = NULL;
CRM_ASSERT(filename != NULL);
s_res = stat(filename, &buf);
if (s_res == 0) {
char *sigfile = NULL;
size_t fnsize;
crm_trace("Reading cluster configuration from: %s", filename);
root = filename2xml(filename);
fnsize = strlen(filename) + 5;
sigfile = calloc(1, fnsize);
snprintf(sigfile, fnsize, "%s.sig", filename);
if (validate_cib_digest(root, sigfile) == FALSE) {
passed = FALSE;
}
free(sigfile);
}
if (on_disk_cib != NULL) {
*on_disk_cib = root;
} else {
free_xml(root);
}
return passed;
}
static int
cib_rename(const char *old, const char *new)
{
int rc = 0;
int automatic_fd = 0;
char *automatic = NULL;
if (new == NULL) {
umask(S_IWGRP | S_IWOTH | S_IROTH);
automatic = g_strdup_printf("%s/cib.auto.XXXXXX", cib_root);
automatic_fd = mkstemp(automatic);
new = automatic;
crm_err("Archiving corrupt or unusable file %s as %s", old, automatic);
}
rc = rename(old, new);
if (rc < 0) {
crm_perror(LOG_ERR, "Couldn't rename %s as %s - Disabling disk writes and continuing", old,
new);
cib_writes_enabled = FALSE;
}
if (automatic_fd > 0) {
close(automatic_fd);
}
free(automatic);
return rc;
}
/*
* It is the callers responsibility to free the output of this function
*/
static xmlNode *
retrieveCib(const char *filename, const char *sigfile, gboolean archive_invalid)
{
struct stat buf;
xmlNode *root = NULL;
crm_info("Reading cluster configuration from: %s (digest: %s)", filename, sigfile);
if (stat(filename, &buf) != 0) {
crm_warn("Cluster configuration not found: %s", filename);
return NULL;
}
root = filename2xml(filename);
if (root == NULL) {
crm_err("%s exists but does NOT contain valid XML. ", filename);
crm_warn("Continuing but %s will NOT used.", filename);
} else if (validate_cib_digest(root, sigfile) == FALSE) {
crm_err("Checksum of %s failed! Configuration contents ignored!", filename);
crm_err("Usually this is caused by manual changes, "
"please refer to http://clusterlabs.org/wiki/FAQ#cib_changes_detected");
crm_warn("Continuing but %s will NOT used.", filename);
free_xml(root);
root = NULL;
if (archive_invalid) {
/* Archive the original files so the contents are not lost */
cib_rename(filename, NULL);
cib_rename(sigfile, NULL);
}
}
return root;
}
xmlNode *
readCibXmlFile(const char *dir, const char *file, gboolean discard_status)
{
int seq = 0;
char *backup_file = NULL;
char *filename = NULL, *sigfile = NULL;
const char *name = NULL;
const char *value = NULL;
const char *validation = NULL;
const char *use_valgrind = getenv("HA_VALGRIND_ENABLED");
xmlNode *root = NULL;
xmlNode *status = NULL;
if (!crm_is_writable(dir, file, CRM_DAEMON_USER, NULL, FALSE)) {
cib_status = -EACCES;
return NULL;
}
filename = crm_concat(dir, file, '/');
sigfile = crm_concat(filename, "sig", '.');
cib_status = pcmk_ok;
root = retrieveCib(filename, sigfile, TRUE);
if (root == NULL) {
crm_warn("Primary configuration corrupt or unusable, trying backup...");
seq = get_last_sequence(cib_root, CIB_SERIES);
}
while (root == NULL) {
struct stat buf;
free(sigfile);
if (seq == 0) {
seq += cib_wrap; /* unwrap */
}
backup_file = generate_series_filename(cib_root, CIB_SERIES, seq - 1, FALSE);
sigfile = crm_concat(filename, "sig", '.');
if (stat(backup_file, &buf) != 0) {
crm_debug("Backup file %s not found", backup_file);
break;
}
crm_warn("Attempting to load: %s", backup_file);
root = retrieveCib(backup_file, sigfile, FALSE);
seq--;
}
free(backup_file);
if (root == NULL) {
root = createEmptyCib();
crm_xml_add(root, XML_ATTR_GENERATION, "0");
crm_xml_add(root, XML_ATTR_NUMUPDATES, "0");
crm_xml_add(root, XML_ATTR_GENERATION_ADMIN, "0");
crm_xml_add(root, XML_ATTR_VALIDATION, LATEST_SCHEMA_VERSION);
crm_warn("Continuing with an empty configuration.");
}
if (cib_writes_enabled && use_valgrind) {
if (crm_is_true(use_valgrind) || strstr(use_valgrind, "cib")) {
cib_writes_enabled = FALSE;
crm_err("*********************************************************");
crm_err("*** Disabling disk writes to avoid confusing Valgrind ***");
crm_err("*********************************************************");
}
}
status = find_xml_node(root, XML_CIB_TAG_STATUS, FALSE);
if (discard_status && status != NULL) {
/* strip out the status section if there is one */
free_xml(status);
status = NULL;
}
if (status == NULL) {
create_xml_node(root, XML_CIB_TAG_STATUS);
}
/* Do this before DTD validation happens */
/* fill in some defaults */
name = XML_ATTR_GENERATION_ADMIN;
value = crm_element_value(root, name);
if (value == NULL) {
crm_warn("No value for %s was specified in the configuration.", name);
crm_warn("The reccomended course of action is to shutdown,"
" run crm_verify and fix any errors it reports.");
crm_warn("We will default to zero and continue but may get"
" confused about which configuration to use if"
" multiple nodes are powered up at the same time.");
crm_xml_add_int(root, name, 0);
}
name = XML_ATTR_GENERATION;
value = crm_element_value(root, name);
if (value == NULL) {
crm_xml_add_int(root, name, 0);
}
name = XML_ATTR_NUMUPDATES;
value = crm_element_value(root, name);
if (value == NULL) {
crm_xml_add_int(root, name, 0);
}
/* unset these and require the DC/CCM to update as needed */
xml_remove_prop(root, XML_ATTR_DC_UUID);
if (discard_status) {
crm_log_xml_trace(root, "[on-disk]");
}
validation = crm_element_value(root, XML_ATTR_VALIDATION);
if (validate_xml(root, NULL, TRUE) == FALSE) {
crm_err("CIB does not validate with %s", crm_str(validation));
cib_status = -pcmk_err_dtd_validation;
} else if (validation == NULL) {
int version = 0;
update_validation(&root, &version, FALSE, FALSE);
if (version > 0) {
crm_notice("Enabling %s validation on"
" the existing (sane) configuration", get_schema_name(version));
} else {
crm_err("CIB does not validate with any known DTD or schema");
cib_status = -pcmk_err_dtd_validation;
}
}
free(filename);
free(sigfile);
return root;
}
/*
* The caller should never free the return value
*/
xmlNode *
get_the_CIB(void)
{
return the_cib;
}
gboolean
uninitializeCib(void)
{
xmlNode *tmp_cib = the_cib;
if (tmp_cib == NULL) {
crm_debug("The CIB has already been deallocated.");
return FALSE;
}
initialized = FALSE;
the_cib = NULL;
node_search = NULL;
resource_search = NULL;
constraint_search = NULL;
status_search = NULL;
crm_debug("Deallocating the CIB.");
free_xml(tmp_cib);
crm_debug("The CIB has been deallocated.");
return TRUE;
}
/*
* This method will not free the old CIB pointer or the new one.
* We rely on the caller to have saved a pointer to the old CIB
* and to free the old/bad one depending on what is appropriate.
*/
gboolean
initializeCib(xmlNode * new_cib)
{
if (new_cib == NULL) {
return FALSE;
}
the_cib = new_cib;
initialized = TRUE;
return TRUE;
}
static void
sync_directory(const char *name)
{
int fd = 0;
DIR *directory = NULL;
directory = opendir(name);
if (directory == NULL) {
crm_perror(LOG_ERR, "Could not open %s for syncing", name);
return;
}
fd = dirfd(directory);
if (fd < 0) {
crm_perror(LOG_ERR, "Could not obtain file descriptor for %s", name);
} else if (fsync(fd) < 0) {
crm_perror(LOG_ERR, "Could not sync %s", name);
}
if (closedir(directory) < 0) {
crm_perror(LOG_ERR, "Could not close %s after fsync", name);
}
}
/*
* This method will free the old CIB pointer on success and the new one
* on failure.
*/
int
activateCibXml(xmlNode * new_cib, gboolean to_disk, const char *op)
{
xmlNode *saved_cib = the_cib;
CRM_ASSERT(new_cib != saved_cib);
if (initializeCib(new_cib) == FALSE) {
free_xml(new_cib);
crm_err("Ignoring invalid or NULL CIB");
if (saved_cib != NULL) {
crm_warn("Reverting to last known CIB");
if (initializeCib(saved_cib) == FALSE) {
/* oh we are so dead */
crm_crit("Couldn't re-initialize the old CIB!");
exit(1);
}
} else {
crm_crit("Could not write out new CIB and no saved" " version to revert to");
}
return -ENODATA;
}
free_xml(saved_cib);
if (cib_writes_enabled && cib_status == pcmk_ok && to_disk) {
crm_debug("Triggering CIB write for %s op", op);
mainloop_set_trigger(cib_writer);
}
return pcmk_ok;
}
static void
-cib_diskwrite_complete(GPid pid, gint status, gpointer user_data)
+cib_diskwrite_complete(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)
{
- int exitcode = -1;
-
- if (WIFSIGNALED(status)) {
- int signo = WTERMSIG(status);
- int core = WCOREDUMP(status);
-
+ if (signo) {
crm_notice("Disk write process terminated with signal %d (pid=%d, core=%d)", signo, pid,
core);
- } else if (WIFEXITED(status)) {
- exitcode = WEXITSTATUS(status);
+ } else {
do_crm_log(exitcode == 0 ? LOG_TRACE : LOG_ERR, "Disk write process exited (pid=%d, rc=%d)",
pid, exitcode);
}
if (exitcode != 0 && cib_writes_enabled) {
crm_err("Disabling disk writes after write failure");
cib_writes_enabled = FALSE;
}
mainloop_trigger_complete(cib_writer);
}
int
write_cib_contents(gpointer p)
{
int fd = -1;
int exit_rc = EX_OK;
char *digest = NULL;
xmlNode *cib_status_root = NULL;
xmlNode *cib_local = NULL;
xmlNode *cib_tmp = NULL;
int tmp_cib_fd = 0;
int tmp_digest_fd = 0;
char *tmp_cib = NULL;
char *tmp_digest = NULL;
char *digest_file = NULL;
char *primary_file = NULL;
char *backup_file = NULL;
char *backup_digest = NULL;
const char *epoch = NULL;
const char *admin_epoch = NULL;
if (p) {
/* Synchronous write out */
cib_local = copy_xml(p);
} else {
int pid = 0;
int bb_state = qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_STATE_GET, 0);
/* Turn it off before the fork() to avoid:
* - 2 processes writing to the same shared mem
* - the child needing to disable it
* (which would close it from underneath the parent)
* This way, the shared mem files are already closed
*/
qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_FALSE);
pid = fork();
if (pid < 0) {
crm_perror(LOG_ERR, "Disabling disk writes after fork failure");
cib_writes_enabled = FALSE;
return FALSE;
}
if (pid) {
/* Parent */
- g_child_watch_add(pid, cib_diskwrite_complete, NULL);
+ mainloop_child_add(pid, 0, "disk-writer", NULL, cib_diskwrite_complete);
if (bb_state == QB_LOG_STATE_ENABLED) {
/* Re-enable now that it it safe */
qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_TRUE);
}
return -1; /* -1 means 'still work to do' */
}
/* A-synchronous write out after a fork() */
/* Don't log anything unless strictly necessary */
set_crm_log_level(LOG_ERR);
/* In theory we can scribble on "the_cib" here and not affect the parent
* But lets be safe anyway
*/
cib_local = copy_xml(the_cib);
}
epoch = crm_element_value(cib_local, XML_ATTR_GENERATION);
admin_epoch = crm_element_value(cib_local, XML_ATTR_GENERATION_ADMIN);
primary_file = crm_concat(cib_root, "cib.xml", '/');
digest_file = crm_concat(primary_file, "sig", '.');
/* Always write out with num_updates=0 */
crm_xml_add(cib_local, XML_ATTR_NUMUPDATES, "0");
fd = open(primary_file, O_NOFOLLOW);
if (fd >= 0) {
int rc = 0;
int seq = get_last_sequence(cib_root, CIB_SERIES);
/* check the admin didnt modify it underneath us */
if (validate_on_disk_cib(primary_file, NULL) == FALSE) {
crm_err("%s was manually modified while the cluster was active!", primary_file);
exit_rc = 1;
goto cleanup;
}
backup_file = generate_series_filename(cib_root, CIB_SERIES, seq, FALSE);
backup_digest = crm_concat(backup_file, "sig", '.');
unlink(backup_file);
unlink(backup_digest);
rc = link(primary_file, backup_file);
if (rc < 0) {
exit_rc = 4;
crm_perror(LOG_ERR, "Cannot link %s to %s", primary_file, backup_file);
goto cleanup;
}
rc = link(digest_file, backup_digest);
if (rc < 0 && errno != ENOENT) {
exit_rc = 5;
crm_perror(LOG_ERR, "Cannot link %s to %s", digest_file, backup_digest);
goto cleanup;
}
write_last_sequence(cib_root, CIB_SERIES, seq + 1, cib_wrap);
sync_directory(cib_root);
crm_info("Archived previous version as %s", backup_file);
}
/* Given that we discard the status section on startup
* there is no point writing it out in the first place
* since users just get confused by it
*
* So delete the status section before we write it out
*/
crm_debug("Writing CIB to disk");
if (p == NULL) {
cib_status_root = find_xml_node(cib_local, XML_CIB_TAG_STATUS, TRUE);
CRM_LOG_ASSERT(cib_status_root != NULL);
if (cib_status_root != NULL) {
free_xml(cib_status_root);
}
}
tmp_cib = g_strdup_printf("%s/cib.XXXXXX", cib_root);
tmp_digest = g_strdup_printf("%s/cib.XXXXXX", cib_root);
umask(S_IWGRP | S_IWOTH | S_IROTH);
tmp_cib_fd = mkstemp(tmp_cib);
if (write_xml_fd(cib_local, tmp_cib, tmp_cib_fd, FALSE) <= 0) {
crm_err("Changes couldn't be written to %s", tmp_cib);
exit_rc = 2;
goto cleanup;
}
/* Must calculate the digest after writing as write_xml_file() updates the last-written field */
digest = calculate_on_disk_digest(cib_local);
crm_info("Wrote version %s.%s.0 of the CIB to disk (digest: %s)",
admin_epoch ? admin_epoch : "0", epoch ? epoch : "0", digest);
tmp_digest_fd = mkstemp(tmp_digest);
if (write_cib_digest(cib_local, tmp_digest, tmp_digest_fd, digest) <= 0) {
crm_err("Digest couldn't be written to %s", tmp_digest);
exit_rc = 3;
goto cleanup;
}
crm_debug("Wrote digest %s to disk", digest);
cib_tmp = retrieveCib(tmp_cib, tmp_digest, FALSE);
CRM_ASSERT(cib_tmp != NULL);
sync_directory(cib_root);
crm_debug("Activating %s", tmp_cib);
cib_rename(tmp_cib, primary_file);
cib_rename(tmp_digest, digest_file);
sync_directory(cib_root);
cleanup:
free(backup_digest);
free(primary_file);
free(backup_file);
free(digest_file);
free(digest);
free(tmp_digest);
free(tmp_cib);
free_xml(cib_tmp);
free_xml(cib_local);
if (fd >= 0) {
close(fd);
}
if (p == NULL) {
/* exit() could potentially affect the parent by closing things it shouldn't
* Use _exit instead
*/
_exit(exit_rc);
}
return exit_rc;
}
void
GHFunc_count_peers(gpointer key, gpointer value, gpointer user_data)
{
int *active = user_data;
if (safe_str_eq(value, ONLINESTATUS)) {
(*active)++;
} else if (safe_str_eq(value, JOINSTATUS)) {
(*active)++;
}
}
diff --git a/crmd/subsystems.c b/crmd/subsystems.c
index ce12f4206d..c0fe988b75 100644
--- a/crmd/subsystems.c
+++ b/crmd/subsystems.c
@@ -1,190 +1,183 @@
-/*
+/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
- *
+ *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
- *
+ *
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
- *
+ *
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <unistd.h> /* for access */
#include <sys/types.h> /* for calls to open */
#include <sys/stat.h> /* for calls to open */
#include <fcntl.h> /* for calls to open */
#include <pwd.h> /* for getpwuid */
#include <grp.h> /* for initgroups */
#include <errno.h>
#include <sys/wait.h>
#include <sys/time.h> /* for getrlimit */
#include <sys/param.h>
#include <sys/types.h>
#include <sys/resource.h> /* for getrlimit */
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/cib.h>
#include <crmd_fsa.h>
#include <crmd_messages.h>
#include <crmd_callbacks.h>
#include <crmd.h>
#include <crm/common/util.h>
static void
-crmdManagedChildDied(GPid pid, gint status, gpointer user_data)
+crmd_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)
{
- struct crm_subsystem_s *the_subsystem = user_data;
-
- if (WIFSIGNALED(status)) {
- int signo = WTERMSIG(status);
- int core = WCOREDUMP(status);
+ /* struct crm_subsystem_s *the_subsystem = mainloop_child_userdata(p); */
+ const char *name = mainloop_child_name(p);
+ if (signo) {
crm_notice("Child process %s terminated with signal %d (pid=%d, core=%d)",
- the_subsystem->name, signo, the_subsystem->pid, core);
-
- } else if (WIFEXITED(status)) {
- int exitcode = WEXITSTATUS(status);
-
- do_crm_log(exitcode == 0 ? LOG_INFO : LOG_ERR,
- "Child process %s exited (pid=%d, rc=%d)", the_subsystem->name,
- the_subsystem->pid, exitcode);
+ name, signo, pid, core);
} else {
- crm_err("Process %s:[%d] exited?", the_subsystem->name, the_subsystem->pid);
+ do_crm_log(exitcode == 0 ? LOG_INFO : LOG_ERR,
+ "Child process %s exited (pid=%d, rc=%d)", name,
+ pid, exitcode);
}
}
gboolean
stop_subsystem(struct crm_subsystem_s *the_subsystem, gboolean force_quit)
{
int quit_signal = SIGTERM;
crm_trace("Stopping sub-system \"%s\"", the_subsystem->name);
clear_bit(fsa_input_register, the_subsystem->flag_required);
if (the_subsystem->pid <= 0) {
crm_trace("Client %s not running", the_subsystem->name);
return FALSE;
}
if (is_set(fsa_input_register, the_subsystem->flag_connected) == FALSE) {
/* running but not yet connected */
crm_debug("Stopping %s before it had connected", the_subsystem->name);
}
/*
if(force_quit && the_subsystem->sent_kill == FALSE) {
quit_signal = SIGKILL;
} else if(force_quit) {
crm_debug("Already sent -KILL to %s: [%d]",
the_subsystem->name, the_subsystem->pid);
}
*/
errno = 0;
if (kill(the_subsystem->pid, quit_signal) == 0) {
crm_info("Sent -TERM to %s: [%d]", the_subsystem->name, the_subsystem->pid);
the_subsystem->sent_kill = TRUE;
} else {
crm_perror(LOG_ERR, "Sent -TERM to %s: [%d]", the_subsystem->name, the_subsystem->pid);
}
return TRUE;
}
gboolean
start_subsystem(struct crm_subsystem_s * the_subsystem)
{
pid_t pid;
struct stat buf;
int s_res;
unsigned int j;
struct rlimit oflimits;
const char *devnull = "/dev/null";
crm_info("Starting sub-system \"%s\"", the_subsystem->name);
if (the_subsystem->pid > 0) {
crm_warn("Client %s already running as pid %d",
the_subsystem->name, (int)the_subsystem->pid);
/* starting a started X is not an error */
return TRUE;
}
/*
* We want to ensure that the exec will succeed before
* we bother forking.
*/
if (access(the_subsystem->path, F_OK | X_OK) != 0) {
crm_perror(LOG_ERR, "Cannot (access) exec %s", the_subsystem->path);
return FALSE;
}
s_res = stat(the_subsystem->command, &buf);
if (s_res != 0) {
crm_perror(LOG_ERR, "Cannot (stat) exec %s", the_subsystem->command);
return FALSE;
}
/* We need to fork so we can make child procs not real time */
switch (pid = fork()) {
case -1:
crm_err("Cannot fork.");
return FALSE;
default: /* Parent */
- g_child_watch_add(pid, crmdManagedChildDied, the_subsystem);
+ mainloop_child_add(pid, 0, the_subsystem->name, the_subsystem, crmd_child_exit);
crm_trace("Client %s is has pid: %d", the_subsystem->name, pid);
the_subsystem->pid = pid;
return TRUE;
case 0: /* Child */
/* create a new process group to avoid
* being interupted by heartbeat
*/
setpgid(0, 0);
break;
}
crm_debug("Executing \"%s (%s)\" (pid %d)",
the_subsystem->command, the_subsystem->name, (int)getpid());
/* A precautionary measure */
getrlimit(RLIMIT_NOFILE, &oflimits);
for (j = 0; j < oflimits.rlim_cur; ++j) {
close(j);
}
(void)open(devnull, O_RDONLY); /* Stdin: fd 0 */
(void)open(devnull, O_WRONLY); /* Stdout: fd 1 */
(void)open(devnull, O_WRONLY); /* Stderr: fd 2 */
{
char *opts[] = { strdup(the_subsystem->command), NULL };
/* coverity[toctou] The call to stat() is a fail-fast, not a race */
(void)execvp(the_subsystem->command, opts);
}
/* Should not happen */
crm_perror(LOG_ERR, "FATAL: Cannot exec %s", the_subsystem->command);
crmd_exit(100); /* Suppress respawning */
return TRUE; /* never reached */
}
diff --git a/include/crm/common/mainloop.h b/include/crm/common/mainloop.h
index d10310753f..01e8d09ce0 100644
--- a/include/crm/common/mainloop.h
+++ b/include/crm/common/mainloop.h
@@ -1,101 +1,97 @@
/*
* Copyright (C) 2009 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef CRM_COMMON_MAINLOOP__H
# define CRM_COMMON_MAINLOOP__H
/**
* \file
* \brief Wrappers for and extensions to glib mainloop
* \ingroup core
*/
# include <glib.h>
typedef struct trigger_s crm_trigger_t;
typedef struct mainloop_io_s mainloop_io_t;
typedef struct mainloop_child_s mainloop_child_t;
crm_trigger_t *mainloop_add_trigger(int priority, int (*dispatch) (gpointer user_data),
gpointer userdata);
void mainloop_set_trigger(crm_trigger_t * source);
void mainloop_trigger_complete(crm_trigger_t * trig);
gboolean mainloop_destroy_trigger(crm_trigger_t * source);
gboolean crm_signal(int sig, void (*dispatch) (int sig));
gboolean mainloop_add_signal(int sig, void (*dispatch) (int sig));
gboolean mainloop_destroy_signal(int sig);
# include <crm/common/ipc.h>
# include <qb/qbipcs.h>
struct ipc_client_callbacks {
int (*dispatch) (const char *buffer, ssize_t length, gpointer userdata);
void (*destroy) (gpointer);
};
qb_ipcs_service_t *mainloop_add_ipc_server(const char *name, enum qb_ipc_type type,
struct qb_ipcs_service_handlers *callbacks);
void mainloop_del_ipc_server(qb_ipcs_service_t * server);
mainloop_io_t *mainloop_add_ipc_client(const char *name, int priority, size_t max_size,
void *userdata, struct ipc_client_callbacks *callbacks);
void mainloop_del_ipc_client(mainloop_io_t * client);
crm_ipc_t *mainloop_get_ipc_client(mainloop_io_t * client);
struct mainloop_fd_callbacks {
int (*dispatch) (gpointer userdata);
void (*destroy) (gpointer userdata);
};
mainloop_io_t *mainloop_add_fd(const char *name, int priority, int fd, void *userdata,
struct mainloop_fd_callbacks *callbacks);
void mainloop_del_fd(mainloop_io_t * client);
/*
* Create a new tracked process
* To track a process group, use -pid
*/
-void
-
-
-mainloop_add_child(pid_t pid,
- int timeout,
- const char *desc,
- void *userdata,
- void (*callback) (mainloop_child_t * p, int status, int signo, int exitcode));
-
-void *mainloop_get_child_userdata(mainloop_child_t * child);
-int
- mainloop_get_child_timeout(mainloop_child_t * child);
-
-pid_t mainloop_get_child_pid(mainloop_child_t * child);
-void
- mainloop_clear_child_userdata(mainloop_child_t * child);
+void mainloop_child_add(pid_t pid,
+ int timeout,
+ const char *desc,
+ void *userdata,
+ void (*callback) (mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode));
+
+void *mainloop_child_userdata(mainloop_child_t * child);
+int mainloop_child_timeout(mainloop_child_t * child);
+const char *mainloop_child_name(mainloop_child_t * child);
+
+pid_t mainloop_child_pid(mainloop_child_t * child);
+void mainloop_clear_child_userdata(mainloop_child_t * child);
# define G_PRIORITY_MEDIUM (G_PRIORITY_HIGH/2)
#endif
diff --git a/lib/common/mainloop.c b/lib/common/mainloop.c
index c038174fac..53c99e236f 100644
--- a/lib/common/mainloop.c
+++ b/lib/common/mainloop.c
@@ -1,888 +1,925 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
#include <stdlib.h>
#include <signal.h>
#include <errno.h>
#include <sys/wait.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/common/mainloop.h>
#include <crm/common/ipcs.h>
struct mainloop_child_s {
pid_t pid;
char *desc;
unsigned timerid;
unsigned watchid;
gboolean timeout;
void *privatedata;
/* Called when a process dies */
- void (*callback) (mainloop_child_t * p, int status, int signo, int exitcode);
+ void (*callback) (mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode);
};
struct trigger_s {
GSource source;
gboolean running;
gboolean trigger;
void *user_data;
guint id;
};
static gboolean
crm_trigger_prepare(GSource * source, gint * timeout)
{
crm_trigger_t *trig = (crm_trigger_t *) source;
/* cluster-glue's FD and IPC related sources make use of
* g_source_add_poll() but do not set a timeout in their prepare
* functions
*
* This means mainloop's poll() will block until an event for one
* of these sources occurs - any /other/ type of source, such as
* this one or g_idle_*, that doesn't use g_source_add_poll() is
* S-O-L and wont be processed until there is something fd-based
* happens.
*
* Luckily the timeout we can set here affects all sources and
* puts an upper limit on how long poll() can take.
*
* So unconditionally set a small-ish timeout, not too small that
* we're in constant motion, which will act as an upper bound on
* how long the signal handling might be delayed for.
*/
*timeout = 500; /* Timeout in ms */
return trig->trigger;
}
static gboolean
crm_trigger_check(GSource * source)
{
crm_trigger_t *trig = (crm_trigger_t *) source;
return trig->trigger;
}
static gboolean
crm_trigger_dispatch(GSource * source, GSourceFunc callback, gpointer userdata)
{
int rc = TRUE;
crm_trigger_t *trig = (crm_trigger_t *) source;
if (trig->running) {
/* Wait until the existing job is complete before starting the next one */
return TRUE;
}
trig->trigger = FALSE;
if (callback) {
rc = callback(trig->user_data);
if (rc < 0) {
crm_trace("Trigger handler %p not yet complete", trig);
trig->running = TRUE;
rc = TRUE;
}
}
return rc;
}
static GSourceFuncs crm_trigger_funcs = {
crm_trigger_prepare,
crm_trigger_check,
crm_trigger_dispatch,
NULL
};
static crm_trigger_t *
mainloop_setup_trigger(GSource * source, int priority, int (*dispatch) (gpointer user_data),
gpointer userdata)
{
crm_trigger_t *trigger = NULL;
trigger = (crm_trigger_t *) source;
trigger->id = 0;
trigger->trigger = FALSE;
trigger->user_data = userdata;
if (dispatch) {
g_source_set_callback(source, dispatch, trigger, NULL);
}
g_source_set_priority(source, priority);
g_source_set_can_recurse(source, FALSE);
trigger->id = g_source_attach(source, NULL);
return trigger;
}
void
mainloop_trigger_complete(crm_trigger_t * trig)
{
crm_trace("Trigger handler %p complete", trig);
trig->running = FALSE;
}
/* If dispatch returns:
* -1: Job running but not complete
* 0: Remove the trigger from mainloop
* 1: Leave the trigger in mainloop
*/
crm_trigger_t *
mainloop_add_trigger(int priority, int (*dispatch) (gpointer user_data), gpointer userdata)
{
GSource *source = NULL;
CRM_ASSERT(sizeof(crm_trigger_t) > sizeof(GSource));
source = g_source_new(&crm_trigger_funcs, sizeof(crm_trigger_t));
CRM_ASSERT(source != NULL);
return mainloop_setup_trigger(source, priority, dispatch, userdata);
}
void
mainloop_set_trigger(crm_trigger_t * source)
{
source->trigger = TRUE;
}
gboolean
mainloop_destroy_trigger(crm_trigger_t * source)
{
source->trigger = FALSE;
if (source->id > 0) {
g_source_remove(source->id);
source->id = 0;
}
return TRUE;
}
typedef struct signal_s {
crm_trigger_t trigger; /* must be first */
void (*handler) (int sig);
int signal;
} crm_signal_t;
static crm_signal_t *crm_signals[NSIG];
static gboolean
crm_signal_dispatch(GSource * source, GSourceFunc callback, gpointer userdata)
{
crm_signal_t *sig = (crm_signal_t *) source;
- crm_info("Invoking handler for signal %d: %s", sig->signal, strsignal(sig->signal));
+ if(sig->signal != SIGCHLD) {
+ crm_info("Invoking handler for signal %d: %s", sig->signal, strsignal(sig->signal));
+ }
sig->trigger.trigger = FALSE;
if (sig->handler) {
sig->handler(sig->signal);
}
return TRUE;
}
static void
mainloop_signal_handler(int sig)
{
if (sig > 0 && sig < NSIG && crm_signals[sig] != NULL) {
mainloop_set_trigger((crm_trigger_t *) crm_signals[sig]);
}
}
static GSourceFuncs crm_signal_funcs = {
crm_trigger_prepare,
crm_trigger_check,
crm_signal_dispatch,
NULL
};
gboolean
crm_signal(int sig, void (*dispatch) (int sig))
{
sigset_t mask;
struct sigaction sa;
struct sigaction old;
if (sigemptyset(&mask) < 0) {
crm_perror(LOG_ERR, "Call to sigemptyset failed");
return FALSE;
}
memset(&sa, 0, sizeof(struct sigaction));
sa.sa_handler = dispatch;
sa.sa_flags = SA_RESTART;
sa.sa_mask = mask;
if (sigaction(sig, &sa, &old) < 0) {
crm_perror(LOG_ERR, "Could not install signal handler for signal %d", sig);
return FALSE;
}
return TRUE;
}
gboolean
mainloop_add_signal(int sig, void (*dispatch) (int sig))
{
GSource *source = NULL;
int priority = G_PRIORITY_HIGH - 1;
if (sig == SIGTERM) {
/* TERM is higher priority than other signals,
* signals are higher priority than other ipc.
* Yes, minus: smaller is "higher"
*/
priority--;
}
if (sig >= NSIG || sig < 0) {
crm_err("Signal %d is out of range", sig);
return FALSE;
} else if (crm_signals[sig] != NULL && crm_signals[sig]->handler == dispatch) {
crm_trace("Signal handler for %d is already installed", sig);
return TRUE;
} else if (crm_signals[sig] != NULL) {
crm_err("Different signal handler for %d is already installed", sig);
return FALSE;
}
CRM_ASSERT(sizeof(crm_signal_t) > sizeof(GSource));
source = g_source_new(&crm_signal_funcs, sizeof(crm_signal_t));
crm_signals[sig] = (crm_signal_t *) mainloop_setup_trigger(source, priority, NULL, NULL);
CRM_ASSERT(crm_signals[sig] != NULL);
crm_signals[sig]->handler = dispatch;
crm_signals[sig]->signal = sig;
if (crm_signal(sig, mainloop_signal_handler) == FALSE) {
crm_signal_t *tmp = crm_signals[sig];
crm_signals[sig] = NULL;
mainloop_destroy_trigger((crm_trigger_t *) tmp);
return FALSE;
}
#if 0
/* If we want signals to interrupt mainloop's poll(), instead of waiting for
* the timeout, then we should call siginterrupt() below
*
* For now, just enforce a low timeout
*/
if (siginterrupt(sig, 1) < 0) {
crm_perror(LOG_INFO, "Could not enable system call interruptions for signal %d", sig);
}
#endif
return TRUE;
}
gboolean
mainloop_destroy_signal(int sig)
{
crm_signal_t *tmp = NULL;
if (sig >= NSIG || sig < 0) {
crm_err("Signal %d is out of range", sig);
return FALSE;
} else if (crm_signal(sig, NULL) == FALSE) {
crm_perror(LOG_ERR, "Could not uninstall signal handler for signal %d", sig);
return FALSE;
} else if (crm_signals[sig] == NULL) {
return TRUE;
}
tmp = crm_signals[sig];
crm_signals[sig] = NULL;
mainloop_destroy_trigger((crm_trigger_t *) tmp);
return TRUE;
}
static qb_array_t *gio_map = NULL;
/*
* libqb...
*/
struct gio_to_qb_poll {
int32_t is_used;
GIOChannel *channel;
guint source;
int32_t events;
void *data;
qb_ipcs_dispatch_fn_t fn;
enum qb_loop_priority p;
};
static int
gio_adapter_refcount(struct gio_to_qb_poll *adaptor)
{
/* This is evil
* Looking at the giochannel header file, ref_count is the first member of channel
* So cheat...
*/
if (adaptor && adaptor->channel) {
int *ref = (void *)adaptor->channel;
return *ref;
}
return 0;
}
static gboolean
gio_read_socket(GIOChannel * gio, GIOCondition condition, gpointer data)
{
struct gio_to_qb_poll *adaptor = (struct gio_to_qb_poll *)data;
gint fd = g_io_channel_unix_get_fd(gio);
crm_trace("%p.%d %d (ref=%d)", data, fd, condition, gio_adapter_refcount(adaptor));
if (condition & G_IO_NVAL) {
crm_trace("Marking failed adaptor %p unused", adaptor);
adaptor->is_used = QB_FALSE;
}
return (adaptor->fn(fd, condition, adaptor->data) == 0);
}
static void
gio_poll_destroy(gpointer data)
{
/* adaptor->source is valid but about to be destroyed (ref_count == 0) in gmain.c
* adaptor->channel will still have ref_count > 0... should be == 1
*/
struct gio_to_qb_poll *adaptor = (struct gio_to_qb_poll *)data;
crm_trace("Destroying adaptor %p channel %p (ref=%d)", adaptor, adaptor->channel,
gio_adapter_refcount(adaptor));
adaptor->is_used = QB_FALSE;
adaptor->channel = NULL;
adaptor->source = 0;
}
static int32_t
gio_poll_dispatch_add(enum qb_loop_priority p, int32_t fd, int32_t evts,
void *data, qb_ipcs_dispatch_fn_t fn)
{
struct gio_to_qb_poll *adaptor;
GIOChannel *channel;
int32_t res = 0;
res = qb_array_index(gio_map, fd, (void **)&adaptor);
if (res < 0) {
crm_err("Array lookup failed for fd=%d: %d", fd, res);
return res;
}
crm_trace("Adding fd=%d to mainloop as adapater %p", fd, adaptor);
if (adaptor->is_used) {
crm_err("Adapter for descriptor %d is still in-use", fd);
return -EEXIST;
}
/* channel is created with ref_count = 1 */
channel = g_io_channel_unix_new(fd);
if (!channel) {
crm_err("No memory left to add fd=%d", fd);
return -ENOMEM;
}
/* Because unlike the poll() API, glib doesn't tell us about HUPs by default */
evts |= (G_IO_HUP | G_IO_NVAL | G_IO_ERR);
adaptor->channel = channel;
adaptor->fn = fn;
adaptor->events = evts;
adaptor->data = data;
adaptor->p = p;
adaptor->is_used = QB_TRUE;
adaptor->source =
g_io_add_watch_full(channel, G_PRIORITY_DEFAULT, evts, gio_read_socket, adaptor,
gio_poll_destroy);
/* Now that mainloop now holds a reference to adaptor->channel,
* thanks to g_io_add_watch_full(), drop ours from g_io_channel_unix_new().
*
* This means that adaptor->channel will be free'd by:
* g_main_context_dispatch()
* -> g_source_destroy_internal()
* -> g_source_callback_unref()
* shortly after gio_poll_destroy() completes
*/
g_io_channel_unref(adaptor->channel);
crm_trace("Added to mainloop with gsource id=%d, ref=%d", adaptor->source,
gio_adapter_refcount(adaptor));
if (adaptor->source > 0) {
return 0;
}
return -EINVAL;
}
static int32_t
gio_poll_dispatch_mod(enum qb_loop_priority p, int32_t fd, int32_t evts,
void *data, qb_ipcs_dispatch_fn_t fn)
{
return 0;
}
static int32_t
gio_poll_dispatch_del(int32_t fd)
{
struct gio_to_qb_poll *adaptor;
crm_trace("Looking for fd=%d", fd);
if (qb_array_index(gio_map, fd, (void **)&adaptor) == 0) {
crm_trace("Marking adaptor %p unused (ref=%d)", adaptor, gio_adapter_refcount(adaptor));
adaptor->is_used = QB_FALSE;
}
return 0;
}
struct qb_ipcs_poll_handlers gio_poll_funcs = {
.job_add = NULL,
.dispatch_add = gio_poll_dispatch_add,
.dispatch_mod = gio_poll_dispatch_mod,
.dispatch_del = gio_poll_dispatch_del,
};
static enum qb_ipc_type
pick_ipc_type(enum qb_ipc_type requested)
{
const char *env = getenv("PCMK_ipc_type");
if (env && strcmp("shared-mem", env) == 0) {
return QB_IPC_SHM;
} else if (env && strcmp("socket", env) == 0) {
return QB_IPC_SOCKET;
} else if (env && strcmp("posix", env) == 0) {
return QB_IPC_POSIX_MQ;
} else if (env && strcmp("sysv", env) == 0) {
return QB_IPC_SYSV_MQ;
} else if (requested == QB_IPC_NATIVE) {
/* We prefer shared memory because the server never blocks on
* send. If part of a message fits into the socket, libqb
* needs to block until the remainder can be sent also.
* Otherwise the client will wait forever for the remaining
* bytes.
*/
return QB_IPC_SHM;
}
return requested;
}
qb_ipcs_service_t *
mainloop_add_ipc_server(const char *name, enum qb_ipc_type type,
struct qb_ipcs_service_handlers * callbacks)
{
int rc = 0;
qb_ipcs_service_t *server = NULL;
if (gio_map == NULL) {
gio_map = qb_array_create_2(64, sizeof(struct gio_to_qb_poll), 1);
}
crm_client_init();
server = qb_ipcs_create(name, 0, pick_ipc_type(type), callbacks);
qb_ipcs_poll_handlers_set(server, &gio_poll_funcs);
rc = qb_ipcs_run(server);
if (rc < 0) {
crm_err("Could not start %s IPC server: %s (%d)", name, pcmk_strerror(rc), rc);
return NULL;
}
return server;
}
void
mainloop_del_ipc_server(qb_ipcs_service_t * server)
{
if (server) {
qb_ipcs_destroy(server);
}
}
struct mainloop_io_s {
char *name;
void *userdata;
guint source;
crm_ipc_t *ipc;
GIOChannel *channel;
int (*dispatch_fn_ipc) (const char *buffer, ssize_t length, gpointer userdata);
int (*dispatch_fn_io) (gpointer userdata);
void (*destroy_fn) (gpointer userdata);
};
static int
mainloop_gio_refcount(mainloop_io_t * client)
{
/* This is evil
* Looking at the giochannel header file, ref_count is the first member of channel
* So cheat...
*/
if (client && client->channel) {
int *ref = (void *)client->channel;
return *ref;
}
return 0;
}
static gboolean
mainloop_gio_callback(GIOChannel * gio, GIOCondition condition, gpointer data)
{
gboolean keep = TRUE;
mainloop_io_t *client = data;
if (condition & G_IO_IN) {
if (client->ipc) {
long rc = 0;
int max = 10;
do {
rc = crm_ipc_read(client->ipc);
if (rc <= 0) {
crm_trace("Message acquisition from %s[%p] failed: %s (%ld)",
client->name, client, pcmk_strerror(rc), rc);
} else if (client->dispatch_fn_ipc) {
const char *buffer = crm_ipc_buffer(client->ipc);
crm_trace("New message from %s[%p] = %d", client->name, client, rc, condition);
if (client->dispatch_fn_ipc(buffer, rc, client->userdata) < 0) {
crm_trace("Connection to %s no longer required", client->name);
keep = FALSE;
}
}
} while (keep && rc > 0 && --max > 0);
} else {
crm_trace("New message from %s[%p]", client->name, client);
if (client->dispatch_fn_io) {
if (client->dispatch_fn_io(client->userdata) < 0) {
crm_trace("Connection to %s no longer required", client->name);
keep = FALSE;
}
}
}
}
if (client->ipc && crm_ipc_connected(client->ipc) == FALSE) {
crm_err("Connection to %s[%p] closed (I/O condition=%d)", client->name, client, condition);
keep = FALSE;
} else if (condition & (G_IO_HUP | G_IO_NVAL | G_IO_ERR)) {
crm_trace("The connection %s[%p] has been closed (I/O condition=%d, refcount=%d)",
client->name, client, condition, mainloop_gio_refcount(client));
keep = FALSE;
} else if ((condition & G_IO_IN) == 0) {
/*
#define GLIB_SYSDEF_POLLIN =1
#define GLIB_SYSDEF_POLLPRI =2
#define GLIB_SYSDEF_POLLOUT =4
#define GLIB_SYSDEF_POLLERR =8
#define GLIB_SYSDEF_POLLHUP =16
#define GLIB_SYSDEF_POLLNVAL =32
typedef enum
{
G_IO_IN GLIB_SYSDEF_POLLIN,
G_IO_OUT GLIB_SYSDEF_POLLOUT,
G_IO_PRI GLIB_SYSDEF_POLLPRI,
G_IO_ERR GLIB_SYSDEF_POLLERR,
G_IO_HUP GLIB_SYSDEF_POLLHUP,
G_IO_NVAL GLIB_SYSDEF_POLLNVAL
} GIOCondition;
A bitwise combination representing a condition to watch for on an event source.
G_IO_IN There is data to read.
G_IO_OUT Data can be written (without blocking).
G_IO_PRI There is urgent data to read.
G_IO_ERR Error condition.
G_IO_HUP Hung up (the connection has been broken, usually for pipes and sockets).
G_IO_NVAL Invalid request. The file descriptor is not open.
*/
crm_err("Strange condition: %d", condition);
}
/* keep == FALSE results in mainloop_gio_destroy() being called
* just before the source is removed from mainloop
*/
return keep;
}
static void
mainloop_gio_destroy(gpointer c)
{
mainloop_io_t *client = c;
/* client->source is valid but about to be destroyed (ref_count == 0) in gmain.c
* client->channel will still have ref_count > 0... should be == 1
*/
crm_trace("Destroying client %s[%p] %d", client->name, c, mainloop_gio_refcount(client));
if (client->ipc) {
crm_ipc_close(client->ipc);
}
if (client->destroy_fn) {
client->destroy_fn(client->userdata);
}
if (client->ipc) {
crm_ipc_destroy(client->ipc);
}
crm_trace("Destroyed client %s[%p] %d", client->name, c, mainloop_gio_refcount(client));
free(client->name);
memset(client, 0, sizeof(mainloop_io_t)); /* A bit of pointless paranoia */
free(client);
}
mainloop_io_t *
mainloop_add_ipc_client(const char *name, int priority, size_t max_size, void *userdata,
struct ipc_client_callbacks *callbacks)
{
mainloop_io_t *client = NULL;
crm_ipc_t *conn = crm_ipc_new(name, max_size);
if (conn && crm_ipc_connect(conn)) {
int32_t fd = crm_ipc_get_fd(conn);
client = mainloop_add_fd(name, priority, fd, userdata, NULL);
client->ipc = conn;
client->destroy_fn = callbacks->destroy;
client->dispatch_fn_ipc = callbacks->dispatch;
}
if (conn && client == NULL) {
crm_trace("Connection to %s failed", name);
crm_ipc_close(conn);
crm_ipc_destroy(conn);
}
return client;
}
void
mainloop_del_ipc_client(mainloop_io_t * client)
{
mainloop_del_fd(client);
}
crm_ipc_t *
mainloop_get_ipc_client(mainloop_io_t * client)
{
if (client) {
return client->ipc;
}
return NULL;
}
mainloop_io_t *
mainloop_add_fd(const char *name, int priority, int fd, void *userdata,
struct mainloop_fd_callbacks * callbacks)
{
mainloop_io_t *client = NULL;
if (fd > 0) {
client = calloc(1, sizeof(mainloop_io_t));
client->name = strdup(name);
client->userdata = userdata;
if (callbacks) {
client->destroy_fn = callbacks->destroy;
client->dispatch_fn_io = callbacks->dispatch;
}
client->channel = g_io_channel_unix_new(fd);
client->source =
g_io_add_watch_full(client->channel, priority,
(G_IO_IN | G_IO_HUP | G_IO_NVAL | G_IO_ERR), mainloop_gio_callback,
client, mainloop_gio_destroy);
/* Now that mainloop now holds a reference to adaptor->channel,
* thanks to g_io_add_watch_full(), drop ours from g_io_channel_unix_new().
*
* This means that adaptor->channel will be free'd by:
* g_main_context_dispatch() or g_source_remove()
* -> g_source_destroy_internal()
* -> g_source_callback_unref()
* shortly after mainloop_gio_destroy() completes
*/
g_io_channel_unref(client->channel);
crm_trace("Added connection %d for %s[%p].%d %d", client->source, client->name, client, fd,
mainloop_gio_refcount(client));
}
return client;
}
void
mainloop_del_fd(mainloop_io_t * client)
{
if (client != NULL) {
crm_trace("Removing client %s[%p] %d", client->name, client, mainloop_gio_refcount(client));
if (client->source) {
/* Results in mainloop_gio_destroy() being called just
* before the source is removed from mainloop
*/
g_source_remove(client->source);
}
}
}
pid_t
-mainloop_get_child_pid(mainloop_child_t * child)
+mainloop_child_pid(mainloop_child_t * child)
{
return child->pid;
}
+const char *
+mainloop_child_name(mainloop_child_t * child)
+{
+ return child->desc;
+}
+
int
-mainloop_get_child_timeout(mainloop_child_t * child)
+mainloop_child_timeout(mainloop_child_t * child)
{
return child->timeout;
}
void *
-mainloop_get_child_userdata(mainloop_child_t * child)
+mainloop_child_userdata(mainloop_child_t * child)
{
return child->privatedata;
}
void
mainloop_clear_child_userdata(mainloop_child_t * child)
{
child->privatedata = NULL;
}
static gboolean
child_timeout_callback(gpointer p)
{
mainloop_child_t *child = p;
child->timerid = 0;
if (child->timeout) {
crm_crit("%s process (PID %d) will not die!", child->desc, (int)child->pid);
return FALSE;
}
child->timeout = TRUE;
crm_warn("%s process (PID %d) timed out", child->desc, (int)child->pid);
if (kill(child->pid, SIGKILL) < 0) {
if (errno == ESRCH) {
/* Nothing left to do */
return FALSE;
}
crm_perror(LOG_ERR, "kill(%d, KILL) failed", child->pid);
}
child->timerid = g_timeout_add(5000, child_timeout_callback, child);
return FALSE;
}
+static GListPtr child_list = NULL;
+
static void
-mainloop_child_destroy(mainloop_child_t * child)
+child_death_dispatch(int signal)
{
- if (child->timerid != 0) {
- crm_trace("Removing timer %d", child->timerid);
- g_source_remove(child->timerid);
- child->timerid = 0;
- }
+ GListPtr iter = child_list;
- free(child->desc);
- g_free(child);
-}
+ while(iter) {
+ int rc = 0;
+ int core = 0;
+ int signo = 0;
+ int status = 0;
+ int exitcode = 0;
-static void
-child_death_dispatch(GPid pid, gint status, gpointer user_data)
-{
- int signo = 0;
- int exitcode = 0;
- mainloop_child_t *child = user_data;
+ GListPtr saved = NULL;
+ mainloop_child_t *child = iter->data;
- crm_trace("Managed process %d exited: %p", pid, child);
+ rc = waitpid(child->pid, &status, WNOHANG);
+ if(rc == 0) {
+ iter = iter->next;
+ continue;
- if (WIFEXITED(status)) {
- exitcode = WEXITSTATUS(status);
- crm_trace("Managed process %d (%s) exited with rc=%d", pid, child->desc, exitcode);
+ } else if(rc != child->pid) {
+ signo = signal;
+ exitcode = 1;
+ status = 1;
+ crm_perror(LOG_ERR, "Call to waitpid(%d) failed", child->pid);
- } else if (WIFSIGNALED(status)) {
- signo = WTERMSIG(status);
- crm_trace("Managed process %d (%s) exited with signal=%d", pid, child->desc, signo);
- }
+ } else {
+ crm_trace("Managed process %d exited: %p", child->pid, child);
+
+ if (WIFEXITED(status)) {
+ exitcode = WEXITSTATUS(status);
+ crm_trace("Managed process %d (%s) exited with rc=%d", child->pid, child->desc, exitcode);
+
+ } else if (WIFSIGNALED(status)) {
+ signo = WTERMSIG(status);
+ crm_trace("Managed process %d (%s) exited with signal=%d", child->pid, child->desc, signo);
+ }
#ifdef WCOREDUMP
- if (WCOREDUMP(status)) {
- crm_err("Managed process %d (%s) dumped core", pid, child->desc);
- }
+ if (WCOREDUMP(status)) {
+ core = 1;
+ crm_err("Managed process %d (%s) dumped core", child->pid, child->desc);
+ }
#endif
+ }
- if (child->callback) {
- child->callback(child, status, signo, exitcode);
- }
- crm_trace("Removed process entry for %d", pid);
+ if (child->callback) {
+ child->callback(child, child->pid, core, signo, exitcode);
+ }
+
+ crm_trace("Removing process entry %p for %d", child, child->pid);
- mainloop_child_destroy(child);
- return;
+ saved = iter;
+ iter = iter->next;
+
+ child_list = g_list_remove_link(child_list, saved);
+ g_list_free(saved);
+
+ if (child->timerid != 0) {
+ crm_trace("Removing timer %d", child->timerid);
+ g_source_remove(child->timerid);
+ child->timerid = 0;
+ }
+ free(child->desc);
+ free(child);
+ }
}
/* Create/Log a new tracked process
* To track a process group, use -pid
*/
void
-mainloop_add_child(pid_t pid, int timeout, const char *desc, void *privatedata,
- void (*callback) (mainloop_child_t * p, int status, int signo, int exitcode))
+mainloop_child_add(pid_t pid, int timeout, const char *desc, void *privatedata,
+ void (*callback) (mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode))
{
mainloop_child_t *child = g_new(mainloop_child_t, 1);
child->pid = pid;
child->timerid = 0;
child->timeout = FALSE;
- child->desc = strdup(desc);
child->privatedata = privatedata;
child->callback = callback;
+ if(desc) {
+ child->desc = strdup(desc);
+ }
+
if (timeout) {
child->timerid = g_timeout_add(timeout, child_timeout_callback, child);
}
- child->watchid = g_child_watch_add(pid, child_death_dispatch, child);
+ /* Do NOT use g_child_watch_add() and friends, they rely on pthreads */
+ mainloop_add_signal(SIGCHLD, child_death_dispatch);
+ child_list = g_list_append(child_list, child);
}
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
index 864acc5857..ae4f69f078 100644
--- a/lib/fencing/st_client.c
+++ b/lib/fencing/st_client.c
@@ -1,2449 +1,2447 @@
/*
* Copyright (c) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <crm_internal.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <ctype.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <glib.h>
#include <dirent.h>
#include <libgen.h> /* Add it for compiling on OSX */
#include <crm/crm.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#ifdef HAVE_STONITH_STONITH_H
# include <stonith/stonith.h>
# define LHA_STONITH_LIBRARY "libstonith.so.1"
static void *lha_agents_lib = NULL;
#endif
#include <crm/common/mainloop.h>
CRM_TRACE_INIT_DATA(stonith);
struct stonith_action_s {
/*! user defined data */
char *agent;
char *action;
char *victim;
char *args;
int timeout;
int async;
void *userdata;
void (*done_cb) (GPid pid, gint status, const char *output, gpointer user_data);
/*! internal async track data */
int fd_stdout;
int last_timeout_signo;
/*! internal timing information */
time_t initial_start_time;
int tries;
int remaining_timeout;
guint timer_sigterm;
guint timer_sigkill;
int max_retries;
/* device output data */
GPid pid;
int rc;
char *output;
};
typedef struct stonith_private_s {
char *token;
crm_ipc_t *ipc;
mainloop_io_t *source;
GHashTable *stonith_op_callback_table;
GList *notify_list;
void (*op_callback) (stonith_t * st, stonith_callback_data_t * data);
} stonith_private_t;
typedef struct stonith_notify_client_s {
const char *event;
const char *obj_id; /* implement one day */
const char *obj_type; /* implement one day */
void (*notify) (stonith_t * st, stonith_event_t * e);
} stonith_notify_client_t;
typedef struct stonith_callback_client_s {
void (*callback) (stonith_t * st, stonith_callback_data_t * data);
const char *id;
void *user_data;
gboolean only_success;
gboolean allow_timeout_updates;
struct timer_rec_s *timer;
} stonith_callback_client_t;
struct notify_blob_s {
stonith_t *stonith;
xmlNode *xml;
};
struct timer_rec_s {
int call_id;
int timeout;
guint ref;
stonith_t *stonith;
};
typedef int (*stonith_op_t) (const char *, int, const char *, xmlNode *,
xmlNode *, xmlNode *, xmlNode **, xmlNode **);
static const char META_TEMPLATE[] =
"<?xml version=\"1.0\"?>\n"
"<!DOCTYPE resource-agent SYSTEM \"ra-api-1.dtd\">\n"
"<resource-agent name=\"%s\">\n"
" <version>1.0</version>\n"
" <longdesc lang=\"en\">\n"
"%s\n"
" </longdesc>\n"
" <shortdesc lang=\"en\">%s</shortdesc>\n"
"%s\n"
" <actions>\n"
" <action name=\"start\" timeout=\"20\" />\n"
" <action name=\"stop\" timeout=\"15\" />\n"
" <action name=\"status\" timeout=\"20\" />\n"
" <action name=\"monitor\" timeout=\"20\" interval=\"3600\"/>\n"
" <action name=\"meta-data\" timeout=\"15\" />\n"
" </actions>\n"
" <special tag=\"heartbeat\">\n"
" <version>2.0</version>\n" " </special>\n" "</resource-agent>\n";
bool stonith_dispatch(stonith_t * st);
int stonith_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata);
void stonith_perform_callback(stonith_t * stonith, xmlNode * msg, int call_id, int rc);
xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data,
int call_options);
int stonith_send_command(stonith_t * stonith, const char *op, xmlNode * data,
xmlNode ** output_data, int call_options, int timeout);
static void stonith_connection_destroy(gpointer user_data);
static void stonith_send_notification(gpointer data, gpointer user_data);
static int internal_stonith_action_execute(stonith_action_t * action);
static void
stonith_connection_destroy(gpointer user_data)
{
stonith_t *stonith = user_data;
stonith_private_t *native = NULL;
struct notify_blob_s blob;
crm_trace("Sending destroyed notification");
blob.stonith = stonith;
blob.xml = create_xml_node(NULL, "notify");
native = stonith->private;
native->ipc = NULL;
native->source = NULL;
stonith->state = stonith_disconnected;
crm_xml_add(blob.xml, F_TYPE, T_STONITH_NOTIFY);
crm_xml_add(blob.xml, F_SUBTYPE, T_STONITH_NOTIFY_DISCONNECT);
g_list_foreach(native->notify_list, stonith_send_notification, &blob);
free_xml(blob.xml);
}
xmlNode *
create_device_registration_xml(const char *id, const char *namespace, const char *agent,
stonith_key_value_t * params)
{
xmlNode *data = create_xml_node(NULL, F_STONITH_DEVICE);
xmlNode *args = create_xml_node(data, XML_TAG_ATTRS);
#if HAVE_STONITH_STONITH_H
namespace = get_stonith_provider(agent, namespace);
if (safe_str_eq(namespace, "heartbeat")) {
hash2field((gpointer) "plugin", (gpointer) agent, args);
agent = "fence_legacy";
}
#endif
crm_xml_add(data, XML_ATTR_ID, id);
crm_xml_add(data, "origin", __FUNCTION__);
crm_xml_add(data, "agent", agent);
crm_xml_add(data, "namespace", namespace);
for (; params; params = params->next) {
hash2field((gpointer) params->key, (gpointer) params->value, args);
}
return data;
}
static int
stonith_api_register_device(stonith_t * st, int call_options,
const char *id, const char *namespace, const char *agent,
stonith_key_value_t * params)
{
int rc = 0;
xmlNode *data = NULL;
data = create_device_registration_xml(id, namespace, agent, params);
rc = stonith_send_command(st, STONITH_OP_DEVICE_ADD, data, NULL, call_options, 0);
free_xml(data);
return rc;
}
static int
stonith_api_remove_device(stonith_t * st, int call_options, const char *name)
{
int rc = 0;
xmlNode *data = NULL;
data = create_xml_node(NULL, F_STONITH_DEVICE);
crm_xml_add(data, "origin", __FUNCTION__);
crm_xml_add(data, XML_ATTR_ID, name);
rc = stonith_send_command(st, STONITH_OP_DEVICE_DEL, data, NULL, call_options, 0);
free_xml(data);
return rc;
}
static int
stonith_api_remove_level(stonith_t * st, int options, const char *node, int level)
{
int rc = 0;
xmlNode *data = NULL;
data = create_xml_node(NULL, F_STONITH_LEVEL);
crm_xml_add(data, "origin", __FUNCTION__);
crm_xml_add(data, F_STONITH_TARGET, node);
crm_xml_add_int(data, XML_ATTR_ID, level);
rc = stonith_send_command(st, STONITH_OP_LEVEL_DEL, data, NULL, options, 0);
free_xml(data);
return rc;
}
xmlNode *
create_level_registration_xml(const char *node, int level, stonith_key_value_t * device_list)
{
xmlNode *data = create_xml_node(NULL, F_STONITH_LEVEL);
crm_xml_add_int(data, XML_ATTR_ID, level);
crm_xml_add(data, F_STONITH_TARGET, node);
crm_xml_add(data, "origin", __FUNCTION__);
for (; device_list; device_list = device_list->next) {
xmlNode *dev = create_xml_node(data, F_STONITH_DEVICE);
crm_xml_add(dev, XML_ATTR_ID, device_list->value);
}
return data;
}
static int
stonith_api_register_level(stonith_t * st, int options, const char *node, int level,
stonith_key_value_t * device_list)
{
int rc = 0;
xmlNode *data = create_level_registration_xml(node, level, device_list);
rc = stonith_send_command(st, STONITH_OP_LEVEL_ADD, data, NULL, options, 0);
free_xml(data);
return rc;
}
static void
append_arg(gpointer key, gpointer value, gpointer user_data)
{
int len = 3; /* =, \n, \0 */
int last = 0;
char **args = user_data;
CRM_CHECK(key != NULL, return);
CRM_CHECK(value != NULL, return);
if (strstr(key, "pcmk_")) {
return;
} else if (strstr(key, CRM_META)) {
return;
} else if (safe_str_eq(key, "crm_feature_set")) {
return;
}
len += strlen(key);
len += strlen(value);
if (*args != NULL) {
last = strlen(*args);
}
*args = realloc(*args, last + len);
crm_trace("Appending: %s=%s", (char *)key, (char *)value);
sprintf((*args) + last, "%s=%s\n", (char *)key, (char *)value);
}
static void
append_const_arg(const char *key, const char *value, char **arg_list)
{
char *glib_sucks_key = strdup(key);
char *glib_sucks_value = strdup(value);
append_arg(glib_sucks_key, glib_sucks_value, arg_list);
free(glib_sucks_value);
free(glib_sucks_key);
}
static void
append_host_specific_args(const char *victim, const char *map, GHashTable * params, char **arg_list)
{
char *name = NULL;
int last = 0, lpc = 0, max = 0;
if (map == NULL) {
/* The best default there is for now... */
crm_debug("Using default arg map: port=uname");
append_const_arg("port", victim, arg_list);
return;
}
max = strlen(map);
crm_debug("Processing arg map: %s", map);
for (; lpc < max + 1; lpc++) {
if (isalpha(map[lpc])) {
/* keep going */
} else if (map[lpc] == '=' || map[lpc] == ':') {
free(name);
name = calloc(1, 1 + lpc - last);
memcpy(name, map + last, lpc - last);
crm_debug("Got name: %s", name);
last = lpc + 1;
} else if (map[lpc] == 0 || map[lpc] == ',' || isspace(map[lpc])) {
char *param = NULL;
const char *value = NULL;
param = calloc(1, 1 + lpc - last);
memcpy(param, map + last, lpc - last);
last = lpc + 1;
crm_debug("Got key: %s", param);
if (name == NULL) {
crm_err("Misparsed '%s', found '%s' without a name", map, param);
free(param);
continue;
}
if (safe_str_eq(param, "uname")) {
value = victim;
} else {
char *key = crm_meta_name(param);
value = g_hash_table_lookup(params, key);
free(key);
}
if (value) {
crm_debug("Setting '%s'='%s' (%s) for %s", name, value, param, victim);
append_const_arg(name, value, arg_list);
} else {
crm_err("No node attribute '%s' for '%s'", name, victim);
}
free(name);
name = NULL;
free(param);
if (map[lpc] == 0) {
break;
}
} else if (isspace(map[lpc])) {
last = lpc;
}
}
free(name);
}
static char *
make_args(const char *action, const char *victim, uint32_t victim_nodeid, GHashTable * device_args,
GHashTable * port_map)
{
char buffer[512];
char *arg_list = NULL;
const char *value = NULL;
CRM_CHECK(action != NULL, return NULL);
if (device_args) {
g_hash_table_foreach(device_args, append_arg, &arg_list);
}
buffer[511] = 0;
snprintf(buffer, 511, "pcmk_%s_action", action);
if (device_args) {
value = g_hash_table_lookup(device_args, buffer);
}
if (value == NULL && device_args) {
/* Legacy support for early 1.1 releases - Remove for 1.4 */
snprintf(buffer, 511, "pcmk_%s_cmd", action);
value = g_hash_table_lookup(device_args, buffer);
}
if (value == NULL && device_args && safe_str_eq(action, "off")) {
/* Legacy support for late 1.1 releases - Remove for 1.4 */
value = g_hash_table_lookup(device_args, "pcmk_poweroff_action");
}
if (value) {
crm_info("Substituting action '%s' for requested operation '%s'", value, action);
action = value;
}
append_const_arg(STONITH_ATTR_ACTION_OP, action, &arg_list);
if (victim && device_args) {
const char *alias = victim;
const char *param = g_hash_table_lookup(device_args, STONITH_ATTR_HOSTARG);
if (port_map && g_hash_table_lookup(port_map, victim)) {
alias = g_hash_table_lookup(port_map, victim);
}
/* Always supply the node's name too:
* https://fedorahosted.org/cluster/wiki/FenceAgentAPI
*/
append_const_arg("nodename", victim, &arg_list);
if (victim_nodeid) {
char nodeid_str[33] = { 0, };
if (snprintf(nodeid_str, 33, "%u", (unsigned int)victim_nodeid)) {
crm_info("For stonith action (%s) for victim %s, adding nodeid (%d) to parameters",
action, victim, nodeid_str);
append_const_arg("nodeid", nodeid_str, &arg_list);
}
}
/* Check if we need to supply the victim in any other form */
if (param == NULL) {
const char *map = g_hash_table_lookup(device_args, STONITH_ATTR_ARGMAP);
if (map == NULL) {
param = "port";
value = g_hash_table_lookup(device_args, param);
} else {
/* Legacy handling */
append_host_specific_args(alias, map, device_args, &arg_list);
value = map; /* Nothing more to do */
}
} else if (safe_str_eq(param, "none")) {
value = param; /* Nothing more to do */
} else {
value = g_hash_table_lookup(device_args, param);
}
/* Don't overwrite explictly set values for $param */
if (value == NULL || safe_str_eq(value, "dynamic")) {
crm_debug("Performing %s action for node '%s' as '%s=%s'", action, victim, param,
alias);
append_const_arg(param, alias, &arg_list);
}
}
return arg_list;
}
static gboolean
st_child_term(gpointer data)
{
int rc = 0;
stonith_action_t *track = data;
crm_info("Child %d timed out, sending SIGTERM", track->pid);
track->timer_sigterm = 0;
track->last_timeout_signo = SIGTERM;
rc = kill(track->pid, SIGTERM);
if (rc < 0) {
crm_perror(LOG_ERR, "Couldn't send SIGTERM to %d", track->pid);
}
return FALSE;
}
static gboolean
st_child_kill(gpointer data)
{
int rc = 0;
stonith_action_t *track = data;
crm_info("Child %d timed out, sending SIGKILL", track->pid);
track->timer_sigkill = 0;
track->last_timeout_signo = SIGKILL;
rc = kill(track->pid, SIGKILL);
if (rc < 0) {
crm_perror(LOG_ERR, "Couldn't send SIGKILL to %d", track->pid);
}
return FALSE;
}
static void
stonith_action_clear_tracking_data(stonith_action_t * action)
{
if (action->timer_sigterm > 0) {
g_source_remove(action->timer_sigterm);
action->timer_sigterm = 0;
}
if (action->timer_sigkill > 0) {
g_source_remove(action->timer_sigkill);
action->timer_sigkill = 0;
}
if (action->fd_stdout) {
close(action->fd_stdout);
action->fd_stdout = 0;
}
free(action->output);
action->output = NULL;
action->rc = 0;
action->pid = 0;
action->last_timeout_signo = 0;
}
static void
stonith_action_destroy(stonith_action_t * action)
{
stonith_action_clear_tracking_data(action);
free(action->agent);
free(action->args);
free(action->action);
free(action->victim);
free(action);
}
#define FAILURE_MAX_RETRIES 2
stonith_action_t *
stonith_action_create(const char *agent,
const char *_action,
const char *victim,
uint32_t victim_nodeid,
int timeout, GHashTable * device_args, GHashTable * port_map)
{
stonith_action_t *action;
action = calloc(1, sizeof(stonith_action_t));
crm_info("Initiating action %s for agent %s (target=%s)", _action, agent, victim);
action->args = make_args(_action, victim, victim_nodeid, device_args, port_map);
action->agent = strdup(agent);
action->action = strdup(_action);
if (victim) {
action->victim = strdup(victim);
}
action->timeout = action->remaining_timeout = timeout;
action->max_retries = FAILURE_MAX_RETRIES;
if (device_args) {
char buffer[512];
const char *value = NULL;
snprintf(buffer, 511, "pcmk_%s_retries", _action);
value = g_hash_table_lookup(device_args, buffer);
if (value) {
action->max_retries = atoi(value);
}
}
return action;
}
#define READ_MAX 500
static char *
read_output(int fd)
{
char buffer[READ_MAX];
char *output = NULL;
int len = 0;
int more = 0;
if (!fd) {
return NULL;
}
do {
errno = 0;
memset(&buffer, 0, READ_MAX);
more = read(fd, buffer, READ_MAX - 1);
if (more > 0) {
crm_trace("Got %d more bytes: %.200s...", more, buffer);
output = realloc(output, len + more + 1);
snprintf(output + len, more + 1, "%s", buffer);
len += more;
}
} while (more == (READ_MAX - 1) || (more < 0 && errno == EINTR));
return output;
}
static gboolean
update_remaining_timeout(stonith_action_t * action)
{
int diff = time(NULL) - action->initial_start_time;
if (action->tries >= action->max_retries) {
crm_info("Attempted to execute agent %s (%s) the maximum number of times (%d) allowed",
action->agent, action->action, action->max_retries);
action->remaining_timeout = 0;
} else if ((action->rc != -ETIME) && diff < (action->timeout * 0.7)) {
/* only set remaining timeout period if there is 30%
* or greater of the original timeout period left */
action->remaining_timeout = action->timeout - diff;
} else {
action->remaining_timeout = 0;
}
return action->remaining_timeout ? TRUE : FALSE;
}
static void
-stonith_action_async_done(GPid pid, gint status, gpointer user_data)
+stonith_action_async_done(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)
{
- int rc = -pcmk_err_generic;
- stonith_action_t *action = user_data;
+ stonith_action_t *action = mainloop_child_userdata(p);
if (action->timer_sigterm > 0) {
g_source_remove(action->timer_sigterm);
}
if (action->timer_sigkill > 0) {
g_source_remove(action->timer_sigkill);
}
if (action->last_timeout_signo) {
- rc = -ETIME;
+ action->rc = -ETIME;
crm_notice("Child process %d performing action '%s' timed out with signal %d",
pid, action->action, action->last_timeout_signo);
- } else if (WIFSIGNALED(status)) {
- int signo = WTERMSIG(status);
- rc = -ECONNABORTED;
+ } else if (signo) {
+ action->rc = -ECONNABORTED;
crm_notice("Child process %d performing action '%s' timed out with signal %d",
pid, action->action, signo);
- } else if (WIFEXITED(status)) {
- rc = WEXITSTATUS(status);
+
+ } else {
+ action->rc = exitcode;
crm_debug("Child process %d performing action '%s' exited with rc %d",
- pid, action->action, rc);
+ pid, action->action, exitcode);
}
- action->rc = rc;
action->output = read_output(action->fd_stdout);
if (action->rc != pcmk_ok && update_remaining_timeout(action)) {
- rc = internal_stonith_action_execute(action);
+ int rc = internal_stonith_action_execute(action);
if (rc == pcmk_ok) {
return;
}
}
if (action->done_cb) {
action->done_cb(pid, action->rc, action->output, action->userdata);
}
stonith_action_destroy(action);
}
static int
internal_stonith_action_execute(stonith_action_t * action)
{
int pid, status, len, rc = -EPROTO;
int ret;
int total = 0;
int p_read_fd, p_write_fd; /* parent read/write file descriptors */
int c_read_fd, c_write_fd; /* child read/write file descriptors */
int fd1[2];
int fd2[2];
int is_retry = 0;
/* clear any previous tracking data */
stonith_action_clear_tracking_data(action);
if (!action->tries) {
action->initial_start_time = time(NULL);
}
action->tries++;
if (action->tries > 1) {
crm_info("Attempt %d to execute %s (%s). remaining timeout is %d",
action->tries, action->agent, action->action, action->remaining_timeout);
is_retry = 1;
}
c_read_fd = c_write_fd = p_read_fd = p_write_fd = -1;
if (action->args == NULL || action->agent == NULL)
goto fail;
len = strlen(action->args);
if (pipe(fd1))
goto fail;
p_read_fd = fd1[0];
c_write_fd = fd1[1];
if (pipe(fd2))
goto fail;
c_read_fd = fd2[0];
p_write_fd = fd2[1];
crm_debug("forking");
pid = fork();
if (pid < 0) {
rc = -ECHILD;
goto fail;
}
if (!pid) {
/* child */
close(1);
/* coverity[leaked_handle] False positive */
if (dup(c_write_fd) < 0)
goto fail;
close(2);
/* coverity[leaked_handle] False positive */
if (dup(c_write_fd) < 0)
goto fail;
close(0);
/* coverity[leaked_handle] False positive */
if (dup(c_read_fd) < 0)
goto fail;
/* keep c_write_fd open so parent can report all errors. */
close(c_read_fd);
close(p_read_fd);
close(p_write_fd);
/* keep retries from executing out of control */
if (is_retry) {
sleep(1);
}
execlp(action->agent, action->agent, NULL);
exit(EXIT_FAILURE);
}
/* parent */
action->pid = pid;
ret = fcntl(p_read_fd, F_SETFL, fcntl(p_read_fd, F_GETFL, 0) | O_NONBLOCK);
if (ret < 0) {
crm_perror(LOG_NOTICE, "Could not change the output of %s to be non-blocking",
action->agent);
}
do {
crm_debug("sending args");
ret = write(p_write_fd, action->args + total, len - total);
if (ret > 0) {
total += ret;
}
} while (errno == EINTR && total < len);
if (total != len) {
crm_perror(LOG_ERR, "Sent %d not %d bytes", total, len);
if (ret >= 0) {
rc = -ECOMM;
}
goto fail;
}
close(p_write_fd);
/* async */
if (action->async) {
action->fd_stdout = p_read_fd;
- g_child_watch_add(pid, stonith_action_async_done, action);
+ mainloop_child_add(pid, 0/* Move the timeout here? */, action->action, action, stonith_action_async_done);
crm_trace("Op: %s on %s, pid: %d, timeout: %ds", action->action, action->agent, pid,
action->remaining_timeout);
action->last_timeout_signo = 0;
if (action->remaining_timeout) {
action->timer_sigterm =
g_timeout_add(1000 * action->remaining_timeout, st_child_term, action);
action->timer_sigkill =
g_timeout_add(1000 * (action->remaining_timeout + 5), st_child_kill, action);
} else {
crm_err("No timeout set for stonith operation %s with device %s",
action->action, action->agent);
}
close(c_write_fd);
close(c_read_fd);
return 0;
} else {
/* sync */
int timeout = action->remaining_timeout + 1;
pid_t p = 0;
while (action->remaining_timeout < 0 || timeout > 0) {
p = waitpid(pid, &status, WNOHANG);
if (p > 0) {
break;
}
sleep(1);
timeout--;
}
if (timeout == 0) {
int killrc = kill(pid, 9 /*SIGKILL*/);
if (killrc && errno != ESRCH) {
crm_err("kill(%d, KILL) failed: %s (%d)", pid, pcmk_strerror(errno), errno);
}
p = waitpid(pid, &status, WNOHANG);
}
if (p <= 0) {
crm_perror(LOG_ERR, "waitpid(%d)", pid);
} else if (p != pid) {
crm_err("Waited for %d, got %d", pid, p);
}
action->output = read_output(p_read_fd);
action->rc = -ECONNABORTED;
rc = action->rc;
if (timeout == 0) {
action->rc = -ETIME;
} else if (WIFEXITED(status)) {
crm_debug("result = %d", WEXITSTATUS(status));
action->rc = -WEXITSTATUS(status);
rc = 0;
} else if (WIFSIGNALED(status)) {
crm_err("call %s for %s exited due to signal %d", action->action, action->agent,
WTERMSIG(status));
} else {
crm_err("call %s for %s exited abnormally. stopped=%d, continued=%d",
action->action, action->agent, WIFSTOPPED(status), WIFCONTINUED(status));
}
}
fail:
if (p_read_fd >= 0) {
close(p_read_fd);
}
if (p_write_fd >= 0) {
close(p_write_fd);
}
if (c_read_fd >= 0) {
close(c_read_fd);
}
if (c_write_fd >= 0) {
close(c_write_fd);
}
return rc;
}
GPid
stonith_action_execute_async(stonith_action_t * action,
void *userdata,
void (*done) (GPid pid, int rc, const char *output,
gpointer user_data))
{
int rc = 0;
if (!action) {
return -1;
}
action->userdata = userdata;
action->done_cb = done;
action->async = 1;
rc = internal_stonith_action_execute(action);
return rc < 0 ? rc : action->pid;
}
int
stonith_action_execute(stonith_action_t * action, int *agent_result, char **output)
{
int rc = 0;
if (!action) {
return -1;
}
do {
rc = internal_stonith_action_execute(action);
if (rc == pcmk_ok) {
/* success! */
break;
}
/* keep retrying while we have time left */
} while (update_remaining_timeout(action));
if (rc) {
/* error */
return rc;
}
if (agent_result) {
*agent_result = action->rc;
}
if (output) {
*output = action->output;
action->output = NULL; /* handed it off, do not free */
}
stonith_action_destroy(action);
return rc;
}
static int
stonith_api_device_list(stonith_t * stonith, int call_options, const char *namespace,
stonith_key_value_t ** devices, int timeout)
{
int count = 0;
if (devices == NULL) {
crm_err("Parameter error: stonith_api_device_list");
return -EFAULT;
}
/* Include Heartbeat agents */
if (namespace == NULL || safe_str_eq("heartbeat", namespace)) {
#if HAVE_STONITH_STONITH_H
static gboolean need_init = TRUE;
char **entry = NULL;
char **type_list = NULL;
static char **(*type_list_fn) (void) = NULL;
static void (*type_free_fn) (char **) = NULL;
if (need_init) {
need_init = FALSE;
type_list_fn =
find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_types", FALSE);
type_free_fn =
find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_free_hostlist",
FALSE);
}
if (type_list_fn) {
type_list = (*type_list_fn) ();
}
for (entry = type_list; entry != NULL && *entry; ++entry) {
crm_trace("Added: %s", *entry);
*devices = stonith_key_value_add(*devices, NULL, *entry);
count++;
}
if (type_list && type_free_fn) {
(*type_free_fn) (type_list);
}
#else
if (namespace != NULL) {
return -EINVAL; /* Heartbeat agents not supported */
}
#endif
}
/* Include Red Hat agents, basically: ls -1 @sbin_dir@/fence_* */
if (namespace == NULL || safe_str_eq("redhat", namespace)) {
struct dirent **namelist;
int file_num = scandir(RH_STONITH_DIR, &namelist, 0, alphasort);
if (file_num > 0) {
struct stat prop;
char buffer[FILENAME_MAX + 1];
while (file_num--) {
if ('.' == namelist[file_num]->d_name[0]) {
free(namelist[file_num]);
continue;
} else if (0 != strncmp(RH_STONITH_PREFIX,
namelist[file_num]->d_name, strlen(RH_STONITH_PREFIX))) {
free(namelist[file_num]);
continue;
}
snprintf(buffer, FILENAME_MAX, "%s/%s", RH_STONITH_DIR, namelist[file_num]->d_name);
if (stat(buffer, &prop) == 0 && S_ISREG(prop.st_mode)) {
*devices = stonith_key_value_add(*devices, NULL, namelist[file_num]->d_name);
count++;
}
free(namelist[file_num]);
}
free(namelist);
}
}
return count;
}
#if HAVE_STONITH_STONITH_H
static inline char *
strdup_null(const char *val)
{
if (val) {
return strdup(val);
}
return NULL;
}
static void
stonith_plugin(int priority, const char *fmt, ...)
G_GNUC_PRINTF(2, 3);
static void
stonith_plugin(int priority, const char *fmt, ...)
{
va_list args;
char *str;
int err = errno;
va_start(args, fmt);
str = g_strdup_vprintf(fmt, args);
va_end(args);
do_crm_log_alias(priority, __FILE__, __func__, __LINE__, "%s", str);
g_free(str);
errno = err;
}
#endif
static int
stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *agent,
const char *namespace, char **output, int timeout)
{
int rc = 0;
char *buffer = NULL;
const char *provider = get_stonith_provider(agent, namespace);
crm_trace("looking up %s/%s metadata", agent, provider);
/* By having this in a library, we can access it from stonith_admin
* when neither lrmd or stonith-ng are running
* Important for the crm shell's validations...
*/
if (safe_str_eq(provider, "redhat")) {
stonith_action_t *action = stonith_action_create(agent, "metadata", NULL, 0, 5, NULL, NULL);
int exec_rc = stonith_action_execute(action, &rc, &buffer);
if (exec_rc < 0 || rc != 0 || buffer == NULL) {
crm_debug("Query failed: %d %d: %s", exec_rc, rc, crm_str(buffer));
free(buffer); /* Just in case */
return -EINVAL;
} else {
xmlNode *xml = string2xml(buffer);
xmlNode *actions = NULL;
xmlXPathObject *xpathObj = NULL;
xpathObj = xpath_search(xml, "//actions");
if (xpathObj && xpathObj->nodesetval->nodeNr > 0) {
actions = getXpathResult(xpathObj, 0);
}
if (xpathObj) {
xmlXPathFreeObject(xpathObj);
}
/* Now fudge the metadata so that the start/stop actions appear */
xpathObj = xpath_search(xml, "//action[@name='stop']");
if (xpathObj == NULL || xpathObj->nodesetval->nodeNr <= 0) {
xmlNode *tmp = NULL;
tmp = create_xml_node(actions, "action");
crm_xml_add(tmp, "name", "stop");
crm_xml_add(tmp, "timeout", "20s");
tmp = create_xml_node(actions, "action");
crm_xml_add(tmp, "name", "start");
crm_xml_add(tmp, "timeout", "20s");
}
if (xpathObj) {
xmlXPathFreeObject(xpathObj);
}
/* Now fudge the metadata so that the port isn't required in the configuration */
xpathObj = xpath_search(xml, "//parameter[@name='port']");
if (xpathObj && xpathObj->nodesetval->nodeNr > 0) {
/* We'll fill this in */
xmlNode *tmp = getXpathResult(xpathObj, 0);
crm_xml_add(tmp, "required", "0");
}
if (xpathObj) {
xmlXPathFreeObject(xpathObj);
}
free(buffer);
buffer = dump_xml_formatted(xml);
free_xml(xml);
if (!buffer) {
return -EINVAL;
}
}
} else {
#if !HAVE_STONITH_STONITH_H
return -EINVAL; /* Heartbeat agents not supported */
#else
int bufferlen = 0;
static const char *no_parameter_info = "<!-- no value -->";
Stonith *stonith_obj = NULL;
static gboolean need_init = TRUE;
static Stonith *(*st_new_fn) (const char *) = NULL;
static const char *(*st_info_fn) (Stonith *, int) = NULL;
static void (*st_del_fn) (Stonith *) = NULL;
static void (*st_log_fn) (Stonith *, PILLogFun) = NULL;
if (need_init) {
need_init = FALSE;
st_new_fn =
find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_new", FALSE);
st_del_fn =
find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_delete",
FALSE);
st_log_fn =
find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_set_log",
FALSE);
st_info_fn =
find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_get_info",
FALSE);
}
if (lha_agents_lib && st_new_fn && st_del_fn && st_info_fn && st_log_fn) {
char *xml_meta_longdesc = NULL;
char *xml_meta_shortdesc = NULL;
char *meta_param = NULL;
char *meta_longdesc = NULL;
char *meta_shortdesc = NULL;
stonith_obj = (*st_new_fn) (agent);
if (stonith_obj) {
(*st_log_fn) (stonith_obj, (PILLogFun) & stonith_plugin);
meta_longdesc = strdup_null((*st_info_fn) (stonith_obj, ST_DEVICEDESCR));
if (meta_longdesc == NULL) {
crm_warn("no long description in %s's metadata.", agent);
meta_longdesc = strdup(no_parameter_info);
}
meta_shortdesc = strdup_null((*st_info_fn) (stonith_obj, ST_DEVICEID));
if (meta_shortdesc == NULL) {
crm_warn("no short description in %s's metadata.", agent);
meta_shortdesc = strdup(no_parameter_info);
}
meta_param = strdup_null((*st_info_fn) (stonith_obj, ST_CONF_XML));
if (meta_param == NULL) {
crm_warn("no list of parameters in %s's metadata.", agent);
meta_param = strdup(no_parameter_info);
}
(*st_del_fn) (stonith_obj);
} else {
return -EINVAL; /* Heartbeat agents not supported */
}
xml_meta_longdesc =
(char *)xmlEncodeEntitiesReentrant(NULL, (const unsigned char *)meta_longdesc);
xml_meta_shortdesc =
(char *)xmlEncodeEntitiesReentrant(NULL, (const unsigned char *)meta_shortdesc);
bufferlen = strlen(META_TEMPLATE) + strlen(agent)
+ strlen(xml_meta_longdesc) + strlen(xml_meta_shortdesc)
+ strlen(meta_param) + 1;
buffer = calloc(1, bufferlen);
snprintf(buffer, bufferlen - 1, META_TEMPLATE,
agent, xml_meta_longdesc, xml_meta_shortdesc, meta_param);
xmlFree(xml_meta_longdesc);
xmlFree(xml_meta_shortdesc);
free(meta_shortdesc);
free(meta_longdesc);
free(meta_param);
}
#endif
}
if (output) {
*output = buffer;
} else {
free(buffer);
}
return rc;
}
static int
stonith_api_query(stonith_t * stonith, int call_options, const char *target,
stonith_key_value_t ** devices, int timeout)
{
int rc = 0, lpc = 0, max = 0;
xmlNode *data = NULL;
xmlNode *output = NULL;
xmlXPathObjectPtr xpathObj = NULL;
CRM_CHECK(devices != NULL, return -EINVAL);
data = create_xml_node(NULL, F_STONITH_DEVICE);
crm_xml_add(data, "origin", __FUNCTION__);
crm_xml_add(data, F_STONITH_TARGET, target);
crm_xml_add(data, F_STONITH_ACTION, "off");
rc = stonith_send_command(stonith, STONITH_OP_QUERY, data, &output, call_options, timeout);
if (rc < 0) {
return rc;
}
xpathObj = xpath_search(output, "//@agent");
if (xpathObj) {
max = xpathObj->nodesetval->nodeNr;
for (lpc = 0; lpc < max; lpc++) {
xmlNode *match = getXpathResult(xpathObj, lpc);
CRM_CHECK(match != NULL, continue);
crm_info("%s[%d] = %s", "//@agent", lpc, xmlGetNodePath(match));
*devices = stonith_key_value_add(*devices, NULL, crm_element_value(match, XML_ATTR_ID));
}
xmlXPathFreeObject(xpathObj);
}
free_xml(output);
free_xml(data);
return max;
}
static int
stonith_api_call(stonith_t * stonith,
int call_options,
const char *id,
const char *action, const char *victim, int timeout, xmlNode ** output)
{
int rc = 0;
xmlNode *data = NULL;
data = create_xml_node(NULL, F_STONITH_DEVICE);
crm_xml_add(data, "origin", __FUNCTION__);
crm_xml_add(data, F_STONITH_DEVICE, id);
crm_xml_add(data, F_STONITH_ACTION, action);
crm_xml_add(data, F_STONITH_TARGET, victim);
rc = stonith_send_command(stonith, STONITH_OP_EXEC, data, output, call_options, timeout);
free_xml(data);
return rc;
}
static int
stonith_api_list(stonith_t * stonith, int call_options, const char *id, char **list_info,
int timeout)
{
int rc;
xmlNode *output = NULL;
rc = stonith_api_call(stonith, call_options, id, "list", NULL, timeout, &output);
if (output && list_info) {
const char *list_str;
list_str = crm_element_value(output, "st_output");
if (list_str) {
*list_info = strdup(list_str);
}
}
if (output) {
free_xml(output);
}
return rc;
}
static int
stonith_api_monitor(stonith_t * stonith, int call_options, const char *id, int timeout)
{
return stonith_api_call(stonith, call_options, id, "monitor", NULL, timeout, NULL);
}
static int
stonith_api_status(stonith_t * stonith, int call_options, const char *id, const char *port,
int timeout)
{
return stonith_api_call(stonith, call_options, id, "status", port, timeout, NULL);
}
static int
stonith_api_fence(stonith_t * stonith, int call_options, const char *node, const char *action,
int timeout, int tolerance)
{
int rc = 0;
xmlNode *data = NULL;
data = create_xml_node(NULL, __FUNCTION__);
crm_xml_add(data, F_STONITH_TARGET, node);
crm_xml_add(data, F_STONITH_ACTION, action);
crm_xml_add_int(data, F_STONITH_TIMEOUT, timeout);
crm_xml_add_int(data, F_STONITH_TOLERANCE, tolerance);
rc = stonith_send_command(stonith, STONITH_OP_FENCE, data, NULL, call_options, timeout);
free_xml(data);
return rc;
}
static int
stonith_api_confirm(stonith_t * stonith, int call_options, const char *target)
{
return stonith_api_fence(stonith, call_options | st_opt_manual_ack, target, "off", 0, 0);
}
static int
stonith_api_history(stonith_t * stonith, int call_options, const char *node,
stonith_history_t ** history, int timeout)
{
int rc = 0;
xmlNode *data = NULL;
xmlNode *output = NULL;
stonith_history_t *last = NULL;
*history = NULL;
if (node) {
data = create_xml_node(NULL, __FUNCTION__);
crm_xml_add(data, F_STONITH_TARGET, node);
}
rc = stonith_send_command(stonith, STONITH_OP_FENCE_HISTORY, data, &output,
call_options | st_opt_sync_call, timeout);
free_xml(data);
if (rc == 0) {
xmlNode *op = NULL;
xmlNode *reply = get_xpath_object("//" F_STONITH_HISTORY_LIST, output, LOG_ERR);
for (op = __xml_first_child(reply); op != NULL; op = __xml_next(op)) {
stonith_history_t *kvp;
kvp = calloc(1, sizeof(stonith_history_t));
kvp->target = crm_element_value_copy(op, F_STONITH_TARGET);
kvp->action = crm_element_value_copy(op, F_STONITH_ACTION);
kvp->origin = crm_element_value_copy(op, F_STONITH_ORIGIN);
kvp->delegate = crm_element_value_copy(op, F_STONITH_DELEGATE);
crm_element_value_int(op, F_STONITH_DATE, &kvp->completed);
crm_element_value_int(op, F_STONITH_STATE, &kvp->state);
if (last) {
last->next = kvp;
} else {
*history = kvp;
}
last = kvp;
}
}
return rc;
}
gboolean
is_redhat_agent(const char *agent)
{
int rc = 0;
struct stat prop;
char buffer[FILENAME_MAX + 1];
snprintf(buffer, FILENAME_MAX, "%s/%s", RH_STONITH_DIR, agent);
rc = stat(buffer, &prop);
if (rc >= 0 && S_ISREG(prop.st_mode)) {
return TRUE;
}
return FALSE;
}
const char *
get_stonith_provider(const char *agent, const char *provider)
{
/* This function sucks */
if (is_redhat_agent(agent)) {
return "redhat";
#if HAVE_STONITH_STONITH_H
} else {
Stonith *stonith_obj = NULL;
static gboolean need_init = TRUE;
static Stonith *(*st_new_fn) (const char *) = NULL;
static void (*st_del_fn) (Stonith *) = NULL;
if (need_init) {
need_init = FALSE;
st_new_fn =
find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_new", FALSE);
st_del_fn =
find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_delete",
FALSE);
}
if (lha_agents_lib && st_new_fn && st_del_fn) {
stonith_obj = (*st_new_fn) (agent);
if (stonith_obj) {
(*st_del_fn) (stonith_obj);
return "heartbeat";
}
}
#endif
}
crm_err("No such device: %s", agent);
return NULL;
}
static gint
stonithlib_GCompareFunc(gconstpointer a, gconstpointer b)
{
int rc = 0;
const stonith_notify_client_t *a_client = a;
const stonith_notify_client_t *b_client = b;
CRM_CHECK(a_client->event != NULL && b_client->event != NULL, return 0);
rc = strcmp(a_client->event, b_client->event);
if (rc == 0) {
if (a_client->notify == NULL || b_client->notify == NULL) {
return 0;
} else if (a_client->notify == b_client->notify) {
return 0;
} else if (((long)a_client->notify) < ((long)b_client->notify)) {
crm_err("callbacks for %s are not equal: %p vs. %p",
a_client->event, a_client->notify, b_client->notify);
return -1;
}
crm_err("callbacks for %s are not equal: %p vs. %p",
a_client->event, a_client->notify, b_client->notify);
return 1;
}
return rc;
}
xmlNode *
stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, int call_options)
{
xmlNode *op_msg = create_xml_node(NULL, "stonith_command");
CRM_CHECK(op_msg != NULL, return NULL);
CRM_CHECK(token != NULL, return NULL);
crm_xml_add(op_msg, F_XML_TAGNAME, "stonith_command");
crm_xml_add(op_msg, F_TYPE, T_STONITH_NG);
crm_xml_add(op_msg, F_STONITH_CALLBACK_TOKEN, token);
crm_xml_add(op_msg, F_STONITH_OPERATION, op);
crm_xml_add_int(op_msg, F_STONITH_CALLID, call_id);
crm_trace("Sending call options: %.8lx, %d", (long)call_options, call_options);
crm_xml_add_int(op_msg, F_STONITH_CALLOPTS, call_options);
if (data != NULL) {
add_message_xml(op_msg, F_STONITH_CALLDATA, data);
}
return op_msg;
}
static void
stonith_destroy_op_callback(gpointer data)
{
stonith_callback_client_t *blob = data;
if (blob->timer && blob->timer->ref > 0) {
g_source_remove(blob->timer->ref);
}
free(blob->timer);
free(blob);
}
static int
stonith_api_signoff(stonith_t * stonith)
{
stonith_private_t *native = stonith->private;
crm_debug("Signing out of the STONITH Service");
if (native->source != NULL) {
/* Attached to mainloop */
mainloop_del_ipc_client(native->source);
native->source = NULL;
native->ipc = NULL;
} else if (native->ipc) {
/* Not attached to mainloop */
crm_ipc_t *ipc = native->ipc;
native->ipc = NULL;
crm_ipc_close(ipc);
crm_ipc_destroy(ipc);
}
stonith->state = stonith_disconnected;
return pcmk_ok;
}
static int
stonith_api_signon(stonith_t * stonith, const char *name, int *stonith_fd)
{
int rc = pcmk_ok;
stonith_private_t *native = stonith->private;
static struct ipc_client_callbacks st_callbacks = {
.dispatch = stonith_dispatch_internal,
.destroy = stonith_connection_destroy
};
crm_trace("Connecting command channel");
stonith->state = stonith_connected_command;
if (stonith_fd) {
/* No mainloop */
native->ipc = crm_ipc_new("stonith-ng", 0);
if (native->ipc && crm_ipc_connect(native->ipc)) {
*stonith_fd = crm_ipc_get_fd(native->ipc);
} else if (native->ipc) {
rc = -ENOTCONN;
}
} else {
/* With mainloop */
native->source =
mainloop_add_ipc_client("stonith-ng", G_PRIORITY_MEDIUM, 0, stonith, &st_callbacks);
native->ipc = mainloop_get_ipc_client(native->source);
}
if (native->ipc == NULL) {
crm_debug("Could not connect to the Stonith API");
rc = -ENOTCONN;
}
if (rc == pcmk_ok) {
xmlNode *reply = NULL;
xmlNode *hello = create_xml_node(NULL, "stonith_command");
crm_xml_add(hello, F_TYPE, T_STONITH_NG);
crm_xml_add(hello, F_STONITH_OPERATION, CRM_OP_REGISTER);
crm_xml_add(hello, F_STONITH_CLIENTNAME, name);
rc = crm_ipc_send(native->ipc, hello, crm_ipc_client_response, -1, &reply);
if (rc < 0) {
crm_perror(LOG_DEBUG, "Couldn't complete registration with the fencing API: %d", rc);
rc = -ECOMM;
} else if (reply == NULL) {
crm_err("Did not receive registration reply");
rc = -EPROTO;
} else {
const char *msg_type = crm_element_value(reply, F_STONITH_OPERATION);
const char *tmp_ticket = crm_element_value(reply, F_STONITH_CLIENTID);
if (safe_str_neq(msg_type, CRM_OP_REGISTER)) {
crm_err("Invalid registration message: %s", msg_type);
crm_log_xml_err(reply, "Bad reply");
rc = -EPROTO;
} else if (tmp_ticket == NULL) {
crm_err("No registration token provided");
crm_log_xml_err(reply, "Bad reply");
rc = -EPROTO;
} else {
crm_trace("Obtained registration token: %s", tmp_ticket);
native->token = strdup(tmp_ticket);
rc = pcmk_ok;
}
}
free_xml(reply);
free_xml(hello);
}
if (rc == pcmk_ok) {
#if HAVE_MSGFROMIPC_TIMEOUT
stonith->call_timeout = MAX_IPC_DELAY;
#endif
crm_debug("Connection to STONITH successful");
return pcmk_ok;
}
crm_debug("Connection to STONITH failed: %s", pcmk_strerror(rc));
stonith->cmds->disconnect(stonith);
return rc;
}
static int
stonith_set_notification(stonith_t * stonith, const char *callback, int enabled)
{
xmlNode *notify_msg = create_xml_node(NULL, __FUNCTION__);
stonith_private_t *native = stonith->private;
if (stonith->state != stonith_disconnected) {
int rc;
crm_xml_add(notify_msg, F_STONITH_OPERATION, T_STONITH_NOTIFY);
if (enabled) {
crm_xml_add(notify_msg, F_STONITH_NOTIFY_ACTIVATE, callback);
} else {
crm_xml_add(notify_msg, F_STONITH_NOTIFY_DEACTIVATE, callback);
}
rc = crm_ipc_send(native->ipc, notify_msg, crm_ipc_client_response, -1, NULL);
if (rc < 0) {
crm_perror(LOG_DEBUG, "Couldn't register for fencing notifications: %d", rc);
rc = -ECOMM;
}
}
free_xml(notify_msg);
return pcmk_ok;
}
static int
stonith_api_add_notification(stonith_t * stonith, const char *event,
void (*callback) (stonith_t * stonith, stonith_event_t * e))
{
GList *list_item = NULL;
stonith_notify_client_t *new_client = NULL;
stonith_private_t *private = NULL;
private = stonith->private;
crm_trace("Adding callback for %s events (%d)", event, g_list_length(private->notify_list));
new_client = calloc(1, sizeof(stonith_notify_client_t));
new_client->event = event;
new_client->notify = callback;
list_item = g_list_find_custom(private->notify_list, new_client, stonithlib_GCompareFunc);
if (list_item != NULL) {
crm_warn("Callback already present");
free(new_client);
return -ENOTUNIQ;
} else {
private->notify_list = g_list_append(private->notify_list, new_client);
stonith_set_notification(stonith, event, 1);
crm_trace("Callback added (%d)", g_list_length(private->notify_list));
}
return pcmk_ok;
}
static int
stonith_api_del_notification(stonith_t * stonith, const char *event)
{
GList *list_item = NULL;
stonith_notify_client_t *new_client = NULL;
stonith_private_t *private = NULL;
crm_debug("Removing callback for %s events", event);
private = stonith->private;
new_client = calloc(1, sizeof(stonith_notify_client_t));
new_client->event = event;
new_client->notify = NULL;
list_item = g_list_find_custom(private->notify_list, new_client, stonithlib_GCompareFunc);
stonith_set_notification(stonith, event, 0);
if (list_item != NULL) {
stonith_notify_client_t *list_client = list_item->data;
private->notify_list = g_list_remove(private->notify_list, list_client);
free(list_client);
crm_trace("Removed callback");
} else {
crm_trace("Callback not present");
}
free(new_client);
return pcmk_ok;
}
static gboolean
stonith_async_timeout_handler(gpointer data)
{
struct timer_rec_s *timer = data;
crm_err("Async call %d timed out after %dms", timer->call_id, timer->timeout);
stonith_perform_callback(timer->stonith, NULL, timer->call_id, -ETIME);
/* Always return TRUE, never remove the handler
* We do that in stonith_del_callback()
*/
return TRUE;
}
static void
set_callback_timeout(stonith_callback_client_t * callback, stonith_t * stonith, int call_id,
int timeout)
{
struct timer_rec_s *async_timer = callback->timer;
if (timeout <= 0) {
return;
}
if (!async_timer) {
async_timer = calloc(1, sizeof(struct timer_rec_s));
callback->timer = async_timer;
}
async_timer->stonith = stonith;
async_timer->call_id = call_id;
/* Allow a fair bit of grace to allow the server to tell us of a timeout
* This is only a fallback
*/
async_timer->timeout = (timeout + 60) * 1000;
if (async_timer->ref) {
g_source_remove(async_timer->ref);
}
async_timer->ref =
g_timeout_add(async_timer->timeout, stonith_async_timeout_handler, async_timer);
}
static void
update_callback_timeout(int call_id, int timeout, stonith_t * st)
{
stonith_callback_client_t *callback = NULL;
stonith_private_t *private = st->private;
callback = g_hash_table_lookup(private->stonith_op_callback_table, GINT_TO_POINTER(call_id));
if (!callback || !callback->allow_timeout_updates) {
return;
}
set_callback_timeout(callback, st, call_id, timeout);
}
static void
invoke_callback(stonith_t * st, int call_id, int rc, void *userdata,
void (*callback) (stonith_t * st, stonith_callback_data_t * data))
{
stonith_callback_data_t data = { 0, };
data.call_id = call_id;
data.rc = rc;
data.userdata = userdata;
callback(st, &data);
}
static int
stonith_api_add_callback(stonith_t * stonith, int call_id, int timeout, int options,
void *user_data, const char *callback_name,
void (*callback) (stonith_t * st, stonith_callback_data_t * data))
{
stonith_callback_client_t *blob = NULL;
stonith_private_t *private = NULL;
CRM_CHECK(stonith != NULL, return -EINVAL);
CRM_CHECK(stonith->private != NULL, return -EINVAL);
private = stonith->private;
if (call_id == 0) {
private->op_callback = callback;
} else if (call_id < 0) {
if (!(options & st_opt_report_only_success)) {
crm_trace("Call failed, calling %s: %s", callback_name, pcmk_strerror(call_id));
invoke_callback(stonith, call_id, call_id, user_data, callback);
} else {
crm_warn("STONITH call failed: %s", pcmk_strerror(call_id));
}
return FALSE;
}
blob = calloc(1, sizeof(stonith_callback_client_t));
blob->id = callback_name;
blob->only_success = (options & st_opt_report_only_success) ? TRUE : FALSE;
blob->user_data = user_data;
blob->callback = callback;
blob->allow_timeout_updates = (options & st_opt_timeout_updates) ? TRUE : FALSE;
if (timeout > 0) {
set_callback_timeout(blob, stonith, call_id, timeout);
}
g_hash_table_insert(private->stonith_op_callback_table, GINT_TO_POINTER(call_id), blob);
crm_trace("Added callback to %s for call %d", callback_name, call_id);
return TRUE;
}
static int
stonith_api_del_callback(stonith_t * stonith, int call_id, bool all_callbacks)
{
stonith_private_t *private = stonith->private;
if (all_callbacks) {
private->op_callback = NULL;
g_hash_table_destroy(private->stonith_op_callback_table);
private->stonith_op_callback_table = g_hash_table_new_full(g_direct_hash, g_direct_equal,
NULL,
stonith_destroy_op_callback);
} else if (call_id == 0) {
private->op_callback = NULL;
} else {
g_hash_table_remove(private->stonith_op_callback_table, GINT_TO_POINTER(call_id));
}
return pcmk_ok;
}
static void
stonith_dump_pending_op(gpointer key, gpointer value, gpointer user_data)
{
int call = GPOINTER_TO_INT(key);
stonith_callback_client_t *blob = value;
crm_debug("Call %d (%s): pending", call, crm_str(blob->id));
}
void
stonith_dump_pending_callbacks(stonith_t * stonith)
{
stonith_private_t *private = stonith->private;
if (private->stonith_op_callback_table == NULL) {
return;
}
return g_hash_table_foreach(private->stonith_op_callback_table, stonith_dump_pending_op, NULL);
}
void
stonith_perform_callback(stonith_t * stonith, xmlNode * msg, int call_id, int rc)
{
stonith_private_t *private = NULL;
stonith_callback_client_t *blob = NULL;
stonith_callback_client_t local_blob;
CRM_CHECK(stonith != NULL, return);
CRM_CHECK(stonith->private != NULL, return);
private = stonith->private;
local_blob.id = NULL;
local_blob.callback = NULL;
local_blob.user_data = NULL;
local_blob.only_success = FALSE;
if (msg != NULL) {
crm_element_value_int(msg, F_STONITH_RC, &rc);
crm_element_value_int(msg, F_STONITH_CALLID, &call_id);
}
CRM_CHECK(call_id > 0, crm_log_xml_err(msg, "Bad result"));
blob = g_hash_table_lookup(private->stonith_op_callback_table, GINT_TO_POINTER(call_id));
if (blob != NULL) {
local_blob = *blob;
blob = NULL;
stonith_api_del_callback(stonith, call_id, FALSE);
} else {
crm_trace("No callback found for call %d", call_id);
local_blob.callback = NULL;
}
if (local_blob.callback != NULL && (rc == pcmk_ok || local_blob.only_success == FALSE)) {
crm_trace("Invoking callback %s for call %d", crm_str(local_blob.id), call_id);
invoke_callback(stonith, call_id, rc, local_blob.user_data, local_blob.callback);
} else if (private->op_callback == NULL && rc != pcmk_ok) {
crm_warn("STONITH command failed: %s", pcmk_strerror(rc));
crm_log_xml_debug(msg, "Failed STONITH Update");
}
if (private->op_callback != NULL) {
crm_trace("Invoking global callback for call %d", call_id);
invoke_callback(stonith, call_id, rc, NULL, private->op_callback);
}
crm_trace("OP callback activated.");
}
/*
<notify t="st_notify" subt="st_device_register" st_op="st_device_register" st_rc="0" >
<st_calldata >
<stonith_command t="stonith-ng" st_async_id="088fb640-431a-48b9-b2fc-c4ff78d0a2d9" st_op="st_device_register" st_callid="2" st_callopt="4096" st_timeout="0" st_clientid="088fb640-431a-48b9-b2fc-c4ff78d0a2d9" st_clientname="stonith-test" >
<st_calldata >
<st_device_id id="test-id" origin="create_device_registration_xml" agent="fence_virsh" namespace="stonith-ng" >
<attributes ipaddr="localhost" pcmk-portmal="some-host=pcmk-1 pcmk-3=3,4" login="root" identity_file="/root/.ssh/id_dsa" />
</st_device_id>
</st_calldata>
</stonith_command>
</st_calldata>
</notify>
<notify t="st_notify" subt="st_notify_fence" st_op="st_notify_fence" st_rc="0" >
<st_calldata >
<st_notify_fence st_rc="0" st_target="some-host" st_op="st_fence" st_delegate="test-id" st_origin="61dd7759-e229-4be7-b1f8-ef49dd14d9f0" />
</st_calldata>
</notify>
*/
static stonith_event_t *
xml_to_event(xmlNode * msg)
{
stonith_event_t *event = calloc(1, sizeof(stonith_event_t));
const char *ntype = crm_element_value(msg, F_SUBTYPE);
char *data_addr = g_strdup_printf("//%s", ntype);
xmlNode *data = get_xpath_object(data_addr, msg, LOG_DEBUG);
crm_log_xml_trace(msg, "stonith_notify");
crm_element_value_int(msg, F_STONITH_RC, &(event->result));
if (safe_str_eq(ntype, T_STONITH_NOTIFY_FENCE)) {
event->operation = crm_element_value_copy(msg, F_STONITH_OPERATION);
if (data) {
event->origin = crm_element_value_copy(data, F_STONITH_ORIGIN);
event->action = crm_element_value_copy(data, F_STONITH_ACTION);
event->target = crm_element_value_copy(data, F_STONITH_TARGET);
event->executioner = crm_element_value_copy(data, F_STONITH_DELEGATE);
event->id = crm_element_value_copy(data, F_STONITH_REMOTE_OP_ID);
event->client_origin = crm_element_value_copy(data, F_STONITH_CLIENTNAME);
} else {
crm_err("No data for %s event", ntype);
crm_log_xml_notice(msg, "BadEvent");
}
}
g_free(data_addr);
return event;
}
static void
event_free(stonith_event_t * event)
{
free(event->id);
free(event->type);
free(event->message);
free(event->operation);
free(event->origin);
free(event->action);
free(event->target);
free(event->executioner);
free(event->device);
free(event->client_origin);
free(event);
}
static void
stonith_send_notification(gpointer data, gpointer user_data)
{
struct notify_blob_s *blob = user_data;
stonith_notify_client_t *entry = data;
stonith_event_t *st_event = NULL;
const char *event = NULL;
if (blob->xml == NULL) {
crm_warn("Skipping callback - NULL message");
return;
}
event = crm_element_value(blob->xml, F_SUBTYPE);
if (entry == NULL) {
crm_warn("Skipping callback - NULL callback client");
return;
} else if (entry->notify == NULL) {
crm_warn("Skipping callback - NULL callback");
return;
} else if (safe_str_neq(entry->event, event)) {
crm_trace("Skipping callback - event mismatch %p/%s vs. %s", entry, entry->event, event);
return;
}
st_event = xml_to_event(blob->xml);
crm_trace("Invoking callback for %p/%s event...", entry, event);
entry->notify(blob->stonith, st_event);
crm_trace("Callback invoked...");
event_free(st_event);
}
int
stonith_send_command(stonith_t * stonith, const char *op, xmlNode * data, xmlNode ** output_data,
int call_options, int timeout)
{
int rc = 0;
int reply_id = -1;
enum crm_ipc_flags ipc_flags = crm_ipc_client_none;
xmlNode *op_msg = NULL;
xmlNode *op_reply = NULL;
stonith_private_t *native = stonith->private;
if (stonith->state == stonith_disconnected) {
return -ENOTCONN;
}
if (output_data != NULL) {
*output_data = NULL;
}
if (op == NULL) {
crm_err("No operation specified");
return -EINVAL;
}
if (call_options & st_opt_sync_call) {
ipc_flags |= crm_ipc_client_response;
}
stonith->call_id++;
/* prevent call_id from being negative (or zero) and conflicting
* with the stonith_errors enum
* use 2 because we use it as (stonith->call_id - 1) below
*/
if (stonith->call_id < 1) {
stonith->call_id = 1;
}
CRM_CHECK(native->token != NULL,;
);
op_msg = stonith_create_op(stonith->call_id, native->token, op, data, call_options);
if (op_msg == NULL) {
return -EINVAL;
}
crm_xml_add_int(op_msg, F_STONITH_TIMEOUT, timeout);
crm_trace("Sending %s message to STONITH service, Timeout: %ds", op, timeout);
rc = crm_ipc_send(native->ipc, op_msg, ipc_flags, 1000 * (timeout + 60), &op_reply);
free_xml(op_msg);
if (rc < 0) {
crm_perror(LOG_ERR, "Couldn't perform %s operation (timeout=%ds): %d", op, timeout, rc);
rc = -ECOMM;
goto done;
}
crm_log_xml_trace(op_reply, "Reply");
if (!(call_options & st_opt_sync_call)) {
crm_trace("Async call %d, returning", stonith->call_id);
CRM_CHECK(stonith->call_id != 0, return -EPROTO);
free_xml(op_reply);
return stonith->call_id;
}
rc = pcmk_ok;
crm_element_value_int(op_reply, F_STONITH_CALLID, &reply_id);
if (reply_id == stonith->call_id) {
crm_trace("Syncronous reply %d received", reply_id);
if (crm_element_value_int(op_reply, F_STONITH_RC, &rc) != 0) {
rc = -ENOMSG;
}
if ((call_options & st_opt_discard_reply) || output_data == NULL) {
crm_trace("Discarding reply");
} else {
*output_data = op_reply;
op_reply = NULL; /* Prevent subsequent free */
}
} else if (reply_id <= 0) {
crm_err("Recieved bad reply: No id set");
crm_log_xml_err(op_reply, "Bad reply");
free_xml(op_reply);
rc = -ENOMSG;
} else {
crm_err("Recieved bad reply: %d (wanted %d)", reply_id, stonith->call_id);
crm_log_xml_err(op_reply, "Old reply");
free_xml(op_reply);
rc = -ENOMSG;
}
done:
if (crm_ipc_connected(native->ipc) == FALSE) {
crm_err("STONITH disconnected");
stonith->state = stonith_disconnected;
}
free_xml(op_reply);
return rc;
}
/* Not used with mainloop */
bool
stonith_dispatch(stonith_t * st)
{
gboolean stay_connected = TRUE;
stonith_private_t *private = NULL;
CRM_ASSERT(st != NULL);
private = st->private;
while (crm_ipc_ready(private->ipc)) {
if (crm_ipc_read(private->ipc) > 0) {
const char *msg = crm_ipc_buffer(private->ipc);
stonith_dispatch_internal(msg, strlen(msg), st);
}
if (crm_ipc_connected(private->ipc) == FALSE) {
crm_err("Connection closed");
stay_connected = FALSE;
}
}
return stay_connected;
}
int
stonith_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata)
{
const char *type = NULL;
struct notify_blob_s blob;
stonith_t *st = userdata;
stonith_private_t *private = NULL;
CRM_ASSERT(st != NULL);
private = st->private;
blob.stonith = st;
blob.xml = string2xml(buffer);
if (blob.xml == NULL) {
crm_warn("Received a NULL msg from STONITH service: %s.", buffer);
return 0;
}
/* do callbacks */
type = crm_element_value(blob.xml, F_TYPE);
crm_trace("Activating %s callbacks...", type);
if (safe_str_eq(type, T_STONITH_NG)) {
stonith_perform_callback(st, blob.xml, 0, 0);
} else if (safe_str_eq(type, T_STONITH_NOTIFY)) {
g_list_foreach(private->notify_list, stonith_send_notification, &blob);
} else if (safe_str_eq(type, T_STONITH_TIMEOUT_VALUE)) {
int call_id = 0;
int timeout = 0;
crm_element_value_int(blob.xml, F_STONITH_TIMEOUT, &timeout);
crm_element_value_int(blob.xml, F_STONITH_CALLID, &call_id);
update_callback_timeout(call_id, timeout, st);
} else {
crm_err("Unknown message type: %s", type);
crm_log_xml_warn(blob.xml, "BadReply");
}
free_xml(blob.xml);
return 1;
}
static int
stonith_api_free(stonith_t * stonith)
{
int rc = pcmk_ok;
if (stonith->state != stonith_disconnected) {
rc = stonith->cmds->disconnect(stonith);
}
if (stonith->state == stonith_disconnected) {
stonith_private_t *private = stonith->private;
g_hash_table_destroy(private->stonith_op_callback_table);
free(private->token);
free(stonith->private);
free(stonith->cmds);
free(stonith);
}
return rc;
}
void
stonith_api_delete(stonith_t * stonith)
{
stonith_private_t *private = stonith->private;
GList *list = private->notify_list;
while (list != NULL) {
stonith_notify_client_t *client = g_list_nth_data(list, 0);
list = g_list_remove(list, client);
free(client);
}
stonith->cmds->free(stonith);
stonith = NULL;
}
stonith_t *
stonith_api_new(void)
{
stonith_t *new_stonith = NULL;
stonith_private_t *private = NULL;
new_stonith = calloc(1, sizeof(stonith_t));
private = calloc(1, sizeof(stonith_private_t));
new_stonith->private = private;
private->stonith_op_callback_table = g_hash_table_new_full(g_direct_hash, g_direct_equal,
NULL, stonith_destroy_op_callback);
private->notify_list = NULL;
new_stonith->call_id = 1;
new_stonith->state = stonith_disconnected;
new_stonith->cmds = calloc(1, sizeof(stonith_api_operations_t));
/* *INDENT-OFF* */
new_stonith->cmds->free = stonith_api_free;
new_stonith->cmds->connect = stonith_api_signon;
new_stonith->cmds->disconnect = stonith_api_signoff;
new_stonith->cmds->list = stonith_api_list;
new_stonith->cmds->monitor = stonith_api_monitor;
new_stonith->cmds->status = stonith_api_status;
new_stonith->cmds->fence = stonith_api_fence;
new_stonith->cmds->confirm = stonith_api_confirm;
new_stonith->cmds->history = stonith_api_history;
new_stonith->cmds->list_agents = stonith_api_device_list;
new_stonith->cmds->metadata = stonith_api_device_metadata;
new_stonith->cmds->query = stonith_api_query;
new_stonith->cmds->remove_device = stonith_api_remove_device;
new_stonith->cmds->register_device = stonith_api_register_device;
new_stonith->cmds->remove_level = stonith_api_remove_level;
new_stonith->cmds->register_level = stonith_api_register_level;
new_stonith->cmds->remove_callback = stonith_api_del_callback;
new_stonith->cmds->register_callback = stonith_api_add_callback;
new_stonith->cmds->remove_notification = stonith_api_del_notification;
new_stonith->cmds->register_notification = stonith_api_add_notification;
/* *INDENT-ON* */
return new_stonith;
}
stonith_key_value_t *
stonith_key_value_add(stonith_key_value_t * head, const char *key, const char *value)
{
stonith_key_value_t *p, *end;
p = calloc(1, sizeof(stonith_key_value_t));
if (key) {
p->key = strdup(key);
}
if (value) {
p->value = strdup(value);
}
end = head;
while (end && end->next) {
end = end->next;
}
if (end) {
end->next = p;
} else {
head = p;
}
return head;
}
void
stonith_key_value_freeall(stonith_key_value_t * head, int keys, int values)
{
stonith_key_value_t *p;
while (head) {
p = head->next;
if (keys) {
free(head->key);
}
if (values) {
free(head->value);
}
free(head);
head = p;
}
}
int
stonith_api_kick(int nodeid, const char *uname, int timeout, bool off)
{
char *name = NULL;
const char *action = "reboot";
int rc = -EPROTO;
stonith_t *st = NULL;
enum stonith_call_options opts = st_opt_sync_call | st_opt_allow_suicide;
st = stonith_api_new();
if (st) {
rc = st->cmds->connect(st, "stonith-api", NULL);
}
if (uname != NULL) {
name = strdup(uname);
} else if (nodeid > 0) {
opts |= st_opt_cs_nodeid;
name = crm_itoa(nodeid);
}
if (off) {
action = "off";
}
if (rc == pcmk_ok) {
rc = st->cmds->fence(st, opts, name, action, timeout, 0);
}
if (st) {
st->cmds->disconnect(st);
stonith_api_delete(st);
}
free(name);
return rc;
}
time_t
stonith_api_time(int nodeid, const char *uname, bool in_progress)
{
int rc = 0;
char *name = NULL;
time_t when = 0;
time_t progress = 0;
stonith_t *st = NULL;
stonith_history_t *history, *hp = NULL;
enum stonith_call_options opts = st_opt_sync_call;
st = stonith_api_new();
if (st) {
rc = st->cmds->connect(st, "stonith-api", NULL);
}
if (uname != NULL) {
name = strdup(uname);
} else if (nodeid > 0) {
opts |= st_opt_cs_nodeid;
name = crm_itoa(nodeid);
}
if (st && rc == pcmk_ok) {
st->cmds->history(st, st_opt_sync_call | st_opt_cs_nodeid, name, &history, 120);
for (hp = history; hp; hp = hp->next) {
if (in_progress) {
if (hp->state != st_done && hp->state != st_failed) {
progress = time(NULL);
}
} else if (hp->state == st_done) {
when = hp->completed;
}
}
}
if (progress) {
when = progress;
}
if (st) {
st->cmds->disconnect(st);
stonith_api_delete(st);
}
free(name);
return when;
}
#if HAVE_STONITH_STONITH_H
# include <pils/plugin.h>
const char *i_hate_pils(int rc);
const char *
i_hate_pils(int rc)
{
return PIL_strerror(rc);
}
#endif
diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c
index 47d511431b..63f9df1d00 100644
--- a/lib/services/services_linux.c
+++ b/lib/services/services_linux.c
@@ -1,593 +1,592 @@
/*
* Copyright (C) 2010 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <crm_internal.h>
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <errno.h>
#include <unistd.h>
#include <dirent.h>
#include <fcntl.h>
#include <string.h>
#include "crm/crm.h"
#include "crm/common/mainloop.h"
#include "crm/services.h"
#include "services_private.h"
#if SUPPORT_CIBSECRETS
# include "crm/common/cib_secrets.h"
#endif
static inline void
set_fd_opts(int fd, int opts)
{
int flag;
if ((flag = fcntl(fd, F_GETFL)) >= 0) {
if (fcntl(fd, F_SETFL, flag | opts) < 0) {
crm_err("fcntl() write failed");
}
} else {
crm_err("fcntl() read failed");
}
}
static gboolean
read_output(int fd, svc_action_t * op)
{
char *data = NULL;
int rc = 0, len = 0;
gboolean is_err = FALSE;
char buf[500];
static const size_t buf_read_len = sizeof(buf) - 1;
crm_trace("%p", op);
if (fd < 0) {
return FALSE;
}
if (fd == op->opaque->stderr_fd) {
is_err = TRUE;
if (op->stderr_data) {
len = strlen(op->stderr_data);
data = op->stderr_data;
}
} else if (op->stdout_data) {
len = strlen(op->stdout_data);
data = op->stdout_data;
}
do {
rc = read(fd, buf, buf_read_len);
if (rc > 0) {
buf[rc] = 0;
data = realloc(data, len + rc + 1);
sprintf(data + len, "%s", buf);
len += rc;
} else if (errno != EINTR) {
/* error or EOF
* Cleanup happens in pipe_done()
*/
rc = FALSE;
break;
}
} while (rc == buf_read_len || rc < 0);
if (data != NULL && is_err) {
op->stderr_data = data;
} else if (data != NULL) {
op->stdout_data = data;
}
return rc;
}
static int
dispatch_stdout(gpointer userdata)
{
svc_action_t *op = (svc_action_t *) userdata;
return read_output(op->opaque->stdout_fd, op);
}
static int
dispatch_stderr(gpointer userdata)
{
svc_action_t *op = (svc_action_t *) userdata;
return read_output(op->opaque->stderr_fd, op);
}
static void
pipe_out_done(gpointer user_data)
{
svc_action_t *op = (svc_action_t *) user_data;
crm_trace("%p", op);
op->opaque->stdout_gsource = NULL;
if (op->opaque->stdout_fd > STDOUT_FILENO) {
close(op->opaque->stdout_fd);
}
op->opaque->stdout_fd = -1;
}
static void
pipe_err_done(gpointer user_data)
{
svc_action_t *op = (svc_action_t *) user_data;
op->opaque->stderr_gsource = NULL;
if (op->opaque->stderr_fd > STDERR_FILENO) {
close(op->opaque->stderr_fd);
}
op->opaque->stderr_fd = -1;
}
static struct mainloop_fd_callbacks stdout_callbacks = {
.dispatch = dispatch_stdout,
.destroy = pipe_out_done,
};
static struct mainloop_fd_callbacks stderr_callbacks = {
.dispatch = dispatch_stderr,
.destroy = pipe_err_done,
};
static void
set_ocf_env(const char *key, const char *value, gpointer user_data)
{
if (setenv(key, value, 1) != 0) {
crm_perror(LOG_ERR, "setenv failed for key:%s and value:%s", key, value);
}
}
static void
set_ocf_env_with_prefix(gpointer key, gpointer value, gpointer user_data)
{
char buffer[500];
snprintf(buffer, sizeof(buffer), "OCF_RESKEY_%s", (char *)key);
set_ocf_env(buffer, value, user_data);
}
static void
add_OCF_env_vars(svc_action_t * op)
{
if (!op->standard || strcasecmp("ocf", op->standard) != 0) {
return;
}
if (op->params) {
g_hash_table_foreach(op->params, set_ocf_env_with_prefix, NULL);
}
set_ocf_env("OCF_RA_VERSION_MAJOR", "1", NULL);
set_ocf_env("OCF_RA_VERSION_MINOR", "0", NULL);
set_ocf_env("OCF_ROOT", OCF_ROOT_DIR, NULL);
if (op->rsc) {
set_ocf_env("OCF_RESOURCE_INSTANCE", op->rsc, NULL);
}
if (op->agent != NULL) {
set_ocf_env("OCF_RESOURCE_TYPE", op->agent, NULL);
}
/* Notes: this is not added to specification yet. Sept 10,2004 */
if (op->provider != NULL) {
set_ocf_env("OCF_RESOURCE_PROVIDER", op->provider, NULL);
}
}
gboolean
recurring_action_timer(gpointer data)
{
svc_action_t *op = data;
crm_debug("Scheduling another invokation of %s", op->id);
/* Clean out the old result */
free(op->stdout_data);
op->stdout_data = NULL;
free(op->stderr_data);
op->stderr_data = NULL;
services_action_async(op, NULL);
return FALSE;
}
void
operation_finalize(svc_action_t * op)
{
int recurring = 0;
if (op->interval) {
if (op->cancel) {
op->status = PCMK_LRM_OP_CANCELLED;
cancel_recurring_action(op);
} else {
recurring = 1;
op->opaque->repeat_timer = g_timeout_add(op->interval,
recurring_action_timer, (void *)op);
}
}
if (op->opaque->callback) {
op->opaque->callback(op);
}
op->pid = 0;
if (!recurring) {
/*
* If this is a recurring action, do not free explicitly.
* It will get freed whenever the action gets cancelled.
*/
services_action_free(op);
}
}
static void
-operation_finished(mainloop_child_t * p, int status, int signo, int exitcode)
+operation_finished(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)
{
- svc_action_t *op = mainloop_get_child_userdata(p);
- pid_t pid = mainloop_get_child_pid(p);
+ svc_action_t *op = mainloop_child_userdata(p);
char *prefix = g_strdup_printf("%s:%d", op->id, op->pid);
mainloop_clear_child_userdata(p);
op->status = PCMK_LRM_OP_DONE;
CRM_ASSERT(op->pid == pid);
if (op->opaque->stderr_gsource) {
/* Make sure we have read everything from the buffer.
* Depending on the priority mainloop gives the fd, operation_finished
* could occur before all the reads are done. Force the read now.*/
dispatch_stderr(op);
}
if (op->opaque->stdout_gsource) {
/* Make sure we have read everything from the buffer.
* Depending on the priority mainloop gives the fd, operation_finished
* could occur before all the reads are done. Force the read now.*/
dispatch_stdout(op);
}
if (signo) {
- if (mainloop_get_child_timeout(p)) {
+ if (mainloop_child_timeout(p)) {
crm_warn("%s - timed out after %dms", prefix, op->timeout);
op->status = PCMK_LRM_OP_TIMEOUT;
op->rc = PCMK_OCF_TIMEOUT;
} else {
crm_warn("%s - terminated with signal %d", prefix, signo);
op->status = PCMK_LRM_OP_ERROR;
op->rc = PCMK_OCF_SIGNAL;
}
} else {
op->rc = exitcode;
crm_debug("%s - exited with rc=%d", prefix, exitcode);
}
crm_log_output(LOG_NOTICE, prefix, op->stderr_data);
crm_log_output(LOG_DEBUG, prefix, op->stdout_data);
g_free(prefix);
operation_finalize(op);
}
gboolean
services_os_action_execute(svc_action_t * op, gboolean synchronous)
{
int rc, lpc;
int stdout_fd[2];
int stderr_fd[2];
if (pipe(stdout_fd) < 0) {
crm_err("pipe() failed");
}
if (pipe(stderr_fd) < 0) {
crm_err("pipe() failed");
}
op->pid = fork();
switch (op->pid) {
case -1:
crm_err("fork() failed");
close(stdout_fd[0]);
close(stdout_fd[1]);
close(stderr_fd[0]);
close(stderr_fd[1]);
return FALSE;
case 0: /* Child */
/* Man: The call setpgrp() is equivalent to setpgid(0,0)
* _and_ compiles on BSD variants too
* need to investigate if it works the same too.
*/
setpgid(0, 0);
close(stdout_fd[0]);
close(stderr_fd[0]);
if (STDOUT_FILENO != stdout_fd[1]) {
if (dup2(stdout_fd[1], STDOUT_FILENO) != STDOUT_FILENO) {
crm_err("dup2() failed (stdout)");
}
close(stdout_fd[1]);
}
if (STDERR_FILENO != stderr_fd[1]) {
if (dup2(stderr_fd[1], STDERR_FILENO) != STDERR_FILENO) {
crm_err("dup2() failed (stderr)");
}
close(stderr_fd[1]);
}
/* close all descriptors except stdin/out/err and channels to logd */
for (lpc = getdtablesize() - 1; lpc > STDERR_FILENO; lpc--) {
close(lpc);
}
#if SUPPORT_CIBSECRETS
if (replace_secret_params(op->rsc, op->params) < 0) {
/* replacing secrets failed! */
if (safe_str_eq(op->action,"stop")) {
/* don't fail on stop! */
crm_info("proceeding with the stop operation for %s", op->rsc);
} else {
crm_err("failed to get secrets for %s, "
"considering resource not configured", op->rsc);
_exit(PCMK_OCF_NOT_CONFIGURED);
}
}
#endif
/* Setup environment correctly */
add_OCF_env_vars(op);
/* execute the RA */
execvp(op->opaque->exec, op->opaque->args);
switch (errno) { /* see execve(2) */
case ENOENT: /* No such file or directory */
case EISDIR: /* Is a directory */
rc = PCMK_OCF_NOT_INSTALLED;
#if SUPPORT_NAGIOS
if (safe_str_eq(op->standard, "nagios")) {
rc = NAGIOS_NOT_INSTALLED;
}
#endif
break;
case EACCES: /* permission denied (various errors) */
rc = PCMK_OCF_INSUFFICIENT_PRIV;
#if SUPPORT_NAGIOS
if (safe_str_eq(op->standard, "nagios")) {
rc = NAGIOS_INSUFFICIENT_PRIV;
}
#endif
break;
default:
rc = PCMK_OCF_UNKNOWN_ERROR;
break;
}
_exit(rc);
}
/* Only the parent reaches here */
close(stdout_fd[1]);
close(stderr_fd[1]);
op->opaque->stdout_fd = stdout_fd[0];
set_fd_opts(op->opaque->stdout_fd, O_NONBLOCK);
op->opaque->stderr_fd = stderr_fd[0];
set_fd_opts(op->opaque->stderr_fd, O_NONBLOCK);
if (synchronous) {
int status = 0;
int timeout = (1 + op->timeout) / 1000;
crm_trace("Waiting for %d", op->pid);
while ((op->timeout < 0 || timeout > 0) && waitpid(op->pid, &status, WNOHANG) <= 0) {
sleep(1);
read_output(op->opaque->stdout_fd, op);
read_output(op->opaque->stderr_fd, op);
timeout--;
}
crm_trace("Child done: %d", op->pid);
if (timeout == 0) {
int killrc = kill(op->pid, 9 /*SIGKILL*/);
op->rc = PCMK_OCF_UNKNOWN_ERROR;
op->status = PCMK_LRM_OP_TIMEOUT;
crm_warn("%s:%d - timed out after %dms", op->id, op->pid, op->timeout);
if (killrc && errno != ESRCH) {
crm_err("kill(%d, KILL) failed: %d", op->pid, errno);
}
} else if (WIFEXITED(status)) {
op->status = PCMK_LRM_OP_DONE;
op->rc = WEXITSTATUS(status);
crm_info("Managed %s process %d exited with rc=%d", op->id, op->pid, op->rc);
} else if (WIFSIGNALED(status)) {
int signo = WTERMSIG(status);
op->status = PCMK_LRM_OP_ERROR;
crm_err("Managed %s process %d exited with signal=%d", op->id, op->pid, signo);
}
#ifdef WCOREDUMP
if (WCOREDUMP(status)) {
crm_err("Managed %s process %d dumped core", op->id, op->pid);
}
#endif
read_output(op->opaque->stdout_fd, op);
read_output(op->opaque->stderr_fd, op);
close(op->opaque->stdout_fd);
close(op->opaque->stderr_fd);
} else {
crm_trace("Async waiting for %d - %s", op->pid, op->opaque->exec);
- mainloop_add_child(op->pid, op->timeout, op->id, op, operation_finished);
+ mainloop_child_add(op->pid, op->timeout, op->id, op, operation_finished);
op->opaque->stdout_gsource = mainloop_add_fd(op->id,
G_PRIORITY_LOW,
op->opaque->stdout_fd, op, &stdout_callbacks);
op->opaque->stderr_gsource = mainloop_add_fd(op->id,
G_PRIORITY_LOW,
op->opaque->stderr_fd, op, &stderr_callbacks);
}
return TRUE;
}
GList *
services_os_get_directory_list(const char *root, gboolean files, gboolean executable)
{
GList *list = NULL;
struct dirent **namelist;
int entries = 0, lpc = 0;
char buffer[PATH_MAX];
entries = scandir(root, &namelist, NULL, alphasort);
if (entries <= 0) {
return list;
}
for (lpc = 0; lpc < entries; lpc++) {
struct stat sb;
if ('.' == namelist[lpc]->d_name[0]) {
free(namelist[lpc]);
continue;
}
snprintf(buffer, sizeof(buffer), "%s/%s", root, namelist[lpc]->d_name);
if (stat(buffer, &sb)) {
continue;
}
if (S_ISDIR(sb.st_mode)) {
if (files) {
free(namelist[lpc]);
continue;
}
} else if (S_ISREG(sb.st_mode)) {
if (files == FALSE) {
free(namelist[lpc]);
continue;
} else if (executable
&& (sb.st_mode & S_IXUSR) == 0
&& (sb.st_mode & S_IXGRP) == 0 && (sb.st_mode & S_IXOTH) == 0) {
free(namelist[lpc]);
continue;
}
}
list = g_list_append(list, strdup(namelist[lpc]->d_name));
free(namelist[lpc]);
}
free(namelist);
return list;
}
GList *
resources_os_list_lsb_agents(void)
{
return get_directory_list(LSB_ROOT_DIR, TRUE, TRUE);
}
GList *
resources_os_list_ocf_providers(void)
{
return get_directory_list(OCF_ROOT_DIR "/resource.d", FALSE, TRUE);
}
GList *
resources_os_list_ocf_agents(const char *provider)
{
GList *gIter = NULL;
GList *result = NULL;
GList *providers = NULL;
if (provider) {
char buffer[500];
snprintf(buffer, sizeof(buffer), "%s/resource.d/%s", OCF_ROOT_DIR, provider);
return get_directory_list(buffer, TRUE, TRUE);
}
providers = resources_os_list_ocf_providers();
for (gIter = providers; gIter != NULL; gIter = gIter->next) {
GList *tmp1 = result;
GList *tmp2 = resources_os_list_ocf_agents(gIter->data);
if (tmp2) {
result = g_list_concat(tmp1, tmp2);
}
}
g_list_free_full(providers, free);
return result;
}
#if SUPPORT_NAGIOS
GList *
resources_os_list_nagios_agents(void)
{
GList *plugin_list = NULL;
GList *result = NULL;
GList *gIter = NULL;
plugin_list = get_directory_list(NAGIOS_PLUGIN_DIR, TRUE, TRUE);
/* Make sure both the plugin and its metadata exist */
for (gIter = plugin_list; gIter != NULL; gIter = gIter->next) {
const char *plugin = gIter->data;
char *metadata = g_strdup_printf(NAGIOS_METADATA_DIR "/%s.xml", plugin);
struct stat st;
if (stat(metadata, &st) == 0) {
result = g_list_append(result, strdup(plugin));
}
g_free(metadata);
}
g_list_free_full(plugin_list, free);
return result;
}
#endif
diff --git a/mcp/pacemaker.c b/mcp/pacemaker.c
index 6d6a6dcae8..2a0d7969d1 100644
--- a/mcp/pacemaker.c
+++ b/mcp/pacemaker.c
@@ -1,1023 +1,1019 @@
/*
* Copyright (C) 2010 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <pacemaker.h>
#include <pwd.h>
#include <grp.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <crm/msg_xml.h>
#include <crm/common/ipcs.h>
#include <crm/common/mainloop.h>
#include <crm/cluster.h>
#include <dirent.h>
#include <ctype.h>
gboolean fatal_error = FALSE;
GMainLoop *mainloop = NULL;
GHashTable *peers = NULL;
#define PCMK_PROCESS_CHECK_INTERVAL 5
char *local_name = NULL;
uint32_t local_nodeid = 0;
crm_trigger_t *shutdown_trigger = NULL;
const char *pid_file = "/var/run/pacemaker.pid";
/* *INDENT-OFF* */
enum crm_proc_flag {
crm_proc_none = 0x00000001,
crm_proc_plugin = 0x00000002,
crm_proc_lrmd = 0x00000010,
crm_proc_cib = 0x00000100,
crm_proc_crmd = 0x00000200,
crm_proc_attrd = 0x00001000,
crm_proc_stonithd = 0x00002000,
crm_proc_pe = 0x00010000,
crm_proc_te = 0x00020000,
crm_proc_mgmtd = 0x00040000,
crm_proc_stonith_ng = 0x00100000,
};
/* *INDENT-ON* */
typedef struct pcmk_child_s {
int pid;
long flag;
int start_seq;
int respawn_count;
gboolean respawn;
const char *name;
const char *uid;
const char *command;
gboolean active_before_startup;
} pcmk_child_t;
/* Index into the array below */
#define pcmk_child_crmd 4
#define pcmk_child_mgmtd 8
/* *INDENT-OFF* */
static pcmk_child_t pcmk_children[] = {
{ 0, crm_proc_none, 0, 0, FALSE, "none", NULL, NULL },
{ 0, crm_proc_plugin, 0, 0, FALSE, "ais", NULL, NULL },
{ 0, crm_proc_lrmd, 3, 0, TRUE, "lrmd", NULL, CRM_DAEMON_DIR"/lrmd" },
{ 0, crm_proc_cib, 1, 0, TRUE, "cib", CRM_DAEMON_USER, CRM_DAEMON_DIR"/cib" },
{ 0, crm_proc_crmd, 6, 0, TRUE, "crmd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/crmd" },
{ 0, crm_proc_attrd, 4, 0, TRUE, "attrd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/attrd" },
{ 0, crm_proc_stonithd, 0, 0, TRUE, "stonithd", NULL, NULL },
{ 0, crm_proc_pe, 5, 0, TRUE, "pengine", CRM_DAEMON_USER, CRM_DAEMON_DIR"/pengine" },
{ 0, crm_proc_mgmtd, 0, 0, TRUE, "mgmtd", NULL, HB_DAEMON_DIR"/mgmtd" },
{ 0, crm_proc_stonith_ng, 2, 0, TRUE, "stonith-ng", NULL, CRM_DAEMON_DIR"/stonithd" },
};
/* *INDENT-ON* */
static gboolean start_child(pcmk_child_t * child);
static gboolean check_active_before_startup_processes(gpointer user_data);
void
enable_crmd_as_root(gboolean enable)
{
if (enable) {
pcmk_children[pcmk_child_crmd].uid = NULL;
} else {
pcmk_children[pcmk_child_crmd].uid = CRM_DAEMON_USER;
}
}
void
enable_mgmtd(gboolean enable)
{
if (enable) {
pcmk_children[pcmk_child_mgmtd].start_seq = 7;
} else {
pcmk_children[pcmk_child_mgmtd].start_seq = 0;
}
}
static uint32_t
get_process_list(void)
{
int lpc = 0;
uint32_t procs = crm_proc_plugin;
for (lpc = 0; lpc < SIZEOF(pcmk_children); lpc++) {
if (pcmk_children[lpc].pid != 0) {
procs |= pcmk_children[lpc].flag;
}
}
return procs;
}
static void
pcmk_process_exit(pcmk_child_t * child)
{
child->pid = 0;
child->active_before_startup = FALSE;
/* Broadcast the fact that one of our processes died ASAP
*
* Try to get some logging of the cause out first though
* because we're probably about to get fenced
*
* Potentially do this only if respawn_count > N
* to allow for local recovery
*/
update_node_processes(local_nodeid, NULL, get_process_list());
child->respawn_count += 1;
if (child->respawn_count > MAX_RESPAWN) {
crm_err("Child respawn count exceeded by %s", child->name);
child->respawn = FALSE;
}
if (shutdown_trigger) {
mainloop_set_trigger(shutdown_trigger);
update_node_processes(local_nodeid, NULL, get_process_list());
} else if (child->respawn) {
crm_notice("Respawning failed child process: %s", child->name);
start_child(child);
}
}
static void
-pcmk_child_exit(GPid pid, gint status, gpointer user_data)
+pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)
{
- int exitcode = 0;
- pcmk_child_t *child = user_data;
-
- if (WIFSIGNALED(status)) {
- int signo = WTERMSIG(status);
- int core = WCOREDUMP(status);
+ pcmk_child_t *child = mainloop_child_userdata(p);
+ const char *name = mainloop_child_name(p);
+ if (signo) {
crm_notice("Child process %s terminated with signal %d (pid=%d, core=%d)",
- child->name, signo, child->pid, core);
+ name, signo, pid, core);
- } else if (WIFEXITED(status)) {
- exitcode = WEXITSTATUS(status);
+ } else {
do_crm_log(exitcode == 0 ? LOG_INFO : LOG_ERR,
- "Child process %s exited (pid=%d, rc=%d)", child->name, child->pid, exitcode);
+ "Child process %s exited (pid=%d, rc=%d)", name, pid, exitcode);
}
if (exitcode == 100) {
crm_warn("Pacemaker child process %s no longer wishes to be respawned. "
- "Shutting ourselves down.", child->name);
+ "Shutting ourselves down.", name);
child->respawn = FALSE;
fatal_error = TRUE;
pcmk_shutdown(15);
}
pcmk_process_exit(child);
}
static gboolean
stop_child(pcmk_child_t * child, int signal)
{
if (signal == 0) {
signal = SIGTERM;
}
if (child->command == NULL) {
crm_debug("Nothing to do for child \"%s\"", child->name);
return TRUE;
}
if (child->pid <= 0) {
crm_trace("Client %s not running", child->name);
return TRUE;
}
errno = 0;
if (kill(child->pid, signal) == 0) {
crm_notice("Stopping %s: Sent -%d to process %d", child->name, signal, child->pid);
} else {
crm_perror(LOG_ERR, "Stopping %s: Could not send -%d to process %d failed",
child->name, signal, child->pid);
}
return TRUE;
}
static char *opts_default[] = { NULL, NULL };
static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL };
static gboolean
start_child(pcmk_child_t * child)
{
int lpc = 0;
uid_t uid = 0;
gid_t gid = 0;
struct rlimit oflimits;
gboolean use_valgrind = FALSE;
gboolean use_callgrind = FALSE;
const char *devnull = "/dev/null";
const char *env_valgrind = getenv("PCMK_valgrind_enabled");
const char *env_callgrind = getenv("PCMK_callgrind_enabled");
enum cluster_type_e stack = get_cluster_type();
child->active_before_startup = FALSE;
if (child->command == NULL) {
crm_info("Nothing to do for child \"%s\"", child->name);
return TRUE;
}
if (env_callgrind != NULL && crm_is_true(env_callgrind)) {
use_callgrind = TRUE;
use_valgrind = TRUE;
} else if (env_callgrind != NULL && strstr(env_callgrind, child->name)) {
use_callgrind = TRUE;
use_valgrind = TRUE;
} else if (env_valgrind != NULL && crm_is_true(env_valgrind)) {
use_valgrind = TRUE;
} else if (env_valgrind != NULL && strstr(env_valgrind, child->name)) {
use_valgrind = TRUE;
}
if (use_valgrind && strlen(VALGRIND_BIN) == 0) {
crm_warn("Cannot enable valgrind for %s:"
" The location of the valgrind binary is unknown", child->name);
use_valgrind = FALSE;
}
if (child->uid) {
if (crm_user_lookup(child->uid, &uid, &gid) < 0) {
crm_err("Invalid user (%s) for %s: not found", child->uid, child->name);
return FALSE;
}
crm_info("Using uid=%u and group=%u for process %s", uid, gid, child->name);
}
child->pid = fork();
CRM_ASSERT(child->pid != -1);
if (child->pid > 0) {
/* parent */
- g_child_watch_add(child->pid, pcmk_child_exit, child);
+ mainloop_child_add(child->pid, 0, child->name, child, pcmk_child_exit);
crm_info("Forked child %d for process %s%s", child->pid, child->name,
use_valgrind ? " (valgrind enabled: " VALGRIND_BIN ")" : "");
update_node_processes(local_nodeid, NULL, get_process_list());
return TRUE;
} else {
/* Start a new session */
(void)setsid();
/* Setup the two alternate arg arrarys */
opts_vgrind[0] = strdup(VALGRIND_BIN);
if (use_callgrind) {
opts_vgrind[1] = strdup("--tool=callgrind");
opts_vgrind[2] = strdup("--callgrind-out-file=" CRM_STATE_DIR "/callgrind.out.%p");
opts_vgrind[3] = strdup(child->command);
opts_vgrind[4] = NULL;
} else {
opts_vgrind[1] = strdup(child->command);
opts_vgrind[2] = NULL;
opts_vgrind[3] = NULL;
opts_vgrind[4] = NULL;
}
opts_default[0] = strdup(child->command);;
if(gid) {
if(stack == pcmk_cluster_corosync) {
/* Drop root privileges completely
*
* We can do this because we set uidgid.gid.${gid}=1
* via CMAP which allows these processes to connect to
* corosync
*/
if (setgid(gid) < 0) {
crm_perror(LOG_ERR, "Could not set group to %d", gid);
}
/* Keep the root group (so we can access corosync), but add the haclient group (so we can access ipc) */
} else if (initgroups(child->uid, gid) < 0) {
crm_err("Cannot initalize groups for %s: %s (%d)", child->uid, pcmk_strerror(errno), errno);
}
}
if (uid && setuid(uid) < 0) {
crm_perror(LOG_ERR, "Could not set user to %d (%s)", uid, child->uid);
}
/* Close all open file descriptors */
getrlimit(RLIMIT_NOFILE, &oflimits);
for (lpc = 0; lpc < oflimits.rlim_cur; lpc++) {
close(lpc);
}
(void)open(devnull, O_RDONLY); /* Stdin: fd 0 */
(void)open(devnull, O_WRONLY); /* Stdout: fd 1 */
(void)open(devnull, O_WRONLY); /* Stderr: fd 2 */
if (use_valgrind) {
(void)execvp(VALGRIND_BIN, opts_vgrind);
} else {
(void)execvp(child->command, opts_default);
}
crm_perror(LOG_ERR, "FATAL: Cannot exec %s", child->command);
crm_exit(100);
}
return TRUE; /* never reached */
}
static gboolean
escalate_shutdown(gpointer data)
{
pcmk_child_t *child = data;
if (child->pid) {
/* Use SIGSEGV instead of SIGKILL to create a core so we can see what it was up to */
crm_err("Child %s not terminating in a timely manner, forcing", child->name);
stop_child(child, SIGSEGV);
}
return FALSE;
}
static gboolean
pcmk_shutdown_worker(gpointer user_data)
{
static int phase = 0;
static time_t next_log = 0;
static int max = SIZEOF(pcmk_children);
int lpc = 0;
if (phase == 0) {
crm_notice("Shuting down Pacemaker");
phase = max;
/* Add a second, more frequent, check to speed up shutdown */
g_timeout_add_seconds(5, check_active_before_startup_processes, NULL);
}
for (; phase > 0; phase--) {
/* dont stop anything with start_seq < 1 */
for (lpc = max - 1; lpc >= 0; lpc--) {
pcmk_child_t *child = &(pcmk_children[lpc]);
if (phase != child->start_seq) {
continue;
}
if (child->pid) {
time_t now = time(NULL);
if (child->respawn) {
next_log = now + 30;
child->respawn = FALSE;
stop_child(child, SIGTERM);
if (phase < pcmk_children[pcmk_child_crmd].start_seq) {
g_timeout_add(180000 /* 3m */ , escalate_shutdown, child);
}
} else if (now >= next_log) {
next_log = now + 30;
crm_notice("Still waiting for %s (pid=%d, seq=%d) to terminate...",
child->name, child->pid, child->start_seq);
}
return TRUE;
}
/* cleanup */
crm_debug("%s confirmed stopped", child->name);
child->pid = 0;
}
}
/* send_cluster_id(); */
crm_notice("Shutdown complete");
g_main_loop_quit(mainloop);
if (fatal_error) {
crm_notice("Attempting to inhibit respawning after fatal error");
crm_exit(100);
}
return TRUE;
}
void
pcmk_shutdown(int nsig)
{
if (shutdown_trigger == NULL) {
shutdown_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, pcmk_shutdown_worker, NULL);
}
mainloop_set_trigger(shutdown_trigger);
}
static void
build_path(const char *path_c, mode_t mode)
{
int offset = 1, len = 0;
char *path = strdup(path_c);
CRM_CHECK(path != NULL, return);
for (len = strlen(path); offset < len; offset++) {
if (path[offset] == '/') {
path[offset] = 0;
if (mkdir(path, mode) < 0 && errno != EEXIST) {
crm_perror(LOG_ERR, "Could not create directory '%s'", path);
break;
}
path[offset] = '/';
}
}
if (mkdir(path, mode) < 0 && errno != EEXIST) {
crm_perror(LOG_ERR, "Could not create directory '%s'", path);
}
free(path);
}
static int32_t
pcmk_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
crm_trace("Connection %p", c);
if (crm_client_new(c, uid, gid) == NULL) {
return -EIO;
}
return 0;
}
static void
pcmk_ipc_created(qb_ipcs_connection_t * c)
{
crm_trace("Connection %p", c);
}
/* Exit code means? */
static int32_t
pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
{
uint32_t id = 0;
uint32_t flags = 0;
const char *task = NULL;
crm_client_t *c = crm_client_get(qbc);
xmlNode *msg = crm_ipcs_recv(c, data, size, &id, &flags);
if (flags & crm_ipc_client_response) {
crm_ipcs_send_ack(c, id, "ack", __FUNCTION__, __LINE__);
}
if (msg == NULL) {
return 0;
}
task = crm_element_value(msg, F_CRM_TASK);
if (crm_str_eq(task, CRM_OP_QUIT, TRUE)) {
/* Time to quit */
crm_notice("Shutting down in responce to ticket %s (%s)",
crm_element_value(msg, F_CRM_REFERENCE), crm_element_value(msg, F_CRM_ORIGIN));
pcmk_shutdown(15);
} else {
/* Just send to everyone */
update_process_clients();
}
free_xml(msg);
return 0;
}
/* Error code means? */
static int32_t
pcmk_ipc_closed(qb_ipcs_connection_t * c)
{
crm_client_t *client = crm_client_get(c);
crm_trace("Connection %p", c);
crm_client_destroy(client);
return 0;
}
static void
pcmk_ipc_destroy(qb_ipcs_connection_t * c)
{
crm_trace("Connection %p", c);
}
struct qb_ipcs_service_handlers ipc_callbacks = {
.connection_accept = pcmk_ipc_accept,
.connection_created = pcmk_ipc_created,
.msg_process = pcmk_ipc_dispatch,
.connection_closed = pcmk_ipc_closed,
.connection_destroyed = pcmk_ipc_destroy
};
static void
ghash_send_proc_details(gpointer key, gpointer value, gpointer data)
{
crm_ipcs_send(value, 0, data, TRUE);
}
static void
peer_loop_fn(gpointer key, gpointer value, gpointer user_data)
{
pcmk_peer_t *node = value;
xmlNode *update = user_data;
xmlNode *xml = create_xml_node(update, "node");
crm_xml_add_int(xml, "id", node->id);
crm_xml_add(xml, "uname", node->uname);
crm_xml_add_int(xml, "processes", node->processes);
}
void
update_process_clients(void)
{
xmlNode *update = create_xml_node(NULL, "nodes");
crm_trace("Sending process list to %d children", crm_hash_table_size(client_connections));
g_hash_table_foreach(peers, peer_loop_fn, update);
g_hash_table_foreach(client_connections, ghash_send_proc_details, update);
free_xml(update);
}
void
update_process_peers(void)
{
char buffer[1024];
struct iovec iov;
int rc = 0;
memset(buffer, 0, SIZEOF(buffer));
if (local_name) {
rc = snprintf(buffer, SIZEOF(buffer) - 1, "<node uname=\"%s\" proclist=\"%u\"/>",
local_name, get_process_list());
} else {
rc = snprintf(buffer, SIZEOF(buffer) - 1, "<node proclist=\"%u\"/>", get_process_list());
}
iov.iov_base = buffer;
iov.iov_len = rc + 1;
crm_trace("Sending %s", buffer);
send_cpg_message(&iov);
}
gboolean
update_node_processes(uint32_t id, const char *uname, uint32_t procs)
{
gboolean changed = FALSE;
pcmk_peer_t *node = g_hash_table_lookup(peers, GUINT_TO_POINTER(id));
if (node == NULL) {
changed = TRUE;
node = calloc(1, sizeof(pcmk_peer_t));
node->id = id;
g_hash_table_insert(peers, GUINT_TO_POINTER(id), node);
node = g_hash_table_lookup(peers, GUINT_TO_POINTER(id));
CRM_ASSERT(node != NULL);
}
if (uname != NULL) {
if (node->uname == NULL || safe_str_eq(node->uname, uname) == FALSE) {
int lpc, len = strlen(uname);
crm_notice("%p Node %u now known as %s%s%s", node, id, uname,
node->uname ? node->uname : ", was: ", node->uname ? node->uname : "");
free(node->uname);
node->uname = strdup(uname);
changed = TRUE;
for (lpc = 0; lpc < len; lpc++) {
if (uname[lpc] >= 'A' && uname[lpc] <= 'Z') {
crm_warn
("Node names with capitals are discouraged, consider changing '%s' to something else",
uname);
break;
}
}
}
} else {
crm_trace("Empty uname for node %u", id);
}
if (procs != 0) {
if (procs != node->processes) {
crm_debug("Node %s now has process list: %.32x (was %.32x)",
node->uname, procs, node->processes);
node->processes = procs;
changed = TRUE;
} else {
crm_trace("Node %s still has process list: %.32x", node->uname, procs);
}
}
if (changed && id == local_nodeid) {
update_process_clients();
update_process_peers();
}
return changed;
}
/* *INDENT-OFF* */
static struct crm_option long_options[] = {
/* Top-level Options */
{"help", 0, 0, '?', "\tThis text"},
{"version", 0, 0, '$', "\tVersion information" },
{"verbose", 0, 0, 'V', "\tIncrease debug output"},
{"shutdown", 0, 0, 'S', "\tInstruct Pacemaker to shutdown on this machine"},
{"features", 0, 0, 'F', "\tDisplay the full version and list of features Pacemaker was built with"},
{"-spacer-", 1, 0, '-', "\nAdditional Options:"},
{"foreground", 0, 0, 'f', "\tRun in the foreground instead of as a daemon"},
{"pid-file", 1, 0, 'p', "\t(Advanced) Daemon pid file location"},
{NULL, 0, 0, 0}
};
/* *INDENT-ON* */
static void
mcp_chown(const char *path, uid_t uid, gid_t gid)
{
int rc = chown(path, uid, gid);
if (rc < 0) {
crm_warn("Cannot change the ownership of %s to user %s and gid %d: %s",
path, CRM_DAEMON_USER, gid, pcmk_strerror(errno));
}
}
static gboolean
check_active_before_startup_processes(gpointer user_data)
{
int start_seq = 1, lpc = 0;
static int max = SIZEOF(pcmk_children);
gboolean keep_tracking = FALSE;
for (start_seq = 1; start_seq < max; start_seq++) {
for (lpc = 0; lpc < max; lpc++) {
if (pcmk_children[lpc].active_before_startup == FALSE) {
/* we are already tracking it as a child process. */
continue;
} else if (start_seq != pcmk_children[lpc].start_seq) {
continue;
} else if (crm_pid_active(pcmk_children[lpc].pid) != 1) {
crm_notice("Process %s terminated (pid=%d)",
pcmk_children[lpc].name, pcmk_children[lpc].pid);
pcmk_process_exit(&(pcmk_children[lpc]));
continue;
}
/* at least one of the processes found at startup
* is still going, so keep this recurring timer around */
keep_tracking = TRUE;
}
}
return keep_tracking;
}
static void
find_and_track_existing_processes(void)
{
DIR *dp;
struct dirent *entry;
struct stat statbuf;
int start_tracker = 0;
dp = opendir("/proc");
if (!dp) {
/* no proc directory to search through */
crm_notice("Can not read /proc directory to track existing components");
return;
}
while ((entry = readdir(dp)) != NULL) {
char procpath[128];
char value[64];
char key[16];
FILE *file;
int pid;
int max = SIZEOF(pcmk_children);
int i;
strcpy(procpath, "/proc/");
/* strlen("/proc/") + strlen("/status") + 1 = 14
* 128 - 14 = 114 */
strncat(procpath, entry->d_name, 114);
if (lstat(procpath, &statbuf)) {
continue;
}
if (!S_ISDIR(statbuf.st_mode) || !isdigit(entry->d_name[0])) {
continue;
}
strcat(procpath, "/status");
file = fopen(procpath, "r");
if (!file) {
continue;
}
if (fscanf(file, "%15s%63s", key, value) != 2) {
fclose(file);
continue;
}
fclose(file);
pid = atoi(entry->d_name);
if (pid <= 0) {
continue;
}
for (i = 0; i < max; i++) {
const char *name = pcmk_children[i].name;
if (pcmk_children[i].start_seq == 0) {
continue;
}
if (pcmk_children[i].flag == crm_proc_stonith_ng) {
name = "stonithd";
}
if (safe_str_eq(name, value)) {
if (crm_pid_active(pid) != 1) {
continue;
}
crm_notice("Tracking existing %s process (pid=%d)", value, pid);
pcmk_children[i].pid = pid;
pcmk_children[i].active_before_startup = TRUE;
start_tracker = 1;
}
}
}
if (start_tracker) {
g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL, check_active_before_startup_processes,
NULL);
}
closedir(dp);
}
static void
init_children_processes(void)
{
int start_seq = 1, lpc = 0;
static int max = SIZEOF(pcmk_children);
/* start any children that have not been detected */
for (start_seq = 1; start_seq < max; start_seq++) {
/* dont start anything with start_seq < 1 */
for (lpc = 0; lpc < max; lpc++) {
if (pcmk_children[lpc].pid) {
/* we are already tracking it */
continue;
}
if (start_seq == pcmk_children[lpc].start_seq) {
start_child(&(pcmk_children[lpc]));
}
}
}
}
int
main(int argc, char **argv)
{
int rc;
int flag;
int argerr = 0;
int option_index = 0;
gboolean shutdown = FALSE;
uid_t pcmk_uid = 0;
gid_t pcmk_gid = 0;
struct rlimit cores;
crm_ipc_t *old_instance = NULL;
qb_ipcs_service_t *ipcs = NULL;
const char *facility = daemon_option("logfacility");
setenv("LC_ALL", "C", 1);
setenv("HA_LOGD", "no", 1);
set_daemon_option("mcp", "true");
set_daemon_option("use_logd", "off");
crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
crm_set_options(NULL, "mode [options]", long_options, "Start/Stop Pacemaker\n");
/* Restore the original facility so that read_config() does the right thing */
set_daemon_option("logfacility", facility);
while (1) {
flag = crm_get_option(argc, argv, &option_index);
if (flag == -1)
break;
switch (flag) {
case 'V':
crm_bump_log_level(argc, argv);
break;
case 'f':
/* Legacy */
break;
case 'p':
pid_file = optarg;
break;
case '$':
case '?':
crm_help(flag, EX_OK);
break;
case 'S':
shutdown = TRUE;
break;
case 'F':
printf("Pacemaker %s (Build: %s)\n Supporting v%s: %s\n", VERSION, BUILD_VERSION,
CRM_FEATURE_SET, CRM_FEATURES);
crm_exit(0);
default:
printf("Argument code 0%o (%c) is not (?yet?) supported\n", flag, flag);
++argerr;
break;
}
}
if (optind < argc) {
printf("non-option ARGV-elements: ");
while (optind < argc)
printf("%s ", argv[optind++]);
printf("\n");
}
if (argerr) {
crm_help('?', EX_USAGE);
}
crm_debug("Checking for old instances of %s", CRM_SYSTEM_MCP);
old_instance = crm_ipc_new(CRM_SYSTEM_MCP, 0);
crm_ipc_connect(old_instance);
if (shutdown) {
crm_debug("Terminating previous instance");
while (crm_ipc_connected(old_instance)) {
xmlNode *cmd =
create_request(CRM_OP_QUIT, NULL, NULL, CRM_SYSTEM_MCP, CRM_SYSTEM_MCP, NULL);
crm_debug(".");
crm_ipc_send(old_instance, cmd, 0, 0, NULL);
free_xml(cmd);
sleep(2);
}
crm_ipc_close(old_instance);
crm_ipc_destroy(old_instance);
crm_exit(0);
} else if (crm_ipc_connected(old_instance)) {
crm_ipc_close(old_instance);
crm_ipc_destroy(old_instance);
crm_err("Pacemaker is already active, aborting startup");
crm_exit(100);
}
crm_ipc_close(old_instance);
crm_ipc_destroy(old_instance);
if (read_config() == FALSE) {
crm_notice("Could not obtain corosync config data, exiting");
crm_exit(1);
}
crm_notice("Starting Pacemaker %s (Build: %s): %s", VERSION, BUILD_VERSION, CRM_FEATURES);
mainloop = g_main_new(FALSE);
rc = getrlimit(RLIMIT_CORE, &cores);
if (rc < 0) {
crm_perror(LOG_ERR, "Cannot determine current maximum core size.");
} else {
if (cores.rlim_max == 0 && geteuid() == 0) {
cores.rlim_max = RLIM_INFINITY;
} else {
crm_info("Maximum core file size is: %lu", (unsigned long)cores.rlim_max);
}
cores.rlim_cur = cores.rlim_max;
rc = setrlimit(RLIMIT_CORE, &cores);
if (rc < 0) {
crm_perror(LOG_ERR,
"Core file generation will remain disabled."
" Core files are an important diagnositic tool,"
" please consider enabling them by default.");
}
#if 0
/* system() is not thread-safe, can't call from here
* Actually, its a pretty hacky way to try and achieve this anyway
*/
if (system("echo 1 > /proc/sys/kernel/core_uses_pid") != 0) {
crm_perror(LOG_ERR, "Could not enable /proc/sys/kernel/core_uses_pid");
}
#endif
}
if (crm_user_lookup(CRM_DAEMON_USER, &pcmk_uid, &pcmk_gid) < 0) {
crm_err("Cluster user %s does not exist, aborting Pacemaker startup", CRM_DAEMON_USER);
crm_exit(1);
}
mkdir(CRM_STATE_DIR, 0750);
mcp_chown(CRM_STATE_DIR, pcmk_uid, pcmk_gid);
/* Used by stonithd */
build_path(HA_STATE_DIR "/heartbeat", 0755);
mcp_chown(HA_STATE_DIR "/heartbeat", pcmk_uid, pcmk_gid);
/* Used by RAs - Leave owned by root */
build_path(CRM_RSCTMP_DIR, 0755);
/* Used to store core files in */
build_path(CRM_CORE_DIR, 0755);
mcp_chown(CRM_CORE_DIR, pcmk_uid, pcmk_gid);
/* Used to store blackbox dumps in */
build_path(CRM_BLACKBOX_DIR, 0755);
mcp_chown(CRM_BLACKBOX_DIR, pcmk_uid, pcmk_gid);
/* Used to store policy engine inputs in */
build_path(PE_STATE_DIR, 0755);
mcp_chown(PE_STATE_DIR, pcmk_uid, pcmk_gid);
/* Used to store the cluster configuration */
build_path(CRM_CONFIG_DIR, 0755);
mcp_chown(CRM_CONFIG_DIR, pcmk_uid, pcmk_gid);
peers = g_hash_table_new(g_direct_hash, g_direct_equal);
ipcs = mainloop_add_ipc_server(CRM_SYSTEM_MCP, QB_IPC_NATIVE, &ipc_callbacks);
if (ipcs == NULL) {
crm_err("Couldn't start IPC server");
crm_exit(1);
}
if (cluster_connect_cfg(&local_nodeid) == FALSE) {
crm_err("Couldn't connect to Corosync's CFG service");
crm_exit(1);
}
if (cluster_connect_cpg() == FALSE) {
crm_err("Couldn't connect to Corosync's CPG service");
crm_exit(1);
}
local_name = get_local_node_name();
update_node_processes(local_nodeid, local_name, get_process_list());
mainloop_add_signal(SIGTERM, pcmk_shutdown);
mainloop_add_signal(SIGINT, pcmk_shutdown);
find_and_track_existing_processes();
init_children_processes();
crm_info("Starting mainloop");
g_main_run(mainloop);
if (ipcs) {
crm_trace("Closing IPC server");
mainloop_del_ipc_server(ipcs);
ipcs = NULL;
}
g_main_destroy(mainloop);
cluster_disconnect_cpg();
cluster_disconnect_cfg();
crm_info("Exiting %s", crm_system_name);
crm_exit(0);
}

File Metadata

Mime Type
text/x-diff
Expires
Sat, Jan 25, 11:42 AM (1 d, 19 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1322415
Default Alt Text
(181 KB)

Event Timeline