Page MenuHomeClusterLabs Projects

No OneTemporary

This file is larger than 256 KB, so syntax highlighting was skipped.
diff --git a/.travis.yml b/.travis.yml
index 27b6cd3907..8eb10e87aa 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,73 +1,77 @@
# Control file for the Travis autobuilder
# https://docs.travis-ci.com/user/customizing-the-build/
language: c
compiler:
- gcc
- clang
# - cov-build
env:
global:
# -- BEGIN Coverity Scan ENV
# Used by https://scan.coverity.com/scripts/travisci_build_coverity_scan.sh
# The build command with all of the arguments that you would apply to a manual `cov-build`
- COVERITY_SCAN_BUILD_COMMAND="make"
# Email address for notifications related to this build
- OWNER_EMAIL="andrew@beekhof.net"
# Regular expression selects on which branches to run analysis
# Be aware of quotas. Do not run on every branch/commit
- COVERITY_SCAN_BRANCH_PATTERN="1.1"
# COVERITY_SCAN_TOKEN via "travis encrypt" using the repo's public key
- secure: "qnrF7L8RejLUY7URdNe7XP4Hu4R55C0tvAuMRg4EjVtelOpw+nIgA7BLiX19q/70VjFuKcGnMhW28TdYl0uwMMdWKKxmwTim04Sy3UfOE2BPeuQOBphr+8s9gd0U1MO8j2dZ84A40t5Mkk946wWZwT0okpjOr/PfBOZkU3o87FM="
# -- END Coverity Scan ENV
# sudo add-apt-repository ppa:hotot-team
before_install:
- sudo add-apt-repository "deb http://archive.ubuntu.com/ubuntu/ trusty main"
- sudo apt-get update -qq
install:
- sudo apt-get install -qq automake autoconf chrpath libglib2.0-dev perl net-tools python libtool libxml2-dev bison flex uuid-dev libbz2-dev zlib1g-dev libltdl3-dev libgnutls-dev python-dev libpam0g-dev libncurses5-dev libcorosync-dev libxslt1-dev libdbus-1-dev
- sudo apt-get install -qq cluster-glue-dev heartbeat-dev libheartbeat2-dev
- sudo apt-get install -qq libqb-dev
before_script:
# Save and restore CC so that ./configure can pass
- export CC_SAVED=$CC
- export CC=`echo ${CC} | sed s/cov-build/gcc/`
+ # some tests (e.g. cts-exec-helper) require actual system-wide credentials
+ - sed -e 's|^\(CRM_DAEMON_USER=\).*$|\1nobody|'
+ -e 's|^\(CRM_DAEMON_GROUP=\).*$|\1nobody|'
+ -i -- configure.ac
- ./autogen.sh
- ./configure
- export CC=$CC_SAVED
script:
- if test ${CC} != cov-build; then sudo make install-exec-local || true; fi
- if test ${CC} != cov-build; then make && ./BasicSanity.sh -V; fi
- if test ${CC} = cov-build; then export CC=gcc; bash ./travisci_build_coverity_scan.sh; fi
#after_script:
#after_success:
after_failure:
- lsb_release -a
- sudo cat /etc/apt/sources.list
- whoami
- env | sort
- cat include/config.h
notifications:
irc: "irc.freenode.org#pcmk"
# email:
# recipients:
# - developers@clusterlabs.org
# whitelist
branches:
only:
- master
- "1.1"
diff --git a/attrd/main.c b/attrd/main.c
index 4cc15cc534..e0a1e7c6ca 100644
--- a/attrd/main.c
+++ b/attrd/main.c
@@ -1,409 +1,426 @@
/*
- * Copyright 2013-2019 Andrew Beekhof <andrew@beekhof.net>
+ * Copyright 2013-2019 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <crm/crm.h>
#include <crm/cib/internal.h>
#include <crm/msg_xml.h>
#include <crm/pengine/rules.h>
#include <crm/common/iso8601.h>
#include <crm/common/ipc.h>
#include <crm/common/ipcs.h>
#include <crm/common/xml.h>
#include <crm/cluster/internal.h>
#include <crm/attrd.h>
#include <internal.h>
lrmd_t *the_lrmd = NULL;
crm_cluster_t *attrd_cluster = NULL;
crm_trigger_t *attrd_config_read = NULL;
static int attrd_exit_status = pcmk_ok;
static void
attrd_cpg_dispatch(cpg_handle_t handle,
const struct cpg_name *groupName,
uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
uint32_t kind = 0;
xmlNode *xml = NULL;
const char *from = NULL;
char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from);
if(data == NULL) {
return;
}
if (kind == crm_class_cluster) {
xml = string2xml(data);
}
if (xml == NULL) {
crm_err("Bad message of class %d received from %s[%u]: '%.120s'", kind, from, nodeid, data);
} else {
crm_node_t *peer = crm_get_peer(nodeid, from);
attrd_peer_message(peer, xml);
}
free_xml(xml);
free(data);
}
static void
attrd_cpg_destroy(gpointer unused)
{
if (attrd_shutting_down()) {
crm_info("Corosync disconnection complete");
} else {
crm_crit("Lost connection to Corosync service!");
attrd_exit_status = ECONNRESET;
attrd_shutdown(0);
}
}
static void
attrd_cib_replaced_cb(const char *event, xmlNode * msg)
{
crm_notice("Updating all attributes after %s event", event);
if (attrd_election_won()) {
write_attributes(TRUE, FALSE);
}
// Check for changes in alerts
mainloop_set_trigger(attrd_config_read);
}
static void
attrd_cib_destroy_cb(gpointer user_data)
{
cib_t *conn = user_data;
conn->cmds->signoff(conn); /* Ensure IPC is cleaned up */
if (attrd_shutting_down()) {
crm_info("Connection disconnection complete");
} else {
/* eventually this should trigger a reconnect, not a shutdown */
crm_err("Lost connection to CIB service!");
attrd_exit_status = ECONNRESET;
attrd_shutdown(0);
}
return;
}
static void
attrd_erase_cb(xmlNode *msg, int call_id, int rc, xmlNode *output,
void *user_data)
{
do_crm_log_unlikely((rc? LOG_NOTICE : LOG_DEBUG),
"Cleared transient attributes: %s "
CRM_XS " xpath=%s rc=%d",
pcmk_strerror(rc), (char *) user_data, rc);
}
#define XPATH_TRANSIENT "//node_state[@uname='%s']/" XML_TAG_TRANSIENT_NODEATTRS
/*!
* \internal
* \brief Wipe all transient attributes for this node from the CIB
*
* Clear any previous transient node attributes from the CIB. This is
* normally done by the DC's crmd when this node leaves the cluster, but
* this handles the case where the node restarted so quickly that the
* cluster layer didn't notice.
*
* \todo If attrd respawns after crashing (see PCMK_respawned), ideally we'd
* skip this and sync our attributes from the writer. However, currently
* we reject any values for us that the writer has, in
* attrd_peer_update().
*/
static void
attrd_erase_attrs()
{
int call_id;
char *xpath = crm_strdup_printf(XPATH_TRANSIENT, attrd_cluster->uname);
crm_info("Clearing transient attributes from CIB " CRM_XS " xpath=%s",
xpath);
call_id = the_cib->cmds->delete(the_cib, xpath, NULL,
cib_quorum_override | cib_xpath);
the_cib->cmds->register_callback_full(the_cib, call_id, 120, FALSE, xpath,
"attrd_erase_cb", attrd_erase_cb,
free);
}
static int
attrd_cib_connect(int max_retry)
{
static int attempts = 0;
int rc = -ENOTCONN;
the_cib = cib_new();
if (the_cib == NULL) {
return DAEMON_RESPAWN_STOP;
}
do {
if(attempts > 0) {
sleep(attempts);
}
attempts++;
crm_debug("CIB signon attempt %d", attempts);
rc = the_cib->cmds->signon(the_cib, T_ATTRD, cib_command);
} while(rc != pcmk_ok && attempts < max_retry);
if (rc != pcmk_ok) {
crm_err("Signon to CIB failed: %s (%d)", pcmk_strerror(rc), rc);
goto cleanup;
}
crm_debug("Connected to the CIB after %d attempts", attempts);
rc = the_cib->cmds->set_connection_dnotify(the_cib, attrd_cib_destroy_cb);
if (rc != pcmk_ok) {
crm_err("Could not set disconnection callback");
goto cleanup;
}
rc = the_cib->cmds->add_notify_callback(the_cib, T_CIB_REPLACE_NOTIFY, attrd_cib_replaced_cb);
if(rc != pcmk_ok) {
crm_err("Could not set CIB notification callback");
goto cleanup;
}
rc = the_cib->cmds->add_notify_callback(the_cib, T_CIB_DIFF_NOTIFY, attrd_cib_updated_cb);
if (rc != pcmk_ok) {
crm_err("Could not set CIB notification callback (update)");
goto cleanup;
}
// We have no attribute values in memory, wipe the CIB to match
attrd_erase_attrs();
// Set a trigger for reading the CIB (for the alerts section)
attrd_config_read = mainloop_add_trigger(G_PRIORITY_HIGH, attrd_read_options, NULL);
// Always read the CIB at start-up
mainloop_set_trigger(attrd_config_read);
/* Set a private attribute for ourselves with the protocol version we
* support. This lets all nodes determine the minimum supported version
* across all nodes. It also ensures that the writer learns our node name,
* so it can send our attributes to the CIB.
*/
attrd_broadcast_protocol();
return pcmk_ok;
cleanup:
the_cib->cmds->signoff(the_cib);
cib_delete(the_cib);
the_cib = NULL;
return DAEMON_RESPAWN_STOP;
}
static int32_t
attrd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size)
{
uint32_t id = 0;
uint32_t flags = 0;
crm_client_t *client = crm_client_get(c);
xmlNode *xml = crm_ipcs_recv(client, data, size, &id, &flags);
const char *op;
if (xml == NULL) {
crm_debug("No msg from %d (%p)", crm_ipcs_client_pid(c), c);
return 0;
}
#if ENABLE_ACL
CRM_ASSERT(client->user != NULL);
crm_acl_get_set_user(xml, F_ATTRD_USER, client->user);
#endif
crm_trace("Processing msg from %d (%p)", crm_ipcs_client_pid(c), c);
crm_log_xml_trace(xml, __FUNCTION__);
op = crm_element_value(xml, F_ATTRD_TASK);
if (client->name == NULL) {
const char *value = crm_element_value(xml, F_ORIG);
client->name = crm_strdup_printf("%s.%d", value?value:"unknown", client->pid);
}
if (safe_str_eq(op, ATTRD_OP_PEER_REMOVE)) {
attrd_send_ack(client, id, flags);
attrd_client_peer_remove(client->name, xml);
} else if (safe_str_eq(op, ATTRD_OP_CLEAR_FAILURE)) {
attrd_send_ack(client, id, flags);
attrd_client_clear_failure(xml);
} else if (safe_str_eq(op, ATTRD_OP_UPDATE)) {
attrd_send_ack(client, id, flags);
attrd_client_update(xml);
} else if (safe_str_eq(op, ATTRD_OP_UPDATE_BOTH)) {
attrd_send_ack(client, id, flags);
attrd_client_update(xml);
} else if (safe_str_eq(op, ATTRD_OP_UPDATE_DELAY)) {
attrd_send_ack(client, id, flags);
attrd_client_update(xml);
} else if (safe_str_eq(op, ATTRD_OP_REFRESH)) {
attrd_send_ack(client, id, flags);
attrd_client_refresh();
} else if (safe_str_eq(op, ATTRD_OP_QUERY)) {
/* queries will get reply, so no ack is necessary */
attrd_client_query(client, id, flags, xml);
} else {
crm_info("Ignoring request from client %s with unknown operation %s",
client->name, op);
}
free_xml(xml);
return 0;
}
static int
attrd_cluster_connect()
{
attrd_cluster = calloc(1, sizeof(crm_cluster_t));
attrd_cluster->destroy = attrd_cpg_destroy;
attrd_cluster->cpg.cpg_deliver_fn = attrd_cpg_dispatch;
attrd_cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership;
crm_set_status_callback(&attrd_peer_change_cb);
if (crm_cluster_connect(attrd_cluster) == FALSE) {
crm_err("Cluster connection failed");
return DAEMON_RESPAWN_STOP;
}
return pcmk_ok;
}
/* *INDENT-OFF* */
static struct crm_option long_options[] = {
/* Top-level Options */
{"help", 0, 0, '?', "\tThis text"},
{"verbose", 0, 0, 'V', "\tIncrease debug output"},
{0, 0, 0, 0}
};
/* *INDENT-ON* */
int
main(int argc, char **argv)
{
int flag = 0;
int index = 0;
int argerr = 0;
qb_ipcs_service_t *ipcs = NULL;
+ crm_ipc_t *old_instance = NULL;
attrd_init_mainloop();
crm_log_preinit(NULL, argc, argv);
crm_set_options(NULL, "[options]", long_options,
"Daemon for aggregating and atomically storing node attribute updates into the CIB");
mainloop_add_signal(SIGTERM, attrd_shutdown);
while (1) {
flag = crm_get_option(argc, argv, &index);
if (flag == -1)
break;
switch (flag) {
case 'V':
crm_bump_log_level(argc, argv);
break;
case 'h': /* Help message */
crm_help(flag, EX_OK);
break;
default:
++argerr;
break;
}
}
if (optind > argc) {
++argerr;
}
if (argerr) {
crm_help('?', EX_USAGE);
}
crm_log_init(T_ATTRD, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
crm_info("Starting up");
+
+ old_instance = crm_ipc_new(T_ATTRD, 0);
+ if (crm_ipc_connect(old_instance)) {
+ /* IPC end-point already up */
+ crm_ipc_close(old_instance);
+ crm_ipc_destroy(old_instance);
+ crm_err("attrd is already active, aborting startup");
+ crm_exit(EX_OK);
+ } else {
+ /* not up or not authentic, we'll proceed either way */
+ crm_ipc_destroy(old_instance);
+ old_instance = NULL;
+ }
+
attributes = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_attribute);
attrd_exit_status = attrd_cluster_connect();
if (attrd_exit_status != pcmk_ok) {
goto done;
}
crm_info("Cluster connection active");
attrd_election_init();
attrd_exit_status = attrd_cib_connect(10);
if (attrd_exit_status != pcmk_ok) {
goto done;
}
crm_info("CIB connection active");
attrd_init_ipc(&ipcs, attrd_ipc_dispatch);
crm_info("Accepting attribute updates");
attrd_run_mainloop();
done:
crm_info("Shutting down attribute manager");
attrd_election_fini();
if (ipcs) {
crm_client_disconnect_all(ipcs);
qb_ipcs_destroy(ipcs);
g_hash_table_destroy(attributes);
}
attrd_lrmd_disconnect();
attrd_cib_disconnect();
return crm_exit(attrd_exit_status);
}
diff --git a/cib/main.c b/cib/main.c
index 5473d40958..7c745dafc5 100644
--- a/cib/main.c
+++ b/cib/main.c
@@ -1,585 +1,590 @@
/*
- * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
+ * Copyright 2004-2019 the Pacemaker project contributors
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
+ * The version control history for this file may have further details.
*
- * This software is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/utsname.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <crm/crm.h>
#include <crm/cib/internal.h>
#include <crm/msg_xml.h>
#include <crm/cluster/internal.h>
#include <crm/common/xml.h>
#include <crm/common/mainloop.h>
#include <cibio.h>
#include <callbacks.h>
#include <pwd.h>
#include <grp.h>
#include "common.h"
#if HAVE_LIBXML2
# include <libxml/parser.h>
#endif
#ifdef HAVE_GETOPT_H
# include <getopt.h>
#endif
#if HAVE_BZLIB_H
# include <bzlib.h>
#endif
extern int init_remote_listener(int port, gboolean encrypted);
gboolean cib_shutdown_flag = FALSE;
int cib_status = pcmk_ok;
crm_cluster_t crm_cluster;
#if SUPPORT_HEARTBEAT
oc_ev_t *cib_ev_token;
ll_cluster_t *hb_conn = NULL;
extern void oc_ev_special(const oc_ev_t *, oc_ev_class_t, int);
gboolean cib_register_ha(ll_cluster_t * hb_cluster, const char *client_name);
#else
void *hb_conn = NULL;
#endif
GMainLoop *mainloop = NULL;
const char *cib_root = NULL;
char *cib_our_uname = NULL;
gboolean preserve_status = FALSE;
/* volatile because it may be changed in a signal handler */
volatile gboolean cib_writes_enabled = TRUE;
int remote_fd = 0;
int remote_tls_fd = 0;
int cib_init(void);
void cib_shutdown(int nsig);
gboolean startCib(const char *filename);
extern int write_cib_contents(gpointer p);
GHashTable *config_hash = NULL;
GHashTable *local_notify_queue = NULL;
char *channel1 = NULL;
char *channel2 = NULL;
char *channel3 = NULL;
char *channel4 = NULL;
char *channel5 = NULL;
#define OPTARGS "maswr:V?"
void cib_cleanup(void);
static void
cib_enable_writes(int nsig)
{
crm_info("(Re)enabling disk writes");
cib_writes_enabled = TRUE;
}
static void
log_cib_client(gpointer key, gpointer value, gpointer user_data)
{
crm_info("Client %s", crm_client_name(value));
}
/* *INDENT-OFF* */
static struct crm_option long_options[] = {
/* Top-level Options */
{"help", 0, 0, '?', "\tThis text"},
{"verbose", 0, 0, 'V', "\tIncrease debug output"},
{"per-action-cib", 0, 0, 'a', "\tAdvanced use only"},
{"stand-alone", 0, 0, 's', "\tAdvanced use only"},
{"disk-writes", 0, 0, 'w', "\tAdvanced use only"},
{"cib-root", 1, 0, 'r', "\tAdvanced use only"},
{0, 0, 0, 0}
};
/* *INDENT-ON* */
int
main(int argc, char **argv)
{
int flag;
int rc = 0;
int index = 0;
int argerr = 0;
struct passwd *pwentry = NULL;
+ crm_ipc_t *old_instance = NULL;
crm_log_preinit(NULL, argc, argv);
crm_set_options(NULL, "[options]",
long_options, "Daemon for storing and replicating the cluster configuration");
- crm_peer_init();
-
mainloop_add_signal(SIGTERM, cib_shutdown);
mainloop_add_signal(SIGPIPE, cib_enable_writes);
cib_writer = mainloop_add_trigger(G_PRIORITY_LOW, write_cib_contents, NULL);
while (1) {
flag = crm_get_option(argc, argv, &index);
if (flag == -1)
break;
switch (flag) {
case 'V':
crm_bump_log_level(argc, argv);
break;
case 's':
stand_alone = TRUE;
preserve_status = TRUE;
cib_writes_enabled = FALSE;
pwentry = getpwnam(CRM_DAEMON_USER);
CRM_CHECK(pwentry != NULL,
crm_perror(LOG_ERR, "Invalid uid (%s) specified", CRM_DAEMON_USER);
return 100);
rc = setgid(pwentry->pw_gid);
if (rc < 0) {
crm_perror(LOG_ERR, "Could not set group to %d", pwentry->pw_gid);
return 100;
}
rc = initgroups(CRM_DAEMON_GROUP, pwentry->pw_gid);
if (rc < 0) {
crm_perror(LOG_ERR, "Could not setup groups for user %d", pwentry->pw_uid);
return 100;
}
rc = setuid(pwentry->pw_uid);
if (rc < 0) {
crm_perror(LOG_ERR, "Could not set user to %d", pwentry->pw_uid);
return 100;
}
break;
case '?': /* Help message */
crm_help(flag, EX_OK);
break;
case 'w':
cib_writes_enabled = TRUE;
break;
case 'r':
cib_root = optarg;
break;
case 'm':
cib_metadata();
return 0;
default:
++argerr;
break;
}
}
if (argc - optind == 1 && safe_str_eq("metadata", argv[optind])) {
cib_metadata();
return 0;
}
if (optind > argc) {
++argerr;
}
if (argerr) {
crm_help('?', EX_USAGE);
}
crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
+ old_instance = crm_ipc_new(cib_channel_ro, 0);
+ if (crm_ipc_connect(old_instance)) {
+ /* IPC end-point already up */
+ crm_ipc_close(old_instance);
+ crm_ipc_destroy(old_instance);
+ crm_err("cib is already active, aborting startup");
+ crm_exit(EX_OK);
+ } else {
+ /* not up or not authentic, we'll proceed either way */
+ crm_ipc_destroy(old_instance);
+ old_instance = NULL;
+ }
+
if (cib_root == NULL) {
if ((g_file_test(CRM_CONFIG_DIR "/cib.xml", G_FILE_TEST_EXISTS) == FALSE)
&& (g_file_test(CRM_LEGACY_CONFIG_DIR "/cib.xml", G_FILE_TEST_EXISTS) == TRUE)) {
crm_notice("Using legacy config location: " CRM_LEGACY_CONFIG_DIR);
cib_root = CRM_LEGACY_CONFIG_DIR;
} else {
cib_root = CRM_CONFIG_DIR;
}
} else {
crm_notice("Using custom config location: %s", cib_root);
}
if (pcmk__daemon_can_write(cib_root, NULL) == FALSE) {
crm_err("Terminating due to bad permissions on %s", cib_root);
fprintf(stderr, "ERROR: Bad permissions on %s (see logs for details)\n",
cib_root);
fflush(stderr);
return 100;
}
+ crm_peer_init();
+
/* read local config file */
rc = cib_init();
CRM_CHECK(crm_hash_table_size(client_connections) == 0,
crm_warn("Not all clients gone at exit"));
g_hash_table_foreach(client_connections, log_cib_client, NULL);
cib_cleanup();
#if SUPPORT_HEARTBEAT
if (hb_conn) {
hb_conn->llc_ops->delete(hb_conn);
}
#endif
crm_info("Done");
return rc;
}
void
cib_cleanup(void)
{
crm_peer_destroy();
if (local_notify_queue) {
g_hash_table_destroy(local_notify_queue);
}
crm_client_cleanup();
g_hash_table_destroy(config_hash);
free(cib_our_uname);
free(channel1);
free(channel2);
free(channel3);
free(channel4);
free(channel5);
}
unsigned long cib_num_ops = 0;
const char *cib_stat_interval = "10min";
unsigned long cib_num_local = 0, cib_num_updates = 0, cib_num_fail = 0;
unsigned long cib_bad_connects = 0, cib_num_timeouts = 0;
#if SUPPORT_HEARTBEAT
gboolean ccm_connect(void);
static void
ccm_connection_destroy(gpointer user_data)
{
crm_err("CCM connection failed... blocking while we reconnect");
CRM_ASSERT(ccm_connect());
return;
}
static void *ccm_library = NULL;
gboolean
ccm_connect(void)
{
gboolean did_fail = TRUE;
int num_ccm_fails = 0;
int max_ccm_fails = 30;
int ret;
int cib_ev_fd;
int (*ccm_api_register) (oc_ev_t ** token) =
find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_register", 1);
int (*ccm_api_set_callback) (const oc_ev_t * token,
oc_ev_class_t class,
oc_ev_callback_t * fn,
oc_ev_callback_t ** prev_fn) =
find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_set_callback", 1);
void (*ccm_api_special) (const oc_ev_t *, oc_ev_class_t, int) =
find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_special", 1);
int (*ccm_api_activate) (const oc_ev_t * token, int *fd) =
find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_activate", 1);
int (*ccm_api_unregister) (oc_ev_t * token) =
find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_unregister", 1);
static struct mainloop_fd_callbacks ccm_fd_callbacks = {
.dispatch = cib_ccm_dispatch,
.destroy = ccm_connection_destroy,
};
while (did_fail) {
did_fail = FALSE;
crm_info("Registering with CCM...");
ret = (*ccm_api_register) (&cib_ev_token);
if (ret != 0) {
did_fail = TRUE;
}
if (did_fail == FALSE) {
crm_trace("Setting up CCM callbacks");
ret = (*ccm_api_set_callback) (cib_ev_token, OC_EV_MEMB_CLASS,
cib_ccm_msg_callback, NULL);
if (ret != 0) {
crm_warn("CCM callback not set");
did_fail = TRUE;
}
}
if (did_fail == FALSE) {
(*ccm_api_special) (cib_ev_token, OC_EV_MEMB_CLASS, 0);
crm_trace("Activating CCM token");
ret = (*ccm_api_activate) (cib_ev_token, &cib_ev_fd);
if (ret != 0) {
crm_warn("CCM Activation failed");
did_fail = TRUE;
}
}
if (did_fail) {
num_ccm_fails++;
(*ccm_api_unregister) (cib_ev_token);
if (num_ccm_fails < max_ccm_fails) {
crm_warn("CCM Connection failed %d times (%d max)", num_ccm_fails, max_ccm_fails);
sleep(3);
} else {
crm_err("CCM Activation failed %d (max) times", num_ccm_fails);
return FALSE;
}
}
}
crm_debug("CCM Activation passed... all set to go!");
mainloop_add_fd("heartbeat-ccm", G_PRIORITY_MEDIUM, cib_ev_fd, cib_ev_token, &ccm_fd_callbacks);
return TRUE;
}
#endif
#if SUPPORT_COROSYNC
static void
cib_cs_dispatch(cpg_handle_t handle,
const struct cpg_name *groupName,
uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
uint32_t kind = 0;
xmlNode *xml = NULL;
const char *from = NULL;
char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from);
if(data == NULL) {
return;
}
if (kind == crm_class_cluster) {
xml = string2xml(data);
if (xml == NULL) {
crm_err("Invalid XML: '%.120s'", data);
free(data);
return;
}
crm_xml_add(xml, F_ORIG, from);
/* crm_xml_add_int(xml, F_SEQ, wrapper->id); */
cib_peer_callback(xml, NULL);
}
free_xml(xml);
free(data);
}
static void
cib_cs_destroy(gpointer user_data)
{
if (cib_shutdown_flag) {
crm_info("Corosync disconnection complete");
} else {
crm_err("Corosync connection lost! Exiting.");
terminate_cib(__FUNCTION__, -1);
}
}
#endif
static void
cib_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
{
switch (type) {
case crm_status_processes:
#if !SUPPORT_PLUGIN
if (cib_legacy_mode()
&& is_not_set(node->processes, crm_get_cluster_proc())) {
uint32_t old = data? *(const uint32_t *)data : 0;
if ((node->processes ^ old) & crm_proc_cpg) {
crm_info("Attempting to disable legacy mode after %s left the cluster",
node->uname);
legacy_mode = FALSE;
}
}
#endif
break;
case crm_status_uname:
case crm_status_rstate:
case crm_status_nstate:
if (cib_shutdown_flag && (crm_active_peers() < 2)
&& crm_hash_table_size(client_connections) == 0) {
crm_info("No more peers");
terminate_cib(__FUNCTION__, 1);
}
break;
}
}
#if SUPPORT_HEARTBEAT
static void
cib_ha_connection_destroy(gpointer user_data)
{
if (cib_shutdown_flag) {
crm_info("Heartbeat disconnection complete... exiting");
terminate_cib(__FUNCTION__, 0);
} else {
crm_err("Heartbeat connection lost! Exiting.");
terminate_cib(__FUNCTION__, -1);
}
}
#endif
int
cib_init(void)
{
if (is_openais_cluster()) {
#if SUPPORT_COROSYNC
crm_cluster.destroy = cib_cs_destroy;
crm_cluster.cpg.cpg_deliver_fn = cib_cs_dispatch;
crm_cluster.cpg.cpg_confchg_fn = pcmk_cpg_membership;
#endif
} else if (is_heartbeat_cluster()) {
#if SUPPORT_HEARTBEAT
crm_cluster.hb_dispatch = cib_ha_peer_callback;
crm_cluster.destroy = cib_ha_connection_destroy;
#endif
}
config_hash = crm_str_table_new();
if (startCib("cib.xml") == FALSE) {
crm_crit("Cannot start CIB... terminating");
crm_exit(ENODATA);
}
if (stand_alone == FALSE) {
if (is_openais_cluster()) {
crm_set_status_callback(&cib_peer_update_callback);
}
if (crm_cluster_connect(&crm_cluster) == FALSE) {
crm_crit("Cannot sign in to the cluster... terminating");
crm_exit(DAEMON_RESPAWN_STOP);
}
cib_our_uname = crm_cluster.uname;
#if SUPPORT_HEARTBEAT
if (is_heartbeat_cluster()) {
gboolean was_error = FALSE;
hb_conn = crm_cluster.hb_conn;
if (was_error == FALSE) {
if (HA_OK !=
hb_conn->llc_ops->set_cstatus_callback(hb_conn, cib_client_status_callback,
hb_conn)) {
crm_err("Cannot set cstatus callback: %s", hb_conn->llc_ops->errmsg(hb_conn));
was_error = TRUE;
}
}
if (was_error == FALSE) {
was_error = (ccm_connect() == FALSE);
}
if (was_error == FALSE) {
/* Async get client status information in the cluster */
crm_info("Requesting the list of configured nodes");
hb_conn->llc_ops->client_status(hb_conn, NULL, CRM_SYSTEM_CIB, -1);
}
}
#endif
} else {
cib_our_uname = strdup("localhost");
}
cib_ipc_servers_init(&ipcs_ro,
&ipcs_rw,
&ipcs_shm,
&ipc_ro_callbacks,
&ipc_rw_callbacks);
if (stand_alone) {
cib_is_master = TRUE;
}
/* Create the mainloop and run it... */
mainloop = g_main_new(FALSE);
crm_info("Starting %s mainloop", crm_system_name);
g_main_run(mainloop);
/* If main loop returned, clean up and exit. We disconnect in case
* terminate_cib() was called with fast=1.
*/
crm_cluster_disconnect(&crm_cluster);
cib_ipc_servers_destroy(ipcs_ro, ipcs_rw, ipcs_shm);
return crm_exit(pcmk_ok);
}
gboolean
startCib(const char *filename)
{
gboolean active = FALSE;
xmlNode *cib = readCibXmlFile(cib_root, filename, !preserve_status);
CRM_ASSERT(cib != NULL);
if (activateCibXml(cib, TRUE, "start") == 0) {
int port = 0;
const char *port_s = NULL;
active = TRUE;
cib_read_config(config_hash, cib);
port_s = crm_element_value(cib, "remote-tls-port");
if (port_s) {
port = crm_parse_int(port_s, "0");
remote_tls_fd = init_remote_listener(port, TRUE);
}
port_s = crm_element_value(cib, "remote-clear-port");
if (port_s) {
port = crm_parse_int(port_s, "0");
remote_fd = init_remote_listener(port, FALSE);
}
crm_info("CIB Initialization completed successfully");
}
return active;
}
diff --git a/configure.ac b/configure.ac
index ed51f67c87..ce02777665 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,2181 +1,2224 @@
dnl
dnl autoconf for Pacemaker
dnl
dnl License: GNU General Public License (GPL)
dnl ===============================================
dnl Bootstrap
dnl ===============================================
AC_PREREQ(2.59)
dnl Suggested structure:
dnl information on the package
dnl checks for programs
dnl checks for libraries
dnl checks for header files
dnl checks for types
dnl checks for structures
dnl checks for compiler characteristics
dnl checks for library functions
dnl checks for system services
m4_include([version.m4])
AC_INIT([pacemaker], VERSION_NUMBER, [users@clusterlabs.org],
[pacemaker], PCMK_URL)
dnl Workaround autoconf < 2.64
if test x"${PACKAGE_URL}" = x""; then
AC_SUBST([PACKAGE_URL], PCMK_URL)
fi
PCMK_FEATURES=""
HB_PKG=heartbeat
AC_CONFIG_AUX_DIR(.)
AC_CANONICAL_HOST
dnl Where #defines go (e.g. `AC_CHECK_HEADERS' below)
dnl
dnl Internal header: include/config.h
dnl - Contains ALL defines
dnl - include/config.h.in is generated automatically by autoheader
dnl - NOT to be included in any header files except lha_internal.h
dnl (which is also not to be included in any other header files)
dnl
dnl External header: include/crm_config.h
dnl - Contains a subset of defines checked here
dnl - Manually edit include/crm_config.h.in to have configure include
dnl new defines
dnl - Should not include HAVE_* defines
dnl - Safe to include anywhere
AM_CONFIG_HEADER(include/config.h include/crm_config.h)
ALL_LINGUAS="en fr"
AC_ARG_WITH(version,
[ --with-version=version Override package version (if you're a packager needing to pretend) ],
[ PACKAGE_VERSION="$withval" ])
AC_ARG_WITH(pkg-name,
[ --with-pkg-name=name Override package name (if you're a packager needing to pretend) ],
[ PACKAGE_NAME="$withval" ])
dnl Older distros may need: AM_INIT_AUTOMAKE($PACKAGE_NAME, $PACKAGE_VERSION)
AM_INIT_AUTOMAKE([foreign])
AC_DEFINE_UNQUOTED(PACEMAKER_VERSION, "$PACKAGE_VERSION", Current pacemaker version)
dnl Versioned attributes implementation is not yet production-ready
AC_DEFINE_UNQUOTED(ENABLE_VERSIONED_ATTRS, 0, Enable versioned attributes)
PACKAGE_SERIES=`echo $PACKAGE_VERSION | awk -F. '{ print $1"."$2 }'`
AC_SUBST(PACKAGE_SERIES)
AC_SUBST(PACKAGE_VERSION)
dnl automake >= 1.11 offers --enable-silent-rules for suppressing the output from
dnl normal compilation. When a failure occurs, it will then display the full
dnl command line
dnl Wrap in m4_ifdef to avoid breaking on older platforms
m4_ifdef([AM_SILENT_RULES],[AM_SILENT_RULES([yes])])
dnl Example 2.4. Silent Custom Rule to Generate a File
dnl %-bar.pc: %.pc
dnl $(AM_V_GEN)$(LN_S) $(notdir $^) $@
CC_IN_CONFIGURE=yes
export CC_IN_CONFIGURE
LDD=ldd
BUILD_ATOMIC_ATTRD=1
dnl ========================================================================
dnl Compiler characteristics
dnl ========================================================================
AC_PROG_CC dnl Can force other with environment variable "CC".
AM_PROG_CC_C_O
AC_PROG_CC_STDC
gl_EARLY
gl_INIT
LT_INIT([dlopen])
LTDL_INIT([convenience])
AC_PROG_YACC
AM_PROG_LEX
AC_C_STRINGIZE
AC_TYPE_SIZE_T
AC_CHECK_SIZEOF(char)
AC_CHECK_SIZEOF(short)
AC_CHECK_SIZEOF(int)
AC_CHECK_SIZEOF(long)
AC_CHECK_SIZEOF(long long)
AC_STRUCT_TIMEZONE
dnl ===============================================
dnl Helpers
dnl ===============================================
cc_supports_flag() {
local CFLAGS="-Werror $@"
AC_MSG_CHECKING(whether $CC supports "$@")
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ ]], [[ ]])], [RC=0; AC_MSG_RESULT(yes)],[RC=1; AC_MSG_RESULT(no)])
return $RC
}
try_extract_header_define() {
AC_MSG_CHECKING(if $2 in $1 exists)
Cfile=$srcdir/extract_define.$2.${$}
printf "#include <stdio.h>\n" > ${Cfile}.c
printf "#include <%s>\n" $1 >> ${Cfile}.c
printf "int main(int argc, char **argv) {\n" >> ${Cfile}.c
printf "#ifdef %s\n" $2 >> ${Cfile}.c
printf "printf(\"%%s\", %s);\n" $2 >> ${Cfile}.c
printf "#endif \n return 0; }\n" >> ${Cfile}.c
$CC $CFLAGS ${Cfile}.c -o ${Cfile} 2>/dev/null
value=
if test -x ${Cfile}; then
value=`${Cfile} 2>/dev/null`
fi
if test x"${value}" == x""; then
value=$3
AC_MSG_RESULT(default: $value)
else
AC_MSG_RESULT($value)
fi
printf $value
rm -rf ${Cfile}.c ${Cfile} ${Cfile}.dSYM ${Cfile}.gcno
}
extract_header_define() {
AC_MSG_CHECKING(for $2 in $1)
Cfile=$srcdir/extract_define.$2.${$}
printf "#include <stdio.h>\n" > ${Cfile}.c
printf "#include <%s>\n" $1 >> ${Cfile}.c
printf "int main(int argc, char **argv) { printf(\"%%s\", %s); return 0; }\n" $2 >> ${Cfile}.c
$CC $CFLAGS ${Cfile}.c -o ${Cfile}
value=`${Cfile}`
AC_MSG_RESULT($value)
printf $value
rm -rf ${Cfile}.c ${Cfile} ${Cfile}.dSYM ${Cfile}.gcno
}
dnl ===============================================
dnl Configure Options
dnl ===============================================
dnl Some systems, like Solaris require a custom package name
AC_ARG_WITH(pkgname,
[ --with-pkgname=name name for pkg (typically for Solaris) ],
[ PKGNAME="$withval" ],
[ PKGNAME="LXHAhb" ],
)
AC_SUBST(PKGNAME)
AC_ARG_ENABLE([ansi],
[ --enable-ansi force GCC to compile to ANSI/ANSI standard for older compilers.
[default=no]])
AC_ARG_ENABLE([fatal-warnings],
[ --enable-fatal-warnings very pedantic and fatal warnings for gcc
[default=yes]])
AC_ARG_ENABLE([quiet],
[ --enable-quiet
Supress make output unless there is an error
[default=no]])
AC_ARG_ENABLE([no-stack],
[ --enable-no-stack
Only build the Policy Engine and pieces needed to support it [default=no]])
AC_ARG_ENABLE([upstart],
[ --enable-upstart
Enable support for managing resources via Upstart [default=try]],
[],
[enable_upstart=try],
)
AC_ARG_ENABLE([systemd],
[ --enable-systemd
Enable support for managing resources via systemd [default=try]],
[],
[enable_systemd=try],
)
AC_ARG_ENABLE(hardening,
[ --with-hardening
Harden the resulting executables/libraries (best effort by default)],
[ HARDENING="${enableval}" ],
[ HARDENING=try ],
)
AC_ARG_WITH(ais,
[ --with-ais
Support the Corosync messaging and membership layer ],
[ SUPPORT_CS=$withval ],
[ SUPPORT_CS=try ],
)
AC_ARG_WITH(corosync,
[ --with-corosync
Support the Corosync messaging and membership layer ],
[ SUPPORT_CS=$withval ]
dnl initialized in AC_ARG_WITH(ais...) already,
dnl don't reset to try if it was given as --without-ais
)
AC_ARG_WITH(heartbeat,
[ --with-heartbeat
Support the Heartbeat messaging and membership layer ],
[ SUPPORT_HEARTBEAT=$withval ],
[ SUPPORT_HEARTBEAT=try ],
)
AC_ARG_WITH(cman,
[ --with-cman
Support the consumption of membership and quorum from cman ],
[ SUPPORT_CMAN=$withval ],
[ SUPPORT_CMAN=try ],
)
AC_ARG_WITH(cpg,
[ --with-cs-quorum
Support the consumption of membership and quorum from corosync ],
[ SUPPORT_CS_QUORUM=$withval ],
[ SUPPORT_CS_QUORUM=try ],
)
AC_ARG_WITH(nagios,
[ --with-nagios
Support nagios remote monitoring ],
[ SUPPORT_NAGIOS=$withval ],
[ SUPPORT_NAGIOS=try ],
)
AC_ARG_WITH(nagios-plugin-dir,
[ --with-nagios-plugin-dir=DIR
Directory for nagios plugins [${NAGIOS_PLUGIN_DIR}]],
[ NAGIOS_PLUGIN_DIR="$withval" ]
)
AC_ARG_WITH(nagios-metadata-dir,
[ --with-nagios-metadata-dir=DIR
Directory for nagios plugins metadata [${NAGIOS_METADATA_DIR}]],
[ NAGIOS_METADATA_DIR="$withval" ]
)
AC_ARG_WITH(snmp,
[ --with-snmp
Support the SNMP protocol ],
[ SUPPORT_SNMP=$withval ],
[ SUPPORT_SNMP=try ],
)
AC_ARG_WITH(esmtp,
[ --with-esmtp
Support the sending mail notifications with the esmtp library ],
[ SUPPORT_ESMTP=$withval ],
[ SUPPORT_ESMTP=try ],
)
AC_ARG_WITH(acl,
[ --with-acl
Support CIB ACL ],
[ SUPPORT_ACL=$withval ],
[ SUPPORT_ACL=yes ],
)
AC_ARG_WITH(cibsecrets,
[ --with-cibsecrets
Support CIB secrets ],
[ SUPPORT_CIBSECRETS=$withval ],
[ SUPPORT_CIBSECRETS=no ],
)
CSPREFIX=""
AC_ARG_WITH(ais-prefix,
[ --with-ais-prefix=DIR Prefix used when Corosync was installed [$prefix]],
[ CSPREFIX=$withval ],
[ CSPREFIX=$prefix ])
INITDIR=""
AC_ARG_WITH(initdir,
[ --with-initdir=DIR directory for init (rc) scripts [${INITDIR}]],
[ INITDIR="$withval" ])
SUPPORT_PROFILING=0
AC_ARG_WITH(profiling,
[ --with-profiling
Disable optimizations for effective profiling ],
[ SUPPORT_PROFILING=$withval ])
AC_ARG_WITH(coverage,
[ --with-coverage
Disable optimizations for effective profiling ],
[ SUPPORT_COVERAGE=$withval ])
PUBLICAN_BRAND="common"
AC_ARG_WITH(brand,
[ --with-brand=brand Brand to use for generated documentation (set empty for no docs) [$PUBLICAN_BRAND]],
[ test x"$withval" = x"no" || PUBLICAN_BRAND="$withval" ])
AC_SUBST(PUBLICAN_BRAND)
BUG_URL=""
AC_ARG_WITH(bug-url,
[ --with-bug-url=DIR Address where users should submit bug reports @<:@https://bugs.clusterlabs.org/enter_bug.cgi?product=Pacemaker@:>@],
[ BUG_URL="$withval" ]
)
ASCIIDOC_CLI_TYPE="pcs"
AC_ARG_WITH(doc-cli,
[ --with-doc-cli=cli_type CLI type to use for generated documentation. [$ASCIIDOC_CLI_TYPE]],
[ ASCIIDOC_CLI_TYPE="$withval" ])
AC_SUBST(ASCIIDOC_CLI_TYPE)
CONFIGDIR=""
AC_ARG_WITH(configdir,
[ --with-configdir=DIR
Directory for Pacemaker configuration file [${CONFIGDIR}]],
[ CONFIGDIR="$withval" ]
)
dnl ===============================================
dnl General Processing
dnl ===============================================
AC_SUBST(HB_PKG)
INIT_EXT=""
echo Our Host OS: $host_os/$host
AC_MSG_NOTICE(Sanitizing prefix: ${prefix})
case $prefix in
NONE)
prefix=/usr
dnl Fix default variables - "prefix" variable if not specified
if test "$localstatedir" = "\${prefix}/var"; then
localstatedir="/var"
fi
if test "$sysconfdir" = "\${prefix}/etc"; then
sysconfdir="/etc"
fi
;;
esac
AC_MSG_NOTICE(Sanitizing exec_prefix: ${exec_prefix})
case $exec_prefix in
dnl For consistency with Heartbeat, map NONE->$prefix
NONE) exec_prefix=$prefix;;
prefix) exec_prefix=$prefix;;
esac
AC_MSG_NOTICE(Sanitizing ais_prefix: ${CSPREFIX})
case $CSPREFIX in
dnl For consistency with Heartbeat, map NONE->$prefix
NONE) CSPREFIX=$prefix;;
prefix) CSPREFIX=$prefix;;
esac
AC_MSG_NOTICE(Sanitizing INITDIR: ${INITDIR})
case $INITDIR in
prefix) INITDIR=$prefix;;
"")
AC_MSG_CHECKING(which init (rc) directory to use)
for initdir in /etc/init.d /etc/rc.d/init.d /sbin/init.d \
/usr/local/etc/rc.d /etc/rc.d
do
if
test -d $initdir
then
INITDIR=$initdir
break
fi
done
AC_MSG_RESULT($INITDIR);;
esac
AC_SUBST(INITDIR)
AC_MSG_NOTICE(Sanitizing libdir: ${libdir})
case $libdir in
dnl For consistency with Heartbeat, map NONE->$prefix
prefix|NONE)
AC_MSG_CHECKING(which lib directory to use)
for aDir in lib64 lib
do
trydir="${exec_prefix}/${aDir}"
if
test -d ${trydir}
then
libdir=${trydir}
break
fi
done
AC_MSG_RESULT($libdir);
;;
esac
dnl Expand autoconf variables so that we don't end up with '${prefix}'
dnl in #defines and python scripts
dnl NOTE: Autoconf deliberately leaves them unexpanded to allow
dnl make exec_prefix=/foo install
dnl No longer being able to do this seems like no great loss to me...
eval prefix="`eval echo ${prefix}`"
eval exec_prefix="`eval echo ${exec_prefix}`"
eval bindir="`eval echo ${bindir}`"
eval sbindir="`eval echo ${sbindir}`"
eval libexecdir="`eval echo ${libexecdir}`"
eval datadir="`eval echo ${datadir}`"
eval sysconfdir="`eval echo ${sysconfdir}`"
eval sharedstatedir="`eval echo ${sharedstatedir}`"
eval localstatedir="`eval echo ${localstatedir}`"
eval libdir="`eval echo ${libdir}`"
eval includedir="`eval echo ${includedir}`"
eval oldincludedir="`eval echo ${oldincludedir}`"
eval infodir="`eval echo ${infodir}`"
eval mandir="`eval echo ${mandir}`"
dnl Home-grown variables
eval INITDIR="${INITDIR}"
eval docdir="`eval echo ${docdir}`"
if test x"${docdir}" = x""; then
docdir=${datadir}/doc/${PACKAGE}-${VERSION}
#docdir=${datadir}/doc/packages/${PACKAGE}
fi
AC_SUBST(docdir)
if test x"${CONFIGDIR}" = x""; then
CONFIGDIR="${sysconfdir}/sysconfig"
fi
AC_SUBST(CONFIGDIR)
if test x"${BUG_URL}" = x""; then
BUG_URL="https://bugs.clusterlabs.org/enter_bug.cgi?product=Pacemaker"
fi
AC_SUBST(BUG_URL)
for j in prefix exec_prefix bindir sbindir libexecdir datadir sysconfdir \
sharedstatedir localstatedir libdir includedir oldincludedir infodir \
mandir INITDIR docdir CONFIGDIR
do
dirname=`eval echo '${'${j}'}'`
if
test ! -d "$dirname"
then
AC_MSG_WARN([$j directory ($dirname) does not exist!])
fi
done
+us_auth=
+AC_CHECK_HEADER([sys/socket.h], [
+ AC_CHECK_DECL([SO_PEERCRED], [
+ # Linux
+ AC_CHECK_TYPE([struct ucred], [
+ us_auth=peercred_ucred;
+ AC_DEFINE([US_AUTH_PEERCRED_UCRED], [1],
+ [Define if Unix socket auth method is
+ getsockopt(s, SO_PEERCRED, &ucred, ...)])
+ ], [
+ # OpenBSD
+ AC_CHECK_TYPE([struct sockpeercred], [
+ us_auth=localpeercred_sockepeercred;
+ AC_DEFINE([US_AUTH_PEERCRED_SOCKPEERCRED], [1],
+ [Define if Unix socket auth method is
+ getsockopt(s, SO_PEERCRED, &sockpeercred, ...)])
+ ], [], [[#include <sys/socket.h>]])
+ ], [[#define _GNU_SOURCE
+ #include <sys/socket.h>]])
+ ], [], [[#include <sys/socket.h>]])
+])
+
+if test -z "${us_auth}"; then
+ # FreeBSD
+ AC_CHECK_DECL([getpeereid], [
+ us_auth=getpeereid;
+ AC_DEFINE([US_AUTH_GETPEEREID], [1],
+ [Define if Unix socket auth method is
+ getpeereid(s, &uid, &gid)])
+ ], [
+ # Solaris/OpenIndiana
+ AC_CHECK_DECL([getpeerucred], [
+ us_auth=getpeerucred;
+ AC_DEFINE([US_AUTH_GETPEERUCRED], [1],
+ [Define if Unix socket auth method is
+ getpeercred(s, &ucred)])
+ ], [
+ AC_MSG_ERROR([No way to authenticate a Unix socket peer])
+ ], [[#include <ucred.h>]])
+ ])
+fi
+
dnl This OS-based decision-making is poor autotools practice;
dnl feature-based mechanisms are strongly preferred.
dnl
dnl So keep this section to a bare minimum; regard as a "necessary evil".
case "$host_os" in
*bsd*)
AC_DEFINE_UNQUOTED(ON_BSD, 1, Compiling for BSD platform)
LIBS="-L/usr/local/lib"
CPPFLAGS="$CPPFLAGS -I/usr/local/include"
INIT_EXT=".sh"
;;
*solaris*)
AC_DEFINE_UNQUOTED(ON_SOLARIS, 1, Compiling for Solaris platform)
;;
*linux*)
AC_DEFINE_UNQUOTED(ON_LINUX, 1, Compiling for Linux platform)
;;
darwin*)
AC_DEFINE_UNQUOTED(ON_DARWIN, 1, Compiling for Darwin platform)
LIBS="$LIBS -L${prefix}/lib"
CFLAGS="$CFLAGS -I${prefix}/include"
;;
esac
dnl Eventually remove this
if test "$cross_compiling" != "yes"; then
CPPFLAGS="$CPPFLAGS -I${prefix}/include/heartbeat"
fi
AC_SUBST(INIT_EXT)
AC_MSG_NOTICE(Host CPU: $host_cpu)
case "$host_cpu" in
ppc64|powerpc64)
case $CFLAGS in
*powerpc64*) ;;
*) if test "$GCC" = yes; then
CFLAGS="$CFLAGS -m64"
fi ;;
esac
esac
AC_MSG_CHECKING(which format is needed to print uint64_t)
ac_save_CFLAGS=$CFLAGS
CFLAGS="-Wall -Werror"
AC_COMPILE_IFELSE(
[AC_LANG_PROGRAM(
[
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
],
[
int max = 512;
uint64_t bignum = 42;
char *buffer = malloc(max);
const char *random = "random";
snprintf(buffer, max-1, "<quorum id=%lu quorate=%s/>", bignum, random);
fprintf(stderr, "Result: %s\n", buffer);
]
)],
[U64T="%lu"],
[U64T="%llu"]
)
CFLAGS=$ac_save_CFLAGS
AC_MSG_RESULT($U64T)
AC_DEFINE_UNQUOTED(U64T, "$U64T", Correct printf format for logging uint64_t)
dnl ===============================================
dnl Program Paths
dnl ===============================================
PATH="$PATH:/sbin:/usr/sbin:/usr/local/sbin:/usr/local/bin"
export PATH
dnl Replacing AC_PROG_LIBTOOL with AC_CHECK_PROG because LIBTOOL
dnl was NOT being expanded all the time thus causing things to fail.
AC_CHECK_PROGS(LIBTOOL, glibtool libtool libtool15 libtool13)
AM_PATH_PYTHON
AC_CHECK_PROGS(MAKE, gmake make)
AC_PATH_PROGS(HTML2TXT, lynx w3m)
AC_PATH_PROGS(HELP2MAN, help2man)
AC_PATH_PROGS(POD2MAN, pod2man, pod2man)
AC_PATH_PROGS(ASCIIDOC, asciidoc)
AC_PATH_PROGS(PUBLICAN, publican)
AC_PATH_PROGS(INKSCAPE, inkscape)
AC_PATH_PROGS(XSLTPROC, xsltproc)
AC_PATH_PROGS(XMLCATALOG, xmlcatalog)
AC_PATH_PROGS(FOP, fop)
AC_PATH_PROGS(SSH, ssh, /usr/bin/ssh)
AC_PATH_PROGS(SCP, scp, /usr/bin/scp)
AC_PATH_PROGS(TAR, tar)
AC_PATH_PROGS(MD5, md5)
AC_PATH_PROGS(TEST, test)
PKG_PROG_PKG_CONFIG
AC_PATH_PROGS(XML2CONFIG, xml2-config)
AC_PATH_PROGS(VALGRIND_BIN, valgrind, /usr/bin/valgrind)
AC_DEFINE_UNQUOTED(VALGRIND_BIN, "$VALGRIND_BIN", Valgrind command)
dnl Disable these until we decide if the stonith config file should be supported
dnl AC_PATH_PROGS(BISON, bison)
dnl AC_PATH_PROGS(FLEX, flex)
dnl AC_PATH_PROGS(HAVE_YACC, $YACC)
if test x"${LIBTOOL}" = x""; then
AC_MSG_ERROR(You need (g)libtool installed in order to build ${PACKAGE})
fi
if test x"${MAKE}" = x""; then
AC_MSG_ERROR(You need (g)make installed in order to build ${PACKAGE})
fi
AM_CONDITIONAL(BUILD_HELP, test x"${HELP2MAN}" != x"")
if test x"${HELP2MAN}" != x""; then
PCMK_FEATURES="$PCMK_FEATURES generated-manpages"
fi
MANPAGE_XSLT=""
if test x"${XSLTPROC}" != x""; then
AC_MSG_CHECKING(docbook to manpage transform)
# first try to figure out correct template using xmlcatalog query,
# resort to extensive (semi-deterministic) file search if that fails
DOCBOOK_XSL_URI='http://docbook.sourceforge.net/release/xsl/current'
DOCBOOK_XSL_PATH='manpages/docbook.xsl'
MANPAGE_XSLT=$(${XMLCATALOG} "" ${DOCBOOK_XSL_URI}/${DOCBOOK_XSL_PATH} \
| sed -n 's|^file://||p;q')
if test x"${MANPAGE_XSLT}" = x""; then
DIRS=$(find "${datadir}" -name $(basename $(dirname ${DOCBOOK_XSL_PATH})) \
-type d | LC_ALL=C sort)
XSLT=$(basename ${DOCBOOK_XSL_PATH})
for d in ${DIRS}; do
if test -f "${d}/${XSLT}"; then
MANPAGE_XSLT="${d}/${XSLT}"
break
fi
done
fi
fi
AC_MSG_RESULT($MANPAGE_XSLT)
AC_SUBST(MANPAGE_XSLT)
AM_CONDITIONAL(BUILD_XML_HELP, test x"${MANPAGE_XSLT}" != x"")
if test x"${MANPAGE_XSLT}" != x""; then
PCMK_FEATURES="$PCMK_FEATURES agent-manpages"
fi
AM_CONDITIONAL(BUILD_ASCIIDOC, test x"${ASCIIDOC}" != x"")
if test x"${ASCIIDOC}" != x""; then
PCMK_FEATURES="$PCMK_FEATURES ascii-docs"
fi
SUPPORT_STONITH_CONFIG=0
if test x"${HAVE_YACC}" != x"" -a x"${FLEX}" != x"" -a x"${BISON}" != x""; then
SUPPORT_STONITH_CONFIG=1
PCMK_FEATURES="$PCMK_FEATURES st-conf"
fi
AM_CONDITIONAL(BUILD_STONITH_CONFIG, test $SUPPORT_STONITH_CONFIG = 1)
AC_DEFINE_UNQUOTED(SUPPORT_STONITH_CONFIG, $SUPPORT_STONITH_CONFIG, Support a stand-alone stonith config file in addition to the CIB)
publican_intree_brand=no
if test x"${PUBLICAN_BRAND}" != x"" \
&& test x"${PUBLICAN}" != x"" \
&& test x"${INKSCAPE}" != x""; then
dnl special handling for clusterlabs brand (possibly in-tree version used)
test "${PUBLICAN_BRAND}" != "clusterlabs" \
|| test -d /usr/share/publican/Common_Content/clusterlabs
if test $? -ne 0; then
dnl Unknown option: brand_dir vs. Option brand_dir requires an argument
if ${PUBLICAN} build --brand_dir 2>&1 | grep -Eq 'brand_dir$'; then
AC_MSG_WARN([Cannot use in-tree clusterlabs brand, resorting to common])
PUBLICAN_BRAND=common
else
publican_intree_brand=yes
fi
fi
AC_MSG_NOTICE([Enabling Publican-generated documentation using ${PUBLICAN_BRAND} brand])
PCMK_FEATURES="$PCMK_FEATURES publican-docs"
fi
AM_CONDITIONAL([BUILD_DOCBOOK],
[test x"${PUBLICAN_BRAND}" != x"" \
&& test x"${PUBLICAN}" != x"" \
&& test x"${INKSCAPE}" != x""])
AM_CONDITIONAL([PUBLICAN_INTREE_BRAND],
[test x"${publican_intree_brand}" = x"yes"])
dnl ========================================================================
dnl checks for library functions to replace them
dnl
dnl NoSuchFunctionName:
dnl is a dummy function which no system supplies. It is here to make
dnl the system compile semi-correctly on OpenBSD which doesn't know
dnl how to create an empty archive
dnl
dnl scandir: Only on BSD.
dnl System-V systems may have it, but hidden and/or deprecated.
dnl A replacement function is supplied for it.
dnl
dnl setenv: is some bsdish function that should also be avoided (use
dnl putenv instead)
dnl On the other hand, putenv doesn't provide the right API for the
dnl code and has memory leaks designed in (sigh...) Fortunately this
dnl A replacement function is supplied for it.
dnl
dnl strerror: returns a string that corresponds to an errno.
dnl A replacement function is supplied for it.
dnl
dnl strnlen: is a gnu function similar to strlen, but safer.
dnl We wrote a tolearably-fast replacement function for it.
dnl
dnl strndup: is a gnu function similar to strdup, but safer.
dnl We wrote a tolearably-fast replacement function for it.
AC_REPLACE_FUNCS(alphasort NoSuchFunctionName scandir setenv strerror strchrnul unsetenv strnlen strndup)
dnl ===============================================
dnl Libraries
dnl ===============================================
AC_CHECK_LIB(socket, socket) dnl -lsocket
AC_CHECK_LIB(c, dlopen) dnl if dlopen is in libc...
AC_CHECK_LIB(dl, dlopen) dnl -ldl (for Linux)
AC_CHECK_LIB(rt, sched_getscheduler) dnl -lrt (for Tru64)
AC_CHECK_LIB(gnugetopt, getopt_long) dnl -lgnugetopt ( if available )
AC_CHECK_LIB(pam, pam_start) dnl -lpam (if available)
AC_CHECK_FUNCS([sched_setscheduler])
AC_CHECK_LIB(uuid, uuid_parse) dnl load the library if necessary
AC_CHECK_FUNCS(uuid_unparse) dnl OSX ships uuid_* as standard functions
AC_CHECK_HEADERS(uuid/uuid.h)
if test "x$ac_cv_func_uuid_unparse" != xyes; then
AC_MSG_ERROR(You do not have the libuuid development package installed)
fi
if test x"${PKG_CONFIG}" = x""; then
AC_MSG_ERROR(You need pkgconfig installed in order to build ${PACKAGE})
fi
if
$PKG_CONFIG --exists glib-2.0
then
GLIBCONFIG="$PKG_CONFIG glib-2.0"
else
set -x
echo PKG_CONFIG_PATH=$PKG_CONFIG_PATH
$PKG_CONFIG --exists glib-2.0; echo $?
$PKG_CONFIG --cflags glib-2.0; echo $?
$PKG_CONFIG glib-2.0; echo $?
set +x
AC_MSG_ERROR(You need glib2-devel installed in order to build ${PACKAGE})
fi
AC_MSG_RESULT(using $GLIBCONFIG)
#
# Where is dlopen?
#
if test "$ac_cv_lib_c_dlopen" = yes; then
LIBADD_DL=""
elif test "$ac_cv_lib_dl_dlopen" = yes; then
LIBADD_DL=-ldl
else
LIBADD_DL=${lt_cv_dlopen_libs}
fi
if test "X$GLIBCONFIG" != X; then
AC_MSG_CHECKING(for special glib includes: )
GLIBHEAD=`$GLIBCONFIG --cflags`
AC_MSG_RESULT($GLIBHEAD)
CPPFLAGS="$CPPFLAGS $GLIBHEAD"
AC_MSG_CHECKING(for glib library flags)
GLIBLIB=`$GLIBCONFIG --libs`
AC_MSG_RESULT($GLIBLIB)
LIBS="$LIBS $GLIBLIB"
fi
dnl FreeBSD needs -lcompat for ftime() used by lrmd.c
AC_CHECK_LIB([compat], [ftime], [COMPAT_LIBS='-lcompat'])
AC_SUBST(COMPAT_LIBS)
dnl ========================================================================
dnl Headers
dnl ========================================================================
AC_HEADER_STDC
AC_CHECK_HEADERS(arpa/inet.h)
AC_CHECK_HEADERS(asm/types.h)
AC_CHECK_HEADERS(assert.h)
AC_CHECK_HEADERS(auth-client.h)
AC_CHECK_HEADERS(ctype.h)
AC_CHECK_HEADERS(dirent.h)
AC_CHECK_HEADERS(errno.h)
AC_CHECK_HEADERS(fcntl.h)
AC_CHECK_HEADERS(getopt.h)
AC_CHECK_HEADERS(glib.h)
AC_CHECK_HEADERS(grp.h)
AC_CHECK_HEADERS(limits.h)
AC_CHECK_HEADERS(linux/errqueue.h)
AC_CHECK_HEADERS(linux/swab.h)
AC_CHECK_HEADERS(malloc.h)
AC_CHECK_HEADERS(netdb.h)
AC_CHECK_HEADERS(netinet/in.h)
AC_CHECK_HEADERS(netinet/ip.h)
AC_CHECK_HEADERS(pam/pam_appl.h)
AC_CHECK_HEADERS(pthread.h)
AC_CHECK_HEADERS(pwd.h)
AC_CHECK_HEADERS(security/pam_appl.h)
AC_CHECK_HEADERS(sgtty.h)
AC_CHECK_HEADERS(signal.h)
AC_CHECK_HEADERS(stdarg.h)
AC_CHECK_HEADERS(stddef.h)
AC_CHECK_HEADERS(stdio.h)
AC_CHECK_HEADERS(stdlib.h)
AC_CHECK_HEADERS(string.h)
AC_CHECK_HEADERS(strings.h)
AC_CHECK_HEADERS(sys/dir.h)
AC_CHECK_HEADERS(sys/ioctl.h)
AC_CHECK_HEADERS(sys/param.h)
AC_CHECK_HEADERS(sys/poll.h)
AC_CHECK_HEADERS(sys/reboot.h)
AC_CHECK_HEADERS(sys/resource.h)
AC_CHECK_HEADERS(sys/select.h)
AC_CHECK_HEADERS(sys/socket.h)
AC_CHECK_HEADERS(sys/signalfd.h)
AC_CHECK_HEADERS(sys/sockio.h)
AC_CHECK_HEADERS(sys/stat.h)
AC_CHECK_HEADERS(sys/time.h)
AC_CHECK_HEADERS(sys/timeb.h)
AC_CHECK_HEADERS(sys/types.h)
AC_CHECK_HEADERS(sys/uio.h)
AC_CHECK_HEADERS(sys/un.h)
AC_CHECK_HEADERS(sys/utsname.h)
AC_CHECK_HEADERS(sys/wait.h)
AC_CHECK_HEADERS(time.h)
AC_CHECK_HEADERS(unistd.h)
AC_CHECK_HEADERS(winsock.h)
dnl These headers need prerequisits before the tests will pass
dnl AC_CHECK_HEADERS(net/if.h)
dnl AC_CHECK_HEADERS(netinet/icmp6.h)
dnl AC_CHECK_HEADERS(netinet/ip6.h)
dnl AC_CHECK_HEADERS(netinet/ip_icmp.h)
AC_MSG_CHECKING(for special libxml2 includes)
if test "x$XML2CONFIG" = "x"; then
AC_MSG_ERROR(libxml2 config not found)
else
XML2HEAD="`$XML2CONFIG --cflags`"
AC_MSG_RESULT($XML2HEAD)
AC_CHECK_LIB(xml2, xmlReadMemory)
AC_CHECK_LIB(xslt, xsltApplyStylesheet)
fi
CPPFLAGS="$CPPFLAGS $XML2HEAD"
AC_CHECK_HEADERS(libxml/xpath.h)
AC_CHECK_HEADERS(libxslt/xslt.h)
if test "$ac_cv_header_libxml_xpath_h" != "yes"; then
AC_MSG_ERROR(The libxml developement headers were not found)
fi
if test "$ac_cv_header_libxslt_xslt_h" != "yes"; then
AC_MSG_ERROR(The libxslt developement headers were not found)
fi
AC_CACHE_CHECK(whether __progname and __progname_full are available,
pf_cv_var_progname,
AC_TRY_LINK([extern char *__progname, *__progname_full;],
[__progname = "foo"; __progname_full = "foo bar";],
pf_cv_var_progname="yes", pf_cv_var_progname="no"))
if test "$pf_cv_var_progname" = "yes"; then
AC_DEFINE(HAVE___PROGNAME,1,[ ])
fi
dnl ========================================================================
dnl Structures
dnl ========================================================================
AC_CHECK_MEMBERS([struct tm.tm_gmtoff],,,[[#include <time.h>]])
AC_CHECK_MEMBERS([lrm_op_t.rsc_deleted],,,[[#include <lrm/lrm_api.h>]])
AC_CHECK_MEMBER([struct dirent.d_type],
AC_DEFINE(HAVE_STRUCT_DIRENT_D_TYPE,1,[Define this if struct dirent has d_type]),,
[#include <dirent.h>])
dnl ========================================================================
dnl Functions
dnl ========================================================================
AC_CHECK_FUNCS(g_log_set_default_handler)
AC_CHECK_FUNCS(getopt, AC_DEFINE(HAVE_DECL_GETOPT, 1, [Have getopt function]))
AC_CHECK_FUNCS(nanosleep, AC_DEFINE(HAVE_DECL_NANOSLEEP, 1, [Have nanosleep function]))
dnl ========================================================================
dnl bzip2
dnl ========================================================================
AC_CHECK_HEADERS(bzlib.h)
AC_CHECK_LIB(bz2, BZ2_bzBuffToBuffCompress)
if test x$ac_cv_lib_bz2_BZ2_bzBuffToBuffCompress != xyes ; then
AC_MSG_ERROR(BZ2 libraries not found)
fi
if test x$ac_cv_header_bzlib_h != xyes; then
AC_MSG_ERROR(BZ2 Development headers not found)
fi
dnl ========================================================================
dnl sighandler_t is missing from Illumos, Solaris11 systems
dnl ========================================================================
AC_MSG_CHECKING([for sighandler_t])
AC_TRY_COMPILE([#include <signal.h>],[sighandler_t *f;],
has_sighandler_t=yes,has_sighandler_t=no)
AC_MSG_RESULT($has_sighandler_t)
if test "$has_sighandler_t" = "yes" ; then
AC_DEFINE( HAVE_SIGHANDLER_T, 1, [Define if sighandler_t available] )
fi
dnl ========================================================================
dnl ncurses
dnl ========================================================================
dnl
dnl A few OSes (e.g. Linux) deliver a default "ncurses" alongside "curses".
dnl Many non-Linux deliver "curses"; sites may add "ncurses".
dnl
dnl However, the source-code recommendation for both is to #include "curses.h"
dnl (i.e. "ncurses" still wants the include to be simple, no-'n', "curses.h").
dnl
dnl ncurse takes precedence.
dnl
AC_CHECK_HEADERS(curses.h)
AC_CHECK_HEADERS(curses/curses.h)
AC_CHECK_HEADERS(ncurses.h)
AC_CHECK_HEADERS(ncurses/ncurses.h)
dnl Although n-library is preferred, only look for it if the n-header was found.
CURSESLIBS=''
if test "$ac_cv_header_ncurses_h" = "yes"; then
AC_CHECK_LIB(ncurses, printw,
[AC_DEFINE(HAVE_LIBNCURSES,1, have ncurses library)]
)
CURSESLIBS=`$PKG_CONFIG --libs ncurses` || CURSESLIBS='-lncurses'
fi
if test "$ac_cv_header_ncurses_ncurses_h" = "yes"; then
AC_CHECK_LIB(ncurses, printw,
[AC_DEFINE(HAVE_LIBNCURSES,1, have ncurses library)]
)
CURSESLIBS=`$PKG_CONFIG --libs ncurses` || CURSESLIBS='-lncurses'
fi
dnl Only look for non-n-library if there was no n-library.
if test X"$CURSESLIBS" = X"" -a "$ac_cv_header_curses_h" = "yes"; then
AC_CHECK_LIB(curses, printw,
[CURSESLIBS='-lcurses'; AC_DEFINE(HAVE_LIBCURSES,1, have curses library)]
)
fi
dnl Only look for non-n-library if there was no n-library.
if test X"$CURSESLIBS" = X"" -a "$ac_cv_header_curses_curses_h" = "yes"; then
AC_CHECK_LIB(curses, printw,
[CURSESLIBS='-lcurses'; AC_DEFINE(HAVE_LIBCURSES,1, have curses library)]
)
fi
if test "x$CURSESLIBS" != "x"; then
PCMK_FEATURES="$PCMK_FEATURES ncurses"
fi
dnl Check for printw() prototype compatibility
if test X"$CURSESLIBS" != X"" && cc_supports_flag -Wcast-qual && cc_supports_flag -Werror; then
ac_save_LIBS=$LIBS
LIBS="$CURSESLIBS"
ac_save_CFLAGS=$CFLAGS
CFLAGS="-Wcast-qual -Werror"
# avoid broken test because of hardened build environment in Fedora 23+
# - https://fedoraproject.org/wiki/Changes/Harden_All_Packages
# - https://bugzilla.redhat.com/1297985
if cc_supports_flag -fPIC; then
CFLAGS="$CFLAGS -fPIC"
fi
AC_MSG_CHECKING(whether printw() requires argument of "const char *")
AC_LINK_IFELSE(
[AC_LANG_PROGRAM(
[
#if defined(HAVE_NCURSES_H)
# include <ncurses.h>
#elif defined(HAVE_NCURSES_NCURSES_H)
# include <ncurses/ncurses.h>
#elif defined(HAVE_CURSES_H)
# include <curses.h>
#endif
],
[printw((const char *)"Test");]
)],
[ac_cv_compatible_printw=yes],
[ac_cv_compatible_printw=no]
)
LIBS=$ac_save_LIBS
CFLAGS=$ac_save_CFLAGS
AC_MSG_RESULT([$ac_cv_compatible_printw])
if test "$ac_cv_compatible_printw" = no; then
AC_MSG_WARN([The printw() function of your ncurses or curses library is old, we will disable usage of the library. If you want to use this library anyway, please update to newer version of the library, ncurses 5.4 or later is recommended. You can get the library from http://www.gnu.org/software/ncurses/.])
AC_MSG_NOTICE([Disabling curses])
AC_DEFINE(HAVE_INCOMPATIBLE_PRINTW, 1, [Do we have incompatible printw() in curses library?])
fi
fi
AC_SUBST(CURSESLIBS)
dnl ========================================================================
dnl Profiling and GProf
dnl ========================================================================
AC_MSG_NOTICE(Old CFLAGS: $CFLAGS)
case $SUPPORT_COVERAGE in
1|yes|true)
SUPPORT_PROFILING=1
PCMK_FEATURES="$PCMK_FEATURES coverage"
CFLAGS="$CFLAGS -fprofile-arcs -ftest-coverage"
dnl During linking, make sure to specify -lgcov or -coverage
dnl Enable gprof
#LIBS="$LIBS -pg"
#CFLAGS="$CFLAGS -pg"
;;
esac
case $SUPPORT_PROFILING in
1|yes|true)
SUPPORT_PROFILING=1
dnl Disable various compiler optimizations
CFLAGS="$CFLAGS -fno-omit-frame-pointer -fno-inline -fno-builtin "
dnl CFLAGS="$CFLAGS -fno-inline-functions -fno-default-inline -fno-inline-functions-called-once -fno-optimize-sibling-calls"
dnl Turn off optimization so tools can get accurate line numbers
CFLAGS=`echo $CFLAGS | sed -e 's/-O.\ //g' -e 's/-Wp,-D_FORTIFY_SOURCE=.\ //g' -e 's/-D_FORTIFY_SOURCE=.\ //g'`
CFLAGS="$CFLAGS -O0 -g3 -gdwarf-2"
dnl Update features
PCMK_FEATURES="$PCMK_FEATURES profile"
;;
*) SUPPORT_PROFILING=0;;
esac
AC_MSG_NOTICE(New CFLAGS: $CFLAGS)
AC_DEFINE_UNQUOTED(SUPPORT_PROFILING, $SUPPORT_PROFILING, Support for profiling)
dnl ========================================================================
dnl Cluster infrastructure - Heartbeat / LibQB
dnl ========================================================================
dnl Compatibility checks
AC_CHECK_MEMBERS([struct lrm_ops.fail_rsc],,,[[#include <lrm/lrm_api.h>]])
if test x${enable_no_stack} = xyes; then
SUPPORT_HEARTBEAT=no
SUPPORT_CS=no
fi
PKG_CHECK_MODULES(libqb, libqb >= 0.13)
CPPFLAGS="$libqb_CFLAGS $CPPFLAGS"
LIBS="$libqb_LIBS $LIBS"
AC_CHECK_HEADERS(qb/qbipc_common.h)
AC_CHECK_LIB(qb, qb_ipcs_connection_auth_set)
PCMK_FEATURES="$PCMK_FEATURES libqb-logging libqb-ipc"
AC_CHECK_FUNCS(qb_ipcs_connection_get_buffer_size, AC_DEFINE(HAVE_IPCS_GET_BUFFER_SIZE, 1, [Have qb_ipcc_get_buffer_size function]))
AC_CHECK_HEADERS(heartbeat/hb_config.h)
AC_CHECK_HEADERS(heartbeat/glue_config.h)
AC_CHECK_HEADERS(stonith/stonith.h)
AC_CHECK_HEADERS(agent_config.h)
GLUE_HEADER=none
HAVE_GLUE=0
if test "$ac_cv_header_heartbeat_glue_config_h" = "yes"; then
GLUE_HEADER=glue_config.h
HAVE_GLUE=1
elif test "$ac_cv_header_heartbeat_hb_config_h" = "yes"; then
GLUE_HEADER=hb_config.h
HAVE_GLUE=1
else
AC_MSG_WARN(cluster-glue development headers were not found)
fi
if test "$ac_cv_header_stonith_stonith_h" = "yes"; then
PCMK_FEATURES="$PCMK_FEATURES lha-fencing"
fi
AM_CONDITIONAL([BUILD_LHA_SUPPORT], [test "$ac_cv_header_stonith_stonith_h" = "yes"])
if test $HAVE_GLUE = 1; then
dnl On Debian, AC_CHECK_LIBS fail if a library has any unresolved symbols
dnl So check for all the dependencies (so they're added to LIBS) before checking for -lplumb
AC_CHECK_LIB(pils, PILLoadPlugin)
AC_CHECK_LIB(plumb, G_main_add_IPC_Channel)
fi
dnl ===============================================
dnl Variables needed for substitution
dnl ===============================================
CRM_DTD_DIRECTORY="${datadir}/pacemaker"
AC_DEFINE_UNQUOTED(CRM_DTD_DIRECTORY,"$CRM_DTD_DIRECTORY", Location for the Pacemaker Relax-NG Schema)
AC_SUBST(CRM_DTD_DIRECTORY)
CRM_CORE_DIR=`try_extract_header_define $GLUE_HEADER HA_COREDIR ${localstatedir}/lib/pacemaker/cores`
AC_DEFINE_UNQUOTED(CRM_CORE_DIR,"$CRM_CORE_DIR", Location to store core files produced by Pacemaker daemons)
AC_SUBST(CRM_CORE_DIR)
CRM_DAEMON_USER=`try_extract_header_define $GLUE_HEADER HA_CCMUSER hacluster`
AC_DEFINE_UNQUOTED(CRM_DAEMON_USER,"$CRM_DAEMON_USER", User to run Pacemaker daemons as)
AC_SUBST(CRM_DAEMON_USER)
CRM_DAEMON_GROUP=`try_extract_header_define $GLUE_HEADER HA_APIGROUP haclient`
AC_DEFINE_UNQUOTED(CRM_DAEMON_GROUP,"$CRM_DAEMON_GROUP", Group to run Pacemaker daemons as)
AC_SUBST(CRM_DAEMON_GROUP)
CRM_STATE_DIR=${localstatedir}/run/crm
AC_DEFINE_UNQUOTED(CRM_STATE_DIR,"$CRM_STATE_DIR", Where to keep state files and sockets)
AC_SUBST(CRM_STATE_DIR)
CRM_LOG_DIR="${localstatedir}/log/pacemaker"
AC_DEFINE_UNQUOTED(CRM_LOG_DIR,"$CRM_LOG_DIR", Where Pacemaker can store log files)
AC_SUBST(CRM_LOG_DIR)
CRM_PACEMAKER_DIR=${localstatedir}/lib/pacemaker
AC_DEFINE_UNQUOTED(CRM_PACEMAKER_DIR,"$CRM_PACEMAKER_DIR", Location to store directory produced by Pacemaker daemons)
AC_SUBST(CRM_PACEMAKER_DIR)
CRM_BLACKBOX_DIR=${localstatedir}/lib/pacemaker/blackbox
AC_DEFINE_UNQUOTED(CRM_BLACKBOX_DIR,"$CRM_BLACKBOX_DIR", Where to keep blackbox dumps)
AC_SUBST(CRM_BLACKBOX_DIR)
PE_STATE_DIR="${localstatedir}/lib/pacemaker/pengine"
AC_DEFINE_UNQUOTED(PE_STATE_DIR,"$PE_STATE_DIR", Where to keep PEngine outputs)
AC_SUBST(PE_STATE_DIR)
CRM_CONFIG_DIR="${localstatedir}/lib/pacemaker/cib"
AC_DEFINE_UNQUOTED(CRM_CONFIG_DIR,"$CRM_CONFIG_DIR", Where to keep configuration files)
AC_SUBST(CRM_CONFIG_DIR)
CRM_CONFIG_CTS="${localstatedir}/lib/pacemaker/cts"
AC_DEFINE_UNQUOTED(CRM_CONFIG_CTS,"$CRM_CONFIG_CTS", Where to keep cts stateful data)
AC_SUBST(CRM_CONFIG_CTS)
CRM_LEGACY_CONFIG_DIR="${localstatedir}/lib/heartbeat/crm"
AC_DEFINE_UNQUOTED(CRM_LEGACY_CONFIG_DIR,"$CRM_LEGACY_CONFIG_DIR", Where Pacemaker used to keep configuration files)
AC_SUBST(CRM_LEGACY_CONFIG_DIR)
CRM_DAEMON_DIR="${libexecdir}/pacemaker"
AC_DEFINE_UNQUOTED(CRM_DAEMON_DIR,"$CRM_DAEMON_DIR", Location for Pacemaker daemons)
AC_SUBST(CRM_DAEMON_DIR)
HB_DAEMON_DIR=`try_extract_header_define $GLUE_HEADER HA_LIBHBDIR $libdir/heartbeat`
AC_DEFINE_UNQUOTED(HB_DAEMON_DIR,"$HB_DAEMON_DIR", Location Heartbeat expects Pacemaker daemons to be in)
AC_SUBST(HB_DAEMON_DIR)
dnl Needed so that the Corosync plugin can clear out the directory as Heartbeat does
HA_STATE_DIR=`try_extract_header_define $GLUE_HEADER HA_VARRUNDIR ${localstatedir}/run`
AC_DEFINE_UNQUOTED(HA_STATE_DIR,"$HA_STATE_DIR", Where Heartbeat keeps state files and sockets)
AC_SUBST(HA_STATE_DIR)
CRM_RSCTMP_DIR=`try_extract_header_define agent_config.h HA_RSCTMPDIR $HA_STATE_DIR/resource-agents`
AC_MSG_CHECKING(Scratch dir for resource agents)
AC_MSG_RESULT($CRM_RSCTMP_DIR)
AC_DEFINE_UNQUOTED(CRM_RSCTMP_DIR,"$CRM_RSCTMP_DIR", Where resource agents should keep state files)
AC_SUBST(CRM_RSCTMP_DIR)
dnl Needed for the location of hostcache in CTS.py
HA_VARLIBHBDIR=`try_extract_header_define $GLUE_HEADER HA_VARLIBHBDIR ${localstatedir}/lib/heartbeat`
AC_SUBST(HA_VARLIBHBDIR)
AC_DEFINE_UNQUOTED(UUID_FILE,"$localstatedir/lib/heartbeat/hb_uuid", Location of Heartbeat's UUID file)
OCF_ROOT_DIR=`try_extract_header_define $GLUE_HEADER OCF_ROOT_DIR /usr/lib/ocf`
if test "X$OCF_ROOT_DIR" = X; then
AC_MSG_ERROR(Could not locate OCF directory)
fi
AC_SUBST(OCF_ROOT_DIR)
OCF_RA_DIR=`try_extract_header_define $GLUE_HEADER OCF_RA_DIR $OCF_ROOT_DIR/resource.d`
AC_DEFINE_UNQUOTED(OCF_RA_DIR,"$OCF_RA_DIR", Location for OCF RAs)
AC_SUBST(OCF_RA_DIR)
RH_STONITH_DIR="$sbindir"
AC_DEFINE_UNQUOTED(RH_STONITH_DIR,"$RH_STONITH_DIR", Location for Red Hat Stonith agents)
AC_DEFINE_UNQUOTED(SBIN_DIR,"$sbindir", Location for system binaries)
RH_STONITH_PREFIX="fence_"
AC_DEFINE_UNQUOTED(RH_STONITH_PREFIX,"$RH_STONITH_PREFIX", Prefix for Red Hat Stonith agents)
AC_PATH_PROGS(GIT, git false)
AC_MSG_CHECKING(build version)
BUILD_VERSION=$Format:%h$
if test $BUILD_VERSION != ":%h$"; then
AC_MSG_RESULT(archive hash: $BUILD_VERSION)
elif test -x $GIT -a -d .git; then
BUILD_VERSION=`$GIT log --pretty="format:%h" -n 1`
AC_MSG_RESULT(git hash: $BUILD_VERSION)
else
# The current directory name make a reasonable default
# Most generated archives will include the hash or tag
BASE=`basename $PWD`
BUILD_VERSION=`echo $BASE | sed s:.*[[Pp]]acemaker-::`
AC_MSG_RESULT(directory based hash: $BUILD_VERSION)
fi
AC_DEFINE_UNQUOTED(BUILD_VERSION, "$BUILD_VERSION", Build version)
AC_SUBST(BUILD_VERSION)
HAVE_dbus=1
HAVE_upstart=0
HAVE_systemd=0
PKG_CHECK_MODULES(DBUS, dbus-1, ,HAVE_dbus=0)
AC_DEFINE_UNQUOTED(SUPPORT_DBUS, $HAVE_dbus, Support dbus)
AM_CONDITIONAL(BUILD_DBUS, test $HAVE_dbus = 1)
if test $HAVE_dbus = 1; then
CFLAGS="$CFLAGS `$PKG_CONFIG --cflags dbus-1`"
fi
DBUS_LIBS="$CFLAGS `$PKG_CONFIG --libs dbus-1`"
AC_SUBST(DBUS_LIBS)
AC_CHECK_TYPES([DBusBasicValue],,,[[#include <dbus/dbus.h>]])
if test "x${enable_systemd}" != xno; then
if test $HAVE_dbus = 0; then
if test "x${enable_systemd}" = xyes; then
AC_MSG_FAILURE([cannot enable systemd without DBus])
else
enable_systemd=no
fi
fi
if test "x${enable_systemd}" = xtry; then
AC_MSG_CHECKING([for systemd version query result via dbus-send])
ret=$({ dbus-send --system --print-reply \
--dest=org.freedesktop.systemd1 \
/org/freedesktop/systemd1 \
org.freedesktop.DBus.Properties.Get \
string:org.freedesktop.systemd1.Manager \
string:Version 2>/dev/null \
|| echo "this borked"; } | tail -n1)
# sanitize output a bit (interested just in value, not type),
# ret is intentionally unenquoted so as to normalize whitespace
ret=$(echo ${ret} | cut -d' ' -f2-)
AC_MSG_RESULT([${ret}])
if test "x${ret}" != xborked \
|| systemctl --version 2>/dev/null | grep -q systemd; then
enable_systemd=yes
else
enable_systemd=no
fi
fi
fi
AC_MSG_CHECKING([whether to enable support for managing resources via systemd])
AC_MSG_RESULT([${enable_systemd}])
if test "x${enable_systemd}" = xyes; then
HAVE_systemd=1
PCMK_FEATURES="$PCMK_FEATURES systemd"
AC_MSG_CHECKING([for systemd path for system unit files])
systemdunitdir="${systemdunitdir-}"
PKG_CHECK_VAR([systemdunitdir], [systemd],
[systemdsystemunitdir], [],[
systemdunitdir=no
])
AC_MSG_RESULT([${systemdunitdir}])
if test "x${systemdunitdir}" = xno; then
AC_MSG_FAILURE([cannot enable systemd when systemdunitdir unresolved])
fi
fi
AC_DEFINE_UNQUOTED(SUPPORT_SYSTEMD, $HAVE_systemd, Support systemd based system services)
AM_CONDITIONAL(BUILD_SYSTEMD, test $HAVE_systemd = 1)
AC_SUBST(SUPPORT_SYSTEMD)
if test "x${enable_upstart}" != xno; then
if test $HAVE_dbus = 0; then
if test "x${enable_upstart}" = xyes; then
AC_MSG_FAILURE([cannot enable Upstart without DBus])
else
enable_upstart=no
fi
fi
if test "x${enable_upstart}" = xtry; then
AC_MSG_CHECKING([for Upstart version query result via dbus-send])
ret=$({ dbus-send --system --print-reply --dest=com.ubuntu.Upstart \
/com/ubuntu/Upstart org.freedesktop.DBus.Properties.Get \
string:com.ubuntu.Upstart0_6 string:version 2>/dev/null \
|| echo "this borked"; } | tail -n1)
# sanitize output a bit (interested just in value, not type),
# ret is intentionally unenquoted so as to normalize whitespace
ret=$(echo ${ret} | cut -d' ' -f2-)
AC_MSG_RESULT([${ret}])
if test "x${ret}" != xborked \
|| initctl --version 2>/dev/null | grep -q upstart; then
enable_upstart=yes
else
enable_upstart=no
fi
fi
fi
AC_MSG_CHECKING([whether to enable support for managing resources via Upstart])
AC_MSG_RESULT([${enable_upstart}])
if test "x${enable_upstart}" = xyes; then
HAVE_upstart=1
PCMK_FEATURES="$PCMK_FEATURES upstart"
fi
AC_DEFINE_UNQUOTED(SUPPORT_UPSTART, $HAVE_upstart, Support upstart based system services)
AM_CONDITIONAL(BUILD_UPSTART, test $HAVE_upstart = 1)
AC_SUBST(SUPPORT_UPSTART)
case $SUPPORT_NAGIOS in
1|yes|true|try)
SUPPORT_NAGIOS=1;;
*)
SUPPORT_NAGIOS=0;;
esac
if test $SUPPORT_NAGIOS = 1; then
PCMK_FEATURES="$PCMK_FEATURES nagios"
fi
AC_DEFINE_UNQUOTED(SUPPORT_NAGIOS, $SUPPORT_NAGIOS, Support nagios plugins)
AM_CONDITIONAL(BUILD_NAGIOS, test $SUPPORT_NAGIOS = 1)
if test x"$NAGIOS_PLUGIN_DIR" = x""; then
NAGIOS_PLUGIN_DIR="${libexecdir}/nagios/plugins"
fi
AC_DEFINE_UNQUOTED(NAGIOS_PLUGIN_DIR, "$NAGIOS_PLUGIN_DIR", Directory for nagios plugins)
AC_SUBST(NAGIOS_PLUGIN_DIR)
if test x"$NAGIOS_METADATA_DIR" = x""; then
NAGIOS_METADATA_DIR="${datadir}/nagios/plugins-metadata"
fi
AC_DEFINE_UNQUOTED(NAGIOS_METADATA_DIR, "$NAGIOS_METADATA_DIR", Directory for nagios plugins metadata)
AC_SUBST(NAGIOS_METADATA_DIR)
STACKS=""
CLUSTERLIBS=""
dnl ========================================================================
dnl Cluster stack - Heartbeat
dnl ========================================================================
case $SUPPORT_HEARTBEAT in
1|yes|true|try)
AC_MSG_CHECKING(for heartbeat support)
AC_CHECK_LIB(hbclient, ll_cluster_new, [SUPPORT_HEARTBEAT=1],
[if test $SUPPORT_HEARTBEAT != try; then
AC_MSG_FAILURE(Unable to support Heartbeat: client libraries not found)
fi])
if test $SUPPORT_HEARTBEAT = 1 ; then
STACKS="$STACKS heartbeat"
dnl objdump -x ${libdir}/libccmclient.so | grep SONAME | awk '{print $2}'
AC_DEFINE_UNQUOTED(CCM_LIBRARY, "libccmclient.so.1", Library to load for ccm support)
AC_DEFINE_UNQUOTED(HEARTBEAT_LIBRARY, "libhbclient.so.1", Library to load for heartbeat support)
BUILD_ATOMIC_ATTRD=0
else
SUPPORT_HEARTBEAT=0
fi
;;
*) SUPPORT_HEARTBEAT=0;;
esac
AM_CONDITIONAL(BUILD_HEARTBEAT_SUPPORT, test $SUPPORT_HEARTBEAT = 1)
AC_DEFINE_UNQUOTED(SUPPORT_HEARTBEAT, $SUPPORT_HEARTBEAT, Support the Heartbeat messaging and membership layer)
AC_SUBST(SUPPORT_HEARTBEAT)
dnl ========================================================================
dnl Cluster stack - Corosync
dnl ========================================================================
dnl Normalize the values
case $SUPPORT_CS in
1|yes|true)
SUPPORT_CS=yes
missingisfatal=1;;
try) missingisfatal=0;;
*) SUPPORT_CS=no;;
esac
AC_MSG_CHECKING(for native corosync)
COROSYNC_LIBS=""
CS_USES_LIBQB=0
PCMK_SERVICE_ID=9
if test $SUPPORT_CS = no; then
AC_MSG_RESULT(no (disabled))
SUPPORT_CS=0
else
AC_MSG_RESULT($SUPPORT_CS, with '$CSPREFIX')
SUPPORT_CS=1
PKG_CHECK_MODULES(cpg, libcpg) dnl Fatal
PKG_CHECK_MODULES(cfg, libcfg) dnl Fatal
PKG_CHECK_MODULES(cmap, libcmap, HAVE_cmap=1, HAVE_cmap=0)
PKG_CHECK_MODULES(cman, libcman, HAVE_cman=1, HAVE_cman=0)
PKG_CHECK_MODULES(confdb, libconfdb, HAVE_confdb=1, HAVE_confdb=0)
PKG_CHECK_MODULES(fenced, libfenced, HAVE_fenced=1, HAVE_fenced=0)
PKG_CHECK_MODULES(quorum, libquorum, HAVE_quorum=1, HAVE_quorum=0)
PKG_CHECK_MODULES(oldipc, libcoroipcc, HAVE_oldipc=1, HAVE_oldipc=0)
if test $HAVE_oldipc = 1; then
CFLAGS="$CFLAGS $oldipc_FLAGS $cpg_FLAGS $cfg_FLAGS"
COROSYNC_LIBS="$COROSYNC_LIBS $oldipc_LIBS $cpg_LIBS $cfg_LIBS"
else
CS_USES_LIBQB=1
CFLAGS="$CFLAGS $libqb_FLAGS $cpg_FLAGS $cfg_FLAGS"
COROSYNC_LIBS="$COROSYNC_LIBS $libqb_LIBS $cpg_LIBS $cfg_LIBS"
AC_CHECK_LIB(corosync_common, cs_strerror)
fi
AC_DEFINE_UNQUOTED(HAVE_CONFDB, $HAVE_confdb, Have the old herarchial Corosync config API)
AC_DEFINE_UNQUOTED(HAVE_CMAP, $HAVE_cmap, Have the new non-herarchial Corosync config API)
fi
if test $SUPPORT_CS = 1 -a x$HAVE_oldipc = x0 ; then
dnl Support for plugins was removed about the time the IPC was
dnl moved to libqb.
dnl The only option now is the built-in quorum API
CFLAGS="$CFLAGS $cmap_CFLAGS $quorum_CFLAGS"
COROSYNC_LIBS="$COROSYNC_LIBS $cmap_LIBS $quorum_LIBS"
STACKS="$STACKS corosync-native"
AC_DEFINE_UNQUOTED(SUPPORT_CS_QUORUM, 1, Support the consumption of membership and quorum from corosync)
fi
SUPPORT_PLUGIN=0
if test $SUPPORT_CS = 1 -a x$HAVE_confdb = x1; then
dnl Need confdb to support cman and the plugins
SUPPORT_PLUGIN=1
BUILD_ATOMIC_ATTRD=0
AC_MSG_CHECKING([for corosync path for plugins])
PKG_CHECK_VAR([LCRSODIR], [corosync], [lcrsodir], [
AC_MSG_RESULT([$LCRSODIR])
],[
AC_SUBST([LCRSODIR], [$libdir])
AC_MSG_RESULT([$LCRSODIR (fallback)])
])
STACKS="$STACKS corosync-plugin"
COROSYNC_LIBS="$COROSYNC_LIBS $confdb_LIBS"
if test $SUPPORT_CMAN != no; then
if test $HAVE_cman = 1 -a $HAVE_fenced = 1; then
SUPPORT_CMAN=1
STACKS="$STACKS cman"
CFLAGS="$CFLAGS $cman_FLAGS $fenced_FLAGS"
COROSYNC_LIBS="$COROSYNC_LIBS $cman_LIBS $fenced_LIBS"
fi
fi
fi
dnl Normalize SUPPORT_CS and SUPPORT_CMAN for use with #if directives
if test $SUPPORT_CMAN != 1; then
SUPPORT_CMAN=0
fi
if test $SUPPORT_CS = 1; then
CLUSTERLIBS="$CLUSTERLIBS $COROSYNC_LIBS"
elif test $SUPPORT_CS != 0; then
SUPPORT_CS=0
if test $missingisfatal = 0; then
AC_MSG_WARN(Unable to support Corosync: $aisreason)
else
AC_MSG_FAILURE(Unable to support Corosync: $aisreason)
fi
fi
AC_DEFINE_UNQUOTED(SUPPORT_COROSYNC, $SUPPORT_CS, Support the Corosync messaging and membership layer)
AC_DEFINE_UNQUOTED(SUPPORT_CMAN, $SUPPORT_CMAN, Support the consumption of membership and quorum from cman)
AC_DEFINE_UNQUOTED(CS_USES_LIBQB, $CS_USES_LIBQB, Does corosync use libqb for its ipc)
AC_DEFINE_UNQUOTED(PCMK_SERVICE_ID, $PCMK_SERVICE_ID, Corosync service number)
AC_DEFINE_UNQUOTED(SUPPORT_PLUGIN, $SUPPORT_PLUGIN, Support the Pacemaker plugin for Corosync)
AM_CONDITIONAL(BUILD_CS_SUPPORT, test $SUPPORT_CS = 1)
AM_CONDITIONAL(BUILD_CS_PLUGIN, test $SUPPORT_PLUGIN = 1)
AM_CONDITIONAL(BUILD_CMAN, test $SUPPORT_CMAN = 1)
AM_CONDITIONAL(BUILD_ATOMIC_ATTRD, test $BUILD_ATOMIC_ATTRD = 1)
AC_DEFINE_UNQUOTED(HAVE_ATOMIC_ATTRD, $BUILD_ATOMIC_ATTRD, Support the new atomic attrd)
AC_SUBST(SUPPORT_CMAN)
AC_SUBST(SUPPORT_CS)
AC_SUBST(SUPPORT_PLUGIN)
dnl
dnl Cluster stack - Sanity
dnl
if test x${enable_no_stack} = xyes; then
AC_MSG_NOTICE(No cluster stack supported. Just building the Policy Engine)
PCMK_FEATURES="$PCMK_FEATURES no-cluster-stack"
else
AC_MSG_CHECKING(for supported stacks)
if test x"$STACKS" = x; then
AC_MSG_FAILURE(You must support at least one cluster stack (heartbeat or corosync) )
fi
AC_MSG_RESULT($STACKS)
PCMK_FEATURES="$PCMK_FEATURES $STACKS"
fi
if test ${BUILD_ATOMIC_ATTRD} = 1; then
PCMK_FEATURES="$PCMK_FEATURES atomic-attrd"
fi
AC_SUBST(CLUSTERLIBS)
dnl ========================================================================
dnl SNMP
dnl ========================================================================
case $SUPPORT_SNMP in
1|yes|true) missingisfatal=1;;
try) missingisfatal=0;;
*) SUPPORT_SNMP=no;;
esac
SNMPLIBS=""
AC_MSG_CHECKING(for snmp support)
if test $SUPPORT_SNMP = no; then
AC_MSG_RESULT(no (disabled))
SUPPORT_SNMP=0
else
SNMPCONFIG=""
AC_MSG_RESULT($SUPPORT_SNMP)
AC_CHECK_HEADERS(net-snmp/net-snmp-config.h)
if test "x${ac_cv_header_net_snmp_net_snmp_config_h}" != "xyes"; then
SUPPORT_SNMP="no"
fi
if test $SUPPORT_SNMP != no; then
AC_PATH_PROGS(SNMPCONFIG, net-snmp-config)
if test "X${SNMPCONFIG}" = "X"; then
AC_MSG_RESULT(You need the net_snmp development package to continue.)
SUPPORT_SNMP=no
fi
fi
if test $SUPPORT_SNMP != no; then
AC_MSG_CHECKING(for special snmp libraries)
SNMPLIBS=`$SNMPCONFIG --agent-libs`
AC_MSG_RESULT($SNMPLIBS)
fi
if test $SUPPORT_SNMP != no; then
savedLibs=$LIBS
LIBS="$LIBS $SNMPLIBS"
dnl On many systems libcrypto is needed when linking against libsnmp.
dnl Check to see if it exists, and if so use it.
dnl AC_CHECK_LIB(crypto, CRYPTO_free, CRYPTOLIB="-lcrypto",)
dnl AC_SUBST(CRYPTOLIB)
AC_CHECK_FUNCS(netsnmp_transport_open_client)
if test $ac_cv_func_netsnmp_transport_open_client != yes; then
AC_CHECK_FUNCS(netsnmp_tdomain_transport)
if test $ac_cv_func_netsnmp_tdomain_transport != yes; then
SUPPORT_SNMP=no
else
AC_DEFINE_UNQUOTED(NETSNMPV53, 1, [Use the older 5.3 version of the net-snmp API])
fi
fi
LIBS=$savedLibs
fi
if test $SUPPORT_SNMP = no; then
SNMPLIBS=""
SUPPORT_SNMP=0
if test $missingisfatal = 0; then
AC_MSG_WARN(Unable to support SNMP)
else
AC_MSG_FAILURE(Unable to support SNMP)
fi
else
SUPPORT_SNMP=1
fi
fi
if test $SUPPORT_SNMP = 1; then
PCMK_FEATURES="$PCMK_FEATURES snmp"
fi
AC_SUBST(SNMPLIBS)
AM_CONDITIONAL(ENABLE_SNMP, test "$SUPPORT_SNMP" = "1")
AC_DEFINE_UNQUOTED(ENABLE_SNMP, $SUPPORT_SNMP, Build in support for sending SNMP traps)
dnl ========================================================================
dnl ESMTP
dnl ========================================================================
case $SUPPORT_ESMTP in
1|yes|true) missingisfatal=1;;
try) missingisfatal=0;;
*) SUPPORT_ESMTP=no;;
esac
ESMTPLIB=""
AC_MSG_CHECKING(for esmtp support)
if test $SUPPORT_ESMTP = no; then
AC_MSG_RESULT(no (disabled))
SUPPORT_ESMTP=0
else
ESMTPCONFIG=""
AC_MSG_RESULT($SUPPORT_ESMTP)
AC_CHECK_HEADERS(libesmtp.h)
if test "x${ac_cv_header_libesmtp_h}" != "xyes"; then
ENABLE_ESMTP="no"
fi
if test $SUPPORT_ESMTP != no; then
AC_PATH_PROGS(ESMTPCONFIG, libesmtp-config)
if test "X${ESMTPCONFIG}" = "X"; then
AC_MSG_RESULT(You need the libesmtp development package to continue.)
SUPPORT_ESMTP=no
fi
fi
if test $SUPPORT_ESMTP != no; then
AC_MSG_CHECKING(for special esmtp libraries)
ESMTPLIBS=`$ESMTPCONFIG --libs | tr '\n' ' '`
AC_MSG_RESULT($ESMTPLIBS)
fi
if test $SUPPORT_ESMTP = no; then
SUPPORT_ESMTP=0
if test $missingisfatal = 0; then
AC_MSG_WARN(Unable to support ESMTP)
else
AC_MSG_FAILURE(Unable to support ESMTP)
fi
else
SUPPORT_ESMTP=1
PCMK_FEATURES="$PCMK_FEATURES libesmtp"
fi
fi
AC_SUBST(ESMTPLIBS)
AM_CONDITIONAL(ENABLE_ESMTP, test "$SUPPORT_ESMTP" = "1")
AC_DEFINE_UNQUOTED(ENABLE_ESMTP, $SUPPORT_ESMTP, Build in support for sending mail notifications with ESMTP)
dnl ========================================================================
dnl ACL
dnl ========================================================================
case $SUPPORT_ACL in
1|yes|true) missingisfatal=1;;
try) missingisfatal=0;;
*) SUPPORT_ACL=no;;
esac
AC_MSG_CHECKING(for acl support)
if test $SUPPORT_ACL = no; then
AC_MSG_RESULT(no (disabled))
SUPPORT_ACL=0
else
AC_MSG_RESULT($SUPPORT_ACL)
SUPPORT_ACL=1
AC_CHECK_LIB(qb, qb_ipcs_connection_auth_set)
if test $ac_cv_lib_qb_qb_ipcs_connection_auth_set != yes; then
SUPPORT_ACL=0
fi
if test $SUPPORT_ACL = 0; then
if test $missingisfatal = 0; then
AC_MSG_WARN(Unable to support ACL. You need to use libqb > 0.13.0)
else
AC_MSG_FAILURE(Unable to support ACL. You need to use libqb > 0.13.0)
fi
fi
fi
if test $SUPPORT_ACL = 1; then
PCMK_FEATURES="$PCMK_FEATURES acls"
fi
AM_CONDITIONAL(ENABLE_ACL, test "$SUPPORT_ACL" = "1")
AC_DEFINE_UNQUOTED(ENABLE_ACL, $SUPPORT_ACL, Build in support for CIB ACL)
dnl ========================================================================
dnl CIB secrets
dnl ========================================================================
case $SUPPORT_CIBSECRETS in
1|yes|true|try)
SUPPORT_CIBSECRETS=1;;
*)
SUPPORT_CIBSECRETS=0;;
esac
AC_DEFINE_UNQUOTED(SUPPORT_CIBSECRETS, $SUPPORT_CIBSECRETS, Support CIB secrets)
AM_CONDITIONAL(BUILD_CIBSECRETS, test $SUPPORT_CIBSECRETS = 1)
if test $SUPPORT_CIBSECRETS = 1; then
PCMK_FEATURES="$PCMK_FEATURES cibsecrets"
LRM_CIBSECRETS_DIR="${localstatedir}/lib/pacemaker/lrm/secrets"
AC_DEFINE_UNQUOTED(LRM_CIBSECRETS_DIR,"$LRM_CIBSECRETS_DIR", Location for CIB secrets)
AC_SUBST(LRM_CIBSECRETS_DIR)
LRM_LEGACY_CIBSECRETS_DIR="${localstatedir}/lib/heartbeat/lrm/secrets"
AC_DEFINE_UNQUOTED(LRM_LEGACY_CIBSECRETS_DIR,"$LRM_LEGACY_CIBSECRETS_DIR", Legacy location for CIB secrets)
AC_SUBST(LRM_LEGACY_CIBSECRETS_DIR)
fi
dnl ========================================================================
dnl GnuTLS
dnl ========================================================================
AC_CHECK_HEADERS(gnutls/gnutls.h)
AC_CHECK_HEADERS(security/pam_appl.h pam/pam_appl.h)
dnl GNUTLS library: Attempt to determine by 'libgnutls-config' program.
dnl If no 'libgnutls-config', try traditional autoconf means.
AC_PATH_PROGS(LIBGNUTLS_CONFIG, libgnutls-config)
if test -n "$LIBGNUTLS_CONFIG"; then
AC_MSG_CHECKING(for gnutls header flags)
GNUTLSHEAD="`$LIBGNUTLS_CONFIG --cflags`";
AC_MSG_RESULT($GNUTLSHEAD)
AC_MSG_CHECKING(for gnutls library flags)
GNUTLSLIBS="`$LIBGNUTLS_CONFIG --libs`";
AC_MSG_RESULT($GNUTLSLIBS)
fi
AC_CHECK_LIB(gnutls, gnutls_init)
AC_CHECK_FUNCS(gnutls_priority_set_direct)
AC_CHECK_FUNCS([gnutls_sec_param_to_pk_bits]) dnl since 2.12.0 (2011-03-24)
AC_SUBST(GNUTLSHEAD)
AC_SUBST(GNUTLSLIBS)
dnl ========================================================================
dnl System Health
dnl ========================================================================
dnl Check if servicelog development package is installed
SERVICELOG=servicelog-1
SERVICELOG_EXISTS="no"
AC_MSG_CHECKING(for $SERVICELOG packages)
if
$PKG_CONFIG --exists $SERVICELOG
then
PKG_CHECK_MODULES([SERVICELOG], [servicelog-1])
SERVICELOG_EXISTS="yes"
fi
AC_MSG_RESULT($SERVICELOG_EXISTS)
AM_CONDITIONAL(BUILD_SERVICELOG, test "$SERVICELOG_EXISTS" = "yes")
dnl Check if OpenIMPI packages and servicelog are installed
OPENIPMI="OpenIPMI OpenIPMIposix"
OPENIPMI_SERVICELOG_EXISTS="no"
AC_MSG_CHECKING(for $SERVICELOG $OPENIPMI packages)
if
$PKG_CONFIG --exists $OPENIPMI $SERVICELOG
then
PKG_CHECK_MODULES([OPENIPMI_SERVICELOG],[OpenIPMI OpenIPMIposix])
OPENIPMI_SERVICELOG_EXISTS="yes"
fi
AC_MSG_RESULT($OPENIPMI_SERVICELOG_EXISTS)
AM_CONDITIONAL(BUILD_OPENIPMI_SERVICELOG, test "$OPENIPMI_SERVICELOG_EXISTS" = "yes")
dnl ========================================================================
dnl Compiler flags
dnl ========================================================================
dnl Make sure that CFLAGS is not exported. If the user did
dnl not have CFLAGS in their environment then this should have
dnl no effect. However if CFLAGS was exported from the user's
dnl environment, then the new CFLAGS will also be exported
dnl to sub processes.
if export | fgrep " CFLAGS=" > /dev/null; then
SAVED_CFLAGS="$CFLAGS"
unset CFLAGS
CFLAGS="$SAVED_CFLAGS"
unset SAVED_CFLAGS
fi
AC_ARG_VAR([CFLAGS_HARDENED_LIB], [extra C compiler flags for hardened libraries])
AC_ARG_VAR([LDFLAGS_HARDENED_LIB], [extra linker flags for hardened libraries])
AC_ARG_VAR([CFLAGS_HARDENED_EXE], [extra C compiler flags for hardened executables])
AC_ARG_VAR([LDFLAGS_HARDENED_EXE], [extra linker flags for hardened executables])
CC_EXTRAS=""
if test "$GCC" != yes; then
CFLAGS="$CFLAGS -g"
enable_fatal_warnings=no
else
CFLAGS="$CFLAGS -ggdb"
dnl When we don't have diagnostic push / pull, we can't explicitly disable
dnl checking for nonliteral formats in the places where they occur on purpose
dnl thus we disable nonliteral format checking globally as we are aborting
dnl on warnings.
dnl what makes the things really ugly is that nonliteral format checking is
dnl obviously available as an extra switch in very modern gcc but for older
dnl gcc this is part of -Wformat=2
dnl so if we have push/pull we can enable -Wformat=2 -Wformat-nonliteral
dnl if we don't have push/pull but -Wformat-nonliteral we can enable -Wformat=2
dnl otherwise none of both
gcc_diagnostic_push_pull=no
SAVED_CFLAGS="$CFLAGS"
CFLAGS="$CFLAGS -Werror"
AC_MSG_CHECKING([for gcc diagnostic push / pull])
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
#pragma GCC diagnostic push
#pragma GCC diagnostic pop
]])], [
AC_MSG_RESULT([yes])
gcc_diagnostic_push_pull=yes
], AC_MSG_RESULT([no]))
CFLAGS="$SAVED_CFLAGS"
if cc_supports_flag "-Wformat-nonliteral"; then
gcc_format_nonliteral=yes
else
gcc_format_nonliteral=no
fi
# We had to eliminate -Wnested-externs because of libtool changes
# Make sure to order options so that the former stand for prerequisites
# of the latter (e.g., -Wformat-nonliteral requires -Wformat).
EXTRA_FLAGS="-fgnu89-inline
-Wall
-Waggregate-return
-Wbad-function-cast
-Wcast-align
-Wdeclaration-after-statement
-Wendif-labels
-Wfloat-equal
-Wformat-security
-Wmissing-prototypes
-Wmissing-declarations
-Wnested-externs
-Wno-long-long
-Wno-strict-aliasing
-Wpointer-arith
-Wstrict-prototypes
-Wwrite-strings
-Wunused-but-set-variable
-Wunsigned-char"
if test "x$gcc_diagnostic_push_pull" = "xyes"; then
AC_DEFINE([GCC_FORMAT_NONLITERAL_CHECKING_ENABLED], [],
[gcc can complain about nonliterals in format])
EXTRA_FLAGS="$EXTRA_FLAGS
-Wformat=2
-Wformat-nonliteral"
else
if test "x$gcc_format_nonliteral" = "xyes"; then
EXTRA_FLAGS="$EXTRA_FLAGS
-Wformat=2"
fi
fi
# Additional warnings it might be nice to enable one day
# -Wshadow
# -Wunreachable-code
for j in $EXTRA_FLAGS
do
if
cc_supports_flag $CC_EXTRAS $j
then
CC_EXTRAS="$CC_EXTRAS $j"
fi
done
dnl In lib/ais/Makefile.am there's a gcc option available as of v4.x
GCC_MAJOR=`gcc -v 2>&1 | awk 'END{print $3}' | sed 's/[.].*//'`
AM_CONDITIONAL(GCC_4, test "${GCC_MAJOR}" = 4)
dnl System specific options
case "$host_os" in
*linux*|*bsd*)
if test "${enable_fatal_warnings}" = "unknown"; then
enable_fatal_warnings=yes
fi
;;
esac
if test "x${enable_fatal_warnings}" != xno && cc_supports_flag -Werror ; then
enable_fatal_warnings=yes
else
enable_fatal_warnings=no
fi
if test "x${enable_ansi}" = xyes && cc_supports_flag -std=iso9899:199409 ; then
AC_MSG_NOTICE(Enabling ANSI Compatibility)
CC_EXTRAS="$CC_EXTRAS -ansi -D_GNU_SOURCE -DANSI_ONLY"
fi
AC_MSG_NOTICE(Activated additional gcc flags: ${CC_EXTRAS})
fi
dnl
dnl Hardening flags
dnl
dnl The prime control of whether to apply (targeted) hardening build flags and
dnl which ones is --{enable,disable}-hardening option passed to ./configure:
dnl
dnl --enable-hardening=try (default):
dnl depending on whether any of CFLAGS_HARDENED_EXE, LDFLAGS_HARDENED_EXE,
dnl CFLAGS_HARDENED_LIB or LDFLAGS_HARDENED_LIB environment variables
dnl (see below) is set and non-null, all these custom flags (even if not
dnl set) are used as are, otherwise the best effort is made to offer
dnl reasonably strong hardening in several categories (RELRO, PIE,
dnl "bind now", stack protector) according to what the selected toolchain
dnl can offer
dnl
dnl --enable-hardening:
dnl same effect as --enable-hardening=try when the environment variables
dnl in question are suppressed
dnl
dnl --disable-hardening:
dnl do not apply any targeted hardening measures at all
dnl
dnl The user-injected environment variables that regulate the hardening in
dnl default case are as follows:
dnl
dnl * CFLAGS_HARDENED_EXE, LDFLAGS_HARDENED_EXE
dnl compiler and linker flags (respectively) for daemon programs
dnl (attrd, cib, crmd, lrmd, stonithd, pacemakerd, pacemaker_remoted,
dnl pengine)
dnl
dnl * CFLAGS_HARDENED_LIB, LDFLAGS_HARDENED_LIB
dnl compiler and linker flags (respectively) for libraries linked
dnl with the daemon programs
dnl
dnl Note that these are purposedly targeted variables (addressing particular
dnl targets all over the scattered Makefiles) and have no effect outside of
dnl the predestined scope (e.g., CLI utilities). For a global reach,
dnl use CFLAGS, LDFLAGS, etc. as usual.
dnl
dnl For guidance on the suitable flags consult, for instance:
dnl https://fedoraproject.org/wiki/Changes/Harden_All_Packages#Detailed_Harden_Flags_Description
dnl https://owasp.org/index.php/C-Based_Toolchain_Hardening#GCC.2FBinutils
dnl
if test "x${HARDENING}" != "xtry"; then
unset CFLAGS_HARDENED_EXE
unset CFLAGS_HARDENED_LIB
unset LDFLAGS_HARDENED_EXE
unset LDFLAGS_HARDENED_LIB
fi
if test "x${HARDENING}" = "xno"; then
AC_MSG_NOTICE([Hardening: explicitly disabled])
elif test "x${HARDENING}" = "xyes" \
|| test "$(env | grep -Ec '^(C|LD)FLAGS_HARDENED_(EXE|LIB)=.')" = 0; then
dnl We'll figure out on our own...
CFLAGS_HARDENED_EXE=
CFLAGS_HARDENED_LIB=
LDFLAGS_HARDENED_EXE=
LDFLAGS_HARDENED_LIB=
relro=0
pie=0
bindnow=0
# daemons incl. libs: partial RELRO
flag="-Wl,-z,relro"
CC_CHECK_LDFLAGS(["${flag}"],
[LDFLAGS_HARDENED_EXE="${LDFLAGS_HARDENED_EXE} ${flag}";
LDFLAGS_HARDENED_LIB="${LDFLAGS_HARDENED_LIB} ${flag}";
relro=1]
)
# daemons: PIE for both CFLAGS and LDFLAGS
if cc_supports_flag -fPIE; then
flag="-pie"
CC_CHECK_LDFLAGS(["${flag}"],
[CFLAGS_HARDENED_EXE="${CFLAGS_HARDENED_EXE} -fPIE";
LDFLAGS_HARDENED_EXE="${LDFLAGS_HARDENED_EXE} ${flag}";
pie=1]
)
fi
# daemons incl. libs: full RELRO if sensible + as-needed linking
# so as to possibly mitigate startup performance
# hit caused by excessive linking with unneeded
# libraries
if test "${relro}" = 1 && test "${pie}" = 1; then
flag="-Wl,-z,now"
CC_CHECK_LDFLAGS(["${flag}"],
[LDFLAGS_HARDENED_EXE="${LDFLAGS_HARDENED_EXE} ${flag}";
LDFLAGS_HARDENED_LIB="${LDFLAGS_HARDENED_LIB} ${flag}";
bindnow=1]
)
fi
if test "${bindnow}" = 1; then
flag="-Wl,--as-needed"
CC_CHECK_LDFLAGS(["${flag}"],
[LDFLAGS_HARDENED_EXE="${LDFLAGS_HARDENED_EXE} ${flag}";
LDFLAGS_HARDENED_LIB="${LDFLAGS_HARDENED_LIB} ${flag}"]
)
fi
# universal: prefer strong > all > default stack protector if possible
flag=
if cc_supports_flag -fstack-protector-strong; then
flag="-fstack-protector-strong"
elif cc_supports_flag -fstack-protector-all; then
flag="-fstack-protector-all"
elif cc_supports_flag -fstack-protector; then
flag="-fstack-protector"
fi
if test -n "${flag}"; then
CC_EXTRAS="${CC_EXTRAS} ${flag}"
stackprot=1
fi
if test "${relro}" = 1 \
|| test "${pie}" = 1 \
|| test "${stackprot}" = 1; then
AC_MSG_NOTICE(
[Hardening: relro=${relro} pie=${pie} bindnow=${bindnow} stackprot=${flag}])
else
AC_MSG_WARN([Hardening: no suitable features in the toolchain detected])
fi
else
AC_MSG_NOTICE([Hardening: using custom flags])
fi
CFLAGS="$CFLAGS $CC_EXTRAS"
NON_FATAL_CFLAGS="$CFLAGS"
AC_SUBST(NON_FATAL_CFLAGS)
dnl
dnl We reset CFLAGS to include our warnings *after* all function
dnl checking goes on, so that our warning flags don't keep the
dnl AC_*FUNCS() calls above from working. In particular, -Werror will
dnl *always* cause us troubles if we set it before here.
dnl
dnl
if test "x${enable_fatal_warnings}" = xyes ; then
AC_MSG_NOTICE(Enabling Fatal Warnings)
CFLAGS="$CFLAGS -Werror"
fi
AC_SUBST(CFLAGS)
dnl This is useful for use in Makefiles that need to remove one specific flag
CFLAGS_COPY="$CFLAGS"
AC_SUBST(CFLAGS_COPY)
AC_SUBST(LIBADD_DL) dnl extra flags for dynamic linking libraries
AC_SUBST(LIBADD_INTL) dnl extra flags for GNU gettext stuff...
AC_SUBST(LOCALE)
dnl Options for cleaning up the compiler output
QUIET_LIBTOOL_OPTS=""
QUIET_MAKE_OPTS=""
if test "x${enable_quiet}" = "xyes"; then
QUIET_LIBTOOL_OPTS="--quiet"
QUIET_MAKE_OPTS="--quiet"
fi
AC_MSG_RESULT(Supress make details: ${enable_quiet})
dnl Put the above variables to use
LIBTOOL="${LIBTOOL} --tag=CC \$(QUIET_LIBTOOL_OPTS)"
MAKE="${MAKE} \$(QUIET_MAKE_OPTS)"
AC_SUBST(CC)
AC_SUBST(MAKE)
AC_SUBST(LIBTOOL)
AC_SUBST(QUIET_MAKE_OPTS)
AC_SUBST(QUIET_LIBTOOL_OPTS)
AC_DEFINE_UNQUOTED(CRM_FEATURES, "$PCMK_FEATURES", Set of enabled features)
AC_SUBST(PCMK_FEATURES)
dnl The Makefiles and shell scripts we output
AC_CONFIG_FILES(Makefile \
Doxyfile \
coverage.sh \
cts/Makefile \
cts/CTSvars.py \
cts/LSBDummy \
cts/HBDummy \
cts/benchmark/Makefile \
cts/benchmark/clubench \
cts/lxc_autogen.sh \
cts/pacemaker-cts-dummyd.service \
cib/Makefile \
attrd/Makefile \
crmd/Makefile \
pengine/Makefile \
pengine/regression.core.sh \
doc/Makefile \
doc/Clusters_from_Scratch/publican.cfg \
doc/Pacemaker_Development/publican.cfg \
doc/Pacemaker_Explained/publican.cfg \
doc/Pacemaker_Remote/publican.cfg \
include/Makefile \
include/crm/Makefile \
include/crm/cib/Makefile \
include/crm/common/Makefile \
include/crm/cluster/Makefile \
include/crm/fencing/Makefile \
include/crm/pengine/Makefile \
replace/Makefile \
lib/Makefile \
lib/pacemaker.pc \
lib/pacemaker-cib.pc \
lib/pacemaker-lrmd.pc \
lib/pacemaker-service.pc \
lib/pacemaker-pengine.pc \
lib/pacemaker-fencing.pc \
lib/pacemaker-cluster.pc \
lib/ais/Makefile \
lib/common/Makefile \
lib/cluster/Makefile \
lib/cib/Makefile \
lib/pengine/Makefile \
lib/transition/Makefile \
lib/fencing/Makefile \
lib/lrmd/Makefile \
lib/services/Makefile \
mcp/Makefile \
mcp/pacemaker \
mcp/pacemaker.service \
mcp/pacemaker.upstart \
mcp/pacemaker.combined.upstart \
fencing/Makefile \
fencing/regression.py \
lrmd/Makefile \
lrmd/regression.py \
lrmd/pacemaker_remote.service \
lrmd/pacemaker_remote \
extra/Makefile \
extra/alerts/Makefile \
extra/resources/Makefile \
extra/logrotate/Makefile \
extra/logrotate/pacemaker \
tools/Makefile \
tools/crm_report \
tools/report.collector \
tools/report.common \
tools/cibsecret \
tools/crm_mon.service \
tools/crm_mon.upstart \
xml/Makefile \
lib/gnu/Makefile \
)
AC_CONFIG_FILES([cts/pacemaker-cts-dummyd], [chmod +x cts/pacemaker-cts-dummyd])
dnl Now process the entire list of files added by previous
dnl calls to AC_CONFIG_FILES()
AC_OUTPUT()
dnl *****************
dnl Configure summary
dnl *****************
AC_MSG_RESULT([])
AC_MSG_RESULT([$PACKAGE configuration:])
AC_MSG_RESULT([ Version = ${VERSION} (Build: $BUILD_VERSION)])
AC_MSG_RESULT([ Features =${PCMK_FEATURES}])
AC_MSG_RESULT([])
AC_MSG_RESULT([ Prefix = ${prefix}])
AC_MSG_RESULT([ Executables = ${sbindir}])
AC_MSG_RESULT([ Man pages = ${mandir}])
AC_MSG_RESULT([ Libraries = ${libdir}])
AC_MSG_RESULT([ Header files = ${includedir}])
AC_MSG_RESULT([ Arch-independent files = ${datadir}])
AC_MSG_RESULT([ State information = ${localstatedir}])
AC_MSG_RESULT([ System configuration = ${sysconfdir}])
if test $SUPPORT_PLUGIN = 1; then
AC_MSG_RESULT([ Corosync Plugins = ${LCRSODIR}])
fi
AC_MSG_RESULT([])
AC_MSG_RESULT([ HA group name = ${CRM_DAEMON_GROUP}])
AC_MSG_RESULT([ HA user name = ${CRM_DAEMON_USER}])
AC_MSG_RESULT([])
AC_MSG_RESULT([ CFLAGS = ${CFLAGS}])
AC_MSG_RESULT([ CFLAGS_HARDENED_EXE = ${CFLAGS_HARDENED_EXE}])
AC_MSG_RESULT([ CFLAGS_HARDENED_LIB = ${CFLAGS_HARDENED_LIB}])
AC_MSG_RESULT([ LDFLAGS_HARDENED_EXE = ${LDFLAGS_HARDENED_EXE}])
AC_MSG_RESULT([ LDFLAGS_HARDENED_LIB = ${LDFLAGS_HARDENED_LIB}])
AC_MSG_RESULT([ Libraries = ${LIBS}])
AC_MSG_RESULT([ Stack Libraries = ${CLUSTERLIBS}])
+AC_MSG_RESULT([ Unix socket auth method = ${us_auth}])
diff --git a/crmd/control.c b/crmd/control.c
index e01066a755..488ea88018 100644
--- a/crmd/control.c
+++ b/crmd/control.c
@@ -1,1157 +1,1151 @@
/*
- * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
+ * Copyright 2004-2019 the Pacemaker project contributors
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
+ * The version control history for this file may have further details.
*
- * This software is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/pengine/rules.h>
#include <crm/cluster/internal.h>
#include <crm/cluster/election.h>
#include <crm/common/ipcs.h>
#include <crmd.h>
#include <crmd_fsa.h>
#include <fsa_proto.h>
#include <crmd_messages.h>
#include <crmd_callbacks.h>
#include <crmd_lrm.h>
#include <crmd_alerts.h>
#include <crmd_metadata.h>
#include <tengine.h>
#include <throttle.h>
#include <sys/types.h>
#include <sys/stat.h>
#ifdef RHEL7_COMPAT
// for pe_enable_legacy_alerts()
#include <crm/pengine/rules_internal.h>
#endif
qb_ipcs_service_t *ipcs = NULL;
extern gboolean crm_connect_corosync(crm_cluster_t * cluster);
extern void crmd_ha_connection_destroy(gpointer user_data);
void crm_shutdown(int nsig);
gboolean crm_read_options(gpointer user_data);
gboolean fsa_has_quorum = FALSE;
crm_trigger_t *fsa_source = NULL;
crm_trigger_t *config_read = NULL;
bool no_quorum_suicide_escalation = FALSE;
/* A_HA_CONNECT */
void
do_ha_control(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
gboolean registered = FALSE;
static crm_cluster_t *cluster = NULL;
if (cluster == NULL) {
cluster = calloc(1, sizeof(crm_cluster_t));
}
if (action & A_HA_DISCONNECT) {
crm_cluster_disconnect(cluster);
crm_info("Disconnected from the cluster");
set_bit(fsa_input_register, R_HA_DISCONNECTED);
}
if (action & A_HA_CONNECT) {
crm_set_status_callback(&peer_update_callback);
crm_set_autoreap(FALSE);
if (is_openais_cluster()) {
#if SUPPORT_COROSYNC
registered = crm_connect_corosync(cluster);
#endif
} else if (is_heartbeat_cluster()) {
#if SUPPORT_HEARTBEAT
cluster->destroy = crmd_ha_connection_destroy;
cluster->hb_dispatch = crmd_ha_msg_callback;
registered = crm_cluster_connect(cluster);
fsa_cluster_conn = cluster->hb_conn;
crm_trace("Be informed of Node Status changes");
if (registered &&
fsa_cluster_conn->llc_ops->set_nstatus_callback(fsa_cluster_conn,
crmd_ha_status_callback,
fsa_cluster_conn) != HA_OK) {
crm_err("Cannot set nstatus callback: %s",
fsa_cluster_conn->llc_ops->errmsg(fsa_cluster_conn));
registered = FALSE;
}
crm_trace("Be informed of CRM Client Status changes");
if (registered &&
fsa_cluster_conn->llc_ops->set_cstatus_callback(fsa_cluster_conn,
crmd_client_status_callback,
fsa_cluster_conn) != HA_OK) {
crm_err("Cannot set cstatus callback: %s",
fsa_cluster_conn->llc_ops->errmsg(fsa_cluster_conn));
registered = FALSE;
}
if (registered) {
crm_trace("Requesting an initial dump of CRMD client_status");
fsa_cluster_conn->llc_ops->client_status(fsa_cluster_conn, NULL, CRM_SYSTEM_CRMD,
-1);
}
#endif
}
- controld_election_init(cluster->uname);
- fsa_our_uname = cluster->uname;
- fsa_our_uuid = cluster->uuid;
- if(cluster->uuid == NULL) {
- crm_err("Could not obtain local uuid");
- registered = FALSE;
+
+ if (registered == TRUE) {
+ controld_election_init(cluster->uname);
+ fsa_our_uname = cluster->uname;
+ fsa_our_uuid = cluster->uuid;
+ if(cluster->uuid == NULL) {
+ crm_err("Could not obtain local uuid");
+ registered = FALSE;
+ }
}
if (registered == FALSE) {
set_bit(fsa_input_register, R_HA_DISCONNECTED);
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
return;
}
populate_cib_nodes(node_update_none, __FUNCTION__);
clear_bit(fsa_input_register, R_HA_DISCONNECTED);
crm_info("Connected to the cluster");
}
if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) {
crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__);
}
}
static bool
need_spawn_pengine_from_crmd(void)
{
static int result = -1;
if (result != -1)
return result;
if (!is_heartbeat_cluster()) {
result = 0;
return result;
}
/* NULL, or "strange" value: rather spawn from here. */
result = TRUE;
crm_str_to_boolean(daemon_option("crmd_spawns_pengine"), &result);
return result;
}
/* A_SHUTDOWN */
void
do_shutdown(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
/* just in case */
set_bit(fsa_input_register, R_SHUTDOWN);
if (need_spawn_pengine_from_crmd()) {
if (is_set(fsa_input_register, pe_subsystem->flag_connected)) {
crm_info("Terminating the %s", pe_subsystem->name);
if (stop_subsystem(pe_subsystem, TRUE) == FALSE) {
/* It's gone ... */
crm_err("Faking %s exit", pe_subsystem->name);
clear_bit(fsa_input_register, pe_subsystem->flag_connected);
} else {
crm_info("Waiting for subsystems to exit");
crmd_fsa_stall(FALSE);
}
}
crm_info("All subsystems stopped, continuing");
}
if (stonith_api) {
/* Prevent it from coming up again */
clear_bit(fsa_input_register, R_ST_REQUIRED);
crm_info("Disconnecting STONITH...");
stonith_api->cmds->disconnect(stonith_api);
}
}
/* A_SHUTDOWN_REQ */
void
do_shutdown_req(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
xmlNode *msg = NULL;
set_bit(fsa_input_register, R_SHUTDOWN);
crm_info("Sending shutdown request to all peers (DC is %s)",
(fsa_our_dc? fsa_our_dc : "not set"));
msg = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
/* set_bit(fsa_input_register, R_STAYDOWN); */
if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) {
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
free_xml(msg);
}
extern crm_ipc_t *attrd_ipc;
extern char *max_generation_from;
extern xmlNode *max_generation_xml;
extern GHashTable *resource_history;
extern GHashTable *voted;
extern char *te_client_id;
void log_connected_client(gpointer key, gpointer value, gpointer user_data);
void
log_connected_client(gpointer key, gpointer value, gpointer user_data)
{
crm_client_t *client = value;
crm_err("%s is still connected at exit", crm_client_name(client));
}
int
crmd_fast_exit(int rc)
{
if (is_set(fsa_input_register, R_STAYDOWN)) {
crm_warn("Inhibiting respawn "CRM_XS" remapping exit code %d to %d",
rc, DAEMON_RESPAWN_STOP);
rc = DAEMON_RESPAWN_STOP;
}
if (rc == pcmk_ok && is_set(fsa_input_register, R_IN_RECOVERY)) {
crm_err("Could not recover from internal error");
rc = pcmk_err_generic;
}
return crm_exit(rc);
}
int
crmd_exit(int rc)
{
GListPtr gIter = NULL;
GMainLoop *mloop = crmd_mainloop;
static bool in_progress = FALSE;
if(in_progress && rc == 0) {
crm_debug("Exit is already in progress");
return rc;
} else if(in_progress) {
crm_notice("Error during shutdown process, terminating now with status %d: %s",
rc, pcmk_strerror(rc));
crm_write_blackbox(SIGTRAP, NULL);
crmd_fast_exit(rc);
}
in_progress = TRUE;
crm_trace("Preparing to exit: %d", rc);
/* Suppress secondary errors resulting from us disconnecting everything */
set_bit(fsa_input_register, R_HA_DISCONNECTED);
/* Close all IPC servers and clients to ensure any and all shared memory files are cleaned up */
if(ipcs) {
crm_trace("Closing IPC server");
mainloop_del_ipc_server(ipcs);
ipcs = NULL;
}
if (attrd_ipc) {
crm_trace("Closing attrd connection");
crm_ipc_close(attrd_ipc);
crm_ipc_destroy(attrd_ipc);
attrd_ipc = NULL;
}
if (pe_subsystem && pe_subsystem->client && pe_subsystem->client->ipcs) {
crm_trace("Disconnecting Policy Engine");
qb_ipcs_disconnect(pe_subsystem->client->ipcs);
}
if(stonith_api) {
crm_trace("Disconnecting fencing API");
clear_bit(fsa_input_register, R_ST_REQUIRED);
stonith_api->cmds->free(stonith_api); stonith_api = NULL;
}
if (rc == pcmk_ok && crmd_mainloop == NULL) {
crm_debug("No mainloop detected");
rc = EPROTO;
}
/* On an error, just get out.
*
* Otherwise, make the effort to have mainloop exit gracefully so
* that it (mostly) cleans up after itself and valgrind has less
* to report on - allowing real errors stand out
*/
if(rc != pcmk_ok) {
crm_notice("Forcing immediate exit with status %d: %s",
rc, pcmk_strerror(rc));
crm_write_blackbox(SIGTRAP, NULL);
return crmd_fast_exit(rc);
}
/* Clean up as much memory as possible for valgrind */
for (gIter = fsa_message_queue; gIter != NULL; gIter = gIter->next) {
fsa_data_t *fsa_data = gIter->data;
crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]",
fsa_input2string(fsa_data->fsa_input),
fsa_state2string(fsa_state),
fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
delete_fsa_input(fsa_data);
}
clear_bit(fsa_input_register, R_MEMBERSHIP);
g_list_free(fsa_message_queue); fsa_message_queue = NULL;
free(pe_subsystem); pe_subsystem = NULL;
free(te_subsystem); te_subsystem = NULL;
free(cib_subsystem); cib_subsystem = NULL;
metadata_cache_fini();
controld_election_fini();
/* Tear down the CIB connection, but don't free it yet -- it could be used
* when we drain the mainloop later.
*/
cib_free_callbacks(fsa_cib_conn);
fsa_cib_conn->cmds->signoff(fsa_cib_conn);
verify_stopped(fsa_state, LOG_WARNING);
clear_bit(fsa_input_register, R_LRM_CONNECTED);
lrm_state_destroy_all();
/* This basically will not work, since mainloop has a reference to it */
mainloop_destroy_trigger(fsa_source); fsa_source = NULL;
mainloop_destroy_trigger(config_read); config_read = NULL;
mainloop_destroy_trigger(stonith_reconnect); stonith_reconnect = NULL;
mainloop_destroy_trigger(transition_trigger); transition_trigger = NULL;
crm_client_cleanup();
crm_peer_destroy();
crm_timer_stop(transition_timer);
crm_timer_stop(integration_timer);
crm_timer_stop(finalization_timer);
crm_timer_stop(election_trigger);
crm_timer_stop(shutdown_escalation_timer);
crm_timer_stop(wait_timer);
crm_timer_stop(recheck_timer);
free(transition_timer); transition_timer = NULL;
free(integration_timer); integration_timer = NULL;
free(finalization_timer); finalization_timer = NULL;
free(election_trigger); election_trigger = NULL;
free(shutdown_escalation_timer); shutdown_escalation_timer = NULL;
free(wait_timer); wait_timer = NULL;
free(recheck_timer); recheck_timer = NULL;
free(fsa_our_dc_version); fsa_our_dc_version = NULL;
free(fsa_our_uname); fsa_our_uname = NULL;
free(fsa_our_uuid); fsa_our_uuid = NULL;
free(fsa_our_dc); fsa_our_dc = NULL;
free(fsa_cluster_name); fsa_cluster_name = NULL;
free(te_uuid); te_uuid = NULL;
free(te_client_id); te_client_id = NULL;
free(fsa_pe_ref); fsa_pe_ref = NULL;
free(failed_stop_offset); failed_stop_offset = NULL;
free(failed_start_offset); failed_start_offset = NULL;
free(max_generation_from); max_generation_from = NULL;
free_xml(max_generation_xml); max_generation_xml = NULL;
mainloop_destroy_signal(SIGPIPE);
mainloop_destroy_signal(SIGUSR1);
mainloop_destroy_signal(SIGTERM);
mainloop_destroy_signal(SIGTRAP);
/* leave SIGCHLD engaged as we might still want to drain some service-actions */
if (mloop) {
GMainContext *ctx = g_main_loop_get_context(crmd_mainloop);
/* Don't re-enter this block */
crmd_mainloop = NULL;
/* no signals on final draining anymore */
mainloop_destroy_signal(SIGCHLD);
crm_trace("Draining mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
{
int lpc = 0;
while((g_main_context_pending(ctx) && lpc < 10)) {
lpc++;
crm_trace("Iteration %d", lpc);
g_main_context_dispatch(ctx);
}
}
crm_trace("Closing mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
g_main_loop_quit(mloop);
#if SUPPORT_HEARTBEAT
/* Do this only after g_main_loop_quit().
*
* This interface was broken (incomplete) since it was introduced.
* ->delete() does cleanup and free most of it, but it does not
* actually remove and destroy the corresponding GSource, so the next
* prepare/check iteratioin would find a corrupt (because partially
* freed) GSource, and segfault.
*
* Apparently one was supposed to store the GSource as returned by
* G_main_add_ll_cluster(), and g_source_destroy() that "by hand".
*
* But no-one ever did this, not even in the old hb code when this was
* introduced.
*
* Note that fsa_cluster_conn was set as an "alias" to cluster->hb_conn
* in do_ha_control() right after crm_cluster_connect(), and only
* happens to still point at that object, because do_ha_control() does
* not reset it to NULL after crm_cluster_disconnect() above does
* reset cluster->hb_conn to NULL.
* Not sure if that's something to cleanup, too.
*
* I'll try to fix this up in heartbeat proper, so ->delete
* will actually remove, and destroy, and unref, and free this thing.
* Doing so after g_main_loop_quit() is valid with both old,
* and eventually fixed heartbeat.
*
* If we introduce the "by hand" destroy/remove/unref,
* this may break again once heartbeat is fixed :-(
*
* -- Lars Ellenberg
*/
if (fsa_cluster_conn) {
crm_trace("Deleting heartbeat api object");
fsa_cluster_conn->llc_ops->delete(fsa_cluster_conn);
fsa_cluster_conn = NULL;
}
#endif
/* Won't do anything yet, since we're inside it now */
g_main_loop_unref(mloop);
crm_trace("Done %d", rc);
} else {
mainloop_destroy_signal(SIGCHLD);
}
cib_delete(fsa_cib_conn);
fsa_cib_conn = NULL;
throttle_fini();
/* Graceful */
return rc;
}
/* A_EXIT_0, A_EXIT_1 */
void
do_exit(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
int exit_code = pcmk_ok;
int log_level = LOG_INFO;
const char *exit_type = "gracefully";
if (action & A_EXIT_1) {
/* exit_code = pcmk_err_generic; */
log_level = LOG_ERR;
exit_type = "forcefully";
exit_code = pcmk_err_generic;
}
verify_stopped(cur_state, LOG_ERR);
do_crm_log(log_level, "Performing %s - %s exiting the CRMd",
fsa_action2string(action), exit_type);
crm_info("[%s] stopped (%d)", crm_system_name, exit_code);
crmd_exit(exit_code);
}
static void sigpipe_ignore(int nsig) { return; }
/* A_STARTUP */
void
do_startup(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
int was_error = 0;
crm_debug("Registering Signal Handlers");
mainloop_add_signal(SIGTERM, crm_shutdown);
mainloop_add_signal(SIGPIPE, sigpipe_ignore);
fsa_source = mainloop_add_trigger(G_PRIORITY_HIGH, crm_fsa_trigger, NULL);
config_read = mainloop_add_trigger(G_PRIORITY_HIGH, crm_read_options, NULL);
transition_trigger = mainloop_add_trigger(G_PRIORITY_LOW, te_graph_trigger, NULL);
crm_debug("Creating CIB and LRM objects");
fsa_cib_conn = cib_new();
lrm_state_init_local();
/* set up the timers */
transition_timer = calloc(1, sizeof(fsa_timer_t));
integration_timer = calloc(1, sizeof(fsa_timer_t));
finalization_timer = calloc(1, sizeof(fsa_timer_t));
election_trigger = calloc(1, sizeof(fsa_timer_t));
shutdown_escalation_timer = calloc(1, sizeof(fsa_timer_t));
wait_timer = calloc(1, sizeof(fsa_timer_t));
recheck_timer = calloc(1, sizeof(fsa_timer_t));
if (election_trigger != NULL) {
election_trigger->source_id = 0;
election_trigger->period_ms = -1;
election_trigger->fsa_input = I_DC_TIMEOUT;
election_trigger->callback = crm_timer_popped;
election_trigger->repeat = FALSE;
} else {
was_error = TRUE;
}
if (transition_timer != NULL) {
transition_timer->source_id = 0;
transition_timer->period_ms = -1;
transition_timer->fsa_input = I_PE_CALC;
transition_timer->callback = crm_timer_popped;
transition_timer->repeat = FALSE;
} else {
was_error = TRUE;
}
if (integration_timer != NULL) {
integration_timer->source_id = 0;
integration_timer->period_ms = -1;
integration_timer->fsa_input = I_INTEGRATED;
integration_timer->callback = crm_timer_popped;
integration_timer->repeat = FALSE;
} else {
was_error = TRUE;
}
if (finalization_timer != NULL) {
finalization_timer->source_id = 0;
finalization_timer->period_ms = -1;
finalization_timer->fsa_input = I_FINALIZED;
finalization_timer->callback = crm_timer_popped;
finalization_timer->repeat = FALSE;
/* for possible enabling... a bug in the join protocol left
* a slave in S_PENDING while we think it's in S_NOT_DC
*
* raising I_FINALIZED put us into a transition loop which is
* never resolved.
* in this loop we continually send probes which the node
* NACK's because it's in S_PENDING
*
* if we have nodes where heartbeat is active but the
* CRM is not... then this will be handled in the
* integration phase
*/
finalization_timer->fsa_input = I_ELECTION;
} else {
was_error = TRUE;
}
if (shutdown_escalation_timer != NULL) {
shutdown_escalation_timer->source_id = 0;
shutdown_escalation_timer->period_ms = -1;
shutdown_escalation_timer->fsa_input = I_STOP;
shutdown_escalation_timer->callback = crm_timer_popped;
shutdown_escalation_timer->repeat = FALSE;
} else {
was_error = TRUE;
}
if (wait_timer != NULL) {
wait_timer->source_id = 0;
wait_timer->period_ms = 2000;
wait_timer->fsa_input = I_NULL;
wait_timer->callback = crm_timer_popped;
wait_timer->repeat = FALSE;
} else {
was_error = TRUE;
}
if (recheck_timer != NULL) {
recheck_timer->source_id = 0;
recheck_timer->period_ms = -1;
recheck_timer->fsa_input = I_PE_CALC;
recheck_timer->callback = crm_timer_popped;
recheck_timer->repeat = FALSE;
} else {
was_error = TRUE;
}
/* set up the sub systems */
cib_subsystem = calloc(1, sizeof(struct crm_subsystem_s));
te_subsystem = calloc(1, sizeof(struct crm_subsystem_s));
pe_subsystem = calloc(1, sizeof(struct crm_subsystem_s));
if (cib_subsystem != NULL) {
cib_subsystem->pid = -1;
cib_subsystem->name = CRM_SYSTEM_CIB;
cib_subsystem->flag_connected = R_CIB_CONNECTED;
cib_subsystem->flag_required = R_CIB_REQUIRED;
} else {
was_error = TRUE;
}
if (te_subsystem != NULL) {
te_subsystem->pid = -1;
te_subsystem->name = CRM_SYSTEM_TENGINE;
te_subsystem->flag_connected = R_TE_CONNECTED;
te_subsystem->flag_required = R_TE_REQUIRED;
} else {
was_error = TRUE;
}
if (pe_subsystem != NULL) {
pe_subsystem->pid = -1;
pe_subsystem->path = CRM_DAEMON_DIR;
pe_subsystem->name = CRM_SYSTEM_PENGINE;
pe_subsystem->command = CRM_DAEMON_DIR "/" CRM_SYSTEM_PENGINE;
pe_subsystem->args = NULL;
pe_subsystem->flag_connected = R_PE_CONNECTED;
pe_subsystem->flag_required = R_PE_REQUIRED;
} else {
was_error = TRUE;
}
if (was_error == FALSE && need_spawn_pengine_from_crmd()) {
if (start_subsystem(pe_subsystem) == FALSE) {
was_error = TRUE;
}
}
if (was_error) {
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
}
static int32_t
crmd_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
crm_trace("Connection %p", c);
if (crm_client_new(c, uid, gid) == NULL) {
return -EIO;
}
return 0;
}
static void
crmd_ipc_created(qb_ipcs_connection_t * c)
{
crm_trace("Connection %p", c);
}
static int32_t
crmd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size)
{
uint32_t id = 0;
uint32_t flags = 0;
crm_client_t *client = crm_client_get(c);
xmlNode *msg = crm_ipcs_recv(client, data, size, &id, &flags);
crm_trace("Invoked: %s", crm_client_name(client));
crm_ipcs_send_ack(client, id, flags, "ack", __FUNCTION__, __LINE__);
if (msg == NULL) {
return 0;
}
#if ENABLE_ACL
CRM_ASSERT(client->user != NULL);
crm_acl_get_set_user(msg, F_CRM_USER, client->user);
#endif
crm_trace("Processing msg from %s", crm_client_name(client));
crm_log_xml_trace(msg, "CRMd[inbound]");
crm_xml_add(msg, F_CRM_SYS_FROM, client->id);
if (crmd_authorize_message(msg, client, NULL)) {
route_message(C_IPC_MESSAGE, msg);
}
trigger_fsa(fsa_source);
free_xml(msg);
return 0;
}
static int32_t
crmd_ipc_closed(qb_ipcs_connection_t * c)
{
crm_client_t *client = crm_client_get(c);
struct crm_subsystem_s *the_subsystem = NULL;
if (client == NULL) {
return 0;
}
crm_trace("Connection %p", c);
if (client->userdata == NULL) {
crm_trace("Client hadn't registered with us yet");
} else if (strcasecmp(CRM_SYSTEM_PENGINE, client->userdata) == 0) {
the_subsystem = pe_subsystem;
} else if (strcasecmp(CRM_SYSTEM_TENGINE, client->userdata) == 0) {
the_subsystem = te_subsystem;
} else if (strcasecmp(CRM_SYSTEM_CIB, client->userdata) == 0) {
the_subsystem = cib_subsystem;
}
if (the_subsystem != NULL) {
the_subsystem->source = NULL;
the_subsystem->client = NULL;
crm_info("Received HUP from %s:[%d]", the_subsystem->name, the_subsystem->pid);
} else {
/* else that was a transient client */
crm_trace("Received HUP from transient client");
}
crm_trace("Disconnecting client %s (%p)", crm_client_name(client), client);
free(client->userdata);
crm_client_destroy(client);
trigger_fsa(fsa_source);
return 0;
}
static void
crmd_ipc_destroy(qb_ipcs_connection_t * c)
{
crm_trace("Connection %p", c);
crmd_ipc_closed(c);
}
/* A_STOP */
void
do_stop(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
crm_trace("Closing IPC server");
mainloop_del_ipc_server(ipcs); ipcs = NULL;
register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
}
/* A_STARTED */
void
do_started(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
static struct qb_ipcs_service_handlers crmd_callbacks = {
.connection_accept = crmd_ipc_accept,
.connection_created = crmd_ipc_created,
.msg_process = crmd_ipc_dispatch,
.connection_closed = crmd_ipc_closed,
.connection_destroyed = crmd_ipc_destroy
};
if (cur_state != S_STARTING) {
crm_err("Start cancelled... %s", fsa_state2string(cur_state));
return;
} else if (is_set(fsa_input_register, R_MEMBERSHIP) == FALSE) {
crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP);
crmd_fsa_stall(TRUE);
return;
} else if (is_set(fsa_input_register, R_LRM_CONNECTED) == FALSE) {
crm_info("Delaying start, LRM not connected (%.16llx)", R_LRM_CONNECTED);
crmd_fsa_stall(TRUE);
return;
} else if (is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) {
crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED);
crmd_fsa_stall(TRUE);
return;
} else if (is_set(fsa_input_register, R_READ_CONFIG) == FALSE) {
crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG);
crmd_fsa_stall(TRUE);
return;
} else if (is_set(fsa_input_register, R_PEER_DATA) == FALSE) {
/* try reading from HA */
crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA);
#if SUPPORT_HEARTBEAT
if (is_heartbeat_cluster()) {
HA_Message *msg = NULL;
crm_trace("Looking for a HA message");
msg = fsa_cluster_conn->llc_ops->readmsg(fsa_cluster_conn, 0);
if (msg != NULL) {
crm_trace("There was a HA message");
ha_msg_del(msg);
}
}
#endif
crmd_fsa_stall(TRUE);
return;
}
crm_debug("Init server comms");
ipcs = crmd_ipc_server_init(&crmd_callbacks);
if (ipcs == NULL) {
crm_err("Failed to create IPC server: shutting down and inhibiting respawn");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
if (stonith_reconnect == NULL) {
int dummy;
stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW, te_connect_stonith, &dummy);
}
set_bit(fsa_input_register, R_ST_REQUIRED);
mainloop_set_trigger(stonith_reconnect);
crm_notice("The local CRM is operational");
clear_bit(fsa_input_register, R_STARTING);
register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL);
}
/* A_RECOVER */
void
do_recover(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
set_bit(fsa_input_register, R_IN_RECOVERY);
crm_warn("Fast-tracking shutdown in response to errors");
register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
}
/* *INDENT-OFF* */
pe_cluster_option crmd_opts[] = {
/* name, old-name, validate, values, default, short description, long description */
{ "dc-version", NULL, "string", NULL, "none", NULL,
"Version of Pacemaker on the cluster's DC.",
"Includes the hash which identifies the exact changeset it was built from. Used for diagnostic purposes."
},
{ "cluster-infrastructure", NULL, "string", NULL, "heartbeat", NULL,
"The messaging stack on which Pacemaker is currently running.",
"Used for informational and diagnostic purposes." },
{ XML_CONFIG_ATTR_DC_DEADTIME, "dc_deadtime", "time", NULL, "20s", &check_time,
"How long to wait for a response from other nodes during startup.",
"The \"correct\" value will depend on the speed/load of your network and the type of switches used."
},
{ XML_CONFIG_ATTR_RECHECK, "cluster_recheck_interval", "time",
"Zero disables polling. Positive values are an interval in seconds (unless other SI units are specified. eg. 5min)",
"15min", &check_timer,
"Polling interval for time based changes to options, resource parameters and constraints.",
"The Cluster is primarily event driven, however the configuration can have elements that change based on time."
" To ensure these changes take effect, we can optionally poll the cluster's status for changes."
},
#ifdef RHEL7_COMPAT
/* These options were superseded by the alerts feature and now are just an
* alternate interface to it. It was never released upstream, but was
* released in RHEL 7, so we allow it to be enabled at compile-time by
* defining RHEL7_COMPAT.
*/
{ "notification-agent", NULL, "string", NULL, "/dev/null", &check_script,
"Deprecated",
"Use alert path in alerts section instead"
},
{ "notification-recipient", NULL, "string", NULL, "", NULL,
"Deprecated",
"Use recipient value in alerts section instead"
},
#endif
{ "load-threshold", NULL, "percentage", NULL, "80%", &check_utilization,
"The maximum amount of system resources that should be used by nodes in the cluster",
"The cluster will slow down its recovery process when the amount of system resources used"
" (currently CPU) approaches this limit",
},
{ "node-action-limit", NULL, "integer", NULL, "0", &check_number,
"The maximum number of jobs that can be scheduled per node. Defaults to 2x cores"},
{ XML_CONFIG_ATTR_ELECTION_FAIL, "election_timeout", "time", NULL, "2min", &check_timer,
"*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug."
},
{ XML_CONFIG_ATTR_FORCE_QUIT, "shutdown_escalation", "time", NULL, "20min", &check_timer,
"*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug."
},
{ "crmd-integration-timeout", NULL, "time", NULL, "3min", &check_timer,
"*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug."
},
{ "crmd-finalization-timeout", NULL, "time", NULL, "30min", &check_timer,
"*** Advanced Use Only ***.", "If you need to adjust this value, it probably indicates the presence of a bug."
},
{ "crmd-transition-delay", NULL, "time", NULL, "0s", &check_timer,
"*** Advanced Use Only ***\n"
"Enabling this option will slow down cluster recovery under all conditions",
"Delay cluster recovery for the configured interval to allow for additional/related events to occur.\n"
"Useful if your configuration is sensitive to the order in which ping updates arrive."
},
{ "stonith-watchdog-timeout", NULL, "time", NULL, NULL, &check_sbd_timeout,
"How long to wait before we can assume nodes are safely down", NULL
},
{ "stonith-max-attempts",NULL,"integer",NULL,"10",&check_positive_number,
"How many times stonith can fail before it will no longer be attempted on a target"
},
{ "no-quorum-policy", "no_quorum_policy", "enum", "stop, freeze, ignore, suicide", "stop", &check_quorum, NULL, NULL },
#if SUPPORT_PLUGIN
{ XML_ATTR_EXPECTED_VOTES, NULL, "integer", NULL, "2", &check_number, "The number of nodes expected to be in the cluster", "Used to calculate quorum in openais based clusters." },
#endif
};
/* *INDENT-ON* */
void
crmd_metadata(void)
{
config_metadata("CRM Daemon", "1.0",
"CRM Daemon Options",
"This is a fake resource that details the options that can be configured for the CRM Daemon.",
crmd_opts, DIMOF(crmd_opts));
}
static void
verify_crmd_options(GHashTable * options)
{
verify_all_options(options, crmd_opts, DIMOF(crmd_opts));
}
static const char *
crmd_pref(GHashTable * options, const char *name)
{
return get_cluster_pref(options, crmd_opts, DIMOF(crmd_opts), name);
}
static void
config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
const char *value = NULL;
GHashTable *config_hash = NULL;
crm_time_t *now = crm_time_new(NULL);
xmlNode *crmconfig = NULL;
xmlNode *alerts = NULL;
if (rc != pcmk_ok) {
fsa_data_t *msg_data = NULL;
crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc));
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
if (rc == -EACCES || rc == -pcmk_err_schema_validation) {
crm_err("The cluster is mis-configured - shutting down and staying down");
set_bit(fsa_input_register, R_STAYDOWN);
}
goto bail;
}
crmconfig = output;
if ((crmconfig) &&
(crm_element_name(crmconfig)) &&
(strcmp(crm_element_name(crmconfig), XML_CIB_TAG_CRMCONFIG) != 0)) {
crmconfig = first_named_child(crmconfig, XML_CIB_TAG_CRMCONFIG);
}
if (!crmconfig) {
fsa_data_t *msg_data = NULL;
crm_err("Local CIB query for " XML_CIB_TAG_CRMCONFIG " section failed");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
goto bail;
}
crm_debug("Call %d : Parsing CIB options", call_id);
config_hash = crm_str_table_new();
unpack_instance_attributes(crmconfig, crmconfig, XML_CIB_TAG_PROPSET, NULL, config_hash,
CIB_OPTIONS_FIRST, FALSE, now);
verify_crmd_options(config_hash);
#ifdef RHEL7_COMPAT
{
const char *script = crmd_pref(config_hash, "notification-agent");
const char *recip = crmd_pref(config_hash, "notification-recipient");
pe_enable_legacy_alerts(script, recip);
}
#endif
value = crmd_pref(config_hash, XML_CONFIG_ATTR_DC_DEADTIME);
election_trigger->period_ms = crm_get_msec(value);
value = crmd_pref(config_hash, "node-action-limit"); /* Also checks migration-limit */
throttle_update_job_max(value);
value = crmd_pref(config_hash, "load-threshold");
if(value) {
throttle_set_load_target(strtof(value, NULL) / 100.0);
}
value = crmd_pref(config_hash, "no-quorum-policy");
if (safe_str_eq(value, "suicide") && pcmk_locate_sbd()) {
no_quorum_suicide_escalation = TRUE;
}
value = crmd_pref(config_hash,"stonith-max-attempts");
update_stonith_max_attempts(value);
value = crmd_pref(config_hash, XML_CONFIG_ATTR_FORCE_QUIT);
shutdown_escalation_timer->period_ms = crm_get_msec(value);
/* How long to declare an election over - even if not everyone voted */
crm_debug("Shutdown escalation occurs after: %dms", shutdown_escalation_timer->period_ms);
value = crmd_pref(config_hash, XML_CONFIG_ATTR_ELECTION_FAIL);
controld_set_election_period(value);
value = crmd_pref(config_hash, XML_CONFIG_ATTR_RECHECK);
recheck_timer->period_ms = crm_get_msec(value);
crm_debug("Checking for expired actions every %dms", recheck_timer->period_ms);
value = crmd_pref(config_hash, "crmd-transition-delay");
transition_timer->period_ms = crm_get_msec(value);
value = crmd_pref(config_hash, "crmd-integration-timeout");
integration_timer->period_ms = crm_get_msec(value);
value = crmd_pref(config_hash, "crmd-finalization-timeout");
finalization_timer->period_ms = crm_get_msec(value);
#if SUPPORT_COROSYNC
if (is_classic_ais_cluster()) {
value = crmd_pref(config_hash, XML_ATTR_EXPECTED_VOTES);
crm_debug("Sending expected-votes=%s to corosync", value);
send_cluster_text(crm_class_quorum, value, TRUE, NULL, crm_msg_ais);
}
#endif
free(fsa_cluster_name);
fsa_cluster_name = NULL;
value = g_hash_table_lookup(config_hash, "cluster-name");
if (value) {
fsa_cluster_name = strdup(value);
}
alerts = first_named_child(output, XML_CIB_TAG_ALERTS);
crmd_unpack_alerts(alerts);
set_bit(fsa_input_register, R_READ_CONFIG);
crm_trace("Triggering FSA: %s", __FUNCTION__);
mainloop_set_trigger(fsa_source);
g_hash_table_destroy(config_hash);
bail:
crm_time_free(now);
}
gboolean
crm_read_options(gpointer user_data)
{
int call_id =
fsa_cib_conn->cmds->query(fsa_cib_conn,
"//" XML_CIB_TAG_CRMCONFIG " | //" XML_CIB_TAG_ALERTS,
NULL, cib_xpath | cib_scope_local);
fsa_register_cib_callback(call_id, FALSE, NULL, config_query_callback);
crm_trace("Querying the CIB... call %d", call_id);
return TRUE;
}
/* A_READCONFIG */
void
do_read_config(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
throttle_init();
mainloop_set_trigger(config_read);
}
void
crm_shutdown(int nsig)
{
if (crmd_mainloop != NULL && g_main_is_running(crmd_mainloop)) {
if (is_set(fsa_input_register, R_SHUTDOWN)) {
crm_err("Escalating the shutdown");
register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL);
} else {
set_bit(fsa_input_register, R_SHUTDOWN);
register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);
if (shutdown_escalation_timer->period_ms < 1) {
const char *value = crmd_pref(NULL, XML_CONFIG_ATTR_FORCE_QUIT);
int msec = crm_get_msec(value);
crm_debug("Using default shutdown escalation: %dms", msec);
shutdown_escalation_timer->period_ms = msec;
}
/* can't rely on this... */
crm_notice("Shutting down cluster resource manager " CRM_XS
" limit=%dms", shutdown_escalation_timer->period_ms);
crm_timer_start(shutdown_escalation_timer);
}
} else {
crm_info("exit from shutdown");
crmd_exit(pcmk_ok);
}
}
diff --git a/crmd/main.c b/crmd/main.c
index e8baa125e9..6eb7c032f9 100644
--- a/crmd/main.c
+++ b/crmd/main.c
@@ -1,204 +1,209 @@
/*
- * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This software is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ * Copyright 2004-2019 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <crm/crm.h>
#include <crm/common/ipc.h>
#include <crm/common/xml.h>
#include <crmd.h>
#include <crmd_fsa.h>
#include <crmd_messages.h>
#define OPTARGS "hV"
void usage(const char *cmd, int exit_status);
int crmd_init(void);
void crmd_hamsg_callback(const xmlNode * msg, void *private_data);
extern void init_dotfile(void);
GMainLoop *crmd_mainloop = NULL;
/* *INDENT-OFF* */
static struct crm_option long_options[] = {
/* Top-level Options */
{"help", 0, 0, '?', "\tThis text"},
{"verbose", 0, 0, 'V', "\tIncrease debug output"},
{0, 0, 0, 0}
};
/* *INDENT-ON* */
int
main(int argc, char **argv)
{
int flag;
int index = 0;
int argerr = 0;
+ crm_ipc_t *old_instance = NULL;
crmd_mainloop = g_main_new(FALSE);
crm_log_preinit(NULL, argc, argv);
crm_set_options(NULL, "[options]", long_options,
"Daemon for aggregating resource and node failures as well as co-ordinating the cluster's response");
while (1) {
flag = crm_get_option(argc, argv, &index);
if (flag == -1)
break;
switch (flag) {
case 'V':
crm_bump_log_level(argc, argv);
break;
case 'h': /* Help message */
crm_help(flag, EX_OK);
break;
default:
++argerr;
break;
}
}
if (argc - optind == 1 && safe_str_eq("metadata", argv[optind])) {
crmd_metadata();
return 0;
} else if (argc - optind == 1 && safe_str_eq("version", argv[optind])) {
fprintf(stdout, "CRM Version: %s (%s)\n", PACEMAKER_VERSION, BUILD_VERSION);
return 0;
}
crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
crm_info("CRM Git Version: %s (%s)", PACEMAKER_VERSION, BUILD_VERSION);
if (optind > argc) {
++argerr;
}
if (argerr) {
crm_help('?', EX_USAGE);
}
+ old_instance = crm_ipc_new(CRM_SYSTEM_CRMD, 0);
+ if (crm_ipc_connect(old_instance)) {
+ /* IPC end-point already up */
+ crm_ipc_close(old_instance);
+ crm_ipc_destroy(old_instance);
+ crm_err("crmd is already active, aborting startup");
+ crm_exit(EX_OK);
+ } else {
+ /* not up or not authentic, we'll proceed either way */
+ crm_ipc_destroy(old_instance);
+ old_instance = NULL;
+ }
+
if (pcmk__daemon_can_write(PE_STATE_DIR, NULL) == FALSE) {
crm_err("Terminating due to bad permissions on " PE_STATE_DIR);
fprintf(stderr,
"ERROR: Bad permissions on " PE_STATE_DIR " (see logs for details)\n");
fflush(stderr);
return 100;
} else if (pcmk__daemon_can_write(CRM_CONFIG_DIR, NULL) == FALSE) {
crm_err("Terminating due to bad permissions on " CRM_CONFIG_DIR);
fprintf(stderr,
"ERROR: Bad permissions on " CRM_CONFIG_DIR " (see logs for details)\n");
fflush(stderr);
return 100;
}
return crmd_init();
}
static void
log_deprecation_warnings()
{
enum cluster_type_e cluster_type = get_cluster_type();
if (cluster_type != pcmk_cluster_corosync) {
crm_warn("Support for cluster infrastructure %s "
" is deprecated and will be removed in a future release",
name_for_cluster_type(cluster_type));
}
#if ENABLE_SNMP
crm_warn("Compile-time support for crm_mon SNMP options"
" is deprecated and will be removed in a future release"
" (configure alerts instead)",
name_for_cluster_type(cluster_type));
#endif
#if ENABLE_ESMTP
crm_warn("Compile-time support for crm_mon SMTP options"
" is deprecated and will be removed in a future release"
" (configure alerts instead)",
name_for_cluster_type(cluster_type));
#endif
if (getenv("LRMD_MAX_CHILDREN")) {
crm_warn("The LRMD_MAX_CHILDREN environment variable"
" is deprecated and will be removed in a future release"
" (use PCMK_node_action_limit instead)");
}
}
int
crmd_init(void)
{
int exit_code = 0;
enum crmd_fsa_state state;
log_deprecation_warnings();
fsa_state = S_STARTING;
fsa_input_register = 0; /* zero out the regester */
init_dotfile();
crm_debug("Starting %s", crm_system_name);
register_fsa_input(C_STARTUP, I_STARTUP, NULL);
crm_peer_init();
state = s_crmd_fsa(C_STARTUP);
if (state == S_PENDING || state == S_STARTING) {
/* Create the mainloop and run it... */
crm_trace("Starting %s's mainloop", crm_system_name);
#ifdef REALTIME_SUPPORT
static int crm_realtime = 1;
if (crm_realtime == 1) {
cl_enable_realtime();
} else if (crm_realtime == 0) {
cl_disable_realtime();
}
cl_make_realtime(SCHED_RR, 5, 64, 64);
#endif
g_main_run(crmd_mainloop);
if (is_set(fsa_input_register, R_STAYDOWN)) {
crm_info("Inhibiting automated respawn");
exit_code = 100;
}
} else {
crm_err("Startup of %s failed. Current state: %s",
crm_system_name, fsa_state2string(state));
exit_code = 1;
}
crm_info("%lu stopped: %s (%d)",
(unsigned long) getpid(), pcmk_strerror(exit_code), exit_code);
return crmd_fast_exit(exit_code);
}
diff --git a/fencing/main.c b/fencing/main.c
index 16663f6259..c46c9a531b 100644
--- a/fencing/main.c
+++ b/fencing/main.c
@@ -1,1566 +1,1572 @@
/*
- * Copyright (C) 2009 Andrew Beekhof <andrew@beekhof.net>
+ * Copyright 2009-2019 the Pacemaker project contributors
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
+ * The version control history for this file may have further details.
*
- * This software is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/utsname.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/ipc.h>
#include <crm/common/ipcs.h>
#include <crm/cluster/internal.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <crm/common/xml.h>
#include <crm/common/mainloop.h>
#include <crm/cib/internal.h>
#include <crm/pengine/status.h>
#include <allocate.h>
#include <internal.h>
#include <standalone_config.h>
char *stonith_our_uname = NULL;
char *stonith_our_uuid = NULL;
long stonith_watchdog_timeout_ms = 0;
GMainLoop *mainloop = NULL;
gboolean stand_alone = FALSE;
gboolean no_cib_connect = FALSE;
gboolean stonith_shutdown_flag = FALSE;
qb_ipcs_service_t *ipcs = NULL;
xmlNode *local_cib = NULL;
static pe_working_set_t *fenced_data_set = NULL;
static cib_t *cib_api = NULL;
static void *cib_library = NULL;
static void stonith_shutdown(int nsig);
static void stonith_cleanup(void);
static int32_t
st_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
if (stonith_shutdown_flag) {
crm_info("Ignoring new client [%d] during shutdown", crm_ipcs_client_pid(c));
return -EPERM;
}
if (crm_client_new(c, uid, gid) == NULL) {
return -EIO;
}
return 0;
}
static void
st_ipc_created(qb_ipcs_connection_t * c)
{
crm_trace("Connection created for %p", c);
}
/* Exit code means? */
static int32_t
st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
{
uint32_t id = 0;
uint32_t flags = 0;
int call_options = 0;
xmlNode *request = NULL;
crm_client_t *c = crm_client_get(qbc);
const char *op = NULL;
if (c == NULL) {
crm_info("Invalid client: %p", qbc);
return 0;
}
request = crm_ipcs_recv(c, data, size, &id, &flags);
if (request == NULL) {
crm_ipcs_send_ack(c, id, flags, "nack", __FUNCTION__, __LINE__);
return 0;
}
op = crm_element_value(request, F_CRM_TASK);
if(safe_str_eq(op, CRM_OP_RM_NODE_CACHE)) {
crm_xml_add(request, F_TYPE, T_STONITH_NG);
crm_xml_add(request, F_STONITH_OPERATION, op);
crm_xml_add(request, F_STONITH_CLIENTID, c->id);
crm_xml_add(request, F_STONITH_CLIENTNAME, crm_client_name(c));
crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname);
send_cluster_message(NULL, crm_msg_stonith_ng, request, FALSE);
free_xml(request);
return 0;
}
if (c->name == NULL) {
const char *value = crm_element_value(request, F_STONITH_CLIENTNAME);
if (value == NULL) {
value = "unknown";
}
c->name = crm_strdup_printf("%s.%u", value, c->pid);
}
crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
crm_trace("Flags %u/%u for command %u from %s", flags, call_options, id, crm_client_name(c));
if (is_set(call_options, st_opt_sync_call)) {
CRM_ASSERT(flags & crm_ipc_client_response);
CRM_LOG_ASSERT(c->request_id == 0); /* This means the client has two synchronous events in-flight */
c->request_id = id; /* Reply only to the last one */
}
crm_xml_add(request, F_STONITH_CLIENTID, c->id);
crm_xml_add(request, F_STONITH_CLIENTNAME, crm_client_name(c));
crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname);
crm_log_xml_trace(request, "Client[inbound]");
stonith_command(c, id, flags, request, NULL);
free_xml(request);
return 0;
}
/* Error code means? */
static int32_t
st_ipc_closed(qb_ipcs_connection_t * c)
{
crm_client_t *client = crm_client_get(c);
if (client == NULL) {
return 0;
}
crm_trace("Connection %p closed", c);
crm_client_destroy(client);
/* 0 means: yes, go ahead and destroy the connection */
return 0;
}
static void
st_ipc_destroy(qb_ipcs_connection_t * c)
{
crm_trace("Connection %p destroyed", c);
st_ipc_closed(c);
}
static void
stonith_peer_callback(xmlNode * msg, void *private_data)
{
const char *remote_peer = crm_element_value(msg, F_ORIG);
const char *op = crm_element_value(msg, F_STONITH_OPERATION);
if (crm_str_eq(op, "poke", TRUE)) {
return;
}
crm_log_xml_trace(msg, "Peer[inbound]");
stonith_command(NULL, 0, 0, msg, remote_peer);
}
#if SUPPORT_HEARTBEAT
static void
stonith_peer_hb_callback(HA_Message * msg, void *private_data)
{
xmlNode *xml = convert_ha_message(NULL, msg, __FUNCTION__);
stonith_peer_callback(xml, private_data);
free_xml(xml);
}
static void
stonith_peer_hb_destroy(gpointer user_data)
{
if (stonith_shutdown_flag) {
crm_info("Heartbeat disconnection complete... exiting");
} else {
crm_err("Heartbeat connection lost! Exiting.");
}
stonith_shutdown(0);
}
#endif
#if SUPPORT_COROSYNC
static void
stonith_peer_ais_callback(cpg_handle_t handle,
const struct cpg_name *groupName,
uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
uint32_t kind = 0;
xmlNode *xml = NULL;
const char *from = NULL;
char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from);
if(data == NULL) {
return;
}
if (kind == crm_class_cluster) {
xml = string2xml(data);
if (xml == NULL) {
crm_err("Invalid XML: '%.120s'", data);
free(data);
return;
}
crm_xml_add(xml, F_ORIG, from);
/* crm_xml_add_int(xml, F_SEQ, wrapper->id); */
stonith_peer_callback(xml, NULL);
}
free_xml(xml);
free(data);
return;
}
static void
stonith_peer_cs_destroy(gpointer user_data)
{
crm_err("Corosync connection terminated");
stonith_shutdown(0);
}
#endif
void
do_local_reply(xmlNode * notify_src, const char *client_id, gboolean sync_reply, gboolean from_peer)
{
/* send callback to originating child */
crm_client_t *client_obj = NULL;
int local_rc = pcmk_ok;
crm_trace("Sending response");
client_obj = crm_client_get_by_id(client_id);
crm_trace("Sending callback to request originator");
if (client_obj == NULL) {
local_rc = -1;
crm_trace("No client to sent the response to. F_STONITH_CLIENTID not set.");
} else {
int rid = 0;
if (sync_reply) {
CRM_LOG_ASSERT(client_obj->request_id);
rid = client_obj->request_id;
client_obj->request_id = 0;
crm_trace("Sending response %d to %s %s",
rid, client_obj->name, from_peer ? "(originator of delegated request)" : "");
} else {
crm_trace("Sending an event to %s %s",
client_obj->name, from_peer ? "(originator of delegated request)" : "");
}
local_rc = crm_ipcs_send(client_obj, rid, notify_src, sync_reply?crm_ipc_flags_none:crm_ipc_server_event);
}
if (local_rc < pcmk_ok && client_obj != NULL) {
crm_warn("%sSync reply to %s failed: %s",
sync_reply ? "" : "A-",
client_obj ? client_obj->name : "<unknown>", pcmk_strerror(local_rc));
}
}
long long
get_stonith_flag(const char *name)
{
if (safe_str_eq(name, T_STONITH_NOTIFY_FENCE)) {
return st_callback_notify_fence;
} else if (safe_str_eq(name, STONITH_OP_DEVICE_ADD)) {
return st_callback_device_add;
} else if (safe_str_eq(name, STONITH_OP_DEVICE_DEL)) {
return st_callback_device_del;
} else if (safe_str_eq(name, T_STONITH_NOTIFY_HISTORY)) {
return st_callback_notify_history;
}
return st_callback_unknown;
}
static void
stonith_notify_client(gpointer key, gpointer value, gpointer user_data)
{
xmlNode *update_msg = user_data;
crm_client_t *client = value;
const char *type = NULL;
CRM_CHECK(client != NULL, return);
CRM_CHECK(update_msg != NULL, return);
type = crm_element_value(update_msg, F_SUBTYPE);
CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return);
if (client->ipcs == NULL) {
crm_trace("Skipping client with NULL channel");
return;
}
if (client->options & get_stonith_flag(type)) {
int rc = crm_ipcs_send(client, 0, update_msg, crm_ipc_server_event | crm_ipc_server_error);
if (rc <= 0) {
crm_warn("%s notification of client %s.%.6s failed: %s (%d)",
type, crm_client_name(client), client->id, pcmk_strerror(rc), rc);
} else {
crm_trace("Sent %s notification to client %s.%.6s", type, crm_client_name(client),
client->id);
}
}
}
void
do_stonith_async_timeout_update(const char *client_id, const char *call_id, int timeout)
{
crm_client_t *client = NULL;
xmlNode *notify_data = NULL;
if (!timeout || !call_id || !client_id) {
return;
}
client = crm_client_get_by_id(client_id);
if (!client) {
return;
}
notify_data = create_xml_node(NULL, T_STONITH_TIMEOUT_VALUE);
crm_xml_add(notify_data, F_TYPE, T_STONITH_TIMEOUT_VALUE);
crm_xml_add(notify_data, F_STONITH_CALLID, call_id);
crm_xml_add_int(notify_data, F_STONITH_TIMEOUT, timeout);
crm_trace("timeout update is %d for client %s and call id %s", timeout, client_id, call_id);
if (client) {
crm_ipcs_send(client, 0, notify_data, crm_ipc_server_event);
}
free_xml(notify_data);
}
void
do_stonith_notify(int options, const char *type, int result, xmlNode * data)
{
/* TODO: Standardize the contents of data */
xmlNode *update_msg = create_xml_node(NULL, "notify");
CRM_CHECK(type != NULL,;);
crm_xml_add(update_msg, F_TYPE, T_STONITH_NOTIFY);
crm_xml_add(update_msg, F_SUBTYPE, type);
crm_xml_add(update_msg, F_STONITH_OPERATION, type);
crm_xml_add_int(update_msg, F_STONITH_RC, result);
if (data != NULL) {
add_message_xml(update_msg, F_STONITH_CALLDATA, data);
}
crm_trace("Notifying clients");
g_hash_table_foreach(client_connections, stonith_notify_client, update_msg);
free_xml(update_msg);
crm_trace("Notify complete");
}
static void
do_stonith_notify_config(int options, const char *op, int rc,
const char *desc, int active)
{
xmlNode *notify_data = create_xml_node(NULL, op);
CRM_CHECK(notify_data != NULL, return);
crm_xml_add(notify_data, F_STONITH_DEVICE, desc);
crm_xml_add_int(notify_data, F_STONITH_ACTIVE, active);
do_stonith_notify(options, op, rc, notify_data);
free_xml(notify_data);
}
void
do_stonith_notify_device(int options, const char *op, int rc, const char *desc)
{
do_stonith_notify_config(options, op, rc, desc, g_hash_table_size(device_list));
}
void
do_stonith_notify_level(int options, const char *op, int rc, const char *desc)
{
do_stonith_notify_config(options, op, rc, desc, g_hash_table_size(topology));
}
static void
topology_remove_helper(const char *node, int level)
{
int rc;
char *desc = NULL;
xmlNode *data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL);
crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__);
crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level);
crm_xml_add(data, XML_ATTR_STONITH_TARGET, node);
rc = stonith_level_remove(data, &desc);
do_stonith_notify_level(0, STONITH_OP_LEVEL_DEL, rc, desc);
free_xml(data);
free(desc);
}
static void
remove_cib_device(xmlXPathObjectPtr xpathObj)
{
int max = numXpathResults(xpathObj), lpc = 0;
for (lpc = 0; lpc < max; lpc++) {
const char *rsc_id = NULL;
const char *standard = NULL;
xmlNode *match = getXpathResult(xpathObj, lpc);
CRM_LOG_ASSERT(match != NULL);
if(match != NULL) {
standard = crm_element_value(match, XML_AGENT_ATTR_CLASS);
}
if (safe_str_neq(standard, PCMK_RESOURCE_CLASS_STONITH)) {
continue;
}
rsc_id = crm_element_value(match, XML_ATTR_ID);
stonith_device_remove(rsc_id, TRUE);
}
}
static void
handle_topology_change(xmlNode *match, bool remove)
{
int rc;
char *desc = NULL;
CRM_CHECK(match != NULL, return);
crm_trace("Updating %s", ID(match));
if(remove) {
int index = 0;
char *key = stonith_level_key(match, -1);
crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index);
topology_remove_helper(key, index);
free(key);
}
rc = stonith_level_register(match, &desc);
do_stonith_notify_level(0, STONITH_OP_LEVEL_ADD, rc, desc);
free(desc);
}
static void
remove_fencing_topology(xmlXPathObjectPtr xpathObj)
{
int max = numXpathResults(xpathObj), lpc = 0;
for (lpc = 0; lpc < max; lpc++) {
xmlNode *match = getXpathResult(xpathObj, lpc);
CRM_LOG_ASSERT(match != NULL);
if (match && crm_element_value(match, XML_DIFF_MARKER)) {
/* Deletion */
int index = 0;
char *target = stonith_level_key(match, -1);
crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index);
if (target == NULL) {
crm_err("Invalid fencing target in element %s", ID(match));
} else if (index <= 0) {
crm_err("Invalid level for %s in element %s", target, ID(match));
} else {
topology_remove_helper(target, index);
}
/* } else { Deal with modifications during the 'addition' stage */
}
}
}
static void
register_fencing_topology(xmlXPathObjectPtr xpathObj)
{
int max = numXpathResults(xpathObj), lpc = 0;
for (lpc = 0; lpc < max; lpc++) {
xmlNode *match = getXpathResult(xpathObj, lpc);
handle_topology_change(match, TRUE);
}
}
/* Fencing
<diff crm_feature_set="3.0.6">
<diff-removed>
<fencing-topology>
<fencing-level id="f-p1.1" target="pcmk-1" index="1" devices="poison-pill" __crm_diff_marker__="removed:top"/>
<fencing-level id="f-p1.2" target="pcmk-1" index="2" devices="power" __crm_diff_marker__="removed:top"/>
<fencing-level devices="disk,network" id="f-p2.1"/>
</fencing-topology>
</diff-removed>
<diff-added>
<fencing-topology>
<fencing-level id="f-p.1" target="pcmk-1" index="1" devices="poison-pill" __crm_diff_marker__="added:top"/>
<fencing-level id="f-p2.1" target="pcmk-2" index="1" devices="disk,something"/>
<fencing-level id="f-p3.1" target="pcmk-2" index="2" devices="power" __crm_diff_marker__="added:top"/>
</fencing-topology>
</diff-added>
</diff>
*/
static void
fencing_topology_init()
{
xmlXPathObjectPtr xpathObj = NULL;
const char *xpath = "//" XML_TAG_FENCING_LEVEL;
crm_trace("Full topology refresh");
if(topology) {
g_hash_table_destroy(topology);
topology = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_topology_entry);
}
/* Grab everything */
xpathObj = xpath_search(local_cib, xpath);
register_fencing_topology(xpathObj);
freeXpathObject(xpathObj);
}
#define rsc_name(x) x->clone_name?x->clone_name:x->id
/*!
* \internal
* \brief Check whether our uname is in a resource's allowed node list
*
* \param[in] rsc Resource to check
*
* \return Pointer to node object if found, NULL otherwise
*/
static node_t *
our_node_allowed_for(resource_t *rsc)
{
GHashTableIter iter;
node_t *node = NULL;
if (rsc && stonith_our_uname) {
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
if (node && strcmp(node->details->uname, stonith_our_uname) == 0) {
break;
}
node = NULL;
}
}
return node;
}
/*!
* \internal
* \brief If a resource or any of its children are STONITH devices, update their
* definitions given a cluster working set.
*
* \param[in] rsc Resource to check
* \param[in] data_set Cluster working set with device information
*/
static void cib_device_update(resource_t *rsc, pe_working_set_t *data_set)
{
node_t *node = NULL;
const char *value = NULL;
const char *rclass = NULL;
node_t *parent = NULL;
gboolean remove = TRUE;
/* If this is a complex resource, check children rather than this resource itself.
* TODO: Mark each installed device and remove if untouched when this process finishes.
*/
if(rsc->children) {
GListPtr gIter = NULL;
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
cib_device_update(gIter->data, data_set);
if(pe_rsc_is_clone(rsc)) {
crm_trace("Only processing one copy of the clone %s", rsc->id);
break;
}
}
return;
}
/* We only care about STONITH resources. */
rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
if (safe_str_neq(rclass, PCMK_RESOURCE_CLASS_STONITH)) {
return;
}
/* If this STONITH resource is disabled, just remove it. */
value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE);
if (safe_str_eq(value, RSC_STOPPED)) {
crm_info("Device %s has been disabled", rsc->id);
goto update_done;
}
/* Check whether our node is allowed for this resource (and its parent if in a group) */
node = our_node_allowed_for(rsc);
if (rsc->parent && (rsc->parent->variant == pe_group)) {
parent = our_node_allowed_for(rsc->parent);
}
if(node == NULL) {
/* Our node is disallowed, so remove the device */
GHashTableIter iter;
crm_info("Device %s has been disabled on %s: unknown", rsc->id, stonith_our_uname);
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
crm_trace("Available: %s = %d", node->details->uname, node->weight);
}
goto update_done;
} else if(node->weight < 0 || (parent && parent->weight < 0)) {
/* Our node (or its group) is disallowed by score, so remove the device */
char *score = score2char((node->weight < 0) ? node->weight : parent->weight);
crm_info("Device %s has been disabled on %s: score=%s", rsc->id, stonith_our_uname, score);
free(score);
goto update_done;
} else {
/* Our node is allowed, so update the device information */
xmlNode *data;
GHashTableIter gIter;
stonith_key_value_t *params = NULL;
const char *name = NULL;
const char *agent = crm_element_value(rsc->xml, XML_EXPR_ATTR_TYPE);
const char *rsc_provides = NULL;
crm_debug("Device %s is allowed on %s: score=%d", rsc->id, stonith_our_uname, node->weight);
get_rsc_attributes(rsc->parameters, rsc, node, data_set);
get_meta_attributes(rsc->meta, rsc, node, data_set);
rsc_provides = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_PROVIDES);
g_hash_table_iter_init(&gIter, rsc->parameters);
while (g_hash_table_iter_next(&gIter, (gpointer *) & name, (gpointer *) & value)) {
if (!name || !value) {
continue;
}
params = stonith_key_value_add(params, name, value);
crm_trace(" %s=%s", name, value);
}
remove = FALSE;
data = create_device_registration_xml(rsc_name(rsc), st_namespace_any,
agent, params, rsc_provides);
stonith_device_register(data, NULL, TRUE);
stonith_key_value_freeall(params, 1, 1);
free_xml(data);
}
update_done:
if(remove && g_hash_table_lookup(device_list, rsc_name(rsc))) {
stonith_device_remove(rsc_name(rsc), TRUE);
}
}
extern xmlNode *do_calculations(pe_working_set_t * data_set, xmlNode * xml_input, crm_time_t * now);
/*!
* \internal
* \brief Update all STONITH device definitions based on current CIB
*/
static void
cib_devices_update(void)
{
GListPtr gIter = NULL;
crm_info("Updating devices to version %s.%s.%s",
crm_element_value(local_cib, XML_ATTR_GENERATION_ADMIN),
crm_element_value(local_cib, XML_ATTR_GENERATION),
crm_element_value(local_cib, XML_ATTR_NUMUPDATES));
CRM_ASSERT(fenced_data_set != NULL);
fenced_data_set->input = local_cib;
fenced_data_set->now = crm_time_new(NULL);
fenced_data_set->flags |= pe_flag_quick_location;
fenced_data_set->localhost = stonith_our_uname;
cluster_status(fenced_data_set);
do_calculations(fenced_data_set, NULL, NULL);
for (gIter = fenced_data_set->resources; gIter != NULL; gIter = gIter->next) {
cib_device_update(gIter->data, fenced_data_set);
}
fenced_data_set->input = NULL; // Wasn't a copy, so don't let API free it
pe_reset_working_set(fenced_data_set);
}
static void
update_cib_stonith_devices_v2(const char *event, xmlNode * msg)
{
xmlNode *change = NULL;
char *reason = NULL;
bool needs_update = FALSE;
xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);
for (change = __xml_first_child(patchset); change != NULL; change = __xml_next(change)) {
const char *op = crm_element_value(change, XML_DIFF_OP);
const char *xpath = crm_element_value(change, XML_DIFF_PATH);
const char *shortpath = NULL;
if(op == NULL || strcmp(op, "move") == 0) {
continue;
} else if(safe_str_eq(op, "delete") && strstr(xpath, XML_CIB_TAG_RESOURCE)) {
const char *rsc_id = NULL;
char *search = NULL;
char *mutable = NULL;
if (strstr(xpath, XML_TAG_ATTR_SETS)) {
needs_update = TRUE;
break;
}
mutable = strdup(xpath);
rsc_id = strstr(mutable, "primitive[@id=\'");
if (rsc_id != NULL) {
rsc_id += strlen("primitive[@id=\'");
search = strchr(rsc_id, '\'');
}
if (search != NULL) {
*search = 0;
stonith_device_remove(rsc_id, TRUE);
} else {
crm_warn("Ignoring malformed CIB update (resource deletion)");
}
free(mutable);
} else if(strstr(xpath, XML_CIB_TAG_RESOURCES)) {
shortpath = strrchr(xpath, '/'); CRM_ASSERT(shortpath);
reason = crm_strdup_printf("%s %s", op, shortpath+1);
needs_update = TRUE;
break;
} else if(strstr(xpath, XML_CIB_TAG_CONSTRAINTS)) {
shortpath = strrchr(xpath, '/'); CRM_ASSERT(shortpath);
reason = crm_strdup_printf("%s %s", op, shortpath+1);
needs_update = TRUE;
break;
}
}
if(needs_update) {
crm_info("Updating device list from the cib: %s", reason);
cib_devices_update();
} else {
crm_trace("No updates for device list found in cib");
}
free(reason);
}
static void
update_cib_stonith_devices_v1(const char *event, xmlNode * msg)
{
const char *reason = "none";
gboolean needs_update = FALSE;
xmlXPathObjectPtr xpath_obj = NULL;
/* process new constraints */
xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_CONS_TAG_RSC_LOCATION);
if (numXpathResults(xpath_obj) > 0) {
int max = numXpathResults(xpath_obj), lpc = 0;
/* Safest and simplest to always recompute */
needs_update = TRUE;
reason = "new location constraint";
for (lpc = 0; lpc < max; lpc++) {
xmlNode *match = getXpathResult(xpath_obj, lpc);
crm_log_xml_trace(match, "new constraint");
}
}
freeXpathObject(xpath_obj);
/* process deletions */
xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_RESOURCE);
if (numXpathResults(xpath_obj) > 0) {
remove_cib_device(xpath_obj);
}
freeXpathObject(xpath_obj);
/* process additions */
xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_RESOURCE);
if (numXpathResults(xpath_obj) > 0) {
int max = numXpathResults(xpath_obj), lpc = 0;
for (lpc = 0; lpc < max; lpc++) {
const char *rsc_id = NULL;
const char *standard = NULL;
xmlNode *match = getXpathResult(xpath_obj, lpc);
rsc_id = crm_element_value(match, XML_ATTR_ID);
standard = crm_element_value(match, XML_AGENT_ATTR_CLASS);
if (safe_str_neq(standard, PCMK_RESOURCE_CLASS_STONITH)) {
continue;
}
crm_trace("Fencing resource %s was added or modified", rsc_id);
reason = "new resource";
needs_update = TRUE;
}
}
freeXpathObject(xpath_obj);
if(needs_update) {
crm_info("Updating device list from the cib: %s", reason);
cib_devices_update();
}
}
static void
update_cib_stonith_devices(const char *event, xmlNode * msg)
{
int format = 1;
xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);
CRM_ASSERT(patchset);
crm_element_value_int(patchset, "format", &format);
switch(format) {
case 1:
update_cib_stonith_devices_v1(event, msg);
break;
case 2:
update_cib_stonith_devices_v2(event, msg);
break;
default:
crm_warn("Unknown patch format: %d", format);
}
}
/* Needs to hold node name + attribute name + attribute value + 75 */
#define XPATH_MAX 512
/*!
* \internal
* \brief Check whether a node has a specific attribute name/value
*
* \param[in] node Name of node to check
* \param[in] name Name of an attribute to look for
* \param[in] value The value the named attribute needs to be set to in order to be considered a match
*
* \return TRUE if the locally cached CIB has the specified node attribute
*/
gboolean
node_has_attr(const char *node, const char *name, const char *value)
{
char xpath[XPATH_MAX];
xmlNode *match;
int n;
CRM_CHECK(local_cib != NULL, return FALSE);
/* Search for the node's attributes in the CIB. While the schema allows
* multiple sets of instance attributes, and allows instance attributes to
* use id-ref to reference values elsewhere, that is intended for resources,
* so we ignore that here.
*/
n = snprintf(xpath, XPATH_MAX, "//" XML_CIB_TAG_NODES
"/" XML_CIB_TAG_NODE "[@uname='%s']/" XML_TAG_ATTR_SETS
"/" XML_CIB_TAG_NVPAIR "[@name='%s' and @value='%s']",
node, name, value);
match = get_xpath_object(xpath, local_cib, LOG_TRACE);
CRM_CHECK(n < XPATH_MAX, return FALSE);
return (match != NULL);
}
static void
update_fencing_topology(const char *event, xmlNode * msg)
{
int format = 1;
const char *xpath;
xmlXPathObjectPtr xpathObj = NULL;
xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);
CRM_ASSERT(patchset);
crm_element_value_int(patchset, "format", &format);
if(format == 1) {
/* Process deletions (only) */
xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_FENCING_LEVEL;
xpathObj = xpath_search(msg, xpath);
remove_fencing_topology(xpathObj);
freeXpathObject(xpathObj);
/* Process additions and changes */
xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_FENCING_LEVEL;
xpathObj = xpath_search(msg, xpath);
register_fencing_topology(xpathObj);
freeXpathObject(xpathObj);
} else if(format == 2) {
xmlNode *change = NULL;
int add[] = { 0, 0, 0 };
int del[] = { 0, 0, 0 };
xml_patch_versions(patchset, add, del);
for (change = __xml_first_child(patchset); change != NULL; change = __xml_next(change)) {
const char *op = crm_element_value(change, XML_DIFF_OP);
const char *xpath = crm_element_value(change, XML_DIFF_PATH);
if(op == NULL) {
continue;
} else if(strstr(xpath, "/" XML_TAG_FENCING_LEVEL) != NULL) {
/* Change to a specific entry */
crm_trace("Handling %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath);
if(strcmp(op, "move") == 0) {
continue;
} else if(strcmp(op, "create") == 0) {
handle_topology_change(change->children, FALSE);
} else if(strcmp(op, "modify") == 0) {
xmlNode *match = first_named_child(change, XML_DIFF_RESULT);
if(match) {
handle_topology_change(match->children, TRUE);
}
} else if(strcmp(op, "delete") == 0) {
/* Nuclear option, all we have is the path and an id... not enough to remove a specific entry */
crm_info("Re-initializing fencing topology after %s operation %d.%d.%d for %s",
op, add[0], add[1], add[2], xpath);
fencing_topology_init();
return;
}
} else if (strstr(xpath, "/" XML_TAG_FENCING_TOPOLOGY) != NULL) {
/* Change to the topology in general */
crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s",
op, add[0], add[1], add[2], xpath);
fencing_topology_init();
return;
} else if (strstr(xpath, "/" XML_CIB_TAG_CONFIGURATION)) {
/* Changes to the whole config section, possibly including the topology as a whild */
if(first_named_child(change, XML_TAG_FENCING_TOPOLOGY) == NULL) {
crm_trace("Nothing for us in %s operation %d.%d.%d for %s.",
op, add[0], add[1], add[2], xpath);
} else if(strcmp(op, "delete") == 0 || strcmp(op, "create") == 0) {
crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s.",
op, add[0], add[1], add[2], xpath);
fencing_topology_init();
return;
}
} else {
crm_trace("Nothing for us in %s operation %d.%d.%d for %s",
op, add[0], add[1], add[2], xpath);
}
}
} else {
crm_warn("Unknown patch format: %d", format);
}
}
static bool have_cib_devices = FALSE;
static void
update_cib_cache_cb(const char *event, xmlNode * msg)
{
int rc = pcmk_ok;
xmlNode *stonith_enabled_xml = NULL;
xmlNode *stonith_watchdog_xml = NULL;
const char *stonith_enabled_s = NULL;
static gboolean stonith_enabled_saved = TRUE;
if(!have_cib_devices) {
crm_trace("Skipping updates until we get a full dump");
return;
} else if(msg == NULL) {
crm_trace("Missing %s update", event);
return;
}
/* Maintain a local copy of the CIB so that we have full access
* to device definitions, location constraints, and node attributes
*/
if (local_cib != NULL) {
int rc = pcmk_ok;
xmlNode *patchset = NULL;
crm_element_value_int(msg, F_CIB_RC, &rc);
if (rc != pcmk_ok) {
return;
}
patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);
xml_log_patchset(LOG_TRACE, "Config update", patchset);
rc = xml_apply_patchset(local_cib, patchset, TRUE);
switch (rc) {
case pcmk_ok:
case -pcmk_err_old_data:
break;
case -pcmk_err_diff_resync:
case -pcmk_err_diff_failed:
crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc);
free_xml(local_cib);
local_cib = NULL;
break;
default:
crm_warn("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc);
free_xml(local_cib);
local_cib = NULL;
}
}
if (local_cib == NULL) {
crm_trace("Re-requesting the full cib");
rc = cib_api->cmds->query(cib_api, NULL, &local_cib, cib_scope_local | cib_sync_call);
if(rc != pcmk_ok) {
crm_err("Couldn't retrieve the CIB: %s (%d)", pcmk_strerror(rc), rc);
return;
}
CRM_ASSERT(local_cib != NULL);
stonith_enabled_saved = FALSE; /* Trigger a full refresh below */
}
crm_peer_caches_refresh(local_cib);
stonith_enabled_xml = get_xpath_object("//nvpair[@name='stonith-enabled']", local_cib, LOG_TRACE);
if (stonith_enabled_xml) {
stonith_enabled_s = crm_element_value(stonith_enabled_xml, XML_NVPAIR_ATTR_VALUE);
}
if (stonith_enabled_s == NULL || crm_is_true(stonith_enabled_s)) {
long timeout_ms = 0;
const char *value = NULL;
stonith_watchdog_xml = get_xpath_object("//nvpair[@name='stonith-watchdog-timeout']", local_cib, LOG_TRACE);
if (stonith_watchdog_xml) {
value = crm_element_value(stonith_watchdog_xml, XML_NVPAIR_ATTR_VALUE);
}
if(value) {
timeout_ms = crm_get_msec(value);
}
if(timeout_ms != stonith_watchdog_timeout_ms) {
crm_notice("New watchdog timeout %lds (was %lds)", timeout_ms/1000, stonith_watchdog_timeout_ms/1000);
stonith_watchdog_timeout_ms = timeout_ms;
}
} else {
stonith_watchdog_timeout_ms = 0;
}
if (stonith_enabled_s && crm_is_true(stonith_enabled_s) == FALSE) {
crm_trace("Ignoring cib updates while stonith is disabled");
stonith_enabled_saved = FALSE;
return;
} else if (stonith_enabled_saved == FALSE) {
crm_info("Updating stonith device and topology lists now that stonith is enabled");
stonith_enabled_saved = TRUE;
fencing_topology_init();
cib_devices_update();
} else {
update_fencing_topology(event, msg);
update_cib_stonith_devices(event, msg);
}
}
static void
init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
crm_info("Updating device list from the cib: init");
have_cib_devices = TRUE;
local_cib = copy_xml(output);
crm_peer_caches_refresh(local_cib);
fencing_topology_init();
cib_devices_update();
}
static void
stonith_shutdown(int nsig)
{
stonith_shutdown_flag = TRUE;
crm_info("Terminating with %d clients",
crm_hash_table_size(client_connections));
if (mainloop != NULL && g_main_is_running(mainloop)) {
g_main_quit(mainloop);
} else {
stonith_cleanup();
crm_exit(pcmk_ok);
}
}
static void
cib_connection_destroy(gpointer user_data)
{
if (stonith_shutdown_flag) {
crm_info("Connection to the CIB closed.");
return;
} else {
crm_notice("Connection to the CIB terminated. Shutting down.");
}
if (cib_api) {
cib_api->cmds->signoff(cib_api);
}
stonith_shutdown(0);
}
static void
stonith_cleanup(void)
{
if (cib_api) {
cib_api->cmds->signoff(cib_api);
}
if (ipcs) {
qb_ipcs_destroy(ipcs);
}
crm_peer_destroy();
crm_client_cleanup();
free_stonith_remote_op_list();
free(stonith_our_uname);
free_xml(local_cib);
}
/* *INDENT-OFF* */
static struct crm_option long_options[] = {
{"stand-alone", 0, 0, 's'},
{"stand-alone-w-cpg", 0, 0, 'c'},
{"logfile", 1, 0, 'l'},
{"verbose", 0, 0, 'V'},
{"version", 0, 0, '$'},
{"help", 0, 0, '?'},
{0, 0, 0, 0}
};
/* *INDENT-ON* */
static void
setup_cib(void)
{
int rc, retries = 0;
static cib_t *(*cib_new_fn) (void) = NULL;
if (cib_new_fn == NULL) {
cib_new_fn = find_library_function(&cib_library, CIB_LIBRARY, "cib_new", TRUE);
}
if (cib_new_fn != NULL) {
cib_api = (*cib_new_fn) ();
}
if (cib_api == NULL) {
crm_err("No connection to the CIB");
return;
}
do {
sleep(retries);
rc = cib_api->cmds->signon(cib_api, CRM_SYSTEM_STONITHD, cib_command);
} while (rc == -ENOTCONN && ++retries < 5);
if (rc != pcmk_ok) {
crm_err("Could not connect to the CIB service: %s (%d)", pcmk_strerror(rc), rc);
} else if (pcmk_ok !=
cib_api->cmds->add_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb)) {
crm_err("Could not set CIB notification callback");
} else {
rc = cib_api->cmds->query(cib_api, NULL, NULL, cib_scope_local);
cib_api->cmds->register_callback(cib_api, rc, 120, FALSE, NULL, "init_cib_cache_cb",
init_cib_cache_cb);
cib_api->cmds->set_connection_dnotify(cib_api, cib_connection_destroy);
crm_info("Watching for stonith topology changes");
}
}
struct qb_ipcs_service_handlers ipc_callbacks = {
.connection_accept = st_ipc_accept,
.connection_created = st_ipc_created,
.msg_process = st_ipc_dispatch,
.connection_closed = st_ipc_closed,
.connection_destroyed = st_ipc_destroy
};
/*!
* \internal
* \brief Callback for peer status changes
*
* \param[in] type What changed
* \param[in] node What peer had the change
* \param[in] data Previous value of what changed
*/
static void
st_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
{
if ((type != crm_status_processes) && !is_set(node->flags, crm_remote_node)) {
/*
* This is a hack until we can send to a nodeid and/or we fix node name lookups
* These messages are ignored in stonith_peer_callback()
*/
xmlNode *query = create_xml_node(NULL, "stonith_command");
crm_xml_add(query, F_XML_TAGNAME, "stonith_command");
crm_xml_add(query, F_TYPE, T_STONITH_NG);
crm_xml_add(query, F_STONITH_OPERATION, "poke");
crm_debug("Broadcasting our uname because of node %u", node->id);
send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE);
free_xml(query);
}
}
int
main(int argc, char **argv)
{
int flag;
int rc = 0;
int lpc = 0;
int argerr = 0;
int option_index = 0;
crm_cluster_t cluster;
const char *actions[] = { "reboot", "off", "on", "list", "monitor", "status" };
+ crm_ipc_t *old_instance = NULL;
crm_log_preinit("stonith-ng", argc, argv);
crm_set_options(NULL, "mode [options]", long_options,
"Provides a summary of cluster's current state."
"\n\nOutputs varying levels of detail in a number of different formats.\n");
while (1) {
flag = crm_get_option(argc, argv, &option_index);
if (flag == -1) {
break;
}
switch (flag) {
case 'V':
crm_bump_log_level(argc, argv);
break;
case 'l':
crm_add_logfile(optarg);
break;
case 's':
stand_alone = TRUE;
break;
case 'c':
stand_alone = FALSE;
no_cib_connect = TRUE;
break;
case '$':
case '?':
crm_help(flag, EX_OK);
break;
default:
++argerr;
break;
}
}
if (argc - optind == 1 && safe_str_eq("metadata", argv[optind])) {
printf("<?xml version=\"1.0\"?><!DOCTYPE resource-agent SYSTEM \"ra-api-1.dtd\">\n");
printf("<resource-agent name=\"stonithd\">\n");
printf(" <version>1.0</version>\n");
printf
(" <longdesc lang=\"en\">This is a fake resource that details the instance attributes handled by stonithd.</longdesc>\n");
printf(" <shortdesc lang=\"en\">Options available for all stonith resources</shortdesc>\n");
printf(" <parameters>\n");
#if 0
// priority is not implemented yet
printf(" <parameter name=\"priority\" unique=\"0\">\n");
printf(" <shortdesc lang=\"en\">Devices that are not in a topology "
"are tried in order of highest to lowest integer priority</shortdesc>\n");
printf(" <content type=\"integer\" default=\"0\"/>\n");
printf(" </parameter>\n");
#endif
printf(" <parameter name=\"%s\" unique=\"0\">\n", STONITH_ATTR_HOSTARG);
printf
(" <shortdesc lang=\"en\">Advanced use only: An alternate parameter to supply instead of 'port'</shortdesc>\n");
printf
(" <longdesc lang=\"en\">Some devices do not support the standard 'port' parameter or may provide additional ones.\n"
"Use this to specify an alternate, device-specific, parameter that should indicate the machine to be fenced.\n"
"A value of 'none' can be used to tell the cluster not to supply any additional parameters.\n"
" </longdesc>\n");
printf(" <content type=\"string\" default=\"port\"/>\n");
printf(" </parameter>\n");
printf(" <parameter name=\"%s\" unique=\"0\">\n", STONITH_ATTR_HOSTMAP);
printf
(" <shortdesc lang=\"en\">A mapping of host names to ports numbers for devices that do not support host names.</shortdesc>\n");
printf
(" <longdesc lang=\"en\">Eg. node1:1;node2:2,3 would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2</longdesc>\n");
printf(" <content type=\"string\" default=\"\"/>\n");
printf(" </parameter>\n");
printf(" <parameter name=\"%s\" unique=\"0\">\n", STONITH_ATTR_HOSTLIST);
printf
(" <shortdesc lang=\"en\">A list of machines controlled by this device (Optional unless %s=static-list).</shortdesc>\n",
STONITH_ATTR_HOSTCHECK);
printf(" <content type=\"string\" default=\"\"/>\n");
printf(" </parameter>\n");
printf(" <parameter name=\"%s\" unique=\"0\">\n", STONITH_ATTR_HOSTCHECK);
printf
(" <shortdesc lang=\"en\">How to determine which machines are controlled by the device.</shortdesc>\n");
printf(" <longdesc lang=\"en\">Allowed values: dynamic-list "
"(query the device via the 'list' command), static-list "
"(check the " STONITH_ATTR_HOSTLIST " attribute), status "
"(query the device via the 'status' command), none (assume "
"every device can fence every machine)</longdesc>\n");
printf(" <content type=\"string\" default=\"dynamic-list\"/>\n");
printf(" </parameter>\n");
printf(" <parameter name=\"%s\" unique=\"0\">\n", STONITH_ATTR_DELAY_MAX);
printf
(" <shortdesc lang=\"en\">Enable a random delay for stonith actions and specify the maximum of random delay.</shortdesc>\n");
printf
(" <longdesc lang=\"en\">This prevents double fencing when using slow devices such as sbd.\n"
"Use this to enable a random delay for stonith actions.\n"
"The overall delay is derived from this random delay value adding a static delay so that the sum is kept below the maximum delay.</longdesc>\n");
printf(" <content type=\"time\" default=\"0s\"/>\n");
printf(" </parameter>\n");
printf(" <parameter name=\"%s\" unique=\"0\">\n", STONITH_ATTR_DELAY_BASE);
printf
(" <shortdesc lang=\"en\">Enable a base delay for stonith actions and specify base delay value.</shortdesc>\n");
printf
(" <longdesc lang=\"en\">This prevents double fencing when different delays are configured on the nodes.\n"
"Use this to enable a static delay for stonith actions.\n"
"The overall delay is derived from a random delay value adding this static delay so that the sum is kept below the maximum delay.</longdesc>\n");
printf(" <content type=\"time\" default=\"0s\"/>\n");
printf(" </parameter>\n");
printf(" <parameter name=\"%s\" unique=\"0\">\n", STONITH_ATTR_ACTION_LIMIT);
printf
(" <shortdesc lang=\"en\">The maximum number of actions can be performed in parallel on this device</shortdesc>\n");
printf
(" <longdesc lang=\"en\">Pengine property concurrent-fencing=true needs to be configured first.\n"
"Then use this to specify the maximum number of actions can be performed in parallel on this device. -1 is unlimited.</longdesc>\n");
printf(" <content type=\"integer\" default=\"1\"/>\n");
printf(" </parameter>\n");
for (lpc = 0; lpc < DIMOF(actions); lpc++) {
printf(" <parameter name=\"pcmk_%s_action\" unique=\"0\">\n", actions[lpc]);
printf
(" <shortdesc lang=\"en\">Advanced use only: An alternate command to run instead of '%s'</shortdesc>\n",
actions[lpc]);
printf
(" <longdesc lang=\"en\">Some devices do not support the standard commands or may provide additional ones.\n"
"Use this to specify an alternate, device-specific, command that implements the '%s' action.</longdesc>\n",
actions[lpc]);
printf(" <content type=\"string\" default=\"%s\"/>\n", actions[lpc]);
printf(" </parameter>\n");
printf(" <parameter name=\"pcmk_%s_timeout\" unique=\"0\">\n", actions[lpc]);
printf
(" <shortdesc lang=\"en\">Advanced use only: Specify an alternate timeout to use for %s actions instead of stonith-timeout</shortdesc>\n",
actions[lpc]);
printf
(" <longdesc lang=\"en\">Some devices need much more/less time to complete than normal.\n"
"Use this to specify an alternate, device-specific, timeout for '%s' actions.</longdesc>\n",
actions[lpc]);
printf(" <content type=\"time\" default=\"60s\"/>\n");
printf(" </parameter>\n");
printf(" <parameter name=\"pcmk_%s_retries\" unique=\"0\">\n", actions[lpc]);
printf
(" <shortdesc lang=\"en\">Advanced use only: The maximum number of times to retry the '%s' command within the timeout period</shortdesc>\n",
actions[lpc]);
printf(" <longdesc lang=\"en\">Some devices do not support multiple connections."
" Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining."
" Use this option to alter the number of times Pacemaker retries '%s' actions before giving up."
"</longdesc>\n", actions[lpc]);
printf(" <content type=\"integer\" default=\"2\"/>\n");
printf(" </parameter>\n");
}
printf(" </parameters>\n");
printf("</resource-agent>\n");
return 0;
}
if (optind != argc) {
++argerr;
}
if (argerr) {
crm_help('?', EX_USAGE);
}
crm_log_init("stonith-ng", LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
+
+ old_instance = crm_ipc_new("stonith-ng", 0);
+ if (crm_ipc_connect(old_instance)) {
+ /* IPC end-point already up */
+ crm_ipc_close(old_instance);
+ crm_ipc_destroy(old_instance);
+ crm_err("stonithd is already active, aborting startup");
+ crm_exit(EX_OK);
+ } else {
+ /* not up or not authentic, we'll proceed either way */
+ crm_ipc_destroy(old_instance);
+ old_instance = NULL;
+ }
+
mainloop_add_signal(SIGTERM, stonith_shutdown);
crm_peer_init();
fenced_data_set = pe_new_working_set();
CRM_ASSERT(fenced_data_set != NULL);
if (stand_alone == FALSE) {
#if SUPPORT_HEARTBEAT
cluster.hb_conn = NULL;
cluster.hb_dispatch = stonith_peer_hb_callback;
cluster.destroy = stonith_peer_hb_destroy;
#endif
if (is_openais_cluster()) {
#if SUPPORT_COROSYNC
cluster.destroy = stonith_peer_cs_destroy;
cluster.cpg.cpg_deliver_fn = stonith_peer_ais_callback;
cluster.cpg.cpg_confchg_fn = pcmk_cpg_membership;
#endif
}
crm_set_status_callback(&st_peer_update_callback);
if (crm_cluster_connect(&cluster) == FALSE) {
crm_crit("Cannot sign in to the cluster... terminating");
crm_exit(DAEMON_RESPAWN_STOP);
}
stonith_our_uname = cluster.uname;
stonith_our_uuid = cluster.uuid;
#if SUPPORT_HEARTBEAT
if (is_heartbeat_cluster()) {
/* crm_cluster_connect() registered us for crm_system_name, which
* usually is the only F_TYPE used by the respective sub system.
* Stonith needs to register two additional F_TYPE callbacks,
* because it can :-/ */
if (HA_OK !=
cluster.hb_conn->llc_ops->set_msg_callback(cluster.hb_conn, T_STONITH_NOTIFY,
cluster.hb_dispatch, cluster.hb_conn)) {
crm_crit("Cannot set msg callback %s: %s", T_STONITH_NOTIFY, cluster.hb_conn->llc_ops->errmsg(cluster.hb_conn));
crm_exit(DAEMON_RESPAWN_STOP);
}
if (HA_OK !=
cluster.hb_conn->llc_ops->set_msg_callback(cluster.hb_conn, T_STONITH_TIMEOUT_VALUE,
cluster.hb_dispatch, cluster.hb_conn)) {
crm_crit("Cannot set msg callback %s: %s", T_STONITH_TIMEOUT_VALUE, cluster.hb_conn->llc_ops->errmsg(cluster.hb_conn));
crm_exit(DAEMON_RESPAWN_STOP);
}
}
#endif
if (no_cib_connect == FALSE) {
setup_cib();
}
} else {
stonith_our_uname = strdup("localhost");
}
device_list = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_device);
topology = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_topology_entry);
if(stonith_watchdog_timeout_ms > 0) {
xmlNode *xml;
stonith_key_value_t *params = NULL;
params = stonith_key_value_add(params, STONITH_ATTR_HOSTLIST, stonith_our_uname);
xml = create_device_registration_xml("watchdog", st_namespace_internal,
STONITH_WATCHDOG_AGENT, params,
NULL);
stonith_device_register(xml, NULL, FALSE);
stonith_key_value_freeall(params, 1, 1);
free_xml(xml);
}
stonith_ipc_server_init(&ipcs, &ipc_callbacks);
#if SUPPORT_STONITH_CONFIG
if (((stand_alone == TRUE)) && !(standalone_cfg_read_file(STONITH_NG_CONF_FILE))) {
standalone_cfg_commit();
}
#endif
/* Create the mainloop and run it... */
mainloop = g_main_new(FALSE);
crm_info("Starting %s mainloop", crm_system_name);
g_main_run(mainloop);
stonith_cleanup();
#if SUPPORT_HEARTBEAT
if (cluster.hb_conn) {
cluster.hb_conn->llc_ops->delete(cluster.hb_conn);
}
#endif
pe_free_working_set(fenced_data_set);
return crm_exit(rc);
}
diff --git a/include/crm/common/Makefile.am b/include/crm/common/Makefile.am
index b90ac792db..aacb6ff44f 100644
--- a/include/crm/common/Makefile.am
+++ b/include/crm/common/Makefile.am
@@ -1,17 +1,20 @@
#
-# Copyright 2004-2019 Andrew Beekhof <andrew@beekhof.net>
+# Copyright 2004-2019 the Pacemaker project contributors
+#
+# The version control history for this file may have further details.
#
# This source code is licensed under the GNU General Public License version 2
# or later (GPLv2+) WITHOUT ANY WARRANTY.
#
MAINTAINERCLEANFILES = Makefile.in
headerdir=$(pkgincludedir)/crm/common
header_HEADERS = xml.h ipc.h util.h iso8601.h mainloop.h logging.h \
nvpair.h
-noinst_HEADERS = ipcs.h internal.h remote_internal.h xml_internal.h
+noinst_HEADERS = ipcs.h internal.h remote_internal.h xml_internal.h \
+ ipc_internal.h
if BUILD_CIBSECRETS
noinst_HEADERS += cib_secrets.h
endif
diff --git a/include/crm/common/ipc.h b/include/crm/common/ipc.h
index 872225259d..df56bbebb2 100644
--- a/include/crm/common/ipc.h
+++ b/include/crm/common/ipc.h
@@ -1,84 +1,113 @@
/*
- * Copyright (C) 2013 Andrew Beekhof <andrew@beekhof.net>
+ * Copyright 2013-2019 the Pacemaker project contributors
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
+ * The version control history for this file may have further details.
*
- * This software is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef CRM_COMMON_IPC__H
# define CRM_COMMON_IPC__H
/**
* \file
* \brief Wrappers for and extensions to libqb IPC
* \ingroup core
*/
# include <crm/common/xml.h>
/* clplumbing based IPC */
# define create_reply(request, xml_response_data) create_reply_adv(request, xml_response_data, __FUNCTION__);
xmlNode *create_reply_adv(xmlNode * request, xmlNode * xml_response_data, const char *origin);
# define create_request(task, xml_data, host_to, sys_to, sys_from, uuid_from) create_request_adv(task, xml_data, host_to, sys_to, sys_from, uuid_from, __FUNCTION__)
xmlNode *create_request_adv(const char *task, xmlNode * xml_data, const char *host_to,
const char *sys_to, const char *sys_from, const char *uuid_from,
const char *origin);
/* *INDENT-OFF* */
enum crm_ipc_flags
{
crm_ipc_flags_none = 0x00000000,
crm_ipc_compressed = 0x00000001, /* Message has been compressed */
crm_ipc_proxied = 0x00000100, /* _ALL_ replies to proxied connections need to be sent as events */
crm_ipc_client_response = 0x00000200, /* A Response is expected in reply */
/* These options are just options for crm_ipcs_sendv() */
crm_ipc_server_event = 0x00010000, /* Send an Event instead of a Response */
crm_ipc_server_free = 0x00020000, /* Free the iovec after sending */
crm_ipc_proxied_relay_response = 0x00040000, /* all replies to proxied connections are sent as events, this flag preserves whether the event should be treated as an actual event, or a response.*/
crm_ipc_server_info = 0x00100000, /* Log failures as LOG_INFO */
crm_ipc_server_error = 0x00200000, /* Log failures as LOG_ERR */
};
/* *INDENT-ON* */
# include <qb/qbipcc.h>
typedef struct crm_ipc_s crm_ipc_t;
crm_ipc_t *crm_ipc_new(const char *name, size_t max_size);
bool crm_ipc_connect(crm_ipc_t * client);
void crm_ipc_close(crm_ipc_t * client);
void crm_ipc_destroy(crm_ipc_t * client);
int crm_ipc_send(crm_ipc_t * client, xmlNode * message, enum crm_ipc_flags flags,
int32_t ms_timeout, xmlNode ** reply);
int crm_ipc_get_fd(crm_ipc_t * client);
bool crm_ipc_connected(crm_ipc_t * client);
int crm_ipc_ready(crm_ipc_t * client);
long crm_ipc_read(crm_ipc_t * client);
const char *crm_ipc_buffer(crm_ipc_t * client);
uint32_t crm_ipc_buffer_flags(crm_ipc_t * client);
const char *crm_ipc_name(crm_ipc_t * client);
unsigned int crm_ipc_default_buffer_size(void);
+/*!
+ * \brief Check the authenticity of the IPC socket peer process
+ *
+ * If everything goes well, peer's authenticity is verified by the means
+ * of comparing against provided referential UID and GID (either satisfies),
+ * and the result of this check can be deduced from the return value.
+ * As an exception, detected UID of 0 ("root") satisfies arbitrary
+ * provided referential daemon's credentials.
+ *
+ * \param[in] sock IPC related, connected Unix socket to check peer of
+ * \param[in] refuid referential UID to check against
+ * \param[in] refgid referential GID to check against
+ * \param[out] gotpid to optionally store obtained PID of the peer
+ * (not available on FreeBSD, special value of 1
+ * used instead, and the caller is required to
+ * special case this value respectively)
+ * \param[out] gotuid to optionally store obtained UID of the peer
+ * \param[out] gotgid to optionally store obtained GID of the peer
+ *
+ * \return 0 if IPC related socket's peer is not authentic given the
+ * referential credentials (see above), 1 if it is,
+ * negative value on error (generally expressing -errno unless
+ * it was zero even on nonhappy path, -pcmk_err_generic is
+ * returned then; no message is directly emitted)
+ *
+ * \note While this function is tolerant on what constitutes authorized
+ * IPC daemon process (its effective user matches UID=0 or \p refuid,
+ * or at least its group matches \p refroup), either or both (in case
+ * of UID=0) mismatches on the expected credentials of such peer
+ * process \e shall be investigated at the caller when value of 1
+ * gets returned there, since higher-than-expected privileges in
+ * respect to the expected/intended credentials possibly violate
+ * the least privilege principle and may pose an additional risk
+ * (i.e. such accidental inconsistency shall be eventually fixed).
+ */
+int crm_ipc_is_authentic_process(int sock, uid_t refuid, gid_t refgid,
+ pid_t *gotpid, uid_t *gotuid, gid_t *gotgid);
+
/* Utils */
xmlNode *create_hello_message(const char *uuid, const char *client_name,
const char *major_version, const char *minor_version);
#endif
diff --git a/include/crm/common/ipc_internal.h b/include/crm/common/ipc_internal.h
new file mode 100644
index 0000000000..41a665373e
--- /dev/null
+++ b/include/crm/common/ipc_internal.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright 2019 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#ifndef PCMK__IPC_INTERNAL_H
+#define PCMK__IPC_INTERNAL_H
+
+#include <sys/types.h>
+
+#include <crm_config.h> /* US_AUTH_GETPEEREID */
+
+
+/* denotes "non yieldable PID" on FreeBSD, or actual PID1 in scenarios that
+ require a delicate handling anyway (socket-based activation with systemd);
+ we can be reasonably sure that this PID is never possessed by the actual
+ child daemon, as it gets taken either by the proper init, or by pacemakerd
+ itself (i.e. this precludes anything else); note that value of zero
+ is meant to carry "unset" meaning, and better not to bet on/conditionalize
+ over signedness of pid_t */
+#define PCMK__SPECIAL_PID 1
+
+#if defined(US_AUTH_GETPEEREID)
+/* on FreeBSD, we don't want to expose "non-yieldable PID" (leading to
+ "IPC liveness check only") as its nominal representation, which could
+ cause confusion -- this is unambiguous as long as there's no
+ socket-based activation like with systemd (very improbable) */
+#define PCMK__SPECIAL_PID_AS_0(p) (((p) == PCMK__SPECIAL_PID) ? 0 : (p))
+#else
+#define PCMK__SPECIAL_PID_AS_0(p) (p)
+#endif
+
+/*!
+ * \internal
+ * \brief Check the authenticity and liveness of the process via IPC end-point
+ *
+ * When IPC daemon under given IPC end-point (name) detected, its authenticity
+ * is verified by the means of comparing against provided referential UID and
+ * GID, and the result of this check can be deduced from the return value.
+ * As an exception, referential UID of 0 (~ root) satisfies arbitrary
+ * detected daemon's credentials.
+ *
+ * \param[in] name IPC name to base the search on
+ * \param[in] refuid referential UID to check against
+ * \param[in] refgid referential GID to check against
+ * \param[out] gotpid to optionally store obtained PID of the found process
+ * upon returning 1 or -2
+ * (not available on FreeBSD, special value of 1,
+ * see PCMK__SPECIAL_PID, used instead, and the caller
+ * is required to special case this value respectively)
+ *
+ * \return 0 if no trace of IPC peer's liveness detected, 1 if it was,
+ * -1 on error, and -2 when the IPC blocked with unauthorized
+ * process (log message emitted in both latter cases)
+ *
+ * \note This function emits a log message also in case there isn't a perfect
+ * match in respect to \p reguid and/or \p refgid, for a possible
+ * least privilege principle violation.
+ *
+ * \see crm_ipc_is_authentic_process
+ */
+int pcmk__ipc_is_authentic_process_active(const char *name, uid_t refuid,
+ gid_t refgid, pid_t *gotpid);
+
+#endif
diff --git a/include/crm_internal.h b/include/crm_internal.h
index 5692929d04..0adeb7b39e 100644
--- a/include/crm_internal.h
+++ b/include/crm_internal.h
@@ -1,341 +1,362 @@
/* crm_internal.h */
/*
* Copyright (C) 2006 - 2008
* Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef CRM_INTERNAL__H
# define CRM_INTERNAL__H
# include <config.h>
# include <portability.h>
# include <glib.h>
# include <stdbool.h>
# include <libxml/tree.h>
# include <crm/lrmd.h>
# include <crm/common/logging.h>
# include <crm/common/ipcs.h>
# include <crm/common/internal.h>
/* Dynamic loading of libraries */
void *find_library_function(void **handle, const char *lib, const char *fn, int fatal);
void *convert_const_pointer(const void *ptr);
/* For ACLs */
char *uid2username(uid_t uid);
void determine_request_user(const char *user, xmlNode * request, const char *field);
const char *crm_acl_get_set_user(xmlNode * request, const char *field, const char *peer_user);
# if ENABLE_ACL
# include <string.h>
static inline gboolean
is_privileged(const char *user)
{
if (user == NULL) {
return FALSE;
} else if (strcmp(user, CRM_DAEMON_USER) == 0) {
return TRUE;
} else if (strcmp(user, "root") == 0) {
return TRUE;
}
return FALSE;
}
# endif
/* CLI option processing*/
# ifdef HAVE_GETOPT_H
# include <getopt.h>
# else
# define no_argument 0
# define required_argument 1
# endif
# define pcmk_option_default 0x00000
# define pcmk_option_hidden 0x00001
# define pcmk_option_paragraph 0x00002
# define pcmk_option_example 0x00004
struct crm_option {
/* Fields from 'struct option' in getopt.h */
/* name of long option */
const char *name;
/*
* one of no_argument, required_argument, and optional_argument:
* whether option takes an argument
*/
int has_arg;
/* if not NULL, set *flag to val when option found */
int *flag;
/* if flag not NULL, value to set *flag to; else return value */
int val;
/* Custom fields */
const char *desc;
long flags;
};
void crm_set_options(const char *short_options, const char *usage, struct crm_option *long_options,
const char *app_desc);
int crm_get_option(int argc, char **argv, int *index);
int crm_get_option_long(int argc, char **argv, int *index, const char **longname);
int crm_help(char cmd, int exit_code);
/* Cluster Option Processing */
typedef struct pe_cluster_option_s {
const char *name;
const char *alt_name;
const char *type;
const char *values;
const char *default_value;
gboolean(*is_valid) (const char *);
const char *description_short;
const char *description_long;
} pe_cluster_option;
const char *cluster_option(GHashTable * options, gboolean(*validate) (const char *),
const char *name, const char *old_name, const char *def_value);
const char *get_cluster_pref(GHashTable * options, pe_cluster_option * option_list, int len,
const char *name);
void config_metadata(const char *name, const char *version, const char *desc_short,
const char *desc_long, pe_cluster_option * option_list, int len);
void verify_all_options(GHashTable * options, pe_cluster_option * option_list, int len);
gboolean check_time(const char *value);
gboolean check_timer(const char *value);
gboolean check_boolean(const char *value);
gboolean check_number(const char *value);
gboolean check_positive_number(const char *value);
gboolean check_quorum(const char *value);
gboolean check_script(const char *value);
gboolean check_utilization(const char *value);
long crm_get_sbd_timeout(void);
gboolean check_sbd_timeout(const char *value);
/* char2score */
extern int node_score_red;
extern int node_score_green;
extern int node_score_yellow;
extern int node_score_infinity;
/* Assorted convenience functions */
+
+/*!
+ * \internal
+ * \brief Detect if process per PID and optionally exe path (component) exists
+ *
+ * \param[in] pid PID of process assumed alive, disproving of which to try
+ * \param[in] daemon exe path (component) to possibly match with procfs entry
+ *
+ * \return -1 on invalid PID specification, -2 when the calling process has no
+ * (is refused an) ability to (dis)prove the predicate,
+ * 0 if the negation of the predicate is confirmed (check-through-kill
+ * indicates so, or the subsequent check-through-procfs-match on
+ * \p daemon when provided and procfs available at the standard path),
+ * 1 if it cannot be disproved (reliably [modulo race conditions]
+ * when \p daemon provided, procfs available at the standard path
+ * and the calling process has permissions to access the respective
+ * procfs location, less so otherwise, since mere check-through-kill
+ * is exercised without powers to exclude PID recycled in the interim).
+ *
+ * \note This function cannot be used to verify \e authenticity of the process.
+ */
int crm_pid_active(long pid, const char *daemon);
void crm_make_daemon(const char *name, gboolean daemonize, const char *pidfile);
/* from operations.c */
char *generate_op_key(const char *rsc_id, const char *op_type, int interval);
char *generate_notify_key(const char *rsc_id, const char *notify_type, const char *op_type);
char *generate_transition_magic_v202(const char *transition_key, int op_status);
char *generate_transition_magic(const char *transition_key, int op_status, int op_rc);
char *generate_transition_key(int action, int transition_id, int target_rc, const char *node);
void filter_action_parameters(xmlNode *param_set, const char *version);
xmlNode *create_operation_update(xmlNode *parent, lrmd_event_data_t *event,
const char *caller_version, int target_rc,
const char *node, const char *origin,
int level);
static inline long long
crm_clear_bit(const char *function, int line, const char *target, long long word, long long bit)
{
long long rc = (word & ~bit);
if (rc == word) {
/* Unchanged */
} else if (target) {
crm_trace("Bit 0x%.8llx for %s cleared by %s:%d", bit, target, function, line);
} else {
crm_trace("Bit 0x%.8llx cleared by %s:%d", bit, function, line);
}
return rc;
}
static inline long long
crm_set_bit(const char *function, int line, const char *target, long long word, long long bit)
{
long long rc = (word | bit);
if (rc == word) {
/* Unchanged */
} else if (target) {
crm_trace("Bit 0x%.8llx for %s set by %s:%d", bit, target, function, line);
} else {
crm_trace("Bit 0x%.8llx set by %s:%d", bit, function, line);
}
return rc;
}
# define set_bit(word, bit) word = crm_set_bit(__FUNCTION__, __LINE__, NULL, word, bit)
# define clear_bit(word, bit) word = crm_clear_bit(__FUNCTION__, __LINE__, NULL, word, bit)
char *generate_hash_key(const char *crm_msg_reference, const char *sys);
const char *daemon_option(const char *option);
void set_daemon_option(const char *option, const char *value);
gboolean daemon_option_enabled(const char *daemon, const char *option);
void strip_text_nodes(xmlNode * xml);
void pcmk_panic(const char *origin);
void sysrq_init(void);
pid_t pcmk_locate_sbd(void);
long crm_pidfile_inuse(const char *filename, long mypid, const char *daemon);
long crm_read_pidfile(const char *filename);
# define crm_config_err(fmt...) { crm_config_error = TRUE; crm_err(fmt); }
# define crm_config_warn(fmt...) { crm_config_warning = TRUE; crm_warn(fmt); }
# define attrd_channel T_ATTRD
# define F_ATTRD_KEY "attr_key"
# define F_ATTRD_ATTRIBUTE "attr_name"
# define F_ATTRD_REGEX "attr_regex"
# define F_ATTRD_TASK "task"
# define F_ATTRD_VALUE "attr_value"
# define F_ATTRD_SET "attr_set"
# define F_ATTRD_IS_REMOTE "attr_is_remote"
# define F_ATTRD_IS_PRIVATE "attr_is_private"
# define F_ATTRD_SECTION "attr_section"
# define F_ATTRD_DAMPEN "attr_dampening"
# define F_ATTRD_IGNORE_LOCALLY "attr_ignore_locally"
# define F_ATTRD_HOST "attr_host"
# define F_ATTRD_HOST_ID "attr_host_id"
# define F_ATTRD_USER "attr_user"
# define F_ATTRD_WRITER "attr_writer"
# define F_ATTRD_VERSION "attr_version"
# define F_ATTRD_RESOURCE "attr_resource"
# define F_ATTRD_OPERATION "attr_clear_operation"
# define F_ATTRD_INTERVAL "attr_clear_interval"
# define F_ATTRD_IS_FORCE_WRITE "attrd_is_force_write"
/* attrd operations */
# define ATTRD_OP_PEER_REMOVE "peer-remove"
# define ATTRD_OP_UPDATE "update"
# define ATTRD_OP_UPDATE_BOTH "update-both"
# define ATTRD_OP_UPDATE_DELAY "update-delay"
# define ATTRD_OP_QUERY "query"
# define ATTRD_OP_REFRESH "refresh"
# define ATTRD_OP_FLUSH "flush"
# define ATTRD_OP_SYNC "sync"
# define ATTRD_OP_SYNC_RESPONSE "sync-response"
# define ATTRD_OP_CLEAR_FAILURE "clear-failure"
# define PCMK_ENV_PHYSICAL_HOST "physical_host"
# if SUPPORT_COROSYNC
# if CS_USES_LIBQB
# include <qb/qbipc_common.h>
# include <corosync/corotypes.h>
typedef struct qb_ipc_request_header cs_ipc_header_request_t;
typedef struct qb_ipc_response_header cs_ipc_header_response_t;
# else
# include <corosync/corodefs.h>
# include <corosync/coroipcc.h>
# include <corosync/coroipc_types.h>
typedef coroipc_request_header_t cs_ipc_header_request_t;
typedef coroipc_response_header_t cs_ipc_header_response_t;
# endif
# else
typedef struct {
int size __attribute__ ((aligned(8)));
int id __attribute__ ((aligned(8)));
} __attribute__ ((aligned(8))) cs_ipc_header_request_t;
typedef struct {
int size __attribute__ ((aligned(8)));
int id __attribute__ ((aligned(8)));
int error __attribute__ ((aligned(8)));
} __attribute__ ((aligned(8))) cs_ipc_header_response_t;
# endif
void
attrd_ipc_server_init(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb);
void
stonith_ipc_server_init(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb);
qb_ipcs_service_t *
crmd_ipc_server_init(struct qb_ipcs_service_handlers *cb);
void cib_ipc_servers_init(qb_ipcs_service_t **ipcs_ro,
qb_ipcs_service_t **ipcs_rw,
qb_ipcs_service_t **ipcs_shm,
struct qb_ipcs_service_handlers *ro_cb,
struct qb_ipcs_service_handlers *rw_cb);
void cib_ipc_servers_destroy(qb_ipcs_service_t *ipcs_ro,
qb_ipcs_service_t *ipcs_rw,
qb_ipcs_service_t *ipcs_shm);
static inline void *realloc_safe(void *ptr, size_t size)
{
void *ret = realloc(ptr, size);
if (ret == NULL) {
free(ptr); /* make coverity happy */
abort();
}
return ret;
}
const char *crm_xml_add_last_written(xmlNode *xml_node);
void crm_xml_dump(xmlNode * data, int options, char **buffer, int *offset, int *max, int depth);
void crm_buffer_add_char(char **buffer, int *offset, int *max, char c);
gboolean crm_digest_verify(xmlNode *input, const char *expected);
/* cross-platform compatibility functions */
char *crm_compat_realpath(const char *path);
/* IPC Proxy Backend Shared Functions */
typedef struct remote_proxy_s {
char *node_name;
char *session_id;
gboolean is_local;
crm_ipc_t *ipc;
mainloop_io_t *source;
uint32_t last_request_id;
lrmd_t *lrm;
} remote_proxy_t;
remote_proxy_t *remote_proxy_new(
lrmd_t *lrmd, struct ipc_client_callbacks *proxy_callbacks,
const char *node_name, const char *session_id, const char *channel);
int remote_proxy_check(lrmd_t *lrmd, GHashTable *hash);
void remote_proxy_cb(lrmd_t *lrmd, const char *node_name, xmlNode *msg);
void remote_proxy_ack_shutdown(lrmd_t *lrmd);
void remote_proxy_nack_shutdown(lrmd_t *lrmd);
int remote_proxy_dispatch(const char *buffer, ssize_t length, gpointer userdata);
void remote_proxy_disconnected(gpointer data);
void remote_proxy_free(gpointer data);
void remote_proxy_relay_event(remote_proxy_t *proxy, xmlNode *msg);
void remote_proxy_relay_response(remote_proxy_t *proxy, xmlNode *msg, int msg_id);
#endif /* CRM_INTERNAL__H */
diff --git a/lib/cluster/corosync.c b/lib/cluster/corosync.c
index 9719541b5d..0acf9b2b1d 100644
--- a/lib/cluster/corosync.c
+++ b/lib/cluster/corosync.c
@@ -1,643 +1,765 @@
/*
- * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
+ * Copyright 2004-2019 the Pacemaker project contributors
*
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
+ * The version control history for this file may have further details.
*
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <bzlib.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <crm/common/ipc.h>
#include <crm/cluster/internal.h>
#include <crm/common/mainloop.h>
#include <sys/utsname.h>
#include <qb/qbipcc.h>
#include <qb/qbutil.h>
#include <corosync/corodefs.h>
#include <corosync/corotypes.h>
#include <corosync/hdb.h>
#include <corosync/cfg.h>
#include <corosync/cmap.h>
#include <corosync/quorum.h>
#include <crm/msg_xml.h>
+#include <crm/common/ipc_internal.h> /* PCMK__SPECIAL_PID* */
+
quorum_handle_t pcmk_quorum_handle = 0;
gboolean(*quorum_app_callback) (unsigned long long seq, gboolean quorate) = NULL;
/*
* CFG functionality stolen from node_name() in corosync-quorumtool.c
* This resolves the first address assigned to a node and returns the name or IP address.
*/
char *
corosync_node_name(uint64_t /*cmap_handle_t */ cmap_handle, uint32_t nodeid)
{
int lpc = 0;
- int rc = CS_OK;
+ cs_error_t rc = CS_OK;
int retries = 0;
char *name = NULL;
cmap_handle_t local_handle = 0;
+ int fd = -1;
+ uid_t found_uid = 0;
+ gid_t found_gid = 0;
+ pid_t found_pid = 0;
+ int rv;
/* nodeid == 0 == CMAN_NODEID_US */
if (nodeid == 0) {
nodeid = get_local_nodeid(0);
}
if (cmap_handle == 0 && local_handle == 0) {
retries = 0;
crm_trace("Initializing CMAP connection");
do {
rc = cmap_initialize(&local_handle);
if (rc != CS_OK) {
retries++;
crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc),
retries);
sleep(retries);
}
} while (retries < 5 && rc != CS_OK);
if (rc != CS_OK) {
crm_warn("Could not connect to Cluster Configuration Database API, error %s",
cs_strerror(rc));
local_handle = 0;
}
}
if (cmap_handle == 0) {
cmap_handle = local_handle;
+
+ rc = cmap_fd_get(cmap_handle, &fd);
+ if (rc != CS_OK) {
+ crm_err("Could not obtain the CMAP API connection: %s (%d)",
+ cs_strerror(rc), rc);
+ goto bail;
+ }
+
+ /* CMAP provider run as root (in given user namespace, anyway)? */
+ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
+ &found_uid, &found_gid))) {
+ crm_err("CMAP provider is not authentic:"
+ " process %lld (uid: %lld, gid: %lld)",
+ (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
+ (long long) found_uid, (long long) found_gid);
+ goto bail;
+ } else if (rv < 0) {
+ crm_err("Could not verify authenticity of CMAP provider: %s (%d)",
+ strerror(-rv), -rv);
+ goto bail;
+ }
}
while (name == NULL && cmap_handle != 0) {
uint32_t id = 0;
char *key = NULL;
key = crm_strdup_printf("nodelist.node.%d.nodeid", lpc);
rc = cmap_get_uint32(cmap_handle, key, &id);
crm_trace("Checking %u vs %u from %s", nodeid, id, key);
free(key);
if (rc != CS_OK) {
break;
}
if (nodeid == id) {
crm_trace("Searching for node name for %u in nodelist.node.%d %s", nodeid, lpc, name);
if (name == NULL) {
key = crm_strdup_printf("nodelist.node.%d.ring0_addr", lpc);
cmap_get_string(cmap_handle, key, &name);
crm_trace("%s = %s", key, name);
if (node_name_is_valid(key, name) == FALSE) {
free(name);
name = NULL;
}
free(key);
}
if (name == NULL) {
key = crm_strdup_printf("nodelist.node.%d.name", lpc);
cmap_get_string(cmap_handle, key, &name);
crm_trace("%s = %s %d", key, name, rc);
free(key);
}
break;
}
lpc++;
}
+bail:
if(local_handle) {
cmap_finalize(local_handle);
}
if (name == NULL) {
crm_info("Unable to get node name for nodeid %u", nodeid);
}
return name;
}
void
terminate_cs_connection(crm_cluster_t *cluster)
{
crm_info("Disconnecting from Corosync");
cluster_disconnect_cpg(cluster);
if (pcmk_quorum_handle) {
crm_trace("Disconnecting quorum");
quorum_finalize(pcmk_quorum_handle);
pcmk_quorum_handle = 0;
} else {
crm_info("No Quorum connection");
}
crm_notice("Disconnected from Corosync");
}
int ais_membership_timer = 0;
gboolean ais_membership_force = FALSE;
static int
pcmk_quorum_dispatch(gpointer user_data)
{
int rc = 0;
rc = quorum_dispatch(pcmk_quorum_handle, CS_DISPATCH_ALL);
if (rc < 0) {
crm_err("Connection to the Quorum API failed: %d", rc);
pcmk_quorum_handle = 0;
return -1;
}
return 0;
}
static void
pcmk_quorum_notification(quorum_handle_t handle,
uint32_t quorate,
uint64_t ring_id, uint32_t view_list_entries, uint32_t * view_list)
{
int i;
GHashTableIter iter;
crm_node_t *node = NULL;
static gboolean init_phase = TRUE;
if (quorate != crm_have_quorum) {
if (quorate) {
crm_notice("Quorum acquired " CRM_XS " membership=" U64T " members=%lu",
ring_id, (long unsigned int)view_list_entries);
} else {
crm_warn("Quorum lost " CRM_XS " membership=" U64T " members=%lu",
ring_id, (long unsigned int)view_list_entries);
}
crm_have_quorum = quorate;
} else {
crm_info("Quorum %s " CRM_XS " membership=" U64T " members=%lu",
(quorate? "retained" : "still lost"), ring_id,
(long unsigned int)view_list_entries);
}
if (view_list_entries == 0 && init_phase) {
crm_info("Corosync membership is still forming, ignoring");
return;
}
init_phase = FALSE;
/* Reset last_seen for all cached nodes so we can tell which ones aren't
* in the view list */
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
node->last_seen = 0;
}
/* Update the peer cache for each node in view list */
for (i = 0; i < view_list_entries; i++) {
uint32_t id = view_list[i];
crm_debug("Member[%d] %u ", i, id);
/* Get this node's peer cache entry (adding one if not already there) */
node = crm_get_peer(id, NULL);
if (node->uname == NULL) {
char *name = corosync_node_name(0, id);
crm_info("Obtaining name for new node %u", id);
node = crm_get_peer(id, name);
free(name);
}
/* Update the node state (including updating last_seen to ring_id) */
crm_update_peer_state(__FUNCTION__, node, CRM_NODE_MEMBER, ring_id);
}
/* Remove any peer cache entries we didn't update */
crm_reap_unseen_nodes(ring_id);
if (quorum_app_callback) {
quorum_app_callback(ring_id, quorate);
}
}
quorum_callbacks_t quorum_callbacks = {
.quorum_notify_fn = pcmk_quorum_notification,
};
gboolean
cluster_connect_quorum(gboolean(*dispatch) (unsigned long long, gboolean),
void (*destroy) (gpointer))
{
- int rc = -1;
+ cs_error_t rc;
int fd = 0;
int quorate = 0;
uint32_t quorum_type = 0;
struct mainloop_fd_callbacks quorum_fd_callbacks;
+ uid_t found_uid = 0;
+ gid_t found_gid = 0;
+ pid_t found_pid = 0;
+ int rv;
quorum_fd_callbacks.dispatch = pcmk_quorum_dispatch;
quorum_fd_callbacks.destroy = destroy;
crm_debug("Configuring Pacemaker to obtain quorum from Corosync");
rc = quorum_initialize(&pcmk_quorum_handle, &quorum_callbacks, &quorum_type);
if (rc != CS_OK) {
- crm_err("Could not connect to the Quorum API: %d", rc);
+ crm_err("Could not connect to the Quorum API: %s (%d)",
+ cs_strerror(rc), rc);
goto bail;
} else if (quorum_type != QUORUM_SET) {
crm_err("Corosync quorum is not configured");
goto bail;
}
+ rc = quorum_fd_get(pcmk_quorum_handle, &fd);
+ if (rc != CS_OK) {
+ crm_err("Could not obtain the Quorum API connection: %s (%d)",
+ strerror(rc), rc);
+ goto bail;
+ }
+
+ /* Quorum provider run as root (in given user namespace, anyway)? */
+ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
+ &found_uid, &found_gid))) {
+ crm_err("Quorum provider is not authentic:"
+ " process %lld (uid: %lld, gid: %lld)",
+ (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
+ (long long) found_uid, (long long) found_gid);
+ rc = CS_ERR_ACCESS;
+ goto bail;
+ } else if (rv < 0) {
+ crm_err("Could not verify authenticity of Quorum provider: %s (%d)",
+ strerror(-rv), -rv);
+ rc = CS_ERR_ACCESS;
+ goto bail;
+ }
+
rc = quorum_getquorate(pcmk_quorum_handle, &quorate);
if (rc != CS_OK) {
crm_err("Could not obtain the current Quorum API state: %d", rc);
goto bail;
}
if (quorate) {
crm_notice("Quorum acquired");
} else {
crm_warn("Quorum lost");
}
quorum_app_callback = dispatch;
crm_have_quorum = quorate;
rc = quorum_trackstart(pcmk_quorum_handle, CS_TRACK_CHANGES | CS_TRACK_CURRENT);
if (rc != CS_OK) {
crm_err("Could not setup Quorum API notifications: %d", rc);
goto bail;
}
- rc = quorum_fd_get(pcmk_quorum_handle, &fd);
- if (rc != CS_OK) {
- crm_err("Could not obtain the Quorum API connection: %d", rc);
- goto bail;
- }
-
mainloop_add_fd("quorum", G_PRIORITY_HIGH, fd, dispatch, &quorum_fd_callbacks);
corosync_initialize_nodelist(NULL, FALSE, NULL);
bail:
if (rc != CS_OK) {
quorum_finalize(pcmk_quorum_handle);
return FALSE;
}
return TRUE;
}
gboolean
init_cs_connection(crm_cluster_t * cluster)
{
int retries = 0;
while (retries < 5) {
int rc = init_cs_connection_once(cluster);
retries++;
switch (rc) {
case CS_OK:
return TRUE;
break;
case CS_ERR_TRY_AGAIN:
case CS_ERR_QUEUE_FULL:
sleep(retries);
break;
default:
return FALSE;
}
}
crm_err("Could not connect to corosync after %d retries", retries);
return FALSE;
}
gboolean
init_cs_connection_once(crm_cluster_t * cluster)
{
crm_node_t *peer = NULL;
enum cluster_type_e stack = get_cluster_type();
crm_peer_init();
/* Here we just initialize comms */
if (stack != pcmk_cluster_corosync) {
crm_err("Invalid cluster type: %s (%d)", name_for_cluster_type(stack), stack);
return FALSE;
}
if (cluster_connect_cpg(cluster) == FALSE) {
return FALSE;
}
crm_info("Connection to '%s': established", name_for_cluster_type(stack));
cluster->nodeid = get_local_nodeid(0);
if(cluster->nodeid == 0) {
crm_err("Could not establish local nodeid");
return FALSE;
}
cluster->uname = get_node_name(0);
if(cluster->uname == NULL) {
crm_err("Could not establish local node name");
return FALSE;
}
/* Ensure the local node always exists */
peer = crm_get_peer(cluster->nodeid, cluster->uname);
cluster->uuid = get_corosync_uuid(peer);
return TRUE;
}
gboolean
check_message_sanity(const AIS_Message * msg, const char *data)
{
gboolean sane = TRUE;
int dest = msg->host.type;
int tmp_size = msg->header.size - sizeof(AIS_Message);
if (sane && msg->header.size == 0) {
crm_warn("Message with no size");
sane = FALSE;
}
if (sane && msg->header.error != CS_OK) {
crm_warn("Message header contains an error: %d", msg->header.error);
sane = FALSE;
}
if (sane && ais_data_len(msg) != tmp_size) {
crm_warn("Message payload size is incorrect: expected %d, got %d", ais_data_len(msg),
tmp_size);
sane = TRUE;
}
if (sane && ais_data_len(msg) == 0) {
crm_warn("Message with no payload");
sane = FALSE;
}
if (sane && data && msg->is_compressed == FALSE) {
int str_size = strlen(data) + 1;
if (ais_data_len(msg) != str_size) {
int lpc = 0;
crm_warn("Message payload is corrupted: expected %d bytes, got %d",
ais_data_len(msg), str_size);
sane = FALSE;
for (lpc = (str_size - 10); lpc < msg->size; lpc++) {
if (lpc < 0) {
lpc = 0;
}
crm_debug("bad_data[%d]: %d / '%c'", lpc, data[lpc], data[lpc]);
}
}
}
if (sane == FALSE) {
crm_err("Invalid message %d: (dest=%s:%s, from=%s:%s.%u, compressed=%d, size=%d, total=%d)",
msg->id, ais_dest(&(msg->host)), msg_type2text(dest),
ais_dest(&(msg->sender)), msg_type2text(msg->sender.type),
msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size);
} else {
crm_trace
("Verified message %d: (dest=%s:%s, from=%s:%s.%u, compressed=%d, size=%d, total=%d)",
msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)),
msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed,
ais_data_len(msg), msg->header.size);
}
return sane;
}
enum cluster_type_e
find_corosync_variant(void)
{
int rc = CS_OK;
cmap_handle_t handle;
rc = cmap_initialize(&handle);
switch(rc) {
case CS_OK:
break;
case CS_ERR_SECURITY:
crm_debug("Failed to initialize the cmap API: Permission denied (%d)", rc);
/* It's there, we just can't talk to it.
* Good enough for us to identify as 'corosync'
*/
return pcmk_cluster_corosync;
default:
crm_info("Failed to initialize the cmap API: %s (%d)",
ais_error2text(rc), rc);
return pcmk_cluster_unknown;
}
cmap_finalize(handle);
return pcmk_cluster_corosync;
}
gboolean
crm_is_corosync_peer_active(const crm_node_t * node)
{
if (node == NULL) {
crm_trace("NULL");
return FALSE;
} else if (safe_str_neq(node->state, CRM_NODE_MEMBER)) {
crm_trace("%s: state=%s", node->uname, node->state);
return FALSE;
} else if ((node->processes & crm_proc_cpg) == 0) {
crm_trace("%s: processes=%.16x", node->uname, node->processes);
return FALSE;
}
return TRUE;
}
gboolean
corosync_initialize_nodelist(void *cluster, gboolean force_member, xmlNode * xml_parent)
{
int lpc = 0;
- int rc = CS_OK;
+ cs_error_t rc = CS_OK;
int retries = 0;
gboolean any = FALSE;
cmap_handle_t cmap_handle;
+ int fd = -1;
+ uid_t found_uid = 0;
+ gid_t found_gid = 0;
+ pid_t found_pid = 0;
+ int rv;
do {
rc = cmap_initialize(&cmap_handle);
if (rc != CS_OK) {
retries++;
crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc),
retries);
sleep(retries);
}
} while (retries < 5 && rc != CS_OK);
if (rc != CS_OK) {
crm_warn("Could not connect to Cluster Configuration Database API, error %d", rc);
return FALSE;
}
+ rc = cmap_fd_get(cmap_handle, &fd);
+ if (rc != CS_OK) {
+ crm_err("Could not obtain the CMAP API connection: %s (%d)",
+ cs_strerror(rc), rc);
+ goto bail;
+ }
+
+ /* CMAP provider run as root (in given user namespace, anyway)? */
+ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
+ &found_uid, &found_gid))) {
+ crm_err("CMAP provider is not authentic:"
+ " process %lld (uid: %lld, gid: %lld)",
+ (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
+ (long long) found_uid, (long long) found_gid);
+ goto bail;
+ } else if (rv < 0) {
+ crm_err("Could not verify authenticity of CMAP provider: %s (%d)",
+ strerror(-rv), -rv);
+ goto bail;
+ }
+
crm_peer_init();
crm_trace("Initializing corosync nodelist");
for (lpc = 0; TRUE; lpc++) {
uint32_t nodeid = 0;
char *name = NULL;
char *key = NULL;
key = crm_strdup_printf("nodelist.node.%d.nodeid", lpc);
rc = cmap_get_uint32(cmap_handle, key, &nodeid);
free(key);
if (rc != CS_OK) {
break;
}
name = corosync_node_name(cmap_handle, nodeid);
if (name != NULL) {
GHashTableIter iter;
crm_node_t *node = NULL;
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
if(node && node->uname && strcasecmp(node->uname, name) == 0) {
if (node->id && node->id != nodeid) {
crm_crit("Nodes %u and %u share the same name '%s': shutting down", node->id,
nodeid, name);
crm_exit(DAEMON_RESPAWN_STOP);
}
}
}
}
if (nodeid > 0 || name != NULL) {
crm_trace("Initializing node[%d] %u = %s", lpc, nodeid, name);
crm_get_peer(nodeid, name);
}
if (nodeid > 0 && name != NULL) {
any = TRUE;
if (xml_parent) {
xmlNode *node = create_xml_node(xml_parent, XML_CIB_TAG_NODE);
crm_xml_set_id(node, "%u", nodeid);
crm_xml_add(node, XML_ATTR_UNAME, name);
if (force_member) {
crm_xml_add(node, XML_ATTR_TYPE, CRM_NODE_MEMBER);
}
}
}
free(name);
}
+bail:
cmap_finalize(cmap_handle);
return any;
}
char *
corosync_cluster_name(void)
{
cmap_handle_t handle;
char *cluster_name = NULL;
- int rc = CS_OK;
+ cs_error_t rc = CS_OK;
+ int fd = -1;
+ uid_t found_uid = 0;
+ gid_t found_gid = 0;
+ pid_t found_pid = 0;
+ int rv;
rc = cmap_initialize(&handle);
if (rc != CS_OK) {
- crm_info("Failed to initialize the cmap API: %s (%d)", ais_error2text(rc), rc);
+ crm_info("Failed to initialize the cmap API: %s (%d)",
+ cs_strerror(rc), rc);
return NULL;
}
+ rc = cmap_fd_get(handle, &fd);
+ if (rc != CS_OK) {
+ crm_err("Could not obtain the CMAP API connection: %s (%d)",
+ cs_strerror(rc), rc);
+ goto bail;
+ }
+
+ /* CMAP provider run as root (in given user namespace, anyway)? */
+ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
+ &found_uid, &found_gid))) {
+ crm_err("CMAP provider is not authentic:"
+ " process %lld (uid: %lld, gid: %lld)",
+ (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
+ (long long) found_uid, (long long) found_gid);
+ goto bail;
+ } else if (rv < 0) {
+ crm_err("Could not verify authenticity of CMAP provider: %s (%d)",
+ strerror(-rv), -rv);
+ goto bail;
+ }
+
rc = cmap_get_string(handle, "totem.cluster_name", &cluster_name);
if (rc != CS_OK) {
- crm_info("Cannot get totem.cluster_name: %s (%d)", ais_error2text(rc), rc);
+ crm_info("Cannot get totem.cluster_name: %s (%d)", cs_strerror(rc), rc);
} else {
crm_debug("cmap totem.cluster_name = '%s'", cluster_name);
}
+bail:
cmap_finalize(handle);
-
return cluster_name;
}
int
corosync_cmap_has_config(const char *prefix)
{
- int rc = CS_OK;
+ cs_error_t rc = CS_OK;
int retries = 0;
static int found = -1;
cmap_handle_t cmap_handle;
cmap_iter_handle_t iter_handle;
char key_name[CMAP_KEYNAME_MAXLEN + 1];
+ int fd = -1;
+ uid_t found_uid = 0;
+ gid_t found_gid = 0;
+ pid_t found_pid = 0;
+ int rv;
if(found != -1) {
return found;
}
do {
rc = cmap_initialize(&cmap_handle);
if (rc != CS_OK) {
retries++;
crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc),
retries);
sleep(retries);
}
} while (retries < 5 && rc != CS_OK);
if (rc != CS_OK) {
crm_warn("Could not connect to Cluster Configuration Database API: %s (rc=%d)",
cs_strerror(rc), rc);
return -1;
}
+ rc = cmap_fd_get(cmap_handle, &fd);
+ if (rc != CS_OK) {
+ crm_err("Could not obtain the CMAP API connection: %s (%d)",
+ cs_strerror(rc), rc);
+ goto bail;
+ }
+
+ /* CMAP provider run as root (in given user namespace, anyway)? */
+ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
+ &found_uid, &found_gid))) {
+ crm_err("CMAP provider is not authentic:"
+ " process %lld (uid: %lld, gid: %lld)",
+ (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
+ (long long) found_uid, (long long) found_gid);
+ goto bail;
+ } else if (rv < 0) {
+ crm_err("Could not verify authenticity of CMAP provider: %s (%d)",
+ strerror(-rv), -rv);
+ goto bail;
+ }
+
rc = cmap_iter_init(cmap_handle, prefix, &iter_handle);
if (rc != CS_OK) {
crm_warn("Failed to initialize iteration for corosync cmap '%s': %s (rc=%d)",
prefix, cs_strerror(rc), rc);
goto bail;
}
found = 0;
while ((rc = cmap_iter_next(cmap_handle, iter_handle, key_name, NULL, NULL)) == CS_OK) {
crm_trace("'%s' is configured in corosync cmap: %s", prefix, key_name);
found++;
break;
}
cmap_iter_finalize(cmap_handle, iter_handle);
bail:
cmap_finalize(cmap_handle);
return found;
}
diff --git a/lib/cluster/cpg.c b/lib/cluster/cpg.c
index 1e6cf79956..c5ecc67164 100644
--- a/lib/cluster/cpg.c
+++ b/lib/cluster/cpg.c
@@ -1,692 +1,818 @@
/*
- * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
+ * Copyright 2004-2019 the Pacemaker project contributors
*
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
+ * The version control history for this file may have further details.
*
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <bzlib.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <crm/common/ipc.h>
#include <crm/cluster/internal.h>
#include <crm/common/mainloop.h>
#include <sys/utsname.h>
#include <qb/qbipcc.h>
#include <qb/qbutil.h>
#include <corosync/corodefs.h>
#include <corosync/corotypes.h>
#include <corosync/hdb.h>
#include <corosync/cpg.h>
#include <crm/msg_xml.h>
+#include <crm/common/ipc_internal.h> /* PCMK__SPECIAL_PID* */
+
cpg_handle_t pcmk_cpg_handle = 0; /* TODO: Remove, use cluster.cpg_handle */
static bool cpg_evicted = FALSE;
gboolean(*pcmk_cpg_dispatch_fn) (int kind, const char *from, const char *data) = NULL;
#define cs_repeat(counter, max, code) do { \
code; \
if(rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { \
counter++; \
crm_debug("Retrying operation after %ds", counter); \
sleep(counter); \
} else { \
break; \
} \
} while(counter < max)
void
cluster_disconnect_cpg(crm_cluster_t *cluster)
{
pcmk_cpg_handle = 0;
if (cluster->cpg_handle) {
crm_trace("Disconnecting CPG");
cpg_leave(cluster->cpg_handle, &cluster->group);
cpg_finalize(cluster->cpg_handle);
cluster->cpg_handle = 0;
} else {
crm_info("No CPG connection");
}
}
uint32_t get_local_nodeid(cpg_handle_t handle)
{
- int rc = CS_OK;
+ cs_error_t rc = CS_OK;
int retries = 0;
static uint32_t local_nodeid = 0;
cpg_handle_t local_handle = handle;
cpg_callbacks_t cb = { };
+ int fd = -1;
+ uid_t found_uid = 0;
+ gid_t found_gid = 0;
+ pid_t found_pid = 0;
+ int rv;
if(local_nodeid != 0) {
return local_nodeid;
}
#if 0
/* Should not be necessary */
if(get_cluster_type() == pcmk_cluster_classic_ais) {
get_ais_details(&local_nodeid, NULL);
goto done;
}
#endif
if(handle == 0) {
crm_trace("Creating connection");
cs_repeat(retries, 5, rc = cpg_initialize(&local_handle, &cb));
+ if (rc != CS_OK) {
+ crm_err("Could not connect to the CPG API: %s (%d)",
+ cs_strerror(rc), rc);
+ return 0;
+ }
+
+ rc = cpg_fd_get(local_handle, &fd);
+ if (rc != CS_OK) {
+ crm_err("Could not obtain the CPG API connection: %s (%d)",
+ cs_strerror(rc), rc);
+ goto bail;
+ }
+
+ /* CPG provider run as root (in given user namespace, anyway)? */
+ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
+ &found_uid, &found_gid))) {
+ crm_err("CPG provider is not authentic:"
+ " process %lld (uid: %lld, gid: %lld)",
+ (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
+ (long long) found_uid, (long long) found_gid);
+ goto bail;
+ } else if (rv < 0) {
+ crm_err("Could not verify authenticity of CPG provider: %s (%d)",
+ strerror(-rv), -rv);
+ goto bail;
+ }
}
if (rc == CS_OK) {
retries = 0;
crm_trace("Performing lookup");
cs_repeat(retries, 5, rc = cpg_local_get(local_handle, &local_nodeid));
}
if (rc != CS_OK) {
crm_err("Could not get local node id from the CPG API: %s (%d)", ais_error2text(rc), rc);
}
+
+bail:
if(handle == 0) {
crm_trace("Closing connection");
cpg_finalize(local_handle);
}
crm_debug("Local nodeid is %u", local_nodeid);
return local_nodeid;
}
GListPtr cs_message_queue = NULL;
int cs_message_timer = 0;
static ssize_t crm_cs_flush(gpointer data);
static gboolean
crm_cs_flush_cb(gpointer data)
{
cs_message_timer = 0;
crm_cs_flush(data);
return FALSE;
}
#define CS_SEND_MAX 200
static ssize_t
crm_cs_flush(gpointer data)
{
int sent = 0;
ssize_t rc = 0;
int queue_len = 0;
static unsigned int last_sent = 0;
cpg_handle_t *handle = (cpg_handle_t *)data;
if (*handle == 0) {
crm_trace("Connection is dead");
return pcmk_ok;
}
queue_len = g_list_length(cs_message_queue);
if ((queue_len % 1000) == 0 && queue_len > 1) {
crm_err("CPG queue has grown to %d", queue_len);
} else if (queue_len == CS_SEND_MAX) {
crm_warn("CPG queue has grown to %d", queue_len);
}
if (cs_message_timer) {
/* There is already a timer, wait until it goes off */
crm_trace("Timer active %d", cs_message_timer);
return pcmk_ok;
}
while (cs_message_queue && sent < CS_SEND_MAX) {
struct iovec *iov = cs_message_queue->data;
errno = 0;
rc = cpg_mcast_joined(*handle, CPG_TYPE_AGREED, iov, 1);
if (rc != CS_OK) {
break;
}
sent++;
last_sent++;
crm_trace("CPG message sent, size=%llu",
(unsigned long long) iov->iov_len);
cs_message_queue = g_list_remove(cs_message_queue, iov);
free(iov->iov_base);
free(iov);
}
queue_len -= sent;
if (sent > 1 || cs_message_queue) {
crm_info("Sent %d CPG messages (%d remaining, last=%u): %s (%lld)",
sent, queue_len, last_sent, ais_error2text(rc),
(long long) rc);
} else {
crm_trace("Sent %d CPG messages (%d remaining, last=%u): %s (%lld)",
sent, queue_len, last_sent, ais_error2text(rc),
(long long) rc);
}
if (cs_message_queue) {
uint32_t delay_ms = 100;
if(rc != CS_OK) {
/* Proportionally more if sending failed but cap at 1s */
delay_ms = QB_MIN(1000, CS_SEND_MAX + (10 * queue_len));
}
cs_message_timer = g_timeout_add(delay_ms, crm_cs_flush_cb, data);
}
return rc;
}
gboolean
send_cpg_iov(struct iovec * iov)
{
static unsigned int queued = 0;
queued++;
crm_trace("Queueing CPG message %u (%llu bytes)",
queued, (unsigned long long) iov->iov_len);
cs_message_queue = g_list_append(cs_message_queue, iov);
crm_cs_flush(&pcmk_cpg_handle);
return TRUE;
}
static int
pcmk_cpg_dispatch(gpointer user_data)
{
int rc = 0;
crm_cluster_t *cluster = (crm_cluster_t*) user_data;
rc = cpg_dispatch(cluster->cpg_handle, CS_DISPATCH_ONE);
if (rc != CS_OK) {
crm_err("Connection to the CPG API failed: %s (%d)", ais_error2text(rc), rc);
cluster->cpg_handle = 0;
return -1;
} else if(cpg_evicted) {
crm_err("Evicted from CPG membership");
return -1;
}
return 0;
}
char *
pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid, void *content,
uint32_t *kind, const char **from)
{
char *data = NULL;
AIS_Message *msg = (AIS_Message *) content;
if(handle) {
/* 'msg' came from CPG not the plugin
* Do filtering and field massaging
*/
uint32_t local_nodeid = get_local_nodeid(handle);
const char *local_name = get_local_node_name();
if (msg->sender.id > 0 && msg->sender.id != nodeid) {
crm_err("Nodeid mismatch from %d.%d: claimed nodeid=%u", nodeid, pid, msg->sender.id);
return NULL;
} else if (msg->host.id != 0 && (local_nodeid != msg->host.id)) {
/* Not for us */
crm_trace("Not for us: %u != %u", msg->host.id, local_nodeid);
return NULL;
} else if (msg->host.size != 0 && safe_str_neq(msg->host.uname, local_name)) {
/* Not for us */
crm_trace("Not for us: %s != %s", msg->host.uname, local_name);
return NULL;
}
msg->sender.id = nodeid;
if (msg->sender.size == 0) {
crm_node_t *peer = crm_get_peer(nodeid, NULL);
if (peer == NULL) {
crm_err("Peer with nodeid=%u is unknown", nodeid);
} else if (peer->uname == NULL) {
crm_err("No uname for peer with nodeid=%u", nodeid);
} else {
crm_notice("Fixing uname for peer with nodeid=%u", nodeid);
msg->sender.size = strlen(peer->uname);
memset(msg->sender.uname, 0, MAX_NAME);
memcpy(msg->sender.uname, peer->uname, msg->sender.size);
}
}
}
crm_trace("Got new%s message (size=%d, %d, %d)",
msg->is_compressed ? " compressed" : "",
ais_data_len(msg), msg->size, msg->compressed_size);
if (kind != NULL) {
*kind = msg->header.id;
}
if (from != NULL) {
*from = msg->sender.uname;
}
if (msg->is_compressed && msg->size > 0) {
int rc = BZ_OK;
char *uncompressed = NULL;
unsigned int new_size = msg->size + 1;
if (check_message_sanity(msg, NULL) == FALSE) {
goto badmsg;
}
crm_trace("Decompressing message data");
uncompressed = calloc(1, new_size);
rc = BZ2_bzBuffToBuffDecompress(uncompressed, &new_size, msg->data, msg->compressed_size, 1, 0);
if (rc != BZ_OK) {
crm_err("Decompression failed: %d", rc);
free(uncompressed);
goto badmsg;
}
CRM_ASSERT(rc == BZ_OK);
CRM_ASSERT(new_size == msg->size);
data = uncompressed;
} else if (check_message_sanity(msg, data) == FALSE) {
goto badmsg;
} else if (safe_str_eq("identify", data)) {
char *pid_s = crm_getpid_s();
send_cluster_text(crm_class_cluster, pid_s, TRUE, NULL, crm_msg_ais);
free(pid_s);
return NULL;
} else {
data = strdup(msg->data);
}
if (msg->header.id != crm_class_members) {
/* Is this even needed anymore? */
crm_get_peer(msg->sender.id, msg->sender.uname);
}
if (msg->header.id == crm_class_rmpeer) {
uint32_t id = crm_int_helper(data, NULL);
crm_info("Removing peer %s/%u", data, id);
reap_crm_member(id, NULL);
free(data);
return NULL;
#if SUPPORT_PLUGIN
} else if (is_classic_ais_cluster()) {
plugin_handle_membership(msg);
#endif
}
crm_trace("Payload: %.200s", data);
return data;
badmsg:
crm_err("Invalid message (id=%d, dest=%s:%s, from=%s:%s.%d):"
" min=%d, total=%d, size=%d, bz2_size=%d",
msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type),
ais_dest(&(msg->sender)), msg_type2text(msg->sender.type),
msg->sender.pid, (int)sizeof(AIS_Message),
msg->header.size, msg->size, msg->compressed_size);
free(data);
return NULL;
}
+static int cmp_member_list_nodeid(const void *first,
+ const void *second)
+{
+ const struct cpg_address *const a = *((const struct cpg_address **) first),
+ *const b = *((const struct cpg_address **) second);
+ if (a->nodeid < b->nodeid) {
+ return -1;
+ } else if (a->nodeid > b->nodeid) {
+ return 1;
+ }
+ /* don't bother with "reason" nor "pid" */
+ return 0;
+}
+
void
pcmk_cpg_membership(cpg_handle_t handle,
const struct cpg_name *groupName,
const struct cpg_address *member_list, size_t member_list_entries,
const struct cpg_address *left_list, size_t left_list_entries,
const struct cpg_address *joined_list, size_t joined_list_entries)
{
int i;
gboolean found = FALSE;
static int counter = 0;
uint32_t local_nodeid = get_local_nodeid(handle);
+ const struct cpg_address *key, **rival, **sorted;
+
+ sorted = malloc(member_list_entries * sizeof(const struct cpg_address *));
+ CRM_ASSERT(sorted != NULL);
+
+ for (size_t iter = 0; iter < member_list_entries; iter++) {
+ sorted[iter] = member_list + iter;
+ }
+ /* so that the cross-matching multiply-subscribed nodes is then cheap */
+ qsort(sorted, member_list_entries, sizeof(const struct cpg_address *),
+ cmp_member_list_nodeid);
for (i = 0; i < left_list_entries; i++) {
crm_node_t *peer = crm_find_peer(left_list[i].nodeid, NULL);
- crm_info("Node %u left group %s (peer=%s, counter=%d.%d)",
+ crm_info("Node %u left group %s (peer=%s:%llu, counter=%d.%d)",
left_list[i].nodeid, groupName->value,
- (peer? peer->uname : "<none>"), counter, i);
+ (peer? peer->uname : "<none>"),
+ (unsigned long long) left_list[i].pid, counter, i);
+
+ /* in CPG world, NODE:PROCESS-IN-MEMBERSHIP-OF-G is an 1:N relation
+ and not playing by this rule may go wild in case of multiple
+ residual instances of the same pacemaker daemon at the same node
+ -- we must ensure that the possible local rival(s) won't make us
+ cry out and bail (e.g. when they quit themselves), since all the
+ surrounding logic denies this simple fact that the full membership
+ is discriminated also per the PID of the process beside mere node
+ ID (and implicitly, group ID); practically, this will be sound in
+ terms of not preventing progress, since all the CPG joiners are
+ also API end-point carriers, and that's what matters locally
+ (who's the winner);
+ remotely, we will just compare leave_list and member_list and if
+ the left process has it's node retained in member_list (under some
+ other PID, anyway) we will just ignore it as well
+ XXX: long-term fix is to establish in-out PID-aware tracking? */
if (peer) {
- crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, OFFLINESTATUS);
+ key = &left_list[i];
+ rival = bsearch(&key, sorted, member_list_entries,
+ sizeof(const struct cpg_address *),
+ cmp_member_list_nodeid);
+ if (rival == NULL) {
+ crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg,
+ OFFLINESTATUS);
+ } else if (left_list[i].nodeid == local_nodeid) {
+ crm_info("Ignoring the above event %s.%d, comes from a local"
+ " rival process (presumably not us): %llu",
+ groupName->value, counter,
+ (unsigned long long) left_list[i].pid);
+ } else {
+ crm_info("Ignoring the above event %s.%d, comes from"
+ " a rival-rich node: %llu (e.g. %llu process"
+ " carries on)",
+ groupName->value, counter,
+ (unsigned long long) left_list[i].pid,
+ (unsigned long long) (*rival)->pid);
+ }
}
}
+ free(sorted);
+ sorted = NULL;
for (i = 0; i < joined_list_entries; i++) {
- crm_info("Node %u joined group %s (counter=%d.%d)",
- joined_list[i].nodeid, groupName->value, counter, i);
+ crm_info("Node %u joined group %s (counter=%d.%d, pid=%llu,"
+ " unchecked for rivals)",
+ joined_list[i].nodeid, groupName->value, counter, i,
+ (unsigned long long) left_list[i].pid);
}
for (i = 0; i < member_list_entries; i++) {
crm_node_t *peer = crm_get_peer(member_list[i].nodeid, NULL);
- crm_info("Node %u still member of group %s (peer=%s, counter=%d.%d)",
+ crm_info("Node %u still member of group %s (peer=%s:%llu,"
+ " counter=%d.%d, at least once)",
member_list[i].nodeid, groupName->value,
- (peer? peer->uname : "<none>"), counter, i);
+ (peer? peer->uname : "<none>"), member_list[i].pid,
+ counter, i);
+
+ if (member_list[i].nodeid == local_nodeid
+ && member_list[i].pid != getpid()) {
+ /* see the note above */
+ crm_info("Ignoring the above event %s.%d, comes from a local rival"
+ " process: %llu", groupName->value, counter,
+ (unsigned long long) member_list[i].pid);
+ continue;
+ }
/* Anyone that is sending us CPG messages must also be a _CPG_ member.
* But it's _not_ safe to assume it's in the quorum membership.
* We may have just found out it's dead and are processing the last couple of messages it sent
*/
peer = crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ONLINESTATUS);
if(peer && peer->state && crm_is_peer_active(peer) == FALSE) {
time_t now = time(NULL);
/* Co-opt the otherwise unused votes field */
if(peer->votes == 0) {
peer->votes = now;
} else if(now > (60 + peer->votes)) {
/* On the otherhand, if we're still getting messages, at a certain point
* we need to acknowledge our internal cache is probably wrong
*
* Set the threshold to 1 minute
*/
- crm_err("Node %s[%u] appears to be online even though we think it is dead", peer->uname, peer->id);
+ crm_err("Node %s[%u] appears to be online even though we think"
+ " it is dead (unchecked for rivals)",
+ peer->uname, peer->id);
if (crm_update_peer_state(__FUNCTION__, peer, CRM_NODE_MEMBER, 0)) {
peer->votes = 0;
}
}
}
if (local_nodeid == member_list[i].nodeid) {
found = TRUE;
}
}
if (!found) {
crm_err("We're not part of CPG group '%s' anymore!", groupName->value);
cpg_evicted = TRUE;
}
counter++;
}
gboolean
cluster_connect_cpg(crm_cluster_t *cluster)
{
- int rc = -1;
- int fd = 0;
+ cs_error_t rc;
+ int fd = -1;
int retries = 0;
uint32_t id = 0;
crm_node_t *peer = NULL;
cpg_handle_t handle = 0;
+ uid_t found_uid = 0;
+ gid_t found_gid = 0;
+ pid_t found_pid = 0;
+ int rv;
struct mainloop_fd_callbacks cpg_fd_callbacks = {
.dispatch = pcmk_cpg_dispatch,
.destroy = cluster->destroy,
};
cpg_callbacks_t cpg_callbacks = {
.cpg_deliver_fn = cluster->cpg.cpg_deliver_fn,
.cpg_confchg_fn = cluster->cpg.cpg_confchg_fn,
/* .cpg_deliver_fn = pcmk_cpg_deliver, */
/* .cpg_confchg_fn = pcmk_cpg_membership, */
};
cpg_evicted = FALSE;
cluster->group.length = 0;
cluster->group.value[0] = 0;
/* group.value is char[128] */
strncpy(cluster->group.value, crm_system_name?crm_system_name:"unknown", 127);
cluster->group.value[127] = 0;
cluster->group.length = 1 + QB_MIN(127, strlen(cluster->group.value));
cs_repeat(retries, 30, rc = cpg_initialize(&handle, &cpg_callbacks));
if (rc != CS_OK) {
- crm_err("Could not connect to the Cluster Process Group API: %d", rc);
+ crm_err("Could not connect to the CPG API: %s (%d)",
+ cs_strerror(rc), rc);
+ goto bail;
+ }
+
+ rc = cpg_fd_get(handle, &fd);
+ if (rc != CS_OK) {
+ crm_err("Could not obtain the CPG API connection: %s (%d)",
+ cs_strerror(rc), rc);
+ goto bail;
+ }
+
+ /* CPG provider run as root (in given user namespace, anyway)? */
+ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
+ &found_uid, &found_gid))) {
+ crm_err("CPG provider is not authentic:"
+ " process %lld (uid: %lld, gid: %lld)",
+ (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
+ (long long) found_uid, (long long) found_gid);
+ rc = CS_ERR_ACCESS;
+ goto bail;
+ } else if (rv < 0) {
+ crm_err("Could not verify authenticity of CPG provider: %s (%d)",
+ strerror(-rv), -rv);
+ rc = CS_ERR_ACCESS;
goto bail;
}
id = get_local_nodeid(handle);
if (id == 0) {
crm_err("Could not get local node id from the CPG API");
goto bail;
}
cluster->nodeid = id;
retries = 0;
cs_repeat(retries, 30, rc = cpg_join(handle, &cluster->group));
if (rc != CS_OK) {
crm_err("Could not join the CPG group '%s': %d", crm_system_name, rc);
goto bail;
}
- rc = cpg_fd_get(handle, &fd);
- if (rc != CS_OK) {
- crm_err("Could not obtain the CPG API connection: %d", rc);
- goto bail;
- }
-
pcmk_cpg_handle = handle;
cluster->cpg_handle = handle;
mainloop_add_fd("corosync-cpg", G_PRIORITY_MEDIUM, fd, cluster, &cpg_fd_callbacks);
bail:
if (rc != CS_OK) {
cpg_finalize(handle);
return FALSE;
}
peer = crm_get_peer(id, NULL);
crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ONLINESTATUS);
return TRUE;
}
gboolean
send_cluster_message_cs(xmlNode * msg, gboolean local, crm_node_t * node, enum crm_ais_msg_types dest)
{
gboolean rc = TRUE;
char *data = NULL;
data = dump_xml_unformatted(msg);
rc = send_cluster_text(crm_class_cluster, data, local, node, dest);
free(data);
return rc;
}
gboolean
send_cluster_text(int class, const char *data,
gboolean local, crm_node_t * node, enum crm_ais_msg_types dest)
{
static int msg_id = 0;
static int local_pid = 0;
static int local_name_len = 0;
static const char *local_name = NULL;
char *target = NULL;
struct iovec *iov;
AIS_Message *msg = NULL;
enum crm_ais_msg_types sender = text2msg_type(crm_system_name);
/* There are only 6 handlers registered to crm_lib_service in plugin.c */
CRM_CHECK(class < 6, crm_err("Invalid message class: %d", class);
return FALSE);
#if !SUPPORT_PLUGIN
CRM_CHECK(dest != crm_msg_ais, return FALSE);
#endif
if(local_name == NULL) {
local_name = get_local_node_name();
}
if(local_name_len == 0 && local_name) {
local_name_len = strlen(local_name);
}
if (data == NULL) {
data = "";
}
if (local_pid == 0) {
local_pid = getpid();
}
if (sender == crm_msg_none) {
sender = local_pid;
}
msg = calloc(1, sizeof(AIS_Message));
msg_id++;
msg->id = msg_id;
msg->header.id = class;
msg->header.error = CS_OK;
msg->host.type = dest;
msg->host.local = local;
if (node) {
if (node->uname) {
target = strdup(node->uname);
msg->host.size = strlen(node->uname);
memset(msg->host.uname, 0, MAX_NAME);
memcpy(msg->host.uname, node->uname, msg->host.size);
} else {
target = crm_strdup_printf("%u", node->id);
}
msg->host.id = node->id;
} else {
target = strdup("all");
}
msg->sender.id = 0;
msg->sender.type = sender;
msg->sender.pid = local_pid;
msg->sender.size = local_name_len;
memset(msg->sender.uname, 0, MAX_NAME);
if(local_name && msg->sender.size) {
memcpy(msg->sender.uname, local_name, msg->sender.size);
}
msg->size = 1 + strlen(data);
msg->header.size = sizeof(AIS_Message) + msg->size;
if (msg->size < CRM_BZ2_THRESHOLD) {
msg = realloc_safe(msg, msg->header.size);
memcpy(msg->data, data, msg->size);
} else {
char *compressed = NULL;
unsigned int new_size = 0;
char *uncompressed = strdup(data);
if (crm_compress_string(uncompressed, msg->size, 0, &compressed, &new_size)) {
msg->header.size = sizeof(AIS_Message) + new_size;
msg = realloc_safe(msg, msg->header.size);
memcpy(msg->data, compressed, new_size);
msg->is_compressed = TRUE;
msg->compressed_size = new_size;
} else {
msg = realloc_safe(msg, msg->header.size);
memcpy(msg->data, data, msg->size);
}
free(uncompressed);
free(compressed);
}
iov = calloc(1, sizeof(struct iovec));
iov->iov_base = msg;
iov->iov_len = msg->header.size;
if (msg->compressed_size) {
crm_trace("Queueing CPG message %u to %s (%llu bytes, %d bytes compressed payload): %.200s",
msg->id, target, (unsigned long long) iov->iov_len,
msg->compressed_size, data);
} else {
crm_trace("Queueing CPG message %u to %s (%llu bytes, %d bytes payload): %.200s",
msg->id, target, (unsigned long long) iov->iov_len,
msg->size, data);
}
free(target);
#if SUPPORT_PLUGIN
/* The plugin is the only time we don't use CPG messaging */
if(get_cluster_type() == pcmk_cluster_classic_ais) {
return send_plugin_text(class, iov);
}
#endif
send_cpg_iov(iov);
return TRUE;
}
enum crm_ais_msg_types
text2msg_type(const char *text)
{
int type = crm_msg_none;
CRM_CHECK(text != NULL, return type);
if (safe_str_eq(text, "ais")) {
type = crm_msg_ais;
} else if (safe_str_eq(text, "crm_plugin")) {
type = crm_msg_ais;
} else if (safe_str_eq(text, CRM_SYSTEM_CIB)) {
type = crm_msg_cib;
} else if (safe_str_eq(text, CRM_SYSTEM_CRMD)) {
type = crm_msg_crmd;
} else if (safe_str_eq(text, CRM_SYSTEM_DC)) {
type = crm_msg_crmd;
} else if (safe_str_eq(text, CRM_SYSTEM_TENGINE)) {
type = crm_msg_te;
} else if (safe_str_eq(text, CRM_SYSTEM_PENGINE)) {
type = crm_msg_pe;
} else if (safe_str_eq(text, CRM_SYSTEM_LRMD)) {
type = crm_msg_lrmd;
} else if (safe_str_eq(text, CRM_SYSTEM_STONITHD)) {
type = crm_msg_stonithd;
} else if (safe_str_eq(text, "stonith-ng")) {
type = crm_msg_stonith_ng;
} else if (safe_str_eq(text, "attrd")) {
type = crm_msg_attrd;
} else {
/* This will normally be a transient client rather than
* a cluster daemon. Set the type to the pid of the client
*/
int scan_rc = sscanf(text, "%d", &type);
if (scan_rc != 1 || type <= crm_msg_stonith_ng) {
/* Ensure it's sane */
type = crm_msg_none;
}
}
return type;
}
diff --git a/lib/common/ipc.c b/lib/common/ipc.c
index 3258bcb614..3e547f32a5 100644
--- a/lib/common/ipc.c
+++ b/lib/common/ipc.c
@@ -1,1406 +1,1575 @@
/*
- * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
+ * Copyright 2004-2019 the Pacemaker project contributors
*
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
+ * The version control history for this file may have further details.
*
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
+#if defined(US_AUTH_PEERCRED_UCRED) || defined(US_AUTH_PEERCRED_SOCKPEERCRED)
+# ifdef US_AUTH_PEERCRED_UCRED
+# ifndef _GNU_SOURCE
+# define _GNU_SOURCE
+# endif
+# endif
+# include <sys/socket.h>
+#elif defined(US_AUTH_GETPEERUCRED)
+# include <ucred.h>
+#endif
+
#include <sys/param.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <grp.h>
#include <errno.h>
#include <fcntl.h>
#include <bzlib.h>
-#include <crm/crm.h>
+#include <crm/crm.h> /* indirectly: pcmk_err_generic */
#include <crm/msg_xml.h>
#include <crm/common/ipc.h>
#include <crm/common/ipcs.h>
+#include <crm/common/ipc_internal.h> /* PCMK__SPECIAL_PID* */
+
#define PCMK_IPC_VERSION 1
/* Evict clients whose event queue grows this large (by default) */
#define PCMK_IPC_DEFAULT_QUEUE_MAX 500
struct crm_ipc_response_header {
struct qb_ipc_response_header qb;
uint32_t size_uncompressed;
uint32_t size_compressed;
uint32_t flags;
uint8_t version; /* Protect against version changes for anyone that might bother to statically link us */
};
static int hdr_offset = 0;
static unsigned int ipc_buffer_max = 0;
static unsigned int pick_ipc_buffer(unsigned int max);
static inline void
crm_ipc_init(void)
{
if (hdr_offset == 0) {
hdr_offset = sizeof(struct crm_ipc_response_header);
}
if (ipc_buffer_max == 0) {
ipc_buffer_max = pick_ipc_buffer(0);
}
}
unsigned int
crm_ipc_default_buffer_size(void)
{
return pick_ipc_buffer(0);
}
static char *
generateReference(const char *custom1, const char *custom2)
{
static uint ref_counter = 0;
const char *local_cust1 = custom1;
const char *local_cust2 = custom2;
int reference_len = 4;
char *since_epoch = NULL;
reference_len += 20; /* too big */
reference_len += 40; /* too big */
if (local_cust1 == NULL) {
local_cust1 = "_empty_";
}
reference_len += strlen(local_cust1);
if (local_cust2 == NULL) {
local_cust2 = "_empty_";
}
reference_len += strlen(local_cust2);
since_epoch = calloc(1, reference_len);
if (since_epoch != NULL) {
sprintf(since_epoch, "%s-%s-%lu-%u",
local_cust1, local_cust2, (unsigned long)time(NULL), ref_counter++);
}
return since_epoch;
}
xmlNode *
create_request_adv(const char *task, xmlNode * msg_data,
const char *host_to, const char *sys_to,
const char *sys_from, const char *uuid_from, const char *origin)
{
char *true_from = NULL;
xmlNode *request = NULL;
char *reference = generateReference(task, sys_from);
if (uuid_from != NULL) {
true_from = generate_hash_key(sys_from, uuid_from);
} else if (sys_from != NULL) {
true_from = strdup(sys_from);
} else {
crm_err("No sys from specified");
}
/* host_from will get set for us if necessary by CRMd when routed */
request = create_xml_node(NULL, __FUNCTION__);
crm_xml_add(request, F_CRM_ORIGIN, origin);
crm_xml_add(request, F_TYPE, T_CRM);
crm_xml_add(request, F_CRM_VERSION, CRM_FEATURE_SET);
crm_xml_add(request, F_CRM_MSG_TYPE, XML_ATTR_REQUEST);
crm_xml_add(request, F_CRM_REFERENCE, reference);
crm_xml_add(request, F_CRM_TASK, task);
crm_xml_add(request, F_CRM_SYS_TO, sys_to);
crm_xml_add(request, F_CRM_SYS_FROM, true_from);
/* HOSTTO will be ignored if it is to the DC anyway. */
if (host_to != NULL && strlen(host_to) > 0) {
crm_xml_add(request, F_CRM_HOST_TO, host_to);
}
if (msg_data != NULL) {
add_message_xml(request, F_CRM_DATA, msg_data);
}
free(reference);
free(true_from);
return request;
}
/*
* This method adds a copy of xml_response_data
*/
xmlNode *
create_reply_adv(xmlNode * original_request, xmlNode * xml_response_data, const char *origin)
{
xmlNode *reply = NULL;
const char *host_from = crm_element_value(original_request, F_CRM_HOST_FROM);
const char *sys_from = crm_element_value(original_request, F_CRM_SYS_FROM);
const char *sys_to = crm_element_value(original_request, F_CRM_SYS_TO);
const char *type = crm_element_value(original_request, F_CRM_MSG_TYPE);
const char *operation = crm_element_value(original_request, F_CRM_TASK);
const char *crm_msg_reference = crm_element_value(original_request, F_CRM_REFERENCE);
if (type == NULL) {
crm_err("Cannot create new_message, no message type in original message");
CRM_ASSERT(type != NULL);
return NULL;
#if 0
} else if (strcasecmp(XML_ATTR_REQUEST, type) != 0) {
crm_err("Cannot create new_message, original message was not a request");
return NULL;
#endif
}
reply = create_xml_node(NULL, __FUNCTION__);
if (reply == NULL) {
crm_err("Cannot create new_message, malloc failed");
return NULL;
}
crm_xml_add(reply, F_CRM_ORIGIN, origin);
crm_xml_add(reply, F_TYPE, T_CRM);
crm_xml_add(reply, F_CRM_VERSION, CRM_FEATURE_SET);
crm_xml_add(reply, F_CRM_MSG_TYPE, XML_ATTR_RESPONSE);
crm_xml_add(reply, F_CRM_REFERENCE, crm_msg_reference);
crm_xml_add(reply, F_CRM_TASK, operation);
/* since this is a reply, we reverse the from and to */
crm_xml_add(reply, F_CRM_SYS_TO, sys_from);
crm_xml_add(reply, F_CRM_SYS_FROM, sys_to);
/* HOSTTO will be ignored if it is to the DC anyway. */
if (host_from != NULL && strlen(host_from) > 0) {
crm_xml_add(reply, F_CRM_HOST_TO, host_from);
}
if (xml_response_data != NULL) {
add_message_xml(reply, F_CRM_DATA, xml_response_data);
}
return reply;
}
/* Libqb based IPC */
/* Server... */
GHashTable *client_connections = NULL;
crm_client_t *
crm_client_get(qb_ipcs_connection_t * c)
{
if (client_connections) {
return g_hash_table_lookup(client_connections, c);
}
crm_trace("No client found for %p", c);
return NULL;
}
crm_client_t *
crm_client_get_by_id(const char *id)
{
gpointer key;
crm_client_t *client;
GHashTableIter iter;
if (client_connections && id) {
g_hash_table_iter_init(&iter, client_connections);
while (g_hash_table_iter_next(&iter, &key, (gpointer *) & client)) {
if (strcmp(client->id, id) == 0) {
return client;
}
}
}
crm_trace("No client found with id=%s", id);
return NULL;
}
const char *
crm_client_name(crm_client_t * c)
{
if (c == NULL) {
return "null";
} else if (c->name == NULL && c->id == NULL) {
return "unknown";
} else if (c->name == NULL) {
return c->id;
} else {
return c->name;
}
}
void
crm_client_init(void)
{
if (client_connections == NULL) {
crm_trace("Creating client hash table");
client_connections = g_hash_table_new(g_direct_hash, g_direct_equal);
}
}
void
crm_client_cleanup(void)
{
if (client_connections != NULL) {
int active = g_hash_table_size(client_connections);
if (active) {
crm_err("Exiting with %d active connections", active);
}
g_hash_table_destroy(client_connections); client_connections = NULL;
}
}
void
crm_client_disconnect_all(qb_ipcs_service_t *service)
{
qb_ipcs_connection_t *c = NULL;
if (service == NULL) {
return;
}
c = qb_ipcs_connection_first_get(service);
while (c != NULL) {
qb_ipcs_connection_t *last = c;
c = qb_ipcs_connection_next_get(service, last);
/* There really shouldn't be anyone connected at this point */
crm_notice("Disconnecting client %p, pid=%d...", last, crm_ipcs_client_pid(last));
qb_ipcs_disconnect(last);
qb_ipcs_connection_unref(last);
}
}
/*!
* \internal
* \brief Allocate a new crm_client_t object based on an IPC connection
*
* \param[in] c IPC connection (or NULL to allocate generic client)
* \param[in] key Connection table key (or NULL to use sane default)
* \param[in] uid_client UID corresponding to c (ignored if c is NULL)
*
* \return Pointer to new crm_client_t (or NULL on error)
*/
static crm_client_t *
client_from_connection(qb_ipcs_connection_t *c, void *key, uid_t uid_client)
{
crm_client_t *client = calloc(1, sizeof(crm_client_t));
if (client == NULL) {
crm_perror(LOG_ERR, "Allocating client");
return NULL;
}
if (c) {
#if ENABLE_ACL
client->user = uid2username(uid_client);
if (client->user == NULL) {
client->user = strdup("#unprivileged");
CRM_CHECK(client->user != NULL, free(client); return NULL);
crm_err("Unable to enforce ACLs for user ID %d, assuming unprivileged",
uid_client);
}
#endif
client->ipcs = c;
client->kind = CRM_CLIENT_IPC;
client->pid = crm_ipcs_client_pid(c);
if (key == NULL) {
key = c;
}
}
client->id = crm_generate_uuid();
if (client->id == NULL) {
crm_err("Could not generate UUID for client");
free(client->user);
free(client);
return NULL;
}
if (key == NULL) {
key = client->id;
}
g_hash_table_insert(client_connections, key, client);
return client;
}
/*!
* \brief Allocate a new crm_client_t object and generate its ID
*
* \param[in] key What to use as connections hash table key (NULL to use ID)
*
* \return Pointer to new crm_client_t (asserts on failure)
*/
crm_client_t *
crm_client_alloc(void *key)
{
crm_client_t *client = client_from_connection(NULL, key, 0);
CRM_ASSERT(client != NULL);
return client;
}
crm_client_t *
crm_client_new(qb_ipcs_connection_t * c, uid_t uid_client, gid_t gid_client)
{
static gid_t uid_cluster = 0;
static gid_t gid_cluster = 0;
crm_client_t *client = NULL;
CRM_CHECK(c != NULL, return NULL);
if (uid_cluster == 0) {
if (crm_user_lookup(CRM_DAEMON_USER, &uid_cluster, &gid_cluster) < 0) {
static bool need_log = TRUE;
if (need_log) {
crm_warn("Could not find user and group IDs for user %s",
CRM_DAEMON_USER);
need_log = FALSE;
}
}
}
if (uid_client != 0) {
crm_trace("Giving access to group %u", gid_cluster);
/* Passing -1 to chown(2) means don't change */
qb_ipcs_connection_auth_set(c, -1, gid_cluster, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
}
crm_client_init();
/* TODO: Do our own auth checking, return NULL if unauthorized */
client = client_from_connection(c, NULL, uid_client);
if (client == NULL) {
return NULL;
}
if ((uid_client == 0) || (uid_client == uid_cluster)) {
/* Remember when a connection came from root or hacluster */
set_bit(client->flags, crm_client_flag_ipc_privileged);
}
crm_debug("Connecting %p for uid=%d gid=%d pid=%u id=%s", c, uid_client, gid_client, client->pid, client->id);
return client;
}
void
crm_client_destroy(crm_client_t * c)
{
if (c == NULL) {
return;
}
if (client_connections) {
if (c->ipcs) {
crm_trace("Destroying %p/%p (%d remaining)",
c, c->ipcs, crm_hash_table_size(client_connections) - 1);
g_hash_table_remove(client_connections, c->ipcs);
} else {
crm_trace("Destroying remote connection %p (%d remaining)",
c, crm_hash_table_size(client_connections) - 1);
g_hash_table_remove(client_connections, c->id);
}
}
if (c->event_timer) {
g_source_remove(c->event_timer);
}
crm_debug("Destroying %d events", g_list_length(c->event_queue));
while (c->event_queue) {
struct iovec *event = c->event_queue->data;
c->event_queue = g_list_remove(c->event_queue, event);
free(event[0].iov_base);
free(event[1].iov_base);
free(event);
}
free(c->id);
free(c->name);
free(c->user);
if (c->remote) {
if (c->remote->auth_timeout) {
g_source_remove(c->remote->auth_timeout);
}
free(c->remote->buffer);
free(c->remote);
}
free(c);
}
/*!
* \brief Raise IPC eviction threshold for a client, if allowed
*
* \param[in,out] client Client to modify
* \param[in] queue_max New threshold (as string)
*
* \return TRUE if change was allowed, FALSE otherwise
*/
bool
crm_set_client_queue_max(crm_client_t *client, const char *qmax)
{
if (is_set(client->flags, crm_client_flag_ipc_privileged)) {
int qmax_int = crm_int_helper(qmax, NULL);
if ((errno == 0) && (qmax_int > 0)) {
client->queue_max = qmax_int;
return TRUE;
}
}
return FALSE;
}
int
crm_ipcs_client_pid(qb_ipcs_connection_t * c)
{
struct qb_ipcs_connection_stats stats;
stats.client_pid = 0;
qb_ipcs_connection_stats_get(c, &stats, 0);
return stats.client_pid;
}
xmlNode *
crm_ipcs_recv(crm_client_t * c, void *data, size_t size, uint32_t * id, uint32_t * flags)
{
xmlNode *xml = NULL;
char *uncompressed = NULL;
char *text = ((char *)data) + sizeof(struct crm_ipc_response_header);
struct crm_ipc_response_header *header = data;
if (id) {
*id = ((struct qb_ipc_response_header *)data)->id;
}
if (flags) {
*flags = header->flags;
}
if (is_set(header->flags, crm_ipc_proxied)) {
/* mark this client as being the endpoint of a proxy connection.
* Proxy connections responses are sent on the event channel to avoid
* blocking the proxy daemon (crmd) */
c->flags |= crm_client_flag_ipc_proxied;
}
if(header->version > PCMK_IPC_VERSION) {
crm_err("Filtering incompatible v%d IPC message, we only support versions <= %d",
header->version, PCMK_IPC_VERSION);
return NULL;
}
if (header->size_compressed) {
int rc = 0;
unsigned int size_u = 1 + header->size_uncompressed;
uncompressed = calloc(1, size_u);
crm_trace("Decompressing message data %u bytes into %u bytes",
header->size_compressed, size_u);
rc = BZ2_bzBuffToBuffDecompress(uncompressed, &size_u, text, header->size_compressed, 1, 0);
text = uncompressed;
if (rc != BZ_OK) {
crm_err("Decompression failed: %s (%d)", bz2_strerror(rc), rc);
free(uncompressed);
return NULL;
}
}
CRM_ASSERT(text[header->size_uncompressed - 1] == 0);
crm_trace("Received %.200s", text);
xml = string2xml(text);
free(uncompressed);
return xml;
}
ssize_t crm_ipcs_flush_events(crm_client_t * c);
static gboolean
crm_ipcs_flush_events_cb(gpointer data)
{
crm_client_t *c = data;
c->event_timer = 0;
crm_ipcs_flush_events(c);
return FALSE;
}
/*!
* \internal
* \brief Add progressive delay before next event queue flush
*
* \param[in,out] c Client connection to add delay to
* \param[in] queue_len Current event queue length
*/
static inline void
delay_next_flush(crm_client_t *c, unsigned int queue_len)
{
/* Delay a maximum of 5 seconds */
guint delay = (queue_len < 40)? (1000 + 100 * queue_len) : 5000;
c->event_timer = g_timeout_add(delay, crm_ipcs_flush_events_cb, c);
}
ssize_t
crm_ipcs_flush_events(crm_client_t * c)
{
ssize_t rc = 0;
unsigned int sent = 0;
unsigned int queue_len = 0;
if (c == NULL) {
return pcmk_ok;
} else if (c->event_timer) {
/* There is already a timer, wait until it goes off */
crm_trace("Timer active for %p - %d", c->ipcs, c->event_timer);
return pcmk_ok;
}
queue_len = g_list_length(c->event_queue);
while (c->event_queue && sent < 100) {
struct crm_ipc_response_header *header = NULL;
struct iovec *event = c->event_queue->data;
rc = qb_ipcs_event_sendv(c->ipcs, event, 2);
if (rc < 0) {
break;
}
sent++;
header = event[0].iov_base;
if (header->size_compressed) {
crm_trace("Event %d to %p[%d] (%lld compressed bytes) sent",
header->qb.id, c->ipcs, c->pid, (long long) rc);
} else {
crm_trace("Event %d to %p[%d] (%lld bytes) sent: %.120s",
header->qb.id, c->ipcs, c->pid, (long long) rc,
(char *) (event[1].iov_base));
}
c->event_queue = g_list_remove(c->event_queue, event);
free(event[0].iov_base);
free(event[1].iov_base);
free(event);
}
queue_len -= sent;
if (sent > 0 || queue_len) {
crm_trace("Sent %d events (%d remaining) for %p[%d]: %s (%lld)",
sent, queue_len, c->ipcs, c->pid,
pcmk_strerror(rc < 0 ? rc : 0), (long long) rc);
}
if (queue_len) {
/* Allow clients to briefly fall behind on processing incoming messages,
* but drop completely unresponsive clients so the connection doesn't
* consume resources indefinitely.
*/
if (queue_len > QB_MAX(c->queue_max, PCMK_IPC_DEFAULT_QUEUE_MAX)) {
if ((c->queue_backlog <= 1) || (queue_len < c->queue_backlog)) {
/* Don't evict for a new or shrinking backlog */
crm_warn("Client with process ID %u has a backlog of %u messages "
CRM_XS " %p", c->pid, queue_len, c->ipcs);
} else {
crm_err("Evicting client with process ID %u due to backlog of %u messages "
CRM_XS " %p", c->pid, queue_len, c->ipcs);
c->queue_backlog = 0;
qb_ipcs_disconnect(c->ipcs);
return rc;
}
}
c->queue_backlog = queue_len;
delay_next_flush(c, queue_len);
} else {
/* Event queue is empty, there is no backlog */
c->queue_backlog = 0;
}
return rc;
}
ssize_t
crm_ipc_prepare(uint32_t request, xmlNode * message, struct iovec ** result, uint32_t max_send_size)
{
static unsigned int biggest = 0;
struct iovec *iov;
unsigned int total = 0;
char *compressed = NULL;
char *buffer = dump_xml_unformatted(message);
struct crm_ipc_response_header *header = calloc(1, sizeof(struct crm_ipc_response_header));
CRM_ASSERT(result != NULL);
crm_ipc_init();
if (max_send_size == 0) {
max_send_size = ipc_buffer_max;
}
CRM_LOG_ASSERT(max_send_size != 0);
*result = NULL;
iov = calloc(2, sizeof(struct iovec));
iov[0].iov_len = hdr_offset;
iov[0].iov_base = header;
header->version = PCMK_IPC_VERSION;
header->size_uncompressed = 1 + strlen(buffer);
total = iov[0].iov_len + header->size_uncompressed;
if (total < max_send_size) {
iov[1].iov_base = buffer;
iov[1].iov_len = header->size_uncompressed;
} else {
unsigned int new_size = 0;
if (crm_compress_string
(buffer, header->size_uncompressed, max_send_size, &compressed, &new_size)) {
header->flags |= crm_ipc_compressed;
header->size_compressed = new_size;
iov[1].iov_len = header->size_compressed;
iov[1].iov_base = compressed;
free(buffer);
biggest = QB_MAX(header->size_compressed, biggest);
} else {
ssize_t rc = -EMSGSIZE;
crm_log_xml_trace(message, "EMSGSIZE");
biggest = QB_MAX(header->size_uncompressed, biggest);
crm_err
("Could not compress the message (%u bytes) into less than the configured ipc limit (%u bytes). "
"Set PCMK_ipc_buffer to a higher value (%u bytes suggested)",
header->size_uncompressed, max_send_size, 4 * biggest);
free(compressed);
free(buffer);
free(header);
free(iov);
return rc;
}
}
header->qb.size = iov[0].iov_len + iov[1].iov_len;
header->qb.id = (int32_t)request; /* Replying to a specific request */
*result = iov;
CRM_ASSERT(header->qb.size > 0);
return header->qb.size;
}
ssize_t
crm_ipcs_sendv(crm_client_t * c, struct iovec * iov, enum crm_ipc_flags flags)
{
ssize_t rc;
static uint32_t id = 1;
struct crm_ipc_response_header *header = iov[0].iov_base;
if (c->flags & crm_client_flag_ipc_proxied) {
/* _ALL_ replies to proxied connections need to be sent as events */
if (is_not_set(flags, crm_ipc_server_event)) {
flags |= crm_ipc_server_event;
/* this flag lets us know this was originally meant to be a response.
* even though we're sending it over the event channel. */
flags |= crm_ipc_proxied_relay_response;
}
}
header->flags |= flags;
if (flags & crm_ipc_server_event) {
header->qb.id = id++; /* We don't really use it, but doesn't hurt to set one */
if (flags & crm_ipc_server_free) {
crm_trace("Sending the original to %p[%d]", c->ipcs, c->pid);
c->event_queue = g_list_append(c->event_queue, iov);
} else {
struct iovec *iov_copy = calloc(2, sizeof(struct iovec));
crm_trace("Sending a copy to %p[%d]", c->ipcs, c->pid);
iov_copy[0].iov_len = iov[0].iov_len;
iov_copy[0].iov_base = malloc(iov[0].iov_len);
memcpy(iov_copy[0].iov_base, iov[0].iov_base, iov[0].iov_len);
iov_copy[1].iov_len = iov[1].iov_len;
iov_copy[1].iov_base = malloc(iov[1].iov_len);
memcpy(iov_copy[1].iov_base, iov[1].iov_base, iov[1].iov_len);
c->event_queue = g_list_append(c->event_queue, iov_copy);
}
} else {
CRM_LOG_ASSERT(header->qb.id != 0); /* Replying to a specific request */
rc = qb_ipcs_response_sendv(c->ipcs, iov, 2);
if (rc < header->qb.size) {
crm_notice("Response %d to %p[%d] (%u bytes) failed: %s (%d)",
header->qb.id, c->ipcs, c->pid, header->qb.size, pcmk_strerror(rc), rc);
} else {
crm_trace("Response %d sent, %lld bytes to %p[%d]",
header->qb.id, (long long) rc, c->ipcs, c->pid);
}
if (flags & crm_ipc_server_free) {
free(iov[0].iov_base);
free(iov[1].iov_base);
free(iov);
}
}
if (flags & crm_ipc_server_event) {
rc = crm_ipcs_flush_events(c);
} else {
crm_ipcs_flush_events(c);
}
if (rc == -EPIPE || rc == -ENOTCONN) {
crm_trace("Client %p disconnected", c->ipcs);
}
return rc;
}
ssize_t
crm_ipcs_send(crm_client_t * c, uint32_t request, xmlNode * message,
enum crm_ipc_flags flags)
{
struct iovec *iov = NULL;
ssize_t rc = 0;
if(c == NULL) {
return -EDESTADDRREQ;
}
crm_ipc_init();
rc = crm_ipc_prepare(request, message, &iov, ipc_buffer_max);
if (rc > 0) {
rc = crm_ipcs_sendv(c, iov, flags | crm_ipc_server_free);
} else {
free(iov);
crm_notice("Message to %p[%d] failed: %s (%d)",
c->ipcs, c->pid, pcmk_strerror(rc), rc);
}
return rc;
}
void
crm_ipcs_send_ack(crm_client_t * c, uint32_t request, uint32_t flags, const char *tag, const char *function,
int line)
{
if (flags & crm_ipc_client_response) {
xmlNode *ack = create_xml_node(NULL, tag);
crm_trace("Ack'ing msg from %s (%p)", crm_client_name(c), c);
c->request_id = 0;
crm_xml_add(ack, "function", function);
crm_xml_add_int(ack, "line", line);
crm_ipcs_send(c, request, ack, flags);
free_xml(ack);
}
}
/* Client... */
#define MIN_MSG_SIZE 12336 /* sizeof(struct qb_ipc_connection_response) */
#define MAX_MSG_SIZE 128*1024 /* 128k default */
struct crm_ipc_s {
struct pollfd pfd;
/* the max size we can send/receive over ipc */
unsigned int max_buf_size;
/* Size of the allocated 'buffer' */
unsigned int buf_size;
int msg_size;
int need_reply;
char *buffer;
char *name;
uint32_t buffer_flags;
qb_ipcc_connection_t *ipc;
};
static unsigned int
pick_ipc_buffer(unsigned int max)
{
static unsigned int global_max = 0;
if (global_max == 0) {
const char *env = getenv("PCMK_ipc_buffer");
if (env) {
int env_max = crm_parse_int(env, "0");
global_max = (env_max > 0)? QB_MAX(MIN_MSG_SIZE, env_max) : MAX_MSG_SIZE;
} else {
global_max = MAX_MSG_SIZE;
}
}
return QB_MAX(max, global_max);
}
crm_ipc_t *
crm_ipc_new(const char *name, size_t max_size)
{
crm_ipc_t *client = NULL;
client = calloc(1, sizeof(crm_ipc_t));
client->name = strdup(name);
client->buf_size = pick_ipc_buffer(max_size);
client->buffer = malloc(client->buf_size);
/* Clients initiating connection pick the max buf size */
client->max_buf_size = client->buf_size;
client->pfd.fd = -1;
client->pfd.events = POLLIN;
client->pfd.revents = 0;
return client;
}
/*!
* \brief Establish an IPC connection to a Pacemaker component
*
* \param[in] client Connection instance obtained from crm_ipc_new()
*
- * \return TRUE on success, FALSE otherwise (in which case errno will be set)
+ * \return TRUE on success, FALSE otherwise (in which case errno will be set;
+ * specifically, in case of discovering the remote side is not
+ * authentic, its value is set to ECONNABORTED).
*/
bool
crm_ipc_connect(crm_ipc_t * client)
{
+ static uid_t cl_uid = 0;
+ static gid_t cl_gid = 0;
+ pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0;
+ int rv;
+
client->need_reply = FALSE;
client->ipc = qb_ipcc_connect(client->name, client->buf_size);
if (client->ipc == NULL) {
crm_debug("Could not establish %s connection: %s (%d)", client->name, pcmk_strerror(errno), errno);
return FALSE;
}
client->pfd.fd = crm_ipc_get_fd(client);
if (client->pfd.fd < 0) {
- crm_debug("Could not obtain file descriptor for %s connection: %s (%d)", client->name, pcmk_strerror(errno), errno);
+ rv = errno;
+ /* message already omitted */
+ crm_ipc_close(client);
+ errno = rv;
+ return FALSE;
+ }
+
+ if (!cl_uid && !cl_gid
+ && (rv = crm_user_lookup(CRM_DAEMON_USER, &cl_uid, &cl_gid)) < 0) {
+ errno = -rv;
+ /* message already omitted */
+ crm_ipc_close(client);
+ errno = -rv;
+ return FALSE;
+ }
+
+ if (!(rv = crm_ipc_is_authentic_process(client->pfd.fd, cl_uid, cl_gid,
+ &found_pid, &found_uid,
+ &found_gid))) {
+ crm_err("Daemon (IPC %s) is not authentic:"
+ " process %lld (uid: %lld, gid: %lld)",
+ client->name, (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
+ (long long) found_uid, (long long) found_gid);
+ crm_ipc_close(client);
+ errno = ECONNABORTED;
+ return FALSE;
+
+ } else if (rv < 0) {
+ errno = -rv;
+ crm_perror(LOG_ERR, "Could not verify authenticity of daemon (IPC %s)",
+ client->name);
+ crm_ipc_close(client);
+ errno = -rv;
return FALSE;
}
qb_ipcc_context_set(client->ipc, client);
#ifdef HAVE_IPCS_GET_BUFFER_SIZE
client->max_buf_size = qb_ipcc_get_buffer_size(client->ipc);
if (client->max_buf_size > client->buf_size) {
free(client->buffer);
client->buffer = calloc(1, client->max_buf_size);
client->buf_size = client->max_buf_size;
}
#endif
return TRUE;
}
void
crm_ipc_close(crm_ipc_t * client)
{
if (client) {
crm_trace("Disconnecting %s IPC connection %p (%p)", client->name, client, client->ipc);
if (client->ipc) {
qb_ipcc_connection_t *ipc = client->ipc;
client->ipc = NULL;
qb_ipcc_disconnect(ipc);
}
}
}
void
crm_ipc_destroy(crm_ipc_t * client)
{
if (client) {
if (client->ipc && qb_ipcc_is_connected(client->ipc)) {
crm_notice("Destroying an active IPC connection to %s", client->name);
/* The next line is basically unsafe
*
* If this connection was attached to mainloop and mainloop is active,
* the 'disconnected' callback will end up back here and we'll end
* up free'ing the memory twice - something that can still happen
* even without this if we destroy a connection and it closes before
* we call exit
*/
/* crm_ipc_close(client); */
}
crm_trace("Destroying IPC connection to %s: %p", client->name, client);
free(client->buffer);
free(client->name);
free(client);
}
}
int
crm_ipc_get_fd(crm_ipc_t * client)
{
int fd = 0;
if (client && client->ipc && (qb_ipcc_fd_get(client->ipc, &fd) == 0)) {
return fd;
}
errno = EINVAL;
crm_perror(LOG_ERR, "Could not obtain file IPC descriptor for %s",
(client? client->name : "unspecified client"));
return -errno;
}
bool
crm_ipc_connected(crm_ipc_t * client)
{
bool rc = FALSE;
if (client == NULL) {
crm_trace("No client");
return FALSE;
} else if (client->ipc == NULL) {
crm_trace("No connection");
return FALSE;
} else if (client->pfd.fd < 0) {
crm_trace("Bad descriptor");
return FALSE;
}
rc = qb_ipcc_is_connected(client->ipc);
if (rc == FALSE) {
client->pfd.fd = -EINVAL;
}
return rc;
}
/*!
* \brief Check whether an IPC connection is ready to be read
*
* \param[in] client Connection to check
*
* \return Positive value if ready to be read, 0 if not ready, -errno on error
*/
int
crm_ipc_ready(crm_ipc_t *client)
{
int rc;
CRM_ASSERT(client != NULL);
if (crm_ipc_connected(client) == FALSE) {
return -ENOTCONN;
}
client->pfd.revents = 0;
rc = poll(&(client->pfd), 1, 0);
return (rc < 0)? -errno : rc;
}
static int
crm_ipc_decompress(crm_ipc_t * client)
{
struct crm_ipc_response_header *header = (struct crm_ipc_response_header *)(void*)client->buffer;
if (header->size_compressed) {
int rc = 0;
unsigned int size_u = 1 + header->size_uncompressed;
/* never let buf size fall below our max size required for ipc reads. */
unsigned int new_buf_size = QB_MAX((hdr_offset + size_u), client->max_buf_size);
char *uncompressed = calloc(1, new_buf_size);
crm_trace("Decompressing message data %u bytes into %u bytes",
header->size_compressed, size_u);
rc = BZ2_bzBuffToBuffDecompress(uncompressed + hdr_offset, &size_u,
client->buffer + hdr_offset, header->size_compressed, 1, 0);
if (rc != BZ_OK) {
crm_err("Decompression failed: %s (%d)", bz2_strerror(rc), rc);
free(uncompressed);
return -EILSEQ;
}
/*
* This assert no longer holds true. For an identical msg, some clients may
* require compression, and others may not. If that same msg (event) is sent
* to multiple clients, it could result in some clients receiving a compressed
* msg even though compression was not explicitly required for them.
*
* CRM_ASSERT((header->size_uncompressed + hdr_offset) >= ipc_buffer_max);
*/
CRM_ASSERT(size_u == header->size_uncompressed);
memcpy(uncompressed, client->buffer, hdr_offset); /* Preserve the header */
header = (struct crm_ipc_response_header *)(void*)uncompressed;
free(client->buffer);
client->buf_size = new_buf_size;
client->buffer = uncompressed;
}
CRM_ASSERT(client->buffer[hdr_offset + header->size_uncompressed - 1] == 0);
return pcmk_ok;
}
long
crm_ipc_read(crm_ipc_t * client)
{
struct crm_ipc_response_header *header = NULL;
CRM_ASSERT(client != NULL);
CRM_ASSERT(client->ipc != NULL);
CRM_ASSERT(client->buffer != NULL);
crm_ipc_init();
client->buffer[0] = 0;
client->msg_size = qb_ipcc_event_recv(client->ipc, client->buffer,
client->buf_size, 0);
if (client->msg_size >= 0) {
int rc = crm_ipc_decompress(client);
if (rc != pcmk_ok) {
return rc;
}
header = (struct crm_ipc_response_header *)(void*)client->buffer;
if(header->version > PCMK_IPC_VERSION) {
crm_err("Filtering incompatible v%d IPC message, we only support versions <= %d",
header->version, PCMK_IPC_VERSION);
return -EBADMSG;
}
crm_trace("Received %s event %d, size=%u, rc=%d, text: %.100s",
client->name, header->qb.id, header->qb.size, client->msg_size,
client->buffer + hdr_offset);
} else {
crm_trace("No message from %s received: %s", client->name, pcmk_strerror(client->msg_size));
}
if (crm_ipc_connected(client) == FALSE || client->msg_size == -ENOTCONN) {
crm_err("Connection to %s failed", client->name);
}
if (header) {
/* Data excluding the header */
return header->size_uncompressed;
}
return -ENOMSG;
}
const char *
crm_ipc_buffer(crm_ipc_t * client)
{
CRM_ASSERT(client != NULL);
return client->buffer + sizeof(struct crm_ipc_response_header);
}
uint32_t
crm_ipc_buffer_flags(crm_ipc_t * client)
{
struct crm_ipc_response_header *header = NULL;
CRM_ASSERT(client != NULL);
if (client->buffer == NULL) {
return 0;
}
header = (struct crm_ipc_response_header *)(void*)client->buffer;
return header->flags;
}
const char *
crm_ipc_name(crm_ipc_t * client)
{
CRM_ASSERT(client != NULL);
return client->name;
}
static int
internal_ipc_send_recv(crm_ipc_t * client, const void *iov)
{
int rc = 0;
do {
rc = qb_ipcc_sendv_recv(client->ipc, iov, 2, client->buffer, client->buf_size, -1);
} while (rc == -EAGAIN && crm_ipc_connected(client));
return rc;
}
static int
internal_ipc_send_request(crm_ipc_t * client, const void *iov, int ms_timeout)
{
int rc = 0;
time_t timeout = time(NULL) + 1 + (ms_timeout / 1000);
do {
rc = qb_ipcc_sendv(client->ipc, iov, 2);
} while (rc == -EAGAIN && time(NULL) < timeout && crm_ipc_connected(client));
return rc;
}
static int
internal_ipc_get_reply(crm_ipc_t * client, int request_id, int ms_timeout)
{
time_t timeout = time(NULL) + 1 + (ms_timeout / 1000);
int rc = 0;
crm_ipc_init();
/* get the reply */
crm_trace("client %s waiting on reply to msg id %d", client->name, request_id);
do {
rc = qb_ipcc_recv(client->ipc, client->buffer, client->buf_size, 1000);
if (rc > 0) {
struct crm_ipc_response_header *hdr = NULL;
int rc = crm_ipc_decompress(client);
if (rc != pcmk_ok) {
return rc;
}
hdr = (struct crm_ipc_response_header *)(void*)client->buffer;
if (hdr->qb.id == request_id) {
/* Got it */
break;
} else if (hdr->qb.id < request_id) {
xmlNode *bad = string2xml(crm_ipc_buffer(client));
crm_err("Discarding old reply %d (need %d)", hdr->qb.id, request_id);
crm_log_xml_notice(bad, "OldIpcReply");
} else {
xmlNode *bad = string2xml(crm_ipc_buffer(client));
crm_err("Discarding newer reply %d (need %d)", hdr->qb.id, request_id);
crm_log_xml_notice(bad, "ImpossibleReply");
CRM_ASSERT(hdr->qb.id <= request_id);
}
} else if (crm_ipc_connected(client) == FALSE) {
crm_err("Server disconnected client %s while waiting for msg id %d", client->name,
request_id);
break;
}
} while (time(NULL) < timeout);
return rc;
}
int
crm_ipc_send(crm_ipc_t * client, xmlNode * message, enum crm_ipc_flags flags, int32_t ms_timeout,
xmlNode ** reply)
{
long rc = 0;
struct iovec *iov;
static uint32_t id = 0;
static int factor = 8;
struct crm_ipc_response_header *header;
crm_ipc_init();
if (client == NULL) {
crm_notice("Invalid connection");
return -ENOTCONN;
} else if (crm_ipc_connected(client) == FALSE) {
/* Don't even bother */
crm_notice("Connection to %s closed", client->name);
return -ENOTCONN;
}
if (ms_timeout == 0) {
ms_timeout = 5000;
}
if (client->need_reply) {
crm_trace("Trying again to obtain pending reply from %s", client->name);
rc = qb_ipcc_recv(client->ipc, client->buffer, client->buf_size, ms_timeout);
if (rc < 0) {
crm_warn("Sending to %s (%p) is disabled until pending reply is received", client->name,
client->ipc);
return -EALREADY;
} else {
crm_notice("Lost reply from %s (%p) finally arrived, sending re-enabled", client->name,
client->ipc);
client->need_reply = FALSE;
}
}
id++;
CRM_LOG_ASSERT(id != 0); /* Crude wrap-around detection */
rc = crm_ipc_prepare(id, message, &iov, client->max_buf_size);
if(rc < 0) {
return rc;
}
header = iov[0].iov_base;
header->flags |= flags;
if(is_set(flags, crm_ipc_proxied)) {
/* Don't look for a synchronous response */
clear_bit(flags, crm_ipc_client_response);
}
if(header->size_compressed) {
if(factor < 10 && (client->max_buf_size / 10) < (rc / factor)) {
crm_notice("Compressed message exceeds %d0%% of the configured ipc limit (%u bytes), "
"consider setting PCMK_ipc_buffer to %u or higher",
factor, client->max_buf_size, 2 * client->max_buf_size);
factor++;
}
}
crm_trace("Sending from client: %s request id: %d bytes: %u timeout:%d msg...",
client->name, header->qb.id, header->qb.size, ms_timeout);
if (ms_timeout > 0 || is_not_set(flags, crm_ipc_client_response)) {
rc = internal_ipc_send_request(client, iov, ms_timeout);
if (rc <= 0) {
crm_trace("Failed to send from client %s request %d with %u bytes...",
client->name, header->qb.id, header->qb.size);
goto send_cleanup;
} else if (is_not_set(flags, crm_ipc_client_response)) {
crm_trace("Message sent, not waiting for reply to %d from %s to %u bytes...",
header->qb.id, client->name, header->qb.size);
goto send_cleanup;
}
rc = internal_ipc_get_reply(client, header->qb.id, ms_timeout);
if (rc < 0) {
/* No reply, for now, disable sending
*
* The alternative is to close the connection since we don't know
* how to detect and discard out-of-sequence replies
*
* TODO - implement the above
*/
client->need_reply = TRUE;
}
} else {
rc = internal_ipc_send_recv(client, iov);
}
if (rc > 0) {
struct crm_ipc_response_header *hdr = (struct crm_ipc_response_header *)(void*)client->buffer;
crm_trace("Received response %d, size=%u, rc=%ld, text: %.200s", hdr->qb.id, hdr->qb.size,
rc, crm_ipc_buffer(client));
if (reply) {
*reply = string2xml(crm_ipc_buffer(client));
}
} else {
crm_trace("Response not received: rc=%ld, errno=%d", rc, errno);
}
send_cleanup:
if (crm_ipc_connected(client) == FALSE) {
crm_notice("Connection to %s closed: %s (%ld)", client->name, pcmk_strerror(rc), rc);
} else if (rc == -ETIMEDOUT) {
crm_warn("Request %d to %s (%p) failed: %s (%ld) after %dms",
header->qb.id, client->name, client->ipc, pcmk_strerror(rc), rc, ms_timeout);
crm_write_blackbox(0, NULL);
} else if (rc <= 0) {
crm_warn("Request %d to %s (%p) failed: %s (%ld)",
header->qb.id, client->name, client->ipc, pcmk_strerror(rc), rc);
}
free(header);
free(iov[1].iov_base);
free(iov);
return rc;
}
+int
+crm_ipc_is_authentic_process(int sock, uid_t refuid, gid_t refgid,
+ pid_t *gotpid, uid_t *gotuid, gid_t *gotgid) {
+ int ret = 0;
+ pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0;
+#if defined(US_AUTH_PEERCRED_UCRED)
+ struct ucred ucred;
+ socklen_t ucred_len = sizeof(ucred);
+
+ if (!getsockopt(sock, SOL_SOCKET, SO_PEERCRED,
+ &ucred, &ucred_len)
+ && ucred_len == sizeof(ucred)) {
+ found_pid = ucred.pid; found_uid = ucred.uid; found_gid = ucred.gid;
+
+#elif defined(US_AUTH_PEERCRED_SOCKPEERCRED)
+ struct sockpeercred sockpeercred;
+ socklen_t sockpeercred_len = sizeof(sockpeercred);
+
+ if (!getsockopt(sock, SOL_SOCKET, SO_PEERCRED,
+ &sockpeercred, &sockpeercred_len)
+ && sockpeercred_len == sizeof(sockpeercred_len)) {
+ found_pid = sockpeercred.pid;
+ found_uid = sockpeercred.uid; found_gid = sockpeercred.gid;
+
+#elif defined(US_AUTH_GETPEEREID)
+ if (!getpeereid(sock, &found_uid, &found_gid)) {
+ found_pid = PCMK__SPECIAL_PID; /* cannot obtain PID (FreeBSD) */
+
+#elif defined(US_AUTH_GETPEERUCRED)
+ ucred_t *ucred;
+ if (!getpeerucred(sock, &ucred)) {
+ errno = 0;
+ found_pid = ucred_getpid(ucred);
+ found_uid = ucred_geteuid(ucred); found_gid = ucred_getegid(ucred);
+ ret = -errno;
+ ucred_free(ucred);
+ if (ret) {
+ return (ret < 0) ? ret : -pcmk_err_generic;
+ }
+
+#else
+# error "No way to authenticate a Unix socket peer"
+ errno = 0;
+ if (0) {
+#endif
+ if (gotpid != NULL) {
+ *gotpid = found_pid;
+ }
+ if (gotuid != NULL) {
+ *gotuid = found_uid;
+ }
+ if (gotgid != NULL) {
+ *gotgid = found_gid;
+ }
+ ret = (found_uid == 0 || found_uid == refuid || found_gid == refgid);
+ } else {
+ ret = (errno > 0) ? -errno : -pcmk_err_generic;
+ }
+
+ return ret;
+}
+
+int
+pcmk__ipc_is_authentic_process_active(const char *name, uid_t refuid,
+ gid_t refgid, pid_t *gotpid) {
+ static char last_asked_name[PATH_MAX / 2] = ""; /* log spam prevention */
+ int fd, ret = 0;
+ pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0;
+ qb_ipcc_connection_t *c;
+
+ if ((c = qb_ipcc_connect(name, 0)) == NULL) {
+ crm_info("Could not connect to %s IPC: %s", name, strerror(errno));
+
+ } else if ((ret = qb_ipcc_fd_get(c, &fd))) {
+ crm_err("Could not get fd from %s IPC: %s (%d)", name,
+ strerror(-ret), -ret);
+ ret = -1;
+
+ } else if ((ret = crm_ipc_is_authentic_process(fd, refuid, refgid,
+ &found_pid, &found_uid,
+ &found_gid)) < 0) {
+ if (ret == -pcmk_err_generic) {
+ crm_err("Could not get peer credentials from %s IPC", name);
+ } else {
+ crm_err("Could not get peer credentials from %s IPC: %s (%d)",
+ name, strerror(-ret), -ret);
+ }
+ ret = -1;
+
+ } else {
+ if (gotpid != NULL) {
+ *gotpid = found_pid;
+ }
+
+ if (!ret) {
+ crm_err("Daemon (IPC %s) effectively blocked with unauthorized"
+ " process %lld (uid: %lld, gid: %lld)",
+ name, (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
+ (long long) found_uid, (long long) found_gid);
+ ret = -2;
+ } else if ((found_uid != refuid || found_gid != refgid)
+ && strncmp(last_asked_name, name, sizeof(last_asked_name))) {
+ if (!found_uid && refuid) {
+ crm_warn("Daemon (IPC %s) runs as root, whereas the expected"
+ " credentials are %lld:%lld, hazard of violating"
+ " the least privilege principle",
+ name, (long long) refuid, (long long) refgid);
+ } else {
+ crm_notice("Daemon (IPC %s) runs as %lld:%lld, whereas the"
+ " expected credentials are %lld:%lld, which may"
+ " mean a different set of privileges than expected",
+ name, (long long) found_uid, (long long) found_gid,
+ (long long) refuid, (long long) refgid);
+ }
+ memccpy(last_asked_name, name, '\0', sizeof(last_asked_name));
+ }
+ }
+
+ if (ret) { /* here, !ret only when we could not initially connect */
+ qb_ipcc_disconnect(c);
+ }
+
+ return ret;
+}
+
+
/* Utils */
xmlNode *
create_hello_message(const char *uuid,
const char *client_name, const char *major_version, const char *minor_version)
{
xmlNode *hello_node = NULL;
xmlNode *hello = NULL;
if (uuid == NULL || strlen(uuid) == 0
|| client_name == NULL || strlen(client_name) == 0
|| major_version == NULL || strlen(major_version) == 0
|| minor_version == NULL || strlen(minor_version) == 0) {
crm_err("Missing fields, Hello message will not be valid.");
return NULL;
}
hello_node = create_xml_node(NULL, XML_TAG_OPTIONS);
crm_xml_add(hello_node, "major_version", major_version);
crm_xml_add(hello_node, "minor_version", minor_version);
crm_xml_add(hello_node, "client_name", client_name);
crm_xml_add(hello_node, "client_uuid", uuid);
crm_trace("creating hello message");
hello = create_request(CRM_OP_HELLO, hello_node, NULL, NULL, client_name, uuid);
free_xml(hello_node);
return hello;
}
diff --git a/lib/common/utils.c b/lib/common/utils.c
index f3f60ed58f..2ac7901b47 100644
--- a/lib/common/utils.c
+++ b/lib/common/utils.c
@@ -1,1345 +1,1353 @@
/*
- * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
+ * Copyright 2004-2019 the Pacemaker project contributors
*
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
+ * The version control history for this file may have further details.
*
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <dlfcn.h>
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <sys/utsname.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <limits.h>
#include <pwd.h>
#include <time.h>
#include <libgen.h>
#include <signal.h>
#include <qb/qbdefs.h>
#include <crm/crm.h>
#include <crm/services.h>
#include <crm/msg_xml.h>
#include <crm/cib/internal.h>
#include <crm/common/xml.h>
#include <crm/common/util.h>
#include <crm/common/ipc.h>
#include <crm/common/iso8601.h>
#include <crm/common/mainloop.h>
#include <libxml2/libxml/relaxng.h>
#ifndef MAXLINE
# define MAXLINE 512
#endif
#ifdef HAVE_GETOPT_H
# include <getopt.h>
#endif
#ifndef PW_BUFFER_LEN
# define PW_BUFFER_LEN 500
#endif
CRM_TRACE_INIT_DATA(common);
gboolean crm_config_error = FALSE;
gboolean crm_config_warning = FALSE;
char *crm_system_name = NULL;
int node_score_red = 0;
int node_score_green = 0;
int node_score_yellow = 0;
int node_score_infinity = INFINITY;
static struct crm_option *crm_long_options = NULL;
static const char *crm_app_description = NULL;
static char *crm_short_options = NULL;
static const char *crm_app_usage = NULL;
int
crm_exit(int rc)
{
mainloop_cleanup();
#if HAVE_LIBXML2
crm_trace("cleaning up libxml");
crm_xml_cleanup();
#endif
crm_trace("exit %d", rc);
qb_log_fini();
free(crm_short_options);
free(crm_system_name);
exit(ABS(rc)); /* Always exit with a positive value so that it can be passed to crm_error
*
* Otherwise the system wraps it around and people
* have to jump through hoops figuring out what the
* error was
*/
return rc; /* Can never happen, but allows return crm_exit(rc)
* where "return rc" was used previously - which
* keeps compilers happy.
*/
}
gboolean
check_time(const char *value)
{
if (crm_get_msec(value) < 5000) {
return FALSE;
}
return TRUE;
}
gboolean
check_timer(const char *value)
{
if (crm_get_msec(value) < 0) {
return FALSE;
}
return TRUE;
}
gboolean
check_boolean(const char *value)
{
int tmp = FALSE;
if (crm_str_to_boolean(value, &tmp) != 1) {
return FALSE;
}
return TRUE;
}
gboolean
check_number(const char *value)
{
errno = 0;
if (value == NULL) {
return FALSE;
} else if (safe_str_eq(value, MINUS_INFINITY_S)) {
} else if (safe_str_eq(value, INFINITY_S)) {
} else {
crm_int_helper(value, NULL);
}
if (errno != 0) {
return FALSE;
}
return TRUE;
}
gboolean
check_positive_number(const char* value)
{
if (safe_str_eq(value, INFINITY_S) || (crm_int_helper(value, NULL))) {
return TRUE;
}
return FALSE;
}
gboolean
check_quorum(const char *value)
{
if (safe_str_eq(value, "stop")) {
return TRUE;
} else if (safe_str_eq(value, "freeze")) {
return TRUE;
} else if (safe_str_eq(value, "ignore")) {
return TRUE;
} else if (safe_str_eq(value, "suicide")) {
return TRUE;
}
return FALSE;
}
gboolean
check_script(const char *value)
{
struct stat st;
if(safe_str_eq(value, "/dev/null")) {
return TRUE;
}
if(stat(value, &st) != 0) {
crm_err("Script %s does not exist", value);
return FALSE;
}
if(S_ISREG(st.st_mode) == 0) {
crm_err("Script %s is not a regular file", value);
return FALSE;
}
if( (st.st_mode & (S_IXUSR | S_IXGRP )) == 0) {
crm_err("Script %s is not executable", value);
return FALSE;
}
return TRUE;
}
gboolean
check_utilization(const char *value)
{
char *end = NULL;
long number = strtol(value, &end, 10);
if(end && end[0] != '%') {
return FALSE;
} else if(number < 0) {
return FALSE;
}
return TRUE;
}
int
char2score(const char *score)
{
int score_f = 0;
if (score == NULL) {
} else if (safe_str_eq(score, MINUS_INFINITY_S)) {
score_f = -node_score_infinity;
} else if (safe_str_eq(score, INFINITY_S)) {
score_f = node_score_infinity;
} else if (safe_str_eq(score, "+" INFINITY_S)) {
score_f = node_score_infinity;
} else if (safe_str_eq(score, "red")) {
score_f = node_score_red;
} else if (safe_str_eq(score, "yellow")) {
score_f = node_score_yellow;
} else if (safe_str_eq(score, "green")) {
score_f = node_score_green;
} else {
score_f = crm_parse_int(score, NULL);
if (score_f > 0 && score_f > node_score_infinity) {
score_f = node_score_infinity;
} else if (score_f < 0 && score_f < -node_score_infinity) {
score_f = -node_score_infinity;
}
}
return score_f;
}
char *
score2char_stack(int score, char *buf, size_t len)
{
if (score >= node_score_infinity) {
strncpy(buf, INFINITY_S, 9);
} else if (score <= -node_score_infinity) {
strncpy(buf, MINUS_INFINITY_S , 10);
} else {
return crm_itoa_stack(score, buf, len);
}
return buf;
}
char *
score2char(int score)
{
if (score >= node_score_infinity) {
return strdup(INFINITY_S);
} else if (score <= -node_score_infinity) {
return strdup("-" INFINITY_S);
}
return crm_itoa(score);
}
const char *
cluster_option(GHashTable * options, gboolean(*validate) (const char *),
const char *name, const char *old_name, const char *def_value)
{
const char *value = NULL;
char *new_value = NULL;
CRM_ASSERT(name != NULL);
if (options) {
value = g_hash_table_lookup(options, name);
if ((value == NULL) && old_name) {
value = g_hash_table_lookup(options, old_name);
if (value != NULL) {
crm_config_warn("Support for legacy name '%s' for cluster option '%s'"
" is deprecated and will be removed in a future release",
old_name, name);
// Inserting copy with current name ensures we only warn once
new_value = strdup(value);
g_hash_table_insert(options, strdup(name), new_value);
value = new_value;
}
}
if (value && validate && (validate(value) == FALSE)) {
crm_config_err("Resetting cluster option '%s' to default: value '%s' is invalid",
name, value);
value = NULL;
}
if (value) {
return value;
}
}
// No value found, use default
value = def_value;
if (value == NULL) {
crm_trace("No value or default provided for cluster option '%s'",
name);
return NULL;
}
if (validate) {
CRM_CHECK(validate(value) != FALSE,
crm_err("Bug: default value for cluster option '%s' is invalid", name);
return NULL);
}
crm_trace("Using default value '%s' for cluster option '%s'",
value, name);
if (options) {
new_value = strdup(value);
g_hash_table_insert(options, strdup(name), new_value);
value = new_value;
}
return value;
}
const char *
get_cluster_pref(GHashTable * options, pe_cluster_option * option_list, int len, const char *name)
{
const char *value = NULL;
for (int lpc = 0; lpc < len; lpc++) {
if (safe_str_eq(name, option_list[lpc].name)) {
value = cluster_option(options,
option_list[lpc].is_valid,
option_list[lpc].name,
option_list[lpc].alt_name,
option_list[lpc].default_value);
return value;
}
}
CRM_CHECK(FALSE, crm_err("Bug: looking for unknown option '%s'", name));
return NULL;
}
void
config_metadata(const char *name, const char *version, const char *desc_short,
const char *desc_long, pe_cluster_option * option_list, int len)
{
int lpc = 0;
fprintf(stdout, "<?xml version=\"1.0\"?>"
"<!DOCTYPE resource-agent SYSTEM \"ra-api-1.dtd\">\n"
"<resource-agent name=\"%s\">\n"
" <version>%s</version>\n"
" <longdesc lang=\"en\">%s</longdesc>\n"
" <shortdesc lang=\"en\">%s</shortdesc>\n"
" <parameters>\n", name, version, desc_long, desc_short);
for (lpc = 0; lpc < len; lpc++) {
if (option_list[lpc].description_long == NULL && option_list[lpc].description_short == NULL) {
continue;
}
fprintf(stdout, " <parameter name=\"%s\" unique=\"0\">\n"
" <shortdesc lang=\"en\">%s</shortdesc>\n"
" <content type=\"%s\" default=\"%s\"/>\n"
" <longdesc lang=\"en\">%s%s%s</longdesc>\n"
" </parameter>\n",
option_list[lpc].name,
option_list[lpc].description_short,
option_list[lpc].type,
option_list[lpc].default_value,
option_list[lpc].description_long ? option_list[lpc].
description_long : option_list[lpc].description_short,
option_list[lpc].values ? " Allowed values: " : "",
option_list[lpc].values ? option_list[lpc].values : "");
}
fprintf(stdout, " </parameters>\n</resource-agent>\n");
}
void
verify_all_options(GHashTable * options, pe_cluster_option * option_list, int len)
{
int lpc = 0;
for (lpc = 0; lpc < len; lpc++) {
cluster_option(options,
option_list[lpc].is_valid,
option_list[lpc].name,
option_list[lpc].alt_name, option_list[lpc].default_value);
}
}
char *
generate_hash_key(const char *crm_msg_reference, const char *sys)
{
char *hash_key = crm_concat(sys ? sys : "none", crm_msg_reference, '_');
crm_trace("created hash key: (%s)", hash_key);
return hash_key;
}
int
crm_user_lookup(const char *name, uid_t * uid, gid_t * gid)
{
int rc = pcmk_ok;
char *buffer = NULL;
struct passwd pwd;
struct passwd *pwentry = NULL;
buffer = calloc(1, PW_BUFFER_LEN);
rc = getpwnam_r(name, &pwd, buffer, PW_BUFFER_LEN, &pwentry);
if (pwentry) {
if (uid) {
*uid = pwentry->pw_uid;
}
if (gid) {
*gid = pwentry->pw_gid;
}
crm_trace("User %s has uid=%d gid=%d", name, pwentry->pw_uid, pwentry->pw_gid);
} else {
rc = rc? -rc : -EINVAL;
crm_info("User %s lookup: %s", name, pcmk_strerror(rc));
}
free(buffer);
return rc;
}
static int
crm_version_helper(const char *text, char **end_text)
{
int atoi_result = -1;
CRM_ASSERT(end_text != NULL);
errno = 0;
if (text != NULL && text[0] != 0) {
atoi_result = (int)strtol(text, end_text, 10);
if (errno == EINVAL) {
crm_err("Conversion of '%s' %c failed", text, text[0]);
atoi_result = -1;
}
}
return atoi_result;
}
/*
* version1 < version2 : -1
* version1 = version2 : 0
* version1 > version2 : 1
*/
int
compare_version(const char *version1, const char *version2)
{
int rc = 0;
int lpc = 0;
char *ver1_copy = NULL, *ver2_copy = NULL;
char *rest1 = NULL, *rest2 = NULL;
if (version1 == NULL && version2 == NULL) {
return 0;
} else if (version1 == NULL) {
return -1;
} else if (version2 == NULL) {
return 1;
}
ver1_copy = strdup(version1);
ver2_copy = strdup(version2);
rest1 = ver1_copy;
rest2 = ver2_copy;
while (1) {
int digit1 = 0;
int digit2 = 0;
lpc++;
if (rest1 == rest2) {
break;
}
if (rest1 != NULL) {
digit1 = crm_version_helper(rest1, &rest1);
}
if (rest2 != NULL) {
digit2 = crm_version_helper(rest2, &rest2);
}
if (digit1 < digit2) {
rc = -1;
break;
} else if (digit1 > digit2) {
rc = 1;
break;
}
if (rest1 != NULL && rest1[0] == '.') {
rest1++;
}
if (rest1 != NULL && rest1[0] == 0) {
rest1 = NULL;
}
if (rest2 != NULL && rest2[0] == '.') {
rest2++;
}
if (rest2 != NULL && rest2[0] == 0) {
rest2 = NULL;
}
}
free(ver1_copy);
free(ver2_copy);
if (rc == 0) {
crm_trace("%s == %s (%d)", version1, version2, lpc);
} else if (rc < 0) {
crm_trace("%s < %s (%d)", version1, version2, lpc);
} else if (rc > 0) {
crm_trace("%s > %s (%d)", version1, version2, lpc);
}
return rc;
}
gboolean do_stderr = FALSE;
#ifndef NUMCHARS
# define NUMCHARS "0123456789."
#endif
#ifndef WHITESPACE
# define WHITESPACE " \t\n\r\f"
#endif
unsigned long long
crm_get_interval(const char *input)
{
unsigned long long msec = 0;
if (input == NULL) {
return msec;
} else if (input[0] != 'P') {
long long tmp = crm_get_msec(input);
if(tmp > 0) {
msec = tmp;
}
} else {
crm_time_t *interval = crm_time_parse_duration(input);
msec = 1000 * crm_time_get_seconds(interval);
crm_time_free(interval);
}
return msec;
}
long long
crm_get_msec(const char *input)
{
const char *cp = input;
const char *units;
long long multiplier = 1000;
long long divisor = 1;
long long msec = -1;
char *end_text = NULL;
/* double dret; */
if (input == NULL) {
return msec;
}
cp += strspn(cp, WHITESPACE);
units = cp + strspn(cp, NUMCHARS);
units += strspn(units, WHITESPACE);
if (strchr(NUMCHARS, *cp) == NULL) {
return msec;
}
if (strncasecmp(units, "ms", 2) == 0 || strncasecmp(units, "msec", 4) == 0) {
multiplier = 1;
divisor = 1;
} else if (strncasecmp(units, "us", 2) == 0 || strncasecmp(units, "usec", 4) == 0) {
multiplier = 1;
divisor = 1000;
} else if (strncasecmp(units, "s", 1) == 0 || strncasecmp(units, "sec", 3) == 0) {
multiplier = 1000;
divisor = 1;
} else if (strncasecmp(units, "m", 1) == 0 || strncasecmp(units, "min", 3) == 0) {
multiplier = 60 * 1000;
divisor = 1;
} else if (strncasecmp(units, "h", 1) == 0 || strncasecmp(units, "hr", 2) == 0) {
multiplier = 60 * 60 * 1000;
divisor = 1;
} else if (*units != EOS && *units != '\n' && *units != '\r') {
return msec;
}
msec = crm_int_helper(cp, &end_text);
if (msec > LLONG_MAX/multiplier) {
/* arithmetics overflow while multiplier/divisor mutually exclusive */
return LLONG_MAX;
}
msec *= multiplier;
msec /= divisor;
/* dret += 0.5; */
/* msec = (long long)dret; */
return msec;
}
extern bool crm_is_daemon;
/* coverity[+kill] */
void
crm_abort(const char *file, const char *function, int line,
const char *assert_condition, gboolean do_core, gboolean do_fork)
{
int rc = 0;
int pid = 0;
int status = 0;
/* Implied by the parent's error logging below */
/* crm_write_blackbox(0); */
if(crm_is_daemon == FALSE) {
/* This is a command line tool - do not fork */
/* crm_add_logfile(NULL); * Record it to a file? */
crm_enable_stderr(TRUE); /* Make sure stderr is enabled so we can tell the caller */
do_fork = FALSE; /* Just crash if needed */
}
if (do_core == FALSE) {
crm_err("%s: Triggered assert at %s:%d : %s", function, file, line, assert_condition);
return;
} else if (do_fork) {
pid = fork();
} else {
crm_err("%s: Triggered fatal assert at %s:%d : %s", function, file, line, assert_condition);
}
if (pid == -1) {
crm_crit("%s: Cannot create core for non-fatal assert at %s:%d : %s",
function, file, line, assert_condition);
return;
} else if(pid == 0) {
/* Child process */
abort();
return;
}
/* Parent process */
crm_err("%s: Forked child %d to record non-fatal assert at %s:%d : %s",
function, pid, file, line, assert_condition);
crm_write_blackbox(SIGTRAP, NULL);
do {
rc = waitpid(pid, &status, 0);
if(rc == pid) {
return; /* Job done */
}
} while(errno == EINTR);
if (errno == ECHILD) {
/* crm_mon does this */
crm_trace("Cannot wait on forked child %d - SIGCHLD is probably set to SIG_IGN", pid);
return;
}
crm_perror(LOG_ERR, "Cannot wait on forked child %d", pid);
}
int
crm_pid_active(long pid, const char *daemon)
{
+ static int last_asked_pid = 0; /* log spam prevention */
+#if SUPPORT_PROCFS
static int have_proc_pid = 0;
+#else
+ static int have_proc_pid = -1;
+#endif
+ int rc = 0;
- if(have_proc_pid == 0) {
+ if (have_proc_pid == 0) {
+ /* evaluation of /proc/PID/exe applicability via self-introspection */
char proc_path[PATH_MAX], exe_path[PATH_MAX];
-
- /* check to make sure pid hasn't been reused by another process */
- snprintf(proc_path, sizeof(proc_path), "/proc/%lu/exe", (long unsigned int)getpid());
-
+ snprintf(proc_path, sizeof(proc_path), "/proc/%lu/exe",
+ (long unsigned int) getpid());
have_proc_pid = 1;
- if(readlink(proc_path, exe_path, PATH_MAX - 1) < 0) {
+ if (readlink(proc_path, exe_path, sizeof(exe_path) - 1) < 0) {
have_proc_pid = -1;
}
}
if (pid <= 0) {
return -1;
- } else if (kill(pid, 0) < 0 && errno == ESRCH) {
- return 0;
+ } else if ((rc = kill(pid, 0)) < 0 && errno == ESRCH) {
+ return 0; /* no such PID detected */
- } else if(daemon == NULL || have_proc_pid == -1) {
- return 1;
+ } else if (rc < 0 && have_proc_pid == -1) {
+ if (last_asked_pid != pid) {
+ crm_info("Cannot examine PID %ld: %s", pid, strerror(errno));
+ last_asked_pid = pid;
+ }
+ return -2; /* errno != ESRCH */
+
+ } else if (rc == 0 && (daemon == NULL || have_proc_pid == -1)) {
+ return 1; /* kill as the only indicator, cannot double check */
} else {
- int rc = 0;
+ /* make sure PID hasn't been reused by another process
+ XXX: might still be just a zombie, which could confuse decisions */
+ bool checked_through_kill = (rc == 0);
char proc_path[PATH_MAX], exe_path[PATH_MAX], myexe_path[PATH_MAX];
-
- /* check to make sure pid hasn't been reused by another process */
- snprintf(proc_path, sizeof(proc_path), "/proc/%lu/exe", pid);
-
- rc = readlink(proc_path, exe_path, PATH_MAX - 1);
- if (rc < 0 && errno == EACCES) {
- crm_perror(LOG_INFO, "Could not read from %s", proc_path);
- return 1;
+ snprintf(proc_path, sizeof(proc_path), "/proc/%ld/exe", pid);
+
+ rc = readlink(proc_path, exe_path, sizeof(exe_path) - 1);
+ if ((rc < 0) && (errno == EACCES)) {
+ if (last_asked_pid != pid) {
+ crm_info("Could not read from %s: %s", proc_path,
+ strerror(errno));
+ last_asked_pid = pid;
+ }
+ return checked_through_kill ? 1 : -2;
} else if (rc < 0) {
- crm_perror(LOG_ERR, "Could not read from %s", proc_path);
- return 0;
+ if (last_asked_pid != pid) {
+ crm_err("Could not read from %s: %s (%d)", proc_path,
+ strerror(errno), errno);
+ last_asked_pid = pid;
+ }
+ return 0; /* most likely errno == ENOENT */
}
-
+ exe_path[rc] = '\0';
- exe_path[rc] = 0;
-
- if(daemon[0] != '/') {
- rc = snprintf(myexe_path, sizeof(proc_path), CRM_DAEMON_DIR"/%s", daemon);
- myexe_path[rc] = 0;
+ if (daemon[0] != '/') {
+ rc = snprintf(myexe_path, sizeof(myexe_path), CRM_DAEMON_DIR"/%s",
+ daemon);
} else {
- rc = snprintf(myexe_path, sizeof(proc_path), "%s", daemon);
- myexe_path[rc] = 0;
+ rc = snprintf(myexe_path, sizeof(myexe_path), "%s", daemon);
}
-
- if (strcmp(exe_path, myexe_path) == 0) {
+
+ if (rc > 0 && rc < sizeof(myexe_path) && !strcmp(exe_path, myexe_path)) {
return 1;
}
}
return 0;
}
#define LOCKSTRLEN 11
long
crm_read_pidfile(const char *filename)
{
int fd;
struct stat sbuf;
long pid = -ENOENT;
char buf[LOCKSTRLEN + 1];
if ((fd = open(filename, O_RDONLY)) < 0) {
goto bail;
}
if (fstat(fd, &sbuf) >= 0 && sbuf.st_size < LOCKSTRLEN) {
sleep(2); /* if someone was about to create one,
* give'm a sec to do so
*/
}
if (read(fd, buf, sizeof(buf)) < 1) {
goto bail;
}
if (sscanf(buf, "%lu", &pid) > 0) {
if (pid <= 0) {
pid = -ESRCH;
} else {
crm_trace("Got pid %lu from %s\n", pid, filename);
}
}
bail:
if (fd >= 0) {
close(fd);
}
return pid;
}
long
crm_pidfile_inuse(const char *filename, long mypid, const char *daemon)
{
long pid = crm_read_pidfile(filename);
if (pid < 2) {
/* Invalid pid */
pid = -ENOENT;
unlink(filename);
} else if (mypid && pid == mypid) {
/* In use by us */
pid = pcmk_ok;
} else if (crm_pid_active(pid, daemon) == FALSE) {
/* Contains a stale value */
unlink(filename);
pid = -ENOENT;
} else if (mypid && pid != mypid) {
/* locked by existing process - give up */
pid = -EEXIST;
}
return pid;
}
static int
crm_lock_pidfile(const char *filename, const char *name)
{
long mypid = 0;
int fd = 0, rc = 0;
char buf[LOCKSTRLEN + 1];
mypid = (unsigned long)getpid();
rc = crm_pidfile_inuse(filename, 0, name);
if (rc == -ENOENT) {
/* exists but the process is not active */
} else if (rc != pcmk_ok) {
/* locked by existing process - give up */
return rc;
}
if ((fd = open(filename, O_CREAT | O_WRONLY | O_EXCL, 0644)) < 0) {
/* Hmmh, why did we fail? Anyway, nothing we can do about it */
return -errno;
}
snprintf(buf, sizeof(buf), "%*lu\n", LOCKSTRLEN - 1, mypid);
rc = write(fd, buf, LOCKSTRLEN);
close(fd);
if (rc != LOCKSTRLEN) {
crm_perror(LOG_ERR, "Incomplete write to %s", filename);
return -errno;
}
return crm_pidfile_inuse(filename, mypid, name);
}
void
crm_make_daemon(const char *name, gboolean daemonize, const char *pidfile)
{
int rc;
long pid;
const char *devnull = "/dev/null";
if (daemonize == FALSE) {
return;
}
/* Check before we even try... */
rc = crm_pidfile_inuse(pidfile, 1, name);
if(rc < pcmk_ok && rc != -ENOENT) {
pid = crm_read_pidfile(pidfile);
crm_err("%s: already running [pid %ld in %s]", name, pid, pidfile);
printf("%s: already running [pid %ld in %s]\n", name, pid, pidfile);
crm_exit(rc);
}
pid = fork();
if (pid < 0) {
fprintf(stderr, "%s: could not start daemon\n", name);
crm_perror(LOG_ERR, "fork");
crm_exit(EINVAL);
} else if (pid > 0) {
crm_exit(pcmk_ok);
}
rc = crm_lock_pidfile(pidfile, name);
if(rc < pcmk_ok) {
crm_err("Could not lock '%s' for %s: %s (%d)", pidfile, name, pcmk_strerror(rc), rc);
printf("Could not lock '%s' for %s: %s (%d)\n", pidfile, name, pcmk_strerror(rc), rc);
crm_exit(rc);
}
umask(S_IWGRP | S_IWOTH | S_IROTH);
close(STDIN_FILENO);
(void)open(devnull, O_RDONLY); /* Stdin: fd 0 */
close(STDOUT_FILENO);
(void)open(devnull, O_WRONLY); /* Stdout: fd 1 */
close(STDERR_FILENO);
(void)open(devnull, O_WRONLY); /* Stderr: fd 2 */
}
char *
crm_meta_name(const char *field)
{
int lpc = 0;
int max = 0;
char *crm_name = NULL;
CRM_CHECK(field != NULL, return NULL);
crm_name = crm_concat(CRM_META, field, '_');
/* Massage the names so they can be used as shell variables */
max = strlen(crm_name);
for (; lpc < max; lpc++) {
switch (crm_name[lpc]) {
case '-':
crm_name[lpc] = '_';
break;
}
}
return crm_name;
}
const char *
crm_meta_value(GHashTable * hash, const char *field)
{
char *key = NULL;
const char *value = NULL;
key = crm_meta_name(field);
if (key) {
value = g_hash_table_lookup(hash, key);
free(key);
}
return value;
}
static struct option *
crm_create_long_opts(struct crm_option *long_options)
{
struct option *long_opts = NULL;
#ifdef HAVE_GETOPT_H
int index = 0, lpc = 0;
/*
* A previous, possibly poor, choice of '?' as the short form of --help
* means that getopt_long() returns '?' for both --help and for "unknown option"
*
* This dummy entry allows us to differentiate between the two in crm_get_option()
* and exit with the correct error code
*/
long_opts = realloc_safe(long_opts, (index + 1) * sizeof(struct option));
long_opts[index].name = "__dummmy__";
long_opts[index].has_arg = 0;
long_opts[index].flag = 0;
long_opts[index].val = '_';
index++;
for (lpc = 0; long_options[lpc].name != NULL; lpc++) {
if (long_options[lpc].name[0] == '-') {
continue;
}
long_opts = realloc_safe(long_opts, (index + 1) * sizeof(struct option));
/*fprintf(stderr, "Creating %d %s = %c\n", index,
* long_options[lpc].name, long_options[lpc].val); */
long_opts[index].name = long_options[lpc].name;
long_opts[index].has_arg = long_options[lpc].has_arg;
long_opts[index].flag = long_options[lpc].flag;
long_opts[index].val = long_options[lpc].val;
index++;
}
/* Now create the list terminator */
long_opts = realloc_safe(long_opts, (index + 1) * sizeof(struct option));
long_opts[index].name = NULL;
long_opts[index].has_arg = 0;
long_opts[index].flag = 0;
long_opts[index].val = 0;
#endif
return long_opts;
}
void
crm_set_options(const char *short_options, const char *app_usage, struct crm_option *long_options,
const char *app_desc)
{
if (short_options) {
crm_short_options = strdup(short_options);
} else if (long_options) {
int lpc = 0;
int opt_string_len = 0;
char *local_short_options = NULL;
for (lpc = 0; long_options[lpc].name != NULL; lpc++) {
if (long_options[lpc].val && long_options[lpc].val != '-' && long_options[lpc].val < UCHAR_MAX) {
local_short_options = realloc_safe(local_short_options, opt_string_len + 4);
local_short_options[opt_string_len++] = long_options[lpc].val;
/* getopt(3) says: Two colons mean an option takes an optional arg; */
if (long_options[lpc].has_arg == optional_argument) {
local_short_options[opt_string_len++] = ':';
}
if (long_options[lpc].has_arg >= required_argument) {
local_short_options[opt_string_len++] = ':';
}
local_short_options[opt_string_len] = 0;
}
}
crm_short_options = local_short_options;
crm_trace("Generated short option string: '%s'", local_short_options);
}
if (long_options) {
crm_long_options = long_options;
}
if (app_desc) {
crm_app_description = app_desc;
}
if (app_usage) {
crm_app_usage = app_usage;
}
}
int
crm_get_option(int argc, char **argv, int *index)
{
return crm_get_option_long(argc, argv, index, NULL);
}
int
crm_get_option_long(int argc, char **argv, int *index, const char **longname)
{
#ifdef HAVE_GETOPT_H
static struct option *long_opts = NULL;
if (long_opts == NULL && crm_long_options) {
long_opts = crm_create_long_opts(crm_long_options);
}
*index = 0;
if (long_opts) {
int flag = getopt_long(argc, argv, crm_short_options, long_opts, index);
switch (flag) {
case 0:
if (long_opts[*index].val) {
return long_opts[*index].val;
} else if (longname) {
*longname = long_opts[*index].name;
} else {
crm_notice("Unhandled option --%s", long_opts[*index].name);
return flag;
}
case -1: /* End of option processing */
break;
case ':':
crm_trace("Missing argument");
crm_help('?', 1);
break;
case '?':
crm_help('?', *index ? 0 : 1);
break;
}
return flag;
}
#endif
if (crm_short_options) {
return getopt(argc, argv, crm_short_options);
}
return -1;
}
int
crm_help(char cmd, int exit_code)
{
int i = 0;
FILE *stream = (exit_code ? stderr : stdout);
if (cmd == 'v' || cmd == '$') {
fprintf(stream, "Pacemaker %s\n", PACEMAKER_VERSION);
fprintf(stream, "Written by Andrew Beekhof\n");
goto out;
}
if (cmd == '!') {
fprintf(stream, "Pacemaker %s (Build: %s): %s\n", PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURES);
goto out;
}
fprintf(stream, "%s - %s\n", crm_system_name, crm_app_description);
if (crm_app_usage) {
fprintf(stream, "Usage: %s %s\n", crm_system_name, crm_app_usage);
}
if (crm_long_options) {
fprintf(stream, "Options:\n");
for (i = 0; crm_long_options[i].name != NULL; i++) {
if (crm_long_options[i].flags & pcmk_option_hidden) {
} else if (crm_long_options[i].flags & pcmk_option_paragraph) {
fprintf(stream, "%s\n\n", crm_long_options[i].desc);
} else if (crm_long_options[i].flags & pcmk_option_example) {
fprintf(stream, "\t#%s\n\n", crm_long_options[i].desc);
} else if (crm_long_options[i].val == '-' && crm_long_options[i].desc) {
fprintf(stream, "%s\n", crm_long_options[i].desc);
} else {
/* is val printable as char ? */
if (crm_long_options[i].val && crm_long_options[i].val <= UCHAR_MAX) {
fprintf(stream, " -%c,", crm_long_options[i].val);
} else {
fputs(" ", stream);
}
fprintf(stream, " --%s%s\t%s\n", crm_long_options[i].name,
crm_long_options[i].has_arg == optional_argument ? "[=value]" :
crm_long_options[i].has_arg == required_argument ? "=value" : "",
crm_long_options[i].desc ? crm_long_options[i].desc : "");
}
}
} else if (crm_short_options) {
fprintf(stream, "Usage: %s - %s\n", crm_system_name, crm_app_description);
for (i = 0; crm_short_options[i] != 0; i++) {
int has_arg = no_argument /* 0 */;
if (crm_short_options[i + 1] == ':') {
if (crm_short_options[i + 2] == ':')
has_arg = optional_argument /* 2 */;
else
has_arg = required_argument /* 1 */;
}
fprintf(stream, " -%c %s\n", crm_short_options[i],
has_arg == optional_argument ? "[value]" :
has_arg == required_argument ? "{value}" : "");
i += has_arg;
}
}
fprintf(stream, "\nReport bugs to %s\n", PACKAGE_BUGREPORT);
out:
return crm_exit(exit_code);
}
void cib_ipc_servers_init(qb_ipcs_service_t **ipcs_ro,
qb_ipcs_service_t **ipcs_rw,
qb_ipcs_service_t **ipcs_shm,
struct qb_ipcs_service_handlers *ro_cb,
struct qb_ipcs_service_handlers *rw_cb)
{
*ipcs_ro = mainloop_add_ipc_server(cib_channel_ro, QB_IPC_NATIVE, ro_cb);
*ipcs_rw = mainloop_add_ipc_server(cib_channel_rw, QB_IPC_NATIVE, rw_cb);
*ipcs_shm = mainloop_add_ipc_server(cib_channel_shm, QB_IPC_SHM, rw_cb);
if (*ipcs_ro == NULL || *ipcs_rw == NULL || *ipcs_shm == NULL) {
crm_err("Failed to create cib servers: exiting and inhibiting respawn.");
crm_warn("Verify pacemaker and pacemaker_remote are not both enabled.");
crm_exit(DAEMON_RESPAWN_STOP);
}
}
void cib_ipc_servers_destroy(qb_ipcs_service_t *ipcs_ro,
qb_ipcs_service_t *ipcs_rw,
qb_ipcs_service_t *ipcs_shm)
{
qb_ipcs_destroy(ipcs_ro);
qb_ipcs_destroy(ipcs_rw);
qb_ipcs_destroy(ipcs_shm);
}
qb_ipcs_service_t *
crmd_ipc_server_init(struct qb_ipcs_service_handlers *cb)
{
return mainloop_add_ipc_server(CRM_SYSTEM_CRMD, QB_IPC_NATIVE, cb);
}
void
attrd_ipc_server_init(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb)
{
*ipcs = mainloop_add_ipc_server(T_ATTRD, QB_IPC_NATIVE, cb);
if (*ipcs == NULL) {
crm_err("Failed to create attrd servers: exiting and inhibiting respawn.");
crm_warn("Verify pacemaker and pacemaker_remote are not both enabled.");
crm_exit(DAEMON_RESPAWN_STOP);
}
}
void
stonith_ipc_server_init(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb)
{
*ipcs = mainloop_add_ipc_server("stonith-ng", QB_IPC_NATIVE, cb);
if (*ipcs == NULL) {
crm_err("Failed to create stonith-ng servers: exiting and inhibiting respawn.");
crm_warn("Verify pacemaker and pacemaker_remote are not both enabled.");
crm_exit(DAEMON_RESPAWN_STOP);
}
}
void *
find_library_function(void **handle, const char *lib, const char *fn, gboolean fatal)
{
char *error;
void *a_function;
if (*handle == NULL) {
*handle = dlopen(lib, RTLD_LAZY);
}
if (!(*handle)) {
crm_err("%sCould not open %s: %s", fatal ? "Fatal: " : "", lib, dlerror());
if (fatal) {
crm_exit(DAEMON_RESPAWN_STOP);
}
return NULL;
}
a_function = dlsym(*handle, fn);
if (a_function == NULL) {
error = dlerror();
crm_err("%sCould not find %s in %s: %s", fatal ? "Fatal: " : "", fn, lib, error);
if (fatal) {
crm_exit(DAEMON_RESPAWN_STOP);
}
}
return a_function;
}
void *
convert_const_pointer(const void *ptr)
{
/* Worst function ever */
return (void *)ptr;
}
#ifdef HAVE_UUID_UUID_H
# include <uuid/uuid.h>
#endif
char *
crm_generate_uuid(void)
{
unsigned char uuid[16];
char *buffer = malloc(37); /* Including NUL byte */
uuid_generate(uuid);
uuid_unparse(uuid, buffer);
return buffer;
}
/*!
* \brief Check whether a string represents a cluster daemon name
*
* \param[in] name String to check
*
* \return TRUE if name is standard client name used by daemons, FALSE otherwise
*/
bool
crm_is_daemon_name(const char *name)
{
return (name &&
(!strcmp(name, CRM_SYSTEM_CRMD)
|| !strcmp(name, CRM_SYSTEM_STONITHD)
|| !strcmp(name, T_ATTRD)
|| !strcmp(name, CRM_SYSTEM_CIB)
|| !strcmp(name, CRM_SYSTEM_MCP)
|| !strcmp(name, CRM_SYSTEM_DC)
|| !strcmp(name, CRM_SYSTEM_TENGINE)
|| !strcmp(name, CRM_SYSTEM_LRMD)));
}
#include <md5.h>
char *
crm_md5sum(const char *buffer)
{
int lpc = 0, len = 0;
char *digest = NULL;
unsigned char raw_digest[MD5_DIGEST_SIZE];
if (buffer == NULL) {
buffer = "";
}
len = strlen(buffer);
crm_trace("Beginning digest of %d bytes", len);
digest = malloc(2 * MD5_DIGEST_SIZE + 1);
if(digest) {
md5_buffer(buffer, len, raw_digest);
for (lpc = 0; lpc < MD5_DIGEST_SIZE; lpc++) {
sprintf(digest + (2 * lpc), "%02x", raw_digest[lpc]);
}
digest[(2 * MD5_DIGEST_SIZE)] = 0;
crm_trace("Digest %s.", digest);
} else {
crm_err("Could not create digest");
}
return digest;
}
#ifdef HAVE_GNUTLS_GNUTLS_H
void
crm_gnutls_global_init(void)
{
signal(SIGPIPE, SIG_IGN);
gnutls_global_init();
}
#endif
diff --git a/lib/services/services.c b/lib/services/services.c
index ef2c5fc3ef..1d06c5df47 100644
--- a/lib/services/services.c
+++ b/lib/services/services.c
@@ -1,1512 +1,1474 @@
/*
* Copyright 2010-2019 Andrew Beekhof <andrew@beekhof.net>
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
#include <sys/types.h>
#include <sys/stat.h>
#include <stdio.h>
#include <errno.h>
#include <unistd.h>
#include <dirent.h>
#include <fcntl.h>
#include <crm/crm.h>
#include <crm/common/mainloop.h>
#include <crm/services.h>
#include <crm/msg_xml.h>
#include "services_private.h"
#if SUPPORT_UPSTART
# include <upstart.h>
#endif
#if SUPPORT_SYSTEMD
# include <systemd.h>
#endif
/* TODO: Develop a rollover strategy */
static int operations = 0;
static GHashTable *recurring_actions = NULL;
/* ops waiting to run async because of conflicting active
* pending ops */
static GList *blocked_ops = NULL;
/* ops currently active (in-flight) */
static GList *inflight_ops = NULL;
static void handle_blocked_ops(void);
svc_action_t *
services_action_create(const char *name, const char *action, int interval, int timeout)
{
return resources_action_create(name, PCMK_RESOURCE_CLASS_LSB, NULL, name,
action, interval, timeout, NULL, 0);
}
static char *
services__lsb_agent_path(const char *agent)
{
return (*agent == '/')? strdup(agent)
: crm_strdup_printf("%s/%s", LSB_ROOT_DIR, agent);
}
static bool
services__lsb_agent_exists(const char *agent)
{
bool rc = FALSE;
struct stat st;
char *path = services__lsb_agent_path(agent);
rc = (stat(path, &st) == 0);
free(path);
return rc;
}
/*!
* \brief Find first service class that can provide a specified agent
*
* \param[in] agent Name of agent to search for
*
* \return Service class if found, NULL otherwise
*
* \note The priority is LSB, then systemd, then upstart. It would be preferable
* to put systemd first, but LSB merely requires a file existence check,
* while systemd requires contacting D-Bus.
*/
const char *
resources_find_service_class(const char *agent)
{
/* Priority is:
* - lsb
* - systemd
* - upstart
*/
if (services__lsb_agent_exists(agent)) {
return PCMK_RESOURCE_CLASS_LSB;
}
#if SUPPORT_SYSTEMD
if (systemd_unit_exists(agent)) {
return PCMK_RESOURCE_CLASS_SYSTEMD;
}
#endif
#if SUPPORT_UPSTART
if (upstart_job_exists(agent)) {
return PCMK_RESOURCE_CLASS_UPSTART;
}
#endif
return NULL;
}
static inline void
init_recurring_actions(void)
{
if (recurring_actions == NULL) {
recurring_actions = g_hash_table_new_full(g_str_hash, g_str_equal, NULL,
NULL);
}
}
/*!
* \internal
* \brief Check whether op is in-flight systemd or upstart op
*
* \param[in] op Operation to check
*
* \return TRUE if op is in-flight systemd or upstart op
*/
static inline gboolean
inflight_systemd_or_upstart(svc_action_t *op)
{
return (safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_SYSTEMD)
|| safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_UPSTART))
&& (g_list_find(inflight_ops, op) != NULL);
}
/*!
* \internal
* \brief Expand "service" alias to an actual resource class
*
* \param[in] rsc Resource name (for logging only)
* \param[in] standard Resource class as configured
* \param[in] agent Agent name to look for
*
* \return Newly allocated string with actual resource class
*
* \note The caller is responsible for calling free() on the result.
*/
static char *
expand_resource_class(const char *rsc, const char *standard, const char *agent)
{
char *expanded_class = NULL;
if (strcasecmp(standard, PCMK_RESOURCE_CLASS_SERVICE) == 0) {
const char *found_class = resources_find_service_class(agent);
if (found_class) {
crm_debug("Found %s agent %s for %s", found_class, agent, rsc);
expanded_class = strdup(found_class);
} else {
crm_info("Assuming resource class lsb for agent %s for %s",
agent, rsc);
expanded_class = strdup(PCMK_RESOURCE_CLASS_LSB);
}
} else {
expanded_class = strdup(standard);
}
CRM_ASSERT(expanded_class);
return expanded_class;
}
svc_action_t *
resources_action_create(const char *name, const char *standard, const char *provider,
const char *agent, const char *action, int interval, int timeout,
GHashTable * params, enum svc_action_flags flags)
{
svc_action_t *op = NULL;
uint32_t ra_caps = 0;
/*
* Do some up front sanity checks before we go off and
* build the svc_action_t instance.
*/
if (crm_strlen_zero(name)) {
crm_err("Cannot create operation without resource name");
goto return_error;
}
if (crm_strlen_zero(standard)) {
crm_err("Cannot create operation for %s without resource class", name);
goto return_error;
}
ra_caps = pcmk_get_ra_caps(standard);
if (is_set(ra_caps, pcmk_ra_cap_provider) && crm_strlen_zero(provider)) {
crm_err("Cannot create operation for %s without provider", name);
goto return_error;
}
if (crm_strlen_zero(agent)) {
crm_err("Cannot create operation for %s without agent name", name);
goto return_error;
}
if (crm_strlen_zero(action)) {
crm_err("Cannot create operation for %s without operation name", name);
goto return_error;
}
/*
* Sanity checks passed, proceed!
*/
op = calloc(1, sizeof(svc_action_t));
op->opaque = calloc(1, sizeof(svc_action_private_t));
op->rsc = strdup(name);
op->interval = interval;
op->timeout = timeout;
op->standard = expand_resource_class(name, standard, agent);
op->agent = strdup(agent);
op->sequence = ++operations;
op->flags = flags;
op->id = generate_op_key(name, action, interval);
if (is_set(ra_caps, pcmk_ra_cap_status) && safe_str_eq(action, "monitor")) {
op->action = strdup("status");
} else {
op->action = strdup(action);
}
if (is_set(ra_caps, pcmk_ra_cap_provider)) {
op->provider = strdup(provider);
}
if (is_set(ra_caps, pcmk_ra_cap_params)) {
op->params = params;
params = NULL; // so we don't free them in this function
}
if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_OCF) == 0) {
if (asprintf(&op->opaque->exec, "%s/resource.d/%s/%s", OCF_ROOT_DIR, provider, agent) == -1) {
crm_err("Internal error: cannot create agent path");
goto return_error;
}
op->opaque->args[0] = strdup(op->opaque->exec);
op->opaque->args[1] = strdup(op->action);
} else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_LSB) == 0) {
op->opaque->exec = services__lsb_agent_path(op->agent);
op->opaque->args[0] = strdup(op->opaque->exec);
op->opaque->args[1] = strdup(op->action);
#if SUPPORT_HEARTBEAT
} else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_HB) == 0) {
int index;
int param_num;
char buf_tmp[20];
void *value_tmp;
if (op->agent[0] == '/') {
/* if given an absolute path, use that instead
* of tacking on the HB_RA_DIR path to the front */
op->opaque->exec = strdup(op->agent);
} else if (asprintf(&op->opaque->exec, "%s/%s", HB_RA_DIR, op->agent) == -1) {
crm_err("Internal error: cannot create agent path");
goto return_error;
}
op->opaque->args[0] = strdup(op->opaque->exec);
/* The "heartbeat" agent class only has positional arguments,
* which we keyed by their decimal position number. */
param_num = 1;
if (op->params) {
for (index = 1; index <= MAX_ARGC - 3; index++ ) {
snprintf(buf_tmp, sizeof(buf_tmp), "%d", index);
value_tmp = g_hash_table_lookup(op->params, buf_tmp);
if (value_tmp == NULL) {
/* maybe: strdup("") ??
* But the old lrmd did simply continue as well. */
continue;
}
op->opaque->args[param_num++] = strdup(value_tmp);
}
// Heartbeat actions don't need to keep the parameters
g_hash_table_destroy(op->params);
op->params = NULL;
}
/* Add operation code as the last argument, */
/* and the terminating NULL pointer */
op->opaque->args[param_num++] = strdup(op->action);
op->opaque->args[param_num] = NULL;
#endif
#if SUPPORT_SYSTEMD
} else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_SYSTEMD) == 0) {
op->opaque->exec = strdup("systemd-dbus");
#endif
#if SUPPORT_UPSTART
} else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_UPSTART) == 0) {
op->opaque->exec = strdup("upstart-dbus");
#endif
#if SUPPORT_NAGIOS
} else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_NAGIOS) == 0) {
if (op->agent[0] == '/') {
/* if given an absolute path, use that instead
* of tacking on the NAGIOS_PLUGIN_DIR path to the front */
op->opaque->exec = strdup(op->agent);
} else if (asprintf(&op->opaque->exec, "%s/%s", NAGIOS_PLUGIN_DIR, op->agent) == -1) {
crm_err("Internal error: cannot create agent path");
goto return_error;
}
op->opaque->args[0] = strdup(op->opaque->exec);
if (safe_str_eq(op->action, "monitor") && op->interval == 0) {
/* Invoke --version for a nagios probe */
op->opaque->args[1] = strdup("--version");
} else if (op->params) {
GHashTableIter iter;
char *key = NULL;
char *value = NULL;
int index = 1;
static int args_size = sizeof(op->opaque->args) / sizeof(char *);
g_hash_table_iter_init(&iter, op->params);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value) &&
index <= args_size - 3) {
int len = 3;
char *long_opt = NULL;
if (safe_str_eq(key, XML_ATTR_CRM_VERSION) || strstr(key, CRM_META "_")) {
continue;
}
len += strlen(key);
long_opt = calloc(1, len);
sprintf(long_opt, "--%s", key);
long_opt[len - 1] = 0;
op->opaque->args[index] = long_opt;
op->opaque->args[index + 1] = strdup(value);
index += 2;
}
}
// Nagios actions don't need to keep the parameters
if (op->params != NULL) {
g_hash_table_destroy(op->params);
op->params = NULL;
}
#endif
} else {
crm_err("Unknown resource standard: %s", op->standard);
goto return_error;
}
if(params) {
g_hash_table_destroy(params);
}
return op;
return_error:
if(params) {
g_hash_table_destroy(params);
}
services_action_free(op);
return NULL;
}
svc_action_t *
services_action_create_generic(const char *exec, const char *args[])
{
svc_action_t *op;
unsigned int cur_arg;
op = calloc(1, sizeof(*op));
op->opaque = calloc(1, sizeof(svc_action_private_t));
op->opaque->exec = strdup(exec);
op->opaque->args[0] = strdup(exec);
for (cur_arg = 1; args && args[cur_arg - 1]; cur_arg++) {
op->opaque->args[cur_arg] = strdup(args[cur_arg - 1]);
if (cur_arg == DIMOF(op->opaque->args) - 1) {
crm_err("svc_action_t args list not long enough for '%s' execution request.", exec);
break;
}
}
return op;
}
/*!
* \brief Create an alert agent action
*
* \param[in] id Alert ID
* \param[in] exec Path to alert agent executable
* \param[in] timeout Action timeout
* \param[in] params Parameters to use with action
* \param[in] sequence Action sequence number
* \param[in] cb_data Data to pass to callback function
*
* \return New action on success, NULL on error
* \note It is the caller's responsibility to free cb_data.
* The caller should not free params explicitly.
*/
svc_action_t *
services_alert_create(const char *id, const char *exec, int timeout,
GHashTable *params, int sequence, void *cb_data)
{
svc_action_t *action = services_action_create_generic(exec, NULL);
CRM_ASSERT(action);
action->timeout = timeout;
action->id = strdup(id);
action->params = params;
action->sequence = sequence;
action->cb_data = cb_data;
return action;
}
/*!
* \brief Set the user and group that an action will execute as
*
* \param[in,out] action Action to modify
* \param[in] user Name of user to execute action as
* \param[in] group Name of group to execute action as
*
* \return pcmk_ok on success, -errno otherwise
*
* \note This will have no effect unless the process executing the action runs
* as root, and the action is not a systemd or upstart action.
* We could implement this for systemd by adding User= and Group= to
* [Service] in the override file, but that seems more likely to cause
* problems than be useful.
*/
int
services_action_user(svc_action_t *op, const char *user)
{
CRM_CHECK((op != NULL) && (user != NULL), return -EINVAL);
return crm_user_lookup(user, &(op->opaque->uid), &(op->opaque->gid));
}
-static void
-set_alert_env(gpointer key, gpointer value, gpointer user_data)
-{
- int rc;
-
- if (value) {
- rc = setenv(key, value, 1);
- } else {
- rc = unsetenv(key);
- }
-
- if (rc < 0) {
- crm_perror(LOG_ERR, "setenv %s=%s",
- (char*)key, (value? (char*)value : ""));
- } else {
- crm_trace("setenv %s=%s", (char*)key, (value? (char*)value : ""));
- }
-}
-
-static void
-unset_alert_env(gpointer key, gpointer value, gpointer user_data)
-{
- if (unsetenv(key) < 0) {
- crm_perror(LOG_ERR, "unset %s", (char*)key);
- } else {
- crm_trace("unset %s", (char*)key);
- }
-}
-
/*!
* \brief Execute an alert agent action
*
* \param[in] action Action to execute
* \param[in] cb Function to call when action completes
*
* \return TRUE if the library will free action, FALSE otherwise
*
* \note If this function returns FALSE, it is the caller's responsibility to
* free the action with services_action_free().
*/
gboolean
services_alert_async(svc_action_t *action, void (*cb)(svc_action_t *op))
{
- gboolean responsible;
-
action->synchronous = false;
action->opaque->callback = cb;
- if (action->params) {
- g_hash_table_foreach(action->params, set_alert_env, NULL);
- }
- responsible = services_os_action_execute(action);
- if (action->params) {
- g_hash_table_foreach(action->params, unset_alert_env, NULL);
- }
- return responsible;
+ return services_os_action_execute(action);
}
#if SUPPORT_DBUS
/*!
* \internal
* \brief Update operation's pending DBus call, unreferencing old one if needed
*
* \param[in,out] op Operation to modify
* \param[in] pending Pending call to set
*/
void
services_set_op_pending(svc_action_t *op, DBusPendingCall *pending)
{
if (op->opaque->pending && (op->opaque->pending != pending)) {
if (pending) {
crm_info("Lost pending %s DBus call (%p)", op->id, op->opaque->pending);
} else {
crm_trace("Done with pending %s DBus call (%p)", op->id, op->opaque->pending);
}
dbus_pending_call_unref(op->opaque->pending);
}
op->opaque->pending = pending;
if (pending) {
crm_trace("Updated pending %s DBus call (%p)", op->id, pending);
} else {
crm_trace("Cleared pending %s DBus call", op->id);
}
}
#endif
void
services_action_cleanup(svc_action_t * op)
{
if ((op == NULL) || (op->opaque == NULL)) {
return;
}
#if SUPPORT_DBUS
if(op->opaque->timerid != 0) {
crm_trace("Removing timer for call %s to %s", op->action, op->rsc);
g_source_remove(op->opaque->timerid);
op->opaque->timerid = 0;
}
if(op->opaque->pending) {
if (dbus_pending_call_get_completed(op->opaque->pending)) {
// This should never be the case
crm_warn("Result of %s op %s was unhandled",
op->standard, op->id);
} else {
crm_debug("Will ignore any result of canceled %s op %s",
op->standard, op->id);
}
dbus_pending_call_cancel(op->opaque->pending);
services_set_op_pending(op, NULL);
}
#endif
if (op->opaque->stderr_gsource) {
mainloop_del_fd(op->opaque->stderr_gsource);
op->opaque->stderr_gsource = NULL;
}
if (op->opaque->stdout_gsource) {
mainloop_del_fd(op->opaque->stdout_gsource);
op->opaque->stdout_gsource = NULL;
}
}
void
services_action_free(svc_action_t * op)
{
unsigned int i;
if (op == NULL) {
return;
}
/* The operation should be removed from all tracking lists by this point.
* If it's not, we have a bug somewhere, so bail. That may lead to a
* memory leak, but it's better than a use-after-free segmentation fault.
*/
CRM_CHECK(g_list_find(inflight_ops, op) == NULL, return);
CRM_CHECK(g_list_find(blocked_ops, op) == NULL, return);
CRM_CHECK((recurring_actions == NULL)
|| (g_hash_table_lookup(recurring_actions, op->id) == NULL),
return);
services_action_cleanup(op);
if (op->opaque->repeat_timer) {
g_source_remove(op->opaque->repeat_timer);
op->opaque->repeat_timer = 0;
}
free(op->id);
free(op->opaque->exec);
for (i = 0; i < DIMOF(op->opaque->args); i++) {
free(op->opaque->args[i]);
}
free(op->opaque);
free(op->rsc);
free(op->action);
free(op->standard);
free(op->agent);
free(op->provider);
free(op->stdout_data);
free(op->stderr_data);
if (op->params) {
g_hash_table_destroy(op->params);
op->params = NULL;
}
free(op);
}
gboolean
cancel_recurring_action(svc_action_t * op)
{
crm_info("Cancelling %s operation %s", op->standard, op->id);
if (recurring_actions) {
g_hash_table_remove(recurring_actions, op->id);
}
if (op->opaque->repeat_timer) {
g_source_remove(op->opaque->repeat_timer);
op->opaque->repeat_timer = 0;
}
return TRUE;
}
/*!
* \brief Cancel a recurring action
*
* \param[in] name Name of resource that operation is for
* \param[in] action Name of operation to cancel
* \param[in] interval Interval of operation to cancel
*
* \return TRUE if action was successfully cancelled, FALSE otherwise
*/
gboolean
services_action_cancel(const char *name, const char *action, int interval)
{
gboolean cancelled = FALSE;
char *id = generate_op_key(name, action, interval);
svc_action_t *op = NULL;
/* We can only cancel a recurring action */
init_recurring_actions();
op = g_hash_table_lookup(recurring_actions, id);
if (op == NULL) {
goto done;
}
/* Tell operation_finalize() not to reschedule the operation */
op->cancel = TRUE;
/* Stop tracking it as a recurring operation, and stop its repeat timer */
cancel_recurring_action(op);
/* If the op has a PID, it's an in-flight child process, so kill it.
*
* Whether the kill succeeds or fails, the main loop will send the op to
* operation_finished() (and thus operation_finalize()) when the process
* goes away.
*/
if (op->pid != 0) {
crm_info("Terminating in-flight op %s (pid %d) early because it was cancelled",
id, op->pid);
cancelled = mainloop_child_kill(op->pid);
if (cancelled == FALSE) {
crm_err("Termination of %s (pid %d) failed", id, op->pid);
}
goto done;
}
#if SUPPORT_DBUS
// In-flight systemd and upstart ops don't have a pid
if (inflight_systemd_or_upstart(op)) {
inflight_ops = g_list_remove(inflight_ops, op);
/* This will cause any result that comes in later to be discarded, so we
* don't call the callback and free the operation twice.
*/
services_action_cleanup(op);
}
#endif
// The rest of this is essentially equivalent to operation_finalize(),
// except without calling handle_blocked_ops()
// Report operation as cancelled
op->status = PCMK_LRM_OP_CANCELLED;
if (op->opaque->callback) {
op->opaque->callback(op);
}
blocked_ops = g_list_remove(blocked_ops, op);
services_action_free(op);
cancelled = TRUE;
// @TODO Initiate handle_blocked_ops() asynchronously
done:
free(id);
return cancelled;
}
gboolean
services_action_kick(const char *name, const char *action, int interval /* ms */)
{
svc_action_t * op = NULL;
char *id = generate_op_key(name, action, interval);
init_recurring_actions();
op = g_hash_table_lookup(recurring_actions, id);
free(id);
if (op == NULL) {
return FALSE;
}
if (op->pid || inflight_systemd_or_upstart(op)) {
return TRUE;
} else {
if (op->opaque->repeat_timer) {
g_source_remove(op->opaque->repeat_timer);
op->opaque->repeat_timer = 0;
}
recurring_action_timer(op);
return TRUE;
}
}
/*!
* \internal
* \brief Add a new recurring operation, checking for duplicates
*
* \param[in] op Operation to add
*
* \return TRUE if duplicate found (and reschedule), FALSE otherwise
*/
static gboolean
handle_duplicate_recurring(svc_action_t * op)
{
svc_action_t * dup = NULL;
/* check for duplicates */
dup = g_hash_table_lookup(recurring_actions, op->id);
if (dup && (dup != op)) {
/* update user data */
if (op->opaque->callback) {
dup->opaque->callback = op->opaque->callback;
dup->cb_data = op->cb_data;
op->cb_data = NULL;
}
/* immediately execute the next interval */
if (dup->pid != 0) {
if (op->opaque->repeat_timer) {
g_source_remove(op->opaque->repeat_timer);
op->opaque->repeat_timer = 0;
}
recurring_action_timer(dup);
}
/* free the duplicate */
services_action_free(op);
return TRUE;
}
return FALSE;
}
inline static gboolean
action_exec_helper(svc_action_t * op)
{
/* Whether a/synchronous must be decided (op->synchronous) beforehand. */
if (op->standard
&& (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_UPSTART) == 0)) {
#if SUPPORT_UPSTART
return upstart_job_exec(op);
#endif
} else if (op->standard && strcasecmp(op->standard,
PCMK_RESOURCE_CLASS_SYSTEMD) == 0) {
#if SUPPORT_SYSTEMD
return systemd_unit_exec(op);
#endif
} else {
return services_os_action_execute(op);
}
/* The 'op' has probably been freed if the execution functions return TRUE
for the asynchronous 'op'. */
/* Avoid using the 'op' in here. */
return FALSE;
}
void
services_add_inflight_op(svc_action_t * op)
{
if (op == NULL) {
return;
}
CRM_ASSERT(op->synchronous == FALSE);
/* keep track of ops that are in-flight to avoid collisions in the same namespace */
if (op->rsc) {
inflight_ops = g_list_append(inflight_ops, op);
}
}
/*!
* \internal
* \brief Stop tracking an operation that completed
*
* \param[in] op Operation to stop tracking
*/
void
services_untrack_op(svc_action_t *op)
{
/* Op is no longer in-flight or blocked */
inflight_ops = g_list_remove(inflight_ops, op);
blocked_ops = g_list_remove(blocked_ops, op);
/* Op is no longer blocking other ops, so check if any need to run */
handle_blocked_ops();
}
gboolean
services_action_async_fork_notify(svc_action_t * op,
void (*action_callback) (svc_action_t *),
void (*action_fork_callback) (svc_action_t *))
{
op->synchronous = false;
if (action_callback) {
op->opaque->callback = action_callback;
}
if (action_fork_callback) {
op->opaque->fork_callback = action_fork_callback;
}
if (op->interval > 0) {
init_recurring_actions();
if (handle_duplicate_recurring(op) == TRUE) {
/* entry rescheduled, dup freed */
/* exit early */
return TRUE;
}
g_hash_table_replace(recurring_actions, op->id, op);
}
if (op->rsc && is_op_blocked(op->rsc)) {
blocked_ops = g_list_append(blocked_ops, op);
return TRUE;
}
return action_exec_helper(op);
}
gboolean
services_action_async(svc_action_t * op,
void (*action_callback) (svc_action_t *))
{
return services_action_async_fork_notify(op, action_callback, NULL);
}
static gboolean processing_blocked_ops = FALSE;
gboolean
is_op_blocked(const char *rsc)
{
GList *gIter = NULL;
svc_action_t *op = NULL;
for (gIter = inflight_ops; gIter != NULL; gIter = gIter->next) {
op = gIter->data;
if (safe_str_eq(op->rsc, rsc)) {
return TRUE;
}
}
return FALSE;
}
static void
handle_blocked_ops(void)
{
GList *executed_ops = NULL;
GList *gIter = NULL;
svc_action_t *op = NULL;
gboolean res = FALSE;
if (processing_blocked_ops) {
/* avoid nested calling of this function */
return;
}
processing_blocked_ops = TRUE;
/* n^2 operation here, but blocked ops are incredibly rare. this list
* will be empty 99% of the time. */
for (gIter = blocked_ops; gIter != NULL; gIter = gIter->next) {
op = gIter->data;
if (is_op_blocked(op->rsc)) {
continue;
}
executed_ops = g_list_append(executed_ops, op);
res = action_exec_helper(op);
if (res == FALSE) {
op->status = PCMK_LRM_OP_ERROR;
/* this can cause this function to be called recursively
* which is why we have processing_blocked_ops static variable */
operation_finalize(op);
}
}
for (gIter = executed_ops; gIter != NULL; gIter = gIter->next) {
op = gIter->data;
blocked_ops = g_list_remove(blocked_ops, op);
}
g_list_free(executed_ops);
processing_blocked_ops = FALSE;
}
#define lsb_metadata_template \
"<?xml version='1.0'?>\n" \
"<!DOCTYPE resource-agent SYSTEM 'ra-api-1.dtd'>\n" \
"<resource-agent name='%s' version='" PCMK_DEFAULT_AGENT_VERSION "'>\n" \
" <version>1.0</version>\n" \
" <longdesc lang='en'>\n" \
"%s" \
" </longdesc>\n" \
" <shortdesc lang='en'>%s</shortdesc>\n" \
" <parameters>\n" \
" </parameters>\n" \
" <actions>\n" \
" <action name='meta-data' timeout='5' />\n" \
" <action name='start' timeout='15' />\n" \
" <action name='stop' timeout='15' />\n" \
" <action name='status' timeout='15' />\n" \
" <action name='restart' timeout='15' />\n" \
" <action name='force-reload' timeout='15' />\n" \
" <action name='monitor' timeout='15' interval='15' />\n" \
" </actions>\n" \
" <special tag='LSB'>\n" \
" <Provides>%s</Provides>\n" \
" <Required-Start>%s</Required-Start>\n" \
" <Required-Stop>%s</Required-Stop>\n" \
" <Should-Start>%s</Should-Start>\n" \
" <Should-Stop>%s</Should-Stop>\n" \
" <Default-Start>%s</Default-Start>\n" \
" <Default-Stop>%s</Default-Stop>\n" \
" </special>\n" \
"</resource-agent>\n"
/* See "Comment Conventions for Init Scripts" in the LSB core specification at:
* http://refspecs.linuxfoundation.org/lsb.shtml
*/
#define LSB_INITSCRIPT_INFOBEGIN_TAG "### BEGIN INIT INFO"
#define LSB_INITSCRIPT_INFOEND_TAG "### END INIT INFO"
#define PROVIDES "# Provides:"
#define REQ_START "# Required-Start:"
#define REQ_STOP "# Required-Stop:"
#define SHLD_START "# Should-Start:"
#define SHLD_STOP "# Should-Stop:"
#define DFLT_START "# Default-Start:"
#define DFLT_STOP "# Default-Stop:"
#define SHORT_DSCR "# Short-Description:"
#define DESCRIPTION "# Description:"
#define lsb_meta_helper_free_value(m) \
do { \
if ((m) != NULL) { \
xmlFree(m); \
(m) = NULL; \
} \
} while(0)
/*!
* \internal
* \brief Grab an LSB header value
*
* \param[in] line Line read from LSB init script
* \param[in,out] value If not set, will be set to XML-safe copy of value
* \param[in] prefix Set value if line starts with this pattern
*
* \return TRUE if value was set, FALSE otherwise
*/
static inline gboolean
lsb_meta_helper_get_value(const char *line, char **value, const char *prefix)
{
if (!*value && crm_starts_with(line, prefix)) {
*value = (char *)xmlEncodeEntitiesReentrant(NULL, BAD_CAST line+strlen(prefix));
return TRUE;
}
return FALSE;
}
#define DESC_MAX 2048
static int
lsb_get_metadata(const char *type, char **output)
{
char ra_pathname[PATH_MAX] = { 0, };
FILE *fp = NULL;
char buffer[1024] = { 0, };
char *provides = NULL;
char *req_start = NULL;
char *req_stop = NULL;
char *shld_start = NULL;
char *shld_stop = NULL;
char *dflt_start = NULL;
char *dflt_stop = NULL;
char *s_dscrpt = NULL;
char *xml_l_dscrpt = NULL;
int offset = 0;
bool in_header = FALSE;
char description[DESC_MAX] = { 0, };
if (type[0] == '/') {
snprintf(ra_pathname, sizeof(ra_pathname), "%s", type);
} else {
snprintf(ra_pathname, sizeof(ra_pathname), "%s/%s",
LSB_ROOT_DIR, type);
}
crm_trace("Looking into %s", ra_pathname);
fp = fopen(ra_pathname, "r");
if (fp == NULL) {
return -errno;
}
/* Enter into the LSB-compliant comment block */
while (fgets(buffer, sizeof(buffer), fp)) {
// Ignore lines up to and including the block delimiter
if (crm_starts_with(buffer, LSB_INITSCRIPT_INFOBEGIN_TAG)) {
in_header = TRUE;
continue;
}
if (!in_header) {
continue;
}
/* Assume each of the following eight arguments contain one line */
if (lsb_meta_helper_get_value(buffer, &provides, PROVIDES)) {
continue;
}
if (lsb_meta_helper_get_value(buffer, &req_start, REQ_START)) {
continue;
}
if (lsb_meta_helper_get_value(buffer, &req_stop, REQ_STOP)) {
continue;
}
if (lsb_meta_helper_get_value(buffer, &shld_start, SHLD_START)) {
continue;
}
if (lsb_meta_helper_get_value(buffer, &shld_stop, SHLD_STOP)) {
continue;
}
if (lsb_meta_helper_get_value(buffer, &dflt_start, DFLT_START)) {
continue;
}
if (lsb_meta_helper_get_value(buffer, &dflt_stop, DFLT_STOP)) {
continue;
}
if (lsb_meta_helper_get_value(buffer, &s_dscrpt, SHORT_DSCR)) {
continue;
}
/* Long description may cross multiple lines */
if ((offset == 0) // haven't already found long description
&& crm_starts_with(buffer, DESCRIPTION)) {
bool processed_line = TRUE;
// Get remainder of description line itself
offset += snprintf(description, DESC_MAX, "%s",
buffer + strlen(DESCRIPTION));
// Read any continuation lines of the description
buffer[0] = '\0';
while (fgets(buffer, sizeof(buffer), fp)) {
if (crm_starts_with(buffer, "# ")
|| crm_starts_with(buffer, "#\t")) {
/* '#' followed by a tab or more than one space indicates a
* continuation of the long description.
*/
offset += snprintf(description + offset, DESC_MAX - offset,
"%s", buffer + 1);
} else {
/* This line is not part of the long description,
* so continue with normal processing.
*/
processed_line = FALSE;
break;
}
}
// Make long description safe to use in XML
xml_l_dscrpt = (char *)xmlEncodeEntitiesReentrant(NULL, BAD_CAST(description));
if (processed_line) {
// We grabbed the line into the long description
continue;
}
}
// Stop if we leave the header block
if (crm_starts_with(buffer, LSB_INITSCRIPT_INFOEND_TAG)) {
break;
}
if (buffer[0] != '#') {
break;
}
}
fclose(fp);
*output = crm_strdup_printf(lsb_metadata_template, type,
(xml_l_dscrpt? xml_l_dscrpt : type),
(s_dscrpt? s_dscrpt : type),
(provides? provides : ""),
(req_start? req_start : ""),
(req_stop? req_stop : ""),
(shld_start? shld_start : ""),
(shld_stop? shld_stop : ""),
(dflt_start? dflt_start : ""),
(dflt_stop? dflt_stop : ""));
lsb_meta_helper_free_value(xml_l_dscrpt);
lsb_meta_helper_free_value(s_dscrpt);
lsb_meta_helper_free_value(provides);
lsb_meta_helper_free_value(req_start);
lsb_meta_helper_free_value(req_stop);
lsb_meta_helper_free_value(shld_start);
lsb_meta_helper_free_value(shld_stop);
lsb_meta_helper_free_value(dflt_start);
lsb_meta_helper_free_value(dflt_stop);
crm_trace("Created fake metadata: %llu",
(unsigned long long) strlen(*output));
return pcmk_ok;
}
#if SUPPORT_NAGIOS
static int
nagios_get_metadata(const char *type, char **output)
{
int rc = pcmk_ok;
FILE *file_strm = NULL;
int start = 0, length = 0, read_len = 0;
char *metadata_file = crm_strdup_printf("%s/%s.xml",
NAGIOS_METADATA_DIR, type);
file_strm = fopen(metadata_file, "r");
if (file_strm == NULL) {
crm_err("Metadata file %s does not exist", metadata_file);
free(metadata_file);
return -EIO;
}
/* see how big the file is */
start = ftell(file_strm);
fseek(file_strm, 0L, SEEK_END);
length = ftell(file_strm);
fseek(file_strm, 0L, start);
CRM_ASSERT(length >= 0);
CRM_ASSERT(start == ftell(file_strm));
if (length <= 0) {
crm_info("%s was not valid", metadata_file);
free(*output);
*output = NULL;
rc = -EIO;
} else {
crm_trace("Reading %d bytes from file", length);
*output = calloc(1, (length + 1));
read_len = fread(*output, 1, length, file_strm);
if (read_len != length) {
crm_err("Calculated and read bytes differ: %d vs. %d",
length, read_len);
free(*output);
*output = NULL;
rc = -EIO;
}
}
fclose(file_strm);
free(metadata_file);
return rc;
}
#endif
#if SUPPORT_HEARTBEAT
/* strictly speaking, support for class=heartbeat style scripts
* does not require "heartbeat support" to be enabled.
* But since those scripts are part of the "heartbeat" package usually,
* and are very unlikely to be present in any other deployment,
* I leave it inside this ifdef.
*
* Yes, I know, these are legacy and should die,
* or at least be rewritten to be a proper OCF style agent.
* But they exist, and custom scripts following these rules do, too.
*
* Taken from the old "glue" lrmd, see
* http://hg.linux-ha.org/glue/file/0a7add1d9996/lib/plugins/lrm/raexechb.c#l49
* http://hg.linux-ha.org/glue/file/0a7add1d9996/lib/plugins/lrm/raexechb.c#l393
*/
static const char hb_metadata_template[] =
"<?xml version='1.0'?>\n"
"<!DOCTYPE resource-agent SYSTEM 'ra-api-1.dtd'>\n"
"<resource-agent name='%s' version='" PCMK_DEFAULT_AGENT_VERSION "'>\n"
"<version>1.0</version>\n"
"<longdesc lang='en'>\n"
"%s"
"</longdesc>\n"
"<shortdesc lang='en'>%s</shortdesc>\n"
"<parameters>\n"
"<parameter name='1' unique='1' required='0'>\n"
"<longdesc lang='en'>\n"
"This argument will be passed as the first argument to the "
"heartbeat resource agent (assuming it supports one)\n"
"</longdesc>\n"
"<shortdesc lang='en'>argv[1]</shortdesc>\n"
"<content type='string' default=' ' />\n"
"</parameter>\n"
"<parameter name='2' unique='1' required='0'>\n"
"<longdesc lang='en'>\n"
"This argument will be passed as the second argument to the "
"heartbeat resource agent (assuming it supports one)\n"
"</longdesc>\n"
"<shortdesc lang='en'>argv[2]</shortdesc>\n"
"<content type='string' default=' ' />\n"
"</parameter>\n"
"<parameter name='3' unique='1' required='0'>\n"
"<longdesc lang='en'>\n"
"This argument will be passed as the third argument to the "
"heartbeat resource agent (assuming it supports one)\n"
"</longdesc>\n"
"<shortdesc lang='en'>argv[3]</shortdesc>\n"
"<content type='string' default=' ' />\n"
"</parameter>\n"
"<parameter name='4' unique='1' required='0'>\n"
"<longdesc lang='en'>\n"
"This argument will be passed as the fourth argument to the "
"heartbeat resource agent (assuming it supports one)\n"
"</longdesc>\n"
"<shortdesc lang='en'>argv[4]</shortdesc>\n"
"<content type='string' default=' ' />\n"
"</parameter>\n"
"<parameter name='5' unique='1' required='0'>\n"
"<longdesc lang='en'>\n"
"This argument will be passed as the fifth argument to the "
"heartbeat resource agent (assuming it supports one)\n"
"</longdesc>\n"
"<shortdesc lang='en'>argv[5]</shortdesc>\n"
"<content type='string' default=' ' />\n"
"</parameter>\n"
"</parameters>\n"
"<actions>\n"
"<action name='start' timeout='15' />\n"
"<action name='stop' timeout='15' />\n"
"<action name='status' timeout='15' />\n"
"<action name='monitor' timeout='15' interval='15' start-delay='15' />\n"
"<action name='meta-data' timeout='5' />\n"
"</actions>\n"
"<special tag='heartbeat'>\n"
"</special>\n"
"</resource-agent>\n";
static int
heartbeat_get_metadata(const char *type, char **output)
{
*output = crm_strdup_printf(hb_metadata_template, type, type, type);
crm_trace("Created fake metadata: %llu",
(unsigned long long) strlen(*output));
return pcmk_ok;
}
#endif
static gboolean
action_get_metadata(svc_action_t *op)
{
const char *class = op->standard;
if (op->agent == NULL) {
crm_err("meta-data requested without specifying agent");
return FALSE;
}
if (class == NULL) {
crm_err("meta-data requested for agent %s without specifying class",
op->agent);
return FALSE;
}
if (!strcmp(class, PCMK_RESOURCE_CLASS_SERVICE)) {
class = resources_find_service_class(op->agent);
}
if (class == NULL) {
crm_err("meta-data requested for %s, but could not determine class",
op->agent);
return FALSE;
}
if (safe_str_eq(class, PCMK_RESOURCE_CLASS_LSB)) {
return (lsb_get_metadata(op->agent, &op->stdout_data) >= 0);
}
#if SUPPORT_NAGIOS
if (safe_str_eq(class, PCMK_RESOURCE_CLASS_NAGIOS)) {
return (nagios_get_metadata(op->agent, &op->stdout_data) >= 0);
}
#endif
#if SUPPORT_HEARTBEAT
if (safe_str_eq(class, PCMK_RESOURCE_CLASS_HB)) {
return (heartbeat_get_metadata(op->agent, &op->stdout_data) >= 0);
}
#endif
return action_exec_helper(op);
}
gboolean
services_action_sync(svc_action_t * op)
{
gboolean rc = TRUE;
if (op == NULL) {
crm_trace("No operation to execute");
return FALSE;
}
op->synchronous = true;
if (safe_str_eq(op->action, "meta-data")) {
/* Synchronous meta-data operations are handled specially. Since most
* resource classes don't provide any meta-data, it has to be
* synthesized from available information about the agent.
*
* services_action_async() doesn't treat meta-data actions specially, so
* it will result in an error for classes that don't support the action.
*/
rc = action_get_metadata(op);
} else {
rc = action_exec_helper(op);
}
crm_trace(" > %s_%s_%d: %s = %d",
op->rsc, op->action, op->interval, op->opaque->exec, op->rc);
if (op->stdout_data) {
crm_trace(" > stdout: %s", op->stdout_data);
}
if (op->stderr_data) {
crm_trace(" > stderr: %s", op->stderr_data);
}
return rc;
}
GList *
get_directory_list(const char *root, gboolean files, gboolean executable)
{
return services_os_get_directory_list(root, files, executable);
}
GList *
services_list(void)
{
return resources_list_agents(PCMK_RESOURCE_CLASS_LSB, NULL);
}
#if SUPPORT_HEARTBEAT
static GList *
resources_os_list_hb_agents(void)
{
return services_os_get_directory_list(HB_RA_DIR, TRUE, TRUE);
}
#endif
GList *
resources_list_standards(void)
{
GList *standards = NULL;
GList *agents = NULL;
standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_OCF));
standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_LSB));
standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_SERVICE));
#if SUPPORT_SYSTEMD
agents = systemd_unit_listall();
if (agents) {
standards = g_list_append(standards,
strdup(PCMK_RESOURCE_CLASS_SYSTEMD));
g_list_free_full(agents, free);
}
#endif
#if SUPPORT_UPSTART
agents = upstart_job_listall();
if (agents) {
standards = g_list_append(standards,
strdup(PCMK_RESOURCE_CLASS_UPSTART));
g_list_free_full(agents, free);
}
#endif
#if SUPPORT_NAGIOS
agents = resources_os_list_nagios_agents();
if (agents) {
standards = g_list_append(standards,
strdup(PCMK_RESOURCE_CLASS_NAGIOS));
g_list_free_full(agents, free);
}
#endif
#if SUPPORT_HEARTBEAT
standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_HB));
#endif
return standards;
}
GList *
resources_list_providers(const char *standard)
{
if (is_set(pcmk_get_ra_caps(standard), pcmk_ra_cap_provider)) {
return resources_os_list_ocf_providers();
}
return NULL;
}
GList *
resources_list_agents(const char *standard, const char *provider)
{
if ((standard == NULL)
|| (strcasecmp(standard, PCMK_RESOURCE_CLASS_SERVICE) == 0)) {
GList *tmp1;
GList *tmp2;
GList *result = resources_os_list_lsb_agents();
if (standard == NULL) {
tmp1 = result;
tmp2 = resources_os_list_ocf_agents(NULL);
if (tmp2) {
result = g_list_concat(tmp1, tmp2);
}
}
#if SUPPORT_SYSTEMD
tmp1 = result;
tmp2 = systemd_unit_listall();
if (tmp2) {
result = g_list_concat(tmp1, tmp2);
}
#endif
#if SUPPORT_UPSTART
tmp1 = result;
tmp2 = upstart_job_listall();
if (tmp2) {
result = g_list_concat(tmp1, tmp2);
}
#endif
return result;
} else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_OCF) == 0) {
return resources_os_list_ocf_agents(provider);
} else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_LSB) == 0) {
return resources_os_list_lsb_agents();
#if SUPPORT_HEARTBEAT
} else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_HB) == 0) {
return resources_os_list_hb_agents();
#endif
#if SUPPORT_SYSTEMD
} else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_SYSTEMD) == 0) {
return systemd_unit_listall();
#endif
#if SUPPORT_UPSTART
} else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_UPSTART) == 0) {
return upstart_job_listall();
#endif
#if SUPPORT_NAGIOS
} else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_NAGIOS) == 0) {
return resources_os_list_nagios_agents();
#endif
}
return NULL;
}
diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c
index 705901e31c..2047b641fa 100644
--- a/lib/services/services_linux.c
+++ b/lib/services/services_linux.c
@@ -1,1032 +1,1059 @@
/*
* Copyright 2010-2019 Andrew Beekhof <andrew@beekhof.net>
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <errno.h>
#include <unistd.h>
#include <dirent.h>
#include <string.h>
#include <sys/time.h>
#include <sys/resource.h>
#ifdef HAVE_SYS_SIGNALFD_H
#include <sys/signalfd.h>
#endif
#include "crm/crm.h"
#include "crm/common/mainloop.h"
#include "crm/services.h"
#include "services_private.h"
#if SUPPORT_CIBSECRETS
# include "crm/common/cib_secrets.h"
#endif
static gboolean
svc_read_output(int fd, svc_action_t * op, bool is_stderr)
{
char *data = NULL;
int rc = 0, len = 0;
char buf[500];
static const size_t buf_read_len = sizeof(buf) - 1;
if (fd < 0) {
crm_trace("No fd for %s", op->id);
return FALSE;
}
if (is_stderr && op->stderr_data) {
len = strlen(op->stderr_data);
data = op->stderr_data;
crm_trace("Reading %s stderr into offset %d", op->id, len);
} else if (is_stderr == FALSE && op->stdout_data) {
len = strlen(op->stdout_data);
data = op->stdout_data;
crm_trace("Reading %s stdout into offset %d", op->id, len);
} else {
crm_trace("Reading %s %s into offset %d", op->id, is_stderr?"stderr":"stdout", len);
}
do {
rc = read(fd, buf, buf_read_len);
if (rc > 0) {
buf[rc] = 0;
crm_trace("Got %d chars: %.80s", rc, buf);
data = realloc_safe(data, len + rc + 1);
len += sprintf(data + len, "%s", buf);
} else if (errno != EINTR) {
/* error or EOF
* Cleanup happens in pipe_done()
*/
rc = FALSE;
break;
}
} while (rc == buf_read_len || rc < 0);
if (is_stderr) {
op->stderr_data = data;
} else {
op->stdout_data = data;
}
return rc;
}
static int
dispatch_stdout(gpointer userdata)
{
svc_action_t *op = (svc_action_t *) userdata;
return svc_read_output(op->opaque->stdout_fd, op, FALSE);
}
static int
dispatch_stderr(gpointer userdata)
{
svc_action_t *op = (svc_action_t *) userdata;
return svc_read_output(op->opaque->stderr_fd, op, TRUE);
}
static void
pipe_out_done(gpointer user_data)
{
svc_action_t *op = (svc_action_t *) user_data;
crm_trace("%p", op);
op->opaque->stdout_gsource = NULL;
if (op->opaque->stdout_fd > STDOUT_FILENO) {
close(op->opaque->stdout_fd);
}
op->opaque->stdout_fd = -1;
}
static void
pipe_err_done(gpointer user_data)
{
svc_action_t *op = (svc_action_t *) user_data;
op->opaque->stderr_gsource = NULL;
if (op->opaque->stderr_fd > STDERR_FILENO) {
close(op->opaque->stderr_fd);
}
op->opaque->stderr_fd = -1;
}
static struct mainloop_fd_callbacks stdout_callbacks = {
.dispatch = dispatch_stdout,
.destroy = pipe_out_done,
};
static struct mainloop_fd_callbacks stderr_callbacks = {
.dispatch = dispatch_stderr,
.destroy = pipe_err_done,
};
static void
set_ocf_env(const char *key, const char *value, gpointer user_data)
{
if (setenv(key, value, 1) != 0) {
crm_perror(LOG_ERR, "setenv failed for key:%s and value:%s", key, value);
}
}
static void
set_ocf_env_with_prefix(gpointer key, gpointer value, gpointer user_data)
{
char buffer[500];
snprintf(buffer, sizeof(buffer), "OCF_RESKEY_%s", (char *)key);
set_ocf_env(buffer, value, user_data);
}
+static void
+set_alert_env(gpointer key, gpointer value, gpointer user_data)
+{
+ int rc;
+
+ if (value != NULL) {
+ rc = setenv(key, value, 1);
+ } else {
+ rc = unsetenv(key);
+ }
+
+ if (rc < 0) {
+ crm_perror(LOG_ERR, "setenv %s=%s",
+ (char*)key, (value? (char*)value : ""));
+ } else {
+ crm_trace("setenv %s=%s", (char*)key, (value? (char*)value : ""));
+ }
+}
+
/*!
* \internal
* \brief Add environment variables suitable for an action
*
* \param[in] op Action to use
*/
static void
add_action_env_vars(const svc_action_t *op)
{
- if (safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_OCF) == FALSE) {
- return;
+ void (*env_setter)(gpointer, gpointer, gpointer) = NULL;
+ if (op->agent == NULL) {
+ env_setter = set_alert_env; /* we deal with alert handler */
+
+ } else if (safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_OCF)) {
+ env_setter = set_ocf_env_with_prefix;
}
- if (op->params) {
- g_hash_table_foreach(op->params, set_ocf_env_with_prefix, NULL);
+ if (env_setter != NULL && op->params != NULL) {
+ g_hash_table_foreach(op->params, env_setter, NULL);
+ }
+
+ if (env_setter == NULL || env_setter == set_alert_env) {
+ return;
}
set_ocf_env("OCF_RA_VERSION_MAJOR", "1", NULL);
set_ocf_env("OCF_RA_VERSION_MINOR", "0", NULL);
set_ocf_env("OCF_ROOT", OCF_ROOT_DIR, NULL);
set_ocf_env("OCF_EXIT_REASON_PREFIX", PCMK_OCF_REASON_PREFIX, NULL);
if (op->rsc) {
set_ocf_env("OCF_RESOURCE_INSTANCE", op->rsc, NULL);
}
if (op->agent != NULL) {
set_ocf_env("OCF_RESOURCE_TYPE", op->agent, NULL);
}
/* Notes: this is not added to specification yet. Sept 10,2004 */
if (op->provider != NULL) {
set_ocf_env("OCF_RESOURCE_PROVIDER", op->provider, NULL);
}
}
static void
pipe_in_single_parameter(gpointer key, gpointer value, gpointer user_data)
{
svc_action_t *op = user_data;
char *buffer = crm_strdup_printf("%s=%s\n", (char *)key, (char *) value);
int ret, total = 0, len = strlen(buffer);
do {
errno = 0;
ret = write(op->opaque->stdin_fd, buffer + total, len - total);
if (ret > 0) {
total += ret;
}
} while ((errno == EINTR) && (total < len));
free(buffer);
}
/*!
* \internal
* \brief Pipe parameters in via stdin for action
*
* \param[in] op Action to use
*/
static void
pipe_in_action_stdin_parameters(const svc_action_t *op)
{
crm_debug("sending args");
if (op->params) {
g_hash_table_foreach(op->params, pipe_in_single_parameter, (gpointer) op);
}
}
gboolean
recurring_action_timer(gpointer data)
{
svc_action_t *op = data;
crm_debug("Scheduling another invocation of %s", op->id);
/* Clean out the old result */
free(op->stdout_data);
op->stdout_data = NULL;
free(op->stderr_data);
op->stderr_data = NULL;
op->opaque->repeat_timer = 0;
services_action_async(op, NULL);
return FALSE;
}
/* Returns FALSE if 'op' should be free'd by the caller */
gboolean
operation_finalize(svc_action_t * op)
{
int recurring = 0;
if (op->interval) {
if (op->cancel) {
op->status = PCMK_LRM_OP_CANCELLED;
cancel_recurring_action(op);
} else {
recurring = 1;
op->opaque->repeat_timer = g_timeout_add(op->interval,
recurring_action_timer, (void *)op);
}
}
if (op->opaque->callback) {
op->opaque->callback(op);
}
op->pid = 0;
services_untrack_op(op);
if (!recurring && op->synchronous == FALSE) {
/*
* If this is a recurring action, do not free explicitly.
* It will get freed whenever the action gets cancelled.
*/
services_action_free(op);
return TRUE;
}
services_action_cleanup(op);
return FALSE;
}
static void
operation_finished(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)
{
svc_action_t *op = mainloop_child_userdata(p);
char *prefix = crm_strdup_printf("%s:%d", op->id, op->pid);
mainloop_clear_child_userdata(p);
op->status = PCMK_LRM_OP_DONE;
CRM_ASSERT(op->pid == pid);
crm_trace("%s %p %p", prefix, op->opaque->stderr_gsource, op->opaque->stdout_gsource);
if (op->opaque->stderr_gsource) {
/* Make sure we have read everything from the buffer.
* Depending on the priority mainloop gives the fd, operation_finished
* could occur before all the reads are done. Force the read now.*/
crm_trace("%s dispatching stderr", prefix);
dispatch_stderr(op);
crm_trace("%s: %p", op->id, op->stderr_data);
mainloop_del_fd(op->opaque->stderr_gsource);
op->opaque->stderr_gsource = NULL;
}
if (op->opaque->stdout_gsource) {
/* Make sure we have read everything from the buffer.
* Depending on the priority mainloop gives the fd, operation_finished
* could occur before all the reads are done. Force the read now.*/
crm_trace("%s dispatching stdout", prefix);
dispatch_stdout(op);
crm_trace("%s: %p", op->id, op->stdout_data);
mainloop_del_fd(op->opaque->stdout_gsource);
op->opaque->stdout_gsource = NULL;
}
if (op->opaque->stdin_fd >= 0) {
close(op->opaque->stdin_fd);
}
if (signo) {
if (mainloop_child_timeout(p)) {
crm_warn("%s - timed out after %dms", prefix, op->timeout);
op->status = PCMK_LRM_OP_TIMEOUT;
op->rc = PCMK_OCF_TIMEOUT;
} else if (op->cancel) {
/* If an in-flight recurring operation was killed because it was
* cancelled, don't treat that as a failure.
*/
crm_info("%s - terminated with signal %d", prefix, signo);
op->status = PCMK_LRM_OP_CANCELLED;
op->rc = PCMK_OCF_OK;
} else {
crm_warn("%s - terminated with signal %d", prefix, signo);
op->status = PCMK_LRM_OP_ERROR;
op->rc = PCMK_OCF_SIGNAL;
}
} else {
op->rc = exitcode;
crm_debug("%s - exited with rc=%d", prefix, exitcode);
}
free(prefix);
prefix = crm_strdup_printf("%s:%d:stderr", op->id, op->pid);
crm_log_output(LOG_NOTICE, prefix, op->stderr_data);
free(prefix);
prefix = crm_strdup_printf("%s:%d:stdout", op->id, op->pid);
crm_log_output(LOG_DEBUG, prefix, op->stdout_data);
free(prefix);
operation_finalize(op);
}
/*!
* \internal
* \brief Set operation rc and status per errno from stat(), fork() or execvp()
*
* \param[in,out] op Operation to set rc and status for
* \param[in] error Value of errno after system call
*
* \return void
*/
static void
services_handle_exec_error(svc_action_t * op, int error)
{
int rc_not_installed, rc_insufficient_priv, rc_exec_error;
/* Mimic the return codes for each standard as that's what we'll convert back from in get_uniform_rc() */
if (safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_LSB)
&& safe_str_eq(op->action, "status")) {
rc_not_installed = PCMK_LSB_STATUS_NOT_INSTALLED;
rc_insufficient_priv = PCMK_LSB_STATUS_INSUFFICIENT_PRIV;
rc_exec_error = PCMK_LSB_STATUS_UNKNOWN;
#if SUPPORT_NAGIOS
} else if (safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_NAGIOS)) {
rc_not_installed = NAGIOS_NOT_INSTALLED;
rc_insufficient_priv = NAGIOS_INSUFFICIENT_PRIV;
rc_exec_error = PCMK_OCF_EXEC_ERROR;
#endif
} else {
rc_not_installed = PCMK_OCF_NOT_INSTALLED;
rc_insufficient_priv = PCMK_OCF_INSUFFICIENT_PRIV;
rc_exec_error = PCMK_OCF_EXEC_ERROR;
}
switch (error) { /* see execve(2), stat(2) and fork(2) */
case ENOENT: /* No such file or directory */
case EISDIR: /* Is a directory */
case ENOTDIR: /* Path component is not a directory */
case EINVAL: /* Invalid executable format */
case ENOEXEC: /* Invalid executable format */
op->rc = rc_not_installed;
op->status = PCMK_LRM_OP_NOT_INSTALLED;
break;
case EACCES: /* permission denied (various errors) */
case EPERM: /* permission denied (various errors) */
op->rc = rc_insufficient_priv;
op->status = PCMK_LRM_OP_ERROR;
break;
default:
op->rc = rc_exec_error;
op->status = PCMK_LRM_OP_ERROR;
}
}
static void
action_launch_child(svc_action_t *op)
{
int lpc;
/* SIGPIPE is ignored (which is different from signal blocking) by the gnutls library.
* Depending on the libqb version in use, libqb may set SIGPIPE to be ignored as well.
* We do not want this to be inherited by the child process. By resetting this the signal
* to the default behavior, we avoid some potential odd problems that occur during OCF
* scripts when SIGPIPE is ignored by the environment. */
signal(SIGPIPE, SIG_DFL);
#if defined(HAVE_SCHED_SETSCHEDULER)
if (sched_getscheduler(0) != SCHED_OTHER) {
struct sched_param sp;
memset(&sp, 0, sizeof(sp));
sp.sched_priority = 0;
if (sched_setscheduler(0, SCHED_OTHER, &sp) == -1) {
crm_perror(LOG_ERR, "Could not reset scheduling policy to SCHED_OTHER for %s", op->id);
}
}
#endif
if (setpriority(PRIO_PROCESS, 0, 0) == -1) {
crm_perror(LOG_ERR, "Could not reset process priority to 0 for %s", op->id);
}
/* Man: The call setpgrp() is equivalent to setpgid(0,0)
* _and_ compiles on BSD variants too
* need to investigate if it works the same too.
*/
setpgid(0, 0);
/* close all descriptors except stdin/out/err and channels to logd */
for (lpc = getdtablesize() - 1; lpc > STDERR_FILENO; lpc--) {
close(lpc);
}
#if SUPPORT_CIBSECRETS
if (replace_secret_params(op->rsc, op->params) < 0) {
/* replacing secrets failed! */
if (safe_str_eq(op->action,"stop")) {
/* don't fail on stop! */
crm_info("proceeding with the stop operation for %s", op->rsc);
} else {
crm_err("failed to get secrets for %s, "
"considering resource not configured", op->rsc);
_exit(PCMK_OCF_NOT_CONFIGURED);
}
}
#endif
add_action_env_vars(op);
/* Become the desired user */
if (op->opaque->uid && (geteuid() == 0)) {
if (op->opaque->gid && (setgid(op->opaque->gid) < 0)) {
crm_perror(LOG_ERR, "setting group to %d", op->opaque->gid);
_exit(PCMK_OCF_NOT_CONFIGURED);
}
if (setuid(op->opaque->uid) < 0) {
crm_perror(LOG_ERR, "setting user to %d", op->opaque->uid);
_exit(PCMK_OCF_NOT_CONFIGURED);
}
/* We could do initgroups() here if we kept a copy of the username */
}
/* execute the RA */
execvp(op->opaque->exec, op->opaque->args);
/* Most cases should have been already handled by stat() */
services_handle_exec_error(op, errno);
_exit(op->rc);
}
#ifndef HAVE_SYS_SIGNALFD_H
static int sigchld_pipe[2] = { -1, -1 };
static void
sigchld_handler()
{
if ((sigchld_pipe[1] >= 0) && (write(sigchld_pipe[1], "", 1) == -1)) {
crm_perror(LOG_TRACE, "Could not poke SIGCHLD self-pipe");
}
}
#endif
static void
action_synced_wait(svc_action_t * op, sigset_t *mask)
{
int status = 0;
int timeout = op->timeout;
int sfd = -1;
time_t start = -1;
struct pollfd fds[3];
int wait_rc = 0;
#ifdef HAVE_SYS_SIGNALFD_H
sfd = signalfd(-1, mask, SFD_NONBLOCK);
if (sfd < 0) {
crm_perror(LOG_ERR, "signalfd() failed");
}
#else
sfd = sigchld_pipe[0];
#endif
fds[0].fd = op->opaque->stdout_fd;
fds[0].events = POLLIN;
fds[0].revents = 0;
fds[1].fd = op->opaque->stderr_fd;
fds[1].events = POLLIN;
fds[1].revents = 0;
fds[2].fd = sfd;
fds[2].events = POLLIN;
fds[2].revents = 0;
crm_trace("Waiting for %d", op->pid);
start = time(NULL);
do {
int poll_rc = poll(fds, 3, timeout);
if (poll_rc > 0) {
if (fds[0].revents & POLLIN) {
svc_read_output(op->opaque->stdout_fd, op, FALSE);
}
if (fds[1].revents & POLLIN) {
svc_read_output(op->opaque->stderr_fd, op, TRUE);
}
if (fds[2].revents & POLLIN) {
#ifdef HAVE_SYS_SIGNALFD_H
struct signalfd_siginfo fdsi;
ssize_t s;
s = read(sfd, &fdsi, sizeof(struct signalfd_siginfo));
if (s != sizeof(struct signalfd_siginfo)) {
crm_perror(LOG_ERR, "Read from signal fd %d failed", sfd);
} else if (fdsi.ssi_signo == SIGCHLD) {
#else
if (1) {
/* Clear out the sigchld pipe. */
char ch;
while (read(sfd, &ch, 1) == 1) /*omit*/;
#endif
wait_rc = waitpid(op->pid, &status, WNOHANG);
if (wait_rc > 0) {
break;
} else if (wait_rc < 0){
if (errno == ECHILD) {
/* Here, don't dare to kill and bail out... */
break;
} else {
/* ...otherwise pretend process still runs. */
wait_rc = 0;
}
crm_perror(LOG_ERR, "waitpid() for %d failed", op->pid);
}
}
}
} else if (poll_rc == 0) {
timeout = 0;
break;
} else if (poll_rc < 0) {
if (errno != EINTR) {
crm_perror(LOG_ERR, "poll() failed");
break;
}
}
timeout = op->timeout - (time(NULL) - start) * 1000;
} while ((op->timeout < 0 || timeout > 0));
crm_trace("Child done: %d", op->pid);
if (wait_rc <= 0) {
op->rc = PCMK_OCF_UNKNOWN_ERROR;
if (op->timeout > 0 && timeout <= 0) {
op->status = PCMK_LRM_OP_TIMEOUT;
crm_warn("%s:%d - timed out after %dms", op->id, op->pid, op->timeout);
} else {
op->status = PCMK_LRM_OP_ERROR;
}
/* If only child hasn't been successfully waited for, yet.
This is to limit killing wrong target a bit more. */
if (wait_rc == 0 && waitpid(op->pid, &status, WNOHANG) == 0) {
if (kill(op->pid, SIGKILL)) {
crm_err("kill(%d, KILL) failed: %d", op->pid, errno);
}
/* Safe to skip WNOHANG here as we sent non-ignorable signal. */
while (waitpid(op->pid, &status, 0) == (pid_t) -1 && errno == EINTR) /*omit*/;
}
} else if (WIFEXITED(status)) {
op->status = PCMK_LRM_OP_DONE;
op->rc = WEXITSTATUS(status);
crm_info("Managed %s process %d exited with rc=%d", op->id, op->pid, op->rc);
} else if (WIFSIGNALED(status)) {
int signo = WTERMSIG(status);
op->status = PCMK_LRM_OP_ERROR;
crm_err("Managed %s process %d exited with signal=%d", op->id, op->pid, signo);
}
#ifdef WCOREDUMP
if (WCOREDUMP(status)) {
crm_err("Managed %s process %d dumped core", op->id, op->pid);
}
#endif
svc_read_output(op->opaque->stdout_fd, op, FALSE);
svc_read_output(op->opaque->stderr_fd, op, TRUE);
close(op->opaque->stdout_fd);
close(op->opaque->stderr_fd);
if (op->opaque->stdin_fd >= 0) {
close(op->opaque->stdin_fd);
}
#ifdef HAVE_SYS_SIGNALFD_H
close(sfd);
#endif
}
/* For an asynchronous 'op', returns FALSE if 'op' should be free'd by the caller */
/* For a synchronous 'op', returns FALSE if 'op' fails */
gboolean
services_os_action_execute(svc_action_t * op)
{
int stdout_fd[2];
int stderr_fd[2];
int stdin_fd[2] = {-1, -1};
int rc;
struct stat st;
sigset_t *pmask;
#ifdef HAVE_SYS_SIGNALFD_H
sigset_t mask;
sigset_t old_mask;
#define sigchld_cleanup() do { \
if (sigismember(&old_mask, SIGCHLD) == 0) { \
if (sigprocmask(SIG_UNBLOCK, &mask, NULL) < 0) { \
crm_perror(LOG_ERR, "sigprocmask() failed to unblock sigchld"); \
} \
} \
} while (0)
#else
struct sigaction sa;
struct sigaction old_sa;
#define sigchld_cleanup() do { \
if (sigaction(SIGCHLD, &old_sa, NULL) < 0) { \
crm_perror(LOG_ERR, "sigaction() failed to remove sigchld handler"); \
} \
close(sigchld_pipe[0]); \
close(sigchld_pipe[1]); \
sigchld_pipe[0] = sigchld_pipe[1] = -1; \
} while(0)
#endif
/* Fail fast */
if(stat(op->opaque->exec, &st) != 0) {
rc = errno;
crm_warn("Cannot execute '%s': %s (%d)", op->opaque->exec, pcmk_strerror(rc), rc);
services_handle_exec_error(op, rc);
if (!op->synchronous) {
return operation_finalize(op);
}
return FALSE;
}
if (pipe(stdout_fd) < 0) {
rc = errno;
crm_err("pipe(stdout_fd) failed. '%s': %s (%d)", op->opaque->exec, pcmk_strerror(rc), rc);
services_handle_exec_error(op, rc);
if (!op->synchronous) {
return operation_finalize(op);
}
return FALSE;
}
if (pipe(stderr_fd) < 0) {
rc = errno;
close(stdout_fd[0]);
close(stdout_fd[1]);
crm_err("pipe(stderr_fd) failed. '%s': %s (%d)", op->opaque->exec, pcmk_strerror(rc), rc);
services_handle_exec_error(op, rc);
if (!op->synchronous) {
return operation_finalize(op);
}
return FALSE;
}
if (safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_STONITH)) {
if (pipe(stdin_fd) < 0) {
rc = errno;
close(stdout_fd[0]);
close(stdout_fd[1]);
close(stderr_fd[0]);
close(stderr_fd[1]);
crm_err("pipe(stdin_fd) failed. '%s': %s (%d)", op->opaque->exec, pcmk_strerror(rc), rc);
services_handle_exec_error(op, rc);
if (!op->synchronous) {
return operation_finalize(op);
}
return FALSE;
}
}
if (op->synchronous) {
#ifdef HAVE_SYS_SIGNALFD_H
sigemptyset(&mask);
sigaddset(&mask, SIGCHLD);
sigemptyset(&old_mask);
if (sigprocmask(SIG_BLOCK, &mask, &old_mask) < 0) {
crm_perror(LOG_ERR, "sigprocmask() failed to block sigchld");
}
pmask = &mask;
#else
if(pipe(sigchld_pipe) == -1) {
crm_perror(LOG_ERR, "pipe() failed");
}
rc = crm_set_nonblocking(sigchld_pipe[0]);
if (rc < 0) {
crm_warn("Could not set pipe input non-blocking: %s " CRM_XS " rc=%d",
pcmk_strerror(rc), rc);
}
rc = crm_set_nonblocking(sigchld_pipe[1]);
if (rc < 0) {
crm_warn("Could not set pipe output non-blocking: %s " CRM_XS " rc=%d",
pcmk_strerror(rc), rc);
}
sa.sa_handler = sigchld_handler;
sa.sa_flags = 0;
sigemptyset(&sa.sa_mask);
if (sigaction(SIGCHLD, &sa, &old_sa) < 0) {
crm_perror(LOG_ERR, "sigaction() failed to set sigchld handler");
}
pmask = NULL;
#endif
}
op->pid = fork();
switch (op->pid) {
case -1:
rc = errno;
close(stdout_fd[0]);
close(stdout_fd[1]);
close(stderr_fd[0]);
close(stderr_fd[1]);
if (stdin_fd[0] >= 0) {
close(stdin_fd[0]);
close(stdin_fd[1]);
}
crm_err("Could not execute '%s': %s (%d)", op->opaque->exec, pcmk_strerror(rc), rc);
services_handle_exec_error(op, rc);
if (!op->synchronous) {
return operation_finalize(op);
}
sigchld_cleanup();
return FALSE;
case 0: /* Child */
close(stdout_fd[0]);
close(stderr_fd[0]);
if (stdin_fd[1] >= 0) {
close(stdin_fd[1]);
}
if (STDOUT_FILENO != stdout_fd[1]) {
if (dup2(stdout_fd[1], STDOUT_FILENO) != STDOUT_FILENO) {
crm_err("dup2() failed (stdout)");
}
close(stdout_fd[1]);
}
if (STDERR_FILENO != stderr_fd[1]) {
if (dup2(stderr_fd[1], STDERR_FILENO) != STDERR_FILENO) {
crm_err("dup2() failed (stderr)");
}
close(stderr_fd[1]);
}
if ((stdin_fd[0] >= 0) &&
(STDIN_FILENO != stdin_fd[0])) {
if (dup2(stdin_fd[0], STDIN_FILENO) != STDIN_FILENO) {
crm_err("dup2() failed (stdin)");
}
close(stdin_fd[0]);
}
if (op->synchronous) {
sigchld_cleanup();
}
action_launch_child(op);
CRM_ASSERT(0); /* action_launch_child is effectively noreturn */
}
/* Only the parent reaches here */
close(stdout_fd[1]);
close(stderr_fd[1]);
if (stdin_fd[0] >= 0) {
close(stdin_fd[0]);
}
op->opaque->stdout_fd = stdout_fd[0];
rc = crm_set_nonblocking(op->opaque->stdout_fd);
if (rc < 0) {
crm_warn("Could not set child output non-blocking: %s "
CRM_XS " rc=%d",
pcmk_strerror(rc), rc);
}
op->opaque->stderr_fd = stderr_fd[0];
rc = crm_set_nonblocking(op->opaque->stderr_fd);
if (rc < 0) {
crm_warn("Could not set child error output non-blocking: %s "
CRM_XS " rc=%d",
pcmk_strerror(rc), rc);
}
op->opaque->stdin_fd = stdin_fd[1];
if (op->opaque->stdin_fd >= 0) {
// using buffer behind non-blocking-fd here - that could be improved
// as long as no other standard uses stdin_fd assume stonith
rc = crm_set_nonblocking(op->opaque->stdin_fd);
if (rc < 0) {
crm_warn("Could not set child input non-blocking: %s "
CRM_XS " fd=%d,rc=%d",
pcmk_strerror(rc), op->opaque->stdin_fd, rc);
}
pipe_in_action_stdin_parameters(op);
// as long as we are handling parameters directly in here just close
close(op->opaque->stdin_fd);
op->opaque->stdin_fd = -1;
}
// after fds are setup properly and before we plug anything into mainloop
if (op->opaque->fork_callback) {
op->opaque->fork_callback(op);
}
if (op->synchronous) {
action_synced_wait(op, pmask);
sigchld_cleanup();
} else {
crm_trace("Async waiting for %d - %s", op->pid, op->opaque->exec);
mainloop_child_add_with_flags(op->pid,
op->timeout,
op->id,
op,
(op->flags & SVC_ACTION_LEAVE_GROUP) ? mainloop_leave_pid_group : 0,
operation_finished);
op->opaque->stdout_gsource = mainloop_add_fd(op->id,
G_PRIORITY_LOW,
op->opaque->stdout_fd, op, &stdout_callbacks);
op->opaque->stderr_gsource = mainloop_add_fd(op->id,
G_PRIORITY_LOW,
op->opaque->stderr_fd, op, &stderr_callbacks);
services_add_inflight_op(op);
}
return TRUE;
}
GList *
services_os_get_directory_list(const char *root, gboolean files, gboolean executable)
{
GList *list = NULL;
struct dirent **namelist;
int entries = 0, lpc = 0;
char buffer[PATH_MAX];
entries = scandir(root, &namelist, NULL, alphasort);
if (entries <= 0) {
return list;
}
for (lpc = 0; lpc < entries; lpc++) {
struct stat sb;
if ('.' == namelist[lpc]->d_name[0]) {
free(namelist[lpc]);
continue;
}
snprintf(buffer, sizeof(buffer), "%s/%s", root, namelist[lpc]->d_name);
if (stat(buffer, &sb)) {
continue;
}
if (S_ISDIR(sb.st_mode)) {
if (files) {
free(namelist[lpc]);
continue;
}
} else if (S_ISREG(sb.st_mode)) {
if (files == FALSE) {
free(namelist[lpc]);
continue;
} else if (executable
&& (sb.st_mode & S_IXUSR) == 0
&& (sb.st_mode & S_IXGRP) == 0 && (sb.st_mode & S_IXOTH) == 0) {
free(namelist[lpc]);
continue;
}
}
list = g_list_append(list, strdup(namelist[lpc]->d_name));
free(namelist[lpc]);
}
free(namelist);
return list;
}
GList *
resources_os_list_lsb_agents(void)
{
return get_directory_list(LSB_ROOT_DIR, TRUE, TRUE);
}
GList *
resources_os_list_ocf_providers(void)
{
return get_directory_list(OCF_ROOT_DIR "/resource.d", FALSE, TRUE);
}
GList *
resources_os_list_ocf_agents(const char *provider)
{
GList *gIter = NULL;
GList *result = NULL;
GList *providers = NULL;
if (provider) {
char buffer[500];
snprintf(buffer, sizeof(buffer), "%s/resource.d/%s", OCF_ROOT_DIR, provider);
return get_directory_list(buffer, TRUE, TRUE);
}
providers = resources_os_list_ocf_providers();
for (gIter = providers; gIter != NULL; gIter = gIter->next) {
GList *tmp1 = result;
GList *tmp2 = resources_os_list_ocf_agents(gIter->data);
if (tmp2) {
result = g_list_concat(tmp1, tmp2);
}
}
g_list_free_full(providers, free);
return result;
}
#if SUPPORT_NAGIOS
GList *
resources_os_list_nagios_agents(void)
{
GList *plugin_list = NULL;
GList *result = NULL;
GList *gIter = NULL;
plugin_list = get_directory_list(NAGIOS_PLUGIN_DIR, TRUE, TRUE);
/* Make sure both the plugin and its metadata exist */
for (gIter = plugin_list; gIter != NULL; gIter = gIter->next) {
const char *plugin = gIter->data;
char *metadata = crm_strdup_printf(NAGIOS_METADATA_DIR "/%s.xml", plugin);
struct stat st;
if (stat(metadata, &st) == 0) {
result = g_list_append(result, strdup(plugin));
}
free(metadata);
}
g_list_free_full(plugin_list, free);
return result;
}
#endif
diff --git a/mcp/corosync.c b/mcp/corosync.c
index 7502da7643..407a63f3c2 100644
--- a/mcp/corosync.c
+++ b/mcp/corosync.c
@@ -1,490 +1,530 @@
/*
- * Copyright (C) 2010 Andrew Beekhof <andrew@beekhof.net>
+ * Copyright 2010-2019 the Pacemaker project contributors
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
+ * The version control history for this file may have further details.
*
- * This software is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <pacemaker.h>
#include <sys/utsname.h>
#include <sys/stat.h> /* for calls to stat() */
#include <libgen.h> /* For basename() and dirname() */
#include <sys/types.h>
#include <pwd.h> /* For getpwname() */
#include <corosync/hdb.h>
#include <corosync/cfg.h>
#include <corosync/cpg.h>
#if HAVE_CONFDB
# include <corosync/confdb.h>
#endif
#include <crm/cluster/internal.h>
+#include <crm/common/ipc.h> /* for crm_ipc_is_authentic_process */
#include <crm/common/mainloop.h>
+#include <crm/common/ipc_internal.h> /* PCMK__SPECIAL_PID* */
+
#if SUPPORT_CMAN
# include <libcman.h>
#endif
#if HAVE_CMAP
# include <corosync/cmap.h>
#endif
enum cluster_type_e stack = pcmk_cluster_unknown;
static corosync_cfg_handle_t cfg_handle;
/* =::=::=::= CFG - Shutdown stuff =::=::=::= */
static void
cfg_shutdown_callback(corosync_cfg_handle_t h, corosync_cfg_shutdown_flags_t flags)
{
crm_info("Corosync wants to shut down: %s",
(flags == COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE) ? "immediate" :
(flags == COROSYNC_CFG_SHUTDOWN_FLAG_REGARDLESS) ? "forced" : "optional");
/* Never allow corosync to shut down while we're running */
corosync_cfg_replyto_shutdown(h, COROSYNC_CFG_SHUTDOWN_FLAG_NO);
}
static corosync_cfg_callbacks_t cfg_callbacks = {
.corosync_cfg_shutdown_callback = cfg_shutdown_callback,
};
static int
pcmk_cfg_dispatch(gpointer user_data)
{
corosync_cfg_handle_t *handle = (corosync_cfg_handle_t *) user_data;
cs_error_t rc = corosync_cfg_dispatch(*handle, CS_DISPATCH_ALL);
if (rc != CS_OK) {
return -1;
}
return 0;
}
static void
cfg_connection_destroy(gpointer user_data)
{
crm_err("Connection destroyed");
cfg_handle = 0;
pcmk_shutdown(SIGTERM);
}
gboolean
cluster_disconnect_cfg(void)
{
if (cfg_handle) {
corosync_cfg_finalize(cfg_handle);
cfg_handle = 0;
}
pcmk_shutdown(SIGTERM);
return TRUE;
}
#define cs_repeat(counter, max, code) do { \
code; \
if(rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { \
counter++; \
crm_debug("Retrying operation after %ds", counter); \
sleep(counter); \
} else { \
break; \
} \
} while(counter < max)
gboolean
cluster_connect_cfg(uint32_t * nodeid)
{
cs_error_t rc;
- int fd = 0, retries = 0;
+ int fd = -1, retries = 0, rv;
+ uid_t found_uid = 0;
+ gid_t found_gid = 0;
+ pid_t found_pid = 0;
static struct mainloop_fd_callbacks cfg_fd_callbacks = {
.dispatch = pcmk_cfg_dispatch,
.destroy = cfg_connection_destroy,
};
cs_repeat(retries, 30, rc = corosync_cfg_initialize(&cfg_handle, &cfg_callbacks));
if (rc != CS_OK) {
- crm_err("corosync cfg init error %d", rc);
+ crm_err("corosync cfg init: %s (%d)", cs_strerror(rc), rc);
return FALSE;
}
rc = corosync_cfg_fd_get(cfg_handle, &fd);
if (rc != CS_OK) {
- crm_err("corosync cfg fd_get error %d", rc);
+ crm_err("corosync cfg fd_get: %s (%d)", cs_strerror(rc), rc);
+ goto bail;
+ }
+
+ /* CFG provider run as root (in given user namespace, anyway)? */
+ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
+ &found_uid, &found_gid))) {
+ crm_err("CFG provider is not authentic:"
+ " process %lld (uid: %lld, gid: %lld)",
+ (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
+ (long long) found_uid, (long long) found_gid);
+ goto bail;
+ } else if (rv < 0) {
+ crm_err("Could not verify authenticity of CFG provider: %s (%d)",
+ strerror(-rv), -rv);
goto bail;
}
retries = 0;
cs_repeat(retries, 30, rc = corosync_cfg_local_get(cfg_handle, nodeid));
if (rc != CS_OK) {
crm_err("corosync cfg local_get error %d", rc);
goto bail;
}
crm_debug("Our nodeid: %d", *nodeid);
mainloop_add_fd("corosync-cfg", G_PRIORITY_DEFAULT, fd, &cfg_handle, &cfg_fd_callbacks);
return TRUE;
bail:
corosync_cfg_finalize(cfg_handle);
return FALSE;
}
/* =::=::=::= Configuration =::=::=::= */
#if HAVE_CONFDB
static int
get_config_opt(confdb_handle_t config,
hdb_handle_t object_handle, const char *key, char **value, const char *fallback)
{
size_t len = 0;
char *env_key = NULL;
const char *env_value = NULL;
char buffer[256];
if (*value) {
free(*value);
*value = NULL;
}
if (object_handle > 0) {
if (CS_OK == confdb_key_get(config, object_handle, key, strlen(key), &buffer, &len)) {
*value = strdup(buffer);
}
}
if (*value) {
crm_info("Found '%s' for option: %s", *value, key);
return 0;
}
env_key = crm_concat("HA", key, '_');
env_value = getenv(env_key);
free(env_key);
if (*value) {
crm_info("Found '%s' in ENV for option: %s", *value, key);
*value = strdup(env_value);
return 0;
}
if (fallback) {
crm_info("Defaulting to '%s' for option: %s", fallback, key);
*value = strdup(fallback);
} else {
crm_info("No default for option: %s", key);
}
return -1;
}
static confdb_handle_t
config_find_init(confdb_handle_t config)
{
cs_error_t rc = CS_OK;
confdb_handle_t local_handle = OBJECT_PARENT_HANDLE;
rc = confdb_object_find_start(config, local_handle);
if (rc == CS_OK) {
return local_handle;
} else {
crm_err("Couldn't create search context: %d", rc);
}
return 0;
}
static hdb_handle_t
config_find_next(confdb_handle_t config, const char *name, confdb_handle_t top_handle)
{
cs_error_t rc = CS_OK;
hdb_handle_t local_handle = 0;
if (top_handle == 0) {
crm_err("Couldn't search for %s: no valid context", name);
return 0;
}
crm_trace("Searching for %s in " HDB_X_FORMAT, name, top_handle);
rc = confdb_object_find(config, top_handle, name, strlen(name), &local_handle);
if (rc != CS_OK) {
crm_info("No additional configuration supplied for: %s", name);
local_handle = 0;
} else {
crm_info("Processing additional %s options...", name);
}
return local_handle;
}
#else
static int
get_config_opt(uint64_t unused, cmap_handle_t object_handle, const char *key, char **value,
const char *fallback)
{
int rc = 0, retries = 0;
cs_repeat(retries, 5, rc = cmap_get_string(object_handle, key, value));
if (rc != CS_OK) {
crm_trace("Search for %s failed %d, defaulting to %s", key, rc, fallback);
if (fallback) {
*value = strdup(fallback);
} else {
*value = NULL;
}
}
crm_trace("%s: %s", key, *value);
return rc;
}
#endif
#if HAVE_CONFDB
# define KEY_PREFIX ""
#elif HAVE_CMAP
# define KEY_PREFIX "logging."
#endif
gboolean
mcp_read_config(void)
{
- int rc = CS_OK;
+ cs_error_t rc = CS_OK;
int retries = 0;
const char *const_value = NULL;
#if HAVE_CONFDB
char *value = NULL;
confdb_handle_t config = 0;
confdb_handle_t top_handle = 0;
hdb_handle_t local_handle;
static confdb_callbacks_t callbacks = { };
do {
rc = confdb_initialize(&config, &callbacks);
if (rc != CS_OK) {
retries++;
printf("confdb connection setup failed: %s. Retrying in %ds\n", ais_error2text(rc), retries);
crm_info("confdb connection setup failed: %s. Retrying in %ds", ais_error2text(rc), retries);
sleep(retries);
} else {
break;
}
-
} while (retries < 5);
+
#elif HAVE_CMAP
cmap_handle_t local_handle;
uint64_t config = 0;
+ int fd = -1;
+ uid_t found_uid = 0;
+ gid_t found_gid = 0;
+ pid_t found_pid = 0;
+ int rv;
/* There can be only one (possibility if confdb isn't around) */
do {
rc = cmap_initialize(&local_handle);
if (rc != CS_OK) {
retries++;
printf("cmap connection setup failed: %s. Retrying in %ds\n", cs_strerror(rc), retries);
crm_info("cmap connection setup failed: %s. Retrying in %ds", cs_strerror(rc), retries);
sleep(retries);
} else {
break;
}
} while (retries < 5);
#endif
if (rc != CS_OK) {
printf("Could not connect to Cluster Configuration Database API, error %d\n", rc);
crm_warn("Could not connect to Cluster Configuration Database API, error %d", rc);
return FALSE;
}
+ rc = cmap_fd_get(local_handle, &fd);
+ if (rc != CS_OK) {
+ crm_err("Could not obtain the CMAP API connection: %s (%d)",
+ cs_strerror(rc), rc);
+ cmap_finalize(local_handle);
+ return FALSE;
+ }
+
+ /* CMAP provider run as root (in given user namespace, anyway)? */
+ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
+ &found_uid, &found_gid))) {
+ crm_err("CMAP provider is not authentic:"
+ " process %lld (uid: %lld, gid: %lld)",
+ (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
+ (long long) found_uid, (long long) found_gid);
+ cmap_finalize(local_handle);
+ return FALSE;
+ } else if (rv < 0) {
+ crm_err("Could not verify authenticity of CMAP provider: %s (%d)",
+ strerror(-rv), -rv);
+ cmap_finalize(local_handle);
+ return FALSE;
+ }
+
stack = get_cluster_type();
crm_info("Reading configure for stack: %s", name_for_cluster_type(stack));
/* =::=::= Should we be here =::=::= */
if (stack == pcmk_cluster_corosync) {
set_daemon_option("cluster_type", "corosync");
set_daemon_option("quorum_type", "corosync");
#if HAVE_CONFDB
} else if (stack == pcmk_cluster_cman) {
set_daemon_option("cluster_type", "cman");
set_daemon_option("quorum_type", "cman");
enable_crmd_as_root(TRUE);
} else if (stack == pcmk_cluster_classic_ais) {
set_daemon_option("cluster_type", "openais");
set_daemon_option("quorum_type", "pcmk");
/* Look for a service block to indicate our plugin is loaded */
top_handle = config_find_init(config);
local_handle = config_find_next(config, "service", top_handle);
while (local_handle) {
get_config_opt(config, local_handle, "name", &value, NULL);
if (safe_str_eq("pacemaker", value)) {
get_config_opt(config, local_handle, "ver", &value, "0");
if (safe_str_eq(value, "1")) {
get_config_opt(config, local_handle, "use_logd", &value, "no");
set_daemon_option("use_logd", value);
set_daemon_option("LOGD", value);
get_config_opt(config, local_handle, "use_mgmtd", &value, "no");
enable_mgmtd(crm_is_true(value));
} else {
crm_err("We can only start Pacemaker from init if using version 1"
" of the Pacemaker plugin for Corosync. Terminating.");
crm_exit(DAEMON_RESPAWN_STOP);
}
break;
}
local_handle = config_find_next(config, "service", top_handle);
}
free(value);
#endif
} else {
crm_err("Unsupported stack type: %s", name_for_cluster_type(stack));
return FALSE;
}
#if HAVE_CONFDB
top_handle = config_find_init(config);
local_handle = config_find_next(config, "logging", top_handle);
#endif
/* =::=::= Logging =::=::= */
if (daemon_option("debug")) {
/* Syslog logging is already setup by crm_log_init() */
} else {
/* Check corosync */
char *debug_enabled = NULL;
get_config_opt(config, local_handle, KEY_PREFIX "debug", &debug_enabled, "off");
if (crm_is_true(debug_enabled)) {
set_daemon_option("debug", "1");
if (get_crm_log_level() < LOG_DEBUG) {
set_crm_log_level(LOG_DEBUG);
}
} else {
set_daemon_option("debug", "0");
}
free(debug_enabled);
}
/* If the user didn't explicitly configure a Pacemaker log file, check
* whether they configured a heartbeat or corosync log file, and use that.
*
* @COMPAT This should all go away, and we should just rely on the logging
* set up by crm_log_init(). We aren't doing this yet because it is a
* significant user-visible change that will need to be publicized.
*/
const_value = daemon_option("debugfile");
if (daemon_option("logfile")) {
/* File logging is already setup by crm_log_init() */
} else if(const_value) {
/* From when we cared what options heartbeat used */
set_daemon_option("logfile", const_value);
crm_add_logfile(const_value);
} else {
/* Check corosync */
char *logfile = NULL;
char *logfile_enabled = NULL;
get_config_opt(config, local_handle, KEY_PREFIX "to_logfile", &logfile_enabled, "on");
get_config_opt(config, local_handle, KEY_PREFIX "logfile", &logfile, "/var/log/pacemaker.log");
if (crm_is_true(logfile_enabled) == FALSE) {
crm_trace("File logging disabled in corosync");
} else if (crm_add_logfile(logfile)) {
set_daemon_option("logfile", logfile);
} else {
crm_err("Couldn't create logfile: %s", logfile);
set_daemon_option("logfile", "none");
}
free(logfile);
free(logfile_enabled);
}
if (daemon_option("logfacility")) {
/* Syslog logging is already setup by crm_log_init() */
} else {
/* Check corosync */
char *syslog_enabled = NULL;
char *syslog_facility = NULL;
get_config_opt(config, local_handle, KEY_PREFIX "to_syslog", &syslog_enabled, "on");
get_config_opt(config, local_handle, KEY_PREFIX "syslog_facility", &syslog_facility, "daemon");
if (crm_is_true(syslog_enabled) == FALSE) {
qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_FALSE);
set_daemon_option("logfacility", "none");
} else {
qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_FACILITY, qb_log_facility2int(syslog_facility));
qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_TRUE);
set_daemon_option("logfacility", syslog_facility);
}
free(syslog_enabled);
free(syslog_facility);
}
const_value = daemon_option("logfacility");
if (const_value) {
/* cluster-glue module needs HA_LOGFACILITY */
setenv("HA_LOGFACILITY", const_value, 1);
}
#if HAVE_CONFDB
confdb_finalize(config);
#elif HAVE_CMAP
if(local_handle){
gid_t gid = 0;
if (crm_user_lookup(CRM_DAEMON_USER, NULL, &gid) < 0) {
crm_warn("Could not authorize group with corosync " CRM_XS
" No group found for user %s", CRM_DAEMON_USER);
} else {
char key[PATH_MAX];
snprintf(key, PATH_MAX, "uidgid.gid.%u", gid);
rc = cmap_set_uint8(local_handle, key, 1);
if (rc != CS_OK) {
crm_warn("Could not authorize group with corosync "CRM_XS
" group=%u rc=%d (%s)", gid, rc, ais_error2text(rc));
}
}
}
cmap_finalize(local_handle);
#endif
return TRUE;
}
diff --git a/mcp/pacemaker.c b/mcp/pacemaker.c
index 2986be6515..86df216bc8 100644
--- a/mcp/pacemaker.c
+++ b/mcp/pacemaker.c
@@ -1,1163 +1,1456 @@
/*
- * Copyright 2010-2018 Andrew Beekhof <andrew@beekhof.net>
+ * Copyright 2010-2019 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <pacemaker.h>
#include <pwd.h>
#include <grp.h>
+#include <poll.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/reboot.h>
+#include <crm/crm.h> /* indirectly: CRM_EX_* */
+#include <crm/cib/internal.h> /* cib_channel_ro */
#include <crm/msg_xml.h>
#include <crm/common/ipcs.h>
#include <crm/common/mainloop.h>
#include <crm/cluster/internal.h>
#include <crm/cluster.h>
+
+#include <crm/common/ipc_internal.h> /* PCMK__SPECIAL_PID*, ... */
+
#ifdef SUPPORT_COROSYNC
#include <corosync/cfg.h>
#endif
#include <dirent.h>
#include <ctype.h>
gboolean pcmk_quorate = FALSE;
gboolean fatal_error = FALSE;
GMainLoop *mainloop = NULL;
+static bool global_keep_tracking = false;
#define PCMK_PROCESS_CHECK_INTERVAL 5
const char *local_name = NULL;
uint32_t local_nodeid = 0;
crm_trigger_t *shutdown_trigger = NULL;
const char *pid_file = "/var/run/pacemaker.pid";
typedef struct pcmk_child_s {
int pid;
long flag;
int start_seq;
int respawn_count;
gboolean respawn;
const char *name;
const char *uid;
const char *command;
+ const char *endpoint; /* IPC server name */
gboolean active_before_startup;
} pcmk_child_t;
/* Index into the array below */
#define pcmk_child_crmd 4
#define pcmk_child_mgmtd 8
/* *INDENT-OFF* */
static pcmk_child_t pcmk_children[] = {
{ 0, crm_proc_none, 0, 0, FALSE, "none", NULL, NULL },
{ 0, crm_proc_plugin, 0, 0, FALSE, "ais", NULL, NULL },
- { 0, crm_proc_lrmd, 3, 0, TRUE, "lrmd", NULL, CRM_DAEMON_DIR"/lrmd" },
- { 0, crm_proc_cib, 1, 0, TRUE, "cib", CRM_DAEMON_USER, CRM_DAEMON_DIR"/cib" },
- { 0, crm_proc_crmd, 6, 0, TRUE, "crmd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/crmd" },
- { 0, crm_proc_attrd, 4, 0, TRUE, "attrd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/attrd" },
- { 0, crm_proc_stonithd, 0, 0, TRUE, "stonithd", NULL, NULL },
- { 0, crm_proc_pe, 5, 0, TRUE, "pengine", CRM_DAEMON_USER, CRM_DAEMON_DIR"/pengine" },
- { 0, crm_proc_mgmtd, 0, 0, TRUE, "mgmtd", NULL, HB_DAEMON_DIR"/mgmtd" },
- { 0, crm_proc_stonith_ng, 2, 0, TRUE, "stonith-ng", NULL, CRM_DAEMON_DIR"/stonithd" },
+ { 0, crm_proc_lrmd, 3, 0, TRUE, "lrmd", NULL, CRM_DAEMON_DIR"/lrmd",
+ CRM_SYSTEM_LRMD
+ },
+ { 0, crm_proc_cib, 1, 0, TRUE, "cib", CRM_DAEMON_USER, CRM_DAEMON_DIR"/cib",
+ cib_channel_ro
+ },
+ { 0, crm_proc_crmd, 6, 0, TRUE, "crmd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/crmd",
+ CRM_SYSTEM_CRMD
+ },
+ { 0, crm_proc_attrd, 4, 0, TRUE, "attrd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/attrd",
+ T_ATTRD
+ },
+ { 0, crm_proc_stonithd, 0, 0, TRUE, "stonithd", NULL, NULL,
+ NULL
+ },
+ { 0, crm_proc_pe, 5, 0, TRUE, "pengine", CRM_DAEMON_USER, CRM_DAEMON_DIR"/pengine",
+ CRM_SYSTEM_PENGINE
+ },
+ { 0, crm_proc_mgmtd, 0, 0, TRUE, "mgmtd", NULL, HB_DAEMON_DIR"/mgmtd",
+ NULL
+ },
+ { 0, crm_proc_stonith_ng, 2, 0, TRUE, "stonith-ng", NULL, CRM_DAEMON_DIR"/stonithd",
+ "stonith-ng"
+ },
};
/* *INDENT-ON* */
+static gboolean check_active_before_startup_processes(gpointer user_data);
+static int pcmk_child_active(pcmk_child_t *child);
static gboolean start_child(pcmk_child_t * child);
void update_process_clients(crm_client_t *client);
void update_process_peers(void);
void
enable_crmd_as_root(gboolean enable)
{
if (enable) {
pcmk_children[pcmk_child_crmd].uid = NULL;
} else {
pcmk_children[pcmk_child_crmd].uid = CRM_DAEMON_USER;
}
}
void
enable_mgmtd(gboolean enable)
{
if (enable) {
pcmk_children[pcmk_child_mgmtd].start_seq = 7;
} else {
pcmk_children[pcmk_child_mgmtd].start_seq = 0;
}
}
static uint32_t
get_process_list(void)
{
int lpc = 0;
uint32_t procs = crm_get_cluster_proc();
for (lpc = 0; lpc < SIZEOF(pcmk_children); lpc++) {
if (pcmk_children[lpc].pid != 0) {
procs |= pcmk_children[lpc].flag;
}
}
return procs;
}
static void
pcmk_process_exit(pcmk_child_t * child)
{
child->pid = 0;
child->active_before_startup = FALSE;
/* Broadcast the fact that one of our processes died ASAP
*
* Try to get some logging of the cause out first though
* because we're probably about to get fenced
*
* Potentially do this only if respawn_count > N
* to allow for local recovery
*/
update_node_processes(local_nodeid, NULL, get_process_list());
child->respawn_count += 1;
if (child->respawn_count > MAX_RESPAWN) {
crm_err("Child respawn count exceeded by %s", child->name);
child->respawn = FALSE;
}
if (shutdown_trigger) {
+ /* resume step-wise shutdown (returned TRUE yields no parallelizing) */
mainloop_set_trigger(shutdown_trigger);
+ /* intended to speed up propagating expected lay-off of the daemons? */
update_node_processes(local_nodeid, NULL, get_process_list());
- } else if (child->respawn && crm_is_true(getenv("PCMK_fail_fast"))) {
+ } else if (!child->respawn) {
+ /* nothing to do */
+
+ } else if (crm_is_true(getenv("PCMK_fail_fast"))) {
crm_err("Rebooting system because of %s", child->name);
pcmk_panic(__FUNCTION__);
- } else if (child->respawn) {
+ } else if (pcmk_child_active(child) == 1) {
+ crm_warn("One-off suppressing strict respawning of a child process %s,"
+ " appears alright per %s IPC end-point",
+ child->name, child->endpoint);
+ /* need to monitor how it evolves, and start new process if badly */
+ child->active_before_startup = TRUE;
+ if (!global_keep_tracking) {
+ global_keep_tracking = true;
+ g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL,
+ check_active_before_startup_processes, NULL);
+ }
+
+ } else {
crm_notice("Respawning failed child process: %s", child->name);
start_child(child);
}
}
static void pcmk_exit_with_cluster(int exitcode)
{
#ifdef SUPPORT_COROSYNC
corosync_cfg_handle_t cfg_handle;
cs_error_t err;
if (exitcode == DAEMON_RESPAWN_STOP) {
crm_info("Asking Corosync to shut down");
err = corosync_cfg_initialize(&cfg_handle, NULL);
if (err != CS_OK) {
crm_warn("Unable to open handle to corosync to close it down. err=%d", err);
}
err = corosync_cfg_try_shutdown(cfg_handle, COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE);
if (err != CS_OK) {
crm_warn("Corosync shutdown failed. err=%d", err);
}
corosync_cfg_finalize(cfg_handle);
}
#endif
crm_exit(exitcode);
}
static void
pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)
{
pcmk_child_t *child = mainloop_child_userdata(p);
const char *name = mainloop_child_name(p);
if (signo && signo == SIGKILL) {
crm_warn("The %s process (%d) terminated with signal %d (core=%d)", name, pid, signo, core);
} else if (signo) {
crm_err("The %s process (%d) terminated with signal %d (core=%d)", name, pid, signo, core);
} else {
switch(exitcode) {
case pcmk_ok:
crm_info("The %s process (%d) exited: %s (%d)", name, pid, pcmk_strerror(exitcode), exitcode);
break;
case DAEMON_RESPAWN_STOP:
crm_warn("The %s process (%d) can no longer be respawned, shutting the cluster down.", name, pid);
child->respawn = FALSE;
fatal_error = TRUE;
pcmk_shutdown(SIGTERM);
break;
case pcmk_err_panic:
do_crm_log_always(LOG_EMERG, "The %s process (%d) instructed the machine to reset", name, pid);
child->respawn = FALSE;
fatal_error = TRUE;
pcmk_panic(__FUNCTION__);
pcmk_shutdown(SIGTERM);
break;
default:
crm_err("The %s process (%d) exited: %s (%d)", name, pid, pcmk_strerror(exitcode), exitcode);
break;
}
}
pcmk_process_exit(child);
}
static gboolean
stop_child(pcmk_child_t * child, int signal)
{
if (signal == 0) {
signal = SIGTERM;
}
- if (child->command == NULL) {
- crm_debug("Nothing to do for child \"%s\"", child->name);
+ /* why to skip PID of 1?
+ - FreeBSD ~ how untrackable process behind IPC is masqueraded as
+ - elsewhere: how "init" task is designated; in particular, in systemd
+ arrangement of socket-based activation, this is pretty real */
+ if (child->command == NULL || child->pid == PCMK__SPECIAL_PID) {
+ crm_debug("Nothing to do for child \"%s\" (process %lld)",
+ child->name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid));
return TRUE;
}
if (child->pid <= 0) {
crm_trace("Client %s not running", child->name);
return TRUE;
}
errno = 0;
if (kill(child->pid, signal) == 0) {
crm_notice("Stopping %s "CRM_XS" sent signal %d to process %d",
child->name, signal, child->pid);
} else {
crm_perror(LOG_ERR, "Could not stop %s (process %d) with signal %d",
child->name, child->pid, signal);
}
return TRUE;
}
static char *opts_default[] = { NULL, NULL };
static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL };
+/* TODO once libqb is taught to juggle with IPC end-points carried over as
+ bare file descriptor (https://github.com/ClusterLabs/libqb/issues/325)
+ it shall hand over these descriptors here if/once they are successfully
+ pre-opened in (presumably) pcmk_child_active, to avoid any remaining
+ room for races */
static gboolean
start_child(pcmk_child_t * child)
{
int lpc = 0;
uid_t uid = 0;
gid_t gid = 0;
struct rlimit oflimits;
gboolean use_valgrind = FALSE;
gboolean use_callgrind = FALSE;
const char *devnull = "/dev/null";
const char *env_valgrind = getenv("PCMK_valgrind_enabled");
const char *env_callgrind = getenv("PCMK_callgrind_enabled");
enum cluster_type_e stack = get_cluster_type();
child->active_before_startup = FALSE;
if (child->command == NULL) {
crm_info("Nothing to do for child \"%s\"", child->name);
return TRUE;
}
if (env_callgrind != NULL && crm_is_true(env_callgrind)) {
use_callgrind = TRUE;
use_valgrind = TRUE;
} else if (env_callgrind != NULL && strstr(env_callgrind, child->name)) {
use_callgrind = TRUE;
use_valgrind = TRUE;
} else if (env_valgrind != NULL && crm_is_true(env_valgrind)) {
use_valgrind = TRUE;
} else if (env_valgrind != NULL && strstr(env_valgrind, child->name)) {
use_valgrind = TRUE;
}
if (use_valgrind && strlen(VALGRIND_BIN) == 0) {
crm_warn("Cannot enable valgrind for %s:"
" The location of the valgrind binary is unknown", child->name);
use_valgrind = FALSE;
}
if (child->uid) {
if (crm_user_lookup(child->uid, &uid, &gid) < 0) {
crm_err("Invalid user (%s) for %s: not found", child->uid, child->name);
return FALSE;
}
crm_info("Using uid=%u and group=%u for process %s", uid, gid, child->name);
}
child->pid = fork();
CRM_ASSERT(child->pid != -1);
if (child->pid > 0) {
/* parent */
mainloop_child_add(child->pid, 0, child->name, child, pcmk_child_exit);
crm_info("Forked child %d for process %s%s", child->pid, child->name,
use_valgrind ? " (valgrind enabled: " VALGRIND_BIN ")" : "");
update_node_processes(local_nodeid, NULL, get_process_list());
return TRUE;
} else {
/* Start a new session */
(void)setsid();
/* Setup the two alternate arg arrays */
opts_vgrind[0] = strdup(VALGRIND_BIN);
if (use_callgrind) {
opts_vgrind[1] = strdup("--tool=callgrind");
opts_vgrind[2] = strdup("--callgrind-out-file=" CRM_STATE_DIR "/callgrind.out.%p");
opts_vgrind[3] = strdup(child->command);
opts_vgrind[4] = NULL;
} else {
opts_vgrind[1] = strdup(child->command);
opts_vgrind[2] = NULL;
opts_vgrind[3] = NULL;
opts_vgrind[4] = NULL;
}
opts_default[0] = strdup(child->command);
if(gid) {
if(stack == pcmk_cluster_corosync) {
/* Drop root privileges completely
*
* We can do this because we set uidgid.gid.${gid}=1
* via CMAP which allows these processes to connect to
* corosync
*/
if (setgid(gid) < 0) {
crm_perror(LOG_ERR, "Could not set group to %d", gid);
}
/* Keep the root group (so we can access corosync), but add the haclient group (so we can access ipc) */
} else if (initgroups(child->uid, gid) < 0) {
crm_err("Cannot initialize groups for %s: %s (%d)", child->uid, pcmk_strerror(errno), errno);
}
}
if (uid && setuid(uid) < 0) {
crm_perror(LOG_ERR, "Could not set user to %d (%s)", uid, child->uid);
}
/* Close all open file descriptors */
getrlimit(RLIMIT_NOFILE, &oflimits);
for (lpc = 0; lpc < oflimits.rlim_cur; lpc++) {
close(lpc);
}
(void)open(devnull, O_RDONLY); /* Stdin: fd 0 */
(void)open(devnull, O_WRONLY); /* Stdout: fd 1 */
(void)open(devnull, O_WRONLY); /* Stderr: fd 2 */
if (use_valgrind) {
(void)execvp(VALGRIND_BIN, opts_vgrind);
} else {
(void)execvp(child->command, opts_default);
}
crm_perror(LOG_ERR, "FATAL: Cannot exec %s", child->command);
crm_exit(DAEMON_RESPAWN_STOP);
}
return TRUE; /* never reached */
}
static gboolean
escalate_shutdown(gpointer data)
{
pcmk_child_t *child = data;
- if (child->pid) {
+ if (child->pid == PCMK__SPECIAL_PID) {
+ pcmk_process_exit(child);
+
+ } else if (child->pid) {
/* Use SIGSEGV instead of SIGKILL to create a core so we can see what it was up to */
crm_err("Child %s not terminating in a timely manner, forcing", child->name);
stop_child(child, SIGSEGV);
}
return FALSE;
}
+#define SHUTDOWN_ESCALATION_PERIOD 180000 /* 3m */
+
static gboolean
pcmk_shutdown_worker(gpointer user_data)
{
static int phase = 0;
static time_t next_log = 0;
static int max = SIZEOF(pcmk_children);
int lpc = 0;
if (phase == 0) {
crm_notice("Shutting down Pacemaker");
phase = max;
}
for (; phase > 0; phase--) {
/* Don't stop anything with start_seq < 1 */
for (lpc = max - 1; lpc >= 0; lpc--) {
pcmk_child_t *child = &(pcmk_children[lpc]);
if (phase != child->start_seq) {
continue;
}
if (child->pid) {
time_t now = time(NULL);
if (child->respawn) {
+ if (child->pid == PCMK__SPECIAL_PID) {
+ crm_warn("The process behind %s IPC cannot be"
+ " terminated, so either wait the graceful"
+ " period of %ld s for its native termination"
+ " if it vitally depends on some other daemons"
+ " going down in a controlled way already,"
+ " or locate and kill the correct %s process"
+ " on your own; set PCMK_fail_fast=1 to avoid"
+ " this altogether next time around",
+ child->name, (long) SHUTDOWN_ESCALATION_PERIOD,
+ child->command);
+ }
next_log = now + 30;
child->respawn = FALSE;
stop_child(child, SIGTERM);
if (phase < pcmk_children[pcmk_child_crmd].start_seq) {
- g_timeout_add(180000 /* 3m */ , escalate_shutdown, child);
+ g_timeout_add(SHUTDOWN_ESCALATION_PERIOD,
+ escalate_shutdown, child);
}
} else if (now >= next_log) {
next_log = now + 30;
crm_notice("Still waiting for %s to terminate "
CRM_XS " pid=%d seq=%d",
child->name, child->pid, child->start_seq);
}
return TRUE;
}
/* cleanup */
crm_debug("%s confirmed stopped", child->name);
child->pid = 0;
}
}
/* send_cluster_id(); */
crm_notice("Shutdown complete");
{
const char *delay = daemon_option("shutdown_delay");
if(delay) {
sync();
sleep(crm_get_msec(delay) / 1000);
}
}
g_main_loop_quit(mainloop);
if (fatal_error) {
crm_notice("Attempting to inhibit respawning after fatal error");
pcmk_exit_with_cluster(DAEMON_RESPAWN_STOP);
}
return TRUE;
}
static void
pcmk_ignore(int nsig)
{
crm_info("Ignoring signal %s (%d)", strsignal(nsig), nsig);
}
static void
pcmk_sigquit(int nsig)
{
pcmk_panic(__FUNCTION__);
}
void
pcmk_shutdown(int nsig)
{
if (shutdown_trigger == NULL) {
shutdown_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, pcmk_shutdown_worker, NULL);
}
mainloop_set_trigger(shutdown_trigger);
}
static int32_t
pcmk_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
crm_trace("Connection %p", c);
if (crm_client_new(c, uid, gid) == NULL) {
return -EIO;
}
return 0;
}
static void
pcmk_ipc_created(qb_ipcs_connection_t * c)
{
crm_trace("Connection %p", c);
}
/* Exit code means? */
static int32_t
pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
{
uint32_t id = 0;
uint32_t flags = 0;
const char *task = NULL;
crm_client_t *c = crm_client_get(qbc);
xmlNode *msg = crm_ipcs_recv(c, data, size, &id, &flags);
crm_ipcs_send_ack(c, id, flags, "ack", __FUNCTION__, __LINE__);
if (msg == NULL) {
return 0;
}
task = crm_element_value(msg, F_CRM_TASK);
if (crm_str_eq(task, CRM_OP_QUIT, TRUE)) {
/* Time to quit */
crm_notice("Shutting down in response to ticket %s (%s)",
crm_element_value(msg, F_CRM_REFERENCE), crm_element_value(msg, F_CRM_ORIGIN));
pcmk_shutdown(15);
} else if (crm_str_eq(task, CRM_OP_RM_NODE_CACHE, TRUE)) {
/* Send to everyone */
struct iovec *iov;
int id = 0;
const char *name = NULL;
crm_element_value_int(msg, XML_ATTR_ID, &id);
name = crm_element_value(msg, XML_ATTR_UNAME);
crm_notice("Instructing peers to remove references to node %s/%u", name, id);
iov = calloc(1, sizeof(struct iovec));
iov->iov_base = dump_xml_unformatted(msg);
iov->iov_len = 1 + strlen(iov->iov_base);
send_cpg_iov(iov);
} else {
update_process_clients(c);
}
free_xml(msg);
return 0;
}
/* Error code means? */
static int32_t
pcmk_ipc_closed(qb_ipcs_connection_t * c)
{
crm_client_t *client = crm_client_get(c);
if (client == NULL) {
return 0;
}
crm_trace("Connection %p", c);
crm_client_destroy(client);
return 0;
}
static void
pcmk_ipc_destroy(qb_ipcs_connection_t * c)
{
crm_trace("Connection %p", c);
pcmk_ipc_closed(c);
}
struct qb_ipcs_service_handlers mcp_ipc_callbacks = {
.connection_accept = pcmk_ipc_accept,
.connection_created = pcmk_ipc_created,
.msg_process = pcmk_ipc_dispatch,
.connection_closed = pcmk_ipc_closed,
.connection_destroyed = pcmk_ipc_destroy
};
/*!
* \internal
* \brief Send an XML message with process list of all known peers to client(s)
*
* \param[in] client Send message to this client, or all clients if NULL
*/
void
update_process_clients(crm_client_t *client)
{
GHashTableIter iter;
crm_node_t *node = NULL;
xmlNode *update = create_xml_node(NULL, "nodes");
if (is_corosync_cluster()) {
crm_xml_add_int(update, "quorate", pcmk_quorate);
}
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) {
xmlNode *xml = create_xml_node(update, "node");
crm_xml_add_int(xml, "id", node->id);
crm_xml_add(xml, "uname", node->uname);
crm_xml_add(xml, "state", node->state);
crm_xml_add_int(xml, "processes", node->processes);
}
if(client) {
crm_trace("Sending process list to client %s", client->id);
crm_ipcs_send(client, 0, update, crm_ipc_server_event);
} else {
crm_trace("Sending process list to %d clients", crm_hash_table_size(client_connections));
g_hash_table_iter_init(&iter, client_connections);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & client)) {
crm_ipcs_send(client, 0, update, crm_ipc_server_event);
}
}
free_xml(update);
}
/*!
* \internal
* \brief Send a CPG message with local node's process list to all peers
*/
void
update_process_peers(void)
{
/* Do nothing for corosync-2 based clusters */
char buffer[1024];
struct iovec *iov;
int rc = 0;
if (local_name) {
rc = snprintf(buffer, SIZEOF(buffer), "<node uname=\"%s\" proclist=\"%u\"/>",
local_name, get_process_list());
} else {
rc = snprintf(buffer, SIZEOF(buffer), "<node proclist=\"%u\"/>", get_process_list());
}
crm_trace("Sending %s", buffer);
iov = calloc(1, sizeof(struct iovec));
iov->iov_base = strdup(buffer);
iov->iov_len = rc + 1;
send_cpg_iov(iov);
}
/*!
* \internal
* \brief Update a node's process list, notifying clients and peers if needed
*
* \param[in] id Node ID of affected node
* \param[in] uname Uname of affected node
* \param[in] procs Affected node's process list mask
*
* \return TRUE if the process list changed, FALSE otherwise
*/
gboolean
update_node_processes(uint32_t id, const char *uname, uint32_t procs)
{
gboolean changed = FALSE;
crm_node_t *node = crm_get_peer(id, uname);
if (procs != 0) {
if (procs != node->processes) {
crm_debug("Node %s now has process list: %.32x (was %.32x)",
node->uname, procs, node->processes);
node->processes = procs;
changed = TRUE;
/* If local node's processes have changed, notify clients/peers */
if (id == local_nodeid) {
update_process_clients(NULL);
update_process_peers();
}
} else {
crm_trace("Node %s still has process list: %.32x", node->uname, procs);
}
}
return changed;
}
/* *INDENT-OFF* */
static struct crm_option long_options[] = {
/* Top-level Options */
{"help", 0, 0, '?', "\tThis text"},
{"version", 0, 0, '$', "\tVersion information" },
{"verbose", 0, 0, 'V', "\tIncrease debug output"},
{"shutdown", 0, 0, 'S', "\tInstruct Pacemaker to shutdown on this machine"},
{"features", 0, 0, 'F', "\tDisplay the full version and list of features Pacemaker was built with"},
{"-spacer-", 1, 0, '-', "\nAdditional Options:"},
{"foreground", 0, 0, 'f', "\t(Ignored) Pacemaker always runs in the foreground"},
{"pid-file", 1, 0, 'p', "\t(Ignored) Daemon pid file location"},
{"standby", 0, 0, 's', "\tStart node in standby state"},
{NULL, 0, 0, 0}
};
/* *INDENT-ON* */
static void
mcp_chown(const char *path, uid_t uid, gid_t gid)
{
int rc = chown(path, uid, gid);
if (rc < 0) {
crm_warn("Cannot change the ownership of %s to user %s and gid %d: %s",
path, CRM_DAEMON_USER, gid, pcmk_strerror(errno));
}
}
-#if SUPPORT_PROCFS
+/*!
+ * \internal
+ * \brief Check the liveness of the child based on IPC name and PID if tracked
+ *
+ * \param[inout] child Child tracked data
+ *
+ * \return 0 if no trace of child's liveness detected (while it is detectable
+ * to begin with, at least according to one of the two properties),
+ * 1 if everything is fine, 2 if it's up per PID, but not per IPC
+ * end-point (still starting?), -1 on error, and -2 when the child
+ * (its IPC) blocked with an unauthorized process (log message
+ * emitted in both latter cases)
+ *
+ * \note This function doesn't modify any of \p child members but \c pid,
+ * and is not actively toying with processes as such but invoking
+ * \c stop_child in one particular case (there's for some reason
+ * a different authentic holder of the IPC end-point).
+ */
+static int
+pcmk_child_active(pcmk_child_t *child) {
+ static uid_t cl_uid = 0;
+ static gid_t cl_gid = 0;
+ const uid_t root_uid = 0;
+ const gid_t root_gid = 0;
+ const uid_t *ref_uid;
+ const gid_t *ref_gid;
+ int ret = 0;
+ pid_t ipc_pid = 0;
+ const char *use_name;
+
+ if (child->endpoint == NULL
+ && (child->pid <= 0 || child->pid == PCMK__SPECIAL_PID)) {
+ crm_err("Cannot track child %s for missing both API end-point and PID",
+ child->name);
+ ret = -1; /* misuse of the function when child is not trackable */
+
+ } else if (child->endpoint != NULL) {
+
+ ref_uid = (child->uid != NULL) ? &cl_uid : &root_uid;
+ ref_gid = (child->uid != NULL) ? &cl_gid : &root_gid;
+
+ if (child->uid != NULL && !cl_uid && !cl_gid
+ && crm_user_lookup(CRM_DAEMON_USER, &cl_uid, &cl_gid) < 0) {
+ crm_err("Could not find user and group IDs for user %s",
+ CRM_DAEMON_USER);
+ ret = -1;
+ } else if ((ret = pcmk__ipc_is_authentic_process_active(child->endpoint,
+ *ref_uid, *ref_gid,
+ &ipc_pid)) < 0) {
+ /* game over */
+ } else if (child->pid <= 0) {
+ /* hit new child to be initialized, or reset to zero
+ and investigate further for ret == 0 */
+ child->pid = ipc_pid;
+ } else if (ipc_pid && child->pid != ipc_pid) {
+ /* ultimately strange for ret == 1; either way, investigate */
+ ret = 0;
+ }
+ }
+
+ if (!ret) {
+ use_name = (child->flag == crm_proc_stonith_ng)
+ ? "stonithd" /* compensate "simplification" 61fc951e9 */
+ : child->name;
+ /* when no IPC based liveness detected (incl. if ever a child without
+ IPC is tracked), or detected for a different _authentic_ process;
+ safe on FreeBSD since the only change possible from a proper child's
+ PID into "special" PID of 1 behind more loosely related process */
+ ret = crm_pid_active(child->pid, use_name);
+ if (ipc_pid && (ret != 1
+ || ipc_pid == PCMK__SPECIAL_PID
+ || crm_pid_active(ipc_pid, use_name) == 1)) {
+ if (ret == 1) {
+ /* assume there's no forking-while-retaining-IPC-socket
+ involved in the "children's" lifecycle, hence that the
+ tracking got out of sync purely because of some external
+ (esotheric?) forces (user initiated process "refresh" by
+ force? or intentionally racing on start-up, even?), and
+ that switching over to this other detected, authentic
+ instance with an IPC already in possession is a better
+ trade-off than "neutralizing" it first so as to give
+ either the original or possibly a new to-be-spawned
+ daemon process a leeway for operation, which would
+ otherwise have to be carried out */
+ /* not possessing IPC, afterall (what about corosync CPG?) */
+ stop_child(child, SIGKILL);
+ } else {
+ ret = 1;
+ }
+ child->pid = ipc_pid;
+ } else if (ret == 1) {
+ ret = 2; /* up per PID, but not per IPC (still starting?) */
+ } else if (!child->pid && ret == -1) {
+ ret = 0; /* correct -1 on FreeBSD from above back to 0 */
+ }
+ }
+
+ return ret;
+}
+
static gboolean
check_active_before_startup_processes(gpointer user_data)
{
int start_seq = 1, lpc = 0;
static int max = SIZEOF(pcmk_children);
gboolean keep_tracking = FALSE;
for (start_seq = 1; start_seq < max; start_seq++) {
for (lpc = 0; lpc < max; lpc++) {
if (pcmk_children[lpc].active_before_startup == FALSE) {
/* we are already tracking it as a child process. */
continue;
} else if (start_seq != pcmk_children[lpc].start_seq) {
continue;
} else {
const char *name = pcmk_children[lpc].name;
- if (pcmk_children[lpc].flag == crm_proc_stonith_ng) {
- name = "stonithd";
- }
-
- if (crm_pid_active(pcmk_children[lpc].pid, name) != 1) {
- crm_notice("Process %s terminated (pid=%d)",
- name, pcmk_children[lpc].pid);
- pcmk_process_exit(&(pcmk_children[lpc]));
- continue;
+ int ret;
+
+ switch ((ret = pcmk_child_active(&pcmk_children[lpc]))) {
+ case 1:
+ break;
+ case 0:
+ case 2: /* this very case: it was OK once already */
+ if (pcmk_children[lpc].respawn == TRUE) {
+ /* presumably after crash, hence critical */
+ crm_crit("Process %s terminated (pid=%lld)%s", \
+ name, (long long)
+ PCMK__SPECIAL_PID_AS_0(pcmk_children[lpc].pid),
+ ret ? ", at least per IPC end-point that went AWOL"
+ : "");
+ } else {
+ /* orderly shutdown */
+ crm_notice("Process %s terminated (pid=%lld)%s", \
+ name, (long long)
+ PCMK__SPECIAL_PID_AS_0(pcmk_children[lpc].pid),
+ ret ? ", at least per IPC end-point that went AWOL"
+ : "");
+ }
+ pcmk_process_exit(&(pcmk_children[lpc]));
+ continue;
+ default:
+ crm_crit("Unexpected value from pcmk_child_active:"
+ " %d (pid=%lld)", ret,
+ (long long) PCMK__SPECIAL_PID_AS_0(
+ pcmk_children[lpc].pid));
+ /* fall through */
+ case -1:
+ case -2:
+ /* message(s) already emitted */
+ crm_exit(DAEMON_RESPAWN_STOP);
+ break; /* static analysis/noreturn */
}
}
/* at least one of the processes found at startup
* is still going, so keep this recurring timer around */
keep_tracking = TRUE;
}
}
+ global_keep_tracking = keep_tracking;
return keep_tracking;
}
-#endif // SUPPORT_PROCFS
-static void
+/*!
+ * \internal
+ * \brief Initial one-off check of the pre-existing "child" processes
+ *
+ * With "child" process, we mean the subdaemon that defines an API end-point
+ * (all of them do as of the comment) -- the possible complement is skipped
+ * as it is deemed it has no such shared resources to cause conflicts about,
+ * hence it can presumably be started anew without hesitation.
+ * If that won't hold true in the future, the concept of a shared resource
+ * will have to be generalized beyond the API end-point.
+ *
+ * For boundary cases that the "child" is still starting (IPC end-point is yet
+ * to be witnessed), or more rarely (practically FreeBSD only), when there's
+ * a pre-existing "untrackable" authentic process, we give the situation some
+ * time to possibly unfold in the right direction, meaning that said socket
+ * will appear or the unattainable process will disappear per the observable
+ * IPC, respectively.
+ *
+ * \return 0 if no such "child" process found, positive number X when X
+ * "children" detected, -1 on an internal error, -2 when any
+ * would-be-used IPC is blocked with an unauthorized process
+ *
+ * \note Since this gets run at the very start, \c respawn_count fields
+ * for particular children get temporarily overloaded with "rounds
+ * of waiting" tracking, restored once we are about to finish with
+ * success (i.e. returning value >=0) and will remain unrestored
+ * otherwise. One way to suppress liveness detection logic for
+ * particular child is to set the said value to a negative number.
+ */
+#define WAIT_TRIES 4 /* together with interleaved sleeps, worst case ~ 1s */
+static int
find_and_track_existing_processes(void)
{
-#if SUPPORT_PROCFS
- DIR *dp;
- struct dirent *entry;
- bool start_tracker = FALSE;
- char entry_name[64];
-
- dp = opendir("/proc");
- if (!dp) {
- /* no proc directory to search through */
- crm_notice("Can not read /proc directory to track existing components");
- return;
- }
-
- while ((entry = readdir(dp)) != NULL) {
- int pid;
- int max = SIZEOF(pcmk_children);
- int i;
-
- if (crm_procfs_process_info(entry, entry_name, &pid) < 0) {
- continue;
- }
- for (i = 0; i < max; i++) {
- const char *name = pcmk_children[i].name;
-
- if (pcmk_children[i].start_seq == 0) {
+ unsigned tracking = 0U;
+ bool wait_in_progress;
+ int cur;
+ size_t i, rounds;
+
+ for (rounds = 1; rounds <= WAIT_TRIES; rounds++) {
+ wait_in_progress = false;
+ for (i = 0; i < SIZEOF(pcmk_children); i++) {
+ if (!pcmk_children[i].endpoint
+ || pcmk_children[i].respawn_count < 0
+ || !(cur = pcmk_child_active(&pcmk_children[i]))) {
+ /* as a speculation, don't give up in the context of
+ pcmk_child_active check if there are more rounds to
+ come for other reasons, but don't artificially wait just
+ because of this, since we would preferably start ASAP */
continue;
}
- if (pcmk_children[i].flag == crm_proc_stonith_ng) {
- name = "stonithd";
- }
- if (safe_str_eq(entry_name, name) && (crm_pid_active(pid, NULL) == 1)) {
- crm_notice("Tracking existing %s process (pid=%d)", name, pid);
- pcmk_children[i].pid = pid;
- pcmk_children[i].active_before_startup = TRUE;
- start_tracker = TRUE;
- break;
+ pcmk_children[i].respawn_count = rounds;
+ switch (cur) {
+ case 1:
+ if (pcmk_children[i].pid == PCMK__SPECIAL_PID) {
+ if (crm_is_true(getenv("PCMK_fail_fast"))) {
+ crm_crit("Cannot reliably track pre-existing"
+ " authentic process behind %s IPC on this"
+ " platform and PCMK_fail_fast requested",
+ pcmk_children[i].endpoint);
+ return -1;
+ } else if (pcmk_children[i].respawn_count == WAIT_TRIES) {
+ crm_notice("Assuming pre-existing authentic, though"
+ " on this platform untrackable, process"
+ " behind %s IPC is stable (was in %d"
+ " previous samples) so rather than"
+ " bailing out (PCMK_fail_fast not"
+ " requested), we just switch to a less"
+ " optimal IPC liveness monitoring"
+ " (not very suitable for heavy load)",
+ pcmk_children[i].name, WAIT_TRIES - 1);
+ crm_warn("The process behind %s IPC cannot be"
+ " terminated, so the overall shutdown"
+ " will get delayed implicitly (%ld s),"
+ " which serves as a graceful period for"
+ " its native termination if it vitally"
+ " depends on some other daemons going"
+ " down in a controlled way already",
+ pcmk_children[i].name,
+ (long) SHUTDOWN_ESCALATION_PERIOD);
+ } else {
+ wait_in_progress = true;
+ crm_warn("Cannot reliably track pre-existing"
+ " authentic process behind %s IPC on this"
+ " platform, can still disappear in %d"
+ " attempt(s)", pcmk_children[i].endpoint,
+ WAIT_TRIES - pcmk_children[i].respawn_count);
+ continue;
+ }
+ }
+ crm_notice("Tracking existing %s process (pid=%lld)",
+ pcmk_children[i].name,
+ (long long) PCMK__SPECIAL_PID_AS_0(
+ pcmk_children[i].pid));
+ pcmk_children[i].respawn_count = -1; /* 0~keep watching */
+ pcmk_children[i].active_before_startup = TRUE;
+ tracking++;
+ break;
+ case 2:
+ if (pcmk_children[i].respawn_count == WAIT_TRIES) {
+ crm_crit("%s IPC end-point for existing authentic"
+ " process %lld did not (re)appear",
+ pcmk_children[i].endpoint,
+ (long long) PCMK__SPECIAL_PID_AS_0(
+ pcmk_children[i].pid));
+ return -1;
+ }
+ wait_in_progress = true;
+ crm_warn("Cannot find %s IPC end-point for existing"
+ " authentic process %lld, can still (re)appear"
+ " in %d attempts (?)",
+ pcmk_children[i].endpoint,
+ (long long) PCMK__SPECIAL_PID_AS_0(
+ pcmk_children[i].pid),
+ WAIT_TRIES - pcmk_children[i].respawn_count);
+ continue;
+ case -1:
+ case -2:
+ return cur; /* messages already emitted */
+ default:
+ crm_crit("Unexpected condition"CRM_XS"cur=%d", cur);
+ return -1; /* unexpected condition */
}
}
+ if (!wait_in_progress) {
+ break;
+ }
+ (void) poll(NULL, 0, 250); /* a bit for changes to possibly happen */
+ }
+ for (i = 0; i < SIZEOF(pcmk_children); i++) {
+ pcmk_children[i].respawn_count = 0; /* restore pristine state */
}
- if (start_tracker) {
- g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL, check_active_before_startup_processes,
- NULL);
+ if (tracking) {
+ g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL,
+ check_active_before_startup_processes, NULL);
}
- closedir(dp);
-#else
- crm_notice("No procfs support, so skipping check for existing components");
-#endif // SUPPORT_PROCFS
+ return (tracking > INT_MAX) ? INT_MAX : tracking;
}
static void
init_children_processes(void)
{
int start_seq = 1, lpc = 0;
static int max = SIZEOF(pcmk_children);
/* start any children that have not been detected */
for (start_seq = 1; start_seq < max; start_seq++) {
/* don't start anything with start_seq < 1 */
for (lpc = 0; lpc < max; lpc++) {
if (pcmk_children[lpc].pid) {
/* we are already tracking it */
continue;
}
if (start_seq == pcmk_children[lpc].start_seq) {
start_child(&(pcmk_children[lpc]));
}
}
}
/* From this point on, any daemons being started will be due to
* respawning rather than node start.
*
* This may be useful for the daemons to know
*/
setenv("PCMK_respawned", "true", 1);
}
static void
mcp_cpg_destroy(gpointer user_data)
{
crm_err("Connection destroyed");
crm_exit(ENOTCONN);
}
/*!
* \internal
* \brief Process a CPG message (process list or manual peer cache removal)
*
* \param[in] handle CPG connection (ignored)
* \param[in] groupName CPG group name (ignored)
* \param[in] nodeid ID of affected node
* \param[in] pid Process ID (ignored)
* \param[in] msg CPG XML message
* \param[in] msg_len Length of msg in bytes (ignored)
*/
static void
mcp_cpg_deliver(cpg_handle_t handle,
const struct cpg_name *groupName,
uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
xmlNode *xml = string2xml(msg);
const char *task = crm_element_value(xml, F_CRM_TASK);
crm_trace("Received CPG message (%s): %.200s",
(task? task : "process list"), (char*)msg);
if (task == NULL) {
if (nodeid == local_nodeid) {
crm_info("Ignoring process list sent by peer for local node");
} else {
uint32_t procs = 0;
const char *uname = crm_element_value(xml, "uname");
crm_element_value_int(xml, "proclist", (int *)&procs);
if (update_node_processes(nodeid, uname, procs)) {
update_process_clients(NULL);
}
}
} else if (crm_str_eq(task, CRM_OP_RM_NODE_CACHE, TRUE)) {
int id = 0;
const char *name = NULL;
crm_element_value_int(xml, XML_ATTR_ID, &id);
name = crm_element_value(xml, XML_ATTR_UNAME);
reap_crm_member(id, name);
}
if (xml != NULL) {
free_xml(xml);
}
}
static void
mcp_cpg_membership(cpg_handle_t handle,
const struct cpg_name *groupName,
const struct cpg_address *member_list, size_t member_list_entries,
const struct cpg_address *left_list, size_t left_list_entries,
const struct cpg_address *joined_list, size_t joined_list_entries)
{
/* Update peer cache if needed */
pcmk_cpg_membership(handle, groupName, member_list, member_list_entries,
left_list, left_list_entries,
joined_list, joined_list_entries);
/* Always broadcast our own presence after any membership change */
update_process_peers();
}
static gboolean
mcp_quorum_callback(unsigned long long seq, gboolean quorate)
{
pcmk_quorate = quorate;
return TRUE;
}
static void
mcp_quorum_destroy(gpointer user_data)
{
crm_info("connection lost");
}
#if SUPPORT_CMAN
static gboolean
mcp_cman_dispatch(unsigned long long seq, gboolean quorate)
{
pcmk_quorate = quorate;
return TRUE;
}
static void
mcp_cman_destroy(gpointer user_data)
{
crm_info("connection closed");
}
#endif
int
main(int argc, char **argv)
{
int rc;
int flag;
int argerr = 0;
int option_index = 0;
gboolean shutdown = FALSE;
uid_t pcmk_uid = 0;
gid_t pcmk_gid = 0;
struct rlimit cores;
crm_ipc_t *old_instance = NULL;
qb_ipcs_service_t *ipcs = NULL;
const char *facility = daemon_option("logfacility");
static crm_cluster_t cluster;
crm_log_preinit(NULL, argc, argv);
crm_set_options(NULL, "mode [options]", long_options, "Start/Stop Pacemaker\n");
mainloop_add_signal(SIGHUP, pcmk_ignore);
mainloop_add_signal(SIGQUIT, pcmk_sigquit);
while (1) {
flag = crm_get_option(argc, argv, &option_index);
if (flag == -1)
break;
switch (flag) {
case 'V':
crm_bump_log_level(argc, argv);
break;
case 'f':
/* Legacy */
break;
case 'p':
pid_file = optarg;
break;
case 's':
set_daemon_option("node_start_state", "standby");
break;
case '$':
case '?':
crm_help(flag, EX_OK);
break;
case 'S':
shutdown = TRUE;
break;
case 'F':
printf("Pacemaker %s (Build: %s)\n Supporting v%s: %s\n", PACEMAKER_VERSION, BUILD_VERSION,
CRM_FEATURE_SET, CRM_FEATURES);
crm_exit(pcmk_ok);
default:
printf("Argument code 0%o (%c) is not (?yet?) supported\n", flag, flag);
++argerr;
break;
}
}
if (optind < argc) {
printf("non-option ARGV-elements: ");
while (optind < argc)
printf("%s ", argv[optind++]);
printf("\n");
}
if (argerr) {
crm_help('?', EX_USAGE);
}
setenv("LC_ALL", "C", 1);
setenv("HA_LOGD", "no", 1);
set_daemon_option("mcp", "true");
set_daemon_option("use_logd", "off");
crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
/* Restore the original facility so that mcp_read_config() does the right thing */
set_daemon_option("logfacility", facility);
crm_debug("Checking for old instances of %s", CRM_SYSTEM_MCP);
old_instance = crm_ipc_new(CRM_SYSTEM_MCP, 0);
crm_ipc_connect(old_instance);
if (shutdown) {
crm_debug("Terminating previous instance");
while (crm_ipc_connected(old_instance)) {
xmlNode *cmd =
create_request(CRM_OP_QUIT, NULL, NULL, CRM_SYSTEM_MCP, CRM_SYSTEM_MCP, NULL);
crm_debug(".");
crm_ipc_send(old_instance, cmd, 0, 0, NULL);
free_xml(cmd);
sleep(2);
}
crm_ipc_close(old_instance);
crm_ipc_destroy(old_instance);
crm_exit(pcmk_ok);
} else if (crm_ipc_connected(old_instance)) {
crm_ipc_close(old_instance);
crm_ipc_destroy(old_instance);
crm_err("Pacemaker is already active, aborting startup");
crm_exit(DAEMON_RESPAWN_STOP);
}
crm_ipc_close(old_instance);
crm_ipc_destroy(old_instance);
if (mcp_read_config() == FALSE) {
crm_notice("Could not obtain corosync config data, exiting");
crm_exit(ENODATA);
}
crm_notice("Starting Pacemaker %s "CRM_XS" build=%s features:%s",
PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURES);
mainloop = g_main_new(FALSE);
sysrq_init();
rc = getrlimit(RLIMIT_CORE, &cores);
if (rc < 0) {
crm_perror(LOG_ERR, "Cannot determine current maximum core size.");
} else {
if (cores.rlim_max == 0 && geteuid() == 0) {
cores.rlim_max = RLIM_INFINITY;
} else {
crm_info("Maximum core file size is: %lu", (unsigned long)cores.rlim_max);
}
cores.rlim_cur = cores.rlim_max;
rc = setrlimit(RLIMIT_CORE, &cores);
if (rc < 0) {
crm_perror(LOG_ERR,
"Core file generation will remain disabled."
" Core files are an important diagnositic tool,"
" please consider enabling them by default.");
}
}
rc = pcmk_ok;
if (crm_user_lookup(CRM_DAEMON_USER, &pcmk_uid, &pcmk_gid) < 0) {
crm_err("Cluster user %s does not exist, aborting Pacemaker startup", CRM_DAEMON_USER);
crm_exit(ENOKEY);
}
mkdir(CRM_STATE_DIR, 0750);
mcp_chown(CRM_STATE_DIR, pcmk_uid, pcmk_gid);
/* Used to store core/blackbox/pengine/cib files in */
crm_build_path(CRM_PACEMAKER_DIR, 0750);
mcp_chown(CRM_PACEMAKER_DIR, pcmk_uid, pcmk_gid);
/* Used to store core files in */
crm_build_path(CRM_CORE_DIR, 0750);
mcp_chown(CRM_CORE_DIR, pcmk_uid, pcmk_gid);
/* Used to store blackbox dumps in */
crm_build_path(CRM_BLACKBOX_DIR, 0750);
mcp_chown(CRM_BLACKBOX_DIR, pcmk_uid, pcmk_gid);
/* Used to store policy engine inputs in */
crm_build_path(PE_STATE_DIR, 0750);
mcp_chown(PE_STATE_DIR, pcmk_uid, pcmk_gid);
/* Used to store the cluster configuration */
crm_build_path(CRM_CONFIG_DIR, 0750);
mcp_chown(CRM_CONFIG_DIR, pcmk_uid, pcmk_gid);
/* Resource agent paths are constructed by the lrmd */
ipcs = mainloop_add_ipc_server(CRM_SYSTEM_MCP, QB_IPC_NATIVE, &mcp_ipc_callbacks);
if (ipcs == NULL) {
crm_err("Couldn't start IPC server");
crm_exit(EIO);
}
/* Allows us to block shutdown */
if (cluster_connect_cfg(&local_nodeid) == FALSE) {
crm_err("Couldn't connect to Corosync's CFG service");
crm_exit(ENOPROTOOPT);
}
if(pcmk_locate_sbd() > 0) {
setenv("PCMK_watchdog", "true", 1);
} else {
setenv("PCMK_watchdog", "false", 1);
}
- find_and_track_existing_processes();
+ switch (find_and_track_existing_processes()) {
+ case -1:
+ crm_crit("Internal fatality, see the log");
+ crm_exit(DAEMON_RESPAWN_STOP);
+ case -2:
+ crm_crit("Blocked by foreign process, kill the offender");
+ crm_exit(ENOLCK);
+ default:
+ break;
+ };
cluster.destroy = mcp_cpg_destroy;
cluster.cpg.cpg_deliver_fn = mcp_cpg_deliver;
cluster.cpg.cpg_confchg_fn = mcp_cpg_membership;
crm_set_autoreap(FALSE);
if(cluster_connect_cpg(&cluster) == FALSE) {
crm_err("Couldn't connect to Corosync's CPG service");
rc = -ENOPROTOOPT;
}
if (rc == pcmk_ok && is_corosync_cluster()) {
/* Keep the membership list up-to-date for crm_node to query */
if(cluster_connect_quorum(mcp_quorum_callback, mcp_quorum_destroy) == FALSE) {
rc = -ENOTCONN;
}
}
#if SUPPORT_CMAN
if (rc == pcmk_ok && is_cman_cluster()) {
init_cman_connection(mcp_cman_dispatch, mcp_cman_destroy);
}
#endif
if(rc == pcmk_ok) {
local_name = get_local_node_name();
update_node_processes(local_nodeid, local_name, get_process_list());
mainloop_add_signal(SIGTERM, pcmk_shutdown);
mainloop_add_signal(SIGINT, pcmk_shutdown);
init_children_processes();
crm_info("Starting mainloop");
g_main_run(mainloop);
}
if (ipcs) {
crm_trace("Closing IPC server");
mainloop_del_ipc_server(ipcs);
ipcs = NULL;
}
g_main_destroy(mainloop);
cluster_disconnect_cpg(&cluster);
cluster_disconnect_cfg();
crm_info("Exiting %s", crm_system_name);
return crm_exit(rc);
}

File Metadata

Mime Type
text/x-diff
Expires
Thu, Oct 16, 12:28 AM (22 h, 59 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2530868
Default Alt Text
(514 KB)

Event Timeline