Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F2825262
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
223 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/daemons/attrd/attrd_utils.c b/daemons/attrd/attrd_utils.c
index 6fc16bedce..8bd3cd5d2a 100644
--- a/daemons/attrd/attrd_utils.c
+++ b/daemons/attrd/attrd_utils.c
@@ -1,285 +1,273 @@
/*
* Copyright 2004-2020 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdio.h>
#include <stdbool.h>
#include <errno.h>
#include <glib.h>
#include <regex.h>
#include <sys/types.h>
#include <crm/crm.h>
#include <crm/common/ipcs_internal.h>
#include <crm/common/mainloop.h>
#include "pacemaker-attrd.h"
cib_t *the_cib = NULL;
static bool shutting_down = FALSE;
static GMainLoop *mloop = NULL;
/*!
 * \internal
 * \brief Report whether attrd has begun shutting down
 *
 * \return Current value of the file-scope shutting_down flag: FALSE initially,
 *         TRUE once attrd_shutdown() has been called
 */
gboolean
attrd_shutting_down()
{
return shutting_down;
}
/*!
 * \internal
 * \brief Begin shutdown, leaving via the main loop when one is running
 *
 * Marks the daemon as shutting down, stops handling signals, and then either
 * asks the running main loop to quit or, when no loop is running, exits the
 * process directly.
 *
 * \param[in] nsig  Ignored
 */
void
attrd_shutdown(int nsig)
{
    // Signals that must no longer be handled once shutdown has begun
    static const int handled_signals[] = {
        SIGTERM, SIGCHLD, SIGPIPE, SIGUSR1, SIGUSR2, SIGTRAP
    };
    size_t i = 0;

    // Tell various functions not to do anything
    shutting_down = TRUE;

    for (i = 0; i < (sizeof(handled_signals) / sizeof(handled_signals[0])); i++) {
        mainloop_destroy_signal(handled_signals[i]);
    }

    if ((mloop != NULL) && g_main_loop_is_running(mloop)) {
        g_main_loop_quit(mloop);
        g_main_loop_unref(mloop);
        return;
    }

    /* No main loop is active, so just exit. This should be possible only if
     * we get SIGTERM in brief windows at start-up and shutdown.
     */
    crm_exit(CRM_EX_OK);
}
/*!
 * \internal
 * \brief Create a main loop for attrd
 *
 * The new loop is stored in the file-scope \c mloop, which
 * attrd_run_mainloop() and attrd_shutdown() operate on. The loop is created
 * with a NULL context argument and is not started here.
 */
void
attrd_init_mainloop()
{
mloop = g_main_loop_new(NULL, FALSE);
}
/*!
 * \internal
 * \brief Run attrd main loop
 *
 * Runs the loop stored in the file-scope \c mloop. This function performs no
 * NULL check, so attrd_init_mainloop() is expected to have been called first.
 */
void
attrd_run_mainloop()
{
g_main_loop_run(mloop);
}
/*!
 * \internal
 * \brief Decide whether to accept a new client IPC connection
 *
 * \param[in] c    New connection
 * \param[in] uid  Client user id
 * \param[in] gid  Client group id
 *
 * \return pcmk_ok if the client was registered, -EPERM if the daemon is
 *         shutting down, or -EIO if the client could not be registered
 */
static int32_t
attrd_ipc_accept(qb_ipcs_connection_t *c, uid_t uid, gid_t gid)
{
    int32_t result = pcmk_ok;

    crm_trace("New client connection %p", c);

    if (shutting_down) {
        // No new clients once shutdown has begun
        crm_info("Ignoring new connection from pid %d during shutdown",
                 pcmk__client_pid(c));
        result = -EPERM;

    } else if (pcmk__new_client(c, uid, gid) == NULL) {
        result = -EIO;
    }
    return result;
}
-/*!
- * \internal
- * \brief Callback for successful client connection
- *
- * \param[in] c New connection
- */
-static void
-attrd_ipc_created(qb_ipcs_connection_t *c)
-{
- crm_trace("Client connection %p accepted", c);
-}
-
/*!
 * \internal
 * \brief Clean up after a client IPC connection has closed
 *
 * \param[in] c  Connection that was closed
 *
 * \return FALSE (i.e. do not re-run this callback)
 */
static int32_t
attrd_ipc_closed(qb_ipcs_connection_t *c)
{
    pcmk__client_t *closed_client = pcmk__find_client(c);

    if (closed_client != NULL) {
        crm_trace("Cleaning up closed client connection %p", c);
        pcmk__free_client(closed_client);
        return FALSE;
    }

    // Nothing is registered for this connection, so there is nothing to free
    crm_trace("Ignoring request to clean up unknown connection %p", c);
    return FALSE;
}
/*!
 * \internal
 * \brief Clean up after a client IPC connection has been destroyed
 *
 * \param[in] c  Connection being destroyed
 *
 * \note Destruction is handled exactly like closure. A separate handler is
 *       needed only because this callback returns nothing, whereas
 *       attrd_ipc_closed() returns a value.
 */
static void
attrd_ipc_destroy(qb_ipcs_connection_t *c)
{
    crm_trace("Destroying client connection %p", c);

    // The close handler's return value has no meaning for this callback
    (void) attrd_ipc_closed(c);
}
/*!
 * \internal
 * \brief Set up attrd IPC communication
 *
 * \param[out] ipcs         Will be set to newly allocated server connection
 * \param[in]  dispatch_fn  Handler for new messages on connection
 *
 * \note The handler table is a function-local static, so it remains valid
 *       after this call returns. Only its msg_process member is filled in at
 *       run time, from \p dispatch_fn, before the table is passed to
 *       attrd_ipc_server_init().
 */
void
attrd_init_ipc(qb_ipcs_service_t **ipcs, qb_ipcs_msg_process_fn dispatch_fn)
{
static struct qb_ipcs_service_handlers ipc_callbacks = {
.connection_accept = attrd_ipc_accept,
- .connection_created = attrd_ipc_created,
+ .connection_created = NULL,
.msg_process = NULL,
.connection_closed = attrd_ipc_closed,
.connection_destroyed = attrd_ipc_destroy
};
ipc_callbacks.msg_process = dispatch_fn;
attrd_ipc_server_init(ipcs, &ipc_callbacks);
}
/*!
 * \internal
 * \brief Sign off from the CIB and release the connection object
 *
 * Signs off via the connection's own command table, deletes the object, and
 * resets the global \c the_cib to NULL.
 */
void
attrd_cib_disconnect()
{
/* NOTE(review): presumably CRM_CHECK() executes the "return" when the_cib is
 * NULL, making this a no-op without a connection -- confirm against the macro
 */
CRM_CHECK(the_cib != NULL, return);
the_cib->cmds->signoff(the_cib);
cib_delete(the_cib);
the_cib = NULL;
}
/* strlen("value") */
#define plus_plus_len (5)
/*!
 * \internal
 * \brief Check whether an attribute value should be expanded
 *
 * A value needs expansion when it is at least (plus_plus_len + 2) characters
 * long and the two characters after the first plus_plus_len characters are
 * "++" or "+=". The leading characters themselves are not inspected.
 *
 * \param[in] value  Attribute value to check (may be NULL)
 *
 * \return TRUE if value needs expansion, FALSE otherwise (including when
 *         \p value is NULL)
 */
gboolean
attrd_value_needs_expansion(const char *value)
{
    // strlen() must not be handed a null pointer, so reject NULL up front
    return ((value != NULL)
            && (strlen(value) >= (plus_plus_len + 2))
            && (value[plus_plus_len] == '+')
            && ((value[plus_plus_len + 1] == '+')
                || (value[plus_plus_len + 1] == '=')));
}
/*!
 * \internal
 * \brief Expand an increment expression into an integer
 *
 * The previous value is converted with char2score(). Then either 1 is added
 * (when the character at index plus_plus_len + 1 is '+') or the score parsed
 * from the text starting at index plus_plus_len + 2 is added.
 *
 * \param[in] value      Attribute increment expression to expand
 * \param[in] old_value  Previous value of attribute
 *
 * \return Expanded value, limited to at most INFINITY
 */
int
attrd_expand_value(const char *value, const char *old_value)
{
    int total = char2score(old_value);

    if (value[plus_plus_len + 1] == '+') {
        // Plain increment
        total += 1;
    } else {
        // Explicit amount follows the two-character operator
        total += char2score(value + (plus_plus_len + 2));
    }

    // Only the upper bound is enforced here
    if (total > INFINITY) {
        return INFINITY;
    }
    return total;
}
/*!
 * \internal
 * \brief Create regular expression matching failure-related attributes
 *
 * \param[out] regex        Where to store created regular expression
 * \param[in]  rsc          Name of resource to clear (or NULL for all)
 * \param[in]  op           Operation to clear if rsc is specified (or NULL
 *                          for all)
 * \param[in]  interval_ms  Interval of operation to clear if op is specified
 *
 * \return pcmk_ok on success, -EINVAL if the pattern could not be compiled
 *
 * \note The caller is responsible for freeing the result with regfree().
 */
int
attrd_failure_regex(regex_t *regex, const char *rsc, const char *op,
                    guint interval_ms)
{
    char *re_text = NULL;
    int compile_rc = 0;

    // Pick the narrowest pattern that the arguments allow
    if (rsc != NULL) {
        if (op != NULL) {
            re_text = crm_strdup_printf(ATTRD_RE_CLEAR_OP, rsc, op, interval_ms);
        } else {
            re_text = crm_strdup_printf(ATTRD_RE_CLEAR_ONE, rsc);
        }
    } else {
        re_text = strdup(ATTRD_RE_CLEAR_ALL);
    }

    crm_trace("Clearing attributes matching %s", re_text);

    // Extended syntax; match positions are not needed
    compile_rc = regcomp(regex, re_text, REG_EXTENDED|REG_NOSUB);
    free(re_text);

    if (compile_rc != 0) {
        return -EINVAL;
    }
    return pcmk_ok;
}
diff --git a/daemons/based/based_callbacks.c b/daemons/based/based_callbacks.c
index 200bce902a..87ed31c9b4 100644
--- a/daemons/based/based_callbacks.c
+++ b/daemons/based/based_callbacks.c
@@ -1,1557 +1,1551 @@
/*
* Copyright 2004-2020 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h> /* U64T ~ PRIu64 */
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/msg_xml.h>
#include <crm/cluster/internal.h>
#include <crm/common/xml.h>
#include <crm/common/remote_internal.h>
#include <pacemaker-based.h>
#define EXIT_ESCALATION_MS 10000
static unsigned long cib_local_bcast_num = 0;
/* One queued local notification, stored in local_notify_queue by
 * queue_local_notify() and released by local_notify_destroy_callback()
 */
typedef struct cib_local_notify_s {
xmlNode *notify_src;    /* XML to deliver; freed with free_xml() on destroy */
char *client_id;        /* Copy of the target client's ID; freed on destroy */
gboolean from_peer;     /* Passed through to do_local_notify() */
gboolean sync_reply;    /* Passed through to do_local_notify() */
} cib_local_notify_t;
int next_client_id = 0;
gboolean legacy_mode = FALSE;
qb_ipcs_service_t *ipcs_ro = NULL;
qb_ipcs_service_t *ipcs_rw = NULL;
qb_ipcs_service_t *ipcs_shm = NULL;
void send_cib_replace(const xmlNode * sync_request, const char *host);
static void cib_process_request(xmlNode* request, gboolean force_synchronous,
gboolean privileged,
pcmk__client_t *cib_client);
static int cib_process_command(xmlNode *request, xmlNode **reply,
xmlNode **cib_diff, gboolean privileged);
gboolean cib_common_callback(qb_ipcs_connection_t * c, void *data, size_t size,
gboolean privileged);
/* Return the current value of the global legacy_mode flag, which selects
 * between the _v1 and _v2 option parsers in this file
 */
gboolean cib_legacy_mode(void)
{
return legacy_mode;
}
/*!
 * \internal
 * \brief Decide whether to accept a new IPC client connection
 *
 * \param[in] c    New connection
 * \param[in] uid  Client user id
 * \param[in] gid  Client group id
 *
 * \return 0 if the client was registered, -EPERM during shutdown, or -EIO if
 *         the client could not be registered
 */
static int32_t
cib_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
    int32_t result = 0;

    if (cib_shutdown_flag) {
        // No new clients once shutdown has begun
        crm_info("Ignoring new IPC client [%d] during shutdown",
                 pcmk__client_pid(c));
        result = -EPERM;

    } else if (pcmk__new_client(c, uid, gid) == NULL) {
        result = -EIO;
    }
    return result;
}
-static void
-cib_ipc_created(qb_ipcs_connection_t * c)
-{
- crm_trace("Connection %p", c);
-}
-
/*!
 * \internal
 * \brief Message handler used by ipc_rw_callbacks
 *
 * Logs the message's origin and hands it to cib_common_callback() with
 * privileged set to TRUE.
 *
 * \param[in] c     Connection the message arrived on
 * \param[in] data  Raw message data
 * \param[in] size  Size of \p data
 *
 * \return Whatever cib_common_callback() returns
 */
static int32_t
cib_ipc_dispatch_rw(qb_ipcs_connection_t * c, void *data, size_t size)
{
    pcmk__client_t *client = pcmk__find_client(c);

    /* The lookup can come back empty (cib_common_callback() copes with that
     * case itself), so don't dereference the result just to log it.
     */
    crm_trace("%p message from %s", c, (client? client->id : "unknown client"));
    return cib_common_callback(c, data, size, TRUE);
}
/*!
 * \internal
 * \brief Message handler used by ipc_ro_callbacks
 *
 * Logs the message's origin and hands it to cib_common_callback() with
 * privileged set to FALSE.
 *
 * \param[in] c     Connection the message arrived on
 * \param[in] data  Raw message data
 * \param[in] size  Size of \p data
 *
 * \return Whatever cib_common_callback() returns
 */
static int32_t
cib_ipc_dispatch_ro(qb_ipcs_connection_t * c, void *data, size_t size)
{
    pcmk__client_t *client = pcmk__find_client(c);

    /* The lookup can come back empty (cib_common_callback() copes with that
     * case itself), so don't dereference the result just to log it.
     */
    crm_trace("%p message from %s", c, (client? client->id : "unknown client"));
    return cib_common_callback(c, data, size, FALSE);
}
/*!
 * \internal
 * \brief Clean up after an IPC client connection has closed
 *
 * \param[in] c  Connection that was closed
 *
 * \return Always 0 (the meaning of other values is not established here)
 */
static int32_t
cib_ipc_closed(qb_ipcs_connection_t * c)
{
    pcmk__client_t *closed_client = pcmk__find_client(c);

    if (closed_client != NULL) {
        crm_trace("Connection %p", c);
        pcmk__free_client(closed_client);
    }
    return 0;
}
/*!
 * \internal
 * \brief Clean up after an IPC client connection has been destroyed
 *
 * Releases the client exactly as cib_ipc_closed() does and, if the daemon is
 * already shutting down, calls cib_shutdown() again.
 *
 * \param[in] c  Connection being destroyed
 */
static void
cib_ipc_destroy(qb_ipcs_connection_t * c)
{
    crm_trace("Connection %p", c);

    // The close handler's return value has no meaning for this callback
    (void) cib_ipc_closed(c);

    if (!cib_shutdown_flag) {
        return;
    }
    cib_shutdown(0);
}
/* IPC handler table whose messages are dispatched by cib_ipc_dispatch_ro(),
 * i.e. processed with privileged == FALSE
 */
struct qb_ipcs_service_handlers ipc_ro_callbacks = {
.connection_accept = cib_ipc_accept,
- .connection_created = cib_ipc_created,
+ .connection_created = NULL,
.msg_process = cib_ipc_dispatch_ro,
.connection_closed = cib_ipc_closed,
.connection_destroyed = cib_ipc_destroy
};
/* IPC handler table whose messages are dispatched by cib_ipc_dispatch_rw(),
 * i.e. processed with privileged == TRUE
 */
struct qb_ipcs_service_handlers ipc_rw_callbacks = {
.connection_accept = cib_ipc_accept,
- .connection_created = cib_ipc_created,
+ .connection_created = NULL,
.msg_process = cib_ipc_dispatch_rw,
.connection_closed = cib_ipc_closed,
.connection_destroyed = cib_ipc_destroy
};
/*!
 * \internal
 * \brief Act on one parsed client request
 *
 * Registration and notification-filter requests are answered directly;
 * every other operation is passed to cib_process_request().
 *
 * \param[in] id          IPC message ID to use for any direct reply
 * \param[in] flags       IPC flags received with the message
 * \param[in] op_request  Parsed request
 * \param[in] cib_client  Client that sent the request
 * \param[in] privileged  Passed through to cib_process_request()
 */
void
cib_common_callback_worker(uint32_t id, uint32_t flags, xmlNode * op_request,
                           pcmk__client_t *cib_client, gboolean privileged)
{
    const char *op = crm_element_value(op_request, F_CIB_OPERATION);

    if (crm_str_eq(op, CRM_OP_REGISTER, TRUE)) {
        // Registration: tell the client its ID, if it asked for a response
        if (flags & crm_ipc_client_response) {
            xmlNode *ack = create_xml_node(NULL, __FUNCTION__);

            crm_xml_add(ack, F_CIB_OPERATION, CRM_OP_REGISTER);
            crm_xml_add(ack, F_CIB_CLIENTID, cib_client->id);
            pcmk__ipc_send_xml(cib_client, id, ack, flags);
            cib_client->request_id = 0;
            free_xml(ack);
        }
        return;
    }

    if (!crm_str_eq(op, T_CIB_NOTIFY, TRUE)) {
        // Ordinary operation
        cib_process_request(op_request, FALSE, privileged, cib_client);
        return;
    }

    {
        /* Update the notify filters for this client */
        int enable = 0;
        long long filter_bit = 0;
        const char *type = crm_element_value(op_request, F_CIB_NOTIFY_TYPE);

        crm_element_value_int(op_request, F_CIB_NOTIFY_ACTIVATE, &enable);

        crm_debug("Setting %s callbacks for %s (%s): %s",
                  type, cib_client->name, cib_client->id, enable ? "on" : "off");

        // An unrecognized type leaves filter_bit at 0
        if (safe_str_eq(type, T_CIB_POST_NOTIFY)) {
            filter_bit = cib_notify_post;

        } else if (safe_str_eq(type, T_CIB_PRE_NOTIFY)) {
            filter_bit = cib_notify_pre;

        } else if (safe_str_eq(type, T_CIB_UPDATE_CONFIRM)) {
            filter_bit = cib_notify_confirm;

        } else if (safe_str_eq(type, T_CIB_DIFF_NOTIFY)) {
            filter_bit = cib_notify_diff;

        } else if (safe_str_eq(type, T_CIB_REPLACE_NOTIFY)) {
            filter_bit = cib_notify_replace;
        }

        if (enable) {
            set_bit(cib_client->options, filter_bit);
        } else {
            clear_bit(cib_client->options, filter_bit);
        }

        if (flags & crm_ipc_client_response) {
            /* TODO - include rc */
            pcmk__ipc_send_ack(cib_client, id, flags, "ack");
        }
    }
}
/*!
 * \internal
 * \brief Parse and dispatch one raw IPC message from a client
 *
 * Converts the raw data to XML, records the client's name (and daemon status)
 * the first time it is seen, stamps the request with the client's ID and
 * name, and hands it to cib_common_callback_worker().
 *
 * \param[in] c           Connection the message arrived on
 * \param[in] data        Raw message data
 * \param[in] size        Size of \p data
 * \param[in] privileged  Passed through to cib_common_callback_worker()
 *
 * \return Always 0
 */
int32_t
cib_common_callback(qb_ipcs_connection_t * c, void *data, size_t size, gboolean privileged)
{
    uint32_t id = 0;
    uint32_t flags = 0;
    int call_options = 0;
    pcmk__client_t *cib_client = pcmk__find_client(c);
    xmlNode *op_request = pcmk__client_data2xml(cib_client, data, size, &id,
                                                &flags);

    if (op_request == NULL) {
        crm_trace("Invalid message from %p", c);
        pcmk__ipc_send_ack(cib_client, id, flags, "nack");
        return 0;
    }

    if (cib_client == NULL) {
        crm_trace("Invalid client %p", c);
        // The parsed request belongs to us; release it before bailing out
        free_xml(op_request);
        return 0;
    }

    crm_element_value_int(op_request, F_CIB_CALLOPTS, &call_options);

    if (is_set(call_options, cib_sync_call)) {
        CRM_LOG_ASSERT(flags & crm_ipc_client_response);
        CRM_LOG_ASSERT(cib_client->request_id == 0);    /* This means the client has two synchronous events in-flight */
        cib_client->request_id = id;    /* Reply only to the last one */
    }

    // First message from this client: remember what it calls itself
    if (cib_client->name == NULL) {
        const char *value = crm_element_value(op_request, F_CIB_CLIENTNAME);

        if (value == NULL) {
            // No name supplied, so fall back to the client's PID
            cib_client->name = crm_itoa(cib_client->pid);
        } else {
            cib_client->name = strdup(value);
            if (crm_is_daemon_name(value)) {
                set_bit(cib_client->options, cib_is_daemon);
            }
        }
    }

    /* Allow cluster daemons more leeway before being evicted */
    if (is_set(cib_client->options, cib_is_daemon)) {
        const char *qmax = cib_config_lookup("cluster-ipc-limit");

        if (pcmk__set_client_queue_max(cib_client, qmax)) {
            crm_trace("IPC threshold for %s[%u] is now %u",
                      cib_client->name, cib_client->pid, cib_client->queue_max);
        }
    }

    crm_xml_add(op_request, F_CIB_CLIENTID, cib_client->id);
    crm_xml_add(op_request, F_CIB_CLIENTNAME, cib_client->name);

#if ENABLE_ACL
    CRM_LOG_ASSERT(cib_client->user != NULL);
    crm_acl_get_set_user(op_request, F_CIB_USER, cib_client->user);
#endif

    crm_log_xml_trace(op_request, "Client[inbound]");

    cib_common_callback_worker(id, flags, op_request, cib_client, privileged);
    free_xml(op_request);

    return 0;
}
static uint64_t ping_seq = 0;
static char *ping_digest = NULL;
static bool ping_modified_since = FALSE;
int sync_our_cib(xmlNode * request, gboolean all);
/*!
 * \internal
 * \brief Broadcast a ping asking peers for their CIB digests
 *
 * Does nothing unless this instance is the master. Otherwise it starts a new
 * ping sequence, discards the cached local digest, and sends a ping carrying
 * the new sequence number as a cluster message.
 *
 * \param[in] data  Unused
 *
 * \return Always FALSE
 */
static gboolean
cib_digester_cb(gpointer data)
{
    char seq_text[32];
    xmlNode *ping = NULL;

    if (!cib_is_master) {
        return FALSE;
    }

    ping = create_xml_node(NULL, "ping");

    // Begin a new sequence; replies to earlier ones will no longer match
    ping_seq++;
    free(ping_digest);
    ping_digest = NULL;
    ping_modified_since = FALSE;
    snprintf(seq_text, sizeof(seq_text), "%" U64T, ping_seq);
    crm_trace("Requesting peer digests (%s)", seq_text);

    crm_xml_add(ping, F_TYPE, "cib");
    crm_xml_add(ping, F_CIB_OPERATION, CRM_OP_PING);
    crm_xml_add(ping, F_CIB_PING_ID, seq_text);
    crm_xml_add(ping, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET);

    send_cluster_message(NULL, crm_msg_cib, ping, TRUE);
    free_xml(ping);

    return FALSE;
}
/*!
 * \internal
 * \brief Compare a peer's ping reply against the local CIB digest
 *
 * Replies without a digest, replies whose sequence number does not match the
 * current ping_seq, and replies arriving while ping_modified_since is set are
 * ignored. Otherwise the local digest is calculated (once per sequence, then
 * cached in ping_digest) and compared with the peer's; on a mismatch the
 * difference is logged and sync_our_cib() is called.
 *
 * \param[in] reply  Ping reply message received from a peer
 */
static void
process_ping_reply(xmlNode *reply)
{
uint64_t seq = 0;
const char *host = crm_element_value(reply, F_ORIG);
xmlNode *pong = get_message_xml(reply, F_CIB_CALLDATA);
const char *seq_s = crm_element_value(pong, F_CIB_PING_ID);
const char *digest = crm_element_value(pong, XML_ATTR_DIGEST);
/* A missing sequence ID leaves seq at 0 */
if (seq_s) {
seq = (uint64_t) crm_parse_ll(seq_s, NULL);
}
if(digest == NULL) {
crm_trace("Ignoring ping reply %s from %s with no digest", seq_s, host);
} else if(seq != ping_seq) {
crm_trace("Ignoring out of sequence ping reply %s from %s", seq_s, host);
} else if(ping_modified_since) {
crm_trace("Ignoring ping reply %s from %s: cib updated since", seq_s, host);
} else {
const char *version = crm_element_value(pong, XML_ATTR_CRM_VERSION);
/* Calculate the local digest lazily, at most once per ping sequence */
if(ping_digest == NULL) {
crm_trace("Calculating new digest");
ping_digest = calculate_xml_versioned_digest(the_cib, FALSE, TRUE, version);
}
crm_trace("Processing ping reply %s from %s (%s)", seq_s, host, digest);
if(safe_str_eq(ping_digest, digest) == FALSE) {
xmlNode *remote_cib = get_message_xml(pong, F_CIB_CALLDATA);
/* "_" stands in for the peer's version fields when it sent no CIB */
crm_notice("Local CIB %s.%s.%s.%s differs from %s: %s.%s.%s.%s %p",
crm_element_value(the_cib, XML_ATTR_GENERATION_ADMIN),
crm_element_value(the_cib, XML_ATTR_GENERATION),
crm_element_value(the_cib, XML_ATTR_NUMUPDATES),
ping_digest, host,
remote_cib?crm_element_value(remote_cib, XML_ATTR_GENERATION_ADMIN):"_",
remote_cib?crm_element_value(remote_cib, XML_ATTR_GENERATION):"_",
remote_cib?crm_element_value(remote_cib, XML_ATTR_NUMUPDATES):"_",
digest, remote_cib);
if(remote_cib && remote_cib->children) {
/* Additional debug */
xml_calculate_changes(the_cib, remote_cib);
xml_log_changes(LOG_INFO, __FUNCTION__, remote_cib);
crm_trace("End of differences");
}
/* NOTE(review): remote_cib came from get_message_xml(pong, ...); confirm
 * that helper's ownership semantics make freeing it here correct
 */
free_xml(remote_cib);
sync_our_cib(reply, FALSE);
}
}
}
/*!
 * \internal
 * \brief Deliver a reply or event to a locally connected client
 *
 * \param[in] notify_src  XML to send (must not be NULL)
 * \param[in] client_id   ID of the client to send to (must not be NULL)
 * \param[in] sync_reply  If TRUE, send as the reply to the client's pending
 *                        synchronous request; otherwise send as an event
 * \param[in] from_peer   Used only to annotate trace messages
 */
static void
do_local_notify(xmlNode * notify_src, const char *client_id,
gboolean sync_reply, gboolean from_peer)
{
int rid = 0;
int call_id = 0;
pcmk__client_t *client_obj = NULL;
CRM_ASSERT(notify_src && client_id);
crm_element_value_int(notify_src, F_CIB_CALLID, &call_id);
client_obj = pcmk__find_client_by_id(client_id);
/* The client may no longer be connected; there is nobody to notify then */
if (client_obj == NULL) {
crm_debug("Could not send response %d: client %s not found",
call_id, client_id);
return;
}
if (sync_reply) {
if (client_obj->ipcs) {
/* Consume the saved request ID so that it is used for one reply only */
CRM_LOG_ASSERT(client_obj->request_id);
rid = client_obj->request_id;
client_obj->request_id = 0;
crm_trace("Sending response %d to %s %s",
rid, client_obj->name,
from_peer ? "(originator of delegated request)" : "");
} else {
crm_trace("Sending response [call %d] to %s %s",
call_id, client_obj->name, from_peer ? "(originator of delegated request)" : "");
}
} else {
crm_trace("Sending event %d to %s %s",
call_id, client_obj->name, from_peer ? "(originator of delegated request)" : "");
}
/* Pick the transport that matches how the client is connected */
switch (client_obj->kind) {
case PCMK__CLIENT_IPC:
{
int rc = pcmk__ipc_send_xml(client_obj, rid, notify_src,
(sync_reply? crm_ipc_flags_none
: crm_ipc_server_event));
if (rc != pcmk_rc_ok) {
crm_warn("%s reply to %s failed: %s " CRM_XS " rc=%d",
(sync_reply? "Synchronous" : "Asynchronous"),
client_obj->name, pcmk_rc_str(rc), rc);
}
}
break;
/* The TLS case exists only when GnuTLS support is compiled in; it shares the
 * TCP code path below
 */
#ifdef HAVE_GNUTLS_GNUTLS_H
case PCMK__CLIENT_TLS:
#endif
case PCMK__CLIENT_TCP:
pcmk__remote_send_xml(client_obj->remote, notify_src);
break;
default:
crm_err("Unknown transport %d for %s", client_obj->kind, client_obj->name);
}
}
/*!
 * \internal
 * \brief Free one queued local notification
 *
 * Installed by queue_local_notify() as the value destructor of
 * local_notify_queue.
 *
 * \param[in] data  The cib_local_notify_t to free
 */
static void
local_notify_destroy_callback(gpointer data)
{
    cib_local_notify_t *entry = data;

    // Release what the entry owns before the entry itself
    free(entry->client_id);
    free_xml(entry->notify_src);
    free(entry);
}
/*!
 * \internal
 * \brief Deliver and discard the local notification queued for a broadcast
 *
 * \param[in] bcast_id  Broadcast number that the notification was queued under
 */
static void
check_local_notify(int bcast_id)
{
    void *key = GINT_TO_POINTER(bcast_id);
    cib_local_notify_t *pending = NULL;

    if (local_notify_queue == NULL) {
        return;
    }

    pending = g_hash_table_lookup(local_notify_queue, key);
    if (pending == NULL) {
        return;
    }

    do_local_notify(pending->notify_src, pending->client_id,
                    pending->sync_reply, pending->from_peer);

    // Removing the entry also frees it, via the table's value destructor
    g_hash_table_remove(local_notify_queue, key);
}
/*!
 * \internal
 * \brief Queue a local notification under the current broadcast number
 *
 * The entry is stored in local_notify_queue (created on first use) keyed by
 * the current value of cib_local_bcast_num.
 *
 * \param[in] notify_src  XML to deliver later; the queue entry keeps this
 *                        pointer and frees it when the entry is destroyed
 * \param[in] client_id   ID of the client to notify (copied)
 * \param[in] sync_reply  Stored and later passed to do_local_notify()
 * \param[in] from_peer   Stored and later passed to do_local_notify()
 */
static void
queue_local_notify(xmlNode * notify_src, const char *client_id, gboolean sync_reply,
                   gboolean from_peer)
{
    cib_local_notify_t *entry = calloc(1, sizeof(cib_local_notify_t));

    entry->from_peer = from_peer;
    entry->sync_reply = sync_reply;
    entry->client_id = strdup(client_id);
    entry->notify_src = notify_src;

    if (local_notify_queue == NULL) {
        // Keys are plain integers; values are freed by the destroy callback
        local_notify_queue = g_hash_table_new_full(g_direct_hash,
                                                   g_direct_equal, NULL,
                                                   local_notify_destroy_callback);
    }

    g_hash_table_insert(local_notify_queue, GINT_TO_POINTER(cib_local_bcast_num), entry);
}
/*!
 * \internal
 * \brief Decide how to handle a local client's request (legacy mode)
 *
 * \param[in]  cib_client     Client that sent the request
 * \param[in]  call_type      Operation ID of the request
 * \param[in]  call_options   Call options of the request
 * \param[in]  host           Host the request is addressed to, or NULL
 * \param[in]  op             Operation name (used for logging)
 * \param[out] local_notify   Set to TRUE when the request is handled locally
 * \param[out] needs_reply    Set to TRUE only for modifying operations whose
 *                            broadcast has not been inhibited
 * \param[out] process        Set only on the stand-alone and forwarding paths
 * \param[out] needs_forward  Set only on the stand-alone and forwarding paths
 *
 * \note Outputs not set on a given path keep whatever value the caller gave
 *       them.
 */
static void
parse_local_options_v1(pcmk__client_t *cib_client, int call_type,
                       int call_options, const char *host, const char *op,
                       gboolean *local_notify, gboolean *needs_reply,
                       gboolean *process, gboolean *needs_forward)
{
    /* A modifying operation needs an update sent anyway, unless broadcasting
     * was explicitly inhibited
     */
    *needs_reply = (cib_op_modifies(call_type)
                    && !(call_options & cib_inhibit_bcast))? TRUE : FALSE;

    if (host == NULL && (call_options & cib_scope_local)) {
        crm_trace("Processing locally scoped %s op from %s", op, cib_client->name);
        *local_notify = TRUE;
        return;
    }

    if (host == NULL && cib_is_master) {
        crm_trace("Processing master %s op locally from %s", op, cib_client->name);
        *local_notify = TRUE;
        return;
    }

    if (safe_str_eq(host, cib_our_uname)) {
        crm_trace("Processing locally addressed %s op from %s", op, cib_client->name);
        *local_notify = TRUE;
        return;
    }

    if (stand_alone) {
        *needs_forward = FALSE;
        *local_notify = TRUE;
        *process = TRUE;
        return;
    }

    // Someone else has to handle this one
    crm_trace("%s op from %s needs to be forwarded to %s",
              op, cib_client->name, host ? host : "the master instance");
    *needs_forward = TRUE;
    *process = FALSE;
}
/*!
 * \internal
 * \brief Decide how to handle a local client's request (non-legacy mode)
 *
 * Modifying operations other than CIB_OP_MASTER and CIB_OP_SLAVE are always
 * forwarded. Those two operations are always handled locally. Non-modifying
 * operations are handled locally unless addressed to another host.
 *
 * \param[in]  cib_client     Client that sent the request
 * \param[in]  call_type      Operation ID of the request
 * \param[in]  call_options   Call options of the request (unused here)
 * \param[in]  host           Host the request is addressed to, or NULL
 * \param[in]  op             Operation name
 * \param[out] local_notify   Set to TRUE on every path except the forwarding
 *                            of a modifying operation, which leaves it alone
 * \param[out] needs_reply    Set to TRUE only when a modifying operation is
 *                            forwarded
 * \param[out] process        Whether to process the request locally
 * \param[out] needs_forward  Whether to forward the request
 */
static void
parse_local_options_v2(pcmk__client_t *cib_client, int call_type,
                       int call_options, const char *host, const char *op,
                       gboolean *local_notify, gboolean *needs_reply,
                       gboolean *process, gboolean *needs_forward)
{
    const gboolean modifies = cib_op_modifies(call_type);

    if (modifies
        && !(safe_str_eq(op, CIB_OP_MASTER) || safe_str_eq(op, CIB_OP_SLAVE))) {

        /* Redirect all other updates via CPG */
        *needs_reply = TRUE;
        *needs_forward = TRUE;
        *process = FALSE;
        crm_trace("%s op from %s needs to be forwarded to %s",
                  op, cib_client->name, host ? host : "the master instance");
        return;
    }

    // Default disposition: handle it here
    *process = TRUE;
    *needs_reply = FALSE;
    *local_notify = TRUE;
    *needs_forward = FALSE;

    if (modifies) {
        /* CIB_OP_MASTER and CIB_OP_SLAVE are always handled locally */
        return;
    }

    if (stand_alone) {
        crm_trace("Processing %s op from %s (stand-alone)", op, cib_client->name);
        return;
    }

    if (host == NULL) {
        crm_trace("Processing unaddressed %s op from %s", op, cib_client->name);
        return;
    }

    if (safe_str_eq(host, cib_our_uname)) {
        crm_trace("Processing locally addressed %s op from %s", op, cib_client->name);
        return;
    }

    // Addressed to some other host
    crm_trace("%s op from %s needs to be forwarded to %s", op, cib_client->name, host);
    *needs_forward = TRUE;
    *process = FALSE;
}
/*!
 * \internal
 * \brief Decide how to handle a local client's request
 *
 * Delegates to the legacy (v1) or current (v2) parser depending on
 * cib_legacy_mode(). All arguments are passed through unchanged.
 */
static void
parse_local_options(pcmk__client_t *cib_client, int call_type,
                    int call_options, const char *host, const char *op,
                    gboolean *local_notify, gboolean *needs_reply,
                    gboolean *process, gboolean *needs_forward)
{
    if (!cib_legacy_mode()) {
        parse_local_options_v2(cib_client, call_type, call_options, host,
                               op, local_notify, needs_reply, process, needs_forward);
        return;
    }
    parse_local_options_v1(cib_client, call_type, call_options, host,
                           op, local_notify, needs_reply, process, needs_forward);
}
/*!
 * \internal
 * \brief Decide how to handle a request received from a peer (legacy mode)
 *
 * \param[in]     call_type      Operation ID (not used by this function)
 * \param[in]     request        Request received from the peer
 * \param[out]    local_notify   Set on some paths to say whether local clients
 *                               should be notified
 * \param[out]    needs_reply    Set on some paths to say whether a reply
 *                               should be sent
 * \param[in,out] process        Set on some paths; for "cib_shutdown_req" its
 *                               current value is also the return value
 * \param[out]    needs_forward  Not used by this function
 *
 * \return TRUE if the caller should continue handling the request, FALSE if
 *         it should be ignored (or was fully handled here)
 *
 * \note Outputs not set on a given path keep whatever value the caller gave
 *       them.
 */
static gboolean
parse_peer_options_v1(int call_type, xmlNode * request,
gboolean * local_notify, gboolean * needs_reply, gboolean * process,
gboolean * needs_forward)
{
const char *op = NULL;
const char *host = NULL;
const char *delegated = NULL;
const char *originator = crm_element_value(request, F_ORIG);
const char *reply_to = crm_element_value(request, F_CIB_ISREPLY);
const char *update = crm_element_value(request, F_CIB_GLOBAL_UPDATE);
/* The message counts as a reply to us when F_CIB_ISREPLY names this node */
gboolean is_reply = safe_str_eq(reply_to, cib_our_uname);
/* Global updates are always processed and never answered */
if (crm_is_true(update)) {
*needs_reply = FALSE;
if (is_reply) {
*local_notify = TRUE;
crm_trace("Processing global/peer update from %s"
" that originated from us", originator);
} else {
crm_trace("Processing global/peer update from %s", originator);
}
return TRUE;
}
op = crm_element_value(request, F_CIB_OPERATION);
crm_trace("Processing %s request sent by %s", op, originator);
if (safe_str_eq(op, "cib_shutdown_req")) {
/* Always process these */
*local_notify = FALSE;
if (reply_to == NULL || is_reply) {
*process = TRUE;
}
if (is_reply) {
*needs_reply = FALSE;
}
/* When *process was not set above, this returns the caller's value */
return *process;
}
if (is_reply && safe_str_eq(op, CRM_OP_PING)) {
process_ping_reply(request);
return FALSE;
}
if (is_reply) {
crm_trace("Forward reply sent from %s to local clients", originator);
*process = FALSE;
*needs_reply = FALSE;
*local_notify = TRUE;
return TRUE;
}
/* From here on, is_reply is known to be FALSE */
host = crm_element_value(request, F_CIB_HOST);
if (host != NULL && safe_str_eq(host, cib_our_uname)) {
crm_trace("Processing %s request sent to us from %s", op, originator);
return TRUE;
} else if(is_reply == FALSE && safe_str_eq(op, CRM_OP_PING)) {
crm_trace("Processing %s request sent to %s by %s", op, host?host:"everyone", originator);
*needs_reply = TRUE;
return TRUE;
} else if (host == NULL && cib_is_master == TRUE) {
crm_trace("Processing %s request sent to master instance from %s", op, originator);
return TRUE;
}
/* Nothing above claimed the request; log why it is being ignored */
delegated = crm_element_value(request, F_CIB_DELEGATED);
if (delegated != NULL) {
crm_trace("Ignoring msg for master instance");
} else if (host != NULL) {
/* this is for a specific instance and we're not it */
crm_trace("Ignoring msg for instance on %s", crm_str(host));
} else if (reply_to == NULL && cib_is_master == FALSE) {
/* this is for the master instance and we're not it */
/* NOTE(review): reply_to is NULL in this branch, so the message below never
 * names a real reply target -- confirm the wording is intended
 */
crm_trace("Ignoring reply to %s", crm_str(reply_to));
} else if (safe_str_eq(op, "cib_shutdown_req")) {
/* NOTE(review): the earlier "cib_shutdown_req" check returns
 * unconditionally, so this branch appears to be unreachable
 */
if (reply_to != NULL) {
crm_debug("Processing %s from %s", op, originator);
*needs_reply = FALSE;
} else {
crm_debug("Processing %s reply from %s", op, originator);
}
return TRUE;
} else {
crm_err("Nothing for us to do?");
crm_log_xml_err(request, "Peer[inbound]");
}
return FALSE;
}
/*!
 * \internal
 * \brief Decide how to handle a request received from a peer (non-legacy mode)
 *
 * \param[in]     call_type      Operation ID of the request
 * \param[in,out] request        Request received from the peer; F_CIB_RC is
 *                               added to it when it carries F_CIB_UPGRADE_RC
 * \param[out]    local_notify   Set on most paths to say whether local clients
 *                               should be notified
 * \param[out]    needs_reply    Set on most paths to say whether a reply
 *                               should be sent
 * \param[in,out] process        Set on most paths; on the legacy
 *                               "cib_shutdown_req" path its current value is
 *                               also the return value
 * \param[out]    needs_forward  Not used by this function
 *
 * \return TRUE if the caller should continue handling the request, FALSE if
 *         it should be ignored (or was fully handled here)
 *
 * \note Detecting a legacy global update switches the daemon into legacy mode
 *       (legacy_mode is set to TRUE) and requests a sync.
 */
static gboolean
parse_peer_options_v2(int call_type, xmlNode * request,
gboolean * local_notify, gboolean * needs_reply, gboolean * process,
gboolean * needs_forward)
{
const char *host = NULL;
const char *delegated = crm_element_value(request, F_CIB_DELEGATED);
const char *op = crm_element_value(request, F_CIB_OPERATION);
const char *originator = crm_element_value(request, F_ORIG);
const char *reply_to = crm_element_value(request, F_CIB_ISREPLY);
const char *update = crm_element_value(request, F_CIB_GLOBAL_UPDATE);
/* The message counts as a reply to us when F_CIB_ISREPLY names this node */
gboolean is_reply = safe_str_eq(reply_to, cib_our_uname);
if(safe_str_eq(op, CIB_OP_REPLACE)) {
/* sync_our_cib() sets F_CIB_ISREPLY */
if (reply_to) {
delegated = reply_to;
}
goto skip_is_reply;
} else if(safe_str_eq(op, CIB_OP_SYNC)) {
/* Nothing special to do; continues with the is_reply check below */
} else if (is_reply && safe_str_eq(op, CRM_OP_PING)) {
process_ping_reply(request);
return FALSE;
} else if (safe_str_eq(op, CIB_OP_UPGRADE)) {
/* Only the DC (node with the oldest software) should process
* this operation if F_CIB_SCHEMA_MAX is unset
*
* If the DC is happy it will then send out another
* CIB_OP_UPGRADE which will tell all nodes to do the actual
* upgrade.
*
* Except this time F_CIB_SCHEMA_MAX will be set which puts a
* limit on how far newer nodes will go
*/
const char *max = crm_element_value(request, F_CIB_SCHEMA_MAX);
const char *upgrade_rc = crm_element_value(request, F_CIB_UPGRADE_RC);
crm_trace("Parsing %s operation%s for %s with max=%s and upgrade_rc=%s",
op, (is_reply? " reply" : ""),
(cib_is_master? "master" : "slave"),
(max? max : "none"), (upgrade_rc? upgrade_rc : "none"));
if (upgrade_rc != NULL) {
// Our upgrade request was rejected by DC, notify clients of result
crm_xml_add(request, F_CIB_RC, upgrade_rc);
} else if ((max == NULL) && cib_is_master) {
/* We are the DC, check if this upgrade is allowed */
goto skip_is_reply;
} else if(max) {
/* Ok, go ahead and upgrade to 'max' */
goto skip_is_reply;
} else {
// Ignore broadcast client requests when we're not DC
return FALSE;
}
} else if (crm_is_true(update)) {
crm_info("Detected legacy %s global update from %s", op, originator);
send_sync_request(NULL);
legacy_mode = TRUE;
return FALSE;
} else if (is_reply && cib_op_modifies(call_type)) {
crm_trace("Ignoring legacy %s reply sent from %s to local clients", op, originator);
return FALSE;
} else if (safe_str_eq(op, "cib_shutdown_req")) {
/* Legacy handling */
crm_debug("Legacy handling of %s message from %s", op, originator);
*local_notify = FALSE;
if (reply_to == NULL) {
*process = TRUE;
}
/* When *process was not set above, this returns the caller's value */
return *process;
}
/* Replies addressed to us are only relayed to local clients */
if(is_reply) {
crm_trace("Handling %s reply sent from %s to local clients", op, originator);
*process = FALSE;
*needs_reply = FALSE;
*local_notify = TRUE;
return TRUE;
}
skip_is_reply:
*process = TRUE;
*needs_reply = FALSE;
/* Notify local clients only if this node delegated the request */
if(safe_str_eq(delegated, cib_our_uname)) {
*local_notify = TRUE;
} else {
*local_notify = FALSE;
}
host = crm_element_value(request, F_CIB_HOST);
if (host != NULL && safe_str_eq(host, cib_our_uname)) {
crm_trace("Processing %s request sent to us from %s", op, originator);
*needs_reply = TRUE;
return TRUE;
} else if (host != NULL) {
/* this is for a specific instance and we're not it */
crm_trace("Ignoring %s operation for instance on %s", op, crm_str(host));
return FALSE;
} else if(is_reply == FALSE && safe_str_eq(op, CRM_OP_PING)) {
/* Unaddressed ping requests get answered */
*needs_reply = TRUE;
}
crm_trace("Processing %s request sent to everyone by %s/%s on %s %s", op,
crm_element_value(request, F_CIB_CLIENTNAME),
crm_element_value(request, F_CIB_CALLID),
originator, (*local_notify)?"(notify)":"");
return TRUE;
}
/*!
 * \internal
 * \brief Decide how to handle a request received from a peer
 *
 * Delegates to the legacy (v1) or current (v2) parser depending on
 * cib_legacy_mode(). All arguments are passed through unchanged.
 *
 * \return Whatever the selected parser returns
 */
static gboolean
parse_peer_options(int call_type, xmlNode * request,
                   gboolean * local_notify, gboolean * needs_reply, gboolean * process,
                   gboolean * needs_forward)
{
    /* TODO: What happens when an update comes in after node A
     * requests the CIB from node B, but before it gets the reply (and
     * sends out the replace operation)
     */
    if (!cib_legacy_mode()) {
        return parse_peer_options_v2(call_type, request, local_notify,
                                     needs_reply, process, needs_forward);
    }
    return parse_peer_options_v1(call_type, request, local_notify,
                                 needs_reply, process, needs_forward);
}
/*!
 * \internal
 * \brief Forward a client request to another cluster node
 *
 * The request is temporarily tagged with this node's name in F_CIB_DELEGATED
 * and sent either to the host named in the request or, when none is named,
 * with no specific recipient.
 *
 * \param[in,out] request       Request to forward; F_CIB_DELEGATED is removed
 *                              again before returning
 * \param[in]     cib_client    Originating client (unused here)
 * \param[in]     call_options  Call options of the request
 */
static void
forward_request(xmlNode * request, pcmk__client_t *cib_client, int call_options)
{
    const char *target_host = crm_element_value(request, F_CIB_HOST);
    const char *op = crm_element_value(request, F_CIB_OPERATION);

    crm_xml_add(request, F_CIB_DELEGATED, cib_our_uname);

    if (target_host == NULL) {
        crm_trace("Forwarding %s op to master instance", op);
        send_cluster_message(NULL, crm_msg_cib, request, FALSE);

    } else {
        crm_trace("Forwarding %s op to %s", op, target_host);
        send_cluster_message(crm_get_peer(0, target_host), crm_msg_cib, request, FALSE);
    }

    /* Return the request to its original state */
    xml_remove_prop(request, F_CIB_DELEGATED);

    if (call_options & cib_discard_reply) {
        crm_trace("Client not interested in reply");
    }
}
/*!
 * \internal
 * \brief Send the result of an operation to one peer or to all of them
 *
 * \param[in,out] msg          Message to send (must not be NULL); reply and
 *                             update fields are added to it
 * \param[in]     result_diff  Diff produced by the operation; attached to
 *                             \p msg when broadcasting
 * \param[in]     originator   Node that the request came from, or NULL
 * \param[in]     broadcast    If TRUE, send the diff to all nodes; otherwise
 *                             reply to \p originator only
 *
 * \return Result of send_cluster_message(), or FALSE if nothing was sent
 *         because this is not a broadcast and there is no originator
 */
static gboolean
send_peer_reply(xmlNode * msg, xmlNode * result_diff, const char *originator, gboolean broadcast)
{
    int add_admin_epoch = 0;
    int add_epoch = 0;
    int add_updates = 0;
    int del_admin_epoch = 0;
    int del_epoch = 0;
    int del_updates = 0;
    int diff_format = 1;
    const char *diff_digest = NULL;

    CRM_ASSERT(msg != NULL);

    if (!broadcast) {
        if (originator == NULL) {
            return FALSE;
        }

        /* send reply via HA to originating node */
        crm_trace("Sending request result to %s only", originator);
        crm_xml_add(msg, F_CIB_ISREPLY, originator);
        return send_cluster_message(crm_get_peer(0, originator), crm_msg_cib, msg, FALSE);
    }

    /* this (successful) call modified the CIB _and_ the
     * change needs to be broadcast...
     *   send via HA to other nodes
     */
    CRM_LOG_ASSERT(result_diff != NULL);
    diff_digest = crm_element_value(result_diff, XML_ATTR_DIGEST);
    crm_element_value_int(result_diff, "format", &diff_format);

    cib_diff_version_details(result_diff,
                             &add_admin_epoch, &add_epoch, &add_updates,
                             &del_admin_epoch, &del_epoch, &del_updates);

    crm_trace("Sending update diff %d.%d.%d -> %d.%d.%d %s",
              del_admin_epoch, del_epoch, del_updates,
              add_admin_epoch, add_epoch, add_updates, diff_digest);

    crm_xml_add(msg, F_CIB_ISREPLY, originator);
    crm_xml_add(msg, F_CIB_GLOBAL_UPDATE, XML_BOOLEAN_TRUE);
    crm_xml_add(msg, F_CIB_OPERATION, CIB_OP_APPLY_DIFF);
    crm_xml_add(msg, F_CIB_USER, CRM_DAEMON_USER);

    // Format 1 diffs must carry a digest
    if (diff_format == 1) {
        CRM_ASSERT(diff_digest != NULL);
    }

    add_message_xml(msg, F_CIB_UPDATE_DIFF, result_diff);
    crm_log_xml_explicit(msg, "copy");
    return send_cluster_message(NULL, crm_msg_cib, msg, TRUE);
}
/*!
 * \internal
 * \brief Process a CIB request received from a local client or a cluster peer
 *
 * Decides whether the request has to be forwarded, processed locally, or
 * answered with an error, runs it through cib_process_command() when it is
 * to be processed, and then relays the outcome to peers and/or the local
 * client as required.
 *
 * \param[in] request            Request XML (F_ORIG is set to cib_our_uname
 *                               here when the request is local)
 * \param[in] force_synchronous  If true, cib_sync_call is added to the call
 *                               options read from the request
 * \param[in] privileged         Passed through to cib_process_command()
 * \param[in] cib_client         Local client that issued the request; NULL
 *                               means the request is treated as from a peer
 */
static void
cib_process_request(xmlNode *request, gboolean force_synchronous,
gboolean privileged, pcmk__client_t *cib_client)
{
int call_type = 0;
int call_options = 0;
gboolean process = TRUE;
gboolean is_update = TRUE;
gboolean from_peer = TRUE;
gboolean needs_reply = TRUE;
gboolean local_notify = FALSE;
gboolean needs_forward = FALSE;
gboolean global_update = crm_is_true(crm_element_value(request, F_CIB_GLOBAL_UPDATE));
xmlNode *op_reply = NULL;
xmlNode *result_diff = NULL;
int rc = pcmk_ok;
const char *op = crm_element_value(request, F_CIB_OPERATION);
const char *originator = crm_element_value(request, F_ORIG);
const char *host = crm_element_value(request, F_CIB_HOST);
const char *target = NULL;
const char *call_id = crm_element_value(request, F_CIB_CALLID);
const char *client_id = crm_element_value(request, F_CIB_CLIENTID);
const char *client_name = crm_element_value(request, F_CIB_CLIENTNAME);
const char *reply_to = crm_element_value(request, F_CIB_ISREPLY);
/* A non-NULL client means the request originated locally */
if (cib_client) {
from_peer = FALSE;
}
crm_element_value_int(request, F_CIB_CALLOPTS, &call_options);
if (force_synchronous) {
call_options |= cib_sync_call;
}
/* Treat an empty host name the same as no host at all */
if (host != NULL && strlen(host) == 0) {
host = NULL;
}
/* "target" is only used for the trace messages below */
if (host) {
target = host;
} else if (call_options & cib_scope_local) {
target = "local host";
} else {
target = "master";
}
if (from_peer) {
crm_trace("Processing peer %s operation from %s/%s on %s intended for %s (reply=%s)",
op, client_name, call_id, originator, target, reply_to);
} else {
crm_xml_add(request, F_ORIG, cib_our_uname);
crm_trace("Processing local %s operation from %s/%s intended for %s", op, client_name, call_id, target);
}
rc = cib_get_operation_id(op, &call_type);
if (rc != pcmk_ok) {
/* TODO: construct error reply? */
crm_err("Pre-processing of command failed: %s", pcmk_strerror(rc));
return;
}
/* Work out what has to happen with this request: local notification,
 * reply, local processing and/or forwarding
 */
if (from_peer == FALSE) {
parse_local_options(cib_client, call_type, call_options, host, op,
&local_notify, &needs_reply, &process, &needs_forward);
} else if (parse_peer_options(call_type, request, &local_notify,
&needs_reply, &process, &needs_forward) == FALSE) {
return;
}
is_update = cib_op_modifies(call_type);
/* When the caller discards replies, a reply is still needed for
 * modifying operations, but the local client is never notified
 */
if (call_options & cib_discard_reply) {
needs_reply = is_update;
local_notify = FALSE;
}
if (needs_forward) {
/* NOTE(review): this declaration shadows the outer "host"; unlike the
 * outer one it is not reset to NULL when the value is an empty string
 */
const char *host = crm_element_value(request, F_CIB_HOST);
const char *section = crm_element_value(request, F_CIB_SECTION);
int log_level = LOG_INFO;
if (safe_str_eq(op, CRM_OP_NOOP)) {
log_level = LOG_DEBUG;
}
do_crm_log(log_level,
"Forwarding %s operation for section %s to %s (origin=%s/%s/%s)",
op,
section ? section : "'all'",
host ? host : cib_legacy_mode() ? "master" : "all",
originator ? originator : "local",
client_name, call_id);
forward_request(request, cib_client, call_options);
return;
}
if (cib_status != pcmk_ok) {
/* The CIB is in an error state: do not run the operation, build a reply
 * carrying cib_status as the result and the current CIB as call data
 */
const char *call = crm_element_value(request, F_CIB_CALLID);
rc = cib_status;
crm_err("Operation ignored, cluster configuration is invalid."
" Please repair and restart: %s", pcmk_strerror(cib_status));
op_reply = create_xml_node(NULL, "cib-reply");
crm_xml_add(op_reply, F_TYPE, T_CIB);
crm_xml_add(op_reply, F_CIB_OPERATION, op);
crm_xml_add(op_reply, F_CIB_CALLID, call);
crm_xml_add(op_reply, F_CIB_CLIENTID, client_id);
crm_xml_add_int(op_reply, F_CIB_CALLOPTS, call_options);
crm_xml_add_int(op_reply, F_CIB_RC, rc);
crm_trace("Attaching reply output");
add_message_xml(op_reply, F_CIB_CALLDATA, the_cib);
crm_log_xml_explicit(op_reply, "cib:reply");
} else if (process) {
time_t finished = 0;
time_t now = time(NULL);
int level = LOG_INFO;
const char *section = crm_element_value(request, F_CIB_SECTION);
rc = cib_process_command(request, &op_reply, &result_diff, privileged);
/* Choose the severity of the completion message from the kind of
 * operation and its result
 */
if (is_update == FALSE) {
level = LOG_TRACE;
} else if (global_update) {
switch (rc) {
case pcmk_ok:
level = LOG_INFO;
break;
case -pcmk_err_old_data:
case -pcmk_err_diff_resync:
case -pcmk_err_diff_failed:
level = LOG_TRACE;
break;
default:
level = LOG_ERR;
}
} else if (rc != pcmk_ok && is_update) {
level = LOG_WARNING;
}
do_crm_log(level,
"Completed %s operation for section %s: %s (rc=%d, origin=%s/%s/%s, version=%s.%s.%s)",
op, section ? section : "'all'", pcmk_strerror(rc), rc,
originator ? originator : "local", client_name, call_id,
the_cib ? crm_element_value(the_cib, XML_ATTR_GENERATION_ADMIN) : "0",
the_cib ? crm_element_value(the_cib, XML_ATTR_GENERATION) : "0",
the_cib ? crm_element_value(the_cib, XML_ATTR_NUMUPDATES) : "0");
finished = time(NULL);
/* Operations taking longer than 3 seconds trigger a blackbox dump */
if ((finished - now) > 3) {
crm_trace("%s operation took %lds to complete", op, (long)(finished - now));
crm_write_blackbox(0, NULL);
}
/* Without a reply object there is nothing to send or notify with */
if (op_reply == NULL && (needs_reply || local_notify)) {
crm_err("Unexpected NULL reply to message");
crm_log_xml_err(request, "null reply");
needs_reply = FALSE;
local_notify = FALSE;
}
}
/* from now on we are the server */
if(is_update && cib_legacy_mode() == FALSE) {
crm_trace("Completed pre-sync update from %s/%s/%s%s",
originator ? originator : "local", client_name, call_id,
local_notify?" with local notification":"");
} else if (needs_reply == FALSE || stand_alone) {
/* nothing more to do...
* this was a non-originating slave update
*/
crm_trace("Completed slave update");
} else if (cib_legacy_mode() &&
rc == pcmk_ok && result_diff != NULL && !(call_options & cib_inhibit_bcast)) {
/* Legacy mode: broadcast the resulting diff to all peers, tagged with a
 * locally incremented notification ID
 */
gboolean broadcast = FALSE;
cib_local_bcast_num++;
crm_xml_add_int(request, F_CIB_LOCAL_NOTIFY_ID, cib_local_bcast_num);
broadcast = send_peer_reply(request, result_diff, originator, TRUE);
if (broadcast && client_id && local_notify && op_reply) {
/* If we have been asked to sync the reply,
* and a bcast msg has gone out, we queue the local notify
* until we know the bcast message has been received */
local_notify = FALSE;
crm_trace("Queuing local %ssync notification for %s",
(call_options & cib_sync_call) ? "" : "a-", client_id);
queue_local_notify(op_reply, client_id, (call_options & cib_sync_call), from_peer);
op_reply = NULL; /* the reply is queued, so don't free here */
}
} else if (call_options & cib_discard_reply) {
crm_trace("Caller isn't interested in reply");
} else if (from_peer) {
/* The branches below only choose which trace message to emit; the reply
 * is sent to the originator in every case
 */
if (is_update == FALSE || result_diff == NULL) {
crm_trace("Request not broadcast: R/O call");
} else if (call_options & cib_inhibit_bcast) {
crm_trace("Request not broadcast: inhibited");
} else if (rc != pcmk_ok) {
crm_trace("Request not broadcast: call failed: %s", pcmk_strerror(rc));
} else {
crm_trace("Directing reply to %s", originator);
}
send_peer_reply(op_reply, result_diff, originator, FALSE);
}
if (local_notify && client_id) {
crm_trace("Performing local %ssync notification for %s",
(call_options & cib_sync_call) ? "" : "a-", client_id);
/* If nothing was processed locally, the request itself is what gets
 * handed to the client; otherwise the reply is
 */
if (process == FALSE) {
do_local_notify(request, client_id, call_options & cib_sync_call, from_peer);
} else {
do_local_notify(op_reply, client_id, call_options & cib_sync_call, from_peer);
}
}
/* op_reply is NULL here if it was queued for a later local notification */
free_xml(op_reply);
free_xml(result_diff);
return;
}
/*!
 * \internal
 * \brief Run a CIB operation against the live CIB and build the reply
 *
 * Read-only operations are performed directly on the current CIB. Modifying
 * operations are performed via cib_perform_op(), and on success the result
 * is activated (unless this is a dry run or a zero-copy update), change
 * notifications are sent, and the digest timer is restarted.
 *
 * \param[in]  request     Request XML (operation, call options, call ID, ...)
 * \param[out] reply       Set to a newly created "cib-reply" node, or left
 *                         NULL when cib_discard_reply is set
 * \param[out] cib_diff    Set to the diff produced by a modifying operation
 *                         (stays NULL for read-only operations)
 * \param[in]  privileged  If FALSE, cib_op_can_run() is consulted before the
 *                         operation is prepared
 *
 * \return pcmk_ok on success, otherwise the error code of the failing step
 * \note cib_process_request() in this file frees both \p *reply and
 *       \p *cib_diff once it has finished with them.
 */
static int
cib_process_command(xmlNode * request, xmlNode ** reply, xmlNode ** cib_diff, gboolean privileged)
{
    xmlNode *input = NULL;
    xmlNode *output = NULL;
    xmlNode *result_cib = NULL;
    xmlNode *current_cib = NULL;

    int call_type = 0;
    int call_options = 0;

    const char *op = NULL;
    const char *section = NULL;
    const char *call_id = crm_element_value(request, F_CIB_CALLID);

    int rc = pcmk_ok;
    int rc2 = pcmk_ok;

    gboolean send_r_notify = FALSE;
    gboolean global_update = FALSE;
    gboolean config_changed = FALSE;
    gboolean manage_counters = TRUE;

    static mainloop_timer_t *digest_timer = NULL;

    CRM_ASSERT(cib_status == pcmk_ok);

    /* The timer is created once, on first use, and reused afterwards */
    if(digest_timer == NULL) {
        digest_timer = mainloop_timer_add("digester", 5000, FALSE, cib_digester_cb, NULL);
    }

    *reply = NULL;
    *cib_diff = NULL;
    current_cib = the_cib;

    /* Start processing the request... */
    op = crm_element_value(request, F_CIB_OPERATION);
    crm_element_value_int(request, F_CIB_CALLOPTS, &call_options);
    rc = cib_get_operation_id(op, &call_type);

    /* NOTE(review): global_update is still FALSE at this point; it is only
     * read from the request further down
     */
    if (rc == pcmk_ok && privileged == FALSE) {
        rc = cib_op_can_run(call_type, call_options, privileged, global_update);
    }

    /* Preparation always runs, but an earlier error takes precedence over
     * its result
     */
    rc2 = cib_op_prepare(call_type, request, &input, &section);
    if (rc == pcmk_ok) {
        rc = rc2;
    }

    if (rc != pcmk_ok) {
        crm_trace("Call setup failed: %s", pcmk_strerror(rc));
        goto done;

    } else if (cib_op_modifies(call_type) == FALSE) {
        /* Read-only operation: no result CIB and no diff are expected */
        rc = cib_perform_op(op, call_options, cib_op_func(call_type), TRUE,
                            section, request, input, FALSE, &config_changed,
                            current_cib, &result_cib, NULL, &output);

        CRM_CHECK(result_cib == NULL, free_xml(result_cib));
        goto done;
    }

    /* Handle a valid write action */
    global_update = crm_is_true(crm_element_value(request, F_CIB_GLOBAL_UPDATE));
    if (global_update) {
        /* legacy code */
        manage_counters = FALSE;
        call_options |= cib_force_diff;
        crm_trace("Global update detected");

        CRM_CHECK(call_type == 3 || call_type == 4, crm_err("Call type: %d", call_type);
                  crm_log_xml_err(request, "bad op"));
    }

    if (rc == pcmk_ok) {
        ping_modified_since = TRUE;
        if (call_options & cib_inhibit_bcast) {
            /* skip */
            crm_trace("Skipping update: inhibit broadcast");
            manage_counters = FALSE;
        }

        if (is_not_set(call_options, cib_dryrun) && safe_str_eq(section, XML_CIB_TAG_STATUS)) {
            /* Copying large CIBs accounts for a huge percentage of our CIB usage */
            call_options |= cib_zero_copy;
        } else {
            clear_bit(call_options, cib_zero_copy);
        }

        /* result_cib must not be modified after cib_perform_op() returns */
        rc = cib_perform_op(op, call_options, cib_op_func(call_type), FALSE,
                            section, request, input, manage_counters, &config_changed,
                            current_cib, &result_cib, cib_diff, &output);

        if (manage_counters == FALSE) {
            int format = 1;
            /* Legacy code
             * If the diff is NULL at this point, it's because nothing changed
             */
            if (*cib_diff) {
                crm_element_value_int(*cib_diff, "format", &format);
            }

            if (format == 1) {
                config_changed = cib_config_changed(NULL, NULL, cib_diff);
            }
        }

        /* Always write to disk for replace ops,
         * this also negates the need to detect ordering changes
         */
        if (crm_str_eq(CIB_OP_REPLACE, op, TRUE)) {
            config_changed = TRUE;
        }
    }

    if (rc == pcmk_ok && is_not_set(call_options, cib_dryrun)) {
        crm_trace("Activating %s->%s%s%s",
                  crm_element_value(current_cib, XML_ATTR_NUMUPDATES),
                  crm_element_value(result_cib, XML_ATTR_NUMUPDATES),
                  (is_set(call_options, cib_zero_copy)? " zero-copy" : ""),
                  (config_changed? " changed" : ""));

        /* Zero-copy updates skip the explicit activation step */
        if(is_not_set(call_options, cib_zero_copy)) {
            rc = activateCibXml(result_cib, config_changed, op);
            crm_trace("Activated %s (%d)",
                      crm_element_value(current_cib, XML_ATTR_NUMUPDATES), rc);
        }

        if (rc == pcmk_ok && cib_internal_config_changed(*cib_diff)) {
            cib_read_config(config_hash, result_cib);
        }

        /* Replacing everything, the whole CIB, the nodes or the status
         * section, or erasing the CIB, triggers a replace notification
         */
        if (crm_str_eq(CIB_OP_REPLACE, op, TRUE)) {
            if (section == NULL) {
                send_r_notify = TRUE;

            } else if (safe_str_eq(section, XML_TAG_CIB)) {
                send_r_notify = TRUE;

            } else if (safe_str_eq(section, XML_CIB_TAG_NODES)) {
                send_r_notify = TRUE;

            } else if (safe_str_eq(section, XML_CIB_TAG_STATUS)) {
                send_r_notify = TRUE;
            }

        } else if (crm_str_eq(CIB_OP_ERASE, op, TRUE)) {
            send_r_notify = TRUE;
        }

        /* Restart the digest timer after every activated change */
        mainloop_timer_stop(digest_timer);
        mainloop_timer_start(digest_timer);

    } else if (rc == -pcmk_err_schema_validation) {
        /* The rejected result CIB is handed back to the caller as output */
        CRM_ASSERT(is_not_set(call_options, cib_zero_copy));

        if (output != NULL) {
            crm_log_xml_info(output, "cib:output");
            free_xml(output);
        }

        output = result_cib;

    } else {
        crm_trace("Not activating %d %d %s", rc, is_set(call_options, cib_dryrun), crm_element_value(result_cib, XML_ATTR_NUMUPDATES));
        if(is_not_set(call_options, cib_zero_copy)) {
            free_xml(result_cib);
        }
    }

    if ((call_options & (cib_inhibit_notify|cib_dryrun)) == 0) {
        const char *client = crm_element_value(request, F_CIB_CLIENTNAME);

        crm_trace("Sending notifications %d", is_set(call_options, cib_dryrun));
        cib_diff_notify(call_options, client, call_id, op, input, rc, *cib_diff);
    }

    if (send_r_notify) {
        const char *origin = crm_element_value(request, F_ORIG);

        cib_replace_notify(origin, the_cib, rc, *cib_diff);
    }

    xml_log_patchset(LOG_TRACE, "cib:diff", *cib_diff);

  done:
    if ((call_options & cib_discard_reply) == 0) {
        const char *caller = crm_element_value(request, F_CIB_CLIENTID);

        *reply = create_xml_node(NULL, "cib-reply");
        crm_xml_add(*reply, F_TYPE, T_CIB);
        crm_xml_add(*reply, F_CIB_OPERATION, op);
        crm_xml_add(*reply, F_CIB_CALLID, call_id);
        crm_xml_add(*reply, F_CIB_CLIENTID, caller);
        crm_xml_add_int(*reply, F_CIB_CALLOPTS, call_options);
        crm_xml_add_int(*reply, F_CIB_RC, rc);

        if (output != NULL) {
            crm_trace("Attaching reply output");
            add_message_xml(*reply, F_CIB_CALLDATA, output);
        }

        crm_log_xml_explicit(*reply, "cib:reply");
    }

    crm_trace("cleanup");

    /* Output of a read-only operation is freed here, unless it is the live
     * CIB itself
     */
    if (cib_op_modifies(call_type) == FALSE && output != current_cib) {
        free_xml(output);
        output = NULL;
    }

    if (call_type >= 0) {
        cib_op_cleanup(call_type, call_options, &input, &output);
    }

    crm_trace("done");
    return rc;
}
/*!
 * \internal
 * \brief Handle a CIB message delivered by the cluster layer
 *
 * In legacy mode, messages without an originator or originating from this
 * node are only used to check queued local notifications. Messages that
 * arrive while there is no peer cache are discarded with a warning.
 * Everything else is processed as a privileged peer request.
 *
 * \param[in] msg           Message XML
 * \param[in] private_data  Ignored
 */
void
cib_peer_callback(xmlNode * msg, void *private_data)
{
    const char *originator = crm_element_value(msg, F_ORIG);

    if (cib_legacy_mode()
        && ((originator == NULL) || crm_str_eq(originator, cib_our_uname, TRUE))) {
        /* message is from ourselves */
        int notify_id = 0;

        if (crm_element_value_int(msg, F_CIB_LOCAL_NOTIFY_ID, &notify_id) == 0) {
            check_local_notify(notify_id);
        }
        return;
    }

    if (crm_peer_cache == NULL) {
        const char *why = "membership not established";
        const char *sequence = crm_element_value(msg, F_SEQ);
        const char *operation = crm_element_value(msg, F_CIB_OPERATION);

        crm_warn("Discarding %s message (%s) from %s: %s", operation, sequence, originator, why);
        return;
    }

    /* Make sure the request carries a client name */
    if (crm_element_value(msg, F_CIB_CLIENTNAME) == NULL) {
        crm_xml_add(msg, F_CIB_CLIENTNAME, originator);
    }

    cib_process_request(msg, FALSE, TRUE, NULL);
}
/*!
 * \internal
 * \brief Timer callback that terminates the CIB manager with an error status
 *
 * Scheduled by initiate_exit() via g_timeout_add().
 *
 * \param[in] data  Ignored
 *
 * \return Always FALSE
 */
static gboolean
cib_force_exit(gpointer data)
{
    crm_notice("Forcing exit!");
    terminate_cib(__FUNCTION__, CRM_EX_ERROR);

    return FALSE;
}
/*!
 * \internal
 * \brief Per-client callback used while shutting down (placeholder)
 *
 * Only logs that disconnecting the client is not implemented.
 *
 * \param[in] key        Ignored
 * \param[in] value      Client object (pcmk__client_t *)
 * \param[in] user_data  Ignored
 */
static void
disconnect_remote_client(gpointer key, gpointer value, gpointer user_data)
{
    pcmk__client_t *client = (pcmk__client_t *) value;

    crm_err("Disconnecting %s... Not implemented", crm_str(client->name));
}
/*!
 * \internal
 * \brief Begin shutting down the CIB manager
 *
 * On the first call, every connection on the three IPC servers is
 * disconnected. On every call, the exit sequence is started once no IPC
 * clients remain.
 *
 * \param[in] nsig  Ignored
 */
void
cib_shutdown(int nsig)
{
    struct qb_ipcs_stats srv_stats;

    if (cib_shutdown_flag == FALSE) {
        int n_dropped = 0;
        qb_ipcs_connection_t *conn = NULL;
        qb_ipcs_connection_t *upcoming = NULL;

        cib_shutdown_flag = TRUE;

        /* The successor is always fetched before the current connection is
         * disconnected and released
         */
        for (conn = qb_ipcs_connection_first_get(ipcs_rw); conn != NULL; conn = upcoming) {
            upcoming = qb_ipcs_connection_next_get(ipcs_rw, conn);
            crm_debug("Disconnecting r/w client %p...", conn);
            qb_ipcs_disconnect(conn);
            qb_ipcs_connection_unref(conn);
            n_dropped++;
        }

        for (conn = qb_ipcs_connection_first_get(ipcs_ro); conn != NULL; conn = upcoming) {
            upcoming = qb_ipcs_connection_next_get(ipcs_ro, conn);
            crm_debug("Disconnecting r/o client %p...", conn);
            qb_ipcs_disconnect(conn);
            qb_ipcs_connection_unref(conn);
            n_dropped++;
        }

        for (conn = qb_ipcs_connection_first_get(ipcs_shm); conn != NULL; conn = upcoming) {
            upcoming = qb_ipcs_connection_next_get(ipcs_shm, conn);
            crm_debug("Disconnecting non-blocking r/w client %p...", conn);
            qb_ipcs_disconnect(conn);
            qb_ipcs_connection_unref(conn);
            n_dropped++;
        }

        n_dropped += pcmk__ipc_client_count();

        crm_debug("Disconnecting %d remote clients", pcmk__ipc_client_count());
        pcmk__foreach_ipc_client(disconnect_remote_client, NULL);
        crm_info("Disconnected %d clients", n_dropped);
    }

    qb_ipcs_stats_get(ipcs_rw, &srv_stats, QB_FALSE);

    if (pcmk__ipc_client_count() != 0) {
        crm_info("Waiting on %d clients to disconnect (%d)",
                 pcmk__ipc_client_count(), srv_stats.active_connections);
    } else {
        crm_info("All clients disconnected (%d)", srv_stats.active_connections);
        initiate_exit();
    }
}
/*!
 * \internal
 * \brief Start the exit sequence of the CIB manager
 *
 * With fewer than two active peers the CIB manager terminates right away.
 * Otherwise a shutdown request is broadcast to the cluster and a timer is
 * armed that forces the exit after EXIT_ESCALATION_MS.
 */
void
initiate_exit(void)
{
    xmlNode *notice = NULL;
    int n_peers = crm_active_peers();

    if (n_peers < 2) {
        terminate_cib(__FUNCTION__, 0);
        return;
    }

    crm_info("Sending disconnect notification to %d peers...", n_peers);

    notice = create_xml_node(NULL, "exit-notification");
    crm_xml_add(notice, F_TYPE, "cib");
    crm_xml_add(notice, F_CIB_OPERATION, "cib_shutdown_req");

    send_cluster_message(NULL, crm_msg_cib, notice, TRUE);
    free_xml(notice);

    g_timeout_add(EXIT_ESCALATION_MS, cib_force_exit, NULL);
}
/* File descriptors defined elsewhere; terminate_cib() in this file closes
 * them when they are greater than zero and then resets them to 0
 */
extern int remote_fd;
extern int remote_tls_fd;
/*!
 * \internal
 * \brief Close the remote sockets, release the global CIB and quit
 *
 * \param[in] caller  Name of the calling function (used in the log message)
 * \param[in] fast    If positive, exit immediately with this value as exit
 *                    status; if 0, disconnect from the cluster before
 *                    quitting the main loop; if negative (-1), quit the main
 *                    loop without disconnecting here
 *
 * \note When no main loop is running and \p fast is not positive, this
 *       disconnects from the cluster and exits with CRM_EX_OK.
 */
void
terminate_cib(const char *caller, int fast)
{
    const char *how = "";

    if (fast > 0) {
        how = " fast";
    } else if (mainloop) {
        how = " from mainloop";
    }
    crm_info("%s: Exiting%s...", caller, how);

    if (remote_fd > 0) {
        close(remote_fd);
        remote_fd = 0;
    }
    if (remote_tls_fd > 0) {
        close(remote_tls_fd);
        remote_tls_fd = 0;
    }

    uninitializeCib();

    if (fast > 0) {
        /* Quit fast on error */
        cib_ipc_servers_destroy(ipcs_ro, ipcs_rw, ipcs_shm);
        crm_exit(fast);

    } else if ((mainloop != NULL) && g_main_loop_is_running(mainloop)) {
        /* Leave by returning from the main loop. With fast == -1 the
         * cluster disconnect is skipped here and done once the main loop has
         * returned, which lets the peer status callback avoid touching the
         * peer caches.
         */
        if (fast == 0) {
            crm_cluster_disconnect(&crm_cluster);
        }
        g_main_loop_quit(mainloop);

    } else {
        /* Clean exit without a running main loop. Even the peer status
         * callback may disconnect here, because control is not handed back
         * to the caller.
         */
        crm_cluster_disconnect(&crm_cluster);
        cib_ipc_servers_destroy(ipcs_ro, ipcs_rw, ipcs_shm);
        crm_exit(CRM_EX_OK);
    }
}
diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c
index 83a27deb7d..1a1c8c44bf 100644
--- a/daemons/controld/controld_control.c
+++ b/daemons/controld/controld_control.c
@@ -1,780 +1,774 @@
/*
* Copyright 2004-2020 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/pengine/rules.h>
#include <crm/cluster/internal.h>
#include <crm/cluster/election.h>
#include <crm/common/ipcs_internal.h>
#include <pacemaker-controld.h>
/* Controller IPC server: created in do_started(), released with
 * mainloop_del_ipc_server() in do_stop() and crmd_exit()
 */
qb_ipcs_service_t *ipcs = NULL;
#if SUPPORT_COROSYNC
extern gboolean crm_connect_corosync(crm_cluster_t * cluster);
#endif
/* Installed as the SIGTERM handler by do_startup() */
void crm_shutdown(int nsig);
/* Callback of the config_read trigger created in do_startup() */
gboolean crm_read_options(gpointer user_data);
/* NOTE(review): not referenced in this part of the file */
gboolean fsa_has_quorum = FALSE;
/* Mainloop triggers created in do_startup() and destroyed in crmd_exit() */
crm_trigger_t *fsa_source = NULL;
crm_trigger_t *config_read = NULL;
/* NOTE(review): the two flags below are not referenced in this part of the
 * file; confirm their meaning against the code that sets them
 */
bool no_quorum_suicide_escalation = FALSE;
bool controld_shutdown_lock_enabled = false;
/* A_HA_CONNECT */
/*!
 * \internal
 * \brief FSA action handler: disconnect from and/or connect to the cluster
 *
 * The cluster object is allocated on first use and kept for later calls.
 * If it cannot be allocated, or if connecting fails, R_HA_DISCONNECTED is
 * set and an FSA error is registered.
 *
 * \param[in] action         Bit mask of requested actions (A_HA_DISCONNECT
 *                           and/or A_HA_CONNECT; anything else is logged as
 *                           unexpected)
 * \param[in] cause          Unused here
 * \param[in] cur_state      Unused here
 * \param[in] current_input  Unused here
 * \param[in] msg_data       Not referenced directly in this function
 */
void
do_ha_control(long long action,
              enum crmd_fsa_cause cause,
              enum crmd_fsa_state cur_state,
              enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    gboolean registered = FALSE;
    static crm_cluster_t *cluster = NULL;

    if (cluster == NULL) {
        cluster = calloc(1, sizeof(*cluster));
        if (cluster == NULL) {
            /* Without a cluster object nothing below can work; fail the same
             * way as an unsuccessful connection attempt instead of handing
             * a NULL pointer to the cluster layer
             */
            crm_err("Could not allocate memory for the cluster connection");
            set_bit(fsa_input_register, R_HA_DISCONNECTED);
            register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
            return;
        }
    }

    if (action & A_HA_DISCONNECT) {
        crm_cluster_disconnect(cluster);
        crm_info("Disconnected from the cluster");

        set_bit(fsa_input_register, R_HA_DISCONNECTED);
    }

    if (action & A_HA_CONNECT) {
        crm_set_status_callback(&peer_update_callback);
        crm_set_autoreap(FALSE);

        if (is_corosync_cluster()) {
#if SUPPORT_COROSYNC
            registered = crm_connect_corosync(cluster);
#endif
        }

        if (registered == TRUE) {
            controld_election_init(cluster->uname);
            fsa_our_uname = cluster->uname;
            fsa_our_uuid = cluster->uuid;
            /* A connection without a local UUID counts as a failure */
            if(cluster->uuid == NULL) {
                crm_err("Could not obtain local uuid");
                registered = FALSE;
            }
        }

        if (registered == FALSE) {
            set_bit(fsa_input_register, R_HA_DISCONNECTED);
            register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
            return;
        }

        populate_cib_nodes(node_update_none, __FUNCTION__);
        clear_bit(fsa_input_register, R_HA_DISCONNECTED);
        crm_info("Connected to the cluster");
    }

    if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) {
        crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__);
    }
}
/* A_SHUTDOWN */
/*!
 * \internal
 * \brief FSA action handler: flag the shutdown and disconnect from the fencer
 *
 * None of the parameters are used.
 */
void
do_shutdown(long long action,
            enum crmd_fsa_cause cause,
            enum crmd_fsa_state cur_state,
            enum crmd_fsa_input current_input,
            fsa_data_t * msg_data)
{
    /* R_SHUTDOWN may already be set; setting it again is a safety net */
    set_bit(fsa_input_register, R_SHUTDOWN);

    controld_disconnect_fencer(FALSE);
}
/* A_SHUTDOWN_REQ */
/*!
 * \internal
 * \brief FSA action handler: broadcast a shutdown request to all peers
 *
 * Sets R_SHUTDOWN, sends a CRM_OP_SHUTDOWN_REQ message to the cluster and
 * registers an FSA error if the message could not be sent. None of the
 * parameters are used directly.
 */
void
do_shutdown_req(long long action,
                enum crmd_fsa_cause cause,
                enum crmd_fsa_state cur_state,
                enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    xmlNode *request = NULL;
    const char *dc_name = NULL;

    set_bit(fsa_input_register, R_SHUTDOWN);

    dc_name = fsa_our_dc ? fsa_our_dc : "not set";
    crm_info("Sending shutdown request to all peers (DC is %s)", dc_name);

    request = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL,
                             CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);

    if (!send_cluster_message(NULL, crm_msg_crmd, request, TRUE)) {
        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
    }
    free_xml(request);
}
/* Defined elsewhere; both are freed and reset to NULL by crmd_exit() */
extern char *max_generation_from;
extern xmlNode *max_generation_xml;
/* NOTE(review): the two tables below are not referenced in this part of the
 * file
 */
extern GHashTable *resource_history;
extern GHashTable *voted;
/*!
 * \internal
 * \brief Exit right away, adjusting the exit status where necessary
 *
 * With R_STAYDOWN set, the status is replaced by CRM_EX_FATAL. Otherwise a
 * CRM_EX_OK status is turned into CRM_EX_ERROR while R_IN_RECOVERY is set.
 *
 * \param[in] exit_code  Requested exit status
 */
void
crmd_fast_exit(crm_exit_t exit_code)
{
    crm_exit_t final_code = exit_code;

    if (is_set(fsa_input_register, R_STAYDOWN)) {
        crm_warn("Inhibiting respawn "CRM_XS" remapping exit code %d to %d",
                 exit_code, CRM_EX_FATAL);
        final_code = CRM_EX_FATAL;

    } else if ((exit_code == CRM_EX_OK)
               && is_set(fsa_input_register, R_IN_RECOVERY)) {
        crm_err("Could not recover from internal error");
        final_code = CRM_EX_ERROR;
    }

    crm_exit(final_code);
}
/*!
 * \internal
 * \brief Tear down the controller in preparation for exiting
 *
 * For any status other than CRM_EX_OK (or when no main loop exists) this
 * hands over to crmd_fast_exit() after closing the IPC, scheduler and fencer
 * connections. Otherwise it releases the remaining state, drains pending
 * main loop events and asks the main loop to quit.
 *
 * \param[in] exit_code  Requested exit status
 *
 * \return \p exit_code (the graceful path only continues with CRM_EX_OK)
 */
crm_exit_t
crmd_exit(crm_exit_t exit_code)
{
GListPtr gIter = NULL;
GMainLoop *mloop = crmd_mainloop;
static bool in_progress = FALSE;
/* A repeated call is a no-op for CRM_EX_OK, and escalates to an immediate
 * exit for any other status
 */
if (in_progress && (exit_code == CRM_EX_OK)) {
crm_debug("Exit is already in progress");
return exit_code;
} else if(in_progress) {
crm_notice("Error during shutdown process, exiting now with status %d (%s)",
exit_code, crm_exit_str(exit_code));
crm_write_blackbox(SIGTRAP, NULL);
crmd_fast_exit(exit_code);
}
in_progress = TRUE;
crm_trace("Preparing to exit with status %d (%s)",
exit_code, crm_exit_str(exit_code));
/* Suppress secondary errors resulting from us disconnecting everything */
set_bit(fsa_input_register, R_HA_DISCONNECTED);
/* Close all IPC servers and clients to ensure any and all shared memory files are cleaned up */
if(ipcs) {
crm_trace("Closing IPC server");
mainloop_del_ipc_server(ipcs);
ipcs = NULL;
}
controld_close_attrd_ipc();
pe_subsystem_free();
controld_disconnect_fencer(TRUE);
/* A graceful exit needs a main loop to return from */
if ((exit_code == CRM_EX_OK) && (crmd_mainloop == NULL)) {
crm_debug("No mainloop detected");
exit_code = CRM_EX_ERROR;
}
/* On an error, just get out.
*
* Otherwise, make the effort to have mainloop exit gracefully so
* that it (mostly) cleans up after itself and valgrind has less
* to report on - allowing real errors to stand out
*/
if (exit_code != CRM_EX_OK) {
crm_notice("Forcing immediate exit with status %d (%s)",
exit_code, crm_exit_str(exit_code));
crm_write_blackbox(SIGTRAP, NULL);
crmd_fast_exit(exit_code);
}
/* Clean up as much memory as possible for valgrind */
for (gIter = fsa_message_queue; gIter != NULL; gIter = gIter->next) {
fsa_data_t *fsa_data = gIter->data;
crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]",
fsa_input2string(fsa_data->fsa_input),
fsa_state2string(fsa_state),
fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
delete_fsa_input(fsa_data);
}
clear_bit(fsa_input_register, R_MEMBERSHIP);
/* The queued inputs were deleted above; this frees the list itself */
g_list_free(fsa_message_queue); fsa_message_queue = NULL;
metadata_cache_fini();
controld_election_fini();
/* Tear down the CIB manager connection, but don't free it yet -- it could
* be used when we drain the mainloop later.
*/
cib_free_callbacks(fsa_cib_conn);
fsa_cib_conn->cmds->signoff(fsa_cib_conn);
verify_stopped(fsa_state, LOG_WARNING);
clear_bit(fsa_input_register, R_LRM_CONNECTED);
lrm_state_destroy_all();
/* This basically will not work, since mainloop has a reference to it */
mainloop_destroy_trigger(fsa_source); fsa_source = NULL;
mainloop_destroy_trigger(config_read); config_read = NULL;
mainloop_destroy_trigger(transition_trigger); transition_trigger = NULL;
pcmk__client_cleanup();
crm_peer_destroy();
controld_free_fsa_timers();
te_cleanup_stonith_history_sync(NULL, TRUE);
controld_free_sched_timer();
/* Release the global strings and reset them so they cannot be reused */
free(fsa_our_dc_version); fsa_our_dc_version = NULL;
free(fsa_our_uname); fsa_our_uname = NULL;
free(fsa_our_uuid); fsa_our_uuid = NULL;
free(fsa_our_dc); fsa_our_dc = NULL;
free(fsa_cluster_name); fsa_cluster_name = NULL;
free(te_uuid); te_uuid = NULL;
free(failed_stop_offset); failed_stop_offset = NULL;
free(failed_start_offset); failed_start_offset = NULL;
free(max_generation_from); max_generation_from = NULL;
free_xml(max_generation_xml); max_generation_xml = NULL;
mainloop_destroy_signal(SIGPIPE);
mainloop_destroy_signal(SIGUSR1);
mainloop_destroy_signal(SIGTERM);
mainloop_destroy_signal(SIGTRAP);
/* leave SIGCHLD engaged as we might still want to drain some service-actions */
if (mloop) {
GMainContext *ctx = g_main_loop_get_context(crmd_mainloop);
/* Don't re-enter this block */
crmd_mainloop = NULL;
/* no signals on final draining anymore */
mainloop_destroy_signal(SIGCHLD);
crm_trace("Draining mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
{
/* Dispatch pending events, but give up after 10 rounds */
int lpc = 0;
while((g_main_context_pending(ctx) && lpc < 10)) {
lpc++;
crm_trace("Iteration %d", lpc);
g_main_context_dispatch(ctx);
}
}
crm_trace("Closing mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
g_main_loop_quit(mloop);
/* Won't do anything yet, since we're inside it now */
g_main_loop_unref(mloop);
} else {
mainloop_destroy_signal(SIGCHLD);
}
/* Only now is it safe to free the CIB connection (see the signoff above) */
cib_delete(fsa_cib_conn);
fsa_cib_conn = NULL;
throttle_fini();
/* Graceful */
crm_trace("Done preparing for exit with status %d (%s)",
exit_code, crm_exit_str(exit_code));
return exit_code;
}
/* A_EXIT_0, A_EXIT_1 */
/*!
 * \internal
 * \brief FSA action handler: exit the controller
 *
 * A_EXIT_1 selects a forceful exit (CRM_EX_ERROR, logged as an error);
 * otherwise the exit is graceful (CRM_EX_OK, logged at info level).
 *
 * \param[in] action         Requested action(s)
 * \param[in] cause          Unused
 * \param[in] cur_state      Current FSA state, passed to verify_stopped()
 * \param[in] current_input  Unused
 * \param[in] msg_data       Unused
 */
void
do_exit(long long action,
        enum crmd_fsa_cause cause,
        enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    const int forced = ((action & A_EXIT_1) != 0);

    crm_exit_t exit_code = forced ? CRM_EX_ERROR : CRM_EX_OK;
    int log_level = forced ? LOG_ERR : LOG_INFO;
    const char *exit_type = forced ? "forcefully" : "gracefully";

    verify_stopped(cur_state, LOG_ERR);

    do_crm_log(log_level, "Performing %s - %s exiting the controller",
               fsa_action2string(action), exit_type);

    crm_info("[%s] stopped (%d)", crm_system_name, exit_code);
    crmd_exit(exit_code);
}
/* Signal handler that deliberately does nothing; do_startup() installs it
 * for SIGPIPE
 */
static void
sigpipe_ignore(int nsig)
{
    (void) nsig;
}
/* A_STARTUP */
/*!
 * \internal
 * \brief FSA action handler: set up signal handlers, triggers and core objects
 *
 * Registers the SIGTERM and SIGPIPE handlers, creates the FSA, configuration
 * and transition triggers, creates the CIB connection object and the local
 * executor state, and initializes the FSA timers. An FSA error is registered
 * if the timers cannot be initialized. None of the parameters are used
 * directly.
 */
void
do_startup(long long action,
           enum crmd_fsa_cause cause,
           enum crmd_fsa_state cur_state,
           enum crmd_fsa_input current_input,
           fsa_data_t * msg_data)
{
    crm_debug("Registering Signal Handlers");
    mainloop_add_signal(SIGTERM, crm_shutdown);
    mainloop_add_signal(SIGPIPE, sigpipe_ignore);

    fsa_source = mainloop_add_trigger(G_PRIORITY_HIGH, crm_fsa_trigger, NULL);
    config_read = mainloop_add_trigger(G_PRIORITY_HIGH, crm_read_options, NULL);
    transition_trigger = mainloop_add_trigger(G_PRIORITY_LOW, te_graph_trigger, NULL);

    crm_debug("Creating CIB manager and executor objects");
    fsa_cib_conn = cib_new();
    lrm_state_init_local();

    if (!controld_init_fsa_timers()) {
        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
    }
}
/*!
 * \internal
 * \brief IPC callback: accept a new client connection
 *
 * \param[in] c    New connection
 * \param[in] uid  User ID passed on to pcmk__new_client()
 * \param[in] gid  Group ID passed on to pcmk__new_client()
 *
 * \return 0 if a client object was created, -EIO otherwise
 */
static int32_t
crmd_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
    crm_trace("Connection %p", c);

    return (pcmk__new_client(c, uid, gid) == NULL) ? -EIO : 0;
}
-static void
-crmd_ipc_created(qb_ipcs_connection_t * c)
-{
- crm_trace("Connection %p", c);
-}
-
/*!
 * \internal
 * \brief IPC callback: handle a message received from a client
 *
 * The message is acknowledged first. If it could be converted to XML, it is
 * stamped with the client's ID, routed when authorized, and the FSA is
 * triggered.
 *
 * \param[in] c     Connection the data arrived on
 * \param[in] data  Raw message data
 * \param[in] size  Size of \p data
 *
 * \return Always 0
 */
static int32_t
crmd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size)
{
    uint32_t msg_id = 0;
    uint32_t msg_flags = 0;
    pcmk__client_t *sender = pcmk__find_client(c);
    xmlNode *request = pcmk__client_data2xml(sender, data, size, &msg_id, &msg_flags);

    crm_trace("Invoked: %s", pcmk__client_name(sender));

    /* The acknowledgement goes out even when the data cannot be parsed */
    pcmk__ipc_send_ack(sender, msg_id, msg_flags, "ack");

    if (request == NULL) {
        return 0;
    }

#if ENABLE_ACL
    CRM_ASSERT(sender->user != NULL);
    crm_acl_get_set_user(request, F_CRM_USER, sender->user);
#endif

    crm_trace("Processing IPC message from %s", pcmk__client_name(sender));
    crm_log_xml_trace(request, "controller[inbound]");

    crm_xml_add(request, F_CRM_SYS_FROM, sender->id);
    if (controld_authorize_ipc_message(request, sender, NULL)) {
        route_message(C_IPC_MESSAGE, request);
    }

    trigger_fsa(fsa_source);
    free_xml(request);
    return 0;
}
/*!
 * \internal
 * \brief IPC callback: a client connection was closed
 *
 * Frees the client object belonging to the connection (if there is one)
 * together with its user data, then triggers the FSA.
 *
 * \param[in] c  Connection that was closed
 *
 * \return Always 0
 */
static int32_t
crmd_ipc_closed(qb_ipcs_connection_t * c)
{
    pcmk__client_t *gone = pcmk__find_client(c);

    if (gone == NULL) {
        return 0;
    }

    crm_trace("Disconnecting %sregistered client %s (%p/%p)",
              (gone->userdata? "" : "un"), pcmk__client_name(gone),
              c, gone);
    free(gone->userdata);
    pcmk__free_client(gone);
    trigger_fsa(fsa_source);

    return 0;
}
/*!
 * \internal
 * \brief IPC callback: a client connection is being destroyed
 *
 * Logs the connection and performs the same cleanup as crmd_ipc_closed().
 *
 * \param[in] c  Connection being destroyed
 */
static void
crmd_ipc_destroy(qb_ipcs_connection_t * c)
{
    crm_trace("Connection %p", c);

    crmd_ipc_closed(c);
}
/* A_STOP */
/*!
 * \internal
 * \brief FSA action handler: close the IPC server and request termination
 *
 * None of the parameters are used directly.
 */
void
do_stop(long long action,
        enum crmd_fsa_cause cause,
        enum crmd_fsa_state cur_state,
        enum crmd_fsa_input current_input,
        fsa_data_t * msg_data)
{
    crm_trace("Closing IPC server");

    mainloop_del_ipc_server(ipcs);
    ipcs = NULL;

    register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
}
/* A_STARTED */
/* FSA action handler that completes start-up: once the FSA is in S_STARTING
 * and all required register bits are set, it creates the controller IPC
 * server, triggers the fencer connection, clears R_STARTING and registers
 * I_PENDING. If a required bit is still missing, the FSA is stalled so that
 * the action can be retried.
 */
void
do_started(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
/* Callbacks handed to crmd_ipc_server_init() below */
static struct qb_ipcs_service_handlers crmd_callbacks = {
.connection_accept = crmd_ipc_accept,
- .connection_created = crmd_ipc_created,
+ .connection_created = NULL,
.msg_process = crmd_ipc_dispatch,
.connection_closed = crmd_ipc_closed,
.connection_destroyed = crmd_ipc_destroy
};
/* Every missing prerequisite below stalls the FSA, except a wrong state,
 * which cancels the start
 */
if (cur_state != S_STARTING) {
crm_err("Start cancelled... %s", fsa_state2string(cur_state));
return;
} else if (is_set(fsa_input_register, R_MEMBERSHIP) == FALSE) {
crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP);
crmd_fsa_stall(TRUE);
return;
} else if (is_set(fsa_input_register, R_LRM_CONNECTED) == FALSE) {
crm_info("Delaying start, not connected to executor (%.16llx)", R_LRM_CONNECTED);
crmd_fsa_stall(TRUE);
return;
} else if (is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) {
crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED);
crmd_fsa_stall(TRUE);
return;
} else if (is_set(fsa_input_register, R_READ_CONFIG) == FALSE) {
crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG);
crmd_fsa_stall(TRUE);
return;
} else if (is_set(fsa_input_register, R_PEER_DATA) == FALSE) {
crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA);
crmd_fsa_stall(TRUE);
return;
}
crm_debug("Init server comms");
ipcs = crmd_ipc_server_init(&crmd_callbacks);
if (ipcs == NULL) {
crm_err("Failed to create IPC server: shutting down and inhibiting respawn");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
} else {
crm_notice("Pacemaker controller successfully started and accepting connections");
}
/* The steps below run whether or not the IPC server could be created */
controld_trigger_fencer_connect();
clear_bit(fsa_input_register, R_STARTING);
register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL);
}
/* A_RECOVER */
/*!
 * \internal
 * \brief FSA action handler: react to internal errors by shutting down
 *
 * Sets R_IN_RECOVERY and registers I_TERMINATE. None of the parameters are
 * used directly.
 */
void
do_recover(long long action,
           enum crmd_fsa_cause cause,
           enum crmd_fsa_state cur_state,
           enum crmd_fsa_input current_input,
           fsa_data_t * msg_data)
{
    set_bit(fsa_input_register, R_IN_RECOVERY);

    crm_warn("Fast-tracking shutdown in response to errors");
    register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
}
/* *INDENT-OFF* */
static pe_cluster_option crmd_opts[] = {
/* name, old-name, type, values, default, validator, short description, long description */
{ "dc-version", NULL, "string", NULL, "none", NULL,
"Version of Pacemaker on the cluster's DC.",
"Includes the hash which identifies the exact changeset it was built from. Used for diagnostic purposes."
},
{ "cluster-infrastructure", NULL, "string", NULL, "corosync", NULL,
"The messaging stack on which Pacemaker is currently running.",
"Used for informational and diagnostic purposes." },
{ "cluster-name", NULL, "string", NULL, NULL, NULL,
"An arbitrary name for the cluster",
"This optional value is mostly for users' convenience as desired "
"in administration, but may also be used in Pacemaker configuration "
"rules via the #cluster-name node attribute, and by higher-level tools "
"and resource agents."
},
{ XML_CONFIG_ATTR_DC_DEADTIME, NULL, "time", NULL, "20s", &check_time,
"How long to wait for a response from other nodes during startup.",
"The \"correct\" value will depend on the speed/load of your network and the type of switches used."
},
{ XML_CONFIG_ATTR_RECHECK, NULL, "time",
"Zero disables polling. Positive values are an interval in seconds (unless other SI units are specified. eg. 5min)",
"15min", &check_timer,
"Polling interval for time based changes to options, resource parameters and constraints.",
"The Cluster is primarily event driven, however the configuration can have elements that change based on time."
" To ensure these changes take effect, we can optionally poll the cluster's status for changes."
},
{ "load-threshold", NULL, "percentage", NULL, "80%", &check_utilization,
"The maximum amount of system resources that should be used by nodes in the cluster",
"The cluster will slow down its recovery process when the amount of system resources used"
" (currently CPU) approaches this limit",
},
{ "node-action-limit", NULL, "integer", NULL, "0", &check_number,
"The maximum number of jobs that can be scheduled per node. Defaults to 2x cores"},
{ XML_CONFIG_ATTR_FENCE_REACTION, NULL, "string", NULL, "stop", NULL,
"How a cluster node should react if notified of its own fencing",
"A cluster node may receive notification of its own fencing if fencing "
"is misconfigured, or if fabric fencing is in use that doesn't cut "
"cluster communication. Allowed values are \"stop\" to attempt to "
"immediately stop pacemaker and stay stopped, or \"panic\" to attempt "
"to immediately reboot the local node, falling back to stop on failure."
},
{ XML_CONFIG_ATTR_ELECTION_FAIL, NULL, "time", NULL, "2min", &check_timer,
"*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug."
},
{ XML_CONFIG_ATTR_FORCE_QUIT, NULL, "time", NULL, "20min", &check_timer,
"*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug."
},
{
"join-integration-timeout", "crmd-integration-timeout",
"time", NULL, "3min", &check_timer,
"*** Advanced Use Only ***",
"If need to adjust this value, it probably indicates the presence of a bug"
},
{
"join-finalization-timeout", "crmd-finalization-timeout",
"time", NULL, "30min", &check_timer,
"*** Advanced Use Only ***",
"If you need to adjust this value, it probably indicates the presence of a bug"
},
{
"transition-delay", "crmd-transition-delay",
"time", NULL, "0s", &check_timer,
"*** Advanced Use Only *** Enabling this option will slow down cluster recovery under all conditions",
"Delay cluster recovery for the configured interval to allow for additional/related events to occur.\n"
"Useful if your configuration is sensitive to the order in which ping updates arrive."
},
{ "stonith-watchdog-timeout", NULL, "time", NULL, NULL, &check_sbd_timeout,
"How long to wait before we can assume nodes are safely down", NULL
},
{ "stonith-max-attempts",NULL,"integer",NULL,"10",&check_positive_number,
"How many times stonith can fail before it will no longer be attempted on a target"
},
// Already documented in libpe_status (other values must be kept identical)
{ "no-quorum-policy", NULL, "enum", "stop, freeze, ignore, suicide", "stop", &check_quorum, NULL, NULL },
{ XML_CONFIG_ATTR_SHUTDOWN_LOCK, NULL, "boolean", NULL, "false", &check_boolean, NULL, NULL },
};
/* *INDENT-ON* */
/*!
 * \internal
 * \brief Hand the controller's option table to config_metadata()
 *
 * Passes every entry of crmd_opts, together with the daemon name, a version
 * string and short/long descriptions.
 */
void
crmd_metadata(void)
{
    config_metadata("pacemaker-controld",
                    "1.0",
                    "controller properties",
                    "Cluster properties used by Pacemaker's controller, formerly known as crmd",
                    crmd_opts,
                    DIMOF(crmd_opts));
}
/*!
 * \internal
 * \brief Run verify_all_options() on a table of values against crmd_opts
 *
 * \param[in] options  Hash table of configured option values
 */
static void
verify_crmd_options(GHashTable *options)
{
    verify_all_options(options,
                       crmd_opts,
                       DIMOF(crmd_opts));
}
/*!
 * \internal
 * \brief Look up a controller option via get_cluster_pref()
 *
 * \param[in] options  Hash table of configured option values
 * \param[in] name     Name of the option to look up in crmd_opts
 *
 * \return Whatever get_cluster_pref() returns for \p name
 */
static const char *
crmd_pref(GHashTable *options, const char *name)
{
    return get_cluster_pref(options,
                            crmd_opts,
                            DIMOF(crmd_opts),
                            name);
}
/*!
 * \internal
 * \brief CIB query callback that applies cluster options to the controller
 *
 * On a successful query, locates the XML_CIB_TAG_CRMCONFIG element in the
 * result, unpacks its name/value pairs into a temporary hash table, and uses
 * the resulting values to update timers, throttling, fencing and shutdown-lock
 * settings and the cached cluster name. It then unpacks alerts, sets
 * R_READ_CONFIG and triggers the FSA. On a failed query or a missing
 * crm_config section, it registers an FSA error instead.
 *
 * \param[in] msg        Not referenced in this function
 * \param[in] call_id    ID of the CIB call, used for logging only
 * \param[in] rc         Result of the CIB query (pcmk_ok on success)
 * \param[in] output     Query result XML
 * \param[in] user_data  Not referenced in this function
 */
static void
config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
const char *value = NULL;
GHashTable *config_hash = NULL;
crm_time_t *now = crm_time_new(NULL);
xmlNode *crmconfig = NULL;
xmlNode *alerts = NULL;
if (rc != pcmk_ok) {
// NOTE(review): msg_data is not named again in this block; presumably the
// register_fsa_error() macro expands to a use of it -- confirm
fsa_data_t *msg_data = NULL;
crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc));
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
if (rc == -EACCES || rc == -pcmk_err_schema_validation) {
crm_err("The cluster is mis-configured - shutting down and staying down");
set_bit(fsa_input_register, R_STAYDOWN);
}
goto bail;
}
// The result may be the crm_config element itself or a parent containing it
crmconfig = output;
if ((crmconfig) &&
(crm_element_name(crmconfig)) &&
(strcmp(crm_element_name(crmconfig), XML_CIB_TAG_CRMCONFIG) != 0)) {
crmconfig = first_named_child(crmconfig, XML_CIB_TAG_CRMCONFIG);
}
if (!crmconfig) {
// NOTE(review): as above, msg_data is presumably consumed by the macro
fsa_data_t *msg_data = NULL;
crm_err("Local CIB query for " XML_CIB_TAG_CRMCONFIG " section failed");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
goto bail;
}
crm_debug("Call %d : Parsing CIB options", call_id);
config_hash = crm_str_table_new();
pe_unpack_nvpairs(crmconfig, crmconfig, XML_CIB_TAG_PROPSET, NULL,
config_hash, CIB_OPTIONS_FIRST, FALSE, now, NULL);
verify_crmd_options(config_hash);
// Each option below is looked up with crmd_pref() and applied in turn
value = crmd_pref(config_hash, XML_CONFIG_ATTR_DC_DEADTIME);
election_trigger->period_ms = crm_parse_interval_spec(value);
value = crmd_pref(config_hash, "node-action-limit"); /* Also checks migration-limit */
throttle_update_job_max(value);
value = crmd_pref(config_hash, "load-threshold");
if(value) {
// The parsed number is divided by 100 before being passed on
throttle_set_load_target(strtof(value, NULL) / 100.0);
}
value = crmd_pref(config_hash, "no-quorum-policy");
// Escalation is enabled only if the policy is "suicide" and pcmk_locate_sbd()
// returns nonzero
if (safe_str_eq(value, "suicide") && pcmk_locate_sbd()) {
no_quorum_suicide_escalation = TRUE;
}
set_fence_reaction(crmd_pref(config_hash, XML_CONFIG_ATTR_FENCE_REACTION));
value = crmd_pref(config_hash,"stonith-max-attempts");
update_stonith_max_attempts(value);
value = crmd_pref(config_hash, XML_CONFIG_ATTR_FORCE_QUIT);
shutdown_escalation_timer->period_ms = crm_parse_interval_spec(value);
crm_debug("Shutdown escalation occurs if DC has not responded to request in %ums",
shutdown_escalation_timer->period_ms);
value = crmd_pref(config_hash, XML_CONFIG_ATTR_ELECTION_FAIL);
controld_set_election_period(value);
value = crmd_pref(config_hash, XML_CONFIG_ATTR_RECHECK);
recheck_interval_ms = crm_parse_interval_spec(value);
crm_debug("Re-run scheduler after %dms of inactivity", recheck_interval_ms);
value = crmd_pref(config_hash, "transition-delay");
transition_timer->period_ms = crm_parse_interval_spec(value);
value = crmd_pref(config_hash, "join-integration-timeout");
integration_timer->period_ms = crm_parse_interval_spec(value);
value = crmd_pref(config_hash, "join-finalization-timeout");
finalization_timer->period_ms = crm_parse_interval_spec(value);
value = crmd_pref(config_hash, XML_CONFIG_ATTR_SHUTDOWN_LOCK);
controld_shutdown_lock_enabled = crm_is_true(value);
// Replace the cached cluster name with a copy of the configured one (if any);
// this lookup reads the hash table directly rather than going via crmd_pref()
free(fsa_cluster_name);
fsa_cluster_name = NULL;
value = g_hash_table_lookup(config_hash, "cluster-name");
if (value) {
fsa_cluster_name = strdup(value);
}
alerts = first_named_child(output, XML_CIB_TAG_ALERTS);
crmd_unpack_alerts(alerts);
set_bit(fsa_input_register, R_READ_CONFIG);
crm_trace("Triggering FSA: %s", __FUNCTION__);
mainloop_set_trigger(fsa_source);
g_hash_table_destroy(config_hash);
// Both the success path and the error paths release the time object here
bail:
crm_time_free(now);
}
/*!
 * \internal
 * \brief Query the local CIB for the crm_config and alerts sections
 *
 * Issues an XPath query on the controller's CIB connection and registers
 * config_query_callback() for the resulting call ID.
 *
 * \param[in] user_data  Not referenced in this function
 *
 * \return Always TRUE
 */
gboolean
crm_read_options(gpointer user_data)
{
    const char *xpath = "//" XML_CIB_TAG_CRMCONFIG " | //" XML_CIB_TAG_ALERTS;
    int call_id = fsa_cib_conn->cmds->query(fsa_cib_conn, xpath, NULL,
                                            cib_xpath | cib_scope_local);

    fsa_register_cib_callback(call_id, FALSE, NULL, config_query_callback);
    crm_trace("Querying the CIB... call %d", call_id);

    return TRUE;
}
/* A_READCONFIG */
/*!
 * \internal
 * \brief FSA action handler that kicks off a configuration read
 *
 * Calls throttle_init() and then sets the config_read mainloop trigger.
 *
 * \param[in] action         Not referenced in this function
 * \param[in] cause          Not referenced in this function
 * \param[in] cur_state      Not referenced in this function
 * \param[in] current_input  Not referenced in this function
 * \param[in] msg_data       Not referenced in this function
 */
void
do_read_config(long long action,
               enum crmd_fsa_cause cause,
               enum crmd_fsa_state cur_state,
               enum crmd_fsa_input current_input,
               fsa_data_t *msg_data)
{
    throttle_init();

    mainloop_set_trigger(config_read);
}
/*!
 * \internal
 * \brief Begin (or escalate) a controller shutdown
 *
 * If no main loop is running, exits immediately via crmd_exit(). If
 * R_SHUTDOWN is already set, queues an I_ERROR input ahead of other inputs to
 * escalate. Otherwise sets R_SHUTDOWN, queues I_SHUTDOWN, and starts the
 * shutdown escalation timer (filling in its period from the option default if
 * it is still zero).
 *
 * \param[in] nsig  Not referenced in this function
 */
void
crm_shutdown(int nsig)
{
    if ((crmd_mainloop == NULL) || !g_main_loop_is_running(crmd_mainloop)) {
        crm_info("exit from shutdown");
        crmd_exit(CRM_EX_OK);
        return;
    }

    if (is_set(fsa_input_register, R_SHUTDOWN)) {
        // A shutdown was already requested
        crm_err("Escalating the shutdown");
        register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL);
        return;
    }

    set_bit(fsa_input_register, R_SHUTDOWN);
    register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);

    if (shutdown_escalation_timer->period_ms == 0) {
        // No period configured yet: look the option up without a value table
        const char *force_quit = crmd_pref(NULL, XML_CONFIG_ATTR_FORCE_QUIT);

        shutdown_escalation_timer->period_ms = crm_parse_interval_spec(force_quit);
    }

    /* can't rely on this... */
    crm_notice("Shutting down cluster resource manager " CRM_XS
               " limit=%ums", shutdown_escalation_timer->period_ms);
    controld_start_timer(shutdown_escalation_timer);
}
diff --git a/daemons/execd/remoted_proxy.c b/daemons/execd/remoted_proxy.c
index bfe0202999..dda111e242 100644
--- a/daemons/execd/remoted_proxy.c
+++ b/daemons/execd/remoted_proxy.c
@@ -1,456 +1,450 @@
/*
* Copyright 2012-2020 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <glib.h>
#include <unistd.h>
#include "pacemaker-execd.h"
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/services.h>
#include <crm/common/mainloop.h>
#include <crm/common/ipc.h>
#include <crm/common/ipcs_internal.h>
#include <crm/cib/internal.h>
#include <crm/fencing/internal.h>
static qb_ipcs_service_t *cib_ro = NULL;
static qb_ipcs_service_t *cib_rw = NULL;
static qb_ipcs_service_t *cib_shm = NULL;
static qb_ipcs_service_t *attrd_ipcs = NULL;
static qb_ipcs_service_t *crmd_ipcs = NULL;
static qb_ipcs_service_t *stonith_ipcs = NULL;
// An IPC provider is a cluster node controller connecting as a client
static GList *ipc_providers = NULL;
/* ipc clients == things like cibadmin, crm_resource, connecting locally */
static GHashTable *ipc_clients = NULL;
/*!
 * \internal
 * \brief Return the current IPC proxy provider, if any
 *
 * \return Client stored at the head of the provider list, or NULL if the list
 *         is empty
 *
 * \note Providers are prepended as they connect, so the head of the list is
 *       the most recent connection. Using it avoids picking an old, failed
 *       connection that has not yet been timed out.
 */
pcmk__client_t *
ipc_proxy_get_provider()
{
    if (ipc_providers == NULL) {
        return NULL;
    }
    return (pcmk__client_t *) (ipc_providers->data);
}
/*!
 * \internal
 * \brief Accept a local client connection on a proxied IPC server
 *
 * Creates a client object for the connection, binds it to the current
 * provider, records it in the ipc_clients table, and notifies the provider of
 * the new session.
 *
 * \param[in] c            Client's IPC connection
 * \param[in] uid          Client's user ID
 * \param[in] gid          Client's group ID
 * \param[in] ipc_channel  Name of IPC server to proxy
 *
 * \return 0 on success, -EREMOTEIO if there is no provider or the client
 *         object could not be created
 */
static int32_t
ipc_proxy_accept(qb_ipcs_connection_t *c, uid_t uid, gid_t gid, const char *ipc_channel)
{
    pcmk__client_t *new_client = NULL;
    pcmk__client_t *provider = ipc_proxy_get_provider();
    xmlNode *notify = NULL;

    if (provider == NULL) {
        crm_warn("Cannot proxy IPC connection from uid %d gid %d to %s because not connected to cluster",
                 uid, gid, ipc_channel);
        return -EREMOTEIO;
    }

    /* The connecting process is a local IPC client on a Pacemaker Remote
     * node that wants to reach an IPC service on a cluster node.
     */
    new_client = pcmk__new_client(c, uid, gid);
    if (new_client == NULL) {
        return -EREMOTEIO;
    }

    /* Remember which provider this client is tied to (by provider ID); the
     * client is disconnected if that provider goes away.
     */
    new_client->userdata = strdup(provider->id);
    new_client->name = crm_strdup_printf("proxy-%s-%d-%.8s", ipc_channel,
                                         new_client->pid, new_client->id);

    g_hash_table_insert(ipc_clients, new_client->id, new_client);

    // Tell the provider about the new session
    notify = create_xml_node(NULL, T_LRMD_IPC_PROXY);
    crm_xml_add(notify, F_LRMD_IPC_OP, LRMD_IPC_OP_NEW);
    crm_xml_add(notify, F_LRMD_IPC_IPC_SERVER, ipc_channel);
    crm_xml_add(notify, F_LRMD_IPC_SESSION, new_client->id);
    lrmd_server_send_notify(provider, notify);
    free_xml(notify);

    crm_debug("Accepted IPC proxy connection (session ID %s) from uid %d gid %d on channel %s",
              new_client->id, uid, gid, ipc_channel);
    return 0;
}
/*!
 * \internal
 * \brief Accept callback for the proxied controller IPC server
 *
 * \param[in] c    Client's IPC connection
 * \param[in] uid  Client's user ID
 * \param[in] gid  Client's group ID
 *
 * \return Result of ipc_proxy_accept() for channel CRM_SYSTEM_CRMD
 */
static int32_t
crmd_proxy_accept(qb_ipcs_connection_t *c, uid_t uid, gid_t gid)
{
    const char *channel = CRM_SYSTEM_CRMD;

    return ipc_proxy_accept(c, uid, gid, channel);
}
/*!
 * \internal
 * \brief Accept callback for the proxied attribute manager IPC server
 *
 * \param[in] c    Client's IPC connection
 * \param[in] uid  Client's user ID
 * \param[in] gid  Client's group ID
 *
 * \return Result of ipc_proxy_accept() for channel T_ATTRD
 */
static int32_t
attrd_proxy_accept(qb_ipcs_connection_t *c, uid_t uid, gid_t gid)
{
    const char *channel = T_ATTRD;

    return ipc_proxy_accept(c, uid, gid, channel);
}
/*!
 * \internal
 * \brief Accept callback for the proxied fencer IPC server
 *
 * \param[in] c    Client's IPC connection
 * \param[in] uid  Client's user ID
 * \param[in] gid  Client's group ID
 *
 * \return Result of ipc_proxy_accept() for channel "stonith-ng"
 */
static int32_t
stonith_proxy_accept(qb_ipcs_connection_t *c, uid_t uid, gid_t gid)
{
    const char *channel = "stonith-ng";

    return ipc_proxy_accept(c, uid, gid, channel);
}
/*!
 * \internal
 * \brief Accept callback for the proxied read/write CIB IPC server
 *
 * \param[in] c    Client's IPC connection
 * \param[in] uid  Client's user ID
 * \param[in] gid  Client's group ID
 *
 * \return Result of ipc_proxy_accept() for channel CIB_CHANNEL_RW
 */
static int32_t
cib_proxy_accept_rw(qb_ipcs_connection_t *c, uid_t uid, gid_t gid)
{
    const char *channel = CIB_CHANNEL_RW;

    return ipc_proxy_accept(c, uid, gid, channel);
}
/*!
 * \internal
 * \brief Accept callback for the proxied read-only CIB IPC server
 *
 * \param[in] c    Client's IPC connection
 * \param[in] uid  Client's user ID
 * \param[in] gid  Client's group ID
 *
 * \return Result of ipc_proxy_accept() for channel CIB_CHANNEL_RO
 */
static int32_t
cib_proxy_accept_ro(qb_ipcs_connection_t *c, uid_t uid, gid_t gid)
{
    const char *channel = CIB_CHANNEL_RO;

    return ipc_proxy_accept(c, uid, gid, channel);
}
-static void
-ipc_proxy_created(qb_ipcs_connection_t * c)
-{
- crm_trace("Connection %p", c);
-}
-
/*!
 * \internal
 * \brief Handle a proxy message received from an IPC provider
 *
 * Shutdown ACK/NACK messages are handed to their handlers. Otherwise the
 * message is routed to the local IPC client named by its session ID: events
 * and responses are relayed, a destroy request disconnects the client, and
 * anything else is logged as an error. If the session is unknown, the provider
 * is told to destroy it.
 *
 * \param[in] ipc_proxy  Provider the message came from
 * \param[in] xml        Proxy message
 */
void
ipc_proxy_forward_client(pcmk__client_t *ipc_proxy, xmlNode *xml)
{
    const char *session = crm_element_value(xml, F_LRMD_IPC_SESSION);
    const char *msg_type = crm_element_value(xml, F_LRMD_IPC_OP);
    xmlNode *payload = get_message_xml(xml, F_LRMD_IPC_MSG);
    pcmk__client_t *local_client = NULL;
    int rc = pcmk_rc_ok;

    /* A shutdown acknowledgement from the provider defuses the short exit
     * timer, giving the cluster time to stop any resources we're running.
     */
    if (safe_str_eq(msg_type, LRMD_IPC_OP_SHUTDOWN_ACK)) {
        handle_shutdown_ack();
        return;
    }

    if (safe_str_eq(msg_type, LRMD_IPC_OP_SHUTDOWN_NACK)) {
        handle_shutdown_nack();
        return;
    }

    local_client = pcmk__find_client_by_id(session);
    if (local_client == NULL) {
        // Unknown session: ask the provider to destroy it
        xmlNode *destroy_msg = create_xml_node(NULL, T_LRMD_IPC_PROXY);

        crm_xml_add(destroy_msg, F_LRMD_IPC_OP, LRMD_IPC_OP_DESTROY);
        crm_xml_add(destroy_msg, F_LRMD_IPC_SESSION, session);
        lrmd_server_send_notify(ipc_proxy, destroy_msg);
        free_xml(destroy_msg);
        return;
    }

    /* From here on this is an event or response travelling from the IPC
     * provider to the local IPC client:
     *
     * -----remote node----------------|---- cluster node ------
     * ipc_client <--1--> this code
     *            <--2--> pacemaker-controld:remote_proxy_cb/remote_proxy_relay_event()
     *            <--3--> ipc server
     *
     * The message arrived on connection 2 and is forwarded to connection 1.
     */
    if (safe_str_eq(msg_type, LRMD_IPC_OP_EVENT)) {
        crm_trace("Sending event to %s", local_client->id);
        rc = pcmk__ipc_send_xml(local_client, 0, payload, crm_ipc_server_event);

    } else if (safe_str_eq(msg_type, LRMD_IPC_OP_RESPONSE)) {
        int msg_id = 0;

        crm_element_value_int(xml, F_LRMD_IPC_MSG_ID, &msg_id);
        crm_trace("Sending response to %d - %s", local_client->request_id, local_client->id);
        rc = pcmk__ipc_send_xml(local_client, msg_id, payload, FALSE);

        CRM_LOG_ASSERT(msg_id == local_client->request_id);
        local_client->request_id = 0;

    } else if (safe_str_eq(msg_type, LRMD_IPC_OP_DESTROY)) {
        qb_ipcs_disconnect(local_client->ipcs);

    } else {
        crm_err("Unknown ipc proxy msg type %s" , msg_type);
    }

    if (rc != pcmk_rc_ok) {
        crm_warn("Could not proxy IPC to client %s: %s " CRM_XS " rc=%d",
                 local_client->id, pcmk_rc_str(rc), rc);
    }
}
/*!
 * \internal
 * \brief Forward a request from a local IPC client to its IPC provider
 *
 * Looks up the client object for the connection and the provider it is bound
 * to, parses the incoming data into XML, wraps it in a proxy request message,
 * and sends that to the provider. If the provider can no longer be found, the
 * client connection is disconnected instead.
 *
 * \param[in] c     Connection the data arrived on
 * \param[in] data  Raw request data
 * \param[in] size  Size of \p data
 *
 * \return 0 (FALSE) on every path
 */
static int32_t
ipc_proxy_dispatch(qb_ipcs_connection_t *c, void *data, size_t size)
{
    uint32_t id = 0;
    uint32_t flags = 0;
    pcmk__client_t *client = pcmk__find_client(c);
    pcmk__client_t *ipc_proxy = NULL;
    xmlNode *request = NULL;
    xmlNode *msg = NULL;

    /* Validate the client before it is dereferenced for the provider lookup
     * (nothing has been allocated yet, so there is nothing to free here).
     */
    CRM_CHECK(client != NULL, crm_err("Invalid client");
              return FALSE);

    ipc_proxy = pcmk__find_client_by_id(client->userdata);
    if (!ipc_proxy) {
        qb_ipcs_disconnect(client->ipcs);
        return 0;
    }

    /* This is a request from the local ipc client going
     * to the ipc provider.
     *
     * Looking at the chain of events.
     *
     * -----remote node----------------|---- cluster node ------
     * ipc_client <--1--> this code
     *            <--2--> pacemaker-controld:remote_proxy_dispatch_internal()
     *            <--3--> ipc server
     *
     * This function is receiving a request from connection
     * 1 and forwarding it to connection 2.
     */
    request = pcmk__client_data2xml(client, data, size, &id, &flags);
    if (!request) {
        return 0;
    }

    CRM_CHECK(client->id != NULL, crm_err("Invalid client: %p", client);
              free_xml(request); return FALSE);

    /* This ensures that synced request/responses happen over the event channel
     * in the controller, allowing the controller to process the messages async.
     */
    set_bit(flags, crm_ipc_proxied);
    client->request_id = id;

    msg = create_xml_node(NULL, T_LRMD_IPC_PROXY);
    crm_xml_add(msg, F_LRMD_IPC_OP, LRMD_IPC_OP_REQUEST);
    crm_xml_add(msg, F_LRMD_IPC_SESSION, client->id);
    crm_xml_add(msg, F_LRMD_IPC_CLIENT, pcmk__client_name(client));
    crm_xml_add(msg, F_LRMD_IPC_USER, client->user);
    crm_xml_add_int(msg, F_LRMD_IPC_MSG_ID, id);
    crm_xml_add_int(msg, F_LRMD_IPC_MSG_FLAGS, flags);
    add_message_xml(msg, F_LRMD_IPC_MSG, request);
    lrmd_server_send_notify(ipc_proxy, msg);

    free_xml(request);
    free_xml(msg);
    return 0;
}
/*!
 * \internal
 * \brief Send a shutdown request to an IPC proxy provider
 *
 * \param[in] ipc_proxy  Provider to notify
 *
 * \return 0 if the notification was sent successfully, -1 otherwise
 */
int
ipc_proxy_shutdown_req(pcmk__client_t *ipc_proxy)
{
    int rc = 0;
    xmlNode *request = create_xml_node(NULL, T_LRMD_IPC_PROXY);

    crm_xml_add(request, F_LRMD_IPC_OP, LRMD_IPC_OP_SHUTDOWN_REQ);

    /* There is no real session here, but the controller needs the session
     * attribute in order to recognize this as proxy communication.
     */
    crm_xml_add(request, F_LRMD_IPC_SESSION, "0");

    if (lrmd_server_send_notify(ipc_proxy, request) != pcmk_rc_ok) {
        rc = -1;
    }
    free_xml(request);
    return rc;
}
/*!
 * \internal
 * \brief Clean up after a proxied IPC client connection closes
 *
 * If the client's provider can still be found, it is told to destroy the
 * session. The client is then removed from the ipc_clients table and freed.
 *
 * \param[in] c  Connection that was closed
 *
 * \return Always 0
 */
static int32_t
ipc_proxy_closed(qb_ipcs_connection_t *c)
{
    pcmk__client_t *provider = NULL;
    pcmk__client_t *closing = pcmk__find_client(c);

    if (closing == NULL) {
        return 0;
    }

    provider = pcmk__find_client_by_id(closing->userdata);
    crm_trace("Connection %p", c);

    if (provider != NULL) {
        xmlNode *destroy_msg = create_xml_node(NULL, T_LRMD_IPC_PROXY);

        crm_xml_add(destroy_msg, F_LRMD_IPC_OP, LRMD_IPC_OP_DESTROY);
        crm_xml_add(destroy_msg, F_LRMD_IPC_SESSION, closing->id);
        lrmd_server_send_notify(provider, destroy_msg);
        free_xml(destroy_msg);
    }

    g_hash_table_remove(ipc_clients, closing->id);

    // userdata holds the provider ID string; release it before the client
    free(closing->userdata);
    closing->userdata = NULL;
    pcmk__free_client(closing);

    return 0;
}
/*!
 * \internal
 * \brief Connection-destroyed callback for proxied IPC servers
 *
 * Logs the connection at trace level and runs the same clean-up as
 * ipc_proxy_closed().
 *
 * \param[in] c  Connection being destroyed
 */
static void
ipc_proxy_destroy(qb_ipcs_connection_t *c)
{
    crm_trace("Connection %p", c);

    ipc_proxy_closed(c);
}
static struct qb_ipcs_service_handlers crmd_proxy_callbacks = {
.connection_accept = crmd_proxy_accept,
- .connection_created = ipc_proxy_created,
+ .connection_created = NULL,
.msg_process = ipc_proxy_dispatch,
.connection_closed = ipc_proxy_closed,
.connection_destroyed = ipc_proxy_destroy
};
static struct qb_ipcs_service_handlers attrd_proxy_callbacks = {
.connection_accept = attrd_proxy_accept,
- .connection_created = ipc_proxy_created,
+ .connection_created = NULL,
.msg_process = ipc_proxy_dispatch,
.connection_closed = ipc_proxy_closed,
.connection_destroyed = ipc_proxy_destroy
};
static struct qb_ipcs_service_handlers stonith_proxy_callbacks = {
.connection_accept = stonith_proxy_accept,
- .connection_created = ipc_proxy_created,
+ .connection_created = NULL,
.msg_process = ipc_proxy_dispatch,
.connection_closed = ipc_proxy_closed,
.connection_destroyed = ipc_proxy_destroy
};
static struct qb_ipcs_service_handlers cib_proxy_callbacks_ro = {
.connection_accept = cib_proxy_accept_ro,
- .connection_created = ipc_proxy_created,
+ .connection_created = NULL,
.msg_process = ipc_proxy_dispatch,
.connection_closed = ipc_proxy_closed,
.connection_destroyed = ipc_proxy_destroy
};
static struct qb_ipcs_service_handlers cib_proxy_callbacks_rw = {
.connection_accept = cib_proxy_accept_rw,
- .connection_created = ipc_proxy_created,
+ .connection_created = NULL,
.msg_process = ipc_proxy_dispatch,
.connection_closed = ipc_proxy_closed,
.connection_destroyed = ipc_proxy_destroy
};
/*!
 * \internal
 * \brief Register a client as an IPC proxy provider
 *
 * \param[in] ipc_proxy  Client to add at the head of the provider list
 */
void
ipc_proxy_add_provider(pcmk__client_t *ipc_proxy)
{
    // New providers go first so the newest connection is always at the head
    ipc_providers = g_list_prepend(ipc_providers, ipc_proxy);
}
/*!
 * \internal
 * \brief Unregister an IPC proxy provider and disconnect its clients
 *
 * Removes the provider from the provider list, then disconnects every local
 * IPC client whose userdata matches the provider's ID.
 *
 * \param[in] ipc_proxy  Provider being removed
 */
void
ipc_proxy_remove_provider(pcmk__client_t *ipc_proxy)
{
    GHashTableIter iter;
    pcmk__client_t *ipc_client = NULL;
    char *key = NULL;
    GList *doomed = NULL;
    GListPtr item = NULL;

    ipc_providers = g_list_remove(ipc_providers, ipc_proxy);

    /* First pass: collect matching clients. Entries can't be removed while
     * the table is being iterated, so they go into a temporary list.
     */
    g_hash_table_iter_init(&iter, ipc_clients);
    while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &ipc_client)) {
        const char *bound_to = ipc_client->userdata;

        if (!safe_str_eq(bound_to, ipc_proxy->id)) {
            continue;
        }
        crm_info("ipc proxy connection for client %s pid %d destroyed because cluster node disconnected.",
                 ipc_client->id, ipc_client->pid);
        doomed = g_list_append(doomed, ipc_client);
    }

    // Second pass: disconnect them (the disconnection callback frees the client)
    item = doomed;
    while (item != NULL) {
        ipc_client = item->data;
        qb_ipcs_disconnect(ipc_client->ipcs);
        item = item->next;
    }

    /* Frees only the list cells, not the clients they pointed to */
    g_list_free(doomed);
}
/*!
 * \internal
 * \brief Create the client table and the proxied IPC servers
 *
 * Initializes the CIB, attribute manager, fencer and controller IPC servers
 * with their proxy callbacks. If the controller IPC server cannot be created,
 * logs the failure and exits with CRM_EX_FATAL.
 */
void
ipc_proxy_init(void)
{
    ipc_clients = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, NULL);

    cib_ipc_servers_init(&cib_ro, &cib_rw, &cib_shm,
                         &cib_proxy_callbacks_ro, &cib_proxy_callbacks_rw);

    attrd_ipc_server_init(&attrd_ipcs, &attrd_proxy_callbacks);
    stonith_ipc_server_init(&stonith_ipcs, &stonith_proxy_callbacks);
    crmd_ipcs = crmd_ipc_server_init(&crmd_proxy_callbacks);

    if (crmd_ipcs != NULL) {
        return;
    }

    crm_err("Failed to create controller: exiting and inhibiting respawn");
    crm_warn("Verify pacemaker and pacemaker_remote are not both enabled");
    crm_exit(CRM_EX_FATAL);
}
/*!
 * \internal
 * \brief Release the provider list, client table and proxied IPC servers
 *
 * Every file-scope handle released here is reset to NULL afterwards, so that
 * no dangling service pointer remains once its server has been destroyed.
 */
void
ipc_proxy_cleanup(void)
{
    if (ipc_providers) {
        g_list_free(ipc_providers);
        ipc_providers = NULL;
    }
    if (ipc_clients) {
        g_hash_table_destroy(ipc_clients);
        ipc_clients = NULL;
    }

    cib_ipc_servers_destroy(cib_ro, cib_rw, cib_shm);
    qb_ipcs_destroy(attrd_ipcs);
    qb_ipcs_destroy(stonith_ipcs);
    qb_ipcs_destroy(crmd_ipcs);

    cib_ro = NULL;
    cib_rw = NULL;
    cib_shm = NULL;

    // Previously left pointing at destroyed services
    attrd_ipcs = NULL;
    stonith_ipcs = NULL;
    crmd_ipcs = NULL;
}
diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
index bff2efd082..5274f599a6 100644
--- a/daemons/fenced/pacemaker-fenced.c
+++ b/daemons/fenced/pacemaker-fenced.c
@@ -1,1529 +1,1523 @@
/*
* Copyright 2009-2020 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/utsname.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h> /* U32T ~ PRIu32, X32T ~ PRIx32 */
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/ipc.h>
#include <crm/common/ipcs_internal.h>
#include <crm/cluster/internal.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <crm/common/xml.h>
#include <crm/common/mainloop.h>
#include <crm/cib/internal.h>
#include <crm/pengine/status.h>
#include <pacemaker-internal.h>
#include <pacemaker-fenced.h>
char *stonith_our_uname = NULL;
char *stonith_our_uuid = NULL;
long stonith_watchdog_timeout_ms = 0;
static GMainLoop *mainloop = NULL;
gboolean stand_alone = FALSE;
static gboolean no_cib_connect = FALSE;
static gboolean stonith_shutdown_flag = FALSE;
static qb_ipcs_service_t *ipcs = NULL;
static xmlNode *local_cib = NULL;
static pe_working_set_t *fenced_data_set = NULL;
static cib_t *cib_api = NULL;
static void *cib_library = NULL;
static void stonith_shutdown(int nsig);
static void stonith_cleanup(void);
/*!
 * \internal
 * \brief Accept callback for the fencer's IPC server
 *
 * \param[in] c    Client's IPC connection
 * \param[in] uid  Client's user ID
 * \param[in] gid  Client's group ID
 *
 * \return 0 on success, -EPERM if the fencer is shutting down, or -EIO if a
 *         client object could not be created
 */
static int32_t
st_ipc_accept(qb_ipcs_connection_t *c, uid_t uid, gid_t gid)
{
    if (stonith_shutdown_flag) {
        crm_info("Ignoring new client [%d] during shutdown",
                 pcmk__client_pid(c));
        return -EPERM;
    }

    return (pcmk__new_client(c, uid, gid) == NULL)? -EIO : 0;
}
-static void
-st_ipc_created(qb_ipcs_connection_t * c)
-{
- crm_trace("Connection created for %p", c);
-}
-
/*!
 * \internal
 * \brief Handle a request received from a local fencer IPC client
 *
 * Parses the request into XML, relays CRM_OP_RM_NODE_CACHE requests to the
 * cluster, and passes everything else to stonith_command() after tagging the
 * request with the client's ID, name and the local node name.
 *
 * \param[in] qbc   Connection the data arrived on
 * \param[in] data  Raw request data
 * \param[in] size  Size of \p data
 *
 * \return 0 on every path visible in this function
 */
static int32_t
st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
{
uint32_t id = 0;
uint32_t flags = 0;
int call_options = 0;
xmlNode *request = NULL;
pcmk__client_t *c = pcmk__find_client(qbc);
const char *op = NULL;
if (c == NULL) {
crm_info("Invalid client: %p", qbc);
return 0;
}
request = pcmk__client_data2xml(c, data, size, &id, &flags);
if (request == NULL) {
// The data could not be turned into XML: reply with a "nack"
pcmk__ipc_send_ack(c, id, flags, "nack");
return 0;
}
op = crm_element_value(request, F_CRM_TASK);
if(safe_str_eq(op, CRM_OP_RM_NODE_CACHE)) {
// This operation is sent out as a cluster message rather than being
// handled by stonith_command()
crm_xml_add(request, F_TYPE, T_STONITH_NG);
crm_xml_add(request, F_STONITH_OPERATION, op);
crm_xml_add(request, F_STONITH_CLIENTID, c->id);
crm_xml_add(request, F_STONITH_CLIENTNAME, pcmk__client_name(c));
crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname);
send_cluster_message(NULL, crm_msg_stonith_ng, request, FALSE);
free_xml(request);
return 0;
}
if (c->name == NULL) {
// First request seen without a stored name: build one from the name given
// in the request (or "unknown") plus the client's PID
const char *value = crm_element_value(request, F_STONITH_CLIENTNAME);
if (value == NULL) {
value = "unknown";
}
c->name = crm_strdup_printf("%s.%u", value, c->pid);
}
crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
crm_trace("Flags %" X32T "/%u for command %" U32T " from %s",
flags, call_options, id, pcmk__client_name(c));
if (is_set(call_options, st_opt_sync_call)) {
// A synchronous call must have been sent with crm_ipc_client_response set
CRM_ASSERT(flags & crm_ipc_client_response);
CRM_LOG_ASSERT(c->request_id == 0); /* This means the client has two synchronous events in-flight */
c->request_id = id; /* Reply only to the last one */
}
crm_xml_add(request, F_STONITH_CLIENTID, c->id);
crm_xml_add(request, F_STONITH_CLIENTNAME, pcmk__client_name(c));
crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname);
crm_log_xml_trace(request, "Client[inbound]");
stonith_command(c, id, flags, request, NULL);
free_xml(request);
return 0;
}
/*!
 * \internal
 * \brief Connection-closed callback for the fencer's IPC server
 *
 * Frees the client object associated with the connection, if there is one.
 *
 * \param[in] c  Connection that was closed
 *
 * \return Always 0 (meaning: yes, go ahead and destroy the connection)
 */
static int32_t
st_ipc_closed(qb_ipcs_connection_t *c)
{
    pcmk__client_t *client = pcmk__find_client(c);

    if (client != NULL) {
        crm_trace("Connection %p closed", c);
        pcmk__free_client(client);
    }
    return 0;
}
/*!
 * \internal
 * \brief Connection-destroyed callback for the fencer's IPC server
 *
 * Logs the connection at trace level and runs the same clean-up as
 * st_ipc_closed().
 *
 * \param[in] c  Connection being destroyed
 */
static void
st_ipc_destroy(qb_ipcs_connection_t *c)
{
    crm_trace("Connection %p destroyed", c);

    st_ipc_closed(c);
}
/*!
 * \internal
 * \brief Process a fencer message received from a cluster peer
 *
 * Messages whose operation is "poke" are ignored; anything else is passed to
 * stonith_command() along with the originating peer's name.
 *
 * \param[in] msg           Message XML
 * \param[in] private_data  Not referenced in this function
 */
static void
stonith_peer_callback(xmlNode *msg, void *private_data)
{
    const char *sender = crm_element_value(msg, F_ORIG);
    const char *operation = crm_element_value(msg, F_STONITH_OPERATION);

    if (!crm_str_eq(operation, "poke", TRUE)) {
        crm_log_xml_trace(msg, "Peer[inbound]");
        stonith_command(NULL, 0, 0, msg, sender);
    }
}
#if SUPPORT_COROSYNC
/*!
 * \internal
 * \brief CPG delivery callback for the fencer
 *
 * Extracts the message text via pcmk_message_common_cs(). For messages of
 * kind crm_class_cluster, parses the text as XML, records the sender in
 * F_ORIG, and hands the result to stonith_peer_callback().
 *
 * \param[in] handle     CPG handle
 * \param[in] groupName  Not referenced in this function
 * \param[in] nodeid     ID of the sending node
 * \param[in] pid        Process ID of the sender
 * \param[in] msg        Raw message
 * \param[in] msg_len    Not referenced in this function
 */
static void
stonith_peer_ais_callback(cpg_handle_t handle,
                          const struct cpg_name *groupName,
                          uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
    uint32_t kind = 0;
    const char *from = NULL;
    xmlNode *parsed = NULL;
    char *text = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from);

    if (text == NULL) {
        return;
    }

    if (kind == crm_class_cluster) {
        parsed = string2xml(text);
        if (parsed == NULL) {
            crm_err("Invalid XML: '%.120s'", text);
        } else {
            crm_xml_add(parsed, F_ORIG, from);
            /* crm_xml_add_int(xml, F_SEQ, wrapper->id); */
            stonith_peer_callback(parsed, NULL);
        }
    }

    // Single clean-up path; parsed may still be NULL here
    free_xml(parsed);
    free(text);
}
/*!
 * \internal
 * \brief Callback run when the cluster-layer connection is lost
 *
 * Logs the loss at critical level and starts a fencer shutdown.
 *
 * \param[in] user_data  Not referenced in this function
 */
static void
stonith_peer_cs_destroy(gpointer user_data)
{
    crm_crit("Lost connection to cluster layer, shutting down");

    stonith_shutdown(0);
}
#endif
/*!
 * \internal
 * \brief Send a reply or event to a local IPC client
 *
 * \param[in] notify_src  XML to send
 * \param[in] client_id   ID of the client to send to
 * \param[in] sync_reply  If TRUE, send as the response to the client's
 *                        pending request (and clear that request ID);
 *                        otherwise send as a server event
 * \param[in] from_peer   Only affects trace logging
 *
 * \note If no client with the given ID exists, nothing is sent. A failed send
 *       is logged as a warning.
 */
void
do_local_reply(xmlNode * notify_src, const char *client_id, gboolean sync_reply, gboolean from_peer)
{
    /* send callback to originating child */
    pcmk__client_t *client_obj = NULL;
    int local_rc = pcmk_rc_ok;
    int rid = 0;

    crm_trace("Sending response");
    client_obj = pcmk__find_client_by_id(client_id);

    crm_trace("Sending callback to request originator");
    if (client_obj == NULL) {
        // Nothing to send to and nothing further to report
        crm_trace("No client to send the response to. F_STONITH_CLIENTID not set.");
        return;
    }

    if (sync_reply) {
        // A synchronous reply is expected to have a pending request ID
        CRM_LOG_ASSERT(client_obj->request_id);

        rid = client_obj->request_id;
        client_obj->request_id = 0;

        crm_trace("Sending response %d to %s %s",
                  rid, client_obj->name, from_peer ? "(originator of delegated request)" : "");

    } else {
        crm_trace("Sending an event to %s %s",
                  client_obj->name, from_peer ? "(originator of delegated request)" : "");
    }

    local_rc = pcmk__ipc_send_xml(client_obj, rid, notify_src,
                                  (sync_reply? crm_ipc_flags_none
                                   : crm_ipc_server_event));

    if (local_rc != pcmk_rc_ok) {
        crm_warn("%s reply to %s failed: %s",
                 (sync_reply? "Synchronous" : "Asynchronous"),
                 client_obj->name, pcmk_rc_str(local_rc));
    }
}
/*!
 * \internal
 * \brief Map a notification type name to its callback flag
 *
 * \param[in] name  Notification type or device operation name
 *
 * \return The st_callback_* value matching \p name, or st_callback_unknown if
 *         \p name matches none of the names checked here
 */
long long
get_stonith_flag(const char *name)
{
    if (safe_str_eq(name, T_STONITH_NOTIFY_FENCE)) {
        return st_callback_notify_fence;
    }
    if (safe_str_eq(name, STONITH_OP_DEVICE_ADD)) {
        return st_callback_device_add;
    }
    if (safe_str_eq(name, STONITH_OP_DEVICE_DEL)) {
        return st_callback_device_del;
    }
    if (safe_str_eq(name, T_STONITH_NOTIFY_HISTORY)) {
        return st_callback_notify_history;
    }
    if (safe_str_eq(name, T_STONITH_NOTIFY_HISTORY_SYNCED)) {
        return st_callback_notify_history_synced;
    }
    return st_callback_unknown;
}
/*!
 * \internal
 * \brief Send a notification to one IPC client if it subscribed to that type
 *
 * Has a (key, value, user_data) signature and is passed to
 * pcmk__foreach_ipc_client() by do_stonith_notify().
 *
 * \param[in] key        Not referenced in this function
 * \param[in] value      Client to (possibly) notify
 * \param[in] user_data  Notification XML; its F_SUBTYPE selects the flag that
 *                       the client's options are tested against
 */
static void
stonith_notify_client(gpointer key, gpointer value, gpointer user_data)
{
    pcmk__client_t *client = value;
    xmlNode *notification = user_data;
    const char *type = NULL;
    int rc = pcmk_rc_ok;

    CRM_CHECK(client != NULL, return);
    CRM_CHECK(notification != NULL, return);

    type = crm_element_value(notification, F_SUBTYPE);
    CRM_CHECK(type != NULL, crm_log_xml_err(notification, "notify"); return);

    if (client->ipcs == NULL) {
        crm_trace("Skipping client with NULL channel");
        return;
    }

    if (!(client->options & get_stonith_flag(type))) {
        // Client's options do not include this notification type
        return;
    }

    rc = pcmk__ipc_send_xml(client, 0, notification,
                            crm_ipc_server_event|crm_ipc_server_error);
    if (rc == pcmk_rc_ok) {
        crm_trace("Sent %s notification to client %s.%.6s", type,
                  pcmk__client_name(client), client->id);
    } else {
        crm_warn("%s notification of client %s failed: %s "
                 CRM_XS " id=%.8s rc=%d", type, pcmk__client_name(client),
                 pcmk_rc_str(rc), client->id, rc);
    }
}
/*!
 * \internal
 * \brief Tell a client the timeout value that applies to one of its calls
 *
 * Does nothing if \p timeout is 0, if either ID is NULL, or if no client with
 * the given ID can be found.
 *
 * \param[in] client_id  ID of the client to inform
 * \param[in] call_id    ID of the call the timeout applies to
 * \param[in] timeout    Timeout value to report
 *
 * \note The result of the send is not checked.
 */
void
do_stonith_async_timeout_update(const char *client_id, const char *call_id, int timeout)
{
    pcmk__client_t *client = NULL;
    xmlNode *notify_data = NULL;

    if (!timeout || !call_id || !client_id) {
        return;
    }

    client = pcmk__find_client_by_id(client_id);
    if (!client) {
        return;
    }

    notify_data = create_xml_node(NULL, T_STONITH_TIMEOUT_VALUE);
    crm_xml_add(notify_data, F_TYPE, T_STONITH_TIMEOUT_VALUE);
    crm_xml_add(notify_data, F_STONITH_CALLID, call_id);
    crm_xml_add_int(notify_data, F_STONITH_TIMEOUT, timeout);

    crm_trace("timeout update is %d for client %s and call id %s", timeout, client_id, call_id);

    // client is known to be non-NULL here, so no further check is needed
    pcmk__ipc_send_xml(client, 0, notify_data, crm_ipc_server_event);

    free_xml(notify_data);
}
/*!
 * \internal
 * \brief Build a notification and offer it to every IPC client
 *
 * \param[in] options  Not referenced in this function
 * \param[in] type     Notification type, stored as both F_SUBTYPE and
 *                     F_STONITH_OPERATION
 * \param[in] result   Result code to include in the notification
 * \param[in] data     If not NULL, XML attached as the call data
 *
 * \note A NULL \p type is checked with CRM_CHECK() but does not stop the
 *       function from proceeding.
 */
void
do_stonith_notify(int options, const char *type, int result, xmlNode *data)
{
    /* TODO: Standardize the contents of data */
    xmlNode *notification = create_xml_node(NULL, "notify");

    CRM_CHECK(type != NULL, ;);

    crm_xml_add(notification, F_TYPE, T_STONITH_NOTIFY);
    crm_xml_add(notification, F_SUBTYPE, type);
    crm_xml_add(notification, F_STONITH_OPERATION, type);
    crm_xml_add_int(notification, F_STONITH_RC, result);

    if (data != NULL) {
        add_message_xml(notification, F_STONITH_CALLDATA, data);
    }

    crm_trace("Notifying clients");
    pcmk__foreach_ipc_client(stonith_notify_client, notification);
    free_xml(notification);
    crm_trace("Notify complete");
}
/*!
 * \internal
 * \brief Send a notification about a configuration change
 *
 * \param[in] options  Passed through to do_stonith_notify()
 * \param[in] op       Operation name, used as both the XML element name and
 *                     the notification type
 * \param[in] rc       Result code of the operation
 * \param[in] desc     Value stored as F_STONITH_DEVICE
 * \param[in] active   Value stored as F_STONITH_ACTIVE
 */
static void
do_stonith_notify_config(int options, const char *op, int rc,
                         const char *desc, int active)
{
    xmlNode *payload = create_xml_node(NULL, op);

    CRM_CHECK(payload != NULL, return);

    crm_xml_add(payload, F_STONITH_DEVICE, desc);
    crm_xml_add_int(payload, F_STONITH_ACTIVE, active);

    do_stonith_notify(options, op, rc, payload);
    free_xml(payload);
}
/*!
 * \internal
 * \brief Notify clients of a device change, reporting the device table size
 *
 * \param[in] options  Passed through to do_stonith_notify_config()
 * \param[in] op       Operation name
 * \param[in] rc       Result code of the operation
 * \param[in] desc     Description stored in the notification
 */
void
do_stonith_notify_device(int options, const char *op, int rc, const char *desc)
{
    const int n_devices = g_hash_table_size(device_list);

    do_stonith_notify_config(options, op, rc, desc, n_devices);
}
/*!
 * \internal
 * \brief Notify clients of a topology change, reporting the topology table size
 *
 * \param[in] options  Passed through to do_stonith_notify_config()
 * \param[in] op       Operation name
 * \param[in] rc       Result code of the operation
 * \param[in] desc     Description stored in the notification
 */
void
do_stonith_notify_level(int options, const char *op, int rc, const char *desc)
{
    const int n_targets = g_hash_table_size(topology);

    do_stonith_notify_config(options, op, rc, desc, n_targets);
}
/*!
 * \internal
 * \brief Remove one fencing level for a target and notify clients
 *
 * Builds a fencing-level element describing the target and level, passes it
 * to stonith_level_remove(), then sends a level-deletion notification with
 * the result.
 *
 * \param[in] node   Fencing target (not modified or freed here)
 * \param[in] level  Index of the level to remove
 */
static void
topology_remove_helper(const char *node, int level)
{
    char *description = NULL;
    int result;
    xmlNode *request = create_xml_node(NULL, XML_TAG_FENCING_LEVEL);

    crm_xml_add(request, F_STONITH_ORIGIN, __FUNCTION__);
    crm_xml_add_int(request, XML_ATTR_STONITH_INDEX, level);
    crm_xml_add(request, XML_ATTR_STONITH_TARGET, node);

    result = stonith_level_remove(request, &description);
    do_stonith_notify_level(0, STONITH_OP_LEVEL_DEL, result, description);

    free_xml(request);
    free(description);
}
/*!
 * \internal
 * \brief Remove every stonith-class resource found in an XPath result set
 *
 * Results whose class attribute is not the stonith class (or that are NULL)
 * are skipped.
 *
 * \param[in] xpathObj  XPath search results to process
 */
static void
remove_cib_device(xmlXPathObjectPtr xpathObj)
{
    int idx = 0;
    const int n_results = numXpathResults(xpathObj);

    for (idx = 0; idx < n_results; idx++) {
        const char *agent_class = NULL;
        xmlNode *match = getXpathResult(xpathObj, idx);

        CRM_LOG_ASSERT(match != NULL);
        if (match != NULL) {
            agent_class = crm_element_value(match, XML_AGENT_ATTR_CLASS);
        }

        if (!safe_str_neq(agent_class, PCMK_RESOURCE_CLASS_STONITH)) {
            const char *device_id = crm_element_value(match, XML_ATTR_ID);

            stonith_device_remove(device_id, TRUE);
        }
    }
}
/*!
 * \internal
 * \brief Register a fencing level, optionally removing the existing one first
 *
 * \param[in] match   Fencing level XML to register
 * \param[in] remove  If true, remove the level with the same target key and
 *                    index before registering
 */
static void
handle_topology_change(xmlNode *match, bool remove)
{
    char *description = NULL;
    int result;

    CRM_CHECK(match != NULL, return);
    crm_trace("Updating %s", ID(match));

    if (remove) {
        int level = 0;
        char *target_key = stonith_level_key(match, -1);

        crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &level);
        topology_remove_helper(target_key, level);
        free(target_key);
    }

    result = stonith_level_register(match, &description);
    do_stonith_notify_level(0, STONITH_OP_LEVEL_ADD, result, description);
    free(description);
}
/*!
 * \internal
 * \brief Remove fencing levels that a format-1 CIB diff marks as deleted
 *
 * Only results carrying a diff marker attribute are treated as deletions;
 * anything else is left for the "addition" stage to handle.
 *
 * \param[in] xpathObj  XPath search results containing fencing level elements
 */
static void
remove_fencing_topology(xmlXPathObjectPtr xpathObj)
{
    int max = numXpathResults(xpathObj), lpc = 0;

    for (lpc = 0; lpc < max; lpc++) {
        xmlNode *match = getXpathResult(xpathObj, lpc);

        CRM_LOG_ASSERT(match != NULL);
        if (match && crm_element_value(match, XML_DIFF_MARKER)) {
            /* Deletion */
            int index = 0;
            char *target = stonith_level_key(match, -1);

            crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index);
            if (target == NULL) {
                crm_err("Invalid fencing target in element %s", ID(match));

            } else if (index <= 0) {
                crm_err("Invalid level for %s in element %s", target, ID(match));

            } else {
                topology_remove_helper(target, index);
            }

            /* The key is a dynamically allocated string owned by us (the
             * helper only reads it), so release it on every path.
             */
            free(target);

            /* } else { Deal with modifications during the 'addition' stage */
        }
    }
}
/*!
 * \internal
 * \brief Re-register every fencing level found in an XPath result set
 *
 * Each level is removed first (if present) and then registered again.
 *
 * \param[in] xpathObj  XPath search results containing fencing level elements
 */
static void
register_fencing_topology(xmlXPathObjectPtr xpathObj)
{
    int idx = 0;
    const int n_results = numXpathResults(xpathObj);

    while (idx < n_results) {
        xmlNode *level = getXpathResult(xpathObj, idx);

        handle_topology_change(level, TRUE);
        idx++;
    }
}
/* Fencing
<diff crm_feature_set="3.0.6">
<diff-removed>
<fencing-topology>
<fencing-level id="f-p1.1" target="pcmk-1" index="1" devices="poison-pill" __crm_diff_marker__="removed:top"/>
<fencing-level id="f-p1.2" target="pcmk-1" index="2" devices="power" __crm_diff_marker__="removed:top"/>
<fencing-level devices="disk,network" id="f-p2.1"/>
</fencing-topology>
</diff-removed>
<diff-added>
<fencing-topology>
<fencing-level id="f-p.1" target="pcmk-1" index="1" devices="poison-pill" __crm_diff_marker__="added:top"/>
<fencing-level id="f-p2.1" target="pcmk-2" index="1" devices="disk,something"/>
<fencing-level id="f-p3.1" target="pcmk-2" index="2" devices="power" __crm_diff_marker__="added:top"/>
</fencing-topology>
</diff-added>
</diff>
*/
/*!
 * \internal
 * \brief Rebuild the whole fencing topology from the locally cached CIB
 *
 * The existing topology list is discarded and recreated, then every fencing
 * level element found in the cached CIB is registered.
 */
static void
fencing_topology_init()
{
    const char *all_levels = "//" XML_TAG_FENCING_LEVEL;
    xmlXPathObjectPtr results = NULL;

    crm_trace("Full topology refresh");

    // Start from an empty list
    free_topology_list();
    init_topology_list();

    // Register every level currently configured
    results = xpath_search(local_cib, all_levels);
    register_fencing_topology(results);
    freeXpathObject(results);
}
/* Name to register a resource's device under: the clone name if the resource
 * has one, otherwise its ID. The argument and the whole expansion are
 * parenthesized so the macro is safe with any pointer expression and inside
 * larger expressions (note that the argument is evaluated more than once).
 */
#define rsc_name(x) ((x)->clone_name ? (x)->clone_name : (x)->id)
/*!
 * \internal
 * \brief Look up the local node in a resource's table of allowed nodes
 *
 * \param[in] rsc  Resource to check
 *
 * \return Allowed-node entry whose uname equals ours, or NULL if there is
 *         none (or if either the resource or our uname is unset)
 */
static node_t *
our_node_allowed_for(resource_t *rsc)
{
    GHashTableIter iter;
    node_t *candidate = NULL;

    if ((rsc == NULL) || (stonith_our_uname == NULL)) {
        return NULL;
    }

    g_hash_table_iter_init(&iter, rsc->allowed_nodes);
    while (g_hash_table_iter_next(&iter, NULL, (void **)&candidate)) {
        if ((candidate != NULL)
            && (strcmp(candidate->details->uname, stonith_our_uname) == 0)) {
            return candidate;
        }
    }
    return NULL;
}
/*!
 * \internal
 * \brief If a resource or any of its children are STONITH devices, update their
 *        definitions given a cluster working set.
 *
 * A stonith-class primitive is (re-)registered as a fence device when the
 * local node is in its allowed-node table with a non-negative weight (and, for
 * a group member, the group's weight for the local node is also non-negative).
 * Otherwise, if a device is currently registered under the resource's name, it
 * is removed.
 *
 * \param[in] rsc      Resource to check
 * \param[in] data_set Cluster working set with device information
 */
static void cib_device_update(resource_t *rsc, pe_working_set_t *data_set)
{
node_t *node = NULL;
const char *value = NULL;
const char *rclass = NULL;
node_t *parent = NULL;
/* Stays TRUE on every path except successful evaluation as "allowed" */
gboolean remove = TRUE;
/* If this is a complex resource, check children rather than this resource itself.
 * TODO: Mark each installed device and remove if untouched when this process finishes.
 */
if(rsc->children) {
GListPtr gIter = NULL;
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
cib_device_update(gIter->data, data_set);
/* For a clone, only the first child is examined */
if(pe_rsc_is_clone(rsc)) {
crm_trace("Only processing one copy of the clone %s", rsc->id);
break;
}
}
return;
}
/* We only care about STONITH resources. */
rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
if (safe_str_neq(rclass, PCMK_RESOURCE_CLASS_STONITH)) {
return;
}
/* If this STONITH resource is disabled, just remove it. */
if (pe__resource_is_disabled(rsc)) {
crm_info("Device %s has been disabled", rsc->id);
goto update_done;
}
/* Check whether our node is allowed for this resource (and its parent if in a group) */
node = our_node_allowed_for(rsc);
if (rsc->parent && (rsc->parent->variant == pe_group)) {
parent = our_node_allowed_for(rsc->parent);
}
if(node == NULL) {
/* Our node is disallowed, so remove the device */
GHashTableIter iter;
crm_info("Device %s has been disabled on %s: unknown", rsc->id, stonith_our_uname);
/* Trace which nodes are allowed (node is reused as the iteration cursor) */
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
crm_trace("Available: %s = %d", node->details->uname, node->weight);
}
goto update_done;
} else if(node->weight < 0 || (parent && parent->weight < 0)) {
/* Our node (or its group) is disallowed by score, so remove the device */
/* Log the resource's own weight if negative, else the group's */
char *score = score2char((node->weight < 0) ? node->weight : parent->weight);
crm_info("Device %s has been disabled on %s: score=%s", rsc->id, stonith_our_uname, score);
free(score);
goto update_done;
} else {
/* Our node is allowed, so update the device information */
int rc;
xmlNode *data;
GHashTableIter gIter;
stonith_key_value_t *params = NULL;
const char *name = NULL;
const char *agent = crm_element_value(rsc->xml, XML_EXPR_ATTR_TYPE);
const char *rsc_provides = NULL;
crm_debug("Device %s is allowed on %s: score=%d", rsc->id, stonith_our_uname, node->weight);
/* Populate the resource's parameter and meta-attribute tables */
get_rsc_attributes(rsc->parameters, rsc, node, data_set);
get_meta_attributes(rsc->meta, rsc, node, data_set);
rsc_provides = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_PROVIDES);
/* Copy every parameter with both a name and a value into a key/value list */
g_hash_table_iter_init(&gIter, rsc->parameters);
while (g_hash_table_iter_next(&gIter, (gpointer *) & name, (gpointer *) & value)) {
if (!name || !value) {
continue;
}
params = stonith_key_value_add(params, name, value);
crm_trace(" %s=%s", name, value);
}
remove = FALSE;
data = create_device_registration_xml(rsc_name(rsc), st_namespace_any,
agent, params, rsc_provides);
stonith_key_value_freeall(params, 1, 1);
rc = stonith_device_register(data, NULL, TRUE);
/* Registration failure is treated as fatal */
CRM_ASSERT(rc == pcmk_ok);
free_xml(data);
}
update_done:
/* Only remove devices that are actually registered under this name */
if(remove && g_hash_table_lookup(device_list, rsc_name(rsc))) {
stonith_device_remove(rsc_name(rsc), TRUE);
}
}
/*!
 * \internal
 * \brief Refresh all STONITH device definitions from the locally cached CIB
 *
 * The fencer's working set is pointed at the cached CIB, unpacked and
 * scheduled, and then each top-level resource is checked for fence devices.
 */
static void
cib_devices_update(void)
{
    GListPtr item = NULL;

    crm_info("Updating devices to version %s.%s.%s",
             crm_element_value(local_cib, XML_ATTR_GENERATION_ADMIN),
             crm_element_value(local_cib, XML_ATTR_GENERATION),
             crm_element_value(local_cib, XML_ATTR_NUMUPDATES));

    CRM_ASSERT(fenced_data_set != NULL);

    // Prepare the working set
    fenced_data_set->input = local_cib;
    fenced_data_set->now = crm_time_new(NULL);
    fenced_data_set->flags |= pe_flag_quick_location;
    fenced_data_set->localhost = stonith_our_uname;

    cluster_status(fenced_data_set);
    pcmk__schedule_actions(fenced_data_set, NULL, NULL);

    item = fenced_data_set->resources;
    while (item != NULL) {
        cib_device_update(item->data, fenced_data_set);
        item = item->next;
    }

    fenced_data_set->input = NULL; // Wasn't a copy, so don't let API free it
    pe_reset_working_set(fenced_data_set);
}
/*!
 * \internal
 * \brief Update fence devices in response to a format-2 CIB patchset
 *
 * Deleted primitives are removed from the device list directly. Any other
 * relevant change (to resources, constraints, resource defaults, or resource
 * (meta-)attribute sets) triggers a full device refresh from the cached CIB.
 * Changes without an operation or path, "move" operations, and changes to
 * the status section are ignored.
 *
 * \param[in] event  CIB notification event name (unused here)
 * \param[in] msg    CIB notification containing the patchset
 */
static void
update_cib_stonith_devices_v2(const char *event, xmlNode * msg)
{
    xmlNode *change = NULL;
    char *reason = NULL;
    bool needs_update = FALSE;
    xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);

    for (change = __xml_first_child(patchset); change != NULL; change = __xml_next(change)) {
        const char *op = crm_element_value(change, XML_DIFF_OP);
        const char *xpath = crm_element_value(change, XML_DIFF_PATH);
        const char *shortpath = NULL;

        /* A change lacking a path cannot be matched against anything below
         * (and passing NULL to strstr() is undefined), so skip it too.
         */
        if ((op == NULL) || (xpath == NULL) ||
            (strcmp(op, "move") == 0) ||
            strstr(xpath, "/" XML_CIB_TAG_STATUS)) {
            continue;

        } else if (safe_str_eq(op, "delete") && strstr(xpath, "/" XML_CIB_TAG_RESOURCE)) {
            const char *rsc_id = NULL;
            char *search = NULL;
            char *mutable = NULL;

            if (strstr(xpath, XML_TAG_ATTR_SETS) ||
                strstr(xpath, XML_TAG_META_SETS)) {
                needs_update = TRUE;
                reason = strdup("(meta) attribute deleted from resource");
                break;
            }

            /* Work on a copy so the ID can be terminated in place */
            mutable = strdup(xpath);
            CRM_ASSERT(mutable != NULL);

            rsc_id = strstr(mutable, "primitive[@id=\'");
            if (rsc_id != NULL) {
                rsc_id += strlen("primitive[@id=\'");
                search = strchr(rsc_id, '\'');
            }
            if (search != NULL) {
                *search = 0;
                stonith_device_remove(rsc_id, TRUE);
            } else {
                crm_warn("Ignoring malformed CIB update (resource deletion)");
            }
            free(mutable);

        } else if (strstr(xpath, "/" XML_CIB_TAG_RESOURCES) ||
                   strstr(xpath, "/" XML_CIB_TAG_CONSTRAINTS) ||
                   strstr(xpath, "/" XML_CIB_TAG_RSCCONFIG)) {
            shortpath = strrchr(xpath, '/'); CRM_ASSERT(shortpath);
            reason = crm_strdup_printf("%s %s", op, shortpath+1);
            needs_update = TRUE;
            break;
        }
    }

    if(needs_update) {
        crm_info("Updating device list from the cib: %s", reason);
        cib_devices_update();
    } else {
        crm_trace("No updates for device list found in cib");
    }
    free(reason);
}
/*!
 * \internal
 * \brief Update fence devices in response to a format-1 CIB diff
 *
 * Any location constraint in the update, or any added/modified stonith-class
 * resource, triggers a full device refresh. Removed resources are handed to
 * remove_cib_device().
 *
 * \param[in] event  CIB notification event name (unused here)
 * \param[in] msg    CIB notification containing the diff
 */
static void
update_cib_stonith_devices_v1(const char *event, xmlNode * msg)
{
    xmlXPathObjectPtr results = NULL;
    gboolean refresh = FALSE;
    const char *why = "none";

    /* process new constraints */
    results = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_CONS_TAG_RSC_LOCATION);
    if (numXpathResults(results) > 0) {
        int idx = 0;
        int count = numXpathResults(results);

        /* Safest and simplest to always recompute */
        refresh = TRUE;
        why = "new location constraint";

        for (idx = 0; idx < count; idx++) {
            xmlNode *match = getXpathResult(results, idx);

            crm_log_xml_trace(match, "new constraint");
        }
    }
    freeXpathObject(results);

    /* process deletions */
    results = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_RESOURCE);
    if (numXpathResults(results) > 0) {
        remove_cib_device(results);
    }
    freeXpathObject(results);

    /* process additions */
    results = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_RESOURCE);
    if (numXpathResults(results) > 0) {
        int idx = 0;
        int count = numXpathResults(results);

        for (idx = 0; idx < count; idx++) {
            xmlNode *match = getXpathResult(results, idx);
            const char *rsc_id = crm_element_value(match, XML_ATTR_ID);
            const char *standard = crm_element_value(match, XML_AGENT_ATTR_CLASS);

            if (!safe_str_neq(standard, PCMK_RESOURCE_CLASS_STONITH)) {
                crm_trace("Fencing resource %s was added or modified", rsc_id);
                why = "new resource";
                refresh = TRUE;
            }
        }
    }
    freeXpathObject(results);

    if (refresh) {
        crm_info("Updating device list from the cib: %s", why);
        cib_devices_update();
    }
}
/*!
 * \internal
 * \brief Dispatch a CIB update to the device handler for its patch format
 *
 * The format defaults to 1 when the patchset does not specify one.
 *
 * \param[in] event  CIB notification event name
 * \param[in] msg    CIB notification containing the patchset
 */
static void
update_cib_stonith_devices(const char *event, xmlNode * msg)
{
    xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);
    int format = 1;

    CRM_ASSERT(patchset);
    crm_element_value_int(patchset, "format", &format);

    if (format == 1) {
        update_cib_stonith_devices_v1(event, msg);

    } else if (format == 2) {
        update_cib_stonith_devices_v2(event, msg);

    } else {
        crm_warn("Unknown patch format: %d", format);
    }
}
/* Needs to hold node name + attribute name + attribute value + 75 */
#define XPATH_MAX 512

/*!
 * \internal
 * \brief Check whether a node has a specific attribute name/value
 *
 * \param[in] node    Name of node to check
 * \param[in] name    Name of an attribute to look for
 * \param[in] value   The value the named attribute needs to be set to in order to be considered a match
 *
 * \return TRUE if the locally cached CIB has the specified node attribute,
 *         FALSE otherwise (including when there is no cached CIB or the
 *         search expression does not fit in XPATH_MAX bytes)
 */
gboolean
node_has_attr(const char *node, const char *name, const char *value)
{
    char xpath[XPATH_MAX];
    xmlNode *match;
    int n;

    CRM_CHECK(local_cib != NULL, return FALSE);

    /* Search for the node's attributes in the CIB. While the schema allows
     * multiple sets of instance attributes, and allows instance attributes to
     * use id-ref to reference values elsewhere, that is intended for resources,
     * so we ignore that here.
     */
    n = snprintf(xpath, XPATH_MAX, "//" XML_CIB_TAG_NODES
                 "/" XML_CIB_TAG_NODE "[@uname='%s']/" XML_TAG_ATTR_SETS
                 "/" XML_CIB_TAG_NVPAIR "[@name='%s' and @value='%s']",
                 node, name, value);

    /* Validate the formatting result before using the buffer: a negative
     * value is an output error, and a value of XPATH_MAX or more means the
     * expression was truncated and must not be searched for.
     */
    CRM_CHECK((n >= 0) && (n < XPATH_MAX), return FALSE);

    match = get_xpath_object(xpath, local_cib, LOG_NEVER);
    return (match != NULL);
}
/*!
 * \internal
 * \brief Apply the fencing-topology portion of a CIB update
 *
 * For format-1 diffs, removed levels are unregistered and then added or
 * changed levels are (re-)registered. For format-2 patchsets, creation and
 * modification of individual levels are applied directly, while deletions and
 * changes at the topology or configuration level cause the whole topology to
 * be rebuilt from the locally cached CIB.
 *
 * \param[in] event  CIB notification event name (unused here)
 * \param[in] msg    CIB notification containing the patchset
 */
static void
update_fencing_topology(const char *event, xmlNode * msg)
{
    int format = 1;
    const char *xpath;
    xmlXPathObjectPtr xpathObj = NULL;
    xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);

    CRM_ASSERT(patchset);
    crm_element_value_int(patchset, "format", &format);

    if(format == 1) {
        /* Process deletions (only) */
        xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_FENCING_LEVEL;
        xpathObj = xpath_search(msg, xpath);

        remove_fencing_topology(xpathObj);
        freeXpathObject(xpathObj);

        /* Process additions and changes */
        xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_FENCING_LEVEL;
        xpathObj = xpath_search(msg, xpath);

        register_fencing_topology(xpathObj);
        freeXpathObject(xpathObj);

    } else if(format == 2) {
        xmlNode *change = NULL;
        int add[] = { 0, 0, 0 };
        int del[] = { 0, 0, 0 };

        xml_patch_versions(patchset, add, del);

        for (change = __xml_first_child(patchset); change != NULL; change = __xml_next(change)) {
            const char *op = crm_element_value(change, XML_DIFF_OP);
            const char *change_path = crm_element_value(change, XML_DIFF_PATH);

            /* Both an operation and a path are needed to classify a change
             * (and passing NULL to strstr() is undefined), so skip otherwise.
             */
            if ((op == NULL) || (change_path == NULL)) {
                continue;

            } else if(strstr(change_path, "/" XML_TAG_FENCING_LEVEL) != NULL) {
                /* Change to a specific entry */

                crm_trace("Handling %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], change_path);
                if(strcmp(op, "move") == 0) {
                    continue;

                } else if(strcmp(op, "create") == 0) {
                    handle_topology_change(change->children, FALSE);

                } else if(strcmp(op, "modify") == 0) {
                    xmlNode *match = first_named_child(change, XML_DIFF_RESULT);

                    if(match) {
                        handle_topology_change(match->children, TRUE);
                    }

                } else if(strcmp(op, "delete") == 0) {
                    /* Nuclear option, all we have is the path and an id... not enough to remove a specific entry */
                    crm_info("Re-initializing fencing topology after %s operation %d.%d.%d for %s",
                             op, add[0], add[1], add[2], change_path);
                    fencing_topology_init();
                    return;
                }

            } else if (strstr(change_path, "/" XML_TAG_FENCING_TOPOLOGY) != NULL) {
                /* Change to the topology in general */
                crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s",
                         op, add[0], add[1], add[2], change_path);
                fencing_topology_init();
                return;

            } else if (strstr(change_path, "/" XML_CIB_TAG_CONFIGURATION)) {
                /* Changes to the whole config section, possibly including the topology as a child */
                if(first_named_child(change, XML_TAG_FENCING_TOPOLOGY) == NULL) {
                    crm_trace("Nothing for us in %s operation %d.%d.%d for %s.",
                              op, add[0], add[1], add[2], change_path);

                } else if(strcmp(op, "delete") == 0 || strcmp(op, "create") == 0) {
                    crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s.",
                             op, add[0], add[1], add[2], change_path);
                    fencing_topology_init();
                    return;
                }

            } else {
                crm_trace("Nothing for us in %s operation %d.%d.%d for %s",
                          op, add[0], add[1], add[2], change_path);
            }
        }

    } else {
        crm_warn("Unknown patch format: %d", format);
    }
}
/* Set once init_cib_cache_cb() has processed the initial full CIB query;
 * update_cib_cache_cb() ignores diff notifications until then.
 */
static bool have_cib_devices = FALSE;
/*!
 * \internal
 * \brief Callback for CIB diff notifications
 *
 * Applies the patchset to the locally cached CIB (re-querying the full CIB if
 * that fails), refreshes the peer caches and the watchdog timeout, and then
 * updates the fencing topology and device list, either incrementally or
 * from scratch.
 *
 * \param[in] event  CIB notification event name
 * \param[in] msg    CIB notification (ignored if NULL)
 */
static void
update_cib_cache_cb(const char *event, xmlNode * msg)
{
int rc = pcmk_ok;
xmlNode *stonith_enabled_xml = NULL;
xmlNode *stonith_watchdog_xml = NULL;
const char *stonith_enabled_s = NULL;
/* Persists across calls; FALSE forces a full refresh on the next enabled update */
static gboolean stonith_enabled_saved = TRUE;
if(!have_cib_devices) {
crm_trace("Skipping updates until we get a full dump");
return;
} else if(msg == NULL) {
crm_trace("Missing %s update", event);
return;
}
/* Maintain a local copy of the CIB so that we have full access
 * to device definitions, location constraints, and node attributes
 */
if (local_cib != NULL) {
/* NOTE: this rc intentionally or not shadows the function-level rc */
int rc = pcmk_ok;
xmlNode *patchset = NULL;
/* Ignore notifications for CIB operations that did not succeed */
crm_element_value_int(msg, F_CIB_RC, &rc);
if (rc != pcmk_ok) {
return;
}
patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);
xml_log_patchset(LOG_TRACE, "Config update", patchset);
rc = xml_apply_patchset(local_cib, patchset, TRUE);
switch (rc) {
case pcmk_ok:
case -pcmk_err_old_data:
break;
case -pcmk_err_diff_resync:
case -pcmk_err_diff_failed:
/* Drop the cache so that the full CIB is re-requested below */
crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc);
free_xml(local_cib);
local_cib = NULL;
break;
default:
/* Any other failure also discards the cache, with a louder log */
crm_warn("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc);
free_xml(local_cib);
local_cib = NULL;
}
}
if (local_cib == NULL) {
crm_trace("Re-requesting the full cib");
rc = cib_api->cmds->query(cib_api, NULL, &local_cib, cib_scope_local | cib_sync_call);
if(rc != pcmk_ok) {
crm_err("Couldn't retrieve the CIB: %s (%d)", pcmk_strerror(rc), rc);
return;
}
CRM_ASSERT(local_cib != NULL);
stonith_enabled_saved = FALSE; /* Trigger a full refresh below */
}
crm_peer_caches_refresh(local_cib);
stonith_enabled_xml = get_xpath_object("//nvpair[@name='stonith-enabled']",
local_cib, LOG_NEVER);
if (stonith_enabled_xml) {
stonith_enabled_s = crm_element_value(stonith_enabled_xml, XML_NVPAIR_ATTR_VALUE);
}
/* An unset stonith-enabled is treated the same as a true value here */
if (stonith_enabled_s == NULL || crm_is_true(stonith_enabled_s)) {
long timeout_ms = 0;
const char *value = NULL;
stonith_watchdog_xml = get_xpath_object("//nvpair[@name='stonith-watchdog-timeout']",
local_cib, LOG_NEVER);
if (stonith_watchdog_xml) {
value = crm_element_value(stonith_watchdog_xml, XML_NVPAIR_ATTR_VALUE);
}
if(value) {
timeout_ms = crm_get_msec(value);
}
/* A negative configured value selects the automatically chosen timeout */
if (timeout_ms < 0) {
timeout_ms = crm_auto_watchdog_timeout();
}
if(timeout_ms != stonith_watchdog_timeout_ms) {
crm_notice("New watchdog timeout %lds (was %lds)", timeout_ms/1000, stonith_watchdog_timeout_ms/1000);
stonith_watchdog_timeout_ms = timeout_ms;
}
} else {
stonith_watchdog_timeout_ms = 0;
}
if (stonith_enabled_s && crm_is_true(stonith_enabled_s) == FALSE) {
crm_trace("Ignoring cib updates while stonith is disabled");
stonith_enabled_saved = FALSE;
return;
} else if (stonith_enabled_saved == FALSE) {
/* Either stonith was just re-enabled or the CIB was re-fetched in full */
crm_info("Updating stonith device and topology lists now that stonith is enabled");
stonith_enabled_saved = TRUE;
fencing_topology_init();
cib_devices_update();
} else {
/* Normal case: apply only what this notification changed */
update_fencing_topology(event, msg);
update_cib_stonith_devices(event, msg);
}
}
/*!
 * \internal
 * \brief Callback for the initial full CIB query
 *
 * Stores a copy of the query output as the local CIB cache, then builds the
 * peer caches, fencing topology and device list from it.
 *
 * \param[in] msg        Unused
 * \param[in] call_id    Unused
 * \param[in] rc         Unused
 * \param[in] output     Query result to copy into the local cache
 * \param[in] user_data  Unused
 */
static void
init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
    crm_info("Updating device list from the cib: init");

    // From now on, diff notifications may be processed
    have_cib_devices = TRUE;
    local_cib = copy_xml(output);

    crm_peer_caches_refresh(local_cib);

    fencing_topology_init();
    cib_devices_update();
}
/*!
 * \internal
 * \brief Begin shutting down the fencer
 *
 * If the main loop is running, ask it to quit; otherwise clean up and exit
 * immediately.
 *
 * \param[in] nsig  Unused
 */
static void
stonith_shutdown(int nsig)
{
    crm_info("Terminating with %d clients", pcmk__ipc_client_count());
    stonith_shutdown_flag = TRUE;

    if ((mainloop == NULL) || !g_main_loop_is_running(mainloop)) {
        stonith_cleanup();
        crm_exit(CRM_EX_OK);

    } else {
        g_main_loop_quit(mainloop);
    }
}
/*!
 * \internal
 * \brief Callback for loss of the CIB manager connection
 *
 * During shutdown the disconnect is only logged; otherwise it is treated as
 * critical and the fencer shuts itself down.
 *
 * \param[in] user_data  Unused
 */
static void
cib_connection_destroy(gpointer user_data)
{
    if (stonith_shutdown_flag) {
        crm_info("Connection to the CIB manager closed");
        return;
    }

    crm_crit("Lost connection to the CIB manager, shutting down");

    if (cib_api != NULL) {
        cib_api->cmds->signoff(cib_api);
    }
    stonith_shutdown(0);
}
/*!
 * \internal
 * \brief Release the fencer's connections, caches and lists
 *
 * Signs off from the CIB, destroys the IPC server, and frees the peer and
 * client caches, remote operation list, topology and device lists, metadata
 * cache, local node name and local CIB copy.
 */
static void
stonith_cleanup(void)
{
    // Connections
    if (cib_api != NULL) {
        cib_api->cmds->signoff(cib_api);
    }
    if (ipcs != NULL) {
        qb_ipcs_destroy(ipcs);
    }

    // Caches and lists
    crm_peer_destroy();
    pcmk__client_cleanup();
    free_stonith_remote_op_list();
    free_topology_list();
    free_device_list();
    free_metadata_cache();

    // Remaining globals
    free(stonith_our_uname);
    stonith_our_uname = NULL;

    free_xml(local_cib);
    local_cib = NULL;
}
/* *INDENT-OFF* */
/* Command-line options accepted by main(). The last field of each entry is
 * the flag value that main()'s option switch dispatches on:
 *   's' stand-alone mode, 'c' cluster mode without a CIB connection,
 *   'l' add a log file (the only option that uses optarg),
 *   'V' raise the log level, '$' and '?' show help.
 * The all-zero entry terminates the table.
 */
static struct crm_option long_options[] = {
{"stand-alone", 0, 0, 's'},
{"stand-alone-w-cpg", 0, 0, 'c'},
{"logfile", 1, 0, 'l'},
{"verbose", 0, 0, 'V'},
{"version", 0, 0, '$'},
{"help", 0, 0, '?'},
{0, 0, 0, 0}
};
/* *INDENT-ON* */
/*!
 * \internal
 * \brief Connect to the CIB manager and start tracking CIB changes
 *
 * Sign-on is attempted up to five times while it fails with -ENOTCONN,
 * sleeping 0, 1, 2, 3 and 4 seconds before the respective attempts. On
 * success, a diff notification callback is registered and an initial full
 * query is issued whose result is handled by init_cib_cache_cb().
 */
static void
setup_cib(void)
{
    static cib_t *(*cib_new_fn) (void) = NULL;
    int attempts = 0;
    int rc = pcmk_ok;

    // Resolve the CIB constructor once, then create the connection object
    if (cib_new_fn == NULL) {
        cib_new_fn = find_library_function(&cib_library, CIB_LIBRARY, "cib_new", TRUE);
    }
    if (cib_new_fn != NULL) {
        cib_api = (*cib_new_fn) ();
    }
    if (cib_api == NULL) {
        crm_err("No connection to the CIB manager");
        return;
    }

    do {
        sleep(attempts);
        rc = cib_api->cmds->signon(cib_api, CRM_SYSTEM_STONITHD, cib_command);
    } while (rc == -ENOTCONN && ++attempts < 5);

    if (rc != pcmk_ok) {
        crm_err("Could not connect to the CIB manager: %s (%d)", pcmk_strerror(rc), rc);
        return;
    }

    rc = cib_api->cmds->add_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb);
    if (rc != pcmk_ok) {
        crm_err("Could not set CIB notification callback");
        return;
    }

    // Request the full CIB; the reply is delivered to init_cib_cache_cb()
    rc = cib_api->cmds->query(cib_api, NULL, NULL, cib_scope_local);
    cib_api->cmds->register_callback(cib_api, rc, 120, FALSE, NULL, "init_cib_cache_cb",
                                     init_cib_cache_cb);
    cib_api->cmds->set_connection_dnotify(cib_api, cib_connection_destroy);
    crm_info("Watching for stonith topology changes");
}
/* Handlers for the fencer's IPC server; main() passes this table to
 * stonith_ipc_server_init().
 */
struct qb_ipcs_service_handlers ipc_callbacks = {
.connection_accept = st_ipc_accept,
- .connection_created = st_ipc_created,
+ .connection_created = NULL,
.msg_process = st_ipc_dispatch,
.connection_closed = st_ipc_closed,
.connection_destroyed = st_ipc_destroy
};
/*!
 * \internal
 * \brief Callback for peer status changes
 *
 * For any change other than a process change, on a node that is not a remote
 * node, a "poke" message is broadcast to the cluster.
 *
 * \param[in] type  What changed
 * \param[in] node  What peer had the change
 * \param[in] data  Previous value of what changed (unused here)
 */
static void
st_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
{
    xmlNode *poke = NULL;

    if ((type == crm_status_processes) || is_set(node->flags, crm_remote_node)) {
        return;
    }

    /*
     * This is a hack until we can send to a nodeid and/or we fix node name lookups
     * These messages are ignored in stonith_peer_callback()
     */
    poke = create_xml_node(NULL, "stonith_command");

    crm_xml_add(poke, F_XML_TAGNAME, "stonith_command");
    crm_xml_add(poke, F_TYPE, T_STONITH_NG);
    crm_xml_add(poke, F_STONITH_OPERATION, "poke");

    crm_debug("Broadcasting our uname because of node %u", node->id);
    send_cluster_message(NULL, crm_msg_stonith_ng, poke, FALSE);

    free_xml(poke);
}
/*!
 * \brief Entry point of the Pacemaker fencer daemon
 *
 * Parses command-line options, prints resource-agent style metadata and
 * returns if the single argument "metadata" was given, and otherwise starts
 * the daemon: refuses to start if another instance already answers on the
 * "stonith-ng" IPC end-point, connects to the cluster (and, unless disabled,
 * the CIB) when not in stand-alone mode, initializes the device and topology
 * lists, starts the IPC server, and runs the main loop until shutdown.
 *
 * \param[in] argc  Number of command-line arguments
 * \param[in] argv  Command-line arguments
 *
 * \return CRM_EX_OK after printing metadata (all other paths leave via
 *         crm_exit() or crm_help())
 */
int
main(int argc, char **argv)
{
    int flag;
    int lpc = 0;
    int argerr = 0;
    int option_index = 0;
    crm_cluster_t cluster;
    const char *actions[] = { "reboot", "off", "on", "list", "monitor", "status" };
    crm_ipc_t *old_instance = NULL;

    /* Only some fields are assigned below (and only for corosync clusters), so
     * zero the whole object to avoid handing indeterminate values to
     * crm_cluster_connect().
     */
    memset(&cluster, 0, sizeof(cluster));

    crm_log_preinit(NULL, argc, argv);
    crm_set_options(NULL, "mode [options]", long_options,
                    "Daemon for executing fencing devices in a Pacemaker cluster.\n");

    while (1) {
        flag = crm_get_option(argc, argv, &option_index);
        if (flag == -1) {
            break;
        }

        switch (flag) {
            case 'V':
                crm_bump_log_level(argc, argv);
                break;
            case 'l':
                crm_add_logfile(optarg);
                break;
            case 's':
                stand_alone = TRUE;
                break;
            case 'c':
                stand_alone = FALSE;
                no_cib_connect = TRUE;
                break;
            case '$':
            case '?':
                crm_help(flag, CRM_EX_OK);
                break;
            default:
                ++argerr;
                break;
        }
    }

    /* A single "metadata" argument prints the parameter descriptions and exits */
    if (argc - optind == 1 && safe_str_eq("metadata", argv[optind])) {
        printf("<?xml version=\"1.0\"?><!DOCTYPE resource-agent SYSTEM \"ra-api-1.dtd\">\n");
        printf("<resource-agent name=\"pacemaker-fenced\">\n");
        printf(" <version>1.0</version>\n");
        printf(" <longdesc lang=\"en\">Instance attributes available for all \"stonith\"-class resources"
               " and used by Pacemaker's fence daemon, formerly known as stonithd</longdesc>\n");
        printf(" <shortdesc lang=\"en\">Instance attributes available for all \"stonith\"-class resources</shortdesc>\n");
        printf(" <parameters>\n");

#if 0
        // priority is not implemented yet
        printf(" <parameter name=\"priority\" unique=\"0\">\n");
        printf(" <shortdesc lang=\"en\">Devices that are not in a topology "
               "are tried in order of highest to lowest integer priority</shortdesc>\n");
        printf(" <content type=\"integer\" default=\"0\"/>\n");
        printf(" </parameter>\n");
#endif

        printf(" <parameter name=\"%s\" unique=\"0\">\n", STONITH_ATTR_HOSTARG);
        printf
            (" <shortdesc lang=\"en\">Advanced use only: An alternate parameter to supply instead of 'port'</shortdesc>\n");
        printf
            (" <longdesc lang=\"en\">Some devices do not support the standard 'port' parameter or may provide additional ones.\n"
             "Use this to specify an alternate, device-specific, parameter that should indicate the machine to be fenced.\n"
             "A value of 'none' can be used to tell the cluster not to supply any additional parameters.\n"
             " </longdesc>\n");
        printf(" <content type=\"string\" default=\"port\"/>\n");
        printf(" </parameter>\n");

        printf(" <parameter name=\"%s\" unique=\"0\">\n", STONITH_ATTR_HOSTMAP);
        printf
            (" <shortdesc lang=\"en\">A mapping of host names to ports numbers for devices that do not support host names.</shortdesc>\n");
        printf
            (" <longdesc lang=\"en\">Eg. node1:1;node2:2,3 would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2</longdesc>\n");
        printf(" <content type=\"string\" default=\"\"/>\n");
        printf(" </parameter>\n");

        printf(" <parameter name=\"%s\" unique=\"0\">\n", STONITH_ATTR_HOSTLIST);
        printf
            (" <shortdesc lang=\"en\">A list of machines controlled by this device (Optional unless %s=static-list).</shortdesc>\n",
             STONITH_ATTR_HOSTCHECK);
        printf(" <content type=\"string\" default=\"\"/>\n");
        printf(" </parameter>\n");

        printf(" <parameter name=\"%s\" unique=\"0\">\n", STONITH_ATTR_HOSTCHECK);
        printf
            (" <shortdesc lang=\"en\">How to determine which machines are controlled by the device.</shortdesc>\n");
        printf(" <longdesc lang=\"en\">Allowed values: dynamic-list "
               "(query the device via the 'list' command), static-list "
               "(check the " STONITH_ATTR_HOSTLIST " attribute), status "
               "(query the device via the 'status' command), none (assume "
               "every device can fence every machine)</longdesc>\n");
        printf(" <content type=\"string\" default=\"dynamic-list\"/>\n");
        printf(" </parameter>\n");

        printf(" <parameter name=\"%s\" unique=\"0\">\n", STONITH_ATTR_DELAY_MAX);
        printf
            (" <shortdesc lang=\"en\">Enable a random delay for stonith actions and specify the maximum of random delay.</shortdesc>\n");
        printf
            (" <longdesc lang=\"en\">This prevents double fencing when using slow devices such as sbd.\n"
             "Use this to enable a random delay for stonith actions.\n"
             "The overall delay is derived from this random delay value adding a static delay so that the sum is kept below the maximum delay.</longdesc>\n");
        printf(" <content type=\"time\" default=\"0s\"/>\n");
        printf(" </parameter>\n");

        printf(" <parameter name=\"%s\" unique=\"0\">\n", STONITH_ATTR_DELAY_BASE);
        printf
            (" <shortdesc lang=\"en\">Enable a base delay for stonith actions and specify base delay value.</shortdesc>\n");
        printf
            (" <longdesc lang=\"en\">This prevents double fencing when different delays are configured on the nodes.\n"
             "Use this to enable a static delay for stonith actions.\n"
             "The overall delay is derived from a random delay value adding this static delay so that the sum is kept below the maximum delay.</longdesc>\n");
        printf(" <content type=\"time\" default=\"0s\"/>\n");
        printf(" </parameter>\n");

        printf(" <parameter name=\"%s\" unique=\"0\">\n", STONITH_ATTR_ACTION_LIMIT);
        printf
            (" <shortdesc lang=\"en\">The maximum number of actions can be performed in parallel on this device</shortdesc>\n");
        printf
            (" <longdesc lang=\"en\">Cluster property concurrent-fencing=true needs to be configured first.\n"
             "Then use this to specify the maximum number of actions can be performed in parallel on this device. -1 is unlimited.</longdesc>\n");
        printf(" <content type=\"integer\" default=\"1\"/>\n");
        printf(" </parameter>\n");

        /* Per-action override parameters (command, timeout, retries) */
        for (lpc = 0; lpc < DIMOF(actions); lpc++) {
            printf(" <parameter name=\"pcmk_%s_action\" unique=\"0\">\n", actions[lpc]);
            printf
                (" <shortdesc lang=\"en\">Advanced use only: An alternate command to run instead of '%s'</shortdesc>\n",
                 actions[lpc]);
            printf
                (" <longdesc lang=\"en\">Some devices do not support the standard commands or may provide additional ones.\n"
                 "Use this to specify an alternate, device-specific, command that implements the '%s' action.</longdesc>\n",
                 actions[lpc]);
            printf(" <content type=\"string\" default=\"%s\"/>\n", actions[lpc]);
            printf(" </parameter>\n");

            printf(" <parameter name=\"pcmk_%s_timeout\" unique=\"0\">\n", actions[lpc]);
            printf
                (" <shortdesc lang=\"en\">Advanced use only: Specify an alternate timeout to use for %s actions instead of stonith-timeout</shortdesc>\n",
                 actions[lpc]);
            printf
                (" <longdesc lang=\"en\">Some devices need much more/less time to complete than normal.\n"
                 "Use this to specify an alternate, device-specific, timeout for '%s' actions.</longdesc>\n",
                 actions[lpc]);
            printf(" <content type=\"time\" default=\"60s\"/>\n");
            printf(" </parameter>\n");

            printf(" <parameter name=\"pcmk_%s_retries\" unique=\"0\">\n", actions[lpc]);
            printf
                (" <shortdesc lang=\"en\">Advanced use only: The maximum number of times to retry the '%s' command within the timeout period</shortdesc>\n",
                 actions[lpc]);
            printf(" <longdesc lang=\"en\">Some devices do not support multiple connections."
                   " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining."
                   " Use this option to alter the number of times Pacemaker retries '%s' actions before giving up."
                   "</longdesc>\n", actions[lpc]);
            printf(" <content type=\"integer\" default=\"2\"/>\n");
            printf(" </parameter>\n");
        }

        printf(" </parameters>\n");
        printf("</resource-agent>\n");
        return CRM_EX_OK;
    }

    /* Any other leftover argument is a usage error */
    if (optind != argc) {
        ++argerr;
    }

    if (argerr) {
        crm_help('?', CRM_EX_USAGE);
    }

    crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);

    crm_notice("Starting Pacemaker fencer");

    /* Refuse to start if another instance already answers on our IPC end-point */
    old_instance = crm_ipc_new("stonith-ng", 0);
    if (crm_ipc_connect(old_instance)) {
        /* IPC end-point already up */
        crm_ipc_close(old_instance);
        crm_ipc_destroy(old_instance);
        crm_err("pacemaker-fenced is already active, aborting startup");
        crm_exit(CRM_EX_OK);
    } else {
        /* not up or not authentic, we'll proceed either way */
        crm_ipc_destroy(old_instance);
        old_instance = NULL;
    }

    mainloop_add_signal(SIGTERM, stonith_shutdown);

    crm_peer_init();

    fenced_data_set = pe_new_working_set();
    CRM_ASSERT(fenced_data_set != NULL);
    set_bit(fenced_data_set->flags, pe_flag_no_counts);
    set_bit(fenced_data_set->flags, pe_flag_no_compat);

    if (stand_alone == FALSE) {

        if (is_corosync_cluster()) {
#if SUPPORT_COROSYNC
            cluster.destroy = stonith_peer_cs_destroy;
            cluster.cpg.cpg_deliver_fn = stonith_peer_ais_callback;
            cluster.cpg.cpg_confchg_fn = pcmk_cpg_membership;
#endif
        }

        crm_set_status_callback(&st_peer_update_callback);

        if (crm_cluster_connect(&cluster) == FALSE) {
            crm_crit("Cannot sign in to the cluster... terminating");
            crm_exit(CRM_EX_FATAL);
        }
        stonith_our_uname = cluster.uname;
        stonith_our_uuid = cluster.uuid;

        if (no_cib_connect == FALSE) {
            setup_cib();
        }

    } else {
        stonith_our_uname = strdup("localhost");
    }

    init_device_list();
    init_topology_list();

    /* With a watchdog timeout configured, register a "watchdog" device whose
     * host list contains only the local node
     */
    if(stonith_watchdog_timeout_ms > 0) {
        int rc;
        xmlNode *xml;
        stonith_key_value_t *params = NULL;

        params = stonith_key_value_add(params, STONITH_ATTR_HOSTLIST, stonith_our_uname);

        xml = create_device_registration_xml("watchdog", st_namespace_internal,
                                             STONITH_WATCHDOG_AGENT, params,
                                             NULL);
        stonith_key_value_freeall(params, 1, 1);
        rc = stonith_device_register(xml, NULL, FALSE);
        free_xml(xml);
        if (rc != pcmk_ok) {
            crm_crit("Cannot register watchdog pseudo fence agent");
            crm_exit(CRM_EX_FATAL);
        }
    }

    stonith_ipc_server_init(&ipcs, &ipc_callbacks);

    /* Create the mainloop and run it... */
    mainloop = g_main_loop_new(NULL, FALSE);
    crm_notice("Pacemaker fencer successfully started and accepting connections");
    g_main_loop_run(mainloop);

    /* The main loop has quit: release everything and exit */
    stonith_cleanup();
    pe_free_working_set(fenced_data_set);

    crm_exit(CRM_EX_OK);
}
diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c
index 642f5c4f1c..70c044b990 100644
--- a/daemons/pacemakerd/pacemakerd.c
+++ b/daemons/pacemakerd/pacemakerd.c
@@ -1,1445 +1,1439 @@
/*
* Copyright 2010-2020 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include "pacemakerd.h"
#include <pwd.h>
#include <grp.h>
#include <poll.h>
#include <stdio.h>
#include <stdbool.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/reboot.h>
#include <crm/crm.h> /* indirectly: CRM_EX_* */
#include <crm/cib/internal.h> /* cib_channel_ro */
#include <crm/msg_xml.h>
#include <crm/common/ipcs_internal.h>
#include <crm/common/mainloop.h>
#include <crm/cluster/internal.h>
#include <crm/cluster.h>
#include <crm/common/ipc_internal.h> /* PCMK__SPECIAL_PID*, ... */
#ifdef SUPPORT_COROSYNC
#include <corosync/cfg.h>
#endif
#include <dirent.h>
#include <ctype.h>
/* Latest quorum state reported through mcp_quorum_callback() */
static gboolean pcmk_quorate = FALSE;
/* Set when a child exits with CRM_EX_FATAL or CRM_EX_PANIC; checked at the
 * end of pcmk_shutdown_worker() to exit with CRM_EX_FATAL */
static gboolean fatal_error = FALSE;
/* Main loop that pcmk_shutdown_worker() quits once all children are stopped */
static GMainLoop *mainloop = NULL;
/* true while the recurring check_active_before_startup_processes() timer
 * is expected to be installed */
static bool global_keep_tracking = false;
/* Interval, in seconds (passed to g_timeout_add_seconds()), between liveness
 * checks of children that are tracked without being our forked processes */
#define PCMK_PROCESS_CHECK_INTERVAL 5
/* Local node name, included in broadcast process lists when non-NULL */
static const char *local_name = NULL;
/* Local node ID, used to recognize our own process-list updates */
static uint32_t local_nodeid = 0;
/* Non-NULL once shutdown has been requested via pcmk_shutdown() */
static crm_trigger_t *shutdown_trigger = NULL;
/* PID file location; may be overridden with the -p command-line option */
static const char *pid_file = PCMK_RUN_DIR "/pacemaker.pid";
/* Bookkeeping for one subdaemon managed by pacemakerd */
typedef struct pcmk_child_s {
/* 0 when not running/tracked; otherwise the PID obtained from fork() or the
 * PID detected behind the IPC end-point (possibly PCMK__SPECIAL_PID) */
pid_t pid;
/* Bit OR-ed into the local process list while pid is nonzero */
long flag;
/* Start order (ascending); shutdown walks the same numbers in descending
 * order, and values below 1 are never started or stopped */
int start_seq;
/* Incremented each time the child exits; temporarily reused as a "rounds of
 * waiting" counter by find_and_track_existing_processes() */
int respawn_count;
/* Whether the child should be restarted after it exits */
gboolean respawn;
/* Name used in log messages */
const char *name;
/* User name to run the child as (NULL leaves the IDs unchanged) */
const char *uid;
/* Path of the executable; NULL means there is nothing to start or stop */
const char *command;
const char *endpoint; /* IPC server name */
/* TRUE when liveness is polled via child_liveness() instead of being
 * reported through the mainloop child-exit callback */
gboolean active_before_startup;
} pcmk_child_t;
/* Index into the array below */
#define PCMK_CHILD_CONTROLD 3
/* Table of managed subdaemons.
 *
 * Initializers are positional and follow the member order of pcmk_child_t:
 * pid, flag, start_seq, respawn_count, respawn, name, uid, command, endpoint
 * (members left out, such as active_before_startup, are zero-initialized).
 * The array position is independent of start_seq; only the controld entry's
 * position is referenced by index (PCMK_CHILD_CONTROLD).
 */
static pcmk_child_t pcmk_children[] = {
/* Placeholder entry: start_seq 0 and no command, so it is never started */
{
0, crm_proc_none, 0, 0, FALSE, "none",
NULL, NULL
},
{
0, crm_proc_execd, 3, 0, TRUE, "pacemaker-execd",
NULL, CRM_DAEMON_DIR "/pacemaker-execd",
CRM_SYSTEM_LRMD
},
{
0, crm_proc_based, 1, 0, TRUE, "pacemaker-based",
CRM_DAEMON_USER, CRM_DAEMON_DIR "/pacemaker-based",
CIB_CHANNEL_RO
},
/* Must stay at index PCMK_CHILD_CONTROLD */
{
0, crm_proc_controld, 6, 0, TRUE, "pacemaker-controld",
CRM_DAEMON_USER, CRM_DAEMON_DIR "/pacemaker-controld",
CRM_SYSTEM_CRMD
},
{
0, crm_proc_attrd, 4, 0, TRUE, "pacemaker-attrd",
CRM_DAEMON_USER, CRM_DAEMON_DIR "/pacemaker-attrd",
T_ATTRD
},
{
0, crm_proc_schedulerd, 5, 0, TRUE, "pacemaker-schedulerd",
CRM_DAEMON_USER, CRM_DAEMON_DIR "/pacemaker-schedulerd",
CRM_SYSTEM_PENGINE
},
{
0, crm_proc_fenced, 2, 0, TRUE, "pacemaker-fenced",
NULL, CRM_DAEMON_DIR "/pacemaker-fenced",
"stonith-ng"
},
};
/* Forward declarations for functions that are used before their definitions */
static gboolean check_active_before_startup_processes(gpointer user_data);
static int child_liveness(pcmk_child_t *child);
static gboolean start_child(pcmk_child_t * child);
static gboolean update_node_processes(uint32_t id, const char *uname,
uint32_t procs);
/* Not static: has external linkage */
void update_process_clients(pcmk__client_t *client);
static uint32_t
get_process_list(void)
{
int lpc = 0;
uint32_t procs = crm_get_cluster_proc();
for (lpc = 0; lpc < SIZEOF(pcmk_children); lpc++) {
if (pcmk_children[lpc].pid != 0) {
procs |= pcmk_children[lpc].flag;
}
}
return procs;
}
/*!
 * \internal
 * \brief React to a child no longer running: publish it, then recover
 *
 * Clears the child's tracking state, publishes the new process list, counts
 * the exit against MAX_RESPAWN, and then either resumes an ongoing shutdown,
 * does nothing (respawn disabled), panics (PCMK_fail_fast), keeps polling a
 * still-responsive IPC end-point, or starts the child again.
 *
 * \param[in,out] child  Child that exited
 */
static void
pcmk_process_exit(pcmk_child_t * child)
{
    child->pid = 0;
    child->active_before_startup = FALSE;

    /* Tell everyone right away that one of our processes is gone.
     *
     * Ideally the cause would be logged first, since fencing is probably
     * imminent. This could be limited to respawn_count > N to give local
     * recovery a chance.
     */
    update_node_processes(local_nodeid, NULL, get_process_list());

    child->respawn_count += 1;
    if (child->respawn_count > MAX_RESPAWN) {
        crm_err("Child respawn count exceeded by %s", child->name);
        child->respawn = FALSE;
    }

    if (shutdown_trigger) {
        /* Continue the step-wise shutdown (the worker returning TRUE means
         * steps are not run in parallel) */
        mainloop_set_trigger(shutdown_trigger);
        /* Likely meant to propagate the expected daemon lay-off sooner */
        update_node_processes(local_nodeid, NULL, get_process_list());
        return;
    }

    if (!child->respawn) {
        /* Respawning is disabled for this child, so leave it down */
        return;
    }

    if (crm_is_true(getenv("PCMK_fail_fast"))) {
        crm_err("Rebooting system because of %s", child->name);
        pcmk_panic(__FUNCTION__);
        return;
    }

    if (child_liveness(child) != pcmk_rc_ok) {
        crm_notice("Respawning failed child process: %s", child->name);
        start_child(child);
        return;
    }

    crm_warn("One-off suppressing strict respawning of a child process %s,"
             " appears alright per %s IPC end-point",
             child->name, child->endpoint);
    /* Keep an eye on it, and start a new process should it go bad */
    child->active_before_startup = TRUE;
    if (!global_keep_tracking) {
        global_keep_tracking = true;
        g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL,
                              check_active_before_startup_processes, NULL);
    }
}
/*!
 * \internal
 * \brief Exit pacemakerd, first asking Corosync to shut down on fatal errors
 *
 * When built with Corosync support and \p exitcode is CRM_EX_FATAL, an
 * immediate Corosync shutdown is requested through the cfg API before exiting.
 * Failures of that request are only logged.
 *
 * \param[in] exitcode  Exit status handed to crm_exit()
 */
static void pcmk_exit_with_cluster(int exitcode)
{
#ifdef SUPPORT_COROSYNC
    if (exitcode == CRM_EX_FATAL) {
        corosync_cfg_handle_t cfg_handle;
        cs_error_t err;

        crm_info("Asking Corosync to shut down");
        err = corosync_cfg_initialize(&cfg_handle, NULL);
        if (err != CS_OK) {
            /* cfg_handle is not valid here, so it must not be used for the
             * shutdown request or finalized */
            crm_warn("Unable to open handle to corosync to close it down. err=%d", err);
        } else {
            err = corosync_cfg_try_shutdown(cfg_handle, COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE);
            if (err != CS_OK) {
                crm_warn("Corosync shutdown failed. err=%d", err);
            }
            corosync_cfg_finalize(cfg_handle);
        }
    }
#endif
    crm_exit(exitcode);
}
/*!
 * \internal
 * \brief Mainloop callback invoked when a forked child terminates
 *
 * Logs how the child ended, starts a cluster shutdown for fatal or panic
 * exit statuses, and then hands over to pcmk_process_exit().
 *
 * \param[in] p         Mainloop child entry (provides name and user data)
 * \param[in] pid       PID of the terminated process
 * \param[in] core      Logged as "core=" when terminated by a signal
 * \param[in] signo     Terminating signal, or 0 if the process exited
 * \param[in] exitcode  Exit status (meaningful when \p signo is 0)
 */
static void
pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)
{
    pcmk_child_t *child = mainloop_child_userdata(p);
    const char *name = mainloop_child_name(p);

    if (signo != 0) {
        /* SIGKILL is only a warning, any other signal is an error */
        do_crm_log(((signo == SIGKILL)? LOG_WARNING : LOG_ERR),
                   "%s[%d] terminated with signal %d (core=%d)",
                   name, pid, signo, core);

    } else if (exitcode == CRM_EX_OK) {
        crm_info("%s[%d] exited with status %d (%s)",
                 name, pid, exitcode, crm_exit_str(exitcode));

    } else if (exitcode == CRM_EX_FATAL) {
        crm_warn("Shutting cluster down because %s[%d] had fatal failure",
                 name, pid);
        child->respawn = FALSE;
        fatal_error = TRUE;
        pcmk_shutdown(SIGTERM);

    } else if (exitcode == CRM_EX_PANIC) {
        do_crm_log_always(LOG_EMERG,
                          "%s[%d] instructed the machine to reset",
                          name, pid);
        child->respawn = FALSE;
        fatal_error = TRUE;
        pcmk_panic(__FUNCTION__);
        pcmk_shutdown(SIGTERM);

    } else {
        crm_err("%s[%d] exited with status %d (%s)",
                name, pid, exitcode, crm_exit_str(exitcode));
    }

    pcmk_process_exit(child);
}
/*!
 * \internal
 * \brief Send a signal to a child process, if there is one we can signal
 *
 * \param[in] child   Child to stop
 * \param[in] signal  Signal to send (0 selects SIGTERM)
 *
 * \return Always TRUE; a failed kill() is only logged
 */
static gboolean
stop_child(pcmk_child_t * child, int signal)
{
    const int signo = (signal == 0)? SIGTERM : signal;

    /* Why skip a PID of 1?
       - FreeBSD: it is how an untrackable process behind IPC is masqueraded
       - elsewhere: it designates the "init" task; with systemd socket-based
         activation this is a real possibility */
    if ((child->command == NULL) || (child->pid == PCMK__SPECIAL_PID)) {
        crm_debug("Nothing to do for child \"%s\" (process %lld)",
                  child->name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid));
        return TRUE;
    }

    if (child->pid <= 0) {
        crm_trace("Client %s not running", child->name);
        return TRUE;
    }

    errno = 0;
    if (kill(child->pid, signo) != 0) {
        crm_err("Could not stop %s (process %lld) with signal %d: %s",
                child->name, (long long) child->pid, signo, strerror(errno));
    } else {
        crm_notice("Stopping %s "CRM_XS" sent signal %d to process %lld",
                   child->name, signo, (long long) child->pid);
    }
    return TRUE;
}
/* argv for exec'ing a child directly: { command, NULL } */
static char *opts_default[] = { NULL, NULL };
/* argv for exec'ing a child under valgrind; filled in by start_child() */
static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL };
/* TODO once libqb is taught to juggle with IPC end-points carried over as
bare file descriptor (https://github.com/ClusterLabs/libqb/issues/325)
it shall hand over these descriptors here if/once they are successfully
pre-opened in (presumably) child_liveness(), to avoid any remaining
room for races */
/*!
 * \internal
 * \brief Fork and exec a subdaemon
 *
 * In the parent, the new PID is registered with the mainloop (exit callback
 * pcmk_child_exit) and the process list is republished. In the forked child,
 * a new session is started, group/user IDs are adjusted when the entry names
 * a user, inherited descriptors are closed, fds 0-2 are pointed at /dev/null,
 * and the command is exec'ed (optionally under valgrind/callgrind, selected
 * via PCMK_valgrind_enabled / PCMK_callgrind_enabled).
 *
 * \param[in,out] child  Child to start
 *
 * \return TRUE if there was nothing to start or the fork succeeded (parent
 *         side), FALSE if the configured user could not be looked up
 */
static gboolean
start_child(pcmk_child_t * child)
{
uid_t uid = 0;
gid_t gid = 0;
gboolean use_valgrind = FALSE;
gboolean use_callgrind = FALSE;
const char *devnull = "/dev/null";
const char *env_valgrind = getenv("PCMK_valgrind_enabled");
const char *env_callgrind = getenv("PCMK_callgrind_enabled");
child->active_before_startup = FALSE;
if (child->command == NULL) {
crm_info("Nothing to do for child \"%s\"", child->name);
return TRUE;
}
/* Each variable either holds a true value (applies to every child) or a
 * string that is searched for this child's name; callgrind implies valgrind */
if (env_callgrind != NULL && crm_is_true(env_callgrind)) {
use_callgrind = TRUE;
use_valgrind = TRUE;
} else if (env_callgrind != NULL && strstr(env_callgrind, child->name)) {
use_callgrind = TRUE;
use_valgrind = TRUE;
} else if (env_valgrind != NULL && crm_is_true(env_valgrind)) {
use_valgrind = TRUE;
} else if (env_valgrind != NULL && strstr(env_valgrind, child->name)) {
use_valgrind = TRUE;
}
if (use_valgrind && strlen(VALGRIND_BIN) == 0) {
crm_warn("Cannot enable valgrind for %s:"
" The location of the valgrind binary is unknown", child->name);
use_valgrind = FALSE;
}
/* Resolve the target user before forking so a lookup failure is reported
 * to the caller instead of happening inside the child */
if (child->uid) {
if (crm_user_lookup(child->uid, &uid, &gid) < 0) {
crm_err("Invalid user (%s) for %s: not found", child->uid, child->name);
return FALSE;
}
crm_info("Using uid=%u and group=%u for process %s", uid, gid, child->name);
}
child->pid = fork();
CRM_ASSERT(child->pid != -1);
if (child->pid > 0) {
/* parent */
mainloop_child_add(child->pid, 0, child->name, child, pcmk_child_exit);
crm_info("Forked child %lld for process %s%s",
(long long) child->pid, child->name,
use_valgrind ? " (valgrind enabled: " VALGRIND_BIN ")" : "");
update_node_processes(local_nodeid, NULL, get_process_list());
return TRUE;
} else {
/* Forked child: everything below either exec's or exits */
/* Start a new session */
(void)setsid();
/* Setup the two alternate arg arrays */
opts_vgrind[0] = strdup(VALGRIND_BIN);
if (use_callgrind) {
opts_vgrind[1] = strdup("--tool=callgrind");
opts_vgrind[2] = strdup("--callgrind-out-file=" CRM_STATE_DIR "/callgrind.out.%p");
opts_vgrind[3] = strdup(child->command);
opts_vgrind[4] = NULL;
} else {
opts_vgrind[1] = strdup(child->command);
opts_vgrind[2] = NULL;
opts_vgrind[3] = NULL;
opts_vgrind[4] = NULL;
}
opts_default[0] = strdup(child->command);
/* gid stays 0 unless a user was looked up above, so this whole section is
 * skipped for children without a configured user */
if(gid) {
// Whether we need root group access to talk to cluster layer
bool need_root_group = TRUE;
if (is_corosync_cluster()) {
/* Corosync clusters can drop root group access, because we set
* uidgid.gid.${gid}=1 via CMAP, which allows these processes to
* connect to corosync.
*/
need_root_group = FALSE;
}
// Drop root group access if not needed
if (!need_root_group && (setgid(gid) < 0)) {
crm_perror(LOG_ERR, "Could not set group to %d", gid);
}
/* Initialize supplementary groups to only those always granted to
* the user, plus haclient (so we can access IPC).
*/
if (initgroups(child->uid, gid) < 0) {
crm_err("Cannot initialize groups for %s: %s (%d)", child->uid, pcmk_strerror(errno), errno);
}
}
/* NOTE(review): failures to change group or user are only logged; the
 * exec below still proceeds */
if (uid && setuid(uid) < 0) {
crm_perror(LOG_ERR, "Could not set user to %d (%s)", uid, child->uid);
}
pcmk__close_fds_in_child(true);
/* The per-line notes below state which descriptor number each open() is
 * expected to yield */
(void)open(devnull, O_RDONLY); /* Stdin: fd 0 */
(void)open(devnull, O_WRONLY); /* Stdout: fd 1 */
(void)open(devnull, O_WRONLY); /* Stderr: fd 2 */
if (use_valgrind) {
(void)execvp(VALGRIND_BIN, opts_vgrind);
} else {
(void)execvp(child->command, opts_default);
}
/* Only reached if execvp() failed */
crm_perror(LOG_ERR, "FATAL: Cannot exec %s", child->command);
crm_exit(CRM_EX_FATAL);
}
return TRUE; /* never reached */
}
/*!
 * \internal
 * \brief Timer callback forcing a child that did not stop in time
 *
 * \param[in,out] data  The pcmk_child_t that was asked to stop
 *
 * \return Always FALSE, so the timer is not rescheduled
 */
static gboolean
escalate_shutdown(gpointer data)
{
    pcmk_child_t *laggard = data;

    if (laggard->pid == PCMK__SPECIAL_PID) {
        /* Cannot be signalled; just treat it as gone */
        pcmk_process_exit(laggard);
        return FALSE;
    }

    if (laggard->pid == 0) {
        return FALSE;
    }

    /* Use SIGSEGV instead of SIGKILL to create a core so we can see what it was up to */
    crm_err("Child %s not terminating in a timely manner, forcing", laggard->name);
    stop_child(laggard, SIGSEGV);
    return FALSE;
}
/* Grace period before a stopping child is forced: 3 minutes, expressed in
 * milliseconds because it is passed to g_timeout_add() */
#define SHUTDOWN_ESCALATION_PERIOD 180000 /* 3m */

/*!
 * \internal
 * \brief Trigger callback performing the step-wise shutdown of all children
 *
 * Children are stopped one at a time in descending start_seq order. Each
 * invocation either asks the next running child to stop (and returns to wait
 * for its exit, which re-fires the trigger via pcmk_process_exit()), logs that
 * it is still waiting, or, once no child is left, quits the main loop.
 * Progress is kept in function-local static variables between invocations.
 *
 * \param[in] user_data  Ignored
 *
 * \return Always TRUE
 */
static gboolean
pcmk_shutdown_worker(gpointer user_data)
{
    static int phase = 0;
    static time_t next_log = 0;
    static int max = SIZEOF(pcmk_children);
    int lpc = 0;

    if (phase == 0) {
        crm_notice("Shutting down Pacemaker");
        phase = max;
    }

    for (; phase > 0; phase--) {
        /* Don't stop anything with start_seq < 1 */
        for (lpc = max - 1; lpc >= 0; lpc--) {
            pcmk_child_t *child = &(pcmk_children[lpc]);

            if (phase != child->start_seq) {
                continue;
            }

            if (child->pid != 0) {
                time_t now = time(NULL);

                if (child->respawn) {
                    /* First visit for this child: request termination */
                    if (child->pid == PCMK__SPECIAL_PID) {
                        /* The period constant is in milliseconds, while the
                         * message reports seconds */
                        crm_warn("The process behind %s IPC cannot be"
                                 " terminated, so either wait the graceful"
                                 " period of %ld s for its native termination"
                                 " if it vitally depends on some other daemons"
                                 " going down in a controlled way already,"
                                 " or locate and kill the correct %s process"
                                 " on your own; set PCMK_fail_fast=1 to avoid"
                                 " this altogether next time around",
                                 child->name,
                                 (long) (SHUTDOWN_ESCALATION_PERIOD / 1000),
                                 child->command);
                    }
                    next_log = now + 30;
                    child->respawn = FALSE;
                    stop_child(child, SIGTERM);
                    /* Escalation is armed only for children whose start_seq
                     * is lower than the controller's */
                    if (phase < pcmk_children[PCMK_CHILD_CONTROLD].start_seq) {
                        g_timeout_add(SHUTDOWN_ESCALATION_PERIOD,
                                      escalate_shutdown, child);
                    }

                } else if (now >= next_log) {
                    next_log = now + 30;
                    crm_notice("Still waiting for %s to terminate "
                               CRM_XS " pid=%lld seq=%d",
                               child->name, (long long) child->pid,
                               child->start_seq);
                }
                /* Wait for this child before moving on to the next one */
                return TRUE;
            }

            /* cleanup */
            crm_debug("%s confirmed stopped", child->name);
            child->pid = 0;
        }
    }

    /* send_cluster_id(); */
    crm_notice("Shutdown complete");

    {
        const char *delay = pcmk__env_option("shutdown_delay");
        if(delay) {
            sync();
            sleep(crm_get_msec(delay) / 1000);
        }
    }

    g_main_loop_quit(mainloop);

    if (fatal_error) {
        crm_notice("Shutting down and staying down after fatal error");
        pcmk_exit_with_cluster(CRM_EX_FATAL);
    }

    return TRUE;
}
/*!
 * \internal
 * \brief Signal handler that merely logs the received signal
 *
 * \param[in] nsig  Signal number
 */
static void
pcmk_ignore(int nsig)
{
    crm_info("Ignoring signal %s (%d)",
             strsignal(nsig),
             nsig);
}
/*!
 * \internal
 * \brief Signal handler that calls pcmk_panic()
 *
 * \param[in] nsig  Ignored
 */
static void
pcmk_sigquit(int nsig)
{
    /* The signal number itself is of no interest here */
    pcmk_panic(__FUNCTION__);
}
/*!
 * \internal
 * \brief Request a shutdown of Pacemaker on this node
 *
 * Creates the shutdown trigger on first use and fires it, so that
 * pcmk_shutdown_worker() runs from the main loop.
 *
 * \param[in] nsig  Ignored
 */
void
pcmk_shutdown(int nsig)
{
    if (!shutdown_trigger) {
        /* Created lazily: its existence also marks "shutdown in progress" */
        shutdown_trigger = mainloop_add_trigger(G_PRIORITY_HIGH,
                                                pcmk_shutdown_worker, NULL);
    }

    mainloop_set_trigger(shutdown_trigger);
}
/*!
 * \internal
 * \brief IPC callback deciding whether to accept a new connection
 *
 * \param[in] c    New connection
 * \param[in] uid  Client user ID
 * \param[in] gid  Client group ID
 *
 * \return 0 if a client object was created, -EIO otherwise
 */
static int32_t
pcmk_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
    crm_trace("Connection %p", c);

    return (pcmk__new_client(c, uid, gid) != NULL)? 0 : -EIO;
}
-static void
-pcmk_ipc_created(qb_ipcs_connection_t * c)
-{
- crm_trace("Connection %p", c);
-}
-
/*!
 * \internal
 * \brief IPC callback handling one request from a client
 *
 * The request is acknowledged first, then dispatched on its task:
 * CRM_OP_QUIT starts a shutdown, CRM_OP_RM_NODE_CACHE is relayed to all
 * peers over CPG, and anything else is answered with the process list.
 *
 * \param[in] qbc   Connection the request arrived on
 * \param[in] data  Raw request data
 * \param[in] size  Size of \p data in bytes
 *
 * \return Always 0
 */
static int32_t
pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
{
    uint32_t id = 0;
    uint32_t flags = 0;
    const char *task = NULL;
    pcmk__client_t *c = pcmk__find_client(qbc);
    xmlNode *msg = pcmk__client_data2xml(c, data, size, &id, &flags);

    pcmk__ipc_send_ack(c, id, flags, "ack");
    if (msg == NULL) {
        return 0;
    }

    task = crm_element_value(msg, F_CRM_TASK);
    if (crm_str_eq(task, CRM_OP_QUIT, TRUE)) {
        /* Time to quit */
        crm_notice("Shutting down in response to ticket %s (%s)",
                   crm_element_value(msg, F_CRM_REFERENCE), crm_element_value(msg, F_CRM_ORIGIN));
        pcmk_shutdown(SIGTERM);

    } else if (crm_str_eq(task, CRM_OP_RM_NODE_CACHE, TRUE)) {
        /* Send to everyone */
        struct iovec *iov;
        int node_id = 0;    /* distinct name: must not shadow the request id */
        const char *name = NULL;

        crm_element_value_int(msg, XML_ATTR_ID, &node_id);
        name = crm_element_value(msg, XML_ATTR_UNAME);
        crm_notice("Instructing peers to remove references to node %s/%u",
                   name, (unsigned int) node_id);

        iov = calloc(1, sizeof(struct iovec));
        CRM_ASSERT(iov != NULL);
        iov->iov_base = dump_xml_unformatted(msg);
        iov->iov_len = 1 + strlen(iov->iov_base);
        /* iov is not freed here; presumably send_cpg_iov() takes ownership
         * (TODO confirm against its implementation) */
        send_cpg_iov(iov);

    } else {
        update_process_clients(c);
    }

    free_xml(msg);
    return 0;
}
/*!
 * \internal
 * \brief IPC callback invoked when a connection is closed
 *
 * Frees the client object associated with the connection, if any.
 *
 * \param[in] c  Connection being closed
 *
 * \return Always 0
 */
static int32_t
pcmk_ipc_closed(qb_ipcs_connection_t * c)
{
    pcmk__client_t *client = pcmk__find_client(c);

    if (client != NULL) {
        crm_trace("Connection %p", c);
        pcmk__free_client(client);
    }
    return 0;
}
/*!
 * \internal
 * \brief IPC callback invoked when a connection is destroyed
 *
 * Delegates to pcmk_ipc_closed() so the client object is released.
 *
 * \param[in] c  Connection being destroyed
 */
static void
pcmk_ipc_destroy(qb_ipcs_connection_t * c)
{
    crm_trace("Connection %p", c);
    (void) pcmk_ipc_closed(c);
}
/* Callbacks handed to the IPC server for pacemakerd's own end-point; no
 * "connection created" hook is installed */
struct qb_ipcs_service_handlers mcp_ipc_callbacks = {
.connection_accept = pcmk_ipc_accept,
- .connection_created = pcmk_ipc_created,
+ .connection_created = NULL,
.msg_process = pcmk_ipc_dispatch,
.connection_closed = pcmk_ipc_closed,
.connection_destroyed = pcmk_ipc_destroy
};
/*!
 * \internal
 * \brief Send an XML event to one IPC client (usable as a foreach callback)
 *
 * \param[in] key        Ignored
 * \param[in] value      Client (pcmk__client_t *) to send to
 * \param[in] user_data  XML (xmlNode *) to send
 */
static void
send_xml_to_client(gpointer key, gpointer value, gpointer user_data)
{
    pcmk__client_t *recipient = (pcmk__client_t *) value;
    xmlNode *payload = (xmlNode *) user_data;

    pcmk__ipc_send_xml(recipient, 0, payload, crm_ipc_server_event);
}
/*!
 * \internal
 * \brief Send the process lists of all known peers to IPC client(s) as XML
 *
 * The message is a "nodes" element with one "node" child per peer cache
 * entry, plus a "quorate" attribute on Corosync clusters.
 *
 * \param[in] client  Client to notify, or NULL to notify every client
 */
void
update_process_clients(pcmk__client_t *client)
{
    GHashTableIter peers;
    crm_node_t *peer = NULL;
    xmlNode *report = create_xml_node(NULL, "nodes");

    if (is_corosync_cluster()) {
        crm_xml_add_int(report, "quorate", pcmk_quorate);
    }

    g_hash_table_iter_init(&peers, crm_peer_cache);
    while (g_hash_table_iter_next(&peers, NULL, (gpointer *) &peer)) {
        xmlNode *entry = create_xml_node(report, "node");

        crm_xml_add_int(entry, "id", peer->id);
        crm_xml_add(entry, "uname", peer->uname);
        crm_xml_add(entry, "state", peer->state);
        crm_xml_add_int(entry, "processes", peer->processes);
    }

    if (client == NULL) {
        crm_trace("Sending process list to %d clients",
                  pcmk__ipc_client_count());
        pcmk__foreach_ipc_client(send_xml_to_client, report);
    } else {
        crm_trace("Sending process list to client %s", client->id);
        send_xml_to_client(NULL, client, report);
    }

    free_xml(report);
}
/*!
 * \internal
 * \brief Broadcast the local node's process list to all peers over CPG
 *
 * The message is a single "node" XML element carrying the process bitmask in
 * "proclist" and, when the local node name is known, the name in "uname".
 */
static void
update_process_peers(void)
{
    struct iovec *iov = calloc(1, sizeof(struct iovec));
    uint32_t procs;

    CRM_ASSERT(iov);
    procs = get_process_list();

    if (local_name == NULL) {
        iov->iov_base = crm_strdup_printf("<node proclist=\"%u\"/>",
                                          procs);
    } else {
        iov->iov_base = crm_strdup_printf("<node uname=\"%s\" proclist=\"%u\"/>",
                                          local_name, procs);
    }

    /* Length includes the terminating NUL */
    iov->iov_len = strlen(iov->iov_base) + 1;
    crm_trace("Sending %s", (char*) iov->iov_base);
    send_cpg_iov(iov);
}
/*!
 * \internal
 * \brief Record a node's process list, notifying clients and peers if needed
 *
 * A \p procs value of 0 is ignored. When the local node's list changes, IPC
 * clients and cluster peers are informed right away.
 *
 * \param[in] id     Node ID of affected node
 * \param[in] uname  Uname of affected node
 * \param[in] procs  Affected node's process list mask
 *
 * \return TRUE if the process list changed, FALSE otherwise
 */
static gboolean
update_node_processes(uint32_t id, const char *uname, uint32_t procs)
{
    crm_node_t *peer = crm_get_peer(id, uname);

    if (procs == 0) {
        return FALSE;
    }

    if (procs == peer->processes) {
        crm_trace("Node %s still has process list: %.32x", peer->uname, procs);
        return FALSE;
    }

    crm_debug("Node %s now has process list: %.32x (was %.32x)",
              peer->uname, procs, peer->processes);
    peer->processes = procs;

    /* If local node's processes have changed, notify clients/peers */
    if (id == local_nodeid) {
        update_process_clients(NULL);
        update_process_peers();
    }
    return TRUE;
}
/* *INDENT-OFF* */
/* Command-line options registered via crm_set_options() in main(); the fourth
 * member of each entry is the code that main()'s option switch matches on.
 * NOTE(review): the meaning of the second and third members is defined by
 * struct crm_option, which is declared elsewhere -- confirm there. */
static struct crm_option long_options[] = {
/* Top-level Options */
{"help", 0, 0, '?', "\tThis text"},
{"version", 0, 0, '$', "\tVersion information" },
{"verbose", 0, 0, 'V', "\tIncrease debug output"},
{"shutdown", 0, 0, 'S', "\tInstruct Pacemaker to shutdown on this machine"},
{"features", 0, 0, 'F', "\tDisplay the full version and list of features Pacemaker was built with"},
{"-spacer-", 1, 0, '-', "\nAdditional Options:"},
{"foreground", 0, 0, 'f', "\t(Ignored) Pacemaker always runs in the foreground"},
{"pid-file", 1, 0, 'p', "\t(Ignored) Daemon pid file location"},
{"standby", 0, 0, 's', "\tStart node in standby state"},
/* Terminating entry */
{NULL, 0, 0, 0}
};
/* *INDENT-ON* */
/*!
 * \internal
 * \brief Change ownership of a path, logging a warning on failure
 *
 * \param[in] path  Path to change
 * \param[in] uid   New owner
 * \param[in] gid   New group
 *
 * \note The warning always names CRM_DAEMON_USER as the user, regardless of
 *       the \p uid actually passed.
 */
static void
mcp_chown(const char *path, uid_t uid, gid_t gid)
{
    if (chown(path, uid, gid) >= 0) {
        return;
    }

    crm_warn("Cannot change the ownership of %s to user %s and gid %d: %s",
             path, CRM_DAEMON_USER, gid, pcmk_strerror(errno));
}
/*!
* \internal
* \brief Check the liveness of the child based on IPC name and PID if tracked
*
* \param[inout] child Child tracked data
*
* \return Standard Pacemaker return code
*
* \note Return codes of particular interest include pcmk_rc_ipc_unresponsive
* indicating that no trace of IPC liveness was detected,
* pcmk_rc_ipc_unauthorized indicating that the IPC endpoint is blocked by
* an unauthorized process, and pcmk_rc_ipc_pid_only indicating that
* the child is up by PID but not IPC end-point (possibly starting).
* \note This function doesn't modify any of \p child members but \c pid,
* and is not actively toying with processes as such but invoking
* \c stop_child in one particular case (there's for some reason
* a different authentic holder of the IPC end-point).
*/
static int
child_liveness(pcmk_child_t *child)
{
/* IDs of the daemon user, filled in only when the child runs as that user */
uid_t cl_uid = 0;
gid_t cl_gid = 0;
const uid_t root_uid = 0;
const gid_t root_gid = 0;
/* Point at whichever ID pair the authentic IPC server is expected to have */
const uid_t *ref_uid;
const gid_t *ref_gid;
int rc = pcmk_rc_ipc_unresponsive;
/* PID found behind the IPC end-point; stays 0 if none was reported */
pid_t ipc_pid = 0;
if (child->endpoint == NULL
&& (child->pid <= 0 || child->pid == PCMK__SPECIAL_PID)) {
crm_err("Cannot track child %s for missing both API end-point and PID",
child->name);
rc = EINVAL; // Misuse of function when child is not trackable
} else if (child->endpoint != NULL) {
/* Stage 1: probe the IPC end-point */
int legacy_rc = pcmk_ok;
if (child->uid == NULL) {
/* No user configured for the child: expect root's IDs */
ref_uid = &root_uid;
ref_gid = &root_gid;
} else {
ref_uid = &cl_uid;
ref_gid = &cl_gid;
legacy_rc = pcmk_daemon_user(&cl_uid, &cl_gid);
}
if (legacy_rc < 0) {
rc = pcmk_legacy2rc(legacy_rc);
crm_err("Could not find user and group IDs for user %s: %s "
CRM_XS " rc=%d", CRM_DAEMON_USER, pcmk_rc_str(rc), rc);
} else {
rc = pcmk__ipc_is_authentic_process_active(child->endpoint,
*ref_uid, *ref_gid,
&ipc_pid);
if ((rc == pcmk_rc_ok) || (rc == pcmk_rc_ipc_unresponsive)) {
if (child->pid <= 0) {
/* If rc is pcmk_rc_ok, ipc_pid is nonzero and this
* initializes a new child. If rc is
* pcmk_rc_ipc_unresponsive, ipc_pid is zero, and we will
* investigate further.
*/
child->pid = ipc_pid;
} else if ((ipc_pid != 0) && (child->pid != ipc_pid)) {
/* An unexpected (but authorized) process is responding to
* IPC. Investigate further.
*/
rc = pcmk_rc_ipc_unresponsive;
}
}
}
}
/* Stage 2: fall back to checking PIDs when IPC alone was not conclusive */
if (rc == pcmk_rc_ipc_unresponsive) {
/* If we get here, a child without IPC is being tracked, no IPC liveness
* has been detected, or IPC liveness has been detected with an
* unexpected (but authorized) process. This is safe on FreeBSD, since
* the only possible change there is from a proper child's PID to the
* "special" PID of 1 standing in for a more loosely related process.
*/
int ret = pcmk__pid_active(child->pid, child->name);
if (ipc_pid && ((ret != pcmk_rc_ok)
|| ipc_pid == PCMK__SPECIAL_PID
|| (pcmk__pid_active(ipc_pid,
child->name) == pcmk_rc_ok))) {
/* An unexpected (but authorized) process was detected at the IPC
* endpoint, and either it is active, or the child we're tracking is
* not.
*/
if (ret == pcmk_rc_ok) {
/* The child we're tracking is active. Kill it, and adopt the
* detected process. This assumes that our children don't fork
* (thus getting a different PID owning the IPC), but rather the
* tracking got out of sync because of some means external to
* Pacemaker, and adopting the detected process is better than
* killing it and possibly having to spawn a new child.
*/
/* not possessing IPC, after all (what about corosync CPG?) */
stop_child(child, SIGKILL);
}
rc = pcmk_rc_ok;
child->pid = ipc_pid;
} else if (ret == pcmk_rc_ok) {
// Our tracked child's PID was found active, but not its IPC
rc = pcmk_rc_ipc_pid_only;
} else if ((child->pid == 0) && (ret == EINVAL)) {
// FreeBSD can return EINVAL
rc = pcmk_rc_ipc_unresponsive;
} else {
/* Translate the PID check result into the codes documented for
 * this function; anything unrecognized is passed through as is */
switch (ret) {
case EACCES:
rc = pcmk_rc_ipc_unauthorized;
break;
case ESRCH:
rc = pcmk_rc_ipc_unresponsive;
break;
default:
rc = ret;
break;
}
}
}
return rc;
}
/*!
 * \internal
 * \brief Recurring timer callback polling children tracked by liveness only
 *
 * Every child flagged active_before_startup is checked with child_liveness(),
 * in ascending start_seq order. Children found gone are handed to
 * pcmk_process_exit(); any unexpected result terminates pacemakerd.
 *
 * \param[in] user_data  Ignored
 *
 * \return TRUE (keep the timer) if at least one such child is still alive,
 *         FALSE otherwise
 */
static gboolean
check_active_before_startup_processes(gpointer user_data)
{
    static int max = SIZEOF(pcmk_children);
    gboolean still_tracking = FALSE;
    int seq;
    int idx;

    for (seq = 1; seq < max; seq++) {
        for (idx = 0; idx < max; idx++) {
            pcmk_child_t *child = &pcmk_children[idx];
            int rc;

            if (child->active_before_startup == FALSE) {
                /* Tracked as a regular child process already */
                continue;
            }
            if (child->start_seq != seq) {
                continue;
            }

            rc = child_liveness(child);

            if ((rc == pcmk_rc_ipc_unresponsive)
                || (rc == pcmk_rc_ipc_pid_only)) {
                /* For the PID-only case: it was previously OK */
                const char *how = (rc == pcmk_rc_ipc_pid_only)? " as IPC server" : "";
                long long shown_pid = (long long) PCMK__SPECIAL_PID_AS_0(child->pid);

                if (child->respawn == TRUE) {
                    crm_err("%s[%lld] terminated%s", child->name,
                            shown_pid, how);
                } else {
                    /* orderly shutdown */
                    crm_notice("%s[%lld] terminated%s", child->name,
                               shown_pid, how);
                }
                pcmk_process_exit(child);
                continue;
            }

            if (rc != pcmk_rc_ok) {
                crm_exit(CRM_EX_FATAL);
            }

            /* At least one process found at startup is still going, so the
             * recurring timer has to stay around */
            still_tracking = TRUE;
        }
    }

    global_keep_tracking = still_tracking;
    return still_tracking;
}
/*!
* \internal
* \brief Initial one-off check of the pre-existing "child" processes
*
* With "child" process, we mean the subdaemon that defines an API end-point
* (all of them do as of the comment) -- the possible complement is skipped
* as it is deemed it has no such shared resources to cause conflicts about,
* hence it can presumably be started anew without hesitation.
* If that won't hold true in the future, the concept of a shared resource
* will have to be generalized beyond the API end-point.
*
* For boundary cases that the "child" is still starting (IPC end-point is yet
* to be witnessed), or more rarely (practically FreeBSD only), when there's
* a pre-existing "untrackable" authentic process, we give the situation some
* time to possibly unfold in the right direction, meaning that said socket
* will appear or the unattainable process will disappear per the observable
* IPC, respectively.
*
* \return Standard Pacemaker return code
*
* \note Since this gets run at the very start, \c respawn_count fields
* for particular children get temporarily overloaded with "rounds
* of waiting" tracking, restored once we are about to finish with
* success (i.e. returning \c pcmk_rc_ok) and will remain unrestored
* otherwise. One way to suppress liveness detection logic for
* particular child is to set the said value to a negative number.
*/
#define WAIT_TRIES 4 /* together with interleaved sleeps, worst case ~ 1s */
/* Scans pcmk_children for subdaemons that are already running (detected via
 * child_liveness()), retrying up to WAIT_TRIES rounds with 250 ms pauses while
 * any child is in an undecided state. Returns pcmk_rc_ok when done, or another
 * return code as soon as a child cannot be handled. While scanning,
 * respawn_count holds the current round number (-1 once a child is settled).
 */
static int
find_and_track_existing_processes(void)
{
    bool tracking = false;
    bool wait_in_progress;
    int rc;
    size_t i, rounds;

    for (rounds = 1; rounds <= WAIT_TRIES; rounds++) {
        wait_in_progress = false;
        for (i = 0; i < SIZEOF(pcmk_children); i++) {

            /* Skip children without an end-point and those already settled
             * (negative respawn_count) */
            if ((pcmk_children[i].endpoint == NULL)
                || (pcmk_children[i].respawn_count < 0)) {
                continue;
            }

            rc = child_liveness(&pcmk_children[i]);
            if (rc == pcmk_rc_ipc_unresponsive) {
                /* As a speculation, don't give up if there are more rounds to
                 * come for other reasons, but don't artificially wait just
                 * because of this, since we would preferably start ASAP.
                 */
                continue;
            }

            pcmk_children[i].respawn_count = rounds;
            switch (rc) {
                case pcmk_rc_ok:
                    if (pcmk_children[i].pid == PCMK__SPECIAL_PID) {
                        if (crm_is_true(getenv("PCMK_fail_fast"))) {
                            crm_crit("Cannot reliably track pre-existing"
                                     " authentic process behind %s IPC on this"
                                     " platform and PCMK_fail_fast requested",
                                     pcmk_children[i].endpoint);
                            return EOPNOTSUPP;
                        } else if (pcmk_children[i].respawn_count == WAIT_TRIES) {
                            crm_notice("Assuming pre-existing authentic, though"
                                       " on this platform untrackable, process"
                                       " behind %s IPC is stable (was in %d"
                                       " previous samples) so rather than"
                                       " bailing out (PCMK_fail_fast not"
                                       " requested), we just switch to a less"
                                       " optimal IPC liveness monitoring"
                                       " (not very suitable for heavy load)",
                                       pcmk_children[i].name, WAIT_TRIES - 1);
                            /* The period constant is in milliseconds, while
                             * the message reports seconds */
                            crm_warn("The process behind %s IPC cannot be"
                                     " terminated, so the overall shutdown"
                                     " will get delayed implicitly (%ld s),"
                                     " which serves as a graceful period for"
                                     " its native termination if it vitally"
                                     " depends on some other daemons going"
                                     " down in a controlled way already",
                                     pcmk_children[i].name,
                                     (long) (SHUTDOWN_ESCALATION_PERIOD / 1000));
                        } else {
                            wait_in_progress = true;
                            crm_warn("Cannot reliably track pre-existing"
                                     " authentic process behind %s IPC on this"
                                     " platform, can still disappear in %d"
                                     " attempt(s)", pcmk_children[i].endpoint,
                                     WAIT_TRIES - pcmk_children[i].respawn_count);
                            continue;
                        }
                    }
                    crm_notice("Tracking existing %s process (pid=%lld)",
                               pcmk_children[i].name,
                               (long long) PCMK__SPECIAL_PID_AS_0(
                                               pcmk_children[i].pid));
                    pcmk_children[i].respawn_count = -1;  /* 0~keep watching */
                    pcmk_children[i].active_before_startup = TRUE;
                    tracking = true;
                    break;

                case pcmk_rc_ipc_pid_only:
                    if (pcmk_children[i].respawn_count == WAIT_TRIES) {
                        crm_crit("%s IPC end-point for existing authentic"
                                 " process %lld did not (re)appear",
                                 pcmk_children[i].endpoint,
                                 (long long) PCMK__SPECIAL_PID_AS_0(
                                                 pcmk_children[i].pid));
                        return rc;
                    }
                    wait_in_progress = true;
                    crm_warn("Cannot find %s IPC end-point for existing"
                             " authentic process %lld, can still (re)appear"
                             " in %d attempts (?)",
                             pcmk_children[i].endpoint,
                             (long long) PCMK__SPECIAL_PID_AS_0(
                                             pcmk_children[i].pid),
                             WAIT_TRIES - pcmk_children[i].respawn_count);
                    continue;

                default:
                    crm_crit("Checked liveness of %s: %s " CRM_XS " rc=%d",
                             pcmk_children[i].name, pcmk_rc_str(rc), rc);
                    return rc;
            }
        }
        if (!wait_in_progress) {
            break;
        }
        (void) poll(NULL, 0, 250);  /* a bit for changes to possibly happen */
    }

    for (i = 0; i < SIZEOF(pcmk_children); i++) {
        pcmk_children[i].respawn_count = 0;  /* restore pristine state */
    }

    if (tracking) {
        g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL,
                              check_active_before_startup_processes, NULL);
    }
    return pcmk_rc_ok;
}
static void
init_children_processes(void)
{
int start_seq = 1, lpc = 0;
static int max = SIZEOF(pcmk_children);
/* start any children that have not been detected */
for (start_seq = 1; start_seq < max; start_seq++) {
/* don't start anything with start_seq < 1 */
for (lpc = 0; lpc < max; lpc++) {
if (pcmk_children[lpc].pid != 0) {
/* we are already tracking it */
continue;
}
if (start_seq == pcmk_children[lpc].start_seq) {
start_child(&(pcmk_children[lpc]));
}
}
}
/* From this point on, any daemons being started will be due to
* respawning rather than node start.
*
* This may be useful for the daemons to know
*/
setenv("PCMK_respawned", "true", 1);
}
/*!
 * \internal
 * \brief Callback for loss of the cluster layer connection: log and exit
 *
 * \param[in] user_data  Ignored
 */
static void
mcp_cpg_destroy(gpointer user_data)
{
    /* Without the cluster layer there is nothing left to do but exit */
    crm_crit("Lost connection to cluster layer, shutting down");
    crm_exit(CRM_EX_DISCONNECT);
}
/*!
 * \internal
 * \brief Handle a CPG message: a peer's process list or a cache removal
 *
 * Messages without a task attribute are process lists; those from other nodes
 * update the sender's entry and, on change, are passed on to IPC clients.
 * Messages with task CRM_OP_RM_NODE_CACHE remove a node from the peer cache.
 *
 * \param[in] handle     CPG connection (ignored)
 * \param[in] groupName  CPG group name (ignored)
 * \param[in] nodeid     ID of affected node
 * \param[in] pid        Process ID (ignored)
 * \param[in] msg        CPG XML message
 * \param[in] msg_len    Length of msg in bytes (ignored)
 */
static void
mcp_cpg_deliver(cpg_handle_t handle,
                const struct cpg_name *groupName,
                uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
    xmlNode *xml = string2xml(msg);
    const char *task = crm_element_value(xml, F_CRM_TASK);

    crm_trace("Received CPG message (%s): %.200s",
              (task? task : "process list"), (char*)msg);

    if (task != NULL) {
        if (crm_str_eq(task, CRM_OP_RM_NODE_CACHE, TRUE)) {
            int id = 0;
            const char *name = NULL;

            crm_element_value_int(xml, XML_ATTR_ID, &id);
            name = crm_element_value(xml, XML_ATTR_UNAME);
            reap_crm_member(id, name);
        }

    } else if (nodeid != local_nodeid) {
        uint32_t procs = 0;
        const char *uname = crm_element_value(xml, "uname");

        crm_element_value_int(xml, "proclist", (int *)&procs);
        if (update_node_processes(nodeid, uname, procs)) {
            update_process_clients(NULL);
        }

    } else {
        crm_debug("Ignoring message with local node's process list");
    }

    if (xml != NULL) {
        free_xml(xml);
    }
}
/*!
 * \internal
 * \brief CPG configuration-change callback
 *
 * Forwards all arguments unchanged to pcmk_cpg_membership(), then calls
 * update_process_peers().
 *
 * \param[in] handle               CPG connection
 * \param[in] groupName            CPG group name
 * \param[in] member_list          Current members
 * \param[in] member_list_entries  Number of entries in member_list
 * \param[in] left_list            Members that left
 * \param[in] left_list_entries    Number of entries in left_list
 * \param[in] joined_list          Members that joined
 * \param[in] joined_list_entries  Number of entries in joined_list
 */
static void
mcp_cpg_membership(cpg_handle_t handle,
                   const struct cpg_name *groupName,
                   const struct cpg_address *member_list, size_t member_list_entries,
                   const struct cpg_address *left_list, size_t left_list_entries,
                   const struct cpg_address *joined_list, size_t joined_list_entries)
{
    /* Let the common handler update the peer cache if needed */
    pcmk_cpg_membership(handle, groupName,
                        member_list, member_list_entries,
                        left_list, left_list_entries,
                        joined_list, joined_list_entries);

    /* Re-announce ourselves after every membership change */
    update_process_peers();
}
/*!
 * \internal
 * \brief Quorum notification callback: remember the latest quorum state
 *
 * \param[in] seq      Sequence number supplied by the caller (unused)
 * \param[in] quorate  New quorum state, stored in pcmk_quorate
 *
 * \return Always TRUE
 */
static gboolean
mcp_quorum_callback(unsigned long long seq, gboolean quorate)
{
    (void) seq;

    pcmk_quorate = quorate;
    return TRUE;
}
/*!
 * \internal
 * \brief Callback for loss of the quorum connection (only logs the event)
 *
 * \param[in] user_data  Ignored
 */
static void
mcp_quorum_destroy(gpointer user_data)
{
    (void) user_data;

    crm_info("connection lost");
}
/*!
 * \brief Entry point of the Pacemaker master control process
 *
 * Parses command-line options, then either asks an already-running instance
 * to quit (when 'S' was given) or performs start-up: reads the corosync
 * configuration, raises the core file limit, creates and chowns the state
 * directories, starts the IPC server, connects to the cluster services,
 * starts the child daemons and runs the main loop.
 *
 * \param[in] argc  Number of command-line arguments
 * \param[in] argv  Command-line arguments
 *
 * \note There is no return statement; the function ends by calling
 *       crm_exit().
 */
int
main(int argc, char **argv)
{
int rc;
int flag;
int argerr = 0;
int option_index = 0;
gboolean shutdown = FALSE;
uid_t pcmk_uid = 0;
gid_t pcmk_gid = 0;
struct rlimit cores;
crm_ipc_t *old_instance = NULL;
qb_ipcs_service_t *ipcs = NULL;
static crm_cluster_t cluster;
crm_log_preinit(NULL, argc, argv);
crm_set_options(NULL, "mode [options]", long_options, "Start/Stop Pacemaker\n");
mainloop_add_signal(SIGHUP, pcmk_ignore);
mainloop_add_signal(SIGQUIT, pcmk_sigquit);
/* Process options until crm_get_option() reports -1 */
while (1) {
flag = crm_get_option(argc, argv, &option_index);
if (flag == -1)
break;
switch (flag) {
case 'V':
crm_bump_log_level(argc, argv);
break;
case 'f':
/* Legacy */
break;
case 'p':
pid_file = optarg;
break;
case 's':
pcmk__set_env_option("node_start_state", "standby");
break;
case '$':
case '?':
crm_help(flag, CRM_EX_OK);
break;
case 'S':
shutdown = TRUE;
break;
case 'F':
printf("Pacemaker %s (Build: %s)\n Supporting v%s: %s\n", PACEMAKER_VERSION, BUILD_VERSION,
CRM_FEATURE_SET, CRM_FEATURES);
crm_exit(CRM_EX_OK);
/* NOTE(review): no break here; presumably crm_exit() does not return */
default:
printf("Argument code 0%o (%c) is not (?yet?) supported\n", flag, flag);
++argerr;
break;
}
}
/* Leftover non-option arguments are only echoed, not counted as errors */
if (optind < argc) {
printf("non-option ARGV-elements: ");
while (optind < argc)
printf("%s ", argv[optind++]);
printf("\n");
}
if (argerr) {
crm_help('?', CRM_EX_USAGE);
}
setenv("LC_ALL", "C", 1);
pcmk__set_env_option("mcp", "true");
crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
crm_debug("Checking for existing Pacemaker instance");
old_instance = crm_ipc_new(CRM_SYSTEM_MCP, 0);
(void) crm_ipc_connect(old_instance);
if (shutdown) {
/* Keep sending CRM_OP_QUIT every 2 seconds for as long as the connection
 * to the existing instance is up, then exit successfully
 */
crm_debug("Shutting down existing Pacemaker instance by request");
while (crm_ipc_connected(old_instance)) {
xmlNode *cmd =
create_request(CRM_OP_QUIT, NULL, NULL, CRM_SYSTEM_MCP, CRM_SYSTEM_MCP, NULL);
crm_debug(".");
crm_ipc_send(old_instance, cmd, 0, 0, NULL);
free_xml(cmd);
sleep(2);
}
crm_ipc_close(old_instance);
crm_ipc_destroy(old_instance);
crm_exit(CRM_EX_OK);
} else if (crm_ipc_connected(old_instance)) {
/* Another instance answered: refuse to start a second one */
crm_ipc_close(old_instance);
crm_ipc_destroy(old_instance);
crm_err("Aborting start-up because active Pacemaker instance found");
crm_exit(CRM_EX_FATAL);
}
crm_ipc_close(old_instance);
crm_ipc_destroy(old_instance);
if (mcp_read_config() == FALSE) {
crm_notice("Could not obtain corosync config data, exiting");
crm_exit(CRM_EX_UNAVAILABLE);
}
// OCF shell functions and cluster-glue need facility under different name
{
const char *facility = pcmk__env_option("logfacility");
if (facility && safe_str_neq(facility, "none")) {
setenv("HA_LOGFACILITY", facility, 1);
}
}
crm_notice("Starting Pacemaker %s "CRM_XS" build=%s features:%s",
PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURES);
mainloop = g_main_loop_new(NULL, FALSE);
/* Raise the soft core-file limit to the hard limit; when the hard limit is 0
 * and the effective user is root, the hard limit is first made unlimited
 */
rc = getrlimit(RLIMIT_CORE, &cores);
if (rc < 0) {
crm_perror(LOG_ERR, "Cannot determine current maximum core size.");
} else {
if (cores.rlim_max == 0 && geteuid() == 0) {
cores.rlim_max = RLIM_INFINITY;
} else {
crm_info("Maximum core file size is: %lu", (unsigned long)cores.rlim_max);
}
cores.rlim_cur = cores.rlim_max;
rc = setrlimit(RLIMIT_CORE, &cores);
if (rc < 0) {
crm_perror(LOG_ERR,
"Core file generation will remain disabled."
" Core files are an important diagnostic tool, so"
" please consider enabling them by default.");
}
}
if (pcmk_daemon_user(&pcmk_uid, &pcmk_gid) < 0) {
crm_err("Cluster user %s does not exist, aborting Pacemaker startup", CRM_DAEMON_USER);
crm_exit(CRM_EX_NOUSER);
}
// Used by some resource agents
if ((mkdir(CRM_STATE_DIR, 0750) < 0) && (errno != EEXIST)) {
crm_warn("Could not create " CRM_STATE_DIR ": %s", pcmk_strerror(errno));
} else {
mcp_chown(CRM_STATE_DIR, pcmk_uid, pcmk_gid);
}
/* Used to store core/blackbox/scheduler/cib files in */
crm_build_path(CRM_PACEMAKER_DIR, 0750);
mcp_chown(CRM_PACEMAKER_DIR, pcmk_uid, pcmk_gid);
/* Used to store core files in */
crm_build_path(CRM_CORE_DIR, 0750);
mcp_chown(CRM_CORE_DIR, pcmk_uid, pcmk_gid);
/* Used to store blackbox dumps in */
crm_build_path(CRM_BLACKBOX_DIR, 0750);
mcp_chown(CRM_BLACKBOX_DIR, pcmk_uid, pcmk_gid);
// Used to store scheduler inputs in
crm_build_path(PE_STATE_DIR, 0750);
mcp_chown(PE_STATE_DIR, pcmk_uid, pcmk_gid);
/* Used to store the cluster configuration */
crm_build_path(CRM_CONFIG_DIR, 0750);
mcp_chown(CRM_CONFIG_DIR, pcmk_uid, pcmk_gid);
// Don't build CRM_RSCTMP_DIR, pacemaker-execd will do it
ipcs = mainloop_add_ipc_server(CRM_SYSTEM_MCP, QB_IPC_NATIVE, &mcp_ipc_callbacks);
if (ipcs == NULL) {
crm_err("Couldn't start IPC server");
crm_exit(CRM_EX_OSERR);
}
/* Allows us to block shutdown */
if (cluster_connect_cfg(&local_nodeid) == FALSE) {
crm_err("Couldn't connect to Corosync's CFG service");
crm_exit(CRM_EX_PROTOCOL);
}
/* Export whether pcmk_locate_sbd() reported a positive result */
if(pcmk_locate_sbd() > 0) {
setenv("PCMK_watchdog", "true", 1);
} else {
setenv("PCMK_watchdog", "false", 1);
}
switch (find_and_track_existing_processes()) {
case pcmk_rc_ok:
break;
case pcmk_rc_ipc_unauthorized:
crm_exit(CRM_EX_CANTCREAT);
default:
crm_exit(CRM_EX_FATAL);
};
cluster.destroy = mcp_cpg_destroy;
cluster.cpg.cpg_deliver_fn = mcp_cpg_deliver;
cluster.cpg.cpg_confchg_fn = mcp_cpg_membership;
crm_set_autoreap(FALSE);
/* rc stays pcmk_ok unless one of the cluster connections below fails;
 * it determines the exit status once the main loop has ended
 */
rc = pcmk_ok;
if (cluster_connect_cpg(&cluster) == FALSE) {
crm_err("Couldn't connect to Corosync's CPG service");
rc = -ENOPROTOOPT;
} else if (cluster_connect_quorum(mcp_quorum_callback, mcp_quorum_destroy)
== FALSE) {
rc = -ENOTCONN;
} else {
local_name = get_local_node_name();
update_node_processes(local_nodeid, local_name, get_process_list());
mainloop_add_signal(SIGTERM, pcmk_shutdown);
mainloop_add_signal(SIGINT, pcmk_shutdown);
init_children_processes();
crm_notice("Pacemaker daemon successfully started and accepting connections");
g_main_loop_run(mainloop);
}
/* Tear down in reverse: IPC server, main loop, then cluster connections */
if (ipcs) {
crm_trace("Closing IPC server");
mainloop_del_ipc_server(ipcs);
ipcs = NULL;
}
g_main_loop_unref(mainloop);
cluster_disconnect_cpg(&cluster);
cluster_disconnect_cfg();
crm_exit(crm_errno2exit(rc));
}
diff --git a/daemons/schedulerd/pacemaker-schedulerd.c b/daemons/schedulerd/pacemaker-schedulerd.c
index 88a17b2fd3..e4cd9a4a7f 100644
--- a/daemons/schedulerd/pacemaker-schedulerd.c
+++ b/daemons/schedulerd/pacemaker-schedulerd.c
@@ -1,360 +1,354 @@
/*
* Copyright 2004-2020 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <libxml/parser.h>
#include <crm/common/ipcs_internal.h>
#include <crm/common/mainloop.h>
#include <crm/pengine/internal.h>
#include <pacemaker-internal.h>
#include <crm/msg_xml.h>
#define OPTARGS "hVc"
/* Main loop created and run by main() */
static GMainLoop *mainloop = NULL;
/* IPC server created in main() and removed in pengine_shutdown() */
static qb_ipcs_service_t *ipcs = NULL;
/* Working set created on first CRM_OP_PECALC request and reset after each */
static pe_working_set_t *sched_data_set = NULL;
/* Index into series[] for the current run: 1 if a processing error was
 * recorded, else 2 if a processing warning was recorded, else 3.
 *
 * The expansion is fully parenthesized so the macro can be used safely
 * inside larger expressions.
 */
#define get_series() (was_processing_error? 1 : (was_processing_warning? 2 : 3))

/* Description of one series of saved scheduler input files */
typedef struct series_s {
    const char *name;   /* Base name passed to the series file helpers */
    const char *param;  /* Cluster option looked up to override wrap */
    int wrap;           /* Default wrap value used when the option is unset */
} series_t;

/* Indexed by get_series(); entry 0 is a placeholder that is never selected */
series_t series[] = {
    {"pe-unknown", "_do_not_match_anything_", -1},
    {"pe-error", "pe-error-series-max", -1},
    {"pe-warn", "pe-warn-series-max", 200},
    {"pe-input", "pe-input-series-max", 400},
};
void pengine_shutdown(int nsig);
/*!
 * \internal
 * \brief Handle one request received over the scheduler's IPC server
 *
 * Requests with no operation, CRM_OP_HELLO requests and responses are
 * ignored. A CRM_OP_PECALC request addressed to CRM_SYSTEM_PENGINE is
 * scheduled: the transition graph is sent back to the sender, and (unless
 * the input is unchanged since the previous request or the series wrap is 0)
 * the input is saved to a series file under PE_STATE_DIR.
 *
 * \param[in] msg       Request message
 * \param[in] xml_data  Data section of the request (the scheduler input)
 * \param[in] sender    Client to send the reply to
 *
 * \return FALSE if the request names an operation, is not a hello or a
 *         response, and is not addressed to CRM_SYSTEM_PENGINE; TRUE
 *         otherwise
 *
 * \note last_digest and filename are static, so they carry over from one
 *       call to the next.
 */
static gboolean
process_pe_message(xmlNode *msg, xmlNode *xml_data, pcmk__client_t *sender)
{
static char *last_digest = NULL;
static char *filename = NULL;
const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO);
const char *op = crm_element_value(msg, F_CRM_TASK);
const char *ref = crm_element_value(msg, F_CRM_REFERENCE);
crm_trace("Processing %s op (ref=%s)...", op, ref);
if (op == NULL) {
/* error */
} else if (strcasecmp(op, CRM_OP_HELLO) == 0) {
/* ignore */
} else if (safe_str_eq(crm_element_value(msg, F_CRM_MSG_TYPE), XML_ATTR_RESPONSE)) {
/* ignore */
} else if (sys_to == NULL || strcasecmp(sys_to, CRM_SYSTEM_PENGINE) != 0) {
crm_trace("Bad sys-to %s", crm_str(sys_to));
return FALSE;
} else if (strcasecmp(op, CRM_OP_PECALC) == 0) {
unsigned int seq;
int series_id = 0;
int series_wrap = 0;
char *digest = NULL;
const char *value = NULL;
time_t execution_date = time(NULL);
xmlNode *converted = NULL;
xmlNode *reply = NULL;
gboolean is_repoke = FALSE;
gboolean process = TRUE;
/* Clear the error/warning indicators so they reflect this run only */
crm_config_error = FALSE;
crm_config_warning = FALSE;
was_processing_error = FALSE;
was_processing_warning = FALSE;
if (sched_data_set == NULL) {
sched_data_set = pe_new_working_set();
CRM_ASSERT(sched_data_set != NULL);
set_bit(sched_data_set->flags, pe_flag_no_counts);
set_bit(sched_data_set->flags, pe_flag_no_compat);
}
digest = calculate_xml_versioned_digest(xml_data, FALSE, FALSE, CRM_FEATURE_SET);
/* Work on a copy; the original xml_data is what gets written to disk */
converted = copy_xml(xml_data);
if (cli_config_update(&converted, NULL, TRUE) == FALSE) {
/* Update failed: reply with a placeholder graph instead of scheduling */
sched_data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH);
crm_xml_add_int(sched_data_set->graph, "transition_id", 0);
crm_xml_add_int(sched_data_set->graph, "cluster-delay", 0);
process = FALSE;
free(digest);
} else if (safe_str_eq(digest, last_digest)) {
crm_info("Input has not changed since last time, not saving to disk");
is_repoke = TRUE;
free(digest);
} else {
/* New input: last_digest takes ownership of digest */
free(last_digest);
last_digest = digest;
}
if (process) {
pcmk__schedule_actions(sched_data_set, converted, NULL);
}
/* Pick the series from the error/warning state, then let the cluster
 * option named by series[].param override the default wrap value
 */
series_id = get_series();
series_wrap = series[series_id].wrap;
value = pe_pref(sched_data_set->config_hash, series[series_id].param);
if (value != NULL) {
series_wrap = (int) crm_parse_ll(value, NULL);
if (errno != 0) {
series_wrap = series[series_id].wrap;
}
} else {
crm_config_warn("No value specified for cluster"
" preference: %s", series[series_id].param);
}
if (pcmk__read_series_sequence(PE_STATE_DIR, series[series_id].name,
&seq) != pcmk_rc_ok) {
// @TODO maybe handle errors better ...
seq = 0;
}
crm_trace("Series %s: wrap=%d, seq=%u, pref=%s",
series[series_id].name, series_wrap, seq, value);
sched_data_set->input = NULL;
reply = create_reply(msg, sched_data_set->graph);
CRM_ASSERT(reply != NULL);
/* For a repoke, the file name from the previous request is reused */
if (is_repoke == FALSE) {
free(filename);
filename = pcmk__series_filename(PE_STATE_DIR,
series[series_id].name, seq, true);
}
crm_xml_add(reply, F_CRM_TGRAPH_INPUT, filename);
crm_xml_add_int(reply, "graph-errors", was_processing_error);
crm_xml_add_int(reply, "graph-warnings", was_processing_warning);
crm_xml_add_int(reply, "config-errors", crm_config_error);
crm_xml_add_int(reply, "config-warnings", crm_config_warning);
if (pcmk__ipc_send_xml(sender, 0, reply,
crm_ipc_server_event) != pcmk_rc_ok) {
/* Fallback: write the graph to a temporary file, name that file in the
 * reply, drop the inline data and send again
 */
int graph_file_fd = 0;
char *graph_file = NULL;
umask(S_IWGRP | S_IWOTH | S_IROTH);
graph_file = crm_strdup_printf("%s/pengine.graph.XXXXXX",
PE_STATE_DIR);
/* NOTE(review): the mkstemp() result is not checked before use */
graph_file_fd = mkstemp(graph_file);
crm_err("Couldn't send transition graph to peer, writing to %s instead",
graph_file);
crm_xml_add(reply, F_CRM_TGRAPH, graph_file);
write_xml_fd(sched_data_set->graph, graph_file, graph_file_fd, FALSE);
free(graph_file);
free_xml(first_named_child(reply, F_CRM_DATA));
CRM_ASSERT(pcmk__ipc_send_xml(sender, 0, reply,
crm_ipc_server_event) == pcmk_rc_ok);
}
free_xml(reply);
pe_reset_working_set(sched_data_set);
pcmk__log_transition_summary(filename);
/* Save the input and advance the sequence only for new input with a
 * nonzero wrap value
 */
if (is_repoke == FALSE && series_wrap != 0) {
unlink(filename);
crm_xml_add_ll(xml_data, "execution-date", (long long) execution_date);
write_xml_file(xml_data, filename, TRUE);
pcmk__write_series_sequence(PE_STATE_DIR, series[series_id].name,
++seq, series_wrap);
} else {
crm_trace("Not writing out %s: %d & %d", filename, is_repoke, series_wrap);
}
free_xml(converted);
}
return TRUE;
}
/*!
 * \internal
 * \brief IPC callback: accept a new client connection
 *
 * \param[in] c    New connection
 * \param[in] uid  Client user ID
 * \param[in] gid  Client group ID
 *
 * \return 0 if a client object could be created, otherwise -EIO
 */
static int32_t
pe_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
    crm_trace("Connection %p", c);
    return (pcmk__new_client(c, uid, gid) != NULL)? 0 : -EIO;
}
-static void
-pe_ipc_created(qb_ipcs_connection_t * c)
-{
- crm_trace("Connection %p", c);
-}
-
gboolean process_pe_message(xmlNode *msg, xmlNode *xml_data,
pcmk__client_t *sender);
/*!
 * \internal
 * \brief IPC callback: acknowledge and process one client request
 *
 * \param[in] qbc   Connection the data arrived on
 * \param[in] data  Raw IPC data
 * \param[in] size  Size of data
 *
 * \return Always 0
 */
static int32_t
pe_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
{
    uint32_t msg_id = 0;
    uint32_t msg_flags = 0;
    pcmk__client_t *client = pcmk__find_client(qbc);
    xmlNode *request = pcmk__client_data2xml(client, data, size, &msg_id,
                                             &msg_flags);

    /* The ack goes out whether or not the request could be parsed */
    pcmk__ipc_send_ack(client, msg_id, msg_flags, "ack");

    if (request == NULL) {
        return 0;
    }

    process_pe_message(request, get_message_xml(request, F_CRM_DATA), client);
    free_xml(request);
    return 0;
}
/*!
 * \internal
 * \brief IPC callback: free the client object of a closed connection
 *
 * \param[in] c  Connection that was closed
 *
 * \return Always 0 (what a nonzero value would mean to the caller is not
 *         evident from this file)
 */
static int32_t
pe_ipc_closed(qb_ipcs_connection_t * c)
{
    pcmk__client_t *client = pcmk__find_client(c);

    if (client != NULL) {
        crm_trace("Connection %p", c);
        pcmk__free_client(client);
    }
    return 0;
}
/*!
 * \internal
 * \brief IPC callback: a connection is being destroyed
 *
 * Delegates to pe_ipc_closed() and discards its result.
 *
 * \param[in] c  Connection being destroyed
 */
static void
pe_ipc_destroy(qb_ipcs_connection_t * c)
{
    crm_trace("Connection %p", c);
    (void) pe_ipc_closed(c);
}
/* Handlers passed to mainloop_add_ipc_server() in main() for the scheduler's
 * IPC server; no connection_created handler is installed after this change
 */
struct qb_ipcs_service_handlers ipc_callbacks = {
.connection_accept = pe_ipc_accept,
- .connection_created = pe_ipc_created,
+ .connection_created = NULL,
.msg_process = pe_ipc_dispatch,
.connection_closed = pe_ipc_closed,
.connection_destroyed = pe_ipc_destroy
};
/* *INDENT-OFF* */
/* Option table passed to crm_set_options() in main(); the all-zero entry
 * terminates the list.
 *
 * NOTE(review): "help" is mapped to '?' here, while main() handles 'h' and
 * counts every other code as an argument error -- confirm this is intended.
 */
static struct crm_option long_options[] = {
/* Top-level Options */
{"help", 0, 0, '?', "\tThis text"},
{"verbose", 0, 0, 'V', "\tIncrease debug output"},
{0, 0, 0, 0}
};
/* *INDENT-ON* */
/*!
 * \brief Entry point of the Pacemaker scheduler daemon
 *
 * Parses options, prints metadata if asked to, verifies that PE_STATE_DIR is
 * writable, starts the IPC server and runs the main loop.
 *
 * \param[in] argc  Number of command-line arguments
 * \param[in] argv  Command-line arguments
 *
 * \return CRM_EX_OK after printing metadata, CRM_EX_FATAL if PE_STATE_DIR is
 *         not writable; all other paths end in crm_exit() or crm_help()
 */
int
main(int argc, char **argv)
{
    int opt;
    int option_index = 0;
    int n_bad_args = 0;

    crm_log_preinit(NULL, argc, argv);
    crm_set_options(NULL, "[options]",
                    long_options, "Daemon for calculating the cluster's response to events");

    mainloop_add_signal(SIGTERM, pengine_shutdown);

    /* Consume options until crm_get_option() reports the end with -1 */
    while ((opt = crm_get_option(argc, argv, &option_index)) != -1) {
        if (opt == 'V') {
            crm_bump_log_level(argc, argv);

        } else if (opt == 'h') {
            /* Help message */
            crm_help('?', CRM_EX_OK);

        } else {
            ++n_bad_args;
        }
    }

    /* A single remaining argument of "metadata" prints metadata and stops */
    if (((argc - optind) == 1) && safe_str_eq("metadata", argv[optind])) {
        pe_metadata();
        return CRM_EX_OK;
    }

    if ((optind > argc) || (n_bad_args != 0)) {
        crm_help('?', CRM_EX_USAGE);
    }

    crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
    crm_notice("Starting Pacemaker scheduler");

    if (pcmk__daemon_can_write(PE_STATE_DIR, NULL) == FALSE) {
        crm_err("Terminating due to bad permissions on " PE_STATE_DIR);
        fprintf(stderr,
                "ERROR: Bad permissions on " PE_STATE_DIR " (see logs for details)\n");
        fflush(stderr);
        return CRM_EX_FATAL;
    }

    ipcs = mainloop_add_ipc_server(CRM_SYSTEM_PENGINE, QB_IPC_SHM, &ipc_callbacks);
    if (ipcs == NULL) {
        crm_err("Failed to create IPC server: shutting down and inhibiting respawn");
        crm_exit(CRM_EX_FATAL);
    }

    /* Create the main loop and hand control to it */
    mainloop = g_main_loop_new(NULL, FALSE);
    crm_notice("Pacemaker scheduler successfully started and accepting connections");
    g_main_loop_run(mainloop);

    pe_free_working_set(sched_data_set);
    crm_info("Exiting %s", crm_system_name);
    crm_exit(CRM_EX_OK);
}
/*!
 * \brief SIGTERM handler: release the IPC server and working set, then exit
 *
 * \param[in] nsig  Signal number (unused)
 */
void
pengine_shutdown(int nsig)
{
    (void) nsig;

    mainloop_del_ipc_server(ipcs);
    pe_free_working_set(sched_data_set);
    crm_exit(CRM_EX_OK);
}
diff --git a/maint/mocked/based.c b/maint/mocked/based.c
index 59cc0d94e6..fa5797f0f5 100644
--- a/maint/mocked/based.c
+++ b/maint/mocked/based.c
@@ -1,338 +1,331 @@
/*
* Copyright 2019-2020 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* Licensed under the GNU General Public License version 2 or later (GPLv2+).
*/
/*
* Clean-room attempt (admittedly with a lot of code borrowed from or inspired
* by the full-blown daemon) at a minimalistic implementation of the based
* daemon, with only the important aspects implemented at the moment.
*
* Hopefully easy to adapt for a variety of purposes.
*
* NOTE: currently, only the cib_rw API end-point is opened; future refinements
* as new modules are added should make this conditional on what the
* module indicates in the context (which is intentionally a very loose
* data glue between the skeleton and the modules themselves, like CGI
* variables so to say, but more structurally predestined so as to avoid
* the complexities of hash table lookups etc.)
*/
#include <crm_internal.h>
#if 0
#include "crm/common/ipcs_internal.h" /* pcmk__client_t */
#include "crm/common/xml.h" /* crm_xml_add */
#endif
#include "crm/msg_xml.h" /* F_SUBTYPE */
#include "daemons/based/pacemaker-based.h" /* cib_notify_diff */
#include <qb/qbipcs.h> /* qb_ipcs_connection_t */
#include "based.h"
/* direct global access is violated in one case only:
- mock_based_ipc_accept stores a reference to it in the client's userdata
(pcmk__client_t->userdata) */
mock_based_context_t mock_based_context;
/* see based/based_callbacks.c:cib_ipc_accept
 *
 * IPC callback: create a client object for a new connection and attach the
 * global mock context to it as user data.
 *
 * Returns 0 on success, or -EIO if no client object could be created (in
 * which case there is nothing to attach the context to).
 */
static int32_t
mock_based_ipc_accept(qb_ipcs_connection_t *c, uid_t uid, gid_t gid)
{
    pcmk__client_t *cib_client;

    crm_trace("Connection %p", c);

    cib_client = pcmk__new_client(c, uid, gid);
    if (cib_client == NULL) {
        /* Bail out before touching the client; it does not exist */
        return -EIO;
    }

    cib_client->userdata = &mock_based_context;
    return 0;
}
-/* see based/based_callbacks.c:cib_ipc_created */
-static void
-mock_based_ipc_created(qb_ipcs_connection_t *c)
-{
- crm_trace("Connection %p", c);
-}
-
/* see based/based_callbacks.c:cib_ipc_closed
 *
 * IPC callback: free the client object belonging to a closed connection, if
 * there is one. Always returns 0.
 */
static int32_t
mock_based_ipc_closed(qb_ipcs_connection_t *c)
{
    pcmk__client_t *known = pcmk__find_client(c);

    if (known == NULL) {
        return 0;
    }

    crm_trace("Connection %p", c);
    pcmk__free_client(known);
    return 0;
}
/* see based/based_callbacks.c:cib_ipc_destroy
 *
 * IPC callback: a connection is being destroyed; delegates to
 * mock_based_ipc_closed() and discards its result.
 */
static void
mock_based_ipc_destroy(qb_ipcs_connection_t *c)
{
    crm_trace("Connection %p", c);
    (void) mock_based_ipc_closed(c);
}
/* see based/based_callbacks.c:cib_process_command (and more)
 *
 * Answer a CIB query with a fixed, minimal CIB.
 *
 * cib_client  client to reply to (its request_id is used for the reply)
 * flags       IPC flags of the request; also echoed as F_CIB_CALLOPTS, and
 *             cib_sync_call selects between a plain reply and a server event
 * op_request  request whose operation, call ID and client ID are echoed
 *
 * If the built-in CIB text cannot be parsed, the reply is still sent, just
 * without call data.
 */
static void
mock_based_handle_query(pcmk__client_t *cib_client, uint32_t flags,
                        const xmlNode *op_request)
{
    xmlDocPtr doc = NULL;
    xmlNode *reply = NULL;
    xmlNode *cib = NULL;
    const char cib_str[] =
#if 0
        "<cib/>";
#else
        "<cib validate-with='pacemaker-1.2' admin_epoch='0' epoch='0' num_updates='0'>"\
        "  <configuration>"\
        "    <crm_config/>"\
        "    <nodes/>"\
        "    <resources/>"\
        "    <constraints/>"\
        "  </configuration>"\
        "  <status/>"\
        "</cib>";
#endif

    /* The terminating NUL is not part of the document, hence the "- 1" */
    doc = xmlReadMemory(cib_str, sizeof(cib_str) - 1, "file:///tmp/foo",
                        NULL, 0);
    if (doc != NULL) {
        cib = doc->children;
        if (cib == NULL) {
            /* Nothing usable inside; do not leak the empty document */
            xmlFreeDoc(doc);
        }
    }

    reply = create_xml_node(NULL, "cib-reply");
    crm_xml_add(reply, F_TYPE, T_CIB);
    crm_xml_add(reply, F_CIB_OPERATION,
                crm_element_value(op_request, F_CIB_OPERATION));
    crm_xml_add(reply, F_CIB_CALLID,
                crm_element_value(op_request, F_CIB_CALLID));
    crm_xml_add(reply, F_CIB_CLIENTID,
                crm_element_value(op_request, F_CIB_CLIENTID));
    crm_xml_add_int(reply, F_CIB_CALLOPTS, flags);
    crm_xml_add_int(reply, F_CIB_RC, pcmk_ok);

    if (cib != NULL) {
        crm_trace("Attaching reply output");
        add_message_xml(reply, F_CIB_CALLDATA, cib);
    }

    pcmk__ipc_send_xml(cib_client, cib_client->request_id, reply,
                       ((flags & cib_sync_call)? crm_ipc_flags_none
                                               : crm_ipc_server_event));

    free_xml(reply);
    if (cib != NULL) {
        free_xml(cib);
    }
}
/* see based/based_callbacks.c:cib_common_callback_worker
 *
 * Dispatch one request by its F_CIB_OPERATION value:
 *  - CRM_OP_REGISTER: send a registration ack if the client wants a response
 *  - T_CIB_NOTIFY:    record diff-notification interest, run every module's
 *                     cib_notify hook, and ack if the client wants a response
 *  - CIB_OP_QUERY:    answer via mock_based_handle_query()
 *  - anything else (including a missing operation): log and discard
 *
 * id          IPC message ID of the request
 * flags       IPC flags of the request
 * op_request  parsed request
 * cib_client  client that sent the request
 */
static void
mock_based_common_callback_worker(uint32_t id, uint32_t flags,
                                  xmlNode *op_request,
                                  pcmk__client_t *cib_client)
{
    const char *op = crm_element_value(op_request, F_CIB_OPERATION);

    if (op == NULL) {
        /* strcmp() must not see NULL; treat as an unknown request */
        crm_notice("Discarded request without an operation");
        return;
    }

    if (!strcmp(op, CRM_OP_REGISTER)) {
        if (flags & crm_ipc_client_response) {
            xmlNode *ack = create_xml_node(NULL, __FUNCTION__);

            crm_xml_add(ack, F_CIB_OPERATION, CRM_OP_REGISTER);
            crm_xml_add(ack, F_CIB_CLIENTID, cib_client->id);
            pcmk__ipc_send_xml(cib_client, id, ack, flags);
            cib_client->request_id = 0;
            free_xml(ack);
        }

    } else if (!strcmp(op, T_CIB_NOTIFY)) {
        int on_off = 0;
        const char *type = crm_element_value(op_request, F_CIB_NOTIFY_TYPE);
        mock_based_context_t *ctxt = NULL;

        crm_element_value_int(op_request, F_CIB_NOTIFY_ACTIVATE, &on_off);

        /* Either string may be unset; never hand NULL to a %s conversion */
        crm_debug("Setting %s callbacks for %s (%s): %s",
                  ((type != NULL)? type : "(null)"),
                  ((cib_client->name != NULL)? cib_client->name : "(null)"),
                  cib_client->id, on_off ? "on" : "off");

        if ((type != NULL) && !strcmp(type, T_CIB_DIFF_NOTIFY) && on_off) {
            cib_client->options |= cib_notify_diff;
        }

        /* Modules are visited from the last registered to the first */
        ctxt = (mock_based_context_t *) cib_client->userdata;
        for (size_t c = ctxt->modules_cnt; c > 0; c--) {
            if (ctxt->modules[c - 1]->hooks.cib_notify != NULL) {
                ctxt->modules[c - 1]->hooks.cib_notify(cib_client);
            }
        }

        if (flags & crm_ipc_client_response) {
            pcmk__ipc_send_ack(cib_client, id, flags, "ack");
        }

    } else if (!strcmp(op, CIB_OP_QUERY)) {
        mock_based_handle_query(cib_client, flags, op_request);

    } else {
        crm_notice("Discarded request %s", op);
    }
}
/* see based/based_callbacks.c:cib_ipc_dispatch_rw
 *
 * IPC callback: parse one incoming request, stamp it with the client's ID
 * and name, and pass it to mock_based_common_callback_worker().
 *
 * c     connection the data arrived on
 * data  raw IPC data
 * size  size of data
 *
 * Always returns 0.
 *
 * The asserts are kept from the original: this mock insists on parsable
 * requests, on sync calls carrying crm_ipc_client_response, and on clients
 * without a name.
 */
static int32_t
mock_based_dispatch_command(qb_ipcs_connection_t *c, void *data, size_t size)
{
    uint32_t id = 0, flags = 0;
    int call_options = 0;
    pcmk__client_t *cib_client = pcmk__find_client(c);
    xmlNode *op_request = pcmk__client_data2xml(cib_client, data, size, &id,
                                                &flags);

    crm_notice("Got connection %p", c);
    assert(op_request != NULL);

    if (cib_client == NULL || op_request == NULL) {
        if (op_request == NULL) {
            crm_trace("Invalid message from %p", c);
            if (cib_client != NULL) {
                /* Only a known client can be sent a nack */
                pcmk__ipc_send_ack(cib_client, id, flags, "nack");
            }
        } else {
            /* Unknown client: the parsed request is still ours to free */
            free_xml(op_request);
        }
        return 0;
    }

    crm_element_value_int(op_request, F_CIB_CALLOPTS, &call_options);
    if (call_options & cib_sync_call) {
        assert(flags & crm_ipc_client_response);
        cib_client->request_id = id; /* reply only to last in-flight request */
    }

    assert(cib_client->name == NULL);
    crm_xml_add(op_request, F_CIB_CLIENTID, cib_client->id);
    crm_xml_add(op_request, F_CIB_CLIENTNAME, cib_client->name);

    mock_based_common_callback_worker(id, flags, op_request, cib_client);
    free_xml(op_request);
    return 0;
}
/* * */
/*
 * Append a heap-allocated copy of mod to the global context's module list.
 *
 * Returns the index of the new module in that list. Aborts the process if
 * memory cannot be allocated.
 */
size_t mock_based_register_module(module_t mod) {
    const size_t slot = mock_based_context.modules_cnt;
    module_t *copy = NULL;

    /* Grow the list by one entry */
    mock_based_context.modules_cnt = slot + 1;
    mock_based_context.modules = realloc(mock_based_context.modules,
                                         sizeof(*mock_based_context.modules)
                                         * mock_based_context.modules_cnt);
    if (mock_based_context.modules == NULL) {
        abort();
    }

    copy = malloc(sizeof(module_t));
    if (copy == NULL) {
        abort();
    }

    *copy = mod;
    mock_based_context.modules[slot] = copy;
    return slot;
}
/*
 * Parse command-line options, or (when usage is true) print one help line
 * per option.
 *
 * ctxt   mock context holding the registered modules
 * usage  false to parse for real; true to print each option's description
 * argc   number of entries in argv
 * argv   arguments; argv[0] is the program name and is never parsed
 *
 * With no options at all, behaves as if "-h" had been given. For "-h" in
 * parse mode, prints a usage line built from the modules' short options and
 * then re-invokes itself in usage mode over a synthetic argument vector
 * ("-<module option>"... followed by "-h").
 *
 * Returns -1 if memory for the help text cannot be allocated; otherwise the
 * most recent result recorded while scanning (0 if nothing was recognized,
 * 1 after "-h" was described in usage mode, or whatever a module's argparse
 * hook returned). main() starts the server only for a result above 0.
 */
static int
mock_based_options(mock_based_context_t *ctxt,
                   bool usage, int argc, const char *argv[])
{
    int ret = 0;

    if (argc <= 1) {
        /* No options given: show help */
        const char *help_argv[] = {argv[0], "-h"};

        return mock_based_options(ctxt, false, 2, help_argv);
    }

    for (size_t i = 1; i < (size_t) argc; i++) {
        if (argv[i][0] == '-' && argv[i][1] != '-' && argv[i][1] != '\0') {
            if (usage) {
                printf("\t-%c\t", argv[i][1]);
            }
            switch(argv[i][1]) {
                case 'h':
                    if (usage) {
                        printf("show this help message\n");
                        ret = 1;
                    } else {
                        const size_t n_mods = ctxt->modules_cnt;
                        /* Synthetic argv: slot 0 program name, 1..n module
                         * options, n+1 "-h"
                         */
                        const char **args2argv = malloc((n_mods + 2)
                                                        * sizeof(*args2argv));
                        /* Backing storage for the "-x" strings; it must
                         * outlive the loop that fills it in
                         */
                        char (*optstrs)[3] = malloc((n_mods + 1)
                                                    * sizeof(*optstrs));
                        /* "h" followed by "|x" per module, plus the NUL */
                        char *s = malloc((n_mods * 2 + 2) * sizeof(*s));

                        if (args2argv == NULL || optstrs == NULL || s == NULL) {
                            free(args2argv);
                            free(optstrs);
                            free(s);
                            return -1;
                        }

                        args2argv[0] = argv[0];

                        optstrs[n_mods][0] = '-';
                        optstrs[n_mods][1] = 'h';
                        optstrs[n_mods][2] = '\0';
                        args2argv[n_mods + 1] = optstrs[n_mods];

                        s[0] = 'h';
                        for (size_t c = n_mods; c > 0; c--) {
                            const char shortopt = ctxt->modules[c - 1]->shortopt;
                            /* Last registered module is listed first */
                            const size_t pos = (n_mods - c) * 2 + 1;

                            optstrs[c - 1][0] = '-';
                            optstrs[c - 1][1] = shortopt;
                            optstrs[c - 1][2] = '\0';
                            args2argv[c] = optstrs[c - 1];

                            s[pos] = '|';
                            s[pos + 1] = shortopt;
                        }
                        s[n_mods * 2 + 1] = '\0';

                        printf("Usage: %s [-{%s}]\n", argv[0], s);
                        (void) mock_based_options(ctxt, true,
                                                  (int) (n_mods + 2),
                                                  args2argv);
                        free(args2argv);
                        free(optstrs);
                        free(s);
                    }
                    return ret;
                default:
                    for (size_t c = ctxt->modules_cnt; c > 0; c--) {
                        if (ctxt->modules[c - 1]->shortopt == argv[i][1]) {
                            ret = ctxt->modules[c - 1]->hooks.argparse(ctxt, usage, argc - i, &argv[i]);
                            if (ret < 0) {
                                break;
                            } else if (ret > 1) {
                                /* The hook consumed ret arguments in total */
                                i += (ret - 1);
                            }
                        }
                    }
                    if (ret == 0) {
                        printf("unknown option \"%s\"\n", argv[i]);
                    }
                    break;
            }
        }
    }
    return ret;
}
/*
 * Entry point of the mock based daemon.
 *
 * If option parsing reports a positive result, sets up logging, opens an IPC
 * server on CIB_CHANNEL_RW with the mock handlers and runs a main loop.
 * Afterwards (or immediately, if parsing did not report a positive result)
 * every registered module is destroyed and freed.
 */
int main(int argc, char *argv[])
{
mock_based_context_t *ctxt = &mock_based_context;
if (mock_based_options(ctxt, false, argc, (const char **) argv) > 0) {
struct qb_ipcs_service_handlers cib_ipc_callbacks = {
.connection_accept = mock_based_ipc_accept,
- .connection_created = mock_based_ipc_created,
+ .connection_created = NULL,
.msg_process = mock_based_dispatch_command,
.connection_closed = mock_based_ipc_closed,
.connection_destroyed = mock_based_ipc_destroy,
};
crm_log_preinit(NULL, argc, argv);
crm_log_init(NULL, LOG_DEBUG, false, true, argc, argv, false);
/* NOTE(review): the result of mainloop_add_ipc_server() is not checked */
qb_ipcs_service_t *ipcs_command =
mainloop_add_ipc_server(CIB_CHANNEL_RW, QB_IPC_NATIVE,
&cib_ipc_callbacks);
g_main_loop_run(g_main_loop_new(NULL, false));
qb_ipcs_destroy(ipcs_command);
}
/* Tear modules down from the last registered to the first; the destroy hook
 * is optional
 */
for (size_t c = ctxt->modules_cnt; c > 0; c--) {
if (ctxt->modules[c - 1]->hooks.destroy != NULL) {
ctxt->modules[c - 1]->hooks.destroy(ctxt->modules[c - 1]);
}
free(mock_based_context.modules[c - 1]);
}
free(mock_based_context.modules);
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Sat, Jan 25, 11:57 AM (1 d, 19 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1322470
Default Alt Text
(223 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment