diff --git a/daemons/based/based_callbacks.c b/daemons/based/based_callbacks.c
index a2878981b9..f07d427071 100644
--- a/daemons/based/based_callbacks.c
+++ b/daemons/based/based_callbacks.c
@@ -1,1549 +1,1549 @@
/*
* Copyright 2004-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include /* U64T ~ PRIu64 */
#include
#include
#include
#include
#include
#include
#include
#define EXIT_ESCALATION_MS 10000
static unsigned long cib_local_bcast_num = 0;
typedef struct cib_local_notify_s {
xmlNode *notify_src;
char *client_id;
gboolean from_peer;
gboolean sync_reply;
} cib_local_notify_t;
int next_client_id = 0;
gboolean legacy_mode = FALSE;
qb_ipcs_service_t *ipcs_ro = NULL;
qb_ipcs_service_t *ipcs_rw = NULL;
qb_ipcs_service_t *ipcs_shm = NULL;
void send_cib_replace(const xmlNode * sync_request, const char *host);
static void cib_process_request(xmlNode* request, gboolean force_synchronous,
gboolean privileged, crm_client_t *cib_client);
static int cib_process_command(xmlNode *request, xmlNode **reply,
xmlNode **cib_diff, gboolean privileged);
gboolean cib_common_callback(qb_ipcs_connection_t * c, void *data, size_t size,
gboolean privileged);
gboolean cib_legacy_mode(void)
{
return legacy_mode;
}
static int32_t
cib_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
crm_trace("Connection %p", c);
if (cib_shutdown_flag) {
crm_info("Ignoring new client [%d] during shutdown", crm_ipcs_client_pid(c));
return -EPERM;
}
if (crm_client_new(c, uid, gid) == NULL) {
return -EIO;
}
return 0;
}
static void
cib_ipc_created(qb_ipcs_connection_t * c)
{
crm_trace("Connection %p", c);
}
static int32_t
cib_ipc_dispatch_rw(qb_ipcs_connection_t * c, void *data, size_t size)
{
crm_client_t *client = crm_client_get(c);
crm_trace("%p message from %s", c, client->id);
return cib_common_callback(c, data, size, TRUE);
}
static int32_t
cib_ipc_dispatch_ro(qb_ipcs_connection_t * c, void *data, size_t size)
{
crm_client_t *client = crm_client_get(c);
crm_trace("%p message from %s", c, client->id);
return cib_common_callback(c, data, size, FALSE);
}
/* Error code means? */
static int32_t
cib_ipc_closed(qb_ipcs_connection_t * c)
{
crm_client_t *client = crm_client_get(c);
if (client == NULL) {
return 0;
}
crm_trace("Connection %p", c);
crm_client_destroy(client);
return 0;
}
static void
cib_ipc_destroy(qb_ipcs_connection_t * c)
{
crm_trace("Connection %p", c);
cib_ipc_closed(c);
if (cib_shutdown_flag) {
cib_shutdown(0);
}
}
struct qb_ipcs_service_handlers ipc_ro_callbacks = {
.connection_accept = cib_ipc_accept,
.connection_created = cib_ipc_created,
.msg_process = cib_ipc_dispatch_ro,
.connection_closed = cib_ipc_closed,
.connection_destroyed = cib_ipc_destroy
};
struct qb_ipcs_service_handlers ipc_rw_callbacks = {
.connection_accept = cib_ipc_accept,
.connection_created = cib_ipc_created,
.msg_process = cib_ipc_dispatch_rw,
.connection_closed = cib_ipc_closed,
.connection_destroyed = cib_ipc_destroy
};
void
cib_common_callback_worker(uint32_t id, uint32_t flags, xmlNode * op_request,
crm_client_t * cib_client, gboolean privileged)
{
const char *op = crm_element_value(op_request, F_CIB_OPERATION);
if (crm_str_eq(op, CRM_OP_REGISTER, TRUE)) {
if (flags & crm_ipc_client_response) {
xmlNode *ack = create_xml_node(NULL, __FUNCTION__);
crm_xml_add(ack, F_CIB_OPERATION, CRM_OP_REGISTER);
crm_xml_add(ack, F_CIB_CLIENTID, cib_client->id);
crm_ipcs_send(cib_client, id, ack, flags);
cib_client->request_id = 0;
free_xml(ack);
}
return;
} else if (crm_str_eq(op, T_CIB_NOTIFY, TRUE)) {
/* Update the notify filters for this client */
int on_off = 0;
long long bit = 0;
const char *type = crm_element_value(op_request, F_CIB_NOTIFY_TYPE);
crm_element_value_int(op_request, F_CIB_NOTIFY_ACTIVATE, &on_off);
crm_debug("Setting %s callbacks for %s (%s): %s",
type, cib_client->name, cib_client->id, on_off ? "on" : "off");
if (safe_str_eq(type, T_CIB_POST_NOTIFY)) {
bit = cib_notify_post;
} else if (safe_str_eq(type, T_CIB_PRE_NOTIFY)) {
bit = cib_notify_pre;
} else if (safe_str_eq(type, T_CIB_UPDATE_CONFIRM)) {
bit = cib_notify_confirm;
} else if (safe_str_eq(type, T_CIB_DIFF_NOTIFY)) {
bit = cib_notify_diff;
} else if (safe_str_eq(type, T_CIB_REPLACE_NOTIFY)) {
bit = cib_notify_replace;
}
if (on_off) {
set_bit(cib_client->options, bit);
} else {
clear_bit(cib_client->options, bit);
}
if (flags & crm_ipc_client_response) {
/* TODO - include rc */
crm_ipcs_send_ack(cib_client, id, flags, "ack", __FUNCTION__, __LINE__);
}
return;
}
cib_process_request(op_request, FALSE, privileged, cib_client);
}
int32_t
cib_common_callback(qb_ipcs_connection_t * c, void *data, size_t size, gboolean privileged)
{
uint32_t id = 0;
uint32_t flags = 0;
int call_options = 0;
crm_client_t *cib_client = crm_client_get(c);
xmlNode *op_request = crm_ipcs_recv(cib_client, data, size, &id, &flags);
if (op_request) {
crm_element_value_int(op_request, F_CIB_CALLOPTS, &call_options);
}
if (op_request == NULL) {
crm_trace("Invalid message from %p", c);
crm_ipcs_send_ack(cib_client, id, flags, "nack", __FUNCTION__, __LINE__);
return 0;
} else if(cib_client == NULL) {
crm_trace("Invalid client %p", c);
return 0;
}
if (is_set(call_options, cib_sync_call)) {
CRM_LOG_ASSERT(flags & crm_ipc_client_response);
CRM_LOG_ASSERT(cib_client->request_id == 0); /* This means the client has two synchronous events in-flight */
cib_client->request_id = id; /* Reply only to the last one */
}
if (cib_client->name == NULL) {
const char *value = crm_element_value(op_request, F_CIB_CLIENTNAME);
if (value == NULL) {
cib_client->name = crm_itoa(cib_client->pid);
} else {
cib_client->name = strdup(value);
if (crm_is_daemon_name(value)) {
set_bit(cib_client->options, cib_is_daemon);
}
}
}
/* Allow cluster daemons more leeway before being evicted */
if (is_set(cib_client->options, cib_is_daemon)) {
const char *qmax = cib_config_lookup("cluster-ipc-limit");
if (crm_set_client_queue_max(cib_client, qmax)) {
crm_trace("IPC threshold for %s[%u] is now %u",
cib_client->name, cib_client->pid, cib_client->queue_max);
}
}
crm_xml_add(op_request, F_CIB_CLIENTID, cib_client->id);
crm_xml_add(op_request, F_CIB_CLIENTNAME, cib_client->name);
#if ENABLE_ACL
CRM_LOG_ASSERT(cib_client->user != NULL);
crm_acl_get_set_user(op_request, F_CIB_USER, cib_client->user);
#endif
crm_log_xml_trace(op_request, "Client[inbound]");
cib_common_callback_worker(id, flags, op_request, cib_client, privileged);
free_xml(op_request);
return 0;
}
static uint64_t ping_seq = 0;
static char *ping_digest = NULL;
static bool ping_modified_since = FALSE;
int sync_our_cib(xmlNode * request, gboolean all);
static gboolean
cib_digester_cb(gpointer data)
{
if (cib_is_master) {
char buffer[32];
xmlNode *ping = create_xml_node(NULL, "ping");
ping_seq++;
free(ping_digest);
ping_digest = NULL;
ping_modified_since = FALSE;
snprintf(buffer, 32, "%" U64T, ping_seq);
crm_trace("Requesting peer digests (%s)", buffer);
crm_xml_add(ping, F_TYPE, "cib");
crm_xml_add(ping, F_CIB_OPERATION, CRM_OP_PING);
crm_xml_add(ping, F_CIB_PING_ID, buffer);
crm_xml_add(ping, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET);
send_cluster_message(NULL, crm_msg_cib, ping, TRUE);
free_xml(ping);
}
return FALSE;
}
static void
process_ping_reply(xmlNode *reply)
{
uint64_t seq = 0;
const char *host = crm_element_value(reply, F_ORIG);
xmlNode *pong = get_message_xml(reply, F_CIB_CALLDATA);
const char *seq_s = crm_element_value(pong, F_CIB_PING_ID);
const char *digest = crm_element_value(pong, XML_ATTR_DIGEST);
if (seq_s) {
seq = crm_int_helper(seq_s, NULL);
}
if(digest == NULL) {
crm_trace("Ignoring ping reply %s from %s with no digest", seq_s, host);
} else if(seq != ping_seq) {
crm_trace("Ignoring out of sequence ping reply %s from %s", seq_s, host);
} else if(ping_modified_since) {
crm_trace("Ignoring ping reply %s from %s: cib updated since", seq_s, host);
} else {
const char *version = crm_element_value(pong, XML_ATTR_CRM_VERSION);
if(ping_digest == NULL) {
crm_trace("Calculating new digest");
ping_digest = calculate_xml_versioned_digest(the_cib, FALSE, TRUE, version);
}
crm_trace("Processing ping reply %s from %s (%s)", seq_s, host, digest);
if(safe_str_eq(ping_digest, digest) == FALSE) {
xmlNode *remote_cib = get_message_xml(pong, F_CIB_CALLDATA);
crm_notice("Local CIB %s.%s.%s.%s differs from %s: %s.%s.%s.%s %p",
crm_element_value(the_cib, XML_ATTR_GENERATION_ADMIN),
crm_element_value(the_cib, XML_ATTR_GENERATION),
crm_element_value(the_cib, XML_ATTR_NUMUPDATES),
ping_digest, host,
remote_cib?crm_element_value(remote_cib, XML_ATTR_GENERATION_ADMIN):"_",
remote_cib?crm_element_value(remote_cib, XML_ATTR_GENERATION):"_",
remote_cib?crm_element_value(remote_cib, XML_ATTR_NUMUPDATES):"_",
digest, remote_cib);
if(remote_cib && remote_cib->children) {
/* Additional debug */
xml_calculate_changes(the_cib, remote_cib);
xml_log_changes(LOG_INFO, __FUNCTION__, remote_cib);
crm_trace("End of differences");
}
free_xml(remote_cib);
sync_our_cib(reply, FALSE);
}
}
}
static void
do_local_notify(xmlNode * notify_src, const char *client_id,
gboolean sync_reply, gboolean from_peer)
{
int rid = 0;
int call_id = 0;
ssize_t rc = 0;
crm_client_t *client_obj = NULL;
CRM_ASSERT(notify_src && client_id);
crm_element_value_int(notify_src, F_CIB_CALLID, &call_id);
client_obj = crm_client_get_by_id(client_id);
if (client_obj == NULL) {
crm_debug("Could not send response %d: client %s not found",
call_id, client_id);
return;
}
if (sync_reply) {
if (client_obj->ipcs) {
CRM_LOG_ASSERT(client_obj->request_id);
rid = client_obj->request_id;
client_obj->request_id = 0;
crm_trace("Sending response %d to %s %s",
rid, client_obj->name,
from_peer ? "(originator of delegated request)" : "");
} else {
crm_trace("Sending response [call %d] to %s %s",
call_id, client_obj->name, from_peer ? "(originator of delegated request)" : "");
}
} else {
crm_trace("Sending event %d to %s %s",
call_id, client_obj->name, from_peer ? "(originator of delegated request)" : "");
}
switch (client_obj->kind) {
case CRM_CLIENT_IPC:
rc = crm_ipcs_send(client_obj, rid, notify_src, (sync_reply?
crm_ipc_flags_none : crm_ipc_server_event));
if (rc < 0) {
crm_warn("%s reply to %s failed: %s " CRM_XS " rc=%lld",
(sync_reply? "Synchronous" : "Asynchronous"),
client_obj->name, pcmk_strerror(rc), (long long) rc);
}
break;
#ifdef HAVE_GNUTLS_GNUTLS_H
case CRM_CLIENT_TLS:
#endif
case CRM_CLIENT_TCP:
crm_remote_send(client_obj->remote, notify_src);
break;
default:
crm_err("Unknown transport %d for %s", client_obj->kind, client_obj->name);
}
}
static void
local_notify_destroy_callback(gpointer data)
{
cib_local_notify_t *notify = data;
free_xml(notify->notify_src);
free(notify->client_id);
free(notify);
}
static void
check_local_notify(int bcast_id)
{
cib_local_notify_t *notify = NULL;
if (!local_notify_queue) {
return;
}
notify = g_hash_table_lookup(local_notify_queue, GINT_TO_POINTER(bcast_id));
if (notify) {
do_local_notify(notify->notify_src, notify->client_id, notify->sync_reply,
notify->from_peer);
g_hash_table_remove(local_notify_queue, GINT_TO_POINTER(bcast_id));
}
}
static void
queue_local_notify(xmlNode * notify_src, const char *client_id, gboolean sync_reply,
gboolean from_peer)
{
cib_local_notify_t *notify = calloc(1, sizeof(cib_local_notify_t));
notify->notify_src = notify_src;
notify->client_id = strdup(client_id);
notify->sync_reply = sync_reply;
notify->from_peer = from_peer;
if (!local_notify_queue) {
local_notify_queue = g_hash_table_new_full(g_direct_hash,
g_direct_equal, NULL,
local_notify_destroy_callback);
}
g_hash_table_insert(local_notify_queue, GINT_TO_POINTER(cib_local_bcast_num), notify);
}
static void
parse_local_options_v1(crm_client_t * cib_client, int call_type, int call_options, const char *host,
const char *op, gboolean * local_notify, gboolean * needs_reply,
gboolean * process, gboolean * needs_forward)
{
if (cib_op_modifies(call_type)
&& !(call_options & cib_inhibit_bcast)) {
/* we need to send an update anyway */
*needs_reply = TRUE;
} else {
*needs_reply = FALSE;
}
if (host == NULL && (call_options & cib_scope_local)) {
crm_trace("Processing locally scoped %s op from %s", op, cib_client->name);
*local_notify = TRUE;
} else if (host == NULL && cib_is_master) {
crm_trace("Processing master %s op locally from %s", op, cib_client->name);
*local_notify = TRUE;
} else if (safe_str_eq(host, cib_our_uname)) {
crm_trace("Processing locally addressed %s op from %s", op, cib_client->name);
*local_notify = TRUE;
} else if (stand_alone) {
*needs_forward = FALSE;
*local_notify = TRUE;
*process = TRUE;
} else {
crm_trace("%s op from %s needs to be forwarded to %s",
op, cib_client->name, host ? host : "the master instance");
*needs_forward = TRUE;
*process = FALSE;
}
}
static void
parse_local_options_v2(crm_client_t * cib_client, int call_type, int call_options, const char *host,
const char *op, gboolean * local_notify, gboolean * needs_reply,
gboolean * process, gboolean * needs_forward)
{
if (cib_op_modifies(call_type)) {
if(safe_str_eq(op, CIB_OP_MASTER) || safe_str_eq(op, CIB_OP_SLAVE)) {
/* Always handle these locally */
*process = TRUE;
*needs_reply = FALSE;
*local_notify = TRUE;
*needs_forward = FALSE;
return;
} else {
/* Redirect all other updates via CPG */
*needs_reply = TRUE;
*needs_forward = TRUE;
*process = FALSE;
crm_trace("%s op from %s needs to be forwarded to %s",
op, cib_client->name, host ? host : "the master instance");
return;
}
}
*process = TRUE;
*needs_reply = FALSE;
*local_notify = TRUE;
*needs_forward = FALSE;
if (stand_alone) {
crm_trace("Processing %s op from %s (stand-alone)", op, cib_client->name);
} else if (host == NULL) {
crm_trace("Processing unaddressed %s op from %s", op, cib_client->name);
} else if (safe_str_eq(host, cib_our_uname)) {
crm_trace("Processing locally addressed %s op from %s", op, cib_client->name);
} else {
crm_trace("%s op from %s needs to be forwarded to %s", op, cib_client->name, host);
*needs_forward = TRUE;
*process = FALSE;
}
}
static void
parse_local_options(crm_client_t * cib_client, int call_type, int call_options, const char *host,
const char *op, gboolean * local_notify, gboolean * needs_reply,
gboolean * process, gboolean * needs_forward)
{
if(cib_legacy_mode()) {
parse_local_options_v1(cib_client, call_type, call_options, host,
op, local_notify, needs_reply, process, needs_forward);
} else {
parse_local_options_v2(cib_client, call_type, call_options, host,
op, local_notify, needs_reply, process, needs_forward);
}
}
static gboolean
parse_peer_options_v1(int call_type, xmlNode * request,
gboolean * local_notify, gboolean * needs_reply, gboolean * process,
gboolean * needs_forward)
{
const char *op = NULL;
const char *host = NULL;
const char *delegated = NULL;
const char *originator = crm_element_value(request, F_ORIG);
const char *reply_to = crm_element_value(request, F_CIB_ISREPLY);
const char *update = crm_element_value(request, F_CIB_GLOBAL_UPDATE);
gboolean is_reply = safe_str_eq(reply_to, cib_our_uname);
if (crm_is_true(update)) {
*needs_reply = FALSE;
if (is_reply) {
*local_notify = TRUE;
crm_trace("Processing global/peer update from %s"
" that originated from us", originator);
} else {
crm_trace("Processing global/peer update from %s", originator);
}
return TRUE;
}
op = crm_element_value(request, F_CIB_OPERATION);
crm_trace("Processing %s request sent by %s", op, originator);
if (safe_str_eq(op, "cib_shutdown_req")) {
/* Always process these */
*local_notify = FALSE;
if (reply_to == NULL || is_reply) {
*process = TRUE;
}
if (is_reply) {
*needs_reply = FALSE;
}
return *process;
}
if (is_reply && safe_str_eq(op, CRM_OP_PING)) {
process_ping_reply(request);
return FALSE;
}
if (is_reply) {
crm_trace("Forward reply sent from %s to local clients", originator);
*process = FALSE;
*needs_reply = FALSE;
*local_notify = TRUE;
return TRUE;
}
host = crm_element_value(request, F_CIB_HOST);
if (host != NULL && safe_str_eq(host, cib_our_uname)) {
crm_trace("Processing %s request sent to us from %s", op, originator);
return TRUE;
} else if(is_reply == FALSE && safe_str_eq(op, CRM_OP_PING)) {
crm_trace("Processing %s request sent to %s by %s", op, host?host:"everyone", originator);
*needs_reply = TRUE;
return TRUE;
} else if (host == NULL && cib_is_master == TRUE) {
crm_trace("Processing %s request sent to master instance from %s", op, originator);
return TRUE;
}
delegated = crm_element_value(request, F_CIB_DELEGATED);
if (delegated != NULL) {
crm_trace("Ignoring msg for master instance");
} else if (host != NULL) {
/* this is for a specific instance and we're not it */
crm_trace("Ignoring msg for instance on %s", crm_str(host));
} else if (reply_to == NULL && cib_is_master == FALSE) {
/* this is for the master instance and we're not it */
crm_trace("Ignoring reply to %s", crm_str(reply_to));
} else if (safe_str_eq(op, "cib_shutdown_req")) {
if (reply_to != NULL) {
crm_debug("Processing %s from %s", op, originator);
*needs_reply = FALSE;
} else {
crm_debug("Processing %s reply from %s", op, originator);
}
return TRUE;
} else {
crm_err("Nothing for us to do?");
crm_log_xml_err(request, "Peer[inbound]");
}
return FALSE;
}
static gboolean
parse_peer_options_v2(int call_type, xmlNode * request,
gboolean * local_notify, gboolean * needs_reply, gboolean * process,
gboolean * needs_forward)
{
const char *host = NULL;
const char *delegated = crm_element_value(request, F_CIB_DELEGATED);
const char *op = crm_element_value(request, F_CIB_OPERATION);
const char *originator = crm_element_value(request, F_ORIG);
const char *reply_to = crm_element_value(request, F_CIB_ISREPLY);
const char *update = crm_element_value(request, F_CIB_GLOBAL_UPDATE);
gboolean is_reply = safe_str_eq(reply_to, cib_our_uname);
if(safe_str_eq(op, CIB_OP_REPLACE)) {
/* sync_our_cib() sets F_CIB_ISREPLY */
if (reply_to) {
delegated = reply_to;
}
goto skip_is_reply;
} else if(safe_str_eq(op, CIB_OP_SYNC)) {
} else if (is_reply && safe_str_eq(op, CRM_OP_PING)) {
process_ping_reply(request);
return FALSE;
} else if (safe_str_eq(op, CIB_OP_UPGRADE)) {
/* Only the DC (node with the oldest software) should process
* this operation if F_CIB_SCHEMA_MAX is unset
*
* If the DC is happy it will then send out another
* CIB_OP_UPGRADE which will tell all nodes to do the actual
* upgrade.
*
* Except this time F_CIB_SCHEMA_MAX will be set which puts a
* limit on how far newer nodes will go
*/
const char *max = crm_element_value(request, F_CIB_SCHEMA_MAX);
const char *upgrade_rc = crm_element_value(request, F_CIB_UPGRADE_RC);
crm_trace("Parsing %s operation%s for %s with max=%s and upgrade_rc=%s",
op, (is_reply? " reply" : ""),
(cib_is_master? "master" : "slave"),
(max? max : "none"), (upgrade_rc? upgrade_rc : "none"));
if (upgrade_rc != NULL) {
// Our upgrade request was rejected by DC, notify clients of result
crm_xml_add(request, F_CIB_RC, upgrade_rc);
} else if ((max == NULL) && cib_is_master) {
/* We are the DC, check if this upgrade is allowed */
goto skip_is_reply;
} else if(max) {
/* Ok, go ahead and upgrade to 'max' */
goto skip_is_reply;
} else {
// Ignore broadcast client requests when we're not DC
return FALSE;
}
} else if (crm_is_true(update)) {
crm_info("Detected legacy %s global update from %s", op, originator);
send_sync_request(NULL);
legacy_mode = TRUE;
return FALSE;
} else if (is_reply && cib_op_modifies(call_type)) {
crm_trace("Ignoring legacy %s reply sent from %s to local clients", op, originator);
return FALSE;
} else if (safe_str_eq(op, "cib_shutdown_req")) {
/* Legacy handling */
crm_debug("Legacy handling of %s message from %s", op, originator);
*local_notify = FALSE;
if (reply_to == NULL) {
*process = TRUE;
}
return *process;
}
if(is_reply) {
crm_trace("Handling %s reply sent from %s to local clients", op, originator);
*process = FALSE;
*needs_reply = FALSE;
*local_notify = TRUE;
return TRUE;
}
skip_is_reply:
*process = TRUE;
*needs_reply = FALSE;
if(safe_str_eq(delegated, cib_our_uname)) {
*local_notify = TRUE;
} else {
*local_notify = FALSE;
}
host = crm_element_value(request, F_CIB_HOST);
if (host != NULL && safe_str_eq(host, cib_our_uname)) {
crm_trace("Processing %s request sent to us from %s", op, originator);
*needs_reply = TRUE;
return TRUE;
} else if (host != NULL) {
/* this is for a specific instance and we're not it */
crm_trace("Ignoring %s operation for instance on %s", op, crm_str(host));
return FALSE;
} else if(is_reply == FALSE && safe_str_eq(op, CRM_OP_PING)) {
*needs_reply = TRUE;
}
crm_trace("Processing %s request sent to everyone by %s/%s on %s %s", op,
crm_element_value(request, F_CIB_CLIENTNAME),
crm_element_value(request, F_CIB_CALLID),
originator, (*local_notify)?"(notify)":"");
return TRUE;
}
static gboolean
parse_peer_options(int call_type, xmlNode * request,
gboolean * local_notify, gboolean * needs_reply, gboolean * process,
gboolean * needs_forward)
{
/* TODO: What happens when an update comes in after node A
* requests the CIB from node B, but before it gets the reply (and
* sends out the replace operation)
*/
if(cib_legacy_mode()) {
return parse_peer_options_v1(
call_type, request, local_notify, needs_reply, process, needs_forward);
} else {
return parse_peer_options_v2(
call_type, request, local_notify, needs_reply, process, needs_forward);
}
}
static void
forward_request(xmlNode * request, crm_client_t * cib_client, int call_options)
{
const char *op = crm_element_value(request, F_CIB_OPERATION);
const char *host = crm_element_value(request, F_CIB_HOST);
crm_xml_add(request, F_CIB_DELEGATED, cib_our_uname);
if (host != NULL) {
crm_trace("Forwarding %s op to %s", op, host);
send_cluster_message(crm_get_peer(0, host), crm_msg_cib, request, FALSE);
} else {
crm_trace("Forwarding %s op to master instance", op);
send_cluster_message(NULL, crm_msg_cib, request, FALSE);
}
/* Return the request to its original state */
xml_remove_prop(request, F_CIB_DELEGATED);
if (call_options & cib_discard_reply) {
crm_trace("Client not interested in reply");
}
}
static gboolean
send_peer_reply(xmlNode * msg, xmlNode * result_diff, const char *originator, gboolean broadcast)
{
CRM_ASSERT(msg != NULL);
if (broadcast) {
/* this (successful) call modified the CIB _and_ the
* change needs to be broadcast...
* send via HA to other nodes
*/
int diff_add_updates = 0;
int diff_add_epoch = 0;
int diff_add_admin_epoch = 0;
int diff_del_updates = 0;
int diff_del_epoch = 0;
int diff_del_admin_epoch = 0;
const char *digest = NULL;
int format = 1;
CRM_LOG_ASSERT(result_diff != NULL);
digest = crm_element_value(result_diff, XML_ATTR_DIGEST);
crm_element_value_int(result_diff, "format", &format);
cib_diff_version_details(result_diff,
&diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates,
&diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates);
crm_trace("Sending update diff %d.%d.%d -> %d.%d.%d %s",
diff_del_admin_epoch, diff_del_epoch, diff_del_updates,
diff_add_admin_epoch, diff_add_epoch, diff_add_updates, digest);
crm_xml_add(msg, F_CIB_ISREPLY, originator);
crm_xml_add(msg, F_CIB_GLOBAL_UPDATE, XML_BOOLEAN_TRUE);
crm_xml_add(msg, F_CIB_OPERATION, CIB_OP_APPLY_DIFF);
crm_xml_add(msg, F_CIB_USER, CRM_DAEMON_USER);
if (format == 1) {
CRM_ASSERT(digest != NULL);
}
add_message_xml(msg, F_CIB_UPDATE_DIFF, result_diff);
crm_log_xml_explicit(msg, "copy");
return send_cluster_message(NULL, crm_msg_cib, msg, TRUE);
} else if (originator != NULL) {
/* send reply via HA to originating node */
crm_trace("Sending request result to %s only", originator);
crm_xml_add(msg, F_CIB_ISREPLY, originator);
return send_cluster_message(crm_get_peer(0, originator), crm_msg_cib, msg, FALSE);
}
return FALSE;
}
static void
cib_process_request(xmlNode *request, gboolean force_synchronous,
gboolean privileged, crm_client_t *cib_client)
{
int call_type = 0;
int call_options = 0;
gboolean process = TRUE;
gboolean is_update = TRUE;
gboolean from_peer = TRUE;
gboolean needs_reply = TRUE;
gboolean local_notify = FALSE;
gboolean needs_forward = FALSE;
gboolean global_update = crm_is_true(crm_element_value(request, F_CIB_GLOBAL_UPDATE));
xmlNode *op_reply = NULL;
xmlNode *result_diff = NULL;
int rc = pcmk_ok;
const char *op = crm_element_value(request, F_CIB_OPERATION);
const char *originator = crm_element_value(request, F_ORIG);
const char *host = crm_element_value(request, F_CIB_HOST);
const char *target = NULL;
const char *call_id = crm_element_value(request, F_CIB_CALLID);
const char *client_id = crm_element_value(request, F_CIB_CLIENTID);
const char *client_name = crm_element_value(request, F_CIB_CLIENTNAME);
const char *reply_to = crm_element_value(request, F_CIB_ISREPLY);
if (cib_client) {
from_peer = FALSE;
}
crm_element_value_int(request, F_CIB_CALLOPTS, &call_options);
if (force_synchronous) {
call_options |= cib_sync_call;
}
if (host != NULL && strlen(host) == 0) {
host = NULL;
}
if (host) {
target = host;
} else if (call_options & cib_scope_local) {
target = "local host";
} else {
target = "master";
}
if (from_peer) {
crm_trace("Processing peer %s operation from %s/%s on %s intended for %s (reply=%s)",
op, client_name, call_id, originator, target, reply_to);
} else {
crm_xml_add(request, F_ORIG, cib_our_uname);
crm_trace("Processing local %s operation from %s/%s intended for %s", op, client_name, call_id, target);
}
rc = cib_get_operation_id(op, &call_type);
if (rc != pcmk_ok) {
/* TODO: construct error reply? */
crm_err("Pre-processing of command failed: %s", pcmk_strerror(rc));
return;
}
if (from_peer == FALSE) {
parse_local_options(cib_client, call_type, call_options, host, op,
&local_notify, &needs_reply, &process, &needs_forward);
} else if (parse_peer_options(call_type, request, &local_notify,
&needs_reply, &process, &needs_forward) == FALSE) {
return;
}
is_update = cib_op_modifies(call_type);
if (call_options & cib_discard_reply) {
needs_reply = is_update;
local_notify = FALSE;
}
if (needs_forward) {
const char *host = crm_element_value(request, F_CIB_HOST);
const char *section = crm_element_value(request, F_CIB_SECTION);
int log_level = LOG_INFO;
if (safe_str_eq(op, CRM_OP_NOOP)) {
log_level = LOG_DEBUG;
}
do_crm_log(log_level,
"Forwarding %s operation for section %s to %s (origin=%s/%s/%s)",
op,
section ? section : "'all'",
host ? host : cib_legacy_mode() ? "master" : "all",
originator ? originator : "local",
client_name, call_id);
forward_request(request, cib_client, call_options);
return;
}
if (cib_status != pcmk_ok) {
const char *call = crm_element_value(request, F_CIB_CALLID);
rc = cib_status;
crm_err("Operation ignored, cluster configuration is invalid."
" Please repair and restart: %s", pcmk_strerror(cib_status));
op_reply = create_xml_node(NULL, "cib-reply");
crm_xml_add(op_reply, F_TYPE, T_CIB);
crm_xml_add(op_reply, F_CIB_OPERATION, op);
crm_xml_add(op_reply, F_CIB_CALLID, call);
crm_xml_add(op_reply, F_CIB_CLIENTID, client_id);
crm_xml_add_int(op_reply, F_CIB_CALLOPTS, call_options);
crm_xml_add_int(op_reply, F_CIB_RC, rc);
crm_trace("Attaching reply output");
add_message_xml(op_reply, F_CIB_CALLDATA, the_cib);
crm_log_xml_explicit(op_reply, "cib:reply");
} else if (process) {
time_t finished = 0;
- int now = time(NULL);
+ time_t now = time(NULL);
int level = LOG_INFO;
const char *section = crm_element_value(request, F_CIB_SECTION);
rc = cib_process_command(request, &op_reply, &result_diff, privileged);
if (is_update == FALSE) {
level = LOG_TRACE;
} else if (global_update) {
switch (rc) {
case pcmk_ok:
level = LOG_INFO;
break;
case -pcmk_err_old_data:
case -pcmk_err_diff_resync:
case -pcmk_err_diff_failed:
level = LOG_TRACE;
break;
default:
level = LOG_ERR;
}
} else if (rc != pcmk_ok && is_update) {
level = LOG_WARNING;
}
do_crm_log(level,
"Completed %s operation for section %s: %s (rc=%d, origin=%s/%s/%s, version=%s.%s.%s)",
op, section ? section : "'all'", pcmk_strerror(rc), rc,
originator ? originator : "local", client_name, call_id,
the_cib ? crm_element_value(the_cib, XML_ATTR_GENERATION_ADMIN) : "0",
the_cib ? crm_element_value(the_cib, XML_ATTR_GENERATION) : "0",
the_cib ? crm_element_value(the_cib, XML_ATTR_NUMUPDATES) : "0");
finished = time(NULL);
- if (finished - now > 3) {
+ if ((finished - now) > 3) {
crm_trace("%s operation took %lds to complete", op, (long)(finished - now));
crm_write_blackbox(0, NULL);
}
if (op_reply == NULL && (needs_reply || local_notify)) {
crm_err("Unexpected NULL reply to message");
crm_log_xml_err(request, "null reply");
needs_reply = FALSE;
local_notify = FALSE;
}
}
/* from now on we are the server */
if(is_update && cib_legacy_mode() == FALSE) {
crm_trace("Completed pre-sync update from %s/%s/%s%s",
originator ? originator : "local", client_name, call_id,
local_notify?" with local notification":"");
} else if (needs_reply == FALSE || stand_alone) {
/* nothing more to do...
* this was a non-originating slave update
*/
crm_trace("Completed slave update");
} else if (cib_legacy_mode() &&
rc == pcmk_ok && result_diff != NULL && !(call_options & cib_inhibit_bcast)) {
gboolean broadcast = FALSE;
cib_local_bcast_num++;
crm_xml_add_int(request, F_CIB_LOCAL_NOTIFY_ID, cib_local_bcast_num);
broadcast = send_peer_reply(request, result_diff, originator, TRUE);
if (broadcast && client_id && local_notify && op_reply) {
/* If we have been asked to sync the reply,
* and a bcast msg has gone out, we queue the local notify
* until we know the bcast message has been received */
local_notify = FALSE;
crm_trace("Queuing local %ssync notification for %s",
(call_options & cib_sync_call) ? "" : "a-", client_id);
queue_local_notify(op_reply, client_id, (call_options & cib_sync_call), from_peer);
op_reply = NULL; /* the reply is queued, so don't free here */
}
} else if (call_options & cib_discard_reply) {
crm_trace("Caller isn't interested in reply");
} else if (from_peer) {
if (is_update == FALSE || result_diff == NULL) {
crm_trace("Request not broadcast: R/O call");
} else if (call_options & cib_inhibit_bcast) {
crm_trace("Request not broadcast: inhibited");
} else if (rc != pcmk_ok) {
crm_trace("Request not broadcast: call failed: %s", pcmk_strerror(rc));
} else {
crm_trace("Directing reply to %s", originator);
}
send_peer_reply(op_reply, result_diff, originator, FALSE);
}
if (local_notify && client_id) {
crm_trace("Performing local %ssync notification for %s",
(call_options & cib_sync_call) ? "" : "a-", client_id);
if (process == FALSE) {
do_local_notify(request, client_id, call_options & cib_sync_call, from_peer);
} else {
do_local_notify(op_reply, client_id, call_options & cib_sync_call, from_peer);
}
}
free_xml(op_reply);
free_xml(result_diff);
return;
}
static int
cib_process_command(xmlNode * request, xmlNode ** reply, xmlNode ** cib_diff, gboolean privileged)
{
xmlNode *input = NULL;
xmlNode *output = NULL;
xmlNode *result_cib = NULL;
xmlNode *current_cib = NULL;
int call_type = 0;
int call_options = 0;
const char *op = NULL;
const char *section = NULL;
const char *call_id = crm_element_value(request, F_CIB_CALLID);
int rc = pcmk_ok;
int rc2 = pcmk_ok;
gboolean send_r_notify = FALSE;
gboolean global_update = FALSE;
gboolean config_changed = FALSE;
gboolean manage_counters = TRUE;
static mainloop_timer_t *digest_timer = NULL;
CRM_ASSERT(cib_status == pcmk_ok);
if(digest_timer == NULL) {
digest_timer = mainloop_timer_add("digester", 5000, FALSE, cib_digester_cb, NULL);
}
*reply = NULL;
*cib_diff = NULL;
current_cib = the_cib;
/* Start processing the request... */
op = crm_element_value(request, F_CIB_OPERATION);
crm_element_value_int(request, F_CIB_CALLOPTS, &call_options);
rc = cib_get_operation_id(op, &call_type);
if (rc == pcmk_ok && privileged == FALSE) {
rc = cib_op_can_run(call_type, call_options, privileged, global_update);
}
rc2 = cib_op_prepare(call_type, request, &input, §ion);
if (rc == pcmk_ok) {
rc = rc2;
}
if (rc != pcmk_ok) {
crm_trace("Call setup failed: %s", pcmk_strerror(rc));
goto done;
} else if (cib_op_modifies(call_type) == FALSE) {
rc = cib_perform_op(op, call_options, cib_op_func(call_type), TRUE,
section, request, input, FALSE, &config_changed,
current_cib, &result_cib, NULL, &output);
CRM_CHECK(result_cib == NULL, free_xml(result_cib));
goto done;
}
/* Handle a valid write action */
global_update = crm_is_true(crm_element_value(request, F_CIB_GLOBAL_UPDATE));
if (global_update) {
/* legacy code */
manage_counters = FALSE;
call_options |= cib_force_diff;
crm_trace("Global update detected");
CRM_CHECK(call_type == 3 || call_type == 4, crm_err("Call type: %d", call_type);
crm_log_xml_err(request, "bad op"));
}
if (rc == pcmk_ok) {
ping_modified_since = TRUE;
if (call_options & cib_inhibit_bcast) {
/* skip */
crm_trace("Skipping update: inhibit broadcast");
manage_counters = FALSE;
}
if (is_not_set(call_options, cib_dryrun) && safe_str_eq(section, XML_CIB_TAG_STATUS)) {
/* Copying large CIBs accounts for a huge percentage of our CIB usage */
call_options |= cib_zero_copy;
} else {
clear_bit(call_options, cib_zero_copy);
}
/* result_cib must not be modified after cib_perform_op() returns */
rc = cib_perform_op(op, call_options, cib_op_func(call_type), FALSE,
section, request, input, manage_counters, &config_changed,
current_cib, &result_cib, cib_diff, &output);
if (manage_counters == FALSE) {
int format = 1;
/* Legacy code
* If the diff is NULL at this point, it's because nothing changed
*/
if (*cib_diff) {
crm_element_value_int(*cib_diff, "format", &format);
}
if (format == 1) {
config_changed = cib_config_changed(NULL, NULL, cib_diff);
}
}
/* Always write to disk for replace ops,
* this also negates the need to detect ordering changes
*/
if (crm_str_eq(CIB_OP_REPLACE, op, TRUE)) {
config_changed = TRUE;
}
}
if (rc == pcmk_ok && is_not_set(call_options, cib_dryrun)) {
crm_trace("Activating %s->%s%s%s",
crm_element_value(current_cib, XML_ATTR_NUMUPDATES),
crm_element_value(result_cib, XML_ATTR_NUMUPDATES),
(is_set(call_options, cib_zero_copy)? " zero-copy" : ""),
(config_changed? " changed" : ""));
if(is_not_set(call_options, cib_zero_copy)) {
rc = activateCibXml(result_cib, config_changed, op);
crm_trace("Activated %s (%d)",
crm_element_value(current_cib, XML_ATTR_NUMUPDATES), rc);
}
if (rc == pcmk_ok && cib_internal_config_changed(*cib_diff)) {
cib_read_config(config_hash, result_cib);
}
if (crm_str_eq(CIB_OP_REPLACE, op, TRUE)) {
if (section == NULL) {
send_r_notify = TRUE;
} else if (safe_str_eq(section, XML_TAG_CIB)) {
send_r_notify = TRUE;
} else if (safe_str_eq(section, XML_CIB_TAG_NODES)) {
send_r_notify = TRUE;
} else if (safe_str_eq(section, XML_CIB_TAG_STATUS)) {
send_r_notify = TRUE;
}
} else if (crm_str_eq(CIB_OP_ERASE, op, TRUE)) {
send_r_notify = TRUE;
}
mainloop_timer_stop(digest_timer);
mainloop_timer_start(digest_timer);
} else if (rc == -pcmk_err_schema_validation) {
CRM_ASSERT(is_not_set(call_options, cib_zero_copy));
if (output != NULL) {
crm_log_xml_info(output, "cib:output");
free_xml(output);
}
output = result_cib;
} else {
crm_trace("Not activating %d %d %s", rc, is_set(call_options, cib_dryrun), crm_element_value(result_cib, XML_ATTR_NUMUPDATES));
if(is_not_set(call_options, cib_zero_copy)) {
free_xml(result_cib);
}
}
if ((call_options & (cib_inhibit_notify|cib_dryrun)) == 0) {
const char *client = crm_element_value(request, F_CIB_CLIENTNAME);
crm_trace("Sending notifications %d", is_set(call_options, cib_dryrun));
cib_diff_notify(call_options, client, call_id, op, input, rc, *cib_diff);
}
if (send_r_notify) {
const char *origin = crm_element_value(request, F_ORIG);
cib_replace_notify(origin, the_cib, rc, *cib_diff);
}
xml_log_patchset(LOG_TRACE, "cib:diff", *cib_diff);
done:
if ((call_options & cib_discard_reply) == 0) {
const char *caller = crm_element_value(request, F_CIB_CLIENTID);
*reply = create_xml_node(NULL, "cib-reply");
crm_xml_add(*reply, F_TYPE, T_CIB);
crm_xml_add(*reply, F_CIB_OPERATION, op);
crm_xml_add(*reply, F_CIB_CALLID, call_id);
crm_xml_add(*reply, F_CIB_CLIENTID, caller);
crm_xml_add_int(*reply, F_CIB_CALLOPTS, call_options);
crm_xml_add_int(*reply, F_CIB_RC, rc);
if (output != NULL) {
crm_trace("Attaching reply output");
add_message_xml(*reply, F_CIB_CALLDATA, output);
}
crm_log_xml_explicit(*reply, "cib:reply");
}
crm_trace("cleanup");
if (cib_op_modifies(call_type) == FALSE && output != current_cib) {
free_xml(output);
output = NULL;
}
if (call_type >= 0) {
cib_op_cleanup(call_type, call_options, &input, &output);
}
crm_trace("done");
return rc;
}
void
cib_peer_callback(xmlNode * msg, void *private_data)
{
const char *reason = NULL;
const char *originator = crm_element_value(msg, F_ORIG);
if (cib_legacy_mode() && (originator == NULL || crm_str_eq(originator, cib_our_uname, TRUE))) {
/* message is from ourselves */
int bcast_id = 0;
if (!(crm_element_value_int(msg, F_CIB_LOCAL_NOTIFY_ID, &bcast_id))) {
check_local_notify(bcast_id);
}
return;
} else if (crm_peer_cache == NULL) {
reason = "membership not established";
goto bail;
}
if (crm_element_value(msg, F_CIB_CLIENTNAME) == NULL) {
crm_xml_add(msg, F_CIB_CLIENTNAME, originator);
}
/* crm_log_xml_trace("Peer[inbound]", msg); */
cib_process_request(msg, FALSE, TRUE, NULL);
return;
bail:
if (reason) {
const char *seq = crm_element_value(msg, F_SEQ);
const char *op = crm_element_value(msg, F_CIB_OPERATION);
crm_warn("Discarding %s message (%s) from %s: %s", op, seq, originator, reason);
}
}
static gboolean
cib_force_exit(gpointer data)
{
crm_notice("Forcing exit!");
terminate_cib(__FUNCTION__, CRM_EX_ERROR);
return FALSE;
}
static void
disconnect_remote_client(gpointer key, gpointer value, gpointer user_data)
{
crm_client_t *a_client = value;
crm_err("Disconnecting %s... Not implemented", crm_str(a_client->name));
}
void
cib_shutdown(int nsig)
{
struct qb_ipcs_stats srv_stats;
if (cib_shutdown_flag == FALSE) {
int disconnects = 0;
qb_ipcs_connection_t *c = NULL;
cib_shutdown_flag = TRUE;
c = qb_ipcs_connection_first_get(ipcs_rw);
while (c != NULL) {
qb_ipcs_connection_t *last = c;
c = qb_ipcs_connection_next_get(ipcs_rw, last);
crm_debug("Disconnecting r/w client %p...", last);
qb_ipcs_disconnect(last);
qb_ipcs_connection_unref(last);
disconnects++;
}
c = qb_ipcs_connection_first_get(ipcs_ro);
while (c != NULL) {
qb_ipcs_connection_t *last = c;
c = qb_ipcs_connection_next_get(ipcs_ro, last);
crm_debug("Disconnecting r/o client %p...", last);
qb_ipcs_disconnect(last);
qb_ipcs_connection_unref(last);
disconnects++;
}
c = qb_ipcs_connection_first_get(ipcs_shm);
while (c != NULL) {
qb_ipcs_connection_t *last = c;
c = qb_ipcs_connection_next_get(ipcs_shm, last);
crm_debug("Disconnecting non-blocking r/w client %p...", last);
qb_ipcs_disconnect(last);
qb_ipcs_connection_unref(last);
disconnects++;
}
disconnects += crm_hash_table_size(client_connections);
crm_debug("Disconnecting %d remote clients", crm_hash_table_size(client_connections));
g_hash_table_foreach(client_connections, disconnect_remote_client, NULL);
crm_info("Disconnected %d clients", disconnects);
}
qb_ipcs_stats_get(ipcs_rw, &srv_stats, QB_FALSE);
if (crm_hash_table_size(client_connections) == 0) {
crm_info("All clients disconnected (%d)", srv_stats.active_connections);
initiate_exit();
} else {
crm_info("Waiting on %d clients to disconnect (%d)",
crm_hash_table_size(client_connections), srv_stats.active_connections);
}
}
void
initiate_exit(void)
{
int active = 0;
xmlNode *leaving = NULL;
active = crm_active_peers();
if (active < 2) {
terminate_cib(__FUNCTION__, 0);
return;
}
crm_info("Sending disconnect notification to %d peers...", active);
leaving = create_xml_node(NULL, "exit-notification");
crm_xml_add(leaving, F_TYPE, "cib");
crm_xml_add(leaving, F_CIB_OPERATION, "cib_shutdown_req");
send_cluster_message(NULL, crm_msg_cib, leaving, TRUE);
free_xml(leaving);
g_timeout_add(EXIT_ESCALATION_MS, cib_force_exit, NULL);
}
extern int remote_fd;
extern int remote_tls_fd;
/*!
* \internal
* \brief Close remote sockets, free the global CIB and quit
*
* \param[in] caller Name of calling function (for log message)
* \param[in] fast If -1, skip disconnect; if positive, exit that
*/
void
terminate_cib(const char *caller, int fast)
{
crm_info("%s: Exiting%s...", caller,
(fast > 0)? " fast" : mainloop ? " from mainloop" : "");
if (remote_fd > 0) {
close(remote_fd);
remote_fd = 0;
}
if (remote_tls_fd > 0) {
close(remote_tls_fd);
remote_tls_fd = 0;
}
uninitializeCib();
if (fast > 0) {
/* Quit fast on error */
cib_ipc_servers_destroy(ipcs_ro, ipcs_rw, ipcs_shm);
crm_exit(fast);
} else if ((mainloop != NULL) && g_main_loop_is_running(mainloop)) {
/* Quit via returning from the main loop. If fast == -1, we skip the
* disconnect here, and it will be done when the main loop returns
* (this allows the peer status callback to avoid messing with the
* peer caches).
*/
if (fast == 0) {
crm_cluster_disconnect(&crm_cluster);
}
g_main_loop_quit(mainloop);
} else {
/* Quit via clean exit. Even the peer status callback can disconnect
* here, because we're not returning control to the caller. */
crm_cluster_disconnect(&crm_cluster);
cib_ipc_servers_destroy(ipcs_ro, ipcs_rw, ipcs_shm);
crm_exit(CRM_EX_OK);
}
}
diff --git a/daemons/controld/controld_execd_state.c b/daemons/controld/controld_execd_state.c
index 66ad426ddf..32a25595bf 100644
--- a/daemons/controld/controld_execd_state.c
+++ b/daemons/controld/controld_execd_state.c
@@ -1,829 +1,829 @@
/*
* Copyright 2012-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
GHashTable *lrm_state_table = NULL;
extern GHashTable *proxy_table;
int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg);
void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg));
static void
free_rsc_info(gpointer value)
{
lrmd_rsc_info_t *rsc_info = value;
lrmd_free_rsc_info(rsc_info);
}
static void
free_deletion_op(gpointer value)
{
struct pending_deletion_op_s *op = value;
free(op->rsc);
delete_ha_msg_input(op->input);
free(op);
}
static void
free_recurring_op(gpointer value)
{
struct recurring_op_s *op = (struct recurring_op_s *)value;
free(op->user_data);
free(op->rsc_id);
free(op->op_type);
free(op->op_key);
if (op->params) {
g_hash_table_destroy(op->params);
}
free(op);
}
static gboolean
fail_pending_op(gpointer key, gpointer value, gpointer user_data)
{
lrmd_event_data_t event = { 0, };
lrm_state_t *lrm_state = user_data;
struct recurring_op_s *op = (struct recurring_op_s *)value;
crm_trace("Pre-emptively failing " CRM_OP_FMT " on %s (call=%s, %s)",
op->rsc_id, op->op_type, op->interval_ms,
lrm_state->node_name, (char*)key, op->user_data);
event.type = lrmd_event_exec_complete;
event.rsc_id = op->rsc_id;
event.op_type = op->op_type;
event.user_data = op->user_data;
event.timeout = 0;
event.interval_ms = op->interval_ms;
event.rc = PCMK_OCF_UNKNOWN_ERROR;
event.op_status = PCMK_LRM_OP_NOT_CONNECTED;
- event.t_run = op->start_time;
- event.t_rcchange = op->start_time;
+ event.t_run = (unsigned int) op->start_time;
+ event.t_rcchange = (unsigned int) op->start_time;
event.call_id = op->call_id;
event.remote_nodename = lrm_state->node_name;
event.params = op->params;
process_lrm_event(lrm_state, &event, op, NULL);
return TRUE;
}
gboolean
lrm_state_is_local(lrm_state_t *lrm_state)
{
if (lrm_state == NULL || fsa_our_uname == NULL) {
return FALSE;
}
if (strcmp(lrm_state->node_name, fsa_our_uname) != 0) {
return FALSE;
}
return TRUE;
}
lrm_state_t *
lrm_state_create(const char *node_name)
{
lrm_state_t *state = NULL;
if (!node_name) {
crm_err("No node name given for lrm state object");
return NULL;
}
state = calloc(1, sizeof(lrm_state_t));
if (!state) {
return NULL;
}
state->node_name = strdup(node_name);
state->rsc_info_cache = g_hash_table_new_full(crm_str_hash,
g_str_equal, NULL, free_rsc_info);
state->deletion_ops = g_hash_table_new_full(crm_str_hash, g_str_equal, free,
free_deletion_op);
state->pending_ops = g_hash_table_new_full(crm_str_hash, g_str_equal, free,
free_recurring_op);
state->resource_history = g_hash_table_new_full(crm_str_hash,
g_str_equal, NULL, history_free);
state->metadata_cache = metadata_cache_new();
g_hash_table_insert(lrm_state_table, (char *)state->node_name, state);
return state;
}
void
lrm_state_destroy(const char *node_name)
{
g_hash_table_remove(lrm_state_table, node_name);
}
static gboolean
remote_proxy_remove_by_node(gpointer key, gpointer value, gpointer user_data)
{
remote_proxy_t *proxy = value;
const char *node_name = user_data;
if (safe_str_eq(node_name, proxy->node_name)) {
return TRUE;
}
return FALSE;
}
static void
internal_lrm_state_destroy(gpointer data)
{
lrm_state_t *lrm_state = data;
if (!lrm_state) {
return;
}
crm_trace("Destroying proxy table %s with %d members", lrm_state->node_name, g_hash_table_size(proxy_table));
g_hash_table_foreach_remove(proxy_table, remote_proxy_remove_by_node, (char *) lrm_state->node_name);
remote_ra_cleanup(lrm_state);
lrmd_api_delete(lrm_state->conn);
if (lrm_state->rsc_info_cache) {
crm_trace("Destroying rsc info cache with %d members", g_hash_table_size(lrm_state->rsc_info_cache));
g_hash_table_destroy(lrm_state->rsc_info_cache);
}
if (lrm_state->resource_history) {
crm_trace("Destroying history op cache with %d members", g_hash_table_size(lrm_state->resource_history));
g_hash_table_destroy(lrm_state->resource_history);
}
if (lrm_state->deletion_ops) {
crm_trace("Destroying deletion op cache with %d members", g_hash_table_size(lrm_state->deletion_ops));
g_hash_table_destroy(lrm_state->deletion_ops);
}
if (lrm_state->pending_ops) {
crm_trace("Destroying pending op cache with %d members", g_hash_table_size(lrm_state->pending_ops));
g_hash_table_destroy(lrm_state->pending_ops);
}
metadata_cache_free(lrm_state->metadata_cache);
free((char *)lrm_state->node_name);
free(lrm_state);
}
void
lrm_state_reset_tables(lrm_state_t * lrm_state, gboolean reset_metadata)
{
if (lrm_state->resource_history) {
crm_trace("Re-setting history op cache with %d members",
g_hash_table_size(lrm_state->resource_history));
g_hash_table_remove_all(lrm_state->resource_history);
}
if (lrm_state->deletion_ops) {
crm_trace("Re-setting deletion op cache with %d members",
g_hash_table_size(lrm_state->deletion_ops));
g_hash_table_remove_all(lrm_state->deletion_ops);
}
if (lrm_state->pending_ops) {
crm_trace("Re-setting pending op cache with %d members",
g_hash_table_size(lrm_state->pending_ops));
g_hash_table_remove_all(lrm_state->pending_ops);
}
if (lrm_state->rsc_info_cache) {
crm_trace("Re-setting rsc info cache with %d members",
g_hash_table_size(lrm_state->rsc_info_cache));
g_hash_table_remove_all(lrm_state->rsc_info_cache);
}
if (reset_metadata) {
metadata_cache_reset(lrm_state->metadata_cache);
}
}
gboolean
lrm_state_init_local(void)
{
if (lrm_state_table) {
return TRUE;
}
lrm_state_table =
g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, NULL, internal_lrm_state_destroy);
if (!lrm_state_table) {
return FALSE;
}
proxy_table =
g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, NULL, remote_proxy_free);
if (!proxy_table) {
g_hash_table_destroy(lrm_state_table);
lrm_state_table = NULL;
return FALSE;
}
return TRUE;
}
void
lrm_state_destroy_all(void)
{
if (lrm_state_table) {
crm_trace("Destroying state table with %d members", g_hash_table_size(lrm_state_table));
g_hash_table_destroy(lrm_state_table); lrm_state_table = NULL;
}
if(proxy_table) {
crm_trace("Destroying proxy table with %d members", g_hash_table_size(proxy_table));
g_hash_table_destroy(proxy_table); proxy_table = NULL;
}
}
lrm_state_t *
lrm_state_find(const char *node_name)
{
if (!node_name) {
return NULL;
}
return g_hash_table_lookup(lrm_state_table, node_name);
}
lrm_state_t *
lrm_state_find_or_create(const char *node_name)
{
lrm_state_t *lrm_state;
lrm_state = g_hash_table_lookup(lrm_state_table, node_name);
if (!lrm_state) {
lrm_state = lrm_state_create(node_name);
}
return lrm_state;
}
GList *
lrm_state_get_list(void)
{
return g_hash_table_get_values(lrm_state_table);
}
static remote_proxy_t *
find_connected_proxy_by_node(const char * node_name)
{
GHashTableIter gIter;
remote_proxy_t *proxy = NULL;
CRM_CHECK(proxy_table != NULL, return NULL);
g_hash_table_iter_init(&gIter, proxy_table);
while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) &proxy)) {
if (proxy->source
&& safe_str_eq(node_name, proxy->node_name)) {
return proxy;
}
}
return NULL;
}
static void
remote_proxy_disconnect_by_node(const char * node_name)
{
remote_proxy_t *proxy = NULL;
CRM_CHECK(proxy_table != NULL, return);
while ((proxy = find_connected_proxy_by_node(node_name)) != NULL) {
/* mainloop_del_ipc_client() eventually calls remote_proxy_disconnected()
* , which removes the entry from proxy_table.
* Do not do this in a g_hash_table_iter_next() loop. */
if (proxy->source) {
mainloop_del_ipc_client(proxy->source);
}
}
return;
}
void
lrm_state_disconnect_only(lrm_state_t * lrm_state)
{
int removed = 0;
if (!lrm_state->conn) {
return;
}
crm_trace("Disconnecting %s", lrm_state->node_name);
remote_proxy_disconnect_by_node(lrm_state->node_name);
((lrmd_t *) lrm_state->conn)->cmds->disconnect(lrm_state->conn);
if (is_not_set(fsa_input_register, R_SHUTDOWN)) {
removed = g_hash_table_foreach_remove(lrm_state->pending_ops, fail_pending_op, lrm_state);
crm_trace("Synthesized %d operation failures for %s", removed, lrm_state->node_name);
}
}
void
lrm_state_disconnect(lrm_state_t * lrm_state)
{
if (!lrm_state->conn) {
return;
}
lrm_state_disconnect_only(lrm_state);
lrmd_api_delete(lrm_state->conn);
lrm_state->conn = NULL;
}
int
lrm_state_is_connected(lrm_state_t * lrm_state)
{
if (!lrm_state->conn) {
return FALSE;
}
return ((lrmd_t *) lrm_state->conn)->cmds->is_connected(lrm_state->conn);
}
int
lrm_state_poke_connection(lrm_state_t * lrm_state)
{
if (!lrm_state->conn) {
return -1;
}
return ((lrmd_t *) lrm_state->conn)->cmds->poke_connection(lrm_state->conn);
}
int
lrm_state_ipc_connect(lrm_state_t * lrm_state)
{
int ret;
if (!lrm_state->conn) {
lrm_state->conn = lrmd_api_new();
((lrmd_t *) lrm_state->conn)->cmds->set_callback(lrm_state->conn, lrm_op_callback);
}
ret = ((lrmd_t *) lrm_state->conn)->cmds->connect(lrm_state->conn, CRM_SYSTEM_CRMD, NULL);
if (ret != pcmk_ok) {
lrm_state->num_lrm_register_fails++;
} else {
lrm_state->num_lrm_register_fails = 0;
}
return ret;
}
static remote_proxy_t *
crmd_remote_proxy_new(lrmd_t *lrmd, const char *node_name, const char *session_id, const char *channel)
{
static struct ipc_client_callbacks proxy_callbacks = {
.dispatch = remote_proxy_dispatch,
.destroy = remote_proxy_disconnected
};
remote_proxy_t *proxy = remote_proxy_new(lrmd, &proxy_callbacks, node_name,
session_id, channel);
return proxy;
}
gboolean
crmd_is_proxy_session(const char *session)
{
return g_hash_table_lookup(proxy_table, session) ? TRUE : FALSE;
}
void
crmd_proxy_send(const char *session, xmlNode *msg)
{
remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session);
lrm_state_t *lrm_state = NULL;
if (!proxy) {
return;
}
crm_log_xml_trace(msg, "to-proxy");
lrm_state = lrm_state_find(proxy->node_name);
if (lrm_state) {
crm_trace("Sending event to %.8s on %s", proxy->session_id, proxy->node_name);
remote_proxy_relay_event(proxy, msg);
}
}
static void
crmd_proxy_dispatch(const char *session, xmlNode *msg)
{
crm_log_xml_trace(msg, "controller-proxy[inbound]");
crm_xml_add(msg, F_CRM_SYS_FROM, session);
if (crmd_authorize_message(msg, NULL, session)) {
route_message(C_IPC_MESSAGE, msg);
}
trigger_fsa(fsa_source);
}
static void
remote_config_check(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
if (rc != pcmk_ok) {
crm_err("Query resulted in an error: %s", pcmk_strerror(rc));
if (rc == -EACCES || rc == -pcmk_err_schema_validation) {
crm_err("The cluster is mis-configured - shutting down and staying down");
}
} else {
lrmd_t * lrmd = (lrmd_t *)user_data;
crm_time_t *now = crm_time_new(NULL);
GHashTable *config_hash = crm_str_table_new();
crm_debug("Call %d : Parsing CIB options", call_id);
unpack_instance_attributes(
output, output, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, FALSE, now);
/* Now send it to the remote peer */
remote_proxy_check(lrmd, config_hash);
g_hash_table_destroy(config_hash);
crm_time_free(now);
}
}
static void
crmd_remote_proxy_cb(lrmd_t *lrmd, void *userdata, xmlNode *msg)
{
lrm_state_t *lrm_state = userdata;
const char *session = crm_element_value(msg, F_LRMD_IPC_SESSION);
remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session);
const char *op = crm_element_value(msg, F_LRMD_IPC_OP);
if (safe_str_eq(op, LRMD_IPC_OP_NEW)) {
const char *channel = crm_element_value(msg, F_LRMD_IPC_IPC_SERVER);
proxy = crmd_remote_proxy_new(lrmd, lrm_state->node_name, session, channel);
if (!remote_ra_controlling_guest(lrm_state)) {
if (proxy != NULL) {
/* Look up stonith-watchdog-timeout and send to the remote peer for validation */
int rc = fsa_cib_conn->cmds->query(fsa_cib_conn, XML_CIB_TAG_CRMCONFIG, NULL, cib_scope_local);
fsa_cib_conn->cmds->register_callback_full(fsa_cib_conn, rc, 10, FALSE, lrmd,
"remote_config_check", remote_config_check, NULL);
}
} else {
crm_debug("Skipping remote_config_check for guest-nodes");
}
} else if (safe_str_eq(op, LRMD_IPC_OP_SHUTDOWN_REQ)) {
char *now_s = NULL;
time_t now = time(NULL);
crm_notice("%s requested shutdown of its remote connection",
lrm_state->node_name);
if (!remote_ra_is_in_maintenance(lrm_state)) {
now_s = crm_itoa(now);
update_attrd(lrm_state->node_name, XML_CIB_ATTR_SHUTDOWN, now_s, NULL, TRUE);
free(now_s);
remote_proxy_ack_shutdown(lrmd);
crm_warn("Reconnection attempts to %s may result in failures that must be cleared",
lrm_state->node_name);
} else {
remote_proxy_nack_shutdown(lrmd);
crm_notice("Remote resource for %s is not managed so no ordered shutdown happening",
lrm_state->node_name);
}
return;
} else if (safe_str_eq(op, LRMD_IPC_OP_REQUEST) && proxy && proxy->is_local) {
/* This is for the controller, which we are, so don't try
* to send to ourselves over IPC -- do it directly.
*/
int flags = 0;
xmlNode *request = get_message_xml(msg, F_LRMD_IPC_MSG);
CRM_CHECK(request != NULL, return);
#if ENABLE_ACL
CRM_CHECK(lrm_state->node_name, return);
crm_xml_add(request, XML_ACL_TAG_ROLE, "pacemaker-remote");
crm_acl_get_set_user(request, F_LRMD_IPC_USER, lrm_state->node_name);
#endif
/* Pacemaker Remote nodes don't know their own names (as known to the
* cluster). When getting a node info request with no name or ID, add
* the name, so we don't return info for ourselves instead of the
* Pacemaker Remote node.
*/
if (safe_str_eq(crm_element_value(request, F_CRM_TASK),
CRM_OP_NODE_INFO)) {
int node_id;
crm_element_value_int(request, XML_ATTR_ID, &node_id);
if ((node_id <= 0)
&& (crm_element_value(request, XML_ATTR_UNAME) == NULL)) {
crm_xml_add(request, XML_ATTR_UNAME, lrm_state->node_name);
}
}
crmd_proxy_dispatch(session, request);
crm_element_value_int(msg, F_LRMD_IPC_MSG_FLAGS, &flags);
if (flags & crm_ipc_client_response) {
int msg_id = 0;
xmlNode *op_reply = create_xml_node(NULL, "ack");
crm_xml_add(op_reply, "function", __FUNCTION__);
crm_xml_add_int(op_reply, "line", __LINE__);
crm_element_value_int(msg, F_LRMD_IPC_MSG_ID, &msg_id);
remote_proxy_relay_response(proxy, op_reply, msg_id);
free_xml(op_reply);
}
} else {
remote_proxy_cb(lrmd, lrm_state->node_name, msg);
}
}
int
lrm_state_remote_connect_async(lrm_state_t * lrm_state, const char *server, int port,
int timeout_ms)
{
int ret;
if (!lrm_state->conn) {
lrm_state->conn = lrmd_remote_api_new(lrm_state->node_name, server, port);
if (!lrm_state->conn) {
return -1;
}
((lrmd_t *) lrm_state->conn)->cmds->set_callback(lrm_state->conn, remote_lrm_op_callback);
lrmd_internal_set_proxy_callback(lrm_state->conn, lrm_state, crmd_remote_proxy_cb);
}
crm_trace("initiating remote connection to %s at %d with timeout %d", server, port, timeout_ms);
ret =
((lrmd_t *) lrm_state->conn)->cmds->connect_async(lrm_state->conn, lrm_state->node_name,
timeout_ms);
if (ret != pcmk_ok) {
lrm_state->num_lrm_register_fails++;
} else {
lrm_state->num_lrm_register_fails = 0;
}
return ret;
}
int
lrm_state_get_metadata(lrm_state_t * lrm_state,
const char *class,
const char *provider,
const char *agent, char **output, enum lrmd_call_options options)
{
lrmd_key_value_t *params = NULL;
if (!lrm_state->conn) {
return -ENOTCONN;
}
/* Add the node name to the environment, as is done with normal resource
* action calls. Meta-data calls shouldn't need it, but some agents are
* written with an ocf_local_nodename call at the beginning regardless of
* action. Without the environment variable, the agent would try to contact
* the controller to get the node name -- but the controller would be
* blocking on the synchronous meta-data call.
*
* At this point, we have to assume that agents are unlikely to make other
* calls that require the controller, such as crm_node --quorum or
* --cluster-id.
*
* @TODO Make meta-data calls asynchronous. (This will be part of a larger
* project to make meta-data calls via the executor rather than directly.)
*/
params = lrmd_key_value_add(params, CRM_META "_" XML_LRM_ATTR_TARGET,
lrm_state->node_name);
return ((lrmd_t *) lrm_state->conn)->cmds->get_metadata_params(lrm_state->conn,
class, provider, agent, output, options, params);
}
int
lrm_state_cancel(lrm_state_t *lrm_state, const char *rsc_id, const char *action,
guint interval_ms)
{
if (!lrm_state->conn) {
return -ENOTCONN;
}
/* Figure out a way to make this async?
* NOTICE: Currently it's synced and directly acknowledged in do_lrm_invoke(). */
if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
return remote_ra_cancel(lrm_state, rsc_id, action, interval_ms);
}
return ((lrmd_t *) lrm_state->conn)->cmds->cancel(lrm_state->conn, rsc_id,
action, interval_ms);
}
lrmd_rsc_info_t *
lrm_state_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id, enum lrmd_call_options options)
{
lrmd_rsc_info_t *rsc = NULL;
if (!lrm_state->conn) {
return NULL;
}
if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
return remote_ra_get_rsc_info(lrm_state, rsc_id);
}
rsc = g_hash_table_lookup(lrm_state->rsc_info_cache, rsc_id);
if (rsc == NULL) {
/* only contact the lrmd if we don't already have a cached rsc info */
rsc = ((lrmd_t *) lrm_state->conn)->cmds->get_rsc_info(lrm_state->conn, rsc_id, options);
if (rsc == NULL) {
return NULL;
}
/* cache the result */
g_hash_table_insert(lrm_state->rsc_info_cache, rsc->id, rsc);
}
return lrmd_copy_rsc_info(rsc);
}
int
lrm_state_exec(lrm_state_t *lrm_state, const char *rsc_id, const char *action,
const char *userdata, guint interval_ms,
int timeout, /* ms */
int start_delay, /* ms */
lrmd_key_value_t * params)
{
if (!lrm_state->conn) {
lrmd_key_value_freeall(params);
return -ENOTCONN;
}
if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
return remote_ra_exec(lrm_state, rsc_id, action, userdata, interval_ms,
timeout, start_delay, params);
}
return ((lrmd_t *) lrm_state->conn)->cmds->exec(lrm_state->conn,
rsc_id,
action,
userdata,
interval_ms,
timeout,
start_delay,
lrmd_opt_notify_changes_only, params);
}
int
lrm_state_register_rsc(lrm_state_t * lrm_state,
const char *rsc_id,
const char *class,
const char *provider, const char *agent, enum lrmd_call_options options)
{
lrmd_t *conn = (lrmd_t *) lrm_state->conn;
if (conn == NULL) {
return -ENOTCONN;
}
if (is_remote_lrmd_ra(agent, provider, NULL)) {
return lrm_state_find_or_create(rsc_id)? pcmk_ok : -EINVAL;
}
/* @TODO Implement an asynchronous version of this (currently a blocking
* call to the lrmd).
*/
return conn->cmds->register_rsc(lrm_state->conn, rsc_id, class, provider,
agent, options);
}
int
lrm_state_unregister_rsc(lrm_state_t * lrm_state,
const char *rsc_id, enum lrmd_call_options options)
{
if (!lrm_state->conn) {
return -ENOTCONN;
}
if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
lrm_state_destroy(rsc_id);
return pcmk_ok;
}
g_hash_table_remove(lrm_state->rsc_info_cache, rsc_id);
/* @TODO Optimize this ... this function is a blocking round trip from
* client to daemon. The controld_execd_state.c code path that uses this
* function should always treat it as an async operation. The executor API
* should make an async version available.
*/
return ((lrmd_t *) lrm_state->conn)->cmds->unregister_rsc(lrm_state->conn, rsc_id, options);
}
/*
* Functions for sending alerts via local executor connection
*/
static GListPtr crmd_alert_list = NULL;
void
crmd_unpack_alerts(xmlNode *alerts)
{
pe_free_alert_list(crmd_alert_list);
crmd_alert_list = pe_unpack_alerts(alerts);
}
void
crmd_alert_node_event(crm_node_t *node)
{
lrm_state_t *lrm_state;
if (crmd_alert_list == NULL) {
return;
}
lrm_state = lrm_state_find(fsa_our_uname);
if (lrm_state == NULL) {
return;
}
lrmd_send_node_alert((lrmd_t *) lrm_state->conn, crmd_alert_list,
node->uname, node->id, node->state);
}
void
crmd_alert_fencing_op(stonith_event_t * e)
{
char *desc;
lrm_state_t *lrm_state;
if (crmd_alert_list == NULL) {
return;
}
lrm_state = lrm_state_find(fsa_our_uname);
if (lrm_state == NULL) {
return;
}
desc = crm_strdup_printf("Operation %s of %s by %s for %s@%s: %s (ref=%s)",
e->action, e->target,
(e->executioner? e->executioner : ""),
e->client_origin, e->origin,
pcmk_strerror(e->result), e->id);
lrmd_send_fencing_alert((lrmd_t *) lrm_state->conn, crmd_alert_list,
e->target, e->operation, desc, e->result);
free(desc);
}
void
crmd_alert_resource_op(const char *node, lrmd_event_data_t * op)
{
lrm_state_t *lrm_state;
if (crmd_alert_list == NULL) {
return;
}
lrm_state = lrm_state_find(fsa_our_uname);
if (lrm_state == NULL) {
return;
}
lrmd_send_resource_alert((lrmd_t *) lrm_state->conn, crmd_alert_list, node,
op);
}
diff --git a/daemons/controld/controld_lrm.h b/daemons/controld/controld_lrm.h
index a4965057df..598682b9aa 100644
--- a/daemons/controld/controld_lrm.h
+++ b/daemons/controld/controld_lrm.h
@@ -1,165 +1,167 @@
/*
- * Copyright 2004-2018 Andrew Beekhof
+ * Copyright 2004-2019 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include
#include
extern gboolean verify_stopped(enum crmd_fsa_state cur_state, int log_level);
void lrm_clear_last_failure(const char *rsc_id, const char *node_name,
const char *operation, guint interval_ms);
void lrm_op_callback(lrmd_event_data_t * op);
lrmd_t *crmd_local_lrmd_conn(void);
typedef struct resource_history_s {
char *id;
uint32_t last_callid;
lrmd_rsc_info_t rsc;
lrmd_event_data_t *last;
lrmd_event_data_t *failed;
GList *recurring_op_list;
/* Resources must be stopped using the same
* parameters they were started with. This hashtable
* holds the parameters that should be used for the next stop
* cmd on this resource. */
GHashTable *stop_params;
} rsc_history_t;
void history_free(gpointer data);
/* TODO - Replace this with lrmd_event_data_t */
struct recurring_op_s {
guint interval_ms;
int call_id;
gboolean remove;
gboolean cancelled;
- unsigned int start_time;
+ time_t start_time;
char *rsc_id;
char *op_type;
char *op_key;
char *user_data;
GHashTable *params;
};
typedef struct lrm_state_s {
const char *node_name;
void *conn; // Reserved for controld_execd_state.c usage
void *remote_ra_data; // Reserved for controld_remote_ra.c usage
GHashTable *resource_history;
GHashTable *pending_ops;
GHashTable *deletion_ops;
GHashTable *rsc_info_cache;
GHashTable *metadata_cache; // key = class[:provider]:agent, value = ra_metadata_s
int num_lrm_register_fails;
} lrm_state_t;
struct pending_deletion_op_s {
char *rsc;
ha_msg_input_t *input;
};
/*!
* \brief Check whether this the local IPC connection to the executor
*/
gboolean
lrm_state_is_local(lrm_state_t *lrm_state);
/*!
* \brief Clear all state information from a single state entry.
* \note It sometimes useful to save metadata cache when it won't go stale.
* \note This does not close the executor connection
*/
void lrm_state_reset_tables(lrm_state_t * lrm_state, gboolean reset_metadata);
GList *lrm_state_get_list(void);
/*!
* \brief Initiate internal state tables
*/
gboolean lrm_state_init_local(void);
/*!
* \brief Destroy all state entries and internal state tables
*/
void lrm_state_destroy_all(void);
/*!
* \brief Create executor connection entry
*/
lrm_state_t *lrm_state_create(const char *node_name);
/*!
* \brief Destroy executor connection by node name
*/
void lrm_state_destroy(const char *node_name);
/*!
* \brief Find lrm_state data by node name
*/
lrm_state_t *lrm_state_find(const char *node_name);
/*!
* \brief Either find or create a new entry
*/
lrm_state_t *lrm_state_find_or_create(const char *node_name);
/*!
* The functions below are wrappers for the executor API the the controller
* uses. These wrapper functions allow us to treat the controller's remote
* executor connection resources the same as regular resources. Internally,
* regular resources go to the executor, and remote connection resources are
* handled locally in the controller.
*/
void lrm_state_disconnect_only(lrm_state_t * lrm_state);
void lrm_state_disconnect(lrm_state_t * lrm_state);
int lrm_state_ipc_connect(lrm_state_t * lrm_state);
int lrm_state_remote_connect_async(lrm_state_t * lrm_state, const char *server, int port,
int timeout);
int lrm_state_is_connected(lrm_state_t * lrm_state);
int lrm_state_poke_connection(lrm_state_t * lrm_state);
int lrm_state_get_metadata(lrm_state_t * lrm_state,
const char *class,
const char *provider,
const char *agent, char **output, enum lrmd_call_options options);
int lrm_state_cancel(lrm_state_t *lrm_state, const char *rsc_id,
const char *action, guint interval_ms);
int lrm_state_exec(lrm_state_t *lrm_state, const char *rsc_id,
const char *action, const char *userdata, guint interval_ms,
int timeout, /* ms */
int start_delay, /* ms */
lrmd_key_value_t * params);
lrmd_rsc_info_t *lrm_state_get_rsc_info(lrm_state_t * lrm_state,
const char *rsc_id, enum lrmd_call_options options);
int lrm_state_register_rsc(lrm_state_t * lrm_state,
const char *rsc_id,
const char *class,
const char *provider, const char *agent, enum lrmd_call_options options);
int lrm_state_unregister_rsc(lrm_state_t * lrm_state,
const char *rsc_id, enum lrmd_call_options options);
// Functions used to manage remote executor connection resources
void remote_lrm_op_callback(lrmd_event_data_t * op);
gboolean is_remote_lrmd_ra(const char *agent, const char *provider, const char *id);
lrmd_rsc_info_t *remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id);
int remote_ra_cancel(lrm_state_t *lrm_state, const char *rsc_id,
const char *action, guint interval_ms);
int remote_ra_exec(lrm_state_t *lrm_state, const char *rsc_id,
const char *action, const char *userdata, guint interval_ms,
int timeout, /* ms */
int start_delay, /* ms */
lrmd_key_value_t * params);
void remote_ra_cleanup(lrm_state_t * lrm_state);
void remote_ra_fail(const char *node_name);
void remote_ra_process_pseudo(xmlNode *xml);
gboolean remote_ra_is_in_maintenance(lrm_state_t * lrm_state);
void remote_ra_process_maintenance_nodes(xmlNode *xml);
gboolean remote_ra_controlling_guest(lrm_state_t * lrm_state);
void process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op,
struct recurring_op_s *pending, xmlNode *action_xml);
diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c
index 0cf5048053..4fbae45bd8 100644
--- a/daemons/controld/controld_remote_ra.c
+++ b/daemons/controld/controld_remote_ra.c
@@ -1,1295 +1,1295 @@
/*
* Copyright 2013-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#define REMOTE_LRMD_RA "remote"
/* The max start timeout before cmd retry */
#define MAX_START_TIMEOUT_MS 10000
typedef struct remote_ra_cmd_s {
/*! the local node the cmd is issued from */
char *owner;
/*! the remote node the cmd is executed on */
char *rsc_id;
/*! the action to execute */
char *action;
/*! some string the client wants us to give it back */
char *userdata;
char *exit_reason; // descriptive text on error
/*! start delay in ms */
int start_delay;
/*! timer id used for start delay. */
int delay_id;
/*! timeout in ms for cmd */
int timeout;
int remaining_timeout;
/*! recurring interval in ms */
guint interval_ms;
/*! interval timer id */
int interval_id;
int reported_success;
int monitor_timeout_id;
int takeover_timeout_id;
/*! action parameters */
lrmd_key_value_t *params;
/*! executed rc */
int rc;
int op_status;
int call_id;
time_t start_time;
gboolean cancel;
} remote_ra_cmd_t;
enum remote_migration_status {
expect_takeover = 1,
takeover_complete,
};
typedef struct remote_ra_data_s {
crm_trigger_t *work;
remote_ra_cmd_t *cur_cmd;
GList *cmds;
GList *recurring_cmds;
enum remote_migration_status migrate_status;
gboolean active;
/* Maintenance mode is difficult to determine from the controller's context,
* so we have it signalled back with the transition from the scheduler.
*/
gboolean is_maintenance;
/* Similar for whether we are controlling a guest node or remote node.
* Fortunately there is a meta-attribute in the transition already and
* as the situation doesn't change over time we can use the
* resource start for noting down the information for later use when
* the attributes aren't at hand.
*/
gboolean controlling_guest;
} remote_ra_data_t;
static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms);
static void handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd);
static GList *fail_all_monitor_cmds(GList * list);
static void
free_cmd(gpointer user_data)
{
remote_ra_cmd_t *cmd = user_data;
if (!cmd) {
return;
}
if (cmd->delay_id) {
g_source_remove(cmd->delay_id);
}
if (cmd->interval_id) {
g_source_remove(cmd->interval_id);
}
if (cmd->monitor_timeout_id) {
g_source_remove(cmd->monitor_timeout_id);
}
if (cmd->takeover_timeout_id) {
g_source_remove(cmd->takeover_timeout_id);
}
free(cmd->owner);
free(cmd->rsc_id);
free(cmd->action);
free(cmd->userdata);
free(cmd->exit_reason);
lrmd_key_value_freeall(cmd->params);
free(cmd);
}
static int
generate_callid(void)
{
static int remote_ra_callid = 0;
remote_ra_callid++;
if (remote_ra_callid <= 0) {
remote_ra_callid = 1;
}
return remote_ra_callid;
}
static gboolean
recurring_helper(gpointer data)
{
remote_ra_cmd_t *cmd = data;
lrm_state_t *connection_rsc = NULL;
cmd->interval_id = 0;
connection_rsc = lrm_state_find(cmd->rsc_id);
if (connection_rsc && connection_rsc->remote_ra_data) {
remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
ra_data->recurring_cmds = g_list_remove(ra_data->recurring_cmds, cmd);
ra_data->cmds = g_list_append(ra_data->cmds, cmd);
mainloop_set_trigger(ra_data->work);
}
return FALSE;
}
static gboolean
start_delay_helper(gpointer data)
{
remote_ra_cmd_t *cmd = data;
lrm_state_t *connection_rsc = NULL;
cmd->delay_id = 0;
connection_rsc = lrm_state_find(cmd->rsc_id);
if (connection_rsc && connection_rsc->remote_ra_data) {
remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
mainloop_set_trigger(ra_data->work);
}
return FALSE;
}
/*!
* \internal
* \brief Handle cluster communication related to pacemaker_remote node joining
*
* \param[in] node_name Name of newly integrated pacemaker_remote node
*/
static void
remote_node_up(const char *node_name)
{
int call_opt, call_id = 0;
xmlNode *update, *state;
crm_node_t *node;
CRM_CHECK(node_name != NULL, return);
crm_info("Announcing pacemaker_remote node %s", node_name);
/* Clear node's operation history. The node's transient attributes should
* and normally will be cleared when the node leaves, but since remote node
* state has a number of corner cases, clear them here as well, to be sure.
*/
call_opt = crmd_cib_smart_opt();
erase_status_tag(node_name, XML_CIB_TAG_LRM, call_opt);
erase_status_tag(node_name, XML_TAG_TRANSIENT_NODEATTRS, call_opt);
/* Clear node's probed attribute */
update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE);
/* Ensure node is in the remote peer cache with member status */
node = crm_remote_peer_get(node_name);
CRM_CHECK(node != NULL, return);
crm_update_peer_state(__FUNCTION__, node, CRM_NODE_MEMBER, 0);
/* pacemaker_remote nodes don't participate in the membership layer,
* so cluster nodes don't automatically get notified when they come and go.
* We send a cluster message to the DC, and update the CIB node state entry,
* so the DC will get it sooner (via message) or later (via CIB refresh),
* and any other interested parties can query the CIB.
*/
send_remote_state_message(node_name, TRUE);
update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
state = create_node_state_update(node, node_update_cluster, update,
__FUNCTION__);
/* Clear the XML_NODE_IS_FENCED flag in the node state. If the node ever
* needs to be fenced, this flag will allow various actions to determine
* whether the fencing has happened yet.
*/
crm_xml_add(state, XML_NODE_IS_FENCED, "0");
/* TODO: If the remote connection drops, and this (async) CIB update either
* failed or has not yet completed, later actions could mistakenly think the
* node has already been fenced (if the XML_NODE_IS_FENCED attribute was
* previously set, because it won't have been cleared). This could prevent
* actual fencing or allow recurring monitor failures to be cleared too
* soon. Ideally, we wouldn't rely on the CIB for the fenced status.
*/
fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
if (call_id < 0) {
crm_perror(LOG_WARNING, "%s CIB node state setup", node_name);
}
free_xml(update);
}
enum down_opts {
DOWN_KEEP_LRM,
DOWN_ERASE_LRM
};
/*!
* \internal
* \brief Handle cluster communication related to pacemaker_remote node leaving
*
* \param[in] node_name Name of lost node
* \param[in] opts Whether to keep or erase LRM history
*/
static void
remote_node_down(const char *node_name, const enum down_opts opts)
{
xmlNode *update;
int call_id = 0;
int call_opt = crmd_cib_smart_opt();
crm_node_t *node;
/* Purge node from attrd's memory */
update_attrd_remote_node_removed(node_name, NULL);
/* Purge node's transient attributes */
erase_status_tag(node_name, XML_TAG_TRANSIENT_NODEATTRS, call_opt);
/* Normally, the LRM operation history should be kept until the node comes
* back up. However, after a successful fence, we want to clear it, so we
* don't think resources are still running on the node.
*/
if (opts == DOWN_ERASE_LRM) {
erase_status_tag(node_name, XML_CIB_TAG_LRM, call_opt);
}
/* Ensure node is in the remote peer cache with lost state */
node = crm_remote_peer_get(node_name);
CRM_CHECK(node != NULL, return);
crm_update_peer_state(__FUNCTION__, node, CRM_NODE_LOST, 0);
/* Notify DC */
send_remote_state_message(node_name, FALSE);
/* Update CIB node state */
update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
create_node_state_update(node, node_update_cluster, update, __FUNCTION__);
fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
if (call_id < 0) {
crm_perror(LOG_ERR, "%s CIB node state update", node_name);
}
free_xml(update);
}
/*!
* \internal
* \brief Handle effects of a remote RA command on node state
*
* \param[in] cmd Completed remote RA command
*/
static void
check_remote_node_state(remote_ra_cmd_t *cmd)
{
/* Only successful actions can change node state */
if (cmd->rc != PCMK_OCF_OK) {
return;
}
if (safe_str_eq(cmd->action, "start")) {
remote_node_up(cmd->rsc_id);
} else if (safe_str_eq(cmd->action, "migrate_from")) {
/* After a successful migration, we don't need to do remote_node_up()
* because the DC already knows the node is up, and we don't want to
* clear LRM history etc. We do need to add the remote node to this
* host's remote peer cache, because (unless it happens to be DC)
* it hasn't been tracking the remote node, and other code relies on
* the cache to distinguish remote nodes from unseen cluster nodes.
*/
crm_node_t *node = crm_remote_peer_get(cmd->rsc_id);
CRM_CHECK(node != NULL, return);
crm_update_peer_state(__FUNCTION__, node, CRM_NODE_MEMBER, 0);
} else if (safe_str_eq(cmd->action, "stop")) {
lrm_state_t *lrm_state = lrm_state_find(cmd->rsc_id);
remote_ra_data_t *ra_data = lrm_state? lrm_state->remote_ra_data : NULL;
if (ra_data) {
if (ra_data->migrate_status != takeover_complete) {
/* Stop means down if we didn't successfully migrate elsewhere */
remote_node_down(cmd->rsc_id, DOWN_KEEP_LRM);
} else if (AM_I_DC == FALSE) {
/* Only the connection host and DC track node state,
* so if the connection migrated elsewhere and we aren't DC,
* un-cache the node, so we don't have stale info
*/
crm_remote_peer_cache_remove(cmd->rsc_id);
}
}
}
/* We don't do anything for successful monitors, which is correct for
* routine recurring monitors, and for monitors on nodes where the
* connection isn't supposed to be (the cluster will stop the connection in
* that case). However, if the initial probe finds the connection already
* active on the node where we want it, we probably should do
* remote_node_up(). Unfortunately, we can't distinguish that case here.
* Given that connections have to be initiated by the cluster, the chance of
* that should be close to zero.
*/
}
static void
report_remote_ra_result(remote_ra_cmd_t * cmd)
{
lrmd_event_data_t op = { 0, };
check_remote_node_state(cmd);
op.type = lrmd_event_exec_complete;
op.rsc_id = cmd->rsc_id;
op.op_type = cmd->action;
op.user_data = cmd->userdata;
op.exit_reason = cmd->exit_reason;
op.timeout = cmd->timeout;
op.interval_ms = cmd->interval_ms;
op.rc = cmd->rc;
op.op_status = cmd->op_status;
- op.t_run = cmd->start_time;
- op.t_rcchange = cmd->start_time;
+ op.t_run = (unsigned int) cmd->start_time;
+ op.t_rcchange = (unsigned int) cmd->start_time;
if (cmd->reported_success && cmd->rc != PCMK_OCF_OK) {
- op.t_rcchange = time(NULL);
+ op.t_rcchange = (unsigned int) time(NULL);
/* This edge case will likely never ever occur, but if it does the
* result is that a failure will not be processed correctly. This is only
* remotely possible because we are able to detect a connection resource's tcp
* connection has failed at any moment after start has completed. The actual
* recurring operation is just a connectivity ping.
*
* basically, we are not guaranteed that the first successful monitor op and
* a subsequent failed monitor op will not occur in the same timestamp. We have to
* make it look like the operations occurred at separate times though. */
if (op.t_rcchange == op.t_run) {
op.t_rcchange++;
}
}
if (cmd->params) {
lrmd_key_value_t *tmp;
op.params = crm_str_table_new();
for (tmp = cmd->params; tmp; tmp = tmp->next) {
g_hash_table_insert(op.params, strdup(tmp->key), strdup(tmp->value));
}
}
op.call_id = cmd->call_id;
op.remote_nodename = cmd->owner;
lrm_op_callback(&op);
if (op.params) {
g_hash_table_destroy(op.params);
}
}
static void
update_remaining_timeout(remote_ra_cmd_t * cmd)
{
cmd->remaining_timeout = ((cmd->timeout / 1000) - (time(NULL) - cmd->start_time)) * 1000;
}
static gboolean
retry_start_cmd_cb(gpointer data)
{
lrm_state_t *lrm_state = data;
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
remote_ra_cmd_t *cmd = NULL;
int rc = -1;
if (!ra_data || !ra_data->cur_cmd) {
return FALSE;
}
cmd = ra_data->cur_cmd;
if (safe_str_neq(cmd->action, "start") && safe_str_neq(cmd->action, "migrate_from")) {
return FALSE;
}
update_remaining_timeout(cmd);
if (cmd->remaining_timeout > 0) {
rc = handle_remote_ra_start(lrm_state, cmd, cmd->remaining_timeout);
}
if (rc != 0) {
cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
cmd->op_status = PCMK_LRM_OP_ERROR;
report_remote_ra_result(cmd);
if (ra_data->cmds) {
mainloop_set_trigger(ra_data->work);
}
ra_data->cur_cmd = NULL;
free_cmd(cmd);
} else {
/* wait for connection event */
}
return FALSE;
}
static gboolean
connection_takeover_timeout_cb(gpointer data)
{
lrm_state_t *lrm_state = NULL;
remote_ra_cmd_t *cmd = data;
crm_info("takeover event timed out for node %s", cmd->rsc_id);
cmd->takeover_timeout_id = 0;
lrm_state = lrm_state_find(cmd->rsc_id);
handle_remote_ra_stop(lrm_state, cmd);
free_cmd(cmd);
return FALSE;
}
static gboolean
monitor_timeout_cb(gpointer data)
{
lrm_state_t *lrm_state = NULL;
remote_ra_cmd_t *cmd = data;
lrm_state = lrm_state_find(cmd->rsc_id);
crm_info("Timed out waiting for remote poke response from %s%s",
cmd->rsc_id, (lrm_state? "" : " (no LRM state)"));
cmd->monitor_timeout_id = 0;
cmd->op_status = PCMK_LRM_OP_TIMEOUT;
cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
if (lrm_state && lrm_state->remote_ra_data) {
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
if (ra_data->cur_cmd == cmd) {
ra_data->cur_cmd = NULL;
}
if (ra_data->cmds) {
mainloop_set_trigger(ra_data->work);
}
}
report_remote_ra_result(cmd);
free_cmd(cmd);
if(lrm_state) {
lrm_state_disconnect(lrm_state);
}
return FALSE;
}
static void
synthesize_lrmd_success(lrm_state_t *lrm_state, const char *rsc_id, const char *op_type)
{
lrmd_event_data_t op = { 0, };
if (lrm_state == NULL) {
/* if lrm_state not given assume local */
lrm_state = lrm_state_find(fsa_our_uname);
}
CRM_ASSERT(lrm_state != NULL);
op.type = lrmd_event_exec_complete;
op.rsc_id = rsc_id;
op.op_type = op_type;
op.rc = PCMK_OCF_OK;
op.op_status = PCMK_LRM_OP_DONE;
- op.t_run = time(NULL);
+ op.t_run = (unsigned int) time(NULL);
op.t_rcchange = op.t_run;
op.call_id = generate_callid();
process_lrm_event(lrm_state, &op, NULL, NULL);
}
void
remote_lrm_op_callback(lrmd_event_data_t * op)
{
gboolean cmd_handled = FALSE;
lrm_state_t *lrm_state = NULL;
remote_ra_data_t *ra_data = NULL;
remote_ra_cmd_t *cmd = NULL;
crm_debug("Processing '%s%s%s' event on remote connection to %s: %s "
"(%d) status=%s (%d)",
(op->op_type? op->op_type : ""), (op->op_type? " " : ""),
lrmd_event_type2str(op->type), op->remote_nodename,
services_ocf_exitcode_str(op->rc), op->rc,
services_lrm_status_str(op->op_status), op->op_status);
lrm_state = lrm_state_find(op->remote_nodename);
if (!lrm_state || !lrm_state->remote_ra_data) {
crm_debug("No state information found for remote connection event");
return;
}
ra_data = lrm_state->remote_ra_data;
if (op->type == lrmd_event_new_client) {
// Another client has connected to the remote daemon
if (ra_data->migrate_status == expect_takeover) {
// Great, we knew this was coming
ra_data->migrate_status = takeover_complete;
} else {
crm_err("Unexpected pacemaker_remote client takeover for %s. Disconnecting", op->remote_nodename);
/* In this case, lrmd_tls_connection_destroy() will be called under the control of mainloop. */
/* Do not free lrm_state->conn yet. */
/* It'll be freed in the following stop action. */
lrm_state_disconnect_only(lrm_state);
}
return;
}
/* filter all EXEC events up */
if (op->type == lrmd_event_exec_complete) {
if (ra_data->migrate_status == takeover_complete) {
crm_debug("ignoring event, this connection is taken over by another node");
} else {
lrm_op_callback(op);
}
return;
}
if ((op->type == lrmd_event_disconnect) && (ra_data->cur_cmd == NULL)) {
if (ra_data->active == FALSE) {
crm_debug("Disconnection from Pacemaker Remote node %s complete",
lrm_state->node_name);
} else if (!remote_ra_is_in_maintenance(lrm_state)) {
crm_err("Lost connection to Pacemaker Remote node %s",
lrm_state->node_name);
ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
} else {
crm_notice("Unmanaged Pacemaker Remote node %s disconnected",
lrm_state->node_name);
/* Do roughly what a 'stop' on the remote-resource would do */
handle_remote_ra_stop(lrm_state, NULL);
remote_node_down(lrm_state->node_name, DOWN_KEEP_LRM);
/* now fake the reply of a successful 'stop' */
synthesize_lrmd_success(NULL, lrm_state->node_name, "stop");
}
return;
}
if (!ra_data->cur_cmd) {
crm_debug("no event to match");
return;
}
cmd = ra_data->cur_cmd;
/* Start actions and migrate from actions complete after connection
* comes back to us. */
if (op->type == lrmd_event_connect && (safe_str_eq(cmd->action, "start") ||
safe_str_eq(cmd->action, "migrate_from"))) {
if (op->connection_rc < 0) {
update_remaining_timeout(cmd);
if (op->connection_rc == -ENOKEY) {
// Hard error, don't retry
cmd->op_status = PCMK_LRM_OP_ERROR;
cmd->rc = PCMK_OCF_INVALID_PARAM;
cmd->exit_reason = strdup("Authentication key not readable");
} else if (cmd->remaining_timeout > 3000) {
crm_trace("rescheduling start, remaining timeout %d", cmd->remaining_timeout);
g_timeout_add(1000, retry_start_cmd_cb, lrm_state);
return;
} else {
crm_trace("can't reschedule start, remaining timeout too small %d",
cmd->remaining_timeout);
cmd->op_status = PCMK_LRM_OP_TIMEOUT;
cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
}
} else {
lrm_state_reset_tables(lrm_state, TRUE);
cmd->rc = PCMK_OCF_OK;
cmd->op_status = PCMK_LRM_OP_DONE;
ra_data->active = TRUE;
}
crm_debug("Remote connection event matched %s action", cmd->action);
report_remote_ra_result(cmd);
cmd_handled = TRUE;
} else if (op->type == lrmd_event_poke && safe_str_eq(cmd->action, "monitor")) {
if (cmd->monitor_timeout_id) {
g_source_remove(cmd->monitor_timeout_id);
cmd->monitor_timeout_id = 0;
}
/* Only report success the first time, after that only worry about failures.
* For this function, if we get the poke pack, it is always a success. Pokes
* only fail if the send fails, or the response times out. */
if (!cmd->reported_success) {
cmd->rc = PCMK_OCF_OK;
cmd->op_status = PCMK_LRM_OP_DONE;
report_remote_ra_result(cmd);
cmd->reported_success = 1;
}
crm_debug("Remote poke event matched %s action", cmd->action);
/* success, keep rescheduling if interval is present. */
if (cmd->interval_ms && (cmd->cancel == FALSE)) {
ra_data->recurring_cmds = g_list_append(ra_data->recurring_cmds, cmd);
cmd->interval_id = g_timeout_add(cmd->interval_ms,
recurring_helper, cmd);
cmd = NULL; /* prevent free */
}
cmd_handled = TRUE;
} else if (op->type == lrmd_event_disconnect && safe_str_eq(cmd->action, "monitor")) {
if (ra_data->active == TRUE && (cmd->cancel == FALSE)) {
cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
cmd->op_status = PCMK_LRM_OP_ERROR;
report_remote_ra_result(cmd);
crm_err("Remote connection to %s unexpectedly dropped during monitor",
lrm_state->node_name);
}
cmd_handled = TRUE;
} else if (op->type == lrmd_event_new_client && safe_str_eq(cmd->action, "stop")) {
handle_remote_ra_stop(lrm_state, cmd);
cmd_handled = TRUE;
} else {
crm_debug("Event did not match %s action", ra_data->cur_cmd->action);
}
if (cmd_handled) {
ra_data->cur_cmd = NULL;
if (ra_data->cmds) {
mainloop_set_trigger(ra_data->work);
}
free_cmd(cmd);
}
}
static void
handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd)
{
remote_ra_data_t *ra_data = NULL;
CRM_ASSERT(lrm_state);
ra_data = lrm_state->remote_ra_data;
if (ra_data->migrate_status != takeover_complete) {
/* delete pending ops when ever the remote connection is intentionally stopped */
g_hash_table_remove_all(lrm_state->pending_ops);
} else {
/* we no longer hold the history if this connection has been migrated,
* however, we keep metadata cache for future use */
lrm_state_reset_tables(lrm_state, FALSE);
}
ra_data->active = FALSE;
lrm_state_disconnect(lrm_state);
if (ra_data->cmds) {
g_list_free_full(ra_data->cmds, free_cmd);
}
if (ra_data->recurring_cmds) {
g_list_free_full(ra_data->recurring_cmds, free_cmd);
}
ra_data->cmds = NULL;
ra_data->recurring_cmds = NULL;
ra_data->cur_cmd = NULL;
if (cmd) {
cmd->rc = PCMK_OCF_OK;
cmd->op_status = PCMK_LRM_OP_DONE;
report_remote_ra_result(cmd);
}
}
static int
handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms)
{
const char *server = NULL;
lrmd_key_value_t *tmp = NULL;
int port = 0;
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
int timeout_used = timeout_ms > MAX_START_TIMEOUT_MS ? MAX_START_TIMEOUT_MS : timeout_ms;
for (tmp = cmd->params; tmp; tmp = tmp->next) {
if (safe_str_eq(tmp->key, XML_RSC_ATTR_REMOTE_RA_ADDR) ||
safe_str_eq(tmp->key, XML_RSC_ATTR_REMOTE_RA_SERVER)) {
server = tmp->value;
} else if (safe_str_eq(tmp->key, XML_RSC_ATTR_REMOTE_RA_PORT)) {
port = atoi(tmp->value);
} else if (safe_str_eq(tmp->key, CRM_META"_"XML_RSC_ATTR_CONTAINER)) {
ra_data->controlling_guest = TRUE;
}
}
return lrm_state_remote_connect_async(lrm_state, server, port, timeout_used);
}
static gboolean
handle_remote_ra_exec(gpointer user_data)
{
int rc = 0;
lrm_state_t *lrm_state = user_data;
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
remote_ra_cmd_t *cmd;
GList *first = NULL;
if (ra_data->cur_cmd) {
/* still waiting on previous cmd */
return TRUE;
}
while (ra_data->cmds) {
first = ra_data->cmds;
cmd = first->data;
if (cmd->delay_id) {
/* still waiting for start delay timer to trip */
return TRUE;
}
ra_data->cmds = g_list_remove_link(ra_data->cmds, first);
g_list_free_1(first);
if (!strcmp(cmd->action, "start") || !strcmp(cmd->action, "migrate_from")) {
ra_data->migrate_status = 0;
rc = handle_remote_ra_start(lrm_state, cmd, cmd->timeout);
if (rc == 0) {
/* take care of this later when we get async connection result */
crm_debug("Initiated async remote connection, %s action will complete after connect event",
cmd->action);
ra_data->cur_cmd = cmd;
return TRUE;
} else {
crm_debug("Could not initiate remote connection for %s action",
cmd->action);
cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
cmd->op_status = PCMK_LRM_OP_ERROR;
}
report_remote_ra_result(cmd);
} else if (!strcmp(cmd->action, "monitor")) {
if (lrm_state_is_connected(lrm_state) == TRUE) {
rc = lrm_state_poke_connection(lrm_state);
if (rc < 0) {
cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
cmd->op_status = PCMK_LRM_OP_ERROR;
}
} else {
rc = -1;
cmd->op_status = PCMK_LRM_OP_DONE;
cmd->rc = PCMK_OCF_NOT_RUNNING;
}
if (rc == 0) {
crm_debug("Poked Pacemaker Remote at node %s, waiting for async response",
cmd->rsc_id);
ra_data->cur_cmd = cmd;
cmd->monitor_timeout_id = g_timeout_add(cmd->timeout, monitor_timeout_cb, cmd);
return TRUE;
}
report_remote_ra_result(cmd);
} else if (!strcmp(cmd->action, "stop")) {
if (ra_data->migrate_status == expect_takeover) {
/* briefly wait on stop for the takeover event to occur. If the
* takeover event does not occur during the wait period, that's fine.
* It just means that the remote-node's lrm_status section is going to get
* cleared which will require all the resources running in the remote-node
* to be explicitly re-detected via probe actions. If the takeover does occur
* successfully, then we can leave the status section intact. */
cmd->takeover_timeout_id = g_timeout_add((cmd->timeout/2), connection_takeover_timeout_cb, cmd);
ra_data->cur_cmd = cmd;
return TRUE;
}
handle_remote_ra_stop(lrm_state, cmd);
} else if (!strcmp(cmd->action, "migrate_to")) {
ra_data->migrate_status = expect_takeover;
cmd->rc = PCMK_OCF_OK;
cmd->op_status = PCMK_LRM_OP_DONE;
report_remote_ra_result(cmd);
} else if (!strcmp(cmd->action, "reload")) {
/* reloads are a no-op right now, add logic here when they become important */
cmd->rc = PCMK_OCF_OK;
cmd->op_status = PCMK_LRM_OP_DONE;
report_remote_ra_result(cmd);
}
free_cmd(cmd);
}
return TRUE;
}
static void
remote_ra_data_init(lrm_state_t * lrm_state)
{
remote_ra_data_t *ra_data = NULL;
if (lrm_state->remote_ra_data) {
return;
}
ra_data = calloc(1, sizeof(remote_ra_data_t));
ra_data->work = mainloop_add_trigger(G_PRIORITY_HIGH, handle_remote_ra_exec, lrm_state);
lrm_state->remote_ra_data = ra_data;
}
void
remote_ra_cleanup(lrm_state_t * lrm_state)
{
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
if (!ra_data) {
return;
}
if (ra_data->cmds) {
g_list_free_full(ra_data->cmds, free_cmd);
}
if (ra_data->recurring_cmds) {
g_list_free_full(ra_data->recurring_cmds, free_cmd);
}
mainloop_destroy_trigger(ra_data->work);
free(ra_data);
lrm_state->remote_ra_data = NULL;
}
gboolean
is_remote_lrmd_ra(const char *agent, const char *provider, const char *id)
{
if (agent && provider && !strcmp(agent, REMOTE_LRMD_RA) && !strcmp(provider, "pacemaker")) {
return TRUE;
}
if (id && lrm_state_find(id) && safe_str_neq(id, fsa_our_uname)) {
return TRUE;
}
return FALSE;
}
lrmd_rsc_info_t *
remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id)
{
lrmd_rsc_info_t *info = NULL;
if ((lrm_state_find(rsc_id))) {
info = calloc(1, sizeof(lrmd_rsc_info_t));
info->id = strdup(rsc_id);
info->type = strdup(REMOTE_LRMD_RA);
info->standard = strdup(PCMK_RESOURCE_CLASS_OCF);
info->provider = strdup("pacemaker");
}
return info;
}
static gboolean
is_remote_ra_supported_action(const char *action)
{
if (!action) {
return FALSE;
} else if (strcmp(action, "start") &&
strcmp(action, "stop") &&
strcmp(action, "reload") &&
strcmp(action, "migrate_to") &&
strcmp(action, "migrate_from") && strcmp(action, "monitor")) {
return FALSE;
}
return TRUE;
}
static GList *
fail_all_monitor_cmds(GList * list)
{
GList *rm_list = NULL;
remote_ra_cmd_t *cmd = NULL;
GListPtr gIter = NULL;
for (gIter = list; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
if ((cmd->interval_ms > 0) && safe_str_eq(cmd->action, "monitor")) {
rm_list = g_list_append(rm_list, cmd);
}
}
for (gIter = rm_list; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
cmd->op_status = PCMK_LRM_OP_ERROR;
crm_trace("Pre-emptively failing %s %s (interval=%u, %s)",
cmd->action, cmd->rsc_id, cmd->interval_ms, cmd->userdata);
report_remote_ra_result(cmd);
list = g_list_remove(list, cmd);
free_cmd(cmd);
}
/* frees only the list data, not the cmds */
g_list_free(rm_list);
return list;
}
static GList *
remove_cmd(GList * list, const char *action, guint interval_ms)
{
remote_ra_cmd_t *cmd = NULL;
GListPtr gIter = NULL;
for (gIter = list; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
if ((cmd->interval_ms == interval_ms)
&& safe_str_eq(cmd->action, action)) {
break;
}
cmd = NULL;
}
if (cmd) {
list = g_list_remove(list, cmd);
free_cmd(cmd);
}
return list;
}
int
remote_ra_cancel(lrm_state_t *lrm_state, const char *rsc_id,
const char *action, guint interval_ms)
{
lrm_state_t *connection_rsc = NULL;
remote_ra_data_t *ra_data = NULL;
connection_rsc = lrm_state_find(rsc_id);
if (!connection_rsc || !connection_rsc->remote_ra_data) {
return -EINVAL;
}
ra_data = connection_rsc->remote_ra_data;
ra_data->cmds = remove_cmd(ra_data->cmds, action, interval_ms);
ra_data->recurring_cmds = remove_cmd(ra_data->recurring_cmds, action,
interval_ms);
if (ra_data->cur_cmd &&
(ra_data->cur_cmd->interval_ms == interval_ms) &&
(safe_str_eq(ra_data->cur_cmd->action, action))) {
ra_data->cur_cmd->cancel = TRUE;
}
return 0;
}
static remote_ra_cmd_t *
handle_dup_monitor(remote_ra_data_t *ra_data, guint interval_ms,
const char *userdata)
{
GList *gIter = NULL;
remote_ra_cmd_t *cmd = NULL;
/* there are 3 places a potential duplicate monitor operation
* could exist.
* 1. recurring_cmds list. where the op is waiting for its next interval
* 2. cmds list, where the op is queued to get executed immediately
* 3. cur_cmd, which means the monitor op is in flight right now.
*/
if (interval_ms == 0) {
return NULL;
}
if (ra_data->cur_cmd &&
ra_data->cur_cmd->cancel == FALSE &&
(ra_data->cur_cmd->interval_ms == interval_ms) &&
safe_str_eq(ra_data->cur_cmd->action, "monitor")) {
cmd = ra_data->cur_cmd;
goto handle_dup;
}
for (gIter = ra_data->recurring_cmds; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
if ((cmd->interval_ms == interval_ms)
&& safe_str_eq(cmd->action, "monitor")) {
goto handle_dup;
}
}
for (gIter = ra_data->cmds; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
if ((cmd->interval_ms == interval_ms)
&& safe_str_eq(cmd->action, "monitor")) {
goto handle_dup;
}
}
return NULL;
handle_dup:
crm_trace("merging duplicate monitor cmd " CRM_OP_FMT,
cmd->rsc_id, "monitor", interval_ms);
/* update the userdata */
if (userdata) {
free(cmd->userdata);
cmd->userdata = strdup(userdata);
}
/* if we've already reported success, generate a new call id */
if (cmd->reported_success) {
cmd->start_time = time(NULL);
cmd->call_id = generate_callid();
cmd->reported_success = 0;
}
/* if we have an interval_id set, that means we are in the process of
* waiting for this cmd's next interval. instead of waiting, cancel
* the timer and execute the action immediately */
if (cmd->interval_id) {
g_source_remove(cmd->interval_id);
cmd->interval_id = 0;
recurring_helper(cmd);
}
return cmd;
}
int
remote_ra_exec(lrm_state_t *lrm_state, const char *rsc_id, const char *action,
const char *userdata, guint interval_ms,
int timeout, /* ms */
int start_delay, /* ms */
lrmd_key_value_t * params)
{
int rc = 0;
lrm_state_t *connection_rsc = NULL;
remote_ra_cmd_t *cmd = NULL;
remote_ra_data_t *ra_data = NULL;
if (is_remote_ra_supported_action(action) == FALSE) {
rc = -EINVAL;
goto exec_done;
}
connection_rsc = lrm_state_find(rsc_id);
if (!connection_rsc) {
rc = -EINVAL;
goto exec_done;
}
remote_ra_data_init(connection_rsc);
ra_data = connection_rsc->remote_ra_data;
cmd = handle_dup_monitor(ra_data, interval_ms, userdata);
if (cmd) {
rc = cmd->call_id;
goto exec_done;
}
cmd = calloc(1, sizeof(remote_ra_cmd_t));
cmd->owner = strdup(lrm_state->node_name);
cmd->rsc_id = strdup(rsc_id);
cmd->action = strdup(action);
cmd->userdata = strdup(userdata);
cmd->interval_ms = interval_ms;
cmd->timeout = timeout;
cmd->start_delay = start_delay;
cmd->params = params;
cmd->start_time = time(NULL);
cmd->call_id = generate_callid();
if (cmd->start_delay) {
cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
}
ra_data->cmds = g_list_append(ra_data->cmds, cmd);
mainloop_set_trigger(ra_data->work);
return cmd->call_id;
exec_done:
lrmd_key_value_freeall(params);
return rc;
}
/*!
* \internal
* \brief Immediately fail all monitors of a remote node, if proxied here
*
* \param[in] node_name Name of pacemaker_remote node
*/
void
remote_ra_fail(const char *node_name)
{
lrm_state_t *lrm_state = lrm_state_find(node_name);
if (lrm_state && lrm_state_is_connected(lrm_state)) {
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
crm_info("Failing monitors on pacemaker_remote node %s", node_name);
ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
}
}
/* A guest node fencing implied by host fencing looks like:
*
*
*
*
*
*
*
*/
#define XPATH_PSEUDO_FENCE "//" XML_GRAPH_TAG_PSEUDO_EVENT \
"[@" XML_LRM_ATTR_TASK "='stonith']/" XML_GRAPH_TAG_DOWNED \
"/" XML_CIB_TAG_NODE
/*!
* \internal
* \brief Check a pseudo-action for Pacemaker Remote node side effects
*
* \param[in] xml XML of pseudo-action to check
*/
void
remote_ra_process_pseudo(xmlNode *xml)
{
xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_FENCE);
if (numXpathResults(search) == 1) {
xmlNode *result = getXpathResult(search, 0);
/* Normally, we handle the necessary side effects of a guest node stop
* action when reporting the remote agent's result. However, if the stop
* is implied due to fencing, it will be a fencing pseudo-event, and
* there won't be a result to report. Handle that case here.
*
* This will result in a duplicate call to remote_node_down() if the
* guest stop was real instead of implied, but that shouldn't hurt.
*
* There is still one corner case that isn't handled: if a guest node
* isn't running any resources when its host is fenced, it will appear
* to be cleanly stopped, so there will be no pseudo-fence, and our
* peer cache state will be incorrect unless and until the guest is
* recovered.
*/
if (result) {
const char *remote = ID(result);
if (remote) {
remote_node_down(remote, DOWN_ERASE_LRM);
}
}
}
freeXpathObject(search);
}
static void
remote_ra_maintenance(lrm_state_t * lrm_state, gboolean maintenance)
{
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
xmlNode *update, *state;
int call_opt, call_id = 0;
crm_node_t *node;
call_opt = crmd_cib_smart_opt();
node = crm_remote_peer_get(lrm_state->node_name);
CRM_CHECK(node != NULL, return);
update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
state = create_node_state_update(node, node_update_none, update,
__FUNCTION__);
crm_xml_add(state, XML_NODE_IS_MAINTENANCE, maintenance?"1":"0");
fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
if (call_id < 0) {
crm_perror(LOG_WARNING, "%s CIB node state update failed", lrm_state->node_name);
} else {
/* TODO: still not 100% sure that async update will succeed ... */
ra_data->is_maintenance = maintenance;
}
free_xml(update);
}
#define XPATH_PSEUDO_MAINTENANCE "//" XML_GRAPH_TAG_PSEUDO_EVENT \
"[@" XML_LRM_ATTR_TASK "='" CRM_OP_MAINTENANCE_NODES "']/" \
XML_GRAPH_TAG_MAINTENANCE
/*!
* \internal
* \brief Check a pseudo-action holding updates for maintenance state
*
* \param[in] xml XML of pseudo-action to check
*/
void
remote_ra_process_maintenance_nodes(xmlNode *xml)
{
xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_MAINTENANCE);
if (numXpathResults(search) == 1) {
xmlNode *node;
int cnt = 0, cnt_remote = 0;
for (node =
first_named_child(getXpathResult(search, 0), XML_CIB_TAG_NODE);
node; node = __xml_next(node)) {
lrm_state_t *lrm_state = lrm_state_find(ID(node));
cnt++;
if (lrm_state && lrm_state->remote_ra_data &&
((remote_ra_data_t *) lrm_state->remote_ra_data)->active) {
cnt_remote++;
remote_ra_maintenance(lrm_state,
crm_atoi(crm_element_value(node,
XML_NODE_IS_MAINTENANCE), "0"));
}
}
crm_trace("Action holds %d nodes (%d remotes found) "
"adjusting maintenance-mode", cnt, cnt_remote);
}
freeXpathObject(search);
}
gboolean
remote_ra_is_in_maintenance(lrm_state_t * lrm_state)
{
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
return ra_data->is_maintenance;
}
gboolean
remote_ra_controlling_guest(lrm_state_t * lrm_state)
{
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
return ra_data->controlling_guest;
}
diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c
index 20fbfac1b2..a463ab6c19 100644
--- a/daemons/execd/execd_commands.c
+++ b/daemons/execd/execd_commands.c
@@ -1,1841 +1,1841 @@
/*
* Copyright 2012-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "pacemaker-execd.h"
#ifdef HAVE_SYS_TIMEB_H
# include
#endif
#define EXIT_REASON_MAX_LEN 128
GHashTable *rsc_list = NULL;
typedef struct lrmd_cmd_s {
int timeout;
guint interval_ms;
int start_delay;
int timeout_orig;
int call_id;
int exec_rc;
int lrmd_op_status;
int call_opts;
/* Timer ids, must be removed on cmd destruction. */
int delay_id;
int stonith_recurring_id;
int rsc_deleted;
int service_flags;
char *client_id;
char *origin;
char *rsc_id;
char *action;
char *real_action;
char *exit_reason;
char *output;
char *userdata_str;
#ifdef HAVE_SYS_TIMEB_H
/* Recurring and systemd operations may involve more than one executor
* command per operation, so they need info about the original and the most
* recent.
*/
struct timeb t_first_run; /* Timestamp of when op first ran */
struct timeb t_run; /* Timestamp of when op most recently ran */
struct timeb t_first_queue; /* Timestamp of when op first was queued */
struct timeb t_queue; /* Timestamp of when op most recently was queued */
struct timeb t_rcchange; /* Timestamp of last rc change */
#endif
int first_notify_sent;
int last_notify_rc;
int last_notify_op_status;
int last_pid;
GHashTable *params;
} lrmd_cmd_t;
static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc);
static gboolean lrmd_rsc_dispatch(gpointer user_data);
static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id);
static void
log_finished(lrmd_cmd_t * cmd, int exec_time, int queue_time)
{
char pid_str[32] = { 0, };
int log_level = LOG_INFO;
if (cmd->last_pid) {
snprintf(pid_str, 32, "%d", cmd->last_pid);
}
if (safe_str_eq(cmd->action, "monitor")) {
log_level = LOG_DEBUG;
}
#ifdef HAVE_SYS_TIMEB_H
do_crm_log(log_level,
"finished - rsc:%s action:%s call_id:%d %s%s exit-code:%d exec-time:%dms queue-time:%dms",
cmd->rsc_id, cmd->action, cmd->call_id, cmd->last_pid ? "pid:" : "", pid_str,
cmd->exec_rc, exec_time, queue_time);
#else
do_crm_log(log_level, "finished - rsc:%s action:%s call_id:%d %s%s exit-code:%d",
cmd->rsc_id,
cmd->action, cmd->call_id, cmd->last_pid ? "pid:" : "", pid_str, cmd->exec_rc);
#endif
}
static void
log_execute(lrmd_cmd_t * cmd)
{
int log_level = LOG_INFO;
if (safe_str_eq(cmd->action, "monitor")) {
log_level = LOG_DEBUG;
}
do_crm_log(log_level, "executing - rsc:%s action:%s call_id:%d",
cmd->rsc_id, cmd->action, cmd->call_id);
}
static const char *
normalize_action_name(lrmd_rsc_t * rsc, const char *action)
{
if (safe_str_eq(action, "monitor") &&
is_set(pcmk_get_ra_caps(rsc->class), pcmk_ra_cap_status)) {
return "status";
}
return action;
}
static lrmd_rsc_t *
build_rsc_from_xml(xmlNode * msg)
{
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR);
lrmd_rsc_t *rsc = NULL;
rsc = calloc(1, sizeof(lrmd_rsc_t));
crm_element_value_int(msg, F_LRMD_CALLOPTS, &rsc->call_opts);
rsc->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID);
rsc->class = crm_element_value_copy(rsc_xml, F_LRMD_CLASS);
rsc->provider = crm_element_value_copy(rsc_xml, F_LRMD_PROVIDER);
rsc->type = crm_element_value_copy(rsc_xml, F_LRMD_TYPE);
rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_rsc_dispatch, rsc);
rsc->st_probe_rc = -ENODEV; // if stonith, initialize to "not running"
return rsc;
}
static lrmd_cmd_t *
create_lrmd_cmd(xmlNode * msg, crm_client_t * client)
{
int call_options = 0;
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR);
lrmd_cmd_t *cmd = NULL;
cmd = calloc(1, sizeof(lrmd_cmd_t));
crm_element_value_int(msg, F_LRMD_CALLOPTS, &call_options);
cmd->call_opts = call_options;
cmd->client_id = strdup(client->id);
crm_element_value_int(msg, F_LRMD_CALLID, &cmd->call_id);
crm_element_value_ms(rsc_xml, F_LRMD_RSC_INTERVAL, &cmd->interval_ms);
crm_element_value_int(rsc_xml, F_LRMD_TIMEOUT, &cmd->timeout);
crm_element_value_int(rsc_xml, F_LRMD_RSC_START_DELAY, &cmd->start_delay);
cmd->timeout_orig = cmd->timeout;
cmd->origin = crm_element_value_copy(rsc_xml, F_LRMD_ORIGIN);
cmd->action = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ACTION);
cmd->userdata_str = crm_element_value_copy(rsc_xml, F_LRMD_RSC_USERDATA_STR);
cmd->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID);
cmd->params = xml2list(rsc_xml);
if (safe_str_eq(g_hash_table_lookup(cmd->params, "CRM_meta_on_fail"), "block")) {
crm_debug("Setting flag to leave pid group on timeout and only kill action pid for " CRM_OP_FMT,
cmd->rsc_id, cmd->action, cmd->interval_ms);
cmd->service_flags |= SVC_ACTION_LEAVE_GROUP;
}
return cmd;
}
static void
stop_recurring_timer(lrmd_cmd_t *cmd)
{
if (cmd) {
if (cmd->stonith_recurring_id) {
g_source_remove(cmd->stonith_recurring_id);
}
cmd->stonith_recurring_id = 0;
}
}
static void
free_lrmd_cmd(lrmd_cmd_t * cmd)
{
stop_recurring_timer(cmd);
if (cmd->delay_id) {
g_source_remove(cmd->delay_id);
}
if (cmd->params) {
g_hash_table_destroy(cmd->params);
}
free(cmd->origin);
free(cmd->action);
free(cmd->real_action);
free(cmd->userdata_str);
free(cmd->rsc_id);
free(cmd->output);
free(cmd->exit_reason);
free(cmd->client_id);
free(cmd);
}
static gboolean
stonith_recurring_op_helper(gpointer data)
{
lrmd_cmd_t *cmd = data;
lrmd_rsc_t *rsc;
cmd->stonith_recurring_id = 0;
if (!cmd->rsc_id) {
return FALSE;
}
rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
CRM_ASSERT(rsc != NULL);
/* take it out of recurring_ops list, and put it in the pending ops
* to be executed */
rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
#ifdef HAVE_SYS_TIMEB_H
ftime(&cmd->t_queue);
if (cmd->t_first_queue.time == 0) {
cmd->t_first_queue = cmd->t_queue;
}
#endif
mainloop_set_trigger(rsc->work);
return FALSE;
}
static inline void
start_recurring_timer(lrmd_cmd_t *cmd)
{
if (cmd && (cmd->interval_ms > 0)) {
cmd->stonith_recurring_id = g_timeout_add(cmd->interval_ms,
stonith_recurring_op_helper,
cmd);
}
}
static gboolean
start_delay_helper(gpointer data)
{
lrmd_cmd_t *cmd = data;
lrmd_rsc_t *rsc = NULL;
cmd->delay_id = 0;
rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
if (rsc) {
mainloop_set_trigger(rsc->work);
}
return FALSE;
}
static gboolean
merge_recurring_duplicate(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
{
GListPtr gIter = NULL;
lrmd_cmd_t * dup = NULL;
gboolean dup_pending = FALSE;
if (cmd->interval_ms == 0) {
return 0;
}
for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) {
dup = gIter->data;
if (safe_str_eq(cmd->action, dup->action)
&& (cmd->interval_ms == dup->interval_ms)) {
dup_pending = TRUE;
goto merge_dup;
}
}
/* if dup is in recurring_ops list, that means it has already executed
* and is in the interval loop. we can't just remove it in this case. */
for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) {
dup = gIter->data;
if (safe_str_eq(cmd->action, dup->action)
&& (cmd->interval_ms == dup->interval_ms)) {
goto merge_dup;
}
}
return FALSE;
merge_dup:
/* This should not occur. If it does, we need to investigate how something
* like this is possible in the controller.
*/
crm_warn("Duplicate recurring op entry detected (" CRM_OP_FMT "), merging with previous op entry",
rsc->rsc_id,
normalize_action_name(rsc, dup->action),
dup->interval_ms);
/* merge */
dup->first_notify_sent = 0;
free(dup->userdata_str);
dup->userdata_str = cmd->userdata_str;
cmd->userdata_str = NULL;
dup->call_id = cmd->call_id;
if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) {
/* if we are waiting for the next interval, kick it off now */
if (dup_pending == TRUE) {
stop_recurring_timer(cmd);
stonith_recurring_op_helper(cmd);
}
} else if (dup_pending == FALSE) {
/* if we've already handed this to the service lib, kick off an early execution */
services_action_kick(rsc->rsc_id,
normalize_action_name(rsc, dup->action),
dup->interval_ms);
}
free_lrmd_cmd(cmd);
return TRUE;
}
static void
schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
{
gboolean dup_processed = FALSE;
CRM_CHECK(cmd != NULL, return);
CRM_CHECK(rsc != NULL, return);
crm_trace("Scheduling %s on %s", cmd->action, rsc->rsc_id);
dup_processed = merge_recurring_duplicate(rsc, cmd);
if (dup_processed) {
/* duplicate recurring cmd found, cmds merged */
return;
}
/* The controller expects the executor to automatically cancel
* recurring operations before a resource stops.
*/
if (safe_str_eq(cmd->action, "stop")) {
cancel_all_recurring(rsc, NULL);
}
rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
#ifdef HAVE_SYS_TIMEB_H
ftime(&cmd->t_queue);
if (cmd->t_first_queue.time == 0) {
cmd->t_first_queue = cmd->t_queue;
}
#endif
mainloop_set_trigger(rsc->work);
if (cmd->start_delay) {
cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
}
}
static xmlNode *
create_lrmd_reply(const char *origin, int rc, int call_id)
{
xmlNode *reply = create_xml_node(NULL, T_LRMD_REPLY);
crm_xml_add(reply, F_LRMD_ORIGIN, origin);
crm_xml_add_int(reply, F_LRMD_RC, rc);
crm_xml_add_int(reply, F_LRMD_CALLID, call_id);
return reply;
}
static void
send_client_notify(gpointer key, gpointer value, gpointer user_data)
{
xmlNode *update_msg = user_data;
crm_client_t *client = value;
int rc;
int log_level = LOG_WARNING;
const char *msg = NULL;
CRM_CHECK(client != NULL, return);
if (client->name == NULL) {
crm_trace("Skipping notification to client without name");
return;
}
rc = lrmd_server_send_notify(client, update_msg);
if (rc > 0) {
return; // success
}
switch (rc) {
case 0:
msg = "no data sent";
break;
case -ENOTCONN:
case -EPIPE: // Client exited without waiting for notification
log_level = LOG_INFO;
msg = "Disconnected";
break;
default:
msg = pcmk_strerror(rc);
break;
}
do_crm_log(log_level,
"Could not notify client %s/%s: %s " CRM_XS " rc=%d",
client->name, client->id, msg, rc);
}
#ifdef HAVE_SYS_TIMEB_H
/*!
* \internal
* \brief Return difference between two times in milliseconds
*
* \param[in] now More recent time (or NULL to use current time)
* \param[in] old Earlier time
*
* \return milliseconds difference (or 0 if old is NULL or has time zero)
*/
static int
time_diff_ms(struct timeb *now, struct timeb *old)
{
struct timeb local_now = { 0, };
if (now == NULL) {
ftime(&local_now);
now = &local_now;
}
if ((old == NULL) || (old->time == 0)) {
return 0;
}
return difftime(now->time, old->time) * 1000 + now->millitm - old->millitm;
}
/*!
* \internal
* \brief Reset a command's operation times to their original values.
*
* Reset a command's run and queued timestamps to the timestamps of the original
* command, so we report the entire time since then and not just the time since
* the most recent command (for recurring and systemd operations).
*
* /param[in] cmd Executor command object to reset
*
* /note It's not obvious what the queued time should be for a systemd
* start/stop operation, which might go like this:
* initial command queued 5ms, runs 3s
* monitor command queued 10ms, runs 10s
* monitor command queued 10ms, runs 10s
* Is the queued time for that operation 5ms, 10ms or 25ms? The current
* implementation will report 5ms. If it's 25ms, then we need to
* subtract 20ms from the total exec time so as not to count it twice.
* We can implement that later if it matters to anyone ...
*/
static void
cmd_original_times(lrmd_cmd_t * cmd)
{
cmd->t_run = cmd->t_first_run;
cmd->t_queue = cmd->t_first_queue;
}
#endif
static void
send_cmd_complete_notify(lrmd_cmd_t * cmd)
{
int exec_time = 0;
int queue_time = 0;
xmlNode *notify = NULL;
#ifdef HAVE_SYS_TIMEB_H
exec_time = time_diff_ms(NULL, &cmd->t_run);
queue_time = time_diff_ms(&cmd->t_run, &cmd->t_queue);
#endif
log_finished(cmd, exec_time, queue_time);
/* if the first notify result for a cmd has already been sent earlier, and the
* the option to only send notifies on result changes is set. Check to see
* if the last result is the same as the new one. If so, suppress this update */
if (cmd->first_notify_sent && (cmd->call_opts & lrmd_opt_notify_changes_only)) {
if (cmd->last_notify_rc == cmd->exec_rc &&
cmd->last_notify_op_status == cmd->lrmd_op_status) {
/* only send changes */
return;
}
}
cmd->first_notify_sent = 1;
cmd->last_notify_rc = cmd->exec_rc;
cmd->last_notify_op_status = cmd->lrmd_op_status;
notify = create_xml_node(NULL, T_LRMD_NOTIFY);
crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add_int(notify, F_LRMD_TIMEOUT, cmd->timeout);
crm_xml_add_ms(notify, F_LRMD_RSC_INTERVAL, cmd->interval_ms);
crm_xml_add_int(notify, F_LRMD_RSC_START_DELAY, cmd->start_delay);
crm_xml_add_int(notify, F_LRMD_EXEC_RC, cmd->exec_rc);
crm_xml_add_int(notify, F_LRMD_OP_STATUS, cmd->lrmd_op_status);
crm_xml_add_int(notify, F_LRMD_CALLID, cmd->call_id);
crm_xml_add_int(notify, F_LRMD_RSC_DELETED, cmd->rsc_deleted);
#ifdef HAVE_SYS_TIMEB_H
- crm_xml_add_int(notify, F_LRMD_RSC_RUN_TIME, cmd->t_run.time);
- crm_xml_add_int(notify, F_LRMD_RSC_RCCHANGE_TIME, cmd->t_rcchange.time);
+ crm_xml_add_ll(notify, F_LRMD_RSC_RUN_TIME, (long long) cmd->t_run.time);
+ crm_xml_add_ll(notify, F_LRMD_RSC_RCCHANGE_TIME, (long long) cmd->t_rcchange.time);
crm_xml_add_int(notify, F_LRMD_RSC_EXEC_TIME, exec_time);
crm_xml_add_int(notify, F_LRMD_RSC_QUEUE_TIME, queue_time);
#endif
crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_RSC_EXEC);
crm_xml_add(notify, F_LRMD_RSC_ID, cmd->rsc_id);
if(cmd->real_action) {
crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->real_action);
} else {
crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->action);
}
crm_xml_add(notify, F_LRMD_RSC_USERDATA_STR, cmd->userdata_str);
crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->output);
crm_xml_add(notify, F_LRMD_RSC_EXIT_REASON, cmd->exit_reason);
if (cmd->params) {
char *key = NULL;
char *value = NULL;
GHashTableIter iter;
xmlNode *args = create_xml_node(notify, XML_TAG_ATTRS);
g_hash_table_iter_init(&iter, cmd->params);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
hash2smartfield((gpointer) key, (gpointer) value, args);
}
}
if (cmd->client_id && (cmd->call_opts & lrmd_opt_notify_orig_only)) {
crm_client_t *client = crm_client_get_by_id(cmd->client_id);
if (client) {
send_client_notify(client->id, client, notify);
}
} else if (client_connections != NULL) {
g_hash_table_foreach(client_connections, send_client_notify, notify);
}
free_xml(notify);
}
static void
send_generic_notify(int rc, xmlNode * request)
{
if (client_connections != NULL) {
int call_id = 0;
xmlNode *notify = NULL;
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
const char *op = crm_element_value(request, F_LRMD_OPERATION);
crm_element_value_int(request, F_LRMD_CALLID, &call_id);
notify = create_xml_node(NULL, T_LRMD_NOTIFY);
crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add_int(notify, F_LRMD_RC, rc);
crm_xml_add_int(notify, F_LRMD_CALLID, call_id);
crm_xml_add(notify, F_LRMD_OPERATION, op);
crm_xml_add(notify, F_LRMD_RSC_ID, rsc_id);
g_hash_table_foreach(client_connections, send_client_notify, notify);
free_xml(notify);
}
}
static void
cmd_reset(lrmd_cmd_t * cmd)
{
cmd->lrmd_op_status = 0;
cmd->last_pid = 0;
#ifdef HAVE_SYS_TIMEB_H
memset(&cmd->t_run, 0, sizeof(cmd->t_run));
memset(&cmd->t_queue, 0, sizeof(cmd->t_queue));
#endif
free(cmd->exit_reason);
cmd->exit_reason = NULL;
free(cmd->output);
cmd->output = NULL;
}
static void
cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc)
{
crm_trace("Resource operation rsc:%s action:%s completed (%p %p)", cmd->rsc_id, cmd->action,
rsc ? rsc->active : NULL, cmd);
if (rsc && (rsc->active == cmd)) {
rsc->active = NULL;
mainloop_set_trigger(rsc->work);
}
if (!rsc) {
cmd->rsc_deleted = 1;
}
/* reset original timeout so client notification has correct information */
cmd->timeout = cmd->timeout_orig;
send_cmd_complete_notify(cmd);
if (cmd->interval_ms && (cmd->lrmd_op_status == PCMK_LRM_OP_CANCELLED)) {
if (rsc) {
rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
}
free_lrmd_cmd(cmd);
} else if (cmd->interval_ms == 0) {
if (rsc) {
rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
}
free_lrmd_cmd(cmd);
} else {
/* Clear all the values pertaining just to the last iteration of a recurring op. */
cmd_reset(cmd);
}
}
static int
ocf2uniform_rc(int rc)
{
if (rc < 0 || rc > PCMK_OCF_FAILED_MASTER) {
return PCMK_OCF_UNKNOWN_ERROR;
}
return rc;
}
static int
stonith2uniform_rc(const char *action, int rc)
{
switch (rc) {
case pcmk_ok:
rc = PCMK_OCF_OK;
break;
case -ENODEV:
/* This should be possible only for probes in practice, but
* interpret for all actions to be safe.
*/
if (safe_str_eq(action, "monitor")) {
rc = PCMK_OCF_NOT_RUNNING;
} else if (safe_str_eq(action, "stop")) {
rc = PCMK_OCF_OK;
} else {
rc = PCMK_OCF_NOT_INSTALLED;
}
break;
case -EOPNOTSUPP:
rc = PCMK_OCF_UNIMPLEMENT_FEATURE;
break;
case -ETIME:
case -ETIMEDOUT:
rc = PCMK_OCF_TIMEOUT;
break;
default:
rc = PCMK_OCF_UNKNOWN_ERROR;
break;
}
return rc;
}
#if SUPPORT_NAGIOS
static int
nagios2uniform_rc(const char *action, int rc)
{
if (rc < 0) {
return PCMK_OCF_UNKNOWN_ERROR;
}
switch (rc) {
case NAGIOS_STATE_OK:
return PCMK_OCF_OK;
case NAGIOS_INSUFFICIENT_PRIV:
return PCMK_OCF_INSUFFICIENT_PRIV;
case NAGIOS_NOT_INSTALLED:
return PCMK_OCF_NOT_INSTALLED;
case NAGIOS_STATE_WARNING:
case NAGIOS_STATE_CRITICAL:
case NAGIOS_STATE_UNKNOWN:
case NAGIOS_STATE_DEPENDENT:
default:
return PCMK_OCF_UNKNOWN_ERROR;
}
return PCMK_OCF_UNKNOWN_ERROR;
}
#endif
static int
get_uniform_rc(const char *standard, const char *action, int rc)
{
if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_OCF)) {
return ocf2uniform_rc(rc);
} else if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_STONITH)) {
return stonith2uniform_rc(action, rc);
} else if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_SYSTEMD)) {
return rc;
} else if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_UPSTART)) {
return rc;
#if SUPPORT_NAGIOS
} else if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_NAGIOS)) {
return nagios2uniform_rc(action, rc);
#endif
} else {
return services_get_ocf_exitcode(action, rc);
}
}
static int
action_get_uniform_rc(svc_action_t * action)
{
lrmd_cmd_t *cmd = action->cb_data;
return get_uniform_rc(action->standard, cmd->action, action->rc);
}
void
notify_of_new_client(crm_client_t *new_client)
{
crm_client_t *client = NULL;
GHashTableIter iter;
xmlNode *notify = NULL;
char *key = NULL;
notify = create_xml_node(NULL, T_LRMD_NOTIFY);
crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_NEW_CLIENT);
g_hash_table_iter_init(&iter, client_connections);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & client)) {
if (safe_str_eq(client->id, new_client->id)) {
continue;
}
send_client_notify((gpointer) key, (gpointer) client, (gpointer) notify);
}
free_xml(notify);
}
static char *
parse_exit_reason(const char *output)
{
const char *cur = NULL;
const char *last = NULL;
static int cookie_len = 0;
char *eol = NULL;
size_t reason_len = EXIT_REASON_MAX_LEN;
if (output == NULL) {
return NULL;
}
if (!cookie_len) {
cookie_len = strlen(PCMK_OCF_REASON_PREFIX);
}
cur = strstr(output, PCMK_OCF_REASON_PREFIX);
for (; cur != NULL; cur = strstr(cur, PCMK_OCF_REASON_PREFIX)) {
/* skip over the cookie delimiter string */
cur += cookie_len;
last = cur;
}
if (last == NULL) {
return NULL;
}
// Truncate everything after a new line, and limit reason string size
eol = strchr(last, '\n');
if (eol) {
reason_len = QB_MIN(reason_len, eol - last);
}
return strndup(last, reason_len);
}
void
client_disconnect_cleanup(const char *client_id)
{
GHashTableIter iter;
lrmd_rsc_t *rsc = NULL;
char *key = NULL;
g_hash_table_iter_init(&iter, rsc_list);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) {
if (rsc->call_opts & lrmd_opt_drop_recurring) {
/* This client is disconnecting, drop any recurring operations
* it may have initiated on the resource */
cancel_all_recurring(rsc, client_id);
}
}
}
static void
action_complete(svc_action_t * action)
{
lrmd_rsc_t *rsc;
lrmd_cmd_t *cmd = action->cb_data;
const char *rclass = NULL;
bool goagain = false;
if (!cmd) {
crm_err("Completed executor action (%s) does not match any known operations",
action->id);
return;
}
#ifdef HAVE_SYS_TIMEB_H
if (cmd->exec_rc != action->rc) {
ftime(&cmd->t_rcchange);
}
#endif
cmd->last_pid = action->pid;
cmd->exec_rc = action_get_uniform_rc(action);
cmd->lrmd_op_status = action->status;
rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
if (rsc && safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_SERVICE)) {
rclass = resources_find_service_class(rsc->type);
} else if(rsc) {
rclass = rsc->class;
}
if (safe_str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD)) {
if(cmd->exec_rc == PCMK_OCF_OK && safe_str_eq(cmd->action, "start")) {
/* systemd I curse thee!
*
* systemd returns from start actions after the start _begins_
* not after it completes.
*
* So we have to jump through a few hoops so that we don't
* report 'complete' to the rest of pacemaker until, you know,
* it's actually done.
*/
goagain = true;
cmd->real_action = cmd->action;
cmd->action = strdup("monitor");
} else if(cmd->exec_rc == PCMK_OCF_OK && safe_str_eq(cmd->action, "stop")) {
goagain = true;
cmd->real_action = cmd->action;
cmd->action = strdup("monitor");
} else if(cmd->real_action) {
/* Ok, so this is the follow up monitor action to check if start actually completed */
if(cmd->lrmd_op_status == PCMK_LRM_OP_DONE && cmd->exec_rc == PCMK_OCF_PENDING) {
goagain = true;
} else if(cmd->exec_rc == PCMK_OCF_OK && safe_str_eq(cmd->real_action, "stop")) {
goagain = true;
} else {
#ifdef HAVE_SYS_TIMEB_H
int time_sum = time_diff_ms(NULL, &cmd->t_first_run);
int timeout_left = cmd->timeout_orig - time_sum;
crm_debug("%s %s is now complete (elapsed=%dms, remaining=%dms): %s (%d)",
cmd->rsc_id, cmd->real_action, time_sum, timeout_left, services_ocf_exitcode_str(cmd->exec_rc), cmd->exec_rc);
cmd_original_times(cmd);
#endif
// Monitors may return "not running", but start/stop shouldn't
if ((cmd->lrmd_op_status == PCMK_LRM_OP_DONE)
&& (cmd->exec_rc == PCMK_OCF_NOT_RUNNING)) {
if (safe_str_eq(cmd->real_action, "start")) {
cmd->exec_rc = PCMK_OCF_UNKNOWN_ERROR;
} else if (safe_str_eq(cmd->real_action, "stop")) {
cmd->exec_rc = PCMK_OCF_OK;
}
}
}
}
}
#if SUPPORT_NAGIOS
if (rsc && safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS)) {
if (safe_str_eq(cmd->action, "monitor") &&
(cmd->interval_ms == 0) && cmd->exec_rc == PCMK_OCF_OK) {
/* Successfully executed --version for the nagios plugin */
cmd->exec_rc = PCMK_OCF_NOT_RUNNING;
} else if (safe_str_eq(cmd->action, "start") && cmd->exec_rc != PCMK_OCF_OK) {
goagain = true;
}
}
#endif
/* Wrapping this section in ifdef implies that systemd resources are not
* fully supported on platforms without sys/timeb.h. Since timeb is
* obsolete, we should eventually prefer a clock_gettime() implementation
* (wrapped in its own ifdef) with timeb as a fallback.
*/
if(goagain) {
#ifdef HAVE_SYS_TIMEB_H
int time_sum = time_diff_ms(NULL, &cmd->t_first_run);
int timeout_left = cmd->timeout_orig - time_sum;
int delay = cmd->timeout_orig / 10;
if(delay >= timeout_left && timeout_left > 20) {
delay = timeout_left/2;
}
delay = QB_MIN(2000, delay);
if (delay < timeout_left) {
cmd->start_delay = delay;
cmd->timeout = timeout_left;
if(cmd->exec_rc == PCMK_OCF_OK) {
crm_debug("%s %s may still be in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
cmd->rsc_id, cmd->real_action, time_sum, timeout_left, delay);
} else if(cmd->exec_rc == PCMK_OCF_PENDING) {
crm_info("%s %s is still in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
cmd->rsc_id, cmd->action, time_sum, timeout_left, delay);
} else {
crm_notice("%s %s failed '%s' (%d): re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
cmd->rsc_id, cmd->action, services_ocf_exitcode_str(cmd->exec_rc), cmd->exec_rc, time_sum, timeout_left, delay);
}
cmd_reset(cmd);
if(rsc) {
rsc->active = NULL;
}
schedule_lrmd_cmd(rsc, cmd);
/* Don't finalize cmd, we're not done with it yet */
return;
} else {
crm_notice("Giving up on %s %s (rc=%d): timeout (elapsed=%dms, remaining=%dms)",
cmd->rsc_id, cmd->real_action?cmd->real_action:cmd->action, cmd->exec_rc, time_sum, timeout_left);
cmd->lrmd_op_status = PCMK_LRM_OP_TIMEOUT;
cmd->exec_rc = PCMK_OCF_TIMEOUT;
cmd_original_times(cmd);
}
#endif
}
if (action->stderr_data) {
cmd->output = strdup(action->stderr_data);
cmd->exit_reason = parse_exit_reason(action->stderr_data);
} else if (action->stdout_data) {
cmd->output = strdup(action->stdout_data);
}
cmd_finalize(cmd, rsc);
}
/*!
* \internal
* \brief Determine operation status of a stonith operation
*
* Non-stonith resource operations get their operation status directly from the
* service library, but the fencer does not have an equivalent, so we must infer
* an operation status from the fencer API's return code.
*
* \param[in] action Name of action performed on stonith resource
* \param[in] interval_ms Action interval
* \param[in] rc Action result from fencer
*
* \return Operation status corresponding to fencer API return code
*/
static int
stonith_rc2status(const char *action, guint interval_ms, int rc)
{
int status = PCMK_LRM_OP_DONE;
switch (rc) {
case pcmk_ok:
break;
case -EOPNOTSUPP:
case -EPROTONOSUPPORT:
status = PCMK_LRM_OP_NOTSUPPORTED;
break;
case -ETIME:
case -ETIMEDOUT:
status = PCMK_LRM_OP_TIMEOUT;
break;
case -ENOTCONN:
case -ECOMM:
// Couldn't talk to fencer
status = PCMK_LRM_OP_ERROR;
break;
case -ENODEV:
// The device is not registered with the fencer
status = PCMK_LRM_OP_ERROR;
break;
default:
break;
}
return status;
}
static void
stonith_action_complete(lrmd_cmd_t * cmd, int rc)
{
// This can be NULL if resource was removed before command completed
lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
cmd->exec_rc = stonith2uniform_rc(cmd->action, rc);
/* This function may be called with status already set to cancelled, if a
* pending action was aborted. Otherwise, we need to determine status from
* the fencer return code.
*/
if (cmd->lrmd_op_status != PCMK_LRM_OP_CANCELLED) {
cmd->lrmd_op_status = stonith_rc2status(cmd->action, cmd->interval_ms,
rc);
// Certain successful actions change the known state of the resource
if (rsc && (cmd->exec_rc == PCMK_OCF_OK)) {
if (safe_str_eq(cmd->action, "start")) {
rsc->st_probe_rc = pcmk_ok; // maps to PCMK_OCF_OK
} else if (safe_str_eq(cmd->action, "stop")) {
rsc->st_probe_rc = -ENODEV; // maps to PCMK_OCF_NOT_RUNNING
}
}
}
/* The recurring timer should not be running at this point in any case, but
* as a failsafe, stop it if it is.
*/
stop_recurring_timer(cmd);
/* Reschedule this command if appropriate. If a recurring command is *not*
* rescheduled, its status must be PCMK_LRM_OP_CANCELLED, otherwise it will
* not be removed from recurring_ops by cmd_finalize().
*/
if (rsc && (cmd->interval_ms > 0)
&& (cmd->lrmd_op_status != PCMK_LRM_OP_CANCELLED)) {
start_recurring_timer(cmd);
}
cmd_finalize(cmd, rsc);
}
static void
lrmd_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data)
{
stonith_action_complete(data->userdata, data->rc);
}
void
stonith_connection_failed(void)
{
GHashTableIter iter;
GList *cmd_list = NULL;
GList *cmd_iter = NULL;
lrmd_rsc_t *rsc = NULL;
char *key = NULL;
g_hash_table_iter_init(&iter, rsc_list);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) {
if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) {
/* If we registered this fence device, we don't know whether the
* fencer still has the registration or not. Cause future probes to
* return PCMK_OCF_UNKNOWN_ERROR until the resource is stopped or
* started successfully. This is especially important if the
* controller also went away (possibly due to a cluster layer
* restart) and won't receive our client notification of any
* monitors finalized below.
*/
if (rsc->st_probe_rc == pcmk_ok) {
rsc->st_probe_rc = pcmk_err_generic;
}
if (rsc->active) {
cmd_list = g_list_append(cmd_list, rsc->active);
}
if (rsc->recurring_ops) {
cmd_list = g_list_concat(cmd_list, rsc->recurring_ops);
}
if (rsc->pending_ops) {
cmd_list = g_list_concat(cmd_list, rsc->pending_ops);
}
rsc->pending_ops = rsc->recurring_ops = NULL;
}
}
if (!cmd_list) {
return;
}
crm_err("Connection to fencer failed, finalizing %d pending operations",
g_list_length(cmd_list));
for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) {
stonith_action_complete(cmd_iter->data, -ENOTCONN);
}
g_list_free(cmd_list);
}
/*!
* \internal
* \brief Execute a stonith resource "start" action
*
* Start a stonith resource by registering it with the fencer.
* (Stonith agents don't have a start command.)
*
* \param[in] stonith_api Connection to fencer
* \param[in] rsc Stonith resource to start
* \param[in] cmd Start command to execute
*
* \return pcmk_ok on success, -errno otherwise
*/
static int
execd_stonith_start(stonith_t *stonith_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
{
char *key = NULL;
char *value = NULL;
stonith_key_value_t *device_params = NULL;
int rc = pcmk_ok;
// Convert command parameters to stonith API key/values
if (cmd->params) {
GHashTableIter iter;
g_hash_table_iter_init(&iter, cmd->params);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
device_params = stonith_key_value_add(device_params, key, value);
}
}
/* The fencer will automatically register devices via CIB notifications
* when the CIB changes, but to avoid a possible race condition between
* the fencer receiving the notification and the executor requesting that
* resource, the executor registers the device as well. The fencer knows how
* to handle duplicate registrations.
*/
rc = stonith_api->cmds->register_device(stonith_api, st_opt_sync_call,
cmd->rsc_id, rsc->provider,
rsc->type, device_params);
stonith_key_value_freeall(device_params, 1, 1);
return rc;
}
/*!
* \internal
* \brief Execute a stonith resource "stop" action
*
* Stop a stonith resource by unregistering it with the fencer.
* (Stonith agents don't have a stop command.)
*
* \param[in] stonith_api Connection to fencer
* \param[in] rsc Stonith resource to stop
*
* \return pcmk_ok on success, -errno otherwise
*/
static inline int
execd_stonith_stop(stonith_t *stonith_api, const lrmd_rsc_t *rsc)
{
/* @TODO Failure would indicate a problem communicating with fencer;
* perhaps we should try reconnecting and retrying a few times?
*/
return stonith_api->cmds->remove_device(stonith_api, st_opt_sync_call,
rsc->rsc_id);
}
/*!
* \internal
* \brief Initiate a stonith resource agent recurring "monitor" action
*
* \param[in] stonith_api Connection to fencer
* \param[in] rsc Stonith resource to monitor
* \param[in] cmd Monitor command being executed
*
* \return pcmk_ok if monitor was successfully initiated, -errno otherwise
*/
static inline int
execd_stonith_monitor(stonith_t *stonith_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
{
int rc = stonith_api->cmds->monitor(stonith_api, 0, cmd->rsc_id,
cmd->timeout / 1000);
rc = stonith_api->cmds->register_callback(stonith_api, rc, 0, 0, cmd,
"lrmd_stonith_callback",
lrmd_stonith_callback);
if (rc == TRUE) {
rsc->active = cmd;
rc = pcmk_ok;
} else {
rc = -pcmk_err_generic;
}
return rc;
}
static void
lrmd_rsc_execute_stonith(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
{
int rc = 0;
bool do_monitor = FALSE;
stonith_t *stonith_api = get_stonith_connection();
if (!stonith_api) {
rc = -ENOTCONN;
} else if (safe_str_eq(cmd->action, "start")) {
rc = execd_stonith_start(stonith_api, rsc, cmd);
if (rc == 0) {
do_monitor = TRUE;
}
} else if (safe_str_eq(cmd->action, "stop")) {
rc = execd_stonith_stop(stonith_api, rsc);
} else if (safe_str_eq(cmd->action, "monitor")) {
if (cmd->interval_ms > 0) {
do_monitor = TRUE;
} else {
rc = rsc->st_probe_rc;
}
}
if (do_monitor) {
rc = execd_stonith_monitor(stonith_api, rsc, cmd);
if (rc == pcmk_ok) {
// Don't clean up yet, we will find out result of the monitor later
return;
}
}
stonith_action_complete(cmd, rc);
}
static int
lrmd_rsc_execute_service_lib(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
{
svc_action_t *action = NULL;
GHashTable *params_copy = NULL;
CRM_ASSERT(rsc);
CRM_ASSERT(cmd);
crm_trace("Creating action, resource:%s action:%s class:%s provider:%s agent:%s",
rsc->rsc_id, cmd->action, rsc->class, rsc->provider, rsc->type);
#if SUPPORT_NAGIOS
/* Recurring operations are cancelled anyway for a stop operation */
if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS)
&& safe_str_eq(cmd->action, "stop")) {
cmd->exec_rc = PCMK_OCF_OK;
goto exec_done;
}
#endif
params_copy = crm_str_table_dup(cmd->params);
action = resources_action_create(rsc->rsc_id, rsc->class, rsc->provider,
rsc->type,
normalize_action_name(rsc, cmd->action),
cmd->interval_ms, cmd->timeout,
params_copy, cmd->service_flags);
if (!action) {
crm_err("Failed to create action, action:%s on resource %s", cmd->action, rsc->rsc_id);
cmd->lrmd_op_status = PCMK_LRM_OP_ERROR;
goto exec_done;
}
action->cb_data = cmd;
/* 'cmd' may not be valid after this point if
* services_action_async() returned TRUE
*
* Upstart and systemd both synchronously determine monitor/status
* results and call action_complete (which may free 'cmd') if necessary.
*/
if (services_action_async(action, action_complete)) {
return TRUE;
}
cmd->exec_rc = action->rc;
if(action->status != PCMK_LRM_OP_DONE) {
cmd->lrmd_op_status = action->status;
} else {
cmd->lrmd_op_status = PCMK_LRM_OP_ERROR;
}
services_action_free(action);
action = NULL;
exec_done:
cmd_finalize(cmd, rsc);
return TRUE;
}
static gboolean
lrmd_rsc_execute(lrmd_rsc_t * rsc)
{
lrmd_cmd_t *cmd = NULL;
CRM_CHECK(rsc != NULL, return FALSE);
if (rsc->active) {
crm_trace("%s is still active", rsc->rsc_id);
return TRUE;
}
if (rsc->pending_ops) {
GList *first = rsc->pending_ops;
cmd = first->data;
if (cmd->delay_id) {
crm_trace
("Command %s %s was asked to run too early, waiting for start_delay timeout of %dms",
cmd->rsc_id, cmd->action, cmd->start_delay);
return TRUE;
}
rsc->pending_ops = g_list_remove_link(rsc->pending_ops, first);
g_list_free_1(first);
#ifdef HAVE_SYS_TIMEB_H
if (cmd->t_first_run.time == 0) {
ftime(&cmd->t_first_run);
}
ftime(&cmd->t_run);
#endif
}
if (!cmd) {
crm_trace("Nothing further to do for %s", rsc->rsc_id);
return TRUE;
}
rsc->active = cmd; /* only one op at a time for a rsc */
if (cmd->interval_ms) {
rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd);
}
log_execute(cmd);
if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) {
lrmd_rsc_execute_stonith(rsc, cmd);
} else {
lrmd_rsc_execute_service_lib(rsc, cmd);
}
return TRUE;
}
static gboolean
lrmd_rsc_dispatch(gpointer user_data)
{
return lrmd_rsc_execute(user_data);
}
void
free_rsc(gpointer data)
{
GListPtr gIter = NULL;
lrmd_rsc_t *rsc = data;
int is_stonith = safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH);
gIter = rsc->pending_ops;
while (gIter != NULL) {
GListPtr next = gIter->next;
lrmd_cmd_t *cmd = gIter->data;
/* command was never executed */
cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
cmd_finalize(cmd, NULL);
gIter = next;
}
/* frees list, but not list elements. */
g_list_free(rsc->pending_ops);
gIter = rsc->recurring_ops;
while (gIter != NULL) {
GListPtr next = gIter->next;
lrmd_cmd_t *cmd = gIter->data;
if (is_stonith) {
cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
/* If a stonith command is in-flight, just mark it as cancelled;
* it is not safe to finalize/free the cmd until the stonith api
* says it has either completed or timed out.
*/
if (rsc->active != cmd) {
cmd_finalize(cmd, NULL);
}
} else {
/* This command is already handed off to service library,
* let service library cancel it and tell us via the callback
* when it is cancelled. The rsc can be safely destroyed
* even if we are waiting for the cancel result */
services_action_cancel(rsc->rsc_id,
normalize_action_name(rsc, cmd->action),
cmd->interval_ms);
}
gIter = next;
}
/* frees list, but not list elements. */
g_list_free(rsc->recurring_ops);
free(rsc->rsc_id);
free(rsc->class);
free(rsc->provider);
free(rsc->type);
mainloop_destroy_trigger(rsc->work);
free(rsc);
}
static xmlNode *
process_lrmd_signon(crm_client_t *client, xmlNode *request, int call_id)
{
xmlNode *reply = NULL;
int rc = pcmk_ok;
const char *is_ipc_provider = crm_element_value(request, F_LRMD_IS_IPC_PROVIDER);
const char *protocol_version = crm_element_value(request, F_LRMD_PROTOCOL_VERSION);
if (compare_version(protocol_version, LRMD_MIN_PROTOCOL_VERSION) < 0) {
crm_err("Cluster API version must be greater than or equal to %s, not %s",
LRMD_MIN_PROTOCOL_VERSION, protocol_version);
rc = -EPROTO;
}
reply = create_lrmd_reply(__FUNCTION__, rc, call_id);
crm_xml_add(reply, F_LRMD_OPERATION, CRM_OP_REGISTER);
crm_xml_add(reply, F_LRMD_CLIENTID, client->id);
crm_xml_add(reply, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION);
if (crm_is_true(is_ipc_provider)) {
// This is a remote connection from a cluster node's controller
#ifdef SUPPORT_REMOTE
ipc_proxy_add_provider(client);
#endif
}
return reply;
}
static int
process_lrmd_rsc_register(crm_client_t * client, uint32_t id, xmlNode * request)
{
int rc = pcmk_ok;
lrmd_rsc_t *rsc = build_rsc_from_xml(request);
lrmd_rsc_t *dup = g_hash_table_lookup(rsc_list, rsc->rsc_id);
if (dup &&
safe_str_eq(rsc->class, dup->class) &&
safe_str_eq(rsc->provider, dup->provider) && safe_str_eq(rsc->type, dup->type)) {
crm_notice("Ignoring duplicate registration of '%s'", rsc->rsc_id);
free_rsc(rsc);
return rc;
}
g_hash_table_replace(rsc_list, rsc->rsc_id, rsc);
crm_info("Cached agent information for '%s'", rsc->rsc_id);
return rc;
}
static xmlNode *
process_lrmd_get_rsc_info(xmlNode *request, int call_id)
{
int rc = pcmk_ok;
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
xmlNode *reply = NULL;
lrmd_rsc_t *rsc = NULL;
if (rsc_id == NULL) {
rc = -ENODEV;
} else {
rsc = g_hash_table_lookup(rsc_list, rsc_id);
if (rsc == NULL) {
crm_info("Agent information for '%s' not in cache", rsc_id);
rc = -ENODEV;
}
}
reply = create_lrmd_reply(__FUNCTION__, rc, call_id);
if (rsc) {
crm_xml_add(reply, F_LRMD_RSC_ID, rsc->rsc_id);
crm_xml_add(reply, F_LRMD_CLASS, rsc->class);
crm_xml_add(reply, F_LRMD_PROVIDER, rsc->provider);
crm_xml_add(reply, F_LRMD_TYPE, rsc->type);
}
return reply;
}
static int
process_lrmd_rsc_unregister(crm_client_t * client, uint32_t id, xmlNode * request)
{
int rc = pcmk_ok;
lrmd_rsc_t *rsc = NULL;
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
if (!rsc_id) {
return -ENODEV;
}
rsc = g_hash_table_lookup(rsc_list, rsc_id);
if (rsc == NULL) {
crm_info("Ignoring unregistration of resource '%s', which is not registered",
rsc_id);
return pcmk_ok;
}
if (rsc->active) {
/* let the caller know there are still active ops on this rsc to watch for */
crm_trace("Operation (0x%p) still in progress for unregistered resource %s",
rsc->active, rsc_id);
rc = -EINPROGRESS;
}
g_hash_table_remove(rsc_list, rsc_id);
return rc;
}
static int
process_lrmd_rsc_exec(crm_client_t * client, uint32_t id, xmlNode * request)
{
lrmd_rsc_t *rsc = NULL;
lrmd_cmd_t *cmd = NULL;
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
int call_id;
if (!rsc_id) {
return -EINVAL;
}
if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) {
crm_info("Resource '%s' not found (%d active resources)",
rsc_id, g_hash_table_size(rsc_list));
return -ENODEV;
}
cmd = create_lrmd_cmd(request, client);
call_id = cmd->call_id;
/* Don't reference cmd after handing it off to be scheduled.
* The cmd could get merged and freed. */
schedule_lrmd_cmd(rsc, cmd);
return call_id;
}
static int
cancel_op(const char *rsc_id, const char *action, guint interval_ms)
{
GListPtr gIter = NULL;
lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, rsc_id);
/* How to cancel an action.
* 1. Check pending ops list, if it hasn't been handed off
* to the service library or stonith recurring list remove
* it there and that will stop it.
* 2. If it isn't in the pending ops list, then it's either a
* recurring op in the stonith recurring list, or the service
* library's recurring list. Stop it there
* 3. If not found in any lists, then this operation has either
* been executed already and is not a recurring operation, or
* never existed.
*/
if (!rsc) {
return -ENODEV;
}
for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) {
lrmd_cmd_t *cmd = gIter->data;
if (safe_str_eq(cmd->action, action)
&& (cmd->interval_ms == interval_ms)) {
cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
cmd_finalize(cmd, rsc);
return pcmk_ok;
}
}
if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) {
/* The service library does not handle stonith operations.
* We have to handle recurring stonith operations ourselves. */
for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) {
lrmd_cmd_t *cmd = gIter->data;
if (safe_str_eq(cmd->action, action)
&& (cmd->interval_ms == interval_ms)) {
cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
if (rsc->active != cmd) {
cmd_finalize(cmd, rsc);
}
return pcmk_ok;
}
}
} else if (services_action_cancel(rsc_id,
normalize_action_name(rsc, action),
interval_ms) == TRUE) {
/* The service library will tell the action_complete callback function
* this action was cancelled, which will destroy the cmd and remove
* it from the recurring_op list. Do not do that in this function
* if the service library says it cancelled it. */
return pcmk_ok;
}
return -EOPNOTSUPP;
}
static void
cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id)
{
GList *cmd_list = NULL;
GList *cmd_iter = NULL;
/* Notice a copy of each list is created when concat is called.
* This prevents odd behavior from occurring when the cmd_list
* is iterated through later on. It is possible the cancel_op
* function may end up modifying the recurring_ops and pending_ops
* lists. If we did not copy those lists, our cmd_list iteration
* could get messed up.*/
if (rsc->recurring_ops) {
cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->recurring_ops));
}
if (rsc->pending_ops) {
cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->pending_ops));
}
if (!cmd_list) {
return;
}
for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) {
lrmd_cmd_t *cmd = cmd_iter->data;
if (cmd->interval_ms == 0) {
continue;
}
if (client_id && safe_str_neq(cmd->client_id, client_id)) {
continue;
}
cancel_op(rsc->rsc_id, cmd->action, cmd->interval_ms);
}
/* frees only the copied list data, not the cmds */
g_list_free(cmd_list);
}
static int
process_lrmd_rsc_cancel(crm_client_t * client, uint32_t id, xmlNode * request)
{
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
const char *action = crm_element_value(rsc_xml, F_LRMD_RSC_ACTION);
guint interval_ms = 0;
crm_element_value_ms(rsc_xml, F_LRMD_RSC_INTERVAL, &interval_ms);
if (!rsc_id || !action) {
return -EINVAL;
}
return cancel_op(rsc_id, action, interval_ms);
}
static void
add_recurring_op_xml(xmlNode *reply, lrmd_rsc_t *rsc)
{
xmlNode *rsc_xml = create_xml_node(reply, F_LRMD_RSC);
crm_xml_add(rsc_xml, F_LRMD_RSC_ID, rsc->rsc_id);
for (GList *item = rsc->recurring_ops; item != NULL; item = item->next) {
lrmd_cmd_t *cmd = item->data;
xmlNode *op_xml = create_xml_node(rsc_xml, T_LRMD_RSC_OP);
crm_xml_add(op_xml, F_LRMD_RSC_ACTION,
(cmd->real_action? cmd->real_action : cmd->action));
crm_xml_add_ms(op_xml, F_LRMD_RSC_INTERVAL, cmd->interval_ms);
crm_xml_add_int(op_xml, F_LRMD_TIMEOUT, cmd->timeout_orig);
}
}
static xmlNode *
process_lrmd_get_recurring(xmlNode *request, int call_id)
{
int rc = pcmk_ok;
const char *rsc_id = NULL;
lrmd_rsc_t *rsc = NULL;
xmlNode *reply = NULL;
xmlNode *rsc_xml = NULL;
// Resource ID is optional
rsc_xml = first_named_child(request, F_LRMD_CALLDATA);
if (rsc_xml) {
rsc_xml = first_named_child(rsc_xml, F_LRMD_RSC);
}
if (rsc_xml) {
rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
}
// If resource ID is specified, resource must exist
if (rsc_id != NULL) {
rsc = g_hash_table_lookup(rsc_list, rsc_id);
if (rsc == NULL) {
crm_info("Resource '%s' not found (%d active resources)",
rsc_id, g_hash_table_size(rsc_list));
rc = -ENODEV;
}
}
reply = create_lrmd_reply(__FUNCTION__, rc, call_id);
// If resource ID is not specified, check all resources
if (rsc_id == NULL) {
GHashTableIter iter;
char *key = NULL;
g_hash_table_iter_init(&iter, rsc_list);
while (g_hash_table_iter_next(&iter, (gpointer *) &key,
(gpointer *) &rsc)) {
add_recurring_op_xml(reply, rsc);
}
} else if (rsc) {
add_recurring_op_xml(reply, rsc);
}
return reply;
}
void
process_lrmd_message(crm_client_t * client, uint32_t id, xmlNode * request)
{
int rc = pcmk_ok;
int call_id = 0;
const char *op = crm_element_value(request, F_LRMD_OPERATION);
int do_reply = 0;
int do_notify = 0;
xmlNode *reply = NULL;
crm_trace("Processing %s operation from %s", op, client->id);
crm_element_value_int(request, F_LRMD_CALLID, &call_id);
if (crm_str_eq(op, CRM_OP_IPC_FWD, TRUE)) {
#ifdef SUPPORT_REMOTE
ipc_proxy_forward_client(client, request);
#endif
do_reply = 1;
} else if (crm_str_eq(op, CRM_OP_REGISTER, TRUE)) {
reply = process_lrmd_signon(client, request, call_id);
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_RSC_REG, TRUE)) {
rc = process_lrmd_rsc_register(client, id, request);
do_notify = 1;
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_RSC_INFO, TRUE)) {
reply = process_lrmd_get_rsc_info(request, call_id);
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_RSC_UNREG, TRUE)) {
rc = process_lrmd_rsc_unregister(client, id, request);
/* don't notify anyone about failed un-registers */
if (rc == pcmk_ok || rc == -EINPROGRESS) {
do_notify = 1;
}
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_RSC_EXEC, TRUE)) {
rc = process_lrmd_rsc_exec(client, id, request);
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_RSC_CANCEL, TRUE)) {
rc = process_lrmd_rsc_cancel(client, id, request);
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_POKE, TRUE)) {
do_notify = 1;
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_CHECK, TRUE)) {
xmlNode *data = get_message_xml(request, F_LRMD_CALLDATA);
const char *timeout = crm_element_value(data, F_LRMD_WATCHDOG);
CRM_LOG_ASSERT(data != NULL);
check_sbd_timeout(timeout);
} else if (crm_str_eq(op, LRMD_OP_ALERT_EXEC, TRUE)) {
rc = process_lrmd_alert_exec(client, id, request);
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_GET_RECURRING, TRUE)) {
reply = process_lrmd_get_recurring(request, call_id);
do_reply = 1;
} else {
rc = -EOPNOTSUPP;
do_reply = 1;
crm_err("Unknown %s from %s", op, client->name);
crm_log_xml_warn(request, "UnknownOp");
}
crm_debug("Processed %s operation from %s: rc=%d, reply=%d, notify=%d",
op, client->id, rc, do_reply, do_notify);
if (do_reply) {
int send_rc = pcmk_ok;
if (reply == NULL) {
reply = create_lrmd_reply(__FUNCTION__, rc, call_id);
}
send_rc = lrmd_server_send_reply(client, id, reply);
free_xml(reply);
if (send_rc < 0) {
crm_warn("Reply to client %s failed: %s " CRM_XS " %d",
client->name, pcmk_strerror(send_rc), send_rc);
}
}
if (do_notify) {
send_generic_notify(rc, request);
}
}
diff --git a/daemons/schedulerd/pacemaker-schedulerd.c b/daemons/schedulerd/pacemaker-schedulerd.c
index 87cf7b0168..e5500fa282 100644
--- a/daemons/schedulerd/pacemaker-schedulerd.c
+++ b/daemons/schedulerd/pacemaker-schedulerd.c
@@ -1,351 +1,351 @@
/*
* Copyright 2004-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#define OPTARGS "hVc"
static GMainLoop *mainloop = NULL;
static qb_ipcs_service_t *ipcs = NULL;
static pe_working_set_t *sched_data_set = NULL;
#define get_series() was_processing_error?1:was_processing_warning?2:3
typedef struct series_s {
const char *name;
const char *param;
int wrap;
} series_t;
series_t series[] = {
{"pe-unknown", "_dont_match_anything_", -1},
{"pe-error", "pe-error-series-max", -1},
{"pe-warn", "pe-warn-series-max", 200},
{"pe-input", "pe-input-series-max", 400},
};
void pengine_shutdown(int nsig);
static gboolean
process_pe_message(xmlNode * msg, xmlNode * xml_data, crm_client_t * sender)
{
static char *last_digest = NULL;
static char *filename = NULL;
- time_t execution_date = time(NULL);
const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO);
const char *op = crm_element_value(msg, F_CRM_TASK);
const char *ref = crm_element_value(msg, F_CRM_REFERENCE);
crm_trace("Processing %s op (ref=%s)...", op, ref);
if (op == NULL) {
/* error */
} else if (strcasecmp(op, CRM_OP_HELLO) == 0) {
/* ignore */
} else if (safe_str_eq(crm_element_value(msg, F_CRM_MSG_TYPE), XML_ATTR_RESPONSE)) {
/* ignore */
} else if (sys_to == NULL || strcasecmp(sys_to, CRM_SYSTEM_PENGINE) != 0) {
crm_trace("Bad sys-to %s", crm_str(sys_to));
return FALSE;
} else if (strcasecmp(op, CRM_OP_PECALC) == 0) {
int seq = -1;
int series_id = 0;
int series_wrap = 0;
char *digest = NULL;
const char *value = NULL;
+ time_t execution_date = time(NULL);
xmlNode *converted = NULL;
xmlNode *reply = NULL;
gboolean is_repoke = FALSE;
gboolean process = TRUE;
crm_config_error = FALSE;
crm_config_warning = FALSE;
was_processing_error = FALSE;
was_processing_warning = FALSE;
if (sched_data_set == NULL) {
sched_data_set = pe_new_working_set();
CRM_ASSERT(sched_data_set != NULL);
}
digest = calculate_xml_versioned_digest(xml_data, FALSE, FALSE, CRM_FEATURE_SET);
converted = copy_xml(xml_data);
if (cli_config_update(&converted, NULL, TRUE) == FALSE) {
sched_data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH);
crm_xml_add_int(sched_data_set->graph, "transition_id", 0);
crm_xml_add_int(sched_data_set->graph, "cluster-delay", 0);
process = FALSE;
free(digest);
} else if (safe_str_eq(digest, last_digest)) {
crm_info("Input has not changed since last time, not saving to disk");
is_repoke = TRUE;
free(digest);
} else {
free(last_digest);
last_digest = digest;
}
if (process) {
pcmk__schedule_actions(sched_data_set, converted, NULL);
}
series_id = get_series();
series_wrap = series[series_id].wrap;
value = pe_pref(sched_data_set->config_hash, series[series_id].param);
if (value != NULL) {
series_wrap = crm_int_helper(value, NULL);
if (errno != 0) {
series_wrap = series[series_id].wrap;
}
} else {
crm_config_warn("No value specified for cluster"
" preference: %s", series[series_id].param);
}
seq = get_last_sequence(PE_STATE_DIR, series[series_id].name);
crm_trace("Series %s: wrap=%d, seq=%d, pref=%s",
series[series_id].name, series_wrap, seq, value);
sched_data_set->input = NULL;
reply = create_reply(msg, sched_data_set->graph);
CRM_ASSERT(reply != NULL);
if (is_repoke == FALSE) {
free(filename);
filename = generate_series_filename(PE_STATE_DIR,
series[series_id].name, seq,
TRUE);
}
crm_xml_add(reply, F_CRM_TGRAPH_INPUT, filename);
crm_xml_add_int(reply, "graph-errors", was_processing_error);
crm_xml_add_int(reply, "graph-warnings", was_processing_warning);
crm_xml_add_int(reply, "config-errors", crm_config_error);
crm_xml_add_int(reply, "config-warnings", crm_config_warning);
if (crm_ipcs_send(sender, 0, reply, crm_ipc_server_event) == FALSE) {
int graph_file_fd = 0;
char *graph_file = NULL;
umask(S_IWGRP | S_IWOTH | S_IROTH);
graph_file = crm_strdup_printf("%s/pengine.graph.XXXXXX",
PE_STATE_DIR);
graph_file_fd = mkstemp(graph_file);
crm_err("Couldn't send transition graph to peer, writing to %s instead",
graph_file);
crm_xml_add(reply, F_CRM_TGRAPH, graph_file);
write_xml_fd(sched_data_set->graph, graph_file, graph_file_fd, FALSE);
free(graph_file);
free_xml(first_named_child(reply, F_CRM_DATA));
CRM_ASSERT(crm_ipcs_send(sender, 0, reply, crm_ipc_server_event));
}
free_xml(reply);
pe_reset_working_set(sched_data_set);
pcmk__log_transition_summary(filename);
if (is_repoke == FALSE && series_wrap != 0) {
unlink(filename);
- crm_xml_add_int(xml_data, "execution-date", execution_date);
+ crm_xml_add_ll(xml_data, "execution-date", (long long) execution_date);
write_xml_file(xml_data, filename, TRUE);
write_last_sequence(PE_STATE_DIR, series[series_id].name, seq + 1, series_wrap);
} else {
crm_trace("Not writing out %s: %d & %d", filename, is_repoke, series_wrap);
}
free_xml(converted);
}
return TRUE;
}
static int32_t
pe_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
crm_trace("Connection %p", c);
if (crm_client_new(c, uid, gid) == NULL) {
return -EIO;
}
return 0;
}
static void
pe_ipc_created(qb_ipcs_connection_t * c)
{
crm_trace("Connection %p", c);
}
gboolean process_pe_message(xmlNode * msg, xmlNode * xml_data, crm_client_t * sender);
static int32_t
pe_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
{
uint32_t id = 0;
uint32_t flags = 0;
crm_client_t *c = crm_client_get(qbc);
xmlNode *msg = crm_ipcs_recv(c, data, size, &id, &flags);
crm_ipcs_send_ack(c, id, flags, "ack", __FUNCTION__, __LINE__);
if (msg != NULL) {
xmlNode *data_xml = get_message_xml(msg, F_CRM_DATA);
process_pe_message(msg, data_xml, c);
free_xml(msg);
}
return 0;
}
/* Error code means? */
static int32_t
pe_ipc_closed(qb_ipcs_connection_t * c)
{
crm_client_t *client = crm_client_get(c);
if (client == NULL) {
return 0;
}
crm_trace("Connection %p", c);
crm_client_destroy(client);
return 0;
}
static void
pe_ipc_destroy(qb_ipcs_connection_t * c)
{
crm_trace("Connection %p", c);
pe_ipc_closed(c);
}
struct qb_ipcs_service_handlers ipc_callbacks = {
.connection_accept = pe_ipc_accept,
.connection_created = pe_ipc_created,
.msg_process = pe_ipc_dispatch,
.connection_closed = pe_ipc_closed,
.connection_destroyed = pe_ipc_destroy
};
/* *INDENT-OFF* */
static struct crm_option long_options[] = {
/* Top-level Options */
{"help", 0, 0, '?', "\tThis text"},
{"verbose", 0, 0, 'V', "\tIncrease debug output"},
{0, 0, 0, 0}
};
/* *INDENT-ON* */
int
main(int argc, char **argv)
{
int flag;
int index = 0;
int argerr = 0;
crm_log_preinit(NULL, argc, argv);
crm_set_options(NULL, "[options]",
long_options, "Daemon for calculating the cluster's response to events");
mainloop_add_signal(SIGTERM, pengine_shutdown);
while (1) {
flag = crm_get_option(argc, argv, &index);
if (flag == -1)
break;
switch (flag) {
case 'V':
crm_bump_log_level(argc, argv);
break;
case 'h': /* Help message */
crm_help('?', CRM_EX_OK);
break;
default:
++argerr;
break;
}
}
if (argc - optind == 1 && safe_str_eq("metadata", argv[optind])) {
pe_metadata();
return CRM_EX_OK;
}
if (optind > argc) {
++argerr;
}
if (argerr) {
crm_help('?', CRM_EX_USAGE);
}
crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
crm_notice("Starting Pacemaker scheduler");
if (pcmk__daemon_can_write(PE_STATE_DIR, NULL) == FALSE) {
crm_err("Terminating due to bad permissions on " PE_STATE_DIR);
fprintf(stderr,
"ERROR: Bad permissions on " PE_STATE_DIR " (see logs for details)\n");
fflush(stderr);
return CRM_EX_FATAL;
}
ipcs = mainloop_add_ipc_server(CRM_SYSTEM_PENGINE, QB_IPC_SHM, &ipc_callbacks);
if (ipcs == NULL) {
crm_err("Failed to create IPC server: shutting down and inhibiting respawn");
crm_exit(CRM_EX_FATAL);
}
/* Create the mainloop and run it... */
mainloop = g_main_loop_new(NULL, FALSE);
crm_notice("Pacemaker scheduler successfully started and accepting connections");
g_main_loop_run(mainloop);
pe_free_working_set(sched_data_set);
crm_info("Exiting %s", crm_system_name);
crm_exit(CRM_EX_OK);
}
void
pengine_shutdown(int nsig)
{
mainloop_del_ipc_server(ipcs);
pe_free_working_set(sched_data_set);
crm_exit(CRM_EX_OK);
}
diff --git a/lib/common/ipc.c b/lib/common/ipc.c
index 84d7cb93fa..17ad624e5b 100644
--- a/lib/common/ipc.c
+++ b/lib/common/ipc.c
@@ -1,1605 +1,1605 @@
/*
* Copyright 2004-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include
#if defined(US_AUTH_PEERCRED_UCRED) || defined(US_AUTH_PEERCRED_SOCKPEERCRED)
# ifdef US_AUTH_PEERCRED_UCRED
# ifndef _GNU_SOURCE
# define _GNU_SOURCE
# endif
# endif
# include
#elif defined(US_AUTH_GETPEERUCRED)
# include
#endif
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include /* indirectly: pcmk_err_generic */
#include
#include
#include
#include /* PCMK__SPECIAL_PID* */
#define PCMK_IPC_VERSION 1
/* Evict clients whose event queue grows this large (by default) */
#define PCMK_IPC_DEFAULT_QUEUE_MAX 500
struct crm_ipc_response_header {
struct qb_ipc_response_header qb;
uint32_t size_uncompressed;
uint32_t size_compressed;
uint32_t flags;
uint8_t version; /* Protect against version changes for anyone that might bother to statically link us */
};
static int hdr_offset = 0;
static unsigned int ipc_buffer_max = 0;
static unsigned int pick_ipc_buffer(unsigned int max);
static inline void
crm_ipc_init(void)
{
if (hdr_offset == 0) {
hdr_offset = sizeof(struct crm_ipc_response_header);
}
if (ipc_buffer_max == 0) {
ipc_buffer_max = pick_ipc_buffer(0);
}
}
unsigned int
crm_ipc_default_buffer_size(void)
{
return pick_ipc_buffer(0);
}
static char *
generateReference(const char *custom1, const char *custom2)
{
static uint ref_counter = 0;
- return crm_strdup_printf("%s-%s-%lu-%u",
+ return crm_strdup_printf("%s-%s-%lld-%u",
(custom1? custom1 : "_empty_"),
(custom2? custom2 : "_empty_"),
- (unsigned long)time(NULL), ref_counter++);
+ (long long) time(NULL), ref_counter++);
}
xmlNode *
create_request_adv(const char *task, xmlNode * msg_data,
const char *host_to, const char *sys_to,
const char *sys_from, const char *uuid_from, const char *origin)
{
char *true_from = NULL;
xmlNode *request = NULL;
char *reference = generateReference(task, sys_from);
if (uuid_from != NULL) {
true_from = generate_hash_key(sys_from, uuid_from);
} else if (sys_from != NULL) {
true_from = strdup(sys_from);
} else {
crm_err("No sys from specified");
}
// host_from will get set for us if necessary by the controller when routed
request = create_xml_node(NULL, __FUNCTION__);
crm_xml_add(request, F_CRM_ORIGIN, origin);
crm_xml_add(request, F_TYPE, T_CRM);
crm_xml_add(request, F_CRM_VERSION, CRM_FEATURE_SET);
crm_xml_add(request, F_CRM_MSG_TYPE, XML_ATTR_REQUEST);
crm_xml_add(request, F_CRM_REFERENCE, reference);
crm_xml_add(request, F_CRM_TASK, task);
crm_xml_add(request, F_CRM_SYS_TO, sys_to);
crm_xml_add(request, F_CRM_SYS_FROM, true_from);
/* HOSTTO will be ignored if it is to the DC anyway. */
if (host_to != NULL && strlen(host_to) > 0) {
crm_xml_add(request, F_CRM_HOST_TO, host_to);
}
if (msg_data != NULL) {
add_message_xml(request, F_CRM_DATA, msg_data);
}
free(reference);
free(true_from);
return request;
}
/*
* This method adds a copy of xml_response_data
*/
xmlNode *
create_reply_adv(xmlNode * original_request, xmlNode * xml_response_data, const char *origin)
{
xmlNode *reply = NULL;
const char *host_from = crm_element_value(original_request, F_CRM_HOST_FROM);
const char *sys_from = crm_element_value(original_request, F_CRM_SYS_FROM);
const char *sys_to = crm_element_value(original_request, F_CRM_SYS_TO);
const char *type = crm_element_value(original_request, F_CRM_MSG_TYPE);
const char *operation = crm_element_value(original_request, F_CRM_TASK);
const char *crm_msg_reference = crm_element_value(original_request, F_CRM_REFERENCE);
if (type == NULL) {
crm_err("Cannot create new_message, no message type in original message");
CRM_ASSERT(type != NULL);
return NULL;
#if 0
} else if (strcasecmp(XML_ATTR_REQUEST, type) != 0) {
crm_err("Cannot create new_message, original message was not a request");
return NULL;
#endif
}
reply = create_xml_node(NULL, __FUNCTION__);
if (reply == NULL) {
crm_err("Cannot create new_message, malloc failed");
return NULL;
}
crm_xml_add(reply, F_CRM_ORIGIN, origin);
crm_xml_add(reply, F_TYPE, T_CRM);
crm_xml_add(reply, F_CRM_VERSION, CRM_FEATURE_SET);
crm_xml_add(reply, F_CRM_MSG_TYPE, XML_ATTR_RESPONSE);
crm_xml_add(reply, F_CRM_REFERENCE, crm_msg_reference);
crm_xml_add(reply, F_CRM_TASK, operation);
/* since this is a reply, we reverse the from and to */
crm_xml_add(reply, F_CRM_SYS_TO, sys_from);
crm_xml_add(reply, F_CRM_SYS_FROM, sys_to);
/* HOSTTO will be ignored if it is to the DC anyway. */
if (host_from != NULL && strlen(host_from) > 0) {
crm_xml_add(reply, F_CRM_HOST_TO, host_from);
}
if (xml_response_data != NULL) {
add_message_xml(reply, F_CRM_DATA, xml_response_data);
}
return reply;
}
/* Libqb based IPC */
/* Server... */
GHashTable *client_connections = NULL;
crm_client_t *
crm_client_get(qb_ipcs_connection_t * c)
{
if (client_connections) {
return g_hash_table_lookup(client_connections, c);
}
crm_trace("No client found for %p", c);
return NULL;
}
crm_client_t *
crm_client_get_by_id(const char *id)
{
gpointer key;
crm_client_t *client;
GHashTableIter iter;
if (client_connections && id) {
g_hash_table_iter_init(&iter, client_connections);
while (g_hash_table_iter_next(&iter, &key, (gpointer *) & client)) {
if (strcmp(client->id, id) == 0) {
return client;
}
}
}
crm_trace("No client found with id=%s", id);
return NULL;
}
const char *
crm_client_name(crm_client_t * c)
{
if (c == NULL) {
return "null";
} else if (c->name == NULL && c->id == NULL) {
return "unknown";
} else if (c->name == NULL) {
return c->id;
} else {
return c->name;
}
}
const char *
crm_client_type_text(enum client_type client_type)
{
switch (client_type) {
case CRM_CLIENT_IPC:
return "IPC";
case CRM_CLIENT_TCP:
return "TCP";
#ifdef HAVE_GNUTLS_GNUTLS_H
case CRM_CLIENT_TLS:
return "TLS";
#endif
default:
return "unknown";
}
}
void
crm_client_init(void)
{
if (client_connections == NULL) {
crm_trace("Creating client hash table");
client_connections = g_hash_table_new(g_direct_hash, g_direct_equal);
}
}
void
crm_client_cleanup(void)
{
if (client_connections != NULL) {
int active = g_hash_table_size(client_connections);
if (active) {
crm_err("Exiting with %d active connections", active);
}
g_hash_table_destroy(client_connections); client_connections = NULL;
}
}
void
crm_client_disconnect_all(qb_ipcs_service_t *service)
{
qb_ipcs_connection_t *c = NULL;
if (service == NULL) {
return;
}
c = qb_ipcs_connection_first_get(service);
while (c != NULL) {
qb_ipcs_connection_t *last = c;
c = qb_ipcs_connection_next_get(service, last);
/* There really shouldn't be anyone connected at this point */
crm_notice("Disconnecting client %p, pid=%d...", last, crm_ipcs_client_pid(last));
qb_ipcs_disconnect(last);
qb_ipcs_connection_unref(last);
}
}
/*!
* \internal
* \brief Allocate a new crm_client_t object based on an IPC connection
*
* \param[in] c IPC connection (or NULL to allocate generic client)
* \param[in] key Connection table key (or NULL to use sane default)
* \param[in] uid_client UID corresponding to c (ignored if c is NULL)
*
* \return Pointer to new crm_client_t (or NULL on error)
*/
static crm_client_t *
client_from_connection(qb_ipcs_connection_t *c, void *key, uid_t uid_client)
{
crm_client_t *client = calloc(1, sizeof(crm_client_t));
if (client == NULL) {
crm_perror(LOG_ERR, "Allocating client");
return NULL;
}
if (c) {
#if ENABLE_ACL
client->user = uid2username(uid_client);
if (client->user == NULL) {
client->user = strdup("#unprivileged");
CRM_CHECK(client->user != NULL, free(client); return NULL);
crm_err("Unable to enforce ACLs for user ID %d, assuming unprivileged",
uid_client);
}
#endif
client->ipcs = c;
client->kind = CRM_CLIENT_IPC;
client->pid = crm_ipcs_client_pid(c);
if (key == NULL) {
key = c;
}
}
client->id = crm_generate_uuid();
if (client->id == NULL) {
crm_err("Could not generate UUID for client");
free(client->user);
free(client);
return NULL;
}
if (key == NULL) {
key = client->id;
}
g_hash_table_insert(client_connections, key, client);
return client;
}
/*!
* \brief Allocate a new crm_client_t object and generate its ID
*
* \param[in] key What to use as connections hash table key (NULL to use ID)
*
* \return Pointer to new crm_client_t (asserts on failure)
*/
crm_client_t *
crm_client_alloc(void *key)
{
crm_client_t *client = client_from_connection(NULL, key, 0);
CRM_ASSERT(client != NULL);
return client;
}
crm_client_t *
crm_client_new(qb_ipcs_connection_t * c, uid_t uid_client, gid_t gid_client)
{
static gid_t uid_cluster = 0;
static gid_t gid_cluster = 0;
crm_client_t *client = NULL;
CRM_CHECK(c != NULL, return NULL);
if (uid_cluster == 0) {
if (crm_user_lookup(CRM_DAEMON_USER, &uid_cluster, &gid_cluster) < 0) {
static bool need_log = TRUE;
if (need_log) {
crm_warn("Could not find user and group IDs for user %s",
CRM_DAEMON_USER);
need_log = FALSE;
}
}
}
if (uid_client != 0) {
crm_trace("Giving access to group %u", gid_cluster);
/* Passing -1 to chown(2) means don't change */
qb_ipcs_connection_auth_set(c, -1, gid_cluster, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
}
crm_client_init();
/* TODO: Do our own auth checking, return NULL if unauthorized */
client = client_from_connection(c, NULL, uid_client);
if (client == NULL) {
return NULL;
}
if ((uid_client == 0) || (uid_client == uid_cluster)) {
/* Remember when a connection came from root or hacluster */
set_bit(client->flags, crm_client_flag_ipc_privileged);
}
crm_debug("Connecting %p for uid=%d gid=%d pid=%u id=%s", c, uid_client, gid_client, client->pid, client->id);
return client;
}
static struct iovec *
pcmk__new_ipc_event()
{
struct iovec *iov = calloc(2, sizeof(struct iovec));
CRM_ASSERT(iov != NULL);
return iov;
}
/*!
* \brief Free an I/O vector created by crm_ipc_prepare()
*
* \param[in] event I/O vector to free
*/
void
pcmk_free_ipc_event(struct iovec *event)
{
if (event != NULL) {
free(event[0].iov_base);
free(event[1].iov_base);
free(event);
}
}
static void
free_event(gpointer data)
{
pcmk_free_ipc_event((struct iovec *) data);
}
static void
add_event(crm_client_t *c, struct iovec *iov)
{
if (c->event_queue == NULL) {
c->event_queue = g_queue_new();
}
g_queue_push_tail(c->event_queue, iov);
}
void
crm_client_destroy(crm_client_t * c)
{
if (c == NULL) {
return;
}
if (client_connections) {
if (c->ipcs) {
crm_trace("Destroying %p/%p (%d remaining)",
c, c->ipcs, crm_hash_table_size(client_connections) - 1);
g_hash_table_remove(client_connections, c->ipcs);
} else {
crm_trace("Destroying remote connection %p (%d remaining)",
c, crm_hash_table_size(client_connections) - 1);
g_hash_table_remove(client_connections, c->id);
}
}
if (c->event_timer) {
g_source_remove(c->event_timer);
}
if (c->event_queue) {
crm_debug("Destroying %d events", g_queue_get_length(c->event_queue));
g_queue_free_full(c->event_queue, free_event);
}
free(c->id);
free(c->name);
free(c->user);
if (c->remote) {
if (c->remote->auth_timeout) {
g_source_remove(c->remote->auth_timeout);
}
free(c->remote->buffer);
free(c->remote);
}
free(c);
}
/*!
* \brief Raise IPC eviction threshold for a client, if allowed
*
* \param[in,out] client Client to modify
* \param[in] queue_max New threshold (as string)
*
* \return TRUE if change was allowed, FALSE otherwise
*/
bool
crm_set_client_queue_max(crm_client_t *client, const char *qmax)
{
if (is_set(client->flags, crm_client_flag_ipc_privileged)) {
int qmax_int = crm_int_helper(qmax, NULL);
if ((errno == 0) && (qmax_int > 0)) {
client->queue_max = qmax_int;
return TRUE;
}
}
return FALSE;
}
int
crm_ipcs_client_pid(qb_ipcs_connection_t * c)
{
struct qb_ipcs_connection_stats stats;
stats.client_pid = 0;
qb_ipcs_connection_stats_get(c, &stats, 0);
return stats.client_pid;
}
xmlNode *
crm_ipcs_recv(crm_client_t * c, void *data, size_t size, uint32_t * id, uint32_t * flags)
{
xmlNode *xml = NULL;
char *uncompressed = NULL;
char *text = ((char *)data) + sizeof(struct crm_ipc_response_header);
struct crm_ipc_response_header *header = data;
if (id) {
*id = ((struct qb_ipc_response_header *)data)->id;
}
if (flags) {
*flags = header->flags;
}
if (is_set(header->flags, crm_ipc_proxied)) {
/* Mark this client as being the endpoint of a proxy connection.
* Proxy connections responses are sent on the event channel, to avoid
* blocking the controller serving as proxy.
*/
c->flags |= crm_client_flag_ipc_proxied;
}
if(header->version > PCMK_IPC_VERSION) {
crm_err("Filtering incompatible v%d IPC message, we only support versions <= %d",
header->version, PCMK_IPC_VERSION);
return NULL;
}
if (header->size_compressed) {
int rc = 0;
unsigned int size_u = 1 + header->size_uncompressed;
uncompressed = calloc(1, size_u);
crm_trace("Decompressing message data %u bytes into %u bytes",
header->size_compressed, size_u);
rc = BZ2_bzBuffToBuffDecompress(uncompressed, &size_u, text, header->size_compressed, 1, 0);
text = uncompressed;
if (rc != BZ_OK) {
crm_err("Decompression failed: %s " CRM_XS " bzerror=%d",
bz2_strerror(rc), rc);
free(uncompressed);
return NULL;
}
}
CRM_ASSERT(text[header->size_uncompressed - 1] == 0);
crm_trace("Received %.200s", text);
xml = string2xml(text);
free(uncompressed);
return xml;
}
ssize_t crm_ipcs_flush_events(crm_client_t * c);
static gboolean
crm_ipcs_flush_events_cb(gpointer data)
{
crm_client_t *c = data;
c->event_timer = 0;
crm_ipcs_flush_events(c);
return FALSE;
}
/*!
* \internal
* \brief Add progressive delay before next event queue flush
*
* \param[in,out] c Client connection to add delay to
* \param[in] queue_len Current event queue length
*/
static inline void
delay_next_flush(crm_client_t *c, unsigned int queue_len)
{
/* Delay a maximum of 1.5 seconds */
guint delay = (queue_len < 5)? (1000 + 100 * queue_len) : 1500;
c->event_timer = g_timeout_add(delay, crm_ipcs_flush_events_cb, c);
}
ssize_t
crm_ipcs_flush_events(crm_client_t * c)
{
ssize_t rc = 0;
unsigned int sent = 0;
unsigned int queue_len = 0;
if (c == NULL) {
return pcmk_ok;
} else if (c->event_timer) {
/* There is already a timer, wait until it goes off */
crm_trace("Timer active for %p - %d", c->ipcs, c->event_timer);
return pcmk_ok;
}
if (c->event_queue) {
queue_len = g_queue_get_length(c->event_queue);
}
while (sent < 100) {
struct crm_ipc_response_header *header = NULL;
struct iovec *event = NULL;
if (c->event_queue) {
// We don't pop unless send is successful
event = g_queue_peek_head(c->event_queue);
}
if (event == NULL) { // Queue is empty
break;
}
rc = qb_ipcs_event_sendv(c->ipcs, event, 2);
if (rc < 0) {
break;
}
event = g_queue_pop_head(c->event_queue);
sent++;
header = event[0].iov_base;
if (header->size_compressed) {
crm_trace("Event %d to %p[%d] (%lld compressed bytes) sent",
header->qb.id, c->ipcs, c->pid, (long long) rc);
} else {
crm_trace("Event %d to %p[%d] (%lld bytes) sent: %.120s",
header->qb.id, c->ipcs, c->pid, (long long) rc,
(char *) (event[1].iov_base));
}
pcmk_free_ipc_event(event);
}
queue_len -= sent;
if (sent > 0 || queue_len) {
crm_trace("Sent %d events (%d remaining) for %p[%d]: %s (%lld)",
sent, queue_len, c->ipcs, c->pid,
pcmk_strerror(rc < 0 ? rc : 0), (long long) rc);
}
if (queue_len) {
/* Allow clients to briefly fall behind on processing incoming messages,
* but drop completely unresponsive clients so the connection doesn't
* consume resources indefinitely.
*/
if (queue_len > QB_MAX(c->queue_max, PCMK_IPC_DEFAULT_QUEUE_MAX)) {
if ((c->queue_backlog <= 1) || (queue_len < c->queue_backlog)) {
/* Don't evict for a new or shrinking backlog */
crm_warn("Client with process ID %u has a backlog of %u messages "
CRM_XS " %p", c->pid, queue_len, c->ipcs);
} else {
crm_err("Evicting client with process ID %u due to backlog of %u messages "
CRM_XS " %p", c->pid, queue_len, c->ipcs);
c->queue_backlog = 0;
qb_ipcs_disconnect(c->ipcs);
return rc;
}
}
c->queue_backlog = queue_len;
delay_next_flush(c, queue_len);
} else {
/* Event queue is empty, there is no backlog */
c->queue_backlog = 0;
}
return rc;
}
ssize_t
crm_ipc_prepare(uint32_t request, xmlNode * message, struct iovec ** result, uint32_t max_send_size)
{
static unsigned int biggest = 0;
struct iovec *iov;
unsigned int total = 0;
char *compressed = NULL;
char *buffer = dump_xml_unformatted(message);
struct crm_ipc_response_header *header = calloc(1, sizeof(struct crm_ipc_response_header));
CRM_ASSERT(result != NULL);
crm_ipc_init();
if (max_send_size == 0) {
max_send_size = ipc_buffer_max;
}
CRM_LOG_ASSERT(max_send_size != 0);
*result = NULL;
iov = pcmk__new_ipc_event();
iov[0].iov_len = hdr_offset;
iov[0].iov_base = header;
header->version = PCMK_IPC_VERSION;
header->size_uncompressed = 1 + strlen(buffer);
total = iov[0].iov_len + header->size_uncompressed;
if (total < max_send_size) {
iov[1].iov_base = buffer;
iov[1].iov_len = header->size_uncompressed;
} else {
unsigned int new_size = 0;
if (crm_compress_string
(buffer, header->size_uncompressed, max_send_size, &compressed, &new_size)) {
header->flags |= crm_ipc_compressed;
header->size_compressed = new_size;
iov[1].iov_len = header->size_compressed;
iov[1].iov_base = compressed;
free(buffer);
biggest = QB_MAX(header->size_compressed, biggest);
} else {
ssize_t rc = -EMSGSIZE;
crm_log_xml_trace(message, "EMSGSIZE");
biggest = QB_MAX(header->size_uncompressed, biggest);
crm_err
("Could not compress the message (%u bytes) into less than the configured ipc limit (%u bytes). "
"Set PCMK_ipc_buffer to a higher value (%u bytes suggested)",
header->size_uncompressed, max_send_size, 4 * biggest);
free(compressed);
pcmk_free_ipc_event(iov);
return rc;
}
}
header->qb.size = iov[0].iov_len + iov[1].iov_len;
header->qb.id = (int32_t)request; /* Replying to a specific request */
*result = iov;
CRM_ASSERT(header->qb.size > 0);
return header->qb.size;
}
ssize_t
crm_ipcs_sendv(crm_client_t * c, struct iovec * iov, enum crm_ipc_flags flags)
{
ssize_t rc;
static uint32_t id = 1;
struct crm_ipc_response_header *header = iov[0].iov_base;
if (c->flags & crm_client_flag_ipc_proxied) {
/* _ALL_ replies to proxied connections need to be sent as events */
if (is_not_set(flags, crm_ipc_server_event)) {
flags |= crm_ipc_server_event;
/* this flag lets us know this was originally meant to be a response.
* even though we're sending it over the event channel. */
flags |= crm_ipc_proxied_relay_response;
}
}
header->flags |= flags;
if (flags & crm_ipc_server_event) {
header->qb.id = id++; /* We don't really use it, but doesn't hurt to set one */
if (flags & crm_ipc_server_free) {
crm_trace("Sending the original to %p[%d]", c->ipcs, c->pid);
add_event(c, iov);
} else {
struct iovec *iov_copy = pcmk__new_ipc_event();
crm_trace("Sending a copy to %p[%d]", c->ipcs, c->pid);
iov_copy[0].iov_len = iov[0].iov_len;
iov_copy[0].iov_base = malloc(iov[0].iov_len);
memcpy(iov_copy[0].iov_base, iov[0].iov_base, iov[0].iov_len);
iov_copy[1].iov_len = iov[1].iov_len;
iov_copy[1].iov_base = malloc(iov[1].iov_len);
memcpy(iov_copy[1].iov_base, iov[1].iov_base, iov[1].iov_len);
add_event(c, iov_copy);
}
} else {
CRM_LOG_ASSERT(header->qb.id != 0); /* Replying to a specific request */
rc = qb_ipcs_response_sendv(c->ipcs, iov, 2);
if (rc < header->qb.size) {
crm_notice("Response %d to pid %d failed: %s "
CRM_XS " bytes=%u rc=%lld ipcs=%p",
header->qb.id, c->pid, pcmk_strerror(rc),
header->qb.size, (long long) rc, c->ipcs);
} else {
crm_trace("Response %d sent, %lld bytes to %p[%d]",
header->qb.id, (long long) rc, c->ipcs, c->pid);
}
if (flags & crm_ipc_server_free) {
pcmk_free_ipc_event(iov);
}
}
if (flags & crm_ipc_server_event) {
rc = crm_ipcs_flush_events(c);
} else {
crm_ipcs_flush_events(c);
}
if (rc == -EPIPE || rc == -ENOTCONN) {
crm_trace("Client %p disconnected", c->ipcs);
}
return rc;
}
ssize_t
crm_ipcs_send(crm_client_t * c, uint32_t request, xmlNode * message,
enum crm_ipc_flags flags)
{
struct iovec *iov = NULL;
ssize_t rc = 0;
if(c == NULL) {
return -EDESTADDRREQ;
}
crm_ipc_init();
rc = crm_ipc_prepare(request, message, &iov, ipc_buffer_max);
if (rc > 0) {
rc = crm_ipcs_sendv(c, iov, flags | crm_ipc_server_free);
} else {
pcmk_free_ipc_event(iov);
crm_notice("Message to pid %d failed: %s " CRM_XS " rc=%lld ipcs=%p",
c->pid, pcmk_strerror(rc), (long long) rc, c->ipcs);
}
return rc;
}
void
crm_ipcs_send_ack(crm_client_t * c, uint32_t request, uint32_t flags, const char *tag, const char *function,
int line)
{
if (flags & crm_ipc_client_response) {
xmlNode *ack = create_xml_node(NULL, tag);
crm_trace("Ack'ing msg from %s (%p)", crm_client_name(c), c);
c->request_id = 0;
crm_xml_add(ack, "function", function);
crm_xml_add_int(ack, "line", line);
crm_ipcs_send(c, request, ack, flags);
free_xml(ack);
}
}
/* Client... */
#define MIN_MSG_SIZE 12336 /* sizeof(struct qb_ipc_connection_response) */
#define MAX_MSG_SIZE 128*1024 /* 128k default */
struct crm_ipc_s {
struct pollfd pfd;
/* the max size we can send/receive over ipc */
unsigned int max_buf_size;
/* Size of the allocated 'buffer' */
unsigned int buf_size;
int msg_size;
int need_reply;
char *buffer;
char *name;
qb_ipcc_connection_t *ipc;
};
static unsigned int
pick_ipc_buffer(unsigned int max)
{
static unsigned int global_max = 0;
if (global_max == 0) {
const char *env = getenv("PCMK_ipc_buffer");
if (env) {
int env_max = crm_parse_int(env, "0");
global_max = (env_max > 0)? QB_MAX(MIN_MSG_SIZE, env_max) : MAX_MSG_SIZE;
} else {
global_max = MAX_MSG_SIZE;
}
}
return QB_MAX(max, global_max);
}
crm_ipc_t *
crm_ipc_new(const char *name, size_t max_size)
{
crm_ipc_t *client = NULL;
client = calloc(1, sizeof(crm_ipc_t));
client->name = strdup(name);
client->buf_size = pick_ipc_buffer(max_size);
client->buffer = malloc(client->buf_size);
/* Clients initiating connection pick the max buf size */
client->max_buf_size = client->buf_size;
client->pfd.fd = -1;
client->pfd.events = POLLIN;
client->pfd.revents = 0;
return client;
}
/*!
* \brief Establish an IPC connection to a Pacemaker component
*
* \param[in] client Connection instance obtained from crm_ipc_new()
*
* \return TRUE on success, FALSE otherwise (in which case errno will be set;
* specifically, in case of discovering the remote side is not
* authentic, its value is set to ECONNABORTED).
*/
bool
crm_ipc_connect(crm_ipc_t * client)
{
static uid_t cl_uid = 0;
static gid_t cl_gid = 0;
pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0;
int rv;
client->need_reply = FALSE;
client->ipc = qb_ipcc_connect(client->name, client->buf_size);
if (client->ipc == NULL) {
crm_debug("Could not establish %s connection: %s (%d)", client->name, pcmk_strerror(errno), errno);
return FALSE;
}
client->pfd.fd = crm_ipc_get_fd(client);
if (client->pfd.fd < 0) {
rv = errno;
/* message already omitted */
crm_ipc_close(client);
errno = rv;
return FALSE;
}
if (!cl_uid && !cl_gid
&& (rv = crm_user_lookup(CRM_DAEMON_USER, &cl_uid, &cl_gid)) < 0) {
errno = -rv;
/* message already omitted */
crm_ipc_close(client);
errno = -rv;
return FALSE;
}
if (!(rv = crm_ipc_is_authentic_process(client->pfd.fd, cl_uid, cl_gid,
&found_pid, &found_uid,
&found_gid))) {
crm_err("Daemon (IPC %s) is not authentic:"
" process %lld (uid: %lld, gid: %lld)",
client->name, (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
(long long) found_uid, (long long) found_gid);
crm_ipc_close(client);
errno = ECONNABORTED;
return FALSE;
} else if (rv < 0) {
errno = -rv;
crm_perror(LOG_ERR, "Could not verify authenticity of daemon (IPC %s)",
client->name);
crm_ipc_close(client);
errno = -rv;
return FALSE;
}
qb_ipcc_context_set(client->ipc, client);
#ifdef HAVE_IPCS_GET_BUFFER_SIZE
client->max_buf_size = qb_ipcc_get_buffer_size(client->ipc);
if (client->max_buf_size > client->buf_size) {
free(client->buffer);
client->buffer = calloc(1, client->max_buf_size);
client->buf_size = client->max_buf_size;
}
#endif
return TRUE;
}
void
crm_ipc_close(crm_ipc_t * client)
{
if (client) {
crm_trace("Disconnecting %s IPC connection %p (%p)", client->name, client, client->ipc);
if (client->ipc) {
qb_ipcc_connection_t *ipc = client->ipc;
client->ipc = NULL;
qb_ipcc_disconnect(ipc);
}
}
}
void
crm_ipc_destroy(crm_ipc_t * client)
{
if (client) {
if (client->ipc && qb_ipcc_is_connected(client->ipc)) {
crm_notice("Destroying an active IPC connection to %s", client->name);
/* The next line is basically unsafe
*
* If this connection was attached to mainloop and mainloop is active,
* the 'disconnected' callback will end up back here and we'll end
* up free'ing the memory twice - something that can still happen
* even without this if we destroy a connection and it closes before
* we call exit
*/
/* crm_ipc_close(client); */
}
crm_trace("Destroying IPC connection to %s: %p", client->name, client);
free(client->buffer);
free(client->name);
free(client);
}
}
int
crm_ipc_get_fd(crm_ipc_t * client)
{
int fd = 0;
if (client && client->ipc && (qb_ipcc_fd_get(client->ipc, &fd) == 0)) {
return fd;
}
errno = EINVAL;
crm_perror(LOG_ERR, "Could not obtain file IPC descriptor for %s",
(client? client->name : "unspecified client"));
return -errno;
}
bool
crm_ipc_connected(crm_ipc_t * client)
{
bool rc = FALSE;
if (client == NULL) {
crm_trace("No client");
return FALSE;
} else if (client->ipc == NULL) {
crm_trace("No connection");
return FALSE;
} else if (client->pfd.fd < 0) {
crm_trace("Bad descriptor");
return FALSE;
}
rc = qb_ipcc_is_connected(client->ipc);
if (rc == FALSE) {
client->pfd.fd = -EINVAL;
}
return rc;
}
/*!
* \brief Check whether an IPC connection is ready to be read
*
* \param[in] client Connection to check
*
* \return Positive value if ready to be read, 0 if not ready, -errno on error
*/
int
crm_ipc_ready(crm_ipc_t *client)
{
int rc;
CRM_ASSERT(client != NULL);
if (crm_ipc_connected(client) == FALSE) {
return -ENOTCONN;
}
client->pfd.revents = 0;
rc = poll(&(client->pfd), 1, 0);
return (rc < 0)? -errno : rc;
}
static int
crm_ipc_decompress(crm_ipc_t * client)
{
struct crm_ipc_response_header *header = (struct crm_ipc_response_header *)(void*)client->buffer;
if (header->size_compressed) {
int rc = 0;
unsigned int size_u = 1 + header->size_uncompressed;
/* never let buf size fall below our max size required for ipc reads. */
unsigned int new_buf_size = QB_MAX((hdr_offset + size_u), client->max_buf_size);
char *uncompressed = calloc(1, new_buf_size);
crm_trace("Decompressing message data %u bytes into %u bytes",
header->size_compressed, size_u);
rc = BZ2_bzBuffToBuffDecompress(uncompressed + hdr_offset, &size_u,
client->buffer + hdr_offset, header->size_compressed, 1, 0);
if (rc != BZ_OK) {
crm_err("Decompression failed: %s " CRM_XS " bzerror=%d",
bz2_strerror(rc), rc);
free(uncompressed);
return -EILSEQ;
}
/*
* This assert no longer holds true. For an identical msg, some clients may
* require compression, and others may not. If that same msg (event) is sent
* to multiple clients, it could result in some clients receiving a compressed
* msg even though compression was not explicitly required for them.
*
* CRM_ASSERT((header->size_uncompressed + hdr_offset) >= ipc_buffer_max);
*/
CRM_ASSERT(size_u == header->size_uncompressed);
memcpy(uncompressed, client->buffer, hdr_offset); /* Preserve the header */
header = (struct crm_ipc_response_header *)(void*)uncompressed;
free(client->buffer);
client->buf_size = new_buf_size;
client->buffer = uncompressed;
}
CRM_ASSERT(client->buffer[hdr_offset + header->size_uncompressed - 1] == 0);
return pcmk_ok;
}
long
crm_ipc_read(crm_ipc_t * client)
{
struct crm_ipc_response_header *header = NULL;
CRM_ASSERT(client != NULL);
CRM_ASSERT(client->ipc != NULL);
CRM_ASSERT(client->buffer != NULL);
crm_ipc_init();
client->buffer[0] = 0;
client->msg_size = qb_ipcc_event_recv(client->ipc, client->buffer,
client->buf_size, 0);
if (client->msg_size >= 0) {
int rc = crm_ipc_decompress(client);
if (rc != pcmk_ok) {
return rc;
}
header = (struct crm_ipc_response_header *)(void*)client->buffer;
if(header->version > PCMK_IPC_VERSION) {
crm_err("Filtering incompatible v%d IPC message, we only support versions <= %d",
header->version, PCMK_IPC_VERSION);
return -EBADMSG;
}
crm_trace("Received %s event %d, size=%u, rc=%d, text: %.100s",
client->name, header->qb.id, header->qb.size, client->msg_size,
client->buffer + hdr_offset);
} else {
crm_trace("No message from %s received: %s", client->name, pcmk_strerror(client->msg_size));
}
if (crm_ipc_connected(client) == FALSE || client->msg_size == -ENOTCONN) {
crm_err("Connection to %s failed", client->name);
}
if (header) {
/* Data excluding the header */
return header->size_uncompressed;
}
return -ENOMSG;
}
const char *
crm_ipc_buffer(crm_ipc_t * client)
{
CRM_ASSERT(client != NULL);
return client->buffer + sizeof(struct crm_ipc_response_header);
}
uint32_t
crm_ipc_buffer_flags(crm_ipc_t * client)
{
struct crm_ipc_response_header *header = NULL;
CRM_ASSERT(client != NULL);
if (client->buffer == NULL) {
return 0;
}
header = (struct crm_ipc_response_header *)(void*)client->buffer;
return header->flags;
}
const char *
crm_ipc_name(crm_ipc_t * client)
{
CRM_ASSERT(client != NULL);
return client->name;
}
static int
internal_ipc_send_recv(crm_ipc_t * client, const void *iov)
{
int rc = 0;
do {
rc = qb_ipcc_sendv_recv(client->ipc, iov, 2, client->buffer, client->buf_size, -1);
} while (rc == -EAGAIN && crm_ipc_connected(client));
return rc;
}
static int
internal_ipc_send_request(crm_ipc_t * client, const void *iov, int ms_timeout)
{
int rc = 0;
time_t timeout = time(NULL) + 1 + (ms_timeout / 1000);
do {
rc = qb_ipcc_sendv(client->ipc, iov, 2);
} while (rc == -EAGAIN && time(NULL) < timeout && crm_ipc_connected(client));
return rc;
}
static int
internal_ipc_get_reply(crm_ipc_t * client, int request_id, int ms_timeout)
{
time_t timeout = time(NULL) + 1 + (ms_timeout / 1000);
int rc = 0;
crm_ipc_init();
/* get the reply */
crm_trace("client %s waiting on reply to msg id %d", client->name, request_id);
do {
rc = qb_ipcc_recv(client->ipc, client->buffer, client->buf_size, 1000);
if (rc > 0) {
struct crm_ipc_response_header *hdr = NULL;
int rc = crm_ipc_decompress(client);
if (rc != pcmk_ok) {
return rc;
}
hdr = (struct crm_ipc_response_header *)(void*)client->buffer;
if (hdr->qb.id == request_id) {
/* Got it */
break;
} else if (hdr->qb.id < request_id) {
xmlNode *bad = string2xml(crm_ipc_buffer(client));
crm_err("Discarding old reply %d (need %d)", hdr->qb.id, request_id);
crm_log_xml_notice(bad, "OldIpcReply");
} else {
xmlNode *bad = string2xml(crm_ipc_buffer(client));
crm_err("Discarding newer reply %d (need %d)", hdr->qb.id, request_id);
crm_log_xml_notice(bad, "ImpossibleReply");
CRM_ASSERT(hdr->qb.id <= request_id);
}
} else if (crm_ipc_connected(client) == FALSE) {
crm_err("Server disconnected client %s while waiting for msg id %d", client->name,
request_id);
break;
}
} while (time(NULL) < timeout);
return rc;
}
int
crm_ipc_send(crm_ipc_t * client, xmlNode * message, enum crm_ipc_flags flags, int32_t ms_timeout,
xmlNode ** reply)
{
long rc = 0;
struct iovec *iov;
static uint32_t id = 0;
static int factor = 8;
struct crm_ipc_response_header *header;
crm_ipc_init();
if (client == NULL) {
crm_notice("Invalid connection");
return -ENOTCONN;
} else if (crm_ipc_connected(client) == FALSE) {
/* Don't even bother */
crm_notice("Connection to %s closed", client->name);
return -ENOTCONN;
}
if (ms_timeout == 0) {
ms_timeout = 5000;
}
if (client->need_reply) {
crm_trace("Trying again to obtain pending reply from %s", client->name);
rc = qb_ipcc_recv(client->ipc, client->buffer, client->buf_size, ms_timeout);
if (rc < 0) {
crm_warn("Sending to %s (%p) is disabled until pending reply is received", client->name,
client->ipc);
return -EALREADY;
} else {
crm_notice("Lost reply from %s (%p) finally arrived, sending re-enabled", client->name,
client->ipc);
client->need_reply = FALSE;
}
}
id++;
CRM_LOG_ASSERT(id != 0); /* Crude wrap-around detection */
rc = crm_ipc_prepare(id, message, &iov, client->max_buf_size);
if(rc < 0) {
return rc;
}
header = iov[0].iov_base;
header->flags |= flags;
if(is_set(flags, crm_ipc_proxied)) {
/* Don't look for a synchronous response */
clear_bit(flags, crm_ipc_client_response);
}
if(header->size_compressed) {
if(factor < 10 && (client->max_buf_size / 10) < (rc / factor)) {
crm_notice("Compressed message exceeds %d0%% of the configured ipc limit (%u bytes), "
"consider setting PCMK_ipc_buffer to %u or higher",
factor, client->max_buf_size, 2 * client->max_buf_size);
factor++;
}
}
crm_trace("Sending from client: %s request id: %d bytes: %u timeout:%d msg...",
client->name, header->qb.id, header->qb.size, ms_timeout);
if (ms_timeout > 0 || is_not_set(flags, crm_ipc_client_response)) {
rc = internal_ipc_send_request(client, iov, ms_timeout);
if (rc <= 0) {
crm_trace("Failed to send from client %s request %d with %u bytes...",
client->name, header->qb.id, header->qb.size);
goto send_cleanup;
} else if (is_not_set(flags, crm_ipc_client_response)) {
crm_trace("Message sent, not waiting for reply to %d from %s to %u bytes...",
header->qb.id, client->name, header->qb.size);
goto send_cleanup;
}
rc = internal_ipc_get_reply(client, header->qb.id, ms_timeout);
if (rc < 0) {
/* No reply, for now, disable sending
*
* The alternative is to close the connection since we don't know
* how to detect and discard out-of-sequence replies
*
* TODO - implement the above
*/
client->need_reply = TRUE;
}
} else {
rc = internal_ipc_send_recv(client, iov);
}
if (rc > 0) {
struct crm_ipc_response_header *hdr = (struct crm_ipc_response_header *)(void*)client->buffer;
crm_trace("Received response %d, size=%u, rc=%ld, text: %.200s", hdr->qb.id, hdr->qb.size,
rc, crm_ipc_buffer(client));
if (reply) {
*reply = string2xml(crm_ipc_buffer(client));
}
} else {
crm_trace("Response not received: rc=%ld, errno=%d", rc, errno);
}
send_cleanup:
if (crm_ipc_connected(client) == FALSE) {
crm_notice("Connection to %s closed: %s (%ld)", client->name, pcmk_strerror(rc), rc);
} else if (rc == -ETIMEDOUT) {
crm_warn("Request %d to %s (%p) failed: %s (%ld) after %dms",
header->qb.id, client->name, client->ipc, pcmk_strerror(rc), rc, ms_timeout);
crm_write_blackbox(0, NULL);
} else if (rc <= 0) {
crm_warn("Request %d to %s (%p) failed: %s (%ld)",
header->qb.id, client->name, client->ipc, pcmk_strerror(rc), rc);
}
pcmk_free_ipc_event(iov);
return rc;
}
int
crm_ipc_is_authentic_process(int sock, uid_t refuid, gid_t refgid,
pid_t *gotpid, uid_t *gotuid, gid_t *gotgid) {
int ret = 0;
pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0;
#if defined(US_AUTH_PEERCRED_UCRED)
struct ucred ucred;
socklen_t ucred_len = sizeof(ucred);
if (!getsockopt(sock, SOL_SOCKET, SO_PEERCRED,
&ucred, &ucred_len)
&& ucred_len == sizeof(ucred)) {
found_pid = ucred.pid; found_uid = ucred.uid; found_gid = ucred.gid;
#elif defined(US_AUTH_PEERCRED_SOCKPEERCRED)
struct sockpeercred sockpeercred;
socklen_t sockpeercred_len = sizeof(sockpeercred);
if (!getsockopt(sock, SOL_SOCKET, SO_PEERCRED,
&sockpeercred, &sockpeercred_len)
&& sockpeercred_len == sizeof(sockpeercred_len)) {
found_pid = sockpeercred.pid;
found_uid = sockpeercred.uid; found_gid = sockpeercred.gid;
#elif defined(US_AUTH_GETPEEREID)
if (!getpeereid(sock, &found_uid, &found_gid)) {
found_pid = PCMK__SPECIAL_PID; /* cannot obtain PID (FreeBSD) */
#elif defined(US_AUTH_GETPEERUCRED)
ucred_t *ucred;
if (!getpeerucred(sock, &ucred)) {
errno = 0;
found_pid = ucred_getpid(ucred);
found_uid = ucred_geteuid(ucred); found_gid = ucred_getegid(ucred);
ret = -errno;
ucred_free(ucred);
if (ret) {
return (ret < 0) ? ret : -pcmk_err_generic;
}
#else
# error "No way to authenticate a Unix socket peer"
errno = 0;
if (0) {
#endif
if (gotpid != NULL) {
*gotpid = found_pid;
}
if (gotuid != NULL) {
*gotuid = found_uid;
}
if (gotgid != NULL) {
*gotgid = found_gid;
}
ret = (found_uid == 0 || found_uid == refuid || found_gid == refgid);
} else {
ret = (errno > 0) ? -errno : -pcmk_err_generic;
}
return ret;
}
int
pcmk__ipc_is_authentic_process_active(const char *name, uid_t refuid,
gid_t refgid, pid_t *gotpid) {
static char last_asked_name[PATH_MAX / 2] = ""; /* log spam prevention */
int fd, ret = 0;
pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0;
qb_ipcc_connection_t *c;
if ((c = qb_ipcc_connect(name, 0)) == NULL) {
crm_info("Could not connect to %s IPC: %s", name, strerror(errno));
} else if ((ret = qb_ipcc_fd_get(c, &fd))) {
crm_err("Could not get fd from %s IPC: %s (%d)", name,
strerror(-ret), -ret);
ret = -1;
} else if ((ret = crm_ipc_is_authentic_process(fd, refuid, refgid,
&found_pid, &found_uid,
&found_gid)) < 0) {
if (ret == -pcmk_err_generic) {
crm_err("Could not get peer credentials from %s IPC", name);
} else {
crm_err("Could not get peer credentials from %s IPC: %s (%d)",
name, strerror(-ret), -ret);
}
ret = -1;
} else {
if (gotpid != NULL) {
*gotpid = found_pid;
}
if (!ret) {
crm_err("Daemon (IPC %s) effectively blocked with unauthorized"
" process %lld (uid: %lld, gid: %lld)",
name, (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
(long long) found_uid, (long long) found_gid);
ret = -2;
} else if ((found_uid != refuid || found_gid != refgid)
&& strncmp(last_asked_name, name, sizeof(last_asked_name))) {
if (!found_uid && refuid) {
crm_warn("Daemon (IPC %s) runs as root, whereas the expected"
" credentials are %lld:%lld, hazard of violating"
" the least privilege principle",
name, (long long) refuid, (long long) refgid);
} else {
crm_notice("Daemon (IPC %s) runs as %lld:%lld, whereas the"
" expected credentials are %lld:%lld, which may"
" mean a different set of privileges than expected",
name, (long long) found_uid, (long long) found_gid,
(long long) refuid, (long long) refgid);
}
memccpy(last_asked_name, name, '\0', sizeof(last_asked_name));
}
}
if (ret) { /* here, !ret only when we could not initially connect */
qb_ipcc_disconnect(c);
}
return ret;
}
/* Utils */
xmlNode *
create_hello_message(const char *uuid,
const char *client_name, const char *major_version, const char *minor_version)
{
xmlNode *hello_node = NULL;
xmlNode *hello = NULL;
if (uuid == NULL || strlen(uuid) == 0
|| client_name == NULL || strlen(client_name) == 0
|| major_version == NULL || strlen(major_version) == 0
|| minor_version == NULL || strlen(minor_version) == 0) {
crm_err("Missing fields, Hello message will not be valid.");
return NULL;
}
hello_node = create_xml_node(NULL, XML_TAG_OPTIONS);
crm_xml_add(hello_node, "major_version", major_version);
crm_xml_add(hello_node, "minor_version", minor_version);
crm_xml_add(hello_node, "client_name", client_name);
crm_xml_add(hello_node, "client_uuid", uuid);
crm_trace("creating hello message");
hello = create_request(CRM_OP_HELLO, hello_node, NULL, NULL, client_name, uuid);
free_xml(hello_node);
return hello;
}
diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c
index db13297943..2469c52c54 100644
--- a/lib/lrmd/lrmd_client.c
+++ b/lib/lrmd/lrmd_client.c
@@ -1,2034 +1,2040 @@
/*
* Copyright 2012-2018 David Vossel
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#ifdef HAVE_GNUTLS_GNUTLS_H
# undef KEYFILE
# include
#endif
#include
#include
#include
#include
#include
#define MAX_TLS_RECV_WAIT 10000
CRM_TRACE_INIT_DATA(lrmd);
static int lrmd_api_disconnect(lrmd_t * lrmd);
static int lrmd_api_is_connected(lrmd_t * lrmd);
/* IPC proxy functions */
int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg);
static void lrmd_internal_proxy_dispatch(lrmd_t *lrmd, xmlNode *msg);
void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg));
#ifdef HAVE_GNUTLS_GNUTLS_H
# define LRMD_CLIENT_HANDSHAKE_TIMEOUT 5000 /* 5 seconds */
gnutls_psk_client_credentials_t psk_cred_s;
int lrmd_tls_set_key(gnutls_datum_t * key);
static void lrmd_tls_disconnect(lrmd_t * lrmd);
static int global_remote_msg_id = 0;
int lrmd_tls_send_msg(crm_remote_t * session, xmlNode * msg, uint32_t id, const char *msg_type);
static void lrmd_tls_connection_destroy(gpointer userdata);
#endif
typedef struct lrmd_private_s {
enum client_type type;
char *token;
mainloop_io_t *source;
/* IPC parameters */
crm_ipc_t *ipc;
crm_remote_t *remote;
/* Extra TLS parameters */
char *remote_nodename;
#ifdef HAVE_GNUTLS_GNUTLS_H
char *server;
int port;
gnutls_psk_client_credentials_t psk_cred_c;
/* while the async connection is occurring, this is the id
* of the connection timeout timer. */
int async_timer;
int sock;
/* since tls requires a round trip across the network for a
* request/reply, there are times where we just want to be able
* to send a request from the client and not wait around (or even care
* about) what the reply is. */
int expected_late_replies;
GList *pending_notify;
crm_trigger_t *process_notify;
#endif
lrmd_event_callback callback;
/* Internal IPC proxy msg passing for remote guests */
void (*proxy_callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg);
void *proxy_callback_userdata;
char *peer_version;
} lrmd_private_t;
static lrmd_list_t *
lrmd_list_add(lrmd_list_t * head, const char *value)
{
lrmd_list_t *p, *end;
p = calloc(1, sizeof(lrmd_list_t));
p->val = strdup(value);
end = head;
while (end && end->next) {
end = end->next;
}
if (end) {
end->next = p;
} else {
head = p;
}
return head;
}
void
lrmd_list_freeall(lrmd_list_t * head)
{
lrmd_list_t *p;
while (head) {
char *val = (char *)head->val;
p = head->next;
free(val);
free(head);
head = p;
}
}
lrmd_key_value_t *
lrmd_key_value_add(lrmd_key_value_t * head, const char *key, const char *value)
{
lrmd_key_value_t *p, *end;
p = calloc(1, sizeof(lrmd_key_value_t));
p->key = strdup(key);
p->value = strdup(value);
end = head;
while (end && end->next) {
end = end->next;
}
if (end) {
end->next = p;
} else {
head = p;
}
return head;
}
void
lrmd_key_value_freeall(lrmd_key_value_t * head)
{
lrmd_key_value_t *p;
while (head) {
p = head->next;
free(head->key);
free(head->value);
free(head);
head = p;
}
}
lrmd_event_data_t *
lrmd_copy_event(lrmd_event_data_t * event)
{
lrmd_event_data_t *copy = NULL;
copy = calloc(1, sizeof(lrmd_event_data_t));
/* This will get all the int values.
* we just have to be careful not to leave any
* dangling pointers to strings. */
memcpy(copy, event, sizeof(lrmd_event_data_t));
copy->rsc_id = event->rsc_id ? strdup(event->rsc_id) : NULL;
copy->op_type = event->op_type ? strdup(event->op_type) : NULL;
copy->user_data = event->user_data ? strdup(event->user_data) : NULL;
copy->output = event->output ? strdup(event->output) : NULL;
copy->exit_reason = event->exit_reason ? strdup(event->exit_reason) : NULL;
copy->remote_nodename = event->remote_nodename ? strdup(event->remote_nodename) : NULL;
copy->params = crm_str_table_dup(event->params);
return copy;
}
void
lrmd_free_event(lrmd_event_data_t * event)
{
if (!event) {
return;
}
/* free gives me grief if i try to cast */
free((char *)event->rsc_id);
free((char *)event->op_type);
free((char *)event->user_data);
free((char *)event->output);
free((char *)event->exit_reason);
free((char *)event->remote_nodename);
if (event->params) {
g_hash_table_destroy(event->params);
}
free(event);
}
static int
lrmd_dispatch_internal(lrmd_t * lrmd, xmlNode * msg)
{
const char *type;
const char *proxy_session = crm_element_value(msg, F_LRMD_IPC_SESSION);
lrmd_private_t *native = lrmd->lrmd_private;
lrmd_event_data_t event = { 0, };
if (proxy_session != NULL) {
/* this is proxy business */
lrmd_internal_proxy_dispatch(lrmd, msg);
return 1;
} else if (!native->callback) {
/* no callback set */
crm_trace("notify event received but client has not set callback");
return 1;
}
event.remote_nodename = native->remote_nodename;
type = crm_element_value(msg, F_LRMD_OPERATION);
crm_element_value_int(msg, F_LRMD_CALLID, &event.call_id);
event.rsc_id = crm_element_value(msg, F_LRMD_RSC_ID);
if (crm_str_eq(type, LRMD_OP_RSC_REG, TRUE)) {
event.type = lrmd_event_register;
} else if (crm_str_eq(type, LRMD_OP_RSC_UNREG, TRUE)) {
event.type = lrmd_event_unregister;
} else if (crm_str_eq(type, LRMD_OP_RSC_EXEC, TRUE)) {
+ time_t epoch = 0;
+
crm_element_value_int(msg, F_LRMD_TIMEOUT, &event.timeout);
crm_element_value_ms(msg, F_LRMD_RSC_INTERVAL, &event.interval_ms);
crm_element_value_int(msg, F_LRMD_RSC_START_DELAY, &event.start_delay);
crm_element_value_int(msg, F_LRMD_EXEC_RC, (int *)&event.rc);
crm_element_value_int(msg, F_LRMD_OP_STATUS, &event.op_status);
crm_element_value_int(msg, F_LRMD_RSC_DELETED, &event.rsc_deleted);
- crm_element_value_int(msg, F_LRMD_RSC_RUN_TIME, (int *)&event.t_run);
- crm_element_value_int(msg, F_LRMD_RSC_RCCHANGE_TIME, (int *)&event.t_rcchange);
+ crm_element_value_epoch(msg, F_LRMD_RSC_RUN_TIME, &epoch);
+ event.t_run = (unsigned int) epoch;
+
+ crm_element_value_epoch(msg, F_LRMD_RSC_RCCHANGE_TIME, &epoch);
+ event.t_rcchange = (unsigned int) epoch;
+
crm_element_value_int(msg, F_LRMD_RSC_EXEC_TIME, (int *)&event.exec_time);
crm_element_value_int(msg, F_LRMD_RSC_QUEUE_TIME, (int *)&event.queue_time);
event.op_type = crm_element_value(msg, F_LRMD_RSC_ACTION);
event.user_data = crm_element_value(msg, F_LRMD_RSC_USERDATA_STR);
event.output = crm_element_value(msg, F_LRMD_RSC_OUTPUT);
event.exit_reason = crm_element_value(msg, F_LRMD_RSC_EXIT_REASON);
event.type = lrmd_event_exec_complete;
event.params = xml2list(msg);
} else if (crm_str_eq(type, LRMD_OP_NEW_CLIENT, TRUE)) {
event.type = lrmd_event_new_client;
} else if (crm_str_eq(type, LRMD_OP_POKE, TRUE)) {
event.type = lrmd_event_poke;
} else {
return 1;
}
crm_trace("op %s notify event received", type);
native->callback(&event);
if (event.params) {
g_hash_table_destroy(event.params);
}
return 1;
}
static int
lrmd_ipc_dispatch(const char *buffer, ssize_t length, gpointer userdata)
{
lrmd_t *lrmd = userdata;
lrmd_private_t *native = lrmd->lrmd_private;
xmlNode *msg;
int rc;
if (!native->callback) {
/* no callback set */
return 1;
}
msg = string2xml(buffer);
rc = lrmd_dispatch_internal(lrmd, msg);
free_xml(msg);
return rc;
}
#ifdef HAVE_GNUTLS_GNUTLS_H
static void
lrmd_free_xml(gpointer userdata)
{
free_xml((xmlNode *) userdata);
}
static int
lrmd_tls_connected(lrmd_t * lrmd)
{
lrmd_private_t *native = lrmd->lrmd_private;
if (native->remote->tls_session) {
return TRUE;
}
return FALSE;
}
static int
lrmd_tls_dispatch(gpointer userdata)
{
lrmd_t *lrmd = userdata;
lrmd_private_t *native = lrmd->lrmd_private;
xmlNode *xml = NULL;
int rc = 0;
int disconnected = 0;
if (lrmd_tls_connected(lrmd) == FALSE) {
crm_trace("TLS dispatch triggered after disconnect");
return 0;
}
crm_trace("TLS dispatch triggered");
/* First check if there are any pending notifies to process that came
* while we were waiting for replies earlier. */
if (native->pending_notify) {
GList *iter = NULL;
crm_trace("Processing pending notifies");
for (iter = native->pending_notify; iter; iter = iter->next) {
lrmd_dispatch_internal(lrmd, iter->data);
}
g_list_free_full(native->pending_notify, lrmd_free_xml);
native->pending_notify = NULL;
}
/* Next read the current buffer and see if there are any messages to handle. */
rc = crm_remote_ready(native->remote, 0);
if (rc == 0) {
/* nothing to read, see if any full messages are already in buffer. */
xml = crm_remote_parse_buffer(native->remote);
} else if (rc < 0) {
disconnected = 1;
} else {
crm_remote_recv(native->remote, -1, &disconnected);
xml = crm_remote_parse_buffer(native->remote);
}
while (xml) {
const char *msg_type = crm_element_value(xml, F_LRMD_REMOTE_MSG_TYPE);
if (safe_str_eq(msg_type, "notify")) {
lrmd_dispatch_internal(lrmd, xml);
} else if (safe_str_eq(msg_type, "reply")) {
if (native->expected_late_replies > 0) {
native->expected_late_replies--;
} else {
int reply_id = 0;
crm_element_value_int(xml, F_LRMD_CALLID, &reply_id);
/* if this happens, we want to know about it */
crm_err("Got outdated Pacemaker Remote reply %d", reply_id);
}
}
free_xml(xml);
xml = crm_remote_parse_buffer(native->remote);
}
if (disconnected) {
crm_info("Lost %s executor connection while reading data",
(native->remote_nodename? native->remote_nodename : "local"));
lrmd_tls_disconnect(lrmd);
return 0;
}
return 1;
}
#endif
/* Not used with mainloop */
int
lrmd_poll(lrmd_t * lrmd, int timeout)
{
lrmd_private_t *native = lrmd->lrmd_private;
switch (native->type) {
case CRM_CLIENT_IPC:
return crm_ipc_ready(native->ipc);
#ifdef HAVE_GNUTLS_GNUTLS_H
case CRM_CLIENT_TLS:
if (native->pending_notify) {
return 1;
}
return crm_remote_ready(native->remote, 0);
#endif
default:
crm_err("Unsupported connection type: %d", native->type);
}
return 0;
}
/* Not used with mainloop */
bool
lrmd_dispatch(lrmd_t * lrmd)
{
lrmd_private_t *private = NULL;
CRM_ASSERT(lrmd != NULL);
private = lrmd->lrmd_private;
switch (private->type) {
case CRM_CLIENT_IPC:
while (crm_ipc_ready(private->ipc)) {
if (crm_ipc_read(private->ipc) > 0) {
const char *msg = crm_ipc_buffer(private->ipc);
lrmd_ipc_dispatch(msg, strlen(msg), lrmd);
}
}
break;
#ifdef HAVE_GNUTLS_GNUTLS_H
case CRM_CLIENT_TLS:
lrmd_tls_dispatch(lrmd);
break;
#endif
default:
crm_err("Unsupported connection type: %d", private->type);
}
if (lrmd_api_is_connected(lrmd) == FALSE) {
crm_err("Connection closed");
return FALSE;
}
return TRUE;
}
static xmlNode *
lrmd_create_op(const char *token, const char *op, xmlNode *data, int timeout,
enum lrmd_call_options options)
{
xmlNode *op_msg = create_xml_node(NULL, "lrmd_command");
CRM_CHECK(op_msg != NULL, return NULL);
CRM_CHECK(token != NULL, return NULL);
crm_xml_add(op_msg, F_XML_TAGNAME, "lrmd_command");
crm_xml_add(op_msg, F_TYPE, T_LRMD);
crm_xml_add(op_msg, F_LRMD_CALLBACK_TOKEN, token);
crm_xml_add(op_msg, F_LRMD_OPERATION, op);
crm_xml_add_int(op_msg, F_LRMD_TIMEOUT, timeout);
crm_xml_add_int(op_msg, F_LRMD_CALLOPTS, options);
if (data != NULL) {
add_message_xml(op_msg, F_LRMD_CALLDATA, data);
}
crm_trace("Created executor %s command with call options %.8lx (%d)",
op, (long)options, options);
return op_msg;
}
static void
lrmd_ipc_connection_destroy(gpointer userdata)
{
lrmd_t *lrmd = userdata;
lrmd_private_t *native = lrmd->lrmd_private;
crm_info("IPC connection destroyed");
/* Prevent these from being cleaned up in lrmd_api_disconnect() */
native->ipc = NULL;
native->source = NULL;
if (native->callback) {
lrmd_event_data_t event = { 0, };
event.type = lrmd_event_disconnect;
event.remote_nodename = native->remote_nodename;
native->callback(&event);
}
}
#ifdef HAVE_GNUTLS_GNUTLS_H
static void
lrmd_tls_connection_destroy(gpointer userdata)
{
lrmd_t *lrmd = userdata;
lrmd_private_t *native = lrmd->lrmd_private;
crm_info("TLS connection destroyed");
if (native->remote->tls_session) {
gnutls_bye(*native->remote->tls_session, GNUTLS_SHUT_RDWR);
gnutls_deinit(*native->remote->tls_session);
gnutls_free(native->remote->tls_session);
}
if (native->psk_cred_c) {
gnutls_psk_free_client_credentials(native->psk_cred_c);
}
if (native->sock) {
close(native->sock);
}
if (native->process_notify) {
mainloop_destroy_trigger(native->process_notify);
native->process_notify = NULL;
}
if (native->pending_notify) {
g_list_free_full(native->pending_notify, lrmd_free_xml);
native->pending_notify = NULL;
}
free(native->remote->buffer);
native->remote->buffer = NULL;
native->source = 0;
native->sock = 0;
native->psk_cred_c = NULL;
native->remote->tls_session = NULL;
native->sock = 0;
if (native->callback) {
lrmd_event_data_t event = { 0, };
event.remote_nodename = native->remote_nodename;
event.type = lrmd_event_disconnect;
native->callback(&event);
}
return;
}
int
lrmd_tls_send_msg(crm_remote_t * session, xmlNode * msg, uint32_t id, const char *msg_type)
{
crm_xml_add_int(msg, F_LRMD_REMOTE_MSG_ID, id);
crm_xml_add(msg, F_LRMD_REMOTE_MSG_TYPE, msg_type);
return crm_remote_send(session, msg);
}
static xmlNode *
lrmd_tls_recv_reply(lrmd_t * lrmd, int total_timeout, int expected_reply_id, int *disconnected)
{
lrmd_private_t *native = lrmd->lrmd_private;
xmlNode *xml = NULL;
time_t start = time(NULL);
const char *msg_type = NULL;
int reply_id = 0;
int remaining_timeout = 0;
/* A timeout of 0 here makes no sense. We have to wait a period of time
* for the response to come back. If -1 or 0, default to 10 seconds. */
if (total_timeout <= 0 || total_timeout > MAX_TLS_RECV_WAIT) {
total_timeout = MAX_TLS_RECV_WAIT;
}
while (!xml) {
xml = crm_remote_parse_buffer(native->remote);
if (!xml) {
/* read some more off the tls buffer if we still have time left. */
if (remaining_timeout) {
remaining_timeout = total_timeout - ((time(NULL) - start) * 1000);
} else {
remaining_timeout = total_timeout;
}
if (remaining_timeout <= 0) {
crm_err("Never received the expected reply during the timeout period, disconnecting.");
*disconnected = TRUE;
return NULL;
}
crm_remote_recv(native->remote, remaining_timeout, disconnected);
xml = crm_remote_parse_buffer(native->remote);
if (!xml) {
crm_err("Unable to receive expected reply, disconnecting.");
*disconnected = TRUE;
return NULL;
} else if (*disconnected) {
return NULL;
}
}
CRM_ASSERT(xml != NULL);
crm_element_value_int(xml, F_LRMD_REMOTE_MSG_ID, &reply_id);
msg_type = crm_element_value(xml, F_LRMD_REMOTE_MSG_TYPE);
if (!msg_type) {
crm_err("Empty msg type received while waiting for reply");
free_xml(xml);
xml = NULL;
} else if (safe_str_eq(msg_type, "notify")) {
/* got a notify while waiting for reply, trigger the notify to be processed later */
crm_info("queueing notify");
native->pending_notify = g_list_append(native->pending_notify, xml);
if (native->process_notify) {
crm_info("notify trigger set.");
mainloop_set_trigger(native->process_notify);
}
xml = NULL;
} else if (safe_str_neq(msg_type, "reply")) {
/* msg isn't a reply, make some noise */
crm_err("Expected a reply, got %s", msg_type);
free_xml(xml);
xml = NULL;
} else if (reply_id != expected_reply_id) {
if (native->expected_late_replies > 0) {
native->expected_late_replies--;
} else {
crm_err("Got outdated reply, expected id %d got id %d", expected_reply_id, reply_id);
}
free_xml(xml);
xml = NULL;
}
}
if (native->remote->buffer && native->process_notify) {
mainloop_set_trigger(native->process_notify);
}
return xml;
}
static int
lrmd_tls_send(lrmd_t * lrmd, xmlNode * msg)
{
int rc = 0;
lrmd_private_t *native = lrmd->lrmd_private;
global_remote_msg_id++;
if (global_remote_msg_id <= 0) {
global_remote_msg_id = 1;
}
rc = lrmd_tls_send_msg(native->remote, msg, global_remote_msg_id, "request");
if (rc <= 0) {
crm_err("Disconnecting because TLS message could not be sent to Pacemaker Remote");
lrmd_tls_disconnect(lrmd);
return -ENOTCONN;
}
return pcmk_ok;
}
static int
lrmd_tls_send_recv(lrmd_t * lrmd, xmlNode * msg, int timeout, xmlNode ** reply)
{
int rc = 0;
int disconnected = 0;
xmlNode *xml = NULL;
if (lrmd_tls_connected(lrmd) == FALSE) {
return -1;
}
rc = lrmd_tls_send(lrmd, msg);
if (rc < 0) {
return rc;
}
xml = lrmd_tls_recv_reply(lrmd, timeout, global_remote_msg_id, &disconnected);
if (disconnected) {
crm_err("Pacemaker Remote disconnected while waiting for reply to request id %d",
global_remote_msg_id);
lrmd_tls_disconnect(lrmd);
rc = -ENOTCONN;
} else if (!xml) {
crm_err("Did not receive reply from Pacemaker Remote for request id %d (timeout %dms)",
global_remote_msg_id, timeout);
rc = -ECOMM;
}
if (reply) {
*reply = xml;
} else {
free_xml(xml);
}
return rc;
}
#endif
static int
lrmd_send_xml(lrmd_t * lrmd, xmlNode * msg, int timeout, xmlNode ** reply)
{
int rc = -1;
lrmd_private_t *native = lrmd->lrmd_private;
switch (native->type) {
case CRM_CLIENT_IPC:
rc = crm_ipc_send(native->ipc, msg, crm_ipc_client_response, timeout, reply);
break;
#ifdef HAVE_GNUTLS_GNUTLS_H
case CRM_CLIENT_TLS:
rc = lrmd_tls_send_recv(lrmd, msg, timeout, reply);
break;
#endif
default:
crm_err("Unsupported connection type: %d", native->type);
}
return rc;
}
static int
lrmd_send_xml_no_reply(lrmd_t * lrmd, xmlNode * msg)
{
int rc = -1;
lrmd_private_t *native = lrmd->lrmd_private;
switch (native->type) {
case CRM_CLIENT_IPC:
rc = crm_ipc_send(native->ipc, msg, crm_ipc_flags_none, 0, NULL);
break;
#ifdef HAVE_GNUTLS_GNUTLS_H
case CRM_CLIENT_TLS:
rc = lrmd_tls_send(lrmd, msg);
if (rc == pcmk_ok) {
/* we don't want to wait around for the reply, but
* since the request/reply protocol needs to behave the same
* as libqb, a reply will eventually come later anyway. */
native->expected_late_replies++;
}
break;
#endif
default:
crm_err("Unsupported connection type: %d", native->type);
}
return rc;
}
static int
lrmd_api_is_connected(lrmd_t * lrmd)
{
lrmd_private_t *native = lrmd->lrmd_private;
switch (native->type) {
case CRM_CLIENT_IPC:
return crm_ipc_connected(native->ipc);
break;
#ifdef HAVE_GNUTLS_GNUTLS_H
case CRM_CLIENT_TLS:
return lrmd_tls_connected(lrmd);
break;
#endif
default:
crm_err("Unsupported connection type: %d", native->type);
}
return 0;
}
/*!
* \internal
* \brief Send a prepared API command to the executor
*
* \param[in] lrmd Existing connection to the executor
* \param[in] op Name of API command to send
* \param[in] data Command data XML to add to the sent command
* \param[out] output_data If expecting a reply, it will be stored here
* \param[in] timeout Timeout in milliseconds (if 0, defaults to
* a sensible value per the type of connection,
* standard vs. pacemaker remote);
* also propagated to the command XML
* \param[in] call_options Call options to pass to server when sending
* \param[in] expect_reply If TRUE, wait for a reply from the server;
* must be TRUE for IPC (as opposed to TLS) clients
*
* \return pcmk_ok on success, -errno on error
*/
static int
lrmd_send_command(lrmd_t *lrmd, const char *op, xmlNode *data,
xmlNode **output_data, int timeout,
enum lrmd_call_options options, gboolean expect_reply)
{
int rc = pcmk_ok;
lrmd_private_t *native = lrmd->lrmd_private;
xmlNode *op_msg = NULL;
xmlNode *op_reply = NULL;
if (!lrmd_api_is_connected(lrmd)) {
return -ENOTCONN;
}
if (op == NULL) {
crm_err("No operation specified");
return -EINVAL;
}
CRM_CHECK(native->token != NULL,;
);
crm_trace("Sending %s op to executor", op);
op_msg = lrmd_create_op(native->token, op, data, timeout, options);
if (op_msg == NULL) {
return -EINVAL;
}
if (expect_reply) {
rc = lrmd_send_xml(lrmd, op_msg, timeout, &op_reply);
} else {
rc = lrmd_send_xml_no_reply(lrmd, op_msg);
goto done;
}
if (rc < 0) {
crm_perror(LOG_ERR, "Couldn't perform %s operation (timeout=%d): %d", op, timeout, rc);
rc = -ECOMM;
goto done;
} else if(op_reply == NULL) {
rc = -ENOMSG;
goto done;
}
rc = pcmk_ok;
crm_trace("%s op reply received", op);
if (crm_element_value_int(op_reply, F_LRMD_RC, &rc) != 0) {
rc = -ENOMSG;
goto done;
}
crm_log_xml_trace(op_reply, "Reply");
if (output_data) {
*output_data = op_reply;
op_reply = NULL; /* Prevent subsequent free */
}
done:
if (lrmd_api_is_connected(lrmd) == FALSE) {
crm_err("Executor disconnected");
}
free_xml(op_msg);
free_xml(op_reply);
return rc;
}
static int
lrmd_api_poke_connection(lrmd_t * lrmd)
{
int rc;
lrmd_private_t *native = lrmd->lrmd_private;
xmlNode *data = create_xml_node(NULL, F_LRMD_RSC);
crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__);
rc = lrmd_send_command(lrmd, LRMD_OP_POKE, data, NULL, 0, 0, native->type == CRM_CLIENT_IPC ? TRUE : FALSE);
free_xml(data);
return rc < 0 ? rc : pcmk_ok;
}
int
remote_proxy_check(lrmd_t * lrmd, GHashTable *hash)
{
int rc;
const char *value;
lrmd_private_t *native = lrmd->lrmd_private;
xmlNode *data = create_xml_node(NULL, F_LRMD_OPERATION);
crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__);
value = g_hash_table_lookup(hash, "stonith-watchdog-timeout");
crm_xml_add(data, F_LRMD_WATCHDOG, value);
rc = lrmd_send_command(lrmd, LRMD_OP_CHECK, data, NULL, 0, 0, native->type == CRM_CLIENT_IPC ? TRUE : FALSE);
free_xml(data);
return rc < 0 ? rc : pcmk_ok;
}
static int
lrmd_handshake(lrmd_t * lrmd, const char *name)
{
int rc = pcmk_ok;
lrmd_private_t *native = lrmd->lrmd_private;
xmlNode *reply = NULL;
xmlNode *hello = create_xml_node(NULL, "lrmd_command");
crm_xml_add(hello, F_TYPE, T_LRMD);
crm_xml_add(hello, F_LRMD_OPERATION, CRM_OP_REGISTER);
crm_xml_add(hello, F_LRMD_CLIENTNAME, name);
crm_xml_add(hello, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION);
/* advertise that we are a proxy provider */
if (native->proxy_callback) {
crm_xml_add(hello, F_LRMD_IS_IPC_PROVIDER, "true");
}
rc = lrmd_send_xml(lrmd, hello, -1, &reply);
if (rc < 0) {
crm_perror(LOG_DEBUG, "Couldn't complete registration with the executor API: %d", rc);
rc = -ECOMM;
} else if (reply == NULL) {
crm_err("Did not receive registration reply");
rc = -EPROTO;
} else {
const char *version = crm_element_value(reply, F_LRMD_PROTOCOL_VERSION);
const char *msg_type = crm_element_value(reply, F_LRMD_OPERATION);
const char *tmp_ticket = crm_element_value(reply, F_LRMD_CLIENTID);
crm_element_value_int(reply, F_LRMD_RC, &rc);
if (rc == -EPROTO) {
crm_err("Executor protocol version mismatch between client (%s) and server (%s)",
LRMD_PROTOCOL_VERSION, version);
crm_log_xml_err(reply, "Protocol Error");
} else if (safe_str_neq(msg_type, CRM_OP_REGISTER)) {
crm_err("Invalid registration message: %s", msg_type);
crm_log_xml_err(reply, "Bad reply");
rc = -EPROTO;
} else if (tmp_ticket == NULL) {
crm_err("No registration token provided");
crm_log_xml_err(reply, "Bad reply");
rc = -EPROTO;
} else {
crm_trace("Obtained registration token: %s", tmp_ticket);
native->token = strdup(tmp_ticket);
native->peer_version = strdup(version?version:"1.0"); /* Included since 1.1 */
rc = pcmk_ok;
}
}
free_xml(reply);
free_xml(hello);
if (rc != pcmk_ok) {
lrmd_api_disconnect(lrmd);
}
return rc;
}
static int
lrmd_ipc_connect(lrmd_t * lrmd, int *fd)
{
int rc = pcmk_ok;
lrmd_private_t *native = lrmd->lrmd_private;
static struct ipc_client_callbacks lrmd_callbacks = {
.dispatch = lrmd_ipc_dispatch,
.destroy = lrmd_ipc_connection_destroy
};
crm_info("Connecting to executor");
if (fd) {
/* No mainloop */
native->ipc = crm_ipc_new(CRM_SYSTEM_LRMD, 0);
if (native->ipc && crm_ipc_connect(native->ipc)) {
*fd = crm_ipc_get_fd(native->ipc);
} else if (native->ipc) {
crm_perror(LOG_ERR, "Connection to executor failed");
rc = -ENOTCONN;
}
} else {
native->source = mainloop_add_ipc_client(CRM_SYSTEM_LRMD, G_PRIORITY_HIGH, 0, lrmd, &lrmd_callbacks);
native->ipc = mainloop_get_ipc_client(native->source);
}
if (native->ipc == NULL) {
crm_debug("Could not connect to the executor API");
rc = -ENOTCONN;
}
return rc;
}
#ifdef HAVE_GNUTLS_GNUTLS_H
static void
copy_gnutls_datum(gnutls_datum_t *dest, gnutls_datum_t *source)
{
dest->data = gnutls_malloc(source->size);
CRM_ASSERT(dest->data);
memcpy(dest->data, source->data, source->size);
dest->size = source->size;
}
static void
clear_gnutls_datum(gnutls_datum_t *datum)
{
gnutls_free(datum->data);
datum->data = NULL;
datum->size = 0;
}
#define KEY_READ_LEN 256
static int
set_key(gnutls_datum_t * key, const char *location)
{
FILE *stream;
size_t buf_len = KEY_READ_LEN;
static gnutls_datum_t key_cache = { 0, };
static time_t key_cache_updated = 0;
if (location == NULL) {
return -1;
}
if (key_cache.data != NULL) {
if ((time(NULL) - key_cache_updated) < 60) {
copy_gnutls_datum(key, &key_cache);
crm_debug("Using cached Pacemaker Remote key");
return 0;
} else {
clear_gnutls_datum(&key_cache);
key_cache_updated = 0;
crm_debug("Cleared Pacemaker Remote key cache");
}
}
stream = fopen(location, "r");
if (!stream) {
return -1;
}
key->data = gnutls_malloc(buf_len);
key->size = 0;
while (!feof(stream)) {
int next = fgetc(stream);
if (next == EOF) {
if (!feof(stream)) {
crm_err("Error reading Pacemaker Remote key; copy in memory may be corrupted");
}
break;
}
if (key->size == buf_len) {
buf_len = key->size + KEY_READ_LEN;
key->data = gnutls_realloc(key->data, buf_len);
CRM_ASSERT(key->data);
}
key->data[key->size++] = (unsigned char) next;
}
fclose(stream);
if (key->size == 0) {
clear_gnutls_datum(key);
return -1;
}
if (key_cache.data == NULL) {
copy_gnutls_datum(&key_cache, key);
key_cache_updated = time(NULL);
crm_debug("Cached Pacemaker Remote key");
}
return 0;
}
int
lrmd_tls_set_key(gnutls_datum_t * key)
{
const char *specific_location = getenv("PCMK_authkey_location");
if (set_key(key, specific_location) == 0) {
crm_debug("Using custom authkey location %s", specific_location);
return pcmk_ok;
} else if (specific_location) {
crm_err("No valid Pacemaker Remote key found at %s, trying default location", specific_location);
}
if ((set_key(key, DEFAULT_REMOTE_KEY_LOCATION) != 0)
&& (set_key(key, ALT_REMOTE_KEY_LOCATION) != 0)) {
crm_err("No valid Pacemaker Remote key found at %s", DEFAULT_REMOTE_KEY_LOCATION);
return -ENOKEY;
}
return pcmk_ok;
}
static void
lrmd_gnutls_global_init(void)
{
static int gnutls_init = 0;
if (!gnutls_init) {
crm_gnutls_global_init();
}
gnutls_init = 1;
}
#endif
static void
report_async_connection_result(lrmd_t * lrmd, int rc)
{
lrmd_private_t *native = lrmd->lrmd_private;
if (native->callback) {
lrmd_event_data_t event = { 0, };
event.type = lrmd_event_connect;
event.remote_nodename = native->remote_nodename;
event.connection_rc = rc;
native->callback(&event);
}
}
#ifdef HAVE_GNUTLS_GNUTLS_H
static void
lrmd_tcp_connect_cb(void *userdata, int sock)
{
lrmd_t *lrmd = userdata;
lrmd_private_t *native = lrmd->lrmd_private;
char *name;
static struct mainloop_fd_callbacks lrmd_tls_callbacks = {
.dispatch = lrmd_tls_dispatch,
.destroy = lrmd_tls_connection_destroy,
};
int rc = sock;
gnutls_datum_t psk_key = { NULL, 0 };
native->async_timer = 0;
if (rc < 0) {
lrmd_tls_connection_destroy(lrmd);
crm_info("Could not connect to Pacemaker Remote at %s:%d",
native->server, native->port);
report_async_connection_result(lrmd, rc);
return;
}
/* The TCP connection was successful, so establish the TLS connection.
* @TODO make this async to avoid blocking code in client
*/
native->sock = sock;
rc = lrmd_tls_set_key(&psk_key);
if (rc != 0) {
crm_warn("Could not set key for Pacemaker Remote at %s:%d " CRM_XS " rc=%d",
native->server, native->port, rc);
lrmd_tls_connection_destroy(lrmd);
report_async_connection_result(lrmd, rc);
return;
}
gnutls_psk_allocate_client_credentials(&native->psk_cred_c);
gnutls_psk_set_client_credentials(native->psk_cred_c, DEFAULT_REMOTE_USERNAME, &psk_key, GNUTLS_PSK_KEY_RAW);
gnutls_free(psk_key.data);
native->remote->tls_session = pcmk__new_tls_session(sock, GNUTLS_CLIENT,
GNUTLS_CRD_PSK,
native->psk_cred_c);
if (native->remote->tls_session == NULL) {
lrmd_tls_connection_destroy(lrmd);
report_async_connection_result(lrmd, -EPROTO);
return;
}
if (crm_initiate_client_tls_handshake(native->remote, LRMD_CLIENT_HANDSHAKE_TIMEOUT) != 0) {
crm_warn("Disconnecting after TLS handshake with Pacemaker Remote server %s:%d failed",
native->server, native->port);
gnutls_deinit(*native->remote->tls_session);
gnutls_free(native->remote->tls_session);
native->remote->tls_session = NULL;
lrmd_tls_connection_destroy(lrmd);
report_async_connection_result(lrmd, -EKEYREJECTED);
return;
}
crm_info("TLS connection to Pacemaker Remote server %s:%d succeeded",
native->server, native->port);
name = crm_strdup_printf("pacemaker-remote-%s:%d",
native->server, native->port);
native->process_notify = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_tls_dispatch, lrmd);
native->source =
mainloop_add_fd(name, G_PRIORITY_HIGH, native->sock, lrmd, &lrmd_tls_callbacks);
rc = lrmd_handshake(lrmd, name);
free(name);
report_async_connection_result(lrmd, rc);
return;
}
static int
lrmd_tls_connect_async(lrmd_t * lrmd, int timeout /*ms */ )
{
int sock = 0;
int timer_id = 0;
lrmd_private_t *native = lrmd->lrmd_private;
lrmd_gnutls_global_init();
sock = crm_remote_tcp_connect_async(native->server, native->port, timeout,
&timer_id, lrmd, lrmd_tcp_connect_cb);
if (sock < 0) {
return sock;
}
native->sock = sock;
native->async_timer = timer_id;
return pcmk_ok;
}
static int
lrmd_tls_connect(lrmd_t * lrmd, int *fd)
{
static struct mainloop_fd_callbacks lrmd_tls_callbacks = {
.dispatch = lrmd_tls_dispatch,
.destroy = lrmd_tls_connection_destroy,
};
int rc;
lrmd_private_t *native = lrmd->lrmd_private;
int sock;
gnutls_datum_t psk_key = { NULL, 0 };
lrmd_gnutls_global_init();
sock = crm_remote_tcp_connect(native->server, native->port);
if (sock < 0) {
crm_warn("Could not establish Pacemaker Remote connection to %s", native->server);
lrmd_tls_connection_destroy(lrmd);
return -ENOTCONN;
}
native->sock = sock;
rc = lrmd_tls_set_key(&psk_key);
if (rc < 0) {
lrmd_tls_connection_destroy(lrmd);
return rc;
}
gnutls_psk_allocate_client_credentials(&native->psk_cred_c);
gnutls_psk_set_client_credentials(native->psk_cred_c, DEFAULT_REMOTE_USERNAME, &psk_key, GNUTLS_PSK_KEY_RAW);
gnutls_free(psk_key.data);
native->remote->tls_session = pcmk__new_tls_session(sock, GNUTLS_CLIENT,
GNUTLS_CRD_PSK,
native->psk_cred_c);
if (native->remote->tls_session == NULL) {
lrmd_tls_connection_destroy(lrmd);
return -EPROTO;
}
if (crm_initiate_client_tls_handshake(native->remote, LRMD_CLIENT_HANDSHAKE_TIMEOUT) != 0) {
crm_err("Session creation for %s:%d failed", native->server, native->port);
gnutls_deinit(*native->remote->tls_session);
gnutls_free(native->remote->tls_session);
native->remote->tls_session = NULL;
lrmd_tls_connection_destroy(lrmd);
return -EKEYREJECTED;
}
crm_info("Client TLS connection established with Pacemaker Remote server %s:%d", native->server,
native->port);
if (fd) {
*fd = sock;
} else {
char *name = crm_strdup_printf("pacemaker-remote-%s:%d",
native->server, native->port);
native->process_notify = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_tls_dispatch, lrmd);
native->source =
mainloop_add_fd(name, G_PRIORITY_HIGH, native->sock, lrmd, &lrmd_tls_callbacks);
free(name);
}
return pcmk_ok;
}
#endif
static int
lrmd_api_connect(lrmd_t * lrmd, const char *name, int *fd)
{
int rc = -ENOTCONN;
lrmd_private_t *native = lrmd->lrmd_private;
switch (native->type) {
case CRM_CLIENT_IPC:
rc = lrmd_ipc_connect(lrmd, fd);
break;
#ifdef HAVE_GNUTLS_GNUTLS_H
case CRM_CLIENT_TLS:
rc = lrmd_tls_connect(lrmd, fd);
break;
#endif
default:
crm_err("Unsupported connection type: %d", native->type);
}
if (rc == pcmk_ok) {
rc = lrmd_handshake(lrmd, name);
}
return rc;
}
static int
lrmd_api_connect_async(lrmd_t * lrmd, const char *name, int timeout)
{
int rc = 0;
lrmd_private_t *native = lrmd->lrmd_private;
CRM_CHECK(native && native->callback, return -1);
switch (native->type) {
case CRM_CLIENT_IPC:
/* fake async connection with ipc. it should be fast
* enough that we gain very little from async */
rc = lrmd_api_connect(lrmd, name, NULL);
if (!rc) {
report_async_connection_result(lrmd, rc);
}
break;
#ifdef HAVE_GNUTLS_GNUTLS_H
case CRM_CLIENT_TLS:
rc = lrmd_tls_connect_async(lrmd, timeout);
if (rc) {
/* connection failed, report rc now */
report_async_connection_result(lrmd, rc);
}
break;
#endif
default:
crm_err("Unsupported connection type: %d", native->type);
}
return rc;
}
static void
lrmd_ipc_disconnect(lrmd_t * lrmd)
{
lrmd_private_t *native = lrmd->lrmd_private;
if (native->source != NULL) {
/* Attached to mainloop */
mainloop_del_ipc_client(native->source);
native->source = NULL;
native->ipc = NULL;
} else if (native->ipc) {
/* Not attached to mainloop */
crm_ipc_t *ipc = native->ipc;
native->ipc = NULL;
crm_ipc_close(ipc);
crm_ipc_destroy(ipc);
}
}
#ifdef HAVE_GNUTLS_GNUTLS_H
static void
lrmd_tls_disconnect(lrmd_t * lrmd)
{
lrmd_private_t *native = lrmd->lrmd_private;
if (native->remote->tls_session) {
gnutls_bye(*native->remote->tls_session, GNUTLS_SHUT_RDWR);
gnutls_deinit(*native->remote->tls_session);
gnutls_free(native->remote->tls_session);
native->remote->tls_session = 0;
}
if (native->async_timer) {
g_source_remove(native->async_timer);
native->async_timer = 0;
}
if (native->source != NULL) {
/* Attached to mainloop */
mainloop_del_ipc_client(native->source);
native->source = NULL;
} else if (native->sock) {
close(native->sock);
native->sock = 0;
}
if (native->pending_notify) {
g_list_free_full(native->pending_notify, lrmd_free_xml);
native->pending_notify = NULL;
}
}
#endif
static int
lrmd_api_disconnect(lrmd_t * lrmd)
{
lrmd_private_t *native = lrmd->lrmd_private;
crm_info("Disconnecting %s %s executor connection",
crm_client_type_text(native->type),
(native->remote_nodename? native->remote_nodename : "local"));
switch (native->type) {
case CRM_CLIENT_IPC:
lrmd_ipc_disconnect(lrmd);
break;
#ifdef HAVE_GNUTLS_GNUTLS_H
case CRM_CLIENT_TLS:
lrmd_tls_disconnect(lrmd);
break;
#endif
default:
crm_err("Unsupported connection type: %d", native->type);
}
free(native->token);
native->token = NULL;
free(native->peer_version);
native->peer_version = NULL;
return 0;
}
static int
lrmd_api_register_rsc(lrmd_t * lrmd,
const char *rsc_id,
const char *class,
const char *provider, const char *type, enum lrmd_call_options options)
{
int rc = pcmk_ok;
xmlNode *data = NULL;
if (!class || !type || !rsc_id) {
return -EINVAL;
}
if (is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider) && !provider) {
return -EINVAL;
}
data = create_xml_node(NULL, F_LRMD_RSC);
crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add(data, F_LRMD_RSC_ID, rsc_id);
crm_xml_add(data, F_LRMD_CLASS, class);
crm_xml_add(data, F_LRMD_PROVIDER, provider);
crm_xml_add(data, F_LRMD_TYPE, type);
rc = lrmd_send_command(lrmd, LRMD_OP_RSC_REG, data, NULL, 0, options, TRUE);
free_xml(data);
return rc;
}
static int
lrmd_api_unregister_rsc(lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options)
{
int rc = pcmk_ok;
xmlNode *data = create_xml_node(NULL, F_LRMD_RSC);
crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add(data, F_LRMD_RSC_ID, rsc_id);
rc = lrmd_send_command(lrmd, LRMD_OP_RSC_UNREG, data, NULL, 0, options, TRUE);
free_xml(data);
return rc;
}
lrmd_rsc_info_t *
lrmd_new_rsc_info(const char *rsc_id, const char *standard,
const char *provider, const char *type)
{
lrmd_rsc_info_t *rsc_info = calloc(1, sizeof(lrmd_rsc_info_t));
CRM_ASSERT(rsc_info);
if (rsc_id) {
rsc_info->id = strdup(rsc_id);
CRM_ASSERT(rsc_info->id);
}
if (standard) {
rsc_info->standard = strdup(standard);
CRM_ASSERT(rsc_info->standard);
}
if (provider) {
rsc_info->provider = strdup(provider);
CRM_ASSERT(rsc_info->provider);
}
if (type) {
rsc_info->type = strdup(type);
CRM_ASSERT(rsc_info->type);
}
return rsc_info;
}
lrmd_rsc_info_t *
lrmd_copy_rsc_info(lrmd_rsc_info_t * rsc_info)
{
return lrmd_new_rsc_info(rsc_info->id, rsc_info->standard,
rsc_info->provider, rsc_info->type);
}
void
lrmd_free_rsc_info(lrmd_rsc_info_t * rsc_info)
{
if (!rsc_info) {
return;
}
free(rsc_info->id);
free(rsc_info->type);
free(rsc_info->standard);
free(rsc_info->provider);
free(rsc_info);
}
static lrmd_rsc_info_t *
lrmd_api_get_rsc_info(lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options)
{
lrmd_rsc_info_t *rsc_info = NULL;
xmlNode *data = create_xml_node(NULL, F_LRMD_RSC);
xmlNode *output = NULL;
const char *class = NULL;
const char *provider = NULL;
const char *type = NULL;
crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add(data, F_LRMD_RSC_ID, rsc_id);
lrmd_send_command(lrmd, LRMD_OP_RSC_INFO, data, &output, 0, options, TRUE);
free_xml(data);
if (!output) {
return NULL;
}
class = crm_element_value(output, F_LRMD_CLASS);
provider = crm_element_value(output, F_LRMD_PROVIDER);
type = crm_element_value(output, F_LRMD_TYPE);
if (!class || !type) {
free_xml(output);
return NULL;
} else if (is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider)
&& !provider) {
free_xml(output);
return NULL;
}
rsc_info = lrmd_new_rsc_info(rsc_id, class, provider, type);
free_xml(output);
return rsc_info;
}
void
lrmd_free_op_info(lrmd_op_info_t *op_info)
{
if (op_info) {
free(op_info->rsc_id);
free(op_info->action);
free(op_info->interval_ms_s);
free(op_info->timeout_ms_s);
free(op_info);
}
}
static int
lrmd_api_get_recurring_ops(lrmd_t *lrmd, const char *rsc_id, int timeout_ms,
enum lrmd_call_options options, GList **output)
{
xmlNode *data = NULL;
xmlNode *output_xml = NULL;
int rc = pcmk_ok;
if (output == NULL) {
return -EINVAL;
}
*output = NULL;
// Send request
if (rsc_id) {
data = create_xml_node(NULL, F_LRMD_RSC);
crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add(data, F_LRMD_RSC_ID, rsc_id);
}
rc = lrmd_send_command(lrmd, LRMD_OP_GET_RECURRING, data, &output_xml,
timeout_ms, options, TRUE);
if (data) {
free_xml(data);
}
// Process reply
if ((rc != pcmk_ok) || (output_xml == NULL)) {
return rc;
}
for (xmlNode *rsc_xml = first_named_child(output_xml, F_LRMD_RSC);
rsc_xml != NULL; rsc_xml = crm_next_same_xml(rsc_xml)) {
rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
if (rsc_id == NULL) {
crm_err("Could not parse recurring operation information from executor");
continue;
}
for (xmlNode *op_xml = first_named_child(rsc_xml, T_LRMD_RSC_OP);
op_xml != NULL; op_xml = crm_next_same_xml(op_xml)) {
lrmd_op_info_t *op_info = calloc(1, sizeof(lrmd_op_info_t));
CRM_CHECK(op_info != NULL, break);
op_info->rsc_id = strdup(rsc_id);
op_info->action = crm_element_value_copy(op_xml, F_LRMD_RSC_ACTION);
op_info->interval_ms_s = crm_element_value_copy(op_xml,
F_LRMD_RSC_INTERVAL);
op_info->timeout_ms_s = crm_element_value_copy(op_xml,
F_LRMD_TIMEOUT);
*output = g_list_prepend(*output, op_info);
}
}
free_xml(output_xml);
return rc;
}
static void
lrmd_api_set_callback(lrmd_t * lrmd, lrmd_event_callback callback)
{
lrmd_private_t *native = lrmd->lrmd_private;
native->callback = callback;
}
void
lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg))
{
lrmd_private_t *native = lrmd->lrmd_private;
native->proxy_callback = callback;
native->proxy_callback_userdata = userdata;
}
void
lrmd_internal_proxy_dispatch(lrmd_t *lrmd, xmlNode *msg)
{
lrmd_private_t *native = lrmd->lrmd_private;
if (native->proxy_callback) {
crm_log_xml_trace(msg, "PROXY_INBOUND");
native->proxy_callback(lrmd, native->proxy_callback_userdata, msg);
}
}
int
lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg)
{
if (lrmd == NULL) {
return -ENOTCONN;
}
crm_xml_add(msg, F_LRMD_OPERATION, CRM_OP_IPC_FWD);
crm_log_xml_trace(msg, "PROXY_OUTBOUND");
return lrmd_send_xml_no_reply(lrmd, msg);
}
static int
stonith_get_metadata(const char *provider, const char *type, char **output)
{
int rc = pcmk_ok;
stonith_t *stonith_api = stonith_api_new();
if (stonith_api == NULL) {
crm_err("Could not get fence agent meta-data: API memory allocation failed");
return -ENOMEM;
}
rc = stonith_api->cmds->metadata(stonith_api, st_opt_sync_call, type,
provider, output, 0);
if ((rc == pcmk_ok) && (*output == NULL)) {
rc = -EIO;
}
stonith_api->cmds->free(stonith_api);
return rc;
}
static int
lrmd_api_get_metadata(lrmd_t *lrmd, const char *standard, const char *provider,
const char *type, char **output,
enum lrmd_call_options options)
{
return lrmd->cmds->get_metadata_params(lrmd, standard, provider, type,
output, options, NULL);
}
static int
lrmd_api_get_metadata_params(lrmd_t *lrmd, const char *standard,
const char *provider, const char *type,
char **output, enum lrmd_call_options options,
lrmd_key_value_t *params)
{
svc_action_t *action = NULL;
GHashTable *params_table = NULL;
if (!standard || !type) {
lrmd_key_value_freeall(params);
return -EINVAL;
}
if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_STONITH)) {
lrmd_key_value_freeall(params);
return stonith_get_metadata(provider, type, output);
}
params_table = crm_str_table_new();
for (const lrmd_key_value_t *param = params; param; param = param->next) {
g_hash_table_insert(params_table, strdup(param->key), strdup(param->value));
}
action = resources_action_create(type, standard, provider, type,
CRMD_ACTION_METADATA, 0,
CRMD_METADATA_CALL_TIMEOUT, params_table,
0);
lrmd_key_value_freeall(params);
if (action == NULL) {
crm_err("Unable to retrieve meta-data for %s:%s:%s",
standard, provider, type);
return -EINVAL;
}
if (!services_action_sync(action)) {
crm_err("Failed to retrieve meta-data for %s:%s:%s",
standard, provider, type);
services_action_free(action);
return -EIO;
}
if (!action->stdout_data) {
crm_err("Failed to receive meta-data for %s:%s:%s",
standard, provider, type);
services_action_free(action);
return -EIO;
}
*output = strdup(action->stdout_data);
services_action_free(action);
return pcmk_ok;
}
static int
lrmd_api_exec(lrmd_t *lrmd, const char *rsc_id, const char *action,
const char *userdata, guint interval_ms,
int timeout, /* ms */
int start_delay, /* ms */
enum lrmd_call_options options, lrmd_key_value_t * params)
{
int rc = pcmk_ok;
xmlNode *data = create_xml_node(NULL, F_LRMD_RSC);
xmlNode *args = create_xml_node(data, XML_TAG_ATTRS);
lrmd_key_value_t *tmp = NULL;
crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add(data, F_LRMD_RSC_ID, rsc_id);
crm_xml_add(data, F_LRMD_RSC_ACTION, action);
crm_xml_add(data, F_LRMD_RSC_USERDATA_STR, userdata);
crm_xml_add_ms(data, F_LRMD_RSC_INTERVAL, interval_ms);
crm_xml_add_int(data, F_LRMD_TIMEOUT, timeout);
crm_xml_add_int(data, F_LRMD_RSC_START_DELAY, start_delay);
for (tmp = params; tmp; tmp = tmp->next) {
hash2smartfield((gpointer) tmp->key, (gpointer) tmp->value, args);
}
rc = lrmd_send_command(lrmd, LRMD_OP_RSC_EXEC, data, NULL, timeout, options, TRUE);
free_xml(data);
lrmd_key_value_freeall(params);
return rc;
}
/* timeout is in ms */
static int
lrmd_api_exec_alert(lrmd_t *lrmd, const char *alert_id, const char *alert_path,
int timeout, lrmd_key_value_t *params)
{
int rc = pcmk_ok;
xmlNode *data = create_xml_node(NULL, F_LRMD_ALERT);
xmlNode *args = create_xml_node(data, XML_TAG_ATTRS);
lrmd_key_value_t *tmp = NULL;
crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add(data, F_LRMD_ALERT_ID, alert_id);
crm_xml_add(data, F_LRMD_ALERT_PATH, alert_path);
crm_xml_add_int(data, F_LRMD_TIMEOUT, timeout);
for (tmp = params; tmp; tmp = tmp->next) {
hash2smartfield((gpointer) tmp->key, (gpointer) tmp->value, args);
}
rc = lrmd_send_command(lrmd, LRMD_OP_ALERT_EXEC, data, NULL, timeout,
lrmd_opt_notify_orig_only, TRUE);
free_xml(data);
lrmd_key_value_freeall(params);
return rc;
}
static int
lrmd_api_cancel(lrmd_t *lrmd, const char *rsc_id, const char *action,
guint interval_ms)
{
int rc = pcmk_ok;
xmlNode *data = create_xml_node(NULL, F_LRMD_RSC);
crm_xml_add(data, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add(data, F_LRMD_RSC_ACTION, action);
crm_xml_add(data, F_LRMD_RSC_ID, rsc_id);
crm_xml_add_ms(data, F_LRMD_RSC_INTERVAL, interval_ms);
rc = lrmd_send_command(lrmd, LRMD_OP_RSC_CANCEL, data, NULL, 0, 0, TRUE);
free_xml(data);
return rc;
}
static int
list_stonith_agents(lrmd_list_t ** resources)
{
int rc = 0;
stonith_t *stonith_api = stonith_api_new();
stonith_key_value_t *stonith_resources = NULL;
stonith_key_value_t *dIter = NULL;
if (stonith_api == NULL) {
crm_err("Could not list fence agents: API memory allocation failed");
return -ENOMEM;
}
stonith_api->cmds->list_agents(stonith_api, st_opt_sync_call, NULL,
&stonith_resources, 0);
stonith_api->cmds->free(stonith_api);
for (dIter = stonith_resources; dIter; dIter = dIter->next) {
rc++;
if (resources) {
*resources = lrmd_list_add(*resources, dIter->value);
}
}
stonith_key_value_freeall(stonith_resources, 1, 0);
return rc;
}
static int
lrmd_api_list_agents(lrmd_t * lrmd, lrmd_list_t ** resources, const char *class,
const char *provider)
{
int rc = 0;
int stonith_count = 0; // Initially, whether to include stonith devices
if (safe_str_eq(class, PCMK_RESOURCE_CLASS_STONITH)) {
stonith_count = 1;
} else {
GListPtr gIter = NULL;
GList *agents = resources_list_agents(class, provider);
for (gIter = agents; gIter != NULL; gIter = gIter->next) {
*resources = lrmd_list_add(*resources, (const char *)gIter->data);
rc++;
}
g_list_free_full(agents, free);
if (!class) {
stonith_count = 1;
}
}
if (stonith_count) {
// Now, if stonith devices are included, how many there are
stonith_count = list_stonith_agents(resources);
if (stonith_count > 0) {
rc += stonith_count;
}
}
if (rc == 0) {
crm_notice("No agents found for class %s", class);
rc = -EPROTONOSUPPORT;
}
return rc;
}
static int
does_provider_have_agent(const char *agent, const char *provider, const char *class)
{
int found = 0;
GList *agents = NULL;
GListPtr gIter2 = NULL;
agents = resources_list_agents(class, provider);
for (gIter2 = agents; gIter2 != NULL; gIter2 = gIter2->next) {
if (safe_str_eq(agent, gIter2->data)) {
found = 1;
}
}
g_list_free_full(agents, free);
return found;
}
static int
lrmd_api_list_ocf_providers(lrmd_t * lrmd, const char *agent, lrmd_list_t ** providers)
{
int rc = pcmk_ok;
char *provider = NULL;
GList *ocf_providers = NULL;
GListPtr gIter = NULL;
ocf_providers = resources_list_providers(PCMK_RESOURCE_CLASS_OCF);
for (gIter = ocf_providers; gIter != NULL; gIter = gIter->next) {
provider = gIter->data;
if (!agent || does_provider_have_agent(agent, provider,
PCMK_RESOURCE_CLASS_OCF)) {
*providers = lrmd_list_add(*providers, (const char *)gIter->data);
rc++;
}
}
g_list_free_full(ocf_providers, free);
return rc;
}
static int
lrmd_api_list_standards(lrmd_t * lrmd, lrmd_list_t ** supported)
{
int rc = 0;
GList *standards = NULL;
GListPtr gIter = NULL;
standards = resources_list_standards();
for (gIter = standards; gIter != NULL; gIter = gIter->next) {
*supported = lrmd_list_add(*supported, (const char *)gIter->data);
rc++;
}
if (list_stonith_agents(NULL) > 0) {
*supported = lrmd_list_add(*supported, PCMK_RESOURCE_CLASS_STONITH);
rc++;
}
g_list_free_full(standards, free);
return rc;
}
lrmd_t *
lrmd_api_new(void)
{
lrmd_t *new_lrmd = NULL;
lrmd_private_t *pvt = NULL;
new_lrmd = calloc(1, sizeof(lrmd_t));
pvt = calloc(1, sizeof(lrmd_private_t));
pvt->remote = calloc(1, sizeof(crm_remote_t));
new_lrmd->cmds = calloc(1, sizeof(lrmd_api_operations_t));
pvt->type = CRM_CLIENT_IPC;
new_lrmd->lrmd_private = pvt;
new_lrmd->cmds->connect = lrmd_api_connect;
new_lrmd->cmds->connect_async = lrmd_api_connect_async;
new_lrmd->cmds->is_connected = lrmd_api_is_connected;
new_lrmd->cmds->poke_connection = lrmd_api_poke_connection;
new_lrmd->cmds->disconnect = lrmd_api_disconnect;
new_lrmd->cmds->register_rsc = lrmd_api_register_rsc;
new_lrmd->cmds->unregister_rsc = lrmd_api_unregister_rsc;
new_lrmd->cmds->get_rsc_info = lrmd_api_get_rsc_info;
new_lrmd->cmds->get_recurring_ops = lrmd_api_get_recurring_ops;
new_lrmd->cmds->set_callback = lrmd_api_set_callback;
new_lrmd->cmds->get_metadata = lrmd_api_get_metadata;
new_lrmd->cmds->exec = lrmd_api_exec;
new_lrmd->cmds->cancel = lrmd_api_cancel;
new_lrmd->cmds->list_agents = lrmd_api_list_agents;
new_lrmd->cmds->list_ocf_providers = lrmd_api_list_ocf_providers;
new_lrmd->cmds->list_standards = lrmd_api_list_standards;
new_lrmd->cmds->exec_alert = lrmd_api_exec_alert;
new_lrmd->cmds->get_metadata_params = lrmd_api_get_metadata_params;
return new_lrmd;
}
lrmd_t *
lrmd_remote_api_new(const char *nodename, const char *server, int port)
{
#ifdef HAVE_GNUTLS_GNUTLS_H
lrmd_t *new_lrmd = lrmd_api_new();
lrmd_private_t *native = new_lrmd->lrmd_private;
if (!nodename && !server) {
lrmd_api_delete(new_lrmd);
return NULL;
}
native->type = CRM_CLIENT_TLS;
native->remote_nodename = nodename ? strdup(nodename) : strdup(server);
native->server = server ? strdup(server) : strdup(nodename);
native->port = port;
if (native->port == 0) {
native->port = crm_default_remote_port();
}
return new_lrmd;
#else
crm_err("Cannot communicate with Pacemaker Remote because GnuTLS is not enabled for this build");
return NULL;
#endif
}
void
lrmd_api_delete(lrmd_t * lrmd)
{
if (!lrmd) {
return;
}
lrmd->cmds->disconnect(lrmd); /* no-op if already disconnected */
free(lrmd->cmds);
if (lrmd->lrmd_private) {
lrmd_private_t *native = lrmd->lrmd_private;
#ifdef HAVE_GNUTLS_GNUTLS_H
free(native->server);
#endif
free(native->remote_nodename);
free(native->remote);
free(native->token);
free(native->peer_version);
}
free(lrmd->lrmd_private);
free(lrmd);
}
diff --git a/lib/pacemaker/pcmk_sched_transition.c b/lib/pacemaker/pcmk_sched_transition.c
index d40e255573..c415b75940 100644
--- a/lib/pacemaker/pcmk_sched_transition.c
+++ b/lib/pacemaker/pcmk_sched_transition.c
@@ -1,867 +1,867 @@
/*
* Copyright 2009-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include // lrmd_event_data_t, lrmd_free_event()
#include
#include
#include
#include
#include
static bool fake_quiet = FALSE;
static cib_t *fake_cib = NULL;
static GListPtr fake_resource_list = NULL;
static GListPtr fake_op_fail_list = NULL;
gboolean bringing_nodes_online = FALSE;
#define STATUS_PATH_MAX 512
#define quiet_log(fmt, args...) do { \
if(fake_quiet) { \
crm_trace(fmt, ##args); \
} else { \
printf(fmt , ##args); \
} \
} while(0)
#define NEW_NODE_TEMPLATE "//"XML_CIB_TAG_NODE"[@uname='%s']"
#define NODE_TEMPLATE "//"XML_CIB_TAG_STATE"[@uname='%s']"
#define RSC_TEMPLATE "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']"
static void
inject_transient_attr(xmlNode * cib_node, const char *name, const char *value)
{
xmlNode *attrs = NULL;
xmlNode *instance_attrs = NULL;
xmlChar *node_path;
const char *node_uuid = ID(cib_node);
node_path = xmlGetNodePath(cib_node);
quiet_log(" + Injecting attribute %s=%s into %s '%s'\n",
name, value, node_path, ID(cib_node));
free(node_path);
attrs = first_named_child(cib_node, XML_TAG_TRANSIENT_NODEATTRS);
if (attrs == NULL) {
attrs = create_xml_node(cib_node, XML_TAG_TRANSIENT_NODEATTRS);
crm_xml_add(attrs, XML_ATTR_ID, node_uuid);
}
instance_attrs = first_named_child(attrs, XML_TAG_ATTR_SETS);
if (instance_attrs == NULL) {
instance_attrs = create_xml_node(attrs, XML_TAG_ATTR_SETS);
crm_xml_add(instance_attrs, XML_ATTR_ID, node_uuid);
}
crm_create_nvpair_xml(instance_attrs, NULL, name, value);
}
static void
update_failcounts(xmlNode * cib_node, const char *resource, const char *task,
guint interval_ms, int rc)
{
if (rc == 0) {
return;
} else if ((rc == 7) && (interval_ms == 0)) {
return;
} else {
char *name = NULL;
char *now = crm_ttoa(time(NULL));
name = crm_failcount_name(resource, task, interval_ms);
inject_transient_attr(cib_node, name, "value++");
free(name);
name = crm_lastfailure_name(resource, task, interval_ms);
inject_transient_attr(cib_node, name, now);
free(name);
free(now);
}
}
static void
create_node_entry(cib_t * cib_conn, const char *node)
{
int rc = pcmk_ok;
char *xpath = crm_strdup_printf(NEW_NODE_TEMPLATE, node);
rc = cib_conn->cmds->query(cib_conn, xpath, NULL, cib_xpath | cib_sync_call | cib_scope_local);
if (rc == -ENXIO) {
xmlNode *cib_object = create_xml_node(NULL, XML_CIB_TAG_NODE);
crm_xml_add(cib_object, XML_ATTR_ID, node); // Use node name as ID
crm_xml_add(cib_object, XML_ATTR_UNAME, node);
cib_conn->cmds->create(cib_conn, XML_CIB_TAG_NODES, cib_object,
cib_sync_call | cib_scope_local);
/* Not bothering with subsequent query to see if it exists,
we'll bomb out later in the call to query_node_uuid()... */
free_xml(cib_object);
}
free(xpath);
}
static lrmd_event_data_t *
create_op(xmlNode *cib_resource, const char *task, guint interval_ms,
int outcome)
{
lrmd_event_data_t *op = NULL;
xmlNode *xop = NULL;
op = calloc(1, sizeof(lrmd_event_data_t));
op->rsc_id = strdup(ID(cib_resource));
op->interval_ms = interval_ms;
op->op_type = strdup(task);
op->rc = outcome;
op->op_status = 0;
op->params = NULL; /* TODO: Fill me in */
- op->t_run = time(NULL);
+ op->t_run = (unsigned int) time(NULL);
op->t_rcchange = op->t_run;
op->call_id = 0;
for (xop = __xml_first_child_element(cib_resource); xop != NULL;
xop = __xml_next_element(xop)) {
int tmp = 0;
crm_element_value_int(xop, XML_LRM_ATTR_CALLID, &tmp);
if (tmp > op->call_id) {
op->call_id = tmp;
}
}
op->call_id++;
return op;
}
static xmlNode *
inject_op(xmlNode * cib_resource, lrmd_event_data_t * op, int target_rc)
{
return pcmk__create_history_xml(cib_resource, op, CRM_FEATURE_SET,
target_rc, NULL, crm_system_name,
LOG_TRACE);
}
static xmlNode *
inject_node_state(cib_t * cib_conn, const char *node, const char *uuid)
{
int rc = pcmk_ok;
xmlNode *cib_object = NULL;
char *xpath = crm_strdup_printf(NODE_TEMPLATE, node);
if (bringing_nodes_online) {
create_node_entry(cib_conn, node);
}
rc = cib_conn->cmds->query(cib_conn, xpath, &cib_object,
cib_xpath | cib_sync_call | cib_scope_local);
if (cib_object && ID(cib_object) == NULL) {
crm_err("Detected multiple node_state entries for xpath=%s, bailing", xpath);
crm_log_xml_warn(cib_object, "Duplicates");
free(xpath);
crm_exit(CRM_EX_SOFTWARE);
return NULL; // not reached, but makes static analysis happy
}
if (rc == -ENXIO) {
char *found_uuid = NULL;
if (uuid == NULL) {
query_node_uuid(cib_conn, node, &found_uuid, NULL);
} else {
found_uuid = strdup(uuid);
}
cib_object = create_xml_node(NULL, XML_CIB_TAG_STATE);
crm_xml_add(cib_object, XML_ATTR_UUID, found_uuid);
crm_xml_add(cib_object, XML_ATTR_UNAME, node);
cib_conn->cmds->create(cib_conn, XML_CIB_TAG_STATUS, cib_object,
cib_sync_call | cib_scope_local);
free_xml(cib_object);
free(found_uuid);
rc = cib_conn->cmds->query(cib_conn, xpath, &cib_object,
cib_xpath | cib_sync_call | cib_scope_local);
crm_trace("injecting node state for %s. rc is %d", node, rc);
}
free(xpath);
CRM_ASSERT(rc == pcmk_ok);
return cib_object;
}
static xmlNode *
modify_node(cib_t * cib_conn, char *node, gboolean up)
{
xmlNode *cib_node = inject_node_state(cib_conn, node, NULL);
if (up) {
crm_xml_add(cib_node, XML_NODE_IN_CLUSTER, XML_BOOLEAN_YES);
crm_xml_add(cib_node, XML_NODE_IS_PEER, ONLINESTATUS);
crm_xml_add(cib_node, XML_NODE_JOIN_STATE, CRMD_JOINSTATE_MEMBER);
crm_xml_add(cib_node, XML_NODE_EXPECTED, CRMD_JOINSTATE_MEMBER);
} else {
crm_xml_add(cib_node, XML_NODE_IN_CLUSTER, XML_BOOLEAN_NO);
crm_xml_add(cib_node, XML_NODE_IS_PEER, OFFLINESTATUS);
crm_xml_add(cib_node, XML_NODE_JOIN_STATE, CRMD_JOINSTATE_DOWN);
crm_xml_add(cib_node, XML_NODE_EXPECTED, CRMD_JOINSTATE_DOWN);
}
crm_xml_add(cib_node, XML_ATTR_ORIGIN, crm_system_name);
return cib_node;
}
static xmlNode *
find_resource_xml(xmlNode * cib_node, const char *resource)
{
xmlNode *match = NULL;
const char *node = crm_element_value(cib_node, XML_ATTR_UNAME);
char *xpath = crm_strdup_printf(RSC_TEMPLATE, node, resource);
match = get_xpath_object(xpath, cib_node, LOG_TRACE);
free(xpath);
return match;
}
static xmlNode *
inject_resource(xmlNode * cib_node, const char *resource, const char *lrm_name,
const char *rclass, const char *rtype, const char *rprovider)
{
xmlNode *lrm = NULL;
xmlNode *container = NULL;
xmlNode *cib_resource = NULL;
char *xpath = NULL;
cib_resource = find_resource_xml(cib_node, resource);
if (cib_resource != NULL) {
/* If an existing LRM history entry uses the resource name,
* continue using it, even if lrm_name is different.
*/
return cib_resource;
}
// Check for history entry under preferred name
if (strcmp(resource, lrm_name)) {
cib_resource = find_resource_xml(cib_node, lrm_name);
if (cib_resource != NULL) {
return cib_resource;
}
}
/* One day, add query for class, provider, type */
if (rclass == NULL || rtype == NULL) {
fprintf(stderr, "Resource %s not found in the status section of %s."
" Please supply the class and type to continue\n", resource, ID(cib_node));
return NULL;
} else if (safe_str_neq(rclass, PCMK_RESOURCE_CLASS_OCF)
&& safe_str_neq(rclass, PCMK_RESOURCE_CLASS_STONITH)
&& safe_str_neq(rclass, PCMK_RESOURCE_CLASS_SERVICE)
&& safe_str_neq(rclass, PCMK_RESOURCE_CLASS_UPSTART)
&& safe_str_neq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD)
&& safe_str_neq(rclass, PCMK_RESOURCE_CLASS_LSB)) {
fprintf(stderr, "Invalid class for %s: %s\n", resource, rclass);
return NULL;
} else if (is_set(pcmk_get_ra_caps(rclass), pcmk_ra_cap_provider)
&& (rprovider == NULL)) {
fprintf(stderr, "Please specify the provider for resource %s\n", resource);
return NULL;
}
xpath = (char *)xmlGetNodePath(cib_node);
crm_info("Injecting new resource %s into %s '%s'", lrm_name, xpath, ID(cib_node));
free(xpath);
lrm = first_named_child(cib_node, XML_CIB_TAG_LRM);
if (lrm == NULL) {
const char *node_uuid = ID(cib_node);
lrm = create_xml_node(cib_node, XML_CIB_TAG_LRM);
crm_xml_add(lrm, XML_ATTR_ID, node_uuid);
}
container = first_named_child(lrm, XML_LRM_TAG_RESOURCES);
if (container == NULL) {
container = create_xml_node(lrm, XML_LRM_TAG_RESOURCES);
}
cib_resource = create_xml_node(container, XML_LRM_TAG_RESOURCE);
// If we're creating a new entry, use the preferred name
crm_xml_add(cib_resource, XML_ATTR_ID, lrm_name);
crm_xml_add(cib_resource, XML_AGENT_ATTR_CLASS, rclass);
crm_xml_add(cib_resource, XML_AGENT_ATTR_PROVIDER, rprovider);
crm_xml_add(cib_resource, XML_ATTR_TYPE, rtype);
return cib_resource;
}
#define XPATH_MAX 1024
static int
find_ticket_state(cib_t * the_cib, const char *ticket_id, xmlNode ** ticket_state_xml)
{
int offset = 0;
int rc = pcmk_ok;
xmlNode *xml_search = NULL;
char *xpath_string = NULL;
CRM_ASSERT(ticket_state_xml != NULL);
*ticket_state_xml = NULL;
xpath_string = calloc(1, XPATH_MAX);
offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "%s", "/cib/status/tickets");
if (ticket_id) {
offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "/%s[@id=\"%s\"]",
XML_CIB_TAG_TICKET_STATE, ticket_id);
}
CRM_LOG_ASSERT(offset > 0);
rc = the_cib->cmds->query(the_cib, xpath_string, &xml_search,
cib_sync_call | cib_scope_local | cib_xpath);
if (rc != pcmk_ok) {
goto bail;
}
crm_log_xml_debug(xml_search, "Match");
if (xml_has_children(xml_search)) {
if (ticket_id) {
fprintf(stdout, "Multiple ticket_states match ticket_id=%s\n", ticket_id);
}
*ticket_state_xml = xml_search;
} else {
*ticket_state_xml = xml_search;
}
bail:
free(xpath_string);
return rc;
}
static int
set_ticket_state_attr(const char *ticket_id, const char *attr_name,
const char *attr_value, cib_t * cib, int cib_options)
{
int rc = pcmk_ok;
xmlNode *xml_top = NULL;
xmlNode *ticket_state_xml = NULL;
rc = find_ticket_state(cib, ticket_id, &ticket_state_xml);
if (rc == pcmk_ok) {
crm_debug("Found a match state for ticket: id=%s", ticket_id);
xml_top = ticket_state_xml;
} else if (rc != -ENXIO) {
return rc;
} else {
xmlNode *xml_obj = NULL;
xml_top = create_xml_node(NULL, XML_CIB_TAG_STATUS);
xml_obj = create_xml_node(xml_top, XML_CIB_TAG_TICKETS);
ticket_state_xml = create_xml_node(xml_obj, XML_CIB_TAG_TICKET_STATE);
crm_xml_add(ticket_state_xml, XML_ATTR_ID, ticket_id);
}
crm_xml_add(ticket_state_xml, attr_name, attr_value);
crm_log_xml_debug(xml_top, "Update");
rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, xml_top, cib_options);
free_xml(xml_top);
return rc;
}
void
modify_configuration(pe_working_set_t * data_set, cib_t *cib,
const char *quorum, const char *watchdog, GListPtr node_up, GListPtr node_down, GListPtr node_fail,
GListPtr op_inject, GListPtr ticket_grant, GListPtr ticket_revoke,
GListPtr ticket_standby, GListPtr ticket_activate)
{
int rc = pcmk_ok;
GListPtr gIter = NULL;
xmlNode *cib_op = NULL;
xmlNode *cib_node = NULL;
xmlNode *cib_resource = NULL;
lrmd_event_data_t *op = NULL;
if (quorum) {
xmlNode *top = create_xml_node(NULL, XML_TAG_CIB);
quiet_log(" + Setting quorum: %s\n", quorum);
/* crm_xml_add(top, XML_ATTR_DC_UUID, dc_uuid); */
crm_xml_add(top, XML_ATTR_HAVE_QUORUM, quorum);
rc = cib->cmds->modify(cib, NULL, top, cib_sync_call | cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
}
if (watchdog) {
quiet_log(" + Setting watchdog: %s\n", watchdog);
rc = update_attr_delegate(cib, cib_sync_call | cib_scope_local,
XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL,
XML_ATTR_HAVE_WATCHDOG, watchdog, FALSE, NULL, NULL);
CRM_ASSERT(rc == pcmk_ok);
}
for (gIter = node_up; gIter != NULL; gIter = gIter->next) {
char *node = (char *)gIter->data;
quiet_log(" + Bringing node %s online\n", node);
cib_node = modify_node(cib, node, TRUE);
CRM_ASSERT(cib_node != NULL);
rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, cib_node,
cib_sync_call | cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
free_xml(cib_node);
}
for (gIter = node_down; gIter != NULL; gIter = gIter->next) {
char xpath[STATUS_PATH_MAX];
char *node = (char *)gIter->data;
quiet_log(" + Taking node %s offline\n", node);
cib_node = modify_node(cib, node, FALSE);
CRM_ASSERT(cib_node != NULL);
rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, cib_node,
cib_sync_call | cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
free_xml(cib_node);
snprintf(xpath, STATUS_PATH_MAX, "//node_state[@uname='%s']/%s", node, XML_CIB_TAG_LRM);
cib->cmds->remove(cib, xpath, NULL,
cib_xpath | cib_sync_call | cib_scope_local);
snprintf(xpath, STATUS_PATH_MAX, "//node_state[@uname='%s']/%s", node,
XML_TAG_TRANSIENT_NODEATTRS);
cib->cmds->remove(cib, xpath, NULL,
cib_xpath | cib_sync_call | cib_scope_local);
}
for (gIter = node_fail; gIter != NULL; gIter = gIter->next) {
char *node = (char *)gIter->data;
quiet_log(" + Failing node %s\n", node);
cib_node = modify_node(cib, node, TRUE);
crm_xml_add(cib_node, XML_NODE_IN_CLUSTER, XML_BOOLEAN_NO);
CRM_ASSERT(cib_node != NULL);
rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, cib_node,
cib_sync_call | cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
free_xml(cib_node);
}
for (gIter = ticket_grant; gIter != NULL; gIter = gIter->next) {
char *ticket_id = (char *)gIter->data;
quiet_log(" + Granting ticket %s\n", ticket_id);
rc = set_ticket_state_attr(ticket_id, "granted", "true",
cib, cib_sync_call | cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
}
for (gIter = ticket_revoke; gIter != NULL; gIter = gIter->next) {
char *ticket_id = (char *)gIter->data;
quiet_log(" + Revoking ticket %s\n", ticket_id);
rc = set_ticket_state_attr(ticket_id, "granted", "false",
cib, cib_sync_call | cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
}
for (gIter = ticket_standby; gIter != NULL; gIter = gIter->next) {
char *ticket_id = (char *)gIter->data;
quiet_log(" + Making ticket %s standby\n", ticket_id);
rc = set_ticket_state_attr(ticket_id, "standby", "true",
cib, cib_sync_call | cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
}
for (gIter = ticket_activate; gIter != NULL; gIter = gIter->next) {
char *ticket_id = (char *)gIter->data;
quiet_log(" + Activating ticket %s\n", ticket_id);
rc = set_ticket_state_attr(ticket_id, "standby", "false",
cib, cib_sync_call | cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
}
for (gIter = op_inject; gIter != NULL; gIter = gIter->next) {
char *spec = (char *)gIter->data;
int rc = 0;
int outcome = 0;
guint interval_ms = 0;
char *key = NULL;
char *node = NULL;
char *task = NULL;
char *resource = NULL;
const char *rtype = NULL;
const char *rclass = NULL;
const char *rprovider = NULL;
resource_t *rsc = NULL;
quiet_log(" + Injecting %s into the configuration\n", spec);
key = calloc(1, strlen(spec) + 1);
node = calloc(1, strlen(spec) + 1);
rc = sscanf(spec, "%[^@]@%[^=]=%d", key, node, &outcome);
if (rc != 3) {
fprintf(stderr, "Invalid operation spec: %s. Only found %d fields\n", spec, rc);
free(key);
free(node);
continue;
}
parse_op_key(key, &resource, &task, &interval_ms);
rsc = pe_find_resource(data_set->resources, resource);
if (rsc == NULL) {
fprintf(stderr, " - Invalid resource name: %s\n", resource);
} else {
rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
rtype = crm_element_value(rsc->xml, XML_ATTR_TYPE);
rprovider = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER);
cib_node = inject_node_state(cib, node, NULL);
CRM_ASSERT(cib_node != NULL);
update_failcounts(cib_node, resource, task, interval_ms, outcome);
cib_resource = inject_resource(cib_node, resource, resource,
rclass, rtype, rprovider);
CRM_ASSERT(cib_resource != NULL);
op = create_op(cib_resource, task, interval_ms, outcome);
CRM_ASSERT(op != NULL);
cib_op = inject_op(cib_resource, op, 0);
CRM_ASSERT(cib_op != NULL);
lrmd_free_event(op);
rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, cib_node,
cib_sync_call | cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
}
free(task);
free(node);
free(key);
}
}
static gboolean
exec_pseudo_action(crm_graph_t * graph, crm_action_t * action)
{
const char *node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
action->confirmed = TRUE;
quiet_log(" * Pseudo action: %s%s%s\n", task, node ? " on " : "", node ? node : "");
update_graph(graph, action);
return TRUE;
}
static gboolean
exec_rsc_action(crm_graph_t * graph, crm_action_t * action)
{
int rc = 0;
GListPtr gIter = NULL;
lrmd_event_data_t *op = NULL;
int target_outcome = 0;
const char *rtype = NULL;
const char *rclass = NULL;
const char *resource = NULL;
const char *rprovider = NULL;
const char *lrm_name = NULL;
const char *operation = crm_element_value(action->xml, "operation");
const char *target_rc_s = crm_meta_value(action->params, XML_ATTR_TE_TARGET_RC);
xmlNode *cib_node = NULL;
xmlNode *cib_resource = NULL;
xmlNode *action_rsc = first_named_child(action->xml, XML_CIB_TAG_RESOURCE);
char *node = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET);
char *uuid = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET_UUID);
const char *router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
if (safe_str_eq(operation, CRM_OP_PROBED)
|| safe_str_eq(operation, CRM_OP_REPROBE)) {
crm_info("Skipping %s op for %s", operation, node);
goto done;
}
if (action_rsc == NULL) {
crm_log_xml_err(action->xml, "Bad");
free(node); free(uuid);
return FALSE;
}
/* Look for the preferred name
* If not found, try the expected 'local' name
* If not found use the preferred name anyway
*/
resource = crm_element_value(action_rsc, XML_ATTR_ID);
CRM_ASSERT(resource != NULL); // makes static analysis happy
lrm_name = resource; // Preferred name when writing history
if (pe_find_resource(fake_resource_list, resource) == NULL) {
const char *longname = crm_element_value(action_rsc, XML_ATTR_ID_LONG);
if (longname && pe_find_resource(fake_resource_list, longname)) {
resource = longname;
}
}
if (safe_str_eq(operation, "delete") || safe_str_eq(operation, RSC_METADATA)) {
quiet_log(" * Resource action: %-15s %s on %s\n", resource, operation, node);
goto done;
}
rclass = crm_element_value(action_rsc, XML_AGENT_ATTR_CLASS);
rtype = crm_element_value(action_rsc, XML_ATTR_TYPE);
rprovider = crm_element_value(action_rsc, XML_AGENT_ATTR_PROVIDER);
if (target_rc_s != NULL) {
target_outcome = crm_parse_int(target_rc_s, "0");
}
CRM_ASSERT(fake_cib->cmds->query(fake_cib, NULL, NULL, cib_sync_call | cib_scope_local) ==
pcmk_ok);
cib_node = inject_node_state(fake_cib, node, (router_node? node : uuid));
CRM_ASSERT(cib_node != NULL);
cib_resource = inject_resource(cib_node, resource, lrm_name,
rclass, rtype, rprovider);
if (cib_resource == NULL) {
crm_err("invalid resource in transition");
free(node); free(uuid);
free_xml(cib_node);
return FALSE;
}
op = convert_graph_action(cib_resource, action, 0, target_outcome);
if (op->interval_ms) {
quiet_log(" * Resource action: %-15s %s=%u on %s\n",
resource, op->op_type, op->interval_ms, node);
} else {
quiet_log(" * Resource action: %-15s %s on %s\n", resource, op->op_type, node);
}
for (gIter = fake_op_fail_list; gIter != NULL; gIter = gIter->next) {
char *spec = (char *)gIter->data;
char *key = NULL;
const char *match_name = NULL;
// Allow user to specify anonymous clone with or without instance number
key = crm_strdup_printf(CRM_OP_FMT "@%s=", resource, op->op_type,
op->interval_ms, node);
if (strncasecmp(key, spec, strlen(key)) == 0) {
match_name = resource;
}
free(key);
if ((match_name == NULL) && strcmp(resource, lrm_name)) {
key = crm_strdup_printf(CRM_OP_FMT "@%s=", lrm_name, op->op_type,
op->interval_ms, node);
if (strncasecmp(key, spec, strlen(key)) == 0) {
match_name = lrm_name;
}
free(key);
}
if (match_name != NULL) {
rc = sscanf(spec, "%*[^=]=%d", (int *) &op->rc);
// ${match_name}_${task}_${interval_in_ms}@${node}=${rc}
if (rc != 1) {
fprintf(stderr,
"Invalid failed operation spec: %s. Result code must be integer\n",
spec);
continue;
}
action->failed = TRUE;
graph->abort_priority = INFINITY;
printf("\tPretending action %d failed with rc=%d\n", action->id, op->rc);
update_failcounts(cib_node, match_name, op->op_type,
op->interval_ms, op->rc);
break;
}
}
inject_op(cib_resource, op, target_outcome);
lrmd_free_event(op);
rc = fake_cib->cmds->modify(fake_cib, XML_CIB_TAG_STATUS, cib_node,
cib_sync_call | cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
done:
free(node); free(uuid);
free_xml(cib_node);
action->confirmed = TRUE;
update_graph(graph, action);
return TRUE;
}
static gboolean
exec_crmd_action(crm_graph_t * graph, crm_action_t * action)
{
const char *node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
xmlNode *rsc = first_named_child(action->xml, XML_CIB_TAG_RESOURCE);
action->confirmed = TRUE;
if(rsc) {
quiet_log(" * Cluster action: %s for %s on %s\n", task, ID(rsc), node);
} else {
quiet_log(" * Cluster action: %s on %s\n", task, node);
}
update_graph(graph, action);
return TRUE;
}
static gboolean
exec_stonith_action(crm_graph_t * graph, crm_action_t * action)
{
const char *op = crm_meta_value(action->params, "stonith_action");
char *target = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET);
quiet_log(" * Fencing %s (%s)\n", target, op);
if(safe_str_neq(op, "on")) {
int rc = 0;
char xpath[STATUS_PATH_MAX];
xmlNode *cib_node = modify_node(fake_cib, target, FALSE);
crm_xml_add(cib_node, XML_ATTR_ORIGIN, __FUNCTION__);
CRM_ASSERT(cib_node != NULL);
rc = fake_cib->cmds->replace(fake_cib, XML_CIB_TAG_STATUS, cib_node,
cib_sync_call | cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
snprintf(xpath, STATUS_PATH_MAX, "//node_state[@uname='%s']/%s", target, XML_CIB_TAG_LRM);
fake_cib->cmds->remove(fake_cib, xpath, NULL,
cib_xpath | cib_sync_call | cib_scope_local);
snprintf(xpath, STATUS_PATH_MAX, "//node_state[@uname='%s']/%s", target,
XML_TAG_TRANSIENT_NODEATTRS);
fake_cib->cmds->remove(fake_cib, xpath, NULL,
cib_xpath | cib_sync_call | cib_scope_local);
free_xml(cib_node);
}
action->confirmed = TRUE;
update_graph(graph, action);
free(target);
return TRUE;
}
int
run_simulation(pe_working_set_t * data_set, cib_t *cib, GListPtr op_fail_list, bool quiet)
{
crm_graph_t *transition = NULL;
enum transition_status graph_rc = -1;
crm_graph_functions_t exec_fns = {
exec_pseudo_action,
exec_rsc_action,
exec_crmd_action,
exec_stonith_action,
};
fake_cib = cib;
fake_quiet = quiet;
fake_op_fail_list = op_fail_list;
quiet_log("\nExecuting cluster transition:\n");
set_graph_functions(&exec_fns);
transition = unpack_graph(data_set->graph, crm_system_name);
print_graph(LOG_DEBUG, transition);
fake_resource_list = data_set->resources;
do {
graph_rc = run_graph(transition);
} while (graph_rc == transition_active);
fake_resource_list = NULL;
if (graph_rc != transition_complete) {
fprintf(stdout, "Transition failed: %s\n", transition_status(graph_rc));
print_graph(LOG_ERR, transition);
}
destroy_graph(transition);
if (graph_rc != transition_complete) {
fprintf(stdout, "An invalid transition was produced\n");
}
if (quiet == FALSE) {
xmlNode *cib_object = NULL;
int rc = fake_cib->cmds->query(fake_cib, NULL, &cib_object, cib_sync_call | cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
pe_reset_working_set(data_set);
data_set->input = cib_object;
}
if (graph_rc != transition_complete) {
return graph_rc;
}
return 0;
}
diff --git a/lib/pacemaker/pcmk_sched_utils.c b/lib/pacemaker/pcmk_sched_utils.c
index 8ef2f48358..d8a9a25b5d 100644
--- a/lib/pacemaker/pcmk_sched_utils.c
+++ b/lib/pacemaker/pcmk_sched_utils.c
@@ -1,687 +1,691 @@
/*
* Copyright 2004-2019 Andrew Beekhof
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include // lrmd_event_data_t
#include
pe__location_t *
rsc2node_new(const char *id, pe_resource_t *rsc,
int node_weight, const char *discover_mode,
pe_node_t *foo_node, pe_working_set_t *data_set)
{
pe__location_t *new_con = NULL;
if (rsc == NULL || id == NULL) {
pe_err("Invalid constraint %s for rsc=%p", crm_str(id), rsc);
return NULL;
} else if (foo_node == NULL) {
CRM_CHECK(node_weight == 0, return NULL);
}
new_con = calloc(1, sizeof(pe__location_t));
if (new_con != NULL) {
new_con->id = strdup(id);
new_con->rsc_lh = rsc;
new_con->node_list_rh = NULL;
new_con->role_filter = RSC_ROLE_UNKNOWN;
if (discover_mode == NULL || safe_str_eq(discover_mode, "always")) {
new_con->discover_mode = pe_discover_always;
} else if (safe_str_eq(discover_mode, "never")) {
new_con->discover_mode = pe_discover_never;
} else if (safe_str_eq(discover_mode, "exclusive")) {
new_con->discover_mode = pe_discover_exclusive;
rsc->exclusive_discover = TRUE;
} else {
pe_err("Invalid %s value %s in location constraint", XML_LOCATION_ATTR_DISCOVERY, discover_mode);
}
if (foo_node != NULL) {
node_t *copy = node_copy(foo_node);
copy->weight = node_weight;
new_con->node_list_rh = g_list_prepend(NULL, copy);
}
data_set->placement_constraints = g_list_prepend(data_set->placement_constraints, new_con);
rsc->rsc_location = g_list_prepend(rsc->rsc_location, new_con);
}
return new_con;
}
gboolean
can_run_resources(const node_t * node)
{
if (node == NULL) {
return FALSE;
}
#if 0
if (node->weight < 0) {
return FALSE;
}
#endif
if (node->details->online == FALSE
|| node->details->shutdown || node->details->unclean
|| node->details->standby || node->details->maintenance) {
crm_trace("%s: online=%d, unclean=%d, standby=%d, maintenance=%d",
node->details->uname, node->details->online,
node->details->unclean, node->details->standby, node->details->maintenance);
return FALSE;
}
return TRUE;
}
struct node_weight_s {
pe_node_t *active;
pe_working_set_t *data_set;
};
/* return -1 if 'a' is more preferred
* return 1 if 'b' is more preferred
*/
static gint
sort_node_weight(gconstpointer a, gconstpointer b, gpointer data)
{
const node_t *node1 = (const node_t *)a;
const node_t *node2 = (const node_t *)b;
struct node_weight_s *nw = data;
int node1_weight = 0;
int node2_weight = 0;
int result = 0;
if (a == NULL) {
return 1;
}
if (b == NULL) {
return -1;
}
node1_weight = node1->weight;
node2_weight = node2->weight;
if (can_run_resources(node1) == FALSE) {
node1_weight = -INFINITY;
}
if (can_run_resources(node2) == FALSE) {
node2_weight = -INFINITY;
}
if (node1_weight > node2_weight) {
crm_trace("%s (%d) > %s (%d) : weight",
node1->details->uname, node1_weight, node2->details->uname, node2_weight);
return -1;
}
if (node1_weight < node2_weight) {
crm_trace("%s (%d) < %s (%d) : weight",
node1->details->uname, node1_weight, node2->details->uname, node2_weight);
return 1;
}
crm_trace("%s (%d) == %s (%d) : weight",
node1->details->uname, node1_weight, node2->details->uname, node2_weight);
if (safe_str_eq(nw->data_set->placement_strategy, "minimal")) {
goto equal;
}
if (safe_str_eq(nw->data_set->placement_strategy, "balanced")) {
result = compare_capacity(node1, node2);
if (result < 0) {
crm_trace("%s > %s : capacity (%d)",
node1->details->uname, node2->details->uname, result);
return -1;
} else if (result > 0) {
crm_trace("%s < %s : capacity (%d)",
node1->details->uname, node2->details->uname, result);
return 1;
}
}
/* now try to balance resources across the cluster */
if (node1->details->num_resources < node2->details->num_resources) {
crm_trace("%s (%d) > %s (%d) : resources",
node1->details->uname, node1->details->num_resources,
node2->details->uname, node2->details->num_resources);
return -1;
} else if (node1->details->num_resources > node2->details->num_resources) {
crm_trace("%s (%d) < %s (%d) : resources",
node1->details->uname, node1->details->num_resources,
node2->details->uname, node2->details->num_resources);
return 1;
}
if (nw->active && nw->active->details == node1->details) {
crm_trace("%s (%d) > %s (%d) : active",
node1->details->uname, node1->details->num_resources,
node2->details->uname, node2->details->num_resources);
return -1;
} else if (nw->active && nw->active->details == node2->details) {
crm_trace("%s (%d) < %s (%d) : active",
node1->details->uname, node1->details->num_resources,
node2->details->uname, node2->details->num_resources);
return 1;
}
equal:
crm_trace("%s = %s", node1->details->uname, node2->details->uname);
return strcmp(node1->details->uname, node2->details->uname);
}
GList *
sort_nodes_by_weight(GList *nodes, pe_node_t *active_node,
pe_working_set_t *data_set)
{
struct node_weight_s nw = { active_node, data_set };
return g_list_sort_with_data(nodes, sort_node_weight, &nw);
}
void
native_deallocate(resource_t * rsc)
{
if (rsc->allocated_to) {
node_t *old = rsc->allocated_to;
crm_info("Deallocating %s from %s", rsc->id, old->details->uname);
set_bit(rsc->flags, pe_rsc_provisional);
rsc->allocated_to = NULL;
old->details->allocated_rsc = g_list_remove(old->details->allocated_rsc, rsc);
old->details->num_resources--;
/* old->count--; */
calculate_utilization(old->details->utilization, rsc->utilization, TRUE);
free(old);
}
}
gboolean
native_assign_node(resource_t * rsc, GListPtr nodes, node_t * chosen, gboolean force)
{
CRM_ASSERT(rsc->variant == pe_native);
if (force == FALSE && chosen != NULL) {
bool unset = FALSE;
if(chosen->weight < 0) {
unset = TRUE;
// Allow the graph to assume that the remote resource will come up
} else if (!can_run_resources(chosen) && !pe__is_guest_node(chosen)) {
unset = TRUE;
}
if(unset) {
crm_debug("All nodes for resource %s are unavailable"
", unclean or shutting down (%s: %d, %d)",
rsc->id, chosen->details->uname, can_run_resources(chosen), chosen->weight);
rsc->next_role = RSC_ROLE_STOPPED;
chosen = NULL;
}
}
/* todo: update the old node for each resource to reflect its
* new resource count
*/
native_deallocate(rsc);
clear_bit(rsc->flags, pe_rsc_provisional);
if (chosen == NULL) {
GListPtr gIter = NULL;
char *rc_inactive = crm_itoa(PCMK_OCF_NOT_RUNNING);
crm_debug("Could not allocate a node for %s", rsc->id);
rsc->next_role = RSC_ROLE_STOPPED;
for (gIter = rsc->actions; gIter != NULL; gIter = gIter->next) {
action_t *op = (action_t *) gIter->data;
const char *interval_ms_s = g_hash_table_lookup(op->meta, XML_LRM_ATTR_INTERVAL_MS);
crm_debug("Processing %s", op->uuid);
if(safe_str_eq(RSC_STOP, op->task)) {
update_action_flags(op, pe_action_optional | pe_action_clear, __FUNCTION__, __LINE__);
} else if(safe_str_eq(RSC_START, op->task)) {
update_action_flags(op, pe_action_runnable | pe_action_clear, __FUNCTION__, __LINE__);
/* set_bit(rsc->flags, pe_rsc_block); */
} else if (interval_ms_s && safe_str_neq(interval_ms_s, "0")) {
if(safe_str_eq(rc_inactive, g_hash_table_lookup(op->meta, XML_ATTR_TE_TARGET_RC))) {
/* This is a recurring monitor for the stopped state, leave it alone */
} else {
/* Normal monitor operation, cancel it */
update_action_flags(op, pe_action_runnable | pe_action_clear, __FUNCTION__, __LINE__);
}
}
}
free(rc_inactive);
return FALSE;
}
crm_debug("Assigning %s to %s", chosen->details->uname, rsc->id);
rsc->allocated_to = node_copy(chosen);
chosen->details->allocated_rsc = g_list_prepend(chosen->details->allocated_rsc, rsc);
chosen->details->num_resources++;
chosen->count++;
calculate_utilization(chosen->details->utilization, rsc->utilization, FALSE);
dump_rsc_utilization(show_utilization ? 0 : utilization_log_level, __FUNCTION__, rsc, chosen);
return TRUE;
}
void
log_action(unsigned int log_level, const char *pre_text, action_t * action, gboolean details)
{
const char *node_uname = NULL;
const char *node_uuid = NULL;
if (action == NULL) {
crm_trace("%s%s: ", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": ");
return;
}
if (is_set(action->flags, pe_action_pseudo)) {
node_uname = NULL;
node_uuid = NULL;
} else if (action->node != NULL) {
node_uname = action->node->details->uname;
node_uuid = action->node->details->id;
} else {
node_uname = "";
node_uuid = NULL;
}
switch (text2task(action->task)) {
case stonith_node:
case shutdown_crm:
crm_trace("%s%s%sAction %d: %s%s%s%s%s%s",
pre_text == NULL ? "" : pre_text,
pre_text == NULL ? "" : ": ",
is_set(action->flags,
pe_action_pseudo) ? "Pseudo " : is_set(action->flags,
pe_action_optional) ?
"Optional " : is_set(action->flags,
pe_action_runnable) ? is_set(action->flags,
pe_action_processed)
? "" : "(Provisional) " : "!!Non-Startable!! ", action->id,
action->uuid, node_uname ? "\ton " : "",
node_uname ? node_uname : "", node_uuid ? "\t\t(" : "",
node_uuid ? node_uuid : "", node_uuid ? ")" : "");
break;
default:
crm_trace("%s%s%sAction %d: %s %s%s%s%s%s%s",
pre_text == NULL ? "" : pre_text,
pre_text == NULL ? "" : ": ",
is_set(action->flags,
pe_action_optional) ? "Optional " : is_set(action->flags,
pe_action_pseudo)
? "Pseudo " : is_set(action->flags,
pe_action_runnable) ? is_set(action->flags,
pe_action_processed)
? "" : "(Provisional) " : "!!Non-Startable!! ", action->id,
action->uuid, action->rsc ? action->rsc->id : "",
node_uname ? "\ton " : "", node_uname ? node_uname : "",
node_uuid ? "\t\t(" : "", node_uuid ? node_uuid : "", node_uuid ? ")" : "");
break;
}
if (details) {
GListPtr gIter = NULL;
crm_trace("\t\t====== Preceding Actions");
gIter = action->actions_before;
for (; gIter != NULL; gIter = gIter->next) {
action_wrapper_t *other = (action_wrapper_t *) gIter->data;
log_action(log_level + 1, "\t\t", other->action, FALSE);
}
crm_trace("\t\t====== Subsequent Actions");
gIter = action->actions_after;
for (; gIter != NULL; gIter = gIter->next) {
action_wrapper_t *other = (action_wrapper_t *) gIter->data;
log_action(log_level + 1, "\t\t", other->action, FALSE);
}
crm_trace("\t\t====== End");
} else {
crm_trace("\t\t(before=%d, after=%d)",
g_list_length(action->actions_before), g_list_length(action->actions_after));
}
}
gboolean
can_run_any(GHashTable * nodes)
{
GHashTableIter iter;
node_t *node = NULL;
if (nodes == NULL) {
return FALSE;
}
g_hash_table_iter_init(&iter, nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
if (can_run_resources(node) && node->weight >= 0) {
return TRUE;
}
}
return FALSE;
}
pe_action_t *
create_pseudo_resource_op(resource_t * rsc, const char *task, bool optional, bool runnable, pe_working_set_t *data_set)
{
pe_action_t *action = custom_action(rsc, generate_op_key(rsc->id, task, 0), task, NULL, optional, TRUE, data_set);
update_action_flags(action, pe_action_pseudo, __FUNCTION__, __LINE__);
update_action_flags(action, pe_action_runnable, __FUNCTION__, __LINE__);
if(runnable) {
update_action_flags(action, pe_action_runnable, __FUNCTION__, __LINE__);
}
return action;
}
/*!
* \internal
* \brief Create an executor cancel op
*
* \param[in] rsc Resource of action to cancel
* \param[in] task Name of action to cancel
* \param[in] interval_ms Interval of action to cancel
* \param[in] node Node of action to cancel
* \param[in] data_set Working set of cluster
*
* \return Created op
*/
pe_action_t *
pe_cancel_op(pe_resource_t *rsc, const char *task, guint interval_ms,
pe_node_t *node, pe_working_set_t *data_set)
{
pe_action_t *cancel_op;
char *interval_ms_s = crm_strdup_printf("%u", interval_ms);
// @TODO dangerous if possible to schedule another action with this key
char *key = generate_op_key(rsc->id, task, interval_ms);
cancel_op = custom_action(rsc, key, RSC_CANCEL, node, FALSE, TRUE,
data_set);
free(cancel_op->task);
cancel_op->task = strdup(RSC_CANCEL);
free(cancel_op->cancel_task);
cancel_op->cancel_task = strdup(task);
add_hash_param(cancel_op->meta, XML_LRM_ATTR_TASK, task);
add_hash_param(cancel_op->meta, XML_LRM_ATTR_INTERVAL_MS, interval_ms_s);
free(interval_ms_s);
return cancel_op;
}
/*!
* \internal
* \brief Create a shutdown op for a scheduler transition
*
* \param[in] node Node being shut down
* \param[in] data_set Working set of cluster
*
* \return Created op
*/
pe_action_t *
sched_shutdown_op(pe_node_t *node, pe_working_set_t *data_set)
{
char *shutdown_id = crm_strdup_printf("%s-%s", CRM_OP_SHUTDOWN,
node->details->uname);
pe_action_t *shutdown_op = custom_action(NULL, shutdown_id, CRM_OP_SHUTDOWN,
node, FALSE, TRUE, data_set);
crm_notice("Scheduling shutdown of node %s", node->details->uname);
shutdown_constraints(node, shutdown_op, data_set);
add_hash_param(shutdown_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE);
return shutdown_op;
}
static char *
generate_transition_magic(const char *transition_key, int op_status, int op_rc)
{
CRM_CHECK(transition_key != NULL, return NULL);
return crm_strdup_printf("%d:%d;%s", op_status, op_rc, transition_key);
}
static void
append_digest(lrmd_event_data_t *op, xmlNode *update, const char *version,
const char *magic, int level)
{
/* this will enable us to later determine that the
* resource's parameters have changed and we should force
* a restart
*/
char *digest = NULL;
xmlNode *args_xml = NULL;
if (op->params == NULL) {
return;
}
args_xml = create_xml_node(NULL, XML_TAG_PARAMS);
g_hash_table_foreach(op->params, hash2field, args_xml);
filter_action_parameters(args_xml, version);
digest = calculate_operation_digest(args_xml, version);
#if 0
if (level < get_crm_log_level()
&& op->interval_ms == 0 && crm_str_eq(op->op_type, CRMD_ACTION_START, TRUE)) {
char *digest_source = dump_xml_unformatted(args_xml);
do_crm_log(level, "Calculated digest %s for %s (%s). Source: %s\n",
digest, ID(update), magic, digest_source);
free(digest_source);
}
#endif
crm_xml_add(update, XML_LRM_ATTR_OP_DIGEST, digest);
free_xml(args_xml);
free(digest);
}
#define FAKE_TE_ID "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
/*!
* \internal
* \brief Create XML for resource operation history update
*
* \param[in,out] parent Parent XML node to add to
* \param[in,out] op Operation event data
* \param[in] caller_version DC feature set
* \param[in] target_rc Expected result of operation
* \param[in] node Name of node on which operation was performed
* \param[in] origin Arbitrary description of update source
* \param[in] level A log message will be logged at this level
*
* \return Newly created XML node for history update
*/
xmlNode *
pcmk__create_history_xml(xmlNode *parent, lrmd_event_data_t *op,
const char *caller_version, int target_rc,
const char *node, const char *origin, int level)
{
char *key = NULL;
char *magic = NULL;
char *op_id = NULL;
char *op_id_additional = NULL;
char *local_user_data = NULL;
const char *exit_reason = NULL;
xmlNode *xml_op = NULL;
const char *task = NULL;
CRM_CHECK(op != NULL, return NULL);
do_crm_log(level, "%s: Updating resource %s after %s op %s (interval=%u)",
origin, op->rsc_id, op->op_type, services_lrm_status_str(op->op_status),
op->interval_ms);
crm_trace("DC version: %s", caller_version);
task = op->op_type;
/* Record a successful reload as a start, and a failed reload as a monitor,
* to make life easier for the scheduler when determining the current state.
*/
if (crm_str_eq(task, "reload", TRUE)) {
if (op->op_status == PCMK_LRM_OP_DONE) {
task = CRMD_ACTION_START;
} else {
task = CRMD_ACTION_STATUS;
}
}
key = generate_op_key(op->rsc_id, task, op->interval_ms);
if (crm_str_eq(task, CRMD_ACTION_NOTIFY, TRUE)) {
const char *n_type = crm_meta_value(op->params, "notify_type");
const char *n_task = crm_meta_value(op->params, "notify_operation");
CRM_LOG_ASSERT(n_type != NULL);
CRM_LOG_ASSERT(n_task != NULL);
op_id = generate_notify_key(op->rsc_id, n_type, n_task);
if (op->op_status != PCMK_LRM_OP_PENDING) {
/* Ignore notify errors.
*
* @TODO It might be better to keep the correct result here, and
* ignore it in process_graph_event().
*/
op->op_status = PCMK_LRM_OP_DONE;
op->rc = 0;
}
} else if (did_rsc_op_fail(op, target_rc)) {
op_id = generate_op_key(op->rsc_id, "last_failure", 0);
if (op->interval_ms == 0) {
// Ensure 'last' gets updated, in case record-pending is true
op_id_additional = generate_op_key(op->rsc_id, "last", 0);
}
exit_reason = op->exit_reason;
} else if (op->interval_ms > 0) {
op_id = strdup(key);
} else {
op_id = generate_op_key(op->rsc_id, "last", 0);
}
again:
xml_op = find_entity(parent, XML_LRM_TAG_RSC_OP, op_id);
if (xml_op == NULL) {
xml_op = create_xml_node(parent, XML_LRM_TAG_RSC_OP);
}
if (op->user_data == NULL) {
crm_debug("Generating fake transition key for: " CRM_OP_FMT " %d from %s",
op->rsc_id, op->op_type, op->interval_ms,
op->call_id, origin);
local_user_data = generate_transition_key(-1, op->call_id, target_rc, FAKE_TE_ID);
op->user_data = local_user_data;
}
if(magic == NULL) {
magic = generate_transition_magic(op->user_data, op->op_status, op->rc);
}
crm_xml_add(xml_op, XML_ATTR_ID, op_id);
crm_xml_add(xml_op, XML_LRM_ATTR_TASK_KEY, key);
crm_xml_add(xml_op, XML_LRM_ATTR_TASK, task);
crm_xml_add(xml_op, XML_ATTR_ORIGIN, origin);
crm_xml_add(xml_op, XML_ATTR_CRM_VERSION, caller_version);
crm_xml_add(xml_op, XML_ATTR_TRANSITION_KEY, op->user_data);
crm_xml_add(xml_op, XML_ATTR_TRANSITION_MAGIC, magic);
crm_xml_add(xml_op, XML_LRM_ATTR_EXIT_REASON, exit_reason == NULL ? "" : exit_reason);
crm_xml_add(xml_op, XML_LRM_ATTR_TARGET, node); /* For context during triage */
crm_xml_add_int(xml_op, XML_LRM_ATTR_CALLID, op->call_id);
crm_xml_add_int(xml_op, XML_LRM_ATTR_RC, op->rc);
crm_xml_add_int(xml_op, XML_LRM_ATTR_OPSTATUS, op->op_status);
crm_xml_add_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, op->interval_ms);
if (compare_version("2.1", caller_version) <= 0) {
if (op->t_run || op->t_rcchange || op->exec_time || op->queue_time) {
crm_trace("Timing data (" CRM_OP_FMT "): last=%u change=%u exec=%u queue=%u",
op->rsc_id, op->op_type, op->interval_ms,
op->t_run, op->t_rcchange, op->exec_time, op->queue_time);
if (op->interval_ms == 0) {
- crm_xml_add_int(xml_op, XML_RSC_OP_LAST_CHANGE, op->t_run);
+ crm_xml_add_ll(xml_op, XML_RSC_OP_LAST_CHANGE,
+ (long long) op->t_run);
// @COMPAT last-run is deprecated
- crm_xml_add_int(xml_op, XML_RSC_OP_LAST_RUN, op->t_run);
+ crm_xml_add_ll(xml_op, XML_RSC_OP_LAST_RUN,
+ (long long) op->t_run);
} else if(op->t_rcchange) {
/* last-run is not accurate for recurring ops */
- crm_xml_add_int(xml_op, XML_RSC_OP_LAST_CHANGE, op->t_rcchange);
+ crm_xml_add_ll(xml_op, XML_RSC_OP_LAST_CHANGE,
+ (long long) op->t_rcchange);
} else {
/* ...but is better than nothing otherwise */
- crm_xml_add_int(xml_op, XML_RSC_OP_LAST_CHANGE, op->t_run);
+ crm_xml_add_ll(xml_op, XML_RSC_OP_LAST_CHANGE,
+ (long long) op->t_run);
}
crm_xml_add_int(xml_op, XML_RSC_OP_T_EXEC, op->exec_time);
crm_xml_add_int(xml_op, XML_RSC_OP_T_QUEUE, op->queue_time);
}
}
if (crm_str_eq(op->op_type, CRMD_ACTION_MIGRATE, TRUE)
|| crm_str_eq(op->op_type, CRMD_ACTION_MIGRATED, TRUE)) {
/*
* Record migrate_source and migrate_target always for migrate ops.
*/
const char *name = XML_LRM_ATTR_MIGRATE_SOURCE;
crm_xml_add(xml_op, name, crm_meta_value(op->params, name));
name = XML_LRM_ATTR_MIGRATE_TARGET;
crm_xml_add(xml_op, name, crm_meta_value(op->params, name));
}
append_digest(op, xml_op, caller_version, magic, LOG_DEBUG);
if (op_id_additional) {
free(op_id);
op_id = op_id_additional;
op_id_additional = NULL;
goto again;
}
if (local_user_data) {
free(local_user_data);
op->user_data = NULL;
}
free(magic);
free(op_id);
free(key);
return xml_op;
}
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
index 79e3093c42..b2ada2f8d3 100644
--- a/lib/pengine/unpack.c
+++ b/lib/pengine/unpack.c
@@ -1,3636 +1,3637 @@
/*
* Copyright 2004-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
CRM_TRACE_INIT_DATA(pe_status);
#define set_config_flag(data_set, option, flag) do { \
const char *tmp = pe_pref(data_set->config_hash, option); \
if(tmp) { \
if(crm_is_true(tmp)) { \
set_bit(data_set->flags, flag); \
} else { \
clear_bit(data_set->flags, flag); \
} \
} \
} while(0)
gboolean unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, xmlNode ** last_failure,
enum action_fail_response *failed, pe_working_set_t * data_set);
static gboolean determine_remote_online_status(pe_working_set_t * data_set, node_t * this_node);
static void add_node_attrs(xmlNode *attrs, pe_node_t *node, bool overwrite,
pe_working_set_t *data_set);
// Bitmask for warnings we only want to print once
uint32_t pe_wo = 0;
static gboolean
is_dangling_guest_node(node_t *node)
{
/* we are looking for a remote-node that was supposed to be mapped to a
* container resource, but all traces of that container have disappeared
* from both the config and the status section. */
if (pe__is_guest_or_remote_node(node) &&
node->details->remote_rsc &&
node->details->remote_rsc->container == NULL &&
is_set(node->details->remote_rsc->flags, pe_rsc_orphan_container_filler)) {
return TRUE;
}
return FALSE;
}
/*!
* \brief Schedule a fence action for a node
*
* \param[in,out] data_set Current working set of cluster
* \param[in,out] node Node to fence
* \param[in] reason Text description of why fencing is needed
*/
void
pe_fence_node(pe_working_set_t * data_set, node_t * node, const char *reason)
{
CRM_CHECK(node, return);
/* A guest node is fenced by marking its container as failed */
if (pe__is_guest_node(node)) {
resource_t *rsc = node->details->remote_rsc->container;
if (is_set(rsc->flags, pe_rsc_failed) == FALSE) {
if (!is_set(rsc->flags, pe_rsc_managed)) {
crm_notice("Not fencing guest node %s "
"(otherwise would because %s): "
"its guest resource %s is unmanaged",
node->details->uname, reason, rsc->id);
} else {
crm_warn("Guest node %s will be fenced "
"(by recovering its guest resource %s): %s",
node->details->uname, rsc->id, reason);
/* We don't mark the node as unclean because that would prevent the
* node from running resources. We want to allow it to run resources
* in this transition if the recovery succeeds.
*/
node->details->remote_requires_reset = TRUE;
set_bit(rsc->flags, pe_rsc_failed);
}
}
} else if (is_dangling_guest_node(node)) {
crm_info("Cleaning up dangling connection for guest node %s: "
"fencing was already done because %s, "
"and guest resource no longer exists",
node->details->uname, reason);
set_bit(node->details->remote_rsc->flags, pe_rsc_failed);
} else if (pe__is_remote_node(node)) {
resource_t *rsc = node->details->remote_rsc;
if (rsc && (!is_set(rsc->flags, pe_rsc_managed))) {
crm_notice("Not fencing remote node %s "
"(otherwise would because %s): connection is unmanaged",
node->details->uname, reason);
} else if(node->details->remote_requires_reset == FALSE) {
node->details->remote_requires_reset = TRUE;
crm_warn("Remote node %s %s: %s",
node->details->uname,
pe_can_fence(data_set, node)? "will be fenced" : "is unclean",
reason);
}
node->details->unclean = TRUE;
pe_fence_op(node, NULL, TRUE, reason, data_set);
} else if (node->details->unclean) {
crm_trace("Cluster node %s %s because %s",
node->details->uname,
pe_can_fence(data_set, node)? "would also be fenced" : "also is unclean",
reason);
} else {
crm_warn("Cluster node %s %s: %s",
node->details->uname,
pe_can_fence(data_set, node)? "will be fenced" : "is unclean",
reason);
node->details->unclean = TRUE;
pe_fence_op(node, NULL, TRUE, reason, data_set);
}
}
// @TODO xpaths can't handle templates, rules, or id-refs
// nvpair with provides or requires set to unfencing
#define XPATH_UNFENCING_NVPAIR XML_CIB_TAG_NVPAIR \
"[(@" XML_NVPAIR_ATTR_NAME "='" XML_RSC_ATTR_PROVIDES "'" \
"or @" XML_NVPAIR_ATTR_NAME "='" XML_RSC_ATTR_REQUIRES "') " \
"and @" XML_NVPAIR_ATTR_VALUE "='unfencing']"
// unfencing in rsc_defaults or any resource
#define XPATH_ENABLE_UNFENCING \
"/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RESOURCES \
"//" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR \
"|/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RSCCONFIG \
"/" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR
static
void set_if_xpath(unsigned long long flag, const char *xpath,
pe_working_set_t *data_set)
{
xmlXPathObjectPtr result = NULL;
if (is_not_set(data_set->flags, flag)) {
result = xpath_search(data_set->input, xpath);
if (result && (numXpathResults(result) > 0)) {
set_bit(data_set->flags, flag);
}
freeXpathObject(result);
}
}
gboolean
unpack_config(xmlNode * config, pe_working_set_t * data_set)
{
const char *value = NULL;
GHashTable *config_hash = crm_str_table_new();
data_set->config_hash = config_hash;
unpack_instance_attributes(data_set->input, config, XML_CIB_TAG_PROPSET, NULL, config_hash,
CIB_OPTIONS_FIRST, FALSE, data_set->now);
verify_pe_options(data_set->config_hash);
set_config_flag(data_set, "enable-startup-probes", pe_flag_startup_probes);
if(is_not_set(data_set->flags, pe_flag_startup_probes)) {
crm_info("Startup probes: disabled (dangerous)");
}
value = pe_pref(data_set->config_hash, XML_ATTR_HAVE_WATCHDOG);
if (value && crm_is_true(value)) {
crm_notice("Watchdog will be used via SBD if fencing is required");
set_bit(data_set->flags, pe_flag_have_stonith_resource);
}
/* Set certain flags via xpath here, so they can be used before the relevant
* configuration sections are unpacked.
*/
set_if_xpath(pe_flag_enable_unfencing, XPATH_ENABLE_UNFENCING, data_set);
value = pe_pref(data_set->config_hash, "stonith-timeout");
data_set->stonith_timeout = crm_get_msec(value);
crm_debug("STONITH timeout: %d", data_set->stonith_timeout);
set_config_flag(data_set, "stonith-enabled", pe_flag_stonith_enabled);
crm_debug("STONITH of failed nodes is %s",
is_set(data_set->flags, pe_flag_stonith_enabled) ? "enabled" : "disabled");
data_set->stonith_action = pe_pref(data_set->config_hash, "stonith-action");
if (!strcmp(data_set->stonith_action, "poweroff")) {
pe_warn_once(pe_wo_poweroff,
"Support for stonith-action of 'poweroff' is deprecated "
"and will be removed in a future release (use 'off' instead)");
data_set->stonith_action = "off";
}
crm_trace("STONITH will %s nodes", data_set->stonith_action);
set_config_flag(data_set, "concurrent-fencing", pe_flag_concurrent_fencing);
crm_debug("Concurrent fencing is %s",
is_set(data_set->flags, pe_flag_concurrent_fencing) ? "enabled" : "disabled");
set_config_flag(data_set, "stop-all-resources", pe_flag_stop_everything);
crm_debug("Stop all active resources: %s",
is_set(data_set->flags, pe_flag_stop_everything) ? "true" : "false");
set_config_flag(data_set, "symmetric-cluster", pe_flag_symmetric_cluster);
if (is_set(data_set->flags, pe_flag_symmetric_cluster)) {
crm_debug("Cluster is symmetric" " - resources can run anywhere by default");
}
value = pe_pref(data_set->config_hash, "no-quorum-policy");
if (safe_str_eq(value, "ignore")) {
data_set->no_quorum_policy = no_quorum_ignore;
} else if (safe_str_eq(value, "freeze")) {
data_set->no_quorum_policy = no_quorum_freeze;
} else if (safe_str_eq(value, "suicide")) {
if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
int do_panic = 0;
crm_element_value_int(data_set->input, XML_ATTR_QUORUM_PANIC,
&do_panic);
if (do_panic || is_set(data_set->flags, pe_flag_have_quorum)) {
data_set->no_quorum_policy = no_quorum_suicide;
} else {
crm_notice("Resetting no-quorum-policy to 'stop': cluster has never had quorum");
data_set->no_quorum_policy = no_quorum_stop;
}
} else {
crm_config_err("Resetting no-quorum-policy to 'stop': stonith is not configured");
data_set->no_quorum_policy = no_quorum_stop;
}
} else {
data_set->no_quorum_policy = no_quorum_stop;
}
switch (data_set->no_quorum_policy) {
case no_quorum_freeze:
crm_debug("On loss of quorum: Freeze resources");
break;
case no_quorum_stop:
crm_debug("On loss of quorum: Stop ALL resources");
break;
case no_quorum_suicide:
crm_notice("On loss of quorum: Fence all remaining nodes");
break;
case no_quorum_ignore:
crm_notice("On loss of quorum: Ignore");
break;
}
set_config_flag(data_set, "stop-orphan-resources", pe_flag_stop_rsc_orphans);
crm_trace("Orphan resources are %s",
is_set(data_set->flags, pe_flag_stop_rsc_orphans) ? "stopped" : "ignored");
set_config_flag(data_set, "stop-orphan-actions", pe_flag_stop_action_orphans);
crm_trace("Orphan resource actions are %s",
is_set(data_set->flags, pe_flag_stop_action_orphans) ? "stopped" : "ignored");
set_config_flag(data_set, "remove-after-stop", pe_flag_remove_after_stop);
crm_trace("Stopped resources are removed from the status section: %s",
is_set(data_set->flags, pe_flag_remove_after_stop) ? "true" : "false");
set_config_flag(data_set, "maintenance-mode", pe_flag_maintenance_mode);
crm_trace("Maintenance mode: %s",
is_set(data_set->flags, pe_flag_maintenance_mode) ? "true" : "false");
set_config_flag(data_set, "start-failure-is-fatal", pe_flag_start_failure_fatal);
crm_trace("Start failures are %s",
is_set(data_set->flags,
pe_flag_start_failure_fatal) ? "always fatal" : "handled by failcount");
if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
set_config_flag(data_set, "startup-fencing", pe_flag_startup_fencing);
}
if (is_set(data_set->flags, pe_flag_startup_fencing)) {
crm_trace("Unseen nodes will be fenced");
} else {
pe_warn_once(pe_wo_blind, "Blind faith: not fencing unseen nodes");
}
node_score_red = char2score(pe_pref(data_set->config_hash, "node-health-red"));
node_score_green = char2score(pe_pref(data_set->config_hash, "node-health-green"));
node_score_yellow = char2score(pe_pref(data_set->config_hash, "node-health-yellow"));
crm_debug("Node scores: 'red' = %s, 'yellow' = %s, 'green' = %s",
pe_pref(data_set->config_hash, "node-health-red"),
pe_pref(data_set->config_hash, "node-health-yellow"),
pe_pref(data_set->config_hash, "node-health-green"));
data_set->placement_strategy = pe_pref(data_set->config_hash, "placement-strategy");
crm_trace("Placement strategy: %s", data_set->placement_strategy);
return TRUE;
}
static void
destroy_digest_cache(gpointer ptr)
{
op_digest_cache_t *data = ptr;
free_xml(data->params_all);
free_xml(data->params_secure);
free_xml(data->params_restart);
free(data->digest_all_calc);
free(data->digest_restart_calc);
free(data->digest_secure_calc);
free(data);
}
node_t *
pe_create_node(const char *id, const char *uname, const char *type,
const char *score, pe_working_set_t * data_set)
{
node_t *new_node = NULL;
if (pe_find_node(data_set->nodes, uname) != NULL) {
crm_config_warn("Detected multiple node entries with uname=%s"
" - this is rarely intended", uname);
}
new_node = calloc(1, sizeof(node_t));
if (new_node == NULL) {
return NULL;
}
new_node->weight = char2score(score);
new_node->fixed = FALSE;
new_node->details = calloc(1, sizeof(struct pe_node_shared_s));
if (new_node->details == NULL) {
free(new_node);
return NULL;
}
crm_trace("Creating node for entry %s/%s", uname, id);
new_node->details->id = id;
new_node->details->uname = uname;
new_node->details->online = FALSE;
new_node->details->shutdown = FALSE;
new_node->details->rsc_discovery_enabled = TRUE;
new_node->details->running_rsc = NULL;
new_node->details->type = node_ping;
if (safe_str_eq(type, "remote")) {
new_node->details->type = node_remote;
set_bit(data_set->flags, pe_flag_have_remote_nodes);
} else if ((type == NULL) || safe_str_eq(type, "member")) {
new_node->details->type = node_member;
}
new_node->details->attrs = crm_str_table_new();
if (pe__is_guest_or_remote_node(new_node)) {
g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND),
strdup("remote"));
} else {
g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND),
strdup("cluster"));
}
new_node->details->utilization = crm_str_table_new();
new_node->details->digest_cache = g_hash_table_new_full(crm_str_hash,
g_str_equal, free,
destroy_digest_cache);
data_set->nodes = g_list_insert_sorted(data_set->nodes, new_node, sort_node_uname);
return new_node;
}
bool
remote_id_conflict(const char *remote_name, pe_working_set_t *data)
{
bool match = FALSE;
#if 1
pe_find_resource(data->resources, remote_name);
#else
if (data->name_check == NULL) {
data->name_check = g_hash_table_new(crm_str_hash, g_str_equal);
for (xml_rsc = __xml_first_child_element(parent); xml_rsc != NULL;
xml_rsc = __xml_next_element(xml_rsc)) {
const char *id = ID(xml_rsc);
/* avoiding heap allocation here because we know the duration of this hashtable allows us to */
g_hash_table_insert(data->name_check, (char *) id, (char *) id);
}
}
if (g_hash_table_lookup(data->name_check, remote_name)) {
match = TRUE;
}
#endif
if (match) {
crm_err("Invalid remote-node name, a resource called '%s' already exists.", remote_name);
return NULL;
}
return match;
}
static const char *
expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pe_working_set_t *data)
{
xmlNode *attr_set = NULL;
xmlNode *attr = NULL;
const char *container_id = ID(xml_obj);
const char *remote_name = NULL;
const char *remote_server = NULL;
const char *remote_port = NULL;
const char *connect_timeout = "60s";
const char *remote_allow_migrate=NULL;
const char *is_managed = NULL;
for (attr_set = __xml_first_child_element(xml_obj); attr_set != NULL;
attr_set = __xml_next_element(attr_set)) {
if (safe_str_neq((const char *)attr_set->name, XML_TAG_META_SETS)) {
continue;
}
for (attr = __xml_first_child_element(attr_set); attr != NULL;
attr = __xml_next_element(attr)) {
const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE);
const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME);
if (safe_str_eq(name, XML_RSC_ATTR_REMOTE_NODE)) {
remote_name = value;
} else if (safe_str_eq(name, "remote-addr")) {
remote_server = value;
} else if (safe_str_eq(name, "remote-port")) {
remote_port = value;
} else if (safe_str_eq(name, "remote-connect-timeout")) {
connect_timeout = value;
} else if (safe_str_eq(name, "remote-allow-migrate")) {
remote_allow_migrate=value;
} else if (safe_str_eq(name, XML_RSC_ATTR_MANAGED)) {
is_managed = value;
}
}
}
if (remote_name == NULL) {
return NULL;
}
if (remote_id_conflict(remote_name, data)) {
return NULL;
}
pe_create_remote_xml(parent, remote_name, container_id,
remote_allow_migrate, is_managed,
connect_timeout, remote_server, remote_port);
return remote_name;
}
static void
handle_startup_fencing(pe_working_set_t *data_set, node_t *new_node)
{
if ((new_node->details->type == node_remote) && (new_node->details->remote_rsc == NULL)) {
/* Ignore fencing for remote nodes that don't have a connection resource
* associated with them. This happens when remote node entries get left
* in the nodes section after the connection resource is removed.
*/
return;
}
if (is_set(data_set->flags, pe_flag_startup_fencing)) {
// All nodes are unclean until we've seen their status entry
new_node->details->unclean = TRUE;
} else {
// Blind faith ...
new_node->details->unclean = FALSE;
}
/* We need to be able to determine if a node's status section
* exists or not separate from whether the node is unclean. */
new_node->details->unseen = TRUE;
}
gboolean
unpack_nodes(xmlNode * xml_nodes, pe_working_set_t * data_set)
{
xmlNode *xml_obj = NULL;
node_t *new_node = NULL;
const char *id = NULL;
const char *uname = NULL;
const char *type = NULL;
const char *score = NULL;
for (xml_obj = __xml_first_child_element(xml_nodes); xml_obj != NULL;
xml_obj = __xml_next_element(xml_obj)) {
if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_NODE, TRUE)) {
new_node = NULL;
id = crm_element_value(xml_obj, XML_ATTR_ID);
uname = crm_element_value(xml_obj, XML_ATTR_UNAME);
type = crm_element_value(xml_obj, XML_ATTR_TYPE);
score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE);
crm_trace("Processing node %s/%s", uname, id);
if (id == NULL) {
crm_config_err("Must specify id tag in ");
continue;
}
new_node = pe_create_node(id, uname, type, score, data_set);
if (new_node == NULL) {
return FALSE;
}
/* if(data_set->have_quorum == FALSE */
/* && data_set->no_quorum_policy == no_quorum_stop) { */
/* /\* start shutting resources down *\/ */
/* new_node->weight = -INFINITY; */
/* } */
handle_startup_fencing(data_set, new_node);
add_node_attrs(xml_obj, new_node, FALSE, data_set);
unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_UTILIZATION, NULL,
new_node->details->utilization, NULL, FALSE, data_set->now);
crm_trace("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME));
}
}
if (data_set->localhost && pe_find_node(data_set->nodes, data_set->localhost) == NULL) {
crm_info("Creating a fake local node");
pe_create_node(data_set->localhost, data_set->localhost, NULL, 0,
data_set);
}
return TRUE;
}
static void
setup_container(resource_t * rsc, pe_working_set_t * data_set)
{
const char *container_id = NULL;
if (rsc->children) {
GListPtr gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
resource_t *child_rsc = (resource_t *) gIter->data;
setup_container(child_rsc, data_set);
}
return;
}
container_id = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_CONTAINER);
if (container_id && safe_str_neq(container_id, rsc->id)) {
resource_t *container = pe_find_resource(data_set->resources, container_id);
if (container) {
rsc->container = container;
set_bit(container->flags, pe_rsc_is_container);
container->fillers = g_list_append(container->fillers, rsc);
pe_rsc_trace(rsc, "Resource %s's container is %s", rsc->id, container_id);
} else {
pe_err("Resource %s: Unknown resource container (%s)", rsc->id, container_id);
}
}
}
gboolean
unpack_remote_nodes(xmlNode * xml_resources, pe_working_set_t * data_set)
{
xmlNode *xml_obj = NULL;
/* Create remote nodes and guest nodes from the resource configuration
* before unpacking resources.
*/
for (xml_obj = __xml_first_child_element(xml_resources); xml_obj != NULL;
xml_obj = __xml_next_element(xml_obj)) {
const char *new_node_id = NULL;
/* Check for remote nodes, which are defined by ocf:pacemaker:remote
* primitives.
*/
if (xml_contains_remote_node(xml_obj)) {
new_node_id = ID(xml_obj);
/* The "pe_find_node" check is here to make sure we don't iterate over
* an expanded node that has already been added to the node list. */
if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) {
crm_trace("Found remote node %s defined by resource %s",
new_node_id, ID(xml_obj));
pe_create_node(new_node_id, new_node_id, "remote", NULL,
data_set);
}
continue;
}
/* Check for guest nodes, which are defined by special meta-attributes
* of a primitive of any type (for example, VirtualDomain or Xen).
*/
if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_RESOURCE, TRUE)) {
/* This will add an ocf:pacemaker:remote primitive to the
* configuration for the guest node's connection, to be unpacked
* later.
*/
new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources, data_set);
if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) {
crm_trace("Found guest node %s in resource %s",
new_node_id, ID(xml_obj));
pe_create_node(new_node_id, new_node_id, "remote", NULL,
data_set);
}
continue;
}
/* Check for guest nodes inside a group. Clones are currently not
* supported as guest nodes.
*/
if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_GROUP, TRUE)) {
xmlNode *xml_obj2 = NULL;
for (xml_obj2 = __xml_first_child_element(xml_obj); xml_obj2 != NULL;
xml_obj2 = __xml_next_element(xml_obj2)) {
new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources, data_set);
if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) {
crm_trace("Found guest node %s in resource %s inside group %s",
new_node_id, ID(xml_obj2), ID(xml_obj));
pe_create_node(new_node_id, new_node_id, "remote", NULL,
data_set);
}
}
}
}
return TRUE;
}
/* Call this after all the nodes and resources have been
* unpacked, but before the status section is read.
*
* A remote node's online status is reflected by the state
* of the remote node's connection resource. We need to link
* the remote node to this connection resource so we can have
* easy access to the connection resource during the PE calculations.
*/
static void
link_rsc2remotenode(pe_working_set_t *data_set, resource_t *new_rsc)
{
node_t *remote_node = NULL;
if (new_rsc->is_remote_node == FALSE) {
return;
}
if (is_set(data_set->flags, pe_flag_quick_location)) {
/* remote_nodes and remote_resources are not linked in quick location calculations */
return;
}
print_resource(LOG_TRACE, "Linking remote-node connection resource, ", new_rsc, FALSE);
remote_node = pe_find_node(data_set->nodes, new_rsc->id);
CRM_CHECK(remote_node != NULL, return;);
remote_node->details->remote_rsc = new_rsc;
if (new_rsc->container == NULL) {
/* Handle start-up fencing for remote nodes (as opposed to guest nodes)
* the same as is done for cluster nodes.
*/
handle_startup_fencing(data_set, remote_node);
} else {
/* pe_create_node() marks the new node as "remote" or "cluster"; now
* that we know the node is a guest node, update it correctly.
*/
g_hash_table_replace(remote_node->details->attrs, strdup(CRM_ATTR_KIND),
strdup("container"));
}
}
static void
destroy_tag(gpointer data)
{
tag_t *tag = data;
if (tag) {
free(tag->id);
g_list_free_full(tag->refs, free);
free(tag);
}
}
/*!
* \internal
* \brief Parse configuration XML for resource information
*
* \param[in] xml_resources Top of resource configuration XML
* \param[in,out] data_set Where to put resource information
*
* \return TRUE
*
* \note unpack_remote_nodes() MUST be called before this, so that the nodes can
* be used when common_unpack() calls resource_location()
*/
gboolean
unpack_resources(xmlNode * xml_resources, pe_working_set_t * data_set)
{
xmlNode *xml_obj = NULL;
GListPtr gIter = NULL;
data_set->template_rsc_sets = g_hash_table_new_full(crm_str_hash,
g_str_equal, free,
destroy_tag);
for (xml_obj = __xml_first_child_element(xml_resources); xml_obj != NULL;
xml_obj = __xml_next_element(xml_obj)) {
resource_t *new_rsc = NULL;
if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_RSC_TEMPLATE, TRUE)) {
const char *template_id = ID(xml_obj);
if (template_id && g_hash_table_lookup_extended(data_set->template_rsc_sets,
template_id, NULL, NULL) == FALSE) {
/* Record the template's ID for the knowledge of its existence anyway. */
g_hash_table_insert(data_set->template_rsc_sets, strdup(template_id), NULL);
}
continue;
}
crm_trace("Beginning unpack... <%s id=%s... >", crm_element_name(xml_obj), ID(xml_obj));
if (common_unpack(xml_obj, &new_rsc, NULL, data_set)) {
data_set->resources = g_list_append(data_set->resources, new_rsc);
print_resource(LOG_TRACE, "Added ", new_rsc, FALSE);
} else {
crm_config_err("Failed unpacking %s %s",
crm_element_name(xml_obj), crm_element_value(xml_obj, XML_ATTR_ID));
if (new_rsc != NULL && new_rsc->fns != NULL) {
new_rsc->fns->free(new_rsc);
}
}
}
for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
setup_container(rsc, data_set);
link_rsc2remotenode(data_set, rsc);
}
data_set->resources = g_list_sort(data_set->resources, sort_rsc_priority);
if (is_set(data_set->flags, pe_flag_quick_location)) {
/* Ignore */
} else if (is_set(data_set->flags, pe_flag_stonith_enabled)
&& is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) {
crm_config_err("Resource start-up disabled since no STONITH resources have been defined");
crm_config_err("Either configure some or disable STONITH with the stonith-enabled option");
crm_config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity");
}
return TRUE;
}
gboolean
unpack_tags(xmlNode * xml_tags, pe_working_set_t * data_set)
{
xmlNode *xml_tag = NULL;
data_set->tags = g_hash_table_new_full(crm_str_hash, g_str_equal, free,
destroy_tag);
for (xml_tag = __xml_first_child_element(xml_tags); xml_tag != NULL;
xml_tag = __xml_next_element(xml_tag)) {
xmlNode *xml_obj_ref = NULL;
const char *tag_id = ID(xml_tag);
if (crm_str_eq((const char *)xml_tag->name, XML_CIB_TAG_TAG, TRUE) == FALSE) {
continue;
}
if (tag_id == NULL) {
crm_config_err("Failed unpacking %s: %s should be specified",
crm_element_name(xml_tag), XML_ATTR_ID);
continue;
}
for (xml_obj_ref = __xml_first_child_element(xml_tag); xml_obj_ref != NULL;
xml_obj_ref = __xml_next_element(xml_obj_ref)) {
const char *obj_ref = ID(xml_obj_ref);
if (crm_str_eq((const char *)xml_obj_ref->name, XML_CIB_TAG_OBJ_REF, TRUE) == FALSE) {
continue;
}
if (obj_ref == NULL) {
crm_config_err("Failed unpacking %s for tag %s: %s should be specified",
crm_element_name(xml_obj_ref), tag_id, XML_ATTR_ID);
continue;
}
if (add_tag_ref(data_set->tags, tag_id, obj_ref) == FALSE) {
return FALSE;
}
}
}
return TRUE;
}
/* The ticket state section:
* "/cib/status/tickets/ticket_state" */
static gboolean
unpack_ticket_state(xmlNode * xml_ticket, pe_working_set_t * data_set)
{
const char *ticket_id = NULL;
const char *granted = NULL;
const char *last_granted = NULL;
const char *standby = NULL;
xmlAttrPtr xIter = NULL;
ticket_t *ticket = NULL;
ticket_id = ID(xml_ticket);
if (ticket_id == NULL || strlen(ticket_id) == 0) {
return FALSE;
}
crm_trace("Processing ticket state for %s", ticket_id);
ticket = g_hash_table_lookup(data_set->tickets, ticket_id);
if (ticket == NULL) {
ticket = ticket_new(ticket_id, data_set);
if (ticket == NULL) {
return FALSE;
}
}
for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) {
const char *prop_name = (const char *)xIter->name;
const char *prop_value = crm_element_value(xml_ticket, prop_name);
if (crm_str_eq(prop_name, XML_ATTR_ID, TRUE)) {
continue;
}
g_hash_table_replace(ticket->state, strdup(prop_name), strdup(prop_value));
}
granted = g_hash_table_lookup(ticket->state, "granted");
if (granted && crm_is_true(granted)) {
ticket->granted = TRUE;
crm_info("We have ticket '%s'", ticket->id);
} else {
ticket->granted = FALSE;
crm_info("We do not have ticket '%s'", ticket->id);
}
last_granted = g_hash_table_lookup(ticket->state, "last-granted");
if (last_granted) {
ticket->last_granted = crm_parse_int(last_granted, 0);
}
standby = g_hash_table_lookup(ticket->state, "standby");
if (standby && crm_is_true(standby)) {
ticket->standby = TRUE;
if (ticket->granted) {
crm_info("Granted ticket '%s' is in standby-mode", ticket->id);
}
} else {
ticket->standby = FALSE;
}
crm_trace("Done with ticket state for %s", ticket_id);
return TRUE;
}
static gboolean
unpack_tickets_state(xmlNode * xml_tickets, pe_working_set_t * data_set)
{
xmlNode *xml_obj = NULL;
for (xml_obj = __xml_first_child_element(xml_tickets); xml_obj != NULL;
xml_obj = __xml_next_element(xml_obj)) {
if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_TICKET_STATE, TRUE) == FALSE) {
continue;
}
unpack_ticket_state(xml_obj, data_set);
}
return TRUE;
}
static void
unpack_handle_remote_attrs(node_t *this_node, xmlNode *state, pe_working_set_t * data_set)
{
const char *resource_discovery_enabled = NULL;
xmlNode *attrs = NULL;
resource_t *rsc = NULL;
if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) {
return;
}
if ((this_node == NULL) || !pe__is_guest_or_remote_node(this_node)) {
return;
}
crm_trace("Processing remote node id=%s, uname=%s", this_node->details->id, this_node->details->uname);
this_node->details->remote_maintenance =
crm_atoi(crm_element_value(state, XML_NODE_IS_MAINTENANCE), "0");
rsc = this_node->details->remote_rsc;
if (this_node->details->remote_requires_reset == FALSE) {
this_node->details->unclean = FALSE;
this_node->details->unseen = FALSE;
}
attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
add_node_attrs(attrs, this_node, TRUE, data_set);
if (pe__shutdown_requested(this_node)) {
crm_info("Node %s is shutting down", this_node->details->uname);
this_node->details->shutdown = TRUE;
if (rsc) {
rsc->next_role = RSC_ROLE_STOPPED;
}
}
if (crm_is_true(pe_node_attribute_raw(this_node, "standby"))) {
crm_info("Node %s is in standby-mode", this_node->details->uname);
this_node->details->standby = TRUE;
}
if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance")) ||
(rsc && !is_set(rsc->flags, pe_rsc_managed))) {
crm_info("Node %s is in maintenance-mode", this_node->details->uname);
this_node->details->maintenance = TRUE;
}
resource_discovery_enabled = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY);
if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) {
if (pe__is_remote_node(this_node)
&& is_not_set(data_set->flags, pe_flag_stonith_enabled)) {
crm_warn("Ignoring %s attribute on remote node %s because stonith is disabled",
XML_NODE_ATTR_RSC_DISCOVERY, this_node->details->uname);
} else {
/* This is either a remote node with fencing enabled, or a guest
* node. We don't care whether fencing is enabled when fencing guest
* nodes, because they are "fenced" by recovering their containing
* resource.
*/
crm_info("Node %s has resource discovery disabled", this_node->details->uname);
this_node->details->rsc_discovery_enabled = FALSE;
}
}
}
static bool
unpack_node_loop(xmlNode * status, bool fence, pe_working_set_t * data_set)
{
bool changed = false;
xmlNode *lrm_rsc = NULL;
for (xmlNode *state = __xml_first_child_element(status); state != NULL;
state = __xml_next_element(state)) {
const char *id = NULL;
const char *uname = NULL;
node_t *this_node = NULL;
bool process = FALSE;
if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) {
continue;
}
id = crm_element_value(state, XML_ATTR_ID);
uname = crm_element_value(state, XML_ATTR_UNAME);
this_node = pe_find_node_any(data_set->nodes, id, uname);
if (this_node == NULL) {
crm_info("Node %s is unknown", id);
continue;
} else if (this_node->details->unpacked) {
crm_info("Node %s is already processed", id);
continue;
} else if (!pe__is_guest_or_remote_node(this_node)
&& is_set(data_set->flags, pe_flag_stonith_enabled)) {
// A redundant test, but preserves the order for regression tests
process = TRUE;
} else if (pe__is_guest_or_remote_node(this_node)) {
bool check = FALSE;
resource_t *rsc = this_node->details->remote_rsc;
if(fence) {
check = TRUE;
} else if(rsc == NULL) {
/* Not ready yet */
} else if (pe__is_guest_node(this_node)
&& rsc->role == RSC_ROLE_STARTED
&& rsc->container->role == RSC_ROLE_STARTED) {
/* Both the connection and its containing resource need to be
* known to be up before we process resources running in it.
*/
check = TRUE;
crm_trace("Checking node %s/%s/%s status %d/%d/%d", id, rsc->id, rsc->container->id, fence, rsc->role, RSC_ROLE_STARTED);
} else if (!pe__is_guest_node(this_node)
&& rsc->role == RSC_ROLE_STARTED) {
check = TRUE;
crm_trace("Checking node %s/%s status %d/%d/%d", id, rsc->id, fence, rsc->role, RSC_ROLE_STARTED);
}
if (check) {
determine_remote_online_status(data_set, this_node);
unpack_handle_remote_attrs(this_node, state, data_set);
process = TRUE;
}
} else if (this_node->details->online) {
process = TRUE;
} else if (fence) {
process = TRUE;
}
if(process) {
crm_trace("Processing lrm resource entries on %shealthy%s node: %s",
fence?"un":"",
(pe__is_guest_or_remote_node(this_node)? " remote" : ""),
this_node->details->uname);
changed = TRUE;
this_node->details->unpacked = TRUE;
lrm_rsc = find_xml_node(state, XML_CIB_TAG_LRM, FALSE);
lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE);
unpack_lrm_resources(this_node, lrm_rsc, data_set);
}
}
return changed;
}
/* remove nodes that are down, stopping */
/* create positive rsc_to_node constraints between resources and the nodes they are running on */
/* anything else? */
gboolean
unpack_status(xmlNode * status, pe_working_set_t * data_set)
{
const char *id = NULL;
const char *uname = NULL;
xmlNode *state = NULL;
node_t *this_node = NULL;
crm_trace("Beginning unpack");
if (data_set->tickets == NULL) {
data_set->tickets = g_hash_table_new_full(crm_str_hash, g_str_equal,
free, destroy_ticket);
}
for (state = __xml_first_child_element(status); state != NULL;
state = __xml_next_element(state)) {
if (crm_str_eq((const char *)state->name, XML_CIB_TAG_TICKETS, TRUE)) {
unpack_tickets_state((xmlNode *) state, data_set);
} else if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE)) {
xmlNode *attrs = NULL;
const char *resource_discovery_enabled = NULL;
id = crm_element_value(state, XML_ATTR_ID);
uname = crm_element_value(state, XML_ATTR_UNAME);
this_node = pe_find_node_any(data_set->nodes, id, uname);
if (uname == NULL) {
/* error */
continue;
} else if (this_node == NULL) {
crm_config_warn("Node %s in status section no longer exists", uname);
continue;
} else if (pe__is_guest_or_remote_node(this_node)) {
/* online state for remote nodes is determined by the
* rsc state after all the unpacking is done. we do however
* need to mark whether or not the node has been fenced as this plays
* a role during unpacking cluster node resource state */
this_node->details->remote_was_fenced =
crm_atoi(crm_element_value(state, XML_NODE_IS_FENCED), "0");
continue;
}
crm_trace("Processing node id=%s, uname=%s", id, uname);
/* Mark the node as provisionally clean
* - at least we have seen it in the current cluster's lifetime
*/
this_node->details->unclean = FALSE;
this_node->details->unseen = FALSE;
attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
add_node_attrs(attrs, this_node, TRUE, data_set);
if (crm_is_true(pe_node_attribute_raw(this_node, "standby"))) {
crm_info("Node %s is in standby-mode", this_node->details->uname);
this_node->details->standby = TRUE;
}
if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance"))) {
crm_info("Node %s is in maintenance-mode", this_node->details->uname);
this_node->details->maintenance = TRUE;
}
resource_discovery_enabled = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY);
if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) {
crm_warn("ignoring %s attribute on node %s, disabling resource discovery is not allowed on cluster nodes",
XML_NODE_ATTR_RSC_DISCOVERY, this_node->details->uname);
}
crm_trace("determining node state");
determine_online_status(state, this_node, data_set);
if (is_not_set(data_set->flags, pe_flag_have_quorum)
&& this_node->details->online
&& (data_set->no_quorum_policy == no_quorum_suicide)) {
/* Everything else should flow from this automatically
* At least until the PE becomes able to migrate off healthy resources
*/
pe_fence_node(data_set, this_node, "cluster does not have quorum");
}
}
}
while(unpack_node_loop(status, FALSE, data_set)) {
crm_trace("Start another loop");
}
// Now catch any nodes we didn't see
unpack_node_loop(status, is_set(data_set->flags, pe_flag_stonith_enabled), data_set);
/* Now that we know where resources are, we can schedule stops of containers
* with failed bundle connections
*/
if (data_set->stop_needed != NULL) {
for (GList *item = data_set->stop_needed; item; item = item->next) {
pe_resource_t *container = item->data;
pe_node_t *node = pe__current_node(container);
if (node) {
stop_action(container, node, FALSE);
}
}
g_list_free(data_set->stop_needed);
data_set->stop_needed = NULL;
}
for (GListPtr gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
node_t *this_node = gIter->data;
if (this_node == NULL) {
continue;
} else if (!pe__is_guest_or_remote_node(this_node)) {
continue;
} else if(this_node->details->unpacked) {
continue;
}
determine_remote_online_status(data_set, this_node);
}
return TRUE;
}
static gboolean
determine_online_status_no_fencing(pe_working_set_t * data_set, xmlNode * node_state,
node_t * this_node)
{
gboolean online = FALSE;
const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE);
const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER);
const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);
if (!crm_is_true(in_cluster)) {
crm_trace("Node is down: in_cluster=%s", crm_str(in_cluster));
} else if (safe_str_eq(is_peer, ONLINESTATUS)) {
if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) {
online = TRUE;
} else {
crm_debug("Node is not ready to run resources: %s", join);
}
} else if (this_node->details->expected_up == FALSE) {
crm_trace("Controller is down: in_cluster=%s", crm_str(in_cluster));
crm_trace("\tis_peer=%s, join=%s, expected=%s",
crm_str(is_peer), crm_str(join), crm_str(exp_state));
} else {
/* mark it unclean */
pe_fence_node(data_set, this_node, "peer is unexpectedly down");
crm_info("\tin_cluster=%s, is_peer=%s, join=%s, expected=%s",
crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state));
}
return online;
}
static gboolean
determine_online_status_fencing(pe_working_set_t * data_set, xmlNode * node_state,
node_t * this_node)
{
gboolean online = FALSE;
gboolean do_terminate = FALSE;
bool crmd_online = FALSE;
const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE);
const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER);
const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);
const char *terminate = pe_node_attribute_raw(this_node, "terminate");
/*
- XML_NODE_IN_CLUSTER ::= true|false
- XML_NODE_IS_PEER ::= online|offline
- XML_NODE_JOIN_STATE ::= member|down|pending|banned
- XML_NODE_EXPECTED ::= member|down
*/
if (crm_is_true(terminate)) {
do_terminate = TRUE;
} else if (terminate != NULL && strlen(terminate) > 0) {
/* could be a time() value */
char t = terminate[0];
if (t != '0' && isdigit(t)) {
do_terminate = TRUE;
}
}
crm_trace("%s: in_cluster=%s, is_peer=%s, join=%s, expected=%s, term=%d",
this_node->details->uname, crm_str(in_cluster), crm_str(is_peer),
crm_str(join), crm_str(exp_state), do_terminate);
online = crm_is_true(in_cluster);
crmd_online = safe_str_eq(is_peer, ONLINESTATUS);
if (exp_state == NULL) {
exp_state = CRMD_JOINSTATE_DOWN;
}
if (this_node->details->shutdown) {
crm_debug("%s is shutting down", this_node->details->uname);
/* Slightly different criteria since we can't shut down a dead peer */
online = crmd_online;
} else if (in_cluster == NULL) {
pe_fence_node(data_set, this_node, "peer has not been seen by the cluster");
} else if (safe_str_eq(join, CRMD_JOINSTATE_NACK)) {
pe_fence_node(data_set, this_node, "peer failed the pacemaker membership criteria");
} else if (do_terminate == FALSE && safe_str_eq(exp_state, CRMD_JOINSTATE_DOWN)) {
if (crm_is_true(in_cluster) || crmd_online) {
crm_info("- Node %s is not ready to run resources", this_node->details->uname);
this_node->details->standby = TRUE;
this_node->details->pending = TRUE;
} else {
crm_trace("%s is down or still coming up", this_node->details->uname);
}
} else if (do_terminate && safe_str_eq(join, CRMD_JOINSTATE_DOWN)
&& crm_is_true(in_cluster) == FALSE && !crmd_online) {
crm_info("Node %s was just shot", this_node->details->uname);
online = FALSE;
} else if (crm_is_true(in_cluster) == FALSE) {
pe_fence_node(data_set, this_node, "peer is no longer part of the cluster");
} else if (!crmd_online) {
pe_fence_node(data_set, this_node, "peer process is no longer available");
/* Everything is running at this point, now check join state */
} else if (do_terminate) {
pe_fence_node(data_set, this_node, "termination was requested");
} else if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) {
crm_info("Node %s is active", this_node->details->uname);
} else if (safe_str_eq(join, CRMD_JOINSTATE_PENDING)
|| safe_str_eq(join, CRMD_JOINSTATE_DOWN)) {
crm_info("Node %s is not ready to run resources", this_node->details->uname);
this_node->details->standby = TRUE;
this_node->details->pending = TRUE;
} else {
pe_fence_node(data_set, this_node, "peer was in an unknown state");
crm_warn("%s: in-cluster=%s, is-peer=%s, join=%s, expected=%s, term=%d, shutdown=%d",
this_node->details->uname, crm_str(in_cluster), crm_str(is_peer),
crm_str(join), crm_str(exp_state), do_terminate, this_node->details->shutdown);
}
return online;
}
static gboolean
determine_remote_online_status(pe_working_set_t * data_set, node_t * this_node)
{
resource_t *rsc = this_node->details->remote_rsc;
resource_t *container = NULL;
pe_node_t *host = NULL;
/* If there is a node state entry for a (former) Pacemaker Remote node
* but no resource creating that node, the node's connection resource will
* be NULL. Consider it an offline remote node in that case.
*/
if (rsc == NULL) {
this_node->details->online = FALSE;
goto remote_online_done;
}
container = rsc->container;
if (container && (g_list_length(rsc->running_on) == 1)) {
host = rsc->running_on->data;
}
/* If the resource is currently started, mark it online. */
if (rsc->role == RSC_ROLE_STARTED) {
crm_trace("%s node %s presumed ONLINE because connection resource is started",
(container? "Guest" : "Remote"), this_node->details->id);
this_node->details->online = TRUE;
}
/* consider this node shutting down if transitioning start->stop */
if (rsc->role == RSC_ROLE_STARTED && rsc->next_role == RSC_ROLE_STOPPED) {
crm_trace("%s node %s shutting down because connection resource is stopping",
(container? "Guest" : "Remote"), this_node->details->id);
this_node->details->shutdown = TRUE;
}
/* Now check all the failure conditions. */
if(container && is_set(container->flags, pe_rsc_failed)) {
crm_trace("Guest node %s UNCLEAN because guest resource failed",
this_node->details->id);
this_node->details->online = FALSE;
this_node->details->remote_requires_reset = TRUE;
} else if(is_set(rsc->flags, pe_rsc_failed)) {
crm_trace("%s node %s OFFLINE because connection resource failed",
(container? "Guest" : "Remote"), this_node->details->id);
this_node->details->online = FALSE;
} else if (rsc->role == RSC_ROLE_STOPPED
|| (container && container->role == RSC_ROLE_STOPPED)) {
crm_trace("%s node %s OFFLINE because its resource is stopped",
(container? "Guest" : "Remote"), this_node->details->id);
this_node->details->online = FALSE;
this_node->details->remote_requires_reset = FALSE;
} else if (host && (host->details->online == FALSE)
&& host->details->unclean) {
crm_trace("Guest node %s UNCLEAN because host is unclean",
this_node->details->id);
this_node->details->online = FALSE;
this_node->details->remote_requires_reset = TRUE;
}
remote_online_done:
crm_trace("Remote node %s online=%s",
this_node->details->id, this_node->details->online ? "TRUE" : "FALSE");
return this_node->details->online;
}
gboolean
determine_online_status(xmlNode * node_state, node_t * this_node, pe_working_set_t * data_set)
{
gboolean online = FALSE;
const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);
if (this_node == NULL) {
crm_config_err("No node to check");
return online;
}
this_node->details->shutdown = FALSE;
this_node->details->expected_up = FALSE;
if (pe__shutdown_requested(this_node)) {
this_node->details->shutdown = TRUE;
} else if (safe_str_eq(exp_state, CRMD_JOINSTATE_MEMBER)) {
this_node->details->expected_up = TRUE;
}
if (this_node->details->type == node_ping) {
this_node->details->unclean = FALSE;
online = FALSE; /* As far as resource management is concerned,
* the node is safely offline.
* Anyone caught abusing this logic will be shot
*/
} else if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) {
online = determine_online_status_no_fencing(data_set, node_state, this_node);
} else {
online = determine_online_status_fencing(data_set, node_state, this_node);
}
if (online) {
this_node->details->online = TRUE;
} else {
/* remove node from contention */
this_node->fixed = TRUE;
this_node->weight = -INFINITY;
}
if (online && this_node->details->shutdown) {
/* don't run resources here */
this_node->fixed = TRUE;
this_node->weight = -INFINITY;
}
if (this_node->details->type == node_ping) {
crm_info("Node %s is not a pacemaker node", this_node->details->uname);
} else if (this_node->details->unclean) {
pe_proc_warn("Node %s is unclean", this_node->details->uname);
} else if (this_node->details->online) {
crm_info("Node %s is %s", this_node->details->uname,
this_node->details->shutdown ? "shutting down" :
this_node->details->pending ? "pending" :
this_node->details->standby ? "standby" :
this_node->details->maintenance ? "maintenance" : "online");
} else {
crm_trace("Node %s is offline", this_node->details->uname);
}
return online;
}
/*!
* \internal
* \brief Find the end of a resource's name, excluding any clone suffix
*
* \param[in] id Resource ID to check
*
* \return Pointer to last character of resource's base name
*/
const char *
pe_base_name_end(const char *id)
{
if (!crm_strlen_zero(id)) {
const char *end = id + strlen(id) - 1;
for (const char *s = end; s > id; --s) {
switch (*s) {
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
break;
case ':':
return (s == end)? s : (s - 1);
default:
return end;
}
}
return end;
}
return NULL;
}
/*!
* \internal
* \brief Get a resource name excluding any clone suffix
*
* \param[in] last_rsc_id Resource ID to check
*
* \return Pointer to newly allocated string with resource's base name
* \note It is the caller's responsibility to free() the result.
* This asserts on error, so callers can assume result is not NULL.
*/
char *
clone_strip(const char *last_rsc_id)
{
const char *end = pe_base_name_end(last_rsc_id);
char *basename = NULL;
CRM_ASSERT(end);
basename = strndup(last_rsc_id, end - last_rsc_id + 1);
CRM_ASSERT(basename);
return basename;
}
/*!
* \internal
* \brief Get the name of the first instance of a cloned resource
*
* \param[in] last_rsc_id Resource ID to check
*
* \return Pointer to newly allocated string with resource's base name plus :0
* \note It is the caller's responsibility to free() the result.
* This asserts on error, so callers can assume result is not NULL.
*/
char *
clone_zero(const char *last_rsc_id)
{
const char *end = pe_base_name_end(last_rsc_id);
size_t base_name_len = end - last_rsc_id + 1;
char *zero = NULL;
CRM_ASSERT(end);
zero = calloc(base_name_len + 3, sizeof(char));
CRM_ASSERT(zero);
memcpy(zero, last_rsc_id, base_name_len);
zero[base_name_len] = ':';
zero[base_name_len + 1] = '0';
return zero;
}
static resource_t *
create_fake_resource(const char *rsc_id, xmlNode * rsc_entry, pe_working_set_t * data_set)
{
resource_t *rsc = NULL;
xmlNode *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE);
copy_in_properties(xml_rsc, rsc_entry);
crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id);
crm_log_xml_debug(xml_rsc, "Orphan resource");
if (!common_unpack(xml_rsc, &rsc, NULL, data_set)) {
return NULL;
}
if (xml_contains_remote_node(xml_rsc)) {
node_t *node;
crm_debug("Detected orphaned remote node %s", rsc_id);
node = pe_find_node(data_set->nodes, rsc_id);
if (node == NULL) {
node = pe_create_node(rsc_id, rsc_id, "remote", NULL, data_set);
}
link_rsc2remotenode(data_set, rsc);
if (node) {
crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id);
node->details->shutdown = TRUE;
}
}
if (crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER)) {
/* This orphaned rsc needs to be mapped to a container. */
crm_trace("Detected orphaned container filler %s", rsc_id);
set_bit(rsc->flags, pe_rsc_orphan_container_filler);
}
set_bit(rsc->flags, pe_rsc_orphan);
data_set->resources = g_list_append(data_set->resources, rsc);
return rsc;
}
/*!
* \internal
* \brief Create orphan instance for anonymous clone resource history
*/
static pe_resource_t *
create_anonymous_orphan(pe_resource_t *parent, const char *rsc_id,
pe_node_t *node, pe_working_set_t *data_set)
{
pe_resource_t *top = pe__create_clone_child(parent, data_set);
// find_rsc() because we might be a cloned group
pe_resource_t *orphan = top->fns->find_rsc(top, rsc_id, NULL, pe_find_clone);
pe_rsc_debug(parent, "Created orphan %s for %s: %s on %s",
top->id, parent->id, rsc_id, node->details->uname);
return orphan;
}
/*!
* \internal
* \brief Check a node for an instance of an anonymous clone
*
* Return a child instance of the specified anonymous clone, in order of
* preference: (1) the instance running on the specified node, if any;
* (2) an inactive instance (i.e. within the total of clone-max instances);
* (3) a newly created orphan (i.e. clone-max instances are already active).
*
* \param[in] data_set Cluster information
* \param[in] node Node on which to check for instance
* \param[in] parent Clone to check
* \param[in] rsc_id Name of cloned resource in history (without instance)
*/
static resource_t *
find_anonymous_clone(pe_working_set_t * data_set, node_t * node, resource_t * parent,
const char *rsc_id)
{
GListPtr rIter = NULL;
pe_resource_t *rsc = NULL;
pe_resource_t *inactive_instance = NULL;
gboolean skip_inactive = FALSE;
CRM_ASSERT(parent != NULL);
CRM_ASSERT(pe_rsc_is_clone(parent));
CRM_ASSERT(is_not_set(parent->flags, pe_rsc_unique));
// Check for active (or partially active, for cloned groups) instance
pe_rsc_trace(parent, "Looking for %s on %s in %s", rsc_id, node->details->uname, parent->id);
for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) {
GListPtr locations = NULL;
resource_t *child = rIter->data;
/* Check whether this instance is already known to be active or pending
* anywhere, at this stage of unpacking. Because this function is called
* for a resource before the resource's individual operation history
* entries are unpacked, locations will generally not contain the
* desired node.
*
* However, there are three exceptions:
* (1) when child is a cloned group and we have already unpacked the
* history of another member of the group on the same node;
* (2) when we've already unpacked the history of another numbered
* instance on the same node (which can happen if globally-unique
* was flipped from true to false); and
* (3) when we re-run calculations on the same data set as part of a
* simulation.
*/
child->fns->location(child, &locations, 2);
if (locations) {
/* We should never associate the same numbered anonymous clone
* instance with multiple nodes, and clone instances can't migrate,
* so there must be only one location, regardless of history.
*/
CRM_LOG_ASSERT(locations->next == NULL);
if (((pe_node_t *)locations->data)->details == node->details) {
/* This child instance is active on the requested node, so check
* for a corresponding configured resource. We use find_rsc()
* instead of child because child may be a cloned group, and we
* need the particular member corresponding to rsc_id.
*
* If the history entry is orphaned, rsc will be NULL.
*/
rsc = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone);
if (rsc) {
/* If there are multiple instance history entries for an
* anonymous clone in a single node's history (which can
* happen if globally-unique is switched from true to
* false), we want to consider the instances beyond the
* first as orphans, even if there are inactive instance
* numbers available.
*/
if (rsc->running_on) {
crm_notice("Active (now-)anonymous clone %s has "
"multiple (orphan) instance histories on %s",
parent->id, node->details->uname);
skip_inactive = TRUE;
rsc = NULL;
} else {
pe_rsc_trace(parent, "Resource %s, active", rsc->id);
}
}
}
g_list_free(locations);
} else {
pe_rsc_trace(parent, "Resource %s, skip inactive", child->id);
if (!skip_inactive && !inactive_instance
&& is_not_set(child->flags, pe_rsc_block)) {
// Remember one inactive instance in case we don't find active
inactive_instance = parent->fns->find_rsc(child, rsc_id, NULL,
pe_find_clone);
/* ... but don't use it if it was already associated with a
* pending action on another node
*/
if (inactive_instance && inactive_instance->pending_node
&& (inactive_instance->pending_node->details != node->details)) {
inactive_instance = NULL;
}
}
}
}
if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) {
pe_rsc_trace(parent, "Resource %s, empty slot", inactive_instance->id);
rsc = inactive_instance;
}
/* If the resource has "requires" set to "quorum" or "nothing", and we don't
* have a clone instance for every node, we don't want to consume a valid
* instance number for unclean nodes. Such instances may appear to be active
* according to the history, but should be considered inactive, so we can
* start an instance elsewhere. Treat such instances as orphans.
*
* An exception is instances running on guest nodes -- since guest node
* "fencing" is actually just a resource stop, requires shouldn't apply.
*
* @TODO Ideally, we'd use an inactive instance number if it is not needed
* for any clean instances. However, we don't know that at this point.
*/
if ((rsc != NULL) && is_not_set(rsc->flags, pe_rsc_needs_fencing)
&& (!node->details->online || node->details->unclean)
&& !pe__is_guest_node(node)
&& !pe__is_universal_clone(parent, data_set)) {
rsc = NULL;
}
if (rsc == NULL) {
rsc = create_anonymous_orphan(parent, rsc_id, node, data_set);
pe_rsc_trace(parent, "Resource %s, orphan", rsc->id);
}
return rsc;
}
static resource_t *
unpack_find_resource(pe_working_set_t * data_set, node_t * node, const char *rsc_id,
xmlNode * rsc_entry)
{
resource_t *rsc = NULL;
resource_t *parent = NULL;
crm_trace("looking for %s", rsc_id);
rsc = pe_find_resource(data_set->resources, rsc_id);
if (rsc == NULL) {
/* If we didn't find the resource by its name in the operation history,
* check it again as a clone instance. Even when clone-max=0, we create
* a single :0 orphan to match against here.
*/
char *clone0_id = clone_zero(rsc_id);
resource_t *clone0 = pe_find_resource(data_set->resources, clone0_id);
if (clone0 && is_not_set(clone0->flags, pe_rsc_unique)) {
rsc = clone0;
parent = uber_parent(clone0);
crm_trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id);
} else {
crm_trace("%s is not known as %s either (orphan)",
rsc_id, clone0_id);
}
free(clone0_id);
} else if (rsc->variant > pe_native) {
crm_trace("Resource history for %s is orphaned because it is no longer primitive",
rsc_id);
return NULL;
} else {
parent = uber_parent(rsc);
}
if (pe_rsc_is_anon_clone(parent)) {
if (pe_rsc_is_bundled(parent)) {
rsc = pe__find_bundle_replica(parent->parent, node);
} else {
char *base = clone_strip(rsc_id);
rsc = find_anonymous_clone(data_set, node, parent, base);
free(base);
CRM_ASSERT(rsc != NULL);
}
}
if (rsc && safe_str_neq(rsc_id, rsc->id)
&& safe_str_neq(rsc_id, rsc->clone_name)) {
free(rsc->clone_name);
rsc->clone_name = strdup(rsc_id);
pe_rsc_debug(rsc, "Internally renamed %s on %s to %s%s",
rsc_id, node->details->uname, rsc->id,
(is_set(rsc->flags, pe_rsc_orphan)? " (ORPHAN)" : ""));
}
return rsc;
}
static resource_t *
process_orphan_resource(xmlNode * rsc_entry, node_t * node, pe_working_set_t * data_set)
{
resource_t *rsc = NULL;
const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
crm_debug("Detected orphan resource %s on %s", rsc_id, node->details->uname);
rsc = create_fake_resource(rsc_id, rsc_entry, data_set);
if (is_set(data_set->flags, pe_flag_stop_rsc_orphans) == FALSE) {
clear_bit(rsc->flags, pe_rsc_managed);
} else {
print_resource(LOG_TRACE, "Added orphan", rsc, FALSE);
CRM_CHECK(rsc != NULL, return NULL);
resource_location(rsc, NULL, -INFINITY, "__orphan_dont_run__", data_set);
}
return rsc;
}
static void
process_rsc_state(resource_t * rsc, node_t * node,
enum action_fail_response on_fail,
xmlNode * migrate_op, pe_working_set_t * data_set)
{
node_t *tmpnode = NULL;
char *reason = NULL;
CRM_ASSERT(rsc);
pe_rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s",
rsc->id, role2text(rsc->role), node->details->uname, fail2text(on_fail));
/* process current state */
if (rsc->role != RSC_ROLE_UNKNOWN) {
resource_t *iter = rsc;
while (iter) {
if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) {
node_t *n = node_copy(node);
pe_rsc_trace(rsc, "%s (aka. %s) known on %s", rsc->id, rsc->clone_name,
n->details->uname);
g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n);
}
if (is_set(iter->flags, pe_rsc_unique)) {
break;
}
iter = iter->parent;
}
}
/* If a managed resource is believed to be running, but node is down ... */
if (rsc->role > RSC_ROLE_STOPPED
&& node->details->online == FALSE
&& node->details->maintenance == FALSE
&& is_set(rsc->flags, pe_rsc_managed)) {
gboolean should_fence = FALSE;
/* If this is a guest node, fence it (regardless of whether fencing is
* enabled, because guest node fencing is done by recovery of the
* container resource rather than by the fencer). Mark the resource
* we're processing as failed. When the guest comes back up, its
* operation history in the CIB will be cleared, freeing the affected
* resource to run again once we are sure we know its state.
*/
if (pe__is_guest_node(node)) {
set_bit(rsc->flags, pe_rsc_failed);
should_fence = TRUE;
} else if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
if (pe__is_remote_node(node) && node->details->remote_rsc
&& is_not_set(node->details->remote_rsc->flags, pe_rsc_failed)) {
/* Setting unseen means that fencing of the remote node will
* occur only if the connection resource is not going to start
* somewhere. This allows connection resources on a failed
* cluster node to move to another node without requiring the
* remote nodes to be fenced as well.
*/
node->details->unseen = TRUE;
reason = crm_strdup_printf("%s is active there (fencing will be"
" revoked if remote connection can "
"be re-established elsewhere)",
rsc->id);
}
should_fence = TRUE;
}
if (should_fence) {
if (reason == NULL) {
reason = crm_strdup_printf("%s is thought to be active there", rsc->id);
}
pe_fence_node(data_set, node, reason);
}
free(reason);
}
if (node->details->unclean) {
/* No extra processing needed
* Also allows resources to be started again after a node is shot
*/
on_fail = action_fail_ignore;
}
switch (on_fail) {
case action_fail_ignore:
/* nothing to do */
break;
case action_fail_fence:
/* treat it as if it is still running
* but also mark the node as unclean
*/
reason = crm_strdup_printf("%s failed there", rsc->id);
pe_fence_node(data_set, node, reason);
free(reason);
break;
case action_fail_standby:
node->details->standby = TRUE;
node->details->standby_onfail = TRUE;
break;
case action_fail_block:
/* is_managed == FALSE will prevent any
* actions being sent for the resource
*/
clear_bit(rsc->flags, pe_rsc_managed);
set_bit(rsc->flags, pe_rsc_block);
break;
case action_fail_migrate:
/* make sure it comes up somewhere else
* or not at all
*/
resource_location(rsc, node, -INFINITY, "__action_migration_auto__", data_set);
break;
case action_fail_stop:
rsc->next_role = RSC_ROLE_STOPPED;
break;
case action_fail_recover:
if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
set_bit(rsc->flags, pe_rsc_failed);
stop_action(rsc, node, FALSE);
}
break;
case action_fail_restart_container:
set_bit(rsc->flags, pe_rsc_failed);
if (rsc->container && pe_rsc_is_bundled(rsc)) {
/* A bundle's remote connection can run on a different node than
* the bundle's container. We don't necessarily know where the
* container is running yet, so remember it and add a stop
* action for it later.
*/
data_set->stop_needed = g_list_prepend(data_set->stop_needed,
rsc->container);
} else if (rsc->container) {
stop_action(rsc->container, node, FALSE);
} else if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
stop_action(rsc, node, FALSE);
}
break;
case action_fail_reset_remote:
set_bit(rsc->flags, pe_rsc_failed);
if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
tmpnode = NULL;
if (rsc->is_remote_node) {
tmpnode = pe_find_node(data_set->nodes, rsc->id);
}
if (tmpnode &&
pe__is_remote_node(tmpnode) &&
tmpnode->details->remote_was_fenced == 0) {
/* The remote connection resource failed in a way that
* should result in fencing the remote node.
*/
pe_fence_node(data_set, tmpnode,
"remote connection is unrecoverable");
}
}
/* require the stop action regardless if fencing is occurring or not. */
if (rsc->role > RSC_ROLE_STOPPED) {
stop_action(rsc, node, FALSE);
}
/* if reconnect delay is in use, prevent the connection from exiting the
* "STOPPED" role until the failure is cleared by the delay timeout. */
if (rsc->remote_reconnect_ms) {
rsc->next_role = RSC_ROLE_STOPPED;
}
break;
}
/* ensure a remote-node connection failure forces an unclean remote-node
* to be fenced. By setting unseen = FALSE, the remote-node failure will
* result in a fencing operation regardless if we're going to attempt to
* reconnect to the remote-node in this transition or not. */
if (is_set(rsc->flags, pe_rsc_failed) && rsc->is_remote_node) {
tmpnode = pe_find_node(data_set->nodes, rsc->id);
if (tmpnode && tmpnode->details->unclean) {
tmpnode->details->unseen = FALSE;
}
}
if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
if (is_set(rsc->flags, pe_rsc_orphan)) {
if (is_set(rsc->flags, pe_rsc_managed)) {
crm_config_warn("Detected active orphan %s running on %s",
rsc->id, node->details->uname);
} else {
crm_config_warn("Cluster configured not to stop active orphans."
" %s must be stopped manually on %s",
rsc->id, node->details->uname);
}
}
native_add_running(rsc, node, data_set);
if (on_fail != action_fail_ignore) {
set_bit(rsc->flags, pe_rsc_failed);
}
} else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) {
/* Only do this for older status sections that included instance numbers
* Otherwise stopped instances will appear as orphans
*/
pe_rsc_trace(rsc, "Resetting clone_name %s for %s (stopped)", rsc->clone_name, rsc->id);
free(rsc->clone_name);
rsc->clone_name = NULL;
} else {
GList *possible_matches = pe__resource_actions(rsc, node, RSC_STOP,
FALSE);
GListPtr gIter = possible_matches;
for (; gIter != NULL; gIter = gIter->next) {
action_t *stop = (action_t *) gIter->data;
stop->flags |= pe_action_optional;
}
g_list_free(possible_matches);
}
}
/* create active recurring operations as optional */
static void
process_recurring(node_t * node, resource_t * rsc,
int start_index, int stop_index,
GListPtr sorted_op_list, pe_working_set_t * data_set)
{
int counter = -1;
const char *task = NULL;
const char *status = NULL;
GListPtr gIter = sorted_op_list;
CRM_ASSERT(rsc);
pe_rsc_trace(rsc, "%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index);
for (; gIter != NULL; gIter = gIter->next) {
xmlNode *rsc_op = (xmlNode *) gIter->data;
guint interval_ms = 0;
char *key = NULL;
const char *id = ID(rsc_op);
const char *interval_ms_s = NULL;
counter++;
if (node->details->online == FALSE) {
pe_rsc_trace(rsc, "Skipping %s/%s: node is offline", rsc->id, node->details->uname);
break;
/* Need to check if there's a monitor for role="Stopped" */
} else if (start_index < stop_index && counter <= stop_index) {
pe_rsc_trace(rsc, "Skipping %s/%s: resource is not active", id, node->details->uname);
continue;
} else if (counter < start_index) {
pe_rsc_trace(rsc, "Skipping %s/%s: old %d", id, node->details->uname, counter);
continue;
}
interval_ms_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL_MS);
interval_ms = crm_parse_ms(interval_ms_s);
if (interval_ms == 0) {
pe_rsc_trace(rsc, "Skipping %s/%s: non-recurring", id, node->details->uname);
continue;
}
status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS);
if (safe_str_eq(status, "-1")) {
pe_rsc_trace(rsc, "Skipping %s/%s: status", id, node->details->uname);
continue;
}
task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
/* create the action */
key = generate_op_key(rsc->id, task, interval_ms);
pe_rsc_trace(rsc, "Creating %s/%s", key, node->details->uname);
custom_action(rsc, key, task, node, TRUE, TRUE, data_set);
}
}
void
calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index)
{
int counter = -1;
int implied_monitor_start = -1;
int implied_clone_start = -1;
const char *task = NULL;
const char *status = NULL;
GListPtr gIter = sorted_op_list;
*stop_index = -1;
*start_index = -1;
for (; gIter != NULL; gIter = gIter->next) {
xmlNode *rsc_op = (xmlNode *) gIter->data;
counter++;
task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS);
if (safe_str_eq(task, CRMD_ACTION_STOP)
&& safe_str_eq(status, "0")) {
*stop_index = counter;
} else if (safe_str_eq(task, CRMD_ACTION_START) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
*start_index = counter;
} else if ((implied_monitor_start <= *stop_index) && safe_str_eq(task, CRMD_ACTION_STATUS)) {
const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC);
if (safe_str_eq(rc, "0") || safe_str_eq(rc, "8")) {
implied_monitor_start = counter;
}
} else if (safe_str_eq(task, CRMD_ACTION_PROMOTE) || safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
implied_clone_start = counter;
}
}
if (*start_index == -1) {
if (implied_clone_start != -1) {
*start_index = implied_clone_start;
} else if (implied_monitor_start != -1) {
*start_index = implied_monitor_start;
}
}
}
static resource_t *
unpack_lrm_rsc_state(node_t * node, xmlNode * rsc_entry, pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
int stop_index = -1;
int start_index = -1;
enum rsc_role_e req_role = RSC_ROLE_UNKNOWN;
const char *task = NULL;
const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
resource_t *rsc = NULL;
GListPtr op_list = NULL;
GListPtr sorted_op_list = NULL;
xmlNode *migrate_op = NULL;
xmlNode *rsc_op = NULL;
xmlNode *last_failure = NULL;
enum action_fail_response on_fail = FALSE;
enum rsc_role_e saved_role = RSC_ROLE_UNKNOWN;
crm_trace("[%s] Processing %s on %s",
crm_element_name(rsc_entry), rsc_id, node->details->uname);
/* extract operations */
op_list = NULL;
sorted_op_list = NULL;
for (rsc_op = __xml_first_child_element(rsc_entry); rsc_op != NULL;
rsc_op = __xml_next_element(rsc_op)) {
if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) {
op_list = g_list_prepend(op_list, rsc_op);
}
}
if (op_list == NULL) {
/* if there are no operations, there is nothing to do */
return NULL;
}
/* find the resource */
rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry);
if (rsc == NULL) {
rsc = process_orphan_resource(rsc_entry, node, data_set);
}
CRM_ASSERT(rsc != NULL);
/* process operations */
saved_role = rsc->role;
on_fail = action_fail_ignore;
rsc->role = RSC_ROLE_UNKNOWN;
sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
xmlNode *rsc_op = (xmlNode *) gIter->data;
task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
migrate_op = rsc_op;
}
unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail, data_set);
}
/* create active recurring operations as optional */
calculate_active_ops(sorted_op_list, &start_index, &stop_index);
process_recurring(node, rsc, start_index, stop_index, sorted_op_list, data_set);
/* no need to free the contents */
g_list_free(sorted_op_list);
process_rsc_state(rsc, node, on_fail, migrate_op, data_set);
if (get_target_role(rsc, &req_role)) {
if (rsc->next_role == RSC_ROLE_UNKNOWN || req_role < rsc->next_role) {
pe_rsc_debug(rsc, "%s: Overwriting calculated next role %s"
" with requested next role %s",
rsc->id, role2text(rsc->next_role), role2text(req_role));
rsc->next_role = req_role;
} else if (req_role > rsc->next_role) {
pe_rsc_info(rsc, "%s: Not overwriting calculated next role %s"
" with requested next role %s",
rsc->id, role2text(rsc->next_role), role2text(req_role));
}
}
if (saved_role > rsc->role) {
rsc->role = saved_role;
}
return rsc;
}
static void
handle_orphaned_container_fillers(xmlNode * lrm_rsc_list, pe_working_set_t * data_set)
{
xmlNode *rsc_entry = NULL;
for (rsc_entry = __xml_first_child_element(lrm_rsc_list); rsc_entry != NULL;
rsc_entry = __xml_next_element(rsc_entry)) {
resource_t *rsc;
resource_t *container;
const char *rsc_id;
const char *container_id;
if (safe_str_neq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE)) {
continue;
}
container_id = crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER);
rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
if (container_id == NULL || rsc_id == NULL) {
continue;
}
container = pe_find_resource(data_set->resources, container_id);
if (container == NULL) {
continue;
}
rsc = pe_find_resource(data_set->resources, rsc_id);
if (rsc == NULL ||
is_set(rsc->flags, pe_rsc_orphan_container_filler) == FALSE ||
rsc->container != NULL) {
continue;
}
pe_rsc_trace(rsc, "Mapped container of orphaned resource %s to %s",
rsc->id, container_id);
rsc->container = container;
container->fillers = g_list_append(container->fillers, rsc);
}
}
gboolean
unpack_lrm_resources(node_t * node, xmlNode * lrm_rsc_list, pe_working_set_t * data_set)
{
xmlNode *rsc_entry = NULL;
gboolean found_orphaned_container_filler = FALSE;
CRM_CHECK(node != NULL, return FALSE);
crm_trace("Unpacking resources on %s", node->details->uname);
for (rsc_entry = __xml_first_child_element(lrm_rsc_list); rsc_entry != NULL;
rsc_entry = __xml_next_element(rsc_entry)) {
if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) {
resource_t *rsc = unpack_lrm_rsc_state(node, rsc_entry, data_set);
if (!rsc) {
continue;
}
if (is_set(rsc->flags, pe_rsc_orphan_container_filler)) {
found_orphaned_container_filler = TRUE;
}
}
}
/* now that all the resource state has been unpacked for this node
* we have to go back and map any orphaned container fillers to their
* container resource */
if (found_orphaned_container_filler) {
handle_orphaned_container_fillers(lrm_rsc_list, data_set);
}
return TRUE;
}
static void
set_active(resource_t * rsc)
{
resource_t *top = uber_parent(rsc);
if (top && is_set(top->flags, pe_rsc_promotable)) {
rsc->role = RSC_ROLE_SLAVE;
} else {
rsc->role = RSC_ROLE_STARTED;
}
}
static void
set_node_score(gpointer key, gpointer value, gpointer user_data)
{
node_t *node = value;
int *score = user_data;
node->weight = *score;
}
#define STATUS_PATH_MAX 1024
static xmlNode *
find_lrm_op(const char *resource, const char *op, const char *node, const char *source,
bool success_only, pe_working_set_t *data_set)
{
int offset = 0;
char xpath[STATUS_PATH_MAX];
xmlNode *xml = NULL;
offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//node_state[@uname='%s']", node);
offset +=
snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//" XML_LRM_TAG_RESOURCE "[@id='%s']",
resource);
/* Need to check against transition_magic too? */
if (source && safe_str_eq(op, CRMD_ACTION_MIGRATE)) {
offset +=
snprintf(xpath + offset, STATUS_PATH_MAX - offset,
"/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_target='%s']", op,
source);
} else if (source && safe_str_eq(op, CRMD_ACTION_MIGRATED)) {
offset +=
snprintf(xpath + offset, STATUS_PATH_MAX - offset,
"/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_source='%s']", op,
source);
} else {
offset +=
snprintf(xpath + offset, STATUS_PATH_MAX - offset,
"/" XML_LRM_TAG_RSC_OP "[@operation='%s']", op);
}
CRM_LOG_ASSERT(offset > 0);
xml = get_xpath_object(xpath, data_set->input, LOG_DEBUG);
if (xml && success_only) {
int rc = PCMK_OCF_UNKNOWN_ERROR;
int status = PCMK_LRM_OP_ERROR;
crm_element_value_int(xml, XML_LRM_ATTR_RC, &rc);
crm_element_value_int(xml, XML_LRM_ATTR_OPSTATUS, &status);
if ((rc != PCMK_OCF_OK) || (status != PCMK_LRM_OP_DONE)) {
return NULL;
}
}
return xml;
}
static int
pe__call_id(xmlNode *op_xml)
{
int id = 0;
if (op_xml) {
crm_element_value_int(op_xml, XML_LRM_ATTR_CALLID, &id);
}
return id;
}
/*!
* \brief Check whether a stop happened on the same node after some event
*
* \param[in] rsc Resource being checked
* \param[in] node Node being checked
* \param[in] xml_op Event that stop is being compared to
* \param[in] data_set Cluster working set
*
* \return TRUE if stop happened after event, FALSE otherwise
*
* \note This is really unnecessary, but kept as a safety mechanism. We
* currently don't save more than one successful event in history, so this
* only matters when processing really old CIB files that we don't
* technically support anymore, or as preparation for logging an extended
* history in the future.
*/
static bool
stop_happened_after(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
pe_working_set_t *data_set)
{
xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP,
node->details->uname, NULL, TRUE, data_set);
return (stop_op && (pe__call_id(stop_op) > pe__call_id(xml_op)));
}
static void
unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
pe_working_set_t *data_set)
{
/* A successful migration sequence is:
* migrate_to on source node
* migrate_from on target node
* stop on source node
*
* If a migrate_to is followed by a stop, the entire migration (successful
* or failed) is complete, and we don't care what happened on the target.
*
* If no migrate_from has happened, the migration is considered to be
* "partial". If the migrate_from failed, make sure the resource gets
* stopped on both source and target (if up).
*
* If the migrate_to and migrate_from both succeeded (which also implies the
* resource is no longer running on the source), but there is no stop, the
* migration is considered to be "dangling". Schedule a stop on the source
* in this case.
*/
int from_rc = 0;
int from_status = 0;
pe_node_t *target_node = NULL;
pe_node_t *source_node = NULL;
xmlNode *migrate_from = NULL;
const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
// Sanity check
CRM_CHECK(source && target && !strcmp(source, node->details->uname), return);
if (stop_happened_after(rsc, node, xml_op, data_set)) {
return;
}
// Clones are not allowed to migrate, so role can't be master
rsc->role = RSC_ROLE_STARTED;
target_node = pe_find_node(data_set->nodes, target);
source_node = pe_find_node(data_set->nodes, source);
// Check whether there was a migrate_from action on the target
migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, target,
source, FALSE, data_set);
if (migrate_from) {
crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc);
crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, &from_status);
pe_rsc_trace(rsc, "%s op on %s exited with status=%d, rc=%d",
ID(migrate_from), target, from_status, from_rc);
}
if (migrate_from && from_rc == PCMK_OCF_OK
&& from_status == PCMK_LRM_OP_DONE) {
/* The migrate_to and migrate_from both succeeded, so mark the migration
* as "dangling". This will be used to schedule a stop action on the
* source without affecting the target.
*/
pe_rsc_trace(rsc, "Detected dangling migration op: %s on %s", ID(xml_op),
source);
rsc->role = RSC_ROLE_STOPPED;
rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node);
} else if (migrate_from && (from_status != PCMK_LRM_OP_PENDING)) { // Failed
if (target_node && target_node->details->online) {
pe_rsc_trace(rsc, "Marking active on %s %p %d", target, target_node,
target_node->details->online);
native_add_running(rsc, target_node, data_set);
}
} else { // Pending, or complete but erased
if (target_node && target_node->details->online) {
pe_rsc_trace(rsc, "Marking active on %s %p %d", target, target_node,
target_node->details->online);
native_add_running(rsc, target_node, data_set);
if (source_node && source_node->details->online) {
/* This is a partial migration: the migrate_to completed
* successfully on the source, but the migrate_from has not
* completed. Remember the source and target; if the newly
* chosen target remains the same when we schedule actions
* later, we may continue with the migration.
*/
rsc->partial_migration_target = target_node;
rsc->partial_migration_source = source_node;
}
} else {
/* Consider it failed here - forces a restart, prevents migration */
set_bit(rsc->flags, pe_rsc_failed);
clear_bit(rsc->flags, pe_rsc_allow_migrate);
}
}
}
static void
unpack_migrate_to_failure(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
pe_working_set_t *data_set)
{
int target_stop_id = 0;
int target_migrate_from_id = 0;
xmlNode *target_stop = NULL;
xmlNode *target_migrate_from = NULL;
const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
// Sanity check
CRM_CHECK(source && target && !strcmp(source, node->details->uname), return);
/* If a migration failed, we have to assume the resource is active. Clones
* are not allowed to migrate, so role can't be master.
*/
rsc->role = RSC_ROLE_STARTED;
// Check for stop on the target
target_stop = find_lrm_op(rsc->id, CRMD_ACTION_STOP, target, NULL,
TRUE, data_set);
target_stop_id = pe__call_id(target_stop);
// Check for migrate_from on the target
target_migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, target,
source, TRUE, data_set);
target_migrate_from_id = pe__call_id(target_migrate_from);
if ((target_stop == NULL) || (target_stop_id < target_migrate_from_id)) {
/* There was no stop on the source, or a stop that happened before a
* migrate_from, so assume the resource is still active on the target
* (if it is up).
*/
node_t *target_node = pe_find_node(data_set->nodes, target);
pe_rsc_trace(rsc, "stop (%d) + migrate_from (%d)",
target_stop_id, target_migrate_from_id);
if (target_node && target_node->details->online) {
native_add_running(rsc, target_node, data_set);
}
} else if (target_migrate_from == NULL) {
/* We know there was a stop on the target, but there may not have been a
* migrate_from (the stop could have happened before migrate_from was
* scheduled or attempted).
*
* That means this could be a "dangling" migration. But first, check
* whether there is a newer migrate_from or start on the source node --
* it's possible the failed migration was followed by a successful
* full restart or migration in the reverse direction, in which case we
* don't want to force it to stop.
*/
xmlNode *source_migrate_from = NULL;
xmlNode *source_start = NULL;
int source_migrate_to_id = pe__call_id(xml_op);
source_migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, source,
NULL, TRUE, data_set);
if (pe__call_id(source_migrate_from) > source_migrate_to_id) {
return;
}
source_start = find_lrm_op(rsc->id, CRMD_ACTION_START, source, NULL,
TRUE, data_set);
if (pe__call_id(source_start) > source_migrate_to_id) {
return;
}
// Mark node as having dangling migration so we can force a stop later
rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node);
}
}
static void
unpack_migrate_from_failure(pe_resource_t *rsc, pe_node_t *node,
xmlNode *xml_op, pe_working_set_t *data_set)
{
xmlNode *source_stop = NULL;
xmlNode *source_migrate_to = NULL;
const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
// Sanity check
CRM_CHECK(source && target && !strcmp(target, node->details->uname), return);
/* If a migration failed, we have to assume the resource is active. Clones
* are not allowed to migrate, so role can't be master.
*/
rsc->role = RSC_ROLE_STARTED;
// Check for a stop on the source
source_stop = find_lrm_op(rsc->id, CRMD_ACTION_STOP, source, NULL,
TRUE, data_set);
// Check for a migrate_to on the source
source_migrate_to = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATE,
source, target, TRUE, data_set);
if ((source_stop == NULL)
|| (pe__call_id(source_stop) < pe__call_id(source_migrate_to))) {
/* There was no stop on the source, or a stop that happened before
* migrate_to, so assume the resource is still active on the source (if
* it is up).
*/
pe_node_t *source_node = pe_find_node(data_set->nodes, source);
if (source_node && source_node->details->online) {
native_add_running(rsc, source_node, data_set);
}
}
}
static void
record_failed_op(xmlNode *op, node_t* node, resource_t *rsc, pe_working_set_t * data_set)
{
xmlNode *xIter = NULL;
const char *op_key = crm_element_value(op, XML_LRM_ATTR_TASK_KEY);
if (node->details->online == FALSE) {
return;
}
for (xIter = data_set->failed->children; xIter; xIter = xIter->next) {
const char *key = crm_element_value(xIter, XML_LRM_ATTR_TASK_KEY);
const char *uname = crm_element_value(xIter, XML_ATTR_UNAME);
if(safe_str_eq(op_key, key) && safe_str_eq(uname, node->details->uname)) {
crm_trace("Skipping duplicate entry %s on %s", op_key, node->details->uname);
return;
}
}
crm_trace("Adding entry %s on %s", op_key, node->details->uname);
crm_xml_add(op, XML_ATTR_UNAME, node->details->uname);
crm_xml_add(op, XML_LRM_ATTR_RSCID, rsc->id);
add_node_copy(data_set->failed, op);
}
static const char *get_op_key(xmlNode *xml_op)
{
const char *key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY);
if(key == NULL) {
key = ID(xml_op);
}
return key;
}
static void
unpack_rsc_op_failure(resource_t * rsc, node_t * node, int rc, xmlNode * xml_op, xmlNode ** last_failure,
enum action_fail_response * on_fail, pe_working_set_t * data_set)
{
guint interval_ms = 0;
bool is_probe = FALSE;
action_t *action = NULL;
const char *key = get_op_key(xml_op);
const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
CRM_ASSERT(rsc);
*last_failure = xml_op;
crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
if ((interval_ms == 0) && safe_str_eq(task, CRMD_ACTION_STATUS)) {
is_probe = TRUE;
pe_rsc_trace(rsc, "is a probe: %s", key);
}
if (rc != PCMK_OCF_NOT_INSTALLED || is_set(data_set->flags, pe_flag_symmetric_cluster)) {
crm_warn("Processing failed %s of %s on %s: %s " CRM_XS " rc=%d",
(is_probe? "probe" : task), rsc->id, node->details->uname,
services_ocf_exitcode_str(rc), rc);
if (is_probe && (rc != PCMK_OCF_OK)
&& (rc != PCMK_OCF_NOT_RUNNING)
&& (rc != PCMK_OCF_RUNNING_MASTER)) {
/* A failed (not just unexpected) probe result could mean the user
* didn't know resources will be probed even where they can't run.
*/
crm_notice("If it is not possible for %s to run on %s, see "
"the resource-discovery option for location constraints",
rsc->id, node->details->uname);
}
record_failed_op(xml_op, node, rsc, data_set);
} else {
crm_trace("Processing failed op %s for %s on %s: %s (%d)",
task, rsc->id, node->details->uname, services_ocf_exitcode_str(rc),
rc);
}
action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set);
if ((action->on_fail <= action_fail_fence && *on_fail < action->on_fail) ||
(action->on_fail == action_fail_reset_remote && *on_fail <= action_fail_recover) ||
(action->on_fail == action_fail_restart_container && *on_fail <= action_fail_recover) ||
(*on_fail == action_fail_restart_container && action->on_fail >= action_fail_migrate)) {
pe_rsc_trace(rsc, "on-fail %s -> %s for %s (%s)", fail2text(*on_fail),
fail2text(action->on_fail), action->uuid, key);
*on_fail = action->on_fail;
}
if (safe_str_eq(task, CRMD_ACTION_STOP)) {
resource_location(rsc, node, -INFINITY, "__stop_fail__", data_set);
} else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) {
unpack_migrate_to_failure(rsc, node, xml_op, data_set);
} else if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
unpack_migrate_from_failure(rsc, node, xml_op, data_set);
} else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
rsc->role = RSC_ROLE_MASTER;
} else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
if (action->on_fail == action_fail_block) {
rsc->role = RSC_ROLE_MASTER;
rsc->next_role = RSC_ROLE_STOPPED;
} else if(rc == PCMK_OCF_NOT_RUNNING) {
rsc->role = RSC_ROLE_STOPPED;
} else {
/*
* Staying in master role would put the PE/TE into a loop. Setting
* slave role is not dangerous because the resource will be stopped
* as part of recovery, and any master promotion will be ordered
* after that stop.
*/
rsc->role = RSC_ROLE_SLAVE;
}
}
if(is_probe && rc == PCMK_OCF_NOT_INSTALLED) {
/* leave stopped */
pe_rsc_trace(rsc, "Leaving %s stopped", rsc->id);
rsc->role = RSC_ROLE_STOPPED;
} else if (rsc->role < RSC_ROLE_STARTED) {
pe_rsc_trace(rsc, "Setting %s active", rsc->id);
set_active(rsc);
}
pe_rsc_trace(rsc, "Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s",
rsc->id, role2text(rsc->role),
node->details->unclean ? "true" : "false",
fail2text(action->on_fail), role2text(action->fail_role));
if (action->fail_role != RSC_ROLE_STARTED && rsc->next_role < action->fail_role) {
rsc->next_role = action->fail_role;
}
if (action->fail_role == RSC_ROLE_STOPPED) {
int score = -INFINITY;
resource_t *fail_rsc = rsc;
if (fail_rsc->parent) {
resource_t *parent = uber_parent(fail_rsc);
if (pe_rsc_is_clone(parent)
&& is_not_set(parent->flags, pe_rsc_unique)) {
/* For clone resources, if a child fails on an operation
* with on-fail = stop, all the resources fail. Do this by preventing
* the parent from coming up again. */
fail_rsc = parent;
}
}
crm_warn("Making sure %s doesn't come up again", fail_rsc->id);
/* make sure it doesn't come up again */
if (fail_rsc->allowed_nodes != NULL) {
g_hash_table_destroy(fail_rsc->allowed_nodes);
}
fail_rsc->allowed_nodes = node_hash_from_list(data_set->nodes);
g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score);
}
pe_free_action(action);
}
/*!
* \internal
* \brief Remap operation status based on action result
*
* Given an action result, determine an appropriate operation status for the
* purposes of responding to the action (the status provided by the executor is
* not directly usable since the executor does not know what was expected).
*
* \param[in,out] rsc Resource that operation history entry is for
* \param[in] rc Actual return code of operation
* \param[in] target_rc Expected return code of operation
* \param[in] node Node where operation was executed
* \param[in] xml_op Operation history entry XML from CIB status
* \param[in,out] on_fail What should be done about the result
* \param[in] data_set Current cluster working set
*
* \return Operation status based on return code and action info
* \note This may update the resource's current and next role.
*/
static int
determine_op_status(
resource_t *rsc, int rc, int target_rc, node_t * node, xmlNode * xml_op, enum action_fail_response * on_fail, pe_working_set_t * data_set)
{
guint interval_ms = 0;
int result = PCMK_LRM_OP_DONE;
const char *key = get_op_key(xml_op);
const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
bool is_probe = FALSE;
CRM_ASSERT(rsc);
crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
if ((interval_ms == 0) && safe_str_eq(task, CRMD_ACTION_STATUS)) {
is_probe = TRUE;
}
if (target_rc < 0) {
/* Pre-1.0 Pacemaker versions, and Pacemaker 1.1.6 or earlier with
* Heartbeat 2.0.7 or earlier as the cluster layer, did not include the
* target_rc in the transition key, which (along with the similar case
* of a corrupted transition key in the CIB) will be reported to this
* function as -1. Pacemaker 2.0+ does not support rolling upgrades from
* those versions or processing of saved CIB files from those versions,
* so we do not need to care much about this case.
*/
result = PCMK_LRM_OP_ERROR;
crm_warn("Expected result not found for %s on %s (corrupt or obsolete CIB?)",
key, node->details->uname);
} else if (target_rc != rc) {
result = PCMK_LRM_OP_ERROR;
pe_rsc_debug(rsc, "%s on %s returned '%s' (%d) instead of the expected value: '%s' (%d)",
key, node->details->uname,
services_ocf_exitcode_str(rc), rc,
services_ocf_exitcode_str(target_rc), target_rc);
}
switch (rc) {
case PCMK_OCF_OK:
// @TODO Should this be (rc != target_rc)?
if (is_probe && (target_rc == PCMK_OCF_NOT_RUNNING)) {
result = PCMK_LRM_OP_DONE;
pe_rsc_info(rsc, "Operation %s found resource %s active on %s",
task, rsc->id, node->details->uname);
}
break;
case PCMK_OCF_NOT_RUNNING:
if (is_probe || target_rc == rc || is_not_set(rsc->flags, pe_rsc_managed)) {
result = PCMK_LRM_OP_DONE;
rsc->role = RSC_ROLE_STOPPED;
/* clear any previous failure actions */
*on_fail = action_fail_ignore;
rsc->next_role = RSC_ROLE_UNKNOWN;
}
break;
case PCMK_OCF_RUNNING_MASTER:
if (is_probe && (rc != target_rc)) {
result = PCMK_LRM_OP_DONE;
pe_rsc_info(rsc, "Operation %s found resource %s active in master mode on %s",
task, rsc->id, node->details->uname);
}
rsc->role = RSC_ROLE_MASTER;
break;
case PCMK_OCF_DEGRADED_MASTER:
case PCMK_OCF_FAILED_MASTER:
rsc->role = RSC_ROLE_MASTER;
result = PCMK_LRM_OP_ERROR;
break;
case PCMK_OCF_NOT_CONFIGURED:
result = PCMK_LRM_OP_ERROR_FATAL;
break;
case PCMK_OCF_UNIMPLEMENT_FEATURE:
if (interval_ms > 0) {
result = PCMK_LRM_OP_NOTSUPPORTED;
break;
}
// fall through
case PCMK_OCF_NOT_INSTALLED:
case PCMK_OCF_INVALID_PARAM:
case PCMK_OCF_INSUFFICIENT_PRIV:
if (!pe_can_fence(data_set, node)
&& safe_str_eq(task, CRMD_ACTION_STOP)) {
/* If a stop fails and we can't fence, there's nothing else we can do */
pe_proc_err("No further recovery can be attempted for %s: %s action failed with '%s' (%d)",
rsc->id, task, services_ocf_exitcode_str(rc), rc);
clear_bit(rsc->flags, pe_rsc_managed);
set_bit(rsc->flags, pe_rsc_block);
}
result = PCMK_LRM_OP_ERROR_HARD;
break;
default:
if (result == PCMK_LRM_OP_DONE) {
crm_info("Treating unknown return code %d for %s on %s as failure",
rc, key, node->details->uname);
result = PCMK_LRM_OP_ERROR;
}
break;
}
return result;
}
static bool check_operation_expiry(resource_t *rsc, node_t *node, int rc, xmlNode *xml_op, pe_working_set_t * data_set)
{
bool expired = FALSE;
time_t last_failure = 0;
guint interval_ms = 0;
int failure_timeout = rsc->failure_timeout;
const char *key = get_op_key(xml_op);
const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
const char *clear_reason = NULL;
crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
/* clearing recurring monitor operation failures automatically
* needs to be carefully considered */
if ((interval_ms != 0) && safe_str_eq(task, "monitor")) {
/* TODO, in the future we should consider not clearing recurring monitor
* op failures unless the last action for a resource was a "stop" action.
* otherwise it is possible that clearing the monitor failure will result
* in the resource being in an undeterministic state.
*
* For now we handle this potential undeterministic condition for remote
* node connection resources by not clearing a recurring monitor op failure
* until after the node has been fenced. */
if (is_set(data_set->flags, pe_flag_stonith_enabled)
&& rsc->remote_reconnect_ms) {
node_t *remote_node = pe_find_node(data_set->nodes, rsc->id);
if (remote_node && remote_node->details->remote_was_fenced == 0) {
if (strstr(ID(xml_op), "last_failure")) {
crm_info("Waiting to clear monitor failure for remote node %s until fencing has occurred", rsc->id);
}
/* disabling failure timeout for this operation because we believe
* fencing of the remote node should occur first. */
failure_timeout = 0;
}
}
}
if (failure_timeout > 0) {
- int last_run = 0;
+ time_t last_run = 0;
- if (crm_element_value_int(xml_op, XML_RSC_OP_LAST_CHANGE, &last_run) == 0) {
+ if (crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE,
+ &last_run) == 0) {
time_t now = get_effective_time(data_set);
if (now > (last_run + failure_timeout)) {
expired = TRUE;
}
}
}
if (expired) {
if (pe_get_failcount(node, rsc, &last_failure, pe_fc_default, xml_op,
data_set)) {
// There is a fail count ignoring timeout
if (pe_get_failcount(node, rsc, &last_failure, pe_fc_effective,
xml_op, data_set) == 0) {
// There is no fail count considering timeout
clear_reason = "it expired";
} else {
expired = FALSE;
}
} else if (rsc->remote_reconnect_ms
&& strstr(ID(xml_op), "last_failure")) {
// Always clear last failure when reconnect interval is set
clear_reason = "reconnect interval is set";
}
} else if (strstr(ID(xml_op), "last_failure") &&
((strcmp(task, "start") == 0) || (strcmp(task, "monitor") == 0))) {
if (pe__bundle_needs_remote_name(rsc)) {
/* We haven't allocated resources yet, so we can't reliably
* substitute addr parameters for the REMOTE_CONTAINER_HACK.
* When that's needed, defer the check until later.
*/
pe__add_param_check(xml_op, rsc, node, pe_check_last_failure,
data_set);
} else {
op_digest_cache_t *digest_data = NULL;
digest_data = rsc_action_digest_cmp(rsc, xml_op, node, data_set);
switch (digest_data->rc) {
case RSC_DIGEST_UNKNOWN:
crm_trace("Resource %s history entry %s on %s has no digest to compare",
rsc->id, key, node->details->id);
break;
case RSC_DIGEST_MATCH:
break;
default:
clear_reason = "resource parameters have changed";
break;
}
}
}
if (clear_reason != NULL) {
// Schedule clearing of the fail count
pe_action_t *clear_op = pe__clear_failcount(rsc, node, clear_reason,
data_set);
if (is_set(data_set->flags, pe_flag_stonith_enabled)
&& rsc->remote_reconnect_ms) {
pe_node_t *remote_node = pe_find_node(data_set->nodes, rsc->id);
if (remote_node) {
/* If we're clearing a remote connection due to a reconnect
* interval, we want to wait until any scheduled fencing
* completes.
*
* We could limit this to remote_node->details->unclean, but at
* this point, that's always true (it won't be reliable until
* after unpack_node_loop() is done).
*/
pe_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL,
data_set);
crm_info("Clearing %s failure will wait until any scheduled "
"fencing of %s completes", task, rsc->id);
order_actions(fence, clear_op, pe_order_implies_then);
}
}
}
if (expired && (interval_ms == 0) && safe_str_eq(task, CRMD_ACTION_STATUS)) {
switch(rc) {
case PCMK_OCF_OK:
case PCMK_OCF_NOT_RUNNING:
case PCMK_OCF_RUNNING_MASTER:
case PCMK_OCF_DEGRADED:
case PCMK_OCF_DEGRADED_MASTER:
/* Don't expire probes that return these values */
expired = FALSE;
break;
}
}
return expired;
}
int pe__target_rc_from_xml(xmlNode *xml_op)
{
int target_rc = 0;
const char *key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY);
if (key == NULL) {
return -1;
}
decode_transition_key(key, NULL, NULL, NULL, &target_rc);
return target_rc;
}
static enum action_fail_response
get_action_on_fail(resource_t *rsc, const char *key, const char *task, pe_working_set_t * data_set)
{
int result = action_fail_recover;
action_t *action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set);
result = action->on_fail;
pe_free_action(action);
return result;
}
static void
update_resource_state(resource_t * rsc, node_t * node, xmlNode * xml_op, const char * task, int rc,
xmlNode * last_failure, enum action_fail_response * on_fail, pe_working_set_t * data_set)
{
gboolean clear_past_failure = FALSE;
CRM_ASSERT(rsc);
CRM_ASSERT(xml_op);
if (rc == PCMK_OCF_NOT_RUNNING) {
clear_past_failure = TRUE;
} else if (rc == PCMK_OCF_NOT_INSTALLED) {
rsc->role = RSC_ROLE_STOPPED;
} else if (safe_str_eq(task, CRMD_ACTION_STATUS)) {
if (last_failure) {
const char *op_key = get_op_key(xml_op);
const char *last_failure_key = get_op_key(last_failure);
if (safe_str_eq(op_key, last_failure_key)) {
clear_past_failure = TRUE;
}
}
if (rsc->role < RSC_ROLE_STARTED) {
set_active(rsc);
}
} else if (safe_str_eq(task, CRMD_ACTION_START)) {
rsc->role = RSC_ROLE_STARTED;
clear_past_failure = TRUE;
} else if (safe_str_eq(task, CRMD_ACTION_STOP)) {
rsc->role = RSC_ROLE_STOPPED;
clear_past_failure = TRUE;
} else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
rsc->role = RSC_ROLE_MASTER;
clear_past_failure = TRUE;
} else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
/* Demote from Master does not clear an error */
rsc->role = RSC_ROLE_SLAVE;
} else if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
rsc->role = RSC_ROLE_STARTED;
clear_past_failure = TRUE;
} else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) {
unpack_migrate_to_success(rsc, node, xml_op, data_set);
} else if (rsc->role < RSC_ROLE_STARTED) {
pe_rsc_trace(rsc, "%s active on %s", rsc->id, node->details->uname);
set_active(rsc);
}
/* clear any previous failure actions */
if (clear_past_failure) {
switch (*on_fail) {
case action_fail_stop:
case action_fail_fence:
case action_fail_migrate:
case action_fail_standby:
pe_rsc_trace(rsc, "%s.%s is not cleared by a completed stop",
rsc->id, fail2text(*on_fail));
break;
case action_fail_block:
case action_fail_ignore:
case action_fail_recover:
case action_fail_restart_container:
*on_fail = action_fail_ignore;
rsc->next_role = RSC_ROLE_UNKNOWN;
break;
case action_fail_reset_remote:
if (rsc->remote_reconnect_ms == 0) {
/* With no reconnect interval, the connection is allowed to
* start again after the remote node is fenced and
* completely stopped. (With a reconnect interval, we wait
* for the failure to be cleared entirely before attempting
* to reconnect.)
*/
*on_fail = action_fail_ignore;
rsc->next_role = RSC_ROLE_UNKNOWN;
}
break;
}
}
}
gboolean
unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, xmlNode ** last_failure,
enum action_fail_response * on_fail, pe_working_set_t * data_set)
{
int task_id = 0;
const char *key = NULL;
const char *task = NULL;
const char *task_key = NULL;
int rc = 0;
int status = PCMK_LRM_OP_UNKNOWN;
int target_rc = pe__target_rc_from_xml(xml_op);
guint interval_ms = 0;
gboolean expired = FALSE;
resource_t *parent = rsc;
enum action_fail_response failure_strategy = action_fail_recover;
CRM_CHECK(rsc != NULL, return FALSE);
CRM_CHECK(node != NULL, return FALSE);
CRM_CHECK(xml_op != NULL, return FALSE);
task_key = get_op_key(xml_op);
task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY);
crm_element_value_int(xml_op, XML_LRM_ATTR_RC, &rc);
crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id);
crm_element_value_int(xml_op, XML_LRM_ATTR_OPSTATUS, &status);
crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
CRM_CHECK(task != NULL, return FALSE);
CRM_CHECK(status <= PCMK_LRM_OP_INVALID, return FALSE);
CRM_CHECK(status >= PCMK_LRM_OP_PENDING, return FALSE);
if (safe_str_eq(task, CRMD_ACTION_NOTIFY) ||
safe_str_eq(task, CRMD_ACTION_METADATA)) {
/* safe to ignore these */
return TRUE;
}
if (is_not_set(rsc->flags, pe_rsc_unique)) {
parent = uber_parent(rsc);
}
pe_rsc_trace(rsc, "Unpacking task %s/%s (call_id=%d, status=%d, rc=%d) on %s (role=%s)",
task_key, task, task_id, status, rc, node->details->uname, role2text(rsc->role));
if (node->details->unclean) {
pe_rsc_trace(rsc, "Node %s (where %s is running) is unclean."
" Further action depends on the value of the stop's on-fail attribute",
node->details->uname, rsc->id);
}
if(status != PCMK_LRM_OP_NOT_INSTALLED) {
expired = check_operation_expiry(rsc, node, rc, xml_op, data_set);
}
/* Degraded results are informational only, re-map them to their error-free equivalents */
if (rc == PCMK_OCF_DEGRADED && safe_str_eq(task, CRMD_ACTION_STATUS)) {
rc = PCMK_OCF_OK;
/* Add them to the failed list to highlight them for the user */
if ((node->details->shutdown == FALSE) || (node->details->online == TRUE)) {
crm_trace("Remapping %d to %d", PCMK_OCF_DEGRADED, PCMK_OCF_OK);
record_failed_op(xml_op, node, rsc, data_set);
}
} else if (rc == PCMK_OCF_DEGRADED_MASTER && safe_str_eq(task, CRMD_ACTION_STATUS)) {
rc = PCMK_OCF_RUNNING_MASTER;
/* Add them to the failed list to highlight them for the user */
if ((node->details->shutdown == FALSE) || (node->details->online == TRUE)) {
crm_trace("Remapping %d to %d", PCMK_OCF_DEGRADED_MASTER, PCMK_OCF_RUNNING_MASTER);
record_failed_op(xml_op, node, rsc, data_set);
}
}
if (expired && target_rc != rc) {
const char *magic = crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC);
pe_rsc_debug(rsc, "Expired operation '%s' on %s returned '%s' (%d) instead of the expected value: '%s' (%d)",
key, node->details->uname,
services_ocf_exitcode_str(rc), rc,
services_ocf_exitcode_str(target_rc), target_rc);
if (interval_ms == 0) {
crm_notice("Ignoring expired calculated failure %s (rc=%d, magic=%s) on %s",
task_key, rc, magic, node->details->uname);
goto done;
} else if(node->details->online && node->details->unclean == FALSE) {
crm_notice("Re-initiated expired calculated failure %s (rc=%d, magic=%s) on %s",
task_key, rc, magic, node->details->uname);
/* This is SO horrible, but we don't have access to CancelXmlOp() yet */
crm_xml_add(xml_op, XML_LRM_ATTR_RESTART_DIGEST, "calculated-failure-timeout");
goto done;
}
}
/* If the executor reported an operation status of anything but done or
* error, consider that final. But for done or error, we know better whether
* it should be treated as a failure or not, because we know the expected
* result.
*/
if(status == PCMK_LRM_OP_DONE || status == PCMK_LRM_OP_ERROR) {
status = determine_op_status(rsc, rc, target_rc, node, xml_op, on_fail, data_set);
}
pe_rsc_trace(rsc, "Handling status: %d", status);
switch (status) {
case PCMK_LRM_OP_CANCELLED:
/* do nothing?? */
pe_err("Don't know what to do for cancelled ops yet");
break;
case PCMK_LRM_OP_PENDING:
if (safe_str_eq(task, CRMD_ACTION_START)) {
set_bit(rsc->flags, pe_rsc_start_pending);
set_active(rsc);
} else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
rsc->role = RSC_ROLE_MASTER;
} else if (safe_str_eq(task, CRMD_ACTION_MIGRATE) && node->details->unclean) {
/* If a pending migrate_to action is out on a unclean node,
* we have to force the stop action on the target. */
const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
node_t *target = pe_find_node(data_set->nodes, migrate_target);
if (target) {
stop_action(rsc, target, FALSE);
}
}
if (rsc->pending_task == NULL) {
if (safe_str_eq(task, CRMD_ACTION_STATUS) && (interval_ms == 0)) {
/* Pending probes are not printed, even if pending
* operations are requested. If someone ever requests that
* behavior, uncomment this and the corresponding part of
* native.c:native_pending_task().
*/
/*rsc->pending_task = strdup("probe");*/
/*rsc->pending_node = node;*/
} else {
rsc->pending_task = strdup(task);
rsc->pending_node = node;
}
}
break;
case PCMK_LRM_OP_DONE:
pe_rsc_trace(rsc, "%s/%s completed on %s", rsc->id, task, node->details->uname);
update_resource_state(rsc, node, xml_op, task, rc, *last_failure, on_fail, data_set);
break;
case PCMK_LRM_OP_NOT_INSTALLED:
failure_strategy = get_action_on_fail(rsc, task_key, task, data_set);
if (failure_strategy == action_fail_ignore) {
crm_warn("Cannot ignore failed %s (status=%d, rc=%d) on %s: "
"Resource agent doesn't exist",
task_key, status, rc, node->details->uname);
/* Also for printing it as "FAILED" by marking it as pe_rsc_failed later */
*on_fail = action_fail_migrate;
}
resource_location(parent, node, -INFINITY, "hard-error", data_set);
unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set);
break;
case PCMK_LRM_OP_NOT_CONNECTED:
if (pe__is_guest_or_remote_node(node)
&& is_set(node->details->remote_rsc->flags, pe_rsc_managed)) {
/* We should never get into a situation where a managed remote
* connection resource is considered OK but a resource action
* behind the connection gets a "not connected" status. But as a
* fail-safe in case a bug or unusual circumstances do lead to
* that, ensure the remote connection is considered failed.
*/
set_bit(node->details->remote_rsc->flags, pe_rsc_failed);
}
// fall through
case PCMK_LRM_OP_ERROR:
case PCMK_LRM_OP_ERROR_HARD:
case PCMK_LRM_OP_ERROR_FATAL:
case PCMK_LRM_OP_TIMEOUT:
case PCMK_LRM_OP_NOTSUPPORTED:
case PCMK_LRM_OP_INVALID:
failure_strategy = get_action_on_fail(rsc, task_key, task, data_set);
if ((failure_strategy == action_fail_ignore)
|| (failure_strategy == action_fail_restart_container
&& safe_str_eq(task, CRMD_ACTION_STOP))) {
crm_warn("Pretending the failure of %s (rc=%d) on %s succeeded",
task_key, rc, node->details->uname);
update_resource_state(rsc, node, xml_op, task, target_rc, *last_failure, on_fail, data_set);
crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname);
set_bit(rsc->flags, pe_rsc_failure_ignored);
record_failed_op(xml_op, node, rsc, data_set);
if (failure_strategy == action_fail_restart_container && *on_fail <= action_fail_recover) {
*on_fail = failure_strategy;
}
} else {
unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set);
if(status == PCMK_LRM_OP_ERROR_HARD) {
do_crm_log(rc != PCMK_OCF_NOT_INSTALLED?LOG_ERR:LOG_NOTICE,
"Preventing %s from re-starting on %s: operation %s failed '%s' (%d)",
parent->id, node->details->uname,
task, services_ocf_exitcode_str(rc), rc);
resource_location(parent, node, -INFINITY, "hard-error", data_set);
} else if(status == PCMK_LRM_OP_ERROR_FATAL) {
crm_err("Preventing %s from re-starting anywhere: operation %s failed '%s' (%d)",
parent->id, task, services_ocf_exitcode_str(rc), rc);
resource_location(parent, NULL, -INFINITY, "fatal-error", data_set);
}
}
break;
}
done:
pe_rsc_trace(rsc, "Resource %s after %s: role=%s, next=%s", rsc->id, task, role2text(rsc->role), role2text(rsc->next_role));
return TRUE;
}
static void
add_node_attrs(xmlNode *xml_obj, pe_node_t *node, bool overwrite,
pe_working_set_t *data_set)
{
const char *cluster_name = NULL;
g_hash_table_insert(node->details->attrs,
strdup(CRM_ATTR_UNAME), strdup(node->details->uname));
g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_ID),
strdup(node->details->id));
if (safe_str_eq(node->details->id, data_set->dc_uuid)) {
data_set->dc_node = node;
node->details->is_dc = TRUE;
g_hash_table_insert(node->details->attrs,
strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_TRUE));
} else {
g_hash_table_insert(node->details->attrs,
strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_FALSE));
}
cluster_name = g_hash_table_lookup(data_set->config_hash, "cluster-name");
if (cluster_name) {
g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_CLUSTER_NAME),
strdup(cluster_name));
}
unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_ATTR_SETS, NULL,
node->details->attrs, NULL, overwrite, data_set->now);
if (pe_node_attribute_raw(node, CRM_ATTR_SITE_NAME) == NULL) {
const char *site_name = pe_node_attribute_raw(node, "site-name");
if (site_name) {
g_hash_table_insert(node->details->attrs,
strdup(CRM_ATTR_SITE_NAME),
strdup(site_name));
} else if (cluster_name) {
/* Default to cluster-name if unset */
g_hash_table_insert(node->details->attrs,
strdup(CRM_ATTR_SITE_NAME),
strdup(cluster_name));
}
}
}
static GListPtr
extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter)
{
int counter = -1;
int stop_index = -1;
int start_index = -1;
xmlNode *rsc_op = NULL;
GListPtr gIter = NULL;
GListPtr op_list = NULL;
GListPtr sorted_op_list = NULL;
/* extract operations */
op_list = NULL;
sorted_op_list = NULL;
for (rsc_op = __xml_first_child_element(rsc_entry);
rsc_op != NULL; rsc_op = __xml_next_element(rsc_op)) {
if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) {
crm_xml_add(rsc_op, "resource", rsc);
crm_xml_add(rsc_op, XML_ATTR_UNAME, node);
op_list = g_list_prepend(op_list, rsc_op);
}
}
if (op_list == NULL) {
/* if there are no operations, there is nothing to do */
return NULL;
}
sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
/* create active recurring operations as optional */
if (active_filter == FALSE) {
return sorted_op_list;
}
op_list = NULL;
calculate_active_ops(sorted_op_list, &start_index, &stop_index);
for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
xmlNode *rsc_op = (xmlNode *) gIter->data;
counter++;
if (start_index < stop_index) {
crm_trace("Skipping %s: not active", ID(rsc_entry));
break;
} else if (counter < start_index) {
crm_trace("Skipping %s: old", ID(rsc_op));
continue;
}
op_list = g_list_append(op_list, rsc_op);
}
g_list_free(sorted_op_list);
return op_list;
}
GListPtr
find_operations(const char *rsc, const char *node, gboolean active_filter,
pe_working_set_t * data_set)
{
GListPtr output = NULL;
GListPtr intermediate = NULL;
xmlNode *tmp = NULL;
xmlNode *status = find_xml_node(data_set->input, XML_CIB_TAG_STATUS, TRUE);
node_t *this_node = NULL;
xmlNode *node_state = NULL;
for (node_state = __xml_first_child_element(status); node_state != NULL;
node_state = __xml_next_element(node_state)) {
if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) {
const char *uname = crm_element_value(node_state, XML_ATTR_UNAME);
if (node != NULL && safe_str_neq(uname, node)) {
continue;
}
this_node = pe_find_node(data_set->nodes, uname);
if(this_node == NULL) {
CRM_LOG_ASSERT(this_node != NULL);
continue;
} else if (pe__is_guest_or_remote_node(this_node)) {
determine_remote_online_status(data_set, this_node);
} else {
determine_online_status(node_state, this_node, data_set);
}
if (this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) {
/* offline nodes run no resources...
* unless stonith is enabled in which case we need to
* make sure rsc start events happen after the stonith
*/
xmlNode *lrm_rsc = NULL;
tmp = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE);
tmp = find_xml_node(tmp, XML_LRM_TAG_RESOURCES, FALSE);
for (lrm_rsc = __xml_first_child_element(tmp); lrm_rsc != NULL;
lrm_rsc = __xml_next_element(lrm_rsc)) {
if (crm_str_eq((const char *)lrm_rsc->name, XML_LRM_TAG_RESOURCE, TRUE)) {
const char *rsc_id = crm_element_value(lrm_rsc, XML_ATTR_ID);
if (rsc != NULL && safe_str_neq(rsc_id, rsc)) {
continue;
}
intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter);
output = g_list_concat(output, intermediate);
}
}
}
}
}
return output;
}
diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c
index 7cdd36d7a0..8749661eb1 100644
--- a/lib/pengine/utils.c
+++ b/lib/pengine/utils.c
@@ -1,2536 +1,2537 @@
/*
* Copyright 2004-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
extern xmlNode *get_object_root(const char *object_type, xmlNode * the_root);
void print_str_str(gpointer key, gpointer value, gpointer user_data);
gboolean ghash_free_str_str(gpointer key, gpointer value, gpointer user_data);
void unpack_operation(action_t * action, xmlNode * xml_obj, resource_t * container,
pe_working_set_t * data_set);
static xmlNode *find_rsc_op_entry_helper(resource_t * rsc, const char *key,
gboolean include_disabled);
#if ENABLE_VERSIONED_ATTRS
pe_rsc_action_details_t *
pe_rsc_action_details(pe_action_t *action)
{
pe_rsc_action_details_t *details;
CRM_CHECK(action != NULL, return NULL);
if (action->action_details == NULL) {
action->action_details = calloc(1, sizeof(pe_rsc_action_details_t));
CRM_CHECK(action->action_details != NULL, return NULL);
}
details = (pe_rsc_action_details_t *) action->action_details;
if (details->versioned_parameters == NULL) {
details->versioned_parameters = create_xml_node(NULL,
XML_TAG_OP_VER_ATTRS);
}
if (details->versioned_meta == NULL) {
details->versioned_meta = create_xml_node(NULL, XML_TAG_OP_VER_META);
}
return details;
}
static void
pe_free_rsc_action_details(pe_action_t *action)
{
pe_rsc_action_details_t *details;
if ((action == NULL) || (action->action_details == NULL)) {
return;
}
details = (pe_rsc_action_details_t *) action->action_details;
if (details->versioned_parameters) {
free_xml(details->versioned_parameters);
}
if (details->versioned_meta) {
free_xml(details->versioned_meta);
}
action->action_details = NULL;
}
#endif
/*!
* \internal
* \brief Check whether we can fence a particular node
*
* \param[in] data_set Working set for cluster
* \param[in] node Name of node to check
*
* \return TRUE if node can be fenced, FALSE otherwise
*
* \note This function should only be called for cluster nodes and
* remote nodes; guest nodes are fenced by stopping their container
* resource, so fence execution requirements do not apply to them.
*/
bool pe_can_fence(pe_working_set_t * data_set, node_t *node)
{
if(is_not_set(data_set->flags, pe_flag_stonith_enabled)) {
return FALSE; /* Turned off */
} else if (is_not_set(data_set->flags, pe_flag_have_stonith_resource)) {
return FALSE; /* No devices */
} else if (is_set(data_set->flags, pe_flag_have_quorum)) {
return TRUE;
} else if (data_set->no_quorum_policy == no_quorum_ignore) {
return TRUE;
} else if(node == NULL) {
return FALSE;
} else if(node->details->online) {
crm_notice("We can fence %s without quorum because they're in our membership", node->details->uname);
return TRUE;
}
crm_trace("Cannot fence %s", node->details->uname);
return FALSE;
}
node_t *
node_copy(const node_t *this_node)
{
node_t *new_node = NULL;
CRM_CHECK(this_node != NULL, return NULL);
new_node = calloc(1, sizeof(node_t));
CRM_ASSERT(new_node != NULL);
crm_trace("Copying %p (%s) to %p", this_node, this_node->details->uname, new_node);
new_node->rsc_discover_mode = this_node->rsc_discover_mode;
new_node->weight = this_node->weight;
new_node->fixed = this_node->fixed;
new_node->details = this_node->details;
return new_node;
}
/* any node in list1 or list2 and not in the other gets a score of -INFINITY */
void
node_list_exclude(GHashTable * hash, GListPtr list, gboolean merge_scores)
{
GHashTable *result = hash;
node_t *other_node = NULL;
GListPtr gIter = list;
GHashTableIter iter;
node_t *node = NULL;
g_hash_table_iter_init(&iter, hash);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
other_node = pe_find_node_id(list, node->details->id);
if (other_node == NULL) {
node->weight = -INFINITY;
} else if (merge_scores) {
node->weight = merge_weights(node->weight, other_node->weight);
}
}
for (; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
other_node = pe_hash_table_lookup(result, node->details->id);
if (other_node == NULL) {
node_t *new_node = node_copy(node);
new_node->weight = -INFINITY;
g_hash_table_insert(result, (gpointer) new_node->details->id, new_node);
}
}
}
GHashTable *
node_hash_from_list(GListPtr list)
{
GListPtr gIter = list;
GHashTable *result = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL,
free);
for (; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
node_t *n = node_copy(node);
g_hash_table_insert(result, (gpointer) n->details->id, n);
}
return result;
}
GListPtr
node_list_dup(GListPtr list1, gboolean reset, gboolean filter)
{
GListPtr result = NULL;
GListPtr gIter = list1;
for (; gIter != NULL; gIter = gIter->next) {
node_t *new_node = NULL;
node_t *this_node = (node_t *) gIter->data;
if (filter && this_node->weight < 0) {
continue;
}
new_node = node_copy(this_node);
if (reset) {
new_node->weight = 0;
}
if (new_node != NULL) {
result = g_list_prepend(result, new_node);
}
}
return result;
}
gint
sort_node_uname(gconstpointer a, gconstpointer b)
{
const char *name_a = ((const node_t *) a)->details->uname;
const char *name_b = ((const node_t *) b)->details->uname;
while (*name_a && *name_b) {
if (isdigit(*name_a) && isdigit(*name_b)) {
// If node names contain a number, sort numerically
char *end_a = NULL;
char *end_b = NULL;
long num_a = strtol(name_a, &end_a, 10);
long num_b = strtol(name_b, &end_b, 10);
// allow ordering e.g. 007 > 7
size_t len_a = end_a - name_a;
size_t len_b = end_b - name_b;
if (num_a < num_b) {
return -1;
} else if (num_a > num_b) {
return 1;
} else if (len_a < len_b) {
return -1;
} else if (len_a > len_b) {
return 1;
}
name_a = end_a;
name_b = end_b;
} else {
// Compare non-digits case-insensitively
int lower_a = tolower(*name_a);
int lower_b = tolower(*name_b);
if (lower_a < lower_b) {
return -1;
} else if (lower_a > lower_b) {
return 1;
}
++name_a;
++name_b;
}
}
if (!*name_a && *name_b) {
return -1;
} else if (*name_a && !*name_b) {
return 1;
}
return 0;
}
void
dump_node_scores_worker(int level, const char *file, const char *function, int line,
resource_t * rsc, const char *comment, GHashTable * nodes)
{
GHashTable *hash = nodes;
GHashTableIter iter;
node_t *node = NULL;
if (rsc) {
hash = rsc->allowed_nodes;
}
if (rsc && is_set(rsc->flags, pe_rsc_orphan)) {
/* Don't show the allocation scores for orphans */
return;
}
if (level == 0) {
char score[128];
int len = sizeof(score);
/* For now we want this in sorted order to keep the regression tests happy */
GListPtr gIter = NULL;
GListPtr list = g_hash_table_get_values(hash);
list = g_list_sort(list, sort_node_uname);
gIter = list;
for (; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
/* This function is called a whole lot, use stack allocated score */
score2char_stack(node->weight, score, len);
if (rsc) {
printf("%s: %s allocation score on %s: %s\n",
comment, rsc->id, node->details->uname, score);
} else {
printf("%s: %s = %s\n", comment, node->details->uname, score);
}
}
g_list_free(list);
} else if (hash) {
char score[128];
int len = sizeof(score);
g_hash_table_iter_init(&iter, hash);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
/* This function is called a whole lot, use stack allocated score */
score2char_stack(node->weight, score, len);
if (rsc) {
do_crm_log_alias(LOG_TRACE, file, function, line,
"%s: %s allocation score on %s: %s", comment, rsc->id,
node->details->uname, score);
} else {
do_crm_log_alias(LOG_TRACE, file, function, line + 1, "%s: %s = %s", comment,
node->details->uname, score);
}
}
}
if (rsc && rsc->children) {
GListPtr gIter = NULL;
gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
resource_t *child = (resource_t *) gIter->data;
dump_node_scores_worker(level, file, function, line, child, comment, nodes);
}
}
}
static void
append_dump_text(gpointer key, gpointer value, gpointer user_data)
{
char **dump_text = user_data;
char *new_text = crm_strdup_printf("%s %s=%s",
*dump_text, (char *)key, (char *)value);
free(*dump_text);
*dump_text = new_text;
}
void
dump_node_capacity(int level, const char *comment, node_t * node)
{
char *dump_text = crm_strdup_printf("%s: %s capacity:",
comment, node->details->uname);
g_hash_table_foreach(node->details->utilization, append_dump_text, &dump_text);
if (level == 0) {
fprintf(stdout, "%s\n", dump_text);
} else {
crm_trace("%s", dump_text);
}
free(dump_text);
}
void
dump_rsc_utilization(int level, const char *comment, resource_t * rsc, node_t * node)
{
char *dump_text = crm_strdup_printf("%s: %s utilization on %s:",
comment, rsc->id, node->details->uname);
g_hash_table_foreach(rsc->utilization, append_dump_text, &dump_text);
if (level == 0) {
fprintf(stdout, "%s\n", dump_text);
} else {
crm_trace("%s", dump_text);
}
free(dump_text);
}
gint
sort_rsc_index(gconstpointer a, gconstpointer b)
{
const resource_t *resource1 = (const resource_t *)a;
const resource_t *resource2 = (const resource_t *)b;
if (a == NULL && b == NULL) {
return 0;
}
if (a == NULL) {
return 1;
}
if (b == NULL) {
return -1;
}
if (resource1->sort_index > resource2->sort_index) {
return -1;
}
if (resource1->sort_index < resource2->sort_index) {
return 1;
}
return 0;
}
gint
sort_rsc_priority(gconstpointer a, gconstpointer b)
{
const resource_t *resource1 = (const resource_t *)a;
const resource_t *resource2 = (const resource_t *)b;
if (a == NULL && b == NULL) {
return 0;
}
if (a == NULL) {
return 1;
}
if (b == NULL) {
return -1;
}
if (resource1->priority > resource2->priority) {
return -1;
}
if (resource1->priority < resource2->priority) {
return 1;
}
return 0;
}
action_t *
custom_action(resource_t * rsc, char *key, const char *task,
node_t * on_node, gboolean optional, gboolean save_action,
pe_working_set_t * data_set)
{
action_t *action = NULL;
GListPtr possible_matches = NULL;
CRM_CHECK(key != NULL, return NULL);
CRM_CHECK(task != NULL, free(key); return NULL);
if (save_action && rsc != NULL) {
possible_matches = find_actions(rsc->actions, key, on_node);
} else if(save_action) {
#if 0
action = g_hash_table_lookup(data_set->singletons, key);
#else
/* More expensive but takes 'node' into account */
possible_matches = find_actions(data_set->actions, key, on_node);
#endif
}
if(data_set->singletons == NULL) {
data_set->singletons = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, NULL);
}
if (possible_matches != NULL) {
if (g_list_length(possible_matches) > 1) {
pe_warn("Action %s for %s on %s exists %d times",
task, rsc ? rsc->id : "",
on_node ? on_node->details->uname : "", g_list_length(possible_matches));
}
action = g_list_nth_data(possible_matches, 0);
pe_rsc_trace(rsc, "Found existing action %d (%s) for %s (%s) on %s",
action->id, action->uuid,
(rsc? rsc->id : "no resource"), task,
(on_node? on_node->details->uname : "no node"));
g_list_free(possible_matches);
}
if (action == NULL) {
if (save_action) {
pe_rsc_trace(rsc, "Creating %s action %d: %s for %s (%s) on %s",
(optional? "optional" : " mandatory"),
data_set->action_id, key,
(rsc? rsc->id : "no resource"), task,
(on_node? on_node->details->uname : "no node"));
}
action = calloc(1, sizeof(action_t));
if (save_action) {
action->id = data_set->action_id++;
} else {
action->id = 0;
}
action->rsc = rsc;
CRM_ASSERT(task != NULL);
action->task = strdup(task);
if (on_node) {
action->node = node_copy(on_node);
}
action->uuid = strdup(key);
pe_set_action_bit(action, pe_action_runnable);
if (optional) {
pe_set_action_bit(action, pe_action_optional);
} else {
pe_clear_action_bit(action, pe_action_optional);
}
action->extra = crm_str_table_new();
action->meta = crm_str_table_new();
if (save_action) {
data_set->actions = g_list_prepend(data_set->actions, action);
if(rsc == NULL) {
g_hash_table_insert(data_set->singletons, action->uuid, action);
}
}
if (rsc != NULL) {
action->op_entry = find_rsc_op_entry_helper(rsc, key, TRUE);
unpack_operation(action, action->op_entry, rsc->container, data_set);
if (save_action) {
rsc->actions = g_list_prepend(rsc->actions, action);
}
}
if (save_action) {
pe_rsc_trace(rsc, "Action %d created", action->id);
}
}
if (!optional && is_set(action->flags, pe_action_optional)) {
pe_rsc_trace(rsc, "Unset optional on action %d", action->id);
pe_clear_action_bit(action, pe_action_optional);
}
if (rsc != NULL) {
enum action_tasks a_task = text2task(action->task);
int warn_level = LOG_TRACE;
if (save_action) {
warn_level = LOG_WARNING;
}
if (is_set(action->flags, pe_action_have_node_attrs) == FALSE
&& action->node != NULL && action->op_entry != NULL) {
pe_set_action_bit(action, pe_action_have_node_attrs);
unpack_instance_attributes(data_set->input, action->op_entry, XML_TAG_ATTR_SETS,
action->node->details->attrs,
action->extra, NULL, FALSE, data_set->now);
}
if (is_set(action->flags, pe_action_pseudo)) {
/* leave untouched */
} else if (action->node == NULL) {
pe_rsc_trace(rsc, "Unset runnable on %s", action->uuid);
pe_clear_action_bit(action, pe_action_runnable);
} else if (is_not_set(rsc->flags, pe_rsc_managed)
&& g_hash_table_lookup(action->meta,
XML_LRM_ATTR_INTERVAL_MS) == NULL) {
crm_debug("Action %s (unmanaged)", action->uuid);
pe_rsc_trace(rsc, "Set optional on %s", action->uuid);
pe_set_action_bit(action, pe_action_optional);
/* action->runnable = FALSE; */
} else if (action->node->details->online == FALSE
&& (!pe__is_guest_node(action->node)
|| action->node->details->remote_requires_reset)) {
pe_clear_action_bit(action, pe_action_runnable);
do_crm_log(warn_level, "Action %s on %s is unrunnable (offline)",
action->uuid, action->node->details->uname);
if (is_set(action->rsc->flags, pe_rsc_managed)
&& save_action && a_task == stop_rsc
&& action->node->details->unclean == FALSE) {
pe_fence_node(data_set, action->node, "resource actions are unrunnable");
}
} else if (action->node->details->pending) {
pe_clear_action_bit(action, pe_action_runnable);
do_crm_log(warn_level, "Action %s on %s is unrunnable (pending)",
action->uuid, action->node->details->uname);
} else if (action->needs == rsc_req_nothing) {
pe_rsc_trace(rsc, "Action %s does not require anything", action->uuid);
pe_action_set_reason(action, NULL, TRUE);
pe_set_action_bit(action, pe_action_runnable);
#if 0
/*
* No point checking this
* - if we don't have quorum we can't stonith anyway
*/
} else if (action->needs == rsc_req_stonith) {
crm_trace("Action %s requires only stonith", action->uuid);
action->runnable = TRUE;
#endif
} else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE
&& data_set->no_quorum_policy == no_quorum_stop) {
pe_action_set_flag_reason(__FUNCTION__, __LINE__, action, NULL, "no quorum", pe_action_runnable, TRUE);
crm_debug("%s\t%s (cancelled : quorum)", action->node->details->uname, action->uuid);
} else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE
&& data_set->no_quorum_policy == no_quorum_freeze) {
pe_rsc_trace(rsc, "Check resource is already active: %s %s %s %s", rsc->id, action->uuid, role2text(rsc->next_role), role2text(rsc->role));
if (rsc->fns->active(rsc, TRUE) == FALSE || rsc->next_role > rsc->role) {
pe_action_set_flag_reason(__FUNCTION__, __LINE__, action, NULL, "quorum freeze", pe_action_runnable, TRUE);
pe_rsc_debug(rsc, "%s\t%s (cancelled : quorum freeze)",
action->node->details->uname, action->uuid);
}
} else if(is_not_set(action->flags, pe_action_runnable)) {
pe_rsc_trace(rsc, "Action %s is runnable", action->uuid);
//pe_action_set_reason(action, NULL, TRUE);
pe_set_action_bit(action, pe_action_runnable);
}
if (save_action) {
switch (a_task) {
case stop_rsc:
set_bit(rsc->flags, pe_rsc_stopping);
break;
case start_rsc:
clear_bit(rsc->flags, pe_rsc_starting);
if (is_set(action->flags, pe_action_runnable)) {
set_bit(rsc->flags, pe_rsc_starting);
}
break;
default:
break;
}
}
}
free(key);
return action;
}
static const char *
unpack_operation_on_fail(action_t * action)
{
const char *value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ON_FAIL);
if (safe_str_eq(action->task, CRMD_ACTION_STOP) && safe_str_eq(value, "standby")) {
crm_config_err("on-fail=standby is not allowed for stop actions: %s", action->rsc->id);
return NULL;
} else if (safe_str_eq(action->task, CRMD_ACTION_DEMOTE) && !value) {
/* demote on_fail defaults to master monitor value if present */
xmlNode *operation = NULL;
const char *name = NULL;
const char *role = NULL;
const char *on_fail = NULL;
const char *interval_spec = NULL;
const char *enabled = NULL;
CRM_CHECK(action->rsc != NULL, return NULL);
for (operation = __xml_first_child_element(action->rsc->ops_xml);
operation && !value; operation = __xml_next_element(operation)) {
if (!crm_str_eq((const char *)operation->name, "op", TRUE)) {
continue;
}
name = crm_element_value(operation, "name");
role = crm_element_value(operation, "role");
on_fail = crm_element_value(operation, XML_OP_ATTR_ON_FAIL);
enabled = crm_element_value(operation, "enabled");
interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL);
if (!on_fail) {
continue;
} else if (enabled && !crm_is_true(enabled)) {
continue;
} else if (safe_str_neq(name, "monitor") || safe_str_neq(role, "Master")) {
continue;
} else if (crm_parse_interval_spec(interval_spec) == 0) {
continue;
}
value = on_fail;
}
}
return value;
}
static xmlNode *
find_min_interval_mon(resource_t * rsc, gboolean include_disabled)
{
guint interval_ms = 0;
guint min_interval_ms = G_MAXUINT;
const char *name = NULL;
const char *value = NULL;
const char *interval_spec = NULL;
xmlNode *op = NULL;
xmlNode *operation = NULL;
for (operation = __xml_first_child_element(rsc->ops_xml); operation != NULL;
operation = __xml_next_element(operation)) {
if (crm_str_eq((const char *)operation->name, "op", TRUE)) {
name = crm_element_value(operation, "name");
interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL);
value = crm_element_value(operation, "enabled");
if (!include_disabled && value && crm_is_true(value) == FALSE) {
continue;
}
if (safe_str_neq(name, RSC_STATUS)) {
continue;
}
interval_ms = crm_parse_interval_spec(interval_spec);
if (interval_ms && (interval_ms < min_interval_ms)) {
min_interval_ms = interval_ms;
op = operation;
}
}
}
return op;
}
static int
unpack_start_delay(const char *value, GHashTable *meta)
{
int start_delay = 0;
if (value != NULL) {
start_delay = crm_get_msec(value);
if (start_delay < 0) {
start_delay = 0;
}
if (meta) {
g_hash_table_replace(meta, strdup(XML_OP_ATTR_START_DELAY), crm_itoa(start_delay));
}
}
return start_delay;
}
static int
unpack_interval_origin(const char *value, GHashTable *meta, xmlNode *xml_obj,
guint interval_ms, crm_time_t *now)
{
int start_delay = 0;
if ((interval_ms > 0) && (value != NULL)) {
crm_time_t *origin = crm_time_new(value);
if (origin && now) {
crm_time_t *delay = NULL;
int rc = crm_time_compare(origin, now);
long long delay_s = 0;
int interval_sec = interval_ms / 1000;
crm_trace("Origin: %s, interval: %d", value, interval_sec);
/* If 'origin' is in the future, find the most recent "multiple" that occurred in the past */
while(rc > 0) {
crm_time_add_seconds(origin, -interval_sec);
rc = crm_time_compare(origin, now);
}
/* Now find the first "multiple" that occurs after 'now' */
while (rc < 0) {
crm_time_add_seconds(origin, interval_sec);
rc = crm_time_compare(origin, now);
}
delay = crm_time_calculate_duration(origin, now);
crm_time_log(LOG_TRACE, "origin", origin,
crm_time_log_date | crm_time_log_timeofday |
crm_time_log_with_timezone);
crm_time_log(LOG_TRACE, "now", now,
crm_time_log_date | crm_time_log_timeofday |
crm_time_log_with_timezone);
crm_time_log(LOG_TRACE, "delay", delay, crm_time_log_duration);
delay_s = crm_time_get_seconds(delay);
if (delay_s < 0) {
delay_s = 0;
}
start_delay = delay_s * 1000;
if (xml_obj) {
crm_info("Calculated a start delay of %llds for %s", delay_s, ID(xml_obj));
}
if (meta) {
g_hash_table_replace(meta, strdup(XML_OP_ATTR_START_DELAY),
crm_itoa(start_delay));
}
crm_time_free(origin);
crm_time_free(delay);
} else if (!origin && xml_obj) {
crm_config_err("Operation %s contained an invalid " XML_OP_ATTR_ORIGIN ": %s",
ID(xml_obj), value);
}
}
return start_delay;
}
static int
unpack_timeout(const char *value)
{
int timeout = crm_get_msec(value);
if (timeout < 0) {
timeout = crm_get_msec(CRM_DEFAULT_OP_TIMEOUT_S);
}
return timeout;
}
int
pe_get_configured_timeout(resource_t *rsc, const char *action, pe_working_set_t *data_set)
{
xmlNode *child = NULL;
const char *timeout = NULL;
int timeout_ms = 0;
for (child = first_named_child(rsc->ops_xml, XML_ATTR_OP);
child != NULL; child = crm_next_same_xml(child)) {
if (safe_str_eq(action, crm_element_value(child, XML_NVPAIR_ATTR_NAME))) {
timeout = crm_element_value(child, XML_ATTR_TIMEOUT);
break;
}
}
if (timeout == NULL && data_set->op_defaults) {
GHashTable *action_meta = crm_str_table_new();
unpack_instance_attributes(data_set->input, data_set->op_defaults, XML_TAG_META_SETS,
NULL, action_meta, NULL, FALSE, data_set->now);
timeout = g_hash_table_lookup(action_meta, XML_ATTR_TIMEOUT);
}
// @TODO check meta-attributes (including versioned meta-attributes)
// @TODO maybe use min-interval monitor timeout as default for monitors
timeout_ms = crm_get_msec(timeout);
if (timeout_ms < 0) {
timeout_ms = crm_get_msec(CRM_DEFAULT_OP_TIMEOUT_S);
}
return timeout_ms;
}
#if ENABLE_VERSIONED_ATTRS
static void
unpack_versioned_meta(xmlNode *versioned_meta, xmlNode *xml_obj,
guint interval_ms, crm_time_t *now)
{
xmlNode *attrs = NULL;
xmlNode *attr = NULL;
for (attrs = __xml_first_child_element(versioned_meta); attrs != NULL;
attrs = __xml_next_element(attrs)) {
for (attr = __xml_first_child_element(attrs); attr != NULL;
attr = __xml_next_element(attr)) {
const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME);
const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE);
if (safe_str_eq(name, XML_OP_ATTR_START_DELAY)) {
int start_delay = unpack_start_delay(value, NULL);
crm_xml_add_int(attr, XML_NVPAIR_ATTR_VALUE, start_delay);
} else if (safe_str_eq(name, XML_OP_ATTR_ORIGIN)) {
int start_delay = unpack_interval_origin(value, NULL, xml_obj,
interval_ms, now);
crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, XML_OP_ATTR_START_DELAY);
crm_xml_add_int(attr, XML_NVPAIR_ATTR_VALUE, start_delay);
} else if (safe_str_eq(name, XML_ATTR_TIMEOUT)) {
int timeout = unpack_timeout(value);
crm_xml_add_int(attr, XML_NVPAIR_ATTR_VALUE, timeout);
}
}
}
}
#endif
/*!
* \brief Unpack operation XML into an action structure
*
* Unpack an operation's meta-attributes (normalizing the interval, timeout,
* and start delay values as integer milliseconds), requirements, and
* failure policy.
*
* \param[in,out] action Action to unpack into
* \param[in] xml_obj Operation XML (or NULL if all defaults)
* \param[in] container Resource that contains affected resource, if any
* \param[in] data_set Cluster state
*/
void
unpack_operation(action_t * action, xmlNode * xml_obj, resource_t * container,
pe_working_set_t * data_set)
{
guint interval_ms = 0;
int timeout = 0;
char *value_ms = NULL;
const char *value = NULL;
const char *field = NULL;
char *default_timeout = NULL;
#if ENABLE_VERSIONED_ATTRS
pe_rsc_action_details_t *rsc_details = NULL;
#endif
CRM_CHECK(action && action->rsc, return);
// Cluster-wide
unpack_instance_attributes(data_set->input, data_set->op_defaults, XML_TAG_META_SETS, NULL,
action->meta, NULL, FALSE, data_set->now);
// Probe timeouts default differently, so handle timeout default later
default_timeout = g_hash_table_lookup(action->meta, XML_ATTR_TIMEOUT);
if (default_timeout) {
default_timeout = strdup(default_timeout);
g_hash_table_remove(action->meta, XML_ATTR_TIMEOUT);
}
if (xml_obj) {
xmlAttrPtr xIter = NULL;
// take precedence over defaults
unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_META_SETS,
NULL, action->meta, NULL, TRUE,
data_set->now);
#if ENABLE_VERSIONED_ATTRS
rsc_details = pe_rsc_action_details(action);
pe_unpack_versioned_attributes(data_set->input, xml_obj,
XML_TAG_ATTR_SETS, NULL,
rsc_details->versioned_parameters,
data_set->now);
pe_unpack_versioned_attributes(data_set->input, xml_obj,
XML_TAG_META_SETS, NULL,
rsc_details->versioned_meta,
data_set->now);
#endif
/* Anything set as an XML property has highest precedence.
* This ensures we use the name and interval from the tag.
*/
for (xIter = xml_obj->properties; xIter; xIter = xIter->next) {
const char *prop_name = (const char *)xIter->name;
const char *prop_value = crm_element_value(xml_obj, prop_name);
g_hash_table_replace(action->meta, strdup(prop_name), strdup(prop_value));
}
}
g_hash_table_remove(action->meta, "id");
// Normalize interval to milliseconds
field = XML_LRM_ATTR_INTERVAL;
value = g_hash_table_lookup(action->meta, field);
if (value != NULL) {
interval_ms = crm_parse_interval_spec(value);
} else if ((xml_obj == NULL) && !strcmp(action->task, RSC_STATUS)) {
/* An orphaned recurring monitor will not have any XML. However, we
* want the interval to be set, so the action can be properly detected
* as a recurring monitor. Parse it from the key in this case.
*/
parse_op_key(action->uuid, NULL, NULL, &interval_ms);
}
if (interval_ms > 0) {
value_ms = crm_strdup_printf("%u", interval_ms);
g_hash_table_replace(action->meta, strdup(field), value_ms);
} else if (value) {
g_hash_table_remove(action->meta, field);
}
// Handle timeout default, now that we know the interval
if (g_hash_table_lookup(action->meta, XML_ATTR_TIMEOUT)) {
free(default_timeout);
} else {
// Probe timeouts default to minimum-interval monitor's
if (safe_str_eq(action->task, RSC_STATUS) && (interval_ms == 0)) {
xmlNode *min_interval_mon = find_min_interval_mon(action->rsc, FALSE);
if (min_interval_mon) {
value = crm_element_value(min_interval_mon, XML_ATTR_TIMEOUT);
if (value) {
crm_trace("\t%s defaults to minimum-interval monitor's timeout '%s'",
action->uuid, value);
free(default_timeout);
default_timeout = strdup(value);
}
}
}
if (default_timeout) {
g_hash_table_insert(action->meta, strdup(XML_ATTR_TIMEOUT),
default_timeout);
}
}
if (safe_str_neq(action->task, RSC_START)
&& safe_str_neq(action->task, RSC_PROMOTE)) {
action->needs = rsc_req_nothing;
value = "nothing (not start/promote)";
} else if (is_set(action->rsc->flags, pe_rsc_needs_fencing)) {
action->needs = rsc_req_stonith;
value = "fencing (resource)";
} else if (is_set(action->rsc->flags, pe_rsc_needs_quorum)) {
action->needs = rsc_req_quorum;
value = "quorum (resource)";
} else {
action->needs = rsc_req_nothing;
value = "nothing (resource)";
}
pe_rsc_trace(action->rsc, "\tAction %s requires: %s", action->uuid, value);
value = unpack_operation_on_fail(action);
if (value == NULL) {
} else if (safe_str_eq(value, "block")) {
action->on_fail = action_fail_block;
g_hash_table_insert(action->meta, strdup(XML_OP_ATTR_ON_FAIL), strdup("block"));
value = "block"; // The above could destroy the original string
} else if (safe_str_eq(value, "fence")) {
action->on_fail = action_fail_fence;
value = "node fencing";
if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) {
crm_config_err("Specifying on_fail=fence and" " stonith-enabled=false makes no sense");
action->on_fail = action_fail_stop;
action->fail_role = RSC_ROLE_STOPPED;
value = "stop resource";
}
} else if (safe_str_eq(value, "standby")) {
action->on_fail = action_fail_standby;
value = "node standby";
} else if (safe_str_eq(value, "ignore")
|| safe_str_eq(value, "nothing")) {
action->on_fail = action_fail_ignore;
value = "ignore";
} else if (safe_str_eq(value, "migrate")) {
action->on_fail = action_fail_migrate;
value = "force migration";
} else if (safe_str_eq(value, "stop")) {
action->on_fail = action_fail_stop;
action->fail_role = RSC_ROLE_STOPPED;
value = "stop resource";
} else if (safe_str_eq(value, "restart")) {
action->on_fail = action_fail_recover;
value = "restart (and possibly migrate)";
} else if (safe_str_eq(value, "restart-container")) {
if (container) {
action->on_fail = action_fail_restart_container;
value = "restart container (and possibly migrate)";
} else {
value = NULL;
}
} else {
pe_err("Resource %s: Unknown failure type (%s)", action->rsc->id, value);
value = NULL;
}
/* defaults */
if (value == NULL && container) {
action->on_fail = action_fail_restart_container;
value = "restart container (and possibly migrate) (default)";
/* For remote nodes, ensure that any failure that results in dropping an
* active connection to the node results in fencing of the node.
*
* There are only two action failures that don't result in fencing.
* 1. probes - probe failures are expected.
* 2. start - a start failure indicates that an active connection does not already
* exist. The user can set op on-fail=fence if they really want to fence start
* failures. */
} else if (((value == NULL) || !is_set(action->rsc->flags, pe_rsc_managed)) &&
(pe__resource_is_remote_conn(action->rsc, data_set) &&
!(safe_str_eq(action->task, CRMD_ACTION_STATUS) && (interval_ms == 0)) &&
(safe_str_neq(action->task, CRMD_ACTION_START)))) {
if (!is_set(action->rsc->flags, pe_rsc_managed)) {
action->on_fail = action_fail_stop;
action->fail_role = RSC_ROLE_STOPPED;
value = "stop unmanaged remote node (enforcing default)";
} else {
if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
value = "fence remote node (default)";
} else {
value = "recover remote node connection (default)";
}
if (action->rsc->remote_reconnect_ms) {
action->fail_role = RSC_ROLE_STOPPED;
}
action->on_fail = action_fail_reset_remote;
}
} else if (value == NULL && safe_str_eq(action->task, CRMD_ACTION_STOP)) {
if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
action->on_fail = action_fail_fence;
value = "resource fence (default)";
} else {
action->on_fail = action_fail_block;
value = "resource block (default)";
}
} else if (value == NULL) {
action->on_fail = action_fail_recover;
value = "restart (and possibly migrate) (default)";
}
pe_rsc_trace(action->rsc, "\t%s failure handling: %s", action->task, value);
value = NULL;
if (xml_obj != NULL) {
value = g_hash_table_lookup(action->meta, "role_after_failure");
if (value) {
pe_warn_once(pe_wo_role_after,
"Support for role_after_failure is deprecated and will be removed in a future release");
}
}
if (value != NULL && action->fail_role == RSC_ROLE_UNKNOWN) {
action->fail_role = text2role(value);
}
/* defaults */
if (action->fail_role == RSC_ROLE_UNKNOWN) {
if (safe_str_eq(action->task, CRMD_ACTION_PROMOTE)) {
action->fail_role = RSC_ROLE_SLAVE;
} else {
action->fail_role = RSC_ROLE_STARTED;
}
}
pe_rsc_trace(action->rsc, "\t%s failure results in: %s", action->task,
role2text(action->fail_role));
value = g_hash_table_lookup(action->meta, XML_OP_ATTR_START_DELAY);
if (value) {
unpack_start_delay(value, action->meta);
} else {
value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ORIGIN);
unpack_interval_origin(value, action->meta, xml_obj, interval_ms,
data_set->now);
}
value = g_hash_table_lookup(action->meta, XML_ATTR_TIMEOUT);
timeout = unpack_timeout(value);
g_hash_table_replace(action->meta, strdup(XML_ATTR_TIMEOUT), crm_itoa(timeout));
#if ENABLE_VERSIONED_ATTRS
unpack_versioned_meta(rsc_details->versioned_meta, xml_obj, interval_ms,
data_set->now);
#endif
}
static xmlNode *
find_rsc_op_entry_helper(resource_t * rsc, const char *key, gboolean include_disabled)
{
guint interval_ms = 0;
gboolean do_retry = TRUE;
char *local_key = NULL;
const char *name = NULL;
const char *value = NULL;
const char *interval_spec = NULL;
char *match_key = NULL;
xmlNode *op = NULL;
xmlNode *operation = NULL;
retry:
for (operation = __xml_first_child_element(rsc->ops_xml); operation != NULL;
operation = __xml_next_element(operation)) {
if (crm_str_eq((const char *)operation->name, "op", TRUE)) {
name = crm_element_value(operation, "name");
interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL);
value = crm_element_value(operation, "enabled");
if (!include_disabled && value && crm_is_true(value) == FALSE) {
continue;
}
interval_ms = crm_parse_interval_spec(interval_spec);
match_key = generate_op_key(rsc->id, name, interval_ms);
if (safe_str_eq(key, match_key)) {
op = operation;
}
free(match_key);
if (rsc->clone_name) {
match_key = generate_op_key(rsc->clone_name, name, interval_ms);
if (safe_str_eq(key, match_key)) {
op = operation;
}
free(match_key);
}
if (op != NULL) {
free(local_key);
return op;
}
}
}
free(local_key);
if (do_retry == FALSE) {
return NULL;
}
do_retry = FALSE;
if (strstr(key, CRMD_ACTION_MIGRATE) || strstr(key, CRMD_ACTION_MIGRATED)) {
local_key = generate_op_key(rsc->id, "migrate", 0);
key = local_key;
goto retry;
} else if (strstr(key, "_notify_")) {
local_key = generate_op_key(rsc->id, "notify", 0);
key = local_key;
goto retry;
}
return NULL;
}
xmlNode *
find_rsc_op_entry(resource_t * rsc, const char *key)
{
return find_rsc_op_entry_helper(rsc, key, FALSE);
}
void
print_node(const char *pre_text, node_t * node, gboolean details)
{
if (node == NULL) {
crm_trace("%s%s: ", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": ");
return;
}
CRM_ASSERT(node->details);
crm_trace("%s%s%sNode %s: (weight=%d, fixed=%s)",
pre_text == NULL ? "" : pre_text,
pre_text == NULL ? "" : ": ",
node->details->online ? "" : "Unavailable/Unclean ",
node->details->uname, node->weight, node->fixed ? "True" : "False");
if (details) {
char *pe_mutable = strdup("\t\t");
GListPtr gIter = node->details->running_rsc;
crm_trace("\t\t===Node Attributes");
g_hash_table_foreach(node->details->attrs, print_str_str, pe_mutable);
free(pe_mutable);
crm_trace("\t\t=== Resources");
for (; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
print_resource(LOG_TRACE, "\t\t", rsc, FALSE);
}
}
}
/*
* Used by the HashTable for-loop
*/
void
print_str_str(gpointer key, gpointer value, gpointer user_data)
{
crm_trace("%s%s %s ==> %s",
user_data == NULL ? "" : (char *)user_data,
user_data == NULL ? "" : ": ", (char *)key, (char *)value);
}
void
print_resource(int log_level, const char *pre_text, resource_t * rsc, gboolean details)
{
long options = pe_print_log | pe_print_pending;
if (rsc == NULL) {
do_crm_log(log_level - 1, "%s%s: ",
pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": ");
return;
}
if (details) {
options |= pe_print_details;
}
rsc->fns->print(rsc, pre_text, options, &log_level);
}
void
pe_free_action(action_t * action)
{
if (action == NULL) {
return;
}
g_list_free_full(action->actions_before, free); /* action_wrapper_t* */
g_list_free_full(action->actions_after, free); /* action_wrapper_t* */
if (action->extra) {
g_hash_table_destroy(action->extra);
}
if (action->meta) {
g_hash_table_destroy(action->meta);
}
#if ENABLE_VERSIONED_ATTRS
if (action->rsc) {
pe_free_rsc_action_details(action);
}
#endif
free(action->cancel_task);
free(action->reason);
free(action->task);
free(action->uuid);
free(action->node);
free(action);
}
GListPtr
find_recurring_actions(GListPtr input, node_t * not_on_node)
{
const char *value = NULL;
GListPtr result = NULL;
GListPtr gIter = input;
CRM_CHECK(input != NULL, return NULL);
for (; gIter != NULL; gIter = gIter->next) {
action_t *action = (action_t *) gIter->data;
value = g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL_MS);
if (value == NULL) {
/* skip */
} else if (safe_str_eq(value, "0")) {
/* skip */
} else if (safe_str_eq(CRMD_ACTION_CANCEL, action->task)) {
/* skip */
} else if (not_on_node == NULL) {
crm_trace("(null) Found: %s", action->uuid);
result = g_list_prepend(result, action);
} else if (action->node == NULL) {
/* skip */
} else if (action->node->details != not_on_node->details) {
crm_trace("Found: %s", action->uuid);
result = g_list_prepend(result, action);
}
}
return result;
}
enum action_tasks
get_complex_task(resource_t * rsc, const char *name, gboolean allow_non_atomic)
{
enum action_tasks task = text2task(name);
if (rsc == NULL) {
return task;
} else if (allow_non_atomic == FALSE || rsc->variant == pe_native) {
switch (task) {
case stopped_rsc:
case started_rsc:
case action_demoted:
case action_promoted:
crm_trace("Folding %s back into its atomic counterpart for %s", name, rsc->id);
return task - 1;
break;
default:
break;
}
}
return task;
}
action_t *
find_first_action(GListPtr input, const char *uuid, const char *task, node_t * on_node)
{
GListPtr gIter = NULL;
CRM_CHECK(uuid || task, return NULL);
for (gIter = input; gIter != NULL; gIter = gIter->next) {
action_t *action = (action_t *) gIter->data;
if (uuid != NULL && safe_str_neq(uuid, action->uuid)) {
continue;
} else if (task != NULL && safe_str_neq(task, action->task)) {
continue;
} else if (on_node == NULL) {
return action;
} else if (action->node == NULL) {
continue;
} else if (on_node->details == action->node->details) {
return action;
}
}
return NULL;
}
GListPtr
find_actions(GListPtr input, const char *key, const node_t *on_node)
{
GListPtr gIter = input;
GListPtr result = NULL;
CRM_CHECK(key != NULL, return NULL);
for (; gIter != NULL; gIter = gIter->next) {
action_t *action = (action_t *) gIter->data;
if (safe_str_neq(key, action->uuid)) {
crm_trace("%s does not match action %s", key, action->uuid);
continue;
} else if (on_node == NULL) {
crm_trace("Action %s matches (ignoring node)", key);
result = g_list_prepend(result, action);
} else if (action->node == NULL) {
crm_trace("Action %s matches (unallocated, assigning to %s)",
key, on_node->details->uname);
action->node = node_copy(on_node);
result = g_list_prepend(result, action);
} else if (on_node->details == action->node->details) {
crm_trace("Action %s on %s matches", key, on_node->details->uname);
result = g_list_prepend(result, action);
} else {
crm_trace("Action %s on node %s does not match requested node %s",
key, action->node->details->uname,
on_node->details->uname);
}
}
return result;
}
GList *
find_actions_exact(GList *input, const char *key, const pe_node_t *on_node)
{
GList *result = NULL;
CRM_CHECK(key != NULL, return NULL);
if (on_node == NULL) {
crm_trace("Not searching for action %s because node not specified",
key);
return NULL;
}
for (GList *gIter = input; gIter != NULL; gIter = gIter->next) {
pe_action_t *action = (pe_action_t *) gIter->data;
if (action->node == NULL) {
crm_trace("Skipping comparison of %s vs action %s without node",
key, action->uuid);
} else if (safe_str_neq(key, action->uuid)) {
crm_trace("Desired action %s doesn't match %s", key, action->uuid);
} else if (safe_str_neq(on_node->details->id,
action->node->details->id)) {
crm_trace("Action %s desired node ID %s doesn't match %s",
key, on_node->details->id, action->node->details->id);
} else {
crm_trace("Action %s matches", key);
result = g_list_prepend(result, action);
}
}
return result;
}
/*!
* \brief Find all actions of given type for a resource
*
* \param[in] rsc Resource to search
* \param[in] node Find only actions scheduled on this node
* \param[in] task Action name to search for
* \param[in] require_node If TRUE, NULL node or action node will not match
*
* \return List of actions found (or NULL if none)
* \note If node is not NULL and require_node is FALSE, matching actions
* without a node will be assigned to node.
*/
GList *
pe__resource_actions(const pe_resource_t *rsc, const pe_node_t *node,
const char *task, bool require_node)
{
GList *result = NULL;
char *key = generate_op_key(rsc->id, task, 0);
if (require_node) {
result = find_actions_exact(rsc->actions, key, node);
} else {
result = find_actions(rsc->actions, key, node);
}
free(key);
return result;
}
static void
resource_node_score(resource_t * rsc, node_t * node, int score, const char *tag)
{
node_t *match = NULL;
if ((rsc->exclusive_discover || (node->rsc_discover_mode == pe_discover_never))
&& safe_str_eq(tag, "symmetric_default")) {
/* This string comparision may be fragile, but exclusive resources and
* exclusive nodes should not have the symmetric_default constraint
* applied to them.
*/
return;
} else if (rsc->children) {
GListPtr gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
resource_t *child_rsc = (resource_t *) gIter->data;
resource_node_score(child_rsc, node, score, tag);
}
}
pe_rsc_trace(rsc, "Setting %s for %s on %s: %d", tag, rsc->id, node->details->uname, score);
match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id);
if (match == NULL) {
match = node_copy(node);
g_hash_table_insert(rsc->allowed_nodes, (gpointer) match->details->id, match);
}
match->weight = merge_weights(match->weight, score);
}
void
resource_location(resource_t * rsc, node_t * node, int score, const char *tag,
pe_working_set_t * data_set)
{
if (node != NULL) {
resource_node_score(rsc, node, score, tag);
} else if (data_set != NULL) {
GListPtr gIter = data_set->nodes;
for (; gIter != NULL; gIter = gIter->next) {
node_t *node_iter = (node_t *) gIter->data;
resource_node_score(rsc, node_iter, score, tag);
}
} else {
GHashTableIter iter;
node_t *node_iter = NULL;
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node_iter)) {
resource_node_score(rsc, node_iter, score, tag);
}
}
if (node == NULL && score == -INFINITY) {
if (rsc->allocated_to) {
crm_info("Deallocating %s from %s", rsc->id, rsc->allocated_to->details->uname);
free(rsc->allocated_to);
rsc->allocated_to = NULL;
}
}
}
#define sort_return(an_int, why) do { \
free(a_uuid); \
free(b_uuid); \
crm_trace("%s (%d) %c %s (%d) : %s", \
a_xml_id, a_call_id, an_int>0?'>':an_int<0?'<':'=', \
b_xml_id, b_call_id, why); \
return an_int; \
} while(0)
gint
sort_op_by_callid(gconstpointer a, gconstpointer b)
{
int a_call_id = -1;
int b_call_id = -1;
char *a_uuid = NULL;
char *b_uuid = NULL;
const xmlNode *xml_a = a;
const xmlNode *xml_b = b;
const char *a_xml_id = crm_element_value(xml_a, XML_ATTR_ID);
const char *b_xml_id = crm_element_value(xml_b, XML_ATTR_ID);
if (safe_str_eq(a_xml_id, b_xml_id)) {
/* We have duplicate lrm_rsc_op entries in the status
* section which is unliklely to be a good thing
* - we can handle it easily enough, but we need to get
* to the bottom of why it's happening.
*/
pe_err("Duplicate lrm_rsc_op entries named %s", a_xml_id);
sort_return(0, "duplicate");
}
crm_element_value_int(xml_a, XML_LRM_ATTR_CALLID, &a_call_id);
crm_element_value_int(xml_b, XML_LRM_ATTR_CALLID, &b_call_id);
if (a_call_id == -1 && b_call_id == -1) {
/* both are pending ops so it doesn't matter since
* stops are never pending
*/
sort_return(0, "pending");
} else if (a_call_id >= 0 && a_call_id < b_call_id) {
sort_return(-1, "call id");
} else if (b_call_id >= 0 && a_call_id > b_call_id) {
sort_return(1, "call id");
} else if (b_call_id >= 0 && a_call_id == b_call_id) {
/*
* The op and last_failed_op are the same
* Order on last-rc-change
*/
- int last_a = -1;
- int last_b = -1;
+ time_t last_a = -1;
+ time_t last_b = -1;
- crm_element_value_int(xml_a, XML_RSC_OP_LAST_CHANGE, &last_a);
- crm_element_value_int(xml_b, XML_RSC_OP_LAST_CHANGE, &last_b);
+ crm_element_value_epoch(xml_a, XML_RSC_OP_LAST_CHANGE, &last_a);
+ crm_element_value_epoch(xml_b, XML_RSC_OP_LAST_CHANGE, &last_b);
- crm_trace("rc-change: %d vs %d", last_a, last_b);
+ crm_trace("rc-change: %lld vs %lld",
+ (long long) last_a, (long long) last_b);
if (last_a >= 0 && last_a < last_b) {
sort_return(-1, "rc-change");
} else if (last_b >= 0 && last_a > last_b) {
sort_return(1, "rc-change");
}
sort_return(0, "rc-change");
} else {
/* One of the inputs is a pending operation
* Attempt to use XML_ATTR_TRANSITION_MAGIC to determine its age relative to the other
*/
int a_id = -1;
int b_id = -1;
const char *a_magic = crm_element_value(xml_a, XML_ATTR_TRANSITION_MAGIC);
const char *b_magic = crm_element_value(xml_b, XML_ATTR_TRANSITION_MAGIC);
CRM_CHECK(a_magic != NULL && b_magic != NULL, sort_return(0, "No magic"));
if (!decode_transition_magic(a_magic, &a_uuid, &a_id, NULL, NULL, NULL,
NULL)) {
sort_return(0, "bad magic a");
}
if (!decode_transition_magic(b_magic, &b_uuid, &b_id, NULL, NULL, NULL,
NULL)) {
sort_return(0, "bad magic b");
}
/* try to determine the relative age of the operation...
* some pending operations (e.g. a start) may have been superseded
* by a subsequent stop
*
* [a|b]_id == -1 means it's a shutdown operation and _always_ comes last
*/
if (safe_str_neq(a_uuid, b_uuid) || a_id == b_id) {
/*
* some of the logic in here may be redundant...
*
* if the UUID from the TE doesn't match then one better
* be a pending operation.
* pending operations don't survive between elections and joins
* because we query the LRM directly
*/
if (b_call_id == -1) {
sort_return(-1, "transition + call");
} else if (a_call_id == -1) {
sort_return(1, "transition + call");
}
} else if ((a_id >= 0 && a_id < b_id) || b_id == -1) {
sort_return(-1, "transition");
} else if ((b_id >= 0 && a_id > b_id) || a_id == -1) {
sort_return(1, "transition");
}
}
/* we should never end up here */
CRM_CHECK(FALSE, sort_return(0, "default"));
}
time_t
get_effective_time(pe_working_set_t * data_set)
{
if(data_set) {
if (data_set->now == NULL) {
crm_trace("Recording a new 'now'");
data_set->now = crm_time_new(NULL);
}
return crm_time_get_seconds_since_epoch(data_set->now);
}
crm_trace("Defaulting to 'now'");
return time(NULL);
}
gboolean
get_target_role(resource_t * rsc, enum rsc_role_e * role)
{
enum rsc_role_e local_role = RSC_ROLE_UNKNOWN;
const char *value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE);
CRM_CHECK(role != NULL, return FALSE);
if (value == NULL || safe_str_eq("started", value)
|| safe_str_eq("default", value)) {
return FALSE;
}
local_role = text2role(value);
if (local_role == RSC_ROLE_UNKNOWN) {
crm_config_err("%s: Unknown value for %s: %s", rsc->id, XML_RSC_ATTR_TARGET_ROLE, value);
return FALSE;
} else if (local_role > RSC_ROLE_STARTED) {
if (is_set(uber_parent(rsc)->flags, pe_rsc_promotable)) {
if (local_role > RSC_ROLE_SLAVE) {
/* This is what we'd do anyway, just leave the default to avoid messing up the placement algorithm */
return FALSE;
}
} else {
crm_config_err("%s is not part of a promotable clone resource, a %s of '%s' makes no sense",
rsc->id, XML_RSC_ATTR_TARGET_ROLE, value);
return FALSE;
}
}
*role = local_role;
return TRUE;
}
gboolean
order_actions(action_t * lh_action, action_t * rh_action, enum pe_ordering order)
{
GListPtr gIter = NULL;
action_wrapper_t *wrapper = NULL;
GListPtr list = NULL;
if (order == pe_order_none) {
return FALSE;
}
if (lh_action == NULL || rh_action == NULL) {
return FALSE;
}
crm_trace("Ordering Action %s before %s", lh_action->uuid, rh_action->uuid);
/* Ensure we never create a dependency on ourselves... it's happened */
CRM_ASSERT(lh_action != rh_action);
/* Filter dups, otherwise update_action_states() has too much work to do */
gIter = lh_action->actions_after;
for (; gIter != NULL; gIter = gIter->next) {
action_wrapper_t *after = (action_wrapper_t *) gIter->data;
if (after->action == rh_action && (after->type & order)) {
return FALSE;
}
}
wrapper = calloc(1, sizeof(action_wrapper_t));
wrapper->action = rh_action;
wrapper->type = order;
list = lh_action->actions_after;
list = g_list_prepend(list, wrapper);
lh_action->actions_after = list;
wrapper = NULL;
/* order |= pe_order_implies_then; */
/* order ^= pe_order_implies_then; */
wrapper = calloc(1, sizeof(action_wrapper_t));
wrapper->action = lh_action;
wrapper->type = order;
list = rh_action->actions_before;
list = g_list_prepend(list, wrapper);
rh_action->actions_before = list;
return TRUE;
}
action_t *
get_pseudo_op(const char *name, pe_working_set_t * data_set)
{
action_t *op = NULL;
if(data_set->singletons) {
op = g_hash_table_lookup(data_set->singletons, name);
}
if (op == NULL) {
op = custom_action(NULL, strdup(name), name, NULL, TRUE, TRUE, data_set);
set_bit(op->flags, pe_action_pseudo);
set_bit(op->flags, pe_action_runnable);
}
return op;
}
void
destroy_ticket(gpointer data)
{
ticket_t *ticket = data;
if (ticket->state) {
g_hash_table_destroy(ticket->state);
}
free(ticket->id);
free(ticket);
}
ticket_t *
ticket_new(const char *ticket_id, pe_working_set_t * data_set)
{
ticket_t *ticket = NULL;
if (ticket_id == NULL || strlen(ticket_id) == 0) {
return NULL;
}
if (data_set->tickets == NULL) {
data_set->tickets =
g_hash_table_new_full(crm_str_hash, g_str_equal, free,
destroy_ticket);
}
ticket = g_hash_table_lookup(data_set->tickets, ticket_id);
if (ticket == NULL) {
ticket = calloc(1, sizeof(ticket_t));
if (ticket == NULL) {
crm_err("Cannot allocate ticket '%s'", ticket_id);
return NULL;
}
crm_trace("Creaing ticket entry for %s", ticket_id);
ticket->id = strdup(ticket_id);
ticket->granted = FALSE;
ticket->last_granted = -1;
ticket->standby = FALSE;
ticket->state = crm_str_table_new();
g_hash_table_insert(data_set->tickets, strdup(ticket->id), ticket);
}
return ticket;
}
static void
filter_parameters(xmlNode * param_set, const char *param_string, bool need_present)
{
if (param_set && param_string) {
xmlAttrPtr xIter = param_set->properties;
while (xIter) {
const char *prop_name = (const char *)xIter->name;
char *name = crm_strdup_printf(" %s ", prop_name);
char *match = strstr(param_string, name);
free(name);
// Do now, because current entry might get removed below
xIter = xIter->next;
if (need_present && match == NULL) {
crm_trace("%s not found in %s", prop_name, param_string);
xml_remove_prop(param_set, prop_name);
} else if (need_present == FALSE && match) {
crm_trace("%s found in %s", prop_name, param_string);
xml_remove_prop(param_set, prop_name);
}
}
}
}
#if ENABLE_VERSIONED_ATTRS
static void
append_versioned_params(xmlNode *versioned_params, const char *ra_version, xmlNode *params)
{
GHashTable *hash = pe_unpack_versioned_parameters(versioned_params, ra_version);
char *key = NULL;
char *value = NULL;
GHashTableIter iter;
g_hash_table_iter_init(&iter, hash);
while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) {
crm_xml_add(params, key, value);
}
g_hash_table_destroy(hash);
}
#endif
/*!
* \internal
* \brief Calculate action digests and store in node's digest cache
*
* \param[in] rsc Resource that action was for
* \param[in] task Name of action performed
* \param[in] key Action's task key
* \param[in] node Node action was performed on
* \param[in] xml_op XML of operation in CIB status (if available)
* \param[in] calc_secure Whether to calculate secure digest
* \param[in] data_set Cluster working set
*
* \return Pointer to node's digest cache entry
*/
static op_digest_cache_t *
rsc_action_digest(pe_resource_t *rsc, const char *task, const char *key,
pe_node_t *node, xmlNode *xml_op, bool calc_secure,
pe_working_set_t *data_set)
{
op_digest_cache_t *data = NULL;
data = g_hash_table_lookup(node->details->digest_cache, key);
if (data == NULL) {
GHashTable *local_rsc_params = crm_str_table_new();
action_t *action = custom_action(rsc, strdup(key), task, node, TRUE, FALSE, data_set);
#if ENABLE_VERSIONED_ATTRS
xmlNode *local_versioned_params = create_xml_node(NULL, XML_TAG_RSC_VER_ATTRS);
const char *ra_version = NULL;
#endif
const char *op_version;
const char *restart_list = NULL;
const char *secure_list = " passwd password ";
data = calloc(1, sizeof(op_digest_cache_t));
CRM_ASSERT(data != NULL);
get_rsc_attributes(local_rsc_params, rsc, node, data_set);
#if ENABLE_VERSIONED_ATTRS
pe_get_versioned_attributes(local_versioned_params, rsc, node, data_set);
#endif
data->params_all = create_xml_node(NULL, XML_TAG_PARAMS);
// REMOTE_CONTAINER_HACK: Allow remote nodes that start containers with pacemaker remote inside
if (pe__add_bundle_remote_name(rsc, data->params_all,
XML_RSC_ATTR_REMOTE_RA_ADDR)) {
crm_trace("Set address for bundle connection %s (on %s)",
rsc->id, node->details->uname);
}
g_hash_table_foreach(local_rsc_params, hash2field, data->params_all);
g_hash_table_foreach(action->extra, hash2field, data->params_all);
g_hash_table_foreach(rsc->parameters, hash2field, data->params_all);
g_hash_table_foreach(action->meta, hash2metafield, data->params_all);
if(xml_op) {
secure_list = crm_element_value(xml_op, XML_LRM_ATTR_OP_SECURE);
restart_list = crm_element_value(xml_op, XML_LRM_ATTR_OP_RESTART);
op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION);
#if ENABLE_VERSIONED_ATTRS
ra_version = crm_element_value(xml_op, XML_ATTR_RA_VERSION);
#endif
} else {
op_version = CRM_FEATURE_SET;
}
#if ENABLE_VERSIONED_ATTRS
append_versioned_params(local_versioned_params, ra_version, data->params_all);
append_versioned_params(rsc->versioned_parameters, ra_version, data->params_all);
{
pe_rsc_action_details_t *details = pe_rsc_action_details(action);
append_versioned_params(details->versioned_parameters, ra_version, data->params_all);
}
#endif
filter_action_parameters(data->params_all, op_version);
g_hash_table_destroy(local_rsc_params);
pe_free_action(action);
data->digest_all_calc = calculate_operation_digest(data->params_all, op_version);
if (calc_secure) {
data->params_secure = copy_xml(data->params_all);
if(secure_list) {
filter_parameters(data->params_secure, secure_list, FALSE);
}
data->digest_secure_calc = calculate_operation_digest(data->params_secure, op_version);
}
if(xml_op && crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST) != NULL) {
data->params_restart = copy_xml(data->params_all);
if (restart_list) {
filter_parameters(data->params_restart, restart_list, TRUE);
}
data->digest_restart_calc = calculate_operation_digest(data->params_restart, op_version);
}
g_hash_table_insert(node->details->digest_cache, strdup(key), data);
}
return data;
}
op_digest_cache_t *
rsc_action_digest_cmp(resource_t * rsc, xmlNode * xml_op, node_t * node,
pe_working_set_t * data_set)
{
op_digest_cache_t *data = NULL;
char *key = NULL;
guint interval_ms = 0;
const char *op_version;
const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
const char *interval_ms_s = crm_element_value(xml_op,
XML_LRM_ATTR_INTERVAL_MS);
const char *digest_all;
const char *digest_restart;
CRM_ASSERT(node != NULL);
op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION);
digest_all = crm_element_value(xml_op, XML_LRM_ATTR_OP_DIGEST);
digest_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST);
interval_ms = crm_parse_ms(interval_ms_s);
key = generate_op_key(rsc->id, task, interval_ms);
data = rsc_action_digest(rsc, task, key, node, xml_op,
is_set(data_set->flags, pe_flag_sanitized),
data_set);
data->rc = RSC_DIGEST_MATCH;
if (digest_restart && data->digest_restart_calc && strcmp(data->digest_restart_calc, digest_restart) != 0) {
pe_rsc_info(rsc, "Parameters to %s on %s changed: was %s vs. now %s (restart:%s) %s",
key, node->details->uname,
crm_str(digest_restart), data->digest_restart_calc,
op_version, crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC));
data->rc = RSC_DIGEST_RESTART;
} else if (digest_all == NULL) {
/* it is unknown what the previous op digest was */
data->rc = RSC_DIGEST_UNKNOWN;
} else if (strcmp(digest_all, data->digest_all_calc) != 0) {
pe_rsc_info(rsc, "Parameters to %s on %s changed: was %s vs. now %s (%s:%s) %s",
key, node->details->uname,
crm_str(digest_all), data->digest_all_calc,
(interval_ms > 0)? "reschedule" : "reload",
op_version, crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC));
data->rc = RSC_DIGEST_ALL;
}
free(key);
return data;
}
/*!
* \internal
* \brief Create an unfencing summary for use in special node attribute
*
* Create a string combining a fence device's resource ID, agent type, and
* parameter digest (whether for all parameters or just non-private parameters).
* This can be stored in a special node attribute, allowing us to detect changes
* in either the agent type or parameters, to know whether unfencing must be
* redone or can be safely skipped when the device's history is cleaned.
*
* \param[in] rsc_id Fence device resource ID
* \param[in] agent_type Fence device agent
* \param[in] param_digest Fence device parameter digest
*
* \return Newly allocated string with unfencing digest
* \note The caller is responsible for freeing the result.
*/
static inline char *
create_unfencing_summary(const char *rsc_id, const char *agent_type,
const char *param_digest)
{
return crm_strdup_printf("%s:%s:%s", rsc_id, agent_type, param_digest);
}
/*!
* \internal
* \brief Check whether a node can skip unfencing
*
* Check whether a fence device's current definition matches a node's
* stored summary of when it was last unfenced by the device.
*
* \param[in] rsc_id Fence device's resource ID
* \param[in] agent Fence device's agent type
* \param[in] digest_calc Fence device's current parameter digest
* \param[in] node_summary Value of node's special unfencing node attribute
* (a comma-separated list of unfencing summaries for
* all devices that have unfenced this node)
*
* \return TRUE if digest matches, FALSE otherwise
*/
static bool
unfencing_digest_matches(const char *rsc_id, const char *agent,
const char *digest_calc, const char *node_summary)
{
bool matches = FALSE;
if (rsc_id && agent && digest_calc && node_summary) {
char *search_secure = create_unfencing_summary(rsc_id, agent,
digest_calc);
/* The digest was calculated including the device ID and agent,
* so there is no risk of collision using strstr().
*/
matches = (strstr(node_summary, search_secure) != NULL);
crm_trace("Calculated unfencing digest '%s' %sfound in '%s'",
search_secure, matches? "" : "not ", node_summary);
free(search_secure);
}
return matches;
}
/* Magic string to use as action name for digest cache entries used for
* unfencing checks. This is not a real action name (i.e. "on"), so
* check_action_definition() won't confuse these entries with real actions.
*/
#define STONITH_DIGEST_TASK "stonith-on"
/*!
* \internal
* \brief Calculate fence device digests and digest comparison result
*
* \param[in] rsc Fence device resource
* \param[in] agent Fence device's agent type
* \param[in] node Node with digest cache to use
* \param[in] data_set Cluster working set
*
* \return Node's digest cache entry
*/
static op_digest_cache_t *
fencing_action_digest_cmp(pe_resource_t *rsc, const char *agent,
pe_node_t *node, pe_working_set_t *data_set)
{
const char *node_summary = NULL;
// Calculate device's current parameter digests
char *key = generate_op_key(rsc->id, STONITH_DIGEST_TASK, 0);
op_digest_cache_t *data = rsc_action_digest(rsc, STONITH_DIGEST_TASK, key,
node, NULL, TRUE, data_set);
free(key);
// Check whether node has special unfencing summary node attribute
node_summary = pe_node_attribute_raw(node, CRM_ATTR_DIGESTS_ALL);
if (node_summary == NULL) {
data->rc = RSC_DIGEST_UNKNOWN;
return data;
}
// Check whether full parameter digest matches
if (unfencing_digest_matches(rsc->id, agent, data->digest_all_calc,
node_summary)) {
data->rc = RSC_DIGEST_MATCH;
return data;
}
// Check whether secure parameter digest matches
node_summary = pe_node_attribute_raw(node, CRM_ATTR_DIGESTS_SECURE);
if (unfencing_digest_matches(rsc->id, agent, data->digest_secure_calc,
node_summary)) {
data->rc = RSC_DIGEST_MATCH;
if (is_set(data_set->flags, pe_flag_stdout)) {
printf("Only 'private' parameters to %s for unfencing %s changed\n",
rsc->id, node->details->uname);
}
return data;
}
// Parameters don't match
data->rc = RSC_DIGEST_ALL;
if (is_set(data_set->flags, (pe_flag_sanitized|pe_flag_stdout))
&& data->digest_secure_calc) {
char *digest = create_unfencing_summary(rsc->id, agent,
data->digest_secure_calc);
printf("Parameters to %s for unfencing %s changed, try '%s'\n",
rsc->id, node->details->uname, digest);
free(digest);
}
return data;
}
const char *rsc_printable_id(resource_t *rsc)
{
if (is_not_set(rsc->flags, pe_rsc_unique)) {
return ID(rsc->xml);
}
return rsc->id;
}
void
clear_bit_recursive(resource_t * rsc, unsigned long long flag)
{
GListPtr gIter = rsc->children;
clear_bit(rsc->flags, flag);
for (; gIter != NULL; gIter = gIter->next) {
resource_t *child_rsc = (resource_t *) gIter->data;
clear_bit_recursive(child_rsc, flag);
}
}
void
set_bit_recursive(resource_t * rsc, unsigned long long flag)
{
GListPtr gIter = rsc->children;
set_bit(rsc->flags, flag);
for (; gIter != NULL; gIter = gIter->next) {
resource_t *child_rsc = (resource_t *) gIter->data;
set_bit_recursive(child_rsc, flag);
}
}
static GListPtr
find_unfencing_devices(GListPtr candidates, GListPtr matches)
{
for (GListPtr gIter = candidates; gIter != NULL; gIter = gIter->next) {
resource_t *candidate = gIter->data;
const char *provides = g_hash_table_lookup(candidate->meta, XML_RSC_ATTR_PROVIDES);
const char *requires = g_hash_table_lookup(candidate->meta, XML_RSC_ATTR_REQUIRES);
if(candidate->children) {
matches = find_unfencing_devices(candidate->children, matches);
} else if (is_not_set(candidate->flags, pe_rsc_fence_device)) {
continue;
} else if (crm_str_eq(provides, "unfencing", FALSE) || crm_str_eq(requires, "unfencing", FALSE)) {
matches = g_list_prepend(matches, candidate);
}
}
return matches;
}
action_t *
pe_fence_op(node_t * node, const char *op, bool optional, const char *reason, pe_working_set_t * data_set)
{
char *op_key = NULL;
action_t *stonith_op = NULL;
if(op == NULL) {
op = data_set->stonith_action;
}
op_key = crm_strdup_printf("%s-%s-%s", CRM_OP_FENCE, node->details->uname, op);
if(data_set->singletons) {
stonith_op = g_hash_table_lookup(data_set->singletons, op_key);
}
if(stonith_op == NULL) {
stonith_op = custom_action(NULL, op_key, CRM_OP_FENCE, node, TRUE, TRUE, data_set);
add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET, node->details->uname);
add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET_UUID, node->details->id);
add_hash_param(stonith_op->meta, "stonith_action", op);
if (pe__is_guest_or_remote_node(node)
&& is_set(data_set->flags, pe_flag_enable_unfencing)) {
/* Extra work to detect device changes on remotes
*
* We may do this for all nodes in the future, but for now
* the check_action_definition() based stuff works fine.
*/
long max = 1024;
long digests_all_offset = 0;
long digests_secure_offset = 0;
char *digests_all = malloc(max);
char *digests_secure = malloc(max);
GListPtr matches = find_unfencing_devices(data_set->resources, NULL);
for (GListPtr gIter = matches; gIter != NULL; gIter = gIter->next) {
resource_t *match = gIter->data;
const char *agent = g_hash_table_lookup(match->meta,
XML_ATTR_TYPE);
op_digest_cache_t *data = NULL;
data = fencing_action_digest_cmp(match, agent, node, data_set);
if(data->rc == RSC_DIGEST_ALL) {
optional = FALSE;
crm_notice("Unfencing %s (remote): because the definition of %s changed", node->details->uname, match->id);
if (is_set(data_set->flags, pe_flag_stdout)) {
fprintf(stdout, " notice: Unfencing %s (remote): because the definition of %s changed\n", node->details->uname, match->id);
}
}
digests_all_offset += snprintf(
digests_all+digests_all_offset, max-digests_all_offset,
"%s:%s:%s,", match->id, agent, data->digest_all_calc);
digests_secure_offset += snprintf(
digests_secure+digests_secure_offset, max-digests_secure_offset,
"%s:%s:%s,", match->id, agent, data->digest_secure_calc);
}
g_hash_table_insert(stonith_op->meta,
strdup(XML_OP_ATTR_DIGESTS_ALL),
digests_all);
g_hash_table_insert(stonith_op->meta,
strdup(XML_OP_ATTR_DIGESTS_SECURE),
digests_secure);
}
} else {
free(op_key);
}
if(optional == FALSE && pe_can_fence(data_set, node)) {
pe_action_required(stonith_op, NULL, reason);
} else if(reason && stonith_op->reason == NULL) {
stonith_op->reason = strdup(reason);
}
return stonith_op;
}
void
trigger_unfencing(
resource_t * rsc, node_t *node, const char *reason, action_t *dependency, pe_working_set_t * data_set)
{
if(is_not_set(data_set->flags, pe_flag_enable_unfencing)) {
/* No resources require it */
return;
} else if (rsc != NULL && is_not_set(rsc->flags, pe_rsc_fence_device)) {
/* Wasn't a stonith device */
return;
} else if(node
&& node->details->online
&& node->details->unclean == FALSE
&& node->details->shutdown == FALSE) {
action_t *unfence = pe_fence_op(node, "on", FALSE, reason, data_set);
if(dependency) {
order_actions(unfence, dependency, pe_order_optional);
}
} else if(rsc) {
GHashTableIter iter;
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
if(node->details->online && node->details->unclean == FALSE && node->details->shutdown == FALSE) {
trigger_unfencing(rsc, node, reason, dependency, data_set);
}
}
}
}
gboolean
add_tag_ref(GHashTable * tags, const char * tag_name, const char * obj_ref)
{
tag_t *tag = NULL;
GListPtr gIter = NULL;
gboolean is_existing = FALSE;
CRM_CHECK(tags && tag_name && obj_ref, return FALSE);
tag = g_hash_table_lookup(tags, tag_name);
if (tag == NULL) {
tag = calloc(1, sizeof(tag_t));
if (tag == NULL) {
return FALSE;
}
tag->id = strdup(tag_name);
tag->refs = NULL;
g_hash_table_insert(tags, strdup(tag_name), tag);
}
for (gIter = tag->refs; gIter != NULL; gIter = gIter->next) {
const char *existing_ref = (const char *) gIter->data;
if (crm_str_eq(existing_ref, obj_ref, TRUE)){
is_existing = TRUE;
break;
}
}
if (is_existing == FALSE) {
tag->refs = g_list_append(tag->refs, strdup(obj_ref));
crm_trace("Added: tag=%s ref=%s", tag->id, obj_ref);
}
return TRUE;
}
void pe_action_set_flag_reason(const char *function, long line,
pe_action_t *action, pe_action_t *reason, const char *text,
enum pe_action_flags flags, bool overwrite)
{
bool unset = FALSE;
bool update = FALSE;
const char *change = NULL;
if(is_set(flags, pe_action_runnable)) {
unset = TRUE;
change = "unrunnable";
} else if(is_set(flags, pe_action_optional)) {
unset = TRUE;
change = "required";
} else if(is_set(flags, pe_action_migrate_runnable)) {
unset = TRUE;
overwrite = TRUE;
change = "unrunnable";
} else if(is_set(flags, pe_action_dangle)) {
change = "dangling";
} else if(is_set(flags, pe_action_requires_any)) {
change = "required";
} else {
crm_err("Unknown flag change to %x by %s: 0x%s",
flags, action->uuid, (reason? reason->uuid : "0"));
}
if(unset) {
if(is_set(action->flags, flags)) {
action->flags = crm_clear_bit(function, line, action->uuid, action->flags, flags);
update = TRUE;
}
} else {
if(is_not_set(action->flags, flags)) {
action->flags = crm_set_bit(function, line, action->uuid, action->flags, flags);
update = TRUE;
}
}
if((change && update) || text) {
char *reason_text = NULL;
if(reason == NULL) {
pe_action_set_reason(action, text, overwrite);
} else if(reason->rsc == NULL) {
reason_text = crm_strdup_printf("%s %s%c %s", change, reason->task, text?':':0, text?text:"");
} else {
reason_text = crm_strdup_printf("%s %s %s%c %s", change, reason->rsc->id, reason->task, text?':':0, text?text:"NA");
}
if(reason_text && action->rsc != reason->rsc) {
pe_action_set_reason(action, reason_text, overwrite);
}
free(reason_text);
}
}
void pe_action_set_reason(pe_action_t *action, const char *reason, bool overwrite)
{
if(action->reason && overwrite) {
pe_rsc_trace(action->rsc, "Changing %s reason from '%s' to '%s'", action->uuid, action->reason, reason);
free(action->reason);
action->reason = NULL;
}
if(action->reason == NULL) {
if(reason) {
pe_rsc_trace(action->rsc, "Set %s reason to '%s'", action->uuid, reason);
action->reason = strdup(reason);
} else {
action->reason = NULL;
}
}
}
/*!
* \internal
* \brief Check whether shutdown has been requested for a node
*
* \param[in] node Node to check
*
* \return TRUE if node has shutdown attribute set and nonzero, FALSE otherwise
* \note This differs from simply using node->details->shutdown in that it can
* be used before that has been determined (and in fact to determine it),
* and it can also be used to distinguish requested shutdown from implicit
* shutdown of remote nodes by virtue of their connection stopping.
*/
bool
pe__shutdown_requested(pe_node_t *node)
{
const char *shutdown = pe_node_attribute_raw(node, XML_CIB_ATTR_SHUTDOWN);
return shutdown && strcmp(shutdown, "0");
}
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
index 78b0100b6e..07614b9d64 100644
--- a/tools/crm_mon.c
+++ b/tools/crm_mon.c
@@ -1,1891 +1,1891 @@
/*
* Copyright 2004-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include /* crm_ends_with_ext */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "crm_mon.h"
#define SUMMARY "Provides a summary of cluster's current state.\n\n" \
"Outputs varying levels of detail in a number of different formats."
/*
* Definitions indicating which items to print
*/
static unsigned int show = mon_show_default;
/*
* Definitions indicating how to output
*/
static mon_output_format_t output_format = mon_output_unset;
static char *output_filename = NULL; /* if sending output to a file, its name */
/* other globals */
static GMainLoop *mainloop = NULL;
static guint timer_id = 0;
static mainloop_timer_t *refresh_timer = NULL;
static pe_working_set_t *mon_data_set = NULL;
static cib_t *cib = NULL;
static stonith_t *st = NULL;
static xmlNode *current_cib = NULL;
static pcmk__common_args_t *args = NULL;
static pcmk__output_t *out = NULL;
static GOptionContext *context = NULL;
/* FIXME allow, detect, and correctly interpret glob pattern or regex? */
const char *print_neg_location_prefix = "";
-long last_refresh = 0;
+static time_t last_refresh = 0;
crm_trigger_t *refresh_trigger = NULL;
static pcmk__supported_format_t formats[] = {
#if CURSES_ENABLED
CRM_MON_SUPPORTED_FORMAT_CURSES,
#endif
PCMK__SUPPORTED_FORMAT_HTML,
PCMK__SUPPORTED_FORMAT_NONE,
PCMK__SUPPORTED_FORMAT_TEXT,
PCMK__SUPPORTED_FORMAT_XML,
{ NULL, NULL, NULL }
};
/* Define exit codes for monitoring-compatible output
* For nagios plugins, the possibilities are
* OK=0, WARN=1, CRIT=2, and UNKNOWN=3
*/
#define MON_STATUS_WARN CRM_EX_ERROR
#define MON_STATUS_CRIT CRM_EX_INVALID_PARAM
#define MON_STATUS_UNKNOWN CRM_EX_UNIMPLEMENT_FEATURE
struct {
int reconnect_msec;
int fence_history_level;
gboolean daemonize;
gboolean show_bans;
char *pid_file;
char *external_agent;
char *external_recipient;
unsigned int mon_ops;
} options = {
.reconnect_msec = 5000,
.fence_history_level = 1,
.mon_ops = mon_op_default
};
static void clean_up_connections(void);
static crm_exit_t clean_up(crm_exit_t exit_code);
static void crm_diff_update(const char *event, xmlNode * msg);
static gboolean mon_refresh_display(gpointer user_data);
static int cib_connect(gboolean full);
static void mon_st_callback_event(stonith_t * st, stonith_event_t * e);
static void mon_st_callback_display(stonith_t * st, stonith_event_t * e);
static void kick_refresh(gboolean data_updated);
static gboolean
as_cgi_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
if (args->output_ty != NULL) {
free(args->output_ty);
}
args->output_ty = strdup("html");
output_format = mon_output_cgi;
options.mon_ops |= mon_op_one_shot;
return TRUE;
}
static gboolean
as_html_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
if (optarg == NULL) {
g_set_error(error, G_OPTION_ERROR, 1, "--as-html requires filename");
return FALSE;
}
if (args->output_ty != NULL) {
free(args->output_ty);
}
args->output_ty = strdup("html");
output_format = mon_output_html;
output_filename = strdup(optarg);
umask(S_IWGRP | S_IWOTH);
return TRUE;
}
static gboolean
as_simple_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
if (args->output_ty != NULL) {
free(args->output_ty);
}
args->output_ty = strdup("text");
output_format = mon_output_monitor;
options.mon_ops |= mon_op_one_shot;
return TRUE;
}
static gboolean
as_xml_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
if (args->output_ty != NULL) {
free(args->output_ty);
}
args->output_ty = strdup("xml");
output_format = mon_output_legacy_xml;
options.mon_ops |= mon_op_one_shot;
return TRUE;
}
static gboolean
fence_history_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
int rc = crm_atoi(optarg, "2");
if (rc == -1 || rc > 3) {
g_set_error(error, G_OPTION_ERROR, CRM_EX_INVALID_PARAM, "Fence history must be 0-3");
return FALSE;
} else {
options.fence_history_level = rc;
}
return TRUE;
}
static gboolean
group_by_node_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
options.mon_ops |= mon_op_group_by_node;
return TRUE;
}
static gboolean
hide_headers_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
show &= ~mon_show_headers;
return TRUE;
}
static gboolean
inactive_resources_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
options.mon_ops |= mon_op_inactive_resources;
return TRUE;
}
static gboolean
no_curses_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
output_format = mon_output_plain;
return TRUE;
}
static gboolean
one_shot_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
options.mon_ops |= mon_op_one_shot;
return TRUE;
}
static gboolean
print_brief_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
options.mon_ops |= mon_op_print_brief;
return TRUE;
}
static gboolean
print_clone_detail_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
options.mon_ops |= mon_op_print_clone_detail;
return TRUE;
}
static gboolean
print_pending_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
options.mon_ops |= mon_op_print_pending;
return TRUE;
}
static gboolean
print_timing_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
options.mon_ops |= mon_op_print_timing;
show |= mon_show_operations;
return TRUE;
}
static gboolean
reconnect_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
int rc = crm_get_msec(optarg);
if (rc == -1) {
g_set_error(error, G_OPTION_ERROR, CRM_EX_INVALID_PARAM, "Invalid value for -i: %s", optarg);
return FALSE;
} else {
options.reconnect_msec = crm_get_msec(optarg);
}
return TRUE;
}
static gboolean
show_attributes_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
show |= mon_show_attributes;
return TRUE;
}
static gboolean
show_bans_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
show |= mon_show_bans;
if (optarg != NULL) {
print_neg_location_prefix = optarg;
}
return TRUE;
}
static gboolean
show_failcounts_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
show |= mon_show_failcounts;
return TRUE;
}
static gboolean
show_operations_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
show |= mon_show_operations;
return TRUE;
}
static gboolean
show_tickets_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
show |= mon_show_tickets;
return TRUE;
}
static gboolean
use_cib_file_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
setenv("CIB_file", optarg, 1);
options.mon_ops |= mon_op_one_shot;
return TRUE;
}
static gboolean
watch_fencing_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
options.mon_ops |= mon_op_watch_fencing;
return TRUE;
}
#define INDENT " "
/* *INDENT-OFF* */
static GOptionEntry addl_entries[] = {
{ "interval", 'i', 0, G_OPTION_ARG_CALLBACK, reconnect_cb,
"Update frequency in seconds",
"SECONDS" },
{ "one-shot", '1', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, one_shot_cb,
"Display the cluster status once on the console and exit",
NULL },
{ "disable-ncurses", 'N', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, no_curses_cb,
"Disable the use of ncurses",
NULL },
{ "daemonize", 'd', 0, G_OPTION_ARG_NONE, &options.daemonize,
"Run in the background as a daemon",
NULL },
{ "pid-file", 'p', 0, G_OPTION_ARG_FILENAME, &options.pid_file,
"(Advanced) Daemon pid file location",
"FILE" },
{ "external-agent", 'E', 0, G_OPTION_ARG_FILENAME, &options.external_agent,
"A program to run when resource operations take place",
"FILE" },
{ "external-recipient", 'e', 0, G_OPTION_ARG_STRING, &options.external_recipient,
"A recipient for your program (assuming you want the program to send something to someone).",
"RCPT" },
{ "xml-file", 'x', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, use_cib_file_cb,
NULL,
NULL },
{ NULL }
};
static GOptionEntry display_entries[] = {
{ "group-by-node", 'n', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, group_by_node_cb,
"Group resources by node",
NULL },
{ "inactive", 'r', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, inactive_resources_cb,
"Display inactive resources",
NULL },
{ "failcounts", 'f', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_failcounts_cb,
"Display resource fail counts",
NULL },
{ "operations", 'o', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_operations_cb,
"Display resource operation history",
NULL },
{ "timing-details", 't', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_timing_cb,
"Display resource operation history with timing details",
NULL },
{ "tickets", 'c', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_tickets_cb,
"Display cluster tickets",
NULL },
{ "watch-fencing", 'W', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, watch_fencing_cb,
"Listen for fencing events. For use with --external-agent",
NULL },
{ "fence-history", 'm', G_OPTION_FLAG_OPTIONAL_ARG, G_OPTION_ARG_CALLBACK, fence_history_cb,
"Show fence history:\n"
INDENT "0=off, 1=failures and pending (default without option),\n"
INDENT "2=add successes (default without value for option),\n"
INDENT "3=show full history without reduction to most recent of each flavor",
"LEVEL" },
{ "neg-locations", 'L', 0, G_OPTION_ARG_CALLBACK, show_bans_cb,
"Display negative location constraints [optionally filtered by id prefix]",
NULL },
{ "show-node-attributes", 'A', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_attributes_cb,
"Display node attributes",
NULL },
{ "hide-headers", 'D', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, hide_headers_cb,
"Hide all headers",
NULL },
{ "show-detail", 'R', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_clone_detail_cb,
"Show more details (node IDs, individual clone instances)",
NULL },
{ "brief", 'b', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_brief_cb,
"Brief output",
NULL },
{ "pending", 'j', G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_pending_cb,
"Display pending state if 'record-pending' is enabled",
NULL },
{ NULL }
};
static GOptionEntry mode_entries[] = {
{ "as-html", 'h', G_OPTION_FLAG_FILENAME, G_OPTION_ARG_CALLBACK, as_html_cb,
"Write cluster status to the named HTML file",
"FILE" },
{ "as-xml", 'X', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, as_xml_cb,
"Write cluster status as XML to stdout. This will enable one-shot mode.",
NULL },
{ "web-cgi", 'w', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, as_cgi_cb,
"Web mode with output suitable for CGI (preselected when run as *.cgi)",
NULL },
{ "simple-status", 's', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, as_simple_cb,
"Display the cluster status once as a simple one line output (suitable for nagios)",
NULL },
{ NULL }
};
/* *INDENT-ON* */
static gboolean
mon_timer_popped(gpointer data)
{
int rc = pcmk_ok;
#if CURSES_ENABLED
if (output_format == mon_output_console) {
clear();
refresh();
}
#endif
if (timer_id > 0) {
g_source_remove(timer_id);
timer_id = 0;
}
print_as(output_format, "Reconnecting...\n");
rc = cib_connect(TRUE);
if (rc != pcmk_ok) {
timer_id = g_timeout_add(options.reconnect_msec, mon_timer_popped, NULL);
}
return FALSE;
}
static void
mon_cib_connection_destroy(gpointer user_data)
{
print_as(output_format, "Connection to the cluster-daemons terminated\n");
if (refresh_timer != NULL) {
/* we'll trigger a refresh after reconnect */
mainloop_timer_stop(refresh_timer);
}
if (timer_id) {
/* we'll trigger a new reconnect-timeout at the end */
g_source_remove(timer_id);
timer_id = 0;
}
if (st) {
/* the client API won't properly reconnect notifications
* if they are still in the table - so remove them
*/
st->cmds->remove_notification(st, T_STONITH_NOTIFY_DISCONNECT);
st->cmds->remove_notification(st, T_STONITH_NOTIFY_FENCE);
st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY);
if (st->state != stonith_disconnected) {
st->cmds->disconnect(st);
}
}
if (cib) {
cib->cmds->signoff(cib);
timer_id = g_timeout_add(options.reconnect_msec, mon_timer_popped, NULL);
}
return;
}
/*
* Mainloop signal handler.
*/
static void
mon_shutdown(int nsig)
{
clean_up(CRM_EX_OK);
}
#if CURSES_ENABLED
static sighandler_t ncurses_winch_handler;
static void
mon_winresize(int nsig)
{
static int not_done;
int lines = 0, cols = 0;
if (!not_done++) {
if (ncurses_winch_handler)
/* the original ncurses WINCH signal handler does the
* magic of retrieving the new window size;
* otherwise, we'd have to use ioctl or tgetent */
(*ncurses_winch_handler) (SIGWINCH);
getmaxyx(stdscr, lines, cols);
resizeterm(lines, cols);
mainloop_set_trigger(refresh_trigger);
}
not_done--;
}
#endif
static int
cib_connect(gboolean full)
{
int rc = pcmk_ok;
static gboolean need_pass = TRUE;
CRM_CHECK(cib != NULL, return -EINVAL);
if (getenv("CIB_passwd") != NULL) {
need_pass = FALSE;
}
if (is_set(options.mon_ops, mon_op_fence_connect) && st == NULL) {
st = stonith_api_new();
}
if (is_set(options.mon_ops, mon_op_fence_connect) && st != NULL && st->state == stonith_disconnected) {
rc = st->cmds->connect(st, crm_system_name, NULL);
if (rc == pcmk_ok) {
crm_trace("Setting up stonith callbacks");
if (is_set(options.mon_ops, mon_op_watch_fencing)) {
st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT,
mon_st_callback_event);
st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, mon_st_callback_event);
} else {
st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT,
mon_st_callback_display);
st->cmds->register_notification(st, T_STONITH_NOTIFY_HISTORY, mon_st_callback_display);
}
}
}
if (cib->state != cib_connected_query && cib->state != cib_connected_command) {
crm_trace("Connecting to the CIB");
if ((output_format == mon_output_console) && need_pass && (cib->variant == cib_remote)) {
need_pass = FALSE;
print_as(output_format, "Password:");
}
rc = cib->cmds->signon(cib, crm_system_name, cib_query);
if (rc != pcmk_ok) {
return rc;
}
rc = cib->cmds->query(cib, NULL, ¤t_cib, cib_scope_local | cib_sync_call);
if (rc == pcmk_ok) {
mon_refresh_display(&output_format);
}
if (rc == pcmk_ok && full) {
if (rc == pcmk_ok) {
rc = cib->cmds->set_connection_dnotify(cib, mon_cib_connection_destroy);
if (rc == -EPROTONOSUPPORT) {
print_as
(output_format, "Notification setup not supported, won't be able to reconnect after failure");
if (output_format == mon_output_console) {
sleep(2);
}
rc = pcmk_ok;
}
}
if (rc == pcmk_ok) {
cib->cmds->del_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update);
rc = cib->cmds->add_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update);
}
if (rc != pcmk_ok) {
print_as(output_format, "Notification setup failed, could not monitor CIB actions");
if (output_format == mon_output_console) {
sleep(2);
}
clean_up_connections();
}
}
}
return rc;
}
#if CURSES_ENABLED
static const char *
get_option_desc(char c)
{
const char *desc = "No help available";
for (GOptionEntry *entry = display_entries; entry != NULL; entry++) {
if (entry->short_name == c) {
desc = entry->description;
break;
}
}
return desc;
}
#define print_option_help(output_format, option, condition) \
print_as(output_format, "%c %c: \t%s\n", ((condition)? '*': ' '), option, get_option_desc(option));
static gboolean
detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer user_data)
{
int c;
gboolean config_mode = FALSE;
while (1) {
/* Get user input */
c = getchar();
switch (c) {
case 'm':
if (!options.fence_history_level) {
options.mon_ops |= mon_op_fence_history;
options.mon_ops |= mon_op_fence_connect;
if (st == NULL) {
mon_cib_connection_destroy(NULL);
}
}
show ^= mon_show_fence_history;
break;
case 'c':
show ^= mon_show_tickets;
break;
case 'f':
show ^= mon_show_failcounts;
break;
case 'n':
options.mon_ops ^= mon_op_group_by_node;
break;
case 'o':
show ^= mon_show_operations;
if ((show & mon_show_operations) == 0) {
options.mon_ops &= ~mon_op_print_timing;
}
break;
case 'r':
options.mon_ops ^= mon_op_inactive_resources;
break;
case 'R':
options.mon_ops ^= mon_op_print_clone_detail;
break;
case 't':
options.mon_ops ^= mon_op_print_timing;
if (is_set(options.mon_ops, mon_op_print_timing)) {
show |= mon_show_operations;
}
break;
case 'A':
show ^= mon_show_attributes;
break;
case 'L':
show ^= mon_show_bans;
break;
case 'D':
/* If any header is shown, clear them all, otherwise set them all */
if (show & mon_show_headers) {
show &= ~mon_show_headers;
} else {
show |= mon_show_headers;
}
break;
case 'b':
options.mon_ops ^= mon_op_print_brief;
break;
case 'j':
options.mon_ops ^= mon_op_print_pending;
break;
case '?':
config_mode = TRUE;
break;
default:
goto refresh;
}
if (!config_mode)
goto refresh;
blank_screen();
print_as(output_format, "Display option change mode\n");
print_as(output_format, "\n");
print_option_help(output_format, 'c', show & mon_show_tickets);
print_option_help(output_format, 'f', show & mon_show_failcounts);
print_option_help(output_format, 'n', is_set(options.mon_ops, mon_op_group_by_node));
print_option_help(output_format, 'o', show & mon_show_operations);
print_option_help(output_format, 'r', is_set(options.mon_ops, mon_op_inactive_resources));
print_option_help(output_format, 't', is_set(options.mon_ops, mon_op_print_timing));
print_option_help(output_format, 'A', show & mon_show_attributes);
print_option_help(output_format, 'L', show & mon_show_bans);
print_option_help(output_format, 'D', (show & mon_show_headers) == 0);
print_option_help(output_format, 'R', is_set(options.mon_ops, mon_op_print_clone_detail));
print_option_help(output_format, 'b', is_set(options.mon_ops, mon_op_print_brief));
print_option_help(output_format, 'j', is_set(options.mon_ops, mon_op_print_pending));
print_option_help(output_format, 'm', (show & mon_show_fence_history));
print_as(output_format, "\n");
print_as(output_format, "Toggle fields via field letter, type any other key to return");
}
refresh:
mon_refresh_display(NULL);
return TRUE;
}
#endif
// Basically crm_signal_handler(SIGCHLD, SIG_IGN) plus the SA_NOCLDWAIT flag
static void
avoid_zombies()
{
struct sigaction sa;
memset(&sa, 0, sizeof(struct sigaction));
if (sigemptyset(&sa.sa_mask) < 0) {
crm_warn("Cannot avoid zombies: %s", pcmk_strerror(errno));
return;
}
sa.sa_handler = SIG_IGN;
sa.sa_flags = SA_RESTART|SA_NOCLDWAIT;
if (sigaction(SIGCHLD, &sa, NULL) < 0) {
crm_warn("Cannot avoid zombies: %s", pcmk_strerror(errno));
}
}
static GOptionContext *
build_arg_context(pcmk__common_args_t *args) {
GOptionContext *context = NULL;
GOptionEntry extra_prog_entries[] = {
{ "quiet", 'Q', 0, G_OPTION_ARG_NONE, &(args->quiet),
"Be less descriptive in output.",
NULL },
{ NULL }
};
const char *examples = "Examples:\n\n"
"Display the cluster status on the console with updates as they occur:\n\n"
"\tcrm_mon\n\n"
"Display the cluster status on the console just once then exit:\n\n"
"\tcrm_mon -1\n\n"
"Display your cluster status, group resources by node, and include inactive resources in the list:\n\n"
"\tcrm_mon --group-by-node --inactive\n\n"
"Start crm_mon as a background daemon and have it write the cluster status to an HTML file:\n\n"
"\tcrm_mon --daemonize --as-html /path/to/docroot/filename.html\n\n"
"Start crm_mon and export the current cluster status as XML to stdout, then exit:\n\n"
"\tcrm_mon --as-xml\n";
context = pcmk__build_arg_context(args, "console (default), html, text, xml");
pcmk__add_main_args(context, extra_prog_entries);
g_option_context_set_description(context, examples);
pcmk__add_arg_group(context, "mode", "Mode Options (mutually exclusive):",
"Show mode options", mode_entries);
pcmk__add_arg_group(context, "display", "Display Options:",
"Show display options", display_entries);
pcmk__add_arg_group(context, "additional", "Additional Options:",
"Show additional options", addl_entries);
return context;
}
int
main(int argc, char **argv)
{
int rc = pcmk_ok;
char **processed_args = NULL;
GError *error = NULL;
args = pcmk__new_common_args(SUMMARY);
context = build_arg_context(args);
pcmk__register_formats(context, formats);
options.pid_file = strdup("/tmp/ClusterMon.pid");
crm_log_cli_init("crm_mon");
// Avoid needing to wait for subprocesses forked for -E/--external-agent
avoid_zombies();
if (crm_ends_with_ext(argv[0], ".cgi") == TRUE) {
output_format = mon_output_cgi;
options.mon_ops |= mon_op_one_shot;
}
processed_args = pcmk__cmdline_preproc(argc, argv, "ehimpxEL");
if (!g_option_context_parse_strv(context, &processed_args, &error)) {
fprintf(stderr, "%s: %s\n", g_get_prgname(), error->message);
return clean_up(CRM_EX_USAGE);
}
for (int i = 0; i < args->verbosity; i++) {
crm_bump_log_level(argc, argv);
}
/* Which output format to use could come from two places: The --as-xml
* style arguments we gave in mode_entries above, or the formatted output
* arguments added by pcmk__register_formats. If the latter were used,
* output_format will be mon_output_unset.
*
* Call the callbacks as if those older style arguments were provided so
* the various things they do get done.
*/
if (output_format == mon_output_unset) {
gboolean retval = TRUE;
g_clear_error(&error);
/* NOTE: There is no way to specify CGI mode or simple mode with --output-as.
* Those will need to get handled eventually, at which point something else
* will need to be added to this block.
*/
if (safe_str_eq(args->output_ty, "html")) {
retval = as_html_cb("h", args->output_dest, NULL, &error);
} else if (safe_str_eq(args->output_ty, "text")) {
retval = no_curses_cb("N", NULL, NULL, &error);
} else if (safe_str_eq(args->output_ty, "xml")) {
if (args->output_ty != NULL) {
free(args->output_ty);
}
args->output_ty = strdup("xml");
output_format = mon_output_xml;
options.mon_ops |= mon_op_one_shot;
} else if (is_set(options.mon_ops, mon_op_one_shot)) {
if (args->output_ty != NULL) {
free(args->output_ty);
}
args->output_ty = strdup("text");
output_format = mon_output_plain;
} else {
/* Neither old nor new arguments were given, so set the default. */
if (args->output_ty != NULL) {
free(args->output_ty);
}
args->output_ty = strdup("console");
output_format = mon_output_console;
}
if (!retval) {
fprintf(stderr, "%s: %s\n", g_get_prgname(), error->message);
return clean_up(CRM_EX_USAGE);
}
}
/* If certain format options were specified, we want to set some extra
* options. We can just process these like they were given on the
* command line.
*/
g_clear_error(&error);
if (output_format == mon_output_plain) {
if (!pcmk__force_args(context, &error, "%s --output-fancy", g_get_prgname())) {
fprintf(stderr, "%s: %s\n", g_get_prgname(), error->message);
return clean_up(CRM_EX_USAGE);
}
} else if (output_format == mon_output_html) {
if (!pcmk__force_args(context, &error, "%s --output-meta-refresh %d",
g_get_prgname(), options.reconnect_msec/1000)) {
fprintf(stderr, "%s: %s\n", g_get_prgname(), error->message);
return clean_up(CRM_EX_USAGE);
}
} else if (output_format == mon_output_cgi) {
if (!pcmk__force_args(context, &error, "%s --output-cgi", g_get_prgname())) {
fprintf(stderr, "%s: %s\n", g_get_prgname(), error->message);
return clean_up(CRM_EX_USAGE);
}
} else if (output_format == mon_output_legacy_xml) {
output_format = mon_output_xml;
if (!pcmk__force_args(context, &error, "%s --output-legacy-xml", g_get_prgname())) {
fprintf(stderr, "%s: %s\n", g_get_prgname(), error->message);
return clean_up(CRM_EX_USAGE);
}
}
rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
if (rc != 0) {
fprintf(stderr, "Error creating output format %s: %s\n", args->output_ty, pcmk_strerror(rc));
return clean_up(CRM_EX_ERROR);
}
stonith__register_messages(out);
if (args->version) {
/* FIXME: For the moment, this won't do anything on XML or HTML formats
* because finish is not getting called. That's commented out in
* clean_up.
*/
out->version(out, false);
return clean_up(CRM_EX_OK);
}
if (args->quiet) {
show &= ~mon_show_times;
}
if (is_set(options.mon_ops, mon_op_watch_fencing)) {
options.mon_ops |= mon_op_fence_connect;
/* don't moan as fence_history_level == 1 is default */
options.fence_history_level = 0;
}
/* create the cib-object early to be able to do further
* decisions based on the cib-source
*/
cib = cib_new();
if (cib == NULL) {
rc = -EINVAL;
} else {
switch (cib->variant) {
case cib_native:
/* cib & fencing - everything available */
break;
case cib_file:
/* Don't try to connect to fencing as we
* either don't have a running cluster or
* the fencing-information would possibly
* not match the cib data from a file.
* As we don't expect cib-updates coming
* in enforce one-shot. */
options.fence_history_level = 0;
options.mon_ops |= mon_op_one_shot;
break;
case cib_remote:
/* updates coming in but no fencing */
options.fence_history_level = 0;
break;
case cib_undefined:
case cib_database:
default:
/* something is odd */
rc = -EINVAL;
crm_err("Invalid cib-source");
break;
}
}
switch (options.fence_history_level) {
case 3:
options.mon_ops |= mon_op_fence_full_history;
/* fall through to next lower level */
case 2:
show |= mon_show_fence_history;
/* fall through to next lower level */
case 1:
options.mon_ops |= mon_op_fence_history;
options.mon_ops |= mon_op_fence_connect;
break;
default:
break;
}
/* Extra sanity checks when in CGI mode */
if (output_format == mon_output_cgi) {
if (output_filename != NULL) {
fprintf(stderr, "CGI mode cannot be used with -h\n");
return clean_up(CRM_EX_USAGE);
} else if (cib && cib->variant == cib_file) {
fprintf(stderr, "CGI mode used with CIB file\n");
return clean_up(CRM_EX_USAGE);
} else if (options.external_agent != NULL) {
fprintf(stderr, "CGI mode cannot be used with --external-agent\n");
return clean_up(CRM_EX_USAGE);
} else if (options.daemonize == TRUE) {
fprintf(stderr, "CGI mode cannot be used with -d\n");
return clean_up(CRM_EX_USAGE);
}
}
/* XML output always prints everything */
if (output_format == mon_output_xml) {
show = mon_show_all;
options.mon_ops |= mon_op_print_timing;
}
if (is_set(options.mon_ops, mon_op_one_shot)) {
if (output_format == mon_output_console) {
output_format = mon_output_plain;
}
} else if (options.daemonize) {
if ((output_format == mon_output_console) || (output_format == mon_output_plain)) {
output_format = mon_output_none;
}
crm_enable_stderr(FALSE);
if ((output_format != mon_output_html)
&& !options.external_agent) {
printf ("Looks like you forgot to specify one or more of: "
"--as-html, --external-agent\n");
return clean_up(CRM_EX_USAGE);
}
if (cib) {
/* to be on the safe side don't have cib-object around
* when we are forking
*/
cib_delete(cib);
cib = NULL;
crm_make_daemon(crm_system_name, TRUE, options.pid_file);
cib = cib_new();
if (cib == NULL) {
rc = -EINVAL;
}
/* otherwise assume we've got the same cib-object we've just destroyed
* in our parent
*/
}
} else if (output_format == mon_output_console) {
#if CURSES_ENABLED
initscr();
cbreak();
noecho();
crm_enable_stderr(FALSE);
#else
options.mon_ops |= mon_op_one_shot;
output_format = mon_output_plain;
printf("Defaulting to one-shot mode\n");
printf("You need to have curses available at compile time to enable console mode\n");
#endif
}
crm_info("Starting %s", crm_system_name);
if (cib) {
do {
if (is_not_set(options.mon_ops, mon_op_one_shot)) {
print_as(output_format ,"Waiting until cluster is available on this node ...\n");
}
rc = cib_connect(is_not_set(options.mon_ops, mon_op_one_shot));
if (is_set(options.mon_ops, mon_op_one_shot)) {
break;
} else if (rc != pcmk_ok) {
sleep(options.reconnect_msec / 1000);
#if CURSES_ENABLED
if (output_format == mon_output_console) {
clear();
refresh();
}
#endif
} else {
if (output_format == mon_output_html) {
print_as(output_format, "Writing html to %s ...\n", output_filename);
}
}
} while (rc == -ENOTCONN);
}
if (rc != pcmk_ok) {
if (output_format == mon_output_monitor) {
printf("CLUSTER CRIT: Connection to cluster failed: %s\n",
pcmk_strerror(rc));
return clean_up(MON_STATUS_CRIT);
} else {
if (rc == -ENOTCONN) {
print_as(output_format ,"\nError: cluster is not available on this node\n");
} else {
print_as(output_format ,"\nConnection to cluster failed: %s\n",
pcmk_strerror(rc));
}
}
if (output_format == mon_output_console) {
sleep(2);
}
return clean_up(crm_errno2exit(rc));
}
if (is_set(options.mon_ops, mon_op_one_shot)) {
return clean_up(CRM_EX_OK);
}
mainloop = g_main_loop_new(NULL, FALSE);
mainloop_add_signal(SIGTERM, mon_shutdown);
mainloop_add_signal(SIGINT, mon_shutdown);
#if CURSES_ENABLED
if (output_format == mon_output_console) {
ncurses_winch_handler = crm_signal_handler(SIGWINCH, mon_winresize);
if (ncurses_winch_handler == SIG_DFL ||
ncurses_winch_handler == SIG_IGN || ncurses_winch_handler == SIG_ERR)
ncurses_winch_handler = NULL;
g_io_add_watch(g_io_channel_unix_new(STDIN_FILENO), G_IO_IN, detect_user_input, NULL);
}
#endif
refresh_trigger = mainloop_add_trigger(G_PRIORITY_LOW, mon_refresh_display, NULL);
g_main_loop_run(mainloop);
g_main_loop_unref(mainloop);
crm_info("Exiting %s", crm_system_name);
return clean_up(CRM_EX_OK);
}
#define mon_warn(output_format, mon_ops, fmt...) do { \
if (is_not_set(mon_ops, mon_op_has_warnings)) { \
print_as(output_format, "CLUSTER WARN:"); \
} else { \
print_as(output_format, ","); \
} \
print_as(output_format, fmt); \
mon_ops |= mon_op_has_warnings; \
} while(0)
/*!
* \internal
* \brief Print one-line status suitable for use with monitoring software
*
* \param[in] data_set Working set of CIB state
* \param[in] history List of stonith actions
*
* \note This function's output (and the return code when the program exits)
* should conform to https://www.monitoring-plugins.org/doc/guidelines.html
*/
static void
print_simple_status(pe_working_set_t * data_set, stonith_history_t *history,
unsigned int mon_ops, mon_output_format_t output_format)
{
GListPtr gIter = NULL;
int nodes_online = 0;
int nodes_standby = 0;
int nodes_maintenance = 0;
if (data_set->dc_node == NULL) {
mon_warn(output_format, mon_ops, " No DC");
}
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
if (node->details->standby && node->details->online) {
nodes_standby++;
} else if (node->details->maintenance && node->details->online) {
nodes_maintenance++;
} else if (node->details->online) {
nodes_online++;
} else {
mon_warn(output_format, mon_ops, " offline node: %s", node->details->uname);
}
}
if (is_not_set(mon_ops, mon_op_has_warnings)) {
int nresources = count_resources(data_set, NULL);
print_as(output_format, "CLUSTER OK: %d node%s online", nodes_online, s_if_plural(nodes_online));
if (nodes_standby > 0) {
print_as(output_format, ", %d standby node%s", nodes_standby, s_if_plural(nodes_standby));
}
if (nodes_maintenance > 0) {
print_as(output_format, ", %d maintenance node%s", nodes_maintenance, s_if_plural(nodes_maintenance));
}
print_as(output_format, ", %d resource%s configured", nresources, s_if_plural(nresources));
}
print_as(output_format, "\n");
}
/*!
* \internal
* \brief Reduce the stonith-history
* for successful actions we keep the last of every action-type & target
* for failed actions we record as well who had failed
* for actions in progress we keep full track
*
* \param[in] history List of stonith actions
*
*/
static stonith_history_t *
reduce_stonith_history(stonith_history_t *history)
{
stonith_history_t *new = history, *hp, *np;
if (new) {
hp = new->next;
new->next = NULL;
while (hp) {
stonith_history_t *hp_next = hp->next;
hp->next = NULL;
for (np = new; ; np = np->next) {
if ((hp->state == st_done) || (hp->state == st_failed)) {
/* action not in progress */
if (safe_str_eq(hp->target, np->target) &&
safe_str_eq(hp->action, np->action) &&
(hp->state == np->state) &&
((hp->state == st_done) ||
safe_str_eq(hp->delegate, np->delegate))) {
/* purge older hp */
stonith_history_free(hp);
break;
}
}
if (!np->next) {
np->next = hp;
break;
}
}
hp = hp_next;
}
}
return new;
}
/*!
* \internal
* \brief Sort the stonith-history
* sort by competed most current on the top
* pending actions lacking a completed-stamp are gathered at the top
*
* \param[in] history List of stonith actions
*
*/
static stonith_history_t *
sort_stonith_history(stonith_history_t *history)
{
stonith_history_t *new = NULL, *pending = NULL, *hp, *np, *tmp;
for (hp = history; hp; ) {
tmp = hp->next;
if ((hp->state == st_done) || (hp->state == st_failed)) {
/* sort into new */
if ((!new) || (hp->completed > new->completed)) {
hp->next = new;
new = hp;
} else {
np = new;
do {
if ((!np->next) || (hp->completed > np->next->completed)) {
hp->next = np->next;
np->next = hp;
break;
}
np = np->next;
} while (1);
}
} else {
/* put into pending */
hp->next = pending;
pending = hp;
}
hp = tmp;
}
/* pending actions don't have a completed-stamp so make them go front */
if (pending) {
stonith_history_t *last_pending = pending;
while (last_pending->next) {
last_pending = last_pending->next;
}
last_pending->next = new;
new = pending;
}
return new;
}
static int
send_custom_trap(const char *node, const char *rsc, const char *task, int target_rc, int rc,
int status, const char *desc)
{
pid_t pid;
/*setenv needs chars, these are ints */
char *rc_s = crm_itoa(rc);
char *status_s = crm_itoa(status);
char *target_rc_s = crm_itoa(target_rc);
crm_debug("Sending external notification to '%s' via '%s'", options.external_recipient, options.external_agent);
if(rsc) {
setenv("CRM_notify_rsc", rsc, 1);
}
if (options.external_recipient) {
setenv("CRM_notify_recipient", options.external_recipient, 1);
}
setenv("CRM_notify_node", node, 1);
setenv("CRM_notify_task", task, 1);
setenv("CRM_notify_desc", desc, 1);
setenv("CRM_notify_rc", rc_s, 1);
setenv("CRM_notify_target_rc", target_rc_s, 1);
setenv("CRM_notify_status", status_s, 1);
pid = fork();
if (pid == -1) {
crm_perror(LOG_ERR, "notification fork() failed.");
}
if (pid == 0) {
/* crm_debug("notification: I am the child. Executing the nofitication program."); */
execl(options.external_agent, options.external_agent, NULL);
exit(CRM_EX_ERROR);
}
crm_trace("Finished running custom notification program '%s'.", options.external_agent);
free(target_rc_s);
free(status_s);
free(rc_s);
return 0;
}
static void
handle_rsc_op(xmlNode * xml, const char *node_id)
{
int rc = -1;
int status = -1;
int target_rc = -1;
gboolean notify = TRUE;
char *rsc = NULL;
char *task = NULL;
const char *desc = NULL;
const char *magic = NULL;
const char *id = NULL;
const char *node = NULL;
xmlNode *n = xml;
xmlNode * rsc_op = xml;
if(strcmp((const char*)xml->name, XML_LRM_TAG_RSC_OP) != 0) {
xmlNode *cIter;
for(cIter = xml->children; cIter; cIter = cIter->next) {
handle_rsc_op(cIter, node_id);
}
return;
}
id = crm_element_value(rsc_op, XML_LRM_ATTR_TASK_KEY);
if (id == NULL) {
/* Compatibility with <= 1.1.5 */
id = ID(rsc_op);
}
magic = crm_element_value(rsc_op, XML_ATTR_TRANSITION_MAGIC);
if (magic == NULL) {
/* non-change */
return;
}
if (!decode_transition_magic(magic, NULL, NULL, NULL, &status, &rc,
&target_rc)) {
crm_err("Invalid event %s detected for %s", magic, id);
return;
}
if (parse_op_key(id, &rsc, &task, NULL) == FALSE) {
crm_err("Invalid event detected for %s", id);
goto bail;
}
node = crm_element_value(rsc_op, XML_LRM_ATTR_TARGET);
while (n != NULL && safe_str_neq(XML_CIB_TAG_STATE, TYPE(n))) {
n = n->parent;
}
if(node == NULL && n) {
node = crm_element_value(n, XML_ATTR_UNAME);
}
if (node == NULL && n) {
node = ID(n);
}
if (node == NULL) {
node = node_id;
}
if (node == NULL) {
crm_err("No node detected for event %s (%s)", magic, id);
goto bail;
}
/* look up where we expected it to be? */
desc = pcmk_strerror(pcmk_ok);
if (status == PCMK_LRM_OP_DONE && target_rc == rc) {
crm_notice("%s of %s on %s completed: %s", task, rsc, node, desc);
if (rc == PCMK_OCF_NOT_RUNNING) {
notify = FALSE;
}
} else if (status == PCMK_LRM_OP_DONE) {
desc = services_ocf_exitcode_str(rc);
crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc);
} else {
desc = services_lrm_status_str(status);
crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc);
}
if (notify && options.external_agent) {
send_custom_trap(node, rsc, task, target_rc, rc, status, desc);
}
bail:
free(rsc);
free(task);
}
static gboolean
mon_trigger_refresh(gpointer user_data)
{
mainloop_set_trigger(refresh_trigger);
return FALSE;
}
#define NODE_PATT "/lrm[@id="
static char *
get_node_from_xpath(const char *xpath)
{
char *nodeid = NULL;
char *tmp = strstr(xpath, NODE_PATT);
if(tmp) {
tmp += strlen(NODE_PATT);
tmp += 1;
nodeid = strdup(tmp);
tmp = strstr(nodeid, "\'");
CRM_ASSERT(tmp);
tmp[0] = 0;
}
return nodeid;
}
static void
crm_diff_update_v2(const char *event, xmlNode * msg)
{
xmlNode *change = NULL;
xmlNode *diff = get_message_xml(msg, F_CIB_UPDATE_RESULT);
for (change = __xml_first_child(diff); change != NULL; change = __xml_next(change)) {
const char *name = NULL;
const char *op = crm_element_value(change, XML_DIFF_OP);
const char *xpath = crm_element_value(change, XML_DIFF_PATH);
xmlNode *match = NULL;
const char *node = NULL;
if(op == NULL) {
continue;
} else if(strcmp(op, "create") == 0) {
match = change->children;
} else if(strcmp(op, "move") == 0) {
continue;
} else if(strcmp(op, "delete") == 0) {
continue;
} else if(strcmp(op, "modify") == 0) {
match = first_named_child(change, XML_DIFF_RESULT);
if(match) {
match = match->children;
}
}
if(match) {
name = (const char *)match->name;
}
crm_trace("Handling %s operation for %s %p, %s", op, xpath, match, name);
if(xpath == NULL) {
/* Version field, ignore */
} else if(name == NULL) {
crm_debug("No result for %s operation to %s", op, xpath);
CRM_ASSERT(strcmp(op, "delete") == 0 || strcmp(op, "move") == 0);
} else if(strcmp(name, XML_TAG_CIB) == 0) {
xmlNode *state = NULL;
xmlNode *status = first_named_child(match, XML_CIB_TAG_STATUS);
for (state = __xml_first_child_element(status); state != NULL;
state = __xml_next_element(state)) {
node = crm_element_value(state, XML_ATTR_UNAME);
if (node == NULL) {
node = ID(state);
}
handle_rsc_op(state, node);
}
} else if(strcmp(name, XML_CIB_TAG_STATUS) == 0) {
xmlNode *state = NULL;
for (state = __xml_first_child_element(match); state != NULL;
state = __xml_next_element(state)) {
node = crm_element_value(state, XML_ATTR_UNAME);
if (node == NULL) {
node = ID(state);
}
handle_rsc_op(state, node);
}
} else if(strcmp(name, XML_CIB_TAG_STATE) == 0) {
node = crm_element_value(match, XML_ATTR_UNAME);
if (node == NULL) {
node = ID(match);
}
handle_rsc_op(match, node);
} else if(strcmp(name, XML_CIB_TAG_LRM) == 0) {
node = ID(match);
handle_rsc_op(match, node);
} else if(strcmp(name, XML_LRM_TAG_RESOURCES) == 0) {
char *local_node = get_node_from_xpath(xpath);
handle_rsc_op(match, local_node);
free(local_node);
} else if(strcmp(name, XML_LRM_TAG_RESOURCE) == 0) {
char *local_node = get_node_from_xpath(xpath);
handle_rsc_op(match, local_node);
free(local_node);
} else if(strcmp(name, XML_LRM_TAG_RSC_OP) == 0) {
char *local_node = get_node_from_xpath(xpath);
handle_rsc_op(match, local_node);
free(local_node);
} else {
crm_trace("Ignoring %s operation for %s %p, %s", op, xpath, match, name);
}
}
}
static void
crm_diff_update_v1(const char *event, xmlNode * msg)
{
/* Process operation updates */
xmlXPathObject *xpathObj = xpath_search(msg,
"//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED
"//" XML_LRM_TAG_RSC_OP);
int lpc = 0, max = numXpathResults(xpathObj);
for (lpc = 0; lpc < max; lpc++) {
xmlNode *rsc_op = getXpathResult(xpathObj, lpc);
handle_rsc_op(rsc_op, NULL);
}
freeXpathObject(xpathObj);
}
static void
crm_diff_update(const char *event, xmlNode * msg)
{
int rc = -1;
static bool stale = FALSE;
gboolean cib_updated = FALSE;
xmlNode *diff = get_message_xml(msg, F_CIB_UPDATE_RESULT);
print_dot(output_format);
if (current_cib != NULL) {
rc = xml_apply_patchset(current_cib, diff, TRUE);
switch (rc) {
case -pcmk_err_diff_resync:
case -pcmk_err_diff_failed:
crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc);
free_xml(current_cib); current_cib = NULL;
break;
case pcmk_ok:
cib_updated = TRUE;
break;
default:
crm_notice("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc);
free_xml(current_cib); current_cib = NULL;
}
}
if (current_cib == NULL) {
crm_trace("Re-requesting the full cib");
cib->cmds->query(cib, NULL, ¤t_cib, cib_scope_local | cib_sync_call);
}
if (options.external_agent) {
int format = 0;
crm_element_value_int(diff, "format", &format);
switch(format) {
case 1:
crm_diff_update_v1(event, msg);
break;
case 2:
crm_diff_update_v2(event, msg);
break;
default:
crm_err("Unknown patch format: %d", format);
}
}
if (current_cib == NULL) {
if(!stale) {
print_as(output_format, "--- Stale data ---");
}
stale = TRUE;
return;
}
stale = FALSE;
kick_refresh(cib_updated);
}
static gboolean
mon_refresh_display(gpointer user_data)
{
xmlNode *cib_copy = copy_xml(current_cib);
stonith_history_t *stonith_history = NULL;
/* stdout for everything except the HTML case, which does a bunch of file
* renaming. We'll handle changing stream in print_html_status.
*/
mon_state_t state = { .stream = stdout, .output_format = output_format, .out = out };
last_refresh = time(NULL);
if (cli_config_update(&cib_copy, NULL, FALSE) == FALSE) {
if (cib) {
cib->cmds->signoff(cib);
}
print_as(output_format, "Upgrade failed: %s", pcmk_strerror(-pcmk_err_schema_validation));
if (output_format == mon_output_console) {
sleep(2);
}
clean_up(CRM_EX_CONFIG);
return FALSE;
}
/* get the stonith-history if there is evidence we need it
*/
while (is_set(options.mon_ops, mon_op_fence_history)) {
if (st != NULL) {
if (st->cmds->history(st, st_opt_sync_call, NULL, &stonith_history, 120)) {
fprintf(stderr, "Critical: Unable to get stonith-history\n");
mon_cib_connection_destroy(NULL);
} else {
stonith_history = sort_stonith_history(stonith_history);
if (is_not_set(options.mon_ops, mon_op_fence_full_history) && output_format != mon_output_xml) {
stonith_history = reduce_stonith_history(stonith_history);
}
break; /* all other cases are errors */
}
} else {
fprintf(stderr, "Critical: No stonith-API\n");
}
free_xml(cib_copy);
print_as(output_format, "Reading stonith-history failed");
if (output_format == mon_output_console) {
sleep(2);
}
return FALSE;
}
if (mon_data_set == NULL) {
mon_data_set = pe_new_working_set();
CRM_ASSERT(mon_data_set != NULL);
}
mon_data_set->input = cib_copy;
cluster_status(mon_data_set);
/* Unpack constraints if any section will need them
* (tickets may be referenced in constraints but not granted yet,
* and bans need negative location constraints) */
if (show & (mon_show_bans | mon_show_tickets)) {
xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS,
mon_data_set->input);
unpack_constraints(cib_constraints, mon_data_set);
}
switch (output_format) {
case mon_output_html:
case mon_output_cgi:
if (print_html_status(&state, mon_data_set, output_filename,
stonith_history, options.mon_ops, show,
print_neg_location_prefix, options.reconnect_msec) != 0) {
fprintf(stderr, "Critical: Unable to output html file\n");
clean_up(CRM_EX_CANTCREAT);
return FALSE;
}
break;
case mon_output_legacy_xml:
case mon_output_xml:
print_xml_status(&state, mon_data_set, stonith_history,
options.mon_ops, show, print_neg_location_prefix);
break;
case mon_output_monitor:
print_simple_status(mon_data_set, stonith_history, options.mon_ops, output_format);
if (is_set(options.mon_ops, mon_op_has_warnings)) {
clean_up(MON_STATUS_WARN);
return FALSE;
}
break;
case mon_output_plain:
case mon_output_console:
print_status(&state, mon_data_set, stonith_history, options.mon_ops,
show, print_neg_location_prefix);
break;
case mon_output_unset:
case mon_output_none:
break;
}
stonith_history_free(stonith_history);
stonith_history = NULL;
pe_reset_working_set(mon_data_set);
return TRUE;
}
static void
mon_st_callback_event(stonith_t * st, stonith_event_t * e)
{
if (st->state == stonith_disconnected) {
/* disconnect cib as well and have everything reconnect */
mon_cib_connection_destroy(NULL);
} else if (options.external_agent) {
char *desc = crm_strdup_printf("Operation %s requested by %s for peer %s: %s (ref=%s)",
e->operation, e->origin, e->target, pcmk_strerror(e->result),
e->id);
send_custom_trap(e->target, NULL, e->operation, pcmk_ok, e->result, 0, desc);
free(desc);
}
}
static void
kick_refresh(gboolean data_updated)
{
static int updates = 0;
- long now = time(NULL);
+ time_t now = time(NULL);
if (data_updated) {
updates++;
}
if(refresh_timer == NULL) {
refresh_timer = mainloop_timer_add("refresh", 2000, FALSE, mon_trigger_refresh, NULL);
}
/* Refresh
* - immediately if the last update was more than 5s ago
* - every 10 cib-updates
* - at most 2s after the last update
*/
if ((now - last_refresh) > (options.reconnect_msec / 1000)) {
mainloop_set_trigger(refresh_trigger);
mainloop_timer_stop(refresh_timer);
updates = 0;
} else if(updates >= 10) {
mainloop_set_trigger(refresh_trigger);
mainloop_timer_stop(refresh_timer);
updates = 0;
} else {
mainloop_timer_start(refresh_timer);
}
}
static void
mon_st_callback_display(stonith_t * st, stonith_event_t * e)
{
if (st->state == stonith_disconnected) {
/* disconnect cib as well and have everything reconnect */
mon_cib_connection_destroy(NULL);
} else {
print_dot(output_format);
kick_refresh(TRUE);
}
}
static void
clean_up_connections(void)
{
if (cib != NULL) {
cib->cmds->signoff(cib);
cib_delete(cib);
cib = NULL;
}
if (st != NULL) {
if (st->state != stonith_disconnected) {
st->cmds->remove_notification(st, T_STONITH_NOTIFY_DISCONNECT);
st->cmds->remove_notification(st, T_STONITH_NOTIFY_FENCE);
st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY);
st->cmds->disconnect(st);
}
stonith_api_delete(st);
st = NULL;
}
}
/*
* De-init ncurses, disconnect from the CIB manager, disconnect fencing,
* deallocate memory and show usage-message if requested.
*
* We don't actually return, but nominally returning crm_exit_t allows a usage
* like "return clean_up(exit_code);" which helps static analysis understand the
* code flow.
*/
static crm_exit_t
clean_up(crm_exit_t exit_code)
{
#if CURSES_ENABLED
if (output_format == mon_output_console) {
output_format = mon_output_plain;
echo();
nocbreak();
endwin();
}
#endif
clean_up_connections();
free(output_filename);
free(options.pid_file);
pe_free_working_set(mon_data_set);
mon_data_set = NULL;
if (exit_code == CRM_EX_USAGE) {
if (output_format == mon_output_cgi) {
fprintf(stdout, "Content-Type: text/plain\n"
"Status: 500\n\n");
} else {
fprintf(stderr, "%s", g_option_context_get_help(context, TRUE, NULL));
}
}
g_option_context_free(context);
if (out != NULL) {
/* FIXME: When we are ready to enable formatted output, uncomment
* the following line:
*/
/* out->finish(out, exit_code, true, NULL); */
pcmk__output_free(out);
}
crm_exit(exit_code);
}
diff --git a/tools/crm_mon_print.c b/tools/crm_mon_print.c
index cd230c6fa6..9185787c65 100644
--- a/tools/crm_mon_print.c
+++ b/tools/crm_mon_print.c
@@ -1,2604 +1,2598 @@
/*
* Copyright 2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "crm_mon.h"
static void print_nvpair(mon_state_t *state, const char *name, const char *value,
const char *units, time_t epoch_time);
static void print_node_start(mon_state_t *state, node_t *node, unsigned int mon_ops);
static void print_node_end(mon_state_t *state);
static void print_resources_heading(mon_state_t *state, unsigned int mon_ops);
static void print_resources_closing(mon_state_t *state, gboolean printed_heading,
unsigned int mon_ops);
static void print_resources(mon_state_t *state, pe_working_set_t *data_set,
int print_opts, unsigned int mon_ops);
static void print_rsc_history_start(mon_state_t *state, pe_working_set_t *data_set,
node_t *node, resource_t *rsc, const char *rsc_id,
gboolean all);
static void print_rsc_history_end(mon_state_t *state);
static void print_op_history(mon_state_t *state, pe_working_set_t *data_set,
node_t *node, xmlNode *xml_op, const char *task,
const char *interval_ms_s, int rc, unsigned int mon_ops);
static void print_rsc_history(mon_state_t *state, pe_working_set_t *data_set,
node_t *node, xmlNode *rsc_entry, gboolean operations,
unsigned int mon_ops);
static void print_node_history(mon_state_t *state, pe_working_set_t *data_set,
xmlNode *node_state, gboolean operations,
unsigned int mon_ops);
static gboolean print_attr_msg(mon_state_t *state, node_t * node, GListPtr rsc_list,
const char *attrname, const char *attrvalue);
static void print_node_attribute(gpointer name, gpointer user_data);
static void print_node_summary(mon_state_t *state, pe_working_set_t * data_set,
gboolean operations, unsigned int mon_ops);
static void print_ticket(gpointer name, gpointer value, gpointer user_data);
static void print_cluster_tickets(mon_state_t *state, pe_working_set_t * data_set);
static void print_ban(mon_state_t *state, pe_node_t *node, pe__location_t *location,
unsigned int mon_ops);
static void print_neg_locations(mon_state_t *state, pe_working_set_t *data_set,
unsigned int mon_ops, const char *prefix);
static void print_node_attributes(mon_state_t *state, pe_working_set_t *data_set,
unsigned int mon_ops);
static void print_cluster_summary_header(mon_state_t *state);
static void print_cluster_summary_footer(mon_state_t *state);
static void print_cluster_times(mon_state_t *state, pe_working_set_t *data_set);
static void print_cluster_stack(mon_state_t *state, const char *stack_s);
static void print_cluster_dc(mon_state_t *state, pe_working_set_t *data_set,
unsigned int mon_ops);
static void print_cluster_counts(mon_state_t *state, pe_working_set_t *data_set,
const char *stack_s);
static void print_cluster_options(mon_state_t *state, pe_working_set_t *data_set);
static void print_cluster_summary(mon_state_t *state, pe_working_set_t *data_set,
unsigned int mon_ops, unsigned int show);
static void print_failed_action(mon_state_t *state, xmlNode *xml_op);
static void print_failed_actions(mon_state_t *state, pe_working_set_t *data_set);
static void print_stonith_action(mon_state_t *state, stonith_history_t *event, unsigned int mon_ops, stonith_history_t *top_history);
static void print_failed_stonith_actions(mon_state_t *state, stonith_history_t *history, unsigned int mon_ops);
static void print_stonith_pending(mon_state_t *state, stonith_history_t *history, unsigned int mon_ops);
static void print_stonith_history(mon_state_t *state, stonith_history_t *history, unsigned int mon_ops);
/*!
* \internal
* \brief Print a [name]=[value][units] pair, optionally using time string
*
* \param[in] stream File stream to display output to
* \param[in] name Name to display
* \param[in] value Value to display (or NULL to convert time instead)
* \param[in] units Units to display (or NULL for no units)
* \param[in] epoch_time Epoch time to convert if value is NULL
*/
static void
print_nvpair(mon_state_t *state, const char *name, const char *value,
const char *units, time_t epoch_time)
{
/* print name= */
switch (state->output_format) {
case mon_output_plain:
case mon_output_console:
print_as(state->output_format, " %s=", name);
break;
case mon_output_html:
case mon_output_cgi:
case mon_output_xml:
fprintf(state->stream, " %s=", name);
break;
default:
break;
}
/* If we have a value (and optionally units), print it */
if (value) {
switch (state->output_format) {
case mon_output_plain:
case mon_output_console:
print_as(state->output_format, "%s%s", value, (units? units : ""));
break;
case mon_output_html:
case mon_output_cgi:
fprintf(state->stream, "%s%s", value, (units? units : ""));
break;
case mon_output_xml:
fprintf(state->stream, "\"%s%s\"", value, (units? units : ""));
break;
default:
break;
}
/* Otherwise print user-friendly time string */
} else {
static char empty_str[] = "";
char *c, *date_str = asctime(localtime(&epoch_time));
for (c = (date_str != NULL) ? date_str : empty_str; *c != '\0'; ++c) {
if (*c == '\n') {
*c = '\0';
break;
}
}
switch (state->output_format) {
case mon_output_plain:
case mon_output_console:
print_as(state->output_format, "'%s'", date_str);
break;
case mon_output_html:
case mon_output_cgi:
case mon_output_xml:
fprintf(state->stream, "\"%s\"", date_str);
break;
default:
break;
}
}
}
/*!
* \internal
* \brief Print whatever is needed to start a node section
*
* \param[in] stream File stream to display output to
* \param[in] node Node to print
*/
static void
print_node_start(mon_state_t *state, node_t *node, unsigned int mon_ops)
{
char *node_name;
switch (state->output_format) {
case mon_output_plain:
case mon_output_console:
node_name = get_node_display_name(node, mon_ops);
print_as(state->output_format, "* Node %s:\n", node_name);
free(node_name);
break;
case mon_output_html:
case mon_output_cgi:
node_name = get_node_display_name(node, mon_ops);
fprintf(state->stream, " Node: %s
\n \n", node_name);
free(node_name);
break;
case mon_output_xml:
fprintf(state->stream, " \n", node->details->uname);
break;
default:
break;
}
}
/*!
* \internal
* \brief Print whatever is needed to end a node section
*
* \param[in] stream File stream to display output to
*/
static void
print_node_end(mon_state_t *state)
{
switch (state->output_format) {
case mon_output_html:
case mon_output_cgi:
fprintf(state->stream, "
\n");
break;
case mon_output_xml:
fprintf(state->stream, " \n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print resources section heading appropriate to options
*
* \param[in] stream File stream to display output to
*/
static void
print_resources_heading(mon_state_t *state, unsigned int mon_ops)
{
const char *heading;
if (is_set(mon_ops, mon_op_group_by_node)) {
/* Active resources have already been printed by node */
heading = is_set(mon_ops, mon_op_inactive_resources) ? "Inactive resources" : NULL;
} else if (is_set(mon_ops, mon_op_inactive_resources)) {
heading = "Full list of resources";
} else {
heading = "Active resources";
}
/* Print section heading */
switch (state->output_format) {
case mon_output_plain:
case mon_output_console:
print_as(state->output_format, "\n%s:\n\n", heading);
break;
case mon_output_html:
case mon_output_cgi:
fprintf(state->stream, "
\n %s
\n", heading);
break;
case mon_output_xml:
fprintf(state->stream, " \n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print whatever resource section closing is appropriate
*
* \param[in] stream File stream to display output to
*/
static void
print_resources_closing(mon_state_t *state, gboolean printed_heading,
unsigned int mon_ops)
{
const char *heading;
/* What type of resources we did or did not display */
if (is_set(mon_ops, mon_op_group_by_node)) {
heading = "inactive ";
} else if (is_set(mon_ops, mon_op_inactive_resources)) {
heading = "";
} else {
heading = "active ";
}
switch (state->output_format) {
case mon_output_plain:
case mon_output_console:
if (!printed_heading) {
print_as(state->output_format, "\nNo %sresources\n\n", heading);
}
break;
case mon_output_html:
case mon_output_cgi:
if (!printed_heading) {
fprintf(state->stream, "
\n No %sresources
\n", heading);
}
break;
case mon_output_xml:
fprintf(state->stream, " %s\n",
(printed_heading? "" : ""));
break;
default:
break;
}
}
/*!
* \internal
* \brief Print whatever resource section(s) are appropriate
*
* \param[in] stream File stream to display output to
* \param[in] data_set Cluster state to display
* \param[in] print_opts Bitmask of pe_print_options
*/
static void
print_resources(mon_state_t *state, pe_working_set_t *data_set,
int print_opts, unsigned int mon_ops)
{
GListPtr rsc_iter;
const char *prefix = NULL;
gboolean printed_heading = FALSE;
gboolean brief_output = is_set(mon_ops, mon_op_print_brief);
/* If we already showed active resources by node, and
* we're not showing inactive resources, we have nothing to do
*/
if (is_set(mon_ops, mon_op_group_by_node) && is_not_set(mon_ops, mon_op_inactive_resources)) {
return;
}
/* XML uses an indent, and ignores brief option for resources */
if (state->output_format == mon_output_xml) {
prefix = " ";
brief_output = FALSE;
}
/* If we haven't already printed resources grouped by node,
* and brief output was requested, print resource summary */
if (brief_output && is_not_set(mon_ops, mon_op_group_by_node)) {
print_resources_heading(state, mon_ops);
printed_heading = TRUE;
print_rscs_brief(data_set->resources, NULL, print_opts, state->stream,
is_set(mon_ops, mon_op_inactive_resources));
}
/* For each resource, display it if appropriate */
for (rsc_iter = data_set->resources; rsc_iter != NULL; rsc_iter = rsc_iter->next) {
resource_t *rsc = (resource_t *) rsc_iter->data;
/* Complex resources may have some sub-resources active and some inactive */
gboolean is_active = rsc->fns->active(rsc, TRUE);
gboolean partially_active = rsc->fns->active(rsc, FALSE);
/* Skip inactive orphans (deleted but still in CIB) */
if (is_set(rsc->flags, pe_rsc_orphan) && !is_active) {
continue;
/* Skip active resources if we already displayed them by node */
} else if (is_set(mon_ops, mon_op_group_by_node)) {
if (is_active) {
continue;
}
/* Skip primitives already counted in a brief summary */
} else if (brief_output && (rsc->variant == pe_native)) {
continue;
/* Skip resources that aren't at least partially active,
* unless we're displaying inactive resources
*/
} else if (!partially_active && is_not_set(mon_ops, mon_op_inactive_resources)) {
continue;
}
/* Print this resource */
if (printed_heading == FALSE) {
print_resources_heading(state, mon_ops);
printed_heading = TRUE;
}
rsc->fns->print(rsc, prefix, print_opts, state->stream);
}
print_resources_closing(state, printed_heading, mon_ops);
}
/*!
* \internal
* \brief Print heading for resource history
*
* \param[in] stream File stream to display output to
* \param[in] data_set Current state of CIB
* \param[in] node Node that ran this resource
* \param[in] rsc Resource to print
* \param[in] rsc_id ID of resource to print
* \param[in] all Whether to print every resource or just failed ones
*/
static void
print_rsc_history_start(mon_state_t *state, pe_working_set_t *data_set,
node_t *node, resource_t *rsc, const char *rsc_id,
gboolean all)
{
time_t last_failure = 0;
int failcount = rsc?
pe_get_failcount(node, rsc, &last_failure, pe_fc_default,
NULL, data_set)
: 0;
if (!all && !failcount && (last_failure <= 0)) {
return;
}
/* Print resource ID */
switch (state->output_format) {
case mon_output_plain:
case mon_output_console:
print_as(state->output_format, " %s:", rsc_id);
break;
case mon_output_html:
case mon_output_cgi:
fprintf(state->stream, " %s:", rsc_id);
break;
case mon_output_xml:
fprintf(state->stream, " output_format) {
case mon_output_plain:
case mon_output_console:
print_as(state->output_format, " orphan");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(state->stream, " orphan");
break;
case mon_output_xml:
fprintf(state->stream, " orphan=\"true\"");
break;
default:
break;
}
/* If resource is not an orphan, print some details */
} else if (all || failcount || (last_failure > 0)) {
/* Print migration threshold */
switch (state->output_format) {
case mon_output_plain:
case mon_output_console:
print_as(state->output_format, " migration-threshold=%d", rsc->migration_threshold);
break;
case mon_output_html:
case mon_output_cgi:
fprintf(state->stream, " migration-threshold=%d", rsc->migration_threshold);
break;
case mon_output_xml:
fprintf(state->stream, " orphan=\"false\" migration-threshold=\"%d\"",
rsc->migration_threshold);
break;
default:
break;
}
/* Print fail count if any */
if (failcount > 0) {
switch (state->output_format) {
case mon_output_plain:
case mon_output_console:
print_as(state->output_format, " " CRM_FAIL_COUNT_PREFIX "=%d", failcount);
break;
case mon_output_html:
case mon_output_cgi:
fprintf(state->stream, " " CRM_FAIL_COUNT_PREFIX "=%d", failcount);
break;
case mon_output_xml:
fprintf(state->stream, " " CRM_FAIL_COUNT_PREFIX "=\"%d\"",
failcount);
break;
default:
break;
}
}
/* Print last failure time if any */
if (last_failure > 0) {
print_nvpair(state, CRM_LAST_FAILURE_PREFIX, NULL, NULL, last_failure);
}
}
/* End the heading */
switch (state->output_format) {
case mon_output_plain:
case mon_output_console:
print_as(state->output_format, "\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(state->stream, "\n \n");
break;
case mon_output_xml:
fprintf(state->stream, ">\n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print closing for resource history
*
* \param[in] stream File stream to display output to
*/
static void
print_rsc_history_end(mon_state_t *state)
{
switch (state->output_format) {
case mon_output_html:
case mon_output_cgi:
fprintf(state->stream, "
\n \n");
break;
case mon_output_xml:
fprintf(state->stream, " \n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print operation history
*
* \param[in] stream File stream to display output to
* \param[in] data_set Current state of CIB
* \param[in] node Node this operation is for
* \param[in] xml_op Root of XML tree describing this operation
* \param[in] task Task parsed from this operation's XML
* \param[in] interval_ms_s Interval parsed from this operation's XML
* \param[in] rc Return code parsed from this operation's XML
*/
static void
print_op_history(mon_state_t *state, pe_working_set_t *data_set, node_t *node,
xmlNode *xml_op, const char *task, const char *interval_ms_s,
int rc, unsigned int mon_ops)
{
const char *value = NULL;
const char *call = crm_element_value(xml_op, XML_LRM_ATTR_CALLID);
/* Begin the operation description */
switch (state->output_format) {
case mon_output_plain:
case mon_output_console:
print_as(state->output_format, " + (%s) %s:", call, task);
break;
case mon_output_html:
case mon_output_cgi:
fprintf(state->stream, " (%s) %s:", call, task);
break;
case mon_output_xml:
fprintf(state->stream, " 0) {
- print_nvpair(state, attr, NULL, NULL, int_value);
- }
+ if ((crm_element_value_epoch(xml_op, attr, &epoch) == pcmk_ok)
+ && (epoch > 0)) {
+ print_nvpair(state, attr, NULL, NULL, epoch);
}
// last-run is deprecated
attr = XML_RSC_OP_LAST_RUN;
- value = crm_element_value(xml_op, attr);
- if (value) {
- int_value = crm_parse_int(value, NULL);
- if (int_value > 0) {
- print_nvpair(state, attr, NULL, NULL, int_value);
- }
+ if ((crm_element_value_epoch(xml_op, attr, &epoch) == pcmk_ok)
+ && (epoch > 0)) {
+ print_nvpair(state, attr, NULL, NULL, epoch);
}
attr = XML_RSC_OP_T_EXEC;
value = crm_element_value(xml_op, attr);
if (value) {
print_nvpair(state, attr, value, "ms", 0);
}
attr = XML_RSC_OP_T_QUEUE;
value = crm_element_value(xml_op, attr);
if (value) {
print_nvpair(state, attr, value, "ms", 0);
}
}
/* End the operation description */
switch (state->output_format) {
case mon_output_plain:
case mon_output_console:
print_as(state->output_format, " rc=%d (%s)\n", rc, services_ocf_exitcode_str(rc));
break;
case mon_output_html:
case mon_output_cgi:
fprintf(state->stream, " rc=%d (%s)\n", rc, services_ocf_exitcode_str(rc));
break;
case mon_output_xml:
fprintf(state->stream, " rc=\"%d\" rc_text=\"%s\" />\n", rc, services_ocf_exitcode_str(rc));
break;
default:
break;
}
}
/*!
* \internal
* \brief Print resource operation/failure history
*
* \param[in] stream File stream to display output to
* \param[in] data_set Current state of CIB
* \param[in] node Node that ran this resource
* \param[in] rsc_entry Root of XML tree describing resource status
* \param[in] operations Whether to print operations or just failcounts
*/
static void
print_rsc_history(mon_state_t *state, pe_working_set_t *data_set, node_t *node,
xmlNode *rsc_entry, gboolean operations, unsigned int mon_ops)
{
GListPtr gIter = NULL;
GListPtr op_list = NULL;
gboolean printed = FALSE;
const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
resource_t *rsc = pe_find_resource(data_set->resources, rsc_id);
xmlNode *rsc_op = NULL;
/* If we're not showing operations, just print the resource failure summary */
if (operations == FALSE) {
print_rsc_history_start(state, data_set, node, rsc, rsc_id, FALSE);
print_rsc_history_end(state);
return;
}
/* Create a list of this resource's operations */
for (rsc_op = __xml_first_child_element(rsc_entry); rsc_op != NULL;
rsc_op = __xml_next_element(rsc_op)) {
if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) {
op_list = g_list_append(op_list, rsc_op);
}
}
op_list = g_list_sort(op_list, sort_op_by_callid);
/* Print each operation */
for (gIter = op_list; gIter != NULL; gIter = gIter->next) {
xmlNode *xml_op = (xmlNode *) gIter->data;
const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
const char *interval_ms_s = crm_element_value(xml_op,
XML_LRM_ATTR_INTERVAL_MS);
const char *op_rc = crm_element_value(xml_op, XML_LRM_ATTR_RC);
int rc = crm_parse_int(op_rc, "0");
/* Display 0-interval monitors as "probe" */
if (safe_str_eq(task, CRMD_ACTION_STATUS)
&& ((interval_ms_s == NULL) || safe_str_eq(interval_ms_s, "0"))) {
task = "probe";
}
/* Ignore notifies and some probes */
if (safe_str_eq(task, CRMD_ACTION_NOTIFY) || (safe_str_eq(task, "probe") && (rc == 7))) {
continue;
}
/* If this is the first printed operation, print heading for resource */
if (printed == FALSE) {
printed = TRUE;
print_rsc_history_start(state, data_set, node, rsc, rsc_id, TRUE);
}
/* Print the operation */
print_op_history(state, data_set, node, xml_op, task, interval_ms_s, rc, mon_ops);
}
/* Free the list we created (no need to free the individual items) */
g_list_free(op_list);
/* If we printed anything, close the resource */
if (printed) {
print_rsc_history_end(state);
}
}
/*!
* \internal
* \brief Print node operation/failure history
*
* \param[in] stream File stream to display output to
* \param[in] data_set Current state of CIB
* \param[in] node_state Root of XML tree describing node status
* \param[in] operations Whether to print operations or just failcounts
*/
static void
print_node_history(mon_state_t *state, pe_working_set_t *data_set,
xmlNode *node_state, gboolean operations,
unsigned int mon_ops)
{
node_t *node = pe_find_node_id(data_set->nodes, ID(node_state));
xmlNode *lrm_rsc = NULL;
xmlNode *rsc_entry = NULL;
if (node && node->details && node->details->online) {
print_node_start(state, node, mon_ops);
lrm_rsc = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE);
lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE);
/* Print history of each of the node's resources */
for (rsc_entry = __xml_first_child_element(lrm_rsc); rsc_entry != NULL;
rsc_entry = __xml_next_element(rsc_entry)) {
if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) {
print_rsc_history(state, data_set, node, rsc_entry, operations, mon_ops);
}
}
print_node_end(state);
}
}
/*!
* \internal
* \brief Print extended information about an attribute if appropriate
*
* \param[in] data_set Working set of CIB state
*
* \return TRUE if extended information was printed, FALSE otherwise
* \note Currently, extended information is only supported for ping/pingd
* resources, for which a message will be printed if connectivity is lost
* or degraded.
*/
static gboolean
print_attr_msg(mon_state_t *state, node_t * node, GListPtr rsc_list,
const char *attrname, const char *attrvalue)
{
GListPtr gIter = NULL;
for (gIter = rsc_list; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
const char *type = g_hash_table_lookup(rsc->meta, "type");
if (rsc->children != NULL) {
if (print_attr_msg(state, node, rsc->children, attrname, attrvalue)) {
return TRUE;
}
}
if (safe_str_eq(type, "ping") || safe_str_eq(type, "pingd")) {
const char *name = g_hash_table_lookup(rsc->parameters, "name");
if (name == NULL) {
name = "pingd";
}
/* To identify the resource with the attribute name. */
if (safe_str_eq(name, attrname)) {
int host_list_num = 0;
int expected_score = 0;
int value = crm_parse_int(attrvalue, "0");
const char *hosts = g_hash_table_lookup(rsc->parameters, "host_list");
const char *multiplier = g_hash_table_lookup(rsc->parameters, "multiplier");
if(hosts) {
char **host_list = g_strsplit(hosts, " ", 0);
host_list_num = g_strv_length(host_list);
g_strfreev(host_list);
}
/* pingd multiplier is the same as the default value. */
expected_score = host_list_num * crm_parse_int(multiplier, "1");
switch (state->output_format) {
case mon_output_plain:
case mon_output_console:
if (value <= 0) {
print_as(state->output_format, "\t: Connectivity is lost");
} else if (value < expected_score) {
print_as(state->output_format, "\t: Connectivity is degraded (Expected=%d)", expected_score);
}
break;
case mon_output_html:
case mon_output_cgi:
if (value <= 0) {
fprintf(state->stream, " (connectivity is lost)");
} else if (value < expected_score) {
fprintf(state->stream, " (connectivity is degraded -- expected %d)",
expected_score);
}
break;
case mon_output_xml:
fprintf(state->stream, " expected=\"%d\"", expected_score);
break;
default:
break;
}
return TRUE;
}
}
}
return FALSE;
}
/* structure for passing multiple user data to g_list_foreach() */
struct mon_attr_data {
mon_state_t *state;
node_t *node;
};
static void
print_node_attribute(gpointer name, gpointer user_data)
{
const char *value = NULL;
struct mon_attr_data *data = (struct mon_attr_data *) user_data;
value = pe_node_attribute_raw(data->node, name);
/* Print attribute name and value */
switch (data->state->output_format) {
case mon_output_plain:
case mon_output_console:
print_as(data->state->output_format, " + %-32s\t: %-10s", (char *)name, value);
break;
case mon_output_html:
case mon_output_cgi:
fprintf(data->state->stream, " %s: %s",
(char *)name, value);
break;
case mon_output_xml:
fprintf(data->state->stream,
" state, data->node, data->node->details->running_rsc,
name, value);
/* Close out the attribute */
switch (data->state->output_format) {
case mon_output_plain:
case mon_output_console:
print_as(data->state->output_format, "\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(data->state->stream, "\n");
break;
case mon_output_xml:
fprintf(data->state->stream, " />\n");
break;
default:
break;
}
}
static void
print_node_summary(mon_state_t *state, pe_working_set_t * data_set,
gboolean operations, unsigned int mon_ops)
{
xmlNode *node_state = NULL;
xmlNode *cib_status = get_object_root(XML_CIB_TAG_STATUS, data_set->input);
/* Print heading */
switch (state->output_format) {
case mon_output_plain:
case mon_output_console:
if (operations) {
print_as(state->output_format, "\nOperations:\n");
} else {
print_as(state->output_format, "\nMigration Summary:\n");
}
break;
case mon_output_html:
case mon_output_cgi:
if (operations) {
fprintf(state->stream, "
\n Operations
\n");
} else {
fprintf(state->stream, "
\n Migration Summary
\n");
}
break;
case mon_output_xml:
fprintf(state->stream, " \n");
break;
default:
break;
}
/* Print each node in the CIB status */
for (node_state = __xml_first_child_element(cib_status); node_state != NULL;
node_state = __xml_next_element(node_state)) {
if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) {
print_node_history(state, data_set, node_state, operations, mon_ops);
}
}
/* Close section */
switch (state->output_format) {
case mon_output_xml:
fprintf(state->stream, " \n");
break;
default:
break;
}
}
static void
print_ticket(gpointer name, gpointer value, gpointer user_data)
{
mon_state_t *data = (mon_state_t *) user_data;
ticket_t *ticket = (ticket_t *) value;
switch (data->output_format) {
case mon_output_plain:
case mon_output_console:
print_as(data->output_format, "* %s:\t%s%s", ticket->id,
(ticket->granted? "granted" : "revoked"),
(ticket->standby? " [standby]" : ""));
break;
case mon_output_html:
case mon_output_cgi:
fprintf(data->stream, " %s: %s%s", ticket->id,
(ticket->granted? "granted" : "revoked"),
(ticket->standby? " [standby]" : ""));
break;
case mon_output_xml:
fprintf(data->stream, " id, (ticket->granted? "granted" : "revoked"),
(ticket->standby? "true" : "false"));
break;
default:
break;
}
if (ticket->last_granted > -1) {
print_nvpair(data, "last-granted", NULL, NULL, ticket->last_granted);
}
switch (data->output_format) {
case mon_output_plain:
case mon_output_console:
print_as(data->output_format, "\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(data->stream, "\n");
break;
case mon_output_xml:
fprintf(data->stream, " />\n");
break;
default:
break;
}
}
static void
print_cluster_tickets(mon_state_t *state, pe_working_set_t * data_set)
{
/* Print section heading */
switch (state->output_format) {
case mon_output_plain:
case mon_output_console:
print_as(state->output_format, "\nTickets:\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(state->stream, "
\n Tickets
\n \n");
break;
case mon_output_xml:
fprintf(state->stream, " \n");
break;
default:
break;
}
/* Print each ticket */
g_hash_table_foreach(data_set->tickets, print_ticket, state);
/* Close section */
switch (state->output_format) {
case mon_output_html:
case mon_output_cgi:
fprintf(state->stream, "
\n");
break;
case mon_output_xml:
fprintf(state->stream, " \n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print a negative location constraint
*
* \param[in] stream File stream to display output to
* \param[in] node Node affected by constraint
* \param[in] location Constraint to print
*/
static void
print_ban(mon_state_t *state, pe_node_t *node, pe__location_t *location,
unsigned int mon_ops)
{
char *node_name = NULL;
switch (state->output_format) {
case mon_output_plain:
case mon_output_console:
node_name = get_node_display_name(node, mon_ops);
print_as(state->output_format, " %s\tprevents %s from running %son %s\n",
location->id, location->rsc_lh->id,
((location->role_filter == RSC_ROLE_MASTER)? "as Master " : ""),
node_name);
break;
case mon_output_html:
case mon_output_cgi:
node_name = get_node_display_name(node, mon_ops);
fprintf(state->stream, " %s prevents %s from running %son %s\n",
location->id, location->rsc_lh->id,
((location->role_filter == RSC_ROLE_MASTER)? "as Master " : ""),
node_name);
break;
case mon_output_xml:
fprintf(state->stream,
" \n",
location->id, location->rsc_lh->id, node->details->uname, node->weight,
((location->role_filter == RSC_ROLE_MASTER)? "true" : "false"));
break;
default:
break;
}
free(node_name);
}
/*!
* \internal
* \brief Print section for negative location constraints
*
* \param[in] stream File stream to display output to
* \param[in] data_set Working set corresponding to CIB status to display
*/
static void
print_neg_locations(mon_state_t *state, pe_working_set_t *data_set, unsigned int mon_ops,
const char *prefix)
{
GListPtr gIter, gIter2;
/* Print section heading */
switch (state->output_format) {
case mon_output_plain:
case mon_output_console:
print_as(state->output_format, "\nNegative Location Constraints:\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(state->stream, "
\n Negative Location Constraints
\n \n");
break;
case mon_output_xml:
fprintf(state->stream, " \n");
break;
default:
break;
}
/* Print each ban */
for (gIter = data_set->placement_constraints; gIter != NULL; gIter = gIter->next) {
pe__location_t *location = gIter->data;
if (!g_str_has_prefix(location->id, prefix))
continue;
for (gIter2 = location->node_list_rh; gIter2 != NULL; gIter2 = gIter2->next) {
node_t *node = (node_t *) gIter2->data;
if (node->weight < 0) {
print_ban(state, node, location, mon_ops);
}
}
}
/* Close section */
switch (state->output_format) {
case mon_output_cgi:
case mon_output_html:
fprintf(state->stream, "
\n");
break;
case mon_output_xml:
fprintf(state->stream, " \n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print node attributes section
*
* \param[in] stream File stream to display output to
* \param[in] data_set Working set of CIB state
*/
static void
print_node_attributes(mon_state_t *state, pe_working_set_t *data_set, unsigned int mon_ops)
{
GListPtr gIter = NULL;
/* Print section heading */
switch (state->output_format) {
case mon_output_plain:
case mon_output_console:
print_as(state->output_format, "\nNode Attributes:\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(state->stream, "
\n Node Attributes
\n");
break;
case mon_output_xml:
fprintf(state->stream, " \n");
break;
default:
break;
}
/* Unpack all resource parameters (it would be more efficient to do this
* only when needed for the first time in print_attr_msg())
*/
for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
crm_mon_get_parameters(gIter->data, data_set);
}
/* Display each node's attributes */
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
struct mon_attr_data data;
data.state = state;
data.node = (node_t *) gIter->data;
if (data.node && data.node->details && data.node->details->online) {
GList *attr_list = NULL;
GHashTableIter iter;
gpointer key, value;
print_node_start(state, data.node, mon_ops);
g_hash_table_iter_init(&iter, data.node->details->attrs);
while (g_hash_table_iter_next (&iter, &key, &value)) {
attr_list = append_attr_list(attr_list, key);
}
g_list_foreach(attr_list, print_node_attribute, &data);
g_list_free(attr_list);
print_node_end(state);
}
}
/* Print section footer */
switch (state->output_format) {
case mon_output_xml:
fprintf(state->stream, " \n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print header for cluster summary if needed
*
* \param[in] stream File stream to display output to
*/
static void
print_cluster_summary_header(mon_state_t *state)
{
switch (state->output_format) {
case mon_output_html:
case mon_output_cgi:
fprintf(state->stream, " Cluster Summary
\n \n");
break;
case mon_output_xml:
fprintf(state->stream, " \n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print footer for cluster summary if needed
*
* \param[in] stream File stream to display output to
*/
static void
print_cluster_summary_footer(mon_state_t *state)
{
switch (state->output_format) {
case mon_output_cgi:
case mon_output_html:
fprintf(state->stream, "
\n");
break;
case mon_output_xml:
fprintf(state->stream, " \n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print times the display was last updated and CIB last changed
*
* \param[in] stream File stream to display output to
* \param[in] data_set Working set of CIB state
*/
static void
print_cluster_times(mon_state_t *state, pe_working_set_t *data_set)
{
const char *last_written = crm_element_value(data_set->input, XML_CIB_ATTR_WRITTEN);
const char *user = crm_element_value(data_set->input, XML_ATTR_UPDATE_USER);
const char *client = crm_element_value(data_set->input, XML_ATTR_UPDATE_CLIENT);
const char *origin = crm_element_value(data_set->input, XML_ATTR_UPDATE_ORIG);
switch (state->output_format) {
case mon_output_plain:
case mon_output_console: {
const char *now_str = crm_now_string(NULL);
print_as(state->output_format, "Last updated: %s", now_str ? now_str : "Could not determine current time");
print_as(state->output_format, (user || client || origin)? "\n" : "\t\t");
print_as(state->output_format, "Last change: %s", last_written ? last_written : "");
if (user) {
print_as(state->output_format, " by %s", user);
}
if (client) {
print_as(state->output_format, " via %s", client);
}
if (origin) {
print_as(state->output_format, " on %s", origin);
}
print_as(state->output_format, "\n");
break;
}
case mon_output_html:
case mon_output_cgi: {
const char *now_str = crm_now_string(NULL);
fprintf(state->stream, " Last updated: %s
\n",
now_str ? now_str : "Could not determine current time");
fprintf(state->stream, " Last change: %s", last_written ? last_written : "");
if (user) {
fprintf(state->stream, " by %s", user);
}
if (client) {
fprintf(state->stream, " via %s", client);
}
if (origin) {
fprintf(state->stream, " on %s", origin);
}
fprintf(state->stream, "
\n");
break;
}
case mon_output_xml: {
const char *now_str = crm_now_string(NULL);
fprintf(state->stream, " \n",
now_str ? now_str : "Could not determine current time");
fprintf(state->stream, " \n",
last_written ? last_written : "", user ? user : "",
client ? client : "", origin ? origin : "");
break;
}
default:
break;
}
}
/*!
* \internal
* \brief Print cluster stack
*
* \param[in] stream File stream to display output to
* \param[in] stack_s Stack name
*/
static void
print_cluster_stack(mon_state_t *state, const char *stack_s)
{
switch (state->output_format) {
case mon_output_plain:
case mon_output_console:
print_as(state->output_format, "Stack: %s\n", stack_s);
break;
case mon_output_html:
case mon_output_cgi:
fprintf(state->stream, " Stack: %s
\n", stack_s);
break;
case mon_output_xml:
fprintf(state->stream, " \n", stack_s);
break;
default:
break;
}
}
/*!
* \internal
* \brief Print current DC and its version
*
* \param[in] stream File stream to display output to
* \param[in] data_set Working set of CIB state
*/
static void
print_cluster_dc(mon_state_t *state, pe_working_set_t *data_set, unsigned int mon_ops)
{
node_t *dc = data_set->dc_node;
xmlNode *dc_version = get_xpath_object("//nvpair[@name='dc-version']",
data_set->input, LOG_DEBUG);
const char *dc_version_s = dc_version?
crm_element_value(dc_version, XML_NVPAIR_ATTR_VALUE)
: NULL;
const char *quorum = crm_element_value(data_set->input, XML_ATTR_HAVE_QUORUM);
char *dc_name = dc? get_node_display_name(dc, mon_ops) : NULL;
switch (state->output_format) {
case mon_output_plain:
case mon_output_console:
print_as(state->output_format, "Current DC: ");
if (dc) {
print_as(state->output_format, "%s (version %s) - partition %s quorum\n",
dc_name, (dc_version_s? dc_version_s : "unknown"),
(crm_is_true(quorum) ? "with" : "WITHOUT"));
} else {
print_as(state->output_format, "NONE\n");
}
break;
case mon_output_html:
case mon_output_cgi:
fprintf(state->stream, " Current DC: ");
if (dc) {
fprintf(state->stream, "%s (version %s) - partition %s quorum",
dc_name, (dc_version_s? dc_version_s : "unknown"),
(crm_is_true(quorum)? "with" : "WITHOUT"));
} else {
fprintf(state->stream, "NONE");
}
fprintf(state->stream, "
\n");
break;
case mon_output_xml:
fprintf(state->stream, " stream,
"present=\"true\" version=\"%s\" name=\"%s\" id=\"%s\" with_quorum=\"%s\"",
(dc_version_s? dc_version_s : ""), dc->details->uname, dc->details->id,
(crm_is_true(quorum) ? "true" : "false"));
} else {
fprintf(state->stream, "present=\"false\"");
}
fprintf(state->stream, " />\n");
break;
default:
break;
}
free(dc_name);
}
/*!
* \internal
* \brief Print counts of configured nodes and resources
*
* \param[in] stream File stream to display output to
* \param[in] data_set Working set of CIB state
* \param[in] stack_s Stack name
*/
static void
print_cluster_counts(mon_state_t *state, pe_working_set_t *data_set, const char *stack_s)
{
int nnodes = g_list_length(data_set->nodes);
int nresources = count_resources(data_set, NULL);
switch (state->output_format) {
case mon_output_plain:
case mon_output_console:
print_as(state->output_format, "\n%d node%s configured\n", nnodes, s_if_plural(nnodes));
print_as(state->output_format, "%d resource%s configured",
nresources, s_if_plural(nresources));
if(data_set->disabled_resources || data_set->blocked_resources) {
print_as(state->output_format, " (");
if (data_set->disabled_resources) {
print_as(state->output_format, "%d DISABLED", data_set->disabled_resources);
}
if (data_set->disabled_resources && data_set->blocked_resources) {
print_as(state->output_format, ", ");
}
if (data_set->blocked_resources) {
print_as(state->output_format, "%d BLOCKED from starting due to failure",
data_set->blocked_resources);
}
print_as(state->output_format, ")");
}
print_as(state->output_format, "\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(state->stream, " %d node%s configured
\n",
nnodes, s_if_plural(nnodes));
fprintf(state->stream, " %d resource%s configured",
nresources, s_if_plural(nresources));
if (data_set->disabled_resources || data_set->blocked_resources) {
fprintf(state->stream, " (");
if (data_set->disabled_resources) {
fprintf(state->stream, "%d DISABLED",
data_set->disabled_resources);
}
if (data_set->disabled_resources && data_set->blocked_resources) {
fprintf(state->stream, ", ");
}
if (data_set->blocked_resources) {
fprintf(state->stream,
"%d BLOCKED from starting due to failure",
data_set->blocked_resources);
}
fprintf(state->stream, ")");
}
fprintf(state->stream, "
\n");
break;
case mon_output_xml:
fprintf(state->stream,
" \n",
g_list_length(data_set->nodes));
fprintf(state->stream,
" \n",
count_resources(data_set, NULL),
data_set->disabled_resources, data_set->blocked_resources);
break;
default:
break;
}
}
/*!
* \internal
* \brief Print cluster-wide options
*
* \param[in] stream File stream to display output to
* \param[in] data_set Working set of CIB state
*
* \note Currently this is only implemented for HTML and XML output, and
* prints only a few options. If there is demand, more could be added.
*/
static void
print_cluster_options(mon_state_t *state, pe_working_set_t *data_set)
{
switch (state->output_format) {
case mon_output_plain:
case mon_output_console:
if (is_set(data_set->flags, pe_flag_maintenance_mode)) {
print_as(state->output_format, "\n *** Resource management is DISABLED ***");
print_as(state->output_format, "\n The cluster will not attempt to start, stop or recover services");
print_as(state->output_format, "\n");
}
break;
case mon_output_html:
fprintf(state->stream, "
\n Config Options
\n");
fprintf(state->stream, " \n");
fprintf(state->stream, " STONITH of failed nodes | %s |
\n",
is_set(data_set->flags, pe_flag_stonith_enabled)? "enabled" : "disabled");
fprintf(state->stream, " Cluster is | %ssymmetric |
\n",
is_set(data_set->flags, pe_flag_symmetric_cluster)? "" : "a");
fprintf(state->stream, " No Quorum Policy | ");
switch (data_set->no_quorum_policy) {
case no_quorum_freeze:
fprintf(state->stream, "Freeze resources");
break;
case no_quorum_stop:
fprintf(state->stream, "Stop ALL resources");
break;
case no_quorum_ignore:
fprintf(state->stream, "Ignore");
break;
case no_quorum_suicide:
fprintf(state->stream, "Suicide");
break;
}
fprintf(state->stream, " |
\n");
fprintf(state->stream, " Resource management | ");
if (is_set(data_set->flags, pe_flag_maintenance_mode)) {
fprintf(state->stream, "DISABLED (the cluster will "
"not attempt to start, stop or recover services)");
} else {
fprintf(state->stream, "enabled");
}
fprintf(state->stream, " |
\n");
fprintf(state->stream, "
\n \n");
break;
case mon_output_xml:
fprintf(state->stream, " stream, " stonith-enabled=\"%s\"",
is_set(data_set->flags, pe_flag_stonith_enabled)?
"true" : "false");
fprintf(state->stream, " symmetric-cluster=\"%s\"",
is_set(data_set->flags, pe_flag_symmetric_cluster)?
"true" : "false");
fprintf(state->stream, " no-quorum-policy=\"");
switch (data_set->no_quorum_policy) {
case no_quorum_freeze:
fprintf(state->stream, "freeze");
break;
case no_quorum_stop:
fprintf(state->stream, "stop");
break;
case no_quorum_ignore:
fprintf(state->stream, "ignore");
break;
case no_quorum_suicide:
fprintf(state->stream, "suicide");
break;
}
fprintf(state->stream, "\"");
fprintf(state->stream, " maintenance-mode=\"%s\"",
is_set(data_set->flags, pe_flag_maintenance_mode)?
"true" : "false");
fprintf(state->stream, " />\n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print a summary of cluster-wide information
*
* \param[in] stream File stream to display output to
* \param[in] data_set Working set of CIB state
*/
static void
print_cluster_summary(mon_state_t *state, pe_working_set_t *data_set,
unsigned int mon_ops, unsigned int show)
{
const char *stack_s = get_cluster_stack(data_set);
gboolean header_printed = FALSE;
if (show & mon_show_stack) {
if (header_printed == FALSE) {
print_cluster_summary_header(state);
header_printed = TRUE;
}
print_cluster_stack(state, stack_s);
}
/* Always print DC if none, even if not requested */
if ((data_set->dc_node == NULL) || (show & mon_show_dc)) {
if (header_printed == FALSE) {
print_cluster_summary_header(state);
header_printed = TRUE;
}
print_cluster_dc(state, data_set, mon_ops);
}
if (show & mon_show_times) {
if (header_printed == FALSE) {
print_cluster_summary_header(state);
header_printed = TRUE;
}
print_cluster_times(state, data_set);
}
if (is_set(data_set->flags, pe_flag_maintenance_mode)
|| data_set->disabled_resources
|| data_set->blocked_resources
|| is_set(show, mon_show_count)) {
if (header_printed == FALSE) {
print_cluster_summary_header(state);
header_printed = TRUE;
}
print_cluster_counts(state, data_set, stack_s);
}
/* There is not a separate option for showing cluster options, so show with
* stack for now; a separate option could be added if there is demand
*/
if (show & mon_show_stack) {
print_cluster_options(state, data_set);
}
if (header_printed) {
print_cluster_summary_footer(state);
}
}
/*!
* \internal
* \brief Print a failed action
*
* \param[in] stream File stream to display output to
* \param[in] xml_op Root of XML tree describing failed action
*/
static void
print_failed_action(mon_state_t *state, xmlNode *xml_op)
{
const char *op_key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY);
const char *op_key_attr = "op_key";
- const char *last = crm_element_value(xml_op, XML_RSC_OP_LAST_CHANGE);
const char *node = crm_element_value(xml_op, XML_ATTR_UNAME);
const char *call = crm_element_value(xml_op, XML_LRM_ATTR_CALLID);
const char *exit_reason = crm_element_value(xml_op, XML_LRM_ATTR_EXIT_REASON);
int rc = crm_parse_int(crm_element_value(xml_op, XML_LRM_ATTR_RC), "0");
int status = crm_parse_int(crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS), "0");
+ time_t last_change = 0;
char *exit_reason_cleaned;
/* If no op_key was given, use id instead */
if (op_key == NULL) {
op_key = ID(xml_op);
op_key_attr = "id";
}
/* If no exit reason was given, use "none" */
if (exit_reason == NULL) {
exit_reason = "none";
}
/* Print common action information */
switch (state->output_format) {
case mon_output_plain:
case mon_output_console:
print_as(state->output_format, "* %s on %s '%s' (%d): call=%s, status=%s, exitreason='%s'",
op_key, node, services_ocf_exitcode_str(rc), rc,
call, services_lrm_status_str(status), exit_reason);
break;
case mon_output_html:
case mon_output_cgi:
fprintf(state->stream, " %s on %s '%s' (%d): call=%s, status=%s, exitreason='%s'",
op_key, node, services_ocf_exitcode_str(rc), rc,
call, services_lrm_status_str(status), exit_reason);
break;
case mon_output_xml:
exit_reason_cleaned = crm_xml_escape(exit_reason);
fprintf(state->stream, " stream, " exitstatus=\"%s\" exitreason=\"%s\" exitcode=\"%d\"",
services_ocf_exitcode_str(rc), exit_reason_cleaned, rc);
fprintf(state->stream, " call=\"%s\" status=\"%s\"",
call, services_lrm_status_str(status));
free(exit_reason_cleaned);
break;
default:
break;
}
/* If last change was given, print timing information as well */
- if (last) {
- time_t run_at = crm_parse_int(last, "0");
- char *run_at_s = ctime(&run_at);
+ if (crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE,
+ &last_change) == pcmk_ok) {
+ char *run_at_s = ctime(&last_change);
if (run_at_s) {
run_at_s[24] = 0; /* Overwrite the newline */
}
switch (state->output_format) {
case mon_output_plain:
case mon_output_console:
print_as(state->output_format, ",\n last-rc-change='%s', queued=%sms, exec=%sms",
run_at_s? run_at_s : "",
crm_element_value(xml_op, XML_RSC_OP_T_QUEUE),
crm_element_value(xml_op, XML_RSC_OP_T_EXEC));
break;
case mon_output_html:
case mon_output_cgi:
fprintf(state->stream, " last-rc-change='%s', queued=%sms, exec=%sms",
run_at_s? run_at_s : "",
crm_element_value(xml_op, XML_RSC_OP_T_QUEUE),
crm_element_value(xml_op, XML_RSC_OP_T_EXEC));
break;
case mon_output_xml:
fprintf(state->stream,
" last-rc-change=\"%s\" queued=\"%s\" exec=\"%s\" interval=\"%u\" task=\"%s\"",
run_at_s? run_at_s : "",
crm_element_value(xml_op, XML_RSC_OP_T_QUEUE),
crm_element_value(xml_op, XML_RSC_OP_T_EXEC),
crm_parse_ms(crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL_MS)),
crm_element_value(xml_op, XML_LRM_ATTR_TASK));
break;
default:
break;
}
}
/* End the action listing */
switch (state->output_format) {
case mon_output_plain:
case mon_output_console:
print_as(state->output_format, "\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(state->stream, "\n");
break;
case mon_output_xml:
fprintf(state->stream, " />\n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print a section for failed actions
*
* \param[in] stream File stream to display output to
* \param[in] data_set Working set of CIB state
*/
static void
print_failed_actions(mon_state_t *state, pe_working_set_t *data_set)
{
xmlNode *xml_op = NULL;
/* Print section heading */
switch (state->output_format) {
case mon_output_plain:
case mon_output_console:
print_as(state->output_format, "\nFailed Resource Actions:\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(state->stream,
"
\n Failed Resource Actions
\n \n");
break;
case mon_output_xml:
fprintf(state->stream, " \n");
break;
default:
break;
}
/* Print each failed action */
for (xml_op = __xml_first_child(data_set->failed); xml_op != NULL;
xml_op = __xml_next(xml_op)) {
print_failed_action(state, xml_op);
}
/* End section */
switch (state->output_format) {
case mon_output_html:
case mon_output_cgi:
fprintf(state->stream, "
\n");
break;
case mon_output_xml:
fprintf(state->stream, " \n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print a stonith action
*
* \param[in] stream File stream to display output to
* \param[in] event stonith event
*/
static void
print_stonith_action(mon_state_t *state, stonith_history_t *event, unsigned int mon_ops, stonith_history_t *top_history)
{
const char *action_s = stonith_action_str(event->action);
char *run_at_s = ctime(&event->completed);
if ((run_at_s) && (run_at_s[0] != 0)) {
run_at_s[strlen(run_at_s)-1] = 0; /* Overwrite the newline */
}
switch(state->output_format) {
case mon_output_xml:
fprintf(state->stream, " target, event->action);
switch(event->state) {
case st_done:
fprintf(state->stream, " state=\"success\"");
break;
case st_failed:
fprintf(state->stream, " state=\"failed\"");
break;
default:
fprintf(state->stream, " state=\"pending\"");
}
fprintf(state->stream, " origin=\"%s\" client=\"%s\"",
event->origin, event->client);
if (event->delegate) {
fprintf(state->stream, " delegate=\"%s\"", event->delegate);
}
switch(event->state) {
case st_done:
case st_failed:
fprintf(state->stream, " completed=\"%s\"", run_at_s?run_at_s:"");
break;
default:
break;
}
fprintf(state->stream, " />\n");
break;
case mon_output_plain:
case mon_output_console:
switch(event->state) {
case st_done:
print_as(state->output_format, "* %s of %s successful: delegate=%s, client=%s, origin=%s,\n"
" %s='%s'\n",
action_s, event->target,
event->delegate ? event->delegate : "",
event->client, event->origin,
is_set(mon_ops, mon_op_fence_full_history) ? "completed" : "last-successful",
run_at_s?run_at_s:"");
break;
case st_failed:
print_as(state->output_format, "* %s of %s failed: delegate=%s, client=%s, origin=%s,\n"
" %s='%s' %s\n",
action_s, event->target,
event->delegate ? event->delegate : "",
event->client, event->origin,
is_set(mon_ops, mon_op_fence_full_history) ? "completed" : "last-failed",
run_at_s?run_at_s:"",
stonith__later_succeeded(event, top_history) ? "(a later attempt succeeded)" : "");
break;
default:
print_as(state->output_format, "* %s of %s pending: client=%s, origin=%s\n",
action_s, event->target,
event->client, event->origin);
}
break;
case mon_output_html:
case mon_output_cgi:
switch(event->state) {
case st_done:
fprintf(state->stream, " %s of %s successful: delegate=%s, "
"client=%s, origin=%s, %s='%s'\n",
action_s, event->target,
event->delegate ? event->delegate : "",
event->client, event->origin,
is_set(mon_ops, mon_op_fence_full_history) ? "completed" : "last-successful",
run_at_s?run_at_s:"");
break;
case st_failed:
fprintf(state->stream, " %s of %s failed: delegate=%s, "
"client=%s, origin=%s, %s='%s' %s\n",
action_s, event->target,
event->delegate ? event->delegate : "",
event->client, event->origin,
is_set(mon_ops, mon_op_fence_full_history) ? "completed" : "last-failed",
run_at_s?run_at_s:"",
stonith__later_succeeded(event, top_history) ? "(a later attempt succeeded)" : "");
break;
default:
fprintf(state->stream, " %s of %s pending: client=%s, "
"origin=%s\n",
action_s, event->target,
event->client, event->origin);
}
break;
default:
/* no support for fence history for other formats so far */
break;
}
}
/*!
* \internal
* \brief Print a section for failed stonith actions
*
* \param[in] stream File stream to display output to
* \param[in] history List of stonith actions
*
*/
static void
print_failed_stonith_actions(mon_state_t *state, stonith_history_t *history, unsigned int mon_ops)
{
stonith_history_t *hp;
for (hp = history; hp; hp = hp->next) {
if (hp->state == st_failed) {
break;
}
}
if (!hp) {
return;
}
/* Print section heading */
switch (state->output_format) {
/* no need to take care of xml in here as xml gets full
* history anyway
*/
case mon_output_plain:
case mon_output_console:
print_as(state->output_format, "\nFailed Fencing Actions:\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(state->stream, "
\n Failed Fencing Actions
\n \n");
break;
default:
break;
}
/* Print each failed stonith action */
for (hp = history; hp; hp = hp->next) {
if (hp->state == st_failed) {
print_stonith_action(state, hp, mon_ops, history);
}
}
/* End section */
switch (state->output_format) {
case mon_output_html:
case mon_output_cgi:
fprintf(state->stream, "
\n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print pending stonith actions
*
* \param[in] stream File stream to display output to
* \param[in] history List of stonith actions
*
*/
static void
print_stonith_pending(mon_state_t *state, stonith_history_t *history, unsigned int mon_ops)
{
/* xml-output always shows the full history
* so we'll never have to show pending-actions
* separately
*/
if (history && (history->state != st_failed) &&
(history->state != st_done)) {
stonith_history_t *hp;
/* Print section heading */
switch (state->output_format) {
case mon_output_plain:
case mon_output_console:
print_as(state->output_format, "\nPending Fencing Actions:\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(state->stream, "
\n Pending Fencing Actions
\n \n");
break;
default:
break;
}
for (hp = history; hp; hp = hp->next) {
if ((hp->state == st_failed) || (hp->state == st_done)) {
break;
}
print_stonith_action(state, hp, mon_ops, NULL);
}
/* End section */
switch (state->output_format) {
case mon_output_html:
case mon_output_cgi:
fprintf(state->stream, "
\n");
break;
default:
break;
}
}
}
/*!
* \internal
* \brief Print a section for stonith-history
*
* \param[in] stream File stream to display output to
* \param[in] history List of stonith actions
*
*/
static void
print_stonith_history(mon_state_t *state, stonith_history_t *history, unsigned int mon_ops)
{
stonith_history_t *hp;
/* Print section heading */
switch (state->output_format) {
case mon_output_plain:
case mon_output_console:
print_as(state->output_format, "\nFencing History:\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(state->stream, "
\n Fencing History
\n \n");
break;
case mon_output_xml:
fprintf(state->stream, " \n");
break;
default:
break;
}
for (hp = history; hp; hp = hp->next) {
if ((hp->state != st_failed) || (state->output_format == mon_output_xml)) {
print_stonith_action(state, hp, mon_ops, NULL);
}
}
/* End section */
switch (state->output_format) {
case mon_output_html:
case mon_output_cgi:
fprintf(state->stream, "
\n");
break;
case mon_output_xml:
fprintf(state->stream, " \n");
break;
default:
break;
}
}
void
print_status(mon_state_t *state, pe_working_set_t *data_set,
stonith_history_t *stonith_history, unsigned int mon_ops,
unsigned int show, const char *prefix)
{
GListPtr gIter = NULL;
int print_opts = get_resource_display_options(mon_ops, state->output_format);
/* space-separated lists of node names */
char *online_nodes = NULL;
char *online_remote_nodes = NULL;
char *online_guest_nodes = NULL;
char *offline_nodes = NULL;
char *offline_remote_nodes = NULL;
if (state->output_format == mon_output_console) {
blank_screen();
}
print_cluster_summary(state, data_set, mon_ops, show);
print_as(state->output_format, "\n");
/* Gather node information (and print if in bad state or grouping by node) */
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
const char *node_mode = NULL;
char *node_name = get_node_display_name(node, mon_ops);
/* Get node mode */
if (node->details->unclean) {
if (node->details->online) {
node_mode = "UNCLEAN (online)";
} else if (node->details->pending) {
node_mode = "UNCLEAN (pending)";
} else {
node_mode = "UNCLEAN (offline)";
}
} else if (node->details->pending) {
node_mode = "pending";
} else if (node->details->standby_onfail && node->details->online) {
node_mode = "standby (on-fail)";
} else if (node->details->standby) {
if (node->details->online) {
if (node->details->running_rsc) {
node_mode = "standby (with active resources)";
} else {
node_mode = "standby";
}
} else {
node_mode = "OFFLINE (standby)";
}
} else if (node->details->maintenance) {
if (node->details->online) {
node_mode = "maintenance";
} else {
node_mode = "OFFLINE (maintenance)";
}
} else if (node->details->online) {
node_mode = "online";
if (is_not_set(mon_ops, mon_op_group_by_node)) {
if (pe__is_guest_node(node)) {
online_guest_nodes = add_list_element(online_guest_nodes, node_name);
} else if (pe__is_remote_node(node)) {
online_remote_nodes = add_list_element(online_remote_nodes, node_name);
} else {
online_nodes = add_list_element(online_nodes, node_name);
}
free(node_name);
continue;
}
} else {
node_mode = "OFFLINE";
if (is_not_set(mon_ops, mon_op_group_by_node)) {
if (pe__is_remote_node(node)) {
offline_remote_nodes = add_list_element(offline_remote_nodes, node_name);
} else if (pe__is_guest_node(node)) {
/* ignore offline guest nodes */
} else {
offline_nodes = add_list_element(offline_nodes, node_name);
}
free(node_name);
continue;
}
}
/* If we get here, node is in bad state, or we're grouping by node */
/* Print the node name and status */
if (pe__is_guest_node(node)) {
print_as(state->output_format, "Guest");
} else if (pe__is_remote_node(node)) {
print_as(state->output_format, "Remote");
}
print_as(state->output_format, "Node %s: %s\n", node_name, node_mode);
/* If we're grouping by node, print its resources */
if (is_set(mon_ops, mon_op_group_by_node)) {
if (is_set(mon_ops, mon_op_print_brief)) {
print_rscs_brief(node->details->running_rsc, "\t", print_opts | pe_print_rsconly,
state->stream, FALSE);
} else {
GListPtr gIter2 = NULL;
for (gIter2 = node->details->running_rsc; gIter2 != NULL; gIter2 = gIter2->next) {
resource_t *rsc = (resource_t *) gIter2->data;
rsc->fns->print(rsc, "\t", print_opts | pe_print_rsconly, state->stream);
}
}
}
free(node_name);
}
/* If we're not grouping by node, summarize nodes by status */
if (online_nodes) {
print_as(state->output_format, "Online: [%s ]\n", online_nodes);
free(online_nodes);
}
if (offline_nodes) {
print_as(state->output_format, "OFFLINE: [%s ]\n", offline_nodes);
free(offline_nodes);
}
if (online_remote_nodes) {
print_as(state->output_format, "RemoteOnline: [%s ]\n", online_remote_nodes);
free(online_remote_nodes);
}
if (offline_remote_nodes) {
print_as(state->output_format, "RemoteOFFLINE: [%s ]\n", offline_remote_nodes);
free(offline_remote_nodes);
}
if (online_guest_nodes) {
print_as(state->output_format, "GuestOnline: [%s ]\n", online_guest_nodes);
free(online_guest_nodes);
}
/* Print resources section, if needed */
print_resources(state, data_set, print_opts, mon_ops);
/* print Node Attributes section if requested */
if (show & mon_show_attributes) {
print_node_attributes(state, data_set, mon_ops);
}
/* If requested, print resource operations (which includes failcounts)
* or just failcounts
*/
if (show & (mon_show_operations | mon_show_failcounts)) {
print_node_summary(state, data_set,
((show & mon_show_operations)? TRUE : FALSE), mon_ops);
}
/* If there were any failed actions, print them */
if (xml_has_children(data_set->failed)) {
print_failed_actions(state, data_set);
}
/* Print failed stonith actions */
if (is_set(mon_ops, mon_op_fence_history)) {
print_failed_stonith_actions(state, stonith_history, mon_ops);
}
/* Print tickets if requested */
if (show & mon_show_tickets) {
print_cluster_tickets(state, data_set);
}
/* Print negative location constraints if requested */
if (show & mon_show_bans) {
print_neg_locations(state, data_set, mon_ops, prefix);
}
/* Print stonith history */
if (is_set(mon_ops, mon_op_fence_history)) {
if (show & mon_show_fence_history) {
print_stonith_history(state, stonith_history, mon_ops);
} else {
print_stonith_pending(state, stonith_history, mon_ops);
}
}
#if CURSES_ENABLED
if (state->output_format == mon_output_console) {
refresh();
}
#endif
}
void
print_xml_status(mon_state_t *state, pe_working_set_t *data_set,
stonith_history_t *stonith_history, unsigned int mon_ops,
unsigned int show, const char *prefix)
{
GListPtr gIter = NULL;
int print_opts = get_resource_display_options(mon_ops, state->output_format);
fprintf(state->stream, "\n");
fprintf(state->stream, "\n", VERSION);
print_cluster_summary(state, data_set, mon_ops, show);
/*** NODES ***/
fprintf(state->stream, " \n");
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
const char *node_type = "unknown";
switch (node->details->type) {
case node_member:
node_type = "member";
break;
case node_remote:
node_type = "remote";
break;
case node_ping:
node_type = "ping";
break;
}
fprintf(state->stream, " details->uname);
fprintf(state->stream, "id=\"%s\" ", node->details->id);
fprintf(state->stream, "online=\"%s\" ", node->details->online ? "true" : "false");
fprintf(state->stream, "standby=\"%s\" ", node->details->standby ? "true" : "false");
fprintf(state->stream, "standby_onfail=\"%s\" ", node->details->standby_onfail ? "true" : "false");
fprintf(state->stream, "maintenance=\"%s\" ", node->details->maintenance ? "true" : "false");
fprintf(state->stream, "pending=\"%s\" ", node->details->pending ? "true" : "false");
fprintf(state->stream, "unclean=\"%s\" ", node->details->unclean ? "true" : "false");
fprintf(state->stream, "shutdown=\"%s\" ", node->details->shutdown ? "true" : "false");
fprintf(state->stream, "expected_up=\"%s\" ", node->details->expected_up ? "true" : "false");
fprintf(state->stream, "is_dc=\"%s\" ", node->details->is_dc ? "true" : "false");
fprintf(state->stream, "resources_running=\"%d\" ", g_list_length(node->details->running_rsc));
fprintf(state->stream, "type=\"%s\" ", node_type);
if (pe__is_guest_node(node)) {
fprintf(state->stream, "id_as_resource=\"%s\" ", node->details->remote_rsc->container->id);
}
if (is_set(mon_ops, mon_op_group_by_node)) {
GListPtr lpc2 = NULL;
fprintf(state->stream, ">\n");
for (lpc2 = node->details->running_rsc; lpc2 != NULL; lpc2 = lpc2->next) {
resource_t *rsc = (resource_t *) lpc2->data;
rsc->fns->print(rsc, " ", print_opts | pe_print_rsconly, state->stream);
}
fprintf(state->stream, " \n");
} else {
fprintf(state->stream, "/>\n");
}
}
fprintf(state->stream, " \n");
/* Print resources section, if needed */
print_resources(state, data_set, print_opts, mon_ops);
/* print Node Attributes section if requested */
if (show & mon_show_attributes) {
print_node_attributes(state, data_set, mon_ops);
}
/* If requested, print resource operations (which includes failcounts)
* or just failcounts
*/
if (show & (mon_show_operations | mon_show_failcounts)) {
print_node_summary(state, data_set,
((show & mon_show_operations)? TRUE : FALSE), mon_ops);
}
/* If there were any failed actions, print them */
if (xml_has_children(data_set->failed)) {
print_failed_actions(state, data_set);
}
/* Print stonith history */
if (is_set(mon_ops, mon_op_fence_history)) {
print_stonith_history(state, stonith_history, mon_ops);
}
/* Print tickets if requested */
if (show & mon_show_tickets) {
print_cluster_tickets(state, data_set);
}
/* Print negative location constraints if requested */
if (show & mon_show_bans) {
print_neg_locations(state, data_set, mon_ops, prefix);
}
fprintf(state->stream, "\n");
fflush(state->stream);
}
int
print_html_status(mon_state_t *state, pe_working_set_t *data_set,
const char *filename, stonith_history_t *stonith_history,
unsigned int mon_ops, unsigned int show, const char *prefix,
unsigned int reconnect_msec)
{
GListPtr gIter = NULL;
char *filename_tmp = NULL;
int print_opts = get_resource_display_options(mon_ops, state->output_format);
if (state->output_format == mon_output_cgi) {
fprintf(state->stream, "Content-Type: text/html\n\n");
} else {
filename_tmp = crm_concat(filename, "tmp", '.');
state->stream = fopen(filename_tmp, "w");
if (state->stream == NULL) {
crm_perror(LOG_ERR, "Cannot open %s for writing", filename_tmp);
free(filename_tmp);
return -1;
}
}
fprintf(state->stream, "\n");
fprintf(state->stream, " \n");
fprintf(state->stream, " Cluster status\n");
fprintf(state->stream, " \n", reconnect_msec / 1000);
fprintf(state->stream, " \n");
fprintf(state->stream, "\n");
print_cluster_summary(state, data_set, mon_ops, show);
/*** NODE LIST ***/
fprintf(state->stream, "
\n Node List
\n");
fprintf(state->stream, "\n");
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
char *node_name = get_node_display_name(node, mon_ops);
fprintf(state->stream, "- Node: %s: ", node_name);
if (node->details->standby_onfail && node->details->online) {
fprintf(state->stream, "standby (on-fail)\n");
} else if (node->details->standby && node->details->online) {
fprintf(state->stream, "standby%s\n",
node->details->running_rsc?" (with active resources)":"");
} else if (node->details->standby) {
fprintf(state->stream, "OFFLINE (standby)\n");
} else if (node->details->maintenance && node->details->online) {
fprintf(state->stream, "maintenance\n");
} else if (node->details->maintenance) {
fprintf(state->stream, "OFFLINE (maintenance)\n");
} else if (node->details->online) {
fprintf(state->stream, "online\n");
} else {
fprintf(state->stream, "OFFLINE\n");
}
if (is_set(mon_ops, mon_op_print_brief) && is_set(mon_ops, mon_op_group_by_node)) {
fprintf(state->stream, "
\n");
print_rscs_brief(node->details->running_rsc, NULL, print_opts | pe_print_rsconly,
state->stream, FALSE);
fprintf(state->stream, "
\n");
} else if (is_set(mon_ops, mon_op_group_by_node)) {
GListPtr lpc2 = NULL;
fprintf(state->stream, "\n");
for (lpc2 = node->details->running_rsc; lpc2 != NULL; lpc2 = lpc2->next) {
resource_t *rsc = (resource_t *) lpc2->data;
fprintf(state->stream, "- ");
rsc->fns->print(rsc, NULL, print_opts | pe_print_rsconly, state->stream);
fprintf(state->stream, "
\n");
}
fprintf(state->stream, "
\n");
}
fprintf(state->stream, " \n");
free(node_name);
}
fprintf(state->stream, "
\n");
/* Print resources section, if needed */
print_resources(state, data_set, print_opts, mon_ops);
/* print Node Attributes section if requested */
if (show & mon_show_attributes) {
print_node_attributes(state, data_set, mon_ops);
}
/* If requested, print resource operations (which includes failcounts)
* or just failcounts
*/
if (show & (mon_show_operations | mon_show_failcounts)) {
print_node_summary(state, data_set,
((show & mon_show_operations)? TRUE : FALSE), mon_ops);
}
/* If there were any failed actions, print them */
if (xml_has_children(data_set->failed)) {
print_failed_actions(state, data_set);
}
/* Print failed stonith actions */
if (is_set(mon_ops, mon_op_fence_history)) {
print_failed_stonith_actions(state, stonith_history, mon_ops);
}
/* Print stonith history */
if (is_set(mon_ops, mon_op_fence_history)) {
if (show & mon_show_fence_history) {
print_stonith_history(state, stonith_history, mon_ops);
} else {
print_stonith_pending(state, stonith_history, mon_ops);
}
}
/* Print tickets if requested */
if (show & mon_show_tickets) {
print_cluster_tickets(state, data_set);
}
/* Print negative location constraints if requested */
if (show & mon_show_bans) {
print_neg_locations(state, data_set, mon_ops, prefix);
}
fprintf(state->stream, "\n");
fprintf(state->stream, "\n");
fflush(state->stream);
if (state->output_format != mon_output_cgi) {
if (rename(filename_tmp, filename) != 0) {
crm_perror(LOG_ERR, "Unable to rename %s->%s", filename_tmp, filename);
}
free(filename_tmp);
}
return 0;
}
diff --git a/tools/crm_resource_print.c b/tools/crm_resource_print.c
index 19e75ee051..a2ff1124e6 100644
--- a/tools/crm_resource_print.c
+++ b/tools/crm_resource_print.c
@@ -1,330 +1,331 @@
/*
* Copyright 2004-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#define cons_string(x) x?x:"NA"
void
cli_resource_print_cts_constraints(pe_working_set_t * data_set)
{
xmlNode *xml_obj = NULL;
xmlNode *lifetime = NULL;
xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set->input);
for (xml_obj = __xml_first_child_element(cib_constraints); xml_obj != NULL;
xml_obj = __xml_next_element(xml_obj)) {
const char *id = crm_element_value(xml_obj, XML_ATTR_ID);
if (id == NULL) {
continue;
}
lifetime = first_named_child(xml_obj, "lifetime");
if (test_ruleset(lifetime, NULL, data_set->now) == FALSE) {
continue;
}
if (safe_str_eq(XML_CONS_TAG_RSC_DEPEND, crm_element_name(xml_obj))) {
printf("Constraint %s %s %s %s %s %s %s\n",
crm_element_name(xml_obj),
cons_string(crm_element_value(xml_obj, XML_ATTR_ID)),
cons_string(crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE)),
cons_string(crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET)),
cons_string(crm_element_value(xml_obj, XML_RULE_ATTR_SCORE)),
cons_string(crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE_ROLE)),
cons_string(crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET_ROLE)));
} else if (safe_str_eq(XML_CONS_TAG_RSC_LOCATION, crm_element_name(xml_obj))) {
/* unpack_location(xml_obj, data_set); */
}
}
}
void
cli_resource_print_cts(resource_t * rsc)
{
GListPtr lpc = NULL;
const char *host = NULL;
bool needs_quorum = TRUE;
const char *rtype = crm_element_value(rsc->xml, XML_ATTR_TYPE);
const char *rprov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER);
const char *rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
pe_node_t *node = pe__current_node(rsc);
if (safe_str_eq(rclass, PCMK_RESOURCE_CLASS_STONITH)) {
needs_quorum = FALSE;
} else {
// @TODO check requires in resource meta-data and rsc_defaults
}
if (node != NULL) {
host = node->details->uname;
}
printf("Resource: %s %s %s %s %s %s %s %s %d %lld 0x%.16llx\n",
crm_element_name(rsc->xml), rsc->id,
rsc->clone_name ? rsc->clone_name : rsc->id, rsc->parent ? rsc->parent->id : "NA",
rprov ? rprov : "NA", rclass, rtype, host ? host : "NA", needs_quorum, rsc->flags,
rsc->flags);
for (lpc = rsc->children; lpc != NULL; lpc = lpc->next) {
resource_t *child = (resource_t *) lpc->data;
cli_resource_print_cts(child);
}
}
void
cli_resource_print_raw(resource_t * rsc)
{
GListPtr lpc = NULL;
GListPtr children = rsc->children;
if (children == NULL) {
printf("%s\n", rsc->id);
}
for (lpc = children; lpc != NULL; lpc = lpc->next) {
resource_t *child = (resource_t *) lpc->data;
cli_resource_print_raw(child);
}
}
int
cli_resource_print_list(pe_working_set_t * data_set, bool raw)
{
int found = 0;
GListPtr lpc = NULL;
int opts = pe_print_printf | pe_print_rsconly | pe_print_pending;
for (lpc = data_set->resources; lpc != NULL; lpc = lpc->next) {
resource_t *rsc = (resource_t *) lpc->data;
if (is_set(rsc->flags, pe_rsc_orphan)
&& rsc->fns->active(rsc, TRUE) == FALSE) {
continue;
}
rsc->fns->print(rsc, NULL, opts, stdout);
found++;
}
if (found == 0) {
printf("NO resources configured\n");
return -ENXIO;
}
return 0;
}
int
cli_resource_print_operations(const char *rsc_id, const char *host_uname, bool active,
pe_working_set_t * data_set)
{
resource_t *rsc = NULL;
int opts = pe_print_printf | pe_print_rsconly | pe_print_suppres_nl | pe_print_pending;
GListPtr ops = find_operations(rsc_id, host_uname, active, data_set);
GListPtr lpc = NULL;
for (lpc = ops; lpc != NULL; lpc = lpc->next) {
xmlNode *xml_op = (xmlNode *) lpc->data;
const char *op_rsc = crm_element_value(xml_op, "resource");
- const char *last = crm_element_value(xml_op, XML_RSC_OP_LAST_CHANGE);
const char *status_s = crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS);
const char *op_key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY);
int status = crm_parse_int(status_s, "0");
+ time_t last_change = 0;
rsc = pe_find_resource(data_set->resources, op_rsc);
if(rsc) {
rsc->fns->print(rsc, "", opts, stdout);
} else {
fprintf(stdout, "Unknown resource %s", op_rsc);
}
fprintf(stdout, ": %s (node=%s, call=%s, rc=%s",
op_key ? op_key : ID(xml_op),
crm_element_value(xml_op, XML_ATTR_UNAME),
crm_element_value(xml_op, XML_LRM_ATTR_CALLID),
crm_element_value(xml_op, XML_LRM_ATTR_RC));
- if (last) {
- time_t run_at = crm_parse_int(last, "0");
+ if (crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE,
+ &last_change) == pcmk_ok) {
fprintf(stdout, ", last-rc-change=%s, exec=%sms",
- crm_strip_trailing_newline(ctime(&run_at)), crm_element_value(xml_op, XML_RSC_OP_T_EXEC));
+ crm_strip_trailing_newline(ctime(&last_change)),
+ crm_element_value(xml_op, XML_RSC_OP_T_EXEC));
}
fprintf(stdout, "): %s\n", services_lrm_status_str(status));
}
return pcmk_ok;
}
void
cli_resource_print_location(resource_t * rsc, const char *prefix)
{
GListPtr lpc = NULL;
GListPtr list = rsc->rsc_location;
int offset = 0;
if (prefix) {
offset = strlen(prefix) - 2;
}
for (lpc = list; lpc != NULL; lpc = lpc->next) {
pe__location_t *cons = lpc->data;
GListPtr lpc2 = NULL;
for (lpc2 = cons->node_list_rh; lpc2 != NULL; lpc2 = lpc2->next) {
node_t *node = (node_t *) lpc2->data;
char *score = score2char(node->weight);
fprintf(stdout, "%s: Node %-*s (score=%s, id=%s)\n",
prefix ? prefix : " ", 71 - offset, node->details->uname, score, cons->id);
free(score);
}
}
}
void
cli_resource_print_colocation(resource_t * rsc, bool dependents, bool recursive, int offset)
{
char *prefix = NULL;
GListPtr lpc = NULL;
GListPtr list = rsc->rsc_cons;
prefix = calloc(1, (offset * 4) + 1);
memset(prefix, ' ', offset * 4);
if (dependents) {
list = rsc->rsc_cons_lhs;
}
if (is_set(rsc->flags, pe_rsc_allocating)) {
/* Break colocation loops */
printf("loop %s\n", rsc->id);
free(prefix);
return;
}
set_bit(rsc->flags, pe_rsc_allocating);
for (lpc = list; lpc != NULL; lpc = lpc->next) {
rsc_colocation_t *cons = (rsc_colocation_t *) lpc->data;
char *score = NULL;
resource_t *peer = cons->rsc_rh;
if (dependents) {
peer = cons->rsc_lh;
}
if (is_set(peer->flags, pe_rsc_allocating)) {
if (dependents == FALSE) {
fprintf(stdout, "%s%-*s (id=%s - loop)\n", prefix, 80 - (4 * offset), peer->id,
cons->id);
}
continue;
}
if (dependents && recursive) {
cli_resource_print_colocation(peer, dependents, recursive, offset + 1);
}
score = score2char(cons->score);
if (cons->role_rh > RSC_ROLE_STARTED) {
fprintf(stdout, "%s%-*s (score=%s, %s role=%s, id=%s)\n", prefix, 80 - (4 * offset),
peer->id, score, dependents ? "needs" : "with", role2text(cons->role_rh),
cons->id);
} else {
fprintf(stdout, "%s%-*s (score=%s, id=%s)\n", prefix, 80 - (4 * offset),
peer->id, score, cons->id);
}
cli_resource_print_location(peer, prefix);
free(score);
if (!dependents && recursive) {
cli_resource_print_colocation(peer, dependents, recursive, offset + 1);
}
}
free(prefix);
}
int
cli_resource_print(resource_t *rsc, pe_working_set_t *data_set, bool expanded)
{
char *rsc_xml = NULL;
int opts = pe_print_printf | pe_print_pending;
rsc->fns->print(rsc, NULL, opts, stdout);
rsc_xml = dump_xml_formatted((!expanded && rsc->orig_xml)?
rsc->orig_xml : rsc->xml);
fprintf(stdout, "%sxml:\n%s\n", expanded ? "" : "raw ", rsc_xml);
free(rsc_xml);
return 0;
}
int
cli_resource_print_attribute(resource_t *rsc, const char *attr, pe_working_set_t * data_set)
{
int rc = -ENXIO;
unsigned int count = 0;
GHashTable *params = NULL;
const char *value = NULL;
node_t *current = pe__find_active_on(rsc, &count, NULL);
if (count > 1) {
CMD_ERR("%s is active on more than one node,"
" returning the default value for %s", rsc->id, crm_str(attr));
current = NULL;
}
params = crm_str_table_new();
if (safe_str_eq(attr_set_type, XML_TAG_ATTR_SETS)) {
get_rsc_attributes(params, rsc, current, data_set);
} else if (safe_str_eq(attr_set_type, XML_TAG_META_SETS)) {
/* No need to redirect to the parent */
get_meta_attributes(params, rsc, current, data_set);
} else {
unpack_instance_attributes(data_set->input, rsc->xml,
XML_TAG_UTILIZATION, NULL,
params, NULL, FALSE, data_set->now);
}
crm_debug("Looking up %s in %s", attr, rsc->id);
value = g_hash_table_lookup(params, attr);
if (value != NULL) {
fprintf(stdout, "%s\n", value);
rc = 0;
} else {
CMD_ERR("Attribute '%s' not found for '%s'", attr, rsc->id);
}
g_hash_table_destroy(params);
return rc;
}
int
cli_resource_print_property(resource_t *rsc, const char *attr, pe_working_set_t * data_set)
{
const char *value = crm_element_value(rsc->xml, attr);
if (value != NULL) {
fprintf(stdout, "%s\n", value);
return 0;
}
return -ENXIO;
}
diff --git a/tools/crm_simulate.c b/tools/crm_simulate.c
index d4ab6a3432..72df79a163 100644
--- a/tools/crm_simulate.c
+++ b/tools/crm_simulate.c
@@ -1,928 +1,926 @@
/*
* Copyright 2009-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
cib_t *global_cib = NULL;
GListPtr op_fail = NULL;
bool action_numbers = FALSE;
gboolean quiet = FALSE;
gboolean print_pending = TRUE;
char *temp_shadow = NULL;
extern gboolean bringing_nodes_online;
#define quiet_log(fmt, args...) do { \
if(quiet == FALSE) { \
printf(fmt , ##args); \
} \
} while(0)
char *use_date = NULL;
static void
get_date(pe_working_set_t * data_set)
{
- int value = 0;
time_t original_date = 0;
- crm_element_value_int(data_set->input, "execution-date", &value);
- original_date = value;
+ crm_element_value_epoch(data_set->input, "execution-date", &original_date);
if (use_date) {
data_set->now = crm_time_new(use_date);
quiet_log(" + Setting effective cluster time: %s", use_date);
crm_time_log(LOG_NOTICE, "Pretending 'now' is", data_set->now,
crm_time_log_date | crm_time_log_timeofday);
} else if(original_date) {
char *when = NULL;
data_set->now = crm_time_new(NULL);
crm_time_set_timet(data_set->now, &original_date);
when = crm_time_as_string(data_set->now, crm_time_log_date|crm_time_log_timeofday);
printf("Using the original execution date of: %s\n", when);
free(when);
}
}
static void
print_cluster_status(pe_working_set_t * data_set, long options)
{
char *online_nodes = NULL;
char *online_remote_nodes = NULL;
char *online_guest_nodes = NULL;
char *offline_nodes = NULL;
char *offline_remote_nodes = NULL;
GListPtr gIter = NULL;
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
const char *node_mode = NULL;
char *node_name = NULL;
if (pe__is_guest_node(node)) {
node_name = crm_strdup_printf("%s:%s", node->details->uname, node->details->remote_rsc->container->id);
} else {
node_name = crm_strdup_printf("%s", node->details->uname);
}
if (node->details->unclean) {
if (node->details->online && node->details->unclean) {
node_mode = "UNCLEAN (online)";
} else if (node->details->pending) {
node_mode = "UNCLEAN (pending)";
} else {
node_mode = "UNCLEAN (offline)";
}
} else if (node->details->pending) {
node_mode = "pending";
} else if (node->details->standby_onfail && node->details->online) {
node_mode = "standby (on-fail)";
} else if (node->details->standby) {
if (node->details->online) {
node_mode = "standby";
} else {
node_mode = "OFFLINE (standby)";
}
} else if (node->details->maintenance) {
if (node->details->online) {
node_mode = "maintenance";
} else {
node_mode = "OFFLINE (maintenance)";
}
} else if (node->details->online) {
if (pe__is_guest_node(node)) {
online_guest_nodes = add_list_element(online_guest_nodes, node_name);
} else if (pe__is_remote_node(node)) {
online_remote_nodes = add_list_element(online_remote_nodes, node_name);
} else {
online_nodes = add_list_element(online_nodes, node_name);
}
free(node_name);
continue;
} else {
if (pe__is_remote_node(node)) {
offline_remote_nodes = add_list_element(offline_remote_nodes, node_name);
} else if (pe__is_guest_node(node)) {
/* ignore offline container nodes */
} else {
offline_nodes = add_list_element(offline_nodes, node_name);
}
free(node_name);
continue;
}
if (pe__is_guest_node(node)) {
printf("GuestNode %s: %s\n", node_name, node_mode);
} else if (pe__is_remote_node(node)) {
printf("RemoteNode %s: %s\n", node_name, node_mode);
} else if (safe_str_eq(node->details->uname, node->details->id)) {
printf("Node %s: %s\n", node_name, node_mode);
} else {
printf("Node %s (%s): %s\n", node_name, node->details->id, node_mode);
}
free(node_name);
}
if (online_nodes) {
printf("Online: [%s ]\n", online_nodes);
free(online_nodes);
}
if (offline_nodes) {
printf("OFFLINE: [%s ]\n", offline_nodes);
free(offline_nodes);
}
if (online_remote_nodes) {
printf("RemoteOnline: [%s ]\n", online_remote_nodes);
free(online_remote_nodes);
}
if (offline_remote_nodes) {
printf("RemoteOFFLINE: [%s ]\n", offline_remote_nodes);
free(offline_remote_nodes);
}
if (online_guest_nodes) {
printf("GuestOnline: [%s ]\n", online_guest_nodes);
free(online_guest_nodes);
}
fprintf(stdout, "\n");
for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
if (is_set(rsc->flags, pe_rsc_orphan)
&& rsc->role == RSC_ROLE_STOPPED) {
continue;
}
rsc->fns->print(rsc, NULL, pe_print_printf | options, stdout);
}
fprintf(stdout, "\n");
}
static char *
create_action_name(action_t * action)
{
char *action_name = NULL;
const char *prefix = NULL;
const char *action_host = NULL;
const char *task = action->task;
if (action->node) {
action_host = action->node->details->uname;
} else if (is_not_set(action->flags, pe_action_pseudo)) {
action_host = "";
}
if (safe_str_eq(action->task, RSC_CANCEL)) {
prefix = "Cancel ";
task = "monitor"; /* TO-DO: Hack! */
}
if (action->rsc && action->rsc->clone_name) {
char *key = NULL;
const char *name = action->rsc->clone_name;
const char *interval_ms_s = NULL;
guint interval_ms = 0;
interval_ms_s = g_hash_table_lookup(action->meta,
XML_LRM_ATTR_INTERVAL_MS);
interval_ms = crm_parse_ms(interval_ms_s);
if (safe_str_eq(action->task, RSC_NOTIFY)
|| safe_str_eq(action->task, RSC_NOTIFIED)) {
const char *n_type = g_hash_table_lookup(action->meta, "notify_key_type");
const char *n_task = g_hash_table_lookup(action->meta, "notify_key_operation");
CRM_ASSERT(n_type != NULL);
CRM_ASSERT(n_task != NULL);
key = generate_notify_key(name, n_type, n_task);
} else {
key = generate_op_key(name, task, interval_ms);
}
if (action_host) {
action_name = crm_strdup_printf("%s%s %s", prefix ? prefix : "", key, action_host);
} else {
action_name = crm_strdup_printf("%s%s", prefix ? prefix : "", key);
}
free(key);
} else if (safe_str_eq(action->task, CRM_OP_FENCE)) {
const char *op = g_hash_table_lookup(action->meta, "stonith_action");
action_name = crm_strdup_printf("%s%s '%s' %s", prefix ? prefix : "", action->task, op, action_host);
} else if (action->rsc && action_host) {
action_name = crm_strdup_printf("%s%s %s", prefix ? prefix : "", action->uuid, action_host);
} else if (action_host) {
action_name = crm_strdup_printf("%s%s %s", prefix ? prefix : "", action->task, action_host);
} else {
action_name = crm_strdup_printf("%s", action->uuid);
}
if(action_numbers) {
char *with_id = crm_strdup_printf("%s (%d)", action_name, action->id);
free(action_name);
action_name = with_id;
}
return action_name;
}
static void
create_dotfile(pe_working_set_t * data_set, const char *dot_file, gboolean all_actions)
{
GListPtr gIter = NULL;
FILE *dot_strm = fopen(dot_file, "w");
if (dot_strm == NULL) {
crm_perror(LOG_ERR, "Could not open %s for writing", dot_file);
return;
}
fprintf(dot_strm, " digraph \"g\" {\n");
for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
action_t *action = (action_t *) gIter->data;
const char *style = "dashed";
const char *font = "black";
const char *color = "black";
char *action_name = create_action_name(action);
crm_trace("Action %d: %s %s %p", action->id, action_name, action->uuid, action);
if (is_set(action->flags, pe_action_pseudo)) {
font = "orange";
}
if (is_set(action->flags, pe_action_dumped)) {
style = "bold";
color = "green";
} else if (action->rsc != NULL && is_not_set(action->rsc->flags, pe_rsc_managed)) {
color = "red";
font = "purple";
if (all_actions == FALSE) {
goto dont_write;
}
} else if (is_set(action->flags, pe_action_optional)) {
color = "blue";
if (all_actions == FALSE) {
goto dont_write;
}
} else {
color = "red";
CRM_CHECK(is_set(action->flags, pe_action_runnable) == FALSE,;
);
}
set_bit(action->flags, pe_action_dumped);
crm_trace("\"%s\" [ style=%s color=\"%s\" fontcolor=\"%s\"]",
action_name, style, color, font);
fprintf(dot_strm, "\"%s\" [ style=%s color=\"%s\" fontcolor=\"%s\"]\n",
action_name, style, color, font);
dont_write:
free(action_name);
}
for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
action_t *action = (action_t *) gIter->data;
GListPtr gIter2 = NULL;
for (gIter2 = action->actions_before; gIter2 != NULL; gIter2 = gIter2->next) {
action_wrapper_t *before = (action_wrapper_t *) gIter2->data;
char *before_name = NULL;
char *after_name = NULL;
const char *style = "dashed";
gboolean optional = TRUE;
if (before->state == pe_link_dumped) {
optional = FALSE;
style = "bold";
} else if (is_set(action->flags, pe_action_pseudo)
&& (before->type & pe_order_stonith_stop)) {
continue;
} else if (before->state == pe_link_dup) {
continue;
} else if (before->type == pe_order_none) {
continue;
} else if (is_set(before->action->flags, pe_action_dumped)
&& is_set(action->flags, pe_action_dumped)
&& before->type != pe_order_load) {
optional = FALSE;
}
if (all_actions || optional == FALSE) {
before_name = create_action_name(before->action);
after_name = create_action_name(action);
crm_trace("\"%s\" -> \"%s\" [ style = %s]",
before_name, after_name, style);
fprintf(dot_strm, "\"%s\" -> \"%s\" [ style = %s]\n",
before_name, after_name, style);
free(before_name);
free(after_name);
}
}
}
fprintf(dot_strm, "}\n");
fflush(dot_strm);
fclose(dot_strm);
}
static void
setup_input(const char *input, const char *output)
{
int rc = pcmk_ok;
cib_t *cib_conn = NULL;
xmlNode *cib_object = NULL;
char *local_output = NULL;
if (input == NULL) {
/* Use live CIB */
cib_conn = cib_new();
rc = cib_conn->cmds->signon(cib_conn, crm_system_name, cib_command);
if (rc == pcmk_ok) {
rc = cib_conn->cmds->query(cib_conn, NULL, &cib_object, cib_scope_local | cib_sync_call);
}
cib_conn->cmds->signoff(cib_conn);
cib_delete(cib_conn);
cib_conn = NULL;
if (rc != pcmk_ok) {
fprintf(stderr, "Live CIB query failed: %s (%d)\n", pcmk_strerror(rc), rc);
crm_exit(crm_errno2exit(rc));
} else if (cib_object == NULL) {
fprintf(stderr, "Live CIB query failed: empty result\n");
crm_exit(CRM_EX_NOINPUT);
}
} else if (safe_str_eq(input, "-")) {
cib_object = filename2xml(NULL);
} else {
cib_object = filename2xml(input);
}
if (get_object_root(XML_CIB_TAG_STATUS, cib_object) == NULL) {
create_xml_node(cib_object, XML_CIB_TAG_STATUS);
}
if (cli_config_update(&cib_object, NULL, FALSE) == FALSE) {
free_xml(cib_object);
crm_exit(CRM_EX_CONFIG);
}
if (validate_xml(cib_object, NULL, FALSE) != TRUE) {
free_xml(cib_object);
crm_exit(CRM_EX_CONFIG);
}
if (output == NULL) {
char *pid = crm_getpid_s();
local_output = get_shadow_file(pid);
temp_shadow = strdup(local_output);
output = local_output;
free(pid);
}
rc = write_xml_file(cib_object, output, FALSE);
free_xml(cib_object);
cib_object = NULL;
if (rc < 0) {
fprintf(stderr, "Could not create '%s': %s\n",
output, pcmk_strerror(rc));
crm_exit(CRM_EX_CANTCREAT);
}
setenv("CIB_file", output, 1);
free(local_output);
}
/* *INDENT-OFF* */
static struct crm_option long_options[] = {
/* Top-level Options */
{"help", 0, 0, '?', "\tThis text"},
{"version", 0, 0, '$', "\tVersion information" },
{"quiet", 0, 0, 'Q', "\tDisplay only essentialoutput"},
{"verbose", 0, 0, 'V', "\tIncrease debug output"},
{"-spacer-", 0, 0, '-', "\nOperations:"},
{"run", 0, 0, 'R', "\tDetermine the cluster's response to the given configuration and status"},
{"simulate", 0, 0, 'S', "Simulate the transition's execution and display the resulting cluster status"},
{"in-place", 0, 0, 'X', "Simulate the transition's execution and store the result back to the input file"},
{"show-scores", 0, 0, 's', "Show allocation scores"},
{"show-utilization", 0, 0, 'U', "Show utilization information"},
{"profile", 1, 0, 'P', "Run all tests in the named directory to create profiling data"},
{"pending", 0, 0, 'j', "\tDisplay pending state if 'record-pending' is enabled", pcmk_option_hidden},
{"-spacer-", 0, 0, '-', "\nSynthetic Cluster Events:"},
{"node-up", 1, 0, 'u', "\tBring a node online"},
{"node-down", 1, 0, 'd', "\tTake a node offline"},
{"node-fail", 1, 0, 'f', "\tMark a node as failed"},
{"op-inject", 1, 0, 'i', "\tGenerate a failure for the cluster to react to in the simulation"},
{"-spacer-", 0, 0, '-', "\t\tValue is of the form ${resource}_${task}_${interval_in_ms}@${node}=${rc}."},
{"-spacer-", 0, 0, '-', "\t\tEg. memcached_monitor_20000@bart.example.com=7"},
{"-spacer-", 0, 0, '-', "\t\tFor more information on OCF return codes, refer to: https://clusterlabs.org/pacemaker/doc/en-US/Pacemaker/2.0/html/Pacemaker_Administration/s-ocf-return-codes.html"},
{"op-fail", 1, 0, 'F', "\tIf the specified task occurs during the simulation, have it fail with return code ${rc}"},
{"-spacer-", 0, 0, '-', "\t\tValue is of the form ${resource}_${task}_${interval_in_ms}@${node}=${rc}."},
{"-spacer-", 0, 0, '-', "\t\tEg. memcached_stop_0@bart.example.com=1\n"},
{"-spacer-", 0, 0, '-', "\t\tThe transition will normally stop at the failed action. Save the result with --save-output and re-run with --xml-file"},
{ "set-datetime", required_argument, NULL, 't',
"Set date/time (ISO 8601 format, see https://en.wikipedia.org/wiki/ISO_8601)"
},
{"quorum", 1, 0, 'q', "\tSpecify a value for quorum"},
{"watchdog", 1, 0, 'w', "\tAssume a watchdog device is active"},
{"ticket-grant", 1, 0, 'g', "Grant a ticket"},
{"ticket-revoke", 1, 0, 'r', "Revoke a ticket"},
{"ticket-standby", 1, 0, 'b', "Make a ticket standby"},
{"ticket-activate", 1, 0, 'e', "Activate a ticket"},
{"-spacer-", 0, 0, '-', "\nOutput Options:"},
{"save-input", 1, 0, 'I', "\tSave the input configuration to the named file"},
{"save-output", 1, 0, 'O', "Save the output configuration to the named file"},
{"save-graph", 1, 0, 'G', "\tSave the transition graph (XML format) to the named file"},
{"save-dotfile", 1, 0, 'D', "Save the transition graph (DOT format) to the named file"},
{"all-actions", 0, 0, 'a', "\tDisplay all possible actions in the DOT graph - even ones not part of the transition"},
{"-spacer-", 0, 0, '-', "\nData Source:"},
{"live-check", 0, 0, 'L', "\tConnect to the CIB mamager and use the current CIB contents as input"},
{"xml-file", 1, 0, 'x', "\tRetrieve XML from the named file"},
{"xml-pipe", 0, 0, 'p', "\tRetrieve XML from stdin"},
{"-spacer-", 0, 0, '-', "\nExamples:\n"},
{"-spacer-", 0, 0, '-', "Pretend a recurring monitor action found memcached stopped on node fred.example.com and, during recovery, that the memcached stop action failed", pcmk_option_paragraph},
{"-spacer-", 0, 0, '-', " crm_simulate -LS --op-inject memcached:0_monitor_20000@bart.example.com=7 --op-fail memcached:0_stop_0@fred.example.com=1 --save-output /tmp/memcached-test.xml", pcmk_option_example},
{"-spacer-", 0, 0, '-', "Now see what the reaction to the stop failure would be", pcmk_option_paragraph},
{"-spacer-", 0, 0, '-', " crm_simulate -S --xml-file /tmp/memcached-test.xml", pcmk_option_example},
{0, 0, 0, 0}
};
/* *INDENT-ON* */
static void
profile_one(const char *xml_file, pe_working_set_t *data_set)
{
xmlNode *cib_object = NULL;
printf("* Testing %s\n", xml_file);
cib_object = filename2xml(xml_file);
if (get_object_root(XML_CIB_TAG_STATUS, cib_object) == NULL) {
create_xml_node(cib_object, XML_CIB_TAG_STATUS);
}
if (cli_config_update(&cib_object, NULL, FALSE) == FALSE) {
free_xml(cib_object);
return;
}
if (validate_xml(cib_object, NULL, FALSE) != TRUE) {
free_xml(cib_object);
return;
}
data_set->input = cib_object;
get_date(data_set);
pcmk__schedule_actions(data_set, cib_object, NULL);
pe_reset_working_set(data_set);
}
#ifndef FILENAME_MAX
# define FILENAME_MAX 512
#endif
static void
profile_all(const char *dir, pe_working_set_t *data_set)
{
struct dirent **namelist;
int file_num = scandir(dir, &namelist, 0, alphasort);
if (file_num > 0) {
struct stat prop;
char buffer[FILENAME_MAX];
while (file_num--) {
if ('.' == namelist[file_num]->d_name[0]) {
free(namelist[file_num]);
continue;
} else if (!crm_ends_with_ext(namelist[file_num]->d_name, ".xml")) {
free(namelist[file_num]);
continue;
}
snprintf(buffer, sizeof(buffer), "%s/%s", dir, namelist[file_num]->d_name);
if (stat(buffer, &prop) == 0 && S_ISREG(prop.st_mode)) {
profile_one(buffer, data_set);
}
free(namelist[file_num]);
}
free(namelist);
}
}
static int
count_resources(pe_working_set_t * data_set, resource_t * rsc)
{
int count = 0;
GListPtr gIter = NULL;
if (rsc == NULL) {
gIter = data_set->resources;
} else if (rsc->children) {
gIter = rsc->children;
} else {
return is_not_set(rsc->flags, pe_rsc_orphan);
}
for (; gIter != NULL; gIter = gIter->next) {
count += count_resources(data_set, gIter->data);
}
return count;
}
int
main(int argc, char **argv)
{
int rc = pcmk_ok;
guint modified = 0;
gboolean store = FALSE;
gboolean process = FALSE;
gboolean simulate = FALSE;
gboolean all_actions = FALSE;
gboolean have_stdout = FALSE;
pe_working_set_t *data_set = NULL;
const char *xml_file = "-";
const char *quorum = NULL;
const char *watchdog = NULL;
const char *test_dir = NULL;
const char *dot_file = NULL;
const char *graph_file = NULL;
const char *input_file = NULL;
const char *output_file = NULL;
int flag = 0;
int index = 0;
int argerr = 0;
GListPtr node_up = NULL;
GListPtr node_down = NULL;
GListPtr node_fail = NULL;
GListPtr op_inject = NULL;
GListPtr ticket_grant = NULL;
GListPtr ticket_revoke = NULL;
GListPtr ticket_standby = NULL;
GListPtr ticket_activate = NULL;
xmlNode *input = NULL;
crm_log_cli_init("crm_simulate");
crm_set_options(NULL, "datasource operation [additional options]",
long_options, "Tool for simulating the cluster's response to events");
if (argc < 2) {
crm_help('?', CRM_EX_USAGE);
}
while (1) {
flag = crm_get_option(argc, argv, &index);
if (flag == -1)
break;
switch (flag) {
case 'V':
if (have_stdout == FALSE) {
/* Redirect stderr to stdout so we can grep the output */
have_stdout = TRUE;
close(STDERR_FILENO);
dup2(STDOUT_FILENO, STDERR_FILENO);
}
crm_bump_log_level(argc, argv);
action_numbers = TRUE;
break;
case '?':
case '$':
crm_help(flag, CRM_EX_OK);
break;
case 'p':
xml_file = "-";
break;
case 'Q':
quiet = TRUE;
break;
case 'L':
xml_file = NULL;
break;
case 'x':
xml_file = optarg;
break;
case 'u':
modified++;
bringing_nodes_online = TRUE;
node_up = g_list_append(node_up, optarg);
break;
case 'd':
modified++;
node_down = g_list_append(node_down, optarg);
break;
case 'f':
modified++;
node_fail = g_list_append(node_fail, optarg);
break;
case 't':
use_date = strdup(optarg);
break;
case 'i':
modified++;
op_inject = g_list_append(op_inject, optarg);
break;
case 'F':
process = TRUE;
simulate = TRUE;
op_fail = g_list_append(op_fail, optarg);
break;
case 'w':
modified++;
watchdog = optarg;
break;
case 'q':
modified++;
quorum = optarg;
break;
case 'g':
modified++;
ticket_grant = g_list_append(ticket_grant, optarg);
break;
case 'r':
modified++;
ticket_revoke = g_list_append(ticket_revoke, optarg);
break;
case 'b':
modified++;
ticket_standby = g_list_append(ticket_standby, optarg);
break;
case 'e':
modified++;
ticket_activate = g_list_append(ticket_activate, optarg);
break;
case 'a':
all_actions = TRUE;
break;
case 's':
process = TRUE;
show_scores = TRUE;
break;
case 'U':
process = TRUE;
show_utilization = TRUE;
break;
case 'j':
print_pending = TRUE;
break;
case 'S':
process = TRUE;
simulate = TRUE;
break;
case 'X':
store = TRUE;
process = TRUE;
simulate = TRUE;
break;
case 'R':
process = TRUE;
break;
case 'D':
process = TRUE;
dot_file = optarg;
break;
case 'G':
process = TRUE;
graph_file = optarg;
break;
case 'I':
input_file = optarg;
break;
case 'O':
output_file = optarg;
break;
case 'P':
test_dir = optarg;
break;
default:
++argerr;
break;
}
}
if (optind > argc) {
++argerr;
}
if (argerr) {
crm_help('?', CRM_EX_USAGE);
}
data_set = pe_new_working_set();
if (data_set == NULL) {
crm_perror(LOG_ERR, "Could not allocate working set");
rc = -ENOMEM;
goto done;
}
if (test_dir != NULL) {
profile_all(test_dir, data_set);
return CRM_EX_OK;
}
setup_input(xml_file, store ? xml_file : output_file);
global_cib = cib_new();
rc = global_cib->cmds->signon(global_cib, crm_system_name, cib_command);
if (rc != pcmk_ok) {
fprintf(stderr, "Could not connect to the CIB manager: %s\n",
pcmk_strerror(rc));
goto done;
}
rc = global_cib->cmds->query(global_cib, NULL, &input, cib_sync_call | cib_scope_local);
if (rc != pcmk_ok) {
fprintf(stderr, "Could not get local CIB: %s\n", pcmk_strerror(rc));
goto done;
}
data_set->input = input;
get_date(data_set);
if(xml_file) {
set_bit(data_set->flags, pe_flag_sanitized);
}
set_bit(data_set->flags, pe_flag_stdout);
cluster_status(data_set);
if (quiet == FALSE) {
int options = print_pending ? pe_print_pending : 0;
if (is_set(data_set->flags, pe_flag_maintenance_mode)) {
quiet_log("\n *** Resource management is DISABLED ***");
quiet_log("\n The cluster will not attempt to start, stop or recover services");
quiet_log("\n");
}
if (data_set->disabled_resources || data_set->blocked_resources) {
quiet_log("%d of %d resources DISABLED and %d BLOCKED from being started due to failures\n",
data_set->disabled_resources,
count_resources(data_set, NULL),
data_set->blocked_resources);
}
quiet_log("\nCurrent cluster status:\n");
print_cluster_status(data_set, options);
}
if (modified) {
quiet_log("Performing requested modifications\n");
modify_configuration(data_set, global_cib, quorum, watchdog, node_up, node_down, node_fail, op_inject,
ticket_grant, ticket_revoke, ticket_standby, ticket_activate);
rc = global_cib->cmds->query(global_cib, NULL, &input, cib_sync_call);
if (rc != pcmk_ok) {
fprintf(stderr, "Could not get modified CIB: %s\n", pcmk_strerror(rc));
goto done;
}
cleanup_calculations(data_set);
data_set->input = input;
get_date(data_set);
if(xml_file) {
set_bit(data_set->flags, pe_flag_sanitized);
}
set_bit(data_set->flags, pe_flag_stdout);
cluster_status(data_set);
}
if (input_file != NULL) {
rc = write_xml_file(input, input_file, FALSE);
if (rc < 0) {
fprintf(stderr, "Could not create '%s': %s\n",
input_file, pcmk_strerror(rc));
goto done;
}
}
if (process || simulate) {
crm_time_t *local_date = NULL;
if (show_scores && show_utilization) {
printf("Allocation scores and utilization information:\n");
} else if (show_scores) {
fprintf(stdout, "Allocation scores:\n");
} else if (show_utilization) {
printf("Utilization information:\n");
}
pcmk__schedule_actions(data_set, input, local_date);
input = NULL; /* Don't try and free it twice */
if (graph_file != NULL) {
write_xml_file(data_set->graph, graph_file, FALSE);
}
if (dot_file != NULL) {
create_dotfile(data_set, dot_file, all_actions);
}
if (quiet == FALSE) {
GListPtr gIter = NULL;
quiet_log("%sTransition Summary:\n", show_scores || show_utilization
|| modified ? "\n" : "");
fflush(stdout);
LogNodeActions(data_set, TRUE);
for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
LogActions(rsc, data_set, TRUE);
}
}
}
rc = pcmk_ok;
if (simulate) {
if (run_simulation(data_set, global_cib, op_fail, quiet) != pcmk_ok) {
rc = pcmk_err_generic;
}
if(quiet == FALSE) {
get_date(data_set);
quiet_log("\nRevised cluster status:\n");
set_bit(data_set->flags, pe_flag_stdout);
cluster_status(data_set);
print_cluster_status(data_set, 0);
}
}
done:
pe_free_working_set(data_set);
global_cib->cmds->signoff(global_cib);
cib_delete(global_cib);
free(use_date);
fflush(stderr);
if (temp_shadow) {
unlink(temp_shadow);
free(temp_shadow);
}
crm_exit(crm_errno2exit(rc));
}