Page MenuHomeClusterLabs Projects

No OneTemporary

This file is larger than 256 KB, so syntax highlighting was skipped.
diff --git a/TODO.markdown b/TODO.markdown
index 2e6436384a..39072d304a 100644
--- a/TODO.markdown
+++ b/TODO.markdown
@@ -1,54 +1,53 @@
# Semi-random collection of tasks we'd like to get done
## Targeted for 1.2
- Need a way to indicate when unfencing operations need to be initiated from the host to be unfenced
- Remove all calls to uname() and replace with get_node_name() which redirects to ${stack}_node_name()
-- Allow messages to be sent to a nodeid (not just node (u)name)
## Targeted for 1.2.x
- Support
http://cgit.freedesktop.org/systemd/systemd/commit/?id=96342de68d0d6de71a062d984dafd2a0905ed9fe
- Allow stonith_admin to optionally route fencing requests via the CIB (terminate=true)
- Add corosync to ComponentFail cts test
- Support 'yesterday' and 'thursday' and '24-04' as dates in crm_report
- Allow the N in 'give up after N failed fencing attempts' to be configurable
- Check for uppercase letters in node names, warn if found
- Imply startup-failure-is-fatal from on-fail="restart"
- Show an English version of the config with crm_resource --rules
- Convert cts/CIB.py into a supported Python API for the CIB
- Reduce the amount of stonith-ng logging
- Use dlopen for snmp in crm_mon
- Re-implement no-quorum filter for cib updates?
## Targeted for 1.4
- Support A colocated with (B || C || D)
- Implement a truly atomic version of attrd
- Support rolling average values in attrd
- Support heartbeat with the mcp
- Freeze/Thaw
- Create Pacemaker plugin for snmpd - http://www.net-snmp.org/
- Investigate using a DB as the back-end for the CIB
- Decide whether to fully support or drop failover domains
# Testing
- Convert BandwidthTest CTS test into a Scenario wrapper
- find_operations() is not covered by PE regression tests
- no_quorum_policy==suicide is not covered by PE regression tests
- parse_xml_duration() is not covered by PE regression tests
- phase_of_the_moon() is not covered by PE regression tests
- test_role_expression() is not covered by PE regression tests
- native_parameter() is not covered by PE regression tests
- clone_active() is not covered by PE regression tests
- convert_non_atomic_task() in native.c is not covered by PE regression tests
- group_rsc_colocation_lh() is not covered by PE regression tests
- Test on-fail=standby
# Documentation
- Clusters from Scratch: Mail
- Clusters from Scratch: MySQL
- Document reload in Pacemaker Explained
- Document advanced fencing logic in Pacemaker Explained
- Use ann:defaultValue="..." instead of <optional> in the schema more often
- Allow Clusters from Scratch to be built in two flavors - pcs and crm shell
diff --git a/cib/callbacks.c b/cib/callbacks.c
index 09224d1b93..b83469a8d4 100644
--- a/cib/callbacks.c
+++ b/cib/callbacks.c
@@ -1,1474 +1,1474 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>
#include <grp.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/msg_xml.h>
#include <crm/common/ipc.h>
#include <crm/cluster/internal.h>
#include <crm/common/xml.h>
#include <cibio.h>
#include <callbacks.h>
#include <cibmessages.h>
#include <notify.h>
#include "common.h"
/* Daemon-wide state defined in other translation units */
extern GMainLoop *mainloop;
extern gboolean cib_shutdown_flag;
extern gboolean stand_alone;
extern const char *cib_root;

/* Counter stamped (as F_CIB_LOCAL_NOTIFY_ID) on each broadcast this node originates */
static unsigned long cib_local_bcast_num = 0;

/*
 * A client reply that is held in local_notify_queue until the matching
 * broadcast from this node has been seen coming back from the cluster.
 */
typedef struct cib_local_notify_s {
    xmlNode *notify_src;        /* message to deliver; freed with the entry */
    char *client_id;            /* key into client_list; freed with the entry */
    gboolean from_peer;
    gboolean sync_reply;
} cib_local_notify_t;

/* IPC service handles */
qb_ipcs_service_t *ipcs_ro = NULL;
qb_ipcs_service_t *ipcs_rw = NULL;
qb_ipcs_service_t *ipcs_shm = NULL;

#if SUPPORT_HEARTBEAT
extern ll_cluster_t *hb_conn;
#endif

extern int cib_update_counter(xmlNode * xml_obj, const char *field, gboolean reset);
extern void GHFunc_count_peers(gpointer key, gpointer value, gpointer user_data);

gint cib_GCompareFunc(gconstpointer a, gconstpointer b);
gboolean can_write(int flags);
void send_cib_replace(const xmlNode * sync_request, const char *host);

/*
 * Parameter names follow the definition further down in this file:
 * force_synchronous comes before privileged.
 */
void cib_process_request(xmlNode * request, gboolean force_synchronous, gboolean privileged,
                         gboolean from_peer, cib_client_t * cib_client);

/* Connected clients keyed by client id, and replies awaiting our own broadcast */
extern GHashTable *client_list;
extern GHashTable *local_notify_queue;

int next_client_id = 0;
extern const char *cib_our_uname;

/* Operation statistics */
extern unsigned long cib_num_ops, cib_num_local, cib_num_updates, cib_num_fail;
extern unsigned long cib_bad_connects, cib_num_timeouts;

extern int cib_status;

int cib_process_command(xmlNode * request, xmlNode ** reply,
                        xmlNode ** cib_diff, gboolean privileged);
gboolean cib_common_callback(qb_ipcs_connection_t *c, void *data, size_t size, gboolean privileged);
/*
 * Accept callback for a new IPC connection.
 *
 * Allocates the per-connection cib_client_t, gives it a generated id,
 * registers it in client_list and attaches it to the connection as its
 * context.
 *
 * Returns 0 to accept the connection, -EPERM while the daemon is shutting
 * down, or -ENOMEM if the client record cannot be allocated.
 */
static int32_t
cib_ipc_accept(qb_ipcs_connection_t *c, uid_t uid, gid_t gid)
{
    cib_client_t *new_client = NULL;
#if ENABLE_ACL
    struct group *crm_grp = NULL;
#endif

    crm_trace("Connecting %p for uid=%d gid=%d pid=%d", c, uid, gid, crm_ipcs_client_pid(c));
    if (cib_shutdown_flag) {
        crm_info("Ignoring new client [%d] during shutdown", crm_ipcs_client_pid(c));
        return -EPERM;
    }

    new_client = calloc(1, sizeof(cib_client_t));
    if (new_client == NULL) {
        /* Refuse the connection rather than dereferencing a NULL record */
        crm_err("Out of memory, rejecting new client [%d]", crm_ipcs_client_pid(c));
        return -ENOMEM;
    }

    new_client->ipc = c;
    CRM_CHECK(new_client->id == NULL, free(new_client->id));
    new_client->id = crm_generate_uuid();

#if ENABLE_ACL
    crm_grp = getgrnam(CRM_DAEMON_GROUP);
    if (crm_grp) {
        qb_ipcs_connection_auth_set(c, -1, crm_grp->gr_gid, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
    }
    new_client->user = uid2username(uid);
#endif

    /* make sure we can find ourselves later for sync calls
     * redirected to the master instance
     */
    g_hash_table_insert(client_list, new_client->id, new_client);
    qb_ipcs_context_set(c, new_client);
    return 0;
}
/* Connection-created callback: only traces which client the connection belongs to */
static void
cib_ipc_created(qb_ipcs_connection_t *c)
{
    cib_client_t *owner = qb_ipcs_context_get(c);

    crm_trace("%p connected for client %s", c, owner->id);
}
/* Message callback that hands the payload to cib_common_callback() with privileged = TRUE */
static int32_t
cib_ipc_dispatch_rw(qb_ipcs_connection_t *c, void *data, size_t size)
{
    const gboolean privileged = TRUE;
    cib_client_t *sender = qb_ipcs_context_get(c);

    crm_trace("%p message from %s", c, sender->id);
    return cib_common_callback(c, data, size, privileged);
}
/* Message callback that hands the payload to cib_common_callback() with privileged = FALSE */
static int32_t
cib_ipc_dispatch_ro(qb_ipcs_connection_t *c, void *data, size_t size)
{
    const gboolean privileged = FALSE;
    cib_client_t *sender = qb_ipcs_context_get(c);

    crm_trace("%p message from %s", c, sender->id);
    return cib_common_callback(c, data, size, privileged);
}
/*
 * Connection-closed callback: unregister the client from client_list.
 *
 * Always returns 0.  (Open question carried over from the original author:
 * what would a non-zero return code mean to the caller?)
 */
static int32_t
cib_ipc_closed(qb_ipcs_connection_t *c)
{
    gboolean was_registered = FALSE;
    cib_client_t *cib_client = qb_ipcs_context_get(c);

    crm_trace("Connection %p closed", c);
    CRM_ASSERT(cib_client != NULL);
    CRM_ASSERT(cib_client->id != NULL);

    was_registered = g_hash_table_remove(client_list, cib_client->id);
    if (was_registered == FALSE) {
        crm_err("Client %s not found in the hashtable", cib_client->name);
    }

    return 0;
}
/*
 * Connection-destroyed callback: release everything owned by the client
 * record and, if the daemon is shutting down, call cib_shutdown(0).
 */
static void
cib_ipc_destroy(qb_ipcs_connection_t *c)
{
    cib_client_t *cib_client = qb_ipcs_context_get(c);

    CRM_ASSERT(cib_client != NULL);
    CRM_ASSERT(cib_client->id != NULL);

    /* The closed callback may not have run, so unregister here as well */
    g_hash_table_remove(client_list, cib_client->id);

    crm_trace("Destroying %s (%p)", cib_client->name, c);

    /* Owned strings first, then the record itself */
    free(cib_client->name);
    free(cib_client->callback_id);
    free(cib_client->id);
    free(cib_client->user);
    free(cib_client);
    crm_trace("Freed the cib client");

    if (cib_shutdown_flag == FALSE) {
        return;
    }
    cib_shutdown(0);
}
/* Handler table whose messages go through cib_ipc_dispatch_ro() */
struct qb_ipcs_service_handlers ipc_ro_callbacks = {
    .connection_accept    = cib_ipc_accept,
    .connection_created   = cib_ipc_created,
    .msg_process          = cib_ipc_dispatch_ro,
    .connection_closed    = cib_ipc_closed,
    .connection_destroyed = cib_ipc_destroy,
};
/* Handler table whose messages go through cib_ipc_dispatch_rw() */
struct qb_ipcs_service_handlers ipc_rw_callbacks = {
    .connection_accept    = cib_ipc_accept,
    .connection_created   = cib_ipc_created,
    .msg_process          = cib_ipc_dispatch_rw,
    .connection_closed    = cib_ipc_closed,
    .connection_destroyed = cib_ipc_destroy,
};
/*
 * Handle one decoded client request.
 *
 * Registration and notification-filter requests are answered here directly;
 * everything else is counted against the client and passed on to
 * cib_process_request().
 *
 * id:         IPC request id, used when acknowledging
 * flags:      IPC flags; crm_ipc_client_response means the client awaits a response
 * op_request: the decoded request (not freed here)
 * cib_client: the sending client
 * privileged: forwarded to cib_process_request()
 */
void
cib_common_callback_worker(uint32_t id, uint32_t flags, xmlNode * op_request, cib_client_t * cib_client, gboolean privileged)
{
    const char *op = crm_element_value(op_request, F_CIB_OPERATION);
    const gboolean wants_response = (flags & crm_ipc_client_response) ? TRUE : FALSE;

    if (crm_str_eq(op, CRM_OP_REGISTER, TRUE)) {
        /* Registration: tell the client which id it was given */
        if (wants_response) {
            xmlNode *ack = create_xml_node(NULL, __FUNCTION__);

            crm_xml_add(ack, F_CIB_OPERATION, CRM_OP_REGISTER);
            crm_xml_add(ack, F_CIB_CLIENTID, cib_client->id);
            crm_ipcs_send(cib_client->ipc, id, ack, FALSE);
            cib_client->request_id = 0;
            free_xml(ack);
        }
        return;
    }

    if (crm_str_eq(op, T_CIB_NOTIFY, TRUE)) {
        /* Update the notify filters for this client */
        int enable = 0;
        const char *notify_type = crm_element_value(op_request, F_CIB_NOTIFY_TYPE);

        crm_element_value_int(op_request, F_CIB_NOTIFY_ACTIVATE, &enable);
        crm_debug("Setting %s callbacks for %s (%s): %s",
                  notify_type, cib_client->name, cib_client->id, enable ? "on" : "off");

        if (safe_str_eq(notify_type, T_CIB_POST_NOTIFY)) {
            cib_client->post_notify = enable;

        } else if (safe_str_eq(notify_type, T_CIB_PRE_NOTIFY)) {
            cib_client->pre_notify = enable;

        } else if (safe_str_eq(notify_type, T_CIB_UPDATE_CONFIRM)) {
            cib_client->confirmations = enable;

        } else if (safe_str_eq(notify_type, T_CIB_DIFF_NOTIFY)) {
            cib_client->diffs = enable;

        } else if (safe_str_eq(notify_type, T_CIB_REPLACE_NOTIFY)) {
            cib_client->replace = enable;
        }

        if (wants_response) {
            /* TODO - include rc */
            crm_ipcs_send_ack(cib_client->ipc, id, "ack", __FUNCTION__, __LINE__);
            cib_client->request_id = 0;
        }
        return;
    }

    /* A regular CIB operation */
    cib_client->num_calls++;
    cib_process_request(op_request, FALSE, privileged, FALSE, cib_client);
}
/*
 * Common entry point for the read-only and read-write IPC message callbacks.
 *
 * Decodes the raw IPC payload, fills in the client's name and callback id on
 * first contact, stamps the request with the client's id and name, and hands
 * it to cib_common_callback_worker().
 *
 * c:          the IPC connection the message arrived on
 * data/size:  raw payload as delivered by the IPC layer
 * privileged: passed through to the worker
 *
 * Always returns 0.  If the payload cannot be decoded, or the connection has
 * no client context, a "nack" is sent and nothing else is done.
 */
int32_t
cib_common_callback(qb_ipcs_connection_t *c, void *data, size_t size, gboolean privileged)
{
    uint32_t id = 0;
    uint32_t flags = 0;
    int call_options = 0;
    xmlNode *op_request = crm_ipcs_recv(c, data, size, &id, &flags);
    cib_client_t *cib_client = qb_ipcs_context_get(c);

    if(op_request) {
        crm_element_value_int(op_request, F_CIB_CALLOPTS, &call_options);
    }

    crm_trace("Inbound: %.200s", data);
    if (op_request == NULL || cib_client == NULL) {
        crm_ipcs_send_ack(c, id, "nack", __FUNCTION__, __LINE__);
        if (op_request != NULL) {
            /* Decoded fine but no client context: don't leak the request */
            free_xml(op_request);
        }
        return 0;
    }

    if(is_set(call_options, cib_sync_call)) {
        /* A synchronous call only makes sense if the client waits for a response */
        CRM_ASSERT(flags & crm_ipc_client_response);
    }

    if(flags & crm_ipc_client_response) {
        CRM_LOG_ASSERT(cib_client->request_id == 0); /* This means the client has two synchronous events in-flight */
        cib_client->request_id = id;                 /* Reply only to the last one */
    }

    if (cib_client->name == NULL) {
        /* First message: take the name from the request, or fall back to the pid */
        const char *value = crm_element_value(op_request, F_CIB_CLIENTNAME);

        if (value == NULL) {
            cib_client->name = crm_itoa(crm_ipcs_client_pid(c));
        } else {
            cib_client->name = strdup(value);
        }
    }

    if (cib_client->callback_id == NULL) {
        /* First message: use the supplied callback token, or fall back to the client id */
        const char *value = crm_element_value(op_request, F_CIB_CALLBACK_TOKEN);

        if (value != NULL) {
            cib_client->callback_id = strdup(value);
        } else {
            cib_client->callback_id = strdup(cib_client->id);
        }
    }

    crm_xml_add(op_request, F_CIB_CLIENTID, cib_client->id);
    crm_xml_add(op_request, F_CIB_CLIENTNAME, cib_client->name);

#if ENABLE_ACL
    determine_request_user(cib_client->user, op_request, F_CIB_USER);
#endif

    crm_log_xml_trace(op_request, "Client[inbound]");

    cib_common_callback_worker(id, flags, op_request, cib_client, privileged);
    free_xml(op_request);

    return 0;
}
/*
 * Deliver a reply or event to a client connected to this daemon.
 *
 * notify_src: the XML message to deliver
 * client_id:  key used to look the recipient up in client_list (may be NULL)
 * sync_reply: when set, the message is sent with the client's stored
 *             request_id (which is then cleared); otherwise it is sent
 *             with id 0
 * from_peer:  only affects the wording of the trace messages
 */
static void
do_local_notify(xmlNode * notify_src, const char *client_id,
gboolean sync_reply, gboolean from_peer)
{
/* send callback to originating child */
cib_client_t *client_obj = NULL;
int local_rc = pcmk_ok;
if (client_id != NULL) {
client_obj = g_hash_table_lookup(client_list, client_id);
} else {
crm_trace("No client to sent the response to. F_CIB_CLIENTID not set.");
}
if (client_obj == NULL) {
/* No such client; note that the warning below is skipped in this case */
local_rc = -ECONNRESET;
} else {
int rid = 0;
if(sync_reply) {
/* Consume the pending request id so it is answered only once */
CRM_LOG_ASSERT(client_obj->request_id);
rid = client_obj->request_id;
client_obj->request_id = 0;
crm_trace("Sending response %d to %s %s",
rid, client_obj->name, from_peer?"(originator of delegated request)":"");
} else {
crm_trace("Sending an event to %s %s",
client_obj->name, from_peer?"(originator of delegated request)":"");
}
/* The chain below is split by the preprocessor: the session branch only
 * exists when HAVE_GNUTLS_GNUTLS_H is defined */
if (client_obj->ipc && crm_ipcs_send(client_obj->ipc, rid, notify_src, !sync_reply) < 0) {
local_rc = -ENOMSG;
#ifdef HAVE_GNUTLS_GNUTLS_H
} else if (client_obj->session) {
crm_send_remote_msg(client_obj->session, notify_src, client_obj->encrypted);
#endif
} else if(client_obj->ipc == NULL) {
crm_err("Unknown transport for %s", client_obj->name);
}
}
/* Only failures for a client that was actually found are reported */
if (local_rc != pcmk_ok && client_obj != NULL) {
crm_warn("%sSync reply to %s failed: %s",
sync_reply ? "" : "A-",
client_obj ? client_obj->name : "<unknown>", pcmk_strerror(local_rc));
}
}
/* Value destructor for local_notify_queue: frees a queued entry and what it owns */
static void
local_notify_destroy_callback(gpointer data)
{
    cib_local_notify_t *entry = data;

    free_xml(entry->notify_src);
    free(entry->client_id);
    free(entry);
}
/*
 * If a reply was queued under bcast_id, deliver it to its client now and
 * drop it from local_notify_queue.  Does nothing when the queue has not
 * been created or holds no such entry.
 */
static void
check_local_notify(int bcast_id)
{
    cib_local_notify_t *pending = NULL;

    if (local_notify_queue == NULL) {
        return;
    }

    pending = g_hash_table_lookup(local_notify_queue, GINT_TO_POINTER(bcast_id));
    if (pending == NULL) {
        return;
    }

    do_local_notify(pending->notify_src, pending->client_id, pending->sync_reply, pending->from_peer);
    g_hash_table_remove(local_notify_queue, GINT_TO_POINTER(bcast_id));
}
/*
 * Park a client reply in local_notify_queue under the current value of
 * cib_local_bcast_num.  The queue is created on first use.
 *
 * The entry keeps the notify_src pointer it is given (no copy is made) and
 * a private copy of client_id; both are released by
 * local_notify_destroy_callback() when the entry is removed.
 */
static void
queue_local_notify(xmlNode * notify_src, const char *client_id, gboolean sync_reply, gboolean from_peer)
{
    cib_local_notify_t *entry = calloc(1, sizeof(cib_local_notify_t));

    entry->notify_src = notify_src;
    entry->client_id = strdup(client_id);
    entry->sync_reply = sync_reply;
    entry->from_peer = from_peer;

    if (local_notify_queue == NULL) {
        local_notify_queue = g_hash_table_new_full(g_direct_hash, g_direct_equal,
                                                   NULL, local_notify_destroy_callback);
    }

    g_hash_table_insert(local_notify_queue, GINT_TO_POINTER(cib_local_bcast_num), entry);
}
/*
 * Decide how a request from a local client is to be handled.
 *
 * Outputs (only the flags relevant to the chosen case are written):
 *   needs_reply   - TRUE when the op modifies the CIB and broadcasting is not inhibited
 *   local_notify  - the client is to be notified by this instance
 *   process       - whether this instance executes the op
 *   needs_forward - the request must be sent on to another instance
 */
static void
parse_local_options(cib_client_t * cib_client, int call_type, int call_options, const char *host,
                    const char *op, gboolean * local_notify, gboolean * needs_reply,
                    gboolean * process, gboolean * needs_forward)
{
    /* we need to send an update anyway when a modifying op may be broadcast */
    *needs_reply = (cib_op_modifies(call_type)
                    && !(call_options & cib_inhibit_bcast)) ? TRUE : FALSE;

    if (host == NULL && (call_options & cib_scope_local)) {
        crm_trace("Processing locally scoped %s op from %s", op, cib_client->name);
        *local_notify = TRUE;
        return;
    }

    if (host == NULL && cib_is_master) {
        crm_trace("Processing master %s op locally from %s", op, cib_client->name);
        *local_notify = TRUE;
        return;
    }

    if (safe_str_eq(host, cib_our_uname)) {
        crm_trace("Processing locally addressed %s op from %s", op, cib_client->name);
        *local_notify = TRUE;
        return;
    }

    if (stand_alone) {
        /* No peers to talk to: handle everything here */
        *needs_forward = FALSE;
        *local_notify = TRUE;
        *process = TRUE;
        return;
    }

    crm_trace("%s op from %s needs to be forwarded to %s",
              op, cib_client->name, host ? host : "the master instance");
    *needs_forward = TRUE;
    *process = FALSE;
}
/*
 * Decide how a message received from a cluster peer is to be handled.
 *
 * Returns TRUE when the caller should carry on with the message (with
 * local_notify / needs_reply / process adjusted as needed) and FALSE when
 * the message is not for this instance and should be dropped.
 *
 * call_type and needs_forward are part of the signature but are not used.
 */
static gboolean
parse_peer_options(int call_type, xmlNode * request,
                   gboolean * local_notify, gboolean * needs_reply, gboolean * process,
                   gboolean * needs_forward)
{
    const char *op = NULL;
    const char *host = NULL;
    const char *delegated = NULL;
    const char *originator = crm_element_value(request, F_ORIG);
    const char *reply_to = crm_element_value(request, F_CIB_ISREPLY);
    const char *update = crm_element_value(request, F_CIB_GLOBAL_UPDATE);

    /* TRUE when the message is marked as a reply addressed to this node */
    gboolean is_reply = safe_str_eq(reply_to, cib_our_uname);

    if (crm_is_true(update)) {
        /* Global updates are always applied and never answered */
        *needs_reply = FALSE;
        if (is_reply) {
            *local_notify = TRUE;
            crm_trace("Processing global/peer update from %s"
                      " that originated from us", originator);
        } else {
            crm_trace("Processing global/peer update from %s", originator);
        }
        return TRUE;
    }

    host = crm_element_value(request, F_CIB_HOST);
    if (host != NULL && safe_str_eq(host, cib_our_uname)) {
        crm_trace("Processing request sent to us from %s", originator);
        return TRUE;

    } else if (host == NULL && cib_is_master == TRUE) {
        crm_trace("Processing request sent to master instance from %s", originator);
        return TRUE;
    }

    op = crm_element_value(request, F_CIB_OPERATION);
    if(safe_str_eq(op, "cib_shutdown_req")) {
        /* Always process these */
        *local_notify = FALSE;
        if(reply_to == NULL || is_reply) {
            *process = TRUE;
        }
        if(is_reply) {
            *needs_reply = FALSE;
        }
        /* Every shutdown request leaves the function here */
        return *process;
    }

    if (is_reply) {
        crm_trace("Forward reply sent from %s to local clients", originator);
        *process = FALSE;
        *needs_reply = FALSE;
        *local_notify = TRUE;
        return TRUE;
    }

    /* Whatever is left is not for us; the branches only pick the log message */
    delegated = crm_element_value(request, F_CIB_DELEGATED);
    if (delegated != NULL) {
        crm_trace("Ignoring msg for master instance");

    } else if (host != NULL) {
        /* this is for a specific instance and we're not it */
        crm_trace("Ignoring msg for instance on %s", crm_str(host));

    } else if (reply_to == NULL && cib_is_master == FALSE) {
        /* this is for the master instance and we're not it */
        crm_trace("Ignoring reply to %s", crm_str(reply_to));

    } else {
        crm_err("Nothing for us to do?");
        crm_log_xml_err(request, "Peer[inbound]");
    }

    return FALSE;
}
/*
 * Send a locally received request on to another CIB instance.
 *
 * The request is temporarily tagged with F_CIB_DELEGATED = our node name,
 * sent either to the host named in F_CIB_HOST or with a NULL destination
 * (traced as "master instance"), and then restored by removing the tag.
 *
 * cib_client is not used; call_options only controls a trace message.
 */
static void
forward_request(xmlNode * request, cib_client_t * cib_client, int call_options)
{
const char *op = crm_element_value(request, F_CIB_OPERATION);
const char *host = crm_element_value(request, F_CIB_HOST);
/* Tag the request with our node name while it is being sent */
crm_xml_add(request, F_CIB_DELEGATED, cib_our_uname);
if (host != NULL) {
crm_trace("Forwarding %s op to %s", op, host);
- send_cluster_message(host, crm_msg_cib, request, FALSE);
+ send_cluster_message(crm_get_peer(0, host), crm_msg_cib, request, FALSE);
} else {
crm_trace("Forwarding %s op to master instance", op);
send_cluster_message(NULL, crm_msg_cib, request, FALSE);
}
/* Return the request to its original state */
xml_remove_prop(request, F_CIB_DELEGATED);
if (call_options & cib_discard_reply) {
crm_trace("Client not interested in reply");
}
}
/*
 * Send the outcome of an operation to the cluster.
 *
 * msg:         message to send; must not be NULL (asserted)
 * result_diff: the diff produced by the operation (used when broadcasting)
 * originator:  node the request came from, recorded in F_CIB_ISREPLY
 * broadcast:   TRUE to turn msg into a CIB_OP_APPLY_DIFF global update and
 *              send it with a NULL destination; FALSE to send msg to the
 *              originator only
 *
 * Returns the result of send_cluster_message(), or FALSE when there is
 * nothing to send (not a broadcast and no originator).
 */
static gboolean
send_peer_reply(xmlNode * msg, xmlNode * result_diff, const char *originator, gboolean broadcast)
{
CRM_ASSERT(msg != NULL);
if (broadcast) {
/* this (successful) call modified the CIB _and_ the
* change needs to be broadcast...
* send via HA to other nodes
*/
int diff_add_updates = 0;
int diff_add_epoch = 0;
int diff_add_admin_epoch = 0;
int diff_del_updates = 0;
int diff_del_epoch = 0;
int diff_del_admin_epoch = 0;
char *digest = NULL;
cib_diff_version_details(result_diff,
&diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates,
&diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates);
crm_trace("Sending update diff %d.%d.%d -> %d.%d.%d",
diff_del_admin_epoch, diff_del_epoch, diff_del_updates,
diff_add_admin_epoch, diff_add_epoch, diff_add_updates);
/* Rewrite the message in place into a global apply-diff update */
crm_xml_add(msg, F_CIB_ISREPLY, originator);
crm_xml_add(msg, F_CIB_GLOBAL_UPDATE, XML_BOOLEAN_TRUE);
crm_xml_add(msg, F_CIB_OPERATION, CIB_OP_APPLY_DIFF);
/* Its safe to always use the latest version since the election
* ensures the software on this node is the oldest node in the cluster
*/
/* The digest of the current CIB is attached to the diff */
digest = calculate_xml_versioned_digest(the_cib, FALSE, TRUE, CRM_FEATURE_SET);
crm_xml_add(result_diff, XML_ATTR_DIGEST, digest);
crm_log_xml_trace(the_cib, digest);
free(digest);
add_message_xml(msg, F_CIB_UPDATE_DIFF, result_diff);
crm_log_xml_trace(msg, "copy");
return send_cluster_message(NULL, crm_msg_cib, msg, TRUE);
} else if (originator != NULL) {
/* send reply via HA to originating node */
crm_trace("Sending request result to originator only");
crm_xml_add(msg, F_CIB_ISREPLY, originator);
- return send_cluster_message(originator, crm_msg_cib, msg, FALSE);
+ return send_cluster_message(crm_get_peer(0, originator), crm_msg_cib, msg, FALSE);
}
return FALSE;
}
/*
 * Top-level handling of one CIB request, whether it came from a local
 * client or from a cluster peer.
 *
 * request:           the request message (not freed here)
 * force_synchronous: when set, cib_sync_call is added to the call options
 * privileged:        passed through to cib_process_command()
 * from_peer:         TRUE for messages received from the cluster
 * cib_client:        the local client, used for local requests and forwarding
 *
 * The function works out whether to process, forward or drop the request,
 * runs the operation if required, and then sends whatever peer replies,
 * broadcasts and local client notifications are needed.
 */
void
cib_process_request(xmlNode * request, gboolean force_synchronous, gboolean privileged,
gboolean from_peer, cib_client_t * cib_client)
{
int call_type = 0;
int call_options = 0;
gboolean process = TRUE;
gboolean is_update = TRUE;
gboolean needs_reply = TRUE;
gboolean local_notify = FALSE;
gboolean needs_forward = FALSE;
gboolean global_update = crm_is_true(crm_element_value(request, F_CIB_GLOBAL_UPDATE));
xmlNode *op_reply = NULL;
xmlNode *result_diff = NULL;
int rc = pcmk_ok;
const char *op = crm_element_value(request, F_CIB_OPERATION);
const char *originator = crm_element_value(request, F_ORIG);
const char *host = crm_element_value(request, F_CIB_HOST);
const char *client_id = crm_element_value(request, F_CIB_CLIENTID);
crm_trace("%s Processing msg %s", cib_our_uname, crm_element_value(request, F_SEQ));
/* Reset the other statistics when the operation counter wraps to zero */
cib_num_ops++;
if (cib_num_ops == 0) {
cib_num_fail = 0;
cib_num_local = 0;
cib_num_updates = 0;
crm_info("Stats wrapped around");
}
/* An empty host name is treated the same as no host */
if (host != NULL && strlen(host) == 0) {
host = NULL;
}
crm_element_value_int(request, F_CIB_CALLOPTS, &call_options);
if (force_synchronous) {
call_options |= cib_sync_call;
}
crm_trace("Processing %s message (%s) for %s...",
from_peer ? "peer" : "local",
from_peer ? originator : cib_our_uname, host ? host : "master");
rc = cib_get_operation_id(op, &call_type);
if (rc != pcmk_ok) {
/* TODO: construct error reply? */
crm_err("Pre-processing of command failed: %s", pcmk_strerror(rc));
return;
}
is_update = cib_op_modifies(call_type);
if (is_update) {
cib_num_updates++;
}
/* Work out what to do with the request; peer messages may be dropped here */
if (from_peer == FALSE) {
parse_local_options(cib_client, call_type, call_options, host, op,
&local_notify, &needs_reply, &process, &needs_forward);
} else if (parse_peer_options(call_type, request, &local_notify,
&needs_reply, &process, &needs_forward) == FALSE) {
return;
}
crm_trace("Finished determining processing actions");
/* With cib_discard_reply the client is never notified, and a reply is
 * only still needed for operations that modify the CIB */
if (call_options & cib_discard_reply) {
needs_reply = is_update;
local_notify = FALSE;
}
if (needs_forward) {
forward_request(request, cib_client, call_options);
return;
}
if (cib_status != pcmk_ok) {
/* The daemon is in an error state: answer with that status instead of running the op */
rc = cib_status;
crm_err("Operation ignored, cluster configuration is invalid."
" Please repair and restart: %s", pcmk_strerror(cib_status));
op_reply = cib_construct_reply(request, the_cib, cib_status);
} else if (process) {
int level = LOG_INFO;
const char *section = crm_element_value(request, F_CIB_SECTION);
cib_num_local++;
rc = cib_process_command(request, &op_reply, &result_diff, privileged);
/* Pick the log level for the completion message below */
if (global_update) {
switch (rc) {
case pcmk_ok:
case -pcmk_err_old_data:
case -pcmk_err_diff_resync:
case -pcmk_err_diff_failed:
level = LOG_DEBUG_2;
break;
default:
level = LOG_ERR;
}
} else if (safe_str_eq(op, CIB_OP_QUERY)) {
level = LOG_DEBUG_2;
} else if (rc != pcmk_ok) {
cib_num_fail++;
level = LOG_WARNING;
} else if (safe_str_eq(op, CIB_OP_SLAVE)) {
level = LOG_DEBUG_2;
} else if (safe_str_eq(section, XML_CIB_TAG_STATUS)) {
level = LOG_DEBUG_2;
}
do_crm_log_unlikely(level,
"Operation complete: op %s for section %s (origin=%s/%s/%s, version=%s.%s.%s): %s (rc=%d)",
op, section ? section : "'all'", originator ? originator : "local",
crm_element_value(request, F_CIB_CLIENTNAME), crm_element_value(request,
F_CIB_CALLID),
the_cib ? crm_element_value(the_cib, XML_ATTR_GENERATION_ADMIN) : "0",
the_cib ? crm_element_value(the_cib, XML_ATTR_GENERATION) : "0",
the_cib ? crm_element_value(the_cib, XML_ATTR_NUMUPDATES) : "0",
pcmk_strerror(rc), rc);
/* Without a reply object nothing can be sent to peers or the client */
if (op_reply == NULL && (needs_reply || local_notify)) {
crm_err("Unexpected NULL reply to message");
crm_log_xml_err(request, "null reply");
needs_reply = FALSE;
local_notify = FALSE;
}
}
crm_trace("processing response cases %.16x %.16x", call_options, cib_sync_call);
/* from now on we are the server */
if (needs_reply == FALSE || stand_alone) {
/* nothing more to do...
* this was a non-originating slave update
*/
crm_trace("Completed slave update");
} else if (rc == pcmk_ok && result_diff != NULL && !(call_options & cib_inhibit_bcast)) {
/* Successful change: broadcast it, tagged with a fresh local notify id */
gboolean broadcast = FALSE;
cib_local_bcast_num++;
crm_xml_add_int(request, F_CIB_LOCAL_NOTIFY_ID, cib_local_bcast_num);
broadcast = send_peer_reply(request, result_diff, originator, TRUE);
if (broadcast &&
client_id &&
local_notify &&
op_reply) {
/* If we have been asked to sync the reply,
* and a bcast msg has gone out, we queue the local notify
* until we know the bcast message has been received */
local_notify = FALSE;
queue_local_notify(op_reply, client_id, (call_options & cib_sync_call), from_peer);
op_reply = NULL; /* the reply is queued, so don't free here */
}
} else if (call_options & cib_discard_reply) {
crm_trace("Caller isn't interested in reply");
} else if (from_peer) {
/* Nothing to broadcast: trace why, then answer the originating node directly */
if (is_update == FALSE || result_diff == NULL) {
crm_trace("Request not broadcast: R/O call");
} else if (call_options & cib_inhibit_bcast) {
crm_trace("Request not broadcast: inhibited");
} else if (rc != pcmk_ok) {
crm_trace("Request not broadcast: call failed: %s", pcmk_strerror(rc));
} else {
crm_trace("Directing reply to %s", originator);
}
send_peer_reply(op_reply, result_diff, originator, FALSE);
}
if (local_notify && client_id) {
/* When nothing was processed here, the incoming message itself is
 * what gets passed to the client */
if (process == FALSE) {
do_local_notify(request, client_id, call_options & cib_sync_call, from_peer);
} else {
do_local_notify(op_reply, client_id, call_options & cib_sync_call, from_peer);
}
}
free_xml(op_reply);
free_xml(result_diff);
return;
}
/*
 * Build a "cib-reply" message for a request.
 *
 * The operation, call id, client id and call options are copied over from
 * the request, rc is stored as F_CIB_RC, and output (when not NULL) is
 * attached as F_CIB_CALLDATA.  Returns the newly created reply.
 */
xmlNode *
cib_construct_reply(xmlNode * request, xmlNode * output, int rc)
{
    /* Request fields echoed into the reply */
    const char *names[] = {
        F_CIB_OPERATION,
        F_CIB_CALLID,
        F_CIB_CLIENTID,
        F_CIB_CALLOPTS
    };
    const int field_count = DIMOF(names);
    int idx = 0;
    xmlNode *reply = NULL;

    crm_trace("Creating a basic reply");
    reply = create_xml_node(NULL, "cib-reply");
    crm_xml_add(reply, F_TYPE, T_CIB);

    while (idx < field_count) {
        const char *field = names[idx];

        crm_xml_add(reply, field, crm_element_value(request, field));
        idx++;
    }

    crm_xml_add_int(reply, F_CIB_RC, rc);

    if (output == NULL) {
        return reply;
    }

    crm_trace("Attaching reply output");
    add_message_xml(reply, F_CIB_CALLDATA, output);
    return reply;
}
/*
 * Execute one CIB operation against the in-memory CIB.
 *
 * request:    the request message
 * reply:      out; set to a newly constructed reply unless the caller asked
 *             for cib_discard_reply, in which case it stays NULL
 * cib_diff:   out; receives the diff produced by a modifying operation
 * privileged: when FALSE, cib_op_can_run() is consulted before running
 *
 * Returns pcmk_ok or a negative error code.  Must only be called while
 * cib_status is pcmk_ok (asserted).
 */
int
cib_process_command(xmlNode * request, xmlNode ** reply, xmlNode ** cib_diff, gboolean privileged)
{
xmlNode *input = NULL;
xmlNode *output = NULL;
xmlNode *result_cib = NULL;
xmlNode *current_cib = NULL;
#if ENABLE_ACL
xmlNode *filtered_current_cib = NULL;
#endif
int call_type = 0;
int call_options = 0;
int log_level = LOG_DEBUG_4;
const char *op = NULL;
const char *section = NULL;
int rc = pcmk_ok;
int rc2 = pcmk_ok;
gboolean send_r_notify = FALSE;
gboolean global_update = FALSE;
gboolean config_changed = FALSE;
gboolean manage_counters = TRUE;
CRM_ASSERT(cib_status == pcmk_ok);
*reply = NULL;
*cib_diff = NULL;
current_cib = the_cib;
/* Start processing the request... */
op = crm_element_value(request, F_CIB_OPERATION);
crm_element_value_int(request, F_CIB_CALLOPTS, &call_options);
rc = cib_get_operation_id(op, &call_type);
/* NOTE(review): global_update is still FALSE here; it is only read from the
 * request further down, after this check */
if (rc == pcmk_ok && privileged == FALSE) {
rc = cib_op_can_run(call_type, call_options, privileged, global_update);
}
/* The prepare step always runs; its result is only used if nothing failed earlier */
rc2 = cib_op_prepare(call_type, request, &input, &section);
if (rc == pcmk_ok) {
rc = rc2;
}
if (rc != pcmk_ok) {
crm_trace("Call setup failed: %s", pcmk_strerror(rc));
goto done;
} else if (cib_op_modifies(call_type) == FALSE) {
/* Read-only operation: run it (against an ACL-filtered copy where that
 * applies) and go straight to building the reply */
#if ENABLE_ACL
if (acl_enabled(config_hash) == FALSE
|| acl_filter_cib(request, current_cib, current_cib, &filtered_current_cib) == FALSE) {
rc = cib_perform_op(op, call_options, cib_op_func(call_type), TRUE,
section, request, input, FALSE, &config_changed,
current_cib, &result_cib, NULL, &output);
} else if (filtered_current_cib == NULL) {
crm_debug("Pre-filtered the entire cib");
rc = -EACCES;
} else {
crm_debug("Pre-filtered the queried cib according to the ACLs");
rc = cib_perform_op(op, call_options, cib_op_func(call_type), TRUE,
section, request, input, FALSE, &config_changed,
filtered_current_cib, &result_cib, NULL, &output);
}
#else
rc = cib_perform_op(op, call_options, cib_op_func(call_type), TRUE,
section, request, input, FALSE, &config_changed,
current_cib, &result_cib, NULL, &output);
#endif
/* A read-only operation is not expected to produce a new CIB */
CRM_CHECK(result_cib == NULL, free_xml(result_cib));
goto done;
}
/* Handle a valid write action */
global_update = crm_is_true(crm_element_value(request, F_CIB_GLOBAL_UPDATE));
if (global_update) {
/* Updates replicated from a peer: counters are not managed and a diff is forced */
manage_counters = FALSE;
call_options |= cib_force_diff;
CRM_CHECK(call_type == 3 || call_type == 4, crm_err("Call type: %d", call_type);
crm_log_xml_err(request, "bad op"));
}
#ifdef SUPPORT_PRENOTIFY
if ((call_options & cib_inhibit_notify) == 0) {
cib_pre_notify(call_options, op, the_cib, input);
}
#endif
if (rc == pcmk_ok) {
if (call_options & cib_inhibit_bcast) {
/* skip */
crm_trace("Skipping update: inhibit broadcast");
manage_counters = FALSE;
}
rc = cib_perform_op(op, call_options, cib_op_func(call_type), FALSE,
section, request, input, manage_counters, &config_changed,
current_cib, &result_cib, cib_diff, &output);
#if ENABLE_ACL
/* With ACLs enabled, a change that fails the diff check is rejected */
if (acl_enabled(config_hash) == TRUE
&& acl_check_diff(request, current_cib, result_cib, *cib_diff) == FALSE) {
rc = -EACCES;
}
#endif
if (rc == pcmk_ok && config_changed) {
/* Record who changed the configuration and when */
time_t now;
char *now_str = NULL;
const char *validation = crm_element_value(result_cib, XML_ATTR_VALIDATION);
if (validation) {
int current_version = get_schema_version(validation);
int support_version = get_schema_version("pacemaker-1.1");
/* Once the later schemas support the "update-*" attributes, change "==" to ">=" -- Changed */
if (current_version >= support_version) {
const char *origin = crm_element_value(request, F_ORIG);
crm_xml_replace(result_cib, XML_ATTR_UPDATE_ORIG,
origin ? origin : cib_our_uname);
crm_xml_replace(result_cib, XML_ATTR_UPDATE_CLIENT,
crm_element_value(request, F_CIB_CLIENTNAME));
#if ENABLE_ACL
crm_xml_replace(result_cib, XML_ATTR_UPDATE_USER,
crm_element_value(request, F_CIB_USER));
#endif
}
}
now = time(NULL);
now_str = ctime(&now);
now_str[24] = EOS; /* replace the newline */
crm_xml_replace(result_cib, XML_CIB_ATTR_WRITTEN, now_str);
}
/* When counters were not managed, config_changed is recomputed from the two CIBs */
if (manage_counters == FALSE) {
config_changed = cib_config_changed(current_cib, result_cib, cib_diff);
}
/* Always write to disk for replace ops,
* this negates the need to detect ordering changes
*/
if (config_changed == FALSE && crm_str_eq(CIB_OP_REPLACE, op, TRUE)) {
config_changed = TRUE;
}
}
if (rc == pcmk_ok && (call_options & cib_dryrun) == 0) {
/* Make the result the live CIB */
rc = activateCibXml(result_cib, config_changed, op);
if (rc == pcmk_ok && cib_internal_config_changed(*cib_diff)) {
cib_read_config(config_hash, result_cib);
}
/* Replace notifications are sent for erase ops and for replace ops on
 * the whole CIB, the nodes section or the status section */
if (crm_str_eq(CIB_OP_REPLACE, op, TRUE)) {
if (section == NULL) {
send_r_notify = TRUE;
} else if (safe_str_eq(section, XML_TAG_CIB)) {
send_r_notify = TRUE;
} else if (safe_str_eq(section, XML_CIB_TAG_NODES)) {
send_r_notify = TRUE;
} else if (safe_str_eq(section, XML_CIB_TAG_STATUS)) {
send_r_notify = TRUE;
}
} else if (crm_str_eq(CIB_OP_ERASE, op, TRUE)) {
send_r_notify = TRUE;
}
} else if (rc == -pcmk_err_dtd_validation) {
/* Validation failed: the (possibly ACL-filtered) result CIB becomes the
 * output in place of whatever the operation produced */
if (output != NULL) {
crm_log_xml_info(output, "cib:output");
free_xml(output);
}
#if ENABLE_ACL
{
xmlNode *filtered_result_cib = NULL;
if (acl_enabled(config_hash) == FALSE
|| acl_filter_cib(request, current_cib, result_cib,
&filtered_result_cib) == FALSE) {
output = result_cib;
} else {
crm_debug("Filtered the result cib for output according to the ACLs");
output = filtered_result_cib;
if (result_cib != NULL) {
free_xml(result_cib);
}
}
}
#else
output = result_cib;
#endif
} else {
/* Any other failure, or a dry run: the result is discarded */
free_xml(result_cib);
}
if ((call_options & cib_inhibit_notify) == 0) {
const char *call_id = crm_element_value(request, F_CIB_CALLID);
const char *client = crm_element_value(request, F_CIB_CLIENTNAME);
#ifdef SUPPORT_POSTNOTIFY
cib_post_notify(call_options, op, input, rc, the_cib);
#endif
cib_diff_notify(call_options, client, call_id, op, input, rc, *cib_diff);
}
if (send_r_notify) {
const char *origin = crm_element_value(request, F_ORIG);
cib_replace_notify(origin, the_cib, rc, *cib_diff);
}
/* Choose how loudly the diff is logged */
if (rc != pcmk_ok) {
log_level = LOG_DEBUG_4;
if (rc == -pcmk_err_dtd_validation && global_update) {
log_level = LOG_WARNING;
crm_log_xml_info(input, "cib:global_update");
}
} else if (config_changed) {
log_level = LOG_DEBUG_3;
if (cib_is_master) {
log_level = LOG_NOTICE;
}
} else if (cib_is_master) {
log_level = LOG_DEBUG_2;
}
log_cib_diff(log_level, *cib_diff, "cib:diff");
done:
/* Common exit: build the reply unless the caller does not want one */
if ((call_options & cib_discard_reply) == 0) {
*reply = cib_construct_reply(request, output, rc);
crm_log_xml_trace(*reply, "cib:reply");
}
#if ENABLE_ACL
if (filtered_current_cib != NULL) {
free_xml(filtered_current_cib);
}
#endif
if (call_type >= 0) {
cib_op_cleanup(call_type, call_options, &input, &output);
}
return rc;
}
/*
 * Comparator ordering two CIB messages by the integer value of their
 * F_CIB_CALLID attribute.
 *
 * Returns 0 when both call ids are equal, -1 when a's id is the smaller
 * one and 1 otherwise.
 */
gint
cib_GCompareFunc(gconstpointer a, gconstpointer b)
{
    const xmlNode *lhs = a;
    const xmlNode *rhs = b;
    int lhs_id = crm_parse_int(crm_element_value_const(lhs, F_CIB_CALLID), NULL);
    int rhs_id = crm_parse_int(crm_element_value_const(rhs, F_CIB_CALLID), NULL);

    if (lhs_id < rhs_id) {
        return -1;
    }
    return (lhs_id == rhs_id) ? 0 : 1;
}
#if SUPPORT_HEARTBEAT
/*
 * Heartbeat message entry point: convert the HA_Message into XML, hand it
 * to cib_peer_callback() and release the converted copy afterwards.
 */
void
cib_ha_peer_callback(HA_Message * msg, void *private_data)
{
    xmlNode *converted = convert_ha_message(NULL, msg, __FUNCTION__);

    cib_peer_callback(converted, private_data);
    free_xml(converted);
}
#endif
/*
 * Handle a CIB message received from the cluster.
 *
 * - Messages without an originator, or originating from this node, only
 *   trigger check_local_notify() when they carry a readable
 *   F_CIB_LOCAL_NOTIFY_ID; nothing else is done with them.
 * - Messages arriving while crm_peer_cache is NULL are discarded with a
 *   warning.
 * - Everything else gets F_CIB_CLIENTNAME defaulted to the originator (if
 *   unset) and is passed to cib_process_request().
 */
void
cib_peer_callback(xmlNode * msg, void *private_data)
{
    const char *sender = crm_element_value(msg, F_ORIG);

    if (sender == NULL || crm_str_eq(sender, cib_our_uname, TRUE)) {
        /* message is from ourselves */
        int notify_id = 0;

        if (crm_element_value_int(msg, F_CIB_LOCAL_NOTIFY_ID, &notify_id) == 0) {
            check_local_notify(notify_id);
        }
        return;
    }

    if (crm_peer_cache == NULL) {
        const char *why = "membership not established";
        const char *seq = crm_element_value(msg, F_SEQ);
        const char *op = crm_element_value(msg, F_CIB_OPERATION);

        crm_warn("Discarding %s message (%s) from %s: %s", op, seq, sender, why);
        return;
    }

    if (crm_element_value(msg, F_CIB_CLIENTNAME) == NULL) {
        crm_xml_add(msg, F_CIB_CLIENTNAME, sender);
    }

    cib_process_request(msg, FALSE, TRUE, TRUE, NULL);
}
#if SUPPORT_HEARTBEAT
/* Declared here, defined elsewhere; not referenced by the functions visible below. */
extern oc_ev_t *cib_ev_token;
/* Handle passed to find_library_function() when the CCM entry points below are resolved. */
static void *ccm_library = NULL;
/* Resolved on first use in cib_ccm_msg_callback() from the symbol "oc_ev_callback_done". */
int (*ccm_api_callback_done) (void *cookie) = NULL;
/* Resolved on first use in cib_ccm_dispatch() from the symbol "oc_ev_handle_event". */
int (*ccm_api_handle_event) (const oc_ev_t * token) = NULL;
/*
 * Client status notification handler.
 *
 * Only notifications for the CRM_SYSTEM_CIB client are acted upon: the
 * status is logged, JOINSTATUS/LEAVESTATUS are mapped onto
 * ONLINESTATUS/OFFLINESTATUS, and the peer entry for 'node' has its
 * crm_proc_cib process state updated.
 */
void
cib_client_status_callback(const char *node, const char *client, const char *status, void *private)
{
    crm_node_t *peer = NULL;

    if (!safe_str_eq(client, CRM_SYSTEM_CIB)) {
        return;
    }

    crm_info("Status update: Client %s/%s now has status [%s]", node, client, status);

    if (safe_str_eq(status, JOINSTATUS)) {
        status = ONLINESTATUS;
    } else if (safe_str_eq(status, LEAVESTATUS)) {
        status = OFFLINESTATUS;
    }

    peer = crm_get_peer(0, node);
    crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cib, status);
}
/*
 * Dispatch a pending CCM event for the token passed as user_data.
 *
 * The "oc_ev_handle_event" entry point is looked up on first use.
 * Returns 0 when the event handler reports success; any other result is
 * logged as a connection failure and the process exits with status 2.
 */
int
cib_ccm_dispatch(gpointer user_data)
{
    oc_ev_t *token = (oc_ev_t *) user_data;
    int event_rc = 0;

    crm_trace("received callback");

    if (ccm_api_handle_event == NULL) {
        ccm_api_handle_event =
            find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_handle_event", 1);
    }

    event_rc = (*ccm_api_handle_event) (token);
    if (event_rc != 0) {
        crm_err("CCM connection appears to have failed: rc=%d.", event_rc);

        /* eventually it might be nice to recover and reconnect... but until then... */
        crm_err("Exiting to recover from CCM connection failure");
        exit(2);
    }

    return 0;
}
/* Membership instance id taken from the last CCM event that updated it */
int current_instance = 0;

/*
 * CCM membership event handler.
 *
 * New-membership, invalid and primary-restored events refresh the peer
 * data: current_instance is updated, every "out" entry is recorded as
 * CRM_NODE_LOST and every member entry as CRM_NODE_ACTIVE.  Other events
 * are only logged.  In all cases the "oc_ev_callback_done" entry point
 * (resolved on first use) is invoked with 'cookie' before returning.
 */
void
cib_ccm_msg_callback(oc_ed_t event, void *cookie, size_t size, const void *data)
{
    const oc_ev_membership_t *membership = data;
    gboolean refresh = FALSE;

    CRM_ASSERT(membership != NULL);

    crm_info("Processing CCM event=%s (id=%d)", ccm_event_name(event), membership->m_instance);

    if (current_instance > membership->m_instance) {
        crm_err("Membership instance ID went backwards! %d->%d",
                current_instance, membership->m_instance);
        CRM_ASSERT(current_instance <= membership->m_instance);
    }

    switch (event) {
        case OC_EV_MS_NEW_MEMBERSHIP:
        case OC_EV_MS_INVALID:
        case OC_EV_MS_PRIMARY_RESTORED:
            refresh = TRUE;
            break;

        case OC_EV_MS_NOT_PRIMARY:
            crm_trace("Ignoring transitional CCM event: %s", ccm_event_name(event));
            break;

        case OC_EV_MS_EVICTED:
            crm_err("Evicted from CCM: %s", ccm_event_name(event));
            break;

        default:
            crm_err("Unknown CCM event: %d", event);
            break;
    }

    if (refresh) {
        unsigned int idx = 0;

        CRM_CHECK(membership != NULL, return);
        current_instance = membership->m_instance;

        /* departed nodes first, then the current members */
        for (idx = 0; idx < membership->m_n_out; idx++) {
            crm_update_ccm_node(membership, idx + membership->m_out_idx, CRM_NODE_LOST,
                                current_instance);
        }
        for (idx = 0; idx < membership->m_n_member; idx++) {
            crm_update_ccm_node(membership, idx + membership->m_memb_idx, CRM_NODE_ACTIVE,
                                current_instance);
        }
    }

    if (ccm_api_callback_done == NULL) {
        ccm_api_callback_done =
            find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_callback_done", 1);
    }
    (*ccm_api_callback_done) (cookie);
}
#endif
/*
 * Report whether writes are permitted.
 *
 * The current implementation ignores 'flags' and always answers TRUE.
 */
gboolean
can_write(int flags)
{
    (void) flags;               /* deliberately unused */
    return TRUE;
}
/*
 * Timer callback registered by initiate_exit(): log the forced exit and
 * terminate the CIB via the "fast" path.  Returns FALSE.
 */
static gboolean
cib_force_exit(gpointer data)
{
    (void) data;                /* no context is needed */

    crm_notice("Forcing exit!");
    terminate_cib(__FUNCTION__, TRUE);

    return FALSE;
}
/*
 * GHFunc-style visitor for client_list entries.  Disconnecting is not
 * implemented: the client's name is only reported at error level.
 */
static void
disconnect_remote_client(gpointer key, gpointer value, gpointer user_data)
{
    const cib_client_t *remote = value;

    crm_err("Disconnecting %s... Not implemented", crm_str(remote->name));
}
/*
 * Begin (or continue) shutting the CIB down.
 *
 * On the first call cib_shutdown_flag is raised and every connection on
 * the ipcs_rw, ipcs_ro and ipcs_shm services is disconnected and
 * unreferenced; entries in client_list are visited with
 * disconnect_remote_client().  On every call, initiate_exit() is invoked
 * once client_list is empty; otherwise the number of clients still being
 * waited for is logged.
 */
void
cib_shutdown(int nsig)
{
    struct qb_ipcs_stats srv_stats;

    if (cib_shutdown_flag == FALSE) {
        int dropped = 0;
        qb_ipcs_connection_t *conn = NULL;
        qb_ipcs_connection_t *following = NULL;

        cib_shutdown_flag = TRUE;

        /* fetch the successor before disconnecting the current connection */
        for (conn = qb_ipcs_connection_first_get(ipcs_rw); conn != NULL; conn = following) {
            following = qb_ipcs_connection_next_get(ipcs_rw, conn);
            crm_debug("Disconnecting r/w client %p...", conn);
            qb_ipcs_disconnect(conn);
            qb_ipcs_connection_unref(conn);
            dropped++;
        }

        for (conn = qb_ipcs_connection_first_get(ipcs_ro); conn != NULL; conn = following) {
            following = qb_ipcs_connection_next_get(ipcs_ro, conn);
            crm_debug("Disconnecting r/o client %p...", conn);
            qb_ipcs_disconnect(conn);
            qb_ipcs_connection_unref(conn);
            dropped++;
        }

        for (conn = qb_ipcs_connection_first_get(ipcs_shm); conn != NULL; conn = following) {
            following = qb_ipcs_connection_next_get(ipcs_shm, conn);
            crm_debug("Disconnecting non-blocking r/w client %p...", conn);
            qb_ipcs_disconnect(conn);
            qb_ipcs_connection_unref(conn);
            dropped++;
        }

        dropped += g_hash_table_size(client_list);

        crm_debug("Disconnecting %d remote clients", g_hash_table_size(client_list));
        g_hash_table_foreach(client_list, disconnect_remote_client, NULL);
        crm_info("Disconnected %d clients", dropped);
    }

    qb_ipcs_stats_get(ipcs_rw, &srv_stats, QB_FALSE);

    if (g_hash_table_size(client_list) != 0) {
        crm_info("Waiting on %d clients to disconnect (%d)", g_hash_table_size(client_list), srv_stats.active_connections);
    } else {
        crm_info("All clients disconnected (%d)", srv_stats.active_connections);
        initiate_exit();
    }
}
/*
 * Leave the cluster.
 *
 * With fewer than two active peers the CIB terminates immediately.
 * Otherwise a "cib_shutdown_req" exit notification is sent to the cluster
 * and a 5s timer is armed that forces the exit through cib_force_exit().
 */
void
initiate_exit(void)
{
    xmlNode *notice = NULL;
    int peers = crm_active_peers();

    if (peers < 2) {
        terminate_cib(__FUNCTION__, FALSE);
        return;
    }

    crm_info("Sending disconnect notification to %d peers...", peers);

    notice = create_xml_node(NULL, "exit-notification");
    crm_xml_add(notice, F_TYPE, "cib");
    crm_xml_add(notice, F_CIB_OPERATION, "cib_shutdown_req");

    send_cluster_message(NULL, crm_msg_cib, notice, TRUE);
    free_xml(notice);

    g_timeout_add(crm_get_msec("5s"), cib_force_exit, NULL);
}
extern int remote_fd;
extern int remote_tls_fd;
extern void terminate_cs_connection(void);

/*
 * Tear the CIB down on behalf of 'caller'.
 *
 * Closes the remote listening descriptors, and unless 'fast' is set also
 * disconnects from the cluster layer (heartbeat or corosync, depending on
 * the build and the running stack).  After uninitializeCib() either the
 * main loop is asked to quit (non-fast, loop running) or the IPC services
 * and logging are destroyed and the process exits: EX_USAGE for a fast
 * exit, EX_OK otherwise.
 */
void
terminate_cib(const char *caller, gboolean fast)
{
    const char *how = "";

    if (remote_fd > 0) {
        close(remote_fd);
        remote_fd = 0;
    }
    if (remote_tls_fd > 0) {
        close(remote_tls_fd);
        remote_tls_fd = 0;
    }

    if (fast == FALSE) {
        if (is_heartbeat_cluster()) {
#if SUPPORT_HEARTBEAT
            if (hb_conn == NULL) {
                crm_err("%s: No heartbeat connection", caller);
            } else {
                crm_info("%s: Disconnecting heartbeat", caller);
                hb_conn->llc_ops->signoff(hb_conn, FALSE);
                hb_conn = NULL;
            }
#endif
        } else {
#if SUPPORT_COROSYNC
            crm_info("%s: Disconnecting corosync", caller);
            terminate_cs_connection();
#endif
        }
    }

    uninitializeCib();

    if (fast) {
        how = " fast";
    } else if (mainloop) {
        how = " from mainloop";
    }
    crm_info("%s: Exiting%s...", caller, how);

    if (fast == FALSE && mainloop != NULL && g_main_is_running(mainloop)) {
        g_main_quit(mainloop);
        return;
    }

    qb_ipcs_destroy(ipcs_ro);
    qb_ipcs_destroy(ipcs_rw);
    qb_ipcs_destroy(ipcs_shm);
    qb_log_fini();

    exit(fast ? EX_USAGE : EX_OK);
}
diff --git a/cib/messages.c b/cib/messages.c
index 57524613da..4b1157118c 100644
--- a/cib/messages.c
+++ b/cib/messages.c
@@ -1,466 +1,466 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <time.h>
#include <sys/param.h>
#include <sys/types.h>
#include <crm/crm.h>
#include <crm/cib/internal.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/cluster/internal.h>
#include <cibio.h>
#include <cibmessages.h>
#include <callbacks.h>
/*
 * While a resync is pending, cib_server_process_diff() refuses diffs; once
 * its sync_in_progress counter exceeds this value the counter is reset so
 * that a new resync can be requested.
 */
#define MAX_DIFF_RETRY 5
/* R/W ("master") mode flag: starts TRUE in CIBPIPE builds and FALSE otherwise;
 * toggled at runtime by cib_process_readwrite() */
#ifdef CIBPIPE
gboolean cib_is_master = TRUE;
#else
gboolean cib_is_master = FALSE;
#endif
/* The CIB document that sync_our_cib() digests and attaches to replace requests */
xmlNode *the_cib = NULL;
/* Set to TRUE by cib_process_readwrite() when this instance switches to R/W mode */
gboolean syncd_once = FALSE;
extern const char *cib_our_uname;
/* Forward declarations; only check_generation() and sync_our_cib() are defined in the part of the file visible here */
int revision_check(xmlNode * cib_update, xmlNode * cib_copy, int flags);
int get_revision(xmlNode * xml_obj, int cur_revision);
int updateList(xmlNode * local_cib, xmlNode * update_command, xmlNode * failed,
int operation, const char *section);
gboolean check_generation(xmlNode * newCib, xmlNode * oldCib);
gboolean update_results(xmlNode * failed, xmlNode * target, const char *operation, int return_code);
int cib_update_counter(xmlNode * xml_obj, const char *field, gboolean reset);
int sync_our_cib(xmlNode * request, gboolean all);
extern xmlNode *cib_msg_copy(const xmlNode * msg, gboolean with_data);
extern gboolean cib_shutdown_flag;
/*
 * Handle a shutdown request or acknowledgement from a peer.
 *
 * A message without F_CIB_ISREPLY is a request and is only logged.  A
 * reply received while we are shutting down terminates the CIB; a reply
 * received at any other time is an error (-EINVAL).  CIBPIPE builds
 * always return -EINVAL.  *answer is set to NULL.
 */
int
cib_process_shutdown_req(const char *op, int options, const char *section, xmlNode * req,
                         xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib,
                         xmlNode ** answer)
{
#ifdef CIBPIPE
    return -EINVAL;
#else
    const char *peer = crm_element_value(req, F_ORIG);

    *answer = NULL;

    if (crm_element_value(req, F_CIB_ISREPLY) == NULL) {
        crm_info("Shutdown REQ from %s", peer);
        return pcmk_ok;
    }

    if (cib_shutdown_flag) {
        crm_info("Shutdown ACK from %s", peer);
        terminate_cib(__FUNCTION__, FALSE);
        return pcmk_ok;
    }

    crm_err("Shutdown ACK from %s - not shutting down", peer);
    return -EINVAL;
#endif
}
/*
 * Fallback handler for operations without a dedicated implementation.
 *
 * Returns -EINVAL when no operation name was supplied, pcmk_ok for
 * CRM_OP_NOOP (compared case-insensitively) and -EPROTONOSUPPORT for
 * anything else.  *answer is always set to NULL.
 */
int
cib_process_default(const char *op, int options, const char *section, xmlNode * req,
                    xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib,
                    xmlNode ** answer)
{
    int result = pcmk_ok;

    /* op is only NULL-checked below, so it must not reach %s unguarded */
    crm_trace("Processing \"%s\" event", crm_str(op));
    *answer = NULL;

    if (op == NULL) {
        result = -EINVAL;
        crm_err("No operation specified");

    } else if (strcasecmp(CRM_OP_NOOP, op) == 0) {
        ;                       /* nothing to do */

    } else {
        result = -EPROTONOSUPPORT;
        crm_err("Action [%s] is not supported by the CIB", op);
    }

    return result;
}
/*
 * Handle a quit request: log it and exit the process with status 0.
 * The return statement only satisfies the handler signature.
 */
int
cib_process_quit(const char *op, int options, const char *section, xmlNode * req, xmlNode * input,
                 xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer)
{
    crm_trace("Processing \"%s\" event", op);
    crm_warn("The CRMd has asked us to exit... complying");

    exit(0);

    return pcmk_ok;             /* not reached */
}
/*
 * Query or change the R/W ("master") mode of this CIB instance.
 *
 * CIB_OP_ISMASTER: returns pcmk_ok in R/W mode, -EPERM otherwise.
 * CIB_OP_MASTER:   switches to R/W mode (also setting syncd_once).
 * anything else:   switches to R/O mode if currently R/W.
 *
 * Mode changes return pcmk_ok.  CIBPIPE builds always return -EINVAL.
 */
int
cib_process_readwrite(const char *op, int options, const char *section, xmlNode * req,
                      xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib,
                      xmlNode ** answer)
{
#ifdef CIBPIPE
    return -EINVAL;
#else
    crm_trace("Processing \"%s\" event", op);

    if (safe_str_eq(op, CIB_OP_ISMASTER)) {
        return (cib_is_master == TRUE) ? pcmk_ok : -EPERM;
    }

    if (safe_str_eq(op, CIB_OP_MASTER)) {
        if (cib_is_master != FALSE) {
            crm_debug("We are still in R/W mode");
        } else {
            crm_info("We are now in R/W mode");
            cib_is_master = TRUE;
            syncd_once = TRUE;
        }

    } else if (cib_is_master) {
        crm_info("We are now in R/O mode");
        cib_is_master = FALSE;
    }

    return pcmk_ok;
#endif
}
/*
 * Answer a ping: *answer receives a new XML_CRM_TAG_PING node carrying
 * status "ok" and CRM_SYSTEM_CIB as the originating subsystem.
 * Returns pcmk_ok, or -EINVAL in CIBPIPE builds.
 */
int
cib_process_ping(const char *op, int options, const char *section, xmlNode * req, xmlNode * input,
                 xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer)
{
#ifdef CIBPIPE
    return -EINVAL;
#else
    xmlNode *pong = NULL;

    crm_trace("Processing \"%s\" event", op);

    pong = create_xml_node(NULL, XML_CRM_TAG_PING);
    crm_xml_add(pong, XML_PING_ATTR_STATUS, "ok");
    crm_xml_add(pong, XML_PING_ATTR_SYSFROM, CRM_SYSTEM_CIB);
    *answer = pong;

    return pcmk_ok;
#endif
}
/*
 * Sync handler: delegates to sync_our_cib() with all == TRUE.
 * CIBPIPE builds return -EINVAL instead.
 */
int
cib_process_sync(const char *op, int options, const char *section, xmlNode * req, xmlNode * input,
                 xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer)
{
#ifndef CIBPIPE
    return sync_our_cib(req, TRUE);
#else
    return -EINVAL;
#endif
}
/*
 * Single-peer sync handler: delegates to sync_our_cib() with all == FALSE,
 * so the copy is addressed to the request's originator.
 * CIBPIPE builds return -EINVAL instead.
 */
int
cib_process_sync_one(const char *op, int options, const char *section, xmlNode * req,
                     xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib,
                     xmlNode ** answer)
{
#ifndef CIBPIPE
    return sync_our_cib(req, FALSE);
#else
    return -EINVAL;
#endif
}
/*
 * Non-zero while a requested re-sync is outstanding; also counts the diffs
 * refused in the meantime.
 */
int sync_in_progress = 0;

/*
 * Server-side wrapper around cib_process_diff().
 *
 * While a re-sync is outstanding, incoming diffs are not applied and
 * -pcmk_err_diff_resync is returned.  When applying a diff reports that a
 * re-sync is needed, an R/O instance discards the result and asks the
 * cluster for a CIB_OP_SYNC_ONE (-ENOTCONN if that cannot be sent); an R/W
 * instance reports -pcmk_err_diff_failed instead.
 */
int
cib_server_process_diff(const char *op, int options, const char *section, xmlNode * req,
                        xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib,
                        xmlNode ** answer)
{
    int rc = pcmk_ok;

    /* The master is never waiting for a resync, and after too many refused
     * diffs the last request may have been lost: allow another full-sync.
     */
    if (cib_is_master || sync_in_progress > MAX_DIFF_RETRY) {
        sync_in_progress = 0;
    }

    if (sync_in_progress != 0) {
        int add_admin_epoch = 0;
        int add_epoch = 0;
        int add_updates = 0;
        int del_admin_epoch = 0;
        int del_epoch = 0;
        int del_updates = 0;

        cib_diff_version_details(input,
                                 &add_admin_epoch, &add_epoch, &add_updates,
                                 &del_admin_epoch, &del_epoch, &del_updates);

        sync_in_progress++;
        crm_notice("Not applying diff %d.%d.%d -> %d.%d.%d (sync in progress)",
                   del_admin_epoch, del_epoch, del_updates,
                   add_admin_epoch, add_epoch, add_updates);
        return -pcmk_err_diff_resync;
    }

    rc = cib_process_diff(op, options, section, req, input, existing_cib, result_cib, answer);
    if (rc != -pcmk_err_diff_resync) {
        return rc;
    }

    if (cib_is_master == FALSE) {
        xmlNode *sync_request = create_xml_node(NULL, "sync-me");

        free_xml(*result_cib);
        *result_cib = NULL;

        crm_info("Requesting re-sync from peer");
        sync_in_progress++;

        crm_xml_add(sync_request, F_TYPE, "cib");
        crm_xml_add(sync_request, F_CIB_OPERATION, CIB_OP_SYNC_ONE);
        crm_xml_add(sync_request, F_CIB_DELEGATED, cib_our_uname);

        if (send_cluster_message(NULL, crm_msg_cib, sync_request, FALSE) == FALSE) {
            rc = -ENOTCONN;
        }
        free_xml(sync_request);

    } else {
        rc = -pcmk_err_diff_failed;
        if (options & cib_force_diff) {
            crm_warn("Not requesting full refresh in R/W mode");
        }
    }

    return rc;
}
/*
 * Server-side wrapper around cib_process_replace(): a successful replace
 * whose input element is a complete XML_TAG_CIB document clears
 * sync_in_progress.  The result of cib_process_replace() is returned
 * unchanged.
 */
int
cib_process_replace_svr(const char *op, int options, const char *section, xmlNode * req,
                        xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib,
                        xmlNode ** answer)
{
    /* read the element name before the replace is processed */
    const char *input_name = crm_element_name(input);
    int rc = cib_process_replace(op, options, section, req, input, existing_cib,
                                 result_cib, answer);

    if (rc != pcmk_ok) {
        return rc;
    }

    if (safe_str_eq(input_name, XML_TAG_CIB)) {
        sync_in_progress = 0;
    }
    return rc;
}
/*
 * Remove from 'parent' the objects described by 'delete_spec'.
 *
 * The node matching delete_spec (by name and id, or by name alone when
 * the spec carries no id) is looked up beneath parent.  A spec without
 * children causes the matching node to be freed; otherwise the function
 * recurses for each child of the spec, using the match as the new parent.
 *
 * Returns -EINVAL when either argument is NULL, pcmk_ok when nothing
 * matched or the leaf was removed, and otherwise the first non-ok result
 * produced while recursing.
 */
static int
delete_cib_object(xmlNode * parent, xmlNode * delete_spec)
{
    const char *object_name = NULL;
    const char *object_id = NULL;
    xmlNode *match = NULL;
    xmlNode *spec_child = NULL;
    int result = pcmk_ok;

    if (delete_spec != NULL) {
        object_name = crm_element_name(delete_spec);
    }
    object_id = crm_element_value(delete_spec, XML_ATTR_ID);

    crm_trace("Processing: <%s id=%s>", crm_str(object_name), crm_str(object_id));

    if (delete_spec == NULL || parent == NULL) {
        return -EINVAL;
    }

    if (object_id != NULL) {
        match = find_entity(parent, object_name, object_id);
    } else {
        /* placeholder object */
        match = find_xml_node(parent, object_name, FALSE);
    }

    if (match == NULL) {
        return pcmk_ok;
    }

    if (xml_has_children(delete_spec) == FALSE) {
        /* only leaves are deleted */
        crm_debug("Removing leaf: <%s id=%s>", crm_str(object_name), crm_str(object_id));
        free_xml(match);
        return pcmk_ok;
    }

    for (spec_child = __xml_first_child(delete_spec); spec_child != NULL;
         spec_child = __xml_next(spec_child)) {
        int child_rc = delete_cib_object(match, spec_child);

        /* only the first error is likely to be interesting */
        if (result == pcmk_ok) {
            result = child_rc;
        }
    }

    return result;
}
/*
 * Delete the objects described by 'input' from the requested section of
 * *result_cib.
 *
 * A section naming the whole CIB (XML_CIB_TAG_SECTION_ALL, XML_TAG_CIB, or
 * an input that itself is an XML_TAG_CIB element) is treated as "no
 * section".  Only CIB_OP_DELETE is accepted, and input must not be NULL;
 * both violations return -EINVAL.  On failure *answer receives the list of
 * failures, otherwise that list is freed.
 */
int
cib_process_delete_absolute(const char *op, int options, const char *section, xmlNode * req,
                            xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib,
                            xmlNode ** answer)
{
    xmlNode *failures = NULL;
    xmlNode *section_root = NULL;
    int result = pcmk_ok;

    crm_trace("Processing \"%s\" event for section=%s", op, crm_str(section));

    if (safe_str_eq(XML_CIB_TAG_SECTION_ALL, section)
        || safe_str_eq(XML_TAG_CIB, section)
        || safe_str_eq(crm_element_name(input), XML_TAG_CIB)) {
        section = NULL;
    }

    CRM_CHECK(strcasecmp(CIB_OP_DELETE, op) == 0, return -EINVAL);

    if (input == NULL) {
        crm_err("Cannot perform modification with no data");
        return -EINVAL;
    }

    failures = create_xml_node(NULL, XML_TAG_FAILED);
    section_root = get_object_root(section, *result_cib);

    result = delete_cib_object(section_root, input);
    update_results(failures, input, op, result);

    if (xml_has_children(failures)) {
        CRM_CHECK(result != pcmk_ok, result = -EINVAL);
    }

    if (result == pcmk_ok) {
        free_xml(failures);
    } else {
        crm_log_xml_err(failures, "CIB Update failures");
        *answer = failures;
    }

    return result;
}
/*
 * Check that newCib is not older than oldCib.
 *
 * Returns TRUE when cib_compare_generation(newCib, oldCib) is zero or
 * positive; otherwise a warning is logged and FALSE is returned.
 */
gboolean
check_generation(xmlNode * newCib, xmlNode * oldCib)
{
    if (cib_compare_generation(newCib, oldCib) < 0) {
        crm_warn("Generation from update is older than the existing one");
        return FALSE;
    }
    return TRUE;
}
#ifndef CIBPIPE
/*
 * Send our copy of the CIB to the cluster as a CIB_OP_REPLACE request.
 *
 * request: the message that triggered the sync; it is copied with
 *          cib_msg_copy(request, FALSE) and the copy is turned into the
 *          replace request.
 * all:     TRUE to address every peer, FALSE to address only the
 *          originator (F_ORIG) of the request.
 *
 * Returns pcmk_ok, or -ENOTCONN when send_cluster_message() reports failure.
 *
 * NOTE(review): the two lines prefixed with '-' and '+' below are unified-diff
 * markers from the patch this text was taken from ('-' removed, '+' added).
 */
int
sync_our_cib(xmlNode * request, gboolean all)
{
int result = pcmk_ok;
char *digest = NULL;
const char *host = crm_element_value(request, F_ORIG);
const char *op = crm_element_value(request, F_CIB_OPERATION);
xmlNode *replace_request = cib_msg_copy(request, FALSE);
/* Both checks have an empty failure action: execution continues past them */
CRM_CHECK(the_cib != NULL,;);
CRM_CHECK(replace_request != NULL,;);
crm_debug("Syncing CIB to %s", all ? "all peers" : host);
if (all == FALSE && host == NULL) {
crm_log_xml_err(request, "bad sync");
}
/* remove the "all == FALSE" condition
*
* sync_from was failing, the local client wasn't being notified
* because it didn't know it was a reply
* setting this does not prevent the other nodes from applying it
* if all == TRUE
*/
if (host != NULL) {
crm_xml_add(replace_request, F_CIB_ISREPLY, host);
}
/* Rewrite the copy into a global replace, remembering the original operation */
crm_xml_add(replace_request, F_CIB_OPERATION, CIB_OP_REPLACE);
crm_xml_add(replace_request, "original_" F_CIB_OPERATION, op);
crm_xml_add(replace_request, F_CIB_GLOBAL_UPDATE, XML_BOOLEAN_TRUE);
crm_xml_add(replace_request, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET);
/* Attach a digest of the_cib and then the_cib itself as the call data */
digest = calculate_xml_versioned_digest(the_cib, FALSE, TRUE, CRM_FEATURE_SET);
crm_xml_add(replace_request, XML_ATTR_DIGEST, digest);
add_message_xml(replace_request, F_CIB_CALLDATA, the_cib);
- if (send_cluster_message(all ? NULL : host, crm_msg_cib, replace_request, FALSE) == FALSE) {
+ if (send_cluster_message(all ? NULL : crm_get_peer(0, host), crm_msg_cib, replace_request, FALSE) == FALSE) {
result = -ENOTCONN;
}
free_xml(replace_request);
free(digest);
return result;
}
#endif
diff --git a/crmd/election.c b/crmd/election.c
index 3ab9b31df8..6c874a90fb 100644
--- a/crmd/election.c
+++ b/crmd/election.c
@@ -1,550 +1,550 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/cluster/internal.h>
#include <crm/crm.h>
#include <crmd_fsa.h>
#include <crmd_messages.h>
#include <crmd_callbacks.h>
#include <tengine.h>
/* Votes recorded for the election in progress (node name -> operation string, see
 * do_election_count_vote()); NULL whenever no table is allocated */
GHashTable *voted = NULL;
/* NOTE(review): -1 converts to the largest value of 'uint'; not referenced in the code visible here */
uint highest_born_on = -1;
/* Id of the latest election started by this node; incremented by do_election_vote() */
static int current_election_id = 1;
/*
 * Store this process' "age" in *output.
 *
 * The value is the user CPU time (ru_utime) reported by
 * getrusage(RUSAGE_SELF), not wall-clock uptime.  On failure *output is
 * zeroed, the error is logged and -1 is returned; on success 1 is returned.
 */
static int
crm_uptime(struct timeval *output)
{
    struct rusage usage;

    output->tv_sec = 0;
    output->tv_usec = 0;

    if (getrusage(RUSAGE_SELF, &usage) < 0) {
        crm_perror(LOG_ERR, "Could not calculate the current uptime");
        return -1;
    }

    output->tv_sec = usage.ru_utime.tv_sec;
    output->tv_usec = usage.ru_utime.tv_usec;

    crm_debug("Current CPU usage is: %lds, %ldus", (long)usage.ru_utime.tv_sec,
              (long)usage.ru_utime.tv_usec);
    return 1;
}
/*
 * Compare our age (see crm_uptime()) with a peer's.
 *
 * Returns 1 when we are older, -1 when the peer is older or our own age
 * cannot be determined, and 0 when both are identical.  Seconds are
 * compared first, microseconds only break ties.
 */
static int
crm_compare_age(struct timeval your_age)
{
    struct timeval our_age;

    if (crm_uptime(&our_age) < 0) {
        return -1;
    }

    /* We want these times to be "significantly" different */
    if (our_age.tv_sec != your_age.tv_sec) {
        if (our_age.tv_sec > your_age.tv_sec) {
            crm_debug("Win: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec);
            return 1;
        }
        crm_debug("Loose: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec);
        return -1;
    }

    if (our_age.tv_usec != your_age.tv_usec) {
        if (our_age.tv_usec > your_age.tv_usec) {
            crm_debug("Win: %ld vs %ld (usec)", (long)our_age.tv_usec, (long)your_age.tv_usec);
            return 1;
        }
        crm_debug("Loose: %ld vs %ld (usec)", (long)our_age.tv_usec, (long)your_age.tv_usec);
        return -1;
    }

    return 0;
}
/* A_ELECTION_VOTE */
/*
 * Start a new election by broadcasting our vote.
 *
 * No vote is cast while starting, recovering, stopping or terminating, or
 * while R_STARTING is set; in that case the DC role is released (if held)
 * or I_PENDING is registered.  Otherwise a CRM_OP_VOTE request carrying
 * our uuid, a freshly incremented election id and our age is sent to the
 * cluster, previously recorded votes are dropped and, depending on the
 * state, the election timer is started.
 */
void
do_election_vote(long long action,
                 enum crmd_fsa_cause cause,
                 enum crmd_fsa_state cur_state,
                 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    struct timeval uptime;
    xmlNode *ballot = NULL;
    gboolean abstain = FALSE;

    /* don't vote if we're in one of these states or wanting to shut down */
    switch (cur_state) {
        case S_STARTING:
        case S_RECOVERY:
        case S_STOPPING:
        case S_TERMINATE:
            crm_warn("Not voting in election, we're in state %s", fsa_state2string(cur_state));
            abstain = TRUE;
            break;
        default:
            break;
    }

    if (abstain == FALSE && is_set(fsa_input_register, R_STARTING)) {
        abstain = TRUE;
    }

    if (abstain) {
        if (AM_I_DC) {
            register_fsa_input(C_FSA_INTERNAL, I_RELEASE_DC, NULL);
        } else {
            register_fsa_input(C_FSA_INTERNAL, I_PENDING, NULL);
        }
        return;
    }

    ballot = create_request(CRM_OP_VOTE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);

    current_election_id++;
    crm_xml_add(ballot, F_CRM_ELECTION_OWNER, fsa_our_uuid);
    crm_xml_add_int(ballot, F_CRM_ELECTION_ID, current_election_id);

    crm_uptime(&uptime);
    crm_xml_add_int(ballot, F_CRM_ELECTION_AGE_S, uptime.tv_sec);
    crm_xml_add_int(ballot, F_CRM_ELECTION_AGE_US, uptime.tv_usec);

    send_cluster_message(NULL, crm_msg_crmd, ballot, TRUE);
    free_xml(ballot);

    crm_debug("Started election %d", current_election_id);

    /* votes collected for an earlier election are no longer relevant */
    if (voted != NULL) {
        g_hash_table_destroy(voted);
        voted = NULL;
    }

    switch (cur_state) {
        case S_ELECTION:
        case S_RELEASE_DC:
            crm_timer_start(election_timeout);
            break;
        case S_INTEGRATION:
            break;
        default:
            crm_err("Broken? Voting in state %s", fsa_state2string(cur_state));
            break;
    }
}
/* Not referenced by the code visible in this part of the file */
char *dc_hb_msg = NULL;
int beat_num = 0;

/*
 * DC heartbeat callback.  The current implementation does nothing and
 * always returns TRUE.
 */
gboolean
do_dc_heartbeat(gpointer data)
{
    (void) data;                /* deliberately unused */
    return TRUE;
}
struct election_data_s {
const char *winning_uname;
unsigned int winning_bornon;
};
/*
 * GHFunc-style visitor: for every active peer, log the entry's key and
 * process mask at error level, prefixed by the label in user_data.
 * Inactive peers are skipped.
 */
static void
log_member_uname(gpointer key, gpointer value, gpointer user_data)
{
    const crm_node_t *peer = value;

    if (!crm_is_peer_active(peer)) {
        return;
    }
    crm_err("%s: %s proc=%.32x", (char *)user_data, (char *)key, peer->processes);
}
/*
 * GHFunc-style visitor: log the entry's key at error level, prefixed by
 * the label passed in user_data.  The value is not used.
 */
static void
log_node(gpointer key, gpointer value, gpointer user_data)
{
    const char *label = (char *)user_data;
    const char *name = (char *)key;

    crm_err("%s: %s", label, name);
}
/*
 * Check whether the election in progress can be concluded.
 *
 * Outside of S_ELECTION the check is ignored.  Once at least as many votes
 * as active peers have been recorded, the election timer is stopped,
 * I_ELECTION_DC is registered and the recorded votes are discarded.  If
 * more votes than members were seen, both lists are logged to help with
 * diagnosis.  Otherwise the number of outstanding votes is logged.
 */
void
do_election_check(long long action,
                  enum crmd_fsa_cause cause,
                  enum crmd_fsa_state cur_state,
                  enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    int voted_size = 0;
    int num_members = crm_active_peers();

    if (voted) {
        voted_size = g_hash_table_size(voted);
    }

    /* in the case of #voted > #members, it is better to
     *   wait for the timeout and give the cluster time to
     *   stabilize
     */
    if (fsa_state != S_ELECTION) {
        crm_debug("Ignore election check: we not in an election");

    } else if (voted_size >= num_members) {
        /* we won and everyone has voted */
        crm_timer_stop(election_timeout);
        register_fsa_input(C_FSA_INTERNAL, I_ELECTION_DC, NULL);

        if (voted_size > num_members) {
            /* The visitors only read the label, so no heap copy is needed
             * (voted is necessarily non-NULL here: voted_size > 0).
             */
            g_hash_table_foreach(crm_peer_cache, log_member_uname, (gpointer) "member");
            g_hash_table_foreach(voted, log_node, (gpointer) "voted");
        }

        /* voted may still be NULL when no vote was recorded and no peer is
         * active; never hand a NULL table to g_hash_table_destroy()
         */
        if (voted != NULL) {
            crm_debug("Destroying voted hash");
            g_hash_table_destroy(voted);
            voted = NULL;
        }

    } else {
        crm_debug("Still waiting on %d non-votes (%d total)",
                  num_members - voted_size, num_members);
    }
}
/* NOTE(review): win_dampen is not referenced in the code visible here */
#define win_dampen 1 /* in seconds */
/* After losing an election, winning verdicts are ignored for this long (see the end of do_election_count_vote) */
#define loss_dampen 2 /* in seconds */
/* A_ELECTION_COUNT */
/*
 * Process a vote (or no-vote) message received from a peer.
 *
 * The message is taken from msg_data.  The else-if chain below decides, in
 * order, whether the message is merely recorded or ignored ("done"),
 * whether we lose to the sender ("we_loose"), or whether we pass.  The
 * order of the tests matters: the first matching condition wins.
 *
 * - done:     only a log entry is produced.
 * - we_loose: a CRM_OP_NOVOTE is sent to the voter, the DC is forgotten,
 *             the election timer is stopped, the DC role is released (or
 *             I_PENDING registered), the CIB connection is switched to
 *             slave mode and the time of the loss is remembered.
 * - pass:     unless we lost an election less than loss_dampen seconds
 *             ago, I_ELECTION is registered and the recorded votes are
 *             discarded.
 *
 * NOTE(review): the two lines prefixed with '-' and '+' inside the body are
 * unified-diff markers from the patch this text was taken from.
 */
void
do_election_count_vote(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
struct timeval your_age;
int age;
int election_id = -1;
int log_level = LOG_INFO;
gboolean use_born_on = FALSE;
gboolean done = FALSE;
gboolean we_loose = FALSE;
const char *op = NULL;
const char *vote_from = NULL;
const char *your_version = NULL;
const char *election_owner = NULL;
const char *reason = "unknown";
crm_node_t *our_node = NULL, *your_node = NULL;
ha_msg_input_t *vote = fsa_typed_data(fsa_dt_ha_msg);
/* Time of our most recent election loss; 0 when there is none to consider */
static time_t last_election_loss = 0;
/* if the membership copy is NULL we REALLY shouldn't be voting
* the question is how we managed to get here.
*/
CRM_CHECK(msg_data != NULL, return);
CRM_CHECK(crm_peer_cache != NULL, return);
CRM_CHECK(vote != NULL, crm_err("Bogus data from %s", msg_data->origin); return);
CRM_CHECK(vote->msg != NULL, crm_err("Bogus data from %s", msg_data->origin); return);
your_age.tv_sec = 0;
your_age.tv_usec = 0;
op = crm_element_value(vote->msg, F_CRM_TASK);
vote_from = crm_element_value(vote->msg, F_CRM_HOST_FROM);
your_version = crm_element_value(vote->msg, F_CRM_VERSION);
election_owner = crm_element_value(vote->msg, F_CRM_ELECTION_OWNER);
crm_element_value_int(vote->msg, F_CRM_ELECTION_ID, &election_id);
/* NOTE(review): the fields of struct timeval are written through an int pointer here;
 * if tv_sec/tv_usec are wider than int only part of each field is stored -- confirm
 * this is safe on all supported platforms */
crm_element_value_int(vote->msg, F_CRM_ELECTION_AGE_S, (int *)&(your_age.tv_sec));
crm_element_value_int(vote->msg, F_CRM_ELECTION_AGE_US, (int *)&(your_age.tv_usec));
/* A vote without a sender is treated as if it came from ourselves */
CRM_CHECK(vote_from != NULL, vote_from = fsa_our_uname);
your_node = crm_get_peer(0, vote_from);
our_node = crm_get_peer(0, fsa_our_uname);
if (voted == NULL) {
crm_debug("Created voted hash");
voted = g_hash_table_new_full(crm_str_hash, g_str_equal,
g_hash_destroy_str, g_hash_destroy_str);
}
/* The "born" comparison is only used on heartbeat and classic AIS clusters */
if (is_heartbeat_cluster()) {
use_born_on = TRUE;
} else if (is_classic_ais_cluster()) {
use_born_on = TRUE;
}
/* > 0: we are older than the voter, < 0: the voter is older (or our age is unknown) */
age = crm_compare_age(your_age);
if (cur_state == S_STARTING) {
reason = "Still starting";
we_loose = TRUE;
} else if (our_node == NULL || crm_is_peer_active(our_node) == FALSE) {
reason = "We are not part of the cluster";
log_level = LOG_ERR;
we_loose = TRUE;
} else if (election_id != current_election_id && crm_str_eq(fsa_our_uuid, election_owner, TRUE)) {
/* A reply to one of our own, older elections */
log_level = LOG_DEBUG_2;
reason = "Superceeded";
done = TRUE;
} else if (your_node == NULL || crm_is_peer_active(your_node) == FALSE) {
/* Possibly we cached the message in the FSA queue at a point that it wasn't */
reason = "Peer is not part of our cluster";
log_level = LOG_WARNING;
done = TRUE;
} else if (crm_str_eq(op, CRM_OP_NOVOTE, TRUE)) {
char *op_copy = strdup(op);
char *uname_copy = strdup(vote_from);
CRM_ASSERT(crm_str_eq(fsa_our_uuid, election_owner, TRUE));
/* update the list of nodes that have voted */
g_hash_table_replace(voted, uname_copy, op_copy);
reason = "Recorded";
done = TRUE;
} else if (crm_str_eq(vote_from, fsa_our_uname, TRUE)) {
char *op_copy = strdup(op);
char *uname_copy = strdup(vote_from);
CRM_ASSERT(crm_str_eq(fsa_our_uuid, election_owner, TRUE));
/* update ourselves in the list of nodes that have voted */
g_hash_table_replace(voted, uname_copy, op_copy);
reason = "Recorded";
done = TRUE;
/* From here on the voter is a different, active peer: compare version, age, born and name in that order */
} else if (compare_version(your_version, CRM_FEATURE_SET) < 0) {
reason = "Version";
we_loose = TRUE;
} else if (compare_version(your_version, CRM_FEATURE_SET) > 0) {
reason = "Version";
} else if (age < 0) {
reason = "Uptime";
we_loose = TRUE;
} else if (age > 0) {
reason = "Uptime";
/* TODO: Check for y(our) born < 0 */
} else if (use_born_on && your_node->born < our_node->born) {
reason = "Born";
we_loose = TRUE;
} else if (use_born_on && your_node->born > our_node->born) {
reason = "Born";
} else if (fsa_our_uname == NULL) {
reason = "Unknown host name";
we_loose = TRUE;
} else if (strcasecmp(fsa_our_uname, vote_from) > 0) {
reason = "Host name";
we_loose = TRUE;
} else {
reason = "Host name";
CRM_ASSERT(strcmp(fsa_our_uname, vote_from) != 0);
/* can't happen...
* } else if(strcasecmp(fsa_our_uname, vote_from) == 0) {
*
* default...
* } else { // strcasecmp(fsa_our_uname, vote_from) < 0
* we win
*/
}
if (done) {
/* Nothing to decide: the message was recorded or ignored */
do_crm_log(log_level + 1, "Election %d (current: %d, owner: %s): Processed %s from %s (%s)",
election_id, current_election_id, election_owner, op, vote_from, reason);
} else if (we_loose) {
/* Concede: tell the voter we are not voting against it */
xmlNode *novote = create_request(CRM_OP_NOVOTE, NULL, vote_from,
CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
do_crm_log(log_level, "Election %d (owner: %s) lost: %s from %s (%s)",
election_id, election_owner, op, vote_from, reason);
update_dc(NULL);
crm_timer_stop(election_timeout);
if (fsa_input_register & R_THE_DC) {
crm_trace("Give up the DC to %s", vote_from);
register_fsa_input(C_FSA_INTERNAL, I_RELEASE_DC, NULL);
} else if (cur_state != S_STARTING) {
crm_trace("We werent the DC anyway");
register_fsa_input(C_FSA_INTERNAL, I_PENDING, NULL);
}
/* The no-vote echoes the owner and id of the election we lost */
crm_xml_add(novote, F_CRM_ELECTION_OWNER, election_owner);
crm_xml_add_int(novote, F_CRM_ELECTION_ID, election_id);
- send_cluster_message(vote_from, crm_msg_crmd, novote, TRUE);
+ send_cluster_message(crm_get_peer(0, vote_from), crm_msg_crmd, novote, TRUE);
free_xml(novote);
fsa_cib_conn->cmds->set_slave(fsa_cib_conn, cib_scope_local);
last_election_loss = time(NULL);
} else {
do_crm_log(log_level, "Election %d (owner: %s) pass: %s from %s (%s)",
election_id, election_owner, op, vote_from, reason);
if (last_election_loss) {
time_t tm_now = time(NULL);
/* Dampening: ignore a win that follows a loss too closely */
if (tm_now - last_election_loss < (time_t) loss_dampen) {
crm_info("Election %d ignore: We already lost an election less than %ds ago (%s)",
election_id, loss_dampen, ctime(&last_election_loss));
update_dc(NULL);
return;
}
last_election_loss = 0;
}
register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
/* voted is non-NULL here: it was created above if it did not exist */
g_hash_table_destroy(voted);
voted = NULL;
}
}
/* A_ELECT_TIMER_START, A_ELECTION_TIMEOUT */
/* we won */
/* The handler body is intentionally empty in this version: none of the arguments are used. */
void
do_election_timer_ctrl(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
}
/*
 * CIB callback for the feature-set update issued by do_dc_takeover().
 *
 * A successful update needs no further action; any other result code
 * raises I_ERROR with the FSA.
 */
static void
feature_update_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
    if (rc == pcmk_ok) {
        return;
    }

    {
        /* presumably read by the register_fsa_error() macro -- do not rename */
        fsa_data_t *msg_data = NULL;

        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
    }
}
/* A_DC_TAKEOVER
 *
 * Assume the DC role for this partition:
 *  - set R_THE_DC,
 *  - mark every node on stonith_cleanup_list as clean and empty the list,
 *  - drop the election "voted" table,
 *  - make the local CIB the master and push the feature set,
 *  - record "dc-version" and "cluster-infrastructure" in crm_config,
 *  - trigger a re-read of the configuration.
 */
void
do_dc_takeover(long long action,
               enum crmd_fsa_cause cause,
               enum crmd_fsa_state cur_state,
               enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    /* Resolved on the first call and cached for all later ones */
    static const char *cluster_type = NULL;

    int rc = pcmk_ok;
    xmlNode *cib = NULL;
    GListPtr item = NULL;

    if (cluster_type == NULL) {
        const char *from_env = getenv("HA_cluster_type");

        cluster_type = (from_env != NULL) ? from_env : "Heartbeat";
    }

    crm_info("Taking over DC status for this partition");
    set_bit(fsa_input_register, R_THE_DC);

    /* Each list entry owns its target string: report it, then release it */
    item = stonith_cleanup_list;
    while (item != NULL) {
        char *target = item->data;
        const char *uuid = get_uuid(target);

        crm_notice("Marking %s, target of a previous stonith action, as clean", target);
        send_stonith_update(NULL, target, uuid);
        free(target);

        item = item->next;
    }
    g_list_free(stonith_cleanup_list);
    stonith_cleanup_list = NULL;

#if SUPPORT_COROSYNC
    if (is_classic_ais_cluster()) {
        send_ais_text(crm_class_quorum, NULL, TRUE, NULL, crm_msg_ais);
    }
#endif

    if (voted != NULL) {
        crm_trace("Destroying voted hash");
        g_hash_table_destroy(voted);
        voted = NULL;
    }

    set_bit(fsa_input_register, R_JOIN_OK);
    set_bit(fsa_input_register, R_INVOKE_PE);

    fsa_cib_conn->cmds->set_master(fsa_cib_conn, cib_scope_local);

    /* Publish our feature set; rc is passed to fsa_cib_update() and then used as the call id for the callback */
    cib = create_xml_node(NULL, XML_TAG_CIB);
    crm_xml_add(cib, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET);
    fsa_cib_update(XML_TAG_CIB, cib, cib_quorum_override, rc, NULL);
    add_cib_op_callback(fsa_cib_conn, rc, FALSE, NULL, feature_update_callback);

    update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG,
                         NULL, NULL, NULL, NULL,
                         "dc-version", VERSION "-" BUILD_VERSION, FALSE, NULL);
    update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG,
                         NULL, NULL, NULL, NULL,
                         "cluster-infrastructure", cluster_type, FALSE, NULL);

    mainloop_set_trigger(config_read);
    free_xml(cib);
}
/* A_DC_RELEASE, A_DC_RELEASED
 *
 * A_DC_RELEASE clears R_THE_DC; A_DC_RELEASED (checked only when
 * A_DC_RELEASE is not set) reports I_RELEASE_SUCCESS to the FSA.
 * Any other action is logged as an error.  The resulting DC status is
 * traced in every case.
 */
void
do_dc_release(long long action,
              enum crmd_fsa_cause cause,
              enum crmd_fsa_state cur_state,
              enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    if ((action & (A_DC_RELEASE | A_DC_RELEASED)) == 0) {
        crm_err("Unknown action %s", fsa_action2string(action));

    } else if (action & A_DC_RELEASE) {
        crm_debug("Releasing the role of DC");
        clear_bit(fsa_input_register, R_THE_DC);

    } else {
        /* only A_DC_RELEASED is set */
        crm_info("DC role released");
#if 0
        if (are there errors) {
            /* we cant stay up if not healthy */
            /* or perhaps I_ERROR and go to S_RECOVER? */
            result = I_SHUTDOWN;
        }
#endif
        register_fsa_input(C_FSA_INTERNAL, I_RELEASE_SUCCESS, NULL);
    }

    crm_trace("Am I still the DC? %s", AM_I_DC ? XML_BOOLEAN_YES : XML_BOOLEAN_NO);
}
diff --git a/crmd/join_client.c b/crmd/join_client.c
index 5872159e33..250fbf6782 100644
--- a/crmd/join_client.c
+++ b/crmd/join_client.c
@@ -1,279 +1,279 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crmd_fsa.h>
#include <crmd_messages.h>
/* Not read or written by any function visible in this part of the file */
int reannounce_count = 0;
/* CIB query callback registered by do_cl_join_offer_respond(); defined further down */
void join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);
/* Defined elsewhere; NOTE(review): no caller is visible in this part of the file */
extern ha_msg_input_t *copy_ha_msg_input(ha_msg_input_t * orig);
/* A_CL_JOIN_QUERY
 *
 * Ask whether there is a DC out there by broadcasting a
 * CRM_OP_JOIN_ANNOUNCE request addressed to CRM_SYSTEM_DC.
 */
void
do_cl_join_query(long long action,
                 enum crmd_fsa_cause cause,
                 enum crmd_fsa_state cur_state,
                 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    xmlNode *query = NULL;

    query = create_request(CRM_OP_JOIN_ANNOUNCE, NULL, NULL,
                           CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);

    /* Allow the CCM a moment to propagate to the DC */
    sleep(1);

    /* Forget any recorded DC so that the reply is not discarded */
    update_dc(NULL);

    crm_debug("Querying for a DC");
    send_cluster_message(NULL, crm_msg_crmd, query, FALSE);

    free_xml(query);
}
/* A_CL_JOIN_ANNOUNCE
 *
 * Broadcast our availability to the DC.  This is a workaround for the
 * case where we were not around, or were unable to reply, when the DC
 * sent out A_WELCOME_ALL.
 *
 * Nothing is sent unless the FSA is in S_PENDING and local startup has
 * completed (AM_I_OPERATIONAL).
 */
void
do_cl_join_announce(long long action,
                    enum crmd_fsa_cause cause,
                    enum crmd_fsa_state cur_state,
                    enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    /* Announcing is only appropriate while we wait for a DC */
    if (cur_state != S_PENDING) {
        crm_warn("Do not announce ourselves in state %s", fsa_state2string(cur_state));
        return;
    }

    if (AM_I_OPERATIONAL) {
        /* No recipient is given, so the request goes out as a broadcast */
        xmlNode *announce = create_request(CRM_OP_JOIN_ANNOUNCE, NULL, NULL,
                                           CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);

        crm_debug("Announcing availability");
        update_dc(NULL);
        send_cluster_message(NULL, crm_msg_crmd, announce, FALSE);
        free_xml(announce);
        return;
    }

    /* Local startup has not finished yet; try again on a later invocation */
    crm_warn("Delaying announce until local startup is complete");
}
/* Call id of the CIB query issued for the most recent join offer; 0 when none is pending */
static int query_call_id = 0;

/* A_CL_JOIN_REQUEST
 *
 * Accept the welcome (join) offer carried by the current FSA message.
 *
 * Any CIB query still outstanding from an earlier offer is cancelled,
 * the sender is recorded as our DC via update_dc(), and a fresh local
 * CIB query is started.  join_query_callback() sends the actual reply
 * once the query completes and takes ownership of the duplicated join id.
 *
 * The message data is validated before it is touched; previously the
 * NULL check ran only after input->msg had already been dereferenced.
 */
void
do_cl_join_offer_respond(long long action,
                         enum crmd_fsa_cause cause,
                         enum crmd_fsa_state cur_state,
                         enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
    const char *welcome_from = NULL;
    const char *join_id = NULL;

    /* Log the broken invariant, then bail out instead of dereferencing NULL */
    CRM_LOG_ASSERT(input != NULL);
    if (input == NULL) {
        return;
    }

    welcome_from = crm_element_value(input->msg, F_CRM_HOST_FROM);
    join_id = crm_element_value(input->msg, F_CRM_JOIN_ID);

#if 0
    if (we are sick) {
        log error;
        /* save the request for later? */
        return;
    }
#endif

    crm_trace("Accepting join offer: join-%s", join_id);

    /* we only ever want the last one */
    if (query_call_id > 0) {
        crm_trace("Cancelling previous join query: %d", query_call_id);
        remove_cib_op_callback(query_call_id, FALSE);
        query_call_id = 0;
    }

    if (update_dc(input->msg) == FALSE) {
        crm_warn("Discarding offer from %s (expected %s)", welcome_from, fsa_our_dc);
        return;
    }

    /* The join id copy is handed to the callback, which frees it */
    query_call_id = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local);
    add_cib_op_callback(fsa_cib_conn, query_call_id, FALSE, strdup(join_id),
                        join_query_callback);
    crm_trace("Registered join query callback: %d", query_call_id);

    register_fsa_action(A_DC_TIMER_STOP);
}
/*
 * CIB query callback registered by do_cl_join_offer_respond().
 *
 * On success the attributes of the local CIB root are copied into a
 * generation element and sent to the DC (fsa_our_dc) as a
 * CRM_OP_JOIN_REQUEST tagged with the join id.  On failure I_ERROR is
 * raised with the FSA.
 *
 * user_data is the heap-allocated join id string; it is freed here.
 *
 * NOTE: this text is part of a diff hunk -- the line prefixed with '-'
 * is the pre-change call, the line prefixed with '+' is its replacement.
 */
void
join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
xmlNode *local_cib = NULL;
char *join_id = user_data;
/* scratch element that receives the CIB root's attributes; released at the end */
xmlNode *generation = create_xml_node(NULL, XML_CIB_TAG_GENERATION_TUPPLE);
CRM_LOG_ASSERT(join_id != NULL);
/* the query has completed, so there is no longer anything to cancel */
query_call_id = 0;
if (rc == pcmk_ok) {
local_cib = output;
CRM_LOG_ASSERT(safe_str_eq(crm_element_name(local_cib), XML_TAG_CIB));
}
if (local_cib != NULL) {
xmlNode *reply = NULL;
crm_debug("Respond to join offer join-%s", join_id);
crm_debug("Acknowledging %s as our DC", fsa_our_dc);
copy_in_properties(generation, local_cib);
reply = create_request(CRM_OP_JOIN_REQUEST, generation, fsa_our_dc,
CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);
crm_xml_add(reply, F_CRM_JOIN_ID, join_id);
/* diff: the recipient changes from a node name to the peer object returned by crm_get_peer() */
- send_cluster_message(fsa_our_dc, crm_msg_crmd, reply, TRUE);
+ send_cluster_message(crm_get_peer(0, fsa_our_dc), crm_msg_crmd, reply, TRUE);
free_xml(reply);
} else {
/* reached when the query failed or returned no output */
crm_err("Could not retrieve Generation to attach to our"
" join acknowledgement: %s", pcmk_strerror(rc));
register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__);
}
free(join_id);
free_xml(generation);
}
/* A_CL_JOIN_RESULT */
/* aka. this is notification that we have (or have not) been accepted */
/*
 * Handles a CRM_OP_JOIN_ACKNAK message from the DC.
 *
 * Messages with any other task are ignored.  A NACK raises I_ERROR.
 * For an ACK from the expected DC, the local LRM state returned by
 * do_lrm_query() is sent back in a CRM_OP_JOIN_CONFIRM request; if that
 * state cannot be obtained, I_FAIL is raised instead.
 *
 * NOTE: this text is part of a diff hunk -- the line prefixed with '-'
 * is the pre-change call, the line prefixed with '+' is its replacement.
 */
void
do_cl_join_finalize_respond(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
xmlNode *tmp1 = NULL;
gboolean was_nack = TRUE;
/* static: the cleanup below runs only for the first join handled by this process */
static gboolean first_join = TRUE;
ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
int join_id = -1;
const char *op = crm_element_value(input->msg, F_CRM_TASK);
const char *ack_nack = crm_element_value(input->msg, CRM_OP_JOIN_ACKNAK);
const char *welcome_from = crm_element_value(input->msg, F_CRM_HOST_FROM);
if (safe_str_neq(op, CRM_OP_JOIN_ACKNAK)) {
crm_trace("Ignoring op=%s message", op);
return;
}
/* calculate if it was an ack or a nack */
if (crm_is_true(ack_nack)) {
was_nack = FALSE;
}
crm_element_value_int(input->msg, F_CRM_JOIN_ID, &join_id);
if (was_nack) {
crm_err("Join (join-%d) with leader %s failed (NACK'd): Shutting down",
join_id, welcome_from);
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
return;
}
/* an ACK that names us as the sender while we are not the DC is discarded */
if (AM_I_DC == FALSE && safe_str_eq(welcome_from, fsa_our_uname)) {
crm_warn("Discarding our own welcome - we're no longer the DC");
return;
}
if (update_dc(input->msg) == FALSE) {
crm_warn("Discarding %s from %s (expected %s)", op, welcome_from, fsa_our_dc);
return;
}
/* send our status section to the DC */
crm_debug("Confirming join join-%d: %s", join_id, crm_element_value(input->msg, F_CRM_TASK));
tmp1 = do_lrm_query(TRUE);
if (tmp1 != NULL) {
xmlNode *reply = create_request(CRM_OP_JOIN_CONFIRM, tmp1, fsa_our_dc,
CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);
crm_xml_add_int(reply, F_CRM_JOIN_ID, join_id);
crm_debug("join-%d: Join complete."
" Sending local LRM status to %s", join_id, fsa_our_dc);
if (first_join) {
first_join = FALSE;
/*
* Clear any previous transient node attribute and lrm operations
*
* Corosync has a nasty habit of not being able to tell if a
* node is returning or didn't leave in the first place.
* This confuses Pacemaker because it never gets a "node up"
* event which is normally used to clean up the status section.
*
* Do not remove the resources though, they'll be cleaned up in
* do_dc_join_ack(). Removing them here creates a race
* condition if the crmd is being recovered.
* Instead of a list of active resources from the lrmd
* we may end up with a blank status section.
* If we are _NOT_ lucky, we will probe for the "wrong" instance
* of anonymous clones and end up with multiple active
* instances on the machine.
*/
erase_status_tag(fsa_our_uname, XML_TAG_TRANSIENT_NODEATTRS, 0);
/* Just in case attrd was still around too */
if (is_not_set(fsa_input_register, R_SHUTDOWN)) {
update_attrd(fsa_our_uname, "terminate", NULL, NULL);
update_attrd(fsa_our_uname, XML_CIB_ATTR_SHUTDOWN, NULL, NULL);
}
}
/* diff: the recipient changes from a node name to the peer object returned by crm_get_peer() */
- send_cluster_message(fsa_our_dc, crm_msg_crmd, reply, TRUE);
+ send_cluster_message(crm_get_peer(0, fsa_our_dc), crm_msg_crmd, reply, TRUE);
free_xml(reply);
if (AM_I_DC == FALSE) {
register_fsa_input_adv(cause, I_NOT_DC, NULL, A_NOTHING, TRUE, __FUNCTION__);
update_attrd(NULL, NULL, NULL, NULL);
}
free_xml(tmp1);
} else {
crm_err("Could not send our LRM state to the DC");
register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
}
}
diff --git a/crmd/join_dc.c b/crmd/join_dc.c
index 0afdba103e..d57cfd6fe3 100644
--- a/crmd/join_dc.c
+++ b/crmd/join_dc.c
@@ -1,672 +1,672 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/cluster.h>
#include <crmd_fsa.h>
#include <crmd_messages.h>
/*
 * Join bookkeeping tables.  All four are (re)created by initialize_join()
 * with string keys and string values, both freed by the table.
 */
/* node name -> join id (as text) of the offer sent to it; see join_make_offer() */
GHashTable *welcomed_nodes = NULL;
/* node name -> join state (member or nack) decided in do_dc_join_filter_offer() */
GHashTable *integrated_nodes = NULL;
/* node name -> CRMD_JOINSTATE_MEMBER for nodes that were ACK'd in finalize_join_for() */
GHashTable *finalized_nodes = NULL;
/* node name -> join id (as text) whose confirmation was accepted in do_dc_join_ack() */
GHashTable *confirmed_nodes = NULL;
/* Not referenced by any function visible in this part of the file */
char *max_epoch = NULL;
/* Name of the node that supplied max_generation_xml; owned (strdup'd) by this file */
char *max_generation_from = NULL;
/* Copy of the best CIB generation seen so far in the current join round */
xmlNode *max_generation_xml = NULL;
void initialize_join(gboolean before);
gboolean finalize_join_for(gpointer key, gpointer value, gpointer user_data);
void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);
gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
/* Incremented by do_dc_join_offer_all() at the start of every full join round */
static int current_join_id = 0;
/* Value of crm_peer_seq when the most recent offers were made; see join_make_offer() */
unsigned long long saved_ccm_membership_id = 0;
/*
 * Reset the join bookkeeping.
 *
 * The four node tables are destroyed and recreated empty.  When
 * "before" is true, the remembered best CIB generation (source name and
 * XML) is discarded as well and R_HAVE_CIB / R_CIB_ASKED are cleared.
 */
void
initialize_join(gboolean before)
{
    GHashTable **tables[] = {
        &welcomed_nodes,
        &integrated_nodes,
        &finalized_nodes,
        &confirmed_nodes,
    };
    const unsigned int n_tables = sizeof(tables) / sizeof(tables[0]);
    unsigned int lpc = 0;

    /* clear out/reset a bunch of stuff */
    crm_debug("join-%d: Initializing join data (flag=%s)",
              current_join_id, before ? "true" : "false");

    for (lpc = 0; lpc < n_tables; lpc++) {
        g_hash_table_destroy(*tables[lpc]);
    }

    if (before) {
        /* free(NULL) is a no-op, so no guard is needed here */
        free(max_generation_from);
        max_generation_from = NULL;

        if (max_generation_xml != NULL) {
            free_xml(max_generation_xml);
            max_generation_xml = NULL;
        }

        clear_bit(fsa_input_register, R_HAVE_CIB);
        clear_bit(fsa_input_register, R_CIB_ASKED);
    }

    /* Keys and values are both heap strings released by the table */
    for (lpc = 0; lpc < n_tables; lpc++) {
        *tables[lpc] = g_hash_table_new_full(crm_str_hash, g_str_equal,
                                             g_hash_destroy_str, g_hash_destroy_str);
    }
}
/*
 * Remove a node from every join table that currently exists.
 *
 * A NULL name is ignored.  A debug message listing which tables held
 * the node is emitted when at least one entry was removed.
 */
void
erase_node_from_join(const char *uname)
{
    gboolean was_welcomed = FALSE;
    gboolean was_integrated = FALSE;
    gboolean was_finalized = FALSE;
    gboolean was_confirmed = FALSE;

    if (uname == NULL) {
        return;
    }

    was_welcomed = (welcomed_nodes != NULL)
        ? g_hash_table_remove(welcomed_nodes, uname) : FALSE;
    was_integrated = (integrated_nodes != NULL)
        ? g_hash_table_remove(integrated_nodes, uname) : FALSE;
    was_finalized = (finalized_nodes != NULL)
        ? g_hash_table_remove(finalized_nodes, uname) : FALSE;
    was_confirmed = (confirmed_nodes != NULL)
        ? g_hash_table_remove(confirmed_nodes, uname) : FALSE;

    if (was_welcomed || was_integrated || was_finalized || was_confirmed) {
        crm_debug("Removed node %s from join calculations:"
                  " welcomed=%d itegrated=%d finalized=%d confirmed=%d",
                  uname, was_welcomed, was_integrated, was_finalized, was_confirmed);
    }
}
/*
 * Send a CRM_OP_JOIN_OFFER for the current join round to one peer.
 *
 * The signature matches what g_hash_table_foreach() expects (see
 * do_dc_join_offer_all()); only "value", a crm_node_t, is used.
 * Inactive peers and peers without a name are skipped.  Otherwise the
 * peer is first removed from all join tables, the offer is sent, and
 * the peer is recorded in welcomed_nodes with the current join id.
 *
 * NOTE: this text is part of a diff hunk -- the line prefixed with '-'
 * is the pre-change call, the line prefixed with '+' is its replacement.
 */
static void
join_make_offer(gpointer key, gpointer value, gpointer user_data)
{
const char *join_to = NULL;
const crm_node_t *member = value;
CRM_ASSERT(member != NULL);
if (crm_is_peer_active(member) == FALSE) {
crm_trace("Not making an offer to %s: not active", member->uname);
return;
}
join_to = member->uname;
if (join_to == NULL) {
crm_err("No recipient for welcome message");
return;
}
/* start from a clean slate for this node */
erase_node_from_join(join_to);
/* remember (and log once) the membership sequence these offers are based on */
if (saved_ccm_membership_id != crm_peer_seq) {
saved_ccm_membership_id = crm_peer_seq;
crm_info("Making join offers based on membership %llu", crm_peer_seq);
}
/* NOTE(review): activity was already checked above, so the else branch below
 * looks reachable only if the peer's state changes in between -- confirm */
if (crm_is_peer_active(member)) {
xmlNode *offer = create_request(CRM_OP_JOIN_OFFER, NULL, join_to,
CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL);
/* ownership of this string passes to welcomed_nodes below */
char *join_offered = crm_itoa(current_join_id);
crm_xml_add_int(offer, F_CRM_JOIN_ID, current_join_id);
/* send the welcome */
crm_debug("join-%d: Sending offer to %s", current_join_id, join_to);
/* diff: the recipient changes from a node name to the peer object returned by crm_get_peer() */
- send_cluster_message(join_to, crm_msg_crmd, offer, TRUE);
+ send_cluster_message(crm_get_peer(0, join_to), crm_msg_crmd, offer, TRUE);
free_xml(offer);
g_hash_table_insert(welcomed_nodes, strdup(join_to), join_offered);
} else {
crm_info("Peer process on %s is not active (yet?): %.8lx %d",
join_to, (long)member->processes, g_hash_table_size(crm_peer_cache));
}
}
/* A_DC_JOIN_OFFER_ALL
 *
 * Begin a new join round: bump the join id, reset all join bookkeeping
 * and make an offer to every entry in the peer cache.
 */
void
do_dc_join_offer_all(long long action,
                     enum crmd_fsa_cause cause,
                     enum crmd_fsa_state cur_state,
                     enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    const int new_node_joined = (cause == C_HA_MESSAGE) && (current_input == I_NODE_JOIN);

    /* reset everyones status back to down or in_ccm in the CIB
     *
     * any nodes that are active in the CIB but not in the CCM list
     * will be seen as offline by the PE anyway
     */
    current_join_id += 1;
    initialize_join(TRUE);
    /* do_update_cib_nodes(TRUE, __FUNCTION__); */

    update_dc(NULL);

    if (new_node_joined) {
        crm_info("A new node joined the cluster");
    }

    g_hash_table_foreach(crm_peer_cache, join_make_offer, NULL);

    /* The PE is not invoked yet; we first wait for the replies */
    crm_info("join-%d: Waiting on %d outstanding join acks",
             current_join_id, g_hash_table_size(welcomed_nodes));
}
/* A_DC_JOIN_OFFER_ONE
 *
 * Make a join offer to the single node that sent the current FSA
 * message, plus one to ourselves, and then start a transition.
 *
 * Nothing is offered when the FSA input carries no message, when the
 * message has no sender, or when the sender is not an active peer.
 */
void
do_dc_join_offer_one(long long action,
                     enum crmd_fsa_cause cause,
                     enum crmd_fsa_state cur_state,
                     enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    crm_node_t *peer = NULL;
    crm_node_t *self = NULL;
    ha_msg_input_t *welcome = NULL;
    const char *op = NULL;
    const char *join_to = NULL;

    if (!msg_data->data) {
        crm_info("A new node joined - wait until it contacts us");
        return;
    }

    welcome = fsa_typed_data(fsa_dt_ha_msg);
    if (welcome == NULL) {
        crm_err("Attempt to send welcome message without a message to reply to!");
        return;
    }

    join_to = crm_element_value(welcome->msg, F_CRM_HOST_FROM);
    if (join_to == NULL) {
        crm_err("Attempt to send welcome message without a host to reply to!");
        return;
    }

    peer = crm_get_peer(0, join_to);
    crm_update_peer_expected(__FUNCTION__, peer, CRMD_JOINSTATE_PENDING);
    if (crm_is_peer_active(peer) == FALSE) {
        crm_err("%s is not a fully active member of our partition", join_to);
        return;
    }

    op = crm_element_value(welcome->msg, F_CRM_TASK);

    switch (cur_state) {
        case S_INTEGRATION:
        case S_FINALIZE_JOIN:
            /* note: it _is_ possible that a node will have been
             * sick or starting up when the original offer was made.
             * however, it will either re-announce itself in due course
             * _or_ we can re-store the original offer on the client.
             */
            crm_trace("(Re-)offering membership to %s...", join_to);
            break;
        default:
            break;
    }

    crm_info("join-%d: Processing %s request from %s in state %s",
             current_join_id, op, join_to, fsa_state2string(cur_state));

    join_make_offer(NULL, peer, NULL);

    /* always offer to the DC (ourselves)
     * this ensures the correct value for max_generation_from
     */
    self = crm_get_peer(0, fsa_our_uname);
    join_make_offer(NULL, self, NULL);

    /* this was a genuine join request, cancel any existing
     * transition and invoke the PE
     */
    start_transition(fsa_state);

    /* The PE is not invoked yet; we first wait for the replies */
    crm_debug("Waiting on %d outstanding join acks for join-%d",
              g_hash_table_size(welcomed_nodes), current_join_id);
}
/*
 * Three-way comparison of one attribute, read as an integer, on two
 * XML nodes.
 *
 * Returns -1 when left's value is smaller, 1 when it is larger and 0
 * when both are equal.
 */
static int
compare_int_fields(xmlNode * left, xmlNode * right, const char *field)
{
    const int lhs = crm_int_helper(crm_element_value(left, field), NULL);
    const int rhs = crm_int_helper(crm_element_value(right, field), NULL);

    /* each comparison yields 0 or 1, so the difference is -1, 0 or 1 */
    return (lhs > rhs) - (lhs < rhs);
}
/* A_DC_JOIN_PROCESS_REQ
 *
 * Process a node's reply to our join offer.
 *
 * A reply for a different join round is dropped (after re-checking the
 * join state).  Otherwise the node is NACK'd when it is not an active
 * peer or supplied no generation data, and welcomed in all other cases.
 * The best CIB generation seen so far, and who supplied it, is tracked
 * in max_generation_xml / max_generation_from; on a tie our own copy
 * is preferred.  The node then moves from welcomed_nodes to
 * integrated_nodes.
 */
void
do_dc_join_filter_offer(long long action,
                        enum crmd_fsa_cause cause,
                        enum crmd_fsa_state cur_state,
                        enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    int cmp = 0;
    int join_id = -1;
    int rejected = 0;
    const char *ack_nack = NULL;
    xmlNode *generation = NULL;

    ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
    const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM);
    const char *ref = crm_element_value(join_ack->msg, XML_ATTR_REFERENCE);
    crm_node_t *join_node = crm_get_peer(0, join_from);

    crm_debug("Processing req from %s", join_from);

    generation = join_ack->xml;
    crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id);

    /* Compare version fields in order, stopping at the first difference */
    if (max_generation_xml != NULL && generation != NULL) {
        const char *attributes[] = {
            XML_ATTR_GENERATION_ADMIN,
            XML_ATTR_GENERATION,
            XML_ATTR_NUMUPDATES,
        };
        int lpc = 0;

        while (cmp == 0 && lpc < DIMOF(attributes)) {
            cmp = compare_int_fields(max_generation_xml, generation, attributes[lpc]);
            lpc++;
        }
    }

    if (join_id != current_join_id) {
        crm_debug("Invalid response from %s: join-%d vs. join-%d",
                  join_from, join_id, current_join_id);
        check_join_state(cur_state, __FUNCTION__);
        return;
    }

    if (join_node == NULL || crm_is_peer_active(join_node) == FALSE) {
        crm_err("Node %s is not a member", join_from);
        rejected = 1;

    } else if (generation == NULL) {
        crm_err("Generation was NULL");
        rejected = 1;

    } else if (max_generation_xml == NULL) {
        /* first usable generation of this round */
        max_generation_xml = copy_xml(generation);
        max_generation_from = strdup(join_from);

    } else if (cmp < 0 || (cmp == 0 && safe_str_eq(join_from, fsa_our_uname))) {
        crm_debug("%s has a better generation number than"
                  " the current max %s", join_from, max_generation_from);
        /* max_generation_xml cannot be NULL here: the previous branch handles that case */
        crm_log_xml_debug(max_generation_xml, "Max generation");
        crm_log_xml_debug(generation, "Their generation");

        free(max_generation_from);
        free_xml(max_generation_xml);

        max_generation_from = strdup(join_from);
        max_generation_xml = copy_xml(join_ack->xml);
    }

    if (rejected) {
        /* NACK this client */
        ack_nack = CRMD_JOINSTATE_NACK;
        crm_err("join-%d: NACK'ing node %s (ref %s)", join_id, join_from, ref);
    } else {
        ack_nack = CRMD_JOINSTATE_MEMBER;
        crm_debug("join-%d: Welcoming node %s (ref %s)", join_id, join_from, ref);
    }

    /* Record the decision; the table owns both copies */
    g_hash_table_insert(integrated_nodes, strdup(join_from), strdup(ack_nack));
    crm_update_peer_expected(__FUNCTION__, join_node, ack_nack);

    crm_debug("%u nodes have been integrated into join-%d",
              g_hash_table_size(integrated_nodes), join_id);

    g_hash_table_remove(welcomed_nodes, join_from);

    if (check_join_state(cur_state, __FUNCTION__) == FALSE) {
        /* The PE is not invoked yet; more replies are expected */
        crm_debug("join-%d: Still waiting on %d outstanding offers",
                  join_id, g_hash_table_size(welcomed_nodes));
    }
}
/* A_DC_JOIN_FINALIZE
 *
 * Choose the node holding the best CIB and ask the CIB to sync from it
 * to the rest of the cluster; finalize_sync_callback() continues once
 * the sync completes and frees the node name handed to it.
 *
 * With no integrated nodes the election is restarted (I_ELECTION_DC).
 * While a transition is in progress the FSA is stalled instead.
 */
void
do_dc_join_finalize(long long action,
                    enum crmd_fsa_cause cause,
                    enum crmd_fsa_state cur_state,
                    enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    int rc = pcmk_ok;
    char *sync_from = NULL;

    /* This we can do straight away and avoid clients timing us out
     * while we compute the latest CIB
     */
    crm_debug("Finializing join-%d for %d clients",
              current_join_id, g_hash_table_size(integrated_nodes));

    if (g_hash_table_size(integrated_nodes) == 0) {
        /* If we don't even have ourself, start again */
        register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL, __FUNCTION__);
        return;
    }

    /* We hold the best CIB if nobody beat us or if we are the recorded source */
    clear_bit(fsa_input_register, R_HAVE_CIB);
    if (max_generation_from == NULL || safe_str_eq(max_generation_from, fsa_our_uname)) {
        set_bit(fsa_input_register, R_HAVE_CIB);
    }

    if (is_set(fsa_input_register, R_IN_TRANSITION)) {
        crm_warn("join-%d: We are still in a transition."
                 " Delaying until the TE completes.", current_join_id);
        crmd_fsa_stall(NULL);
        return;
    }

    if (is_set(fsa_input_register, R_HAVE_CIB)) {
        /* Send _our_ CIB out to everyone */
        sync_from = strdup(fsa_our_uname);

    } else {
        /* ask for the agreed best CIB */
        sync_from = strdup(max_generation_from);
        crm_log_xml_debug(max_generation_xml, "Requesting version");
        set_bit(fsa_input_register, R_CIB_ASKED);
    }

    crm_info("join-%d: Syncing the CIB from %s to the rest of the cluster",
             current_join_id, sync_from);

    rc = fsa_cib_conn->cmds->sync_from(fsa_cib_conn, sync_from, NULL, cib_quorum_override);
    fsa_cib_conn->cmds->register_callback(fsa_cib_conn, rc, 60, FALSE, sync_from,
                                          "finalize_sync_callback", finalize_sync_callback);
}
/*
 * Callback for the CIB sync requested by do_dc_join_finalize().
 *
 * A failed sync restarts the whole join process (I_ELECTION_DC).  When
 * the sync succeeded and we are still the DC in S_FINALIZE_JOIN, every
 * integrated node is notified of its join result via finalize_join_for().
 *
 * user_data is the heap-allocated name of the sync source; it is freed
 * here.
 */
void
finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
    CRM_LOG_ASSERT(-EPERM != rc);
    clear_bit(fsa_input_register, R_CIB_ASKED);

    if (rc != pcmk_ok) {
        /* out-of-date data is logged as a warning, any other failure as an error */
        const int level = (rc == -pcmk_err_old_data) ? LOG_WARNING : LOG_ERR;

        do_crm_log(level, "Sync from %s failed: %s", (char *)user_data, pcmk_strerror(rc));

        /* restart the whole join process */
        register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL, __FUNCTION__);

    } else if (AM_I_DC && fsa_state == S_FINALIZE_JOIN) {
        set_bit(fsa_input_register, R_HAVE_CIB);
        clear_bit(fsa_input_register, R_CIB_ASKED);

        /* make sure dc_uuid is re-set to us */
        if (check_join_state(fsa_state, __FUNCTION__) == FALSE) {
            crm_debug("Notifying %d clients of join-%d results",
                      g_hash_table_size(integrated_nodes), current_join_id);
            g_hash_table_foreach_remove(integrated_nodes, finalize_join_for, NULL);
        }

    } else {
        crm_debug("No longer the DC in S_FINALIZE_JOIN: %s/%s",
                  AM_I_DC ? "DC" : "CRMd", fsa_state2string(fsa_state));
    }

    free(user_data);
}
/*
 * CIB callback for the status update issued by do_dc_join_ack().
 *
 * A failed update is logged and raises I_ERROR; a successful one
 * re-evaluates the join state.
 */
static void
join_update_complete_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
    /* presumably read by the register_fsa_error() macro -- do not rename */
    fsa_data_t *msg_data = NULL;

    if (rc != pcmk_ok) {
        crm_err("Join update %d failed", call_id);
        crm_log_xml_debug(msg, "failed");
        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
        return;
    }

    crm_debug("Join update %d complete", call_id);
    check_join_state(fsa_state, __FUNCTION__);
}
/* A_DC_JOIN_PROCESS_ACK
 *
 * Process a CRM_OP_JOIN_CONFIRM from a node that we ACK'd.
 *
 * The confirmation is accepted only if the node is listed in
 * finalized_nodes as a member and the join id matches the current
 * round.  The node then moves to confirmed_nodes and the LRM status it
 * sent is written to the status section of the CIB.
 */
void
do_dc_join_ack(long long action,
               enum crmd_fsa_cause cause,
               enum crmd_fsa_state cur_state,
               enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    int join_id = -1;
    int call_id = 0;
    const char *join_id_text = NULL;
    const char *expected = NULL;

    ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
    const char *op = crm_element_value(join_ack->msg, F_CRM_TASK);
    const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM);

    if (safe_str_neq(op, CRM_OP_JOIN_CONFIRM)) {
        crm_debug("Ignoring op=%s message from %s", op, join_from);
        return;
    }

    crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id);
    join_id_text = crm_element_value(join_ack->msg, F_CRM_JOIN_ID);

    /* now update them to "member" */
    crm_trace("Processing ack from %s", join_from);

    expected = g_hash_table_lookup(finalized_nodes, join_from);
    if (expected == NULL) {
        crm_err("Join not in progress: ignoring join-%d from %s", join_id, join_from);
        return;
    }

    if (safe_str_neq(expected, CRMD_JOINSTATE_MEMBER)) {
        crm_err("Node %s wasnt invited to join the cluster", join_from);
        g_hash_table_remove(finalized_nodes, join_from);
        return;
    }

    if (join_id != current_join_id) {
        crm_err("Invalid response from %s: join-%d vs. join-%d",
                join_from, join_id, current_join_id);
        g_hash_table_remove(finalized_nodes, join_from);
        return;
    }

    /* "expected" points into the entry removed here and is not used afterwards */
    g_hash_table_remove(finalized_nodes, join_from);

    if (g_hash_table_lookup(confirmed_nodes, join_from) != NULL) {
        crm_err("join-%d: hash already contains confirmation from %s", join_id, join_from);
    }
    g_hash_table_insert(confirmed_nodes, strdup(join_from), strdup(join_id_text));

    crm_info("join-%d: Updating node state to %s for %s",
             join_id, CRMD_JOINSTATE_MEMBER, join_from);

    /* update CIB with the current LRM status from the node
     * We dont need to notify the TE of these updates, a transition will
     * be started in due time
     */
    erase_status_tag(join_from, XML_CIB_TAG_LRM, cib_scope_local);
    fsa_cib_update(XML_CIB_TAG_STATUS, join_ack->xml,
                   cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL);
    add_cib_op_callback(fsa_cib_conn, call_id, FALSE, NULL, join_update_complete_callback);

    crm_debug("join-%d: Registered callback for LRM update %d", join_id, call_id);
}
/*
 * Tell one integrated node whether its join request was accepted.
 *
 * Used with g_hash_table_foreach_remove() on integrated_nodes (see
 * finalize_sync_callback()): key is the node name, value is the join
 * state recorded for it.  Every path returns TRUE.
 *
 * A node entry for the peer is first written to the nodes section of
 * the CIB.  Peers that are not active get no message and are marked as
 * pending.  Active peers receive a CRM_OP_JOIN_ACKNAK; ACK'd peers are
 * also added to finalized_nodes.
 *
 * NOTE: this text is part of a diff hunk -- the line prefixed with '-'
 * is the pre-change call, the line prefixed with '+' is its replacement.
 */
gboolean
finalize_join_for(gpointer key, gpointer value, gpointer user_data)
{
const char *join_to = NULL;
const char *join_state = NULL;
xmlNode *acknak = NULL;
xmlNode *tmp1 = NULL;
crm_node_t *join_node = NULL;
if (key == NULL || value == NULL) {
return TRUE;
}
join_to = (const char *)key;
join_state = (const char *)value;
/* make sure a node entry exists for the new node */
crm_trace("Creating node entry for %s", join_to);
tmp1 = create_xml_node(NULL, XML_CIB_TAG_NODE);
set_uuid(tmp1, XML_ATTR_UUID, join_to);
crm_xml_add(tmp1, XML_ATTR_UNAME, join_to);
fsa_cib_anon_update(XML_CIB_TAG_NODES, tmp1, cib_scope_local | cib_quorum_override | cib_can_create);
free_xml(tmp1);
join_node = crm_get_peer(0, join_to);
if (crm_is_peer_active(join_node) == FALSE) {
/*
* NACK'ing nodes that the membership layer doesn't know about yet
* simply creates more churn
*
* Better to leave them waiting and let the join restart when
* the new membership event comes in
*
* All other NACKs (due to versions etc) should still be processed
*/
crm_update_peer_expected(__FUNCTION__, join_node, CRMD_JOINSTATE_PENDING);
return TRUE;
}
/* send the ack/nack to the node */
acknak = create_request(CRM_OP_JOIN_ACKNAK, NULL, join_to,
CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL);
crm_xml_add_int(acknak, F_CRM_JOIN_ID, current_join_id);
/* set the ack/nack */
if (safe_str_eq(join_state, CRMD_JOINSTATE_MEMBER)) {
crm_debug("join-%d: ACK'ing join request from %s, state %s",
current_join_id, join_to, join_state);
crm_xml_add(acknak, CRM_OP_JOIN_ACKNAK, XML_BOOLEAN_TRUE);
/* do_dc_join_ack() looks the node up in this table when its confirmation arrives */
g_hash_table_insert(finalized_nodes,
strdup(join_to), strdup(CRMD_JOINSTATE_MEMBER));
crm_update_peer_expected(__FUNCTION__, join_node, CRMD_JOINSTATE_MEMBER);
} else {
crm_warn("join-%d: NACK'ing join request from %s, state %s",
current_join_id, join_to, join_state);
crm_xml_add(acknak, CRM_OP_JOIN_ACKNAK, XML_BOOLEAN_FALSE);
crm_update_peer_expected(__FUNCTION__, join_node, CRMD_JOINSTATE_NACK);
}
/* diff: the recipient changes from a node name to the peer object returned by crm_get_peer() */
- send_cluster_message(join_to, crm_msg_crmd, acknak, TRUE);
+ send_cluster_message(crm_get_peer(0, join_to), crm_msg_crmd, acknak, TRUE);
free_xml(acknak);
return TRUE;
}
/* Defined elsewhere; used below as a g_hash_table_foreach() callback with a text label as user_data */
void ghash_print_node(gpointer key, gpointer value, gpointer user_data);

/*
 * Decide whether the join process can advance, and queue the matching
 * FSA input if so.
 *
 * - Membership changed since the offers were made: queue I_NODE_JOIN.
 * - S_INTEGRATION with no outstanding offers: queue I_INTEGRATED.
 * - S_FINALIZE_JOIN with the CIB in hand and no integrated or finalized
 *   nodes left: queue I_FINALIZED.
 *
 * Returns TRUE only when integration has completed, or when
 * finalization is still waiting for the CIB; FALSE in all other cases.
 */
gboolean
check_join_state(enum crmd_fsa_state cur_state, const char *source)
{
    unsigned int n_integrated = 0;
    unsigned int n_finalized = 0;

    crm_debug("Invoked by %s in state: %s", source, fsa_state2string(cur_state));

    if (saved_ccm_membership_id != crm_peer_seq) {
        crm_debug("%s: Membership changed since join started: %llu -> %llu",
                  source, saved_ccm_membership_id, crm_peer_seq);
        register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
        return FALSE;
    }

    if (cur_state == S_INTEGRATION) {
        if (g_hash_table_size(welcomed_nodes) != 0) {
            return FALSE;
        }
        crm_debug("join-%d: Integration of %d peers complete: %s",
                  current_join_id, g_hash_table_size(integrated_nodes), source);
        register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL);
        return TRUE;
    }

    if (cur_state != S_FINALIZE_JOIN) {
        return FALSE;
    }

    if (is_set(fsa_input_register, R_HAVE_CIB) == FALSE) {
        crm_debug("join-%d: Delaying I_FINALIZED until we have the CIB", current_join_id);
        return TRUE;
    }

    n_integrated = g_hash_table_size(integrated_nodes);
    n_finalized = g_hash_table_size(finalized_nodes);

    if (n_integrated == 0 && n_finalized == 0) {
        crm_debug("join-%d complete: %s", current_join_id, source);
        register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL);

    } else if (n_integrated != 0 && n_finalized != 0) {
        char *label = NULL;

        crm_err("join-%d: Waiting on %d integrated nodes"
                " AND %d finalized nodes",
                current_join_id, n_integrated, n_finalized);

        /* Dump both tables, each with its own label */
        label = strdup("Integrated node");
        g_hash_table_foreach(integrated_nodes, ghash_print_node, label);
        free(label);

        label = strdup("Finalized node");
        g_hash_table_foreach(finalized_nodes, ghash_print_node, label);
        free(label);

    } else if (n_integrated != 0) {
        crm_debug("join-%d: Still waiting on %d integrated nodes",
                  current_join_id, n_integrated);

    } else {
        /* only finalized nodes remain */
        crm_debug("join-%d: Still waiting on %d finalized nodes",
                  current_join_id, n_finalized);
    }

    return FALSE;
}
/*
 * FSA action handler run at the end of the join process: refreshes
 * attrd and re-publishes the current quorum state.
 *
 * None of the arguments are examined.
 */
void
do_dc_join_final(long long action, enum crmd_fsa_cause cause,
                 enum crmd_fsa_state cur_state,
                 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    crm_debug("Ensuring DC, quorum and node attributes are up-to-date");

    /* all-NULL arguments: no specific attribute is named */
    update_attrd(NULL, NULL, NULL, NULL);

    crm_update_quorum(crm_have_quorum, TRUE);
}
diff --git a/crmd/messages.c b/crmd/messages.c
index 5e787a984e..725fc272df 100644
--- a/crmd/messages.c
+++ b/crmd/messages.c
@@ -1,969 +1,969 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <string.h>
#include <time.h>
#include <crmd_fsa.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/cluster/internal.h>
#include <crm/cib.h>
#include <crmd.h>
#include <crmd_messages.h>
#include <crmd_lrm.h>
/* Queue of pending FSA inputs; get_message() always takes the element at index 0 */
GListPtr fsa_message_queue = NULL;
extern void crm_shutdown(int nsig);
/* Forward declarations for handlers defined later in this file */
void handle_response(xmlNode * stored_msg);
enum crmd_fsa_input handle_request(xmlNode * stored_msg);
enum crmd_fsa_input handle_shutdown_request(xmlNode * stored_msg);
/*
 * ROUTER_RESULT(x): trace the routing decision 'x'.
 * When MSG_LOG is defined it additionally traces the XML of a variable
 * named 'msg', which must therefore be in scope wherever the macro is used.
 * NOTE(review): the MSG_LOG variant expands to two statements with no
 * do { } while (0) wrapper, so it is only safe inside a braced block.
 */
#ifdef MSG_LOG
# define ROUTER_RESULT(x) crm_trace("Router result: %s", x); \
crm_log_xml_trace(msg, "router.log");
#else
# define ROUTER_RESULT(x) crm_trace("Router result: %s", x)
#endif
/* debug only, can wrap all it likes */
/* Incremented once per register_fsa_input_adv() call and used as the entry id */
int last_data_id = 0;
/*
 * Raise an error input at the head of the FSA queue.
 *
 * Any actions currently pending in fsa_actions are first re-queued (also at
 * the head) as an I_NULL input, carrying the cause and data of cur_data when
 * it is supplied, and fsa_actions is then cleared.  Finally the error input
 * itself is prepended with new_data as payload.
 *
 * raised_from is forwarded as the origin of the error input.
 */
void
register_fsa_error_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input,
                       fsa_data_t * cur_data, void *new_data, const char *raised_from)
{
    if (fsa_actions != A_NOTHING) {
        /* Preserve the outstanding actions so they are not lost by the reset below */
        enum crmd_fsa_cause saved_cause = C_FSA_INTERNAL;
        void *saved_data = NULL;

        if (cur_data != NULL) {
            saved_cause = cur_data->fsa_cause;
            saved_data = cur_data->data;
        }
        register_fsa_input_adv(saved_cause, I_NULL, saved_data, fsa_actions, TRUE, __FUNCTION__);
    }

    /* Start from a clean action list ... */
    fsa_actions = A_NOTHING;

    /* ... and put the error ahead of everything else */
    register_fsa_input_adv(cause, input, new_data, A_NOTHING, TRUE, raised_from);
}
/*
 * Queue a new input for the FSA.
 *
 * cause        - why the input is raised; also selects how 'data' is copied
 * input        - the FSA input to queue
 * data         - optional payload; a copy is stored in the queue entry
 * with_actions - action bits to attach to the queued entry
 * prepend      - TRUE to insert at the head of the queue, FALSE to append
 * raised_from  - caller name, used in log messages and stored as the origin
 *
 * Returns the id given to the new entry, or 0 when nothing was queued
 * (a data-less I_WAIT_FOR_EVENT, or an I_NULL input with no actions).
 * The process exits if data is supplied for a cause that has no copy
 * routine.
 */
int
register_fsa_input_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input,
void *data, long long with_actions,
gboolean prepend, const char *raised_from)
{
/* Remember the length so a failed insertion can be detected at the end */
unsigned old_len = g_list_length(fsa_message_queue);
fsa_data_t *fsa_data = NULL;
/* The id is consumed even when the input ends up not being queued */
last_data_id++;
CRM_CHECK(raised_from != NULL, raised_from = "<unknown>");
crm_trace("%s %s FSA input %d (%s) (cause=%s) %s data",
raised_from, prepend ? "prepended" : "appended", last_data_id,
fsa_input2string(input), fsa_cause2string(cause), data ? "with" : "without");
if (input == I_WAIT_FOR_EVENT) {
/* A stall request: flag it and, when no data is attached, merge the
 * requested actions straight into fsa_actions instead of queueing */
do_fsa_stall = TRUE;
crm_debug("Stalling the FSA pending further input: cause=%s", fsa_cause2string(cause));
if (old_len > 0) {
crm_warn("%s stalled the FSA with pending inputs", raised_from);
fsa_dump_queue(LOG_DEBUG);
}
if (data == NULL) {
set_bit(fsa_actions, with_actions);
with_actions = A_NOTHING;
return 0;
}
/* With data attached the stall input falls through and is queued normally */
crm_err("%s stalled the FSA with data - this may be broken", raised_from);
}
if (input == I_NULL && with_actions == A_NOTHING /* && data == NULL */ ) {
/* no point doing anything */
crm_err("Cannot add entry to queue: no input and no action");
return 0;
}
/* NOTE(review): the calloc() result is used without a NULL check */
fsa_data = calloc(1, sizeof(fsa_data_t));
fsa_data->id = last_data_id;
fsa_data->fsa_input = input;
fsa_data->fsa_cause = cause;
fsa_data->origin = raised_from;
fsa_data->data = NULL;
fsa_data->data_type = fsa_dt_none;
fsa_data->actions = with_actions;
if (with_actions != A_NOTHING) {
crm_trace("Adding actions %.16llx to input", with_actions);
}
if (data != NULL) {
/* The cause determines the concrete type behind the void pointer */
switch (cause) {
case C_FSA_INTERNAL:
case C_CRMD_STATUS_CALLBACK:
case C_IPC_MESSAGE:
case C_HA_MESSAGE:
crm_trace("Copying %s data from %s as a HA msg",
fsa_cause2string(cause), raised_from);
CRM_CHECK(((ha_msg_input_t *) data)->msg != NULL,
crm_err("Bogus data from %s", raised_from));
fsa_data->data = copy_ha_msg_input(data);
fsa_data->data_type = fsa_dt_ha_msg;
break;
case C_LRM_OP_CALLBACK:
crm_trace("Copying %s data from %s as lrmd_event_data_t",
fsa_cause2string(cause), raised_from);
fsa_data->data = lrmd_copy_event((lrmd_event_data_t *) data);
fsa_data->data_type = fsa_dt_lrm;
break;
case C_CCM_CALLBACK:
case C_SUBSYSTEM_CONNECT:
case C_LRM_MONITOR_CALLBACK:
case C_TIMER_POPPED:
case C_SHUTDOWN:
case C_HEARTBEAT_FAILED:
case C_HA_DISCONNECT:
case C_ILLEGAL:
case C_UNKNOWN:
case C_STARTUP:
/* No copy routine exists for these causes: treated as fatal */
crm_err("Copying %s data (from %s)"
" not yet implemented", fsa_cause2string(cause), raised_from);
exit(1);
break;
}
crm_trace("%s data copied", fsa_cause2string(fsa_data->fsa_cause));
}
/* make sure to free it properly later */
if (prepend) {
crm_trace("Prepending input");
fsa_message_queue = g_list_prepend(fsa_message_queue, fsa_data);
} else {
fsa_message_queue = g_list_append(fsa_message_queue, fsa_data);
}
crm_trace("Queue len: %d", g_list_length(fsa_message_queue));
fsa_dump_queue(LOG_DEBUG_2);
/* An unchanged length means the insertion above did not take effect */
if (old_len == g_list_length(fsa_message_queue)) {
crm_err("Couldnt add message to the queue");
}
/* Wake the FSA, if its trigger source has been set up */
if (fsa_source) {
crm_trace("Triggering FSA: %s", __FUNCTION__);
mainloop_set_trigger(fsa_source);
}
return last_data_id;
}
/*
 * Log one line per entry of fsa_message_queue at the given log level,
 * showing its position, id, input, origin and cause.
 */
void
fsa_dump_queue(int log_level)
{
    int position = 0;
    GListPtr item = fsa_message_queue;

    while (item != NULL) {
        fsa_data_t *entry = (fsa_data_t *) item->data;

        /* The counter is advanced inside the logging call, as before */
        do_crm_log_unlikely(log_level,
                            "queue[%d(%d)]: input %s raised by %s()\t(cause=%s)",
                            position++, entry->id, fsa_input2string(entry->fsa_input),
                            entry->origin, fsa_cause2string(entry->fsa_cause));

        item = item->next;
    }
}
/*
 * Build a new ha_msg_input_t around a copy of orig's message.
 *
 * When orig is NULL the returned wrapper is created around a NULL message.
 * The result comes from new_ha_msg_input().
 */
ha_msg_input_t *
copy_ha_msg_input(ha_msg_input_t * orig)
{
    xmlNodePtr duplicate = NULL;
    ha_msg_input_t *copy = NULL;

    if (orig == NULL) {
        crm_trace("No message to copy");
    } else {
        crm_trace("Copy msg");
        duplicate = copy_xml(orig->msg);
    }

    copy = new_ha_msg_input(duplicate);

    /* Only complain about a missing copy when there was something to copy */
    if (orig != NULL && orig->msg != NULL) {
        CRM_CHECK(copy->msg != NULL, crm_err("copy failed"));
    }
    return copy;
}
/*
 * Release a queue entry together with its payload.
 *
 * The payload is freed according to the entry's data_type.  An entry that
 * carries data but is typed fsa_dt_none cannot be freed safely, so the
 * process logs an error and exits.  A NULL entry is ignored.
 */
void
delete_fsa_input(fsa_data_t * fsa_data)
{
    if (fsa_data == NULL) {
        return;
    }

    crm_trace("About to free %s data", fsa_cause2string(fsa_data->fsa_cause));

    if (fsa_data->data != NULL) {
        if (fsa_data->data_type == fsa_dt_ha_msg) {
            delete_ha_msg_input(fsa_data->data);

        } else if (fsa_data->data_type == fsa_dt_xml) {
            xmlNode *xml = fsa_data->data;

            free_xml(xml);

        } else if (fsa_data->data_type == fsa_dt_lrm) {
            lrmd_event_data_t *event = (lrmd_event_data_t *) fsa_data->data;

            lrmd_free_event(event);

        } else if (fsa_data->data_type == fsa_dt_none) {
            /* Data is known to be non-NULL here, yet its type is unknown */
            crm_err("Dont know how to free %s data from %s",
                    fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
            exit(1);
        }

        crm_trace("%s data freed", fsa_cause2string(fsa_data->fsa_cause));
    }

    free(fsa_data);
}
/*
 * Remove and return the entry at the head of fsa_message_queue.
 *
 * Returns NULL when there is no entry to hand out (for example because the
 * queue is empty).  The caller takes over the returned entry and is expected
 * to release it, e.g. with delete_fsa_input().
 */
fsa_data_t *
get_message(void)
{
    fsa_data_t *message = g_list_nth_data(fsa_message_queue, 0);

    fsa_message_queue = g_list_remove(fsa_message_queue, message);

    /* g_list_nth_data() yields NULL on an empty list: do not touch message->id */
    if (message == NULL) {
        crm_trace("No input available to process");
        return NULL;
    }

    crm_trace("Processing input %d", message->id);
    return message;
}
/* returns TRUE when at least one input is waiting in fsa_message_queue */
gboolean
is_message(void)
{
    /* A GList is empty exactly when its head pointer is NULL, so there is
     * no need to walk the whole list with g_list_length() */
    return (fsa_message_queue != NULL);
}
/*
 * Return the payload of fsa_data provided it is of type a_type.
 *
 * fsa_data - queue entry to inspect (may be NULL)
 * a_type   - the payload type the caller expects
 * caller   - caller name, used as the prefix of every log message
 *
 * Returns NULL, after logging, when there is no entry or no payload.
 * A payload of the wrong type is logged as critical and then trips
 * CRM_ASSERT.
 */
void *
fsa_typed_data_adv(fsa_data_t * fsa_data, enum fsa_data_type a_type, const char *caller)
{
    if (fsa_data == NULL) {
        crm_err("%s: No FSA data available", caller);
        return NULL;
    }

    if (fsa_data->data == NULL) {
        crm_err("%s: No message data available. Origin: %s", caller, fsa_data->origin);
        return NULL;
    }

    if (fsa_data->data_type != a_type) {
        crm_crit("%s: Message data was the wrong type! %d vs. requested=%d. Origin: %s",
                 caller, fsa_data->data_type, a_type, fsa_data->origin);
        CRM_ASSERT(fsa_data->data_type == a_type);
        return NULL;
    }

    return fsa_data->data;
}
/* A_MSG_ROUTE */
/*
 * FSA action handler: hand the message attached to msg_data to
 * route_message(), using the cause recorded in msg_data.
 *
 * Nothing is routed when no HA-message payload could be obtained.
 */
void
do_msg_route(long long action,
             enum crmd_fsa_cause cause,
             enum crmd_fsa_state cur_state,
             enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);

    /* fsa_typed_data_adv() returns NULL for a missing entry or payload
     * (presumably what fsa_typed_data() wraps - confirm in crmd_fsa.h);
     * bail out here instead of dereferencing it */
    CRM_CHECK(input != NULL, return);

    route_message(msg_data->fsa_cause, input->msg);
}
/*
 * Route a message that arrived via IPC or the cluster layer.
 *
 * The message is first offered to relay_message(); if that reports it as
 * fully dealt with, nothing more happens.  Otherwise handle_message() is
 * run and any resulting FSA input is queued with the message attached.
 *
 * cause must be C_IPC_MESSAGE or C_HA_MESSAGE.
 */
void
route_message(enum crmd_fsa_cause cause, xmlNode * input)
{
    ha_msg_input_t fsa_input;
    enum crmd_fsa_input result = I_NULL;

    fsa_input.msg = input;
    CRM_CHECK(cause == C_IPC_MESSAGE || cause == C_HA_MESSAGE, return);

    /* Someone else's problem? */
    if (relay_message(input, cause == C_IPC_MESSAGE)) {
        return;
    }

    /* Ours: work out which FSA input, if any, it translates to */
    result = handle_message(input);
    if (result == I_NULL) {
        return;
    }

    switch (result) {
        case I_CIB_OP:
        case I_ROUTER:
        case I_NODE_JOIN:
        case I_JOIN_REQUEST:
        case I_JOIN_RESULT:
            register_fsa_input(cause, result, &fsa_input);
            break;

        default:
            /* Everything else is deferred */
            register_fsa_input_later(cause, result, &fsa_input);
            break;
    }
}
/*
 * Decide where a message has to go and, when it is not for local
 * processing by the caller, send it there.
 *
 * msg                - the message to route
 * originated_locally - TRUE when the message came from a local source
 *                      (route_message() passes cause == C_IPC_MESSAGE)
 *
 * Returns TRUE when nothing more needs to be done by the caller (the
 * message was relayed, discarded, ignored or found to be invalid) and
 * FALSE when the caller has to process the message itself.
 */
gboolean
relay_message(xmlNode * msg, gboolean originated_locally)
{
/* Cluster message class; replaced by text2msg_type(sys_to) on openais clusters */
int dest = 1;
int is_for_dc = 0;
int is_for_dcib = 0;
int is_for_te = 0;
int is_for_crm = 0;
int is_for_cib = 0;
int is_local = 0;
gboolean processing_complete = FALSE;
/* NOTE(review): msg is read here before the msg == NULL test below; this
 * relies on crm_element_value() tolerating a NULL node - confirm */
const char *host_to = crm_element_value(msg, F_CRM_HOST_TO);
const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO);
const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM);
const char *type = crm_element_value(msg, F_TYPE);
const char *msg_error = NULL;
crm_trace("Routing message %s", crm_element_value(msg, XML_ATTR_REFERENCE));
/* Basic validation; hello messages are dropped without complaint */
if (msg == NULL) {
msg_error = "Cannot route empty message";
} else if (safe_str_eq(CRM_OP_HELLO, crm_element_value(msg, F_CRM_TASK))) {
/* quietly ignore */
processing_complete = TRUE;
} else if (safe_str_neq(type, T_CRM)) {
msg_error = "Bad message type";
} else if (sys_to == NULL) {
msg_error = "Bad message destination: no subsystem";
}
if (msg_error != NULL) {
processing_complete = TRUE;
crm_err("%s", msg_error);
crm_log_xml_warn(msg, "bad msg");
}
if (processing_complete) {
return TRUE;
}
/* From here on the default is "handled"; branches that need the caller
 * to continue reset this to FALSE */
processing_complete = TRUE;
is_for_dc = (strcasecmp(CRM_SYSTEM_DC, sys_to) == 0);
is_for_dcib = (strcasecmp(CRM_SYSTEM_DCIB, sys_to) == 0);
is_for_te = (strcasecmp(CRM_SYSTEM_TENGINE, sys_to) == 0);
is_for_cib = (strcasecmp(CRM_SYSTEM_CIB, sys_to) == 0);
is_for_crm = (strcasecmp(CRM_SYSTEM_CRMD, sys_to) == 0);
is_local = 0;
/* Without a destination host, only messages that are neither for the
 * DC/TE nor locally-originated crmd messages count as local */
if (host_to == NULL || strlen(host_to) == 0) {
if (is_for_dc || is_for_te) {
is_local = 0;
} else if (is_for_crm && originated_locally) {
is_local = 0;
} else {
is_local = 1;
}
} else if (safe_str_eq(fsa_our_uname, host_to)) {
is_local = 1;
}
if (is_for_dc || is_for_dcib || is_for_te) {
if (AM_I_DC && is_for_te) {
ROUTER_RESULT("Message result: Local relay");
send_msg_via_ipc(msg, sys_to);
} else if (AM_I_DC) {
ROUTER_RESULT("Message result: DC/CRMd process");
processing_complete = FALSE; /* more to be done by caller */
} else if (originated_locally && safe_str_neq(sys_from, CRM_SYSTEM_PENGINE)
&& safe_str_neq(sys_from, CRM_SYSTEM_TENGINE)) {
/* Neither the TE or PE should be sending messages
* to DC's on other nodes
*
* By definition, if we are no longer the DC, then
* the PE or TE's data should be discarded
*/
#if SUPPORT_COROSYNC
if (is_openais_cluster()) {
dest = text2msg_type(sys_to);
}
#endif
ROUTER_RESULT("Message result: External relay to DC");
- send_cluster_message(host_to, dest, msg, TRUE);
+ send_cluster_message(host_to ? crm_get_peer(0, host_to) : NULL, dest, msg, TRUE);
} else {
/* discard */
ROUTER_RESULT("Message result: Discard, not DC");
}
} else if (is_local && (is_for_crm || is_for_cib)) {
ROUTER_RESULT("Message result: CRMd process");
processing_complete = FALSE; /* more to be done by caller */
} else if (is_local) {
ROUTER_RESULT("Message result: Local relay");
send_msg_via_ipc(msg, sys_to);
} else {
/* Not for this node: forward it over the cluster layer */
#if SUPPORT_COROSYNC
if (is_openais_cluster()) {
dest = text2msg_type(sys_to);
}
#endif
ROUTER_RESULT("Message result: External relay");
- send_cluster_message(host_to, dest, msg, TRUE);
+ send_cluster_message(host_to ? crm_get_peer(0, host_to) : NULL, dest, msg, TRUE);
}
return processing_complete;
}
/*
 * Extract the client details carried by a hello message.
 *
 * hello - XML holding the client_uuid, client_name, major_version and
 *         minor_version attributes
 * uuid, client_name, major_version, minor_version
 *       - outputs; always reset to NULL first and, on success, set to
 *         newly allocated copies that the caller must free()
 *
 * Returns TRUE when all four attributes are present and non-empty,
 * FALSE otherwise (the first missing field is logged).
 */
static gboolean
process_hello_message(xmlNode * hello,
                      char **uuid, char **client_name, char **major_version, char **minor_version)
{
    const char *found_uuid = NULL;
    const char *found_name = NULL;
    const char *found_major = NULL;
    const char *found_minor = NULL;

    *uuid = NULL;
    *client_name = NULL;
    *major_version = NULL;
    *minor_version = NULL;

    if (hello == NULL) {
        return FALSE;
    }

    found_uuid = crm_element_value(hello, "client_uuid");
    found_name = crm_element_value(hello, "client_name");
    found_major = crm_element_value(hello, "major_version");
    found_minor = crm_element_value(hello, "minor_version");

    /* Each field must exist and be non-empty; report the first offender */
    if (found_uuid == NULL || found_uuid[0] == '\0') {
        crm_err("Hello message was not valid (field %s not found)", "uuid");
        return FALSE;
    }
    if (found_name == NULL || found_name[0] == '\0') {
        crm_err("Hello message was not valid (field %s not found)", "client name");
        return FALSE;
    }
    if (found_major == NULL || found_major[0] == '\0') {
        crm_err("Hello message was not valid (field %s not found)", "major version");
        return FALSE;
    }
    if (found_minor == NULL || found_minor[0] == '\0') {
        crm_err("Hello message was not valid (field %s not found)", "minor version");
        return FALSE;
    }

    *uuid = strdup(found_uuid);
    *client_name = strdup(found_name);
    *major_version = strdup(found_major);
    *minor_version = strdup(found_minor);

    crm_trace("Hello message ok");
    return TRUE;
}
/*
 * Decide whether a message from an IPC client may be processed.
 *
 * client_msg  - the message received from the client
 * curr_client - the client connection it arrived on
 *
 * For anything other than a hello message, returns TRUE when the sender's
 * subsystem name (with the DC's CIB alias mapped to the plain CIB name) is
 * present in ipc_clients, FALSE otherwise.
 *
 * For a hello message, the client details are validated; an accepted client
 * is recorded in ipc_clients and in curr_client, a rejected one is
 * disconnected.  Hello messages always return FALSE because they need no
 * further processing.
 */
gboolean
crmd_authorize_message(xmlNode * client_msg, crmd_client_t * curr_client)
{
    /* check the best case first */
    const char *sys_from = crm_element_value(client_msg, F_CRM_SYS_FROM);
    char *uuid = NULL;
    char *client_name = NULL;
    char *major_version = NULL;
    char *minor_version = NULL;
    const char *filtered_from;
    gboolean auth_result = FALSE;
    gboolean can_reply = FALSE; /* no-one has registered with this id */
    xmlNode *xml = NULL;
    const char *op = crm_element_value(client_msg, F_CRM_TASK);

    if (safe_str_neq(CRM_OP_HELLO, op)) {
        if (sys_from == NULL) {
            crm_warn("Message [%s] was had no value for %s... discarding",
                     crm_element_value(client_msg, XML_ATTR_REFERENCE), F_CRM_SYS_FROM);
            return FALSE;
        }

        filtered_from = sys_from;

        /* The CIB can have two names on the DC */
        if (strcasecmp(sys_from, CRM_SYSTEM_DCIB) == 0) {
            filtered_from = CRM_SYSTEM_CIB;
        }

        if (g_hash_table_lookup(ipc_clients, filtered_from) != NULL) {
            can_reply = TRUE;   /* reply can be routed */
        }

        crm_trace("Message reply can%s be routed from %s.", can_reply ? "" : " not", sys_from);

        if (can_reply == FALSE) {
            crm_warn("Message [%s] not authorized",
                     crm_element_value(client_msg, XML_ATTR_REFERENCE));
        }

        return can_reply;
    }

    crm_trace("received client join msg");
    crm_log_xml_trace(client_msg, "join");
    xml = get_message_xml(client_msg, F_CRM_DATA);
    auth_result = process_hello_message(xml, &uuid, &client_name, &major_version, &minor_version);

    if (auth_result == TRUE) {
        /* The copies made by process_hello_message() can still be NULL */
        if (client_name == NULL || uuid == NULL) {
            crm_err("Bad client details (client_name=%s, uuid=%s)",
                    crm_str(client_name), crm_str(uuid));
            auth_result = FALSE;
        }
    }

    if (auth_result == TRUE) {
        /* check version */
        int mav = atoi(major_version);
        int miv = atoi(minor_version);

        crm_trace("Checking client version number");
        if (mav < 0 || miv < 0) {
            crm_err("Client version (%d:%d) is not acceptable", mav, miv);
            auth_result = FALSE;
        }
    }

    if (auth_result == TRUE) {
        /* Build the key only for accepted clients: after a rejected hello
         * client_name and uuid may be NULL and must not be handed to
         * generate_hash_key() */
        gpointer table_key = (gpointer) generate_hash_key(client_name, uuid);

        crm_trace("Accepted client %s", crm_str(table_key));

        curr_client->table_key = table_key;
        curr_client->sub_sys = strdup(client_name);
        curr_client->uuid = strdup(uuid);

        g_hash_table_insert(ipc_clients, table_key, curr_client->ipc);
        crm_trace("Updated client list with %s", crm_str(table_key));

        crm_trace("Triggering FSA: %s", __FUNCTION__);
        mainloop_set_trigger(fsa_source);

    } else {
        crm_warn("Rejected client logon request");
        qb_ipcs_disconnect(curr_client->ipc);
    }

    free(uuid);
    free(minor_version);
    free(major_version);
    free(client_name);

    /* hello messages should never be processed further */
    return FALSE;
}
/*
 * Dispatch a message according to its F_CRM_MSG_TYPE attribute.
 *
 * Requests go to handle_request(), whose result is returned; responses go
 * to handle_response().  Any other type is logged as an error.  Everything
 * except a request yields I_NULL.
 */
enum crmd_fsa_input
handle_message(xmlNode * msg)
{
    const char *type = NULL;
    enum crmd_fsa_input next_input = I_NULL;

    CRM_CHECK(msg != NULL, return I_NULL);

    type = crm_element_value(msg, F_CRM_MSG_TYPE);

    if (crm_str_eq(type, XML_ATTR_REQUEST, TRUE)) {
        next_input = handle_request(msg);

    } else if (crm_str_eq(type, XML_ATTR_RESPONSE, TRUE)) {
        handle_response(msg);

    } else {
        crm_err("Unknown message type: %s", type);
    }

    return next_input;
}
/*
 * Handle a request to clear a resource's failure history.
 *
 * The resource is located via the first XML_CIB_TAG_RESOURCE element in
 * the message.  When it has an id, the "fail-count-<id>" and
 * "last-failure-<id>" attributes are passed to update_attrd() with a NULL
 * value and lrm_clear_last_failure() is called; otherwise the message is
 * logged as invalid.
 *
 * Always returns I_NULL.
 */
static enum crmd_fsa_input
handle_failcount_op(xmlNode * stored_msg)
{
    xmlNode *rsc_xml = get_xpath_object("//" XML_CIB_TAG_RESOURCE, stored_msg, LOG_ERR);
    const char *rsc_id = NULL;
    char *attr_name = NULL;

    if (rsc_xml != NULL) {
        rsc_id = ID(rsc_xml);
    }

    if (rsc_id == NULL) {
        crm_log_xml_warn(stored_msg, "invalid failcount op");
        return I_NULL;
    }

    crm_info("Removing failcount for %s", rsc_id);

    attr_name = crm_concat("fail-count", rsc_id, '-');
    update_attrd(NULL, attr_name, NULL, NULL);
    free(attr_name);

    attr_name = crm_concat("last-failure", rsc_id, '-');
    update_attrd(NULL, attr_name, NULL, NULL);
    free(attr_name);

    lrm_clear_last_failure(rsc_id);

    return I_NULL;
}
/*
 * Translate a request message into the FSA input it should raise.
 *
 * The operation is read from F_CRM_TASK.  Operations that only the DC
 * handles are checked first (when this node is the DC), followed by the
 * operations common to all nodes and finally the non-DC shutdown case.
 *
 * Returns the FSA input to raise, or I_NULL when the request was dealt
 * with here (or was invalid / unexpected) and nothing further is needed.
 */
enum crmd_fsa_input
handle_request(xmlNode * stored_msg)
{
xmlNode *msg = NULL;
const char *op = crm_element_value(stored_msg, F_CRM_TASK);
/* Optimize this for the DC - it has the most to do */
if (op == NULL) {
crm_log_xml_err(stored_msg, "Bad message");
return I_NULL;
}
/*========== DC-Only Actions ==========*/
if (AM_I_DC) {
if (strcmp(op, CRM_OP_JOIN_ANNOUNCE) == 0) {
return I_NODE_JOIN;
} else if (strcmp(op, CRM_OP_JOIN_REQUEST) == 0) {
return I_JOIN_REQUEST;
} else if (strcmp(op, CRM_OP_JOIN_CONFIRM) == 0) {
return I_JOIN_RESULT;
} else if (strcmp(op, CRM_OP_SHUTDOWN) == 0) {
const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM);
gboolean dc_match = safe_str_eq(host_from, fsa_our_dc);
if (is_set(fsa_input_register, R_SHUTDOWN)) {
crm_info("Shutting ourselves down (DC)");
return I_STOP;
} else if (dc_match) {
crm_err("We didnt ask to be shut down, yet our"
" TE is telling us too." " Better get out now!");
return I_TERMINATE;
} else if (fsa_state != S_STOPPING) {
crm_err("Another node is asking us to shutdown" " but we think we're ok.");
return I_ELECTION;
}
/* NOTE(review): when none of the three conditions above holds, control
 * leaves this section without returning and the request is evaluated
 * against the common chain below, where it appears to end up in the
 * final "Unexpected request" branch - confirm this is intended */
} else if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) {
/* a slave wants to shut down */
/* create cib fragment and add to message */
return handle_shutdown_request(stored_msg);
}
}
/*========== common actions ==========*/
if (strcmp(op, CRM_OP_NOVOTE) == 0) {
/* Queue the message (appended, no FSA input) with the election
 * counting and checking actions attached */
ha_msg_input_t fsa_input;
fsa_input.msg = stored_msg;
register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input,
A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE, __FUNCTION__);
} else if (strcmp(op, CRM_OP_CLEAR_FAILCOUNT) == 0) {
return handle_failcount_op(stored_msg);
} else if (strcmp(op, CRM_OP_VOTE) == 0) {
/* count the vote and decide what to do after that */
ha_msg_input_t fsa_input;
fsa_input.msg = stored_msg;
register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input,
A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE, __FUNCTION__);
/* Sometimes we _must_ go into S_ELECTION */
if (fsa_state == S_HALT) {
crm_debug("Forcing an election from S_HALT");
return I_ELECTION;
#if 0
} else if (AM_I_DC) {
/* This is the old way of doing things but what is gained? */
return I_ELECTION;
#endif
}
} else if (strcmp(op, CRM_OP_JOIN_OFFER) == 0) {
crm_debug("Raising I_JOIN_OFFER: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID));
return I_JOIN_OFFER;
} else if (strcmp(op, CRM_OP_JOIN_ACKNAK) == 0) {
crm_debug("Raising I_JOIN_RESULT: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID));
return I_JOIN_RESULT;
} else if (strcmp(op, CRM_OP_LRM_DELETE) == 0
|| strcmp(op, CRM_OP_LRM_FAIL) == 0
|| strcmp(op, CRM_OP_LRM_REFRESH) == 0 || strcmp(op, CRM_OP_REPROBE) == 0) {
/* Re-address the message to the LRMD subsystem and have it routed again */
crm_xml_add(stored_msg, F_CRM_SYS_TO, CRM_SYSTEM_LRMD);
return I_ROUTER;
} else if (strcmp(op, CRM_OP_NOOP) == 0) {
return I_NULL;
} else if (strcmp(op, CRM_OP_LOCAL_SHUTDOWN) == 0) {
crm_shutdown(SIGTERM);
/*return I_SHUTDOWN; */
return I_NULL;
/*========== (NOT_DC)-Only Actions ==========*/
} else if (AM_I_DC == FALSE && strcmp(op, CRM_OP_SHUTDOWN) == 0) {
const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM);
gboolean dc_match = safe_str_eq(host_from, fsa_our_dc);
/* Only obey a shutdown that comes from our DC (or when no DC is known) */
if (dc_match || fsa_our_dc == NULL) {
if (is_set(fsa_input_register, R_SHUTDOWN) == FALSE) {
crm_err("We didn't ask to be shut down, yet our" " DC is telling us too.");
set_bit(fsa_input_register, R_STAYDOWN);
return I_STOP;
}
crm_info("Shutting down");
return I_STOP;
} else {
crm_warn("Discarding %s op from %s", op, host_from);
}
} else if (strcmp(op, CRM_OP_PING) == 0) {
/* eventually do some stuff to figure out
* if we /are/ ok
*/
const char *sys_to = crm_element_value(stored_msg, F_CRM_SYS_TO);
xmlNode *ping = create_xml_node(NULL, XML_CRM_TAG_PING);
crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok");
crm_xml_add(ping, XML_PING_ATTR_SYSFROM, sys_to);
crm_xml_add(ping, "crmd_state", fsa_state2string(fsa_state));
/* Ok, so technically not so interesting, but CTS needs to see this */
crm_notice("Current ping state: %s", fsa_state2string(fsa_state));
/* Build the reply, send it through the router, then release both documents */
msg = create_reply(stored_msg, ping);
relay_message(msg, TRUE);
free_xml(ping);
free_xml(msg);
} else if (strcmp(op, CRM_OP_RM_NODE_CACHE) == 0) {
xmlNode *options = get_xpath_object("//"XML_TAG_OPTIONS, stored_msg, LOG_ERR);
int id = 0;
if (options) {
crm_element_value_int(options, XML_ATTR_ID, &id);
}
/* An id of 0 (missing options or attribute) means there is nothing to reap */
if (id) {
reap_crm_member(id);
}
} else {
crm_err("Unexpected request (%s) sent to %s", op, AM_I_DC ? "the DC" : "non-DC node");
crm_log_xml_err(stored_msg, "Unexpected");
}
return I_NULL;
}
/*
 * Process a response message.
 *
 * On the DC, a CRM_OP_PECALC response whose reference matches fsa_pe_ref
 * is queued as I_PE_SUCCESS; one without a reference or with a different
 * reference is only logged.  Responses to vote, shutdown-request and
 * shutdown operations are silently accepted.  Anything else is logged as
 * unexpected.
 */
void
handle_response(xmlNode * stored_msg)
{
    const char *op = crm_element_value(stored_msg, F_CRM_TASK);

    if (op == NULL) {
        crm_log_xml_err(stored_msg, "Bad message");
        return;
    }

    if (AM_I_DC && strcmp(op, CRM_OP_PECALC) == 0) {
        /* Has this answer been overtaken by a newer calculation request? */
        const char *reference = crm_element_value(stored_msg, XML_ATTR_REFERENCE);

        if (reference == NULL) {
            crm_err("%s - Ignoring calculation with no reference", op);

        } else if (safe_str_eq(reference, fsa_pe_ref)) {
            ha_msg_input_t fsa_input;

            fsa_input.msg = stored_msg;
            register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input);
            crm_trace("Completed: %s...", fsa_pe_ref);

        } else {
            crm_info("%s calculation %s is obsolete", op, reference);
        }
        return;
    }

    if (strcmp(op, CRM_OP_VOTE) == 0
        || strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0 || strcmp(op, CRM_OP_SHUTDOWN) == 0) {
        /* Nothing to do for these */
        return;
    }

    {
        const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM);

        crm_err("Unexpected response (op=%s, src=%s) sent to the %s",
                op, host_from, AM_I_DC ? "DC" : "CRMd");
    }
}
/*
 * Record a shutdown request for a node.
 *
 * The requesting host is taken from F_CRM_HOST_FROM, falling back to this
 * node's own name when the attribute is absent.  The current time is then
 * stored as that host's XML_CIB_ATTR_SHUTDOWN attribute via update_attrd().
 *
 * Handling this here keeps the DC in charge of the shutdown regardless of
 * how the message or procedure may differ between versions.
 *
 * Always returns I_NULL.
 */
enum crmd_fsa_input
handle_shutdown_request(xmlNode * stored_msg)
{
    time_t now = time(NULL);
    char *timestamp = NULL;
    const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM);

    if (host_from == NULL) {
        /* we're shutting down and the DC */
        host_from = fsa_our_uname;
    }

    crm_info("Creating shutdown request for %s (state=%s)", host_from, fsa_state2string(fsa_state));
    crm_log_xml_trace(stored_msg, "message");

    timestamp = crm_itoa(now);
    update_attrd(host_from, XML_CIB_ATTR_SHUTDOWN, timestamp, NULL);
    free(timestamp);

    /* will be picked up by the TE as long as its running */
    return I_NULL;
}
/* msg is deleted by the time this returns */
extern gboolean process_te_message(xmlNode * msg, xmlNode * xml_data);
/*
 * Deliver a message to a local subsystem.
 *
 * msg - the message; F_CRM_HOST_FROM is filled in with this node's name
 *       when it is not already set
 * sys - name of the destination subsystem (may be NULL)
 *
 * A subsystem registered in ipc_clients receives the message over its IPC
 * connection.  Otherwise messages for the TE are handed to
 * process_te_message() and messages for the LRMD to do_lrm_invoke().
 *
 * Returns the result of the IPC send for registered clients, TRUE for the
 * TE and LRMD paths, and FALSE when the subsystem is unknown (or NULL).
 */
gboolean
send_msg_via_ipc(xmlNode * msg, const char *sys)
{
    gboolean send_ok = TRUE;
    qb_ipcs_connection_t *client_channel = NULL;

    /* Never hand a NULL key to the hash table: the branches below already
     * treat a NULL subsystem as "unknown" */
    if (sys != NULL) {
        client_channel = (qb_ipcs_connection_t *) g_hash_table_lookup(ipc_clients, sys);
    }

    if (crm_element_value(msg, F_CRM_HOST_FROM) == NULL) {
        crm_xml_add(msg, F_CRM_HOST_FROM, fsa_our_uname);
    }

    if (client_channel != NULL) {
        /* Transient clients such as crmadmin */
        send_ok = crm_ipcs_send(client_channel, 0, msg, TRUE);

    } else if (sys != NULL && strcmp(sys, CRM_SYSTEM_TENGINE) == 0) {
        xmlNode *data = get_message_xml(msg, F_CRM_DATA);

        process_te_message(msg, data);

    } else if (sys != NULL && strcmp(sys, CRM_SYSTEM_LRMD) == 0) {
        /* Wrap the message in a temporary FSA entry and invoke the LRM
         * action handler directly */
        fsa_data_t fsa_data;
        ha_msg_input_t fsa_input;

        fsa_input.msg = msg;
        fsa_input.xml = get_message_xml(msg, F_CRM_DATA);

        fsa_data.id = 0;
        fsa_data.actions = 0;
        fsa_data.data = &fsa_input;
        fsa_data.fsa_input = I_MESSAGE;
        fsa_data.fsa_cause = C_IPC_MESSAGE;
        fsa_data.origin = __FUNCTION__;
        fsa_data.data_type = fsa_dt_ha_msg;

#ifdef FSA_TRACE
        crm_trace("Invoking action A_LRM_INVOKE (%.16llx)", A_LRM_INVOKE);
#endif
        do_lrm_invoke(A_LRM_INVOKE, C_IPC_MESSAGE, fsa_state, I_MESSAGE, &fsa_data);

    } else {
        crm_err("Unknown Sub-system (%s)... discarding message.", crm_str(sys));
        send_ok = FALSE;
    }

    return send_ok;
}
/*
 * Allocate an ha_msg_input_t that wraps orig.
 *
 * The wrapper stores orig itself (no copy is made) as its message and the
 * result of get_message_xml(orig, F_CRM_DATA) as its xml member.
 */
ha_msg_input_t *
new_ha_msg_input(xmlNode * orig)
{
    ha_msg_input_t *wrapper = calloc(1, sizeof(ha_msg_input_t));

    wrapper->msg = orig;
    wrapper->xml = get_message_xml(wrapper->msg, F_CRM_DATA);

    return wrapper;
}
/*
 * Free an ha_msg_input_t together with the message it holds.
 * A NULL argument is ignored.
 */
void
delete_ha_msg_input(ha_msg_input_t * orig)
{
    if (orig != NULL) {
        free_xml(orig->msg);
        free(orig);
    }
}
diff --git a/crmd/te_actions.c b/crmd/te_actions.c
index a7f4f61939..ca02fe5e53 100644
--- a/crmd/te_actions.c
+++ b/crmd/te_actions.c
@@ -1,528 +1,528 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <tengine.h>
#include <crmd_fsa.h>
#include <crmd_messages.h>
#include <crm/cluster.h>
/* Passed as the last argument to generate_transition_key() by the functions in this file */
char *te_uuid = NULL;
/* Prototype only; NOTE(review): no definition is visible in the reviewed part of this file */
void send_rsc_command(crm_action_t * action);
/*
 * Attach a timeout timer to an action.
 *
 * A new crm_action_timer_t is stored in action->timer and armed with
 * g_timeout_add() for the action's timeout plus the graph's network delay;
 * action_timer_callback() receives the timer when it fires.
 */
static void
te_start_action_timer(crm_graph_t * graph, crm_action_t * action)
{
    crm_action_timer_t *timer = calloc(1, sizeof(crm_action_timer_t));

    action->timer = timer;

    timer->timeout = action->timeout;
    timer->reason = timeout_action;
    timer->action = action;
    timer->source_id = g_timeout_add(timer->timeout + graph->network_delay,
                                     action_timer_callback, (void *)timer);

    CRM_ASSERT(action->timer->source_id != 0);
}
/*
 * Execute a pseudo action: it is marked confirmed straight away, the graph
 * is updated with it and the graph trigger is fired.
 *
 * Always returns TRUE.
 */
static gboolean
te_pseudo_action(crm_graph_t * graph, crm_action_t * pseudo)
{
    crm_debug("Pseudo action %d fired and confirmed", pseudo->id);

    /* Nothing has to run anywhere, so confirmation is immediate */
    pseudo->confirmed = TRUE;
    update_graph(graph, pseudo);

    trigger_graph();

    return TRUE;
}
/*
 * Record in the local caches and in the CIB that a node has been fenced.
 *
 * action - not consulted by this function
 * target - name of the fenced node (required)
 * uuid   - uuid of the fenced node (required)
 *
 * The peer entry for target is marked as having no processes, lost and
 * expected down, and the node is erased from the join process.  A node
 * state update carrying uuid is then sent to the CIB status section, with
 * cib_fencing_updated() registered as the callback, and the node's LRM
 * and transient-attribute status sections are erased.
 */
void
send_stonith_update(crm_action_t * action, const char *target, const char *uuid)
{
    int call_id = pcmk_ok;
    crm_node_t *node = NULL;
    xmlNode *state_update = NULL;   /* zeroed node-status for the CIB */

    CRM_CHECK(target != NULL, return);
    CRM_CHECK(uuid != NULL, return);

    if (get_node_uuid(0, target) == NULL) {
        set_node_uuid(target, uuid);
    }

    /* Make sure the membership and join caches are accurate */
    node = crm_get_peer(0, target);
    if (node->uuid == NULL) {
        crm_info("Recording uuid '%s' for node '%s'", uuid, target);
        node->uuid = strdup(uuid);
    }

    crm_update_peer_proc(__FUNCTION__, node, crm_proc_none, NULL);
    crm_update_peer_state(__FUNCTION__, node, CRM_NODE_LOST, 0);
    crm_update_peer_expected(__FUNCTION__, node, CRMD_JOINSTATE_DOWN);
    erase_node_from_join(target);

    state_update =
        do_update_node_cib(node,
                           node_update_cluster | node_update_peer | node_update_join |
                           node_update_expected, NULL, __FUNCTION__);

    /* Force our known ID */
    crm_xml_add(state_update, XML_ATTR_UUID, uuid);

    call_id = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, state_update,
                                         cib_quorum_override | cib_scope_local | cib_can_create);

    /* Delay processing the trigger until the update completes */
    crm_debug("Sending fencing update %d for %s", call_id, target);
    add_cib_op_callback(fsa_cib_conn, call_id, FALSE, strdup(target), cib_fencing_updated);

    /* Make sure it sticks */
    /* fsa_cib_conn->cmds->bump_epoch(fsa_cib_conn, cib_quorum_override|cib_scope_local); */

    /* Remove all LRM status info and transient attributes for the node */
    erase_status_tag(target, XML_CIB_TAG_LRM, cib_scope_local);
    erase_status_tag(target, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);

    free_xml(state_update);
}
/*
 * Execute a fencing action from the transition graph.
 *
 * The action must carry an id, a target, a target uuid and a
 * "stonith_action" meta value; otherwise it is logged as bad and FALSE is
 * returned.  The fencing request is issued through stonith_api and
 * tengine_stonith_callback() is registered for its result, keyed by a
 * transition key built from the graph id, action id and te_uuid.
 *
 * Self-fencing is permitted when confirmed_nodes holds exactly one entry.
 *
 * Returns TRUE once the request has been issued.
 */
static gboolean
te_fence_node(crm_graph_t * graph, crm_action_t * action)
{
    int call_id = 0;
    int timeout_s = 0;
    char *transition_key = NULL;
    gboolean invalid_action = FALSE;
    enum stonith_call_options options = st_opt_none;

    const char *id = ID(action->xml);
    const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
    const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
    const char *type = crm_meta_value(action->params, "stonith_action");

    CRM_CHECK(id != NULL, invalid_action = TRUE);
    CRM_CHECK(uuid != NULL, invalid_action = TRUE);
    CRM_CHECK(type != NULL, invalid_action = TRUE);
    CRM_CHECK(target != NULL, invalid_action = TRUE);

    if (invalid_action) {
        crm_log_xml_warn(action->xml, "BadAction");
        return FALSE;
    }

    crm_notice("Executing %s fencing operation (%s) on %s (timeout=%d)",
               type, id, target, transition_graph->stonith_timeout);

    /* Passing NULL means block until we can connect... */
    te_connect_stonith(NULL);

    if (confirmed_nodes && g_hash_table_size(confirmed_nodes) == 1) {
        options |= st_opt_allow_suicide;
    }

    /* The graph keeps the timeout in milliseconds; it is divided by 1000 for the API */
    timeout_s = transition_graph->stonith_timeout / 1000;

    call_id = stonith_api->cmds->fence(stonith_api, options, target, type, timeout_s, 0);

    transition_key = generate_transition_key(transition_graph->id, action->id, 0, te_uuid);
    stonith_api->cmds->register_callback(stonith_api, call_id, timeout_s,
                                         st_opt_timeout_updates, transition_key,
                                         "tengine_stonith_callback", tengine_stonith_callback);

    return TRUE;
}
/*
 * Return the XML_ATTR_TE_TARGET_RC meta value of an action, parsed with
 * crm_parse_int() using "0" as the fallback text, or 0 when the action
 * has no such value.
 */
static int
get_target_rc(crm_action_t * action)
{
    const char *raw = crm_meta_value(action->params, XML_ATTR_TE_TARGET_RC);

    return (raw == NULL) ? 0 : crm_parse_int(raw, "0");
}
/*
 * Execute a crm-event action from the transition graph by sending the
 * corresponding request to the crmd on the target node.
 *
 * A shutdown of the local node is not sent at all: it is confirmed
 * immediately and recorded as the graph's completion action so that it
 * happens after everything else.  For any other node a shutdown also
 * marks the peer as expected to be down.
 *
 * Returns FALSE when the action names no node or the message could not be
 * sent, TRUE otherwise.
 */
static gboolean
te_crm_command(crm_graph_t * graph, crm_action_t * action)
{
char *counter = NULL;
xmlNode *cmd = NULL;
gboolean is_local = FALSE;
const char *id = NULL;
const char *task = NULL;
const char *value = NULL;
const char *on_node = NULL;
gboolean rc = TRUE;
gboolean no_wait = FALSE;
id = ID(action->xml);
task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
CRM_CHECK(on_node != NULL && strlen(on_node) != 0,
crm_err( "Corrupted command (id=%s) %s: no node",
crm_str(id), crm_str(task));
return FALSE);
/* NOTE(review): is_local and no_wait are only computed further down, so
 * both are still FALSE here and the " (local)" / " - no waiting"
 * suffixes can never appear in this message */
crm_info( "Executing crm-event (%s): %s on %s%s%s",
crm_str(id), crm_str(task), on_node,
is_local ? " (local)" : "", no_wait ? " - no waiting" : "");
if (safe_str_eq(on_node, fsa_our_uname)) {
is_local = TRUE;
}
value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT);
if (crm_is_true(value)) {
no_wait = TRUE;
}
if (is_local && safe_str_eq(task, CRM_OP_SHUTDOWN)) {
/* defer until everything else completes */
crm_info( "crm-event (%s) is a local shutdown", crm_str(id));
graph->completion_action = tg_shutdown;
graph->abort_reason = "local shutdown";
action->confirmed = TRUE;
update_graph(graph, action);
trigger_graph();
return TRUE;
} else if(safe_str_eq(task, CRM_OP_SHUTDOWN)) {
/* Remote shutdown: note that the peer is now expected to go down */
crm_node_t *peer = crm_get_peer(0, on_node);
crm_update_peer_expected(__FUNCTION__, peer, CRMD_JOINSTATE_DOWN);
}
/* Build the request and tag it with a transition key built from the
 * graph id, action id, expected return code and te_uuid */
cmd = create_request(task, action->xml, on_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);
counter =
generate_transition_key(transition_graph->id, action->id, get_target_rc(action), te_uuid);
crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter);
- rc = send_cluster_message(on_node, crm_msg_crmd, cmd, TRUE);
+ rc = send_cluster_message(crm_get_peer(0, on_node), crm_msg_crmd, cmd, TRUE);
free(counter);
free_xml(cmd);
if (rc == FALSE) {
crm_err("Action %d failed: send", action->id);
return FALSE;
} else if (no_wait) {
/* No reply is awaited: treat the action as confirmed right away */
action->confirmed = TRUE;
update_graph(graph, action);
trigger_graph();
} else {
/* Wait for the result, falling back to the network delay when the
 * action carries no usable timeout */
if (action->timeout <= 0) {
crm_err("Action %d: %s on %s had an invalid timeout (%dms). Using %dms instead",
action->id, task, on_node, action->timeout, graph->network_delay);
action->timeout = graph->network_delay;
}
te_start_action_timer(graph, action);
}
return TRUE;
}
/*
 * Record a graph action as an lrm_resource operation in the CIB status
 * section.
 *
 * action: graph action whose XML supplies the task, target node and resource
 * status: operation status to record; PCMK_LRM_OP_PENDING is logged as a
 *         pending operation, any other value is logged as a timeout
 * op_rc:  return code passed on to convert_graph_action()
 *
 * Returns FALSE when the action has no resource element or resource id, or
 * when the CIB update call returns a value below pcmk_ok; TRUE otherwise.
 */
gboolean
cib_action_update(crm_action_t * action, int status, int op_rc)
{
    /* Resource attributes copied verbatim from the action into the update */
    const char *copied_attrs[] = {
        XML_ATTR_TYPE,
        XML_AGENT_ATTR_CLASS,
        XML_AGENT_ATTR_PROVIDER,
    };
    size_t attr_idx = 0;

    lrmd_event_data_t *op = NULL;
    xmlNode *state = NULL;
    xmlNode *lrm = NULL;
    xmlNode *resources = NULL;
    xmlNode *rsc = NULL;
    xmlNode *xml_op = NULL;
    xmlNode *action_rsc = NULL;
    int rc = pcmk_ok;
    const char *rsc_id = NULL;

    const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
    const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
    const char *task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
    const char *target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
    int call_options = cib_quorum_override | cib_scope_local;
    int target_rc = get_target_rc(action);

    if (status != PCMK_LRM_OP_PENDING) {
        crm_warn("%s %d: %s on %s timed out",
                 crm_element_name(action->xml), action->id, task_uuid, target);
    } else {
        crm_debug("%s %d: Recording pending operation %s on %s",
                  crm_element_name(action->xml), action->id, task_uuid, target);
    }

    action_rsc = find_xml_node(action->xml, XML_CIB_TAG_RESOURCE, TRUE);
    if (action_rsc == NULL) {
        return FALSE;
    }

    rsc_id = ID(action_rsc);
    CRM_CHECK(rsc_id != NULL, crm_log_xml_err(action->xml, "Bad:action");
              return FALSE);

    /* Build the nested update:
     *   node_state -> lrm -> lrm_resources -> lrm_resource
     * e.g.
     *   <node_state id="hadev">
     *     <lrm>
     *       <lrm_resources>
     *         <lrm_resource id="rsc2" last_op="start" op_code="0" target="hadev"/>
     */
    state = create_xml_node(NULL, XML_CIB_TAG_STATE);
    crm_xml_add(state, XML_ATTR_UUID, target_uuid);
    crm_xml_add(state, XML_ATTR_UNAME, target);

    lrm = create_xml_node(state, XML_CIB_TAG_LRM);
    crm_xml_add(lrm, XML_ATTR_ID, target_uuid);

    resources = create_xml_node(lrm, XML_LRM_TAG_RESOURCES);
    rsc = create_xml_node(resources, XML_LRM_TAG_RESOURCE);
    crm_xml_add(rsc, XML_ATTR_ID, rsc_id);

    for (attr_idx = 0; attr_idx < sizeof(copied_attrs) / sizeof(copied_attrs[0]); attr_idx++) {
        const char *attr = copied_attrs[attr_idx];

        crm_xml_add(rsc, attr, crm_element_value(action_rsc, attr));
    }

    /* Synthesize an operation result for the action and attach it to rsc */
    op = convert_graph_action(NULL, action, status, op_rc);
    op->call_id = -1;
    op->user_data = generate_transition_key(transition_graph->id, action->id, target_rc, te_uuid);

    xml_op = create_operation_update(rsc, op, CRM_FEATURE_SET, target_rc, __FUNCTION__, LOG_INFO);
    lrmd_free_event(op);

    crm_trace("Updating CIB with \"%s\" (%s): %s %s on %s",
              status < 0 ? "new action" : XML_ATTR_TIMEOUT,
              crm_element_name(action->xml), crm_str(task), rsc_id, target);
    crm_log_xml_trace(xml_op, "Op");

    rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, state, call_options);

    crm_trace("Updating CIB with %s action %d: %s on %s (call_id=%d)",
              services_lrm_status_str(status), action->id, task_uuid, target, rc);

    add_cib_op_callback(fsa_cib_conn, rc, FALSE, NULL, cib_action_updated);
    free_xml(state);

    action->sent_update = TRUE;

    return (rc < pcmk_ok) ? FALSE : TRUE;
}
/*
 * Graph callback that initiates a resource operation on its target node.
 *
 * The action XML is stamped with a transition key and wrapped in a
 * CRM_OP_INVOKE_LRM request. When the target is this node the request is
 * handed straight to do_lrm_invoke(); otherwise it goes through
 * send_cluster_message().
 *
 * Returns FALSE when the action names no target node or the cluster send
 * reports failure, TRUE otherwise.
 */
static gboolean
te_rsc_command(crm_graph_t * graph, crm_action_t * action)
{
/* never overwrite stop actions in the CIB with
* anything other than completed results
*
* Writing pending stops makes it look like the
* resource is running again
*/
xmlNode *cmd = NULL;
xmlNode *rsc_op = NULL;
gboolean rc = TRUE;
gboolean no_wait = FALSE;
gboolean is_local = FALSE;
char *counter = NULL;
const char *task = NULL;
const char *value = NULL;
const char *on_node = NULL;
const char *task_uuid = NULL;
CRM_ASSERT(action != NULL);
CRM_ASSERT(action->xml != NULL);
action->executed = FALSE;
on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
/* 'task' has not been looked up yet, so crm_str(task) receives NULL here */
CRM_CHECK(on_node != NULL && strlen(on_node) != 0,
crm_err( "Corrupted command(id=%s) %s: no node",
ID(action->xml), crm_str(task));
return FALSE);
rsc_op = action->xml;
task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
/* same attribute of the same node as the lookup above (rsc_op == action->xml) */
on_node = crm_element_value(rsc_op, XML_LRM_ATTR_TARGET);
/* Stamp the operation with a transition key built from graph id, action id,
* expected rc and te_uuid */
counter =
generate_transition_key(transition_graph->id, action->id, get_target_rc(action), te_uuid);
crm_xml_add(rsc_op, XML_ATTR_TRANSITION_KEY, counter);
if (safe_str_eq(on_node, fsa_our_uname)) {
is_local = TRUE;
}
value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT);
if (crm_is_true(value)) {
no_wait = TRUE;
}
crm_info("Initiating action %d: %s %s on %s%s%s",
action->id, task, task_uuid, on_node,
is_local ? " (local)" : "", no_wait ? " - no waiting" : "");
cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, on_node,
CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL);
if (is_local) {
/* shortcut local resource commands */
ha_msg_input_t data = {
.msg = cmd,
.xml = rsc_op,
};
fsa_data_t msg = {
.id = 0,
.data = &data,
.data_type = fsa_dt_ha_msg,
.fsa_input = I_NULL,
.fsa_cause = C_FSA_INTERNAL,
.actions = A_LRM_INVOKE,
.origin = __FUNCTION__,
};
/* rc keeps its initial TRUE on this path; do_lrm_invoke's outcome is not checked */
do_lrm_invoke(A_LRM_INVOKE, C_FSA_INTERNAL, fsa_state, I_NULL, &msg);
} else {
- rc = send_cluster_message(on_node, crm_msg_lrmd, cmd, TRUE);
+ rc = send_cluster_message(crm_get_peer(0, on_node), crm_msg_lrmd, cmd, TRUE);
}
free(counter);
free_xml(cmd);
/* marked executed whether or not the send succeeded */
action->executed = TRUE;
if (rc == FALSE) {
crm_err("Action %d failed: send", action->id);
return FALSE;
} else if (no_wait) {
/* no-wait actions are confirmed immediately instead of starting a timer */
action->confirmed = TRUE;
update_graph(transition_graph, action);
trigger_graph();
} else {
if (action->timeout <= 0) {
crm_err("Action %d: %s %s on %s had an invalid timeout (%dms). Using %dms instead",
action->id, task, task_uuid, on_node, action->timeout, graph->network_delay);
action->timeout = graph->network_delay;
}
te_start_action_timer(graph, action);
}
value = crm_meta_value(action->params, XML_OP_ATTR_PENDING);
if (crm_is_true(value)) {
/* write a "pending" entry to the CIB, inhibit notification */
crm_info("Recording pending op %s in the CIB", task_uuid);
cib_action_update(action, PCMK_LRM_OP_PENDING, PCMK_EXECRA_STATUS_UNKNOWN);
}
return TRUE;
}
/*
 * Callbacks this file supplies for executing transition graph actions.
 *
 * The initializer is positional, so the entries must stay in the order of
 * the crm_graph_functions_t members (declared outside this part of the file).
 */
crm_graph_functions_t te_graph_fns = {
te_pseudo_action,
te_rsc_command,
te_crm_command,
te_fence_node
};
/*
 * Tell the crmd state machine how a transition graph finished.
 *
 * Maps graph->completion_action onto an FSA input (if any), logs the outcome,
 * resets the graph's abort reason and completion action, and clears
 * R_IN_TRANSITION. When there is no input to register, the FSA trigger is
 * poked instead (if one exists).
 *
 * graph: the completed transition graph; graph->complete is forced to TRUE
 *        (via CRM_CHECK) if it was not already set.
 */
void
notify_crmd(crm_graph_t * graph)
{
    const char *type = "unknown";
    enum crmd_fsa_input event = I_NULL;

    crm_debug("Processing transition completion in state %s", fsa_state2string(fsa_state));

    CRM_CHECK(graph->complete, graph->complete = TRUE);

    switch (graph->completion_action) {
        case tg_stop:
            /* Handled like tg_done, but kept as its own case so the status
             * message below reports "stop" rather than "done" */
            type = "stop";
            if (fsa_state == S_TRANSITION_ENGINE) {
                event = I_TE_SUCCESS;
            }
            break;

        case tg_done:
            type = "done";
            if (fsa_state == S_TRANSITION_ENGINE) {
                event = I_TE_SUCCESS;
            }
            break;

        case tg_restart:
            type = "restart";
            if (fsa_state == S_TRANSITION_ENGINE) {
                if (transition_timer->period_ms > 0) {
                    /* restart the timer rather than recalculating right away */
                    crm_timer_stop(transition_timer);
                    crm_timer_start(transition_timer);
                } else if(too_many_st_failures() == FALSE) {
                    event = I_PE_CALC;
                }
            } else if (fsa_state == S_POLICY_ENGINE) {
                register_fsa_action(A_PE_INVOKE);
            }
            break;

        case tg_shutdown:
            type = "shutdown";
            if (is_set(fsa_input_register, R_SHUTDOWN)) {
                event = I_STOP;
            } else {
                crm_err("We didn't ask to be shut down, yet our" " PE is telling us too.");
                event = I_TERMINATE;
            }
    }

    crm_debug( "Transition %d status: %s - %s",
               graph->id, type, crm_str(graph->abort_reason));

    /* Reset per-transition state before handing control back to the FSA */
    graph->abort_reason = NULL;
    graph->completion_action = tg_done;
    clear_bit(fsa_input_register, R_IN_TRANSITION);

    if (event != I_NULL) {
        register_fsa_input(C_FSA_INTERNAL, event, NULL);
    } else if (fsa_source) {
        mainloop_set_trigger(fsa_source);
    }
}
diff --git a/fencing/commands.c b/fencing/commands.c
index 27788c8614..a6f3849aa7 100644
--- a/fencing/commands.c
+++ b/fencing/commands.c
@@ -1,1516 +1,1516 @@
/*
* Copyright (C) 2009 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/utsname.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <ctype.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/ipc.h>
#include <crm/cluster/internal.h>
#include <crm/common/mainloop.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <crm/common/xml.h>
#include <internal.h>
/* Registered fencing devices; stonith_device_register() stores each device under its id */
GHashTable *device_list = NULL;
/* Fencing topology; stonith_level_register() stores one entry per node name */
GHashTable *topology = NULL;
/* Every async_command_t created by create_async_command() until free_async_command() removes it */
GList *cmd_list = NULL;
/* Defined in another file; not referenced in this part of the file */
extern GHashTable *remote_op_list;
/* Decremented by st_child_done(); NOTE(review): the matching increment is not visible here */
static int active_children = 0;
/* Forward declarations: both are referenced before their definitions below */
static gboolean stonith_device_dispatch(gpointer user_data);
static void st_child_done(GPid pid, int rc, const char *output, gpointer user_data);
/*
 * One fencing-device operation tracked by this daemon.
 *
 * Instances are built from a request message by create_async_command() and
 * released by free_async_command().
 */
typedef struct async_command_s {
/* read from F_STONITH_CALLID */
int id;
/* NOTE(review): pid and fd_stdout are not assigned anywhere in the visible code */
int pid;
int fd_stdout;
/* read from F_STONITH_CALLOPTS; tested against st_opt_sync_call when replying */
int options;
/* read from F_STONITH_TIMEOUT */
int default_timeout;
/* starts as default_timeout; schedule_stonith_command() replaces it with the
* device-specific value from get_action_timeout() */
int timeout;
/* heap copies of request fields: F_STONITH_OPERATION, F_ORIG,
* F_STONITH_CLIENTID, F_STONITH_CLIENTNAME and F_STONITH_REMOTE */
char *op;
char *origin;
char *client;
char *client_name;
char *remote;
/* heap copies taken from the element carrying F_STONITH_ACTION:
* F_STONITH_TARGET, the action itself, F_STONITH_DEVICE and F_STONITH_MODE */
char *victim;
char *action;
char *device;
char *mode;
/* device_next is consumed by st_child_done() to retry a failed operation on
* the next device; NOTE(review): where these lists are filled is not visible here */
GListPtr device_list;
GListPtr device_next;
/* NOTE(review): 'done' and the two timers are not referenced in the visible code */
void (*done)(GPid pid, int rc, const char *output, gpointer user_data);
guint timer_sigterm;
guint timer_sigkill;
/*! If the operation timed out, this is the last signal
* we sent to the process to get it to terminate */
int last_timeout_signo;
} async_command_t;
/* Forward declaration: used by stonith_send_async_reply() below */
static xmlNode *
stonith_construct_async_reply(async_command_t *cmd, const char *output, xmlNode *data, int rc);
/*
 * Look up a per-action timeout override in a device's parameters.
 *
 * The parameter consulted is "pcmk_<action>_timeout". default_timeout is
 * returned when action is NULL, when the device has no parameter table, or
 * when the parameter is not set; otherwise the parameter's value converted
 * with atoi() is returned.
 */
static int
get_action_timeout(stonith_device_t *device, const char *action, int default_timeout)
{
    char param_name[512] = { 0, };
    const char *configured = NULL;

    CRM_CHECK(action != NULL, return default_timeout);

    if (device->params == NULL) {
        return default_timeout;
    }

    snprintf(param_name, sizeof(param_name) - 1, "pcmk_%s_timeout", action);
    configured = g_hash_table_lookup(device->params, param_name);

    return (configured != NULL) ? atoi(configured) : default_timeout;
}
/*
 * Release an async command and everything it owns.
 *
 * The command is first unlinked from the global cmd_list. The device_list
 * container is freed but not the entries it points at. Every string copied
 * by create_async_command() is released, including 'mode'.
 *
 * cmd: command to free; NULL is accepted and ignored.
 */
static void free_async_command(async_command_t *cmd)
{
    if (!cmd) {
        return;
    }

    cmd_list = g_list_remove(cmd_list, cmd);

    g_list_free(cmd->device_list);
    free(cmd->device);
    free(cmd->action);
    free(cmd->victim);
    free(cmd->remote);
    free(cmd->client);
    free(cmd->client_name);
    free(cmd->origin);
    /* 'mode' is allocated alongside the other strings and must go with them */
    free(cmd->mode);
    free(cmd->op);
    free(cmd);
}
/*
 * Build an async_command_t from a request message and add it to cmd_list.
 *
 * The action is taken from the first element carrying an F_STONITH_ACTION
 * attribute; target, mode and device are read from that same element, the
 * remaining fields from the top-level message.
 *
 * Returns NULL when the message has no action or no F_STONITH_OPERATION.
 * A missing client id only produces a warning.
 */
static async_command_t *create_async_command(xmlNode *msg)
{
async_command_t *cmd = NULL;
xmlNode *op = get_xpath_object("//@"F_STONITH_ACTION, msg, LOG_ERR);
const char *action = crm_element_value(op, F_STONITH_ACTION);
CRM_CHECK(action != NULL, crm_log_xml_warn(msg, "NoAction"); return NULL);
crm_log_xml_trace(msg, "Command");
/* NOTE(review): the calloc() result is used without a NULL check */
cmd = calloc(1, sizeof(async_command_t));
crm_element_value_int(msg, F_STONITH_CALLID, &(cmd->id));
crm_element_value_int(msg, F_STONITH_CALLOPTS, &(cmd->options));
crm_element_value_int(msg, F_STONITH_TIMEOUT, &(cmd->default_timeout));
/* effective timeout starts out as the requested one */
cmd->timeout = cmd->default_timeout;
cmd->origin = crm_element_value_copy(msg, F_ORIG);
cmd->remote = crm_element_value_copy(msg, F_STONITH_REMOTE);
cmd->client = crm_element_value_copy(msg, F_STONITH_CLIENTID);
cmd->client_name = crm_element_value_copy(msg, F_STONITH_CLIENTNAME);
cmd->op = crm_element_value_copy(msg, F_STONITH_OPERATION);
cmd->action = strdup(action);
cmd->victim = crm_element_value_copy(op, F_STONITH_TARGET);
cmd->mode = crm_element_value_copy(op, F_STONITH_MODE);
cmd->device = crm_element_value_copy(op, F_STONITH_DEVICE);
/* cmd is not yet in cmd_list here, so free_async_command() only releases it */
CRM_CHECK(cmd->op != NULL, crm_log_xml_warn(msg, "NoOp"); free_async_command(cmd); return NULL);
CRM_CHECK(cmd->client != NULL, crm_log_xml_warn(msg, "NoClient"));
cmd_list = g_list_append(cmd_list, cmd);
return cmd;
}
/*
 * Inject a manual confirmation that a node is down.
 *
 * Builds a command from the request, relabels it as coming from the
 * "manual_ack" pseudo-device on behalf of remote operation 'op', and
 * completes it immediately with a successful result via st_child_done().
 *
 * Returns -EINVAL if no command could be built from msg, pcmk_ok otherwise.
 */
static int stonith_manual_ack(xmlNode *msg, remote_fencing_op_t *op)
{
    async_command_t *cmd = create_async_command(msg);
    xmlNode *dev = get_xpath_object("//@"F_STONITH_TARGET, msg, LOG_ERR);

    if(cmd == NULL) {
        return -EINVAL;
    }

    /* create_async_command() may already have filled these from the request;
     * release those copies before replacing them so they are not leaked */
    free(cmd->device);
    cmd->device = strdup("manual_ack");

    free(cmd->remote);
    cmd->remote = strdup(op->id);

    crm_notice("Injecting manual confirmation that %s is safely off/down",
               crm_element_value(dev, F_STONITH_TARGET));

    st_child_done(0, 0, NULL, cmd);
    return pcmk_ok;
}
/*
 * Start the next queued operation on a device, if the device is idle.
 *
 * Nothing happens while the device still has an active pid or when its
 * queue is empty. Otherwise the first pending command is dequeued and
 * launched asynchronously. A launch that does not yield a positive pid is
 * reported through st_child_done() straight away.
 *
 * Returns FALSE only when device is NULL; TRUE in every other case.
 */
static gboolean stonith_device_execute(stonith_device_t *device)
{
    int rc = 0;
    int exec_rc = 0;
    async_command_t *cmd = NULL;
    stonith_action_t *action = NULL;

    CRM_CHECK(device != NULL, return FALSE);

    if(device->active_pid) {
        crm_trace("%s is still active with pid %u", device->id, device->active_pid);
        return TRUE;
    }

    /* Pop the head of the pending queue */
    if(device->pending_ops != NULL) {
        GList *head = device->pending_ops;

        device->pending_ops = g_list_remove_link(device->pending_ops, head);
        cmd = head->data;
        g_list_free_1(head);
    }

    if(cmd == NULL) {
        crm_trace("Nothing further to do for %s", device->id);
        return TRUE;
    }

    action = stonith_action_create(device->agent, cmd->action, cmd->victim,
                                   cmd->timeout, device->params, device->aliases);
    exec_rc = stonith_action_execute_async(action, (void *) cmd, st_child_done);

    if(exec_rc <= 0) {
        /* Could not launch: report the failure immediately */
        crm_warn("Operation %s%s%s on %s failed (%d/%d)",
                 cmd->action, cmd->victim?" for node ":"", cmd->victim?cmd->victim:"",
                 device->id, exec_rc, rc);
        st_child_done(0, rc<0?rc:exec_rc, NULL, cmd);
        return TRUE;
    }

    crm_debug("Operation %s%s%s on %s now running with pid=%d, timeout=%dms",
              cmd->action, cmd->victim?" for node ":"", cmd->victim?cmd->victim:"",
              device->id, exec_rc, cmd->timeout);
    device->active_pid = exec_rc;
    return TRUE;
}
/*
 * Mainloop trigger callback: user_data is the device whose queue should be
 * serviced. The return value of stonith_device_execute() is passed through.
 */
static gboolean stonith_device_dispatch(gpointer user_data)
{
    stonith_device_t *device = user_data;

    return stonith_device_execute(device);
}
/*
 * Queue a command on a device and wake the device's work trigger.
 *
 * The command is retargeted at 'device' (its device name is replaced) and
 * its timeout is recomputed from the device's per-action override, falling
 * back to the command's default timeout.
 */
static void schedule_stonith_command(async_command_t *cmd, stonith_device_t *device)
{
    const char *requester = NULL;

    CRM_CHECK(cmd != NULL, return);
    CRM_CHECK(device != NULL, return);

    /* free(NULL) is a no-op, so no guard is needed */
    free(cmd->device);
    cmd->device = strdup(device->id);
    cmd->timeout = get_action_timeout(device, cmd->action, cmd->default_timeout);

    requester = cmd->remote ? cmd->remote : cmd->client;
    crm_debug("Scheduling %s on %s for %s (timeout=%dms)", cmd->action, device->id,
              requester, cmd->timeout);

    device->pending_ops = g_list_append(device->pending_ops, cmd);
    mainloop_set_trigger(device->work);
}
/*
 * Destroy a stonith_device_t (signature matches a GLib destroy callback).
 *
 * Every operation still queued on the device is completed with -ENODEV
 * before the device's own storage is released.
 *
 * NOTE(review): the work trigger created in build_device_from_xml() is not
 * released in this function.
 */
void free_device(gpointer data)
{
GListPtr gIter = NULL;
stonith_device_t *device = data;
g_hash_table_destroy(device->params);
/* NOTE(review): aliases is only assigned in stonith_device_register(); confirm it
* cannot be NULL for devices freed before registration completes */
g_hash_table_destroy(device->aliases);
for(gIter = device->pending_ops; gIter != NULL; gIter = gIter->next) {
async_command_t *cmd = gIter->data;
crm_warn("Removal of device '%s' purged operation %s", device->id, cmd->action);
/* NOTE(review): the end of st_child_done() is not visible here; if it already
* releases cmd, the free_async_command() below would be a double free - confirm */
st_child_done(0, -ENODEV, NULL, cmd);
free_async_command(cmd);
}
/* frees the list cells only; the commands were handled in the loop above */
g_list_free(device->pending_ops);
g_list_free_full(device->targets, free);
free(device->namespace);
free(device->agent);
free(device->id);
free(device);
}
/*
 * Parse a host map string into a name -> value alias table.
 *
 * Pairs have the form "name=value" or "name:value" and are separated by
 * ';', space, tab or the end of the string. An empty table is returned when
 * hostmap is NULL.
 *
 * hostmap: string to parse, may be NULL
 * targets: optional list; a copy of every parsed value is appended to it
 *
 * Returns a newly created hash table that the caller owns. The table is
 * created with g_hash_destroy_str as destroy function for keys and values.
 */
static GHashTable *build_port_aliases(const char *hostmap, GListPtr *targets)
{
/* name: key of the pair being parsed, NULL while no '=' / ':' has been seen */
char *name = NULL;
/* last: offset where the current token starts; added: number of pairs stored */
int last = 0, lpc = 0, max = 0, added = 0;
GHashTable *aliases = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str);
if(hostmap == NULL) {
return aliases;
}
max = strlen(hostmap);
/* lpc runs up to and including the terminating NUL so the final pair is flushed */
for(; lpc <= max; lpc++) {
switch(hostmap[lpc]) {
/* Assignment chars */
case '=':
case ':':
if(lpc > last) {
/* a second assignment char replaces any name collected so far */
free(name);
name = calloc(1, 1 + lpc - last);
memcpy(name, hostmap + last, lpc - last);
}
last = lpc + 1;
break;
/* Delimiter chars */
/* case ',': Potentially used to specify multiple ports */
case 0:
case ';':
case ' ':
case '\t':
if(name) {
char *value = NULL;
value = calloc(1, 1 + lpc - last);
memcpy(value, hostmap + last, lpc - last);
crm_debug("Adding alias '%s'='%s'", name, value);
/* the table takes over both strings; name is reset below so it is not freed here */
g_hash_table_replace(aliases, name, value);
if(targets) {
*targets = g_list_append(*targets, strdup(value));
}
value=NULL;
name=NULL;
added++;
} else if(lpc > last) {
/* text without a preceding assignment char: reported and dropped */
crm_debug("Parse error at offset %d near '%s'", lpc-last, hostmap+last);
}
last = lpc + 1;
break;
}
if(hostmap[lpc] == 0) {
break;
}
}
if(added == 0) {
crm_info("No host mappings detected in '%s'", hostmap);
}
/* releases a name that was never paired with a value */
free(name);
return aliases;
}
/*
 * Extract host names from one line of text and append them to *output.
 *
 * Entries are separated by whitespace or commas; runs of whitespace count as
 * a single separator. Lines containing "invalid" or "variable" are treated
 * as agent complaints and skipped entirely. The literal entries "on" and
 * "off" are dropped.
 *
 * line:   NUL-terminated text, may be NULL (nothing is done)
 * output: list receiving newly allocated entry strings
 */
static void parse_host_line(const char *line, GListPtr *output)
{
    int lpc = 0;
    int max = 0;
    int last = 0;

    if(line == NULL) {
        return;
    }
    max = strlen(line);

    /* Check for any complaints about additional parameters that the device doesn't understand */
    if(strstr(line, "invalid") || strstr(line, "variable")) {
        crm_debug("Skipping: %s", line);
        return;
    }

    crm_trace("Processing: %s", line);

    /* Skip initial whitespace; 'last' ends up at the first non-blank char */
    for(lpc = 0; lpc <= max && isspace((unsigned char) line[lpc]); lpc++) {
        last = lpc+1;
    }

    /* Now the actual content.
     *
     * Scanning must resume at 'last', not at 0: restarting from the beginning
     * makes the leading blanks look like a token that ends before 'last',
     * which yields a zero-sized allocation that sscanf() then overruns.
     */
    for(lpc = last; lpc <= max; lpc++) {
        gboolean a_space = isspace((unsigned char) line[lpc]);

        if(a_space && lpc < max && isspace((unsigned char) line[lpc+1])) {
            /* fast-forward to the end of the spaces */

        } else if(a_space || line[lpc] == ',' || line[lpc] == 0) {
            int rc = 1;
            char *entry = NULL;

            if(lpc > last) {
                /* Room for the token plus its terminator; the scanset stops
                 * at the separator, so at most lpc - last chars are stored */
                entry = calloc(1, 1 + lpc - last);
                if(entry != NULL) {
                    rc = sscanf(line+last, "%[a-zA-Z0-9_-.]", entry);
                }
            }

            if(entry == NULL) {
                /* Skip */
            } else if(rc != 1) {
                crm_warn("Could not parse (%d %d): %s", last, lpc, line+last);
            } else if(safe_str_neq(entry, "on") && safe_str_neq(entry, "off")) {
                crm_trace("Adding '%s'", entry);
                *output = g_list_append(*output, entry);
                /* ownership moved to the list */
                entry = NULL;
            }

            free(entry);
            last = lpc + 1;
        }
    }
}
/*
 * Split a multi-line string into lines and collect the host names found on
 * each line by parse_host_line().
 *
 * hosts: text to parse, may be NULL
 *
 * Returns the list of extracted names (NULL when hosts is NULL or nothing
 * was found).
 */
static GListPtr parse_host_list(const char *hosts)
{
    GListPtr names = NULL;
    int line_start = 0;
    int total = 0;
    int pos = 0;

    if(hosts == NULL) {
        return names;
    }

    total = strlen(hosts);

    /* pos deliberately reaches the terminating NUL so the last line is handled */
    while(pos <= total) {
        if(hosts[pos] == '\n' || hosts[pos] == 0) {
            int line_len = pos - line_start;
            char *line = calloc(1, 2 + line_len);

            /* copy exactly one line, without its newline */
            snprintf(line, 1 + line_len, "%s", hosts+line_start);
            parse_host_line(line, &names);
            free(line);

            line_start = pos + 1;
        }
        pos++;
    }

    return names;
}
/*
 * Allocate a stonith_device_t and fill it from the F_STONITH_DEVICE element
 * of a request: id, agent, namespace and the parameter table. A mainloop
 * trigger that dispatches to stonith_device_dispatch() is attached as the
 * device's work source.
 */
static stonith_device_t *build_device_from_xml(xmlNode *msg)
{
    xmlNode *dev_xml = get_xpath_object("//"F_STONITH_DEVICE, msg, LOG_ERR);
    stonith_device_t *new_dev = calloc(1, sizeof(stonith_device_t));

    new_dev->id = crm_element_value_copy(dev_xml, XML_ATTR_ID);
    new_dev->agent = crm_element_value_copy(dev_xml, "agent");
    new_dev->namespace = crm_element_value_copy(dev_xml, "namespace");
    new_dev->params = xml2list(dev_xml);

    new_dev->work = mainloop_add_trigger(G_PRIORITY_HIGH, stonith_device_dispatch, new_dev);
    /* TODO: Hook up priority */

    return new_dev;
}
/*
 * Decide how a device's ability to fence a host is checked.
 *
 * An explicit STONITH_ATTR_HOSTCHECK parameter wins. Without it the result
 * is "static-list" when a host list or host map is configured, and
 * "dynamic-list" otherwise.
 */
static const char *
target_list_type(stonith_device_t *dev)
{
    const char *configured = g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTCHECK);

    if(configured != NULL) {
        return configured;
    }

    if(g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTLIST) != NULL
       || g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTMAP) != NULL) {
        return "static-list";
    }

    return "dynamic-list";
}
/*
 * Refresh a device's cached target list by running the agent's "list" action.
 *
 * The cache is refreshed when it is empty or when targets_age is more than
 * 60 older than the current time(). A negative targets_age means list
 * queries have been disabled for this device.
 */
static void
update_dynamic_list(stonith_device_t *dev)
{
time_t now = time(NULL);
/* Host/alias must be in the list output to be eligible to be fenced
*
* Will cause problems if down'd nodes aren't listed or (for virtual nodes)
* if the guest is still listed despite being moved to another machine
*/
if(dev->targets_age < 0) {
crm_trace("Port list queries disabled for %s", dev->id);
} else if(dev->targets == NULL || dev->targets_age + 60 < now) {
stonith_action_t *action = NULL;
char *output = NULL;
int rc = pcmk_ok;
int exec_rc = pcmk_ok;
/* an operation is already running on the device: keep whatever is cached */
if(dev->active_pid != 0) {
crm_notice("Port list query can not execute because device is busy, using cache: %s",
dev->targets ? "YES" : "NO");
return;
}
/* NOTE(review): the unit of the literal timeout 5 is not visible here */
action = stonith_action_create(dev->agent, "list", NULL, 5, dev->params, NULL);
exec_rc = stonith_action_execute(action, &rc, &output);
/* NOTE(review): active_pid was 0 a few lines up and nothing visible here changes
* it, so this branch appears to catch every non-zero rc; the next branch would
* then only be reached when exec_rc < 0 with rc == 0 - confirm */
if(rc != 0 && dev->active_pid == 0) {
/* This device probably only supports a single
* connection, which appears to already be in use,
* likely involved in a monitor or (less likely)
* metadata operation.
*
* Avoid disabling port list queries in the hope that
* the op would succeed next time
*/
crm_info("Couldn't query ports for %s. Call failed with rc=%d and active_pid=%d: %s",
dev->agent, rc, dev->active_pid, output);
} else if(exec_rc < 0 || rc != 0) {
crm_notice("Disabling port list queries for %s (%d/%d): %s",
dev->id, exec_rc, rc, output);
dev->targets_age = -1;
/* Fall back to status */
g_hash_table_replace(dev->params, strdup(STONITH_ATTR_HOSTCHECK), strdup("status"));
g_list_free_full(dev->targets, free);
dev->targets = NULL;
} else {
/* success: replace the cache with the freshly parsed list */
crm_info("Refreshing port list for %s", dev->id);
g_list_free_full(dev->targets, free);
dev->targets = parse_host_list(output);
dev->targets_age = now;
}
free(output);
}
}
/*
 * Create a device from a registration request and store it in device_list
 * under its id.
 *
 * The target list is seeded from the host list parameter and extended with
 * the values of the host map. Devices whose check type is "dynamic-list"
 * have their target cache filled right away.
 *
 * desc: optional; receives the device id (owned by the device)
 *
 * Always returns pcmk_ok.
 */
int stonith_device_register(xmlNode *msg, const char **desc)
{
    stonith_device_t *device = build_device_from_xml(msg);
    const char *host_list = g_hash_table_lookup(device->params, STONITH_ATTR_HOSTLIST);
    const char *host_map = NULL;
    const char *check_type = NULL;

    if(host_list != NULL) {
        device->targets = parse_host_list(host_list);
    }

    host_map = g_hash_table_lookup(device->params, STONITH_ATTR_HOSTMAP);
    device->aliases = build_port_aliases(host_map, &(device->targets));

    check_type = target_list_type(device);
    if (safe_str_eq(check_type, "dynamic-list")) {
        /* populate the cache during registration so targets are available
         * from the start */
        update_dynamic_list(device);
    }

    g_hash_table_replace(device_list, device->id, device);
    crm_notice("Added '%s' to the device list (%d active devices)", device->id, g_hash_table_size(device_list));

    if(desc != NULL) {
        *desc = device->id;
    }
    return pcmk_ok;
}
/*
 * Remove the device named by a request from device_list.
 *
 * A device that is not registered is not an error; both outcomes are logged.
 *
 * desc: optional; receives the id string read from the request XML
 *
 * Always returns pcmk_ok.
 */
static int stonith_device_remove(xmlNode *msg, const char **desc)
{
    xmlNode *dev_xml = get_xpath_object("//"F_STONITH_DEVICE, msg, LOG_ERR);
    const char *id = crm_element_value(dev_xml, XML_ATTR_ID);
    gboolean removed = g_hash_table_remove(device_list, id);

    if(!removed) {
        crm_info("Device '%s' not found (%d active devices)",
                 id, g_hash_table_size(device_list));
    } else {
        crm_info("Removed '%s' from the device list (%d active devices)",
                 id, g_hash_table_size(device_list));
    }

    if(desc != NULL) {
        *desc = id;
    }
    return pcmk_ok;
}
/*
 * Count how many of a topology entry's ST_LEVEL_MAX level slots hold a
 * device list.
 */
static int count_active_levels(stonith_topology_t *tp)
{
    int active = 0;
    int level = 0;

    while(level < ST_LEVEL_MAX) {
        active += (tp->levels[level] != NULL) ? 1 : 0;
        level++;
    }
    return active;
}
/*
 * Destroy a stonith_topology_t (signature matches a GLib destroy callback):
 * every level's device-name list, the node name and the entry itself.
 */
void free_topology_entry(gpointer data)
{
    stonith_topology_t *entry = data;
    int level = ST_LEVEL_MAX;

    /* Levels are independent of each other, so the walk direction is irrelevant */
    while(level-- > 0) {
        if(entry->levels[level] == NULL) {
            continue;
        }
        g_list_free_full(entry->levels[level], free);
    }

    free(entry->node);
    free(entry);
}
/*
 * Add the devices listed in a request to one fencing level of a node.
 *
 * A topology entry for the node is created on first use. Devices are
 * appended to whatever the level already contains.
 *
 * desc: optional; receives a newly allocated "node[level]" description
 *
 * Returns -EINVAL when the level id is not within 1 .. ST_LEVEL_MAX-1,
 * pcmk_ok otherwise.
 */
int stonith_level_register(xmlNode *msg, char **desc)
{
    xmlNode *level_xml = get_xpath_object("//"F_STONITH_LEVEL, msg, LOG_ERR);
    const char *target = crm_element_value(level_xml, F_STONITH_TARGET);
    stonith_topology_t *entry = g_hash_table_lookup(topology, target);
    xmlNode *dev_xml = NULL;
    int level_id = 0;

    crm_element_value_int(level_xml, XML_ATTR_ID, &level_id);

    if(desc != NULL) {
        *desc = g_strdup_printf("%s[%d]", target, level_id);
    }

    if(level_id <= 0 || level_id >= ST_LEVEL_MAX) {
        return -EINVAL;
    }

    if(entry == NULL) {
        /* first level registered for this node */
        entry = calloc(1, sizeof(stonith_topology_t));
        entry->node = strdup(target);
        g_hash_table_replace(topology, entry->node, entry);
        crm_trace("Added %s to the topology (%d active entries)", target, g_hash_table_size(topology));
    }

    if(entry->levels[level_id] != NULL) {
        crm_info("Adding to the existing %s[%d] topology entry (%d active entries)", target, level_id, count_active_levels(entry));
    }

    dev_xml = __xml_first_child(level_xml);
    while(dev_xml != NULL) {
        const char *device = ID(dev_xml);

        crm_trace("Adding device '%s' for %s (%d)", device, target, level_id);
        entry->levels[level_id] = g_list_append(entry->levels[level_id], strdup(device));

        dev_xml = __xml_next(dev_xml);
    }

    crm_info("Node %s has %d active fencing levels", target, count_active_levels(entry));
    return pcmk_ok;
}
/*
 * Remove one fencing level, or the whole topology entry, for a node.
 *
 * A level id of 0 removes the node's entire entry. A positive id clears just
 * that level. Removing something that does not exist is not an error.
 *
 * desc: optional; receives a newly allocated "node[level]" description
 *
 * Returns -EINVAL when the node has an entry but the level id is negative or
 * not below ST_LEVEL_MAX, pcmk_ok otherwise.
 */
int stonith_level_remove(xmlNode *msg, char **desc)
{
    int id = 0;
    xmlNode *level = get_xpath_object("//"F_STONITH_LEVEL, msg, LOG_ERR);
    const char *node = crm_element_value(level, F_STONITH_TARGET);
    stonith_topology_t *tp = g_hash_table_lookup(topology, node);

    /* Read the level before describing the request, so that the description
     * carries the requested level instead of the initial 0 */
    crm_element_value_int(level, XML_ATTR_ID, &id);

    if(desc) {
        *desc = g_strdup_printf("%s[%d]", node, id);
    }

    if(tp == NULL) {
        crm_info("Node %s not found (%d active entries)",
                 node, g_hash_table_size(topology));
        return pcmk_ok;

    } else if(id < 0 || id >= ST_LEVEL_MAX) {
        return -EINVAL;
    }

    if(id == 0 && g_hash_table_remove(topology, node)) {
        crm_info("Removed all %s related entries from the topology (%d active entries)",
                 node, g_hash_table_size(topology));

    } else if(id > 0 && tp->levels[id] != NULL) {
        g_list_free_full(tp->levels[id], free);
        tp->levels[id] = NULL;

        crm_info("Removed entry '%d' from %s's topology (%d active entries remaining)",
                 id, node, count_active_levels(tp));
    }
    return pcmk_ok;
}
/*
 * Report whether 'item' equals (per safe_str_eq) any string stored in 'list'.
 *
 * The list is walked once through its links rather than indexed with
 * g_list_nth_data(), which would rescan from the head for every element.
 */
static gboolean string_in_list(GListPtr list, const char *item)
{
    GListPtr gIter = NULL;

    for(gIter = list; gIter != NULL; gIter = gIter->next) {
        const char *value = gIter->data;

        if(safe_str_eq(item, value)) {
            return TRUE;
        }
    }
    return FALSE;
}
/*
 * Queue the action described by a request on the device it names.
 *
 * msg:    request carrying the device id and the action
 * output: not used by this function
 *
 * Returns -EINPROGRESS once the command has been scheduled, -ENODEV when the
 * device is not registered (or no id was given), and -EPROTO when no command
 * could be built from the request.
 */
static int stonith_device_action(xmlNode *msg, char **output)
{
    int rc = pcmk_ok;
    xmlNode *dev = get_xpath_object("//"F_STONITH_DEVICE, msg, LOG_ERR);
    const char *id = crm_element_value(dev, F_STONITH_DEVICE);

    async_command_t *cmd = NULL;
    stonith_device_t *device = NULL;

    if(id) {
        crm_trace("Looking for '%s'", id);
        device = g_hash_table_lookup(device_list, id);
    }

    if(device) {
        cmd = create_async_command(msg);
        if(cmd == NULL) {
            /* The device is still stored in device_list; it must not be
             * freed here just because this one request was malformed */
            return -EPROTO;
        }

        schedule_stonith_command(cmd, device);
        rc = -EINPROGRESS;

    } else {
        crm_info("Device %s not found", id?id:"<none>");
        rc = -ENODEV;
    }
    return rc;
}
/*
 * Decide whether a device is able to fence a given host.
 *
 * The decision depends on the device's check type (see target_list_type()):
 *   "none"         - always able
 *   "static-list"  - host is in the configured targets, or has an alias while
 *                    a host map is configured
 *   "dynamic-list" - the host's alias appears in the (possibly refreshed)
 *                    output of the agent's list action
 *   "status"       - the agent's status action returns 0 or 2 for the host
 *
 * Returns FALSE for a NULL device, and TRUE for a NULL host (no specific
 * host was asked about).
 */
static gboolean can_fence_host_with_device(stonith_device_t *dev, const char *host)
{
gboolean can = FALSE;
const char *alias = host;
const char *check_type = NULL;
if(dev == NULL) {
return FALSE;
} else if(host == NULL) {
return TRUE;
}
/* use the device-specific name for the host when one is configured */
if(g_hash_table_lookup(dev->aliases, host)) {
alias = g_hash_table_lookup(dev->aliases, host);
}
check_type = target_list_type(dev);
if(safe_str_eq(check_type, "none")) {
can = TRUE;
} else if(safe_str_eq(check_type, "static-list")) {
/* Presence in the hostmap is sufficient
* Only use if all hosts on which the device can be active can always fence all listed hosts
*/
if(string_in_list(dev->targets, host)) {
can = TRUE;
} else if(g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTMAP)
&& g_hash_table_lookup(dev->aliases, host)) {
can = TRUE;
}
} else if(safe_str_eq(check_type, "dynamic-list")) {
/* may run the agent's list action to refresh dev->targets */
update_dynamic_list(dev);
if(string_in_list(dev->targets, alias)) {
can = TRUE;
}
} else if(safe_str_eq(check_type, "status")) {
int rc = 0;
int exec_rc = 0;
stonith_action_t *action = NULL;
/* Run the status operation for the device/target combination
* Will cause problems if the device doesn't return 2 for down'd nodes or
* (for virtual nodes) if the device doesn't return 1 for guests that
* have been moved to another host
*/
action = stonith_action_create(dev->agent, "status", host, 5, dev->params, dev->aliases);
exec_rc = stonith_action_execute(action, &rc, NULL);
if(exec_rc != 0) {
crm_err("Could not invoke %s: rc=%d", dev->id, exec_rc);
} else if(rc == 1 /* unknown */) {
crm_trace("Host %s is not known by %s", host, dev->id);
} else if(rc == 0 /* active */ || rc == 2 /* inactive */) {
can = TRUE;
} else {
crm_notice("Unkown result when testing if %s can fence %s: rc=%d", dev->id, host, rc);
}
} else {
crm_err("Unknown check type: %s", check_type);
}
/* mention the alias only when it differs from the host name */
if(safe_str_eq(host, alias)) {
crm_info("%s can%s fence %s: %s", dev->id, can?"":" not", host, check_type);
} else {
crm_info("%s can%s fence %s (aka. '%s'): %s", dev->id, can?"":" not", host, alias, check_type);
}
return can;
}
/* State shared between stonith_query() and its per-device callback search_devices() */
struct device_search_s
{
/* host to be fenced, or NULL to match every device */
const char *host;
/* devices found able to fence 'host'; entries are borrowed from device_list */
GListPtr capable;
};
/*
 * g_hash_table_foreach() callback over device_list: appends the device to
 * the search's 'capable' list when it can fence the searched-for host.
 *
 * key:       unused
 * value:     the stonith_device_t being examined
 * user_data: the struct device_search_s accumulating results
 */
static void search_devices(
    gpointer key, gpointer value, gpointer user_data)
{
    struct device_search_s *state = user_data;
    stonith_device_t *candidate = value;

    if(!can_fence_host_with_device(candidate, state->host)) {
        return;
    }
    state->capable = g_list_append(state->capable, value);
}
/*
 * Find the registered devices able to fence the host named in a query.
 *
 * When the query names no target, every registered device matches and each
 * device's parameters are included in the result.
 *
 * msg:  query request
 * list: optional; receives a newly created XML node describing the matching
 *       devices (set to NULL for "manual_ack" queries)
 *
 * Returns the number of matching devices, or pcmk_ok for a "manual_ack"
 * query, which needs no search.
 */
static int stonith_query(xmlNode *msg, xmlNode **list)
{
    struct device_search_s search;
    int available_devices = 0;
    const char *action = NULL;
    xmlNode *dev = get_xpath_object("//@"F_STONITH_TARGET, msg, LOG_DEBUG_3);

    search.host = NULL;
    search.capable = NULL;

    if(dev) {
        const char *device = crm_element_value(dev, F_STONITH_DEVICE);
        search.host = crm_element_value(dev, F_STONITH_TARGET);
        if(device && safe_str_eq(device, "manual_ack")) {
            /* No query necessary */
            if(list) {
                *list = NULL;
            }
            return pcmk_ok;
        }

        action = crm_element_value(dev, F_STONITH_ACTION);
    }

    crm_log_xml_debug(msg, "Query");

    g_hash_table_foreach(device_list, search_devices, &search);
    available_devices = g_list_length(search.capable);
    if(search.host) {
        crm_debug("Found %d matching devices for '%s'",
                  available_devices, search.host);
    } else {
        crm_debug("%d devices installed", available_devices);
    }

    /* Pack the results into data */
    if(list) {
        GListPtr lpc = NULL;
        *list = create_xml_node(NULL, __FUNCTION__);
        crm_xml_add(*list, F_STONITH_TARGET, search.host);
        crm_xml_add_int(*list, "st-available-devices", available_devices);
        for(lpc = search.capable; lpc != NULL; lpc = lpc->next) {
            stonith_device_t *device = (stonith_device_t*)lpc->data;
            /* A query may carry no action at all (e.g. no target element);
             * get_action_timeout() rejects a NULL action via CRM_CHECK, so
             * only ask for an override when there is an action to look up */
            int action_specific_timeout = action ? get_action_timeout(device, action, 0) : 0;

            dev = create_xml_node(*list, F_STONITH_DEVICE);
            crm_xml_add(dev, XML_ATTR_ID, device->id);
            crm_xml_add(dev, "namespace", device->namespace);
            crm_xml_add(dev, "agent", device->agent);
            if (action_specific_timeout) {
                crm_xml_add_int(dev, F_STONITH_ACTION_TIMEOUT, action_specific_timeout);
            }
            if(search.host == NULL) {
                xmlNode *attrs = create_xml_node(dev, XML_TAG_ATTRS);
                g_hash_table_foreach(device->params, hash2field, attrs);
            }
        }
    }

    /* only the list cells are freed; the devices stay in device_list */
    g_list_free(search.capable);
    return available_devices;
}
/*
 * Log the outcome of a device operation, followed by the agent's output.
 *
 * cmd:    the command that ran
 * rc:     its result; 0 is logged at a lower severity than failures
 * pid:    pid shown in the message
 * next:   id of the device that will be tried next, or NULL; ignored when
 *         rc is 0
 * output: agent output, may be NULL; logged one line at a time
 */
static void log_operation(async_command_t *cmd, int rc, int pid, const char *next, const char *output)
{
/* nothing further will be tried after a success */
if(rc == 0) {
next = NULL;
}
if(cmd->victim != NULL) {
/* operations aimed at a host */
do_crm_log(rc==0?LOG_NOTICE:LOG_ERR,
"Operation '%s' [%d] (call %d from %s) for host '%s' with device '%s' returned: %d (%s)%s%s",
cmd->action, pid, cmd->id, cmd->client_name, cmd->victim, cmd->device, rc, pcmk_strerror(rc),
next?". Trying: ":"", next?next:"");
} else {
/* operations without a target host */
do_crm_log_unlikely(rc==0?LOG_DEBUG:LOG_NOTICE,
"Operation '%s' [%d] for device '%s' returned: %d (%s)%s%s",
cmd->action, pid, cmd->device, rc, pcmk_strerror(rc), next?". Trying: ":"", next?next:"");
}
if(output) {
/* Logging the whole string confuses syslog when the string is xml */
char *local_copy = strdup(output);
int lpc = 0, last = 0, more = strlen(local_copy);
/* cut the copy at each newline and log the pieces one by one; the '== 0' test
* cannot match here because lpc stays below strlen(local_copy) */
for(lpc = 0; lpc < more; lpc++) {
if(local_copy[lpc] == '\n' || local_copy[lpc] == 0) {
local_copy[lpc] = 0;
do_crm_log(rc==0?LOG_INFO:LOG_WARNING, "%s: %s",
cmd->device, local_copy+last);
last = lpc+1;
}
}
/* whatever follows the last newline (possibly an empty string) */
crm_debug("%s: %s (total %d bytes)", cmd->device, local_copy+last, more);
free(local_copy);
}
}
/*
 * Deliver the result of a finished device operation.
 *
 * The reply is broadcast to the cluster unless the action is "metadata",
 * "monitor", "list" or "status", the command's mode is "slave", or the
 * daemon runs stand-alone. Otherwise it goes to the originating node when
 * one is recorded, or to the local client. In stand-alone mode a fence
 * notification is additionally raised locally.
 *
 * cmd:    the finished command
 * output: agent output (not logged for "metadata")
 * rc:     result of the operation
 * pid:    pid passed on to log_operation()
 */
static void
stonith_send_async_reply(async_command_t *cmd, const char *output, int rc, GPid pid)
{
xmlNode *reply = NULL;
gboolean bcast = TRUE;
/* the reply is built before 'output' may be cleared for metadata below */
reply = stonith_construct_async_reply(cmd, output, NULL, rc);
if(safe_str_eq(cmd->action, "metadata")) {
/* Too verbose to log */
bcast = FALSE;
output = NULL;
crm_trace("Directed reply: %s op", cmd->action);
} else if(crm_str_eq(cmd->action, "monitor", TRUE) ||
crm_str_eq(cmd->action, "list", TRUE) ||
crm_str_eq(cmd->action, "status", TRUE)) {
crm_trace("Directed reply: %s op", cmd->action);
bcast = FALSE;
} else if(safe_str_eq(cmd->mode, "slave")) {
crm_trace("Directed reply: Complex op with %s", cmd->device);
bcast = FALSE;
}
log_operation(cmd, rc, pid, NULL, output);
crm_log_xml_trace(reply, "Reply");
if(bcast && !stand_alone) {
/* Send reply as T_STONITH_NOTIFY so everyone does notifications
* Potentially limit to unsuccessful operations to the originator?
*/
crm_trace("Broadcast reply");
crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY);
send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE);
} else if(cmd->origin) {
/* the request came from another node: answer that node only */
crm_trace("Directed reply to %s", cmd->origin);
- send_cluster_message(cmd->origin, crm_msg_stonith_ng, reply, FALSE);
+ send_cluster_message(crm_get_peer(0, cmd->origin), crm_msg_stonith_ng, reply, FALSE);
} else {
crm_trace("Directed local %ssync reply to %s", (cmd->options & st_opt_sync_call)?"":"a-", cmd->client_name);
do_local_reply(reply, cmd->client, cmd->options & st_opt_sync_call, FALSE);
}
if(stand_alone) {
/* Do notification with a clean data object */
xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);
crm_xml_add_int(notify_data, F_STONITH_RC, rc);
crm_xml_add(notify_data, F_STONITH_TARGET, cmd->victim);
crm_xml_add(notify_data, F_STONITH_OPERATION, cmd->op);
crm_xml_add(notify_data, F_STONITH_DELEGATE, cmd->device);
crm_xml_add(notify_data, F_STONITH_REMOTE, cmd->remote);
crm_xml_add(notify_data, F_STONITH_ORIGIN, cmd->client);
/* NOTE(review): notify_data is not freed in this function; confirm whether
* do_stonith_notify() takes ownership */
do_stonith_notify(0, T_STONITH_NOTIFY_FENCE, rc, notify_data, NULL);
}
free_xml(reply);
}
/*
 * Take a command off the pending-operation queue of the device it was
 * scheduled on.
 *
 * Nothing happens when the command has no device name recorded, or when that
 * name is not (or no longer) present in device_list.
 */
static void cancel_stonith_command(async_command_t *cmd)
{
    stonith_device_t *owner = NULL;

    CRM_CHECK(cmd != NULL, return);

    if (cmd->device != NULL) {
        owner = g_hash_table_lookup(device_list, cmd->device);
    }
    if (owner == NULL) {
        return;
    }

    crm_trace("Cancel scheduled %s on %s", cmd->action, owner->id);
    owner->pending_ops = g_list_remove(owner->pending_ops, cmd);
}
/* NOTE(review): READ_MAX is not referenced in the visible part of this file - confirm it is still needed */
#define READ_MAX 500
/*
 * Handle the result of a finished device operation.
 *
 * pid       - process id passed on to the logging/reply helpers
 * rc        - result of the operation; 0 is treated as success
 * output    - text output of the operation (may be NULL)
 * user_data - the async_command_t that was executed
 *
 * On failure with further candidate devices queued (cmd->device_next) the
 * command is re-scheduled on the next device and is NOT freed here.
 * Otherwise a reply is sent and, on success, every other queued command that
 * asks for the same action on the same victim via the same device (but comes
 * from a different client) is answered with this result and dropped.
 */
static void st_child_done(GPid pid, int rc, const char *output, gpointer user_data)
{
stonith_device_t *device = NULL;
async_command_t *cmd = user_data;
GListPtr gIter = NULL;
GListPtr gIterNext = NULL;
CRM_CHECK(cmd != NULL, return);
active_children--;
/* The device is ready to do something else now */
device = g_hash_table_lookup(device_list, cmd->device);
if(device) {
device->active_pid = 0;
mainloop_set_trigger(device->work);
}
crm_trace("Operation on %s completed with rc=%d (%d remaining)",
cmd->device, rc, g_list_length(cmd->device_next));
if(rc != 0 && cmd->device_next) {
/* Failed, but there is another device to try: log and move on to it */
stonith_device_t *dev = cmd->device_next->data;
log_operation(cmd, rc, pid, dev->id, output);
cmd->device_next = cmd->device_next->next;
schedule_stonith_command(cmd, dev);
/* Prevent cmd from being freed */
cmd = NULL;
goto done;
}
/* Positive results are collapsed into the generic negative error code */
if(rc > 0) {
rc = -pcmk_err_generic;
}
stonith_send_async_reply(cmd, output, rc, pid);
if(rc != 0) {
goto done;
}
/* Check to see if any operations are scheduled to do the exact
* same thing that just completed. If so, rather than
* performing the same fencing operation twice, return the result
* of this operation for all pending commands it matches. */
for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) {
async_command_t *cmd_other = gIter->data;
/* Remember the successor first: gIter itself may be unlinked and freed below */
gIterNext = gIter->next;
if(cmd == cmd_other) {
continue;
}
/* A pending scheduled command matches the command that just finished if.
* 1. The client connections are different.
* 2. The node victim is the same.
* 3. The fencing action is the same.
* 4. The device scheduled to execute the action is the same.
*/
if(safe_str_eq(cmd->client, cmd_other->client) ||
safe_str_neq(cmd->victim, cmd_other->victim) ||
safe_str_neq(cmd->action, cmd_other->action) ||
safe_str_neq(cmd->device, cmd_other->device)) {
continue;
}
crm_notice("Merging stonith action %s for node %s originating from client %s with identical stonith request from client %s",
cmd_other->action,
cmd_other->victim,
cmd_other->client_name,
cmd->client_name);
/* Unlink the duplicate, answer it with our result, then release it and its list node */
cmd_list = g_list_remove_link(cmd_list, gIter);
stonith_send_async_reply(cmd_other, output, rc, pid);
cancel_stonith_command(cmd_other);
free_async_command(cmd_other);
g_list_free_1(gIter);
}
done:
/* cmd is NULL here when it was handed back to the scheduler above */
free_async_command(cmd);
}
/*
 * List comparison callback that orders fencing devices by descending
 * priority (the highest priority device sorts first).
 *
 * a, b - the two stonith_device_t entries being compared
 *
 * Returns -1 when a has the higher priority, 1 when b does, 0 when equal.
 */
static gint sort_device_priority(gconstpointer a, gconstpointer b)
{
    const stonith_device_t *dev_a = a;
    /* Must come from 'b'; comparing 'a' with itself made every pair equal */
    const stonith_device_t *dev_b = b;

    if (dev_a->priority > dev_b->priority) {
        return -1;
    } else if (dev_a->priority < dev_b->priority) {
        return 1;
    }
    return 0;
}
/*
 * Schedule a fencing request on a device known to this node.
 *
 * When the request names a device, only that device is considered.
 * Otherwise every registered device is searched for one capable of handling
 * the target, the candidates are sorted with sort_device_priority(), the
 * first one is used and - if there are several - the full list is attached
 * to the command so that the remaining ones can be tried afterwards.
 *
 * Returns -EINPROGRESS when the command was scheduled, -EPROTO when no
 * command could be built from the message and -EHOSTUNREACH when no usable
 * device was found.
 */
static int stonith_fence(xmlNode *msg)
{
    stonith_device_t *device = NULL;
    async_command_t *cmd = create_async_command(msg);
    xmlNode *dev = get_xpath_object("//@"F_STONITH_TARGET, msg, LOG_ERR);
    const char *device_id = NULL;

    if (cmd == NULL) {
        return -EPROTO;
    }

    device_id = crm_element_value(dev, F_STONITH_DEVICE);

    if (device_id != NULL) {
        /* A specific device was requested */
        device = g_hash_table_lookup(device_list, device_id);
        if (device == NULL) {
            crm_err("Requested device '%s' is not available", device_id);
        }

    } else {
        /* No device named: look for anything able to fence the target */
        int options = 0;
        guint n_capable = 0;
        struct device_search_s search;

        search.capable = NULL;
        search.host = crm_element_value(dev, F_STONITH_TARGET);

        crm_element_value_int(msg, F_STONITH_CALLOPTS, &options);
        if (options & st_opt_cs_nodeid) {
            /* The target was given as a node id, translate it to a name */
            int nodeid = crm_atoi(search.host, NULL);
            crm_node_t *node = crm_get_peer(nodeid, NULL);

            if (node != NULL) {
                search.host = node->uname;
            }
        }

        g_hash_table_foreach(device_list, search_devices, &search);
        n_capable = g_list_length(search.capable);
        crm_info("Found %d matching devices for '%s'", n_capable, search.host);

        if (n_capable > 0) {
            /* Order based on priority */
            search.capable = g_list_sort(search.capable, sort_device_priority);
            device = search.capable->data;

            if (n_capable == 1) {
                g_list_free(search.capable);
            } else {
                cmd->device_list = search.capable;
                cmd->device_next = cmd->device_list->next;
            }
        }
    }

    if (device == NULL) {
        free_async_command(cmd);
        return -EHOSTUNREACH;
    }

    schedule_stonith_command(cmd, device);
    return -EINPROGRESS;
}
/*
 * Build a T_STONITH_REPLY message answering 'request'.
 *
 * request - message being answered; a fixed set of its attributes is copied
 *           into the reply.  When NULL a warning is logged and the reply is
 *           returned with only the basic fields set.
 * output  - text stored as "st_output"
 * data    - optional XML attached under F_STONITH_CALLDATA
 * rc      - result code stored as F_STONITH_RC
 *
 * Returns the newly created reply.
 */
xmlNode *stonith_construct_reply(xmlNode *request, char *output, xmlNode *data, int rc)
{
    /* Attributes carried over verbatim from the request */
    const char *const copied_fields[] = {
        F_STONITH_OPERATION,
        F_STONITH_CALLID,
        F_STONITH_CLIENTID,
        F_STONITH_CLIENTNAME,
        F_STONITH_REMOTE,
        F_STONITH_CALLOPTS
    };
    xmlNode *reply = NULL;
    int lpc;

    crm_trace("Creating a basic reply");

    reply = create_xml_node(NULL, T_STONITH_REPLY);
    crm_xml_add(reply, "st_origin", __FUNCTION__);
    crm_xml_add(reply, F_TYPE, T_STONITH_NG);
    crm_xml_add(reply, "st_output", output);
    crm_xml_add_int(reply, F_STONITH_RC, rc);

    CRM_CHECK(request != NULL, crm_warn("Can't create a sane reply"); return reply);

    for (lpc = 0; lpc < DIMOF(copied_fields); lpc++) {
        crm_xml_add(reply, copied_fields[lpc],
                    crm_element_value(request, copied_fields[lpc]));
    }

    if (data == NULL) {
        return reply;
    }

    crm_trace("Attaching reply output");
    add_message_xml(reply, F_STONITH_CALLDATA, data);
    return reply;
}
/*
 * Build a T_STONITH_REPLY message describing the outcome of an asynchronous
 * command.
 *
 * cmd    - command whose details (operation, device, client, target, ...)
 *          are copied into the reply
 * output - text stored as "st_output"
 * data   - optional XML attached under F_STONITH_CALLDATA
 * rc     - result code stored as F_STONITH_RC
 *
 * Returns the newly created reply.
 */
static xmlNode *
stonith_construct_async_reply(async_command_t *cmd, const char *output, xmlNode *data, int rc)
{
    /* String attributes, listed in the order they are added to the reply */
    const struct {
        const char *name;
        const char *value;
    } text_fields[] = {
        { "st_origin",          __FUNCTION__ },
        { F_TYPE,               T_STONITH_NG },
        { F_STONITH_OPERATION,  cmd->op },
        { F_STONITH_DEVICE,     cmd->device },
        { F_STONITH_REMOTE,     cmd->remote },
        { F_STONITH_CLIENTID,   cmd->client },
        { F_STONITH_CLIENTNAME, cmd->client_name },
        { F_STONITH_TARGET,     cmd->victim },
        { F_STONITH_ACTION,     cmd->op },
        { F_STONITH_ORIGIN,     cmd->origin },
    };
    const size_t n_text_fields = sizeof(text_fields) / sizeof(text_fields[0]);
    xmlNode *reply = NULL;
    size_t lpc;

    crm_trace("Creating a basic reply");
    reply = create_xml_node(NULL, T_STONITH_REPLY);

    for (lpc = 0; lpc < n_text_fields; lpc++) {
        crm_xml_add(reply, text_fields[lpc].name, text_fields[lpc].value);
    }

    crm_xml_add_int(reply, F_STONITH_CALLID, cmd->id);
    crm_xml_add_int(reply, F_STONITH_CALLOPTS, cmd->options);
    crm_xml_add_int(reply, F_STONITH_RC, rc);

    crm_xml_add(reply, "st_output", output);

    if (data == NULL) {
        return reply;
    }

    crm_info("Attaching reply output");
    add_message_xml(reply, F_STONITH_CALLDATA, data);
    return reply;
}
/*
 * Dispatch one stonith message, coming either from a local IPC client or
 * from a cluster peer.
 *
 * client  - local client the request came from (NULL otherwise)
 * id      - IPC request id, used for acks and for the sync-call assertion
 * flags   - IPC flags; only crm_ipc_client_response is examined here
 * request - the message; its F_STONITH_OPERATION selects the handler
 * remote  - name of the sending peer (NULL for local clients)
 *
 * Several handlers return directly.  The remaining ones fall through to the
 * 'done' label where the outcome is logged and, unless the message was a
 * reply or the operation is still in progress, a reply is sent back to the
 * peer or to the local client.
 */
void
stonith_command(stonith_client_t *client, uint32_t id, uint32_t flags, xmlNode *request, const char *remote)
{
int call_options = 0;
int rc = -EOPNOTSUPP;
gboolean is_reply = FALSE;
gboolean always_reply = FALSE;
xmlNode *reply = NULL;
xmlNode *data = NULL;
char *output = NULL;
const char *op = crm_element_value(request, F_STONITH_OPERATION);
const char *client_id = crm_element_value(request, F_STONITH_CLIENTID);
crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
/* A message that contains a T_STONITH_REPLY element is a reply to an earlier request */
if(get_xpath_object("//"T_STONITH_REPLY, request, LOG_DEBUG_3)) {
is_reply = TRUE;
}
crm_debug("Processing %s%s from %s (%16x)", op, is_reply?" reply":"",
client?client->name:remote, call_options);
if(is_set(call_options, st_opt_sync_call)) {
CRM_ASSERT(client == NULL || client->request_id == id);
}
if(crm_str_eq(op, CRM_OP_REGISTER, TRUE)) {
/* Registration: tell the client its id (this 'reply' shadows the outer variable) */
xmlNode *reply = create_xml_node(NULL, "reply");
CRM_ASSERT(client);
crm_xml_add(reply, F_STONITH_OPERATION, CRM_OP_REGISTER);
crm_xml_add(reply, F_STONITH_CLIENTID, client->id);
crm_ipcs_send(client->channel, id, reply, FALSE);
client->request_id = 0;
free_xml(reply);
return;
} else if(crm_str_eq(op, STONITH_OP_EXEC, TRUE)) {
rc = stonith_device_action(request, &output);
} else if (crm_str_eq(op, STONITH_OP_TIMEOUT_UPDATE, TRUE)) {
/* Timeout update: this 'client_id' shadows the outer variable but reads the same attribute */
const char *call_id = crm_element_value(request, F_STONITH_CALLID);
const char *client_id = crm_element_value(request, F_STONITH_CLIENTID);
int op_timeout = 0;
crm_element_value_int(request, F_STONITH_TIMEOUT, &op_timeout);
do_stonith_async_timeout_update(client_id, call_id, op_timeout);
return;
} else if(is_reply && crm_str_eq(op, STONITH_OP_QUERY, TRUE)) {
process_remote_stonith_query(request);
return;
} else if(crm_str_eq(op, STONITH_OP_QUERY, TRUE)) {
create_remote_stonith_op(client_id, request, TRUE); /* Record it for the future notification */
rc = stonith_query(request, &data);
always_reply = TRUE;
/* Without query data there is nothing to send back */
if(!data) {
return;
}
} else if(is_reply && crm_str_eq(op, T_STONITH_NOTIFY, TRUE)) {
process_remote_stonith_exec(request);
return;
} else if(is_reply && crm_str_eq(op, STONITH_OP_FENCE, TRUE)) {
/* Reply to a complex fencing op */
process_remote_stonith_exec(request);
return;
} else if(crm_str_eq(op, T_STONITH_NOTIFY, TRUE)) {
/* A client switching notification callbacks on or off */
const char *flag_name = NULL;
CRM_ASSERT(client);
flag_name = crm_element_value(request, F_STONITH_NOTIFY_ACTIVATE);
if(flag_name) {
crm_debug("Setting %s callbacks for %s (%s): ON",
flag_name, client->name, client->id);
client->flags |= get_stonith_flag(flag_name);
}
flag_name = crm_element_value(request, F_STONITH_NOTIFY_DEACTIVATE);
if(flag_name) {
crm_debug("Setting %s callbacks for %s (%s): off",
flag_name, client->name, client->id);
/* NOTE(review): this ORs the flag in exactly like the ACTIVATE branch above, so
* "off" never clears anything; presumably this should be '&= ~' - confirm */
client->flags |= get_stonith_flag(flag_name);
}
if(flags & crm_ipc_client_response) {
crm_ipcs_send_ack(client->channel, id, "ack", __FUNCTION__, __LINE__);
client->request_id = 0;
}
return;
/* } else if(is_reply && crm_str_eq(op, STONITH_OP_FENCE, TRUE)) { */
/* process_remote_stonith_exec(request); */
/* return; */
} else if(is_reply == FALSE && crm_str_eq(op, STONITH_OP_RELAY, TRUE)) {
/* A peer forwarded a fencing request to us: start the remote operation on its behalf */
xmlNode *dev = get_xpath_object("//@"F_STONITH_TARGET, request, LOG_TRACE);
crm_notice("Peer %s has received a forwarded fencing request from %s to fence (%s) peer %s",
stonith_our_uname,
client ? client->name : remote,
crm_element_value(dev, F_STONITH_ACTION),
crm_element_value(dev, F_STONITH_TARGET));
if(initiate_remote_stonith_op(NULL, request, FALSE) != NULL) {
rc = -EINPROGRESS;
}
} else if(is_reply == FALSE && crm_str_eq(op, STONITH_OP_FENCE, TRUE)) {
if(remote || stand_alone) {
/* Requests from peers (or in stand-alone mode) are executed directly */
rc = stonith_fence(request);
} else if(call_options & st_opt_manual_ack) {
remote_fencing_op_t *rop = initiate_remote_stonith_op(client, request, TRUE);
rc = stonith_manual_ack(request, rop);
} else {
const char *alternate_host = NULL;
xmlNode *dev = get_xpath_object("//@"F_STONITH_TARGET, request, LOG_TRACE);
const char *target = crm_element_value(dev, F_STONITH_TARGET);
const char *action = crm_element_value(dev, F_STONITH_ACTION);
const char *device = crm_element_value(dev, F_STONITH_DEVICE);
if(client) {
int tolerance = 0;
crm_element_value_int(dev, F_STONITH_TOLERANCE, &tolerance);
crm_notice("Client %s.%.8s wants to fence (%s) '%s' with device '%s'",
client->name, client->id, action, target, device?device:"(any)");
crm_trace("tolerance=%d, remote_op_list=%p", tolerance, remote_op_list);
/* With a tolerance set, a completed op with the same target and action that
* finished within the last 'tolerance' seconds satisfies this request */
if(tolerance > 0 && remote_op_list) {
GHashTableIter iter;
time_t now = time(NULL);
remote_fencing_op_t *op = NULL;
g_hash_table_iter_init(&iter, remote_op_list);
while(g_hash_table_iter_next(&iter, NULL, (void**)&op)) {
if (target == NULL || action == NULL) {
continue;
} else if(strcmp(op->target, target) != 0) {
continue;
} else if(op->state != st_done) {
continue;
} else if(strcmp(op->action, action) != 0) {
continue;
} else if((op->completed + tolerance) < now) {
continue;
}
crm_notice("Target %s was fenced (%s) less than %ds ago by %s on behalf of %s",
target, action, tolerance, op->delegate, op->originator);
rc = 0;
goto done;
}
}
} else {
crm_notice("Peer %s wants to fence (%s) '%s' with device '%s'",
remote, action, target, device?device:"(any)");
}
/* Asked to fence ourselves while a topology entry exists for us: find another
* peer to hand the request to.
* NOTE(review): 'target' is passed to g_hash_table_lookup() without a NULL check -
* confirm the topology table's hash function tolerates NULL keys */
if(g_hash_table_lookup(topology, target) && safe_str_eq(target, stonith_our_uname)) {
GHashTableIter gIter;
crm_node_t *entry = NULL;
int membership = crm_proc_plugin | crm_proc_heartbeat | crm_proc_cpg;
g_hash_table_iter_init(&gIter, crm_peer_cache);
while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
crm_trace("Checking for %s.%d != %s",
entry->uname, entry->id, target);
if(entry->uname
&& (entry->processes & membership)
&& safe_str_neq(entry->uname, target)) {
alternate_host = entry->uname;
break;
}
}
if(alternate_host == NULL) {
crm_err("No alternate host available to handle complex self fencing request");
g_hash_table_iter_init(&gIter, crm_peer_cache);
while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
crm_notice("Peer[%d] %s", entry->id, entry->uname);
}
}
}
if(alternate_host) {
crm_notice("Forwarding complex self fencing request to peer %s", alternate_host);
/* Re-label the request as a relay.
* NOTE(review): client->id is dereferenced without checking 'client' - confirm that
* a request with neither 'remote' nor 'client' set can never reach this branch */
crm_xml_add(request, F_STONITH_OPERATION, STONITH_OP_RELAY);
crm_xml_add(request, F_STONITH_CLIENTID, client->id);
- send_cluster_message(alternate_host, crm_msg_stonith_ng, request, FALSE);
+ send_cluster_message(crm_get_peer(0, alternate_host), crm_msg_stonith_ng, request, FALSE);
rc = -EINPROGRESS;
} else if(initiate_remote_stonith_op(client, request, FALSE) != NULL) {
rc = -EINPROGRESS;
}
}
} else if (crm_str_eq(op, STONITH_OP_FENCE_HISTORY, TRUE)) {
rc = stonith_fence_history(request, &data);
always_reply = TRUE;
/* The first branch of this chain already handles CRM_OP_REGISTER and returns,
* so the next branch cannot be reached */
} else if(crm_str_eq(op, CRM_OP_REGISTER, TRUE)) {
return;
} else if(crm_str_eq(op, STONITH_OP_DEVICE_ADD, TRUE)) {
/* Device and level changes: apply, then notify with the new table size */
const char *id = NULL;
xmlNode *notify_data = create_xml_node(NULL, op);
rc = stonith_device_register(request, &id);
crm_xml_add(notify_data, F_STONITH_DEVICE, id);
crm_xml_add_int(notify_data, F_STONITH_ACTIVE, g_hash_table_size(device_list));
do_stonith_notify(call_options, op, rc, notify_data, NULL);
free_xml(notify_data);
} else if(crm_str_eq(op, STONITH_OP_DEVICE_DEL, TRUE)) {
const char *id = NULL;
xmlNode *notify_data = create_xml_node(NULL, op);
rc = stonith_device_remove(request, &id);
crm_xml_add(notify_data, F_STONITH_DEVICE, id);
crm_xml_add_int(notify_data, F_STONITH_ACTIVE, g_hash_table_size(device_list));
do_stonith_notify(call_options, op, rc, notify_data, NULL);
free_xml(notify_data);
} else if(crm_str_eq(op, STONITH_OP_LEVEL_ADD, TRUE)) {
/* NOTE(review): 'id' is a non-const char* here and is never freed in this branch -
* confirm whether stonith_level_register() hands over ownership */
char *id = NULL;
xmlNode *notify_data = create_xml_node(NULL, op);
rc = stonith_level_register(request, &id);
crm_xml_add(notify_data, F_STONITH_DEVICE, id);
crm_xml_add_int(notify_data, F_STONITH_ACTIVE, g_hash_table_size(topology));
do_stonith_notify(call_options, op, rc, notify_data, NULL);
free_xml(notify_data);
} else if(crm_str_eq(op, STONITH_OP_LEVEL_DEL, TRUE)) {
/* NOTE(review): same ownership question for 'id' as in the LEVEL_ADD branch */
char *id = NULL;
xmlNode *notify_data = create_xml_node(NULL, op);
rc = stonith_level_remove(request, &id);
crm_xml_add(notify_data, F_STONITH_DEVICE, id);
crm_xml_add_int(notify_data, F_STONITH_ACTIVE, g_hash_table_size(topology));
do_stonith_notify(call_options, op, rc, notify_data, NULL);
free_xml(notify_data);
} else if(crm_str_eq(op, STONITH_OP_CONFIRM, TRUE)) {
/* Manual confirmation: broadcast a successful (rc=0) result to the whole cluster.
* rc is not updated in this branch, so the reply sent at 'done' still carries -EOPNOTSUPP.
* NOTE(review): 'cmd' is used without a NULL check although stonith_fence() treats
* a NULL result from create_async_command() as possible */
async_command_t *cmd = create_async_command(request);
xmlNode *reply = stonith_construct_async_reply(cmd, NULL, NULL, 0);
crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY);
crm_notice("Broadcasting manual fencing confirmation for node %s", cmd->victim);
send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE);
free_async_command(cmd);
free_xml(reply);
} else {
crm_err("Unknown %s%s from %s", op, is_reply?" reply":"",
client?client->name:remote);
crm_log_xml_warn(request, "UnknownOp");
}
done:
do_crm_log_unlikely(rc>0?LOG_DEBUG:LOG_INFO,"Processed %s%s from %s: %s (%d)", op, is_reply?" reply":"",
client?client->name:remote, rc>0?"":pcmk_strerror(rc), rc);
if(is_reply || rc == -EINPROGRESS) {
/* Nothing (yet) */
} else if(remote) {
/* The request came from a peer: send the reply back over the cluster */
reply = stonith_construct_reply(request, output, data, rc);
- send_cluster_message(remote, crm_msg_stonith_ng, reply, FALSE);
+ send_cluster_message(crm_get_peer(0, remote), crm_msg_stonith_ng, reply, FALSE);
free_xml(reply);
} else if(rc <= pcmk_ok || always_reply) {
/* Local client; 'remote' is NULL in this branch so the last argument is always FALSE */
reply = stonith_construct_reply(request, output, data, rc);
do_local_reply(reply, client_id, call_options & st_opt_sync_call, remote!=NULL);
free_xml(reply);
}
free(output);
free_xml(data);
}
diff --git a/fencing/remote.c b/fencing/remote.c
index 4d423fcc27..8833777721 100644
--- a/fencing/remote.c
+++ b/fencing/remote.c
@@ -1,800 +1,800 @@
/*
* Copyright (C) 2009 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/utsname.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <ctype.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/ipc.h>
#include <crm/cluster/internal.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <crm/common/xml.h>
#include <crm/common/util.h>
#include <internal.h>
/* One peer's answer to a fencing query, as recorded by process_remote_stonith_query() */
typedef struct st_query_result_s
{
/* Name of the peer that replied (a copy of the reply's F_ORIG value) */
char *host;
/* Number of devices the peer reported in "st-available-devices" */
int devices;
/* Ids of the reported devices; every entry is a strdup()'d string */
GListPtr device_list;
/* Per-device timeouts looked up by device id; keys are released with free(),
* values are read back with GPOINTER_TO_INT() */
GHashTable *custom_action_timeouts;
} st_query_result_t;
/* All known remote fencing operations, keyed by operation id.
* Created on first use by create_remote_stonith_op(); values are released with free_remote_op() */
GHashTable *remote_op_list = NULL;
/* Forward declaration: used by the timeout handlers defined before its body */
void call_remote_stonith(remote_fencing_op_t *op, st_query_result_t *peer);
/* Defined outside this file; builds a stonith request message */
extern xmlNode *stonith_create_op(
int call_id, const char *token, const char *op, xmlNode *data, int call_options);
/*
 * Release a st_query_result_t together with everything it owns.
 *
 * data - the query result to free; NULL is accepted and ignored.
 *
 * The device list holds strdup()'d device ids, so the strings are released
 * along with the list nodes (previously both were leaked).
 */
static void free_remote_query(gpointer data)
{
    st_query_result_t *query = data;

    if (query == NULL) {
        return;
    }

    crm_trace("Free'ing query result from %s", query->host);
    free(query->host);
    g_list_free_full(query->device_list, free);
    g_hash_table_destroy(query->custom_action_timeouts);
    free(query);
}
/*
 * Destroy a remote fencing operation record.
 *
 * Installed as the value destructor of remote_op_list.  Frees every string
 * owned by the record (including the delegate name copied in
 * remote_op_done(), which used to be leaked), cancels any timers that are
 * still registered and releases the query results and the saved request.
 */
static void free_remote_op(gpointer data)
{
    remote_fencing_op_t *op = data;

    crm_trace("Free'ing op %s for %s", op->id, op->target);
    crm_log_xml_debug(op->request, "Destroying");

    free(op->id);
    free(op->action);
    free(op->delegate);
    free(op->target);
    free(op->client_id);
    free(op->client_name);
    free(op->originator);

    if (op->query_timer) {
        g_source_remove(op->query_timer);
    }
    if (op->op_timer) {
        g_source_remove(op->op_timer);
    }
    if (op->query_results) {
        g_list_free_full(op->query_results, free_remote_query);
    }
    if (op->request) {
        free_xml(op->request);
        op->request = NULL;
    }
    free(op);
}
/*
 * Finish a remote fencing operation: stop its timers, answer the requesting
 * client, notify everyone interested and trim the record down to what is
 * needed for history queries.
 *
 * op   - the operation that finished
 * data - result payload received from the executing peer, or NULL when there
 *        is none (e.g. on timeout); state, target and action attributes are
 *        added to it
 * rc   - final result code
 *
 * The saved request is released at the end, so a second call for the same
 * operation only logs an error and returns.
 */
static void remote_op_done(remote_fencing_op_t *op, xmlNode *data, int rc)
{
    xmlNode *reply = NULL;
    xmlNode *local_data = NULL;
    xmlNode *notify_data = NULL;

    op->completed = time(NULL);

    if (op->query_timer) {
        g_source_remove(op->query_timer);
        op->query_timer = 0;
    }
    if (op->op_timer) {
        g_source_remove(op->op_timer);
        op->op_timer = 0;
    }

    if (data == NULL) {
        /* No payload supplied: use a scratch node that we own and must free */
        data = create_xml_node(NULL, "remote-op");
        local_data = data;
    } else {
        /* Drop any previously recorded delegate instead of leaking it */
        free(op->delegate);
        op->delegate = crm_element_value_copy(data, F_ORIG);
    }

    crm_xml_add_int(data, "state", op->state);
    crm_xml_add(data, F_STONITH_TARGET, op->target);
    crm_xml_add(data, F_STONITH_OPERATION, op->action);

    if (op->request != NULL) {
        reply = stonith_construct_reply(op->request, NULL, data, rc);
        crm_xml_add(reply, F_STONITH_DELEGATE, op->delegate);
        do_crm_log(rc==pcmk_ok?LOG_NOTICE:LOG_ERR,
                   "Operation %s of %s by %s for %s@%s: %s",
                   op->action, op->target, op->delegate?op->delegate:"<no-one>",
                   op->client_name, op->originator, pcmk_strerror(rc));
    } else {
        crm_err("Already sent notifications for '%s of %s by %s' (op=%s, for=%s@%s, state=%d): %s",
                op->action, op->target, op->delegate, op->id, op->client_name, op->originator,
                op->state, pcmk_strerror(rc));
        /* The scratch node used to be leaked on this early exit */
        free_xml(local_data);
        return;
    }

    if (reply) {
        do_local_reply(reply, op->client_id, op->call_options & st_opt_sync_call, FALSE);
    }

    /* Do notification with a clean data object */
    notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);
    crm_xml_add_int(notify_data, "state", op->state);
    crm_xml_add_int(notify_data, F_STONITH_RC, rc);
    crm_xml_add(notify_data, F_STONITH_TARGET, op->target);
    crm_xml_add(notify_data, F_STONITH_ACTION, op->action);
    crm_xml_add(notify_data, F_STONITH_DELEGATE, op->delegate);
    crm_xml_add(notify_data, F_STONITH_REMOTE, op->id);
    crm_xml_add(notify_data, F_STONITH_ORIGIN, op->originator);
    crm_xml_add(notify_data, F_STONITH_CLIENTID, op->client_id);
    crm_xml_add(notify_data, F_STONITH_CLIENTNAME, op->client_name);

    do_stonith_notify(0, T_STONITH_NOTIFY_FENCE, rc, notify_data, NULL);

    free_xml(notify_data);
    free_xml(local_data);
    free_xml(reply);

    /* Free non-essential parts of the record
     * Keep the record around so we can query the history
     */
    if (op->query_results) {
        g_list_free_full(op->query_results, free_remote_query);
        op->query_results = NULL;
    }
    if (op->request) {
        free_xml(op->request);
        op->request = NULL;
    }
}
/*
 * Fail a remote fencing operation with -ETIME unless it already completed.
 *
 * Registered as the callback of the operation timer and also called directly
 * when no capable peer is left.  Always returns FALSE so that the timer that
 * fired is not re-armed.
 *
 * The operation is marked st_failed BEFORE remote_op_done() runs, because
 * remote_op_done() copies op->state into the reply and the notification;
 * setting it afterwards reported the stale pre-timeout state.
 */
static gboolean remote_op_timeout(gpointer userdata)
{
    remote_fencing_op_t *op = userdata;

    /* NOTE(review): this clears query_timer although the function is installed
     * as the op_timer callback - confirm which timer id was meant */
    op->query_timer = 0;

    if (op->state == st_done) {
        crm_debug("Action %s (%s) for %s already completed", op->action, op->id, op->target);
        return FALSE;
    }

    crm_debug("Action %s (%s) for %s timed out", op->action, op->id, op->target);
    op->state = st_failed;
    remote_op_done(op, NULL, -ETIME);

    return FALSE;
}
/*
 * Timer callback that ends the query phase of a remote fencing operation.
 *
 * Operations that are already done or executing are left alone.  If at least
 * one peer answered, fencing is started with the collected results;
 * otherwise the operation timer is cancelled and the operation is failed via
 * remote_op_timeout().
 *
 * Always returns FALSE so the timer does not fire again.
 */
static gboolean remote_op_query_timeout(gpointer data)
{
    remote_fencing_op_t *op = data;

    op->query_timer = 0;

    if (op->state == st_done) {
        crm_debug("Operation %s for %s already completed", op->id, op->target);
        return FALSE;
    }

    if (op->state == st_exec) {
        crm_debug("Operation %s for %s already in progress", op->id, op->target);
        return FALSE;
    }

    if (op->query_results != NULL) {
        crm_debug("Query %s for %s complete: %d", op->id, op->target, op->state);
        call_remote_stonith(op, NULL);
        return FALSE;
    }

    /* Nobody replied: give up on the whole operation */
    if (op->op_timer) {
        g_source_remove(op->op_timer);
        op->op_timer = 0;
    }
    remote_op_timeout(op);
    return FALSE;
}
/*
 * Move an operation on to the next populated fencing level of its target.
 *
 * Returns pcmk_ok when the target has no topology entry at all (nothing to
 * do) or when another level with devices was found, in which case
 * op->devices points at that level's device list.  Returns -EINVAL once all
 * levels have been used up.
 */
static int stonith_topology_next(remote_fencing_op_t *op)
{
    stonith_topology_t *tp = NULL;

    /* Queries don't have a target set */
    if (op->target != NULL) {
        tp = g_hash_table_lookup(topology, op->target);
    }
    if (tp == NULL) {
        return pcmk_ok;
    }

    set_bit(op->call_options, st_opt_topology);

    /* Always advance by at least one level, then skip the empty ones */
    for (op->level++; op->level < ST_LEVEL_MAX; op->level++) {
        if (tp->levels[op->level] != NULL) {
            break;
        }
    }

    if (op->level >= ST_LEVEL_MAX) {
        crm_notice("All fencing options to fence %s for %s@%s failed",
                   op->target, op->client_name, op->originator);
        return -EINVAL;
    }

    crm_trace("Attempting fencing level %d for %s (%d devices) - %s@%s",
              op->level, op->target, g_list_length(tp->levels[op->level]),
              op->client_name, op->originator);
    op->devices = tp->levels[op->level];
    return pcmk_ok;
}
/*
 * Create (or look up) the record tracking a remote fencing operation.
 *
 * client  - id of the requesting client, may be NULL
 * request - the request message; a copy is stored in the record
 * peer    - TRUE when the request came from a peer and therefore already
 *           carries an operation id (F_STONITH_REMOTE)
 *
 * For peer requests an existing record with the same id is returned as is.
 * Otherwise a new record is allocated, registered in remote_op_list (which
 * is created on first use) and initialised from the request.  A target given
 * as a node id is translated to a node name once.
 *
 * Returns the record, or NULL when a peer request lacks its operation id.
 */
void *create_remote_stonith_op(const char *client, xmlNode *request, gboolean peer)
{
    remote_fencing_op_t *op = NULL;
    xmlNode *dev = get_xpath_object("//@"F_STONITH_TARGET, request, LOG_TRACE);
    gboolean reuse_peer_id = (peer && dev);

    if (remote_op_list == NULL) {
        remote_op_list = g_hash_table_new_full(
            crm_str_hash, g_str_equal, NULL, free_remote_op);
    }

    if (reuse_peer_id) {
        const char *peer_id = crm_element_value(dev, F_STONITH_REMOTE);

        CRM_CHECK(peer_id != NULL, return NULL);

        op = g_hash_table_lookup(remote_op_list, peer_id);
        if (op != NULL) {
            crm_debug("%s already exists", peer_id);
            return op;
        }
    }

    op = calloc(1, sizeof(remote_fencing_op_t));

    crm_element_value_int(request, F_STONITH_TIMEOUT, (int*)&(op->base_timeout));

    if (reuse_peer_id) {
        op->id = crm_element_value_copy(dev, F_STONITH_REMOTE);
        crm_trace("Recorded new stonith op: %s", op->id);
    } else {
        op->id = crm_generate_uuid();
        crm_trace("Generated new stonith op: %s", op->id);
    }

    g_hash_table_replace(remote_op_list, op->id, op);
    CRM_LOG_ASSERT(g_hash_table_lookup(remote_op_list, op->id) != NULL);

    op->state = st_query;
    op->action = crm_element_value_copy(dev, F_STONITH_ACTION);

    op->originator = crm_element_value_copy(dev, F_STONITH_ORIGIN);
    if (op->originator == NULL) {
        /* Local request */
        op->originator = strdup(stonith_our_uname);
    }

    if (client != NULL) {
        op->client_id = strdup(client);
    }
    op->client_name = crm_element_value_copy(request, F_STONITH_CLIENTNAME);

    op->target = crm_element_value_copy(dev, F_STONITH_TARGET);
    op->request = copy_xml(request); /* TODO: Figure out how to avoid this */
    crm_element_value_int(request, F_STONITH_CALLOPTS, (int*)&(op->call_options));

    if (op->call_options & st_opt_cs_nodeid) {
        int nodeid = crm_atoi(op->target, NULL);
        crm_node_t *node = crm_get_peer(nodeid, NULL);

        /* Ensure the conversion only happens once */
        op->call_options &= ~st_opt_cs_nodeid;

        if (node == NULL || node->uname == NULL) {
            crm_warn("Could not expand nodeid '%s' into a host name (%p)", op->target, node);
        } else {
            free(op->target);
            op->target = strdup(node->uname);
        }
    }

    if (stonith_topology_next(op) != pcmk_ok) {
        op->state = st_failed;
    }
    return op;
}
/*
 * Start a cluster-wide fencing operation by broadcasting a query that asks
 * every peer which devices it could use against the target.
 *
 * client     - local client making the request; when NULL the client id is
 *              taken from the request instead
 * request    - the fencing request
 * manual_ack - when TRUE no query timer is armed and the query is tagged
 *              with the "manual_ack" device
 *
 * Returns the operation record, or NULL when the request carries no action.
 * The query message is released on that error path as well (it used to be
 * leaked), and the action is validated before it is logged.
 */
remote_fencing_op_t *initiate_remote_stonith_op(stonith_client_t *client, xmlNode *request, gboolean manual_ack)
{
    xmlNode *query = NULL;
    const char *client_id = NULL;
    remote_fencing_op_t *op = NULL;

    if (client) {
        client_id = client->id;
    } else {
        client_id = crm_element_value(request, F_STONITH_CLIENTID);
    }

    CRM_LOG_ASSERT(client_id != NULL);
    op = create_remote_stonith_op(client_id, request, FALSE);
    query = stonith_create_op(0, op->id, STONITH_OP_QUERY, NULL, 0);

    if (!manual_ack) {
        op->query_timer = g_timeout_add(100*op->base_timeout, remote_op_query_timeout, op);
    } else {
        crm_xml_add(query, F_STONITH_DEVICE, "manual_ack");
    }

    crm_xml_add(query, F_STONITH_REMOTE, op->id);
    crm_xml_add(query, F_STONITH_TARGET, op->target);
    crm_xml_add(query, F_STONITH_ACTION, op->action);
    crm_xml_add(query, F_STONITH_ORIGIN, op->originator);
    crm_xml_add(query, F_STONITH_CLIENTID, op->client_id);
    crm_xml_add(query, F_STONITH_CLIENTNAME, op->client_name);
    crm_xml_add_int(query, F_STONITH_TIMEOUT, op->base_timeout);

    /* Validate before logging, and do not leak the query on failure */
    CRM_CHECK(op->action, free_xml(query); return NULL);

    crm_notice("Initiating remote operation %s for %s: %s", op->action, op->target, op->id);

    send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE);
    free_xml(query);
    return op;
}
/*
 * List comparison callback for C strings: plain strcmp() ordering, so a
 * result of 0 means the two strings are equal.
 */
static gint sort_strings(gconstpointer a, gconstpointer b)
{
    const char *lhs = a;
    const char *rhs = b;

    return strcmp(lhs, rhs);
}
/*
 * Pick the peer that should carry out the next step of a fencing operation.
 *
 * With topology in use, the peer must have reported the device at the head
 * of the current level's device list; that device is then removed from the
 * peer's list.  If nobody has it, the next fencing level is tried.  Without
 * topology, the first peer in the query results that reported any devices
 * is used.
 *
 * Returns the chosen peer, or NULL when no suitable peer is known.
 */
static st_query_result_t *stonith_choose_peer(remote_fencing_op_t *op)
{
    GListPtr iter = NULL;

    do {
        if (op->devices) {
            crm_trace("Checking for someone to fence %s with %s", op->target, (char*)op->devices->data);
        } else {
            crm_trace("Checking for someone to fence %s", op->target);
        }

        for (iter = op->query_results; iter != NULL; iter = iter->next) {
            st_query_result_t *peer = iter->data;

            if (is_set(op->call_options, st_opt_topology)) {
                /* Do they have the next device of the current fencing level? */
                GListPtr match = NULL;

                if (op->devices) {
                    match = g_list_find_custom(peer->device_list, op->devices->data, sort_strings);
                }
                if (match) {
                    /* The entry is a strdup()'d id owned by the list: free it
                     * once unlinked instead of leaking it */
                    char *claimed = match->data;

                    crm_trace("Removing %s from %s (%d remaining)", claimed, peer->host, g_list_length(peer->device_list));
                    peer->device_list = g_list_remove(peer->device_list, claimed);
                    free(claimed);
                    return peer;
                }

            } else if (peer && peer->devices > 0) {
                /* No topology: Use the current best peer */
                crm_trace("Simple fencing");
                return peer;
            }
        }

        /* Try the next fencing level if there is one */
    } while (is_set(op->call_options, st_opt_topology)
             && stonith_topology_next(op) == pcmk_ok);

    if (op->devices) {
        crm_trace("Couldn't find anyone to fence %s with %s", op->target, (char*)op->devices->data);
    } else {
        crm_trace("Couldn't find anyone to fence %s", op->target);
    }
    return NULL;
}
/*
 * Look up the timeout a peer recorded for one of its devices.
 *
 * Returns default_timeout when either argument is NULL or when the peer has
 * no entry for the device in its custom_action_timeouts table.
 */
static int
get_device_timeout(st_query_result_t *peer, const char *device, int default_timeout)
{
    gpointer custom = NULL;

    if (peer != NULL && device != NULL) {
        custom = g_hash_table_lookup(peer->custom_action_timeouts, device);
    }

    if (custom == NULL) {
        return default_timeout;
    }
    return GPOINTER_TO_INT(custom);
}
/*
 * Estimate the total time a fencing operation may need.
 *
 * op              - the operation
 * chosen_peer     - peer selected to execute it (may be NULL)
 * default_timeout - timeout used for devices without a custom value
 *
 * With topology, the timeouts of every device in every level are summed,
 * each taken from the peer that reported owning the device.  Previously the
 * lookup was made on chosen_peer, so timeouts reported by the owning peers
 * were ignored.  Without topology, the timeouts of all devices of the chosen
 * peer are summed.  Falls back to default_timeout when the sum is zero.
 */
static int
get_op_total_timeout(remote_fencing_op_t *op, st_query_result_t *chosen_peer, int default_timeout)
{
    stonith_topology_t *tp = NULL;
    int total_timeout = 0;

    /* Same guard as stonith_topology_next(): there may be no target to look up */
    if (op->target) {
        tp = g_hash_table_lookup(topology, op->target);
    }

    if (is_set(op->call_options, st_opt_topology) && tp) {
        int i;
        GListPtr device_list = NULL;
        GListPtr iter = NULL;

        /* Loop1: iterate through fencing levels.
         * Loop2: iterate through the devices of a level.
         * Loop3: find the first peer that reported the device and use the
         *        timeout that peer recorded for it.
         */
        for (i = 0; i < ST_LEVEL_MAX; i++) {
            if (!tp->levels[i]) {
                continue;
            }
            for (device_list = tp->levels[i]; device_list; device_list = device_list->next) {
                for (iter = op->query_results; iter != NULL; iter = iter->next) {
                    st_query_result_t *peer = iter->data;

                    if (g_list_find_custom(peer->device_list, device_list->data, sort_strings)) {
                        total_timeout += get_device_timeout(peer, device_list->data, default_timeout);
                        break;
                    }
                } /* End Loop3 */
            } /* End Loop2 */
        } /* End Loop1 */

    } else if (chosen_peer) {
        GListPtr cur = NULL;

        for (cur = chosen_peer->device_list; cur; cur = cur->next) {
            total_timeout += get_device_timeout(chosen_peer, cur->data, default_timeout);
        }

    } else {
        total_timeout = default_timeout;
    }

    return total_timeout ? total_timeout : default_timeout;
}
/*
 * Tell the requesting client how long the operation may take in total.
 *
 * op         - the operation; client node, call id and client id are read
 *              from its saved request
 * op_timeout - the timeout value to report
 *
 * Nothing is sent for synchronous calls, when the request is no longer
 * stored, or when any of the three identifying values is missing.  A client
 * connected to this node is updated directly, otherwise the update is sent
 * to the client's node as a STONITH_OP_TIMEOUT_UPDATE message.
 */
static void
report_timeout_period(remote_fencing_op_t *op, int op_timeout)
{
xmlNode *update = NULL;
const char *client_node = NULL;
const char *client_id = NULL;
const char *call_id = NULL;
if (op->call_options & st_opt_sync_call) {
/* There is no reason to report the timeout for a synchronous call. It
* is impossible to use the reported timeout to do anything when the client
* is blocking for the response. This update is only important for
* async calls that require a callback to report the results in. */
return;
} else if (!op->request) {
return;
}
client_node = crm_element_value(op->request, F_STONITH_CLIENTNODE);
call_id = crm_element_value(op->request, F_STONITH_CALLID);
client_id = crm_element_value(op->request, F_STONITH_CLIENTID);
if (!client_node || !call_id || !client_id) {
return;
}
if (safe_str_eq(client_node, stonith_our_uname)) {
/* The client is connected to this node, send the update directly to them */
do_stonith_async_timeout_update(client_id, call_id, op_timeout);
return;
}
/* The client is connected to another node, relay this update to them */
update = stonith_create_op(0, op->id, STONITH_OP_TIMEOUT_UPDATE, NULL, 0);
crm_xml_add(update, F_STONITH_REMOTE, op->id);
crm_xml_add(update, F_STONITH_CLIENTID, client_id);
crm_xml_add(update, F_STONITH_CALLID, call_id);
crm_xml_add_int(update, F_STONITH_TIMEOUT, op_timeout);
- send_cluster_message(client_node, crm_msg_stonith_ng, update, FALSE);
+ send_cluster_message(crm_get_peer(0, client_node), crm_msg_stonith_ng, update, FALSE);
free_xml(update);
}
/*
 * Ask a peer to execute the next step of a fencing operation.
 *
 * op   - the operation being driven
 * peer - preferred peer, or NULL to choose one from the query results; the
 *        preference is ignored when topology is in use
 *
 * The first call also arms the overall operation timer and reports the
 * computed timeout to the client.  When no peer is available the operation
 * is failed if the query phase is over (query_timer == 0); otherwise we wait
 * for more query replies.
 */
void call_remote_stonith(remote_fencing_op_t *op, st_query_result_t *peer)
{
const char *device = NULL;
int timeout = op->base_timeout;
if(peer == NULL && !is_set(op->call_options, st_opt_topology)) {
peer = stonith_choose_peer(op);
}
if(!op->op_timer) {
/* First call for this op: arm the timer with 1200ms per second of computed timeout */
int op_timeout = get_op_total_timeout(op, peer, op->base_timeout);
op->op_timer = g_timeout_add((1200 * op_timeout), remote_op_timeout, op);
report_timeout_period(op, op_timeout);
crm_info("Total remote op timeout set to %d for fencing of node %s", op_timeout, op->target);
}
if(is_set(op->call_options, st_opt_topology)) {
/* Ignore any preference, they might not have the device we need */
/* When using topology, the stonith_choose_peer function pops off
* the peer from the op's query results. Make sure to calculate
* the op_timeout before calling this function when topology is in use */
peer = stonith_choose_peer(op);
/* NOTE(review): op->devices is dereferenced without a NULL check although
* stonith_choose_peer() itself treats a NULL op->devices as possible - confirm */
device = op->devices->data;
timeout = get_device_timeout(peer, device, timeout);
}
if(peer) {
/* Build the fencing request for the chosen peer */
xmlNode *query = stonith_create_op(0, op->id, STONITH_OP_FENCE, NULL, 0);
crm_xml_add(query, F_STONITH_REMOTE, op->id);
crm_xml_add(query, F_STONITH_TARGET, op->target);
crm_xml_add(query, F_STONITH_ACTION, op->action);
crm_xml_add(query, F_STONITH_ORIGIN, op->originator);
crm_xml_add(query, F_STONITH_CLIENTID, op->client_id);
crm_xml_add(query, F_STONITH_CLIENTNAME, op->client_name);
crm_xml_add_int(query, F_STONITH_TIMEOUT, timeout);
if(device) {
/* Topology: the peer must use exactly this device ("slave" mode) */
crm_info("Requesting that %s perform op %s %s with %s", peer->host, op->action, op->target, device);
crm_xml_add(query, F_STONITH_DEVICE, device);
crm_xml_add(query, F_STONITH_MODE, "slave");
} else {
crm_info("Requesting that %s perform op %s %s", peer->host, op->action, op->target);
crm_xml_add(query, F_STONITH_MODE, "smart");
}
op->state = st_exec;
- send_cluster_message(peer->host, crm_msg_stonith_ng, query, FALSE);
+ send_cluster_message(crm_get_peer(0, peer->host), crm_msg_stonith_ng, query, FALSE);
free_xml(query);
return;
} else if(op->query_timer == 0) {
/* We've exhausted all available peers */
crm_info("No remaining peers capable of terminating %s", op->target);
remote_op_timeout(op);
} else if(device) {
crm_info("Waiting for additional peers capable of terminating %s with %s", op->target, device);
} else {
crm_info("Waiting for additional peers capable of terminating %s", op->target);
}
/* peer is always NULL when this line is reached, so this call does nothing */
free_remote_query(peer);
}
/*
 * Comparison callback used with g_list_insert_sorted() to order query
 * results so that peers reporting more devices come first.
 *
 * Returns -1 if a has more devices than b, 1 if it has fewer, 0 if equal.
 */
static gint sort_peers(gconstpointer a, gconstpointer b)
{
    const st_query_result_t *peer_a = a;
    const st_query_result_t *peer_b = b;

    if (peer_a->devices > peer_b->devices) {
        return -1;
    } else if (peer_a->devices < peer_b->devices) {
        return 1;
    }
    return 0;
}
/*
 * Handle a peer's reply to a fencing query.
 *
 * The reply names the remote operation (F_STONITH_REMOTE) and the number of
 * devices the peer has available ("st-available-devices"), with one child
 * element per device.  A usable reply is recorded in op->query_results and,
 * if the operation is still in the query phase and is not waiting for all
 * replies, fencing is requested straight away.
 *
 * Returns pcmk_ok when the reply was processed (or deliberately ignored),
 * -EPROTO for a malformed message, -EOPNOTSUPP when the operation is unknown
 * and -ENOMEM if the result record could not be allocated.
 */
int process_remote_stonith_query(xmlNode *msg)
{
    int devices = 0;
    const char *id = NULL;
    const char *host = NULL;
    remote_fencing_op_t *op = NULL;
    st_query_result_t *result = NULL;
    xmlNode *dev = get_xpath_object("//@"F_STONITH_REMOTE, msg, LOG_ERR);
    xmlNode *child = NULL;

    CRM_CHECK(dev != NULL, return -EPROTO);

    id = crm_element_value(dev, F_STONITH_REMOTE);
    CRM_CHECK(id != NULL, return -EPROTO);

    dev = get_xpath_object("//@st-available-devices", msg, LOG_ERR);
    CRM_CHECK(dev != NULL, return -EPROTO);
    crm_element_value_int(dev, "st-available-devices", &devices);

    /* The table may not exist yet; the exec handler guards the same way */
    if(remote_op_list) {
        op = g_hash_table_lookup(remote_op_list, id);
    }
    if(op == NULL) {
        crm_debug("Unknown or expired remote op: %s", id);
        return -EOPNOTSUPP;
    }

    op->replies++;
    host = crm_element_value(msg, F_ORIG);

    if(devices <= 0) {
        /* If we're doing 'known' then we might need to fire anyway */
        crm_trace("Query result from %s (%d devices)", host, devices);
        return pcmk_ok;

    } else if(op->call_options & st_opt_allow_suicide) {
        crm_trace("Allowing %s to potentialy fence itself", op->target);

    } else if(safe_str_eq(host, op->target)) {
        crm_info("Ignoring reply from %s, hosts are not permitted to commit suicide", op->target);
        return pcmk_ok;
    }

    /* A reply without an origin cannot be attributed to a peer, and
     * strdup(NULL) below would be undefined behaviour */
    CRM_CHECK(host != NULL, return -EPROTO);

    crm_debug("Query result from %s (%d devices)", host, devices);
    result = calloc(1, sizeof(st_query_result_t));
    CRM_CHECK(result != NULL, return -ENOMEM);

    result->host = strdup(host);
    result->devices = devices;
    result->custom_action_timeouts = g_hash_table_new_full(
        crm_str_hash, g_str_equal, free, NULL);

    for (child = __xml_first_child(dev); child != NULL; child = __xml_next(child)) {
        const char *device = ID(child);
        int action_timeout = 0;

        if(device) {
            result->device_list = g_list_prepend(result->device_list, strdup(device));
            crm_element_value_int(child, F_STONITH_ACTION_TIMEOUT, &action_timeout);
            if (action_timeout) {
                crm_trace("Peer %s with device %s returned action timeout %d",
                          result->host, device, action_timeout);
                g_hash_table_insert(result->custom_action_timeouts,
                                    strdup(device),
                                    GINT_TO_POINTER(action_timeout));
            }
        }
    }

    CRM_CHECK(devices == g_list_length(result->device_list),
              crm_err("Mis-match: Query claimed to have %d devices but %d found", devices, g_list_length(result->device_list)));

    op->query_results = g_list_insert_sorted(op->query_results, result, sort_peers);

    if(op->state == st_query && is_set(op->call_options, st_opt_all_replies) == FALSE) {
        call_remote_stonith(op, result);

    } else if(op->state == st_done) {
        crm_info("Discarding query result from %s (%d devices): Operation is in state %d",
                 result->host, result->devices, op->state);
    }

    return pcmk_ok;
}
/*
 * Handle the result of a fencing action that a peer executed.
 *
 * The message carries the remote operation id (F_STONITH_REMOTE) and the
 * result code (F_STONITH_RC).  If the operation is not known locally but the
 * result was successful, a record is created so that the success is kept.
 * For topology operations the originator steps through op->devices and
 * broadcasts completion once none are left; otherwise the operation is
 * finished on success or retried via call_remote_stonith().
 *
 * Returns the result code from the message, -EPROTO for a malformed message
 * or -EOPNOTSUPP when the operation is unknown.
 */
int process_remote_stonith_exec(xmlNode *msg)
{
    int rc = 0;
    const char *id = NULL;
    remote_fencing_op_t *op = NULL;
    xmlNode *dev = get_xpath_object("//@"F_STONITH_REMOTE, msg, LOG_ERR);

    CRM_CHECK(dev != NULL, return -EPROTO);

    id = crm_element_value(dev, F_STONITH_REMOTE);
    CRM_CHECK(id != NULL, return -EPROTO);

    dev = get_xpath_object("//@"F_STONITH_RC, msg, LOG_ERR);
    CRM_CHECK(dev != NULL, return -EPROTO);

    crm_element_value_int(dev, F_STONITH_RC, &rc);

    if(remote_op_list) {
        op = g_hash_table_lookup(remote_op_list, id);
    }

    if(op == NULL && rc == pcmk_ok) {
        /* Record successful fencing operations */
        const char *client_id = crm_element_value(msg, F_STONITH_CLIENTID);

        op = create_remote_stonith_op(client_id, msg, TRUE);
    }

    if(op == NULL) {
        /* Could be for an event that began before we started */
        /* TODO: Record the op for later querying */
        crm_info("Unknown or expired remote op: %s", id);
        return -EOPNOTSUPP;
    }

    if(is_set(op->call_options, st_opt_topology)) {
        const char *device = crm_element_value(msg, F_STONITH_DEVICE);

        crm_notice("Call to %s for %s on behalf of %s@%s: %s (%d)",
                   device, op->target, op->client_name, op->originator,
                   rc == pcmk_ok?"passed":"failed", rc);

        if(safe_str_eq(op->originator, stonith_our_uname)) {
            if(op->state == st_done) {
                remote_op_done(op, msg, rc);
                return rc;

            } else if(rc == pcmk_ok && op->devices) {
                /* Success, are there any more? */
                op->devices = op->devices->next;
            }

            if(op->devices == NULL) {
                crm_trace("Broadcasting completion of complex fencing op for %s", op->target);
                send_cluster_message(NULL, crm_msg_stonith_ng, msg, FALSE);
                op->state = st_done;
                return rc;
            }

        } else {
            op->state = st_done;
            remote_op_done(op, msg, rc);
            return rc;
        }

    } else if(rc == pcmk_ok && op->devices == NULL) {
        crm_trace("All done for %s", op->target);

        op->state = st_done;
        remote_op_done(op, msg, rc);
        return rc;
    }

    /* Retry on failure or execute the rest of the topology */
    /* Logged as client@originator, matching the notice above */
    crm_trace("Next for %s on behalf of %s@%s (rc was %d)", op->target, op->client_name, op->originator, rc);
    call_remote_stonith(op, NULL);
    return rc;
}
/*
 * Build a list of the remote fencing operations recorded in remote_op_list.
 *
 * If msg names a target (F_STONITH_TARGET) only operations against that
 * target are listed; with st_opt_cs_nodeid set the target is read as a node
 * id and translated to the peer's uname when the peer is known.
 *
 * The list is returned through *output.  Always returns 0.
 */
int stonith_fence_history(xmlNode *msg, xmlNode **output)
{
    int rc = 0;
    const char *target = NULL;
    GHashTableIter iter;
    remote_fencing_op_t *op = NULL;
    xmlNode *target_xml = get_xpath_object("//@"F_STONITH_TARGET, msg, LOG_TRACE);

    if (target_xml != NULL) {
        int call_opts = 0;

        target = crm_element_value(target_xml, F_STONITH_TARGET);
        crm_element_value_int(msg, F_STONITH_CALLOPTS, &call_opts);

        if (target != NULL && (call_opts & st_opt_cs_nodeid) != 0) {
            int nodeid = crm_atoi(target, NULL);
            crm_node_t *peer = crm_get_peer(nodeid, NULL);

            if (peer != NULL) {
                target = peer->uname;
            }
        }
    }

    *output = create_xml_node(NULL, F_STONITH_HISTORY_LIST);

    if (remote_op_list == NULL) {
        return rc;
    }

    g_hash_table_iter_init(&iter, remote_op_list);
    while (g_hash_table_iter_next(&iter, NULL, (void**)&op)) {
        xmlNode *entry = NULL;

        /* No filter, or the operation is for the requested target */
        if (target == NULL || strcmp(op->target, target) == 0) {
            rc = 0;
            entry = create_xml_node(*output, STONITH_OP_EXEC);
            crm_xml_add(entry, F_STONITH_TARGET, op->target);
            crm_xml_add(entry, F_STONITH_ACTION, op->action);
            crm_xml_add(entry, F_STONITH_ORIGIN, op->originator);
            crm_xml_add(entry, F_STONITH_DELEGATE, op->delegate);
            crm_xml_add_int(entry, F_STONITH_DATE, op->completed);
            crm_xml_add_int(entry, F_STONITH_STATE, op->state);
        }
    }

    return rc;
}
diff --git a/include/crm/cluster.h b/include/crm/cluster.h
index 546ab9623f..2c4fdf99e8 100644
--- a/include/crm/cluster.h
+++ b/include/crm/cluster.h
@@ -1,150 +1,150 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef CRM_COMMON_CLUSTER__H
# define CRM_COMMON_CLUSTER__H
# include <crm/common/xml.h>
# include <crm/common/util.h>
# if SUPPORT_HEARTBEAT
# include <heartbeat/hb_api.h>
# include <ocf/oc_event.h>
# endif
extern gboolean crm_have_quorum;
extern GHashTable *crm_peer_cache;
extern GHashTable *crm_peer_id_cache;
extern unsigned long long crm_peer_seq;
# ifndef CRM_SERVICE
# define CRM_SERVICE PCMK_SERVICE_ID
# endif
/* *INDENT-OFF* */
#define CRM_NODE_LOST "lost"
#define CRM_NODE_MEMBER "member"
#define CRM_NODE_ACTIVE CRM_NODE_MEMBER
#define CRM_NODE_EVICTED "evicted"
/* *INDENT-ON* */
/* Record describing one cluster peer; this is the type returned by
 * crm_get_peer() and accepted by send_cluster_message(). */
typedef struct crm_peer_node_s {
uint32_t id; /* Only used by corosync derivatives */
uint64_t born; /* Only used by heartbeat and the legacy plugin */
uint64_t last_seen;
int32_t votes; /* Only used by the legacy plugin */
uint32_t processes;
/* Node name and identifier strings */
char *uname;
char *uuid;
/* NOTE(review): presumably holds one of the CRM_NODE_* strings defined above - confirm */
char *state;
char *expected;
char *addr; /* Only used by the legacy plugin */
char *version;/* Unused */
} crm_node_t;
void crm_peer_init(void);
void crm_peer_destroy(void);
char *get_corosync_uuid(uint32_t id, const char *uname);
const char *get_node_uuid(uint32_t id, const char *uname);
int get_corosync_id(int id, const char *uuid);
/* Connection description handed to crm_cluster_connect(). */
typedef struct crm_cluster_s
{
char *uuid;
char *uname;
uint32_t nodeid;
#if SUPPORT_HEARTBEAT
/* Heartbeat connection object and message callback (heartbeat builds only) */
ll_cluster_t *hb_conn;
void (*hb_dispatch)(HA_Message *msg, void *private);
#endif
/* NOTE(review): presumably invoked for incoming corosync messages - confirm */
gboolean (*cs_dispatch) (int kind, const char *from, const char *data);
void (*destroy) (gpointer);
} crm_cluster_t;
gboolean crm_cluster_connect(crm_cluster_t *cluster);
enum crm_ais_msg_types;
-gboolean send_cluster_message(const char *node, enum crm_ais_msg_types service,
+gboolean send_cluster_message(crm_node_t *node, enum crm_ais_msg_types service,
xmlNode * data, gboolean ordered);
void destroy_crm_node(gpointer/* crm_node_t* */ data);
crm_node_t *crm_get_peer(unsigned int id, const char *uname);
guint crm_active_peers(void);
gboolean crm_is_peer_active(const crm_node_t * node);
guint reap_crm_member(uint32_t id);
int crm_terminate_member(int nodeid, const char *uname, void* unused);
int crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection);
gboolean crm_get_cluster_name(char **cname);
# if SUPPORT_HEARTBEAT
gboolean crm_is_heartbeat_peer_active(const crm_node_t * node);
# endif
# if SUPPORT_COROSYNC
extern int ais_fd_sync;
gboolean crm_is_corosync_peer_active(const crm_node_t * node);
gboolean send_ais_text(int class, const char *data, gboolean local,
- const char *node, enum crm_ais_msg_types dest);
+ crm_node_t *node, enum crm_ais_msg_types dest);
gboolean get_ais_nodeid(uint32_t * id, char **uname);
# endif
void empty_uuid_cache(void);
const char *get_uuid(const char *uname);
const char *get_uname(const char *uuid);
void set_uuid(xmlNode * node, const char *attr, const char *uname);
void unget_uuid(const char *uname);
/* First argument of the callback registered with crm_set_status_callback().
 * NOTE(review): presumably identifies which peer property changed - confirm */
enum crm_status_type {
crm_status_uname,
crm_status_nstate,
crm_status_processes,
};
enum crm_ais_msg_types text2msg_type(const char *text);
void crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *));
/* *INDENT-OFF* */
/* Cluster stack identifiers, as returned by get_cluster_type().
 * Each value is a distinct bit. */
enum cluster_type_e
{
pcmk_cluster_unknown = 0x0001,
pcmk_cluster_invalid = 0x0002,
pcmk_cluster_heartbeat = 0x0004,
pcmk_cluster_classic_ais = 0x0010,
pcmk_cluster_corosync = 0x0020,
pcmk_cluster_cman = 0x0040,
};
/* *INDENT-ON* */
enum cluster_type_e get_cluster_type(void);
const char *name_for_cluster_type(enum cluster_type_e type);
gboolean is_corosync_cluster(void);
gboolean is_cman_cluster(void);
gboolean is_openais_cluster(void);
gboolean is_classic_ais_cluster(void);
gboolean is_heartbeat_cluster(void);
#endif
diff --git a/include/crm/cluster/internal.h b/include/crm/cluster/internal.h
index 14afa04fc1..01cc69131e 100644
--- a/include/crm/cluster/internal.h
+++ b/include/crm/cluster/internal.h
@@ -1,407 +1,407 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef CRM_CLUSTER_INTERNAL__H
# define CRM_CLUSTER_INTERNAL__H
# include <crm/cluster.h>
# define AIS_IPC_NAME "ais-crm-ipc"
/* Parenthesized so the product survives use inside larger expressions
 * (e.g. "x % AIS_IPC_MESSAGE_SIZE" or "n / AIS_IPC_MESSAGE_SIZE") */
# define AIS_IPC_MESSAGE_SIZE (8192 * 128)
# define CRM_MESSAGE_IPC_ACK 0
typedef struct crm_ais_host_s AIS_Host;
typedef struct crm_ais_msg_s AIS_Message;
enum crm_ais_msg_class {
crm_class_cluster = 0,
crm_class_members = 1,
crm_class_notify = 2,
crm_class_nodeid = 3,
crm_class_rmpeer = 4,
crm_class_quorum = 5,
};
/* Order here matters - it is used to index into the crm_children array.
 * msg_type2text() provides the printable name for each value. */
enum crm_ais_msg_types {
crm_msg_none = 0,
crm_msg_ais = 1,
crm_msg_lrmd = 2,
crm_msg_cib = 3,
crm_msg_crmd = 4,
crm_msg_attrd = 5,
crm_msg_stonithd = 6,
crm_msg_te = 7,
crm_msg_pe = 8,
crm_msg_stonith_ng = 9,
};
/* One endpoint (destination or sender) of an AIS message.
 * ais_dest() treats 'local' as the local node, a non-zero 'size' as
 * "uname is set", and anything else as addressed to all nodes. */
struct crm_ais_host_s {
uint32_t id;
uint32_t pid;
gboolean local;
enum crm_ais_msg_types type;
uint32_t size;
char uname[MAX_NAME];
} __attribute__ ((packed));
/* AIS message: fixed header followed by a variable-length payload in data.
 * The payload length is compressed_size when is_compressed is set and size
 * otherwise (see ais_data_len()). */
struct crm_ais_msg_s {
cs_ipc_header_response_t header __attribute__ ((aligned(8)));
uint32_t id;
gboolean is_compressed;
AIS_Host host;
AIS_Host sender;
uint32_t size;
uint32_t compressed_size;
/* 584 bytes */
char data[0];
} __attribute__ ((packed));
struct crm_ais_nodeid_resp_s {
cs_ipc_header_response_t header __attribute__ ((aligned(8)));
uint32_t id;
uint32_t counter;
char uname[MAX_NAME];
char cname[MAX_NAME];
} __attribute__ ((packed));
struct crm_ais_quorum_resp_s {
cs_ipc_header_response_t header __attribute__ ((aligned(8)));
uint64_t id;
uint32_t votes;
uint32_t expected_votes;
uint32_t quorate;
} __attribute__ ((packed));
/* Bit flags identifying cluster processes; peer2text() gives their names. */
enum crm_proc_flag {
crm_proc_none = 0x00000001,
/* These values are sent over the network by the legacy plugin.
 * Therefore changing any of these values is going to break compatibility.
 *
 * So don't
 */
/* 3 messaging types */
crm_proc_heartbeat = 0x01000000,
crm_proc_plugin = 0x00000002,
crm_proc_cpg = 0x04000000,
crm_proc_lrmd = 0x00000010,
crm_proc_cib = 0x00000100,
crm_proc_crmd = 0x00000200,
crm_proc_attrd = 0x00001000,
crm_proc_stonithd = 0x00002000,
crm_proc_stonith_ng= 0x00100000,
crm_proc_pe = 0x00010000,
crm_proc_te = 0x00020000,
crm_proc_mgmtd = 0x00040000,
};
static inline const char *
peer2text(enum crm_proc_flag proc)
{
const char *text = "unknown";
if( proc == (crm_proc_cpg|crm_proc_crmd) ) {
return "peer";
}
switch (proc) {
case crm_proc_none:
text = "none";
break;
case crm_proc_plugin:
text = "ais";
break;
case crm_proc_heartbeat:
text = "heartbeat";
break;
case crm_proc_cib:
text = "cib";
break;
case crm_proc_crmd:
text = "crmd";
break;
case crm_proc_pe:
text = "pengine";
break;
case crm_proc_te:
text = "tengine";
break;
case crm_proc_lrmd:
text = "lrmd";
break;
case crm_proc_attrd:
text = "attrd";
break;
case crm_proc_stonithd:
text = "stonithd";
break;
case crm_proc_stonith_ng:
text = "stonith-ng";
break;
case crm_proc_mgmtd:
text = "mgmtd";
break;
case crm_proc_cpg:
text = "corosync-cpg";
break;
}
return text;
}
/*
 * Map a process name to its flag.  Only "cib" and "crmd" are recognised;
 * NULL and every other name map to crm_proc_none.
 */
static inline enum crm_proc_flag
text2proc(const char *proc)
{
    /* We only care about these two so far */
    if (proc == NULL) {
        return crm_proc_none;
    }
    if (strcmp(proc, "cib") == 0) {
        return crm_proc_cib;
    }
    if (strcmp(proc, "crmd") == 0) {
        return crm_proc_crmd;
    }
    return crm_proc_none;
}
/*
 * Printable destination of a message: "local" for the local node, the
 * host's uname when one is set (size > 0), otherwise "<all>".
 */
static inline const char *
ais_dest(const struct crm_ais_host_s *host)
{
    if (host->local) {
        return "local";
    }
    return (host->size > 0) ? host->uname : "<all>";
}
/* Payload length of an AIS message: compressed_size when the message is
 * compressed, size otherwise.  The argument is parenthesized so that
 * expressions such as "&msg" or "msgs + i" expand correctly. */
# define ais_data_len(msg) ((msg)->is_compressed ? (msg)->compressed_size : (msg)->size)
/*
 * Duplicate an AIS message, including its payload.
 *
 * Returns a newly allocated copy that the caller must free(), or NULL if
 * the allocation failed.
 */
static inline AIS_Message *
ais_msg_copy(const AIS_Message * source)
{
    size_t data_len = ais_data_len(source);
    AIS_Message *target = malloc(sizeof(AIS_Message) + data_len);

    if (target == NULL) {
        return NULL;
    }

    memcpy(target, source, sizeof(AIS_Message));
    memcpy(target->data, source->data, data_len);

    return target;
}
/*
 * Printable description of a corosync (CS_*) error code.
 *
 * Returns "unknown" for unrecognised codes, and for every code when built
 * without corosync support.
 */
static inline const char *
ais_error2text(int error)
{
    const char *text = "unknown";

# if SUPPORT_COROSYNC
    switch (error) {
        case CS_OK:
            text = "None";
            break;
        case CS_ERR_LIBRARY:
            text = "Library error";
            break;
        case CS_ERR_VERSION:
            text = "Version error";
            break;
        case CS_ERR_INIT:
            text = "Initialization error";
            break;
        case CS_ERR_TIMEOUT:
            text = "Timeout";
            break;
        case CS_ERR_TRY_AGAIN:
            text = "Try again";
            break;
        case CS_ERR_INVALID_PARAM:
            text = "Invalid parameter";
            break;
        case CS_ERR_NO_MEMORY:
            text = "No memory";
            break;
        case CS_ERR_BAD_HANDLE:
            text = "Bad handle";
            break;
        case CS_ERR_BUSY:
            text = "Busy";
            break;
        case CS_ERR_ACCESS:
            text = "Access error";
            break;
        case CS_ERR_NOT_EXIST:
            text = "Doesn't exist";
            break;
        case CS_ERR_NAME_TOO_LONG:
            text = "Name too long";
            break;
        case CS_ERR_EXIST:
            text = "Exists";
            break;
        case CS_ERR_NO_SPACE:
            text = "No space";
            break;
        case CS_ERR_INTERRUPT:
            text = "Interrupt";
            break;
        case CS_ERR_NAME_NOT_FOUND:
            text = "Name not found";
            break;
        case CS_ERR_NO_RESOURCES:
            text = "No resources";
            break;
        case CS_ERR_NOT_SUPPORTED:
            text = "Not supported";
            break;
        case CS_ERR_BAD_OPERATION:
            text = "Bad operation";
            break;
        case CS_ERR_FAILED_OPERATION:
            text = "Failed operation";
            break;
        case CS_ERR_MESSAGE_ERROR:
            text = "Message error";
            break;
        case CS_ERR_QUEUE_FULL:
            text = "Queue full";
            break;
        case CS_ERR_QUEUE_NOT_AVAILABLE:
            text = "Queue not available";
            break;
        case CS_ERR_BAD_FLAGS:
            text = "Bad flags";
            break;
        case CS_ERR_TOO_BIG:
            text = "Too big";
            break;
        case CS_ERR_NO_SECTIONS:
            text = "No sections";
            break;
        default:
            /* Unrecognised code: keep "unknown" */
            break;
    }
# endif
    return text;
}
/*
 * Printable name of a message type; crm_msg_none and any unrecognised
 * value are reported as "unknown".
 */
static inline const char *
msg_type2text(enum crm_ais_msg_types type)
{
    switch (type) {
        case crm_msg_none:
            return "unknown";
        case crm_msg_ais:
            return "ais";
        case crm_msg_cib:
            return "cib";
        case crm_msg_crmd:
            return "crmd";
        case crm_msg_pe:
            return "pengine";
        case crm_msg_te:
            return "tengine";
        case crm_msg_lrmd:
            return "lrmd";
        case crm_msg_attrd:
            return "attrd";
        case crm_msg_stonithd:
            return "stonithd";
        case crm_msg_stonith_ng:
            return "stonith-ng";
    }
    return "unknown";
}
enum crm_ais_msg_types text2msg_type(const char *text);
char *get_ais_data(const AIS_Message * msg);
gboolean check_message_sanity(const AIS_Message * msg, const char *data);
# if SUPPORT_HEARTBEAT
extern ll_cluster_t *heartbeat_cluster;
gboolean send_ha_message(ll_cluster_t * hb_conn, xmlNode * msg,
const char *node, gboolean force_ordered);
gboolean ha_msg_dispatch(ll_cluster_t * cluster_conn, gpointer user_data);
gboolean register_heartbeat_conn(crm_cluster_t *cluster);
xmlNode *convert_ha_message(xmlNode * parent, HA_Message *msg, const char *field);
gboolean ccm_have_quorum(oc_ed_t event);
const char *ccm_event_name(oc_ed_t event);
crm_node_t *crm_update_ccm_node(const oc_ev_membership_t * oc, int offset, const char *state,
uint64_t seq);
gboolean heartbeat_initialize_nodelist(void *cluster, gboolean force_member, xmlNode *xml_parent);
# endif
# if SUPPORT_COROSYNC
gboolean corosync_initialize_nodelist(void *cluster, gboolean force_member, xmlNode *xml_parent);
gboolean send_ais_message(xmlNode * msg, gboolean local,
- const char *node, enum crm_ais_msg_types dest);
+ crm_node_t *node, enum crm_ais_msg_types dest);
enum cluster_type_e find_corosync_variant(void);
void terminate_cs_connection(void);
gboolean init_cs_connection(crm_cluster_t *cluster);
gboolean init_cs_connection_once(crm_cluster_t *cluster);
# endif
/* Value returned by get_quorum_source().
 * NOTE(review): presumably names the component that supplies quorum - confirm */
enum crm_quorum_source {
crm_quorum_cman,
crm_quorum_corosync,
crm_quorum_pacemaker,
};
enum crm_quorum_source get_quorum_source(void);
void crm_update_peer_proc(const char *source, crm_node_t *peer, uint32_t flag, const char *status);
crm_node_t *crm_update_peer(
const char *source, unsigned int id, uint64_t born, uint64_t seen,
int32_t votes, uint32_t children, const char *uuid, const char *uname,
const char *addr, const char *state);
void crm_update_peer_expected(const char *source, crm_node_t *node, const char *expected);
void crm_update_peer_state(const char *source, crm_node_t *node, const char *state, int membership);
gboolean init_cman_connection(
gboolean(*dispatch) (unsigned long long, gboolean), void (*destroy) (gpointer));
gboolean init_quorum_connection(
gboolean(*dispatch) (unsigned long long, gboolean), void (*destroy) (gpointer));
void set_node_uuid(const char *uname, const char *uuid);
#endif
diff --git a/lib/cluster/cluster.c b/lib/cluster/cluster.c
index 0e2e6246f9..674dde821c 100644
--- a/lib/cluster/cluster.c
+++ b/lib/cluster/cluster.c
@@ -1,528 +1,528 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <dlfcn.h>
#include <sys/param.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/ipc.h>
#include <crm/cluster/internal.h>
CRM_TRACE_INIT_DATA(cluster);
#if SUPPORT_HEARTBEAT
void *hb_library = NULL;
#endif
static GHashTable *crm_uuid_cache = NULL;
static GHashTable *crm_uname_cache = NULL;
/*
 * Ask heartbeat for the UUID of the named node.
 *
 * Returns a newly allocated string the caller must free(), or NULL when
 * there is no heartbeat connection, the lookup fails, heartbeat reports the
 * all-zero UUID, memory cannot be allocated, or the build has no heartbeat
 * support.  The first argument is ignored.
 */
static char *
get_heartbeat_uuid(uint32_t unused, const char *uname)
{
    char *uuid_calc = NULL;

#if SUPPORT_HEARTBEAT
    cl_uuid_t uuid_raw;
    const char *unknown = "00000000-0000-0000-0000-000000000000";

    if (heartbeat_cluster == NULL) {
        crm_warn("No connection to heartbeat, using uuid=uname");
        return NULL;
    }

    /* Nothing has been allocated yet, so there is nothing to free on failure */
    if (heartbeat_cluster->llc_ops->get_uuid_by_name(heartbeat_cluster, uname, &uuid_raw) ==
        HA_FAIL) {
        crm_err("get_uuid_by_name() call failed for host %s", uname);
        return NULL;
    }

    uuid_calc = calloc(1, 50);
    if (uuid_calc == NULL) {
        crm_err("Could not allocate memory for the UUID of %s", uname);
        return NULL;
    }

    cl_uuid_unparse(&uuid_raw, uuid_calc);

    if (safe_str_eq(uuid_calc, unknown)) {
        crm_warn("Could not calculate UUID for %s", uname);
        free(uuid_calc);
        return NULL;
    }
#endif
    return uuid_calc;
}
/*
 * Report whether node names double as node UUIDs, as selected by the
 * PCMK_uname_is_uuid environment variable.  The setting is read once and
 * remembered; when the variable is unset it is treated as "false".
 */
static gboolean
uname_is_uuid(void)
{
    static const char *cached_pref = NULL;

    if (cached_pref == NULL) {
        const char *env_value = getenv("PCMK_uname_is_uuid");

        /* true is legacy mode */
        cached_pref = (env_value != NULL) ? env_value : "false";
    }
    return crm_is_true(cached_pref);
}
/*
 * Return the corosync node id.  A non-zero id is returned unchanged; a zero
 * id is replaced by the numeric value of uuid when names are not used as
 * UUIDs and the stack is corosync.
 */
int
get_corosync_id(int id, const char *uuid)
{
    if (id != 0) {
        return id;
    }
    if (uname_is_uuid()) {
        return id;
    }
    if (!is_corosync_cluster()) {
        return id;
    }
    return crm_atoi(uuid, "0");
}
/*
 * Produce the UUID string for a node on corosync-based stacks.
 *
 * On a corosync cluster where names are not used as UUIDs, the UUID is the
 * decimal node id; an id of 0 is first looked up by name in crm_peer_cache.
 * In every other configuration the UUID is a copy of uname.
 *
 * Returns a newly allocated string the caller must free(), or NULL when the
 * id cannot be determined, uname is NULL where it is needed, or allocation
 * fails.
 */
char *
get_corosync_uuid(uint32_t id, const char *uname)
{
    if (!uname_is_uuid() && is_corosync_cluster()) {
        /* id is unsigned, so "unknown" is exactly 0.  Only consult the
         * membership cache when there is both a cache and a key */
        if (id == 0 && uname != NULL && crm_peer_cache != NULL) {
            /* Try the membership cache... */
            crm_node_t *node = g_hash_table_lookup(crm_peer_cache, uname);

            if (node != NULL) {
                id = node->id;
            }
        }

        if (id > 0) {
            int len = 32;
            char *buffer = NULL;

            buffer = calloc(1, (len + 1));
            if (buffer != NULL) {
                snprintf(buffer, len, "%u", id);
            }

            return buffer;

        } else {
            crm_warn("Node %s is not yet known by corosync", uname ? uname : "<unknown>");
        }

    } else if (uname != NULL) {
        return strdup(uname);
    }

    return NULL;
}
/*
 * Remember the UUID for a node name in the UUID cache, creating the cache
 * on first use.  Both arguments are copied; NULL arguments are rejected.
 */
void
set_node_uuid(const char *uname, const char *uuid)
{
    CRM_CHECK(uuid != NULL, return);
    CRM_CHECK(uname != NULL, return);

    if (!crm_uuid_cache) {
        crm_uuid_cache = g_hash_table_new_full(crm_str_hash, g_str_equal,
                                               g_hash_destroy_str, g_hash_destroy_str);
    }

    {
        char *key = strdup(uname);
        char *value = strdup(uuid);

        g_hash_table_insert(crm_uuid_cache, key, value);
    }
}
/*
 * Look up the UUID for a node, consulting the UUID cache first and falling
 * back to the stack-specific lookup for the current cluster type.
 *
 * When uname is given, the result is stored in (and returned from) the
 * cache.  When uname is NULL the freshly allocated string is returned
 * directly and nothing owns it afterwards (see "Memory leak!" below).
 * Returns NULL if no UUID could be determined.
 */
const char *
get_node_uuid(uint32_t id, const char *uname)
{
char *uuid = NULL;
enum cluster_type_e type = get_cluster_type();
/* Create the cache on first use; it owns both keys and values */
if (crm_uuid_cache == NULL) {
crm_uuid_cache = g_hash_table_new_full(crm_str_hash, g_str_equal,
g_hash_destroy_str, g_hash_destroy_str);
}
/* avoid blocking heartbeat calls where possible */
if (uname) {
uuid = g_hash_table_lookup(crm_uuid_cache, uname);
}
if (uuid != NULL) {
return uuid;
}
/* Cache miss: every branch below yields a newly allocated string or NULL */
switch (type) {
case pcmk_cluster_corosync:
uuid = get_corosync_uuid(id, uname);
break;
case pcmk_cluster_cman:
case pcmk_cluster_classic_ais:
if (uname) {
uuid = strdup(uname);
}
break;
case pcmk_cluster_heartbeat:
uuid = get_heartbeat_uuid(id, uname);
break;
case pcmk_cluster_unknown:
case pcmk_cluster_invalid:
crm_err("Unsupported cluster type");
break;
}
if (uuid == NULL) {
return NULL;
}
if (uname) {
/* The cache takes ownership of uuid; return the cached pointer */
g_hash_table_insert(crm_uuid_cache, strdup(uname), uuid);
return g_hash_table_lookup(crm_uuid_cache, uname);
}
/* Memory leak! */
/* Without a name there is no cache key, so the allocation is handed to
 * the caller as a const pointer that nothing frees */
CRM_LOG_ASSERT(uuid != NULL);
return uuid;
}
/*
 * Connect to the cluster infrastructure selected by get_cluster_type().
 *
 * Corosync-family stacks initialise the peer caches and call
 * init_cs_connection(); heartbeat signs off any previous connection and
 * registers cluster->hb_conn.  Returns the result of the stack-specific
 * connect call, or FALSE when no supported stack matched.
 */
gboolean
crm_cluster_connect(crm_cluster_t *cluster)
{
enum cluster_type_e type = get_cluster_type();
crm_notice("Connecting to cluster infrastructure: %s", name_for_cluster_type(type));
#if SUPPORT_COROSYNC
if (is_openais_cluster()) {
crm_peer_init();
return init_cs_connection(cluster);
}
#endif
#if SUPPORT_HEARTBEAT
if (is_heartbeat_cluster()) {
int rv;
/* NOTE(review): if CRM_ASSERT aborts on failure, the "create a new one"
 * branch below can never run - confirm which behaviour is intended */
CRM_ASSERT(cluster->hb_conn != NULL);
/* coverity[var_deref_op] False positive */
if (cluster->hb_conn == NULL) {
/* No object passed in, create a new one. */
ll_cluster_t *(*new_cluster) (const char *llctype) =
find_library_function(&hb_library, HEARTBEAT_LIBRARY, "ll_cluster_new", 1);
cluster->hb_conn = (*new_cluster) ("heartbeat");
/* dlclose(handle); */
} else {
/* Object passed in. Disconnect first, then reconnect below. */
cluster->hb_conn->llc_ops->signoff(cluster->hb_conn, FALSE);
}
/* make sure we are disconnected first with the old object, if any. */
if (heartbeat_cluster && heartbeat_cluster != cluster->hb_conn) {
heartbeat_cluster->llc_ops->signoff(heartbeat_cluster, FALSE);
}
CRM_ASSERT(cluster->hb_conn != NULL);
heartbeat_cluster = cluster->hb_conn;
rv = register_heartbeat_conn(cluster);
if (rv) {
/* we'll benefit from a bigger queue length on heartbeat side.
* Otherwise, if peers send messages faster than we can consume
* them right now, heartbeat messaging layer will kick us out once
* it's (small) default queue fills up :(
* If we fail to adjust the sendq length, that's not yet fatal, though.
*/
if (HA_OK != heartbeat_cluster->llc_ops->set_sendq_len(heartbeat_cluster, 1024)) {
crm_warn("Cannot set sendq length: %s", heartbeat_cluster->llc_ops->errmsg(heartbeat_cluster));
}
}
return rv;
}
#endif
/* NOTE(review): getenv() may return NULL here, which is then passed to %s */
crm_info("Unsupported cluster stack: %s", getenv("HA_cluster_type"));
return FALSE;
}
gboolean
-send_cluster_message(const char *node, enum crm_ais_msg_types service, xmlNode * data,
+send_cluster_message(crm_node_t *node, enum crm_ais_msg_types service, xmlNode * data,
gboolean ordered)
{
#if SUPPORT_COROSYNC
if (is_openais_cluster()) {
return send_ais_message(data, FALSE, node, service);
}
#endif
#if SUPPORT_HEARTBEAT
if (is_heartbeat_cluster()) {
- return send_ha_message(heartbeat_cluster, data, node, ordered);
+ return send_ha_message(heartbeat_cluster, data, node->uname, ordered);
}
#endif
return FALSE;
}
/* Destroy the UUID cache, if one exists, and forget it. */
void
empty_uuid_cache(void)
{
    if (crm_uuid_cache == NULL) {
        return;
    }
    g_hash_table_destroy(crm_uuid_cache);
    crm_uuid_cache = NULL;
}
/* Drop the cached UUID for the named node; a no-op when there is no cache. */
void
unget_uuid(const char *uname)
{
    if (crm_uuid_cache != NULL) {
        g_hash_table_remove(crm_uuid_cache, uname);
    }
}
/* Convenience wrapper: look up a node's UUID by name only (node id 0). */
const char *
get_uuid(const char *uname)
{
    const char *uuid = get_node_uuid(0, uname);

    return uuid;
}
/*
 * Translate a node UUID into a node name.
 *
 * Results are kept in the uname cache, which is created on first use and
 * owns the strings it returns.  On corosync-family stacks the name comes
 * from the peer id cache (or is the UUID itself when names double as
 * UUIDs); on heartbeat it is requested from the heartbeat connection.
 *
 * Returns the cached name, or NULL when uuid is NULL or no name is known.
 */
const char *
get_uname(const char *uuid)
{
    const char *uname = NULL;

    if (crm_uname_cache == NULL) {
        crm_uname_cache = g_hash_table_new_full(crm_str_hash, g_str_equal,
                                                g_hash_destroy_str, g_hash_destroy_str);
    }

    CRM_CHECK(uuid != NULL, return NULL);

    /* avoid blocking calls where possible */
    uname = g_hash_table_lookup(crm_uname_cache, uuid);
    if (uname != NULL) {
        crm_trace("%s = %s (cached)", uuid, uname);
        return uname;
    }
#if SUPPORT_COROSYNC
    if (is_openais_cluster()) {
        if (!uname_is_uuid() && is_corosync_cluster()) {
            uint32_t id = crm_int_helper(uuid, NULL);
            crm_node_t *node = g_hash_table_lookup(crm_peer_id_cache, GUINT_TO_POINTER(id));

            uname = node ? node->uname : NULL;
        } else {
            uname = uuid;
        }

        if (uname) {
            crm_trace("Storing %s = %s", uuid, uname);
            g_hash_table_insert(crm_uname_cache, strdup(uuid), strdup(uname));
        }
    }
#endif

#if SUPPORT_HEARTBEAT
    if (is_heartbeat_cluster()) {
        if (heartbeat_cluster != NULL) {
            cl_uuid_t uuid_raw;
            char *uuid_copy = strdup(uuid);
            char *hb_uname = malloc(MAX_NAME);

            if (uuid_copy == NULL || hb_uname == NULL) {
                crm_err("Could not allocate memory to look up the uname for %s", uuid);
                free(uuid_copy);
                free(hb_uname);

            } else {
                cl_uuid_parse(uuid_copy, &uuid_raw);

                if (heartbeat_cluster->llc_ops->get_name_by_uuid(heartbeat_cluster, &uuid_raw, hb_uname,
                                                                 MAX_NAME) == HA_FAIL) {
                    crm_err("Could not calculate uname for %s", uuid);
                    free(uuid_copy);
                    free(hb_uname);

                } else {
                    /* Log the name just obtained; 'uname' is still NULL on this path */
                    crm_trace("Storing %s = %s", uuid, hb_uname);
                    /* The cache takes ownership of both strings */
                    g_hash_table_insert(crm_uname_cache, uuid_copy, hb_uname);
                }
            }
        }
    }
#endif

    return g_hash_table_lookup(crm_uname_cache, uuid);
}
/* Store the UUID of the named node in attribute 'attr' of the XML node. */
void
set_uuid(xmlNode * node, const char *attr, const char *uname)
{
    crm_xml_add(node, attr, get_uuid(uname));
}
const char *
name_for_cluster_type(enum cluster_type_e type)
{
switch (type) {
case pcmk_cluster_classic_ais:
return "classic openais (with plugin)";
case pcmk_cluster_cman:
return "cman";
case pcmk_cluster_corosync:
return "corosync";
case pcmk_cluster_heartbeat:
return "heartbeat";
case pcmk_cluster_unknown:
return "unknown";
case pcmk_cluster_invalid:
return "invalid";
}
crm_err("Invalid cluster type: %d", type);
return "invalid";
}
/* Do not expose these two */
int set_cluster_type(enum cluster_type_e type);
static enum cluster_type_e cluster_type = pcmk_cluster_unknown;
/*
 * Set the cached cluster type.
 *
 * Allowed when no type has been chosen yet, when the type is unchanged, or
 * when resetting to pcmk_cluster_unknown.  Returns 0 on success and -1 if a
 * different type is already in effect.
 */
int
set_cluster_type(enum cluster_type_e type)
{
    if (cluster_type == pcmk_cluster_unknown) {
        crm_info("Cluster type set to: %s", name_for_cluster_type(type));
        cluster_type = type;
        return 0;
    }

    if (cluster_type == type) {
        return 0;
    }

    if (type == pcmk_cluster_unknown) {
        cluster_type = type;
        return 0;
    }

    crm_err("Cluster type already set to %s, ignoring %s",
            name_for_cluster_type(cluster_type), name_for_cluster_type(type));
    return -1;
}
/*
 * Determine which cluster stack is in use, and remember the answer.
 *
 * The HA_cluster_type environment variable takes precedence.  When it is
 * unset, corosync-enabled builds probe with find_corosync_variant() and fall
 * back to heartbeat; other builds assume heartbeat.  A stack that this build
 * does not support terminates the process with exit code 100.
 */
enum cluster_type_e
get_cluster_type(void)
{
/* Only do the detection while the cached type is still unknown */
if (cluster_type == pcmk_cluster_unknown) {
const char *cluster = getenv("HA_cluster_type");
cluster_type = pcmk_cluster_invalid;
if (cluster) {
crm_info("Cluster type is: '%s'", cluster);
} else {
#if SUPPORT_COROSYNC
cluster_type = find_corosync_variant();
if (cluster_type == pcmk_cluster_unknown) {
cluster = "heartbeat";
crm_info("Assuming a 'heartbeat' based cluster");
} else {
cluster = name_for_cluster_type(cluster_type);
crm_info("Detected an active '%s' cluster", cluster);
}
#else
cluster = "heartbeat";
#endif
}
/* Map the stack name onto a type, rejecting stacks compiled out */
if (safe_str_eq(cluster, "heartbeat")) {
#if SUPPORT_HEARTBEAT
cluster_type = pcmk_cluster_heartbeat;
#else
cluster_type = pcmk_cluster_invalid;
#endif
} else if (safe_str_eq(cluster, "openais")
|| safe_str_eq(cluster, "classic openais (with plugin)")) {
#if SUPPORT_COROSYNC
cluster_type = pcmk_cluster_classic_ais;
#else
cluster_type = pcmk_cluster_invalid;
#endif
} else if (safe_str_eq(cluster, "corosync")) {
#if SUPPORT_COROSYNC
cluster_type = pcmk_cluster_corosync;
#else
cluster_type = pcmk_cluster_invalid;
#endif
} else if (safe_str_eq(cluster, "cman")) {
#if SUPPORT_CMAN
cluster_type = pcmk_cluster_cman;
#else
cluster_type = pcmk_cluster_invalid;
#endif
} else {
cluster_type = pcmk_cluster_invalid;
}
/* An unusable stack is fatal */
if (cluster_type == pcmk_cluster_invalid) {
crm_notice
("This installation of Pacemaker does not support the '%s' cluster infrastructure. Terminating.",
cluster);
exit(100);
}
}
return cluster_type;
}
gboolean
is_cman_cluster(void)
{
return get_cluster_type() == pcmk_cluster_cman;
}
gboolean
is_corosync_cluster(void)
{
return get_cluster_type() == pcmk_cluster_corosync;
}
gboolean
is_classic_ais_cluster(void)
{
return get_cluster_type() == pcmk_cluster_classic_ais;
}
/* TRUE for any corosync-family stack: classic openais, corosync or cman. */
gboolean
is_openais_cluster(void)
{
    switch (get_cluster_type()) {
        case pcmk_cluster_classic_ais:
        case pcmk_cluster_corosync:
        case pcmk_cluster_cman:
            return TRUE;
        default:
            return FALSE;
    }
}
gboolean
is_heartbeat_cluster(void)
{
return get_cluster_type() == pcmk_cluster_heartbeat;
}
diff --git a/lib/cluster/corosync.c b/lib/cluster/corosync.c
index 7a29d56a38..7ea1f3e63a 100644
--- a/lib/cluster/corosync.c
+++ b/lib/cluster/corosync.c
@@ -1,1086 +1,1087 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <bzlib.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <crm/common/ipc.h>
#include <crm/cluster/internal.h>
#include <crm/common/mainloop.h>
#include <sys/utsname.h>
#include <qb/qbipcc.h>
#include <qb/qbutil.h>
#include <corosync/corodefs.h>
#include <corosync/corotypes.h>
#include <corosync/hdb.h>
#include <corosync/cpg.h>
#include <corosync/cfg.h>
#include <corosync/cmap.h>
#include <corosync/quorum.h>
#include <crm/msg_xml.h>
/* Handle of the CPG connection; terminate_cs_connection() treats 0 as "not connected" */
cpg_handle_t pcmk_cpg_handle = 0;
/* CPG group name; filled in from crm_system_name by init_cpg_connection() */
struct cpg_name pcmk_cpg_group = {
.length = 0,
.value[0] = 0,
};
/* Handle of the quorum API connection; terminate_cs_connection() treats 0 as "not connected" */
quorum_handle_t pcmk_quorum_handle = 0;
/* Application callback invoked at the end of pcmk_quorum_notification(); set by init_quorum_connection() */
gboolean(*quorum_app_callback) (unsigned long long seq, gboolean quorate) = NULL;
/* Local node name (from uname()) and its length; both set in init_cs_connection_once() */
static char *pcmk_uname = NULL;
static int pcmk_uname_len = 0;
/* Local node id; init_cs_connection_once() passes its address down to cpg_local_get() */
static uint32_t pcmk_nodeid = 0;
/*
 * Run 'code', and run it again for as long as it leaves 'rc' set to
 * CS_ERR_TRY_AGAIN or CS_ERR_QUEUE_FULL and 'counter' is still below 'max'.
 * Before each retry 'counter' is incremented and the process sleeps for
 * 'counter' seconds.
 *
 * The macro reads a variable named 'rc' from the caller's scope, so 'code'
 * is expected to assign its result to it. 'counter' is modified.
 */
#define cs_repeat(counter, max, code) do { \
code; \
if(rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { \
counter++; \
crm_debug("Retrying operation after %ds", counter); \
sleep(counter); \
} else { \
break; \
} \
} while(counter < max)
/* Number of address slots requested from corosync_cfg_get_node_addrs() */
#ifndef INTERFACE_MAX
# define INTERFACE_MAX 2 /* from the private coroapi.h header */
#endif
/*
 * Decide whether 'name' (a value obtained for 'key') can be used as a node
 * name.
 *
 * A name is rejected when it is NULL, when it parses as a dotted-quad IPv4
 * address, or when it contains a ':' (treated as an IPv6 address).
 *
 * key  - label used only in trace messages
 * name - candidate node name, may be NULL
 *
 * Returns TRUE if the name is usable, FALSE otherwise.
 */
static gboolean
corosync_name_is_valid(const char *key, const char *name)
{
    int octet;

    if (name == NULL) {
        crm_trace("%s is empty", key);
        return FALSE;

    } else if (sscanf(name, "%d.%d.%d.%d", &octet, &octet, &octet, &octet) == 4) {
        crm_trace("%s contains an ipv4 address, ignoring: %s", key, name);
        return FALSE;

    } else if (strstr(name, ":") != NULL) {
        /* A colon cannot appear in a dotted quad, so report it as IPv6 */
        crm_trace("%s contains an ipv6 address, ignoring: %s", key, name);
        return FALSE;
    }

    crm_trace("%s is valid", key);
    return TRUE;
}
/*
 * Look up a name for the corosync node 'nodeid'.
 *
 * Sources are tried in this order:
 *   1. the nodelist.node.N.ring0_addr CMAP entry of the matching node
 *   2. the nodelist.node.N.name CMAP entry of the matching node
 *   3. a reverse lookup (getnameinfo) of the first address the CFG API
 *      reports for the node; this part is taken from node_name() in
 *      corosync-quorumtool.c
 * Candidates rejected by corosync_name_is_valid() (IP addresses) are skipped.
 *
 * cmap_handle - an open CMAP connection, or 0 to have a temporary one opened
 *               (and closed again) by this function
 * nodeid      - corosync node id to resolve
 *
 * Returns a newly allocated string the caller must free(), or NULL when no
 * usable name could be found.
 */
static char *corosync_node_name(cmap_handle_t cmap_handle, uint32_t nodeid)
{
    int lpc = 0;
    int rc = CS_OK;
    int retries = 0;
    char *name = NULL;
    cmap_handle_t local_handle = 0;
    corosync_cfg_handle_t cfg_handle = 0;
    static corosync_cfg_callbacks_t cfg_callbacks = {};

    if (cmap_handle == 0) {
        /* The caller supplied no connection: open a private one */
        crm_trace("Initializing CMAP connection");
        do {
            rc = cmap_initialize(&local_handle);
            if (rc != CS_OK) {
                retries++;
                crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc), retries);
                sleep(retries);
            }
        } while (retries < 5 && rc != CS_OK);

        if (rc != CS_OK) {
            /* cs_strerror() returns a string, so it must be printed with %s */
            crm_warn("Could not connect to Cluster Configuration Database API: %s (%d)",
                     cs_strerror(rc), rc);
            local_handle = 0;
        }
        cmap_handle = local_handle;
    }

    /* Walk nodelist.node.0, .1, ... until the entry with our nodeid is found */
    while (name == NULL && cmap_handle != 0) {
        uint32_t id = 0;
        char *key = NULL;

        key = g_strdup_printf("nodelist.node.%d.nodeid", lpc);
        rc = cmap_get_uint32(cmap_handle, key, &id);
        crm_trace("Checking %u vs %u from %s", nodeid, id, key);
        g_free(key);

        if (rc != CS_OK) {
            /* Ran off the end of the node list */
            break;
        }

        if (nodeid == id) {
            crm_trace("Searching for node name for %u in nodelist.node.%d", nodeid, lpc);

            key = g_strdup_printf("nodelist.node.%d.ring0_addr", lpc);
            rc = cmap_get_string(cmap_handle, key, &name);
            crm_trace("%s = %s", key, name ? name : "(null)");
            if (corosync_name_is_valid(key, name) == FALSE) {
                free(name);
                name = NULL;
            }
            g_free(key);

            if (name == NULL) {
                key = g_strdup_printf("nodelist.node.%d.name", lpc);
                rc = cmap_get_string(cmap_handle, key, &name);
                crm_trace("%s = %s %d", key, name ? name : "(null)", rc);
                if (corosync_name_is_valid(key, name) == FALSE) {
                    free(name);
                    name = NULL;
                }
                g_free(key);
            }
            break;
        }
        lpc++;
    }

    if (name == NULL) {
        /* Nothing usable in CMAP: fall back to the CFG API plus DNS */
        retries = 0;
        crm_trace("Initializing CFG connection");
        do {
            rc = corosync_cfg_initialize(&cfg_handle, &cfg_callbacks);
            if (rc != CS_OK) {
                retries++;
                crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc), retries);
                sleep(retries);
            }
        } while (retries < 5 && rc != CS_OK);

        if (rc != CS_OK) {
            crm_warn("Could not connect to the Corosync CFG API: %s (%d)", cs_strerror(rc), rc);
            cfg_handle = 0;
        }
    }

    if (name == NULL && cfg_handle != 0) {
        int numaddrs = 0;
        char buf[INET6_ADDRSTRLEN];
        socklen_t addrlen;
        struct sockaddr_storage *ss;
        corosync_cfg_node_address_t addrs[INTERFACE_MAX];

        rc = corosync_cfg_get_node_addrs(cfg_handle, nodeid, INTERFACE_MAX, &numaddrs, addrs);
        if (rc != CS_OK) {
            crm_debug("Unable to get node address for nodeid %u: %s", nodeid, cs_strerror(rc));

        } else if (numaddrs < 1) {
            /* addrs[0] is only meaningful when at least one address came back */
            crm_debug("No addresses returned for nodeid %u", nodeid);

        } else {
            ss = (struct sockaddr_storage *)addrs[0].address;
            if (ss->ss_family == AF_INET6) {
                addrlen = sizeof(struct sockaddr_in6);
            } else {
                addrlen = sizeof(struct sockaddr_in);
            }

            if (getnameinfo((struct sockaddr *)addrs[0].address, addrlen, buf, sizeof(buf), NULL, 0, 0) == 0) {
                crm_notice("Inferred node name '%s' for nodeid %u from DNS", buf, nodeid);
                if (corosync_name_is_valid("DNS", buf)) {
                    name = strdup(buf);
                }
            }
        }

        /* cfg_handle came from corosync_cfg_initialize(), so release it
         * through the CFG API rather than cmap_finalize()
         */
        corosync_cfg_finalize(cfg_handle);
    }

    if (local_handle) {
        cmap_finalize(local_handle);
    }

    if (name == NULL) {
        crm_err("Unable to get node name for nodeid %u", nodeid);
    }
    return name;
}
/*
 * Translate a subsystem name into its message type.
 *
 * Known names are looked up in a table; anything else is parsed as a decimal
 * number (normally the pid of a transient client). crm_msg_none is returned
 * when the text is neither.
 */
enum crm_ais_msg_types
text2msg_type(const char *text)
{
    const struct {
        const char *name;
        int type;
    } known[] = {
        { "ais", crm_msg_ais },
        { "crm_plugin", crm_msg_ais },
        { CRM_SYSTEM_CIB, crm_msg_cib },
        { CRM_SYSTEM_CRMD, crm_msg_crmd },
        { CRM_SYSTEM_DC, crm_msg_crmd },
        { CRM_SYSTEM_TENGINE, crm_msg_te },
        { CRM_SYSTEM_PENGINE, crm_msg_pe },
        { CRM_SYSTEM_LRMD, crm_msg_lrmd },
        { CRM_SYSTEM_STONITHD, crm_msg_stonithd },
        { "stonith-ng", crm_msg_stonith_ng },
        { "attrd", crm_msg_attrd },
    };
    int type = crm_msg_none;
    unsigned int idx = 0;

    CRM_CHECK(text != NULL, return type);

    /* First match wins, in the same order the names are listed above */
    for (idx = 0; idx < sizeof(known) / sizeof(known[0]); idx++) {
        if (safe_str_eq(text, known[idx].name)) {
            type = known[idx].type;
            return type;
        }
    }

    /* This will normally be a transient client rather than
     * a cluster daemon. Set the type to the pid of the client
     */
    if (sscanf(text, "%d", &type) != 1) {
        /* Ensure it's sane */
        type = crm_msg_none;
    }
    return type;
}
/* Cluster name handed out (as a copy) by crm_get_cluster_name() */
static char *ais_cluster_name = NULL;

/*
 * Store a newly allocated copy of the cluster name in *cname.
 *
 * Returns TRUE when a name is known and was copied, FALSE otherwise
 * (*cname is left untouched in that case).
 */
gboolean
crm_get_cluster_name(char **cname)
{
    CRM_CHECK(cname != NULL, return FALSE);

    if (ais_cluster_name == NULL) {
        return FALSE;
    }

    *cname = strdup(ais_cluster_name);
    return TRUE;
}
/*
 * Wrap 'data' in an AIS_Message and multicast it to the CPG group.
 *
 * class - message class, must be below 6
 * data  - NUL-terminated payload; NULL is sent as an empty string
 * local - copied into the message's host.local field
 * node  - destination node, or NULL to leave the destination fields zeroed
 * dest  - destination subsystem; crm_msg_ais is rejected before sending
 *
 * Payloads of CRM_BZ2_THRESHOLD bytes or more are bzip2-compressed; if
 * compression fails the payload is sent uncompressed instead.
 *
 * Returns TRUE if cpg_mcast_joined() reported CS_OK, FALSE otherwise.
 */
gboolean
send_ais_text(int class, const char *data,
- gboolean local, const char *node, enum crm_ais_msg_types dest)
+ gboolean local, crm_node_t *node, enum crm_ais_msg_types dest)
{
static int msg_id = 0;
static int local_pid = 0;
int retries = 0;
int rc = CS_OK;
int buf_len = sizeof(cs_ipc_header_response_t);
char *buf = NULL;
struct iovec iov;
const char *transport = "pcmk";
AIS_Message *ais_msg = NULL;
enum crm_ais_msg_types sender = text2msg_type(crm_system_name);
/* There are only 6 handlers registered to crm_lib_service in plugin.c */
CRM_CHECK(class < 6, crm_err("Invalid message class: %d", class); return FALSE);
if (data == NULL) {
data = "";
}
if (local_pid == 0) {
local_pid = getpid();
}
/* An unrecognised system name is identified by our pid instead */
if (sender == crm_msg_none) {
sender = local_pid;
}
/* calloc() zeroes the message, so fields not assigned below stay 0 */
ais_msg = calloc(1, sizeof(AIS_Message));
ais_msg->id = msg_id++;
ais_msg->header.id = class;
ais_msg->header.error = CS_OK;
ais_msg->host.type = dest;
ais_msg->host.local = local;
/* NOTE(review): the destination name length is not checked against MAX_NAME before the memcpy() below - confirm callers guarantee it fits */
- if (node) {
- ais_msg->host.size = strlen(node);
- memset(ais_msg->host.uname, 0, MAX_NAME);
- memcpy(ais_msg->host.uname, node, ais_msg->host.size);
- ais_msg->host.id = 0;
- } else {
- ais_msg->host.size = 0;
- memset(ais_msg->host.uname, 0, MAX_NAME);
- ais_msg->host.id = 0;
+ if (node) {
+ if (node->uname) {
+ ais_msg->host.size = strlen(node->uname);
+ memset(ais_msg->host.uname, 0, MAX_NAME);
+ memcpy(ais_msg->host.uname, node->uname, ais_msg->host.size);
+ }
+ ais_msg->host.id = node->id;
}
/* sender.id is sent as 0; pcmk_cpg_deliver() overwrites it with the nodeid reported by CPG */
ais_msg->sender.id = 0;
ais_msg->sender.type = sender;
ais_msg->sender.pid = local_pid;
ais_msg->sender.size = pcmk_uname_len;
memset(ais_msg->sender.uname, 0, MAX_NAME);
memcpy(ais_msg->sender.uname, pcmk_uname, ais_msg->sender.size);
/* size includes the terminating NUL */
ais_msg->size = 1 + strlen(data);
if (ais_msg->size < CRM_BZ2_THRESHOLD) {
/* Also jumped to from the compression branch when bzip2 fails */
failback:
ais_msg = realloc(ais_msg, sizeof(AIS_Message) + ais_msg->size);
memcpy(ais_msg->data, data, ais_msg->size);
} else {
char *compressed = NULL;
char *uncompressed = strdup(data);
unsigned int len = (ais_msg->size * 1.1) + 600; /* recommended size */
crm_trace("Compressing message payload");
/* coverity[returned_null] Ignore */
compressed = malloc( len);
rc = BZ2_bzBuffToBuffCompress(compressed, &len, uncompressed, ais_msg->size, CRM_BZ2_BLOCKS,
0, CRM_BZ2_WORK);
free(uncompressed);
if (rc != BZ_OK) {
crm_err("Compression failed: %d", rc);
free(compressed);
goto failback;
}
/* One extra byte so the compressed data can be NUL-terminated */
ais_msg = realloc(ais_msg, sizeof(AIS_Message) + len + 1);
memcpy(ais_msg->data, compressed, len);
ais_msg->data[len] = 0;
free(compressed);
ais_msg->is_compressed = TRUE;
ais_msg->compressed_size = len;
crm_trace("Compression details: %d -> %d", ais_msg->size, ais_data_len(ais_msg));
}
ais_msg->header.size = sizeof(AIS_Message) + ais_data_len(ais_msg);
crm_trace("Sending%s message %d to %s.%s (data=%d, total=%d)",
ais_msg->is_compressed ? " compressed" : "",
ais_msg->id, ais_dest(&(ais_msg->host)), msg_type2text(dest),
ais_data_len(ais_msg), ais_msg->header.size);
iov.iov_base = ais_msg;
iov.iov_len = ais_msg->header.size;
/* buf is allocated here and freed at the end, but not otherwise used in this function */
buf = realloc(buf, buf_len);
/* Retry up to 20 times while corosync reports TRY_AGAIN/QUEUE_FULL, sleeping longer each time */
do {
if (rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) {
retries++;
crm_info("Peer overloaded or membership in flux:"
" Re-sending message (Attempt %d of 20)", retries);
sleep(retries); /* Proportional back off */
}
errno = 0;
transport = "cpg";
CRM_CHECK(dest != crm_msg_ais, rc = CS_ERR_MESSAGE_ERROR; goto bail);
rc = cpg_mcast_joined(pcmk_cpg_handle, CPG_TYPE_AGREED, &iov, 1);
if (rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) {
cpg_flow_control_state_t fc_state = CPG_FLOW_CONTROL_DISABLED;
int rc2 = cpg_flow_control_state_get(pcmk_cpg_handle, &fc_state);
/* Give up at once if flow control is on or its state cannot be read */
if (rc2 == CS_OK && fc_state == CPG_FLOW_CONTROL_ENABLED) {
crm_warn("Connection overloaded, cannot send messages");
goto bail;
} else if (rc2 != CS_OK) {
crm_warn("Could not determin the connection state: %s (%d)",
ais_error2text(rc2), rc2);
goto bail;
}
}
} while ((rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) && retries < 20);
bail:
if (rc != CS_OK) {
crm_perror(LOG_ERR, "Sending message %d via %s: FAILED (rc=%d): %s",
ais_msg->id, transport, rc, ais_error2text(rc));
} else {
crm_trace("Message %d: sent", ais_msg->id);
}
free(buf);
free(ais_msg);
return (rc == CS_OK);
}
/*
 * Serialise 'msg' with dump_xml_unformatted() and send the text via
 * send_ais_text() using the crm_class_cluster message class.
 *
 * local, node and dest are passed through to send_ais_text() unchanged.
 * Returns whatever send_ais_text() returns.
 */
gboolean
-send_ais_message(xmlNode * msg, gboolean local, const char *node, enum crm_ais_msg_types dest)
+send_ais_message(xmlNode * msg, gboolean local, crm_node_t *node, enum crm_ais_msg_types dest)
{
gboolean rc = TRUE;
char *data = dump_xml_unformatted(msg);
rc = send_ais_text(crm_class_cluster, data, local, node, dest);
/* The serialised text is owned by this function */
free(data);
return rc;
}
/*
 * Leave the CPG group and close the CPG and quorum connections, if open.
 * Each handle is reset to 0 once its connection has been finalized.
 */
void
terminate_cs_connection(void)
{
    crm_notice("Disconnecting from Corosync");

    if (pcmk_cpg_handle == 0) {
        crm_info("No CPG connection");

    } else {
        crm_trace("Disconnecting CPG");
        cpg_leave(pcmk_cpg_handle, &pcmk_cpg_group);
        cpg_finalize(pcmk_cpg_handle);
        pcmk_cpg_handle = 0;
    }

    if (pcmk_quorum_handle == 0) {
        crm_info("No Quorum connection");

    } else {
        crm_trace("Disconnecting quorum");
        quorum_finalize(pcmk_quorum_handle);
        pcmk_quorum_handle = 0;
    }
}
/* Membership timer id and "force" flag. Neither is referenced by the corosync
 * functions shown in this section - presumably consumed elsewhere (TODO confirm).
 */
int ais_membership_timer = 0;
gboolean ais_membership_force = FALSE;
/*
 * Validate an incoming AIS message, decompress its payload if needed and
 * pass it to 'dispatch'.
 *
 * Two kinds of message are consumed here without reaching 'dispatch':
 *  - an uncompressed "identify" request, answered with our pid
 *  - crm_class_rmpeer, whose payload is the id of a peer to reap
 *
 * msg      - the message, must not be NULL
 * dispatch - consumer for (class, sender name, payload); may be NULL
 *
 * Always returns TRUE, including for messages rejected as invalid.
 */
static gboolean
ais_dispatch_message(AIS_Message * msg, gboolean(*dispatch) (int kind, const char *from, const char *data))
{
    char *payload = NULL;
    char *inflated = NULL;
    xmlNode *xml = NULL;

    CRM_ASSERT(msg != NULL);

    crm_trace("Got new%s message (size=%d, %d, %d)",
              msg->is_compressed ? " compressed" : "",
              ais_data_len(msg), msg->size, msg->compressed_size);

    payload = msg->data;

    if (msg->is_compressed && msg->size > 0) {
        int rc = BZ_OK;
        unsigned int new_size = msg->size + 1;

        if (check_message_sanity(msg, NULL) == FALSE) {
            goto badmsg;
        }

        crm_trace("Decompressing message data");
        inflated = calloc(1, new_size);
        rc = BZ2_bzBuffToBuffDecompress(inflated, &new_size, payload, msg->compressed_size, 1, 0);

        if (rc != BZ_OK) {
            crm_err("Decompression failed: %d", rc);
            goto badmsg;
        }

        CRM_ASSERT(rc == BZ_OK);
        CRM_ASSERT(new_size == msg->size);

        payload = inflated;

    } else if (check_message_sanity(msg, payload) == FALSE) {
        goto badmsg;

    } else if (safe_str_eq("identify", payload)) {
        /* Reply with our pid */
        char *pid_s = crm_itoa(getpid());

        send_ais_text(crm_class_cluster, pid_s, TRUE, NULL, crm_msg_ais);
        free(pid_s);
        goto done;
    }

    if (msg->header.id != crm_class_members) {
        /* Is this even needed anymore? */
        crm_get_peer(msg->sender.id, msg->sender.uname);
    }

    if (msg->header.id == crm_class_rmpeer) {
        uint32_t peer_id = crm_int_helper(payload, NULL);

        crm_info("Removing peer %s/%u", payload, peer_id);
        reap_crm_member(peer_id);
        goto done;
    }

    crm_trace("Payload: %s", payload);
    if (dispatch != NULL) {
        dispatch(msg->header.id, msg->sender.uname, payload);
    }
    goto done;

  badmsg:
    crm_err("Invalid message (id=%d, dest=%s:%s, from=%s:%s.%d):"
            " min=%d, total=%d, size=%d, bz2_size=%d",
            msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type),
            ais_dest(&(msg->sender)), msg_type2text(msg->sender.type),
            msg->sender.pid, (int)sizeof(AIS_Message),
            msg->header.size, msg->size, msg->compressed_size);

  done:
    free(inflated);
    free_xml(xml);
    return TRUE;
}
/* Message consumer used by pcmk_cpg_deliver(); refreshed on every pcmk_cpg_dispatch() call */
gboolean(*pcmk_cpg_dispatch_fn) (int kind, const char *from, const char *data) = NULL;

/*
 * Mainloop callback: process all pending CPG events.
 *
 * user_data - the message consumer to use for delivered messages
 *
 * Returns 0 on success, -1 if cpg_dispatch() did not return CS_OK.
 */
static int
pcmk_cpg_dispatch(gpointer user_data)
{
    int rc;

    pcmk_cpg_dispatch_fn = user_data;

    rc = cpg_dispatch(pcmk_cpg_handle, CS_DISPATCH_ALL);
    if (rc == CS_OK) {
        return 0;
    }

    crm_err("Connection to the CPG API failed: %d", rc);
    return -1;
}
/*
 * CPG delivery callback: filter an incoming message and hand it to
 * ais_dispatch_message().
 *
 * A message is dropped when the sender id it claims differs from the nodeid
 * CPG reports, or when it is addressed (by name or by id) to another node.
 * Otherwise sender.id is set from 'nodeid' and, if the message carries no
 * sender name, one is filled in from the peer cache where available.
 */
static void
pcmk_cpg_deliver(cpg_handle_t handle,
const struct cpg_name *groupName,
uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
AIS_Message *ais_msg = (AIS_Message *) msg;
/* A sender id of 0 means "not filled in" and is accepted */
if (ais_msg->sender.id > 0 && ais_msg->sender.id != nodeid) {
crm_err("Nodeid mismatch from %d.%d: claimed nodeid=%u", nodeid, pid, ais_msg->sender.id);
return;
} else if (ais_msg->host.size != 0 && safe_str_neq(ais_msg->host.uname, pcmk_uname)) {
/* Not for us */
return;
+ } else if (ais_msg->host.id != 0 && (pcmk_nodeid != ais_msg->host.id)) {
+ /* Not for us */
+ return;
}
/* Use the nodeid reported by CPG for the sender */
ais_msg->sender.id = nodeid;
if (ais_msg->sender.size == 0) {
crm_node_t *peer = crm_get_peer(nodeid, NULL);
if (peer == NULL) {
crm_err("Peer with nodeid=%u is unknown", nodeid);
} else if (peer->uname == NULL) {
crm_err("No uname for peer with nodeid=%u", nodeid);
} else {
crm_notice("Fixing uname for peer with nodeid=%u", nodeid);
ais_msg->sender.size = strlen(peer->uname);
memset(ais_msg->sender.uname, 0, MAX_NAME);
memcpy(ais_msg->sender.uname, peer->uname, ais_msg->sender.size);
}
}
ais_dispatch_message(ais_msg, pcmk_cpg_dispatch_fn);
}
/*
 * CPG configuration-change callback.
 *
 * Marks the crm_proc_cpg process as offline for every peer in left_list and
 * as online for every peer in member_list, logs each left/joined/member
 * entry, and logs an error when the local node id is missing from
 * member_list.
 *
 * Node ids are unsigned 32-bit values and are therefore logged with %u.
 */
static void
pcmk_cpg_membership(cpg_handle_t handle,
                    const struct cpg_name *groupName,
                    const struct cpg_address *member_list, size_t member_list_entries,
                    const struct cpg_address *left_list, size_t left_list_entries,
                    const struct cpg_address *joined_list, size_t joined_list_entries)
{
    int i;
    gboolean found = FALSE;
    static int counter = 0;     /* number of membership events seen so far */

    for (i = 0; i < left_list_entries; i++) {
        crm_node_t *peer = crm_get_peer(left_list[i].nodeid, NULL);

        crm_info("Left[%d.%d] %s.%u ", counter, i, groupName->value, left_list[i].nodeid);
        crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, OFFLINESTATUS);
    }

    for (i = 0; i < joined_list_entries; i++) {
        crm_info("Joined[%d.%d] %s.%u ", counter, i, groupName->value, joined_list[i].nodeid);
    }

    for (i = 0; i < member_list_entries; i++) {
        crm_node_t *peer = crm_get_peer(member_list[i].nodeid, NULL);

        crm_info("Member[%d.%d] %s.%u ", counter, i, groupName->value, member_list[i].nodeid);
        crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ONLINESTATUS);
        if (pcmk_nodeid == member_list[i].nodeid) {
            found = TRUE;
        }
    }

    if (!found) {
        crm_err("We're not part of CPG group %s anymore!", groupName->value);
        /* Possibly re-call cpg_join() */
    }

    counter++;
}
/* Callback table passed to cpg_initialize() by init_cpg_connection() */
cpg_callbacks_t cpg_callbacks = {
.cpg_deliver_fn = pcmk_cpg_deliver,
.cpg_confchg_fn = pcmk_cpg_membership,
};
/*
 * Connect to the CPG API, join the group named after crm_system_name and
 * register the connection's file descriptor with the mainloop.
 *
 * dispatch - message consumer, handed to the mainloop as user data for
 *            pcmk_cpg_dispatch()
 * destroy  - called by the mainloop when the connection goes away
 * nodeid   - output: local node id as reported by cpg_local_get()
 *
 * Returns TRUE on success. On failure the CPG handle is finalized and reset
 * to 0, so a later terminate_cs_connection() does not touch a stale handle,
 * and FALSE is returned.
 */
static gboolean
init_cpg_connection(gboolean(*dispatch) (int kind, const char *from, const char *data), void (*destroy) (gpointer),
                    uint32_t * nodeid)
{
    int rc = -1;
    int fd = 0;
    int retries = 0;
    crm_node_t *peer = NULL;
    struct mainloop_fd_callbacks cpg_fd_callbacks = {
        .dispatch = pcmk_cpg_dispatch,
        .destroy = destroy,
    };

    /* The group name buffer has a fixed size: copy with a bound and derive
     * the length (including the NUL) from what was actually stored
     */
    snprintf(pcmk_cpg_group.value, sizeof(pcmk_cpg_group.value), "%s", crm_system_name);
    pcmk_cpg_group.length = strlen(pcmk_cpg_group.value) + 1;

    cs_repeat(retries, 30, rc = cpg_initialize(&pcmk_cpg_handle, &cpg_callbacks));
    if (rc != CS_OK) {
        crm_err("Could not connect to the Cluster Process Group API: %d\n", rc);
        goto bail;
    }

    retries = 0;
    cs_repeat(retries, 30, rc = cpg_local_get(pcmk_cpg_handle, (unsigned int *)nodeid));
    if (rc != CS_OK) {
        crm_err("Could not get local node id from the CPG API");
        goto bail;
    }

    retries = 0;
    cs_repeat(retries, 30, rc = cpg_join(pcmk_cpg_handle, &pcmk_cpg_group));
    if (rc != CS_OK) {
        crm_err("Could not join the CPG group '%s': %d", crm_system_name, rc);
        goto bail;
    }

    rc = cpg_fd_get(pcmk_cpg_handle, &fd);
    if (rc != CS_OK) {
        crm_err("Could not obtain the CPG API connection: %d\n", rc);
        goto bail;
    }

    mainloop_add_fd("corosync-cpg", G_PRIORITY_MEDIUM, fd, dispatch, &cpg_fd_callbacks);

  bail:
    if (rc != CS_OK) {
        cpg_finalize(pcmk_cpg_handle);
        pcmk_cpg_handle = 0;
        return FALSE;
    }

    /* Record that the local node runs the CPG process */
    peer = crm_get_peer(pcmk_nodeid, pcmk_uname);
    crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ONLINESTATUS);
    return TRUE;
}
/*
 * Mainloop callback: process all pending quorum events.
 *
 * Any result other than CS_OK is treated as a failed connection, matching
 * pcmk_cpg_dispatch(). The earlier "rc < 0" test assumed error codes are
 * negative.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
pcmk_quorum_dispatch(gpointer user_data)
{
    int rc = 0;

    rc = quorum_dispatch(pcmk_quorum_handle, CS_DISPATCH_ALL);
    if (rc != CS_OK) {
        crm_err("Connection to the Quorum API failed: %d", rc);
        return -1;
    }
    return 0;
}
/*
 * Hash-table iterator: mark a peer as lost if it was not seen in the
 * membership identified by *user_data.
 *
 * key       - unused
 * value     - the crm_node_t to examine
 * user_data - pointer to the current 64-bit ring id (pcmk_quorum_notification()
 *             passes the address of its uint64_t ring_id, so it must be read
 *             back as uint64_t rather than int)
 *
 * Peers without a state, or already in the CRM_NODE_LOST state, are left alone.
 */
static void
corosync_mark_unseen_peer_dead(gpointer key, gpointer value, gpointer user_data)
{
    uint64_t *seq = user_data;
    crm_node_t *node = value;

    if (node->last_seen != *seq && node->state && crm_str_eq(CRM_NODE_LOST, node->state, TRUE) == FALSE) {
        crm_notice("Node %u/%s was not seen in the previous transition", node->id, node->uname);
        crm_update_peer_state(__FUNCTION__, node, CRM_NODE_LOST, 0);
    }
}
/* Hash-table iterator: reset the last_seen marker of the peer in 'value'. */
static void
corosync_mark_node_unseen(gpointer key, gpointer value, gpointer user_data)
{
    ((crm_node_t *) value)->last_seen = 0;
}
/*
 * Quorum API callback: record the new quorum state and membership.
 *
 * handle            - quorum connection (unused)
 * quorate           - non-zero if the partition has quorum
 * ring_id           - id of the new membership
 * view_list_entries - number of entries in view_list
 * view_list         - node ids of the current members
 *
 * Every listed node is marked as a member (its name is looked up first if
 * unknown), every cached peer not listed is marked lost, and finally the
 * application's quorum callback is invoked. Empty views received before the
 * first non-empty one are ignored.
 */
static void
pcmk_quorum_notification(quorum_handle_t handle,
                         uint32_t quorate,
                         uint64_t ring_id, uint32_t view_list_entries, uint32_t * view_list)
{
    int i;
    static gboolean init_phase = TRUE;

    if (quorate != crm_have_quorum) {
        crm_notice("Membership " U64T ": quorum %s (%lu)", ring_id,
                   quorate ? "acquired" : "lost", (long unsigned int)view_list_entries);
        crm_have_quorum = quorate;

    } else {
        crm_info("Membership " U64T ": quorum %s (%lu)", ring_id,
                 quorate ? "retained" : "still lost", (long unsigned int)view_list_entries);
    }

    if (view_list_entries == 0 && init_phase) {
        crm_info("Corosync membership is still forming, ignoring");
        return;
    }

    init_phase = FALSE;

    /* Clear every peer's last_seen, then stamp the ones in this view */
    g_hash_table_foreach(crm_peer_cache, corosync_mark_node_unseen, NULL);

    for (i = 0; i < view_list_entries; i++) {
        uint32_t id = view_list[i];
        char *name = NULL;
        crm_node_t *node = NULL;

        /* Node ids are unsigned, so print them with %u */
        crm_debug("Member[%d] %u ", i, id);

        node = crm_get_peer(id, NULL);
        if (node->uname == NULL) {
            crm_info("Obtaining name for new node %u", id);
            name = corosync_node_name(0, id);
            node = crm_get_peer(id, name);
        }

        crm_update_peer_state(__FUNCTION__, node, CRM_NODE_MEMBER, ring_id);
        free(name);
    }

    crm_trace("Reaping unseen nodes...");
    g_hash_table_foreach(crm_peer_cache, corosync_mark_unseen_peer_dead, &ring_id);

    if (quorum_app_callback) {
        quorum_app_callback(ring_id, quorate);
    }
}
/* Callback table passed to quorum_initialize() by init_quorum_connection() */
quorum_callbacks_t quorum_callbacks = {
.quorum_notify_fn = pcmk_quorum_notification,
};
/*
 * Connect to the corosync quorum API, read the current quorum state, start
 * change tracking, register the connection with the mainloop and populate
 * the peer cache from the corosync node list.
 *
 * dispatch - application callback invoked after each quorum notification
 * destroy  - called by the mainloop when the connection goes away
 *
 * Returns TRUE on success. When an API call fails the quorum handle is
 * finalized and reset to 0 (so terminate_cs_connection() does not finalize
 * it a second time) and FALSE is returned.
 */
gboolean
init_quorum_connection(gboolean(*dispatch) (unsigned long long, gboolean),
                       void (*destroy) (gpointer))
{
    int rc = -1;
    int fd = 0;
    int quorate = 0;
    uint32_t quorum_type = 0;

    /* Designated initializers, as in init_cpg_connection(), so that no
     * member of the structure is left indeterminate
     */
    struct mainloop_fd_callbacks quorum_fd_callbacks = {
        .dispatch = pcmk_quorum_dispatch,
        .destroy = destroy,
    };

    crm_debug("Configuring Pacemaker to obtain quorum from Corosync");

    rc = quorum_initialize(&pcmk_quorum_handle, &quorum_callbacks, &quorum_type);
    if (rc != CS_OK) {
        crm_err("Could not connect to the Quorum API: %d\n", rc);
        goto bail;

    } else if (quorum_type != QUORUM_SET) {
        /* NOTE(review): rc is CS_OK here, so this path logs an error but
         * still returns TRUE without tracking quorum - confirm whether
         * callers rely on that before changing it
         */
        crm_err("Corosync quorum is not configured\n");
        goto bail;
    }

    rc = quorum_getquorate(pcmk_quorum_handle, &quorate);
    if (rc != CS_OK) {
        crm_err("Could not obtain the current Quorum API state: %d\n", rc);
        goto bail;
    }

    crm_notice("Quorum %s", quorate ? "acquired" : "lost");
    quorum_app_callback = dispatch;
    crm_have_quorum = quorate;

    rc = quorum_trackstart(pcmk_quorum_handle, CS_TRACK_CHANGES | CS_TRACK_CURRENT);
    if (rc != CS_OK) {
        crm_err("Could not setup Quorum API notifications: %d\n", rc);
        goto bail;
    }

    rc = quorum_fd_get(pcmk_quorum_handle, &fd);
    if (rc != CS_OK) {
        crm_err("Could not obtain the Quorum API connection: %d\n", rc);
        goto bail;
    }

    mainloop_add_fd("quorum", G_PRIORITY_HIGH, fd, dispatch, &quorum_fd_callbacks);

    corosync_initialize_nodelist(NULL, FALSE, NULL);

  bail:
    if (rc != CS_OK) {
        quorum_finalize(pcmk_quorum_handle);
        pcmk_quorum_handle = 0;
        return FALSE;
    }
    return TRUE;
}
/*
 * Call init_cs_connection_once() up to five times.
 *
 * A result equal to CS_OK ends the loop with TRUE; CS_ERR_TRY_AGAIN and
 * CS_ERR_QUEUE_FULL trigger another attempt after a growing sleep; any
 * other result ends the loop with FALSE.
 */
gboolean
init_cs_connection(crm_cluster_t *cluster)
{
    int retries = 0;

    do {
        int rc = init_cs_connection_once(cluster);

        retries++;

        if (rc == CS_OK) {
            return TRUE;
        }
        if (rc != CS_ERR_TRY_AGAIN && rc != CS_ERR_QUEUE_FULL) {
            return FALSE;
        }
        sleep(retries);

    } while (retries < 5);

    crm_err("Could not connect to corosync after %d retries", retries);
    return FALSE;
}
/*
 * Make one attempt at connecting to corosync.
 *
 * Initializes the peer cache, connects to CPG, determines the local host
 * name with uname() and records name, node id and uuid in 'cluster'.
 *
 * Returns FALSE if the stack is not corosync or the CPG connection fails,
 * TRUE otherwise. The process exits with status 100 if uname() fails.
 */
gboolean
init_cs_connection_once(crm_cluster_t *cluster)
{
    struct utsname host;
    enum cluster_type_e stack = get_cluster_type();

    crm_peer_init();

    /* Here we just initialize comms */
    if (stack != pcmk_cluster_corosync) {
        crm_err("Invalid cluster type: %s (%d)", name_for_cluster_type(stack), stack);
        return FALSE;
    }

    if (init_cpg_connection(cluster->cs_dispatch, cluster->destroy, &pcmk_nodeid) == FALSE) {
        return FALSE;
    }

    if (uname(&host) < 0) {
        crm_perror(LOG_ERR, "Could not determin the current host");
        exit(100);
    }
    pcmk_uname = strdup(host.nodename);

    crm_info("Connection to '%s': established", name_for_cluster_type(stack));

    CRM_ASSERT(pcmk_uname != NULL);
    pcmk_uname_len = strlen(pcmk_uname);

    if (pcmk_nodeid != 0) {
        /* Ensure the local node always exists */
        crm_get_peer(pcmk_nodeid, pcmk_uname);
    }

    cluster->nodeid = pcmk_nodeid;
    cluster->uname = strdup(pcmk_uname);
    cluster->uuid = get_corosync_uuid(pcmk_nodeid, pcmk_uname);

    return TRUE;
}
/*
 * Run a series of consistency checks on an AIS message.
 *
 * msg  - the message to check
 * data - the uncompressed payload text, or NULL to skip the string-length
 *        check (the check is also skipped for compressed messages)
 *
 * Each check only runs while all earlier ones have passed. The result is
 * logged either way. Returns TRUE if the message is considered sane.
 */
gboolean
check_message_sanity(const AIS_Message * msg, const char *data)
{
gboolean sane = TRUE;
int dest = msg->host.type;
/* Payload size implied by the header: total size minus the fixed part */
int tmp_size = msg->header.size - sizeof(AIS_Message);
if (sane && msg->header.size == 0) {
crm_warn("Message with no size");
sane = FALSE;
}
if (sane && msg->header.error != CS_OK) {
crm_warn("Message header contains an error: %d", msg->header.error);
sane = FALSE;
}
if (sane && ais_data_len(msg) != tmp_size) {
crm_warn("Message payload size is incorrect: expected %d, got %d", ais_data_len(msg),
tmp_size);
/* NOTE(review): unlike the other checks this one only warns - 'sane' stays TRUE, so a size mismatch does not reject the message. Confirm this is intentional. */
sane = TRUE;
}
if (sane && ais_data_len(msg) == 0) {
crm_warn("Message with no payload");
sane = FALSE;
}
if (sane && data && msg->is_compressed == FALSE) {
/* For uncompressed text the advertised length must equal strlen + NUL */
int str_size = strlen(data) + 1;
if (ais_data_len(msg) != str_size) {
int lpc = 0;
crm_warn("Message payload is corrupted: expected %d bytes, got %d",
ais_data_len(msg), str_size);
sane = FALSE;
/* Dump the bytes from 10 before the end of the string (but not before index 0) up to msg->size */
for (lpc = (str_size - 10); lpc < msg->size; lpc++) {
if (lpc < 0) {
lpc = 0;
}
crm_debug("bad_data[%d]: %d / '%c'", lpc, data[lpc], data[lpc]);
}
}
}
if (sane == FALSE) {
crm_err("Invalid message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)",
msg->id, ais_dest(&(msg->host)), msg_type2text(dest),
ais_dest(&(msg->sender)), msg_type2text(msg->sender.type),
msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size);
} else {
crm_trace
("Verfied message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)",
msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)),
msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed,
ais_data_len(msg), msg->header.size);
}
return sane;
}
/*
 * Probe for corosync by opening (and immediately closing) a CMAP connection.
 *
 * Returns pcmk_cluster_corosync if the connection could be opened,
 * pcmk_cluster_unknown otherwise.
 */
enum cluster_type_e
find_corosync_variant(void)
{
    cmap_handle_t handle;
    int rc = CS_OK;

    /* There can be only one (possibility if confdb isn't around) */
    rc = cmap_initialize(&handle);
    if (rc == CS_OK) {
        cmap_finalize(handle);
        return pcmk_cluster_corosync;
    }

    crm_info("Failed to initialize the cmap API. Error %d", rc);
    return pcmk_cluster_unknown;
}
/*
 * Report whether 'node' counts as active: it must be non-NULL, be in the
 * CRM_NODE_MEMBER state and have the crm_proc_cpg process flag set.
 */
gboolean
crm_is_corosync_peer_active(const crm_node_t * node)
{
    if (node == NULL) {
        crm_trace("NULL");
        return FALSE;
    }

    if (safe_str_neq(node->state, CRM_NODE_MEMBER)) {
        crm_trace("%s: state=%s", node->uname, node->state);
        return FALSE;
    }

    if (!(node->processes & crm_proc_cpg)) {
        crm_trace("%s: processes=%.16x", node->uname, node->processes);
        return FALSE;
    }

    return TRUE;
}
/*
 * Populate the peer cache from the nodelist.node.N entries in CMAP.
 *
 * cluster      - unused
 * force_member - when adding XML entries, also set their type attribute to
 *                CRM_NODE_MEMBER
 * xml_parent   - if non-NULL, a node element is added beneath it for every
 *                entry that has both a node id and a name
 *
 * Returns TRUE if at least one entry had both a node id and a name, FALSE
 * otherwise (including when CMAP could not be reached).
 */
gboolean
corosync_initialize_nodelist(void *cluster, gboolean force_member, xmlNode *xml_parent)
{
    int index = 0;
    int rc = CS_OK;
    int retries = 0;
    gboolean found_any = FALSE;
    cmap_handle_t cmap_handle;

    /* Up to five connection attempts, sleeping longer after each failure */
    rc = cmap_initialize(&cmap_handle);
    while (rc != CS_OK) {
        retries++;
        crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc), retries);
        sleep(retries);

        if (retries >= 5) {
            break;
        }
        rc = cmap_initialize(&cmap_handle);
    }

    if (rc != CS_OK) {
        crm_warn("Could not connect to Cluster Configuration Database API, error %d", rc);
        return FALSE;
    }

    crm_trace("Initializing corosync nodelist");

    while (TRUE) {
        uint32_t nodeid = 0;
        char *name = NULL;
        char *key = g_strdup_printf("nodelist.node.%d.nodeid", index);

        rc = cmap_get_uint32(cmap_handle, key, &nodeid);
        g_free(key);

        if (rc != CS_OK) {
            /* No such entry: end of the list */
            break;
        }

        name = corosync_node_name(cmap_handle, nodeid);

        if (nodeid > 0 || name != NULL) {
            crm_trace("Initializing node[%d] %u = %s", index, nodeid, name);
            crm_get_peer(nodeid, name);
        }

        if (nodeid > 0 && name != NULL) {
            found_any = TRUE;

            if (xml_parent) {
                xmlNode *node = create_xml_node(xml_parent, XML_CIB_TAG_NODE);

                crm_xml_add_int(node, XML_ATTR_ID, nodeid);
                crm_xml_add(node, XML_ATTR_UNAME, name);
                if (force_member) {
                    crm_xml_add(node, XML_ATTR_TYPE, CRM_NODE_MEMBER);
                }
            }
        }

        free(name);
        index++;
    }

    cmap_finalize(cmap_handle);
    return found_any;
}
diff --git a/lib/cluster/legacy.c b/lib/cluster/legacy.c
index 59c61f2843..9dbdd7f8fc 100644
--- a/lib/cluster/legacy.c
+++ b/lib/cluster/legacy.c
@@ -1,1379 +1,1380 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <crm/cluster/internal.h>
#include <bzlib.h>
#include <crm/common/ipc.h>
#include <crm/cluster.h>
#include <crm/common/mainloop.h>
#include <sys/utsname.h>
#if SUPPORT_COROSYNC
# include <corosync/confdb.h>
# include <corosync/corodefs.h>
# include <corosync/cpg.h>
cpg_handle_t pcmk_cpg_handle = 0;
struct cpg_name pcmk_cpg_group = {
.length = 0,
.value[0] = 0,
};
#endif
#if HAVE_CMAP
# include <corosync/cmap.h>
#endif
#if SUPPORT_CMAN
# include <libcman.h>
cman_handle_t pcmk_cman_handle = NULL;
#endif
/* Local node name and its length; send_ais_text() copies them into the sender fields */
static char *pcmk_uname = NULL;
static int pcmk_uname_len = 0;
/* Local node id - presumably filled in during connection setup later in this file (TODO confirm) */
static uint32_t pcmk_nodeid = 0;
/* Membership timer id and "force" flag - not referenced in the part of the file shown here */
int ais_membership_timer = 0;
gboolean ais_membership_force = FALSE;
int ais_dispatch(gpointer user_data);
/*
 * Run 'code', and run it again for as long as it leaves 'rc' set to
 * CS_ERR_TRY_AGAIN or CS_ERR_QUEUE_FULL and 'counter' is still below 'max'.
 * Before each retry 'counter' is incremented and the process sleeps for
 * 'counter' seconds.
 *
 * The macro reads a variable named 'rc' from the caller's scope, so 'code'
 * is expected to assign its result to it. 'counter' is modified.
 */
#define cs_repeat(counter, max, code) do { \
code; \
if(rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { \
counter++; \
crm_debug("Retrying operation after %ds", counter); \
sleep(counter); \
} else { \
break; \
} \
} while(counter < max)
/*
 * Translate a subsystem name into its message type.
 *
 * Well-known names map to fixed types. Any other text is parsed as a decimal
 * number (normally the pid of a transient client); if that fails too,
 * crm_msg_none is returned.
 */
enum crm_ais_msg_types
text2msg_type(const char *text)
{
    int type = crm_msg_none;

    CRM_CHECK(text != NULL, return type);

    if (safe_str_eq(text, "ais") || safe_str_eq(text, "crm_plugin")) {
        return crm_msg_ais;
    }
    if (safe_str_eq(text, CRM_SYSTEM_CIB)) {
        return crm_msg_cib;
    }
    if (safe_str_eq(text, CRM_SYSTEM_CRMD) || safe_str_eq(text, CRM_SYSTEM_DC)) {
        return crm_msg_crmd;
    }
    if (safe_str_eq(text, CRM_SYSTEM_TENGINE)) {
        return crm_msg_te;
    }
    if (safe_str_eq(text, CRM_SYSTEM_PENGINE)) {
        return crm_msg_pe;
    }
    if (safe_str_eq(text, CRM_SYSTEM_LRMD)) {
        return crm_msg_lrmd;
    }
    if (safe_str_eq(text, CRM_SYSTEM_STONITHD)) {
        return crm_msg_stonithd;
    }
    if (safe_str_eq(text, "stonith-ng")) {
        return crm_msg_stonith_ng;
    }
    if (safe_str_eq(text, "attrd")) {
        return crm_msg_attrd;
    }

    /* This will normally be a transient client rather than
     * a cluster daemon. Set the type to the pid of the client
     */
    if (sscanf(text, "%d", &type) != 1) {
        /* Ensure it's sane */
        type = crm_msg_none;
    }
    return type;
}
/*
 * Return the payload of 'msg' as a newly allocated string.
 *
 * Uncompressed payloads are duplicated; compressed ones are inflated with
 * bzip2 into a buffer of msg->size + 1 bytes. The function asserts that
 * decompression succeeds and yields exactly msg->size bytes.
 */
char *
get_ais_data(const AIS_Message * msg)
{
    int rc = BZ_OK;
    char *text = NULL;
    unsigned int new_size = msg->size + 1;

    if (msg->is_compressed) {
        crm_trace("Decompressing message data");
        text = calloc(1, new_size);

        rc = BZ2_bzBuffToBuffDecompress(text, &new_size, (char *)msg->data,
                                        msg->compressed_size, 1, 0);

        CRM_ASSERT(rc == BZ_OK);
        CRM_ASSERT(new_size == msg->size);

    } else {
        crm_trace("Returning uncompressed message data");
        text = strdup(msg->data);
    }

    return text;
}
#if SUPPORT_COROSYNC
/* Descriptors for the plugin connection; -1 until assigned (assignment is not visible in this section) */
int ais_fd_sync = -1;
int ais_fd_async = -1; /* never send messages via this channel */
void *ais_ipc_ctx = NULL;
/* IPC handle used by get_ais_nodeid() for its request/reply exchange */
hdb_handle_t ais_ipc_handle = 0;
/* Cluster name stored by get_ais_nodeid() and handed out by crm_get_cluster_name() */
static char *ais_cluster_name = NULL;
/*
 * Ask the plugin for the local node's id, name and cluster name.
 *
 * id    - output: node id
 * uname - output: newly allocated copy of the node name (caller frees)
 *
 * The request is retried up to 20 times, with a growing sleep, while the
 * peer reports CS_ERR_TRY_AGAIN or CS_ERR_QUEUE_FULL. The cluster name from
 * the reply is cached for crm_get_cluster_name().
 *
 * Returns TRUE on success, FALSE if sending failed or the reply carries an
 * error.
 */
gboolean
get_ais_nodeid(uint32_t * id, char **uname)
{
    struct iovec iov;
    int retries = 0;
    int rc = CS_OK;
    cs_ipc_header_response_t header;
    struct crm_ais_nodeid_resp_s answer;

    header.error = CS_OK;
    header.id = crm_class_nodeid;
    header.size = sizeof(cs_ipc_header_response_t);

    CRM_CHECK(id != NULL, return FALSE);
    CRM_CHECK(uname != NULL, return FALSE);

    iov.iov_base = &header;
    iov.iov_len = header.size;

  retry:
    errno = 0;
    rc = coroipcc_msg_send_reply_receive(ais_ipc_handle, &iov, 1, &answer, sizeof(answer));
    if (rc == CS_OK) {
        CRM_CHECK(answer.header.size == sizeof(struct crm_ais_nodeid_resp_s),
                  crm_err("Odd message: id=%d, size=%d, error=%d",
                          answer.header.id, answer.header.size, answer.header.error));
        CRM_CHECK(answer.header.id == crm_class_nodeid,
                  crm_err("Bad response id: %d", answer.header.id));
    }

    if ((rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) && retries < 20) {
        retries++;
        crm_info("Peer overloaded: Re-sending message (Attempt %d of 20)", retries);
        sleep(retries);         /* Proportional back off */
        goto retry;
    }

    if (rc != CS_OK) {
        crm_err("Sending nodeid request: FAILED (rc=%d): %s", rc, ais_error2text(rc));
        return FALSE;

    } else if (answer.header.error != CS_OK) {
        /* rc is CS_OK on this path; the failure is the one carried in the reply */
        crm_err("Bad response from peer: (rc=%d): %s",
                answer.header.error, ais_error2text(answer.header.error));
        return FALSE;
    }

    crm_info("Server details: id=%u uname=%s cname=%s", answer.id, answer.uname, answer.cname);

    *id = answer.id;
    *uname = strdup(answer.uname);

    /* Release the name cached by an earlier call before replacing it */
    free(ais_cluster_name);
    ais_cluster_name = strdup(answer.cname);

    return TRUE;
}
/*
 * Store a newly allocated copy of the cached cluster name in *cname.
 *
 * Returns TRUE when a name is cached and was copied, FALSE otherwise
 * (*cname is left untouched in that case).
 */
gboolean
crm_get_cluster_name(char **cname)
{
    CRM_CHECK(cname != NULL, return FALSE);

    if (ais_cluster_name == NULL) {
        return FALSE;
    }

    *cname = strdup(ais_cluster_name);
    return TRUE;
}
/*
 * Build an AIS_Message carrying 'data' and send it to the cluster.
 *
 * class: message class; must be below 6
 * data:  NUL-terminated payload; NULL is sent as the empty string
 * local: copied into the message's host.local field
 * node:  optional destination peer; when supplied its uname (if any) and id
 *        are copied into the message's host fields
 * dest:  destination message type, copied into host.type
 *
 * Payloads of CRM_BZ2_THRESHOLD bytes or more are bzip2-compressed, falling
 * back to the uncompressed form if compression fails.  Sending is retried
 * (sleeping longer each time) while the transport reports CS_ERR_TRY_AGAIN
 * or CS_ERR_QUEUE_FULL, up to 20 retries.
 *
 * Returns TRUE when the final result code is CS_OK, FALSE otherwise.
 */
gboolean
send_ais_text(int class, const char *data,
- gboolean local, const char *node, enum crm_ais_msg_types dest)
+ gboolean local, crm_node_t *node, enum crm_ais_msg_types dest)
{
static int msg_id = 0;
static int local_pid = 0;
enum cluster_type_e cluster_type = get_cluster_type();
int retries = 0;
int rc = CS_OK;
int buf_len = sizeof(cs_ipc_header_response_t);
char *buf = NULL;
struct iovec iov;
const char *transport = "pcmk";
cs_ipc_header_response_t *header = NULL;
AIS_Message *ais_msg = NULL;
enum crm_ais_msg_types sender = text2msg_type(crm_system_name);
/* There are only 6 handlers registered to crm_lib_service in plugin.c */
CRM_CHECK(class < 6, crm_err("Invalid message class: %d", class); return FALSE);
if (data == NULL) {
data = "";
}
/* The pid is looked up once and cached in a function-static variable */
if (local_pid == 0) {
local_pid = getpid();
}
/* Fall back to our pid when crm_system_name maps to no known message type */
if (sender == crm_msg_none) {
sender = local_pid;
}
/* NOTE(review): the calloc/realloc/malloc results in this function are used without a NULL check */
ais_msg = calloc(1, sizeof(AIS_Message));
ais_msg->id = msg_id++;
ais_msg->header.id = class;
ais_msg->header.error = CS_OK;
ais_msg->host.type = dest;
ais_msg->host.local = local;
- if (node) {
- ais_msg->host.size = strlen(node);
- memset(ais_msg->host.uname, 0, MAX_NAME);
- memcpy(ais_msg->host.uname, node, ais_msg->host.size);
- ais_msg->host.id = 0;
- } else {
- ais_msg->host.size = 0;
- memset(ais_msg->host.uname, 0, MAX_NAME);
- ais_msg->host.id = 0;
+ if (node) {
+ if (node->uname) {
+ ais_msg->host.size = strlen(node->uname);
+ memset(ais_msg->host.uname, 0, MAX_NAME);
+ memcpy(ais_msg->host.uname, node->uname, ais_msg->host.size);
+ }
+ ais_msg->host.id = node->id;
}
ais_msg->sender.id = 0;
ais_msg->sender.type = sender;
ais_msg->sender.pid = local_pid;
ais_msg->sender.size = pcmk_uname_len;
memset(ais_msg->sender.uname, 0, MAX_NAME);
memcpy(ais_msg->sender.uname, pcmk_uname, ais_msg->sender.size);
/* The recorded size includes the payload's terminating NUL */
ais_msg->size = 1 + strlen(data);
if (ais_msg->size < CRM_BZ2_THRESHOLD) {
/* Also the landing point when compression of a large payload fails */
failback:
ais_msg = realloc(ais_msg, sizeof(AIS_Message) + ais_msg->size);
memcpy(ais_msg->data, data, ais_msg->size);
} else {
char *compressed = NULL;
char *uncompressed = strdup(data);
unsigned int len = (ais_msg->size * 1.1) + 600; /* recommended size */
crm_trace("Compressing message payload");
compressed = malloc( len);
rc = BZ2_bzBuffToBuffCompress(compressed, &len, uncompressed, ais_msg->size, CRM_BZ2_BLOCKS,
0, CRM_BZ2_WORK);
free(uncompressed);
if (rc != BZ_OK) {
crm_err("Compression failed: %d", rc);
free(compressed);
goto failback;
}
ais_msg = realloc(ais_msg, sizeof(AIS_Message) + len + 1);
memcpy(ais_msg->data, compressed, len);
ais_msg->data[len] = 0;
free(compressed);
ais_msg->is_compressed = TRUE;
ais_msg->compressed_size = len;
crm_trace("Compression details: %d -> %d", ais_msg->size, ais_data_len(ais_msg));
}
ais_msg->header.size = sizeof(AIS_Message) + ais_data_len(ais_msg);
crm_trace("Sending%s message %d to %s.%s (data=%d, total=%d)",
ais_msg->is_compressed ? " compressed" : "",
ais_msg->id, ais_dest(&(ais_msg->host)), msg_type2text(dest),
ais_data_len(ais_msg), ais_msg->header.size);
iov.iov_base = ais_msg;
iov.iov_len = ais_msg->header.size;
/* buf is still NULL here, so this allocates the reply buffer */
buf = realloc(buf, buf_len);
do {
/* rc holds the previous attempt's result; back off before trying again */
if (rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) {
retries++;
crm_info("Peer overloaded or membership in flux:"
" Re-sending message (Attempt %d of 20)", retries);
sleep(retries); /* Proportional back off */
}
errno = 0;
switch (cluster_type) {
case pcmk_cluster_corosync:
CRM_ASSERT(FALSE/*Not supported here*/);
break;
case pcmk_cluster_classic_ais:
/* Plugin transport: send over IPC and validate the acknowledgement header */
rc = coroipcc_msg_send_reply_receive(ais_ipc_handle, &iov, 1, buf, buf_len);
header = (cs_ipc_header_response_t *) buf;
if (rc == CS_OK) {
CRM_CHECK(header->size == sizeof(cs_ipc_header_response_t),
crm_err("Odd message: id=%d, size=%d, class=%d, error=%d",
header->id, header->size, class, header->error));
CRM_ASSERT(buf_len >= header->size);
CRM_CHECK(header->id == CRM_MESSAGE_IPC_ACK,
crm_err("Bad response id (%d) for request (%d)", header->id,
ais_msg->header.id));
CRM_CHECK(header->error == CS_OK, rc = header->error);
}
break;
case pcmk_cluster_cman:
/* CPG transport: messages addressed to crm_msg_ais are rejected */
transport = "cpg";
CRM_CHECK(dest != crm_msg_ais, rc = CS_ERR_MESSAGE_ERROR; goto bail);
rc = cpg_mcast_joined(pcmk_cpg_handle, CPG_TYPE_AGREED, &iov, 1);
if (rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) {
cpg_flow_control_state_t fc_state = CPG_FLOW_CONTROL_DISABLED;
int rc2 = cpg_flow_control_state_get(pcmk_cpg_handle, &fc_state);
/* Give up immediately if flow control is on or its state is unknown */
if (rc2 == CS_OK && fc_state == CPG_FLOW_CONTROL_ENABLED) {
crm_warn("Connection overloaded, cannot send messages");
goto bail;
} else if (rc2 != CS_OK) {
crm_warn("Could not determin the connection state: %s (%d)",
ais_error2text(rc2), rc2);
goto bail;
}
}
break;
case pcmk_cluster_unknown:
case pcmk_cluster_invalid:
case pcmk_cluster_heartbeat:
CRM_ASSERT(is_openais_cluster());
break;
}
} while ((rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) && retries < 20);
bail:
if (rc != CS_OK) {
crm_perror(LOG_ERR, "Sending message %d via %s: FAILED (rc=%d): %s",
ais_msg->id, transport, rc, ais_error2text(rc));
} else {
crm_trace("Message %d: sent", ais_msg->id);
}
/* Both the reply buffer and the message are released on every path that reaches here */
free(buf);
free(ais_msg);
return (rc == CS_OK);
}
/*
 * Serialize an XML message and send it as a crm_class_cluster message.
 *
 * On a classic AIS cluster the call fails early (returning FALSE) when the
 * async descriptor is negative, i.e. there is no plugin connection.
 * Otherwise the result of send_ais_text() is returned; the serialized text
 * is freed before returning.
 */
gboolean
-send_ais_message(xmlNode * msg, gboolean local, const char *node, enum crm_ais_msg_types dest)
+send_ais_message(xmlNode * msg, gboolean local, crm_node_t *node, enum crm_ais_msg_types dest)
{
gboolean rc = TRUE;
char *data = NULL;
if (is_classic_ais_cluster()) {
if (ais_fd_async < 0) {
crm_err("Not connected to AIS: %d", ais_fd_async);
return FALSE;
}
}
data = dump_xml_unformatted(msg);
rc = send_ais_text(crm_class_cluster, data, local, node, dest);
free(data);
return rc;
}
/*
 * Tear down whichever cluster connections are currently open.
 *
 * Classic AIS clusters disconnect from the plugin; all others leave and
 * finalize the CPG group.  When built with cman support and running on a
 * cman cluster, the cman connection is closed as well.  Every handle is
 * cleared after it is released so that a later call (or any other code that
 * tests the handle) cannot reuse a connection that no longer exists.
 */
void
terminate_cs_connection(void)
{
    crm_notice("Disconnecting from Corosync");

    if (is_classic_ais_cluster()) {
        if (ais_ipc_handle) {
            crm_trace("Disconnecting plugin");
            coroipcc_service_disconnect(ais_ipc_handle);
            ais_ipc_handle = 0;
        } else {
            crm_info("No plugin connection");
        }

    } else {
        if (pcmk_cpg_handle) {
            crm_trace("Disconnecting CPG");
            cpg_leave(pcmk_cpg_handle, &pcmk_cpg_group);
            cpg_finalize(pcmk_cpg_handle);
            pcmk_cpg_handle = 0;
        } else {
            crm_info("No CPG connection");
        }
    }

#  if SUPPORT_CMAN
    if (is_cman_cluster()) {
        if (pcmk_cman_handle) {
            crm_trace("Disconnecting cman");
            cman_stop_notification(pcmk_cman_handle);
            cman_finish(pcmk_cman_handle);
            /* Match the other transports: never keep a finished handle around */
            pcmk_cman_handle = NULL;
        } else {
            crm_info("No cman connection");
        }
    }
#  endif

    ais_fd_async = -1;
    ais_fd_sync = -1;
}
/*
 * Update the peer cache from one <node> element of a membership message.
 *
 * The element's attributes are read as text, converted to numbers and
 * handed to crm_update_peer(); the peer entry it returns is passed back.
 * The 'seq' argument is not used by this function.
 */
static crm_node_t *
crm_update_ais_node(xmlNode * member, long long seq)
{
    const char *raw_id = crm_element_value(member, "id");
    const char *peer_addr = crm_element_value(member, "addr");
    const char *peer_name = crm_element_value(member, "uname");
    const char *peer_state = crm_element_value(member, "state");
    const char *raw_born = crm_element_value(member, "born");
    const char *raw_seen = crm_element_value(member, "seen");
    const char *raw_votes = crm_element_value(member, "votes");
    const char *raw_procs = crm_element_value(member, "processes");

    int vote_count = crm_int_helper(raw_votes, NULL);
    unsigned int node_id = crm_int_helper(raw_id, NULL);
    unsigned int proc_mask = crm_int_helper(raw_procs, NULL);

    /* TODO: These values will contain garbage if version < 0.7.1 */
    uint64_t born_on = crm_int_helper(raw_born, NULL);
    uint64_t last_seen = crm_int_helper(raw_seen, NULL);

    return crm_update_peer(__FUNCTION__, node_id, born_on, last_seen, vote_count, proc_mask,
                           peer_name, peer_name, peer_addr, peer_state);
}
/*
 * Decode one incoming AIS message and hand its payload to 'dispatch'.
 *
 * Compressed payloads are sanity-checked and decompressed first.  An
 * uncompressed "identify" payload is answered with our pid and not
 * dispatched.  Peer-removal messages reap the named peer; on classic AIS
 * clusters membership and quorum messages additionally update the quorum
 * state and the peer cache before being dispatched.
 *
 * Returns TRUE in every case, including for messages rejected as invalid
 * (the badmsg path logs and then jumps back to the common cleanup).
 */
static gboolean
ais_dispatch_message(AIS_Message * msg, gboolean(*dispatch) (int kind, const char *from, const char *data))
{
char *data = NULL;
char *uncompressed = NULL;
xmlNode *xml = NULL;
CRM_ASSERT(msg != NULL);
crm_trace("Got new%s message (size=%d, %d, %d)",
msg->is_compressed ? " compressed" : "",
ais_data_len(msg), msg->size, msg->compressed_size);
data = msg->data;
if (msg->is_compressed && msg->size > 0) {
int rc = BZ_OK;
/* One extra byte beyond the advertised uncompressed size */
unsigned int new_size = msg->size + 1;
if (check_message_sanity(msg, NULL) == FALSE) {
goto badmsg;
}
crm_trace("Decompressing message data");
/* NOTE(review): the calloc result is not checked before use */
uncompressed = calloc(1, new_size);
rc = BZ2_bzBuffToBuffDecompress(uncompressed, &new_size, data, msg->compressed_size, 1, 0);
if (rc != BZ_OK) {
crm_err("Decompression failed: %d", rc);
goto badmsg;
}
CRM_ASSERT(rc == BZ_OK);
CRM_ASSERT(new_size == msg->size);
data = uncompressed;
} else if (check_message_sanity(msg, data) == FALSE) {
goto badmsg;
} else if (safe_str_eq("identify", data)) {
/* Answer an identification probe with our pid; nothing is dispatched */
int pid = getpid();
char *pid_s = crm_itoa(pid);
send_ais_text(crm_class_cluster, pid_s, TRUE, NULL, crm_msg_ais);
free(pid_s);
goto done;
}
/* For everything except membership updates, look the sender up in the peer cache */
if (msg->header.id != crm_class_members) {
crm_get_peer(msg->sender.id, msg->sender.uname);
}
if (msg->header.id == crm_class_rmpeer) {
/* The payload is the numeric id of the peer to remove */
uint32_t id = crm_int_helper(data, NULL);
crm_info("Removing peer %s/%u", data, id);
reap_crm_member(id);
goto done;
} else if (is_classic_ais_cluster()) {
if (msg->header.id == crm_class_members || msg->header.id == crm_class_quorum) {
xmlNode *node = NULL;
const char *value = NULL;
gboolean quorate = FALSE;
xml = string2xml(data);
if (xml == NULL) {
crm_err("Invalid membership update: %s", data);
goto badmsg;
}
value = crm_element_value(xml, "quorate");
CRM_CHECK(value != NULL, crm_log_xml_err(xml, "No quorum value:"); goto badmsg);
if (crm_is_true(value)) {
quorate = TRUE;
}
value = crm_element_value(xml, "id");
CRM_CHECK(value != NULL, crm_log_xml_err(xml, "No membership id"); goto badmsg);
crm_peer_seq = crm_int_helper(value, NULL);
/* Log at notice level only when the quorum state actually changes */
if (quorate != crm_have_quorum) {
crm_notice("Membership %s: quorum %s", value, quorate ? "acquired" : "lost");
crm_have_quorum = quorate;
} else {
crm_info("Membership %s: quorum %s", value, quorate ? "retained" : "still lost");
}
/* Every child element describes one node of the membership */
for (node = __xml_first_child(xml); node != NULL; node = __xml_next(node)) {
crm_update_ais_node(node, crm_peer_seq);
}
}
}
crm_trace("Payload: %s", data);
if (dispatch != NULL) {
dispatch(msg->header.id, msg->sender.uname, data);
}
/* Common cleanup; also reached from badmsg below */
done:
free(uncompressed);
free_xml(xml);
return TRUE;
badmsg:
crm_err("Invalid message (id=%d, dest=%s:%s, from=%s:%s.%d):"
" min=%d, total=%d, size=%d, bz2_size=%d",
msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type),
ais_dest(&(msg->sender)), msg_type2text(msg->sender.type),
msg->sender.pid, (int)sizeof(AIS_Message),
msg->header.size, msg->size, msg->compressed_size);
goto done;
}
/*
 * Drain the plugin IPC connection, handing each message to the dispatch
 * callback supplied as user_data.
 *
 * Returns 0 when there is nothing (more) to read or the last message was
 * handled, and -1 when reading fails or a message could not be handled.
 */
int
ais_dispatch(gpointer user_data)
{
    gboolean(*handler) (int kind, const char *from, const char *data) = user_data;
    gboolean handled = TRUE;

    while (handled) {
        char *raw = NULL;
        int rc = coroipcc_dispatch_get(ais_ipc_handle, (void **)&raw, 0);

        if (rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) {
            return 0;

        } else if (rc != CS_OK) {
            crm_perror(LOG_ERR, "Receiving message body failed: (%d) %s", rc, ais_error2text(rc));
            return -1;

        } else if (raw == NULL) {
            /* NULL is a legal "no message after all" value */
            return 0;
        }

        handled = ais_dispatch_message((AIS_Message *) raw, handler);
        coroipcc_dispatch_put(ais_ipc_handle);

        /* Stop once the connection has been torn down underneath us */
        if (!ais_ipc_handle) {
            break;
        }
    }

    return handled ? 0 : -1;
}
/*
 * Default handler for loss of the plugin connection: record that the sync
 * descriptor is gone, log the failure and terminate the process with
 * status 1.  The user_data argument is ignored.
 */
static void
ais_destroy(gpointer user_data)
{
    ais_fd_sync = -1;
    crm_err("AIS connection terminated");
    exit(1);
}
# if SUPPORT_CMAN
/*
 * Mainloop callback: process all pending cman events.
 *
 * Returns TRUE when cman_dispatch() succeeds and FALSE (after logging) when
 * it reports a negative result.  The user_data argument is ignored.
 */
static int
pcmk_cman_dispatch(gpointer user_data)
{
    int outcome = cman_dispatch(pcmk_cman_handle, CMAN_DISPATCH_ALL);

    if (outcome >= 0) {
        return TRUE;
    }

    crm_err("Connection to cman failed: %d", outcome);
    return FALSE;
}
# define MAX_NODES 256
/*
 * cman notification handler.
 *
 * handle:   unused; the global pcmk_cman_handle is queried instead
 * privdata: optional callback invoked with the membership sequence and
 *           quorum state after a state change has been processed
 * reason:   the cman event being reported
 * arg:      for state changes, the new quorum state; for shutdown requests,
 *           whether the shutdown is forced
 *
 * State changes refresh the membership sequence, quorum flag and peer
 * cache.  Shutdown requests are always refused.  Anything else is ignored.
 */
static void
cman_event_callback(cman_handle_t handle, void *privdata, int reason, int arg)
{
    static cman_node_t cman_nodes[MAX_NODES];

    gboolean(*notify) (unsigned long long, gboolean) = privdata;
    cman_cluster_t info;
    int status = 0;
    int found = 0;
    int idx = 0;

    if (reason == CMAN_REASON_TRY_SHUTDOWN) {
        /* Always reply with a negative - pacemaker needs to be stopped first */
        crm_info("CMAN wants to shut down: %s", arg ? "forced" : "optional");
        cman_replyto_shutdown(pcmk_cman_handle, 0);
        return;
    }

    if (reason != CMAN_REASON_STATECHANGE) {
        /* Configuration updates (and any other reason) need no action */
        return;
    }

    memset(&info, 0, sizeof(info));
    status = cman_get_cluster(pcmk_cman_handle, &info);
    if (status < 0) {
        crm_err("Couldn't query cman cluster details: %d %d", status, errno);
        return;
    }

    crm_peer_seq = info.ci_generation;
    if (arg == crm_have_quorum) {
        crm_info("Membership %llu: quorum %s", crm_peer_seq,
                 arg ? "retained" : "still lost");
    } else {
        crm_notice("Membership %llu: quorum %s", crm_peer_seq, arg ? "acquired" : "lost");
        crm_have_quorum = arg;
    }

    status = cman_get_nodes(pcmk_cman_handle, MAX_NODES, &found, cman_nodes);
    if (status < 0) {
        crm_err("Couldn't query cman node list: %d %d", status, errno);
        return;
    }

    for (idx = 0; idx < found; idx++) {
        cman_node_t *entry = &cman_nodes[idx];

        if (entry->cn_nodeid == 0) {
            /* Never allow node ID 0 to be considered a member #315711 */
            entry->cn_member = 0;
        }

        crm_update_peer(__FUNCTION__, entry->cn_nodeid, entry->cn_incarnation,
                        entry->cn_member ? crm_peer_seq : 0, 0, 0,
                        entry->cn_name, entry->cn_name, NULL,
                        entry->cn_member ? CRM_NODE_MEMBER : CRM_NODE_LOST);
    }

    if (notify) {
        notify(crm_peer_seq, crm_have_quorum);
    }
}
# endif
/*
 * Connect to cman for membership and quorum information.
 *
 * dispatch: callback told about membership/quorum changes; it is registered
 *           as the private data of the cman connection
 * destroy:  callback installed as the mainloop 'destroy' handler for the
 *           cman file descriptor
 *
 * On success the cluster name is cached, notifications are enabled, the
 * current membership is processed once and the cman descriptor is added to
 * the mainloop; TRUE is returned.  On any failure the connection is closed,
 * the global handle is cleared so nothing can reuse it, and FALSE is
 * returned.  Builds without cman support log an error and exit(100).
 */
gboolean
init_cman_connection(gboolean(*dispatch) (unsigned long long, gboolean), void (*destroy) (gpointer))
{
#  if SUPPORT_CMAN
    int rc = -1, fd = -1;
    cman_cluster_t cluster;

    struct mainloop_fd_callbacks cman_fd_callbacks = {
        .dispatch = pcmk_cman_dispatch,
        .destroy = destroy,
    };

    crm_info("Configuring Pacemaker to obtain quorum from cman");

    memset(&cluster, 0, sizeof(cluster));

    pcmk_cman_handle = cman_init(dispatch);
    if (pcmk_cman_handle == NULL || cman_is_active(pcmk_cman_handle) == FALSE) {
        crm_err("Couldn't connect to cman");
        goto cman_bail;
    }

    rc = cman_get_cluster(pcmk_cman_handle, &cluster);
    if (rc < 0) {
        crm_err("Couldn't query cman cluster details: %d %d", rc, errno);
        goto cman_bail;
    }
    ais_cluster_name = strdup(cluster.ci_name);

    rc = cman_start_notification(pcmk_cman_handle, cman_event_callback);
    if (rc < 0) {
        crm_err("Couldn't register for cman notifications: %d %d", rc, errno);
        goto cman_bail;
    }

    /* Get the current membership state */
    cman_event_callback(pcmk_cman_handle, dispatch, CMAN_REASON_STATECHANGE,
                        cman_is_quorate(pcmk_cman_handle));

    fd = cman_get_fd(pcmk_cman_handle);
    mainloop_add_fd("cman", G_PRIORITY_MEDIUM, fd, dispatch, &cman_fd_callbacks);

  cman_bail:
    if (rc < 0) {
        if (pcmk_cman_handle != NULL) {
            cman_finish(pcmk_cman_handle);
            /* Do not leave a finished handle behind for terminate_cs_connection() */
            pcmk_cman_handle = NULL;
        }
        return FALSE;
    }
#  else
    crm_err("cman qorum is not supported in this build");
    exit(100);
#  endif
    return TRUE;
}
# ifdef SUPPORT_COROSYNC
gboolean(*pcmk_cpg_dispatch_fn) (int kind, const char *from, const char *data) = NULL;
/*
 * Mainloop callback for the CPG descriptor.
 *
 * Stores user_data as the message handler that the CPG delivery callback
 * will use, then processes all pending CPG events.  Returns 0 on success
 * and -1 (after logging) if cpg_dispatch() fails.
 */
static int
pcmk_cpg_dispatch(gpointer user_data)
{
    int status;

    pcmk_cpg_dispatch_fn = user_data;

    status = cpg_dispatch(pcmk_cpg_handle, CS_DISPATCH_ALL);
    if (status == CS_OK) {
        return 0;
    }

    crm_err("Connection to the CPG API failed: %d", status);
    return -1;
}
/*
 * CPG delivery callback: filter an incoming message and pass it on to
 * ais_dispatch_message() with the handler saved by pcmk_cpg_dispatch().
 *
 * Messages are dropped when the sender id embedded in the message disagrees
 * with the node id reported by CPG, or when the message is addressed (by
 * name or by node id) to a different node.  If the sender did not include
 * its name, it is filled in from the peer cache when known.
 */
static void
pcmk_cpg_deliver(cpg_handle_t handle,
const struct cpg_name *groupName,
uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
AIS_Message *ais_msg = (AIS_Message *) msg;
if (ais_msg->sender.id > 0 && ais_msg->sender.id != nodeid) {
crm_err("Nodeid mismatch from %d.%d: claimed nodeid=%u", nodeid, pid, ais_msg->sender.id);
return;
} else if (ais_msg->host.size != 0 && safe_str_neq(ais_msg->host.uname, pcmk_uname)) {
/* Not for us */
return;
+ } else if (ais_msg->host.id != 0 && (pcmk_nodeid != ais_msg->host.id)) {
+ /* Not for us */
+ return;
}
/* Trust the node id supplied by CPG over whatever the message carried */
ais_msg->sender.id = nodeid;
if (ais_msg->sender.size == 0) {
crm_node_t *peer = crm_get_peer(nodeid, NULL);
if (peer == NULL) {
crm_err("Peer with nodeid=%u is unknown", nodeid);
} else if (peer->uname == NULL) {
crm_err("No uname for peer with nodeid=%u", nodeid);
} else {
crm_notice("Fixing uname for peer with nodeid=%u", nodeid);
/* NOTE(review): the copied length is not checked against MAX_NAME here */
ais_msg->sender.size = strlen(peer->uname);
memset(ais_msg->sender.uname, 0, MAX_NAME);
memcpy(ais_msg->sender.uname, peer->uname, ais_msg->sender.size);
}
}
ais_dispatch_message(ais_msg, pcmk_cpg_dispatch_fn);
}
/*
 * CPG configuration-change callback.
 *
 * Marks the CPG process of every current group member as online and that of
 * every departed member as offline in the peer cache.  The joined list is
 * not examined separately.
 *
 * Node ids are unsigned 32-bit values, so they are logged with %u, and the
 * loop index is unsigned to match the size_t list lengths.
 */
static void
pcmk_cpg_membership(cpg_handle_t handle,
                    const struct cpg_name *groupName,
                    const struct cpg_address *member_list, size_t member_list_entries,
                    const struct cpg_address *left_list, size_t left_list_entries,
                    const struct cpg_address *joined_list, size_t joined_list_entries)
{
    unsigned int i;

    for (i = 0; i < member_list_entries; i++) {
        crm_node_t *peer = crm_get_peer(member_list[i].nodeid, NULL);

        crm_debug("Member[%u] %u ", i, member_list[i].nodeid);
        crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ONLINESTATUS);
    }

    for (i = 0; i < left_list_entries; i++) {
        crm_node_t *peer = crm_get_peer(left_list[i].nodeid, NULL);

        crm_debug("Left[%u] %u ", i, left_list[i].nodeid);
        crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, OFFLINESTATUS);
    }
}
/* Callback table handed to cpg_initialize(): message delivery and group membership changes */
cpg_callbacks_t cpg_callbacks = {
.cpg_deliver_fn = pcmk_cpg_deliver,
.cpg_confchg_fn = pcmk_cpg_membership,
};
# endif
/*
 * Connect to the Corosync CPG API and join the group named after this
 * daemon (crm_system_name).
 *
 * On success the local node id is stored in cluster->nodeid, the CPG
 * descriptor is added to the mainloop with cluster->cs_dispatch as its user
 * data, and our own CPG process is marked online in the peer cache; TRUE is
 * returned.  On failure any handle obtained is finalized and cleared and
 * FALSE is returned.  Builds without Corosync support log an error and
 * exit(100).
 */
static gboolean
init_cpg_connection(crm_cluster_t *cluster)
{
#  ifdef SUPPORT_COROSYNC
    int rc = -1;
    int fd = 0;
    int retries = 0;
    crm_node_t *peer = NULL;

    struct mainloop_fd_callbacks cpg_fd_callbacks = {
        .dispatch = pcmk_cpg_dispatch,
        .destroy = cluster->destroy,
    };

    /* The group name buffer has a fixed size: copy with a bound and always terminate */
    strncpy(pcmk_cpg_group.value, crm_system_name, sizeof(pcmk_cpg_group.value) - 1);
    pcmk_cpg_group.value[sizeof(pcmk_cpg_group.value) - 1] = '\0';
    pcmk_cpg_group.length = strlen(pcmk_cpg_group.value) + 1;

    cs_repeat(retries, 30, rc = cpg_initialize(&pcmk_cpg_handle, &cpg_callbacks));
    if (rc != CS_OK) {
        crm_err("Could not connect to the Cluster Process Group API: %d\n", rc);
        goto bail;
    }

    retries = 0;
    cs_repeat(retries, 30, rc = cpg_local_get(pcmk_cpg_handle, (unsigned int *)&cluster->nodeid));
    if (rc != CS_OK) {
        crm_err("Could not get local node id from the CPG API");
        goto bail;
    }

    retries = 0;
    cs_repeat(retries, 30, rc = cpg_join(pcmk_cpg_handle, &pcmk_cpg_group));
    if (rc != CS_OK) {
        crm_err("Could not join the CPG group '%s': %d", crm_system_name, rc);
        goto bail;
    }

    rc = cpg_fd_get(pcmk_cpg_handle, &fd);
    if (rc != CS_OK) {
        crm_err("Could not obtain the CPG API connection: %d\n", rc);
        goto bail;
    }

    mainloop_add_fd("corosync-cpg", G_PRIORITY_MEDIUM, fd, cluster->cs_dispatch, &cpg_fd_callbacks);

  bail:
    if (rc != CS_OK) {
        if (pcmk_cpg_handle) {
            cpg_finalize(pcmk_cpg_handle);
            /* Keep terminate_cs_connection() from finalizing the same handle again */
            pcmk_cpg_handle = 0;
        }
        return FALSE;
    }

    peer = crm_get_peer(cluster->nodeid, pcmk_uname);
    crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ONLINESTATUS);

#  else
    crm_err("The Corosync CPG API is not supported in this build");
    exit(100);
#  endif
    return TRUE;
}
/*
 * Placeholder for builds that lack the Corosync quorum API.
 *
 * Both arguments are ignored: the function logs an error and terminates the
 * process with status 100.  The trailing return exists only to satisfy the
 * declared return type.
 */
gboolean
init_quorum_connection(gboolean(*dispatch) (unsigned long long, gboolean),
                       void (*destroy) (gpointer))
{
    crm_err("The Corosync quorum API is not supported in this build");
    exit(100);

    return TRUE;
}
/*
 * Connect to the Pacemaker plugin running inside Corosync (classic AIS).
 *
 * On success the plugin descriptor is added to the mainloop (using
 * cluster->destroy, or ais_destroy when none was supplied), our pid is
 * announced to the plugin, and the node id and name reported by the plugin
 * are stored in pcmk_nodeid / pcmk_uname and mirrored into cluster->nodeid
 * so that callers reading the node id from 'cluster' see the real value.
 *
 * Returns TRUE on success, FALSE if the connection could not be made or the
 * plugin did not tell us who we are.  Exits with status 100 if uname()
 * fails.
 */
static gboolean
init_cs_connection_classic(crm_cluster_t *cluster)
{
    int rc;
    int pid = 0;
    char *pid_s = NULL;
    struct utsname name;

    struct mainloop_fd_callbacks ais_fd_callbacks = {
        .dispatch = ais_dispatch,
        .destroy = cluster->destroy,
    };

    crm_info("Creating connection to our Corosync plugin");
    rc = coroipcc_service_connect(COROSYNC_SOCKET_NAME, PCMK_SERVICE_ID,
                                  AIS_IPC_MESSAGE_SIZE, AIS_IPC_MESSAGE_SIZE, AIS_IPC_MESSAGE_SIZE,
                                  &ais_ipc_handle);
    if (ais_ipc_handle) {
        coroipcc_fd_get(ais_ipc_handle, &ais_fd_async);
    } else {
        crm_info("Connection to our AIS plugin (%d) failed: %s (%d)",
                 PCMK_SERVICE_ID, strerror(errno), errno);
        return FALSE;
    }

    if (ais_fd_async <= 0 && rc == CS_OK) {
        crm_err("No context created, but connection reported 'ok'");
        rc = CS_ERR_LIBRARY;
    }

    if (rc != CS_OK) {
        crm_info("Connection to our AIS plugin (%d) failed: %s (%d)", PCMK_SERVICE_ID,
                 ais_error2text(rc), rc);
        return FALSE;
    }

    if (ais_fd_callbacks.destroy == NULL) {
        ais_fd_callbacks.destroy = ais_destroy;
    }

    mainloop_add_fd("corosync-plugin", G_PRIORITY_MEDIUM, ais_fd_async, cluster->cs_dispatch, &ais_fd_callbacks);
    crm_info("AIS connection established");

    /* Tell the plugin which process we are */
    pid = getpid();
    pid_s = crm_itoa(pid);
    send_ais_text(crm_class_cluster, pid_s, TRUE, NULL, crm_msg_ais);
    free(pid_s);

    if (uname(&name) < 0) {
        crm_perror(LOG_ERR, "Could not determin the current host");
        exit(100);
    }

    /* Without an id and name from the plugin there is nothing sensible to continue with */
    if (get_ais_nodeid(&pcmk_nodeid, &pcmk_uname) == FALSE) {
        crm_err("Could not obtain our node id and name from the plugin");
        return FALSE;
    }

    if (safe_str_neq(name.nodename, pcmk_uname)) {
        crm_crit("Node name mismatch!  Corosync supplied %s, our lookup returned %s",
                 pcmk_uname, name.nodename);
        crm_notice
            ("Node name mismatches usually occur when assigned automatically by DHCP servers");
        crm_notice("If this node was part of the cluster with a different name,"
                   " you will need to remove the old entry with crm_node --remove");
    }

    /* init_cs_connection_once() copies cluster->nodeid back into pcmk_nodeid */
    cluster->nodeid = pcmk_nodeid;
    return TRUE;
}
/*
 * IPC callback for messages from the master control process.
 *
 * The buffer is parsed as XML.  On classic AIS clusters each child element
 * is read as a node with "uname", "id" and "processes" attributes, and the
 * matching peer's process list is updated.  Children whose id is 0 are
 * logged as bad updates.  Always returns 0; 'length' and 'userdata' are
 * unused.
 */
static int
pcmk_mcp_dispatch(const char *buffer, ssize_t length, gpointer userdata)
{
    xmlNode *update = string2xml(buffer);

    if (update != NULL && is_classic_ais_cluster()) {
        xmlNode *child = __xml_first_child(update);

        while (child != NULL) {
            int node_id = 0;
            int proc_list = 0;
            const char *node_name = crm_element_value(child, "uname");

            crm_element_value_int(child, "id", &node_id);
            crm_element_value_int(child, "processes", &proc_list);

            if (node_id != 0) {
                crm_node_t *peer = crm_get_peer(node_id, node_name);

                crm_update_peer_proc(__FUNCTION__, peer, proc_list, NULL);
            } else {
                crm_log_xml_err(update, "Bad Update");
            }

            child = __xml_next(child);
        }
    }

    free_xml(update);
    return 0;
}
/*
 * IPC destroy callback for the master control process connection.
 * user_data, when non-NULL, is a function that is invoked with NULL.
 */
static void
pcmk_mcp_destroy(gpointer user_data)
{
    void (*on_destroy)(gpointer data) = user_data;

    if (on_destroy == NULL) {
        return;
    }
    on_destroy(NULL);
}
/*
 * Establish the cluster connection, trying up to five times.
 *
 * Each attempt calls init_cs_connection_once().  A result equal to CS_OK
 * counts as success: if the HA_mcp environment variable is set, an IPC
 * client for the master control process is registered and sent a "poke"
 * message, and TRUE is returned.  CS_ERR_TRY_AGAIN / CS_ERR_QUEUE_FULL
 * sleep for as many seconds as attempts made so far and try again; any
 * other result returns FALSE at once.  FALSE is also returned (after
 * logging) when all attempts are used up.
 */
gboolean
init_cs_connection(crm_cluster_t *cluster)
{
    static struct ipc_client_callbacks mcp_callbacks = {
        .dispatch = pcmk_mcp_dispatch,
        .destroy = pcmk_mcp_destroy
    };
    int attempts = 0;

    while (attempts < 5) {
        int rc = init_cs_connection_once(cluster);

        attempts++;

        if (rc == CS_OK) {
            if (getenv("HA_mcp")) {
                xmlNode *poke = create_xml_node(NULL, "poke");
                mainloop_io_t *ipc = mainloop_add_ipc_client(CRM_SYSTEM_MCP, G_PRIORITY_MEDIUM, 0, cluster->destroy, &mcp_callbacks);

                crm_ipc_send(mainloop_get_ipc_client(ipc), poke, 0, 0, NULL);
                free_xml(poke);
            }
            return TRUE;
        }

        if (rc != CS_ERR_TRY_AGAIN && rc != CS_ERR_QUEUE_FULL) {
            return FALSE;
        }

        sleep(attempts);
    }

    crm_err("Retry count exceeded: %d", attempts);
    return FALSE;
}
/*
 * Work out the name of the local node.
 *
 * On cman clusters (when built with cman support) the name is asked of cman
 * through a short-lived connection; a failed or empty lookup is reported
 * and yields NULL rather than an empty name.  On other clusters the name is
 * the nodename reported by uname(); if uname() fails the process exits with
 * status 100.
 *
 * Returns a newly allocated string the caller must free, or NULL when the
 * name could not be determined.
 */
static char *
get_local_node_name(void)
{
    char *name = NULL;
    struct utsname res;

    if (is_cman_cluster()) {
#  if SUPPORT_CMAN
        cman_node_t us;
        cman_handle_t cman;

        cman = cman_init(NULL);
        if (cman != NULL && cman_is_active(cman)) {
            memset(&us, 0, sizeof(us));
            if (cman_get_node(cman, CMAN_NODEID_US, &us) < 0 || us.cn_name[0] == 0) {
                /* Do not pass an empty string off as a valid node name */
                crm_err("Couldn't determin node name from CMAN");
            } else {
                name = strdup(us.cn_name);
                crm_info("Using CMAN node name: %s", name);
            }

        } else {
            crm_err("Couldn't determin node name from CMAN");
        }

        if (cman != NULL) {
            cman_finish(cman);
        }
#  endif

    } else if (uname(&res) < 0) {
        crm_perror(LOG_ERR, "Could not determin the current host");
        exit(100);

    } else {
        name = strdup(res.nodename);
    }
    return name;
}
extern int set_cluster_type(enum cluster_type_e type);
/*
 * Make a single attempt at connecting to the cluster layer.
 *
 * The peer cache is initialized, then the transport matching the detected
 * cluster type is brought up: the plugin for classic AIS, CPG for cman (for
 * which the local node name is also looked up).  Heartbeat and any other
 * type are rejected.  On success the local name length and node id are
 * recorded, the local node is added to the peer cache when its id is
 * non-zero, and cluster->uuid / cluster->uname are filled in.
 *
 * Returns TRUE on success and FALSE otherwise.
 */
gboolean
init_cs_connection_once(crm_cluster_t *cluster)
{
    enum cluster_type_e stack = get_cluster_type();

    crm_peer_init();

    /* Here we just initialize comms */
    if (stack == pcmk_cluster_classic_ais) {
        if (init_cs_connection_classic(cluster) == FALSE) {
            return FALSE;
        }

    } else if (stack == pcmk_cluster_cman) {
        if (init_cpg_connection(cluster) == FALSE) {
            return FALSE;
        }
        pcmk_uname = get_local_node_name();

    } else if (stack == pcmk_cluster_heartbeat) {
        crm_info("Could not find an active corosync based cluster");
        return FALSE;

    } else {
        crm_err("Invalid cluster type: %s (%d)", name_for_cluster_type(stack), stack);
        return FALSE;
    }

    crm_info("Connection to '%s': established", name_for_cluster_type(stack));

    CRM_ASSERT(pcmk_uname != NULL);
    pcmk_uname_len = strlen(pcmk_uname);

    pcmk_nodeid = cluster->nodeid;
    if (pcmk_nodeid != 0) {
        /* Ensure the local node always exists */
        crm_get_peer(pcmk_nodeid, pcmk_uname);
    }

    cluster->uuid = get_corosync_uuid(pcmk_nodeid, pcmk_uname);
    cluster->uname = strdup(pcmk_uname);

    return TRUE;
}
/*
 * Run basic consistency checks on an AIS message.
 *
 * msg:  the message to examine
 * data: optional uncompressed payload; when supplied (and the message is
 *       not marked compressed) its string length is compared with the
 *       payload length recorded in the message
 *
 * A message is rejected when its header size is zero, its header carries an
 * error, it has no payload, or the supplied payload's length disagrees with
 * the recorded length.  The outcome is logged with the message's routing
 * details.
 *
 * Returns TRUE when the message passed every check, FALSE otherwise.
 */
gboolean
check_message_sanity(const AIS_Message * msg, const char *data)
{
gboolean sane = TRUE;
/* NOTE(review): never set to TRUE in this function, so the "Repaired" branch below cannot run */
gboolean repaired = FALSE;
int dest = msg->host.type;
/* Payload size implied by the header: total size minus the fixed message structure */
int tmp_size = msg->header.size - sizeof(AIS_Message);
if (sane && msg->header.size == 0) {
crm_warn("Message with no size");
sane = FALSE;
}
if (sane && msg->header.error != CS_OK) {
crm_warn("Message header contains an error: %d", msg->header.error);
sane = FALSE;
}
if (sane && ais_data_len(msg) != tmp_size) {
crm_warn("Message payload size is incorrect: expected %d, got %d", ais_data_len(msg),
tmp_size);
/* NOTE(review): a size mismatch only warns; 'sane' stays TRUE - confirm this leniency is intended */
sane = TRUE;
}
if (sane && ais_data_len(msg) == 0) {
crm_warn("Message with no payload");
sane = FALSE;
}
if (sane && data && msg->is_compressed == FALSE) {
/* Length of the payload as a C string, including its terminator */
int str_size = strlen(data) + 1;
if (ais_data_len(msg) != str_size) {
int lpc = 0;
crm_warn("Message payload is corrupted: expected %d bytes, got %d",
ais_data_len(msg), str_size);
sane = FALSE;
/* Dump the bytes from shortly before the unexpected terminator up to the recorded size */
for (lpc = (str_size - 10); lpc < msg->size; lpc++) {
if (lpc < 0) {
lpc = 0;
}
crm_debug("bad_data[%d]: %d / '%c'", lpc, data[lpc], data[lpc]);
}
}
}
if (sane == FALSE) {
crm_err("Invalid message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)",
msg->id, ais_dest(&(msg->host)), msg_type2text(dest),
ais_dest(&(msg->sender)), msg_type2text(msg->sender.type),
msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size);
} else if (repaired) {
crm_err
("Repaired message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)",
msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)),
msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed,
ais_data_len(msg), msg->header.size);
} else {
crm_trace
("Verfied message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)",
msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)),
msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed,
ais_data_len(msg), msg->header.size);
}
return sane;
}
#endif
/*
 * Look up a configuration option, trying in order:
 *   1. the confdb object 'object_handle' (skipped when the handle is 0),
 *   2. the environment variable "HA_<key>",
 *   3. the supplied fallback (may be NULL).
 *
 * config:        confdb connection
 * object_handle: object to read the key from
 * key:           option name
 * value:         in/out; any previous string is freed, and on return it
 *                holds a newly allocated result or NULL when nothing was
 *                found and no fallback was given
 * fallback:      default used when neither source has the option
 *
 * Returns 0 when the option was found in confdb or the environment, and -1
 * when the fallback (or nothing) was used.
 */
static int
get_config_opt(confdb_handle_t config,
               hdb_handle_t object_handle, const char *key, char **value, const char *fallback)
{
    size_t len = 0;
    char *env_key = NULL;
    const char *env_value = NULL;
    char buffer[256];

    if (*value) {
        free(*value);
        *value = NULL;
    }

    if (object_handle > 0) {
        if (CS_OK == confdb_key_get(config, object_handle, key, strlen(key), &buffer, &len)) {
            /* Make sure the value is a terminated string before duplicating it */
            if (len >= sizeof(buffer)) {
                len = sizeof(buffer) - 1;
            }
            buffer[len] = '\0';
            *value = strdup(buffer);
        }
    }

    if (*value) {
        crm_info("Found '%s' for option: %s", *value, key);
        return 0;
    }

    env_key = crm_concat("HA", key, '_');
    env_value = getenv(env_key);
    free(env_key);

    /* Test the environment result itself: *value is always NULL at this point */
    if (env_value) {
        crm_info("Found '%s' in ENV for option: %s", env_value, key);
        *value = strdup(env_value);
        return 0;
    }

    if (fallback) {
        crm_info("Defaulting to '%s' for option: %s", fallback, key);
        *value = strdup(fallback);

    } else {
        crm_info("No default for option: %s", key);
    }

    return -1;
}
/*
 * Begin a confdb object search rooted at OBJECT_PARENT_HANDLE.
 *
 * Returns the handle to pass to config_find_next(), or 0 (after logging)
 * when the search context could not be created.
 */
static confdb_handle_t
config_find_init(confdb_handle_t config)
{
    confdb_handle_t root = OBJECT_PARENT_HANDLE;
    cs_error_t rc = confdb_object_find_start(config, root);

    if (rc != CS_OK) {
        crm_err("Couldn't create search context: %d", rc);
        return 0;
    }
    return root;
}
/*
 * Fetch the next confdb object called 'name' beneath 'top_handle'.
 *
 * Returns the object's handle, or 0 when top_handle is 0 (no search
 * context) or when confdb reports no further match.
 */
static hdb_handle_t
config_find_next(confdb_handle_t config, const char *name, confdb_handle_t top_handle)
{
    hdb_handle_t match = 0;
    cs_error_t rc = CS_OK;

    if (top_handle == 0) {
        crm_err("Couldn't search for %s: no valid context", name);
        return 0;
    }

    crm_trace("Searching for %s in " HDB_X_FORMAT, name, top_handle);
    rc = confdb_object_find(config, top_handle, name, strlen(name), &match);

    if (rc == CS_OK) {
        crm_info("Processing additional %s options...", name);
        return match;
    }

    crm_info("No additional configuration supplied for: %s", name);
    return 0;
}
enum cluster_type_e
find_corosync_variant(void)
{
confdb_handle_t config;
enum cluster_type_e found = pcmk_cluster_unknown;
int rc;
char *value = NULL;
confdb_handle_t top_handle = 0;
hdb_handle_t local_handle = 0;
static confdb_callbacks_t callbacks = { };
rc = confdb_initialize(&config, &callbacks);
if (rc != CS_OK) {
crm_debug("Could not initialize Cluster Configuration Database API instance error %d", rc);
return found;
}
top_handle = config_find_init(config);
local_handle = config_find_next(config, "service", top_handle);
while (local_handle) {
get_config_opt(config, local_handle, "name", &value, NULL);
if (safe_str_eq("pacemaker", value)) {
found = pcmk_cluster_classic_ais;
get_config_opt(config, local_handle, "ver", &value, "0");
crm_trace("Found Pacemaker plugin version: %s", value);
break;
}
local_handle = config_find_next(config, "service", top_handle);
}
if (found == pcmk_cluster_unknown) {
top_handle = config_find_init(config);
local_handle = config_find_next(config, "quorum", top_handle);
get_config_opt(config, local_handle, "provider", &value, NULL);
if (safe_str_eq("quorum_cman", value)) {
crm_trace("Found CMAN quorum provider");
found = pcmk_cluster_cman;
}
}
free(value);
confdb_finalize(config);
return found;
}
gboolean
crm_is_corosync_peer_active(const crm_node_t * node)
{
enum crm_proc_flag proc = crm_proc_none;
if (node == NULL) {
crm_trace("NULL");
return FALSE;
} else if(safe_str_neq(node->state, CRM_NODE_MEMBER)) {
crm_trace("%s: state=%s", node->uname, node->state);
return FALSE;
} else if(is_cman_cluster() && (node->processes & crm_proc_cpg)) {
/* If we can still talk to our peer process on that node,
* then its also part of the corosync membership
*/
crm_trace("%s: processes=%.16x", node->uname, node->processes);
return TRUE;
} else if(is_classic_ais_cluster() && (node->processes & crm_proc_plugin) == 0) {
crm_trace("%s: processes=%.16x", node->uname, node->processes);
return FALSE;
}
proc = text2proc(crm_system_name);
if(proc != crm_proc_none && (node->processes & proc) == 0) {
crm_trace("%s: proc %.16x not in %.16x", node->uname, proc, node->processes);
return FALSE;
}
return TRUE;
}
diff --git a/tools/attrd.c b/tools/attrd.c
index 50ec78d02f..f6eec8dbf7 100644
--- a/tools/attrd.c
+++ b/tools/attrd.c
@@ -1,916 +1,916 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <grp.h>
#include <crm/crm.h>
#include <crm/cib/internal.h>
#include <crm/msg_xml.h>
#include <crm/common/ipc.h>
#include <crm/cluster/internal.h>
#include <crm/common/mainloop.h>
#include <crm/common/xml.h>
#include <crm/attrd.h>
#define OPTARGS "hV"
#if SUPPORT_HEARTBEAT
ll_cluster_t *attrd_cluster_conn;
#endif
GMainLoop *mainloop = NULL;
char *attrd_uname = NULL;
char *attrd_uuid = NULL;
gboolean need_shutdown = FALSE;
GHashTable *attr_hash = NULL;
cib_t *cib_conn = NULL;
/* Per-connection state attached to each IPC client with qb_ipcs_context_set() */
typedef struct attrd_client_s {
/* Set from uid2username() when built with ENABLE_ACL; freed when the connection is destroyed */
char *user;
} attrd_client_t;
/*
 * State kept for one attribute (presumably the value type of attr_hash -
 * TODO confirm).  Every string member is owned by the entry and released by
 * free_hash_entry().
 */
typedef struct attr_hash_entry_s {
char *uuid;
char *id;
char *set;
char *section;
char *value;
char *stored_value;
int timeout;
char *dampen;
/* 0 is treated as "no timer" by stop_attrd_timer() */
guint timer_id;
char *user;
} attr_hash_entry_t;
void attrd_local_callback(xmlNode * msg);
gboolean attrd_timer_callback(void *user_data);
gboolean attrd_trigger_update(attr_hash_entry_t * hash_entry);
void attrd_perform_update(attr_hash_entry_t * hash_entry);
/*
 * Destructor for an attr_hash_entry_t: releases every string the entry owns
 * and then the entry itself.  A NULL argument is ignored.
 */
static void
free_hash_entry(gpointer data)
{
    attr_hash_entry_t *doomed = data;

    if (doomed != NULL) {
        free(doomed->uuid);
        free(doomed->id);
        free(doomed->set);
        free(doomed->section);
        free(doomed->value);
        free(doomed->stored_value);
        free(doomed->dampen);
        free(doomed->user);
        free(doomed);
    }
}
/*
 * libqb callback deciding whether a new IPC connection is accepted.
 *
 * Returns 0 to accept the connection.  A negative errno value rejects it:
 * -EPERM while the daemon is shutting down, -ENOMEM when the per-client
 * state cannot be allocated.  (Returning FALSE here would be 0, which the
 * IPC layer treats as acceptance.)
 *
 * On acceptance a new attrd_client_t is attached to the connection.  With
 * ENABLE_ACL the connection is additionally opened up to the daemon group
 * and the client's user name is recorded.
 */
static int32_t
attrd_ipc_accept(qb_ipcs_connection_t *c, uid_t uid, gid_t gid)
{
    attrd_client_t *new_client = NULL;
#if ENABLE_ACL
    struct group *crm_grp = NULL;
#endif

    crm_trace("Connecting %p for connection from %d by uid=%d gid=%d",
              c, crm_ipcs_client_pid(c), uid, gid);

    if (need_shutdown) {
        crm_info("Ignoring connection request during shutdown");
        return -EPERM;
    }

    new_client = calloc(1, sizeof(attrd_client_t));
    if (new_client == NULL) {
        crm_err("Could not allocate state for new client %d", crm_ipcs_client_pid(c));
        return -ENOMEM;
    }

#if ENABLE_ACL
    crm_grp = getgrnam(CRM_DAEMON_GROUP);
    if (crm_grp) {
        qb_ipcs_connection_auth_set(c, -1, crm_grp->gr_gid, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
    }

    new_client->user = uid2username(uid);
#endif

    qb_ipcs_context_set(c, new_client);
    return 0;
}
/* libqb callback run once a connection has been set up; it only traces the event */
static void
attrd_ipc_created(qb_ipcs_connection_t *c)
{
    crm_trace("Client %p connected from %d", c, crm_ipcs_client_pid(c));
}
/* Exit code means? */
/*
 * libqb callback for a message arriving from a local IPC client.
 *
 * The message is decoded, acknowledged when the client asked for a
 * response, stamped with the requesting user (ENABLE_ACL builds only) and
 * passed to attrd_local_callback().  Always returns 0, including when no
 * message could be decoded.
 */
static int32_t
attrd_ipc_dispatch(qb_ipcs_connection_t *c, void *data, size_t size)
{
    uint32_t request_id = 0;
    uint32_t request_flags = 0;
#if ENABLE_ACL
    attrd_client_t *client = qb_ipcs_context_get(c);
#endif
    xmlNode *request = crm_ipcs_recv(c, data, size, &request_id, &request_flags);

    if (request_flags & crm_ipc_client_response) {
        crm_trace("Ack'ing msg from %d (%p)", crm_ipcs_client_pid(c), c);
        crm_ipcs_send_ack(c, request_id, "ack", __FUNCTION__, __LINE__);
    }

    if (request != NULL) {
#if ENABLE_ACL
        determine_request_user(client->user, request, F_ATTRD_USER);
#endif

        crm_trace("Processing msg from %d (%p)", crm_ipcs_client_pid(c), c);
        crm_log_xml_trace(request, __PRETTY_FUNCTION__);

        attrd_local_callback(request);
        free_xml(request);

    } else {
        crm_debug("No msg from %d (%p)", crm_ipcs_client_pid(c), c);
    }

    return 0;
}
/*
 * IPC "connection_closed" handler: traces the event and nothing else.
 * Always returns 0; the client context is released in attrd_ipc_destroy().
 */
static int32_t
attrd_ipc_closed(qb_ipcs_connection_t *c)
{
    crm_trace("Connection %p from %d closed",
              c,
              crm_ipcs_client_pid(c));

    return 0;
}
/*
 * IPC "connection_destroyed" handler: frees the attrd_client_t context that
 * attrd_ipc_accept() attached to the connection, if there is one.
 */
static void
attrd_ipc_destroy(qb_ipcs_connection_t *c)
{
    attrd_client_t *ctx = qb_ipcs_context_get(c);

    if (ctx != NULL) {
        crm_trace("Destroying %p", c);
        free(ctx->user);
        free(ctx);
        crm_trace("Free'd the attrd client");
    }
}
/* IPC handler table; main() passes it to mainloop_add_ipc_server() */
struct qb_ipcs_service_handlers ipc_callbacks =
{
.connection_accept = attrd_ipc_accept,
.connection_created = attrd_ipc_created,
.msg_process = attrd_ipc_dispatch,
.connection_closed = attrd_ipc_closed,
.connection_destroyed = attrd_ipc_destroy
};
/*
 * SIGTERM handler (registered in main()): flags the shutdown and either
 * stops the running main loop or, if there is none yet, exits immediately.
 */
static void
attrd_shutdown(int nsig)
{
    need_shutdown = TRUE;
    crm_info("Exiting");

    if (mainloop == NULL || !g_main_is_running(mainloop)) {
        exit(0);
    }

    g_main_quit(mainloop);
}
/*
 * Print the command-line synopsis and terminate the process.
 *
 * cmd:         program name to show in the synopsis
 * exit_status: process exit code; 0 prints to stdout, anything else to stderr
 *
 * Only the options that main() really parses (-V and -h) are advertised.
 * This function does not return.
 */
static void
usage(const char *cmd, int exit_status)
{
    FILE *stream = exit_status ? stderr : stdout;

    fprintf(stream, "usage: %s [-Vh]\n", cmd);
    fprintf(stream, "\t-V\tincrease log verbosity\n");
    fprintf(stream, "\t-h\thelp message\n");
    fflush(stream);

    exit(exit_status);
}
/*
 * Cancel the pending dampen timer of an entry, if any, and clear its id.
 * NULL entries and entries without an armed timer are left alone.
 */
static void
stop_attrd_timer(attr_hash_entry_t * hash_entry)
{
    if (hash_entry == NULL || hash_entry->timer_id == 0) {
        return;
    }

    crm_trace("Stopping %s timer", hash_entry->id);
    g_source_remove(hash_entry->timer_id);
    hash_entry->timer_id = 0;
}
/*
 * Log a one-line summary of an entry at the given level, prefixed by text.
 * Note that the "Set:" label is filled from the entry's section field.
 */
static void
log_hash_entry(int level, attr_hash_entry_t * entry, const char *text)
{
    do_crm_log(level,
               "%s: Set: %s, Name: %s, Value: %s, Timeout: %s",
               text,
               entry->section,
               entry->id,
               entry->value,
               entry->dampen);
}
/*
 * Look up (creating on demand) the hash entry for the attribute named in
 * msg, then refresh its set, section, dampen/timeout and (with ENABLE_ACL)
 * user fields from the message.
 *
 * Returns the entry, or NULL when the message names no attribute or a new
 * entry could not be allocated.  The entry remains owned by attr_hash.
 */
static attr_hash_entry_t *
find_hash_entry(xmlNode * msg)
{
    const char *value = NULL;
    const char *attr = crm_element_value(msg, F_ATTRD_ATTRIBUTE);
    attr_hash_entry_t *hash_entry = NULL;

    if (attr == NULL) {
        crm_info("Ignoring message with no attribute name");
        return NULL;
    }

    hash_entry = g_hash_table_lookup(attr_hash, attr);
    if (hash_entry == NULL) {
        /* create one and add it */
        crm_info("Creating hash entry for %s", attr);
        hash_entry = calloc(1, sizeof(attr_hash_entry_t));
        if (hash_entry != NULL) {
            hash_entry->id = strdup(attr);
        }
        if (hash_entry == NULL || hash_entry->id == NULL) {
            crm_err("Could not allocate hash entry for %s", attr);
            free(hash_entry);
            return NULL;
        }
        /* The table borrows entry->id as key and owns the entry as value */
        g_hash_table_insert(attr_hash, hash_entry->id, hash_entry);
    }

    value = crm_element_value(msg, F_ATTRD_SET);
    if (value != NULL) {
        free(hash_entry->set);
        hash_entry->set = strdup(value);
        crm_debug("\t%s->set: %s", attr, value);
    }

    /* The section is always overwritten; absent means the status section */
    value = crm_element_value(msg, F_ATTRD_SECTION);
    if (value == NULL) {
        value = XML_CIB_TAG_STATUS;
    }
    free(hash_entry->section);
    hash_entry->section = strdup(value);
    crm_trace("\t%s->section: %s", attr, value);

    value = crm_element_value(msg, F_ATTRD_DAMPEN);
    if (value != NULL) {
        free(hash_entry->dampen);
        hash_entry->dampen = strdup(value);
        hash_entry->timeout = crm_get_msec(value);
        crm_trace("\t%s->timeout: %s", attr, value);
    }

#if ENABLE_ACL
    /*
     * Reset the pointer after freeing it: a message without F_ATTRD_USER
     * must not leave a dangling pointer behind for later use or a second
     * free().
     */
    free(hash_entry->user);
    hash_entry->user = NULL;

    value = crm_element_value(msg, F_ATTRD_USER);
    if (value != NULL) {
        hash_entry->user = strdup(value);
        crm_trace("\t%s->user: %s", attr, value);
    }
#endif

    log_hash_entry(LOG_DEBUG_2, hash_entry, "Found (and updated) entry:");
    return hash_entry;
}
#if SUPPORT_HEARTBEAT
/*
 * Destroy callback for the heartbeat connection.  An expected disconnect
 * (need_shutdown set) is only logged; an unexpected one stops the main loop
 * if it is running and otherwise exits the process with EX_OK.
 */
static void
attrd_ha_connection_destroy(gpointer user_data)
{
    crm_trace("Invoked");

    if (need_shutdown) {
        /* we signed out, so this is expected */
        crm_info("Heartbeat disconnection complete");
        return;
    }

    crm_crit("Lost connection to heartbeat service!");

    if (mainloop == NULL || !g_main_is_running(mainloop)) {
        exit(EX_OK);
    }

    g_main_quit(mainloop);
}
/*
 * Heartbeat message handler.  The message is converted to XML, then:
 *  - if it is addressed to this node (F_ATTRD_HOST), it is treated as a
 *    local request;
 *  - otherwise, unless it is flagged F_ATTRD_IGNORE_LOCALLY and originated
 *    here, the named attribute is written out straight away.
 */
static void
attrd_ha_callback(HA_Message * msg, void *private_data)
{
    xmlNode *xml = convert_ha_message(NULL, msg, __FUNCTION__);
    const char *origin = crm_element_value(xml, F_ORIG);
    const char *task = crm_element_value(xml, F_ATTRD_TASK);
    const char *target = crm_element_value(xml, F_ATTRD_HOST);
    const char *skip_local = crm_element_value(xml, F_ATTRD_IGNORE_LOCALLY);

    if (target != NULL && safe_str_eq(target, attrd_uname)) {
        crm_info("Update relayed from %s", origin);
        attrd_local_callback(xml);

    } else if (skip_local == NULL || safe_str_neq(origin, attrd_uname)) {
        attr_hash_entry_t *entry = NULL;

        crm_info("%s message from %s", task, origin);
        entry = find_hash_entry(xml);
        stop_attrd_timer(entry);
        attrd_perform_update(entry);
    }

    free_xml(xml);
}
#endif
#if SUPPORT_COROSYNC
/*
 * Corosync message handler.  Only crm_class_cluster payloads are parsed;
 * anything else, or unparsable data, is dropped.  A parsed message gets the
 * sender recorded as F_ORIG and is then routed the same way the heartbeat
 * handler does: as a local request when addressed to this node, otherwise
 * as an immediate update unless it is our own "ignore locally" message.
 *
 * Always returns TRUE.
 */
static gboolean
attrd_ais_dispatch(int kind, const char *from, const char *data)
{
    xmlNode *xml = NULL;
    const char *task = NULL;
    const char *target = NULL;
    const char *skip_local = NULL;

    if (kind == crm_class_cluster) {
        xml = string2xml(data);
        if (xml == NULL) {
            crm_err("Bad message received: '%.120s'", data);
        }
    }

    if (xml == NULL) {
        return TRUE;
    }

    task = crm_element_value(xml, F_ATTRD_TASK);
    target = crm_element_value(xml, F_ATTRD_HOST);
    skip_local = crm_element_value(xml, F_ATTRD_IGNORE_LOCALLY);

    /* crm_xml_add_int(xml, F_SEQ, wrapper->id); */
    crm_xml_add(xml, F_ORIG, from);

    if (target != NULL && safe_str_eq(target, attrd_uname)) {
        crm_notice("Update relayed from %s", from);
        attrd_local_callback(xml);

    } else if (skip_local == NULL || safe_str_neq(from, attrd_uname)) {
        attr_hash_entry_t *entry = NULL;

        crm_trace("%s message from %s", task, from);
        entry = find_hash_entry(xml);
        stop_attrd_timer(entry);
        attrd_perform_update(entry);
    }

    free_xml(xml);
    return TRUE;
}
/*
 * Destroy callback for the Corosync connection.  An expected disconnect
 * (need_shutdown set) is only logged; an unexpected one stops the main loop
 * if it is running and otherwise exits the process with EX_USAGE.
 */
static void
attrd_ais_destroy(gpointer unused)
{
    if (need_shutdown) {
        /* we signed out, so this is expected */
        crm_info("Corosync disconnection complete");
        return;
    }

    crm_crit("Lost connection to Corosync service!");

    if (mainloop == NULL || !g_main_is_running(mainloop)) {
        exit(EX_USAGE);
    }

    g_main_quit(mainloop);
}
#endif
/*
 * Disconnect notification for the CIB connection (user_data is the cib_t).
 * Signs off to clean up the IPC side, then exits with status 1 unless the
 * daemon is already shutting down.
 */
static void
attrd_cib_connection_destroy(gpointer user_data)
{
    cib_t *conn = user_data;

    conn->cmds->signoff(conn);  /* Ensure IPC is cleaned up */

    if (!need_shutdown) {
        /* eventually this will trigger a reconnect, not a shutdown */
        crm_err("Connection to the CIB terminated...");
        exit(1);
    }

    crm_info("Connection to the CIB terminated...");
}
/*
 * g_hash_table_foreach() visitor used for a "refresh" request: entries that
 * currently hold a value are pushed through attrd_timer_callback().
 */
static void
update_for_hash_entry(gpointer key, gpointer value, gpointer user_data)
{
    attr_hash_entry_t *candidate = value;

    if (candidate->value == NULL) {
        return;
    }

    attrd_timer_callback(value);
}
/*
 * g_hash_table_foreach() visitor that writes an entry to the CIB from this
 * node only.  Entries with an armed dampen timer are skipped: the timer
 * will expire and attrd_timer_callback() will do the right thing.
 */
static void
local_update_for_hash_entry(gpointer key, gpointer value, gpointer user_data)
{
    attr_hash_entry_t *candidate = value;

    if (candidate->timer_id != 0) {
        return;
    }

    crm_trace("Performing local-only update after replace for %s", candidate->id);
    attrd_perform_update(candidate);
}
/*
 * CIB notification callback (registered for T_CIB_REPLACE_NOTIFY in
 * cib_connect()): re-applies every attribute without a pending timer.
 */
static void
do_cib_replaced(const char *event, xmlNode * msg)
{
    crm_info("Updating all attributes after %s event", event);

    g_hash_table_foreach(attr_hash,
                         local_update_for_hash_entry,
                         NULL);
}
/*
 * Timer callback (scheduled from main()) that tries to sign on to the CIB.
 *
 * Returns TRUE while the signon should be retried on the next tick and
 * FALSE once the connection is fully set up.  When retries are exhausted,
 * or a callback cannot be registered, the process exits with status 100.
 *
 * On success the connection is published in cib_conn and every attribute
 * without a pending timer is written out.
 */
static gboolean
cib_connect(void *user_data)
{
    static int attempts = 1;
    static int max_retry = 20;
    static cib_t *local_conn = NULL;

    gboolean was_err = FALSE;

    if (local_conn == NULL) {
        local_conn = cib_new();
    }

    if (local_conn == NULL) {
        /* Nothing to sign on with; do not dereference a NULL connection */
        crm_err("Could not create a CIB connection object");
        was_err = TRUE;
    }

    if (was_err == FALSE) {
        int rc = -ENOTCONN;

        /*
         * NOTE(review): the tick where attempts == max_retry neither signs
         * on nor fails; the failure is reported one tick later.
         */
        if (attempts < max_retry) {
            crm_debug("CIB signon attempt %d", attempts);
            rc = local_conn->cmds->signon(local_conn, T_ATTRD, cib_command);
        }

        if (rc != pcmk_ok && attempts > max_retry) {
            crm_err("Signon to CIB failed: %s", pcmk_strerror(rc));
            was_err = TRUE;

        } else if (rc != pcmk_ok) {
            attempts++;
            return TRUE;

        } else {
            /* Only claim success when the signon really succeeded */
            crm_info("Connected to the CIB after %d signon attempts", attempts);
        }
    }

    if (was_err == FALSE) {
        int rc =
            local_conn->cmds->set_connection_dnotify(local_conn, attrd_cib_connection_destroy);

        if (rc != pcmk_ok) {
            crm_err("Could not set dnotify callback");
            was_err = TRUE;
        }
    }

    if (was_err == FALSE) {
        if (pcmk_ok !=
            local_conn->cmds->add_notify_callback(local_conn, T_CIB_REPLACE_NOTIFY,
                                                  do_cib_replaced)) {
            crm_err("Could not set CIB notification callback");
            was_err = TRUE;
        }
    }

    if (was_err) {
        crm_err("Aborting startup");
        exit(100);
    }

    cib_conn = local_conn;

    crm_info("Sending full refresh now that we're connected to the cib");
    g_hash_table_foreach(attr_hash, local_update_for_hash_entry, NULL);

    return FALSE;
}
/*
 * Daemon entry point: parses -V/-h, connects to the cluster layer, starts
 * the IPC server, schedules the CIB signon timer and runs the main loop.
 * After the loop ends, connections and the attribute table are torn down.
 *
 * Returns 0 after a normal shutdown and 100 when startup fails.
 */
int
main(int argc, char **argv)
{
    int flag = 0;
    int argerr = 0;
    /* Zero-initialised so callbacks and handles that are not set below
     * are never read as indeterminate values */
    crm_cluster_t cluster = { 0 };
    gboolean was_err = FALSE;
    qb_ipcs_connection_t *c = NULL;
    qb_ipcs_service_t *ipcs = NULL;

    crm_log_init(T_ATTRD, LOG_NOTICE, TRUE, FALSE, argc, argv, FALSE);
    mainloop_add_signal(SIGTERM, attrd_shutdown);

    while ((flag = getopt(argc, argv, OPTARGS)) != EOF) {
        switch (flag) {
            case 'V':
                crm_bump_log_level(argc, argv);
                break;
            case 'h':          /* Help message */
                usage(T_ATTRD, EX_OK);
                break;
            default:
                ++argerr;
                break;
        }
    }

    if (optind > argc) {
        ++argerr;
    }

    if (argerr) {
        usage(T_ATTRD, EX_USAGE);
    }

    attr_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_hash_entry);

    crm_info("Starting up");

    if (was_err == FALSE) {
#if SUPPORT_COROSYNC
        if (is_openais_cluster()) {
            cluster.destroy = attrd_ais_destroy;
            cluster.cs_dispatch = attrd_ais_dispatch;
        }
#endif

#if SUPPORT_HEARTBEAT
        if (is_heartbeat_cluster()) {
            cluster.hb_dispatch = attrd_ha_callback;
            cluster.destroy = attrd_ha_connection_destroy;
        }
#endif

        if (FALSE == crm_cluster_connect(&cluster)) {
            crm_err("HA Signon failed");
            was_err = TRUE;
        }

        attrd_uname = cluster.uname;
        attrd_uuid = cluster.uuid;
#if SUPPORT_HEARTBEAT
        attrd_cluster_conn = cluster.hb_conn;
#endif
    }

    if (was_err == FALSE) {
        /* Only report success when the signon really succeeded */
        crm_info("Cluster connection active");

        ipcs = mainloop_add_ipc_server(T_ATTRD, QB_IPC_NATIVE, &ipc_callbacks);
        if (ipcs == NULL) {
            crm_err("Could not start IPC server");
            was_err = TRUE;
        } else {
            crm_info("Accepting attribute updates");
        }
    }

    mainloop = g_main_new(FALSE);

    /* The CIB signon is retried from the main loop every 5000ms */
    if (0 == g_timeout_add_full(G_PRIORITY_LOW + 1, 5000, cib_connect, NULL, NULL)) {
        crm_info("Adding timer failed");
        was_err = TRUE;
    }

    if (was_err) {
        crm_err("Aborting startup");
        return 100;
    }

    crm_notice("Starting mainloop...");
    g_main_run(mainloop);
    crm_notice("Exiting...");

#if SUPPORT_HEARTBEAT
    if (is_heartbeat_cluster()) {
        attrd_cluster_conn->llc_ops->signoff(attrd_cluster_conn, TRUE);
        attrd_cluster_conn->llc_ops->delete(attrd_cluster_conn);
    }
#endif

    c = qb_ipcs_connection_first_get(ipcs);
    while (c != NULL) {
        qb_ipcs_connection_t *last = c;

        /* Fetch the successor before the current connection is dropped */
        c = qb_ipcs_connection_next_get(ipcs, last);

        /* There really shouldn't be anyone connected at this point */
        crm_notice("Disconnecting client %p, pid=%d...", last, crm_ipcs_client_pid(last));
        qb_ipcs_disconnect(last);
        qb_ipcs_connection_unref(last);
    }

    qb_ipcs_destroy(ipcs);

    if (cib_conn) {
        cib_conn->cmds->signoff(cib_conn);
        cib_delete(cib_conn);
    }

    g_hash_table_destroy(attr_hash);
    free(attrd_uuid);
    empty_uuid_cache();

    qb_log_fini();
    return 0;
}
/*
 * Context passed from attrd_perform_update() to attrd_cib_callback().
 * Both strings are heap copies that the callback frees.
 */
struct attrd_callback_s {
/* Attribute name (copy of the hash entry's id) */
char *attr;
/* Value that was sent; left NULL when the request was a delete */
char *value;
};
/*
 * Completion callback for CIB operations issued by attrd_perform_update().
 *
 * A negative call_id means the request was never sent and is only logged.
 * Otherwise -ENXIO on a delete counts as success; on success the entry's
 * stored_value is synchronised with the value that was written.  Known
 * transient failures are logged as warnings, everything else as errors.
 * user_data (struct attrd_callback_s) is always freed here.
 */
static void
attrd_cib_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
    struct attrd_callback_s *data = user_data;

    if (call_id < 0) {
        crm_warn("Update %s=%s failed: %s", data->attr, data->value, pcmk_strerror(call_id));

    } else {
        if (rc == -ENXIO && data->value == NULL) {
            /* Deleting something that is already gone is fine */
            rc = pcmk_ok;
        }

        if (rc == pcmk_ok) {
            attr_hash_entry_t *entry = NULL;

            crm_debug("Update %d for %s=%s passed", call_id, data->attr, data->value);
            entry = g_hash_table_lookup(attr_hash, data->attr);

            if (entry) {
                free(entry->stored_value);
                entry->stored_value = NULL;
                if (data->value != NULL) {
                    entry->stored_value = strdup(data->value);
                }
            }

        } else if (rc == -pcmk_err_diff_failed  /* When an attr changes while the CIB is syncing */
                   || rc == -ETIME      /* When an attr changes while there is a DC election */
                   || rc == -ENXIO) {   /* When an attr changes while the CIB is syncing a
                                         * newer config from a node that just came up
                                         */
            crm_warn("Update %d for %s=%s failed: %s",
                     call_id, data->attr, data->value, pcmk_strerror(rc));

        } else {
            crm_err("Update %d for %s=%s failed: %s",
                    call_id, data->attr, data->value, pcmk_strerror(rc));
        }
    }

    free(data->value);
    free(data->attr);
    free(data);
}
/*
 * Write one attribute to the CIB from this node: a delete when the entry
 * has no value, an update otherwise.  Nothing happens for a NULL entry or
 * while the CIB connection is not yet established.
 *
 * The operation's return code is registered with attrd_cib_callback()
 * together with copies of the attribute name and value.
 */
void
attrd_perform_update(attr_hash_entry_t * hash_entry)
{
    int rc = pcmk_ok;
    struct attrd_callback_s *data = NULL;
    const char *user_name = NULL;

    if (hash_entry == NULL) {
        return;
    }

    if (cib_conn == NULL) {
        crm_info("Delaying operation %s=%s: cib not connected", hash_entry->id,
                 crm_str(hash_entry->value));
        return;
    }

#if ENABLE_ACL
    if (hash_entry->user) {
        user_name = hash_entry->user;
        crm_trace("Performing request from user '%s'", hash_entry->user);
    }
#endif

    if (hash_entry->value != NULL) {
        /* send update */
        rc = update_attr_delegate(cib_conn, cib_none, hash_entry->section,
                                  attrd_uuid, NULL, hash_entry->set, hash_entry->uuid,
                                  hash_entry->id, hash_entry->value, FALSE, user_name);

        if (rc < 0) {
            crm_notice("Sent update %s=%s failed: %s", hash_entry->id, hash_entry->value, pcmk_strerror(rc));
        }

        /* Independent of the check above: a failure is reported by both */
        if (rc < 0 || safe_str_neq(hash_entry->value, hash_entry->stored_value)) {
            crm_notice("Sent update %d: %s=%s", rc, hash_entry->id, hash_entry->value);
        } else {
            crm_trace("Sent update %d: %s=%s", rc, hash_entry->id, hash_entry->value);
        }

    } else {
        const char *uuid_label = hash_entry->uuid ? hash_entry->uuid : "<n/a>";

        /* delete the attr */
        rc = delete_attr_delegate(cib_conn, cib_none, hash_entry->section, attrd_uuid, NULL,
                                  hash_entry->set, hash_entry->uuid, hash_entry->id, NULL, FALSE,
                                  user_name);

        if (rc >= 0 && hash_entry->stored_value) {
            crm_notice("Sent delete %d: node=%s, attr=%s, id=%s, set=%s, section=%s",
                       rc, attrd_uuid, hash_entry->id, uuid_label,
                       hash_entry->set, hash_entry->section);

        } else if (rc < 0 && rc != -ENXIO) {
            crm_notice
                ("Delete operation failed: node=%s, attr=%s, id=%s, set=%s, section=%s: %s (%d)",
                 attrd_uuid, hash_entry->id, uuid_label,
                 hash_entry->set, hash_entry->section, pcmk_strerror(rc), rc);

        } else {
            crm_trace("Sent delete %d: node=%s, attr=%s, id=%s, set=%s, section=%s",
                      rc, attrd_uuid, hash_entry->id, uuid_label,
                      hash_entry->set, hash_entry->section);
        }
    }

    data = calloc(1, sizeof(struct attrd_callback_s));
    data->attr = strdup(hash_entry->id);
    if (hash_entry->value != NULL) {
        data->value = strdup(hash_entry->value);
    }

    add_cib_op_callback(cib_conn, rc, FALSE, data, attrd_cib_callback);
}
/*
 * Handle a request that is to be processed on this node (from a local IPC
 * client, or relayed here by a peer).
 *
 *  - "refresh" re-sends every attribute that currently has a value.
 *  - A request naming another host is forwarded to that host.
 *  - Otherwise the attribute's entry is looked up or created, the new value
 *    is recorded, and the change is flushed either immediately or after the
 *    entry's dampen timeout.
 */
void
attrd_local_callback(xmlNode * msg)
{
/* Offset of the operator in "<5 chars>++" / "<5 chars>+=N" values;
 * presumably strlen("value") - TODO confirm */
static int plus_plus_len = 5;
attr_hash_entry_t *hash_entry = NULL;
const char *from = crm_element_value(msg, F_ORIG);
const char *op = crm_element_value(msg, F_ATTRD_TASK);
const char *attr = crm_element_value(msg, F_ATTRD_ATTRIBUTE);
const char *value = crm_element_value(msg, F_ATTRD_VALUE);
const char *host = crm_element_value(msg, F_ATTRD_HOST);
if (safe_str_eq(op, "refresh")) {
crm_notice("Sending full refresh (origin=%s)", from);
g_hash_table_foreach(attr_hash, update_for_hash_entry, NULL);
return;
}
if (host != NULL && safe_str_neq(host, attrd_uname)) {
/* Not for us: relay to the named host and stop.
 * NOTE(review): the next two lines still carry '-'/'+' diff markers */
- send_cluster_message(host, crm_msg_attrd, msg, FALSE);
+ send_cluster_message(crm_get_peer(0, host), crm_msg_attrd, msg, FALSE);
return;
}
crm_debug("%s message from %s: %s=%s", op, from, attr, crm_str(value));
hash_entry = find_hash_entry(msg);
if (hash_entry == NULL) {
return;
}
/* The uuid is only taken from the first request that supplies a key */
if (hash_entry->uuid == NULL) {
const char *key = crm_element_value(msg, F_ATTRD_KEY);
if (key) {
hash_entry->uuid = strdup(key);
}
}
crm_debug("Supplied: %s, Current: %s, Stored: %s",
value, hash_entry->value, hash_entry->stored_value);
if (safe_str_eq(value, hash_entry->value)
&& safe_str_eq(value, hash_entry->stored_value)) {
crm_trace("Ignoring non-change");
return;
} else if (value) {
int offset = 1;
int int_value = 0;
int value_len = strlen(value);
/* Anything that does not have "++" or "+=" at the operator offset is
 * stored verbatim.  NOTE(review): the five leading characters are not
 * themselves compared against anything */
if (value_len < (plus_plus_len + 2)
|| value[plus_plus_len] != '+'
|| (value[plus_plus_len + 1] != '+' && value[plus_plus_len + 1] != '=')) {
goto set_unexpanded;
}
/* Start from the current value; "++" adds 1, "+=N" adds char2score(N) */
int_value = char2score(hash_entry->value);
if (value[plus_plus_len + 1] != '+') {
const char *offset_s = value + (plus_plus_len + 2);
offset = char2score(offset_s);
}
int_value += offset;
/* Only the upper bound is clamped here */
if (int_value > INFINITY) {
int_value = INFINITY;
}
crm_info("Expanded %s=%s to %d", attr, value, int_value);
/* Store the expanded number in the message and re-read it as a string */
crm_xml_add_int(msg, F_ATTRD_VALUE, int_value);
value = crm_element_value(msg, F_ATTRD_VALUE);
}
set_unexpanded:
if (safe_str_eq(value, hash_entry->value) && hash_entry->timer_id) {
/* We're already waiting to set this value */
return;
}
free(hash_entry->value);
hash_entry->value = NULL;
if (value != NULL) {
hash_entry->value = strdup(value);
crm_debug("New value of %s is %s", attr, value);
}
/* Restart the dampen timer, or flush right away when no dampening is set */
stop_attrd_timer(hash_entry);
if (hash_entry->timeout > 0) {
hash_entry->timer_id = g_timeout_add(hash_entry->timeout, attrd_timer_callback, hash_entry);
} else {
attrd_trigger_update(hash_entry);
}
return;
}
gboolean
attrd_timer_callback(void *user_data)
{
stop_attrd_timer(user_data);
attrd_trigger_update(user_data);
return TRUE; /* Always return true, removed cleanly by stop_attrd_timer() */
}
/*
 * Build a "flush" message describing the entry and send it to all hosts.
 *
 * For entries without a dampen timeout the update is also performed
 * locally right away, and the message is tagged F_ATTRD_IGNORE_LOCALLY so
 * this node skips its own broadcast when it arrives back.
 *
 * Always returns TRUE.
 */
gboolean
attrd_trigger_update(attr_hash_entry_t * hash_entry)
{
    xmlNode *flush = NULL;

    /* send HA message to everyone */
    crm_notice("Sending flush op to all hosts for: %s (%s)",
               hash_entry->id, crm_str(hash_entry->value));
    log_hash_entry(LOG_DEBUG_2, hash_entry, "Sending flush op to all hosts for:");

    flush = create_xml_node(NULL, __FUNCTION__);

    /* Envelope */
    crm_xml_add(flush, F_TYPE, T_ATTRD);
    crm_xml_add(flush, F_ORIG, attrd_uname);
    crm_xml_add(flush, F_ATTRD_TASK, "flush");

    /* Attribute description */
    crm_xml_add(flush, F_ATTRD_ATTRIBUTE, hash_entry->id);
    crm_xml_add(flush, F_ATTRD_SET, hash_entry->set);
    crm_xml_add(flush, F_ATTRD_SECTION, hash_entry->section);
    crm_xml_add(flush, F_ATTRD_DAMPEN, hash_entry->dampen);
    crm_xml_add(flush, F_ATTRD_VALUE, hash_entry->value);

#if ENABLE_ACL
    if (hash_entry->user) {
        crm_xml_add(flush, F_ATTRD_USER, hash_entry->user);
    }
#endif

    if (hash_entry->timeout <= 0) {
        crm_xml_add(flush, F_ATTRD_IGNORE_LOCALLY, hash_entry->value);
        attrd_perform_update(hash_entry);
    }

    send_cluster_message(NULL, crm_msg_attrd, flush, FALSE);
    free_xml(flush);

    return TRUE;
}

File Metadata

Mime Type
text/x-diff
Expires
Mon, Sep 22, 10:56 PM (15 h, 34 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2403359
Default Alt Text
(372 KB)

Event Timeline